[Haskell-cafe] HXT: how to get sibling element
Никитин Лев
leon.v.nikitin at pravmail.ru
Fri Mar 16 19:07:32 CET 2012
Thanks to all. I've done it!
===============
import Text.XML.HXT.Core
import Text.XML.HXT.Curl
import Text.XML.HXT.HTTP
import Control.Arrow.ArrowNavigatableTree
-- | Location of the XML document that 'main' reads.
pageURL :: String
pageURL = "http://localhost/test.xml"
-- | Read the document at 'pageURL', select the @div@ elements among the
-- children of the node produced by 'readDocument', collect the
-- (label, value) pairs emitted by 'getTitle' and 'getSections', and print
-- them between an "Articles:" header and @<@ / @>@ marker lines.
main :: IO ()
main = do
  -- The statements of the do-block must be indented further than @main@;
  -- at column 0 the layout rule closes the block before it starts.
  r <- runX
    ( configSysVars [withCanonicalize no, withValidate no, withTrace 0, withParseHTML no]
        >>> readDocument [withErrors no, withWarnings no, withHTTP []] pageURL
        >>> getChildren
        >>> isElem
        >>> hasName "div"
        >>> (getTitle <+> getSections)
    )
  putStrLn "Articles:"
  putStrLn "<"
  -- Each pair is printed as "<label> is <value>" followed by an empty line.
  mapM_ (\(label, value) -> putStrLn (label ++ " is " ++ value ++ "\n")) r
  putStrLn ">"
-- | Produce @("Title", text)@ for the trimmed text children of the first
-- @span@ child element of the input node.
--
-- Yields no result (instead of crashing on @head []@) when the input has no
-- @span@ child. The pair is built with an ordinary lambda so the definition
-- does not depend on the @TupleSections@ extension.
getTitle :: IOSArrow XmlTree (String, String)
getTitle =
  listA (getChildren >>> isElem >>> hasName "span")
    >>> arrL (take 1) -- keep only the first span, if there is one
    >>> getChildren
    >>> getText
    >>> arr trim
    >>> arr (\title -> ("Title", title))
-- | For every @span@ child of the input node except the first one, pair the
-- text inside the span with the text of the first node on its
-- following-sibling axis (trimmed, and with a leading @": "@ removed by 'rc').
--
-- The input is lifted to a navigable tree with 'addNav' so that
-- 'followingSiblingAxis' can be used; 'remNav' converts back before 'getText'.
-- NOTE(review): presumably 'followingSiblingAxis' lists siblings in document
-- order, so the first entry is the node right after the span -- confirm
-- against the HXT documentation.
--
-- Total variants replace the original @arr tail@ / @arr head@: a node with no
-- @span@ children, or a span with no following sibling, now simply yields no
-- pair instead of raising an exception.
getSections :: IOSArrow XmlTree (String, String)
getSections =
  addNav
    >>> listA (getChildren >>> withoutNav (isElem >>> hasName "span"))
    >>> arrL (drop 1) -- skip the first span and emit the rest one by one
    >>> ( (getChildren >>> remNav >>> getText)
            &&& ( listA followingSiblingAxis
                    >>> arrL (take 1) -- first following sibling, if any
                    >>> remNav
                    >>> getText
                    >>> arr (rc . trim)
                )
        )
-- | Remove leading spaces, newlines, carriage returns and tabs.
ltrim :: String -> String
ltrim = dropWhile isBlank
  where
    -- Exactly the four characters stripped by this helper.
    isBlank c = c `elem` " \n\r\t"
-- | Remove trailing blanks by applying 'ltrim' to the reversed string.
rtrim :: String -> String
rtrim s = reverse (ltrim (reverse s))
-- | Remove blanks from both ends: trailing first ('rtrim'), then leading
-- ('ltrim').
trim :: String -> String
trim s = ltrim (rtrim s)
-- | Drop a leading colon-plus-space (@": "@) if present; any other string is
-- returned unchanged.
rc :: String -> String
rc s = case s of
  ':' : ' ' : rest -> rest
  _ -> s
==========================
More information about the Haskell-Cafe
mailing list