[Haskell-cafe] HXT: how to get sibling element

Никитин Лев leon.v.nikitin at pravmail.ru
Fri Mar 16 19:07:32 CET 2012


Thanks to all. I've done it!

===============

import Text.XML.HXT.Core
import Text.XML.HXT.Curl
import Text.XML.HXT.HTTP
import Control.Arrow.ArrowNavigatableTree

-- | Address of the XML document that this program downloads and parses.
pageURL :: String
pageURL = "http://localhost/test.xml"

-- | Read the document at 'pageURL', keep its top-level @div@ elements, and
-- print every (label, text) pair produced by 'getTitle' and 'getSections'
-- between a @<@ line and a @>@ line.
main :: IO ()
main = do
    entries <- runX pipeline
    putStrLn "Articles:"
    putStrLn "<"
    mapM_ (putStrLn . render) entries
    putStrLn ">"
  where
    -- Configure the parser, load the document, then extract the pairs from
    -- each child element named "div".
    pipeline =
        configSysVars [withCanonicalize no, withValidate no, withTrace 0, withParseHTML no]
            >>> readDocument [withErrors no, withWarnings no, withHTTP []] pageURL
            >>> getChildren
            >>> isElem
            >>> hasName "div"
            >>> (getTitle <+> getSections)

    -- The trailing "\n" plus putStrLn's own newline leaves a blank line
    -- after every entry.
    render (label, value) = label ++ " is " ++ value ++ "\n"

-- | Produce @("Title", text)@ from the first @span@ child of the input
-- element, where @text@ is the trimmed content of each text node directly
-- inside that @span@.
--
-- Yields no result when the element has no @span@ child; the previous
-- @arr head@ raised an exception in that case. The pair is built with an
-- ordinary function, so the @TupleSections@ extension is not required.
getTitle :: IOSArrow XmlTree (String, String)
getTitle =
    firstSpan >>> getChildren >>> getText >>> arr (titled . trim)
  where
    -- Collect all span children, then keep at most the first one.
    firstSpan =
        listA (getChildren >>> isElem >>> hasName "span") >>> arrL (take 1)

    titled txt = ("Title", txt)

-- | For every @span@ child of the input element except the first, produce a
-- pair of
--
--   * the text directly inside the @span@, and
--   * the text of the first node yielded by 'followingSiblingAxis' for that
--     @span@, trimmed and with a leading @": "@ removed.
--
-- The tree is made navigatable with 'addNav' so that siblings can be reached.
-- A missing first @span@ or a @span@ without any following sibling now simply
-- yields no result; the previous @arr tail@ / @arr head@ raised an exception.
getSections :: IOSArrow XmlTree (String, String)
getSections =
    addNav
        >>> listA (getChildren >>> withoutNav (isElem >>> hasName "span"))
        >>> arrL (drop 1) -- skip the first span, emit the rest one by one
        >>> (sectionName &&& sectionBody)
  where
    sectionName = getChildren >>> remNav >>> getText

    sectionBody =
        listA followingSiblingAxis
            >>> arrL (take 1) -- keep at most the first following sibling
            >>> remNav
            >>> getText
            >>> arr (rc . trim)

-- The helpers below are top-level definitions (column 0): indenting them
-- would make them a continuation of 'getSections' and break parsing, and
-- 'trim' must also be visible to 'getTitle'.

-- | Drop leading spaces, newlines, carriage returns and tabs.
ltrim :: String -> String
ltrim = dropWhile (`elem` " \n\r\t")

-- | Drop trailing spaces, newlines, carriage returns and tabs.
rtrim :: String -> String
rtrim = reverse . ltrim . reverse

-- | Drop the same whitespace characters from both ends of a string.
trim :: String -> String
trim = ltrim . rtrim

-- | Remove a leading @": "@ (colon followed by one space) if present;
-- any other string is returned unchanged.
rc :: String -> String
rc (':' : ' ' : rest) = rest
rc other = other


==========================



More information about the Haskell-Cafe mailing list