[Haskell-cafe] Using wreq Session handling in a monad transformer stack
Cody Goodman
codygman.consulting at gmail.com
Sun Nov 16 21:08:17 UTC 2014
Could I somehow add a Wreq.Session.Session to my monad transformer
stack? Should I use something other than Wreq? How would I create a
default Session, Wreq doesn't seem to export the constructor.
Basically I want to achieve Sessions/cookie handling.
lpaste of my code:
http://lpaste.net/114405
my full code (for those who want to view in email):
{-# LANGUAGE OverloadedStrings #-}
module Network.Scraper.State where
import Control.Lens ((^.))
import Control.Monad
import Control.Monad.IO.Class (liftIO)
import qualified Control.Monad.Trans.State.Strict as ST
import qualified Data.ByteString.Lazy as LBS
import Data.Maybe (fromJust, fromMaybe)
import Data.Monoid
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Network.Wreq (FormParam (..))
import qualified Network.Wreq as Wreq
import Network.Wreq.Session (Session (..), withSession)
import qualified Network.Wreq.Session as Sesh
import Network.Wreq.Types
import Safe
import Text.HTML.DOM (parseLBS)
import Text.XML.Cursor
import qualified Text.XML.Cursor.Generic as CG
data ScraperState =
PS { currentOptions :: Wreq.Options
, currentHtml :: LBS.ByteString
, currentCursor :: Maybe Cursor
, currentSession :: Session
} deriving (Show)
type Scraper = ST.StateT ScraperState IO
toCursor = fromDocument . parseLBS
initialSt =
PS { currentOptions = Wreq.defaults
, currentHtml = ("" :: LBS.ByteString)
, currentCursor = Nothing
-- , currentSession = ... how do I get a Session? Wreq doesn't
seem to export this type
}
setCurrentOptions :: Wreq.Options -> Scraper ()
setCurrentOptions o = do
scraper <- ST.get
ST.put $ scraper { currentOptions = o }
-- getCurrentPage :: Shpider Page
getCurrentCursor :: Scraper (Maybe Cursor)
getCurrentCursor = do
scraper <- ST.get
return $ currentCursor scraper
getCurrentSession :: Scraper (Session)
getCurrentSession = do
scraper <- ST.get
return $ currentSession scraper
setCurrentSession :: Session -> Scraper ()
setCurrentSession s = do
scraper <- ST.get
ST.put $ scraper { currentSession = s}
setCurrentCursor :: Cursor -> Scraper ( )
setCurrentCursor c = do
scraper <- ST.get
ST.put $ scraper { currentCursor = Just c }
setCurrentHtml :: LBS.ByteString -> Scraper ()
setCurrentHtml html = do
scraper <- ST.get
ST.put $ scraper { currentHtml = html }
runScraper :: Scraper a -> IO a
runScraper k = evalScraperWith k initialSt
evalScraperWith :: Scraper a -> ScraperState -> IO a
evalScraperWith k s = withSession $ \sesh -> do
-- set the current session to the mutable session variable
return $ setCurrentSession sesh
ST.evalStateT k s
formShortInfo' f = formInfo'
where
go Nothing = "N/A"
go (Just x) = x
formInfo = (headMay . attribute "name" $ f, headMay . attribute
"action" $ f)
formInfo' = (\(x,y) -> (go x, go y)) formInfo
ppTuple :: (T.Text, T.Text) -> T.Text
ppTuple = \(x,y) -> "[" <> x <> "]" <> ": " <> y
-- move to ../Spider.hs
printFormNames :: Scraper ()
printFormNames = do
c <- getCurrentCursor
let c' = fromMaybe (error "No cursor set") c
forms = c' $// element "form"
formInfo = map (ppTuple . formShortInfo') forms
liftIO $ mapM_ (TIO.putStrLn) formInfo
getFormByName :: T.Text -> Scraper [Cursor]
getFormByName name = do
c <- getCurrentCursor
let c' = fromMaybe (error "No cursor set") c
return $ c' $// element "form" >=> attributeIs "name" name
get :: String -> Scraper (LBS.ByteString)
get url = do
r <- liftIO $ Wreq.get url
let html = r ^. Wreq.responseBody
setCurrentHtml html
setCurrentCursor (toCursor html)
return html
post :: Postable a => String -> a -> Scraper (LBS.ByteString)
post url params = do
r <- liftIO $ Wreq.post url params
let html = r ^. Wreq.responseBody
setCurrentHtml html
setCurrentCursor (toCursor html)
return html
test :: Scraper ()
test = do
get "https://www.google.com" >> printFormNames
More information about the Haskell-Cafe
mailing list