[Haskell-cafe] Using wreq Session handling in a monad transformer stack
Cody Goodman
codygman.consulting at gmail.com
Mon Nov 17 04:53:39 UTC 2014
withSession doesn't seem to persist anything with this implementation:
withInitialState :: (ScraperState -> IO a) -> IO a
withInitialState callback = withSession $ \s -> do
let initialState = PS { currentOptions = Wreq.defaults
, currentHtml = ("" :: LBS.ByteString)
, currentCursor = Nothing
, currentSession = s
}
callback initialState
runScraper :: Scraper a -> IO a
runScraper k = withInitialState (evalScraperWith k)
evalScraperWith :: Scraper a -> ScraperState -> IO a
evalScraperWith k s = ST.evalStateT k s
I looked up the source to withSession, since I don't get why this isn't working.
withSession :: (Session -> IO a) -> IO a
withSession act = do
mv <- newMVar $ HTTP.createCookieJar []
HTTP.withManager defaultManagerSettings $ \mgr ->
act Session { seshCookies = mv, seshManager = mgr }
http://hackage.haskell.org/package/wreq-0.2.0.0/docs/src/Network-Wreq-Session.html#withSession
I did however notice an addition added a few days ago, maybe this is
what I need?
withSessionWith :: HTTP.ManagerSettings -> (Session -> IO a) -> IO a
withSessionWith settings act = do
mv <- newMVar $ HTTP.createCookieJar []
HTTP.withManager settings $ \mgr ->
act Session { seshCookies = mv
, seshManager = mgr
, seshRun = runWith
}
runWith :: Session -> Run Body -> Run Body
runWith Session{..} act (Req _ req) =
modifyMVar seshCookies $ \cj -> do
resp <- act (Req (Right seshManager) (req & Lens.cookieJar ?~ cj))
return (resp ^. Wreq.responseCookieJar, resp)
https://github.com/bos/wreq/blob/master/Network/Wreq/Session.hs#L42
On Sun, Nov 16, 2014 at 4:07 PM, Chris Wong <lambda.fairy at gmail.com> wrote:
> Hi Cody,
>
> I don't use wreq myself, but withSession [1] is probably what you want.
>
> Here's an example:
>
> withInitialState :: (ScraperState -> IO a) -> IO a
> withInitialState callback = withSession $ \session ->
> let initialState = PS {
> -- ... other options here ...
> currentSession = session
> }
> in callback initialState
>
> [1] http://hackage.haskell.org/package/wreq-0.2.0.0/docs/Network-Wreq-Session.html
>
> On Mon, Nov 17, 2014 at 10:08 AM, Cody Goodman
> <codygman.consulting at gmail.com> wrote:
>> Could I somehow add a Wreq.Session.Session to my monad transformer
>> stack? Should I use something other than Wreq? How would I create a
>> default Session, Wreq doesn't seem to export the constructor.
>> Basically I want to achieve Sessions/cookie handling.
>>
>> lpaste of my code:
>> http://lpaste.net/114405
>>
>>
>> my full code (for those who want to view in email):
>>
>> {-# LANGUAGE OverloadedStrings #-}
>> module Network.Scraper.State where
>>
>> import Control.Lens ((^.))
>> import Control.Monad
>> import Control.Monad.IO.Class (liftIO)
>> import qualified Control.Monad.Trans.State.Strict as ST
>> import qualified Data.ByteString.Lazy as LBS
>> import Data.Maybe (fromJust, fromMaybe)
>> import Data.Monoid
>> import qualified Data.Text as T
>> import qualified Data.Text.IO as TIO
>> import Network.Wreq (FormParam (..))
>> import qualified Network.Wreq as Wreq
>> import Network.Wreq.Session (Session (..), withSession)
>> import qualified Network.Wreq.Session as Sesh
>> import Network.Wreq.Types
>> import Safe
>> import Text.HTML.DOM (parseLBS)
>> import Text.XML.Cursor
>> import qualified Text.XML.Cursor.Generic as CG
>>
>>
>> data ScraperState =
>> PS { currentOptions :: Wreq.Options
>> , currentHtml :: LBS.ByteString
>> , currentCursor :: Maybe Cursor
>> , currentSession :: Session
>> } deriving (Show)
>>
>> type Scraper = ST.StateT ScraperState IO
>>
>> toCursor = fromDocument . parseLBS
>>
>> initialSt =
>> PS { currentOptions = Wreq.defaults
>> , currentHtml = ("" :: LBS.ByteString)
>> , currentCursor = Nothing
>> -- , currentSession = ... how do I get a Session? Wreq doesn't
>> seem to export this type
>> }
>>
>> setCurrentOptions :: Wreq.Options -> Scraper ()
>> setCurrentOptions o = do
>> scraper <- ST.get
>> ST.put $ scraper { currentOptions = o }
>>
>> -- getCurrentPage :: Shpider Page
>> getCurrentCursor :: Scraper (Maybe Cursor)
>> getCurrentCursor = do
>> scraper <- ST.get
>> return $ currentCursor scraper
>>
>> getCurrentSession :: Scraper (Session)
>> getCurrentSession = do
>> scraper <- ST.get
>> return $ currentSession scraper
>>
>> setCurrentSession :: Session -> Scraper ()
>> setCurrentSession s = do
>> scraper <- ST.get
>> ST.put $ scraper { currentSession = s}
>>
>> setCurrentCursor :: Cursor -> Scraper ( )
>> setCurrentCursor c = do
>> scraper <- ST.get
>> ST.put $ scraper { currentCursor = Just c }
>>
>> setCurrentHtml :: LBS.ByteString -> Scraper ()
>> setCurrentHtml html = do
>> scraper <- ST.get
>> ST.put $ scraper { currentHtml = html }
>>
>> runScraper :: Scraper a -> IO a
>> runScraper k = evalScraperWith k initialSt
>>
>> evalScraperWith :: Scraper a -> ScraperState -> IO a
>> evalScraperWith k s = withSession $ \sesh -> do
>> -- set the current session to the mutable session variable
>> return $ setCurrentSession sesh
>> ST.evalStateT k s
>>
>> formShortInfo' f = formInfo'
>> where
>> go Nothing = "N/A"
>> go (Just x) = x
>> formInfo = (headMay . attribute "name" $ f, headMay . attribute
>> "action" $ f)
>> formInfo' = (\(x,y) -> (go x, go y)) formInfo
>>
>> ppTuple :: (T.Text, T.Text) -> T.Text
>> ppTuple = \(x,y) -> "[" <> x <> "]" <> ": " <> y
>>
>> -- move to ../Spider.hs
>> printFormNames :: Scraper ()
>> printFormNames = do
>> c <- getCurrentCursor
>> let c' = fromMaybe (error "No cursor set") c
>> forms = c' $// element "form"
>> formInfo = map (ppTuple . formShortInfo') forms
>> liftIO $ mapM_ (TIO.putStrLn) formInfo
>>
>> getFormByName :: T.Text -> Scraper [Cursor]
>> getFormByName name = do
>> c <- getCurrentCursor
>> let c' = fromMaybe (error "No cursor set") c
>> return $ c' $// element "form" >=> attributeIs "name" name
>>
>> get :: String -> Scraper (LBS.ByteString)
>> get url = do
>> r <- liftIO $ Wreq.get url
>> let html = r ^. Wreq.responseBody
>> setCurrentHtml html
>> setCurrentCursor (toCursor html)
>> return html
>>
>> post :: Postable a => String -> a -> Scraper (LBS.ByteString)
>> post url params = do
>> r <- liftIO $ Wreq.post url params
>> let html = r ^. Wreq.responseBody
>> setCurrentHtml html
>> setCurrentCursor (toCursor html)
>> return html
>>
>> test :: Scraper ()
>> test = do
>> get "https://www.google.com" >> printFormNames
>> _______________________________________________
>> Haskell-Cafe mailing list
>> Haskell-Cafe at haskell.org
>> http://www.haskell.org/mailman/listinfo/haskell-cafe
>
>
>
> --
> https://lambda.xyz
More information about the Haskell-Cafe
mailing list