[Haskell-cafe] Slow Text.JSON parser
Sjoerd Visscher
sjoerd at w3future.com
Sat Jan 17 16:07:57 EST 2009
Hi,
Somebody told me about Parsec 3, which uses a Stream type class so it
can parse any data type. This sounded like the right way to do
encoding independent parsing, so I decided to see how it would work to
parse UTF8 JSON.
Sadly I could not use Text.JSON.Parsec directly, because it uses the
old Parsec CharParser type. So I copied to code, and also replaced
p_number with the "floating" parser from Text.Parsec.Token, because
Text.JSON.Parsec uses readFloat (a dirty hack imho) which works only
on String.
If Text.JSON.Parsec was written for Parsec 3, the only thing to write
to get UTF8 JSON parsing would be:
instance (Monad m, U.UTF8Bytes string index) => Stream (U.UTF8 string)
m Char where
uncons = return . U.uncons
I did not do any performance measuring yet, I was glad I got it
working. Any comments on the code is appreciated!
greetings,
Sjoerd Visscher
{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses,
UndecidableInstances #-}
import qualified Data.String.UTF8 as U
import qualified Data.ByteString as B
import Text.Parsec hiding (many, optional, (<|>))
import Control.Applicative
import Text.JSON.Types
import Control.Monad
import Data.Char
import Numeric
instance (Monad m, U.UTF8Bytes string index) => Stream (U.UTF8 string)
m Char where
uncons = return . U.uncons
type CharParser st = Parsec (U.UTF8 B.ByteString) st
parseFile :: FilePath -> IO (Either ParseError JSValue)
parseFile fileName = do
bs <- B.readFile fileName
return $ runParser json () fileName (U.fromRep bs)
parseString :: String -> Either ParseError JSValue
parseString s = runParser json () "(unknown)" (U.fromString s)
json :: CharParser () JSValue
json = spaces *> p_value
tok :: CharParser () a -> CharParser () a
tok p = p <* spaces
p_value :: CharParser () JSValue
p_value = (JSNull <$ p_null)
<|> (JSBool <$> p_boolean)
<|> (JSArray <$> p_array)
<|> (JSString <$> p_js_string)
<|> (JSObject <$> p_js_object)
<|> (JSRational False <$> p_number)
<?> "JSON value"
p_null :: CharParser () ()
p_null = tok (string "null") >> return ()
p_boolean :: CharParser () Bool
p_boolean = tok
( (True <$ string "true")
<|> (False <$ string "false")
)
p_array :: CharParser () [JSValue]
p_array = between (tok (char '[')) (tok (char ']'))
$ p_value `sepBy` tok (char ',')
p_string :: CharParser () String
p_string = between (tok (char '"')) (char '"') (many p_char)
where p_char = (char '\\' >> p_esc)
<|> (satisfy (\x -> x /= '"' && x /= '\\'))
p_esc = ('"' <$ char '"')
<|> ('\\' <$ char '\\')
<|> ('/' <$ char '/')
<|> ('\b' <$ char 'b')
<|> ('\f' <$ char 'f')
<|> ('\n' <$ char 'n')
<|> ('\r' <$ char 'r')
<|> ('\t' <$ char 't')
<|> (char 'u' *> p_uni)
<?> "escape character"
p_uni = check =<< count 4 (satisfy isHexDigit)
where check x | code <= max_char = pure (toEnum code)
| otherwise = empty
where code = fst $ head $ readHex x
max_char = fromEnum (maxBound :: Char)
p_object :: CharParser () [(String,JSValue)]
p_object = between (tok (char '{')) (tok (char '}'))
$ p_field `sepBy` tok (char ',')
where p_field = (,) <$> (p_string <* tok (char ':')) <*> p_value
p_number :: CharParser () Rational
p_number = tok floating where
floating :: CharParser () Rational
floating = do{ n <- decimal
; fract <- option 0 fraction
; expo <- option 1 exponent'
; return ((fromInteger n + fract)*expo)
}
fraction = do{ char '.'
; digits <- many1 digit <?> "fraction"
; return (foldr op 0 digits)
}
<?> "fraction"
where
op d f = (f + fromIntegral (digitToInt d))/10
exponent' = do{ oneOf "eE"
; f <- sign
; e <- decimal <?> "exponent"
; return (power (f e))
}
<?> "exponent"
where
power e | e < 0 = 1/power(-e)
| otherwise = fromInteger (10^e)
sign = (char '-' >> return negate)
<|> (char '+' >> return id)
<|> return id
decimal = number 10 digit
number base baseDigit
= do{ digits <- many1 baseDigit
; let n = foldl (\x d -> base*x + toInteger (digitToInt
d)) 0 digits
; seq n (return n)
}
p_js_string :: CharParser () JSString
p_js_string = toJSString <$> p_string
p_js_object :: CharParser () (JSObject JSValue)
p_js_object = toJSObject <$> p_object
More information about the Haskell-Cafe
mailing list