[Haskell-cafe] request for code review
Shannon -jj Behrens
jjinux at gmail.com
Sun Mar 5 05:43:32 EST 2006
Hi,
I'm working on another article like
<http://www.linuxjournal.com/article/8850>. This time, I'm taking an
exercise out of "Expert C Programming: Deep C Secrets" and
translating it into Haskell. The program translates C type
declarations into English. I would greatly appreciate some code
review. I'd prefer to look like an idiot in front of you guys rather
than in front of everyone in the world! ;)
Please understand, I am not a Haskell expert! Therefore, please make
your suggestions simple enough that I can actually accomplish them!
By the way, my code *mostly* follows the code laid out in the book. I
don't use a lexer or a parser or greatly improve on his algorithm.
I'd like the Haskell and C versions to be similar so that they can be
compared.
The C version is:
<http://www.cs.may.ie/~jpower/Courses/compilers/labs/lab3/parse_decl.c>
The Haskell version is below.
Thanks!
-jj
{- Translate C type declarations into English.
This exercise was taken from "Expert C Programming: Deep C Secrets", p. 84.
Example: echo -n "int *p;" | runhugs cdecl.hs
Name: Shannon -jj Behrens <jjinux at gmail.com>
Date: Fri Feb 17 00:03:38 PST 2006
-}
import Char (isSpace, isAlphaNum, isDigit)
-- |> is like a UNIX pipe.
infixl 9 |>
x |> f = f x
data TokenType = Identifier | Qualifier | Type | Symbol Char
deriving (Show, Eq)
data Token = Token {
tokenType :: TokenType,
tokenValue :: String
} deriving Show
data ParseContext = ParseContext {
input :: String, -- The input that has not been parsed yet.
output :: [String], -- A list of strings in the reverse order of that which
-- they should be printed (e.g. [" a dog.", "I have"]).
currTok :: Token, -- The current token, if defined.
stack :: [Token] -- A stack of tokens we haven't dealt with yet.
} deriving Show
-- For convenience:
currTokType :: ParseContext -> TokenType
currTokType ctx = ctx |> currTok |> tokenType
currTokValue :: ParseContext -> String
currTokValue ctx = ctx |> currTok |> tokenValue
type ParseContextTransformation = ParseContext -> ParseContext
-- Start a new ParseContext given an input string.
createParseContext :: String -> ParseContext
createParseContext input = ParseContext {input=input, output=[], stack=[]}
-- Create the final output string given a ParseContext.
consolidateOutput :: ParseContext -> String
consolidateOutput ctx = ctx |> output |> reverse |> concat
{- "Write" to a ParseContext's output. The API is a bit strange.
(writeOutput s) is itself a ParseContextTransformation which you can apply
to ParseContexts. Strange but convenient.
-}
writeOutput :: String -> ParseContextTransformation
writeOutput s = \ctx ->
let newOutput = s : (output ctx) in
ctx {output=newOutput}
-- Return the top token on the stack.
stackTop :: ParseContext -> Token
stackTop ctx =
let (x:xs) = stack ctx in x
-- Pop the stack.
pop :: ParseContextTransformation
pop ctx =
let (x:xs) = stack ctx in ctx {stack=xs}
-- Write the value of the top of the stack and then pop it.
popAndWrite :: ParseContextTransformation
popAndWrite ctx =
ctx |>
((stackTop ctx) |>
tokenValue |>
writeOutput) |>
pop
-- Classify a string into a Token.
classifyString :: String -> Token
classifyString "const" = Token Qualifier "read-only"
classifyString "*" = Token (Symbol '*') "pointer to"
classifyString s@(c:[])
| not (isAlphaNum c) = Token (Symbol c) s
classifyString s = Token (whichType s) s
where whichType "volatile" = Qualifier
whichType "void" = Type
whichType "char" = Type
whichType "signed" = Type
whichType "unsigned" = Type
whichType "short" = Type
whichType "int" = Type
whichType "long" = Type
whichType "float" = Type
whichType "double" = Type
whichType "struct" = Type
whichType "union" = Type
whichType "enum" = Type
whichType _ = Identifier
-- Read the next token into currTok.
getToken :: ParseContextTransformation
getToken ctx@(ParseContext {input=s}) =
let lstrip s = dropWhile isSpace s
(token, theRest) = s |> lstrip |> lexString in
ctx {currTok=token, input=theRest}
-- Read a token. Return it and the left-over portion of the string.
lexString :: String -> (Token, String)
lexString s@(c:cs) | isAlphaNum c =
let (tokString, theRest) = span isAlphaNum s
token = classifyString tokString in
(token, theRest)
lexString ('*':cs) = (classifyString "*", cs)
lexString (c:cs) = (classifyString (c:[]), cs)
-- Put tokens on the stack until we reach the first identifier.
readToFirstIdentifier :: ParseContextTransformation
readToFirstIdentifier ctx =
let afterIdentifier = ctx |> getToken |> pushUntilIdentifier
identifier = afterIdentifier |> currTokValue
s = identifier ++ " is " in
(afterIdentifier {output=[s]}) |>
getToken
-- Keep pushing tokens until we hit an identifier.
pushUntilIdentifier :: ParseContextTransformation
pushUntilIdentifier ctx
| currTokType ctx == Identifier = ctx
| otherwise =
let newStack = (currTok ctx) : (stack ctx) in
(ctx {stack=newStack}) |>
getToken |>
pushUntilIdentifier
-- Deal with arrays.
dealWithArrays :: ParseContextTransformation
dealWithArrays ctx =
let writeIfNumber ctx = -- Call writeSize if a number.
if ctx |> currTokValue |> (!! 0) |> isDigit
then ctx |> writeSize |> getToken
else ctx
writeSize ctx = -- Output the array size.
let num = ctx |> currTokValue |> read |> (+ -1) |> show
s = "0.." ++ num ++ " " in
ctx |> (writeOutput s) in
case currTokType ctx of
Symbol '[' ->
ctx |>
(writeOutput "array ") |>
getToken |>
writeIfNumber |>
getToken |>
(writeOutput "of ") |>
dealWithArrays
_ -> ctx -- Recurse until we get past the ['s.
-- Deal with function arguments.
dealWithFunctionArgs :: ParseContextTransformation
dealWithFunctionArgs ctx =
let getUntilParen ctx = -- Read tokens until we hit ).
case currTokType ctx of
Symbol ')' -> ctx
_ -> ctx |> getToken |> getUntilParen in
ctx |>
getUntilParen |>
getToken |>
(writeOutput "function returning ")
-- Deal with pointers.
dealWithPointers :: ParseContextTransformation
dealWithPointers ctx =
case ctx |> stackTop |> tokenType of
Symbol '*' ->
ctx |>
popAndWrite |>
(writeOutput " ") |>
dealWithPointers
_ -> ctx -- Recurse until we get past the *'s.
-- Process tokens that we stacked while reading to identifier.
dealWithStack :: ParseContextTransformation
dealWithStack ctx =
case stack ctx of
[] -> ctx
(x:xs) ->
case tokenType x of
Symbol '(' ->
ctx |> pop |> getToken |> dealWithDeclarator
_ -> ctx |> popAndWrite
-- Do all parsing after first identifier.
dealWithDeclarator :: ParseContextTransformation
dealWithDeclarator ctx =
ctx |>
(case currTokType ctx of
Symbol '[' -> dealWithArrays
Symbol '(' -> dealWithFunctionArgs
_ -> id) |>
dealWithPointers |>
dealWithStack
-- Translate a C type declaration into English.
translate :: String -> String
translate s =
s |>
createParseContext |>
readToFirstIdentifier |>
dealWithDeclarator |>
consolidateOutput -- Change this to "show" to debug.
-- Main
main :: IO ()
main = do
input <- getContents
input |> translate |> putStrLn
More information about the Haskell-Cafe
mailing list