[Haskell-cafe] Re: request for code review

Sun Mar 12 04:47:09 EST 2006

Hi,

Thanks to everyone who reviewed my code and submitted comments the
first time!  I've updated the code and transitioned to using the State
monad.  Perhaps controversially, I've continued to use |> in a bunch
of places that the monad didn't get rid of because I think it's more
readable, but I'm still open for argument on this topic.  Using the
monad didn't make the code any shorter, but it kind of "felt" better,
once I figured out how to use it.  Figuring out how to use execState
to get into and out of "monad-ity" was the hardest part, because it's
mentioned in so few of the examples.  I think it's fair to say, of
course, that using a monad has increased the complexity, but I can
still read what I wrote.  I've posted my code below for additional
comments.

Thanks again!
-jj

{- Translate C type declarations into English.

   This exercise was taken from "Expert C Programming:  Deep C Secrets", p. 84.

   Example: echo -n "int *p;" | runhugs cdecl.hs

   Name: Shannon -jj Behrens <jjinux at gmail.com>
   Date: Fri Feb 17 00:03:38 PST 2006
-}

import Char (isSpace, isAlphaNum, isDigit)
import Control.Monad.State

-- |> is like a UNIX pipe.
infixl 9 |>
x |> f = f x

data TokenType = Identifier | Qualifier | Type | Symbol Char
  deriving (Show, Eq)

data Token = Token {
  tokenType :: TokenType,
  tokenValue :: String
} deriving Show

data ParseContext = ParseContext {
  input :: String,    -- The input that has not been parsed yet.
  output :: [String], -- A list of strings in the reverse order of that which
                      -- they should be printed (e.g. [" a dog.", "I have"]).
  currTok :: Token,   -- The current token, if defined.
  stack :: [Token]    -- A stack of tokens we haven't dealt with yet.
} deriving Show

-- For convenience:
currTokType :: ParseContext -> TokenType
currTokType ctx = ctx |> currTok |> tokenType

currTokValue :: ParseContext -> String
currTokValue ctx = ctx |> currTok |> tokenValue

-- Start a new State ParseContext given an input string.
createParseContext :: String -> ParseContext
createParseContext input =
  ParseContext {input = input, output = [], stack = []}

-- Create the final output string given a ParseContext.
consolidateOutput :: ParseContext -> String
consolidateOutput ctx =
  ctx |> output |> reverse |> concat

-- "Write" to a ParseContext's output.
writeOutput :: String -> State ParseContext ()
writeOutput s = modify (\ctx -> ctx {output = s : output ctx})

-- Return the top token on the stack.
stackTop :: ParseContext -> Token
stackTop ctx = ctx |> stack |> head

-- Pop the stack.
pop :: State ParseContext ()
pop = modify (\ctx -> ctx {stack = ctx |> stack |> tail})

-- Write the value of the top of the stack and then pop it.
popAndWrite :: State ParseContext ()
popAndWrite = do
  top <- gets stackTop
  writeOutput (tokenValue top)
  pop

-- Classify a string into a Token.
classifyString :: String -> Token
classifyString "const"  = Token Qualifier "read-only"
classifyString "*"      = Token (Symbol '*') "pointer to"
classifyString [c]
  | not (isAlphaNum c)  = Token (Symbol c) [c]
classifyString s        = Token tokType s
  where
    tokType = case s of
      "volatile" -> Qualifier
      x | x `elem` ["void", "char", "signed", "unsigned", "short",
                    "int", "long", "float", "double", "struct",
                    "union", "enum"] -> Type
      x -> Identifier

-- Read the next token into currTok.
getToken :: State ParseContext ()
getToken = modify getToken'
  where
    getToken' ctx@(ParseContext {input = s}) =
      ctx {currTok = token, input = theRest}
      where
        (token, theRest) = s |> lstrip |> lexString
        lstrip s = dropWhile isSpace s

-- Read a token.  Return it and the left-over portion of the string.
lexString :: String -> (Token, String)
lexString s@(c:cs) | isAlphaNum c = (token, theRest)
  where
    (tokString, theRest) = span isAlphaNum s
    token = classifyString tokString
lexString ('*':cs) = (classifyString "*", cs)
lexString (c:cs) = (classifyString [c], cs)

-- Put tokens on the stack until we reach the first identifier.
readToFirstIdentifier :: State ParseContext ()
readToFirstIdentifier = do
  getToken
  pushUntilIdentifier
  afterIdentifier <- get
  let s = identifier ++ " is "
      identifier = currTokValue afterIdentifier in
    put (afterIdentifier {output = [s]})
  getToken

-- Keep pushing tokens until we hit an identifier.
pushUntilIdentifier :: State ParseContext ()
pushUntilIdentifier = do
  ctx <- get
  if currTokType ctx == Identifier
    then return ()                      -- Leave things as they are.
    else do
      put (ctx {stack = (currTok ctx) : (stack ctx)})
      getToken
      pushUntilIdentifier
      return ()

-- Deal with arrays.
dealWithArrays :: State ParseContext ()
dealWithArrays = do
  ctx <- get
  case currTokType ctx of
    Symbol '[' -> do
      writeOutput "array "
      getToken
      writeIfNumber
      getToken
      writeOutput "of "
      dealWithArrays
    _ -> return ()                      -- Recurse until we get past the ['s.
  where
    writeIfNumber = do                  -- Call writeSize if a number.
      tokValue <- gets currTokValue
      if tokValue |> head |> isDigit
        then do
          writeSize
          getToken
        else return ()
    writeSize = do                      -- Output the array size.
      tokValue <- gets currTokValue
      let num = tokValue |> read |> (+ -1) |> show
          s = "0.." ++ num ++ " " in    -- Can't use where instead of let here.
        writeOutput s

-- Deal with function arguments.
dealWithFunctionArgs :: State ParseContext ()
dealWithFunctionArgs = do
  getUntilParen
  getToken
  writeOutput "function returning "
  where
    getUntilParen = do                  -- Read tokens until we hit ).
      ctx <- get
      case currTokType ctx of
        Symbol ')' -> return ()
        _ -> do
          getToken
          getUntilParen

-- Deal with pointers.
dealWithPointers :: State ParseContext ()
dealWithPointers = do
  top <- gets stackTop
  case tokenType top of
    Symbol '*' -> do
      popAndWrite
      writeOutput " "
      dealWithPointers
    _ -> return ()                      -- Recurse until we get past the *'s.

-- Process tokens that we stacked while reading to identifier.
dealWithStack :: State ParseContext ()
dealWithStack = do
  stack' <- gets stack
  case stack' of
    [] -> return ()
    (x:xs) ->
      case tokenType x of
        Symbol '(' -> do
          pop
          getToken
          dealWithDeclarator
        _ -> popAndWrite

-- Do all parsing after first identifier.
dealWithDeclarator :: State ParseContext ()
dealWithDeclarator = do
  tokType <- gets currTokType
  case tokType of
    Symbol '[' -> dealWithArrays
    Symbol '(' -> dealWithFunctionArgs
    _ -> return ()                      -- "Exit" the case, not the function.
  dealWithPointers
  dealWithStack

-- Do all parsing.
dealWithEverything :: State ParseContext ()
dealWithEverything = do
  readToFirstIdentifier
  dealWithDeclarator

-- Translate a C type declaration into English.
translate :: String -> String
translate s =
  -- Change "consolidateOutput" to "show" to debug.
  s |> createParseContext |> execState dealWithEverything |> consolidateOutput

main :: IO ()
main = do
  input <- getContents
  input |> translate |> putStrLn

On 3/5/06, Shannon -jj Behrens <jjinux at gmail.com> wrote:
> Hi,
>
> I'm working on another article like
> <http://www.linuxjournal.com/article/8850>.  This time, I'm taking an
> exercise out of "Expert C Programming:  Deep C Secrets" and
> translating it into Haskell.  The program translates C type
> declarations into English.  I would greatly appreciate some code
> review.  I'd prefer to look like an idiot in front of you guys rather
> than in front of everyone in the world! ;)
>
> Please understand, I am not a Haskell expert!  Therefore, please make
> your suggestions simple enough that I can actually accomplish them!
>
> By the way, my code *mostly* follows the code laid out in the book.  I
> don't use a lexer or a parser or greatly improve on his algorithm.
> I'd like the Haskell and C versions to be similar so that they can be
> compared.
>
> The C version is:
> <http://www.cs.may.ie/~jpower/Courses/compilers/labs/lab3/parse_decl.c>
>
> The Haskell version is below.
[snip]