[Haskell-cafe] request for code review

Sun Mar 5 05:43:32 EST 2006

Hi,

I'm working on another article like
<http://www.linuxjournal.com/article/8850>.  This time, I'm taking an
exercise out of "Expert C Programming:  Deep C Secrets" and
translating it into Haskell.  The program translates C type
declarations into English.  I would greatly appreciate some code
review.  I'd prefer to look like an idiot in front of you guys rather
than in front of everyone in the world! ;)

Please understand, I am not a Haskell expert!  Therefore, please make
your suggestions simple enough that I can actually accomplish them!

By the way, my code *mostly* follows the code laid out in the book.  I
don't use a lexer or a parser or greatly improve on his algorithm. 
I'd like the Haskell and C versions to be similar so that they can be
compared.

The C version is:
<http://www.cs.may.ie/~jpower/Courses/compilers/labs/lab3/parse_decl.c>

The Haskell version is below.

Thanks!
-jj

{- Translate C type declarations into English.

   This exercise was taken from "Expert C Programming:  Deep C Secrets", p. 84.

   Example: echo -n "int *p;" | runhugs cdecl.hs

   Name: Shannon -jj Behrens <jjinux at gmail.com>
   Date: Fri Feb 17 00:03:38 PST 2006
-}

import Char (isSpace, isAlphaNum, isDigit)

-- |> is like a UNIX pipe.
infixl 9 |>
x |> f = f x

data TokenType = Identifier | Qualifier | Type | Symbol Char
  deriving (Show, Eq)

data Token = Token {
  tokenType :: TokenType,
  tokenValue :: String
} deriving Show

data ParseContext = ParseContext {
  input :: String,    -- The input that has not been parsed yet.
  output :: [String], -- A list of strings in the reverse order of that which
                      -- they should be printed (e.g. [" a dog.", "I have"]).
  currTok :: Token,   -- The current token, if defined.
  stack :: [Token]    -- A stack of tokens we haven't dealt with yet.
} deriving Show

-- For convenience:
currTokType :: ParseContext -> TokenType
currTokType ctx = ctx |> currTok |> tokenType

currTokValue :: ParseContext -> String
currTokValue ctx = ctx |> currTok |> tokenValue

type ParseContextTransformation = ParseContext -> ParseContext

-- Start a new ParseContext given an input string.
createParseContext :: String -> ParseContext
createParseContext input = ParseContext {input=input, output=[], stack=[]}

-- Create the final output string given a ParseContext.
consolidateOutput :: ParseContext -> String
consolidateOutput ctx = ctx |> output |> reverse |> concat

{- "Write" to a ParseContext's output.  The API is a bit strange.
   (writeOutput s) is itself a ParseContextTransformation which you can apply
   to ParseContexts.  Strange but convenient.
-}
writeOutput :: String -> ParseContextTransformation
writeOutput s = \ctx ->
  let newOutput = s : (output ctx) in
    ctx {output=newOutput}

-- Return the top token on the stack.
stackTop :: ParseContext -> Token
stackTop ctx =
  let (x:xs) = stack ctx in x

-- Pop the stack.
pop :: ParseContextTransformation
pop ctx =
  let (x:xs) = stack ctx in ctx {stack=xs}

-- Write the value of the top of the stack and then pop it.
popAndWrite :: ParseContextTransformation
popAndWrite ctx =
  ctx |>
  ((stackTop ctx) |>
   tokenValue |>
   writeOutput) |>
  pop

-- Classify a string into a Token.
classifyString :: String -> Token
classifyString "const"  = Token Qualifier "read-only"
classifyString "*"      = Token (Symbol '*') "pointer to"
classifyString s@(c:[])
  | not (isAlphaNum c)  = Token (Symbol c) s
classifyString s        = Token (whichType s) s
  where whichType "volatile" = Qualifier
        whichType "void"     = Type
        whichType "char"     = Type
        whichType "signed"   = Type
        whichType "unsigned" = Type
        whichType "short"    = Type
        whichType "int"      = Type
        whichType "long"     = Type
        whichType "float"    = Type
        whichType "double"   = Type
        whichType "struct"   = Type
        whichType "union"    = Type
        whichType "enum"     = Type
        whichType _          = Identifier

-- Read the next token into currTok.
getToken :: ParseContextTransformation
getToken ctx@(ParseContext {input=s}) =
  let lstrip s = dropWhile isSpace s
      (token, theRest) = s |> lstrip |> lexString in
    ctx {currTok=token, input=theRest}

-- Read a token.  Return it and the left-over portion of the string.
lexString :: String -> (Token, String)
lexString s@(c:cs) | isAlphaNum c =
  let (tokString, theRest) = span isAlphaNum s
      token = classifyString tokString in
    (token, theRest)
lexString ('*':cs) = (classifyString "*", cs)
lexString (c:cs) = (classifyString (c:[]), cs)

-- Put tokens on the stack until we reach the first identifier.
readToFirstIdentifier :: ParseContextTransformation
readToFirstIdentifier ctx =
  let afterIdentifier = ctx |> getToken |> pushUntilIdentifier
      identifier = afterIdentifier |> currTokValue
      s = identifier ++ " is " in
    (afterIdentifier {output=[s]}) |>
    getToken

-- Keep pushing tokens until we hit an identifier.
pushUntilIdentifier :: ParseContextTransformation
pushUntilIdentifier ctx
  | currTokType ctx == Identifier = ctx
  | otherwise =
      let newStack = (currTok ctx) : (stack ctx) in
        (ctx {stack=newStack}) |>
        getToken |>
        pushUntilIdentifier

-- Deal with arrays.
dealWithArrays :: ParseContextTransformation
dealWithArrays ctx =
  let writeIfNumber ctx =               -- Call writeSize if a number.
        if ctx |> currTokValue |> (!! 0) |> isDigit
        then ctx |> writeSize |> getToken
        else ctx
      writeSize ctx =                   -- Output the array size.
        let num = ctx |> currTokValue |> read |> (+ -1) |> show
            s = "0.." ++ num ++ " " in
          ctx |> (writeOutput s) in
    case currTokType ctx of
      Symbol '[' ->
        ctx |>
        (writeOutput "array ") |>
        getToken |>
        writeIfNumber |>
        getToken |>
        (writeOutput "of ") |>
        dealWithArrays
      _ -> ctx                          -- Recurse until we get past the ['s.

-- Deal with function arguments.
dealWithFunctionArgs :: ParseContextTransformation
dealWithFunctionArgs ctx =
  let getUntilParen ctx =               -- Read tokens until we hit ).
        case currTokType ctx of
          Symbol ')' -> ctx
          _ -> ctx |> getToken |> getUntilParen in
    ctx |>
    getUntilParen |>
    getToken |>
    (writeOutput "function returning ")

-- Deal with pointers.
dealWithPointers :: ParseContextTransformation
dealWithPointers ctx =
  case ctx |> stackTop |> tokenType of
    Symbol '*' ->
      ctx |>
      popAndWrite |>
      (writeOutput " ") |>
      dealWithPointers
    _ -> ctx                            -- Recurse until we get past the *'s.

-- Process tokens that we stacked while reading to identifier.
dealWithStack :: ParseContextTransformation
dealWithStack ctx =
  case stack ctx of
    [] -> ctx
    (x:xs) ->
      case tokenType x of
        Symbol '(' ->
          ctx |> pop |> getToken |> dealWithDeclarator
        _ -> ctx |> popAndWrite

-- Do all parsing after first identifier.
dealWithDeclarator :: ParseContextTransformation
dealWithDeclarator ctx =
  ctx |>
  (case currTokType ctx of
     Symbol '[' -> dealWithArrays
     Symbol '(' -> dealWithFunctionArgs
     _ -> id) |>
  dealWithPointers |>
  dealWithStack

-- Translate a C type declaration into English.
translate :: String -> String
translate s =
  s |>
  createParseContext |>
  readToFirstIdentifier |>
  dealWithDeclarator |>
  consolidateOutput                     -- Change this to "show" to debug.

-- Main
main :: IO ()
main = do
  input <- getContents
  input |> translate |> putStrLn