[Haskell-cafe] Tiger compiler in Haskell: annotating abstract syntax tree

Sun Jul 25 18:37:27 EDT 2010

On Mon, Jul 19, 2010 at 01:51:52PM -0300, José Romildo Malaquias wrote:
>   data Exp
>     = IntExp Integer
>     | VarExp Symbol
>     | AssignExp Symbol Exp
>     | IfExp Exp Exp (Maybe Exp)
>     | CallExp Symbol [Exp]
>     | LetExp [Dec] Exp
>
>   data Dec
>      = TypeDec Symbol Ty
>      | FunctionDec Symbol [(Symbol,Symbol)] (Mybe Symbol) Exp
>      | VarDec Symbol (Maybe Symbol) Exp
>
> Expressions can have type annotations, but declarations can not.

Hi, my favorite solution to this is using two level types. They don't
only allow annotating the AST with information, but also allow things
like generic unification over terms or hash consing for trivial CSE. As
an example, you would translate. your thing to

>   data Exp e
>     = IntExp Integer
>     | VarExp Symbol
>     | AssignExp Symbol e
>     | IfExp Exp Exp (Maybe e)
>     | CallExp Symbol [e]
>     | LetExp [Dec e] e
>
>   data Dec e
>      = TypeDec Symbol Ty
>      | FunctionDec Symbol [(Symbol,Symbol)] (Maybe Symbol) e
>      | VarDec Symbol (Maybe Symbol) e

we simply replace the recursive argument 'Exp' with a type parameter. 

Now, to create an unannotated version of the AST

> newtype Fix e = F (e (Fix e))
> type SExp = Fix Exp

now if you want to annotate each node with something,

> data FixAnnotated a e = FA a (e (FixAnnotated a e))
> type ExpTy = TypeAnnotated Ty Exp

but you can do much more interesting things, imagine you want to do
common subexpression elimination on your whole program, using a hash
table of subterms to identify when the same thing is calculated more
than once. You could do something like

> newtype FixHash e = FixHash (e Int)

notice our recursive parameter is just an 'Int' this will be the index
into the table of the given subexpresion. You can write a wholely geneic
CSE pass that does not even know about the structure of your terms!

for more advanced things like a fully generic unification, see the
following paper. In addition to the two-level types trick, the paper
talks about parameterized classes, though I wouldn't recommend them so
much, a useful trick sure, but not really essential for this task. the
two level type stuff is golden though.

unify:
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.20.8205

I have attached a utility module I use for two level types, feel free to
modify it for your needs.

        John

--
John Meacham - ⑆repetae.net⑆john⑈ - http://notanumber.net/
-------------- next part --------------

{-# OPTIONS_GHC -XMultiParamTypeClasses -XFlexibleContexts -XUndecidableInstances #-}
module Fix where

import Control.Applicative
import Control.Monad
import Data.Monoid
import qualified Data.Foldable as F
import qualified Data.Traversable as F

-- The basic 'Fix' type, It creates a simple recursive type.

newtype Fix f = F (f (Fix f))

instance Show (f (Fix f)) => Show (Fix f) where
    showsPrec n (F x) = showsPrec n x

instance Eq (f (Fix f)) => Eq (Fix f) where
    F x == F y = x == y

instance Ord (f (Fix f)) => Ord (Fix f) where
    F x `compare` F y = x `compare` y

foldFix :: Functor f => (f w -> w) -> Fix f -> w
foldFix f (F ji) =  f (fmap (foldFix f) ji)

foldFixM :: (Monad m,F.Traversable f) => (f w -> m w) -> Fix f -> m w
foldFixM f (F ji) =  f =<< (F.mapM (foldFixM f) ji)

foldFixM' :: (Monad m,F.Traversable f) => (Fix f -> m (Fix f)) -> (f w -> m w) -> Fix f -> m w
foldFixM' fd f x = do
    F x <- fd x
    f =<< (F.mapM (foldFixM' fd f) x)

-- A recursive type that attaches some memoized data to each subterm

data FixM f a = FM a (f (FixM f a))

instance Functor f => Functor (FixM f) where
    fmap f (FM x y) = FM (f x) (fmap (fmap f) y)

instance F.Foldable f => F.Foldable (FixM f) where
    foldMap f (FM x y) = f x `mappend` F.foldMap (F.foldMap f) y

instance (Functor f, F.Traversable f) => F.Traversable (FixM f) where
    traverse f (FM x y) = FM <$> f x <*> (F.traverse (F.traverse f) y)

--instance Eq (f (FixM f a)) => Eq (FixM f a) where
--    FM _ x == FM _ y = x == y

--instance Ord (f (FixM f a)) => Ord (FixM f a) where
--    FM _ x `compare` FM _ y = x `compare` y

fixMemo :: FixM f a -> a
fixMemo (FM a _) = a

fromFixMemo :: FixM f a -> (a,f (FixM f a))
fromFixMemo (FM a x) = (a,x)

toFixMemo ::  (f (FixM f a) -> a) -> f (FixM f a) -> FixM f a
toFixMemo f x = FM (f x) x

fixDeMemoize :: Functor f => FixM f a -> Fix f
fixDeMemoize ja = f ja where
    f (FM _ j) = F (fmap f j)

fixMemoize :: Functor f => (f (FixM f a) -> a) -> Fix f -> FixM f a
fixMemoize f (F ji) =  foldFix f' (F ji) where
    f' x = FM (f x) x

-- relys on laziness
fixMemoizeKnot :: Functor f => (c -> f (FixM f a) -> (c,a)) -> c -> Fix f -> FixM f a
fixMemoizeKnot f c fji = g c fji where
    g c (F ji) = FM a nji where
        (c',a) = f c nji
        nji = fmap (g c') ji

fixMemoizeM :: (Monad m,F.Traversable f) => (f (FixM f a) -> m a) -> Fix f -> m (FixM f a)
fixMemoizeM f (F ji) =  foldFixM f' (F ji) where
    f' x = do fx <- f x; return $ FM fx x

-- like fixMemoize, but lets you examine nodes on the way down as well as annotate them on the way up
fixMemoizeM' :: (Monad m,F.Traversable f) => (Fix f -> m (Fix f)) -> (f (FixM f a) -> m a) -> Fix f -> m (FixM f a)
fixMemoizeM' fd f x =  foldFixM' fd f' x where
    f' x = do fx <- f x; return $ FM fx x

-- hash cons

--hashCons :: Ord (f Int) => Fix f -> [f Int]
--hashCons x = runState foldFix f x where

class SelfFunctor a where
    sfmap :: (a -> a) -> a -> a
    sfmapM :: Monad m => (a -> m a) -> a -> m a

class HasMemo a where
    type Memo a
    memo :: a -> Memo a

class HasContents a where
    type Contents a
    open :: a -> Contents a

instance HasContents (Fix t) where
    type Contents (Fix t) = t (Fix t)
    open (F x) = x

instance HasContents (FixM t a) where
    type Contents (FixM  t a) = t (FixM t a)
    open (FM _  x) = x

instance HasMemo (FixM t a) where
    type Memo (FixM t a) = a
    memo (FM a _) = a