[Git][ghc/ghc][wip/abs-den] Write a denotationaler interpreter for Core

Sebastian Graf (@sgraf812) gitlab at gitlab.haskell.org
Sun Jan 7 14:44:26 UTC 2024



Sebastian Graf pushed to branch wip/abs-den at Glasgow Haskell Compiler / GHC


Commits:
b9d54222 by Sebastian Graf at 2024-01-07T15:44:17+01:00
Write a denotationaler interpreter for Core

- - - - -


4 changed files:

- compiler/GHC/Builtin/Uniques.hs
- + compiler/GHC/Core/Semantics.hs
- compiler/ghc.cabal.in
- + ghdi.hs


Changes:

=====================================
compiler/GHC/Builtin/Uniques.hs
=====================================
@@ -51,6 +51,9 @@ module GHC.Builtin.Uniques
       -- Boxing data types
     , mkBoxingTyConUnique, boxingDataConUnique
 
+      -- Denotational interpreter 'GHC.Core.Semantics.eval'
+    , mkTempDataConArgUnique
+
     ) where
 
 import GHC.Prelude
@@ -326,7 +329,8 @@ Allocation of unique supply characters:
         j       constraint tuple superclass selectors
         k       constraint tuple tycons
         m       constraint tuple datacons
-        n       Native/LLVM codegen
+        n       Native/LLVM codegen, as well as GHC.Core.Semantics / demand analysis
+                (NB: The lifetimes of those uniques do not overlap)
         r       Hsc name cache
         s       simplifier
         u       Cmm pipeline
@@ -445,3 +449,8 @@ mkBoxingTyConUnique i = mkUniqueInt 'b' (5*i)
 
 boxingDataConUnique :: Unique -> Unique
 boxingDataConUnique u = stepUnique u 2
+
+-- | Make a temporary unique for a DataCon worker PAP, where we know exactly the
+-- scope of said unique. Used in 'GHC.Core.Semantics.eval'.
+mkTempDataConArgUnique :: Int -> Unique
+mkTempDataConArgUnique i = mkUniqueInt 'n' i


=====================================
compiler/GHC/Core/Semantics.hs
=====================================
@@ -0,0 +1,247 @@
+{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+-- {-# OPTIONS_GHC -fdefer-type-errors #-}
+-- {-# OPTIONS_GHC -Wwarn #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE QuantifiedConstraints #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+
+module GHC.Core.Semantics where
+
+import GHC.Prelude
+
+import GHC.Builtin.Names
+import GHC.Builtin.Uniques
+
+import GHC.Core
+import GHC.Core.Coercion
+import GHC.Core.DataCon
+
+import qualified GHC.Data.Word64Map as WM
+
+import GHC.Types.Literal
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.Var.Env
+import GHC.Types.Unique.FM
+import GHC.Types.Unique.Set
+
+import GHC.Utils.Misc
+import GHC.Utils.Outputable
+
+import Control.Monad
+import Control.Monad.Trans.State
+import Data.Word
+import GHC.Core.Utils hiding (findAlt)
+import GHC.Core.Type
+import GHC.Builtin.PrimOps
+
+data Event = Lookup Name | LookupArg CoreExpr | Update | App1 | App2 | Case1 | Case2 | Let1
+
+class Trace d where
+  step :: Event -> d -> d
+
+-- A slight extension of the Domain type from the paper.
+-- Note that the 'Name's bear no semantic significance: The `Domain (D τ)`
+-- instance simply ignores them. They are useful for analyses, however.
+class Domain d where
+  stuck :: d
+  erased :: d -- Think of it like coercionToken#
+  lit :: Literal -> d
+  primOp :: PrimOp -> d
+  fun :: Name -> (d -> d) -> d
+  con :: DataCon -> [d] -> d
+  apply :: d -> d -> d
+  select :: d -> Name -> [DAlt d] -> d
+type DAlt d = (AltCon, [Name], d -> [d] -> d)
+
+data BindHint = BindArg | BindNonRec Name | BindRec [Name]
+class HasBind d where
+  bind :: BindHint -> [[d] -> d] -> ([d] -> d) -> d
+    -- NB: The `BindHint` bears no semantic sigificance:
+    --     `HasBind (D (ByNeed T))` does not look at it.
+    --     Still useful for analyses!
+
+seq_ :: Domain d => d -> d -> d
+seq_ a b = select a wildCardName [(DEFAULT, [], \_a _ds -> b)]
+
+anfise :: (Trace d, Domain d, HasBind d) => CoreExpr -> IdEnv d -> (d -> d) -> d
+anfise (Lit l) _ k = k (lit l)
+anfise (Var x) env k | Just d <- lookupVarEnv env x = k d
+                     | otherwise = stuck
+anfise (Coercion co) env k = -- co is unlifted and will evaluate to coercionToken#
+  foldr (\x -> seq_ (eval (Var x) env)) (k erased) (NonDetUniqFM $ getUniqSet $ coVarsOfCo co)
+anfise (Type _ty) _ k = k erased
+anfise (Tick _t e) env k = anfise e env k
+anfise (Cast e _co) env k = anfise e env k
+anfise e env k = bind BindArg [const (step (LookupArg e) (eval e env))]
+                 (\ds -> if isUnliftedType (exprType e)
+                           then only ds `seq_` k (only ds)
+                           else k (only ds))
+
+anfiseMany :: (Trace d, Domain d, HasBind d) => [CoreExpr] -> IdEnv d -> ([d] -> d) -> d
+anfiseMany es env k = go es []
+  where
+    go [] ds = k (reverse ds)
+    go (e:es) ds = anfise e env $ \d -> go es (d:ds)
+
+eval :: (Trace d, Domain d, HasBind d) => CoreExpr -> IdEnv d -> d
+eval (Coercion co) env = anfise (Coercion co) env id
+eval (Type _ty) _ = erased
+eval (Lit l) _ = lit l
+eval (Tick _t e) env = eval e env
+eval (Cast e _co) env = eval e env
+eval (Var x) env
+  | Just dc <- isDataConWorkId_maybe x = con dc [] -- TODO
+  | Just op <- isPrimOpId_maybe x = primOp op
+  | isDataConWrapId x = eval (unfoldingTemplate (idUnfolding x)) emptyVarEnv
+  | Just d <- lookupVarEnv env x = d
+  | otherwise = stuck -- Scoping error. Actually ruled out by the Core type system
+eval (Lam x e) env = fun (idName x) (\d -> step App2 (eval e (extendVarEnv env x d)))
+eval e at App{} env
+  | Var v <- f, Just dc <- isDataConWorkId_maybe v
+  = anfiseMany as env $ \ds -> case compare (dataConRepArity dc) (length ds) of
+      EQ -> con dc ds
+      GT -> stuck                                                      -- oversaturated  => stuck
+      LT -> expand [] (take (length ds - dataConRepArity dc) papNames) -- undersaturated => PAP
+        where
+          expand etas []     = con dc (ds ++ reverse etas)
+          expand etas (x:xs) = fun x (\d -> expand (d:etas) xs)
+  | otherwise
+  = go (eval f env) as
+  where
+    (f, as) = collectArgs e
+    go df [] = df
+    go df (a:as) = go (anfise a env (step App1 . apply df)) as
+eval (Let (NonRec x rhs) body) env =
+  bind (BindNonRec (idName x))
+       [const (step (Lookup (idName x)) (eval rhs env))]
+       (\ds -> step Let1                (eval body (extendVarEnv env x (only ds))))
+eval (Let (Rec binds) body) env =
+  bind (BindRec (map idName xs))
+       [\ds -> step (Lookup (idName x)) (eval rhs  (new_env ds))  | (x,rhs) <- binds]
+       (\ds -> step Let1                (eval body (new_env ds)))
+  where
+    xs = map fst binds
+    new_env ds = extendVarEnvList env (zip xs ds)
+eval (Case e b _ty alts) env = step Case1 $
+  select (eval e env) (idName b)
+         [ (con, map idName xs, cont xs rhs) | Alt con xs rhs <- alts ]
+  where
+    cont xs rhs scrut ds = step Case2 $ eval rhs (extendVarEnvList env (zipEqual "eval Case{}" (b:xs) (scrut:ds)))
+
+x1,x2 :: Name
+papNames :: [Name]
+papNames@(x1:x2:_) = [ mkSystemName (mkTempDataConArgUnique i) (mkVarOcc "pap") | i <- [0..] ]
+
+
+-- By-need semantics, from the paper
+
+data T v = Step Event (T v) | Ret v
+  deriving Functor
+instance Applicative T where pure = Ret; (<*>) = ap
+instance Monad T where Ret a >>= f = f a; Step ev t >>= f = Step ev (t >>= f)
+instance Trace (T v) where step = Step
+
+type D τ = τ (Value τ)
+data Value τ
+  = Stuck
+  | Erased
+  | Litt Literal
+  | Fun (D τ -> D τ)
+  | Con DataCon [D τ]
+
+instance (Trace (D τ), Monad τ) => Domain (D τ) where
+  stuck = return Stuck
+  erased = return Erased
+  lit l = return (Litt l)
+  fun _x f = return (Fun f)
+  con k ds = return (Con k ds)
+  apply d a = d >>= \case Fun f -> f a; _ -> stuck
+  select d _b fs = d >>= \v -> case v of
+    Stuck                                                    -> stuck
+    Con k ds | Just (_con, _xs, f) <- findAlt (DataAlt k) fs -> f (return v) ds
+    Litt l   | Just (_con, _xs, f) <- findAlt (LitAlt l)  fs -> f (return v) []
+    _        | Just (_con, _xs, f) <- findAlt DEFAULT     fs -> f (return v) []
+    _                                                        -> stuck
+  primOp op = case op of
+    IntAddOp -> intop (+)
+    IntMulOp -> intop (*)
+    IntRemOp -> intop rem
+    _        -> stuck
+    where
+      intop op = binop (\v1 v2 -> case (v1,v2) of (Litt (LitNumber LitNumInt i1), Litt (LitNumber LitNumInt i2)) -> Litt (LitNumber LitNumInt (i1 `op` i2)); _ -> Stuck)
+      binop f = fun x1 $ \d1 -> step App2 $ fun x2 $ \d2 -> step App2 $ f <$> d1 <*> d2
+
+-- The following function was copy and pasted from GHC.Core.Utils.findAlt:
+findAlt :: AltCon -> [DAlt d] -> Maybe (DAlt d)
+    -- A "Nothing" result *is* legitimate
+    -- See Note [Unreachable code]
+findAlt con alts
+  = case alts of
+        (deflt@(DEFAULT, _, _):alts) -> go alts (Just deflt)
+        _                            -> go alts Nothing
+  where
+    go []                        deflt = deflt
+    go (alt@(con1, _, _) : alts) deflt
+      = case con `cmpAltCon` con1 of
+          LT -> deflt   -- Missed it already; the alts are in increasing order
+          EQ -> Just alt
+          GT -> go alts deflt
+
+-- By-need semantics, straight from the paper
+
+type Addr = Word64
+type Heap τ = WM.Word64Map (D τ)
+newtype ByNeed τ v = ByNeed { runByNeed :: StateT (Heap (ByNeed τ)) τ v }
+  deriving (Functor, Applicative, Monad)
+
+instance (forall v. Trace (τ v)) => Trace (ByNeed τ v) where
+  step ev (ByNeed (StateT m)) = ByNeed $ StateT $ step ev . m
+
+fetch :: Monad τ => Addr -> D (ByNeed τ)
+fetch a = ByNeed get >>= \μ -> μ WM.! a
+
+memo :: forall τ. (Monad τ, forall v. Trace (τ v)) => Addr -> D (ByNeed τ) -> D (ByNeed τ)
+memo a d = d >>= ByNeed . StateT . upd
+  where upd Stuck μ = return (Stuck :: Value (ByNeed τ), μ)
+        upd v     μ = step Update (return (v, WM.insert a (memo a (return v)) μ))
+
+freeList :: Heap τ -> [Addr]
+freeList μ = [a..]
+  where a = case WM.lookupMax μ of Just (a,_) -> a+1; _ -> 0
+
+instance (Monad τ, forall v. Trace (τ v)) => HasBind (D (ByNeed τ)) where
+  bind _hint rhss body = do
+    as <- take (length rhss) . freeList <$> ByNeed get
+    let ds = map fetch as
+    ByNeed $ modify (\μ -> foldr (\(a,rhs) -> WM.insert a (memo a (rhs ds))) μ (zip as rhss))
+    body ds
+
+evalByNeed :: CoreExpr -> T (Value (ByNeed T), Heap (ByNeed T))
+evalByNeed e = runStateT (runByNeed (eval e emptyVarEnv)) WM.empty
+
+-- Boilerplate
+instance Outputable Event where
+  ppr (Lookup n) = text "Lookup" <> parens (ppr n)
+  ppr (LookupArg e) = text "LookupArg" <> parens (ppr e)
+  ppr Update = text "Update"
+  ppr App1 = text "App1"
+  ppr App2 = text "App2"
+  ppr Case1 = text "Case1"
+  ppr Case2 = text "Case2"
+  ppr Let1 = text "Let1"
+instance Outputable v => Outputable (T v) where
+  ppr (Step ev τ) = ppr ev <> arrow <> ppr τ
+  ppr (Ret v) = char '<' <> ppr v <> char '>'
+instance Outputable (Value τ) where
+  ppr Stuck = text "stuck"
+  ppr Erased = char '_'
+  ppr (Litt l) = ppr l
+  ppr (Fun _f) = text "Fun"
+  ppr (Con dc _ds) = ppr dc
+instance Outputable (Heap τ) where
+  ppr μ = brackets (pprWithCommas (\(a,_) -> ppr a <> char '↦' <> underscore) (WM.toList μ))


=====================================
compiler/ghc.cabal.in
=====================================
@@ -374,6 +374,7 @@ Library
         GHC.Core.Opt.WorkWrap.Utils
         GHC.Core.PatSyn
         GHC.Core.Ppr
+        GHC.Core.Semantics
         GHC.Types.TyThing.Ppr
         GHC.Core.Predicate
         GHC.Core.Reduction


=====================================
ghdi.hs
=====================================
@@ -0,0 +1,86 @@
+-- Import necessary modules
+import GHC
+import GHC.Driver.Config.Parser
+import GHC.Driver.Env.Types
+import GHC.Driver.Session
+import GHC.Utils.Outputable
+import GHC.Unit.Types
+import GHC.Unit.Module.ModGuts
+import GHC.Data.StringBuffer
+import GHC.Data.FastString
+import qualified GHC.Parser.Lexer as L
+import qualified GHC.Parser as P
+import GHC.Types.SrcLoc
+import GHC.Core
+import Control.Monad
+import Control.Monad.IO.Class
+import System.IO
+import System.Environment
+import System.Directory
+import System.FilePath
+import Data.List
+import GHC.Types.Name
+import GHC.Core.Semantics
+import qualified GHC.LanguageExtensions as LangExt
+
+import System.Console.Haskeline
+
+--desugarFile :: GhcMonad m => FilePath -> m ModGuts
+--desugarFile f = do
+--  target <- guessTarget f Nothing Nothing
+--  setTargets [ target ]
+--  _successFlag <- load LoadAllTargets
+--  mg <- hsc_mod_graph <$> getSession
+--  let [ms] = mgModSummaries mg
+--  m <- desugarModule =<< typecheckModule =<< parseModule ms
+--  return (coreModule m)
+
+indent :: Int -> String -> String
+indent n = unlines . map (\s -> replicate n ' ' ++ s) . lines
+
+processExpression :: String -> String -> IO CoreExpr
+processExpression libdir expression = do
+  tmp <- getTemporaryDirectory
+  let file = tmp </> "_interactive_.hs"
+  writeFile file ("module Interactive where import GHC.Exts; it = " ++ indent 2 expression)
+  -- Initialize GHC session
+  defaultErrorHandler defaultFatalMessager defaultFlushOut $ do
+    runGhc (Just libdir) $ do
+      -- Set up GHC session
+      dflags <- getSessionDynFlags
+      setSessionDynFlags $
+        -- flip gopt_unset Opt_FullLaziness $
+        -- flip gopt_unset Opt_WorkerWrapper $
+        -- updOptLevel 1 $
+        flip gopt_unset Opt_LocalFloatOutTopLevel $
+        flip gopt_set Opt_Specialise $
+        flip gopt_unset Opt_IgnoreInterfacePragmas $
+        flip xopt_set LangExt.MagicHash $
+        dflags
+      mod_guts <- compileToCoreSimplified file
+      let binds = cm_binds mod_guts
+      let Just (NonRec _ e) = find (\b -> case b of NonRec x e -> getOccString x == "it"; _ -> False) binds
+      return e
+
+-- Main function to handle command-line arguments
+main :: IO ()
+main = do
+  args <- getArgs
+  tmp <- getTemporaryDirectory
+  let settings = defaultSettings { historyFile = Just (tmp </> ".ghdi.hist") }
+  case args of
+    [libdir] -> runInputT settings (loop libdir)
+    _        -> putStrLn "Usage: `ghdi <libdir>`, for example `ghdi $(ghc --print-libdir)`"
+
+loop :: FilePath -> InputT IO ()
+loop libdir = do
+  minput <- getInputLine "prompt> "
+  case minput of
+    Nothing      -> return ()
+    Just ":quit" -> return ()
+    Just input   -> do
+      e <- liftIO $ processExpression libdir input
+      outputStrLn (showSDocUnsafe (hang (text "Above expression as (optimised) Core:") 2 (ppr e)))
+      outputStrLn "Trace of denotational interpreter:"
+      outputStrLn (showSDocOneLine defaultSDocContext (hang empty 2 (ppr (evalByNeed e))))
+      loop libdir



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/b9d54222feb90f9ccdf63d2b89cd7bd582756bfb

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/b9d54222feb90f9ccdf63d2b89cd7bd582756bfb
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240107/2e134ccb/attachment-0001.html>


More information about the ghc-commits mailing list