[Git][ghc/ghc][wip/romes/eqsat-pmc] Imrove term representation in e-graph a lot... and code complexity too, it's almost reasonable

Tue Jul 18 01:09:49 UTC 2023

Rodrigo Mesquita pushed to branch wip/romes/eqsat-pmc at Glasgow Haskell Compiler / GHC


Commits:
8adaad48 by Rodrigo Mesquita at 2023-07-18T02:09:34+01:00
Imrove term representation in e-graph a lot... and code complexity too, it's almost reasonable

- - - - -


3 changed files:

- compiler/GHC/Core/Equality.hs
- compiler/GHC/HsToCore/Pmc/Solver.hs
- compiler/GHC/HsToCore/Pmc/Solver/Types.hs


Changes:

=====================================
compiler/GHC/Core/Equality.hs
=====================================
@@ -1,8 +1,7 @@
-{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE DeriveTraversable #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE ViewPatterns #-}
-{-# LANGUAGE StandaloneDeriving #-}
 {-# LANGUAGE FlexibleContexts #-}
 
 module GHC.Core.Equality where
@@ -12,28 +11,24 @@ module GHC.Core.Equality where
 -- than this for equality.
 -- E.g. representation could transform the CoreExpr to an actual debruijnized one (with Ints for Vars)
 
-import GHC.Exts (dataToTag#, tagToEnum#, (>#), (<#))
 import GHC.Prelude
 
 import GHC.Core
 import GHC.Core.TyCo.Rep
 import GHC.Core.Map.Type
-import GHC.Core.Map.Expr
 import GHC.Types.Var
 import GHC.Types.Literal
 import GHC.Types.Tickish
-import Unsafe.Coerce (unsafeCoerce)
 
-import Control.Monad.Trans.State.Strict (state)
 import Data.Equality.Graph as EG
 import Data.Equality.Analysis
 import qualified Data.Equality.Graph.Monad as EGM
-import Data.Equality.Utils (Fix(..))
-
-import GHC.Utils.Misc (all2)
 import GHC.Utils.Outputable
 import GHC.Core.Coercion (coercionType)
 
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.Reader
+
 -- Important to note the binders are also represented by $a$
 -- This is because in the e-graph we will represent binders with the
 -- equivalence class id of things equivalent to it.
@@ -41,300 +36,386 @@ import GHC.Core.Coercion (coercionType)
 -- Unfortunately type binders are still not correctly accounted for.
 -- Perhaps it'd really be better to make DeBruijn work over these types
 
-data AltF b a
-    = AltF AltCon [b] a
-    deriving (Functor, Foldable, Traversable)
+-- In the pattern match checker, expressions will always be kind of shallow.
+-- In practice, no-one writes gigantic lambda expressions
+
+data AltF a
+    = AltF AltCon [()] a
+    deriving (Functor, Foldable, Traversable, Eq, Ord)
 
-data BindF b a
-  = NonRecF b a
-  | RecF [(b, a)]
-  deriving (Functor, Foldable, Traversable)
+data BindF a
+  = NonRecF a
+  | RecF [a]
+  deriving (Functor, Foldable, Traversable, Eq, Ord, Show)
 
-data ExprF b a
-  = VarF Id
+type BoundVar = Int
+data ExprF a
+  = VarF BoundVar
+  | FreeVarF Id
   | LitF Literal
   | AppF a a
-  | LamF b a
-  | LetF (BindF b a) a
-  | CaseF a b Type [AltF b a]
-
-  | CastF a CoercionR
-  | TickF CoreTickish a
-  | TypeF Type
-  | CoercionF Coercion
-  deriving (Functor, Foldable, Traversable)
+  | LamF a
+  | LetF (BindF a) a
+  | CaseF a [AltF a] -- can we drop the case type for expr equality? we don't need them back, we just need to check equality. (perhaps this specialization makes this more suitable in the Pmc namespace)
+
+  -- | CastF a (DeBruijn CoercionR) -- can we drop this
+  -- | TickF CoreTickish a          -- this, when representing expressions for equality?
+  -- but it'll be pretty hard to trigger any bug related to equality matching wrt coercions and casts on view patterns solely
+  | TypeF DBType
+  | CoercionF DBCoercion
+  deriving (Functor, Foldable, Traversable, Eq, Ord)
+
+newtype DBType = DBT (DeBruijn Type) deriving Eq
+instance Ord DBType where
+  compare (DBT dt) (DBT dt') = cmpDeBruijnType dt dt'
+newtype DBCoercion = DBC (DeBruijn Coercion) deriving Eq
+instance Ord DBCoercion where
+  compare (DBC dt) (DBC dt') = cmpDeBruijnCoercion dt dt'
+
+
+-- this makes perfect sense, if we already have to represent this in the e-graph
+-- we might as well make it a better suited representation for the e-graph...
+-- keeping the on-fly debruijn makes no sense
+representCoreExprEgr :: forall a
+                   . Analysis a CoreExprF
+                  => CoreExpr
+                  -> EGraph a CoreExprF
+                  -> (ClassId, EGraph a CoreExprF)
+representCoreExprEgr expr egr = EGM.runEGraphM egr (runReaderT (go expr) emptyCME) where
+  go :: CoreExpr -> ReaderT CmEnv (EGM.EGraphM a CoreExprF) ClassId
+  go = \case
+    Var v -> do
+      env <- ask
+      case lookupCME env v of
+        Nothing -> addE (FreeVarF v)
+        Just i  -> addE (VarF i)
+    Lit lit -> addE (LitF lit)
+    Type t  -> addE (TypeF (DBT $ deBruijnize t))
+    Coercion c -> addE (CoercionF (DBC $ deBruijnize c))
+    Tick _ e -> go e -- bypass ticks!
+    Cast e _ -> go e -- bypass casts! ouch?
+    App f a -> do
+      f' <- go f
+      a' <- go a
+      addE (AppF f' a')
+    Lam b e -> do
+      e' <- local (`extendCME` b) $ go e
+      addE (LamF e')
+    Let (NonRec v r) e -> do
+      r' <- go r
+      e' <- local (`extendCME` v) $ go e
+      addE (LetF (NonRecF r') e')
+    Let (Rec (unzip -> (bs,rs))) e -> do
+      rs' <- traverse (local (`extendCMEs` bs) . go) rs
+      e'  <- local (`extendCMEs` bs) $ go e
+      addE (LetF (RecF rs') e')
+    Case e b _t as -> do
+      e' <- go e
+      as' <- traverse (local (`extendCME` b) . goAlt) as
+      addE (CaseF e' as')
+
+  goAlt :: CoreAlt -> ReaderT CmEnv (EGM.EGraphM a CoreExprF) (CoreAltF ClassId)
+  goAlt (Alt c bs e) = do
+    e' <- local (`extendCMEs` bs) $ go e
+    return (AltF c (map (const ()) bs) e')
+
+  addE :: Analysis a CoreExprF => CoreExprF ClassId -> ReaderT CmEnv (EGM.EGraphM a CoreExprF) ClassId
+  addE e = lift $ EGM.add $ Node e
+--  Var v   -> add (Node $ DF (D cmenv (VarF v)))   eg0
+--  Lit lit -> add (Node $ DF (D cmenv (LitF lit))) eg0
+--  Type t  -> add (Node $ DF (D cmenv (TypeF t)))  eg0
+--  Coercion c -> add (Node $ DF (D cmenv (CoercionF c))) eg0
+--  Cast e co  -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
+--                 in add (Node $ DF (D cmenv (CastF eid co))) eg1
+--  App f a -> let (fid,eg1) = representDBCoreExpr (D cmenv f) eg0
+--                 (aid,eg2) = representDBCoreExpr (D cmenv a) eg1
+--              in add (Node $ DF (D cmenv (AppF fid aid))) eg2
+--  Tick n e -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
+--               in add (Node $ DF (D cmenv (TickF n eid))) eg1
+--  Lam b e  -> let (eid,eg1) = representDBCoreExpr (D (extendCME cmenv b) e) eg0
+--               in add (Node $ DF (D cmenv (LamF b eid))) eg1
+--  Let (NonRec v r) e -> let (rid,eg1) = representDBCoreExpr (D cmenv r) eg0
+--                            (eid,eg2) = representDBCoreExpr (D (extendCME cmenv v) e) eg1
+--                         in add (Node $ DF (D cmenv (LetF (NonRecF v rid) eid))) eg2
+--  Let (Rec (unzip -> (bs,rs))) e ->
+--    let cmenv' = extendCMEs cmenv bs
+--        (bsids, eg1) = EGM.runEGraphM eg0 $
+--                         traverse (state . representDBCoreExpr . D cmenv') rs
+--        (eid, eg2)  = representDBCoreExpr (D cmenv' e) eg1
+--     in add (Node $ DF (D cmenv (LetF (RecF (zip bs bsids)) eid))) eg2
+--  Case e b t as -> let (eid, eg1)  = representDBCoreExpr (D cmenv e) eg0
+--                       (as', eg2) = EGM.runEGraphM eg1 $
+--                         traverse (state . representDBAltExpr . D (extendCME cmenv b)) as
+--                    in add (Node $ DF (D cmenv (CaseF eid b t as'))) eg2
 
 type CoreExprF
-  = ExprF CoreBndr
+  = ExprF
 type CoreAltF
-  = AltF CoreBndr
+  = AltF
 type CoreBindF
-  = BindF CoreBndr
-
-newtype DeBruijnF f a = DF (DeBruijn (f a))
-  deriving (Functor, Foldable, Traversable)
-
-eqDeBruijnExprF :: forall a. Eq a => DeBruijn (CoreExprF a) -> DeBruijn (CoreExprF a) -> Bool
-eqDeBruijnExprF (D env1 e1) (D env2 e2) = go e1 e2 where
-    go :: CoreExprF a -> CoreExprF a -> Bool
-    go (VarF v1) (VarF v2)             = eqDeBruijnVar (D env1 v1) (D env2 v2)
-    go (LitF lit1)    (LitF lit2)      = lit1 == lit2
-    go (TypeF t1)    (TypeF t2)        = eqDeBruijnType (D env1 t1) (D env2 t2)
-    -- See Note [Alpha-equality for Coercion arguments]
-    go (CoercionF {}) (CoercionF {}) = True
-    go (CastF e1 co1) (CastF e2 co2) = D env1 co1 == D env2 co2 && e1 == e2
-    go (AppF f1 a1)   (AppF f2 a2)   = f1 == f2 && a1 == a2
-    go (TickF n1 e1) (TickF n2 e2)
-      =  eqDeBruijnTickish (D env1 n1) (D env2 n2)
-      && e1 == e2
-
-    go (LamF b1 e1)  (LamF b2 e2)
-      =  eqDeBruijnType (D env1 (varType b1)) (D env2 (varType b2))
-      && D env1 (varMultMaybe b1) == D env2 (varMultMaybe b2)
-      && e1 == e2
-
-    go (LetF abs e1) (LetF bbs e2)
-      = D env1 abs == D env2 bbs
-      && e1 == e2
-
-    go (CaseF e1 _b1 t1 a1) (CaseF e2 _b2 t2 a2)
-      | null a1   -- See Note [Empty case alternatives]
-      = null a2 && e1 == e2 && D env1 t1 == D env2 t2
-      | otherwise
-      = e1 == e2 && D env1 a1 == D env2 a2
-
-    go _ _ = False
-

--- ROMES:TODO: This one can be derived automatically, but perhaps it's better
--- to be explicit here? We don't even really require the DeBruijn context here
-eqDeBruijnAltF :: forall a. Eq a => DeBruijn (CoreAltF a) -> DeBruijn (CoreAltF a) -> Bool
-eqDeBruijnAltF (D _env1 a1) (D _env2 a2) = go a1 a2 where
-    go (AltF DEFAULT _ rhs1) (AltF DEFAULT _ rhs2)
-        = rhs1 == rhs2
-    go (AltF (LitAlt lit1) _ rhs1) (AltF (LitAlt lit2) _ rhs2)
-        = lit1 == lit2 && rhs1 == rhs2
-    go (AltF (DataAlt dc1) _bs1 rhs1) (AltF (DataAlt dc2) _bs2 rhs2)
-        = dc1 == dc2 &&
-          rhs1 == rhs2 -- the CM environments were extended on representation (see 'representDBAltExpr')
-    go _ _ = False
-
--- | 'unsafeCoerce' mostly because I'm too lazy to write the boilerplate.
-fromCoreExpr :: CoreExpr -> Fix CoreExprF
-fromCoreExpr = unsafeCoerce
-
-toCoreExpr :: CoreExpr -> Fix CoreExprF
-toCoreExpr = unsafeCoerce
-
--- | Represents a DeBruijn CoreExpr being careful to correctly debruijnizie the expression as it is represented
---
--- Always represent Ids, at least for now. We're seemingly using inexistent ids
--- ROMES:TODO: do this all inside EGraphM instead
-representDBCoreExpr :: Analysis a (DeBruijnF CoreExprF)
-                    => DeBruijn CoreExpr
-                    -> EGraph a (DeBruijnF CoreExprF)
-                    -> (ClassId, EGraph a (DeBruijnF CoreExprF))
-representDBCoreExpr (D cmenv expr) eg0 = case expr of
-  Var v   -> add (Node $ DF (D cmenv (VarF v)))   eg0
-  Lit lit -> add (Node $ DF (D cmenv (LitF lit))) eg0
-  Type t  -> add (Node $ DF (D cmenv (TypeF t)))  eg0
-  Coercion c -> add (Node $ DF (D cmenv (CoercionF c))) eg0
-  Cast e co  -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
-                 in add (Node $ DF (D cmenv (CastF eid co))) eg1
-  App f a -> let (fid,eg1) = representDBCoreExpr (D cmenv f) eg0
-                 (aid,eg2) = representDBCoreExpr (D cmenv a) eg1
-              in add (Node $ DF (D cmenv (AppF fid aid))) eg2
-  Tick n e -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
-               in add (Node $ DF (D cmenv (TickF n eid))) eg1
-  Lam b e  -> let (eid,eg1) = representDBCoreExpr (D (extendCME cmenv b) e) eg0
-               in add (Node $ DF (D cmenv (LamF b eid))) eg1
-  Let (NonRec v r) e -> let (rid,eg1) = representDBCoreExpr (D cmenv r) eg0
-                            (eid,eg2) = representDBCoreExpr (D (extendCME cmenv v) e) eg1
-                         in add (Node $ DF (D cmenv (LetF (NonRecF v rid) eid))) eg2
-  Let (Rec (unzip -> (bs,rs))) e ->
-    let cmenv' = extendCMEs cmenv bs
-        (bsids, eg1) = EGM.runEGraphM eg0 $
-                         traverse (state . representDBCoreExpr . D cmenv') rs
-        (eid, eg2)  = representDBCoreExpr (D cmenv' e) eg1
-     in add (Node $ DF (D cmenv (LetF (RecF (zip bs bsids)) eid))) eg2
-  Case e b t as -> let (eid, eg1)  = representDBCoreExpr (D cmenv e) eg0
-                       (as', eg2) = EGM.runEGraphM eg1 $
-                         traverse (state . representDBAltExpr . D (extendCME cmenv b)) as
-                    in add (Node $ DF (D cmenv (CaseF eid b t as'))) eg2
-
-representDBAltExpr :: Analysis a (DeBruijnF CoreExprF)
-                   => DeBruijn CoreAlt
-                   -> EGraph a (DeBruijnF CoreExprF)
-                   -> (CoreAltF ClassId, EGraph a (DeBruijnF CoreExprF))
-representDBAltExpr (D cm (Alt cons bs a)) eg0 =
-  let (ai, eg1) = representDBCoreExpr (D (extendCMEs cm bs) a) eg0
-   in (AltF cons bs ai, eg1)
-
-instance Eq a => Eq (DeBruijn (CoreAltF a)) where
-  (==) = eqDeBruijnAltF
-
-instance Eq a => Eq (DeBruijn (CoreExprF a)) where
-  (==) = eqDeBruijnExprF
-
-instance Eq a => Eq (DeBruijnF CoreExprF a) where
-  (==) (DF a) (DF b) = eqDeBruijnExprF a b
-
-instance Eq a => Eq (DeBruijnF CoreAltF a) where
-  (==) (DF a) (DF b) = eqDeBruijnAltF a b
-
-deriving instance Ord a => Ord (DeBruijnF CoreExprF a)
-
-instance Ord a => Ord (DeBruijn (CoreExprF a)) where
-  -- We must assume that if `a` is DeBruijn expression, it is already correctly "extended" because 'representDBCoreExpr' ensures that.
-  -- RM:TODO: We don't yet compare the CmEnv at any point. Should we?
-  -- RM: I don't think so, the CmEnv is used to determine whether bound variables are equal, but they don't otherwise influence the result.
-  -- Or rather, if the subexpression with variables is equal, then the CmEnv is necessarily equal too?
-  -- So I think that just works...
-  -- Wait, in that sense, couldn't we find a way to derive ord? the important part being that to compare Types and Vars we must use the DeBruijn Env ...
-  compare a b
-    = case a of
-        D cma (VarF va)
-          -> case b of
-               D cmb (VarF vb) -> cmpDeBruijnVar (D cma va) (D cmb vb)
-               _ -> LT
-        D _ (LitF la)
-          -> case b of
-               D _ VarF{} -> GT
-               D _ (LitF lb) -> la `compare` lb
-               _ -> LT
-        D _ (AppF af aarg)
-          -> case dataToTag# b of
-               bt
-                 -> if tagToEnum# (bt ># 2#) then
-                        LT
-                    else
-                        case b of
-                          D _ (AppF bf barg)
-                            -> case compare af bf of
-                                 LT -> LT
-                                 EQ -> aarg `compare` barg -- e.g. here, if we had for children other expresssions debruijnized, they would have the *correct* environments, so we needn't worry.
-                                                           -- the issue to automatically deriving is only really the 'Var' and 'Type' parameters ...
-                                 GT -> GT
-                          _ -> GT
-        D _ (LamF _abind abody)
-          -> case dataToTag# b of
-               bt
-                 -> if tagToEnum# (bt ># 3#) then
-                        LT
-                    else
-                        case b of
-                          D _ (LamF _bbind bbody) -- we can ignore the binder since the represented DB expression has the correct DB environments by construction (see 'representDBCoreExpr')
-                            -> compare abody bbody
-                          _ -> GT
-        D cma (LetF as abody)
-          -> case dataToTag# b of
-               bt
-                 -> if tagToEnum# (bt ># 4#) then
-                        LT
-                    else
-                        case b of
-                          D cmb (LetF bs bbody)
-                            -> case compare (D cma as) (D cmb bs) of
-                                 LT -> LT
-                                 EQ -> compare abody bbody
-                                 GT -> GT
-                          _ -> GT
-        D cma (CaseF cax _cabind catype caalt)
-          -> case dataToTag# b of
-               bt
-                 -> if tagToEnum# (bt <# 5#) then
-                        GT
-                    else
-                        case b of
-                          D cmb (CaseF cbx _cbbind cbtype cbalt)
-                            -> case compare cax cbx of
-                                 LT -> LT
-                                 -- ROMES:TODO: Consider changing order of comparisons to a more efficient one
-                                 EQ -> case cmpDeBruijnType (D cma catype) (D cmb cbtype) of
-                                        LT -> LT
-                                        EQ -> D cma caalt `compare` D cmb cbalt
-                                        GT -> GT
-                                 GT -> GT
-                          _ -> LT
-        D cma (CastF cax caco)
-          -> case dataToTag# b of
-               bt
-                 -> if tagToEnum# (bt <# 6#) then
-                        GT
-                    else
-                        case b of
-                          D cmb (CastF cbx cbco)
-                            -> case compare cax cbx of
-                                 LT -> LT
-                                 EQ -> cmpDeBruijnCoercion (D cma caco) (D cmb cbco)
-                                 GT -> GT
-                          _ -> LT
-        D cma (TickF tatickish tax)
-          -> case dataToTag# b of
-               bt
-                 -> if tagToEnum# (bt <# 7#) then
-                        GT
-                    else
-                        case b of
-                          D cmb (TickF tbtickish tbx)
-                            -> case cmpDeBruijnTickish (D cma tatickish) (D cmb tbtickish) of
-                                 LT -> LT
-                                 EQ -> tax `compare` tbx
-                                 GT -> GT
-                          _ -> LT
-        D cma (TypeF at)
-          -> case b of
-               D _    CoercionF{} -> LT
-               D cmb (TypeF bt) -> cmpDeBruijnType (D cma at) (D cmb bt)
-               _ -> GT
-        D cma (CoercionF aco)
-          -> case b of
-               D cmb (CoercionF bco) -> cmpDeBruijnCoercion (D cma aco) (D cmb bco)
-               _ -> GT
-
-instance Eq a => Eq (DeBruijn (CoreBindF a)) where
-  D cma a == D cmb b = go a b where
-    go (NonRecF _v1 r1) (NonRecF _v2 r2)
-      = r1 == r2 -- See Note [Alpha-equality for let-bindings]
-
-    go (RecF ps1) (RecF ps2)
-      =
-      -- See Note [Alpha-equality for let-bindings]
-      all2 (\b1 b2 -> eqDeBruijnType (D cma (varType b1))
-                                     (D cmb (varType b2)))
-           bs1 bs2
-      && rs1 == rs2
-      where
-        (bs1,rs1) = unzip ps1
-        (bs2,rs2) = unzip ps2
-
-    go _ _ = False
-
-
-instance Ord a => Ord (DeBruijn (CoreBindF a)) where
-  compare a b
-    = case a of
-        D _cma (NonRecF _ab ax)
-          -> case b of
-               D _cmb (NonRecF _bb bx) -- Again, we ignore the binders bc on representation they were accounted for correctly.
-                 -> ax `compare` bx
-               _ -> LT
-        D _cma (RecF as)
-          -> case b of
-               D _cmb (RecF bs) -> compare (map snd as) (map snd bs)
-               _ -> GT
-
-
-instance Ord a => Ord (DeBruijn (CoreAltF a)) where
-  compare a b
-    = case a of
-        D _cma (AltF ac _abs arhs)
-          -> case b of
-               D _cmb (AltF bc _bbs brhs)
-                 -> case compare ac bc of
-                      LT -> LT
-                      EQ -> -- Again, we don't look at binders bc we assume on representation they were accounted for correctly.
-                        arhs `compare` brhs
-                      GT -> GT
+  = BindF
+
+--eqDeBruijnExprF :: forall a. Eq a => DeBruijn (CoreExprF a) -> DeBruijn (CoreExprF a) -> Bool
+--eqDeBruijnExprF (D env1 e1) (D env2 e2) = go e1 e2 where
+--    go :: CoreExprF a -> CoreExprF a -> Bool
+--    go (VarF v1) (VarF v2)             = eqDeBruijnVar (D env1 v1) (D env2 v2)
+--    go (LitF lit1)    (LitF lit2)      = lit1 == lit2
+--    go (TypeF t1)    (TypeF t2)        = eqDeBruijnType (D env1 t1) (D env2 t2)
+--    -- See Note [Alpha-equality for Coercion arguments]
+--    go (CoercionF {}) (CoercionF {}) = True
+--    go (CastF e1 co1) (CastF e2 co2) = D env1 co1 == D env2 co2 && e1 == e2
+--    go (AppF f1 a1)   (AppF f2 a2)   = f1 == f2 && a1 == a2
+--    go (TickF n1 e1) (TickF n2 e2)
+--      =  eqDeBruijnTickish (D env1 n1) (D env2 n2)
+--      && e1 == e2
+
+--    go (LamF b1 e1)  (LamF b2 e2)
+--      =  eqDeBruijnType (D env1 (varType b1)) (D env2 (varType b2))
+--      && D env1 (varMultMaybe b1) == D env2 (varMultMaybe b2)
+--      && e1 == e2
+
+--    go (LetF abs e1) (LetF bbs e2)
+--      = D env1 abs == D env2 bbs
+--      && e1 == e2
+
+--    go (CaseF e1 _b1 t1 a1) (CaseF e2 _b2 t2 a2)
+--      | null a1   -- See Note [Empty case alternatives]
+--      = null a2 && e1 == e2 && D env1 t1 == D env2 t2
+--      | otherwise
+--      = e1 == e2 && D env1 a1 == D env2 a2
+
+--    go _ _ = False
+
+---- ROMES:TODO: This one can be derived automatically, but perhaps it's better
+---- to be explicit here? We don't even really require the DeBruijn context here
+--eqDeBruijnAltF :: forall a. Eq a => DeBruijn (CoreAltF a) -> DeBruijn (CoreAltF a) -> Bool
+--eqDeBruijnAltF (D _env1 a1) (D _env2 a2) = go a1 a2 where
+--    go (AltF DEFAULT _ rhs1) (AltF DEFAULT _ rhs2)
+--        = rhs1 == rhs2
+--    go (AltF (LitAlt lit1) _ rhs1) (AltF (LitAlt lit2) _ rhs2)
+--        = lit1 == lit2 && rhs1 == rhs2
+--    go (AltF (DataAlt dc1) _bs1 rhs1) (AltF (DataAlt dc2) _bs2 rhs2)
+--        = dc1 == dc2 &&
+--          rhs1 == rhs2 -- the CM environments were extended on representation (see 'representDBAltExpr')
+--    go _ _ = False
+
+---- | 'unsafeCoerce' mostly because I'm too lazy to write the boilerplate.
+--fromCoreExpr :: CoreExpr -> Fix CoreExprF
+--fromCoreExpr = unsafeCoerce
+
+--toCoreExpr :: CoreExpr -> Fix CoreExprF
+--toCoreExpr = unsafeCoerce
+
+---- | Represents a DeBruijn CoreExpr being careful to correctly debruijnizie the expression as it is represented
+----
+---- Always represent Ids, at least for now. We're seemingly using inexistent ids
+---- ROMES:TODO: do this all inside EGraphM instead
+--representDBCoreExpr :: Analysis a (DeBruijnF CoreExprF)
+--                    => DeBruijn CoreExpr
+--                    -> EGraph a (DeBruijnF CoreExprF)
+--                    -> (ClassId, EGraph a (DeBruijnF CoreExprF))
+--representDBCoreExpr (D cmenv expr) eg0 = case expr of
+--  Var v   -> add (Node $ DF (D cmenv (VarF v)))   eg0
+--  Lit lit -> add (Node $ DF (D cmenv (LitF lit))) eg0
+--  Type t  -> add (Node $ DF (D cmenv (TypeF t)))  eg0
+--  Coercion c -> add (Node $ DF (D cmenv (CoercionF c))) eg0
+--  Cast e co  -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
+--                 in add (Node $ DF (D cmenv (CastF eid co))) eg1
+--  App f a -> let (fid,eg1) = representDBCoreExpr (D cmenv f) eg0
+--                 (aid,eg2) = representDBCoreExpr (D cmenv a) eg1
+--              in add (Node $ DF (D cmenv (AppF fid aid))) eg2
+--  Tick n e -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
+--               in add (Node $ DF (D cmenv (TickF n eid))) eg1
+--  Lam b e  -> let (eid,eg1) = representDBCoreExpr (D (extendCME cmenv b) e) eg0
+--               in add (Node $ DF (D cmenv (LamF b eid))) eg1
+--  Let (NonRec v r) e -> let (rid,eg1) = representDBCoreExpr (D cmenv r) eg0
+--                            (eid,eg2) = representDBCoreExpr (D (extendCME cmenv v) e) eg1
+--                         in add (Node $ DF (D cmenv (LetF (NonRecF v rid) eid))) eg2
+--  Let (Rec (unzip -> (bs,rs))) e ->
+--    let cmenv' = extendCMEs cmenv bs
+--        (bsids, eg1) = EGM.runEGraphM eg0 $
+--                         traverse (state . representDBCoreExpr . D cmenv') rs
+--        (eid, eg2)  = representDBCoreExpr (D cmenv' e) eg1
+--     in add (Node $ DF (D cmenv (LetF (RecF (zip bs bsids)) eid))) eg2
+--  Case e b t as -> let (eid, eg1)  = representDBCoreExpr (D cmenv e) eg0
+--                       (as', eg2) = EGM.runEGraphM eg1 $
+--                         traverse (state . representDBAltExpr . D (extendCME cmenv b)) as
+--                    in add (Node $ DF (D cmenv (CaseF eid b t as'))) eg2
+
+--representDBAltExpr :: Analysis a (DeBruijnF CoreExprF)
+--                   => DeBruijn CoreAlt
+--                   -> EGraph a (DeBruijnF CoreExprF)
+--                   -> (CoreAltF ClassId, EGraph a (DeBruijnF CoreExprF))
+--representDBAltExpr (D cm (Alt cons bs a)) eg0 =
+--  let (ai, eg1) = representDBCoreExpr (D (extendCMEs cm bs) a) eg0
+--   in (AltF cons bs ai, eg1)
+
+--instance Eq a => Eq (DeBruijn (CoreAltF a)) where
+--  (==) = eqDeBruijnAltF
+
+--instance Eq a => Eq (DeBruijn (CoreExprF a)) where
+--  (==) = eqDeBruijnExprF
+
+--instance Eq a => Eq (DeBruijnF CoreExprF a) where
+--  (==) (DF a) (DF b) = eqDeBruijnExprF a b
+
+--instance Eq a => Eq (DeBruijnF CoreAltF a) where
+--  (==) (DF a) (DF b) = eqDeBruijnAltF a b
+
+--deriving instance Ord a => Ord (DeBruijnF CoreExprF a)
+
+--instance Ord a => Ord (DeBruijn (CoreExprF a)) where
+--  -- We must assume that if `a` is DeBruijn expression, it is already correctly "extended" because 'representDBCoreExpr' ensures that.
+--  -- RM:TODO: We don't yet compare the CmEnv at any point. Should we?
+--  -- RM: I don't think so, the CmEnv is used to determine whether bound variables are equal, but they don't otherwise influence the result.
+--  -- Or rather, if the subexpression with variables is equal, then the CmEnv is necessarily equal too?
+--  -- So I think that just works...
+--  -- Wait, in that sense, couldn't we find a way to derive ord? the important part being that to compare Types and Vars we must use the DeBruijn Env ...
+--  compare a b
+--    = case a of
+--        D cma (VarF va)
+--          -> case b of
+--               D cmb (VarF vb) -> cmpDeBruijnVar (D cma va) (D cmb vb)
+--               _ -> LT
+--        D _ (LitF la)
+--          -> case b of
+--               D _ VarF{} -> GT
+--               D _ (LitF lb) -> la `compare` lb
+--               _ -> LT
+--        D _ (AppF af aarg)
+--          -> case dataToTag# b of
+--               bt
+--                 -> if tagToEnum# (bt ># 2#) then
+--                        LT
+--                    else
+--                        case b of
+--                          D _ (AppF bf barg)
+--                            -> case compare af bf of
+--                                 LT -> LT
+--                                 EQ -> aarg `compare` barg -- e.g. here, if we had for children other expresssions debruijnized, they would have the *correct* environments, so we needn't worry.
+--                                                           -- the issue to automatically deriving is only really the 'Var' and 'Type' parameters ...
+--                                 GT -> GT
+--                          _ -> GT
+--        D _ (LamF _abind abody)
+--          -> case dataToTag# b of
+--               bt
+--                 -> if tagToEnum# (bt ># 3#) then
+--                        LT
+--                    else
+--                        case b of
+--                          D _ (LamF _bbind bbody) -- we can ignore the binder since the represented DB expression has the correct DB environments by construction (see 'representDBCoreExpr')
+--                            -> compare abody bbody
+--                          _ -> GT
+--        D cma (LetF as abody)
+--          -> case dataToTag# b of
+--               bt
+--                 -> if tagToEnum# (bt ># 4#) then
+--                        LT
+--                    else
+--                        case b of
+--                          D cmb (LetF bs bbody)
+--                            -> case compare (D cma as) (D cmb bs) of
+--                                 LT -> LT
+--                                 EQ -> compare abody bbody
+--                                 GT -> GT
+--                          _ -> GT
+--        D cma (CaseF cax _cabind catype caalt)
+--          -> case dataToTag# b of
+--               bt
+--                 -> if tagToEnum# (bt <# 5#) then
+--                        GT
+--                    else
+--                        case b of
+--                          D cmb (CaseF cbx _cbbind cbtype cbalt)
+--                            -> case compare cax cbx of
+--                                 LT -> LT
+--                                 -- ROMES:TODO: Consider changing order of comparisons to a more efficient one
+--                                 EQ -> case cmpDeBruijnType (D cma catype) (D cmb cbtype) of
+--                                        LT -> LT
+--                                        EQ -> D cma caalt `compare` D cmb cbalt
+--                                        GT -> GT
+--                                 GT -> GT
+--                          _ -> LT
+--        D cma (CastF cax caco)
+--          -> case dataToTag# b of
+--               bt
+--                 -> if tagToEnum# (bt <# 6#) then
+--                        GT
+--                    else
+--                        case b of
+--                          D cmb (CastF cbx cbco)
+--                            -> case compare cax cbx of
+--                                 LT -> LT
+--                                 EQ -> cmpDeBruijnCoercion (D cma caco) (D cmb cbco)
+--                                 GT -> GT
+--                          _ -> LT
+--        D cma (TickF tatickish tax)
+--          -> case dataToTag# b of
+--               bt
+--                 -> if tagToEnum# (bt <# 7#) then
+--                        GT
+--                    else
+--                        case b of
+--                          D cmb (TickF tbtickish tbx)
+--                            -> case cmpDeBruijnTickish (D cma tatickish) (D cmb tbtickish) of
+--                                 LT -> LT
+--                                 EQ -> tax `compare` tbx
+--                                 GT -> GT
+--                          _ -> LT
+--        D cma (TypeF at)
+--          -> case b of
+--               D _    CoercionF{} -> LT
+--               D cmb (TypeF bt) -> cmpDeBruijnType (D cma at) (D cmb bt)
+--               _ -> GT
+--        D cma (CoercionF aco)
+--          -> case b of
+--               D cmb (CoercionF bco) -> cmpDeBruijnCoercion (D cma aco) (D cmb bco)
+--               _ -> GT
+
+--instance Eq a => Eq (DeBruijn (CoreBindF a)) where
+--  D cma a == D cmb b = go a b where
+--    go (NonRecF _v1 r1) (NonRecF _v2 r2)
+--      = r1 == r2 -- See Note [Alpha-equality for let-bindings]
+
+--    go (RecF ps1) (RecF ps2)
+--      =
+--      -- See Note [Alpha-equality for let-bindings]
+--      all2 (\b1 b2 -> eqDeBruijnType (D cma (varType b1))
+--                                     (D cmb (varType b2)))
+--           bs1 bs2
+--      && rs1 == rs2
+--      where
+--        (bs1,rs1) = unzip ps1
+--        (bs2,rs2) = unzip ps2
+
+--    go _ _ = False
+
+
+--instance Ord a => Ord (DeBruijn (CoreBindF a)) where
+--  compare a b
+--    = case a of
+--        D _cma (NonRecF _ab ax)
+--          -> case b of
+--               D _cmb (NonRecF _bb bx) -- Again, we ignore the binders bc on representation they were accounted for correctly.
+--                 -> ax `compare` bx
+--               _ -> LT
+--        D _cma (RecF as)
+--          -> case b of
+--               D _cmb (RecF bs) -> compare (map snd as) (map snd bs)
+--               _ -> GT
+
+
+--instance Ord a => Ord (DeBruijn (CoreAltF a)) where
+--  compare a b
+--    = case a of
+--        D _cma (AltF ac _abs arhs)
+--          -> case b of
+--               D _cmb (AltF bc _bbs brhs)
+--                 -> case compare ac bc of
+--                      LT -> LT
+--                      EQ -> -- Again, we don't look at binders bc we assume on representation they were accounted for correctly.
+--                        arhs `compare` brhs
+--                      GT -> GT
 
 cmpDeBruijnTickish :: DeBruijn CoreTickish -> DeBruijn CoreTickish -> Ordering
 cmpDeBruijnTickish (D env1 t1) (D env2 t2) = go t1 t2 where
@@ -359,26 +440,21 @@ cmpDeBruijnCoercion :: DeBruijn Coercion -> DeBruijn Coercion -> Ordering
 cmpDeBruijnCoercion (D env1 co1) (D env2 co2)
   = cmpDeBruijnType (D env1 (coercionType co1)) (D env2 (coercionType co2))
 
--- instances for debugging purposes
-instance Show a => Show (DeBruijnF CoreExprF a) where
-  show (DF (D _ (VarF id) )) = showPprUnsafe $ text "VarF"  <+> ppr id
-  show (DF (D _ (LitF lit))) = showPprUnsafe $ text "LitF" <+> ppr lit
-  show (DF (D _ (AppF a b))) = "AppF " ++ show a ++ " " ++ show b
-  show (DF (D _ (LamF b a))) = showPprUnsafe (text "LamF" <+> ppr b <+> text "") ++ show a
-  show (DF (D _ (LetF b a))) = "LetF " ++ show b ++ " " ++ show a
-  show (DF (D _ (CaseF a b t alts))) = "CaseF " ++ show a ++ showPprUnsafe (ppr b <+> ppr t) ++ show alts
-
-  show (DF (D _ (CastF _a _cor)   )) = "CastF"
-  show (DF (D _ (TickF _cotick _a))) = "Tick"
-  show (DF (D _ (TypeF t)       )) = "TypeF " ++ showPprUnsafe (ppr t)
-  show (DF (D _ (CoercionF co)  )) = "CoercionF " ++ showPprUnsafe co
-
-
-instance Show a => Show (BindF CoreBndr a) where
-  show (NonRecF b a) = "NonRecF " ++ showPprUnsafe b ++ show a
-  show (RecF bs) = "RecF " ++ unwords (map (\(a,b) -> showPprUnsafe a ++ show b) bs)
-
-instance Show a => Show (AltF CoreBndr a) where
+-- -- instances for debugging purposes
+instance Show a => Show (CoreExprF a) where
+  show (VarF id) = showPprUnsafe $ text "VarF"  <+> ppr id
+  show (FreeVarF id) = showPprUnsafe $ ppr id
+  show (LitF lit) = showPprUnsafe $ text "LitF" <+> ppr lit
+  show (AppF a b) = "AppF " ++ show a ++ " " ++ show b
+  show (LamF a) = "LamF " ++ show a
+  show (LetF b a) = "LetF " ++ show b ++ " " ++ show a
+  show (CaseF a alts) = "CaseF " ++ show a ++ show alts
+
+  -- show (CastF _a _cor) = "CastF"
+  -- show (TickF _cotick _a) = "Tick"
+  show (TypeF (DBT (D _ t))) = "TypeF " ++ showPprUnsafe (ppr t)
+  show (CoercionF (DBC (D _ co))) = "CoercionF " ++ showPprUnsafe co
+
+instance Show a => Show (AltF a) where
   show (AltF alt bs a) = "AltF " ++ showPprUnsafe (ppr alt <+> ppr bs) ++ show a
 
-


=====================================
compiler/GHC/HsToCore/Pmc/Solver.hs
=====================================
@@ -64,7 +64,6 @@ import GHC.Types.Unique.Supply
 import GHC.Core
 import GHC.Core.FVs         (exprFreeVars)
 import GHC.Core.TyCo.Compare( eqType )
-import GHC.Core.Map.Type
 import GHC.Core.Equality
 import GHC.Core.Predicate (typeDeterminesValue)
 import GHC.Core.SimpleOpt (simpleOptExpr, exprIsConApp_maybe)
@@ -1022,7 +1021,7 @@ modifyT f = StateT $ fmap ((,) ()) . f
 -- there weren't any such constraints.
 representCoreExpr :: Nabla -> CoreExpr -> (ClassId, Nabla)
 representCoreExpr nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = egraph } } e =
-  second (\g -> nabla{nabla_tm_st = ts{ts_facts = g}}) $ representDBCoreExpr (deBruijnize (makeDictsCoherent e)) egraph
+  second (\g -> nabla{nabla_tm_st = ts{ts_facts = EG.rebuild g}}) $ representCoreExprEgr (makeDictsCoherent e) egraph
   -- Use a key in which dictionaries for the same type become equal.
   -- See Note [Unique dictionaries in the TmOracle CoreMap]
 


=====================================
compiler/GHC/HsToCore/Pmc/Solver/Types.hs
=====================================
@@ -67,7 +67,6 @@ import GHC.Core.TyCon
 import GHC.Types.Literal
 import GHC.Core
 import GHC.Core.TyCo.Compare( eqType )
-import GHC.Core.Map.Type
 import GHC.Core.Utils (exprType)
 import GHC.Builtin.Names
 import GHC.Builtin.Types
@@ -173,7 +172,7 @@ data TmState
 lookupMatchIdMap :: Id -> Nabla -> Maybe ClassId
 lookupMatchIdMap id (MkNabla _ TmSt{ts_reps}) = lookupVarEnv ts_reps id
 
--- | Information about an 'Id'. Stores positive ('vi_pos') facts, like @x ~ Just 42@,
+-- | Information about a match id. Stores positive ('vi_pos') facts, like @x ~ Just 42@,
 -- and negative ('vi_neg') facts, like "x is not (:)".
 -- Also caches the type ('vi_ty'), the 'ResidualCompleteMatches' of a COMPLETE set
 -- ('vi_rcm').
@@ -187,6 +186,9 @@ data VarInfo
   -- ROMES:TODO: What is the Id in question when we might have multiple Ids in the same equivalence class?
   -- It seems currenlty this is the representative of the e-class, so we could probably drop it, in favour of Type or so (since sometimes we need to know the type, and that's also reasonable data for the e-class to have)
 
+  -- , vi_ty  :: !Type
+  -- ^ The type of the match-id
+
   , vi_pos :: ![PmAltConApp]
   -- ^ Positive info: 'PmAltCon' apps it is (i.e. @x ~ [Just y, PatSyn z]@), all
   -- at the same time (i.e. conjunctive).  We need a list because of nested
@@ -333,6 +335,7 @@ emptyVarInfo x
 -- romes:TODO I don't think this is what we want any longer, more like represent Id and see if it was previously represented by some data or not?
 lookupVarInfo :: TmState -> ClassId -> VarInfo
 lookupVarInfo (TmSt eg _ _) x
+-- We used to only create an emptyVarInfo when we looked up that id. Now we do it always, even if we never query the Id back T_T
 -- RM: Yea, I don't like the fact that currently all e-classes are created by Ids and have an Empty Var info, yet we must call "fromMaybe" here. Not good.
   = eg ^._class x._data
 
@@ -841,7 +844,7 @@ instance Outputable PmEquality where
 -- * E-graphs to represent normalised refinment types
 --
 
-type TmEGraph = EGraph VarInfo (DeBruijnF CoreExprF)
+type TmEGraph = EGraph VarInfo CoreExprF
 -- TODO delete orphans for showing TmEGraph for debugging reasons
 instance Show VarInfo where
   show = showPprUnsafe . ppr
@@ -858,13 +861,14 @@ representIdsNablas xs = execState (mapM (\x -> state (((),) . representIdNablas
 representId :: Id -> Nabla -> (ClassId, Nabla)
 representId x (MkNabla tyst tmst at TmSt{ts_facts=eg0, ts_reps=idmp})
     = case lookupVarEnv idmp x of
-        -- The reason why we can't use an empty new class is that we don't account for the IdMap in the 'representDBCoreExpr'
+        -- The reason why we can't use an empty new class is that we don't account for the IdMap in the 'representCoreExprEgr'
         -- In particular, if we represent "reverse @a xs" in the e-graph, the
         -- node in which "xs" will be represented won't match the e-class id
         -- representing "xs", because that class doesn't contain "VarF xs"
+        -- (but at least we can still mkMatchIds without storing the VarF for that one)
         -- Nothing -> case EG.newEClass (emptyVarInfo x) eg0 of
-        Nothing -> case EG.add (EG.Node (DF (deBruijnize (VarF x)))) eg0 of
-          (xid, eg1) -> (xid, MkNabla tyst tmst{ts_facts=eg1, ts_reps=extendVarEnv idmp x xid})
+        Nothing -> case EG.add (EG.Node (FreeVarF x)) eg0 of
+          (xid, eg1) -> (xid, MkNabla tyst tmst{ts_facts=EG.rebuild eg1, ts_reps=extendVarEnv idmp x xid})
         Just xid -> (xid, MkNabla tyst tmst)
 
 representIds :: [Id] -> Nabla -> ([ClassId], Nabla)
@@ -874,7 +878,7 @@ representIds xs = runState (mapM (state . representId) xs)
 -- There ought to be a better way.
 instance Eq VarInfo where
   (==) a b = vi_id a == vi_id b
-instance Analysis VarInfo (DeBruijnF CoreExprF) where
+instance Analysis VarInfo CoreExprF where
   {-# INLINE makeA #-}
   {-# INLINE joinA #-}
 
@@ -886,11 +890,10 @@ instance Analysis VarInfo (DeBruijnF CoreExprF) where
   -- Also, the Eq instance for DeBruijn Vars will ensure that two free
   -- variables with the same Id are equal and so they will be represented in
   -- the same e-class
-  makeA (DF (D _ (VarF x))) = emptyVarInfo x
-  makeA _ = emptyVarInfo unitDataConId -- ROMES:TODO: this is dummy information which should never be used, this is quite wrong :)
-                                       -- I think the reason we end up in this
-                                       -- situation is bc we first represent an expression and only then merge it with some Id.
-                                       -- we'd need a way to represent directly into an e-class then, to not trigger the new e-class.
+  makeA (FreeVarF x) = emptyVarInfo x
+  makeA _ = emptyVarInfo unitDataConId
+  -- makeA _ = Nothing
+    -- Always start with Nothing, and add things as we go?
 
   -- romes: so currently, variables are joined in 'addVarCt' manually by getting the old value of $x$ and assuming the value of $y$ was chosen.
   -- That's obviously bad now, it'd be much more clearer to do it here. It's just the nabla threading that's more trouble
@@ -904,7 +907,12 @@ instance Analysis VarInfo (DeBruijnF CoreExprF) where
 -- since it can fail when it is conflicting
 -- and that would allow us to do the merge procedure correcly here instead of in addVarCt
 -- we may be able to have Analysis (Effect VarInfo) (...)
-  joinA a b = b{ vi_id = if vi_id b == unitDataConId && vi_id a /= unitDataConId then vi_id a else vi_id b
+  -- joinA Nothing Nothing  = Nothing
+  -- joinA Nothing (Just b) = Just b
+  -- joinA (Just a) Nothing = Just a
+  -- joinA (Just a) (Just b)
+  joinA a b
+       = b{ vi_id = if vi_id b == unitDataConId && vi_id a /= unitDataConId then vi_id a else vi_id b
                , vi_pos = case (vi_pos a, vi_pos b) of
                             ([], []) -> []
                             ([], x) -> x



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8adaad4838a9f3ef3237d7132293c81759a160f5

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8adaad4838a9f3ef3237d7132293c81759a160f5
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230717/657fd371/attachment-0001.html>