[Git][ghc/ghc][wip/romes/eqsat-pmc] 6 commits: Create Core.Equality module

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Mon Jun 26 00:45:48 UTC 2023



Rodrigo Mesquita pushed to branch wip/romes/eqsat-pmc at Glasgow Haskell Compiler / GHC


Commits:
ef3491ab by Rodrigo Mesquita at 2023-06-25T21:38:30+01:00
Create Core.Equality module

This module defines CoreExprF -- the base functor of CoreExpr, and
equality and ordering operations on the debruijnized CoreExprF.

Furthermore, it provides a function to represent a CoreExpr in an
e-graph.

This is a requirement to represent, reason about equality, and
manipulate CoreExprs in e-graphs. E-graphs are going to be used in the
pattern match checker (#19272), and potentially for type family
rewriting (#TODO) -- amongst other oportunities that are unlocked by
having them available.

- - - - -
c10de2cb by Rodrigo Mesquita at 2023-06-25T21:54:59+01:00
Question

- - - - -
4196be31 by Rodrigo Mesquita at 2023-06-26T00:45:49+01:00
Was going great until I started needing to thread ClassIds together with Ids. Ret-think this.

- - - - -
0fb5b1d8 by Rodrigo Mesquita at 2023-06-26T01:18:46+01:00
A solution with more lookups

- - - - -
d7bb5a38 by Rodrigo Mesquita at 2023-06-26T01:33:26+01:00
Fixes to Pmc.Ppr module

- - - - -
fedc7282 by Rodrigo Mesquita at 2023-06-26T01:45:26+01:00
Wow, a lot (stage1) is working actually, without PMC errprs

- - - - -


11 changed files:

- + compiler/GHC/Core/Equality.hs
- compiler/GHC/Core/Map/Expr.hs
- compiler/GHC/Core/Map/Type.hs
- compiler/GHC/HsToCore/Pmc.hs
- compiler/GHC/HsToCore/Pmc/Check.hs
- compiler/GHC/HsToCore/Pmc/Desugar.hs
- compiler/GHC/HsToCore/Pmc/Ppr.hs
- compiler/GHC/HsToCore/Pmc/Solver.hs
- compiler/GHC/HsToCore/Pmc/Solver/Types.hs
- compiler/GHC/Types/Unique/SDFM.hs
- compiler/ghc.cabal.in


Changes:

=====================================
compiler/GHC/Core/Equality.hs
=====================================
@@ -0,0 +1,351 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+module GHC.Core.Equality where
+
+import GHC.Exts (dataToTag#, tagToEnum#, (>#), (<#))
+import GHC.Prelude
+
+import GHC.Core
+import GHC.Core.TyCo.Rep
+import GHC.Core.Map.Type
+import GHC.Core.Map.Expr
+import GHC.Types.Var
+import GHC.Types.Literal
+import GHC.Types.Tickish
+import Unsafe.Coerce (unsafeCoerce)
+
+import Control.Monad.Trans.State.Strict (state)
+import Data.Equality.Graph as EG
+import Data.Equality.Analysis
+import qualified Data.Equality.Graph.Monad as EGM
+import Data.Equality.Utils (Fix(..))
+
+import GHC.Utils.Misc (all2)
+
+-- Important to note the binders are also represented by $a$
+-- This is because in the e-graph we will represent binders with the
+-- equivalence class id of things equivalent to it.
+--
+-- Unfortunately type binders are still not correctly accounted for.
+-- Perhaps it'd really be better to make DeBruijn work over these types
+
+data AltF b a
+    = AltF AltCon [b] a
+    deriving (Functor, Foldable, Traversable)
+
+data BindF b a
+  = NonRecF b a
+  | RecF [(b, a)]
+  deriving (Functor, Foldable, Traversable)
+
+data ExprF b a
+  = VarF Id
+  | LitF Literal
+  | AppF a a
+  | LamF b a
+  | LetF (BindF b a) a
+  | CaseF a b Type [AltF b a]
+
+  | CastF a CoercionR
+  | TickF CoreTickish a
+  | TypeF Type
+  | CoercionF Coercion
+  deriving (Functor, Foldable, Traversable)
+
+type CoreExprF
+  = ExprF CoreBndr
+type CoreAltF
+  = AltF CoreBndr
+type CoreBindF
+  = BindF CoreBndr
+
+newtype DeBruijnF f a = DF (DeBruijn (f a))
+  deriving (Functor, Foldable, Traversable)
+
+eqDeBruijnExprF :: forall a. Eq a => DeBruijn (CoreExprF a) -> DeBruijn (CoreExprF a) -> Bool
+eqDeBruijnExprF (D env1 e1) (D env2 e2) = go e1 e2 where
+    go :: CoreExprF a -> CoreExprF a -> Bool
+    go (VarF v1) (VarF v2)             = eqDeBruijnVar (D env1 v1) (D env2 v2)
+    go (LitF lit1)    (LitF lit2)      = lit1 == lit2
+    go (TypeF t1)    (TypeF t2)        = eqDeBruijnType (D env1 t1) (D env2 t2)
+    -- See Note [Alpha-equality for Coercion arguments]
+    go (CoercionF {}) (CoercionF {}) = True
+    go (CastF e1 co1) (CastF e2 co2) = D env1 co1 == D env2 co2 && e1 == e2
+    go (AppF f1 a1)   (AppF f2 a2)   = f1 == f2 && a1 == a2
+    go (TickF n1 e1) (TickF n2 e2)
+      =  eqDeBruijnTickish (D env1 n1) (D env2 n2)
+      && e1 == e2
+
+    go (LamF b1 e1)  (LamF b2 e2)
+      =  eqDeBruijnType (D env1 (varType b1)) (D env2 (varType b2))
+      && D env1 (varMultMaybe b1) == D env2 (varMultMaybe b2)
+      && e1 == e2
+
+    go (LetF abs e1) (LetF bbs e2)
+      = D env1 abs == D env2 bbs
+      && e1 == e2
+
+    go (CaseF e1 _b1 t1 a1) (CaseF e2 _b2 t2 a2)
+      | null a1   -- See Note [Empty case alternatives]
+      = null a2 && e1 == e2 && D env1 t1 == D env2 t2
+      | otherwise
+      = e1 == e2 && D env1 a1 == D env2 a2
+
+    go _ _ = False
+
+-- ROMES:TODO: This one can be derived automatically, but perhaps it's better
+-- to be explicit here? We don't even really require the DeBruijn context here
+eqDeBruijnAltF :: forall a. Eq a => DeBruijn (CoreAltF a) -> DeBruijn (CoreAltF a) -> Bool
+eqDeBruijnAltF (D _env1 a1) (D _env2 a2) = go a1 a2 where
+    go (AltF DEFAULT _ rhs1) (AltF DEFAULT _ rhs2)
+        = rhs1 == rhs2
+    go (AltF (LitAlt lit1) _ rhs1) (AltF (LitAlt lit2) _ rhs2)
+        = lit1 == lit2 && rhs1 == rhs2
+    go (AltF (DataAlt dc1) _bs1 rhs1) (AltF (DataAlt dc2) _bs2 rhs2)
+        = dc1 == dc2 &&
+          rhs1 == rhs2 -- the CM environments were extended on representation (see 'representDBAltExpr')
+    go _ _ = False
+
+-- | 'unsafeCoerce' mostly because I'm too lazy to write the boilerplate.
+fromCoreExpr :: CoreExpr -> Fix CoreExprF
+fromCoreExpr = unsafeCoerce
+
+toCoreExpr :: CoreExpr -> Fix CoreExprF
+toCoreExpr = unsafeCoerce
+
+-- | Represents a DeBruijn CoreExpr being careful to correctly debruijnizie the expression as it is represented
+--
+-- Always represent Ids, at least for now. We're seemingly using inexistent ids
+-- ROMES:TODO: do this all inside EGraphM instead
+representDBCoreExpr :: Analysis a (DeBruijnF CoreExprF)
+                    => DeBruijn CoreExpr
+                    -> EGraph a (DeBruijnF CoreExprF)
+                    -> (ClassId, EGraph a (DeBruijnF CoreExprF))
+representDBCoreExpr (D cmenv expr) eg0 = case expr of
+  Var v   -> add (Node $ DF (D cmenv (VarF v)))   eg0
+  Lit lit -> add (Node $ DF (D cmenv (LitF lit))) eg0
+  Type t  -> add (Node $ DF (D cmenv (TypeF t)))  eg0
+  Coercion c -> add (Node $ DF (D cmenv (CoercionF c))) eg0
+  Cast e co  -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
+                 in add (Node $ DF (D cmenv (CastF eid co))) eg1
+  App f a -> let (fid,eg1) = representDBCoreExpr (D cmenv f) eg0
+                 (aid,eg2) = representDBCoreExpr (D cmenv a) eg1
+              in add (Node $ DF (D cmenv (AppF fid aid))) eg2
+  Tick n e -> let (eid,eg1) = representDBCoreExpr (D cmenv e) eg0
+               in add (Node $ DF (D cmenv (TickF n eid))) eg1
+  Lam b e  -> let (eid,eg1) = representDBCoreExpr (D (extendCME cmenv b) e) eg0
+               in add (Node $ DF (D cmenv (LamF b eid))) eg1
+  Let (NonRec v r) e -> let (rid,eg1) = representDBCoreExpr (D cmenv r) eg0
+                            (eid,eg2) = representDBCoreExpr (D (extendCME cmenv v) e) eg1
+                         in add (Node $ DF (D cmenv (LetF (NonRecF v rid) eid))) eg2
+  Let (Rec (unzip -> (bs,rs))) e ->
+    let cmenv' = extendCMEs cmenv bs
+        (bsids, eg1) = EGM.runEGraphM eg0 $
+                         traverse (state . representDBCoreExpr . D cmenv') rs
+        (eid, eg2)  = representDBCoreExpr (D cmenv' e) eg1
+     in add (Node $ DF (D cmenv (LetF (RecF (zip bs bsids)) eid))) eg2
+  Case e b t as -> let (eid, eg1)  = representDBCoreExpr (D cmenv e) eg0
+                       (as', eg2) = EGM.runEGraphM eg1 $
+                         traverse (state . representDBAltExpr . D (extendCME cmenv b)) as
+                    in add (Node $ DF (D cmenv (CaseF eid b t as'))) eg2
+
+representDBAltExpr :: Analysis a (DeBruijnF CoreExprF)
+                   => DeBruijn CoreAlt
+                   -> EGraph a (DeBruijnF CoreExprF)
+                   -> (CoreAltF ClassId, EGraph a (DeBruijnF CoreExprF))
+representDBAltExpr (D cm (Alt cons bs a)) eg0 =
+  let (ai, eg1) = representDBCoreExpr (D (extendCMEs cm bs) a) eg0
+   in (AltF cons bs ai, eg1)
+
+instance Eq a => Eq (DeBruijn (CoreAltF a)) where
+  (==) = eqDeBruijnAltF
+
+instance Eq a => Eq (DeBruijn (CoreExprF a)) where
+  (==) = eqDeBruijnExprF
+
+instance Eq a => Eq (DeBruijnF CoreExprF a) where
+  (==) (DF a) (DF b) = eqDeBruijnExprF a b
+
+instance Eq a => Eq (DeBruijnF CoreAltF a) where
+  (==) (DF a) (DF b) = eqDeBruijnAltF a b
+
+deriving instance Ord a => Ord (DeBruijnF CoreExprF a)
+
+instance Ord a => Ord (DeBruijn (CoreExprF a)) where
+  -- We must assume that if `a` is DeBruijn expression, it is already correctly "extended" because 'representDBCoreExpr' ensures that.
+  -- RM:TODO: We don't yet compare the CmEnv at any point. Should we?
+  -- RM: I don't think so, the CmEnv is used to determine whether bound variables are equal, but they don't otherwise influence the result.
+  -- Or rather, if the subexpression with variables is equal, then the CmEnv is necessarily equal too?
+  -- So I think that just works...
+  -- Wait, in that sense, couldn't we find a way to derive ord? the important part being that to compare Types and Vars we must use the DeBruijn Env ...
+  compare a b
+    = case a of
+        D cma (VarF va)
+          -> case b of
+               D cmb (VarF vb) -> cmpDeBruijnVar (D cma va) (D cmb vb)
+               _ -> LT
+        D _ (LitF la)
+          -> case b of
+               D _ VarF{} -> GT
+               D _ (LitF lb) -> la `compare` lb
+               _ -> LT
+        D _ (AppF af aarg)
+          -> case dataToTag# b of
+               bt
+                 -> if tagToEnum# (bt ># 2#) then
+                        LT
+                    else
+                        case b of
+                          D _ (AppF bf barg)
+                            -> case compare af bf of
+                                 LT -> LT
+                                 EQ -> aarg `compare` barg -- e.g. here, if we had for children other expresssions debruijnized, they would have the *correct* environments, so we needn't worry.
+                                                           -- the issue to automatically deriving is only really the 'Var' and 'Type' parameters ...
+                                 GT -> GT
+                          _ -> GT
+        D _ (LamF _abind abody)
+          -> case dataToTag# b of
+               bt
+                 -> if tagToEnum# (bt ># 3#) then
+                        LT
+                    else
+                        case b of
+                          D _ (LamF _bbind bbody) -- we can ignore the binder since the represented DB expression has the correct DB environments by construction (see 'representDBCoreExpr')
+                            -> compare abody bbody
+                          _ -> GT
+        D cma (LetF as abody)
+          -> case dataToTag# b of
+               bt
+                 -> if tagToEnum# (bt ># 4#) then
+                        LT
+                    else
+                        case b of
+                          D cmb (LetF bs bbody)
+                            -> case compare (D cma as) (D cmb bs) of
+                                 LT -> LT
+                                 EQ -> compare abody bbody
+                                 GT -> GT
+                          _ -> GT
+        D cma (CaseF cax _cabind catype caalt)
+          -> case dataToTag# b of
+               bt
+                 -> if tagToEnum# (bt <# 5#) then
+                        GT
+                    else
+                        case b of
+                          D cmb (CaseF cbx _cbbind cbtype cbalt)
+                            -> case compare cax cbx of
+                                 LT -> LT
+                                 -- ROMES:TODO: Consider changing order of comparisons to a more efficient one
+                                 EQ -> case cmpDeBruijnType (D cma catype) (D cmb cbtype) of
+                                        LT -> LT
+                                        EQ -> D cma caalt `compare` D cmb cbalt
+                                        GT -> GT
+                                 GT -> GT
+                          _ -> LT
+        D cma (CastF cax caco)
+          -> case dataToTag# b of
+               bt
+                 -> if tagToEnum# (bt <# 6#) then
+                        GT
+                    else
+                        case b of
+                          D cmb (CastF cbx cbco)
+                            -> case compare cax cbx of
+                                 LT -> LT
+                                 EQ -> cmpDeBruijnCoercion (D cma caco) (D cmb cbco)
+                                 GT -> GT
+                          _ -> LT
+        D cma (TickF tatickish tax)
+          -> case dataToTag# b of
+               bt
+                 -> if tagToEnum# (bt <# 7#) then
+                        GT
+                    else
+                        case b of
+                          D cmb (TickF tbtickish tbx)
+                            -> case cmpDeBruijnTickish (D cma tatickish) (D cmb tbtickish) of
+                                 LT -> LT
+                                 EQ -> tax `compare` tbx
+                                 GT -> GT
+                          _ -> LT
+        D cma (TypeF at)
+          -> case b of
+               D _    CoercionF{} -> LT
+               D cmb (TypeF bt) -> cmpDeBruijnType (D cma at) (D cmb bt)
+               _ -> GT
+        D cma (CoercionF aco)
+          -> case b of
+               D cmb (CoercionF bco) -> cmpDeBruijnCoercion (D cma aco) (D cmb bco)
+               _ -> GT
+
+instance Eq a => Eq (DeBruijn (CoreBindF a)) where
+  D cma a == D cmb b = go a b where
+    go (NonRecF _v1 r1) (NonRecF _v2 r2)
+      = r1 == r2 -- See Note [Alpha-equality for let-bindings]
+
+    go (RecF ps1) (RecF ps2)
+      =
+      -- See Note [Alpha-equality for let-bindings]
+      all2 (\b1 b2 -> eqDeBruijnType (D cma (varType b1))
+                                     (D cmb (varType b2)))
+           bs1 bs2
+      && rs1 == rs2
+      where
+        (bs1,rs1) = unzip ps1
+        (bs2,rs2) = unzip ps2
+
+    go _ _ = False
+
+
+instance Ord a => Ord (DeBruijn (CoreBindF a)) where
+  compare a b
+    = case a of
+        D _cma (NonRecF _ab ax)
+          -> case b of
+               D _cmb (NonRecF _bb bx) -- Again, we ignore the binders bc on representation they were accounted for correctly.
+                 -> ax `compare` bx
+               _ -> LT
+        D _cma (RecF as)
+          -> case b of
+               D _cmb (RecF bs) -> compare (map snd as) (map snd bs)
+               _ -> GT
+
+
+instance Ord a => Ord (DeBruijn (CoreAltF a)) where
+  compare a b
+    = case a of
+        D _cma (AltF ac _abs arhs)
+          -> case b of
+               D _cmb (AltF bc _bbs brhs)
+                 -> case compare ac bc of
+                      LT -> LT
+                      EQ -> -- Again, we don't look at binders bc we assume on representation they were accounted for correctly.
+                        arhs `compare` brhs
+                      GT -> GT
+
+cmpDeBruijnTickish :: DeBruijn CoreTickish -> DeBruijn CoreTickish -> Ordering
+cmpDeBruijnTickish (D env1 t1) (D env2 t2) = go t1 t2 where
+    go (Breakpoint lext lid lids) (Breakpoint rext rid rids)
+        = case compare lid rid of
+            LT -> LT
+            EQ -> case compare (D env1 lids) (D env2 rids) of
+                    LT -> LT
+                    EQ -> compare lext rext
+                    GT -> GT
+            GT -> GT
+    go l r = compare l r
+
+-- ROMES:TODO: DEBRUIJN ORDERING ON TYPES!!!
+cmpDeBruijnType :: DeBruijn Type -> DeBruijn Type -> Ordering
+cmpDeBruijnType _ _ = EQ
+
+-- ROMES:TODO: DEBRUIJN ORDERING ON COERCIONS!!!
+cmpDeBruijnCoercion :: DeBruijn Coercion -> DeBruijn Coercion -> Ordering
+cmpDeBruijnCoercion _ _ = EQ
+


=====================================
compiler/GHC/Core/Map/Expr.hs
=====================================
@@ -18,6 +18,8 @@ module GHC.Core.Map.Expr (
    CoreMap, emptyCoreMap, extendCoreMap, lookupCoreMap, foldCoreMap,
    -- * Alpha equality
    eqDeBruijnExpr, eqCoreExpr,
+   -- ** Exports for CoreExprF instances
+   eqDeBruijnTickish, eqDeBruijnVar,
    -- * 'TrieMap' class reexports
    TrieMap(..), insertTM, deleteTM,
    lkDFreeVar, xtDFreeVar,


=====================================
compiler/GHC/Core/Map/Type.hs
=====================================
@@ -6,6 +6,7 @@
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE DeriveTraversable #-}
 
 module GHC.Core.Map.Type (
      -- * Re-export generic interface
@@ -16,12 +17,13 @@ module GHC.Core.Map.Type (
    LooseTypeMap,
    -- ** With explicit scoping
    CmEnv, lookupCME, extendTypeMapWithScope, lookupTypeMapWithScope,
-   mkDeBruijnContext, extendCME, extendCMEs, emptyCME,
+   mkDeBruijnContext, extendCME, extendCMEs, emptyCME, sizeCME,
 
    -- * Utilities for use by friends only
    TypeMapG, CoercionMapG,
 
    DeBruijn(..), deBruijnize, eqDeBruijnType, eqDeBruijnVar,
+   cmpDeBruijnVar,
 
    BndrMap, xtBndr, lkBndr,
    VarMap, xtVar, lkVar, lkDFreeVar, xtDFreeVar,
@@ -282,6 +284,9 @@ eqDeBruijnType env_t1@(D env1 t1) env_t2@(D env2 t2) =
 instance Eq (DeBruijn Var) where
   (==) = eqDeBruijnVar
 
+instance Ord (DeBruijn Var) where
+  compare = cmpDeBruijnVar
+
 eqDeBruijnVar :: DeBruijn Var -> DeBruijn Var -> Bool
 eqDeBruijnVar (D env1 v1) (D env2 v2) =
     case (lookupCME env1 v1, lookupCME env2 v2) of
@@ -289,6 +294,13 @@ eqDeBruijnVar (D env1 v1) (D env2 v2) =
         (Nothing, Nothing) -> v1 == v2
         _ -> False
 
+cmpDeBruijnVar :: DeBruijn Var -> DeBruijn Var -> Ordering
+cmpDeBruijnVar (D env1 v1) (D env2 v2) =
+    case (lookupCME env1 v1, lookupCME env2 v2) of
+        (Just b1, Just b2) -> compare b1 b2
+        (Nothing, Nothing) -> compare v1 v2
+        (z,w) -> compare z w -- Compare Maybes on whether they're Just or Nothing
+
 instance {-# OVERLAPPING #-}
          Outputable a => Outputable (TypeMapG a) where
   ppr m = text "TypeMap elts" <+> ppr (foldTM (:) m [])
@@ -505,6 +517,10 @@ extendCMEs env vs = foldl' extendCME env vs
 lookupCME :: CmEnv -> Var -> Maybe BoundVar
 lookupCME (CME { cme_env = env }) v = lookupVarEnv env v
 
+-- | \(O(1)\). Number of elements in the CmEnv.
+sizeCME :: CmEnv -> Int
+sizeCME CME{cme_next=next} = next
+
 -- | @DeBruijn a@ represents @a@ modulo alpha-renaming.  This is achieved
 -- by equipping the value with a 'CmEnv', which tracks an on-the-fly deBruijn
 -- numbering.  This allows us to define an 'Eq' instance for @DeBruijn a@, even
@@ -512,6 +528,7 @@ lookupCME (CME { cme_env = env }) v = lookupVarEnv env v
 -- export the constructor.  Make a helper function if you find yourself
 -- needing it.
 data DeBruijn a = D CmEnv a
+  deriving (Functor, Foldable, Traversable) -- romes:TODO: for internal use only!
 
 -- | Synthesizes a @DeBruijn a@ from an @a@, by assuming that there are no
 -- bound binders (an empty 'CmEnv').  This is usually what you want if there
@@ -525,6 +542,15 @@ instance Eq (DeBruijn a) => Eq (DeBruijn [a]) where
                                       D env xs == D env' xs'
     _            == _               = False
 
+instance Ord (DeBruijn a) => Ord (DeBruijn [a]) where
+    D _   []     `compare` D _    []       = EQ
+    D env (x:xs) `compare` D env' (x':xs') = case D env x `compare` D env' x' of
+                                               LT -> LT
+                                               EQ -> D env xs `compare` D env' xs'
+                                               GT -> GT
+    D _   []     `compare` D _    (_:_)    = LT
+    D _   (_:_)  `compare` D _    []       = GT
+
 instance Eq (DeBruijn a) => Eq (DeBruijn (Maybe a)) where
     D _   Nothing  == D _    Nothing   = True
     D env (Just x) == D env' (Just x') = D env x  == D env' x'


=====================================
compiler/GHC/HsToCore/Pmc.hs
=====================================
@@ -393,7 +393,8 @@ getNFirstUncovered mode vars n (MkNablas nablas) = go n (bagToList nablas)
     go 0 _              = pure []
     go _ []             = pure []
     go n (nabla:nablas) = do
-      front <- generateInhabitingPatterns mode vars n nabla
+      let (vars', nabla') = representIds vars nabla -- they're already there, we're just getting the e-class ids back
+      front <- generateInhabitingPatterns mode vars' n nabla'
       back <- go (n - length front) nablas
       pure (front ++ back)
 


=====================================
compiler/GHC/HsToCore/Pmc/Check.hs
=====================================
@@ -106,6 +106,8 @@ checkGrd :: PmGrd -> CheckAction RedSets
 checkGrd grd = CA $ \inc -> case grd of
   -- let x = e: Refine with x ~ e
   PmLet x e -> do
+    -- romes: we could potentially do update the trees to use e-class ids here,
+    -- or in pmcMatches
     matched <- addPhiCtNablas inc (PhiCoreCt x e)
     tracePm "check:Let" (ppr x <+> char '=' <+> ppr e)
     pure CheckResult { cr_ret = emptyRedSets { rs_cov = matched }
@@ -182,7 +184,7 @@ checkEmptyCase pe@(PmEmptyCase { pe_var = var }) = CA $ \inc -> do
   unc <- addPhiCtNablas inc (PhiNotBotCt var)
   pure CheckResult { cr_ret = pe, cr_uncov = unc, cr_approx = mempty }
 
-checkPatBind :: (PmPatBind Pre) -> CheckAction (PmPatBind Post)
+checkPatBind :: PmPatBind Pre -> CheckAction (PmPatBind Post)
 checkPatBind = coerce checkGRHS
 
 {- Note [Checking EmptyCase]


=====================================
compiler/GHC/HsToCore/Pmc/Desugar.hs
=====================================
@@ -326,6 +326,8 @@ desugarEmptyCase :: Id -> DsM PmEmptyCase
 desugarEmptyCase var = pure PmEmptyCase { pe_var = var }
 
 -- | Desugar the non-empty 'Match'es of a 'MatchGroup'.
+--
+-- Returns a desugared guard tree of guard expressions.
 desugarMatches :: [Id] -> NonEmpty (LMatch GhcTc (LHsExpr GhcTc))
                -> DsM (PmMatchGroup Pre)
 desugarMatches vars matches =


=====================================
compiler/GHC/HsToCore/Pmc/Ppr.hs
=====================================
@@ -13,8 +13,6 @@ import GHC.Data.List.Infinite (Infinite (..))
 import qualified GHC.Data.List.Infinite as Inf
 import GHC.Types.Basic
 import GHC.Types.Id
-import GHC.Types.Var.Env
-import GHC.Types.Unique.DFM
 import GHC.Core.ConLike
 import GHC.Core.DataCon
 import GHC.Builtin.Types
@@ -27,6 +25,10 @@ import Data.List.NonEmpty (NonEmpty, nonEmpty, toList)
 
 import GHC.HsToCore.Pmc.Types
 
+import Data.Equality.Graph (ClassId)
+import Data.IntMap (IntMap)
+import qualified Data.IntMap as IM
+
 -- | Pretty-print the guts of an uncovered value vector abstraction, i.e., its
 -- components and refutable shapes associated to any mentioned variables.
 --
@@ -42,16 +44,17 @@ import GHC.HsToCore.Pmc.Types
 -- additional elements are indicated by "...".
 pprUncovered :: Nabla -> [Id] -> SDoc
 pprUncovered nabla vas
-  | isNullUDFM refuts = fsep vec -- there are no refutations
-  | otherwise         = hang (fsep vec) 4 $
-                          text "where" <+> vcat (map (pprRefutableShapes . snd) (udfmToList refuts))
+  | IM.null refuts = fsep vec -- there are no refutations
+  | otherwise      = hang (fsep vec) 4 $
+                       text "where" <+> vcat (map (pprRefutableShapes . snd) (IM.toList refuts))
   where
     init_prec
       -- No outer parentheses when it's a unary pattern by assuming lowest
       -- precedence
       | [_] <- vas   = topPrec
       | otherwise    = appPrec
-    ppr_action       = mapM (pprPmVar init_prec) vas
+    (vas',_nabla')   = representIds vas nabla
+    ppr_action       = mapM (pprPmVar init_prec) vas'
     (vec, renamings) = runPmPpr nabla ppr_action
     refuts           = prettifyRefuts nabla renamings
 
@@ -96,35 +99,37 @@ substitution to the vectors before printing them out (see function `pprOne' in
 
 -- | Extract and assigns pretty names to constraint variables with refutable
 -- shapes.
-prettifyRefuts :: Nabla -> DIdEnv (Id, SDoc) -> DIdEnv (SDoc, [PmAltCon])
-prettifyRefuts nabla = listToUDFM_Directly . map attach_refuts . udfmToList
+prettifyRefuts :: Nabla -> IntMap (ClassId, SDoc) -> IntMap (SDoc, [PmAltCon])
+prettifyRefuts nabla = IM.mapWithKey attach_refuts
   where
-    attach_refuts (u, (x, sdoc)) = (u, (sdoc, lookupRefuts nabla x))
+    -- RM: why map with key?
+    attach_refuts :: ClassId -> (ClassId, SDoc) -> (SDoc, [PmAltCon])
+    attach_refuts _u (x, sdoc) = (sdoc, lookupRefuts nabla x)
 
 
-type PmPprM a = RWS Nabla () (DIdEnv (Id, SDoc), Infinite SDoc) a
+type PmPprM a = RWS Nabla () (IntMap (ClassId, SDoc), Infinite SDoc) a
 
 -- Try nice names p,q,r,s,t before using the (ugly) t_i
 nameList :: Infinite SDoc
 nameList = map text ["p","q","r","s","t"] Inf.++ flip Inf.unfoldr (0 :: Int) (\ u -> (text ('t':show u), u+1))
 
-runPmPpr :: Nabla -> PmPprM a -> (a, DIdEnv (Id, SDoc))
-runPmPpr nabla m = case runRWS m nabla (emptyDVarEnv, nameList) of
+runPmPpr :: Nabla -> PmPprM a -> (a, IntMap (ClassId, SDoc))
+runPmPpr nabla m = case runRWS m nabla (IM.empty, nameList) of
   (a, (renamings, _), _) -> (a, renamings)
 
 -- | Allocates a new, clean name for the given 'Id' if it doesn't already have
 -- one.
-getCleanName :: Id -> PmPprM SDoc
+getCleanName :: ClassId -> PmPprM SDoc
 getCleanName x = do
   (renamings, name_supply) <- get
   let Inf clean_name name_supply' = name_supply
-  case lookupDVarEnv renamings x of
+  case IM.lookup x renamings of
     Just (_, nm) -> pure nm
     Nothing -> do
-      put (extendDVarEnv renamings x (x, clean_name), name_supply')
+      put (IM.insert x (x, clean_name) renamings, name_supply')
       pure clean_name
 
-checkRefuts :: Id -> PmPprM (Maybe SDoc) -- the clean name if it has negative info attached
+checkRefuts :: ClassId -> PmPprM (Maybe SDoc) -- the clean name if it has negative info attached
 checkRefuts x = do
   nabla <- ask
   case lookupRefuts nabla x of
@@ -134,20 +139,20 @@ checkRefuts x = do
 -- | Pretty print a variable, but remember to prettify the names of the variables
 -- that refer to neg-literals. The ones that cannot be shown are printed as
 -- underscores.
-pprPmVar :: PprPrec -> Id -> PmPprM SDoc
+pprPmVar :: PprPrec -> ClassId -> PmPprM SDoc
 pprPmVar prec x = do
   nabla <- ask
   case lookupSolution nabla x of
     Just (PACA alt _tvs args) -> pprPmAltCon prec alt args
     Nothing                   -> fromMaybe underscore <$> checkRefuts x
 
-pprPmAltCon :: PprPrec -> PmAltCon -> [Id] -> PmPprM SDoc
+pprPmAltCon :: PprPrec -> PmAltCon -> [ClassId] -> PmPprM SDoc
 pprPmAltCon _prec (PmAltLit l)      _    = pure (ppr l)
 pprPmAltCon prec  (PmAltConLike cl) args = do
   nabla <- ask
   pprConLike nabla prec cl args
 
-pprConLike :: Nabla -> PprPrec -> ConLike -> [Id] -> PmPprM SDoc
+pprConLike :: Nabla -> PprPrec -> ConLike -> [ClassId] -> PmPprM SDoc
 pprConLike nabla _prec cl args
   | Just pm_expr_list <- pmExprAsList nabla (PmAltConLike cl) args
   = case pm_expr_list of
@@ -174,8 +179,8 @@ pprConLike _nabla prec cl args
 
 -- | The result of 'pmExprAsList'.
 data PmExprList
-  = NilTerminated [Id]
-  | WcVarTerminated (NonEmpty Id) Id
+  = NilTerminated [ClassId]
+  | WcVarTerminated (NonEmpty ClassId) ClassId
 
 -- | Extract a list of 'Id's out of a sequence of cons cells, optionally
 -- terminated by a wildcard variable instead of @[]@. Some examples:
@@ -186,7 +191,7 @@ data PmExprList
 --   ending in a wildcard variable x (of list type). Should be pretty-printed as
 --   (1:2:_).
 -- * @pmExprAsList [] == Just ('NilTerminated' [])@
-pmExprAsList :: Nabla -> PmAltCon -> [Id] -> Maybe PmExprList
+pmExprAsList :: Nabla -> PmAltCon -> [ClassId] -> Maybe PmExprList
 pmExprAsList nabla = go_con []
   where
     go_var rev_pref x


=====================================
compiler/GHC/HsToCore/Pmc/Solver.hs
=====================================
@@ -49,18 +49,17 @@ import GHC.Data.Bag
 import GHC.Types.CompleteMatch
 import GHC.Types.Unique.Set
 import GHC.Types.Unique.DSet
-import GHC.Types.Unique.SDFM
 import GHC.Types.Id
 import GHC.Types.Name
 import GHC.Types.Var      (EvVar)
 import GHC.Types.Var.Env
-import GHC.Types.Var.Set
 import GHC.Types.Unique.Supply
 
 import GHC.Core
 import GHC.Core.FVs         (exprFreeVars)
 import GHC.Core.TyCo.Compare( eqType )
-import GHC.Core.Map.Expr
+import GHC.Core.Map.Type
+import GHC.Core.Equality
 import GHC.Core.Predicate (typeDeterminesValue)
 import GHC.Core.SimpleOpt (simpleOptExpr, exprIsConApp_maybe)
 import GHC.Core.Utils     (exprType)
@@ -99,6 +98,15 @@ import Data.List     (sortBy, find)
 import qualified Data.List.NonEmpty as NE
 import Data.Ord      (comparing)
 
+import Data.Equality.Graph (ClassId)
+import Data.Equality.Graph.Lens
+import qualified Data.Equality.Graph as EG
+import Data.Bifunctor (second)
+import Data.Function ((&))
+import qualified Data.IntSet as IS
+import Data.Tuple (swap)
+import Data.Traversable (mapAccumL)
+
 --
 -- * Main exports
 --
@@ -556,6 +564,9 @@ where you can find the solution in a perhaps more digestible format.
 
 -- | A high-level pattern-match constraint. Corresponds to φ from Figure 3 of
 -- the LYG paper.
+-- ROMES:TODO: Ultimately, all these Ids could be replaced by e-class ids which
+-- are generated during desugaring, but there are some details to it
+-- (propagating the e-graphs in which these e-classes were created)
 data PhiCt
   = PhiTyCt !PredType
   -- ^ A type constraint "T ~ U".
@@ -662,74 +673,83 @@ nameTyCt pred_ty = do
 -- 'addTyCts' before, through 'addPhiCts'.
 addPhiTmCt :: Nabla -> PhiCt -> MaybeT DsM Nabla
 addPhiTmCt _     (PhiTyCt ct)              = pprPanic "addPhiCt:TyCt" (ppr ct) -- See the precondition
-addPhiTmCt nabla (PhiCoreCt x e)           = addCoreCt nabla x e
+addPhiTmCt nabla (PhiCoreCt x e)           = let (xid, nabla') = representId x nabla
+                                              in addCoreCt nabla' xid e
 addPhiTmCt nabla (PhiConCt x con tvs dicts args) = do
   -- Case (1) of Note [Strict fields and variables of unlifted type]
   -- PhiConCt correspond to the higher-level φ constraints from the paper with
   -- bindings semantics. It disperses into lower-level δ constraints that the
   -- 'add*Ct' functions correspond to.
-  nabla' <- addTyCts nabla (listToBag dicts)
-  nabla'' <- addConCt nabla' x con tvs args
-  foldlM addNotBotCt nabla'' (filterUnliftedFields con args)
-addPhiTmCt nabla (PhiNotConCt x con)       = addNotConCt nabla x con
-addPhiTmCt nabla (PhiBotCt x)              = addBotCt nabla x
-addPhiTmCt nabla (PhiNotBotCt x)           = addNotBotCt nabla x
-
-filterUnliftedFields :: PmAltCon -> [Id] -> [Id]
+  nabla1 <- addTyCts nabla (listToBag dicts)
+  let (xid, nabla2) = representId x nabla1
+  let (args_ids, nabla3) = representIds args nabla2
+  -- romes: here we could have something like (merge (add K arg_ids) x)
+  -- or actually that should be done by addConCt?
+  nabla4 <- addConCt nabla3 xid con tvs args_ids
+  foldlM addNotBotCt nabla4 (filterUnliftedFields con (zip args_ids args))
+addPhiTmCt nabla (PhiNotConCt x con)       = let (xid, nabla') = representId x nabla
+                                              in addNotConCt nabla' xid con
+addPhiTmCt nabla (PhiBotCt x)              = let (xid, nabla') = representId x nabla
+                                              in addBotCt nabla' xid
+addPhiTmCt nabla (PhiNotBotCt x)           = let (xid, nabla') = representId x nabla
+                                              in addNotBotCt nabla' xid
+
+filterUnliftedFields :: PmAltCon -> [(ClassId,Id)] -> [ClassId]
 filterUnliftedFields con args =
-  [ arg | (arg, bang) <- zipEqual "addPhiCt" args (pmAltConImplBangs con)
-        , isBanged bang || definitelyUnliftedType (idType arg) ]
+  [ arg_id | ((arg_id,arg), bang) <- zipEqual "addPhiCt" args (pmAltConImplBangs con)
+           , isBanged bang || definitelyUnliftedType (idType arg) ]
 
 -- | Adds the constraint @x ~ ⊥@, e.g. that evaluation of a particular 'Id' @x@
 -- surely diverges. Quite similar to 'addConCt', only that it only cares about
 -- ⊥.
-addBotCt :: Nabla -> Id -> MaybeT DsM Nabla
-addBotCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts=env } } x = do
-  let (y, vi at VI { vi_bot = bot }) = lookupVarInfoNT (nabla_tm_st nabla) x
-  case bot of
-    IsNotBot -> mzero      -- There was x ≁ ⊥. Contradiction!
-    IsBot    -> pure nabla -- There already is x ~ ⊥. Nothing left to do
-    MaybeBot               -- We add x ~ ⊥
-      | definitelyUnliftedType (idType x)
-      -- Case (3) in Note [Strict fields and variables of unlifted type]
-      -> mzero -- unlifted vars can never be ⊥
-      | otherwise
-      -> do
-          let vi' = vi{ vi_bot = IsBot }
-          pure nabla{ nabla_tm_st = ts{ts_facts = addToUSDFM env y vi' } }
+addBotCt :: Nabla -> ClassId -> MaybeT DsM Nabla
+addBotCt nabla x = updateVarInfo x go nabla
+  where
+    go :: VarInfo -> MaybeT DsM VarInfo
+    go vi at VI { vi_bot = bot }
+      = case bot of
+          IsNotBot -> mzero      -- There was x ≁ ⊥. Contradiction!
+          IsBot    -> return vi  -- There already is x ~ ⊥. Nothing left to do
+          MaybeBot               -- We add x ~ ⊥
+            | definitelyUnliftedType (eclassType x nabla)
+            -- Case (3) in Note [Strict fields and variables of unlifted type]
+            -> mzero -- unlifted vars can never be ⊥
+            | otherwise
+            -> do
+              return vi{ vi_bot = IsBot }
 
 -- | Adds the constraint @x ~/ ⊥@ to 'Nabla'. Quite similar to 'addNotConCt',
 -- but only cares for the ⊥ "constructor".
-addNotBotCt :: Nabla -> Id -> MaybeT DsM Nabla
+addNotBotCt :: Nabla -> ClassId -> MaybeT DsM Nabla
 addNotBotCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ts_facts=env} } x = do
-  let (y, vi at VI { vi_bot = bot }) = lookupVarInfoNT (nabla_tm_st nabla) x
+  let (yid, vi at VI { vi_bot = bot }) = lookupVarInfoNT ts x
   case bot of
     IsBot    -> mzero      -- There was x ~ ⊥. Contradiction!
     IsNotBot -> pure nabla -- There already is x ≁ ⊥. Nothing left to do
     MaybeBot -> do         -- We add x ≁ ⊥ and test if x is still inhabited
       -- Mark dirty for a delayed inhabitation test
       let vi' = vi{ vi_bot = IsNotBot}
-      pure $ markDirty y
-           $ nabla{ nabla_tm_st = ts{ ts_facts = addToUSDFM env y vi' } }
+      pure $ markDirty yid
+           $ nabla{nabla_tm_st = ts{ ts_facts = env & _class yid . _data .~ vi'}}
 
 -- | Record a @x ~/ K@ constraint, e.g. that a particular 'Id' @x@ can't
 -- take the shape of a 'PmAltCon' @K@ in the 'Nabla' and return @Nothing@ if
 -- that leads to a contradiction.
 -- See Note [TmState invariants].
-addNotConCt :: Nabla -> Id -> PmAltCon -> MaybeT DsM Nabla
+addNotConCt :: Nabla -> ClassId -> PmAltCon -> MaybeT DsM Nabla
 addNotConCt _     _ (PmAltConLike (RealDataCon dc))
   | isNewDataCon dc = mzero -- (3) in Note [Coverage checking Newtype matches]
 addNotConCt nabla x nalt = do
   (mb_mark_dirty, nabla') <- trvVarInfo go nabla x
   pure $ case mb_mark_dirty of
-    Just x  -> markDirty x nabla'
-    Nothing -> nabla'
+    True  -> markDirty x nabla'
+    False -> nabla'
   where
     -- Update `x`'s 'VarInfo' entry. Fail ('MaybeT') if contradiction,
     -- otherwise return updated entry and `Just x'` if `x` should be marked dirty,
     -- where `x'` is the representative of `x`.
-    go :: VarInfo -> MaybeT DsM (Maybe Id, VarInfo)
-    go vi@(VI x' pos neg _ rcm) = do
+    go :: VarInfo -> MaybeT DsM (Bool, VarInfo)
+    go vi@(VI _x' pos neg _ rcm) = do
       -- 1. Bail out quickly when nalt contradicts a solution
       let contradicts nalt sol = eqPmAltCon (paca_con sol) nalt == Equal
       guard (not (any (contradicts nalt) pos))
@@ -748,12 +768,12 @@ addNotConCt nabla x nalt = do
       pure $ case mb_rcm' of
         -- If nalt could be removed from a COMPLETE set, we'll get back Just and
         -- have to mark x dirty, by returning Just x'.
-        Just rcm' -> (Just x',  vi'{ vi_rcm = rcm' })
+        Just rcm' -> (True, vi'{ vi_rcm = rcm' })
         -- Otherwise, nalt didn't occur in any residual COMPLETE set and we
         -- don't have to mark it dirty. So we return Nothing, which in the case
         -- above would have compromised precision.
         -- See Note [Shortcutting the inhabitation test], grep for T17836.
-        Nothing   -> (Nothing, vi')
+        Nothing   -> (False, vi')
 
 hasRequiredTheta :: PmAltCon -> Bool
 hasRequiredTheta (PmAltConLike cl) = notNull req_theta
@@ -767,8 +787,9 @@ hasRequiredTheta _                 = False
 -- have on @x@, reject (@Nothing@) otherwise.
 --
 -- See Note [TmState invariants].
-addConCt :: Nabla -> Id -> PmAltCon -> [TyVar] -> [Id] -> MaybeT DsM Nabla
-addConCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts=env } } x alt tvs args = do
+addConCt :: Nabla -> ClassId -> PmAltCon -> [TyVar] -> [ClassId] -> MaybeT DsM Nabla
+addConCt nabla at MkNabla{ nabla_tm_st = ts } x alt tvs args = do
+  -- ROMES:TODO: Also looks like a function on varinfo (adjust)
   let vi@(VI _ pos neg bot _) = lookupVarInfo ts x
   -- First try to refute with a negative fact
   guard (not (elemPmAltConSet alt neg))
@@ -788,7 +809,7 @@ addConCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts=env } } x alt tvs args =
     Nothing -> do
       let pos' = PACA alt tvs args : pos
       let nabla_with bot' =
-            nabla{ nabla_tm_st = ts{ts_facts = addToUSDFM env x (vi{vi_pos = pos', vi_bot = bot'})} }
+            nabla{nabla_tm_st = ts{ts_facts = ts_facts ts & _class x ._data .~ vi{vi_pos = pos', vi_bot = bot'}}}
       -- Do (2) in Note [Coverage checking Newtype matches]
       case (alt, args) of
         (PmAltConLike (RealDataCon dc), [y]) | isNewDataCon dc ->
@@ -816,12 +837,15 @@ equateTys ts us =
 -- @nabla@ has integrated the knowledge from the equality constraint.
 --
 -- See Note [TmState invariants].
-addVarCt :: Nabla -> Id -> Id -> MaybeT DsM Nabla
+addVarCt :: Nabla -> ClassId -> ClassId -> MaybeT DsM Nabla
+-- This is where equality-graphs really come into play.
 addVarCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = env } } x y =
-  case equateUSDFM env x y of
-    (Nothing,   env') -> pure (nabla{ nabla_tm_st = ts{ ts_facts = env' } })
+  -- ROMES:TODO: equate auxiliary var that finds both vars, and lookups up the domain associated. However, I think we no longer should have Just/Nothing but rather always store emptyVarInfo for new e-nodes
+  -- equate should also update e-graph, basically re-implement "equateUSDFM" in terms of the e-graph, or inline it or so
+  case equate env x y of
     -- Add the constraints we had for x to y
-    (Just vi_x, env') -> do
+    -- See Note [Joining e-classes PMC] todo mention from joinA
+    (vi_x, env') -> do
       let nabla_equated = nabla{ nabla_tm_st = ts{ts_facts = env'} }
       -- and then gradually merge every positive fact we have on x into y
       let add_pos nabla (PACA cl tvs args) = addConCt nabla y cl tvs args
@@ -829,6 +853,22 @@ addVarCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = env } } x y =
       -- Do the same for negative info
       let add_neg nabla nalt = addNotConCt nabla y nalt
       foldlM add_neg nabla_pos (pmAltConSetElems (vi_neg vi_x))
+  where
+    -- @equate env x y@ makes @x@ and @y@ point to the same entry,
+    -- thereby merging @x@'s class with @y@'s.
+    -- If both @x@ and @y@ are in the domain of the map, then @y@'s entry will be
+    -- chosen as the new entry and @x@'s old entry will be returned.
+    --
+    -- Examples in terms of the model (see 'UniqSDFM'):
+    -- >>> equate [] u1 u2 == (Nothing, [({u1,u2}, Nothing)])
+    -- >>> equate [({u1,u3}, Just ele1)] u3 u4 == (Nothing, [({u1,u3,u4}, Just ele1)])
+    -- >>> equate [({u1,u3}, Just ele1)] u4 u3 == (Nothing, [({u1,u3,u4}, Just ele1)])
+    -- >>> equate [({u1,u3}, Just ele1), ({u2}, Just ele2)] u3 u2 == (Just ele1, [({u2,u1,u3}, Just ele2)])
+    equate :: TmEGraph -> ClassId -> ClassId -> (VarInfo, TmEGraph)
+    equate eg x y = let (_, eg')  = EG.merge x y eg
+                     in (eg  ^. _class x ._data, eg')
+                    -- Note: lookup in @eg@, not @eg'@, because it's before the merge.
+
 
 -- | Inspects a 'PmCoreCt' @let x = e@ by recording constraints for @x@ based
 -- on the shape of the 'CoreExpr' @e at . Examples:
@@ -842,7 +882,7 @@ addVarCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = env } } x y =
 --     for other literals. See 'coreExprAsPmLit'.
 --   * Finally, if we have @let x = e@ and we already have seen @let y = e@, we
 --     want to record @x ~ y at .
-addCoreCt :: Nabla -> Id -> CoreExpr -> MaybeT DsM Nabla
+addCoreCt :: Nabla -> ClassId -> CoreExpr -> MaybeT DsM Nabla
 addCoreCt nabla x e = do
   simpl_opts <- initSimpleOpts <$> getDynFlags
   let e' = simpleOptExpr simpl_opts e
@@ -851,8 +891,9 @@ addCoreCt nabla x e = do
   where
     -- Takes apart a 'CoreExpr' and tries to extract as much information about
     -- literals and constructor applications as possible.
-    core_expr :: Id -> CoreExpr -> StateT Nabla (MaybeT DsM) ()
+    core_expr :: ClassId -> CoreExpr -> StateT Nabla (MaybeT DsM) ()
     -- TODO: Handle newtypes properly, by wrapping the expression in a DataCon
+    -- RM: Could this be done better with e-graphs? The whole newtype stuff
     -- This is the right thing for casts involving data family instances and
     -- their representation TyCon, though (which are not visible in source
     -- syntax). See Note [COMPLETE sets on data families]
@@ -874,9 +915,10 @@ addCoreCt nabla x e = do
             <- exprIsConApp_maybe in_scope_env e
       = data_con_app x in_scope dc args
       -- See Note [Detecting pattern synonym applications in expressions]
-      | Var y <- e, Nothing <- isDataConId_maybe x
+      | Var y <- e, Nothing <- isDataConId_maybe (eclassMatchId x nabla) -- RM:TODO: can we lookup in this nabla or should we get the one from the StateT somehow?
       -- We don't consider DataCons flexible variables
-      = modifyT (\nabla -> addVarCt nabla x y)
+      = modifyT (\nabla -> let (yid, nabla') = representId y nabla
+                            in addVarCt nabla' x yid)
       | otherwise
       -- Any other expression. Try to find other uses of a semantically
       -- equivalent expression and represent them by the same variable!
@@ -894,17 +936,21 @@ addCoreCt nabla x e = do
     -- see if we already encountered a constraint @let y = e'@ with @e'@
     -- semantically equivalent to @e@, in which case we may add the constraint
     -- @x ~ y at .
-    equate_with_similar_expr :: Id -> CoreExpr -> StateT Nabla (MaybeT DsM) ()
-    equate_with_similar_expr x e = do
-      rep <- StateT $ \nabla -> lift (representCoreExpr nabla e)
+    equate_with_similar_expr :: ClassId -> CoreExpr -> StateT Nabla (MaybeT DsM) ()
+    equate_with_similar_expr _x e = do
+      rep <- StateT $ \nabla -> pure (representCoreExpr nabla e)
       -- Note that @rep == x@ if we encountered @e@ for the first time.
+
+      -- ROMES:TODO: I don't think we need to do the following anymore, represent should directly do so in the right e-class (if rebuilt)
       modifyT (\nabla -> addVarCt nabla x rep)
+      -- ROMES:TODO: When to rebuild?
 
-    bind_expr :: CoreExpr -> StateT Nabla (MaybeT DsM) Id
+    bind_expr :: CoreExpr -> StateT Nabla (MaybeT DsM) ClassId
     bind_expr e = do
       x <- lift (lift (mkPmId (exprType e)))
-      core_expr x e
-      pure x
+      xid <- StateT $ \nabla -> pure $ representId x nabla
+      core_expr xid e
+      pure xid
 
     -- Look at @let x = K taus theta es@ and generate the following
     -- constraints (assuming universals were dropped from @taus@ before):
@@ -913,7 +959,7 @@ addCoreCt nabla x e = do
     --   3. @y_1 ~ e_1, ..., y_m ~ e_m@ for fresh @y_i@
     --   4. @x ~ K as ys@
     -- This is quite similar to PmCheck.pmConCts.
-    data_con_app :: Id -> InScopeSet -> DataCon -> [CoreExpr] -> StateT Nabla (MaybeT DsM) ()
+    data_con_app :: ClassId -> InScopeSet -> DataCon -> [CoreExpr] -> StateT Nabla (MaybeT DsM) ()
     data_con_app x in_scope dc args = do
       let dc_ex_tvs              = dataConExTyCoVars dc
           arty                   = dataConSourceArity dc
@@ -936,13 +982,13 @@ addCoreCt nabla x e = do
     -- Adds a literal constraint, i.e. @x ~ 42 at .
     -- Also we assume that literal expressions won't diverge, so this
     -- will add a @x ~/ ⊥@ constraint.
-    pm_lit :: Id -> PmLit -> StateT Nabla (MaybeT DsM) ()
+    pm_lit :: ClassId -> PmLit -> StateT Nabla (MaybeT DsM) ()
     pm_lit x lit = do
       modifyT $ \nabla -> addNotBotCt nabla x
       pm_alt_con_app x (PmAltLit lit) [] []
 
     -- Adds the given constructor application as a solution for @x at .
-    pm_alt_con_app :: Id -> PmAltCon -> [TyVar] -> [Id] -> StateT Nabla (MaybeT DsM) ()
+    pm_alt_con_app :: ClassId -> PmAltCon -> [TyVar] -> [ClassId] -> StateT Nabla (MaybeT DsM) ()
     pm_alt_con_app x con tvs args = modifyT $ \nabla -> addConCt nabla x con tvs args
 
 -- | Like 'modify', but with an effectful modifier action
@@ -953,24 +999,18 @@ modifyT f = StateT $ fmap ((,) ()) . f
 -- Which is the @x@ of a @let x = e'@ constraint (with @e@ semantically
 -- equivalent to @e'@) we encountered earlier, or a fresh identifier if
 -- there weren't any such constraints.
-representCoreExpr :: Nabla -> CoreExpr -> DsM (Id, Nabla)
-representCoreExpr nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_reps = reps } } e
-  | Just rep <- lookupCoreMap reps key = pure (rep, nabla)
-  | otherwise = do
-      rep <- mkPmId (exprType e)
-      let reps'  = extendCoreMap reps key rep
-      let nabla' = nabla{ nabla_tm_st = ts{ ts_reps = reps' } }
-      pure (rep, nabla')
-  where
-    key = makeDictsCoherent e
-      -- Use a key in which dictionaries for the same type become equal.
-      -- See Note [Unique dictionaries in the TmOracle CoreMap]
+representCoreExpr :: Nabla -> CoreExpr -> (ClassId, Nabla)
+representCoreExpr nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = egraph } } e =
+  second (\g -> nabla{nabla_tm_st = ts{ts_facts = g}}) $ representDBCoreExpr (deBruijnize (makeDictsCoherent e)) egraph
+  -- Use a key in which dictionaries for the same type become equal.
+  -- See Note [Unique dictionaries in the TmOracle CoreMap]
 
 -- | Change out 'Id's which are uniquely determined by their type to a
 -- common value, so that different names for dictionaries of the same type
 -- are considered equal when building a 'CoreMap'.
 --
 -- See Note [Unique dictionaries in the TmOracle CoreMap]
+-- ROMES:TODO: I suppose this should be taken into account by the Eq instance of DeBruijnF CoreExprF -- if we do that there then we're sure that EG.represent takes that into account.
 makeDictsCoherent :: CoreExpr -> CoreExpr
 makeDictsCoherent var@(Var v)
   | let ty = idType v
@@ -1059,6 +1099,7 @@ In the end, replacing dictionaries with an error value in the pattern-match
 checker was the most self-contained, although we might want to revisit once
 we implement a more robust approach to computing equality in the pattern-match
 checker (see #19272).
+ROMES:TODO: I don't think e-graphs avoid this situation, because the names of the binders will still differ (although the Eq instance could take this into account?)
 -}
 
 {- Note [The Pos/Neg invariant]
@@ -1271,22 +1312,24 @@ tyStateRefined :: TyState -> TyState -> Bool
 -- refinement of b or vice versa!
 tyStateRefined a b = ty_st_n a /= ty_st_n b
 
-markDirty :: Id -> Nabla -> Nabla
+markDirty :: ClassId -> Nabla -> Nabla
 markDirty x nabla at MkNabla{nabla_tm_st = ts at TmSt{ts_dirty = dirty} } =
-  nabla{ nabla_tm_st = ts{ ts_dirty = extendDVarSet dirty x } }
+  nabla{nabla_tm_st = ts{ ts_dirty = IS.insert x dirty }}
 
-traverseDirty :: Monad m => (VarInfo -> m VarInfo) -> TmState -> m TmState
+traverseDirty :: Monad m => (ClassId -> VarInfo -> m VarInfo) -> TmState -> m TmState
 traverseDirty f ts at TmSt{ts_facts = env, ts_dirty = dirty} =
-  go (uniqDSetToList dirty) env
+
+  go (IS.elems dirty) env
   where
     go []     env  = pure ts{ts_facts=env}
-    go (x:xs) !env = do
-      vi' <- f (lookupVarInfo ts x)
-      go xs (addToUSDFM env x vi')
+    go (x:xs) !_env = do
+      let vi = env ^._class x._data
+      vi' <- f x vi
+      go xs (env & _class x._data .~ vi') -- Use 'over' or so instead?
 
-traverseAll :: Monad m => (VarInfo -> m VarInfo) -> TmState -> m TmState
+traverseAll :: Monad m => (ClassId -> VarInfo -> m VarInfo) -> TmState -> m TmState
 traverseAll f ts at TmSt{ts_facts = env} = do
-  env' <- traverseUSDFM f env
+  env' <- (_iclasses.(\fab (i,cl) -> let mvi = fab (i,cl^._data) in (cl &) . (_data .~) <$> mvi)) (uncurry f) env
   pure ts{ts_facts = env'}
 
 -- | Makes sure the given 'Nabla' is still inhabited, by trying to instantiate
@@ -1308,31 +1351,34 @@ inhabitationTest fuel  old_ty_st nabla at MkNabla{ nabla_tm_st = ts } = {-# SCC "in
   ts' <- if tyStateRefined old_ty_st (nabla_ty_st nabla)
             then traverseAll   test_one ts
             else traverseDirty test_one ts
-  pure nabla{ nabla_tm_st = ts'{ts_dirty=emptyDVarSet}}
+  pure nabla{ nabla_tm_st = ts'{ts_dirty=IS.empty}}
   where
-    nabla_not_dirty = nabla{ nabla_tm_st = ts{ts_dirty=emptyDVarSet} }
-    test_one :: VarInfo -> MaybeT DsM VarInfo
-    test_one vi =
-      lift (varNeedsTesting old_ty_st nabla vi) >>= \case
+    nabla_not_dirty = nabla{ nabla_tm_st = ts{ts_dirty=IS.empty} }
+    test_one :: ClassId -> VarInfo -> MaybeT DsM VarInfo
+    test_one cid vi =
+      lift (varNeedsTesting old_ty_st nabla cid vi) >>= \case
         True -> do
           -- lift $ tracePm "test_one" (ppr vi)
           -- No solution yet and needs testing
           -- We have to test with a Nabla where all dirty bits are cleared
-          instantiate (fuel-1) nabla_not_dirty vi
-        _ -> pure vi
+          instantiate (fuel-1) nabla_not_dirty (cid,vi)
+        _ -> return vi
+
+-- ROMES:TODO: The dirty shortcutting bit seems like the bookeeping on nodes to
+-- upward merge, perhaps we can rid of it too
 
 -- | Checks whether the given 'VarInfo' needs to be tested for inhabitants.
 -- Returns `False` when we can skip the inhabitation test, presuming it would
 -- say "yes" anyway. See Note [Shortcutting the inhabitation test].
-varNeedsTesting :: TyState -> Nabla -> VarInfo -> DsM Bool
-varNeedsTesting _         MkNabla{nabla_tm_st=tm_st}     vi
-  | elemDVarSet (vi_id vi) (ts_dirty tm_st) = pure True
-varNeedsTesting _         _                              vi
+varNeedsTesting :: TyState -> Nabla -> ClassId -> VarInfo -> DsM Bool
+varNeedsTesting _         MkNabla{nabla_tm_st=tm_st}     cid _
+  | IS.member cid (ts_dirty tm_st) = pure True
+varNeedsTesting _         _                              _ vi
   | notNull (vi_pos vi)                     = pure False
-varNeedsTesting old_ty_st MkNabla{nabla_ty_st=new_ty_st} _
+varNeedsTesting old_ty_st MkNabla{nabla_ty_st=new_ty_st} _ _
   -- Same type state => still inhabited
   | not (tyStateRefined old_ty_st new_ty_st) = pure False
-varNeedsTesting old_ty_st MkNabla{nabla_ty_st=new_ty_st} vi = do
+varNeedsTesting old_ty_st MkNabla{nabla_ty_st=new_ty_st} _ vi = do
   -- These normalisations are relatively expensive, but still better than having
   -- to perform a full inhabitation test
   (_, _, old_norm_ty) <- tntrGuts <$> pmTopNormaliseType old_ty_st (idType $ vi_id vi)
@@ -1349,25 +1395,25 @@ varNeedsTesting old_ty_st MkNabla{nabla_ty_st=new_ty_st} vi = do
 -- NB: Does /not/ filter each CompleteMatch with the oracle; members may
 --     remain that do not satisfy it.  This lazy approach just
 --     avoids doing unnecessary work.
-instantiate :: Int -> Nabla -> VarInfo -> MaybeT DsM VarInfo
-instantiate fuel nabla vi = {-# SCC "instantiate" #-}
-  (instBot fuel nabla vi <|> instCompleteSets fuel nabla vi)
+instantiate :: Int -> Nabla -> (ClassId, VarInfo) -> MaybeT DsM VarInfo
+instantiate fuel nabla (ci,vi) = {-# SCC "instantiate" #-}
+  (instBot fuel nabla (ci,vi) <|> instCompleteSets fuel nabla ci)
 
 -- | The \(⊢_{Bot}\) rule from the paper
-instBot :: Int -> Nabla -> VarInfo -> MaybeT DsM VarInfo
-instBot _fuel nabla vi = {-# SCC "instBot" #-} do
-  _nabla' <- addBotCt nabla (vi_id vi)
+instBot :: Int -> Nabla -> (ClassId,VarInfo) -> MaybeT DsM VarInfo
+instBot _fuel nabla (cid,vi) = {-# SCC "instBot" #-} do
+  _nabla' <- addBotCt nabla cid
   pure vi
 
-addNormalisedTypeMatches :: Nabla -> Id -> DsM (ResidualCompleteMatches, Nabla)
-addNormalisedTypeMatches nabla at MkNabla{ nabla_ty_st = ty_st } x
-  = trvVarInfo add_matches nabla x
+addNormalisedTypeMatches :: Nabla -> ClassId -> DsM (ResidualCompleteMatches, Nabla)
+addNormalisedTypeMatches nabla at MkNabla{ nabla_ty_st = ty_st } xid
+  = trvVarInfo add_matches nabla xid
   where
     add_matches vi at VI{ vi_rcm = rcm }
       -- important common case, shaving down allocations of PmSeriesG by -5%
       | isRcmInitialised rcm = pure (rcm, vi)
     add_matches vi at VI{ vi_rcm = rcm } = do
-      norm_res_ty <- normaliseSourceTypeWHNF ty_st (idType x)
+      norm_res_ty <- normaliseSourceTypeWHNF ty_st (eclassType xid nabla)
       env <- dsGetFamInstEnvs
       rcm' <- case splitReprTyConApp_maybe env norm_res_ty of
         Just (rep_tc, _args, _co)  -> addTyConMatches rep_tc rcm
@@ -1388,12 +1434,11 @@ splitReprTyConApp_maybe env ty =
 -- inhabitant, the whole thing is uninhabited. It returns the updated 'VarInfo'
 -- where all the attempted ConLike instantiations have been purged from the
 -- 'ResidualCompleteMatches', which functions as a cache.
-instCompleteSets :: Int -> Nabla -> VarInfo -> MaybeT DsM VarInfo
-instCompleteSets fuel nabla vi = {-# SCC "instCompleteSets" #-} do
-  let x = vi_id vi
-  (rcm, nabla) <- lift (addNormalisedTypeMatches nabla x)
-  nabla <- foldM (\nabla cls -> instCompleteSet fuel nabla x cls) nabla (getRcm rcm)
-  pure (lookupVarInfo (nabla_tm_st nabla) x)
+instCompleteSets :: Int -> Nabla -> ClassId -> MaybeT DsM VarInfo
+instCompleteSets fuel nabla cid = {-# SCC "instCompleteSets" #-} do
+  (rcm, nabla) <- lift (addNormalisedTypeMatches nabla cid)
+  nabla <- foldM (\nabla cls -> instCompleteSet fuel nabla cid cls) nabla (getRcm rcm)
+  pure (lookupVarInfo (nabla_tm_st nabla) cid)
 
 anyConLikeSolution :: (ConLike -> Bool) -> [PmAltConApp] -> Bool
 anyConLikeSolution p = any (go . paca_con)
@@ -1411,18 +1456,19 @@ anyConLikeSolution p = any (go . paca_con)
 -- original Nabla, not a proper refinement! No positive information will be
 -- added, only negative information from failed instantiation attempts,
 -- entirely as an optimisation.
-instCompleteSet :: Int -> Nabla -> Id -> CompleteMatch -> MaybeT DsM Nabla
-instCompleteSet fuel nabla x cs
-  | anyConLikeSolution (`elementOfUniqDSet` (cmConLikes cs)) (vi_pos vi)
+instCompleteSet :: Int -> Nabla -> ClassId -> CompleteMatch -> MaybeT DsM Nabla
+instCompleteSet fuel nabla xid cs
+  | anyConLikeSolution (`elementOfUniqDSet` cmConLikes cs) (vi_pos vi)
   -- No need to instantiate a constructor of this COMPLETE set if we already
   -- have a solution!
   = pure nabla
-  | not (completeMatchAppliesAtType (varType x) cs)
+  | not (completeMatchAppliesAtType (eclassType xid nabla) cs)
   = pure nabla
   | otherwise
   = {-# SCC "instCompleteSet" #-} go nabla (sorted_candidates cs)
   where
-    vi = lookupVarInfo (nabla_tm_st nabla) x
+    vi = lookupVarInfo (nabla_tm_st nabla) xid
+    x = vi_id vi
 
     sorted_candidates :: CompleteMatch -> [ConLike]
     sorted_candidates cm
@@ -1443,12 +1489,11 @@ instCompleteSet fuel nabla x cs
       | isDataConTriviallyInhabited dc
       = pure nabla
     go nabla (con:cons) = do
-      let x = vi_id vi
       let recur_not_con = do
-            nabla' <- addNotConCt nabla x (PmAltConLike con)
+            nabla' <- addNotConCt nabla xid (PmAltConLike con)
             go nabla' cons
       (nabla <$ instCon fuel nabla x con) -- return the original nabla, not the
-                                          -- refined one!
+                                                -- refined one!
             <|> recur_not_con -- Assume that x can't be con. Encode that fact
                               -- with addNotConCt and recur.
 
@@ -1532,6 +1577,7 @@ instCon fuel nabla at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} Ma
       -- (4) Instantiate fresh term variables as arguments to the constructor
       let field_tys' = substTys sigma_ex $ map scaledThing field_tys
       arg_ids <- mapM mkPmId field_tys'
+      let (nabla', arg_class_ids) = mapAccumL (\nab id -> swap $ representId id nab) nabla arg_ids
       tracePm (hdr "(cts)") $ vcat
         [ ppr x <+> dcolon <+> ppr match_ty
         , text "In WHNF:" <+> ppr (isSourceTypeInWHNF match_ty) <+> ppr norm_match_ty
@@ -1544,10 +1590,10 @@ instCon fuel nabla at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} Ma
       runMaybeT $ do
         -- Case (2) of Note [Strict fields and variables of unlifted type]
         let alt = PmAltConLike con
-        let branching_factor = length $ filterUnliftedFields alt arg_ids
+        let branching_factor = length $ filterUnliftedFields alt (zip arg_class_ids arg_ids)
         let ct = PhiConCt x alt ex_tvs gammas arg_ids
         nabla1 <- traceWhenFailPm (hdr "(ct unsatisfiable) }") (ppr ct) $
-                  addPhiTmCt nabla ct
+                  addPhiTmCt nabla' ct
         -- See Note [Fuel for the inhabitation test]
         let new_fuel
               | branching_factor <= 1 = fuel
@@ -1564,13 +1610,13 @@ instCon fuel nabla at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} Ma
           , ppr new_fuel
           ]
         nabla2 <- traceWhenFailPm (hdr "(inh test failed) }") (ppr nabla1) $
-                  inhabitationTest new_fuel (nabla_ty_st nabla) nabla1
+                  inhabitationTest new_fuel (nabla_ty_st nabla') nabla1
         lift $ tracePm (hdr "(inh test succeeded) }") (ppr nabla2)
         pure nabla2
     Nothing -> do
       tracePm (hdr "(match_ty not instance of res_ty) }") empty
       pure (Just nabla) -- Matching against match_ty failed. Inhabited!
-                        -- See Note [Instantiating a ConLike].
+                         -- See Note [Instantiating a ConLike].
 
 -- | @matchConLikeResTy _ _ ty K@ tries to match @ty@ against the result
 -- type of @K@, @res_ty at . It returns a substitution @s@ for @K@'s universal
@@ -1905,13 +1951,15 @@ instance Outputable GenerateInhabitingPatternsMode where
 -- perhaps empty) refinements of @nabla@ that represent inhabited patterns.
 -- Negative information is only retained if literals are involved or for
 -- recursive GADTs.
-generateInhabitingPatterns :: GenerateInhabitingPatternsMode -> [Id] -> Int -> Nabla -> DsM [Nabla]
+--
+-- The list of 'Id's @vs@ is the list of match-ids ? and they have all already been represented in the e-graph, we just represent them again to re-gain class id information
+generateInhabitingPatterns :: GenerateInhabitingPatternsMode -> [ClassId] -> Int -> Nabla -> DsM [Nabla]
 -- See Note [Why inhabitationTest doesn't call generateInhabitingPatterns]
 generateInhabitingPatterns _    _      0 _     = pure []
 generateInhabitingPatterns _    []     _ nabla = pure [nabla]
-generateInhabitingPatterns mode (x:xs) n nabla = do
+generateInhabitingPatterns mode (x:xs) n nabla at MkNabla{nabla_tm_st=ts} = do
   tracePm "generateInhabitingPatterns" (ppr mode <+> ppr n <+> ppr (x:xs) $$ ppr nabla)
-  let VI _ pos neg _ _ = lookupVarInfo (nabla_tm_st nabla) x
+  let VI _ pos neg _ _ = lookupVarInfo ts x
   case pos of
     _:_ -> do
       -- Example for multiple solutions (must involve a PatSyn):
@@ -1941,15 +1989,15 @@ generateInhabitingPatterns mode (x:xs) n nabla = do
     -- Tries to instantiate a variable by possibly following the chain of
     -- newtypes and then instantiating to all ConLikes of the wrapped type's
     -- minimal residual COMPLETE set.
-    try_instantiate :: Id -> [Id] -> Int -> Nabla -> DsM [Nabla]
+    try_instantiate :: ClassId -> [ClassId] -> Int -> Nabla -> DsM [Nabla]
     -- Convention: x binds the outer constructor in the chain, y the inner one.
     try_instantiate x xs n nabla = do
-      (_src_ty, dcs, rep_ty) <- tntrGuts <$> pmTopNormaliseType (nabla_ty_st nabla) (idType x)
+      (_src_ty, dcs, rep_ty) <- tntrGuts <$> pmTopNormaliseType (nabla_ty_st nabla) (eclassType x nabla)
       mb_stuff <- runMaybeT $ instantiate_newtype_chain x nabla dcs
       case mb_stuff of
         Nothing -> pure []
-        Just (y, newty_nabla) -> do
-          let vi = lookupVarInfo (nabla_tm_st newty_nabla) y
+        Just (y, newty_nabla at MkNabla{nabla_tm_st=ts}) -> do
+          let vi = lookupVarInfo ts y
           env <- dsGetFamInstEnvs
           rcm <- case splitReprTyConApp_maybe env rep_ty of
             Just (tc, _, _) -> addTyConMatches tc (vi_rcm vi)
@@ -1973,16 +2021,17 @@ generateInhabitingPatterns mode (x:xs) n nabla = do
     -- Instantiates a chain of newtypes, beginning at @x at .
     -- Turns @x nabla [T,U,V]@ to @(y, nabla')@, where @nabla'@ we has the fact
     -- @x ~ T (U (V y))@.
-    instantiate_newtype_chain :: Id -> Nabla -> [(Type, DataCon, Type)] -> MaybeT DsM (Id, Nabla)
+    instantiate_newtype_chain :: ClassId -> Nabla -> [(Type, DataCon, Type)] -> MaybeT DsM (ClassId, Nabla)
     instantiate_newtype_chain x nabla []                      = pure (x, nabla)
     instantiate_newtype_chain x nabla ((_ty, dc, arg_ty):dcs) = do
       y <- lift $ mkPmId arg_ty
+      let (yid,nabla') = representId y nabla
       -- Newtypes don't have existentials (yet?!), so passing an empty
       -- list as ex_tvs.
-      nabla' <- addConCt nabla x (PmAltConLike (RealDataCon dc)) [] [y]
-      instantiate_newtype_chain y nabla' dcs
+      nabla'' <- addConCt nabla' x (PmAltConLike (RealDataCon dc)) [] [yid]
+      instantiate_newtype_chain yid nabla'' dcs
 
-    instantiate_cons :: Id -> Type -> [Id] -> Int -> Nabla -> [ConLike] -> DsM [Nabla]
+    instantiate_cons :: ClassId -> Type -> [ClassId] -> Int -> Nabla -> [ConLike] -> DsM [Nabla]
     instantiate_cons _ _  _  _ _     []       = pure []
     instantiate_cons _ _  _  0 _     _        = pure []
     instantiate_cons _ ty xs n nabla _
@@ -1991,8 +2040,8 @@ generateInhabitingPatterns mode (x:xs) n nabla = do
       = generateInhabitingPatterns mode xs n nabla
     instantiate_cons x ty xs n nabla (cl:cls) = do
       -- The following line is where we call out to the inhabitationTest!
-      mb_nabla <- runMaybeT $ instCon 4 nabla x cl
-      tracePm "instantiate_cons" (vcat [ ppr x <+> dcolon <+> ppr (idType x)
+      mb_nabla <- runMaybeT $ instCon 4 nabla (eclassMatchId x nabla) cl
+      tracePm "instantiate_cons" (vcat [ ppr x <+> dcolon <+> ppr (eclassType x nabla)
                                        , ppr ty
                                        , ppr cl
                                        , ppr nabla
@@ -2082,3 +2131,17 @@ Note that for -XEmptyCase, we don't want to emit a minimal cover. We arrange
 that by passing 'CaseSplitTopLevel' to 'generateInhabitingPatterns'. We detect
 the -XEmptyCase case in 'reportWarnings' by looking for 'ReportEmptyCase'.
 -}
+
+-- | Update the value of the analysis data of some e-class by its id.
+updateVarInfo :: Functor f => ClassId -> (VarInfo -> f VarInfo) -> Nabla -> f Nabla
+-- Update the data at class @xid@ using lenses and the monadic action @go@
+updateVarInfo xid f nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts=eg } } = (\eg' -> nabla{ nabla_tm_st = ts{ts_facts = eg'} }) <$> (_class xid . _data) f eg
+
+eclassMatchId :: ClassId -> Nabla -> Id
+eclassMatchId cid = vi_id . (^. _class cid . _data) . (ts_facts . nabla_tm_st)
+
+eclassType :: ClassId -> Nabla -> Type
+eclassType cid = idType . eclassMatchId cid
+
+
+-- ROMES:TODO: When exactly to rebuild?


=====================================
compiler/GHC/HsToCore/Pmc/Solver/Types.hs
=====================================
@@ -1,3 +1,7 @@
+{-# LANGUAGE FlexibleInstances   #-}
+{-# LANGUAGE RankNTypes   #-}
+{-# LANGUAGE TypeApplications   #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
 {-# LANGUAGE ApplicativeDo       #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE ViewPatterns        #-}
@@ -10,12 +14,12 @@
 module GHC.HsToCore.Pmc.Solver.Types (
 
         -- * Normalised refinement types
-        BotInfo(..), PmAltConApp(..), VarInfo(..), TmState(..), TyState(..),
+        BotInfo(..), PmAltConApp(..), VarInfo(..), TmState(..), TmEGraph, TyState(..),
         Nabla(..), Nablas(..), initNablas,
         lookupRefuts, lookupSolution,
 
         -- ** Looking up 'VarInfo'
-        lookupVarInfo, lookupVarInfoNT, trvVarInfo,
+        lookupVarInfo, lookupVarInfoNT, trvVarInfo, emptyVarInfo, representId, representIds,
 
         -- ** Caching residual COMPLETE sets
         CompleteMatch, ResidualCompleteMatches(..), getRcm, isRcmInitialised,
@@ -42,10 +46,9 @@ import GHC.Prelude
 import GHC.Data.Bag
 import GHC.Data.FastString
 import GHC.Types.Id
-import GHC.Types.Var.Set
 import GHC.Types.Unique.DSet
-import GHC.Types.Unique.SDFM
 import GHC.Types.Name
+import GHC.Core.Equality
 import GHC.Core.DataCon
 import GHC.Core.ConLike
 import GHC.Utils.Outputable
@@ -58,7 +61,7 @@ import GHC.Core.TyCon
 import GHC.Types.Literal
 import GHC.Core
 import GHC.Core.TyCo.Compare( eqType )
-import GHC.Core.Map.Expr
+import GHC.Core.Map.Type
 import GHC.Core.Utils (exprType)
 import GHC.Builtin.Names
 import GHC.Builtin.Types
@@ -75,6 +78,17 @@ import Data.Ratio
 import GHC.Real (Ratio(..))
 import qualified Data.Semigroup as Semi
 
+import Data.Tuple (swap)
+import Data.Traversable (mapAccumL)
+import Data.Functor.Compose
+import Data.Equality.Analysis (Analysis(..))
+import Data.Equality.Graph (EGraph, ClassId)
+import Data.Equality.Graph.Lens
+import qualified Data.Equality.Graph as EG
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IS (empty)
+import Data.Bifunctor (second)
+
 -- import GHC.Driver.Ppr
 
 --
@@ -131,21 +145,19 @@ instance Outputable TyState where
 initTyState :: TyState
 initTyState = TySt 0 emptyInert
 
--- | The term oracle state. Stores 'VarInfo' for encountered 'Id's. These
--- entries are possibly shared when we figure out that two variables must be
--- equal, thus represent the same set of values.
+-- | The term oracle state. Stores 'VarInfo' for encountered 'Id's and
+-- 'CoreExpr's. These entries are possibly shared when we figure out that two
+-- variables must be equal, thus represent the same set of values.
 --
 -- See Note [TmState invariants] in "GHC.HsToCore.Pmc.Solver".
 data TmState
   = TmSt
-  { ts_facts :: !(UniqSDFM Id VarInfo)
-  -- ^ Facts about term variables. Deterministic env, so that we generate
-  -- deterministic error messages.
-  , ts_reps  :: !(CoreMap Id)
-  -- ^ An environment for looking up whether we already encountered semantically
-  -- equivalent expressions that we want to represent by the same 'Id'
-  -- representative.
-  , ts_dirty :: !DIdSet
+  { ts_facts :: !TmEGraph
+  -- ^ Facts about terms.
+
+  -- ROMES:TODO: ts_dirty looks a bit to me like the bookeeping needed to know
+  -- which nodes to upward merge, perhaps we can get rid of it too.
+  , ts_dirty :: !IntSet
   -- ^ Which 'VarInfo' needs to be checked for inhabitants because of new
   -- negative constraints (e.g. @x ≁ ⊥@ or @x ≁ K@).
   }
@@ -161,6 +173,8 @@ data VarInfo
   { vi_id  :: !Id
   -- ^ The 'Id' in question. Important for adding new constraints relative to
   -- this 'VarInfo' when we don't easily have the 'Id' available.
+  -- ROMES:TODO: What is the Id in question when we might have multiple Ids in the same equivalence class?
+  -- It seems currenlty this is the representative of the e-class, so we could probably drop it, in favour of Type or so (since sometimes we need to know the type, and that's also reasonable data for the e-class to have)
 
   , vi_pos :: ![PmAltConApp]
   -- ^ Positive info: 'PmAltCon' apps it is (i.e. @x ~ [Just y, PatSyn z]@), all
@@ -168,7 +182,7 @@ data VarInfo
   -- pattern matches involving pattern synonym
   --    case x of { Just y -> case x of PatSyn z -> ... }
   -- However, no more than one RealDataCon in the list, otherwise contradiction
-  -- because of generativity.
+  -- because of generativity (which would violate Invariant 1 from the paper).
 
   , vi_neg :: !PmAltConSet
   -- ^ Negative info: A list of 'PmAltCon's that it cannot match.
@@ -206,7 +220,7 @@ data PmAltConApp
   = PACA
   { paca_con :: !PmAltCon
   , paca_tvs :: ![TyVar]
-  , paca_ids :: ![Id]
+  , paca_ids :: ![ClassId]
   }
 
 -- | See 'vi_bot'.
@@ -227,7 +241,7 @@ instance Outputable BotInfo where
 
 -- | Not user-facing.
 instance Outputable TmState where
-  ppr (TmSt state reps dirty) = ppr state $$ ppr reps $$ ppr dirty
+  ppr (TmSt _ dirty) = text "<e-graph>" $$ ppr dirty
 
 -- | Not user-facing.
 instance Outputable VarInfo where
@@ -248,7 +262,7 @@ instance Outputable VarInfo where
 
 -- | Initial state of the term oracle.
 initTmState :: TmState
-initTmState = TmSt emptyUSDFM emptyCoreMap emptyDVarSet
+initTmState = TmSt EG.emptyEGraph IS.empty
 
 -- | A data type that caches for the 'VarInfo' of @x@ the results of querying
 -- 'dsGetCompleteMatches' and then striking out all occurrences of @K@ for
@@ -300,9 +314,14 @@ emptyVarInfo x
   , vi_rcm = emptyRCM
   }
 
-lookupVarInfo :: TmState -> Id -> VarInfo
--- (lookupVarInfo tms x) tells what we know about 'x'
-lookupVarInfo (TmSt env _ _) x = fromMaybe (emptyVarInfo x) (lookupUSDFM env x)
+-- | @lookupVarInfo tms x@ tells what we know about 'x'
+--- romes:TODO: This will have a different type. I don't know what yet.
+-- romes:TODO I don't think this is what we want any longer, more like represent Id and see if it was previously represented by some data or not?
+-- romes:TodO should return VarInfo rather than Maybe VarInfo
+lookupVarInfo :: TmState -> ClassId -> VarInfo
+lookupVarInfo (TmSt eg _) x
+-- RM: Yea, I don't like the fact that currently all e-classes are created by Ids and have an Empty Var info, yet we must call "fromMaybe" here. Not good.
+  = eg ^._class x._data
 
 -- | Like @lookupVarInfo ts x@, but @lookupVarInfo ts x = (y, vi)@ also looks
 -- through newtype constructors. We have @x ~ N1 (... (Nk y))@ such that the
@@ -314,27 +333,33 @@ lookupVarInfo (TmSt env _ _) x = fromMaybe (emptyVarInfo x) (lookupUSDFM env x)
 -- modulo type normalisation!
 --
 -- See also Note [Coverage checking Newtype matches] in GHC.HsToCore.Pmc.Solver.
-lookupVarInfoNT :: TmState -> Id -> (Id, VarInfo)
+--
+-- RM: looks like we could get perhaps represent the newtypes in the e-graph instead and somehow simplify this?
+lookupVarInfoNT :: TmState -> ClassId -> (ClassId, VarInfo)
 lookupVarInfoNT ts x = case lookupVarInfo ts x of
   VI{ vi_pos = as_newtype -> Just y } -> lookupVarInfoNT ts y
-  res                                 -> (x, res)
+  res -> (x, res)
   where
     as_newtype = listToMaybe . mapMaybe go
     go PACA{paca_con = PmAltConLike (RealDataCon dc), paca_ids = [y]}
       | isNewDataCon dc = Just y
     go _                = Nothing
 
-trvVarInfo :: Functor f => (VarInfo -> f (a, VarInfo)) -> Nabla -> Id -> f (a, Nabla)
+-- romes: We could probably inline this
+trvVarInfo :: forall f a. Functor f => (VarInfo -> f (a,VarInfo)) -> Nabla -> ClassId -> f (a,Nabla)
 trvVarInfo f nabla at MkNabla{ nabla_tm_st = ts at TmSt{ts_facts = env} } x
-  = set_vi <$> f (lookupVarInfo ts x)
-  where
-    set_vi (a, vi') =
-      (a, nabla{ nabla_tm_st = ts{ ts_facts = addToUSDFM env (vi_id vi') vi' } })
+  = second (\g -> nabla{nabla_tm_st = ts{ts_facts=g}}) <$> updateAccum (_class x._data) f env
+    where
+      updateAccum :: forall f a s c. Functor f => Lens' s a -> (a -> f (c,a)) -> s -> f (c,s)
+      updateAccum lens g = getCompose . lens @(Compose f ((,) c)) (Compose . g)
 
 ------------------------------------------------
 -- * Exported utility functions querying 'Nabla'
 
-lookupRefuts :: Nabla -> Id -> [PmAltCon]
+-- ROMES:TODO: Document
+-- | Lookup the refutable patterns, i.e. the pattern alt cons that certainly can't happen??
+-- ROMES:TODO: ClassId?
+lookupRefuts :: Nabla -> ClassId -> [PmAltCon]
 -- Unfortunately we need the extra bit of polymorphism and the unfortunate
 -- duplication of lookupVarInfo here.
 lookupRefuts MkNabla{ nabla_tm_st = ts } x =
@@ -346,7 +371,7 @@ isDataConSolution _                                             = False
 
 -- @lookupSolution nabla x@ picks a single solution ('vi_pos') of @x@ from
 -- possibly many, preferring 'RealDataCon' solutions whenever possible.
-lookupSolution :: Nabla -> Id -> Maybe PmAltConApp
+lookupSolution :: Nabla -> ClassId -> Maybe PmAltConApp
 lookupSolution nabla x = case vi_pos (lookupVarInfo (nabla_tm_st nabla) x) of
   []                                         -> Nothing
   pos@(x:_)
@@ -465,6 +490,7 @@ extendPmAltConSet (PACS cls lits) (PmAltConLike cl)
 extendPmAltConSet (PACS cls lits) (PmAltLit lit)
   = PACS cls (unionLists lits [lit])
 
+-- | The elements of a 'PmAltConSet'
 pmAltConSetElems :: PmAltConSet -> [PmAltCon]
 pmAltConSetElems (PACS cls lits)
   = map PmAltConLike (uniqDSetToList cls) ++ map PmAltLit lits
@@ -789,7 +815,7 @@ instance Outputable PmLit where
             , (charPrimTy, primCharSuffix)
             , (floatPrimTy, primFloatSuffix)
             , (doublePrimTy, primDoubleSuffix) ]
-      suffix = fromMaybe empty (snd <$> find (eqType ty . fst) tbl)
+      suffix = maybe empty snd (find (eqType ty . fst) tbl)
 
 instance Outputable PmAltCon where
   ppr (PmAltConLike cl) = ppr cl
@@ -797,3 +823,45 @@ instance Outputable PmAltCon where
 
 instance Outputable PmEquality where
   ppr = text . show
+
+--
+-- * E-graphs to represent normalised refinment types
+--
+
+type TmEGraph = EGraph VarInfo (DeBruijnF CoreExprF)
+
+representId :: Id -> Nabla -> (ClassId, Nabla)
+-- Will need to justify this well
+representId x (MkNabla tyst tmst at TmSt{ts_facts=eg0})
+  = case EG.add (EG.Node (DF (deBruijnize (VarF x)))) eg0 of
+      (xid, eg1) -> (xid, MkNabla tyst tmst{ts_facts=eg1})
+
+representIds :: [Id] -> Nabla -> ([ClassId], Nabla)
+representIds xs nabla = swap $ mapAccumL (\acc x -> swap $ representId x acc) nabla xs
+
+-- | This instance is seriously wrong for general purpose, it's just required for instancing Analysis.
+-- There ought to be a better way.
+instance Eq VarInfo where
+  (==) _ _ = False
+instance Analysis VarInfo (DeBruijnF CoreExprF) where
+  {-# INLINE makeA #-}
+  {-# INLINE joinA #-}
+
+  -- When an e-class is created for a variable, we create an VarInfo from it.
+  -- It doesn't matter if this variable is bound or free, since it's the first
+  -- variable in this e-class (and all others would have to be equivalent to
+  -- it)
+  --
+  -- Also, the Eq instance for DeBruijn Vars will ensure that two free
+  -- variables with the same Id are equal and so they will be represented in
+  -- the same e-class
+  makeA (DF (D _ (VarF x))) = emptyVarInfo x
+  makeA _ = emptyVarInfo unitDataConId -- ROMES:TODO: this is dummy information which should never be used, this is quite wrong :)
+                                       -- I think the reason we end up in this
+                                       -- situation is bc we first represent an expression and only then merge it with some Id.
+                                       -- we'd need a way to represent directly into an e-class then, to not trigger the new e-class.
+
+  -- romes: so currently, variables are joined in 'addVarCt' manually by getting the old value of $x$ and assuming the value of $y$ was chosen.
+  -- That's obviously bad now, it'd be much more clearer to do it here. It's just the nabla threading that's more trouble
+  joinA _a b = b
+


=====================================
compiler/GHC/Types/Unique/SDFM.hs
=====================================
@@ -82,6 +82,7 @@ lookupUSDFM usdfm x = snd (lookupReprAndEntryUSDFM usdfm x)
 -- >>> equateUSDFM [({u1,u3}, Just ele1)] u3 u4 == (Nothing, [({u1,u3,u4}, Just ele1)])
 -- >>> equateUSDFM [({u1,u3}, Just ele1)] u4 u3 == (Nothing, [({u1,u3,u4}, Just ele1)])
 -- >>> equateUSDFM [({u1,u3}, Just ele1), ({u2}, Just ele2)] u3 u2 == (Just ele1, [({u2,u1,u3}, Just ele2)])
+-- ROMES:TODO: Are all USDFM functions just for the PMC Nabla TM?
 equateUSDFM
   :: Uniquable key => UniqSDFM key ele -> key -> key -> (Maybe ele, UniqSDFM key ele)
 equateUSDFM usdfm@(USDFM env) x y =


=====================================
compiler/ghc.cabal.in
=====================================
@@ -300,6 +300,7 @@ Library
         GHC.Core.ConLike
         GHC.Core.DataCon
         GHC.Core.FamInstEnv
+        GHC.Core.Equality
         GHC.Core.FVs
         GHC.Core.InstEnv
         GHC.Core.Lint



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/4d72410fe0931fd1a94c8efcd3c8c8d52af2f396...fedc72828fa29a76215c927c01ea602a851cc1e0

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/4d72410fe0931fd1a94c8efcd3c8c8d52af2f396...fedc72828fa29a76215c927c01ea602a851cc1e0
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230625/4aa2386a/attachment-0001.html>


More information about the ghc-commits mailing list