[Git][ghc/ghc][wip/romes/eqsat-pmc] Improve a little bit the mixing of Ids and ClassIds

Thu Jul 6 18:00:46 UTC 2023

Rodrigo Mesquita pushed to branch wip/romes/eqsat-pmc at Glasgow Haskell Compiler / GHC


Commits:
d0917a69 by Rodrigo Mesquita at 2023-07-06T19:00:32+01:00
Improve a little bit the mixing of Ids and ClassIds

tWeaks

Don't use EG.rebuild as a view pattern

Debuggging

Touches

Fix to representId over multiple (different) nablas

Paper over

Chagnes2

- - - - -


10 changed files:

- compiler/GHC/Core/Equality.hs
- compiler/GHC/HsToCore/Errors/Ppr.hs
- compiler/GHC/HsToCore/Errors/Types.hs
- compiler/GHC/HsToCore/Pmc.hs
- compiler/GHC/HsToCore/Pmc/Check.hs
- compiler/GHC/HsToCore/Pmc/Desugar.hs
- compiler/GHC/HsToCore/Pmc/Ppr.hs
- compiler/GHC/HsToCore/Pmc/Solver.hs
- compiler/GHC/HsToCore/Pmc/Solver/Types.hs
- libraries/hegg


Changes:

=====================================
compiler/GHC/Core/Equality.hs
=====================================
@@ -7,6 +7,11 @@
 
 module GHC.Core.Equality where
 
+-- ROMES:TODO:
+-- I think, for the particular usages of Core e-graphs, we can do much better
+-- than this for equality.
+-- E.g. representation could transform the CoreExpr to an actual debruijnized one (with Ints for Vars)
+
 import GHC.Exts (dataToTag#, tagToEnum#, (>#), (<#))
 import GHC.Prelude
 


=====================================
compiler/GHC/HsToCore/Errors/Ppr.hs
=====================================
@@ -72,7 +72,8 @@ instance Diagnostic DsMessage where
              case vars of -- See #11245
                   [] -> text "Guards do not cover entire pattern space"
                   _  -> let us = map (\nabla -> pprUncovered nabla vars) nablas
-                            pp_tys = pprQuotedList $ map idType vars
+                            -- pp_tys = pprQuotedList $ map idType vars
+                            pp_tys = empty
                         in  hang
                               (text "Patterns of type" <+> pp_tys <+> text "not matched:")
                               4


=====================================
compiler/GHC/HsToCore/Errors/Types.hs
=====================================
@@ -20,6 +20,8 @@ import GHC.Types.Id
 import GHC.Types.Name (Name)
 import qualified GHC.LanguageExtensions as LangExt
 
+import Data.Equality.Graph (ClassId)
+
 import GHC.Generics (Generic)
 
 newtype MinBound = MinBound Integer
@@ -99,7 +101,7 @@ data DsMessage
   | DsNonExhaustivePatterns !(HsMatchContext GhcRn)
                             !ExhaustivityCheckType
                             !MaxUncoveredPatterns
-                            [Id]
+                            [ClassId]
                             [Nabla]
 
   | DsTopLevelBindsNotAllowed !BindsType !(HsBindLR GhcTc GhcTc)


=====================================
compiler/GHC/HsToCore/Pmc.hs
=====================================
@@ -75,6 +75,8 @@ import Data.List.NonEmpty ( NonEmpty(..) )
 import qualified Data.List.NonEmpty as NE
 import Data.Coerce
 
+import Data.Equality.Graph (ClassId)
+
 --
 -- * Exported entry points to the checker
 --
@@ -104,9 +106,20 @@ pmcPatBind ctxt@(DsMatchContext PatBindRhs loc) var p = do
   !missing <- getLdiNablas
   pat_bind <- noCheckDs $ desugarPatBind loc var p
   tracePm "pmcPatBind {" (vcat [ppr ctxt, ppr var, ppr p, ppr pat_bind, ppr missing])
-  result <- unCA (checkPatBind pat_bind) missing
-  tracePm "}: " (ppr (cr_uncov result))
-  formatReportWarnings ReportPatBind ctxt [var] result
+  result0 <- unCA (checkPatBind pat_bind) missing
+  tracePm "}: " (ppr (cr_uncov result0))
+  -- romes:todo: this seems redundant, hints that the right thing might be for desugar to return already the match variables already "represented" in the e-graph
+  -- doing this, however, wouuld require for desugar pat binds to care about/thread through nablas
+  -- DESIGN:TODO: However, if we represent the variables while desugaring, we
+  -- would no longer need representId to represent VarF in the e-class, and can
+  -- instead do newEClass. This would further reduce allocations.
+  -- The reason why we can't do that currently is that on checkPatBind we'll
+  -- representIds, and when we represent them again in the next line, we want
+  -- them to match the ones we represented during checkPatBind. If we made
+  -- empty-eclasses, the representId on the next line wouldn't match the match
+  -- ids we defined in checkPatBind.
+  let (varid, cr_uncov') = representId var (cr_uncov result0)
+  formatReportWarnings ReportPatBind ctxt [varid] result0{cr_uncov = cr_uncov'}
 pmcPatBind _ _ _ = pure ()
 
 -- | Exhaustive for guard matches, is used for guards in pattern bindings and
@@ -165,18 +178,23 @@ pmcMatches ctxt vars matches = {-# SCC "pmcMatches" #-} do
       -- This must be an -XEmptyCase. See Note [Checking EmptyCase]
       let var = only vars
       empty_case <- noCheckDs $ desugarEmptyCase var
-      result <- unCA (checkEmptyCase empty_case) missing
-      tracePm "}: " (ppr (cr_uncov result))
-      formatReportWarnings ReportEmptyCase ctxt vars result
+      result0 <- unCA (checkEmptyCase empty_case) missing
+      tracePm "}: " (ppr (cr_uncov result0))
+      let (varids, cr_uncov') = representIds vars (cr_uncov result0) -- romes:todo: this seems redundant, hints that the right thing might be for desugar to return already the match variables already "represented" in the e-graph
+      formatReportWarnings ReportEmptyCase ctxt varids result0{cr_uncov=cr_uncov'}
       return []
     Just matches -> do
       matches <- {-# SCC "desugarMatches" #-}
                  noCheckDs $ desugarMatches vars matches
-      result  <- {-# SCC "checkMatchGroup" #-}
+      result0 <- {-# SCC "checkMatchGroup" #-}
                  unCA (checkMatchGroup matches) missing
-      tracePm "}: " (ppr (cr_uncov result))
-      {-# SCC "formatReportWarnings" #-} formatReportWarnings ReportMatchGroup ctxt vars result
-      return (NE.toList (ldiMatchGroup (cr_ret result)))
+      tracePm "}: " (ppr (cr_uncov result0))
+      let (varids, cr_uncov') = representIds vars (cr_uncov result0)
+      -- romes:todo: this seems redundant, hints that the right thing might be
+      -- for desugar to return already the match variables already "represented"
+      -- in the e-graph
+      {-# SCC "formatReportWarnings" #-} formatReportWarnings ReportMatchGroup ctxt varids result0{cr_uncov=cr_uncov'}
+      return (NE.toList (ldiMatchGroup (cr_ret result0)))
 
 {- Note [pmcPatBind only checks PatBindRhs]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -340,7 +358,7 @@ collectInMode ReportEmptyCase  = cirbsEmptyCase
 
 -- | Given a 'FormatReportWarningsMode', this function will emit warnings
 -- for a 'CheckResult'.
-formatReportWarnings :: FormatReportWarningsMode ann -> DsMatchContext -> [Id] -> CheckResult ann -> DsM ()
+formatReportWarnings :: FormatReportWarningsMode ann -> DsMatchContext -> [ClassId] -> CheckResult ann -> DsM ()
 formatReportWarnings report_mode ctx vars cr at CheckResult { cr_ret = ann } = do
   cov_info <- collectInMode report_mode ann
   dflags <- getDynFlags
@@ -348,7 +366,7 @@ formatReportWarnings report_mode ctx vars cr at CheckResult { cr_ret = ann } = do
 
 -- | Issue all the warnings
 -- (redundancy, inaccessibility, exhaustiveness, redundant bangs).
-reportWarnings :: DynFlags -> FormatReportWarningsMode ann -> DsMatchContext -> [Id] -> CheckResult CIRB -> DsM ()
+reportWarnings :: DynFlags -> FormatReportWarningsMode ann -> DsMatchContext -> [ClassId] -> CheckResult CIRB -> DsM ()
 reportWarnings dflags report_mode (DsMatchContext kind loc) vars
   CheckResult { cr_ret    = CIRB { cirb_inacc = inaccessible_rhss
                                  , cirb_red   = redundant_rhss
@@ -387,14 +405,13 @@ reportWarnings dflags report_mode (DsMatchContext kind loc) vars
 
     maxPatterns = maxUncoveredPatterns dflags
 
-getNFirstUncovered :: GenerateInhabitingPatternsMode -> [Id] -> Int -> Nablas -> DsM [Nabla]
+getNFirstUncovered :: GenerateInhabitingPatternsMode -> [ClassId] -> Int -> Nablas -> DsM [Nabla]
 getNFirstUncovered mode vars n (MkNablas nablas) = go n (bagToList nablas)
   where
     go 0 _              = pure []
     go _ []             = pure []
     go n (nabla:nablas) = do
-      let (vars', nabla') = representIds vars nabla -- they're already there, we're just getting the e-class ids back
-      front <- generateInhabitingPatterns mode vars' n nabla'
+      front <- generateInhabitingPatterns mode vars n nabla
       back <- go (n - length front) nablas
       pure (front ++ back)
 
@@ -429,9 +446,10 @@ addTyCs origin ev_vars m = do
 -- to be added for multiple scrutinees rather than just one.
 addCoreScrutTmCs :: [CoreExpr] -> [Id] -> DsM a -> DsM a
 addCoreScrutTmCs []         _      k = k
-addCoreScrutTmCs (scr:scrs) (x:xs) k =
-  flip locallyExtendPmNablas (addCoreScrutTmCs scrs xs k) $ \nablas ->
-    addPhiCtsNablas nablas (unitBag (PhiCoreCt x scr))
+addCoreScrutTmCs (scr:scrs) (x0:xs) k =
+  flip locallyExtendPmNablas (addCoreScrutTmCs scrs xs k) $ \nablas0 ->
+    let (x, nablas) = representId x0 nablas0
+     in addPhiCtsNablas nablas (unitBag (PhiCoreCt x scr))
 addCoreScrutTmCs _   _   _ = panic "addCoreScrutTmCs: numbers of scrutinees and match ids differ"
 
 -- | 'addCoreScrutTmCs', but desugars the 'LHsExpr's first.


=====================================
compiler/GHC/HsToCore/Pmc/Check.hs
=====================================
@@ -103,9 +103,10 @@ emptyRedSets :: RedSets
 emptyRedSets = RedSets mempty mempty mempty
 
 checkGrd :: PmGrd -> CheckAction RedSets
-checkGrd grd = CA $ \inc -> case grd of
+checkGrd grd = CA $ \inc0 -> case grd of
   -- let x = e: Refine with x ~ e
-  PmLet x e -> do
+  PmLet x0 e -> do
+    let (x, inc) = representId x0 inc0
     -- romes: we could potentially do update the trees to use e-class ids here,
     -- or in pmcMatches
     matched <- addPhiCtNablas inc (PhiCoreCt x e)
@@ -114,7 +115,8 @@ checkGrd grd = CA $ \inc -> case grd of
                      , cr_uncov = mempty
                      , cr_approx = Precise }
   -- Bang x _: Diverge on x ~ ⊥, refine with x ≁ ⊥
-  PmBang x mb_info -> do
+  PmBang x0 mb_info -> do
+    let (x, inc) = representId x0 inc0
     div <- addPhiCtNablas inc (PhiBotCt x)
     matched <- addPhiCtNablas inc (PhiNotBotCt x)
     -- See Note [Dead bang patterns]
@@ -133,7 +135,9 @@ checkGrd grd = CA $ \inc -> case grd of
                         , cr_uncov = mempty
                         , cr_approx = Precise }
   -- Con: Fall through on x ≁ K and refine with x ~ K ys and type info
-  PmCon x con tvs dicts args -> do
+  PmCon x0 con tvs dicts args0 -> do
+    let (x, inc1) = representId x0 inc0
+        (args, inc) = representIds args0 inc1
     !div <- if isPmAltConMatchStrict con
       then addPhiCtNablas inc (PhiBotCt x)
       else pure mempty
@@ -181,7 +185,8 @@ checkGRHS (PmGRHS { pg_grds = GrdVec grds, pg_rhs = rhs_info }) =
 checkEmptyCase :: PmEmptyCase -> CheckAction PmEmptyCase
 -- See Note [Checking EmptyCase]
 checkEmptyCase pe@(PmEmptyCase { pe_var = var }) = CA $ \inc -> do
-  unc <- addPhiCtNablas inc (PhiNotBotCt var)
+  let (varid, inc') = representId var inc
+  unc <- addPhiCtNablas inc' (PhiNotBotCt varid)
   pure CheckResult { cr_ret = pe, cr_uncov = unc, cr_approx = mempty }
 
 checkPatBind :: PmPatBind Pre -> CheckAction (PmPatBind Post)


=====================================
compiler/GHC/HsToCore/Pmc/Desugar.hs
=====================================
@@ -327,7 +327,14 @@ desugarEmptyCase var = pure PmEmptyCase { pe_var = var }
 
 -- | Desugar the non-empty 'Match'es of a 'MatchGroup'.
 --

--- Returns a desugared guard tree of guard expressions.
+-- TODO: Would this be a good design?
+-- Returns a desugared guard tree of guard expressions /and/ an e-graph e-graph
+-- per tree branch.
+--
+-- These e-graphs have an equivalence class for each match-id in the guard
+-- expression, and are required in the subsequent passes of the PMC
+--
+-- Furthermore, the match-ids in the PmGrd expressions are e-class ids from said e-graph
 desugarMatches :: [Id] -> NonEmpty (LMatch GhcTc (LHsExpr GhcTc))
                -> DsM (PmMatchGroup Pre)
 desugarMatches vars matches =


=====================================
compiler/GHC/HsToCore/Pmc/Ppr.hs
=====================================
@@ -12,7 +12,6 @@ import GHC.Prelude
 import GHC.Data.List.Infinite (Infinite (..))
 import qualified GHC.Data.List.Infinite as Inf
 import GHC.Types.Basic
-import GHC.Types.Id
 import GHC.Core.ConLike
 import GHC.Core.DataCon
 import GHC.Builtin.Types
@@ -42,7 +41,7 @@ import qualified Data.IntMap as IM
 --
 -- When the set of refutable shapes contains more than 3 elements, the
 -- additional elements are indicated by "...".
-pprUncovered :: Nabla -> [Id] -> SDoc
+pprUncovered :: Nabla -> [ClassId] -> SDoc
 pprUncovered nabla vas
   | IM.null refuts = fsep vec -- there are no refutations
   | otherwise      = hang (fsep vec) 4 $
@@ -53,10 +52,9 @@ pprUncovered nabla vas
       -- precedence
       | [_] <- vas   = topPrec
       | otherwise    = appPrec
-    (vas',nabla')    = representIds vas nabla
-    ppr_action       = mapM (pprPmVar init_prec) vas'
-    (vec, renamings) = runPmPpr nabla' ppr_action
-    refuts           = prettifyRefuts nabla' renamings
+    ppr_action       = mapM (pprPmVar init_prec) vas
+    (vec, renamings) = runPmPpr nabla ppr_action
+    refuts           = prettifyRefuts nabla renamings
 
 -- | Output refutable shapes of a variable in the form of @var is not one of {2,
 -- Nothing, 3}@. Will never print more than 3 refutable shapes, the tail is


=====================================
compiler/GHC/HsToCore/Pmc/Solver.hs
=====================================
@@ -1,7 +1,9 @@
 {-# LANGUAGE LambdaCase          #-}
 {-# LANGUAGE MultiWayIf          #-}
+{-# LANGUAGE TypeApplications    #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE ViewPatterns        #-}
+{-# LANGUAGE RankNTypes, GADTs        #-}
 
 {-
 Authors: George Karachalias <george.karachalias at cs.kuleuven.be>
@@ -105,8 +107,6 @@ import qualified Data.Equality.Graph as EG
 import Data.Bifunctor (second)
 import Data.Function ((&))
 import qualified Data.IntSet as IS
-import Data.Tuple (swap)
-import Data.Traversable (mapAccumL)
 
 --
 -- * Main exports
@@ -571,20 +571,20 @@ where you can find the solution in a perhaps more digestible format.
 data PhiCt
   = PhiTyCt !PredType
   -- ^ A type constraint "T ~ U".
-  | PhiCoreCt    !Id !CoreExpr
+  | PhiCoreCt    !ClassId !CoreExpr
   -- ^ @PhiCoreCt x e@ encodes "x ~ e", equating @x@ with the 'CoreExpr' @e at .
-  | PhiConCt     !Id !PmAltCon ![TyVar] ![PredType] ![Id]
+  | PhiConCt     !ClassId !PmAltCon ![TyVar] ![PredType] ![ClassId]
   -- ^ @PhiConCt x K tvs dicts ys@ encodes @K \@tvs dicts ys <- x@, matching @x@
   -- against the 'PmAltCon' application @K \@tvs dicts ys@, binding @tvs@,
   -- @dicts@ and possibly unlifted fields @ys@ in the process.
   -- See Note [Strict fields and variables of unlifted type].
-  | PhiNotConCt  !Id !PmAltCon
+  | PhiNotConCt  !ClassId !PmAltCon
   -- ^ @PhiNotConCt x K@ encodes "x ≁ K", asserting that @x@ can't be headed
   -- by @K at .
-  | PhiBotCt     !Id
+  | PhiBotCt     !ClassId
   -- ^ @PhiBotCt x@ encodes "x ~ ⊥", equating @x@ to ⊥.
   -- by @K at .
-  | PhiNotBotCt !Id
+  | PhiNotBotCt !ClassId
   -- ^ @PhiNotBotCt x y@ encodes "x ≁ ⊥", asserting that @x@ can't be ⊥.
 
 instance Outputable PhiCt where
@@ -674,31 +674,25 @@ nameTyCt pred_ty = do
 -- 'addTyCts' before, through 'addPhiCts'.
 addPhiTmCt :: Nabla -> PhiCt -> MaybeT DsM Nabla
 addPhiTmCt _     (PhiTyCt ct)              = pprPanic "addPhiCt:TyCt" (ppr ct) -- See the precondition
-addPhiTmCt nabla (PhiCoreCt x e)           = let (xid, nabla') = representId x nabla
-                                              in addCoreCt nabla' xid e
-addPhiTmCt nabla (PhiConCt x con tvs dicts args) = do
+addPhiTmCt nabla (PhiCoreCt x e)           = addCoreCt nabla x e
+addPhiTmCt nabla0 (PhiConCt x con tvs dicts args) = do
   -- Case (1) of Note [Strict fields and variables of unlifted type]
   -- PhiConCt correspond to the higher-level φ constraints from the paper with
   -- bindings semantics. It disperses into lower-level δ constraints that the
   -- 'add*Ct' functions correspond to.
-  nabla1 <- addTyCts nabla (listToBag dicts)
-  let (xid, nabla2) = representId x nabla1
-  let (args_ids, nabla3) = representIds args nabla2
+  nabla1 <- addTyCts nabla0 (listToBag dicts)
   -- romes: here we could have something like (merge (add K arg_ids) x)
   -- or actually that should be done by addConCt?
-  nabla4 <- addConCt nabla3 xid con tvs args_ids
-  foldlM addNotBotCt nabla4 (filterUnliftedFields con (zip args_ids args))
-addPhiTmCt nabla (PhiNotConCt x con)       = let (xid, nabla') = representId x nabla
-                                              in addNotConCt nabla' xid con
-addPhiTmCt nabla (PhiBotCt x)              = let (xid, nabla') = representId x nabla
-                                              in addBotCt nabla' xid
-addPhiTmCt nabla (PhiNotBotCt x)           = let (xid, nabla') = representId x nabla
-                                              in addNotBotCt nabla' xid
-
-filterUnliftedFields :: PmAltCon -> [(ClassId,Id)] -> [ClassId]
-filterUnliftedFields con args =
-  [ arg_id | ((arg_id,arg), bang) <- zipEqual "addPhiCt" args (pmAltConImplBangs con)
-           , isBanged bang || definitelyUnliftedType (idType arg) ]
+  nabla2 <- addConCt nabla1 x con tvs args
+  foldlM addNotBotCt nabla2 (filterUnliftedFields nabla2 con args)
+addPhiTmCt nabla (PhiNotConCt x con)       = addNotConCt nabla x con
+addPhiTmCt nabla (PhiBotCt x)              = addBotCt nabla x
+addPhiTmCt nabla (PhiNotBotCt x)           = addNotBotCt nabla x
+
+filterUnliftedFields :: Nabla -> PmAltCon -> [ClassId] -> [ClassId]
+filterUnliftedFields nabla con args =
+  [ arg_id | (arg_id, bang) <- zipEqual "addPhiCt" args (pmAltConImplBangs con)
+           , isBanged bang || definitelyUnliftedType (eclassType arg_id nabla) ]
 
 -- | Adds the constraint @x ~ ⊥@, e.g. that evaluation of a particular 'Id' @x@
 -- surely diverges. Quite similar to 'addConCt', only that it only cares about
@@ -845,7 +839,7 @@ addVarCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = env } } x y =
   -- equate should also update e-graph, basically re-implement "equateUSDFM" in terms of the e-graph, or inline it or so
   case equate env x y of
     -- Add the constraints we had for x to y
-    -- See Note [Joining e-classes PMC] todo mention from joinA
+    -- See Note (TODO) Joining e-classes PMC] todo mention from joinA
     -- Now, here's a really tricky bit (TODO Write note, is it the one above?)
     -- Bc the joinA operation is unlawful, and because the makeA operation for
     -- expressions is also unlawful (sets the type to ()::(), mostly out of
@@ -854,7 +848,7 @@ addVarCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = env } } x y =
     -- We *also update the type* (WTF1).
     -- This is because every e-class should always have a match-var first, which will always have a type, and it should appear on "the left"
     -- We also rebuild here, we did just merge two things. TODO: Where and when exactly should we merge?
-    (vi_x, EG.rebuild -> env') -> do
+    (vi_x, env') -> do
       let nabla_equated = nabla{ nabla_tm_st = ts{ts_facts = env'} }
       -- and then gradually merge every positive fact we have on x into y
       let add_pos nabla (PACA cl tvs args) = addConCt nabla y cl tvs args
@@ -875,7 +869,7 @@ addVarCt nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts = env } } x y =
     -- >>> equate [({u1,u3}, Just ele1), ({u2}, Just ele2)] u3 u2 == (Just ele1, [({u2,u1,u3}, Just ele2)])
     equate :: TmEGraph -> ClassId -> ClassId -> (VarInfo, TmEGraph)
     equate eg x y = let (_, eg')  = EG.merge x y eg
-                     in (eg  ^. _class x ._data, eg')
+                     in (eg  ^. _class x ._data, EG.rebuild eg')
                     -- Note: lookup in @eg@, not @eg'@, because we want to return x's data before the merge.
 
 
@@ -900,6 +894,7 @@ addCoreCt nabla x e = do
   where
     -- Takes apart a 'CoreExpr' and tries to extract as much information about
     -- literals and constructor applications as possible.
+    -- ROMES:TODO: Consider CoreExprF instead of CoreExpr already here?
     core_expr :: ClassId -> CoreExpr -> StateT Nabla (MaybeT DsM) ()
     -- TODO: Handle newtypes properly, by wrapping the expression in a DataCon
     -- RM: Could this be done better with e-graphs? The whole newtype stuff
@@ -924,18 +919,18 @@ addCoreCt nabla x e = do
             <- exprIsConApp_maybe in_scope_env e
       = data_con_app x in_scope dc args
       | otherwise
-      = do
-        nabla' <- get
-        if
+      = equate_with_similar_expr x e
+        -- nabla' <- get
+        -- if
           -- See Note [Detecting pattern synonym applications in expressions]
-          | Var y <- e, Nothing <- isDataConId_maybe (eclassMatchId x nabla')
+          -- ROMES:TODO: Can we fix this more easily with e-graphs?
+          -- x| Var y <- e, Nothing <- isDataConId_maybe (eclassMatchId x nabla')
           -- We don't consider DataCons flexible variables
-          -> modifyT (\nabla -> let (yid, nabla') = representId y nabla
-                                in addVarCt nabla' x yid)
-          | otherwise
+          -- -> modifyT (\nabla -> addVarCt nabla' x y)
+          -- x| otherwise
           -- Any other expression. Try to find other uses of a semantically
           -- equivalent expression and represent them by the same variable!
-          -> equate_with_similar_expr x e
+          -- -> equate_with_similar_expr x e
       where
         expr_ty       = exprType e
         expr_in_scope = mkInScopeSet (exprFreeVars e)
@@ -960,10 +955,9 @@ addCoreCt nabla x e = do
 
     bind_expr :: CoreExpr -> StateT Nabla (MaybeT DsM) ClassId
     bind_expr e = do
-      x <- lift (lift (mkPmId (exprType e)))
-      xid <- StateT $ \nabla -> pure $ representId x nabla
-      core_expr xid e
-      pure xid
+      x <- StateT $ \nabla -> lift (mkPmMatchId (exprType e) nabla)
+      core_expr x e
+      pure x
 
     -- Look at @let x = K taus theta es@ and generate the following
     -- constraints (assuming universals were dropped from @taus@ before):
@@ -1480,7 +1474,6 @@ instCompleteSet fuel nabla xid cs
   = {-# SCC "instCompleteSet" #-} go nabla (sorted_candidates cs)
   where
     vi = lookupVarInfo (nabla_tm_st nabla) xid
-    x = vi_id vi
 
     sorted_candidates :: CompleteMatch -> [ConLike]
     sorted_candidates cm
@@ -1504,8 +1497,8 @@ instCompleteSet fuel nabla xid cs
       let recur_not_con = do
             nabla' <- addNotConCt nabla xid (PmAltConLike con)
             go nabla' cons
-      (nabla <$ instCon fuel nabla x con) -- return the original nabla, not the
-                                                -- refined one!
+      (nabla <$ instCon fuel nabla xid con) -- return the original nabla, not the
+                                            -- refined one!
             <|> recur_not_con -- Assume that x can't be con. Encode that fact
                               -- with addNotConCt and recur.
 
@@ -1566,11 +1559,11 @@ compareConLikeTestability (RealDataCon a) (RealDataCon b) = mconcat
 -- adding the proper constructor constraint.
 --
 -- See Note [Instantiating a ConLike].
-instCon :: Int -> Nabla -> Id -> ConLike -> MaybeT DsM Nabla
-instCon fuel nabla at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} MaybeT $ do
+instCon :: Int -> Nabla -> ClassId -> ConLike -> MaybeT DsM Nabla
+instCon fuel nabla0 at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} MaybeT $ do
   let hdr what = "instCon " ++ show fuel ++ " " ++ what
   env <- dsGetFamInstEnvs
-  let match_ty = idType x
+  let match_ty = eclassType x nabla0
   tracePm (hdr "{") $
     ppr con <+> text "... <-" <+> ppr x <+> dcolon <+> ppr match_ty
   norm_match_ty <- normaliseSourceTypeWHNF ty_st match_ty
@@ -1588,24 +1581,23 @@ instCon fuel nabla at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} Ma
       let gammas = substTheta sigma_ex (eqSpecPreds eq_spec ++ thetas)
       -- (4) Instantiate fresh term variables as arguments to the constructor
       let field_tys' = substTys sigma_ex $ map scaledThing field_tys
-      arg_ids <- mapM mkPmId field_tys'
-      let (nabla', arg_class_ids) = mapAccumL (\nab id -> swap $ representId id nab) nabla arg_ids
+      (arg_ids, nabla1) <- runStateT (mapM (StateT @Nabla . mkPmMatchId) field_tys') nabla0
       tracePm (hdr "(cts)") $ vcat
         [ ppr x <+> dcolon <+> ppr match_ty
         , text "In WHNF:" <+> ppr (isSourceTypeInWHNF match_ty) <+> ppr norm_match_ty
         , ppr con <+> dcolon <+> text "... ->" <+> ppr _con_res_ty
         , ppr (map (\tv -> ppr tv <+> char '↦' <+> ppr (substTyVar sigma_univ tv)) _univ_tvs)
         , ppr gammas
-        , ppr (map (\x -> ppr x <+> dcolon <+> ppr (idType x)) arg_ids)
+        , ppr (map (\x -> ppr x <+> dcolon <+> ppr (eclassType x nabla1)) arg_ids)
         ]
       -- (5) Finally add the new constructor constraint
       runMaybeT $ do
         -- Case (2) of Note [Strict fields and variables of unlifted type]
         let alt = PmAltConLike con
-        let branching_factor = length $ filterUnliftedFields alt (zip arg_class_ids arg_ids)
+        let branching_factor = length $ filterUnliftedFields nabla1 alt arg_ids
         let ct = PhiConCt x alt ex_tvs gammas arg_ids
-        nabla1 <- traceWhenFailPm (hdr "(ct unsatisfiable) }") (ppr ct) $
-                  addPhiTmCt nabla' ct
+        nabla2 <- traceWhenFailPm (hdr "(ct unsatisfiable) }") (ppr ct) $
+                  addPhiTmCt nabla1 ct
         -- See Note [Fuel for the inhabitation test]
         let new_fuel
               | branching_factor <= 1 = fuel
@@ -1617,17 +1609,17 @@ instCon fuel nabla at MkNabla{nabla_ty_st = ty_st} x con = {-# SCC "instCon" #-} Ma
           , ppr con <+> dcolon <+> text "... ->" <+> ppr _con_res_ty
           , ppr (map (\tv -> ppr tv <+> char '↦' <+> ppr (substTyVar sigma_univ tv)) _univ_tvs)
           , ppr gammas
-          , ppr (map (\x -> ppr x <+> dcolon <+> ppr (idType x)) arg_ids)
+          , ppr (map (\x -> ppr x <+> dcolon <+> ppr (eclassType x nabla1)) arg_ids)
           , ppr branching_factor
           , ppr new_fuel
           ]
-        nabla2 <- traceWhenFailPm (hdr "(inh test failed) }") (ppr nabla1) $
-                  inhabitationTest new_fuel (nabla_ty_st nabla') nabla1
-        lift $ tracePm (hdr "(inh test succeeded) }") (ppr nabla2)
-        pure nabla2
+        nabla3 <- traceWhenFailPm (hdr "(inh test failed) }") (ppr nabla2) $
+                  inhabitationTest new_fuel (nabla_ty_st nabla1) nabla2
+        lift $ tracePm (hdr "(inh test succeeded) }") (ppr nabla3)
+        pure nabla3
     Nothing -> do
       tracePm (hdr "(match_ty not instance of res_ty) }") empty
-      pure (Just nabla) -- Matching against match_ty failed. Inhabited!
+      pure (Just nabla0) -- Matching against match_ty failed. Inhabited!
                          -- See Note [Instantiating a ConLike].
 
 -- | @matchConLikeResTy _ _ ty K@ tries to match @ty@ against the result
@@ -2036,12 +2028,11 @@ generateInhabitingPatterns mode (x:xs) n nabla at MkNabla{nabla_tm_st=ts} = do
     instantiate_newtype_chain :: ClassId -> Nabla -> [(Type, DataCon, Type)] -> MaybeT DsM (ClassId, Nabla)
     instantiate_newtype_chain x nabla []                      = pure (x, nabla)
     instantiate_newtype_chain x nabla ((_ty, dc, arg_ty):dcs) = do
-      y <- lift $ mkPmId arg_ty
-      let (yid,nabla') = representId y nabla
+      (y,nabla') <- lift $ mkPmMatchId arg_ty nabla
       -- Newtypes don't have existentials (yet?!), so passing an empty
       -- list as ex_tvs.
-      nabla'' <- addConCt nabla' x (PmAltConLike (RealDataCon dc)) [] [yid]
-      instantiate_newtype_chain yid nabla'' dcs
+      nabla'' <- addConCt nabla' x (PmAltConLike (RealDataCon dc)) [] [y]
+      instantiate_newtype_chain y nabla'' dcs
 
     instantiate_cons :: ClassId -> Type -> [ClassId] -> Int -> Nabla -> [ConLike] -> DsM [Nabla]
     instantiate_cons _ _  _  _ _     []       = pure []
@@ -2052,7 +2043,7 @@ generateInhabitingPatterns mode (x:xs) n nabla at MkNabla{nabla_tm_st=ts} = do
       = generateInhabitingPatterns mode xs n nabla
     instantiate_cons x ty xs n nabla (cl:cls) = do
       -- The following line is where we call out to the inhabitationTest!
-      mb_nabla <- runMaybeT $ instCon 4 nabla (eclassMatchId x nabla) cl
+      mb_nabla <- runMaybeT $ instCon 4 nabla x cl
       tracePm "instantiate_cons" (vcat [ ppr x <+> dcolon <+> ppr (eclassType x nabla)
                                        , ppr ty
                                        , ppr cl
@@ -2147,13 +2138,22 @@ the -XEmptyCase case in 'reportWarnings' by looking for 'ReportEmptyCase'.
 -- | Update the value of the analysis data of some e-class by its id.
 updateVarInfo :: Functor f => ClassId -> (VarInfo -> f VarInfo) -> Nabla -> f Nabla
 -- Update the data at class @xid@ using lenses and the monadic action @go@
-updateVarInfo xid f nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts=eg } } = (\eg' -> nabla{ nabla_tm_st = ts{ts_facts = eg'} }) <$> (_class xid . _data) f eg
+updateVarInfo xid f nabla at MkNabla{ nabla_tm_st = ts at TmSt{ ts_facts=eg } }
+  = (\eg' -> nabla{ nabla_tm_st = ts{ts_facts = eg'} }) <$> (_class xid . _data) f eg
 
-eclassMatchId :: HasCallStack => ClassId -> Nabla -> Id
+eclassMatchId :: ClassId -> Nabla -> Id
 eclassMatchId cid = vi_id . (^. _class cid . _data) . (ts_facts . nabla_tm_st)
 
 eclassType :: ClassId -> Nabla -> Type
 eclassType cid = idType . eclassMatchId cid
 
-
 -- ROMES:TODO: When exactly to rebuild?
+
+-- | Generate a fresh class for matching, returning the class-id as the match-id
+mkPmMatchId :: Type -> Nabla -> DsM (ClassId, Nabla)
+mkPmMatchId ty (MkNabla tyst ts at TmSt{ts_facts = egr}) = do
+  x <- mkPmId ty -- romes:Todo: for now, we still use mkPmId to get an Id for emptyVarInfo, but we want to get rid of this too
+  let (xid, egr') = EG.newEClass (emptyVarInfo x) egr
+  return (xid, MkNabla tyst ts{ts_facts=egr'})
+{-# NOINLINE mkPmMatchId #-} -- We'll CPR deeply, that should be enough
+


=====================================
compiler/GHC/HsToCore/Pmc/Solver/Types.hs
=====================================
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleInstances   #-}
+{-# LANGUAGE MagicHash           #-}
 {-# LANGUAGE RankNTypes   #-}
 {-# LANGUAGE TypeApplications   #-}
 {-# LANGUAGE MultiParamTypeClasses #-}
@@ -69,6 +70,7 @@ import GHC.Builtin.Types
 import GHC.Builtin.Types.Prim
 import GHC.Tc.Solver.InertSet (InertSet, emptyInert)
 import GHC.Tc.Utils.TcType (isStringTy)
+import GHC.Types.Var.Env
 import GHC.Types.CompleteMatch (CompleteMatch(..))
 import GHC.Types.SourceText (SourceText(..), mkFractionalLit, FractionalLit
                             , fractionalLitFromRational
@@ -79,8 +81,6 @@ import Data.Ratio
 import GHC.Real (Ratio(..))
 import qualified Data.Semigroup as Semi
 
-import Data.Tuple (swap)
-import Data.Traversable (mapAccumL)
 import Data.Functor.Compose
 import Data.Equality.Analysis (Analysis(..))
 import Data.Equality.Graph (EGraph, ClassId)
@@ -88,7 +88,8 @@ import Data.Equality.Graph.Lens
 import qualified Data.Equality.Graph as EG
 import Data.IntSet (IntSet)
 import qualified Data.IntSet as IS (empty)
-import Data.Bifunctor (second)
+import Data.Bifunctor (second, bimap, first)
+import Control.Monad.Trans.State (runState, state)
 
 -- import GHC.Driver.Ppr
 
@@ -155,6 +156,8 @@ data TmState
   = TmSt
   { ts_facts :: !TmEGraph
   -- ^ Facts about terms.
+  , ts_reps :: !(IdEnv ClassId)
+  -- ^ A mapping from match-id Ids to the class-id representing that match-id
 
   -- ROMES:TODO: ts_dirty looks a bit to me like the bookeeping needed to know
   -- which nodes to upward merge, perhaps we can get rid of it too.
@@ -242,7 +245,7 @@ instance Outputable BotInfo where
 
 -- | Not user-facing.
 instance Outputable TmState where
-  ppr (TmSt eg dirty) = text (show eg) $$ ppr dirty
+  ppr (TmSt eg idmp dirty) = text (show eg) $$ ppr idmp $$ ppr dirty
 
 -- | Not user-facing.
 instance Outputable VarInfo where
@@ -263,7 +266,7 @@ instance Outputable VarInfo where
 
 -- | Initial state of the term oracle.
 initTmState :: TmState
-initTmState = TmSt EG.emptyEGraph IS.empty
+initTmState = TmSt EG.emptyEGraph mempty IS.empty
 
 -- | A data type that caches for the 'VarInfo' of @x@ the results of querying
 -- 'dsGetCompleteMatches' and then striking out all occurrences of @K@ for
@@ -318,9 +321,8 @@ emptyVarInfo x
 -- | @lookupVarInfo tms x@ tells what we know about 'x'
 --- romes:TODO: This will have a different type. I don't know what yet.
 -- romes:TODO I don't think this is what we want any longer, more like represent Id and see if it was previously represented by some data or not?
--- romes:TodO should return VarInfo rather than Maybe VarInfo
 lookupVarInfo :: TmState -> ClassId -> VarInfo
-lookupVarInfo (TmSt eg _) x
+lookupVarInfo (TmSt eg _ _) x
 -- RM: Yea, I don't like the fact that currently all e-classes are created by Ids and have an Empty Var info, yet we must call "fromMaybe" here. Not good.
   = eg ^._class x._data
 
@@ -834,14 +836,31 @@ type TmEGraph = EGraph VarInfo (DeBruijnF CoreExprF)
 instance Show VarInfo where
   show = showPprUnsafe . ppr
 
-representId :: Id -> Nabla -> (ClassId, Nabla)
--- Will need to justify this well
-representId x (MkNabla tyst tmst at TmSt{ts_facts=eg0})
-  = case EG.add (EG.Node (DF (deBruijnize (VarF x)))) eg0 of
-      (xid, eg1) -> (xid, MkNabla tyst tmst{ts_facts=eg1})
 
-representIds :: [Id] -> Nabla -> ([ClassId], Nabla)
-representIds xs nabla = swap $ mapAccumL (\acc x -> swap $ representId x acc) nabla xs
+-- I think the right thing to do here is have representId always return a ClassId, or panic
+-- and have an additional method which is called by representIds...
+representId :: Id -> Nablas -> (ClassId, Nablas)
+representId i n = first fromJust $ representId' i n
+
+-- romes: temporary names...
+-- | Represents a match-id in 'Nablas' and returns the match-id in terms of the e-graph
+--
+-- It returns Nothing if the Nablas is empty, and hence there's no returned
+-- match-id in terms of e-graphs (there are none in which we can represent that Id)
+representId' :: Id -> Nablas -> (Maybe ClassId, Nablas)
+-- TODO:TOMORROW: pick the highest Id, and use newPointerToClassId on the Nablas on which the represented Id got a lower clsas Id
+--
+-- I suppose that if we do it earlier, we can make all the nablas share the exact same match-ids and avoid this complexity.
+representId' x (MkNablas nbs) = bimap headMaybe MkNablas $ unzipBag $ mapBag go nbs
+  where
+    go (MkNabla tyst tmst at TmSt{ts_facts=eg0, ts_reps=idmp})
+      = case lookupVarEnv idmp x of
+          Nothing -> case EG.newEClass (emptyVarInfo x) eg0 of
+            (xid, eg1) -> (xid, MkNabla tyst tmst{ts_facts=eg1, ts_reps=extendVarEnv idmp x xid})
+          Just xid -> (xid, MkNabla tyst tmst)
+
+representIds :: [Id] -> Nablas -> ([ClassId], Nablas)
+representIds xs = runState (catMaybes <$> mapM (state . representId') xs)
 
 -- | This instance is seriously wrong for general purpose, it's just required for instancing Analysis.
 -- There ought to be a better way.


=====================================
libraries/hegg
=====================================
@@ -1 +1 @@
-Subproject commit 238557096a773b8cbe70d141ed63aef302918a62
+Subproject commit 014e5c2b7acab76675ba2d2e16dd03a3dd19ee5d



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d0917a6917582e0ae102bf2235329c97b9c7e6b7

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d0917a6917582e0ae102bf2235329c97b9c7e6b7
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230706/53de0ae7/attachment-0001.html>