[Git][ghc/ghc][wip/DataToTagSmallOp] 2 commits: Introduce `dataToTagSmall#` primop (closes #21710)

Matthew Craven (@clyring) gitlab at gitlab.haskell.org
Sun Dec 10 00:48:19 UTC 2023



Matthew Craven pushed to branch wip/DataToTagSmallOp at Glasgow Haskell Compiler / GHC


Commits:
1504301b by Matthew Craven at 2023-12-09T19:45:22-05:00
Introduce `dataToTagSmall#` primop (closes #21710)

...and use it to generate slightly better code when dataToTag#
is used at a "small data type" where there is no need to mess
with "is_too_big_tag" or potentially look at an info table.

Metric Decrease:
    T18304

- - - - -
bfbd33d9 by Matthew Craven at 2023-12-09T19:47:47-05:00
Fix formatting of Note [alg-alt heap check]

- - - - -


17 changed files:

- compiler/GHC/Builtin/PrimOps.hs
- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/Core/Lint.hs
- compiler/GHC/Core/Opt/ConstantFold.hs
- compiler/GHC/Stg/InferTags/Rewrite.hs
- compiler/GHC/StgToCmm/Expr.hs
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- compiler/GHC/Tc/Instance/Class.hs
- libraries/base/src/GHC/Base.hs
- libraries/base/src/GHC/Exts.hs
- testsuite/tests/codeGen/should_compile/T21710a.stderr
- testsuite/tests/linters/notes.stdout
- testsuite/tests/simplCore/should_compile/T22375.hs
- testsuite/tests/simplCore/should_compile/T22375.stderr
- testsuite/tests/simplCore/should_compile/T22375DataFamily.hs
- testsuite/tests/simplCore/should_compile/T22375DataFamily.stderr


Changes:

=====================================
compiler/GHC/Builtin/PrimOps.hs
=====================================
@@ -921,5 +921,6 @@ instance Outputable PrimCall where
 primOpIsReallyInline :: PrimOp -> Bool
 primOpIsReallyInline = \case
   SeqOp       -> False
-  DataToTagOp -> False
+  DataToTagSmallOp -> False
+  DataToTagLargeOp -> False
   p           -> not (primOpOutOfLine p)


=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -3689,7 +3689,27 @@ section "Tag to enum stuff"
         and small integers.}
 ------------------------------------------------------------------------
 
-primop  DataToTagOp "dataToTagLarge#" GenPrimOp
+primop  DataToTagSmallOp "dataToTagSmall#" GenPrimOp
+   a_levpoly -> Int#
+   { Used internally to implement @dataToTag#@: Use that function instead!
+     This one offers /no advantage/ and comes with no stability
+     guarantees: it may change its type, its name, or its behavior
+     with /no warning/ between compiler releases.
+
+     It is expected that this function will be un-exposed in a future
+     release of ghc.
+
+     For more details, look at @Note [DataToTag overview]@
+     in GHC.Tc.Instance.Class in the source code for
+     /the specific compiler version you are using./
+   }
+   with
+   deprecated_msg = { Use dataToTag# from \"GHC.Magic\" instead. }
+   strictness = { \ _arity -> mkClosedDmdSig [evalDmd] topDiv }
+   effect = ThrowsException
+   cheap = True
+
+primop  DataToTagLargeOp "dataToTagLarge#" GenPrimOp
    a_levpoly -> Int#
    { Used internally to implement @dataToTag#@: Use that function instead!
      This one offers /no advantage/ and comes with no stability


=====================================
compiler/GHC/Core/Lint.hs
=====================================
@@ -1131,8 +1131,8 @@ checkTypeDataConOcc what dc
     (text "type data constructor found in a" <+> text what <> colon <+> ppr dc)
 
 {-
--- | Check that a use of dataToTagLarge# satisfies condition DTT2
--- from Note [DataToTag overview] in GHC.Tc.Instance.Class
+-- | Check that a use of dataToTagLarge# satisfies conditions DTT2
+-- and DTT3 from Note [DataToTag overview] in GHC.Tc.Instance.Class
 --
 -- Ignores applications not headed by dataToTagLarge#.
 
@@ -1142,12 +1142,17 @@ checkDataToTagPrimOpTyCon
   -> [CoreArg]  -- ^ The arguments to the application
   -> LintM ()
 checkDataToTagPrimOpTyCon (Var fun_id) args
-  | Just DataToTagOp <- isPrimOpId_maybe fun_id
+  | Just op <- isPrimOpId_maybe fun_id
+  , op == DataToTagSmallOp || op == DataToTagLargeOp
   = case args of
       Type _levity : Type dty : _rest
         | Just (tc, _) <- splitTyConApp_maybe dty
         , isValidDTT2TyCon tc
-          -> pure ()
+          -> do  platform <- getPlatform
+                 let  numConstrs = tyConFamilySize tc
+                      isSmallOp = op == DataToTagSmallOp
+                 checkL (isSmallFamily platform numConstrs == isSmallOp) $
+                   text "dataToTag# primop-size/tycon-family-size mismatch"
         | otherwise -> failWithL $ text "dataToTagLarge# used at non-ADT type:"
                                    <+> ppr dty
       _ -> failWithL $ text "dataToTagLarge# needs two type arguments but has args:"


=====================================
compiler/GHC/Core/Opt/ConstantFold.hs
=====================================
@@ -102,7 +102,8 @@ That is why these rules are built in here.
 primOpRules ::  Name -> PrimOp -> Maybe CoreRule
 primOpRules nm = \case
    TagToEnumOp -> mkPrimOpRule nm 2 [ tagToEnumRule ]
-   DataToTagOp -> mkPrimOpRule nm 3 [ dataToTagRule ]
+   DataToTagSmallOp -> mkPrimOpRule nm 3 [ dataToTagRule ]
+   DataToTagLargeOp -> mkPrimOpRule nm 3 [ dataToTagRule ]
 
    -- Int8 operations
    Int8AddOp   -> mkPrimOpRule nm 2 [ binaryLit (int8Op2 (+))
@@ -1985,7 +1986,9 @@ tagToEnumRule = do
 
 ------------------------------
 dataToTagRule :: RuleM CoreExpr
--- See Note [DataToTag overview] in GHC.Tc.Instance.Class.
+-- Used for both dataToTagSmall# and dataToTagLarge#.
+-- See Note [DataToTag overview] in GHC.Tc.Instance.Class,
+-- particularly wrinkle DTW5.
 dataToTagRule = a `mplus` b
   where
     -- dataToTag (tagToEnum x)   ==>   x
@@ -3374,7 +3377,8 @@ caseRules platform (App (App (Var f) type_arg) v)
 
 -- See Note [caseRules for dataToTag]
 caseRules _ (Var f `App` Type lev `App` Type ty `App` v) -- dataToTag x
-  | Just DataToTagOp <- isPrimOpId_maybe f
+  | Just op <- isPrimOpId_maybe f
+  , op == DataToTagSmallOp || op == DataToTagLargeOp
   = case splitTyConApp_maybe ty of
       Just (tc, _) | isValidDTT2TyCon tc
         -> Just (v, tx_con_dtt tc
@@ -3382,9 +3386,9 @@ caseRules _ (Var f `App` Type lev `App` Type ty `App` v) -- dataToTag x
       _ -> pprTraceUserWarning warnMsg Nothing
   where
     warnMsg = vcat $ map text
-      [ "Found dataToTag primop applied to a non-ADT type. This"
-      , "could be a future bug in GHC, or it may be caused by an"
-      , "unsupported use of the ghc-internal primop dataToTagLarge#."
+      [ "Found dataToTag primop applied to a non-ADT type. This could"
+      , "be a future bug in GHC, or it may be caused by an unsupported"
+      , "use of the ghc-internal primops dataToTagSmall# and dataToTagLarge#."
       , "In either case, the GHC developers would like to know about it!"
       , "Please report this as a GHC bug:  http://www.haskell.org/ghc/reportabug"
       ]
@@ -3554,7 +3558,7 @@ Note [caseRules for dataToTag]
 See also Note [DataToTag overview] in GHC.Tc.Instance.Class.
 
 We want to transform
-  case dataToTagLarge# x of
+  case dataToTagSmall# x of
     DEFAULT -> e1
     1# -> e2
 into
@@ -3569,12 +3573,17 @@ case-flattening and case-of-known-constructor and can be very
 important for code using derived Eq instances.
 
 We can apply this transformation only when we can easily get the
-constructors from the type at which dataToTagLarge# is used.  And we
+constructors from the type at which dataToTagSmall# is used.  And we
 cannot apply this transformation at "type data"-related types without
 breaking invariant I1 from Note [Type data declarations] in
 GHC.Rename.Module.  That leaves exactly the types satisfying condition
 DTT2 from Note [DataToTag overview] in GHC.Tc.Instance.Class.
 
+All of the above applies identically for `dataToTagLarge#`.  And
+thanks to wrinkle DTW5, there is no need to worry about large-tag
+arguments for `dataToTagSmall#`; those cause undefined behavior anyway.
+
+
 Note [Unreachable caseRules alternatives]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Take care if we see something like


=====================================
compiler/GHC/Stg/InferTags/Rewrite.hs
=====================================
@@ -495,10 +495,9 @@ occurrence of `x` and `y` to record whether it is evaluated and
 properly tagged. For the vast majority of primops that's a waste of
 time: the argument is an `Int#` or something.
 
-But code generation for `seq#` and `dataToTagLarge#` /does/ consult that
-tag, to statically avoid generating an eval:
-* `seq#`: uses `getCallMethod` on its first argument, which looks at the `tagSig`
-* `dataToTagLarge#`: checks `tagSig` directly in the `DataToTagOp` case of `cgExpr`.
+But code generation for `seq#` and the `dataToTag#` ops /does/ consult that
+tag, to statically avoid generating an eval.  All three do so via `cgIdApp`,
+which in turn uses `getCallMethod` which looks at the `tagSig`.
 
 So for these we should call `rewriteArgs`.
 
@@ -507,7 +506,7 @@ So for these we should call `rewriteArgs`.
 rewriteOpApp :: InferStgExpr -> RM TgStgExpr
 rewriteOpApp (StgOpApp op args res_ty) = case op of
   op@(StgPrimOp primOp)
-    | primOp == SeqOp || primOp == DataToTagOp
+    | primOp == SeqOp || primOp == DataToTagSmallOp || primOp == DataToTagLargeOp
     -- see Note [Rewriting primop arguments]
     -> (StgOpApp op) <$!> rewriteArgs args <*> pure res_ty
   _ -> pure $! StgOpApp op args res_ty


=====================================
compiler/GHC/StgToCmm/Expr.hs
=====================================
@@ -37,7 +37,7 @@ import GHC.Cmm.Graph
 import GHC.Cmm.BlockId
 import GHC.Cmm hiding ( succ )
 import GHC.Cmm.Info
-import GHC.Cmm.Utils ( zeroExpr, cmmTagMask, mkWordCLit, mAX_PTR_TAG )
+import GHC.Cmm.Utils ( cmmTagMask, mkWordCLit, mAX_PTR_TAG )
 import GHC.Core
 import GHC.Core.DataCon
 import GHC.Types.ForeignCall
@@ -73,55 +73,51 @@ cgExpr (StgApp fun args)     = cgIdApp fun args
 cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
   cgIdApp a []
 
+-- dataToTagSmall# :: a_levpoly -> Int#
+-- See Note [DataToTag overview] in GHC.Tc.Instance.Class,
+-- particularly wrinkles H3 and DTW4
+cgExpr (StgOpApp (StgPrimOp DataToTagSmallOp) [StgVarArg a] _res_ty) = do
+  platform <- getPlatform
+  emitComment (mkFastString "dataToTagSmall#")
+
+  a_eval_reg <- newTemp (bWord platform)
+  _ <- withSequel (AssignTo [a_eval_reg] False) (cgIdApp a [])
+  let a_eval_expr = CmmReg (CmmLocal a_eval_reg)
+      tag1 = cmmConstrTag1 platform a_eval_expr
+
+  -- subtract 1 because we need to return a zero-indexed tag
+  emitReturn [cmmSubWord platform tag1 (CmmLit $ mkWordCLit platform 1)]
+
 -- dataToTagLarge# :: a_levpoly -> Int#
--- See Note [DataToTag overview] in GHC.Tc.Instance.Class
--- TODO: There are some more optimization ideas for this code path
--- in #21710
-cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
+-- See Note [DataToTag overview] in GHC.Tc.Instance.Class,
+-- particularly wrinkles H3 and DTW4
+cgExpr (StgOpApp (StgPrimOp DataToTagLargeOp) [StgVarArg a] _res_ty) = do
   platform <- getPlatform
   emitComment (mkFastString "dataToTagLarge#")
-  info <- getCgIdInfo a
-  let amode = idInfoToAmode info
-  tag_reg <- assignTemp $ cmmConstrTag1 platform amode
+
+  a_eval_reg <- newTemp (bWord platform)
+  _ <- withSequel (AssignTo [a_eval_reg] False) (cgIdApp a [])
+  let a_eval_expr = CmmReg (CmmLocal a_eval_reg)
+
+  tag1_reg <- assignTemp $ cmmConstrTag1 platform a_eval_expr
   result_reg <- newTemp (bWord platform)
-  let tag = CmmReg $ CmmLocal tag_reg
-      is_tagged = cmmNeWord platform tag (zeroExpr platform)
-      is_too_big_tag = cmmEqWord platform tag (cmmTagMask platform)
-  -- Here we will first check the tag bits of the pointer we were given;
-  -- if this doesn't work then enter the closure and use the info table
-  -- to determine the constructor. Note that all tag bits set means that
-  -- the constructor index is too large to fit in the pointer and therefore
-  -- we must look in the info table. See Note [Tagging big families].
-
-  (fast_path :: CmmAGraph) <- getCode $ do
-      -- Return the constructor index from the pointer tag
-      return_ptr_tag <- getCode $ do
-          emitAssign (CmmLocal result_reg)
-            $ cmmSubWord platform tag (CmmLit $ mkWordCLit platform 1)
-      -- Return the constructor index recorded in the info table
-      return_info_tag <- getCode $ do
-          profile     <- getProfile
-          align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
-          emitAssign (CmmLocal result_reg)
-            $ getConstrTag profile align_check (cmmUntag platform amode)
-
-      emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)
-  -- If we know the argument is already tagged there is no need to generate code to evaluate it
-  -- so we skip straight to the fast path. If we don't know if there is a tag we take the slow
-  -- path which evaluates the argument before fetching the tag.
-  case (idTagSig_maybe a) of
-    Just sig
-      | isTaggedSig sig
-      -> emit fast_path
-    _ -> do
-          slow_path <- getCode $ do
-              tmp <- newTemp (bWord platform)
-              _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
-              profile     <- getProfile
-              align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
-              emitAssign (CmmLocal result_reg)
-                $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp)))
-          emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
+  let tag1_expr = CmmReg $ CmmLocal tag1_reg
+      is_too_big_tag = cmmEqWord platform tag1_expr (cmmTagMask platform)
+
+  -- Return the constructor index from the pointer tag
+  -- (Used if pointer tag is small enough to be unambiguous)
+  return_ptr_tag <- getCode $ do
+    emitAssign (CmmLocal result_reg)
+      $ cmmSubWord platform tag1_expr (CmmLit $ mkWordCLit platform 1)
+
+  -- Return the constructor index recorded in the info table
+  return_info_tag <- getCode $ do
+    profile     <- getProfile
+    align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
+    emitAssign (CmmLocal result_reg)
+      $ getConstrTag profile align_check (cmmUntag platform a_eval_expr)
+
+  emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)
   emitReturn [CmmReg $ CmmLocal result_reg]
 
 
@@ -638,9 +634,10 @@ isSimpleScrut _                    _         = return False
 isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
 -- True iff the op cannot block or allocate
 isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
--- dataToTagLarge# evaluates its argument;
+-- dataToTagSmall#/dataToTagLarge# evaluate an argument;
 -- see Note [DataToTag overview] in GHC.Tc.Instance.Class
-isSimpleOp (StgPrimOp DataToTagOp) _ = return False
+isSimpleOp (StgPrimOp DataToTagSmallOp) _ = return False
+isSimpleOp (StgPrimOp DataToTagLargeOp) _ = return False
 isSimpleOp (StgPrimOp op) stg_args                  = do
     arg_exprs <- getNonVoidArgAmodes stg_args
     cfg       <- getStgToCmmConfig
@@ -851,6 +848,7 @@ cgAlts _ _ _ _ = panic "cgAlts"
 
 
 -- Note [alg-alt heap check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
 --
 -- In an algebraic case with more than one alternative, we will have
 -- code like


=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1640,7 +1640,8 @@ emitPrimOp cfg primop =
   SeqOp -> alwaysExternal
   GetSparkOp -> alwaysExternal
   NumSparks -> alwaysExternal
-  DataToTagOp -> alwaysExternal
+  DataToTagSmallOp -> alwaysExternal
+  DataToTagLargeOp -> alwaysExternal
   MkApUpd0_Op -> alwaysExternal
   NewBCOOp -> alwaysExternal
   UnpackClosureOp -> alwaysExternal


=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -967,7 +967,11 @@ genPrim prof bound ty op = case op of
 
 ------------------------------ Tag to enum stuff --------------------------------
 
-  DataToTagOp -> \[_r] [d] -> pure $ PRPrimCall $ mconcat
+  DataToTagSmallOp -> \[_r] [d] -> pure $ PRPrimCall $ mconcat
+      [ stack .! PreInc sp |= var "h$dataToTag_e"
+      , returnS (app "h$e" [d])
+      ]
+  DataToTagLargeOp -> \[_r] [d] -> pure $ PRPrimCall $ mconcat
       [ stack .! PreInc sp |= var "h$dataToTag_e"
       , returnS (app "h$e" [d])
       ]


=====================================
compiler/GHC/Tc/Instance/Class.hs
=====================================
@@ -50,6 +50,8 @@ import GHC.Core.Class
 
 import GHC.Core ( Expr(..) )
 
+import GHC.StgToCmm.Closure ( isSmallFamily )
+
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Misc( splitAtList, fstOf3 )
@@ -671,15 +673,17 @@ But, to avoid all this boilerplate code, and improve optimisation opportunities,
 GHC generates instances like this:
 
    instance DataToTag [a] where
-     dataToTag# = dataToTagLarge#
+     dataToTag# = dataToTagSmall#
 
-using a (temporarily strangely-named) primop `dataToTagLarge#`. The
-primop has the following over-polymorphic type
+using one of two dedicated primops: `dataToTagSmall#` and `dataToTagLarge#`.
+(Why two primops? What's the difference? See wrinkles DTW4 and DTW5.)
+Both primops have the following over-polymorphic type:
 
   dataToTagLarge# :: forall {l::levity} (a::TYPE (BoxedRep l)). a -> Int#
 
-Every call to (dataToTagLarge# @{lev} @ty) that we generate should
-satisfy these conditions:
+Every call to either primop that we generate should look like
+(dataToTagSmall# @{lev} @ty) with two type arguments that satisfy
+these conditions:
 
 (DTT1) `lev` is concrete (either lifted or unlifted), not polymorphic.
    This is an invariant--we must satisfy this or Core Lint will complain.
@@ -698,25 +702,37 @@ satisfy these conditions:
    GHC.Rename.Module.  See Note [caseRules for dataToTag] in
    GHC.Core.Opt.ConstantFold for why this matters.
 
-   While the dataToTagLarge# primop remains exposed from GHC.Prim
+   While the dataToTag# primops remain exposed from GHC.Prim
    (and abused in GHC.PrimopWrappers), this cannot be a true invariant.
-   But with a little effort we can ensure that every `dataToTagLarge#`
+   But with a little effort we can ensure that every primop
    call we generate in a DataToTag instance satisfies this condition.
 
-The `dataToTagLarge#` primop has special handling in several parts of
+(DTT3) If the TyCon in wrinkle DTT2 is a "large data type" with more
+   constructors than fit in pointer tags on the target, then the
+   primop must be dataToTagLarge# and not dataToTagSmall#.
+   Otherwise, the primop must be dataToTagSmall# and not dataToTagLarge#.
+   (See wrinkles DTW4 and DTW5.)
+
+These two primops have special handling in several parts of
 the compiler:
 
-- It has a couple of built-in rewrite rules, implemented in
-  GHC.Core.Opt.ConstantFold.dataToTagRule
+H1. They have a couple of built-in rewrite rules, implemented in
+    GHC.Core.Opt.ConstantFold.dataToTagRule
 
-- The simplifier rewrites most case expressions scrutinizing its result.
-  See Note [caseRules for dataToTag] in GHC.Core.Opt.ConstantFold.
+H2. The simplifier rewrites most case expressions scrutinizing their results.
+    See Note [caseRules for dataToTag] in GHC.Core.Opt.ConstantFold.
 
-- It evaluates its argument; this is implemented via a special case in
-  GHC.StgToCmm.Expr.cgExpr.
+H3. Each evaluates its argument.  But we want to omit this eval when the
+    actual argument is already evaluated and properly tagged.  To do this,
 
-- Additionally, a special case in GHC.Stg.InferTags.Rewrite.rewriteExpr ensures
-  that that any inferred tag information on the argument is retained until then.
+    * We have a special case in GHC.Stg.InferTags.Rewrite.rewriteOpApp
+      ensuring that any inferred tag information on the argument is
+      retained until code generation.
+
+    * We generate code via special cases in GHC.StgToCmm.Expr.cgExpr
+      instead of with the other primops in GHC.StgToCmm.Prim.emitPrimOp;
+      tag info is not readily available in the latter function.
+      (Wrinkle DTW4 describes what we generate after the eval.)
 
 Wrinkles:
 
@@ -727,12 +743,12 @@ Wrinkles:
      [W] DataToTag (D (Either t1 t2))
   GHC uses the built-in instance
      instance DataToTag (D (Either p q)) where
-        dataToTag# x = dataToTagLarge# @Lifted @(R:DEither p q)
+        dataToTag# x = dataToTagSmall# @Lifted @(R:DEither p q)
                                        (x |> sym (ax:DEither p q))
   where `ax:DEither` is the axiom arising from the `data instance`:
     ax:DEither p q :: D (Either p q) ~ R:DEither p q
 
-  Notice that we cast `x` before giving it to `dataToTagLarge#`, so
+  Notice that we cast `x` before giving it to `dataToTagSmall#`, so
   that (DTT2) is satisfied.
 
 (DTW2) Suppose we have module A (T(..)) where { data T = TCon }
@@ -747,7 +763,7 @@ Wrinkles:
 (DTW3) Similar to DTW2, consider this example:
 
     {-# LANGUAGE MagicHash #-}
-    module A (X(X2, X3), f) where
+    module A (X(X2, X3), g) where
     -- see also testsuite/tests/warnings/should_compile/DataToTagWarnings.hs
     import GHC.Exts (dataToTag#, Int#)
     data X = X1 | X2 | X3 | X4
@@ -774,23 +790,91 @@ Wrinkles:
   keepAlive on the constructor names.
   (Contrast with Note [Unused name reporting and HasField].)
 
-(DTW4) It is expected that in the future some instances may select more
-  efficient specialised implementations; for example we may use a
-  separate `dataToTagSmall#` primop for a type with only a few
-  constructors; see #17079 and #21710.
-
-(DTW5) We make no promises about the primops used to implement
+(DTW4) Why have two primops, `dataToTagSmall#` and `dataToTagLarge#`?
+  The way tag information is stored at runtime is described in
+  Note [Tagging big families] in GHC.StgToCmm.Expr.  In particular,
+  for "big data types" we must consult the heap object's info table at
+  least in the mAX_PTR_TAG case, while for "small data types" we can
+  always just examine the tag bits on the pointer itself. So:
+
+  * dataToTagSmall# consults the tag bits in the pointer, ignoring the
+    info table.  It should, therefore, be used only for data type with
+    few enough contructors that the tag always fits in the pointer.
+
+  * dataToTagLarge# also consults the tag bits in the pointer, but
+    must fall back te examining the info table whenever those tag
+    bits are equal to mAX_PTR_TAG.
+
+  One could imagine having one primop with a small/large tag, or just
+  the data type width, but the PrimOp data type is not currently set
+  up for that.  Looking at the type information on the argument during
+  code generation is also possible, but would be less reliable.
+  Remember: type information is not always preserved in STG.
+
+(DTW5) How do the two primops differ in their semantics?  We consider
+  a call `dataToTagSmall# x` to result in undefined behavior whenever
+  the target supports pointer tagging but the actual constructor index
+  for `x` is too large to fit in the pointer's tag bits.  Otherwise,
+  `dataToTagSmall#` behaves identically to `dataToTagLarge#`.
+
+  This allows the rewrites performed in GHC.Core.Opt.ConstantFold to
+  safely treat `dataToTagSmall#` identically to `dataToTagLarge#`:
+  the allowed program behaviors for the former is always a superset of
+  the allowed program behaviors for the latter.
+
+  This undefined behavior is only observable if a user writes a
+  wrongly-sized primop call.  The calls we generate are properly-sized
+  (condition DTT3 above) so that the type system protects us.
+
+(DTW6) We make no promises about the primops used to implement
   DataToTag instances.  Changes to GHC's representation of algebraic
   data types at runtime may force us to redesign these primops.
   Indeed, accommodating such changes without breaking users of the
   original (no longer existing) "dataToTag#" primop is one of the
   main reasons the DataToTag class exists!
 
-  We can currently get away with using the same primop for every
-  DataToTag instance because every Haskell-land data constructor use
-  gets translated to its own "real" heap or static data object at
-  runtime and the index of that constructor is always exposed via
-  pointer tagging and via the object's info table.
+  In particular, our current two primop implementations (as described
+  in wrinkle DTW4) are adequate for every DataToTag instance only
+  because every Haskell-land data constructor use gets translated to
+  its own "real" heap or static data object at runtime and the index
+  of that constructor is always exposed via pointer tagging and via
+  the object's info table.
+
+(DTW7) Currently, the generated module GHC.PrimopWrappers in ghc-prim
+  contains the following non-sense definitions:
+
+    {-# NOINLINE dataToTagSmall# #-}
+    dataToTagSmall# :: a_levpoly -> Int#
+    dataToTagSmall# a1 = GHC.Prim.dataToTagSmall# a1
+    {-# NOINLINE dataToTagLarge# #-}
+    dataToTagLarge# :: a_levpoly -> Int#
+    dataToTagLarge# a1 = GHC.Prim.dataToTagLarge# a1
+
+  Why do these exist? GHCi uses these symbols for... something.  There
+  is on-going work to get rid of them.  See also #24169 and !6245.
+  Their continued existence makes it difficult to do several nice things:
+
+   * As explained in DTW6, the dataToTag# primops are very internal.
+     We would like to hide them from GHC.Prim entirely to prevent
+     their mis-use, but doing so would cause GHC.PrimopWrappers to
+     fail to compile.
+
+   * The primops are applied at the (confusingly monomorphic) type
+     variable `a_levpoly` in the above definitions.  In particular,
+     they do not satisfy conditions DTT2 and DTT3 above.  We would
+     very much like these conditions to be invariants, but while
+     GHC.PrimopWrappers breaks them we cannot do so.
+
+   * This in turn means that `GHC.Core.Opt.ConstantFold.caseRules`
+     must check for condition DTT2 before doing the work described in
+     Note [caseRules for dataToTag].
+
+   * Likewise, wrinkle DTW5 is only necessary because condition DTT3
+     is not an invariant.  Otherwise, invoking the currently-specified
+     undefined behavior of `dataToTagSmall# @ty` would require passing it
+     an argument which will not really have type `ty` at runtime.  And
+     evaluating such an expression is always undefined behavior anyway!
+
 
 
 Historical note:
@@ -816,6 +900,7 @@ matchDataToTag :: Class -> [Type] -> TcM ClsInstResult
 matchDataToTag dataToTagClass [levity, dty] = do
   famEnvs <- tcGetFamInstEnvs
   (gbl_env, _lcl_env) <- getEnvs
+  platform <- getPlatform
   if | isConcreteType levity -- condition C3
      , Just (rawTyCon, rawTyConArgs) <- tcSplitTyConApp_maybe dty
      , let (repTyCon, repArgs, repCo)
@@ -828,13 +913,14 @@ matchDataToTag dataToTagClass [levity, dty] = do
      , let  rdr_env = tcg_rdr_env gbl_env
             inScope con = isJust $ lookupGRE_Name rdr_env $ dataConName con
      , all inScope constrs -- condition C2
+
      , let  repTy = mkTyConApp repTyCon repArgs
-            whichOp
-              -- TODO: More optimized implementations for:
-              --    * small constructor families
-              --    * Bool/Int/Float/etc. on JS backend
+            numConstrs = tyConFamilySize repTyCon
+            !whichOp -- see wrinkle DTW4
+              | isSmallFamily platform numConstrs
+                = primOpId DataToTagSmallOp
               | otherwise
-                = primOpId DataToTagOp
+                = primOpId DataToTagLargeOp
 
             -- See wrinkle DTW1; we must apply the underlying
             -- operation at the representation type and cast it


=====================================
libraries/base/src/GHC/Base.hs
=====================================
@@ -117,8 +117,8 @@ import GHC.Classes
 import GHC.CString
 import GHC.Magic
 import GHC.Magic.Dict
-import GHC.Prim hiding (dataToTagLarge#)
-  -- Hide dataToTagLarge# because it is expected to break for
+import GHC.Prim hiding (dataToTagSmall#, dataToTagLarge#)
+  -- Hide dataToTag# ops because they are expected to break for
   -- GHC-internal reasons in the near future, and shouldn't
   -- be exposed from base (not even GHC.Exts)
 


=====================================
libraries/base/src/GHC/Exts.hs
=====================================
@@ -133,8 +133,8 @@ module GHC.Exts
         maxTupleSize,
        ) where
 
-import GHC.Prim hiding ( coerce, dataToTagLarge# )
-  -- Hide dataToTagLarge# because it is expected to break for
+import GHC.Prim hiding ( coerce, dataToTagSmall#, dataToTagLarge# )
+  -- Hide dataToTag# ops because they are expected to break for
   -- GHC-internal reasons in the near future, and shouldn't
   -- be exposed from base (not even GHC.Exts)
 


=====================================
testsuite/tests/codeGen/should_compile/T21710a.stderr
=====================================
@@ -1,117 +1,44 @@
 
-==================== Output Cmm ====================
-[section ""cstring" . M.$tc'E2_bytes" {
-     M.$tc'E2_bytes:
-         I8[] "'E"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$tc'D2_bytes" {
-     M.$tc'D2_bytes:
-         I8[] "'D"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$tc'C2_bytes" {
-     M.$tc'C2_bytes:
-         I8[] "'C"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$tc'B2_bytes" {
-     M.$tc'B2_bytes:
-         I8[] "'B"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$tc'A3_bytes" {
-     M.$tc'A3_bytes:
-         I8[] "'A"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$tcE2_bytes" {
-     M.$tcE2_bytes:
-         I8[] "E"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$trModule2_bytes" {
-     M.$trModule2_bytes:
-         I8[] "M"
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""cstring" . M.$trModule4_bytes" {
-     M.$trModule4_bytes:
-         I8[] "main"
- }]
-
-
-
 ==================== Output Cmm ====================
 [M.foo_entry() { //  [R2]
-         { info_tbls: [(cBa,
-                        label: block_cBa_info
+         { info_tbls: [(cCU,
+                        label: block_cCU_info
                         rep: StackRep []
                         srt: Nothing),
-                       (cBi,
+                       (cD2,
                         label: M.foo_info
                         rep: HeapRep static { Fun {arity: 1 fun_type: ArgSpec 5} }
                         srt: Nothing)]
            stack_info: arg_space: 8
          }
      {offset
-       cBi: // global
-           if ((Sp + -8) < SpLim) (likely: False) goto cBj; else goto cBk;   // CmmCondBranch
-       cBj: // global
+       cD2: // global
+           if ((Sp + -8) < SpLim) (likely: False) goto cD3; else goto cD4;   // CmmCondBranch
+       cD3: // global
            R1 = M.foo_closure;   // CmmAssign
            call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;   // CmmCall
-       cBk: // global
-           I64[Sp - 8] = cBa;   // CmmStore
+       cD4: // global
+           I64[Sp - 8] = cCU;   // CmmStore
            R1 = R2;   // CmmAssign
            Sp = Sp - 8;   // CmmAssign
-           if (R1 & 7 != 0) goto cBa; else goto cBb;   // CmmCondBranch
-       cBb: // global
-           call (I64[R1])(R1) returns to cBa, args: 8, res: 8, upd: 8;   // CmmCall
-       cBa: // global
-           _cBh::P64 = R1 & 7;   // CmmAssign
-           if (_cBh::P64 != 1) goto uBz; else goto cBf;   // CmmCondBranch
-       uBz: // global
-           if (_cBh::P64 != 2) goto cBe; else goto cBg;   // CmmCondBranch
-       cBe: // global
-           // dataToTag#
-           _cBn::P64 = R1 & 7;   // CmmAssign
-           if (_cBn::P64 == 7) (likely: False) goto cBs; else goto cBr;   // CmmCondBranch
-       cBs: // global
-           _cBo::I64 = %MO_UU_Conv_W32_W64(I32[I64[R1 & (-8)] - 4]);   // CmmAssign
-           goto cBq;   // CmmBranch
-       cBr: // global
-           _cBo::I64 = _cBn::P64 - 1;   // CmmAssign
-           goto cBq;   // CmmBranch
-       cBq: // global
-           R1 = _cBo::I64;   // CmmAssign
+           if (R1 & 7 != 0) goto cCU; else goto cCV;   // CmmCondBranch
+       cCV: // global
+           call (I64[R1])(R1) returns to cCU, args: 8, res: 8, upd: 8;   // CmmCall
+       cCU: // global
+           _cD1::P64 = R1 & 7;   // CmmAssign
+           if (_cD1::P64 != 1) goto uDf; else goto cCZ;   // CmmCondBranch
+       uDf: // global
+           if (_cD1::P64 != 2) goto cCY; else goto cD0;   // CmmCondBranch
+       cCY: // global
+           // dataToTagSmall#
+           R1 = R1 & 7 - 1;   // CmmAssign
            Sp = Sp + 8;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
-       cBg: // global
+       cD0: // global
            R1 = 42;   // CmmAssign
            Sp = Sp + 8;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
-       cBf: // global
+       cCZ: // global
            R1 = 2;   // CmmAssign
            Sp = Sp + 8;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
@@ -124,190 +51,6 @@
 
 
 
-==================== Output Cmm ====================
-[section ""data" . M.$trModule3_closure" {
-     M.$trModule3_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$trModule4_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$trModule1_closure" {
-     M.$trModule1_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$trModule2_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$trModule_closure" {
-     M.$trModule_closure:
-         const GHC.Types.Module_con_info;
-         const M.$trModule3_closure+1;
-         const M.$trModule1_closure+1;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tcE1_closure" {
-     M.$tcE1_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$tcE2_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tcE_closure" {
-     M.$tcE_closure:
-         const GHC.Types.TyCon_con_info;
-         const M.$trModule_closure+1;
-         const M.$tcE1_closure+1;
-         const GHC.Types.krep$*_closure+5;
-         const 10475418246443540865;
-         const 12461417314693222409;
-         const 0;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'A1_closure" {
-     M.$tc'A1_closure:
-         const GHC.Types.KindRepTyConApp_con_info;
-         const M.$tcE_closure+1;
-         const GHC.Types.[]_closure+1;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'A2_closure" {
-     M.$tc'A2_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$tc'A3_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'A_closure" {
-     M.$tc'A_closure:
-         const GHC.Types.TyCon_con_info;
-         const M.$trModule_closure+1;
-         const M.$tc'A2_closure+1;
-         const M.$tc'A1_closure+1;
-         const 10991425535368257265;
-         const 3459663971500179679;
-         const 0;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'B1_closure" {
-     M.$tc'B1_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$tc'B2_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'B_closure" {
-     M.$tc'B_closure:
-         const GHC.Types.TyCon_con_info;
-         const M.$trModule_closure+1;
-         const M.$tc'B1_closure+1;
-         const M.$tc'A1_closure+1;
-         const 13038863156169552918;
-         const 13430333535161531545;
-         const 0;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'C1_closure" {
-     M.$tc'C1_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$tc'C2_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'C_closure" {
-     M.$tc'C_closure:
-         const GHC.Types.TyCon_con_info;
-         const M.$trModule_closure+1;
-         const M.$tc'C1_closure+1;
-         const M.$tc'A1_closure+1;
-         const 8482817676735632621;
-         const 8146597712321241387;
-         const 0;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'D1_closure" {
-     M.$tc'D1_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$tc'D2_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'D_closure" {
-     M.$tc'D_closure:
-         const GHC.Types.TyCon_con_info;
-         const M.$trModule_closure+1;
-         const M.$tc'D1_closure+1;
-         const M.$tc'A1_closure+1;
-         const 7525207739284160575;
-         const 13746130127476219356;
-         const 0;
-         const 3;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'E1_closure" {
-     M.$tc'E1_closure:
-         const GHC.Types.TrNameS_con_info;
-         const M.$tc'E2_bytes;
- }]
-
-
-
-==================== Output Cmm ====================
-[section ""data" . M.$tc'E_closure" {
-     M.$tc'E_closure:
-         const GHC.Types.TyCon_con_info;
-         const M.$trModule_closure+1;
-         const M.$tc'E1_closure+1;
-         const M.$tc'A1_closure+1;
-         const 6748545530683684316;
-         const 10193016702094081137;
-         const 0;
-         const 3;
- }]
-
-
-
 ==================== Output Cmm ====================
 [section ""data" . M.A_closure" {
      M.A_closure:
@@ -362,14 +105,14 @@
 
 ==================== Output Cmm ====================
 [M.A_con_entry() { //  []
-         { info_tbls: [(cC5,
+         { info_tbls: [(cDt,
                         label: M.A_con_info
                         rep: HeapRep 1 nonptrs { Con {tag: 0 descr:"main:M.A"} }
                         srt: Nothing)]
            stack_info: arg_space: 8
          }
      {offset
-       cC5: // global
+       cDt: // global
            R1 = R1 + 1;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
      }
@@ -379,14 +122,14 @@
 
 ==================== Output Cmm ====================
 [M.B_con_entry() { //  []
-         { info_tbls: [(cCa,
+         { info_tbls: [(cDy,
                         label: M.B_con_info
                         rep: HeapRep 1 nonptrs { Con {tag: 1 descr:"main:M.B"} }
                         srt: Nothing)]
            stack_info: arg_space: 8
          }
      {offset
-       cCa: // global
+       cDy: // global
            R1 = R1 + 2;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
      }
@@ -396,14 +139,14 @@
 
 ==================== Output Cmm ====================
 [M.C_con_entry() { //  []
-         { info_tbls: [(cCf,
+         { info_tbls: [(cDD,
                         label: M.C_con_info
                         rep: HeapRep 1 nonptrs { Con {tag: 2 descr:"main:M.C"} }
                         srt: Nothing)]
            stack_info: arg_space: 8
          }
      {offset
-       cCf: // global
+       cDD: // global
            R1 = R1 + 3;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
      }
@@ -413,14 +156,14 @@
 
 ==================== Output Cmm ====================
 [M.D_con_entry() { //  []
-         { info_tbls: [(cCk,
+         { info_tbls: [(cDI,
                         label: M.D_con_info
                         rep: HeapRep 1 nonptrs { Con {tag: 3 descr:"main:M.D"} }
                         srt: Nothing)]
            stack_info: arg_space: 8
          }
      {offset
-       cCk: // global
+       cDI: // global
            R1 = R1 + 4;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
      }
@@ -430,14 +173,14 @@
 
 ==================== Output Cmm ====================
 [M.E_con_entry() { //  []
-         { info_tbls: [(cCp,
+         { info_tbls: [(cDN,
                         label: M.E_con_info
                         rep: HeapRep 1 nonptrs { Con {tag: 4 descr:"main:M.E"} }
                         srt: Nothing)]
            stack_info: arg_space: 8
          }
      {offset
-       cCp: // global
+       cDN: // global
            R1 = R1 + 5;   // CmmAssign
            call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
      }


=====================================
testsuite/tests/linters/notes.stdout
=====================================
@@ -1,41 +1,40 @@
-ref    compiler/GHC/Core/Coercion/Axiom.hs:463:2:     Note [RoughMap and rm_empty]
-ref    compiler/GHC/Core/Opt/OccurAnal.hs:983:7:     Note [Loop breaking]
-ref    compiler/GHC/Core/Opt/SetLevels.hs:1574:30:     Note [Top level scope]
-ref    compiler/GHC/Core/Opt/Simplify/Iteration.hs:2826:13:     Note [Case binder next]
-ref    compiler/GHC/Core/Opt/Simplify/Iteration.hs:4018:8:     Note [Lambda-bound unfoldings]
-ref    compiler/GHC/Core/Opt/Simplify/Utils.hs:1343:37:     Note [Gentle mode]
-ref    compiler/GHC/Core/Opt/Specialise.hs:1765:29:     Note [Arity decrease]
-ref    compiler/GHC/Core/TyCo/Rep.hs:1565:31:     Note [What prevents a constraint from floating]
-ref    compiler/GHC/Driver/DynFlags.hs:1245:49:     Note [Eta-reduction in -O0]
-ref    compiler/GHC/Driver/Main.hs:1762:34:     Note [simpleTidyPgm - mkBootModDetailsTc]
-ref    compiler/GHC/Hs/Expr.hs:194:63:     Note [Pending Splices]
-ref    compiler/GHC/Hs/Expr.hs:1738:87:     Note [Lifecycle of a splice]
-ref    compiler/GHC/Hs/Expr.hs:1774:7:     Note [Pending Splices]
-ref    compiler/GHC/Hs/Extension.hs:146:5:     Note [Strict argument type constraints]
-ref    compiler/GHC/Hs/Pat.hs:143:74:     Note [Lifecycle of a splice]
-ref    compiler/GHC/HsToCore/Pmc/Solver.hs:858:20:     Note [COMPLETE sets on data families]
-ref    compiler/GHC/HsToCore/Quote.hs:1476:7:     Note [How brackets and nested splices are handled]
-ref    compiler/GHC/Stg/Unarise.hs:442:32:     Note [Renaming during unarisation]
-ref    compiler/GHC/StgToCmm/Expr.hs:585:4:     Note [case on bool]
-ref    compiler/GHC/StgToCmm/Expr.hs:853:3:     Note [alg-alt heap check]
+ref    compiler/GHC/Core/Coercion/Axiom.hs:472:2:     Note [RoughMap and rm_empty]
+ref    compiler/GHC/Core/Opt/OccurAnal.hs:1157:7:     Note [Loop breaking]
+ref    compiler/GHC/Core/Opt/SetLevels.hs:1586:30:     Note [Top level scope]
+ref    compiler/GHC/Core/Opt/Simplify/Iteration.hs:2832:13:     Note [Case binder next]
+ref    compiler/GHC/Core/Opt/Simplify/Iteration.hs:4023:8:     Note [Lambda-bound unfoldings]
+ref    compiler/GHC/Core/Opt/Simplify/Utils.hs:1342:37:     Note [Gentle mode]
+ref    compiler/GHC/Core/Opt/Specialise.hs:1763:29:     Note [Arity decrease]
+ref    compiler/GHC/Core/TyCo/Rep.hs:1652:31:     Note [What prevents a constraint from floating]
+ref    compiler/GHC/Driver/DynFlags.hs:1251:52:     Note [Eta-reduction in -O0]
+ref    compiler/GHC/Driver/Main.hs:1749:34:     Note [simpleTidyPgm - mkBootModDetailsTc]
+ref    compiler/GHC/Hs/Expr.hs:191:63:     Note [Pending Splices]
+ref    compiler/GHC/Hs/Expr.hs:1727:87:     Note [Lifecycle of a splice]
+ref    compiler/GHC/Hs/Expr.hs:1763:7:     Note [Pending Splices]
+ref    compiler/GHC/Hs/Extension.hs:147:5:     Note [Strict argument type constraints]
+ref    compiler/GHC/Hs/Pat.hs:141:74:     Note [Lifecycle of a splice]
+ref    compiler/GHC/HsToCore/Pmc/Solver.hs:856:20:     Note [COMPLETE sets on data families]
+ref    compiler/GHC/HsToCore/Quote.hs:1487:7:     Note [How brackets and nested splices are handled]
+ref    compiler/GHC/Stg/Unarise.hs:438:32:     Note [Renaming during unarisation]
+ref    compiler/GHC/StgToCmm/Expr.hs:578:4:     Note [case on bool]
 ref    compiler/GHC/Tc/Gen/HsType.hs:556:56:     Note [Skolem escape prevention]
-ref    compiler/GHC/Tc/Gen/HsType.hs:2621:7:     Note [Matching a kind signature with a declaration]
-ref    compiler/GHC/Tc/Gen/Pat.hs:176:20:     Note [Typing patterns in pattern bindings]
-ref    compiler/GHC/Tc/Gen/Pat.hs:1127:7:     Note [Matching polytyped patterns]
-ref    compiler/GHC/Tc/Gen/Sig.hs:81:10:     Note [Overview of type signatures]
-ref    compiler/GHC/Tc/Gen/Splice.hs:356:16:     Note [How brackets and nested splices are handled]
-ref    compiler/GHC/Tc/Gen/Splice.hs:531:35:     Note [PendingRnSplice]
-ref    compiler/GHC/Tc/Gen/Splice.hs:655:7:     Note [How brackets and nested splices are handled]
-ref    compiler/GHC/Tc/Gen/Splice.hs:888:11:     Note [How brackets and nested splices are handled]
-ref    compiler/GHC/Tc/Instance/Family.hs:474:35:     Note [Constrained family instances]
-ref    compiler/GHC/Tc/Solver/Rewrite.hs:1009:7:     Note [Stability of rewriting]
-ref    compiler/GHC/Tc/TyCl.hs:1130:6:     Note [Unification variables need fresh Names]
-ref    compiler/GHC/Tc/Types/Constraint.hs:226:34:     Note [NonCanonical Semantics]
-ref    compiler/GHC/Types/Demand.hs:302:25:     Note [Preserving Boxity of results is rarely a win]
-ref    compiler/GHC/Unit/Module/Deps.hs:81:13:     Note [Structure of dep_boot_mods]
+ref    compiler/GHC/Tc/Gen/HsType.hs:2676:7:     Note [Matching a kind signature with a declaration]
+ref    compiler/GHC/Tc/Gen/Pat.hs:174:20:     Note [Typing patterns in pattern bindings]
+ref    compiler/GHC/Tc/Gen/Pat.hs:1163:7:     Note [Matching polytyped patterns]
+ref    compiler/GHC/Tc/Gen/Sig.hs:80:10:     Note [Overview of type signatures]
+ref    compiler/GHC/Tc/Gen/Splice.hs:358:16:     Note [How brackets and nested splices are handled]
+ref    compiler/GHC/Tc/Gen/Splice.hs:533:35:     Note [PendingRnSplice]
+ref    compiler/GHC/Tc/Gen/Splice.hs:657:7:     Note [How brackets and nested splices are handled]
+ref    compiler/GHC/Tc/Gen/Splice.hs:891:11:     Note [How brackets and nested splices are handled]
+ref    compiler/GHC/Tc/Instance/Family.hs:406:35:     Note [Constrained family instances]
+ref    compiler/GHC/Tc/Solver/Rewrite.hs:1010:7:     Note [Stability of rewriting]
+ref    compiler/GHC/Tc/TyCl.hs:1316:6:     Note [Unification variables need fresh Names]
+ref    compiler/GHC/Tc/Types/Constraint.hs:206:38:     Note [NonCanonical Semantics]
+ref    compiler/GHC/Types/Demand.hs:301:25:     Note [Preserving Boxity of results is rarely a win]
+ref    compiler/GHC/Unit/Module/Deps.hs:83:13:     Note [Structure of dep_boot_mods]
 ref    compiler/GHC/Utils/Monad.hs:410:34:     Note [multiShotIO]
 ref    compiler/Language/Haskell/Syntax/Binds.hs:200:31:     Note [fun_id in Match]
-ref    configure.ac:210:10:     Note [Linking ghc-bin against threaded stage0 RTS]
+ref    configure.ac:203:10:     Note [Linking ghc-bin against threaded stage0 RTS]
 ref    docs/core-spec/core-spec.mng:177:6:     Note [TyBinders]
 ref    hadrian/src/Expression.hs:145:30:     Note [Linking ghc-bin against threaded stage0 RTS]
 ref    linters/lint-notes/Notes.hs:32:29:     Note [" <> T.unpack x <> "]


=====================================
testsuite/tests/simplCore/should_compile/T22375.hs
=====================================
@@ -1,12 +1,19 @@
 module T22375 where
 
-data X = A | B | C | D | E
+data X
+  = A | B | C | D | E
+  | F | G | H | I | J
   deriving Eq
 
 f :: X -> Int -> Int
 f x v
-  | x == A = 1 + v
-  | x == B = 2 + v
-  | x == C = 3 + v
-  | x == D = 4 + v
-  | otherwise = 5 + v
+  | x == A = v + 1
+  | x == B = v + 2
+  | x == C = v + 3
+  | x == D = v + 4
+  | x == E = v + 5
+  | x == F = v + 6
+  | x == G = v + 7
+  | x == H = v + 8
+  | x == I = v + 9
+  | otherwise = v + 10


=====================================
testsuite/tests/simplCore/should_compile/T22375.stderr
=====================================
@@ -1,7 +1,7 @@
 
 ==================== Tidy Core ====================
 Result size of Tidy Core
-  = {terms: 76, types: 41, coercions: 0, joins: 0/0}
+  = {terms: 96, types: 41, coercions: 0, joins: 0/0}
 
 -- RHS size: {terms: 14, types: 9, coercions: 0, joins: 0/0}
 T22375.$fEqX_$c== :: X -> X -> Bool
@@ -50,22 +50,27 @@ T22375.$fEqX [InlPrag=CONLIKE] :: Eq X
 T22375.$fEqX
   = GHC.Classes.C:Eq @X T22375.$fEqX_$c== T22375.$fEqX_$c/=
 
--- RHS size: {terms: 24, types: 3, coercions: 0, joins: 0/0}
+-- RHS size: {terms: 44, types: 3, coercions: 0, joins: 0/0}
 T22375.$wf [InlPrag=[2]] :: X -> GHC.Prim.Int# -> GHC.Prim.Int#
 [GblId[StrictWorker([!])],
  Arity=2,
  Str=<1L><L>,
  Unf=Unf{Src=<vanilla>, TopLvl=True,
          Value=True, ConLike=True, WorkFree=True, Expandable=True,
-         Guidance=IF_ARGS [64 0] 55 0}]
+         Guidance=IF_ARGS [119 0] 110 0}]
 T22375.$wf
   = \ (x :: X) (ww :: GHC.Prim.Int#) ->
       case x of {
-        A -> GHC.Prim.+# 1# ww;
-        B -> GHC.Prim.+# 2# ww;
-        C -> GHC.Prim.+# 3# ww;
-        D -> GHC.Prim.+# 4# ww;
-        E -> GHC.Prim.+# 5# ww
+        A -> GHC.Prim.+# ww 1#;
+        B -> GHC.Prim.+# ww 2#;
+        C -> GHC.Prim.+# ww 3#;
+        D -> GHC.Prim.+# ww 4#;
+        E -> GHC.Prim.+# ww 5#;
+        F -> GHC.Prim.+# ww 6#;
+        G -> GHC.Prim.+# ww 7#;
+        H -> GHC.Prim.+# ww 8#;
+        I -> GHC.Prim.+# ww 9#;
+        J -> GHC.Prim.+# ww 10#
       }
 
 -- RHS size: {terms: 12, types: 5, coercions: 0, joins: 0/0}


=====================================
testsuite/tests/simplCore/should_compile/T22375DataFamily.hs
=====================================
@@ -6,13 +6,20 @@ import Data.Kind
 
 type X :: Type -> Type
 data family X a
-data instance X () = A | B | C | D | E
+data instance X ()
+  = A | B | C | D | E
+  | F | G | H | I | J
   deriving Eq
 
 f :: X () -> Int -> Int
 f x v
-  | x == A = 1 + v
-  | x == B = 2 + v
-  | x == C = 3 + v
-  | x == D = 4 + v
-  | otherwise = 5 + v
+  | x == A = v + 1
+  | x == B = v + 2
+  | x == C = v + 3
+  | x == D = v + 4
+  | x == E = v + 5
+  | x == F = v + 6
+  | x == G = v + 7
+  | x == H = v + 8
+  | x == I = v + 9
+  | otherwise = v + 10


=====================================
testsuite/tests/simplCore/should_compile/T22375DataFamily.stderr
=====================================
@@ -1,7 +1,7 @@
 
 ==================== Tidy Core ====================
 Result size of Tidy Core
-  = {terms: 86, types: 65, coercions: 15, joins: 0/0}
+  = {terms: 116, types: 75, coercions: 25, joins: 0/0}
 
 -- RHS size: {terms: 1, types: 0, coercions: 2, joins: 0/0}
 T22375DataFamily.$WA [InlPrag=INLINE[final] CONLIKE] :: X ()
@@ -58,6 +58,61 @@ T22375DataFamily.$WE
     `cast` (Sym (T22375DataFamily.D:R:XUnit0[0])
             :: T22375DataFamily.R:XUnit ~R# X ())
 
+-- RHS size: {terms: 1, types: 0, coercions: 2, joins: 0/0}
+T22375DataFamily.$WF [InlPrag=INLINE[final] CONLIKE] :: X ()
+[GblId[DataConWrapper],
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=False)}]
+T22375DataFamily.$WF
+  = T22375DataFamily.F
+    `cast` (Sym (T22375DataFamily.D:R:XUnit0[0])
+            :: T22375DataFamily.R:XUnit ~R# X ())
+
+-- RHS size: {terms: 1, types: 0, coercions: 2, joins: 0/0}
+T22375DataFamily.$WG [InlPrag=INLINE[final] CONLIKE] :: X ()
+[GblId[DataConWrapper],
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=False)}]
+T22375DataFamily.$WG
+  = T22375DataFamily.G
+    `cast` (Sym (T22375DataFamily.D:R:XUnit0[0])
+            :: T22375DataFamily.R:XUnit ~R# X ())
+
+-- RHS size: {terms: 1, types: 0, coercions: 2, joins: 0/0}
+T22375DataFamily.$WH [InlPrag=INLINE[final] CONLIKE] :: X ()
+[GblId[DataConWrapper],
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=False)}]
+T22375DataFamily.$WH
+  = T22375DataFamily.H
+    `cast` (Sym (T22375DataFamily.D:R:XUnit0[0])
+            :: T22375DataFamily.R:XUnit ~R# X ())
+
+-- RHS size: {terms: 1, types: 0, coercions: 2, joins: 0/0}
+T22375DataFamily.$WI [InlPrag=INLINE[final] CONLIKE] :: X ()
+[GblId[DataConWrapper],
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=False)}]
+T22375DataFamily.$WI
+  = T22375DataFamily.I
+    `cast` (Sym (T22375DataFamily.D:R:XUnit0[0])
+            :: T22375DataFamily.R:XUnit ~R# X ())
+
+-- RHS size: {terms: 1, types: 0, coercions: 2, joins: 0/0}
+T22375DataFamily.$WJ [InlPrag=INLINE[final] CONLIKE] :: X ()
+[GblId[DataConWrapper],
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=0,unsat_ok=True,boring_ok=False)}]
+T22375DataFamily.$WJ
+  = T22375DataFamily.J
+    `cast` (Sym (T22375DataFamily.D:R:XUnit0[0])
+            :: T22375DataFamily.R:XUnit ~R# X ())
+
 -- RHS size: {terms: 14, types: 11, coercions: 2, joins: 0/0}
 T22375DataFamily.$fEqX_$c== :: X () -> X () -> Bool
 [GblId,
@@ -133,7 +188,7 @@ T22375DataFamily.$fEqX
   = GHC.Classes.C:Eq
       @(X ()) T22375DataFamily.$fEqX_$c== T22375DataFamily.$fEqX_$c/=
 
--- RHS size: {terms: 24, types: 4, coercions: 1, joins: 0/0}
+-- RHS size: {terms: 44, types: 4, coercions: 1, joins: 0/0}
 T22375DataFamily.$wf [InlPrag=[2]]
   :: X () -> GHC.Prim.Int# -> GHC.Prim.Int#
 [GblId[StrictWorker([!])],
@@ -141,18 +196,23 @@ T22375DataFamily.$wf [InlPrag=[2]]
  Str=<1L><L>,
  Unf=Unf{Src=<vanilla>, TopLvl=True,
          Value=True, ConLike=True, WorkFree=True, Expandable=True,
-         Guidance=IF_ARGS [64 0] 55 0}]
+         Guidance=IF_ARGS [119 0] 110 0}]
 T22375DataFamily.$wf
   = \ (x :: X ()) (ww :: GHC.Prim.Int#) ->
       case x
            `cast` (T22375DataFamily.D:R:XUnit0[0]
                    :: X () ~R# T22375DataFamily.R:XUnit)
       of {
-        A -> GHC.Prim.+# 1# ww;
-        B -> GHC.Prim.+# 2# ww;
-        C -> GHC.Prim.+# 3# ww;
-        D -> GHC.Prim.+# 4# ww;
-        E -> GHC.Prim.+# 5# ww
+        A -> GHC.Prim.+# ww 1#;
+        B -> GHC.Prim.+# ww 2#;
+        C -> GHC.Prim.+# ww 3#;
+        D -> GHC.Prim.+# ww 4#;
+        E -> GHC.Prim.+# ww 5#;
+        F -> GHC.Prim.+# ww 6#;
+        G -> GHC.Prim.+# ww 7#;
+        H -> GHC.Prim.+# ww 8#;
+        I -> GHC.Prim.+# ww 9#;
+        J -> GHC.Prim.+# ww 10#
       }
 
 -- RHS size: {terms: 12, types: 6, coercions: 0, joins: 0/0}



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/de1c039cbf4ed8bb07d33fb0768e55a8e79ef3fd...bfbd33d9a3af8c2866d1215937957a0a0c4d1457

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/de1c039cbf4ed8bb07d33fb0768e55a8e79ef3fd...bfbd33d9a3af8c2866d1215937957a0a0c4d1457
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231209/7df92103/attachment-0001.html>


More information about the ghc-commits mailing list