[Git][ghc/ghc][wip/T24124] 2 commits: CorePrep: Attach evaldUnfolding to floats to detect more values

Sebastian Graf (@sgraf812) gitlab at gitlab.haskell.org
Fri Oct 27 18:51:00 UTC 2023



Sebastian Graf pushed to branch wip/T24124 at Glasgow Haskell Compiler / GHC


Commits:
db6b4d53 by Sebastian Graf at 2023-10-27T20:50:26+02:00
CorePrep: Attach evaldUnfolding to floats to detect more values

See `Note [Pin evaluatedness on floats]`.

- - - - -
18ee9661 by Sebastian Graf at 2023-10-27T20:50:50+02:00
CorePrep: Treat seq# and dataToTag# as strict functions (#24124)

See the new `Note [seq# magic]`.
I also implemented a new `Note [Flatten case-bind]`.

Fixes #24124.

- - - - -


5 changed files:

- compiler/GHC/Core/Opt/ConstantFold.hs
- compiler/GHC/CoreToStg/Prep.hs
- + testsuite/tests/core-to-stg/T24124.hs
- + testsuite/tests/core-to-stg/T24124.stderr
- testsuite/tests/core-to-stg/all.T


Changes:

=====================================
compiler/GHC/Core/Opt/ConstantFold.hs
=====================================
@@ -2028,6 +2028,9 @@ is:
      case e of <transformed alts>
   by GHC.Core.Opt.ConstantFold.caseRules; see Note [caseRules for dataToTag]
 
+* Similar to Note [seq# magic], we case-bind the arg of dataToTag# in
+  GHC.CoreToStg.Prep.
+
 See #15696 for a long saga.
 -}
 
@@ -2108,6 +2111,19 @@ Implementing seq#.  The compiler has magic for SeqOp in
 - Simplify.addEvals records evaluated-ness for the result; see
   Note [Adding evaluatedness info to pattern-bound variables]
   in GHC.Core.Opt.Simplify.Iteration
+
+- GHC.CoreToStg.Prep: Finally case-bind the arg of seq#, e.g.,
+
+    case seq# (f 13) s of (# r, s' #) -> ...
+    ==>
+    case f 13 of sat of __DEFAULT ->
+    case seq# sat s of (# r, s' #) -> ...,
+
+  encoding its call-by-value nature. Note that strictness analysis and the
+  Simplifier will never see this case binding, so #5129 as above is not an
+  issue. Plus, CorePrep never case-binds an argument variable anyway.
+  (Exploiting CbV-ness for argument vars is a job for tag inference, see
+  #15226.)
 -}
 
 seqRule :: RuleM CoreExpr


=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -70,6 +70,7 @@ import GHC.Types.Unique.Supply
 
 import Data.List        ( unfoldr )
 import Control.Monad
+import GHC.Builtin.PrimOps
 
 {-
 Note [CorePrep Overview]
@@ -679,9 +680,11 @@ cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
                else warnPprTrace True "CorePrep: silly extra arguments:" (ppr bndr) $
                                -- Note [Silly extra arguments]
                     (do { v <- newVar (idType bndr)
-                        ; let float = mkNonRecFloat env topDmd False v rhs2
+                        ; let float@(Float (NonRec v' _) _ _) =
+                                mkNonRecFloat env topDmd False v rhs2
+                        -- v' has demand info and possibly evaldUnfolding
                         ; return ( snocFloat floats2 float
-                                 , cpeEtaExpand arity (Var v)) })
+                                 , cpeEtaExpand arity (Var v')) })
 
         -- Wrap floating ticks
        ; let (floats4, rhs4) = wrapTicks floats3 rhs3
@@ -848,7 +851,11 @@ cpeRhsE env (Case scrut bndr ty alts)
                where err = mkImpossibleExpr ty "cpeRhsE: missing case alternative"
        ; alts'' <- mapM (sat_alt env') alts'
 
-       ; return (floats, Case scrut' bndr2 ty alts'') }
+       ; case alts'' of
+           [Alt DEFAULT _ rhs] -- See Note [Flatten case-binds]
+             | float <- mkNonRecFloat env evalDmd (isUnliftedType ty) bndr2 scrut'
+             -> return (snocFloat floats float, rhs)
+           _ -> return (floats, Case scrut' bndr2 ty alts'') }
   where
     sat_alt env (Alt con bs rhs)
        = do { (env2, bs') <- cpCloneBndrs env bs
@@ -1087,16 +1094,24 @@ cpeApp top_env expr
            ; mb_saturate hd app floats unsat_ticks depth }
         where
           depth = val_args args
-          stricts = case idDmdSig v of
-                            DmdSig (DmdType _ demands)
-                              | listLengthCmp demands depth /= GT -> demands
-                                    -- length demands <= depth
-                              | otherwise                         -> []
-                -- If depth < length demands, then we have too few args to
-                -- satisfy strictness  info so we have to  ignore all the
-                -- strictness info, e.g. + (error "urk")
-                -- Here, we can't evaluate the arg strictly, because this
-                -- partial application might be seq'd
+          stricts
+            | PrimOpId op _ <- idDetails v
+            , Just demands <- case op of
+                SeqOp       -> Just [evalDmd,topDmd] -- See Note [seq# magic]
+                DataToTagOp -> Just [evalDmd]        -- See Note [dataToTag# magic]
+                _           -> Nothing
+            , listLengthCmp demands depth /= GT -- length demands <= depth
+            = pprTrace "here" (ppr op $$ ppr args) demands
+
+            | DmdSig (DmdType _ demands) <- idDmdSig v
+            , listLengthCmp demands depth /= GT -- length demands <= depth
+            = demands
+            | otherwise
+            = [] -- If depth < length demands, then we have too few args to
+                 -- satisfy strictness  info so we have to  ignore all the
+                 -- strictness info, e.g. + (error "urk")
+                 -- Here, we can't evaluate the arg strictly, because this
+                 -- partial application might be seq'd
 
         -- We inlined into something that's not a var and has no args.
         -- Bounce it back up to cpeRhsE.
@@ -1484,8 +1499,10 @@ cpeArg env dmd arg
          else do { v <- newVar arg_ty
                  -- See Note [Eta expansion of arguments in CorePrep]
                  ; let arg3 = cpeEtaExpandArg env arg2
-                       arg_float = mkNonRecFloat env dmd is_unlifted v arg3
-                 ; return (snocFloat floats2 arg_float, varToCoreExpr v) }
+                       arg_float@(Float (NonRec v' _) _ _) =
+                         mkNonRecFloat env dmd is_unlifted v arg3
+                       -- v' has demand info and possibly evaldUnfolding
+                 ; return (snocFloat floats2 arg_float, varToCoreExpr v') }
        }
 
 cpeEtaExpandArg :: CorePrepEnv -> CoreArg -> CoreArg
@@ -1704,6 +1721,81 @@ Note [Pin demand info on floats]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We pin demand info on floated lets, so that we can see the one-shot thunks.
 
+Note [Pin evaluatedness on floats]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider a call to a CBV function, such as a DataCon worker with strict fields:
+
+  data T a = T !a
+  ... f (T e) ...
+
+During ANFisation, we will `mkNonRecFloat` for `e`, binding it to a
+fresh binder `sat`.
+Now there are two interesting cases:
+
+ 1. When `e=Just y` is a value, we will float `sat=Just y` as far as possible,
+    to top-level, even. It is important that we mark `sat` as evaluated (via
+    setting its unfolding to `evaldUnfolding`), otherwise we get a superfluous
+    thunk to carry out the field seq on T's field, because
+    `exprIsHNF sat == False`:
+
+      let sat = Just y in
+      let sat2 = case sat of x { __DEFAULT } -> T x in
+        -- NONONO, want just `sat2 = T x`
+      f sat2
+
+    This happened in $walexGetByte, where the thunk caused additional
+    allocation.
+
+ 2. Similarly, when `e` is not a value, we still know that it is strictly
+    evaluated. Hence it is going to be case-bound, and we anticipate that `sat`
+    will be a case binder which is *always* evaluated.
+    Hence in this case, we also mark `sat` as evaluated via its unfolding.
+    This happened in GHC.Linker.Deps.$wgetLinkDeps, where without
+    `evaldUnfolding` we ended up with this:
+
+      Word64Map = ... | Bin ... ... !Word64Map !Word64Map
+      case ... of { Word64Map.Bin a b l r ->
+      case insert ... of sat { __DEFAULT ->
+      case Word64Map.Bin a b l sat of sat2 { __DEFAULT ->
+      f sat2
+      }}}
+
+    Note that *the DataCon app `Bin a b l sat` was case-bound*, because it was
+    not detected to be a value according to `exprIsHNF`.
+    That is because the strict field `sat` lacked the `evaldUnfolding`,
+    although it ended up being case-bound.
+
+    There is one small wrinkle: It could be that `sat=insert ...` floats to
+    top-level, where it is not eagerly evaluated. In this case, we may not
+    give `sat` an `evaldUnfolding`. We detect this case by looking at the
+    `FloatInfo` of `sat=insert ...`: If it says `TopLvlFloatable`, we are
+    conservative and will not give `sat` an `evaldUnfolding`.
+
+TLDR; when creating a new float `sat=e` in `mkNonRecFloat`, propagate `sat` with
+an `evaldUnfolding` if either
+
+ 1. `e` is a value, or
+ 2. `sat=e` is case-bound, but won't float to top-level.
+
+Note [Flatten case-binds]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider the following program involving seq#:
+
+  data T a = T !a
+  ... seq# (case x of y { __DEFAULT -> T y }) s ...
+  ==> {ANFise, exploiting CbV-ness as in Note [seq# magic] and cpe_app}
+  ... case (case x of y { __DEFAULT -> T y }) of sat { __DEFAULT -> seq# sat s }
+
+(Why didn't the Simplifier float out `case x of y`? Because `seq#` is lazy;
+see Note [seq# magic].)
+Note the case-of-case. This is not bad per sé, but we can easily flatten
+this situation by calling `mkNonRecFloat` to create strict binding `y=x`:
+
+  ... case x of y { __DEFAULT -> let sat = T y in seq# sat s } ...
+
+where `T y` is simply let-bound, thus far less likely to confuse passes
+downstream.
+
 Note [Speculative evaluation]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Since call-by-value is much cheaper than call-by-need, we case-bind arguments
@@ -1984,9 +2076,8 @@ zipManyFloats = foldr zipFloats emptyFloats
 
 mkNonRecFloat :: CorePrepEnv -> Demand -> Bool -> Id -> CpeRhs -> FloatingBind
 mkNonRecFloat env dmd is_unlifted bndr rhs = -- pprTraceWith "mkNonRecFloat" ppr $
-  Float (NonRec bndr' rhs) bound info
+  Float (NonRec bndr2 rhs) bound info
   where
-    bndr' = setIdDemandInfo bndr dmd -- See Note [Pin demand info on floats]
     (bound,info)
       | is_lifted, is_hnf        = (LetBound, TopLvlFloatable)
           -- is_lifted: We currently don't allow unlifted values at the
@@ -2017,6 +2108,14 @@ mkNonRecFloat env dmd is_unlifted bndr rhs = -- pprTraceWith "mkNonRecFloat" ppr
     is_rec_call = (`elemUnVarSet` cpe_rec_ids env)
     is_data_con = isJust . isDataConId_maybe
 
+    bndr1 = bndr `setIdDemandInfo` dmd -- See Note [Pin demand info on floats]
+    bndr2
+      | is_hnf || (bound == CaseBound && info /= TopLvlFloatable)
+        -- See Note [Pin evaluatedness on floats]
+      = bndr1 `setIdUnfolding` evaldUnfolding
+      | otherwise
+      = bndr1
+
 -- | Wrap floats around an expression
 wrapBinds :: Floats -> CpeBody -> CpeBody
 wrapBinds floats body


=====================================
testsuite/tests/core-to-stg/T24124.hs
=====================================
@@ -0,0 +1,11 @@
+module T24124 where
+
+import Control.Exception
+
+data StrictPair a b = MkStrictPair !a !b
+
+testFun :: a -> b -> IO (StrictPair a b)
+testFun x y = do
+  x' <- evaluate x
+  evaluate (MkStrictPair x' y)
+  -- CorePrep should evaluate `MkStrictPair` and its fields strictly


=====================================
testsuite/tests/core-to-stg/T24124.stderr
=====================================
@@ -0,0 +1,193 @@
+
+==================== CorePrep ====================
+Result size of CorePrep
+  = {terms: 119, types: 108, coercions: 22, joins: 0/1}
+
+-- RHS size: {terms: 13, types: 8, coercions: 0, joins: 0/0}
+T24124.$WMkStrictPair [InlPrag=INLINE[final] CONLIKE]
+  :: forall a b. a %1 -> b %1 -> T24124.StrictPair a b
+[GblId[DataConWrapper], Arity=2, Str=<SL><SL>, Unf=OtherCon []]
+T24124.$WMkStrictPair
+  = \ (@a)
+      (@b)
+      (conrep [Occ=Once1] :: a)
+      (conrep1 [Occ=Once1] :: b) ->
+      case conrep of conrep2 [Occ=Once1, Dmd=SL] { __DEFAULT ->
+      case conrep1 of conrep3 [Occ=Once1, Dmd=SL] { __DEFAULT ->
+      T24124.MkStrictPair @a @b conrep2 conrep3
+      }
+      }
+
+-- RHS size: {terms: 20, types: 27, coercions: 0, joins: 0/1}
+T24124.testFun1
+  :: forall a b.
+     a
+     -> b
+     -> GHC.Prim.State# GHC.Prim.RealWorld
+     -> (# GHC.Prim.State# GHC.Prim.RealWorld, T24124.StrictPair a b #)
+[GblId, Arity=3, Str=<L><ML><L>, Unf=OtherCon []]
+T24124.testFun1
+  = \ (@a)
+      (@b)
+      (x [Occ=Once1] :: a)
+      (y [Occ=Once1, OS=OneShot] :: b)
+      (s [Occ=Once1, OS=OneShot]
+         :: GHC.Prim.State# GHC.Prim.RealWorld) ->
+      case GHC.Prim.seq# @a @GHC.Prim.RealWorld x s of
+      { (# ipv [Occ=Once1], ipv1 [Occ=Once1] #) ->
+      case y of conrep [Occ=Once1, Dmd=SL] { __DEFAULT ->
+      let {
+        sat [Occ=Once1, Dmd=SL] :: T24124.StrictPair a b
+        [LclId, Unf=OtherCon []]
+        sat = T24124.MkStrictPair @a @b ipv1 conrep } in
+      GHC.Prim.seq# @(T24124.StrictPair a b) @GHC.Prim.RealWorld sat ipv
+      }
+      }
+
+-- RHS size: {terms: 9, types: 8, coercions: 15, joins: 0/0}
+T24124.testFun
+  :: forall a b. a -> b -> GHC.Types.IO (T24124.StrictPair a b)
+[GblId, Arity=3, Str=<L><ML><L>, Unf=OtherCon []]
+T24124.testFun
+  = (\ (@a)
+       (@b)
+       (eta [Occ=Once1] :: a)
+       (eta [Occ=Once1] :: b)
+       (eta [Occ=Once1] :: GHC.Prim.State# GHC.Prim.RealWorld) ->
+       T24124.testFun1 @a @b eta eta eta)
+    `cast` (forall (a :: <*>_N) (b :: <*>_N).
+            <a>_R
+            %<'GHC.Types.Many>_N ->_R <b>_R
+            %<'GHC.Types.Many>_N ->_R Sym (GHC.Types.N:IO[0]
+                                               <T24124.StrictPair a b>_R)
+            :: (forall a b.
+                a
+                -> b
+                -> GHC.Prim.State# GHC.Prim.RealWorld
+                -> (# GHC.Prim.State# GHC.Prim.RealWorld, T24124.StrictPair a b #))
+               ~R# (forall a b. a -> b -> GHC.Types.IO (T24124.StrictPair a b)))
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T24124.$trModule4 :: GHC.Prim.Addr#
+[GblId, Unf=OtherCon []]
+T24124.$trModule4 = "main"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T24124.$trModule3 :: GHC.Types.TrName
+[GblId, Unf=OtherCon []]
+T24124.$trModule3 = GHC.Types.TrNameS T24124.$trModule4
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T24124.$trModule2 :: GHC.Prim.Addr#
+[GblId, Unf=OtherCon []]
+T24124.$trModule2 = "T24124"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T24124.$trModule1 :: GHC.Types.TrName
+[GblId, Unf=OtherCon []]
+T24124.$trModule1 = GHC.Types.TrNameS T24124.$trModule2
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+T24124.$trModule :: GHC.Types.Module
+[GblId, Unf=OtherCon []]
+T24124.$trModule
+  = GHC.Types.Module T24124.$trModule3 T24124.$trModule1
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+$krep :: GHC.Types.KindRep
+[GblId, Unf=OtherCon []]
+$krep = GHC.Types.KindRepVar 1#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+$krep1 :: GHC.Types.KindRep
+[GblId, Unf=OtherCon []]
+$krep1 = GHC.Types.KindRepVar 0#
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T24124.$tcStrictPair2 :: GHC.Prim.Addr#
+[GblId, Unf=OtherCon []]
+T24124.$tcStrictPair2 = "StrictPair"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T24124.$tcStrictPair1 :: GHC.Types.TrName
+[GblId, Unf=OtherCon []]
+T24124.$tcStrictPair1 = GHC.Types.TrNameS T24124.$tcStrictPair2
+
+-- RHS size: {terms: 7, types: 0, coercions: 0, joins: 0/0}
+T24124.$tcStrictPair :: GHC.Types.TyCon
+[GblId, Unf=OtherCon []]
+T24124.$tcStrictPair
+  = GHC.Types.TyCon
+      9300255393514929474#Word64
+      9110283622559092784#Word64
+      T24124.$trModule
+      T24124.$tcStrictPair1
+      0#
+      GHC.Types.krep$*->*->*
+
+-- RHS size: {terms: 3, types: 2, coercions: 0, joins: 0/0}
+$krep2 :: [GHC.Types.KindRep]
+[GblId, Unf=OtherCon []]
+$krep2
+  = GHC.Types.:
+      @GHC.Types.KindRep $krep (GHC.Types.[] @GHC.Types.KindRep)
+
+-- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
+$krep3 :: [GHC.Types.KindRep]
+[GblId, Unf=OtherCon []]
+$krep3 = GHC.Types.: @GHC.Types.KindRep $krep1 $krep2
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+$krep4 :: GHC.Types.KindRep
+[GblId, Unf=OtherCon []]
+$krep4 = GHC.Types.KindRepTyConApp T24124.$tcStrictPair $krep3
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+$krep5 :: GHC.Types.KindRep
+[GblId, Unf=OtherCon []]
+$krep5 = GHC.Types.KindRepFun $krep $krep4
+
+-- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
+T24124.$tc'MkStrictPair1 [InlPrag=[~]] :: GHC.Types.KindRep
+[GblId, Unf=OtherCon []]
+T24124.$tc'MkStrictPair1 = GHC.Types.KindRepFun $krep1 $krep5
+
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+T24124.$tc'MkStrictPair3 :: GHC.Prim.Addr#
+[GblId, Unf=OtherCon []]
+T24124.$tc'MkStrictPair3 = "'MkStrictPair"#
+
+-- RHS size: {terms: 2, types: 0, coercions: 0, joins: 0/0}
+T24124.$tc'MkStrictPair2 :: GHC.Types.TrName
+[GblId, Unf=OtherCon []]
+T24124.$tc'MkStrictPair2
+  = GHC.Types.TrNameS T24124.$tc'MkStrictPair3
+
+-- RHS size: {terms: 7, types: 0, coercions: 0, joins: 0/0}
+T24124.$tc'MkStrictPair :: GHC.Types.TyCon
+[GblId, Unf=OtherCon []]
+T24124.$tc'MkStrictPair
+  = GHC.Types.TyCon
+      2381261223169708323#Word64
+      3151447712495713176#Word64
+      T24124.$trModule
+      T24124.$tc'MkStrictPair2
+      2#
+      T24124.$tc'MkStrictPair1
+
+-- RHS size: {terms: 7, types: 6, coercions: 7, joins: 0/0}
+T24124.MkStrictPair [InlPrag=CONLIKE]
+  :: forall {a} {b}. a %1 -> b %1 -> T24124.StrictPair a b
+[GblId[DataCon], Arity=2, Caf=NoCafRefs, Unf=OtherCon []]
+T24124.MkStrictPair
+  = (\ (@a[sk:1])
+       (@b[sk:1])
+       (eta [Occ=Once1] :: a)
+       (eta [Occ=Once1] :: b) ->
+       T24124.MkStrictPair @a[sk:1] @b[sk:1] eta eta)
+    `cast` (<forall {a} {b}. a %1 -> b %1 -> T24124.StrictPair a b>_R
+            :: (forall {a} {b}. a %1 -> b %1 -> T24124.StrictPair a b)
+               ~R# (forall {a} {b}. a %1 -> b %1 -> T24124.StrictPair a b))
+
+
+


=====================================
testsuite/tests/core-to-stg/all.T
=====================================
@@ -3,3 +3,4 @@
 test('T19700', normal, compile, ['-O'])
 test('T23270', [grep_errmsg(r'patError')], compile, ['-O0 -dsuppress-uniques -ddump-prep'])
 test('T23914', normal, compile, ['-O'])
+test('T24124', [grep_errmsg(r'= .*MkStrictPair.*in')], compile, ['-O -dsuppress-uniques -ddump-prep'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/bc604a0f7f941536ba2bd3a375bc554de2431098...18ee96617f034b03ea418c0117e3318a0c1d8594

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/bc604a0f7f941536ba2bd3a375bc554de2431098...18ee96617f034b03ea418c0117e3318a0c1d8594
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20231027/a9d9bb18/attachment-0001.html>


More information about the ghc-commits mailing list