[Git][ghc/ghc][wip/T18154] Don't attach CPR signatures to NOINLINE data structures (#18154)

Sebastian Graf gitlab at gitlab.haskell.org
Wed Sep 30 14:12:22 UTC 2020



Sebastian Graf pushed to branch wip/T18154 at Glasgow Haskell Compiler / GHC


Commits:
57c598ac by Sebastian Graf at 2020-09-30T16:11:33+02:00
Don't attach CPR signatures to NOINLINE data structures (#18154)

Because the generated `KindRep`s don't have an unfolding, !3230 did not
actually stop to compute, attach and serialise unnecessary CPR
signatures for them. As already said in
`Note [CPR for data structures]`, that leads to bloated interface
files which is ultimately quadratic for Nested CPR.

So we don't attach any CPR signature to bindings that

  * Are not thunks (because thunks are not in WHNF)
  * Have arity 0 (which means the top-level constructor is not a lambda)

If the data structure has an unfolding, we continue to look through it.
If not (as is the case for `KindRep`s), we look at the unchanged CPR
signature and see `topCprType`, as expected.

- - - - -


2 changed files:

- compiler/GHC/Core/Opt/CprAnal.hs
- testsuite/tests/simplCore/should_compile/T7360.stderr


Changes:

=====================================
compiler/GHC/Core/Opt/CprAnal.hs
=====================================
@@ -118,9 +118,9 @@ cprAnalTopBind :: AnalEnv
                -> CoreBind
                -> (AnalEnv, CoreBind)
 cprAnalTopBind env (NonRec id rhs)
-  = (extendAnalEnv env id' (idCprInfo id'), NonRec id' rhs')
+  = (env', NonRec id' rhs')
   where
-    (id', rhs') = cprAnalBind TopLevel env id rhs
+    (id', rhs', env') = cprAnalBind TopLevel env id rhs
 
 cprAnalTopBind env (Rec pairs)
   = (env', Rec pairs')
@@ -178,7 +178,7 @@ cprAnal' env (Lam var body)
   | otherwise
   = (lam_ty, Lam var body')
   where
-    env'             = extendAnalEnvForDemand env var (idDemandInfo var)
+    env'             = extendSigEnvForDemand env var (idDemandInfo var)
     (body_ty, body') = cprAnal env' body
     lam_ty           = abstractCprTy body_ty
 
@@ -194,9 +194,8 @@ cprAnal' env (Case scrut case_bndr ty alts)
 cprAnal' env (Let (NonRec id rhs) body)
   = (body_ty, Let (NonRec id' rhs') body')
   where
-    (id', rhs')      = cprAnalBind NotTopLevel env id rhs
-    env'             = extendAnalEnv env id' (idCprInfo id')
-    (body_ty, body') = cprAnal env' body
+    (id', rhs', env') = cprAnalBind NotTopLevel env id rhs
+    (body_ty, body')  = cprAnal env' body
 
 cprAnal' env (Let (Rec pairs) body)
   = body_ty `seq` (body_ty, Let (Rec pairs') body')
@@ -233,15 +232,15 @@ cprTransform env id
     sig
   where
     sig
-      -- See Note [CPR for expandable unfoldings]
-      | Just rhs <- cprExpandUnfolding_maybe id
+      -- Top-level binding, local let-binding or case binder
+      | Just sig <- lookupSigEnv env id
+      = getCprSig sig
+      -- See Note [CPR for data structures]
+      | Just rhs <- cprDataStructureUnfolding_maybe id
       = fst $ cprAnal env rhs
       -- Imported function or data con worker
       | isGlobalId id
       = getCprSig (idCprInfo id)
-      -- Local let-bound
-      | Just sig <- lookupSigEnv env id
-      = getCprSig sig
       | otherwise
       = topCprType
 
@@ -251,46 +250,43 @@ cprTransform env id
 
 -- Recursive bindings
 cprFix :: TopLevelFlag
-       -> AnalEnv                            -- Does not include bindings for this binding
+       -> AnalEnv                    -- Does not include bindings for this binding
        -> [(Id,CoreExpr)]
-       -> (AnalEnv, [(Id,CoreExpr)]) -- Binders annotated with stricness info
-
-cprFix top_lvl env orig_pairs
-  = loop 1 initial_pairs
+       -> (AnalEnv, [(Id,CoreExpr)]) -- Binders annotated with CPR info
+cprFix top_lvl orig_env orig_pairs
+  = loop 1 init_env init_pairs
   where
-    bot_sig = mkCprSig 0 botCpr
+    init_sig id rhs
+      -- See Note [CPR for data structures]
+      | isDataStructure id rhs = topCprSig
+      | otherwise              = mkCprSig 0 botCpr
     -- See Note [Initialising strictness] in GHC.Core.Opt.DmdAnal
-    initial_pairs | ae_virgin env = [(setIdCprInfo id bot_sig, rhs) | (id, rhs) <- orig_pairs ]
-                  | otherwise     = orig_pairs
-
-    -- The fixed-point varies the idCprInfo field of the binders, and terminates if that
-    -- annotation does not change any more.
-    loop :: Int -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)])
-    loop n pairs
-      | found_fixpoint = (final_anal_env, pairs')
-      | otherwise      = loop (n+1) pairs'
-      where
-        found_fixpoint    = map (idCprInfo . fst) pairs' == map (idCprInfo . fst) pairs
-        first_round       = n == 1
-        pairs'            = step first_round pairs
-        final_anal_env    = extendAnalEnvs env (map fst pairs')
-
-    step :: Bool -> [(Id, CoreExpr)] -> [(Id, CoreExpr)]
-    step first_round pairs = pairs'
+    orig_virgin = ae_virgin orig_env
+    init_pairs | orig_virgin  = [(setIdCprInfo id (init_sig id rhs), rhs) | (id, rhs) <- orig_pairs ]
+               | otherwise    = orig_pairs
+    init_env = extendSigEnvList orig_env (map fst init_pairs)
+
+    -- The fixed-point varies the idCprInfo field of the binders and and their
+    -- entries in the AnalEnv, and terminates if that annotation does not change
+    -- any more.
+    loop :: Int -> AnalEnv -> [(Id,CoreExpr)] -> (AnalEnv, [(Id,CoreExpr)])
+    loop n env pairs
+      | found_fixpoint = (reset_env', pairs')
+      | otherwise      = loop (n+1) env' pairs'
       where
         -- In all but the first iteration, delete the virgin flag
-        start_env | first_round = env
-                  | otherwise   = nonVirgin env
-
-        start = extendAnalEnvs start_env (map fst pairs)
-
-        (_, pairs') = mapAccumL my_downRhs start pairs
-
-        my_downRhs env (id,rhs)
-          = (env', (id', rhs'))
+        -- See Note [Initialising strictness] in GHC.Core.Opt.DmdAnal
+        (env', pairs') = step (applyWhen (n/=1) nonVirgin env) pairs
+        -- Make sure we reset the virgin flag to what it was when we are stable
+        reset_env'     = env'{ ae_virgin = orig_virgin }
+        found_fixpoint = map (idCprInfo . fst) pairs' == map (idCprInfo . fst) pairs
+
+    step :: AnalEnv -> [(Id, CoreExpr)] -> (AnalEnv, [(Id, CoreExpr)])
+    step env pairs = mapAccumL go env pairs
+      where
+        go env (id, rhs) = (env', (id', rhs'))
           where
-            (id', rhs') = cprAnalBind top_lvl env id rhs
-            env'        = extendAnalEnv env id (idCprInfo id')
+            (id', rhs', env') = cprAnalBind top_lvl env id rhs
 
 -- | Process the RHS of the binding for a sensible arity, add the CPR signature
 -- to the Id, and augment the environment with the signature as well.
@@ -299,9 +295,13 @@ cprAnalBind
   -> AnalEnv
   -> Id
   -> CoreExpr
-  -> (Id, CoreExpr)
+  -> (Id, CoreExpr, AnalEnv)
 cprAnalBind top_lvl env id rhs
-  = (id', rhs')
+  -- See Note [CPR for data structures]
+  | isDataStructure id rhs
+  = (id,  rhs,  env) -- Data structure => no code => need to analyse rhs
+  | otherwise
+  = (id', rhs', env')
   where
     (rhs_ty, rhs')  = cprAnal env rhs
     -- possibly trim thunk CPR info
@@ -310,12 +310,11 @@ cprAnalBind top_lvl env id rhs
       | stays_thunk = trimCprTy rhs_ty
       -- See Note [CPR for sum types]
       | returns_sum = trimCprTy rhs_ty
-      -- See Note [CPR for expandable unfoldings]
-      | will_expand = topCprType
       | otherwise   = rhs_ty
     -- See Note [Arity trimming for CPR signatures]
-    sig             = mkCprSigForArity (idArity id) rhs_ty'
-    id'             = setIdCprInfo id sig
+    sig  = mkCprSigForArity (idArity id) rhs_ty'
+    id'  = setIdCprInfo id sig
+    env' = extendSigEnv env id sig
 
     -- See Note [CPR for thunks]
     stays_thunk = is_thunk && not_strict
@@ -325,15 +324,22 @@ cprAnalBind top_lvl env id rhs
     (_, ret_ty) = splitPiTys (idType id)
     not_a_prod  = isNothing (deepSplitProductType_maybe (ae_fam_envs env) ret_ty)
     returns_sum = not (isTopLevel top_lvl) && not_a_prod
-    -- See Note [CPR for expandable unfoldings]
-    will_expand = isJust (cprExpandUnfolding_maybe id)
 
-cprExpandUnfolding_maybe :: Id -> Maybe CoreExpr
-cprExpandUnfolding_maybe id = do
-  guard (idArity id == 0)
+isDataStructure :: Id -> CoreExpr -> Bool
+-- See Note [CPR for data structures]
+isDataStructure id rhs =
+  idArity id == 0 && exprIsHNF rhs
+
+-- | Returns an expandable unfolding
+-- (See Note [exprIsExpandable] in "GHC.Core.Utils") that has
+-- So effectively is a constructor application.
+cprDataStructureUnfolding_maybe :: Id -> Maybe CoreExpr
+cprDataStructureUnfolding_maybe id = do
   -- There are only FinalPhase Simplifier runs after CPR analysis
   guard (activeInFinalPhase (idInlineActivation id))
-  expandUnfolding_maybe (idUnfolding id)
+  unf <- expandUnfolding_maybe (idUnfolding id)
+  guard (isDataStructure id unf)
+  return unf
 
 {- Note [Arity trimming for CPR signatures]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -394,15 +400,15 @@ emptyAnalEnv fam_envs
   , ae_fam_envs = fam_envs
   }
 
--- | Extend an environment with the strictness IDs attached to the id
-extendAnalEnvs :: AnalEnv -> [Id] -> AnalEnv
-extendAnalEnvs env ids
+-- | Extend an environment with the CPR sigs attached to the id
+extendSigEnvList :: AnalEnv -> [Id] -> AnalEnv
+extendSigEnvList env ids
   = env { ae_sigs = sigs' }
   where
     sigs' = extendVarEnvList (ae_sigs env) [ (id, idCprInfo id) | id <- ids ]
 
-extendAnalEnv :: AnalEnv -> Id -> CprSig -> AnalEnv
-extendAnalEnv env id sig
+extendSigEnv :: AnalEnv -> Id -> CprSig -> AnalEnv
+extendSigEnv env id sig
   = env { ae_sigs = extendVarEnv (ae_sigs env) id sig }
 
 lookupSigEnv :: AnalEnv -> Id -> Maybe CprSig
@@ -411,17 +417,17 @@ lookupSigEnv env id = lookupVarEnv (ae_sigs env) id
 nonVirgin :: AnalEnv -> AnalEnv
 nonVirgin env = env { ae_virgin = False }
 
--- | A version of 'extendAnalEnv' for a binder of which we don't see the RHS
+-- | A version of 'extendSigEnv' for a binder of which we don't see the RHS
 -- needed to compute a 'CprSig' (e.g. lambdas and DataAlt field binders).
 -- In this case, we can still look at their demand to attach CPR signatures
 -- anticipating the unboxing done by worker/wrapper.
 -- See Note [CPR for binders that will be unboxed].
-extendAnalEnvForDemand :: AnalEnv -> Id -> Demand -> AnalEnv
-extendAnalEnvForDemand env id dmd
+extendSigEnvForDemand :: AnalEnv -> Id -> Demand -> AnalEnv
+extendSigEnvForDemand env id dmd
   | isId id
   , Just (_, DataConAppContext { dcac_dc = dc })
       <- wantToUnbox (ae_fam_envs env) has_inlineable_prag (idType id) dmd
-  = extendAnalEnv env id (CprSig (conCprType (dataConTag dc)))
+  = extendSigEnv env id (CprSig (conCprType (dataConTag dc)))
   | otherwise
   = env
   where
@@ -436,7 +442,7 @@ extendEnvForDataAlt :: AnalEnv -> CoreExpr -> Id -> DataCon -> [Var] -> AnalEnv
 extendEnvForDataAlt env scrut case_bndr dc bndrs
   = foldl' do_con_arg env' ids_w_strs
   where
-    env' = extendAnalEnv env case_bndr (CprSig case_bndr_ty)
+    env' = extendSigEnv env case_bndr (CprSig case_bndr_ty)
 
     ids_w_strs    = filter isId bndrs `zip` dataConRepStrictness dc
 
@@ -460,7 +466,7 @@ extendEnvForDataAlt env scrut case_bndr dc bndrs
        | is_var scrut
        -- See Note [Add demands for strict constructors] in GHC.Core.Opt.WorkWrap.Utils
        , let dmd = applyWhen (isMarkedStrict str) strictifyDmd (idDemandInfo id)
-       = extendAnalEnvForDemand env id dmd
+       = extendSigEnvForDemand env id dmd
        | otherwise
        = env
 
@@ -645,17 +651,17 @@ assumption is that error cases are rarely entered and we are diverging anyway,
 so WW doesn't hurt.
 
 Should we also trim CPR on DataCon application bindings?
-See Note [CPR for expandable unfoldings]!
+See Note [CPR for data structures]!
 
-Note [CPR for expandable unfoldings]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [CPR for data structures]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Long static data structures (whether top-level or not) like
 
   xs = x1 : xs1
   xs1 = x2 : xs2
   xs2 = x3 : xs3
 
-should not get CPR signatures, because they
+should not get CPR signatures (#18154), because they
 
   * Never get WW'd, so their CPR signature should be irrelevant after analysis
     (in fact the signature might even be harmful for that reason)
@@ -663,28 +669,52 @@ should not get CPR signatures, because they
   * Recording CPR on them blows up interface file sizes and is redundant with
     their unfolding. In case of Nested CPR, this blow-up can be quadratic!
 
-But we can't just stop giving DataCon application bindings the CPR property,
+Hence we don't analyse or annotate data structures in 'cprAnalBind'. To
+implement this, the isDataStructure guard is triggered for bindings that satisfy
+
+  (1) idArity id == 0 (otherwise it's a function)
+  (2) exprIsHNF rhs   (otherwise it's a thunk, Note [CPR for thunks] applies)
+
+But we can't just stop giving DataCon application bindings the CPR *property*,
 for example
 
-  fac 0 = 1
+  fac 0 = I# 1#
   fac n = n * fac (n-1)
 
 fac certainly has the CPR property and should be WW'd! But FloatOut will
 transform the first clause to
 
-  lvl = 1
+  lvl = I# 1#
   fac 0 = lvl
 
-If lvl doesn't have the CPR property, fac won't either. But lvl doesn't have a
-CPR signature to extrapolate into a CPR transformer ('cprTransform'). So
-instead we keep on cprAnal'ing through *expandable* unfoldings for these arity
-0 bindings via 'cprExpandUnfolding_maybe'.
+If lvl doesn't have the CPR property, fac won't either. But lvl is a data
+structure, and hence (see above) will not have a CPR signature. So instead, when
+'cprAnal' meets a variable lacking a CPR signature to extrapolate into a CPR
+transformer, 'cprTransform' instead tries to get its unfolding (via
+'cprDataStructureUnfolding_maybe'), and analyses that instead.
 
 In practice, GHC generates a lot of (nested) TyCon and KindRep bindings, one
-for each data declaration. It's wasteful to attach CPR signatures to each of
-them (and intractable in case of Nested CPR).
-
-Tracked by #18154.
+for each data declaration. They should not have CPR signatures (blow up!).
+
+There is a perhaps surprising special case: KindRep bindings satisfy
+'isDataStructure' (so no CPR signature), but are marked NOINLINE at the same
+time (see the noinline wrinkle in Note [Grand plan for Typeable]). So there is
+no unfolding for 'cprDataStructureUnfolding_maybe' to look through and we'll
+return topCprType. And that is fine! We should refrain to look through NOINLINE
+data structures in general, as a constructed product could never be exposed
+after WW.
+
+It's also worth pointing out how ad-hoc this is: If we instead had
+
+    f1 x = x:[]
+    f2 x = x : f1 x
+    f3 x = x : f2 x
+    ...
+
+we still give every function an every deepening CPR signature. But it's very
+uncommon to find code like this, whereas the long static data structures from
+the beginning of this Note are very common because of GHC's strategy of ANF'ing
+data structure RHSs.
 
 Note [CPR examples]
 ~~~~~~~~~~~~~~~~~~~~


=====================================
testsuite/tests/simplCore/should_compile/T7360.stderr
=====================================
@@ -92,7 +92,7 @@ T7360.$trModule
 
 -- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
 $krep :: GHC.Types.KindRep
-[GblId, Cpr=m1, Unf=OtherCon []]
+[GblId, Unf=OtherCon []]
 $krep
   = GHC.Types.KindRepTyConApp
       GHC.Types.$tcInt (GHC.Types.[] @GHC.Types.KindRep)
@@ -127,7 +127,7 @@ T7360.$tcFoo
 
 -- RHS size: {terms: 3, types: 1, coercions: 0, joins: 0/0}
 T7360.$tc'Foo4 [InlPrag=NOUSERINLINE[~]] :: GHC.Types.KindRep
-[GblId, Cpr=m1, Unf=OtherCon []]
+[GblId, Unf=OtherCon []]
 T7360.$tc'Foo4
   = GHC.Types.KindRepTyConApp
       T7360.$tcFoo (GHC.Types.[] @GHC.Types.KindRep)
@@ -190,7 +190,7 @@ T7360.$tc'Foo2
 
 -- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
 T7360.$tc'Foo9 [InlPrag=NOUSERINLINE[~]] :: GHC.Types.KindRep
-[GblId, Cpr=m4, Unf=OtherCon []]
+[GblId, Unf=OtherCon []]
 T7360.$tc'Foo9 = GHC.Types.KindRepFun $krep T7360.$tc'Foo4
 
 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/57c598acf4ea4627017ec680959e376e8c9c5b8d

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/57c598acf4ea4627017ec680959e376e8c9c5b8d
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200930/c33c9018/attachment-0001.html>


More information about the ghc-commits mailing list