[Git][ghc/ghc][wip/T18223] Work in progress on #18223

Fri Aug 21 10:56:10 UTC 2020

Simon Peyton Jones pushed to branch wip/T18223 at Glasgow Haskell Compiler / GHC


Commits:
716b67d9 by Simon Peyton Jones at 2020-08-20T17:29:09+01:00
Work in progress on #18223

There are two main changes in here

1.  Stop Specialise specalising DFuns.  This cases a huge
    (and utterly unnecessary) blowup in program size in #18223.

    I also refactored the Specialise monad a bit... it was silly.

2.  Re-engineer eta-expansion (again).  The eta-expander was
    generating lots of intermediate stuff, which could be optimised
    away, but which choked the simplifier meanwhile.  Relatively
    easy to kill it off at source.

Needs some Notes etc, and perhaps two separate patches, but I want
to get this into CI and perf testing.

- - - - -


8 changed files:

- compiler/GHC/Core/Coercion.hs
- compiler/GHC/Core/Opt/Arity.hs
- compiler/GHC/Core/Opt/Simplify.hs
- compiler/GHC/Core/Opt/Simplify/Env.hs
- compiler/GHC/Core/Opt/Specialise.hs
- compiler/GHC/Core/Ppr.hs
- compiler/GHC/Core/SimpleOpt.hs
- compiler/GHC/Core/Subst.hs


Changes:

=====================================
compiler/GHC/Core/Coercion.hs
=====================================
@@ -31,7 +31,7 @@ module GHC.Core.Coercion (
         mkAxInstRHS, mkUnbranchedAxInstRHS,
         mkAxInstLHS, mkUnbranchedAxInstLHS,
         mkPiCo, mkPiCos, mkCoCast,
-        mkSymCo, mkTransCo, mkTransMCo,
+        mkSymCo, mkTransCo,
         mkNthCo, nthCoRole, mkLRCo,
         mkInstCo, mkAppCo, mkAppCos, mkTyConAppCo, mkFunCo,
         mkForAllCo, mkForAllCos, mkHomoForAllCos,
@@ -65,7 +65,8 @@ module GHC.Core.Coercion (
         pickLR,
 
         isGReflCo, isReflCo, isReflCo_maybe, isGReflCo_maybe, isReflexiveCo, isReflexiveCo_maybe,
-        isReflCoVar_maybe, isGReflMCo, coToMCo,
+        isReflCoVar_maybe, isGReflMCo,
+        coToMCo, mkTransMCo, mkTransMCoL,
 
         -- ** Coercion variables
         mkCoVar, isCoVar, coVarName, setCoVarName, setCoVarUnique,
@@ -288,6 +289,44 @@ tidyCoAxBndrsForUser init_env tcvs
                        ('_' : rest) -> all isDigit rest
                        _            -> False
 
+
+{- *********************************************************************
+*                                                                      *
+              MCoercion
+*                                                                      *
+********************************************************************* -}
+
+coToMCo :: Coercion -> MCoercion
+-- Convert a coercion to a MCoercion,
+-- checking aggressively for reflexivity
+coToMCo co | isReflexiveCo co = MRefl
+           | otherwise        = MCo co
+
+-- | Tests if this MCoercion is obviously generalized reflexive
+-- Guaranteed to work very quickly.
+isGReflMCo :: MCoercion -> Bool
+isGReflMCo MRefl = True
+isGReflMCo (MCo co) | isGReflCo co = True
+isGReflMCo _ = False
+
+-- | Make a generalized reflexive coercion
+mkGReflCo :: Role -> Type -> MCoercionN -> Coercion
+mkGReflCo r ty mco
+  | isGReflMCo mco = if r == Nominal then Refl ty
+                     else GRefl r ty MRefl
+  | otherwise    = GRefl r ty mco
+
+-- | Compose two MCoercions via transitivity
+mkTransMCo :: MCoercion -> MCoercion -> MCoercion
+mkTransMCo MRefl     co2       = co2
+mkTransMCo co1       MRefl     = co1
+mkTransMCo (MCo co1) (MCo co2) = MCo (mkTransCo co1 co2)
+
+mkTransMCoL :: MCoercion -> Coercion -> MCoercion
+mkTransMCoL MRefl     co2 = MCo co2
+mkTransMCoL (MCo co1) co2 = MCo (mkTransCo co1 co2)
+
+
 {-
 %************************************************************************
 %*                                                                      *
@@ -556,13 +595,6 @@ isGReflCo (GRefl{}) = True
 isGReflCo (Refl{})  = True -- Refl ty == GRefl N ty MRefl
 isGReflCo _         = False

--- | Tests if this MCoercion is obviously generalized reflexive
--- Guaranteed to work very quickly.
-isGReflMCo :: MCoercion -> Bool
-isGReflMCo MRefl = True
-isGReflMCo (MCo co) | isGReflCo co = True
-isGReflMCo _ = False
-
 -- | Tests if this coercion is obviously reflexive. Guaranteed to work
 -- very quickly. Sometimes a coercion can be reflexive, but not obviously
 -- so. c.f. 'isReflexiveCo'
@@ -603,10 +635,6 @@ isReflexiveCo_maybe co
   = Nothing
   where (Pair ty1 ty2, r) = coercionKindRole co
 
-coToMCo :: Coercion -> MCoercion
-coToMCo c = if isReflCo c
-  then MRefl
-  else MCo c
 
 {-
 %************************************************************************
@@ -669,13 +697,6 @@ role is bizarre and a caller should have to ask for this behavior explicitly.
 
 -}
 
--- | Make a generalized reflexive coercion
-mkGReflCo :: Role -> Type -> MCoercionN -> Coercion
-mkGReflCo r ty mco
-  | isGReflMCo mco = if r == Nominal then Refl ty
-                     else GRefl r ty MRefl
-  | otherwise    = GRefl r ty mco
-
 -- | Make a reflexive coercion
 mkReflCo :: Role -> Type -> Coercion
 mkReflCo Nominal ty = Refl ty
@@ -990,12 +1011,6 @@ mkTransCo (GRefl r t1 (MCo co1)) (GRefl _ _ (MCo co2))
   = GRefl r t1 (MCo $ mkTransCo co1 co2)
 mkTransCo co1 co2                 = TransCo co1 co2
 
--- | Compose two MCoercions via transitivity
-mkTransMCo :: MCoercion -> MCoercion -> MCoercion
-mkTransMCo MRefl     co2       = co2
-mkTransMCo co1       MRefl     = co1
-mkTransMCo (MCo co1) (MCo co2) = MCo (mkTransCo co1 co2)
-
 mkNthCo :: HasDebugCallStack
         => Role  -- The role of the coercion you're creating
         -> Int   -- Zero-indexed


=====================================
compiler/GHC/Core/Opt/Arity.hs
=====================================
@@ -15,10 +15,18 @@ module GHC.Core.Opt.Arity
    ( manifestArity, joinRhsArity, exprArity, typeArity
    , exprEtaExpandArity, findRhsArity
    , etaExpand, etaExpandAT
-   , etaExpandToJoinPoint, etaExpandToJoinPointRule
    , exprBotStrictness_maybe
+
+        -- ** ArityType
    , ArityType(..), expandableArityType, arityTypeArity
    , maxWithArity, isBotArityType, idArityType
+
+        -- ** Join points
+   , etaExpandToJoinPoint, etaExpandToJoinPointRule
+
+        -- ** Coercions and casts
+   , pushCoArg, pushCoArgs, pushCoValArg, pushCoTyArg
+   , pushCoercionIntoLambda, pushCoDataCon, collectBindersPushingCo
    )
 where
 
@@ -31,15 +39,21 @@ import GHC.Driver.Ppr
 import GHC.Core
 import GHC.Core.FVs
 import GHC.Core.Utils
-import GHC.Core.Subst
 import GHC.Types.Demand
 import GHC.Types.Var
 import GHC.Types.Var.Env
 import GHC.Types.Id
-import GHC.Core.Type as Type
-import GHC.Core.TyCon     ( initRecTc, checkRecTc )
+
+-- We have two sorts of substitution:
+--   GHC.Core.Subst.Subst, and GHC.Core.TyCo.TCvSubst
+-- Both have substTy, substCo  Hence need for qualification
+import GHC.Core.Subst    as Core
+import GHC.Core.Type     as Type
+import GHC.Core.Coercion as Type
+
+import GHC.Core.DataCon
+import GHC.Core.TyCon     ( initRecTc, checkRecTc, tyConArity )
 import GHC.Core.Predicate ( isDictTy )
-import GHC.Core.Coercion as Coercion
 import GHC.Core.Multiplicity
 import GHC.Types.Var.Set
 import GHC.Types.Basic
@@ -48,7 +62,8 @@ import GHC.Driver.Session ( DynFlags, GeneralFlag(..), gopt )
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Data.FastString
-import GHC.Utils.Misc     ( lengthAtLeast )
+import GHC.Data.Pair
+import GHC.Utils.Misc
 
 {-
 ************************************************************************
@@ -1076,12 +1091,11 @@ eta_expand one_shots orig_expr
     go oss (Cast expr co) = Cast (go oss expr) co
 
     go oss expr
-      = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $
-        retick $ etaInfoAbs etas (etaInfoApp subst' sexpr etas)
+      = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, pprEtaInfos etas]) $
+        retick $ etaInfoAbs etas (etaInfoApp in_scope' sexpr etas)
       where
           in_scope = mkInScopeSet (exprFreeVars expr)
           (in_scope', etas) = mkEtaWW oss (ppr orig_expr) in_scope (exprType expr)
-          subst' = mkEmptySubst in_scope'
 
           -- Find ticks behind type apps.
           -- See Note [Eta expansion and source notes]
@@ -1090,76 +1104,190 @@ eta_expand one_shots orig_expr
           sexpr = foldl' App expr'' args
           retick expr = foldr mkTick expr ticks
 
-                                -- Abstraction    Application
+{- *********************************************************************
+*                                                                      *
+              The EtaInfo mechanism
+*                                                                      *
+************************************************************************
+
+{- Note [The EtaInfo mechanism]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have (e :: ty) and we want to eta-expand it to arity N.
+This what eta_expand does.  We do it in two steps:
+
+1.  From 'ty' and 'N' build a [EtaInfo] which describes the shape
+    of the expansion.
+
+2.  Build the term
+       \ v1..vn.  e v1 .. vn
+    where those abstractions and applications are described by
+    the same [EtaInfo].  Specifically we build the term
+
+       etaInfoAbs etas (etaInfoApp in_scope e etas)
+
+   where etas :: [EtaInfo]#
+         etaInfoAbs builds the lambdas
+         etaInfoApp builds the applictions
+
+   Note that the /same/ [EtaInfo] drives both etaInfoAbs and etaInfoApp
+
+To a first approximation [EtaInfo] is just [Var].  But
+casts complicate the question.  If we have
+   newtype N a = MkN (S -> a)
+and
+   ty = N (N Int)
+then the eta-expansion must look like
+        (\x (\y. ((e |> co1) x) |> co2) y)
+           |> sym co2)
+        |> sym co1
+where
+  co1 :: N (N Int) ~ S -> N Int
+  co2 :: N Int     ~ S -> Int
+
+Blimey!  Look at all those casts.  Moreover, if the type
+is very deeply nested (as happens in #18223), the repetition
+of types can make the overall term very large.  So there is a big
+payoff in cancelling out casts aggressively wherever possible.
+(See also Note [No crap in eta-expanded code].)
+
+This matters a lot in etaEInfoApp, where we
+* Do beta-reduction on the fly
+* Use getARg_mabye to get a cast out of the way,
+  so that we can do beta reduction
+Together this makes a big difference.  Consider when e is
+   case x of
+      True  -> (\x -> e1) |> c1
+      False -> (\p -> e2) |> c2
+
+When we eta-expand this to arity 1, say, etaInfoAbs will wrap
+a (\eta) around the outside and use etaInfoApp to apply each
+alternative to 'eta'.  We want to beta-reduce all that junk
+away.
+
+#18223 was a dramtic example in which the intermediate term was
+grotesquely huge, even though the next Simplifier iteration squashed
+it.  Better to kill it at birth.
+-}
+
 --------------
-data EtaInfo = EtaVar Var       -- /\a. []        [] a
-                                -- \x.  []        [] x
-             | EtaCo Coercion   -- [] |> sym co   [] |> co
+data EtaInfo            -- Abstraction      Application
+  = EtaVar Var          -- /\a. []         [] a
+                        -- (\x. [])        [] x
+  | EtaCo CoercionR     -- [] |> sym co    [] |> co
 
 instance Outputable EtaInfo where
-   ppr (EtaVar v) = text "EtaVar" <+> ppr v
-   ppr (EtaCo co) = text "EtaCo"  <+> ppr co
+   ppr (EtaVar v) = text "EtaVar" <+> ppr v  <+> dcolon <+> ppr (idType v)
+   ppr (EtaCo co) = text "EtaCo"  <+> hang (ppr co) 2 (dcolon <+> ppr (coercionType co))
+
+-- Used in debug-printing
+-- pprEtaInfos :: [EtaInfo] -> SDoc
+-- pprEtaInfos eis = brackets $ vcat $ punctuate comma $ map ppr eis
 
 pushCoercion :: Coercion -> [EtaInfo] -> [EtaInfo]
+-- Puts a EtaCo on the front of a [EtaInfo], but combining
+-- with an existing EtaCo if possible
+-- A minor improvement
 pushCoercion co1 (EtaCo co2 : eis)
   | isReflCo co = eis
   | otherwise   = EtaCo co : eis
   where
     co = co1 `mkTransCo` co2
 
-pushCoercion co eis = EtaCo co : eis
+pushCoercion co eis
+  = EtaCo co : eis
+
+getArg_maybe :: [EtaInfo] -> Maybe (CoreArg, [EtaInfo])
+-- Get an argument to the front of the [EtaInfo], if possible,
+-- by pushing any EtaCo through the argument
+getArg_maybe eis = go MRefl eis
+  where
+    go :: MCoercion -> [EtaInfo] -> Maybe (CoreArg, [EtaInfo])
+    go _         []                = Nothing
+    go mco       (EtaCo co2 : eis) = go (mkTransMCoL mco co2) eis
+    go MRefl     (EtaVar v : eis)  = Just (varToCoreExpr v, eis)
+    go (MCo co)  (EtaVar v : eis)
+      | Just (arg, mco) <- pushCoArg co (varToCoreExpr v)
+      = case mco of
+           MRefl  -> Just (arg, eis)
+           MCo co -> Just (arg, pushCoercion co eis)
+      | otherwise
+      = Nothing
+
+mkCastMCo :: CoreExpr -> MCoercionR -> CoreExpr
+mkCastMCo e MRefl    = e
+mkCastMCo e (MCo co) = Cast e co
+
+mkPiMCo :: Var -> MCoercionR -> MCoercionR
+mkPiMCo _  MRefl    = MRefl
+mkPiMCo v (MCo co) = MCo (mkPiCo Representational v co)
 
 --------------
 etaInfoAbs :: [EtaInfo] -> CoreExpr -> CoreExpr
-etaInfoAbs []               expr = expr
-etaInfoAbs (EtaVar v : eis) expr = Lam v (etaInfoAbs eis expr)
-etaInfoAbs (EtaCo co : eis) expr = Cast (etaInfoAbs eis expr) (mkSymCo co)
+etaInfoAbs eis expr
+  | null eis  = expr
+  | otherwise = case final_mco of
+                   MRefl  -> expr'
+                   MCo co -> mkCast expr' co
+  where
+     (expr', final_mco) = foldr do_one (split_cast expr) eis
+
+     do_one :: EtaInfo -> (CoreExpr, MCoercion) -> (CoreExpr, MCoercion)
+     do_one (EtaVar v) (expr, mco) = (Lam v expr, mkPiMCo v mco)
+     do_one (EtaCo co) (expr, mco) = (expr, mco `mkTransMCoL` mkSymCo co)
+
+     split_cast :: CoreExpr -> (CoreExpr, MCoercion)
+     split_cast (Cast e co) = (e, MCo co)
+     split_cast e           = (e, MRefl)
+     -- We could look in the body of lets, and the branches of a case
+     -- But then we would have to worry about whether the cast mentioned
+     -- any of the bound variables, which is tiresome. Later maybe.
+     -- Result: we may end up with
+     --     (\(x::Int). case x of { DEFAULT -> e1 |> co }) |> sym (<Int>->co)
+     -- and fail to optimise it away
 
 --------------
-etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
+etaInfoApp :: InScopeSet -> CoreExpr -> [EtaInfo] -> CoreExpr
 -- (etaInfoApp s e eis) returns something equivalent to
---             ((substExpr s e) `appliedto` eis)
+--             (substExpr s e `appliedto` eis)
 
-etaInfoApp subst (Lam v1 e) (EtaVar v2 : eis)
-  = etaInfoApp (GHC.Core.Subst.extendSubstWithVar subst v1 v2) e eis
-
-etaInfoApp subst (Cast e co1) eis
-  = etaInfoApp subst e (pushCoercion co' eis)
+etaInfoApp in_scope expr eis
+  = go (mkEmptySubst in_scope) expr eis
   where
-    co' = GHC.Core.Subst.substCo subst co1
+    go :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
+    -- 'go' pushed down the eta-infos into the branch of a case
+    -- and the body of a let; and does beta-reduction if possible
+    go subst (Tick t e) eis
+      = Tick (substTickish subst t) (go subst e eis)
+    go subst (Cast e co) eis
+      = go subst e (pushCoercion (Core.substCo subst co) eis)
+    go subst (Case e b ty alts) eis
+      = Case (Core.substExprSC subst e) b1 ty' alts'
+      where
+        (subst1, b1) = Core.substBndr subst b
+        alts' = map subst_alt alts
+        ty'   = etaInfoAppTy (Core.substTy subst ty) eis
+        subst_alt (con, bs, rhs) = (con, bs', go subst2 rhs eis)
+                 where
+                    (subst2,bs') = Core.substBndrs subst1 bs
+    go subst (Let b e) eis
+      | not (isJoinBind b) -- See Note [Eta expansion for join points]
+      = Let b' (go subst' e eis)
+      where
+        (subst', b') = Core.substBindSC subst b
 
-etaInfoApp subst (Case e b ty alts) eis
-  = Case (subst_expr subst e) b1 ty' alts'
-  where
-    (subst1, b1) = substBndr subst b
-    alts' = map subst_alt alts
-    ty'   = etaInfoAppTy (GHC.Core.Subst.substTy subst ty) eis
-    subst_alt (con, bs, rhs) = (con, bs', etaInfoApp subst2 rhs eis)
-              where
-                 (subst2,bs') = substBndrs subst1 bs
-
-etaInfoApp subst (Let b e) eis
-  | not (isJoinBind b)
-    -- See Note [Eta expansion for join points]
-  = Let b' (etaInfoApp subst' e eis)
-  where
-    (subst', b') = substBindSC subst b
+    -- Beta-reduction if possible, using getArg_maybe to push
+    -- any intervening casts past the argument
+    -- See Note [The EtaInfo mechansim]
+    go subst (Lam v e) eis
+      | Just (arg, eis') <- getArg_maybe eis
+      = go (Core.extendSubst subst v arg) e eis'
 
-etaInfoApp subst (Tick t e) eis
-  = Tick (substTickish subst t) (etaInfoApp subst e eis)
+    -- Stop pushing down; just wrap the expression up
+    go subst e eis = wrap (Core.substExprSC subst e) eis
 
-etaInfoApp subst expr _
-  | (Var fun, _) <- collectArgs expr
-  , Var fun' <- lookupIdSubst subst fun
-  , isJoinId fun'
-  = subst_expr subst expr
-
-etaInfoApp subst e eis
-  = go (subst_expr subst e) eis
-  where
-    go e []                  = e
-    go e (EtaVar v    : eis) = go (App e (varToCoreExpr v)) eis
-    go e (EtaCo co    : eis) = go (Cast e co) eis
+    wrap e []               = e
+    wrap e (EtaVar v : eis) = wrap (App e (varToCoreExpr v)) eis
+    wrap e (EtaCo co : eis) = wrap (Cast e co) eis
 
 
 --------------
@@ -1235,7 +1363,7 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty
        -- We want to get
        --      coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
        | Just (co, ty') <- topNormaliseNewType_maybe ty
-       , let co' = Coercion.substCo subst co
+       , let co' = Type.substCo subst co
              -- Remember to apply the substitution to co (#16979)
              -- (or we could have applied to ty, but then
              --  we'd have had to zap it for the recursive call)
@@ -1253,21 +1381,288 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty
         -- with an explicit lambda having a non-function type
 
 
+{- *********************************************************************
+*                                                                      *
+              The "push rules"
+*                                                                      *
+************************************************************************
+
+Here we implement the "push rules" from FC papers:
+
+* The push-argument rules, where we can move a coercion past an argument.
+  We have
+      (fun |> co) arg
+  and we want to transform it to
+    (fun arg') |> co'
+  for some suitable co' and transformed arg'.
+
+* The PushK rule for data constructors.  We have
+       (K e1 .. en) |> co
+  and we want to transform to
+       (K e1' .. en')
+  by pushing the coercion into the arguments
+-}
+
+pushCoArgs :: CoercionR -> [CoreArg] -> Maybe ([CoreArg], MCoercion)
+pushCoArgs co []         = return ([], MCo co)
+pushCoArgs co (arg:args) = do { (arg',  m_co1) <- pushCoArg  co  arg
+                              ; case m_co1 of
+                                  MCo co1 -> do { (args', m_co2) <- pushCoArgs co1 args
+                                                 ; return (arg':args', m_co2) }
+                                  MRefl  -> return (arg':args, MRefl) }
+
+pushCoArg :: CoercionR -> CoreArg -> Maybe (CoreArg, MCoercion)
+-- We have (fun |> co) arg, and we want to transform it to
+--         (fun arg) |> co
+-- This may fail, e.g. if (fun :: N) where N is a newtype
+-- C.f. simplCast in GHC.Core.Opt.Simplify
+-- 'co' is always Representational
+-- If the returned coercion is Nothing, then it would have been reflexive
+pushCoArg co (Type ty) = do { (ty', m_co') <- pushCoTyArg co ty
+                            ; return (Type ty', m_co') }
+pushCoArg co val_arg   = do { (arg_co, m_co') <- pushCoValArg co
+                            ; return (val_arg `mkCastMCo` arg_co, m_co') }
+
+pushCoTyArg :: CoercionR -> Type -> Maybe (Type, MCoercionR)
+-- We have (fun |> co) @ty
+-- Push the coercion through to return
+--         (fun @ty') |> co'
+-- 'co' is always Representational
+-- If the returned coercion is Nothing, then it would have been reflexive;
+-- it's faster not to compute it, though.
+pushCoTyArg co ty
+  -- The following is inefficient - don't do `eqType` here, the coercion
+  -- optimizer will take care of it. See #14737.
+  -- -- | tyL `eqType` tyR
+  -- -- = Just (ty, Nothing)
+
+  | isReflCo co
+  = Just (ty, MRefl)
+
+  | isForAllTy_ty tyL
+  = ASSERT2( isForAllTy_ty tyR, ppr co $$ ppr ty )
+    Just (ty `mkCastTy` co1, MCo co2)
+
+  | otherwise
+  = Nothing
+  where
+    Pair tyL tyR = coercionKind co
+       -- co :: tyL ~ tyR
+       -- tyL = forall (a1 :: k1). ty1
+       -- tyR = forall (a2 :: k2). ty2
+
+    co1 = mkSymCo (mkNthCo Nominal 0 co)
+       -- co1 :: k2 ~N k1
+       -- Note that NthCo can extract a Nominal equality between the
+       -- kinds of the types related by a coercion between forall-types.
+       -- See the NthCo case in GHC.Core.Lint.
+
+    co2 = mkInstCo co (mkGReflLeftCo Nominal ty co1)
+        -- co2 :: ty1[ (ty|>co1)/a1 ] ~ ty2[ ty/a2 ]
+        -- Arg of mkInstCo is always nominal, hence mkNomReflCo
+
+pushCoValArg :: CoercionR -> Maybe (MCoercionR, MCoercionR)
+-- We have (fun |> co) arg
+-- Push the coercion through to return
+--         (fun (arg |> co_arg)) |> co_res
+-- 'co' is always Representational
+-- If the second returned Coercion is actually Nothing, then no cast is necessary;
+-- the returned coercion would have been reflexive.
+pushCoValArg co
+  -- The following is inefficient - don't do `eqType` here, the coercion
+  -- optimizer will take care of it. See #14737.
+  -- -- | tyL `eqType` tyR
+  -- -- = Just (mkRepReflCo arg, Nothing)
+
+  | isReflCo co
+  = Just (MRefl, MRefl)
+
+  | isFunTy tyL
+  , (co_mult, co1, co2) <- decomposeFunCo Representational co
+  , isReflexiveCo co_mult
+    -- We can't push the coercion in the case where co_mult isn't reflexivity:
+    -- it could be an unsafe axiom, and losing this information could yield
+    -- ill-typed terms. For instance (fun x ::(1) Int -> (fun _ -> () |> co) x)
+    -- with co :: (Int -> ()) ~ (Int #-> ()), would reduce to (fun x ::(1) Int
+    -- -> (fun _ ::(Many) Int -> ()) x) which is ill-typed
+
+              -- If   co  :: (tyL1 -> tyL2) ~ (tyR1 -> tyR2)
+              -- then co1 :: tyL1 ~ tyR1
+              --      co2 :: tyL2 ~ tyR2
+  = ASSERT2( isFunTy tyR, ppr co $$ ppr arg )
+    Just (coToMCo (mkSymCo co1), coToMCo co2)
+    -- Use coToMCo to check for reflexivity; the whole coercion may not
+    -- be reflexive, but either of its components might be
+
+  | otherwise
+  = Nothing
+  where
+    arg = funArgTy tyR
+    Pair tyL tyR = coercionKind co
+
+pushCoercionIntoLambda
+    :: InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
+-- This implements the Push rule from the paper on coercions
+--    (\x. e) |> co
+-- ===>
+--    (\x'. e |> co')
+pushCoercionIntoLambda in_scope x e co
+    | ASSERT(not (isTyVar x) && not (isCoVar x)) True
+    , Pair s1s2 t1t2 <- coercionKind co
+    , Just (_, _s1,_s2) <- splitFunTy_maybe s1s2
+    , Just (w1, t1,_t2) <- splitFunTy_maybe t1t2
+    , (co_mult, co1, co2) <- decomposeFunCo Representational co
+    , isReflexiveCo co_mult
+      -- We can't push the coercion in the case where co_mult isn't
+      -- reflexivity. See pushCoValArg for more details.
+    = let
+          -- Should we optimize the coercions here?
+          -- Otherwise they might not match too well
+          x' = x `setIdType` t1 `setIdMult` w1
+          in_scope' = in_scope `extendInScopeSet` x'
+          subst = extendIdSubst (mkEmptySubst in_scope')
+                                x
+                                (mkCast (Var x') co1)
+      in Just (x', substExpr subst e `mkCast` co2)
+    | otherwise
+    = pprTrace "exprIsLambda_maybe: Unexpected lambda in case" (ppr (Lam x e))
+      Nothing
+
+pushCoDataCon :: DataCon -> [CoreExpr] -> Coercion
+              -> Maybe (DataCon
+                       , [Type]      -- Universal type args
+                       , [CoreExpr]) -- All other args incl existentials
+-- Implement the KPush reduction rule as described in "Down with kinds"
+-- The transformation applies iff we have
+--      (C e1 ... en) `cast` co
+-- where co :: (T t1 .. tn) ~ to_ty
+-- The left-hand one must be a T, because exprIsConApp returned True
+-- but the right-hand one might not be.  (Though it usually will.)
+pushCoDataCon dc dc_args co
+  | isReflCo co || from_ty `eqType` to_ty  -- try cheap test first
+  , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) dc_args
+  = Just (dc, map exprToType univ_ty_args, rest_args)
+
+  | Just (to_tc, to_tc_arg_tys) <- splitTyConApp_maybe to_ty
+  , to_tc == dataConTyCon dc
+        -- These two tests can fail; we might see
+        --      (C x y) `cast` (g :: T a ~ S [a]),
+        -- where S is a type function.  In fact, exprIsConApp
+        -- will probably not be called in such circumstances,
+        -- but there's nothing wrong with it
+
+  = let
+        tc_arity       = tyConArity to_tc
+        dc_univ_tyvars = dataConUnivTyVars dc
+        dc_ex_tcvars   = dataConExTyCoVars dc
+        arg_tys        = dataConRepArgTys dc
+
+        non_univ_args  = dropList dc_univ_tyvars dc_args
+        (ex_args, val_args) = splitAtList dc_ex_tcvars non_univ_args
+
+        -- Make the "Psi" from the paper
+        omegas = decomposeCo tc_arity co (tyConRolesRepresentational to_tc)
+        (psi_subst, to_ex_arg_tys)
+          = liftCoSubstWithEx Representational
+                              dc_univ_tyvars
+                              omegas
+                              dc_ex_tcvars
+                              (map exprToType ex_args)
+
+          -- Cast the value arguments (which include dictionaries)
+        new_val_args = zipWith cast_arg (map scaledThing arg_tys) val_args
+        cast_arg arg_ty arg = mkCast arg (psi_subst arg_ty)
+
+        to_ex_args = map Type to_ex_arg_tys
+
+        dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tcvars,
+                         ppr arg_tys, ppr dc_args,
+                         ppr ex_args, ppr val_args, ppr co, ppr from_ty, ppr to_ty, ppr to_tc
+                         , ppr $ mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args) ]
+    in
+    ASSERT2( eqType from_ty (mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args)), dump_doc )
+    ASSERT2( equalLength val_args arg_tys, dump_doc )
+    Just (dc, to_tc_arg_tys, to_ex_args ++ new_val_args)
+
+  | otherwise
+  = Nothing
+
+  where
+    Pair from_ty to_ty = coercionKind co
 
-------------
-subst_expr :: Subst -> CoreExpr -> CoreExpr
--- Apply a substitution to an expression.  We use substExpr
--- not substExprSC (short-cutting substitution) because
--- we may be changing the types of join points, so applying
--- the in-scope set is necessary.
+collectBindersPushingCo :: CoreExpr -> ([Var], CoreExpr)
+-- Collect lambda binders, pushing coercions inside if possible
+-- E.g.   (\x.e) |> g         g :: <Int> -> blah
+--        = (\x. e |> Nth 1 g)
+--
+-- That is,
 --
--- ToDo: we could instead check if we actually *are*
--- changing any join points' types, and if not use substExprSC.
-subst_expr = substExpr
+-- collectBindersPushingCo ((\x.e) |> g) === ([x], e |> Nth 1 g)
+collectBindersPushingCo e
+  = go [] e
+  where
+    -- Peel off lambdas until we hit a cast.
+    go :: [Var] -> CoreExpr -> ([Var], CoreExpr)
+    -- The accumulator is in reverse order
+    go bs (Lam b e)   = go (b:bs) e
+    go bs (Cast e co) = go_c bs e co
+    go bs e           = (reverse bs, e)
+
+    -- We are in a cast; peel off casts until we hit a lambda.
+    go_c :: [Var] -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
+    -- (go_c bs e c) is same as (go bs e (e |> c))
+    go_c bs (Cast e co1) co2 = go_c bs e (co1 `mkTransCo` co2)
+    go_c bs (Lam b e)    co  = go_lam bs b e co
+    go_c bs e            co  = (reverse bs, mkCast e co)
+
+    -- We are in a lambda under a cast; peel off lambdas and build a
+    -- new coercion for the body.
+    go_lam :: [Var] -> Var -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
+    -- (go_lam bs b e c) is same as (go_c bs (\b.e) c)
+    go_lam bs b e co
+      | isTyVar b
+      , let Pair tyL tyR = coercionKind co
+      , ASSERT( isForAllTy_ty tyL )
+        isForAllTy_ty tyR
+      , isReflCo (mkNthCo Nominal 0 co)  -- See Note [collectBindersPushingCo]
+      = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkTyVarTy b)))
+
+      | isCoVar b
+      , let Pair tyL tyR = coercionKind co
+      , ASSERT( isForAllTy_co tyL )
+        isForAllTy_co tyR
+      , isReflCo (mkNthCo Nominal 0 co)  -- See Note [collectBindersPushingCo]
+      , let cov = mkCoVarCo b
+      = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkCoercionTy cov)))
+
+      | isId b
+      , let Pair tyL tyR = coercionKind co
+      , ASSERT( isFunTy tyL) isFunTy tyR
+      , (co_mult, co_arg, co_res) <- decomposeFunCo Representational co
+      , isReflCo co_mult -- See Note [collectBindersPushingCo]
+      , isReflCo co_arg  -- See Note [collectBindersPushingCo]
+      = go_c (b:bs) e co_res
+
+      | otherwise = (reverse bs, mkCast (Lam b e) co)
 
+{-
 
---------------
+Note [collectBindersPushingCo]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We just look for coercions of form
+   <type> # w -> blah
+(and similarly for foralls) to keep this function simple.  We could do
+more elaborate stuff, but it'd involve substitution etc.
 
+-}
+
+{- *********************************************************************
+*                                                                      *
+                Join points
+*                                                                      *
+********************************************************************* -}
+
+-------------------
 -- | Split an expression into the given number of binders and a body,
 -- eta-expanding if necessary. Counts value *and* type binders.
 etaExpandToJoinPoint :: JoinArity -> CoreExpr -> ([CoreBndr], CoreExpr)
@@ -1307,7 +1702,7 @@ etaBodyForJoinPoint need_args body
       = (reverse rev_bs, e)
     go n ty subst rev_bs e
       | Just (tv, res_ty) <- splitForAllTy_maybe ty
-      , let (subst', tv') = Type.substVarBndr subst tv
+      , let (subst', tv') = substVarBndr subst tv
       = go (n-1) res_ty subst' (tv' : rev_bs) (e `App` varToCoreExpr tv')
       | Just (mult, arg_ty, res_ty) <- splitFunTy_maybe ty
       , let (subst', b) = freshEtaId n subst (Scaled mult arg_ty)
@@ -1318,6 +1713,8 @@ etaBodyForJoinPoint need_args body
 
     init_subst e = mkEmptyTCvSubst (mkInScopeSet (exprFreeVars e))
 
+
+
 --------------
 freshEtaId :: Int -> TCvSubst -> Scaled Type -> (TCvSubst, Id)
 -- Make a fresh Id, with specified type (after applying substitution)
@@ -1336,3 +1733,4 @@ freshEtaId n subst ty
                   -- "OrCoVar" since this can be used to eta-expand
                   -- coercion abstractions
         subst'  = extendTCvInScope subst eta_id'
+


=====================================
compiler/GHC/Core/Opt/Simplify.hs
=====================================
@@ -48,9 +48,9 @@ import GHC.Types.Unique ( hasKey )
 import GHC.Core.Unfold
 import GHC.Core.Utils
 import GHC.Core.Opt.Arity ( ArityType(..), arityTypeArity, isBotArityType
+                          , pushCoTyArg, pushCoValArg
                           , idArityType, etaExpandAT )
-import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg
-                          , joinPointBinding_maybe, joinPointBindings_maybe )
+import GHC.Core.SimpleOpt ( joinPointBinding_maybe, joinPointBindings_maybe )
 import GHC.Core.FVs     ( mkRuleInfo )
 import GHC.Core.Rules   ( lookupRule, getRules, initRuleOpts )
 import GHC.Types.Basic
@@ -315,7 +315,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
         ; let rhs_cont = mkRhsStop (substTy body_env (exprType body))
         ; (body_floats0, body0) <- {-#SCC "simplExprF" #-} simplExprF body_env body rhs_cont
 
-              -- Never float join-floats out of a non-join let-binding
+              -- Never float join-floats out of a non-join let-binding (which this is)
               -- So wrap the body in the join-floats right now
               -- Hence: body_floats1 consists only of let-floats
         ; let (body_floats1, body1) = wrapJoinFloatsX body_floats0 body0
@@ -385,7 +385,8 @@ simplNonRecX env bndr new_rhs
 
   | otherwise
   = do  { (env', bndr') <- simplBinder env bndr
-        ; completeNonRecX NotTopLevel env' (isStrictId bndr) bndr bndr' new_rhs }
+        ; -- pprTrace "simplNonRecX" (ppr bndr <+> ppr bndr' $$ ppr (idOccInfo bndr) $$ ppr new_rhs) $
+          completeNonRecX NotTopLevel env' (isStrictId bndr) bndr bndr' new_rhs }
                 -- simplNonRecX is only used for NotTopLevel things
 
 --------------------------
@@ -600,7 +601,8 @@ prepareRhs mode top_lvl occ rhs0
         = do { (is_exp, floats1, fun') <- go (n_val_args+1) fun
              ; case is_exp of
                 False -> return (False, emptyLetFloats, App fun arg)
-                True  -> do { (floats2, arg') <- makeTrivial mode top_lvl topDmd occ arg
+                True  -> do { (floats2, arg') <- -- pprTrace "prepareRhs" (ppr occ <+> ppr rhs0) $
+                                                 makeTrivial mode top_lvl topDmd occ arg
                             ; return (True, floats1 `addLetFlts` floats2, App fun' arg') } }
     go n_val_args (Var fun)
         = return (is_exp, emptyLetFloats, Var fun)
@@ -680,7 +682,8 @@ makeTrivialBinding mode top_lvl occ_fs info expr expr_ty
         ; let final_id = addLetBndrInfo var arity_type unf
               bind     = NonRec final_id expr2
 
-        ; return ( floats `addLetFlts` unitLetFloat bind, final_id ) }
+        ; -- pprTrace "makeTrivial" (ppr bind) $
+          return ( floats `addLetFlts` unitLetFloat bind, final_id ) }
 
 bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool
 -- True iff we can have a binding of this expression at this level
@@ -1411,25 +1414,23 @@ simplCast env body co0 cont0
                                         -- type of the hole changes (#16312)
 
         -- (f |> co) e   ===>   (f (e |> co1)) |> co2
-        -- where   co :: (s1->s2) ~ (t1~t2)
+        -- where   co :: (s1->s2) ~ (t1->t2)
         --         co1 :: t1 ~ s1
         --         co2 :: s2 ~ t2
         addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se
                                       , sc_dup = dup, sc_cont = tail })
-          | Just (co1, m_co2) <- pushCoValArg co
-          , let new_ty = coercionRKind co1
-          , not (isTypeLevPoly new_ty)  -- Without this check, we get a lev-poly arg
-                                        -- See Note [Levity polymorphism invariants] in GHC.Core
-                                        -- test: typecheck/should_run/EtaExpandLevPoly
+          | Just (m_co1, m_co2) <- pushCoValArg co
+          , levity_ok m_co1
           = {-#SCC "addCoerce-pushCoValArg" #-}
             do { tail' <- addCoerceM m_co2 tail
-               ; if isReflCo co1
-                 then return (cont { sc_cont = tail'
-                                   , sc_hole_ty = coercionLKind co })
+               ; case m_co1 of {
+                   MRefl -> return (cont { sc_cont = tail'
+                                         , sc_hole_ty = coercionLKind co }) ;
                       -- Avoid simplifying if possible;
                       -- See Note [Avoiding exponential behaviour]
-                 else do
-               { (dup', arg_se', arg') <- simplArg env dup arg_se arg
+
+                   MCo co1 ->
+            do { (dup', arg_se', arg') <- simplArg env dup arg_se arg
                     -- When we build the ApplyTo we can't mix the OutCoercion
                     -- 'co' with the InExpr 'arg', so we simplify
                     -- to make it all consistent.  It's a bit messy.
@@ -1439,7 +1440,7 @@ simplCast env body co0 cont0
                                     , sc_env  = arg_se'
                                     , sc_dup  = dup'
                                     , sc_cont = tail'
-                                    , sc_hole_ty = coercionLKind co }) } }
+                                    , sc_hole_ty = coercionLKind co }) } } }
 
         addCoerce co cont
           | isReflexiveCo co = return cont  -- Having this at the end makes a huge
@@ -1447,6 +1448,13 @@ simplCast env body co0 cont0
                                             -- See Note [Optimising reflexivity]
           | otherwise        = return (CastIt co cont)
 
+        levity_ok :: MCoercionR -> Bool
+        levity_ok MRefl = True
+        levity_ok (MCo co) = not $ isTypeLevPoly $ coercionRKind co
+          -- Without this check, we get a lev-poly arg
+          -- See Note [Levity polymorphism invariants] in GHC.Core
+          -- test: typecheck/should_run/EtaExpandLevPoly
+
 simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr
          -> SimplM (DupFlag, StaticEnv, OutExpr)
 simplArg env dup_flag arg_env arg
@@ -3111,7 +3119,7 @@ knownCon :: SimplEnv
 
 knownCon env scrut dc_floats dc dc_ty_args dc_args bndr bs rhs cont
   = do  { (floats1, env1)  <- bind_args env bs dc_args
-        ; (floats2, env2) <- bind_case_bndr env1
+        ; (floats2, env2)  <- bind_case_bndr env1
         ; (floats3, expr') <- simplExprF env2 rhs cont
         ; case dc_floats of
             [] ->
@@ -3143,8 +3151,9 @@ knownCon env scrut dc_floats dc dc_ty_args dc_args bndr bs rhs cont
              -- it via postInlineUnconditionally.
              -- Nevertheless we must keep it if the case-binder is alive,
              -- because it may be used in the con_app.  See Note [knownCon occ info]
-           ; (floats1, env2) <- simplNonRecX env' b' arg  -- arg satisfies let/app invariant
-           ; (floats2, env3)  <- bind_args env2 bs' args
+           ; (floats1, env2) <- -- pprTrace "knownCon" (vcat [ pprBndr CasePatBind b, pprBndr CasePatBind b', ppr arg ]) $
+                                simplNonRecX env' b' arg  -- arg satisfies let/app invariant
+           ; (floats2, env3) <- bind_args env2 bs' args
            ; return (floats1 `addFloats` floats2, env3) }
 
     bind_args _ _ _ =
@@ -3237,6 +3246,7 @@ altsWouldDup [_] = False
 altsWouldDup (alt:alts)
   | is_bot_alt alt = altsWouldDup alts
   | otherwise      = not (all is_bot_alt alts)
+    -- otherwise case: first alt is non-bot, so all the rest must be bot
   where
     is_bot_alt (_,_,rhs) = exprIsDeadEnd rhs
 
@@ -3824,7 +3834,8 @@ simplStableUnfolding env top_lvl mb_cont id rhs_ty id_arity unf
                                         simplJoinRhs unf_env id expr cont
                            Nothing   -> -- Binder is not a join point
                                         do { expr' <- simplExprC unf_env expr (mkBoringStop rhs_ty)
-                                           ; return (eta_expand expr') }
+                                           ; -- pprTrace "ssu" (ppr expr $$ ppr expr' $$ ppr (eta_expand expr')) $
+                                             return (eta_expand expr') }
               ; case guide of
                   UnfWhen { ug_arity = arity
                           , ug_unsat_ok = sat_ok


=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -591,7 +591,7 @@ addJoinFlts :: JoinFloats -> JoinFloats -> JoinFloats
 addJoinFlts = appOL
 
 mkRecFloats :: SimplFloats -> SimplFloats
--- Flattens the floats from env2 into a single Rec group,
+-- Flattens the floats into a single Rec group,
 -- They must either all be lifted LetFloats or all JoinFloats
 mkRecFloats floats@(SimplFloats { sfLetFloats  = LetFloats bs ff
                                 , sfJoinFloats = jbs


=====================================
compiler/GHC/Core/Opt/Specialise.hs
=====================================
@@ -21,7 +21,7 @@ import GHC.Tc.Utils.TcType hiding( substTy )
 import GHC.Core.Type  hiding( substTy, extendTvSubstList )
 import GHC.Core.Multiplicity
 import GHC.Core.Predicate
-import GHC.Unit.Module( Module, HasModule(..) )
+import GHC.Unit.Module( Module )
 import GHC.Core.Coercion( Coercion )
 import GHC.Core.Opt.Monad
 import qualified GHC.Core.Subst as Core
@@ -31,11 +31,11 @@ import GHC.Types.Var.Set
 import GHC.Types.Var.Env
 import GHC.Core
 import GHC.Core.Rules
-import GHC.Core.SimpleOpt ( collectBindersPushingCo )
 import GHC.Core.Utils     ( exprIsTrivial, getIdFromTrivialExpr_maybe
                           , mkCast, exprType )
 import GHC.Core.FVs
-import GHC.Core.Opt.Arity     ( etaExpandToJoinPointRule )
+import GHC.Core.Opt.Arity     ( collectBindersPushingCo
+                              , etaExpandToJoinPointRule )
 import GHC.Types.Unique.Supply
 import GHC.Types.Name
 import GHC.Types.Id.Make  ( voidArgId, voidPrimId )
@@ -51,12 +51,9 @@ import GHC.Utils.Misc
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Data.FastString
-import GHC.Utils.Monad.State
 import GHC.Types.Unique.DFM
 import GHC.Core.TyCo.Rep (TyCoBinder (..))
 
-import Control.Monad
-
 {-
 ************************************************************************
 *                                                                      *
@@ -590,28 +587,29 @@ specProgram guts@(ModGuts { mg_module = this_mod
                           , mg_binds = binds })
   = do { dflags <- getDynFlags
 
+              -- We need to start with a Subst that knows all the things
+              -- that are in scope, so that the substitution engine doesn't
+              -- accidentally re-use a unique that's already in use
+              -- Easiest thing is to do it all at once, as if all the top-level
+              -- decls were mutually recursive
+       ; let top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $
+                                       bindersOfBinds binds
+                          , se_interesting = emptyVarSet
+                          , se_module = this_mod
+                          , se_dflags = dflags }
+
+             go []           = return ([], emptyUDs)
+             go (bind:binds) = do (binds', uds) <- go binds
+                                  (bind', uds') <- specBind top_env bind uds
+                                  return (bind' ++ binds', uds')
+
              -- Specialise the bindings of this module
-       ; (binds', uds) <- runSpecM dflags this_mod (go binds)
+       ; (binds', uds) <- runSpecM (go binds)
 
-       ; (spec_rules, spec_binds) <- specImports dflags this_mod top_env
-                                                 local_rules uds
+       ; (spec_rules, spec_binds) <- specImports top_env local_rules uds
 
        ; return (guts { mg_binds = spec_binds ++ binds'
                       , mg_rules = spec_rules ++ local_rules }) }
-  where
-        -- We need to start with a Subst that knows all the things
-        -- that are in scope, so that the substitution engine doesn't
-        -- accidentally re-use a unique that's already in use
-        -- Easiest thing is to do it all at once, as if all the top-level
-        -- decls were mutually recursive
-    top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $
-                              bindersOfBinds binds
-                 , se_interesting = emptyVarSet }
-
-    go []           = return ([], emptyUDs)
-    go (bind:binds) = do (binds', uds) <- go binds
-                         (bind', uds') <- specBind top_env bind uds
-                         return (bind' ++ binds', uds')
 
 {-
 Note [Wrap bindings returned by specImports]
@@ -641,13 +639,13 @@ See #10491
 *                                                                      *
 ********************************************************************* -}
 
-specImports :: DynFlags -> Module -> SpecEnv
+specImports :: SpecEnv
             -> [CoreRule]
             -> UsageDetails
             -> CoreM ([CoreRule], [CoreBind])
-specImports dflags this_mod top_env local_rules
+specImports top_env local_rules
             (MkUD { ud_binds = dict_binds, ud_calls = calls })
-  | not $ gopt Opt_CrossModuleSpecialise dflags
+  | not $ gopt Opt_CrossModuleSpecialise (se_dflags top_env)
     -- See Note [Disabling cross-module specialisation]
   = return ([], wrapDictBinds dict_binds [])
 
@@ -655,8 +653,7 @@ specImports dflags this_mod top_env local_rules
   = do { hpt_rules <- getRuleBase
        ; let rule_base = extendRuleBaseList hpt_rules local_rules
 
-       ; (spec_rules, spec_binds) <- spec_imports dflags this_mod top_env
-                                                  [] rule_base
+       ; (spec_rules, spec_binds) <- spec_imports top_env [] rule_base
                                                   dict_binds calls
 
              -- Don't forget to wrap the specialized bindings with
@@ -672,9 +669,7 @@ specImports dflags this_mod top_env local_rules
     }
 
 -- | Specialise a set of calls to imported bindings
-spec_imports :: DynFlags
-             -> Module
-             -> SpecEnv          -- Passed in so that all top-level Ids are in scope
+spec_imports :: SpecEnv          -- Passed in so that all top-level Ids are in scope
              -> [Id]             -- Stack of imported functions being specialised
                                  -- See Note [specImport call stack]
              -> RuleBase         -- Rules from this module and the home package
@@ -684,8 +679,7 @@ spec_imports :: DynFlags
              -> CallDetails      -- Calls for imported things
              -> CoreM ( [CoreRule]   -- New rules
                       , [CoreBind] ) -- Specialised bindings
-spec_imports dflags this_mod top_env
-             callers rule_base dict_binds calls
+spec_imports top_env callers rule_base dict_binds calls
   = do { let import_calls = dVarEnvElts calls
        -- ; debugTraceMsg (text "specImports {" <+>
        --                  vcat [ text "calls:" <+> ppr import_calls
@@ -699,16 +693,13 @@ spec_imports dflags this_mod top_env
     go _ [] = return ([], [])
     go rb (cis : other_calls)
       = do { -- debugTraceMsg (text "specImport {" <+> ppr cis)
-           ; (rules1, spec_binds1) <- spec_import dflags this_mod top_env
-                                                  callers rb dict_binds cis
+           ; (rules1, spec_binds1) <- spec_import top_env callers rb dict_binds cis
            -- ; debugTraceMsg (text "specImport }" <+> ppr cis)
 
            ; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls
            ; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) }
 
-spec_import :: DynFlags
-            -> Module
-            -> SpecEnv               -- Passed in so that all top-level Ids are in scope
+spec_import :: SpecEnv               -- Passed in so that all top-level Ids are in scope
             -> [Id]                  -- Stack of imported functions being specialised
                                      -- See Note [specImport call stack]
             -> RuleBase              -- Rules from this module
@@ -717,8 +708,7 @@ spec_import :: DynFlags
             -> CallInfoSet           -- Imported function and calls for it
             -> CoreM ( [CoreRule]    -- New rules
                      , [CoreBind] )  -- Specialised bindings
-spec_import dflags this_mod top_env callers
-            rb dict_binds cis@(CIS fn _)
+spec_import top_env callers rb dict_binds cis@(CIS fn _)
   | isIn "specImport" fn callers
   = return ([], [])     -- No warning.  This actually happens all the time
                         -- when specialising a recursive function, because
@@ -729,8 +719,7 @@ spec_import dflags this_mod top_env callers
   = do { -- debugTraceMsg (text "specImport:no valid calls")
        ; return ([], []) }
 
-  | wantSpecImport dflags unfolding
-  , Just rhs <- maybeUnfoldingTemplate unfolding
+  | Just rhs <- maybeUnfoldingTemplate unfolding
   = do {     -- Get rules from the external package state
              -- We keep doing this in case we "page-fault in"
              -- more rules as we go along
@@ -742,8 +731,8 @@ spec_import dflags this_mod top_env callers
 
        ; (rules1, spec_pairs, MkUD { ud_binds = dict_binds1, ud_calls = new_calls })
              <- do { -- debugTraceMsg (text "specImport1" <+> vcat [ppr fn, ppr good_calls, ppr rhs])
-                   ; runSpecM dflags this_mod $
-                     specCalls (Just this_mod) top_env rules_for_fn good_calls fn rhs }
+                   ; runSpecM $
+                     specCalls True top_env rules_for_fn good_calls fn rhs }
        ; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs]
              -- After the rules kick in we may get recursion, but
              -- we rely on a global GlomBinds to sort that out later
@@ -751,7 +740,7 @@ spec_import dflags this_mod top_env callers
 
               -- Now specialise any cascaded calls
        -- ; debugTraceMsg (text "specImport 2" <+> (ppr fn $$ ppr rules1 $$ ppr spec_binds1))
-       ; (rules2, spec_binds2) <- spec_imports dflags this_mod top_env
+       ; (rules2, spec_binds2) <- spec_imports top_env
                                                (fn:callers)
                                                (extendRuleBaseList rb rules1)
                                                (dict_binds `unionBags` dict_binds1)
@@ -767,6 +756,7 @@ spec_import dflags this_mod top_env callers
        ; return ([], [])}
 
   where
+    dflags = se_dflags top_env
     unfolding = realIdUnfolding fn   -- We want to see the unfolding even for loop breakers
     good_calls = filterCalls cis dict_binds
        -- SUPER IMPORTANT!  Drop calls that (directly or indirectly) refer to fn
@@ -796,22 +786,6 @@ tryWarnMissingSpecs dflags callers fn calls_for_fn
           , whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
           , text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ])
 
-wantSpecImport :: DynFlags -> Unfolding -> Bool
--- See Note [Specialise imported INLINABLE things]
-wantSpecImport dflags unf
- = case unf of
-     NoUnfolding      -> False
-     BootUnfolding    -> False
-     OtherCon {}      -> False
-     DFunUnfolding {} -> True
-     CoreUnfolding { uf_src = src, uf_guidance = _guidance }
-       | gopt Opt_SpecialiseAggressively dflags -> True
-       | isStableSource src -> True
-               -- Specialise even INLINE things; it hasn't inlined yet,
-               -- so perhaps it never will.  Moreover it may have calls
-               -- inside it that we want to specialise
-       | otherwise -> False    -- Stable, not INLINE, hence INLINABLE
-
 {- Note [Avoiding loops in specImports]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We must take great care when specialising instance declarations
@@ -1001,6 +975,9 @@ data SpecEnv
              -- Dict Ids that we know something about
              -- and hence may be worth specialising against
              -- See Note [Interesting dictionary arguments]
+
+       , se_module :: Module
+       , se_dflags :: DynFlags
      }
 
 instance Outputable SpecEnv where
@@ -1308,7 +1285,7 @@ specDefn :: SpecEnv
 specDefn env body_uds fn rhs
   = do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds
              rules_for_me = idCoreRules fn
-       ; (rules, spec_defns, spec_uds) <- specCalls Nothing env rules_for_me
+       ; (rules, spec_defns, spec_uds) <- specCalls False env rules_for_me
                                                     calls_for_me fn rhs
        ; return ( fn `addIdSpecialisations` rules
                 , spec_defns
@@ -1321,8 +1298,8 @@ specDefn env body_uds fn rhs
                 -- body_uds_without_me
 
 ---------------------------
-specCalls :: Maybe Module      -- Just this_mod  =>  specialising imported fn
-                               -- Nothing        =>  specialising local fn
+specCalls :: Bool              -- True  =>  specialising imported fn
+                               -- False =>  specialising local fn
           -> SpecEnv
           -> [CoreRule]        -- Existing RULES for the fn
           -> [CallInfo]
@@ -1337,7 +1314,7 @@ type SpecInfo = ( [CoreRule]       -- Specialisation rules
                 , [(Id,CoreExpr)]  -- Specialised definition
                 , UsageDetails )   -- Usage details from specialised RHSs
 
-specCalls mb_mod env existing_rules calls_for_me fn rhs
+specCalls spec_imp env existing_rules calls_for_me fn rhs
         -- The first case is the interesting one
   |  notNull calls_for_me               -- And there are some calls to specialise
   && not (isNeverActive (idInlineActivation fn))
@@ -1368,7 +1345,9 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
     inl_act   = inlinePragmaActivation inl_prag
     is_local  = isLocalId fn
     is_dfun   = isDFunId fn
-
+    dflags    = se_dflags env
+    ropts     = initRuleOpts dflags
+    this_mod  = se_module env
         -- Figure out whether the function has an INLINE pragma
         -- See Note [Inline specialisations]
 
@@ -1410,8 +1389,6 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
 --                                        , ppr dx_binds ]) $
 --             return ()
 
-           ; dflags <- getDynFlags
-           ; let ropts = initRuleOpts dflags
            ; if not useful  -- No useful specialisation
                 || already_covered ropts rules_acc rule_lhs_args
              then return spec_acc
@@ -1439,17 +1416,15 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
                                  = Nothing
 
            ; spec_fn <- newSpecIdSM fn spec_fn_ty spec_join_arity
-           ; this_mod <- getModule
            ; let
                 -- The rule to put in the function's specialisation is:
                 --      forall x @b d1' d2'.
                 --          f x @T1 @b @T2 d1' d2' = f1 x @b
                 -- See Note [Specialising Calls]
-                herald = case mb_mod of
-                           Nothing        -- Specialising local fn
-                               -> text "SPEC"
-                           Just this_mod  -- Specialising imported fn
-                               -> text "SPEC/" <> ppr this_mod
+                herald | spec_imp  = -- Specialising imported fn
+                                     text "SPEC/" <> ppr this_mod
+                       | otherwise = -- Specialising local fn
+                                     text "SPEC"
 
                 rule_name = mkFastString $ showSDoc dflags $
                             herald <+> ftext (occNameFS (getOccName fn))
@@ -2476,15 +2451,15 @@ mkCallUDs env f args
     res = mkCallUDs' env f args
 
 mkCallUDs' env f args
-  |  not (want_calls_for f)  -- Imported from elsewhere
-  || null ci_key             -- No useful specialisation
-   -- See also Note [Specialisations already covered]
+  | wantCallsFor env f    -- We want it, and...
+  , not (null ci_key)     -- this call site has a useful specialisation
+  = -- pprTrace "mkCallUDs: keeping" _trace_doc
+    singleCall f ci_key
+
+  | otherwise  -- See also Note [Specialisations already covered]
   = -- pprTrace "mkCallUDs: discarding" _trace_doc
     emptyUDs
 
-  | otherwise
-  = -- pprTrace "mkCallUDs: keeping" _trace_doc
-    singleCall f ci_key
   where
     _trace_doc = vcat [ppr f, ppr args, ppr ci_key]
     pis                = fst $ splitPiTys $ idType f
@@ -2518,13 +2493,6 @@ mkCallUDs' env f args
     mk_spec_arg _ (Anon VisArg _)
       = UnspecArg
 
-    want_calls_for f = isLocalId f || isJust (maybeUnfoldingTemplate (realIdUnfolding f))
-         -- For imported things, we gather call instances if
-         -- there is an unfolding that we could in principle specialise
-         -- We might still decide not to use it (consulting dflags)
-         -- in specImports
-         -- Use 'realIdUnfolding' to ignore the loop-breaker flag!
-
     type_determines_value pred    -- See Note [Type determines value]
         = case classifyPredType pred of
             ClassPred cls _ -> not (isIPClass cls)  -- Superclasses can't be IPs
@@ -2533,7 +2501,68 @@ mkCallUDs' env f args
                                       -- Constraint-ranged family; #7785
             ForAllPred {}   -> True
 
-{-
+wantCallsFor :: SpecEnv -> Id -> Bool
+wantCallsFor env f
+ | isLocalId f    -- Local function; don't look at the unfolding, because
+ = True           -- unfoldings for local functions are discarded by cloneBind
+                  -- ToDo: we could keep a candidate set of let-binders to
+                  --       reduce the size of the UsageDetails
+
+ | otherwise      -- Imported function
+ = case unf of
+     NoUnfolding      -> False
+     BootUnfolding    -> False
+     OtherCon {}      -> False
+     CoreUnfolding { uf_src = src }
+       | isStableSource src -> True  -- INLINEABLE/INLINE
+               -- See Note [Specialise imported INLINABLE things]
+               -- Specialise even INLINE things; it hasn't inlined yet,
+               -- so perhaps it never will.  Moreover it may have calls
+               -- inside it that we want to specialise
+       | otherwise   -> aggressive_only   -- Imported, no INLINABLE
+     DFunUnfolding {} -> aggressive_only -- See Note [Do not specialise DFuns]
+  where
+    aggressive_only = gopt Opt_SpecialiseAggressively (se_dflags env)
+    unf = realIdUnfolding f
+          -- 'realIdUnfolding' to ignore the loop-breaker flag!
+
+{- Note [Do not specialise DFuns]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Ticket #18223 shows that specialising calls of DFuns is can cause a huge
+and entirely unnecessary blowup in program size.  Consider a call to
+    f @[[[[[[[[T]]]]]]]] d1 x
+where df :: C a => C [a]
+      d1 :: C [[[[[[[[T]]]]]]]] = dfC[] @[[[[[[[T]]]]]]] d1
+      d2 :: C [[[[[[[T]]]]]]]   = dfC[] @[[[[[[T]]]]]] d3
+      ...
+Now we'll specialise f's RHS, which may give rise to calls to 'g',
+also overloaded, which we will specialise, and so on.  However, if
+we specialise the calls to dfC[], we'll generate specialised copies of
+all methods of C, at all types; and the same for C's superclasses.
+
+And many of these specialised functions will never be called.  We are
+going to call the specialised 'f', and the specialised 'g', but DFuns
+group functions into a tuple, many of whose elements may never be used.
+
+With deeply-nested types this can lead to a simply overwhelming number
+of specialisations: see #18223 for a simple example (from the wild).
+I measured the number of specialisations for various numbers of calls
+of `flip evalStateT ()`, and got this
+
+                       Size after one simplification
+  #calls    #SPEC rules    Terms     Types
+      5         56          3100     10600
+      9        108         13660     77206
+
+The real tests case has 60+ calls, which blew GHC out of the water.
+
+Solution: don't specialise DFuns.  The downside is that if we end
+up with (h (dfun d)), /and/ we don't specialise 'h', then we won't
+pass to 'h' a tuple of specialised functions.
+
+However, the flag -fspecialise-aggressively (experimental, off by default)
+allows DFuns to specialise as well.
+
 Note [Type determines value]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Only specialise on non-IP *class* params, because these are the ones
@@ -2796,55 +2825,12 @@ deleteCallsFor bs calls = delDVarEnvList calls bs
 ************************************************************************
 -}
 
-newtype SpecM a = SpecM (State SpecState a) deriving (Functor)
-
-data SpecState = SpecState {
-                     spec_uniq_supply :: UniqSupply,
-                     spec_module :: Module,
-                     spec_dflags :: DynFlags
-                 }
-
-instance Applicative SpecM where
-    pure x = SpecM $ return x
-    (<*>) = ap
-
-instance Monad SpecM where
-    SpecM x >>= f = SpecM $ do y <- x
-                               case f y of
-                                   SpecM z ->
-                                       z
-
-instance MonadFail SpecM where
-   fail str = SpecM $ error str
-
-instance MonadUnique SpecM where
-    getUniqueSupplyM
-        = SpecM $ do st <- get
-                     let (us1, us2) = splitUniqSupply $ spec_uniq_supply st
-                     put $ st { spec_uniq_supply = us2 }
-                     return us1
-
-    getUniqueM
-        = SpecM $ do st <- get
-                     let (u,us') = takeUniqFromSupply $ spec_uniq_supply st
-                     put $ st { spec_uniq_supply = us' }
-                     return u
-
-instance HasDynFlags SpecM where
-    getDynFlags = SpecM $ liftM spec_dflags get
-
-instance HasModule SpecM where
-    getModule = SpecM $ liftM spec_module get
-
-runSpecM :: DynFlags -> Module -> SpecM a -> CoreM a
-runSpecM dflags this_mod (SpecM spec)
-    = do us <- getUniqueSupplyM
-         let initialState = SpecState {
-                                spec_uniq_supply = us,
-                                spec_module = this_mod,
-                                spec_dflags = dflags
-                            }
-         return $ evalState spec initialState
+type SpecM a = UniqSM a
+
+runSpecM :: SpecM a -> CoreM a
+runSpecM thing_inside
+  = do { us <- getUniqueSupplyM
+       ; return (initUs_ us thing_inside) }
 
 mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails)
 mapAndCombineSM _ []     = return ([], emptyUDs)


=====================================
compiler/GHC/Core/Ppr.hs
=====================================
@@ -161,15 +161,18 @@ pprOptCo co = sdocOption sdocSuppressCoercions $ \case
               True  -> angleBrackets (text "Co:" <> int (coercionSize co))
               False -> parens $ sep [ppr co, dcolon <+> ppr (coercionType co)]
 
+ppr_id_occ :: (SDoc -> SDoc) -> Id -> SDoc
+ppr_id_occ add_par id
+  | isJoinId id = add_par ((text "jump") <+> pp_id)
+  | otherwise   = pp_id
+  where
+    pp_id = pprPrefixOcc id
+
 ppr_expr :: OutputableBndr b => (SDoc -> SDoc) -> Expr b -> SDoc
         -- The function adds parens in context that need
         -- an atomic value (e.g. function args)
 
-ppr_expr add_par (Var name)
- | isJoinId name               = add_par ((text "jump") <+> pp_name)
- | otherwise                   = pp_name
- where
-   pp_name = pprPrefixOcc name
+ppr_expr add_par (Var id)      = ppr_id_occ add_par id
 ppr_expr add_par (Type ty)     = add_par (text "TYPE:" <+> ppr ty)       -- Weird
 ppr_expr add_par (Coercion co) = add_par (text "CO:" <+> ppr co)
 ppr_expr add_par (Lit lit)     = pprLiteral add_par lit
@@ -212,8 +215,7 @@ ppr_expr add_par expr@(App {})
 
                    _ -> parens (hang fun_doc 2 pp_args)
                    where
-                     fun_doc | isJoinId f = text "jump" <+> ppr f
-                             | otherwise  = ppr f
+                     fun_doc = ppr_id_occ noParens f
 
         _ -> parens (hang (pprParendExpr fun) 2 pp_args)
     }


=====================================
compiler/GHC/Core/SimpleOpt.hs
=====================================
@@ -16,17 +16,14 @@ module GHC.Core.SimpleOpt (
         -- ** Predicates on expressions
         exprIsConApp_maybe, exprIsLiteral_maybe, exprIsLambda_maybe,
 
-        -- ** Coercions and casts
-        pushCoArg, pushCoValArg, pushCoTyArg, collectBindersPushingCo
     ) where
 
 #include "HsVersions.h"
 
 import GHC.Prelude
 
-import GHC.Core.Opt.Arity( etaExpandToJoinPoint )
-
 import GHC.Core
+import GHC.Core.Opt.Arity
 import GHC.Core.Subst
 import GHC.Core.Utils
 import GHC.Core.FVs
@@ -46,18 +43,14 @@ import GHC.Core.Coercion.Opt ( optCoercion )
 import GHC.Core.Type hiding ( substTy, extendTvSubst, extendCvSubst, extendTvSubstList
                             , isInScope, substTyVarBndr, cloneTyVarBndr )
 import GHC.Core.Coercion hiding ( substCo, substCoVarBndr )
-import GHC.Core.TyCon ( tyConArity )
-import GHC.Core.Multiplicity
 import GHC.Builtin.Types
 import GHC.Builtin.Names
 import GHC.Types.Basic
 import GHC.Unit.Module ( Module )
 import GHC.Utils.Error
 import GHC.Driver.Session
-import GHC.Driver.Ppr
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
-import GHC.Data.Pair
 import GHC.Utils.Misc
 import GHC.Data.Maybe       ( orElse )
 import GHC.Data.FastString
@@ -756,6 +749,28 @@ a good cause. And it won't hurt other RULES and such that it comes across.
 ************************************************************************
 -}
 
+{- Note [Strictness and join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have
+
+   let f = \x.  if x>200 then e1 else e1
+
+and we know that f is strict in x.  Then if we subsequently
+discover that f is an arity-2 join point, we'll eta-expand it to
+
+   let f = \x y.  if x>200 then e1 else e1
+
+and now it's only strict if applied to two arguments.  So we should
+adjust the strictness info.
+
+A more common case is when
+
+   f = \x. error ".."
+
+and again its arity increases (#15517)
+-}
+
+
 -- | Returns Just (bndr,rhs) if the binding is a join point:
 -- If it's a JoinId, just return it
 -- If it's not yet a JoinId but is always tail-called,
@@ -789,27 +804,6 @@ joinPointBindings_maybe bndrs
   = mapM (uncurry joinPointBinding_maybe) bndrs
 
 
-{- Note [Strictness and join points]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Suppose we have
-
-   let f = \x.  if x>200 then e1 else e1
-
-and we know that f is strict in x.  Then if we subsequently
-discover that f is an arity-2 join point, we'll eta-expand it to
-
-   let f = \x y.  if x>200 then e1 else e1
-
-and now it's only strict if applied to two arguments.  So we should
-adjust the strictness info.
-
-A more common case is when
-
-   f = \x. error ".."
-
-and again its arity increases (#15517)
--}
-
 {- *********************************************************************
 *                                                                      *
          exprIsConApp_maybe
@@ -1324,275 +1318,3 @@ exprIsLambda_maybe _ _e
       Nothing
 
 
-{- *********************************************************************
-*                                                                      *
-              The "push rules"
-*                                                                      *
-************************************************************************
-
-Here we implement the "push rules" from FC papers:
-
-* The push-argument rules, where we can move a coercion past an argument.
-  We have
-      (fun |> co) arg
-  and we want to transform it to
-    (fun arg') |> co'
-  for some suitable co' and transformed arg'.
-
-* The PushK rule for data constructors.  We have
-       (K e1 .. en) |> co
-  and we want to transform to
-       (K e1' .. en')
-  by pushing the coercion into the arguments
--}
-
-pushCoArgs :: CoercionR -> [CoreArg] -> Maybe ([CoreArg], MCoercion)
-pushCoArgs co []         = return ([], MCo co)
-pushCoArgs co (arg:args) = do { (arg',  m_co1) <- pushCoArg  co  arg
-                              ; case m_co1 of
-                                  MCo co1 -> do { (args', m_co2) <- pushCoArgs co1 args
-                                                 ; return (arg':args', m_co2) }
-                                  MRefl  -> return (arg':args, MRefl) }
-
-pushCoArg :: CoercionR -> CoreArg -> Maybe (CoreArg, MCoercion)
--- We have (fun |> co) arg, and we want to transform it to
---         (fun arg) |> co
--- This may fail, e.g. if (fun :: N) where N is a newtype
--- C.f. simplCast in GHC.Core.Opt.Simplify
--- 'co' is always Representational
--- If the returned coercion is Nothing, then it would have been reflexive
-pushCoArg co (Type ty) = do { (ty', m_co') <- pushCoTyArg co ty
-                            ; return (Type ty', m_co') }
-pushCoArg co val_arg   = do { (arg_co, m_co') <- pushCoValArg co
-                            ; return (val_arg `mkCast` arg_co, m_co') }
-
-pushCoTyArg :: CoercionR -> Type -> Maybe (Type, MCoercionR)
--- We have (fun |> co) @ty
--- Push the coercion through to return
---         (fun @ty') |> co'
--- 'co' is always Representational
--- If the returned coercion is Nothing, then it would have been reflexive;
--- it's faster not to compute it, though.
-pushCoTyArg co ty
-  -- The following is inefficient - don't do `eqType` here, the coercion
-  -- optimizer will take care of it. See #14737.
-  -- -- | tyL `eqType` tyR
-  -- -- = Just (ty, Nothing)
-
-  | isReflCo co
-  = Just (ty, MRefl)
-
-  | isForAllTy_ty tyL
-  = ASSERT2( isForAllTy_ty tyR, ppr co $$ ppr ty )
-    Just (ty `mkCastTy` co1, MCo co2)
-
-  | otherwise
-  = Nothing
-  where
-    Pair tyL tyR = coercionKind co
-       -- co :: tyL ~ tyR
-       -- tyL = forall (a1 :: k1). ty1
-       -- tyR = forall (a2 :: k2). ty2
-
-    co1 = mkSymCo (mkNthCo Nominal 0 co)
-       -- co1 :: k2 ~N k1
-       -- Note that NthCo can extract a Nominal equality between the
-       -- kinds of the types related by a coercion between forall-types.
-       -- See the NthCo case in GHC.Core.Lint.
-
-    co2 = mkInstCo co (mkGReflLeftCo Nominal ty co1)
-        -- co2 :: ty1[ (ty|>co1)/a1 ] ~ ty2[ ty/a2 ]
-        -- Arg of mkInstCo is always nominal, hence mkNomReflCo
-
-pushCoValArg :: CoercionR -> Maybe (Coercion, MCoercion)
--- We have (fun |> co) arg
--- Push the coercion through to return
---         (fun (arg |> co_arg)) |> co_res
--- 'co' is always Representational
--- If the second returned Coercion is actually Nothing, then no cast is necessary;
--- the returned coercion would have been reflexive.
-pushCoValArg co
-  -- The following is inefficient - don't do `eqType` here, the coercion
-  -- optimizer will take care of it. See #14737.
-  -- -- | tyL `eqType` tyR
-  -- -- = Just (mkRepReflCo arg, Nothing)
-
-  | isReflCo co
-  = Just (mkRepReflCo arg, MRefl)
-
-  | isFunTy tyL
-  , (co_mult, co1, co2) <- decomposeFunCo Representational co
-  , isReflexiveCo co_mult
-    -- We can't push the coercion in the case where co_mult isn't reflexivity:
-    -- it could be an unsafe axiom, and losing this information could yield
-    -- ill-typed terms. For instance (fun x ::(1) Int -> (fun _ -> () |> co) x)
-    -- with co :: (Int -> ()) ~ (Int #-> ()), would reduce to (fun x ::(1) Int
-    -- -> (fun _ ::(Many) Int -> ()) x) which is ill-typed
-
-              -- If   co  :: (tyL1 -> tyL2) ~ (tyR1 -> tyR2)
-              -- then co1 :: tyL1 ~ tyR1
-              --      co2 :: tyL2 ~ tyR2
-  = ASSERT2( isFunTy tyR, ppr co $$ ppr arg )
-    Just (mkSymCo co1, MCo co2)
-
-  | otherwise
-  = Nothing
-  where
-    arg = funArgTy tyR
-    Pair tyL tyR = coercionKind co
-
-pushCoercionIntoLambda
-    :: InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
--- This implements the Push rule from the paper on coercions
---    (\x. e) |> co
--- ===>
---    (\x'. e |> co')
-pushCoercionIntoLambda in_scope x e co
-    | ASSERT(not (isTyVar x) && not (isCoVar x)) True
-    , Pair s1s2 t1t2 <- coercionKind co
-    , Just (_, _s1,_s2) <- splitFunTy_maybe s1s2
-    , Just (w1, t1,_t2) <- splitFunTy_maybe t1t2
-    , (co_mult, co1, co2) <- decomposeFunCo Representational co
-    , isReflexiveCo co_mult
-      -- We can't push the coercion in the case where co_mult isn't
-      -- reflexivity. See pushCoValArg for more details.
-    = let
-          -- Should we optimize the coercions here?
-          -- Otherwise they might not match too well
-          x' = x `setIdType` t1 `setIdMult` w1
-          in_scope' = in_scope `extendInScopeSet` x'
-          subst = extendIdSubst (mkEmptySubst in_scope')
-                                x
-                                (mkCast (Var x') co1)
-      in Just (x', substExpr subst e `mkCast` co2)
-    | otherwise
-    = pprTrace "exprIsLambda_maybe: Unexpected lambda in case" (ppr (Lam x e))
-      Nothing
-
-pushCoDataCon :: DataCon -> [CoreExpr] -> Coercion
-              -> Maybe (DataCon
-                       , [Type]      -- Universal type args
-                       , [CoreExpr]) -- All other args incl existentials
--- Implement the KPush reduction rule as described in "Down with kinds"
--- The transformation applies iff we have
---      (C e1 ... en) `cast` co
--- where co :: (T t1 .. tn) ~ to_ty
--- The left-hand one must be a T, because exprIsConApp returned True
--- but the right-hand one might not be.  (Though it usually will.)
-pushCoDataCon dc dc_args co
-  | isReflCo co || from_ty `eqType` to_ty  -- try cheap test first
-  , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) dc_args
-  = Just (dc, map exprToType univ_ty_args, rest_args)
-
-  | Just (to_tc, to_tc_arg_tys) <- splitTyConApp_maybe to_ty
-  , to_tc == dataConTyCon dc
-        -- These two tests can fail; we might see
-        --      (C x y) `cast` (g :: T a ~ S [a]),
-        -- where S is a type function.  In fact, exprIsConApp
-        -- will probably not be called in such circumstances,
-        -- but there's nothing wrong with it
-
-  = let
-        tc_arity       = tyConArity to_tc
-        dc_univ_tyvars = dataConUnivTyVars dc
-        dc_ex_tcvars   = dataConExTyCoVars dc
-        arg_tys        = dataConRepArgTys dc
-
-        non_univ_args  = dropList dc_univ_tyvars dc_args
-        (ex_args, val_args) = splitAtList dc_ex_tcvars non_univ_args
-
-        -- Make the "Psi" from the paper
-        omegas = decomposeCo tc_arity co (tyConRolesRepresentational to_tc)
-        (psi_subst, to_ex_arg_tys)
-          = liftCoSubstWithEx Representational
-                              dc_univ_tyvars
-                              omegas
-                              dc_ex_tcvars
-                              (map exprToType ex_args)
-
-          -- Cast the value arguments (which include dictionaries)
-        new_val_args = zipWith cast_arg (map scaledThing arg_tys) val_args
-        cast_arg arg_ty arg = mkCast arg (psi_subst arg_ty)
-
-        to_ex_args = map Type to_ex_arg_tys
-
-        dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tcvars,
-                         ppr arg_tys, ppr dc_args,
-                         ppr ex_args, ppr val_args, ppr co, ppr from_ty, ppr to_ty, ppr to_tc
-                         , ppr $ mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args) ]
-    in
-    ASSERT2( eqType from_ty (mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args)), dump_doc )
-    ASSERT2( equalLength val_args arg_tys, dump_doc )
-    Just (dc, to_tc_arg_tys, to_ex_args ++ new_val_args)
-
-  | otherwise
-  = Nothing
-
-  where
-    Pair from_ty to_ty = coercionKind co
-
-collectBindersPushingCo :: CoreExpr -> ([Var], CoreExpr)
--- Collect lambda binders, pushing coercions inside if possible
--- E.g.   (\x.e) |> g         g :: <Int> -> blah
---        = (\x. e |> Nth 1 g)
---
--- That is,
---
--- collectBindersPushingCo ((\x.e) |> g) === ([x], e |> Nth 1 g)
-collectBindersPushingCo e
-  = go [] e
-  where
-    -- Peel off lambdas until we hit a cast.
-    go :: [Var] -> CoreExpr -> ([Var], CoreExpr)
-    -- The accumulator is in reverse order
-    go bs (Lam b e)   = go (b:bs) e
-    go bs (Cast e co) = go_c bs e co
-    go bs e           = (reverse bs, e)
-
-    -- We are in a cast; peel off casts until we hit a lambda.
-    go_c :: [Var] -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
-    -- (go_c bs e c) is same as (go bs e (e |> c))
-    go_c bs (Cast e co1) co2 = go_c bs e (co1 `mkTransCo` co2)
-    go_c bs (Lam b e)    co  = go_lam bs b e co
-    go_c bs e            co  = (reverse bs, mkCast e co)
-
-    -- We are in a lambda under a cast; peel off lambdas and build a
-    -- new coercion for the body.
-    go_lam :: [Var] -> Var -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
-    -- (go_lam bs b e c) is same as (go_c bs (\b.e) c)
-    go_lam bs b e co
-      | isTyVar b
-      , let Pair tyL tyR = coercionKind co
-      , ASSERT( isForAllTy_ty tyL )
-        isForAllTy_ty tyR
-      , isReflCo (mkNthCo Nominal 0 co)  -- See Note [collectBindersPushingCo]
-      = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkTyVarTy b)))
-
-      | isCoVar b
-      , let Pair tyL tyR = coercionKind co
-      , ASSERT( isForAllTy_co tyL )
-        isForAllTy_co tyR
-      , isReflCo (mkNthCo Nominal 0 co)  -- See Note [collectBindersPushingCo]
-      , let cov = mkCoVarCo b
-      = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkCoercionTy cov)))
-
-      | isId b
-      , let Pair tyL tyR = coercionKind co
-      , ASSERT( isFunTy tyL) isFunTy tyR
-      , (co_mult, co_arg, co_res) <- decomposeFunCo Representational co
-      , isReflCo co_mult -- See Note [collectBindersPushingCo]
-      , isReflCo co_arg  -- See Note [collectBindersPushingCo]
-      = go_c (b:bs) e co_res
-
-      | otherwise = (reverse bs, mkCast (Lam b e) co)
-
-{-
-
-Note [collectBindersPushingCo]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We just look for coercions of form
-   <type> # w -> blah
-(and similarly for foralls) to keep this function simple.  We could do
-more elaborate stuff, but it'd involve substitution etc.
-
--}


=====================================
compiler/GHC/Core/Subst.hs
=====================================
@@ -343,6 +343,8 @@ instance Outputable Subst where
 
 substExprSC :: HasDebugCallStack => Subst -> CoreExpr -> CoreExpr
 -- Just like substExpr, but a no-op if the substitution is empty
+-- Note that this does /not/ replace occurrences of free vars with
+-- their canonical representatives in the in-scope set
 substExprSC subst orig_expr
   | isEmptySubst subst = orig_expr
   | otherwise          = -- pprTrace "enter subst-expr" (doc $$ ppr orig_expr) $
@@ -628,6 +630,9 @@ substIdInfo subst new_id info
 
 ------------------
 -- | Substitutes for the 'Id's within an unfolding
+-- NB: substUnfolding /discards/ any unfolding without
+--     without a Stable source.  This is usually what we want,
+--     but it may be a bit unexpected
 substUnfolding, substUnfoldingSC :: Subst -> Unfolding -> Unfolding
         -- Seq'ing on the returned Unfolding is enough to cause
         -- all the substitutions to happen completely



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/716b67d9b0ce485e2bfb6951b5f0586af642cad8

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/716b67d9b0ce485e2bfb6951b5f0586af642cad8
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200821/b68db022/attachment-0001.html>