[Git][ghc/ghc][wip/T23914] Unarise: Split Rubbish literals in function args

Matthew Craven (@clyring) gitlab at gitlab.haskell.org
Tue Sep 5 19:12:49 UTC 2023



Matthew Craven pushed to branch wip/T23914 at Glasgow Haskell Compiler / GHC


Commits:
246fde0d by Matthew Craven at 2023-09-05T15:12:03-04:00
Unarise: Split Rubbish literals in function args

Fixes #23914.  Also adds a check to STG lint that
these args are properly unary or nullary after unarisation

- - - - -


6 changed files:

- compiler/GHC/Stg/Lint.hs
- compiler/GHC/Stg/Unarise.hs
- compiler/GHC/Types/Literal.hs
- compiler/GHC/Types/RepType.hs
- + testsuite/tests/core-to-stg/T23914.hs
- testsuite/tests/core-to-stg/all.T


Changes:

=====================================
compiler/GHC/Stg/Lint.hs
=====================================
@@ -175,9 +175,34 @@ lintStgTopBindings platform logger diag_opts opts extra_vars this_mod unarised w
     lint_bind (StgTopLifted bind) = lintStgBinds TopLevel bind
     lint_bind (StgTopStringLit v _) = return [v]
 
-lintStgArg :: StgArg -> LintM ()
-lintStgArg (StgLitArg _) = return ()
-lintStgArg (StgVarArg v) = lintStgVar v
+lintStgConArg :: StgArg -> LintM ()
+lintStgConArg arg = do
+  unarised <- lf_unarised <$> getLintFlags
+  when unarised $ case typePrimRep_maybe (stgArgType arg) of
+    -- Note [Post-unarisation invariants], invariant 4
+    Just [_] -> pure ()
+    badRep   -> addErrL $
+      text "Non-unary constructor arg: " <> ppr arg $$
+      text "Its PrimReps are: " <> ppr badRep
+
+  case arg of
+    StgLitArg _ -> pure ()
+    StgVarArg v -> lintStgVar v
+
+lintStgFunArg :: StgArg -> LintM ()
+lintStgFunArg arg = do
+  unarised <- lf_unarised <$> getLintFlags
+  when unarised $ case typePrimRep_maybe (stgArgType arg) of
+    -- Note [Post-unarisation invariants], invariant 3
+    Just []  -> pure ()
+    Just [_] -> pure ()
+    badRep   -> addErrL $
+      text "Function arg is not unary or void: " <> ppr arg $$
+      text "Its PrimReps are: " <> ppr badRep
+
+  case arg of
+    StgLitArg _ -> pure ()
+    StgVarArg v -> lintStgVar v
 
 lintStgVar :: Id -> LintM ()
 lintStgVar id = checkInScope id
@@ -248,8 +273,7 @@ lintStgRhs rhs@(StgRhsCon _ con _ _ args _) = do
 
     lintConApp con args (pprStgRhs opts rhs)
 
-    mapM_ lintStgArg args
-    mapM_ checkPostUnariseConArg args
+    mapM_ lintStgConArg args
 
 lintStgExpr :: (OutputablePass a, BinderP a ~ Id) => GenStgExpr a -> LintM ()
 
@@ -257,7 +281,7 @@ lintStgExpr (StgLit _) = return ()
 
 lintStgExpr e@(StgApp fun args) = do
   lintStgVar fun
-  mapM_ lintStgArg args
+  mapM_ lintStgFunArg args
   lintAppCbvMarks e
   lintStgAppReps fun args
 
@@ -275,11 +299,10 @@ lintStgExpr app@(StgConApp con _n args _arg_tys) = do
     opts <- getStgPprOpts
     lintConApp con args (pprStgExpr opts app)
 
-    mapM_ lintStgArg args
-    mapM_ checkPostUnariseConArg args
+    mapM_ lintStgConArg args
 
 lintStgExpr (StgOpApp _ args _) =
-    mapM_ lintStgArg args
+    mapM_ lintStgFunArg args
 
 lintStgExpr (StgLet _ binds body) = do
     binders <- lintStgBinds NotTopLevel binds
@@ -325,7 +348,7 @@ lintAlt GenStgAlt{ alt_con   = DataAlt _
 -- Post unarise check we apply constructors to the right number of args.
 -- This can be violated by invalid use of unsafeCoerce as showcased by test
 -- T9208
-lintConApp :: Foldable t => DataCon -> t a -> SDoc -> LintM ()
+lintConApp :: DataCon -> [StgArg] -> SDoc -> LintM ()
 lintConApp con args app = do
     unarised <- lf_unarised <$> getLintFlags
     when (unarised &&
@@ -361,6 +384,8 @@ lintStgAppReps fun args = do
         = match_args actual_reps_left expected_reps_left
 
         -- Check for void rep which can be either an empty list *or* [VoidRep]
+           -- No, typePrimRep_maybe will never return a result containing VoidRep.
+           -- We should refactor to make this obvious from the types.
         | isVoidRep actual_rep && isVoidRep expected_rep
         = match_args actual_reps_left expected_reps_left
 
@@ -507,20 +532,6 @@ checkPostUnariseBndr bndr = do
           ppr bndr <> text " has " <> text unexpected <> text " type " <>
           ppr (idType bndr)
 
--- Arguments shouldn't have sum, tuple, or void types.
-checkPostUnariseConArg :: StgArg -> LintM ()
-checkPostUnariseConArg arg = case arg of
-    StgLitArg _ ->
-      return ()
-    StgVarArg id -> do
-      lf <- getLintFlags
-      when (lf_unarised lf) $
-        forM_ (checkPostUnariseId id) $ \unexpected ->
-          addErrL $
-            text "After unarisation, arg " <>
-            ppr id <> text " has " <> text unexpected <> text " type " <>
-            ppr (idType id)
-
 -- Post-unarisation args and case alt binders should not have unboxed tuple,
 -- unboxed sum, or void types. Return what the binder is if it is one of these.
 checkPostUnariseId :: Id -> Maybe String


=====================================
compiler/GHC/Stg/Unarise.hs
=====================================
@@ -356,20 +356,17 @@ Note [Post-unarisation invariants]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 STG programs after unarisation have these invariants:
 
-  * No unboxed sums at all.
+ 1. No unboxed sums at all.
 
-  * No unboxed tuple binders. Tuples only appear in return position.
+ 2. No unboxed tuple binders. Tuples only appear in return position.
 
-  * DataCon applications (StgRhsCon and StgConApp) don't have void arguments.
+ 3. Binders and literals always have zero (for void arguments) or one PrimRep.
+
+ 4. DataCon applications (StgRhsCon and StgConApp) don't have void arguments.
     This means that it's safe to wrap `StgArg`s of DataCon applications with
     `GHC.StgToCmm.Env.NonVoid`, for example.
 
-  * Similar to unboxed tuples, Note [Rubbish literals] of TupleRep may only
-    appear in return position.
-
-  * Alt binders (binders in patterns) are always non-void.
-
-  * Binders always have zero (for void arguments) or one PrimRep.
+ 5. Alt binders (binders in patterns) are always non-void.
 -}
 
 module GHC.Stg.Unarise (unarise) where
@@ -555,7 +552,7 @@ unariseExpr rho (StgCase scrut bndr alt_ty alts)
 
   -- See (3) of Note [Rubbish literals] in GHC.Types.Literal
   | StgLit lit <- scrut
-  , Just args' <- unariseRubbish_maybe lit
+  , Just args' <- unariseLiteral_maybe lit
   = elimCase rho args' bndr alt_ty alts
 
   -- general case
@@ -592,20 +589,24 @@ unariseUbxSumOrTupleArgs rho us dc args ty_args
   | otherwise
   = panic "unariseUbxSumOrTupleArgs: Constructor not a unboxed sum or tuple"
 
--- Doesn't return void args.
-unariseRubbish_maybe :: Literal -> Maybe [OutStgArg]
-unariseRubbish_maybe (LitRubbish torc rep)
+-- Returns @Nothing@ if the given literal is already unary (exactly
+-- one PrimRep).  Doesn't return void args.
+--
+-- This needs to exist because rubbish literals can have any representation.
+-- See also Note [Rubbish literals] in GHC.Types.Literal.
+unariseLiteral_maybe :: Literal -> Maybe [OutStgArg]
+unariseLiteral_maybe (LitRubbish torc rep)
   | [prep] <- preps
-  , not (isVoidRep prep)
+  , assert (not (isVoidRep prep)) True
   = Nothing   -- Single, non-void PrimRep. Nothing to do!
 
   | otherwise -- Multiple reps, possibly with VoidRep. Eliminate via elimCase
   = Just [ StgLitArg (LitRubbish torc (primRepToRuntimeRep prep))
-         | prep <- preps, not (isVoidRep prep) ]
+         | prep <- preps, assert (not (isVoidRep prep)) True ]
   where
-    preps = runtimeRepPrimRep (text "unariseRubbish_maybe") rep
+    preps = runtimeRepPrimRep (text "unariseLiteral_maybe") rep
 
-unariseRubbish_maybe _ = Nothing
+unariseLiteral_maybe _ = Nothing
 
 --------------------------------------------------------------------------------
 
@@ -1052,7 +1053,11 @@ unariseFunArg rho (StgVarArg x) =
     Just (MultiVal as)  -> as
     Just (UnaryVal arg) -> [arg]
     Nothing             -> [StgVarArg x]
-unariseFunArg _ arg = [arg]
+unariseFunArg _ arg@(StgLitArg lit) = case unariseLiteral_maybe lit of
+  -- forgetting to unariseLiteral_maybe here caused #23914
+  Just [] -> [voidArg]
+  Just as -> as
+  Nothing -> [arg]
 
 unariseFunArgs :: UnariseEnv -> [StgArg] -> [StgArg]
 unariseFunArgs = concatMap . unariseFunArg
@@ -1078,7 +1083,7 @@ unariseConArg rho (StgVarArg x) =
                                      -- is a void, and so should be eliminated
       | otherwise -> [StgVarArg x]
 unariseConArg _ arg@(StgLitArg lit)
-  | Just as <- unariseRubbish_maybe lit
+  | Just as <- unariseLiteral_maybe lit
   = as
   | otherwise
   = assert (not (isZeroBitTy (literalType lit))) -- We have no non-rubbish void literals


=====================================
compiler/GHC/Types/Literal.hs
=====================================
@@ -1006,8 +1006,9 @@ data type. Here are the moving parts:
    take apart a case scrutinisation on, or arg occurrence of, e.g.,
    `RUBBISH[TupleRep[IntRep,DoubleRep]]` (which may stand in for `(# Int#, Double# #)`)
    into its sub-parts `RUBBISH[IntRep]` and `RUBBISH[DoubleRep]`, similar to
-   unboxed tuples. `RUBBISH[VoidRep]` is erased.
-   See 'unariseRubbish_maybe' and also Note [Post-unarisation invariants].
+   unboxed tuples.
+
+   See 'unariseLiteral_maybe' and also Note [Post-unarisation invariants].
 
 4. Cmm: We translate 'LitRubbish' to their actual rubbish value in 'cgLit'.
    The particulars are boring, and only matter when debugging illicit use of


=====================================
compiler/GHC/Types/RepType.hs
=====================================
@@ -607,8 +607,11 @@ kindPrimRep_maybe ki
   = pprPanic "kindPrimRep" (ppr ki)
 
 -- | Take a type of kind RuntimeRep and extract the list of 'PrimRep' that
--- it encodes. See also Note [Getting from RuntimeRep to PrimRep]
--- The [PrimRep] is the final runtime representation /after/ unarisation
+-- it encodes. See also Note [Getting from RuntimeRep to PrimRep].
+-- The @[PrimRep]@ is the final runtime representation /after/ unarisation
+-- and does not contain VoidRep.
+--
+-- The result does not contain any VoidRep.
 runtimeRepPrimRep :: HasDebugCallStack => SDoc -> RuntimeRepType -> [PrimRep]
 runtimeRepPrimRep doc rr_ty
   | Just rr_ty' <- coreView rr_ty
@@ -620,9 +623,11 @@ runtimeRepPrimRep doc rr_ty
   = pprPanic "runtimeRepPrimRep" (doc $$ ppr rr_ty)
 
 -- | Take a type of kind RuntimeRep and extract the list of 'PrimRep' that
--- it encodes. See also Note [Getting from RuntimeRep to PrimRep]
--- The [PrimRep] is the final runtime representation /after/ unarisation
--- Returns Nothing if rep can't be determined. Eg. levity polymorphic types.
+-- it encodes. See also Note [Getting from RuntimeRep to PrimRep].
+-- The @[PrimRep]@ is the final runtime representation /after/ unarisation
+-- and does not contain VoidRep.
+--
+-- Returns @Nothing@ if rep can't be determined. Eg. levity polymorphic types.
 runtimeRepPrimRep_maybe :: Type -> Maybe [PrimRep]
 runtimeRepPrimRep_maybe rr_ty
   | Just rr_ty' <- coreView rr_ty


=====================================
testsuite/tests/core-to-stg/T23914.hs
=====================================
@@ -0,0 +1,18 @@
+{-# LANGUAGE UnboxedTuples #-}
+module T23914 where
+
+type Registers = (# (), () #)
+
+p :: Registers -> ()
+p x = control0 () x
+
+control0 :: () -> Registers -> ()
+control0 x = controlWithMode x
+{-# SCC control0 #-}
+
+controlWithMode :: () -> Registers -> ()
+controlWithMode x = thro x
+{-# SCC controlWithMode #-}
+
+thro :: () -> Registers -> ()
+thro x y = thro x y


=====================================
testsuite/tests/core-to-stg/all.T
=====================================
@@ -2,3 +2,4 @@
 
 test('T19700', normal, compile, ['-O'])
 test('T23270', [grep_errmsg(r'patError')], compile, ['-O0 -dsuppress-uniques -ddump-prep'])
+test('T23914', normal, compile, ['-O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/246fde0da09c4d9e0ff2c530d19efc8c2373c5f0

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/246fde0da09c4d9e0ff2c530d19efc8c2373c5f0
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230905/9791efe4/attachment-0001.html>


More information about the ghc-commits mailing list