[Git][ghc/ghc][wip/T22023] More fixes for `type data` declarations

Simon Peyton Jones (@simonpj) gitlab at gitlab.haskell.org
Thu Mar 2 17:57:12 UTC 2023



Simon Peyton Jones pushed to branch wip/T22023 at Glasgow Haskell Compiler / GHC


Commits:
18d49249 by Simon Peyton Jones at 2023-03-02T17:58:12+00:00
More fixes for `type data` declarations

This MR fixes #23022 and #23023.  Specifically

* Beef up Note [Type data declarations] in GHC.Rename.Module,
  to make invariant (I1) explicit, and to name the several
  wrinkles.

  And add references to these specific wrinkles.

* Add a Lint check for invariant (I1) above.
  See GHC.Core.Lint.checkTypeDataConOcc

* Disable the `caseRules` for dataToTag# for `type data` values.
  See Wrinkle (W2c) in the Note above.  Fixes #23023.

* Refine the assertion in dataConRepArgTys, so that it does not
  complain about the absence of a wrapper for a `type data` constructor
  Fixes #23022.

Acked-by: Simon Peyton Jones <simon.peytonjones at gmail.com>

- - - - -


10 changed files:

- compiler/GHC/Core/DataCon.hs
- compiler/GHC/Core/Lint.hs
- compiler/GHC/Core/Opt/ConstantFold.hs
- compiler/GHC/Core/Utils.hs
- compiler/GHC/HsToCore/Pmc/Solver.hs
- compiler/GHC/Rename/Module.hs
- compiler/GHC/Tc/Gen/App.hs
- + testsuite/tests/gadt/T23022.hs
- + testsuite/tests/gadt/T23023.hs
- testsuite/tests/gadt/all.T


Changes:

=====================================
compiler/GHC/Core/DataCon.hs
=====================================
@@ -1669,13 +1669,25 @@ dataConOtherTheta dc = dcOtherTheta dc
 -- evidence, after any flattening has been done and without substituting for
 -- any type variables
 dataConRepArgTys :: DataCon -> [Scaled Type]
-dataConRepArgTys (MkData { dcRep = rep
-                         , dcEqSpec = eq_spec
+dataConRepArgTys (MkData { dcRep        = rep
+                         , dcEqSpec     = eq_spec
                          , dcOtherTheta = theta
-                         , dcOrigArgTys = orig_arg_tys })
+                         , dcOrigArgTys = orig_arg_tys
+                         , dcRepTyCon   = tc })
   = case rep of
-      NoDataConRep -> assert (null eq_spec) $ map unrestricted theta ++ orig_arg_tys
       DCR { dcr_arg_tys = arg_tys } -> arg_tys
+      NoDataConRep
+        | isTypeDataTyCon tc -> assert (null theta)   $
+                                orig_arg_tys
+          -- `type data` declarations can be GADTs (and hence have an eq_spec)
+          -- but no wrapper.  They cannot have a theta.
+          -- See Note [Type data declarations] in GHC.Rename.Module
+          -- You might wonder why we ever call dataConRepArgTys for `type data`;
+          -- I think it's because of the call in mkDataCon, which in turn feeds
+          -- into dcRepArity, which in turn is used in mkDataConWorkId.
+          -- c.f. #23022
+        | otherwise          -> assert (null eq_spec) $
+                                map unrestricted theta ++ orig_arg_tys
 
 -- | The string @package:module.name@ identifying a constructor, which is attached
 -- to its info table and used by the GHCi debugger and the heap profiler


=====================================
compiler/GHC/Core/Lint.hs
=====================================
@@ -1021,6 +1021,9 @@ lintIdOcc var nargs
 
         ; checkDeadIdOcc var
         ; checkJoinOcc var nargs
+        ; case isDataConId_maybe var of
+             Nothing -> return ()
+             Just dc -> checkTypeDataConOcc "expression" dc
 
         ; usage <- varCallSiteUsage var
 
@@ -1107,6 +1110,13 @@ checkJoinOcc var n_args
   | otherwise
   = return ()
 
+checkTypeDataConOcc :: String -> DataCon -> LintM ()
+-- Check that the Id is not a data constructor of a `type data` declaration
+-- Invariant (I1) of Note [Type data declarations] in GHC.Rename.Module
+checkTypeDataConOcc what dc
+  = checkL (not (isTypeDataTyCon (dataConTyCon dc))) $
+    (text "type data constructor found in a" <+> text what <> colon <+> ppr dc)
+
 -- | This function checks that we are able to perform eta expansion for
 -- functions with no binding, in order to satisfy invariant I3
 -- from Note [Representation polymorphism invariants] in GHC.Core.
@@ -1561,10 +1571,11 @@ lintCoreAlt case_bndr scrut_ty _scrut_mult alt_ty alt@(Alt (DataAlt con) args rh
   = zeroUE <$ addErrL (mkNewTyDataConAltMsg scrut_ty alt)
   | Just (tycon, tycon_arg_tys) <- splitTyConApp_maybe scrut_ty
   = addLoc (CaseAlt alt) $  do
-    {   -- First instantiate the universally quantified
-        -- type variables of the data constructor
-        -- We've already check
-      lintL (tycon == dataConTyCon con) (mkBadConMsg tycon con)
+    { checkTypeDataConOcc "pattern" con
+    ; lintL (tycon == dataConTyCon con) (mkBadConMsg tycon con)
+
+      -- Instantiate the universally quantified
+      -- type variables of the data constructor
     ; let { con_payload_ty = piResultTys (dataConRepType con) tycon_arg_tys
           ; binderMult (Named _)   = ManyTy
           ; binderMult (Anon st _) = scaledMult st


=====================================
compiler/GHC/Core/Opt/ConstantFold.hs
=====================================
@@ -55,7 +55,7 @@ import GHC.Core.TyCo.Compare( eqType )
 import GHC.Core.TyCon
    ( tyConDataCons_maybe, isAlgTyCon, isEnumerationTyCon
    , isNewTyCon, tyConDataCons
-   , tyConFamilySize )
+   , tyConFamilySize, isTypeDataTyCon )
 import GHC.Core.Map.Expr ( eqCoreExpr )
 
 import GHC.Builtin.PrimOps ( PrimOp(..), tagToEnumKey )
@@ -3188,6 +3188,8 @@ caseRules _ (App (App (Var f) (Type ty)) v)       -- dataToTag x
   | Just DataToTagOp <- isPrimOpId_maybe f
   , Just (tc, _) <- tcSplitTyConApp_maybe ty
   , isAlgTyCon tc
+  , not (isTypeDataTyCon tc) -- See wrinkle (W2c) in GHC.Rename.Module
+                             -- Note [Type data declarations]
   = Just (v, tx_con_dtt ty
            , \v -> App (App (Var f) (Type ty)) (Var v))
 


=====================================
compiler/GHC/Core/Utils.hs
=====================================
@@ -845,7 +845,8 @@ There are two exceptions where we avoid refining a DEFAULT case:
            __DEFAULT -> ()
 
   Namely, we do _not_ want to match on `A`, as it doesn't exist at the value
-  level!
+  level! See wrinkle (W2b) in GHC.Rename.Module Note [Type data declarations]
+
 
 Note [Combine identical alternatives]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


=====================================
compiler/GHC/HsToCore/Pmc/Solver.hs
=====================================
@@ -152,8 +152,8 @@ vanillaCompleteMatchTC tc =
                tc == tYPETyCon    = Just []
              | -- Similarly, treat `type data` declarations as empty data types on
                -- the term level, as `type data` data constructors only exist at
-               -- the type level (#22964).
-               -- See Note [Type data declarations] in GHC.Rename.Module.
+               -- the type level (#22964).  See wrinkle (W2a) in
+               -- Note [Type data declarations] in GHC.Rename.Module.
                isTypeDataTyCon tc = Just []
              | otherwise          = tyConDataCons_maybe tc
   in vanillaCompleteMatch . mkUniqDSet . map RealDataCon <$> mb_dcs


=====================================
compiler/GHC/Rename/Module.hs
=====================================
@@ -2073,9 +2073,31 @@ preceded by `type`, with the following restrictions:
 
 (R5) There are no deriving clauses.
 
+The data constructors of a `type data` declaration obey the following
+Core invariant:
+
+(I1) The data constructors of a `type data` declaration may be mentioned in
+     /types/, but never in /terms/ or the /pattern of a case alternative/.
+
 The main parts of the implementation are:
 
-* (R0): The parser recognizes `type data` (but not `type newtype`).
+* Wrappers.  A `type data` declaration _never_ generates wrappers for
+  its data constructors, as they only make sense for value-level data
+  constructors.  See `wrapped_reqd` in GHC.Types.Id.Make.mkDataConRep`
+  for the place where this check is implemented.
+
+  This includes `type data` declarations implemented as GADTs, such as
+  this example from #22948:
+
+    type data T a where
+      A :: T Int
+      B :: T a
+
+  If `T` were an ordinary `data` declaration, then `A` would have a wrapper
+  to account for the GADT-like equality in its return type. Because `T` is
+  declared as a `type data` declaration, however, the wrapper is omitted.
+
+* The parser recognizes `type data` (but not `type newtype`); this ensures (R0).
 
 * During the initial construction of the AST,
   GHC.Parser.PostProcess.checkNewOrData sets the `Bool` argument of the
@@ -2134,54 +2156,48 @@ The main parts of the implementation are:
   `type data` declarations.  When these are converted back to Hs types
   in a splice, the constructors are placed in the TcCls namespace.
 
-* A `type data` declaration _never_ generates wrappers for its data
-  constructors, as they only make sense for value-level data constructors.
-  See `wrapped_reqd` in GHC.Types.Id.Make.mkDataConRep` for the place where
-  this check is implemented.
+Extra wrinkles:
 
-  This includes `type data` declarations implemented as GADTs, such as
-  this example from #22948:
+(W1) To prevent users from conjuring up `type data` values at the term level,
+     we disallow using the tagToEnum# function on a type headed by a `type
+     data` type. For instance, GHC will reject this code:
 
-    type data T a where
-      A :: T Int
-      B :: T a
+       type data Letter = A | B | C
 
-  If `T` were an ordinary `data` declaration, then `A` would have a wrapper
-  to account for the GADT-like equality in its return type. Because `T` is
-  declared as a `type data` declaration, however, the wrapper is omitted.
-
-* Although `type data` data constructors do not exist at the value level,
-  it is still possible to match on a value whose type is headed by a `type data`
-  type constructor, such as this example from #22964:
-
-    type data T a where
-      A :: T Int
-      B :: T a
+       f :: Letter
+       f = tagToEnum# 0#
 
-    f :: T a -> ()
-    f x = case x of {}
+     See `GHC.Tc.Gen.App.checkTagToEnum`, specifically `check_enumeration`.
 
-  This has two consequences:
+(W2) Although `type data` data constructors do not exist at the value level,
+     it is still possible to match on a value whose type is headed by a `type data`
+     type constructor, such as this example from #22964:
 
-  * During checking the coverage of `f`'s pattern matches, we treat `T` as if it
-    were an empty data type so that GHC does not warn the user to match against
-    `A` or `B`. (Otherwise, you end up with the bug reported in #22964.)
-    See GHC.HsToCore.Pmc.Solver.vanillaCompleteMatchTC.
+       type data T a where
+         A :: T Int
+         B :: T a
 
-  * In `GHC.Core.Utils.refineDataAlt`, do /not/ fill in the DEFAULT case with
-    the data constructor. See
-    Note [Refine DEFAULT case alternatives] Exception 2, in GHC.Core.Utils.
+       f :: T a -> ()
+       f x = case x of {}
 
-* To prevent users from conjuring up `type data` values at the term level, we
-  disallow using the tagToEnum# function on a type headed by a `type data`
-  type. For instance, GHC will reject this code:
+     And yet we must guarantee invariant (I1). This has three consequences:
 
-    type data Letter = A | B | C
+     (W2a) During checking the coverage of `f`'s pattern matches, we treat `T`
+           as if it were an empty data type so that GHC does not warn the user
+           to match against `A` or `B`. (Otherwise, you end up with the bug
+           reported in #22964.)  See GHC.HsToCore.Pmc.Solver.vanillaCompleteMatchTC.
 
-    f :: Letter
-    f = tagToEnum# 0#
+     (W2b) In `GHC.Core.Utils.refineDataAlt`, do /not/ fill in the DEFAULT case
+           with the data constructor, else (I1) is violated. See GHC.Core.Utils
+           Note [Refine DEFAULT case alternatives] Exception 2
 
-  See `GHC.Tc.Gen.App.checkTagToEnum`, specifically `check_enumeration`.
+     (W2c) In `GHC.Core.Opt.ConstantFold.caseRules`, disable the rule for
+           `dataToTag#` in the case of `type data`.  We do not want to transform
+              case dataToTag# x of t -> blah
+           into
+              case x of { A -> ...; B -> .. }
+           because again that conjures up the type-level-only data contructors
+           `A` and `B` in a pattern, violating (I1) (#23023).
 -}
 
 warnNoDerivStrat :: Maybe (LDerivStrategy GhcRn)


=====================================
compiler/GHC/Tc/Gen/App.hs
=====================================
@@ -1222,7 +1222,8 @@ tcTagToEnum tc_fun fun_ctxt tc_args res_ty
     vanilla_result = rebuildHsApps tc_fun fun_ctxt tc_args res_ty
 
     check_enumeration ty' tc
-      | -- isTypeDataTyCon: see Note [Type data declarations] in GHC.Rename.Module
+      | -- isTypeDataTyCon: see wrinkle (W1) in
+        -- Note [Type data declarations] in GHC.Rename.Module
         isTypeDataTyCon tc    = addErrTc (TcRnTagToEnumResTyTypeData ty')
       | isEnumerationTyCon tc = return ()
       | otherwise             = addErrTc (TcRnTagToEnumResTyNotAnEnum ty')


=====================================
testsuite/tests/gadt/T23022.hs
=====================================
@@ -0,0 +1,8 @@
+{-# LANGUAGE TypeData #-}
+module B where
+
+type data T a b where
+  MkT :: T a a
+
+f :: T a b -> T a b
+f x = x


=====================================
testsuite/tests/gadt/T23023.hs
=====================================
@@ -0,0 +1,12 @@
+{-# LANGUAGE TypeData, MagicHash #-}
+module B where
+
+import GHC.Exts
+
+type data T a b where
+  MkT :: T a a
+
+f :: T Int Bool -> Char
+f x = case dataToTag# x of
+         0# -> 'a'
+         _  -> 'b'


=====================================
testsuite/tests/gadt/all.T
=====================================
@@ -129,3 +129,5 @@ test('T22235', normal, compile, [''])
 test('T19847', normal, compile, [''])
 test('T19847a', normal, compile, ['-ddump-types'])
 test('T19847b', normal, compile, [''])
+test('T23022', normal, compile, ['-dcore-lint'])
+test('T23023', normal, compile, ['-dcore-lint'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/18d492497f781722ef40c413d86cdf16448569f2

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/18d492497f781722ef40c413d86cdf16448569f2
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230302/a666fa86/attachment-0001.html>


More information about the ghc-commits mailing list