[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 4 commits: Type-check default declarations before deriving clauses (#24566)

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Thu Mar 21 20:12:01 UTC 2024



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
3de938eb by Sylvain Henry at 2024-03-21T16:11:43-04:00
Type-check default declarations before deriving clauses (#24566)

See added Note and #24566. Default declarations must be type-checked
before deriving clauses.

- - - - -
81eb4628 by Sylvain Henry at 2024-03-21T16:11:46-04:00
Lexer: small perf changes

- Use unsafeChr because we know our values to be valid
- Remove some unnecessary use of `ord` (return Word8 values directly)

- - - - -
ae52eca6 by Sylvain Henry at 2024-03-21T16:11:46-04:00
JS: fix some comments

- - - - -
fbad35ab by Sebastian Graf at 2024-03-21T16:11:47-04:00
Simplifier: Re-do dependency analysis in abstractFloats (#24551)

In #24551, we abstracted a string literal binding over a type variable,
triggering a CoreLint error when that binding floated to top-level.

The solution implemented in this patch fixes this by re-doing dependency
analysis on a simplified recursive let binding that is about to be type
abstracted, in order to find the minimal set of type variables to abstract over.
See wrinkle (AB5) of Note [Floating and type abstraction] for more details.

Fixes #24551

- - - - -


10 changed files:

- compiler/GHC/Core/Opt/Simplify/Utils.hs
- compiler/GHC/Parser/Lexer.x
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/Tc/Module.hs
- libraries/ghc-internal/src/GHC/Internal/Maybe.hs
- + testsuite/tests/simplCore/should_compile/T24551.hs
- testsuite/tests/simplCore/should_compile/all.T
- testsuite/tests/typecheck/should_compile/Makefile
- + testsuite/tests/typecheck/should_compile/T24566.hs
- testsuite/tests/typecheck/should_compile/all.T


Changes:

=====================================
compiler/GHC/Core/Opt/Simplify/Utils.hs
=====================================
@@ -85,6 +85,8 @@ import GHC.Utils.Panic
 
 import Control.Monad    ( when )
 import Data.List        ( sortBy )
+import GHC.Types.Name.Env
+import Data.Graph
 
 {- *********************************************************************
 *                                                                      *
@@ -2108,6 +2110,27 @@ new binding is abstracted.  Several points worth noting
       which showed that it's harder to do polymorphic specialisation well
       if there are dictionaries abstracted over unnecessary type variables.
       See Note [Weird special case for SpecDict] in GHC.Core.Opt.Specialise
+
+(AB5) We do dependency analysis on recursive groups prior to determining
+      which variables to abstract over.
+      This is useful, because ANFisation in prepareBinding may float out
+      values out of a complex recursive binding, e.g.,
+          letrec { xs = g @a "blah"# ((:) 1 []) xs } in ...
+        ==> { prepareBinding }
+          letrec { foo = "blah"#
+                   bar = [42]
+                   xs = g @a foo bar xs } in
+          ...
+      and we don't want to abstract foo and bar over @a.
+
+      (Why is it OK to float the unlifted `foo` there?
+      See Note [Core top-level string literals] in GHC.Core;
+      it is controlled by GHC.Core.Opt.Simplify.Env.unitLetFloat.)
+
+      It is also necessary to do dependency analysis, because
+      otherwise (in #24551) we might get `foo = \@_ -> "missing"#` at the
+      top-level, and that triggers a CoreLint error because `foo` is *not*
+      manifestly a literal string.
 -}
 
 abstractFloats :: UnfoldingOpts -> TopLevelFlag -> [OutTyVar] -> SimplFloats
@@ -2115,15 +2138,27 @@ abstractFloats :: UnfoldingOpts -> TopLevelFlag -> [OutTyVar] -> SimplFloats
 abstractFloats uf_opts top_lvl main_tvs floats body
   = assert (notNull body_floats) $
     assert (isNilOL (sfJoinFloats floats)) $
-    do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
+    do  { let sccs = concatMap to_sccs body_floats
+        ; (subst, float_binds) <- mapAccumLM abstract empty_subst sccs
         ; return (float_binds, GHC.Core.Subst.substExpr subst body) }
   where
     is_top_lvl  = isTopLevel top_lvl
     body_floats = letFloatBinds (sfLetFloats floats)
     empty_subst = GHC.Core.Subst.mkEmptySubst (sfInScope floats)
 
-    abstract :: GHC.Core.Subst.Subst -> OutBind -> SimplM (GHC.Core.Subst.Subst, OutBind)
-    abstract subst (NonRec id rhs)
+    -- See wrinkle (AB5) in Note [Which type variables to abstract over]
+    -- for why we need to re-do dependency analysis
+    to_sccs :: OutBind -> [SCC (Id, CoreExpr, VarSet)]
+    to_sccs (NonRec id e) = [AcyclicSCC (id, e, emptyVarSet)] -- emptyVarSet: abstract doesn't need it
+    to_sccs (Rec prs)     = sccs
+      where
+        (ids,rhss) = unzip prs
+        sccs = depAnal (\(id,_rhs,_fvs) -> [getName id])
+                       (\(_id,_rhs,fvs) -> nonDetStrictFoldVarSet ((:) . getName) [] fvs) -- Wrinkle (AB3)
+                       (zip3 ids rhss (map exprFreeVars rhss))
+
+    abstract :: GHC.Core.Subst.Subst -> SCC (Id, CoreExpr, VarSet) -> SimplM (GHC.Core.Subst.Subst, OutBind)
+    abstract subst (AcyclicSCC (id, rhs, _empty_var_set))
       = do { (poly_id1, poly_app) <- mk_poly1 tvs_here id
            ; let (poly_id2, poly_rhs) = mk_poly2 poly_id1 tvs_here rhs'
                  !subst' = GHC.Core.Subst.extendIdSubst subst id poly_app
@@ -2134,7 +2169,7 @@ abstractFloats uf_opts top_lvl main_tvs floats body
         -- tvs_here: see Note [Which type variables to abstract over]
         tvs_here = choose_tvs (exprSomeFreeVars isTyVar rhs')
 
-    abstract subst (Rec prs)
+    abstract subst (CyclicSCC trpls)
       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly1 tvs_here) ids
            ; let subst' = GHC.Core.Subst.extendSubstList subst (ids `zip` poly_apps)
                  poly_pairs = [ mk_poly2 poly_id tvs_here rhs'
@@ -2142,15 +2177,15 @@ abstractFloats uf_opts top_lvl main_tvs floats body
                               , let rhs' = GHC.Core.Subst.substExpr subst' rhs ]
            ; return (subst', Rec poly_pairs) }
       where
-        (ids,rhss) = unzip prs
-
+        (ids,rhss,_fvss) = unzip3 trpls
 
         -- tvs_here: see Note [Which type variables to abstract over]
-        tvs_here = choose_tvs (mapUnionVarSet get_bind_fvs prs)
+        tvs_here = choose_tvs (mapUnionVarSet get_bind_fvs trpls)
 
         -- See wrinkle (AB4) in Note [Which type variables to abstract over]
-        get_bind_fvs (id,rhs) = tyCoVarsOfType (idType id) `unionVarSet` get_rec_rhs_tvs rhs
-        get_rec_rhs_tvs rhs   = nonDetStrictFoldVarSet get_tvs emptyVarSet (exprFreeVars rhs)
+        get_bind_fvs (id,_rhs,rhs_fvs) = tyCoVarsOfType (idType id) `unionVarSet` get_rec_rhs_tvs rhs_fvs
+        get_rec_rhs_tvs rhs_fvs        = nonDetStrictFoldVarSet get_tvs emptyVarSet rhs_fvs
+                                  -- nonDet is safe because of wrinkle (AB3)
 
         get_tvs :: Var -> VarSet -> VarSet
         get_tvs var free_tvs


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -2814,19 +2814,19 @@ characters into single bytes.
 
 {-# INLINE adjustChar #-}
 adjustChar :: Char -> Word8
-adjustChar c = fromIntegral $ ord adj_c
-  where non_graphic     = '\x00'
-        upper           = '\x01'
-        lower           = '\x02'
-        digit           = '\x03'
-        symbol          = '\x04'
-        space           = '\x05'
-        other_graphic   = '\x06'
-        uniidchar       = '\x07'
+adjustChar c = adj_c
+  where non_graphic     = 0x00
+        upper           = 0x01
+        lower           = 0x02
+        digit           = 0x03
+        symbol          = 0x04
+        space           = 0x05
+        other_graphic   = 0x06
+        uniidchar       = 0x07
 
         adj_c
           | c <= '\x07' = non_graphic
-          | c <= '\x7f' = c
+          | c <= '\x7f' = fromIntegral (ord c)
           -- Alex doesn't handle Unicode, so when Unicode
           -- character is encountered we output these values
           -- with the actual character value hidden in the state.
@@ -2866,15 +2866,18 @@ adjustChar c = fromIntegral $ ord adj_c
 --
 -- See Note [Unicode in Alex] and #13986.
 alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ buf) = chr (fromIntegral (adjustChar pc))
+alexInputPrevChar (AI _ buf) = unsafeChr (fromIntegral (adjustChar pc))
   where pc = prevChar buf '\n'
 
+unsafeChr :: Int -> Char
+unsafeChr (I# c) = C# (chr# c)
+
 -- backwards compatibility for Alex 2.x
 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
 alexGetChar inp = case alexGetByte inp of
                     Nothing    -> Nothing
                     Just (b,i) -> c `seq` Just (c,i)
-                       where c = chr $ fromIntegral b
+                       where c = unsafeChr $ fromIntegral b
 
 -- See Note [Unicode in Alex]
 alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)


=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -738,7 +738,7 @@ writeExterns out = writeFile (out </> "all.js.externs")
 
 -- | Get all block dependencies for a given set of roots
 --
--- Returns the update block info map and the blocks.
+-- Returns the updated block info map and the blocks.
 getDeps :: Map Module LocatedBlockInfo     -- ^ Block info per module
         -> (Module -> IO LocatedBlockInfo) -- ^ Used to load block info if missing
         -> Set ExportedFun                 -- ^ start here
@@ -754,7 +754,7 @@ getDeps init_infos load_info root_funs root_blocks = traverse_funs init_infos S.
     --  1. We use the BlockInfos to find the block corresponding to every
     --  exported root functions.
     --
-    --  2. We had these blocks to the set of root_blocks if they aren't already
+    --  2. We add these blocks to the set of root_blocks if they aren't already
     --  added to the result.
     --
     --  3. Then we traverse the root_blocks to find their dependencies and we


=====================================
compiler/GHC/Tc/Module.hs
=====================================
@@ -709,7 +709,7 @@ tcRnHsBootDecls boot_or_sig decls
                 -- Typecheck type/class/instance decls
         ; traceTc "Tc2 (boot)" empty
         ; (tcg_env, inst_infos, _deriv_binds, _th_bndrs)
-             <- tcTyClsInstDecls tycl_decls deriv_decls val_binds
+             <- tcTyClsInstDecls tycl_decls deriv_decls def_decls val_binds
         ; setGblEnv tcg_env     $ do {
 
         -- Emit Typeable bindings
@@ -1612,7 +1612,7 @@ tcTopSrcDecls (HsGroup { hs_tyclds = tycl_decls,
         traceTc "Tc3" empty ;
         (tcg_env, inst_infos, th_bndrs,
          XValBindsLR (NValBinds deriv_binds deriv_sigs))
-            <- tcTyClsInstDecls tycl_decls deriv_decls val_binds ;
+            <- tcTyClsInstDecls tycl_decls deriv_decls default_decls val_binds ;
 
         updLclCtxt (\tcl_env -> tcl_env { tcl_th_bndrs = th_bndrs `plusNameEnv` tcl_th_bndrs tcl_env }) $
         setGblEnv tcg_env       $ do {
@@ -1622,11 +1622,6 @@ tcTopSrcDecls (HsGroup { hs_tyclds = tycl_decls,
         (fi_ids, fi_decls, fi_gres) <- tcForeignImports foreign_decls ;
         tcExtendGlobalValEnv fi_ids     $ do {
 
-                -- Default declarations
-        traceTc "Tc4a" empty ;
-        default_tys <- tcDefaults default_decls ;
-        updGblEnv (\gbl -> gbl { tcg_default = default_tys }) $ do {
-
                 -- Value declarations next.
                 -- It is important that we check the top-level value bindings
                 -- before the GHC-generated derived bindings, since the latter
@@ -1686,13 +1681,14 @@ tcTopSrcDecls (HsGroup { hs_tyclds = tycl_decls,
         addUsedGREs NoDeprecationWarnings (bagToList fo_gres) ;
 
         return (tcg_env', tcl_env)
-    }}}}}}
+    }}}}}
 
 tcTopSrcDecls _ = panic "tcTopSrcDecls: ValBindsIn"
 
 ---------------------------
 tcTyClsInstDecls :: [TyClGroup GhcRn]
                  -> [LDerivDecl GhcRn]
+                 -> [LDefaultDecl GhcRn]
                  -> [(RecFlag, LHsBinds GhcRn)]
                  -> TcM (TcGblEnv,            -- The full inst env
                          [InstInfo GhcRn],    -- Source-code instance decls to
@@ -1702,16 +1698,24 @@ tcTyClsInstDecls :: [TyClGroup GhcRn]
                           HsValBinds GhcRn)   -- Supporting bindings for derived
                                               -- instances
 
-tcTyClsInstDecls tycl_decls deriv_decls binds
+tcTyClsInstDecls tycl_decls deriv_decls default_decls binds
  = tcAddDataFamConPlaceholders (tycl_decls >>= group_instds) $
    tcAddPatSynPlaceholders (getPatSynBinds binds) $
    do { (tcg_env, inst_info, deriv_info, th_bndrs)
           <- tcTyAndClassDecls tycl_decls ;
+
       ; setGblEnv tcg_env $ do {
+
           -- With the @TyClDecl at s and @InstDecl at s checked we're ready to
           -- process the deriving clauses, including data family deriving
           -- clauses discovered in @tcTyAndClassDecls at .
           --
+          -- But only after we've typechecked 'default' declarations.
+          -- See Note [Typechecking default declarations]
+          default_tys <- tcDefaults default_decls ;
+          updGblEnv (\gbl -> gbl { tcg_default = default_tys }) $ do {
+
+
           -- Careful to quit now in case there were instance errors, so that
           -- the deriving errors don't pile up as well.
           ; failIfErrsM
@@ -1720,7 +1724,7 @@ tcTyClsInstDecls tycl_decls deriv_decls binds
           ; setGblEnv tcg_env' $ do {
                 failIfErrsM
               ; pure ( tcg_env', inst_info' ++ inst_info, th_bndrs, val_binds )
-      }}}
+      }}}}
 
 {- *********************************************************************
 *                                                                      *
@@ -3141,3 +3145,43 @@ mark_plugin_unsafe dflags = unless (gopt Opt_PluginTrustworthy dflags) $
     pluginUnsafe =
       singleMessage $
       mkPlainMsgEnvelope diag_opts noSrcSpan TcRnUnsafeDueToPlugin
+
+
+-- Note [Typechecking default declarations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Typechecking default declarations requires careful placement:
+--
+-- 1. We must check them after types (tcTyAndClassDecls) because they can refer
+-- to them. E.g.
+--
+--    data T = MkT ...
+--    default(Int, T, Integer)
+--
+--    -- or even (tested by T11974b and T2245)
+--    default(Int, T, Integer)
+--    data T = MkT ...
+--
+-- 2. We must check them before typechecking deriving clauses (tcInstDeclsDeriv)
+-- otherwise we may lookup default default types (Integer, Double) while checking
+-- deriving clauses, ignoring the default declaration.
+--
+-- Before this careful placement (#24566), compiling the following example
+-- (T24566) with "-ddump-if-trace -ddump-tc-trace" showed a call to
+-- `applyDefaultingRules` with default types set to "(Integer,Double)":
+--
+--     module M where
+--
+--     import GHC.Classes
+--     default ()
+--
+--     data Foo a = Nothing | Just a
+--       deriving (Eq, Ord)
+--
+-- This was an issue while building modules like M in the ghc-internal package
+-- because they would spuriously fail to build if the module defining Integer
+-- (ghc-bignum:GHC.Num.Integer) wasn't compiled yet and its interface not to be
+-- found. The implicit dependency between M and GHC.Num.Integer isn't known to
+-- the build system.
+-- In addition, trying to explicitly avoid the implicit dependency with `default
+-- ()` didn't work, except if *standalone* deriving was used, which was an
+-- inconsistent behavior.


=====================================
libraries/ghc-internal/src/GHC/Internal/Maybe.hs
=====================================
@@ -28,14 +28,5 @@ default ()
 --
 data  Maybe a  =  Nothing | Just a
   deriving ( Eq  -- ^ @since base-2.01
-
-           --, Ord -- ^ @since base-2.01
+           , Ord -- ^ @since base-2.01
            )
-
--- ???
--- A non-standalone instance will slurp the interface file for GHC.Num.Integer.
-  -- During simplifyInstanceContexts, a call to GHC.Tc.Utils.Env.tcGetDefaultTys
-  -- apparently sees mb_defaults = Nothing and thus tries to bring in the
-  -- default "default" types, including Integer.  This seems wrong.
-deriving instance Ord a => Ord (Maybe a) -- ^ @since base-2.01
-


=====================================
testsuite/tests/simplCore/should_compile/T24551.hs
=====================================
@@ -0,0 +1,15 @@
+{-# LANGUAGE MagicHash #-}
+module T24551 (f) where
+
+import GHC.Exts
+
+f :: a -> a
+f = repeatFB g
+
+repeatFB :: (Addr# -> (a -> a) -> a -> a) -> a -> a
+repeatFB c = let xs = c "missing"# xs in xs
+{-# INLINE [0] repeatFB #-}
+
+g :: Addr# -> (a -> a) -> a -> a
+g _ _ x = x
+{-# NOINLINE g #-}


=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -514,3 +514,4 @@ test('T23209', [extra_files(['T23209_Aux.hs'])], multimod_compile, ['T23209', '-
 test('T24229a', [ grep_errmsg(r'wfoo') ], compile, ['-O2 -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques -dppr-cols=99999'])
 test('T24229b', [ grep_errmsg(r'wfoo') ], compile, ['-O2 -ddump-simpl -dno-typeable-binds -dsuppress-all -dsuppress-uniques -dppr-cols=99999'])
 test('T24370', normal, compile, ['-O'])
+test('T24551', normal, compile, ['-O -dcore-lint'])


=====================================
testsuite/tests/typecheck/should_compile/Makefile
=====================================
@@ -122,3 +122,7 @@ InlinePatSyn_ExplicitBidiBuilder:
 InlinePatSyn_ExplicitBidiMatcher:
 	$(RM) -f InlinePatSyn_ExplicitBidiMatcher.o InlinePatSyn_ExplicitBidiMatcher.hi
 	'$(TEST_HC)' $(TEST_HC_OPTS) -c InlinePatSyn_ExplicitBidiMatcher.hs  -O -dsuppress-all -ddump-simpl | sed -n '/^test/,/^$$/p' | grep -vq 'Pattern'
+
+T24566:
+	'$(TEST_HC)' $(TEST_HC_OPTS) -c T24566.hs -fno-code -dno-typeable-binds -ddump-if-trace 2>&1 | grep Integer || true
+         # Not expecting any mention of Integer in the interface loading trace


=====================================
testsuite/tests/typecheck/should_compile/T24566.hs
=====================================
@@ -0,0 +1,7 @@
+module M where
+
+import GHC.Classes
+default ()
+
+data Foo a = Nothing | Just a
+  deriving (Eq, Ord)


=====================================
testsuite/tests/typecheck/should_compile/all.T
=====================================
@@ -913,3 +913,4 @@ test('T17594a', req_th, compile, [''])
 test('T17594f', normal, compile, [''])
 test('WarnDefaultedExceptionContext', normal, compile, ['-Wdefaulted-exception-context'])
 test('T24470b', normal, compile, [''])
+test('T24566', [], makefile_test, [])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/c7543596dfba03a733aedee6683621721088b9b1...fbad35abf2a06c8890d932e03782de507e169ec8

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/c7543596dfba03a733aedee6683621721088b9b1...fbad35abf2a06c8890d932e03782de507e169ec8
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240321/20353f04/attachment-0001.html>


More information about the ghc-commits mailing list