[Git][ghc/ghc][wip/T23952] 3 commits: Profiling: Properly escape characters when using `-pj`.

Simon Peyton Jones (@simonpj) gitlab at gitlab.haskell.org
Fri Sep 15 14:29:42 UTC 2023



Simon Peyton Jones pushed to branch wip/T23952 at Glasgow Haskell Compiler / GHC


Commits:
e5c00092 by Andreas Klebinger at 2023-09-14T08:57:43-04:00
Profiling: Properly escape characters when using `-pj`.

There are some ways in which unusual characters like quotes or others
can make it into cost centre names. So properly escape these.

Fixes #23924

- - - - -
ec490578 by Ellie Hermaszewska at 2023-09-14T08:58:24-04:00
Use clearer example variable names for bool eliminator

- - - - -
37fde1b3 by Simon Peyton Jones at 2023-09-15T15:29:31+01:00
Use correct FunTyFlag in adjustJoinPointType

As the Lint error in #23952 showed, the function adjustJoinPointType
was failing to adjust the FunTyFlag when adjusting the type.

I don't think this caused the seg-fault reported in the ticket,
but it is definitely.  This patch fixes it.

It is tricky to come up a small test case; Krzysztof came up with
this one, but it only triggers a failure in GHC 9.6.

- - - - -


8 changed files:

- compiler/GHC/Core/Opt/Simplify/Env.hs
- compiler/GHC/Core/Type.hs
- compiler/GHC/Types/Var.hs
- libraries/base/Data/Bool.hs
- rts/ProfilerReportJson.c
- + testsuite/tests/simplCore/should_compile/T23952.hs
- + testsuite/tests/simplCore/should_compile/T23952a.hs
- testsuite/tests/simplCore/should_compile/all.T


Changes:

=====================================
compiler/GHC/Core/Opt/Simplify/Env.hs
=====================================
@@ -58,29 +58,33 @@ import GHC.Core.Opt.Simplify.Monad
 import GHC.Core.Rules.Config ( RuleOpts(..) )
 import GHC.Core
 import GHC.Core.Utils
-import GHC.Core.Multiplicity     ( scaleScaled )
 import GHC.Core.Unfold
 import GHC.Core.TyCo.Subst (emptyIdSubstEnv)
+import GHC.Core.Multiplicity( Scaled(..), mkMultMul )
+import GHC.Core.Make            ( mkWildValBinder, mkCoreLet )
+import GHC.Core.Type hiding     ( substTy, substTyVar, substTyVarBndr, substCo
+                                , extendTvSubst, extendCvSubst )
+import qualified GHC.Core.Coercion as Coercion
+import GHC.Core.Coercion hiding ( substCo, substCoVar, substCoVarBndr )
+import qualified GHC.Core.Type as Type
+
 import GHC.Types.Var
 import GHC.Types.Var.Env
 import GHC.Types.Var.Set
+import GHC.Types.Id as Id
+import GHC.Types.Basic
+import GHC.Types.Unique.FM      ( pprUniqFM )
+
 import GHC.Data.OrdList
 import GHC.Data.Graph.UnVar
-import GHC.Types.Id as Id
-import GHC.Core.Make            ( mkWildValBinder, mkCoreLet )
+
 import GHC.Builtin.Types
-import qualified GHC.Core.Type as Type
-import GHC.Core.Type hiding     ( substTy, substTyVar, substTyVarBndr, substCo
-                                , extendTvSubst, extendCvSubst )
-import qualified GHC.Core.Coercion as Coercion
-import GHC.Core.Coercion hiding ( substCo, substCoVar, substCoVarBndr )
 import GHC.Platform ( Platform )
-import GHC.Types.Basic
+
 import GHC.Utils.Monad
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Utils.Misc
-import GHC.Types.Unique.FM      ( pprUniqFM )
 
 import Data.List ( intersperse, mapAccumL )
 
@@ -1170,21 +1174,34 @@ adjustJoinPointType mult new_res_ty join_id
   = assert (isJoinId join_id) $
     setIdType join_id new_join_ty
   where
-    orig_ar = idJoinArity join_id
-    orig_ty = idType join_id
-
-    new_join_ty = go orig_ar orig_ty :: Type
+    join_arity = idJoinArity join_id
+    orig_ty    = idType join_id
+    res_torc   = typeTypeOrConstraint new_res_ty :: TypeOrConstraint
+
+    new_join_ty = go join_arity orig_ty :: Type
+
+    go :: JoinArity -> Type -> Type
+    go n ty
+      | n == 0
+      = new_res_ty
+
+      | Just (arg_bndr, body_ty) <- splitPiTy_maybe ty
+      , let body_ty' = go (n-1) body_ty
+      = case arg_bndr of
+          Named b                          -> mkForAllTy b body_ty'
+          Anon (Scaled arg_mult arg_ty) af -> mkFunTy af' arg_mult' arg_ty body_ty'
+              where
+                -- Using "!": See Note [Bangs in the Simplifier]
+                -- mkMultMul: see Note [Scaling join point arguments]
+                !arg_mult' = arg_mult `mkMultMul` mult
+
+                -- the new_res_ty might be ConstraintLike while the original
+                -- one was TypeLike.  So we may need to adjust the FunTyFlag.
+                -- (see #23952)
+                !af' = mkFunTyFlag (funTyFlagArgTypeOrConstraint af) res_torc
 
-    go 0 _  = new_res_ty
-    go n ty | Just (arg_bndr, res_ty) <- splitPiTy_maybe ty
-            = mkPiTy (scale_bndr arg_bndr) $
-              go (n-1) res_ty
-            | otherwise
-            = pprPanic "adjustJoinPointType" (ppr orig_ar <+> ppr orig_ty)
-
-    -- See Note [Bangs in the Simplifier]
-    scale_bndr (Anon t af) = (Anon $! (scaleScaled mult t)) af
-    scale_bndr b@(Named _) = b
+      | otherwise
+      = pprPanic "adjustJoinPointType" (ppr join_arity <+> ppr orig_ty)
 
 {- Note [Scaling join point arguments]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


=====================================
compiler/GHC/Core/Type.hs
=====================================
@@ -2567,12 +2567,12 @@ Here are the key kinding rules for types
           -- in GHC.Builtin.Types.Prim
 
           torc is TYPE or CONSTRAINT
-          ty : torc rep
+          ty : body_torc rep
           ki : Type
           `a` is a type variable
           `a` is not free in rep
 (FORALL1) -----------------------
-          forall (a::ki). ty : torc rep
+          forall (a::ki). ty : body_torc rep
 
           torc is TYPE or CONSTRAINT
           ty : body_torc rep


=====================================
compiler/GHC/Types/Var.hs
=====================================
@@ -76,7 +76,7 @@ module GHC.Types.Var (
         mkFunTyFlag, visArg, invisArg,
         visArgTypeLike, visArgConstraintLike,
         invisArgTypeLike, invisArgConstraintLike,
-        funTyFlagResultTypeOrConstraint,
+        funTyFlagArgTypeOrConstraint, funTyFlagResultTypeOrConstraint,
         TypeOrConstraint(..),  -- Re-export this: it's an argument of FunTyFlag
 
         -- * PiTyBinder
@@ -609,6 +609,12 @@ isFUNArg :: FunTyFlag -> Bool
 isFUNArg FTF_T_T = True
 isFUNArg _       = False
 
+funTyFlagArgTypeOrConstraint :: FunTyFlag -> TypeOrConstraint
+-- Whether it /takes/ a type or a constraint
+funTyFlagArgTypeOrConstraint FTF_T_T = TypeLike
+funTyFlagArgTypeOrConstraint FTF_T_C = TypeLike
+funTyFlagArgTypeOrConstraint _       = ConstraintLike
+
 funTyFlagResultTypeOrConstraint :: FunTyFlag -> TypeOrConstraint
 -- Whether it /returns/ a type or a constraint
 funTyFlagResultTypeOrConstraint FTF_T_T = TypeLike


=====================================
libraries/base/Data/Bool.hs
=====================================
@@ -31,10 +31,10 @@ import GHC.Base
 -- $setup
 -- >>> import Prelude
 
--- | Case analysis for the 'Bool' type. @'bool' x y p@ evaluates to @x@
--- when @p@ is 'False', and evaluates to @y@ when @p@ is 'True'.
+-- | Case analysis for the 'Bool' type. @'bool' f t p@ evaluates to @f@
+-- when @p@ is 'False', and evaluates to @t@ when @p@ is 'True'.
 --
--- This is equivalent to @if p then y else x@; that is, one can
+-- This is equivalent to @if p then t else f@; that is, one can
 -- think of it as an if-then-else construct with its arguments
 -- reordered.
 --
@@ -49,14 +49,14 @@ import GHC.Base
 -- >>> bool "foo" "bar" False
 -- "foo"
 --
--- Confirm that @'bool' x y p@ and @if p then y else x@ are
+-- Confirm that @'bool' f t p@ and @if p then t else f@ are
 -- equivalent:
 --
--- >>> let p = True; x = "bar"; y = "foo"
--- >>> bool x y p == if p then y else x
+-- >>> let p = True; f = "bar"; t = "foo"
+-- >>> bool f t p == if p then t else f
 -- True
 -- >>> let p = False
--- >>> bool x y p == if p then y else x
+-- >>> bool f t p == if p then t else f
 -- True
 --
 bool :: a -> a -> Bool -> a


=====================================
rts/ProfilerReportJson.c
=====================================
@@ -17,36 +17,178 @@
 
 #include <string.h>
 
-// I don't think this code is all that perf critical.
-// So we just allocate a new buffer each time around.
+// Including zero byte
+static size_t escaped_size(char const* str)
+{
+    size_t escaped_size = 0;
+    for (; *str != '\0'; str++) {
+        const unsigned char c = *str;
+        switch (c)
+            {
+                // quotation mark (0x22)
+                case '"':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                case '\\':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                // backspace (0x08)
+                case '\b':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                // formfeed (0x0c)
+                case '\f':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                // newline (0x0a)
+                case '\n':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                // carriage return (0x0d)
+                case '\r':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                // horizontal tab (0x09)
+                case '\t':
+                {
+                    escaped_size += 2;
+                    break;
+                }
+
+                default:
+                {
+                    if (c <= 0x1f)
+                    {
+                        // print character c as \uxxxx
+                        escaped_size += 6;
+                    }
+                    else
+                    {
+                        escaped_size ++;
+                    }
+                    break;
+                }
+            }
+    }
+    escaped_size++; // null byte
+
+    return escaped_size;
+}
+
 static void escapeString(char const* str, char **buf)
 {
     char *out;
-    size_t req_size; //Max required size for decoding.
-    size_t in_size;  //Input size, including zero.
-
-    in_size = strlen(str) + 1;
-    // The strings are generally small and short
-    // lived so should be ok to just double the size.
-    req_size = in_size * 2;
-    out = stgMallocBytes(req_size, "writeCCSReportJson");
-    *buf = out;
-    // We provide an outputbuffer twice the size of the input,
-    // and at worse double the output size. So we can skip
-    // length checks.
+    size_t out_size; //Max required size for decoding.
+    size_t pos = 0;
+
+    out_size = escaped_size(str); //includes trailing zero byte
+    out = stgMallocBytes(out_size, "writeCCSReportJson");
     for (; *str != '\0'; str++) {
-        char c = *str;
-        if (c == '\\') {
-            *out = '\\'; out++;
-            *out = '\\'; out++;
-        } else if (c == '\n') {
-            *out = '\\'; out++;
-            *out = 'n';  out++;
-        } else {
-            *out = c; out++;
-        }
+        const unsigned char c = *str;
+        switch (c)
+            {
+                // quotation mark (0x22)
+                case '"':
+                {
+                    out[pos] = '\\';
+                    out[pos + 1] = '"';
+                    pos += 2;
+                    break;
+                }
+
+                // reverse solidus (0x5c)
+                case '\\':
+                {
+                    out[pos] = '\\';
+                    out[pos+1] = '\\';
+                    pos += 2;
+                    break;
+                }
+
+                // backspace (0x08)
+                case '\b':
+                {
+                    out[pos] = '\\';
+                    out[pos + 1] = 'b';
+                    pos += 2;
+                    break;
+                }
+
+                // formfeed (0x0c)
+                case '\f':
+                {
+                    out[pos] = '\\';
+                    out[pos + 1] = 'f';
+                    pos += 2;
+                    break;
+                }
+
+                // newline (0x0a)
+                case '\n':
+                {
+                    out[pos] = '\\';
+                    out[pos + 1] = 'n';
+                    pos += 2;
+                    break;
+                }
+
+                // carriage return (0x0d)
+                case '\r':
+                {
+                    out[pos] = '\\';
+                    out[pos + 1] = 'r';
+                    pos += 2;
+                    break;
+                }
+
+                // horizontal tab (0x09)
+                case '\t':
+                {
+                    out[pos] = '\\';
+                    out[pos + 1] = 't';
+                    pos += 2;
+                    break;
+                }
+
+                default:
+                {
+                    if (c <= 0x1f)
+                    {
+                        // print character c as \uxxxx
+                        out[pos] = '\\';
+                        sprintf(&out[pos + 1], "u%04x", (int)c);
+                        pos += 6;
+                    }
+                    else
+                    {
+                        // all other characters are added as-is
+                        out[pos++] = c;
+                    }
+                    break;
+                }
+            }
     }
-    *out = '\0';
+    out[pos++] = '\0';
+    assert(pos == out_size);
+    *buf = out;
 }
 
 static void


=====================================
testsuite/tests/simplCore/should_compile/T23952.hs
=====================================
@@ -0,0 +1,31 @@
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE UndecidableInstances #-}
+
+-- The Lint failure in in #23952 is very hard to trigger.
+-- The test case fails with GHC 9.6, but not 9.4, 9.8, or HEAD.
+-- But still, better something than nothing.
+
+module T23952 where
+
+import T23952a
+import Data.Proxy
+import Data.Kind
+
+type Filter :: Type -> Type
+data Filter ty = FilterWithMain Int Bool
+
+new :: forall n . Eq n => () -> Filter n
+{-# INLINABLE new #-}
+new _ = toFilter
+
+class FilterDSL x where
+  toFilter :: Filter x
+
+instance Eq c => FilterDSL c where
+  toFilter = case (case fromRep cid == cid of
+                     True -> FilterWithMain cid False
+                     False -> FilterWithMain cid True
+                  ) of FilterWithMain c x -> FilterWithMain (c+1) (not x)
+            where cid :: Int
+                  cid = 3
+  {-# INLINE toFilter #-}


=====================================
testsuite/tests/simplCore/should_compile/T23952a.hs
=====================================
@@ -0,0 +1,14 @@
+{-# LANGUAGE DerivingVia #-}
+module T23952a where
+
+class AsRep rep a where
+  fromRep :: rep -> a
+
+newtype ViaIntegral a = ViaIntegral a
+  deriving newtype (Eq, Ord, Real, Enum, Num, Integral)
+
+instance forall a n . (Integral a, Integral n, Eq a) => AsRep a (ViaIntegral n) where
+  fromRep r = fromIntegral $ r + 2
+  {-# INLINE fromRep #-}
+
+deriving via (ViaIntegral Int) instance (Integral r) => AsRep r Int


=====================================
testsuite/tests/simplCore/should_compile/all.T
=====================================
@@ -500,3 +500,4 @@ test('T22404', [only_ways(['optasm']), check_errmsg(r'let') ], compile, ['-ddump
 test('T23864', normal, compile, ['-O -dcore-lint -package ghc -Wno-gadt-mono-local-binds'])
 test('T23938', [extra_files(['T23938A.hs'])], multimod_compile, ['T23938', '-O -v0'])
 test('T23922a', normal, compile, ['-O'])
+test('T23952', [extra_files(['T23952a.hs'])], multimod_compile, ['T23952', '-v0 -O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1bd7ea6669efb40916a2b66a8523cc2ca9d6b285...37fde1b37c184b7bb9c1dd56acf45eb174908133

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/1bd7ea6669efb40916a2b66a8523cc2ca9d6b285...37fde1b37c184b7bb9c1dd56acf45eb174908133
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230915/388fa0a4/attachment-0001.html>


More information about the ghc-commits mailing list