[Git][ghc/ghc][wip/keepAlive-optionB] 7 commits: base: Use keepAlive# in alloca, et al.

Ben Gamari gitlab at gitlab.haskell.org
Thu Sep 10 13:40:22 UTC 2020



Ben Gamari pushed to branch wip/keepAlive-optionB at Glasgow Haskell Compiler / GHC


Commits:
36863ab9 by Ben Gamari at 2020-09-10T13:40:16+00:00
base: Use keepAlive# in alloca, et al.

- - - - -
d7b151c5 by GHC GitLab CI at 2020-09-10T13:40:16+00:00
Simplify: Factor out runRW rule

- - - - -
c193ca0f by GHC GitLab CI at 2020-09-10T13:40:16+00:00
Simplify

- - - - -
b1d535d2 by GHC GitLab CI at 2020-09-10T13:40:16+00:00
hadrian: Don't include -fdiagnostics-color in argument hash

Otherwise the input hash will vary with whether colors are requested,
which changes with `isatty`.

Fixes #18672.

- - - - -
bed62e4c by Ben Gamari at 2020-09-10T13:40:16+00:00
base: Use keepAlive# in withForeignPtr

- - - - -
ab8630e0 by GHC GitLab CI at 2020-09-10T13:40:16+00:00
base: Make touchForeignPtr more robust

Previously touchForeignPtr would touch the ForeignPtr's
associated ForeignPtrContents. However, this is a normal constructor and
therefore can be eliminated by the simplifier. To ensure that the
foreign pointer's contents aren't dropped we instead need to `touch#` the
underlying array (which is the same thing that we key the `Weak` on when
adding finalizers).

- - - - -
22837c60 by GHC GitLab CI at 2020-09-10T13:40:16+00:00
base: Make ForeignPtrContents of ForeignPtr strict

As mentioned in #17290, there are strong benefits to the
`ForeignPtrContents` field of `ForeignPtr` being strict. In particular,
when looking at the reproducer for #17746, I noticed that the
`ForeignPtrContents` was being allocated on every call to
`withForeignPtr` just to be `touch#`'d. This is a pretty large
overhead for something like `withForeignPtr` that should be free.

This required updating the `bytestring` submodule to avoid a bottoming
`ForeignPtrContents` in `Data.ByteString.Internal`.

Fixes #17290.

- - - - -


8 changed files:

- compiler/GHC/Core/Opt/Simplify.hs
- compiler/GHC/CoreToStg/Prep.hs
- hadrian/src/Settings/Builders/Ghc.hs
- hadrian/src/Target.hs
- libraries/base/Foreign/ForeignPtr/Imp.hs
- libraries/base/Foreign/Marshal/Alloc.hs
- libraries/base/GHC/ForeignPtr.hs
- libraries/bytestring


Changes:

=====================================
compiler/GHC/Core/Opt/Simplify.hs
=====================================
@@ -68,7 +68,7 @@ import GHC.Utils.Misc
 import GHC.Utils.Error
 import GHC.Unit.Module ( moduleName, pprModuleName )
 import GHC.Core.Multiplicity
-import GHC.Builtin.PrimOps ( PrimOp (SeqOp) )
+import GHC.Builtin.PrimOps ( PrimOp (SeqOp, KeepAliveOp) )
 
 
 {-
@@ -1975,29 +1975,9 @@ rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_c
   = rebuildCall env (addTyArgTo info arg_ty hole_ty) cont
 
 ---------- The runRW# rule. Do this after absorbing all arguments ------
--- See Note [Simplification of runRW#] in GHC.CoreToSTG.Prep.
---
--- runRW# :: forall (r :: RuntimeRep) (o :: TYPE r). (State# RealWorld -> o) -> o
--- K[ runRW# rr ty body ]   -->   runRW rr' ty' (\s. K[ body s ])
-rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
-            (ApplyToVal { sc_arg = arg, sc_env = arg_se
-                        , sc_cont = cont, sc_hole_ty = fun_ty })
-  | fun_id `hasKey` runRWKey
-  , not (contIsStop cont)  -- Don't fiddle around if the continuation is boring
-  , [ TyArg {}, TyArg {} ] <- rev_args
-  = do { s <- newId (fsLit "s") Many realWorldStatePrimTy
-       ; let (m,_,_) = splitFunTy fun_ty
-             env'  = (arg_se `setInScopeFromE` env) `addNewInScopeIds` [s]
-             ty'   = contResultType cont
-             cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s
-                                , sc_env = env', sc_cont = cont
-                                , sc_hole_ty = mkVisFunTy m realWorldStatePrimTy ty' }
-                     -- cont' applies to s, then K
-       ; body' <- simplExprC env' arg cont'
-       ; let arg'  = Lam s body'
-             rr'   = getRuntimeRep ty'
-             call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
-       ; return (emptyFloats env, call') }
+rebuildCall env arg_info cont
+  | Just do_it <- rebuildContOpCall env arg_info cont
+  = do_it
 
 rebuildCall env fun_info
             (ApplyToVal { sc_arg = arg, sc_env = arg_se
@@ -2034,6 +2014,87 @@ rebuildCall env fun_info
 rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont
   = rebuild env (argInfoExpr fun rev_args) cont
 
+-- | Simplifications of runRW# and keepAlive#
+rebuildContOpCall :: SimplEnv -> ArgInfo -> SimplCont -> Maybe (SimplM (SimplFloats, OutExpr))
+rebuildContOpCall _env _arg_info cont
+  | not (contIsStop cont)  -- Don't fiddle around if the continuation is boring
+  = Nothing
+
+-- See Note [Simplification of runRW#] in GHC.CoreToStg.Prep.
+--
+-- N.B. runRW# :: forall (r :: RuntimeRep) (o :: TYPE r).
+--                (State# RealWorld -> o) -> o
+--
+--   K[ runRW# rr ty body ]
+--       ~>
+--   runRW# rr' ty' (\s. K[ body s ])
+rebuildContOpCall
+    env
+    (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
+    (ApplyToVal { sc_arg = arg, sc_env = arg_se
+                , sc_cont = cont, sc_hole_ty = fun_ty })
+  | fun_id `hasKey` runRWKey
+  , [ TyArg {}, TyArg {} ] <- rev_args
+  = Just $
+    do { s <- newId (fsLit "s") Many realWorldStatePrimTy
+       ; let (m,_,_) = splitFunTy fun_ty
+             env'  = (arg_se `setInScopeFromE` env) `addNewInScopeIds` [s]
+             ty'   = contResultType cont
+             k'_ty = mkVisFunTy m realWorldStatePrimTy ty'
+             cont' = ApplyToVal { sc_dup = Simplified, sc_arg = Var s
+                                , sc_env = env', sc_cont = cont
+                                , sc_hole_ty = k'_ty }
+                     -- cont' applies to s, then K
+       ; body' <- simplExprC env' arg cont'
+       ; let arg'  = Lam s body'
+             rr'   = getRuntimeRep ty'
+             call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
+       ; return (emptyFloats env, call') }
+
+-- See Note [Simplification of keepAlive#] in GHC.CoreToStg.Prep.
+--
+--   K[keepAlive# @a_rep @a @r_rep @r x s k]
+--       ~>
+--   keepAlive# @a_rep @a @r_rep @r x s K[k]
+rebuildContOpCall
+    env
+    (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
+    (ApplyToVal { sc_arg = k, sc_env = k_se
+                , sc_cont = cont, sc_hole_ty = fun_ty })
+  | Just KeepAliveOp <- isPrimOpId_maybe fun_id
+  , [ ValArg {as_arg=s0}
+    , ValArg {as_arg=x}
+    , TyArg {} -- res_ty
+    , TyArg {} -- res_rep
+    , TyArg {as_arg_ty=arg_ty}
+    , TyArg {as_arg_ty=arg_rep}
+    ] <- rev_args
+  = Just $
+    do { s <- newId (fsLit "s") One realWorldStatePrimTy
+       ; let (m,_,_) = splitFunTy fun_ty
+             k_env   = (k_se `setInScopeFromE` env) `addNewInScopeIds` [s]
+             ty'     = contResultType cont
+             k'_ty   = mkVisFunTy m realWorldStatePrimTy ty'
+             k_cont  = ApplyToVal { sc_dup = Simplified, sc_arg = Var s
+                                  , sc_env = k_env, sc_cont = cont
+                                  , sc_hole_ty = k'_ty }
+       ; k' <- simplExprC k_env k k_cont
+       ; let env' = zapSubstEnv env
+       ; s0' <- simplExpr env' s0
+       ; x' <- simplExpr env' x
+       ; arg_rep' <- simplType env' arg_rep
+       ; arg_ty' <- simplType env' arg_ty
+       ; let call' = mkApps (Var fun_id)
+               [ mkTyArg arg_rep', mkTyArg arg_ty'
+               , mkTyArg (getRuntimeRep ty'), mkTyArg ty'
+               , x'
+               , s0'
+               , Lam s k'
+               ]
+       ; return (emptyFloats env, call') }
+
+rebuildContOpCall _ _ _ = Nothing
+
 {- Note [Trying rewrite rules]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider an application (f e1 e2 e3) where the e1,e2,e3 are not yet


=====================================
compiler/GHC/CoreToStg/Prep.hs
=====================================
@@ -1096,10 +1096,26 @@ Breaking our desired invariant. Ultimately we decided to simply accept that
 the continuation may not be a manifest lambda.
 
 
+Note [Simplification of keepAlive#]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The keepAlive# primop benefits from a similar optimisation to that described in
+Note [Simplification of runRW#] above. Specifically, we transform:
+
+    K[keepAlive# @a_rep @a @r_rep @r x s k]
+            ~>
+    keepAlive# @a_rep @a @r_rep @r x s K[k]
+
+The reasons are similar to those described in Note [Simplification of runRW#].
+
+-}
+
+
 -- ---------------------------------------------------------------------------
 --      CpeArg: produces a result satisfying CpeArg
 -- ---------------------------------------------------------------------------
 
+{-
 Note [ANF-ising literal string arguments]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 


=====================================
hadrian/src/Settings/Builders/Ghc.hs
=====================================
@@ -35,6 +35,9 @@ compileAndLinkHs = (builder (Ghc CompileHs) ||^ builder (Ghc LinkHs)) ? do
         hasDynamic = elem dynamic ways
     mconcat [ arg "-Wall"
             , not useColor ? builder (Ghc CompileHs) ?
+              -- N.B. Target.trackArgument excludes this argument from the
+              -- input hash to avoid superfluous recompilation; see
+              -- #18672.
               arg "-fdiagnostics-color=never"
             , (hasVanilla && hasDynamic) ? builder (Ghc CompileHs) ?
               platformSupportsSharedLibs ? way vanilla ?


=====================================
hadrian/src/Target.hs
=====================================
@@ -21,11 +21,12 @@ type Target = H.Target Context Builder
 trackArgument :: Target -> String -> Bool
 trackArgument target arg = case builder target of
     Make _    -> not $ threadArg arg
-    Ghc _ _   -> not $ verbosityArg arg
+    Ghc _ _   -> not $ verbosityArg arg || diagnosticsColorArg arg
     Cabal _ _ -> not $ verbosityArg arg || cabal_configure_ignore arg
     _         -> True
   where
     threadArg s = dropWhileEnd isDigit s `elem` ["-j", "MAKEFLAGS=-j", "THREADS="]
     verbosityArg s = dropWhileEnd isDigit s == "-v"
+    diagnosticsColorArg s = "-fdiagnostics-color=" `isPrefixOf` s -- N.B. #18672
     cabal_configure_ignore s =
       s `elem` [ "--configure-option=--quiet", "--configure-option=--disable-option-checking" ]


=====================================
libraries/base/Foreign/ForeignPtr/Imp.hs
=====================================
@@ -66,31 +66,6 @@ newForeignPtr finalizer p
        addForeignPtrFinalizer finalizer fObj
        return fObj
 
-withForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
--- ^This is a way to look at the pointer living inside a
--- foreign object.  This function takes a function which is
--- applied to that pointer. The resulting 'IO' action is then
--- executed. The foreign object is kept alive at least during
--- the whole action, even if it is not used directly
--- inside. Note that it is not safe to return the pointer from
--- the action and use it after the action completes. All uses
--- of the pointer should be inside the
--- 'withForeignPtr' bracket.  The reason for
--- this unsafeness is the same as for
--- 'unsafeForeignPtrToPtr' below: the finalizer
--- may run earlier than expected, because the compiler can only
--- track usage of the 'ForeignPtr' object, not
--- a 'Ptr' object made from it.
---
--- This function is normally used for marshalling data to
--- or from the object pointed to by the
--- 'ForeignPtr', using the operations from the
--- 'Storable' class.
-withForeignPtr fo io
-  = do r <- io (unsafeForeignPtrToPtr fo)
-       touchForeignPtr fo
-       return r
-
 -- | This variant of 'newForeignPtr' adds a finalizer that expects an
 -- environment in addition to the finalized pointer.  The environment
 -- that will be passed to the finalizer is fixed by the second argument to


=====================================
libraries/base/Foreign/Marshal/Alloc.hs
=====================================
@@ -116,19 +116,6 @@ alloca :: forall a b . Storable a => (Ptr a -> IO b) -> IO b
 alloca  =
   allocaBytesAligned (sizeOf (undefined :: a)) (alignment (undefined :: a))
 
--- Note [NOINLINE for touch#]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~
--- Both allocaBytes and allocaBytesAligned use the touch#, which is notoriously
--- fragile in the presence of simplification (see #14346). In particular, the
--- simplifier may drop the continuation containing the touch# if it can prove
--- that the action passed to allocaBytes will not return. The hack introduced to
--- fix this for 8.2.2 is to mark allocaBytes as NOINLINE, ensuring that the
--- simplifier can't see the divergence.
---
--- These can be removed once #14375 is fixed, which suggests that we instead do
--- away with touch# in favor of a primitive that will capture the scoping left
--- implicit in the case of touch#.
-
 -- |@'allocaBytes' n f@ executes the computation @f@, passing as argument
 -- a pointer to a temporarily allocated block of memory of @n@ bytes.
 -- The block of memory is sufficiently aligned for any of the basic
@@ -142,13 +129,9 @@ allocaBytes (I# size) action = IO $ \ s0 ->
      case newPinnedByteArray# size s0      of { (# s1, mbarr# #) ->
      case unsafeFreezeByteArray# mbarr# s1 of { (# s2, barr#  #) ->
      let addr = Ptr (byteArrayContents# barr#) in
-     case action addr     of { IO action' ->
-     case action' s2      of { (# s3, r #) ->
-     case touch# barr# s3 of { s4 ->
-     (# s4, r #)
-  }}}}}
--- See Note [NOINLINE for touch#]
-{-# NOINLINE allocaBytes #-}
+     case action addr                      of { IO action' ->
+     keepAlive# barr# s2 action'
+  }}}
 
 allocaBytesAligned :: Int -> Int -> (Ptr a -> IO b) -> IO b
 allocaBytesAligned (I# size) (I# align) action = IO $ \ s0 ->
@@ -156,12 +139,8 @@ allocaBytesAligned (I# size) (I# align) action = IO $ \ s0 ->
      case unsafeFreezeByteArray# mbarr# s1 of { (# s2, barr#  #) ->
      let addr = Ptr (byteArrayContents# barr#) in
      case action addr     of { IO action' ->
-     case action' s2      of { (# s3, r #) ->
-     case touch# barr# s3 of { s4 ->
-     (# s4, r #)
-  }}}}}
--- See Note [NOINLINE for touch#]
-{-# NOINLINE allocaBytesAligned #-}
+     keepAlive# barr# s2 action'
+  }}}
 
 -- |Resize a memory area that was allocated with 'malloc' or 'mallocBytes'
 -- to the size needed to store values of type @b@.  The returned pointer


=====================================
libraries/base/GHC/ForeignPtr.hs
=====================================
@@ -3,6 +3,8 @@
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE UnboxedTuples #-}
 {-# LANGUAGE Unsafe #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE KindSignatures #-}
 
 {-# OPTIONS_HADDOCK not-home #-}
 
@@ -46,6 +48,7 @@ module GHC.ForeignPtr
         castForeignPtr,
         plusForeignPtr,
         -- * Finalization
+        withForeignPtr,
         touchForeignPtr,
         finalizeForeignPtr
         -- * Commentary
@@ -55,6 +58,7 @@ module GHC.ForeignPtr
 import Foreign.Storable
 import Data.Foldable    ( sequence_ )
 
+import GHC.Types
 import GHC.Show
 import GHC.Base
 import GHC.IORef
@@ -79,7 +83,7 @@ import Unsafe.Coerce    ( unsafeCoerce, unsafeCoerceUnlifted )
 -- type argument of 'ForeignPtr' should normally be an instance of
 -- class 'Storable'.
 --
-data ForeignPtr a = ForeignPtr Addr# ForeignPtrContents
+data ForeignPtr a = ForeignPtr Addr# !ForeignPtrContents
         -- The Addr# in the ForeignPtr object is intentionally stored
         -- separately from the finalizer. The primary aim of the
         -- representation is to make withForeignPtr efficient; in fact,
@@ -124,7 +128,7 @@ data ForeignPtrContents
     -- ^ The pointer refers to unmanaged memory that should not be freed when
     -- the 'ForeignPtr' becomes unreachable. Functions that add finalizers
     -- to a 'ForeignPtr' throw exceptions when the 'ForeignPtr' is backed by
-    -- 'PlainPtr'Most commonly, this is used with @Addr#@ literals.
+    -- 'PlainPtr'. Most commonly, this is used with @Addr#@ literals.
     -- See Note [Why FinalPtr].
     --
     -- @since 4.15
@@ -162,6 +166,7 @@ data ForeignPtrContents
     -- The invariants that apply to 'MallocPtr' apply to 'PlainPtr' as well.
 
 -- Note [Why FinalPtr]
+-- ~~~~~~~~~~~~~~~~~~~
 --
 -- FinalPtr exists as an optimization for foreign pointers created
 -- from Addr# literals. Most commonly, this happens in the bytestring
@@ -428,7 +433,7 @@ addForeignPtrConcFinalizer_ f@(MallocPtr fo r) finalizer = do
      else return ()
   where
     finalizer' :: State# RealWorld -> (# State# RealWorld, () #)
-    finalizer' = unIO (foreignPtrFinalizer r >> touch f)
+    finalizer' = unIO (foreignPtrFinalizer r >> touchForeignPtrContents f)
 
 addForeignPtrConcFinalizer_ _ _ =
   errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to plain pointer or a final pointer"
@@ -503,6 +508,36 @@ newForeignPtr_ (Ptr obj) =  do
   r <- newIORef NoFinalizers
   return (ForeignPtr obj (PlainForeignPtr r))
 
+withForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
+-- ^This is a way to look at the pointer living inside a
+-- foreign object.  This function takes a function which is
+-- applied to that pointer. The resulting 'IO' action is then
+-- executed. The foreign object is kept alive at least during
+-- the whole action, even if it is not used directly
+-- inside. Note that it is not safe to return the pointer from
+-- the action and use it after the action completes. All uses
+-- of the pointer should be inside the
+-- 'withForeignPtr' bracket.  The reason for
+-- this unsafeness is the same as for
+-- 'unsafeForeignPtrToPtr' below: the finalizer
+-- may run earlier than expected, because the compiler can only
+-- track usage of the 'ForeignPtr' object, not
+-- a 'Ptr' object made from it.
+--
+-- This function is normally used for marshalling data to
+-- or from the object pointed to by the
+-- 'ForeignPtr', using the operations from the
+-- 'Storable' class.
+withForeignPtr fo@(ForeignPtr _ r) f = IO $ \s ->
+  case f (unsafeForeignPtrToPtr fo) of
+    IO action# ->
+      case r of
+        PlainForeignPtr ref -> keepAlive# ref s action#
+        FinalPtr -> action# s
+        MallocPtr mba _ -> keepAlive# mba s action#
+        PlainPtr mba -> keepAlive# mba s action#
+
+
 touchForeignPtr :: ForeignPtr a -> IO ()
 -- ^This function ensures that the foreign object in
 -- question is alive at the given place in the sequence of IO
@@ -528,10 +563,19 @@ touchForeignPtr :: ForeignPtr a -> IO ()
 -- result in artificial deadlock.  Another alternative is to use
 -- explicit reference counting.
 --
-touchForeignPtr (ForeignPtr _ r) = touch r
+touchForeignPtr (ForeignPtr _ r) = touchForeignPtrContents r
+
+touchForeignPtrContents :: ForeignPtrContents -> IO ()
+touchForeignPtrContents (PlainForeignPtr ref) = touchLifted  ref
+touchForeignPtrContents FinalPtr = return ()
+touchForeignPtrContents (MallocPtr mba _) = touchUnlifted mba
+touchForeignPtrContents (PlainPtr mba) = touchUnlifted mba
+
+touchLifted :: a -> IO ()
+touchLifted r = IO $ \s -> case touch# r s of s' -> (# s', () #)
 
-touch :: ForeignPtrContents -> IO ()
-touch r = IO $ \s -> case touch# r s of s' -> (# s', () #)
+touchUnlifted :: forall (a :: TYPE 'UnliftedRep). a -> IO ()
+touchUnlifted r = IO $ \s -> case touch# r s of s' -> (# s', () #)
 
 unsafeForeignPtrToPtr :: ForeignPtr a -> Ptr a
 -- ^This function extracts the pointer component of a foreign


=====================================
libraries/bytestring
=====================================
@@ -1 +1 @@
-Subproject commit e6cb01e2ec0bfdd19298418c85f220925a9fa307
+Subproject commit ee9f5fcf533b283f321f2b121684df77f1bcc825



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/86888d093a16f8fe9ef7cff0d8b24b12619e3180...22837c607d7388c8117bdffbccd19bb77f0861d2

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/86888d093a16f8fe9ef7cff0d8b24b12619e3180...22837c607d7388c8117bdffbccd19bb77f0861d2
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200910/1805e9ac/attachment-0001.html>


More information about the ghc-commits mailing list