[Git][ghc/ghc][wip/andreask/weakly_pinned] 3 commits: simplifier: Fix space leak during demand analysis

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Fri Aug 30 15:33:32 UTC 2024



Andreas Klebinger pushed to branch wip/andreask/weakly_pinned at Glasgow Haskell Compiler / GHC


Commits:
3a5bebf8 by Matthew Pickering at 2024-08-28T14:16:42-04:00
simplifier: Fix space leak during demand analysis

The lazy structure (a list) in a strict field in `DmdType` is not fully
forced which leads to a very large thunk build-up.

It seems there is likely still more work to be done here as it seems we
may be trading space usage for work done. For now, this is the right
choice as rather than using all the memory on my computer, compilation
just takes a little bit longer.

See #25196

- - - - -
c2525e9e by Ryan Scott at 2024-08-28T14:17:17-04:00
Add missing parenthesizeHsType in cvtp's InvisP case

We need to ensure that when we convert an `InvisP` (invisible type pattern) to
a `Pat`, we parenthesize it (at precedence `appPrec`) so that patterns such as
`@(a :: k)` will parse correctly when roundtripped back through the parser.

Fixes #25209.

- - - - -
1453f6fb by Andreas Klebinger at 2024-08-30T17:14:58+02:00
Add functions to check for weakly pinned arrays.

This commit adds `isByteArrayWeaklyPinned#` and `isMutableByteArrayWeaklyPinned#` primops.
These check if a bytearray is *weakly* pinned. Which means it can still be explicitly moved
by the user via compaction but won't be moved by the RTS.

This moves us one more stop closer to nailing down #22255.

- - - - -


15 changed files:

- compiler/GHC/Builtin/primops.txt.pp
- compiler/GHC/StgToCmm/Prim.hs
- compiler/GHC/StgToJS/Prim.hs
- compiler/GHC/ThToHs.hs
- compiler/GHC/Types/Demand.hs
- docs/users_guide/exts/ffi.rst
- libraries/base/src/GHC/Base.hs
- libraries/base/src/GHC/Exts.hs
- rts/PrimOps.cmm
- rts/RtsSymbols.c
- rts/include/stg/MiscClosures.h
- testsuite/tests/rts/T13894.hs
- + testsuite/tests/th/T25209.hs
- + testsuite/tests/th/T25209.stderr
- testsuite/tests/th/all.T


Changes:

=====================================
compiler/GHC/Builtin/primops.txt.pp
=====================================
@@ -1925,7 +1925,25 @@ primop  MutableByteArrayIsPinnedOp "isMutableByteArrayPinned#" GenPrimOp
 
 primop  ByteArrayIsPinnedOp "isByteArrayPinned#" GenPrimOp
    ByteArray# -> Int#
-   {Determine whether a 'ByteArray#' is guaranteed not to move during GC.}
+   {Determine whether a 'ByteArray#' is guaranteed not to move.}
+   with out_of_line = True
+
+primop  ByteArrayIsWeaklyPinnedOp "isByteArrayWeaklyPinned#" GenPrimOp
+   ByteArray# -> Int#
+   {Similar to 'isByteArrayPinned#'. Weakly pinned byte arrays are allowed
+    to be copied into compact regions by the user, potentially invalidating
+    the results of earlier calls to 'byteArrayContents#'.
+
+    See the section `Pinned Byte Arrays` in the user guide for more information.
+
+    This function also returns true for regular pinned bytearrays.
+   }
+   with out_of_line = True
+
+primop  MutableByteArrayIsWeaklyPinnedOp "isMutableByteArrayWeaklyPinned#" GenPrimOp
+   MutableByteArray# s -> Int#
+   { 'isByteArrayWeaklyPinned#' but for mutable arrays.
+   }
    with out_of_line = True
 
 primop  ByteArrayContents_Char "byteArrayContents#" GenPrimOp


=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1668,10 +1668,12 @@ emitPrimOp cfg primop =
   NewPinnedByteArrayOp_Char -> alwaysExternal
   NewAlignedPinnedByteArrayOp_Char -> alwaysExternal
   MutableByteArrayIsPinnedOp -> alwaysExternal
+  MutableByteArrayIsWeaklyPinnedOp -> alwaysExternal
   DoubleDecode_2IntOp -> alwaysExternal
   DoubleDecode_Int64Op -> alwaysExternal
   FloatDecode_IntOp -> alwaysExternal
   ByteArrayIsPinnedOp -> alwaysExternal
+  ByteArrayIsWeaklyPinnedOp -> alwaysExternal
   ShrinkMutableByteArrayOp_Char -> alwaysExternal
   ResizeMutableByteArrayOp_Char -> alwaysExternal
   ShrinkSmallMutableArrayOp_Char -> alwaysExternal


=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -670,6 +670,8 @@ genPrim prof bound ty op = case op of
   NewAlignedPinnedByteArrayOp_Char  -> \[r]   [l,_align] -> pure $ PrimInline (newByteArray r l)
   MutableByteArrayIsPinnedOp        -> \[r]   [_]        -> pure $ PrimInline $ r |= one_
   ByteArrayIsPinnedOp               -> \[r]   [_]        -> pure $ PrimInline $ r |= one_
+  ByteArrayIsWeaklyPinnedOp         -> \[r]   [_]        -> pure $ PrimInline $ r |= one_
+  MutableByteArrayIsWeaklyPinnedOp  -> \[r]   [_]        -> pure $ PrimInline $ r |= one_
   ByteArrayContents_Char            -> \[a,o] [b]        -> pure $ PrimInline $ mconcat [a |= b, o |= zero_]
   MutableByteArrayContents_Char     -> \[a,o] [b]        -> pure $ PrimInline $ mconcat [a |= b, o |= zero_]
   ShrinkMutableByteArrayOp_Char     -> \[]    [a,n]      -> pure $ PrimInline $ appS hdShrinkMutableByteArrayStr [a,n]


=====================================
compiler/GHC/ThToHs.hs
=====================================
@@ -1519,7 +1519,7 @@ cvtp (ViewP e p)       = do { e' <- cvtl e; p' <- cvtPat p
                             ; wrapParLA gParPat $ ViewPat noAnn e' p'}
 cvtp (TypeP t)         = do { t' <- cvtType t
                             ; return $ EmbTyPat noAnn (mkHsTyPat t') }
-cvtp (InvisP t)        = do { t' <- cvtType t
+cvtp (InvisP t)        = do { t' <- parenthesizeHsType appPrec <$> cvtType t
                             ; pure (InvisPat noAnn (mkHsTyPat t'))}
 cvtp (OrP ps)          = do { ps' <- cvtPats ps
                             ; pure (OrPat noExtField ps')}


=====================================
compiler/GHC/Types/Demand.hs
=====================================
@@ -1913,7 +1913,7 @@ multDmdType :: Card -> DmdType -> DmdType
 multDmdType n (DmdType fv args)
   = -- pprTrace "multDmdType" (ppr n $$ ppr fv $$ ppr (multDmdEnv n fv)) $
     DmdType (multDmdEnv n fv)
-            (map (multDmd n) args)
+            (strictMap (multDmd n) args)
 
 peelFV :: DmdType -> Var -> (DmdType, Demand)
 peelFV (DmdType fv ds) id = -- pprTrace "rfv" (ppr id <+> ppr dmd $$ ppr fv)


=====================================
docs/users_guide/exts/ffi.rst
=====================================
@@ -1114,21 +1114,67 @@ Pinned Byte Arrays
 
 A pinned byte array is one that the garbage collector is not allowed
 to move. Consequently, it has a stable address that can be safely
-requested with ``byteArrayContents#``. Not that being pinned doesn't
-prevent the byteArray from being gc'ed in the same fashion a regular
-byte array would be.
+requested with ``byteArrayContents#``. As long as the array remains live
+the address returned by ``byteArrayContents#`` will remain valid. Note that
+being pinned doesn't prevent the byteArray from being gc'ed in the same fashion
+a regular byte array would be if there are no more references to the ``ByteArray#``.
 There are a handful of primitive functions in :base-ref:`GHC.Exts.`
 used to enforce or check for pinnedness: ``isByteArrayPinned#``,
-``isMutableByteArrayPinned#``, and ``newPinnedByteArray#``. A
-byte array can be pinned as a result of three possible causes:
+``isMutableByteArrayPinned#``, ``isByteArrayWeaklyPinned#``,
+``isMutableByteArrayWeaklyPinned#``, and ``newPinnedByteArray#``. A
+byte array can be pinned or weakly pinned as a result of three possible causes:
 
-1. It was allocated by ``newPinnedByteArray#``.
-2. It is large. Currently, GHC defines large object to be one
+1. It was allocated by ``newPinnedByteArray#``. This results in a regular pinned byte array.
+2. It is large, this results in a weakly pinned byte array. Currently, GHC defines large object to be one
    that is at least as large as 80% of a 4KB block (i.e. at
    least 3277 bytes).
-3. It has been copied into a compact region. The documentation
+3. It has been copied into a compact region, resulting in a weakly pinned array. The documentation
    for ``ghc-compact`` and ``compact`` describes this process.
 
+The difference between a pinned array and a weakly pinned array is simply that
+trying to compact a pinned array will result in an exception. Trying to compact
+a weakly pinned array will succeed. However the result of earlier
+calls to ``byteArrayContents#`` is not updated during compaction, which means
+these results will still point to the address where the array was located originally,
+and not to the new address inside the compact region.
+
+This is particularly dangerous when an address to a byte arrays content is stored
+inside a datastructure along with a reference to the byte array.
+If the data structure is compacted later on the pointer won't be updated but the
+reference to the byte array will point to a copy inside the compact region.
+A common data type susceptible to this is `ForeignPtr` when used to represent a ByteArray#.
+
+Here is an example to illustrate this:
+
+.. code-block:: haskell
+
+    workWithArrayContents :: (ByteArray, Ptr Word8) -> (Ptr Word8 -> IO ()) -> IO ()
+    workWithArrayContents (arr@(ByteArray uarr),ptr) worker =
+        case () of
+          _
+            -- Conservative but safe
+            | isByteArrayPinned arr -> keepAliveUnlifted uarr (worker ptr)
+            -- Potentially dangerous, the program needs to ensures the Ptr points into the array.
+            | isByteArrayWeaklyPinned arr -> keepAliveUnlifted uarr (worker ptr)
+            | otherwise -> ... -- Otherwise we can't directly use it for safe FFI calls directly at all.
+
+    main :: IO ()
+    main = do
+        -- We create a large array, which causes it to be implicitly pinned
+        arr <- newByteArray 5000
+        arr@(ByteArray uarr) <- freezeByteArray arr 0 5000 -- Make it immutable
+        let ptr = byteArrayContents arr
+
+        -- Compacting a data structure that contains both an array and a ptr to
+        -- the arrays content's is dangerous and usually the wrong thing to do.
+        let foo = (arr, ptr)
+        foo_compacted <- compact foo
+
+        -- This is fine
+        workWithArrayContents foo do_work
+        -- This is unsound
+        workWithArrayContents (getCompact foo_compacted) do_work
+
 .. [1] Prior to GHC 8.10, when passing an ``ArrayArray#`` argument
   to a foreign function, the foreign function would see a pointer
   to the ``StgMutArrPtrs`` rather than just the payload.


=====================================
libraries/base/src/GHC/Base.hs
=====================================
@@ -139,7 +139,8 @@ module GHC.Base
     ) where
 
 import GHC.Internal.Base
-import GHC.Prim hiding (dataToTagLarge#, dataToTagSmall#, whereFrom#)
+import GHC.Prim hiding (dataToTagLarge#, dataToTagSmall#, whereFrom#,
+                        isByteArrayWeaklyPinned#, isMutableByteArrayWeaklyPinned#)
    -- Hide dataToTagLarge# because it is expected to break for
    -- GHC-internal reasons in the near future, and shouldn't
    -- be exposed from base (not even GHC.Exts)


=====================================
libraries/base/src/GHC/Exts.hs
=====================================
@@ -112,7 +112,9 @@ module GHC.Exts
 
 import GHC.Internal.Exts
 import GHC.Internal.ArrayArray
-import GHC.Prim hiding ( coerce, dataToTagSmall#, dataToTagLarge#, whereFrom# )
+import GHC.Prim hiding (
+  coerce, dataToTagSmall#, dataToTagLarge#, whereFrom#,
+  isByteArrayWeaklyPinned#, isMutableByteArrayWeaklyPinned# )
   -- Hide dataToTag# ops because they are expected to break for
   -- GHC-internal reasons in the near future, and shouldn't
   -- be exposed from base (not even GHC.Exts)


=====================================
rts/PrimOps.cmm
=====================================
@@ -215,12 +215,29 @@ stg_isByteArrayPinnedzh ( gcptr ba )
     return (flags & BF_PINNED != 0);
 }
 
+stg_isByteArrayWeaklyPinnedzh ( gcptr ba )
+// ByteArray# s -> Int#
+{
+    W_ bd, flags;
+    bd = Bdescr(ba);
+    // See #22255 and the primop docs.
+    flags = TO_W_(bdescr_flags(bd));
+
+    return (flags & (BF_PINNED | BF_COMPACT | BF_LARGE) != 0);
+}
+
 stg_isMutableByteArrayPinnedzh ( gcptr mba )
 // MutableByteArray# s -> Int#
 {
     jump stg_isByteArrayPinnedzh(mba);
 }
 
+stg_isMutableByteArrayWeaklyPinnedzh ( gcptr mba )
+// MutableByteArray# s -> Int#
+{
+    jump stg_isByteArrayWeaklyPinnedzh(mba);
+}
+
 /* Note [LDV profiling and resizing arrays]
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * As far as the LDV profiler is concerned arrays are "inherently used" which


=====================================
rts/RtsSymbols.c
=====================================
@@ -656,6 +656,8 @@ extern char **environ;
       SymI_HasDataProto(stg_newAlignedPinnedByteArrayzh)                    \
       SymI_HasDataProto(stg_isByteArrayPinnedzh)                            \
       SymI_HasDataProto(stg_isMutableByteArrayPinnedzh)                     \
+      SymI_HasDataProto(stg_isByteArrayWeaklyPinnedzh)                      \
+      SymI_HasDataProto(stg_isMutableByteArrayWeaklyPinnedzh)               \
       SymI_HasDataProto(stg_shrinkMutableByteArrayzh)                       \
       SymI_HasDataProto(stg_resizzeMutableByteArrayzh)                      \
       SymI_HasDataProto(stg_shrinkSmallMutableArrayzh)                       \


=====================================
rts/include/stg/MiscClosures.h
=====================================
@@ -454,6 +454,8 @@ RTS_FUN_DECL(stg_newPinnedByteArrayzh);
 RTS_FUN_DECL(stg_newAlignedPinnedByteArrayzh);
 RTS_FUN_DECL(stg_isByteArrayPinnedzh);
 RTS_FUN_DECL(stg_isMutableByteArrayPinnedzh);
+RTS_FUN_DECL(stg_isByteArrayWeaklyPinnedzh);
+RTS_FUN_DECL(stg_isMutableByteArrayWeaklyPinnedzh);
 RTS_FUN_DECL(stg_shrinkMutableByteArrayzh);
 RTS_FUN_DECL(stg_resizzeMutableByteArrayzh);
 RTS_FUN_DECL(stg_shrinkSmallMutableArrayzh);


=====================================
testsuite/tests/rts/T13894.hs
=====================================
@@ -6,6 +6,7 @@
 
 import Control.Monad
 import GHC.Exts
+import GHC.Internal.Exts (isMutableByteArrayWeaklyPinned#)
 import GHC.IO
 
 main :: IO ()
@@ -16,3 +17,10 @@ main = do
             case isMutableByteArrayPinned# arr# of
               n# -> (# s1, isTrue# n# #)
     when pinned $ putStrLn "BAD"
+
+    weakly_pinned <- IO $ \s0 ->
+      case newByteArray# 1000000# s0 of
+        (# s1, arr# #) ->
+            case isMutableByteArrayWeaklyPinned# arr# of
+              n# -> (# s1, isTrue# n# #)
+    when (not weakly_pinned) $ putStrLn "BAD"


=====================================
testsuite/tests/th/T25209.hs
=====================================
@@ -0,0 +1,9 @@
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TypeAbstractions #-}
+module T25209 where
+
+import Data.Proxy
+
+$([d| f :: Proxy a -> Proxy a
+      f @(a :: k) p = p
+    |])


=====================================
testsuite/tests/th/T25209.stderr
=====================================
@@ -0,0 +1,6 @@
+T25209.hs:(7,2)-(9,7): Splicing declarations
+    [d| f :: Proxy a -> Proxy a
+        f @(a :: k) p = p |]
+  ======>
+    f :: Proxy a -> Proxy a
+    f @(a :: k) p = p


=====================================
testsuite/tests/th/all.T
=====================================
@@ -622,4 +622,5 @@ test('T24572a', normal, compile, [''])
 test('T24572b', normal, compile_fail, [''])
 test('T24572c', normal, compile_fail, [''])
 test('T24572d', normal, compile, [''])
+test('T25209', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques'])
 test('TH_MultilineStrings', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0dd1325c117296b258aebb18355bd1f38a12faf1...1453f6fb6d7c35eb4e658fb7eed089df738bd5e7

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/0dd1325c117296b258aebb18355bd1f38a12faf1...1453f6fb6d7c35eb4e658fb7eed089df738bd5e7
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240830/0e9cb539/attachment-0001.html>


More information about the ghc-commits mailing list