[Git][ghc/ghc][wip/haddock-mem-fixes] 10 commits: base: Add test for #13660

Finley McIlwaine (@FinleyMcIlwaine) gitlab at gitlab.haskell.org
Thu May 18 20:02:23 UTC 2023



Finley McIlwaine pushed to branch wip/haddock-mem-fixes at Glasgow Haskell Compiler / GHC


Commits:
87626ef0 by Ben Gamari at 2023-05-18T15:18:53-04:00
base: Add test for #13660

- - - - -
9eef53b1 by Ben Gamari at 2023-05-18T15:18:53-04:00
base: Move implementation of GHC.Foreign to GHC.Internal

- - - - -
174ea2fa by Ben Gamari at 2023-05-18T15:18:53-04:00
base: Introduce {new,with}CStringLen0

These are useful helpers for implementing the internal-NUL code unit
check needed to fix #13660.

- - - - -
a46ced16 by Ben Gamari at 2023-05-18T15:18:53-04:00
base: Clean up documentation

- - - - -
b98d99cc by Ben Gamari at 2023-05-18T15:18:53-04:00
base: Ensure that FilePaths don't contain NULs

POSIX filepaths may not contain the NUL octet but previously we did not
reject such paths. This could be exploited by untrusted input to cause
discrepancies between various `FilePath` queries and the opened
filename. For instance, `readFile "hello.so\x00.txt"` would open the
file `"hello.so"` yet `takeFileExtension` would return `".txt"`.

The same argument applies to Windows FilePaths.

Fixes #13660.

- - - - -
7ae45459 by Simon Peyton Jones at 2023-05-18T15:19:29-04:00
Allow the demand analyser to unpack tuple and equality dictionaries

Addresses #23398. The demand analyser usually does not unpack class
dictionaries: see Note [Do not unbox class dictionaries] in
GHC.Core.Opt.DmdAnal.

This patch makes an exception for tuple dictionaries and equality
dictionaries, for reasons explained in wrinkles (DNB1) and (DNB2) of
the above Note.

Compile times fall by 0.1% for some reason (max 0.7% on T18698b).

- - - - -
b53a9086 by Greg Steuck at 2023-05-18T15:20:08-04:00
Use a simpler and more portable construct in ld.lld check

printf '%q\n' is a bash extension, which led to the ld.lld test failing
incorrectly on OpenBSD, which uses pdksh as /bin/sh.

- - - - -
dd5710af by Torsten Schmits at 2023-05-18T15:20:50-04:00
Update the warning about interpreter optimizations

to reflect that they're not incompatible anymore, but guarded by a flag

- - - - -
4f6dd999 by Matthew Pickering at 2023-05-18T15:21:26-04:00
Remove stray dump flags in GHC.Rename.Names

- - - - -
dbf8b2b1 by Finley McIlwaine at 2023-05-18T20:02:12+00:00
Memory usage fixes for Haddock

- Do not include `mi_globals` in the `NoBackend` backend. It was only included
  for Haddock, but Haddock does not actually need it. This causes a 200MB
  reduction in max residency when generating haddocks on the Agda codebase
  (roughly 1GB to 800MB).
- Strictly evaluate some SrcSpans in mkDoc{Next,Prev} to avoid thunks
- Update Haddock submodule

- - - - -


25 changed files:

- compiler/GHC/Core/Opt/DmdAnal.hs
- compiler/GHC/Core/Predicate.hs
- compiler/GHC/Driver/Backend.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Parser/PostProcess/Haddock.hs
- compiler/GHC/Rename/Names.hs
- libraries/base/GHC/Foreign.hs
- + libraries/base/GHC/Foreign/Internal.hs
- libraries/base/System/Posix/Internals.hs
- libraries/base/base.cabal
- + libraries/base/tests/T13660.hs
- + libraries/base/tests/T13660.stdout
- libraries/base/tests/all.T
- m4/fp_ld_supports_response_files.m4
- testsuite/tests/ghc-api/T10052/T10052.stderr
- testsuite/tests/ghci.debugger/scripts/print007.stderr
- testsuite/tests/ghci/should_fail/T10549.stderr
- testsuite/tests/ghci/should_fail/T10549a.stderr
- testsuite/tests/indexed-types/should_compile/T7837.stderr
- testsuite/tests/safeHaskell/ghci/p14.stderr
- + testsuite/tests/stranal/should_compile/T23398.hs
- + testsuite/tests/stranal/should_compile/T23398.stderr
- testsuite/tests/stranal/should_compile/all.T
- testsuite/tests/th/T8333.stderr
- utils/haddock


Changes:

=====================================
compiler/GHC/Core/Opt/DmdAnal.hs
=====================================
@@ -16,37 +16,41 @@ where
 
 import GHC.Prelude
 
-import GHC.Core.Opt.WorkWrap.Utils
 import GHC.Types.Demand   -- All of it
+
 import GHC.Core
-import GHC.Core.Multiplicity ( scaledThing )
-import GHC.Utils.Outputable
-import GHC.Types.Var.Env
-import GHC.Types.Var.Set
-import GHC.Types.Basic
-import Data.List        ( mapAccumL )
 import GHC.Core.DataCon
-import GHC.Types.ForeignCall ( isSafeForeignCall )
-import GHC.Types.Id
 import GHC.Core.Utils
 import GHC.Core.TyCon
 import GHC.Core.Type
-import GHC.Core.Predicate( isClassPred )
+import GHC.Core.Predicate( isEqualityClass, isCTupleClass )
 import GHC.Core.FVs      ( rulesRhsFreeIds, bndrRuleAndUnfoldingIds )
 import GHC.Core.Coercion ( Coercion )
 import GHC.Core.TyCo.FVs     ( coVarsOfCos )
 import GHC.Core.TyCo.Compare ( eqType )
+import GHC.Core.Multiplicity ( scaledThing )
 import GHC.Core.FamInstEnv
 import GHC.Core.Opt.Arity ( typeArity )
-import GHC.Utils.Misc
-import GHC.Utils.Panic
-import GHC.Utils.Panic.Plain
+import GHC.Core.Opt.WorkWrap.Utils
+
 import GHC.Builtin.PrimOps
 import GHC.Builtin.Types.Prim ( realWorldStatePrimTy )
+
 import GHC.Types.Unique.Set
 import GHC.Types.Unique.MemoFun
 import GHC.Types.RepType
+import GHC.Types.ForeignCall ( isSafeForeignCall )
+import GHC.Types.Id
+import GHC.Types.Var.Env
+import GHC.Types.Var.Set
+import GHC.Types.Basic
 
+import GHC.Utils.Misc
+import GHC.Utils.Panic
+import GHC.Utils.Panic.Plain
+import GHC.Utils.Outputable
+
+import Data.List        ( mapAccumL )
 
 {-
 ************************************************************************
@@ -1499,7 +1503,7 @@ bounds-checking.
 
 So we want to give `indexError` a signature like `<1!P(!S,!S)><1!S><S!S>b`
 where the !S (meaning Poly Unboxed C1N) says that the polymorphic arguments
-are unboxed (recursively).  The wrapper for `indexError` won't /acutally/
+are unboxed (recursively).  The wrapper for `indexError` won't /actually/
 unbox them (because their polymorphic type doesn't allow that) but when
 demand-analysing /callers/, we'll behave as if that call needs the args
 unboxed.
@@ -1782,39 +1786,6 @@ applying the strictness demands to the final result of DmdAnal. The result is
 that we get the strict demand signature we wanted even if we can't float
 the case on `x` up through the case on `burble`.
 
-Note [Do not unbox class dictionaries]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We never unbox class dictionaries in worker/wrapper.
-
-1. INLINABLE functions
-   If we have
-      f :: Ord a => [a] -> Int -> a
-      {-# INLINABLE f #-}
-   and we worker/wrapper f, we'll get a worker with an INLINABLE pragma
-   (see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap),
-   which can still be specialised by the type-class specialiser, something like
-      fw :: Ord a => [a] -> Int# -> a
-
-   BUT if f is strict in the Ord dictionary, we might unpack it, to get
-      fw :: (a->a->Bool) -> [a] -> Int# -> a
-   and the type-class specialiser can't specialise that. An example is #6056.
-
-   Historical note: #14955 describes how I got this fix wrong the first time.
-   I got aware of the issue in T5075 by the change in boxity of loop between
-   demand analysis runs.
-
-2. -fspecialise-aggressively.  As #21286 shows, the same phenomenon can occur
-   occur without INLINABLE, when we use -fexpose-all-unfoldings and
-   -fspecialise-aggressively to do vigorous cross-module specialisation.
-
-3. #18421 found that unboxing a dictionary can also make the worker less likely
-   to inline; the inlining heuristics seem to prefer to inline a function
-   applied to a dictionary over a function applied to a bunch of functions.
-
-TL;DR we /never/ unbox class dictionaries. Unboxing the dictionary, and passing
-a raft of higher-order functions isn't a huge win anyway -- you really want to
-specialise the function.
-
 Note [Worker argument budget]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 In 'finaliseArgBoxities' we don't want to generate workers with zillions of
@@ -1998,22 +1969,13 @@ finaliseArgBoxities env fn threshold_arity rhs_dmds div rhs
 
     arg_triples :: [(Type, StrictnessMark, Demand)]
     arg_triples = take threshold_arity $
-                  [ (bndr_ty, NotMarkedStrict, get_dmd bndr bndr_ty)
-                  | bndr <- bndrs
-                  , isRuntimeVar bndr, let bndr_ty = idType bndr ]
-
-    get_dmd :: Id -> Type -> Demand
-    get_dmd bndr bndr_ty
-      | isClassPred bndr_ty = trimBoxity dmd
-        -- See Note [Do not unbox class dictionaries]
-        -- NB: 'ty' has not been normalised, so this will (rightly)
-        --     catch newtype dictionaries too.
-        -- NB: even for bottoming functions, don't unbox dictionaries
-
-      | is_bot_fn = unboxDeeplyDmd dmd
-        -- See Note [Boxity for bottoming functions], case (B)
-
-      | otherwise = dmd
+                  [ (idType bndr, NotMarkedStrict, get_dmd bndr)
+                  | bndr <- bndrs, isRuntimeVar bndr ]
+
+    get_dmd :: Id -> Demand
+    get_dmd bndr
+      | is_bot_fn = unboxDeeplyDmd dmd -- See Note [Boxity for bottoming functions],
+      | otherwise = dmd                --     case (B)
       where
         dmd = idDemandInfo bndr
 
@@ -2119,6 +2081,12 @@ wantToUnboxArg env ty str_mark dmd@(n :* _)
          -- isMarkedStrict: see Note [Unboxing evaluated arguments] in DmdAnal
        -> DontUnbox
 
+       | doNotUnbox ty
+       -> DontUnbox  -- See Note [Do not unbox class dictionaries]
+                     -- NB: 'ty' has not been normalised, so this will (rightly)
+                     --     catch newtype dictionaries too.
+                     -- NB: even for bottoming functions, don't unbox dictionaries
+
        | DefinitelyRecursive <- ae_rec_dc env dc
          -- See Note [Which types are unboxed?]
          -- and Note [Demand analysis for recursive data constructors]
@@ -2129,6 +2097,76 @@ wantToUnboxArg env ty str_mark dmd@(n :* _)
                         (dataConRepStrictness dc)
                         dmds)
 
+
+doNotUnbox :: Type -> Bool
+-- Do not unbox class dictionaries, except equality classes and tuples.
+-- See Note [Do not unbox class dictionaries]
+doNotUnbox arg_ty
+  = case tyConAppTyCon_maybe arg_ty of
+      Just tc | Just cls <- tyConClass_maybe tc
+              -> not (isEqualityClass cls || isCTupleClass cls)
+       -- See (DNB2) and (DNB1) in Note [Do not unbox class dictionaries]
+
+      _ -> False
+
+{- Note [Do not unbox class dictionaries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We never unbox class dictionaries in worker/wrapper.
+
+1. INLINABLE functions
+   If we have
+      f :: Ord a => [a] -> Int -> a
+      {-# INLINABLE f #-}
+   and we worker/wrapper f, we'll get a worker with an INLINABLE pragma
+   (see Note [Worker/wrapper for INLINABLE functions] in GHC.Core.Opt.WorkWrap),
+   which can still be specialised by the type-class specialiser, something like
+      fw :: Ord a => [a] -> Int# -> a
+
+   BUT if f is strict in the Ord dictionary, we might unpack it, to get
+      fw :: (a->a->Bool) -> [a] -> Int# -> a
+   and the type-class specialiser can't specialise that. An example is #6056.
+
+   Historical note: #14955 describes how I got this fix wrong the first time.
+   I became aware of the issue in T5075 by the change in boxity of loop between
+   demand analysis runs.
+
+2. -fspecialise-aggressively.  As #21286 shows, the same phenomenon can occur
+   without INLINABLE, when we use -fexpose-all-unfoldings and
+   -fspecialise-aggressively to do vigorous cross-module specialisation.
+
+3. #18421 found that unboxing a dictionary can also make the worker less likely
+   to inline; the inlining heuristics seem to prefer to inline a function
+   applied to a dictionary over a function applied to a bunch of functions.
+
+TL;DR we /never/ unbox class dictionaries. Unboxing the dictionary, and passing
+a raft of higher-order functions isn't a huge win anyway -- you really want to
+specialise the function.
+
+Wrinkle (DNB1): we /do/ want to unbox tuple dictionaries (#23398)
+     f :: (% Eq a, Show a %) => blah
+  with -fdicts-strict it is great to unbox to
+     $wf :: Eq a => Show a => blah
+  (where I have written out the currying explicitly).  Now we can specialise
+  $wf on the Eq or Show dictionary.  Nothing is lost.
+
+  And something is gained.  It is possible that `f` will look like this:
+     f = /\a. \d:(% Eq a, Show a %). ... f @a (% sel1 d, sel2 d %)...
+  where there is a recursive call to `f`, or to another function that takes the
+  same tuple dictionary, but where the tuple is built from the components of
+  `d`.  The Simplifier does not fix this.  But if we unpacked the dictionary
+  we'd get
+     $wf = /\a. \(d1:Eq a) (d2:Show a). let d = (% d1, d2 %)
+             in ...f @a (% sel1 d, sel2 d %)
+  and all the tuple building and taking apart will disappear.
+
+Wrinkle (DNB2): we /do/ want to unbox equality dictionaries,
+  for (~), (~~), and Coercible (#23398).  Their payload is a single unboxed
+  coercion.  We never want to specialise on `(t1 ~ t2)`.  All that would do is
+  to make a copy of the function's RHS with a particular coercion.  Unlike
+  normal class methods, that does not unlock any new optimisation
+  opportunities in the specialised RHS.
+-}
+
 {- *********************************************************************
 *                                                                      *
                       Fixpoints


=====================================
compiler/GHC/Core/Predicate.hs
=====================================
@@ -20,7 +20,7 @@ module GHC.Core.Predicate (
 
   -- Class predicates
   mkClassPred, isDictTy, typeDeterminesValue,
-  isClassPred, isEqPredClass, isCTupleClass,
+  isClassPred, isEqPredClass, isCTupleClass, isEqualityClass,
   getClassPredTys, getClassPredTys_maybe,
   classMethodTy, classMethodInstTy,
 
@@ -219,11 +219,6 @@ isEvVarType :: Type -> Bool
 -- See Note [Evidence for quantified constraints]
 isEvVarType ty = isCoVarType ty || isPredTy ty
 
-isEqPredClass :: Class -> Bool
--- True of (~) and (~~)
-isEqPredClass cls =  cls `hasKey` eqTyConKey
-                  || cls `hasKey` heqTyConKey
-
 isClassPred :: PredType -> Bool
 isClassPred ty = case tyConAppTyCon_maybe ty of
     Just tc -> isClassTyCon tc
@@ -245,6 +240,20 @@ isEqPrimPred ty = isCoVarType ty
 isCTupleClass :: Class -> Bool
 isCTupleClass cls = isTupleTyCon (classTyCon cls)
 
+isEqPredClass :: Class -> Bool
+-- True of (~) and (~~)
+isEqPredClass cls =  cls `hasKey` eqTyConKey
+                  || cls `hasKey` heqTyConKey
+
+isEqualityClass :: Class -> Bool
+-- True of (~), (~~), and Coercible
+-- These all have a single primitive-equality superclass, either (~N#) or (~R#)
+isEqualityClass cls
+  = cls `hasKey` heqTyConKey
+    || cls `hasKey` eqTyConKey
+    || cls `hasKey` coercibleTyConKey
+
+
 {- *********************************************************************
 *                                                                      *
               Implicit parameters


=====================================
compiler/GHC/Driver/Backend.hs
=====================================
@@ -551,17 +551,14 @@ backendRespectsSpecialise (Named NoBackend)   = False
 
 -- | This back end wants the `mi_globals` field of a
 -- `ModIface` to be populated (with the top-level bindings
--- of the original source).  True for the interpreter, and
--- also true for "no backend", which is used by Haddock.
--- (After typechecking a module, Haddock wants access to
--- the module's `GlobalRdrEnv`.)
+-- of the original source).  Only true for the interpreter.
 backendWantsGlobalBindings :: Backend -> Bool
 backendWantsGlobalBindings (Named NCG)         = False
 backendWantsGlobalBindings (Named LLVM)        = False
 backendWantsGlobalBindings (Named ViaC)        = False
 backendWantsGlobalBindings (Named JavaScript)  = False
+backendWantsGlobalBindings (Named NoBackend)   = False
 backendWantsGlobalBindings (Named Interpreter) = True
-backendWantsGlobalBindings (Named NoBackend)   = True
 
 -- | The back end targets a technology that implements
 -- `switch` natively.  (For example, LLVM or C.) Therefore


=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -3658,9 +3658,10 @@ makeDynFlagsConsistent dflags
  , gopt Opt_UnoptimizedCoreForInterpreter dflags
  , let (dflags', changed) = updOptLevelChanged 0 dflags
  , changed
-    = loop dflags' ("Optimization flags are incompatible with the " ++
-                   backendDescription (backend dflags) ++
-                                          "; optimization flags ignored.")
+    = loop dflags' $
+      "Ignoring optimization flags since they are experimental for the " ++
+      backendDescription (backend dflags) ++
+      ". Pass -fno-unoptimized-core-for-interpreter to enable this feature."
 
  | LinkInMemory <- ghcLink dflags
  , not (gopt Opt_ExternalInterpreter dflags)


=====================================
compiler/GHC/Parser/PostProcess/Haddock.hs
=====================================
@@ -1360,11 +1360,15 @@ mkDocIE (L l_comment hdk_comment) =
         span = mkSrcSpanPs l_comment
 
 mkDocNext :: PsLocated HdkComment -> Maybe (Located HsDocString)
-mkDocNext (L l (HdkCommentNext doc)) = Just (L (mkSrcSpanPs l) doc)
+mkDocNext (L l (HdkCommentNext doc)) =
+    let !src_span = mkSrcSpanPs l
+    in Just (L src_span doc)
 mkDocNext _ = Nothing
 
 mkDocPrev :: PsLocated HdkComment -> Maybe (Located HsDocString)
-mkDocPrev (L l (HdkCommentPrev doc)) = Just (L (mkSrcSpanPs l) doc)
+mkDocPrev (L l (HdkCommentPrev doc)) =
+    let !src_span = mkSrcSpanPs l
+    in Just (L src_span doc)
 mkDocPrev _ = Nothing
 
 


=====================================
compiler/GHC/Rename/Names.hs
=====================================
@@ -12,8 +12,6 @@ Extracting imported and top-level names in scope
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE LambdaCase #-}
 
-{-# OPTIONS_GHC -ddump-to-file -ddump-simpl #-}
-
 module GHC.Rename.Names (
         rnImports, getLocalNonValBinders, newRecordFieldLabel,
         extendGlobalRdrEnvRn,


=====================================
libraries/base/GHC/Foreign.hs
=====================================
@@ -21,312 +21,22 @@ module GHC.Foreign (
     -- * C strings with a configurable encoding
     CString, CStringLen,
 
-    -- conversion of C strings into Haskell strings
-    --
+    -- * Conversion of C strings into Haskell strings
     peekCString,
     peekCStringLen,
 
-    -- conversion of Haskell strings into C strings
-    --
+    -- * Conversion of Haskell strings into C strings
     newCString,
     newCStringLen,
+    newCStringLen0,
 
-    -- conversion of Haskell strings into C strings using temporary storage
-    --
+    -- * Conversion of Haskell strings into C strings using temporary storage
     withCString,
     withCStringLen,
+    withCStringLen0,
     withCStringsLen,
 
     charIsRepresentable,
   ) where
 
-import Foreign.Marshal.Array
-import Foreign.C.Types
-import Foreign.Ptr
-import Foreign.Storable
-
-import Data.Word
-
--- Imports for the locale-encoding version of marshallers
-
-import Data.Tuple (fst)
-
-import GHC.Show ( show )
-
-import Foreign.Marshal.Alloc
-import Foreign.ForeignPtr
-
-import GHC.Debug
-import GHC.List
-import GHC.Num
-import GHC.Base
-
-import GHC.IO
-import GHC.IO.Exception
-import GHC.IO.Buffer
-import GHC.IO.Encoding.Types
-
-
-c_DEBUG_DUMP :: Bool
-c_DEBUG_DUMP = False
-
-putDebugMsg :: String -> IO ()
-putDebugMsg | c_DEBUG_DUMP = debugLn
-            | otherwise    = const (return ())
-
-
--- | A C string is a reference to an array of C characters terminated by NUL.
-type CString    = Ptr CChar
-
--- | A string with explicit length information in bytes instead of a
--- terminating NUL (allowing NUL characters in the middle of the string).
-type CStringLen = (Ptr CChar, Int)
-
--- exported functions
--- ------------------
-
--- | Marshal a NUL terminated C string into a Haskell string.
---
-peekCString    :: TextEncoding -> CString -> IO String
-peekCString enc cp = do
-    sz <- lengthArray0 nUL cp
-    peekEncodedCString enc (cp, sz * cCharSize)
-
--- | Marshal a C string with explicit length into a Haskell string.
---
-peekCStringLen           :: TextEncoding -> CStringLen -> IO String
-peekCStringLen = peekEncodedCString
-
--- | Marshal a Haskell string into a NUL terminated C string.
---
--- * the Haskell string may /not/ contain any NUL characters
---
--- * new storage is allocated for the C string and must be
---   explicitly freed using 'Foreign.Marshal.Alloc.free' or
---   'Foreign.Marshal.Alloc.finalizerFree'.
---
-newCString :: TextEncoding -> String -> IO CString
-newCString enc = liftM fst . newEncodedCString enc True
-
--- | Marshal a Haskell string into a C string (ie, character array) with
--- explicit length information.
---
--- * new storage is allocated for the C string and must be
---   explicitly freed using 'Foreign.Marshal.Alloc.free' or
---   'Foreign.Marshal.Alloc.finalizerFree'.
---
-newCStringLen     :: TextEncoding -> String -> IO CStringLen
-newCStringLen enc = newEncodedCString enc False
-
--- | Marshal a Haskell string into a NUL terminated C string using temporary
--- storage.
---
--- * the Haskell string may /not/ contain any NUL characters
---
--- * the memory is freed when the subcomputation terminates (either
---   normally or via an exception), so the pointer to the temporary
---   storage must /not/ be used after this.
---
-withCString :: TextEncoding -> String -> (CString -> IO a) -> IO a
-withCString enc s act = withEncodedCString enc True s $ \(cp, _sz) -> act cp
-
--- | Marshal a Haskell string into a C string (ie, character array)
--- in temporary storage, with explicit length information.
---
--- * the memory is freed when the subcomputation terminates (either
---   normally or via an exception), so the pointer to the temporary
---   storage must /not/ be used after this.
---
-withCStringLen         :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
-withCStringLen enc = withEncodedCString enc False
-
--- | Marshal a list of Haskell strings into an array of NUL terminated C strings
--- using temporary storage.
---
--- * the Haskell strings may /not/ contain any NUL characters
---
--- * the memory is freed when the subcomputation terminates (either
---   normally or via an exception), so the pointer to the temporary
---   storage must /not/ be used after this.
---
-withCStringsLen :: TextEncoding
-                -> [String]
-                -> (Int -> Ptr CString -> IO a)
-                -> IO a
-withCStringsLen enc strs f = go [] strs
-  where
-  go cs (s:ss) = withCString enc s $ \c -> go (c:cs) ss
-  go cs [] = withArrayLen (reverse cs) f
-
--- | Determines whether a character can be accurately encoded in a
--- 'Foreign.C.String.CString'.
---
--- Pretty much anyone who uses this function is in a state of sin because
--- whether or not a character is encodable will, in general, depend on the
--- context in which it occurs.
-charIsRepresentable :: TextEncoding -> Char -> IO Bool
--- We force enc explicitly because `catch` is lazy in its
--- first argument. We would probably like to force c as well,
--- but unfortunately worker/wrapper produces very bad code for
--- that.
---
--- TODO If this function is performance-critical, it would probably
--- pay to use a single-character specialization of withCString. That
--- would allow worker/wrapper to actually eliminate Char boxes, and
--- would also get rid of the completely unnecessary cons allocation.
-charIsRepresentable !enc c =
-  withCString enc [c]
-              (\cstr -> do str <- peekCString enc cstr
-                           case str of
-                             [ch] | ch == c -> pure True
-                             _ -> pure False)
-    `catch`
-       \(_ :: IOException) -> pure False
-
--- auxiliary definitions
--- ----------------------
-
--- C's end of string character
-nUL :: CChar
-nUL  = 0
-
--- Size of a CChar in bytes
-cCharSize :: Int
-cCharSize = sizeOf (undefined :: CChar)
-
-
-{-# INLINE peekEncodedCString #-}
-peekEncodedCString :: TextEncoding -- ^ Encoding of CString
-                   -> CStringLen
-                   -> IO String    -- ^ String in Haskell terms
-peekEncodedCString (TextEncoding { mkTextDecoder = mk_decoder }) (p, sz_bytes)
-  = bracket mk_decoder close $ \decoder -> do
-      let chunk_size = sz_bytes `max` 1 -- Decode buffer chunk size in characters: one iteration only for ASCII
-      !from0 <- fmap (\fp -> bufferAdd sz_bytes (emptyBuffer fp sz_bytes ReadBuffer)) $ newForeignPtr_ (castPtr p)
-      !to    <- newCharBuffer chunk_size WriteBuffer
-
-      let go !iteration !from = do
-            (why, from', !to') <- encode decoder from to
-            if isEmptyBuffer from'
-             then
-              -- No input remaining: @why@ will be InputUnderflow, but we don't care
-              withBuffer to' $ peekArray (bufferElems to')
-             else do
-              -- Input remaining: what went wrong?
-              putDebugMsg ("peekEncodedCString: " ++ show iteration ++ " " ++ show why)
-              (from'', to'') <- case why of InvalidSequence -> recover decoder from' to' -- These conditions are equally bad because
-                                            InputUnderflow  -> recover decoder from' to' -- they indicate malformed/truncated input
-                                            OutputUnderflow -> return (from', to')       -- We will have more space next time round
-              putDebugMsg ("peekEncodedCString: from " ++ summaryBuffer from ++ " " ++ summaryBuffer from' ++ " " ++ summaryBuffer from'')
-              putDebugMsg ("peekEncodedCString: to " ++ summaryBuffer to ++ " " ++ summaryBuffer to' ++ " " ++ summaryBuffer to'')
-              to_chars <- withBuffer to'' $ peekArray (bufferElems to'')
-              fmap (to_chars++) $ go (iteration + 1) from''
-
-      go (0 :: Int) from0
-
-{-# INLINE withEncodedCString #-}
-withEncodedCString :: TextEncoding         -- ^ Encoding of CString to create
-                   -> Bool                 -- ^ Null-terminate?
-                   -> String               -- ^ String to encode
-                   -> (CStringLen -> IO a) -- ^ Worker that can safely use the allocated memory
-                   -> IO a
-withEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s act
-  = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do
-      from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p
-
-      let go !iteration to_sz_bytes = do
-           putDebugMsg ("withEncodedCString: " ++ show iteration)
-           allocaBytes to_sz_bytes $ \to_p -> do
-            -- See Note [Check *before* fill in withEncodedCString] about why
-            -- this is subtle.
-            mb_res <- tryFillBuffer encoder null_terminate from to_p to_sz_bytes
-            case mb_res of
-              Nothing  -> go (iteration + 1) (to_sz_bytes * 2)
-              Just to_buf -> withCStringBuffer to_buf null_terminate act
-
-      -- If the input string is ASCII, this value will ensure we only allocate once
-      go (0 :: Int) (cCharSize * (sz + 1))
-
-withCStringBuffer :: Buffer Word8 -> Bool -> (CStringLen -> IO r) -> IO r
-withCStringBuffer to_buf null_terminate act = do
-  let bytes = bufferElems to_buf
-  withBuffer to_buf $ \to_ptr -> do
-    when null_terminate $ pokeElemOff to_ptr (bufR to_buf) 0
-    act (castPtr to_ptr, bytes) -- NB: the length information is specified as being in *bytes*
-
-{-# INLINE newEncodedCString #-}
-newEncodedCString :: TextEncoding  -- ^ Encoding of CString to create
-                  -> Bool          -- ^ Null-terminate?
-                  -> String        -- ^ String to encode
-                  -> IO CStringLen
-newEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s
-  = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do
-      from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p
-
-      let go !iteration to_p to_sz_bytes = do
-           putDebugMsg ("newEncodedCString: " ++ show iteration)
-           mb_res <- tryFillBuffer encoder null_terminate from to_p to_sz_bytes
-           case mb_res of
-             Nothing  -> do
-                 let to_sz_bytes' = to_sz_bytes * 2
-                 to_p' <- reallocBytes to_p to_sz_bytes'
-                 go (iteration + 1) to_p' to_sz_bytes'
-             Just to_buf -> withCStringBuffer to_buf null_terminate return
-
-      -- If the input string is ASCII, this value will ensure we only allocate once
-      let to_sz_bytes = cCharSize * (sz + 1)
-      to_p <- mallocBytes to_sz_bytes
-      go (0 :: Int) to_p to_sz_bytes
-
-
-tryFillBuffer :: TextEncoder dstate -> Bool -> Buffer Char -> Ptr Word8 -> Int
-                    ->  IO (Maybe (Buffer Word8))
-tryFillBuffer encoder null_terminate from0 to_p !to_sz_bytes = do
-    !to_fp <- newForeignPtr_ to_p
-    go (0 :: Int) from0 (emptyBuffer to_fp to_sz_bytes WriteBuffer)
-  where
-    go !iteration !from !to = do
-      (why, from', to') <- encode encoder from to
-      putDebugMsg ("tryFillBufferAndCall: " ++ show iteration ++ " " ++ show why ++ " " ++ summaryBuffer from ++ " " ++ summaryBuffer from')
-      if isEmptyBuffer from'
-       then if null_terminate && bufferAvailable to' == 0
-             then return Nothing -- We had enough for the string but not the terminator: ask the caller for more buffer
-             else return (Just to')
-       else case why of -- We didn't consume all of the input
-              InputUnderflow  -> recover encoder from' to' >>= \(a,b) -> go (iteration + 1) a b -- These conditions are equally bad
-              InvalidSequence -> recover encoder from' to' >>= \(a,b) -> go (iteration + 1) a b -- since the input was truncated/invalid
-              OutputUnderflow -> return Nothing -- Oops, out of buffer during decoding: ask the caller for more
-{-
-Note [Check *before* fill in withEncodedCString]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It's very important that the size check and readjustment peformed by tryFillBuffer
-happens before the continuation is called. The size check is the part which can
-fail, the call to the continuation never fails and so the caller should respond
-first to the size check failing and *then* call the continuation. Making this evident
-to the compiler avoids historic space leaks.
-
-In a previous iteration of this code we had a pattern that, somewhat simplified,
-looked like this:
-
-go :: State -> (State -> IO a) -> IO a
-go state action =
-    case tryFillBufferAndCall state action of
-        Left state' -> go state' action
-        Right result -> result
-
-`tryFillBufferAndCall` performed some checks, and then we either called action,
-or we modified the state and tried again.
-This went wrong because `action` can be a function closure containing a reference to
-a lazy data structure. If we call action directly, without retaining any references
-to action, that is fine. The data structure is consumed as it is produced and we operate
-in constant space.
-
-However the failure branch `go state' action` *does* capture a reference to action.
-This went wrong because the reference to action in the failure branch only becomes
-unreachable *after* action returns. This means we keep alive the function closure
-for `action` until `action` returns. Which in turn keeps alive the *whole* lazy list
-via `action` until the action has fully run.
-This went wrong in #20107, where the continuation kept an entire lazy bytestring alive
-rather than allowing it to be incrementally consumed and collected.
--}
-
+import GHC.Foreign.Internal


=====================================
libraries/base/GHC/Foreign/Internal.hs
=====================================
@@ -0,0 +1,357 @@
+{-# LANGUAGE Trustworthy #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE BangPatterns #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  GHC.Foreign.Internal
+-- Copyright   :  (c) The University of Glasgow, 2008-2011
+-- License     :  see libraries/base/LICENSE
+--
+-- Maintainer  :  libraries at haskell.org
+-- Stability   :  internal
+-- Portability :  non-portable
+--
+-- Foreign marshalling support for CStrings with configurable encodings
+--
+-----------------------------------------------------------------------------
+
+module GHC.Foreign.Internal (
+    -- * C strings with a configurable encoding
+    CString, CStringLen,
+
+    -- * Conversion of C strings into Haskell strings
+    peekCString,
+    peekCStringLen,
+
+    -- * Conversion of Haskell strings into C strings
+    newCString,
+    newCStringLen,
+    newCStringLen0,
+
+    -- * Conversion of Haskell strings into C strings using temporary storage
+    withCString,
+    withCStringLen,
+    withCStringLen0,
+    withCStringsLen,
+
+    charIsRepresentable,
+  ) where
+
+import Foreign.Marshal.Array
+import Foreign.C.Types
+import Foreign.Ptr
+import Foreign.Storable
+
+import Data.Word
+
+-- Imports for the locale-encoding version of marshallers
+
+import Data.Tuple (fst)
+
+import GHC.Show ( show )
+
+import Foreign.Marshal.Alloc
+import Foreign.ForeignPtr
+
+import GHC.Debug
+import GHC.List
+import GHC.Num
+import GHC.Base
+
+import GHC.IO
+import GHC.IO.Exception
+import GHC.IO.Buffer
+import GHC.IO.Encoding.Types
+
+
+c_DEBUG_DUMP :: Bool
+c_DEBUG_DUMP = False
+
+putDebugMsg :: String -> IO ()
+putDebugMsg | c_DEBUG_DUMP = debugLn
+            | otherwise    = const (return ())
+
+
+-- | A C string is a reference to an array of C characters terminated by NUL.
+type CString    = Ptr CChar
+
+-- | A string with explicit length information in bytes instead of a
+-- terminating NUL (allowing NUL characters in the middle of the string).
+type CStringLen = (Ptr CChar, Int)
+
+-- exported functions
+-- ------------------
+
+-- | Marshal a NUL terminated C string into a Haskell string.
+--
+peekCString    :: TextEncoding -> CString -> IO String
+peekCString enc cp = do
+    sz <- lengthArray0 nUL cp
+    peekEncodedCString enc (cp, sz * cCharSize)
+
+-- | Marshal a C string with explicit length into a Haskell string.
+--
+peekCStringLen           :: TextEncoding -> CStringLen -> IO String
+peekCStringLen = peekEncodedCString
+
+-- | Marshal a Haskell string into a NUL terminated C string.
+--
+-- * the Haskell string may /not/ contain any NUL characters
+--
+-- * new storage is allocated for the C string and must be
+--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
+--   'Foreign.Marshal.Alloc.finalizerFree'.
+--
+newCString :: TextEncoding -> String -> IO CString
+newCString enc = liftM fst . newEncodedCString enc True
+
+-- | Marshal a Haskell string into a C string (ie, character array) with
+-- explicit length information.
+--
+-- Note that this does not NUL terminate the resulting string.
+--
+-- * new storage is allocated for the C string and must be
+--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
+--   'Foreign.Marshal.Alloc.finalizerFree'.
+--
+newCStringLen     :: TextEncoding -> String -> IO CStringLen
+newCStringLen enc = newEncodedCString enc False
+
+-- | Marshal a Haskell string into a NUL terminated C string using temporary
+-- storage.
+--
+-- * the Haskell string may /not/ contain any NUL characters
+--
+-- * the memory is freed when the subcomputation terminates (either
+--   normally or via an exception), so the pointer to the temporary
+--   storage must /not/ be used after this.
+--
+withCString :: TextEncoding -> String -> (CString -> IO a) -> IO a
+withCString enc s act = withEncodedCString enc True s $ \(cp, _sz) -> act cp
+
+-- | Marshal a Haskell string into a C string (ie, character array)
+-- in temporary storage, with explicit length information.
+--
+-- Note that this does not NUL terminate the resulting string.
+--
+-- * the memory is freed when the subcomputation terminates (either
+--   normally or via an exception), so the pointer to the temporary
+--   storage must /not/ be used after this.
+--
+withCStringLen         :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
+withCStringLen enc = withEncodedCString enc False
+
+-- | Marshal a Haskell string into a NUL-terminated C string (ie, character array)
+-- with explicit length information.
+--
+-- * new storage is allocated for the C string and must be
+--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
+--   'Foreign.Marshal.Alloc.finalizerFree'.
+--
+-- @since 4.19.0.0
+newCStringLen0     :: TextEncoding -> String -> IO CStringLen
+newCStringLen0 enc = newEncodedCString enc True
+
+-- | Marshal a Haskell string into a NUL-terminated C string (ie, character array)
+-- in temporary storage, with explicit length information.
+--
+-- * the memory is freed when the subcomputation terminates (either
+--   normally or via an exception), so the pointer to the temporary
+--   storage must /not/ be used after this.
+--
+-- @since 4.19.0.0
+withCStringLen0         :: TextEncoding -> String -> (CStringLen -> IO a) -> IO a
+withCStringLen0 enc = withEncodedCString enc True
+
+-- | Marshal a list of Haskell strings into an array of NUL terminated C strings
+-- using temporary storage.
+--
+-- * the Haskell strings may /not/ contain any NUL characters
+--
+-- * the memory is freed when the subcomputation terminates (either
+--   normally or via an exception), so the pointer to the temporary
+--   storage must /not/ be used after this.
+--
+withCStringsLen :: TextEncoding
+                -> [String]
+                -> (Int -> Ptr CString -> IO a)
+                -> IO a
+withCStringsLen enc strs f = go [] strs
+  where
+  go cs (s:ss) = withCString enc s $ \c -> go (c:cs) ss
+  go cs [] = withArrayLen (reverse cs) f
+
+-- | Determines whether a character can be accurately encoded in a
+-- 'Foreign.C.String.CString'.
+--
+-- Pretty much anyone who uses this function is in a state of sin because
+-- whether or not a character is encodable will, in general, depend on the
+-- context in which it occurs.
+charIsRepresentable :: TextEncoding -> Char -> IO Bool
+-- We force enc explicitly because `catch` is lazy in its
+-- first argument. We would probably like to force c as well,
+-- but unfortunately worker/wrapper produces very bad code for
+-- that.
+--
+-- TODO If this function is performance-critical, it would probably
+-- pay to use a single-character specialization of withCString. That
+-- would allow worker/wrapper to actually eliminate Char boxes, and
+-- would also get rid of the completely unnecessary cons allocation.
+charIsRepresentable !enc c =
+  withCString enc [c]
+              (\cstr -> do str <- peekCString enc cstr
+                           case str of
+                             [ch] | ch == c -> pure True
+                             _ -> pure False)
+    `catch`
+       \(_ :: IOException) -> pure False
+
+-- auxiliary definitions
+-- ----------------------
+
+-- C's end of string character
+nUL :: CChar
+nUL  = 0
+
+-- Size of a CChar in bytes
+cCharSize :: Int
+cCharSize = sizeOf (undefined :: CChar)
+
+
+{-# INLINE peekEncodedCString #-}
+peekEncodedCString :: TextEncoding -- ^ Encoding of CString
+                   -> CStringLen
+                   -> IO String    -- ^ String in Haskell terms
+peekEncodedCString (TextEncoding { mkTextDecoder = mk_decoder }) (p, sz_bytes)
+  = bracket mk_decoder close $ \decoder -> do
+      let chunk_size = sz_bytes `max` 1 -- Decode buffer chunk size in characters: one iteration only for ASCII
+      !from0 <- fmap (\fp -> bufferAdd sz_bytes (emptyBuffer fp sz_bytes ReadBuffer)) $ newForeignPtr_ (castPtr p)
+      !to    <- newCharBuffer chunk_size WriteBuffer
+
+      let go !iteration !from = do
+            (why, from', !to') <- encode decoder from to
+            if isEmptyBuffer from'
+             then
+              -- No input remaining: @why@ will be InputUnderflow, but we don't care
+              withBuffer to' $ peekArray (bufferElems to')
+             else do
+              -- Input remaining: what went wrong?
+              putDebugMsg ("peekEncodedCString: " ++ show iteration ++ " " ++ show why)
+              (from'', to'') <- case why of InvalidSequence -> recover decoder from' to' -- These conditions are equally bad because
+                                            InputUnderflow  -> recover decoder from' to' -- they indicate malformed/truncated input
+                                            OutputUnderflow -> return (from', to')       -- We will have more space next time round
+              putDebugMsg ("peekEncodedCString: from " ++ summaryBuffer from ++ " " ++ summaryBuffer from' ++ " " ++ summaryBuffer from'')
+              putDebugMsg ("peekEncodedCString: to " ++ summaryBuffer to ++ " " ++ summaryBuffer to' ++ " " ++ summaryBuffer to'')
+              to_chars <- withBuffer to'' $ peekArray (bufferElems to'')
+              fmap (to_chars++) $ go (iteration + 1) from''
+
+      go (0 :: Int) from0
+
+{-# INLINE withEncodedCString #-}
+withEncodedCString :: TextEncoding         -- ^ Encoding of CString to create
+                   -> Bool                 -- ^ Null-terminate?
+                   -> String               -- ^ String to encode
+                   -> (CStringLen -> IO a) -- ^ Worker that can safely use the allocated memory
+                   -> IO a
+withEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s act
+  = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do
+      from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p
+
+      let go !iteration to_sz_bytes = do
+           putDebugMsg ("withEncodedCString: " ++ show iteration)
+           allocaBytes to_sz_bytes $ \to_p -> do
+            -- See Note [Check *before* fill in withEncodedCString] about why
+            -- this is subtle.
+            mb_res <- tryFillBuffer encoder null_terminate from to_p to_sz_bytes
+            case mb_res of
+              Nothing  -> go (iteration + 1) (to_sz_bytes * 2)
+              Just to_buf -> withCStringBuffer to_buf null_terminate act
+
+      -- If the input string is ASCII, this value will ensure we only allocate once
+      go (0 :: Int) (cCharSize * (sz + 1))
+
+withCStringBuffer :: Buffer Word8 -> Bool -> (CStringLen -> IO r) -> IO r
+withCStringBuffer to_buf null_terminate act = do
+  let bytes = bufferElems to_buf
+  withBuffer to_buf $ \to_ptr -> do
+    when null_terminate $ pokeElemOff to_ptr (bufR to_buf) 0
+    act (castPtr to_ptr, bytes) -- NB: the length information is specified as being in *bytes*
+
+{-# INLINE newEncodedCString #-}
+newEncodedCString :: TextEncoding  -- ^ Encoding of CString to create
+                  -> Bool          -- ^ Null-terminate?
+                  -> String        -- ^ String to encode
+                  -> IO CStringLen
+newEncodedCString (TextEncoding { mkTextEncoder = mk_encoder }) null_terminate s
+  = bracket mk_encoder close $ \encoder -> withArrayLen s $ \sz p -> do
+      from <- fmap (\fp -> bufferAdd sz (emptyBuffer fp sz ReadBuffer)) $ newForeignPtr_ p
+
+      let go !iteration to_p to_sz_bytes = do
+           putDebugMsg ("newEncodedCString: " ++ show iteration)
+           mb_res <- tryFillBuffer encoder null_terminate from to_p to_sz_bytes
+           case mb_res of
+             Nothing  -> do
+                 let to_sz_bytes' = to_sz_bytes * 2
+                 to_p' <- reallocBytes to_p to_sz_bytes'
+                 go (iteration + 1) to_p' to_sz_bytes'
+             Just to_buf -> withCStringBuffer to_buf null_terminate return
+
+      -- If the input string is ASCII, this value will ensure we only allocate once
+      let to_sz_bytes = cCharSize * (sz + 1)
+      to_p <- mallocBytes to_sz_bytes
+      go (0 :: Int) to_p to_sz_bytes
+
+
+tryFillBuffer :: TextEncoder dstate -> Bool -> Buffer Char -> Ptr Word8 -> Int
+                    ->  IO (Maybe (Buffer Word8))
+tryFillBuffer encoder null_terminate from0 to_p !to_sz_bytes = do
+    !to_fp <- newForeignPtr_ to_p
+    go (0 :: Int) from0 (emptyBuffer to_fp to_sz_bytes WriteBuffer)
+  where
+    go !iteration !from !to = do
+      (why, from', to') <- encode encoder from to
+      putDebugMsg ("tryFillBufferAndCall: " ++ show iteration ++ " " ++ show why ++ " " ++ summaryBuffer from ++ " " ++ summaryBuffer from')
+      if isEmptyBuffer from'
+       then if null_terminate && bufferAvailable to' == 0
+             then return Nothing -- We had enough for the string but not the terminator: ask the caller for more buffer
+             else return (Just to')
+       else case why of -- We didn't consume all of the input
+              InputUnderflow  -> recover encoder from' to' >>= \(a,b) -> go (iteration + 1) a b -- These conditions are equally bad
+              InvalidSequence -> recover encoder from' to' >>= \(a,b) -> go (iteration + 1) a b -- since the input was truncated/invalid
+              OutputUnderflow -> return Nothing -- Oops, out of buffer during encoding: ask the caller for more
+{-
+Note [Check *before* fill in withEncodedCString]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's very important that the size check and readjustment performed by tryFillBuffer
+happens before the continuation is called. The size check is the part which can
+fail, the call to the continuation never fails and so the caller should respond
+first to the size check failing and *then* call the continuation. Making this evident
+to the compiler avoids historic space leaks.
+
+In a previous iteration of this code we had a pattern that, somewhat simplified,
+looked like this:
+
+go :: State -> (State -> IO a) -> IO a
+go state action =
+    case tryFillBufferAndCall state action of
+        Left state' -> go state' action
+        Right result -> result
+
+`tryFillBufferAndCall` performed some checks, and then we either called action,
+or we modified the state and tried again.
+This went wrong because `action` can be a function closure containing a reference to
+a lazy data structure. If we call action directly, without retaining any references
+to action, that is fine. The data structure is consumed as it is produced and we operate
+in constant space.
+
+However the failure branch `go state' action` *does* capture a reference to action.
+This went wrong because the reference to action in the failure branch only becomes
+unreachable *after* action returns. This means we keep alive the function closure
+for `action` until `action` returns. Which in turn keeps alive the *whole* lazy list
+via `action` until the action has fully run.
+This went wrong in #20107, where the continuation kept an entire lazy bytestring alive
+rather than allowing it to be incrementally consumed and collected.
+-}
+


=====================================
libraries/base/System/Posix/Internals.hs
=====================================
@@ -34,7 +34,6 @@ import System.Posix.Types
 import Foreign
 import Foreign.C
 
--- import Data.Bits
 import Data.Maybe
 
 #if !defined(HTYPE_TCFLAG_T)
@@ -51,6 +50,9 @@ import GHC.IO.Device
 #if !defined(mingw32_HOST_OS)
 import {-# SOURCE #-} GHC.IO.Encoding (getFileSystemEncoding)
 import qualified GHC.Foreign as GHC
+import GHC.Ptr
+#else
+import Data.OldList (elem)
 #endif
 
 -- ---------------------------------------------------------------------------
@@ -164,13 +166,23 @@ fdGetMode fd = do
 
 #if defined(mingw32_HOST_OS)
 withFilePath :: FilePath -> (CWString -> IO a) -> IO a
-withFilePath = withCWString
+withFilePath fp f = do
+    checkForInteriorNuls fp
+    withCWString fp f
 
 newFilePath :: FilePath -> IO CWString
-newFilePath = newCWString
+newFilePath fp = do
+    checkForInteriorNuls fp
+    newCWString fp
 
 peekFilePath :: CWString -> IO FilePath
 peekFilePath = peekCWString
+
+-- | Check a 'FilePath' for internal NUL codepoints as these are
+-- disallowed in Windows filepaths. See #13660.
+checkForInteriorNuls :: FilePath -> IO ()
+checkForInteriorNuls fp = when ('\0' `elem` fp) (throwInternalNulError fp)
+
 #else
 
 withFilePath :: FilePath -> (CString -> IO a) -> IO a
@@ -178,13 +190,43 @@ newFilePath :: FilePath -> IO CString
 peekFilePath :: CString -> IO FilePath
 peekFilePathLen :: CStringLen -> IO FilePath
 
-withFilePath fp f = getFileSystemEncoding >>= \enc -> GHC.withCString enc fp f
-newFilePath fp = getFileSystemEncoding >>= \enc -> GHC.newCString enc fp
+withFilePath fp f = do
+    enc <- getFileSystemEncoding
+    GHC.withCStringLen0 enc fp $ \(str, len) -> do
+        checkForInteriorNuls fp (str, len)
+        f str
+newFilePath fp = do
+    enc <- getFileSystemEncoding
+    (str, len) <- GHC.newCStringLen0 enc fp
+    checkForInteriorNuls fp (str, len)
+    return str
 peekFilePath fp = getFileSystemEncoding >>= \enc -> GHC.peekCString enc fp
 peekFilePathLen fp = getFileSystemEncoding >>= \enc -> GHC.peekCStringLen enc fp
 
+-- | Check an encoded 'FilePath' for internal NUL octets as these are
+-- disallowed in POSIX filepaths. See #13660.
+checkForInteriorNuls :: FilePath -> CStringLen -> IO ()
+checkForInteriorNuls fp (str, len) =
+    when (len' /= len) (throwInternalNulError fp)
+    -- N.B. If the string contains internal NUL code units then strlen will
+    -- report a length smaller than the one returned by withCStringLen0/newCStringLen0.
+  where
+    len' = case str of Ptr ptr -> I# (cstringLength# ptr)
 #endif
 
+throwInternalNulError :: FilePath -> IO a
+throwInternalNulError fp = ioError err
+  where
+    err =
+      IOError
+        { ioe_handle = Nothing
+        , ioe_type = InvalidArgument
+        , ioe_location = "checkForInteriorNuls"
+        , ioe_description = "FilePaths must not contain internal NUL code units."
+        , ioe_errno = Nothing
+        , ioe_filename = Just fp
+        }
+
 -- ---------------------------------------------------------------------------
 -- Terminal-related stuff
 


=====================================
libraries/base/base.cabal
=====================================
@@ -351,6 +351,7 @@ Library
         GHC.Event.IntVar
         GHC.Event.PSQ
         GHC.Event.Unique
+        GHC.Foreign.Internal
         -- GHC.IOPort -- TODO: hide again after debug
         GHC.Unicode.Internal.Bits
         GHC.Unicode.Internal.Char.DerivedCoreProperties


=====================================
libraries/base/tests/T13660.hs
=====================================
@@ -0,0 +1,11 @@
+-- | This should print an InvalidArgument error complaining that
+-- the file path contains a NUL octet.
+module Main where
+
+import System.IO.Error
+
+main :: IO ()
+main = do
+    catchIOError
+      (writeFile "hello\x00world" "hello")
+      print


=====================================
libraries/base/tests/T13660.stdout
=====================================
Binary files /dev/null and b/libraries/base/tests/T13660.stdout differ


=====================================
libraries/base/tests/all.T
=====================================
@@ -256,6 +256,7 @@ test('T13191',
       ['-O'])
 test('T13525', [when(opsys('mingw32'), skip), js_broken(22374), req_process], compile_and_run, [''])
 test('T13097', normal, compile_and_run, [''])
+test('T13660', when(opsys('mingw32'), skip), compile_and_run, [''])
 test('functorOperators', normal, compile_and_run, [''])
 test('T3474',
      [collect_stats('max_bytes_used',5),


=====================================
m4/fp_ld_supports_response_files.m4
=====================================
@@ -5,7 +5,7 @@ AC_DEFUN([FP_LD_SUPPORTS_RESPONSE_FILES], [
     AC_MSG_CHECKING([whether $LD supports response files])
     echo 'int main(void) {return 0;}' > conftest.c
     "$CC" -c -o conftest.o conftest.c > /dev/null 2>&1
-    printf '%q\n' -o conftest conftest.o > args.txt
+    printf "-o\nconftest\nconftest.o\n" > args.txt
     if "$LD" -shared @args.txt > /dev/null 2>&1 || "$LD" -dylib @args.txt > /dev/null 2>&1
     then
         LdSupportsResponseFiles=YES


=====================================
testsuite/tests/ghc-api/T10052/T10052.stderr
=====================================
@@ -1,3 +1,3 @@
 
 when making flags consistent: warning:
-    Optimization flags are incompatible with the byte-code interpreter; optimization flags ignored.
+    Ignoring optimization flags since they are experimental for the byte-code interpreter. Pass -fno-unoptimized-core-for-interpreter to enable this feature.


=====================================
testsuite/tests/ghci.debugger/scripts/print007.stderr
=====================================
@@ -1,3 +1,3 @@
 
 when making flags consistent: warning:
-    Optimization flags are incompatible with the byte-code interpreter; optimization flags ignored.
+    Ignoring optimization flags since they are experimental for the byte-code interpreter. Pass -fno-unoptimized-core-for-interpreter to enable this feature.


=====================================
testsuite/tests/ghci/should_fail/T10549.stderr
=====================================
@@ -1,3 +1,3 @@
 
 when making flags consistent: warning:
-    Optimization flags are incompatible with the byte-code interpreter; optimization flags ignored.
+    Ignoring optimization flags since they are experimental for the byte-code interpreter. Pass -fno-unoptimized-core-for-interpreter to enable this feature.


=====================================
testsuite/tests/ghci/should_fail/T10549a.stderr
=====================================
@@ -1,3 +1,3 @@
 
 when making flags consistent: warning:
-    Optimization flags are incompatible with the byte-code interpreter; optimization flags ignored.
+    Ignoring optimization flags since they are experimental for the byte-code interpreter. Pass -fno-unoptimized-core-for-interpreter to enable this feature.


=====================================
testsuite/tests/indexed-types/should_compile/T7837.stderr
=====================================
@@ -1,3 +1,4 @@
 Rule fired: Class op signum (BUILTIN)
 Rule fired: Class op abs (BUILTIN)
 Rule fired: normalize/Double (T7837)
+Rule fired: Class op eq_sel (BUILTIN)


=====================================
testsuite/tests/safeHaskell/ghci/p14.stderr
=====================================
@@ -1,6 +1,6 @@
 
 when making flags consistent: warning:
-    Optimization flags are incompatible with the byte-code interpreter; optimization flags ignored.
+    Ignoring optimization flags since they are experimental for the byte-code interpreter. Pass -fno-unoptimized-core-for-interpreter to enable this feature.
 
 <interactive>:10:25: error: [GHC-39999]
     • No instance for ‘Num a’ arising from a use of ‘f’


=====================================
testsuite/tests/stranal/should_compile/T23398.hs
=====================================
@@ -0,0 +1,15 @@
+{-# OPTIONS_GHC -fdicts-strict #-}
+module T23398 where
+
+type PairDict a = (Eq a, Show a)
+
+foo :: PairDict a => a -> a -> String
+foo x y | x==y      = show x
+        | otherwise = show y
+
+-- In worker/wrapper we'd like to unbox the pair
+-- but not (Eq a) and (Show a)
+
+bar :: (a ~ b, Show a) => Int -> a -> (b, String)
+bar 0 x = (x, show x)
+bar n x = bar (n-1) x


=====================================
testsuite/tests/stranal/should_compile/T23398.stderr
=====================================
@@ -0,0 +1,109 @@
+
+==================== Tidy Core ====================
+Result size of Tidy Core
+  = {terms: 76, types: 117, coercions: 4, joins: 0/0}
+
+-- RHS size: {terms: 18, types: 11, coercions: 0, joins: 0/0}
+T23398.$wfoo [InlPrag=[2]]
+  :: forall {a}. (Eq a, Show a) => a -> a -> String
+[GblId[StrictWorker([!, !])],
+ Arity=4,
+ Str=<SP(1C(1,C(1,L)),A)><SP(A,1C(1,L),A)><L><L>,
+ Unf=Unf{Src=<vanilla>, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=IF_ARGS [30 60 0 0] 120 0}]
+T23398.$wfoo
+  = \ (@a) (ww :: Eq a) (ww1 :: Show a) (eta :: a) (eta1 :: a) ->
+      case == @a ww eta eta1 of {
+        False -> show @a ww1 eta1;
+        True -> show @a ww1 eta
+      }
+
+-- RHS size: {terms: 12, types: 12, coercions: 0, joins: 0/0}
+foo [InlPrag=[2]] :: forall a. PairDict a => a -> a -> String
+[GblId,
+ Arity=3,
+ Str=<S!P(SP(SC(S,C(1,L)),A),SP(A,SC(S,L),A))><L><L>,
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=3,unsat_ok=True,boring_ok=False)
+         Tmpl= \ (@a)
+                 ($d(%,%) [Occ=Once1!] :: PairDict a)
+                 (eta [Occ=Once1] :: a)
+                 (eta1 [Occ=Once1] :: a) ->
+                 case $d(%,%) of { (ww [Occ=Once1], ww1 [Occ=Once1]) ->
+                 T23398.$wfoo @a ww ww1 eta eta1
+                 }}]
+foo
+  = \ (@a) ($d(%,%) :: PairDict a) (eta :: a) (eta1 :: a) ->
+      case $d(%,%) of { (ww, ww1) -> T23398.$wfoo @a ww ww1 eta eta1 }
+
+Rec {
+-- RHS size: {terms: 21, types: 19, coercions: 3, joins: 0/0}
+T23398.$wbar [InlPrag=[2], Occ=LoopBreaker]
+  :: forall {a} {b}.
+     (a GHC.Prim.~# b, Show a) =>
+     GHC.Prim.Int# -> a -> (# b, String #)
+[GblId[StrictWorker([~, !])],
+ Arity=4,
+ Str=<L><SP(A,SC(S,L),A)><1L><L>,
+ Unf=OtherCon []]
+T23398.$wbar
+  = \ (@a)
+      (@b)
+      (ww :: a GHC.Prim.~# b)
+      ($dShow :: Show a)
+      (ww1 :: GHC.Prim.Int#)
+      (eta :: a) ->
+      case ww1 of ds {
+        __DEFAULT ->
+          T23398.$wbar
+            @a @b @~(ww :: a GHC.Prim.~# b) $dShow (GHC.Prim.-# ds 1#) eta;
+        0# -> (# eta `cast` (Sub ww :: a ~R# b), show @a $dShow eta #)
+      }
+end Rec }
+
+-- RHS size: {terms: 21, types: 32, coercions: 1, joins: 0/0}
+bar [InlPrag=[2]]
+  :: forall a b. (a ~ b, Show a) => Int -> a -> (b, String)
+[GblId,
+ Arity=4,
+ Str=<S!P(L)><SP(A,SC(S,L),A)><1!P(1L)><L>,
+ Cpr=1,
+ Unf=Unf{Src=StableSystem, TopLvl=True,
+         Value=True, ConLike=True, WorkFree=True, Expandable=True,
+         Guidance=ALWAYS_IF(arity=4,unsat_ok=True,boring_ok=False)
+         Tmpl= \ (@a)
+                 (@b)
+                 ($d~ [Occ=Once1!] :: a ~ b)
+                 ($dShow [Occ=Once1] :: Show a)
+                 (eta [Occ=Once1!] :: Int)
+                 (eta1 [Occ=Once1] :: a) ->
+                 case $d~ of { GHC.Types.Eq# ww ->
+                 case eta of { GHC.Types.I# ww1 [Occ=Once1] ->
+                 case T23398.$wbar @a @b @~(ww :: a GHC.Prim.~# b) $dShow ww1 eta1
+                 of
+                 { (# ww2 [Occ=Once1], ww3 [Occ=Once1] #) ->
+                 (ww2, ww3)
+                 }
+                 }
+                 }}]
+bar
+  = \ (@a)
+      (@b)
+      ($d~ :: a ~ b)
+      ($dShow :: Show a)
+      (eta :: Int)
+      (eta1 :: a) ->
+      case $d~ of { GHC.Types.Eq# ww ->
+      case eta of { GHC.Types.I# ww1 ->
+      case T23398.$wbar @a @b @~(ww :: a GHC.Prim.~# b) $dShow ww1 eta1
+      of
+      { (# ww2, ww3 #) ->
+      (ww2, ww3)
+      }
+      }
+      }
+
+
+


=====================================
testsuite/tests/stranal/should_compile/all.T
=====================================
@@ -93,3 +93,4 @@ test('T22039', normal, compile, [''])
 test('T22388', [ grep_errmsg(r'^\S+\$w\S+') ], compile, ['-dsuppress-uniques -ddump-simpl'])
 # T22997: Just a panic that should not happen
 test('T22997', normal, compile, [''])
+test('T23398', normal, compile, ['-dsuppress-uniques -ddump-simpl -dno-typeable-binds'])


=====================================
testsuite/tests/th/T8333.stderr
=====================================
@@ -1,3 +1,3 @@
 
 when making flags consistent: warning:
-    Optimization flags are incompatible with the byte-code interpreter; optimization flags ignored.
+    Ignoring optimization flags since they are experimental for the byte-code interpreter. Pass -fno-unoptimized-core-for-interpreter to enable this feature.


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit e16e20d592a6f5d9ed1af17b77fafd6495242345
+Subproject commit 04e9d6048bb297de5831651e60d496217525ef62



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/72e21440778d5316380e4f40950935865bf4921c...dbf8b2b15d2d62622f30fce7ff564f03eb0d78a6

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/72e21440778d5316380e4f40950935865bf4921c...dbf8b2b15d2d62622f30fce7ff564f03eb0d78a6
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230518/e8a4d7b4/attachment-0001.html>


More information about the ghc-commits mailing list