[Git][ghc/ghc][master] Introduce a standard thunk for allocating strings

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Sat Oct 22 11:41:56 UTC 2022



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
86e6549e by Ömer Sinan Ağacan at 2022-10-22T07:41:30-04:00
Introduce a standard thunk for allocating strings

Currently for a top-level closure in the form

    hey = unpackCString# x

we generate code like this:

    Main.hey_entry() //  [R1]
             { info_tbls: [(c2T4,
                            label: Main.hey_info
                            rep: HeapRep static { Thunk }
                            srt: Nothing)]
               stack_info: arg_space: 8 updfr_space: Just 8
             }
         {offset
           c2T4: // global
               _rqm::P64 = R1;
               if ((Sp + 8) - 24 < SpLim) (likely: False) goto c2T5; else goto c2T6;
           c2T5: // global
               R1 = _rqm::P64;
               call (stg_gc_enter_1)(R1) args: 8, res: 0, upd: 8;
           c2T6: // global
               (_c2T1::I64) = call "ccall" arg hints:  [PtrHint,
                                                        PtrHint]  result hints:  [PtrHint] newCAF(BaseReg, _rqm::P64);
               if (_c2T1::I64 == 0) goto c2T3; else goto c2T2;
           c2T3: // global
               call (I64[_rqm::P64])() args: 8, res: 0, upd: 8;
           c2T2: // global
               I64[Sp - 16] = stg_bh_upd_frame_info;
               I64[Sp - 8] = _c2T1::I64;
               R2 = hey1_r2Gg_bytes;
               Sp = Sp - 16;
               call GHC.CString.unpackCString#_info(R2) args: 24, res: 0, upd: 24;
         }
     }

This code is generated for every string literal. Only difference between
top-level closures like this is the argument for the bytes of the string
(hey1_r2Gg_bytes in the code above).

With this patch we introduce a standard thunk in the RTS, called
stg_MK_STRING_info, that does what `unpackCString# x` does, except it
gets the bytes address from the payload. Using this, for the closure
above, we generate this:

    Main.hey_closure" {
        Main.hey_closure:
            const stg_MK_STRING_info;
            const 0; // padding for indirectee
            const 0; // static link
            const 0; // saved info
            const hey1_r1Gg_bytes; // the payload
    }

This is much smaller in code.

Metric Decrease:
    T10421
    T11195
    T12150
    T12425
    T16577
    T18282
    T18698a
    T18698b

Co-Authored By: Ben Gamari <ben at well-typed.com>

- - - - -


11 changed files:

- compiler/GHC/Cmm.hs
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/Cmm/Info/Build.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/StgToCmm/Bind.hs
- compiler/GHC/StgToCmm/Heap.hs
- compiler/GHC/StgToCmm/Utils.hs
- rts/Prelude.h
- rts/RtsSymbols.c
- rts/StgStdThunks.cmm
- rts/include/stg/MiscClosures.h


Changes:

=====================================
compiler/GHC/Cmm.hs
=====================================
@@ -301,6 +301,9 @@ data GenCmmStatics (rawOnly :: Bool) where
       -> CmmInfoTable
       -> CostCentreStack
       -> [CmmLit]     -- Payload
+      -> [CmmLit]     -- Non-pointers that go to the end of the closure
+                      -- This is used by stg_unpack_cstring closures.
+                      -- See Note [unpack_cstring closures] in StgStdThunks.cmm.
       -> GenCmmStatics 'False
 
     -- | Static data, after SRTs are generated
@@ -432,8 +435,8 @@ pprInfoTable platform (CmmInfoTable { cit_lbl = lbl, cit_rep = rep
 --
 
 pprStatics :: Platform -> GenCmmStatics a -> SDoc
-pprStatics platform (CmmStatics lbl itbl ccs payload) =
-  pdoc platform lbl <> colon <+> pdoc platform itbl <+> ppr ccs <+> pdoc platform payload
+pprStatics platform (CmmStatics lbl itbl ccs payload extras) =
+  pdoc platform lbl <> colon <+> pdoc platform itbl <+> ppr ccs <+> pdoc platform payload <+> ppr extras
 pprStatics platform (CmmStaticsRaw lbl ds) = vcat ((pdoc platform lbl <> colon) : map (pprStatic platform) ds)
 
 pprStatic :: Platform -> CmmStatic -> SDoc


=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -72,6 +72,8 @@ module GHC.Cmm.CLabel (
         mkCAFBlackHoleInfoTableLabel,
         mkRtsPrimOpLabel,
         mkRtsSlowFastTickyCtrLabel,
+        mkRtsUnpackCStringLabel,
+        mkRtsUnpackCStringUtf8Label,
 
         mkSelectorInfoLabel,
         mkSelectorEntryLabel,
@@ -562,6 +564,8 @@ data RtsLabelInfo
   | RtsApInfoTable       Bool{-updatable-} Int{-arity-}    -- ^ AP thunks
   | RtsApEntry           Bool{-updatable-} Int{-arity-}
 
+  | RtsUnpackCStringInfoTable
+  | RtsUnpackCStringUtf8InfoTable
   | RtsPrimOp            PrimOp
   | RtsApFast            NonDetFastString    -- ^ _fast versions of generic apply
   | RtsSlowFastTickyCtr String
@@ -734,7 +738,6 @@ mkApEntryLabel platform upd arity =
    assert (arity > 0 && arity <= pc_MAX_SPEC_AP_SIZE (platformConstants platform)) $
    RtsLabel (RtsApEntry upd arity)
 
-
 -- A call to some primitive hand written Cmm code
 mkPrimCallLabel :: PrimCall -> CLabel
 mkPrimCallLabel (PrimCall str pkg)
@@ -852,6 +855,11 @@ mkRtsApFastLabel str = RtsLabel (RtsApFast (NonDetFastString str))
 mkRtsSlowFastTickyCtrLabel :: String -> CLabel
 mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
 
+-- | A standard string unpacking thunk. See Note [unpack_cstring closures] in
+-- StgStdThunks.cmm.
+mkRtsUnpackCStringLabel, mkRtsUnpackCStringUtf8Label :: CLabel
+mkRtsUnpackCStringLabel = RtsLabel RtsUnpackCStringInfoTable
+mkRtsUnpackCStringUtf8Label = RtsLabel RtsUnpackCStringUtf8InfoTable
 
 -- Constructing Code Coverage Labels
 mkHpcTicksLabel :: Module -> CLabel
@@ -958,6 +966,9 @@ hasIdLabelInfo _ = Nothing
 hasCAF :: CLabel -> Bool
 hasCAF (IdLabel _ _ (IdTickyInfo TickyRednCounts)) = False -- See Note [ticky for LNE]
 hasCAF (IdLabel _ MayHaveCafRefs _) = True
+hasCAF (RtsLabel RtsUnpackCStringInfoTable) = True
+hasCAF (RtsLabel RtsUnpackCStringUtf8InfoTable) = True
+  -- The info table stg_MK_STRING_info is for thunks
 hasCAF _                            = False
 
 -- Note [ticky for LNE]
@@ -1195,6 +1206,9 @@ labelType (CmmLabel _ _ _ CmmRet)               = CodeLabel
 labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel
 labelType (RtsLabel (RtsApInfoTable _ _))       = DataLabel
 labelType (RtsLabel (RtsApFast _))              = CodeLabel
+labelType (RtsLabel RtsUnpackCStringInfoTable)  = CodeLabel
+labelType (RtsLabel RtsUnpackCStringUtf8InfoTable)
+                                                = CodeLabel
 labelType (RtsLabel _)                          = DataLabel
 labelType (LocalBlockLabel _)                   = CodeLabel
 labelType (SRTLabel _)                          = DataLabel
@@ -1525,6 +1539,11 @@ pprCLabel !platform !sty lbl = -- see Note [Bangs in CLabel]
    RtsLabel (RtsSlowFastTickyCtr pat)
       -> maybe_underscore $ text "SLOW_CALL_fast_" <> text pat <> text "_ctr"
 
+   RtsLabel RtsUnpackCStringInfoTable
+      -> maybe_underscore $ text "stg_unpack_cstring_info"
+   RtsLabel RtsUnpackCStringUtf8InfoTable
+      -> maybe_underscore $ text "stg_unpack_cstring_utf8_info"
+
    LargeBitmapLabel u
       -> maybe_underscore $ tempLabelPrefixOrUnderscore
                             <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm"


=====================================
compiler/GHC/Cmm/Info/Build.hs
=====================================
@@ -576,7 +576,7 @@ cafAnalData
   -> CAFSet
 cafAnalData platform st = case st of
    CmmStaticsRaw _lbl _data           -> Set.empty
-   CmmStatics _lbl _itbl _ccs payload ->
+   CmmStatics _lbl _itbl _ccs payload _extras ->
        foldl' analyzeStatic Set.empty payload
      where
        analyzeStatic s lit =
@@ -741,7 +741,9 @@ getBlockLabels = mapMaybe getBlockLabel
 getLabelledBlocks :: Platform -> CmmDecl -> [(SomeLabel, CAFfyLabel)]
 getLabelledBlocks platform decl = case decl of
    CmmData _ (CmmStaticsRaw _ _)    -> []
-   CmmData _ (CmmStatics lbl _ _ _) -> [ (DeclLabel lbl, mkCAFfyLabel platform lbl) ]
+   CmmData _ (CmmStatics lbl info _ _ _) -> [ (DeclLabel lbl, mkCAFfyLabel platform lbl)
+                                            | not (isThunkRep (cit_rep info))
+                                            ]
    CmmProc top_info _ _ _           -> [ (BlockLabel blockId, caf_lbl)
                                        | (blockId, info) <- mapToList (info_tbls top_info)
                                        , let rep = cit_rep info
@@ -786,28 +788,48 @@ depAnalSRTs platform cafEnv cafEnv_static decls =
   graph :: [SCC (SomeLabel, CAFfyLabel, Set CAFfyLabel)]
   graph = stronglyConnCompFromEdgedVerticesOrd nodes
 
--- | Get @(Label, CAFfyLabel, Set CAFfyLabel)@ for each CAF block.
--- The @Set CafLabel@ represents the set of CAFfy things which this CAF's code
+-- | Get @(Maybe Label, CAFfyLabel, Set CAFfyLabel)@ for each CAF block.
+-- The @Set CAFfyLabel@ represents the set of CAFfy things which this CAF's code
 -- depends upon.
 --
--- CAFs are treated differently from other labelled blocks:
+--  - The 'Label' represents the entry code of the closure. This may be
+--    'Nothing' if it is a standard closure type (e.g. @stg_unpack_cstring@; see
+--    Note [unpack_cstring closures] in StgStdThunks.cmm).
+--  - The 'CAFLabel' is the label of the CAF closure.
+--  - The @Set CAFLabel@ is the set of CAFfy closures which should be included
+--    in the closure's SRT.
+--
+-- Note that CAFs are treated differently from other labelled blocks:
 --
 --  - we never shortcut a reference to a CAF to the contents of its
 --    SRT, since the point of SRTs is to keep CAFs alive.
 --
 --  - CAFs therefore don't take part in the dependency analysis in depAnalSRTs.
 --    instead we generate their SRTs after everything else.
---
-getCAFs :: Platform -> CAFEnv -> [CmmDecl] -> [(Label, CAFfyLabel, Set CAFfyLabel)]
-getCAFs platform cafEnv decls =
-  [ (g_entry g, mkCAFfyLabel platform topLbl, cafs)
-  | CmmProc top_info topLbl _ g <- decls
-  , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
-  , let rep = cit_rep info
-  , isStaticRep rep && isThunkRep rep
-  , Just cafs <- [mapLookup (g_entry g) cafEnv]
-  ]
+getCAFs :: Platform -> CAFEnv -> [CmmDecl] -> [(Maybe Label, CAFfyLabel, Set CAFfyLabel)]
+getCAFs platform cafEnv = mapMaybe getCAFLabel
+  where
+    getCAFLabel :: CmmDecl -> Maybe (Maybe Label, CAFfyLabel, Set CAFfyLabel)
+
+    getCAFLabel (CmmProc top_info top_lbl _ g)
+      | Just info <- mapLookup (g_entry g) (info_tbls top_info)
+      , let rep = cit_rep info
+      , isStaticRep rep && isThunkRep rep
+      , Just cafs <- mapLookup (g_entry g) cafEnv
+      = Just (Just (g_entry g), mkCAFfyLabel platform top_lbl, cafs)
+
+      | otherwise
+      = Nothing
+
+    getCAFLabel (CmmData _ (CmmStatics top_lbl info _ccs _payload _extras))
+      | isThunkRep (cit_rep info)
+      = Just (Nothing, mkCAFfyLabel platform top_lbl, Set.empty)
+
+      | otherwise
+      = Nothing
 
+    getCAFLabel (CmmData _ (CmmStaticsRaw _lbl _payload))
+      = Nothing
 
 -- | Get the list of blocks that correspond to the entry points for
 -- @FUN_STATIC@ closures.  These are the blocks for which if we have an
@@ -882,7 +904,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do
               pprPanic "doSRTs" (text "Proc in static data list:" <+> pdoc platform decl)
             CmmData _ static ->
               case static of
-                CmmStatics lbl _ _ _ -> (lbl, set)
+                CmmStatics lbl _ _ _ _ -> (lbl, set)
                 CmmStaticsRaw lbl _ -> (lbl, set)
 
       (proc_envs, procss) = unzip procs
@@ -902,7 +924,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do
     sccs :: [SCC (SomeLabel, CAFfyLabel, Set CAFfyLabel)]
     sccs = {-# SCC depAnalSRTs #-} depAnalSRTs platform cafEnv static_data_env decls
 
-    cafsWithSRTs :: [(Label, CAFfyLabel, Set CAFfyLabel)]
+    cafsWithSRTs :: [(Maybe Label, CAFfyLabel, Set CAFfyLabel)]
     cafsWithSRTs = getCAFs platform cafEnv decls
 
   srtTraceM "doSRTs" (text "data:"            <+> pdoc platform data_ $$
@@ -925,7 +947,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do
         flip runStateT moduleSRTInfo $ do
           nonCAFs <- mapM (doSCC cfg staticFuns static_data_env) sccs
           cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) ->
-            oneSRT cfg staticFuns [BlockLabel l] [cafLbl]
+            oneSRT cfg staticFuns (map BlockLabel (maybeToList l)) [cafLbl]
                    True{-is a CAF-} cafs static_data_env
           return (nonCAFs ++ cAFs)
 
@@ -1248,6 +1270,7 @@ buildSRT profile refs = do
         [] -- no padding
         [mkIntCLit platform 0] -- link field
         [] -- no saved info
+        [] -- no extras
   return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl)
 
 -- | Update info tables with references to their SRTs. Also generate
@@ -1263,10 +1286,10 @@ updInfoSRTs
 updInfoSRTs _ _ _ _ (CmmData s (CmmStaticsRaw lbl statics))
   = [CmmData s (CmmStaticsRaw lbl statics)]
 
-updInfoSRTs profile _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload))
+updInfoSRTs profile _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload extras))
   = [CmmData s (CmmStaticsRaw lbl (map CmmStaticLit field_lits))]
   where
-    field_lits = mkStaticClosureFields profile itbl ccs caffy payload
+    field_lits = mkStaticClosureFields profile itbl ccs caffy payload extras
 
 updInfoSRTs profile srt_env funSRTEnv caffy (CmmProc top_info top_l live g)
   | Just (_,closure) <- maybeStaticClosure = [ proc, closure ]
@@ -1296,7 +1319,7 @@ updInfoSRTs profile srt_env funSRTEnv caffy (CmmProc top_info top_l live g)
             Just srtEntries -> srtTrace "maybeStaticFun" (pdoc (profilePlatform profile) res)
               (info_tbl { cit_rep = new_rep }, res)
               where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ]
-          fields = mkStaticClosureFields profile info_tbl ccs caffy srtEntries
+          fields = mkStaticClosureFields profile info_tbl ccs caffy srtEntries []
           new_rep = case cit_rep of
              HeapRep sta ptrs nptrs ty ->
                HeapRep sta (ptrs + length srtEntries) nptrs ty


=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -435,7 +435,7 @@ static  :: { CmmParse [CmmStatic] }
                         mkStaticClosure profile (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData)
                          -- mkForeignLabel because these are only used
                          -- for CHARLIKE and INTLIKE closures in the RTS.
-                        dontCareCCS (map getLit lits) [] [] [] } }
+                        dontCareCCS (map getLit lits) [] [] [] [] } }
         -- arrays of closures required for the CHARLIKE & INTLIKE arrays
 
 lits    :: { [CmmParse CmmExpr] }
@@ -1248,7 +1248,7 @@ profilingInfo profile desc_str ty_str
 staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse ()
 staticClosure pkg cl_label info payload
   = do profile <- getProfile
-       let lits = mkStaticClosure profile (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] []
+       let lits = mkStaticClosure profile (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] []
        code $ emitDataLits (mkCmmDataLabel pkg (NeedExternDecl True) cl_label) lits
 
 foreignCall


=====================================
compiler/GHC/StgToCmm/Bind.hs
=====================================
@@ -25,6 +25,8 @@ import GHC.Stg.Syntax
 import GHC.Platform
 import GHC.Platform.Profile
 
+import GHC.Builtin.Names (unpackCStringName, unpackCStringUtf8Name)
+
 import GHC.StgToCmm.Config
 import GHC.StgToCmm.Expr
 import GHC.StgToCmm.Monad
@@ -87,6 +89,9 @@ cgTopRhsClosure platform rec id ccs upd_flag args body =
       lf_info       = mkClosureLFInfo platform id TopLevel [] upd_flag args
   in (cg_id_info, gen_code lf_info closure_label)
   where
+
+  gen_code :: LambdaFormInfo -> CLabel -> FCode ()
+
   -- special case for a indirection (f = g).  We create an IND_STATIC
   -- closure pointing directly to the indirectee.  This is exactly
   -- what the CAF will eventually evaluate to anyway, we're just
@@ -101,11 +106,44 @@ cgTopRhsClosure platform rec id ccs upd_flag args body =
   -- concurrent/should_run/4030 fails, for instance.
   --
   gen_code _ closure_label
-    | StgApp f [] <- body, null args, isNonRec rec
+    | StgApp f [] <- body
+    , null args
+    , isNonRec rec
     = do
          cg_info <- getCgIdInfo f
          emitDataCon closure_label indStaticInfoTable ccs [unLit (idInfoToAmode cg_info)]
 
+  -- Emit standard stg_unpack_cstring closures for top-level unpackCString# thunks.
+  --
+  -- Note that we do not do this for thunks enclosured in code ticks (e.g. hpc
+  -- ticks) since we want to ensure that these ticks are not lost (e.g.
+  -- resulting in Strings being reported by hpc as uncovered). However, we
+  -- don't worry about standard profiling ticks since unpackCString tends not
+  -- be terribly interesting in profiles. See Note [unpack_cstring closures] in
+  -- StgStdThunks.cmm.
+  gen_code _ closure_label
+    | null args
+    , StgApp f [arg] <- stripStgTicksTopE (not . tickishIsCode) body
+    , Just unpack <- is_string_unpack_op f
+    = do arg' <- getArgAmode (NonVoid arg)
+         case arg' of
+           CmmLit lit -> do
+             let info = CmmInfoTable
+                   { cit_lbl = unpack
+                   , cit_rep = HeapRep True 0 1 Thunk
+                   , cit_prof = NoProfilingInfo
+                   , cit_srt = Nothing
+                   , cit_clo = Nothing
+                   }
+             emitDecl $ CmmData (Section Data closure_label) $
+                 CmmStatics closure_label info ccs [] [lit]
+           _ -> panic "cgTopRhsClosure.gen_code"
+    where
+      is_string_unpack_op f
+        | idName f == unpackCStringName     = Just mkRtsUnpackCStringLabel
+        | idName f == unpackCStringUtf8Name = Just mkRtsUnpackCStringUtf8Label
+        | otherwise                         = Nothing
+
   gen_code lf_info _closure_label
    = do { profile <- getProfile
         ; let name = idName id


=====================================
compiler/GHC/StgToCmm/Heap.hs
=====================================
@@ -161,19 +161,20 @@ hpStore base vals = do
 --              Layout of static closures
 -----------------------------------------------------------
 
--- Make a static closure, adding on any extra padding needed for CAFs,
--- and adding a static link field if necessary.
-
+-- | Make a static closure, adding on any extra padding needed for CAFs, and
+-- adding a static link field if necessary.
 mkStaticClosureFields
         :: Profile
         -> CmmInfoTable
         -> CostCentreStack
         -> CafInfo
-        -> [CmmLit]             -- Payload
+        -> [CmmLit]             -- ^ Payload
+        -> [CmmLit]             -- ^ Extra non-pointers that go to the end of the closure.
+                                -- See Note [unpack_cstring closures] in StgStdThunks.cmm.
         -> [CmmLit]             -- The full closure
-mkStaticClosureFields profile info_tbl ccs caf_refs payload
+mkStaticClosureFields profile info_tbl ccs caf_refs payload extras
   = mkStaticClosure profile info_lbl ccs payload padding
-        static_link_field saved_info_field
+        static_link_field saved_info_field extras
   where
     platform = profilePlatform profile
     info_lbl = cit_lbl info_tbl
@@ -218,14 +219,15 @@ mkStaticClosureFields profile info_tbl ccs caf_refs payload
                                       -- in rts/sm/Storage.h
 
 mkStaticClosure :: Profile -> CLabel -> CostCentreStack -> [CmmLit]
-  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
-mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field
+  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
+mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field extras
   =  [CmmLabel info_lbl]
   ++ staticProfHdr profile ccs
   ++ payload
   ++ padding
   ++ static_link_field
   ++ saved_info_field
+  ++ extras
 
 -----------------------------------------------------------
 --              Heap overflow checking


=====================================
compiler/GHC/StgToCmm/Utils.hs
=====================================
@@ -266,7 +266,7 @@ emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits)
 
 emitDataCon :: CLabel -> CmmInfoTable -> CostCentreStack -> [CmmLit] -> FCode ()
 emitDataCon lbl itbl ccs payload =
-  emitDecl (CmmData (Section Data lbl) (CmmStatics lbl itbl ccs payload))
+  emitDecl (CmmData (Section Data lbl) (CmmStatics lbl itbl ccs payload []))
 
 -------------------------------------------------------------------------
 --


=====================================
rts/Prelude.h
=====================================
@@ -33,6 +33,7 @@ PRELUDE_CLOSURE(ghczmprim_GHCziTupleziPrim_Z0T_closure);
 PRELUDE_CLOSURE(ghczmprim_GHCziTypes_True_closure);
 PRELUDE_CLOSURE(ghczmprim_GHCziTypes_False_closure);
 PRELUDE_CLOSURE(base_GHCziPack_unpackCString_closure);
+PRELUDE_CLOSURE(base_GHCziPack_unpackCStringUtf8_closure);
 PRELUDE_CLOSURE(base_GHCziWeak_runFinalizzerBatch_closure);
 PRELUDE_CLOSURE(base_GHCziWeakziFinalizze_runFinalizzerBatch_closure);
 
@@ -70,6 +71,7 @@ PRELUDE_CLOSURE(base_GHCziEventziWindows_processRemoteCompletion_closure);
 PRELUDE_CLOSURE(base_GHCziTopHandler_flushStdHandles_closure);
 PRELUDE_CLOSURE(base_GHCziTopHandler_runMainIO_closure);
 
+PRELUDE_INFO(ghczmprim_GHCziCString_unpackCStringzh_info);
 PRELUDE_INFO(ghczmprim_GHCziTypes_Czh_con_info);
 PRELUDE_INFO(ghczmprim_GHCziTypes_Izh_con_info);
 PRELUDE_INFO(ghczmprim_GHCziTypes_Fzh_con_info);


=====================================
rts/RtsSymbols.c
=====================================
@@ -9,6 +9,7 @@
 #include "ghcplatform.h"
 #include "Rts.h"
 #include "RtsSymbols.h"
+
 #include "TopHandler.h"
 #include "HsFFI.h"
 #include "CloneStack.h"
@@ -713,7 +714,7 @@ extern char **environ;
       SymI_HasProto(defaultRtsConfig)                                   \
       SymI_HasProto(initLinker)                                         \
       SymI_HasProto(initLinker_)                                        \
-      SymI_HasDataProto(stg_unpackClosurezh)                                \
+      SymI_HasDataProto(stg_unpackClosurezh)                            \
       SymI_HasDataProto(stg_closureSizzezh)                                 \
       SymI_HasDataProto(stg_whereFromzh)                                 \
       SymI_HasDataProto(stg_getApStackValzh)                                \
@@ -976,6 +977,8 @@ extern char **environ;
       SymI_HasDataProto(stg_sel_13_noupd_info)                              \
       SymI_HasDataProto(stg_sel_14_noupd_info)                              \
       SymI_HasDataProto(stg_sel_15_noupd_info)                              \
+      SymI_HasDataProto(stg_unpack_cstring_info)                            \
+      SymI_HasDataProto(stg_unpack_cstring_utf8_info)                       \
       SymI_HasDataProto(stg_upd_frame_info)                                 \
       SymI_HasDataProto(stg_bh_upd_frame_info)                              \
       SymI_HasProto(suspendThread)                                      \


=====================================
rts/StgStdThunks.cmm
=====================================
@@ -13,6 +13,9 @@
 #include "Cmm.h"
 #include "Updates.h"
 
+import ghczmprim_GHCziCString_unpackCStringzh_info;
+import ghczmprim_GHCziCString_unpackCStringUtf8zh_info;
+
 /* -----------------------------------------------------------------------------
    The code for a thunk that simply extracts a field from a
    single-constructor datatype depends only on the offset of the field
@@ -286,3 +289,100 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
            StgThunk_payload(node,6));
     }
 }
+
+/* -----------------------------------------------------------------------------
+   Making strings
+   -------------------------------------------------------------------------- */
+
+/*
+ * Note [unpack_cstring closures]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Strings are extremely common. In Core they will typically manifest as the
+ * a pair of top-level bindings:
+ *
+ *     s :: String
+ *     s = unpackCString# s#
+ *
+ *     s# :: Addr#
+ *     s# = "hello world"#
+ *
+ * It turns out that `s` is a non-trivial amount of code which is duplicated
+ * for every `String` literal. To avoid this duplicate, we have a standard
+ * string-unpacking closure, unpack_cstring. Note that currently we only do
+ * this for ASCII strings; strings mentioning non-ASCII characters are
+ * represented by CAF applications of unpackCStringUtf8# as before.
+ *
+ * unpack_cstring closures are similar to standard THUNK_STATIC closures but
+ * with a non-GC pointer to a C-string at the end (the "extra" pointer).
+ * We must place this extra pointer at the end of the closure to ensure that
+ * it has a similar layout to a normal THUNK_STATIC closure, which has no space
+ * for free variables (since these would be contained in the thunk's code and SRT).
+ *
+ * When it is evaluated, an stg_unpack_cstring closure is updated to be an
+ * indirection to the resulting [Char], just as a normal unpackCString# thunk
+ * would be.
+ *
+ * Closure layout:
+ *
+ * ┌───────────────────┐       ┌──► ┌──────────────────────────┐
+ * │ stg_unpack_cstring│       │    │ "hello world ..."        │
+ * ├───────────────────┤       │    └──────────────────────────┘
+ * │ indirectee        │       │
+ * ├───────────────────┤       │
+ * │ static_link       │       │
+ * ├───────────────────┤       │
+ * │ saved_info        │       │
+ * ├───────────────────┤       │
+ * │ the_string       ─┼───────┘
+ * └───────────────────┘
+ *
+ */
+
+stg_do_unpack_cstring(P_ node, P_ newCAF_ret) {
+    STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring, node, newCAF_ret);
+    W_ str;
+    str = StgThunk_payload(node, 2);
+    push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+        jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringzh_info)(node, str);
+    }
+}
+
+INFO_TABLE(stg_unpack_cstring, 0, 0, THUNK_STATIC, "stg_unpack_cstring", "stg_unpack_cstring")
+    (P_ node)
+{
+    W_ newCAF_ret;
+    (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+    if (newCAF_ret == 0) {
+        // We raced with another thread to evaluate the CAF and they won;
+        // `node` should now be an indirection.
+        ENTER(node);
+    } else {
+        jump stg_do_unpack_cstring(node, newCAF_ret);
+    }
+}
+
+stg_do_unpack_cstring_utf8(P_ node, P_ newCAF_ret) {
+    STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring_utf8, node, newCAF_ret);
+    W_ str;
+    str = StgThunk_payload(node, 2);
+    push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+        jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringUtf8zh_info)(node, str);
+    }
+}
+
+INFO_TABLE(stg_unpack_cstring_utf8, 0, 0, THUNK_STATIC, "stg_unpack_cstring_utf8", "stg_unpack_cstring_utf8")
+    (P_ node)
+{
+    W_ newCAF_ret;
+    (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+    if (newCAF_ret == 0) {
+        // We raced with another thread to evaluate the CAF and they won;
+        // `node` should now be an indirection.
+        ENTER(node);
+    } else {
+        jump stg_do_unpack_cstring_utf8(node, newCAF_ret);
+    }
+}
+


=====================================
rts/include/stg/MiscClosures.h
=====================================
@@ -318,6 +318,10 @@ RTS_THUNK(stg_ap_5_upd);
 RTS_THUNK(stg_ap_6_upd);
 RTS_THUNK(stg_ap_7_upd);
 
+// Standard entry for `unpackCString# str` thunks
+RTS_ENTRY(stg_unpack_cstring);
+RTS_ENTRY(stg_unpack_cstring_utf8);
+
 /* standard application routines (see also utils/genapply,
  * and GHC.StgToCmm.ArgRep).
  */



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/86e6549ea3090d0d79c2aaed8373ba5696f2b6a9

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/86e6549ea3090d0d79c2aaed8373ba5696f2b6a9
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221022/c469a5a0/attachment-0001.html>


More information about the ghc-commits mailing list