[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 2 commits: Introduce a standard thunk for allocating strings
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Sat Oct 22 08:41:45 UTC 2022
Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC
Commits:
9fe02bdb by Ömer Sinan Ağacan at 2022-10-22T04:41:18-04:00
Introduce a standard thunk for allocating strings
Currently for a top-level closure in the form
hey = unpackCString# x
we generate code like this:
Main.hey_entry() // [R1]
{ info_tbls: [(c2T4,
label: Main.hey_info
rep: HeapRep static { Thunk }
srt: Nothing)]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c2T4: // global
_rqm::P64 = R1;
if ((Sp + 8) - 24 < SpLim) (likely: False) goto c2T5; else goto c2T6;
c2T5: // global
R1 = _rqm::P64;
call (stg_gc_enter_1)(R1) args: 8, res: 0, upd: 8;
c2T6: // global
(_c2T1::I64) = call "ccall" arg hints: [PtrHint,
PtrHint] result hints: [PtrHint] newCAF(BaseReg, _rqm::P64);
if (_c2T1::I64 == 0) goto c2T3; else goto c2T2;
c2T3: // global
call (I64[_rqm::P64])() args: 8, res: 0, upd: 8;
c2T2: // global
I64[Sp - 16] = stg_bh_upd_frame_info;
I64[Sp - 8] = _c2T1::I64;
R2 = hey1_r2Gg_bytes;
Sp = Sp - 16;
call GHC.CString.unpackCString#_info(R2) args: 24, res: 0, upd: 24;
}
}
This code is generated for every string literal. Only difference between
top-level closures like this is the argument for the bytes of the string
(hey1_r2Gg_bytes in the code above).
With this patch we introduce a standard thunk in the RTS, called
stg_MK_STRING_info, that does what `unpackCString# x` does, except it
gets the bytes address from the payload. Using this, for the closure
above, we generate this:
Main.hey_closure" {
Main.hey_closure:
const stg_MK_STRING_info;
const 0; // padding for indirectee
const 0; // static link
const 0; // saved info
const hey1_r1Gg_bytes; // the payload
}
This is much smaller in code.
Metric Decrease:
T10421
T11195
T12150
T12425
T16577
T18282
T18698a
T18698b
Co-Authored By: Ben Gamari <ben at well-typed.com>
- - - - -
4384f012 by Andreas Klebinger at 2022-10-22T04:41:25-04:00
hadrian: Improve error for wrong key/value errors.
- - - - -
12 changed files:
- compiler/GHC/Cmm.hs
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/Cmm/Info/Build.hs
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/StgToCmm/Bind.hs
- compiler/GHC/StgToCmm/Heap.hs
- compiler/GHC/StgToCmm/Utils.hs
- hadrian/src/Settings.hs
- rts/Prelude.h
- rts/RtsSymbols.c
- rts/StgStdThunks.cmm
- rts/include/stg/MiscClosures.h
Changes:
=====================================
compiler/GHC/Cmm.hs
=====================================
@@ -301,6 +301,9 @@ data GenCmmStatics (rawOnly :: Bool) where
-> CmmInfoTable
-> CostCentreStack
-> [CmmLit] -- Payload
+ -> [CmmLit] -- Non-pointers that go to the end of the closure
+ -- This is used by stg_unpack_cstring closures.
+ -- See Note [unpack_cstring closures] in StgStdThunks.cmm.
-> GenCmmStatics 'False
-- | Static data, after SRTs are generated
@@ -432,8 +435,8 @@ pprInfoTable platform (CmmInfoTable { cit_lbl = lbl, cit_rep = rep
--
pprStatics :: Platform -> GenCmmStatics a -> SDoc
-pprStatics platform (CmmStatics lbl itbl ccs payload) =
- pdoc platform lbl <> colon <+> pdoc platform itbl <+> ppr ccs <+> pdoc platform payload
+pprStatics platform (CmmStatics lbl itbl ccs payload extras) =
+ pdoc platform lbl <> colon <+> pdoc platform itbl <+> ppr ccs <+> pdoc platform payload <+> ppr extras
pprStatics platform (CmmStaticsRaw lbl ds) = vcat ((pdoc platform lbl <> colon) : map (pprStatic platform) ds)
pprStatic :: Platform -> CmmStatic -> SDoc
=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -72,6 +72,8 @@ module GHC.Cmm.CLabel (
mkCAFBlackHoleInfoTableLabel,
mkRtsPrimOpLabel,
mkRtsSlowFastTickyCtrLabel,
+ mkRtsUnpackCStringLabel,
+ mkRtsUnpackCStringUtf8Label,
mkSelectorInfoLabel,
mkSelectorEntryLabel,
@@ -562,6 +564,8 @@ data RtsLabelInfo
| RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks
| RtsApEntry Bool{-updatable-} Int{-arity-}
+ | RtsUnpackCStringInfoTable
+ | RtsUnpackCStringUtf8InfoTable
| RtsPrimOp PrimOp
| RtsApFast NonDetFastString -- ^ _fast versions of generic apply
| RtsSlowFastTickyCtr String
@@ -734,7 +738,6 @@ mkApEntryLabel platform upd arity =
assert (arity > 0 && arity <= pc_MAX_SPEC_AP_SIZE (platformConstants platform)) $
RtsLabel (RtsApEntry upd arity)
-
-- A call to some primitive hand written Cmm code
mkPrimCallLabel :: PrimCall -> CLabel
mkPrimCallLabel (PrimCall str pkg)
@@ -852,6 +855,11 @@ mkRtsApFastLabel str = RtsLabel (RtsApFast (NonDetFastString str))
mkRtsSlowFastTickyCtrLabel :: String -> CLabel
mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
+-- | A standard string unpacking thunk. See Note [unpack_cstring closures] in
+-- StgStdThunks.cmm.
+mkRtsUnpackCStringLabel, mkRtsUnpackCStringUtf8Label :: CLabel
+mkRtsUnpackCStringLabel = RtsLabel RtsUnpackCStringInfoTable
+mkRtsUnpackCStringUtf8Label = RtsLabel RtsUnpackCStringUtf8InfoTable
-- Constructing Code Coverage Labels
mkHpcTicksLabel :: Module -> CLabel
@@ -958,6 +966,9 @@ hasIdLabelInfo _ = Nothing
hasCAF :: CLabel -> Bool
hasCAF (IdLabel _ _ (IdTickyInfo TickyRednCounts)) = False -- See Note [ticky for LNE]
hasCAF (IdLabel _ MayHaveCafRefs _) = True
+hasCAF (RtsLabel RtsUnpackCStringInfoTable) = True
+hasCAF (RtsLabel RtsUnpackCStringUtf8InfoTable) = True
+ -- The info table stg_MK_STRING_info is for thunks
hasCAF _ = False
-- Note [ticky for LNE]
@@ -1195,6 +1206,9 @@ labelType (CmmLabel _ _ _ CmmRet) = CodeLabel
labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel
labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel
labelType (RtsLabel (RtsApFast _)) = CodeLabel
+labelType (RtsLabel RtsUnpackCStringInfoTable) = CodeLabel
+labelType (RtsLabel RtsUnpackCStringUtf8InfoTable)
+ = CodeLabel
labelType (RtsLabel _) = DataLabel
labelType (LocalBlockLabel _) = CodeLabel
labelType (SRTLabel _) = DataLabel
@@ -1525,6 +1539,11 @@ pprCLabel !platform !sty lbl = -- see Note [Bangs in CLabel]
RtsLabel (RtsSlowFastTickyCtr pat)
-> maybe_underscore $ text "SLOW_CALL_fast_" <> text pat <> text "_ctr"
+ RtsLabel RtsUnpackCStringInfoTable
+ -> maybe_underscore $ text "stg_unpack_cstring_info"
+ RtsLabel RtsUnpackCStringUtf8InfoTable
+ -> maybe_underscore $ text "stg_unpack_cstring_utf8_info"
+
LargeBitmapLabel u
-> maybe_underscore $ tempLabelPrefixOrUnderscore
<> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm"
=====================================
compiler/GHC/Cmm/Info/Build.hs
=====================================
@@ -576,7 +576,7 @@ cafAnalData
-> CAFSet
cafAnalData platform st = case st of
CmmStaticsRaw _lbl _data -> Set.empty
- CmmStatics _lbl _itbl _ccs payload ->
+ CmmStatics _lbl _itbl _ccs payload _extras ->
foldl' analyzeStatic Set.empty payload
where
analyzeStatic s lit =
@@ -741,7 +741,9 @@ getBlockLabels = mapMaybe getBlockLabel
getLabelledBlocks :: Platform -> CmmDecl -> [(SomeLabel, CAFfyLabel)]
getLabelledBlocks platform decl = case decl of
CmmData _ (CmmStaticsRaw _ _) -> []
- CmmData _ (CmmStatics lbl _ _ _) -> [ (DeclLabel lbl, mkCAFfyLabel platform lbl) ]
+ CmmData _ (CmmStatics lbl info _ _ _) -> [ (DeclLabel lbl, mkCAFfyLabel platform lbl)
+ | not (isThunkRep (cit_rep info))
+ ]
CmmProc top_info _ _ _ -> [ (BlockLabel blockId, caf_lbl)
| (blockId, info) <- mapToList (info_tbls top_info)
, let rep = cit_rep info
@@ -786,28 +788,48 @@ depAnalSRTs platform cafEnv cafEnv_static decls =
graph :: [SCC (SomeLabel, CAFfyLabel, Set CAFfyLabel)]
graph = stronglyConnCompFromEdgedVerticesOrd nodes
--- | Get @(Label, CAFfyLabel, Set CAFfyLabel)@ for each CAF block.
--- The @Set CafLabel@ represents the set of CAFfy things which this CAF's code
+-- | Get @(Maybe Label, CAFfyLabel, Set CAFfyLabel)@ for each CAF block.
+-- The @Set CAFfyLabel@ represents the set of CAFfy things which this CAF's code
-- depends upon.
--
--- CAFs are treated differently from other labelled blocks:
+-- - The 'Label' represents the entry code of the closure. This may be
+-- 'Nothing' if it is a standard closure type (e.g. @stg_unpack_cstring@; see
+-- Note [unpack_cstring closures] in StgStdThunks.cmm).
+-- - The 'CAFLabel' is the label of the CAF closure.
+-- - The @Set CAFLabel@ is the set of CAFfy closures which should be included
+-- in the closure's SRT.
+--
+-- Note that CAFs are treated differently from other labelled blocks:
--
-- - we never shortcut a reference to a CAF to the contents of its
-- SRT, since the point of SRTs is to keep CAFs alive.
--
-- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs.
-- instead we generate their SRTs after everything else.
---
-getCAFs :: Platform -> CAFEnv -> [CmmDecl] -> [(Label, CAFfyLabel, Set CAFfyLabel)]
-getCAFs platform cafEnv decls =
- [ (g_entry g, mkCAFfyLabel platform topLbl, cafs)
- | CmmProc top_info topLbl _ g <- decls
- , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
- , let rep = cit_rep info
- , isStaticRep rep && isThunkRep rep
- , Just cafs <- [mapLookup (g_entry g) cafEnv]
- ]
+getCAFs :: Platform -> CAFEnv -> [CmmDecl] -> [(Maybe Label, CAFfyLabel, Set CAFfyLabel)]
+getCAFs platform cafEnv = mapMaybe getCAFLabel
+ where
+ getCAFLabel :: CmmDecl -> Maybe (Maybe Label, CAFfyLabel, Set CAFfyLabel)
+
+ getCAFLabel (CmmProc top_info top_lbl _ g)
+ | Just info <- mapLookup (g_entry g) (info_tbls top_info)
+ , let rep = cit_rep info
+ , isStaticRep rep && isThunkRep rep
+ , Just cafs <- mapLookup (g_entry g) cafEnv
+ = Just (Just (g_entry g), mkCAFfyLabel platform top_lbl, cafs)
+
+ | otherwise
+ = Nothing
+
+ getCAFLabel (CmmData _ (CmmStatics top_lbl info _ccs _payload _extras))
+ | isThunkRep (cit_rep info)
+ = Just (Nothing, mkCAFfyLabel platform top_lbl, Set.empty)
+
+ | otherwise
+ = Nothing
+ getCAFLabel (CmmData _ (CmmStaticsRaw _lbl _payload))
+ = Nothing
-- | Get the list of blocks that correspond to the entry points for
-- @FUN_STATIC@ closures. These are the blocks for which if we have an
@@ -882,7 +904,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do
pprPanic "doSRTs" (text "Proc in static data list:" <+> pdoc platform decl)
CmmData _ static ->
case static of
- CmmStatics lbl _ _ _ -> (lbl, set)
+ CmmStatics lbl _ _ _ _ -> (lbl, set)
CmmStaticsRaw lbl _ -> (lbl, set)
(proc_envs, procss) = unzip procs
@@ -902,7 +924,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do
sccs :: [SCC (SomeLabel, CAFfyLabel, Set CAFfyLabel)]
sccs = {-# SCC depAnalSRTs #-} depAnalSRTs platform cafEnv static_data_env decls
- cafsWithSRTs :: [(Label, CAFfyLabel, Set CAFfyLabel)]
+ cafsWithSRTs :: [(Maybe Label, CAFfyLabel, Set CAFfyLabel)]
cafsWithSRTs = getCAFs platform cafEnv decls
srtTraceM "doSRTs" (text "data:" <+> pdoc platform data_ $$
@@ -925,7 +947,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do
flip runStateT moduleSRTInfo $ do
nonCAFs <- mapM (doSCC cfg staticFuns static_data_env) sccs
cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) ->
- oneSRT cfg staticFuns [BlockLabel l] [cafLbl]
+ oneSRT cfg staticFuns (map BlockLabel (maybeToList l)) [cafLbl]
True{-is a CAF-} cafs static_data_env
return (nonCAFs ++ cAFs)
@@ -1248,6 +1270,7 @@ buildSRT profile refs = do
[] -- no padding
[mkIntCLit platform 0] -- link field
[] -- no saved info
+ [] -- no extras
return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl)
-- | Update info tables with references to their SRTs. Also generate
@@ -1263,10 +1286,10 @@ updInfoSRTs
updInfoSRTs _ _ _ _ (CmmData s (CmmStaticsRaw lbl statics))
= [CmmData s (CmmStaticsRaw lbl statics)]
-updInfoSRTs profile _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload))
+updInfoSRTs profile _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload extras))
= [CmmData s (CmmStaticsRaw lbl (map CmmStaticLit field_lits))]
where
- field_lits = mkStaticClosureFields profile itbl ccs caffy payload
+ field_lits = mkStaticClosureFields profile itbl ccs caffy payload extras
updInfoSRTs profile srt_env funSRTEnv caffy (CmmProc top_info top_l live g)
| Just (_,closure) <- maybeStaticClosure = [ proc, closure ]
@@ -1296,7 +1319,7 @@ updInfoSRTs profile srt_env funSRTEnv caffy (CmmProc top_info top_l live g)
Just srtEntries -> srtTrace "maybeStaticFun" (pdoc (profilePlatform profile) res)
(info_tbl { cit_rep = new_rep }, res)
where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ]
- fields = mkStaticClosureFields profile info_tbl ccs caffy srtEntries
+ fields = mkStaticClosureFields profile info_tbl ccs caffy srtEntries []
new_rep = case cit_rep of
HeapRep sta ptrs nptrs ty ->
HeapRep sta (ptrs + length srtEntries) nptrs ty
=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -435,7 +435,7 @@ static :: { CmmParse [CmmStatic] }
mkStaticClosure profile (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData)
-- mkForeignLabel because these are only used
-- for CHARLIKE and INTLIKE closures in the RTS.
- dontCareCCS (map getLit lits) [] [] [] } }
+ dontCareCCS (map getLit lits) [] [] [] [] } }
-- arrays of closures required for the CHARLIKE & INTLIKE arrays
lits :: { [CmmParse CmmExpr] }
@@ -1248,7 +1248,7 @@ profilingInfo profile desc_str ty_str
staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse ()
staticClosure pkg cl_label info payload
= do profile <- getProfile
- let lits = mkStaticClosure profile (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] []
+ let lits = mkStaticClosure profile (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] []
code $ emitDataLits (mkCmmDataLabel pkg (NeedExternDecl True) cl_label) lits
foreignCall
=====================================
compiler/GHC/StgToCmm/Bind.hs
=====================================
@@ -25,6 +25,8 @@ import GHC.Stg.Syntax
import GHC.Platform
import GHC.Platform.Profile
+import GHC.Builtin.Names (unpackCStringName, unpackCStringUtf8Name)
+
import GHC.StgToCmm.Config
import GHC.StgToCmm.Expr
import GHC.StgToCmm.Monad
@@ -87,6 +89,9 @@ cgTopRhsClosure platform rec id ccs upd_flag args body =
lf_info = mkClosureLFInfo platform id TopLevel [] upd_flag args
in (cg_id_info, gen_code lf_info closure_label)
where
+
+ gen_code :: LambdaFormInfo -> CLabel -> FCode ()
+
-- special case for a indirection (f = g). We create an IND_STATIC
-- closure pointing directly to the indirectee. This is exactly
-- what the CAF will eventually evaluate to anyway, we're just
@@ -101,11 +106,44 @@ cgTopRhsClosure platform rec id ccs upd_flag args body =
-- concurrent/should_run/4030 fails, for instance.
--
gen_code _ closure_label
- | StgApp f [] <- body, null args, isNonRec rec
+ | StgApp f [] <- body
+ , null args
+ , isNonRec rec
= do
cg_info <- getCgIdInfo f
emitDataCon closure_label indStaticInfoTable ccs [unLit (idInfoToAmode cg_info)]
+ -- Emit standard stg_unpack_cstring closures for top-level unpackCString# thunks.
+ --
+ -- Note that we do not do this for thunks enclosured in code ticks (e.g. hpc
+ -- ticks) since we want to ensure that these ticks are not lost (e.g.
+ -- resulting in Strings being reported by hpc as uncovered). However, we
+ -- don't worry about standard profiling ticks since unpackCString tends not
+ -- be terribly interesting in profiles. See Note [unpack_cstring closures] in
+ -- StgStdThunks.cmm.
+ gen_code _ closure_label
+ | null args
+ , StgApp f [arg] <- stripStgTicksTopE (not . tickishIsCode) body
+ , Just unpack <- is_string_unpack_op f
+ = do arg' <- getArgAmode (NonVoid arg)
+ case arg' of
+ CmmLit lit -> do
+ let info = CmmInfoTable
+ { cit_lbl = unpack
+ , cit_rep = HeapRep True 0 1 Thunk
+ , cit_prof = NoProfilingInfo
+ , cit_srt = Nothing
+ , cit_clo = Nothing
+ }
+ emitDecl $ CmmData (Section Data closure_label) $
+ CmmStatics closure_label info ccs [] [lit]
+ _ -> panic "cgTopRhsClosure.gen_code"
+ where
+ is_string_unpack_op f
+ | idName f == unpackCStringName = Just mkRtsUnpackCStringLabel
+ | idName f == unpackCStringUtf8Name = Just mkRtsUnpackCStringUtf8Label
+ | otherwise = Nothing
+
gen_code lf_info _closure_label
= do { profile <- getProfile
; let name = idName id
=====================================
compiler/GHC/StgToCmm/Heap.hs
=====================================
@@ -161,19 +161,20 @@ hpStore base vals = do
-- Layout of static closures
-----------------------------------------------------------
--- Make a static closure, adding on any extra padding needed for CAFs,
--- and adding a static link field if necessary.
-
+-- | Make a static closure, adding on any extra padding needed for CAFs, and
+-- adding a static link field if necessary.
mkStaticClosureFields
:: Profile
-> CmmInfoTable
-> CostCentreStack
-> CafInfo
- -> [CmmLit] -- Payload
+ -> [CmmLit] -- ^ Payload
+ -> [CmmLit] -- ^ Extra non-pointers that go to the end of the closure.
+ -- See Note [unpack_cstring closures] in StgStdThunks.cmm.
-> [CmmLit] -- The full closure
-mkStaticClosureFields profile info_tbl ccs caf_refs payload
+mkStaticClosureFields profile info_tbl ccs caf_refs payload extras
= mkStaticClosure profile info_lbl ccs payload padding
- static_link_field saved_info_field
+ static_link_field saved_info_field extras
where
platform = profilePlatform profile
info_lbl = cit_lbl info_tbl
@@ -218,14 +219,15 @@ mkStaticClosureFields profile info_tbl ccs caf_refs payload
-- in rts/sm/Storage.h
mkStaticClosure :: Profile -> CLabel -> CostCentreStack -> [CmmLit]
- -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
-mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field
+ -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
+mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field extras
= [CmmLabel info_lbl]
++ staticProfHdr profile ccs
++ payload
++ padding
++ static_link_field
++ saved_info_field
+ ++ extras
-----------------------------------------------------------
-- Heap overflow checking
=====================================
compiler/GHC/StgToCmm/Utils.hs
=====================================
@@ -266,7 +266,7 @@ emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits)
emitDataCon :: CLabel -> CmmInfoTable -> CostCentreStack -> [CmmLit] -> FCode ()
emitDataCon lbl itbl ccs payload =
- emitDecl (CmmData (Section Data lbl) (CmmStatics lbl itbl ccs payload))
+ emitDecl (CmmData (Section Data lbl) (CmmStatics lbl itbl ccs payload []))
-------------------------------------------------------------------------
--
=====================================
hadrian/src/Settings.hs
=====================================
@@ -74,7 +74,8 @@ flavour = do
(settingErrs, tweak) = applySettings kvs
when (not $ null settingErrs) $ fail
- $ "failed to apply key-value settings:" ++ unlines (map (" - " ++) settingErrs)
+ $ "failed to apply key-value settings:\n\t" ++ unlines (map (" - " ++) settingErrs) ++
+ "\t Entries should look something like \"stage1.containers.ghc.hs.opts += -Werror\""
case parseFlavour flavours flavourTransformers flavourName of
Left err -> fail err
=====================================
rts/Prelude.h
=====================================
@@ -33,6 +33,7 @@ PRELUDE_CLOSURE(ghczmprim_GHCziTupleziPrim_Z0T_closure);
PRELUDE_CLOSURE(ghczmprim_GHCziTypes_True_closure);
PRELUDE_CLOSURE(ghczmprim_GHCziTypes_False_closure);
PRELUDE_CLOSURE(base_GHCziPack_unpackCString_closure);
+PRELUDE_CLOSURE(base_GHCziPack_unpackCStringUtf8_closure);
PRELUDE_CLOSURE(base_GHCziWeak_runFinalizzerBatch_closure);
PRELUDE_CLOSURE(base_GHCziWeakziFinalizze_runFinalizzerBatch_closure);
@@ -70,6 +71,7 @@ PRELUDE_CLOSURE(base_GHCziEventziWindows_processRemoteCompletion_closure);
PRELUDE_CLOSURE(base_GHCziTopHandler_flushStdHandles_closure);
PRELUDE_CLOSURE(base_GHCziTopHandler_runMainIO_closure);
+PRELUDE_INFO(ghczmprim_GHCziCString_unpackCStringzh_info);
PRELUDE_INFO(ghczmprim_GHCziTypes_Czh_con_info);
PRELUDE_INFO(ghczmprim_GHCziTypes_Izh_con_info);
PRELUDE_INFO(ghczmprim_GHCziTypes_Fzh_con_info);
=====================================
rts/RtsSymbols.c
=====================================
@@ -9,6 +9,7 @@
#include "ghcplatform.h"
#include "Rts.h"
#include "RtsSymbols.h"
+
#include "TopHandler.h"
#include "HsFFI.h"
#include "CloneStack.h"
@@ -713,7 +714,7 @@ extern char **environ;
SymI_HasProto(defaultRtsConfig) \
SymI_HasProto(initLinker) \
SymI_HasProto(initLinker_) \
- SymI_HasDataProto(stg_unpackClosurezh) \
+ SymI_HasDataProto(stg_unpackClosurezh) \
SymI_HasDataProto(stg_closureSizzezh) \
SymI_HasDataProto(stg_whereFromzh) \
SymI_HasDataProto(stg_getApStackValzh) \
@@ -976,6 +977,8 @@ extern char **environ;
SymI_HasDataProto(stg_sel_13_noupd_info) \
SymI_HasDataProto(stg_sel_14_noupd_info) \
SymI_HasDataProto(stg_sel_15_noupd_info) \
+ SymI_HasDataProto(stg_unpack_cstring_info) \
+ SymI_HasDataProto(stg_unpack_cstring_utf8_info) \
SymI_HasDataProto(stg_upd_frame_info) \
SymI_HasDataProto(stg_bh_upd_frame_info) \
SymI_HasProto(suspendThread) \
=====================================
rts/StgStdThunks.cmm
=====================================
@@ -13,6 +13,9 @@
#include "Cmm.h"
#include "Updates.h"
+import ghczmprim_GHCziCString_unpackCStringzh_info;
+import ghczmprim_GHCziCString_unpackCStringUtf8zh_info;
+
/* -----------------------------------------------------------------------------
The code for a thunk that simply extracts a field from a
single-constructor datatype depends only on the offset of the field
@@ -286,3 +289,100 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
StgThunk_payload(node,6));
}
}
+
+/* -----------------------------------------------------------------------------
+ Making strings
+ -------------------------------------------------------------------------- */
+
+/*
+ * Note [unpack_cstring closures]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Strings are extremely common. In Core they will typically manifest as the
+ * a pair of top-level bindings:
+ *
+ * s :: String
+ * s = unpackCString# s#
+ *
+ * s# :: Addr#
+ * s# = "hello world"#
+ *
+ * It turns out that `s` is a non-trivial amount of code which is duplicated
+ * for every `String` literal. To avoid this duplicate, we have a standard
+ * string-unpacking closure, unpack_cstring. Note that currently we only do
+ * this for ASCII strings; strings mentioning non-ASCII characters are
+ * represented by CAF applications of unpackCStringUtf8# as before.
+ *
+ * unpack_cstring closures are similar to standard THUNK_STATIC closures but
+ * with a non-GC pointer to a C-string at the end (the "extra" pointer).
+ * We must place this extra pointer at the end of the closure to ensure that
+ * it has a similar layout to a normal THUNK_STATIC closure, which has no space
+ * for free variables (since these would be contained in the thunk's code and SRT).
+ *
+ * When it is evaluated, an stg_unpack_cstring closure is updated to be an
+ * indirection to the resulting [Char], just as a normal unpackCString# thunk
+ * would be.
+ *
+ * Closure layout:
+ *
+ * ┌───────────────────┐ ┌──► ┌──────────────────────────┐
+ * │ stg_unpack_cstring│ │ │ "hello world ..." │
+ * ├───────────────────┤ │ └──────────────────────────┘
+ * │ indirectee │ │
+ * ├───────────────────┤ │
+ * │ static_link │ │
+ * ├───────────────────┤ │
+ * │ saved_info │ │
+ * ├───────────────────┤ │
+ * │ the_string ─┼───────┘
+ * └───────────────────┘
+ *
+ */
+
+stg_do_unpack_cstring(P_ node, P_ newCAF_ret) {
+ STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring, node, newCAF_ret);
+ W_ str;
+ str = StgThunk_payload(node, 2);
+ push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+ jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringzh_info)(node, str);
+ }
+}
+
+INFO_TABLE(stg_unpack_cstring, 0, 0, THUNK_STATIC, "stg_unpack_cstring", "stg_unpack_cstring")
+ (P_ node)
+{
+ W_ newCAF_ret;
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ // We raced with another thread to evaluate the CAF and they won;
+ // `node` should now be an indirection.
+ ENTER(node);
+ } else {
+ jump stg_do_unpack_cstring(node, newCAF_ret);
+ }
+}
+
+stg_do_unpack_cstring_utf8(P_ node, P_ newCAF_ret) {
+ STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring_utf8, node, newCAF_ret);
+ W_ str;
+ str = StgThunk_payload(node, 2);
+ push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+ jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringUtf8zh_info)(node, str);
+ }
+}
+
+INFO_TABLE(stg_unpack_cstring_utf8, 0, 0, THUNK_STATIC, "stg_unpack_cstring_utf8", "stg_unpack_cstring_utf8")
+ (P_ node)
+{
+ W_ newCAF_ret;
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ // We raced with another thread to evaluate the CAF and they won;
+ // `node` should now be an indirection.
+ ENTER(node);
+ } else {
+ jump stg_do_unpack_cstring_utf8(node, newCAF_ret);
+ }
+}
+
=====================================
rts/include/stg/MiscClosures.h
=====================================
@@ -318,6 +318,10 @@ RTS_THUNK(stg_ap_5_upd);
RTS_THUNK(stg_ap_6_upd);
RTS_THUNK(stg_ap_7_upd);
+// Standard entry for `unpackCString# str` thunks
+RTS_ENTRY(stg_unpack_cstring);
+RTS_ENTRY(stg_unpack_cstring_utf8);
+
/* standard application routines (see also utils/genapply,
* and GHC.StgToCmm.ArgRep).
*/
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e09780bd70bd0c99283507ed3000d6c416c290ff...4384f012adcbd1713dd93d92638f12568f91eac1
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/e09780bd70bd0c99283507ed3000d6c416c290ff...4384f012adcbd1713dd93d92638f12568f91eac1
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221022/5b2ffa7d/attachment-0001.html>
More information about the ghc-commits
mailing list