[Git][ghc/ghc][wip/osa1/std_string_thunks] Introduce a standard thunk for allocating strings
Ömer Sinan Ağacan
gitlab at gitlab.haskell.org
Tue Apr 28 13:46:02 UTC 2020
Ömer Sinan Ağacan pushed to branch wip/osa1/std_string_thunks at Glasgow Haskell Compiler / GHC
Commits:
3a39d538 by Ömer Sinan Ağacan at 2020-04-28T16:45:53+03:00
Introduce a standard thunk for allocating strings
Currently for a top-level closure in the form
hey = unpackCString# x
we generate code like this:
Main.hey_entry() // [R1]
{ info_tbls: [(c2T4,
label: Main.hey_info
rep: HeapRep static { Thunk }
srt: Nothing)]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c2T4: // global
_rqm::P64 = R1;
if ((Sp + 8) - 24 < SpLim) (likely: False) goto c2T5; else goto c2T6;
c2T5: // global
R1 = _rqm::P64;
call (stg_gc_enter_1)(R1) args: 8, res: 0, upd: 8;
c2T6: // global
(_c2T1::I64) = call "ccall" arg hints: [PtrHint,
PtrHint] result hints: [PtrHint] newCAF(BaseReg, _rqm::P64);
if (_c2T1::I64 == 0) goto c2T3; else goto c2T2;
c2T3: // global
call (I64[_rqm::P64])() args: 8, res: 0, upd: 8;
c2T2: // global
I64[Sp - 16] = stg_bh_upd_frame_info;
I64[Sp - 8] = _c2T1::I64;
R2 = hey1_r2Gg_bytes;
Sp = Sp - 16;
call GHC.CString.unpackCString#_info(R2) args: 24, res: 0, upd: 24;
}
}
This code is generated for every string literal. Only difference between
top-level closures like this is the argument for the bytes of the string
(hey1_r2Gg_bytes in the code above).
With this patch we introduce a standard thunk in the RTS, called
stg_MK_STRING_info, that does what `unpackCString# x` does, except it
gets the bytes address from the payload. Using this, for the closure
above, we generate this:
Main.hey_closure" {
Main.hey_closure:
const stg_MK_STRING_info;
const hey1_r1Gg_bytes;
const 0;
const 0;
}
This is much smaller in code.
- - - - -
6 changed files:
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/StgToCmm/Bind.hs
- compiler/GHC/StgToCmm/Closure.hs
- includes/stg/MiscClosures.h
- rts/RtsSymbols.c
- rts/StgStdThunks.cmm
Changes:
=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -23,6 +23,7 @@ module GHC.Cmm.CLabel (
mkConInfoTableLabel,
mkApEntryLabel,
mkApInfoTableLabel,
+ mkMkStringInfoTableLabel,
mkClosureTableLabel,
mkBytesLabel,
@@ -61,6 +62,7 @@ module GHC.Cmm.CLabel (
mkCAFBlackHoleInfoTableLabel,
mkRtsPrimOpLabel,
mkRtsSlowFastTickyCtrLabel,
+ mkRtsMkStringLabel,
mkSelectorInfoLabel,
mkSelectorEntryLabel,
@@ -427,6 +429,8 @@ data RtsLabelInfo
| RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks
| RtsApEntry Bool{-updatable-} Int{-arity-}
+ | RtsMkStringInfoTable
+
| RtsPrimOp PrimOp
| RtsApFast FastString -- ^ _fast versions of generic apply
| RtsSlowFastTickyCtr String
@@ -570,16 +574,16 @@ mkLocalBlockLabel u = LocalBlockLabel u
mkRtsPrimOpLabel :: PrimOp -> CLabel
mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop)
-mkSelectorInfoLabel :: Bool -> Int -> CLabel
-mkSelectorEntryLabel :: Bool -> Int -> CLabel
+mkSelectorInfoLabel, mkSelectorEntryLabel :: Bool -> Int -> CLabel
mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off)
mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off)
-mkApInfoTableLabel :: Bool -> Int -> CLabel
-mkApEntryLabel :: Bool -> Int -> CLabel
+mkApInfoTableLabel, mkApEntryLabel :: Bool -> Int -> CLabel
mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd off)
mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off)
+mkMkStringInfoTableLabel :: CLabel
+mkMkStringInfoTableLabel = RtsLabel RtsMkStringInfoTable
-- A call to some primitive hand written Cmm code
mkPrimCallLabel :: PrimCall -> CLabel
@@ -672,6 +676,8 @@ mkRtsApFastLabel str = RtsLabel (RtsApFast str)
mkRtsSlowFastTickyCtrLabel :: String -> CLabel
mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
+mkRtsMkStringLabel :: CLabel
+mkRtsMkStringLabel = RtsLabel RtsMkStringInfoTable
-- Constructing Code Coverage Labels
mkHpcTicksLabel :: Module -> CLabel
@@ -1297,6 +1303,8 @@ pprCLbl dflags = \case
(CCS_Label ccs) -> ppr ccs
(HpcTicksLabel mod) -> text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc")
+ (RtsLabel RtsMkStringInfoTable) -> text "stg_MK_STRING_info"
+
(AsmTempLabel {}) -> panic "pprCLbl AsmTempLabel"
(AsmTempDerivedLabel {}) -> panic "pprCLbl AsmTempDerivedLabel"
(DynamicLinkerLabel {}) -> panic "pprCLbl DynamicLinkerLabel"
=====================================
compiler/GHC/StgToCmm/Bind.hs
=====================================
@@ -51,6 +51,8 @@ import GHC.Utils.Outputable
import GHC.Data.FastString
import GHC.Driver.Session
+import GHC.Builtin.Names (unpackCStringName)
+
import Control.Monad
------------------------------------------------------------------------
@@ -76,6 +78,9 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
lf_info = mkClosureLFInfo platform id TopLevel [] upd_flag args
in (cg_id_info, gen_code dflags lf_info closure_label)
where
+
+ gen_code :: DynFlags -> LambdaFormInfo -> CLabel -> FCode ()
+
-- special case for a indirection (f = g). We create an IND_STATIC
-- closure pointing directly to the indirectee. This is exactly
-- what the CAF will eventually evaluate to anyway, we're just
@@ -90,11 +95,34 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
-- concurrent/should_run/4030 fails, for instance.
--
gen_code _ _ closure_label
- | StgApp f [] <- body, null args, isNonRec rec
+ | StgApp f [] <- body
+ , null args
+ , isNonRec rec
= do
cg_info <- getCgIdInfo f
emitDataCon closure_label indStaticInfoTable ccs [unLit (idInfoToAmode cg_info)]
+ gen_code _ _ closure_label
+ | StgApp f [arg] <- stripStgTicksTopE (not . tickishIsCode) body
+ , idName f == unpackCStringName
+ = do -- TODO: What to do with ticks?
+ pprTrace "unpackCString#" (ppr body) (return ())
+ arg' <- getArgAmode (NonVoid arg)
+ case arg' of
+ CmmLit lit -> do
+ let payload = [lit]
+ let info = CmmInfoTable
+ { cit_lbl = mkRtsMkStringLabel
+ , cit_rep = HeapRep True 0 1 Thunk
+ , cit_prof = NoProfilingInfo
+ , cit_srt = Nothing
+ , cit_clo = Nothing
+ }
+ emitDecl (CmmData (Section Data closure_label) (CmmStatics closure_label info ccs payload))
+
+ _ -> panic "cgTopRhsClosure.gen_code"
+
+
gen_code dflags lf_info _closure_label
= do { let name = idName id
; mod_name <- getModuleName
@@ -222,6 +250,34 @@ mkRhsClosure :: DynFlags -> Id -> CostCentreStack
-> CgStgExpr
-> FCode (CgIdInfo, FCode CmmAGraph)
+{-
+ TODO: Consider handling this too. Not sure if it's going to save us much to
+ so this needs benchmarking.
+
+---------- unpackCString# --------------------
+mkRhsClosure dflags bndr _cc
+ [] -- No free variables, because this is top-level
+ Updatable -- Updatable thunk
+ [] -- A thunk
+ expr
+
+ | let expr_no_ticks = stripStgTicksTopE (not . tickishIsCode) expr
+ , StgApp fn [arg] <- expr
+ , idName fn == unpackCStringName
+ = -- TODO: What to do with ticks?
+ -- A non-top-level unpackCString# closure. Most unpackCString# closures are
+ -- floted to the top-level, but sometimes we see simplifier-generated thunks
+ -- like:
+ --
+ -- sat_sK0 [Occ=Once] :: [GHC.Types.Char]
+ -- [LclId] =
+ -- {} \u []
+ -- GHC.CString.unpackCString#
+ -- "Oops! The program has entered an `absent' argument!\n"#;
+ --
+ pprPanic "mkRhsClosure" (text "unpackCString# closure:" <+> ppr expr)
+-}
+
{- mkRhsClosure looks for two special forms of the right-hand side:
a) selector thunks
b) AP thunks
@@ -375,7 +431,8 @@ mkRhsClosure dflags bndr cc fvs upd_flag args body
-------------------------
cgRhsStdThunk
- :: Id
+ :: HasCallStack
+ => Id
-> LambdaFormInfo
-> [StgArg] -- payload
-> FCode (CgIdInfo, FCode CmmAGraph)
@@ -432,7 +489,8 @@ mkClosureLFInfo platform bndr top fvs upd_flag args
-- The code for closures
------------------------------------------------------------------------
-closureCodeBody :: Bool -- whether this is a top-level binding
+closureCodeBody :: HasCallStack
+ => Bool -- whether this is a top-level binding
-> Id -- the closure's name
-> ClosureInfo -- Lots of information about this closure
-> CostCentreStack -- Optional cost centre attached to closure
=====================================
compiler/GHC/StgToCmm/Closure.hs
=====================================
@@ -258,7 +258,6 @@ data StandardFormInfo
-- in the RTS to save space.
RepArity -- Arity, n
-
------------------------------------------------------
-- Building LambdaFormInfo
------------------------------------------------------
@@ -666,7 +665,7 @@ data ClosureInfo
}
-- | Convert from 'ClosureInfo' to 'CmmInfoTable'.
-mkCmmInfo :: ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable
+mkCmmInfo :: HasCallStack => ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable
mkCmmInfo ClosureInfo {..} id ccs
= CmmInfoTable { cit_lbl = closureInfoLabel
, cit_rep = closureSMRep
=====================================
includes/stg/MiscClosures.h
=====================================
@@ -241,6 +241,10 @@ RTS_THUNK(stg_ap_5_upd);
RTS_THUNK(stg_ap_6_upd);
RTS_THUNK(stg_ap_7_upd);
+/* the `unpackCString# ...` thunk */
+
+RTS_THUNK(stg_MK_STRING);
+
/* standard application routines (see also utils/genapply,
* and GHC.StgToCmm.ArgRep).
*/
=====================================
rts/RtsSymbols.c
=====================================
@@ -913,6 +913,7 @@
SymI_HasProto(stg_sel_13_noupd_info) \
SymI_HasProto(stg_sel_14_noupd_info) \
SymI_HasProto(stg_sel_15_noupd_info) \
+ SymI_HasProto(stg_MK_STRING_info) \
SymI_HasProto(stg_upd_frame_info) \
SymI_HasProto(stg_bh_upd_frame_info) \
SymI_HasProto(suspendThread) \
=====================================
rts/StgStdThunks.cmm
=====================================
@@ -13,6 +13,8 @@
#include "Cmm.h"
#include "Updates.h"
+import CLOSURE ghczmprim_GHCziCString_unpackCStringzh_info;
+
/* -----------------------------------------------------------------------------
The code for a thunk that simply extracts a field from a
single-constructor datatype depends only on the offset of the field
@@ -284,3 +286,26 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
StgThunk_payload(node,6));
}
}
+
+/* -----------------------------------------------------------------------------
+ Making strings
+ -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_MK_STRING, 0, 1, THUNK_STATIC, "stg_MK_STRING", "stg_MK_STRING")
+ (P_ node)
+{
+ W_ newCAF_ret;
+
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ LDV_ENTER(node);
+ } else {
+ // TODO: Stack checks?
+ Sp_adj(-2);
+ Sp(1) = node;
+ Sp(0) = stg_bh_upd_frame_info;
+ jump ghczmprim_GHCziCString_unpackCStringzh_info(StgThunk_payload(node,0));
+
+ }
+}
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3a39d53833746c0b025815b1a604c294c63ca385
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3a39d53833746c0b025815b1a604c294c63ca385
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200428/43eb1a13/attachment-0001.html>
More information about the ghc-commits
mailing list