[Git][ghc/ghc][wip/osa1/std_string_thunks] Introduce a standard thunk for allocating strings
Ömer Sinan Ağacan
gitlab at gitlab.haskell.org
Thu May 28 11:28:22 UTC 2020
Ömer Sinan Ağacan pushed to branch wip/osa1/std_string_thunks at Glasgow Haskell Compiler / GHC
Commits:
df2ece33 by Ömer Sinan Ağacan at 2020-05-28T14:27:55+03:00
Introduce a standard thunk for allocating strings
Currently for a top-level closure in the form
hey = unpackCString# x
we generate code like this:
Main.hey_entry() // [R1]
{ info_tbls: [(c2T4,
label: Main.hey_info
rep: HeapRep static { Thunk }
srt: Nothing)]
stack_info: arg_space: 8 updfr_space: Just 8
}
{offset
c2T4: // global
_rqm::P64 = R1;
if ((Sp + 8) - 24 < SpLim) (likely: False) goto c2T5; else goto c2T6;
c2T5: // global
R1 = _rqm::P64;
call (stg_gc_enter_1)(R1) args: 8, res: 0, upd: 8;
c2T6: // global
(_c2T1::I64) = call "ccall" arg hints: [PtrHint,
PtrHint] result hints: [PtrHint] newCAF(BaseReg, _rqm::P64);
if (_c2T1::I64 == 0) goto c2T3; else goto c2T2;
c2T3: // global
call (I64[_rqm::P64])() args: 8, res: 0, upd: 8;
c2T2: // global
I64[Sp - 16] = stg_bh_upd_frame_info;
I64[Sp - 8] = _c2T1::I64;
R2 = hey1_r2Gg_bytes;
Sp = Sp - 16;
call GHC.CString.unpackCString#_info(R2) args: 24, res: 0, upd: 24;
}
}
This code is generated for every string literal. Only difference between
top-level closures like this is the argument for the bytes of the string
(hey1_r2Gg_bytes in the code above).
With this patch we introduce a standard thunk in the RTS, called
stg_MK_STRING_info, that does what `unpackCString# x` does, except it
gets the bytes address from the payload. Using this, for the closure
above, we generate this:
Main.hey_closure" {
Main.hey_closure:
const stg_MK_STRING_info;
const hey1_r1Gg_bytes;
const 0;
const 0;
}
This is much smaller in code.
- - - - -
5 changed files:
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/StgToCmm/Bind.hs
- includes/stg/MiscClosures.h
- rts/RtsSymbols.c
- rts/StgStdThunks.cmm
Changes:
=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -23,6 +23,7 @@ module GHC.Cmm.CLabel (
mkConInfoTableLabel,
mkApEntryLabel,
mkApInfoTableLabel,
+ mkMkStringInfoTableLabel,
mkClosureTableLabel,
mkBytesLabel,
@@ -61,6 +62,7 @@ module GHC.Cmm.CLabel (
mkCAFBlackHoleInfoTableLabel,
mkRtsPrimOpLabel,
mkRtsSlowFastTickyCtrLabel,
+ mkRtsMkStringLabel,
mkSelectorInfoLabel,
mkSelectorEntryLabel,
@@ -426,6 +428,8 @@ data RtsLabelInfo
| RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks
| RtsApEntry Bool{-updatable-} Int{-arity-}
+ | RtsMkStringInfoTable
+
| RtsPrimOp PrimOp
| RtsApFast FastString -- ^ _fast versions of generic apply
| RtsSlowFastTickyCtr String
@@ -569,16 +573,16 @@ mkLocalBlockLabel u = LocalBlockLabel u
mkRtsPrimOpLabel :: PrimOp -> CLabel
mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop)
-mkSelectorInfoLabel :: Bool -> Int -> CLabel
-mkSelectorEntryLabel :: Bool -> Int -> CLabel
+mkSelectorInfoLabel, mkSelectorEntryLabel :: Bool -> Int -> CLabel
mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off)
mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off)
-mkApInfoTableLabel :: Bool -> Int -> CLabel
-mkApEntryLabel :: Bool -> Int -> CLabel
+mkApInfoTableLabel, mkApEntryLabel :: Bool -> Int -> CLabel
mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd off)
mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off)
+mkMkStringInfoTableLabel :: CLabel
+mkMkStringInfoTableLabel = RtsLabel RtsMkStringInfoTable
-- A call to some primitive hand written Cmm code
mkPrimCallLabel :: PrimCall -> CLabel
@@ -671,6 +675,8 @@ mkRtsApFastLabel str = RtsLabel (RtsApFast str)
mkRtsSlowFastTickyCtrLabel :: String -> CLabel
mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
+mkRtsMkStringLabel :: CLabel
+mkRtsMkStringLabel = RtsLabel RtsMkStringInfoTable
-- Constructing Code Coverage Labels
mkHpcTicksLabel :: Module -> CLabel
@@ -1296,6 +1302,8 @@ pprCLbl dflags = \case
(CCS_Label ccs) -> ppr ccs
(HpcTicksLabel mod) -> text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc")
+ (RtsLabel RtsMkStringInfoTable) -> text "stg_MK_STRING_info"
+
(AsmTempLabel {}) -> panic "pprCLbl AsmTempLabel"
(AsmTempDerivedLabel {}) -> panic "pprCLbl AsmTempDerivedLabel"
(DynamicLinkerLabel {}) -> panic "pprCLbl DynamicLinkerLabel"
=====================================
compiler/GHC/StgToCmm/Bind.hs
=====================================
@@ -51,6 +51,8 @@ import GHC.Utils.Outputable
import GHC.Data.FastString
import GHC.Driver.Session
+import GHC.Builtin.Names (unpackCStringName)
+
import Control.Monad
------------------------------------------------------------------------
@@ -76,6 +78,9 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
lf_info = mkClosureLFInfo platform id TopLevel [] upd_flag args
in (cg_id_info, gen_code dflags lf_info closure_label)
where
+
+ gen_code :: DynFlags -> LambdaFormInfo -> CLabel -> FCode ()
+
-- special case for a indirection (f = g). We create an IND_STATIC
-- closure pointing directly to the indirectee. This is exactly
-- what the CAF will eventually evaluate to anyway, we're just
@@ -90,11 +95,34 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
-- concurrent/should_run/4030 fails, for instance.
--
gen_code _ _ closure_label
- | StgApp f [] <- body, null args, isNonRec rec
+ | StgApp f [] <- body
+ , null args
+ , isNonRec rec
= do
cg_info <- getCgIdInfo f
emitDataCon closure_label indStaticInfoTable ccs [unLit (idInfoToAmode cg_info)]
+ gen_code _ _ closure_label
+ | StgApp f [arg] <- stripStgTicksTopE (not . tickishIsCode) body
+ , idName f == unpackCStringName
+ = do -- TODO: What to do with ticks?
+ pprTrace "unpackCString#" (ppr body) (return ())
+ arg' <- getArgAmode (NonVoid arg)
+ case arg' of
+ CmmLit lit -> do
+ let payload = [lit]
+ let info = CmmInfoTable
+ { cit_lbl = mkRtsMkStringLabel
+ , cit_rep = HeapRep True 0 1 Thunk
+ , cit_prof = NoProfilingInfo
+ , cit_srt = Nothing
+ , cit_clo = Nothing
+ }
+ emitDecl (CmmData (Section Data closure_label) (CmmStatics closure_label info ccs payload))
+
+ _ -> panic "cgTopRhsClosure.gen_code"
+
+
gen_code dflags lf_info _closure_label
= do { let name = idName id
; mod_name <- getModuleName
@@ -222,6 +250,34 @@ mkRhsClosure :: DynFlags -> Id -> CostCentreStack
-> CgStgExpr
-> FCode (CgIdInfo, FCode CmmAGraph)
+{-
+ TODO: Consider handling this too. Not sure if it's going to save us much to
+ so this needs benchmarking.
+
+---------- unpackCString# --------------------
+mkRhsClosure dflags bndr _cc
+ [] -- No free variables, because this is top-level
+ Updatable -- Updatable thunk
+ [] -- A thunk
+ expr
+
+ | let expr_no_ticks = stripStgTicksTopE (not . tickishIsCode) expr
+ , StgApp fn [arg] <- expr
+ , idName fn == unpackCStringName
+ = -- TODO: What to do with ticks?
+ -- A non-top-level unpackCString# closure. Most unpackCString# closures are
+ -- floted to the top-level, but sometimes we see simplifier-generated thunks
+ -- like:
+ --
+ -- sat_sK0 [Occ=Once] :: [GHC.Types.Char]
+ -- [LclId] =
+ -- {} \u []
+ -- GHC.CString.unpackCString#
+ -- "Oops! The program has entered an `absent' argument!\n"#;
+ --
+ pprPanic "mkRhsClosure" (text "unpackCString# closure:" <+> ppr expr)
+-}
+
{- mkRhsClosure looks for two special forms of the right-hand side:
a) selector thunks
b) AP thunks
=====================================
includes/stg/MiscClosures.h
=====================================
@@ -241,6 +241,9 @@ RTS_THUNK(stg_ap_5_upd);
RTS_THUNK(stg_ap_6_upd);
RTS_THUNK(stg_ap_7_upd);
+// Standard entry for `unpackCString# str` thunks
+RTS_ENTRY(stg_MK_STRING);
+
/* standard application routines (see also utils/genapply,
* and GHC.StgToCmm.ArgRep).
*/
=====================================
rts/RtsSymbols.c
=====================================
@@ -914,6 +914,7 @@
SymI_HasProto(stg_sel_13_noupd_info) \
SymI_HasProto(stg_sel_14_noupd_info) \
SymI_HasProto(stg_sel_15_noupd_info) \
+ SymI_HasProto(stg_MK_STRING_info) \
SymI_HasProto(stg_upd_frame_info) \
SymI_HasProto(stg_bh_upd_frame_info) \
SymI_HasProto(suspendThread) \
=====================================
rts/StgStdThunks.cmm
=====================================
@@ -13,6 +13,8 @@
#include "Cmm.h"
#include "Updates.h"
+import CLOSURE ghczmprim_GHCziCString_unpackCStringzh_info;
+
/* -----------------------------------------------------------------------------
The code for a thunk that simply extracts a field from a
single-constructor datatype depends only on the offset of the field
@@ -284,3 +286,29 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
StgThunk_payload(node,6));
}
}
+
+/* -----------------------------------------------------------------------------
+ Making strings
+ -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_MK_STRING, 0, 1, THUNK_STATIC, "stg_MK_STRING", "stg_MK_STRING")
+ (P_ node)
+{
+ W_ newCAF_ret;
+
+ // TODO (osa): Not sure why we do stack check before `newCAF`, but this is
+ // how `unpackCString# str` thunks are today.
+ STK_CHK_ENTER(WDS(2), node);
+
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ jump node();
+ } else {
+ // Stack check done above
+ Sp_adj(-2);
+ Sp(1) = node;
+ Sp(0) = stg_bh_upd_frame_info;
+ jump stg_ap_n_fast(ghczmprim_GHCziCString_unpackCStringzh_info, StgThunk_payload(node, 0));
+ }
+}
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/df2ece33892e4f38608bdb76ddb0278e61749f59
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/df2ece33892e4f38608bdb76ddb0278e61749f59
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200528/6a83b15d/attachment-0001.html>
More information about the ghc-commits
mailing list