[Git][ghc/ghc][wip/osa1/std_string_thunks] Introduce a standard thunk for allocating strings

Ömer Sinan Ağacan gitlab at gitlab.haskell.org
Thu May 28 10:45:16 UTC 2020



Ömer Sinan Ağacan pushed to branch wip/osa1/std_string_thunks at Glasgow Haskell Compiler / GHC


Commits:
6a1ac542 by Ömer Sinan Ağacan at 2020-05-28T13:44:54+03:00
Introduce a standard thunk for allocating strings

Currently for a top-level closure in the form

    hey = unpackCString# x

we generate code like this:

    Main.hey_entry() //  [R1]
             { info_tbls: [(c2T4,
                            label: Main.hey_info
                            rep: HeapRep static { Thunk }
                            srt: Nothing)]
               stack_info: arg_space: 8 updfr_space: Just 8
             }
         {offset
           c2T4: // global
               _rqm::P64 = R1;
               if ((Sp + 8) - 24 < SpLim) (likely: False) goto c2T5; else goto c2T6;
           c2T5: // global
               R1 = _rqm::P64;
               call (stg_gc_enter_1)(R1) args: 8, res: 0, upd: 8;
           c2T6: // global
               (_c2T1::I64) = call "ccall" arg hints:  [PtrHint,
                                                        PtrHint]  result hints:  [PtrHint] newCAF(BaseReg, _rqm::P64);
               if (_c2T1::I64 == 0) goto c2T3; else goto c2T2;
           c2T3: // global
               call (I64[_rqm::P64])() args: 8, res: 0, upd: 8;
           c2T2: // global
               I64[Sp - 16] = stg_bh_upd_frame_info;
               I64[Sp - 8] = _c2T1::I64;
               R2 = hey1_r2Gg_bytes;
               Sp = Sp - 16;
               call GHC.CString.unpackCString#_info(R2) args: 24, res: 0, upd: 24;
         }
     }

This code is generated for every string literal. Only difference between
top-level closures like this is the argument for the bytes of the string
(hey1_r2Gg_bytes in the code above).

With this patch we introduce a standard thunk in the RTS, called
stg_MK_STRING_info, that does what `unpackCString# x` does, except it
gets the bytes address from the payload. Using this, for the closure
above, we generate this:

    Main.hey_closure" {
        Main.hey_closure:
            const stg_MK_STRING_info;
            const hey1_r1Gg_bytes;
            const 0;
            const 0;
    }

This is much smaller in code.

- - - - -


5 changed files:

- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/StgToCmm/Bind.hs
- includes/stg/MiscClosures.h
- rts/RtsSymbols.c
- rts/StgStdThunks.cmm


Changes:

=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -23,6 +23,7 @@ module GHC.Cmm.CLabel (
         mkConInfoTableLabel,
         mkApEntryLabel,
         mkApInfoTableLabel,
+        mkMkStringInfoTableLabel,
         mkClosureTableLabel,
         mkBytesLabel,
 
@@ -61,6 +62,7 @@ module GHC.Cmm.CLabel (
         mkCAFBlackHoleInfoTableLabel,
         mkRtsPrimOpLabel,
         mkRtsSlowFastTickyCtrLabel,
+        mkRtsMkStringLabel,
 
         mkSelectorInfoLabel,
         mkSelectorEntryLabel,
@@ -426,6 +428,8 @@ data RtsLabelInfo
   | RtsApInfoTable       Bool{-updatable-} Int{-arity-}    -- ^ AP thunks
   | RtsApEntry           Bool{-updatable-} Int{-arity-}
 
+  | RtsMkStringInfoTable
+
   | RtsPrimOp PrimOp
   | RtsApFast     FastString    -- ^ _fast versions of generic apply
   | RtsSlowFastTickyCtr String
@@ -569,16 +573,16 @@ mkLocalBlockLabel u = LocalBlockLabel u
 mkRtsPrimOpLabel :: PrimOp -> CLabel
 mkRtsPrimOpLabel primop         = RtsLabel (RtsPrimOp primop)
 
-mkSelectorInfoLabel  :: Bool -> Int -> CLabel
-mkSelectorEntryLabel :: Bool -> Int -> CLabel
+mkSelectorInfoLabel, mkSelectorEntryLabel  :: Bool -> Int -> CLabel
 mkSelectorInfoLabel  upd off    = RtsLabel (RtsSelectorInfoTable upd off)
 mkSelectorEntryLabel upd off    = RtsLabel (RtsSelectorEntry     upd off)
 
-mkApInfoTableLabel :: Bool -> Int -> CLabel
-mkApEntryLabel     :: Bool -> Int -> CLabel
+mkApInfoTableLabel, mkApEntryLabel :: Bool -> Int -> CLabel
 mkApInfoTableLabel   upd off    = RtsLabel (RtsApInfoTable       upd off)
 mkApEntryLabel       upd off    = RtsLabel (RtsApEntry           upd off)
 
+mkMkStringInfoTableLabel :: CLabel
+mkMkStringInfoTableLabel = RtsLabel RtsMkStringInfoTable
 
 -- A call to some primitive hand written Cmm code
 mkPrimCallLabel :: PrimCall -> CLabel
@@ -671,6 +675,8 @@ mkRtsApFastLabel str = RtsLabel (RtsApFast str)
 mkRtsSlowFastTickyCtrLabel :: String -> CLabel
 mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat)
 
+mkRtsMkStringLabel :: CLabel
+mkRtsMkStringLabel = RtsLabel RtsMkStringInfoTable
 
 -- Constructing Code Coverage Labels
 mkHpcTicksLabel :: Module -> CLabel
@@ -1296,6 +1302,8 @@ pprCLbl dflags = \case
    (CCS_Label ccs)     -> ppr ccs
    (HpcTicksLabel mod) -> text "_hpc_tickboxes_"  <> ppr mod <> ptext (sLit "_hpc")
 
+   (RtsLabel RtsMkStringInfoTable) -> text "stg_MK_STRING_info"
+
    (AsmTempLabel {})        -> panic "pprCLbl AsmTempLabel"
    (AsmTempDerivedLabel {}) -> panic "pprCLbl AsmTempDerivedLabel"
    (DynamicLinkerLabel {})  -> panic "pprCLbl DynamicLinkerLabel"


=====================================
compiler/GHC/StgToCmm/Bind.hs
=====================================
@@ -51,6 +51,8 @@ import GHC.Utils.Outputable
 import GHC.Data.FastString
 import GHC.Driver.Session
 
+import GHC.Builtin.Names (unpackCStringName)
+
 import Control.Monad
 
 ------------------------------------------------------------------------
@@ -76,6 +78,9 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
       lf_info       = mkClosureLFInfo platform id TopLevel [] upd_flag args
   in (cg_id_info, gen_code dflags lf_info closure_label)
   where
+
+  gen_code :: DynFlags -> LambdaFormInfo -> CLabel -> FCode ()
+
   -- special case for a indirection (f = g).  We create an IND_STATIC
   -- closure pointing directly to the indirectee.  This is exactly
   -- what the CAF will eventually evaluate to anyway, we're just
@@ -90,11 +95,34 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
   -- concurrent/should_run/4030 fails, for instance.
   --
   gen_code _ _ closure_label
-    | StgApp f [] <- body, null args, isNonRec rec
+    | StgApp f [] <- body
+    , null args
+    , isNonRec rec
     = do
          cg_info <- getCgIdInfo f
          emitDataCon closure_label indStaticInfoTable ccs [unLit (idInfoToAmode cg_info)]
 
+  gen_code _ _ closure_label
+    | StgApp f [arg] <- stripStgTicksTopE (not . tickishIsCode) body
+    , idName f == unpackCStringName
+    = do -- TODO: What to do with ticks?
+         pprTrace "unpackCString#" (ppr body) (return ())
+         arg' <- getArgAmode (NonVoid arg)
+         case arg' of
+           CmmLit lit -> do
+             let payload = [lit]
+             let info = CmmInfoTable
+                   { cit_lbl = mkRtsMkStringLabel
+                   , cit_rep = HeapRep True 0 1 Thunk
+                   , cit_prof = NoProfilingInfo
+                   , cit_srt = Nothing
+                   , cit_clo = Nothing
+                   }
+             emitDecl (CmmData (Section Data closure_label) (CmmStatics closure_label info ccs payload))
+
+           _ -> panic "cgTopRhsClosure.gen_code"
+
+
   gen_code dflags lf_info _closure_label
    = do { let name = idName id
         ; mod_name <- getModuleName
@@ -222,6 +250,34 @@ mkRhsClosure :: DynFlags -> Id -> CostCentreStack
              -> CgStgExpr
              -> FCode (CgIdInfo, FCode CmmAGraph)
 
+{-
+    TODO: Consider handling this too. Not sure if it's going to save us much to
+          so this needs benchmarking.
+
+---------- unpackCString# --------------------
+mkRhsClosure    dflags bndr _cc
+                []         -- No free variables, because this is top-level
+                Updatable  -- Updatable thunk
+                []         -- A thunk
+                expr
+
+  | let expr_no_ticks = stripStgTicksTopE (not . tickishIsCode) expr
+  , StgApp fn [arg] <- expr
+  , idName fn == unpackCStringName
+  = -- TODO: What to do with ticks?
+    -- A non-top-level unpackCString# closure. Most unpackCString# closures are
+    -- floted to the top-level, but sometimes we see simplifier-generated thunks
+    -- like:
+    --
+    --     sat_sK0 [Occ=Once] :: [GHC.Types.Char]
+    --     [LclId] =
+    --         {} \u []
+    --             GHC.CString.unpackCString#
+    --                 "Oops! The program has entered an `absent' argument!\n"#;
+    --
+    pprPanic "mkRhsClosure" (text "unpackCString# closure:" <+> ppr expr)
+-}
+
 {- mkRhsClosure looks for two special forms of the right-hand side:
         a) selector thunks
         b) AP thunks


=====================================
includes/stg/MiscClosures.h
=====================================
@@ -241,6 +241,9 @@ RTS_THUNK(stg_ap_5_upd);
 RTS_THUNK(stg_ap_6_upd);
 RTS_THUNK(stg_ap_7_upd);
 
+// Standard entry for `unpackCString# str` thunks
+RTS_ENTRY(stg_MK_STRING);
+
 /* standard application routines (see also utils/genapply,
  * and GHC.StgToCmm.ArgRep).
  */


=====================================
rts/RtsSymbols.c
=====================================
@@ -914,6 +914,7 @@
       SymI_HasProto(stg_sel_13_noupd_info)                              \
       SymI_HasProto(stg_sel_14_noupd_info)                              \
       SymI_HasProto(stg_sel_15_noupd_info)                              \
+      SymI_HasProto(stg_MK_STRING_info)                                 \
       SymI_HasProto(stg_upd_frame_info)                                 \
       SymI_HasProto(stg_bh_upd_frame_info)                              \
       SymI_HasProto(suspendThread)                                      \


=====================================
rts/StgStdThunks.cmm
=====================================
@@ -13,6 +13,8 @@
 #include "Cmm.h"
 #include "Updates.h"
 
+import CLOSURE ghczmprim_GHCziCString_unpackCStringzh_info;
+
 /* -----------------------------------------------------------------------------
    The code for a thunk that simply extracts a field from a
    single-constructor datatype depends only on the offset of the field
@@ -284,3 +286,31 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
            StgThunk_payload(node,6));
     }
 }
+
+/* -----------------------------------------------------------------------------
+   Making strings
+   -------------------------------------------------------------------------- */
+
+INFO_TABLE(stg_MK_STRING, 0, 1, THUNK_STATIC, "stg_MK_STRING", "stg_MK_STRING")
+    (P_ node)
+{
+    W_ newCAF_ret;
+
+    // TODO (osa): Not sure why we do stack check before `newCAF`, but this is
+    // how `unpackCString# str` thunks are today.
+    if (Sp - WDS(2) < SpLim) {
+        jump stg_gc_enter_1(node);
+    }
+
+    (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+    if (newCAF_ret == 0) {
+        jump node();
+    } else {
+        // Stack check done above
+        Sp_adj(-2);
+        Sp(1) = node;
+        Sp(0) = stg_bh_upd_frame_info;
+        jump stg_ap_n_fast(ghczmprim_GHCziCString_unpackCStringzh_info, StgThunk_payload(node, 0));
+    }
+}



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6a1ac542f52e86daf4992adcf30eef39ebeb8db3

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6a1ac542f52e86daf4992adcf30eef39ebeb8db3
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200528/9093d571/attachment-0001.html>


More information about the ghc-commits mailing list