[Git][ghc/ghc][wip/romes/12935] WIP

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Fri May 24 14:08:31 UTC 2024



Rodrigo Mesquita pushed to branch wip/romes/12935 at Glasgow Haskell Compiler / GHC


Commits:
d4d6e4aa by Rodrigo Mesquita at 2024-05-24T15:08:15+01:00
WIP

- - - - -


6 changed files:

- compiler/GHC/Cmm/CLabel.hs
- + compiler/GHC/Cmm/UniqueRenamer.hs
- compiler/GHC/CmmToAsm.hs
- compiler/GHC/Driver/CodeOutput.hs
- compiler/GHC/Types/Unique.hs
- compiler/ghc.cabal.in


Changes:

=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -1,3 +1,4 @@
+{-# LANGUAGE LambdaCase #-}
 -----------------------------------------------------------------------------
 --
 -- Object-file symbols (called CLabel for historical reasons).
@@ -134,7 +135,8 @@ module GHC.Cmm.CLabel (
         -- * Others
         dynamicLinkerLabelInfo,
         addLabelSize,
-        foreignLabelStdcallInfo
+        foreignLabelStdcallInfo,
+        mapInternalNonDetUniques
     ) where
 
 import GHC.Prelude
@@ -1878,3 +1880,37 @@ The transformation is performed because
      T15155.a_closure `mayRedirectTo` a1_rXq_closure+1
 returns True.
 -}
+
+-- | A utility for renaming uniques in CLabels to produce deterministic object.
+-- Note that not all Uniques are mapped over. Only those that can be safely alpha
+-- renamed, eg uniques of local symbols or of system names.
+-- See Note [....TODO]
+mapInternalNonDetUniques :: Applicative m => (Unique -> m Unique) -> CLabel -> m CLabel
+mapInternalNonDetUniques f = \case
+  il@(IdLabel name cafInfo idLabelInfo)
+    | isInternalName name
+    -> IdLabel . setNameUnique name <$> f (nameUnique name) <*> pure cafInfo <*> pure idLabelInfo
+    | otherwise
+    -> pure il
+  cl at CmmLabel{} -> pure cl
+  -- ROMES:TODO: what about `RtsApFast NonDetFastString`?
+  RtsLabel rtsLblInfo -> pure $ RtsLabel rtsLblInfo
+  LocalBlockLabel unique -> LocalBlockLabel <$> f unique
+  fl at ForeignLabel{} -> pure fl 
+  AsmTempLabel unique -> AsmTempLabel <$> f unique
+  AsmTempDerivedLabel clbl fs -> AsmTempDerivedLabel <$> mapInternalNonDetUniques f clbl <*> pure fs
+  StringLitLabel unique -> StringLitLabel <$> f unique
+  CC_Label  cc -> pure $ CC_Label cc
+  CCS_Label ccs -> pure $ CCS_Label ccs
+  IPE_Label ipe at InfoProvEnt{infoTablePtr} ->
+    (\cl' -> IPE_Label ipe{infoTablePtr = cl'}) <$> mapInternalNonDetUniques f infoTablePtr
+  ml at ModuleLabel{} -> pure ml
+  -- ROMES:TODO: Suspicious, maybe we shouldn't rename these.
+  DynamicLinkerLabel dlli clbl -> DynamicLinkerLabel dlli <$> mapInternalNonDetUniques f clbl
+  PicBaseLabel -> pure PicBaseLabel
+  DeadStripPreventer clbl -> DeadStripPreventer <$> mapInternalNonDetUniques f clbl
+  HpcTicksLabel mod -> pure $ HpcTicksLabel mod
+  SRTLabel unique -> SRTLabel <$> f unique
+  LargeBitmapLabel unique -> LargeBitmapLabel <$> f unique
+
+


=====================================
compiler/GHC/Cmm/UniqueRenamer.hs
=====================================
@@ -0,0 +1,213 @@
+{-# LANGUAGE LambdaCase #-}
+module GHC.Cmm.UniqueRenamer
+  ( detRenameUniques
+
+  -- Careful! Not for general use!
+  , DetUniqFM, emptyDetUFM)
+  where
+
+import Prelude
+import Control.Monad.Trans.State
+import GHC.Word
+import GHC.Cmm
+import GHC.Cmm.CLabel
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Types.Unique
+import GHC.Types.Unique.FM
+import Data.Tuple (swap)
+
+{-
+--------------------------------------------------------------------------------
+-- * Deterministic Objects
+--------------------------------------------------------------------------------
+
+** Write many notes in a collective note.
+
+Topics:
+* Before generating Code, we rename all uniques of local symbols deterministically
+* The code generation (like Assembly fix ups) need
+
+-}
+
+-- | A mapping from non-deterministic uniques to deterministic uniques, to
+-- rename local symbols with the end goal of producing deterministic object files.
+-- See Note [....TODO]
+data DetUniqFM = DetUniqFM
+  { mapping :: UniqFM Unique Unique
+  , supply :: !Word64
+  }
+
+type DetRnM = State DetUniqFM
+
+emptyDetUFM :: DetUniqFM
+emptyDetUFM = DetUniqFM { mapping = emptyUFM, supply = 1 }
+
+newDetUniq :: DetRnM Unique
+newDetUniq = do
+  w <- gets supply
+  modify' (\d -> d{supply = supply d + 1})
+  return $ mkUniqueGrimily w
+
+renameDetUniq :: Unique -> DetRnM Unique
+renameDetUniq uq = do
+  m <- gets mapping
+  case lookupUFM m uq of
+    Nothing -> do
+      det_uniq <- newDetUniq
+      modify' (\m' -> m'{mapping = addToUFM (mapping m') uq det_uniq})
+      return det_uniq
+    Just det_uniq ->
+      return det_uniq
+
+-- Rename local symbols deterministically (in order of appearance)
+detRenameUniques :: DetUniqFM -> RawCmmGroup -> (DetUniqFM, RawCmmGroup)
+detRenameUniques dufm group = swap $ runState (mapM uniqRename group) dufm
+
+-- The most important function here, which does the actual renaming.
+-- Arguably, maybe we should rename this to CLabelRenamer
+detRenameCLabel :: CLabel -> DetRnM CLabel
+detRenameCLabel = mapInternalNonDetUniques renameDetUniq
+
+--------------------------------------------------------------------------------
+-- Traversals
+--------------------------------------------------------------------------------
+-- I think I should be able to implement this using some generic traversal,
+-- which would be cleaner
+
+class UniqRenamable a where
+  uniqRename :: a -> DetRnM a
+
+instance UniqRenamable CLabel where
+  -- The most important renaming. The rest are just traversals.
+  uniqRename = detRenameCLabel
+
+instance UniqRenamable CmmFormal where
+  -- = LocalReg {-# UNPACK #-} !Unique !CmmType
+  --   -- ^ Parameters:
+  --   --   1. Identifier
+  --   --   2. Type
+  -- ROMES:TODO:
+  uniqRename = pure
+
+instance UniqRenamable CmmTickScope where
+  -- ROMES:TODO: What about this one for ticks? Ask Andreas
+  uniqRename = pure
+
+instance UniqRenamable CmmTickish where
+  -- ROMES:TODO: Hmmm
+  uniqRename = pure
+
+-- * Traversals from here on out
+
+instance UniqRenamable (GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph) where
+  uniqRename (CmmProc h lbl regs g)
+    = CmmProc <$> uniqRename h <*> uniqRename lbl <*> mapM uniqRename regs <*> uniqRename g
+  uniqRename (CmmData sec d)
+    = CmmData <$> uniqRename sec <*> uniqRename d
+
+instance UniqRenamable Section where
+  uniqRename (Section ty lbl) = Section ty <$> uniqRename lbl
+
+instance UniqRenamable RawCmmStatics where
+  uniqRename (CmmStaticsRaw lbl sts)
+    = CmmStaticsRaw <$> uniqRename lbl <*> mapM uniqRename sts
+
+instance UniqRenamable CmmStatic where
+  uniqRename = \case
+    CmmStaticLit l -> CmmStaticLit <$> uniqRename l
+    CmmUninitialised x -> pure $ CmmUninitialised x
+    CmmString x -> pure $ CmmString x
+    CmmFileEmbed f i -> pure $ CmmFileEmbed f i
+
+instance UniqRenamable CmmLit where
+  uniqRename = \case
+    CmmInt i w -> pure $ CmmInt i w
+    CmmFloat r w -> pure $ CmmFloat r w
+    CmmVec lits -> CmmVec <$> mapM uniqRename lits
+    CmmLabel lbl -> CmmLabel <$> uniqRename lbl
+    CmmLabelOff lbl i -> CmmLabelOff <$> uniqRename lbl <*> pure i
+    CmmLabelDiffOff lbl1 lbl2 i w ->
+      CmmLabelDiffOff <$> uniqRename lbl1 <*> uniqRename lbl2 <*> pure i <*> pure w
+    CmmBlock bid -> pure $ CmmBlock bid
+    CmmHighStackMark -> pure CmmHighStackMark
+
+instance UniqRenamable a {- for 'Body' and on 'RawCmmStatics' -}
+  => UniqRenamable (LabelMap a) where
+  uniqRename = traverse uniqRename
+
+instance UniqRenamable CmmGraph where
+  uniqRename (CmmGraph e g) = CmmGraph e <$> uniqRename g
+
+instance UniqRenamable (Graph CmmNode n m) where
+  uniqRename = \case
+    GNil  -> pure GNil
+    GUnit block -> GUnit <$> uniqRename block
+    GMany m1 b m2 -> GMany <$> uniqRename m1 <*> uniqRename b <*> uniqRename m2
+
+instance UniqRenamable t => UniqRenamable (MaybeO n t) where
+  uniqRename (JustO x) = JustO <$> uniqRename x
+  uniqRename NothingO = pure NothingO
+
+instance UniqRenamable (Block CmmNode n m) where
+  uniqRename = \case
+    BlockCO n bn -> BlockCO <$> uniqRename n <*> uniqRename bn
+    BlockCC n1 bn n2 -> BlockCC <$> uniqRename n1 <*> uniqRename bn <*> uniqRename n2
+    BlockOC bn n -> BlockOC <$> uniqRename bn <*> uniqRename n
+                                                             
+    BNil    -> pure BNil
+    BMiddle n -> BMiddle <$> uniqRename n
+    BCat    b1 b2 -> BCat <$> uniqRename b1 <*> uniqRename b2
+    BSnoc   bn n -> BSnoc <$> uniqRename bn <*> uniqRename n
+    BCons   n bn -> BCons <$> uniqRename n <*> uniqRename bn
+
+instance UniqRenamable (CmmNode n m) where
+  uniqRename = \case
+    CmmEntry l t -> CmmEntry l <$> uniqRename t
+    CmmComment fs -> pure $ CmmComment fs
+    CmmTick tickish -> CmmTick <$> uniqRename tickish
+    CmmUnwind xs -> CmmUnwind <$> mapM uniqRename xs
+    CmmAssign reg e -> CmmAssign <$> uniqRename reg <*> uniqRename e
+    CmmStore e1 e2 align -> CmmStore <$> uniqRename e1 <*> uniqRename e2 <*> pure align
+    CmmUnsafeForeignCall ftgt cmmformal cmmactual ->
+      CmmUnsafeForeignCall <$> uniqRename ftgt <*> mapM uniqRename cmmformal <*> mapM uniqRename cmmactual
+    CmmBranch l -> pure $ CmmBranch l
+    CmmCondBranch pred t f likely ->
+      CmmCondBranch <$> uniqRename pred <*> pure t <*> pure f <*> pure likely
+    CmmSwitch e sts -> CmmSwitch <$> uniqRename e <*> pure sts
+    CmmCall tgt cont regs args retargs retoff ->
+      CmmCall <$> uniqRename tgt <*> pure cont <*> mapM uniqRename regs
+              <*> pure args <*> pure retargs <*> pure retoff
+    CmmForeignCall tgt res args succ retargs retoff intrbl ->
+      CmmForeignCall <$> uniqRename tgt <*> mapM uniqRename res <*> mapM uniqRename args
+                     <*> pure succ <*> pure retargs <*> pure retoff <*> pure intrbl
+
+instance UniqRenamable GlobalReg where
+  uniqRename = pure
+
+instance UniqRenamable CmmExpr where
+  uniqRename = \case
+    CmmLit l -> CmmLit <$> uniqRename l
+    CmmLoad e t a -> CmmLoad <$> uniqRename e <*> pure t <*> pure a
+    CmmReg r -> CmmReg <$> uniqRename r
+    CmmMachOp mop es -> CmmMachOp mop <$> mapM uniqRename es
+    CmmStackSlot a i -> pure $ CmmStackSlot a i
+    CmmRegOff r i -> CmmRegOff <$> uniqRename r <*> pure i
+
+instance UniqRenamable ForeignTarget where
+  uniqRename = \case
+    ForeignTarget e fc -> ForeignTarget <$> uniqRename e <*> pure fc
+    PrimTarget cmop -> pure $ PrimTarget cmop
+
+instance UniqRenamable CmmReg where
+  uniqRename = \case
+    CmmLocal l -> CmmLocal <$> uniqRename l
+    CmmGlobal x -> pure $ CmmGlobal x
+
+instance (UniqRenamable a, UniqRenamable b) => UniqRenamable (a, b) where
+  uniqRename (a, b) = (,) <$> uniqRename a <*> uniqRename b
+
+instance (UniqRenamable a) => UniqRenamable (Maybe a) where
+  uniqRename Nothing = pure Nothing
+  uniqRename (Just x) = Just <$> uniqRename x


=====================================
compiler/GHC/CmmToAsm.hs
=====================================
@@ -494,6 +494,8 @@ cmmNativeGen logger ncgImpl us fileIds dbgMap cmm count
                 FormatCMM
                 (vcat $ map (pprLiveCmmDecl platform) withLiveness)
 
+        -- ROMES:TODO: RENAME VIRTUAL REGISTERS DETERMINISTICALLY
+
         -- allocate registers
         (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
          if ( ncgRegsGraph config || ncgRegsIterative config )
@@ -555,6 +557,7 @@ cmmNativeGen logger ncgImpl us fileIds dbgMap cmm count
                         , [], stack_updt_blks)
 
           else do
+
                 -- do linear register allocation
                 let reg_alloc proc = do
                        (alloced, maybe_more_stack, ra_stats) <-


=====================================
compiler/GHC/Driver/CodeOutput.hs
=====================================
@@ -38,6 +38,7 @@ import GHC.Driver.Backend
 import qualified GHC.Data.ShortText as ST
 import GHC.Data.Stream           ( Stream )
 import qualified GHC.Data.Stream as Stream
+import GHC.Cmm.UniqueRenamer
 
 import GHC.Utils.TmpFs
 
@@ -94,11 +95,21 @@ codeOutput logger tmpfs llvm_config dflags unit_state this_mod filenm location g
   cmm_stream
   =
     do  {
+        -- To produce deterministic object code, we alpha-rename all Uniques to deterministic uniques before Cmm linting.
+        -- From here on out, the backend code generation can't use (non-deterministic) Uniques, or risk producing non-deterministic code.
+        -- For example, the fix-up action in the ASM NCG should use determinist names for potential new blocks it has to create.
+        -- Therefore, in the ASM NCG `NatM` Monad we use a deterministic `UniqSuply` (which won't be shared about multiple threads)
+        -- TODO: Put these all into notes carefully organized
+        ; let renamed_cmm_stream =
+                -- if gopt Opt_DeterministicObjects dflags
+
+                snd  <$> Stream.mapAccumL_ (fmap pure . detRenameUniques) emptyDetUFM cmm_stream
+
         -- Lint each CmmGroup as it goes past
         ; let linted_cmm_stream =
                  if gopt Opt_DoCmmLinting dflags
-                    then Stream.mapM do_lint cmm_stream
-                    else cmm_stream
+                    then Stream.mapM do_lint renamed_cmm_stream
+                    else renamed_cmm_stream
 
               do_lint cmm = withTimingSilent logger
                   (text "CmmLint"<+>brackets (ppr this_mod))
@@ -397,3 +408,4 @@ ipInitCode do_info_table platform this_mod
 
    ipe_buffer_decl =
        text "extern IpeBufferListNode" <+> ipe_buffer_label <> text ";"
+


=====================================
compiler/GHC/Types/Unique.hs
=====================================
@@ -249,6 +249,8 @@ use `deriving' because we want {\em precise} control of ordering
 -- the interface files are created, in particular we don't care about
 -- register allocation and code generation.
 -- To track progress on bit-for-bit determinism see #12262.
+--
+-- ROMES:TODO: Will need to update this ^ !
 
 eqUnique :: Unique -> Unique -> Bool
 eqUnique (MkUnique u1) (MkUnique u2) = u1 == u2


=====================================
compiler/ghc.cabal.in
=====================================
@@ -260,6 +260,7 @@ Library
         GHC.Cmm.Switch
         GHC.Cmm.Switch.Implement
         GHC.Cmm.ThreadSanitizer
+        GHC.Cmm.UniqueRenamer
         GHC.CmmToAsm
         GHC.Cmm.LRegSet
         GHC.CmmToAsm.AArch64



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d4d6e4aae66a389014025c6db057965c084d67e3

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/d4d6e4aae66a389014025c6db057965c084d67e3
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240524/0142f4ee/attachment-0001.html>


More information about the ghc-commits mailing list