[Git][ghc/ghc][wip/ncg-simd] 2 commits: Revert "X86 genCCall64: simplify loadArg code"

Sat Sep 21 10:48:10 UTC 2024

sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
2a02aea3 by sheaf at 2024-09-21T12:25:11+02:00
Revert "X86 genCCall64: simplify loadArg code"

This reverts commit 3cc298b9343218d49507c2f669c9303e58e115b5.

- - - - -
7f962063 by sheaf at 2024-09-21T12:47:58+02:00
WIP pushArgByValue

- - - - -


1 changed file:

- compiler/GHC/CmmToAsm/X86/CodeGen.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1965,17 +1965,6 @@ getRegister' platform is32Bit (CmmLit lit)
 getRegister' platform is32Bit (CmmLit lit) = do
   avx <- avxEnabled
 
-  -- NB: it is important that the code produced here (to load a literal into
-  -- a register) doesn't clobber any registers other than the destination
-  -- register; the code for generating C calls relies on this property.
-  --
-  -- In particular, we have:
-  --
-  -- > loadIntoRegMightClobberOtherReg (CmmLit _) = False
-  --
-  -- which means that we assume that loading a literal into a register
-  -- will not clobber any other registers.
-
   -- TODO: this function mishandles floating-point negative zero,
   -- because -0.0 == 0.0 returns True and because we represent CmmFloat as
   -- Rational, which can't properly represent negative zero.
@@ -3091,8 +3080,10 @@ genSimplePrim _   op                   dst     args           = do
   platform <- ncgPlatform <$> getConfig
   pprPanic "genSimplePrim: unhandled primop" (ppr (pprCallishMachOp op, dst, fmap (pdoc platform) args))
 
-{- Note [Evaluate C-call arguments before placing in destination registers]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{-
+Note [Evaluate C-call arguments before placing in destination registers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 When producing code for C calls we must take care when placing arguments
 in their final registers. Specifically, we must ensure that temporary register
 usage due to evaluation of one argument does not clobber a register in which we
@@ -3143,11 +3134,15 @@ genForeignCall{32,64}.
 -- | See Note [Evaluate C-call arguments before placing in destination registers]
 evalArgs :: BlockId -> [CmmActual] -> NatM (InstrBlock, [CmmActual])
 evalArgs bid actuals
-  | any loadIntoRegMightClobberOtherReg actuals = do
+  | any mightContainMachOp actuals = do
       regs_blks <- mapM evalArg actuals
       return (concatOL $ map fst regs_blks, map snd regs_blks)
   | otherwise = return (nilOL, actuals)
   where
+    mightContainMachOp (CmmReg _)      = False
+    mightContainMachOp (CmmRegOff _ _) = False
+    mightContainMachOp (CmmLit _)      = False
+    mightContainMachOp _               = True
 
     evalArg :: CmmActual -> NatM (InstrBlock, CmmExpr)
     evalArg actual = do
@@ -3161,16 +3156,6 @@ evalArgs bid actuals
     newLocalReg :: CmmType -> NatM LocalReg
     newLocalReg ty = LocalReg <$> getUniqueM <*> pure ty

--- | Might the code to put this expression into a register
--- clobber any other registers?
-loadIntoRegMightClobberOtherReg :: CmmExpr -> Bool
-loadIntoRegMightClobberOtherReg (CmmReg _)      = False
-loadIntoRegMightClobberOtherReg (CmmRegOff _ _) = False
-loadIntoRegMightClobberOtherReg (CmmLit _)      = False
-  -- NB: this last 'False' is slightly risky, because the code for loading
-  -- a literal into a register is not entirely trivial.
-loadIntoRegMightClobberOtherReg _               = True
-
 -- Note [DIV/IDIV for bytes]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~
 -- IDIV reminder:
@@ -3245,9 +3230,10 @@ genCCall
   -> NatM InstrBlock
 genCCall bid addr conv@(ForeignConvention _ argHints _ _) dest_regs args = do
   platform <- getPlatform
-  let args_hints = zip args (argHints ++ repeat NoHint)
-      prom_args = map (maybePromoteCArg platform W32) args_hints
   is32Bit <- is32BitPlatform
+  let args_hints = zip args (argHints ++ repeat NoHint)
+      prom_width = if is32Bit then W32 else W64
+      prom_args = map (maybePromoteCArg platform prom_width) args_hints
   (instrs0, args') <- evalArgs bid prom_args
   instrs1 <- if is32Bit
     then genCCall32 addr conv dest_regs args'
@@ -3256,12 +3242,17 @@ genCCall bid addr conv@(ForeignConvention _ argHints _ _) dest_regs args = do
 
 maybePromoteCArg :: Platform -> Width -> (CmmExpr, ForeignHint) -> CmmExpr
 maybePromoteCArg platform wto (arg, hint)
- | wfrom < wto = case hint of
-     SignedHint -> CmmMachOp (MO_SS_Conv wfrom wto) [arg]
-     _          -> CmmMachOp (MO_UU_Conv wfrom wto) [arg]
+ | wfrom < wto =
+    if isFloatType ty
+    then CmmMachOp (MO_FF_Conv wfrom wto) [arg]
+    else
+     case hint of
+       SignedHint -> CmmMachOp (MO_SS_Conv wfrom wto) [arg]
+       _          -> CmmMachOp (MO_UU_Conv wfrom wto) [arg]
  | otherwise   = arg
  where
-   wfrom = cmmExprWidth platform arg
+   ty = cmmExprType platform arg
+   wfrom = typeWidth ty
 
 genCCall32 :: CmmExpr           -- ^ address of the function to call
            -> ForeignConvention -- ^ calling convention
@@ -3431,6 +3422,7 @@ genCCall64 addr conv dest_regs args = do
       { stackArgs       = proper_stack_args
       , stackDataArgs   = stack_data_args
       , usedRegs        = arg_regs_used
+      , computeArgsCode = compute_args_code
       , assignArgsCode  = assign_args_code
       }
       <- loadArgs config args
@@ -3529,6 +3521,7 @@ genCCall64 addr conv dest_regs args = do
 
     return (align_call_code     `appOL`
             push_code           `appOL`
+            compute_args_code   `appOL`
             assign_args_code    `appOL`
             load_data_refs      `appOL`
             shadow_space_code   `appOL`
@@ -3549,14 +3542,16 @@ data LoadArgs
   , stackDataArgs :: [CmmExpr]
   -- | Which registers are we using for argument passing?
   , usedRegs      :: [RegWithFormat]
+  -- | The code to compute arguments into (possibly temporary) registers.
+  , computeArgsCode :: InstrBlock
   -- | The code to assign arguments to registers used for argument passing.
   , assignArgsCode :: InstrBlock
   }
 instance Semigroup LoadArgs where
-  LoadArgs a1 d1 r1 j1 <> LoadArgs a2 d2 r2 j2
-    = LoadArgs (a1 ++ a2) (d1 ++ d2) (r1 ++ r2) (j1 S.<> j2)
+  LoadArgs a1 d1 r1 i1 j1 <> LoadArgs a2 d2 r2 i2 j2
+    = LoadArgs (a1 ++ a2) (d1 ++ d2) (r1 ++ r2) (i1 S.<> i2) (j1 S.<> j2)
 instance Monoid LoadArgs where
-  mempty = LoadArgs [] [] [] nilOL
+  mempty = LoadArgs [] [] [] nilOL nilOL
 
 -- | An argument passed on the stack, either directly or by reference.
 --
@@ -3715,6 +3710,7 @@ loadArgsSysV config (arg:rest) = do
           LoadArgs
             { stackArgs       = map RawStackArg (arg:rest)
             , stackDataArgs   = []
+            , computeArgsCode = nilOL
             , assignArgsCode  = nilOL
             , usedRegs        = []
             }
@@ -3734,11 +3730,12 @@ loadArgsSysV config (arg:rest) = do
     this_arg <-
       case mbReg of
         Just reg -> do
-          assign_code <- lift $ loadArgIntoReg arg reg
+          (compute_code, assign_code) <- lift $ loadArgIntoReg config arg rest reg
           return $
             LoadArgs
                 { stackArgs       = [] -- passed in register
                 , stackDataArgs   = []
+                , computeArgsCode = compute_code
                 , assignArgsCode  = assign_code
                 , usedRegs        = [RegWithFormat reg arg_fmt]
                 }
@@ -3748,6 +3745,7 @@ loadArgsSysV config (arg:rest) = do
             LoadArgs
                 { stackArgs       = [RawStackArg arg]
                 , stackDataArgs   = []
+                , computeArgsCode = nilOL
                 , assignArgsCode  = nilOL
                 , usedRegs        = []
                 }
@@ -3799,6 +3797,7 @@ loadArgsWin config (arg:rest) = do
         LoadArgs
           { stackArgs       = stk_args
           , stackDataArgs   = data_args
+          , computeArgsCode = nilOL
           , assignArgsCode  = nilOL
           , usedRegs        = []
           }
@@ -3814,7 +3813,8 @@ loadArgsWin config (arg:rest) = do
                 -- Pass the reference in a register,
                 -- and the argument data on the stack.
                 { stackArgs       = [RawStackArgRef (InReg ireg) (argSize platform arg)]
-                , stackDataArgs   = [arg] -- we don't yet know where the data will reside,
+                , stackDataArgs   = [arg]
+                , computeArgsCode = nilOL -- we don't yet know where the data will reside,
                 , assignArgsCode  = nilOL -- so we defer computing the reference and storing it
                                           -- in the register until later
                 , usedRegs        = [RegWithFormat ireg II64]
@@ -3825,7 +3825,7 @@ loadArgsWin config (arg:rest) = do
                   = freg
                   | otherwise
                   = ireg
-           assign_code <- loadArgIntoReg arg arg_reg
+           (compute_code, assign_code) <- loadArgIntoReg config arg rest arg_reg
            -- Recall that, for varargs, we must pass floating-point
            -- arguments in both fp and integer registers.
            let (assign_code', regs')
@@ -3838,23 +3838,42 @@ loadArgsWin config (arg:rest) = do
              LoadArgs
                { stackArgs       = [] -- passed in register
                , stackDataArgs   = []
+               , computeArgsCode = compute_code
                , assignArgsCode = assign_code'
                , usedRegs = regs'
                }
 
--- | Load an argument into a register.
+
+-- | Return two pieces of code:
 --
--- Assumes that the expression does not contain any MachOps,
--- as per Note [Evaluate C-call arguments before placing in destination registers].
-loadArgIntoReg :: CmmExpr -> Reg -> NatM InstrBlock
-loadArgIntoReg arg reg = do
-  when (debugIsOn && loadIntoRegMightClobberOtherReg arg) $ do
-    platform <- getPlatform
-    massertPpr False $
-      vcat [ text "loadArgIntoReg: arg might contain MachOp"
-           , text "arg:" <+> pdoc platform arg ]
-  arg_code <- getAnyReg arg
-  return $ arg_code reg
+--  - code to compute a the given 'CmmExpr' into some (possibly temporary) register
+--  - code to assign the resulting value to the specified register
+--
+-- Using two separate pieces of code handles clobbering issues reported
+-- in e.g. #11792, #12614.
+loadArgIntoReg :: NCGConfig -> CmmExpr -> [CmmExpr] -> Reg -> NatM (InstrBlock, InstrBlock)
+loadArgIntoReg config arg rest reg
+  -- "operand" args can be directly assigned into the register
+  | isOperand platform arg
+  = do arg_code <- getAnyReg arg
+       return (nilOL, arg_code reg)
+  -- The last non-operand arg can be directly assigned after its
+  -- computation without going into a temporary register
+  | all (isOperand platform) rest
+  = do arg_code <- getAnyReg arg
+       return (arg_code reg, nilOL)
+  -- Other args need to be computed beforehand to avoid clobbering
+  -- previously assigned registers used to pass parameters (see
+  -- #11792, #12614). They are assigned into temporary registers
+  -- and get assigned to proper call ABI registers after they all
+  -- have been computed.
+  | otherwise
+  = do arg_code <- getAnyReg arg
+       tmp      <- getNewRegNat arg_fmt
+       return (arg_code tmp, unitOL $ mkRegRegMoveInstr config arg_fmt tmp reg)
+  where
+    platform = ncgPlatform config
+    arg_fmt = cmmTypeFormat $ cmmExprType platform arg
 
 -- -----------------------------------------------------------------------------
 -- Pushing arguments onto the stack for 64-bit C calls.
@@ -3885,23 +3904,39 @@ addStackPadding pad_bytes
 --
 -- Assumes the current stack pointer fulfills any necessary alignment requirements.
 pushArgByValue :: NCGConfig -> CmmExpr -> NatM InstrBlock
-pushArgByValue config arg = do
-  (arg_reg, arg_code) <- getSomeReg arg
-  delta <- getDeltaNat
-  setDeltaNat (delta-arg_size)
-  let fmt = cmmTypeFormat arg_rep
-  return $ arg_code `appOL` toOL
-     [ SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp)
-     , DELTA (delta-arg_size)
-     -- NB: it's important to only move the actual size,
-     -- e.g. for W8 width we must only move 8 bits and not 64.
-     -- Suppose for example the argument is (CmmLoad addr W8 aln);
-     -- then we must make sure not to try to read more than 8 bits from 'addr'.
-     , movInstr config fmt (OpReg arg_reg) (OpAddr (spRel platform 0)) ]
+pushArgByValue config arg
+   | isIntFormat fmt
+   = do
+     -- Arguments have been promoted to 64-bits wide, so we can use @PUSH II64 at .
+     -- (Recall that both the Windows and System V 64-bit C calling conventions
+     -- expect all arguments to be 8-byte aligned.)
+     --
+     -- Note that promotion is crucial. Otherwise, if we tried to push an
+     -- argument such as @CmmLoad addr W32 aln@, we could end up reading
+     -- unmapped memory and segfaulting.
+     (arg_op, arg_code) <- getOperand arg
+     delta <- getDeltaNat
+     setDeltaNat (delta-arg_size)
+     return $
+       arg_code `appOL` toOL
+       [ PUSH II64 arg_op
+       , DELTA (delta-arg_size) ]
+
+   | otherwise
+   = do
+     (arg_reg, arg_code) <- getSomeReg arg
+     delta <- getDeltaNat
+     setDeltaNat (delta-arg_size)
+     return $ arg_code `appOL` toOL
+        [ SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp)
+        , DELTA (delta-arg_size)
+        , movInstr config fmt (OpReg arg_reg) (OpAddr (spRel platform 0)) ]
+
     where
       platform = ncgPlatform config
       arg_size = argSize platform arg
       arg_rep = cmmExprType platform arg
+      fmt = cmmTypeFormat arg_rep
 
 -- | Load an argument into a register or push it to the stack.
 loadOrPushArg :: NCGConfig -> (StackArg, Maybe Int) -> NatM (InstrBlock, InstrBlock)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/13c949c5140994c7499551f105d8d7c85315470f...7f962063eeb6676ae559d88146b1a2d7f4950596

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/13c949c5140994c7499551f105d8d7c85315470f...7f962063eeb6676ae559d88146b1a2d7f4950596
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240921/7cc246cd/attachment-0001.html>