[Git][ghc/ghc][wip/supersven/ghc-9.10-riscv-ncg] 4 commits: Comments and formatting

Fri Jul 19 11:28:23 UTC 2024

Sven Tennie pushed to branch wip/supersven/ghc-9.10-riscv-ncg at Glasgow Haskell Compiler / GHC


Commits:
0fc9f7e8 by Sven Tennie at 2024-07-19T12:19:24+02:00
Comments and formatting

- - - - -
832ce6aa by Sven Tennie at 2024-07-19T12:20:36+02:00
De-obfuscate immediate creation a bit

- - - - -
bf05a0a5 by Sven Tennie at 2024-07-19T12:21:08+02:00
Fix C Calling Convention

Arguments are passed as whole words on the stack!

- - - - -
d691cd4f by Sven Tennie at 2024-07-19T12:55:26+02:00
Cleanup comments

- - - - -


2 changed files:

- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -372,8 +372,8 @@ getFloatReg expr = do
 --
 -- N.B. this is a partial function, because not all `CmmLit`s have an immediate
 -- representation.
-litToImm' :: CmmLit -> NatM (Operand, InstrBlock)
-litToImm' lit = return (OpImm (litToImm lit), nilOL)
+litToImm' :: CmmLit -> Operand
+litToImm' = OpImm . litToImm
 
 -- | Compute a `CmmExpr` into a `Register`
 getRegister :: CmmExpr -> NatM Register
@@ -384,10 +384,10 @@ getRegister e = do
 -- | The register width to be used for an operation on the given width
 -- operand.
 opRegWidth :: Width -> Width
-opRegWidth W64 = W64  -- x
-opRegWidth W32 = W32  -- w
-opRegWidth W16 = W32  -- w
-opRegWidth W8  = W32  -- w
+opRegWidth W64 = W64
+opRegWidth W32 = W32
+opRegWidth W16 = W32
+opRegWidth W8  = W32
 opRegWidth w   = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
 
 -- Note [Signed arithmetic on RISCV64]
@@ -428,17 +428,16 @@ opRegWidth w   = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
 -- Craig Topper covers possible future improvements
 -- (https://llvm.org/devmtg/2022-11/slides/TechTalk21-RISC-VSignExtensionOptimizations.pdf)
 --

--- TODO:
---   Don't use Width in Operands
---   Instructions should rather carry a RegWidth
 --
 -- Note [Handling PIC on RV64]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- RV64 does not have a special PIC register, the general approach is to
--- simply go through the GOT, and there is assembly support for this:
+-- RV64 does not have a special PIC register, the general approach is to simply
+-- do PC-relative addressing or go through the GOT. There is assembly support
+-- for both.
 --
 -- rv64 assembly has a `la` (load address) pseudo-instruction, that allows
--- loading a label, ... into a register.  The instruction is desugared into
+-- loading a label's address into a register. The instruction is desugared into
+-- different addressing modes, e.g. PC-relative addressing:
 --
 -- 1: lui  rd1, %pcrel_hi(label)
 --    addi rd1, %pcrel_lo(1b)
@@ -462,12 +461,12 @@ opRegWidth w   = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
 --
 
 getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
+
 -- OPTIMIZATION WARNING: CmmExpr rewrites
 -- 1. Rewrite: Reg + (-n) => Reg - n
 --    TODO: this expression shouldn't even be generated to begin with.
 getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
   = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
-
 getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
   = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
 
@@ -475,6 +474,7 @@ getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i <
 getRegister' config plat expr =
   case expr of
     CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)) ->
+      -- See Note [Handling PIC on RV64]
       pprPanic "getRegister': There's no PIC base register on RISCV" (ppr PicBaseReg)
 
     CmmLit lit ->
@@ -487,10 +487,9 @@ getRegister' config plat expr =
                      in
                         pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm)))
 
-        -- floatToBytes (fromRational f)
         CmmFloat 0 w   -> do
-          (op, imm_code) <- litToImm' lit
-          return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op)))
+          let op = litToImm' lit
+          pure (Any (floatFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) op)))
 
         CmmFloat _f W8  -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
         CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
@@ -509,26 +508,26 @@ getRegister' config plat expr =
                                                       , MOV (OpReg W64 dst) (OpReg W64 tmp)
                                                       ]))
         CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
-        CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
-        CmmLabel _lbl -> do
-          (op, imm_code) <- litToImm' lit
-          let rep = cmmLitType plat lit
+        CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
+        CmmLabel lbl -> do
+          let op = OpImm (ImmCLbl lbl)
+              rep = cmmLitType plat lit
               format = cmmTypeFormat rep
-          return (Any format (\dst -> imm_code `snocOL` annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
+          return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
 
-        CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
-          (op, imm_code) <- litToImm' lit
-          let rep = cmmLitType plat lit
+        CmmLabelOff lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
+          let op = OpImm (ImmIndex lbl off)
+              rep = cmmLitType plat lit
               format = cmmTypeFormat rep
-          return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
+          return (Any format (\dst -> unitOL $ LDR format (OpReg (formatToWidth format) dst) op))
 
         CmmLabelOff lbl off -> do
-          (op, imm_code) <- litToImm' (CmmLabel lbl)
-          let rep = cmmLitType plat lit
+          let op = litToImm' (CmmLabel lbl)
+              rep = cmmLitType plat lit
               format = cmmTypeFormat rep
               width = typeWidth rep
           (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
-          return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
+          return (Any format (\dst -> off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
 
         CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
         CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
@@ -875,16 +874,14 @@ getRegister' config plat expr =
 
     -- Generic ternary case.
     CmmMachOp op [x, y, z] ->
-
       case op of
 
         -- Floating-point fused multiply-add operations
-
-        -- x86 fmadd    x * y + z <=> AArch64 fmadd : d =   r1 * r2 + r3
-        -- x86 fmsub    x * y - z <=> AArch64 fnmsub: d =   r1 * r2 - r3
-        -- x86 fnmadd - x * y + z <=> AArch64 fmsub : d = - r1 * r2 + r3
-        -- x86 fnmsub - x * y - z <=> AArch64 fnmadd: d = - r1 * r2 - r3
-
+        --
+        -- x86 fmadd    x * y + z <=> RISCV64 fmadd : d =   r1 * r2 + r3
+        -- x86 fmsub    x * y - z <=> RISCV64 fnmsub: d =   r1 * r2 - r3
+        -- x86 fnmadd - x * y + z <=> RISCV64 fmsub : d = - r1 * r2 + r3
+        -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3
         MO_FMA var w -> case var of
           FMAdd  -> float3Op w (\d n m a -> unitOL $ FMA FMAdd  d n m a)
           FMSub  -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
@@ -893,7 +890,6 @@ getRegister' config plat expr =
 
         _ -> pprPanic "getRegister' (unhandled ternary CmmMachOp): " $
                 pprMachOp op <+> text "in" <+> pdoc plat expr
-
       where
           float3Op w op = do
             (reg_fx, format_x, code_fx) <- getFloatReg x
@@ -901,7 +897,7 @@ getRegister' config plat expr =
             (reg_fz, format_z, code_fz) <- getFloatReg z
             massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z) $
               text "float3Op: non-float"
-            return $
+            pure $
               Any (floatFormat w) $ \ dst ->
                 code_fx `appOL`
                 code_fy `appOL`
@@ -1328,81 +1324,15 @@ genCondBranch true false expr =
 -- -----------------------------------------------------------------------------
 --  Generating C calls
 
--- Now the biggest nightmare---calls.  Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations.  Apart from that, the code is easy.
---
--- As per *convention*:
--- x0-x7:   (volatile) argument registers
--- x8:      (volatile) indirect result register / Linux syscall no
--- x9-x15:  (volatile) caller saved regs
--- x16,x17: (volatile) intra-procedure-call registers
--- x18:     (volatile) platform register. don't use for portability
--- x19-x28: (non-volatile) callee save regs
--- x29:     (non-volatile) frame pointer
--- x30:                    link register
--- x31:                    stack pointer / zero reg
---
--- Thus, this is what a c function will expect. Find the arguments in x0-x7,
--- anything above that on the stack.  We'll ignore c functions with more than
--- 8 arguments for now.  Sorry.
---
--- We need to make sure we preserve x9-x15, don't want to touch x16, x17.
-
--- Note [PLT vs GOT relocations]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- When linking objects together, we may need to lookup foreign references. That
--- is symbolic references to functions or values in other objects. When
--- compiling the object, we can not know where those elements will end up in
--- memory (relative to the current location). Thus the use of symbols. There
--- are two types of items we are interested, code segments we want to jump to
--- and continue execution there (functions, ...), and data items we want to look
--- up (strings, numbers, ...). For functions we can use the fact that we can use
--- an intermediate jump without visibility to the programs execution.  If we
--- want to jump to a function that is simply too far away to reach for the B/BL
--- instruction, we can create a small piece of code that loads the full target
--- address and jumps to that on demand. Say f wants to call g, however g is out
--- of range for a direct jump, we can create a function h in range for f, that
--- will load the address of g, and jump there. The area where we construct h
--- is called the Procedure Linking Table (PLT), we have essentially replaced
--- f -> g with f -> h -> g.  This is fine for function calls.  However if we
--- want to lookup values, this trick doesn't work, so we need something else.
--- We will instead reserve a slot in memory, and have a symbol pointing to that
--- slot. Now what we essentially do is, we reference that slot, and expect that
--- slot to hold the final resting address of the data we are interested in.
--- Thus what that symbol really points to is the location of the final data.
--- The block of memory where we hold all those slots is the Global Offset Table
--- (GOT).  Instead of x <- $foo, we now do y <- $fooPtr, and x <- [$y].
---
--- FIXME: Update for RISCV, the below is still AArch64.
--- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only
--- have 19bits (+/- 1MB).  Symbol lookups are also within +/- 1MB, thus for most
--- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within
--- 4GB of the PC, and load that.  For anything outside of that range, we'd have
--- to go through the GOT.
---
---  adrp x0, <symbol>
---  add x0, :lo:<symbol>
---
--- will compute the address of <symbol> int x0 if <symbol> is within 4GB of the
--- PC.
---
--- If we want to get the slot in the global offset table (GOT), we can do this:
---
---   adrp x0, #:got:<symbol>
---   ldr x0, [x0, #:got_lo12:<symbol>]
---
--- this will compute the address anywhere in the addressable 64bit space into
--- x0, by loading the address from the GOT slot.
---
--- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which
--- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>]
--- instead of the add instruction.
---
--- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do
--- not need to go through the GOT, unless we want to address the full address
--- range within 64bit.
 
+-- | Generate a call to a C function.
+--
+-- - Integer values are passed in GP registers a0-a7.
+-- - Floating point values are passed in FP registers fa0-fa7.
+-- - If there are no free floating point registers, the FP values are passed in GP registers.
+-- - If all GP registers are taken, the values are spilled as whole words (!) onto the stack.
+-- - For integers/words, the return value is in a0.
+-- - The return value is in fa0 if the return type is a floating point value.
 genCCall
     :: ForeignTarget      -- function to call
     -> [CmmFormal]        -- where to put the result
@@ -1496,7 +1426,8 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
     -- No mor regs left to pass. Must pass on stack.
     passArguments [] [] ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode = do
       let w = formatToWidth format
-          str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpaceWords)))
+          spOffet = 8 * stackSpaceWords
+          str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt spOffet)))
           stackCode =
             if hint == SignedHint
               then
@@ -1511,7 +1442,8 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
     -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
     passArguments [] fpRegs ((r, format, _hint, code_r):args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
       let w = formatToWidth format
-          str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpaceWords)))
+          spOffet = 8 * stackSpaceWords
+          str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt spOffet)))
           stackCode = code_r `snocOL`
                       ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
       passArguments [] fpRegs args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode)


=====================================
compiler/GHC/CmmToAsm/RV64/Regs.hs
=====================================
@@ -151,6 +151,10 @@ data Imm
   | ImmConstantDiff Imm Imm
   deriving (Eq, Show)
 
+-- | Map `CmmLit` to `Imm`
+--
+-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
+-- representation.
 litToImm :: CmmLit -> Imm
 litToImm (CmmInt i w) = ImmInteger (narrowS w i)
 -- narrow to the width: a CmmInt might be out of



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3b7865a75bbeb725b85bdfc718d5273f28726a8b...d691cd4f53f51c62c10adcbaf81af5d54a92c9da

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3b7865a75bbeb725b85bdfc718d5273f28726a8b...d691cd4f53f51c62c10adcbaf81af5d54a92c9da
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240719/d47a3ce2/attachment-0001.html>