[Git][ghc/ghc][wip/supersven/ghc-9.10-riscv-ncg] 4 commits: Comments and formatting
Sven Tennie (@supersven)
gitlab at gitlab.haskell.org
Fri Jul 19 11:28:23 UTC 2024
Sven Tennie pushed to branch wip/supersven/ghc-9.10-riscv-ncg at Glasgow Haskell Compiler / GHC
Commits:
0fc9f7e8 by Sven Tennie at 2024-07-19T12:19:24+02:00
Comments and formatting
- - - - -
832ce6aa by Sven Tennie at 2024-07-19T12:20:36+02:00
De-obfuscate immediate creation a bit
- - - - -
bf05a0a5 by Sven Tennie at 2024-07-19T12:21:08+02:00
Fix C Calling Convention
Arguments are passed as whole words on the stack!
- - - - -
d691cd4f by Sven Tennie at 2024-07-19T12:55:26+02:00
Cleanup comments
- - - - -
2 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -372,8 +372,8 @@ getFloatReg expr = do
--
-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
-- representation.
-litToImm' :: CmmLit -> NatM (Operand, InstrBlock)
-litToImm' lit = return (OpImm (litToImm lit), nilOL)
+litToImm' :: CmmLit -> Operand
+litToImm' = OpImm . litToImm
-- | Compute a `CmmExpr` into a `Register`
getRegister :: CmmExpr -> NatM Register
@@ -384,10 +384,10 @@ getRegister e = do
-- | The register width to be used for an operation on the given width
-- operand.
opRegWidth :: Width -> Width
-opRegWidth W64 = W64 -- x
-opRegWidth W32 = W32 -- w
-opRegWidth W16 = W32 -- w
-opRegWidth W8 = W32 -- w
+opRegWidth W64 = W64
+opRegWidth W32 = W32
+opRegWidth W16 = W32
+opRegWidth W8 = W32
opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
-- Note [Signed arithmetic on RISCV64]
@@ -428,17 +428,16 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
-- Craig Topper covers possible future improvements
-- (https://llvm.org/devmtg/2022-11/slides/TechTalk21-RISC-VSignExtensionOptimizations.pdf)
--
--- TODO:
--- Don't use Width in Operands
--- Instructions should rather carry a RegWidth
--
-- Note [Handling PIC on RV64]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- RV64 does not have a special PIC register, the general approach is to
--- simply go through the GOT, and there is assembly support for this:
+-- RV64 does not have a special PIC register, the general approach is to simply
+-- do PC-relative addressing or go through the GOT. There is assembly support
+-- for both.
--
-- rv64 assembly has a `la` (load address) pseudo-instruction, that allows
--- loading a label, ... into a register. The instruction is desugared into
+-- loading a label's address into a register. The instruction is desugared into
+-- different addressing modes, e.g. PC-relative addressing:
--
-- 1: lui rd1, %pcrel_hi(label)
-- addi rd1, %pcrel_lo(1b)
@@ -462,12 +461,12 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
--
getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
+
-- OPTIMIZATION WARNING: CmmExpr rewrites
-- 1. Rewrite: Reg + (-n) => Reg - n
-- TODO: this expression shouldn't even be generated to begin with.
getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
= getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
-
getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
= getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
@@ -475,6 +474,7 @@ getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i <
getRegister' config plat expr =
case expr of
CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _)) ->
+ -- See Note [Handling PIC on RV64]
pprPanic "getRegister': There's no PIC base register on RISCV" (ppr PicBaseReg)
CmmLit lit ->
@@ -487,10 +487,9 @@ getRegister' config plat expr =
in
pure (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) imm)))
- -- floatToBytes (fromRational f)
CmmFloat 0 w -> do
- (op, imm_code) <- litToImm' lit
- return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op)))
+ let op = litToImm' lit
+ pure (Any (floatFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg w dst) op)))
CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
@@ -509,26 +508,26 @@ getRegister' config plat expr =
, MOV (OpReg W64 dst) (OpReg W64 tmp)
]))
CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr)
- CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
- CmmLabel _lbl -> do
- (op, imm_code) <- litToImm' lit
- let rep = cmmLitType plat lit
+ CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
+ CmmLabel lbl -> do
+ let op = OpImm (ImmCLbl lbl)
+ rep = cmmLitType plat lit
format = cmmTypeFormat rep
- return (Any format (\dst -> imm_code `snocOL` annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
+ return (Any format (\dst -> unitOL $ annExpr expr (LDR format (OpReg (formatToWidth format) dst) op)))
- CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
- (op, imm_code) <- litToImm' lit
- let rep = cmmLitType plat lit
+ CmmLabelOff lbl off | isNbitEncodeable 12 (fromIntegral off) -> do
+ let op = OpImm (ImmIndex lbl off)
+ rep = cmmLitType plat lit
format = cmmTypeFormat rep
- return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
+ return (Any format (\dst -> unitOL $ LDR format (OpReg (formatToWidth format) dst) op))
CmmLabelOff lbl off -> do
- (op, imm_code) <- litToImm' (CmmLabel lbl)
- let rep = cmmLitType plat lit
+ let op = litToImm' (CmmLabel lbl)
+ rep = cmmLitType plat lit
format = cmmTypeFormat rep
width = typeWidth rep
(off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width)
- return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
+ return (Any format (\dst -> off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r)))
CmmLabelDiffOff {} -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr)
@@ -875,16 +874,14 @@ getRegister' config plat expr =
-- Generic ternary case.
CmmMachOp op [x, y, z] ->
-
case op of
-- Floating-point fused multiply-add operations
-
- -- x86 fmadd x * y + z <=> AArch64 fmadd : d = r1 * r2 + r3
- -- x86 fmsub x * y - z <=> AArch64 fnmsub: d = r1 * r2 - r3
- -- x86 fnmadd - x * y + z <=> AArch64 fmsub : d = - r1 * r2 + r3
- -- x86 fnmsub - x * y - z <=> AArch64 fnmadd: d = - r1 * r2 - r3
-
+ --
+ -- x86 fmadd x * y + z <=> RISCV64 fmadd : d = r1 * r2 + r3
+ -- x86 fmsub x * y - z <=> RISCV64 fnmsub: d = r1 * r2 - r3
+ -- x86 fnmadd - x * y + z <=> RISCV64 fmsub : d = - r1 * r2 + r3
+ -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3
MO_FMA var w -> case var of
FMAdd -> float3Op w (\d n m a -> unitOL $ FMA FMAdd d n m a)
FMSub -> float3Op w (\d n m a -> unitOL $ FMA FMSub d n m a)
@@ -893,7 +890,6 @@ getRegister' config plat expr =
_ -> pprPanic "getRegister' (unhandled ternary CmmMachOp): " $
pprMachOp op <+> text "in" <+> pdoc plat expr
-
where
float3Op w op = do
(reg_fx, format_x, code_fx) <- getFloatReg x
@@ -901,7 +897,7 @@ getRegister' config plat expr =
(reg_fz, format_z, code_fz) <- getFloatReg z
massertPpr (isFloatFormat format_x && isFloatFormat format_y && isFloatFormat format_z) $
text "float3Op: non-float"
- return $
+ pure $
Any (floatFormat w) $ \ dst ->
code_fx `appOL`
code_fy `appOL`
@@ -1328,81 +1324,15 @@ genCondBranch true false expr =
-- -----------------------------------------------------------------------------
-- Generating C calls
--- Now the biggest nightmare---calls. Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations. Apart from that, the code is easy.
---
--- As per *convention*:
--- x0-x7: (volatile) argument registers
--- x8: (volatile) indirect result register / Linux syscall no
--- x9-x15: (volatile) caller saved regs
--- x16,x17: (volatile) intra-procedure-call registers
--- x18: (volatile) platform register. don't use for portability
--- x19-x28: (non-volatile) callee save regs
--- x29: (non-volatile) frame pointer
--- x30: link register
--- x31: stack pointer / zero reg
---
--- Thus, this is what a c function will expect. Find the arguments in x0-x7,
--- anything above that on the stack. We'll ignore c functions with more than
--- 8 arguments for now. Sorry.
---
--- We need to make sure we preserve x9-x15, don't want to touch x16, x17.
-
--- Note [PLT vs GOT relocations]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- When linking objects together, we may need to lookup foreign references. That
--- is symbolic references to functions or values in other objects. When
--- compiling the object, we can not know where those elements will end up in
--- memory (relative to the current location). Thus the use of symbols. There
--- are two types of items we are interested, code segments we want to jump to
--- and continue execution there (functions, ...), and data items we want to look
--- up (strings, numbers, ...). For functions we can use the fact that we can use
--- an intermediate jump without visibility to the programs execution. If we
--- want to jump to a function that is simply too far away to reach for the B/BL
--- instruction, we can create a small piece of code that loads the full target
--- address and jumps to that on demand. Say f wants to call g, however g is out
--- of range for a direct jump, we can create a function h in range for f, that
--- will load the address of g, and jump there. The area where we construct h
--- is called the Procedure Linking Table (PLT), we have essentially replaced
--- f -> g with f -> h -> g. This is fine for function calls. However if we
--- want to lookup values, this trick doesn't work, so we need something else.
--- We will instead reserve a slot in memory, and have a symbol pointing to that
--- slot. Now what we essentially do is, we reference that slot, and expect that
--- slot to hold the final resting address of the data we are interested in.
--- Thus what that symbol really points to is the location of the final data.
--- The block of memory where we hold all those slots is the Global Offset Table
--- (GOT). Instead of x <- $foo, we now do y <- $fooPtr, and x <- [$y].
---
--- FIXME: Update for RISCV, the below is still AArch64.
--- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only
--- have 19bits (+/- 1MB). Symbol lookups are also within +/- 1MB, thus for most
--- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within
--- 4GB of the PC, and load that. For anything outside of that range, we'd have
--- to go through the GOT.
---
--- adrp x0, <symbol>
--- add x0, :lo:<symbol>
---
--- will compute the address of <symbol> int x0 if <symbol> is within 4GB of the
--- PC.
---
--- If we want to get the slot in the global offset table (GOT), we can do this:
---
--- adrp x0, #:got:<symbol>
--- ldr x0, [x0, #:got_lo12:<symbol>]
---
--- this will compute the address anywhere in the addressable 64bit space into
--- x0, by loading the address from the GOT slot.
---
--- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which
--- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>]
--- instead of the add instruction.
---
--- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do
--- not need to go through the GOT, unless we want to address the full address
--- range within 64bit.
+-- | Generate a call to a C function.
+--
+-- - Integer values are passed in GP registers a0-a7.
+-- - Floating point values are passed in FP registers fa0-fa7.
+-- - If there are no free floating point registers, the FP values are passed in GP registers.
+-- - If all GP registers are taken, the values are spilled as whole words (!) onto the stack.
+-- - For integers/words, the return value is in a0.
+-- - The return value is in fa0 if the return type is a floating point value.
genCCall
:: ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
@@ -1496,7 +1426,8 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
-- No mor regs left to pass. Must pass on stack.
passArguments [] [] ((r, format, hint, code_r) : args) stackSpaceWords accumRegs accumCode = do
let w = formatToWidth format
- str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpaceWords)))
+ spOffet = 8 * stackSpaceWords
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt spOffet)))
stackCode =
if hint == SignedHint
then
@@ -1511,7 +1442,8 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
-- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then.
passArguments [] fpRegs ((r, format, _hint, code_r):args) stackSpaceWords accumRegs accumCode | isIntFormat format = do
let w = formatToWidth format
- str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt stackSpaceWords)))
+ spOffet = 8 * stackSpaceWords
+ str = STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 2) (ImmInt spOffet)))
stackCode = code_r `snocOL`
ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) str
passArguments [] fpRegs args (stackSpaceWords + 1) accumRegs (stackCode `appOL` accumCode)
=====================================
compiler/GHC/CmmToAsm/RV64/Regs.hs
=====================================
@@ -151,6 +151,10 @@ data Imm
| ImmConstantDiff Imm Imm
deriving (Eq, Show)
+-- | Map `CmmLit` to `Imm`
+--
+-- N.B. this is a partial function, because not all `CmmLit`s have an immediate
+-- representation.
litToImm :: CmmLit -> Imm
litToImm (CmmInt i w) = ImmInteger (narrowS w i)
-- narrow to the width: a CmmInt might be out of
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3b7865a75bbeb725b85bdfc718d5273f28726a8b...d691cd4f53f51c62c10adcbaf81af5d54a92c9da
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/3b7865a75bbeb725b85bdfc718d5273f28726a8b...d691cd4f53f51c62c10adcbaf81af5d54a92c9da
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240719/d47a3ce2/attachment-0001.html>
More information about the ghc-commits
mailing list