[Git][ghc/ghc][wip/angerman/aarch64-ncg] 16 commits: Revert "Overflow guard"
Moritz Angermann
gitlab at gitlab.haskell.org
Tue Jul 14 12:06:00 UTC 2020
Moritz Angermann pushed to branch wip/angerman/aarch64-ncg at Glasgow Haskell Compiler / GHC
Commits:
566e5330 by Moritz Angermann at 2020-07-12T00:00:00+00:00
Revert "Overflow guard"
They are Integers not Ints.
This reverts commit 3ef94e593a2848cf2bdc4251f5be34536642675f.
Signed-off-by: Moritz Angermann <moritz.angermann at gmail.com>
- - - - -
b3902d8c by Moritz Angermann at 2020-07-12T00:00:00+00:00
Add CmmAssign and CmmStore comments
- - - - -
1670ab82 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Minor address mode changes
- - - - -
9b8edfa6 by Moritz Angermann at 2020-07-13T00:00:00+00:00
More Amode optimizations
- - - - -
ba1fd295 by Moritz Angermann at 2020-07-13T00:00:00+00:00
I think this shoudl work for all Registers, not just CmmGlobal
- - - - -
cee1984b by Moritz Angermann at 2020-07-13T00:00:00+00:00
Opt <<, >>
- - - - -
8f290367 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Opt &&, ||
- - - - -
590eec2c by Moritz Angermann at 2020-07-13T00:00:00+00:00
Add branch ANNotations.
- - - - -
d5835570 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Disable Opt &&, ||, due to mask immediate
- - - - -
0037d5d5 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Opt: Adds CBZ, CBNZ
- - - - -
244e836e by Moritz Angermann at 2020-07-14T00:00:00+00:00
More generic CBZ, CBNZ
- - - - -
1590c456 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Fixup
- - - - -
eb22b846 by Moritz Angermann at 2020-07-14T00:00:00+00:00
very rudimentary bitmask support.
- - - - -
8a45c408 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Add some more bitmasks
- - - - -
0bdf6cb2 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Opt STR
- - - - -
fe3aeaff by Moritz Angermann at 2020-07-14T00:00:00+00:00
Fixup
- - - - -
3 changed files:
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -2,7 +2,7 @@
{-# language GADTs #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}
-
+{-# LANGUAGE BinaryLiterals, NumericUnderscores #-}
module GHC.CmmToAsm.AArch64.CodeGen (
cmmTopCodeGen
, generateJumpTableForInstr
@@ -427,12 +427,10 @@ getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
-- OPTIMIZATION WARNING: CmmExpr rewrites
-- 1. Rewrite: Reg + (-n) => Reg - n
-- XXX: this expression souldn't even be generated to begin with.
--- NOTE: i /= (minBound::Int) is for overflow checking. As negating woudl case
--- an overflow.
-getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0 && i /= (minBound::Int)
+getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
= getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
-getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0 && i /= (minBound::Int)
+getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
= getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
@@ -545,7 +543,7 @@ getRegister' config plat expr
CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (ppr expr)
CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (ppr expr)
CmmLoad mem rep -> do
- Amode addr addr_code <- getAmode mem
+ Amode addr addr_code <- getAmode plat mem
let format = cmmTypeFormat rep
return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr)))
CmmStackSlot _ _
@@ -610,16 +608,40 @@ getRegister' config plat expr
CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
-- 1. Compute Reg +/- n directly.
-- For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
- CmmMachOp (MO_Add w) [(CmmReg reg@(CmmGlobal _)), CmmLit (CmmInt n _)]
+ CmmMachOp (MO_Add w) [(CmmReg reg), CmmLit (CmmInt n _)]
| n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
-- XXX: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
- -- OPTIMIZATION WARNING: This only works because reg is CmmGlobal
where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
r' = getRegisterReg plat reg
- CmmMachOp (MO_Sub w) [(CmmReg reg@(CmmGlobal _)), CmmLit (CmmInt n _)]
+ CmmMachOp (MO_Sub w) [(CmmReg reg), CmmLit (CmmInt n _)]
| n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
-- XXX: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
- -- OPTIMIZATION WARNING: This only works because reg is CmmGlobal
+ where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+ r' = getRegisterReg plat reg
+
+ -- 2. Shifts. x << n, x >> n.
+ CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+ CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+ CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+ CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+ -- 3. Logic &&, ||
+ CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+ return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+ where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+ r' = getRegisterReg plat reg
+
+ CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+ return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
r' = getRegisterReg plat reg
@@ -773,15 +795,64 @@ getRegister' config plat expr
is16bit i = (-1 `shiftL` 15) <= i && i < (1 `shiftL` 15)
is32bit :: Integer -> Bool
is32bit i = (-1 `shiftL` 31) <= i && i < (1 `shiftL` 31)
+ -- This needs to check if n can be encoded as a bitmask immediate:
+ --
+ -- See https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly
+ --
+ isBitMaskImmediate :: Integer -> Bool
+ isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000
+ ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000
+ ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000
+ ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000
+ ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000
+ ,0b0011_1111, 0b0111_1110, 0b1111_1100
+ ,0b0111_1111, 0b1111_1110
+ ,0b1111_1111]
+
-- -----------------------------------------------------------------------------
-- The 'Amode' type: Memory addressing modes passed up the tree.
data Amode = Amode AddrMode InstrBlock
-getAmode :: CmmExpr -> NatM Amode
+getAmode :: Platform -> CmmExpr -> NatM Amode
-- XXX: Specialize stuff we can destructure here.
+
+-- OPTIMIZATION WARNING: Addressing modes.
+-- Addressing options:
+-- LDUR/STUR: imm9: -256 - 255
+getAmode platform (CmmRegOff reg off) | -256 <= off, off <= 255
+ = return $ Amode (AddrRegImm reg' off') nilOL
+ where reg' = getRegisterReg platform reg
+ off' = ImmInt off
+-- LDR/STR: imm12: if reg is 32bit: 0 -- 16380 in multiples of 4
+getAmode platform (CmmRegOff reg off)
+ | typeWidth (cmmRegType platform reg) == W32, 0 <= off, off <= 16380, off `mod` 4 == 0
+ = return $ Amode (AddrRegImm reg' off') nilOL
+ where reg' = getRegisterReg platform reg
+ off' = ImmInt off
+-- LDR/STR: imm12: if reg is 64bit: 0 -- 32760 in multiples of 8
+getAmode platform (CmmRegOff reg off)
+ | typeWidth (cmmRegType platform reg) == W64, 0 <= off, off <= 32760, off `mod` 8 == 0
+ = return $ Amode (AddrRegImm reg' off') nilOL
+ where reg' = getRegisterReg platform reg
+ off' = ImmInt off
+
+-- For Stores we often see something like this:
+-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
+-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n ]
+-- for `n` in range.
+getAmode platform (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
+ | -256 <= off, off <= 255
+ = do (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrRegImm reg (ImmInteger off)) code
+
+getAmode platform (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
+ | -256 <= -off, -off <= 255
+ = do (reg, _format, code) <- getSomeReg expr
+ return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+
-- Generic case
-getAmode expr
+getAmode _plat expr
= do (reg, _format, code) <- getSomeReg expr
return $ Amode (AddrReg reg) code
@@ -806,14 +877,13 @@ assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
assignMem_IntCode rep addrE srcE
= do
(src_reg, _format, code) <- getSomeReg srcE
- Amode addr addr_code <- getAmode addrE
+ platform <- getPlatform
+ Amode addr addr_code <- getAmode platform addrE
let AddrReg r1 = addr
- return $ unitOL (COMMENT $ text "RHS:" <+> ppr srcE)
- `appOL` code
- `appOL` unitOL (COMMENT $ text "LHS:" <+> ppr addrE)
+ return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
+ `consOL` (code
`appOL` addr_code
- `snocOL` COMMENT (text "Store:" <+> ppr r1 <+> text "<-" <+> ppr src_reg)
- `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr)
+ `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr))
assignReg_IntCode _ reg src
= do
@@ -823,8 +893,8 @@ assignReg_IntCode _ reg src
p = showSDocUnsafe . ppr
r <- getRegister src
return $ case r of
- Any _ code -> code dst
- Fixed format freg fcode -> fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
+ Any _ code -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
+ Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg))
-- Let's treat Floating point stuff
-- as integer code for now. Opaque.
@@ -843,7 +913,7 @@ genJump (CmmLit (CmmLabel lbl)) regs
-- , DELTA 0] )
genJump expr regs = do
(target, _format, code) <- getSomeReg expr
- return (code `appOL` unitOL (B (TReg target))
+ return (code `appOL` unitOL (ANN (text $ show expr) (B (TReg target)))
-- toOL [ PUSH_STACK_FRAME
-- , DELTA (-16)
-- , B (TReg target)
@@ -863,20 +933,31 @@ genCondJump
:: BlockId
-> CmmExpr
-> NatM InstrBlock
-genCondJump bid expr
- = case expr of
+genCondJump bid expr = do
+ case expr of
+ -- Optimized == 0 case.
+ CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ code_x `snocOL` (ANN (text $ show expr) (CBZ (OpReg w reg_x) (TBlock bid)))
+
+ -- Optimized /= 0 case.
+ CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
+ (reg_x, _format_x, code_x) <- getSomeReg x
+ return $ code_x `snocOL` (ANN (text $ show expr) (CBNZ (OpReg w reg_x) (TBlock bid)))
+
+ -- Generic case.
CmmMachOp mop [x, y] -> do
let bcond w cmp = do
-- compute both sides.
(reg_x, _format_x, code_x) <- getSomeReg x
(reg_y, _format_y, code_y) <- getSomeReg y
- return $ code_x `appOL` code_y `snocOL` CMP (OpReg w reg_x) (OpReg w reg_y) `snocOL` BCOND cmp (TBlock bid)
+ return $ code_x `appOL` code_y `snocOL` CMP (OpReg w reg_x) (OpReg w reg_y) `snocOL` (ANN (text $ show expr) (BCOND cmp (TBlock bid)))
fbcond w cmp = do
-- ensure we get float regs
(reg_fx, _format_fx, code_fx) <- getFloatReg x
(reg_fy, _format_fy, code_fy) <- getFloatReg y
- return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` BCOND cmp (TBlock bid)
+ return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` (ANN (text $ show expr) (BCOND cmp (TBlock bid)))
case mop of
MO_F_Eq w -> fbcond w EQ
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -134,6 +134,8 @@ aarch64_regUsageOfInstr platform instr = case instr of
-- 5. Atomic Instructions ----------------------------------------------------
-- 6. Conditional Instructions -----------------------------------------------
CSET dst _ -> usage ([], regOp dst)
+ CBZ src _ -> usage (regOp src, [])
+ CBNZ src _ -> usage (regOp src, [])
-- 7. Load and Store Instructions --------------------------------------------
STR _ src dst -> usage (regOp src ++ regOp dst, [])
LDR _ dst src -> usage (regOp src, regOp dst)
@@ -257,7 +259,8 @@ aarch64_patchRegsOfInstr instr env = case instr of
-- 5. Atomic Instructions --------------------------------------------------
-- 6. Conditional Instructions ---------------------------------------------
CSET o c -> CSET (patchOp o) c
-
+ CBZ o l -> CBZ (patchOp o) l
+ CBNZ o l -> CBNZ (patchOp o) l
-- 7. Load and Store Instructions ------------------------------------------
STR f o1 o2 -> STR f (patchOp o1) (patchOp o2)
LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2)
@@ -293,6 +296,8 @@ aarch64_patchRegsOfInstr instr env = case instr of
aarch64_isJumpishInstr :: Instr -> Bool
aarch64_isJumpishInstr instr = case instr of
ANN _ i -> aarch64_isJumpishInstr i
+ CBZ{} -> True
+ CBNZ{} -> True
B{} -> True
BL{} -> True
BCOND{} -> True
@@ -303,6 +308,8 @@ aarch64_isJumpishInstr instr = case instr of
-- register allocator needs to worry about.
aarch64_jumpDestsOfInstr :: Instr -> [BlockId]
aarch64_jumpDestsOfInstr (ANN _ i) = aarch64_jumpDestsOfInstr i
+aarch64_jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
+aarch64_jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
aarch64_jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
aarch64_jumpDestsOfInstr (BL t) = [ id | TBlock id <- [t]]
aarch64_jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]]
@@ -315,6 +322,8 @@ aarch64_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
aarch64_patchJumpInstr instr patchF
= case instr of
ANN d i -> ANN d (aarch64_patchJumpInstr i patchF)
+ CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid))
+ CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid))
B (TBlock bid) -> B (TBlock (patchF bid))
BL (TBlock bid) -> BL (TBlock (patchF bid))
BCOND c (TBlock bid) -> BCOND c (TBlock (patchF bid))
@@ -527,12 +536,14 @@ data Instr
| LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8)
-- Conditional instructions
- | CSET Operand Cond -- if(cond) op <- 1 else op <- 0
+ | CSET Operand Cond -- if(cond) op <- 1 else op <- 0
+ | CBZ Operand Target -- if op == 0, then branch.
+ | CBNZ Operand Target -- if op /= 0, then branch.
-- Branching.
- | B Target -- unconditional branching b/br. (To a blockid, label or register)
- | BL Target -- branch and link (e.g. set x30 to next pc, and branch)
- | BCOND Cond Target -- branch with condition. b.<cond>
+ | B Target -- unconditional branching b/br. (To a blockid, label or register)
+ | BL Target -- branch and link (e.g. set x30 to next pc, and branch)
+ | BCOND Cond Target -- branch with condition. b.<cond>
-- 8. Synchronization Instructions -----------------------------------------
| DMBSY
=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -435,6 +435,14 @@ pprInstr platform instr = case instr of
-- 6. Conditional Instructions -----------------------------------------------
CSET o c -> text "\tcset" <+> pprOp o <> comma <+> pprCond c
+ CBZ o (TBlock bid) -> text "\tcbz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
+ CBZ o (TLabel lbl) -> text "\tcbz" <+> pprOp o <> comma <+> ppr lbl
+ CBZ c (TReg r) -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
+
+ CBNZ o (TBlock bid) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
+ CBNZ o (TLabel lbl) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr lbl
+ CBNZ c (TReg r) -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
+
-- 7. Load and Store Instructions --------------------------------------------
-- NOTE: GHC may do whacky things where it only load the lower part of an
-- address. Not observing the correct size when loading will lead
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/ce9cbc92c945c350dc3cc605bc7b78c97a133719...fe3aeaff2444881ef30adb9a82870f81a8880e7e
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/ce9cbc92c945c350dc3cc605bc7b78c97a133719...fe3aeaff2444881ef30adb9a82870f81a8880e7e
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200714/d599b73d/attachment-0001.html>
More information about the ghc-commits
mailing list