[Git][ghc/ghc][wip/angerman/aarch64-ncg] 16 commits: Revert "Overflow guard"

Moritz Angermann gitlab at gitlab.haskell.org
Tue Jul 14 12:06:00 UTC 2020



Moritz Angermann pushed to branch wip/angerman/aarch64-ncg at Glasgow Haskell Compiler / GHC


Commits:
566e5330 by Moritz Angermann at 2020-07-12T00:00:00+00:00
Revert "Overflow guard"

They are Integers, not Ints.

This reverts commit 3ef94e593a2848cf2bdc4251f5be34536642675f.

Signed-off-by: Moritz Angermann <moritz.angermann at gmail.com>

- - - - -
b3902d8c by Moritz Angermann at 2020-07-12T00:00:00+00:00
Add CmmAssign and CmmStore comments

- - - - -
1670ab82 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Minor address mode changes

- - - - -
9b8edfa6 by Moritz Angermann at 2020-07-13T00:00:00+00:00
More Amode optimizations

- - - - -
ba1fd295 by Moritz Angermann at 2020-07-13T00:00:00+00:00
I think this should work for all Registers, not just CmmGlobal

- - - - -
cee1984b by Moritz Angermann at 2020-07-13T00:00:00+00:00
Opt <<, >>

- - - - -
8f290367 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Opt &&, ||

- - - - -
590eec2c by Moritz Angermann at 2020-07-13T00:00:00+00:00
Add branch ANNotations.

- - - - -
d5835570 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Disable Opt &&, ||, due to mask immediate

- - - - -
0037d5d5 by Moritz Angermann at 2020-07-13T00:00:00+00:00
Opt: Adds CBZ, CBNZ

- - - - -
244e836e by Moritz Angermann at 2020-07-14T00:00:00+00:00
More generic CBZ, CBNZ

- - - - -
1590c456 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Fixup

- - - - -
eb22b846 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Very rudimentary bitmask support.

- - - - -
8a45c408 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Add some more bitmasks

- - - - -
0bdf6cb2 by Moritz Angermann at 2020-07-14T00:00:00+00:00
Opt STR

- - - - -
fe3aeaff by Moritz Angermann at 2020-07-14T00:00:00+00:00
Fixup

- - - - -


3 changed files:

- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -2,7 +2,7 @@
 {-# language GADTs #-}
 {-# LANGUAGE TupleSections #-}
 {-# LANGUAGE BangPatterns #-}
-
+{-# LANGUAGE BinaryLiterals, NumericUnderscores #-}
 module GHC.CmmToAsm.AArch64.CodeGen (
       cmmTopCodeGen
     , generateJumpTableForInstr
@@ -427,12 +427,10 @@ getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
 -- OPTIMIZATION WARNING: CmmExpr rewrites
 -- 1. Rewrite: Reg + (-n) => Reg - n
--    XXX: this expression shouldn't even be generated to begin with.
---    NOTE: i /= (minBound::Int) is for overflow checking. As negating woudl case
---          an overflow.
-getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0 && i /= (minBound::Int)
+getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
   = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
 
-getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0 && i /= (minBound::Int)
+getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
   = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
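
The guard removed above protected against `negate` overflowing, but a CmmInt
literal carries an arbitrary-precision Integer, not a fixed-width Int, so
negation is always safe. A quick GHCi check illustrating the difference:

    ghci> negate (fromIntegral (minBound :: Int) :: Integer)
    9223372036854775808
    ghci> negate (minBound :: Int)   -- the fixed-width case the guard feared
    -9223372036854775808             -- wraps back to minBound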
 
 
@@ -545,7 +543,7 @@ getRegister' config plat expr
         CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (ppr expr)
         CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (ppr expr)
     CmmLoad mem rep -> do
-      Amode addr addr_code <- getAmode mem
+      Amode addr addr_code <- getAmode plat mem
       let format = cmmTypeFormat rep
       return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr)))
     CmmStackSlot _ _
@@ -610,16 +608,40 @@ getRegister' config plat expr
     CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
     -- 1. Compute Reg +/- n directly.
     --    For Add/Sub we can directly encode 12bits, or 12bits lsl #12.
-    CmmMachOp (MO_Add w) [(CmmReg reg@(CmmGlobal _)), CmmLit (CmmInt n _)]
+    CmmMachOp (MO_Add w) [(CmmReg reg), CmmLit (CmmInt n _)]
       | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
       -- XXX: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
-      -- OPTIMIZATION WARNING: This only works because reg is CmmGlobal
       where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
             r' = getRegisterReg plat reg
-    CmmMachOp (MO_Sub w) [(CmmReg reg@(CmmGlobal _)), CmmLit (CmmInt n _)]
+    CmmMachOp (MO_Sub w) [(CmmReg reg), CmmLit (CmmInt n _)]
       | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
       -- XXX: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12.
-      -- OPTIMIZATION WARNING: This only works because reg is CmmGlobal
+      where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+            r' = getRegisterReg plat reg
+
+    -- 2. Shifts. x << n, x >> n.
+    CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+    CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+    CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+    CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do
+      (reg_x, _format_x, code_x) <- getSomeReg x
+      return $ Any (intFormat w) (\dst -> code_x `snocOL` ANN (text $ show expr) (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
+
+    -- 3. Logic &&, ||
+    CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+      return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
+      where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
+            r' = getRegisterReg plat reg
+
+    CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) ->
+      return $ Any (intFormat w) (\d -> unitOL $ ANN (text $ show expr) (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n))))
       where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg))
             r' = getRegisterReg plat reg
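
The XXX notes above leave the `imm12, lsl #12` form unhandled: AArch64 ADD/SUB
also accept a 12-bit immediate shifted left by 12. A minimal predicate for that
encoding (a hypothetical helper, not part of this patch):

    import Data.Bits (shiftR, (.&.))

    -- n is encodable as "imm12, lsl #12" iff its low 12 bits are zero
    -- and the remaining bits fit in an unsigned 12-bit field.
    fitsImm12Lsl12 :: Integer -> Bool
    fitsImm12Lsl12 n = n .&. 0xfff == 0 && hi > 0 && hi < 4096
      where hi = n `shiftR` 12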
 
@@ -773,15 +795,64 @@ getRegister' config plat expr
     is16bit i = (-1 `shiftL` 15) <= i && i < (1 `shiftL` 15)
     is32bit :: Integer -> Bool
     is32bit i = (-1 `shiftL` 31) <= i && i < (1 `shiftL` 31)
+    -- This needs to check if n can be encoded as a bitmask immediate:
+    --
+    -- See https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly
+    --
+    isBitMaskImmediate :: Integer -> Bool
+    isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000
+                                    ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000
+                                    ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000
+                                    ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000
+                                    ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000
+                                    ,0b0011_1111, 0b0111_1110, 0b1111_1100
+                                    ,0b0111_1111, 0b1111_1110
+                                    ,0b1111_1111]
+
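
The table above only covers contiguous runs of ones within a single byte. The
general rule (per the linked answer) is that a logical immediate must be a
repetition of a 2-, 4-, 8-, 16-, 32- or 64-bit element that is itself a
rotation of a contiguous run of ones. A self-contained sketch of that check for
64-bit operands (hypothetical, not part of this patch; a 32-bit variant would
cap the element size at 32):

    import Data.Bits
    import Data.Word (Word64)

    isLogicalImm :: Word64 -> Bool
    isLogicalImm v = v /= 0 && v /= maxBound && any ok [2, 4, 8, 16, 32, 64]
      where
        ok :: Int -> Bool
        ok e = rep == v                          -- v repeats with period e
            && elt /= 0 && elt /= m              -- element is not all-zeros/all-ones
            && popCount (elt `xor` rot1) == 2    -- exactly one circular run of ones
          where
            m    = if e == 64 then maxBound else bit e - 1
            elt  = v .&. m
            -- elt replicated across all 64 bits
            rep  = foldl (\acc s -> acc .|. (elt `shiftL` s)) 0 [0, e .. 63]
            -- circular left rotation of the e-bit element by 1
            rot1 = ((elt `shiftL` 1) .|. (elt `shiftR` (e - 1))) .&. m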
 
 -- -----------------------------------------------------------------------------
 --  The 'Amode' type: Memory addressing modes passed up the tree.
 data Amode = Amode AddrMode InstrBlock
 
-getAmode :: CmmExpr -> NatM Amode
+getAmode :: Platform -> CmmExpr -> NatM Amode
 -- XXX: Specialize stuff we can destructure here.
+
+-- OPTIMIZATION WARNING: Addressing modes.
+-- Addressing options:
+-- LDUR/STUR: imm9: -256 to 255
+getAmode platform (CmmRegOff reg off) | -256 <= off, off <= 255
+  = return $ Amode (AddrRegImm reg' off') nilOL
+    where reg' = getRegisterReg platform reg
+          off' = ImmInt off
+-- LDR/STR: imm12: if reg is 32 bit: 0 to 16380, in multiples of 4
+getAmode platform (CmmRegOff reg off)
+  | typeWidth (cmmRegType platform reg) == W32, 0 <= off, off <= 16380, off `mod` 4 == 0
+  = return $ Amode (AddrRegImm reg' off') nilOL
+    where reg' = getRegisterReg platform reg
+          off' = ImmInt off
+-- LDR/STR: imm12: if reg is 64 bit: 0 to 32760, in multiples of 8
+getAmode platform (CmmRegOff reg off)
+  | typeWidth (cmmRegType platform reg) == W64, 0 <= off, off <= 32760, off `mod` 8 == 0
+  = return $ Amode (AddrRegImm reg' off') nilOL
+    where reg' = getRegisterReg platform reg
+          off' = ImmInt off
+
+-- For Stores we often see something like this:
+-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2)
+-- i.e. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n]`
+-- for `n` in range.
+getAmode _platform (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')])
+  | -256 <= off, off <= 255
+  = do (reg, _format, code) <- getSomeReg expr
+       return $ Amode (AddrRegImm reg (ImmInteger off)) code
+
+getAmode _platform (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')])
+  | -256 <= -off, -off <= 255
+  = do (reg, _format, code) <- getSomeReg expr
+       return $ Amode (AddrRegImm reg (ImmInteger (-off))) code
+
 -- Generic case
-getAmode expr
+getAmode _plat expr
   = do (reg, _format, code) <- getSomeReg expr
        return $ Amode (AddrReg reg) code
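
Taken together, the new getAmode clauses fold an offset into the addressing
mode whenever it fits one of the two encodings; as a single predicate
(illustrative sketch using GHC.Cmm's Width, not part of the patch):

    -- Mirrors the guards above: unscaled signed 9-bit (LDUR/STUR),
    -- or scaled unsigned 12-bit (LDR/STR).
    fitsLoadStoreOff :: Width -> Int -> Bool
    fitsLoadStoreOff w off
      | -256 <= off && off <= 255 = True                          -- LDUR/STUR imm9
      | w == W32 = 0 <= off && off <= 16380 && off `mod` 4 == 0   -- LDR/STR imm12, scale 4
      | w == W64 = 0 <= off && off <= 32760 && off `mod` 8 == 0   -- LDR/STR imm12, scale 8
      | otherwise = False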
 
@@ -806,14 +877,13 @@ assignReg_FltCode :: Format -> CmmReg  -> CmmExpr -> NatM InstrBlock
 assignMem_IntCode rep addrE srcE
   = do
     (src_reg, _format, code) <- getSomeReg srcE
-    Amode addr addr_code <- getAmode addrE
+    platform <- getPlatform
+    Amode addr addr_code <- getAmode platform addrE
     let AddrReg r1 = addr
-    return $ unitOL (COMMENT $ text "RHS:" <+> ppr srcE)
-            `appOL` code
-            `appOL` unitOL (COMMENT $ text "LHS:" <+> ppr addrE)
+    return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
+            `consOL` (code
             `appOL` addr_code
-            `snocOL` COMMENT (text "Store:" <+> ppr r1 <+> text "<-" <+> ppr src_reg)
-            `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr)
+            `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr))
 
 assignReg_IntCode _ reg src
   = do
@@ -823,8 +893,8 @@ assignReg_IntCode _ reg src
         p = showSDocUnsafe . ppr
     r <- getRegister src
     return $ case r of
-      Any _ code         -> code dst
-      Fixed format freg fcode -> fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)
+      Any _ code              -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
+      Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg))
 
 -- Let's treat Floating point stuff
 -- as integer code for now. Opaque.
@@ -843,7 +913,7 @@ genJump (CmmLit (CmmLabel lbl)) regs
   --               , DELTA 0] )
 genJump expr regs = do
     (target, _format, code) <- getSomeReg expr
-    return (code `appOL` unitOL (B (TReg target))
+    return (code `appOL` unitOL (ANN (text $ show expr) (B (TReg target)))
                         --  toOL [ PUSH_STACK_FRAME
                         --       , DELTA (-16)
                         --       , B (TReg target)
@@ -863,20 +933,31 @@ genCondJump
     :: BlockId
     -> CmmExpr
     -> NatM InstrBlock
-genCondJump bid expr
-  = case expr of
+genCondJump bid expr = do
+    case expr of
+      -- Optimized == 0 case.
+      CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do
+        (reg_x, _format_x, code_x) <- getSomeReg x
+        return $ code_x `snocOL` (ANN (text $ show expr) (CBZ (OpReg w reg_x) (TBlock bid)))
+
+      -- Optimized /= 0 case.
+      CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do
+        (reg_x, _format_x, code_x) <- getSomeReg x
+        return $ code_x `snocOL` (ANN (text $ show expr) (CBNZ (OpReg w reg_x) (TBlock bid)))
+
+      -- Generic case.
       CmmMachOp mop [x, y] -> do
 
         let bcond w cmp = do
               -- compute both sides.
               (reg_x, _format_x, code_x) <- getSomeReg x
               (reg_y, _format_y, code_y) <- getSomeReg y
-              return $ code_x `appOL` code_y `snocOL` CMP (OpReg w reg_x) (OpReg w reg_y) `snocOL` BCOND cmp (TBlock bid)
+              return $ code_x `appOL` code_y `snocOL` CMP (OpReg w reg_x) (OpReg w reg_y) `snocOL` (ANN (text $ show expr) (BCOND cmp (TBlock bid)))
             fbcond w cmp = do
               -- ensure we get float regs
               (reg_fx, _format_fx, code_fx) <- getFloatReg x
               (reg_fy, _format_fy, code_fy) <- getFloatReg y
-              return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` BCOND cmp (TBlock bid)
+              return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` (ANN (text $ show expr) (BCOND cmp (TBlock bid)))
 
         case mop of
           MO_F_Eq w -> fbcond w EQ
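
The payoff of the two new cases: a conditional jump on comparison with zero
previously materialised the literal and emitted CMP plus BCOND; it now folds
into a single compare-and-branch. A sketch of the before/after shape using this
patch's own constructors (illustrative only; assumes the module's imports):

    -- The shape of the code emitted for `if x == 0 then goto bid`,
    -- before and after this change. reg_zero is assumed to hold the literal 0.
    condJumpEqZero :: Width -> Reg -> Reg -> BlockId -> (OrdList Instr, OrdList Instr)
    condJumpEqZero w reg_x reg_zero bid =
      ( toOL [ CMP (OpReg w reg_x) (OpReg w reg_zero)   -- before: compare ...
             , BCOND EQ (TBlock bid) ]                  -- ... then branch
      , unitOL (CBZ (OpReg w reg_x) (TBlock bid)) )     -- after: one instruction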


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -134,6 +134,8 @@ aarch64_regUsageOfInstr platform instr = case instr of
   -- 5. Atomic Instructions ----------------------------------------------------
   -- 6. Conditional Instructions -----------------------------------------------
   CSET dst _               -> usage ([], regOp dst)
+  CBZ src _                -> usage (regOp src, [])
+  CBNZ src _               -> usage (regOp src, [])
   -- 7. Load and Store Instructions --------------------------------------------
   STR _ src dst            -> usage (regOp src ++ regOp dst, [])
   LDR _ dst src            -> usage (regOp src, regOp dst)
@@ -257,7 +259,8 @@ aarch64_patchRegsOfInstr instr env = case instr of
     -- 5. Atomic Instructions --------------------------------------------------
     -- 6. Conditional Instructions ---------------------------------------------
     CSET o c       -> CSET (patchOp o) c
-
+    CBZ o l        -> CBZ (patchOp o) l
+    CBNZ o l       -> CBNZ (patchOp o) l
     -- 7. Load and Store Instructions ------------------------------------------
     STR f o1 o2    -> STR f (patchOp o1) (patchOp o2)
     LDR f o1 o2    -> LDR f (patchOp o1) (patchOp o2)
@@ -293,6 +296,8 @@ aarch64_patchRegsOfInstr instr env = case instr of
 aarch64_isJumpishInstr :: Instr -> Bool
 aarch64_isJumpishInstr instr = case instr of
     ANN _ i -> aarch64_isJumpishInstr i
+    CBZ{} -> True
+    CBNZ{} -> True
     B{} -> True
     BL{} -> True
     BCOND{} -> True
@@ -303,6 +308,8 @@ aarch64_isJumpishInstr instr = case instr of
 -- register allocator needs to worry about.
 aarch64_jumpDestsOfInstr :: Instr -> [BlockId]
 aarch64_jumpDestsOfInstr (ANN _ i) = aarch64_jumpDestsOfInstr i
+aarch64_jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
+aarch64_jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (BL t) = [ id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]]
@@ -315,6 +322,8 @@ aarch64_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
 aarch64_patchJumpInstr instr patchF
     = case instr of
         ANN d i -> ANN d (aarch64_patchJumpInstr i patchF)
+        CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid))
+        CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid))
         B (TBlock bid) -> B (TBlock (patchF bid))
         BL (TBlock bid) -> BL (TBlock (patchF bid))
         BCOND c (TBlock bid) -> BCOND c (TBlock (patchF bid))
@@ -527,12 +536,14 @@ data Instr
     | LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8)
 
     -- Conditional instructions
-    | CSET Operand Cond  -- if(cond) op <- 1 else op <- 0
+    | CSET Operand Cond   -- if(cond) op <- 1 else op <- 0
 
+    | CBZ Operand Target  -- if op == 0, then branch.
+    | CBNZ Operand Target -- if op /= 0, then branch.
     -- Branching.
-    | B Target           -- unconditional branching b/br. (To a blockid, label or register)
-    | BL Target          -- branch and link (e.g. set x30 to next pc, and branch)
-    | BCOND Cond Target  -- branch with condition. b.<cond>
+    | B Target            -- unconditional branching b/br. (To a blockid, label or register)
+    | BL Target           -- branch and link (e.g. set x30 to next pc, and branch)
+    | BCOND Cond Target   -- branch with condition. b.<cond>
 
     -- 8. Synchronization Instructions -----------------------------------------
     | DMBSY
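
For the register allocator, CBZ/CBNZ behave like BCOND: they read their
operand, write nothing, count as jumpish, and expose a block target only when
branching to a TBlock. Illustrative consequences of the definitions above:

    -- aarch64_regUsageOfInstr platform (CBZ (OpReg W64 r) (TBlock bid))
    --   reads [r], writes []  -- so r stays live up to the branch
    -- aarch64_jumpDestsOfInstr (CBZ op (TBlock bid)) == [bid]
    -- aarch64_jumpDestsOfInstr (CBZ op (TReg r))     == []  -- no static destination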


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -435,6 +435,14 @@ pprInstr platform instr = case instr of
   -- 6. Conditional Instructions -----------------------------------------------
   CSET o c  -> text "\tcset" <+> pprOp o <> comma <+> pprCond c
 
+  CBZ o (TBlock bid) -> text "\tcbz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
+  CBZ o (TLabel lbl) -> text "\tcbz" <+> pprOp o <> comma <+> ppr lbl
+  CBZ _ (TReg _)     -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
+
+  CBNZ o (TBlock bid) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
+  CBNZ o (TLabel lbl) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr lbl
+  CBNZ _ (TReg _)    -> panic "AArch64.ppr: No conditional (cbnz) branching to registers!"
+
   -- 7. Load and Store Instructions --------------------------------------------
  -- NOTE: GHC may do whacky things where it only loads the lower part of an
   --       address. Not observing the correct size when loading will lead



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/ce9cbc92c945c350dc3cc605bc7b78c97a133719...fe3aeaff2444881ef30adb9a82870f81a8880e7e


