[Git][ghc/ghc][wip/angerman/aarch64-ncg] 4 commits: [Aarch64] No div-by-zero; disable test.

Moritz Angermann gitlab at gitlab.haskell.org
Fri Sep 11 05:34:34 UTC 2020



Moritz Angermann pushed to branch wip/angerman/aarch64-ncg at Glasgow Haskell Compiler / GHC


Commits:
8009d84f by Moritz Angermann at 2020-09-09T15:34:03+00:00
[Aarch64] No div-by-zero; disable test.

- - - - -
cb99c9c9 by Moritz Angermann at 2020-09-11T02:25:50+00:00
Simplify aarch64 StgRun

We don't need to do the callee-save register dance; the compiler already
does this for us:

0000000000000000 <StgRun>:
   0:   a9b653f3        stp     x19, x20, [sp, #-160]!
   4:   a9015bf5        stp     x21, x22, [sp, #16]
   8:   a90263f7        stp     x23, x24, [sp, #32]
   c:   a9036bf9        stp     x25, x26, [sp, #48]
  10:   a90473fb        stp     x27, x28, [sp, #64]
  14:   f9002bfe        str     x30, [sp, #80]
  18:   6d0627e8        stp     d8, d9, [sp, #96]
  1c:   6d072fea        stp     d10, d11, [sp, #112]
  20:   6d0837ec        stp     d12, d13, [sp, #128]
  24:   6d093fee        stp     d14, d15, [sp, #144]
  28:   a9bf47f0        stp     x16, x17, [sp, #-16]!
  2c:   d14013ff        sub     sp, sp, #0x4, lsl #12
  30:   aa0103f3        mov     x19, x1
  34:   d61f0000        br      x0

0000000000000038 <StgReturn>:
  38:   914013ff        add     sp, sp, #0x4, lsl #12
  3c:   aa1603e0        mov     x0, x22
  40:   a8c147f0        ldp     x16, x17, [sp], #16
  44:   a9415bf5        ldp     x21, x22, [sp, #16]
  48:   a94263f7        ldp     x23, x24, [sp, #32]
  4c:   a9436bf9        ldp     x25, x26, [sp, #48]
  50:   a94473fb        ldp     x27, x28, [sp, #64]
  54:   f9402bfe        ldr     x30, [sp, #80]
  58:   6d4627e8        ldp     d8, d9, [sp, #96]
  5c:   6d472fea        ldp     d10, d11, [sp, #112]
  60:   6d4837ec        ldp     d12, d13, [sp, #128]
  64:   6d493fee        ldp     d14, d15, [sp, #144]
  68:   a8ca53f3        ldp     x19, x20, [sp], #160
  6c:   d65f03c0        ret
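
(As an aside, the "sub sp, sp, #0x4, lsl #12" in the listing is the 16K
(0x4000) RESERVED_C_STACK_BYTES allocation.)

A minimal standalone sketch of the mechanism this relies on (illustration
only, not part of the patch; the function name is made up): naming the
callee-saved registers in the asm clobber list is enough for gcc/clang to
emit an stp/ldp save/restore sequence like the one above.

    /* clobber_demo.c: compile with -O2 -S on an AArch64 host; the
     * prologue/epilogue will contain stp/ldp pairs for x19-x28 and
     * d8-d15, similar to the StgRun listing above. */
    void clobber_demo(void)
    {
        __asm__ volatile (
            ""                /* body elided; only the clobber list matters */
            : /* no outputs */
            : /* no inputs  */
            : "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26",
              "x27", "x28", "x30",
              "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
              "memory"
        );
    }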

- - - - -
795a3a8d by Moritz Angermann at 2020-09-11T02:25:50+00:00
Use ip0 for spills/reloads

- - - - -
1ca1fe4e by Moritz Angermann at 2020-09-11T05:34:14+00:00
:broom: Cleanup imports/unused args

- - - - -


8 changed files:

- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Regs.hs
- compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
- compiler/GHC/CmmToAsm/SPARC/Instr.hs
- includes/CodeGen.Platform.hs
- rts/StgCRun.c
- testsuite/tests/rts/all.T


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -34,14 +34,14 @@ import GHC.CmmToAsm.Instr
 import GHC.CmmToAsm.PIC
 import GHC.CmmToAsm.Format
 import GHC.CmmToAsm.Config
-import GHC.Platform.Reg.Class
+-- import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
-import GHC.CmmToAsm.Reg.Target
+-- import GHC.CmmToAsm.Reg.Target
 import GHC.Platform
 
 -- Our intermediate code:
 import GHC.Cmm.BlockId
-import GHC.Cmm.Ppr           ( pprExpr )
+-- import GHC.Cmm.Ppr           ( pprExpr )
 import GHC.Cmm
 import GHC.Cmm.Utils
 import GHC.Cmm.Switch
@@ -59,7 +59,7 @@ import Control.Monad    ( mapAndUnzipM, when, foldM )
 import Data.Bits
 import Data.Word
 import Data.Maybe
-import Data.Int
+-- import Data.Int
 import GHC.Float
 
 import GHC.Types.Basic
@@ -67,7 +67,7 @@ import GHC.Types.ForeignCall
 import GHC.Data.FastString
 import GHC.Utils.Misc
 
-import Debug.Trace
+-- import Debug.Trace
 
 -- @cmmTopCodeGen@ will be our main entry point to code gen.  Here we'll get
 -- @RawCmmDecl@; see GHC.Cmm
@@ -113,11 +113,9 @@ cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
   let blocks = toBlockListEntryFirst graph
   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
   picBaseMb <- getPicBaseMaybeNat
-  platform <- getPlatform
 
   let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
       tops = proc : concat statics
-      os   = platformOS platform
 
   case picBaseMb of
       Just _picBase -> panic "AArch64.cmmTopCodeGen: picBase not implemented"
@@ -332,10 +330,11 @@ getRegisterReg platform (CmmGlobal mid)
         -- platform.  Hence ...
 
 -- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
-jumpTableEntry config Nothing   = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
-jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
-    where blockLabel = blockLbl blockid
+-- XXX: Add JumpTable Logic
+-- jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
+-- jumpTableEntry config Nothing   = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
+-- jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
+--     where blockLabel = blockLbl blockid
 
 -- -----------------------------------------------------------------------------
 -- Utility
@@ -879,7 +878,6 @@ assignMem_IntCode rep addrE srcE
     (src_reg, _format, code) <- getSomeReg srcE
     platform <- getPlatform
     Amode addr addr_code <- getAmode platform addrE
-    let AddrReg r1 = addr
     return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
             `consOL` (code
             `appOL` addr_code
@@ -889,8 +887,6 @@ assignReg_IntCode _ reg src
   = do
     platform <- getPlatform
     let dst = getRegisterReg platform reg
-        p :: Outputable a => a -> String
-        p = showSDocUnsafe . ppr
     r <- getRegister src
     return $ case r of
       Any _ code              -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
@@ -980,7 +976,8 @@ genCondJump bid expr = do
           MO_U_Ge w -> bcond w UGE
           MO_U_Lt w -> bcond w ULT
           MO_U_Le w -> bcond w ULE
-      _ -> pprPanic "AArch64.genCondJump: " (ppr expr)
+          _ -> pprPanic "AArch64.genCondJump:case mop: " (text $ show expr)
+      _ -> pprPanic "AArch64.genCondJump: " (text $ show expr)
 
 
 genCondBranch
@@ -1234,8 +1231,6 @@ genCCall target dest_regs arg_regs bid = do
         -- XXX: this should be implemented properly!
         MO_Xchg w           -> mkCCall (xchgLabel w)
 
-        _ -> pprPanic "genCCall:PrimTarget" (ppr target)
-
   where
     unsupported :: Show a => a -> b
     unsupported mop = panic ("outOfLineCmmOp: " ++ show mop


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -13,9 +13,9 @@ import GHC.CmmToAsm.AArch64.Regs
 
 import GHC.CmmToAsm.Instr
 import GHC.CmmToAsm.Format
-import GHC.CmmToAsm.Reg.Target
+-- import GHC.CmmToAsm.Reg.Target
 import GHC.CmmToAsm.Config
-import GHC.Platform.Reg.Class
+-- import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
 
 import GHC.Platform.Regs
@@ -23,18 +23,18 @@ import GHC.Cmm.BlockId
 import GHC.Cmm.Dataflow.Collections
 import GHC.Cmm.Dataflow.Label
 import GHC.Cmm
-import GHC.Cmm.Info
-import GHC.Data.FastString
+-- import GHC.Cmm.Info
+-- import GHC.Data.FastString
 import GHC.Cmm.CLabel
 import GHC.Utils.Outputable
 import GHC.Platform
-import GHC.Types.Unique.FM (listToUFM, lookupUFM)
+-- import GHC.Types.Unique.FM (listToUFM, lookupUFM)
 import GHC.Types.Unique.Supply
 
 import Control.Monad (replicateM)
 import Data.Maybe (fromMaybe)
 
-import Debug.Trace
+-- import Debug.Trace
 import GHC.Stack
 
 import Data.Bits ((.&.), complement)
@@ -115,7 +115,6 @@ aarch64_regUsageOfInstr platform instr = case instr of
 
   -- 3. Logical and Move Instructions ------------------------------------------
   AND dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
-  ADDS dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
   ASR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   BIC dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   BICS dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -183,7 +182,7 @@ aarch64_regUsageOfInstr platform instr = case instr of
         -- Is this register interesting for the register allocator?
         interesting :: Platform -> Reg -> Bool
         interesting _        (RegVirtual _)                 = True
-        interesting platform (RegReal (RealRegSingle (-1))) = False
+        interesting _        (RegReal (RealRegSingle (-1))) = False
         interesting platform (RegReal (RealRegSingle i))    = freeReg platform i
         interesting _        (RegReal (RealRegPair{}))
             = panic "AArch64.Instr.interesting: no reg pairs on this arch"
@@ -280,6 +279,7 @@ aarch64_patchRegsOfInstr instr env = case instr of
     SCVTF o1 o2    -> SCVTF (patchOp o1) (patchOp o2)
     FCVTZS o1 o2   -> FCVTZS (patchOp o1) (patchOp o2)
 
+    _ -> pprPanic "aarch64_patchRegsOfInstr" (text $ show instr)
     where
         patchOp :: Operand -> Operand
         patchOp (OpReg w r) = OpReg w (env r)
@@ -338,7 +338,21 @@ aarch64_patchJumpInstr instr patchF
         _ -> pprPanic "patchJumpInstr" (text $ show instr)
 
 -- -----------------------------------------------------------------------------
-
+-- Note [Spills and Reloads]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading
+-- registers.  AArch64's maximum displacement for SP-relative spills and reloads
+-- is essentially [-256, 255], or [0, 0xFFF]*8 = [0, 32760] for 64-bit accesses.
+--
+-- @RESERVED_C_STACK_BYTES@ is 16k, so a single instruction cannot reach every
+-- location in it.  The idea is to use the intra-procedure-call temporary
+-- register ip0 (x16) to compute the addresses of larger offsets.
+--
+-- Using sp itself for that computation would violate the assumption that the
+-- stack pointer points to the top of the stack during signal handling.  As we
+-- can't force every signal to use its own stack, we have to ensure that the
+-- stack pointer always points to the top of the stack, so sp can't be used for computation.
+--
 -- | An instruction to spill a register into a spill slot.
 aarch64_mkSpillInstr
    :: HasCallStack
@@ -348,35 +362,13 @@ aarch64_mkSpillInstr
    -> Int       -- spill slot to use
    -> [Instr]
 
--- Alright, so here's the plan.  On aarch64, we can't spill into arbitrary locations,
--- the range is essentially [-256,255], or [0, 0xFFF]*8 = [0, 32760] for 64bits. For
--- other ranges we need to adjust SP first; we should strive to keep it 16byte aligned.
---
--- To adjust for other range, we can use ADD/SUB, with a positive immediate of [0, 0xFFF],
--- or [0, 0xFFF] << 12.
-
 aarch64_mkSpillInstr config reg delta slot =
   case (spillSlotToOffset config slot) - delta of
-    imm | -256 <= imm && imm <= 255                               -> [ mkStr imm ]
-    imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkStr imm ]
-    imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkAdd (imm .&~. 0xfff)
-                                                                     , mkStr (imm .&.  0xfff)
-                                                                     , mkSub (imm .&~. 0xfff) ]
-    -- imm | imm > 0xfff && imm <= 0xffffff                          -> [ mkAdd (imm .&~. 0xfff)
-    --                                                                  , mkAdd (imm .&.  0xff0)
-    --                                                                  , mkStr (imm .&.  0x00f)
-    --                                                                  , mkSub (imm .&.  0xff0)
-    --                                                                  , mkSub (imm .&~. 0xfff) ]
-    -- if we have a negative offset, well subtract another 0x1000 from it, and then
-    -- use the positive
-    -- imm | -imm > 0xfff && -imm <= 0xffefff && imm .&. 0x7 == 0x0  -> [ mkSub (-imm .&~. 0xfff + 0x1000)
-    --                                                                  , mkStr (0x1000 - (-imm .&. 0xfff))
-    --                                                                  , mkAdd (-imm .&~. 0xfff + 0x1000) ]
-    -- imm | -imm > 0xfff && -imm <= 0xffffff                        -> [ mkSub (-imm .&~. 0xfff)
-    --                                                                  , mkSub (-imm .&.  0xff0)
-    --                                                                  , mkStr (-(-imm .&. 0x00f))
-    --                                                                  , mkAdd (-imm .&.  0xff0)
-    --                                                                  , mkAdd (-imm .&~. 0xfff) ]
+    imm | -256 <= imm && imm <= 255                               -> [ mkStrSp imm ]
+    imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkStrSp imm ]
+    imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkIp0SpillAddr (imm .&~. 0xfff)
+                                                                     , mkStrIp0 (imm .&.  0xfff)
+                                                                     ]
     imm -> pprPanic "aarch64_mkSpillInstr" (text "Unable to spill into" <+> int imm)
     where
         a .&~. b = a .&. (complement b)
@@ -384,14 +376,12 @@ aarch64_mkSpillInstr config reg delta slot =
         fmt = case reg of
             RegReal (RealRegSingle n) | n < 32 -> II64
             _                                  -> FF64
-
-        mkStr imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm)))
-        mkAdd imm = ANN (text "Spill Add") $ ADD sp sp (OpImm (ImmInt imm))
-        mkSub imm = ANN (text "Spill Sub") $ SUB sp sp (OpImm (ImmInt imm))
+        mkIp0SpillAddr imm = ANN (text "Spill: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm))
+        mkStrSp imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm)))
+        mkStrIp0 imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm)))
 
         off = spillSlotToOffset config slot
 
--- fails in compiler/stage2/build/GHC/Driver/Pipeline.o
 aarch64_mkLoadInstr
    :: NCGConfig
    -> Reg       -- register to load
@@ -401,26 +391,11 @@ aarch64_mkLoadInstr
 
 aarch64_mkLoadInstr config reg delta slot =
   case (spillSlotToOffset config slot) - delta of
-    imm | -256 <= imm && imm <= 255                               -> [ mkLdr imm ]
-    imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkLdr imm ]
-    imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkAdd (imm .&~. 0xfff)
-                                                                     , mkLdr (imm .&.  0xfff)
-                                                                     , mkSub (imm .&~. 0xfff) ]
-    -- imm | imm > 0xfff && imm <= 0xffffff                          -> [ mkAdd (imm .&~. 0xfff)
-    --                                                                  , mkAdd (imm .&.  0xff0)
-    --                                                                  , mkLdr (imm .&.  0x00f)
-    --                                                                  , mkSub (imm .&.  0xff0)
-    --                                                                  , mkSub (imm .&~. 0xfff) ]
-    -- if we have a negative offset, well subtract another 0x1000 from it, and then
-    -- use the positive
-    -- imm | -imm > 0xfff && -imm <= 0xffefff && imm .&. 0x7 == 0x0  -> [ mkSub (-imm .&~. 0xfff + 0x1000)
-    --                                                                  , mkLdr (0x1000 - (-imm .&. 0xfff))
-    --                                                                  , mkAdd (-imm .&~. 0xfff + 0x1000) ]
-    -- imm | -imm > 0xfff && -imm <= 0xffffff                        -> [ mkSub (-imm .&~. 0xfff)
-    --                                                                  , mkSub (-imm .&.  0xff0)
-    --                                                                  , mkLdr (-(-imm .&. 0x00f))
-    --                                                                  , mkAdd (-imm .&.  0xff0)
-    --                                                                  , mkAdd (-imm .&~. 0xfff) ]
+    imm | -256 <= imm && imm <= 255                               -> [ mkLdrSp imm ]
+    imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkLdrSp imm ]
+    imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkIp0SpillAddr (imm .&~. 0xfff)
+                                                                     , mkLdrIp0 (imm .&.  0xfff)
+                                                                     ]
     imm -> pprPanic "aarch64_mkSpillInstr" (text "Unable to spill into" <+> int imm)
     where
         a .&~. b = a .&. (complement b)
@@ -429,9 +404,9 @@ aarch64_mkLoadInstr config reg delta slot =
             RegReal (RealRegSingle n) | n < 32 -> II64
             _                                  -> FF64
 
-        mkLdr imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm)))
-        mkAdd imm = ANN (text "Reload Add") $ ADD sp sp (OpImm (ImmInt imm))
-        mkSub imm = ANN (text "Reload Sub") $ SUB sp sp (OpImm (ImmInt imm))
+        mkIp0SpillAddr imm = ANN (text "Reload: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm))
+        mkLdrSp imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm)))
+        mkLdrIp0 imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm)))
 
         off = spillSlotToOffset config slot
 
@@ -477,14 +452,14 @@ aarch64_mkStackAllocInstr platform n
     | n == 0 = []
     | n > 0 && n < 4096 = [ ANN (text "Alloc More Stack") $ SUB sp sp (OpImm (ImmInt n)) ]
     | n > 0 =  ANN (text "Alloc More Stack") (SUB sp sp (OpImm (ImmInt 4095))) : aarch64_mkStackAllocInstr platform (n - 4095)
-aarch64_mkStackAllocInstr platform n = pprPanic "aarch64_mkStackAllocInstr" (int n)
+aarch64_mkStackAllocInstr _platform n = pprPanic "aarch64_mkStackAllocInstr" (int n)
 
 aarch64_mkStackDeallocInstr :: Platform -> Int -> [Instr]
 aarch64_mkStackDeallocInstr platform n
     | n == 0 = []
     | n > 0 && n < 4096 = [ ANN (text "Dealloc More Stack") $ ADD sp sp (OpImm (ImmInt n)) ]
     | n > 0 =  ANN (text "Dealloc More Stack") (ADD sp sp (OpImm (ImmInt 4095))) : aarch64_mkStackDeallocInstr platform (n - 4095)
-aarch64_mkStackDeallocInstr platform n = pprPanic "aarch64_mkStackDeallocInstr" (int n)
+aarch64_mkStackDeallocInstr _platform n = pprPanic "aarch64_mkStackDeallocInstr" (int n)
 
 --
 -- See note [extra spill slots] in X86/Instr.hs
@@ -668,7 +643,7 @@ data Instr
     | FCVTZS Operand Operand
 
 instance Show Instr where
-    show (LDR f o1 o2) = "LDR " ++ show o1 ++ ", " ++ show o2
+    show (LDR _f o1 o2) = "LDR " ++ show o1 ++ ", " ++ show o2
     show (MOV o1 o2) = "MOV " ++ show o1 ++ ", " ++ show o2
     show _ = "missing"
 
@@ -709,10 +684,11 @@ data Operand
 opReg :: Width -> Reg -> Operand
 opReg = OpReg
 
-xzr, wzr, sp :: Operand
+xzr, wzr, sp, ip0 :: Operand
 xzr = OpReg W64 (RegReal (RealRegSingle (-1)))
 wzr = OpReg W32 (RegReal (RealRegSingle (-1)))
 sp  = OpReg W64 (RegReal (RealRegSingle 31))
+ip0 = OpReg W64 (RegReal (RealRegSingle 16))
 
 _x :: Int -> Operand
 _x i = OpReg W64 (RegReal (RealRegSingle i))
@@ -797,9 +773,11 @@ opRegUExt W64 r = OpRegExt W64 r EUXTX 0
 opRegUExt W32 r = OpRegExt W32 r EUXTW 0
 opRegUExt W16 r = OpRegExt W16 r EUXTH 0
 opRegUExt W8  r = OpRegExt W8  r EUXTB 0
+opRegUExt w  _r = pprPanic "opRegUExt" (text $ show w)
 
 opRegSExt :: Width -> Reg -> Operand
 opRegSExt W64 r = OpRegExt W64 r ESXTX 0
 opRegSExt W32 r = OpRegExt W32 r ESXTW 0
 opRegSExt W16 r = OpRegExt W16 r ESXTH 0
-opRegSExt W8  r = OpRegExt W8  r ESXTB 0
\ No newline at end of file
+opRegSExt W8  r = OpRegExt W8  r ESXTB 0
+opRegSExt w  _r = pprPanic "opRegSExt" (text $ show w)
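
A small worked example of the offset split performed by the new
aarch64_mkSpillInstr/aarch64_mkLoadInstr above (illustration only, not GHC
code): offsets beyond the 0xfff load/store immediate range are decomposed
into a 4K-aligned part, added to ip0 (x16), and a small remainder used as
the displacement.  x19 below stands in for whichever register is spilled.

    /* spill_offset_demo.c: mirrors the (imm .&~. 0xfff) / (imm .&. 0xfff)
     * split above; 0x2008 stands in for a spill offset larger than 0xfff. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int imm = 0x2008;
        unsigned int hi  = imm & ~0xfffu;       /* ADD ip0, sp, #hi       */
        unsigned int lo  = imm &  0xfffu;       /* STR reg, [ip0, #lo]    */
        printf("add x16, sp, #0x%x\n", hi);     /* add x16, sp, #0x2000   */
        printf("str x19, [x16, #0x%x]\n", lo);  /* str x19, [x16, #0x8]   */
        return 0;
    }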


=====================================
compiler/GHC/CmmToAsm/AArch64/Regs.hs
=====================================
@@ -9,7 +9,7 @@ import GHC.Prelude
 import GHC.Platform.Reg
 import GHC.Platform.Reg.Class
 import GHC.CmmToAsm.Format
-import GHC.CmmToAsm.Config
+-- import GHC.CmmToAsm.Config
 
 import GHC.Cmm
 import GHC.Cmm.CLabel           ( CLabel )
@@ -19,8 +19,8 @@ import GHC.Platform.Regs
 import GHC.Utils.Outputable
 import GHC.Platform
 
-import Data.Word        ( Word8, Word16, Word32, Word64 )
-import Data.Int         ( Int8, Int16, Int32, Int64 )
+-- import Data.Word        ( Word8, Word16, Word32, Word64 )
+-- import Data.Int         ( Int8, Int16, Int32, Int64 )
 
 allMachRegNos   :: [RegNo]
 allMachRegNos   = [0..31] ++ [32..63]


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
=====================================
@@ -12,7 +12,7 @@ import GHC.Platform
 import Data.Word
 import Data.Bits
 
-import Debug.Trace
+-- import Debug.Trace
 import GHC.Stack
 -- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp
 -- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON
@@ -122,7 +122,7 @@ getFreeRegs cls (FreeRegs g f)
   | RcDouble  <- cls = go 32 f 31
   | RcInteger <- cls = go  0 g 18
     where
-        go off _ i | i < 0 = []
+        go _   _ i | i < 0 = []
         go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1)
                    | otherwise   = go off x $! i - 1
 


=====================================
compiler/GHC/CmmToAsm/SPARC/Instr.hs
=====================================
@@ -376,7 +376,7 @@ sparc_mkSpillInstr
     -> Int      -- ^ spill slot to use
     -> [Instr]
 
-sparc_mkSpillInstr config reg delta slot
+sparc_mkSpillInstr config reg _delta slot
  = let  platform = ncgPlatform config
         off      = spillSlotToOffset config slot
         off_w    = 1 + (off `div` 4)
@@ -396,7 +396,7 @@ sparc_mkLoadInstr
     -> Int      -- ^ spill slot to use
     -> [Instr]
 
-sparc_mkLoadInstr config reg delta slot
+sparc_mkLoadInstr config reg _delta slot
   = let platform = ncgPlatform config
         off      = spillSlotToOffset config slot
         off_w    = 1 + (off `div` 4)


=====================================
includes/CodeGen.Platform.hs
=====================================
@@ -957,7 +957,8 @@ freeReg 31 = False
 freeReg 30 = False
 -- frame pointer
 freeReg 29 = False
-
+-- ip0 -- used for spill offset computations
+freeReg 16 = False
 
 # if defined(REG_Base)
 freeReg REG_Base  = False


=====================================
rts/StgCRun.c
=====================================
@@ -883,30 +883,41 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
 
 #if defined(aarch64_HOST_ARCH)
 
+/* See also AArch64/Instr.hs
+ *
+ * Save caller save registers
+ * This is x0-x18
+ *
+ * For SIMD/FP Registers:
+ * Registers v8-v15 must be preserved by a callee across subroutine calls;
+ * the remaining registers (v0-v7, v16-v31) do not need to be preserved (or
+ * should be preserved by the caller). Additionally, only the bottom 64 bits
+ * of each value stored in v8-v15 need to be preserved [7]; it is the
+ * responsibility of the caller to preserve larger values.
+ *
+ * .---------------------------------------------------------------------------------------------------------------------------------------------------------------.
+ * |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
+ * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
+ * |== General Purpose registers ==================================================================================================================================|
+ * | <---- argument passing -------------> | IR | <------- tmp registers --------> | IP0| IP1| PL | <------------------- callee saved ------------> | FP | LR | SP |
+ * | <------ free registers --------------------------------------------------------------------> | BR | Sp | Hp | R1 | R2 | R3 | R4 | R5 | R6 | SL | -- | -- | -- |
+ * |== SIMD/FP Registers ==========================================================================================================================================|
+ * | <---- argument passing -------------> | <-- callee saved (lower 64 bits) ---> | <--------------------------------------- caller saved ----------------------> |
+ * | <------ free registers -------------> | F1 | F2 | F3 | F4 | D1 | D2 | D3 | D4 | <------ free registers -----------------------------------------------------> |
+ * '---------------------------------------------------------------------------------------------------------------------------------------------------------------'
+ * IR: Indirect result location register, IP: Intra-procedure register, PL: Platform register, FP: Frame pointer, LR: Link register, SP: Stack pointer
+ * BR: Base, SL: SpLim
+ */
+
 StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg) {
     StgRegTable * r;
     __asm__ volatile (
         /*
          * Save callee-saves registers on behalf of the STG code.
-         * Floating point registers only need the bottom 64 bits preserved.
-         * We need to use the names x16, x17, x29 and x30 instead of ip0
-         * ip1, fp and lp because one of either clang or gcc doesn't understand
-         * the later names.
+         * Note: the compiler will insert this for us if we specify the
+         *       clobber list correctly.  See below.
          */
-        "stp x29,  x30,  [sp, #-16]!\n\t"
-        "mov x29, sp\n\t"
-        "stp x16, x17, [sp, #-16]!\n\t"
-        "stp x19, x20, [sp, #-16]!\n\t"
-        "stp x21, x22, [sp, #-16]!\n\t"
-        "stp x23, x24, [sp, #-16]!\n\t"
-        "stp x25, x26, [sp, #-16]!\n\t"
-        "stp x27, x28, [sp, #-16]!\n\t"
-        "stp d8,  d9,  [sp, #-16]!\n\t"
-        "stp d10, d11, [sp, #-16]!\n\t"
-        "stp d12, d13, [sp, #-16]!\n\t"
-        "stp d14, d15, [sp, #-16]!\n\t"
-
         /*
          * allocate some space for Stg machine's temporary storage.
          * Note: RESERVED_C_STACK_BYTES has to be a round number here or
@@ -935,26 +946,28 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
          * Return the new register table, taking it from Stg's R1 (ARM64's R22).
          */
         "mov %0, x22\n\t"
-        /*
-         * restore callee-saves registers.
-         */
 
-        "ldp d14, d15, [sp], #16\n\t"
-        "ldp d12, d13, [sp], #16\n\t"
-        "ldp d10, d11, [sp], #16\n\t"
-        "ldp d8,  d9,  [sp], #16\n\t"
-        "ldp x27, x28, [sp], #16\n\t"
-        "ldp x25, x26, [sp], #16\n\t"
-        "ldp x23, x24, [sp], #16\n\t"
-        "ldp x21, x22, [sp], #16\n\t"
-        "ldp x19, x20, [sp], #16\n\t"
-        "ldp x16, x17, [sp], #16\n\t"
-        "ldp x29,  x30,  [sp], #16\n\t"
+        /* Restore callee-saved registers.
+         * Note: the compiler will insert this for us if we specify the
+         *       clobber list correctly.  See below.
+         */
 
+      /* Outputs (r) */
       : "=r" (r)
+      /* Inputs (f, basereg, RESERVED_C_STACK_BYTES) */
       : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
-        : "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
-          "%x16", "%x17", "%x30"
+      /* Clobbered */
+      :   // any of the stg calls may directly or indirectly modify these:
+          "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
+          // the IP registers usually aren't, but better safe than sorry.  However, I'm not
+          // sure we even have to list them; there is no expectation that they survive a call.
+          "%x16", "%x17",
+          // The link register holds the address we want to return to, and we may
+          // overwrite it with BL instructions in the Haskell code.
+          "%x30",
+          // floating point registers
+          "%d8", "%d9", "%d10", "%d11", "%d12", "%d13", "%d14", "%d15",
+          "memory"
     );
     return r;
 }


=====================================
testsuite/tests/rts/all.T
=====================================
@@ -50,6 +50,11 @@ test('divbyzero',
       # behavior on division-by-zero (#10332).
       omit_ways(llvm_ways),
       when(not(have_ncg()), skip),
+      # AArch64 does not raise div-by-zero exceptions for sdiv/udiv.
+      # The only option would be to implement the check in assembly for each
+      # division.  Neither gcc nor llvm does this as of right now.  Microsoft
+      # apparently does, though?
+      when(arch('aarch64'), skip),
       # Apparently the output can be different on different
       # Linux setups, so just ignore it. As long as we get
       # the right exit code we're OK.
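
For reference, a sketch (not part of the patch) of the behaviour behind the
skip: AArch64's sdiv/udiv instructions do not trap on a zero divisor, they
simply return 0, so the SIGFPE this test waits for never arrives.  Plain C
division by zero is undefined behaviour, so the instruction is exercised
through inline assembly.

    /* sdiv_zero_demo.c: build and run on an AArch64 host */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
    #if defined(__aarch64__)
        int64_t num = 42, den = 0, res;
        __asm__ volatile ("sdiv %0, %1, %2" : "=r" (res) : "r" (num), "r" (den));
        printf("42 sdiv 0 = %lld (no signal raised)\n", (long long) res);
    #else
        printf("not an AArch64 host\n");
    #endif
        return 0;
    }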



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/c0f422766833fe2a4747a94897100e9c99ac0164...1ca1fe4eca475cca2ec6037488778400a336ecf8
