[Git][ghc/ghc][wip/angerman/aarch64-ncg] 11 commits: Fix MO_SF_Conv

Moritz Angermann gitlab at gitlab.haskell.org
Mon Jul 20 05:43:34 UTC 2020



Moritz Angermann pushed to branch wip/angerman/aarch64-ncg at Glasgow Haskell Compiler / GHC


Commits:
94769087 by Moritz Angermann at 2020-07-18T00:00:00+00:00
Fix MO_SF_Conv

- - - - -
6138e2f1 by Moritz Angermann at 2020-07-18T00:00:00+00:00
Add Comment re MO_Memcpy

- - - - -
11ee14ae by Moritz Angermann at 2020-07-18T00:00:00+00:00
Always PIC via GOT

- - - - -
c806f76a by Moritz Angermann at 2020-07-18T00:00:00+00:00
Fix up generated assembly.

Don't generate identity moves
e.g. mov x18, x18

- - - - -
1c66c543 by Moritz Angermann at 2020-07-18T00:00:00+00:00
Drop superfulous alignment generation.

- - - - -
b977926b by Moritz Angermann at 2020-07-18T00:00:00+00:00
Hadrian :fire:

- - - - -
456f08b2 by Moritz Angermann at 2020-07-18T00:00:00+00:00
Address Tekenobus comments.

Thanks!

- - - - -
ec2f768d by Moritz Angermann at 2020-07-19T00:00:00+00:00
Adds J to distinguish jumps from B.

Maybe this would be better handled with a phantom type?

- - - - -
3bf25c2b by Moritz Angermann at 2020-07-19T00:00:00+00:00
Make sp an Operand

- - - - -
dbcf0669 by Moritz Angermann at 2020-07-19T00:00:20+00:00
allocMoreStack

This is still broken, as we can't spill into arbitrary ranges. Hence while we can allocate extra space, we can't really spill past 4096 offsets due to the immediat having to be encoded. This leaves us with a max of 512 spill slots.

We *can* work around this if we change the sp though.

- - - - -
b765dfe4 by Moritz Angermann at 2020-07-19T00:00:00+00:00
[Spill/Reload] Spill Around :fire:

- - - - -


15 changed files:

- compiler/GHC/CmmToAsm.hs
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- compiler/GHC/CmmToAsm/AArch64/Regs.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Linear/State.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/SPARC/Instr.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- hadrian/src/Oracles/Flag.hs


Changes:

=====================================
compiler/GHC/CmmToAsm.hs
=====================================
@@ -275,7 +275,7 @@ aarch64NcgImpl config
        ,pprNatCmmDecl             = AArch64.Ppr.pprNatCmmDecl config
        ,maxSpillSlots             = AArch64.Instr.maxSpillSlots config
        ,allocatableRegs           = AArch64.Regs.allocatableRegs platform
-       ,ncgAllocMoreStack         = noAllocMoreStack
+       ,ncgAllocMoreStack         = AArch64.Instr.noAllocMoreStack
        ,ncgExpandTop              = id
        ,ncgMakeFarBranches        = const id
        ,extractUnwindPoints       = const []


=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -575,7 +575,7 @@ getRegister' config plat expr
         MO_S_Neg w -> return $ Any (intFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
         MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg))
 
-        MO_SF_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg))  -- (Signed ConVerT Float)
+        MO_SF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg))  -- (Signed ConVerT Float)
         MO_FS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed)
 
         -- XXX this is very hacky
@@ -905,7 +905,7 @@ assignReg_FltCode = assignReg_IntCode
 -- Jumps
 genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
 genJump (CmmLit (CmmLabel lbl)) regs
-  = return $ unitOL (B (TLabel lbl))
+  = return $ unitOL (J (TLabel lbl))
   -- = return (toOL [ PUSH_STACK_FRAME
   --               , DELTA (-16)
   --               , B (TLabel lbl)
@@ -913,7 +913,7 @@ genJump (CmmLit (CmmLabel lbl)) regs
   --               , DELTA 0] )
 genJump expr regs = do
     (target, _format, code) <- getSomeReg expr
-    return (code `appOL` unitOL (ANN (text $ show expr) (B (TReg target)))
+    return (code `appOL` unitOL (ANN (text $ show expr) (J (TReg target)))
                         --  toOL [ PUSH_STACK_FRAME
                         --       , DELTA (-16)
                         --       , B (TReg target)
@@ -1204,6 +1204,11 @@ genCCall target dest_regs arg_regs bid = do
 
         -- Memory copy/set/move/cmp, with alignment for optimization
 
+        -- XXX Optimize and use e.g. quad registers to move memory around instead
+        -- of offloading this to memcpy. For small memcpys we can utilize
+        -- the 128bit quad registers in NEON to move block of bytes around.
+        -- Might also make sense of small memsets? Use xzr? What's the function
+        -- call overhead?
         MO_Memcpy  _align   -> mkCCall "memcpy"
         MO_Memset  _align   -> mkCCall "memset"
         MO_Memmove _align   -> mkCCall "memmove"


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -127,6 +127,7 @@ aarch64_regUsageOfInstr platform instr = case instr of
   ROR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   TST src1 src2            -> usage (regOp src1 ++ regOp src2, [])
   -- 4. Branch Instructions ----------------------------------------------------
+  J t                      -> usage (regTarget t, [])
   B t                      -> usage (regTarget t, [])
   BCOND _ t                -> usage (regTarget t, [])
   BL t                     -> usage (regTarget t, callerSavedRegisters)
@@ -252,6 +253,7 @@ aarch64_patchRegsOfInstr instr env = case instr of
     TST o1 o2      -> TST  (patchOp o1) (patchOp o2)
 
     -- 4. Branch Instructions --------------------------------------------------
+    J t            -> J (patchTarget t)
     B t            -> B (patchTarget t)
     BL t           -> BL (patchTarget t)
     BCOND c t      -> BCOND c (patchTarget t)
@@ -298,6 +300,7 @@ aarch64_isJumpishInstr instr = case instr of
     ANN _ i -> aarch64_isJumpishInstr i
     CBZ{} -> True
     CBNZ{} -> True
+    J{} -> True
     B{} -> True
     BL{} -> True
     BCOND{} -> True
@@ -310,6 +313,7 @@ aarch64_jumpDestsOfInstr :: Instr -> [BlockId]
 aarch64_jumpDestsOfInstr (ANN _ i) = aarch64_jumpDestsOfInstr i
 aarch64_jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
+aarch64_jumpDestsOfInstr (J t) = [id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (BL t) = [ id | TBlock id <- [t]]
 aarch64_jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]]
@@ -324,10 +328,11 @@ aarch64_patchJumpInstr instr patchF
         ANN d i -> ANN d (aarch64_patchJumpInstr i patchF)
         CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid))
         CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid))
+        J (TBlock bid) -> J (TBlock (patchF bid))
         B (TBlock bid) -> B (TBlock (patchF bid))
         BL (TBlock bid) -> BL (TBlock (patchF bid))
         BCOND c (TBlock bid) -> BCOND c (TBlock (patchF bid))
-        _ -> pprPanic "patchJumpInstr" (text "<instr>")
+        _ -> pprPanic "patchJumpInstr" (text $ show instr)
 
 -- -----------------------------------------------------------------------------
 
@@ -337,10 +342,28 @@ aarch64_mkSpillInstr
    -> Reg       -- register to spill
    -> Int       -- current stack delta
    -> Int       -- spill slot to use
-   -> Instr
+   -> (Int, [Instr])
+-- XXX this is stupid. We essentially do sp <- sp - 4095; str xN, [sp - ...] ; sp <- sp + 4095
+aarch64_mkSpillInstr config reg delta slot | (spillSlotToOffset config slot) - delta > 4095
+    = let (d, isns) = aarch64_mkSpillInstr config reg (delta + 4095) slot
+      in (d, SUB sp sp (OpImm (ImmInt 4095)) : isns ++ [ADD sp sp (OpImm (ImmInt 4095))])
+
+aarch64_mkSpillInstr config reg delta slot | (spillSlotToOffset config slot) - delta > 255
+    = let (d, isns) = aarch64_mkSpillInstr config reg (delta + delta') slot
+      in (d, SUB sp sp (OpImm (ImmInt delta')) : isns ++ [ADD sp sp (OpImm (ImmInt delta'))])
+      where delta' = (spillSlotToOffset config slot) - delta
+
+aarch64_mkSpillInstr config reg delta slot | (spillSlotToOffset config slot) - delta < -4095
+    = let (d, isns) = aarch64_mkSpillInstr config reg (delta - 4095) slot
+      in (d, ADD sp sp (OpImm (ImmInt 4095)) : isns ++ [SUB sp sp (OpImm (ImmInt 4095))])
+
+aarch64_mkSpillInstr config reg delta slot | (spillSlotToOffset config slot) - delta < -256
+    = let (d, isns) = aarch64_mkSpillInstr config reg (delta + delta') slot
+      in (d, SUB sp sp (OpImm (ImmInt delta')) : isns ++ [ADD sp sp (OpImm (ImmInt delta'))])
+      where delta' = (spillSlotToOffset config slot) - delta
 
 aarch64_mkSpillInstr config reg delta slot
-    = ANN (text "Spill") $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt $ off - delta)))
+    = (delta, [ANN (text "Spill") $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt $ off - delta)))])
     where
         fmt = case reg of
             RegReal (RealRegSingle n) | n < 32 -> II64
@@ -352,10 +375,29 @@ aarch64_mkLoadInstr
    -> Reg       -- register to load
    -> Int       -- current stack delta
    -> Int       -- spill slot to use
-   -> Instr
+   -> (Int, [Instr])
+-- XXX this is stupid. We essentially do sp <- sp - 4095; str xN, [sp - ...] ; sp <- sp + 4095
+aarch64_mkLoadInstr config reg delta slot | (spillSlotToOffset config slot) - delta > 4095
+    = let (d, isns) = aarch64_mkLoadInstr config reg (delta + 4095) slot
+      in (d, SUB sp sp (OpImm (ImmInt 4095)) : isns ++ [ADD sp sp (OpImm (ImmInt 4095))])
+
+aarch64_mkLoadInstr config reg delta slot | (spillSlotToOffset config slot) - delta > 255
+    = let (d, isns) = aarch64_mkLoadInstr config reg (delta + delta') slot
+      in (d, SUB sp sp (OpImm (ImmInt delta')) : isns ++ [ADD sp sp (OpImm (ImmInt delta'))])
+      where delta' = (spillSlotToOffset config slot) - delta
+
+aarch64_mkLoadInstr config reg delta slot | (spillSlotToOffset config slot) - delta < -4095
+    = let (d, isns) = aarch64_mkLoadInstr config reg (delta - 4096) slot
+      in (d, ADD sp sp (OpImm (ImmInt 4095)) : isns ++ [SUB sp sp (OpImm (ImmInt 4095))])
+
+aarch64_mkLoadInstr config reg delta slot | (spillSlotToOffset config slot) - delta < -256
+    = let (d, isns) = aarch64_mkLoadInstr config reg (delta + delta') slot
+      in (d, SUB sp sp (OpImm (ImmInt delta')) : isns ++ [ADD sp sp (OpImm (ImmInt delta'))])
+      where delta' = (spillSlotToOffset config slot) - delta
+
 
 aarch64_mkLoadInstr config reg delta slot
-    = ANN (text "Reload") $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt $ off - delta)))
+    = (delta, [ANN (text "Reload") $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt $ off - delta)))])
     where
         fmt = case reg of
             RegReal (RealRegSingle n) | n < 32 -> II64
@@ -418,11 +460,66 @@ aarch64_mkJumpInstr :: BlockId -> [Instr]
 aarch64_mkJumpInstr id = [B (TBlock id)]
 
 aarch64_mkStackAllocInstr :: Platform -> Int -> [Instr]
-aarch64_mkStackAllocInstr platform amount = pprPanic "mkStackAllocInstr" (ppr amount)
+aarch64_mkStackAllocInstr platform n
+    | n == 0 = []
+    | n > 0 && n < 4096 = [ SUB sp sp (OpImm (ImmInt n)) ]
+    | n > 0 = SUB sp sp (OpImm (ImmInt 4095)) : aarch64_mkStackAllocInstr platform (n - 4095)
+aarch64_mkStackAllocInstr platform n = pprPanic "aarch64_mkStackAllocInstr" (int n)
 
 aarch64_mkStackDeallocInstr :: Platform -> Int -> [Instr]
-aarch64_mkStackDeallocInstr platform amount = pprPanic "mkStackDeallocInstr" (ppr amount)
+aarch64_mkStackDeallocInstr platform n
+    | n == 0 = []
+    | n > 0 && n < 4096 = [ ADD sp sp (OpImm (ImmInt n)) ]
+    | n > 0 = ADD sp sp (OpImm (ImmInt 4095)) : aarch64_mkStackAllocInstr platform (n - 4095)
+aarch64_mkStackDeallocInstr platform n = pprPanic "aarch64_mkStackAllocInstr" (int n)
 
+--
+-- See note [extra spill slots] in X86/Instr.hs
+--
+allocMoreStack
+  :: Platform
+  -> Int
+  -> NatCmmDecl statics GHC.CmmToAsm.AArch64.Instr.Instr
+  -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.AArch64.Instr.Instr, [(BlockId,BlockId)])
+
+allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
+allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
+    let entries = entryBlocks proc
+
+    uniqs <- replicateM (length entries) getUniqueM
+
+    let
+      delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
+        where x = slots * spillSlotSize -- sp delta
+
+      alloc   = mkStackAllocInstr   platform delta
+      dealloc = mkStackDeallocInstr platform delta
+
+      retargetList = (zip entries (map mkBlockId uniqs))
+
+      new_blockmap :: LabelMap BlockId
+      new_blockmap = mapFromList retargetList
+
+      insert_stack_insn (BasicBlock id insns)
+        | Just new_blockid <- mapLookup id new_blockmap
+        = [ BasicBlock id $ alloc ++ [ B (TBlock new_blockid) ]
+          , BasicBlock new_blockid block' ]
+        | otherwise
+        = [ BasicBlock id block' ]
+        where
+          block' = foldr insert_dealloc [] insns
+
+      insert_dealloc insn r = case insn of
+        J _ -> dealloc ++ (insn : r)
+        _other | aarch64_jumpDestsOfInstr insn /= []
+            -> aarch64_patchJumpInstr insn retarget : r
+        _other -> insn : r
+
+        where retarget b = fromMaybe b (mapLookup b new_blockmap)
+
+      new_code = concatMap insert_stack_insn code
+    -- in
+    return (CmmProc info lbl live (ListGraph new_code), retargetList)
 -- -----------------------------------------------------------------------------
 -- Machine's assembly language
 
@@ -541,6 +638,7 @@ data Instr
     | CBZ Operand Target  -- if op == 0, then branch.
     | CBNZ Operand Target -- if op /= 0, then branch.
     -- Branching.
+    | J Target            -- like B, but only generated from genJump. Used to distinguish genJumps from others.
     | B Target            -- unconditional branching b/br. (To a blockid, label or register)
     | BL Target           -- branch and link (e.g. set x30 to next pc, and branch)
     | BCOND Cond Target   -- branch with condition. b.<cond>
@@ -571,11 +669,11 @@ data Target
 data ExtMode
     = EUXTB | EUXTH | EUXTW | EUXTX
     | ESXTB | ESXTH | ESXTW | ESXTX
-    deriving Show
+    deriving (Eq, Show)
 
 data ShiftMode
     = SLSL | SLSR | SASR | SROR
-    deriving Show
+    deriving (Eq, Show)
 
 
 -- We can also add ExtShift to Extension.
@@ -591,15 +689,16 @@ data Operand
         | OpImm Imm            -- immediate value
         | OpImmShift Imm ShiftMode RegShift
         | OpAddr AddrMode       -- memory reference
-        deriving Show
+        deriving (Eq, Show)
 
 -- Smart constructors
 opReg :: Width -> Reg -> Operand
 opReg = OpReg
 
-xzr, wzr :: Operand
+xzr, wzr, sp :: Operand
 xzr = OpReg W64 (RegReal (RealRegSingle (-1)))
 wzr = OpReg W32 (RegReal (RealRegSingle (-1)))
+sp  = OpReg W64 (RegReal (RealRegSingle 31))
 
 _x :: Int -> Operand
 _x i = OpReg W64 (RegReal (RealRegSingle i))


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -2,6 +2,8 @@ module GHC.CmmToAsm.AArch64.Ppr (pprNatCmmDecl) where
 
 import GHC.Prelude hiding (EQ)
 
+import Data.List (findIndex, all)
+
 import GHC.CmmToAsm.AArch64.Instr
 import GHC.CmmToAsm.AArch64.Regs
 import GHC.CmmToAsm.AArch64.Cond
@@ -44,7 +46,8 @@ pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
     Nothing ->
         -- special case for code without info table:
         pprSectionAlign config (Section Text lbl) $$
-        pprProcAlignment config $$
+        -- do not
+        -- pprProcAlignment config $$
         pprLabel platform lbl $$ -- blocks guaranteed not null, so label needed
         vcat (map (pprBasicBlock config top_info) blocks) $$
         (if ncgDebugLevel config > 0
@@ -53,7 +56,7 @@ pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
 
     Just (CmmStaticsRaw info_lbl _) ->
       pprSectionAlign config (Section Text info_lbl) $$
-      pprProcAlignment config $$
+      -- pprProcAlignment config $$
       (if platformHasSubsectionsViaSymbols platform
           then ppr (mkDeadStripPreventer info_lbl) <> char ':'
           else empty) $$
@@ -79,37 +82,31 @@ pprLabel platform lbl =
 
 pprAlign :: Platform -> Alignment -> SDoc
 pprAlign platform alignment
-        = text ".align " <> int (alignmentOn platform)
-  where
-        bytes = alignmentBytes alignment
-        alignmentOn platform = if platformOS platform == OSDarwin
-                               then log2 bytes
-                               else      bytes
-
-        log2 :: Int -> Int  -- cache the common ones
-        log2 1 = 0
-        log2 2 = 1
-        log2 4 = 2
-        log2 8 = 3
-        log2 n = 1 + log2 (n `quot` 2)
+        = text "\t.balign " <> int (alignmentBytes alignment)
 
 -- | Print appropriate alignment for the given section type.
 pprAlignForSection :: Platform -> SectionType -> SDoc
 pprAlignForSection _platform _seg
     -- .balign is stable, whereas .align is platform dependent.
-    = text ".balign 8" --  always 8
+    = text "\t.balign 8" --  always 8
 
 instance Outputable Instr where
     ppr instr = sdocWithDynFlags $ \dflags ->
                        pprInstr (targetPlatform dflags) instr
 
 -- | Print section header and appropriate alignment for that section.
+--
+-- This one will emit the header:
+--
+--     .section .text
+--     .balign 8
+--
 pprSectionAlign :: NCGConfig -> Section -> SDoc
 pprSectionAlign _config (Section (OtherSection _) _) =
      panic "AArch64.Ppr.pprSectionAlign: unknown section"
 pprSectionAlign config sec@(Section seg _) =
-    pprSectionHeader config sec $$
-    pprAlignForSection (ncgPlatform config) seg
+    pprSectionHeader config sec
+    $$ pprAlignForSection (ncgPlatform config) seg
 
 -- | Output the ELF .size directive.
 pprSizeDecl :: Platform -> CLabel -> SDoc
@@ -123,18 +120,50 @@ pprBasicBlock :: NCGConfig -> LabelMap RawCmmStatics -> NatBasicBlock Instr
 pprBasicBlock config info_env (BasicBlock blockid instrs)
   = maybe_infotable $
     pprLabel platform asmLbl $$
-    vcat (map (pprInstr platform) instrs) $$
+    vcat (map (pprInstr platform) (detectTrivialDeadlock optInstrs)) $$
     (if  ncgDebugLevel config > 0
       then ppr (mkAsmTempEndLabel asmLbl) <> char ':'
       else empty
     )
   where
+    -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
+    optInstrs = filter f instrs
+      where f (MOV o1 o2) | o1 == o2 = False
+            f _ = True
+
+    -- XXX: put deadlock detection behind a flag. This will need to pass over
+    -- each emitted instruction and can thus cause a slowdown in the number of
+    -- instructions we generate.
+    --
+    -- detect the trivial cases where we would need -fno-omit-yields
+    -- those are deadlocks where we have only an unconditional branch
+    -- instruction back to the block head, with no escape inbetween.
+    -- See https://gitlab.haskell.org/ghc/ghc/-/issues/367
+    -- This only intends to catch the very trivial case, not the more
+    -- compilicated cases.
+    detectTrivialDeadlock :: [Instr] -> [Instr]
+    detectTrivialDeadlock instrs = case (findIndex isSelfBranch instrs) of
+      Just n | all (not . aarch64_isJumpishInstr) (take n instrs) ->
+        pprPanic "AArch64 NCG"
+                $  text "Deadlock detected! Re compile with -fno-omit-yields."
+                $$ text ""
+                $$ pprLabel platform asmLbl
+                $$ vcat (map (pprInstr platform) (take (n + 1) instrs))
+                $$ text ""
+                $$ text "See https://gitlab.haskell.org/ghc/ghc/-/issues/367"
+      -- Nothing, or there are jumpishInstructions before the self branch,
+      -- probably not a deadlock.
+      _ -> instrs
+
+      where isSelfBranch (B (TBlock blockid')) = blockid' == blockid
+            isSelfBranch _ = False
+
     asmLbl = blockLbl blockid
     platform = ncgPlatform config
     maybe_infotable c = case mapLookup blockid info_env of
        Nothing   -> c
        Just (CmmStaticsRaw info_lbl info) ->
-           pprAlignForSection platform Text $$
+          --  pprAlignForSection platform Text $$
            infoTableLoc $$
            vcat (map (pprData config) info) $$
            pprLabel platform info_lbl $$
@@ -180,7 +209,7 @@ pprData config (CmmStaticLit lit) = pprDataItem config lit
 pprGloblDecl :: CLabel -> SDoc
 pprGloblDecl lbl
   | not (externallyVisibleCLabel lbl) = empty
-  | otherwise = text ".globl " <> ppr lbl
+  | otherwise = text "\t.globl " <> ppr lbl
 
 -- See discussion in X86.Ppr
 -- for why this is necessary.  Essentially we need to ensure that we never
@@ -419,6 +448,7 @@ pprInstr platform instr = case instr of
   TST o1 o2     -> text "\ttst" <+> pprOp o1 <> comma <+> pprOp o2
 
   -- 4. Branch Instructions ----------------------------------------------------
+  J t            -> pprInstr platform (B t)
   B (TBlock bid) -> text "\tb" <+> ppr (mkLocalBlockLabel (getUnique bid))
   B (TLabel lbl) -> text "\tb" <+> ppr lbl
   B (TReg r)     -> text "\tbr" <+> pprReg W64 r
@@ -441,7 +471,7 @@ pprInstr platform instr = case instr of
 
   CBNZ o (TBlock bid) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
   CBNZ o (TLabel lbl) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr lbl
-  CBNZ c (TReg r)     -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
+  CBNZ c (TReg r)     -> panic "AArch64.ppr: No conditional (cbnz) branching to registers!"
 
   -- 7. Load and Store Instructions --------------------------------------------
   -- NOTE: GHC may do whacky things where it only load the lower part of an
@@ -453,14 +483,22 @@ pprInstr platform instr = case instr of
     text "\tstrh" <+> pprOp o1 <> comma <+> pprOp o2
   STR f o1 o2 -> text "\tstr" <+> pprOp o1 <> comma <+> pprOp o2
 
+  -- LDR f o1 (OpImm (ImmIndex lbl off)) ->
+  --   text "\tadrp" <+> pprOp o1 <> comma <+> ppr lbl $$
+  --   text "\tadd" <+> pprOp o1 <> comma <+> pprOp o1 <> comma <+> text ":lo12:" <> ppr lbl $$
+  --   text "\tadd" <+> pprOp o1 <> comma <+> pprOp o1 <> comma <+> char '#' <> int off -- XXX: check that off is in 12bits.
+  -- always GOT loads
   LDR f o1 (OpImm (ImmIndex lbl off)) ->
-    text "\tadrp " <+> pprOp o1 <> comma <+> ppr lbl $$
-    text "\tadd" <+> pprOp o1 <> comma <+> pprOp o1 <> comma <+> text ":lo12:" <> ppr lbl $$
+    text "\tadrp" <+> pprOp o1 <> comma <+> text ":got:" <> ppr lbl $$
+    text "\tldr" <+> pprOp o1 <> comma <+> text "[" <> pprOp o1 <> comma <+> text":got_lo12:" <> ppr lbl <> text "]" $$
     text "\tadd" <+> pprOp o1 <> comma <+> pprOp o1 <> comma <+> char '#' <> int off -- XXX: check that off is in 12bits.
 
+  -- LDR f o1 (OpImm (ImmCLbl lbl)) ->
+  --   text "\tadrp" <+> pprOp o1 <> comma <+> ppr lbl $$
+  --   text "\tadd" <+> pprOp o1 <> comma <+> pprOp o1 <> comma <+> text ":lo12:" <> ppr lbl
   LDR f o1 (OpImm (ImmCLbl lbl)) ->
-    text "\tadrp" <+> pprOp o1 <> comma <+> ppr lbl $$
-    text "\tadd" <+> pprOp o1 <> comma <+> pprOp o1 <> comma <+> text ":lo12:" <> ppr lbl
+    text "\tadrp" <+> pprOp o1 <> comma <+> text ":got:" <> ppr lbl $$
+    text "\tldr" <+> pprOp o1 <> comma <+> text "[" <> pprOp o1 <> comma <+> text":got_lo12:" <> ppr lbl <> text "]"
 
   LDR f o1@(OpReg W8 (RegReal (RealRegSingle i))) o2 | i < 32 ->
     text "\tldrsb" <+> pprOp o1 <> comma <+> pprOp o2


=====================================
compiler/GHC/CmmToAsm/AArch64/Regs.hs
=====================================
@@ -46,8 +46,9 @@ allFpArgRegs = map regSingle [32..39]
 -- 22-27: R1-R6
 -- 28: SpLim
 
-sp :: Reg
-sp = regSingle 20
+-- This is the STG Sp reg.
+-- sp :: Reg
+-- sp = regSingle 20
 
 -- addressing modes ------------------------------------------------------------
 
@@ -55,7 +56,7 @@ data AddrMode
         = AddrRegReg    Reg Reg
         | AddrRegImm    Reg Imm
         | AddrReg       Reg
-        deriving Show
+        deriving (Eq, Show)
 
 -- -----------------------------------------------------------------------------
 -- Immediates
@@ -70,11 +71,14 @@ data Imm
   | ImmDouble   Rational
   | ImmConstantSum Imm Imm
   | ImmConstantDiff Imm Imm
-  deriving Show
+  deriving (Eq, Show)
 
 instance Show SDoc where
   show = showSDocUnsafe
 
+instance Eq SDoc where
+  lhs == rhs = show lhs == show rhs
+
 strImmLit :: String -> Imm
 strImmLit s = ImmLit (text s)
 


=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -141,7 +141,7 @@ class   Instruction instr where
                 -> Reg          -- ^ the reg to spill
                 -> Int          -- ^ the current stack delta
                 -> Int          -- ^ spill slot to use
-                -> instr
+                -> (Int, [instr]) -- ^ (new stack delta, instruction)
 
 
         -- | An instruction to reload a register from a spill slot.
@@ -150,7 +150,7 @@ class   Instruction instr where
                 -> Reg          -- ^ the reg to reload.
                 -> Int          -- ^ the current stack delta
                 -> Int          -- ^ the spill slot to use
-                -> instr
+                -> (Int, [instr]) -- ^ (new stack delta, instruction)
 
         -- | See if this instruction is telling us the current C stack delta
         takeDeltaInstr


=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -541,7 +541,7 @@ ppc_mkSpillInstr
    -> Reg       -- register to spill
    -> Int       -- current stack delta
    -> Int       -- spill slot to use
-   -> Instr
+   -> (Int, [Instr])
 
 ppc_mkSpillInstr config reg delta slot
   = let platform = ncgPlatform config
@@ -558,7 +558,7 @@ ppc_mkSpillInstr config reg delta slot
                 Just _  -> ST
                 Nothing -> STFAR -- pseudo instruction: 32 bit offsets
 
-    in instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))
+    in (delta, [instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))])
 
 
 ppc_mkLoadInstr
@@ -566,7 +566,7 @@ ppc_mkLoadInstr
    -> Reg       -- register to load
    -> Int       -- current stack delta
    -> Int       -- spill slot to use
-   -> Instr
+   -> (Int, [Instr])
 
 ppc_mkLoadInstr config reg delta slot
   = let platform = ncgPlatform config
@@ -583,7 +583,7 @@ ppc_mkLoadInstr config reg delta slot
                 Just _  -> LD
                 Nothing -> LDFAR -- pseudo instruction: 32 bit offsets
 
-    in instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))
+    in (delta, [instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))])
 
 
 -- | The size of a minimal stackframe header including minimal


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -701,7 +701,7 @@ saveClobberedTemps clobbered dying
 
                   let new_assign  = addToUFM assig temp (InBoth reg slot)
 
-                  clobber new_assign (spill : instrs) rest
+                  clobber new_assign (spill ++ instrs) rest
 
 
 
@@ -921,8 +921,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
                         = do
                                 (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out
                                 let spill_store  = (if reading then id else reverse)
-                                                        [ -- COMMENT (fsLit "spill alloc")
-                                                           spill_insn ]
+                                                        -- COMMENT (fsLit "spill alloc"):
+                                                           spill_insn
 
                                 -- record that this temp was spilled
                                 recordSpill (SpillAlloc temp_to_push_out)
@@ -972,7 +972,7 @@ loadTemp vreg (ReadMem slot) hreg spills
  = do
         insn <- loadR (RegReal hreg) slot
         recordSpill (SpillLoad $ getUnique vreg)
-        return  $  {- COMMENT (fsLit "spill load") : -} insn : spills
+        return  $  {- COMMENT (fsLit "spill load") : -} insn ++ spills
 
 loadTemp _ _ _ spills =
    return spills


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
=====================================
@@ -118,8 +118,8 @@ def CC_AArch64_GHC : CallingConv<[
 
 getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
 getFreeRegs cls (FreeRegs g f)
-  | RcFloat   <- cls = [] -- go 32 f 3]
-  | RcDouble  <- cls = go 32 f 32
+  | RcFloat   <- cls = [] -- go 32 f 31
+  | RcDouble  <- cls = go 32 f 31
   | RcInteger <- cls = go  0 g 18
     where
         go off _ i | i < 0 = []


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -28,6 +28,8 @@ import GHC.Types.Unique
 import GHC.Types.Unique.FM
 import GHC.Types.Unique.Set
 
+import GHC.Utils.Monad (concatMapM)
+
 -- | For a jump instruction at the end of a block, generate fixup code so its
 --      vregs are in the correct regs for its destination.
 --
@@ -304,7 +306,7 @@ handleComponent
 --      go via a spill slot.
 --
 handleComponent delta _  (AcyclicSCC (DigraphNode vreg src dsts))
-        = mapM (makeMove delta vreg src) dsts
+        = concatMapM (makeMove delta vreg src) dsts
 
 
 -- Handle some cyclic moves.
@@ -338,7 +340,7 @@ handleComponent delta instr
 
         -- make sure to do all the reloads after all the spills,
         --      so we don't end up clobbering the source values.
-        return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad])
+        return (instrSpill ++ concat remainingFixUps ++ instrLoad)
 
 handleComponent _ _ (CyclicSCC _)
  = panic "Register Allocator: handleComponent cyclic"
@@ -352,22 +354,28 @@ makeMove
     -> Unique   -- ^ unique of the vreg that we're moving.
     -> Loc      -- ^ source location.
     -> Loc      -- ^ destination location.
-    -> RegM freeRegs instr  -- ^ move instruction.
+    -> RegM freeRegs [instr]  -- ^ move instruction.
 
-makeMove delta vreg src dst
+makeMove _ vreg src dst
  = do config <- getConfig
       let platform = ncgPlatform config
 
       case (src, dst) of
           (InReg s, InReg d) ->
               do recordSpill (SpillJoinRR vreg)
-                 return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d)
+                 return $ [mkRegRegMoveInstr platform (RegReal s) (RegReal d)]
           (InMem s, InReg d) ->
               do recordSpill (SpillJoinRM vreg)
-                 return $ mkLoadInstr config (RegReal d) delta s
+                 delta <- getDeltaR
+                 let (new_delta, instrs) = mkLoadInstr config (RegReal d) delta s
+                 setDeltaR new_delta
+                 return instrs
           (InReg s, InMem d) ->
               do recordSpill (SpillJoinRM vreg)
-                 return $ mkSpillInstr config (RegReal s) delta d
+                 delta <- getDeltaR
+                 let (new_delta, instrs) = mkSpillInstr config (RegReal s) delta d
+                 setDeltaR new_delta
+                 return instrs
           _ ->
               -- we don't handle memory to memory moves.
               -- they shouldn't happen because we don't share
@@ -375,4 +383,3 @@ makeMove delta vreg src dst
               panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") ("
                   ++ show dst ++ ")"
                   ++ " we don't handle mem->mem moves.")
-


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/State.hs
=====================================
@@ -127,20 +127,22 @@ makeRAStats state
 
 
 spillR :: Instruction instr
-       => Reg -> Unique -> RegM freeRegs (instr, Int)
+       => Reg -> Unique -> RegM freeRegs ([instr], Int)
 
 spillR reg temp = RegM $ \s ->
   let (stack1,slot) = getStackSlotFor (ra_stack s) temp
-      instr  = mkSpillInstr (ra_config s) reg (ra_delta s) slot
+      (new_delta, instrs)  = mkSpillInstr (ra_config s) reg (ra_delta s) slot
   in
-  RA_Result s{ra_stack=stack1} (instr,slot)
+  RA_Result s{ra_stack=stack1, ra_delta=new_delta} (instrs,slot)
 
 
 loadR :: Instruction instr
-      => Reg -> Int -> RegM freeRegs instr
+      => Reg -> Int -> RegM freeRegs [instr]
 
 loadR reg slot = RegM $ \s ->
-  RA_Result s (mkLoadInstr (ra_config s) reg (ra_delta s) slot)
+  let (new_delta, instrs) = mkLoadInstr (ra_config s) reg (ra_delta s) slot
+  in
+  RA_Result s{ra_delta=new_delta} instrs
 
 getFreeRegsR :: RegM freeRegs freeRegs
 getFreeRegsR = RegM $ \ s at RA_State{ra_freeregs = freeregs} ->


=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -529,11 +529,15 @@ stripLiveBlock config (BasicBlock i lis)
 
         spillNat acc (LiveInstr (SPILL reg slot) _ : instrs)
          = do   delta   <- get
-                spillNat (mkSpillInstr config reg delta slot : acc) instrs
+                let (new_delta, instrs') = mkSpillInstr config reg delta slot
+                put new_delta
+                spillNat (instrs' ++ acc) instrs
 
         spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs)
          = do   delta   <- get
-                spillNat (mkLoadInstr config reg delta slot : acc) instrs
+                let (new_delta, instrs') = mkLoadInstr config reg delta slot
+                put new_delta
+                spillNat (instrs' ++ acc) instrs
 
         spillNat acc (LiveInstr (Instr instr) _ : instrs)
          | Just i <- takeDeltaInstr instr


=====================================
compiler/GHC/CmmToAsm/SPARC/Instr.hs
=====================================
@@ -373,9 +373,9 @@ sparc_mkSpillInstr
     -> Reg      -- ^ register to spill
     -> Int      -- ^ current stack delta
     -> Int      -- ^ spill slot to use
-    -> Instr
+    -> (Int, [Instr])
 
-sparc_mkSpillInstr config reg _ slot
+sparc_mkSpillInstr config reg delta slot
  = let  platform = ncgPlatform config
         off      = spillSlotToOffset config slot
         off_w    = 1 + (off `div` 4)
@@ -384,7 +384,7 @@ sparc_mkSpillInstr config reg _ slot
                         RcFloat   -> FF32
                         RcDouble  -> FF64
 
-    in ST fmt reg (fpRel (negate off_w))
+    in (delta, [ST fmt reg (fpRel (negate off_w))])
 
 
 -- | Make a spill reload instruction.
@@ -393,9 +393,9 @@ sparc_mkLoadInstr
     -> Reg      -- ^ register to load into
     -> Int      -- ^ current stack delta
     -> Int      -- ^ spill slot to use
-    -> Instr
+    -> (Int, [Instr])
 
-sparc_mkLoadInstr config reg _ slot
+sparc_mkLoadInstr config reg delta slot
   = let platform = ncgPlatform config
         off      = spillSlotToOffset config slot
         off_w    = 1 + (off `div` 4)
@@ -404,7 +404,7 @@ sparc_mkLoadInstr config reg _ slot
                         RcFloat   -> FF32
                         RcDouble  -> FF64
 
-        in LD fmt (fpRel (- off_w)) reg
+        in (delta, [LD fmt (fpRel (- off_w)) reg])
 
 
 --------------------------------------------------------------------------------


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -668,15 +668,15 @@ x86_mkSpillInstr
     -> Reg      -- register to spill
     -> Int      -- current stack delta
     -> Int      -- spill slot to use
-    -> Instr
+    -> (Int, [Instr])
 
 x86_mkSpillInstr config reg delta slot
   = let off     = spillSlotToOffset platform slot - delta
     in
     case targetClassOfReg platform reg of
-           RcInteger   -> MOV (archWordFormat is32Bit)
-                              (OpReg reg) (OpAddr (spRel platform off))
-           RcDouble    -> MOV FF64 (OpReg reg) (OpAddr (spRel platform off))
+           RcInteger   -> (delta, [MOV (archWordFormat is32Bit)
+                                   (OpReg reg) (OpAddr (spRel platform off))])
+           RcDouble    -> (delta, [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))])
            _         -> panic "X86.mkSpillInstr: no match"
     where platform = ncgPlatform config
           is32Bit = target32Bit platform
@@ -687,15 +687,15 @@ x86_mkLoadInstr
     -> Reg      -- register to load
     -> Int      -- current stack delta
     -> Int      -- spill slot to use
-    -> Instr
+    -> (Int, [Instr])
 
 x86_mkLoadInstr config reg delta slot
   = let off     = spillSlotToOffset platform slot - delta
     in
         case targetClassOfReg platform reg of
-              RcInteger -> MOV (archWordFormat is32Bit)
-                               (OpAddr (spRel platform off)) (OpReg reg)
-              RcDouble  -> MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)
+              RcInteger -> (delta, [MOV (archWordFormat is32Bit)
+                                    (OpAddr (spRel platform off)) (OpReg reg)])
+              RcDouble  -> (delta, [MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
               _           -> panic "X86.x86_mkLoadInstr"
     where platform = ncgPlatform config
           is32Bit = target32Bit platform


=====================================
hadrian/src/Oracles/Flag.hs
=====================================
@@ -80,7 +80,7 @@ targetSupportsSMP = do
 
 ghcWithNativeCodeGen :: Action Bool
 ghcWithNativeCodeGen = do
-    goodArch <- anyTargetArch ["i386", "x86_64", "sparc", "powerpc"]
-    badOs    <- anyTargetOs ["ios", "aix"]
+    goodArch <- anyTargetArch ["i386", "x86_64", "sparc", "powerpc", "aarch64"]
+    badOs    <- anyTargetOs ["aix"]
     ghcUnreg <- flag GhcUnregisterised
     return $ goodArch && not badOs && not ghcUnreg



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fe3aeaff2444881ef30adb9a82870f81a8880e7e...b765dfe4ad8d5b74e007553b684394e136bab5b3

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/fe3aeaff2444881ef30adb9a82870f81a8880e7e...b765dfe4ad8d5b74e007553b684394e136bab5b3
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200720/1514e80e/attachment-0001.html>


More information about the ghc-commits mailing list