[Git][ghc/ghc][wip/angerman/aarch64-ncg] 4 commits: :broom: Cleanup

Moritz Angermann gitlab at gitlab.haskell.org
Sat Sep 26 09:26:42 UTC 2020



Moritz Angermann pushed to branch wip/angerman/aarch64-ncg at Glasgow Haskell Compiler / GHC


Commits:
2f72882d by Moritz Angermann at 2020-09-23T14:17:11+00:00
:broom: Cleanup

- - - - -
0067fa36 by Moritz Angermann at 2020-09-23T14:17:27+00:00
Adds LLVM (AArch64) CI Job

- - - - -
87817368 by Moritz Angermann at 2020-09-23T14:17:27+00:00
Add validate as well.

- - - - -
a378fc67 by Moritz Angermann at 2020-09-25T08:33:30+00:00
Revert "Simplify aarch64 StgRun"

This reverts commit f27472c0483db2382344f4a8f4c1b2a192d98725.

- - - - -


22 changed files:

- .gitlab-ci.yml
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/CmmToAsm.hs
- + compiler/GHC/CmmToAsm/AArch64.hs
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Cond.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- compiler/GHC/CmmToAsm/AArch64/RegInfo.hs
- compiler/GHC/CmmToAsm/AArch64/Regs.hs
- compiler/GHC/CmmToAsm/Instr.hs
- compiler/GHC/CmmToAsm/PIC.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
- compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
- compiler/GHC/CmmToAsm/Reg/Liveness.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/Driver/Backend.hs
- compiler/ghc.cabal.in
- docs/users_guide/expected-undocumented-flags.txt
- hadrian/src/Oracles/Flag.hs
- rts/StgCRun.c


Changes:

=====================================
.gitlab-ci.yml
=====================================
@@ -489,6 +489,20 @@ nightly-aarch64-linux-deb10:
   variables:
     TEST_TYPE: slowtest
 
+.build-aarch64-linux-deb10-llvm:
+  extends: .build-aarch64-linux-deb10
+  stage: full-build
+  variables:
+    BUILD_FLAVOUR: perf-llvm
+  tags:
+    - aarch64-linux
+
+validate-aarch64-linux-deb10-llvm:
+  extends: .build-aarch64-linux-deb10-llvm
+  artifacts:
+    when: always
+    expire_in: 2 week
+
 #################################
 # armv7-linux-deb10
 #################################


=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -131,6 +131,7 @@ import GHC.Utils.Panic
 import GHC.Data.FastString
 import GHC.Driver.Session
 import GHC.Driver.Backend
+import GHC.Driver.Ppr
 import GHC.Platform
 import GHC.Types.Unique.Set
 import GHC.Utils.Misc
@@ -271,7 +272,7 @@ data CLabel
   deriving Eq
 
 instance Show CLabel where
-  show = showSDocUnsafe . ppr
+  show = showPprUnsafe . ppr
 
 isIdLabel :: CLabel -> Bool
 isIdLabel IdLabel{} = True


=====================================
compiler/GHC/CmmToAsm.hs
=====================================
@@ -83,12 +83,7 @@ import GHC.Prelude
 import qualified GHC.CmmToAsm.X86   as X86
 import qualified GHC.CmmToAsm.PPC   as PPC
 import qualified GHC.CmmToAsm.SPARC as SPARC
-
-import qualified GHC.CmmToAsm.AArch64.CodeGen as AArch64.CodeGen
-import qualified GHC.CmmToAsm.AArch64.Regs    as AArch64.Regs
-import qualified GHC.CmmToAsm.AArch64.RegInfo as AArch64.RegInfo
-import qualified GHC.CmmToAsm.AArch64.Instr   as AArch64.Instr
-import qualified GHC.CmmToAsm.AArch64.Ppr     as AArch64.Ppr
+import qualified GHC.CmmToAsm.AArch64 as AArch64
 
 import GHC.CmmToAsm.Reg.Liveness
 import qualified GHC.CmmToAsm.Reg.Linear                as Linear
@@ -168,50 +163,13 @@ nativeCodeGen dflags this_mod modLoc h us cmms
       ArchSPARC64   -> panic "nativeCodeGen: No NCG for SPARC64"
       ArchS390X     -> panic "nativeCodeGen: No NCG for S390X"
       ArchARM {}    -> panic "nativeCodeGen: No NCG for ARM"
-      ArchAArch64   -> nCG' (aarch64NcgImpl config)
+      ArchAArch64   -> nCG' (AArch64.ncgAArch64 config)
       ArchAlpha     -> panic "nativeCodeGen: No NCG for Alpha"
       ArchMipseb    -> panic "nativeCodeGen: No NCG for mipseb"
       ArchMipsel    -> panic "nativeCodeGen: No NCG for mipsel"
       ArchUnknown   -> panic "nativeCodeGen: No NCG for unknown arch"
       ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
 
-aarch64NcgImpl :: NCGConfig -> NcgImpl RawCmmStatics AArch64.Instr.Instr AArch64.RegInfo.JumpDest
-aarch64NcgImpl config
- = NcgImpl {
-        ncgConfig                 = config
-       ,cmmTopCodeGen             = AArch64.CodeGen.cmmTopCodeGen
-       ,generateJumpTableForInstr = AArch64.CodeGen.generateJumpTableForInstr config
-       ,getJumpDestBlockId        = AArch64.RegInfo.getJumpDestBlockId
-       ,canShortcut               = AArch64.RegInfo.canShortcut
-       ,shortcutStatics           = AArch64.RegInfo.shortcutStatics
-       ,shortcutJump              = AArch64.RegInfo.shortcutJump
-       ,pprNatCmmDecl             = AArch64.Ppr.pprNatCmmDecl config
-       ,maxSpillSlots             = AArch64.Instr.maxSpillSlots config
-       ,allocatableRegs           = AArch64.Regs.allocatableRegs platform
-       ,ncgAllocMoreStack         = AArch64.Instr.allocMoreStack platform
-       ,ncgExpandTop              = id
-       ,ncgMakeFarBranches        = const id
-       ,extractUnwindPoints       = const []
-       ,invertCondBranches        = \_ _ -> id
-  }
-    where
-      platform = ncgPlatform config
---
--- Allocating more stack space for spilling is currently only
--- supported for the linear register allocator on x86/x86_64, the rest
--- default to the panic below.  To support allocating extra stack on
--- more platforms provide a definition of ncgAllocMoreStack.
---
-noAllocMoreStack :: Int -> NatCmmDecl statics instr
-                 -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)])
-noAllocMoreStack amount _
-  = panic $   "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
-        ++  "   If you are trying to compile SHA1.hs from the crypto library then this\n"
-        ++  "   is a known limitation in the linear allocator.\n"
-        ++  "\n"
-        ++  "   Try enabling the graph colouring allocator with -fregs-graph instead."
-        ++  "   You can still file a bug report if you like.\n"
-
 -- | Data accumulated during code generation. Mostly about statistics,
 -- but also collects debug data for DWARF generation.
 data NativeGenAcc statics instr


=====================================
compiler/GHC/CmmToAsm/AArch64.hs
=====================================
@@ -0,0 +1,60 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-- | Native code generator for x86 and x86-64 architectures
+module GHC.CmmToAsm.AArch64
+   ( ncgAArch64 )
+where
+
+import GHC.Prelude
+
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Monad
+import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Types
+
+import qualified GHC.CmmToAsm.AArch64.Instr   as AArch64
+import qualified GHC.CmmToAsm.AArch64.Ppr     as AArch64
+import qualified GHC.CmmToAsm.AArch64.CodeGen as AArch64
+import qualified GHC.CmmToAsm.AArch64.Regs    as AArch64
+import qualified GHC.CmmToAsm.AArch64.RegInfo as AArch64
+
+ncgAArch64 :: NCGConfig -> NcgImpl RawCmmStatics AArch64.Instr AArch64.JumpDest
+ncgAArch64 config
+ = NcgImpl {
+        ncgConfig                 = config
+       ,cmmTopCodeGen             = AArch64.cmmTopCodeGen
+       ,generateJumpTableForInstr = AArch64.generateJumpTableForInstr config
+       ,getJumpDestBlockId        = AArch64.getJumpDestBlockId
+       ,canShortcut               = AArch64.canShortcut
+       ,shortcutStatics           = AArch64.shortcutStatics
+       ,shortcutJump              = AArch64.shortcutJump
+       ,pprNatCmmDecl             = AArch64.pprNatCmmDecl config
+       ,maxSpillSlots             = AArch64.maxSpillSlots config
+       ,allocatableRegs           = AArch64.allocatableRegs platform
+       ,ncgAllocMoreStack         = AArch64.allocMoreStack platform
+       ,ncgExpandTop              = id
+       ,ncgMakeFarBranches        = const id
+       ,extractUnwindPoints       = const []
+       ,invertCondBranches        = \_ _ -> id
+  }
+    where
+      platform = ncgPlatform config
+
+-- | Instruction instance for aarch64
+instance Instruction AArch64.Instr where
+        regUsageOfInstr         = AArch64.regUsageOfInstr
+        patchRegsOfInstr        = AArch64.patchRegsOfInstr
+        isJumpishInstr          = AArch64.isJumpishInstr
+        jumpDestsOfInstr        = AArch64.jumpDestsOfInstr
+        patchJumpInstr          = AArch64.patchJumpInstr
+        mkSpillInstr            = AArch64.mkSpillInstr
+        mkLoadInstr             = AArch64.mkLoadInstr
+        takeDeltaInstr          = AArch64.takeDeltaInstr
+        isMetaInstr             = AArch64.isMetaInstr
+        mkRegRegMoveInstr _     = AArch64.mkRegRegMoveInstr
+        takeRegRegMoveInstr     = AArch64.takeRegRegMoveInstr
+        mkJumpInstr             = AArch64.mkJumpInstr
+        mkStackAllocInstr       = AArch64.mkStackAllocInstr
+        mkStackDeallocInstr     = AArch64.mkStackDeallocInstr
+        mkComment               = pure . AArch64.COMMENT
+        pprInstr                = AArch64.pprInstr


=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -18,30 +18,25 @@ import GHC.Prelude hiding (EQ)
 import GHC.Platform.Regs
 import GHC.CmmToAsm.AArch64.Instr
 import GHC.CmmToAsm.AArch64.Regs
-import GHC.CmmToAsm.AArch64.RegInfo
 import GHC.CmmToAsm.AArch64.Cond
-import GHC.CmmToAsm.AArch64.Ppr
 
 import GHC.CmmToAsm.CPrim
 import GHC.Cmm.DebugBlock
 import GHC.CmmToAsm.Monad
-   ( NatM, getNewRegNat, getNewLabelNat
-   , getBlockIdNat, getPicBaseNat, getNewRegPairNat
+   ( NatM, getNewRegNat
    , getPicBaseMaybeNat, getPlatform, getConfig
    , getDebugBlock, getFileId
    )
-import GHC.CmmToAsm.Instr
+-- import GHC.CmmToAsm.Instr
 import GHC.CmmToAsm.PIC
 import GHC.CmmToAsm.Format
 import GHC.CmmToAsm.Config
-import GHC.Platform.Reg.Class
+import GHC.CmmToAsm.Types
 import GHC.Platform.Reg
-import GHC.CmmToAsm.Reg.Target
 import GHC.Platform
 
 -- Our intermediate code:
 import GHC.Cmm.BlockId
-import GHC.Cmm.Ppr           ( pprExpr )
 import GHC.Cmm
 import GHC.Cmm.Utils
 import GHC.Cmm.Switch
@@ -59,15 +54,13 @@ import Control.Monad    ( mapAndUnzipM, when, foldM )
 import Data.Bits
 import Data.Word
 import Data.Maybe
-import Data.Int
 import GHC.Float
 
 import GHC.Types.Basic
 import GHC.Types.ForeignCall
 import GHC.Data.FastString
 import GHC.Utils.Misc
-
-import Debug.Trace
+import GHC.Utils.Panic
 
 -- @cmmTopCodeGen@ will be our main entry point to code gen.  Here we'll get
 -- @RawCmmDecl@; see GHC.Cmm
@@ -113,11 +106,9 @@ cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do
   let blocks = toBlockListEntryFirst graph
   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
   picBaseMb <- getPicBaseMaybeNat
-  platform <- getPlatform
 
   let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
       tops = proc : concat statics
-      os   = platformOS platform
 
   case picBaseMb of
       Just _picBase -> panic "AArch64.cmmTopCodeGen: picBase not implemented"
@@ -332,10 +323,11 @@ getRegisterReg platform (CmmGlobal mid)
         -- platform.  Hence ...
 
 -- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
-jumpTableEntry config Nothing   = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
-jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
-    where blockLabel = blockLbl blockid
+-- XXX: Add JumpTable Logic
+-- jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic
+-- jumpTableEntry config Nothing   = CmmStaticLit (CmmInt 0 (ncgWordWidth config))
+-- jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
+--     where blockLabel = blockLbl blockid
 
 -- -----------------------------------------------------------------------------
 -- Utility
@@ -345,15 +337,15 @@ isIntFormat = not . isFloatFormat
 -- -----------------------------------------------------------------------------
 -- General things for putting together code sequences
 
--- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: Platform -> CmmExpr -> CmmExpr
-mangleIndexTree platform (CmmRegOff reg off)
-  = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
-  where width = typeWidth (cmmRegType platform reg)
+-- -- Expand CmmRegOff.  ToDo: should we do it this way around, or convert
+-- -- CmmExprs into CmmRegOff?
+-- mangleIndexTree :: Platform -> CmmExpr -> CmmExpr
+-- mangleIndexTree platform (CmmRegOff reg off)
+--   = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
+--   where width = typeWidth (cmmRegType platform reg)
 
-mangleIndexTree _ _
-        = panic "AArch64.CodeGen.mangleIndexTree: no match"
+-- mangleIndexTree _ _
+--         = panic "AArch64.CodeGen.mangleIndexTree: no match"
 
 -- | The dual to getAnyReg: compute an expression into a register, but
 --      we don't mind which one it is.
@@ -879,7 +871,6 @@ assignMem_IntCode rep addrE srcE
     (src_reg, _format, code) <- getSomeReg srcE
     platform <- getPlatform
     Amode addr addr_code <- getAmode platform addrE
-    let AddrReg r1 = addr
     return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
             `consOL` (code
             `appOL` addr_code
@@ -889,8 +880,6 @@ assignReg_IntCode _ reg src
   = do
     platform <- getPlatform
     let dst = getRegisterReg platform reg
-        p :: Outputable a => a -> String
-        p = showSDocUnsafe . ppr
     r <- getRegister src
     return $ case r of
       Any _ code              -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst
@@ -980,7 +969,8 @@ genCondJump bid expr = do
           MO_U_Ge w -> bcond w UGE
           MO_U_Lt w -> bcond w ULT
           MO_U_Le w -> bcond w ULE
-      _ -> pprPanic "AArch64.genCondJump: " (ppr expr)
+          _ -> pprPanic "AArch64.genCondJump:case mop: " (text $ show expr)
+      _ -> pprPanic "AArch64.genCondJump: " (text $ show expr)
 
 
 genCondBranch
@@ -1234,8 +1224,6 @@ genCCall target dest_regs arg_regs bid = do
         -- XXX: this should be implemented properly!
         MO_Xchg w           -> mkCCall (xchgLabel w)
 
-        _ -> pprPanic "genCCall:PrimTarget" (ppr target)
-
   where
     unsupported :: Show a => a -> b
     unsupported mop = panic ("outOfLineCmmOp: " ++ show mop


=====================================
compiler/GHC/CmmToAsm/AArch64/Cond.hs
=====================================
@@ -2,8 +2,6 @@ module GHC.CmmToAsm.AArch64.Cond  where
 
 import GHC.Prelude
 
-import GHC.Utils.Panic
-
 -- https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/data-processing-instructions/conditional-instructions
 
 -- XXX: This appears to go a bit overboard? Maybe we should stick with what LLVM


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -1,3 +1,4 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
 {-# language CPP, BangPatterns #-}
 
 module GHC.CmmToAsm.AArch64.Instr
@@ -11,11 +12,13 @@ import GHC.Prelude
 import GHC.CmmToAsm.AArch64.Cond
 import GHC.CmmToAsm.AArch64.Regs
 
-import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Instr (RegUsage(..))
 import GHC.CmmToAsm.Format
-import GHC.CmmToAsm.Reg.Target
+import GHC.CmmToAsm.Types
+import GHC.CmmToAsm.Utils
+-- import GHC.CmmToAsm.Reg.Target
 import GHC.CmmToAsm.Config
-import GHC.Platform.Reg.Class
+-- import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
 
 import GHC.Platform.Regs
@@ -23,18 +26,20 @@ import GHC.Cmm.BlockId
 import GHC.Cmm.Dataflow.Collections
 import GHC.Cmm.Dataflow.Label
 import GHC.Cmm
-import GHC.Cmm.Info
-import GHC.Data.FastString
+-- import GHC.Cmm.Info
+-- import GHC.Data.FastString
 import GHC.Cmm.CLabel
 import GHC.Utils.Outputable
 import GHC.Platform
-import GHC.Types.Unique.FM (listToUFM, lookupUFM)
+-- import GHC.Types.Unique.FM (listToUFM, lookupUFM)
 import GHC.Types.Unique.Supply
 
+import GHC.Utils.Panic
+
 import Control.Monad (replicateM)
 import Data.Maybe (fromMaybe)
 
-import Debug.Trace
+-- import Debug.Trace
 import GHC.Stack
 
 import Data.Bits ((.&.), complement)
@@ -65,24 +70,6 @@ spillSlotToOffset :: NCGConfig -> Int -> Int
 spillSlotToOffset config slot
    = stackFrameHeaderSize (ncgPlatform config) + spillSlotSize * slot
 
--- | Instruction instance for powerpc
-instance Instruction Instr where
-        regUsageOfInstr         = aarch64_regUsageOfInstr
-        patchRegsOfInstr        = aarch64_patchRegsOfInstr
-        isJumpishInstr          = aarch64_isJumpishInstr
-        jumpDestsOfInstr        = aarch64_jumpDestsOfInstr
-        patchJumpInstr          = aarch64_patchJumpInstr
-        mkSpillInstr            = aarch64_mkSpillInstr
-        mkLoadInstr             = aarch64_mkLoadInstr
-        takeDeltaInstr          = aarch64_takeDeltaInstr
-        isMetaInstr             = aarch64_isMetaInstr
-        mkRegRegMoveInstr _     = aarch64_mkRegRegMoveInstr
-        takeRegRegMoveInstr     = aarch64_takeRegRegMoveInstr
-        mkJumpInstr             = aarch64_mkJumpInstr
-        mkStackAllocInstr       = aarch64_mkStackAllocInstr
-        mkStackDeallocInstr     = aarch64_mkStackDeallocInstr
-        mkComment               = pure . COMMENT
-
 -- | Get the registers that are being used by this instruction.
 -- regUsage doesn't need to do any trickery for jumps and such.
 -- Just state precisely the regs read and written by that insn.
@@ -94,12 +81,11 @@ instance Instruction Instr where
 instance Outputable RegUsage where
     ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')'
 
-aarch64_regUsageOfInstr :: Platform -> Instr -> RegUsage
-aarch64_regUsageOfInstr platform instr = case instr of
-  ANN _ i                  -> aarch64_regUsageOfInstr platform i
+regUsageOfInstr :: Platform -> Instr -> RegUsage
+regUsageOfInstr platform instr = case instr of
+  ANN _ i                  -> regUsageOfInstr platform i
   -- 1. Arithmetic Instructions ------------------------------------------------
   ADD dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
-  ADDS dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
   CMN l r                  -> usage (regOp l ++ regOp r, [])
   CMP l r                  -> usage (regOp l ++ regOp r, [])
   MSUB dst src1 src2 src3  -> usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
@@ -115,7 +101,6 @@ aarch64_regUsageOfInstr platform instr = case instr of
 
   -- 3. Logical and Move Instructions ------------------------------------------
   AND dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
-  ADDS dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
   ASR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   BIC dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   BICS dst src1 src2       -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -183,7 +168,7 @@ aarch64_regUsageOfInstr platform instr = case instr of
         -- Is this register interesting for the register allocator?
         interesting :: Platform -> Reg -> Bool
         interesting _        (RegVirtual _)                 = True
-        interesting platform (RegReal (RealRegSingle (-1))) = False
+        interesting _        (RegReal (RealRegSingle (-1))) = False
         interesting platform (RegReal (RealRegSingle i))    = freeReg platform i
         interesting _        (RegReal (RealRegPair{}))
             = panic "AArch64.Instr.interesting: no reg pairs on this arch"
@@ -218,13 +203,12 @@ callerSavedRegisters
 
 -- | Apply a given mapping to all the register references in this
 -- instruction.
-aarch64_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-aarch64_patchRegsOfInstr instr env = case instr of
+patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
+patchRegsOfInstr instr env = case instr of
     -- 0. Meta Instructions
-    ANN d i        -> ANN d (aarch64_patchRegsOfInstr i env)
+    ANN d i        -> ANN d (patchRegsOfInstr i env)
     -- 1. Arithmetic Instructions ----------------------------------------------
     ADD o1 o2 o3   -> ADD (patchOp o1) (patchOp o2) (patchOp o3)
-    ADDS o1 o2 o3  -> ADDS (patchOp o1) (patchOp o2) (patchOp o3)
     CMN o1 o2      -> CMN (patchOp o1) (patchOp o2)
     CMP o1 o2      -> CMP (patchOp o1) (patchOp o2)
     MSUB o1 o2 o3 o4 -> MSUB (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
@@ -280,6 +264,7 @@ aarch64_patchRegsOfInstr instr env = case instr of
     SCVTF o1 o2    -> SCVTF (patchOp o1) (patchOp o2)
     FCVTZS o1 o2   -> FCVTZS (patchOp o1) (patchOp o2)
 
+    _ -> pprPanic "patchRegsOfInstr" (text $ show instr)
     where
         patchOp :: Operand -> Operand
         patchOp (OpReg w r) = OpReg w (env r)
@@ -298,9 +283,9 @@ aarch64_patchRegsOfInstr instr env = case instr of
 -- | Checks whether this instruction is a jump/branch instruction.
 -- One that can change the flow of control in a way that the
 -- register allocator needs to worry about.
-aarch64_isJumpishInstr :: Instr -> Bool
-aarch64_isJumpishInstr instr = case instr of
-    ANN _ i -> aarch64_isJumpishInstr i
+isJumpishInstr :: Instr -> Bool
+isJumpishInstr instr = case instr of
+    ANN _ i -> isJumpishInstr i
     CBZ{} -> True
     CBNZ{} -> True
     J{} -> True
@@ -312,23 +297,23 @@ aarch64_isJumpishInstr instr = case instr of
 -- | Checks whether this instruction is a jump/branch instruction.
 -- One that can change the flow of control in a way that the
 -- register allocator needs to worry about.
-aarch64_jumpDestsOfInstr :: Instr -> [BlockId]
-aarch64_jumpDestsOfInstr (ANN _ i) = aarch64_jumpDestsOfInstr i
-aarch64_jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
-aarch64_jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
-aarch64_jumpDestsOfInstr (J t) = [id | TBlock id <- [t]]
-aarch64_jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
-aarch64_jumpDestsOfInstr (BL t _ _) = [ id | TBlock id <- [t]]
-aarch64_jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]]
-aarch64_jumpDestsOfInstr _ = []
+jumpDestsOfInstr :: Instr -> [BlockId]
+jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i
+jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr (J t) = [id | TBlock id <- [t]]
+jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
+jumpDestsOfInstr (BL t _ _) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]]
+jumpDestsOfInstr _ = []
 
 -- | Change the destination of this jump instruction.
 -- Used in the linear allocator when adding fixup blocks for join
 -- points.
-aarch64_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
-aarch64_patchJumpInstr instr patchF
+patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
+patchJumpInstr instr patchF
     = case instr of
-        ANN d i -> ANN d (aarch64_patchJumpInstr i patchF)
+        ANN d i -> ANN d (patchJumpInstr i patchF)
         CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid))
         CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid))
         J (TBlock bid) -> J (TBlock (patchF bid))
@@ -354,7 +339,7 @@ aarch64_patchJumpInstr instr patchF
 -- always poitns to the top of the stack, and we can't use it for computation.
 --
 -- | An instruction to spill a register into a spill slot.
-aarch64_mkSpillInstr
+mkSpillInstr
    :: HasCallStack
    => NCGConfig
    -> Reg       -- register to spill
@@ -362,14 +347,14 @@ aarch64_mkSpillInstr
    -> Int       -- spill slot to use
    -> [Instr]
 
-aarch64_mkSpillInstr config reg delta slot =
+mkSpillInstr config reg delta slot =
   case (spillSlotToOffset config slot) - delta of
     imm | -256 <= imm && imm <= 255                               -> [ mkStrSp imm ]
     imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkStrSp imm ]
     imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkIp0SpillAddr (imm .&~. 0xfff)
                                                                      , mkStrIp0 (imm .&.  0xfff)
                                                                      ]
-    imm -> pprPanic "aarch64_mkSpillInstr" (text "Unable to spill into" <+> int imm)
+    imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm)
     where
         a .&~. b = a .&. (complement b)
 
@@ -382,21 +367,21 @@ aarch64_mkSpillInstr config reg delta slot =
 
         off = spillSlotToOffset config slot
 
-aarch64_mkLoadInstr
+mkLoadInstr
    :: NCGConfig
    -> Reg       -- register to load
    -> Int       -- current stack delta
    -> Int       -- spill slot to use
    -> [Instr]
 
-aarch64_mkLoadInstr config reg delta slot =
+mkLoadInstr config reg delta slot =
   case (spillSlotToOffset config slot) - delta of
     imm | -256 <= imm && imm <= 255                               -> [ mkLdrSp imm ]
     imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff           -> [ mkLdrSp imm ]
     imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0    -> [ mkIp0SpillAddr (imm .&~. 0xfff)
                                                                      , mkLdrIp0 (imm .&.  0xfff)
                                                                      ]
-    imm -> pprPanic "aarch64_mkSpillInstr" (text "Unable to spill into" <+> int imm)
+    imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm)
     where
         a .&~. b = a .&. (complement b)
 
@@ -412,16 +397,16 @@ aarch64_mkLoadInstr config reg delta slot =
 
 --------------------------------------------------------------------------------
 -- | See if this instruction is telling us the current C stack delta
-aarch64_takeDeltaInstr :: Instr -> Maybe Int
-aarch64_takeDeltaInstr (ANN _ i) = aarch64_takeDeltaInstr i
-aarch64_takeDeltaInstr (DELTA i) = Just i
-aarch64_takeDeltaInstr _         = Nothing
+takeDeltaInstr :: Instr -> Maybe Int
+takeDeltaInstr (ANN _ i) = takeDeltaInstr i
+takeDeltaInstr (DELTA i) = Just i
+takeDeltaInstr _         = Nothing
 
 -- Not real instructions.  Just meta data
-aarch64_isMetaInstr :: Instr -> Bool
-aarch64_isMetaInstr instr
+isMetaInstr :: Instr -> Bool
+isMetaInstr instr
  = case instr of
-    ANN _ i     -> aarch64_isMetaInstr i
+    ANN _ i     -> isMetaInstr i
     COMMENT{}   -> True
     MULTILINE_COMMENT{} -> True
     LOCATION{}  -> True
@@ -434,32 +419,32 @@ aarch64_isMetaInstr instr
 
 -- | Copy the value in a register to another one.
 -- Must work for all register classes.
-aarch64_mkRegRegMoveInstr :: Reg -> Reg -> Instr
-aarch64_mkRegRegMoveInstr src dst = ANN (text $ "Reg->Reg Move: " ++ show src ++ " -> " ++ show dst) $ MOV (OpReg W64 dst) (OpReg W64 src)
+mkRegRegMoveInstr :: Reg -> Reg -> Instr
+mkRegRegMoveInstr src dst = ANN (text $ "Reg->Reg Move: " ++ show src ++ " -> " ++ show dst) $ MOV (OpReg W64 dst) (OpReg W64 src)
 
 -- | Take the source and destination from this reg -> reg move instruction
 -- or Nothing if it's not one
-aarch64_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
---aarch64_takeRegRegMoveInstr (MOV (OpReg fmt dst) (OpReg fmt' src)) | fmt == fmt' = Just (src, dst)
-aarch64_takeRegRegMoveInstr _ = Nothing
+takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
+--takeRegRegMoveInstr (MOV (OpReg fmt dst) (OpReg fmt' src)) | fmt == fmt' = Just (src, dst)
+takeRegRegMoveInstr _ = Nothing
 
 -- | Make an unconditional jump instruction.
-aarch64_mkJumpInstr :: BlockId -> [Instr]
-aarch64_mkJumpInstr id = [B (TBlock id)]
+mkJumpInstr :: BlockId -> [Instr]
+mkJumpInstr id = [B (TBlock id)]
 
-aarch64_mkStackAllocInstr :: Platform -> Int -> [Instr]
-aarch64_mkStackAllocInstr platform n
+mkStackAllocInstr :: Platform -> Int -> [Instr]
+mkStackAllocInstr platform n
     | n == 0 = []
     | n > 0 && n < 4096 = [ ANN (text "Alloc More Stack") $ SUB sp sp (OpImm (ImmInt n)) ]
-    | n > 0 =  ANN (text "Alloc More Stack") (SUB sp sp (OpImm (ImmInt 4095))) : aarch64_mkStackAllocInstr platform (n - 4095)
-aarch64_mkStackAllocInstr platform n = pprPanic "aarch64_mkStackAllocInstr" (int n)
+    | n > 0 =  ANN (text "Alloc More Stack") (SUB sp sp (OpImm (ImmInt 4095))) : mkStackAllocInstr platform (n - 4095)
+mkStackAllocInstr _platform n = pprPanic "mkStackAllocInstr" (int n)
 
-aarch64_mkStackDeallocInstr :: Platform -> Int -> [Instr]
-aarch64_mkStackDeallocInstr platform n
+mkStackDeallocInstr :: Platform -> Int -> [Instr]
+mkStackDeallocInstr platform n
     | n == 0 = []
     | n > 0 && n < 4096 = [ ANN (text "Dealloc More Stack") $ ADD sp sp (OpImm (ImmInt n)) ]
-    | n > 0 =  ANN (text "Dealloc More Stack") (ADD sp sp (OpImm (ImmInt 4095))) : aarch64_mkStackDeallocInstr platform (n - 4095)
-aarch64_mkStackDeallocInstr platform n = pprPanic "aarch64_mkStackDeallocInstr" (int n)
+    | n > 0 =  ANN (text "Dealloc More Stack") (ADD sp sp (OpImm (ImmInt 4095))) : mkStackDeallocInstr platform (n - 4095)
+mkStackDeallocInstr _platform n = pprPanic "mkStackDeallocInstr" (int n)
 
 --
 -- See note [extra spill slots] in X86/Instr.hs
@@ -500,8 +485,8 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
       insert_dealloc insn r = case insn of
         J _ -> dealloc ++ (insn : r)
         ANN _ (J _) -> dealloc ++ (insn : r)
-        _other | aarch64_jumpDestsOfInstr insn /= []
-            -> aarch64_patchJumpInstr insn retarget : r
+        _other | jumpDestsOfInstr insn /= []
+            -> patchJumpInstr insn retarget : r
         _other -> insn : r
 
         where retarget b = fromMaybe b (mapLookup b new_blockmap)
@@ -555,7 +540,7 @@ data Instr
     -- | ADC Operand Operand Operang -- rd = rn + rm + C
     -- | ADCS ...
     | ADD Operand Operand Operand -- rd = rn + rm
-    | ADDS Operand Operand Operand -- rd = rn + rm
+    -- | ADDS Operand Operand Operand -- rd = rn + rm
     -- | ADR ...
     -- | ADRP ...
     | CMN Operand Operand -- rd + op2
@@ -643,7 +628,7 @@ data Instr
     | FCVTZS Operand Operand
 
 instance Show Instr where
-    show (LDR f o1 o2) = "LDR " ++ show o1 ++ ", " ++ show o2
+    show (LDR _f o1 o2) = "LDR " ++ show o1 ++ ", " ++ show o2
     show (MOV o1 o2) = "MOV " ++ show o1 ++ ", " ++ show o2
     show _ = "missing"
 
@@ -684,7 +669,7 @@ data Operand
 opReg :: Width -> Reg -> Operand
 opReg = OpReg
 
-xzr, wzr, sp :: Operand
+xzr, wzr, sp, ip0 :: Operand
 xzr = OpReg W64 (RegReal (RealRegSingle (-1)))
 wzr = OpReg W32 (RegReal (RealRegSingle (-1)))
 sp  = OpReg W64 (RegReal (RealRegSingle 31))
@@ -773,9 +758,11 @@ opRegUExt W64 r = OpRegExt W64 r EUXTX 0
 opRegUExt W32 r = OpRegExt W32 r EUXTW 0
 opRegUExt W16 r = OpRegExt W16 r EUXTH 0
 opRegUExt W8  r = OpRegExt W8  r EUXTB 0
+opRegUExt w  _r = pprPanic "opRegUExt" (text $ show w)
 
 opRegSExt :: Width -> Reg -> Operand
 opRegSExt W64 r = OpRegExt W64 r ESXTX 0
 opRegSExt W32 r = OpRegExt W32 r ESXTW 0
 opRegSExt W16 r = OpRegExt W16 r ESXTH 0
-opRegSExt W8  r = OpRegExt W8  r ESXTB 0
\ No newline at end of file
+opRegSExt W8  r = OpRegExt W8  r ESXTB 0
+opRegSExt w  _r = pprPanic "opRegSExt" (text $ show w)


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -1,19 +1,23 @@
-module GHC.CmmToAsm.AArch64.Ppr (pprNatCmmDecl) where
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+module GHC.CmmToAsm.AArch64.Ppr (pprNatCmmDecl, pprInstr) where
 
 import GHC.Prelude hiding (EQ)
 
-import Data.List (findIndex, all)
+import Data.Word
+import qualified Data.Array.Unsafe as U ( castSTUArray )
+import Data.Array.ST
+import Control.Monad.ST
 
 import GHC.CmmToAsm.AArch64.Instr
 import GHC.CmmToAsm.AArch64.Regs
 import GHC.CmmToAsm.AArch64.Cond
 import GHC.CmmToAsm.Ppr
-import GHC.CmmToAsm.Instr
 import GHC.CmmToAsm.Format
 import GHC.Platform.Reg
-import GHC.Platform.Reg.Class
-import GHC.CmmToAsm.Reg.Target
 import GHC.CmmToAsm.Config
+import GHC.CmmToAsm.Types
+import GHC.CmmToAsm.Utils
 
 import GHC.Cmm hiding (topInfoTable)
 import GHC.Cmm.Dataflow.Collections
@@ -30,6 +34,8 @@ import GHC.Data.FastString
 import GHC.Utils.Outputable
 import GHC.Driver.Session (targetPlatform)
 
+import GHC.Utils.Panic
+
 pprProcAlignment :: NCGConfig -> SDoc
 pprProcAlignment config = maybe empty (pprAlign platform . mkAlignment) (ncgProcAlignment config)
    where
@@ -50,7 +56,7 @@ pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
         -- pprProcAlignment config $$
         pprLabel platform lbl $$ -- blocks guaranteed not null, so label needed
         vcat (map (pprBasicBlock config top_info) blocks) $$
-        (if ncgDebugLevel config > 0
+        (if ncgDwarfEnabled config
          then ppr (mkAsmTempEndLabel lbl) <> char ':' else empty) $$
         pprSizeDecl platform lbl
 
@@ -72,8 +78,6 @@ pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
        else empty) $$
       pprSizeDecl platform info_lbl
 
-pprNatCmmDecl _ _ = undefined
-
 pprLabel :: Platform -> CLabel -> SDoc
 pprLabel platform lbl =
    pprGloblDecl lbl
@@ -81,7 +85,7 @@ pprLabel platform lbl =
    $$ (ppr lbl <> char ':')
 
 pprAlign :: Platform -> Alignment -> SDoc
-pprAlign platform alignment
+pprAlign _platform alignment
         = text "\t.balign " <> int (alignmentBytes alignment)
 
 -- | Print appropriate alignment for the given section type.
@@ -121,7 +125,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs)
   = maybe_infotable $
     pprLabel platform asmLbl $$
     vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs)) $$
-    (if  ncgDebugLevel config > 0
+    (if  ncgDwarfEnabled config
       then ppr (mkAsmTempEndLabel asmLbl) <> char ':'
       else empty
     )
@@ -141,6 +145,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs)
     -- See https://gitlab.haskell.org/ghc/ghc/-/issues/367
     -- This only intends to catch the very trivial case, not the more
     -- compilicated cases.
+    {-
     detectTrivialDeadlock :: [Instr] -> [Instr]
     detectTrivialDeadlock instrs = case (findIndex isSelfBranch instrs) of
       Just n | all (not . aarch64_isJumpishInstr) (take n instrs) ->
@@ -157,6 +162,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs)
 
       where isSelfBranch (B (TBlock blockid')) = blockid' == blockid
             isSelfBranch _ = False
+    -}
 
     asmLbl = blockLbl blockid
     platform = ncgPlatform config
@@ -168,7 +174,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs)
            vcat (map (pprData config) info) $$
            pprLabel platform info_lbl $$
            c $$
-           (if ncgDebugLevel config > 0
+           (if ncgDwarfEnabled config
              then ppr (mkAsmTempEndLabel info_lbl) <> char ':'
              else empty)
     -- Make sure the info table has the right .loc for the block
@@ -254,8 +260,25 @@ pprDataItem config lit
            = let bs = doubleToBytes (fromRational r)
              in  map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
 
-pprImm :: Imm -> SDoc
+        ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit)
 
+floatToBytes :: Float -> [Int]
+floatToBytes f
+   = runST (do
+        arr <- newArray_ ((0::Int),3)
+        writeArray arr 0 f
+        arr <- castFloatToWord8Array arr
+        i0 <- readArray arr 0
+        i1 <- readArray arr 1
+        i2 <- readArray arr 2
+        i3 <- readArray arr 3
+        return (map fromIntegral [i0,i1,i2,i3])
+     )
+
+castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8)
+castFloatToWord8Array = U.castSTUArray
+
+pprImm :: Imm -> SDoc
 pprImm (ImmInt i)     = int i
 pprImm (ImmInteger i) = integer i
 pprImm (ImmCLbl l)    = ppr l
@@ -339,7 +362,6 @@ pprOp op = case op of
   OpAddr (AddrRegReg r1 r2) -> char '[' <+> pprReg W64 r1 <> comma <+> pprReg W64 r2 <+> char ']'
   OpAddr (AddrRegImm r1 im) -> char '[' <+> pprReg W64 r1 <> comma <+> pprImm im <+> char ']'
   OpAddr (AddrReg r1)       -> char '[' <+> pprReg W64 r1 <+> char ']'
-  OpAddr _          -> panic "AArch64.pprOp: no amode"
 
 pprReg :: Width -> Reg -> SDoc
 pprReg w r = case r of
@@ -349,6 +371,7 @@ pprReg w r = case r of
   RegVirtual (VirtualRegI u)   -> text "%vI_" <> pprUniqueAlways u
   RegVirtual (VirtualRegF u)   -> text "%vF_" <> pprUniqueAlways u
   RegVirtual (VirtualRegD u)   -> text "%vD_" <> pprUniqueAlways u
+  _                            -> pprPanic "AArch64.pprReg" (text $ show r)
 
   where
     ppr_reg_no :: Width -> Int -> SDoc
@@ -459,7 +482,7 @@ pprInstr platform instr = case instr of
 
   BCOND c (TBlock bid) -> text "\t" <> pprBcond c <+> ppr (mkLocalBlockLabel (getUnique bid))
   BCOND c (TLabel lbl) -> text "\t" <> pprBcond c <+> ppr lbl
-  BCOND c (TReg r)     -> panic "AArch64.ppr: No conditional branching to registers!"
+  BCOND _ (TReg _)     -> panic "AArch64.ppr: No conditional branching to registers!"
 
   -- 5. Atomic Instructions ----------------------------------------------------
   -- 6. Conditional Instructions -----------------------------------------------
@@ -467,11 +490,11 @@ pprInstr platform instr = case instr of
 
   CBZ o (TBlock bid) -> text "\tcbz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
   CBZ o (TLabel lbl) -> text "\tcbz" <+> pprOp o <> comma <+> ppr lbl
-  CBZ c (TReg r)     -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
+  CBZ _ (TReg _)     -> panic "AArch64.ppr: No conditional (cbz) branching to registers!"
 
   CBNZ o (TBlock bid) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr (mkLocalBlockLabel (getUnique bid))
   CBNZ o (TLabel lbl) -> text "\tcbnz" <+> pprOp o <> comma <+> ppr lbl
-  CBNZ c (TReg r)     -> panic "AArch64.ppr: No conditional (cbnz) branching to registers!"
+  CBNZ _ (TReg _)     -> panic "AArch64.ppr: No conditional (cbnz) branching to registers!"
 
   -- 7. Load and Store Instructions --------------------------------------------
   -- NOTE: GHC may do whacky things where it only load the lower part of an
@@ -544,4 +567,10 @@ pprCond c = case c of
   OLT    -> text "mi"
   OLE    -> text "ls"
   OGE    -> text "ge"
-  OGT    -> text "gt"
\ No newline at end of file
+  OGT    -> text "gt"
+
+  -- Unordered
+  UOLT   -> text "lt"
+  UOLE   -> text "le"
+  UOGE   -> text "pl"
+  UOGT   -> text "hi"
\ No newline at end of file


=====================================
compiler/GHC/CmmToAsm/AArch64/RegInfo.hs
=====================================
@@ -5,17 +5,10 @@ module GHC.CmmToAsm.AArch64.RegInfo where
 
 import GHC.Prelude
 
-import GHC.Platform.Reg
-import GHC.Platform.Reg.Class
-import GHC.CmmToAsm.Format
-
 import GHC.CmmToAsm.AArch64.Instr
-
 import GHC.Cmm.BlockId
 import GHC.Cmm
-import GHC.Cmm.CLabel
 
-import GHC.Types.Unique
 import GHC.Utils.Outputable
 
 data JumpDest = DestBlockId BlockId


=====================================
compiler/GHC/CmmToAsm/AArch64/Regs.hs
=====================================
@@ -1,4 +1,5 @@
 {-# language CPP #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
 module GHC.CmmToAsm.AArch64.Regs where
 
 
@@ -9,7 +10,7 @@ import GHC.Prelude
 import GHC.Platform.Reg
 import GHC.Platform.Reg.Class
 import GHC.CmmToAsm.Format
-import GHC.CmmToAsm.Config
+-- import GHC.CmmToAsm.Config
 
 import GHC.Cmm
 import GHC.Cmm.CLabel           ( CLabel )
@@ -17,10 +18,13 @@ import GHC.Types.Unique
 
 import GHC.Platform.Regs
 import GHC.Utils.Outputable
+import GHC.Utils.Panic
 import GHC.Platform
 
-import Data.Word        ( Word8, Word16, Word32, Word64 )
-import Data.Int         ( Int8, Int16, Int32, Int64 )
+import GHC.Driver.Ppr
+
+-- import Data.Word        ( Word8, Word16, Word32, Word64 )
+-- import Data.Int         ( Int8, Int16, Int32, Int64 )
 
 allMachRegNos   :: [RegNo]
 allMachRegNos   = [0..31] ++ [32..63]
@@ -74,7 +78,7 @@ data Imm
   deriving (Eq, Show)
 
 instance Show SDoc where
-  show = showSDocUnsafe
+  show = showPprUnsafe . ppr
 
 instance Eq SDoc where
   lhs == rhs = show lhs == show rhs
@@ -154,7 +158,7 @@ mkVirtualReg u format
    = case format of
         FF32    -> VirtualRegD u
         FF64    -> VirtualRegD u
-        _    -> panic "AArch64.mkVirtualReg"
+        _       -> panic "AArch64.mkVirtualReg"
 
 {-# INLINE classOfRealReg      #-}
 classOfRealReg :: RealReg -> RegClass


=====================================
compiler/GHC/CmmToAsm/Instr.hs
=====================================
@@ -16,8 +16,6 @@ import GHC.Cmm.BlockId
 
 import GHC.CmmToAsm.Config
 
-import GHC.Utils.Outputable (SDoc)
-
 import GHC.Stack
 
 -- | Holds a list of source and destination registers used by a


=====================================
compiler/GHC/CmmToAsm/PIC.hs
=====================================
@@ -262,7 +262,7 @@ howToAccessLabel config _ OSMinGW32 this_mod _ lbl
 -- is enough for ~64MB of range. Anything else will need to go through a veneer,
 -- which is the job of the linker to build.  We might only want to lookup
 -- Data References through the GOT.
-howToAccessLabel config ArchAArch64 _os this_mod kind lbl
+howToAccessLabel config ArchAArch64 _os this_mod _kind lbl
         | not (ncgExternalDynamicRefs config)
         = AccessDirectly
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -139,7 +139,6 @@ import GHC.Types.Unique.Supply
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
 import GHC.Platform
-import GHC.Stack
 
 import Data.Maybe
 import Data.List
@@ -151,7 +150,7 @@ import Control.Applicative
 
 -- Allocate registers
 regAlloc
-        :: Instruction instr
+        :: (Instruction instr)
         => NCGConfig
         -> LiveCmmDecl statics instr
         -> UniqSM ( NatCmmDecl statics instr
@@ -208,7 +207,7 @@ regAlloc _ (CmmProc _ _ _ _)
 --   an entry in the block map or it is the first block.
 --
 linearRegAlloc
-        :: forall instr. Instruction instr
+        :: forall instr. (Instruction instr)
         => NCGConfig
         -> [BlockId] -- ^ entry points
         -> BlockMap RegSet
@@ -261,7 +260,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
         return  (blocks, stats, getStackUse stack)
 
 
-linearRA_SCCs :: (HasCallStack, OutputableRegConstraint freeRegs instr)
+linearRA_SCCs :: (OutputableRegConstraint freeRegs instr)
               => [BlockId]
               -> BlockMap RegSet
               -> [NatBasicBlock instr]
@@ -296,7 +295,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
    more sanity checking to guard against this eventuality.
 -}
 
-process :: (HasCallStack, OutputableRegConstraint freeRegs instr)
+process :: (OutputableRegConstraint freeRegs instr)
         => [BlockId]
         -> BlockMap RegSet
         -> [GenBasicBlock (LiveInstr instr)]
@@ -340,7 +339,7 @@ process entry_ids block_live (b@(BasicBlock id _) : blocks)
 -- | Do register allocation on this basic block
 --
 processBlock
-        :: (HasCallStack, OutputableRegConstraint freeRegs instr)
+        :: (OutputableRegConstraint freeRegs instr)
         => BlockMap RegSet              -- ^ live regs on entry to each basic block
         -> LiveBasicBlock instr         -- ^ block to do register allocation on
         -> RegM freeRegs [NatBasicBlock instr]   -- ^ block with registers allocated
@@ -387,7 +386,7 @@ initBlock id block_live
 
 -- | Do allocation for a sequence of instructions.
 linearRA
-        :: (HasCallStack, OutputableRegConstraint freeRegs instr)
+        :: (OutputableRegConstraint freeRegs instr)
         => BlockMap RegSet                      -- ^ map of what vregs are live on entry to each block.
         -> [instr]                              -- ^ accumulator for instructions already processed.
         -> [NatBasicBlock instr]                -- ^ accumulator for blocks of fixup code.
@@ -414,7 +413,7 @@ linearRA block_live accInstr accFixups id (instr:instrs)
 
 -- | Do allocation for a single instruction.
 raInsn
-        :: (HasCallStack, OutputableRegConstraint freeRegs instr)
+        :: (OutputableRegConstraint freeRegs instr)
         => BlockMap RegSet                      -- ^ map of what vregs are love on entry to each block.
         -> [instr]                              -- ^ accumulator for instructions already processed.
         -> BlockId                              -- ^ the id of the current block, for debugging
@@ -498,7 +497,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
 
 
 genRaInsn :: forall freeRegs instr.
-             OutputableRegConstraint freeRegs instr
+             (OutputableRegConstraint freeRegs instr)
           => BlockMap RegSet
           -> [instr]
           -> BlockId
@@ -877,7 +876,7 @@ findPrefRealReg vreg = do
 
 -- reading is redundant with reason, but we keep it around because it's
 -- convenient and it maintains the recursive structure of the allocator. -- EZY
-allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
+allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
                         => Bool
                         -> [VirtualReg]
                         -> [instr]


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
=====================================
@@ -7,12 +7,13 @@ import GHC.Platform.Reg.Class
 import GHC.Platform.Reg
 
 import GHC.Utils.Outputable
+import GHC.Utils.Panic
 import GHC.Platform
 
 import Data.Word
 import Data.Bits
 
-import Debug.Trace
+-- import Debug.Trace
 import GHC.Stack
 -- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp
 -- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON
@@ -122,7 +123,7 @@ getFreeRegs cls (FreeRegs g f)
   | RcDouble  <- cls = go 32 f 31
   | RcInteger <- cls = go  0 g 18
     where
-        go off _ i | i < 0 = []
+        go _   _ i | i < 0 = []
         go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1)
                    | otherwise   = go off x $! i - 1
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
=====================================
@@ -26,7 +26,6 @@ import GHC.Cmm.BlockId
 import GHC.Cmm.Dataflow.Collections
 import GHC.Data.Graph.Directed
 import GHC.Utils.Panic
-import GHC.Utils.Outputable
 import GHC.Utils.Monad (concatMapM)
 import GHC.Types.Unique
 import GHC.Types.Unique.FM


=====================================
compiler/GHC/CmmToAsm/Reg/Liveness.hs
=====================================
@@ -171,6 +171,8 @@ instance Instruction instr => Instruction (InstrSR instr) where
 
         pprInstr platform i = ppr (fmap (pprInstr platform) i)
 
+        mkComment               = fmap Instr . mkComment
+
 
 -- | An instruction with liveness information.
 data LiveInstr instr


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -699,9 +699,9 @@ mkLoadInstr config reg delta slot
   = let off     = spillSlotToOffset platform slot - delta
     in
         case targetClassOfReg platform reg of
-              RcInteger -> (delta, [MOV (archWordFormat is32Bit)
-                                    (OpAddr (spRel platform off)) (OpReg reg)])
-              RcDouble  -> (delta, [MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
+              RcInteger -> ([MOV (archWordFormat is32Bit)
+                                 (OpAddr (spRel platform off)) (OpReg reg)])
+              RcDouble  -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)])
               _         -> panic "X86.mkLoadInstr"
     where platform = ncgPlatform config
           is32Bit = target32Bit platform


=====================================
compiler/GHC/Driver/Backend.hs
=====================================
@@ -104,6 +104,7 @@ platformNcgSupported platform = if
          ArchPPC       -> True
          ArchPPC_64 {} -> True
          ArchSPARC     -> True
+         ArchAArch64   -> True
          _             -> False
 
 -- | Will this backend produce an object file on the disk?


=====================================
compiler/ghc.cabal.in
=====================================
@@ -600,6 +600,7 @@ Library
             GHC.CmmToAsm.X86.Cond
             GHC.CmmToAsm.X86.Ppr
             GHC.CmmToAsm.X86.CodeGen
+            GHC.CmmToAsm.AArch64
             GHC.CmmToAsm.AArch64.Regs
             GHC.CmmToAsm.AArch64.RegInfo
             GHC.CmmToAsm.AArch64.Instr


=====================================
docs/users_guide/expected-undocumented-flags.txt
=====================================
@@ -146,3 +146,8 @@
 -ticky-LNE
 -ticky-allocd
 -ticky-dyn-thunk
+-fasm-immload
+-fasm-jumptables
+-fasm-negoffset
+-fasm-regoffsets
+-fasm-usezeroreg


=====================================
hadrian/src/Oracles/Flag.hs
=====================================
@@ -79,10 +79,3 @@ targetSupportsSMP = do
      , ver < ARMv7          -> return False
      | goodArch             -> return True
      | otherwise            -> return False
-
-ghcWithNativeCodeGen :: Action Bool
-ghcWithNativeCodeGen = do
-    goodArch <- anyTargetArch ["i386", "x86_64", "sparc", "powerpc", "aarch64"]
-    badOs    <- anyTargetOs ["aix"]
-    ghcUnreg <- flag GhcUnregisterised
-    return $ goodArch && not badOs && not ghcUnreg


=====================================
rts/StgCRun.c
=====================================
@@ -883,41 +883,30 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
 
 #if defined(aarch64_HOST_ARCH)
 
-/* See also AArch64/Instr.hs
- *
- * Save caller save registers
- * This is x0-x18
- *
- * For SIMD/FP Registers:
- * Registers v8-v15 must be preserved by a callee across subroutine calls;
- * the remaining registers (v0-v7, v16-v31) do not need to be preserved (or
- * should be preserved by the caller). Additionally, only the bottom 64 bits
- * of each value stored in v8-v15 need to be preserved [7]; it is the
- * responsibility of the caller to preserve larger values.
- *
- * .---------------------------------------------------------------------------------------------------------------------------------------------------------------.
- * |  0 |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |
- * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
- * |== General Purpose registers ==================================================================================================================================|
- * | <---- argument passing -------------> | IR | <------- tmp registers --------> | IP0| IP1| PL | <------------------- callee saved ------------> | FP | LR | SP |
- * | <------ free registers --------------------------------------------------------------------> | BR | Sp | Hp | R1 | R2 | R3 | R4 | R5 | R6 | SL | -- | -- | -- |
- * |== SIMD/FP Registers ==========================================================================================================================================|
- * | <---- argument passing -------------> | <-- callee saved (lower 64 bits) ---> | <--------------------------------------- caller saved ----------------------> |
- * | <------ free registers -------------> | F1 | F2 | F3 | F4 | D1 | D2 | D3 | D4 | <------ free registers -----------------------------------------------------> |
- * '---------------------------------------------------------------------------------------------------------------------------------------------------------------'
- * IR: Indirect result location register, IP: Intra-procedure register, PL: Platform register, FP: Frame pointer, LR: Link register, SP: Stack pointer
- * BR: Base, SL: SpLim
- */
-
 StgRegTable *
 StgRun(StgFunPtr f, StgRegTable *basereg) {
     StgRegTable * r;
     __asm__ volatile (
         /*
          * Save callee-saves registers on behalf of the STG code.
-         * Note: The compiler will insert this for us if we specify the
-         *       Clobbered correctly. See below.
+         * Floating point registers only need the bottom 64 bits preserved.
+         * We need to use the names x16, x17, x29 and x30 instead of ip0
+         * ip1, fp and lp because one of either clang or gcc doesn't understand
+         * the later names.
          */
+        "stp x29,  x30,  [sp, #-16]!\n\t"
+        "mov x29, sp\n\t"
+        "stp x16, x17, [sp, #-16]!\n\t"
+        "stp x19, x20, [sp, #-16]!\n\t"
+        "stp x21, x22, [sp, #-16]!\n\t"
+        "stp x23, x24, [sp, #-16]!\n\t"
+        "stp x25, x26, [sp, #-16]!\n\t"
+        "stp x27, x28, [sp, #-16]!\n\t"
+        "stp d8,  d9,  [sp, #-16]!\n\t"
+        "stp d10, d11, [sp, #-16]!\n\t"
+        "stp d12, d13, [sp, #-16]!\n\t"
+        "stp d14, d15, [sp, #-16]!\n\t"
+
         /*
          * allocate some space for Stg machine's temporary storage.
          * Note: RESERVED_C_STACK_BYTES has to be a round number here or
@@ -946,28 +935,26 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
          * Return the new register table, taking it from Stg's R1 (ARM64's R22).
          */
         "mov %0, x22\n\t"
-
-        /* Restore callee-saves register
-         * Note: The compiler will insert this for us if we specify the
-         *       Clobbered correctly. See below.
+        /*
+         * restore callee-saves registers.
          */
 
-      /* Outputs (r) */
+        "ldp d14, d15, [sp], #16\n\t"
+        "ldp d12, d13, [sp], #16\n\t"
+        "ldp d10, d11, [sp], #16\n\t"
+        "ldp d8,  d9,  [sp], #16\n\t"
+        "ldp x27, x28, [sp], #16\n\t"
+        "ldp x25, x26, [sp], #16\n\t"
+        "ldp x23, x24, [sp], #16\n\t"
+        "ldp x21, x22, [sp], #16\n\t"
+        "ldp x19, x20, [sp], #16\n\t"
+        "ldp x16, x17, [sp], #16\n\t"
+        "ldp x29,  x30,  [sp], #16\n\t"
+
       : "=r" (r)
-      /* Inputs (f, regbase, RESERVED_C_STACK_BYTES) */
       : "r" (f), "r" (basereg), "i" (RESERVED_C_STACK_BYTES)
-      /* Clobbered */
-      :   // any of the stg calls may directly or indirectly modify these:
-          "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
-          // the IP usually, not, but better safe than sorry. However, I'm not sure
-          // we even have to save them. There is no expectation they survive a call.
-          "%x16", "%x17",
-          // The Link Register will hold the point we want to return to; and we may
-          // overwrite it with BL instructions in the haskell code.
-          "%x30",
-          // floating point registers
-          "%d8", "%d9", "%d10", "%d11", "%d12", "%d13", "%d14", "%d15",
-          "memory"
+        : "%x19", "%x20", "%x21", "%x22", "%x23", "%x24", "%x25", "%x26", "%x27", "%x28",
+          "%x16", "%x17", "%x30"
     );
     return r;
 }



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/ec1a9788cff5bff38bbce0441ac43f02decdccae...a378fc67edd6686082494c8ee802de03e68506d7

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/ec1a9788cff5bff38bbce0441ac43f02decdccae...a378fc67edd6686082494c8ee802de03e68506d7
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200926/92f2756b/attachment-0001.html>


More information about the ghc-commits mailing list