[Git][ghc/ghc][wip/angerman/aarch64-ncg] 5 commits: Address Takenobu's comments

Moritz Angermann gitlab at gitlab.haskell.org
Wed Jul 8 05:22:04 UTC 2020



Moritz Angermann pushed to branch wip/angerman/aarch64-ncg at Glasgow Haskell Compiler / GHC


Commits:
41759181 by Moritz Angermann at 2020-07-08T05:18:47+00:00
Address Takenobu's comments

- - - - -
4692ebf1 by Moritz Angermann at 2020-07-08T05:18:48+00:00
Fix floating points handling of NaNs

- - - - -
32e76545 by Moritz Angermann at 2020-07-08T05:18:48+00:00
Add basic Graph Coloring support

- - - - -
5f47aed6 by Moritz Angermann at 2020-07-08T05:18:48+00:00
Drop debug

- - - - -
312e7d5e by Moritz Angermann at 2020-07-08T05:18:48+00:00
Add primops_match.cmm testsuite

- - - - -


10 changed files:

- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Cond.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- compiler/GHC/CmmToAsm/Config.hs
- compiler/GHC/CmmToAsm/Monad.hs
- compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
- compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
- tests/compiler/cmm/primops_math.cmm


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -106,9 +106,9 @@ cmmTopCodeGen
 -- Thus we'll have to deal with either CmmProc ...
 cmmTopCodeGen cmm@(CmmProc info lab live graph) = do
   config <- getConfig
-  when (ncgVerbosity config > 1) $ do
-    traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n"
-          ++ showSDocUnsafe (ppr cmm)
+  -- do
+  --   traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n"
+  --         ++ showSDocUnsafe (ppr cmm)
 
   let blocks = toBlockListEntryFirst graph
   (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
@@ -129,9 +129,9 @@ cmmTopCodeGen cmm@(CmmProc info lab live graph) = do
 -- ... or CmmData. Do we want to align this?
 cmmTopCodeGen cmm@(CmmData sec dat) = do
   config <- getConfig
-  when (ncgVerbosity config > 1) $ do
-    traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n"
-          ++ showSDocUnsafe (ppr cmm)
+  -- do
+  --   traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n"
+  --         ++ showSDocUnsafe (ppr cmm)
   return [CmmData sec dat] -- no translation, we just use CmmStatic
 
 -- So we need BasicBlockCodeGen
@@ -142,9 +142,9 @@ basicBlockCodeGen
 
 basicBlockCodeGen block = do
   config <- getConfig
-  when (ncgVerbosity config > 1) $ do
-    traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
-          ++ showSDocUnsafe (ppr block)
+  -- do
+  --   traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n"
+  --         ++ showSDocUnsafe (ppr block)
   let (_, nodes, tail)  = blockSplit block
       id = entryLabel block
       stmts = blockToList nodes
@@ -669,10 +669,15 @@ getRegister' config plat expr
         MO_F_Eq w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d EQ ])
         MO_F_Ne w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d NE ])
 
-        MO_F_Ge w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d SGE ])
-        MO_F_Le w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d SLE ])
-        MO_F_Gt w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d SGT ])
-        MO_F_Lt w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d SLT ])
+        -- careful with the floating point operations.
+        -- SLE is effectively LE or unordered (NaN)
+        -- SLT is the same. ULE, and ULT will not return true for NaN.
+        -- This is a bit counterintuitive. Don't let yourself be fooled by
+        -- the S/U prefix for floats, it's only meaningful for integers.
+        MO_F_Ge w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGE ])
+        MO_F_Le w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLE ]) -- x <= y <=> y >= x
+        MO_F_Gt w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGT ])
+        MO_F_Lt w    -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y > x
 
         -- Bitwise operations
         MO_And   w -> intOp w (\d x y -> unitOL $ AND d x y)


=====================================
compiler/GHC/CmmToAsm/AArch64/Cond.hs
=====================================
@@ -4,6 +4,37 @@ import GHC.Prelude
 
 import GHC.Utils.Panic
 
+-- XXX: This appears to go a bit overboard? Maybe we should stick with what LLVM
+-- settled on for fcmp?
+-- false: always yields false, regardless of operands.
+-- oeq: yields true if both operands are not a QNAN and op1 is equal to op2.
+-- ogt: yields true if both operands are not a QNAN and op1 is greater than op2.
+-- oge: yields true if both operands are not a QNAN and op1 is greater than or equal to op2.
+-- olt: yields true if both operands are not a QNAN and op1 is less than op2.
+-- ole: yields true if both operands are not a QNAN and op1 is less than or equal to op2.
+-- one: yields true if both operands are not a QNAN and op1 is not equal to op2.
+-- ord: yields true if both operands are not a QNAN.
+-- ueq: yields true if either operand is a QNAN or op1 is equal to op2.
+-- ugt: yields true if either operand is a QNAN or op1 is greater than op2.
+-- uge: yields true if either operand is a QNAN or op1 is greater than or equal to op2.
+-- ult: yields true if either operand is a QNAN or op1 is less than op2.
+-- ule: yields true if either operand is a QNAN or op1 is less than or equal to op2.
+-- une: yields true if either operand is a QNAN or op1 is not equal to op2.
+-- uno: yields true if either operand is a QNAN.
+-- true: always yields true, regardless of operands.
+--
+-- LLVMs icmp knows about:
+-- eq: yields true if the operands are equal, false otherwise. No sign interpretation is necessary or performed.
+-- ne: yields true if the operands are unequal, false otherwise. No sign interpretation is necessary or performed.
+-- ugt: interprets the operands as unsigned values and yields true if op1 is greater than op2.
+-- uge: interprets the operands as unsigned values and yields true if op1 is greater than or equal to op2.
+-- ult: interprets the operands as unsigned values and yields true if op1 is less than op2.
+-- ule: interprets the operands as unsigned values and yields true if op1 is less than or equal to op2.
+-- sgt: interprets the operands as signed values and yields true if op1 is greater than op2.
+-- sge: interprets the operands as signed values and yields true if op1 is greater than or equal to op2.
+-- slt: interprets the operands as signed values and yields true if op1 is less than op2.
+-- sle: interprets the operands as signed values and yields true if op1 is less than or equal to op2.
+
 data Cond
     = ALWAYS -- b.al
     | EQ     -- b.eq
@@ -18,6 +49,16 @@ data Cond
     | ULE    -- b.ls
     | UGE    -- b.hs
     | UGT    -- b.hi
+    -- ordered
+    | OLT    -- b.mi
+    | OLE    -- b.ls
+    | OGE    -- b.ge
+    | OGT    -- b.gt
+    -- unordered
+    | UOLT   -- b.lt
+    | UOLE   -- b.le
+    | UOGE   -- b.pl
+    | UOGT   -- b.hi
     -- others
     | NEVER  -- ne
     | VS     -- oVerflow set


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -320,7 +320,10 @@ pprReg w r = case r of
 
   where
     ppr_reg_no :: Width -> Int -> SDoc
-    ppr_reg_no _ 31 = text "sp"
+    ppr_reg_no w 31
+         | w == W64 = text "sp"
+         | w == W32 = test "wsp"
+
     ppr_reg_no w i
          | i < 0, w == W32 = text "wzr"
          | i < 0, w == W64 = text "xzr"
@@ -468,20 +471,26 @@ pprBcond c = text "b." <> pprCond c
 
 pprCond :: Cond -> SDoc
 pprCond c = case c of
-  ALWAYS -> text "al"
-  EQ     -> text "eq"
-  NE     -> text "ne"
-
-  SLT    -> text "lt"
-  SLE    -> text "le"
-  SGE    -> text "ge"
-  SGT    -> text "gt"
-
-  ULT    -> text "lo"
-  ULE    -> text "ls"
-  UGE    -> text "hs"
-  UGT    -> text "hi"
-
-  NEVER  -> text "ne"
-  VS     -> text "vs"
-  VC     -> text "vc"
\ No newline at end of file
+  ALWAYS -> text "al" -- Always
+  EQ     -> text "eq" -- Equal
+  NE     -> text "ne" -- Not Equal
+
+  SLT    -> text "lt" -- Signed less than                  ; Less than, or unordered
+  SLE    -> text "le" -- Signed less than or equal         ; Less than or equal, or unordered
+  SGE    -> text "ge" -- Signed greater than or equal      ; Greater than or equal
+  SGT    -> text "gt" -- Signed greater than               ; Greater than
+
+  ULT    -> text "lo" -- Carry clear/ unsigned lower       ; less than
+  ULE    -> text "ls" -- Unsigned lower or same            ; Less than or equal
+  UGE    -> text "hs" -- Carry set/unsigned higher or same ; Greater than or equal, or unordered
+  UGT    -> text "hi" -- Unsigned higher                   ; Greater than, or unordered
+
+  NEVER  -> text "ne" -- Never
+  VS     -> text "vs" -- Overflow                          ; Unordered (at least one NaN operand)
+  VC     -> text "vc" -- No overflow                       ; Not unordered
+
+  -- Ordered variants.  Respecting NaN.
+  OLT    -> text "mi"
+  OLE    -> text "ls"
+  OGE    -> text "ge"
+  OGT    -> text "gt"
\ No newline at end of file


=====================================
compiler/GHC/CmmToAsm/Config.hs
=====================================
@@ -15,7 +15,6 @@ data NCGConfig = NCGConfig
    { ncgPlatform              :: !Platform        -- ^ Target platform
    , ncgProcAlignment         :: !(Maybe Int)     -- ^ Mandatory proc alignment
    , ncgDebugLevel            :: !Int             -- ^ Debug level
-   , ncgVerbosity             :: !Int             -- ^ Verbosity level
    , ncgExternalDynamicRefs   :: !Bool            -- ^ Generate code to link against dynamic libraries
    , ncgPIC                   :: !Bool            -- ^ Enable Position-Independent Code
    , ncgInlineThresholdMemcpy :: !Word            -- ^ If inlining `memcpy` produces less than this threshold (in pseudo-instruction unit), do it


=====================================
compiler/GHC/CmmToAsm/Monad.hs
=====================================
@@ -151,7 +151,6 @@ initConfig dflags = NCGConfig
    { ncgPlatform              = targetPlatform dflags
    , ncgProcAlignment         = cmmProcAlignment dflags
    , ncgDebugLevel            = debugLevel dflags
-   , ncgVerbosity             = verbosity dflags
    , ncgExternalDynamicRefs   = gopt Opt_ExternalDynamicRefs dflags
    , ncgPIC                   = positionIndependent dflags
    , ncgInlineThresholdMemcpy = fromIntegral $ maxInlineMemcpyInsns dflags


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
=====================================
@@ -115,7 +115,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl
                             ArchSPARC64   -> panic "trivColorable ArchSPARC64"
                             ArchPPC_64 _  -> 15
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
-                            ArchAArch64     -> panic "trivColorable ArchAArch64"
+                            -- We should be able to allocate *a lot* more in principle.
+                            -- essentially all 32 - SP, so 31, we'd trash the link reg
+                            -- as well as the platform and all others though.
+                            ArchAArch64   -> 18
                             ArchAlpha     -> panic "trivColorable ArchAlpha"
                             ArchMipseb    -> panic "trivColorable ArchMipseb"
                             ArchMipsel    -> panic "trivColorable ArchMipsel"
@@ -146,7 +149,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus
                             ArchSPARC64   -> panic "trivColorable ArchSPARC64"
                             ArchPPC_64 _  -> 0
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
-                            ArchAArch64     -> panic "trivColorable ArchAArch64"
+                            -- we can in principle address all the float regs as
+                            -- segments. So we could have 64 Float regs. Or
+                            -- 128 Half regs, or even 256 Byte regs.
+                            ArchAArch64   -> 0
                             ArchAlpha     -> panic "trivColorable ArchAlpha"
                             ArchMipseb    -> panic "trivColorable ArchMipseb"
                             ArchMipsel    -> panic "trivColorable ArchMipsel"
@@ -179,7 +185,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu
                             ArchSPARC64   -> panic "trivColorable ArchSPARC64"
                             ArchPPC_64 _  -> 20
                             ArchARM _ _ _ -> panic "trivColorable ArchARM"
-                            ArchAArch64     -> panic "trivColorable ArchAArch64"
+                            ArchAArch64   -> 32
                             ArchAlpha     -> panic "trivColorable ArchAlpha"
                             ArchMipseb    -> panic "trivColorable ArchMipseb"
                             ArchMipsel    -> panic "trivColorable ArchMipsel"


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -352,7 +352,7 @@ processBlock block_live (BasicBlock id instrs)
 
 -- | Load the freeregs and current reg assignment into the RegM state
 --      for the basic block with this BlockId.
-initBlock :: (HasCallStack, FR freeRegs)
+initBlock :: FR freeRegs
           => BlockId -> BlockMap RegSet -> RegM freeRegs ()
 initBlock id block_live
  = do   platform    <- getPlatform
@@ -489,7 +489,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
                   | otherwise = False
 
 
-genRaInsn :: (HasCallStack, OutputableRegConstraint freeRegs instr)
+genRaInsn :: (OutputableRegConstraint freeRegs instr)
           => BlockMap RegSet
           -> [instr]
           -> BlockId
@@ -512,33 +512,27 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
     let real_read       = nub [ rr      | (RegReal rr) <- read]
     let virt_read       = nub [ vr      | (RegVirtual vr) <- read ]
 
-    config <- getConfig
-    when (ncgVerbosity config > 1) $ do
-        freeregs <- getFreeRegsR
-        assig    <- getAssigR
-
-        pprTraceM "genRaInsn"
-                (          text "block        = " <+> ppr block_id
-                        $$ text "instruction  = " <+> ppr instr
-                        $$ text "r_dying      = " <+> ppr r_dying
-                        $$ text "w_dying      = " <+> ppr w_dying
-                        $$ text "read         = " <+> ppr real_read    <+> ppr virt_read
-                        $$ text "written      = " <+> ppr real_written <+> ppr virt_written
-                        $$ text "freeregs     = " <+> ppr freeregs
-                        $$ text "assign       = " <+> ppr assig)
-{-
-        $ do
--}
+--     do
+--         freeregs <- getFreeRegsR
+--         assig    <- getAssigR
+
+--         pprTraceM "genRaInsn"
+--                 (          text "block        = " <+> ppr block_id
+--                         $$ text "instruction  = " <+> ppr instr
+--                         $$ text "r_dying      = " <+> ppr r_dying
+--                         $$ text "w_dying      = " <+> ppr w_dying
+--                         $$ text "read         = " <+> ppr real_read    <+> ppr virt_read
+--                         $$ text "written      = " <+> ppr real_written <+> ppr virt_written
+--                         $$ text "freeregs     = " <+> ppr freeregs
+--                         $$ text "assign       = " <+> ppr assig)
 
     -- (a), (b) allocate real regs for all regs read by this instruction.
     (r_spills, r_allocd) <-
         allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read
 
-    when (ncgVerbosity config > 1) $ do pprTraceM "ganRaInsn" (text "(c)")
     -- (c) save any temporaries which will be clobbered by this instruction
     clobber_saves <- saveClobberedTemps real_written r_dying
 
-    when (ncgVerbosity config > 1) $ do pprTraceM "ganRaInsn" (text "(d)")
     -- (d) Update block map for new destinations
     -- NB. do this before removing dead regs from the assignment, because
     -- these dead regs might in fact be live in the jump targets (they're
@@ -553,26 +547,21 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
     -- when (not $ null fixup_blocks) $
     --    pprTrace "fixup_blocks" (ppr fixup_blocks) (return ())
 
-    when (ncgVerbosity config > 1) $ pprTraceM "ganRaInsn" (text "(e)")
     -- (e) Delete all register assignments for temps which are read
     --     (only) and die here.  Update the free register list.
     releaseRegs r_dying
 
-    when (ncgVerbosity config > 1) $ pprTraceM "ganRaInsn" (text "(f)")
     -- (f) Mark regs which are clobbered as unallocatable
     clobberRegs real_written
 
-    when (ncgVerbosity config > 1) $ pprTraceM "ganRaInsn" (text "(g)")
     -- (g) Allocate registers for temporaries *written* (only)
     (w_spills, w_allocd) <-
         allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written
 
-    when (ncgVerbosity config > 1) $ pprTraceM "ganRaInsn" (text "(h)")
     -- (h) Release registers for temps which are written here and not
     -- used again.
     releaseRegs w_dying
 
-    when (ncgVerbosity config > 1) $ pprTraceM "ganRaInsn" (text "(i)")
     let
         -- (i) Patch the instruction
         patch_map
@@ -589,7 +578,6 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
                         Nothing -> x
                         Just y  -> y
 
-    when (ncgVerbosity config > 1) $ do pprTraceM "ganRaInsn" (text "(j)")
     -- (j) free up stack slots for dead spilled regs
     -- TODO (can't be bothered right now)
 
@@ -614,7 +602,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
 -- -----------------------------------------------------------------------------
 -- releaseRegs
 
-releaseRegs :: (HasCallStack, FR freeRegs) => [Reg] -> RegM freeRegs ()
+releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs ()
 releaseRegs regs = do
   platform <- getPlatform
   assig <- getAssigR
@@ -625,7 +613,6 @@ releaseRegs regs = do
 --       fltRegs = frGetFreeRegs platform RcFloat   free :: [RealReg]
 --       dblRegs = frGetFreeRegs platform RcDouble  free :: [RealReg]
 --       allFreeRegs = gpRegs ++ fltRegs ++ dblRegs
---   when (ncgVerbosity config > 1) $ do pprTraceM "releaseRegs" (text "allFreeRegs =" <+> ppr allFreeRegs)
 
   let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return ()
 --      loop assig !free (RegReal rr : rs) | rr `elem` allFreeRegs = loop assig free rs
@@ -655,7 +642,7 @@ releaseRegs regs = do
 --
 
 saveClobberedTemps
-        :: (HasCallStack, Instruction instr, FR freeRegs)
+        :: (Instruction instr, FR freeRegs)
         => [RealReg]            -- real registers clobbered by this instruction
         -> [Reg]                -- registers which are no longer live after this insn
         -> RegM freeRegs [instr]         -- return: instructions to spill any temps that will
@@ -721,7 +708,7 @@ saveClobberedTemps clobbered dying
 -- | Mark all these real regs as allocated,
 --      and kick out their vreg assignments.
 --
-clobberRegs :: (HasCallStack, FR freeRegs) => [RealReg] -> RegM freeRegs ()
+clobberRegs :: FR freeRegs => [RealReg] -> RegM freeRegs ()
 clobberRegs []
         = return ()
 
@@ -730,8 +717,6 @@ clobberRegs clobbered
         freeregs <- getFreeRegsR
 
         config <- getConfig
-        when (ncgVerbosity config > 1) $ do pprTraceM "clobberRegs" (ppr $ text "freeregs  =" <+> text (show freeregs)
-                                                                        $$ text "clobbered =" <+> ppr clobbered)
 
         let gpRegs  = frGetFreeRegs platform RcInteger freeregs :: [RealReg]
             fltRegs = frGetFreeRegs platform RcFloat   freeregs :: [RealReg]
@@ -740,11 +725,6 @@ clobberRegs clobbered
         let extra_clobbered = [ r | r <- clobbered
                                   , r `elem` (gpRegs ++ fltRegs ++ dblRegs) ]
 
-        when (ncgVerbosity config > 1) $ do pprTraceM "clobberRegs" (ppr $ text "gpRegs  =" <+> ppr gpRegs
-                                                                        $$ text "fltRegs =" <+> ppr fltRegs
-                                                                        $$ text "dblRegs =" <+> ppr dblRegs
-                                                                        $$ text "filterd =" <+> ppr extra_clobbered)
-
         setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs extra_clobbered
 
         -- setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered
@@ -859,7 +839,7 @@ findPrefRealReg vreg = do
 
 -- reading is redundant with reason, but we keep it around because it's
 -- convenient and it maintains the recursive structure of the allocator. -- EZY
-allocRegsAndSpill_spill :: (HasCallStack, FR freeRegs, Instruction instr, Outputable instr)
+allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
                         => Bool
                         -> [VirtualReg]
                         -> [instr]


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
=====================================
@@ -14,11 +14,35 @@ import Data.Bits
 
 import Debug.Trace
 import GHC.Stack
--- AArch64 has 32 64bit general purpose register x0..x31
--- AArch64 has 32 128bit floating point registers v0..v31
+-- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp
+-- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON
+-- extension in Armv8-A.
+--
+-- Armv8-A is a fundamental change to the Arm architecture. It supports the
+-- 64-bit Execution state called “AArch64”, and a new 64-bit instruction set
+-- “A64”. To provide compatibility with the Armv7-A (32-bit architecture)
+-- instruction set, a 32-bit variant of Armv8-A “AArch32” is provided. Most of
+-- existing Armv7-A code can be run in the AArch32 execution state of Armv8-A.
+--
 -- these can be addresses as q/d/s/h/b 0..31, or v.f<size>[idx]
 -- where size is 64, 32, 16, 8, ... and the index i allows us
 -- to access the given part.
+--
+-- History of Arm Adv SIMD
+-- .---------------------------------------------------------------------------.
+-- | Armv6                  | Armv7-A                | Armv8-A AArch64         |
+-- | SIMD extension         | NEON                   | NEON                    |
+-- |===========================================================================|
+-- | - Operates on 32-bit   | - Separate reg. bank,  | - Separate reg. bank,   |
+-- |   GP ARM registers     |    32x64-bit NEON regs |   32x128-bit NEON regs  |
+-- | - 8-bit/16-bit integer | - 8/16/32/64-bit int   | - 8/16/32/64-bit int    |
+-- |                        | - Single precision fp  | - Single precision fp   |
+-- |                        |                        | - Double precision fp   |
+-- |                        |                        | - Single/Double fp are  |
+-- |                        |                        |   IEEE compliant        |
+-- | - 2x16-bit/4x8-bit ops | - Up to 16x8-bit ops   | - Up to 16x8-bit ops    |
+-- |   per instruction      |   per instruction      |   per instruction       |
+-- '---------------------------------------------------------------------------'
 
 data FreeRegs = FreeRegs !Word32 !Word32
 


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
=====================================
@@ -17,7 +17,6 @@ import GHC.Platform.Reg.Class
 import GHC.CmmToAsm.Config
 import GHC.Utils.Panic
 import GHC.Platform
-import GHC.Stack
 
 -- -----------------------------------------------------------------------------
 -- The free register set
@@ -43,10 +42,10 @@ import qualified GHC.CmmToAsm.X86.Instr     as X86.Instr
 import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr
 
 class Show freeRegs => FR freeRegs where
-    frAllocateReg :: HasCallStack => Platform -> RealReg -> freeRegs -> freeRegs
-    frGetFreeRegs :: HasCallStack => Platform -> RegClass -> freeRegs -> [RealReg]
-    frInitFreeRegs :: HasCallStack => Platform -> freeRegs
-    frReleaseReg :: HasCallStack => Platform -> RealReg -> freeRegs -> freeRegs
+    frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs
+    frGetFreeRegs :: Platform -> RegClass -> freeRegs -> [RealReg]
+    frInitFreeRegs :: Platform -> freeRegs
+    frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs
 
 instance FR X86.FreeRegs where
     frAllocateReg  = \_ -> X86.allocateReg


=====================================
tests/compiler/cmm/primops_math.cmm
=====================================
@@ -104,5 +104,8 @@ main () {
     R3 = R1 `shra` R2;
     foreign "C" printf("%d shra %d => %d", R1, R2, R3);
 
+
+
+
     foreign "C" exit(0::I64);
 }
\ No newline at end of file



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/494802169966f93fd5dcaaa9237a093b16cca7cb...312e7d5e713d901f595df7ab875b8d10f0cb63ad

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/494802169966f93fd5dcaaa9237a093b16cca7cb...312e7d5e713d901f595df7ab875b8d10f0cb63ad
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200708/5bea9ef8/attachment-0001.html>


More information about the ghc-commits mailing list