[Git][ghc/ghc][wip/romes/bswap] ncg(aarch64): Implement MO_BSwap using REV

Rodrigo Mesquita (@alt-romes) gitlab at gitlab.haskell.org
Fri Jun 14 15:21:34 UTC 2024



Rodrigo Mesquita pushed to branch wip/romes/bswap at Glasgow Haskell Compiler / GHC


Commits:
9b65c166 by Rodrigo Mesquita at 2024-06-14T16:20:52+01:00
ncg(aarch64): Implement MO_BSwap using REV

Implements MO_BSwap by producing assembly to do the byte swapping
instead of producing a foreign call a C function.

In `tar`, the hot loop for `deserialise` got almost 4x faster by
avoiding the foreign call which caused spilling live variables to the
stack -- this means the loop did 4x more memory read/writing than
necessary in that particular case!

- - - - -


4 changed files:

- compiler/GHC/Cmm/Parser.y
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs


Changes:

=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -1147,10 +1147,12 @@ callishMachOps platform = listToUFM $
         ( "prefetch1", (MO_Prefetch_Data 1,)),
         ( "prefetch2", (MO_Prefetch_Data 2,)),
         ( "prefetch3", (MO_Prefetch_Data 3,))
+
     ] ++ concat
     [ allWidths "popcnt" MO_PopCnt
     , allWidths "pdep" MO_Pdep
     , allWidths "pext" MO_Pext
+    , allWidths "bswap" MO_BSwap
     , allWidths "cmpxchg" MO_Cmpxchg
     , allWidths "xchg" MO_Xchg
     , allWidths "load_relaxed" (\w -> MO_AtomicRead w MemOrderAcquire)


=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1546,7 +1546,7 @@ genCondBranch _ true false expr = do
 -- range within 64bit.
 
 genCCall
-    :: ForeignTarget      -- function to call
+    :: ForeignTarget      -- function to call (or primop)
     -> [CmmFormal]        -- where to put the result
     -> [CmmActual]        -- arguments (of mixed type)
     -> BlockId            -- The block we are in
@@ -2014,7 +2014,15 @@ genCCall target dest_regs arg_regs bid = do
         MO_PopCnt w         -> mkCCall (popCntLabel w)
         MO_Pdep w           -> mkCCall (pdepLabel w)
         MO_Pext w           -> mkCCall (pextLabel w)
-        MO_BSwap w          -> mkCCall (bSwapLabel w)
+        MO_BSwap w
+          | [src_reg] <- arg_regs
+          , [dst_reg] <- dest_regs -> do
+              (src, _fmt_p, code_p) <- getSomeReg src_reg
+              platform <- getPlatform
+              let dst = getRegisterReg platform (CmmLocal dst_reg)
+                  code = code_p `snocOL` REV (OpReg w dst) (OpReg w src)
+              return (code, Nothing)
+          | otherwise -> panic "mal-formed ByteSwap"
 
         -- -- Atomic read-modify-write.
         MO_AtomicRead w ord


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -102,6 +102,7 @@ regUsageOfInstr platform instr = case instr of
   UXTH dst src             -> usage (regOp src, regOp dst)
   CLZ  dst src             -> usage (regOp src, regOp dst)
   RBIT dst src             -> usage (regOp src, regOp dst)
+  REV  dst src             -> usage (regOp src, regOp dst)
   -- 3. Logical and Move Instructions ------------------------------------------
   AND dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   ASR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -238,7 +239,8 @@ patchRegsOfInstr instr env = case instr of
     SXTH o1 o2       -> SXTH (patchOp o1) (patchOp o2)
     UXTH o1 o2       -> UXTH (patchOp o1) (patchOp o2)
     CLZ o1 o2        -> CLZ  (patchOp o1) (patchOp o2)
-    RBIT o1 o2       -> RBIT  (patchOp o1) (patchOp o2)
+    RBIT o1 o2       -> RBIT (patchOp o1) (patchOp o2)
+    REV o1 o2        -> REV  (patchOp o1) (patchOp o2)
 
     -- 3. Logical and Move Instructions ----------------------------------------
     AND o1 o2 o3   -> AND  (patchOp o1) (patchOp o2) (patchOp o3)
@@ -599,6 +601,7 @@ data Instr
     | UBFX Operand Operand Operand Operand -- rd = rn[i,j]
     | CLZ  Operand Operand -- rd = countLeadingZeros(rn)
     | RBIT Operand Operand -- rd = reverseBits(rn)
+    | REV  Operand Operand -- rd = bswap(rn)
 
     -- 3. Logical and Move Instructions ----------------------------------------
     | AND Operand Operand Operand -- rd = rn & op2
@@ -686,6 +689,7 @@ instrCon i =
       UBFX{} -> "UBFX"
       CLZ{} -> "CLZ"
       RBIT{} -> "RBIT"
+      REV{} -> "REV"
       AND{} -> "AND"
       ASR{} -> "ASR"
       EOR{} -> "EOR"


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -397,7 +397,12 @@ pprInstr platform instr = case instr of
   SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
   UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
   CLZ  o1 o2       -> op2 (text "\tclz")  o1 o2
-  RBIT  o1 o2      -> op2 (text "\trbit")  o1 o2
+  RBIT o1 o2       -> op2 (text "\trbit")  o1 o2
+  REV  (OpReg W8  (RegReal (RealRegSingle i))) _ | i < 32 ->
+    {- swapping a single byte is a no-op -} empty
+  REV  o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 ->
+                      op2 (text "\trev16") o1 o2
+  REV  o1 o2       -> op2 (text "\trev")   o1 o2
   -- signed and unsigned bitfield extract
   SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
   UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9b65c16696995c9c930c18467a9ef0f675a85282

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9b65c16696995c9c930c18467a9ef0f675a85282
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240614/1f26afa7/attachment-0001.html>


More information about the ghc-commits mailing list