[Git][ghc/ghc][wip/romes/bswap] ncg(aarch64): Implement MO_BSwap using REV
Rodrigo Mesquita (@alt-romes)
gitlab at gitlab.haskell.org
Fri Jun 14 15:21:34 UTC 2024
Rodrigo Mesquita pushed to branch wip/romes/bswap at Glasgow Haskell Compiler / GHC
Commits:
9b65c166 by Rodrigo Mesquita at 2024-06-14T16:20:52+01:00
ncg(aarch64): Implement MO_BSwap using REV
Implements MO_BSwap by producing assembly to do the byte swapping
instead of producing a foreign call a C function.
In `tar`, the hot loop for `deserialise` got almost 4x faster by
avoiding the foreign call which caused spilling live variables to the
stack -- this means the loop did 4x more memory read/writing than
necessary in that particular case!
- - - - -
4 changed files:
- compiler/GHC/Cmm/Parser.y
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
Changes:
=====================================
compiler/GHC/Cmm/Parser.y
=====================================
@@ -1147,10 +1147,12 @@ callishMachOps platform = listToUFM $
( "prefetch1", (MO_Prefetch_Data 1,)),
( "prefetch2", (MO_Prefetch_Data 2,)),
( "prefetch3", (MO_Prefetch_Data 3,))
+
] ++ concat
[ allWidths "popcnt" MO_PopCnt
, allWidths "pdep" MO_Pdep
, allWidths "pext" MO_Pext
+ , allWidths "bswap" MO_BSwap
, allWidths "cmpxchg" MO_Cmpxchg
, allWidths "xchg" MO_Xchg
, allWidths "load_relaxed" (\w -> MO_AtomicRead w MemOrderAcquire)
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1546,7 +1546,7 @@ genCondBranch _ true false expr = do
-- range within 64bit.
genCCall
- :: ForeignTarget -- function to call
+ :: ForeignTarget -- function to call (or primop)
-> [CmmFormal] -- where to put the result
-> [CmmActual] -- arguments (of mixed type)
-> BlockId -- The block we are in
@@ -2014,7 +2014,15 @@ genCCall target dest_regs arg_regs bid = do
MO_PopCnt w -> mkCCall (popCntLabel w)
MO_Pdep w -> mkCCall (pdepLabel w)
MO_Pext w -> mkCCall (pextLabel w)
- MO_BSwap w -> mkCCall (bSwapLabel w)
+ MO_BSwap w
+ | [src_reg] <- arg_regs
+ , [dst_reg] <- dest_regs -> do
+ (src, _fmt_p, code_p) <- getSomeReg src_reg
+ platform <- getPlatform
+ let dst = getRegisterReg platform (CmmLocal dst_reg)
+ code = code_p `snocOL` REV (OpReg w dst) (OpReg w src)
+ return (code, Nothing)
+ | otherwise -> panic "mal-formed ByteSwap"
-- -- Atomic read-modify-write.
MO_AtomicRead w ord
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -102,6 +102,7 @@ regUsageOfInstr platform instr = case instr of
UXTH dst src -> usage (regOp src, regOp dst)
CLZ dst src -> usage (regOp src, regOp dst)
RBIT dst src -> usage (regOp src, regOp dst)
+ REV dst src -> usage (regOp src, regOp dst)
-- 3. Logical and Move Instructions ------------------------------------------
AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -238,7 +239,8 @@ patchRegsOfInstr instr env = case instr of
SXTH o1 o2 -> SXTH (patchOp o1) (patchOp o2)
UXTH o1 o2 -> UXTH (patchOp o1) (patchOp o2)
CLZ o1 o2 -> CLZ (patchOp o1) (patchOp o2)
- RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
+ RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
+ REV o1 o2 -> REV (patchOp o1) (patchOp o2)
-- 3. Logical and Move Instructions ----------------------------------------
AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
@@ -599,6 +601,7 @@ data Instr
| UBFX Operand Operand Operand Operand -- rd = rn[i,j]
| CLZ Operand Operand -- rd = countLeadingZeros(rn)
| RBIT Operand Operand -- rd = reverseBits(rn)
+ | REV Operand Operand -- rd = bswap(rn)
-- 3. Logical and Move Instructions ----------------------------------------
| AND Operand Operand Operand -- rd = rn & op2
@@ -686,6 +689,7 @@ instrCon i =
UBFX{} -> "UBFX"
CLZ{} -> "CLZ"
RBIT{} -> "RBIT"
+ REV{} -> "REV"
AND{} -> "AND"
ASR{} -> "ASR"
EOR{} -> "EOR"
=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -397,7 +397,12 @@ pprInstr platform instr = case instr of
SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
CLZ o1 o2 -> op2 (text "\tclz") o1 o2
- RBIT o1 o2 -> op2 (text "\trbit") o1 o2
+ RBIT o1 o2 -> op2 (text "\trbit") o1 o2
+ REV (OpReg W8 (RegReal (RealRegSingle i))) _ | i < 32 ->
+ {- swapping a single byte is a no-op -} empty
+ REV o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 ->
+ op2 (text "\trev16") o1 o2
+ REV o1 o2 -> op2 (text "\trev") o1 o2
-- signed and unsigned bitfield extract
SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9b65c16696995c9c930c18467a9ef0f675a85282
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/9b65c16696995c9c930c18467a9ef0f675a85282
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240614/1f26afa7/attachment-0001.html>
More information about the ghc-commits
mailing list