[Git][ghc/ghc][master] ncg(aarch64): Add fsqrt instruction, byteSwap primitives [#24956]
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Fri Jul 12 15:43:27 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
dee035bf by Alex Mason at 2024-07-12T11:42:41-04:00
ncg(aarch64): Add fsqrt instruction, byteSwap primitives [#24956]
Implements the FSQRT machop using native assembly rather than a C call.
Implements MO_BSwap by producing assembly to do the byte swapping
instead of producing a foreign call a C function.
In `tar`, the hot loop for `deserialise` got almost 4x faster by
avoiding the foreign call which caused spilling live variables to the
stack -- this means the loop did 4x more memory read/writing than
necessary in that particular case!
- - - - -
3 changed files:
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1607,9 +1607,20 @@ genCCall target dest_regs arg_regs = do
PrimTarget MO_F32_Fabs
| [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+ | otherwise -> panic "mal-formed MO_F32_Fabs"
PrimTarget MO_F64_Fabs
| [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
+ | otherwise -> panic "mal-formed MO_F64_Fabs"
+ PrimTarget MO_F32_Sqrt
+ | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
+ unaryFloatOp W32 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
+ | otherwise -> panic "mal-formed MO_F32_Sqrt"
+ PrimTarget MO_F64_Sqrt
+ | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
+ unaryFloatOp W64 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
+ | otherwise -> panic "mal-formed MO_F64_Sqrt"
+
PrimTarget (MO_S_Mul2 w)
-- Life is easier when we're working with word sized operands,
@@ -1864,7 +1875,25 @@ genCCall target dest_regs arg_regs = do
, RBIT (r dst') (r dst')
]
| otherwise -> unsupported (MO_BRev w)
-
+ PrimTarget (MO_BSwap w)
+ | w == W64 || w == W32
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
+ return $ code_x `snocOL` REV (OpReg w dst_reg) (OpReg w reg_a)
+ | w == W16
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ -- Swaps the bytes in each 16bit word
+ -- TODO: Expose the 32 & 64 bit version of this?
+ return $ code_x `snocOL` REV16 (r dst') (r reg_a)
+ | otherwise -> unsupported (MO_BSwap w)
-- or a possibly side-effecting machine operation
-- mop :: CallishMachOp (see GHC.Cmm.MachOp)
@@ -1894,8 +1923,6 @@ genCCall target dest_regs arg_regs = do
MO_F64_Log1P -> mkCCall "log1p"
MO_F64_Exp -> mkCCall "exp"
MO_F64_ExpM1 -> mkCCall "expm1"
- MO_F64_Fabs -> mkCCall "fabs"
- MO_F64_Sqrt -> mkCCall "sqrt"
-- 32 bit float ops
MO_F32_Pwr -> mkCCall "powf"
@@ -1916,8 +1943,6 @@ genCCall target dest_regs arg_regs = do
MO_F32_Log1P -> mkCCall "log1pf"
MO_F32_Exp -> mkCCall "expf"
MO_F32_ExpM1 -> mkCCall "expm1f"
- MO_F32_Fabs -> mkCCall "fabsf"
- MO_F32_Sqrt -> mkCCall "sqrtf"
-- 64-bit primops
MO_I64_ToI -> mkCCall "hs_int64ToInt"
@@ -1990,7 +2015,6 @@ genCCall target dest_regs arg_regs = do
MO_PopCnt w -> mkCCall (popCntLabel w)
MO_Pdep w -> mkCCall (pdepLabel w)
MO_Pext w -> mkCCall (pextLabel w)
- MO_BSwap w -> mkCCall (bSwapLabel w)
-- -- Atomic read-modify-write.
MO_AtomicRead w ord
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -102,6 +102,9 @@ regUsageOfInstr platform instr = case instr of
UXTH dst src -> usage (regOp src, regOp dst)
CLZ dst src -> usage (regOp src, regOp dst)
RBIT dst src -> usage (regOp src, regOp dst)
+ REV dst src -> usage (regOp src, regOp dst)
+ -- REV32 dst src -> usage (regOp src, regOp dst)
+ REV16 dst src -> usage (regOp src, regOp dst)
-- 3. Logical and Move Instructions ------------------------------------------
AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -139,6 +142,7 @@ regUsageOfInstr platform instr = case instr of
SCVTF dst src -> usage (regOp src, regOp dst)
FCVTZS dst src -> usage (regOp src, regOp dst)
FABS dst src -> usage (regOp src, regOp dst)
+ FSQRT dst src -> usage (regOp src, regOp dst)
FMA _ dst src1 src2 src3 ->
usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
@@ -237,7 +241,11 @@ patchRegsOfInstr instr env = case instr of
SXTH o1 o2 -> SXTH (patchOp o1) (patchOp o2)
UXTH o1 o2 -> UXTH (patchOp o1) (patchOp o2)
CLZ o1 o2 -> CLZ (patchOp o1) (patchOp o2)
- RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
+ RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
+ REV o1 o2 -> REV (patchOp o1) (patchOp o2)
+ -- REV32 o1 o2 -> REV32 (patchOp o1) (patchOp o2)
+ REV16 o1 o2 -> REV16 (patchOp o1) (patchOp o2)
+
-- 3. Logical and Move Instructions ----------------------------------------
AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
@@ -277,6 +285,7 @@ patchRegsOfInstr instr env = case instr of
SCVTF o1 o2 -> SCVTF (patchOp o1) (patchOp o2)
FCVTZS o1 o2 -> FCVTZS (patchOp o1) (patchOp o2)
FABS o1 o2 -> FABS (patchOp o1) (patchOp o2)
+ FSQRT o1 o2 -> FSQRT (patchOp o1) (patchOp o2)
FMA s o1 o2 o3 o4 ->
FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
@@ -598,6 +607,12 @@ data Instr
| UBFX Operand Operand Operand Operand -- rd = rn[i,j]
| CLZ Operand Operand -- rd = countLeadingZeros(rn)
| RBIT Operand Operand -- rd = reverseBits(rn)
+ | REV Operand Operand -- rd = reverseBytes(rn): (for 32 & 64 bit operands)
+ -- 0xAABBCCDD -> 0xDDCCBBAA
+ | REV16 Operand Operand -- rd = reverseBytes16(rn)
+ -- 0xAABB_CCDD -> xBBAA_DDCC
+ -- | REV32 Operand Operand -- rd = reverseBytes32(rn) - 64bit operands only!
+ -- -- 0xAABBCCDD_EEFFGGHH -> 0XDDCCBBAA_HHGGFFEE
-- 3. Logical and Move Instructions ----------------------------------------
| AND Operand Operand Operand -- rd = rn & op2
@@ -642,6 +657,8 @@ data Instr
| FCVTZS Operand Operand
-- Float ABSolute value
| FABS Operand Operand
+ -- Float SQuare RooT
+ | FSQRT Operand Operand
-- | Floating-point fused multiply-add instructions
--
@@ -685,6 +702,9 @@ instrCon i =
UBFX{} -> "UBFX"
CLZ{} -> "CLZ"
RBIT{} -> "RBIT"
+ REV{} -> "REV"
+ REV16{} -> "REV16"
+ -- REV32{} -> "REV32"
AND{} -> "AND"
ASR{} -> "ASR"
EOR{} -> "EOR"
@@ -712,6 +732,7 @@ instrCon i =
SCVTF{} -> "SCVTF"
FCVTZS{} -> "FCVTZS"
FABS{} -> "FABS"
+ FSQRT{} -> "FSQRT"
FMA variant _ _ _ _ ->
case variant of
FMAdd -> "FMADD"
=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -398,6 +398,9 @@ pprInstr platform instr = case instr of
UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
CLZ o1 o2 -> op2 (text "\tclz") o1 o2
RBIT o1 o2 -> op2 (text "\trbit") o1 o2
+ REV o1 o2 -> op2 (text "\trev") o1 o2
+ REV16 o1 o2 -> op2 (text "\trev16") o1 o2
+ -- REV32 o1 o2 -> op2 (text "\trev32") o1 o2
-- signed and unsigned bitfield extract
SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4
@@ -531,6 +534,7 @@ pprInstr platform instr = case instr of
SCVTF o1 o2 -> op2 (text "\tscvtf") o1 o2
FCVTZS o1 o2 -> op2 (text "\tfcvtzs") o1 o2
FABS o1 o2 -> op2 (text "\tfabs") o1 o2
+ FSQRT o1 o2 -> op2 (text "\tfsqrt") o1 o2
FMA variant d r1 r2 r3 ->
let fma = case variant of
FMAdd -> text "\tfmadd"
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dee035bf618d75a18fe72dd3977434c0749a2156
--
This project does not include diff previews in email notifications.
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dee035bf618d75a18fe72dd3977434c0749a2156
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240712/3a8a15bd/attachment-0001.html>
More information about the ghc-commits
mailing list