[Git][ghc/ghc][master] Add AArch64 CLZ, CTZ, RBIT primop implementations.
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Tue Jun 4 16:09:46 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
71010381 by Alex Mason at 2024-06-04T12:09:07-04:00
Add AArch64 CLZ, CTZ, RBIT primop implementations.
Adds support for emitting the clz and rbit instructions, which are
used by GHC.Prim.clz*#, GHC.Prim.ctz*# and GHC.Prim.bitReverse*#.
- - - - -
5 changed files:
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- + testsuite/tests/codeGen/should_run/CtzClz0.hs
- testsuite/tests/codeGen/should_run/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1757,6 +1757,137 @@ genCCall target dest_regs arg_regs bid = do
truncateReg W64 w lo
, Nothing)
| otherwise -> unsupported (MO_U_Mul2 w)
+ PrimTarget (MO_Clz w)
+ | w == W64 || w == W32
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
+ return (
+ code_x `snocOL`
+ CLZ (OpReg w dst_reg) (OpReg w reg_a)
+ , Nothing)
+ | w == W16
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ imm n = OpImm (ImmInt n)
+ {- dst = clz(x << 16 | 0x0000_8000) -}
+ return (
+ code_x `appOL` toOL
+ [ LSL (r dst') (r reg_a) (imm 16)
+ , ORR (r dst') (r dst') (imm 0x00008000)
+ , CLZ (r dst') (r dst')
+ ]
+ , Nothing)
+ | w == W8
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ imm n = OpImm (ImmInt n)
+ {- dst = clz(x << 24 | 0x0080_0000) -}
+ return (
+ code_x `appOL` toOL
+ [ LSL (r dst') (r reg_a) (imm 24)
+ , ORR (r dst') (r dst') (imm 0x00800000)
+ , CLZ (r dst') (r dst')
+ ]
+ , Nothing)
+ | otherwise -> unsupported (MO_Clz w)
+ PrimTarget (MO_Ctz w)
+ | w == W64 || w == W32
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
+ return (
+ code_x `snocOL`
+ RBIT (OpReg w dst_reg) (OpReg w reg_a) `snocOL`
+ CLZ (OpReg w dst_reg) (OpReg w dst_reg)
+ , Nothing)
+ | w == W16
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ imm n = OpImm (ImmInt n)
+ {- dst = clz(reverseBits(x) | 0x0000_8000) -}
+ return (
+ code_x `appOL` toOL
+ [ RBIT (r dst') (r reg_a)
+ , ORR (r dst') (r dst') (imm 0x00008000)
+ , CLZ (r dst') (r dst')
+ ]
+ , Nothing)
+ | w == W8
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ imm n = OpImm (ImmInt n)
+ {- dst = clz(reverseBits(x) | 0x0080_0000) -}
+ return (
+ code_x `appOL` toOL
+ [ RBIT (r dst') (r reg_a)
+ , ORR (r dst') (r dst') (imm 0x00800000)
+ , CLZ (r dst') (r dst')
+ ]
+ , Nothing)
+ | otherwise -> unsupported (MO_Ctz w)
+ PrimTarget (MO_BRev w)
+ | w == W64 || w == W32
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst_reg = getRegisterReg platform (CmmLocal dst)
+ return (
+ code_x `snocOL`
+ RBIT (OpReg w dst_reg) (OpReg w reg_a)
+ , Nothing)
+ | w == W16
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ imm n = OpImm (ImmInt n)
+ {- dst = reverseBits32(x << 16) -}
+ return (
+ code_x `appOL` toOL
+ [ LSL (r dst') (r reg_a) (imm 16)
+ , RBIT (r dst') (r dst')
+ ]
+ , Nothing)
+ | w == W8
+ , [src] <- arg_regs
+ , [dst] <- dest_regs
+ -> do
+ (reg_a, _format_x, code_x) <- getSomeReg src
+ let dst' = getRegisterReg platform (CmmLocal dst)
+ r n = OpReg W32 n
+ imm n = OpImm (ImmInt n)
+ {- dst = reverseBits32(x << 24) -}
+ return (
+ code_x `appOL` toOL
+ [ LSL (r dst') (r reg_a) (imm 24)
+ , RBIT (r dst') (r dst')
+ ]
+ , Nothing)
+ | otherwise -> unsupported (MO_BRev w)
-- or a possibly side-effecting machine operation
@@ -1883,10 +2014,7 @@ genCCall target dest_regs arg_regs bid = do
MO_PopCnt w -> mkCCall (popCntLabel w)
MO_Pdep w -> mkCCall (pdepLabel w)
MO_Pext w -> mkCCall (pextLabel w)
- MO_Clz w -> mkCCall (clzLabel w)
- MO_Ctz w -> mkCCall (ctzLabel w)
MO_BSwap w -> mkCCall (bSwapLabel w)
- MO_BRev w -> mkCCall (bRevLabel w)
-- -- Atomic read-modify-write.
MO_AtomicRead w ord
=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -100,6 +100,8 @@ regUsageOfInstr platform instr = case instr of
UXTB dst src -> usage (regOp src, regOp dst)
SXTH dst src -> usage (regOp src, regOp dst)
UXTH dst src -> usage (regOp src, regOp dst)
+ CLZ dst src -> usage (regOp src, regOp dst)
+ RBIT dst src -> usage (regOp src, regOp dst)
-- 3. Logical and Move Instructions ------------------------------------------
AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -140,7 +142,8 @@ regUsageOfInstr platform instr = case instr of
FMA _ dst src1 src2 src3 ->
usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
- _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
+ LOCATION{} -> panic $ "regUsageOfInstr: " ++ instrCon instr
+ NEWBLOCK{} -> panic $ "regUsageOfInstr: " ++ instrCon instr
where
-- filtering the usage is necessary, otherwise the register
@@ -234,6 +237,8 @@ patchRegsOfInstr instr env = case instr of
UXTB o1 o2 -> UXTB (patchOp o1) (patchOp o2)
SXTH o1 o2 -> SXTH (patchOp o1) (patchOp o2)
UXTH o1 o2 -> UXTH (patchOp o1) (patchOp o2)
+ CLZ o1 o2 -> CLZ (patchOp o1) (patchOp o2)
+ RBIT o1 o2 -> RBIT (patchOp o1) (patchOp o2)
-- 3. Logical and Move Instructions ----------------------------------------
AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
@@ -276,7 +281,8 @@ patchRegsOfInstr instr env = case instr of
FMA s o1 o2 o3 o4 ->
FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
- _ -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+ NEWBLOCK{} -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+ LOCATION{} -> panic $ "patchRegsOfInstr: " ++ instrCon instr
where
patchOp :: Operand -> Operand
patchOp (OpReg w r) = OpReg w (env r)
@@ -591,6 +597,8 @@ data Instr
-- Signed/Unsigned bitfield extract
| SBFX Operand Operand Operand Operand -- rd = rn[i,j]
| UBFX Operand Operand Operand Operand -- rd = rn[i,j]
+ | CLZ Operand Operand -- rd = countLeadingZeros(rn)
+ | RBIT Operand Operand -- rd = reverseBits(rn)
-- 3. Logical and Move Instructions ----------------------------------------
| AND Operand Operand Operand -- rd = rn & op2
@@ -676,6 +684,8 @@ instrCon i =
UBFM{} -> "UBFM"
SBFX{} -> "SBFX"
UBFX{} -> "UBFX"
+ CLZ{} -> "CLZ"
+ RBIT{} -> "RBIT"
AND{} -> "AND"
ASR{} -> "ASR"
EOR{} -> "EOR"
=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -396,6 +396,8 @@ pprInstr platform instr = case instr of
-- 2. Bit Manipulation Instructions ------------------------------------------
SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
+ CLZ o1 o2 -> op2 (text "\tclz") o1 o2
+ RBIT o1 o2 -> op2 (text "\trbit") o1 o2
-- signed and unsigned bitfield extract
SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4
=====================================
testsuite/tests/codeGen/should_run/CtzClz0.hs
=====================================
@@ -0,0 +1,27 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE MagicHash #-}
+
+module Main where
+
+import GHC.Exts
+import Control.Monad
+
+#include <MachDeps.h>
+
+{-# OPAQUE x #-} -- needed to avoid triggering constant folding
+x :: Word
+x = 0
+
+main :: IO ()
+main = do
+ let !(W# w) = x
+
+ guard (W# (ctz# w) == WORD_SIZE_IN_BITS)
+ guard (W# (ctz8# w) == 8)
+ guard (W# (ctz16# w) == 16)
+ guard (W# (ctz32# w) == 32)
+
+ guard (W# (clz# w) == WORD_SIZE_IN_BITS)
+ guard (W# (clz8# w) == 8)
+ guard (W# (clz16# w) == 16)
+ guard (W# (clz32# w) == 32)
=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -246,3 +246,4 @@ test('T24295a', normal, compile_and_run, ['-O -floopification'])
test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])
test('T24664a', normal, compile_and_run, ['-O'])
test('T24664b', normal, compile_and_run, ['-O'])
+test('CtzClz0', normal, compile_and_run, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/71010381f4270966de334193ab2bfc67f8524212
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/71010381f4270966de334193ab2bfc67f8524212
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240604/c887495e/attachment-0001.html>
More information about the ghc-commits
mailing list