[Git][ghc/ghc][master] Add AArch64 CLZ, CTZ, RBIT primop implementations.

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Tue Jun 4 16:09:46 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
71010381 by Alex Mason at 2024-06-04T12:09:07-04:00
Add AArch64 CLZ, CTZ, RBIT primop implementations.

Adds support for emitting the clz and rbit instructions, which are
used by GHC.Prim.clz*#, GHC.Prim.ctz*# and GHC.Prim.bitReverse*#.

- - - - -


5 changed files:

- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- + testsuite/tests/codeGen/should_run/CtzClz0.hs
- testsuite/tests/codeGen/should_run/all.T


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1757,6 +1757,137 @@ genCCall target dest_regs arg_regs bid = do
                   truncateReg W64 w lo
                   , Nothing)
           | otherwise -> unsupported (MO_U_Mul2  w)
+    PrimTarget (MO_Clz  w)
+          | w == W64 || w == W32
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst_reg = getRegisterReg platform (CmmLocal dst)
+              return (
+                  code_x `snocOL`
+                  CLZ   (OpReg w dst_reg) (OpReg w reg_a)
+                  , Nothing)
+          | w == W16
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(x << 16 | 0x0000_8000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL (r dst') (r reg_a) (imm 16)
+                    , ORR (r dst') (r dst')  (imm 0x00008000)
+                    , CLZ (r dst') (r dst')
+                    ]
+                  , Nothing)
+          | w == W8
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(x << 24 | 0x0080_0000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL (r dst') (r reg_a) (imm 24)
+                    , ORR (r dst') (r dst')  (imm 0x00800000)
+                    , CLZ (r dst') (r dst')
+                    ]
+                  , Nothing)
+            | otherwise -> unsupported (MO_Clz  w)
+    PrimTarget (MO_Ctz  w)
+          | w == W64 || w == W32
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst_reg = getRegisterReg platform (CmmLocal dst)
+              return (
+                  code_x `snocOL`
+                  RBIT (OpReg w dst_reg) (OpReg w reg_a) `snocOL`
+                  CLZ  (OpReg w dst_reg) (OpReg w dst_reg)
+                  , Nothing)
+          | w == W16
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(reverseBits(x) | 0x0000_8000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ RBIT (r dst') (r reg_a)
+                    , ORR  (r dst') (r dst') (imm 0x00008000)
+                    , CLZ  (r dst') (r dst')
+                    ]
+                  , Nothing)
+          | w == W8
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(reverseBits(x) | 0x0080_0000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ RBIT (r dst') (r reg_a)
+                    , ORR (r dst')  (r dst') (imm 0x00800000)
+                    , CLZ  (r dst')  (r dst')
+                    ]
+                  , Nothing)
+            | otherwise -> unsupported (MO_Ctz  w)
+    PrimTarget (MO_BRev  w)
+          | w == W64 || w == W32
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst_reg = getRegisterReg platform (CmmLocal dst)
+              return (
+                  code_x `snocOL`
+                  RBIT (OpReg w dst_reg) (OpReg w reg_a)
+                  , Nothing)
+          | w == W16
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = reverseBits32(x << 16) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL  (r dst') (r reg_a) (imm 16)
+                    , RBIT (r dst') (r dst')
+                    ]
+                  , Nothing)
+          | w == W8
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = reverseBits32(x << 24) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL  (r dst') (r reg_a) (imm 24)
+                    , RBIT (r dst') (r dst')
+                    ]
+                  , Nothing)
+            | otherwise -> unsupported (MO_BRev  w)
 
 
     -- or a possibly side-effecting machine operation
@@ -1883,10 +2014,7 @@ genCCall target dest_regs arg_regs bid = do
         MO_PopCnt w         -> mkCCall (popCntLabel w)
         MO_Pdep w           -> mkCCall (pdepLabel w)
         MO_Pext w           -> mkCCall (pextLabel w)
-        MO_Clz w            -> mkCCall (clzLabel w)
-        MO_Ctz w            -> mkCCall (ctzLabel w)
         MO_BSwap w          -> mkCCall (bSwapLabel w)
-        MO_BRev w           -> mkCCall (bRevLabel w)
 
         -- -- Atomic read-modify-write.
         MO_AtomicRead w ord


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -100,6 +100,8 @@ regUsageOfInstr platform instr = case instr of
   UXTB dst src             -> usage (regOp src, regOp dst)
   SXTH dst src             -> usage (regOp src, regOp dst)
   UXTH dst src             -> usage (regOp src, regOp dst)
+  CLZ  dst src             -> usage (regOp src, regOp dst)
+  RBIT dst src             -> usage (regOp src, regOp dst)
   -- 3. Logical and Move Instructions ------------------------------------------
   AND dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   ASR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -140,7 +142,8 @@ regUsageOfInstr platform instr = case instr of
   FMA _ dst src1 src2 src3 ->
     usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
 
-  _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
+  LOCATION{} -> panic $ "regUsageOfInstr: " ++ instrCon instr
+  NEWBLOCK{} -> panic $ "regUsageOfInstr: " ++ instrCon instr
 
   where
         -- filtering the usage is necessary, otherwise the register
@@ -234,6 +237,8 @@ patchRegsOfInstr instr env = case instr of
     UXTB o1 o2       -> UXTB (patchOp o1) (patchOp o2)
     SXTH o1 o2       -> SXTH (patchOp o1) (patchOp o2)
     UXTH o1 o2       -> UXTH (patchOp o1) (patchOp o2)
+    CLZ o1 o2        -> CLZ  (patchOp o1) (patchOp o2)
+    RBIT o1 o2       -> RBIT  (patchOp o1) (patchOp o2)
 
     -- 3. Logical and Move Instructions ----------------------------------------
     AND o1 o2 o3   -> AND  (patchOp o1) (patchOp o2) (patchOp o3)
@@ -276,7 +281,8 @@ patchRegsOfInstr instr env = case instr of
     FMA s o1 o2 o3 o4 ->
       FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
 
-    _              -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+    NEWBLOCK{}     -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+    LOCATION{}     -> panic $ "patchRegsOfInstr: " ++ instrCon instr
     where
         patchOp :: Operand -> Operand
         patchOp (OpReg w r) = OpReg w (env r)
@@ -591,6 +597,8 @@ data Instr
     -- Signed/Unsigned bitfield extract
     | SBFX Operand Operand Operand Operand -- rd = rn[i,j]
     | UBFX Operand Operand Operand Operand -- rd = rn[i,j]
+    | CLZ  Operand Operand -- rd = countLeadingZeros(rn)
+    | RBIT Operand Operand -- rd = reverseBits(rn)
 
     -- 3. Logical and Move Instructions ----------------------------------------
     | AND Operand Operand Operand -- rd = rn & op2
@@ -676,6 +684,8 @@ instrCon i =
       UBFM{} -> "UBFM"
       SBFX{} -> "SBFX"
       UBFX{} -> "UBFX"
+      CLZ{} -> "CLZ"
+      RBIT{} -> "RBIT"
       AND{} -> "AND"
       ASR{} -> "ASR"
       EOR{} -> "EOR"


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -396,6 +396,8 @@ pprInstr platform instr = case instr of
   -- 2. Bit Manipulation Instructions ------------------------------------------
   SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
   UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
+  CLZ  o1 o2       -> op2 (text "\tclz")  o1 o2
+  RBIT  o1 o2      -> op2 (text "\trbit")  o1 o2
   -- signed and unsigned bitfield extract
   SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
   UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4


=====================================
testsuite/tests/codeGen/should_run/CtzClz0.hs
=====================================
@@ -0,0 +1,27 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE MagicHash #-}
+
+module Main where
+
+import GHC.Exts
+import Control.Monad
+
+#include <MachDeps.h>
+
+{-# OPAQUE x #-} -- needed to avoid triggering constant folding
+x :: Word
+x = 0
+
+main :: IO ()
+main = do
+  let !(W# w) = x
+
+  guard (W# (ctz# w) == WORD_SIZE_IN_BITS)
+  guard (W# (ctz8# w) == 8)
+  guard (W# (ctz16# w) == 16)
+  guard (W# (ctz32# w) == 32)
+
+  guard (W# (clz# w) == WORD_SIZE_IN_BITS)
+  guard (W# (clz8# w) == 8)
+  guard (W# (clz16# w) == 16)
+  guard (W# (clz32# w) == 32)


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -246,3 +246,4 @@ test('T24295a', normal, compile_and_run, ['-O -floopification'])
 test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])
 test('T24664a', normal, compile_and_run, ['-O'])
 test('T24664b', normal, compile_and_run, ['-O'])
+test('CtzClz0', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/71010381f4270966de334193ab2bfc67f8524212

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/71010381f4270966de334193ab2bfc67f8524212
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240604/c887495e/attachment-0001.html>


More information about the ghc-commits mailing list