[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 6 commits: Add AArch64 CLZ, CTZ, RBIT primop implementations.

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Tue Jun 4 16:40:53 UTC 2024



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
71010381 by Alex Mason at 2024-06-04T12:09:07-04:00
Add AArch64 CLZ, CTZ, RBIT primop implementations.

Adds support for emitting the clz and rbit instructions, which are
used by GHC.Prim.clz*#, GHC.Prim.ctz*# and GHC.Prim.bitReverse*#.

- - - - -
44e2abfb by Cheng Shao at 2024-06-04T12:09:43-04:00
hadrian: add +text_simdutf flavour transformer to allow building text with simdutf

This patch adds a +text_simdutf flavour transformer to hadrian to
allow downstream packagers and users that build from source to opt in
to simdutf support for text, in order to benefit from SIMD speedup at
run-time. It's still disabled by default for the time being.

- - - - -
077cb2e1 by Cheng Shao at 2024-06-04T12:09:43-04:00
ci: enable +text_simdutf flavour transformer for wasm jobs

This commit enables the +text_simdutf flavour transformer for wasm jobs,
so text is now built with simdutf support for wasm.

- - - - -
08026887 by Teo Camarasu at 2024-06-04T12:40:36-04:00
base: Use TemplateHaskellQuotes in instance Lift ByteArray

Resolves #24852

- - - - -
fad1c253 by Teo Camarasu at 2024-06-04T12:40:36-04:00
base: Mark addrToByteArray as NOINLINE

This function should never be inlined in order to keep code size small.

- - - - -
7541e204 by Cheng Shao at 2024-06-04T12:40:37-04:00
compiler: remove unused CompilerInfo/LinkerInfo types

This patch removes CompilerInfo/LinkerInfo types from the compiler
since they aren't actually used anywhere.

- - - - -


16 changed files:

- .gitlab/generate-ci/gen_ci.hs
- .gitlab/jobs.yaml
- compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
- compiler/GHC/CmmToAsm/AArch64/Instr.hs
- compiler/GHC/CmmToAsm/AArch64/Ppr.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Session.hs
- hadrian/doc/flavours.md
- hadrian/src/Flavour.hs
- hadrian/src/Flavour/Type.hs
- hadrian/src/Rules/Register.hs
- hadrian/src/Settings/Default.hs
- hadrian/src/Settings/Packages.hs
- libraries/base/src/Data/Array/Byte.hs
- + testsuite/tests/codeGen/should_run/CtzClz0.hs
- testsuite/tests/codeGen/should_run/all.T


Changes:

=====================================
.gitlab/generate-ci/gen_ci.hs
=====================================
@@ -153,6 +153,7 @@ data BuildConfig
                 , threadSanitiser :: Bool
                 , noSplitSections :: Bool
                 , validateNonmovingGc :: Bool
+                , textWithSIMDUTF :: Bool
                 }
 
 -- Extra arguments to pass to ./configure due to the BuildConfig
@@ -174,7 +175,8 @@ mkJobFlavour BuildConfig{..} = Flavour buildFlavour opts
            [FullyStatic | fullyStatic] ++
            [ThreadSanitiser | threadSanitiser] ++
            [NoSplitSections | noSplitSections, buildFlavour == Release ] ++
-           [BootNonmovingGc | validateNonmovingGc ]
+           [BootNonmovingGc | validateNonmovingGc ] ++
+           [TextWithSIMDUTF | textWithSIMDUTF]
 
 data Flavour = Flavour BaseFlavour [FlavourTrans]
 
@@ -185,6 +187,7 @@ data FlavourTrans =
     | ThreadSanitiser
     | NoSplitSections
     | BootNonmovingGc
+    | TextWithSIMDUTF
 
 data BaseFlavour = Release | Validate | SlowValidate deriving Eq
 
@@ -211,6 +214,7 @@ vanilla = BuildConfig
   , threadSanitiser = False
   , noSplitSections = False
   , validateNonmovingGc = False
+  , textWithSIMDUTF = False
   }
 
 splitSectionsBroken :: BuildConfig -> BuildConfig
@@ -344,6 +348,7 @@ flavourString (Flavour base trans) = base_string base ++ concatMap (("+" ++) . f
     flavour_string ThreadSanitiser = "thread_sanitizer_cmm"
     flavour_string NoSplitSections = "no_split_sections"
     flavour_string BootNonmovingGc = "boot_nonmoving_gc"
+    flavour_string TextWithSIMDUTF = "text_simdutf"
 
 -- The path to the docker image (just for linux builders)
 dockerImage :: Arch -> Opsys -> Maybe String
@@ -1057,6 +1062,7 @@ job_groups =
         {
           fullyStatic = True
           , buildFlavour     = Release -- TODO: This needs to be validate but wasm backend doesn't pass yet
+          , textWithSIMDUTF = True
         }
 
 
@@ -1082,10 +1088,10 @@ platform_mapping = Map.map go combined_result
                 , "x86_64-linux-fedora33-release"
                 , "x86_64-linux-deb11-cross_aarch64-linux-gnu-validate"
                 , "x86_64-windows-validate"
-                , "nightly-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static"
+                , "nightly-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf"
                 , "nightly-x86_64-linux-deb11-validate"
                 , "nightly-x86_64-linux-deb12-validate"
-                , "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static"
+                , "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf"
                 , "x86_64-linux-deb12-validate+thread_sanitizer_cmm"
                 , "nightly-aarch64-linux-deb10-validate"
                 , "nightly-x86_64-linux-alpine3_12-validate"


=====================================
.gitlab/jobs.yaml
=====================================
@@ -951,7 +951,7 @@
       "XZ_OPT": "-9"
     }
   },
-  "nightly-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static": {
+  "nightly-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
       ".gitlab/ci.sh save_test_output",
@@ -962,7 +962,7 @@
     "artifacts": {
       "expire_in": "8 weeks",
       "paths": [
-        "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static.tar.xz",
+        "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf.tar.xz",
         "junit.xml",
         "unexpected-test-output.tar.gz"
       ],
@@ -1005,17 +1005,17 @@
     ],
     "variables": {
       "BIGNUM_BACKEND": "gmp",
-      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static",
-      "BUILD_FLAVOUR": "release+fully_static",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf",
+      "BUILD_FLAVOUR": "release+fully_static+text_simdutf",
       "CONFIGURE_ARGS": "--with-intree-gmp --with-system-libffi --enable-strict-ghc-toolchain-check",
       "CROSS_TARGET": "wasm32-wasi",
       "HADRIAN_ARGS": "--docs=none",
       "RUNTEST_ARGS": "",
-      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static",
+      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf",
       "XZ_OPT": "-9"
     }
   },
-  "nightly-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static": {
+  "nightly-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
       ".gitlab/ci.sh save_test_output",
@@ -1026,7 +1026,7 @@
     "artifacts": {
       "expire_in": "8 weeks",
       "paths": [
-        "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static.tar.xz",
+        "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf.tar.xz",
         "junit.xml",
         "unexpected-test-output.tar.gz"
       ],
@@ -1069,17 +1069,17 @@
     ],
     "variables": {
       "BIGNUM_BACKEND": "native",
-      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static",
-      "BUILD_FLAVOUR": "release+fully_static",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf",
+      "BUILD_FLAVOUR": "release+fully_static+text_simdutf",
       "CONFIGURE_ARGS": "--with-intree-gmp --with-system-libffi --enable-strict-ghc-toolchain-check",
       "CROSS_TARGET": "wasm32-wasi",
       "HADRIAN_ARGS": "--docs=none",
       "RUNTEST_ARGS": "",
-      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static",
+      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf",
       "XZ_OPT": "-9"
     }
   },
-  "nightly-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static": {
+  "nightly-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
       ".gitlab/ci.sh save_test_output",
@@ -1090,7 +1090,7 @@
     "artifacts": {
       "expire_in": "8 weeks",
       "paths": [
-        "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static.tar.xz",
+        "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf.tar.xz",
         "junit.xml",
         "unexpected-test-output.tar.gz"
       ],
@@ -1133,13 +1133,13 @@
     ],
     "variables": {
       "BIGNUM_BACKEND": "gmp",
-      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static",
-      "BUILD_FLAVOUR": "release+fully_static",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf",
+      "BUILD_FLAVOUR": "release+fully_static+text_simdutf",
       "CONFIGURE_ARGS": "--enable-unregisterised --with-intree-gmp --with-system-libffi --enable-strict-ghc-toolchain-check",
       "CROSS_TARGET": "wasm32-wasi",
       "HADRIAN_ARGS": "--docs=none",
       "RUNTEST_ARGS": "",
-      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static",
+      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf",
       "XZ_OPT": "-9"
     }
   },
@@ -4465,7 +4465,7 @@
       "TEST_ENV": "x86_64-linux-alpine3_12-validate+fully_static"
     }
   },
-  "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static": {
+  "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
       ".gitlab/ci.sh save_test_output",
@@ -4476,7 +4476,7 @@
     "artifacts": {
       "expire_in": "2 weeks",
       "paths": [
-        "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static.tar.xz",
+        "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf.tar.xz",
         "junit.xml",
         "unexpected-test-output.tar.gz"
       ],
@@ -4519,16 +4519,16 @@
     ],
     "variables": {
       "BIGNUM_BACKEND": "gmp",
-      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static",
-      "BUILD_FLAVOUR": "release+fully_static",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf",
+      "BUILD_FLAVOUR": "release+fully_static+text_simdutf",
       "CONFIGURE_ARGS": "--with-intree-gmp --with-system-libffi --enable-strict-ghc-toolchain-check",
       "CROSS_TARGET": "wasm32-wasi",
       "HADRIAN_ARGS": "--docs=none",
       "RUNTEST_ARGS": "",
-      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static"
+      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-cross_wasm32-wasi-release+fully_static+text_simdutf"
     }
   },
-  "x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static": {
+  "x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
       ".gitlab/ci.sh save_test_output",
@@ -4539,7 +4539,7 @@
     "artifacts": {
       "expire_in": "2 weeks",
       "paths": [
-        "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static.tar.xz",
+        "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf.tar.xz",
         "junit.xml",
         "unexpected-test-output.tar.gz"
       ],
@@ -4583,16 +4583,16 @@
     ],
     "variables": {
       "BIGNUM_BACKEND": "native",
-      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static",
-      "BUILD_FLAVOUR": "release+fully_static",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf",
+      "BUILD_FLAVOUR": "release+fully_static+text_simdutf",
       "CONFIGURE_ARGS": "--with-intree-gmp --with-system-libffi --enable-strict-ghc-toolchain-check",
       "CROSS_TARGET": "wasm32-wasi",
       "HADRIAN_ARGS": "--docs=none",
       "RUNTEST_ARGS": "",
-      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static"
+      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-int_native-cross_wasm32-wasi-release+fully_static+text_simdutf"
     }
   },
-  "x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static": {
+  "x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
       ".gitlab/ci.sh save_test_output",
@@ -4603,7 +4603,7 @@
     "artifacts": {
       "expire_in": "2 weeks",
       "paths": [
-        "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static.tar.xz",
+        "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf.tar.xz",
         "junit.xml",
         "unexpected-test-output.tar.gz"
       ],
@@ -4647,13 +4647,13 @@
     ],
     "variables": {
       "BIGNUM_BACKEND": "gmp",
-      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static",
-      "BUILD_FLAVOUR": "release+fully_static",
+      "BIN_DIST_NAME": "ghc-x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf",
+      "BUILD_FLAVOUR": "release+fully_static+text_simdutf",
       "CONFIGURE_ARGS": "--enable-unregisterised --with-intree-gmp --with-system-libffi --enable-strict-ghc-toolchain-check",
       "CROSS_TARGET": "wasm32-wasi",
       "HADRIAN_ARGS": "--docs=none",
       "RUNTEST_ARGS": "",
-      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static"
+      "TEST_ENV": "x86_64-linux-alpine3_18-wasm-unreg-cross_wasm32-wasi-release+fully_static+text_simdutf"
     }
   },
   "x86_64-linux-deb10-int_native-validate": {


=====================================
compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
=====================================
@@ -1757,6 +1757,137 @@ genCCall target dest_regs arg_regs bid = do
                   truncateReg W64 w lo
                   , Nothing)
           | otherwise -> unsupported (MO_U_Mul2  w)
+    PrimTarget (MO_Clz  w)
+          | w == W64 || w == W32
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst_reg = getRegisterReg platform (CmmLocal dst)
+              return (
+                  code_x `snocOL`
+                  CLZ   (OpReg w dst_reg) (OpReg w reg_a)
+                  , Nothing)
+          | w == W16
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(x << 16 | 0x0000_8000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL (r dst') (r reg_a) (imm 16)
+                    , ORR (r dst') (r dst')  (imm 0x00008000)
+                    , CLZ (r dst') (r dst')
+                    ]
+                  , Nothing)
+          | w == W8
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(x << 24 | 0x0080_0000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL (r dst') (r reg_a) (imm 24)
+                    , ORR (r dst') (r dst')  (imm 0x00800000)
+                    , CLZ (r dst') (r dst')
+                    ]
+                  , Nothing)
+            | otherwise -> unsupported (MO_Clz  w)
+    PrimTarget (MO_Ctz  w)
+          | w == W64 || w == W32
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst_reg = getRegisterReg platform (CmmLocal dst)
+              return (
+                  code_x `snocOL`
+                  RBIT (OpReg w dst_reg) (OpReg w reg_a) `snocOL`
+                  CLZ  (OpReg w dst_reg) (OpReg w dst_reg)
+                  , Nothing)
+          | w == W16
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(reverseBits(x) | 0x0000_8000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ RBIT (r dst') (r reg_a)
+                    , ORR  (r dst') (r dst') (imm 0x00008000)
+                    , CLZ  (r dst') (r dst')
+                    ]
+                  , Nothing)
+          | w == W8
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = clz(reverseBits(x) | 0x0080_0000) -}
+              return (
+                  code_x `appOL` toOL
+                    [ RBIT (r dst') (r reg_a)
+                    , ORR (r dst')  (r dst') (imm 0x00800000)
+                    , CLZ  (r dst')  (r dst')
+                    ]
+                  , Nothing)
+            | otherwise -> unsupported (MO_Ctz  w)
+    PrimTarget (MO_BRev  w)
+          | w == W64 || w == W32
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst_reg = getRegisterReg platform (CmmLocal dst)
+              return (
+                  code_x `snocOL`
+                  RBIT (OpReg w dst_reg) (OpReg w reg_a)
+                  , Nothing)
+          | w == W16
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = reverseBits32(x << 16) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL  (r dst') (r reg_a) (imm 16)
+                    , RBIT (r dst') (r dst')
+                    ]
+                  , Nothing)
+          | w == W8
+          , [src] <- arg_regs
+          , [dst] <- dest_regs
+          -> do
+              (reg_a, _format_x, code_x) <- getSomeReg src
+              let dst' = getRegisterReg platform (CmmLocal dst)
+                  r n = OpReg W32 n
+                  imm n = OpImm (ImmInt n)
+              {- dst = reverseBits32(x << 24) -}
+              return (
+                  code_x `appOL` toOL
+                    [ LSL  (r dst') (r reg_a) (imm 24)
+                    , RBIT (r dst') (r dst')
+                    ]
+                  , Nothing)
+            | otherwise -> unsupported (MO_BRev  w)
 
 
     -- or a possibly side-effecting machine operation
@@ -1883,10 +2014,7 @@ genCCall target dest_regs arg_regs bid = do
         MO_PopCnt w         -> mkCCall (popCntLabel w)
         MO_Pdep w           -> mkCCall (pdepLabel w)
         MO_Pext w           -> mkCCall (pextLabel w)
-        MO_Clz w            -> mkCCall (clzLabel w)
-        MO_Ctz w            -> mkCCall (ctzLabel w)
         MO_BSwap w          -> mkCCall (bSwapLabel w)
-        MO_BRev w           -> mkCCall (bRevLabel w)
 
         -- -- Atomic read-modify-write.
         MO_AtomicRead w ord


=====================================
compiler/GHC/CmmToAsm/AArch64/Instr.hs
=====================================
@@ -100,6 +100,8 @@ regUsageOfInstr platform instr = case instr of
   UXTB dst src             -> usage (regOp src, regOp dst)
   SXTH dst src             -> usage (regOp src, regOp dst)
   UXTH dst src             -> usage (regOp src, regOp dst)
+  CLZ  dst src             -> usage (regOp src, regOp dst)
+  RBIT dst src             -> usage (regOp src, regOp dst)
   -- 3. Logical and Move Instructions ------------------------------------------
   AND dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
   ASR dst src1 src2        -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -140,7 +142,8 @@ regUsageOfInstr platform instr = case instr of
   FMA _ dst src1 src2 src3 ->
     usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
 
-  _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
+  LOCATION{} -> panic $ "regUsageOfInstr: " ++ instrCon instr
+  NEWBLOCK{} -> panic $ "regUsageOfInstr: " ++ instrCon instr
 
   where
         -- filtering the usage is necessary, otherwise the register
@@ -234,6 +237,8 @@ patchRegsOfInstr instr env = case instr of
     UXTB o1 o2       -> UXTB (patchOp o1) (patchOp o2)
     SXTH o1 o2       -> SXTH (patchOp o1) (patchOp o2)
     UXTH o1 o2       -> UXTH (patchOp o1) (patchOp o2)
+    CLZ o1 o2        -> CLZ  (patchOp o1) (patchOp o2)
+    RBIT o1 o2       -> RBIT  (patchOp o1) (patchOp o2)
 
     -- 3. Logical and Move Instructions ----------------------------------------
     AND o1 o2 o3   -> AND  (patchOp o1) (patchOp o2) (patchOp o3)
@@ -276,7 +281,8 @@ patchRegsOfInstr instr env = case instr of
     FMA s o1 o2 o3 o4 ->
       FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
 
-    _              -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+    NEWBLOCK{}     -> panic $ "patchRegsOfInstr: " ++ instrCon instr
+    LOCATION{}     -> panic $ "patchRegsOfInstr: " ++ instrCon instr
     where
         patchOp :: Operand -> Operand
         patchOp (OpReg w r) = OpReg w (env r)
@@ -591,6 +597,8 @@ data Instr
     -- Signed/Unsigned bitfield extract
     | SBFX Operand Operand Operand Operand -- rd = rn[i,j]
     | UBFX Operand Operand Operand Operand -- rd = rn[i,j]
+    | CLZ  Operand Operand -- rd = countLeadingZeros(rn)
+    | RBIT Operand Operand -- rd = reverseBits(rn)
 
     -- 3. Logical and Move Instructions ----------------------------------------
     | AND Operand Operand Operand -- rd = rn & op2
@@ -676,6 +684,8 @@ instrCon i =
       UBFM{} -> "UBFM"
       SBFX{} -> "SBFX"
       UBFX{} -> "UBFX"
+      CLZ{} -> "CLZ"
+      RBIT{} -> "RBIT"
       AND{} -> "AND"
       ASR{} -> "ASR"
       EOR{} -> "EOR"


=====================================
compiler/GHC/CmmToAsm/AArch64/Ppr.hs
=====================================
@@ -396,6 +396,8 @@ pprInstr platform instr = case instr of
   -- 2. Bit Manipulation Instructions ------------------------------------------
   SBFM o1 o2 o3 o4 -> op4 (text "\tsbfm") o1 o2 o3 o4
   UBFM o1 o2 o3 o4 -> op4 (text "\tubfm") o1 o2 o3 o4
+  CLZ  o1 o2       -> op2 (text "\tclz")  o1 o2
+  RBIT  o1 o2      -> op2 (text "\trbit")  o1 o2
   -- signed and unsigned bitfield extract
   SBFX o1 o2 o3 o4 -> op4 (text "\tsbfx") o1 o2 o3 o4
   UBFX o1 o2 o3 o4 -> op4 (text "\tubfx") o1 o2 o3 o4


=====================================
compiler/GHC/Driver/DynFlags.hs
=====================================
@@ -60,10 +60,6 @@ module GHC.Driver.DynFlags (
         versionedAppDir, versionedFilePath,
         extraGccViaCFlags, globalPackageDatabasePath,
 
-        -- * Linker/compiler information
-        LinkerInfo(..),
-        CompilerInfo(..),
-
         -- * Include specifications
         IncludeSpecs(..), addGlobalInclude, addQuoteInclude, flattenIncludes,
         addImplicitQuoteInclude,
@@ -758,31 +754,6 @@ data ParMakeCount
   -- | Use the specific semaphore @<sem>@ to control parallelism (@-jsem <sem>@ flag).
   | ParMakeSemaphore FilePath
 
--- -----------------------------------------------------------------------------
--- Linker/compiler information
-
--- LinkerInfo contains any extra options needed by the system linker.
-data LinkerInfo
-  = GnuLD    [Option]
-  | Mold     [Option]
-  | GnuGold  [Option]
-  | LlvmLLD  [Option]
-  | DarwinLD [Option]
-  | SolarisLD [Option]
-  | AixLD    [Option]
-  | UnknownLD
-  deriving Eq
-
--- CompilerInfo tells us which C compiler we're using
-data CompilerInfo
-   = GCC
-   | Clang
-   | AppleClang
-   | AppleClang51
-   | Emscripten
-   | UnknownCC
-   deriving Eq
-
 -- | The 'GhcMode' tells us whether we're doing multi-module
 -- compilation (controlled via the "GHC" API) or one-shot
 -- (single-module) compilation.  This makes a difference primarily to


=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -215,8 +215,6 @@ module GHC.Driver.Session (
         isFmaEnabled,
 
         -- * Linker/compiler information
-        LinkerInfo(..),
-        CompilerInfo(..),
         useXLinkerRPath,
 
         -- * Include specifications


=====================================
hadrian/doc/flavours.md
=====================================
@@ -265,6 +265,10 @@ The supported transformers are listed below:
         <td><code>native_bignum</code></td>
         <td>Use the native <code>ghc-bignum</code> backend.</td>
     </tr>
+    <tr>
+        <td><code>text_simdutf</code></td>
+        <td>Enable building the <code>text</code> package with <code>simdutf</code> support.</td>
+    </tr>
     <tr>
         <td><code>no_profiled_libs</code></td>
         <td>Disables building of libraries in profiled build ways.</td>


=====================================
hadrian/src/Flavour.hs
=====================================
@@ -17,6 +17,7 @@ module Flavour
   , enableHaddock
   , enableHiCore
   , useNativeBignum
+  , enableTextWithSIMDUTF
   , omitPragmas
 
   , completeSetting
@@ -53,6 +54,7 @@ flavourTransformers = M.fromList
     , "no_dynamic_ghc"   =: disableDynamicGhcPrograms
     , "no_dynamic_libs"  =: disableDynamicLibs
     , "native_bignum"    =: useNativeBignum
+    , "text_simdutf"     =: enableTextWithSIMDUTF
     , "no_profiled_libs" =: disableProfiledLibs
     , "omit_pragmas"     =: omitPragmas
     , "ipe"              =: enableIPE
@@ -292,6 +294,12 @@ useNativeBignum flavour =
   flavour { bignumBackend = "native"
           }
 
+-- | Enable building the @text@ package with @simdutf@ support.
+enableTextWithSIMDUTF :: Flavour -> Flavour
+enableTextWithSIMDUTF flavour = flavour {
+  textWithSIMDUTF = True
+}
+
 -- | Build stage2 compiler with -fomit-interface-pragmas to reduce
 -- recompilation.
 omitPragmas :: Flavour -> Flavour


=====================================
hadrian/src/Flavour/Type.hs
=====================================
@@ -23,6 +23,9 @@ data Flavour = Flavour {
     bignumBackend :: String,
     -- | Check selected bignum backend against native backend
     bignumCheck :: Bool,
+    -- | Build the @text@ package with @simdutf@ support. Disabled by
+    -- default due to packaging difficulties described in #20724.
+    textWithSIMDUTF :: Bool,
     -- | Build libraries these ways.
     libraryWays :: Ways,
     -- | Build RTS these ways.
@@ -70,4 +73,3 @@ type DocTargets = Set DocTarget
 --   distribution.
 data DocTarget = Haddocks | SphinxHTML | SphinxPDFs | SphinxMan | SphinxInfo
   deriving (Eq, Ord, Show, Bounded, Enum)
-


=====================================
hadrian/src/Rules/Register.hs
=====================================
@@ -7,6 +7,7 @@ module Rules.Register (
 import Base
 import Context
 import Expression ( getContextData )
+import Flavour
 import Oracles.Setting
 import Hadrian.BuildPath
 import Hadrian.Expression
@@ -51,6 +52,14 @@ configurePackageRules = do
           isGmp <- (== "gmp") <$> interpretInContext ctx getBignumBackend
           when isGmp $
             need [buildP -/- "include/ghc-gmp.h"]
+        when (pkg == text) $ do
+          simdutf <- textWithSIMDUTF <$> flavour
+          when simdutf $ do
+            -- This is required, otherwise you get Error: hadrian:
+            -- Encountered missing or private dependencies:
+            -- system-cxx-std-lib ==1.0
+            cxxStdLib <- systemCxxStdLibConfPath $ PackageDbLoc stage Inplace
+            need [cxxStdLib]
         Cabal.configurePackage ctx
 
     root -/- "**/autogen/cabal_macros.h" %> \out -> do


=====================================
hadrian/src/Settings/Default.hs
=====================================
@@ -267,6 +267,7 @@ defaultFlavour = Flavour
     , packages           = defaultPackages
     , bignumBackend      = defaultBignumBackend
     , bignumCheck        = False
+    , textWithSIMDUTF    = False
     , libraryWays        = defaultLibraryWays
     , rtsWays            = defaultRtsWays
     , dynamicGhcPrograms = defaultDynamicGhcPrograms


=====================================
hadrian/src/Settings/Packages.hs
=====================================
@@ -191,12 +191,10 @@ packageArgs = do
             builder (Cabal Flags) ? stage0 `cabalFlag` "bootstrap"
 
         ---------------------------------- text --------------------------------
-        , package text ? mconcat
-          -- Disable SIMDUTF by default due to packaging difficulties
-          -- described in #20724.
-          [ builder (Cabal Flags) ? arg "-simdutf"
-          -- https://github.com/haskell/text/issues/415
-          , builder Ghc ? input "**/Data/Text/Encoding.hs"  ? arg "-Wno-unused-imports" ]
+        , package text ?
+            ifM (textWithSIMDUTF <$> expr flavour)
+              (builder (Cabal Flags) ? arg "+simdutf")
+              (builder (Cabal Flags) ? arg "-simdutf")
 
         ------------------------------- haskeline ------------------------------
         -- Hadrian doesn't currently support packages containing both libraries


=====================================
libraries/base/src/Data/Array/Byte.hs
=====================================
@@ -201,10 +201,11 @@ instance Show ByteArray where
                 | otherwise = showString ", "
 
 instance Lift ByteArray where
-  liftTyped x = unsafeCodeCoerce (lift x)
-  lift (ByteArray b) = return
-    (AppE (AppE (VarE addrToByteArrayName) (LitE (IntegerL (fromIntegral len))))
-      (LitE (BytesPrimL (Bytes ptr 0 (fromIntegral len)))))
+  liftTyped = unsafeCodeCoerce . lift
+  lift (ByteArray b) =
+    [| addrToByteArray $(lift len)
+                       $(pure . LitE . BytesPrimL $ Bytes ptr 0 (fromIntegral len))
+    |]
     where
       len# = sizeofByteArray# b
       len = I# len#
@@ -219,9 +220,7 @@ instance Lift ByteArray where
       ptr :: ForeignPtr Word8
       ptr = ForeignPtr (byteArrayContents# pb) (PlainPtr (unsafeCoerce# pb))
 
-addrToByteArrayName :: Name
-addrToByteArrayName = 'addrToByteArray
-
+{-# NOINLINE addrToByteArray #-}
 addrToByteArray :: Int -> Addr# -> ByteArray
 addrToByteArray (I# len) addr = runST $ ST $
   \s -> case newByteArray# len s of


=====================================
testsuite/tests/codeGen/should_run/CtzClz0.hs
=====================================
@@ -0,0 +1,27 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE MagicHash #-}
+
+module Main where
+
+import GHC.Exts
+import Control.Monad
+
+#include <MachDeps.h>
+
+{-# OPAQUE x #-} -- needed to avoid triggering constant folding
+x :: Word
+x = 0
+
+main :: IO ()
+main = do
+  let !(W# w) = x
+
+  guard (W# (ctz# w) == WORD_SIZE_IN_BITS)
+  guard (W# (ctz8# w) == 8)
+  guard (W# (ctz16# w) == 16)
+  guard (W# (ctz32# w) == 32)
+
+  guard (W# (clz# w) == WORD_SIZE_IN_BITS)
+  guard (W# (clz8# w) == 8)
+  guard (W# (clz16# w) == 16)
+  guard (W# (clz32# w) == 32)


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -246,3 +246,4 @@ test('T24295a', normal, compile_and_run, ['-O -floopification'])
 test('T24295b', normal, compile_and_run, ['-O -floopification -fpedantic-bottoms'])
 test('T24664a', normal, compile_and_run, ['-O'])
 test('T24664b', normal, compile_and_run, ['-O'])
+test('CtzClz0', normal, compile_and_run, [''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/789846b41e08c4d65affc9422205c6b38941ffb1...7541e2046e24ed1abf1066689ec2a0f4ea3ec9b2

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/789846b41e08c4d65affc9422205c6b38941ffb1...7541e2046e24ed1abf1066689ec2a0f4ea3ec9b2
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240604/a616da2f/attachment-0001.html>


More information about the ghc-commits mailing list