[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 5 commits: NCG(x86): Compile add+shift as lea if possible.

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Mon Aug 8 17:46:14 UTC 2022



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
20457d77 by Andreas Klebinger at 2022-08-08T14:42:26+02:00
NCG(x86): Compile add+shift as lea if possible.

- - - - -
3e6f1cfc by Andreas Klebinger at 2022-08-08T13:45:49-04:00
dataToTag#: Skip runtime tag check if argument is inferred tagged

This addresses one part of #21710.

- - - - -
6543909a by Cheng Shao at 2022-08-08T13:45:54-04:00
rts: remove redundant stg_traceCcszh

This out-of-line primop has no Haskell wrapper and hasn't been used
anywhere in the tree. Furthermore, the code gets in the way of !7632, so
it should be garbage collected.

- - - - -
f15a7c8f by Andreas Klebinger at 2022-08-08T13:45:55-04:00
Document a divergence from the report in parsing function lhss.

GHC is happy to parse `(f) x y = x + y` when it should be a parse error
based on the Haskell report. Seems harmless enough so we won't fix it
but it's documented now.

Fixes #19788

- - - - -
5076c99b by Ben Gamari at 2022-08-08T13:45:55-04:00
gitlab-ci: Add release job for aarch64/debian 11

- - - - -


15 changed files:

- .gitlab/gen_ci.hs
- .gitlab/jobs.yaml
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/Stg/InferTags/Rewrite.hs
- compiler/GHC/StgToCmm/Expr.hs
- docs/users_guide/bugs.rst
- rts/PrimOps.cmm
- rts/RtsSymbols.c
- rts/include/stg/MiscClosures.h
- + testsuite/tests/codeGen/should_compile/T21710a.hs
- + testsuite/tests/codeGen/should_compile/T21710a.stderr
- testsuite/tests/codeGen/should_compile/all.T
- + testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
- + testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
- testsuite/tests/codeGen/should_gen_asm/all.T


Changes:

=====================================
.gitlab/gen_ci.hs
=====================================
@@ -769,6 +769,7 @@ jobs = M.fromList $ concatMap flattenJobGroup $
      , allowFailureGroup (addValidateRule FreeBSDTag (standardBuilds Amd64 FreeBSD))
      , standardBuilds AArch64 Darwin
      , standardBuilds AArch64 (Linux Debian10)
+     , disableValidate (standardBuilds AArch64 (Linux Debian11))
      , allowFailureGroup (disableValidate (standardBuilds ARMv7 (Linux Debian10)))
      , standardBuilds I386 (Linux Debian9)
      , allowFailureGroup (standardBuildsWithConfig Amd64 (Linux Alpine) static)


=====================================
.gitlab/jobs.yaml
=====================================
@@ -120,6 +120,64 @@
       "TEST_ENV": "aarch64-linux-deb10-validate"
     }
   },
+  "aarch64-linux-deb11-validate": {
+    "after_script": [
+      ".gitlab/ci.sh save_cache",
+      ".gitlab/ci.sh clean",
+      "cat ci_timings"
+    ],
+    "allow_failure": false,
+    "artifacts": {
+      "expire_in": "2 weeks",
+      "paths": [
+        "ghc-aarch64-linux-deb11-validate.tar.xz",
+        "junit.xml"
+      ],
+      "reports": {
+        "junit": "junit.xml"
+      },
+      "when": "always"
+    },
+    "cache": {
+      "key": "aarch64-linux-deb11-$CACHE_REV",
+      "paths": [
+        "cabal-cache",
+        "toolchain"
+      ]
+    },
+    "dependencies": [],
+    "image": "registry.gitlab.haskell.org/ghc/ci-images/aarch64-linux-deb11:$DOCKER_REV",
+    "needs": [
+      {
+        "artifacts": false,
+        "job": "hadrian-ghc-in-ghci"
+      }
+    ],
+    "rules": [
+      {
+        "if": "($CI_MERGE_REQUEST_LABELS !~ /.*fast-ci.*/) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY == null) && (\"true\" == \"true\") && (\"true\" == \"true\") && (\"disabled\" != \"disabled\")",
+        "when": "on_success"
+      }
+    ],
+    "script": [
+      "sudo chown ghc:ghc -R .",
+      ".gitlab/ci.sh setup",
+      ".gitlab/ci.sh configure",
+      ".gitlab/ci.sh build_hadrian",
+      ".gitlab/ci.sh test_hadrian"
+    ],
+    "stage": "full-build",
+    "tags": [
+      "aarch64-linux"
+    ],
+    "variables": {
+      "BIGNUM_BACKEND": "gmp",
+      "BIN_DIST_NAME": "ghc-aarch64-linux-deb11-validate",
+      "BUILD_FLAVOUR": "validate",
+      "CONFIGURE_ARGS": "",
+      "TEST_ENV": "aarch64-linux-deb11-validate"
+    }
+  },
   "armv7-linux-deb10-validate": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
@@ -358,6 +416,65 @@
       "XZ_OPT": "-9"
     }
   },
+  "nightly-aarch64-linux-deb11-validate": {
+    "after_script": [
+      ".gitlab/ci.sh save_cache",
+      ".gitlab/ci.sh clean",
+      "cat ci_timings"
+    ],
+    "allow_failure": false,
+    "artifacts": {
+      "expire_in": "8 weeks",
+      "paths": [
+        "ghc-aarch64-linux-deb11-validate.tar.xz",
+        "junit.xml"
+      ],
+      "reports": {
+        "junit": "junit.xml"
+      },
+      "when": "always"
+    },
+    "cache": {
+      "key": "aarch64-linux-deb11-$CACHE_REV",
+      "paths": [
+        "cabal-cache",
+        "toolchain"
+      ]
+    },
+    "dependencies": [],
+    "image": "registry.gitlab.haskell.org/ghc/ci-images/aarch64-linux-deb11:$DOCKER_REV",
+    "needs": [
+      {
+        "artifacts": false,
+        "job": "hadrian-ghc-in-ghci"
+      }
+    ],
+    "rules": [
+      {
+        "if": "($CI_MERGE_REQUEST_LABELS !~ /.*fast-ci.*/) && ($RELEASE_JOB != \"yes\") && ($NIGHTLY) && (\"true\" == \"true\") && (\"true\" == \"true\") && (\"true\" == \"true\")",
+        "when": "on_success"
+      }
+    ],
+    "script": [
+      "sudo chown ghc:ghc -R .",
+      ".gitlab/ci.sh setup",
+      ".gitlab/ci.sh configure",
+      ".gitlab/ci.sh build_hadrian",
+      ".gitlab/ci.sh test_hadrian"
+    ],
+    "stage": "full-build",
+    "tags": [
+      "aarch64-linux"
+    ],
+    "variables": {
+      "BIGNUM_BACKEND": "gmp",
+      "BIN_DIST_NAME": "ghc-aarch64-linux-deb11-validate",
+      "BUILD_FLAVOUR": "validate",
+      "CONFIGURE_ARGS": "",
+      "TEST_ENV": "aarch64-linux-deb11-validate",
+      "XZ_OPT": "-9"
+    }
+  },
   "nightly-armv7-linux-deb10-validate": {
     "after_script": [
       ".gitlab/ci.sh save_cache",
@@ -1864,6 +1981,66 @@
       "XZ_OPT": "-9"
     }
   },
+  "release-aarch64-linux-deb11-release": {
+    "after_script": [
+      ".gitlab/ci.sh save_cache",
+      ".gitlab/ci.sh clean",
+      "cat ci_timings"
+    ],
+    "allow_failure": false,
+    "artifacts": {
+      "expire_in": "1 year",
+      "paths": [
+        "ghc-aarch64-linux-deb11-release.tar.xz",
+        "junit.xml"
+      ],
+      "reports": {
+        "junit": "junit.xml"
+      },
+      "when": "always"
+    },
+    "cache": {
+      "key": "aarch64-linux-deb11-$CACHE_REV",
+      "paths": [
+        "cabal-cache",
+        "toolchain"
+      ]
+    },
+    "dependencies": [],
+    "image": "registry.gitlab.haskell.org/ghc/ci-images/aarch64-linux-deb11:$DOCKER_REV",
+    "needs": [
+      {
+        "artifacts": false,
+        "job": "hadrian-ghc-in-ghci"
+      }
+    ],
+    "rules": [
+      {
+        "if": "($CI_MERGE_REQUEST_LABELS !~ /.*fast-ci.*/) && ($RELEASE_JOB == \"yes\") && ($NIGHTLY == null) && (\"true\" == \"true\") && (\"true\" == \"true\") && (\"true\" == \"true\")",
+        "when": "on_success"
+      }
+    ],
+    "script": [
+      "sudo chown ghc:ghc -R .",
+      ".gitlab/ci.sh setup",
+      ".gitlab/ci.sh configure",
+      ".gitlab/ci.sh build_hadrian",
+      ".gitlab/ci.sh test_hadrian"
+    ],
+    "stage": "full-build",
+    "tags": [
+      "aarch64-linux"
+    ],
+    "variables": {
+      "BIGNUM_BACKEND": "gmp",
+      "BIN_DIST_NAME": "ghc-aarch64-linux-deb11-release",
+      "BUILD_FLAVOUR": "release",
+      "CONFIGURE_ARGS": "",
+      "IGNORE_PERF_FAILURES": "all",
+      "TEST_ENV": "aarch64-linux-deb11-release",
+      "XZ_OPT": "-9"
+    }
+  },
   "release-armv7-linux-deb10-release": {
     "after_script": [
       ".gitlab/ci.sh save_cache",


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1048,10 +1048,29 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
 
     --------------------
     add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    -- x + imm
     add_code rep x (CmmLit (CmmInt y _))
         | is32BitInteger y
         , rep /= W8 -- LEA doesn't support byte size (#18614)
         = add_int rep x y
+    -- x + (y << imm)
+    add_code rep x y
+        -- Byte size is not supported and 16bit size is slow when computed via LEA
+        | rep /= W8 && rep /= W16
+        -- 2^3 = 8 is the highest multiplicator supported by LEA.
+        , Just (x,y,shift_bits) <- get_shift x y
+        = add_shiftL rep x y (fromIntegral shift_bits)
+        where
+          -- x + (y << imm)
+          get_shift x (CmmMachOp (MO_Shl _w) [y, CmmLit (CmmInt shift_bits _)])
+            | shift_bits <= 3
+            = Just (x, y, shift_bits)
+          -- (y << imm) + x
+          get_shift (CmmMachOp (MO_Shl _w) [y, CmmLit (CmmInt shift_bits _)]) x
+            | shift_bits <= 3
+            = Just (x, y, shift_bits)
+          get_shift _ _
+            = Nothing
     add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
       where format = intFormat rep
     -- TODO: There are other interesting patterns we want to replace
@@ -1066,6 +1085,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
     sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
 
     -- our three-operand add instruction:
+    add_int :: (Width -> CmmExpr -> Integer -> NatM Register)
     add_int width x y = do
         (x_reg, x_code) <- getSomeReg x
         let
@@ -1079,6 +1099,22 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
         --
         return (Any format code)
 
+    -- x + (y << shift_bits) using LEA
+    add_shiftL :: (Width -> CmmExpr -> CmmExpr -> Int -> NatM Register)
+    add_shiftL width x y shift_bits = do
+        (x_reg, x_code) <- getSomeReg x
+        (y_reg, y_code) <- getSomeReg y
+        let
+            format = intFormat width
+            imm = ImmInt 0
+            code dst
+               = (x_code `appOL` y_code) `snocOL`
+                 LEA format
+                        (OpAddr (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg (2 ^ shift_bits)) imm))
+                        (OpReg dst)
+        --
+        return (Any format code)
+
     ----------------------
 
     -- See Note [DIV/IDIV for bytes]


=====================================
compiler/GHC/Stg/InferTags/Rewrite.hs
=====================================
@@ -20,6 +20,7 @@ where
 
 import GHC.Prelude
 
+import GHC.Builtin.PrimOps ( PrimOp(..) )
 import GHC.Types.Id
 import GHC.Types.Name
 import GHC.Types.Unique.Supply
@@ -346,6 +347,19 @@ fvArgs args = do
 
 type IsScrut = Bool
 
+rewriteArgs :: [StgArg] -> RM [StgArg]
+rewriteArgs = mapM rewriteArg
+rewriteArg :: StgArg -> RM StgArg
+rewriteArg (StgVarArg v) = StgVarArg <$!> rewriteId v
+rewriteArg  (lit@StgLitArg{}) = return lit
+
+-- Attach a tagSig if it's tagged
+rewriteId :: Id -> RM Id
+rewriteId v = do
+    is_tagged <- isTagged v
+    if is_tagged then return $! setIdTagSig v (TagSig TagProper)
+                 else return v
+
 rewriteExpr :: IsScrut -> InferStgExpr -> RM TgStgExpr
 rewriteExpr _ (e@StgCase {})          = rewriteCase e
 rewriteExpr _ (e@StgLet {})           = rewriteLet e
@@ -355,8 +369,11 @@ rewriteExpr _ e@(StgConApp {})        = rewriteConApp e
 
 rewriteExpr isScrut e@(StgApp {})     = rewriteApp isScrut e
 rewriteExpr _ (StgLit lit)           = return $! (StgLit lit)
+rewriteExpr _ (StgOpApp op@(StgPrimOp DataToTagOp)  args res_ty) = do
+        (StgOpApp op) <$!> rewriteArgs args <*> pure res_ty
 rewriteExpr _ (StgOpApp op args res_ty) = return $! (StgOpApp op args res_ty)
 
+
 rewriteCase :: InferStgExpr -> RM TgStgExpr
 rewriteCase (StgCase scrut bndr alt_type alts) =
     withBinder NotTopLevel bndr $
@@ -415,6 +432,7 @@ rewriteApp True (StgApp f []) = do
     -- isTagged looks at more than the result of our analysis.
     -- So always update here if useful.
     let f' = if f_tagged
+                -- TODO: We might consider using a subst env instead of setting the sig only for select places.
                 then setIdTagSig f (TagSig TagProper)
                 else f
     return $! StgApp f' []


=====================================
compiler/GHC/StgToCmm/Expr.hs
=====================================
@@ -76,6 +76,8 @@ cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
 
 -- dataToTag# :: a -> Int#
 -- See Note [dataToTag# magic] in GHC.Core.Opt.ConstantFold
+-- TODO: There are some more optimization ideas for this code path
+-- in #21710
 cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
   platform <- getPlatform
   emitComment (mkFastString "dataToTag#")
@@ -92,15 +94,7 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
   -- the constructor index is too large to fit in the pointer and therefore
   -- we must look in the info table. See Note [Tagging big families].
 
-  slow_path <- getCode $ do
-      tmp <- newTemp (bWord platform)
-      _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
-      profile     <- getProfile
-      align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
-      emitAssign (CmmLocal result_reg)
-        $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp)))
-
-  fast_path <- getCode $ do
+  (fast_path :: CmmAGraph) <- getCode $ do
       -- Return the constructor index from the pointer tag
       return_ptr_tag <- getCode $ do
           emitAssign (CmmLocal result_reg)
@@ -113,8 +107,22 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
             $ getConstrTag profile align_check (cmmUntag platform amode)
 
       emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False)
-
-  emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
+  -- If we know the argument is already tagged there is no need to generate code to evaluate it
+  -- so we skip straight to the fast path. If we don't know if there is a tag we take the slow
+  -- path which evaluates the argument before fetching the tag.
+  case (idTagSig_maybe a) of
+    Just sig
+      | isTaggedSig sig
+      -> emit fast_path
+    _ -> do
+          slow_path <- getCode $ do
+              tmp <- newTemp (bWord platform)
+              _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
+              profile     <- getProfile
+              align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
+              emitAssign (CmmLocal result_reg)
+                $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp)))
+          emit =<< mkCmmIfThenElse' is_tagged fast_path slow_path (Just True)
   emitReturn [CmmReg $ CmmLocal result_reg]
 
 


=====================================
docs/users_guide/bugs.rst
=====================================
@@ -115,6 +115,10 @@ Lexical syntax
      varid       →   small {idchar} ⟨reservedid⟩
      conid       →   large {idchar}
 
+- GHC allows redundant parentheses around the function name in the `funlhs` part of declarations.
+  That is, GHC will succeed in parsing a declaration like `((f)) x = <rhs>` for any number
+  of parentheses around `f`.
+
 .. _infelicities-syntax:
 
 Context-free syntax


=====================================
rts/PrimOps.cmm
=====================================
@@ -2801,21 +2801,6 @@ stg_getApStackValzh ( P_ ap_stack, W_ offset )
    }
 }
 
-// Write the cost center stack of the first argument on stderr; return
-// the second.  Possibly only makes sense for already evaluated
-// things?
-stg_traceCcszh ( P_ obj, P_ ret )
-{
-    W_ ccs;
-
-#if defined(PROFILING)
-    ccs = StgHeader_ccs(UNTAG(obj));
-    ccall fprintCCS_stderr(ccs "ptr");
-#endif
-
-    jump stg_ap_0_fast(ret);
-}
-
 stg_getSparkzh ()
 {
     W_ spark;


=====================================
rts/RtsSymbols.c
=====================================
@@ -1015,7 +1015,6 @@ extern char **environ;
       SymI_HasProto(stopTimer)                                          \
       SymI_HasProto(n_capabilities)                                     \
       SymI_HasProto(enabled_capabilities)                               \
-      SymI_HasDataProto(stg_traceCcszh)                                     \
       SymI_HasDataProto(stg_traceEventzh)                                   \
       SymI_HasDataProto(stg_traceMarkerzh)                                  \
       SymI_HasDataProto(stg_traceBinaryEventzh)                             \


=====================================
rts/include/stg/MiscClosures.h
=====================================
@@ -566,7 +566,6 @@ RTS_FUN_DECL(stg_numSparkszh);
 
 RTS_FUN_DECL(stg_noDuplicatezh);
 
-RTS_FUN_DECL(stg_traceCcszh);
 RTS_FUN_DECL(stg_clearCCSzh);
 RTS_FUN_DECL(stg_traceEventzh);
 RTS_FUN_DECL(stg_traceBinaryEventzh);


=====================================
testsuite/tests/codeGen/should_compile/T21710a.hs
=====================================
@@ -0,0 +1,15 @@
+{-# LANGUAGE MagicHash, UnboxedTuples #-}
+{-# OPTIONS_GHC -O #-}
+
+module M where
+
+import GHC.Exts
+
+data E = A | B | C | D | E
+
+foo x =
+    case x of
+        A -> 2#
+        B -> 42#
+        -- In this branch we already know `x` is evaluated, so we shouldn't generate an extra `call` for it.
+        _ -> dataToTag# x


=====================================
testsuite/tests/codeGen/should_compile/T21710a.stderr
=====================================
@@ -0,0 +1,446 @@
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'E2_bytes" {
+     M.$tc'E2_bytes:
+         I8[] "'E"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'D2_bytes" {
+     M.$tc'D2_bytes:
+         I8[] "'D"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'C2_bytes" {
+     M.$tc'C2_bytes:
+         I8[] "'C"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'B2_bytes" {
+     M.$tc'B2_bytes:
+         I8[] "'B"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tc'A3_bytes" {
+     M.$tc'A3_bytes:
+         I8[] "'A"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$tcE2_bytes" {
+     M.$tcE2_bytes:
+         I8[] "E"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$trModule2_bytes" {
+     M.$trModule2_bytes:
+         I8[] "M"
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""cstring" . M.$trModule4_bytes" {
+     M.$trModule4_bytes:
+         I8[] "main"
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.foo_entry() { //  [R2]
+         { info_tbls: [(cBa,
+                        label: block_cBa_info
+                        rep: StackRep []
+                        srt: Nothing),
+                       (cBi,
+                        label: M.foo_info
+                        rep: HeapRep static { Fun {arity: 1 fun_type: ArgSpec 5} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cBi: // global
+           if ((Sp + -8) < SpLim) (likely: False) goto cBj; else goto cBk;   // CmmCondBranch
+       cBj: // global
+           R1 = M.foo_closure;   // CmmAssign
+           call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;   // CmmCall
+       cBk: // global
+           I64[Sp - 8] = cBa;   // CmmStore
+           R1 = R2;   // CmmAssign
+           Sp = Sp - 8;   // CmmAssign
+           if (R1 & 7 != 0) goto cBa; else goto cBb;   // CmmCondBranch
+       cBb: // global
+           call (I64[R1])(R1) returns to cBa, args: 8, res: 8, upd: 8;   // CmmCall
+       cBa: // global
+           _cBh::P64 = R1 & 7;   // CmmAssign
+           if (_cBh::P64 != 1) goto uBz; else goto cBf;   // CmmCondBranch
+       uBz: // global
+           if (_cBh::P64 != 2) goto cBe; else goto cBg;   // CmmCondBranch
+       cBe: // global
+           // dataToTag#
+           _cBn::P64 = R1 & 7;   // CmmAssign
+           if (_cBn::P64 == 7) (likely: False) goto cBs; else goto cBr;   // CmmCondBranch
+       cBs: // global
+           _cBo::I64 = %MO_UU_Conv_W32_W64(I32[I64[R1 & (-8)] - 4]);   // CmmAssign
+           goto cBq;   // CmmBranch
+       cBr: // global
+           _cBo::I64 = _cBn::P64 - 1;   // CmmAssign
+           goto cBq;   // CmmBranch
+       cBq: // global
+           R1 = _cBo::I64;   // CmmAssign
+           Sp = Sp + 8;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+       cBg: // global
+           R1 = 42;   // CmmAssign
+           Sp = Sp + 8;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+       cBf: // global
+           R1 = 2;   // CmmAssign
+           Sp = Sp + 8;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ },
+ section ""data" . M.foo_closure" {
+     M.foo_closure:
+         const M.foo_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$trModule3_closure" {
+     M.$trModule3_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$trModule4_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$trModule1_closure" {
+     M.$trModule1_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$trModule2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$trModule_closure" {
+     M.$trModule_closure:
+         const GHC.Types.Module_con_info;
+         const M.$trModule3_closure+1;
+         const M.$trModule1_closure+1;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tcE1_closure" {
+     M.$tcE1_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$tcE2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tcE_closure" {
+     M.$tcE_closure:
+         const GHC.Types.TyCon_con_info;
+         const M.$trModule_closure+1;
+         const M.$tcE1_closure+1;
+         const GHC.Types.krep$*_closure+5;
+         const 10475418246443540865;
+         const 12461417314693222409;
+         const 0;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'A1_closure" {
+     M.$tc'A1_closure:
+         const GHC.Types.KindRepTyConApp_con_info;
+         const M.$tcE_closure+1;
+         const GHC.Types.[]_closure+1;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'A2_closure" {
+     M.$tc'A2_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$tc'A3_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'A_closure" {
+     M.$tc'A_closure:
+         const GHC.Types.TyCon_con_info;
+         const M.$trModule_closure+1;
+         const M.$tc'A2_closure+1;
+         const M.$tc'A1_closure+1;
+         const 10991425535368257265;
+         const 3459663971500179679;
+         const 0;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'B1_closure" {
+     M.$tc'B1_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$tc'B2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'B_closure" {
+     M.$tc'B_closure:
+         const GHC.Types.TyCon_con_info;
+         const M.$trModule_closure+1;
+         const M.$tc'B1_closure+1;
+         const M.$tc'A1_closure+1;
+         const 13038863156169552918;
+         const 13430333535161531545;
+         const 0;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'C1_closure" {
+     M.$tc'C1_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$tc'C2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'C_closure" {
+     M.$tc'C_closure:
+         const GHC.Types.TyCon_con_info;
+         const M.$trModule_closure+1;
+         const M.$tc'C1_closure+1;
+         const M.$tc'A1_closure+1;
+         const 8482817676735632621;
+         const 8146597712321241387;
+         const 0;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'D1_closure" {
+     M.$tc'D1_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$tc'D2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'D_closure" {
+     M.$tc'D_closure:
+         const GHC.Types.TyCon_con_info;
+         const M.$trModule_closure+1;
+         const M.$tc'D1_closure+1;
+         const M.$tc'A1_closure+1;
+         const 7525207739284160575;
+         const 13746130127476219356;
+         const 0;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'E1_closure" {
+     M.$tc'E1_closure:
+         const GHC.Types.TrNameS_con_info;
+         const M.$tc'E2_bytes;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.$tc'E_closure" {
+     M.$tc'E_closure:
+         const GHC.Types.TyCon_con_info;
+         const M.$trModule_closure+1;
+         const M.$tc'E1_closure+1;
+         const M.$tc'A1_closure+1;
+         const 6748545530683684316;
+         const 10193016702094081137;
+         const 0;
+         const 3;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.A_closure" {
+     M.A_closure:
+         const M.A_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.B_closure" {
+     M.B_closure:
+         const M.B_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.C_closure" {
+     M.C_closure:
+         const M.C_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.D_closure" {
+     M.D_closure:
+         const M.D_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""data" . M.E_closure" {
+     M.E_closure:
+         const M.E_con_info;
+ }]
+
+
+
+==================== Output Cmm ====================
+[section ""relreadonly" . M.E_closure_tbl" {
+     M.E_closure_tbl:
+         const M.A_closure+1;
+         const M.B_closure+2;
+         const M.C_closure+3;
+         const M.D_closure+4;
+         const M.E_closure+5;
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.A_con_entry() { //  []
+         { info_tbls: [(cC5,
+                        label: M.A_con_info
+                        rep: HeapRep 1 nonptrs { Con {tag: 0 descr:"main:M.A"} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cC5: // global
+           R1 = R1 + 1;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.B_con_entry() { //  []
+         { info_tbls: [(cCa,
+                        label: M.B_con_info
+                        rep: HeapRep 1 nonptrs { Con {tag: 1 descr:"main:M.B"} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cCa: // global
+           R1 = R1 + 2;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.C_con_entry() { //  []
+         { info_tbls: [(cCf,
+                        label: M.C_con_info
+                        rep: HeapRep 1 nonptrs { Con {tag: 2 descr:"main:M.C"} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cCf: // global
+           R1 = R1 + 3;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.D_con_entry() { //  []
+         { info_tbls: [(cCk,
+                        label: M.D_con_info
+                        rep: HeapRep 1 nonptrs { Con {tag: 3 descr:"main:M.D"} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cCk: // global
+           R1 = R1 + 4;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ }]
+
+
+
+==================== Output Cmm ====================
+[M.E_con_entry() { //  []
+         { info_tbls: [(cCp,
+                        label: M.E_con_info
+                        rep: HeapRep 1 nonptrs { Con {tag: 4 descr:"main:M.E"} }
+                        srt: Nothing)]
+           stack_info: arg_space: 8
+         }
+     {offset
+       cCp: // global
+           R1 = R1 + 5;   // CmmAssign
+           call (P64[Sp])(R1) args: 8, res: 0, upd: 8;   // CmmCall
+     }
+ }]
+
+


=====================================
testsuite/tests/codeGen/should_compile/all.T
=====================================
@@ -108,3 +108,4 @@ test('T18614', normal, compile, [''])
 test('mk-big-obj',
      [unless(opsys('mingw32'), skip), pre_cmd('$PYTHON mk-big-obj.py > mk-big-obj.c')],
      multimod_compile, ['mk-big-obj.c', '-c -v0 -no-hs-main'])
+test('T21710a', [ only_ways(['optasm']), when(wordsize(32), skip), grep_errmsg('(call)',[1]) ], compile, ['-ddump-cmm -dno-typeable-binds'])


=====================================
testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
=====================================
@@ -0,0 +1,46 @@
+.section .text
+.align 8
+.align 8
+	.quad	8589934604
+	.quad	0
+	.long	14
+	.long	0
+.globl AddMulX86_f_info
+.type AddMulX86_f_info, @function
+AddMulX86_f_info:
+.LcAx:
+	leaq (%r14,%rsi,8),%rbx
+	jmp *(%rbp)
+	.size AddMulX86_f_info, .-AddMulX86_f_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_f_closure
+.type AddMulX86_f_closure, @object
+AddMulX86_f_closure:
+	.quad	AddMulX86_f_info
+.section .text
+.align 8
+.align 8
+	.quad	8589934604
+	.quad	0
+	.long	14
+	.long	0
+.globl AddMulX86_g_info
+.type AddMulX86_g_info, @function
+AddMulX86_g_info:
+.LcAL:
+	leaq (%r14,%rsi,8),%rbx
+	jmp *(%rbp)
+	.size AddMulX86_g_info, .-AddMulX86_g_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_g_closure
+.type AddMulX86_g_closure, @object
+AddMulX86_g_closure:
+	.quad	AddMulX86_g_info
+.section .note.GNU-stack,"",@progbits
+.ident "GHC 9.3.20220228"
+
+


=====================================
testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
=====================================
@@ -0,0 +1,12 @@
+{-# LANGUAGE MagicHash #-}
+
+module AddMulX86 where
+
+import GHC.Exts
+
+f :: Int# -> Int# -> Int#
+f x y =
+    x +# (y *# 8#) -- Should result in a lea instruction, which we grep the assembly output for.
+
+g x y =
+    (y *# 8#) +# x  -- Should result in a lea instruction, which we grep the assembly output for.


=====================================
testsuite/tests/codeGen/should_gen_asm/all.T
=====================================
@@ -10,3 +10,4 @@ test('memset-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
 test('bytearray-memset-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
 test('bytearray-memcpy-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
 test('T18137', [when(opsys('darwin'), skip), only_ways(llvm_ways)], compile_grep_asm, ['hs', False, '-fllvm -split-sections'])
+test('AddMulX86', is_amd64_codegen, compile_cmp_asm, ['hs', '-dno-typeable-binds'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/709ad7c3de1931c8760a92bf7d49b6a0587977be...5076c99beaf8d935937718d8d10037596ce5af2e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/709ad7c3de1931c8760a92bf7d49b6a0587977be...5076c99beaf8d935937718d8d10037596ce5af2e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20220808/a54d1f4d/attachment-0001.html>


More information about the ghc-commits mailing list