[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 4 commits: x86 NCG: Make MOVD's output format explicit

Tue Jan 28 20:04:54 UTC 2025


Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
62760367 by ARATA Mizuki at 2025-01-27T16:23:06-05:00
x86 NCG: Make MOVD's output format explicit

The old design led to inference of a wrong format,
losing upper bits of a vector register.

Fixes #25659

Co-authored-by: sheaf <sam.derbyshire at gmail.com>

- - - - -
f19ab490 by Simon Hengel at 2025-01-27T16:23:45-05:00
doc: Correct JSON schema for `-fdiagnostics-as-json` (fixes #25393)

- - - - -
e16eae65 by Cheng Shao at 2025-01-27T21:41:39+00:00
hadrian: fix bootstrap with 9.12.1

This patch bumps hadrian index-state to fix bootstrap with 9.12.1.

- - - - -
9955f737 by Jeffrey Young at 2025-01-28T15:04:38-05:00
base: add SrcLoc changes to changelog, 4.21.0.0

I accidentally dropped this in !13381

- closes #25614

See:

- ea4587794b9e3a098f9c02bd6cea2294af2539ce  (the 13381 commit)
- Issue #25614

- - - - -


11 changed files:

- .gitlab/ci.sh
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
- docs/users_guide/diagnostics-as-json-schema-1_0.json
- docs/users_guide/diagnostics-as-json-schema-1_1.json
- hadrian/cabal.project
- libraries/base/changelog.md
- + testsuite/tests/simd/should_run/T25659.hs
- + testsuite/tests/simd/should_run/T25659.stdout
- testsuite/tests/simd/should_run/all.T


Changes:

=====================================
.gitlab/ci.sh
=====================================
@@ -8,7 +8,7 @@ set -Eeuo pipefail
 
 # Configuration:
 # N.B. You may want to also update the index-state in hadrian/cabal.project.
-HACKAGE_INDEX_STATE="2024-10-30T22:56:00Z"
+HACKAGE_INDEX_STATE="2025-01-27T17:45:32Z"
 MIN_HAPPY_VERSION="1.20"
 MIN_ALEX_VERSION="3.2.6"
 


=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1171,7 +1171,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
         bitcast :: Format -> Format -> CmmExpr -> NatM Register
         bitcast fmt rfmt expr =
           do (src, e_code) <- getSomeReg expr
-             let code = \dst -> e_code `snocOL` (MOVD fmt (OpReg src) (OpReg dst))
+             let code = \dst -> e_code `snocOL` (MOVD fmt rfmt (OpReg src) (OpReg dst))
              return (Any rfmt code)
 
         toI8Reg :: Width -> CmmExpr -> NatM Register
@@ -1262,7 +1262,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
               code dst = exp `snocOL`
                          -- VPBROADCAST from GPR requires AVX-512,
                          -- so we use an additional MOVD.
-                         (MOVD movFormat (OpReg reg) (OpReg dst)) `snocOL`
+                         (MOVD movFormat fmt (OpReg reg) (OpReg dst)) `snocOL`
                          (VPBROADCAST fmt fmt (OpReg dst) dst)
           return $ Any fmt code
 
@@ -1272,7 +1272,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
           (reg, exp) <- getNonClobberedReg expr
           let fmt = VecFormat 16 FmtInt8
           return $ Any fmt (\dst -> exp `snocOL`
-                                    (MOVD II32 (OpReg reg) (OpReg dst)) `snocOL`
+                                    (MOVD II32 fmt (OpReg reg) (OpReg dst)) `snocOL`
                                     (PUNPCKLBW fmt (OpReg dst) dst) `snocOL`
                                     (PUNPCKLWD (VecFormat 8 FmtInt16) (OpReg dst) dst) `snocOL`
                                     (PSHUFD fmt (ImmInt 0x00) (OpReg dst) dst)
@@ -1284,7 +1284,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
           (reg, exp) <- getNonClobberedReg expr
           let fmt = VecFormat 8 FmtInt16
           return $ Any fmt (\dst -> exp `snocOL`
-                                    (MOVD II32 (OpReg reg) (OpReg dst)) `snocOL`
+                                    (MOVD II32 fmt (OpReg reg) (OpReg dst)) `snocOL`
                                     (PUNPCKLWD fmt (OpReg dst) dst) `snocOL`
                                     (PSHUFD fmt (ImmInt 0x00) (OpReg dst) dst)
                                     )
@@ -1295,7 +1295,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
           (reg, exp) <- getNonClobberedReg expr
           let fmt = VecFormat 4 FmtInt32
           return $ Any fmt (\dst -> exp `snocOL`
-                                    (MOVD II32 (OpReg reg) (OpReg dst)) `snocOL`
+                                    (MOVD II32 fmt (OpReg reg) (OpReg dst)) `snocOL`
                                     (PSHUFD fmt (ImmInt 0x00) (OpReg dst) dst)
                                     )
 
@@ -1305,7 +1305,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
           (reg, exp) <- getNonClobberedReg expr
           let fmt = VecFormat 2 FmtInt64
           return $ Any fmt (\dst -> exp `snocOL`
-                                    (MOVD II64 (OpReg reg) (OpReg dst)) `snocOL`
+                                    (MOVD II64 fmt (OpReg reg) (OpReg dst)) `snocOL`
                                     (PUNPCKLQDQ fmt (OpReg dst) dst)
                                     )
 
@@ -1793,16 +1793,16 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       let code dst =
             case i of
               0 -> exp `snocOL`
-                   (MOVD FF32 (OpReg r) (OpReg dst))
+                   (MOVD fmt II32 (OpReg r) (OpReg dst))
               1 -> exp `snocOL`
                    (PSHUFD fmt (ImmInt 0b01_01_01_01) (OpReg r) tmp) `snocOL` -- tmp <- (r[1],r[1],r[1],r[1])
-                   (MOVD FF32 (OpReg tmp) (OpReg dst))
+                   (MOVD fmt II32 (OpReg tmp) (OpReg dst))
               2 -> exp `snocOL`
                    (PSHUFD fmt (ImmInt 0b11_10_11_10) (OpReg r) tmp) `snocOL` -- tmp <- (r[2],r[3],r[2],r[3])
-                   (MOVD FF32 (OpReg tmp) (OpReg dst))
+                   (MOVD fmt II32 (OpReg tmp) (OpReg dst))
               _ -> exp `snocOL`
                    (PSHUFD fmt (ImmInt 0b11_11_11_11) (OpReg r) tmp) `snocOL` -- tmp <- (r[3],r[3],r[3],r[3])
-                   (MOVD FF32 (OpReg tmp) (OpReg dst))
+                   (MOVD fmt II32 (OpReg tmp) (OpReg dst))
       return (Any II32 code)
     vector_int32x4_extract_sse2 _ offset
       = pprPanic "Unsupported offset" (pdoc platform offset)
@@ -1818,10 +1818,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
       let code dst =
             case lit of
               CmmInt 0 _ -> exp `snocOL`
-                            (MOVD FF64 (OpReg r) (OpReg dst))
+                            (MOVD fmt II64 (OpReg r) (OpReg dst))
               CmmInt 1 _ -> exp `snocOL`
                             (MOVHLPS fmt r tmp) `snocOL`
-                            (MOVD FF64 (OpReg tmp) (OpReg dst))
+                            (MOVD fmt II64 (OpReg tmp) (OpReg dst))
               _          -> panic "Error in offset while unpacking"
       return (Any II64 code)
     vector_int64x2_extract_sse2 _ offset
@@ -2103,22 +2103,22 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
               = case offset of
                   0 -> valExp `appOL`
                        (vecCode dst) `snocOL`
-                       (MOVD II32 (OpReg valReg) (OpReg tmp1)) `snocOL`
+                       (MOVD II32 vectorFormat (OpReg valReg) (OpReg tmp1)) `snocOL`
                        (MOV floatVectorFormat (OpReg tmp1) (OpReg dst)) -- MOVSS; dst <- (tmp1[0],dst[1],dst[2],dst[3])
                   1 -> valExp `appOL`
                        (vecCode tmp1) `snocOL`
-                       (MOVD II32 (OpReg valReg) (OpReg dst)) `snocOL` -- dst <- (val,0,0,0)
+                       (MOVD II32 vectorFormat (OpReg valReg) (OpReg dst)) `snocOL` -- dst <- (val,0,0,0)
                        (PUNPCKLQDQ vectorFormat (OpReg tmp1) dst) `snocOL` -- dst <- (dst[0],dst[1],tmp1[0],tmp1[1])
                        (SHUF floatVectorFormat (ImmInt 0b11_10_00_10) (OpReg tmp1) dst) -- SHUFPS; dst <- (dst[2],dst[0],tmp1[2],tmp1[3])
                   2 -> valExp `appOL`
                        (vecCode dst) `snocOL`
-                       (MOVD II32 (OpReg valReg) (OpReg tmp1)) `snocOL` -- tmp1 <- (val,0,0,0)
+                       (MOVD II32 vectorFormat (OpReg valReg) (OpReg tmp1)) `snocOL` -- tmp1 <- (val,0,0,0)
                        (MOVU floatVectorFormat (OpReg dst) (OpReg tmp2)) `snocOL` -- MOVUPS; tmp2 <- dst
                        (SHUF floatVectorFormat (ImmInt 0b01_00_01_11) (OpReg tmp1) tmp2) `snocOL` -- SHUFPS; tmp2 <- (tmp2[3],tmp2[1],tmp1[0],tmp1[1])
                        (SHUF floatVectorFormat (ImmInt 0b00_10_01_00) (OpReg tmp2) dst) -- SHUFPS; dst <- (dst[0],dst[1],tmp2[2],tmp2[0])
                   _ -> valExp `appOL`
                        (vecCode dst) `snocOL`
-                       (MOVD II32 (OpReg valReg) (OpReg tmp1)) `snocOL` -- tmp1 <- (val,0,0,0)
+                       (MOVD II32 vectorFormat (OpReg valReg) (OpReg tmp1)) `snocOL` -- tmp1 <- (val,0,0,0)
                        (SHUF floatVectorFormat (ImmInt 0b11_10_01_00) (OpReg dst) tmp1) `snocOL` -- SHUFPS; tmp1 <- (tmp1[0],tmp1[1],dst[2],dst[3])
                        (SHUF floatVectorFormat (ImmInt 0b00_10_01_00) (OpReg tmp1) dst) -- SHUFPS; dst <- (dst[0],dst[1],tmp1[2],tmp1[0])
         return $ Any vectorFormat code
@@ -2139,12 +2139,12 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
                   CmmInt 0 _ -> valExp `appOL`
                                 vecExp `snocOL`
                                 (MOVHLPS fmt vecReg tmp) `snocOL`
-                                (MOVD II64 (OpReg valReg) (OpReg dst)) `snocOL`
+                                (MOVD II64 fmt (OpReg valReg) (OpReg dst)) `snocOL`
                                 (PUNPCKLQDQ fmt (OpReg tmp) dst)
                   CmmInt 1 _ -> valExp `appOL`
                                 vecExp `snocOL`
-                                (MOV II64 (OpReg vecReg) (OpReg dst)) `snocOL`
-                                (MOVD II64 (OpReg valReg) (OpReg tmp)) `snocOL`
+                                (MOVDQU fmt (OpReg vecReg) (OpReg dst)) `snocOL`
+                                (MOVD II64 fmt (OpReg valReg) (OpReg tmp)) `snocOL`
                                 (PUNPCKLQDQ fmt (OpReg tmp) dst)
                   _ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
          in return $ Any fmt code
@@ -4083,7 +4083,7 @@ loadArgsWin config (arg:rest) = do
            -- arguments in both fp and integer registers.
            let (assign_code', regs')
                 | isFloatFormat arg_fmt =
-                    ( assign_code `snocOL` MOVD FF64 (OpReg freg) (OpReg ireg),
+                    ( assign_code `snocOL` MOVD FF64 II64 (OpReg freg) (OpReg ireg),
                       [ RegWithFormat freg FF64
                       , RegWithFormat ireg II64 ])
                 | otherwise = (assign_code, [RegWithFormat ireg II64])


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -39,7 +39,6 @@ module GHC.CmmToAsm.X86.Instr
    , patchJumpInstr
    , isMetaInstr
    , isJumpishInstr
-   , movdOutFormat
    , MinOrMax(..), MinMaxType(..)
    )
 where
@@ -127,11 +126,16 @@ data Instr
              -- with @MOVABS@; we currently do not use this instruction in GHC.
              -- See https://stackoverflow.com/questions/52434073/whats-the-difference-between-the-x86-64-att-instructions-movq-and-movabsq.
 
-        | MOVD   Format Operand Operand -- ^ MOVD/MOVQ SSE2 instructions
-                                        -- (bitcast between a general purpose
-                                        -- register and a float register).
-                                        -- Format is input format, output format is
-                                        -- calculated in the 'movdOutFormat' function.
+        -- | MOVD/MOVQ SSE2 instructions
+        -- (bitcast between a general purpose register and a float register).
+        | MOVD
+           Format -- ^ input format
+           Format -- ^ output format
+           Operand Operand
+           -- NB: MOVD stores both the input and output formats. This is because
+           -- neither format fully determines the other, as either might be
+           -- a vector format, and we need to know the exact format in order to
+           -- correctly spill/unspill. See #25659.
         | CMOV   Cond Format Operand Reg
         | MOVZxL      Format Operand Operand
               -- ^ The format argument is the size of operand 1 (the number of bits we keep)
@@ -377,10 +381,10 @@ regUsageOfInstr platform instr
       -- (largely to avoid partial register stalls)
       | otherwise
       -> usageRW fmt src dst
-    MOVD   fmt src dst    ->
+    MOVD fmt1 fmt2 src dst    ->
       -- NB: MOVD and MOVQ always zero any remaining upper part of destination,
       -- so the destination is "written" not "modified".
-      usageRW' fmt (movdOutFormat fmt) src dst
+      usageRW' fmt1 fmt2 src dst
     CMOV _ fmt src dst    -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
     MOVZxL fmt src dst    -> usageRW fmt src dst
     MOVSxL fmt src dst    -> usageRW fmt src dst
@@ -650,14 +654,6 @@ interesting :: Platform -> Reg -> Bool
 interesting _        (RegVirtual _)              = True
 interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
 
-movdOutFormat :: Format -> Format
-movdOutFormat format = case format of
-  II32 -> FF32
-  II64 -> FF64
-  FF32 -> II32
-  FF64 -> II64
-  _    -> pprPanic "X86: improper format for movd/movq" (ppr format)
-
 
 -- | Applies the supplied function to all registers in instructions.
 -- Typically used to change virtual registers to real registers.
@@ -665,7 +661,7 @@ patchRegsOfInstr :: HasDebugCallStack => Platform -> Instr -> (Reg -> Reg) -> In
 patchRegsOfInstr platform instr env
   = case instr of
     MOV fmt src dst      -> MOV fmt (patchOp src) (patchOp dst)
-    MOVD fmt src dst     -> patch2 (MOVD fmt) src dst
+    MOVD fmt1 fmt2 src dst -> patch2 (MOVD fmt1 fmt2) src dst
     CMOV cc fmt src dst  -> CMOV cc fmt (patchOp src) (env dst)
     MOVZxL fmt src dst   -> patch2 (MOVZxL fmt) src dst
     MOVSxL fmt src dst   -> patch2 (MOVSxL fmt) src dst


=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -657,8 +657,8 @@ pprInstr platform i = case i of
    CMOV cc format src dst
      -> pprCondOpReg (text "cmov") format cc src dst
 
-   MOVD format src dst
-     -> pprMovdOpOp (text "mov") format src dst
+   MOVD format1 format2 src dst
+     -> pprMovdOpOp (text "mov") format1 format2 src dst
 
    MOVZxL II32 src dst
       -> pprFormatOpOp (text "mov") II32 src dst
@@ -1151,21 +1151,21 @@ pprInstr platform i = case i of
            pprOperand platform format op2
        ]
 
-   pprMovdOpOp :: Line doc -> Format -> Operand -> Operand -> doc
-   pprMovdOpOp name format op1 op2
-     = let instr = case format of
+   pprMovdOpOp :: Line doc -> Format -> Format -> Operand -> Operand -> doc
+   pprMovdOpOp name format1 format2 op1 op2
+     = let instr = case (format1, format2) of
              -- bitcasts to/from a general purpose register to a floating point
              -- register require II32 or II64.
-             II32 -> text "d"
-             II64 -> text "q"
-             FF32 -> text "d"
-             FF64 -> text "q"
-             _    -> panic "X86.Ppr.pprMovdOpOp: improper format for movd/movq."
+             (II32, _) -> text "d"
+             (II64, _) -> text "q"
+             (_, II32) -> text "d"
+             (_, II64) -> text "q"
+             _ -> panic "X86.Ppr.pprMovdOpOp: improper format for movd/movq."
        in line $ hcat [
            char '\t' <> name <> instr <> space,
-           pprOperand platform format op1,
+           pprOperand platform format1 op1,
            comma,
-           pprOperand platform (movdOutFormat format) op2
+           pprOperand platform format2 op2
            ]
 
    pprFormatImmRegOp :: Line doc -> Format -> Imm -> Reg -> Operand -> doc


=====================================
docs/users_guide/diagnostics-as-json-schema-1_0.json
=====================================
@@ -13,7 +13,10 @@
       "type": "string"
     },
     "span": {
-      "$ref": "#/$defs/span"
+      "oneOf": [
+        { "$ref": "#/$defs/span" },
+        { "type": "null" }
+      ]
     },
     "severity": {
       "description": "The diagnostic severity",


=====================================
docs/users_guide/diagnostics-as-json-schema-1_1.json
=====================================
@@ -13,7 +13,10 @@
       "type": "string"
     },
     "span": {
-      "$ref": "#/$defs/span"
+      "oneOf": [
+        { "$ref": "#/$defs/span" },
+        { "type": "null" }
+      ]
     },
     "severity": {
       "description": "The diagnostic severity",


=====================================
hadrian/cabal.project
=====================================
@@ -4,7 +4,7 @@ packages: ./
 
 -- This essentially freezes the build plan for hadrian
 -- It would be wise to keep this up to date with the state set in .gitlab/ci.sh.
-index-state: 2024-10-30T22:56:00Z
+index-state: 2025-01-27T17:45:32Z
 
 -- unordered-containers-0.2.20-r1 requires template-haskell < 2.22
 -- ghc-9.10 has template-haskell-2.22.0.0


=====================================
libraries/base/changelog.md
=====================================
@@ -11,6 +11,7 @@
   * `instance Functor NonEmpty` is now specified using `map` (rather than duplicating code). ([CLC proposal #300](https://github.com/haskell/core-libraries-committee/issues/300))
 
 ## 4.21.0.0 *TBA*
+  * Change `SrcLoc` to be strict and unboxed (finishing [CLC proposal #55](https://github.com/haskell/core-libraries-committee/issues/55))
   * Introduce `Data.Bounded` module exporting the `Bounded` typeclass (finishing [CLC proposal #208](https://github.com/haskell/core-libraries-committee/issues/208))
   * Deprecate export of `Bounded` class from `Data.Enum` ([CLC proposal #208](https://github.com/haskell/core-libraries-committee/issues/208))
   * `GHC.Desugar` has been deprecated and should be removed in GHC 9.14. ([CLC proposal #216](https://github.com/haskell/core-libraries-committee/issues/216))


=====================================
testsuite/tests/simd/should_run/T25659.hs
=====================================
@@ -0,0 +1,19 @@
+{-# LANGUAGE MagicHash, UnboxedTuples, ExtendedLiterals #-}
+import GHC.Int
+import GHC.Prim
+
+test :: (Int64X2# -> Int64X2# -> Int64X2#) -> IO ()
+test f = do
+  let a = packInt64X2# (# 0#Int64, 11#Int64 #)
+      b = packInt64X2# (# 22#Int64, 33#Int64 #)
+      c = f a b
+      (# x0, x1 #) = unpackInt64X2# a
+      (# y0, y1 #) = unpackInt64X2# b
+      (# z0, z1 #) = unpackInt64X2# c
+  putStrLn $ "a = " ++ show (I64# x0, I64# x1)
+  putStrLn $ "b = " ++ show (I64# y0, I64# y1)
+  putStrLn $ "c = " ++ show (I64# z0, I64# z1)
+{-# NOINLINE test #-}
+
+main :: IO ()
+main = test (\_ b -> b)


=====================================
testsuite/tests/simd/should_run/T25659.stdout
=====================================
@@ -0,0 +1,3 @@
+a = (0,11)
+b = (22,33)
+c = (22,33)


=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -26,6 +26,7 @@ test('word32x4_basic_baseline', [], compile_and_run, [''])
 test('word64x2_basic_baseline', [], compile_and_run, [''])
 
 test('T25658', [], compile_and_run, ['']) # #25658 is a bug with SSE2 code generation
+test('T25659', [], compile_and_run, [''])
 
 # Ensure we set the CPU features we have available.
 #



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8ec0a4dd5c4a39cc55d4013892123c93075f9590...9955f7371eb210c53f53d0e42216fefb09b4fdad

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8ec0a4dd5c4a39cc55d4013892123c93075f9590...9955f7371eb210c53f53d0e42216fefb09b4fdad
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250128/0bce508b/attachment-0001.html>