[Git][ghc/ghc][wip/andreask/fma_globals] x86-ncg: Fix fma codegen when arguments are globals.

Mon Mar 4 15:17:21 UTC 2024


sheaf pushed to branch wip/andreask/fma_globals at Glasgow Haskell Compiler / GHC


Commits:
ad0ecb8d by Andreas Klebinger at 2024-03-04T16:17:12+01:00
x86-ncg: Fix fma codegen when arguments are globals.

Fix a bug in the x86 ncg where results would be wrong when the desired output
register and one of the input registers were the same global.

Also adds a tiny optimization to make use of the memory addressing
support when convenient.

Fixes #24496

- - - - -


4 changed files:

- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- + testsuite/tests/primops/should_run/T24496.hs
- + testsuite/tests/primops/should_run/T24496.stdout
- testsuite/tests/primops/should_run/all.T


Changes:

=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -3424,6 +3424,7 @@ genFMA3Code :: Width
             -> FMASign
             -> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
 genFMA3Code w signs x y z = do
+  -- platform <- getPlatform
 
   -- For the FMA instruction, we want to compute x * y + z
   --
@@ -3445,17 +3446,46 @@ genFMA3Code w signs x y z = do
   --
   -- Currently we follow neither of these optimisations,
   -- opting to always use fmadd213 for simplicity.
+  --
+  -- We would like to compute the result directly into the requested register.
+  -- To do so we must first compute `x` into the destination register. This is
+  -- only possible if the other arguments don't use the destination register.
+  -- We check for this and if there is a conflict we move the result only after
+  -- the computation. See #24496 how this went wrong in the past.
   let rep = floatFormat w
   (y_reg, y_code) <- getNonClobberedReg y
-  (z_reg, z_code) <- getNonClobberedReg z
+  (z_op, z_code) <- getNonClobberedOperand z
   x_code <- getAnyReg x
+  x_tmp <- getNewRegNat rep
   let
      fma213 = FMA3 rep signs FMA213
-     code dst
-        = y_code `appOL`
+
+     code, code_direct, code_mov :: Reg -> InstrBlock
+     -- Ideal: Compute the result directly into dst
+     code_direct dst = x_code  dst   `snocOL`
+                       fma213 z_op y_reg dst
+     -- Fallback: Compute the result into a tmp reg and then move it.
+     code_mov dst    = x_code x_tmp `snocOL`
+                       fma213 z_op y_reg x_tmp `snocOL`
+                       MOV rep (OpReg x_tmp) (OpReg dst)
+
+     code dst =
+         y_code `appOL`
           z_code `appOL`
-          x_code dst `snocOL`
-          fma213 (OpReg z_reg) y_reg dst
+          ( if arg_regs_conflict then code_mov dst else code_direct dst )
+
+      where
+
+        arg_regs_conflict =
+          y_reg == dst ||
+          case z_op of
+            OpReg z_reg -> z_reg == dst
+            OpAddr amode -> dst `elem` addrModeRegs amode
+            OpImm {} -> False
+
+
+  -- NB: Computing the result into a desired register using Any can be tricky.
+  -- So for now keep it simple. (See #24496).
   return (Any rep code)
 
 -----------


=====================================
testsuite/tests/primops/should_run/T24496.hs
=====================================
@@ -0,0 +1,18 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+import GHC.Exts
+
+twoProductFloat# :: Float# -> Float# -> (# Float#, Float# #)
+twoProductFloat# x y = let !r = x `timesFloat#` y
+                       in (# r, fmsubFloat# x y r #)
+{-# NOINLINE twoProductFloat# #-}
+
+twoProductDouble# :: Double# -> Double# -> (# Double#, Double# #)
+twoProductDouble# x y = let !r = x *## y
+                        in (# r, fmsubDouble# x y r #)
+{-# NOINLINE twoProductDouble# #-}
+
+main :: IO ()
+main = do
+    print $ case twoProductFloat# 2.0# 3.0# of (# r, s #) -> (F# r, F# s)
+    print $ case twoProductDouble# 2.0## 3.0## of (# r, s #) -> (D# r, D# s)


=====================================
testsuite/tests/primops/should_run/T24496.stdout
=====================================
@@ -0,0 +1,2 @@
+(6.0,0.0)
+(6.0,0.0)


=====================================
testsuite/tests/primops/should_run/all.T
=====================================
@@ -77,3 +77,10 @@ test('FMA_ConstantFold'
 test('T21624', normal, compile_and_run, [''])
 test('T23071', ignore_stdout, compile_and_run, [''])
 test('T22710', normal, compile_and_run, [''])
+test('T24496'
+    , [ when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))
+      , js_skip # JS backend doesn't have an FMA implementation
+      , when(arch('wasm32'), skip)
+      , when(have_llvm(), extra_ways(["optllvm"]))
+      ]
+    , compile_and_run, ['-O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ad0ecb8d48764e1fec76b4231355c4492cd219ba

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/ad0ecb8d48764e1fec76b4231355c4492cd219ba
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240304/48f19079/attachment-0001.html>