[Git][ghc/ghc][wip/andreask/fma_globals] x86-ncg: Fix fma codegen when arguments are globals.

Andreas Klebinger (@AndreasK) gitlab at gitlab.haskell.org
Fri Mar 1 01:39:23 UTC 2024



Andreas Klebinger pushed to branch wip/andreask/fma_globals at Glasgow Haskell Compiler / GHC


Commits:
c24db25d by Andreas Klebinger at 2024-03-01T02:25:33+01:00
x86-ncg: Fix fma codegen when arguments are globals.

Fix a bug in the x86 ncg where results would be wrong when the desired output
register and one of the input registers were the same global.

Also adds a tiny optimization to make use of the memory addressing
support when convenient.

Fixes #24496

- - - - -


3 changed files:

- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- + testsuite/tests/primops/should_run/T24496.hs
- testsuite/tests/primops/should_run/all.T


Changes:

=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -3424,6 +3424,7 @@ genFMA3Code :: Width
             -> FMASign
             -> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
 genFMA3Code w signs x y z = do
+  -- platform <- getPlatform
 
   -- For the FMA instruction, we want to compute x * y + z
   --
@@ -3445,17 +3446,41 @@ genFMA3Code w signs x y z = do
   --
   -- Currently we follow neither of these optimisations,
   -- opting to always use fmadd213 for simplicity.
+  --
+
   let rep = floatFormat w
   (y_reg, y_code) <- getNonClobberedReg y
-  (z_reg, z_code) <- getNonClobberedReg z
+  (z_op, z_code) <- getNonClobberedOperand z
   x_code <- getAnyReg x
+  x_tmp <- getNewRegNat rep
   let
-     fma213 = FMA3 rep signs FMA213
-     code dst
-        = y_code `appOL`
+     code, code_direct, code_mov :: Reg -> InstrBlock
+     -- Ideal: Compute the result directly into dst
+     code_direct dst = x_code  dst   `snocOL`
+                       fma213 z_op y_reg dst
+     -- Fallback: Compute the result into a tmp reg and then move it.
+     code_mov dst    = x_code x_tmp `snocOL`
+                       fma213 z_op y_reg x_tmp `snocOL`
+                       MOV rep (OpReg x_tmp) (OpReg dst)
+
+     code dst =
+         y_code `appOL`
           z_code `appOL`
-          x_code dst `snocOL`
-          fma213 (OpReg z_reg) y_reg dst
+          ( if arg_regs_conflict then code_mov dst else code_direct dst )
+
+      where
+        -- We would like to compute `x` into `dst`, however if `dst` overlapps
+        -- with the other arguments we would produce garbage. See #24496
+        arg_regs_conflict =
+          y_reg == dst ||
+          case z_op of
+            OpReg z_reg -> z_reg == dst
+            OpAddr amode -> dst `elem` addrModeRegs amode
+            OpImm {} -> False
+
+
+  -- NB: Computing the result into a desired register using Any can be tricky.
+  -- So for now keep it simple. (See #24496).
   return (Any rep code)
 
 -----------


=====================================
testsuite/tests/primops/should_run/T24496.hs
=====================================
@@ -0,0 +1,18 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+import GHC.Exts
+
+twoProductFloat# :: Float# -> Float# -> (# Float#, Float# #)
+twoProductFloat# x y = let !r = x `timesFloat#` y
+                       in (# r, fmsubFloat# x y r #)
+{-# NOINLINE twoProductFloat# #-}
+
+twoProductDouble# :: Double# -> Double# -> (# Double#, Double# #)
+twoProductDouble# x y = let !r = x *## y
+                        in (# r, fmsubDouble# x y r #)
+{-# NOINLINE twoProductDouble# #-}
+
+main :: IO ()
+main = do
+    print $ case twoProductFloat# 2.0# 3.0# of (# r, s #) -> (F# r, F# s)
+    print $ case twoProductDouble# 2.0## 3.0## of (# r, s #) -> (D# r, D# s)


=====================================
testsuite/tests/primops/should_run/all.T
=====================================
@@ -77,3 +77,11 @@ test('FMA_ConstantFold'
 test('T21624', normal, compile_and_run, [''])
 test('T23071', ignore_stdout, compile_and_run, [''])
 test('T22710', normal, compile_and_run, [''])
+test('T24496', normal, compile_and_run, [''])
+test('T24496'
+    , [ when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))
+      , js_skip # JS backend doesn't have an FMA implementation
+      , when(arch('wasm32'), skip)
+      , when(have_llvm(), extra_ways(["optllvm"]))
+      ]
+    , compile_and_run, ['-O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c24db25d1c178352e57fa6ae797ab317e0cdfaf8

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/c24db25d1c178352e57fa6ae797ab317e0cdfaf8
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240229/96fb9460/attachment-0001.html>


More information about the ghc-commits mailing list