[Git][ghc/ghc][wip/andreask/fma_globals] x86-ncg: Fix fma codegen when arguments are globals
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Mon Mar 4 15:19:54 UTC 2024
sheaf pushed to branch wip/andreask/fma_globals at Glasgow Haskell Compiler / GHC
Commits:
4af8ca2b by Andreas Klebinger at 2024-03-04T16:19:33+01:00
x86-ncg: Fix fma codegen when arguments are globals
Fix a bug in the x86 ncg where results would be wrong when the desired output
register and one of the input registers were the same global.
Also adds a tiny optimization to make use of the memory addressing
support when convenient.
Fixes #24496
- - - - -
4 changed files:
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- + testsuite/tests/primops/should_run/T24496.hs
- + testsuite/tests/primops/should_run/T24496.stdout
- testsuite/tests/primops/should_run/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -3424,7 +3424,6 @@ genFMA3Code :: Width
-> FMASign
-> CmmExpr -> CmmExpr -> CmmExpr -> NatM Register
genFMA3Code w signs x y z = do
-
-- For the FMA instruction, we want to compute x * y + z
--
-- There are three possible instructions we could emit:
@@ -3445,17 +3444,45 @@ genFMA3Code w signs x y z = do
--
-- Currently we follow neither of these optimisations,
-- opting to always use fmadd213 for simplicity.
+ --
+ -- We would like to compute the result directly into the requested register.
+ -- To do so we must first compute `x` into the destination register. This is
+ -- only possible if the other arguments don't use the destination register.
+ -- We check for this and if there is a conflict we move the result only after
+ -- the computation. See #24496 how this went wrong in the past.
let rep = floatFormat w
(y_reg, y_code) <- getNonClobberedReg y
- (z_reg, z_code) <- getNonClobberedReg z
+ (z_op, z_code) <- getNonClobberedOperand z
x_code <- getAnyReg x
+ x_tmp <- getNewRegNat rep
let
fma213 = FMA3 rep signs FMA213
- code dst
- = y_code `appOL`
+
+ code, code_direct, code_mov :: Reg -> InstrBlock
+ -- Ideal: Compute the result directly into dst
+ code_direct dst = x_code dst `snocOL`
+ fma213 z_op y_reg dst
+ -- Fallback: Compute the result into a tmp reg and then move it.
+ code_mov dst = x_code x_tmp `snocOL`
+ fma213 z_op y_reg x_tmp `snocOL`
+ MOV rep (OpReg x_tmp) (OpReg dst)
+
+ code dst =
+ y_code `appOL`
z_code `appOL`
- x_code dst `snocOL`
- fma213 (OpReg z_reg) y_reg dst
+ ( if arg_regs_conflict then code_mov dst else code_direct dst )
+
+ where
+
+ arg_regs_conflict =
+ y_reg == dst ||
+ case z_op of
+ OpReg z_reg -> z_reg == dst
+ OpAddr amode -> dst `elem` addrModeRegs amode
+ OpImm {} -> False
+
+ -- NB: Computing the result into a desired register using Any can be tricky.
+ -- So for now, we keep it simple. (See #24496).
return (Any rep code)
-----------
=====================================
testsuite/tests/primops/should_run/T24496.hs
=====================================
@@ -0,0 +1,18 @@
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+import GHC.Exts
+
+twoProductFloat# :: Float# -> Float# -> (# Float#, Float# #)
+twoProductFloat# x y = let !r = x `timesFloat#` y
+ in (# r, fmsubFloat# x y r #)
+{-# NOINLINE twoProductFloat# #-}
+
+twoProductDouble# :: Double# -> Double# -> (# Double#, Double# #)
+twoProductDouble# x y = let !r = x *## y
+ in (# r, fmsubDouble# x y r #)
+{-# NOINLINE twoProductDouble# #-}
+
+main :: IO ()
+main = do
+ print $ case twoProductFloat# 2.0# 3.0# of (# r, s #) -> (F# r, F# s)
+ print $ case twoProductDouble# 2.0## 3.0## of (# r, s #) -> (D# r, D# s)
=====================================
testsuite/tests/primops/should_run/T24496.stdout
=====================================
@@ -0,0 +1,2 @@
+(6.0,0.0)
+(6.0,0.0)
=====================================
testsuite/tests/primops/should_run/all.T
=====================================
@@ -77,3 +77,10 @@ test('FMA_ConstantFold'
test('T21624', normal, compile_and_run, [''])
test('T23071', ignore_stdout, compile_and_run, [''])
test('T22710', normal, compile_and_run, [''])
+test('T24496'
+ , [ when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))
+ , js_skip # JS backend doesn't have an FMA implementation
+ , when(arch('wasm32'), skip)
+ , when(have_llvm(), extra_ways(["optllvm"]))
+ ]
+ , compile_and_run, ['-O'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4af8ca2bd52f2949c8fefa7b45fafddaee86d609
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/4af8ca2bd52f2949c8fefa7b45fafddaee86d609
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240304/4b344b0e/attachment-0001.html>
More information about the ghc-commits
mailing list