[Git][ghc/ghc][wip/supersven/imulMayOflo_x86] 2 commits: x86 CodeGen: Implement MO_S_MulMayOflo for W8

Fri Jun 30 08:28:43 UTC 2023


Sven Tennie pushed to branch wip/supersven/imulMayOflo_x86 at Glasgow Haskell Compiler / GHC


Commits:
7493dad6 by Sven Tennie at 2023-06-30T08:28:22+00:00
x86 CodeGen: Implement MO_S_MulMayOflo for W8

This case wasn't handled before. But, the test-primops test suite showed
that it actually might appear.

- - - - -
8b601d25 by Sven Tennie at 2023-06-30T08:28:30+00:00
Add test for %mulmayoflo primop

- - - - -


4 changed files:

- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- + testsuite/tests/codeGen/should_run/MulMayOflo.hs
- + testsuite/tests/codeGen/should_run/MulMayOflo_cmm.cmm
- testsuite/tests/codeGen/should_run/all.T


Changes:

=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -966,8 +966,29 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
 
         return (Fixed format eax code)
 
-
     imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+    imulMayOflo W8 a b = do
+         -- The general case (W16, W32, W64) doesn't work for W8 as its
+         -- multiplication doesn't use two registers.
+         --
+         -- The plan is:
+         -- 1. truncate and sign-extend a and b to 8bit width
+         -- 2. multiply a' = a * b in 32bit width
+         -- 3. copy and sign-extend 8bit from a' to c
+         -- 4. compare a' and c: they are equal if there was no overflow
+         (a_reg, a_code) <- getNonClobberedReg a
+         (b_reg, b_code) <- getNonClobberedReg b
+         let
+             code = a_code `appOL` b_code `appOL`
+                        toOL [
+                           MOVSxL II8 (OpReg a_reg) (OpReg a_reg),
+                           MOVSxL II8 (OpReg b_reg) (OpReg b_reg),
+                           IMUL II32 (OpReg b_reg) (OpReg a_reg),
+                           MOVSxL II8 (OpReg a_reg) (OpReg eax),
+                           CMP II16 (OpReg a_reg) (OpReg eax),
+                           SETCC NE (OpReg eax)
+                        ]
+         return (Fixed II8 eax code)
     imulMayOflo rep a b = do
          (a_reg, a_code) <- getNonClobberedReg a
          b_code <- getAnyReg b


=====================================
testsuite/tests/codeGen/should_run/MulMayOflo.hs
=====================================
@@ -0,0 +1,13 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+module Main where
+
+import GHC.Exts
+
+foreign import prim "runCmmzh" runCmmzh# :: Word# -> Word#
+
+main :: IO ()
+main = print . show $ W# (runCmmzh# 42##)


=====================================
testsuite/tests/codeGen/should_run/MulMayOflo_cmm.cmm
=====================================
@@ -0,0 +1,89 @@
+// Suppress empty ASSERT() optimization
+#define USE_ASSERTS_ALL_WAYS 1
+
+#include "Cmm.h"
+
+runCmmzh() {
+// BEWARE: Cmm isn't really type checked. I.e. you may construct
+// 256::I8, which is obviously wrong and let's to strange behaviour.
+
+// N.B. the contract of '%mulmayoflo' is a bit weak:
+// "Return non-zero if there is any possibility that the signed multiply
+// of a and b might overflow.  Return zero only if you are absolutely sure
+// that it won't overflow.  If in doubt, return non-zero." (Stg.h)
+// So, this test might be a bit too strict for some architectures as it
+// expects a perfect implementation.
+
+  // --- I8
+  ASSERT(%mulmayoflo(1::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(0::I8, 0::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-1::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(1::I8, -1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(127::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(1::I8, 127::I8) == 0::I8);
+  ASSERT(%mulmayoflo(63::I8, 2::I8) == 0::I8);
+  ASSERT(%mulmayoflo(2::I8, 63::I8) == 0::I8);
+  ASSERT(%mulmayoflo(64::I8, 2::I8) > 0::I8);
+  ASSERT(%mulmayoflo(2::I8, 64::I8) > 0::I8);
+  ASSERT(%mulmayoflo(127::I8, -1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-1::I8, 127::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-128::I8, 1::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-64::I8, 2::I8) == 0::I8);
+  ASSERT(%mulmayoflo(2::I8, -64::I8) == 0::I8);
+  ASSERT(%mulmayoflo(-65::I8, 2::I8) > 0::I8);
+  ASSERT(%mulmayoflo(2::I8, -65::I8) > 0::I8);
+
+  // --- I16
+  ASSERT(%mulmayoflo(1::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(0::I16, 0::I16) == 0::I16);
+  ASSERT(%mulmayoflo(-1::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(1::I16, -1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(32767::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(1::I16,32767 ::I16) == 0::I16);
+  ASSERT(%mulmayoflo(16383::I16, 2::I16) == 0::I16);
+  ASSERT(%mulmayoflo(2::I16, 16383::I16) == 0::I16);
+  ASSERT(%mulmayoflo(16384::I16, 2::I16) > 0::I16);
+  ASSERT(%mulmayoflo(2::I16, 16384::I16) > 0::I16);
+  ASSERT(%mulmayoflo(-16384::I16, 2::I16) == 0::I16);
+  ASSERT(%mulmayoflo(2::I16, -16384::I16) == 0::I16);
+  ASSERT(%mulmayoflo(-32768::I16, 1::I16) == 0::I16);
+  ASSERT(%mulmayoflo(1::I16, -32768::I16) == 0::I16);
+
+  // -- I32
+  ASSERT(%mulmayoflo(1::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(0::I32, 0::I32) == 0::I32);
+  ASSERT(%mulmayoflo(-1::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1::I32, -1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2147483647::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1::I32, 2147483647::I32) == 0::I32);
+  ASSERT(%mulmayoflo(-2147483648::I32, 1::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1::I32, -2147483648::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2::I32, 1073741823::I32) == 0::I32);
+  ASSERT(%mulmayoflo(1073741823::I32, 2::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2::I32, 1073741824::I32) > 0::I32);
+  ASSERT(%mulmayoflo(1073741824::I32, 2::I32) > 0::I32);
+  ASSERT(%mulmayoflo(2::I32, -1073741824::I32) == 0::I32);
+  ASSERT(%mulmayoflo(-1073741824::I32, 2::I32) == 0::I32);
+  ASSERT(%mulmayoflo(2::I32, -1073741825::I32) > 0::I32);
+  ASSERT(%mulmayoflo(-1073741825::I32, 2::I32) > 0::I32);
+
+  // -- I64
+  ASSERT(%mulmayoflo(1::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(0::I64, 0::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-1::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(1::I64, -1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(9223372036854775807::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(1::I64, 9223372036854775807::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-9223372036854775808::I64, 1::I64) == 0::I64);
+  ASSERT(%mulmayoflo(1::I64, -9223372036854775808::I64) == 0::I64);
+  ASSERT(%mulmayoflo(4611686018427387903::I64, 2::I64) == 0::I64);
+  ASSERT(%mulmayoflo(2::I64, 4611686018427387903::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-4611686018427387904::I64, 2::I64) == 0::I64);
+  ASSERT(%mulmayoflo(2::I64, -4611686018427387904::I64) == 0::I64);
+  ASSERT(%mulmayoflo(-4611686018427387905::I64, 2::I64) > 0::I64);
+  ASSERT(%mulmayoflo(2::I64, -4611686018427387905::I64) > 0::I64);
+  ASSERT(%mulmayoflo(4611686018427387904::I64, 2::I64) > 0::I64);
+  ASSERT(%mulmayoflo(2::I64, 4611686018427387904::I64) > 0::I64);
+
+  return(0);
+}


=====================================
testsuite/tests/codeGen/should_run/all.T
=====================================
@@ -225,3 +225,8 @@ test('T22296',[only_ways(llvm_ways)
               ,unless(arch('x86_64'), skip)],compile_and_run,[''])
 test('T22798', expect_broken_for(23272, ['ghci-opt']), compile_and_run, ['-fregs-graph'])
 test('CheckBoundsOK', normal, compile_and_run, ['-fcheck-prim-bounds'])
+
+test('MulMayOflo',
+     [ omit_ways(['ghci']), js_skip, ignore_stdout],
+     multi_compile_and_run,
+     ['MulMayOflo', [('MulMayOflo_cmm.cmm', '')], ''])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/de4739961ee2d1ca2a269f77106140a444a73e90...8b601d25f707b5ee5fa5dee87447d9537c7bed69

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/de4739961ee2d1ca2a269f77106140a444a73e90...8b601d25f707b5ee5fa5dee87447d9537c7bed69
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20230630/7c29fac1/attachment-0001.html>