[commit: ghc] master: LlvmCodeGen: add support for MO_U_Mul2 CallishMachOp (82ffc80)

Mon Jul 20 15:06:10 UTC 2015

Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/82ffc80df573512f788524c4616db3c08fc9f125/ghc

>---------------------------------------------------------------

commit 82ffc80df573512f788524c4616db3c08fc9f125
Author: Michal Terepeta <michal.terepeta at gmail.com>
Date:   Mon Jul 20 15:43:31 2015 +0200

    LlvmCodeGen: add support for MO_U_Mul2 CallishMachOp
    
    This adds support MO_U_Mul2 to the LLVM backend by simply using 'mul'
    instruction but operating at twice the bit width (e.g., for 64 bit
    words we will generate mul that operates on 128 bits and then extract
    the two 64 bit values for the result of the CallishMachOp).
    
    Test Plan: validate
    
    Reviewers: rwbarton, austin, bgamari
    
    Reviewed By: bgamari
    
    Subscribers: thomie
    
    Differential Revision: https://phabricator.haskell.org/D1068
    
    GHC Trac Issues: #9430


>---------------------------------------------------------------

82ffc80df573512f788524c4616db3c08fc9f125
 compiler/codeGen/StgCmmPrim.hs              |  3 ++-
 compiler/llvmGen/LlvmCodeGen/CodeGen.hs     | 34 +++++++++++++++++++++++++++++
 testsuite/tests/primops/should_run/T9430.hs | 18 +++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 563f6dc..243e2a3 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -823,7 +823,8 @@ callishPrimOpSupported dflags op
                          || llvm      -> Left (MO_SubIntC    (wordWidth dflags))
                      | otherwise      -> Right genericIntSubCOp
 
-      WordMul2Op     | ncg && x86ish  -> Left (MO_U_Mul2     (wordWidth dflags))
+      WordMul2Op     | ncg && x86ish
+                         || llvm      -> Left (MO_U_Mul2     (wordWidth dflags))
                      | otherwise      -> Right genericWordMul2Op
 
       _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 15350bc..fb02120 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -256,6 +256,38 @@ genCall t@(PrimTarget op) [] args
                 `appOL` stmts4 `snocOL` call
     return (stmts, top1 ++ top2)
 
+-- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
+-- twice the width (we first zero-extend them), e.g., on 64-bit arch we will
+-- generate 'mul' on 128-bit operands. Then we only need some plumbing to
+-- extract the two 64-bit values out of 128-bit result.
+genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = do
+    let width = widthToLlvmInt w
+        bitWidth = widthInBits w
+        width2x = LMInt (bitWidth * 2)
+    -- First zero-extend the operands ('mul' instruction requires the operands
+    -- and the result to be of the same type). Note that we don't use 'castVars'
+    -- because it tries to do LM_Sext.
+    (lhsVar, stmts1, decls1) <- exprToVar lhs
+    (rhsVar, stmts2, decls2) <- exprToVar rhs
+    (lhsExt, stmt3) <- doExpr width2x $ Cast LM_Zext lhsVar width2x
+    (rhsExt, stmt4) <- doExpr width2x $ Cast LM_Zext rhsVar width2x
+    -- Do the actual multiplication (note that the result is also 2x width).
+    (retV, stmt5) <- doExpr width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
+    -- Extract the lower bits of the result into retL.
+    (retL, stmt6) <- doExpr width $ Cast LM_Trunc retV width
+    -- Now we right-shift the higher bits by width.
+    let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
+    (retShifted, stmt7) <- doExpr width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
+    -- And extract them into retH.
+    (retH, stmt8) <- doExpr width $ Cast LM_Trunc retShifted width
+    dstRegL <- getCmmReg (CmmLocal dstL)
+    dstRegH <- getCmmReg (CmmLocal dstH)
+    let storeL = Store retL dstRegL
+        storeH = Store retH dstRegH
+        stmts = stmts1 `appOL` stmts2 `appOL`
+           toOL [ stmt3 , stmt4, stmt5, stmt6, stmt7, stmt8, storeL, storeH ]
+    return (stmts, decls1 ++ decls2)
+
 -- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
 -- which we need to extract the actual values.
 genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
@@ -621,6 +653,8 @@ cmmPrimOpFunctions mop = do
     MO_S_QuotRem {}  -> unsupported
     MO_U_QuotRem {}  -> unsupported
     MO_U_QuotRem2 {} -> unsupported
+    -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
+    -- appropriate case of genCall.
     MO_U_Mul2 {}     -> unsupported
     MO_WriteBarrier  -> unsupported
     MO_Touch         -> unsupported
diff --git a/testsuite/tests/primops/should_run/T9430.hs b/testsuite/tests/primops/should_run/T9430.hs
index 571b6db..aec2d26 100644
--- a/testsuite/tests/primops/should_run/T9430.hs
+++ b/testsuite/tests/primops/should_run/T9430.hs
@@ -73,3 +73,21 @@ main = do
       checkW (1, minBound + 1) plusWord2# maxBound 2
     check "plusWord2# 2 maxBound" $
       checkW (1, minBound + 1) plusWord2# 2 maxBound
+
+    check "timesWord2# maxBound 0" $ checkW (0, 0) timesWord2# maxBound 0
+    check "timesWord2# 0 maxBound" $ checkW (0, 0) timesWord2# 0 maxBound
+    check "timesWord2# maxBound 1" $ checkW (0, maxBound) timesWord2# maxBound 1
+    check "timesWord2# 1 maxBound" $ checkW (0, maxBound) timesWord2# 1 maxBound
+    -- Overflows
+    check "timesWord2# " $ checkW (1, 0) timesWord2# (2 ^ 63) 2
+    check "timesWord2# " $ checkW (2, 0) timesWord2# (2 ^ 63) (2 ^ 2)
+    check "timesWord2# " $ checkW (4, 0) timesWord2# (2 ^ 63) (2 ^ 3)
+    check "timesWord2# " $ checkW (8, 0) timesWord2# (2 ^ 63) (2 ^ 4)
+    check "timesWord2# maxBound 2" $
+      checkW (1, maxBound - 1) timesWord2# maxBound 2
+    check "timesWord2# 2 maxBound" $
+      checkW (1, maxBound - 1) timesWord2# 2 maxBound
+    check "timesWord2# maxBound 3" $
+      checkW (2, maxBound - 2) timesWord2# maxBound 3
+    check "timesWord2# 3 maxBound" $
+      checkW (2, maxBound - 2) timesWord2# 3 maxBound