[Git][ghc/ghc][master] NCG(x86): Compile add+shift as lea if possible.
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Mon Aug 8 20:46:17 UTC 2022
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
20457d77 by Andreas Klebinger at 2022-08-08T14:42:26+02:00
NCG(x86): Compile add+shift as lea if possible.
- - - - -
4 changed files:
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- + testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
- + testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
- testsuite/tests/codeGen/should_gen_asm/all.T
Changes:
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -1048,10 +1048,29 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
--------------------
add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
+ -- x + imm
add_code rep x (CmmLit (CmmInt y _))
| is32BitInteger y
, rep /= W8 -- LEA doesn't support byte size (#18614)
= add_int rep x y
+ -- x + (y << imm)
+ add_code rep x y
+ -- Byte size is not supported and 16bit size is slow when computed via LEA
+ | rep /= W8 && rep /= W16
+ -- 2^3 = 8 is the highest multiplicator supported by LEA.
+ , Just (x,y,shift_bits) <- get_shift x y
+ = add_shiftL rep x y (fromIntegral shift_bits)
+ where
+ -- x + (y << imm)
+ get_shift x (CmmMachOp (MO_Shl _w) [y, CmmLit (CmmInt shift_bits _)])
+ | shift_bits <= 3
+ = Just (x, y, shift_bits)
+ -- (y << imm) + x
+ get_shift (CmmMachOp (MO_Shl _w) [y, CmmLit (CmmInt shift_bits _)]) x
+ | shift_bits <= 3
+ = Just (x, y, shift_bits)
+ get_shift _ _
+ = Nothing
add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
where format = intFormat rep
-- TODO: There are other interesting patterns we want to replace
@@ -1066,6 +1085,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
-- our three-operand add instruction:
+ add_int :: (Width -> CmmExpr -> Integer -> NatM Register)
add_int width x y = do
(x_reg, x_code) <- getSomeReg x
let
@@ -1079,6 +1099,22 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = -- dyadic MachOps
--
return (Any format code)
+ -- x + (y << shift_bits) using LEA
+ add_shiftL :: (Width -> CmmExpr -> CmmExpr -> Int -> NatM Register)
+ add_shiftL width x y shift_bits = do
+ (x_reg, x_code) <- getSomeReg x
+ (y_reg, y_code) <- getSomeReg y
+ let
+ format = intFormat width
+ imm = ImmInt 0
+ code dst
+ = (x_code `appOL` y_code) `snocOL`
+ LEA format
+ (OpAddr (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg (2 ^ shift_bits)) imm))
+ (OpReg dst)
+ --
+ return (Any format code)
+
----------------------
-- See Note [DIV/IDIV for bytes]
=====================================
testsuite/tests/codeGen/should_gen_asm/AddMulX86.asm
=====================================
@@ -0,0 +1,46 @@
+.section .text
+.align 8
+.align 8
+ .quad 8589934604
+ .quad 0
+ .long 14
+ .long 0
+.globl AddMulX86_f_info
+.type AddMulX86_f_info, @function
+AddMulX86_f_info:
+.LcAx:
+ leaq (%r14,%rsi,8),%rbx
+ jmp *(%rbp)
+ .size AddMulX86_f_info, .-AddMulX86_f_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_f_closure
+.type AddMulX86_f_closure, @object
+AddMulX86_f_closure:
+ .quad AddMulX86_f_info
+.section .text
+.align 8
+.align 8
+ .quad 8589934604
+ .quad 0
+ .long 14
+ .long 0
+.globl AddMulX86_g_info
+.type AddMulX86_g_info, @function
+AddMulX86_g_info:
+.LcAL:
+ leaq (%r14,%rsi,8),%rbx
+ jmp *(%rbp)
+ .size AddMulX86_g_info, .-AddMulX86_g_info
+.section .data
+.align 8
+.align 1
+.globl AddMulX86_g_closure
+.type AddMulX86_g_closure, @object
+AddMulX86_g_closure:
+ .quad AddMulX86_g_info
+.section .note.GNU-stack,"", at progbits
+.ident "GHC 9.3.20220228"
+
+
=====================================
testsuite/tests/codeGen/should_gen_asm/AddMulX86.hs
=====================================
@@ -0,0 +1,12 @@
+{-# LANGUAGE MagicHash #-}
+
+module AddMulX86 where
+
+import GHC.Exts
+
+f :: Int# -> Int# -> Int#
+f x y =
+ x +# (y *# 8#) -- Should result in a lea instruction, which we grep the assembly output for.
+
+g x y =
+ (y *# 8#) +# x -- Should result in a lea instruction, which we grep the assembly output for.
=====================================
testsuite/tests/codeGen/should_gen_asm/all.T
=====================================
@@ -10,3 +10,4 @@ test('memset-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
test('bytearray-memset-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
test('bytearray-memcpy-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])
test('T18137', [when(opsys('darwin'), skip), only_ways(llvm_ways)], compile_grep_asm, ['hs', False, '-fllvm -split-sections'])
+test('AddMulX86', is_amd64_codegen, compile_cmp_asm, ['hs', '-dno-typeable-binds'])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/20457d775885d6c3df020d204da9a7acfb3c2e5a
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/20457d775885d6c3df020d204da9a7acfb3c2e5a
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20220808/0c391125/attachment-0001.html>
More information about the ghc-commits
mailing list