[Git][ghc/ghc][wip/ncg-simd] rework X86 MOV instruction
sheaf (@sheaf)
gitlab at gitlab.haskell.org
Tue Jun 25 15:05:44 UTC 2024
sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC
Commits:
6ac63edb by sheaf at 2024-06-25T17:05:27+02:00
rework X86 MOV instruction
- - - - -
4 changed files:
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Ppr.hs
Changes:
=====================================
compiler/GHC/CmmToAsm/Format.hs
=====================================
@@ -25,6 +25,8 @@ module GHC.CmmToAsm.Format (
formatToWidth,
formatInBytes,
isIntScalarFormat,
+ isFloatScalarFormat,
+ scalarFormatFormat,
VirtualRegFormat(..),
RegFormat(..),
takeVirtualRegs,
@@ -101,12 +103,23 @@ data ScalarFormat
| FmtDouble
deriving (Show, Eq, Ord)
+scalarFormatFormat :: ScalarFormat -> Format
+scalarFormatFormat = \case
+ FmtInt8 -> II8
+ FmtInt16 -> II16
+ FmtInt32 -> II32
+ FmtInt64 -> II64
+ FmtFloat -> FF32
+ FmtDouble -> FF64
+
+isFloatScalarFormat :: ScalarFormat -> Bool
+isFloatScalarFormat = \case
+ FmtFloat -> True
+ FmtDouble -> True
+ _ -> False
+
isIntScalarFormat :: ScalarFormat -> Bool
-isIntScalarFormat FmtInt8 = True
-isIntScalarFormat FmtInt16 = True
-isIntScalarFormat FmtInt32 = True
-isIntScalarFormat FmtInt64 = True
-isIntScalarFormat _ = False
+isIntScalarFormat = not . isFloatScalarFormat
-- | Get the integer format of this width.
intFormat :: Width -> Format
=====================================
compiler/GHC/CmmToAsm/X86/CodeGen.hs
=====================================
@@ -717,8 +717,8 @@ iselExpr64 (CmmMachOp (MO_S_MulMayOflo W64) _) = do
-- We always return a (usually false) positive.
Reg64 rhi rlo <- getNewReg64
let code = toOL [
- MOV II32 (OpImm (ImmInt 1)) (OpReg rhi),
- MOV II32 (OpImm (ImmInt 1)) (OpReg rlo)
+ MOV II32 (OpImm (ImmInt 1)) (OpReg rhi),
+ MOV II32 (OpImm (ImmInt 1)) (OpReg rlo)
]
return (RegCode64 code rhi rlo)
@@ -995,7 +995,7 @@ getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _ _])
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _ _])
| not is32Bit = do
- code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
+ code <- intLoadCode (MOV II64) addr -- 32-bit loads zero-extend
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _ _])
@@ -1067,11 +1067,11 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
-- has 8-bit version). So for 32-bit code, we'll just zero-extend.
MO_XX_Conv W8 W32
| is32Bit -> integerExtend W8 W32 MOVZxL x
- | otherwise -> integerExtend W8 W32 (MOV) x
+ | otherwise -> integerExtend W8 W32 MOV x
MO_XX_Conv W8 W16
| is32Bit -> integerExtend W8 W16 MOVZxL x
- | otherwise -> integerExtend W8 W16 (MOV) x
- MO_XX_Conv W16 W32 -> integerExtend W16 W32 (MOV) x
+ | otherwise -> integerExtend W8 W16 MOV x
+ MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x
MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x
MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
@@ -1085,9 +1085,9 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
-- the form of a movzl and print it as a movl later.
-- This doesn't apply to MO_XX_Conv since in this case we don't care about
-- the upper bits. So we can just use MOV.
- MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 (MOV) x
- MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 (MOV) x
- MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 (MOV) x
+ MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x
+ MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
+ MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
@@ -1311,7 +1311,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
(reg, exp) <- getSomeReg expr
let fmt = VecFormat len FmtInt64
return $ Any fmt (\dst -> exp `snocOL`
- (MOV2 II64 fmt (OpReg reg) (OpReg dst)) `snocOL`
+ (MOVD II64 (OpReg reg) (OpReg dst)) `snocOL`
(PUNPCKLQDQ fmt (OpReg dst) dst)
)
vector_int_broadcast _ _ c
@@ -1716,7 +1716,7 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
CmmInt 0 _ -> exp `snocOL`
(MOV FF64 (OpReg r) (OpReg dst))
CmmInt 1 _ -> exp `snocOL`
- (MOVHLPS format (OpReg r) dst)
+ (MOVHLPS format r dst)
_ -> panic "Error in offset while unpacking"
return (Any format code)
vector_float_unpack _ w c e
@@ -1756,10 +1756,10 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
let code dst
= case lit of
CmmInt 0 _ -> exp `snocOL`
- (MOV2 fmt II64 (OpReg r) (OpReg dst))
+ (MOVD II64 (OpReg r) (OpReg dst))
CmmInt 1 _ -> exp `snocOL`
- (MOVHLPS fmt (OpReg r) tmp) `snocOL`
- (MOV2 fmt II64 (OpReg tmp) (OpReg dst))
+ (MOVHLPS fmt r tmp) `snocOL`
+ (MOVD II64 (OpReg tmp) (OpReg dst))
_ -> panic "Error in offset while unpacking"
return (Any fmt code)
vector_int_unpack_sse _ w c e
@@ -1938,13 +1938,13 @@ getRegister' platform _is32Bit (CmmMachOp mop [x, y, z]) = do -- ternary MachOps
= case offset of
CmmInt 0 _ -> valExp `appOL`
vecExp `snocOL`
- (MOVHLPS fmt (OpReg vecReg) tmp) `snocOL`
- (MOV2 II64 fmt (OpReg valReg) (OpReg dst)) `snocOL`
+ (MOVHLPS fmt vecReg tmp) `snocOL`
+ (MOVD II64 (OpReg valReg) (OpReg dst)) `snocOL`
(PUNPCKLQDQ fmt (OpReg tmp) dst)
CmmInt 1 _ -> valExp `appOL`
vecExp `snocOL`
- (MOV fmt (OpReg vecReg) (OpReg dst)) `snocOL`
- (MOV2 II64 fmt (OpReg valReg) (OpReg tmp)) `snocOL`
+ (MOV II64 (OpReg vecReg) (OpReg dst)) `snocOL`
+ (MOVD II64 (OpReg valReg) (OpReg tmp)) `snocOL`
(PUNPCKLQDQ fmt (OpReg tmp) dst)
_ -> pprPanic "MO_V_Insert Int64X2: unsupported offset" (ppr offset)
in return $ Any fmt code
@@ -1978,26 +1978,29 @@ getRegister' _ _ (CmmLoad mem pk _)
loadFloatAmode (typeWidth pk) addr mem_code
getRegister' _ is32Bit (CmmLoad mem pk _)
+ -- SIMD NCG TODO: what about vectors?
| is32Bit && not (isWord64 pk)
= do
let
instr = case width of
W8 -> MOVZxL II8
+ -- We always zero-extend 8-bit loads, if we
+ -- can't think of anything better. This is because
+ -- we can't guarantee access to an 8-bit variant of every register
+ -- (esi and edi don't have 8-bit variants), so to make things
+ -- simpler we do our 8-bit arithmetic with full 32-bit registers.
_other -> MOV format
code <- intLoadCode instr mem
return (Any format code)
where
width = typeWidth pk
format = intFormat width
- -- We always zero-extend 8-bit loads, if we
- -- can't think of anything better. This is because
- -- we can't guarantee access to an 8-bit variant of every register
- -- (esi and edi don't have 8-bit variants), so to make things
- -- simpler we do our 8-bit arithmetic with full 32-bit registers.
+
-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk _)
- | not is32Bit
+ -- SIMD NCG TODO: what about vectors?
+ | not is32Bit
= do
code <- intLoadCode (MOV format) mem
return (Any format code)
@@ -2058,13 +2061,17 @@ getRegister' platform _ (CmmLit lit)
where
cmmtype = cmmLitType platform lit
vectorRegister ctype
- = do
- --NOTE:
- -- This operation is only used to zero a register. For loading a
- -- vector literal there are pack and broadcast operations
- let format = cmmTypeFormat ctype
- code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
- return (Any format code)
+ | case lit of { CmmVec fs -> all (\case { CmmInt i _ -> i == 0; CmmFloat f _ -> f == 0; _ -> False }) fs; _ -> False }
+ = -- NOTE:
+ -- This operation is only used to zero a register. For loading a
+ -- vector literal there are pack and broadcast operations
+ let format = cmmTypeFormat ctype
+ code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
+ in return (Any format code)
+ | otherwise
+ = pprPanic "getRegister': no support for (nonzero) vector literals" $
+ vcat [ text "lit:" <+> ppr lit ]
+ -- SIMD NCG TODO: can we do better here?
standardRegister ctype
= do
let format = cmmTypeFormat ctype
@@ -3519,10 +3526,7 @@ genCCall64 addr conv dest_regs args = do
-- If we are calling a varargs function
-- then we need to define ireg as well
-- as freg
- CVTTSD2SIQ II64 (OpReg freg) ireg)
- -- SLD TODO: I changed this from MOV FF64 (OpReg freg) (OpReg ireg)
- -- to CVTTSD2SIQ ...
- -- because it is going between two different types of register
+ MOVD FF64 (OpReg freg) (OpReg ireg))
| otherwise = do
arg_code <- getAnyReg arg
load_args_win rest (RegFormat ireg II64: usedInt) usedFP regs
@@ -3538,10 +3542,11 @@ genCCall64 addr conv dest_regs args = do
(arg_reg, arg_code) <- getSomeReg arg
delta <- getDeltaNat
setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
+ let fmt = floatFormat width
+ code' = code `appOL` arg_code `appOL` toOL [
SUB (intFormat (wordWidth platform)) (OpImm (ImmInt arg_size)) (OpReg rsp),
DELTA (delta-arg_size),
- MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel platform 0))]
+ MOV fmt (OpReg arg_reg) (OpAddr (spRel platform 0))]
push_args rest code'
| otherwise = do
@@ -3650,17 +3655,13 @@ genCCall64 addr conv dest_regs args = do
-- assign the results, if necessary
assign_code [] = nilOL
assign_code [dest] =
- case typeWidth rep of
- W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
- (OpReg xmm0)
- (OpReg r_dest))
- W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
- (OpReg xmm0)
- (OpReg r_dest))
- _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
+ unitOL $
+ mkRegRegMoveInstr config fmt reg r_dest
where
- rep = localRegType dest
- r_dest = getRegisterReg platform (CmmLocal dest)
+ reg = if isIntFormat fmt then rax else xmm0
+ fmt = cmmTypeFormat rep
+ rep = localRegType dest
+ r_dest = getRegisterReg platform (CmmLocal dest)
assign_code _many = panic "genForeignCall.assign_code many"
return (adjust_rsp `appOL`
@@ -3769,9 +3770,10 @@ genSwitch expr targets = do
tableReg <- getNewRegNat (intFormat (platformWordWidth platform))
targetReg <- getNewRegNat (intFormat (platformWordWidth platform))
let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) (EAIndex reg (platformWordSizeInBytes platform)) (ImmInt 0))
+ fmt = archWordFormat is32bit
code = e_code `appOL` toOL
- [ LEA (archWordFormat is32bit) (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
- , MOV (archWordFormat is32bit) op (OpReg targetReg)
+ [ LEA fmt (OpAddr (AddrBaseIndex EABaseRip EAIndexNone (ImmCLbl lbl))) (OpReg tableReg)
+ , MOV fmt op (OpReg targetReg)
, JMP_TBL (OpReg targetReg) ids (Section ReadOnlyData lbl) lbl
]
return code
@@ -4385,9 +4387,9 @@ genCtz64_32 bid dst src = do
-- dst = 64;
-- }
let instrs = vcode `appOL` toOL
- ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
+ ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
, OR II32 (OpReg rlo) (OpReg tmp_r)
- , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
+ , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
, JXX EQQ lbl2
, JXX ALWAYS lbl1
@@ -4432,10 +4434,10 @@ genCtzGeneric width dst src = do
src_r <- getNewRegNat format
tmp_r <- getNewRegNat format
let instrs = code_src src_r `appOL` toOL
- ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
- [ BSF format (OpReg src_r) tmp_r
- , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
- , CMOV NE format (OpReg tmp_r) dst_r
+ ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
+ [ BSF format (OpReg src_r) tmp_r
+ , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
+ , CMOV NE format (OpReg tmp_r) dst_r
]) -- NB: We don't need to zero-extend the result for the
-- W8/W16 cases because the 'MOV' insn already
-- took care of implicitly clearing the upper bits
@@ -4509,11 +4511,11 @@ genMemCpyInlineMaybe align dst src n = do
go dst src tmp (i - 4)
| i >= 2 =
unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
- unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+ unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
go dst src tmp (i - 2)
| i >= 1 =
unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
- unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+ unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
go dst src tmp (i - 1)
| otherwise = nilOL
where
@@ -4789,8 +4791,8 @@ genPext bid width dst src mask = do
toOL
[ MOVZxL format (OpReg src_r ) (OpReg src_r )
, MOVZxL format (OpReg mask_r) (OpReg mask_r)
- , PEXT II32 (OpReg mask_r) (OpReg src_r ) dst_r
- , MOVZxL format (OpReg dst_r) (OpReg dst_r) -- Truncate to op width
+ , PEXT II32 (OpReg mask_r) (OpReg src_r ) dst_r
+ , MOVZxL format (OpReg dst_r) (OpReg dst_r) -- Truncate to op width
]
else
unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)
@@ -4832,11 +4834,11 @@ genClz bid width dst src = do
src_r <- getNewRegNat format
tmp_r <- getNewRegNat format
return $ code_src src_r `appOL` toOL
- ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
- [ BSR format (OpReg src_r) tmp_r
- , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
- , CMOV NE format (OpReg tmp_r) dst_r
- , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
+ ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
+ [ BSR format (OpReg src_r) tmp_r
+ , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
+ , CMOV NE format (OpReg tmp_r) dst_r
+ , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
]) -- NB: We don't need to zero-extend the result for the
-- W8/W16 cases because the 'MOV' insn already
-- took care of implicitly clearing the upper bits
@@ -4848,7 +4850,8 @@ genWordToFloat bid width dst src =
genAtomicRead :: Width -> MemoryOrdering -> LocalReg -> CmmExpr -> NatM InstrBlock
genAtomicRead width _mord dst addr = do
- load_code <- intLoadCode (MOV (intFormat width)) addr
+ let fmt = intFormat width
+ load_code <- intLoadCode (MOV fmt) addr
return (load_code (getLocalRegReg dst))
genAtomicWrite :: Width -> MemoryOrdering -> CmmExpr -> CmmExpr -> NatM InstrBlock
=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -32,6 +32,7 @@ module GHC.CmmToAsm.X86.Instr
, mkStackDeallocInstr
, mkSpillInstr
, mkRegRegMoveInstr
+ , movInstr
, jumpDestsOfInstr
, canFallthroughTo
, patchRegsOfInstr
@@ -202,19 +203,13 @@ data Instr
| DELTA Int
-- Moves.
- | MOV Format Operand Operand
+ | MOV Format Operand Operand
-- ^ N.B. Due to AT&T assembler quirks, when used with 'II64'
-- 'Format' immediate source and memory target operand, the source
-- operand is interpreted to be a 32-bit sign-extended value.
-- True 64-bit operands need to be either first moved to a register or moved
-- with @MOVABS@; we currently do not use this instruction in GHC.
-- See https://stackoverflow.com/questions/52434073/whats-the-difference-between-the-x86-64-att-instructions-movq-and-movabsq.
- | MOV2 Format Format Operand Operand
- -- ^ Like MOV, but between two different kinds of registers
- -- (e.g. moving rax to xmm1)
- --
- -- SIMD NCG TODO: this is a bit of a hack, but the alternative would
- -- be to have MOV store two Formats to handle xmm -> rax and rax -> xmm.
| MOVD Format Operand Operand -- ^ MOVD/MOVQ SSE2 instructions
-- (bitcast between a general purpose
@@ -378,12 +373,20 @@ data Instr
| INSERTPS Format Imm Operand Reg
-- move operations
- | VMOVU Format Operand Operand
+
+ -- | SSE2 unaligned move of floating-point vectors
| MOVU Format Operand Operand
+ -- | AVX unaligned move of floating-point vectors
+ | VMOVU Format Operand Operand
+ -- | SSE2 move between memory and low-part of an xmm register
| MOVL Format Operand Operand
+ -- | SSE move between memory and high-part of an xmm register
| MOVH Format Operand Operand
+ -- | SSE aligned move of floating-point vectors
| MOVA Format Operand Operand
+ -- | SSE2 unaligned move of integer vectors
| MOVDQU Format Operand Operand
+ -- | AVX unaligned move of integer vectors
| VMOVDQU Format Operand Operand
-- logic operations
@@ -403,7 +406,9 @@ data Instr
| SHUFPD Format Imm Operand Reg
| VSHUFPD Format Imm Operand Reg Reg
- | MOVHLPS Format Operand Reg
+ -- | Move two 32-bit floats from the high part of an xmm register
+ -- to the low part of another xmm register.
+ | MOVHLPS Format Reg Reg
| PUNPCKLQDQ Format Operand Reg
-- Shift
@@ -426,9 +431,21 @@ data FMAPermutation = FMA132 | FMA213 | FMA231
regUsageOfInstr :: Platform -> Instr -> RegUsage
regUsageOfInstr platform instr
= case instr of
- MOV fmt src dst -> usageRW fmt src dst
- MOV2 srcFmt dstFmt src dst -> mkRU (use_R srcFmt src []) (use_R dstFmt dst [])
- MOVD fmt src dst -> mkRU (use_R fmt src []) (use_R (movdOutFormat fmt) dst [])
+ MOV fmt src dst
+ -- MOVSS/MOVSD preserve the upper half of vector registers,
+ -- but only for reg-2-reg moves
+ | VecFormat _ sFmt <- fmt
+ , isFloatScalarFormat sFmt
+ , OpReg {} <- src
+ , OpReg {} <- dst
+ -> usageRM fmt src dst
+ -- other MOV instructions zero any remaining upper part of the destination
+ -- (largely to avoid partial register stalls)
+ | otherwise
+ -> usageRW fmt src dst
+ MOVD fmt src dst ->
+ -- NB: MOVD/MOVQ always zero any remaining upper part of destination
+ mkRU (use_R fmt src []) (use_R (movdOutFormat fmt) dst [])
CMOV _ fmt src dst -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
MOVZxL fmt src dst -> usageRW fmt src dst
MOVSxL fmt src dst -> usageRW fmt src dst
@@ -566,7 +583,7 @@ regUsageOfInstr platform instr
PSLLDQ fmt off dst -> mkRU (use_R fmt off []) [mk fmt dst]
MOVHLPS fmt src dst
- -> mkRU (use_R fmt src []) [mk fmt dst]
+ -> mkRU [mk fmt src] [mk fmt dst]
PUNPCKLQDQ fmt src dst
-> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
@@ -680,7 +697,6 @@ patchRegsOfInstr :: HasDebugCallStack => Platform -> Instr -> (Reg -> Reg) -> In
patchRegsOfInstr platform instr env
= case instr of
MOV fmt src dst -> MOV fmt (patchOp src) (patchOp dst)
- MOV2 srcFmt dstFmt src dst -> MOV2 srcFmt dstFmt (patchOp src) (patchOp dst)
MOVD fmt src dst -> patch2 (MOVD fmt) src dst
CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst)
MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst
@@ -805,7 +821,7 @@ patchRegsOfInstr platform instr env
-> PSRLDQ fmt (patchOp off) (env dst)
MOVHLPS fmt src dst
- -> MOVHLPS fmt (patchOp src) (env dst)
+ -> MOVHLPS fmt (env src) (env dst)
PUNPCKLQDQ fmt src dst
-> PUNPCKLQDQ fmt (patchOp src) (env dst)
@@ -914,11 +930,12 @@ mkLoadInstr
-> [Instr]
mkLoadInstr config (RegFormat reg fmt) delta slot =
- [ movInstr config fmt (OpAddr (spRel platform off)) (OpReg reg) ]
+ [ movInstr config fmt (OpAddr (spRel platform off)) (OpReg reg) ]
where
platform = ncgPlatform config
off = spillSlotToOffset platform slot - delta
+-- | A move instruction for moving the entire contents of an operand.
movInstr :: NCGConfig -> Format -> (Operand -> Operand -> Instr)
movInstr config fmt =
case fmt of
@@ -1028,32 +1045,40 @@ takeRegRegMoveInstr
takeRegRegMoveInstr platform = \case
MOV fmt (OpReg r1) (OpReg r2)
- -- MOV zeroes the upper part of vector registers,
- -- so it is not a real "move" in that case.
- | not (isVecFormat fmt)
- -> go r1 r2
- MOVD _ (OpReg r1) (OpReg r2)
- -> go r1 r2
+ -- When used with vector registers, MOV only deals with the lower part,
+ -- so it is not a real move. For example, MOVSS/MOVSD between xmm registers
+ -- preserves the upper half, and MOVQ between xmm registers zeroes the upper half.
+ | not $ isVecFormat fmt
+ -- Don't eliminate a move between e.g. RAX and XMM:
+ -- even though we might be using XMM to store a scalar integer value,
+ -- some instructions only support XMM registers.
+ , targetClassOfReg platform r1 == targetClassOfReg platform r2
+ -> Just (r1, r2)
+ MOVD {}
+ -- MOVD moves between xmm registers and general-purpose registers,
+ -- and we don't want to eliminate those moves (as noted for MOV).
+ -> Nothing
+
+ -- SSE2/AVX move instructions always move the full register.
MOVA _ (OpReg r1) (OpReg r2)
- -> go r1 r2
+ -> Just (r1, r2)
MOVU _ (OpReg r1) (OpReg r2)
- -> go r1 r2
+ -> Just (r1, r2)
VMOVU _ (OpReg r1) (OpReg r2)
- -> go r1 r2
+ -> Just (r1, r2)
MOVDQU _ (OpReg r1) (OpReg r2)
- -> go r1 r2
+ -> Just (r1, r2)
VMOVDQU _ (OpReg r1) (OpReg r2)
- -> go r1 r2
+ -> Just (r1, r2)
+
+ -- MOVL, MOVH and MOVHLPS preserve some part of the destination register,
+ -- so are not simple moves.
+ MOVL {} -> Nothing
+ MOVH {} -> Nothing
+ MOVHLPS {} -> Nothing
+
+ -- Other instructions are not moves.
_ -> Nothing
- where
- go r1 r2
- -- Don't eliminate a move between e.g. RAX and XMM:
- -- even though we might be using XMM to store a scalar integer value,
- -- some instructions only support XMM registers.
- | targetClassOfReg platform r1 == targetClassOfReg platform r2
- = Just (r1, r2)
- | otherwise
- = Nothing
-- | Make an unconditional branch instruction.
mkJumpInstr
=====================================
compiler/GHC/CmmToAsm/X86/Ppr.hs
=====================================
@@ -619,15 +619,12 @@ pprInstr platform i = case i of
II64 -> II32 -- 32-bit version is equivalent, and smaller
_ -> format
- MOV format src dst
- -> pprFormatOpOp (text "mov") format src dst
-
- MOV2 srcFmt dstFmt src dst
- -> pprFormatOpOp (text "mov") fmt src dst
- where
- fmt = if formatInBytes srcFmt <= formatInBytes dstFmt
- then srcFmt
- else dstFmt
+ MOV fmt src dst
+ -> pprFormatOpOp (text "mov") fmt' src dst
+ where
+ fmt' = case fmt of
+ VecFormat _l sFmt -> scalarFormatFormat sFmt
+ _ -> fmt
CMOV cc format src dst
-> pprCondOpReg (text "cmov") format cc src dst
@@ -1004,7 +1001,7 @@ pprInstr platform i = case i of
-> pprDoubleShift (text "psrldq") format offset dst
MOVHLPS format from to
- -> pprOpReg (text "movhlps") format from to
+ -> pprOpReg (text "movhlps") format (OpReg from) to
PUNPCKLQDQ format from to
-> pprOpReg (text "punpcklqdq") format from to
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6ac63edbd1272a6d75f7a43d9df7f99a32e80856
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/6ac63edbd1272a6d75f7a43d9df7f99a32e80856
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240625/742ba1c9/attachment-0001.html>
More information about the ghc-commits mailing list