[Git][ghc/ghc][master] LLVM: account for register type in funPrologue
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Tue Feb 25 13:59:26 UTC 2025
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
33aca30f by sheaf at 2025-02-25T08:58:46-05:00
LLVM: account for register type in funPrologue
We were not properly accounting for the live register type of
global registers in GHC.CmmToLlvm.CodeGen.funPrologue. This meant that
we could allocated a register at type <4 x i32> but try to write to it
at type <8 x i16>, which LLVM doesn't much like.
This patch fixes that by inserting intermerdiate casts when necessary.
Fixes #25730
- - - - -
8 changed files:
- compiler/GHC/Cmm/Reg.hs
- compiler/GHC/CmmToLlvm/Base.hs
- compiler/GHC/CmmToLlvm/CodeGen.hs
- compiler/GHC/Llvm/Types.hs
- + testsuite/tests/llvm/should_run/T25730.hs
- + testsuite/tests/llvm/should_run/T25730.stdout
- + testsuite/tests/llvm/should_run/T25730C.c
- testsuite/tests/llvm/should_run/all.T
Changes:
=====================================
compiler/GHC/Cmm/Reg.hs
=====================================
@@ -98,8 +98,8 @@ instance Outputable CmmReg where
pprReg :: CmmReg -> SDoc
pprReg r
= case r of
- CmmLocal local -> pprLocalReg local
- CmmGlobal (GlobalRegUse global _ty) -> pprGlobalReg global
+ CmmLocal local -> pprLocalReg local
+ CmmGlobal (GlobalRegUse global _) -> pprGlobalReg global
cmmRegType :: CmmReg -> CmmType
cmmRegType (CmmLocal reg) = localRegType reg
=====================================
compiler/GHC/CmmToLlvm/Base.hs
=====================================
@@ -290,7 +290,7 @@ data LlvmEnv = LlvmEnv
-- the following get cleared for every function (see @withClearVars@)
, envVarMap :: LlvmEnvMap -- ^ Local variables so far, with type
- , envStackRegs :: [GlobalReg] -- ^ Non-constant registers (alloca'd in the function prelude)
+ , envStackRegs :: [GlobalRegUse] -- ^ Non-constant registers (alloca'd in the function prelude)
}
type LlvmEnvMap = UniqFM Unique LlvmType
@@ -374,12 +374,14 @@ varLookup s = getEnv (flip lookupUFM (getUnique s) . envVarMap)
funLookup s = getEnv (flip lookupUFM (getUnique s) . envFunMap)
-- | Set a register as allocated on the stack
-markStackReg :: GlobalReg -> LlvmM ()
+markStackReg :: GlobalRegUse -> LlvmM ()
markStackReg r = modifyEnv $ \env -> env { envStackRegs = r : envStackRegs env }
-- | Check whether a register is allocated on the stack
-checkStackReg :: GlobalReg -> LlvmM Bool
-checkStackReg r = getEnv ((elem r) . envStackRegs)
+checkStackReg :: GlobalReg -> LlvmM (Maybe CmmType)
+checkStackReg r = do
+ stack_regs <- getEnv envStackRegs
+ return $ fmap globalRegUse_type $ lookupRegUse r stack_regs
-- | Allocate a new global unnamed metadata identifier
getMetaUniqueId :: LlvmM MetaId
=====================================
compiler/GHC/CmmToLlvm/CodeGen.hs
=====================================
@@ -47,7 +47,7 @@ import Data.Foldable ( toList )
import Data.List ( nub )
import qualified Data.List as List
import Data.List.NonEmpty ( NonEmpty (..), nonEmpty )
-import Data.Maybe ( catMaybes, isJust )
+import Data.Maybe ( catMaybes )
type Atomic = Maybe MemoryOrdering
type LlvmStatements = OrdList LlvmStatement
@@ -202,9 +202,8 @@ genCall (PrimTarget MO_Touch) _ _ =
return (nilOL, [])
genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
- dstV <- getCmmRegW (CmmLocal dst)
- let ty = cmmToLlvmType $ localRegType dst
- width = widthToLlvmFloat w
+ (dstV, ty) <- getCmmRegW (CmmLocal dst)
+ let width = widthToLlvmFloat w
castV <- lift $ mkLocalVar ty
ve <- exprToVarW e
statement $ Assignment castV $ Cast LM_Uitofp ve width
@@ -255,7 +254,7 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $
let targetTy = widthToLlvmInt width
ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
ptrVar <- doExprW (pLift targetTy) ptrExpr
- dstVar <- getCmmRegW (CmmLocal dst)
+ (dstVar, _dst_ty) <- getCmmRegW (CmmLocal dst)
let op = case amop of
AMO_Add -> LAO_Add
AMO_Sub -> LAO_Sub
@@ -267,7 +266,7 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $
statement $ Store retVar dstVar Nothing []
genCall (PrimTarget (MO_AtomicRead _ mem_ord)) [dst] [addr] = runStmtsDecls $ do
- dstV <- getCmmRegW (CmmLocal dst)
+ (dstV, _dst_ty) <- getCmmRegW (CmmLocal dst)
v1 <- genLoadW (Just mem_ord) addr (localRegType dst) NaturallyAligned
statement $ Store v1 dstV Nothing []
@@ -279,14 +278,14 @@ genCall (PrimTarget (MO_Cmpxchg _width))
let targetTy = getVarType oldVar
ptrExpr = Cast LM_Inttoptr addrVar (pLift targetTy)
ptrVar <- doExprW (pLift targetTy) ptrExpr
- dstVar <- getCmmRegW (CmmLocal dst)
+ (dstVar, _dst_ty) <- getCmmRegW (CmmLocal dst)
retVar <- doExprW (LMStructU [targetTy,i1])
$ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
retVar' <- doExprW targetTy $ ExtractV retVar 0
statement $ Store retVar' dstVar Nothing []
genCall (PrimTarget (MO_Xchg _width)) [dst] [addr, val] = runStmtsDecls $ do
- dstV <- getCmmRegW (CmmLocal dst) :: WriterT LlvmAccum LlvmM LlvmVar
+ (dstV, _dst_ty) <- getCmmRegW (CmmLocal dst)
addrVar <- exprToVarW addr
valVar <- exprToVarW val
let ptrTy = pLift $ getVarType valVar
@@ -352,8 +351,8 @@ genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
retShifted <- doExprW width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
-- And extract them into retH.
retH <- doExprW width $ Cast LM_Trunc retShifted width
- dstRegL <- getCmmRegW (CmmLocal dstL)
- dstRegH <- getCmmRegW (CmmLocal dstH)
+ (dstRegL, _dstL_ty) <- getCmmRegW (CmmLocal dstL)
+ (dstRegH, _dstH_ty) <- getCmmRegW (CmmLocal dstH)
statement $ Store retL dstRegL Nothing []
statement $ Store retH dstRegH Nothing []
@@ -383,9 +382,9 @@ genCall (PrimTarget (MO_S_Mul2 w)) [dstC, dstH, dstL] [lhs, rhs] = runStmtsDecls
retH' <- doExprW width $ LlvmOp LM_MO_AShr retL widthLlvmLitm1
retC1 <- doExprW i1 $ Compare LM_CMP_Ne retH retH' -- Compare op returns a 1-bit value (i1)
retC <- doExprW width $ Cast LM_Zext retC1 width -- so we zero-extend it
- dstRegL <- getCmmRegW (CmmLocal dstL)
- dstRegH <- getCmmRegW (CmmLocal dstH)
- dstRegC <- getCmmRegW (CmmLocal dstC)
+ (dstRegL, _dstL_ty) <- getCmmRegW (CmmLocal dstL)
+ (dstRegH, _dstH_ty) <- getCmmRegW (CmmLocal dstH)
+ (dstRegC, _dstC_ty) <- getCmmRegW (CmmLocal dstC)
statement $ Store retL dstRegL Nothing []
statement $ Store retH dstRegH Nothing []
statement $ Store retC dstRegC Nothing []
@@ -420,8 +419,8 @@ genCall (PrimTarget (MO_U_QuotRem2 w))
let narrow var = doExprW width $ Cast LM_Trunc var width
retDiv <- narrow retExtDiv
retRem <- narrow retExtRem
- dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
- dstRegR <- lift $ getCmmReg (CmmLocal dstR)
+ (dstRegQ, _dstQ_ty) <- lift $ getCmmReg (CmmLocal dstQ)
+ (dstRegR, _dstR_ty) <- lift $ getCmmReg (CmmLocal dstR)
statement $ Store retDiv dstRegQ Nothing []
statement $ Store retRem dstRegR Nothing []
@@ -504,7 +503,6 @@ genCall target res args = do
let funTy = \name -> LMFunction $ LlvmFunctionDecl name ExternallyVisible
lmconv retTy FixedArgs argTy (llvmFunAlign platform)
-
argVars <- arg_varsW args_hints ([], nilOL, [])
fptr <- getFunPtrW funTy target
@@ -524,23 +522,21 @@ genCall target res args = do
ret_reg t = panic $ "genCall: Bad number of registers! Can only handle"
++ " 1, given " ++ show (length t) ++ "."
let creg = ret_reg res
- vreg <- getCmmRegW (CmmLocal creg)
- if retTy == pLower (getVarType vreg)
- then do
- statement $ Store v1 vreg Nothing []
- doReturn
- else do
- let ty = pLower $ getVarType vreg
- let op = case ty of
- vt | isPointer vt -> LM_Bitcast
- | isInt vt -> LM_Ptrtoint
- | otherwise ->
- panic $ "genCall: CmmReg bad match for"
- ++ " returned type!"
-
- v2 <- doExprW ty $ Cast op v1 ty
- statement $ Store v2 vreg Nothing []
- doReturn
+ (vreg, ty) <- getCmmRegW (CmmLocal creg)
+ if retTy == ty
+ then do
+ statement $ Store v1 vreg Nothing []
+ doReturn
+ else do
+ let op = case ty of
+ vt | isPointer vt -> LM_Bitcast
+ | isInt vt -> LM_Ptrtoint
+ | otherwise ->
+ panic $ "genCall: CmmReg bad match for"
+ ++ " returned type!"
+ v2 <- doExprW ty $ Cast op v1 ty
+ statement $ Store v2 vreg Nothing []
+ doReturn
-- | Generate a call to an LLVM intrinsic that performs arithmetic operation
-- with overflow bit (i.e., returns a struct containing the actual result of the
@@ -566,8 +562,8 @@ genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
-- value is i<width>, but overflowBit is i1, so we need to cast (Cmm expects
-- both to be i<width>)
(overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
- dstRegV <- getCmmReg (CmmLocal dstV)
- dstRegO <- getCmmReg (CmmLocal dstO)
+ (dstRegV, _dstV_ty) <- getCmmReg (CmmLocal dstV)
+ (dstRegO, _dstO_ty) <- getCmmReg (CmmLocal dstO)
let storeV = Store value dstRegV Nothing []
storeO = Store overflow dstRegO Nothing []
return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
@@ -625,7 +621,7 @@ genCallSimpleCast w t@(PrimTarget op) [dst] args = do
fname <- cmmPrimOpFunctions op
(fptr, _, top3) <- getInstrinct fname width [width]
- dstV <- getCmmReg (CmmLocal dst)
+ (dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
let (_, arg_hints) = foreignTargetHints t
let args_hints = zip args arg_hints
@@ -657,7 +653,7 @@ genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
fname <- cmmPrimOpFunctions op
(fptr, _, top3) <- getInstrinct fname width (const width <$> args)
- dstV <- getCmmReg (CmmLocal dst)
+ (dstV, _dst_ty) <- getCmmReg (CmmLocal dst)
let (_, arg_hints) = foreignTargetHints t
let args_hints = zip args arg_hints
@@ -1089,11 +1085,9 @@ genJump expr live = do
-- these with registers when possible.
genAssign :: CmmReg -> CmmExpr -> LlvmM StmtData
genAssign reg val = do
- vreg <- getCmmReg reg
+ (vreg, ty) <- getCmmReg reg
(vval, stmts2, top2) <- exprToVar val
let stmts = stmts2
-
- let ty = (pLower . getVarType) vreg
platform <- getPlatform
case ty of
-- Some registers are pointer types, so need to cast value to pointer
@@ -2047,42 +2041,58 @@ mkLoad atomic vptr alignment
-- | Handle CmmReg expression. This will return a pointer to the stack
-- location of the register. Throws an error if it isn't allocated on
-- the stack.
-getCmmReg :: CmmReg -> LlvmM LlvmVar
+getCmmReg :: CmmReg -> LlvmM (LlvmVar, LlvmType)
getCmmReg (CmmLocal (LocalReg un _))
= do exists <- varLookup un
case exists of
- Just ety -> return (LMLocalVar un $ pLift ety)
+ Just ety -> return (LMLocalVar un $ pLift ety, ety)
Nothing -> pprPanic "getCmmReg: Cmm register " $
ppr un <> text " was not allocated!"
-- This should never happen, as every local variable should
-- have been assigned a value at some point, triggering
-- "funPrologue" to allocate it on the stack.
-getCmmReg (CmmGlobal ru@(GlobalRegUse r _))
- = do onStack <- checkStackReg r
+getCmmReg (CmmGlobal (GlobalRegUse reg _reg_ty))
+ = do onStack <- checkStackReg reg
platform <- getPlatform
- if onStack
- then return (lmGlobalRegVar platform ru)
- else pprPanic "getCmmReg: Cmm register " $
- ppr r <> text " not stack-allocated!"
+ case onStack of
+ Just stack_ty -> do
+ let var = lmGlobalRegVar platform (GlobalRegUse reg stack_ty)
+ return (var, pLower $ getVarType var)
+ Nothing ->
+ pprPanic "getCmmReg: Cmm register " $
+ ppr reg <> text " not stack-allocated!"
-- | Return the value of a given register, as well as its type. Might
-- need to be load from stack.
getCmmRegVal :: CmmReg -> LlvmM (LlvmVar, LlvmType, LlvmStatements)
getCmmRegVal reg =
case reg of
- CmmGlobal g -> do
- onStack <- checkStackReg (globalRegUse_reg g)
+ CmmGlobal gu@(GlobalRegUse g _) -> do
+ onStack <- checkStackReg g
platform <- getPlatform
- if onStack then loadFromStack else do
- let r = lmGlobalRegArg platform g
- return (r, getVarType r, nilOL)
+ case onStack of
+ Just {} ->
+ loadFromStack
+ Nothing -> do
+ let r = lmGlobalRegArg platform gu
+ return (r, getVarType r, nilOL)
_ -> loadFromStack
- where loadFromStack = do
- ptr <- getCmmReg reg
- let ty = pLower $ getVarType ptr
- (v, s) <- doExpr ty (Load ptr Nothing)
- return (v, ty, unitOL s)
+ where
+ loadFromStack = do
+ platform <- getPlatform
+ (ptr, stack_reg_ty) <- getCmmReg reg
+ let reg_ty = case reg of
+ CmmGlobal g -> pLower $ getVarType $ lmGlobalRegVar platform g
+ CmmLocal {} -> stack_reg_ty
+ if reg_ty /= stack_reg_ty
+ then do
+ (v1, s1) <- doExpr stack_reg_ty (Load ptr Nothing)
+ (v2, s2) <- doExpr reg_ty (Cast LM_Bitcast v1 reg_ty)
+ return (v2, reg_ty, toOL [s1, s2])
+ else do
+ (v, s) <- doExpr reg_ty (Load ptr Nothing)
+ return (v, reg_ty, unitOL s)
-- | Allocate a local CmmReg on the stack
allocReg :: CmmReg -> (LlvmVar, LlvmStatements)
@@ -2215,15 +2225,29 @@ funPrologue live cmmBlocks = do
let (newv, stmts) = allocReg reg
varInsert un (pLower $ getVarType newv)
return stmts
- CmmGlobal ru@(GlobalRegUse r _) -> do
+ CmmGlobal ru@(GlobalRegUse r ty0) -> do
let reg = lmGlobalRegVar platform ru
- arg = lmGlobalRegArg platform ru
ty = (pLower . getVarType) reg
trash = LMLitVar $ LMUndefLit ty
- rval = if isJust (mbLive r) then arg else trash
+ rval = case mbLive r of
+ Just (GlobalRegUse _ ty') ->
+ lmGlobalRegArg platform (GlobalRegUse r ty')
+ _ -> trash
alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
- markStackReg r
- return $ toOL [alloc, Store rval reg Nothing []]
+ markStackReg ru
+ case mbLive r of
+ Just (GlobalRegUse _ ty')
+ | let llvm_ty = cmmToLlvmType ty0
+ llvm_ty' = cmmToLlvmType ty'
+ , llvm_ty /= llvm_ty'
+ -> do castV <- mkLocalVar (pLift llvm_ty')
+ return $
+ toOL [ alloc
+ , Assignment castV $ Cast LM_Bitcast reg (pLift llvm_ty')
+ , Store rval castV Nothing []
+ ]
+ _ ->
+ return $ toOL [alloc, Store rval reg Nothing []]
return (concatOL stmtss `snocOL` jumpToEntry, [])
where
@@ -2387,7 +2411,7 @@ runStmtsDecls action = do
LlvmAccum stmts decls <- execWriterT action
return (stmts, decls)
-getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM LlvmVar
+getCmmRegW :: CmmReg -> WriterT LlvmAccum LlvmM (LlvmVar, LlvmType)
getCmmRegW = lift . getCmmReg
genLoadW :: Atomic -> CmmExpr -> CmmType -> AlignmentSpec -> WriterT LlvmAccum LlvmM LlvmVar
=====================================
compiler/GHC/Llvm/Types.hs
=====================================
@@ -239,7 +239,7 @@ pVarLift :: LlvmVar -> LlvmVar
pVarLift (LMGlobalVar s t l x a c) = LMGlobalVar s (pLift t) l x a c
pVarLift (LMLocalVar s t ) = LMLocalVar s (pLift t)
pVarLift (LMNLocalVar s t ) = LMNLocalVar s (pLift t)
-pVarLift (LMLitVar _ ) = error $ "Can't lower a literal type!"
+pVarLift (LMLitVar _ ) = error $ "Can't lift a literal type!"
-- | Remove the pointer indirection of the supplied type. Only 'LMPointer'
-- constructors can be lowered.
=====================================
testsuite/tests/llvm/should_run/T25730.hs
=====================================
@@ -0,0 +1,17 @@
+{-# LANGUAGE MagicHash, UnboxedTuples, ExtendedLiterals, UnliftedFFITypes #-}
+
+module Main where
+
+import GHC.Exts
+import GHC.Int
+
+foreign import ccall unsafe
+ packsi32 :: Int32X4# -> Int32X4# -> Int16X8#
+
+main :: IO ()
+main = do
+ let a = broadcastInt32X4# 100#Int32
+ b = broadcastInt32X4# 200#Int32
+ c = packsi32 a b
+ (# x0, x1, x2, x3, x4, x5, x6, x7 #) = unpackInt16X8# c
+ print (I16# x0, I16# x1, I16# x2, I16# x3, I16# x4, I16# x5, I16# x6, I16# x7)
=====================================
testsuite/tests/llvm/should_run/T25730.stdout
=====================================
@@ -0,0 +1 @@
+(100,100,100,100,200,200,200,200)
=====================================
testsuite/tests/llvm/should_run/T25730C.c
=====================================
@@ -0,0 +1,7 @@
+#include <emmintrin.h>
+#include <stdio.h>
+
+__m128i packsi32(__m128i a, __m128i b)
+{
+ return _mm_packs_epi32(a, b);
+}
=====================================
testsuite/tests/llvm/should_run/all.T
=====================================
@@ -14,3 +14,5 @@ def ignore_llvm_and_vortex( msg ):
test('T22487', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_and_run, [''])
test('T22033', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_and_run, [''])
+test('T25730', [req_c, unless(arch('x86_64'), skip), normalise_errmsg_fun(ignore_llvm_and_vortex)], compile_and_run, ['T25730C.c'])
+ # T25730C.c contains Intel instrinsics, so only run this test on x86
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/33aca30fbff67188fd5692cf40f1e2542663bfec
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/33aca30fbff67188fd5692cf40f1e2542663bfec
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250225/b5b0da03/attachment-0001.html>
More information about the ghc-commits
mailing list