[commit: ghc] master: Fix for T14251 on ARM (d849554)

git at git.haskell.org git at git.haskell.org
Sun Oct 28 17:41:12 UTC 2018


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/d8495549ba9d194815c2d0eaee6797fc7c00756a/ghc

>---------------------------------------------------------------

commit d8495549ba9d194815c2d0eaee6797fc7c00756a
Author: Kavon Farvardin <kavon at farvard.in>
Date:   Sun Oct 28 12:11:49 2018 -0400

    Fix for T14251 on ARM
    
    We now calculate the SSE register padding needed to fix the calling
    convention in LLVM in a robust way: grouping them by whether
    registers in that class overlap (with the same class overlapping
    itself).
    
    My prior patch assumed that no matter the platform, physical
    register Fx aliases with Dx, etc, for our calling convention.
    
    This is unfortunately not the case for any platform except x86-64.
    
    Test Plan:
    Only know how to test on x86-64, but it should be tested on ARM with:
    
    `make test WAYS=llvm && make test WAYS=optllvm`
    
    Reviewers: bgamari, angerman
    
    Reviewed By: bgamari
    
    Subscribers: rwbarton, carter
    
    GHC Trac Issues: #15780, #14251, #15747
    
    Differential Revision: https://phabricator.haskell.org/D5254


>---------------------------------------------------------------

d8495549ba9d194815c2d0eaee6797fc7c00756a
 compiler/llvmGen/LlvmCodeGen/Base.hs    | 123 ++++++++++++++++++++++----------
 compiler/llvmGen/LlvmCodeGen/CodeGen.hs |   6 +-
 2 files changed, 90 insertions(+), 39 deletions(-)

diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index ec91bac..0a40b73 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -26,7 +26,7 @@ module LlvmCodeGen.Base (
 
         cmmToLlvmType, widthToLlvmFloat, widthToLlvmInt, llvmFunTy,
         llvmFunSig, llvmFunArgs, llvmStdFunAttrs, llvmFunAlign, llvmInfAlign,
-        llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isSSE,
+        llvmPtrBits, tysToParams, llvmFunSection, padLiveArgs, isFPR,
 
         strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm,
         getGlobalPtr, generateExternDecls,
@@ -47,6 +47,7 @@ import CodeGen.Platform ( activeStgRegs )
 import DynFlags
 import FastString
 import Cmm              hiding ( succ )
+import CmmUtils ( regsOverlap )
 import Outputable as Outp
 import Platform
 import UniqFM
@@ -58,8 +59,7 @@ import ErrUtils
 import qualified Stream
 
 import Control.Monad (ap)
-import Data.List (sort)
-import Data.Maybe (mapMaybe)
+import Data.List (sort, groupBy, head)
 
 -- ----------------------------------------------------------------------------
 -- * Some Data Types
@@ -152,36 +152,91 @@ llvmFunArgs dflags live =
     map (lmGlobalRegArg dflags) (filter isPassed allRegs)
     where platform = targetPlatform dflags
           allRegs = activeStgRegs platform
-          paddedLive = map (\(_,r) -> r) $ padLiveArgs live
+          paddedLive = map (\(_,r) -> r) $ padLiveArgs dflags live
           isLive r = r `elem` alwaysLive || r `elem` paddedLive
-          isPassed r = not (isSSE r) || isLive r
-
-
-isSSE :: GlobalReg -> Bool
-isSSE (FloatReg _)  = True
-isSSE (DoubleReg _) = True
-isSSE (XmmReg _)    = True
-isSSE (YmmReg _)    = True
-isSSE (ZmmReg _)    = True
-isSSE _             = False
-
-sseRegNum :: GlobalReg -> Maybe Int
-sseRegNum (FloatReg i)  = Just i
-sseRegNum (DoubleReg i) = Just i
-sseRegNum (XmmReg i)    = Just i
-sseRegNum (YmmReg i)    = Just i
-sseRegNum (ZmmReg i)    = Just i
-sseRegNum _             = Nothing
-
--- the bool indicates whether the global reg was added as padding.
--- the returned list is not sorted in any particular order,
--- but does indicate the set of live registers needed, with SSE padding.
-padLiveArgs :: LiveGlobalRegs -> [(Bool, GlobalReg)]
-padLiveArgs live = allRegs
+          isPassed r = not (isFPR r) || isLive r
+
+
+isFPR :: GlobalReg -> Bool
+isFPR (FloatReg _)  = True
+isFPR (DoubleReg _) = True
+isFPR (XmmReg _)    = True
+isFPR (YmmReg _)    = True
+isFPR (ZmmReg _)    = True
+isFPR _             = False
+
+sameFPRClass :: GlobalReg -> GlobalReg -> Bool
+sameFPRClass (FloatReg _)  (FloatReg _) = True
+sameFPRClass (DoubleReg _) (DoubleReg _) = True
+sameFPRClass (XmmReg _)    (XmmReg _) = True
+sameFPRClass (YmmReg _)    (YmmReg _) = True
+sameFPRClass (ZmmReg _)    (ZmmReg _) = True
+sameFPRClass _             _          = False
+
+normalizeFPRNum :: GlobalReg -> GlobalReg
+normalizeFPRNum (FloatReg _)  = FloatReg 1
+normalizeFPRNum (DoubleReg _) = DoubleReg 1
+normalizeFPRNum (XmmReg _)    = XmmReg 1
+normalizeFPRNum (YmmReg _)    = YmmReg 1
+normalizeFPRNum (ZmmReg _)    = ZmmReg 1
+normalizeFPRNum _ = error "normalizeFPRNum expected only FPR regs"
+
+getFPRCtor :: GlobalReg -> Int -> GlobalReg
+getFPRCtor (FloatReg _)  = FloatReg
+getFPRCtor (DoubleReg _) = DoubleReg
+getFPRCtor (XmmReg _)    = XmmReg
+getFPRCtor (YmmReg _)    = YmmReg
+getFPRCtor (ZmmReg _)    = ZmmReg
+getFPRCtor _ = error "getFPRCtor expected only FPR regs"
+
+fprRegNum :: GlobalReg -> Int
+fprRegNum (FloatReg i)  = i
+fprRegNum (DoubleReg i) = i
+fprRegNum (XmmReg i)    = i
+fprRegNum (YmmReg i)    = i
+fprRegNum (ZmmReg i)    = i
+fprRegNum _ = error "fprRegNum expected only FPR regs"
+
+-- | Input: dynflags, and the list of live registers
+--
+-- Output: An augmented list of live registers, where padding was
+-- added to the list of registers to ensure the calling convention is
+-- correctly used by LLVM.
+--
+-- Each global reg in the returned list is tagged with a bool, which
+-- indicates whether the global reg was added as padding, or was an original
+-- live register.
+--
+-- That is, True => padding, False => a real, live global register.
+--
+-- Also, the returned list is not sorted in any particular order.
+--
+padLiveArgs :: DynFlags -> LiveGlobalRegs -> [(Bool, GlobalReg)]
+padLiveArgs dflags live =
+      if platformUnregisterised plat
+        then taggedLive -- not using GHC's register convention for platform.
+        else padding ++ taggedLive
+  where
+    taggedLive = map (\x -> (False, x)) live
+    plat = targetPlatform dflags
+
+    fprLive = filter isFPR live
+    padding = concatMap calcPad $ groupBy sharesClass fprLive
+
+    sharesClass :: GlobalReg -> GlobalReg -> Bool
+    sharesClass a b = sameFPRClass a b || overlappingClass
+      where
+        overlappingClass = regsOverlap dflags (norm a) (norm b)
+        norm = CmmGlobal . normalizeFPRNum
+
+    calcPad :: [GlobalReg] -> [(Bool, GlobalReg)]
+    calcPad rs = getFPRPadding (getFPRCtor $ head rs) rs
+
+getFPRPadding :: (Int -> GlobalReg) -> LiveGlobalRegs -> [(Bool, GlobalReg)]
+getFPRPadding paddingCtor live = padding
     where
-        sseRegNums = sort $ mapMaybe sseRegNum live
-        (_, padding) = foldl assignSlots (1, []) $ sseRegNums
-        allRegs = padding ++ map (\r -> (False, r)) live
+        fprRegNums = sort $ map fprRegNum live
+        (_, padding) = foldl assignSlots (1, []) $ fprRegNums
 
         assignSlots (i, acc) regNum
             | i == regNum = -- don't need padding here
@@ -195,11 +250,7 @@ padLiveArgs live = allRegs
 
         genPad start n =
             take n $ flip map (iterate (+1) start) (\i ->
-                (True, FloatReg i))
-                -- NOTE: Picking float should be fine for the following reasons:
-                -- (1) Float aliases with all the other SSE register types on
-                -- the given platform.
-                -- (2) The argument is not live anyways.
+                (True, paddingCtor i))
 
 
 -- | Llvm standard fun attributes
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 1873400..21abc65 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -1814,14 +1814,14 @@ funPrologue live cmmBlocks = do
 -- STG Liveness optimisation done here.
 funEpilogue :: LiveGlobalRegs -> LlvmM ([LlvmVar], LlvmStatements)
 funEpilogue live = do
+    dflags <- getDynFlags
 
     -- the bool indicates whether the register is padding.
     let alwaysNeeded = map (\r -> (False, r)) alwaysLive
-        livePadded = alwaysNeeded ++ padLiveArgs live
+        livePadded = alwaysNeeded ++ padLiveArgs dflags live
 
     -- Set to value or "undef" depending on whether the register is
     -- actually live
-    dflags <- getDynFlags
     let loadExpr r = do
           (v, _, s) <- getCmmRegVal (CmmGlobal r)
           return (Just $ v, s)
@@ -1833,7 +1833,7 @@ funEpilogue live = do
     loads <- flip mapM allRegs $ \r -> case () of
       _ | (False, r) `elem` livePadded
                              -> loadExpr r   -- if r is not padding, load it
-        | not (isSSE r) || (True, r) `elem` livePadded
+        | not (isFPR r) || (True, r) `elem` livePadded
                              -> loadUndef r
         | otherwise          -> return (Nothing, nilOL)
 



More information about the ghc-commits mailing list