[Git][ghc/ghc][wip/ncg-simd] improve RegClass

Sat Jun 8 08:23:25 UTC 2024

sheaf pushed to branch wip/ncg-simd at Glasgow Haskell Compiler / GHC


Commits:
3bbc4fcd by sheaf at 2024-06-08T10:22:34+02:00
improve RegClass

- - - - -


14 changed files:

- compiler/GHC/CmmToAsm/AArch64/Regs.hs
- compiler/GHC/CmmToAsm/PPC/CodeGen.hs
- compiler/GHC/CmmToAsm/PPC/Instr.hs
- compiler/GHC/CmmToAsm/PPC/Regs.hs
- compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
- compiler/GHC/CmmToAsm/Reg/Linear.hs
- compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
- compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
- compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/CmmToAsm/X86/Regs.hs
- compiler/GHC/Platform/Reg.hs
- compiler/GHC/Platform/Reg/Class.hs


Changes:

=====================================
compiler/GHC/CmmToAsm/AArch64/Regs.hs
=====================================
@@ -108,14 +108,12 @@ virtualRegSqueeze cls vr
                 VirtualRegHi{}          -> 1
                 _other                  -> 0
 
-        RcDouble
+        RcFloatOrVector
          -> case vr of
                 VirtualRegD{}           -> 1
                 VirtualRegF{}           -> 0
                 _other                  -> 0
 
-        _other -> 0
-
 {-# INLINE realRegSqueeze #-}
 realRegSqueeze :: RegClass -> RealReg -> Int
 realRegSqueeze cls rr
@@ -126,14 +124,12 @@ realRegSqueeze cls rr
                         | regNo < 32    -> 1     -- first fp reg is 32
                         | otherwise     -> 0
 
-        RcDouble
+        RcFloatOrVector
          -> case rr of
                 RealRegSingle regNo
                         | regNo < 32    -> 0
                         | otherwise     -> 1
 
-        _other -> 0
-
 mkVirtualReg :: Unique -> Format -> VirtualReg
 mkVirtualReg u format
    | not (isFloatFormat format) = VirtualRegI u
@@ -147,18 +143,18 @@ mkVirtualReg u format
 classOfRealReg :: RealReg -> RegClass
 classOfRealReg (RealRegSingle i)
         | i < 32        = RcInteger
-        | otherwise     = RcDouble
+        | otherwise     = RcFloatOrVector
 
+-- SIMD NCG TODO: get rid of this function;
+-- mkSpillInstr will receive the Format of what's stored in the register.
 fmtOfRealReg :: RealReg -> Format
 fmtOfRealReg real_reg =
   case classOfRealReg real_reg of
-            RcInteger -> II64
-            RcDouble  -> FF64
-            RcFloat   -> panic "No float regs on arm"
+            RcInteger       -> II64
+            RcFloatOrVector -> FF64
 
 regDotColor :: RealReg -> SDoc
 regDotColor reg
  = case classOfRealReg reg of
         RcInteger       -> text "blue"
-        RcFloat         -> text "red"
-        RcDouble        -> text "green"
+        RcFloatOrVector -> text "red"


=====================================
compiler/GHC/CmmToAsm/PPC/CodeGen.hs
=====================================
@@ -449,7 +449,7 @@ getRegister' _ platform (CmmMachOp (MO_SS_Conv W64 W32) [x])
 getRegister' _ platform (CmmLoad mem pk _)
  | not (isWord64 pk) = do
         Amode addr addr_code <- getAmode D mem
-        let code dst = assert ((targetClassOfReg platform dst == RcDouble) == isFloatType pk) $
+        let code dst = assert ((targetClassOfReg platform dst == RcFloatOrVector) == isFloatType pk) $
                        addr_code `snocOL` LD format dst addr
         return (Any format code)
  | not (target32Bit platform) = do


=====================================
compiler/GHC/CmmToAsm/PPC/Instr.hs
=====================================
@@ -562,7 +562,7 @@ mkSpillInstr config reg delta slot
                 RcInteger -> case arch of
                                 ArchPPC -> II32
                                 _       -> II64
-                RcDouble  -> FF64
+                RcFloatOrVector  -> FF64
                 _         -> panic "PPC.Instr.mkSpillInstr: no match"
         instr = case makeImmediate W32 True (off-delta) of
                 Just _  -> ST
@@ -587,7 +587,7 @@ mkLoadInstr config reg delta slot
                 RcInteger ->  case arch of
                                  ArchPPC -> II32
                                  _       -> II64
-                RcDouble  -> FF64
+                RcFloatOrVector  -> FF64
                 _         -> panic "PPC.Instr.mkLoadInstr: no match"
         instr = case makeImmediate W32 True (off-delta) of
                 Just _  -> LD


=====================================
compiler/GHC/CmmToAsm/PPC/Regs.hs
=====================================
@@ -83,14 +83,12 @@ virtualRegSqueeze cls vr
                 VirtualRegHi{}          -> 1
                 _other                  -> 0
 
-        RcDouble
+        RcFloatOrVector
          -> case vr of
                 VirtualRegD{}           -> 1
                 VirtualRegF{}           -> 0
                 _other                  -> 0
 
-        _other -> 0
-
 {-# INLINE realRegSqueeze #-}
 realRegSqueeze :: RegClass -> RealReg -> Int
 realRegSqueeze cls rr
@@ -102,15 +100,13 @@ realRegSqueeze cls rr
                         | otherwise     -> 0
 
 
-        RcDouble
+        RcFloatOrVector
          -> case rr of
                 RealRegSingle regNo
                         | regNo < 32    -> 0
                         | otherwise     -> 1
 
 
-        _other -> 0
-
 mkVirtualReg :: Unique -> Format -> VirtualReg
 mkVirtualReg u format
    | not (isFloatFormat format) = VirtualRegI u
@@ -124,8 +120,7 @@ regDotColor :: RealReg -> SDoc
 regDotColor reg
  = case classOfRealReg reg of
         RcInteger       -> text "blue"
-        RcFloat         -> text "red"
-        RcDouble        -> text "green"
+        RcFloatOrVector -> text "red"
 
 
 
@@ -235,8 +230,8 @@ allMachRegNos   = [0..63]
 {-# INLINE classOfRealReg      #-}
 classOfRealReg :: RealReg -> RegClass
 classOfRealReg (RealRegSingle i)
-        | i < 32        = RcInteger
-        | otherwise     = RcDouble
+        | i < 32    = RcInteger
+        | otherwise = RcFloatOrVector
 
 showReg :: RegNo -> String
 showReg n


=====================================
compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
=====================================
@@ -22,7 +22,7 @@ import GHC.Utils.Panic
 --      This gets hammered by scanGraph during register allocation,
 --      so needs to be fairly efficient.
 --
---      NOTE:   This only works for architectures with just RcInteger and RcDouble
+--      NOTE:   This only works for architectures with just RcInteger and RcFloatOrVector
 --              (which are disjoint) ie. x86, x86_64 and ppc
 --
 --      The number of allocatable regs is hard coded in here so we can do
@@ -134,42 +134,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl
 
         = count3 < cALLOCATABLE_REGS_INTEGER
 
-trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
-        | let cALLOCATABLE_REGS_FLOAT
-                  =        (case platformArch platform of
-                    -- On x86_64 and x86, Float and RcDouble
-                    -- use the same registers,
-                    -- so we only use RcDouble to represent the
-                    -- register allocation problem on those types.
-                            ArchX86       -> 0
-                            ArchX86_64    -> 0
-                            ArchPPC       -> 0
-                            ArchPPC_64 _  -> 0
-                            ArchARM _ _ _ -> panic "trivColorable ArchARM"
-                            -- we can in principle address all the float regs as
-                            -- segments. So we could have 64 Float regs. Or
-                            -- 128 Half regs, or even 256 Byte regs.
-                            ArchAArch64   -> 0
-                            ArchAlpha     -> panic "trivColorable ArchAlpha"
-                            ArchMipseb    -> panic "trivColorable ArchMipseb"
-                            ArchMipsel    -> panic "trivColorable ArchMipsel"
-                            ArchS390X     -> panic "trivColorable ArchS390X"
-                            ArchRISCV64   -> panic "trivColorable ArchRISCV64"
-                            ArchLoongArch64->panic "trivColorable ArchLoongArch64"
-                            ArchJavaScript-> panic "trivColorable ArchJavaScript"
-                            ArchWasm32    -> panic "trivColorable ArchWasm32"
-                            ArchUnknown   -> panic "trivColorable ArchUnknown")
-        , count2        <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT
-                                (virtualRegSqueeze RcFloat)
-                                conflicts
-
-        , count3        <- accSqueeze  count2    cALLOCATABLE_REGS_FLOAT
-                                (realRegSqueeze   RcFloat)
-                                exclusions
-
-        = count3 < cALLOCATABLE_REGS_FLOAT
-
-trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
+trivColorable platform virtualRegSqueeze realRegSqueeze RcFloatOrVector conflicts exclusions
         | let cALLOCATABLE_REGS_DOUBLE
                   =        (case platformArch platform of
                             ArchX86       -> 8
@@ -194,11 +159,11 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu
                             ArchWasm32    -> panic "trivColorable ArchWasm32"
                             ArchUnknown   -> panic "trivColorable ArchUnknown")
         , count2        <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE
-                                (virtualRegSqueeze RcDouble)
+                                (virtualRegSqueeze RcFloatOrVector)
                                 conflicts
 
         , count3        <- accSqueeze  count2    cALLOCATABLE_REGS_DOUBLE
-                                (realRegSqueeze   RcDouble)
+                                (realRegSqueeze   RcFloatOrVector)
                                 exclusions
 
         = count3 < cALLOCATABLE_REGS_DOUBLE
@@ -221,21 +186,21 @@ trivColorable classN conflicts exclusions
         acc r (cd, cf)
          = case regClass r of
                 RcInteger       -> (cd+1, cf)
-                RcFloat         -> (cd,   cf+1)
+                RcFloatOrVector -> (cd,   cf+1)
                 _               -> panic "Regs.trivColorable: reg class not handled"
 
         tmp                     = nonDetFoldUFM acc (0, 0) conflicts
         (countInt,  countFloat) = nonDetFoldUFM acc tmp    exclusions
 
         squeese         = worst countInt   classN RcInteger
-                        + worst countFloat classN RcFloat
+                        + worst countFloat classN RcFloatOrVector
 
    in   squeese < allocatableRegsInClass classN
 
 -- | Worst case displacement
 --      node N of classN has n neighbors of class C.
 --
---      We currently only have RcInteger and RcDouble, which don't conflict at all.
+--      We currently only have RcInteger and RcFloatOrVector, which don't conflict at all.
 --      This is a bit boring compared to what's in RegArchX86.
 --
 worst :: Int -> RegClass -> RegClass -> Int
@@ -244,11 +209,11 @@ worst n classN classC
         RcInteger
          -> case classC of
                 RcInteger       -> min n (allocatableRegsInClass RcInteger)
-                RcFloat         -> 0
+                RcFloatOrVector -> 0
 
-        RcDouble
+        RcFloatOrVector
          -> case classC of
-                RcFloat         -> min n (allocatableRegsInClass RcFloat)
+                RcFloatOrVector -> min n (allocatableRegsInClass RcFloatOrVector)
                 RcInteger       -> 0
 
 -- allocatableRegs is allMachRegNos with the fixed-use regs removed.
@@ -267,7 +232,7 @@ allocatableRegsInClass :: RegClass -> Int
 allocatableRegsInClass cls
  = case cls of
         RcInteger       -> allocatableRegsInteger
-        RcFloat         -> allocatableRegsDouble
+        RcFloatOrVector -> allocatableRegsDouble
 
 allocatableRegsInteger :: Int
 allocatableRegsInteger
@@ -276,6 +241,6 @@ allocatableRegsInteger
 
 allocatableRegsFloat :: Int
 allocatableRegsFloat
-        = length $ filter (\r -> regClass r == RcFloat
+        = length $ filter (\r -> regClass r == RcFloatOrVector
                  $ map RealReg allocatableRegs
 -}


=====================================
compiler/GHC/CmmToAsm/Reg/Linear.hs
=====================================
@@ -742,12 +742,11 @@ clobberRegs clobbered
  = do   platform <- getPlatform
         freeregs <- getFreeRegsR
 
-        let gpRegs  = frGetFreeRegs platform RcInteger freeregs :: [RealReg]
-            fltRegs = frGetFreeRegs platform RcFloat   freeregs :: [RealReg]
-            dblRegs = frGetFreeRegs platform RcDouble  freeregs :: [RealReg]
+        let gpRegs  = frGetFreeRegs platform RcInteger       freeregs :: [RealReg]
+            vecRegs = frGetFreeRegs platform RcFloatOrVector freeregs :: [RealReg]
 
         let extra_clobbered = [ r | r <- clobbered
-                                  , r `elem` (gpRegs ++ fltRegs ++ dblRegs) ]
+                                  , r `elem` (gpRegs ++ vecRegs) ]
 
         setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs extra_clobbered
 
@@ -917,10 +916,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
 
                 -- the vregs we could kick out that are already in a slot
                 let compat reg' r'
-                      = let cls1 = targetClassOfRealReg platform reg'
-                            cls2 = classOfVirtualReg r'
-                        in  (if cls1 == RcVector128 then RcDouble else cls1)
-                         == (if cls2 == RcVector128 then RcDouble else cls2)
+                      =  targetClassOfRealReg platform reg'
+                      == classOfVirtualReg r'
                     candidates_inBoth :: [(Unique, RealRegUsage, StackSlot)]
                     candidates_inBoth
                         = [ (temp, reg, mem)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs
=====================================
@@ -115,10 +115,10 @@ def CC_AArch64_GHC : CallingConv<[
 -}
 
 getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
-getFreeRegs cls (FreeRegs g f)
-  | RcFloat   <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted.
-  | RcDouble  <- cls = go 32 f 31
-  | RcInteger <- cls = go  0 g 18
+getFreeRegs cls (FreeRegs g f) =
+  case cls of
+    RcFloatOrVector -> go 32 f 31
+    RcInteger       -> go  0 g 18
     where
         go _   _ i | i < 0 = []
         go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
=====================================
@@ -41,10 +41,10 @@ initFreeRegs :: Platform -> FreeRegs
 initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
 
 getFreeRegs :: RegClass -> FreeRegs -> [RealReg]        -- lazily
-getFreeRegs cls (FreeRegs g f)
-    | RcFloat <- cls = [] -- no float regs on PowerPC, use double
-    | RcDouble <- cls = go f (0x80000000) 63
-    | RcInteger <- cls = go g (0x80000000) 31
+getFreeRegs cls (FreeRegs g f) =
+    case cls of
+      RcFloatOrVector -> go f (0x80000000) 63
+      RcInteger       -> go g (0x80000000) 31
     where
         go _ 0 _ = []
         go x m i | x .&. m /= 0 = RealRegSingle i : (go x (m `shiftR` 1) $! i-1)


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
=====================================
@@ -40,11 +40,7 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
         -- ToDo: there's no point looking through all the integer registers
         -- in order to find a floating-point one.
         compatibleClass i =
-          let regClass = classOfRealReg platform (RealRegSingle i)
-          in   (if regClass == RcVector128 then RcDouble else regClass)
-            == (if cls == RcVector128 then RcDouble else cls)
-            -- SIMD NCG TODO: giant hack to account for xmm registers being
-            -- used for Double with SSE2.
+          cls == classOfRealReg platform (RealRegSingle i)
 
 
 allocateReg :: RealReg -> FreeRegs -> FreeRegs


=====================================
compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
=====================================
@@ -40,11 +40,7 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
         -- ToDo: there's no point looking through all the integer registers
         -- in order to find a floating-point one.
         compatibleClass i =
-          let regClass = classOfRealReg platform (RealRegSingle i)
-          in   (if regClass == RcVector128 then RcDouble else regClass)
-            == (if cls == RcVector128 then RcDouble else cls)
-            -- SIMD NCG TODO: giant hack to account for xmm registers being
-            -- used for Double with SSE2.
+          cls == classOfRealReg platform (RealRegSingle i)
 
 allocateReg :: RealReg -> FreeRegs -> FreeRegs
 allocateReg (RealRegSingle r) (FreeRegs f)


=====================================
compiler/GHC/CmmToAsm/X86/Instr.hs
=====================================
@@ -832,9 +832,7 @@ mkSpillInstr config reg fmt delta slot
           -- SIMD NCG TODO: panic for unsupported VecFormats (& same in mkLoadInstr)
           [MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off slot))
            -- Now shuffle the register, putting the high half into the lower half.
-          ,SHUFPD (VecFormat 2 FmtDouble W64) (ImmInt 0b01) (OpReg reg) reg
-            -- NB: this format doesn't matter, we emit the same instruction
-            -- regardless of what is stored...
+          ,SHUFPD fmt (ImmInt 0b01) (OpReg reg) reg
             -- SIMD NCG TODO: can we emit more efficient code here?
           ,MOV FF64 (OpReg reg) (OpAddr (spRel platform $ off (slot + 1)))]
         _ -> [MOV fmt (OpReg reg) (OpAddr (spRel platform $ off slot))]
@@ -922,7 +920,7 @@ mkRegRegMoveInstr platform src dst
                      ArchX86    -> MOV II32 (OpReg src) (OpReg dst)
                      ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
                      _          -> panic "X86.mkRegRegMoveInstr: Bad arch"
-        RcDouble    ->  MOV FF64 (OpReg src) (OpReg dst)
+        RcFloatOrVector    ->  MOV FF64 (OpReg src) (OpReg dst)
         -- this code is the lie we tell ourselves because both float and double
         -- use the same register class.on x86_64 and x86 32bit with SSE2,
         -- more plainly, both use the XMM registers


=====================================
compiler/GHC/CmmToAsm/X86/Regs.hs
=====================================
@@ -77,7 +77,7 @@ virtualRegSqueeze cls vr
                 VirtualRegHi{}          -> 1
                 _other                  -> 0
 
-        RcDouble
+        RcFloatOrVector
          -> case vr of
                 VirtualRegD{}           -> 1
                 VirtualRegF{}           -> 0
@@ -97,7 +97,7 @@ realRegSqueeze cls rr
                         | regNo < firstxmm -> 1
                         | otherwise     -> 0
 
-        RcDouble
+        RcFloatOrVector
          -> case rr of
                 RealRegSingle regNo
                         | regNo >= firstxmm  -> 1
@@ -243,7 +243,7 @@ classOfRealReg platform reg
     = case reg of
         RealRegSingle i
             | i <= lastint platform -> RcInteger
-            | i <= lastxmm platform -> RcDouble
+            | i <= lastxmm platform -> RcFloatOrVector
             | otherwise             -> panic "X86.Reg.classOfRealReg registerSingle too high"
 
 -- | Get the name of the register with this number.
@@ -272,11 +272,8 @@ Intel x86 architecture:
 - Registers 0-7 have 16-bit counterparts (ax, bx etc.)
 - Registers 0-3 have 8 bit counterparts (ah, bh etc.)
 
-The fp registers are all Double registers; we don't have any RcFloat class
-regs.  @regClass@ barfs if you give it a VirtualRegF, and mkVReg above should
-never generate them.
-
-TODO: cleanup modelling float vs double registers and how they are the same class.
+The fp registers support Float, Doubles and vectors of those, as well
+as vectors of integer values.
 -}
 
 


=====================================
compiler/GHC/Platform/Reg.hs
=====================================
@@ -119,9 +119,9 @@ classOfVirtualReg vr
   = case vr of
         VirtualRegI{}   -> RcInteger
         VirtualRegHi{}  -> RcInteger
-        VirtualRegF{}   -> RcFloat
-        VirtualRegD{}   -> RcDouble
-        VirtualRegVec{} -> RcVector128
+        VirtualRegF{}   -> RcFloatOrVector
+        VirtualRegD{}   -> RcFloatOrVector
+        VirtualRegVec{} -> RcFloatOrVector
 
 
 -- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform


=====================================
compiler/GHC/Platform/Reg/Class.hs
=====================================
@@ -19,26 +19,22 @@ import GHC.Builtin.Uniques
 --      We treat all registers in a class as being interchangeable.
 --
 data RegClass
+  -- | Supports (scalar) integers only.
   = RcInteger
-  | RcFloat
-  | RcDouble
-  | RcVector128
+  -- | Supports vectors (both integers & floats) as well as scalar values
+  -- (but in practice not used for scalar integer values).
+  | RcFloatOrVector
   deriving (Eq, Ord, Show)
 
 allRegClasses :: [RegClass]
-allRegClasses =
-  [ RcInteger, RcFloat, RcDouble, RcVector128 ]
+allRegClasses = [RcInteger, RcFloatOrVector]
 
 instance Uniquable RegClass where
     getUnique = \case
-      RcInteger   -> mkRegClassUnique 0
-      RcFloat     -> mkRegClassUnique 1
-      RcDouble    -> mkRegClassUnique 2
-      RcVector128 -> mkRegClassUnique 3
+      RcInteger       -> mkRegClassUnique 0
+      RcFloatOrVector -> mkRegClassUnique 1
 
 instance Outputable RegClass where
     ppr = \case
-      RcInteger   -> Outputable.text "I"
-      RcFloat     -> Outputable.text "F"
-      RcDouble    -> Outputable.text "D"
-      RcVector128 -> Outputable.text "V"
+      RcInteger       -> Outputable.text "I"
+      RcFloatOrVector -> Outputable.text "F"



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3bbc4fcd1a58596ccae43cf50da00f99a8880790

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/3bbc4fcd1a58596ccae43cf50da00f99a8880790
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240608/60be13f0/attachment-0001.html>