[Git][ghc/ghc][master] 4 commits: Revert "Division by constants optimization"
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Sat Jan 18 17:39:21 UTC 2025
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
b6f54188 by Ben Gamari at 2025-01-18T12:38:46-05:00
Revert "Division by constants optimization"
This appears to be responsible for the regression described in #25653.
This reverts commit daff1e30219d136977c71f42e82ccc58c9013cfb.
- - - - -
0fd90de8 by Ben Gamari at 2025-01-18T12:38:46-05:00
testsuite: Introduce div2 test
This is a useful test from !8392 which is worth keeping around.
- - - - -
32680979 by Ben Gamari at 2025-01-18T12:38:46-05:00
testsuite: Test shift correctness in mul2 test
- - - - -
163aa50a by Ben Gamari at 2025-01-18T12:38:46-05:00
testsuite: Add regression test for #25653
- - - - -
10 changed files:
- compiler/GHC/Cmm/Config.hs
- compiler/GHC/Cmm/MachOp.hs
- compiler/GHC/Cmm/Opt.hs
- compiler/GHC/Cmm/Pipeline.hs
- compiler/GHC/Cmm/Sink.hs
- compiler/GHC/Driver/Config/Cmm.hs
- compiler/GHC/StgToCmm/Prim.hs
- + testsuite/tests/numeric/should_run/T25653.hs
- + testsuite/tests/numeric/should_run/T25653.stdout
- testsuite/tests/numeric/should_run/all.T
Changes:
=====================================
compiler/GHC/Cmm/Config.hs
=====================================
@@ -24,8 +24,6 @@ data CmmConfig = CmmConfig
, cmmExternalDynamicRefs :: !Bool -- ^ Generate code to link against dynamic libraries
, cmmDoCmmSwitchPlans :: !Bool -- ^ Should the Cmm pass replace Stg switch statements
, cmmSplitProcPoints :: !Bool -- ^ Should Cmm split proc points or not
- , cmmAllowMul2 :: !Bool -- ^ Does this platform support mul2
- , cmmOptConstDivision :: !Bool -- ^ Should we optimize constant divisors
}
-- | retrieve the target Cmm platform
=====================================
compiler/GHC/Cmm/MachOp.hs
=====================================
@@ -7,7 +7,6 @@ module GHC.Cmm.MachOp
, pprMachOp, isCommutableMachOp, isAssociativeMachOp
, isComparisonMachOp, maybeIntComparison, machOpResultType
, machOpArgReps, maybeInvertComparison, isFloatComparison
- , isCommutableCallishMachOp
-- MachOp builders
, mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot
@@ -846,17 +845,3 @@ machOpMemcpyishAlign op = case op of
MO_Memmove align -> Just align
MO_Memcmp align -> Just align
_ -> Nothing
-
-isCommutableCallishMachOp :: CallishMachOp -> Bool
-isCommutableCallishMachOp op =
- case op of
- MO_x64_Add -> True
- MO_x64_Mul -> True
- MO_x64_Eq -> True
- MO_x64_Ne -> True
- MO_x64_And -> True
- MO_x64_Or -> True
- MO_x64_Xor -> True
- MO_S_Mul2 _ -> True
- MO_U_Mul2 _ -> True
- _ -> False
=====================================
compiler/GHC/Cmm/Opt.hs
=====================================
@@ -5,52 +5,27 @@
-- (c) The University of Glasgow 2006
--
-----------------------------------------------------------------------------
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE TypeApplications #-}
-{-# LANGUAGE PatternSynonyms #-}
module GHC.Cmm.Opt (
constantFoldNode,
constantFoldExpr,
cmmMachOpFold,
- cmmMachOpFoldM,
- Opt, runOpt
+ cmmMachOpFoldM
) where
import GHC.Prelude
-import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Utils
import GHC.Cmm
-import GHC.Cmm.Config
-import GHC.Types.Unique.DSM
-
import GHC.Utils.Misc
+
import GHC.Utils.Panic
import GHC.Platform
import Data.Maybe
-import Data.Word
-import GHC.Exts (oneShot)
-import Control.Monad
-
-constantFoldNode :: CmmNode e x -> Opt (CmmNode e x)
-constantFoldNode (CmmUnsafeForeignCall (PrimTarget op) res args)
- = traverse constantFoldExprOpt args >>= cmmCallishMachOpFold op res
-constantFoldNode node
- = mapExpOpt constantFoldExprOpt node
-
-constantFoldExprOpt :: CmmExpr -> Opt CmmExpr
-constantFoldExprOpt e = wrapRecExpOpt f e
- where
- f (CmmMachOp op args)
- = do
- cfg <- getConfig
- case cmmMachOpFold (cmmPlatform cfg) op args of
- CmmMachOp op' args' -> fromMaybe (CmmMachOp op' args') <$> cmmMachOpFoldOptM cfg op' args'
- e -> pure e
- f (CmmRegOff r 0) = pure (CmmReg r)
- f (CmmLit (CmmInt x rep)) = pure (CmmLit $ CmmInt (narrowU rep x) rep)
- f e = pure e
+
+
+constantFoldNode :: Platform -> CmmNode e x -> CmmNode e x
+constantFoldNode platform = mapExp (constantFoldExpr platform)
constantFoldExpr :: Platform -> CmmExpr -> CmmExpr
constantFoldExpr platform = wrapRecExp f
@@ -321,7 +296,7 @@ cmmMachOpFoldM platform cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep)
maybe_comparison _ _ _ = Nothing
--- We can often do something with constants of 0, 1 and (-1) ...
+-- We can often do something with constants of 0 and 1 ...
-- See Note [Comparison operators]
cmmMachOpFoldM platform mop [x, y@(CmmLit (CmmInt 0 _))]
@@ -392,8 +367,6 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _))]
MO_Mul rep
| Just p <- exactLog2 n ->
Just $! (cmmMachOpFold platform (MO_Shl rep) [x, CmmLit (CmmInt p $ wordWidth platform)])
- -- The optimization for division by power of 2 is technically duplicated, but since at least one other part of ghc uses
- -- the pure `constantFoldExpr` this remains
MO_U_Quot rep
| Just p <- exactLog2 n ->
Just $! (cmmMachOpFold platform (MO_U_Shr rep) [x, CmmLit (CmmInt p $ wordWidth platform)])
@@ -402,19 +375,46 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _))]
Just $! (cmmMachOpFold platform (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
MO_S_Quot rep
| Just p <- exactLog2 n,
- CmmReg _ <- x ->
+ CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
+ -- it is a reg. FIXME: remove this restriction.
Just $! (cmmMachOpFold platform (MO_S_Shr rep)
- [signedQuotRemHelper platform n x rep p, CmmLit (CmmInt p $ wordWidth platform)])
+ [signedQuotRemHelper rep p, CmmLit (CmmInt p $ wordWidth platform)])
MO_S_Rem rep
| Just p <- exactLog2 n,
- CmmReg _ <- x ->
+ CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require
+ -- it is a reg. FIXME: remove this restriction.
-- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
-- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
-- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
Just $! (cmmMachOpFold platform (MO_Sub rep)
[x, cmmMachOpFold platform (MO_And rep)
- [signedQuotRemHelper platform n x rep p, CmmLit (CmmInt (- n) rep)]])
+ [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
_ -> Nothing
+ where
+ -- In contrast with unsigned integers, for signed ones
+ -- shift right is not the same as quot, because it rounds
+ -- to minus infinity, whereas quot rounds toward zero.
+ -- To fix this up, we add one less than the divisor to the
+ -- dividend if it is a negative number.
+ --
+ -- to avoid a test/jump, we use the following sequence:
+ -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
+ -- x2 = y & (divisor-1)
+ -- result = x + x2
+ -- this could be done a bit more simply using conditional moves,
+ -- but we're processor independent here.
+ --
+ -- we optimise the divide by 2 case slightly, generating
+ -- x1 = x >> word_size-1 (unsigned)
+ -- return = x + x1
+ signedQuotRemHelper :: Width -> Integer -> CmmExpr
+ signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
+ where
+ bits = fromIntegral (widthInBits rep) - 1
+ shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+ x1 = CmmMachOp shr [x, CmmLit (CmmInt bits $ wordWidth platform)]
+ x2 = if p == 1 then x1 else
+ CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
-- Unfortunately this needs a unique supply because x might not be a
@@ -448,533 +448,3 @@ That's what the constant-folding operations on comparison operators do above.
isPicReg :: CmmExpr -> Bool
isPicReg (CmmReg (CmmGlobal (GlobalRegUse PicBaseReg _))) = True
isPicReg _ = False
-
-canOptimizeDivision :: CmmConfig -> Width -> Bool
-canOptimizeDivision cfg rep = cmmOptConstDivision cfg &&
- -- we can either widen the arguments to simulate mul2 or use mul2 directly for the platform word size
- (rep < wordWidth platform || (rep == wordWidth platform && cmmAllowMul2 cfg))
- where platform = cmmPlatform cfg
-
--- -----------------------------------------------------------------------------
--- Folding callish machops
-
-cmmCallishMachOpFold :: CallishMachOp -> [CmmFormal] -> [CmmActual] -> Opt (CmmNode O O)
-cmmCallishMachOpFold op res args =
- fromMaybe (CmmUnsafeForeignCall (PrimTarget op) res args) <$> (getConfig >>= \cfg -> cmmCallishMachOpFoldM cfg op res args)
-
-cmmCallishMachOpFoldM :: CmmConfig -> CallishMachOp -> [CmmFormal] -> [CmmActual] -> Opt (Maybe (CmmNode O O))
-
--- If possible move the literals to the right, the following cases assume that to be the case
-cmmCallishMachOpFoldM cfg op res [x@(CmmLit _),y]
- | isCommutableCallishMachOp op && not (isLit y) = cmmCallishMachOpFoldM cfg op res [y,x]
-
--- Both arguments are literals, replace with the result
-cmmCallishMachOpFoldM _ op res [CmmLit (CmmInt x _), CmmLit (CmmInt y _)]
- = case op of
- MO_S_Mul2 rep
- | [rHiNeeded,rHi,rLo] <- res -> do
- let resSz = widthInBits rep
- resVal = (narrowS rep x) * (narrowS rep y)
- high = resVal `shiftR` resSz
- low = narrowS rep resVal
- isHiNeeded = high /= low `shiftR` resSz
- isHiNeededVal = if isHiNeeded then 1 else 0
- prependNode $! CmmAssign (CmmLocal rHiNeeded) (CmmLit $ CmmInt isHiNeededVal rep)
- prependNode $! CmmAssign (CmmLocal rHi) (CmmLit $ CmmInt high rep)
- pure . Just $! CmmAssign (CmmLocal rLo) (CmmLit $ CmmInt low rep)
- MO_U_Mul2 rep
- | [rHi,rLo] <- res -> do
- let resSz = widthInBits rep
- resVal = (narrowU rep x) * (narrowU rep y)
- high = resVal `shiftR` resSz
- low = narrowU rep resVal
- prependNode $! CmmAssign (CmmLocal rHi) (CmmLit $ CmmInt high rep)
- pure . Just $! CmmAssign (CmmLocal rLo) (CmmLit $ CmmInt low rep)
- MO_S_QuotRem rep
- | [rQuot, rRem] <- res,
- y /= 0 -> do
- let (q,r) = quotRem (narrowS rep x) (narrowS rep y)
- prependNode $! CmmAssign (CmmLocal rQuot) (CmmLit $ CmmInt q rep)
- pure . Just $! CmmAssign (CmmLocal rRem) (CmmLit $ CmmInt r rep)
- MO_U_QuotRem rep
- | [rQuot, rRem] <- res,
- y /= 0 -> do
- let (q,r) = quotRem (narrowU rep x) (narrowU rep y)
- prependNode $! CmmAssign (CmmLocal rQuot) (CmmLit $ CmmInt q rep)
- pure . Just $! CmmAssign (CmmLocal rRem) (CmmLit $ CmmInt r rep)
- _ -> pure Nothing
-
--- 0, 1 or -1 as one of the constants
-
-cmmCallishMachOpFoldM _ op res [_, CmmLit (CmmInt 0 _)]
- = case op of
- -- x * 0 == 0
- MO_S_Mul2 rep
- | [rHiNeeded, rHi, rLo] <- res -> do
- prependNode $! CmmAssign (CmmLocal rHiNeeded) (CmmLit $ CmmInt 0 rep)
- prependNode $! CmmAssign (CmmLocal rHi) (CmmLit $ CmmInt 0 rep)
- pure . Just $! CmmAssign (CmmLocal rLo) (CmmLit $ CmmInt 0 rep)
- -- x * 0 == 0
- MO_U_Mul2 rep
- | [rHi, rLo] <- res -> do
- prependNode $! CmmAssign (CmmLocal rHi) (CmmLit $ CmmInt 0 rep)
- pure . Just $! CmmAssign (CmmLocal rLo) (CmmLit $ CmmInt 0 rep)
- _ -> pure Nothing
-
-cmmCallishMachOpFoldM _ op res [CmmLit (CmmInt 0 _), _]
- = case op of
- -- 0 quotRem d == (0,0)
- MO_S_QuotRem rep
- | [rQuot, rRem] <- res -> do
- prependNode $! CmmAssign (CmmLocal rQuot) (CmmLit $ CmmInt 0 rep)
- pure . Just $! CmmAssign (CmmLocal rRem) (CmmLit $ CmmInt 0 rep)
- -- 0 quotRem d == (0,0)
- MO_U_QuotRem rep
- | [rQuot,rRem] <- res -> do
- prependNode $! CmmAssign (CmmLocal rQuot) (CmmLit $ CmmInt 0 rep)
- pure . Just $! CmmAssign (CmmLocal rRem) (CmmLit $ CmmInt 0 rep)
- _ -> pure Nothing
-
-cmmCallishMachOpFoldM cfg op res [x, CmmLit (CmmInt 1 _)]
- = case op of
- -- x * 1 == x -- Note: The high word needs to be a sign extension of the low word, so we use a sign extending shift
- MO_S_Mul2 rep
- | [rHiNeeded, rHi, rLo] <- res -> do
- let platform = cmmPlatform cfg
- wordRep = wordWidth platform
- repInBits = toInteger $ widthInBits rep
- prependNode $! CmmAssign (CmmLocal rHiNeeded) (CmmLit $ CmmInt 0 rep)
- prependNode $! CmmAssign (CmmLocal rHi) (cmmMachOpFold platform (MO_S_Shr rep) [x, CmmLit $ CmmInt (repInBits - 1) wordRep])
- pure . Just $! CmmAssign (CmmLocal rLo) x
- -- x * 1 == x
- MO_U_Mul2 rep
- | [rHi, rLo] <- res -> do
- prependNode $! CmmAssign (CmmLocal rHi) (CmmLit $ CmmInt 0 rep)
- pure . Just $! CmmAssign (CmmLocal rLo) x
- -- x quotRem 1 == (x, 0)
- MO_S_QuotRem rep
- | [rQuot, rRem] <- res -> do
- prependNode $! CmmAssign (CmmLocal rQuot) x
- pure . Just $! CmmAssign (CmmLocal rRem) (CmmLit $ CmmInt 0 rep)
- -- x quotRem 1 == (x, 0)
- MO_U_QuotRem rep
- | [rQuot, rRem] <- res -> do
- prependNode $! CmmAssign (CmmLocal rQuot) x
- pure . Just $! CmmAssign (CmmLocal rRem) (CmmLit $ CmmInt 0 rep)
- _ -> pure Nothing
-
--- handle quotRem with a constant divisor
-
-cmmCallishMachOpFoldM cfg op res [n, CmmLit (CmmInt d' _)]
- = case op of
- MO_S_QuotRem rep
- | Just p <- exactLog2 d,
- [rQuot,rRem] <- res -> do
- n' <- intoRegister n (cmmBits rep)
- -- first prepend the optimized division by a power 2
- prependNode $! CmmAssign (CmmLocal rQuot)
- (cmmMachOpFold platform (MO_S_Shr rep)
- [signedQuotRemHelper platform d n' rep p, CmmLit (CmmInt p $ wordWidth platform)])
- -- then output an optimized remainder by a power of 2
- pure . Just $! CmmAssign (CmmLocal rRem)
- (cmmMachOpFold platform (MO_Sub rep)
- [n', cmmMachOpFold platform (MO_And rep)
- [signedQuotRemHelper platform d n' rep p, CmmLit (CmmInt (- d) rep)]])
- | canOptimizeDivision cfg rep,
- d /= (-1), d /= 0, d /= 1,
- [rQuot,rRem] <- res -> do
- -- we are definitely going to use n multiple times, so put it into a register
- n' <- intoRegister n (cmmBits rep)
- -- generate an optimized (signed) division of n by d
- q <- generateDivisionBySigned platform cfg rep n' d
- -- we also need the result multiple times to calculate the remainder
- q' <- intoRegister q (cmmBits rep)
-
- prependNode $! CmmAssign (CmmLocal rQuot) q'
- -- The remainder now becomes n - q * d
- pure . Just $! CmmAssign (CmmLocal rRem) $ CmmMachOp (MO_Sub rep) [n', CmmMachOp (MO_Mul rep) [q', CmmLit $ CmmInt d rep]]
- where
- platform = cmmPlatform cfg
- d = narrowS rep d'
- MO_U_QuotRem rep
- | Just p <- exactLog2 d,
- [rQuot,rRem] <- res -> do
- -- first prepend the optimized division by a power 2
- prependNode $! CmmAssign (CmmLocal rQuot) $ CmmMachOp (MO_U_Shr rep) [n, CmmLit (CmmInt p $ wordWidth platform)]
- -- then output an optimized remainder by a power of 2
- pure . Just $! CmmAssign (CmmLocal rRem) $ CmmMachOp (MO_And rep) [n, CmmLit (CmmInt (d - 1) rep)]
- | canOptimizeDivision cfg rep,
- d /= 0, d /= 1,
- [rQuot,rRem] <- res -> do
- -- we are definitely going to use n multiple times, so put it into a register
- n' <- intoRegister n (cmmBits rep)
- -- generate an optimized (unsigned) division of n by d
- q <- generateDivisionByUnsigned platform cfg rep n' d
- -- we also need the result multiple times to calculate the remainder
- q' <- intoRegister q (cmmBits rep)
-
- prependNode $! CmmAssign (CmmLocal rQuot) q'
- -- The remainder now becomes n - q * d
- pure . Just $! CmmAssign (CmmLocal rRem) $ CmmMachOp (MO_Sub rep) [n', CmmMachOp (MO_Mul rep) [q', CmmLit $ CmmInt d rep]]
- where
- platform = cmmPlatform cfg
- d = narrowU rep d'
- _ -> pure Nothing
-
-cmmCallishMachOpFoldM _ _ _ _ = pure Nothing
-
--- -----------------------------------------------------------------------------
--- Specialized constant folding for MachOps which sometimes need to expand into multiple nodes
-
-cmmMachOpFoldOptM :: CmmConfig -> MachOp -> [CmmExpr] -> Opt (Maybe CmmExpr)
-
-cmmMachOpFoldOptM cfg op [n, CmmLit (CmmInt d' _)] =
- case op of
- MO_S_Quot rep
- -- recheck for power of 2 division. This may not be handled by cmmMachOpFoldM if n is not in a register
- | Just p <- exactLog2 d -> do
- n' <- intoRegister n (cmmBits rep)
- pure . Just $! cmmMachOpFold platform (MO_S_Shr rep)
- [ signedQuotRemHelper platform d n' rep p
- , CmmLit (CmmInt p $ wordWidth platform)
- ]
- | canOptimizeDivision cfg rep,
- d /= (-1), d /= 0, d /= 1 -> Just <$!> generateDivisionBySigned platform cfg rep n d
- where d = narrowS rep d'
- MO_S_Rem rep
- -- recheck for power of 2 remainder. This may not be handled by cmmMachOpFoldM if n is not in a register
- | Just p <- exactLog2 d -> do
- n' <- intoRegister n (cmmBits rep)
- pure . Just $! cmmMachOpFold platform (MO_Sub rep)
- [ n'
- , cmmMachOpFold platform (MO_And rep)
- [ signedQuotRemHelper platform d n' rep p
- , CmmLit (CmmInt (- d) rep)
- ]
- ]
- | canOptimizeDivision cfg rep,
- d /= (-1), d /= 0, d /= 1 -> do
- n' <- intoRegister n (cmmBits rep)
- -- first generate the division
- q <- generateDivisionBySigned platform cfg rep n' d
- -- then calculate the remainder by n - q * d
- pure . Just $! CmmMachOp (MO_Sub rep) [n', CmmMachOp (MO_Mul rep) [q, CmmLit $ CmmInt d rep]]
- where d = narrowS rep d'
- MO_U_Quot rep
- -- No need to recheck power of 2 division because cmmMachOpFoldM always handles that case
- | canOptimizeDivision cfg rep,
- d /= 0, d /= 1, Nothing <- exactLog2 d -> Just <$!> generateDivisionByUnsigned platform cfg rep n d
- where d = narrowU rep d'
- MO_U_Rem rep
- -- No need to recheck power of 2 remainder because cmmMachOpFoldM always handles that case
- | canOptimizeDivision cfg rep,
- d /= 0, d /= 1, Nothing <- exactLog2 d -> do
- n' <- intoRegister n (cmmBits rep)
- -- first generate the division
- q <- generateDivisionByUnsigned platform cfg rep n d
- -- then calculate the remainder by n - q * d
- pure . Just $! CmmMachOp (MO_Sub rep) [n', CmmMachOp (MO_Mul rep) [q, CmmLit $ CmmInt d rep]]
- where d = narrowU rep d'
- _ -> pure Nothing
- where platform = cmmPlatform cfg
-
-cmmMachOpFoldOptM _ _ _ = pure Nothing
-
--- -----------------------------------------------------------------------------
--- Utils for prepending new nodes
-
--- Move an expression into a register to possibly use it multiple times
-intoRegister :: CmmExpr -> CmmType -> Opt CmmExpr
-intoRegister e@(CmmReg _) _ = pure e
-intoRegister expr ty = do
- u <- getUniqueM
- let reg = LocalReg u ty
- CmmReg (CmmLocal reg) <$ prependNode (CmmAssign (CmmLocal reg) expr)
-
-prependNode :: CmmNode O O -> Opt ()
-prependNode n = Opt $ \_ xs -> pure (xs ++ [n], ())
-
--- -----------------------------------------------------------------------------
--- Division by constants utils
-
--- Helper for division by a power of 2
--- In contrast with unsigned integers, for signed ones
--- shift right is not the same as quot, because it rounds
--- to minus infinity, whereas quot rounds toward zero.
--- To fix this up, we add one less than the divisor to the
--- dividend if it is a negative number.
---
--- to avoid a test/jump, we use the following sequence:
--- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
--- x2 = y & (divisor-1)
--- result = x + x2
--- this could be done a bit more simply using conditional moves,
--- but we're processor independent here.
---
--- we optimize the divide by 2 case slightly, generating
--- x1 = x >> word_size-1 (unsigned)
--- return = x + x1
-signedQuotRemHelper :: Platform -> Integer -> CmmExpr -> Width -> Integer -> CmmExpr
-signedQuotRemHelper platform n x rep p = CmmMachOp (MO_Add rep) [x, x2]
- where
- bits = fromIntegral (widthInBits rep) - 1
- shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
- x1 = CmmMachOp shr [x, CmmLit (CmmInt bits $ wordWidth platform)]
- x2 = if p == 1 then x1 else
- CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
-
-{- Note: [Division by constants]
-
-Integer division is floor(n / d), the goal is to find m,p
-such that floor((m * n) / 2^p) = floor(n / d).
-
-The idea being: n/d = n * (1/d). But we cannot store 1/d in an integer without
-some error, so we choose some 2^p / d such that the error ends up small and
-thus vanishes when we divide by 2^p again.
-
-The algorithm below to generate these numbers is taken from Hacker's Delight
-Second Edition Chapter 10 "Integer division by constants". The chapter also
-contains proof that this method does indeed produce correct results.
-
-However this is a much more literal interpretation of the algorithm,
-which we can use because of the unbounded Integer type. Hacker's Delight
-also provides a much more complex algorithm which computes these numbers
-without the need to exceed the word size, but that is not necessary here.
--}
-
-generateDivisionBySigned :: Platform -> CmmConfig -> Width -> CmmExpr -> Integer -> Opt CmmExpr
-
--- Sanity checks, division will generate incorrect results or undesirable code for these cases
--- cmmMachOpFoldM and cmmMachOpFoldOptM should have already handled these cases!
-generateDivisionBySigned _ _ _ _ 0 = panic "generate signed division with 0"
-generateDivisionBySigned _ _ _ _ 1 = panic "generate signed division with 1"
-generateDivisionBySigned _ _ _ _ (-1) = panic "generate signed division with -1"
-generateDivisionBySigned _ _ _ _ d | Just _ <- exactLog2 d = panic $ "generate signed division with " ++ show d
-
-generateDivisionBySigned platform _cfg rep n divisor = do
- -- We only duplicate n' if we actually need to add/subtract it, so we may not need it in a register
- n' <- if sign == 0 then pure n else intoRegister n resRep
-
- -- Set up mul2
- (shift', qExpr) <- mul2 n'
-
- -- add/subtract n if necessary
- let qExpr' = case sign of
- 1 -> CmmMachOp (MO_Add rep) [qExpr, n']
- -1 -> CmmMachOp (MO_Sub rep) [qExpr, n']
- _ -> qExpr
-
- qExpr'' <- intoRegister (cmmMachOpFold platform (MO_S_Shr rep) [qExpr', CmmLit $ CmmInt shift' wordRep]) resRep
-
- -- Lastly add the sign of the quotient to correct for negative results
- pure $! cmmMachOpFold platform
- (MO_Add rep) [qExpr'', cmmMachOpFold platform (MO_U_Shr rep) [qExpr'', CmmLit $ CmmInt (toInteger $ widthInBits rep - 1) wordRep]]
- where
- resRep = cmmBits rep
- wordRep = wordWidth platform
- (magic, sign, shift) = divisionMagicS rep divisor
- -- generate the multiply with the magic number
- mul2 n
- -- Using mul2 for sub-word sizes regresses for signed integers only
- | rep == wordWidth platform = do
- (r1, r2, r3) <- (,,) <$> getUniqueM <*> getUniqueM <*> getUniqueM
- let rg1 = LocalReg r1 resRep
- resReg = LocalReg r2 resRep
- rg3 = LocalReg r3 resRep
- res <- CmmReg (CmmLocal resReg) <$ prependNode (CmmUnsafeForeignCall (PrimTarget (MO_S_Mul2 rep)) [rg1, resReg, rg3] [n, CmmLit $ CmmInt magic rep])
- pure (shift, res)
- -- widen the register and multiply without the MUL2 instruction
- -- if we don't need an additional add after this we can combine the shifts
- | otherwise = pure (if sign == 0 then 0 else shift, res)
- where
- wordRep = wordWidth platform
- -- (n * magic) >> widthInBits + (if sign == 0 then shift else 0) -- With conversion in between to not overflow
- res = cmmMachOpFold platform (MO_SS_Conv wordRep rep)
- [ cmmMachOpFold platform (MO_S_Shr wordRep)
- [ cmmMachOpFold platform (MO_Mul wordRep)
- [ cmmMachOpFold platform (MO_SS_Conv rep wordRep) [n]
- , CmmLit $ CmmInt magic wordRep
- ]
- -- Check if we need to generate an add/subtract later. If not we can combine this with the postshift
- , CmmLit $ CmmInt ((if sign == 0 then toInteger shift else 0) + (toInteger $ widthInBits rep)) wordRep
- ]
- ]
-
--- See hackers delight for how and why this works (chapter in note [Division by constants])
-divisionMagicS :: Width -> Integer -> (Integer, Integer, Integer)
-divisionMagicS rep divisor = (magic, sign, toInteger $ p - wSz)
- where
- sign = if divisor > 0
- then if magic < 0 then 1 else 0
- else if magic < 0 then 0 else -1
- wSz = widthInBits rep
- ad = abs divisor
- t = (1 `shiftL` (wSz - 1)) + if divisor > 0 then 0 else 1
- anc = t - 1 - rem t ad
- go p'
- | twoP > anc * (ad - rem twoP ad) = p'
- | otherwise = go (p' + 1)
- where twoP = 1 `shiftL` p'
- p = go wSz
- am = (twoP + ad - rem twoP ad) `quot` ad
- where twoP = 1 `shiftL` p
- magic = narrowS rep $ if divisor > 0 then am else -am
-
-generateDivisionByUnsigned :: Platform -> CmmConfig -> Width -> CmmExpr -> Integer -> Opt CmmExpr
--- Sanity checks, division will generate incorrect results or undesirable code for these cases
--- cmmMachOpFoldM and cmmMachOpFoldOptM should have already handled these cases!
-generateDivisionByUnsigned _ _ _ _ 0 = panic "generate signed division with 0"
-generateDivisionByUnsigned _ _ _ _ 1 = panic "generate signed division with 1"
-generateDivisionByUnsigned _ _ _ _ d | Just _ <- exactLog2 d = panic $ "generate signed division with " ++ show d
-
-generateDivisionByUnsigned platform cfg rep n divisor = do
- -- We only duplicate n' if we actually need to add/subtract it, so we may not need it in a register
- n' <- if not needsAdd -- Invariant: We also never preshift if we need an add, thus we don't need n in a register
- then pure $! cmmMachOpFold platform (MO_U_Shr rep) [n, CmmLit $ CmmInt preShift wordRep]
- else intoRegister n resRep
-
- -- Set up mul2
- (postShift', qExpr) <- mul2 n'
-
- -- add/subtract n if necessary
- let qExpr' = if needsAdd
- -- This is qExpr + (n - qExpr) / 2 = (qExpr + n) / 2 but with a guarantee that it'll not overflow
- then cmmMachOpFold platform (MO_Add rep)
- [ cmmMachOpFold platform (MO_U_Shr rep)
- [ cmmMachOpFold platform (MO_Sub rep) [n', qExpr]
- , CmmLit $ CmmInt 1 wordRep
- ]
- , qExpr
- ]
- else qExpr
- -- If we already divided by 2 in the add, remember to shift one bit less
- -- Hacker's Delight, Edition 2 Page 234: postShift > 0 if we needed an add, except if the divisor
- -- is 1, which we checked for above
- finalShift = if needsAdd then postShift' - 1 else postShift'
-
- -- apply the final postShift
- pure $! cmmMachOpFold platform (MO_U_Shr rep) [qExpr', CmmLit $ CmmInt finalShift wordRep]
- where
- resRep = cmmBits rep
- wordRep = wordWidth platform
- (preShift, magic, needsAdd, postShift) =
- let withPre = divisionMagicU rep True divisor
- noPre = divisionMagicU rep False divisor
- in case (withPre, noPre) of
- -- Use whatever does not cause us to take the expensive case
- ((_, _, False, _), (_, _, True, _)) -> withPre
- -- If we cannot avoid the expensive case, don't bother with the pre shift
- _ -> noPre
- -- generate the multiply with the magic number
- mul2 n
- | rep == wordWidth platform || (cmmAllowMul2 cfg && needsAdd) = do
- (r1, r2) <- (,) <$> getUniqueM <*> getUniqueM
- let rg1 = LocalReg r1 resRep
- resReg = LocalReg r2 resRep
- res <- CmmReg (CmmLocal resReg) <$ prependNode (CmmUnsafeForeignCall (PrimTarget (MO_U_Mul2 rep)) [resReg, rg1] [n, CmmLit $ CmmInt magic rep])
- pure (postShift, res)
- | otherwise = do
- pure (if needsAdd then postShift else 0, res)
- where
- wordRep = wordWidth platform
- -- (n * magic) >> widthInBits + (if sign == 0 then shift else 0) -- With conversion in between to not overflow
- res = cmmMachOpFold platform (MO_UU_Conv wordRep rep)
- [ cmmMachOpFold platform (MO_U_Shr wordRep)
- [ cmmMachOpFold platform (MO_Mul wordRep)
- [ cmmMachOpFold platform (MO_UU_Conv rep wordRep) [n]
- , CmmLit $ CmmInt magic wordRep
- ]
- -- Check if we need to generate an add later. If not we can combine this with the postshift
- , CmmLit $ CmmInt ((if needsAdd then 0 else postShift) + (toInteger $ widthInBits rep)) wordRep
- ]
- ]
-
--- See hackers delight for how and why this works (chapter in note [Division by constants])
--- The preshift isn't described there, but the idea is:
--- If a divisor d has n trailing zeros, then d is a multiple of 2^n. Since we want to divide x by d
--- we can also calculate (x / 2^n) / (d / 2^n) which may then not require an extra addition.
---
--- The addition performs: quotient + dividend, but we need to avoid overflows, so we actually need to
--- calculate: quotient + (dividend - quotient) / 2 = (quotient + dividend) / 2
--- Thus if the preshift can avoid all of this, we have 1 operation in place of 3.
---
--- The decision to use the preshift is made somewhere else, here we only report if the addition is needed
-divisionMagicU :: Width -> Bool -> Integer -> (Integer, Integer, Bool, Integer)
-divisionMagicU rep doPreShift divisor = (toInteger zeros, magic, needsAdd, toInteger $ p - wSz)
- where
- wSz = widthInBits rep
- zeros = if doPreShift then countTrailingZeros $ fromInteger @Word64 divisor else 0
- d = divisor `shiftR` zeros
- ones = ((1 `shiftL` wSz) - 1) `shiftR` zeros
- nc = ones - rem (ones - d) d
- go p'
- | twoP > nc * (d - 1 - rem (twoP - 1) d) = p'
- | otherwise = go (p' + 1)
- where twoP = 1 `shiftL` p'
- p = go wSz
- m = (twoP + d - 1 - rem (twoP - 1) d) `quot` d
- where twoP = 1 `shiftL` p
- needsAdd = d < 1 `shiftL` (p - wSz)
- magic = if needsAdd then m - (ones + 1) else m
-
--- -----------------------------------------------------------------------------
--- Opt monad
-
-newtype Opt a = OptI { runOptI :: CmmConfig -> [CmmNode O O] -> UniqDSM ([CmmNode O O], a) }
-
--- | Pattern synonym for 'Opt', as described in Note [The one-shot state
--- monad trick].
-pattern Opt :: (CmmConfig -> [CmmNode O O] -> UniqDSM ([CmmNode O O], a)) -> Opt a
-pattern Opt f <- OptI f
- where Opt f = OptI . oneShot $ \cfg -> oneShot $ \out -> f cfg out
-{-# COMPLETE Opt #-}
-
-runOpt :: CmmConfig -> Opt a -> UniqDSM ([CmmNode O O], a)
-runOpt cf (Opt g) = g cf []
-
-getConfig :: Opt CmmConfig
-getConfig = Opt $ \cf xs -> pure (xs, cf)
-
-instance Functor Opt where
- fmap f (Opt g) = Opt $ \cf xs -> fmap (fmap f) (g cf xs)
-
-instance Applicative Opt where
- pure a = Opt $ \_ xs -> pure (xs, a)
- ff <*> fa = do
- f <- ff
- f <$> fa
-
-instance Monad Opt where
- Opt g >>= f = Opt $ \cf xs -> do
- (ys, a) <- g cf xs
- runOptI (f a) cf ys
-
-instance MonadGetUnique Opt where
- getUniqueM = Opt $ \_ xs -> (xs,) <$> getUniqueDSM
-
-mapForeignTargetOpt :: (CmmExpr -> Opt CmmExpr) -> ForeignTarget -> Opt ForeignTarget
-mapForeignTargetOpt exp (ForeignTarget e c) = flip ForeignTarget c <$> exp e
-mapForeignTargetOpt _ m@(PrimTarget _) = pure m
-
-wrapRecExpOpt :: (CmmExpr -> Opt CmmExpr) -> CmmExpr -> Opt CmmExpr
-wrapRecExpOpt f (CmmMachOp op es) = traverse (wrapRecExpOpt f) es >>= f . CmmMachOp op
-wrapRecExpOpt f (CmmLoad addr ty align) = wrapRecExpOpt f addr >>= \newAddr -> f (CmmLoad newAddr ty align)
-wrapRecExpOpt f e = f e
-
-mapExpOpt :: (CmmExpr -> Opt CmmExpr) -> CmmNode e x -> Opt (CmmNode e x)
-mapExpOpt _ f@(CmmEntry{}) = pure f
-mapExpOpt _ m@(CmmComment _) = pure m
-mapExpOpt _ m@(CmmTick _) = pure m
-mapExpOpt f (CmmUnwind regs) = CmmUnwind <$> traverse (traverse (traverse f)) regs
-mapExpOpt f (CmmAssign r e) = CmmAssign r <$> f e
-mapExpOpt f (CmmStore addr e align) = CmmStore <$> f addr <*> f e <*> pure align
-mapExpOpt f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall <$> mapForeignTargetOpt f tgt <*> pure fs <*> traverse f as
-mapExpOpt _ l@(CmmBranch _) = pure l
-mapExpOpt f (CmmCondBranch e ti fi l) = f e >>= \newE -> pure (CmmCondBranch newE ti fi l)
-mapExpOpt f (CmmSwitch e ids) = flip CmmSwitch ids <$> f e
-mapExpOpt f n at CmmCall {cml_target=tgt} = f tgt >>= \newTgt -> pure n{cml_target = newTgt}
-mapExpOpt f (CmmForeignCall tgt fs as succ ret_args updfr intrbl)
- = do
- newTgt <- mapForeignTargetOpt f tgt
- newAs <- traverse f as
- pure $ CmmForeignCall newTgt fs newAs succ ret_args updfr intrbl
=====================================
compiler/GHC/Cmm/Pipeline.hs
=====================================
@@ -137,12 +137,9 @@ cpsTop logger platform cfg dus proc =
dump Opt_D_dump_cmm_sp "Layout Stack" g
----------- Sink and inline assignments --------------------------------
- (g, dus) <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
- if cmmOptSink cfg
- then pure $ runUniqueDSM dus $ cmmSink cfg g
- else return (g, dus)
- dump Opt_D_dump_cmm_sink "Sink assignments" g
-
+ g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout]
+ condPass (cmmOptSink cfg) (cmmSink platform) g
+ Opt_D_dump_cmm_sink "Sink assignments"
------------- CAF analysis ----------------------------------------------
let cafEnv = {-# SCC "cafAnal" #-} cafAnal platform call_pps l g
=====================================
compiler/GHC/Cmm/Sink.hs
=====================================
@@ -20,8 +20,6 @@ import GHC.Platform.Regs
import GHC.Platform
import GHC.Types.Unique.FM
-import GHC.Types.Unique.DSM
-import GHC.Cmm.Config
import Data.List (partition)
import Data.Maybe
@@ -152,10 +150,9 @@ type Assignments = [Assignment]
-- y = e2
-- x = e1
-cmmSink :: CmmConfig -> CmmGraph -> UniqDSM CmmGraph
-cmmSink cfg graph = ofBlockList (g_entry graph) <$> sink mapEmpty blocks
+cmmSink :: Platform -> CmmGraph -> CmmGraph
+cmmSink platform graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
where
- platform = cmmPlatform cfg
liveness = cmmLocalLivenessL platform graph
getLive l = mapFindWithDefault emptyLRegSet l liveness
@@ -163,41 +160,11 @@ cmmSink cfg graph = ofBlockList (g_entry graph) <$> sink mapEmpty blocks
join_pts = findJoinPoints blocks
- sink :: LabelMap Assignments -> [CmmBlock] -> UniqDSM [CmmBlock]
- sink _ [] = pure []
- sink sunk (b:bs) = do
- -- Now sink and inline in this block
- (prepend, last_fold) <- runOpt cfg $ constantFoldNode last
-
- (middle', assigs) <- walk cfg (ann_middles ++ annotate platform live_middle prepend) (mapFindWithDefault [] lbl sunk)
-
- let (final_last, assigs') = tryToInline platform live last_fold assigs
- -- Now, drop any assignments that we will not sink any further.
- (dropped_last, assigs'') = dropAssignments platform drop_if init_live_sets assigs'
- drop_if :: (LocalReg, CmmExpr, AbsMem)
- -> [LRegSet] -> (Bool, [LRegSet])
- drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
- where
- should_drop = conflicts platform a final_last
- || not (isTrivial platform rhs) && live_in_multi live_sets r
- || r `elemLRegSet` live_in_joins
-
- live_sets' | should_drop = live_sets
- | otherwise = map upd live_sets
-
- upd set | r `elemLRegSet` set = set `unionLRegSet` live_rhs
- | otherwise = set
-
- live_rhs = foldRegsUsed platform (flip insertLRegSet) emptyLRegSet rhs
-
- final_middle = foldl' blockSnoc middle' dropped_last
-
- sunk' = mapUnion sunk $
- mapFromList [ (l, filterAssignments platform (getLive l) assigs'')
- | l <- succs ]
-
- (blockJoin first final_middle final_last :) <$> sink sunk' bs
-
+ sink :: LabelMap Assignments -> [CmmBlock] -> [CmmBlock]
+ sink _ [] = []
+ sink sunk (b:bs) =
+ -- pprTrace "sink" (ppr lbl) $
+ blockJoin first final_middle final_last : sink sunk' bs
where
lbl = entryLabel b
(first, middle, last) = blockSplit b
@@ -211,6 +178,11 @@ cmmSink cfg graph = ofBlockList (g_entry graph) <$> sink mapEmpty blocks
live_middle = gen_killL platform last live
ann_middles = annotate platform live_middle (blockToList middle)
+ -- Now sink and inline in this block
+ (middle', assigs) = walk platform ann_middles (mapFindWithDefault [] lbl sunk)
+ fold_last = constantFoldNode platform last
+ (final_last, assigs') = tryToInline platform live fold_last assigs
+
-- We cannot sink into join points (successors with more than
-- one predecessor), so identify the join points and the set
-- of registers live in them.
@@ -228,6 +200,31 @@ cmmSink cfg graph = ofBlockList (g_entry graph) <$> sink mapEmpty blocks
(_one:_two:_) -> True
_ -> False
+ -- Now, drop any assignments that we will not sink any further.
+ (dropped_last, assigs'') = dropAssignments platform drop_if init_live_sets assigs'
+
+ drop_if :: (LocalReg, CmmExpr, AbsMem)
+ -> [LRegSet] -> (Bool, [LRegSet])
+ drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets')
+ where
+ should_drop = conflicts platform a final_last
+ || not (isTrivial platform rhs) && live_in_multi live_sets r
+ || r `elemLRegSet` live_in_joins
+
+ live_sets' | should_drop = live_sets
+ | otherwise = map upd live_sets
+
+ upd set | r `elemLRegSet` set = set `unionLRegSet` live_rhs
+ | otherwise = set
+
+ live_rhs = foldRegsUsed platform (flip insertLRegSet) emptyLRegSet rhs
+
+ final_middle = foldl' blockSnoc middle' dropped_last
+
+ sunk' = mapUnion sunk $
+ mapFromList [ (l, filterAssignments platform (getLive l) assigs'')
+ | l <- succs ]
+
{- TODO: enable this later, when we have some good tests in place to
measure the effect and tune it.
@@ -302,7 +299,7 @@ filterAssignments platform live assigs = reverse (go assigs [])
-- * a list of assignments that will be placed *after* that block.
--
-walk :: CmmConfig
+walk :: Platform
-> [(LRegSet, CmmNode O O)] -- nodes of the block, annotated with
-- the set of registers live *after*
-- this node.
@@ -312,39 +309,36 @@ walk :: CmmConfig
-- Earlier assignments may refer
-- to later ones.
- -> UniqDSM ( Block CmmNode O O -- The new block
- , Assignments -- Assignments to sink further
- )
+ -> ( Block CmmNode O O -- The new block
+ , Assignments -- Assignments to sink further
+ )
-walk cfg nodes assigs = go nodes emptyBlock assigs
+walk platform nodes assigs = go nodes emptyBlock assigs
where
- platform = cmmPlatform cfg
- go [] block as = pure (block, as)
+ go [] block as = (block, as)
go ((live,node):ns) block as
-- discard nodes representing dead assignment
| shouldDiscard node live = go ns block as
- | otherwise = do
- (prepend, node1) <- runOpt cfg $ constantFoldNode node
- if not (null prepend)
- then go (annotate platform live (prepend ++ [node1]) ++ ns) block as
- else do
- let -- Inline assignments
- (node2, as1) = tryToInline platform live node1 as
- -- Drop any earlier assignments conflicting with node2
- (dropped, as') = dropAssignmentsSimple platform
- (\a -> conflicts platform a node2) as1
- -- Walk over the rest of the block. Includes dropped assignments
- block' = foldl' blockSnoc block dropped `blockSnoc` node2
-
- (prepend2, node3) <- runOpt cfg $ constantFoldNode node2
- if | not (null prepend2) -> go (annotate platform live (prepend2 ++ [node3]) ++ ns) block as
- -- sometimes only after simplification we can tell we can discard the node.
- -- See Note [Discard simplified nodes]
- | noOpAssignment node3 -> go ns block as
- -- Pick up interesting assignments
- | Just a <- shouldSink platform node3 -> go ns block (a : as1)
- -- Try inlining, drop assignments and move on
- | otherwise -> go ns block' as'
+ -- sometimes only after simplification we can tell we can discard the node.
+ -- See Note [Discard simplified nodes]
+ | noOpAssignment node2 = go ns block as
+ -- Pick up interesting assignments
+ | Just a <- shouldSink platform node2 = go ns block (a : as1)
+ -- Try inlining, drop assignments and move on
+ | otherwise = go ns block' as'
+ where
+ -- Simplify node
+ node1 = constantFoldNode platform node
+
+ -- Inline assignments
+ (node2, as1) = tryToInline platform live node1 as
+
+ -- Drop any earlier assignments conflicting with node2
+ (dropped, as') = dropAssignmentsSimple platform
+ (\a -> conflicts platform a node2) as1
+
+ -- Walk over the rest of the block. Includes dropped assignments
+ block' = foldl' blockSnoc block dropped `blockSnoc` node2
{- Note [Discard simplified nodes]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
=====================================
compiler/GHC/Driver/Config/Cmm.hs
=====================================
@@ -24,17 +24,5 @@ initCmmConfig dflags = CmmConfig
, cmmDoCmmSwitchPlans = not (backendHasNativeSwitch (backend dflags))
, cmmSplitProcPoints = not (backendSupportsUnsplitProcPoints (backend dflags))
|| not (platformTablesNextToCode platform)
- , cmmAllowMul2 = (ncg && x86ish) || llvm
- , cmmOptConstDivision = not llvm
}
where platform = targetPlatform dflags
- -- Copied from StgToCmm
- (ncg, llvm) = case backendPrimitiveImplementation (backend dflags) of
- GenericPrimitives -> (False, False)
- NcgPrimitives -> (True, False)
- LlvmPrimitives -> (False, True)
- JSPrimitives -> (False, False)
- x86ish = case platformArch platform of
- ArchX86 -> True
- ArchX86_64 -> True
- _ -> False
=====================================
compiler/GHC/StgToCmm/Prim.hs
=====================================
@@ -1571,28 +1571,28 @@ emitPrimOp cfg primop =
CastDoubleToWord64Op -> translateBitcasts (MO_FW_Bitcast W64)
CastWord64ToDoubleOp -> translateBitcasts (MO_WF_Bitcast W64)
- IntQuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ IntQuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_S_QuotRem (wordWidth platform))
else Right (genericIntQuotRemOp (wordWidth platform))
- Int8QuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ Int8QuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_S_QuotRem W8)
else Right (genericIntQuotRemOp W8)
- Int16QuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ Int16QuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_S_QuotRem W16)
else Right (genericIntQuotRemOp W16)
- Int32QuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ Int32QuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_S_QuotRem W32)
else Right (genericIntQuotRemOp W32)
- WordQuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ WordQuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_U_QuotRem (wordWidth platform))
else Right (genericWordQuotRemOp (wordWidth platform))
@@ -1601,18 +1601,18 @@ emitPrimOp cfg primop =
then Left (MO_U_QuotRem2 (wordWidth platform))
else Right (genericWordQuotRem2Op platform)
- Word8QuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ Word8QuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_U_QuotRem W8)
else Right (genericWordQuotRemOp W8)
- Word16QuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ Word16QuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_U_QuotRem W16)
else Right (genericWordQuotRemOp W16)
- Word32QuotRemOp -> opCallishHandledLater $
- if allowQuotRem
+ Word32QuotRemOp -> \args -> flip opCallishHandledLater args $
+ if allowQuotRem && not (quotRemCanBeOptimized args)
then Left (MO_U_QuotRem W32)
else Right (genericWordQuotRemOp W32)
@@ -1835,6 +1835,23 @@ emitPrimOp cfg primop =
pure $ map (CmmReg . CmmLocal) regs
alwaysExternal = \_ -> PrimopCmmEmit_External
+ -- Note [QuotRem optimization]
+ -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ -- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
+ -- (shift, .&.).
+ --
+ -- Currently we only support optimization (performed in GHC.Cmm.Opt) when the
+ -- constant is a power of 2. #9041 tracks the implementation of the general
+ -- optimization.
+ --
+ -- `quotRem` can be optimized in the same way. However as it returns two values,
+ -- it is implemented as a "callish" primop which is harder to match and
+ -- to transform later on. For simplicity, the current implementation detects cases
+ -- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem
+ -- primop into two CMM quot and rem primops.
+ quotRemCanBeOptimized = \case
+ [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
+ _ -> False
allowQuotRem = stgToCmmAllowQuotRemInstr cfg
allowQuotRem2 = stgToCmmAllowQuotRem2 cfg
=====================================
testsuite/tests/numeric/should_run/T25653.hs
=====================================
@@ -0,0 +1,10 @@
+module Main (main) where
+
+import Data.Word (Word8)
+
+myMod :: Word8 -> Word8
+myMod i = mod i 7
+{-# NOINLINE myMod #-}
+
+main :: IO ()
+main = print $ myMod 5
=====================================
testsuite/tests/numeric/should_run/T25653.stdout
=====================================
@@ -0,0 +1 @@
+5
=====================================
testsuite/tests/numeric/should_run/all.T
=====================================
@@ -86,3 +86,4 @@ test('foundation', [when(js_arch(), run_timeout_multiplier(2)), js_fragile(24259
test('T24066', normal, compile_and_run, [''])
test('div01', normal, compile_and_run, [''])
test('T24245', normal, compile_and_run, [''])
+test('T25653', normal, compile_and_run, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/bf4f5ad31ddeb87baad10de47f84081648f808dc...163aa50aad7a24912c5cc0ade3ef80b108e99f1a
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/bf4f5ad31ddeb87baad10de47f84081648f808dc...163aa50aad7a24912c5cc0ade3ef80b108e99f1a
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20250118/4057e4f7/attachment-0001.html>
More information about the ghc-commits
mailing list