[commit: ghc] master: Improve numeric stability of numericEnumFrom for floating numbers (4ffaf4b)
git at git.haskell.org
git at git.haskell.org
Wed May 16 19:41:01 UTC 2018
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/4ffaf4b67773af4c72d92bb8b6c87b1a7d34ac0f/ghc
>---------------------------------------------------------------
commit 4ffaf4b67773af4c72d92bb8b6c87b1a7d34ac0f
Author: HE, Tao <sighingnow at gmail.com>
Date: Wed May 16 12:13:16 2018 -0400
Improve numeric stability of numericEnumFrom for floating numbers
Fixes #15081.
Test Plan: cd libraries/base && make test TEST="enumNumeric"
Reviewers: hvr, bgamari
Reviewed By: bgamari
Subscribers: dfeuer, simonpj, thomie, carter
GHC Trac Issues: #15081
Differential Revision: https://phabricator.haskell.org/D4650
>---------------------------------------------------------------
4ffaf4b67773af4c72d92bb8b6c87b1a7d34ac0f
libraries/base/GHC/Real.hs | 56 +++++++++++++++++++++++++++++++--
libraries/base/tests/all.T | 1 +
libraries/base/tests/enumNumeric.hs | 7 +++++
libraries/base/tests/enumNumeric.stdout | 5 +++
testsuite/tests/perf/should_run/all.T | 3 +-
5 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/libraries/base/GHC/Real.hs b/libraries/base/GHC/Real.hs
index 938dff6..7f2ecd0 100644
--- a/libraries/base/GHC/Real.hs
+++ b/libraries/base/GHC/Real.hs
@@ -216,10 +216,19 @@ class (Real a, Fractional a) => RealFrac a where
-- These 'numeric' enumerations come straight from the Report
numericEnumFrom :: (Fractional a) => a -> [a]
-numericEnumFrom n = n `seq` (n : numericEnumFrom (n + 1))
+numericEnumFrom n = go 0
+ where
+ -- See Note [Numeric Stability of Enumerating Floating Numbers]
+ go !k = let !n' = n + k
+ in n' : go (k + 1)
numericEnumFromThen :: (Fractional a) => a -> a -> [a]
-numericEnumFromThen n m = n `seq` m `seq` (n : numericEnumFromThen m (m+m-n))
+numericEnumFromThen n m = go 0
+ where
+ step = m - n
+ -- See Note [Numeric Stability of Enumerating Floating Numbers]
+ go !k = let !n' = n + k * step
+ in n' : go (k + 1)
numericEnumFromTo :: (Ord a, Fractional a) => a -> a -> [a]
numericEnumFromTo n m = takeWhile (<= m + 1/2) (numericEnumFrom n)
@@ -232,6 +241,49 @@ numericEnumFromThenTo e1 e2 e3
predicate | e2 >= e1 = (<= e3 + mid)
| otherwise = (>= e3 + mid)
+{- Note [Numeric Stability of Enumerating Floating Numbers]
+-----------------------------------------------------------
+When enumerate floating numbers, we could add the increment to the last number
+at every run (as what we did previously):
+
+ numericEnumFrom n = n `seq` (n : numericEnumFrom (n + 1))
+
+This approach is concise and really fast, only needs an addition operation.
+However when a floating number is large enough, for `n`, `n` and `n+1` will
+have the same binary representation. For example (all number has type
+`Double`):
+
+ 9007199254740990 is: 0x433ffffffffffffe
+ 9007199254740990 + 1 is: 0x433fffffffffffff
+ (9007199254740990 + 1) + 1 is: 0x4340000000000000
+ ((9007199254740990 + 1) + 1) + 1 is: 0x4340000000000000
+
+When we evaluate ([9007199254740990..9007199254740991] :: Double), we would
+never reach the condition in `numericEnumFromTo`
+
+ 9007199254740990 + 1 + 1 + ... > 9007199254740991 + 1/2
+
+We would fall into infinite loop (as reported in Trac #15081).
+
+To remedy the situation, we record the number of `1` that needed to be added
+to the start number, rather than increasing `1` at every time. This approach
+can improvement the numeric stability greatly at the cost of a multiplication.
+
+Furthermore, we use the type of the enumerated number, `Fractional a => a`,
+as the type of multiplier. In rare situations, the multiplier could be very
+large and will lead to the enumeration to infinite loop, too, which should
+be very rare. Consider the following example:
+
+ [1..9007199254740994]
+
+We could fix that by using an Integer as multiplier but we don't do that.
+The benchmark on T7954.hs shows that this approach leads to significant
+degeneration on performance (33% increase allocation and 300% increase on
+elapsed time).
+
+See Trac #15081 and Phab:D4650 for the related discussion about this problem.
+-}
+
--------------------------------------------------------------
-- Instances for Int
--------------------------------------------------------------
diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T
index d530e10..eb00fc3 100644
--- a/libraries/base/tests/all.T
+++ b/libraries/base/tests/all.T
@@ -2,6 +2,7 @@
test('readFloat', exit_code(1), compile_and_run, [''])
test('enumDouble', normal, compile_and_run, [''])
test('enumRatio', normal, compile_and_run, [''])
+test('enumNumeric', normal, compile_and_run, [''])
test('tempfiles', normal, compile_and_run, [''])
test('fixed', normal, compile_and_run, [''])
test('quotOverflow', normal, compile_and_run, [''])
diff --git a/libraries/base/tests/enumNumeric.hs b/libraries/base/tests/enumNumeric.hs
new file mode 100644
index 0000000..36c4846
--- /dev/null
+++ b/libraries/base/tests/enumNumeric.hs
@@ -0,0 +1,7 @@
+main :: IO ()
+main = do
+ print $ map (/2) ([5..6] :: [Double])
+ print $ ([9007199254740990..9007199254740991] :: [Double])
+ print $ map (/2) ([9007199254740990..9007199254740991] :: [Double])
+ print $ ([9007199254740989..9007199254740990] :: [Double])
+ print $ map (/2) ([9007199254740989..9007199254740990] :: [Double])
diff --git a/libraries/base/tests/enumNumeric.stdout b/libraries/base/tests/enumNumeric.stdout
new file mode 100644
index 0000000..3d7eb74
--- /dev/null
+++ b/libraries/base/tests/enumNumeric.stdout
@@ -0,0 +1,5 @@
+[2.5,3.0]
+[9.00719925474099e15,9.007199254740991e15,9.007199254740992e15,9.007199254740992e15]
+[4.503599627370495e15,4.5035996273704955e15,4.503599627370496e15,4.503599627370496e15]
+[9.007199254740989e15,9.00719925474099e15]
+[4.5035996273704945e15,4.503599627370495e15]
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 27405b0..f47d8e0 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -384,8 +384,9 @@ test('T7954',
[(wordsize(32), 920045264, 10),
# some date: 1380051408 (64-bit Windows machine)
# 2014-04-04: 920045264 (64-bit Windows machine)
- (wordsize(64), 1680051336, 10)]),
+ (wordsize(64), 1280051632, 10)]),
# 2014-02-10: 1680051336 (x86_64/Linux), call arity analysis
+ # 2018-05-03: 1280051632 (x86_64/Linux), refactor numericEnumFrom
only_ways(['normal'])
],
compile_and_run,
More information about the ghc-commits
mailing list