[commit: ghc] master: Improve numeric stability of numericEnumFrom for floating numbers (4ffaf4b)

git at git.haskell.org git at git.haskell.org
Wed May 16 19:41:01 UTC 2018


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/4ffaf4b67773af4c72d92bb8b6c87b1a7d34ac0f/ghc

>---------------------------------------------------------------

commit 4ffaf4b67773af4c72d92bb8b6c87b1a7d34ac0f
Author: HE, Tao <sighingnow at gmail.com>
Date:   Wed May 16 12:13:16 2018 -0400

    Improve numeric stability of numericEnumFrom for floating numbers
    
    Fixes #15081.
    
    Test Plan: cd libraries/base && make test TEST="enumNumeric"
    
    Reviewers: hvr, bgamari
    
    Reviewed By: bgamari
    
    Subscribers: dfeuer, simonpj, thomie, carter
    
    GHC Trac Issues: #15081
    
    Differential Revision: https://phabricator.haskell.org/D4650


>---------------------------------------------------------------

4ffaf4b67773af4c72d92bb8b6c87b1a7d34ac0f
 libraries/base/GHC/Real.hs              | 56 +++++++++++++++++++++++++++++++--
 libraries/base/tests/all.T              |  1 +
 libraries/base/tests/enumNumeric.hs     |  7 +++++
 libraries/base/tests/enumNumeric.stdout |  5 +++
 testsuite/tests/perf/should_run/all.T   |  3 +-
 5 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/libraries/base/GHC/Real.hs b/libraries/base/GHC/Real.hs
index 938dff6..7f2ecd0 100644
--- a/libraries/base/GHC/Real.hs
+++ b/libraries/base/GHC/Real.hs
@@ -216,10 +216,19 @@ class  (Real a, Fractional a) => RealFrac a  where
 -- These 'numeric' enumerations come straight from the Report
 
 numericEnumFrom         :: (Fractional a) => a -> [a]
-numericEnumFrom n       =  n `seq` (n : numericEnumFrom (n + 1))
+numericEnumFrom n       = go 0
+  where
+    -- See Note [Numeric Stability of Enumerating Floating Numbers]
+    go !k = let !n' = n + k
+             in n' : go (k + 1)
 
 numericEnumFromThen     :: (Fractional a) => a -> a -> [a]
-numericEnumFromThen n m = n `seq` m `seq` (n : numericEnumFromThen m (m+m-n))
+numericEnumFromThen n m = go 0
+  where
+    step = m - n
+    -- See Note [Numeric Stability of Enumerating Floating Numbers]
+    go !k = let !n' = n + k * step
+             in n' : go (k + 1)
 
 numericEnumFromTo       :: (Ord a, Fractional a) => a -> a -> [a]
 numericEnumFromTo n m   = takeWhile (<= m + 1/2) (numericEnumFrom n)
@@ -232,6 +241,49 @@ numericEnumFromThenTo e1 e2 e3
                                  predicate | e2 >= e1  = (<= e3 + mid)
                                            | otherwise = (>= e3 + mid)
 
+{- Note [Numeric Stability of Enumerating Floating Numbers]
+-----------------------------------------------------------
+When enumerate floating numbers, we could add the increment to the last number
+at every run (as what we did previously):
+
+    numericEnumFrom n =  n `seq` (n : numericEnumFrom (n + 1))
+
+This approach is concise and really fast, only needs an addition operation.
+However when a floating number is large enough, for `n`, `n` and `n+1` will
+have the same binary representation. For example (all number has type
+`Double`):
+
+    9007199254740990                 is: 0x433ffffffffffffe
+    9007199254740990 + 1             is: 0x433fffffffffffff
+    (9007199254740990 + 1) + 1       is: 0x4340000000000000
+    ((9007199254740990 + 1) + 1) + 1 is: 0x4340000000000000
+
+When we evaluate ([9007199254740990..9007199254740991] :: Double), we would
+never reach the condition in `numericEnumFromTo`
+
+    9007199254740990 + 1 + 1 + ... > 9007199254740991 + 1/2
+
+We would fall into infinite loop (as reported in Trac #15081).
+
+To remedy the situation, we record the number of `1` that needed to be added
+to the start number, rather than increasing `1` at every time. This approach
+can improvement the numeric stability greatly at the cost of a multiplication.
+
+Furthermore, we use the type of the enumerated number, `Fractional a => a`,
+as the type of multiplier. In rare situations, the multiplier could be very
+large and will lead to the enumeration to infinite loop, too, which should
+be very rare. Consider the following example:
+
+    [1..9007199254740994]
+
+We could fix that by using an Integer as multiplier but we don't do that.
+The benchmark on T7954.hs shows that this approach leads to significant
+degeneration on performance (33% increase allocation and 300% increase on
+elapsed time).
+
+See Trac #15081 and Phab:D4650 for the related discussion about this problem.
+-}
+
 --------------------------------------------------------------
 -- Instances for Int
 --------------------------------------------------------------
diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T
index d530e10..eb00fc3 100644
--- a/libraries/base/tests/all.T
+++ b/libraries/base/tests/all.T
@@ -2,6 +2,7 @@
 test('readFloat', exit_code(1), compile_and_run, [''])
 test('enumDouble', normal, compile_and_run, [''])
 test('enumRatio', normal, compile_and_run, [''])
+test('enumNumeric', normal, compile_and_run, [''])
 test('tempfiles', normal, compile_and_run, [''])
 test('fixed', normal, compile_and_run, [''])
 test('quotOverflow', normal, compile_and_run, [''])
diff --git a/libraries/base/tests/enumNumeric.hs b/libraries/base/tests/enumNumeric.hs
new file mode 100644
index 0000000..36c4846
--- /dev/null
+++ b/libraries/base/tests/enumNumeric.hs
@@ -0,0 +1,7 @@
+main :: IO ()
+main = do
+    print $ map (/2) ([5..6] :: [Double])
+    print $ ([9007199254740990..9007199254740991] :: [Double])
+    print $ map (/2) ([9007199254740990..9007199254740991] :: [Double])
+    print $ ([9007199254740989..9007199254740990] :: [Double])
+    print $ map (/2) ([9007199254740989..9007199254740990] :: [Double])
diff --git a/libraries/base/tests/enumNumeric.stdout b/libraries/base/tests/enumNumeric.stdout
new file mode 100644
index 0000000..3d7eb74
--- /dev/null
+++ b/libraries/base/tests/enumNumeric.stdout
@@ -0,0 +1,5 @@
+[2.5,3.0]
+[9.00719925474099e15,9.007199254740991e15,9.007199254740992e15,9.007199254740992e15]
+[4.503599627370495e15,4.5035996273704955e15,4.503599627370496e15,4.503599627370496e15]
+[9.007199254740989e15,9.00719925474099e15]
+[4.5035996273704945e15,4.503599627370495e15]
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 27405b0..f47d8e0 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -384,8 +384,9 @@ test('T7954',
                       [(wordsize(32), 920045264, 10),
               # some date:  1380051408    (64-bit Windows machine)
               # 2014-04-04:  920045264    (64-bit Windows machine)
-                       (wordsize(64), 1680051336, 10)]),
+                       (wordsize(64), 1280051632, 10)]),
               # 2014-02-10: 1680051336 (x86_64/Linux), call arity analysis
+              # 2018-05-03: 1280051632 (x86_64/Linux), refactor numericEnumFrom
       only_ways(['normal'])
       ],
      compile_and_run,



More information about the ghc-commits mailing list