[commit: packages/integer-gmp] wip/T8647: Add more notes regarding the MPZ# hack (2ba0992)

Mon Jan 13 11:04:06 UTC 2014

Repository : ssh://git@git.haskell.org/integer-gmp

On branch  : wip/T8647
Link       : http://ghc.haskell.org/trac/ghc/changeset/2ba09925d8a42258df349d01d7d69f3cfe0a5867/integer-gmp

>---------------------------------------------------------------

commit 2ba09925d8a42258df349d01d7d69f3cfe0a5867
Author: Herbert Valerio Riedel <hvr at gnu.org>
Date:   Mon Jan 13 12:02:47 2014 +0100

    Add more notes regarding the MPZ# hack
    
    NOTE: This commit is to be merged into [20d7bfdd/integer-gmp]
    
    Signed-off-by: Herbert Valerio Riedel <hvr at gnu.org>


>---------------------------------------------------------------

2ba09925d8a42258df349d01d7d69f3cfe0a5867
 GHC/Integer/GMP/Prim.hs |   37 +++++++++++++++++++++++++++----------
 GHC/Integer/Type.lhs    |   16 ++++++++++------
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/GHC/Integer/GMP/Prim.hs b/GHC/Integer/GMP/Prim.hs
index ef59e15..3790345 100644
--- a/GHC/Integer/GMP/Prim.hs
+++ b/GHC/Integer/GMP/Prim.hs
@@ -81,22 +81,39 @@ import GHC.Types
 -- Double isn't available yet, and we shouldn't be using defaults anyway:
 default ()
 
--- | This is represents a @mpz_t at .
+-- | This is represents a @mpz_t@ value in a heap-saving way.
 --
 -- The first tuple element, @/s/@, encodes the sign of the integer
 -- @/i/@ (i.e. @signum /s/ == signum /i/@), and the number of /limbs/
 -- used to represent the magnitude. If @abs /s/ > 1@, the 'ByteArray#'
 -- contains @abs /s/@ limbs encoding the integer. Otherwise, if @abs
 -- /s/ < 2@, the single limb is stored in the 'Word#' element instead
--- (and the 'ByteArray#' element is undefined)
---
--- This representation allows to reduce temporary heap allocations of
--- 1-limb 'ByteArray#'s which fit into the 'S#'-constructor. See also
--- Trac #8647 for more information.
---
--- See also 'GHC.Integer.Type.mpzToInteger' function and the
--- @MP_INT_1LIMB_RETURN()@ macro in @gmp-wrappers.cmm@ for
--- implementation details.
+-- (and the 'ByteArray#' element is undefined and MUST NOT be accessed
+-- as it doesn't point to a proper 'ByteArray#' but rather to an
+-- unsafe-coerced 'Int' in order be polite to the GC -- see
+-- @DUMMY_BYTE_ARR@ in gmp-wrappers.cmm)
+--
+-- More specifically, the following encoding is used (where `⊥` means
+-- undefined/unused):
+--
+-- * (#  0#, ⊥, 0## #) -> value = 0
+-- * (#  1#, ⊥, w   #) -> value = w
+-- * (# -1#, ⊥, w   #) -> value = -w
+-- * (#  s#, d, 0## #) -> value = J# s d
+--
+-- This representation allows to avoid temporary heap allocations
+-- (-> Trac #8647) of 1-limb 'ByteArray#'s which fit into the
+-- 'S#'-constructor. Moreover, this allows to delays 1-limb
+-- 'ByteArray#' heap allocations, as such 1-limb `mpz_t`s can be
+-- optimistically allocated on the Cmm stack and returned as a @#word@
+-- in case the `mpz_t` wasn't grown beyond 1 limb by the GMP
+-- operation.
+--
+-- See also the 'GHC.Integer.Type.mpzToInteger' function which ought
+-- to be used for converting 'MPZ#'s to 'Integer's and the
+-- @MP_INT_1LIMB_RETURN()@ macro in @gmp-wrappers.cmm@ which
+-- constructs 'MPZ#' values in the first place for implementation
+-- details.
 type MPZ# = (# Int#, ByteArray#, Word# #)
 
 -- | Returns -1,0,1 according as first argument is less than, equal to, or greater than second argument.
diff --git a/GHC/Integer/Type.lhs b/GHC/Integer/Type.lhs
index 8e8d9f5..ab4fe9d 100644
--- a/GHC/Integer/Type.lhs
+++ b/GHC/Integer/Type.lhs
@@ -174,17 +174,21 @@ smartJ# (-1#) mb# | isTrue# (v <# 0#) = S# v
       v = negateInt# (indexIntArray# mb# 0#)
 smartJ# s# mb# = J# s# mb#
 
--- |Construct 'Integer' out of 3-tuple returned by GMP wrapper primops
+-- |Construct 'Integer' out of a 'MPZ#' as returned by GMP wrapper primops
 --
--- See definition of 'MPZ#' in "GHC.Integer.GMP.Prim" for more details.
+-- IMPORTANT: The 'ByteArray#' element MUST NOT be accessed unless the
+-- size-element indicates more than one limb!
+--
+-- See notes at definition site of 'MPZ#' in "GHC.Integer.GMP.Prim"
+-- for more details.
 mpzToInteger :: MPZ# -> Integer
 mpzToInteger (# 0#, _, _ #) = S# 0#
 mpzToInteger (# 1#, _, w# #) | isTrue# (v# >=# 0#) = S# v#
-                              | True = case word2Integer# w# of (# _, d #) -> J# 1# d
+                             | True = case word2Integer# w# of (# _, d #) -> J# 1# d
     where
       v# = word2Int# w#
 mpzToInteger (# -1#, _, w# #) | isTrue# (v# <=# 0#) = S# v#
-                               | True = case word2Integer# w# of (# _, d #) -> J# -1# d
+                              | True = case word2Integer# w# of (# _, d #) -> J# -1# d
     where
       v# = negateInt# (word2Int# w#)
 mpzToInteger (# s#, mb#, _ #) = J# s# mb#
@@ -193,8 +197,8 @@ mpzToInteger (# s#, mb#, _ #) = J# s# mb#
 mpzToInteger2 :: (# MPZ#, MPZ# #) -> (# Integer, Integer #)
 mpzToInteger2 (# mpz1, mpz2 #) = (# i1, i2 #)
     where
-      !i1 = mpzToInteger mpz1
-      !i2 = mpzToInteger mpz2
+      !i1 = mpzToInteger mpz1 -- This use of `!` avoids creating thunks,
+      !i2 = mpzToInteger mpz2 -- see also Note [Use S# if possible].
 
 -- |Negate MPZ#
 mpzNeg :: MPZ# -> MPZ#