[commit: ghc] master: Sort field labels before fingerprint hashing (ffcdd84)

git at git.haskell.org git at git.haskell.org
Tue Oct 27 15:18:02 UTC 2015


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/ffcdd84f86727c1621be0a0e522eb6a3a64e479f/ghc

>---------------------------------------------------------------

commit ffcdd84f86727c1621be0a0e522eb6a3a64e479f
Author: Bartosz Nitka <niteria at gmail.com>
Date:   Tue Oct 27 10:19:48 2015 -0500

    Sort field labels before fingerprint hashing
    
    `fsEnvElts :: FastStringEnv a -> [a]` returns a list of `[a]` in the order of
    `Unique`s which is arbitrary. In this case it gives a list of record fields in
    arbitrary order, from which we then extract the field labels to contribute to
    the record fingerprint. The arbitrary ordering of field labels introduces
    unnecessary nondeterminism in interface files as demonstrated by the test case.
    
    We sort `FastString` here. It's safe, because the only way that the `Unique`
    associated with the `FastString` is used in comparison is for equality. If the
    `Unique`s are different it fallbacks to comparing the actual `ByteString`.
    
    Reviewed By: ezyang, thomie, bgamari, austin
    
    Differential Revision: https://phabricator.haskell.org/D1373
    
    GHC Trac Issues: #4012


>---------------------------------------------------------------

ffcdd84f86727c1621be0a0e522eb6a3a64e479f
 compiler/basicTypes/Unique.hs                            | 16 ++++++++++++++++
 compiler/iface/MkIface.hs                                |  9 ++++++++-
 testsuite/tests/determinism/determ002/A.hs               | 10 ++++++++++
 testsuite/tests/determinism/determ002/Makefile           | 13 +++++++++++++
 testsuite/tests/determinism/determ002/all.T              |  4 ++++
 .../tests/determinism/determ002/determ002.stderr         |  0
 testsuite/tests/determinism/determ002/determ002.stdout   |  2 ++
 7 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/compiler/basicTypes/Unique.hs b/compiler/basicTypes/Unique.hs
index 5ce9c64..12629ff 100644
--- a/compiler/basicTypes/Unique.hs
+++ b/compiler/basicTypes/Unique.hs
@@ -174,6 +174,22 @@ use `deriving' because we want {\em precise} control of ordering
 (equality on @Uniques@ is v common).
 -}
 
+-- Note [Unique Determinism]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
+-- The order of allocated @Uniques@ is not stable across rebuilds.
+-- The main reason for that is that typechecking interface files pulls
+-- @Uniques@ from @UniqSupply@ and the interface file for the module being
+-- currently compiled can, but doesn't have to exist.
+--
+-- It gets more complicated if you take into account that the interface
+-- files are loaded lazily and that building multiple files at once has to
+-- work for any subset of interface files present. When you add parallelism
+-- this makes @Uniques@ hopelessly random.
+--
+-- As such, to get deterministic builds, the order of the allocated
+-- @Uniques@ should not affect the final result.
+-- see also wiki/DeterministicBuilds
+
 eqUnique, ltUnique, leUnique :: Unique -> Unique -> Bool
 eqUnique (MkUnique u1) (MkUnique u2) = u1 == u2
 ltUnique (MkUnique u1) (MkUnique u2) = u1 <  u2
diff --git a/compiler/iface/MkIface.hs b/compiler/iface/MkIface.hs
index 66a885b..d4b764b 100644
--- a/compiler/iface/MkIface.hs
+++ b/compiler/iface/MkIface.hs
@@ -1700,7 +1700,14 @@ tyConToIfaceDecl env tycon
     ifaceOverloaded flds = case fsEnvElts flds of
                              fl:_ -> flIsOverloaded fl
                              []   -> False
-    ifaceFields flds = map flLabel $ fsEnvElts flds
+    ifaceFields flds = sort $ map flLabel $ fsEnvElts flds
+                       -- We need to sort the labels because they come out
+                       -- of FastStringEnv in arbitrary order, because
+                       -- FastStringEnv is keyed on Uniques.
+                       -- Sorting FastString is ok here, because Uniques
+                       -- are only used for equality checks in the Ord
+                       -- instance for FastString.
+                       -- See Note [Unique Determinism] in Unique.
 
 toIfaceBang :: TidyEnv -> HsImplBang -> IfaceBang
 toIfaceBang _    HsLazy              = IfNoBang
diff --git a/testsuite/tests/determinism/determ002/A.hs b/testsuite/tests/determinism/determ002/A.hs
new file mode 100644
index 0000000..9a88f2c
--- /dev/null
+++ b/testsuite/tests/determinism/determ002/A.hs
@@ -0,0 +1,10 @@
+module A where
+
+-- This is a repro for the issue where to fingerprint a record, field labels
+-- were pulled from FastStringEnv in the order of Uniques which are known
+-- to have arbitrary order - see Note [Unique Determinism] in Unique.
+
+data B = C
+  { e :: ()
+  , d :: ()
+  }
diff --git a/testsuite/tests/determinism/determ002/Makefile b/testsuite/tests/determinism/determ002/Makefile
new file mode 100644
index 0000000..d94a1c2
--- /dev/null
+++ b/testsuite/tests/determinism/determ002/Makefile
@@ -0,0 +1,13 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+TEST_HC_OPTS_NO_RECOMP = $(filter-out -fforce-recomp,$(TEST_HC_OPTS))
+
+determ002:
+	$(RM) A.hi A.o
+	'$(TEST_HC)' $(TEST_HC_OPTS_NO_RECOMP) A.hs
+	$(CP) A.hi A.old.hi
+	$(RM) A.o
+	'$(TEST_HC)' $(TEST_HC_OPTS_NO_RECOMP) A.hs
+	diff A.hi A.old.hi
diff --git a/testsuite/tests/determinism/determ002/all.T b/testsuite/tests/determinism/determ002/all.T
new file mode 100644
index 0000000..9bfd05d
--- /dev/null
+++ b/testsuite/tests/determinism/determ002/all.T
@@ -0,0 +1,4 @@
+test('determ002',
+     extra_clean(['A.o', 'A.hi', 'A.old.hi']),
+     run_command,
+     ['$MAKE -s --no-print-directory determ002'])
diff --git a/libraries/base/tests/IO/misc001.stdout b/testsuite/tests/determinism/determ002/determ002.stderr
similarity index 100%
copy from libraries/base/tests/IO/misc001.stdout
copy to testsuite/tests/determinism/determ002/determ002.stderr
diff --git a/testsuite/tests/determinism/determ002/determ002.stdout b/testsuite/tests/determinism/determ002/determ002.stdout
new file mode 100644
index 0000000..60c2bc3
--- /dev/null
+++ b/testsuite/tests/determinism/determ002/determ002.stdout
@@ -0,0 +1,2 @@
+[1 of 1] Compiling A                ( A.hs, A.o )
+[1 of 1] Compiling A                ( A.hs, A.o )



More information about the ghc-commits mailing list