[commit: ghc] master: Testsuite: recover from utf8 decoding errors (4a4bdda)

git at git.haskell.org git at git.haskell.org
Mon Jun 20 14:37:02 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/4a4bdda1e5564fa3cd27cb7d94eb36d415d4b574/ghc

>---------------------------------------------------------------

commit 4a4bdda1e5564fa3cd27cb7d94eb36d415d4b574
Author: Thomas Miedema <thomasmiedema at gmail.com>
Date:   Mon Jun 20 10:30:12 2016 +0200

    Testsuite: recover from utf8 decoding errors


>---------------------------------------------------------------

4a4bdda1e5564fa3cd27cb7d94eb36d415d4b574
 testsuite/driver/testlib.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index 1c20936..8fc1481 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -1498,7 +1498,7 @@ def interpreter_run( name, way, extra_hc_opts, compile_only, top_mod ):
 
 def split_file(in_fn, delimiter, out1_fn, out2_fn):
     # See Note [Universal newlines].
-    infile = io.open(in_fn, 'r', encoding='utf8', newline=None)
+    infile = io.open(in_fn, 'r', encoding='utf8', errors='replace', newline=None)
     out1 = io.open(out1_fn, 'w', encoding='utf8', newline='')
     out2 = io.open(out2_fn, 'w', encoding='utf8', newline='')
 
@@ -1569,7 +1569,7 @@ def read_no_crs(file):
     str = ''
     try:
         # See Note [Universal newlines].
-        h = io.open(file, 'r', encoding='utf8', newline=None)
+        h = io.open(file, 'r', encoding='utf8', errors='replace', newline=None)
         str = h.read()
         h.close
     except:
@@ -1602,6 +1602,12 @@ def write_file(file, str):
 #
 # This should work with both python2 and python3, and with both mingw*
 # as msys2 style Python.
+#
+# Do note that io.open returns unicode strings. So we have to specify
+# the expected encoding. But there is at least one file which is not
+# valid utf8 (decodingerror002.stdout). Solution: use errors='replace'.
+# Another solution would be to open files in binary mode always, and
+# operate on bytes.
 
 def check_hp_ok(name):
     opts = getTestOpts()



More information about the ghc-commits mailing list