[commit: ghc] master: Skip a possible BOM in utf8 encoding (f7fd864)

git at git.haskell.org git at git.haskell.org
Fri Sep 25 11:07:55 UTC 2015


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304/ghc

>---------------------------------------------------------------

commit f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304
Author: Joachim Breitner <mail at joachim-breitner.de>
Date:   Wed Sep 23 10:10:03 2015 +0200

    Skip a possible BOM in utf8 encoding
    
    Detect the BOM using the UTF-8 encoding and not the system locale, which
    might be something else. This fixes bug #10907. A test is added, but it is
    less useful than it could be until task #10909 is done.
    
    Differential Revision: D1274


>---------------------------------------------------------------

f7fd864ce6d41cf22d25f18a0cdc5e2e9db71304
 compiler/utils/StringBuffer.hs           | 10 +++++++---
 testsuite/tests/parser/unicode/T10907.hs |  1 +
 testsuite/tests/parser/unicode/all.T     |  3 +++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 2e339d8..6b39fc8 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -53,6 +53,8 @@ import Data.Maybe
 import Control.Exception
 import System.IO
 import System.IO.Unsafe         ( unsafePerformIO )
+import GHC.IO.Encoding.UTF8     ( mkUTF8 )
+import GHC.IO.Encoding.Failure  ( CodingFailureMode(IgnoreCodingFailure) )
 
 import GHC.Exts
 
@@ -131,14 +133,16 @@ skipBOM h size offset =
     then do
       -- Validate assumption that handle is in binary mode.
       ASSERTM( hGetEncoding h >>= return . isNothing )
-      -- Temporarily select text mode to make `hLookAhead` and
-      -- `hGetChar` return full Unicode characters.
-      bracket_ (hSetBinaryMode h False) (hSetBinaryMode h True) $ do
+      -- Temporarily select utf8 encoding with error ignoring,
+      -- to make `hLookAhead` and `hGetChar` return full Unicode characters.
+      bracket_ (hSetEncoding h safeEncoding) (hSetBinaryMode h True) $ do
         c <- hLookAhead h
         if c == '\xfeff'
           then hGetChar h >> hTell h
           else return offset
     else return offset
+  where
+    safeEncoding = mkUTF8 IgnoreCodingFailure
 
 newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
 newUTF8StringBuffer buf ptr size = do
diff --git a/testsuite/tests/parser/unicode/T10907.hs b/testsuite/tests/parser/unicode/T10907.hs
new file mode 100644
index 0000000..60aa3e7
--- /dev/null
+++ b/testsuite/tests/parser/unicode/T10907.hs
@@ -0,0 +1 @@
+module ByteOrderMark () where
diff --git a/testsuite/tests/parser/unicode/all.T b/testsuite/tests/parser/unicode/all.T
index ec08ae5..6972a0d 100644
--- a/testsuite/tests/parser/unicode/all.T
+++ b/testsuite/tests/parser/unicode/all.T
@@ -22,3 +22,6 @@ test('T2302', only_ways(['normal']), compile_fail, [''])
 test('T4373', normal, compile, [''])
 test('T6016', extra_clean(['T6016-twoBOMs']), compile_and_run, ['-package ghc'])
 test('T7671', normal, compile, [''])
+# TODO: This test ought to be run in a non-UTF8 locale, but this is not yet
+# supported by the test suite (see 10907)
+test('T10907', normal, compile, [''])



More information about the ghc-commits mailing list