[commit: ghc] master: Always use native-Haskell de/encoders for ASCII and latin1 (1319363)
git at git.haskell.org
git at git.haskell.org
Tue May 24 21:45:59 UTC 2016
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/1319363f7633c441bdb1f659616d71ecd700076d/ghc
>---------------------------------------------------------------
commit 1319363f7633c441bdb1f659616d71ecd700076d
Author: Thomas Miedema <thomasmiedema at gmail.com>
Date: Tue May 24 11:31:45 2016 +0200
Always use native-Haskell de/encoders for ASCII and latin1
This fixes test encoding005 on Windows (#10623).
Reviewed by: austin, bgamari
Differential Revision: https://phabricator.haskell.org/D2262
>---------------------------------------------------------------
1319363f7633c441bdb1f659616d71ecd700076d
libraries/base/GHC/IO/Encoding.hs | 34 +++++++++++++++++-------------
libraries/base/tests/IO/all.T | 3 +--
libraries/base/tests/IO/encoding005.hs | 16 +++++++-------
libraries/base/tests/IO/encoding005.stdout | 2 +-
4 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/libraries/base/GHC/IO/Encoding.hs b/libraries/base/GHC/IO/Encoding.hs
index 18b5432..578a420 100644
--- a/libraries/base/GHC/IO/Encoding.hs
+++ b/libraries/base/GHC/IO/Encoding.hs
@@ -245,8 +245,16 @@ mkTextEncoding' cfm enc =
"UTF32" -> return $ UTF32.mkUTF32 cfm
"UTF32LE" -> return $ UTF32.mkUTF32le cfm
"UTF32BE" -> return $ UTF32.mkUTF32be cfm
- -- ISO8859-1 we can handle ourselves as well
- "ISO88591" -> return $ Latin1.mkLatin1 cfm
+ -- On AIX, we want to avoid iconv, because it is either
+ -- a) totally broken, or b) non-reentrant, or c) actually works.
+ -- Detecting b) is difficult as you'd have to trigger the reentrancy
+ -- corruption.
+ -- Therefore, on AIX, we handle the popular ASCII and latin1 encodings
+ -- ourselves. For consistency, we do the same on other platforms.
+ -- We use `mkLatin1_checked` instead of `mkLatin1`, since the latter
+ -- completely ignores the CodingFailureMode (TEST=encoding005).
+ _ | isAscii -> return (Latin1.mkAscii cfm)
+ _ | isLatin1 -> return (Latin1.mkLatin1_checked cfm)
#if defined(mingw32_HOST_OS)
'C':'P':n | [(cp,"")] <- reads n -> return $ CodePage.mkCodePageEncoding cfm cp
_ -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
@@ -256,25 +264,21 @@ mkTextEncoding' cfm enc =
-- Unfortunately there is no good way to determine whether iconv is actually
-- functional without telling it to do something.
_ -> do res <- Iconv.mkIconvEncoding cfm enc
- let isAscii = any (== enc) ansiEncNames
case res of
Just e -> return e
- -- At this point we know that we can't count on iconv to work
- -- (see, for instance, Trac #10298). However, we still want to do
- -- what we can to work with what we have. For instance, ASCII is
- -- easy. We match on ASCII encodings directly using several
- -- possible aliases (specified by RFC 1345 & Co) and for this use
- -- the 'ascii' encoding
- Nothing
- | isAscii -> return (Latin1.mkAscii cfm)
- | otherwise ->
- unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
+ Nothing -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
+#endif
where
- ansiEncNames = -- ASCII aliases
+ isAscii = enc `elem` asciiEncNames
+ isLatin1 = enc `elem` latin1EncNames
+ asciiEncNames = -- ASCII aliases specified by RFC 1345 and RFC 3808.
[ "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991"
, "US-ASCII", "us", "IBM367", "cp367", "csASCII", "ASCII", "ISO646-US"
]
-#endif
+ latin1EncNames = -- latin1 aliases specified by RFC 1345 and RFC 3808.
+ [ "ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1", "ISO-8859-1", "latin1",
+ "l1", "IBM819", "CP819", "csISOLatin1"
+ ]
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
diff --git a/libraries/base/tests/IO/all.T b/libraries/base/tests/IO/all.T
index d04f3c4..295553f 100644
--- a/libraries/base/tests/IO/all.T
+++ b/libraries/base/tests/IO/all.T
@@ -138,8 +138,7 @@ test('encoding001',
test('encoding002', normal, compile_and_run, [''])
test('encoding003', normal, compile_and_run, [''])
test('encoding004', normal, compile_and_run, [''])
-test('encoding005', when(opsys('mingw32'), expect_broken(10623)),
- compile_and_run, [''])
+test('encoding005', normal, compile_and_run, [''])
test('environment001',
[extra_clean(['environment001'])],
diff --git a/libraries/base/tests/IO/encoding005.hs b/libraries/base/tests/IO/encoding005.hs
index 99db84a..b4ee381 100644
--- a/libraries/base/tests/IO/encoding005.hs
+++ b/libraries/base/tests/IO/encoding005.hs
@@ -44,9 +44,9 @@ test_latin1 cfm enc = do
ErrorOnCodingFailure -> Nothing
IgnoreCodingFailure -> Just [0xfe,0xff,0xff,0xfe]
TransliterateCodingFailure -> Just [0xfe,0xff,0x3f,0x3f,0x3f,0xff,0xfe]
- -- N.B. The argument "LATIN1//TRANSLIT" to mkTextEncoding does not
- -- correspond to "LATIN1//TRANSLIT" in iconv! Instead GHC asks iconv
- -- to encode to "LATIN1" and uses its own "evil hack" to insert '?'
+ -- N.B. The argument "latin1//TRANSLIT" to mkTextEncoding does not
+ -- correspond to "latin1//TRANSLIT" in iconv! Instead GHC asks iconv
+ -- to encode to "latin1" and uses its own "evil hack" to insert '?'
-- (ASCII 0x3f) in place of failures. See GHC.IO.Encoding.recoverEncode.
--
-- U+0100 is LATIN CAPITAL LETTER A WITH MACRON, which iconv would
@@ -108,8 +108,8 @@ main = do
test_ascii TransliterateCodingFailure =<< mkTextEncoding "ASCII//TRANSLIT"
test_ascii RoundtripFailure =<< mkTextEncoding "ASCII//ROUNDTRIP"
- putStrLn "mkTextEncoding LATIN1 tests"
- test_latin1 ErrorOnCodingFailure =<< mkTextEncoding "LATIN1"
- test_latin1 IgnoreCodingFailure =<< mkTextEncoding "LATIN1//IGNORE"
- test_latin1 TransliterateCodingFailure =<< mkTextEncoding "LATIN1//TRANSLIT"
- test_latin1 RoundtripFailure =<< mkTextEncoding "LATIN1//ROUNDTRIP"
+ putStrLn "mkTextEncoding latin1 tests"
+ test_latin1 ErrorOnCodingFailure =<< mkTextEncoding "latin1"
+ test_latin1 IgnoreCodingFailure =<< mkTextEncoding "latin1//IGNORE"
+ test_latin1 TransliterateCodingFailure =<< mkTextEncoding "latin1//TRANSLIT"
+ test_latin1 RoundtripFailure =<< mkTextEncoding "latin1//ROUNDTRIP"
diff --git a/libraries/base/tests/IO/encoding005.stdout b/libraries/base/tests/IO/encoding005.stdout
index 664a193..e7995b1 100644
--- a/libraries/base/tests/IO/encoding005.stdout
+++ b/libraries/base/tests/IO/encoding005.stdout
@@ -2,4 +2,4 @@ char8 tests
Latin1.ascii tests
Latin1.latin1_checked tests
mkTextEncoding ASCII tests
-mkTextEncoding LATIN1 tests
+mkTextEncoding latin1 tests
More information about the ghc-commits
mailing list