[commit: ghc] master: Unbreak Text.Read.Lex.lex on Unicode symbols (fce0465)
git at git.haskell.org
git at git.haskell.org
Thu Nov 19 12:24:40 UTC 2015
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/fce04651f2389d59b3355c67d9e189c62969ac76/ghc
>---------------------------------------------------------------
commit fce04651f2389d59b3355c67d9e189c62969ac76
Author: M Farkas-Dyck <strake888 at gmail.com>
Date: Thu Nov 19 12:19:30 2015 +0100
Unbreak Text.Read.Lex.lex on Unicode symbols
Reviewers: thomie, hvr, austin, bgamari
Reviewed By: bgamari
Subscribers: bgamari, thomie
Differential Revision: https://phabricator.haskell.org/D1480
GHC Trac Issues: #10444
>---------------------------------------------------------------
fce04651f2389d59b3355c67d9e189c62969ac76
libraries/base/Text/Read/Lex.hs | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/libraries/base/Text/Read/Lex.hs b/libraries/base/Text/Read/Lex.hs
index d7d6547..608bf85 100644
--- a/libraries/base/Text/Read/Lex.hs
+++ b/libraries/base/Text/Read/Lex.hs
@@ -39,7 +39,8 @@ import GHC.Base
import GHC.Char
import GHC.Num( Num(..), Integer )
import GHC.Show( Show(..) )
-import GHC.Unicode( isSpace, isAlpha, isAlphaNum )
+import GHC.Unicode
+ ( GeneralCategory(..), generalCategory, isSpace, isAlpha, isAlphaNum )
import GHC.Real( Rational, (%), fromIntegral, Integral,
toInteger, (^), quot, even )
import GHC.List
@@ -198,8 +199,10 @@ lexPunc :: ReadP Lexeme
lexPunc =
do c <- satisfy isPuncChar
return (Punc [c])
- where
- isPuncChar c = c `elem` ",;()[]{}`"
+
+-- | The @special@ character class as defined in the Haskell Report.
+isPuncChar :: Char -> Bool
+isPuncChar c = c `elem` ",;()[]{}`"
-- ----------------------------------------------------------------------
-- Symbols
@@ -212,7 +215,15 @@ lexSymbol =
else
return (Symbol s)
where
- isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
+ isSymbolChar c = not (isPuncChar c) && case generalCategory c of
+ MathSymbol -> True
+ CurrencySymbol -> True
+ ModifierSymbol -> True
+ OtherSymbol -> True
+ DashPunctuation -> True
+ OtherPunctuation -> not (c `elem` "'\"")
+ ConnectorPunctuation -> c /= '_'
+ _ -> False
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
-- ----------------------------------------------------------------------
More information about the ghc-commits mailing list