[commit: ghc] master: Unbreak Text.Read.Lex.lex on Unicode symbols (fce0465)

git at git.haskell.org git at git.haskell.org
Thu Nov 19 12:24:40 UTC 2015


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/fce04651f2389d59b3355c67d9e189c62969ac76/ghc

>---------------------------------------------------------------

commit fce04651f2389d59b3355c67d9e189c62969ac76
Author: M Farkas-Dyck <strake888 at gmail.com>
Date:   Thu Nov 19 12:19:30 2015 +0100

    Unbreak Text.Read.Lex.lex on Unicode symbols
    
    Reviewers: thomie, hvr, austin, bgamari
    
    Reviewed By: bgamari
    
    Subscribers: bgamari, thomie
    
    Differential Revision: https://phabricator.haskell.org/D1480
    
    GHC Trac Issues: #10444


>---------------------------------------------------------------

fce04651f2389d59b3355c67d9e189c62969ac76
 libraries/base/Text/Read/Lex.hs | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/libraries/base/Text/Read/Lex.hs b/libraries/base/Text/Read/Lex.hs
index d7d6547..608bf85 100644
--- a/libraries/base/Text/Read/Lex.hs
+++ b/libraries/base/Text/Read/Lex.hs
@@ -39,7 +39,8 @@ import GHC.Base
 import GHC.Char
 import GHC.Num( Num(..), Integer )
 import GHC.Show( Show(..) )
-import GHC.Unicode( isSpace, isAlpha, isAlphaNum )
+import GHC.Unicode
+  ( GeneralCategory(..), generalCategory, isSpace, isAlpha, isAlphaNum )
 import GHC.Real( Rational, (%), fromIntegral, Integral,
                  toInteger, (^), quot, even )
 import GHC.List
@@ -198,8 +199,10 @@ lexPunc :: ReadP Lexeme
 lexPunc =
   do c <- satisfy isPuncChar
      return (Punc [c])
- where
-  isPuncChar c = c `elem` ",;()[]{}`"
+
+-- | The @special@ character class as defined in the Haskell Report.
+isPuncChar :: Char -> Bool
+isPuncChar c = c `elem` ",;()[]{}`"
 
 -- ----------------------------------------------------------------------
 -- Symbols
@@ -212,7 +215,15 @@ lexSymbol =
       else
         return (Symbol s)
  where
-  isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
+  isSymbolChar c = not (isPuncChar c) && case generalCategory c of
+      MathSymbol              -> True
+      CurrencySymbol          -> True
+      ModifierSymbol          -> True
+      OtherSymbol             -> True
+      DashPunctuation         -> True
+      OtherPunctuation        -> not (c `elem` "'\"")
+      ConnectorPunctuation    -> c /= '_'
+      _                       -> False
   reserved_ops   = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
 
 -- ----------------------------------------------------------------------



More information about the ghc-commits mailing list