[commit: ghc] wip/rae: Fix #7484, checking for good binder names in Convert. (d10ec68)

git at git.haskell.org git at git.haskell.org
Tue Nov 4 02:10:11 UTC 2014


Repository : ssh://git@git.haskell.org/ghc

On branch  : wip/rae
Link       : http://ghc.haskell.org/trac/ghc/changeset/d10ec6831fca74e8e8f917ec32a89711f33e33db/ghc

>---------------------------------------------------------------

commit d10ec6831fca74e8e8f917ec32a89711f33e33db
Author: Richard Eisenberg <eir at cis.upenn.edu>
Date:   Mon Nov 3 15:34:53 2014 -0500

    Fix #7484, checking for good binder names in Convert.
    
    This includes a somewhat pedantic check against the code in Lexer.x
    to make sure that TH accepts the same set of names that the lexer
    does. Doing this unearthed a latent bug dealing with unicode identifiers
    in OccName.


>---------------------------------------------------------------

d10ec6831fca74e8e8f917ec32a89711f33e33db
 compiler/basicTypes/OccName.lhs |   5 +-
 compiler/hsSyn/Convert.lhs      | 137 ++++++++++++++++++++++++++++++++++++++--
 testsuite/tests/th/all.T        |   2 +-
 3 files changed, 135 insertions(+), 9 deletions(-)

diff --git a/compiler/basicTypes/OccName.lhs b/compiler/basicTypes/OccName.lhs
index 0010ad3..dc86991 100644
--- a/compiler/basicTypes/OccName.lhs
+++ b/compiler/basicTypes/OccName.lhs
@@ -905,7 +905,10 @@ isLexVarSym fs                          -- Infix identifiers e.g. "+"
 startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
 startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c)  -- Infix Ids
 startsConSym c = c == ':'               -- Infix data constructors
-startsVarId c  = isLower c || c == '_'  -- Ordinary Ids
+startsVarId c  = c == '_' || case generalCategory c of  -- Ordinary Ids
+  LowercaseLetter -> True
+  OtherLetter     -> True   -- See #1103
+  _               -> False
 startsConId c  = isUpper c || c == '('  -- Ordinary type constructors and data constructors
 
 isSymbolASCII :: Char -> Bool
diff --git a/compiler/hsSyn/Convert.lhs b/compiler/hsSyn/Convert.lhs
index 6cff928..6f2b14d 100644
--- a/compiler/hsSyn/Convert.lhs
+++ b/compiler/hsSyn/Convert.lhs
@@ -45,6 +45,8 @@ import Data.Maybe( catMaybes )
 import Language.Haskell.TH as TH hiding (sigP)
 import Language.Haskell.TH.Syntax as TH
 import GHC.Exts
+import Data.Char
+import qualified Data.Set as Set
 
 -------------------------------------------------------------------
 --              The external interface
@@ -1109,13 +1111,134 @@ cvtName ctxt_ns (TH.Name occ flavour)
     occ_str = TH.occString occ
 
 okOcc :: OccName.NameSpace -> String -> Bool
-okOcc _  []      = False
-okOcc ns str@(c:_)
-  | OccName.isVarNameSpace ns     = startsVarId c || startsVarSym c
-  | OccName.isDataConNameSpace ns = startsConId c || startsConSym c || str == "[]"
-  | otherwise                     = startsConId c || startsConSym c ||
-                                    startsVarSym c || str == "[]" || str == "->"
-                                     -- allow type operators like "+"
+okOcc ns str
+  | OccName.isVarNameSpace ns     = okVarOcc str
+  | OccName.isDataConNameSpace ns = okConOcc str
+  | otherwise                     = okTcOcc  str
+
+-- | Is this an acceptable variable name?
+okVarOcc :: String -> Bool
+okVarOcc str@(c:_)
+  | startsVarId c
+  = okVarIdOcc str
+  | startsVarSym c
+  = okVarSymOcc str
+okVarOcc _ = False
+
+-- | Is this an acceptable alphanumeric variable name, assuming it starts
+-- with an acceptable letter?
+okVarIdOcc :: String -> Bool
+okVarIdOcc str = all okIdChar str &&
+                 not (str `Set.member` reservedIds)
+
+-- | Is this an acceptable symbolic variable name, assuming it starts
+-- with an acceptable character?
+okVarSymOcc :: String -> Bool
+okVarSymOcc str = all okSymChar str &&
+                  not (str `Set.member` reservedOps) &&
+                  not (isDashes str)
+
+-- | Is this an acceptable constructor name?
+okConOcc :: String -> Bool
+okConOcc str@(c:_)
+  | startsConId c
+  = okConIdOcc str
+  | startsConSym c
+  = okConSymOcc str
+  | str == "[]"
+  = True
+okConOcc _ = False
+
+-- | Is this an acceptable alphanumeric constructor name, assuming it
+-- starts with an acceptable letter?
+okConIdOcc :: String -> Bool
+okConIdOcc str = all okIdChar str ||
+                 is_tuple_name1 str
+  where
+    -- check for tuple name, starting at the beginning
+    is_tuple_name1 ('(' : rest) = is_tuple_name2 rest
+    is_tuple_name1 _            = False
+
+    -- check for tuple tail
+    is_tuple_name2 ")"          = True
+    is_tuple_name2 (',' : rest) = is_tuple_name2 rest
+    is_tuple_name2 (ws  : rest)
+      | isSpace ws              = is_tuple_name2 rest
+    is_tuple_name2 _            = False
+
+-- | Is this an acceptable symbolic constructor name, assuming it
+-- starts with an acceptable character?
+okConSymOcc :: String -> Bool
+okConSymOcc ":" = True
+okConSymOcc str = all okSymChar str &&
+                  not (str `Set.member` reservedOps)
+
+-- | Is this an acceptable type name?
+okTcOcc :: String -> Bool
+okTcOcc "[]" = True
+okTcOcc "->" = True
+okTcOcc "~"  = True
+okTcOcc str@(c:_)
+  | startsConId c
+  = okConIdOcc str
+  | startsConSym c
+  = okConSymOcc str
+  | startsVarSym c
+  = okVarSymOcc str
+okTcOcc _ = False
+
+-- | Is this character acceptable in an identifier (after the first letter)?
+-- See alexGetByte in Lexer.x
+okIdChar :: Char -> Bool
+okIdChar c = case generalCategory c of
+  UppercaseLetter -> True
+  LowercaseLetter -> True
+  OtherLetter     -> True
+  TitlecaseLetter -> True
+  DecimalNumber   -> True
+  OtherNumber     -> True
+  _               -> c == '\'' || c == '_' || c == '#'
+
+-- | Is this character acceptable in a symbol (after the first char)?
+-- See alexGetByte in Lexer.x
+okSymChar :: Char -> Bool
+okSymChar c
+  | c `elem` specialSymbols
+  = False
+  | c `elem` "_\"'"
+  = False
+  | otherwise
+  = case generalCategory c of
+      ConnectorPunctuation -> True
+      DashPunctuation      -> True
+      OtherPunctuation     -> True
+      MathSymbol           -> True
+      CurrencySymbol       -> True
+      ModifierSymbol       -> True
+      OtherSymbol          -> True
+      _                    -> False
+    
+-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
+reservedIds :: Set.Set String
+reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
+                           , "do", "else", "foreign", "if", "import", "in"
+                           , "infix", "infixl", "infixr", "instance", "let"
+                           , "module", "newtype", "of", "then", "type", "where"
+                           , "_" ]
+
+-- | All punctuation that cannot appear in symbols. See $special in Lexer.x.
+specialSymbols :: [Char]
+specialSymbols = "(),;[]`{}"
+
+-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
+reservedOps :: Set.Set String
+reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
+                           , "@", "~", "=>" ]
+
+-- | Does this string contain only dashes and has at least 2 of them?
+isDashes :: String -> Bool
+isDashes ('-' : '-' : rest) = all (== '-') rest
+isDashes _                  = False
             
 -- Determine the name space of a name in a type
 --
diff --git a/testsuite/tests/th/all.T b/testsuite/tests/th/all.T
index 4f71fd2..4fd131a 100644
--- a/testsuite/tests/th/all.T
+++ b/testsuite/tests/th/all.T
@@ -337,4 +337,4 @@ test('T9084', normal, compile_fail, ['-v0'])
 test('T9738', normal, compile, ['-v0'])
 test('T9066', normal, compile, ['-v0'])
 test('T9209', normal, compile_fail, ['-v0'])
-test('T7484', expect_broken(7484), compile_fail, ['-v0'])
+test('T7484', normal, compile_fail, ['-v0'])



More information about the ghc-commits mailing list