[commit: ghc] wip/rae: Fix #7484, checking for good binder names in Convert. (d10ec68)
git at git.haskell.org
git at git.haskell.org
Tue Nov 4 02:10:11 UTC 2014
Repository : ssh://git@git.haskell.org/ghc
On branch : wip/rae
Link : http://ghc.haskell.org/trac/ghc/changeset/d10ec6831fca74e8e8f917ec32a89711f33e33db/ghc
>---------------------------------------------------------------
commit d10ec6831fca74e8e8f917ec32a89711f33e33db
Author: Richard Eisenberg <eir at cis.upenn.edu>
Date: Mon Nov 3 15:34:53 2014 -0500
Fix #7484, checking for good binder names in Convert.
This includes a somewhat pedantic check against the code in Lexer.x
to make sure that TH accepts the same set of names that the lexer
does. Doing this unearthed a latent bug dealing with unicode identifiers
in OccName.
>---------------------------------------------------------------
d10ec6831fca74e8e8f917ec32a89711f33e33db
compiler/basicTypes/OccName.lhs | 5 +-
compiler/hsSyn/Convert.lhs | 137 ++++++++++++++++++++++++++++++++++++++--
testsuite/tests/th/all.T | 2 +-
3 files changed, 135 insertions(+), 9 deletions(-)
diff --git a/compiler/basicTypes/OccName.lhs b/compiler/basicTypes/OccName.lhs
index 0010ad3..dc86991 100644
--- a/compiler/basicTypes/OccName.lhs
+++ b/compiler/basicTypes/OccName.lhs
@@ -905,7 +905,10 @@ isLexVarSym fs -- Infix identifiers e.g. "+"
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
startsVarSym c = isSymbolASCII c || (ord c > 0x7f && isSymbol c) -- Infix Ids
startsConSym c = c == ':' -- Infix data constructors
-startsVarId c = isLower c || c == '_' -- Ordinary Ids
+startsVarId c = c == '_' || case generalCategory c of -- Ordinary Ids
+ LowercaseLetter -> True
+ OtherLetter -> True -- See #1103
+ _ -> False
startsConId c = isUpper c || c == '(' -- Ordinary type constructors and data constructors
isSymbolASCII :: Char -> Bool
diff --git a/compiler/hsSyn/Convert.lhs b/compiler/hsSyn/Convert.lhs
index 6cff928..6f2b14d 100644
--- a/compiler/hsSyn/Convert.lhs
+++ b/compiler/hsSyn/Convert.lhs
@@ -45,6 +45,8 @@ import Data.Maybe( catMaybes )
import Language.Haskell.TH as TH hiding (sigP)
import Language.Haskell.TH.Syntax as TH
import GHC.Exts
+import Data.Char
+import qualified Data.Set as Set
-------------------------------------------------------------------
-- The external interface
@@ -1109,13 +1111,134 @@ cvtName ctxt_ns (TH.Name occ flavour)
occ_str = TH.occString occ
okOcc :: OccName.NameSpace -> String -> Bool
-okOcc _ [] = False
-okOcc ns str@(c:_)
- | OccName.isVarNameSpace ns = startsVarId c || startsVarSym c
- | OccName.isDataConNameSpace ns = startsConId c || startsConSym c || str == "[]"
- | otherwise = startsConId c || startsConSym c ||
- startsVarSym c || str == "[]" || str == "->"
- -- allow type operators like "+"
+okOcc ns str
+ | OccName.isVarNameSpace ns = okVarOcc str
+ | OccName.isDataConNameSpace ns = okConOcc str
+ | otherwise = okTcOcc str
+
+-- | Is this an acceptable variable name?
+okVarOcc :: String -> Bool
+okVarOcc str@(c:_)
+ | startsVarId c
+ = okVarIdOcc str
+ | startsVarSym c
+ = okVarSymOcc str
+okVarOcc _ = False
+
+-- | Is this an acceptable alphanumeric variable name, assuming it starts
+-- with an acceptable letter?
+okVarIdOcc :: String -> Bool
+okVarIdOcc str = all okIdChar str &&
+ not (str `Set.member` reservedIds)
+
+-- | Is this an acceptable symbolic variable name, assuming it starts
+-- with an acceptable character?
+okVarSymOcc :: String -> Bool
+okVarSymOcc str = all okSymChar str &&
+ not (str `Set.member` reservedOps) &&
+ not (isDashes str)
+
+-- | Is this an acceptable constructor name?
+okConOcc :: String -> Bool
+okConOcc str@(c:_)
+ | startsConId c
+ = okConIdOcc str
+ | startsConSym c
+ = okConSymOcc str
+ | str == "[]"
+ = True
+okConOcc _ = False
+
+-- | Is this an acceptable alphanumeric constructor name, assuming it
+-- starts with an acceptable letter?
+okConIdOcc :: String -> Bool
+okConIdOcc str = all okIdChar str ||
+ is_tuple_name1 str
+ where
+ -- check for tuple name, starting at the beginning
+ is_tuple_name1 ('(' : rest) = is_tuple_name2 rest
+ is_tuple_name1 _ = False
+
+ -- check for tuple tail
+ is_tuple_name2 ")" = True
+ is_tuple_name2 (',' : rest) = is_tuple_name2 rest
+ is_tuple_name2 (ws : rest)
+ | isSpace ws = is_tuple_name2 rest
+ is_tuple_name2 _ = False
+
+-- | Is this an acceptable symbolic constructor name, assuming it
+-- starts with an acceptable character?
+okConSymOcc :: String -> Bool
+okConSymOcc ":" = True
+okConSymOcc str = all okSymChar str &&
+ not (str `Set.member` reservedOps)
+
+-- | Is this an acceptable type name?
+okTcOcc :: String -> Bool
+okTcOcc "[]" = True
+okTcOcc "->" = True
+okTcOcc "~" = True
+okTcOcc str@(c:_)
+ | startsConId c
+ = okConIdOcc str
+ | startsConSym c
+ = okConSymOcc str
+ | startsVarSym c
+ = okVarSymOcc str
+okTcOcc _ = False
+
+-- | Is this character acceptable in an identifier (after the first letter)?
+-- See alexGetByte in Lexer.x
+okIdChar :: Char -> Bool
+okIdChar c = case generalCategory c of
+ UppercaseLetter -> True
+ LowercaseLetter -> True
+ OtherLetter -> True
+ TitlecaseLetter -> True
+ DecimalNumber -> True
+ OtherNumber -> True
+ _ -> c == '\'' || c == '_' || c == '#'
+
+-- | Is this character acceptable in a symbol (after the first char)?
+-- See alexGetByte in Lexer.x
+okSymChar :: Char -> Bool
+okSymChar c
+ | c `elem` specialSymbols
+ = False
+ | c `elem` "_\"'"
+ = False
+ | otherwise
+ = case generalCategory c of
+ ConnectorPunctuation -> True
+ DashPunctuation -> True
+ OtherPunctuation -> True
+ MathSymbol -> True
+ CurrencySymbol -> True
+ ModifierSymbol -> True
+ OtherSymbol -> True
+ _ -> False
+
+-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
+reservedIds :: Set.Set String
+reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
+ , "do", "else", "foreign", "if", "import", "in"
+ , "infix", "infixl", "infixr", "instance", "let"
+ , "module", "newtype", "of", "then", "type", "where"
+ , "_" ]
+
+-- | All punctuation that cannot appear in symbols. See $special in Lexer.x.
+specialSymbols :: [Char]
+specialSymbols = "(),;[]`{}"
+
+-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
+reservedOps :: Set.Set String
+reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
+ , "@", "~", "=>" ]
+
+-- | Does this string contain only dashes and has at least 2 of them?
+isDashes :: String -> Bool
+isDashes ('-' : '-' : rest) = all (== '-') rest
+isDashes _ = False
-- Determine the name space of a name in a type
--
diff --git a/testsuite/tests/th/all.T b/testsuite/tests/th/all.T
index 4f71fd2..4fd131a 100644
--- a/testsuite/tests/th/all.T
+++ b/testsuite/tests/th/all.T
@@ -337,4 +337,4 @@ test('T9084', normal, compile_fail, ['-v0'])
test('T9738', normal, compile, ['-v0'])
test('T9066', normal, compile, ['-v0'])
test('T9209', normal, compile_fail, ['-v0'])
-test('T7484', expect_broken(7484), compile_fail, ['-v0'])
+test('T7484', normal, compile_fail, ['-v0'])
More information about the ghc-commits
mailing list