[commit: ghc] master: parser: Allow Lm (MODIFIER LETTER) category in identifiers (6b01d3c)
git at git.haskell.org
git at git.haskell.org
Fri Jul 3 20:44:35 UTC 2015
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/6b01d3ce6c681428e7a9865af85685c2a76ba657/ghc
>---------------------------------------------------------------
commit 6b01d3ce6c681428e7a9865af85685c2a76ba657
Author: Thomas Miedema <thomasmiedema at gmail.com>
Date: Fri Jul 3 22:37:18 2015 +0200
parser: Allow Lm (MODIFIER LETTER) category in identifiers
Easy fix in the parser to stop regressions, due to Unicode 7.0 changing
the classification of some prior code points.
Signed-off-by: Austin Seipp <austin at well-typed.com>
Test Plan: `tests/parser/should_compile/T10196.hs`
Reviewers: hvr, austin, bgamari
Reviewed By: austin, bgamari
Subscribers: thomie, bgamari
Differential Revision: https://phabricator.haskell.org/D969
GHC Trac Issues: #10196
>---------------------------------------------------------------
6b01d3ce6c681428e7a9865af85685c2a76ba657
compiler/basicTypes/Lexeme.hs | 11 ++++++++++-
compiler/parser/Lexer.x | 8 ++++++--
compiler/utils/Util.hs | 5 +++++
testsuite/tests/parser/should_compile/T10196.hs | 13 +++++++++++++
testsuite/tests/parser/should_compile/all.T | 1 +
testsuite/tests/parser/should_fail/T10196Fail1.hs | 4 ++++
testsuite/tests/parser/should_fail/T10196Fail1.stderr | 2 ++
testsuite/tests/parser/should_fail/T10196Fail2.hs | 4 ++++
testsuite/tests/parser/should_fail/T10196Fail2.stderr | 2 ++
testsuite/tests/parser/should_fail/T10196Fail3.hs | 6 ++++++
testsuite/tests/parser/should_fail/T10196Fail3.stderr | 2 ++
testsuite/tests/parser/should_fail/all.T | 3 +++
12 files changed, 58 insertions(+), 3 deletions(-)
diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs
index c5bda4d..a240961 100644
--- a/compiler/basicTypes/Lexeme.hs
+++ b/compiler/basicTypes/Lexeme.hs
@@ -28,6 +28,7 @@ module Lexeme (
) where
import FastString
+import Util ((<||>))
import Data.Char
import qualified Data.Set as Set
@@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str &&
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str
- = let hashes = dropWhile okIdChar str in
+ -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
+ = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
all (== '#') hashes -- -XMagicHash allows a suffix of hashes
-- of course, `all` says "True" to an empty list
@@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of
OtherNumber -> True
_ -> c == '\'' || c == '_'
+-- | Is this character acceptable in the suffix of an identifier.
+-- See alexGetByte in Lexer.x
+okIdSuffixChar :: Char -> Bool
+okIdSuffixChar c = case generalCategory c of
+ ModifierLetter -> True -- See #10196
+ _ -> False
+
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 5839a41..2e883fd 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -156,7 +156,10 @@ $graphic = [$small $large $symbol $digit $special $unigraphic \"\']
$binit = 0-1
$octit = 0-7
$hexit = [$decdigit A-F a-f]
-$idchar = [$small $large $digit \']
+
+$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
+-- TODO #10196. Only allow modifier letters in the suffix of an identifier.
+$idchar = [$small $large $digit $suffix \']
$pragmachar = [$small $large $digit]
@@ -1842,6 +1845,7 @@ alexGetByte (AI loc s)
symbol = '\x04'
space = '\x05'
other_graphic = '\x06'
+ suffix = '\x07'
adj_c
| c <= '\x06' = non_graphic
@@ -1858,7 +1862,7 @@ alexGetByte (AI loc s)
UppercaseLetter -> upper
LowercaseLetter -> lower
TitlecaseLetter -> upper
- ModifierLetter -> other_graphic
+ ModifierLetter -> suffix -- see #10196
OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
index 732f2b8f..96cd752 100644
--- a/compiler/utils/Util.hs
+++ b/compiler/utils/Util.hs
@@ -55,6 +55,7 @@ module Util (
isEqual, eqListBy, eqMaybeBy,
thenCmp, cmpList,
removeSpaces,
+ (<&&>), (<||>),
-- * Edit distance
fuzzyMatch, fuzzyLookup,
@@ -665,6 +666,10 @@ removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
(<&&>) = liftA2 (&&)
infixr 3 <&&> -- same as (&&)
+(<||>) :: Applicative f => f Bool -> f Bool -> f Bool
+(<||>) = liftA2 (||)
+infixr 2 <||> -- same as (||)
+
{-
************************************************************************
* *
diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs
new file mode 100644
index 0000000..f809118
--- /dev/null
+++ b/testsuite/tests/parser/should_compile/T10196.hs
@@ -0,0 +1,13 @@
+module T10196 where
+
+data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ
+
+f :: Int
+f =
+ let xᵦ = 1
+ xᵤ = xᵦ
+ xᵩ = xᵤ
+ xᵢ = xᵩ
+ xᵪ = xᵢ
+ xᵣ = xᵪ
+ in xᵣ
diff --git a/testsuite/tests/parser/should_compile/all.T b/testsuite/tests/parser/should_compile/all.T
index 68845c1..521b5a4 100644
--- a/testsuite/tests/parser/should_compile/all.T
+++ b/testsuite/tests/parser/should_compile/all.T
@@ -101,4 +101,5 @@ test('T5682', normal, compile, [''])
test('T9723a', normal, compile, [''])
test('T9723b', normal, compile, [''])
test('T10188', normal, compile, [''])
+test('T10196', normal, compile, [''])
test('T10582', expect_broken(10582), compile, [''])
diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.hs b/testsuite/tests/parser/should_fail/T10196Fail1.hs
new file mode 100644
index 0000000..2f1c8f3
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail1.hs
@@ -0,0 +1,4 @@
+module T10196Fail1 where
+
+-- Constructors are not allowed to start with a modifier letter.
+data Foo = ᵦfoo
diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.stderr b/testsuite/tests/parser/should_fail/T10196Fail1.stderr
new file mode 100644
index 0000000..3c4a173
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail1.stderr
@@ -0,0 +1,2 @@
+
+T10196Fail1.hs:4:12: error: lexical error at character '\7526'
diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.hs b/testsuite/tests/parser/should_fail/T10196Fail2.hs
new file mode 100644
index 0000000..64b3cac
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail2.hs
@@ -0,0 +1,4 @@
+module T10196Fail2 where
+
+-- Variables are not allowed to start with a modifier letter.
+ᵦ = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.stderr b/testsuite/tests/parser/should_fail/T10196Fail2.stderr
new file mode 100644
index 0000000..abba8aa
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail2.stderr
@@ -0,0 +1,2 @@
+
+T10196Fail2.hs:4:1: error: lexical error at character '\7526'
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs
new file mode 100644
index 0000000..09b80dd
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail3.hs
@@ -0,0 +1,6 @@
+module T10196Fail3 where
+
+-- Modifier letters are not allowed in the middle of an identifier.
+-- And this should not be lexed as 2 separate identifiers either.
+xᵦx :: Int
+xᵦx = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr
new file mode 100644
index 0000000..6403744
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail3.stderr
@@ -0,0 +1,2 @@
+
+T10196Fail3.hs:5:2: error: lexical error at character '/7526'
diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T
index affea92..33da721 100644
--- a/testsuite/tests/parser/should_fail/all.T
+++ b/testsuite/tests/parser/should_fail/all.T
@@ -88,3 +88,6 @@ test('T8431', compile_timeout_multiplier(0.05),
compile_fail, ['-XAlternativeLayoutRule'])
test('T8506', normal, compile_fail, [''])
test('T9225', normal, compile_fail, [''])
+test('T10196Fail1', normal, compile_fail, [''])
+test('T10196Fail2', normal, compile_fail, [''])
+test('T10196Fail3', expect_broken(10196), compile_fail, [''])
More information about the ghc-commits
mailing list