[commit: ghc] master: Modifier letter in middle of identifier is ok (d738e66)
git at git.haskell.org
git at git.haskell.org
Fri Feb 19 15:54:31 UTC 2016
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/d738e66450ec09f69211330df05e381bfe996c13/ghc
>---------------------------------------------------------------
commit d738e66450ec09f69211330df05e381bfe996c13
Author: Thomas Miedema <thomasmiedema at gmail.com>
Date: Fri Feb 19 15:25:40 2016 +0100
Modifier letter in middle of identifier is ok
Refactoring only. Cleanup some loose ends from #10196.
Initially the idea was to only allow modifier letters at the end of
identifiers. Since we later decided to allow modifier letters also in
the middle of identifiers (because not doing so would not fix the
regression completely), the names `suffix` and `okIdSuffixChar` don't
seem appropriate anymore.
Remove TODO. Move test from should_fail to should_compile.
>---------------------------------------------------------------
d738e66450ec09f69211330df05e381bfe996c13
compiler/basicTypes/Lexeme.hs | 14 +++-----------
compiler/parser/Lexer.x | 9 ++++-----
testsuite/tests/parser/should_compile/T10196.hs | 5 +++++
testsuite/tests/parser/should_fail/T10196Fail3.hs | 6 ------
testsuite/tests/parser/should_fail/T10196Fail3.stderr | 2 --
testsuite/tests/parser/should_fail/all.T | 1 -
6 files changed, 12 insertions(+), 25 deletions(-)
diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs
index 4b1fe94..9e75376 100644
--- a/compiler/basicTypes/Lexeme.hs
+++ b/compiler/basicTypes/Lexeme.hs
@@ -28,7 +28,6 @@ module Lexeme (
) where
import FastString
-import Util ((<||>))
import Data.Char
import qualified Data.Set as Set
@@ -183,8 +182,7 @@ okConSymOcc str = all okSymChar str &&
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc str
- -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
- = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
+ = let hashes = dropWhile okIdChar str in
all (== '#') hashes -- -XMagicHash allows a suffix of hashes
-- of course, `all` says "True" to an empty list
@@ -194,19 +192,13 @@ okIdChar :: Char -> Bool
okIdChar c = case generalCategory c of
UppercaseLetter -> True
LowercaseLetter -> True
- OtherLetter -> True
TitlecaseLetter -> True
+ ModifierLetter -> True -- See #10196
+ OtherLetter -> True
DecimalNumber -> True
OtherNumber -> True
_ -> c == '\'' || c == '_'
--- | Is this character acceptable in the suffix of an identifier.
--- See alexGetByte in Lexer.x
-okIdSuffixChar :: Char -> Bool
-okIdSuffixChar c = case generalCategory c of
- ModifierLetter -> True -- See #10196
- _ -> False
-
-- | Is this character acceptable in a symbol (after the first char)?
-- See alexGetByte in Lexer.x
okSymChar :: Char -> Bool
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 719d886..5f3bdee 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -155,9 +155,8 @@ $binit = 0-1
$octit = 0-7
$hexit = [$decdigit A-F a-f]
-$suffix = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
--- TODO #10196. Only allow modifier letters in the suffix of an identifier.
-$idchar = [$small $large $digit $suffix \']
+$modifier = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
+$idchar = [$small $large $digit $modifier \']
$pragmachar = [$small $large $digit]
@@ -1875,7 +1874,7 @@ alexGetByte (AI loc s)
symbol = '\x04'
space = '\x05'
other_graphic = '\x06'
- suffix = '\x07'
+ modifier = '\x07'
adj_c
| c <= '\x06' = non_graphic
@@ -1892,7 +1891,7 @@ alexGetByte (AI loc s)
UppercaseLetter -> upper
LowercaseLetter -> lower
TitlecaseLetter -> upper
- ModifierLetter -> suffix -- see #10196
+ ModifierLetter -> modifier -- see #10196
OtherLetter -> lower -- see #1103
NonSpacingMark -> other_graphic
SpacingCombiningMark -> other_graphic
diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs
index f809118..a29f0c3 100644
--- a/testsuite/tests/parser/should_compile/T10196.hs
+++ b/testsuite/tests/parser/should_compile/T10196.hs
@@ -11,3 +11,8 @@ f =
xᵪ = xᵢ
xᵣ = xᵪ
in xᵣ
+
+-- Modifier letters are also allowed in the middle of an identifier.
+-- This should not be lexed as 2 separate identifiers.
+xᵦx :: Int
+xᵦx = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs
deleted file mode 100644
index 09b80dd..0000000
--- a/testsuite/tests/parser/should_fail/T10196Fail3.hs
+++ /dev/null
@@ -1,6 +0,0 @@
-module T10196Fail3 where
-
--- Modifier letters are not allowed in the middle of an identifier.
--- And this should not be lexed as 2 separate identifiers either.
-xᵦx :: Int
-xᵦx = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr
deleted file mode 100644
index 6403744..0000000
--- a/testsuite/tests/parser/should_fail/T10196Fail3.stderr
+++ /dev/null
@@ -1,2 +0,0 @@
-
-T10196Fail3.hs:5:2: error: lexical error at character '/7526'
diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T
index 21b523a..e6c6f41 100644
--- a/testsuite/tests/parser/should_fail/all.T
+++ b/testsuite/tests/parser/should_fail/all.T
@@ -91,6 +91,5 @@ test('T8506', normal, compile_fail, [''])
test('T9225', normal, compile_fail, [''])
test('T10196Fail1', normal, compile_fail, [''])
test('T10196Fail2', normal, compile_fail, [''])
-test('T10196Fail3', expect_broken(10196), compile_fail, [''])
test('T10498a', normal, compile_fail, [''])
test('T10498b', normal, compile_fail, [''])
More information about the ghc-commits
mailing list