[commit: ghc] master: Modifier letter in middle of identifier is ok (d738e66)

git at git.haskell.org git at git.haskell.org
Fri Feb 19 15:54:31 UTC 2016


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/d738e66450ec09f69211330df05e381bfe996c13/ghc

>---------------------------------------------------------------

commit d738e66450ec09f69211330df05e381bfe996c13
Author: Thomas Miedema <thomasmiedema at gmail.com>
Date:   Fri Feb 19 15:25:40 2016 +0100

    Modifier letter in middle of identifier is ok
    
    Refactoring only. Cleanup some loose ends from #10196.
    
    Initially the idea was to only allow modifier letters at the end of
    identifiers. Since we later decided to allow modifier letters also in
    the middle of identifiers (because not doing so would not fix the
    regression completely), the names `suffix` and `okIdSuffixChar` don't
    seem appropriate anymore.
    
    Remove TODO. Move test from should_fail to should_compile.


>---------------------------------------------------------------

d738e66450ec09f69211330df05e381bfe996c13
 compiler/basicTypes/Lexeme.hs                         | 14 +++-----------
 compiler/parser/Lexer.x                               |  9 ++++-----
 testsuite/tests/parser/should_compile/T10196.hs       |  5 +++++
 testsuite/tests/parser/should_fail/T10196Fail3.hs     |  6 ------
 testsuite/tests/parser/should_fail/T10196Fail3.stderr |  2 --
 testsuite/tests/parser/should_fail/all.T              |  1 -
 6 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs
index 4b1fe94..9e75376 100644
--- a/compiler/basicTypes/Lexeme.hs
+++ b/compiler/basicTypes/Lexeme.hs
@@ -28,7 +28,6 @@ module Lexeme (
   ) where
 
 import FastString
-import Util ((<||>))
 
 import Data.Char
 import qualified Data.Set as Set
@@ -183,8 +182,7 @@ okConSymOcc str = all okSymChar str &&
 -- but not worrying about case or clashing with reserved words?
 okIdOcc :: String -> Bool
 okIdOcc str
-  -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
-  = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
+  = let hashes = dropWhile okIdChar str in
     all (== '#') hashes   -- -XMagicHash allows a suffix of hashes
                           -- of course, `all` says "True" to an empty list
 
@@ -194,19 +192,13 @@ okIdChar :: Char -> Bool
 okIdChar c = case generalCategory c of
   UppercaseLetter -> True
   LowercaseLetter -> True
-  OtherLetter     -> True
   TitlecaseLetter -> True
+  ModifierLetter  -> True -- See #10196
+  OtherLetter     -> True
   DecimalNumber   -> True
   OtherNumber     -> True
   _               -> c == '\'' || c == '_'
 
--- | Is this character acceptable in the suffix of an identifier.
--- See alexGetByte in Lexer.x
-okIdSuffixChar :: Char -> Bool
-okIdSuffixChar c = case generalCategory c of
-  ModifierLetter  -> True  -- See #10196
-  _               -> False
-
 -- | Is this character acceptable in a symbol (after the first char)?
 -- See alexGetByte in Lexer.x
 okSymChar :: Char -> Bool
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 719d886..5f3bdee 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -155,9 +155,8 @@ $binit     = 0-1
 $octit     = 0-7
 $hexit     = [$decdigit A-F a-f]
 
-$suffix    = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
--- TODO #10196. Only allow modifier letters in the suffix of an identifier.
-$idchar    = [$small $large $digit $suffix \']
+$modifier  = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
+$idchar    = [$small $large $digit $modifier \']
 
 $pragmachar = [$small $large $digit]
 
@@ -1875,7 +1874,7 @@ alexGetByte (AI loc s)
         symbol          = '\x04'
         space           = '\x05'
         other_graphic   = '\x06'
-        suffix          = '\x07'
+        modifier        = '\x07'
 
         adj_c
           | c <= '\x06' = non_graphic
@@ -1892,7 +1891,7 @@ alexGetByte (AI loc s)
                   UppercaseLetter       -> upper
                   LowercaseLetter       -> lower
                   TitlecaseLetter       -> upper
-                  ModifierLetter        -> suffix -- see #10196
+                  ModifierLetter        -> modifier -- see #10196
                   OtherLetter           -> lower -- see #1103
                   NonSpacingMark        -> other_graphic
                   SpacingCombiningMark  -> other_graphic
diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs
index f809118..a29f0c3 100644
--- a/testsuite/tests/parser/should_compile/T10196.hs
+++ b/testsuite/tests/parser/should_compile/T10196.hs
@@ -11,3 +11,8 @@ f =
       xᵪ = xᵢ
       xᵣ = xᵪ
   in xᵣ
+
+-- Modifier letters are also allowed in the middle of an identifier.
+-- This should not be lexed as 2 separate identifiers.
+xᵦx :: Int
+xᵦx = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs
deleted file mode 100644
index 09b80dd..0000000
--- a/testsuite/tests/parser/should_fail/T10196Fail3.hs
+++ /dev/null
@@ -1,6 +0,0 @@
-module T10196Fail3 where
-
--- Modifier letters are not allowed in the middle of an identifier.
--- And this should not be lexed as 2 separate identifiers either.
-xᵦx :: Int
-xᵦx = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr
deleted file mode 100644
index 6403744..0000000
--- a/testsuite/tests/parser/should_fail/T10196Fail3.stderr
+++ /dev/null
@@ -1,2 +0,0 @@
-
-T10196Fail3.hs:5:2: error: lexical error at character '/7526'
diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T
index 21b523a..e6c6f41 100644
--- a/testsuite/tests/parser/should_fail/all.T
+++ b/testsuite/tests/parser/should_fail/all.T
@@ -91,6 +91,5 @@ test('T8506', normal, compile_fail, [''])
 test('T9225', normal, compile_fail, [''])
 test('T10196Fail1', normal, compile_fail, [''])
 test('T10196Fail2', normal, compile_fail, [''])
-test('T10196Fail3', expect_broken(10196), compile_fail, [''])
 test('T10498a', normal, compile_fail, [''])
 test('T10498b', normal, compile_fail, [''])



More information about the ghc-commits mailing list