[commit: ghc] master: parser: Allow Lm (MODIFIER LETTER) category in identifiers (6b01d3c)

git at git.haskell.org git at git.haskell.org
Fri Jul 3 20:44:35 UTC 2015


Repository : ssh://git@git.haskell.org/ghc

On branch  : master
Link       : http://ghc.haskell.org/trac/ghc/changeset/6b01d3ce6c681428e7a9865af85685c2a76ba657/ghc

>---------------------------------------------------------------

commit 6b01d3ce6c681428e7a9865af85685c2a76ba657
Author: Thomas Miedema <thomasmiedema at gmail.com>
Date:   Fri Jul 3 22:37:18 2015 +0200

    parser: Allow Lm (MODIFIER LETTER) category in identifiers
    
    Easy fix in the parser to stop regressions, due to Unicode 7.0 changing
    the classification of some prior code points.
    
    Signed-off-by: Austin Seipp <austin at well-typed.com>
    
    Test Plan: `tests/parser/should_compile/T10196.hs`
    
    Reviewers: hvr, austin, bgamari
    
    Reviewed By: austin, bgamari
    
    Subscribers: thomie, bgamari
    
    Differential Revision: https://phabricator.haskell.org/D969
    
    GHC Trac Issues: #10196


>---------------------------------------------------------------

6b01d3ce6c681428e7a9865af85685c2a76ba657
 compiler/basicTypes/Lexeme.hs                         | 11 ++++++++++-
 compiler/parser/Lexer.x                               |  8 ++++++--
 compiler/utils/Util.hs                                |  5 +++++
 testsuite/tests/parser/should_compile/T10196.hs       | 13 +++++++++++++
 testsuite/tests/parser/should_compile/all.T           |  1 +
 testsuite/tests/parser/should_fail/T10196Fail1.hs     |  4 ++++
 testsuite/tests/parser/should_fail/T10196Fail1.stderr |  2 ++
 testsuite/tests/parser/should_fail/T10196Fail2.hs     |  4 ++++
 testsuite/tests/parser/should_fail/T10196Fail2.stderr |  2 ++
 testsuite/tests/parser/should_fail/T10196Fail3.hs     |  6 ++++++
 testsuite/tests/parser/should_fail/T10196Fail3.stderr |  2 ++
 testsuite/tests/parser/should_fail/all.T              |  3 +++
 12 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/compiler/basicTypes/Lexeme.hs b/compiler/basicTypes/Lexeme.hs
index c5bda4d..a240961 100644
--- a/compiler/basicTypes/Lexeme.hs
+++ b/compiler/basicTypes/Lexeme.hs
@@ -28,6 +28,7 @@ module Lexeme (
   ) where
 
 import FastString
+import Util ((<||>))
 
 import Data.Char
 import qualified Data.Set as Set
@@ -194,7 +195,8 @@ okConSymOcc str = all okSymChar str &&
 -- but not worrying about case or clashing with reserved words?
 okIdOcc :: String -> Bool
 okIdOcc str
-  = let hashes = dropWhile okIdChar str in
+  -- TODO. #10196. Only allow modifier letters in the suffix of an identifier.
+  = let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
     all (== '#') hashes   -- -XMagicHash allows a suffix of hashes
                           -- of course, `all` says "True" to an empty list
 
@@ -210,6 +212,13 @@ okIdChar c = case generalCategory c of
   OtherNumber     -> True
   _               -> c == '\'' || c == '_'
 
+-- | Is this character acceptable in the suffix of an identifier.
+-- See alexGetByte in Lexer.x
+okIdSuffixChar :: Char -> Bool
+okIdSuffixChar c = case generalCategory c of
+  ModifierLetter  -> True  -- See #10196
+  _               -> False
+
 -- | Is this character acceptable in a symbol (after the first char)?
 -- See alexGetByte in Lexer.x
 okSymChar :: Char -> Bool
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 5839a41..2e883fd 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -156,7 +156,10 @@ $graphic   = [$small $large $symbol $digit $special $unigraphic \"\']
 $binit     = 0-1
 $octit     = 0-7
 $hexit     = [$decdigit A-F a-f]
-$idchar    = [$small $large $digit \']
+
+$suffix    = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
+-- TODO #10196. Only allow modifier letters in the suffix of an identifier.
+$idchar    = [$small $large $digit $suffix \']
 
 $pragmachar = [$small $large $digit]
 
@@ -1842,6 +1845,7 @@ alexGetByte (AI loc s)
         symbol          = '\x04'
         space           = '\x05'
         other_graphic   = '\x06'
+        suffix          = '\x07'
 
         adj_c
           | c <= '\x06' = non_graphic
@@ -1858,7 +1862,7 @@ alexGetByte (AI loc s)
                   UppercaseLetter       -> upper
                   LowercaseLetter       -> lower
                   TitlecaseLetter       -> upper
-                  ModifierLetter        -> other_graphic
+                  ModifierLetter        -> suffix -- see #10196
                   OtherLetter           -> lower -- see #1103
                   NonSpacingMark        -> other_graphic
                   SpacingCombiningMark  -> other_graphic
diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
index 732f2b8f..96cd752 100644
--- a/compiler/utils/Util.hs
+++ b/compiler/utils/Util.hs
@@ -55,6 +55,7 @@ module Util (
         isEqual, eqListBy, eqMaybeBy,
         thenCmp, cmpList,
         removeSpaces,
+        (<&&>), (<||>),
 
         -- * Edit distance
         fuzzyMatch, fuzzyLookup,
@@ -665,6 +666,10 @@ removeSpaces = dropWhileEndLE isSpace . dropWhile isSpace
 (<&&>) = liftA2 (&&)
 infixr 3 <&&> -- same as (&&)
 
+(<||>) :: Applicative f => f Bool -> f Bool -> f Bool
+(<||>) = liftA2 (||)
+infixr 2 <||> -- same as (||)
+
 {-
 ************************************************************************
 *                                                                      *
diff --git a/testsuite/tests/parser/should_compile/T10196.hs b/testsuite/tests/parser/should_compile/T10196.hs
new file mode 100644
index 0000000..f809118
--- /dev/null
+++ b/testsuite/tests/parser/should_compile/T10196.hs
@@ -0,0 +1,13 @@
+module T10196 where
+
+data X = Xᵦ | Xᵤ | Xᵩ | Xᵢ | Xᵪ | Xᵣ
+
+f :: Int
+f =
+  let xᵦ = 1
+      xᵤ = xᵦ
+      xᵩ = xᵤ
+      xᵢ = xᵩ
+      xᵪ = xᵢ
+      xᵣ = xᵪ
+  in xᵣ
diff --git a/testsuite/tests/parser/should_compile/all.T b/testsuite/tests/parser/should_compile/all.T
index 68845c1..521b5a4 100644
--- a/testsuite/tests/parser/should_compile/all.T
+++ b/testsuite/tests/parser/should_compile/all.T
@@ -101,4 +101,5 @@ test('T5682', normal, compile, [''])
 test('T9723a', normal, compile, [''])
 test('T9723b', normal, compile, [''])
 test('T10188', normal, compile, [''])
+test('T10196', normal, compile, [''])
 test('T10582', expect_broken(10582), compile, [''])
diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.hs b/testsuite/tests/parser/should_fail/T10196Fail1.hs
new file mode 100644
index 0000000..2f1c8f3
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail1.hs
@@ -0,0 +1,4 @@
+module T10196Fail1 where
+
+-- Constructors are not allowed to start with a modifier letter.
+data Foo = ᵦfoo
diff --git a/testsuite/tests/parser/should_fail/T10196Fail1.stderr b/testsuite/tests/parser/should_fail/T10196Fail1.stderr
new file mode 100644
index 0000000..3c4a173
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail1.stderr
@@ -0,0 +1,2 @@
+
+T10196Fail1.hs:4:12: error: lexical error at character '\7526'
diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.hs b/testsuite/tests/parser/should_fail/T10196Fail2.hs
new file mode 100644
index 0000000..64b3cac
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail2.hs
@@ -0,0 +1,4 @@
+module T10196Fail2 where
+
+-- Variables are not allowed to start with a modifier letter.
+ᵦ = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail2.stderr b/testsuite/tests/parser/should_fail/T10196Fail2.stderr
new file mode 100644
index 0000000..abba8aa
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail2.stderr
@@ -0,0 +1,2 @@
+
+T10196Fail2.hs:4:1: error: lexical error at character '\7526'
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.hs b/testsuite/tests/parser/should_fail/T10196Fail3.hs
new file mode 100644
index 0000000..09b80dd
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail3.hs
@@ -0,0 +1,6 @@
+module T10196Fail3 where
+
+-- Modifier letters are not allowed in the middle of an identifier.
+-- And this should not be lexed as 2 separate identifiers either.
+xᵦx :: Int
+xᵦx = 1
diff --git a/testsuite/tests/parser/should_fail/T10196Fail3.stderr b/testsuite/tests/parser/should_fail/T10196Fail3.stderr
new file mode 100644
index 0000000..6403744
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/T10196Fail3.stderr
@@ -0,0 +1,2 @@
+
+T10196Fail3.hs:5:2: error: lexical error at character '/7526'
diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T
index affea92..33da721 100644
--- a/testsuite/tests/parser/should_fail/all.T
+++ b/testsuite/tests/parser/should_fail/all.T
@@ -88,3 +88,6 @@ test('T8431', compile_timeout_multiplier(0.05),
      compile_fail, ['-XAlternativeLayoutRule'])
 test('T8506', normal, compile_fail, [''])
 test('T9225', normal, compile_fail, [''])
+test('T10196Fail1', normal, compile_fail, [''])
+test('T10196Fail2', normal, compile_fail, [''])
+test('T10196Fail3', expect_broken(10196), compile_fail, [''])



More information about the ghc-commits mailing list