[Git][ghc/ghc][wip/lexical-negation] Implement -XLexicalNegation (GHC Proposal #229)

Vladislav Zavialov gitlab at gitlab.haskell.org
Mon Jun 29 19:57:03 UTC 2020



Vladislav Zavialov pushed to branch wip/lexical-negation at Glasgow Haskell Compiler / GHC


Commits:
fa01c77b by Vladislav Zavialov at 2020-06-29T22:56:49+03:00
Implement -XLexicalNegation (GHC Proposal #229)

This patch introduces a new extension, -XLexicalNegation, which detects
whether the minus sign stands for negation or subtraction using the
whitespace-based rules described in GHC Proposal #229.

- - - - -


16 changed files:

- compiler/GHC/Driver/Session.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- docs/users_guide/8.12.1-notes.rst
- + docs/users_guide/exts/lexical_negation.rst
- docs/users_guide/exts/negative_literals.rst
- docs/users_guide/exts/syntax.rst
- libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
- testsuite/tests/driver/T4437.hs
- + testsuite/tests/parser/should_compile/LexNegVsNegLit.hs
- + testsuite/tests/parser/should_compile/LexicalNegation.hs
- testsuite/tests/parser/should_compile/all.T
- + testsuite/tests/parser/should_run/LexNegLit.hs
- + testsuite/tests/parser/should_run/LexNegLit.stdout
- testsuite/tests/parser/should_run/all.T
- utils/haddock


Changes:

=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -3784,6 +3784,7 @@ xFlagsDeps = [
   flagSpec "JavaScriptFFI"                    LangExt.JavaScriptFFI,
   flagSpec "KindSignatures"                   LangExt.KindSignatures,
   flagSpec "LambdaCase"                       LangExt.LambdaCase,
+  flagSpec "LexicalNegation"                  LangExt.LexicalNegation,
   flagSpec "LiberalTypeSynonyms"              LangExt.LiberalTypeSynonyms,
   flagSpec "LinearTypes"                      LangExt.LinearTypes,
   flagSpec "MagicHash"                        LangExt.MagicHash,


=====================================
compiler/GHC/Parser.y
=====================================
@@ -93,7 +93,7 @@ import GHC.Builtin.Types ( unitTyCon, unitDataCon, tupleTyCon, tupleDataCon, nil
                            manyDataConTyCon)
 }
 
-%expect 232 -- shift/reduce conflicts
+%expect 234 -- shift/reduce conflicts
 
 {- Last updated: 08 June 2020
 
@@ -553,6 +553,7 @@ are the most common patterns, rewritten as regular expressions for clarity:
  '-'            { L _ ITminus }
  PREFIX_TILDE   { L _ ITtilde }
  PREFIX_BANG    { L _ ITbang }
+ PREFIX_MINUS   { L _ ITprefixminus }
  '*'            { L _ (ITstar _) }
  '-<'           { L _ (ITlarrowtail _) }            -- for arrow notation
  '>-'           { L _ (ITrarrowtail _) }            -- for arrow notation
@@ -703,10 +704,21 @@ litpkgname_segment :: { Located FastString }
         | CONID  { sL1 $1 $ getCONID $1 }
         | special_id { $1 }
 
+-- Parse a minus sign regardless of whether -XLexicalNegation is turned on or off.
+-- See Note [Minus tokens] in GHC.Parser.Lexer
+HYPHEN :: { [AddAnn] }
+      : '-'          { [mj AnnMinus $1 ] }
+      | PREFIX_MINUS { [mj AnnMinus $1 ] }
+      | VARSYM  {% if (getVARSYM $1 == fsLit "-")
+                   then return [mj AnnMinus $1]
+                   else do { addError (getLoc $1) $ text "Expected a hyphen"
+                           ; return [] } }
+
+
 litpkgname :: { Located FastString }
         : litpkgname_segment { $1 }
         -- a bit of a hack, means p - b is parsed same as p-b, enough for now.
-        | litpkgname_segment '-' litpkgname  { sLL $1 $> $ appendFS (unLoc $1) (consFS '-' (unLoc $3)) }
+        | litpkgname_segment HYPHEN litpkgname  { sLL $1 $> $ appendFS (unLoc $1) (consFS '-' (unLoc $3)) }
 
 mayberns :: { Maybe [LRenaming] }
         : {- empty -} { Nothing }
@@ -2738,12 +2750,12 @@ prag_e :: { Located ([AddAnn], HsPragE GhcPs) }
                                                   HsPragSCC noExtField
                                                     (getSCC_PRAGs $1)
                                                     (StringLiteral NoSourceText (getVARID $2))) }
-      | '{-# GENERATED' STRING INTEGER ':' INTEGER '-' INTEGER ':' INTEGER '#-}'
+      | '{-# GENERATED' STRING INTEGER ':' INTEGER HYPHEN INTEGER ':' INTEGER '#-}'
                                       { let getINT = fromInteger . il_value . getINTEGER in
                                         sLL $1 $> $ ([mo $1,mj AnnVal $2
                                               ,mj AnnVal $3,mj AnnColon $4
-                                              ,mj AnnVal $5,mj AnnMinus $6
-                                              ,mj AnnVal $7,mj AnnColon $8
+                                              ,mj AnnVal $5] ++ $6 ++
+                                              [mj AnnVal $7,mj AnnColon $8
                                               ,mj AnnVal $9,mc $10],
                                               HsPragTick noExtField
                                                 (getGENERATED_PRAGs $1)
@@ -2789,6 +2801,9 @@ aexp    :: { ECP }
         | PREFIX_BANG aexp      { ECP $
                                    runECP_PV $2 >>= \ $2 ->
                                    amms (mkHsBangPatPV (comb2 $1 $>) $2) [mj AnnBang $1] }
+        | PREFIX_MINUS aexp     { ECP $
+                                   runECP_PV $2 >>= \ $2 ->
+                                   amms (mkHsNegAppPV (comb2 $1 $>) $2) [mj AnnMinus $1] }
 
         | '\\' apat apats '->' exp
                    {  ECP $


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -505,19 +505,19 @@ $tab          { warnTab }
   0[bB] @numspc @binary                / { ifExtension BinaryLiteralsBit }   { tok_num positive 2 2 binary }
   0[oO] @numspc @octal                                                       { tok_num positive 2 2 octal }
   0[xX] @numspc @hexadecimal                                                 { tok_num positive 2 2 hexadecimal }
-  @negative @decimal                   / { ifExtension NegativeLiteralsBit } { tok_num negative 1 1 decimal }
-  @negative 0[bB] @numspc @binary      / { ifExtension NegativeLiteralsBit `alexAndPred`
+  @negative @decimal                   / { negLitPred }                      { tok_num negative 1 1 decimal }
+  @negative 0[bB] @numspc @binary      / { negLitPred `alexAndPred`
                                            ifExtension BinaryLiteralsBit }   { tok_num negative 3 3 binary }
-  @negative 0[oO] @numspc @octal       / { ifExtension NegativeLiteralsBit } { tok_num negative 3 3 octal }
-  @negative 0[xX] @numspc @hexadecimal / { ifExtension NegativeLiteralsBit } { tok_num negative 3 3 hexadecimal }
+  @negative 0[oO] @numspc @octal       / { negLitPred }                      { tok_num negative 3 3 octal }
+  @negative 0[xX] @numspc @hexadecimal / { negLitPred }                      { tok_num negative 3 3 hexadecimal }
 
   -- Normal rational literals (:: Fractional a => a, from Rational)
   @floating_point                                                            { tok_frac 0 tok_float }
-  @negative @floating_point            / { ifExtension NegativeLiteralsBit } { tok_frac 0 tok_float }
+  @negative @floating_point            / { negLitPred }                      { tok_frac 0 tok_float }
   0[xX] @numspc @hex_floating_point    / { ifExtension HexFloatLiteralsBit } { tok_frac 0 tok_hex_float }
   @negative 0[xX] @numspc @hex_floating_point
                                        / { ifExtension HexFloatLiteralsBit `alexAndPred`
-                                           ifExtension NegativeLiteralsBit } { tok_frac 0 tok_hex_float }
+                                           negLitPred }                      { tok_frac 0 tok_hex_float }
 }
 
 <0> {
@@ -771,7 +771,8 @@ data Token
   | ITrarrow            IsUnicodeSyntax
   | ITlolly             IsUnicodeSyntax
   | ITdarrow            IsUnicodeSyntax
-  | ITminus
+  | ITminus       -- See Note [Minus tokens]
+  | ITprefixminus -- See Note [Minus tokens]
   | ITbang     -- Prefix (!) only, e.g. f !x = rhs
   | ITtilde    -- Prefix (~) only, e.g. f ~x = rhs
   | ITat       -- Tight infix (@) only, e.g. f x at pat = rhs
@@ -871,6 +872,37 @@ instance Outputable Token where
   ppr x = text (show x)
 
 
+{- Note [Minus tokens]
+~~~~~~~~~~~~~~~~~~~~~~
+A minus sign can be used in prefix form (-x) and infix form (a - b).
+
+When LexicalNegation is on:
+  * ITprefixminus  represents the prefix form
+  * ITvarsym "-"   represents the infix form
+  * ITminus        is not used
+
+When LexicalNegation is off:
+  * ITminus        represents all forms
+  * ITprefixminus  is not used
+  * ITvarsym "-"   is not used
+-}
+
+{- Note [Why not LexicalNegationBit]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+One might wonder why we define NoLexicalNegationBit instead of
+LexicalNegationBit. The problem lies in the following line in reservedSymsFM:
+
+    ,("-", ITminus, NormalSyntax, xbit NoLexicalNegationBit)
+
+We want to generate ITminus only when LexicalNegation is off. How would one
+do it if we had LexicalNegationBit? I (int-index) tried to use bitwise
+complement:
+
+    ,("-", ITminus, NormalSyntax, complement (xbit LexicalNegationBit))
+
+This did not work, so I opted for NoLexicalNegationBit instead.
+-}
+
 
 -- the bitmap provided as the third component indicates whether the
 -- corresponding extension keyword is valid under the extension options
@@ -975,7 +1007,7 @@ reservedSymsFM = listToUFM $
        ,("<-",  ITlarrow NormalSyntax,      NormalSyntax,  0 )
        ,("->",  ITrarrow NormalSyntax,      NormalSyntax,  0 )
        ,("=>",  ITdarrow NormalSyntax,      NormalSyntax,  0 )
-       ,("-",   ITminus,                    NormalSyntax,  0 )
+       ,("-",   ITminus,                    NormalSyntax,  xbit NoLexicalNegationBit)
 
        ,("*",   ITstar NormalSyntax,        NormalSyntax,  xbit StarIsTypeBit)
 
@@ -1156,6 +1188,27 @@ afterOptionalSpace buf p
 atEOL :: AlexAccPred ExtsBitmap
 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 
+-- Check if we should parse a negative literal (e.g. -123) as a single token.
+negLitPred :: AlexAccPred ExtsBitmap
+negLitPred =
+    negative_literals `alexOrPred`
+    (lexical_negation `alexAndPred` prefix_minus)
+  where
+    negative_literals = ifExtension NegativeLiteralsBit
+
+    lexical_negation  =
+      -- See Note [Why not LexicalNegationBit]
+      alexNotPred (ifExtension NoLexicalNegationBit)
+
+    prefix_minus =
+      -- The condition for a prefix occurrence of an operator is:
+      --
+      --   not precededByClosingToken && followedByOpeningToken
+      --
+      -- but we don't check followedByOpeningToken here as it holds
+      -- simply because we immediately lex a literal after the minus.
+      alexNotPred precededByClosingToken
+
 ifExtension :: ExtBits -> AlexAccPred ExtsBitmap
 ifExtension extBits bits _ _ _ = extBits `xtest` bits
 
@@ -1483,6 +1536,9 @@ varsym_prefix = sym $ \exts s ->
      -> return ITdollar
      | ThQuotesBit `xtest` exts, s == fsLit "$$"
      -> return ITdollardollar
+     | s == fsLit "-"   -- Only when LexicalNegation is on, otherwise we get ITminus and
+                        -- don't hit this code path. See Note [Minus tokens]
+     -> return ITprefixminus
      | s == fsLit "!" -> return ITbang
      | s == fsLit "~" -> return ITtilde
      | otherwise -> return (ITvarsym s)
@@ -2500,6 +2556,7 @@ data ExtBits
   | GadtSyntaxBit
   | ImportQualifiedPostBit
   | LinearTypesBit
+  | NoLexicalNegationBit   -- See Note [Why not LexicalNegationBit]
 
   -- Flags that are updated once parsing starts
   | InRulePragBit
@@ -2588,12 +2645,14 @@ mkParserFlags' warningFlags extensionFlags homeUnitId
       .|. GadtSyntaxBit               `xoptBit` LangExt.GADTSyntax
       .|. ImportQualifiedPostBit      `xoptBit` LangExt.ImportQualifiedPost
       .|. LinearTypesBit              `xoptBit` LangExt.LinearTypes
+      .|. NoLexicalNegationBit     `xoptNotBit` LangExt.LexicalNegation -- See Note [Why not LexicalNegationBit]
     optBits =
           HaddockBit        `setBitIf` isHaddock
       .|. RawTokenStreamBit `setBitIf` rawTokStream
       .|. UsePosPragsBit    `setBitIf` usePosPrags
 
     xoptBit bit ext = bit `setBitIf` EnumSet.member ext extensionFlags
+    xoptNotBit bit ext = bit `setBitIf` not (EnumSet.member ext extensionFlags)
 
     setBitIf :: ExtBits -> Bool -> ExtsBitmap
     b `setBitIf` cond | cond      = xbit b


=====================================
docs/users_guide/8.12.1-notes.rst
=====================================
@@ -164,6 +164,16 @@ Language
 
   See :ref:`qualified-do-notation` for more details.
 
+* :extension:`LexicalNegation` is a new extension that detects whether the
+  minus sign stands for negation during lexical analysis by checking for the
+  surrounding whitespace: ::
+
+    a = x - y  -- subtraction
+    b = f -x   -- negation
+
+    f = (- x)  -- operator section
+    c = (-x)   -- negation
+
 Compiler
 ~~~~~~~~
 


=====================================
docs/users_guide/exts/lexical_negation.rst
=====================================
@@ -0,0 +1,57 @@
+.. _lexical-negation:
+
+Lexical negation
+----------------
+
+.. extension:: LexicalNegation
+    :shortdesc: Use whitespace to determine whether the minus sign stands for
+                negation or subtraction.
+
+    :since: 8.12.1
+
+    Detect if the minus sign stands for negation during lexical analysis by
+    checking for the surrounding whitespace.
+
+In Haskell 2010, the minus sign stands for negation when it has no left-hand
+side. Consider ``x = - 5`` and ``y = 2 - 5``. In ``x``, there's no expression
+between the ``=`` and ``-``, so the minus stands for negation, whereas in
+``y``, there's ``2`` to the left of the minus, therefore it stands for
+subtraction.
+
+This leads to certain syntactic anomalies:
+
+* ``(% x)`` is an operator section for any operator ``(%)`` except for ``(-)``.
+  ``(- x)`` is negated ``x`` rather than the right operator section of
+  subtraction. Consequently, it is impossible to write such a section, and
+  users are advised to write ``(subtract x)`` instead.
+
+* Negative numbers must be parenthesized when they appear in function argument
+  position. ``f (-5)`` is correct, whereas ``f -5`` is parsed as ``(-) f 5``.
+
+The latter issue is partly mitigated by :extension:`NegativeLiterals`. When it
+is enabled, ``-5`` is parsed as negative 5 regardless of context, so ``f
+-5`` works as expected. However, it only applies to literals, so ``f -x`` or
+``f -(a*2)`` are still parsed as subtraction.
+
+With :extension:`LexicalNegation`, both anomalies are resolved:
+
+* ``(% x)`` is an operator section for any operator ``(%)``, no exceptions, as
+  long as there's whitespace between ``%`` and ``x``.
+
+* In ``f -x``, the ``-x`` is parsed as the negation of ``x`` for any
+  syntactically atomic expression ``x`` (variable, literal, or parenthesized
+  expression).
+
+* The prefix ``-`` binds tighter than any infix operator. ``-a % b`` is parsed
+  as ``(-a) % b`` regardless of the fixity of ``%``.
+
+This means that ``(- x)`` is the right operator section of subtraction, whereas
+``(-x)`` is the negation of ``x``. Note that these expressions will often have
+different types (``(- x)`` might have type ``Int -> Int`` while ``(-x)`` will
+have type ``Int``), and so users mistaking one for the other will likely get a
+compile error.
+
+Under :extension:`LexicalNegation`, negated literals are desugared without
+``negate``. That is, ``-123`` stands for ``fromInteger (-123)`` rather than
+``negate (fromInteger 123)``. This makes :extension:`LexicalNegation` a valid
+replacement for :extension:`NegativeLiterals`.


=====================================
docs/users_guide/exts/negative_literals.rst
=====================================
@@ -27,5 +27,6 @@ as two tokens.
 One pitfall is that with :extension:`NegativeLiterals`, ``x-1`` will
 be parsed as ``x`` applied to the argument ``-1``, which is usually
 not what you want.  ``x - 1`` or even ``x- 1`` can be used instead
-for subtraction.
+for subtraction. To avoid this, consider using :extension:`LexicalNegation`
+instead.
 


=====================================
docs/users_guide/exts/syntax.rst
=====================================
@@ -25,3 +25,4 @@ Syntax
     block_arguments
     typed_holes
     arrows
+    lexical_negation


=====================================
libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
=====================================
@@ -146,6 +146,7 @@ data Extension
    | ImportQualifiedPost
    | CUSKs
    | StandaloneKindSignatures
+   | LexicalNegation
    deriving (Eq, Enum, Show, Generic, Bounded)
 -- 'Ord' and 'Bounded' are provided for GHC API users (see discussions
 -- in https://gitlab.haskell.org/ghc/ghc/merge_requests/2707 and


=====================================
testsuite/tests/driver/T4437.hs
=====================================
@@ -42,6 +42,7 @@ expectedGhcOnlyExtensions =
     , "AlternativeLayoutRuleTransitional"
     , "LinearTypes"
     , "QualifiedDo"
+    , "LexicalNegation"
     ]
 
 expectedCabalOnlyExtensions :: [String]


=====================================
testsuite/tests/parser/should_compile/LexNegVsNegLit.hs
=====================================
@@ -0,0 +1,17 @@
+{-# LANGUAGE NegativeLiterals, LexicalNegation #-}
+
+module LexNegVsNegLit where
+
+-- NegativeLiterals specifies that we parse x-1 as x (-1), even though it's
+-- considered a shortcoming.
+--
+-- LexicalNegation does not change that.
+--
+b :: Bool
+b = even-1  -- parsed as: even (-1)
+            -- so it is well-typed.
+            --
+            -- with LexicalNegation alone, we'd get (-) even 1,
+            -- but NegativeLiterals takes precedence here.
+
+-- See also: GHC Proposal #344


=====================================
testsuite/tests/parser/should_compile/LexicalNegation.hs
=====================================
@@ -0,0 +1,15 @@
+{-# LANGUAGE LexicalNegation #-}
+
+module LexicalNegation where
+
+x :: Int
+x = 42
+
+negx :: Int
+negx = f -x  where  f = (- 5)
+
+subx :: Int -> Int
+subx = (- x)
+
+assertion1 :: Bool
+assertion1 = (- x) -x  ==  -(2*x)


=====================================
testsuite/tests/parser/should_compile/all.T
=====================================
@@ -152,6 +152,8 @@ test('proposal-229a', normal, compile, [''])
 test('proposal-229b', normal, compile, [''])
 test('proposal-229d', normal, compile, [''])
 test('proposal-229e', normal, compile, [''])
+test('LexicalNegation', normal, compile, [''])
+test('LexNegVsNegLit', normal, compile, [''])
 
 # We omit 'profasm' because it fails with:
 # Cannot load -prof objects when GHC is built with -dynamic


=====================================
testsuite/tests/parser/should_run/LexNegLit.hs
=====================================
@@ -0,0 +1,26 @@
+{-# LANGUAGE LexicalNegation #-}
+
+data FreeNum
+  = FromInteger Integer
+  | FromRational Rational
+  | Negate FreeNum
+  | FreeNum `Subtract` FreeNum
+  deriving (Show)
+
+instance Num FreeNum where
+  fromInteger = FromInteger
+  negate = Negate
+  (-) = Subtract
+
+instance Fractional FreeNum where
+  fromRational = FromRational
+
+main = do
+  print (-123 :: FreeNum)
+  print (-1.5 :: FreeNum)
+  print (let x = 5 in -x :: FreeNum)
+  print (5-1 :: FreeNum)  -- unlike NegativeLiterals, we parse it as (5 - 1), not (5 (-1))
+  print (-0 :: FreeNum)
+  print (-0.0 :: FreeNum)
+  print (-0o10 :: FreeNum)
+  print (-0x10 :: FreeNum)


=====================================
testsuite/tests/parser/should_run/LexNegLit.stdout
=====================================
@@ -0,0 +1,8 @@
+FromInteger (-123)
+FromRational ((-3) % 2)
+Negate (FromInteger 5)
+FromInteger 5 `Subtract` FromInteger 1
+Negate (FromInteger 0)
+Negate (FromRational (0 % 1))
+FromInteger (-8)
+FromInteger (-16)


=====================================
testsuite/tests/parser/should_run/all.T
=====================================
@@ -18,3 +18,4 @@ test('CountParserDeps',
      [ only_ways(['normal']), extra_run_opts('"' + config.libdir + '"') ],
      compile_and_run,
      ['-package ghc'])
+test('LexNegLit', normal, compile_and_run, [''])


=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 54ed6ae2556dc787916e2d56ce0e99808af14e61
+Subproject commit 9bd65ee47a43529af2ad8e350fdd0c372bc5964c



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/fa01c77b51422885fbf6d114e23f2ab58bdb1dd5

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/fa01c77b51422885fbf6d114e23f2ab58bdb1dd5
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200629/fc784706/attachment-0001.html>


More information about the ghc-commits mailing list