[Git][ghc/ghc][wip/lexical-negation] Implement -XLexicalNegation (GHC Proposal #229)
Vladislav Zavialov
gitlab at gitlab.haskell.org
Mon Jun 29 19:57:03 UTC 2020
Vladislav Zavialov pushed to branch wip/lexical-negation at Glasgow Haskell Compiler / GHC
Commits:
fa01c77b by Vladislav Zavialov at 2020-06-29T22:56:49+03:00
Implement -XLexicalNegation (GHC Proposal #229)
This patch introduces a new extension, -XLexicalNegation, which detects
whether the minus sign stands for negation or subtraction using the
whitespace-based rules described in GHC Proposal #229.
- - - - -
16 changed files:
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Parser.y
- compiler/GHC/Parser/Lexer.x
- docs/users_guide/8.12.1-notes.rst
- + docs/users_guide/exts/lexical_negation.rst
- docs/users_guide/exts/negative_literals.rst
- docs/users_guide/exts/syntax.rst
- libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
- testsuite/tests/driver/T4437.hs
- + testsuite/tests/parser/should_compile/LexNegVsNegLit.hs
- + testsuite/tests/parser/should_compile/LexicalNegation.hs
- testsuite/tests/parser/should_compile/all.T
- + testsuite/tests/parser/should_run/LexNegLit.hs
- + testsuite/tests/parser/should_run/LexNegLit.stdout
- testsuite/tests/parser/should_run/all.T
- utils/haddock
Changes:
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -3784,6 +3784,7 @@ xFlagsDeps = [
flagSpec "JavaScriptFFI" LangExt.JavaScriptFFI,
flagSpec "KindSignatures" LangExt.KindSignatures,
flagSpec "LambdaCase" LangExt.LambdaCase,
+ flagSpec "LexicalNegation" LangExt.LexicalNegation,
flagSpec "LiberalTypeSynonyms" LangExt.LiberalTypeSynonyms,
flagSpec "LinearTypes" LangExt.LinearTypes,
flagSpec "MagicHash" LangExt.MagicHash,
=====================================
compiler/GHC/Parser.y
=====================================
@@ -93,7 +93,7 @@ import GHC.Builtin.Types ( unitTyCon, unitDataCon, tupleTyCon, tupleDataCon, nil
manyDataConTyCon)
}
-%expect 232 -- shift/reduce conflicts
+%expect 234 -- shift/reduce conflicts
{- Last updated: 08 June 2020
@@ -553,6 +553,7 @@ are the most common patterns, rewritten as regular expressions for clarity:
'-' { L _ ITminus }
PREFIX_TILDE { L _ ITtilde }
PREFIX_BANG { L _ ITbang }
+ PREFIX_MINUS { L _ ITprefixminus }
'*' { L _ (ITstar _) }
'-<' { L _ (ITlarrowtail _) } -- for arrow notation
'>-' { L _ (ITrarrowtail _) } -- for arrow notation
@@ -703,10 +704,21 @@ litpkgname_segment :: { Located FastString }
| CONID { sL1 $1 $ getCONID $1 }
| special_id { $1 }
+-- Parse a minus sign regardless of whether -XLexicalNegation is turned on or off.
+-- See Note [Minus tokens] in GHC.Parser.Lexer
+HYPHEN :: { [AddAnn] }
+ : '-' { [mj AnnMinus $1 ] }
+ | PREFIX_MINUS { [mj AnnMinus $1 ] }
+ | VARSYM {% if (getVARSYM $1 == fsLit "-")
+ then return [mj AnnMinus $1]
+ else do { addError (getLoc $1) $ text "Expected a hyphen"
+ ; return [] } }
+
+
litpkgname :: { Located FastString }
: litpkgname_segment { $1 }
-- a bit of a hack, means p - b is parsed same as p-b, enough for now.
- | litpkgname_segment '-' litpkgname { sLL $1 $> $ appendFS (unLoc $1) (consFS '-' (unLoc $3)) }
+ | litpkgname_segment HYPHEN litpkgname { sLL $1 $> $ appendFS (unLoc $1) (consFS '-' (unLoc $3)) }
mayberns :: { Maybe [LRenaming] }
: {- empty -} { Nothing }
@@ -2738,12 +2750,12 @@ prag_e :: { Located ([AddAnn], HsPragE GhcPs) }
HsPragSCC noExtField
(getSCC_PRAGs $1)
(StringLiteral NoSourceText (getVARID $2))) }
- | '{-# GENERATED' STRING INTEGER ':' INTEGER '-' INTEGER ':' INTEGER '#-}'
+ | '{-# GENERATED' STRING INTEGER ':' INTEGER HYPHEN INTEGER ':' INTEGER '#-}'
{ let getINT = fromInteger . il_value . getINTEGER in
sLL $1 $> $ ([mo $1,mj AnnVal $2
,mj AnnVal $3,mj AnnColon $4
- ,mj AnnVal $5,mj AnnMinus $6
- ,mj AnnVal $7,mj AnnColon $8
+ ,mj AnnVal $5] ++ $6 ++
+ [mj AnnVal $7,mj AnnColon $8
,mj AnnVal $9,mc $10],
HsPragTick noExtField
(getGENERATED_PRAGs $1)
@@ -2789,6 +2801,9 @@ aexp :: { ECP }
| PREFIX_BANG aexp { ECP $
runECP_PV $2 >>= \ $2 ->
amms (mkHsBangPatPV (comb2 $1 $>) $2) [mj AnnBang $1] }
+ | PREFIX_MINUS aexp { ECP $
+ runECP_PV $2 >>= \ $2 ->
+ amms (mkHsNegAppPV (comb2 $1 $>) $2) [mj AnnMinus $1] }
| '\\' apat apats '->' exp
{ ECP $
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -505,19 +505,19 @@ $tab { warnTab }
0[bB] @numspc @binary / { ifExtension BinaryLiteralsBit } { tok_num positive 2 2 binary }
0[oO] @numspc @octal { tok_num positive 2 2 octal }
0[xX] @numspc @hexadecimal { tok_num positive 2 2 hexadecimal }
- @negative @decimal / { ifExtension NegativeLiteralsBit } { tok_num negative 1 1 decimal }
- @negative 0[bB] @numspc @binary / { ifExtension NegativeLiteralsBit `alexAndPred`
+ @negative @decimal / { negLitPred } { tok_num negative 1 1 decimal }
+ @negative 0[bB] @numspc @binary / { negLitPred `alexAndPred`
ifExtension BinaryLiteralsBit } { tok_num negative 3 3 binary }
- @negative 0[oO] @numspc @octal / { ifExtension NegativeLiteralsBit } { tok_num negative 3 3 octal }
- @negative 0[xX] @numspc @hexadecimal / { ifExtension NegativeLiteralsBit } { tok_num negative 3 3 hexadecimal }
+ @negative 0[oO] @numspc @octal / { negLitPred } { tok_num negative 3 3 octal }
+ @negative 0[xX] @numspc @hexadecimal / { negLitPred } { tok_num negative 3 3 hexadecimal }
-- Normal rational literals (:: Fractional a => a, from Rational)
@floating_point { tok_frac 0 tok_float }
- @negative @floating_point / { ifExtension NegativeLiteralsBit } { tok_frac 0 tok_float }
+ @negative @floating_point / { negLitPred } { tok_frac 0 tok_float }
0[xX] @numspc @hex_floating_point / { ifExtension HexFloatLiteralsBit } { tok_frac 0 tok_hex_float }
@negative 0[xX] @numspc @hex_floating_point
/ { ifExtension HexFloatLiteralsBit `alexAndPred`
- ifExtension NegativeLiteralsBit } { tok_frac 0 tok_hex_float }
+ negLitPred } { tok_frac 0 tok_hex_float }
}
<0> {
@@ -771,7 +771,8 @@ data Token
| ITrarrow IsUnicodeSyntax
| ITlolly IsUnicodeSyntax
| ITdarrow IsUnicodeSyntax
- | ITminus
+ | ITminus -- See Note [Minus tokens]
+ | ITprefixminus -- See Note [Minus tokens]
| ITbang -- Prefix (!) only, e.g. f !x = rhs
| ITtilde -- Prefix (~) only, e.g. f ~x = rhs
| ITat -- Tight infix (@) only, e.g. f x at pat = rhs
@@ -871,6 +872,37 @@ instance Outputable Token where
ppr x = text (show x)
+{- Note [Minus tokens]
+~~~~~~~~~~~~~~~~~~~~~~
+A minus sign can be used in prefix form (-x) and infix form (a - b).
+
+When LexicalNegation is on:
+ * ITprefixminus represents the prefix form
+ * ITvarsym "-" represents the infix form
+ * ITminus is not used
+
+When LexicalNegation is off:
+ * ITminus represents all forms
+ * ITprefixminus is not used
+ * ITvarsym "-" is not used
+-}
+
+{- Note [Why not LexicalNegationBit]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+One might wonder why we define NoLexicalNegationBit instead of
+LexicalNegationBit. The problem lies in the following line in reservedSymsFM:
+
+ ,("-", ITminus, NormalSyntax, xbit NoLexicalNegationBit)
+
+We want to generate ITminus only when LexicalNegation is off. How would one
+do it if we had LexicalNegationBit? I (int-index) tried to use bitwise
+complement:
+
+ ,("-", ITminus, NormalSyntax, complement (xbit LexicalNegationBit))
+
+This did not work, so I opted for NoLexicalNegationBit instead.
+-}
+
-- the bitmap provided as the third component indicates whether the
-- corresponding extension keyword is valid under the extension options
@@ -975,7 +1007,7 @@ reservedSymsFM = listToUFM $
,("<-", ITlarrow NormalSyntax, NormalSyntax, 0 )
,("->", ITrarrow NormalSyntax, NormalSyntax, 0 )
,("=>", ITdarrow NormalSyntax, NormalSyntax, 0 )
- ,("-", ITminus, NormalSyntax, 0 )
+ ,("-", ITminus, NormalSyntax, xbit NoLexicalNegationBit)
,("*", ITstar NormalSyntax, NormalSyntax, xbit StarIsTypeBit)
@@ -1156,6 +1188,27 @@ afterOptionalSpace buf p
atEOL :: AlexAccPred ExtsBitmap
atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
+-- Check if we should parse a negative literal (e.g. -123) as a single token.
+negLitPred :: AlexAccPred ExtsBitmap
+negLitPred =
+ negative_literals `alexOrPred`
+ (lexical_negation `alexAndPred` prefix_minus)
+ where
+ negative_literals = ifExtension NegativeLiteralsBit
+
+ lexical_negation =
+ -- See Note [Why not LexicalNegationBit]
+ alexNotPred (ifExtension NoLexicalNegationBit)
+
+ prefix_minus =
+ -- The condition for a prefix occurrence of an operator is:
+ --
+ -- not precededByClosingToken && followedByOpeningToken
+ --
+ -- but we don't check followedByOpeningToken here as it holds
+ -- simply because we immediately lex a literal after the minus.
+ alexNotPred precededByClosingToken
+
ifExtension :: ExtBits -> AlexAccPred ExtsBitmap
ifExtension extBits bits _ _ _ = extBits `xtest` bits
@@ -1483,6 +1536,9 @@ varsym_prefix = sym $ \exts s ->
-> return ITdollar
| ThQuotesBit `xtest` exts, s == fsLit "$$"
-> return ITdollardollar
+ | s == fsLit "-" -- Only when LexicalNegation is on, otherwise we get ITminus and
+ -- don't hit this code path. See Note [Minus tokens]
+ -> return ITprefixminus
| s == fsLit "!" -> return ITbang
| s == fsLit "~" -> return ITtilde
| otherwise -> return (ITvarsym s)
@@ -2500,6 +2556,7 @@ data ExtBits
| GadtSyntaxBit
| ImportQualifiedPostBit
| LinearTypesBit
+ | NoLexicalNegationBit -- See Note [Why not LexicalNegationBit]
-- Flags that are updated once parsing starts
| InRulePragBit
@@ -2588,12 +2645,14 @@ mkParserFlags' warningFlags extensionFlags homeUnitId
.|. GadtSyntaxBit `xoptBit` LangExt.GADTSyntax
.|. ImportQualifiedPostBit `xoptBit` LangExt.ImportQualifiedPost
.|. LinearTypesBit `xoptBit` LangExt.LinearTypes
+ .|. NoLexicalNegationBit `xoptNotBit` LangExt.LexicalNegation -- See Note [Why not LexicalNegationBit]
optBits =
HaddockBit `setBitIf` isHaddock
.|. RawTokenStreamBit `setBitIf` rawTokStream
.|. UsePosPragsBit `setBitIf` usePosPrags
xoptBit bit ext = bit `setBitIf` EnumSet.member ext extensionFlags
+ xoptNotBit bit ext = bit `setBitIf` not (EnumSet.member ext extensionFlags)
setBitIf :: ExtBits -> Bool -> ExtsBitmap
b `setBitIf` cond | cond = xbit b
=====================================
docs/users_guide/8.12.1-notes.rst
=====================================
@@ -164,6 +164,16 @@ Language
See :ref:`qualified-do-notation` for more details.
+* :extension:`LexicalNegation` is a new extension that detects whether the
+ minus sign stands for negation during lexical analysis by checking for the
+ surrounding whitespace: ::
+
+ a = x - y -- subtraction
+ b = f -x -- negation
+
+ f = (- x) -- operator section
+ c = (-x) -- negation
+
Compiler
~~~~~~~~
=====================================
docs/users_guide/exts/lexical_negation.rst
=====================================
@@ -0,0 +1,57 @@
+.. _lexical-negation:
+
+Lexical negation
+----------------
+
+.. extension:: LexicalNegation
+ :shortdesc: Use whitespace to determine whether the minus sign stands for
+ negation or subtraction.
+
+ :since: 8.12.1
+
+ Detect if the minus sign stands for negation during lexical analysis by
+ checking for the surrounding whitespace.
+
+In Haskell 2010, the minus sign stands for negation when it has no left-hand
+side. Consider ``x = - 5`` and ``y = 2 - 5``. In ``x``, there's no expression
+between the ``=`` and ``-``, so the minus stands for negation, whereas in
+``y``, there's ``2`` to the left of the minus, therefore it stands for
+subtraction.
+
+This leads to certain syntactic anomalies:
+
+* ``(% x)`` is an operator section for any operator ``(%)`` except for ``(-)``.
+ ``(- x)`` is negated ``x`` rather than the right operator section of
+ subtraction. Consequently, it is impossible to write such a section, and
+ users are advised to write ``(subtract x)`` instead.
+
+* Negative numbers must be parenthesized when they appear in function argument
+ position. ``f (-5)`` is correct, whereas ``f -5`` is parsed as ``(-) f 5``.
+
+The latter issue is partly mitigated by :extension:`NegativeLiterals`. When it
+is enabled, ``-5`` is parsed as negative 5 regardless of context, so ``f
+-5`` works as expected. However, it only applies to literals, so ``f -x`` or
+``f -(a*2)`` are still parsed as subtraction.
+
+With :extension:`LexicalNegation`, both anomalies are resolved:
+
+* ``(% x)`` is an operator section for any operator ``(%)``, no exceptions, as
+ long as there's whitespace between ``%`` and ``x``.
+
+* In ``f -x``, the ``-x`` is parsed as the negation of ``x`` for any
+ syntactically atomic expression ``x`` (variable, literal, or parenthesized
+ expression).
+
+* The prefix ``-`` binds tighter than any infix operator. ``-a % b`` is parsed
+ as ``(-a) % b`` regardless of the fixity of ``%``.
+
+This means that ``(- x)`` is the right operator section of subtraction, whereas
+``(-x)`` is the negation of ``x``. Note that these expressions will often have
+different types (``(- x)`` might have type ``Int -> Int`` while ``(-x)`` will
+have type ``Int``), and so users mistaking one for the other will likely get a
+compile error.
+
+Under :extension:`LexicalNegation`, negated literals are desugared without
+``negate``. That is, ``-123`` stands for ``fromInteger (-123)`` rather than
+``negate (fromInteger 123)``. This makes :extension:`LexicalNegation` a valid
+replacement for :extension:`NegativeLiterals`.
=====================================
docs/users_guide/exts/negative_literals.rst
=====================================
@@ -27,5 +27,6 @@ as two tokens.
One pitfall is that with :extension:`NegativeLiterals`, ``x-1`` will
be parsed as ``x`` applied to the argument ``-1``, which is usually
not what you want. ``x - 1`` or even ``x- 1`` can be used instead
-for subtraction.
+for subtraction. To avoid this, consider using :extension:`LexicalNegation`
+instead.
=====================================
docs/users_guide/exts/syntax.rst
=====================================
@@ -25,3 +25,4 @@ Syntax
block_arguments
typed_holes
arrows
+ lexical_negation
=====================================
libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
=====================================
@@ -146,6 +146,7 @@ data Extension
| ImportQualifiedPost
| CUSKs
| StandaloneKindSignatures
+ | LexicalNegation
deriving (Eq, Enum, Show, Generic, Bounded)
-- 'Ord' and 'Bounded' are provided for GHC API users (see discussions
-- in https://gitlab.haskell.org/ghc/ghc/merge_requests/2707 and
=====================================
testsuite/tests/driver/T4437.hs
=====================================
@@ -42,6 +42,7 @@ expectedGhcOnlyExtensions =
, "AlternativeLayoutRuleTransitional"
, "LinearTypes"
, "QualifiedDo"
+ , "LexicalNegation"
]
expectedCabalOnlyExtensions :: [String]
=====================================
testsuite/tests/parser/should_compile/LexNegVsNegLit.hs
=====================================
@@ -0,0 +1,17 @@
+{-# LANGUAGE NegativeLiterals, LexicalNegation #-}
+
+module LexNegVsNegLit where
+
+-- NegativeLiterals specifies that we parse x-1 as x (-1), even though it's
+-- considered a shortcoming.
+--
+-- LexicalNegation does not change that.
+--
+b :: Bool
+b = even-1 -- parsed as: even (-1)
+ -- so it is well-typed.
+ --
+ -- with LexicalNegation alone, we'd get (-) even 1,
+ -- but NegativeLiterals takes precedence here.
+
+-- See also: GHC Proposal #344
=====================================
testsuite/tests/parser/should_compile/LexicalNegation.hs
=====================================
@@ -0,0 +1,15 @@
+{-# LANGUAGE LexicalNegation #-}
+
+module LexicalNegation where
+
+x :: Int
+x = 42
+
+negx :: Int
+negx = f -x where f = (- 5)
+
+subx :: Int -> Int
+subx = (- x)
+
+assertion1 :: Bool
+assertion1 = (- x) -x == -(2*x)
=====================================
testsuite/tests/parser/should_compile/all.T
=====================================
@@ -152,6 +152,8 @@ test('proposal-229a', normal, compile, [''])
test('proposal-229b', normal, compile, [''])
test('proposal-229d', normal, compile, [''])
test('proposal-229e', normal, compile, [''])
+test('LexicalNegation', normal, compile, [''])
+test('LexNegVsNegLit', normal, compile, [''])
# We omit 'profasm' because it fails with:
# Cannot load -prof objects when GHC is built with -dynamic
=====================================
testsuite/tests/parser/should_run/LexNegLit.hs
=====================================
@@ -0,0 +1,26 @@
+{-# LANGUAGE LexicalNegation #-}
+
+data FreeNum
+ = FromInteger Integer
+ | FromRational Rational
+ | Negate FreeNum
+ | FreeNum `Subtract` FreeNum
+ deriving (Show)
+
+instance Num FreeNum where
+ fromInteger = FromInteger
+ negate = Negate
+ (-) = Subtract
+
+instance Fractional FreeNum where
+ fromRational = FromRational
+
+main = do
+ print (-123 :: FreeNum)
+ print (-1.5 :: FreeNum)
+ print (let x = 5 in -x :: FreeNum)
+ print (5-1 :: FreeNum) -- unlike NegativeLiterals, we parse it as (5 - 1), not (5 (-1))
+ print (-0 :: FreeNum)
+ print (-0.0 :: FreeNum)
+ print (-0o10 :: FreeNum)
+ print (-0x10 :: FreeNum)
=====================================
testsuite/tests/parser/should_run/LexNegLit.stdout
=====================================
@@ -0,0 +1,8 @@
+FromInteger (-123)
+FromRational ((-3) % 2)
+Negate (FromInteger 5)
+FromInteger 5 `Subtract` FromInteger 1
+Negate (FromInteger 0)
+Negate (FromRational (0 % 1))
+FromInteger (-8)
+FromInteger (-16)
=====================================
testsuite/tests/parser/should_run/all.T
=====================================
@@ -18,3 +18,4 @@ test('CountParserDeps',
[ only_ways(['normal']), extra_run_opts('"' + config.libdir + '"') ],
compile_and_run,
['-package ghc'])
+test('LexNegLit', normal, compile_and_run, [''])
=====================================
utils/haddock
=====================================
@@ -1 +1 @@
-Subproject commit 54ed6ae2556dc787916e2d56ce0e99808af14e61
+Subproject commit 9bd65ee47a43529af2ad8e350fdd0c372bc5964c
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/fa01c77b51422885fbf6d114e23f2ab58bdb1dd5
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/fa01c77b51422885fbf6d114e23f2ab58bdb1dd5
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20200629/fc784706/attachment-0001.html>
More information about the ghc-commits
mailing list