[Git][ghc/ghc][wip/layouter] Implemented a layouter that does not work

Sebastian Graf (@sgraf812) gitlab at gitlab.haskell.org
Tue Oct 15 10:12:00 UTC 2024



Sebastian Graf pushed to branch wip/layouter at Glasgow Haskell Compiler / GHC


Commits:
5f102494 by Sebastian Graf at 2024-10-15T12:11:45+02:00
Implemented a layouter that does not work

Counter example:

```hs
baz :: Bool -> Bool -> [Int]
baz p q = [case () of _ | p, q -> 0, 42]
```

- - - - -


7 changed files:

- compiler/GHC/Parser.y
- compiler/GHC/Parser/Header.hs
- + compiler/GHC/Parser/Layouter.hs
- + compiler/GHC/Parser/Layouter.hs-boot
- compiler/GHC/Parser/Lexer.x
- compiler/ghc.cabal.in
- utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs


Changes:

=====================================
compiler/GHC/Parser.y
=====================================
@@ -83,6 +83,7 @@ import GHC.Core.DataCon ( DataCon, dataConName )
 
 import GHC.Parser.PostProcess
 import GHC.Parser.PostProcess.Haddock
+import GHC.Parser.Layouter
 import GHC.Parser.Lexer
 import GHC.Parser.HaddockLex
 import GHC.Parser.Annotation
@@ -762,7 +763,7 @@ TH_QUASIQUOTE   { L _ (ITquasiQuote _) }
 TH_QQUASIQUOTE  { L _ (ITqQuasiQuote _) }
 
 %monad { P } { >>= } { return }
-%lexer { (lexer True) } { L _ ITeof }
+%lexer { layouter (lexer True) } { L _ ITeof }
   -- Replace 'lexer' above with 'lexerDbg'
   -- to dump the tokens fed to the parser.
 %tokentype { (Located Token) }
@@ -797,7 +798,7 @@ identifier :: { LocatedN RdrName }
 -- Backpack stuff
 
 backpack :: { [LHsUnit PackageName] }
-         : implicit_top units close { fromOL $2 }
+         : implicit_top units vccurly { fromOL $2 }
          | '{' units '}'            { fromOL $2 }
 
 units :: { OrdList (LHsUnit PackageName) }
@@ -867,7 +868,7 @@ rn :: { LRenaming }
 
 unitbody :: { OrdList (LHsUnitDecl PackageName) }
         : '{'     unitdecls '}'   { $2 }
-        | vocurly unitdecls close { $2 }
+        | vocurly unitdecls vccurly { $2 }
 
 unitdecls :: { OrdList (LHsUnitDecl PackageName) }
         : unitdecls ';' unitdecl { $1 `appOL` unitOL $3 }
@@ -944,13 +945,13 @@ body    :: { ([TrailingAnn]
              ,([LImportDecl GhcPs], [LHsDecl GhcPs])
              ,EpLayout) }
         :  '{'            top '}'      { (fst $2, snd $2, epExplicitBraces $1 $3) }
-        |      vocurly    top close    { (fst $2, snd $2, EpVirtualBraces (getVOCURLY $1)) }
+        |      vocurly    top vccurly    { (fst $2, snd $2, EpVirtualBraces (getVOCURLY $1)) }
 
 body2   :: { ([TrailingAnn]
              ,([LImportDecl GhcPs], [LHsDecl GhcPs])
              ,EpLayout) }
         :  '{' top '}'                          { (fst $2, snd $2, epExplicitBraces $1 $3) }
-        |  missing_module_keyword top close     { ([], snd $2, EpVirtualBraces leftmostColumn) }
+        |  missing_module_keyword top vccurly     { ([], snd $2, EpVirtualBraces leftmostColumn) }
 
 
 top     :: { ([TrailingAnn]
@@ -1467,11 +1468,11 @@ where_type_family :: { Located ([AddEpAnn],FamilyInfo GhcPs) }
 ty_fam_inst_eqn_list :: { Located ([AddEpAnn],Maybe [LTyFamInstEqn GhcPs]) }
         :     '{' ty_fam_inst_eqns '}'     { sLL $1 $> ([moc $1,mcc $3]
                                                 ,Just (unLoc $2)) }
-        | vocurly ty_fam_inst_eqns close   { let (L loc _) = $2 in
+        | vocurly ty_fam_inst_eqns vccurly   { let (L loc _) = $2 in
                                              L loc ([],Just (unLoc $2)) }
         |     '{' '..' '}'                 { sLL $1 $> ([moc $1,mj AnnDotdot $2
                                                  ,mcc $3],Nothing) }
-        | vocurly '..' close               { let (L loc _) = $2 in
+        | vocurly '..' vccurly               { let (L loc _) = $2 in
                                              L loc ([mj AnnDotdot $2],Nothing) }
 
 ty_fam_inst_eqns :: { Located [LTyFamInstEqn GhcPs] }
@@ -1724,7 +1725,7 @@ cvars1 :: { [RecordPatSynField GhcPs] }
 where_decls :: { LocatedL (OrdList (LHsDecl GhcPs)) }
         : 'where' '{' decls '}'       {% amsr (sLL $1 $> (snd $ unLoc $3))
                                               (AnnList (Just $ glR $3) (Just $ moc $2) (Just $ mcc $4) (mj AnnWhere $1: (fst $ unLoc $3)) []) }
-        | 'where' vocurly decls close {% amsr (sLL $1 $3 (snd $ unLoc $3))
+        | 'where' vocurly decls vccurly {% amsr (sLL $1 $3 (snd $ unLoc $3))
                                               (AnnList (Just $ glR $3) Nothing Nothing (mj AnnWhere $1: (fst $ unLoc $3)) []) }
 
 pattern_synonym_sig :: { LSig GhcPs }
@@ -1780,7 +1781,7 @@ decllist_cls
                      , EpLayout) }      -- Reversed
         : '{'         decls_cls '}'     { sLL $1 $> (moc $1:mcc $3:(fst $ unLoc $2)
                                              ,snd $ unLoc $2, epExplicitBraces $1 $3) }
-        |     vocurly decls_cls close   { let { L l (anns, decls) = $2 }
+        |     vocurly decls_cls vccurly   { let { L l (anns, decls) = $2 }
                                            in L l (anns, decls, EpVirtualBraces (getVOCURLY $1)) }
 
 -- Class body
@@ -1824,7 +1825,7 @@ decllist_inst
         :: { Located ([AddEpAnn]
                      , OrdList (LHsDecl GhcPs)) }      -- Reversed
         : '{'         decls_inst '}'    { sLL $1 $> (moc $1:mcc $3:(fst $ unLoc $2),snd $ unLoc $2) }
-        |     vocurly decls_inst close  { L (gl $2) (unLoc $2) }
+        |     vocurly decls_inst vccurly  { L (gl $2) (unLoc $2) }
 
 -- Instance body
 --
@@ -1864,7 +1865,7 @@ decls   :: { Located ([AddEpAnn], OrdList (LHsDecl GhcPs)) }
 decllist :: { Located (AnnList,Located (OrdList (LHsDecl GhcPs))) }
         : '{'            decls '}'     { sLL $1 $> (AnnList (Just $ glR $2) (Just $ moc $1) (Just $ mcc $3)  (fst $ unLoc $2) []
                                                    ,sL1 $2 $ snd $ unLoc $2) }
-        |     vocurly    decls close   { L (gl $2) (AnnList (Just $ glR $2) Nothing Nothing (fst $ unLoc $2) []
+        |     vocurly    decls vccurly   { L (gl $2) (AnnList (Just $ glR $2) Nothing Nothing (fst $ unLoc $2) []
                                                    ,sL1 $2 $ snd $ unLoc $2) }
 
 -- Binding groups other than those of class and instance declarations
@@ -1879,7 +1880,7 @@ binds   ::  { Located (HsLocalBinds GhcPs) }
         | '{'            dbinds '}'     {% acs (comb3 $1 $2 $3) (\loc cs -> (L loc
                                              $ HsIPBinds (EpAnn (spanAsAnchor (comb3 $1 $2 $3)) (AnnList (Just$ glR $2) (Just $ moc $1) (Just $ mcc $3) [] []) cs) (IPBinds noExtField (reverse $ unLoc $2)))) }
 
-        |     vocurly    dbinds close   {% acs (gl $2) (\loc cs -> (L loc
+        |     vocurly    dbinds vccurly   {% acs (gl $2) (\loc cs -> (L loc
                                              $ HsIPBinds (EpAnn (glR $1) (AnnList (Just $ glR $2) Nothing Nothing [] []) cs) (IPBinds noExtField (reverse $ unLoc $2)))) }
 
 
@@ -2492,7 +2493,7 @@ gadt_constrlist :: { Located ([AddEpAnn]
                                                          ,moc $2
                                                          ,mcc $4]
                                                         , unLoc $3) }
-        | 'where' vocurly    gadt_constrs close  {% checkEmptyGADTs $
+        | 'where' vocurly    gadt_constrs vccurly  {% checkEmptyGADTs $
                                                       L (comb2 $1 $3)
                                                         ([mj AnnWhere $1]
                                                         , unLoc $3) }
@@ -3205,7 +3206,7 @@ acmd    :: { LHsCmdTop GhcPs }
 cvtopbody :: { ([AddEpAnn],[LHsDecl GhcPs]) }
         :  '{'            cvtopdecls0 '}'      { ([mj AnnOpenC $1
                                                   ,mj AnnCloseC $3],$2) }
-        |      vocurly    cvtopdecls0 close    { ([],$2) }
+        |      vocurly    cvtopdecls0 vccurly    { ([],$2) }
 
 cvtopdecls0 :: { [LHsDecl GhcPs] }
         : topdecls_semi         { cvTopDecls $1 }
@@ -3436,11 +3437,11 @@ altslist(PATS) :: { forall b. DisambECP b => PV (LocatedL [LMatch GhcPs (Located
         : '{'        alts(PATS) '}'    { $2 >>= \ $2 -> amsr
                                            (sLL $1 $> (reverse (snd $ unLoc $2)))
                                            (AnnList (Just $ glR $2) (Just $ moc $1) (Just $ mcc $3) (fst $ unLoc $2) []) }
-        | vocurly    alts(PATS)  close { $2 >>= \ $2 -> amsr
+        | vocurly    alts(PATS)  vccurly { $2 >>= \ $2 -> amsr
                                            (L (getLoc $2) (reverse (snd $ unLoc $2)))
                                            (AnnList (Just $ glR $2) Nothing Nothing (fst $ unLoc $2) []) }
         | '{'              '}'   { amsr (sLL $1 $> []) (AnnList Nothing (Just $ moc $1) (Just $ mcc $2) [] []) }
-        | vocurly          close { return $ noLocA [] }
+        | vocurly          vccurly { return $ noLocA [] }
 
 alts(PATS) :: { forall b. DisambECP b => PV (Located ([AddEpAnn],[LMatch GhcPs (LocatedA b)])) }
         : alts1(PATS)              { $1 >>= \ $1 -> return $
@@ -3492,14 +3493,11 @@ gdpats :: { forall b. DisambECP b => PV (Located [LGRHS GhcPs (LocatedA b)]) }
                          return $ sLL gdpats gdpat (gdpat : unLoc gdpats) }
         | gdpat        { $1 >>= \gdpat -> return $ sL1 gdpat [gdpat] }
 
--- layout for MultiWayIf doesn't begin with an open brace, because it's hard to
--- generate the open brace in addition to the vertical bar in the lexer, and
--- we don't need it.
 ifgdpats :: { Located ((EpToken "{", EpToken "}"), [LGRHS GhcPs (LHsExpr GhcPs)]) }
-         : '{' gdpats '}'                 {% runPV $2 >>= \ $2 ->
+         : '{'     gdpats '}'             {% runPV $2 >>= \ $2 ->
                                              return $ sLL $1 $> ((epTok $1,epTok $3),unLoc $2)  }
-         |     gdpats close               {% runPV $1 >>= \ $1 ->
-                                             return $ sL1 $1 ((NoEpTok, NoEpTok),unLoc $1) }
+         | vocurly gdpats vccurly         {% runPV $2 >>= \ $2 ->
+                                             return $ sL1 $1 ((NoEpTok, NoEpTok),unLoc $2) }
 
 gdpat   :: { forall b. DisambECP b => PV (LGRHS GhcPs (LocatedA b)) }
         : '|' guardquals '->' exp
@@ -3550,7 +3548,7 @@ apat    : aexp                  {% (checkPattern <=< runPV) (unECP $1) }
 stmtlist :: { forall b. DisambECP b => PV (LocatedL [LocatedA (Stmt GhcPs (LocatedA b))]) }
         : '{'           stmts '}'       { $2 >>= \ $2 ->
                                           amsr (sLL $1 $> (reverse $ snd $ unLoc $2)) (AnnList (stmtsAnchor $2) (Just $ moc $1) (Just $ mcc $3) (fromOL $ fst $ unLoc $2) []) }
-        |     vocurly   stmts close     { $2 >>= \ $2 -> amsr
+        |     vocurly   stmts vccurly   { $2 >>= \ $2 -> amsr
                                           (L (stmtsLoc $2) (reverse $ snd $ unLoc $2)) (AnnList (stmtsAnchor $2) Nothing Nothing (fromOL $ fst $ unLoc $2) []) }
 
 --      do { ;; s ; s ; ; s ;; }
@@ -4133,7 +4131,7 @@ unrelated tokens.
 -}
 close :: { () }
         : vccurly               { () } -- context popped in lexer.
-        | error                 {% popContext } -- See Note [Layout and error]
+--        | error                 {% popContext } -- See Note [Layout and error]
 
 -----------------------------------------------------------------------------
 -- Miscellaneous (mostly renamings)


=====================================
compiler/GHC/Parser/Header.hs
=====================================
@@ -28,6 +28,7 @@ import GHC.Driver.Errors.Types -- Unfortunate, needed due to the fact we throw e
 
 import GHC.Parser.Errors.Types
 import GHC.Parser           ( parseHeader )
+import GHC.Parser.Layouter
 import GHC.Parser.Lexer
 
 import GHC.Hs
@@ -203,7 +204,7 @@ lazyGetToks popts filename handle = do
 
   lazyLexBuf :: Handle -> PState -> Bool -> Int -> IO [Located Token]
   lazyLexBuf handle state eof size =
-    case unP (lexer False return) state of
+    case unP (layouter (lexer False) return) state of
       POk state' t -> do
         -- pprTrace "lazyLexBuf" (text (show (buffer state'))) (return ())
         if atEnd (buffer state') && not eof
@@ -238,7 +239,7 @@ getToks popts filename buf = lexAll pstate
   pstate = initPragState popts buf loc
   loc  = mkRealSrcLoc (mkFastString filename) 1 1
 
-  lexAll state = case unP (lexer False return) state of
+  lexAll state = case unP (layouter (lexer False) return) state of
                    POk _      t@(L _ ITeof) -> [t]
                    POk state' t -> t : lexAll state'
                    _ -> [L (mkSrcSpanPs (last_loc state)) ITeof]


=====================================
compiler/GHC/Parser/Layouter.hs
=====================================
@@ -0,0 +1,275 @@
+{-# LANGUAGE MultiWayIf #-}
+
+module GHC.Parser.Layouter where
+
+import GHC.Prelude
+import GHC.Hs
+import GHC.Parser.Lexer
+import GHC.Types.SrcLoc
+import GHC.Utils.Panic
+import Data.Maybe
+import Control.Monad.Trans.Class
+import Control.Monad.Trans.Cont
+import GHC.Utils.Error
+import GHC.Parser.Errors.Types
+import Data.Sequence
+import Debug.Trace
+
+data LayItem
+  = LayFlexi !PsSpan !LayHerald
+  | LayImplicit !LayHerald !Int
+    -- ^ We are inside an implicit layout block.
+    -- We know the layout herald (e.g., `do`), the indentation level and have a
+    -- list of pending closing delimiters (such as `)`, `]`, `||]`) that must
+    -- be closed before closing the implicit layout context.
+  | LayExplicit
+    -- ^ We are inside an explicit layout block.
+    -- We know which delimiters must be closed before closing the layout block.
+
+data LayHerald
+  = LetHerald
+  | DoHerald
+  | IfHerald -- ^ For -XMultiWayIf
+  | RecHerald
+  | WhereHerald
+  | OfHerald
+  | LCaseHerald
+  | LCasesHerald
+  deriving (Eq, Ord, Show)
+
+type LayContext = (LayItem, [Token])
+
+data LayState = LS
+  { lay_stack   :: ![LayContext]
+  , lay_output  :: !(Seq (PsLocated Token))
+  }
+
+initLayState :: LayState
+initLayState = LS { lay_stack = [], lay_output = Empty }
+
+overLayState :: (LayState -> (a, LayState)) -> P a
+overLayState f = P $ \s -> case f (lay_state s) of (a, ls) -> POk s{lay_state=ls} a
+{-# INLINE overLayState #-}
+layouter, layouterDbg :: ((PsLocated Token -> P a) -> P a)
+                      -> ((Located Token -> P a) -> P a)
+
+getLayStack :: P [LayContext]
+getLayStack = P $ \s -> POk s (lay_stack (lay_state s))
+
+-- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
+layouterDbg lexer cont = layouter lexer contDbg
+  where
+    contDbg tok = trace ("token: " ++ show (unLoc tok)) (cont tok)
+
+layouter lexer = runContT $ yieldOutput $ ContT lexer >>= lift . interp lay
+
+data LayLang = PsLocated Token ::: LayLang | Done [LayContext]
+infixr :::
+
+interp :: ((ExtBits -> Bool) -> PsLocated Token -> [LayContext] -> LayLang) -> PsLocated Token -> P ()
+interp lay tok = do
+  stk <- getLayStack
+  bits <- pExtsBitmap <$> getParserOpts
+  go (lay (`xtest` bits) tok stk)
+  where
+    go (Done stk) = overLayState $ \s -> ((), s{lay_stack = stk})
+    go (tok ::: l) = enqueueOutput tok >> go l
+
+lay :: (ExtBits -> Bool) -> PsLocated Token -> [LayContext] -> LayLang
+lay ext ltok@(L span tok) = lay_module
+  where
+    offset | ITeof <- tok = 0
+           | otherwise    = srcSpanStartCol (psRealSpan span)
+    indent_decreasing h old new
+      | isLayHeraldNonDecreasing h      = old > new
+      | ext NondecreasingIndentationBit = old > new
+      | otherwise                       = old >= new
+
+    lay_module []
+      | ITocurly <- tok = ltok ::: Done [(LayExplicit,[])]
+      | ITwhere <- tok  = ltok ::: Done [(LayFlexi span WhereHerald,[])]
+      | otherwise       = ltok ::: Done []
+    lay_module stk      = lay_bol stk
+
+    lay_bol [] | ITeof <- tok = ltok ::: Done []
+               | otherwise    = panic (show span ++ " " ++ show tok)
+    lay_bol stk@((item,clos):stk') = case item of
+      LayImplicit herald offs -> case compare offset offs of
+        LT | LetHerald <- herald, ITin <- tok
+           -> L span ITvccurly ::: ltok ::: Done stk' -- So that `in` does not close multiple blocks
+           | otherwise
+           -> L span ITvccurly ::: lay_bol stk' -- NB: pop item
+        EQ | isInfixForm tok -- an (e.g.) `where` on the same indent would require something to the left of where; leave the current block
+           -> L span ITvccurly ::: lay_bol stk'
+           | generateSemis herald
+           -> L span ITsemi    ::: lay_rest ((item,clos):stk')
+        _  -> lay_rest ((item,clos):stk')
+      LayFlexi span herald
+        | ITocurly <- tok
+        -> ltok ::: Done ((LayExplicit,[]):stk')
+        | IfHerald <- herald, not (isVBar tok)
+        -> lay_rest stk' -- Vanilla If -> no layout
+        | (LayImplicit _ prev_off, _) : _ <- stk'
+        , indent_decreasing herald prev_off offset
+        -> L span ITvocurly ::: L span ITvccurly ::: lay_bol stk'
+        | [] <- stk'
+        , ITeof <- tok -- Directory.Internal.Windows: `module M where\n <eof>`
+        -> L span ITvocurly ::: L span ITvccurly ::: lay_bol stk'
+        | otherwise
+        -> L span ITvocurly ::: lay_rest ((LayImplicit herald offset,[]):stk')
+      LayExplicit{} -> lay_rest stk
+    lay_rest stk = case tok of
+      ITccurly | (LayExplicit,_):stk' <- stk -> ltok ::: Done stk'
+               | (LayImplicit{},_):stk' <- stk -> L span ITvccurly ::: lay_rest stk'
+      ITocurly -> ltok ::: Done ((LayExplicit,[]):stk)
+      _ | (item at LayImplicit{},clos):stk' <- stk, isClosingDelim tok
+        -> case clos of
+             clo:clos | clo `eqClosingDelim` tok
+               -> ltok ::: Done ((item,clos):stk')
+             _ -> L span ITvccurly ::: lay_rest stk'
+        | (LayImplicit LetHerald _,_):stk' <- stk, ITin <- tok
+        -> L span ITvccurly ::: ltok ::: Done stk' -- for let .. in
+        | (LayImplicit herald _,_):stk' <- stk, tok `killsLayoutOf` herald
+        -> L span ITvccurly ::: lay_rest stk'
+        | Just clo <- isOpeningDelim_maybe tok
+        , (item,clos):stk' <- stk
+        -> ltok ::: Done ((item,clo:clos):stk')
+        | Just herald <- isLayHerald_maybe tok
+        -> ltok ::: Done ((LayFlexi span herald,[]):stk)
+        | otherwise
+        -> ltok ::: Done stk
+
+yieldOutput :: ContT r P () -> ContT r P (Located Token)
+yieldOutput next = lift dequeueOutput >>= \mb_ltok -> case mb_ltok of
+  Nothing -> next >> yieldOutput (panic "should not need to do next twice")
+  Just (L span tok) -> return (L (mkSrcSpanPs span) tok)
+
+enqueueOutput :: PsLocated Token -> P ()
+enqueueOutput tk = overLayState $ \s -> trace ("token: " ++ show (unLoc tk)) ((), s{lay_output = lay_output s :|> tk})
+
+dequeueOutput :: P (Maybe (PsLocated Token))
+dequeueOutput = overLayState $ \s -> case lay_output s of
+  Empty -> (Nothing, s)
+  tk :<| tks -> (Just tk, s {lay_output = tks})
+
+pushLayStack :: LayItem -> P ()
+pushLayStack l = overLayState $ \s -> ((), s{lay_stack = (l,[]):lay_stack s})
+
+popLayStack :: P (Maybe LayContext)
+popLayStack = overLayState $ \s -> case lay_stack s of
+  []    -> (Nothing, s)
+  l:stk -> (Just l, s{lay_stack = stk})
+
+pushClosingTok :: Token -> P ()
+pushClosingTok tok = overLayState $ \s -> case lay_stack s of
+  []           -> panic "impossible"
+  (l,toks):stk -> ((), s{lay_stack = (l,tok:toks):stk})
+
+popClosingTok :: P ()
+popClosingTok = overLayState $ \s -> case lay_stack s of
+  (l,_:toks):stk -> ((), s{lay_stack = (l,toks):stk})
+  (l,[]):stk -> ((), s{lay_stack = (l,[]):stk}) -- genuinely can happen on error, I think. ex: `do foo)`
+  []         -> panic "impossible"
+
+isInfixForm :: Token -> Bool
+isInfixForm ITwhere    = True
+isInfixForm ITvarsym{} = True
+isInfixForm _          = False
+
+isOpeningDelim_maybe :: Token -> Maybe Token
+isOpeningDelim_maybe IToparen = Just ITcparen
+isOpeningDelim_maybe ITobrack = Just ITcbrack
+-- isOpeningDelim_maybe ITocurly = Just ITccurly
+isOpeningDelim_maybe IToubxparen = Just ITcubxparen
+isOpeningDelim_maybe (IToparenbar uni) = Just (ITcparenbar uni)
+isOpeningDelim_maybe (ITopenExpQuote _ uni) = Just (ITcloseQuote uni)
+isOpeningDelim_maybe ITopenTypQuote = Just (ITcloseQuote NormalSyntax)
+isOpeningDelim_maybe ITopenPatQuote = Just (ITcloseQuote NormalSyntax)
+isOpeningDelim_maybe ITopenDecQuote = Just (ITcloseQuote NormalSyntax)
+isOpeningDelim_maybe ITopenTExpQuote{} = Just ITcloseTExpQuote
+isOpeningDelim_maybe _ = Nothing
+
+isClosingDelim :: Token -> Bool
+isClosingDelim ITcparen = True
+isClosingDelim ITcbrack = True
+-- isClosingDelim ITccurly = True
+isClosingDelim ITcubxparen = True
+isClosingDelim ITcparenbar{} = True
+isClosingDelim ITcloseQuote{} = True
+isClosingDelim ITcloseTExpQuote = True
+isClosingDelim _ = False
+
+eqClosingDelim :: Token -> Token -> Bool
+eqClosingDelim ITcparen ITcparen = True
+eqClosingDelim ITcbrack ITcbrack = True
+-- eqClosingDelim ITccurly ITccurly = True
+eqClosingDelim ITcubxparen ITcubxparen = True
+eqClosingDelim (ITcparenbar uni1) (ITcparenbar uni2) = uni1 == uni2
+eqClosingDelim (ITcloseQuote uni1) (ITcloseQuote uni2) = uni1 == uni2
+eqClosingDelim ITcloseTExpQuote ITcloseTExpQuote = True
+eqClosingDelim _ _ = False
+
+separatesDelim :: Token -> Token -> Bool
+separatesDelim ITcomma ITcparen    = True
+separatesDelim ITcomma ITcbrack    = True
+separatesDelim ITcomma ITcubxparen = True
+separatesDelim _       _           = False
+
+generateSemis :: LayHerald -> Bool
+generateSemis IfHerald = False
+generateSemis _        = True
+
+isVBar :: Token -> Bool
+isVBar ITvbar = True
+isVBar _      = False
+
+isPragma :: Token -> Bool
+isPragma ITinline_prag{} = True
+isPragma ITopaque_prag{} = True
+isPragma ITspec_prag{} = True
+isPragma ITspec_inline_prag{} = True
+isPragma ITsource_prag{} = True
+isPragma ITrules_prag{} = True
+isPragma ITwarning_prag{} = True
+isPragma ITdeprecated_prag{} = True
+isPragma ITline_prag{} = True
+isPragma ITcolumn_prag{} = True
+isPragma ITscc_prag{} = True
+isPragma ITunpack_prag{} = True
+isPragma ITnounpack_prag{} = True
+isPragma ITann_prag{} = True
+isPragma ITcomplete_prag{} = True
+isPragma IToptions_prag{} = True
+isPragma ITinclude_prag{} = True
+isPragma ITlanguage_prag = True
+isPragma ITminimal_prag{} = True
+isPragma IToverlappable_prag{} = True
+isPragma IToverlapping_prag{} = True
+isPragma IToverlaps_prag{} = True
+isPragma ITincoherent_prag{} = True
+isPragma ITctype{} = True
+isPragma ITcomment_line_prag = True
+isPragma _ = False
+
+isLayHerald_maybe :: Token -> Maybe LayHerald
+isLayHerald_maybe (ITdo _)  = Just DoHerald
+isLayHerald_maybe (ITmdo _) = Just DoHerald
+isLayHerald_maybe ITof      = Just OfHerald
+isLayHerald_maybe ITlcase   = Just LCaseHerald
+isLayHerald_maybe ITlcases  = Just LCasesHerald
+isLayHerald_maybe ITlet     = Just LetHerald
+isLayHerald_maybe ITwhere   = Just WhereHerald
+isLayHerald_maybe ITrec     = Just RecHerald
+isLayHerald_maybe ITif      = Just IfHerald
+isLayHerald_maybe _         = Nothing
+
+isLayHeraldNonDecreasing :: LayHerald -> Bool
+isLayHeraldNonDecreasing DoHerald = True
+isLayHeraldNonDecreasing _        = False
+
+killsLayoutOf :: Token -> LayHerald -> Bool
+killsLayoutOf ITin LetHerald = True
+killsLayoutOf ITwhere DoHerald = True
+killsLayoutOf ITin OfHerald = True
+-- killsLayoutOf ITwhere OfHerald = True -- not true! `case x of True -> bar where ...; False -> foo where ...` is OK
+killsLayoutOf _ _ = False


=====================================
compiler/GHC/Parser/Layouter.hs-boot
=====================================
@@ -0,0 +1,6 @@
+module GHC.Parser.Layouter where
+
+import GHC.Prelude
+
+data LayState
+initLayState :: LayState


=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -60,7 +60,7 @@ module GHC.Parser.Lexer (
    P(..), ParseResult(POk, PFailed),
    allocateComments, allocatePriorComments, allocateFinalComments,
    MonadP(..), getBit,
-   getRealSrcLoc, getPState,
+   getRealSrcLoc, getPState, getLastLoc,
    failMsgP, failLocMsgP, srcParseFail,
    getPsErrorMessages, getPsMessages,
    popContext, pushModuleContext, setLastToken, setSrcLoc,
@@ -93,6 +93,7 @@ import qualified Data.List.NonEmpty as NE
 import Data.Maybe
 import Data.Word
 import Debug.Trace (trace)
+import {-# SOURCE #-} GHC.Parser.Layouter
 
 import GHC.Data.EnumSet as EnumSet
 
@@ -1999,15 +2000,15 @@ do_bol span _str _len _buf2 = do
         if b then return (L span ITcomment_line_prag) else do
           (pos, gen_semic) <- getOffside
           case pos of
-              LT -> do
-                  --trace "layout: inserting '}'" $ do
-                  popContext
-                  -- do NOT pop the lex state, we might have a ';' to insert
-                  return (L span ITvccurly)
-              EQ | gen_semic -> do
-                  --trace "layout: inserting ';'" $ do
-                  _ <- popLexState
-                  return (L span ITsemi)
+--              LT -> do
+--                  --trace "layout: inserting '}'" $ do
+--                  popContext
+--                  -- do NOT pop the lex state, we might have a ';' to insert
+--                  return (L span ITvccurly)
+--              EQ | gen_semic -> do
+--                  --trace "layout: inserting ';'" $ do
+--                  _ <- popLexState
+--                  return (L span ITsemi)
               _ -> do
                   _ <- popLexState
                   lexToken
@@ -2024,15 +2025,15 @@ maybe_layout t = do -- If the alternative layout rule is enabled then
                     -- context.
                     alr <- getBit AlternativeLayoutRuleBit
                     unless alr $ f t
-    where f (ITdo _)    = pushLexState layout_do
-          f (ITmdo _)   = pushLexState layout_do
-          f ITof        = pushLexState layout
-          f ITlcase     = pushLexState layout
-          f ITlcases    = pushLexState layout
-          f ITlet       = pushLexState layout
-          f ITwhere     = pushLexState layout
-          f ITrec       = pushLexState layout
-          f ITif        = pushLexState layout_if
+    where f (ITdo _)    = return () -- pushLexState layout_do
+          f (ITmdo _)   = return () -- pushLexState layout_do
+          f ITof        = return () -- pushLexState layout
+          f ITlcase     = return () -- pushLexState layout
+          f ITlcases    = return () -- pushLexState layout
+          f ITlet       = return () -- pushLexState layout
+          f ITwhere     = return () -- pushLexState layout
+          f ITrec       = return () -- pushLexState layout
+          f ITif        = return () -- pushLexState layout_if
           f _           = return ()
 
 -- Pushing a new implicit layout context.  If the indentation of the
@@ -2469,6 +2470,8 @@ data PState = PState {
         -- token doesn't need to close anything:
         alr_justClosedExplicitLetBlock :: Bool,
 
+        lay_state :: LayState,
+
         -- The next three are used to implement Annotations giving the
         -- locations of 'noise' tokens in the source, so that users of
         -- the GHC API can do source to source conversions.
@@ -3009,6 +3012,7 @@ initParserState options buf loc =
       alr_context = [],
       alr_expecting_ocurly = Nothing,
       alr_justClosedExplicitLetBlock = False,
+      lay_state = initLayState,
       eof_pos = Strict.Nothing,
       header_comments = Strict.Nothing,
       comment_q = [],
@@ -3236,7 +3240,7 @@ lexError e = do
 -- This is the top-level function: called from the parser each time a
 -- new token is to be read from the input.
 
-lexer, lexerDbg :: Bool -> (Located Token -> P a) -> P a
+lexer, lexerDbg :: Bool -> (PsLocated Token -> P a) -> P a
 
 lexer queueComments cont = do
   alr <- getBit AlternativeLayoutRuleBit
@@ -3246,7 +3250,7 @@ lexer queueComments cont = do
 
   if (queueComments && isComment tok)
     then queueComment (L (psRealSpan span) tok) >> lexer queueComments cont
-    else cont (L (mkSrcSpanPs span) tok)
+    else cont (L span tok)
 
 -- Use this instead of 'lexer' in GHC.Parser to dump the tokens for debugging.
 lexerDbg queueComments cont = lexer queueComments contDbg
@@ -3526,7 +3530,7 @@ lexTokenStream opts buf loc = unP go initState{ options = opts' }
       ltok <- lexer False return
       case ltok of
         L _ ITeof -> return []
-        _ -> liftM (ltok:) go
+        L span tk -> liftM (L (mkSrcSpanPs span) tk:) go
 
 linePrags = Map.singleton "line" linePrag
 


=====================================
compiler/ghc.cabal.in
=====================================
@@ -642,6 +642,7 @@ Library
         GHC.Parser.Errors.Ppr
         GHC.Parser.Errors.Types
         GHC.Parser.Header
+        GHC.Parser.Layouter
         GHC.Parser.Lexer
         GHC.Parser.HaddockLex
         GHC.Parser.PostProcess


=====================================
utils/haddock/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
=====================================
@@ -13,6 +13,7 @@ import GHC.Data.Bag (bagToList)
 import GHC.Data.FastString (mkFastString)
 import GHC.Data.StringBuffer (StringBuffer, atEnd)
 import GHC.Parser.Errors.Ppr ()
+import GHC.Parser.Layouter as Layouter
 import GHC.Parser.Lexer as Lexer
   ( P (..)
   , PState (..)
@@ -76,7 +77,7 @@ parse parserOpts sDocContext fpath bs = case unP (go False []) initState of
 
     -- \| Like 'Lexer.lexer', but slower, with a better API, and filtering out empty tokens
     wrappedLexer :: P (RealLocated Lexer.Token)
-    wrappedLexer = Lexer.lexer False andThen
+    wrappedLexer = Layouter.layouter (Lexer.lexer False) andThen
       where
         andThen (L (RealSrcSpan s _) t)
           | srcSpanStartLine s /= srcSpanEndLine s
@@ -104,7 +105,7 @@ parse parserOpts sDocContext fpath bs = case unP (go False []) initState of
     parsePlainTok :: Bool -> MaybeT P ([T.Token], Bool) -- return list is only ever 0-2 elements
     parsePlainTok inPrag = do
       (bInit, lInit) <- lift getInput
-      L sp tok <- tryP (Lexer.lexer False return)
+      L sp tok <- tryP (Layouter.layouter (Lexer.lexer False) return)
       (bEnd, _) <- lift getInput
       case sp of
         UnhelpfulSpan _ -> pure ([], False) -- pretend the token never existed



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5f102494dbe42179ae9453d801d8c490b4c3596e

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/5f102494dbe42179ae9453d801d8c490b4c3596e
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20241015/02a1c36b/attachment-0001.html>


More information about the ghc-commits mailing list