[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 5 commits: remove name shadowing

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Thu Oct 13 02:38:01 UTC 2022



Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
81915089 by Curran McConnell at 2022-10-12T16:32:26-04:00
remove name shadowing

- - - - -
626652f7 by Tamar Christina at 2022-10-12T16:33:13-04:00
winio: do not re-translate input when handle is uncooked

- - - - -
5172789a by Charles Taylor at 2022-10-12T16:33:57-04:00
Unrestricted OverloadedLabels (#11671)

Implements GHC proposal:
https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0170-unrestricted-overloadedlabels.rst

- - - - -
77be3115 by Andreas Klebinger at 2022-10-12T22:37:33-04:00
Add a perf test for the generics code pattern from #21839.

This code showed a strong shift between compile time (got worse) and
run time (got a lot better) recently which is perfectly acceptable.

However it wasn't clear why the compile time regression was happening
initially so I'm adding this test to make it easier to track such changes
in the future.

- - - - -
4838827c by Simon Hengel at 2022-10-12T22:37:38-04:00
Update phases.rst

(the name of the original source file is $1, not $2)
- - - - -


14 changed files:

- compiler/GHC/Parser/Lexer.x
- docs/users_guide/9.6.1-notes.rst
- docs/users_guide/phases.rst
- ghc/GHCi/UI/Monad.hs
- libraries/base/GHC/IO/Windows/Handle.hsc
- + testsuite/tests/overloadedrecflds/should_run/T11671_run.hs
- + testsuite/tests/overloadedrecflds/should_run/T11671_run.stdout
- testsuite/tests/overloadedrecflds/should_run/all.T
- + testsuite/tests/perf/compiler/T21839c.hs
- + testsuite/tests/perf/compiler/T21839c.stdout
- testsuite/tests/perf/compiler/all.T
- + testsuite/tests/perf/should_run/T21839r.hs
- + testsuite/tests/perf/should_run/T21839r.stdout
- testsuite/tests/perf/should_run/all.T


Changes:

=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -163,6 +163,7 @@ $small     = [$ascsmall $unismall \_]
 
 $uniidchar = \x07 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $idchar    = [$small $large $digit $uniidchar \']
+$labelchar = [$small $large $digit $uniidchar \' \.]
 
 $unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $graphic   = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
@@ -451,7 +452,8 @@ $tab          { warnTab }
 }
 
 <0> {
-  "#" @varid / { ifExtension OverloadedLabelsBit } { skip_one_varid ITlabelvarid }
+  "#" $labelchar+ / { ifExtension OverloadedLabelsBit } { skip_one_varid ITlabelvarid }
+  "#" \" / { ifExtension OverloadedLabelsBit } { lex_quoted_label }
 }
 
 <0> {
@@ -2023,46 +2025,64 @@ lex_string_prag_comment mkTok span _buf _len _buf2
 
 lex_string_tok :: Action
 lex_string_tok span buf _len _buf2 = do
-  tok <- lex_string ""
+  lexed <- lex_string
   (AI end bufEnd) <- getInput
   let
-    tok' = case tok of
-            ITprimstring _ bs -> ITprimstring (SourceText src) bs
-            ITstring _ s -> ITstring (SourceText src) s
-            _ -> panic "lex_string_tok"
+    tok = case lexed of
+      LexedPrimString s -> ITprimstring (SourceText src) (unsafeMkByteString s)
+      LexedRegularString s -> ITstring (SourceText src) (mkFastString s)
     src = lexemeToString buf (cur bufEnd - cur buf)
-  return (L (mkPsSpan (psSpanStart span) end) tok')
+  return $ L (mkPsSpan (psSpanStart span) end) tok
 
-lex_string :: String -> P Token
-lex_string s = do
+
+lex_quoted_label :: Action
+lex_quoted_label span _buf _len _buf2 = do
+  s <- lex_string_helper ""
+  (AI end _) <- getInput
+  let
+    token = ITlabelvarid (mkFastString s)
+    start = psSpanStart span
+
+  return $ L (mkPsSpan start end) token
+
+
+data LexedString = LexedRegularString String | LexedPrimString String
+
+lex_string :: P LexedString
+lex_string = do
+  s <- lex_string_helper ""
+  magicHash <- getBit MagicHashBit
+  if magicHash
+    then do
+      i <- getInput
+      case alexGetChar' i of
+        Just ('#',i) -> do
+          setInput i
+          when (any (> '\xFF') s) $ do
+            pState <- getPState
+            let msg = PsErrPrimStringInvalidChar
+            let err = mkPlainErrorMsgEnvelope (mkSrcSpanPs (last_loc pState)) msg
+            addError err
+          return $ LexedPrimString s
+        _other ->
+          return $ LexedRegularString s
+    else
+      return $ LexedRegularString s
+
+
+lex_string_helper :: String -> P String
+lex_string_helper s = do
   i <- getInput
   case alexGetChar' i of
     Nothing -> lit_error i
 
     Just ('"',i)  -> do
-        setInput i
-        let s' = reverse s
-        magicHash <- getBit MagicHashBit
-        if magicHash
-          then do
-            i <- getInput
-            case alexGetChar' i of
-              Just ('#',i) -> do
-                setInput i
-                when (any (> '\xFF') s') $ do
-                  pState <- getPState
-                  let msg = PsErrPrimStringInvalidChar
-                  let err = mkPlainErrorMsgEnvelope (mkSrcSpanPs (last_loc pState)) msg
-                  addError err
-                return (ITprimstring (SourceText s') (unsafeMkByteString s'))
-              _other ->
-                return (ITstring (SourceText s') (mkFastString s'))
-          else
-                return (ITstring (SourceText s') (mkFastString s'))
+      setInput i
+      return (reverse s)
 
     Just ('\\',i)
         | Just ('&',i) <- next -> do
-                setInput i; lex_string s
+                setInput i; lex_string_helper s
         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
                            -- is_space only works for <= '\x7f' (#3751, #5425)
                 setInput i; lex_stringgap s
@@ -2070,16 +2090,17 @@ lex_string s = do
 
     Just (c, i1) -> do
         case c of
-          '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
-          c | isAny c -> do setInput i1; lex_string (c:s)
+          '\\' -> do setInput i1; c' <- lex_escape; lex_string_helper (c':s)
+          c | isAny c -> do setInput i1; lex_string_helper (c:s)
           _other -> lit_error i
 
-lex_stringgap :: String -> P Token
+
+lex_stringgap :: String -> P String
 lex_stringgap s = do
   i <- getInput
   c <- getCharOrFail i
   case c of
-    '\\' -> lex_string s
+    '\\' -> lex_string_helper s
     c | c <= '\x7f' && is_space c -> lex_stringgap s
                            -- is_space only works for <= '\x7f' (#3751, #5425)
     _other -> lit_error i


=====================================
docs/users_guide/9.6.1-notes.rst
=====================================
@@ -78,6 +78,15 @@ Language
   Then GHC will use the second quantified constraint to solve ``C a b Int``,
   as it has a strictly weaker precondition.
 
+- GHC proposal `#170 Unrestricted OverloadedLabels
+  <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0170-unrestricted-overloadedlabels.rst>`_
+  has been implemented.
+  This extends the variety syntax for constructing labels under :extension:`OverloadedLabels`.
+  Examples of newly allowed syntax:
+  - Leading capital letters: `#Foo` equivalant to `getLabel @"Foo"`
+  - Numeric characters: `#3.14` equivalent to `getLabel @"3.14"`
+  - Arbitrary strings: `#"Hello, World!"` equivalent to `getLabel @"Hello, World!"`
+
 Compiler
 ~~~~~~~~
 


=====================================
docs/users_guide/phases.rst
=====================================
@@ -576,13 +576,13 @@ Options affecting a Haskell pre-processor
     .. code-block:: sh
 
         #!/bin/sh
-        ( echo "{-# LINE 1 \"$2\" #-}" ; iconv -f l1 -t utf-8 $2 ) > $3
+        ( echo "{-# LINE 1 \"$1\" #-}" ; iconv -f l1 -t utf-8 $2 ) > $3
 
     and pass ``-F -pgmF convert.sh`` to GHC. The ``-f l1`` option tells
     iconv to convert your Latin-1 file, supplied in argument ``$2``,
     while the "-t utf-8" options tell iconv to return a UTF-8 encoded
     file. The result is redirected into argument ``$3``. The
-    ``echo "{-# LINE 1 \"$2\" #-}"`` just makes sure that your error
+    ``echo "{-# LINE 1 \"$1\" #-}"`` just makes sure that your error
     positions are reported as in the original source file.
 
 .. _options-codegen:


=====================================
ghc/GHCi/UI/Monad.hs
=====================================
@@ -1,6 +1,5 @@
 {-# LANGUAGE FlexibleInstances, DeriveFunctor, DerivingVia #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# OPTIONS -fno-warn-name-shadowing #-}
 
 -----------------------------------------------------------------------------
 --
@@ -474,10 +473,10 @@ printStats dflags ActionStats{actionAllocs = mallocs, actionElapsedTime = secs}
                            Just allocs ->
                              text (separateThousands allocs) <+> text "bytes")))
   where
-    separateThousands n = reverse . sep . reverse . show $ n
-      where sep n'
+    separateThousands n = reverse . separate . reverse . show $ n
+      where separate n'
               | n' `lengthAtMost` 3 = n'
-              | otherwise           = take 3 n' ++ "," ++ sep (drop 3 n')
+              | otherwise           = take 3 n' ++ "," ++ separate (drop 3 n')
 
 -----------------------------------------------------------------------------
 -- reverting CAFs
@@ -526,13 +525,13 @@ turnOffBuffering_ fhv = do
   liftIO $ evalIO interp fhv
 
 mkEvalWrapper :: GhcMonad m => String -> [String] ->  m ForeignHValue
-mkEvalWrapper progname args =
+mkEvalWrapper progname' args' =
   runInternal $ GHC.compileParsedExprRemote
-  $ evalWrapper `GHC.mkHsApp` nlHsString progname
-                `GHC.mkHsApp` nlList (map nlHsString args)
+  $ evalWrapper' `GHC.mkHsApp` nlHsString progname'
+                 `GHC.mkHsApp` nlList (map nlHsString args')
   where
     nlHsString = nlHsLit . mkHsString
-    evalWrapper =
+    evalWrapper' =
       GHC.nlHsVar $ RdrName.mkOrig gHC_GHCI_HELPERS (mkVarOcc "evalWrapper")
 
 -- | Run a 'GhcMonad' action to compile an expression for internal usage.


=====================================
libraries/base/GHC/IO/Windows/Handle.hsc
=====================================
@@ -576,24 +576,23 @@ consoleWriteNonBlocking hwnd ptr _offset bytes
 
 consoleRead :: Bool -> Io ConsoleHandle -> Ptr Word8 -> Word64 -> Int -> IO Int
 consoleRead blocking hwnd ptr _offset bytes
-  = withUTF16ToGhcInternal ptr bytes $ \reqBytes w_ptr ->
-      alloca $ \res -> do
-       cooked <- isCooked hwnd
-       -- Cooked input must be handled differently when the STD handles are
-       -- attached to a real console handle.  For File based handles we can't do
-       -- proper cooked inputs, but since the actions are async you would get
-       -- results as soon as available.
-       --
-       -- For console handles We have to use a lower level API then ReadConsole,
-       -- namely we must use ReadConsoleInput which requires us to process
-       -- all console message manually.
-       --
-       -- Do note that MSYS2 shells such as bash don't attach to a real handle,
-       -- and instead have by default a pipe/file based std handles.  Which
-       -- means the cooked behaviour is best when used in a native Windows
-       -- terminal such as cmd, powershell or ConEmu.
-       case cooked || not blocking of
-        False -> do
+  = alloca $ \res -> do
+      cooked <- isCooked hwnd
+      -- Cooked input must be handled differently when the STD handles are
+      -- attached to a real console handle.  For File based handles we can't do
+      -- proper cooked inputs, but since the actions are async you would get
+      -- results as soon as available.
+      --
+      -- For console handles We have to use a lower level API then ReadConsole,
+      -- namely we must use ReadConsoleInput which requires us to process
+      -- all console message manually.
+      --
+      -- Do note that MSYS2 shells such as bash don't attach to a real handle,
+      -- and instead have by default a pipe/file based std handles.  Which
+      -- means the cooked behaviour is best when used in a native Windows
+      -- terminal such as cmd, powershell or ConEmu.
+      case cooked || not blocking of
+        False -> withUTF16ToGhcInternal ptr bytes $ \reqBytes w_ptr ->  do
           debugIO "consoleRead :: un-cooked I/O read."
           -- eotControl allows us to handle control characters like EOL
           -- without needing a newline, which would sort of defeat the point
@@ -628,9 +627,9 @@ consoleRead blocking hwnd ptr _offset bytes
           -- characters as they are.  Technically this function can handle any
           -- console event.  Including mouse, window and virtual key events
           -- but for now I'm only interested in key presses.
-          let entries = fromIntegral $ reqBytes `div` (#size INPUT_RECORD)
+          let entries = fromIntegral $ bytes `div` (#size INPUT_RECORD)
           allocaBytes entries $ \p_inputs ->
-            maybeReadEvent p_inputs entries res w_ptr
+            maybeReadEvent p_inputs entries res ptr
 
           -- Check to see if we have been explicitly asked to do a non-blocking
           -- I/O, and if we were, make sure that if we didn't have any console
@@ -657,6 +656,7 @@ consoleRead blocking hwnd ptr _offset bytes
 
             b_read <- fromIntegral <$> peek res
             read <- cobble b_read w_ptr p_inputs
+            debugIO $ "readEvent: =" ++ show read
             if read > 0
                then return $ fromIntegral read
                else maybeReadEvent p_inputs entries res w_ptr
@@ -665,7 +665,7 @@ consoleRead blocking hwnd ptr _offset bytes
           -- minimum required to know which key/sequences were pressed.  To do
           -- this and prevent having to fully port the PINPUT_RECORD structure
           -- in Haskell we use some GCC builtins to find the correct offsets.
-          cobble :: Int -> Ptr Word16 -> PINPUT_RECORD -> IO Int
+          cobble :: Int -> Ptr Word8 -> PINPUT_RECORD -> IO Int
           cobble 0 _ _ = do debugIO "cobble: done."
                             return 0
           cobble n w_ptr p_inputs =
@@ -690,8 +690,18 @@ consoleRead blocking hwnd ptr _offset bytes
                           debugIO $ "cobble: offset - " ++ show char_offset
                           debugIO $ "cobble: show > " ++ show char
                           debugIO $ "cobble: repeat: " ++ show repeated
+                          -- The documentation here is rather subtle, but
+                          -- according to MSDN the uWChar being provided here
+                          -- has been "translated".  What this actually means
+                          -- is that the surrogate pairs have already been
+                          -- translated into byte sequences.  That is, despite
+                          -- the Word16 storage type, it's actually a byte
+                          -- stream.  This means we shouldn't try to decode
+                          -- to UTF-8 again since we'd end up incorrectly
+                          -- interpreting two bytes as an extended unicode
+                          -- character.
                           pokeArray w_ptr $ replicate repeated char
-                          (+1) <$> cobble n' w_ptr' p_inputs'
+                          (+repeated) <$> cobble n' w_ptr' p_inputs'
                   else do debugIO $ "cobble: skip event."
                           cobble n' w_ptr p_inputs'
 


=====================================
testsuite/tests/overloadedrecflds/should_run/T11671_run.hs
=====================================
@@ -0,0 +1,47 @@
+{-# LANGUAGE DataKinds             #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedLabels      #-}
+{-# LANGUAGE MagicHash             #-}
+
+import Data.Foldable (traverse_)
+import Data.Proxy (Proxy(..))
+import GHC.OverloadedLabels (IsLabel(..))
+import GHC.TypeLits (KnownSymbol, symbolVal)
+import GHC.Prim (Addr#)
+
+instance KnownSymbol symbol => IsLabel symbol String where
+  fromLabel = symbolVal (Proxy :: Proxy symbol)
+
+(#) :: String -> Int -> String
+(#) _ i = show i
+
+f :: Addr# -> Int -> String
+f _ i = show i
+
+main :: IO ()
+main = traverse_ putStrLn
+  [ #a
+  , #number17
+  , #do
+  , #type
+  , #Foo
+  , #3
+  , #199.4
+  , #17a23b
+  , #f'a'
+  , #'a'
+  , #'
+  , #''notTHSplice
+  , #...
+  , #привет
+  , #こんにちは
+  , #"3"
+  , #":"
+  , #"Foo"
+  , #"The quick brown fox"
+  , #"\""
+  , (++) #hello#world
+  , (++) #"hello"#"world"
+  , #"hello"# 1 -- equivalent to `(fromLabel @"hello") # 1`
+  , f "hello"#2 -- equivalent to `f ("hello"# :: Addr#) 2`
+  ]


=====================================
testsuite/tests/overloadedrecflds/should_run/T11671_run.stdout
=====================================
@@ -0,0 +1,24 @@
+a
+number17
+do
+type
+Foo
+3
+199.4
+17a23b
+f'a'
+'a'
+'
+''notTHSplice
+...
+привет
+こんにちは
+3
+:
+Foo
+The quick brown fox
+"
+helloworld
+helloworld
+1
+2


=====================================
testsuite/tests/overloadedrecflds/should_run/all.T
=====================================
@@ -17,3 +17,4 @@ test('hasfieldrun01', normal, compile_and_run, [''])
 test('hasfieldrun02', normal, compile_and_run, [''])
 test('T12243', normal, compile_and_run, [''])
 test('T11228', normal, compile_and_run, [''])
+test('T11671_run', normal, compile_and_run, [''])


=====================================
testsuite/tests/perf/compiler/T21839c.hs
=====================================
@@ -0,0 +1,46 @@
+-- For in depth details see the ticket #21839. The short version:
+
+-- We noticed that GHC got slower compiling Cabal the libary.
+-- Eventually I narrowed it down to the pattern below of deriving Generics
+-- for a Enum, and then deriving a Binary instance for that Enum via Generics.
+-- A pattern very frequently used in Cabal.
+-- However this turned out to be a classic compile vs runtime tradeoff.
+-- In benchmarks I found the resulting code for the Binary instance was running
+-- more than twice as fast!
+-- So we decided to merely document this change and add a test representing this behaviour
+-- rather than trying to coax ghc back into its old behaviour.
+
+{-# LANGUAGE DeriveGeneric #-}
+
+{-# OPTIONS_GHC #-}
+module Main
+  ( main
+  ) where
+
+import GHC.Generics
+import Data.Typeable
+import Data.Binary
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
+
+data PathTemplateVariable =
+
+       Var0
+     | Var1
+     | Var2
+     | Var3
+     | Var4
+     | Var5
+     | Var6
+     | Var7
+     | Var8
+     | Var9
+  deriving (Generic,Enum)
+
+instance Binary PathTemplateVariable
+
+main :: IO ()
+main = do
+  let lists = replicate 10000 Var0
+      lbs = encode lists
+  print $ BS.length $ BS.toStrict lbs


=====================================
testsuite/tests/perf/compiler/T21839c.stdout
=====================================
@@ -0,0 +1 @@
+10008


=====================================
testsuite/tests/perf/compiler/all.T
=====================================
@@ -643,3 +643,10 @@ test ('T20261',
       [collect_compiler_stats('all')],
       compile,
       [''])
+
+# Track perf of generics based binary instances
+test('T21839c',
+    [   collect_compiler_stats('all', 1),
+        only_ways(['normal'])],
+    compile,
+    ['-O'])
\ No newline at end of file


=====================================
testsuite/tests/perf/should_run/T21839r.hs
=====================================
@@ -0,0 +1,46 @@
+-- For in depth details see the ticket #21839. The short version:
+
+-- We noticed that GHC got slower compiling Cabal the libary.
+-- Eventually I narrowed it down to the pattern below of deriving Generics
+-- for a Enum, and then deriving a Binary instance for that Enum via Generics.
+-- A pattern very frequently used in Cabal.
+-- However this turned out to be a classic compile vs runtime tradeoff.
+-- In benchmarks I found the resulting code for the Binary instance was running
+-- more than twice as fast!
+-- So we decided to merely document this change and add a test representing this behaviour
+-- rather than trying to coax ghc back into its old behaviour.
+
+{-# LANGUAGE DeriveGeneric #-}
+
+{-# OPTIONS_GHC #-}
+module Main
+  ( main
+  ) where
+
+import GHC.Generics
+import Data.Typeable
+import Data.Binary
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString as BS
+
+data PathTemplateVariable =
+
+       Var0
+     | Var1
+     | Var2
+     | Var3
+     | Var4
+     | Var5
+     | Var6
+     | Var7
+     | Var8
+     | Var9
+  deriving (Generic,Enum)
+
+instance Binary PathTemplateVariable
+
+main :: IO ()
+main = do
+  let lists = replicate 10000 Var0
+      lbs = encode lists
+  print $ BS.length $ BS.toStrict lbs


=====================================
testsuite/tests/perf/should_run/T21839r.stdout
=====================================
@@ -0,0 +1 @@
+10008


=====================================
testsuite/tests/perf/should_run/all.T
=====================================
@@ -395,3 +395,11 @@ test('T19347',
     compile_and_run,
     ['-O'])
 
+# Track perf of generics based binary instances
+test('T21839r',
+    [   collect_stats('bytes allocated', 10),
+        collect_runtime_residency(10),
+        collect_compiler_stats('bytes allocated', 1),
+        only_ways(['normal'])],
+    compile_and_run,
+    ['-O'])



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/5327ef918c1f6bc9cadbb7bf900efa80bd1efdbe...4838827cbb4c42a3d0a9879d38f1fbaf4960d578

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/5327ef918c1f6bc9cadbb7bf900efa80bd1efdbe...4838827cbb4c42a3d0a9879d38f1fbaf4960d578
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20221012/835c65f3/attachment-0001.html>


More information about the ghc-commits mailing list