[Git][ghc/ghc][master] Unicode: make ucd2haskell build-able again

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Sat Jun 1 13:37:06 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
adb1fe42 by Serge S. Gulin at 2024-06-01T09:35:53-04:00
Unicode: make ucd2haskell build-able again

ucd2haskell tool used streamly library which version in cabal was out of date. It is updated to the latest version at hackage with deprecated parts rewritten.

Also following fixes were applied to existing code in suppose that from its last run the code structure was changed and now it was required to be up to date with actual folder structures:
1. Ghc module path environment got a suffix with `src`.
2. Generated code got
2.1 `GHC.Internal` prefix for `Data.*`.
2.2 `GHC.Unicode.Internal` swapped on `GHC.Internal.Unicode` according to actual structure.

- - - - -


4 changed files:

- libraries/ghc-internal/tools/ucd2haskell/README.md
- libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
- libraries/ghc-internal/tools/ucd2haskell/ucd.sh
- libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal


Changes:

=====================================
libraries/ghc-internal/tools/ucd2haskell/README.md
=====================================
@@ -1,6 +1,6 @@
 # Generating GHC’s Unicode modules
 
-`GHC.Unicode.Internal.*` modules are generated with the internal tool `ucd2haskell`.
+`GHC.Internal.Unicode.*` modules are generated with the internal tool `ucd2haskell`.
 
 ```bash
 cd ucd2haskell
@@ -13,7 +13,7 @@ cd ucd2haskell
 2. _Comment_ the line in `ucd.sh` with `VERIFY_CHECKSUM=y`.
 3. Run `./ucd.sh download`.
 4. Update the checksums in `ucd.sh` and _uncomment_ `VERIFY_CHECKSUM=y`.
-5. Run `./ucd.sh generate`. This will generate the `GHC.Unicode.Internal.*` 
+5. Run `./ucd.sh generate`. This will generate the `GHC.Internal.Unicode.*`
    modules.
 6. Check and update the output of the tests `base/tests/unicodeXXX.hs`.
 7. Compare with Python (see hereinafter) and fix any error.
@@ -32,7 +32,7 @@ __Warning:__ A Python version with the _exact same Unicode version_ is required.
 Check the properties of all the characters.
 
 ```bash
-ghc -O2 tests/export_all_chars.hs 
+ghc -O2 tests/export_all_chars.hs
 ./tests/export_all_chars > tests/all_chars.csv
 python3 tests/check_all_chars.py tests/all_chars.csv
 ```


=====================================
libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
=====================================
@@ -21,7 +21,6 @@ module Parser.Text (genModules) where
 
 import Control.Exception (catch, IOException)
 import Control.Monad (void)
-import Control.Monad.IO.Class (MonadIO(liftIO))
 import Data.Bits (Bits(..))
 import Data.Word (Word8)
 import Data.Char (chr, ord, isSpace)
@@ -31,19 +30,20 @@ import Data.List (intersperse, unfoldr)
 import Data.List.Split (splitWhen)
 import Numeric (showHex)
 import Streamly.Data.Fold (Fold)
-import Streamly.Prelude (IsStream, SerialT)
 import System.Directory (createDirectoryIfMissing)
 import System.Environment (getEnv)
 import System.FilePath ((</>), (<.>))
 
 -- import qualified Data.Set as Set
-import qualified Streamly.Prelude as Stream
+import Streamly.Data.Stream (Stream)
+import qualified Streamly.Data.Stream.Prelude as Stream
 import qualified Streamly.Data.Fold as Fold
 import qualified Streamly.Internal.Data.Fold as Fold
 import qualified Streamly.Data.Unfold as Unfold
 import qualified Streamly.FileSystem.Handle as Handle
-import qualified System.IO as Sys
 import qualified Streamly.Unicode.Stream as Unicode
+import qualified Streamly.Internal.Unicode.Stream as Unicode
+import qualified System.IO as Sys
 
 import Prelude hiding (pred)
 
@@ -271,7 +271,7 @@ genUnicodeVersion outdir = do
               (\(_ :: IOException) -> return "<unknown>")
   Stream.fold f (Stream.fromList (body version))
   where
-    moduleName = "GHC.Unicode.Internal.Version"
+    moduleName = "GHC.Internal.Unicode.Version"
     f = moduleFileEmitter Nothing outdir
           (moduleName, \_ -> Fold.foldMap (<> "\n"))
     body :: String -> [String]
@@ -284,12 +284,12 @@ genUnicodeVersion outdir = do
       , "(unicodeVersion)"
       , "where"
       , ""
-      , "import {-# SOURCE #-} Data.Version"
+      , "import {-# SOURCE #-} GHC.Internal.Data.Version"
       , ""
       , "-- | Version of Unicode standard used by @base@:"
       , "-- [" <> version <> "](https://www.unicode.org/versions/Unicode" <> version <> "/)."
       , "--"
-      , "-- @since 4.15.0.0"
+      , "-- @since base-4.15.0.0"
       , "unicodeVersion :: Version"
       , "unicodeVersion = makeVersion [" <> mkVersion version <> "]" ]
     mkVersion = foldr (\c acc -> case c of {'.' -> ',':' ':acc; _ -> c:acc}) mempty
@@ -331,8 +331,8 @@ genGeneralCategoryModule moduleName =
         , "(generalCategory)"
         , "where"
         , ""
-        , "import GHC.Base (Char, Int, Ord(..), ord)"
-        , "import GHC.Unicode.Internal.Bits (lookupIntN)"
+        , "import GHC.Internal.Base (Char, Int, Ord(..), ord)"
+        , "import GHC.Internal.Unicode.Bits (lookupIntN)"
         , ""
         , genEnumBitmap "generalCategory" Cn (reverse acc)
         ]
@@ -415,7 +415,7 @@ genDecomposableModule moduleName dtype =
             , "where"
             , ""
             , "import Data.Char (ord)"
-            , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+            , "import GHC.Internal.Unicode.Bits (lookupBit64)"
             , ""
             , genBitmap "isDecomposable" (reverse st)
             ]
@@ -443,7 +443,7 @@ genCombiningClassModule moduleName =
             , "where"
             , ""
             , "import Data.Char (ord)"
-            , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+            , "import GHC.Internal.Unicode.Bits (lookupBit64)"
             , ""
             , "combiningClass :: Char -> Int"
             , unlines (reverse st1)
@@ -566,8 +566,8 @@ genCompositionsModule moduleName compExclu non0CC =
         , "(compose, composeStarters, isSecondStarter)"
         , "where"
         , ""
-        , "import GHC.Base (Char, ord)"
-        , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+        , "import GHC.Internal.Base (Char, ord)"
+        , "import GHC.Internal.Unicode.Bits (lookupBit64)"
         , ""
         ]
 
@@ -616,7 +616,7 @@ genSimpleCaseMappingModule moduleName funcName field =
         , "(" <> funcName <> ")"
         , "where"
         , ""
-        , "import GHC.Base (Char)"
+        , "import GHC.Internal.Base (Char)"
         , ""
         ]
     genSign =
@@ -670,8 +670,8 @@ genCorePropertiesModule moduleName isProp =
         , "(" <> unwords (intersperse "," (map prop2FuncName exports)) <> ")"
         , "where"
         , ""
-        , "import GHC.Base (Bool, Char, Ord(..), (&&), ord)"
-        , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+        , "import GHC.Internal.Base (Bool, Char, Ord(..), (&&), ord)"
+        , "import GHC.Internal.Unicode.Bits (lookupBit64)"
         , ""
         ]
 
@@ -818,7 +818,7 @@ parsePropertyLine ln
 isDivider :: String -> Bool
 isDivider x = x == "# ================================================"
 
-parsePropertyLines :: (IsStream t, Monad m) => t m String -> t m PropertyLine
+parsePropertyLines :: (Monad m) => Stream m String -> Stream m PropertyLine
 parsePropertyLines =
     Stream.splitOn isDivider
         $ Fold.lmap parsePropertyLine
@@ -843,11 +843,11 @@ Parse ranges according to https://www.unicode.org/reports/tr44/#Code_Point_Range
 __Note:__ this does /not/ fill missing char entries,
 i.e. entries with no explicit entry nor within a range.
 -}
-parseUnicodeDataLines :: forall t m. (IsStream t, Monad m) => t m String -> t m DetailedChar
+parseUnicodeDataLines :: forall m. (Monad m) => Stream m String -> Stream m DetailedChar
 parseUnicodeDataLines
     = Stream.unfoldMany (Unfold.unfoldr unitToRange)
     . Stream.foldMany ( Fold.lmap parseDetailedChar
-                      $ Fold.mkFold_ step initial )
+                      $ Fold.foldt' step initial id)
 
     where
 
@@ -913,19 +913,14 @@ parseDetailedChar line = case splitWhen (== ';') line of
 -- Generation
 -------------------------------------------------------------------------------
 
-readLinesFromFile :: String -> SerialT IO String
+readLinesFromFile :: String -> Stream IO String
 readLinesFromFile file =
     withFile file Sys.ReadMode
-        $ \h ->
-              Stream.unfold Handle.read h & Unicode.decodeUtf8
-                  & unicodeLines Fold.toList
+        $ \h -> Handle.read h & Unicode.decodeUtf8 & Unicode.lines Fold.toList
 
     where
-
-    unicodeLines = Stream.splitOnSuffix (== '\n')
-
     withFile file_ mode =
-        Stream.bracket (liftIO $ Sys.openFile file_ mode) (liftIO . Sys.hClose)
+        Stream.bracketIO (Sys.openFile file_ mode) (Sys.hClose)
 
 
 moduleToFileName :: String -> String
@@ -995,7 +990,7 @@ testOutputFileEmitter outdir (name, fldGen) = Fold.rmapM action fldGen
 runGenerator ::
        FilePath
     -> FilePath
-    -> (SerialT IO String -> SerialT IO a)
+    -> (Stream IO String -> Stream IO a)
     -> FilePath
     -> GeneratorRecipe a
     -> IO ()
@@ -1067,64 +1062,64 @@ genModules indir outdir props = do
 
     -- [NOTE] Disabled generator
     -- propList =
-    --     ("GHC.Unicode.Internal.Char.PropList"
+    --     ("GHC.Internal.Unicode.Char.PropList"
     --     , (`genCorePropertiesModule` (`elem` props)))
 
     derivedCoreProperties =
-        ("GHC.Unicode.Internal.Char.DerivedCoreProperties"
+        ("GHC.Internal.Unicode.Char.DerivedCoreProperties"
         , (`genCorePropertiesModule` (`elem` props)))
 
     -- [NOTE] Disabled generator
     -- compositions exc non0 =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.Compositions"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.Compositions"
     --     , \m -> genCompositionsModule m exc non0)
 
     -- [NOTE] Disabled generator
     -- combiningClass =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.CombiningClass"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.CombiningClass"
     --     , genCombiningClassModule)
 
     -- [NOTE] Disabled generator
     -- decomposable =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.Decomposable"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.Decomposable"
     --     , (`genDecomposableModule` Canonical))
 
     -- [NOTE] Disabled generator
     -- decomposableK =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.DecomposableK"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.DecomposableK"
     --     , (`genDecomposableModule` Kompat))
 
     -- [NOTE] Disabled generator
     -- decompositions =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.Decompositions"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.Decompositions"
     --     , \m -> genDecomposeDefModule m [] [] Canonical (const True))
 
     -- [NOTE] Disabled generator
     -- decompositionsK2 =
-    --     ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK2"
+    --     ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK2"
     --     , \m -> genDecomposeDefModule m [] [] Kompat (>= 60000))
 
     -- [NOTE] Disabled generator
     -- decompositionsK =
     --     let pre = ["import qualified " <> fst decompositionsK2 <> " as DK2", ""]
     --         post = ["decompose c = DK2.decompose c"]
-    --      in ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK"
+    --      in ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK"
     --         , \m -> genDecomposeDefModule m pre post Kompat (< 60000))
 
     generalCategory =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.GeneralCategory"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.GeneralCategory"
          , genGeneralCategoryModule)
 
     simpleUpperCaseMapping =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleUpperCaseMapping"
          , \m -> genSimpleCaseMappingModule m "toSimpleUpperCase" _simpleUppercaseMapping)
 
     simpleLowerCaseMapping =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleLowerCaseMapping"
          , \m -> genSimpleCaseMappingModule m "toSimpleLowerCase" _simpleLowercaseMapping)
 
     simpleTitleCaseMapping =
-         ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping"
+         ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleTitleCaseMapping"
          , \m -> genSimpleCaseMappingModule m "toSimpleTitleCase" _simpleTitlecaseMapping)
 
     -- unicode002Test =


=====================================
libraries/ghc-internal/tools/ucd2haskell/ucd.sh
=====================================
@@ -52,7 +52,7 @@ download_files() {
     done
 }
 
-GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../")
+GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../src")
 
 # Generate the Haskell files.
 run_generator() {
@@ -63,7 +63,7 @@ run_generator() {
           --core-prop Uppercase \
           --core-prop Lowercase
         # [NOTE] disabled generator
-        #   --core-prop Alphabetic 
+        #   --core-prop Alphabetic
         #   --core-prop White_Space \
         #   --core-prop ID_Start \
         #   --core-prop ID_Continue \


=====================================
libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal
=====================================
@@ -52,10 +52,11 @@ executable ucd2haskell
   main-is: UCD2Haskell.hs
   other-modules: Parser.Text
   build-depends:
-      base             >= 4.7   && < 4.18
-    , streamly         >= 0.8   && < 0.9
+      base             >= 4.7   && < 4.20
+    , streamly-core    >= 0.2.2 && < 0.3
+    , streamly         >= 0.10   && < 0.11
     , split            >= 0.2.3 && < 0.3
     , getopt-generics  >= 0.13  && < 0.14
     , containers       >= 0.5   && < 0.7
-    , directory        >= 1.3.6 && < 1.3.7
+    , directory        >= 1.3.6 && < 1.3.8
     , filepath         >= 1.4.2 && < 1.5



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/adb1fe42c00ceeddf6a4412550d8e34ac1b49ce9

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/adb1fe42c00ceeddf6a4412550d8e34ac1b49ce9
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240601/b263a8b4/attachment-0001.html>


More information about the ghc-commits mailing list