[Git][ghc/ghc][master] Unicode: make ucd2haskell build-able again
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Sat Jun 1 13:37:06 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
adb1fe42 by Serge S. Gulin at 2024-06-01T09:35:53-04:00
Unicode: make ucd2haskell build-able again
ucd2haskell tool used streamly library which version in cabal was out of date. It is updated to the latest version at hackage with deprecated parts rewritten.
Also following fixes were applied to existing code in suppose that from its last run the code structure was changed and now it was required to be up to date with actual folder structures:
1. Ghc module path environment got a suffix with `src`.
2. Generated code got
2.1 `GHC.Internal` prefix for `Data.*`.
2.2 `GHC.Unicode.Internal` swapped on `GHC.Internal.Unicode` according to actual structure.
- - - - -
4 changed files:
- libraries/ghc-internal/tools/ucd2haskell/README.md
- libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
- libraries/ghc-internal/tools/ucd2haskell/ucd.sh
- libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal
Changes:
=====================================
libraries/ghc-internal/tools/ucd2haskell/README.md
=====================================
@@ -1,6 +1,6 @@
# Generating GHC’s Unicode modules
-`GHC.Unicode.Internal.*` modules are generated with the internal tool `ucd2haskell`.
+`GHC.Internal.Unicode.*` modules are generated with the internal tool `ucd2haskell`.
```bash
cd ucd2haskell
@@ -13,7 +13,7 @@ cd ucd2haskell
2. _Comment_ the line in `ucd.sh` with `VERIFY_CHECKSUM=y`.
3. Run `./ucd.sh download`.
4. Update the checksums in `ucd.sh` and _uncomment_ `VERIFY_CHECKSUM=y`.
-5. Run `./ucd.sh generate`. This will generate the `GHC.Unicode.Internal.*`
+5. Run `./ucd.sh generate`. This will generate the `GHC.Internal.Unicode.*`
modules.
6. Check and update the output of the tests `base/tests/unicodeXXX.hs`.
7. Compare with Python (see hereinafter) and fix any error.
@@ -32,7 +32,7 @@ __Warning:__ A Python version with the _exact same Unicode version_ is required.
Check the properties of all the characters.
```bash
-ghc -O2 tests/export_all_chars.hs
+ghc -O2 tests/export_all_chars.hs
./tests/export_all_chars > tests/all_chars.csv
python3 tests/check_all_chars.py tests/all_chars.csv
```
=====================================
libraries/ghc-internal/tools/ucd2haskell/exe/Parser/Text.hs
=====================================
@@ -21,7 +21,6 @@ module Parser.Text (genModules) where
import Control.Exception (catch, IOException)
import Control.Monad (void)
-import Control.Monad.IO.Class (MonadIO(liftIO))
import Data.Bits (Bits(..))
import Data.Word (Word8)
import Data.Char (chr, ord, isSpace)
@@ -31,19 +30,20 @@ import Data.List (intersperse, unfoldr)
import Data.List.Split (splitWhen)
import Numeric (showHex)
import Streamly.Data.Fold (Fold)
-import Streamly.Prelude (IsStream, SerialT)
import System.Directory (createDirectoryIfMissing)
import System.Environment (getEnv)
import System.FilePath ((</>), (<.>))
-- import qualified Data.Set as Set
-import qualified Streamly.Prelude as Stream
+import Streamly.Data.Stream (Stream)
+import qualified Streamly.Data.Stream.Prelude as Stream
import qualified Streamly.Data.Fold as Fold
import qualified Streamly.Internal.Data.Fold as Fold
import qualified Streamly.Data.Unfold as Unfold
import qualified Streamly.FileSystem.Handle as Handle
-import qualified System.IO as Sys
import qualified Streamly.Unicode.Stream as Unicode
+import qualified Streamly.Internal.Unicode.Stream as Unicode
+import qualified System.IO as Sys
import Prelude hiding (pred)
@@ -271,7 +271,7 @@ genUnicodeVersion outdir = do
(\(_ :: IOException) -> return "<unknown>")
Stream.fold f (Stream.fromList (body version))
where
- moduleName = "GHC.Unicode.Internal.Version"
+ moduleName = "GHC.Internal.Unicode.Version"
f = moduleFileEmitter Nothing outdir
(moduleName, \_ -> Fold.foldMap (<> "\n"))
body :: String -> [String]
@@ -284,12 +284,12 @@ genUnicodeVersion outdir = do
, "(unicodeVersion)"
, "where"
, ""
- , "import {-# SOURCE #-} Data.Version"
+ , "import {-# SOURCE #-} GHC.Internal.Data.Version"
, ""
, "-- | Version of Unicode standard used by @base@:"
, "-- [" <> version <> "](https://www.unicode.org/versions/Unicode" <> version <> "/)."
, "--"
- , "-- @since 4.15.0.0"
+ , "-- @since base-4.15.0.0"
, "unicodeVersion :: Version"
, "unicodeVersion = makeVersion [" <> mkVersion version <> "]" ]
mkVersion = foldr (\c acc -> case c of {'.' -> ',':' ':acc; _ -> c:acc}) mempty
@@ -331,8 +331,8 @@ genGeneralCategoryModule moduleName =
, "(generalCategory)"
, "where"
, ""
- , "import GHC.Base (Char, Int, Ord(..), ord)"
- , "import GHC.Unicode.Internal.Bits (lookupIntN)"
+ , "import GHC.Internal.Base (Char, Int, Ord(..), ord)"
+ , "import GHC.Internal.Unicode.Bits (lookupIntN)"
, ""
, genEnumBitmap "generalCategory" Cn (reverse acc)
]
@@ -415,7 +415,7 @@ genDecomposableModule moduleName dtype =
, "where"
, ""
, "import Data.Char (ord)"
- , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+ , "import GHC.Internal.Unicode.Bits (lookupBit64)"
, ""
, genBitmap "isDecomposable" (reverse st)
]
@@ -443,7 +443,7 @@ genCombiningClassModule moduleName =
, "where"
, ""
, "import Data.Char (ord)"
- , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+ , "import GHC.Internal.Unicode.Bits (lookupBit64)"
, ""
, "combiningClass :: Char -> Int"
, unlines (reverse st1)
@@ -566,8 +566,8 @@ genCompositionsModule moduleName compExclu non0CC =
, "(compose, composeStarters, isSecondStarter)"
, "where"
, ""
- , "import GHC.Base (Char, ord)"
- , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+ , "import GHC.Internal.Base (Char, ord)"
+ , "import GHC.Internal.Unicode.Bits (lookupBit64)"
, ""
]
@@ -616,7 +616,7 @@ genSimpleCaseMappingModule moduleName funcName field =
, "(" <> funcName <> ")"
, "where"
, ""
- , "import GHC.Base (Char)"
+ , "import GHC.Internal.Base (Char)"
, ""
]
genSign =
@@ -670,8 +670,8 @@ genCorePropertiesModule moduleName isProp =
, "(" <> unwords (intersperse "," (map prop2FuncName exports)) <> ")"
, "where"
, ""
- , "import GHC.Base (Bool, Char, Ord(..), (&&), ord)"
- , "import GHC.Unicode.Internal.Bits (lookupBit64)"
+ , "import GHC.Internal.Base (Bool, Char, Ord(..), (&&), ord)"
+ , "import GHC.Internal.Unicode.Bits (lookupBit64)"
, ""
]
@@ -818,7 +818,7 @@ parsePropertyLine ln
isDivider :: String -> Bool
isDivider x = x == "# ================================================"
-parsePropertyLines :: (IsStream t, Monad m) => t m String -> t m PropertyLine
+parsePropertyLines :: (Monad m) => Stream m String -> Stream m PropertyLine
parsePropertyLines =
Stream.splitOn isDivider
$ Fold.lmap parsePropertyLine
@@ -843,11 +843,11 @@ Parse ranges according to https://www.unicode.org/reports/tr44/#Code_Point_Range
__Note:__ this does /not/ fill missing char entries,
i.e. entries with no explicit entry nor within a range.
-}
-parseUnicodeDataLines :: forall t m. (IsStream t, Monad m) => t m String -> t m DetailedChar
+parseUnicodeDataLines :: forall m. (Monad m) => Stream m String -> Stream m DetailedChar
parseUnicodeDataLines
= Stream.unfoldMany (Unfold.unfoldr unitToRange)
. Stream.foldMany ( Fold.lmap parseDetailedChar
- $ Fold.mkFold_ step initial )
+ $ Fold.foldt' step initial id)
where
@@ -913,19 +913,14 @@ parseDetailedChar line = case splitWhen (== ';') line of
-- Generation
-------------------------------------------------------------------------------
-readLinesFromFile :: String -> SerialT IO String
+readLinesFromFile :: String -> Stream IO String
readLinesFromFile file =
withFile file Sys.ReadMode
- $ \h ->
- Stream.unfold Handle.read h & Unicode.decodeUtf8
- & unicodeLines Fold.toList
+ $ \h -> Handle.read h & Unicode.decodeUtf8 & Unicode.lines Fold.toList
where
-
- unicodeLines = Stream.splitOnSuffix (== '\n')
-
withFile file_ mode =
- Stream.bracket (liftIO $ Sys.openFile file_ mode) (liftIO . Sys.hClose)
+ Stream.bracketIO (Sys.openFile file_ mode) (Sys.hClose)
moduleToFileName :: String -> String
@@ -995,7 +990,7 @@ testOutputFileEmitter outdir (name, fldGen) = Fold.rmapM action fldGen
runGenerator ::
FilePath
-> FilePath
- -> (SerialT IO String -> SerialT IO a)
+ -> (Stream IO String -> Stream IO a)
-> FilePath
-> GeneratorRecipe a
-> IO ()
@@ -1067,64 +1062,64 @@ genModules indir outdir props = do
-- [NOTE] Disabled generator
-- propList =
- -- ("GHC.Unicode.Internal.Char.PropList"
+ -- ("GHC.Internal.Unicode.Char.PropList"
-- , (`genCorePropertiesModule` (`elem` props)))
derivedCoreProperties =
- ("GHC.Unicode.Internal.Char.DerivedCoreProperties"
+ ("GHC.Internal.Unicode.Char.DerivedCoreProperties"
, (`genCorePropertiesModule` (`elem` props)))
-- [NOTE] Disabled generator
-- compositions exc non0 =
- -- ( "GHC.Unicode.Internal.Char.UnicodeData.Compositions"
+ -- ( "GHC.Internal.Unicode.Char.UnicodeData.Compositions"
-- , \m -> genCompositionsModule m exc non0)
-- [NOTE] Disabled generator
-- combiningClass =
- -- ( "GHC.Unicode.Internal.Char.UnicodeData.CombiningClass"
+ -- ( "GHC.Internal.Unicode.Char.UnicodeData.CombiningClass"
-- , genCombiningClassModule)
-- [NOTE] Disabled generator
-- decomposable =
- -- ( "GHC.Unicode.Internal.Char.UnicodeData.Decomposable"
+ -- ( "GHC.Internal.Unicode.Char.UnicodeData.Decomposable"
-- , (`genDecomposableModule` Canonical))
-- [NOTE] Disabled generator
-- decomposableK =
- -- ( "GHC.Unicode.Internal.Char.UnicodeData.DecomposableK"
+ -- ( "GHC.Internal.Unicode.Char.UnicodeData.DecomposableK"
-- , (`genDecomposableModule` Kompat))
-- [NOTE] Disabled generator
-- decompositions =
- -- ( "GHC.Unicode.Internal.Char.UnicodeData.Decompositions"
+ -- ( "GHC.Internal.Unicode.Char.UnicodeData.Decompositions"
-- , \m -> genDecomposeDefModule m [] [] Canonical (const True))
-- [NOTE] Disabled generator
-- decompositionsK2 =
- -- ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK2"
+ -- ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK2"
-- , \m -> genDecomposeDefModule m [] [] Kompat (>= 60000))
-- [NOTE] Disabled generator
-- decompositionsK =
-- let pre = ["import qualified " <> fst decompositionsK2 <> " as DK2", ""]
-- post = ["decompose c = DK2.decompose c"]
- -- in ( "GHC.Unicode.Internal.Char.UnicodeData.DecompositionsK"
+ -- in ( "GHC.Internal.Unicode.Char.UnicodeData.DecompositionsK"
-- , \m -> genDecomposeDefModule m pre post Kompat (< 60000))
generalCategory =
- ( "GHC.Unicode.Internal.Char.UnicodeData.GeneralCategory"
+ ( "GHC.Internal.Unicode.Char.UnicodeData.GeneralCategory"
, genGeneralCategoryModule)
simpleUpperCaseMapping =
- ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping"
+ ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleUpperCaseMapping"
, \m -> genSimpleCaseMappingModule m "toSimpleUpperCase" _simpleUppercaseMapping)
simpleLowerCaseMapping =
- ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping"
+ ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleLowerCaseMapping"
, \m -> genSimpleCaseMappingModule m "toSimpleLowerCase" _simpleLowercaseMapping)
simpleTitleCaseMapping =
- ( "GHC.Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping"
+ ( "GHC.Internal.Unicode.Char.UnicodeData.SimpleTitleCaseMapping"
, \m -> genSimpleCaseMappingModule m "toSimpleTitleCase" _simpleTitlecaseMapping)
-- unicode002Test =
=====================================
libraries/ghc-internal/tools/ucd2haskell/ucd.sh
=====================================
@@ -52,7 +52,7 @@ download_files() {
done
}
-GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../")
+GHC_MODULE_PATH=$(realpath "$SCRIPT_DIR/../../src")
# Generate the Haskell files.
run_generator() {
@@ -63,7 +63,7 @@ run_generator() {
--core-prop Uppercase \
--core-prop Lowercase
# [NOTE] disabled generator
- # --core-prop Alphabetic
+ # --core-prop Alphabetic
# --core-prop White_Space \
# --core-prop ID_Start \
# --core-prop ID_Continue \
=====================================
libraries/ghc-internal/tools/ucd2haskell/ucd2haskell.cabal
=====================================
@@ -52,10 +52,11 @@ executable ucd2haskell
main-is: UCD2Haskell.hs
other-modules: Parser.Text
build-depends:
- base >= 4.7 && < 4.18
- , streamly >= 0.8 && < 0.9
+ base >= 4.7 && < 4.20
+ , streamly-core >= 0.2.2 && < 0.3
+ , streamly >= 0.10 && < 0.11
, split >= 0.2.3 && < 0.3
, getopt-generics >= 0.13 && < 0.14
, containers >= 0.5 && < 0.7
- , directory >= 1.3.6 && < 1.3.7
+ , directory >= 1.3.6 && < 1.3.8
, filepath >= 1.4.2 && < 1.5
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/adb1fe42c00ceeddf6a4412550d8e34ac1b49ce9
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/adb1fe42c00ceeddf6a4412550d8e34ac1b49ce9
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240601/b263a8b4/attachment-0001.html>
More information about the ghc-commits
mailing list