[Git][ghc/ghc][master] 4 commits: JS: handle stored null StablePtr
Marge Bot (@marge-bot)
gitlab at gitlab.haskell.org
Mon Feb 12 17:19:28 UTC 2024
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
df9fd9f7 by Sylvain Henry at 2024-02-12T12:18:42-05:00
JS: handle stored null StablePtr
Some Haskell codes unsafely cast StablePtr into ptr to compare against
NULL. E.g. in direct-sqlite:
if castStablePtrToPtr aggStPtr /= nullPtr then
where `aggStPtr` is read (`peek`) from zeroed memory initially.
We fix this by giving these StablePtr the same representation as other
null pointers. It's safe because StablePtr at offset 0 is unused (for
this exact reason).
- - - - -
55346ede by Sylvain Henry at 2024-02-12T12:18:42-05:00
JS: disable MergeObjsMode test
This isn't implemented for JS backend objects.
- - - - -
aef587f6 by Sylvain Henry at 2024-02-12T12:18:42-05:00
JS: add support for linking C sources
Support linking C sources with JS output of the JavaScript backend.
See the added documentation in the users guide.
The implementation simply extends the JS linker to use the objects (.o)
that were already produced by the emcc compiler and which were filtered
out previously. I've also added some options to control the link with C
functions (see the documentation about pragmas).
With this change I've successfully compiled the direct-sqlite package
which embeds the sqlite.c database code. Some wrappers are still
required (see the documentation about wrappers) but everything generic
enough to be reused for other libraries have been integrated into
rts/js/mem.js.
- - - - -
b71b392f by Sylvain Henry at 2024-02-12T12:18:42-05:00
JS: avoid EMCC logging spurious failure
emcc would sometime output messages like:
cache:INFO: generating system asset: symbol_lists/424b44514e43d789148e69e4e7d1c7fdc0350b79.json... (this will be cached in "/emsdk/upstream/emscripten/cache/symbol_lists/424b44514e43d789148e69e4e7d1c7fdc0350b79.json" for subsequent builds)
cache:INFO: - ok
Cf https://github.com/emscripten-core/emscripten/issues/18607
This breaks our tests matching the stderr output. We avoid this by setting EMCC_LOGGING=0
- - - - -
29 changed files:
- compiler/GHC/Driver/Config/StgToJS.hs
- compiler/GHC/Driver/Flags.hs
- compiler/GHC/Driver/Pipeline.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Runtime/Interpreter/JS.hs
- compiler/GHC/StgToJS/Linker/Linker.hs
- compiler/GHC/StgToJS/Linker/Types.hs
- compiler/GHC/StgToJS/Linker/Utils.hs
- compiler/GHC/StgToJS/Object.hs
- compiler/GHC/StgToJS/Prim.hs
- compiler/GHC/StgToJS/Types.hs
- compiler/GHC/Utils/Binary.hs
- docs/users_guide/9.10.1-notes.rst
- docs/users_guide/debugging.rst
- docs/users_guide/javascript.rst
- rts/js/mem.js
- rts/js/rts.js
- rts/js/stableptr.js
- testsuite/driver/runtests.py
- testsuite/driver/testlib.py
- testsuite/tests/driver/MergeObjsMode/A.hs
- testsuite/tests/driver/MergeObjsMode/B.hs
- testsuite/tests/driver/MergeObjsMode/all.T
- testsuite/tests/driver/all.T
- testsuite/tests/driver/recomp011/all.T
- + testsuite/tests/javascript/js-c-sources/js-c-sources01.hs
- + testsuite/tests/javascript/js-c-sources/js-c-sources01.stdout
- + testsuite/tests/javascript/js-c-sources/js-c-sources01_c.c
- + testsuite/tests/javascript/js-c-sources/js-c-sources01_js.js
Changes:
=====================================
compiler/GHC/Driver/Config/StgToJS.hs
=====================================
@@ -1,11 +1,15 @@
module GHC.Driver.Config.StgToJS
( initStgToJSConfig
+ , initJSLinkConfig
)
where
import GHC.StgToJS.Types
+import GHC.StgToJS.Linker.Types
import GHC.Driver.DynFlags
+import GHC.Driver.Config.Linker
+
import GHC.Platform.Ways
import GHC.Utils.Outputable
@@ -30,4 +34,19 @@ initStgToJSConfig dflags = StgToJSConfig
, csRuntimeAssert = False
-- settings
, csContext = initSDocContext dflags defaultDumpStyle
+ , csLinkerConfig = initLinkerConfig dflags
+ }
+
+-- | Default linker configuration
+initJSLinkConfig :: DynFlags -> JSLinkConfig
+initJSLinkConfig dflags = JSLinkConfig
+ { lcNoJSExecutables = False
+ , lcNoHsMain = False
+ , lcNoRts = False
+ , lcNoStats = False
+ , lcCombineAll = True
+ , lcForeignRefs = True
+ , lcForceEmccRts = False
+ , lcLinkCsources = not (gopt Opt_DisableJsCsources dflags)
}
+
=====================================
compiler/GHC/Driver/Flags.hs
=====================================
@@ -328,6 +328,7 @@ data GeneralFlag
-- JavaScript opts
| Opt_DisableJsMinifier -- ^ render JavaScript pretty-printed instead of minified (compacted)
+ | Opt_DisableJsCsources -- ^ don't link C sources (compiled to JS) with Haskell code (compiled to JS)
-- profiling opts
| Opt_AutoSccsOnIndividualCafs
=====================================
compiler/GHC/Driver/Pipeline.hs
=====================================
@@ -76,7 +76,6 @@ import GHC.Linker.Static.Utils
import GHC.Linker.Types
import GHC.StgToJS.Linker.Linker
-import GHC.StgToJS.Linker.Types (defaultJSLinkConfig)
import GHC.Utils.Outputable
import GHC.Utils.Error
@@ -440,7 +439,7 @@ link' logger tmpfs fc dflags unit_env batch_attempt_linking mHscMessager hpt
-- Don't showPass in Batch mode; doLink will do that for us.
case ghcLink dflags of
LinkBinary
- | backendUseJSLinker (backend dflags) -> linkJSBinary logger fc dflags unit_env obj_files pkg_deps
+ | backendUseJSLinker (backend dflags) -> linkJSBinary logger tmpfs fc dflags unit_env obj_files pkg_deps
| otherwise -> linkBinary logger tmpfs dflags unit_env obj_files pkg_deps
LinkStaticLib -> linkStaticLib logger dflags unit_env obj_files pkg_deps
LinkDynLib -> linkDynLibCheck logger tmpfs dflags unit_env obj_files pkg_deps
@@ -457,14 +456,13 @@ link' logger tmpfs fc dflags unit_env batch_attempt_linking mHscMessager hpt
return Succeeded
-linkJSBinary :: Logger -> FinderCache -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO ()
-linkJSBinary logger fc dflags unit_env obj_files pkg_deps = do
+linkJSBinary :: Logger -> TmpFs -> FinderCache -> DynFlags -> UnitEnv -> [FilePath] -> [UnitId] -> IO ()
+linkJSBinary logger tmpfs fc dflags unit_env obj_files pkg_deps = do
-- we use the default configuration for now. In the future we may expose
-- settings to the user via DynFlags.
- let lc_cfg = defaultJSLinkConfig
+ let lc_cfg = initJSLinkConfig dflags
let cfg = initStgToJSConfig dflags
- let extra_js = mempty
- jsLinkBinary fc lc_cfg cfg extra_js logger dflags unit_env obj_files pkg_deps
+ jsLinkBinary fc lc_cfg cfg logger tmpfs dflags unit_env obj_files pkg_deps
linkingNeeded :: Logger -> DynFlags -> UnitEnv -> Bool -> [Linkable] -> [UnitId] -> IO RecompileRequired
linkingNeeded logger dflags unit_env staticLink linkables pkg_deps = do
@@ -582,7 +580,7 @@ doLink hsc_env o_files = do
NoLink -> return ()
LinkBinary
| backendUseJSLinker (backend dflags)
- -> linkJSBinary logger fc dflags unit_env o_files []
+ -> linkJSBinary logger tmpfs fc dflags unit_env o_files []
| otherwise -> linkBinary logger tmpfs dflags unit_env o_files []
LinkStaticLib -> linkStaticLib logger dflags unit_env o_files []
LinkDynLib -> linkDynLibCheck logger tmpfs dflags unit_env o_files []
=====================================
compiler/GHC/Driver/Session.hs
=====================================
@@ -1904,6 +1904,7 @@ dynamic_flags_deps = [
------ JavaScript flags -----------------------------------------------
++ [ make_ord_flag defFlag "ddisable-js-minifier" (NoArg (setGeneralFlag Opt_DisableJsMinifier))
+ , make_ord_flag defFlag "ddisable-js-c-sources" (NoArg (setGeneralFlag Opt_DisableJsCsources))
]
------ Language flags -------------------------------------------------
=====================================
compiler/GHC/Runtime/Interpreter/JS.hs
=====================================
@@ -186,6 +186,18 @@ spawnJSInterp cfg = do
, instExtra = extra
}
+ -- TODO: to support incremental linking of wasm modules (e.g. produced from C
+ -- sources), we should:
+ --
+ -- 1. link the emcc rts without trimming dead code as we don't know what might
+ -- be needed later by the Wasm modules we will dynamically load (cf
+ -- -sMAIN_MODULE).
+ -- 2. make the RUN_SERVER command wait for the emcc rts to be loaded.
+ -- 3. link wasm modules with -sSIDE_MODULE
+ -- 4. add a new command to load side modules with Emscripten's dlopen
+ --
+ -- cf https://emscripten.org/docs/compiling/Dynamic-Linking.html
+
-- link rts and its deps
jsLinkRts logger tmpfs tmp_dir codegen_cfg unit_env inst
@@ -213,22 +225,26 @@ jsLinkRts logger tmpfs tmp_dir cfg unit_env inst = do
, lcForeignRefs = False -- we don't need foreign references
, lcNoJSExecutables = True -- we don't need executables
, lcNoHsMain = True -- nor HsMain
+ , lcForceEmccRts = False -- nor the emcc rts
+ , lcLinkCsources = False -- we know that there are no C sources to load for the RTS
}
-- link the RTS and its dependencies (things it uses from `base`, etc.)
let link_spec = LinkSpec
{ lks_unit_ids = [rtsUnitId, ghcInternalUnitId, primUnitId]
- , lks_obj_files = mempty
, lks_obj_root_filter = const False
, lks_extra_roots = mempty
- , lks_extra_js = mempty
+ , lks_objs_hs = mempty
+ , lks_objs_js = mempty
+ , lks_objs_cc = mempty
}
let finder_opts = instFinderOpts (instExtra inst)
finder_cache = instFinderCache (instExtra inst)
- link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache
- jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan
+ ar_cache <- newArchiveCache
+ link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache
+ jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan
-- | Link JS interpreter
jsLinkInterp :: Logger -> TmpFs -> TempDir -> StgToJSConfig -> UnitEnv -> ExtInterpInstance JSInterpExtra -> IO ()
@@ -241,6 +257,8 @@ jsLinkInterp logger tmpfs tmp_dir cfg unit_env inst = do
, lcForeignRefs = False -- we don't need foreign references
, lcNoJSExecutables = True -- we don't need executables
, lcNoHsMain = True -- nor HsMain
+ , lcForceEmccRts = False -- nor the emcc rts
+ , lcLinkCsources = True -- enable C sources, if any
}
let is_root _ = True -- FIXME: we shouldn't consider every function as a root
@@ -258,18 +276,19 @@ jsLinkInterp logger tmpfs tmp_dir cfg unit_env inst = do
-- link the interpreter and its dependencies
let link_spec = LinkSpec
{ lks_unit_ids = units
- , lks_obj_files = mempty
, lks_obj_root_filter = is_root
, lks_extra_roots = root_deps
- , lks_extra_js = mempty
+ , lks_objs_hs = mempty
+ , lks_objs_js = mempty
+ , lks_objs_cc = mempty
}
let finder_cache = instFinderCache (instExtra inst)
finder_opts = instFinderOpts (instExtra inst)
- link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache
-
- jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan
+ ar_cache <- newArchiveCache
+ link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache
+ jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan
-- | Link object files
@@ -282,6 +301,8 @@ jsLinkObjects logger tmpfs tmp_dir cfg unit_env inst objs is_root = do
, lcForeignRefs = False -- we don't need foreign references
, lcNoJSExecutables = True -- we don't need executables
, lcNoHsMain = True -- nor HsMain
+ , lcForceEmccRts = False -- nor the emcc rts
+ , lcLinkCsources = True -- enable C sources, if any
}
let units = preloadUnits (ue_units unit_env)
@@ -290,19 +311,19 @@ jsLinkObjects logger tmpfs tmp_dir cfg unit_env inst objs is_root = do
-- compute dependencies
let link_spec = LinkSpec
{ lks_unit_ids = units
- , lks_obj_files = fmap ObjFile objs
, lks_obj_root_filter = is_root
, lks_extra_roots = mempty
- , lks_extra_js = mempty
+ , lks_objs_hs = objs
+ , lks_objs_js = mempty
+ , lks_objs_cc = mempty
}
let finder_opts = instFinderOpts (instExtra inst)
finder_cache = instFinderCache (instExtra inst)
- link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache
-
- -- link
- jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan
+ ar_cache <- newArchiveCache
+ link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache
+ jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan
@@ -317,8 +338,8 @@ jsLinkObject logger tmpfs tmp_dir cfg unit_env inst obj roots = do
-- | Link the given link plan
--
-- Perform incremental linking by removing what is already linked from the plan
-jsLinkPlan :: Logger -> TmpFs -> TempDir -> JSLinkConfig -> StgToJSConfig -> ExtInterpInstance JSInterpExtra -> LinkPlan -> IO ()
-jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan = do
+jsLinkPlan :: Logger -> TmpFs -> TempDir -> ArchiveCache -> JSLinkConfig -> StgToJSConfig -> ExtInterpInstance JSInterpExtra -> LinkPlan -> IO ()
+jsLinkPlan logger tmpfs tmp_dir ar_cache link_cfg cfg inst link_plan = do
----------------------------------------------------------------
-- Get already linked stuff and compute incremental plan
----------------------------------------------------------------
@@ -333,7 +354,7 @@ jsLinkPlan logger tmpfs tmp_dir link_cfg cfg inst link_plan = do
----------------------------------------------------------------
tmp_out <- newTempSubDir logger tmpfs tmp_dir
- void $ jsLink link_cfg cfg logger tmp_out diff_plan
+ void $ jsLink link_cfg cfg logger tmpfs ar_cache tmp_out diff_plan
-- Code has been linked into the following files:
-- - generated rts from tmp_out/rts.js (depends on link options)
=====================================
compiler/GHC/StgToJS/Linker/Linker.hs
=====================================
@@ -1,6 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE BlockArguments #-}
-----------------------------------------------------------------------------
-- |
@@ -32,10 +33,12 @@ module GHC.StgToJS.Linker.Linker
, LinkPlan (..)
, emptyLinkPlan
, incrementLinkPlan
+ , ArchiveCache
+ , newArchiveCache
)
where
-import Prelude
+import GHC.Prelude
import GHC.Platform.Host (hostPlatformArchOS)
@@ -54,6 +57,7 @@ import GHC.SysTools
import GHC.Linker.Static.Utils (exeFileName)
import GHC.Linker.Types (Unlinked(..), linkableUnlinked)
+import GHC.Linker.External
import GHC.StgToJS.Linker.Types
import GHC.StgToJS.Linker.Utils
@@ -78,7 +82,6 @@ import GHC.Utils.Error
import GHC.Utils.Logger (Logger, logVerbAtLeast)
import GHC.Utils.Binary
import qualified GHC.Utils.Ppr as Ppr
-import GHC.Utils.Monad
import GHC.Utils.TmpFs
import GHC.Types.Unique.Set
@@ -105,6 +108,7 @@ import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as S
import Data.Word
+import Data.Monoid
import System.IO
import System.FilePath ((<.>), (</>), dropExtension, takeDirectory)
@@ -125,10 +129,10 @@ data LinkerStats = LinkerStats
, packedMetaDataSize :: !Word64 -- ^ number of bytes for metadata
}
-newtype ArchiveState = ArchiveState { loadedArchives :: IORef (Map FilePath Ar.Archive) }
+newtype ArchiveCache = ArchiveCache { loadedArchives :: IORef (Map FilePath Ar.Archive) }
-emptyArchiveState :: IO ArchiveState
-emptyArchiveState = ArchiveState <$> newIORef M.empty
+newArchiveCache :: IO ArchiveCache
+newArchiveCache = ArchiveCache <$> newIORef M.empty
defaultJsContext :: SDocContext
defaultJsContext = defaultSDocContext{sdocStyle = PprCode}
@@ -137,52 +141,70 @@ jsLinkBinary
:: FinderCache
-> JSLinkConfig
-> StgToJSConfig
- -> [FilePath]
-> Logger
+ -> TmpFs
-> DynFlags
-> UnitEnv
-> [FilePath]
-> [UnitId]
-> IO ()
-jsLinkBinary finder_cache lc_cfg cfg js_srcs logger dflags unit_env objs dep_units
+jsLinkBinary finder_cache lc_cfg cfg logger tmpfs dflags unit_env hs_objs dep_units
| lcNoJSExecutables lc_cfg = return ()
| otherwise = do
+
-- additional objects to link are passed as FileOption ldInputs...
let cmdline_objs = [ f | FileOption _ f <- ldInputs dflags ]
- -- discriminate JavaScript sources from real object files.
- (cmdline_js_srcs, cmdline_js_objs) <- partitionM isJsFile cmdline_objs
+
+ -- cmdline objects: discriminate between the 3 kinds of objects we have
+ let disc hss jss ccs = \case
+ [] -> pure (hss, jss, ccs)
+ (o:os) -> getObjectKind o >>= \case
+ Just ObjHs -> disc (o:hss) jss ccs os
+ Just ObjJs -> disc hss (o:jss) ccs os
+ Just ObjCc -> disc hss jss (o:ccs) os
+ Nothing -> do
+ logInfo logger (vcat [text "Ignoring unexpected command-line object: ", text o])
+ disc hss jss ccs os
+ (cmdline_hs_objs, cmdline_js_objs, cmdline_cc_objs) <- disc [] [] [] cmdline_objs
+
let
- objs' = map ObjFile (objs ++ cmdline_js_objs)
- js_srcs' = js_srcs ++ cmdline_js_srcs
- is_root _ = True -- FIXME: we shouldn't consider every function as a root,
- -- but only the program entry point (main), either the
- -- generated one or coming from an object
- exe = jsExeFileName dflags
+ exe = jsExeFileName dflags
+ all_hs_objs = hs_objs ++ cmdline_hs_objs
+ all_js_objs = cmdline_js_objs
+ all_cc_objs = cmdline_cc_objs
+ is_root _ = True
+ -- FIXME: we shouldn't consider every function as a root,
+ -- but only the program entry point (main), either the
+ -- generated one or coming from an object
-- compute dependencies
let link_spec = LinkSpec
{ lks_unit_ids = dep_units
- , lks_obj_files = objs'
, lks_obj_root_filter = is_root
, lks_extra_roots = mempty
- , lks_extra_js = js_srcs'
+ , lks_objs_hs = all_hs_objs
+ , lks_objs_js = all_js_objs
+ , lks_objs_cc = all_cc_objs
}
let finder_opts = initFinderOpts dflags
+ ar_cache <- newArchiveCache
- link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache
+ link_plan <- computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache
- void $ jsLink lc_cfg cfg logger exe link_plan
+ void $ jsLink lc_cfg cfg logger tmpfs ar_cache exe link_plan
-- | link and write result to disk (jsexe directory)
jsLink
:: JSLinkConfig
-> StgToJSConfig
-> Logger
+ -> TmpFs
+ -> ArchiveCache
-> FilePath -- ^ output file/directory
-> LinkPlan
-> IO ()
-jsLink lc_cfg cfg logger out link_plan = do
+jsLink lc_cfg cfg logger tmpfs ar_cache out link_plan = do
-- create output directory
createDirectoryIfMissing False out
@@ -194,11 +216,11 @@ jsLink lc_cfg cfg logger out link_plan = do
-- link all Haskell code (program + dependencies) into out.js
-- retrieve code for Haskell dependencies
- mods <- collectModuleCodes link_plan
+ mods <- collectModuleCodes ar_cache link_plan
-- LTO + rendering of JS code
link_stats <- withBinaryFile (out </> "out.js") WriteMode $ \h ->
- renderLinker h (csPrettyRender cfg) mods (lkp_extra_js link_plan)
+ renderModules h (csPrettyRender cfg) mods
-------------------------------------------------------------
@@ -224,49 +246,128 @@ jsLink lc_cfg cfg logger out link_plan = do
void $
hPutJS (csPrettyRender cfg) h (jsOptimize $ runJSM jsm $ jStgStatToJS <$> rts cfg)
- -- link dependencies' JS files into lib.js
- withBinaryFile (out </> "lib.js") WriteMode $ \h -> do
- forM_ (lkp_archives link_plan) $ \archive_file -> do
- Ar.Archive entries <- Ar.loadAr archive_file
- forM_ entries $ \entry -> do
- case getJsArchiveEntry entry of
- Nothing -> return ()
- Just bs -> do
- B.hPut h bs
+ -- link user-provided JS files into lib.js
+ (emcc_opts,lib_cc_objs) <- withBinaryFile (out </> "lib.js") WriteMode $ \h -> do
+
+ let
+ tmp_dir = linkerTempDir (csLinkerConfig cfg)
+
+ -- JS objects from dependencies' archives (.a)
+ go_archives emcc_opts cc_objs = \case
+ [] -> pure (emcc_opts, cc_objs)
+ (a:as) -> do
+ Ar.Archive entries <- loadArchive ar_cache a
+ (emcc_opts', cc_objs') <- go_entries emcc_opts cc_objs entries
+ go_archives emcc_opts' cc_objs' as
+
+ -- archive's entries
+ go_entries emcc_opts cc_objs = \case
+ [] -> pure (emcc_opts, cc_objs)
+ (e:es) -> case getObjectKindBS (Ar.filedata e) of
+ Just ObjHs -> do
+ -- Nothing to do. HS objects are collected in
+ -- collectModuleCodes
+ go_entries emcc_opts cc_objs es
+ Just ObjCc -> do
+ -- extract the object file from the archive in a temporary
+ -- file and return its path
+ cc_obj_fn <- newTempName logger tmpfs tmp_dir TFL_CurrentModule "o"
+ B.writeFile cc_obj_fn (Ar.filedata e)
+ let cc_objs' = cc_obj_fn:cc_objs
+ go_entries emcc_opts cc_objs' es
+ Just ObjJs -> do
+ -- extract the JS code and append it to the `lib.js` file
+ (opts,bs) <- parseJSObjectBS (Ar.filedata e)
+ B.hPut h bs
+ hPutChar h '\n'
+ let emcc_opts' = emcc_opts <> opts
+ go_entries emcc_opts' cc_objs es
+ Nothing -> do
+ logInfo logger (vcat [text "Ignoring unexpected archive entry: ", text (Ar.filename e)])
+ go_entries emcc_opts cc_objs es
+
+ -- additional JS objects (e.g. from the command-line)
+ go_extra emcc_opts = \case
+ [] -> pure emcc_opts
+ (e:es) -> do
+ (opts,bs) <- readJSObject e
+ B.hPut h bs
hPutChar h '\n'
+ let emcc_opts' = emcc_opts <> opts
+ go_extra emcc_opts' es
+
+ -- archives
+ (emcc_opts0, cc_objs) <- go_archives defaultJSOptions [] (S.toList (lkp_archives link_plan))
+ -- extra object files
+ emcc_opts1 <- go_extra emcc_opts0 (S.toList (lkp_objs_js link_plan))
+ pure (emcc_opts1,cc_objs)
+
+
+ -- Link Cc objects using emcc's linker
+ --
+ -- Cc objects have been extracted from archives (see above) and are listed
+ -- in lib_cc_objs.
+ --
+ -- We don't link C sources if there are none (obviously) or if asked
+ -- explicitly by the user with -ddisable-js-c-sources (mostly used for
+ -- debugging purpose).
+ let emcc_objs = lib_cc_objs ++ S.toList (lkp_objs_cc link_plan)
+ let has_emcc_objs = not (null emcc_objs)
+ let link_c_sources = lcLinkCsources lc_cfg && has_emcc_objs
+
+ when link_c_sources $ do
+
+ runLink logger tmpfs (csLinkerConfig cfg) $
+ [ Option "-o"
+ , FileOption "" (out </> "clibs.js")
+ -- Embed wasm files into a single .js file
+ , Option "-sSINGLE_FILE=1"
+ -- Enable support for addFunction (callbacks)
+ , Option "-sALLOW_TABLE_GROWTH"
+ -- keep some RTS methods and functions (otherwise removed as dead
+ -- code)
+ , Option ("-sEXPORTED_RUNTIME_METHODS=" ++ concat (intersperse "," (emccExportedRuntimeMethods emcc_opts)))
+ , Option ("-sEXPORTED_FUNCTIONS=" ++ concat (intersperse "," (emccExportedFunctions emcc_opts)))
+ ]
+ -- pass extra options from JS files' pragmas
+ ++ map Option (emccExtraOptions emcc_opts)
+ -- link objects
+ ++ map (FileOption "") emcc_objs
+
+ -- Don't enable the Emcc rts when not needed (i.e. no Wasm module to link
+ -- with) and not forced by the caller (e.g. in the future iserv may require
+ -- incremental linking of Wasm modules, hence the emcc rts even building
+ -- iserv itself doesn't require the emcc rts)
+ let use_emcc_rts = UseEmccRts $ link_c_sources || lcForceEmccRts lc_cfg
+
-- link everything together into a runnable all.js
-- only if we link a complete application,
-- no incremental linking and no skipped parts
when (lcCombineAll lc_cfg && not (lcNoRts lc_cfg)) $ do
- _ <- combineFiles lc_cfg out
+ writeRunMain out use_emcc_rts
+ _ <- combineFiles lc_cfg link_c_sources out
writeHtml out
- writeRunMain out
writeRunner lc_cfg out
writeExterns out
data LinkSpec = LinkSpec
{ lks_unit_ids :: [UnitId]
-
- , lks_obj_files :: [LinkedObj]
-
- , lks_obj_root_filter :: ExportedFun -> Bool
- -- ^ Predicate for exported functions in objects to declare as root
-
- , lks_extra_roots :: Set ExportedFun
- -- ^ Extra root functions from loaded units
-
- , lks_extra_js :: [FilePath]
- -- ^ Extra JS files to link
+ , lks_obj_root_filter :: ExportedFun -> Bool -- ^ Predicate for exported functions in objects to declare as root
+ , lks_extra_roots :: Set ExportedFun -- ^ Extra root functions from loaded units
+ , lks_objs_hs :: [FilePath] -- ^ HS objects to link
+ , lks_objs_js :: [FilePath] -- ^ JS objects to link
+ , lks_objs_cc :: [FilePath] -- ^ Cc objects to link
}
instance Outputable LinkSpec where
ppr s = hang (text "LinkSpec") 2 $ vcat
[ hcat [text "Unit ids: ", ppr (lks_unit_ids s)]
- , hcat [text "Object files:", ppr (lks_obj_files s)]
+ , hcat [text "HS objects:", vcat (fmap text (lks_objs_hs s))]
+ , hang (text "JS objects::") 2 (vcat (fmap text (lks_objs_js s)))
+ , hang (text "Cc objects::") 2 (vcat (fmap text (lks_objs_cc s)))
, text "Object root filter: <function>"
, hcat [text "Extra roots: ", ppr (lks_extra_roots s)]
- , hang (text "Extra JS:") 2 (vcat (fmap text (lks_extra_js s)))
]
emptyLinkPlan :: LinkPlan
@@ -274,7 +375,8 @@ emptyLinkPlan = LinkPlan
{ lkp_block_info = mempty
, lkp_dep_blocks = mempty
, lkp_archives = mempty
- , lkp_extra_js = mempty
+ , lkp_objs_js = mempty
+ , lkp_objs_cc = mempty
}
-- | Given a `base` link plan (assumed to be already linked) and a `new` link
@@ -289,13 +391,15 @@ incrementLinkPlan base new = (diff,total)
{ lkp_block_info = M.union (lkp_block_info base) (lkp_block_info new)
, lkp_dep_blocks = S.union (lkp_dep_blocks base) (lkp_dep_blocks new)
, lkp_archives = S.union (lkp_archives base) (lkp_archives new)
- , lkp_extra_js = S.union (lkp_extra_js base) (lkp_extra_js new)
+ , lkp_objs_js = S.union (lkp_objs_js base) (lkp_objs_js new)
+ , lkp_objs_cc = S.union (lkp_objs_cc base) (lkp_objs_cc new)
}
diff = LinkPlan
{ lkp_block_info = lkp_block_info new -- block info from "new" contains all we need to load new blocks
, lkp_dep_blocks = S.difference (lkp_dep_blocks new) (lkp_dep_blocks base)
, lkp_archives = S.difference (lkp_archives new) (lkp_archives base)
- , lkp_extra_js = S.difference (lkp_extra_js new) (lkp_extra_js base)
+ , lkp_objs_js = S.difference (lkp_objs_js new) (lkp_objs_js base)
+ , lkp_objs_cc = S.difference (lkp_objs_cc new) (lkp_objs_cc base)
}
@@ -305,11 +409,14 @@ computeLinkDependencies
-> LinkSpec
-> FinderOpts
-> FinderCache
+ -> ArchiveCache
-> IO LinkPlan
-computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do
+computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache ar_cache = do
let units = lks_unit_ids link_spec
- let obj_files = lks_obj_files link_spec
+ let hs_objs = lks_objs_hs link_spec
+ let js_objs = lks_objs_js link_spec
+ let cc_objs = lks_objs_cc link_spec
let extra_roots = lks_extra_roots link_spec
let obj_is_root = lks_obj_root_filter link_spec
@@ -323,7 +430,7 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do
-- find/load linkable on-demand when a module is missing.
- (objs_block_info, objs_required_blocks) <- loadObjBlockInfo obj_files
+ (objs_block_info, objs_required_blocks) <- loadObjBlockInfo hs_objs
let obj_roots = S.fromList . filter obj_is_root $ concatMap (M.keys . bi_exports . lbi_info) (M.elems objs_block_info)
obj_units = map moduleUnitId $ nub (M.keys objs_block_info)
@@ -343,7 +450,7 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do
let all_units = fmap unitId all_units_infos
dep_archives <- getPackageArchives cfg unit_env all_units
- (archives_block_info, archives_required_blocks) <- loadArchiveBlockInfo dep_archives
+ (archives_block_info, archives_required_blocks) <- loadArchiveBlockInfo ar_cache dep_archives
-- compute dependencies
let block_info = objs_block_info `M.union` archives_block_info
@@ -378,7 +485,7 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do
case linkableUnlinked linkable of
[DotO p] -> do
- (bis, req_b) <- loadObjBlockInfo [ObjFile p]
+ (bis, req_b) <- loadObjBlockInfo [p]
-- Store new required blocks in IORef
modifyIORef new_required_blocks_var ((++) req_b)
case M.lookup mod bis of
@@ -405,7 +512,8 @@ computeLinkDependencies cfg unit_env link_spec finder_opts finder_cache = do
{ lkp_block_info = updated_block_info
, lkp_dep_blocks = all_deps
, lkp_archives = S.fromList dep_archives
- , lkp_extra_js = S.fromList (lks_extra_js link_spec)
+ , lkp_objs_js = S.fromList js_objs
+ , lkp_objs_cc = S.fromList cc_objs
}
return plan
@@ -450,19 +558,17 @@ hPutJS render_pretty h = \case
pure $! (after - before)
-- | Link modules and pretty-print them into the given Handle
-renderLinker
+renderModules
:: Handle
-> Bool -- ^ should we render readable JS for debugging?
-> [ModuleCode] -- ^ linked code per module
- -> Set FilePath -- ^ additional JS files
-> IO LinkerStats
-renderLinker h render_pretty mods js_files = do
+renderModules h render_pretty mods = do
-- link modules
let (compacted_mods, meta) = linkModules mods
let
- putBS = B.hPut h
putJS = hPutJS render_pretty h
---------------------------------------------------------
@@ -482,13 +588,10 @@ renderLinker h render_pretty mods js_files = do
!meta_length <- fromIntegral <$> putJS (jsOptimize meta)
-- module exports
- mapM_ (putBS . cmc_exports) compacted_mods
-
- -- explicit additional JS files
- mapM_ (\i -> B.readFile i >>= putBS) (S.toList js_files)
+ mapM_ (B.hPut h . cmc_exports) compacted_mods
-- stats
- let link_stats = LinkerStats
+ let !link_stats = LinkerStats
{ bytesPerModule = M.fromList mod_sizes
, packedMetaDataSize = meta_length
}
@@ -547,15 +650,20 @@ getPackageArchives cfg unit_env units =
-- | Combine rts.js, lib.js, out.js to all.js that can be run
-- directly with node.js or SpiderMonkey jsshell
combineFiles :: JSLinkConfig
+ -> Bool -- has clibs.js
-> FilePath
-> IO ()
-combineFiles cfg fp = do
- let files = map (fp </>) ["rts.js", "lib.js", "out.js"]
- withBinaryFile (fp </> "all.js") WriteMode $ \h -> do
- let cpy i = B.readFile i >>= B.hPut h
- mapM_ cpy files
- unless (lcNoHsMain cfg) $ do
- B.hPut h runMainJS
+combineFiles cfg has_clibs fp = do
+ let files = map (fp </>) $ catMaybes
+ [ Just "rts.js"
+ , Just "lib.js"
+ , Just "out.js"
+ , if has_clibs then Just "clibs.js" else Nothing
+ , if lcNoHsMain cfg then Nothing else Just "runmain.js"
+ ]
+ withBinaryFile (fp </> "all.js") WriteMode $ \h ->
+ forM_ files $ \i ->
+ B.readFile i >>= B.hPut h
-- | write the index.html file that loads the program if it does not exit
writeHtml
@@ -583,15 +691,21 @@ templateHtml =
-- index.html is loaded
writeRunMain
:: FilePath -- ^ output directory
+ -> UseEmccRts
-> IO ()
-writeRunMain out = do
+writeRunMain out use_emcc_rts = do
let runMainFile = out </> "runmain.js"
- e <- doesFileExist runMainFile
- unless e $
- B.writeFile runMainFile runMainJS
+ B.writeFile runMainFile (runMainJS use_emcc_rts)
-runMainJS :: B.ByteString
-runMainJS = "h$main(h$mainZCZCMainzimain);\n"
+newtype UseEmccRts = UseEmccRts Bool
+
+runMainJS :: UseEmccRts -> B.ByteString
+runMainJS (UseEmccRts use_emcc_rts) = if use_emcc_rts
+ then "Module['onRuntimeInitialized'] = function() {\n\
+ \h$initEmscriptenHeap();\n\
+ \h$main(h$mainZCZCMainzimain);\n\
+ \}\n"
+ else "h$main(h$mainZCZCMainzimain);\n"
writeRunner :: JSLinkConfig -- ^ Settings
-> FilePath -- ^ Output directory
@@ -711,8 +825,8 @@ getDeps init_infos load_info root_funs root_blocks = traverse_funs init_infos S.
in open `S.union` S.fromList (filter (not . alreadyLinked) new_blocks)
-- | collect dependencies for a set of roots
-collectModuleCodes :: LinkPlan -> IO [ModuleCode]
-collectModuleCodes link_plan = do
+collectModuleCodes :: ArchiveCache -> LinkPlan -> IO [ModuleCode]
+collectModuleCodes ar_cache link_plan = do
let block_info = lkp_block_info link_plan
let blocks = lkp_dep_blocks link_plan
@@ -738,13 +852,12 @@ collectModuleCodes link_plan = do
sorted_module_blocks = sortBy cmp (M.toList module_blocks)
-- load blocks
- ar_state <- emptyArchiveState
forM sorted_module_blocks $ \(mod,bids) -> do
case M.lookup mod block_info of
Nothing -> pprPanic "collectModuleCodes: couldn't find block info for module" (ppr mod)
- Just lbi -> extractBlocks ar_state lbi bids
+ Just lbi -> extractBlocks ar_cache lbi bids
-extractBlocks :: ArchiveState -> LocatedBlockInfo -> BlockIds -> IO ModuleCode
+extractBlocks :: ArchiveCache -> LocatedBlockInfo -> BlockIds -> IO ModuleCode
extractBlocks ar_state lbi blocks = do
case lbi_loc lbi of
ObjectFile fp -> do
@@ -771,16 +884,22 @@ extractBlocks ar_state lbi blocks = do
, mc_frefs = concatMap oiFImports l
}
-readArObject :: ArchiveState -> Module -> FilePath -> IO Object
-readArObject ar_state mod ar_file = do
- loaded_ars <- readIORef (loadedArchives ar_state)
- (Ar.Archive entries) <- case M.lookup ar_file loaded_ars of
+-- | Load an archive in memory and store it in the cache for future loads.
+loadArchive :: ArchiveCache -> FilePath -> IO Ar.Archive
+loadArchive ar_cache ar_file = do
+ loaded_ars <- readIORef (loadedArchives ar_cache)
+ case M.lookup ar_file loaded_ars of
Just a -> pure a
Nothing -> do
a <- Ar.loadAr ar_file
- modifyIORef (loadedArchives ar_state) (M.insert ar_file a)
+ modifyIORef (loadedArchives ar_cache) (M.insert ar_file a)
pure a
+
+readArObject :: ArchiveCache -> Module -> FilePath -> IO Object
+readArObject ar_cache mod ar_file = do
+ Ar.Archive entries <- loadArchive ar_cache ar_file
+
-- look for the right object in archive
let go_entries = \case
-- XXX this shouldn't be an exception probably
@@ -888,15 +1007,16 @@ mkExportedModFuns mod symbols = map mk_fun symbols
mk_fun sym = ExportedFun mod (LexicalFastString sym)
-- | read all dependency data from the to-be-linked files
-loadObjBlockInfo :: [LinkedObj] -- ^ object files to link
- -> IO (Map Module LocatedBlockInfo, [BlockRef])
+loadObjBlockInfo
+ :: [FilePath] -- ^ object files to link
+ -> IO (Map Module LocatedBlockInfo, [BlockRef])
loadObjBlockInfo objs = (prepareLoadedDeps . catMaybes) <$> mapM readBlockInfoFromObj objs
-- | Load dependencies for the Linker from Ar
-loadArchiveBlockInfo :: [FilePath] -> IO (Map Module LocatedBlockInfo, [BlockRef])
-loadArchiveBlockInfo archives = do
+loadArchiveBlockInfo :: ArchiveCache -> [FilePath] -> IO (Map Module LocatedBlockInfo, [BlockRef])
+loadArchiveBlockInfo ar_cache archives = do
archDeps <- forM archives $ \file -> do
- (Ar.Archive entries) <- Ar.loadAr file
+ (Ar.Archive entries) <- loadArchive ar_cache file
catMaybes <$> mapM (readEntry file) entries
return (prepareLoadedDeps $ concat archDeps)
where
@@ -911,34 +1031,6 @@ loadArchiveBlockInfo archives = do
let !info = objBlockInfo obj
pure $ Just (LocatedBlockInfo (ArchiveFile ar_file) info)
--- | Predicate to check that an entry in Ar is a JS source
--- and to return it without its header
-getJsArchiveEntry :: Ar.ArchiveEntry -> Maybe B.ByteString
-getJsArchiveEntry entry = getJsBS (Ar.filedata entry)
-
--- | Predicate to check that a file is a JS source
-isJsFile :: FilePath -> IO Bool
-isJsFile fp = withBinaryFile fp ReadMode $ \h -> do
- bs <- B.hGet h jsHeaderLength
- pure (isJsBS bs)
-
-isJsBS :: B.ByteString -> Bool
-isJsBS bs = isJust (getJsBS bs)
-
--- | Get JS source with its header (if it's one)
-getJsBS :: B.ByteString -> Maybe B.ByteString
-getJsBS bs = B.stripPrefix jsHeader bs
-
--- Header added to JS sources to discriminate them from other object files.
--- They all have .o extension but JS sources have this header.
-jsHeader :: B.ByteString
-jsHeader = "//JavaScript"
-
-jsHeaderLength :: Int
-jsHeaderLength = B.length jsHeader
-
-
-
prepareLoadedDeps :: [LocatedBlockInfo]
-> (Map Module LocatedBlockInfo, [BlockRef])
prepareLoadedDeps lbis = (module_blocks, must_link)
@@ -956,25 +1048,21 @@ requiredBlocks d = map mk_block_ref (IS.toList $ bi_must_link d)
-- | read block info from an object that might have already been into memory
-- pulls in all Deps from an archive
-readBlockInfoFromObj :: LinkedObj -> IO (Maybe LocatedBlockInfo)
-readBlockInfoFromObj = \case
- ObjLoaded name obj -> do
- let !info = objBlockInfo obj
- pure $ Just (LocatedBlockInfo (InMemory name obj) info)
- ObjFile file -> do
- readObjectBlockInfo file >>= \case
- Nothing -> pure Nothing
- Just info -> pure $ Just (LocatedBlockInfo (ObjectFile file) info)
-
-
--- | Embed a JS file into a .o file
---
--- The JS file is merely copied into a .o file with an additional header
--- ("//Javascript") in order to be recognized later on.
+readBlockInfoFromObj :: FilePath -> IO (Maybe LocatedBlockInfo)
+readBlockInfoFromObj file = do
+ readObjectBlockInfo file >>= \case
+ Nothing -> pure Nothing
+ Just info -> pure $ Just (LocatedBlockInfo (ObjectFile file) info)
+
+
+-- | Embed a JS file into a JS object .o file
--
-- JS files may contain option pragmas of the form: //#OPTIONS:
--- For now, only the CPP option is supported. If the CPP option is set, we
--- append some common CPP definitions to the file and call cpp on it.
+-- One of those is //#OPTIONS:CPP. When it is set, we append some common CPP
+-- definitions to the file and call cpp on it.
+--
+-- Other options (e.g. EMCC additional flags for link time) are stored in the
+-- JS object header. See JSOptions.
embedJsFile :: Logger -> DynFlags -> TmpFs -> UnitEnv -> FilePath -> FilePath -> IO ()
embedJsFile logger dflags tmpfs unit_env input_fn output_fn = do
let profiling = False -- FIXME: add support for profiling way
@@ -984,12 +1072,14 @@ embedJsFile logger dflags tmpfs unit_env input_fn output_fn = do
-- the header lets the linker recognize processed JavaScript files
-- But don't add JavaScript header to object files!
- -- header appended to JS files stored as .o to recognize them.
- let header = "//JavaScript\n"
- jsFileNeedsCpp input_fn >>= \case
- False -> copyWithHeader header input_fn output_fn
- True -> do
+ -- read pragmas from JS file
+ -- we need to store them explicitly as they can be removed by CPP.
+ opts <- getOptionsFromJsFile input_fn
+ -- run CPP if needed
+ cpp_fn <- case enableCPP opts of
+ False -> pure input_fn
+ True -> do
-- append common CPP definitions to the .js file.
-- They define macros that avoid directly wiring zencoded names
-- in RTS JS files
@@ -1011,13 +1101,11 @@ embedJsFile logger dflags tmpfs unit_env input_fn output_fn = do
cpp_opts
pp_fn
js_fn
- -- add header to recognize the object as a JS file
- copyWithHeader header js_fn output_fn
+ pure js_fn
-jsFileNeedsCpp :: FilePath -> IO Bool
-jsFileNeedsCpp fn = do
- opts <- getOptionsFromJsFile fn
- pure (CPP `elem` opts)
+ -- write JS object
+ cpp_bs <- B.readFile cpp_fn
+ writeJSObject opts cpp_bs output_fn
-- | Link module codes.
--
=====================================
compiler/GHC/StgToJS/Linker/Types.hs
=====================================
@@ -18,8 +18,6 @@
module GHC.StgToJS.Linker.Types
( JSLinkConfig (..)
- , defaultJSLinkConfig
- , LinkedObj (..)
, LinkPlan (..)
)
where
@@ -27,7 +25,7 @@ where
import GHC.StgToJS.Object
import GHC.Unit.Types
-import GHC.Utils.Outputable (hsep,Outputable(..),text,ppr, hang, IsDoc (vcat), IsLine (hcat))
+import GHC.Utils.Outputable (Outputable(..),text,ppr, hang, IsDoc (vcat), IsLine (hcat))
import Data.Map.Strict (Map)
import Data.Set (Set)
@@ -42,23 +40,18 @@ import Prelude
--------------------------------------------------------------------------------
data JSLinkConfig = JSLinkConfig
- { lcNoJSExecutables :: !Bool -- ^ Dont' build JS executables
- , lcNoHsMain :: !Bool -- ^ Don't generate Haskell main entry
- , lcNoRts :: !Bool -- ^ Don't dump the generated RTS
- , lcNoStats :: !Bool -- ^ Disable .stats file generation
- , lcForeignRefs :: !Bool -- ^ Dump .frefs (foreign references) files
- , lcCombineAll :: !Bool -- ^ Generate all.js (combined js) + wrappers
- }
-
--- | Default linker configuration
-defaultJSLinkConfig :: JSLinkConfig
-defaultJSLinkConfig = JSLinkConfig
- { lcNoJSExecutables = False
- , lcNoHsMain = False
- , lcNoRts = False
- , lcNoStats = False
- , lcCombineAll = True
- , lcForeignRefs = True
+ { lcNoJSExecutables :: !Bool -- ^ Dont' build JS executables
+ , lcNoHsMain :: !Bool -- ^ Don't generate Haskell main entry
+ , lcNoRts :: !Bool -- ^ Don't dump the generated RTS
+ , lcNoStats :: !Bool -- ^ Disable .stats file generation
+ , lcForeignRefs :: !Bool -- ^ Dump .frefs (foreign references) files
+ , lcCombineAll :: !Bool -- ^ Generate all.js (combined js) + wrappers
+ , lcForceEmccRts :: !Bool
+ -- ^ Force the link with the emcc rts. Use this if you plan to dynamically
+ -- load wasm modules made from C files (e.g. in iserv).
+ , lcLinkCsources :: !Bool
+ -- ^ Link C sources (compiled to JS/Wasm) with Haskell code compiled to
+ -- JS. This implies the use of the Emscripten RTS to load this code.
}
data LinkPlan = LinkPlan
@@ -68,11 +61,15 @@ data LinkPlan = LinkPlan
, lkp_dep_blocks :: Set BlockRef
-- ^ Blocks to link
- , lkp_archives :: Set FilePath
- -- ^ Archives to load JS sources from
+ , lkp_archives :: !(Set FilePath)
+ -- ^ Archives to load JS and Cc sources from (JS code corresponding to
+ -- Haskell code is handled with blocks above)
- , lkp_extra_js :: Set FilePath
- -- ^ Extra JS files to link
+ , lkp_objs_js :: !(Set FilePath)
+ -- ^ JS objects to link
+
+ , lkp_objs_cc :: !(Set FilePath)
+ -- ^ Cc objects to link
}
instance Outputable LinkPlan where
@@ -81,20 +78,7 @@ instance Outputable LinkPlan where
-- plan, just meta info used to retrieve actual block contents
-- [ hcat [ text "Block info: ", ppr (lkp_block_info s)]
[ hcat [ text "Blocks: ", ppr (S.size (lkp_dep_blocks s))]
- , hang (text "JS files from archives:") 2 (vcat (fmap text (S.toList (lkp_archives s))))
- , hang (text "Extra JS:") 2 (vcat (fmap text (S.toList (lkp_extra_js s))))
+ , hang (text "Archives:") 2 (vcat (fmap text (S.toList (lkp_archives s))))
+ , hang (text "Extra JS objects:") 2 (vcat (fmap text (S.toList (lkp_objs_js s))))
+ , hang (text "Extra Cc objects:") 2 (vcat (fmap text (S.toList (lkp_objs_cc s))))
]
-
---------------------------------------------------------------------------------
--- Linker Environment
---------------------------------------------------------------------------------
-
--- | An object file that's either already in memory (with name) or on disk
-data LinkedObj
- = ObjFile FilePath -- ^ load from this file
- | ObjLoaded String Object -- ^ already loaded: description and payload
-
-instance Outputable LinkedObj where
- ppr = \case
- ObjFile fp -> hsep [text "ObjFile", text fp]
- ObjLoaded s o -> hsep [text "ObjLoaded", text s, ppr (objModuleName o)]
=====================================
compiler/GHC/StgToJS/Linker/Utils.hs
=====================================
@@ -17,9 +17,7 @@
-----------------------------------------------------------------------------
module GHC.StgToJS.Linker.Utils
- ( getOptionsFromJsFile
- , JSOption(..)
- , jsExeFileName
+ ( jsExeFileName
, getInstalledPackageLibDirs
, getInstalledPackageHsLibs
, commonCppDefs
@@ -41,11 +39,7 @@ import GHC.StgToJS.Types
import Prelude
import GHC.Platform
-import GHC.Utils.Misc
import Data.List (isPrefixOf)
-import System.IO
-import Data.Char (isSpace)
-import qualified Control.Exception as Exception
import GHC.Builtin.Types
import Language.Haskell.Syntax.Basic
@@ -253,6 +247,10 @@ genCommonCppDefs profiling = mconcat
, "#define RETURN_UBX_TUP9(x1,x2,x3,x4,x5,x6,x7,x8,x9) { h$ret1 = (x2); h$ret2 = (x3); h$ret3 = (x4); h$ret4 = (x5); h$ret5 = (x6); h$ret6 = (x7); h$ret7 = (x8); h$ret8 = (x9); return (x1); }\n"
, "#define RETURN_UBX_TUP10(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10) { h$ret1 = (x2); h$ret2 = (x3); h$ret3 = (x4); h$ret4 = (x5); h$ret5 = (x6); h$ret6 = (x7); h$ret7 = (x8); h$ret8 = (x9); h$ret9 = (x10); return (x1); }\n"
+ , "#define RETURN_INT64(h,l) RETURN_UBX_TUP2((h)|0,(l)>>>0)\n"
+ , "#define RETURN_WORD64(h,l) RETURN_UBX_TUP2((h)>>>0,(l)>>>0)\n"
+ , "#define RETURN_ADDR(a,o) RETURN_UBX_TUP2(a,o)\n"
+
, "#define CALL_UBX_TUP2(r1,r2,c) { (r1) = (c); (r2) = h$ret1; }\n"
, "#define CALL_UBX_TUP3(r1,r2,r3,c) { (r1) = (c); (r2) = h$ret1; (r3) = h$ret2; }\n"
, "#define CALL_UBX_TUP4(r1,r2,r3,r4,c) { (r1) = (c); (r2) = h$ret1; (r3) = h$ret2; (r4) = h$ret3; }\n"
@@ -282,38 +280,3 @@ jsExeFileName dflags
dropPrefix prefix xs
| prefix `isPrefixOf` xs = drop (length prefix) xs
| otherwise = xs
-
-
--- | Parse option pragma in JS file
-getOptionsFromJsFile :: FilePath -- ^ Input file
- -> IO [JSOption] -- ^ Parsed options, if any.
-getOptionsFromJsFile filename
- = Exception.bracket
- (openBinaryFile filename ReadMode)
- hClose
- getJsOptions
-
-data JSOption = CPP deriving (Eq, Ord)
-
-getJsOptions :: Handle -> IO [JSOption]
-getJsOptions handle = do
- hSetEncoding handle utf8
- prefix' <- B.hGet handle prefixLen
- if prefix == prefix'
- then parseJsOptions <$> hGetLine handle
- else pure []
- where
- prefix :: B.ByteString
- prefix = "//#OPTIONS:"
- prefixLen = B.length prefix
-
-parseJsOptions :: String -> [JSOption]
-parseJsOptions xs = go xs
- where
- trim = dropWhileEndLE isSpace . dropWhile isSpace
- go [] = []
- go xs = let (tok, rest) = break (== ',') xs
- tok' = trim tok
- rest' = drop 1 rest
- in if | tok' == "CPP" -> CPP : go rest'
- | otherwise -> go rest'
=====================================
compiler/GHC/StgToJS/Object.hs
=====================================
@@ -4,6 +4,8 @@
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE MultiWayIf #-}
-- only for DB.Binary instances on Module
{-# OPTIONS_GHC -fno-warn-orphans #-}
@@ -21,28 +23,23 @@
-- Stability : experimental
--
-- Serialization/deserialization of binary .o files for the JavaScript backend
--- The .o files contain dependency information and generated code.
--- All strings are mapped to a central string table, which helps reduce
--- file size and gives us efficient hash consing on read
---
--- Binary intermediate JavaScript object files:
--- serialized [Text] -> ([ClosureInfo], JStat) blocks
---
--- file layout:
--- - magic "GHCJSOBJ"
--- - compiler version tag
--- - module name
--- - offsets of string table
--- - dependencies
--- - offset of the index
--- - unit infos
--- - index
--- - string table
--
-----------------------------------------------------------------------------
module GHC.StgToJS.Object
- ( putObject
+ ( ObjectKind(..)
+ , getObjectKind
+ , getObjectKindBS
+ -- * JS object
+ , JSOptions(..)
+ , defaultJSOptions
+ , getOptionsFromJsFile
+ , writeJSObject
+ , readJSObject
+ , parseJSObject
+ , parseJSObjectBS
+ -- * HS object
+ , putObject
, getObjectHeader
, getObjectBody
, getObject
@@ -51,7 +48,6 @@ module GHC.StgToJS.Object
, readObjectBlocks
, readObjectBlockInfo
, isGlobalBlock
- , isJsObjectFile
, Object(..)
, IndexEntry(..)
, LocatedBlockInfo (..)
@@ -74,12 +70,14 @@ import Data.Int
import Data.IntSet (IntSet)
import qualified Data.IntSet as IS
import Data.List (sortOn)
+import qualified Data.List as List
import Data.Map (Map)
import qualified Data.Map as M
import Data.Word
-import Data.Char
-import Foreign.Storable
-import Foreign.Marshal.Array
+import Data.Semigroup
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Unsafe as B
+import Data.Char (isSpace)
import System.IO
import GHC.Settings.Constants (hiVersion)
@@ -97,8 +95,76 @@ import GHC.Types.Unique.Map
import GHC.Utils.Binary hiding (SymbolTable)
import GHC.Utils.Outputable (ppr, Outputable, hcat, vcat, text, hsep)
import GHC.Utils.Monad (mapMaybeM)
+import GHC.Utils.Panic
+import GHC.Utils.Misc (dropWhileEndLE)
+import System.IO.Unsafe
+import qualified Control.Exception as Exception
+
+----------------------------------------------
+-- The JS backend supports 3 kinds of objects:
+-- 1. HS objects: produced from Haskell sources
+-- 2. JS objects: produced from JS sources
+-- 3. Cc objects: produced by emcc (e.g. from C sources)
+--
+-- They all have a different header that allows them to be distinguished.
+-- See ObjectKind type.
+----------------------------------------------
+
+-- | Different kinds of object (.o) supported by the JS backend
+data ObjectKind
+ = ObjJs -- ^ JavaScript source embedded in a .o
+ | ObjHs -- ^ JS backend object for Haskell code
+ | ObjCc -- ^ Wasm module object as produced by emcc
+ deriving (Show,Eq,Ord)
+
+-- | Get the kind of a file object, if any
+getObjectKind :: FilePath -> IO (Maybe ObjectKind)
+getObjectKind fp = withBinaryFile fp ReadMode $ \h -> do
+ let !max_header_length = max (B.length jsHeader)
+ $ max (B.length wasmHeader)
+ (B.length hsHeader)
+
+ bs <- B.hGet h max_header_length
+ pure $! getObjectKindBS bs
+
+-- | Get the kind of an object stored in a bytestring, if any
+getObjectKindBS :: B.ByteString -> Maybe ObjectKind
+getObjectKindBS bs
+ | jsHeader `B.isPrefixOf` bs = Just ObjJs
+ | hsHeader `B.isPrefixOf` bs = Just ObjHs
+ | wasmHeader `B.isPrefixOf` bs = Just ObjCc
+ | otherwise = Nothing
+
+-- Header added to JS sources to discriminate them from other object files.
+-- They all have .o extension but JS sources have this header.
+jsHeader :: B.ByteString
+jsHeader = unsafePerformIO $ B.unsafePackAddressLen 8 "GHCJS_JS"#
+
+hsHeader :: B.ByteString
+hsHeader = unsafePerformIO $ B.unsafePackAddressLen 8 "GHCJS_HS"#
+
+wasmHeader :: B.ByteString
+wasmHeader = unsafePerformIO $ B.unsafePackAddressLen 4 "\0asm"#
+
+
+
+------------------------------------------------
+-- HS objects
+--
+-- file layout:
+-- - magic "GHCJS_HS"
+-- - compiler version tag
+-- - module name
+-- - offsets of string table
+-- - dependencies
+-- - offset of the index
+-- - unit infos
+-- - index
+-- - string table
+--
+------------------------------------------------
--- | An object file
+-- | A HS object file
data Object = Object
{ objModuleName :: !ModuleName
-- ^ name of the module
@@ -217,11 +283,6 @@ getObjBlock syms bh = do
}
--- | A tag that determines the kind of payload in the .o file. See
--- @StgToJS.Linker.Arhive.magic@ for another kind of magic
-magic :: String
-magic = "GHCJSOBJ"
-
-- | Serialized block indexes and their exported symbols
-- (the first block is module-global)
type Index = [IndexEntry]
@@ -244,7 +305,7 @@ putObject
-> [ObjBlock] -- ^ linkable units and their symbols
-> IO ()
putObject bh mod_name deps os = do
- forM_ magic (putByte bh . fromIntegral . ord)
+ putByteString bh hsHeader
put_ bh (show hiVersion)
-- we store the module name as a String because we don't want to have to
@@ -267,37 +328,12 @@ putObject bh mod_name deps os = do
pure (oiSymbols o,p)
pure idx
--- | Test if the object file is a JS object
-isJsObjectFile :: FilePath -> IO Bool
-isJsObjectFile fp = do
- let !n = length magic
- withBinaryFile fp ReadMode $ \hdl -> do
- allocaArray n $ \ptr -> do
- n' <- hGetBuf hdl ptr n
- if (n' /= n)
- then pure False
- else checkMagic (peekElemOff ptr)
-
--- | Check magic
-checkMagic :: (Int -> IO Word8) -> IO Bool
-checkMagic get_byte = do
- let go_magic !i = \case
- [] -> pure True
- (e:es) -> get_byte i >>= \case
- c | fromIntegral (ord e) == c -> go_magic (i+1) es
- | otherwise -> pure False
- go_magic 0 magic
-
--- | Parse object magic
-getCheckMagic :: BinHandle -> IO Bool
-getCheckMagic bh = checkMagic (const (getByte bh))
-
-- | Parse object header
getObjectHeader :: BinHandle -> IO (Either String ModuleName)
getObjectHeader bh = do
- is_magic <- getCheckMagic bh
- case is_magic of
- False -> pure (Left "invalid magic header")
+ magic <- getByteString bh (B.length hsHeader)
+ case magic == hsHeader of
+ False -> pure (Left "invalid magic header for HS object")
True -> do
is_correct_version <- ((== hiVersion) . read) <$> get bh
case is_correct_version of
@@ -630,3 +666,134 @@ instance Binary StaticLit where
6 -> BinLit <$> get bh
7 -> LabelLit <$> get bh <*> get bh
n -> error ("Binary get bh StaticLit: invalid tag " ++ show n)
+
+
+------------------------------------------------
+-- JS objects
+------------------------------------------------
+
+-- | Options obtained from pragmas in JS files
+data JSOptions = JSOptions
+ { enableCPP :: !Bool -- ^ Enable CPP on the JS file
+ , emccExtraOptions :: ![String] -- ^ Pass additional options to emcc at link time
+ , emccExportedFunctions :: ![String] -- ^ Arguments for `-sEXPORTED_FUNCTIONS`
+ , emccExportedRuntimeMethods :: ![String] -- ^ Arguments for `-sEXPORTED_RUNTIME_METHODS`
+ }
+ deriving (Eq, Ord)
+
+
+instance Binary JSOptions where
+ put_ bh (JSOptions a b c d) = do
+ put_ bh a
+ put_ bh b
+ put_ bh c
+ put_ bh d
+ get bh = JSOptions <$> get bh <*> get bh <*> get bh <*> get bh
+
+instance Semigroup JSOptions where
+ a <> b = JSOptions
+ { enableCPP = enableCPP a || enableCPP b
+ , emccExtraOptions = emccExtraOptions a ++ emccExtraOptions b
+ , emccExportedFunctions = List.nub (List.sort (emccExportedFunctions a ++ emccExportedFunctions b))
+ , emccExportedRuntimeMethods = List.nub (List.sort (emccExportedRuntimeMethods a ++ emccExportedRuntimeMethods b))
+ }
+
+defaultJSOptions :: JSOptions
+defaultJSOptions = JSOptions
+ { enableCPP = False
+ , emccExtraOptions = []
+ , emccExportedRuntimeMethods = []
+ , emccExportedFunctions = []
+ }
+
+-- mimics `lines` implementation
+splitOnComma :: String -> [String]
+splitOnComma s = cons $ case break (== ',') s of
+ (l, s') -> (l, case s' of
+ [] -> []
+ _:s'' -> splitOnComma s'')
+ where
+ cons ~(h, t) = h : t
+
+
+
+-- | Get the JS option pragmas from .js files
+getJsOptions :: Handle -> IO JSOptions
+getJsOptions handle = do
+ hSetEncoding handle utf8
+ let trim = dropWhileEndLE isSpace . dropWhile isSpace
+ let go opts = do
+ hIsEOF handle >>= \case
+ True -> pure opts
+ False -> do
+ xs <- hGetLine handle
+ if not ("//#OPTIONS:" `List.isPrefixOf` xs)
+ then pure opts
+ else do
+ -- drop prefix and spaces
+ let ys = trim (drop 11 xs)
+ let opts' = if
+ | ys == "CPP"
+ -> opts {enableCPP = True}
+
+ | Just s <- List.stripPrefix "EMCC:EXPORTED_FUNCTIONS=" ys
+ , fns <- fmap trim (splitOnComma s)
+ -> opts { emccExportedFunctions = emccExportedFunctions opts ++ fns }
+
+ | Just s <- List.stripPrefix "EMCC:EXPORTED_RUNTIME_METHODS=" ys
+ , fns <- fmap trim (splitOnComma s)
+ -> opts { emccExportedRuntimeMethods = emccExportedRuntimeMethods opts ++ fns }
+
+ | Just s <- List.stripPrefix "EMCC:EXTRA=" ys
+ -> opts { emccExtraOptions = emccExtraOptions opts ++ [s] }
+
+ | otherwise
+ -> panic ("Unrecognized JS pragma: " ++ ys)
+
+ go opts'
+ go defaultJSOptions
+
+-- | Parse option pragma in JS file
+getOptionsFromJsFile :: FilePath -- ^ Input file
+ -> IO JSOptions -- ^ Parsed options.
+getOptionsFromJsFile filename
+ = Exception.bracket
+ (openBinaryFile filename ReadMode)
+ hClose
+ getJsOptions
+
+
+-- | Write a JS object (embed some handwritten JS code)
+writeJSObject :: JSOptions -> B.ByteString -> FilePath -> IO ()
+writeJSObject opts contents output_fn = do
+ bh <- openBinMem (B.length contents + 1000)
+
+ putByteString bh jsHeader
+ put_ bh opts
+ put_ bh contents
+
+ writeBinMem bh output_fn
+
+
+-- | Read a JS object from BinHandle
+parseJSObject :: BinHandle -> IO (JSOptions, B.ByteString)
+parseJSObject bh = do
+ magic <- getByteString bh (B.length jsHeader)
+ case magic == jsHeader of
+ False -> panic "invalid magic header for JS object"
+ True -> do
+ opts <- get bh
+ contents <- get bh
+ pure (opts,contents)
+
+-- | Read a JS object from ByteString
+parseJSObjectBS :: B.ByteString -> IO (JSOptions, B.ByteString)
+parseJSObjectBS bs = do
+ bh <- unsafeUnpackBinBuffer bs
+ parseJSObject bh
+
+-- | Read a JS object from file
+readJSObject :: FilePath -> IO (JSOptions, B.ByteString)
+readJSObject input_fn = do
+ bh <- readBinMem input_fn
+ parseJSObject bh
=====================================
compiler/GHC/StgToJS/Prim.hs
=====================================
@@ -1348,23 +1348,27 @@ write_boff_addr a i r o = mconcat
read_stableptr :: JStgExpr -> JStgExpr -> JStgExpr -> JStgExpr -> JStgStat
read_stableptr a i r o = mconcat
- [ r |= var "h$stablePtrBuf" -- stable pointers are always in this array
- , o |= read_i32 a i
+ [ o |= read_i32 a i
+ , ifS (o .===. zero_)
+ (r |= null_)
+ (r |= var "h$stablePtrBuf")
]
read_boff_stableptr :: JStgExpr -> JStgExpr -> JStgExpr -> JStgExpr -> JStgStat
read_boff_stableptr a i r o = mconcat
- [ r |= var "h$stablePtrBuf" -- stable pointers are always in this array
- , o |= read_boff_i32 a i
+ [ o |= read_boff_i32 a i
+ , ifS (o .===. zero_)
+ (r |= null_)
+ (r |= var "h$stablePtrBuf")
]
write_stableptr :: JStgExpr -> JStgExpr -> JStgExpr -> JStgExpr -> JStgStat
write_stableptr a i _r o = write_i32 a i o
- -- don't store "r" as it must be h$stablePtrBuf
+ -- don't store "r" as it must be h$stablePtrBuf or null
write_boff_stableptr :: JStgExpr -> JStgExpr -> JStgExpr -> JStgExpr -> JStgStat
write_boff_stableptr a i _r o = write_boff_i32 a i o
- -- don't store "r" as it must be h$stablePtrBuf
+ -- don't store "r" as it must be h$stablePtrBuf or null
write_u8 :: JStgExpr -> JStgExpr -> JStgExpr -> JStgStat
write_u8 a i v = idx_u8 a i |= v
=====================================
compiler/GHC/StgToJS/Types.hs
=====================================
@@ -30,6 +30,7 @@ import GHC.JS.Ppr ()
import GHC.Stg.Syntax
import GHC.Core.TyCon
+import GHC.Linker.Config
import GHC.Types.Unique
import GHC.Types.Unique.FM
@@ -94,6 +95,7 @@ data StgToJSConfig = StgToJSConfig
, csRuntimeAssert :: !Bool -- ^ Enable runtime assertions
-- settings
, csContext :: !SDocContext
+ , csLinkerConfig :: !LinkerConfig -- ^ Emscripten linker
}
-- | Information relevenat to code generation for closures.
=====================================
compiler/GHC/Utils/Binary.hs
=====================================
@@ -47,6 +47,8 @@ module GHC.Utils.Binary
-- * For writing instances
putByte,
getByte,
+ putByteString,
+ getByteString,
-- * Variable length encodings
putULEB128,
@@ -1227,6 +1229,19 @@ getFS bh = do
l <- get bh :: IO Int
getPrim bh l (\src -> pure $! mkFastStringBytes src l )
+-- | Put a ByteString without its length (can't be read back without knowing the
+-- length!)
+putByteString :: BinHandle -> ByteString -> IO ()
+putByteString bh bs =
+ BS.unsafeUseAsCStringLen bs $ \(ptr, l) -> do
+ putPrim bh l (\op -> copyBytes op (castPtr ptr) l)
+
+-- | Get a ByteString whose length is known
+getByteString :: BinHandle -> Int -> IO ByteString
+getByteString bh l =
+ BS.create l $ \dest -> do
+ getPrim bh l (\src -> copyBytes dest src l)
+
putBS :: BinHandle -> ByteString -> IO ()
putBS bh bs =
BS.unsafeUseAsCStringLen bs $ \(ptr, l) -> do
=====================================
docs/users_guide/9.10.1-notes.rst
=====================================
@@ -148,6 +148,16 @@ Compiler
This means that if you are using ``-fllvm`` you now need ``llc``, ``opt`` and ``clang``
available.
+JavaScript backend
+~~~~~~~~~~~~~~~~~~
+
+- The JavaScript backend now supports linking with C sources. It uses Emscripten
+ to compile them to WebAssembly. The resulting JS file embeds and loads these
+ WebAssembly files. Important note: JavaScript wrappers are required to call
+ into C functions and pragmas have been added to indicate which C functions are
+ exported (see the users guide).
+
+
GHCi
~~~~
=====================================
docs/users_guide/debugging.rst
=====================================
@@ -785,6 +785,14 @@ JavaScript code generator
Include human-readable spacing and indentation when generating JavaScript.
+.. ghc-flag:: -ddisable-js-c-sources
+ :shortdesc: Disable the link with C sources compiled to JavaScript
+ :type: dynamic
+
+ For debugging it can be useful to avoid linking with C sources compiled to
+ JavaScript with Emscripten. This also avoids linking with Emcscripten's RTS.
+ Note that code that calls into this C code or that uses Emscripten's
+ primitives will fail at runtime (e.g. undefined function errors).
Miscellaneous backend dumps
~~~~~~~~~~~~~~~~~~~~~~~~~~~
=====================================
docs/users_guide/javascript.rst
=====================================
@@ -421,3 +421,116 @@ hand-written JavaScript come from functions with data that stays as JavaScript
primitive types for a long time, especially strings. For this, ``JSVal`` allows
values to be passed between ``Haskell`` and ``JavaScript`` without a marshalling
penalty.
+
+
+Linking with C sources
+----------------------
+
+GHC supports compiling C sources into JavaScript (using Emscripten) and linking
+them with the rest of the JavaScript code (generated from Haskell codes and from
+the RTS).
+
+C functions compiled with Emscripten get a "_" prepended to their name in
+JavaScript. For example, C "malloc" becomes "_malloc" in JavaScript.
+
+EMCC pragmas
+~~~~~~~~~~~~
+
+By default the EMCC linker drops code considered dead and it has no way to know
+which code is alive due to some call from Haskell or from a JavaScript wrapper.
+As such, you must explicitly add some pragmas at the top of one of your `.js`
+files to indicate which functions are alive:
+
+```
+//#OPTIONS:EMCC:EXPORTED_RUNTIME_METHODS=foo,bar
+```
+
+Enable methods `foo` and `bar` from the Emscripten runtime system. This is used
+for methods such as `ccall`, `cwrap`, `addFunction`, `removeFunction`... that
+are described in Emscripten documentation.
+
+```
+//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_foo,_bar
+```
+
+Enable C functions `foo` and `bar` to be exported respectively as `_foo` and
+`_bar` (`_` prepended). This is used for C library functions (e.g. `_malloc`,
+`_free`, etc.) and for the C code compiled with your project (e.g.
+`_sqlite3_open` and others for the `sqlite` C library).
+
+You can use both pragmas as many times as you want. Ultimately all the entries
+end up in sets of functions passed to the Emscripten linker via
+`-sEXPORTED_RUNTIME_METHODS` and `-sEXPORTED_FUNCTIONS` (which can only be
+passed once; the latter argument overrides the former ones).
+
+
+```
+//#OPTIONS:EMCC:EXTRA=-foo,-bar
+```
+
+This pragma allows additional options to be passed to Emscripten if need be. We
+already pass:
+- `-sSINGLE_FILE=1`: required to create a single `.js` file as artefact
+ (otherwise `.wasm` files corresponding to C codes need to be present in the
+ current working directory when invoking the resulting `.js` file).
+- `-sALLOW_TABLE_GROWTH`: required to support `addFunction`
+- `-sEXPORTED_RUNTIME_METHODS` and `-sEXPORTED_FUNCTIONS`: see above.
+
+Be careful because some extra arguments may break the build in unsuspected ways.
+
+Wrappers
+~~~~~~~~
+
+The JavaScript backend doesn't generate wrappers for foreign imports to call
+directly into the compiled C code. I.e. given the following foreign import:
+
+```haskell
+foreign import ccall "foo" foo :: ...
+```
+
+The JavaScript backend will replace calls to `foo` with calls to the JavaScript
+function `h$foo`. It's still up to the programmer to call `_foo` or not from `h$foo`
+on a case by case basis. If `h$foo` calls the generated from C function
+`_foo`, then we say that `h$foo` is a wrapper function. These wrapper functions
+are used to marshal arguments and returned values between the JS heap and the
+Emscripten heap.
+
+On one hand, GHC's JavaScript backend creates a different array of bytes per
+allocation (in order to make use of the garbage collector of the JavaScript
+engine). On the other hand, Emscripten's C heap consists in a single array of
+bytes. To call C functions converted to JavaScript that have pointer arguments,
+wrapper functions have to:
+
+1. allocate some buffer in the Emscripten heap (using `_malloc`) to get a valid
+ Emscripten pointer
+2. copy the bytes from the JS object to the buffer in the Emscripten heap
+3. use the Emscripten pointer to make the call to the C function
+4. optionally copy back the bytes from the Emscripten heap if the call may have
+ changed the contents of the buffer
+5. free the Emscripten buffer (with `_free`)
+
+GHC's JavaScript rts provides helper functions for this in `rts/js/mem.js`. See
+`h$copyFromHeap`, `h$copyToHeap`, `h$initHeapBuffer`, etc.
+
+Callbacks
+~~~~~~~~~
+
+Some C functions take function pointers as arguments (e.g. callbacks). This is
+supported by the JavaScript backend but requires some work from the wrapper
+functions.
+
+1. On the Haskell side it is possible to create a pointer to an Haskell function
+ (a `FunPtr`) by using a "wrapper" foreign import. See the documentation of
+ `base:Foreign.Ptr.FunPtr`.
+2. This `FunPtr` can be passed to a JavaScript wrapper function. However it's
+ implemented as a `StablePtr` and needs to be converted into a function
+ pointer that Emscripten understands. This can be done with
+ `h$registerFunPtrOnHeap`.
+3. When a callback is no longer needed, it can be freed with
+ `h$unregisterFunPtrFromHeap`.
+
+Note that in some circumstances you may not want to register an Haskell function
+directly as a callback. It is perfectly possible to register/free regular JavaScript
+function as Emscripten functions using `Module.addFunction` and `Module.removeFunction`.
+That's what the helper functions mentioned above do.
+
=====================================
rts/js/mem.js
=====================================
@@ -1,4 +1,5 @@
-//#OPTIONS: CPP
+//#OPTIONS:CPP
+//#OPTIONS:EMCC:EXPORTED_RUNTIME_METHODS=addFunction,removeFunction,getEmptyTableSlot
// #define GHCJS_TRACE_META 1
@@ -995,6 +996,15 @@ function h$roundUpToMultipleOf(n,m) {
function h$newByteArray(len) {
var len0 = Math.max(h$roundUpToMultipleOf(len, 8), 8);
var buf = new ArrayBuffer(len0);
+ return h$wrapByteArray(buf,len);
+}
+
+// Create a ByteArray from a given ArrayBuffer
+//
+// This is useful to wrap pre-existing ArrayBuffer such as Emscripten heap
+// (Module.HEAP8). However don't rely on the ByteArray length ("len" field) too
+// much in this case because it isn't updated when the heap grows.
+function h$wrapByteArray(buf,len) {
return { buf: buf
, len: len
, i3: new Int32Array(buf)
@@ -1088,10 +1098,11 @@ function h$compareByteArrays(a1,o1,a2,o2,n) {
*/
function h$wrapBuffer(buf, unalignedOk, offset, length) {
if(!unalignedOk && offset && offset % 8 !== 0) {
- throw ("h$wrapBuffer: offset not aligned:" + offset);
+ throw new Error("h$wrapBuffer: offset not aligned:" + offset);
+ }
+ if(!buf || !(buf instanceof ArrayBuffer)) {
+ throw new Error("h$wrapBuffer: not an ArrayBuffer: " + buf)
}
- if(!buf || !(buf instanceof ArrayBuffer))
- throw "h$wrapBuffer: not an ArrayBuffer"
if(!offset) { offset = 0; }
if(!length || length < 0) { length = buf.byteLength - offset; }
return { buf: buf
@@ -1483,3 +1494,168 @@ function h$checkOverlapByteArray(a1, o1, a2, o2, n) {
if (o1 > o2) return o1 - o2 >= n;
return true;
}
+
+
+/////////////////////////////////////////
+// Interface with Emscripten's HEAP
+/////////////////////////////////////////
+
+// The Emscripten Heap is an ArrayBuffer that we wrap as if it was a ByteArray.
+// It allows pointers into Emscripten Heap to be representable as our usual
+// pointers (ByteArray, Offset).
+var h$HEAP = null;
+
+// Initialize the global h$HEAP variable. This must only be called when linking
+// with Emscripten.
+function h$initEmscriptenHeap() {
+ h$HEAP = h$wrapByteArray(Module.HEAP8.buffer, Module.HEAP8.buffer.byteLength);
+}
+
+// Create a pointer in Emscripten's HEAP
+function h$mkHeapPtr(offset) {
+ if (!h$HEAP) {
+ throw new Error("h$mkHeapPtr: Emscripten h$HEAP not initialized");
+ }
+ return {'array':h$HEAP, 'offset': offset};
+}
+
+// Copy len bytes from the given buffer to the heap
+function h$copyToHeap(buf_d, buf_o, tgt, len) {
+ if(len === 0) return;
+ var u8 = buf_d.u8;
+ for(var i=0;i<len;i++) {
+ Module.HEAPU8[tgt+i] = u8[buf_o+i];
+ }
+}
+
+// Copy len bytes from the heap to the given buffer
+function h$copyFromHeap(src, buf_d, buf_o, len) {
+ var u8 = buf_d.u8;
+ for(var i=0;i<len;i++) {
+ u8[buf_o+i] = Module.HEAPU8[src+i];
+ }
+}
+
+// malloc and initialize a buffer on the HEAP
+function h$initHeapBufferLen(buf_d, buf_o, len) {
+ var buf_ptr = _malloc(len);
+ h$copyToHeap(buf_d, buf_o, buf_ptr, len);
+ return buf_ptr;
+}
+
+// Allocate and copy a JS buffer on the heap
+function h$initHeapBuffer(str_d, str_o) {
+ if(str_d === null) return null;
+ return ptr = h$initHeapBufferLen(str_d, str_o, str_d.len);
+}
+
+
+
+// temporarily malloc and initialize a buffer on the HEAP, pass it to the
+// continuation, then release the buffer
+function h$withOutBufferOnHeap(ptr_d, ptr_o, len, cont) {
+ var ptr = _malloc(len);
+ h$copyToHeap(ptr_d, ptr_o, ptr, len);
+ var ret = cont(ptr);
+ h$copyFromHeap(ptr, ptr_d, ptr_o, len);
+ _free(ptr);
+ return ret;
+}
+
+// Temporarily allocate and initialize a buffer on the heap and pass it to the
+// continuation. The buffer is freed from the heap when the continuation
+// returns.
+function h$withCBufferOnHeap(str_d, str_o, len, cont) {
+ var str = _malloc(len);
+ if(str_d !== null) h$copyToHeap(str_d, str_o, str, len);
+ var ret = cont(str);
+ _free(str);
+ return ret;
+}
+
+// Temporarily allocate a CString on the heap and pass it to the continuation.
+// The string is freed from the heap when the continuation returns.
+function h$withCStringOnHeap(str_d, str_o, cont) {
+ return h$withCBufferOnHeap(str_d, str_o, str_d === null ? 0 : h$strlen(str_d,str_o), cont);
+}
+
+// Dereference a heap pointer to a heap pointer (a 32-bit offset in the heap)
+function h$derefHeapPtr_addr(offset) {
+ var ptr = h$newByteArray(4);
+ ptr.u8.set(Module.HEAPU8.subarray(offset, offset+4));
+ return ptr.i3[0];
+}
+
+// Write a heap pointer (h$HEAP,offset) at the given JS pointer
+function h$putHeapAddr(a,o,offset) {
+ if (offset == 0) {
+ // null pointer in HEAP must become null pointer in JS land
+ PUT_ADDR(a,o,null,0);
+ } else {
+ PUT_ADDR(a,o,h$HEAP,offset);
+ }
+}
+
+// get a C string (null-terminated) from HEAP
+// Convert HEAP null (i.e. 0) into JS null
+function h$copyCStringFromHeap(offset) {
+ if(offset == 0) return null;
+ var len = 0;
+ while(HEAPU8[offset+len] !== 0){ len++; };
+ var str = h$newByteArray(len+1);
+ str.u8.set(HEAPU8.subarray(offset,offset+len+1));
+ return str;
+}
+
+// get an array of n pointers from HEAP
+function h$copyPtrArrayFromHeap(offset,n) {
+ var ptr = h$newByteArray(4*n);
+ ptr.u8.set(HEAPU8.subarray(offset, offset+4*n));
+ return ptr;
+}
+
+// Given a FunPtr, allocate a function wrapper with Emscripten and register it
+// in the HEAP. Return the heap pointer to it.
+//
+// If `ask_ptr` is true, `mkfn` get passed both the function and the heap
+// pointer. This is useful in callbacks which should cleanup themselves from the
+// Emscripten heap during their execution. Call h$unregisterFunPtrFromHeap on the
+// heap pointer to clean it.
+//
+// Since Emscripten uses WebAssembly, function types must be known precisely.
+// The `ty` serves this purpose. See Emscripten's `addFunction` documentation
+// for the syntax.
+function h$registerFunPtrOnHeap(funptr_d, funptr_o, ask_ptr, ty, mkfn) {
+ // TODO: assert funptr_d is the StablePtr array
+ if (funptr_o == 0) return 0;
+
+ var fun = h$deRefStablePtr(funptr_o);
+
+ // In destroy callbacks we want to call removeFunction on the running
+ // callback. But it hasn't been registered yet so we don't have its pointer!
+ //
+ // So we call getEmptyTableSlot to get the next function slot in advance.
+ // But this has the side-effect of reserving the next empty slot... so we have
+ // to release it just after. The following call to addFunction will get the
+ // same slot. Warning: this hack doesn't work if addFunction is called in
+ // mkfn, but we check this with an assertion.
+ if (ask_ptr) {
+ var cb_ptr = getEmptyTableSlot();
+ Module.removeFunction(cb_ptr);
+
+ var cb = mkfn(fun,cb_ptr);
+ var ptr = Module.addFunction(cb,ty);
+
+ assert(cb_ptr === ptr, "h$registerJSFunPtrOnHeap: got different pointer offsets: " + cb_ptr + " and " + ptr);
+ return ptr;
+ }
+ else {
+ var cb = mkfn(fun);
+ return Module.addFunction(cb,ty);
+ }
+}
+
+// Unregister a function previously registered on the heap with h$registerFunPtrOnHeap
+function h$unregisterFunPtrFromHeap(p) {
+ return Module.removeFunction(p);
+}
=====================================
rts/js/rts.js
=====================================
@@ -111,25 +111,54 @@ function h$rts_toString(x) {
function h$rts_mkPtr(x) {
var buf, off = 0;
- if(typeof x == 'string') {
- // string: UTF-8 encode
+ // null pointer
+ if(x === null) {
+ buf = null;
+ off = 0;
+ }
+ // Haskell pointer
+ else if(typeof x == 'object' &&
+ typeof x.offset == 'number' &&
+ typeof x.array !== 'undefined') {
+ buf = x.array;
+ off = x.offset;
+ }
+ // JS string: UTF-8 encode
+ else if(typeof x == 'string') {
buf = h$encodeUtf8(x);
off = 0;
- } else if(typeof x == 'object' &&
+ }
+ // Haskell ByteArray
+ else if(typeof x == 'object' &&
typeof x.len == 'number' &&
x.buf instanceof ArrayBuffer) {
- // already a Haskell ByteArray
buf = x;
off = 0;
- } else if(x.isView) {
- // ArrayBufferView: make ByteArray with the same byteOffset
+ }
+ // Offset in the Emscripten heap
+ else if (typeof x == 'number' && h$HEAP !== null) {
+ if (x == 0) {
+ buf = null;
+ off = 0;
+ }
+ else {
+ buf = h$HEAP;
+ off = x;
+ }
+ }
+ // ArrayBufferView: make ByteArray with the same byteOffset
+ else if(x.isView) {
buf = h$wrapBuffer(x.buffer, true, 0, x.buffer.byteLength);
off = x.byteOffset;
- } else {
- // plain ArrayBuffer
+ }
+ // plain ArrayBuffer
+ else if (x instanceof ArrayBuffer) {
buf = h$wrapBuffer(x, true, 0, x.byteLength);
off = 0;
}
+ else {
+ throw new Error ("h$rts_mkPtr: invalid argument: " + x);
+ }
return MK_PTR(buf, off);
}
=====================================
rts/js/stableptr.js
=====================================
@@ -18,6 +18,13 @@ var h$stablePtrData = [null];
var h$stablePtrBuf = h$newByteArray(8);
var h$stablePtrN = 1;
var h$stablePtrFree = [];
+// Slot 0 isn't used as offset 0 is reserved for the null pointer. In
+// particular, when we store a StablePtr in an array, we don't store the array
+// part. When we read it back, we only have the offset. Some codes initialize
+// these stored StablePtr with NULL (hence offset 0) and if we were creating a
+// StablePtr from it (i.e. [$stablePtrBuf,0]) then we can't compare them to
+// nullPtr (castStablePtrToPtr [$stablePtrBuf,0] /= [null,0]).
+// This happens in direct-sqlite package for example.
function h$makeStablePtr(v) {
TRACE_STABLEPTR("makeStablePtr")
=====================================
testsuite/driver/runtests.py
=====================================
@@ -47,6 +47,11 @@ ghc_env['TERM'] = 'vt100'
# Ensure that GHC doesn't go looking for environment files. See #21365.
ghc_env['GHC_ENVIRONMENT'] = "-"
+# Ensure that EMCC doesn't output cache info
+# (cf https://github.com/emscripten-core/emscripten/issues/18607)
+os.environ['EMCC_LOGGING'] = '0'
+ghc_env['EMCC_LOGGING'] = '0'
+
global config
config = getConfig() # get it from testglobals
config.validate()
=====================================
testsuite/driver/testlib.py
=====================================
@@ -2676,6 +2676,8 @@ def normalise_errmsg(s: str) -> str:
s = re.sub('.*strip: changes being made to the file will invalidate the code signature in.*\n','',s)
# clang may warn about unused argument when used as assembler
s = re.sub('.*warning: argument unused during compilation:.*\n', '', s)
+ # Emscripten displays cache info and old emcc doesn't support EMCC_LOGGING=0
+ s = re.sub('cache:INFO: .*\n', '', s)
return s
=====================================
testsuite/tests/driver/MergeObjsMode/A.hs
=====================================
@@ -1,5 +1,7 @@
module A where
+-- Don't inline otherwise A.o may not be needed by Main.o
+{-# NOINLINE a #-}
a :: Int
a = 42
=====================================
testsuite/tests/driver/MergeObjsMode/B.hs
=====================================
@@ -1,4 +1,6 @@
module B where
+-- Don't inline otherwise B.o may not be needed by Main.o
+{-# NOINLINE b #-}
b :: String
b = "hello world"
=====================================
testsuite/tests/driver/MergeObjsMode/all.T
=====================================
@@ -1,5 +1,6 @@
test('MergeObjsMode',
[ extra_files(['A.hs', 'B.hs', 'Main.hs'])
+ , js_skip # Object merging isn't supported by the JS backend
],
makefile_test,
[])
=====================================
testsuite/tests/driver/all.T
=====================================
@@ -299,7 +299,7 @@ test('T18369', normal, compile, ['-O'])
test('T21682', normal, compile_fail, ['-Werror=unrecognised-warning-flags -Wfoo'])
test('FullGHCVersion', normal, compile_and_run, ['-package ghc-boot'])
test('OneShotTH', req_th, makefile_test, [])
-test('T17481', js_broken(22261), makefile_test, [])
+test('T17481', normal, makefile_test, [])
test('T20084', normal, makefile_test, [])
test('RunMode', [req_interp,extra_files(['RunMode/Test.hs'])], run_command, ['{compiler} --run -iRunMode/ -ignore-dot-ghci RunMode.hs -- hello'])
test('T20439', normal, run_command,
=====================================
testsuite/tests/driver/recomp011/all.T
=====================================
@@ -2,6 +2,5 @@
test('recomp011',
[ extra_files(['Main.hs'])
- , js_broken(22261)
],
makefile_test, [])
=====================================
testsuite/tests/javascript/js-c-sources/js-c-sources01.hs
=====================================
@@ -0,0 +1,47 @@
+import Foreign.C.Types
+import Foreign.C.String
+import Foreign.Ptr
+import Data.Char
+import Foreign.Marshal.Alloc
+import System.IO
+
+main :: IO ()
+main = do
+ -- avoid C and Haskell prints to stdout to be intermingled due to buffering on the Haskell side
+ hSetBuffering stdout NoBuffering
+
+ -- test sending int values back and forth
+ -- test printing on stdout in C
+ print =<< hello_c =<< hello_c =<< hello_c 17
+
+ -- test printing an Haskell string in C
+ withCString "Hello from Haskell" write_c
+
+ -- test allocating a CString in C and printing it in Haskell
+ c_str <- alloc_c
+ print =<< peekCString c_str
+ free c_str -- not really needed. The CString lives as an array on the JS heap and will be collected
+
+ -- test modifying Haskell allocated bytes in C
+ withCString "Hello from Haskell" $ \c_str -> do
+ modify_c c_str
+ print =<< peekCString c_str
+
+ -- test calling back into Haskell from C
+ let to_upper c = fromIntegral (ord (toUpper (chr (fromIntegral c))))
+ cb <- mkCallback to_upper
+ withCString "Hello from Haskell 1234" $ \c_str -> do
+ callback_c c_str cb
+ print =<< peekCString c_str
+ freeHaskellFunPtr cb
+
+
+
+foreign import javascript "hello_c_wrapper" hello_c :: Int -> IO Int
+foreign import javascript "write_c_wrapper" write_c :: CString -> IO ()
+foreign import javascript "alloc_c_wrapper" alloc_c :: IO CString
+foreign import javascript "modify_c_wrapper" modify_c :: CString -> IO ()
+foreign import javascript "callback_c_wrapper" callback_c :: CString -> (FunPtr (CChar -> CChar)) -> IO ()
+
+foreign import ccall "wrapper" mkCallback :: (CChar -> CChar) -> IO (FunPtr (CChar -> CChar))
+
=====================================
testsuite/tests/javascript/js-c-sources/js-c-sources01.stdout
=====================================
@@ -0,0 +1,8 @@
+Hello from C: 17
+Hello from C: 18
+Hello from C: 19
+20
+Received string: Hello from Haskell
+"ghc"
+"HELLO FROM HASKELL"
+"HELLO FROM HASKELL 1234"
=====================================
testsuite/tests/javascript/js-c-sources/js-c-sources01_c.c
=====================================
@@ -0,0 +1,33 @@
+#include<stdio.h>
+#include<stdlib.h>
+
+int hello_c(int a) {
+ printf("Hello from C: %d\n", a);
+ return a+1;
+}
+
+
+void write_c(char * s) {
+ printf("Received string: %s\n", s);
+}
+
+char * alloc_c() {
+ char * s = malloc(4);
+ s[0] = 'g';
+ s[1] = 'h';
+ s[2] = 'c';
+ s[3] = '\0';
+ return s;
+}
+
+void modify_c(char * s) {
+ for (int i=0;s[i]!=0;i++) {
+ if (s[i] >= 'a' && s[i] <= 'z') s[i] -= 32;
+ }
+}
+
+void callback_c(char *s, char (*f)(char)) {
+ for (int i=0;s[i]!=0;i++) {
+ s[i] = f(s[i]);
+ }
+}
=====================================
testsuite/tests/javascript/js-c-sources/js-c-sources01_js.js
=====================================
@@ -0,0 +1,56 @@
+//#OPTIONS:CPP
+//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_hello_c
+//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_write_c,_alloc_c,_modify_c
+//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_callback_c
+//#OPTIONS:EMCC:EXPORTED_FUNCTIONS=_free,_strlen,_malloc
+
+function hello_c_wrapper(a) {
+ return _hello_c(a);
+}
+
+function write_c_wrapper(a,o) {
+ h$withCStringOnHeap(a,o, (ptr) => {
+ _write_c(ptr)
+ });
+}
+
+function alloc_c_wrapper() {
+ const ptr = _alloc_c();
+ const a = h$copyCStringFromHeap(ptr);
+ _free(ptr);
+ RETURN_ADDR(a,0);
+}
+
+function modify_c_wrapper(a,o) {
+ const len = h$strlen(a,o);
+ h$withOutBufferOnHeap(a, o, len, (ptr) => {
+ _modify_c(ptr);
+ });
+}
+
+function callback_c_wrapper(a,o,f_ptr,f_o) {
+ const cb_c = h$registerFunPtrOnHeap(f_ptr, f_o, false, 'ii', (cb) => {
+ // we return the function that will actually be called by the C code.
+ // This is a wrapper to call the Haskell function (cb).
+ //
+ // Here it's simple because we only have 'char' arguments and results
+ // but with other arguments it could have to copy data from/to the heap
+ // (e.g. if CStrings were involved).
+ //
+ // 'ii' is the type of the function, according to Emscripten (return one
+ // int, take one int as argument).
+ //
+ // Finally we pass `false` because we don't need to unregister the
+ // callback asynchronously as would be the case for a `destructor`-like
+ // callback. We unregister it below explicitly after its use.
+ return function(arg) {
+ return cb(arg);
+ };
+ });
+ const len = h$strlen(a,o);
+ h$withOutBufferOnHeap(a, o, len, (ptr) => {
+ _callback_c(ptr, cb_c);
+ });
+
+ h$unregisterFunPtrFromHeap(cb_c);
+}
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8bbe12f288d2916c598cb72b87dfb602739ff80c...b71b392f2379b80a5bb3d98742247a6c3a594076
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8bbe12f288d2916c598cb72b87dfb602739ff80c...b71b392f2379b80a5bb3d98742247a6c3a594076
You're receiving this email because of your account on gitlab.haskell.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240212/6bfc496b/attachment-0001.html>
More information about the ghc-commits
mailing list