[Git][ghc/ghc][master] Escape multiple arguments in the settings file

Marge Bot (@marge-bot) gitlab at gitlab.haskell.org
Tue Mar 19 18:48:37 UTC 2024



Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
31bf85ee by Fendor at 2024-03-19T14:48:08-04:00
Escape multiple arguments in the settings file

Uses responseFile syntax.

The issue arises when GHC is installed on windows into a location that
has a space, for example the user name is 'Fake User'.
The $topdir will also contain a space, consequentially.
When we resolve the top dir in the string `-I$topdir/mingw/include`,
then `words` will turn this single argument into `-I/C/Users/Fake` and
`User/.../mingw/include` which trips up the flag argument parser of
various tools such as gcc or clang.
We avoid this by escaping the $topdir before replacing it in
`initSettngs`.
Additionally, we allow to escape spaces and quotation marks for
arguments in `settings` file.

Add regression test case to count the number of options after variable
expansion and argument escaping took place.
Additionally, we check that escaped spaces and double quotation marks are
correctly parsed.

- - - - -


8 changed files:

- compiler/GHC/Settings/IO.hs
- hadrian/src/Rules/Generate.hs
- + test.hs
- + testsuite/tests/ghc-api/settings-escape/T11938.hs
- + testsuite/tests/ghc-api/settings-escape/T11938.stderr
- + testsuite/tests/ghc-api/settings-escape/all.T
- + testsuite/tests/ghc-api/settings-escape/ghc-install-folder/lib/.gitkeep
- + testsuite/tests/ghc-api/settings-escape/ghc-install-folder/mingw/.gitkeep


Changes:

=====================================
compiler/GHC/Settings/IO.hs
=====================================
@@ -16,9 +16,11 @@ import GHC.Utils.CliOption
 import GHC.Utils.Fingerprint
 import GHC.Platform
 import GHC.Utils.Panic
+import GHC.ResponseFile
 import GHC.Settings
 import GHC.SysTools.BaseDir
 
+import Data.Char
 import Control.Monad.Trans.Except
 import Control.Monad.IO.Class
 import qualified Data.Map as Map
@@ -72,9 +74,13 @@ initSettings top_dir = do
   -- just partially applying those functions and throwing 'Left's; they're
   -- written in a very portable style to keep ghc-boot light.
   let getSetting key = either pgmError pure $
-        getRawFilePathSetting top_dir settingsFile mySettings key
+        -- Escape the 'top_dir', to make sure we don't accidentally introduce an
+        -- unescaped space
+        getRawFilePathSetting (escapeArg top_dir) settingsFile mySettings key
       getToolSetting :: String -> ExceptT SettingsError m String
-      getToolSetting key = expandToolDir useInplaceMinGW mtool_dir <$> getSetting key
+        -- Escape the 'mtool_dir', to make sure we don't accidentally introduce
+        -- an unescaped space
+      getToolSetting key = expandToolDir useInplaceMinGW (fmap escapeArg mtool_dir) <$> getSetting key
   targetPlatformString <- getSetting "target platform string"
   cc_prog <- getToolSetting "C compiler command"
   cxx_prog <- getToolSetting "C++ compiler command"
@@ -91,10 +97,10 @@ initSettings top_dir = do
   let unreg_cc_args = if platformUnregisterised platform
                       then ["-DNO_REGS", "-DUSE_MINIINTERPRETER"]
                       else []
-      cpp_args    = map Option (words cpp_args_str)
-      hs_cpp_args = map Option (words hs_cpp_args_str)
-      cc_args  = words cc_args_str ++ unreg_cc_args
-      cxx_args = words cxx_args_str
+      cpp_args    = map Option (unescapeArgs cpp_args_str)
+      hs_cpp_args = map Option (unescapeArgs hs_cpp_args_str)
+      cc_args  = unescapeArgs cc_args_str ++ unreg_cc_args
+      cxx_args = unescapeArgs cxx_args_str
 
       -- The extra flags we need to pass gcc when we invoke it to compile .hc code.
       --
@@ -135,12 +141,12 @@ initSettings top_dir = do
   let   as_prog  = cc_prog
         as_args  = map Option cc_args
         ld_prog  = cc_prog
-        ld_args  = map Option (cc_args ++ words cc_link_args_str)
+        ld_args  = map Option (cc_args ++ unescapeArgs cc_link_args_str)
   ld_r_prog <- getToolSetting "Merge objects command"
   ld_r_args <- getToolSetting "Merge objects flags"
   let ld_r
         | null ld_r_prog = Nothing
-        | otherwise      = Just (ld_r_prog, map Option $ words ld_r_args)
+        | otherwise      = Just (ld_r_prog, map Option $ unescapeArgs ld_r_args)
 
   llvmTarget <- getSetting "LLVM target"
 
@@ -261,3 +267,19 @@ getTargetPlatform settingsFile settings = do
     , platformHasLibm = targetHasLibm
     , platform_constants = Nothing -- will be filled later when loading (or building) the RTS unit
     }
+
+-- ----------------------------------------------------------------------------
+-- Escape Args helpers
+-- ----------------------------------------------------------------------------
+
+-- | Just like 'GHC.ResponseFile.escapeArg', but it is not exposed from base.
+escapeArg :: String -> String
+escapeArg = reverse . foldl' escape []
+
+escape :: String -> Char -> String
+escape cs c
+  |    isSpace c
+    || '\\' == c
+    || '\'' == c
+    || '"'  == c = c:'\\':cs -- n.b., our caller must reverse the result
+  | otherwise    = c:cs


=====================================
hadrian/src/Rules/Generate.hs
=====================================
@@ -6,6 +6,7 @@ module Rules.Generate (
     ) where
 
 import Development.Shake.FilePath
+import Data.Char (isSpace)
 import qualified Data.Set as Set
 import Base
 import qualified Context
@@ -416,7 +417,7 @@ generateSettings = do
 
         , ("Use interpreter", expr $ yesNo <$> ghcWithInterpreter)
         , ("Support SMP", expr $ yesNo <$> targetSupportsSMP)
-        , ("RTS ways", unwords . map show . Set.toList <$> getRtsWays)
+        , ("RTS ways", escapeArgs . map show . Set.toList <$> getRtsWays)
         , ("Tables next to code", queryTarget (yesNo . tgtTablesNextToCode))
         , ("Leading underscore",  queryTarget (yesNo . tgtSymbolsHaveLeadingUnderscore))
         , ("Use LibFFI", expr $ yesNo <$> useLibffiForAdjustors)
@@ -431,23 +432,23 @@ generateSettings = do
             ++ ["]"]
   where
     ccPath  = prgPath . ccProgram . tgtCCompiler
-    ccFlags = unwords . prgFlags . ccProgram . tgtCCompiler
+    ccFlags = escapeArgs . prgFlags . ccProgram . tgtCCompiler
     cxxPath  = prgPath . cxxProgram . tgtCxxCompiler
-    cxxFlags = unwords . prgFlags . cxxProgram . tgtCxxCompiler
-    clinkFlags = unwords . prgFlags . ccLinkProgram . tgtCCompilerLink
+    cxxFlags = escapeArgs . prgFlags . cxxProgram . tgtCxxCompiler
+    clinkFlags = escapeArgs . prgFlags . ccLinkProgram . tgtCCompilerLink
     linkSupportsNoPie = yesNo . ccLinkSupportsNoPie . tgtCCompilerLink
     cppPath  = prgPath . cppProgram . tgtCPreprocessor
-    cppFlags = unwords . prgFlags . cppProgram . tgtCPreprocessor
+    cppFlags = escapeArgs . prgFlags . cppProgram . tgtCPreprocessor
     hsCppPath  = prgPath . hsCppProgram . tgtHsCPreprocessor
-    hsCppFlags = unwords . prgFlags . hsCppProgram . tgtHsCPreprocessor
+    hsCppFlags = escapeArgs . prgFlags . hsCppProgram . tgtHsCPreprocessor
     mergeObjsPath  = maybe "" (prgPath . mergeObjsProgram) . tgtMergeObjs
-    mergeObjsFlags = maybe "" (unwords . prgFlags . mergeObjsProgram) . tgtMergeObjs
+    mergeObjsFlags = maybe "" (escapeArgs . prgFlags . mergeObjsProgram) . tgtMergeObjs
     linkSupportsSingleModule    = yesNo . ccLinkSupportsSingleModule . tgtCCompilerLink
     linkSupportsFilelist        = yesNo . ccLinkSupportsFilelist . tgtCCompilerLink
     linkSupportsCompactUnwind   = yesNo . ccLinkSupportsCompactUnwind . tgtCCompilerLink
     linkIsGnu                   = yesNo . ccLinkIsGnu . tgtCCompilerLink
     arPath  = prgPath . arMkArchive . tgtAr
-    arFlags = unwords . prgFlags . arMkArchive . tgtAr
+    arFlags = escapeArgs . prgFlags . arMkArchive . tgtAr
     arSupportsAtFile' = yesNo . arSupportsAtFile . tgtAr
     arSupportsDashL' = yesNo . arSupportsDashL . tgtAr
     ranlibPath  = maybe "" (prgPath . ranlibProgram) . tgtRanlib
@@ -571,3 +572,19 @@ generatePlatformHostHs = do
         , "hostPlatformArchOS :: ArchOS"
         , "hostPlatformArchOS = ArchOS hostPlatformArch hostPlatformOS"
         ]
+
+-- | Just like 'GHC.ResponseFile.escapeArgs', but use spaces instead of newlines
+-- for splitting elements.
+escapeArgs :: [String] -> String
+escapeArgs = unwords . map escapeArg
+
+escapeArg :: String -> String
+escapeArg = reverse . foldl' escape []
+
+escape :: String -> Char -> String
+escape cs c
+  |    isSpace c
+    || '\\' == c
+    || '\'' == c
+    || '"'  == c = c:'\\':cs -- n.b., our caller must reverse the result
+  | otherwise    = c:cs


=====================================
test.hs
=====================================
@@ -0,0 +1,14 @@
+import Data.Char
+import Data.Foldable
+-- | Just like 'GHC.ResponseFile.escapeArg', but it is not exposed from base.
+escapeArg :: String -> String
+escapeArg = reverse . foldl' escape []
+
+escape :: String -> Char -> String
+escape cs c
+  |    isSpace c
+    || '\\' == c
+    || '\'' == c
+    || '"'  == c = c:'\\':cs -- n.b., our caller must reverse the result
+  | otherwise    = c:cs
+


=====================================
testsuite/tests/ghc-api/settings-escape/T11938.hs
=====================================
@@ -0,0 +1,136 @@
+
+import GHC
+import GHC.ResponseFile (unescapeArgs)
+import GHC.Settings
+import GHC.Settings.IO
+import GHC.Driver.DynFlags
+import GHC.Driver.Session
+import GHC.Driver.Env
+import GHC.Utils.CliOption (Option, showOpt)
+
+import Control.Monad.Trans.Except (runExceptT)
+import qualified Data.Set as Set
+import Data.Maybe (fromJust)
+import Data.List (intercalate)
+import System.Directory (makeAbsolute, createDirectory)
+import System.Environment
+import System.IO (hPutStrLn, stderr)
+import System.Exit (exitWith, ExitCode(ExitFailure))
+
+-- Precondition: this test case must be executed in a directory with a space.
+--
+-- First we get the current settings file and amend it with extra arguments that we *know*
+-- contain spaces by construction.
+-- Then, we write this new settings file to disk where we know one of the parent
+-- directories contains a space by virtue of the ghc test suite. This is important
+-- for testing variable substitution containing spaces in the settings file.
+-- At last, we parse the settings file again and compare the options to the original settings
+-- file. As we added a fixed number of options, we verify that relevant all config options parser
+-- escaped the spaces appropriately.
+main :: IO ()
+main = do
+  libdir:args <- getArgs
+
+  (rawSettingOpts, originalSettings) <- runGhc (Just libdir) $ do
+    dflags <- hsc_dflags <$> getSession
+    pure (rawSettings dflags, settings dflags)
+
+  topDir <- makeAbsolute "./ghc-install-folder/lib"
+
+  let argsWithSpaces = "\"-some option\" -some\\ other"
+      numberOfExtraArgs = length $ unescapeArgs argsWithSpaces
+      -- These are all options that can have multiple 'String' or 'Option' values.
+      -- We explicitly do not add 'C compiler link flags' here, as 'initSettings'
+      -- already adds the options of "C compiler flags" to this config field.
+      multipleArguments = Set.fromList
+        [ "Haskell CPP flags"
+        , "C compiler flags"
+        , "C++ compiler flags"
+        , "CPP flags"
+        , "Merge objects flags"
+        ]
+
+  let rawSettingOptsWithExtraArgs =
+        map (\(name, args) -> if Set.member name multipleArguments
+          then (name, args ++ " " ++ argsWithSpaces)
+          else (name, args)) rawSettingOpts
+
+  -- write out the modified settings. We try to keep it legible
+  writeFile (topDir ++ "/settings") $
+    "[" ++ (intercalate "\n," (map show rawSettingOptsWithExtraArgs)) ++ "]"
+
+  settingsm <- runExceptT $ initSettings topDir
+
+  case settingsm of
+    Left (SettingsError_MissingData msg) -> do
+      hPutStrLn stderr $ "WARNING: " ++ show msg
+      hPutStrLn stderr $ "dont know target platform"
+      exitWith $ ExitFailure 1
+    Left (SettingsError_BadData msg) -> do
+      hPutStrLn stderr msg
+      exitWith $ ExitFailure 1
+    Right settings -> do
+      let
+        recordSetting :: String -> (Settings -> [String]) -> IO ()
+        recordSetting label selector = do
+          let opts = selector settings
+              origOpts = selector originalSettings
+              -- At least one of the options must contain a space
+              containsSpaces = any (' ' `elem`) opts
+          hPutStrLn stderr
+              $ "=== '" <> label <> "' contains " ++ show numberOfExtraArgs ++ " new entries: "
+              ++ show (length opts == length origOpts + numberOfExtraArgs)
+          hPutStrLn stderr $ "    Contains spaces: " ++ show containsSpaces
+
+        recordSettingM :: String -> (Settings -> Maybe [a]) -> IO ()
+        recordSettingM label selector = do
+          let optsM = selector settings
+              origOptsM = selector originalSettings
+          hPutStrLn stderr
+              $ "=== '" <> label <> "' contains expected entries: "
+              ++ show (case (optsM, origOptsM) of
+                  (Just opts, Just origOpts) -> length opts == length origOpts + numberOfExtraArgs
+                  (Nothing, Nothing) -> True
+                  (Just _, Nothing) -> False
+                  (Nothing, Just _) -> False
+              )
+
+        recordFpSetting :: String -> (Settings -> String) -> IO ()
+        recordFpSetting label selector = do
+          let fp = selector settings
+              containsOnlyEscapedSpaces ('\\':' ':xs) = containsOnlyEscapedSpaces xs
+              containsOnlyEscapedSpaces (' ':_) = False
+              containsOnlyEscapedSpaces [] = True
+              containsOnlyEscapedSpaces (_:xs) = containsOnlyEscapedSpaces xs
+
+              -- Filepath may only contain escaped spaces
+              containsSpaces = containsOnlyEscapedSpaces fp
+          hPutStrLn stderr $ "=== FilePath '" <> label <> "' contains only escaped spaces: " ++ show containsSpaces
+
+      -- Assertions
+      -- Assumption: this test case is executed in a directory with a space.
+
+      -- Setting 'Haskell CPP flags' contains '$topdir' reference.
+      -- Resolving those while containing spaces, should not introduce more options.
+      recordSetting "Haskell CPP flags" (map showOpt . snd . toolSettings_pgm_P . sToolSettings)
+      -- Setting 'C compiler flags' contains strings with spaces.
+      -- GHC should not split these by word.
+      recordSetting "C compiler flags" (toolSettings_opt_c . sToolSettings)
+      -- Setting 'C compiler link flags' contains strings with spaces.
+      -- GHC should not split these by word.
+      -- While we did not explicitly add the extra arguments, 'initSettings' adds "C compiler flags" options
+      -- to this field.
+      recordSetting "C compiler link flags" (map showOpt . snd . toolSettings_pgm_l . sToolSettings)
+      -- Setting 'C++ compiler flags' contains strings with spaces.
+      -- GHC should not split these by word.
+      recordSetting "C++ compiler flags" (toolSettings_opt_cxx . sToolSettings)
+      -- Setting 'CPP flags' contains strings with spaces.
+      -- GHC should not split these by word.
+      recordSetting "CPP flags" (map showOpt . snd . toolSettings_pgm_cpp . sToolSettings)
+      -- Setting 'Merge objects flags' contains strings with spaces.
+      -- GHC should not split these by word.
+      -- If 'Nothing', ignore this test, otherwise the same assertion holds as before.
+      recordSettingM "Merge objects flags" (fmap (map showOpt . snd) . toolSettings_pgm_lm . sToolSettings)
+      -- Setting 'unlit command' contains '$topdir' reference.
+      -- Resolving those while containing spaces, should be escaped correctly.
+      recordFpSetting "unlit command" (toolSettings_pgm_L . sToolSettings)


=====================================
testsuite/tests/ghc-api/settings-escape/T11938.stderr
=====================================
@@ -0,0 +1,12 @@
+=== 'Haskell CPP flags' contains 2 new entries: True
+    Contains spaces: True
+=== 'C compiler flags' contains 2 new entries: True
+    Contains spaces: True
+=== 'C compiler link flags' contains 2 new entries: True
+    Contains spaces: True
+=== 'C++ compiler flags' contains 2 new entries: True
+    Contains spaces: True
+=== 'CPP flags' contains 2 new entries: True
+    Contains spaces: True
+=== 'Merge objects flags' contains expected entries: True
+=== FilePath 'unlit command' contains only escaped spaces: True


=====================================
testsuite/tests/ghc-api/settings-escape/all.T
=====================================
@@ -0,0 +1,5 @@
+test('T11938',
+    [ extra_run_opts('"' + config.libdir + '"')
+    , extra_files(['ghc-install-folder/'])]
+    , compile_and_run
+    , ['-package ghc -package directory -package containers -package transformers'])


=====================================
testsuite/tests/ghc-api/settings-escape/ghc-install-folder/lib/.gitkeep
=====================================


=====================================
testsuite/tests/ghc-api/settings-escape/ghc-install-folder/mingw/.gitkeep
=====================================



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/31bf85ee49fe2ca0b17eaee0774e395f017a9373

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/31bf85ee49fe2ca0b17eaee0774e395f017a9373
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20240319/dfedb505/attachment-0001.html>


More information about the ghc-commits mailing list