[Git][ghc/ghc][wip/marge_bot_batch_merge_job] 6 commits: users-guide: Document how to disable package environments

Tue Apr 9 16:40:08 UTC 2019


 Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC


Commits:
36d38047 by Ben Gamari at 2019-04-09T14:23:47Z
users-guide: Document how to disable package environments

As noted in #16309 this somehow went undocumented.

- - - - -
af4cea7f by Artem Pyanykh at 2019-04-09T14:30:13Z
codegen: fix memset unroll for small bytearrays, add 64-bit sets

Fixes #16052

When the offset in `setByteArray#` is statically known, we can provide
better alignment guarantees then just 1 byte.

Also, memset can now do 64-bit wide sets.

The current memset intrinsic is not optimal however and can be
improved for the case when we know that we deal with

(baseAddress at known alignment) + offset

For instance, on 64-bit

`setByteArray# s 1# 23# 0#`

given that bytearray is 8 bytes aligned could be unrolled into
`movb, movw, movl, movq, movq`; but currently it is
`movb x23` since alignment of 1 is all we can embed into MO_Memset op.

- - - - -
bd2de4f0 by Artem Pyanykh at 2019-04-09T14:30:13Z
codegen: use newtype for Alignment in BasicTypes

- - - - -
14a78707 by Artem Pyanykh at 2019-04-09T14:30:13Z
docs: add a note about changes in memset unrolling to 8.10.1-notes

- - - - -
97b412e5 by Sylvain Henry at 2019-04-09T16:39:57Z
Hadrian: fix library install paths in bindist Makefile (#16498)

GHC now works out-of-the-box (i.e. without any wrapper script) by
assuming that @bin@ and @lib@ directories sit next to each other. In
particular, its RUNPATH uses $ORIGIN-based relative path to find the
libraries.

However, to be good citizens we want to support the case where @bin@ and
@lib@ directories (respectively BINDIR and LIBDIR) don't sit next to
each other or are renamed. To do that the install script simply creates
GHC specific @bin@ and @lib@ siblings directories into:

   LIBDIR/ghc-VERSION/{bin,lib}

Then it installs wrapper scripts into BINDIR that call the appropriate
programs into LIBDIR/ghc-VERSION/bin/.

The issue fixed by this patch is that libraries were not installed into
LIBDIR/ghc-VERSION/lib but directly into LIBDIR.

- - - - -
8e81d0ca by Ben Gamari at 2019-04-09T16:39:57Z
gitlab: Bump cabal-install version used by Windows builds to 2.4

Hopefully fixes Windows Hadrian build.

- - - - -


15 changed files:

- .gitlab-ci.yml
- .gitlab/win32-init.sh
- compiler/basicTypes/BasicTypes.hs
- compiler/codeGen/StgCmmPrim.hs
- compiler/main/DynFlags.hs
- compiler/nativeGen/X86/CodeGen.hs
- compiler/nativeGen/X86/Ppr.hs
- compiler/utils/Util.hs
- docs/users_guide/8.10.1-notes.rst
- docs/users_guide/packages.rst
- hadrian/src/Rules/BinaryDist.hs
- testsuite/driver/testlib.py
- testsuite/tests/codeGen/should_gen_asm/all.T
- + testsuite/tests/codeGen/should_gen_asm/bytearray-memset-unroll.asm
- + testsuite/tests/codeGen/should_gen_asm/bytearray-memset-unroll.hs


Changes:

=====================================
.gitlab-ci.yml
=====================================
@@ -4,6 +4,10 @@ variables:
   # Commit of ghc/ci-images repository from which to pull Docker images
   DOCKER_REV: cefaee3c742af193e0f7783f87edb0d35374515c
 
+  # Sequential version number capturing the versions of all tools fetched by
+  # .gitlab/win32-init.sh.
+  WINDOWS_TOOLCHAIN_VERSION: 1
+
 before_script:
   - python3 .gitlab/fix-submodules.py
   - git submodule sync --recursive
@@ -525,7 +529,7 @@ validate-x86_64-windows-hadrian:
   variables:
     MSYSTEM: MINGW64
   cache:
-    key: x86_64-windows-hadrian
+    key: "x86_64-windows-hadrian-$WINDOWS_TOOLCHAIN_VERSION"
 
 nightly-i386-windows-hadrian:
   extends: .build-windows-hadrian
@@ -535,7 +539,7 @@ nightly-i386-windows-hadrian:
     variables:
       - $NIGHTLY
   cache:
-    key: i386-windows-hadrian
+    key: "i386-windows-hadrian-$WINDOWS_TOOLCHAIN_VERSION"
 
 .build-windows-make:
   extends: .build-windows
@@ -571,7 +575,7 @@ validate-x86_64-windows:
     MSYSTEM: MINGW64
     CONFIGURE_ARGS: "--target=x86_64-unknown-mingw32"
   cache:
-    key: x86_64-windows
+    key: "x86_64-windows-$WINDOWS_TOOLCHAIN_VERSION"
 
 # Normal Windows validate builds are profiled; that won't do for releases.
 release-x86_64-windows:
@@ -592,7 +596,7 @@ release-i386-windows:
     BUILD_FLAVOUR: "perf"
     CONFIGURE_ARGS: "--target=i386-unknown-mingw32"
   cache:
-    key: i386-windows
+    key: "i386-windows-$WINDOWS_TOOLCHAIN_VERSION"
 
 nightly-i386-windows:
   extends: .build-windows-make
@@ -603,7 +607,7 @@ nightly-i386-windows:
     MSYSTEM: MINGW32
     CONFIGURE_ARGS: "--target=i386-unknown-mingw32"
   cache:
-    key: i386-windows
+    key: "i386-windows-$WINDOWS_TOOLCHAIN_VERSION"
 
 ############################################################
 # Cleanup


=====================================
.gitlab/win32-init.sh
=====================================
@@ -27,7 +27,8 @@ if [ ! -e $toolchain/bin/ghc ]; then
 fi
 
 if [ ! -e $toolchain/bin/cabal ]; then
-    curl https://www.haskell.org/cabal/release/cabal-install-2.2.0.0/cabal-install-2.2.0.0-i386-unknown-mingw32.zip > /tmp/cabal.zip
+    url="https://downloads.haskell.org/~cabal/cabal-install-latest/cabal-install-2.4.1.0-x86_64-unknown-mingw32.zip"
+    curl $url > /tmp/cabal.zip
     unzip /tmp/cabal.zip
     mv cabal.exe $toolchain/bin
 fi


=====================================
compiler/basicTypes/BasicTypes.hs
=====================================
@@ -26,7 +26,7 @@ module BasicTypes(
 
         Arity, RepArity, JoinArity,
 
-        Alignment,
+        Alignment, mkAlignment, alignmentOf, alignmentBytes,
 
         PromotionFlag(..), isPromoted,
         FunctionOrData(..),
@@ -116,6 +116,7 @@ import Outputable
 import SrcLoc ( Located,unLoc )
 import Data.Data hiding (Fixity, Prefix, Infix)
 import Data.Function (on)
+import Data.Bits
 
 {-
 ************************************************************************
@@ -196,8 +197,39 @@ fIRST_TAG =  1
 ************************************************************************
 -}
 
-type Alignment = Int -- align to next N-byte boundary (N must be a power of 2).
-
+-- | A power-of-two alignment
+newtype Alignment = Alignment { alignmentBytes :: Int } deriving (Eq, Ord)
+
+-- Builds an alignment, throws on non power of 2 input. This is not
+-- ideal, but convenient for internal use and better then silently
+-- passing incorrect data.
+mkAlignment :: Int -> Alignment
+mkAlignment n
+  | n == 1 = Alignment 1
+  | n == 2 = Alignment 2
+  | n == 4 = Alignment 4
+  | n == 8 = Alignment 8
+  | n == 16 = Alignment 16
+  | n == 32 = Alignment 32
+  | n == 64 = Alignment 64
+  | n == 128 = Alignment 128
+  | n == 256 = Alignment 256
+  | n == 512 = Alignment 512
+  | otherwise = panic "mkAlignment: received either a non power of 2 argument or > 512"
+
+-- Calculates an alignment of a number. x is aligned at N bytes means
+-- the remainder from x / N is zero. Currently, interested in N <= 8,
+-- but can be expanded to N <= 16 or N <= 32 if used within SSE or AVX
+-- context.
+alignmentOf :: Int -> Alignment
+alignmentOf x = case x .&. 7 of
+  0 -> Alignment 8
+  4 -> Alignment 4
+  2 -> Alignment 2
+  _ -> Alignment 1
+
+instance Outputable Alignment where
+  ppr (Alignment m) = ppr m
 {-
 ************************************************************************
 *                                                                      *


=====================================
compiler/codeGen/StgCmmPrim.hs
=====================================
@@ -2073,10 +2073,17 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
 -- character.
 doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
                  -> FCode ()
-doSetByteArrayOp ba off len c
-    = do dflags <- getDynFlags
-         p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
-         emitMemsetCall p c len 1
+doSetByteArrayOp ba off len c = do
+    dflags <- getDynFlags
+
+    let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
+        offsetAlignment = case off of
+            CmmLit (CmmInt intOff _) -> alignmentOf (fromInteger intOff)
+            _ -> mkAlignment 1
+        align = min byteArrayAlignment offsetAlignment
+
+    p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
+    emitMemsetCall p c len align
 
 -- ----------------------------------------------------------------------------
 -- Allocating arrays
@@ -2347,7 +2354,7 @@ emitSetCards dst_start dst_cards_start n = do
     emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
         (mkIntExpr dflags 1)
         (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
-        1 -- no alignment (1 byte)
+        (mkAlignment 1) -- no alignment (1 byte)
 
 -- Convert an element index to a card index
 cardCmm :: DynFlags -> CmmExpr -> CmmExpr
@@ -2473,11 +2480,11 @@ emitMemmoveCall dst src n align = do
 
 -- | Emit a call to @memset at .  The second argument must fit inside an
 -- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
 emitMemsetCall dst c n align = do
     emitPrimCall
         [ {- no results -} ]
-        (MO_Memset align)
+        (MO_Memset (alignmentBytes align))
         [ dst, c, n ]
 
 emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()


=====================================
compiler/main/DynFlags.hs
=====================================
@@ -147,6 +147,7 @@ module DynFlags (
 #include "GHCConstantsHaskellExports.hs"
         bLOCK_SIZE_W,
         wORD_SIZE_IN_BITS,
+        wordAlignment,
         tAG_MASK,
         mAX_PTR_TAG,
         tARGET_MIN_INT, tARGET_MAX_INT, tARGET_MAX_WORD,
@@ -205,7 +206,7 @@ import Maybes
 import MonadUtils
 import qualified Pretty
 import SrcLoc
-import BasicTypes       ( IntWithInf, treatZeroAsInf )
+import BasicTypes       ( Alignment, alignmentOf, IntWithInf, treatZeroAsInf )
 import FastString
 import Fingerprint
 import Outputable
@@ -5661,6 +5662,9 @@ bLOCK_SIZE_W dflags = bLOCK_SIZE dflags `quot` wORD_SIZE dflags
 wORD_SIZE_IN_BITS :: DynFlags -> Int
 wORD_SIZE_IN_BITS dflags = wORD_SIZE dflags * 8
 
+wordAlignment :: DynFlags -> Alignment
+wordAlignment dflags = alignmentOf (wORD_SIZE dflags)
+
 tAG_MASK :: DynFlags -> Int
 tAG_MASK dflags = (1 `shiftL` tAG_BITS dflags) - 1
 


=====================================
compiler/nativeGen/X86/CodeGen.hs
=====================================
@@ -128,7 +128,7 @@ cmmTopCodeGen (CmmProc info lab live graph) = do
       Nothing -> return tops
 
 cmmTopCodeGen (CmmData sec dat) = do
-  return [CmmData sec (1, dat)]  -- no translation, we just use CmmStatic
+  return [CmmData sec (mkAlignment 1, dat)]  -- no translation, we just use CmmStatic
 
 
 basicBlockCodeGen
@@ -569,7 +569,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
       return (Any format code)
 
    | otherwise = do
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       loadFloatAmode True w addr code
 
   float_const_x87 = case w of
@@ -583,7 +583,7 @@ getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
         in  return (Any FF80 code)
 
     _otherwise -> do
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       loadFloatAmode False w addr code
 
 -- catch simple cases of zero- or sign-extended load
@@ -1247,7 +1247,7 @@ getNonClobberedOperand (CmmLit lit) = do
   if use_sse2 && isSuitableFloatingPointLit lit
     then do
       let CmmFloat _ w = lit
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       return (OpAddr addr, code)
      else do
 
@@ -1303,7 +1303,7 @@ getOperand (CmmLit lit) = do
   if (use_sse2 && isSuitableFloatingPointLit lit)
     then do
       let CmmFloat _ w = lit
-      Amode addr code <- memConstant (widthInBytes w) lit
+      Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
       return (OpAddr addr, code)
     else do
 
@@ -1351,7 +1351,7 @@ addAlignmentCheck align reg =
              , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
              ]
 
-memConstant :: Int -> CmmLit -> NatM Amode
+memConstant :: Alignment -> CmmLit -> NatM Amode
 memConstant align lit = do
   lbl <- getNewLabelNat
   let rosection = Section ReadOnlyData lbl
@@ -1848,17 +1848,25 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
           CmmLit (CmmInt c _),
           CmmLit (CmmInt n _)]
          _
-    | fromInteger insns <= maxInlineMemsetInsns dflags && align .&. 3 == 0 = do
+    | fromInteger insns <= maxInlineMemsetInsns dflags = do
         code_dst <- getAnyReg dst
         dst_r <- getNewRegNat format
-        return $ code_dst dst_r `appOL` go dst_r (fromInteger n)
+        if format == II64 && n >= 8 then do
+          code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64))
+          imm8byte_r <- getNewRegNat II64
+          return $ code_dst dst_r `appOL`
+                   code_imm8byte imm8byte_r `appOL`
+                   go8 dst_r imm8byte_r (fromInteger n)
+        else
+          return $ code_dst dst_r `appOL`
+                   go4 dst_r (fromInteger n)
   where
-    (format, val) = case align .&. 3 of
-        2 -> (II16, c2)
-        0 -> (II32, c4)
-        _ -> (II8, c)
+    maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported
+    effectiveAlignment = min (alignmentOf align) maxAlignment
+    format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
     c2 = c `shiftL` 8 .|. c
     c4 = c2 `shiftL` 16 .|. c2
+    c8 = c4 `shiftL` 32 .|. c4
 
     -- The number of instructions we will generate (approx). We need 1
     -- instructions per move.
@@ -1868,25 +1876,45 @@ genCCall dflags _ (PrimTarget (MO_Memset align)) _
     sizeBytes :: Integer
     sizeBytes = fromIntegral (formatInBytes format)
 
-    go :: Reg -> Integer -> OrdList Instr
-    go dst i
-        -- TODO: Add movabs instruction and support 64-bit sets.
-        | i >= sizeBytes =  -- This might be smaller than the below sizes
-            unitOL (MOV format (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
-            go dst (i - sizeBytes)
-        | i >= 4 =  -- Will never happen on 32-bit
-            unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
-            go dst (i - 4)
-        | i >= 2 =
-            unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
-            go dst (i - 2)
-        | i >= 1 =
-            unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
-            go dst (i - 1)
-        | otherwise = nilOL
+    -- Depending on size returns the widest MOV instruction and its
+    -- width.
+    gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
+    gen4 addr size
+        | size >= 4 =
+            (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
+        | size >= 2 =
+            (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
+        | size >= 1 =
+            (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
+        | otherwise = (nilOL, 0)
+
+    -- Generates a 64-bit wide MOV instruction from REG to MEM.
+    gen8 :: AddrMode -> Reg -> InstrBlock
+    gen8 addr reg8byte =
+      unitOL (MOV format (OpReg reg8byte) (OpAddr addr))
+
+    -- Unrolls memset when the widest MOV is <= 4 bytes.
+    go4 :: Reg -> Integer -> InstrBlock
+    go4 dst left =
+      if left <= 0 then nilOL
+      else curMov `appOL` go4 dst (left - curWidth)
       where
-        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
-                   (ImmInteger (n - i))
+        possibleWidth = minimum [left, sizeBytes]
+        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
+        (curMov, curWidth) = gen4 dst_addr possibleWidth
+
+    -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
+    -- argument). Falls back to go4 when all 8 byte moves are
+    -- exhausted.
+    go8 :: Reg -> Reg -> Integer -> InstrBlock
+    go8 dst reg8byte left =
+      if possibleWidth >= 8 then
+        let curMov = gen8 dst_addr reg8byte
+        in  curMov `appOL` go8 dst reg8byte (left - 8)
+      else go4 dst left
+      where
+        possibleWidth = minimum [left, sizeBytes]
+        dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
 
 genCCall _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL
         -- write barrier compiles to no code on x86/x86-64;
@@ -2322,7 +2350,7 @@ genCCall _ is32Bit target dest_regs args bid = do
           let
             const | FF32 <- fmt = CmmInt 0x7fffffff W32
                   | otherwise   = CmmInt 0x7fffffffffffffff W64
-          Amode amode amode_code <- memConstant (widthInBytes w) const
+          Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
           tmp <- getNewRegNat fmt
           let
             code dst = x_code dst `appOL` amode_code `appOL` toOL [
@@ -3051,7 +3079,7 @@ createJumpTable dflags ids section lbl
                           where blockLabel = blockLbl blockid
                   in map jumpTableEntryRel ids
             | otherwise = map (jumpTableEntry dflags) ids
-      in CmmData section (1, Statics lbl jumpTable)
+      in CmmData section (mkAlignment 1, Statics lbl jumpTable)
 
 extractUnwindPoints :: [Instr] -> [UnwindPoint]
 extractUnwindPoints instrs =
@@ -3418,7 +3446,7 @@ sse2NegCode w x = do
       x at FF80 -> wrongFmt x
       where
         wrongFmt x = panic $ "sse2NegCode: " ++ show x
-  Amode amode amode_code <- memConstant (widthInBytes w) const
+  Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
   tmp <- getNewRegNat fmt
   let
     code dst = x_code dst `appOL` amode_code `appOL` toOL [


=====================================
compiler/nativeGen/X86/Ppr.hs
=====================================
@@ -36,7 +36,7 @@ import PprBase
 
 import Hoopl.Collections
 import Hoopl.Label
-import BasicTypes       (Alignment)
+import BasicTypes       (Alignment, mkAlignment, alignmentBytes)
 import DynFlags
 import Cmm              hiding (topInfoTable)
 import BlockId
@@ -72,7 +72,7 @@ import Data.Bits
 
 pprProcAlignment :: SDoc
 pprProcAlignment = sdocWithDynFlags $ \dflags ->
-  (maybe empty pprAlign . cmmProcAlignment $ dflags)
+  (maybe empty (pprAlign . mkAlignment) (cmmProcAlignment dflags))
 
 pprNatCmmDecl :: NatCmmDecl (Alignment, CmmStatics) Instr -> SDoc
 pprNatCmmDecl (CmmData section dats) =
@@ -236,14 +236,15 @@ pprLabel lbl = pprGloblDecl lbl
             $$ pprTypeDecl lbl
             $$ (ppr lbl <> char ':')
 
-pprAlign :: Int -> SDoc
-pprAlign bytes
+pprAlign :: Alignment -> SDoc
+pprAlign alignment
         = sdocWithPlatform $ \platform ->
-          text ".align " <> int (alignment platform)
+          text ".align " <> int (alignmentOn platform)
   where
-        alignment platform = if platformOS platform == OSDarwin
-                             then log2 bytes
-                             else      bytes
+        bytes = alignmentBytes alignment
+        alignmentOn platform = if platformOS platform == OSDarwin
+                               then log2 bytes
+                               else      bytes
 
         log2 :: Int -> Int  -- cache the common ones
         log2 1 = 0


=====================================
compiler/utils/Util.hs
=====================================
@@ -1149,7 +1149,6 @@ exactLog2 x
     pow2 x | x == 1 = 0
            | otherwise = 1 + pow2 (x `shiftR` 1)
 
-
 {-
 -- -----------------------------------------------------------------------------
 -- Floats


=====================================
docs/users_guide/8.10.1-notes.rst
=====================================
@@ -61,6 +61,11 @@ Compiler
   :ghc-flag:`-Wredundant-record-wildcards`  which warn users when they have
   redundant or unused uses of a record wildcard match.
 
+- Calls to `memset` are now unrolled more aggressively and the
+  produced code is more efficient on `x86_64` with added support for
+  64-bit `MOV`s. In particular, `setByteArray#` calls that were not
+  optimized before, now will be. See :ghc-ticket:`16052`.
+
 Runtime system
 ~~~~~~~~~~~~~~
 


=====================================
docs/users_guide/packages.rst
=====================================
@@ -579,6 +579,12 @@ must be relative to the location of the package environment file.
 
     Use the package environment in ⟨file⟩, or in
     ``$HOME/.ghc/arch-os-version/environments/⟨name⟩``
+    If set to ``-`` no package environment is read.
+
+.. envvar:: GHC_ENVIRONMENT
+
+    Specifies the path to the package environment file to be used by GHC.
+    Overridden by the :ghc-flag:`-package-env ⟨file⟩|⟨name⟩` flag if set.
 
 In order, ``ghc`` will look for the package environment in the following
 locations:
@@ -588,11 +594,11 @@ locations:
 -  File ``$HOME/.ghc/arch-os-version/environments/name`` if you pass the
    option ``-package-env ⟨name⟩``.
 
--  File ⟨file⟩ if the environment variable ``GHC_ENVIRONMENT`` is set to
+-  File ⟨file⟩ if the environment variable :envvar:`GHC_ENVIRONMENT` is set to
    ⟨file⟩.
 
 -  File ``$HOME/.ghc/arch-os-version/environments/name`` if the
-   environment variable ``GHC_ENVIRONMENT`` is set to ⟨name⟩.
+   environment variable :envvar:`GHC_ENVIRONMENT` is set to ⟨name⟩.
 
 Additionally, unless ``-hide-all-packages`` is specified ``ghc`` will also
 look for the package environment in the following locations:


=====================================
hadrian/src/Rules/BinaryDist.hs
=====================================
@@ -86,6 +86,12 @@ you can simply do:
   ./configure --prefix=<path> [... other configure options ...]
   make install
 
+In order to support @bin@ and @lib@ directories that don't sit next to each
+other, the install script:
+   * installs programs into @LIBDIR/ghc-VERSION/bin@
+   * installs libraries into @LIBDIR/ghc-VERSION/lib@
+   * installs the wrappers scripts into @BINDIR@ directory
+
 -}
 
 bindistRules :: Rules ()
@@ -268,6 +274,7 @@ bindistMakefile = unlines
     , "install: install_mingw update_package_db"
     , ""
     , "ActualBinsDir=${ghclibdir}/bin"
+    , "ActualLibsDir=${ghclibdir}/lib"
     , "WrapperBinsDir=${bindir}"
     , ""
     , "# We need to install binaries relative to libraries."
@@ -288,10 +295,10 @@ bindistMakefile = unlines
     , ""
     , "LIBRARIES = $(wildcard ./lib/*)"
     , "install_lib:"
-    , "\t at echo \"Copying libraries to $(libdir)\""
-    , "\t$(INSTALL_DIR) \"$(libdir)\""
+    , "\t at echo \"Copying libraries to $(ActualLibsDir)\""
+    , "\t$(INSTALL_DIR) \"$(ActualLibsDir)\""
     , "\tfor i in $(LIBRARIES); do \\"
-    , "\t\tcp -R $$i \"$(libdir)/\"; \\"
+    , "\t\tcp -R $$i \"$(ActualLibsDir)/\"; \\"
     , "\tdone"
     , ""
     , "INCLUDES = $(wildcard ./include/*)"
@@ -317,9 +324,9 @@ bindistMakefile = unlines
     , "\t$(foreach p, $(BINARY_NAMES),\\"
     , "\t\t$(call installscript,$p,$(WrapperBinsDir)/$p," ++
       "$(WrapperBinsDir),$(ActualBinsDir),$(ActualBinsDir)/$p," ++
-      "$(libdir),$(docdir),$(includedir)))"
+      "$(ActualLibsDir),$(docdir),$(includedir)))"
     , ""
-    , "PKG_CONFS = $(wildcard $(libdir)/package.conf.d/*)"
+    , "PKG_CONFS = $(wildcard $(ActualLibsDir)/package.conf.d/*)"
     , "update_package_db:"
     , "\t at echo \"Updating the package DB\""
     , "\t$(foreach p, $(PKG_CONFS),\\"


=====================================
testsuite/driver/testlib.py
=====================================
@@ -1131,9 +1131,9 @@ def do_compile(name, way, should_fail, top_mod, extra_mods, extra_hc_opts, **kwa
     # no problems found, this test passed
     return passed()
 
-def compile_cmp_asm( name, way, extra_hc_opts ):
+def compile_cmp_asm( name, way, ext, extra_hc_opts ):
     print('Compile only, extra args = ', extra_hc_opts)
-    result = simple_build(name + '.cmm', way, '-keep-s-files -O ' + extra_hc_opts, 0, '', 0, 0)
+    result = simple_build(name + '.' + ext, way, '-keep-s-files -O ' + extra_hc_opts, 0, '', 0, 0)
 
     if badResult(result):
         return result
@@ -1153,6 +1153,24 @@ def compile_cmp_asm( name, way, extra_hc_opts ):
     # no problems found, this test passed
     return passed()
 
+def compile_grep_asm( name, way, ext, is_substring, extra_hc_opts ):
+    print('Compile only, extra args = ', extra_hc_opts)
+    result = simple_build(name + '.' + ext, way, '-keep-s-files -O ' + extra_hc_opts, 0, '', 0, 0)
+
+    if badResult(result):
+        return result
+
+    expected_pat_file = find_expected_file(name, 'asm')
+    actual_asm_file = add_suffix(name, 's')
+
+    if not grep_output(join_normalisers(normalise_errmsg),
+                       expected_pat_file, actual_asm_file,
+                       is_substring):
+        return failBecause('asm mismatch')
+
+    # no problems found, this test passed
+    return passed()
+
 # -----------------------------------------------------------------------------
 # Compile-and-run tests
 
@@ -1735,6 +1753,43 @@ def compare_outputs(way, kind, normaliser, expected_file, actual_file, diff_file
         else:
             return False
 
+# Checks that each line from pattern_file is present in actual_file as
+# a substring or regex pattern depending on is_substring.
+def grep_output(normaliser, pattern_file, actual_file, is_substring=True):
+    expected_path = in_srcdir(pattern_file)
+    actual_path = in_testdir(actual_file)
+
+    expected_patterns = read_no_crs(expected_path).strip().split('\n')
+    actual_raw = read_no_crs(actual_path)
+    actual_str = normaliser(actual_raw)
+
+    success = True
+    failed_patterns = []
+
+    def regex_match(pat, actual):
+        return re.search(pat, actual) is not None
+
+    def substring_match(pat, actual):
+        return pat in actual
+
+    def is_match(pat, actual):
+        if is_substring:
+            return substring_match(pat, actual)
+        else:
+            return regex_match(pat, actual)
+
+    for pat in expected_patterns:
+        if not is_match(pat, actual_str):
+            success = False
+            failed_patterns.append(pat)
+
+    if not success:
+        print('Actual output does not contain the following patterns:')
+        for pat in failed_patterns:
+            print(pat)
+
+    return success
+
 # Note [Output comparison]
 #
 # We do two types of output comparison:


=====================================
testsuite/tests/codeGen/should_gen_asm/all.T
=====================================
@@ -3,7 +3,8 @@ is_amd64_codegen = [
     when(unregisterised(), skip),
 ]
 
-test('memcpy', is_amd64_codegen,  compile_cmp_asm, [''])
-test('memcpy-unroll', is_amd64_codegen, compile_cmp_asm, [''])
-test('memcpy-unroll-conprop', is_amd64_codegen, compile_cmp_asm, [''])
-test('memset-unroll', is_amd64_codegen, compile_cmp_asm, [''])
+test('memcpy', is_amd64_codegen,  compile_cmp_asm, ['cmm', ''])
+test('memcpy-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('memcpy-unroll-conprop', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('memset-unroll', is_amd64_codegen, compile_cmp_asm, ['cmm', ''])
+test('bytearray-memset-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, ''])


=====================================
testsuite/tests/codeGen/should_gen_asm/bytearray-memset-unroll.asm
=====================================
@@ -0,0 +1,6 @@
+movq $72340172838076673,%rcx
+movq %rcx,0(%rbx)
+movq %rcx,8(%rbx)
+movl $16843009,16(%rbx)
+movw $257,20(%rbx)
+movb $1,22(%rbx)


=====================================
testsuite/tests/codeGen/should_gen_asm/bytearray-memset-unroll.hs
=====================================
@@ -0,0 +1,17 @@
+{-# language MagicHash #-}
+{-# language UnboxedTuples #-}
+
+module FillArray
+  ( fill
+  ) where
+
+import GHC.Exts
+import GHC.IO
+
+data ByteArray = ByteArray ByteArray#
+
+fill :: IO ByteArray
+fill = IO $ \s0 -> case newByteArray# 24# s0 of
+  (# s1, m #) -> case setByteArray# m 0# 23# 1# s1 of
+    s2 -> case unsafeFreezeByteArray# m s2 of
+          (# s3, r #) -> (# s3, ByteArray r #)



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/compare/06b04e44e9ac588fbd54d1c6f904275de6d248c1...8e81d0ca39594f1c9ab1fb00ac80ef6a2cd85cb9

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/compare/06b04e44e9ac588fbd54d1c6f904275de6d248c1...8e81d0ca39594f1c9ab1fb00ac80ef6a2cd85cb9
You're receiving this email because of your account on gitlab.haskell.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.haskell.org/pipermail/ghc-commits/attachments/20190409/6c5df904/attachment-0001.html>