[GHC] #15570: Core transformations generate bad indexCharOffAddr# call
GHC
ghc-devs at haskell.org
Sun Aug 26 22:11:20 UTC 2018
#15570: Core transformations generate bad indexCharOffAddr# call
-------------------------------------+-------------------------------------
Reporter: alpmestan | Owner: (none)
Type: bug | Status: new
Priority: normal | Milestone:
Component: Compiler | Version: 8.5
Keywords: | Operating System: Unknown/Multiple
Architecture: | Type of failure: None/Unknown
Unknown/Multiple |
Test Case: | Blocked By:
Blocking: | Related Tickets:
Differential Rev(s): | Wiki Page:
-------------------------------------+-------------------------------------
Consider the following functions, which only differ in a bang pattern on
the local binding `q` in the inner loop:
{{{#!hs
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Bug where
import GHC.Prim
import GHC.Types
f :: Int -> String
f n_
= go n_ ""
where
go n cs | n < 62
= let !c = chooseChar62 n in c : cs
| otherwise
= go q (c : cs) where (q, r) = quotRem n 62
!c = chooseChar62 r
chooseChar62 :: Int -> Char
{-# INLINE chooseChar62 #-}
chooseChar62 (I# n) = C# (indexCharOffAddr# chars62 n)
chars62 =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
g :: Int -> String
g n_
= go n_ ""
where
go n cs | n < 62
= let !c = chooseChar62 n in c : cs
| otherwise
= go q (c : cs) where (!q, r) = quotRem n 62 -- !!! Note the
bang on q
!c = chooseChar62 r
chooseChar62 :: Int -> Char
{-# INLINE chooseChar62 #-}
chooseChar62 (I# n) = C# (indexCharOffAddr# chars62 n)
chars62 =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
}}}
When building with `-O -fPIC -dynamic -ddump-simpl`, this is the Core I
see, with a HEAD checkout from earlier this week built by hadrian:
{{{#!hs
-- chararacter array, used by both
chars62_r30r :: Addr#
chars62_r30r
= "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
-- Used at the end of Bug.$wgo, in the -9223372036854775808# branch,
-- therefore only used by generated Core for f, but not g!
lvl_r30s :: Char
lvl_r30s
= case indexCharOffAddr# chars62_r30r -9223372036854775808# of v_B2
{ __DEFAULT ->
GHC.Types.C# v_B2
}
-- Core for f
Rec {
Bug.$wgo
:: Int# -> [Char] -> (# Char, [Char] #)
Bug.$wgo
= \ (ww_s2WU :: Int#) (w_s2WR :: [Char]) ->
case GHC.Real.even3 of { I# y_a2QI -> -- GHC.Real.even3 == -1
case y_a2QI of {
__DEFAULT ->
case quotRemInt# ww_s2WU 62# of { (# ipv_a2QN, ipv1_a2QO #) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QO of wild2_X4
{ __DEFAULT ->
case <# ww_s2WU 62# of {
__DEFAULT ->
Bug.$wgo
ipv_a2QN (GHC.Types.: @ Char (GHC.Types.C# wild2_X4)
w_s2WR);
1# ->
case indexCharOffAddr# chars62_r30r ww_s2WU of wild3_X1G
{ __DEFAULT ->
(# GHC.Types.C# wild3_X1G, w_s2WR #)
}
}
}
};
62# ->
case ww_s2WU of wild2_a2QQ {
__DEFAULT ->
case quotRemInt# wild2_a2QQ 62# of { (# ipv_a2QT, ipv1_a2QU
#) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QU of wild4_X4
{ __DEFAULT ->
case <# wild2_a2QQ 62# of {
__DEFAULT ->
Bug.$wgo
ipv_a2QT (GHC.Types.: @ Char (GHC.Types.C# wild4_X4)
w_s2WR);
1# ->
case indexCharOffAddr# chars62_r30r wild2_a2QQ of
wild5_X1G
{ __DEFAULT ->
(# GHC.Types.C# wild5_X1G, w_s2WR #)
}
}
}
};
-9223372036854775808# ->
case lvl_r30s of { C# v1_B2 -> (# GHC.Types.C# v1_B2, w_s2WR
#) }
}
}
}
end Rec }
Bug.f_go :: Int -> [Char] -> [Char]
Bug.f_go
= \ (w_s2WQ :: Int) (w1_s2WR :: [Char]) ->
case w_s2WQ of { I# ww1_s2WU ->
case Bug.$wgo ww1_s2WU w1_s2WR of { (# ww3_s2Xa, ww4_s2Xb #) ->
GHC.Types.: @ Char ww3_s2Xa ww4_s2Xb
}
}
f :: Int -> String
f = \ (n__aXG :: Int) ->
case n__aXG of { I# ww1_s2WU ->
case Bug.$wgo ww1_s2WU (GHC.Types.[] @ Char) of
{ (# ww3_s2Xa, ww4_s2Xb #) ->
GHC.Types.: @ Char ww3_s2Xa ww4_s2Xb
}
}
-- Core for g
Rec {
Bug.$wgo1
:: Int# -> [Char] -> (# Char, [Char] #)
Bug.$wgo1
= \ (ww_s2X4 :: Int#) (w_s2X1 :: [Char]) ->
case GHC.Real.even3 of { I# y_a2QI ->
case y_a2QI of {
__DEFAULT ->
case quotRemInt# ww_s2X4 62# of { (# ipv_a2QN, ipv1_a2QO #) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QO of wild2_X4
{ __DEFAULT ->
case <# ww_s2X4 62# of {
__DEFAULT ->
Bug.$wgo1
ipv_a2QN (GHC.Types.: @ Char (GHC.Types.C# wild2_X4)
w_s2X1);
1# ->
case indexCharOffAddr# chars62_r30r ww_s2X4 of wild3_XY
{ __DEFAULT ->
(# GHC.Types.C# wild3_XY, w_s2X1 #)
}
}
}
};
62# ->
case ww_s2X4 of wild2_a2QQ {
__DEFAULT ->
case quotRemInt# wild2_a2QQ 62# of { (# ipv_a2QT, ipv1_a2QU
#) ->
case indexCharOffAddr# chars62_r30r ipv1_a2QU of wild4_X4
{ __DEFAULT ->
case <# wild2_a2QQ 62# of {
__DEFAULT ->
Bug.$wgo1
ipv_a2QT (GHC.Types.: @ Char (GHC.Types.C# wild4_X4)
w_s2X1);
1# ->
case indexCharOffAddr# chars62_r30r wild2_a2QQ of
wild5_XY
{ __DEFAULT ->
(# GHC.Types.C# wild5_XY, w_s2X1 #)
}
}
}
};
-9223372036854775808# ->
case GHC.Real.overflowError of wild4_00 { }
}
}
}
end Rec }
Bug.g_go :: Int -> [Char] -> [Char]
Bug.g_go
= \ (w_s2X0 :: Int) (w1_s2X1 :: [Char]) ->
case w_s2X0 of { I# ww1_s2X4 ->
case Bug.$wgo1 ww1_s2X4 w1_s2X1 of { (# ww3_s2Xd, ww4_s2Xe #) ->
GHC.Types.: @ Char ww3_s2Xd ww4_s2Xe
}
}
g :: Int -> String
g = \ (n__a29X :: Int) ->
case n__a29X of { I# ww1_s2X4 ->
case Bug.$wgo1 ww1_s2X4 (GHC.Types.[] @ Char) of
{ (# ww3_s2Xd, ww4_s2Xe #) ->
GHC.Types.: @ Char ww3_s2Xd ww4_s2Xe
}
}
}}}
Of particular interest is:
{{{#!hs
-- chararacter array, used by both
chars62_r30r :: Addr#
chars62_r30r
= "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"#
lvl_r30s :: Char
lvl_r30s
= case indexCharOffAddr# chars62_r30r -9223372036854775808# of v_B2
{ __DEFAULT ->
GHC.Types.C# v_B2
}
}}}
which is only used in the Core for `f`, not `g`! We're trying to access
index `minBound :: Int` of that array of chars. While this is only used
when we pass `minBound` to our function, it is still wrong I think.
Moreover, as [https://github.com/snowleopard/hadrian/issues/861 hadrian
issue 861] showed, this can lead to... linker errors! Which got fixed by
changing the implementation of `iToBase62` in Unique.hs from `f` to `g`
:-)
Note that when I build the same commit with the make build system, then
GHC generates Core close to `g`'s above for _both functions_.
I tried describing some of the transformations that occur in
[https://github.com/snowleopard/hadrian/issues/641#issuecomment-415881512
this comment] on hadrian's issue tracker. The gist of it is that the lack
of strictness in `q` leads GHC to not spotting it early and when we inline
`quotRem`/`quotRemInt` and start floating things in/out and distributing
`case ... of` branches around, we end up with a dedicated branch in the
inner loop for `minBound` which actually makes use of the result of
`indexAddrOffAddr#`, as you can see here:
{{{#!hs
-9223372036854775808# ->
-- lvl_r30s is our bad value
case lvl_r30s of { C# v1_B2 -> (# GHC.Types.C# v1_B2, w_s2WR
#) }
}}}
whereas this is what this branch looks like for `g`:
{{{#!hs
-9223372036854775808# ->
case GHC.Real.overflowError of wild4_00 { }
}}}
The `overflowError` is still there and GHC therefore realised that there's
no point in computing anything since we always raise an overflow error in
that branch. This `overflowError` just disappears in `f`.
--
Ticket URL: <http://ghc.haskell.org/trac/ghc/ticket/15570>
GHC <http://www.haskell.org/ghc/>
The Glasgow Haskell Compiler
More information about the ghc-tickets
mailing list