[commit: ghc] master: Remove special casing of singleton strings, split all strings. (fe9f7e4)
git at git.haskell.org
git at git.haskell.org
Tue Sep 16 12:59:34 UTC 2014
Repository : ssh://git@git.haskell.org/ghc
On branch : master
Link : http://ghc.haskell.org/trac/ghc/changeset/fe9f7e40844802443315ef2238c4cdefda756b62/ghc
>---------------------------------------------------------------
commit fe9f7e40844802443315ef2238c4cdefda756b62
Author: Thijs Alkemade <thijsalkemade at gmail.com>
Date: Tue Sep 16 07:55:34 2014 -0500
Remove special casing of singleton strings, split all strings.
Summary:
exprIsConApp_maybe now detects string literals and correctly
splits them. This means case-statemnts on string literals can
now push the literal into the cases.
fix trac issue #9400
Test Plan: validate
Reviewers: austin, simonpj
Reviewed By: austin, simonpj
Subscribers: simonmar, ezyang, carter
Differential Revision: https://phabricator.haskell.org/D199
GHC Trac Issues: #9400
>---------------------------------------------------------------
fe9f7e40844802443315ef2238c4cdefda756b62
compiler/coreSyn/CoreSubst.lhs | 58 +++++++++++++++++++++-
compiler/coreSyn/MkCore.lhs | 4 --
testsuite/tests/simplCore/should_compile/T9400.hs | 18 +++++++
.../tests/simplCore/should_compile/T9400.stderr | 36 ++++++++++++++
testsuite/tests/simplCore/should_compile/all.T | 1 +
5 files changed, 111 insertions(+), 6 deletions(-)
diff --git a/compiler/coreSyn/CoreSubst.lhs b/compiler/coreSyn/CoreSubst.lhs
index 1951252..76f42f4 100644
--- a/compiler/coreSyn/CoreSubst.lhs
+++ b/compiler/coreSyn/CoreSubst.lhs
@@ -42,7 +42,8 @@ module CoreSubst (
import CoreSyn
import CoreFVs
import CoreUtils
-import Literal ( Literal )
+import Literal ( Literal(MachStr) )
+import qualified Data.ByteString as BS
import OccurAnal( occurAnalyseExpr, occurAnalysePgm )
import qualified Type
@@ -55,7 +56,8 @@ import Coercion hiding ( substTy, substCo, extendTvSubst, substTyVarBndr, substC
import TyCon ( tyConArity )
import DataCon
-import PrelNames ( eqBoxDataConKey, coercibleDataConKey )
+import PrelNames ( eqBoxDataConKey, coercibleDataConKey, unpackCStringIdKey
+ , unpackCStringUtf8IdKey )
import OptCoercion ( optCoercion )
import PprCore ( pprCoreBindings, pprRules )
import Module ( Module )
@@ -78,6 +80,8 @@ import PprCore () -- Instances
import FastString
import Data.List
+
+import TysWiredIn
\end{code}
@@ -1135,6 +1139,25 @@ a data constructor.
However e might not *look* as if
+
+Note [exprIsConApp_maybe on literal strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+See #9400.
+
+Conceptually, a string literal "abc" is just ('a':'b':'c':[]), but in Core
+they are represented as unpackCString# "abc"# by MkCore.mkStringExprFS, or
+unpackCStringUtf8# when the literal contains multi-byte UTF8 characters.
+
+For optimizations we want to be able to treat it as a list, so they can be
+decomposed when used in a case-statement. exprIsConApp_maybe detects those
+calls to unpackCString# and returns:
+
+Just (':', [Char], ['a', unpackCString# "bc"]).
+
+We need to be careful about UTF8 strings here. ""# contains a ByteString, so
+we must parse it back into a FastString to split off the first character.
+That way we can treat unpackCString# and unpackCStringUtf8# in the same way.
+
\begin{code}
data ConCont = CC [CoreExpr] Coercion
-- Substitution already applied
@@ -1164,6 +1187,7 @@ exprIsConApp_maybe (in_scope, id_unf) expr
cont
go (Left in_scope) (Var fun) cont@(CC args co)
+
| Just con <- isDataConWorkId_maybe fun
, count isValArg args == idArity fun
= dealWithCoercion co con args
@@ -1183,6 +1207,11 @@ exprIsConApp_maybe (in_scope, id_unf) expr
, Just rhs <- expandUnfolding_maybe unfolding
, let in_scope' = extendInScopeSetSet in_scope (exprFreeVars rhs)
= go (Left in_scope') rhs cont
+
+ | (fun `hasKey` unpackCStringIdKey)
+ || (fun `hasKey` unpackCStringUtf8IdKey)
+ , [Lit (MachStr str)] <- args
+ = dealWithStringLiteral fun str co
where
unfolding = id_unf fun
@@ -1200,6 +1229,31 @@ exprIsConApp_maybe (in_scope, id_unf) expr
extend (Left in_scope) v e = Right (extendSubst (mkEmptySubst in_scope) v e)
extend (Right s) v e = Right (extendSubst s v e)
+-- See Note [exprIsConApp_maybe on literal strings]
+dealWithStringLiteral :: Var -> BS.ByteString -> Coercion
+ -> Maybe (DataCon, [Type], [CoreExpr])
+
+-- This is not possible with user-supplied empty literals, MkCore.mkStringExprFS
+-- turns those into [] automatically, but just in case something else in GHC
+-- generates a string literal directly.
+dealWithStringLiteral _ str co
+ | BS.null str
+ = dealWithCoercion co nilDataCon [Type charTy]
+
+dealWithStringLiteral fun str co
+ = let strFS = mkFastStringByteString str
+
+ char = mkConApp charDataCon [mkCharLit (headFS strFS)]
+ charTail = fastStringToByteString (tailFS strFS)
+
+ -- In singleton strings, just add [] instead of unpackCstring# ""#.
+ rest = if BS.null charTail
+ then mkConApp nilDataCon [Type charTy]
+ else App (Var fun)
+ (Lit (MachStr charTail))
+
+ in dealWithCoercion co consDataCon [Type charTy, char, rest]
+
dealWithCoercion :: Coercion -> DataCon -> [CoreExpr]
-> Maybe (DataCon, [Type], [CoreExpr])
dealWithCoercion co dc dc_args
diff --git a/compiler/coreSyn/MkCore.lhs b/compiler/coreSyn/MkCore.lhs
index 012306a..d749f82 100644
--- a/compiler/coreSyn/MkCore.lhs
+++ b/compiler/coreSyn/MkCore.lhs
@@ -283,10 +283,6 @@ mkStringExprFS str
| nullFS str
= return (mkNilExpr charTy)
- | lengthFS str == 1
- = do let the_char = mkCharExpr (headFS str)
- return (mkConsExpr charTy the_char (mkNilExpr charTy))
-
| all safeChar chars
= do unpack_id <- lookupId unpackCStringName
return (App (Var unpack_id) (Lit (MachStr (fastStringToByteString str))))
diff --git a/testsuite/tests/simplCore/should_compile/T9400.hs b/testsuite/tests/simplCore/should_compile/T9400.hs
new file mode 100644
index 0000000..4e9cb72
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T9400.hs
@@ -0,0 +1,18 @@
+module T9400 (main) where
+import GHC.Base
+
+str = "defg"
+
+main :: IO ()
+main = do
+ case "abc" of
+ (x:y:xs) -> putStrLn xs
+ case "" of
+ [] -> putStrLn "x"
+ case "ab" of
+ [] -> putStrLn "y"
+ (x:y:[]) -> putStrLn "z"
+ case str of
+ (x:xs) -> putStrLn xs
+ case "ab" of
+ "" -> putStrLn "fail"
diff --git a/testsuite/tests/simplCore/should_compile/T9400.stderr b/testsuite/tests/simplCore/should_compile/T9400.stderr
new file mode 100644
index 0000000..e66eecf
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T9400.stderr
@@ -0,0 +1,36 @@
+
+==================== Tidy Core ====================
+Result size of Tidy Core = {terms: 23, types: 16, coercions: 0}
+
+T9400.main :: GHC.Types.IO ()
+[GblId, Str=DmdType]
+T9400.main =
+ GHC.Base.>>
+ @ GHC.Types.IO
+ GHC.Base.$fMonadIO
+ @ ()
+ @ ()
+ (System.IO.putStrLn (GHC.CString.unpackCString# "c"#))
+ (GHC.Base.>>
+ @ GHC.Types.IO
+ GHC.Base.$fMonadIO
+ @ ()
+ @ ()
+ (System.IO.putStrLn (GHC.CString.unpackCString# "x"#))
+ (GHC.Base.>>
+ @ GHC.Types.IO
+ GHC.Base.$fMonadIO
+ @ ()
+ @ ()
+ (System.IO.putStrLn (GHC.CString.unpackCString# "z"#))
+ (GHC.Base.>>
+ @ GHC.Types.IO
+ GHC.Base.$fMonadIO
+ @ ()
+ @ ()
+ (System.IO.putStrLn (GHC.CString.unpackCString# "efg"#))
+ (Control.Exception.Base.patError
+ @ (GHC.Types.IO ()) "T9400.hs:(17,5)-(18,29)|case"#))))
+
+
+
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 88d1022..399498b 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -206,3 +206,4 @@ test('T8848', only_ways(['optasm']), compile, ['-ddump-rule-firings -dsuppress-u
test('T8848a', only_ways(['optasm']), compile, ['-ddump-rules'])
test('T8331', only_ways(['optasm']), compile, ['-ddump-rules'])
test('T6056', only_ways(['optasm']), multimod_compile, ['T6056', '-v0 -ddump-rule-firings'])
+test('T9400', only_ways(['optasm']), compile, ['-O0 -ddump-simpl -dsuppress-uniques'])
More information about the ghc-commits
mailing list