[commit: haddock] v2.14: Parse identifiers with ^ and ⋆ in them. (81c379d)

git at git.haskell.org git at git.haskell.org
Fri Apr 11 18:04:08 UTC 2014


Repository : ssh://git@git.haskell.org/haddock

On branch  : v2.14
Link       : http://git.haskell.org/haddock.git/commitdiff/81c379daddb0e46c1f62cef5f34e933d481ec4a7

>---------------------------------------------------------------

commit 81c379daddb0e46c1f62cef5f34e933d481ec4a7
Author: Mateusz Kowalczyk <fuuzetsu at fuuzetsu.co.uk>
Date:   Fri Apr 11 16:58:34 2014 +0100

    Parse identifiers with ^ and ⋆ in them.
    
    Fixes #298.


>---------------------------------------------------------------

81c379daddb0e46c1f62cef5f34e933d481ec4a7
 CHANGES                                    |    4 ++
 html-test/ref/{Bug201.html => Bug298.html} |   89 +++++++++++++++++++---------
 html-test/src/Bug298.hs                    |   22 +++++++
 src/Haddock/Parser.hs                      |   10 +++-
 4 files changed, 95 insertions(+), 30 deletions(-)

diff --git a/CHANGES b/CHANGES
index b3fb8dc..1fe0249 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,7 @@
+Changes in version 2.14.3
+
+ * Fix parsing of identifiers with ^ or ⋆ in them (#298)
+
 Changes in version 2.14.2
 
  * Always drop --split-objs GHC flag for performance reasons (#292)
diff --git a/html-test/ref/Bug201.html b/html-test/ref/Bug298.html
similarity index 56%
copy from html-test/ref/Bug201.html
copy to html-test/ref/Bug298.html
index 8b596cf..03ed5ee 100644
--- a/html-test/ref/Bug201.html
+++ b/html-test/ref/Bug298.html
@@ -3,13 +3,13 @@
 ><head
   ><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"
      /><title
-    >Bug201</title
+    >Bug298</title
     ><link href="ocean.css" rel="stylesheet" type="text/css" title="Ocean"
      /><script src="haddock-util.js" type="text/javascript"
     ></script
     ><script type="text/javascript"
     >//<![CDATA[
-window.onload = function () {pageLoad();setSynopsis("mini_Bug201.html");};
+window.onload = function () {pageLoad();setSynopsis("mini_Bug298.html");};
 //]]>
 </script
     ></head
@@ -39,7 +39,7 @@ window.onload = function () {pageLoad();setSynopsis("mini_Bug201.html");};
 	    ></tr
 	  ></table
 	><p class="caption"
-	>Bug201</p
+	>Bug298</p
 	></div
       ><div id="synopsis"
       ><p id="control.syn" class="caption expander" onclick="toggleSection('syn')"
@@ -47,11 +47,23 @@ window.onload = function () {pageLoad();setSynopsis("mini_Bug201.html");};
 	><ul id="section.syn" class="hide" onclick="toggleSection('syn')"
 	><li class="src short"
 	  ><a href=""
-	    >f</a
-	    > :: ()</li
+	    >(<^>)</a
+	    > ::  (a -> a) -> a -> a</li
+	  ><li class="src short"
+	  ><a href=""
+	    >(<^)</a
+	    > ::  a -> a -> a</li
 	  ><li class="src short"
 	  ><a href=""
-	    >g</a
+	    >(^>)</a
+	    > ::  a -> a -> a</li
+	  ><li class="src short"
+	  ><a href=""
+	    >(⋆^)</a
+	    > ::  a -> a -> a</li
+	  ><li class="src short"
+	  ><a href=""
+	    >f</a
 	    > :: ()</li
 	  ></ul
 	></div
@@ -60,33 +72,52 @@ window.onload = function () {pageLoad();setSynopsis("mini_Bug201.html");};
 	>Documentation</h1
 	><div class="top"
 	><p class="src"
-	  ><a name="v:f" class="def"
-	    >f</a
-	    > :: ()</p
-	  ><div class="doc"
-	  ><pre
-	    >This leading whitespace
-should be dropped
-</pre
-	    ></div
+	  ><a name="v:-60--94--62-" class="def"
+	    >(<^>)</a
+	    > ::  (a -> a) -> a -> a</p
+	  ></div
+	><div class="top"
+	><p class="src"
+	  ><a name="v:-60--94-" class="def"
+	    >(<^)</a
+	    > ::  a -> a -> a</p
+	  ></div
+	><div class="top"
+	><p class="src"
+	  ><a name="v:-94--62-" class="def"
+	    >(^>)</a
+	    > ::  a -> a -> a</p
+	  ></div
+	><div class="top"
+	><p class="src"
+	  ><a name="v:-8902--94-" class="def"
+	    >(⋆^)</a
+	    > ::  a -> a -> a</p
 	  ></div
 	><div class="top"
 	><p class="src"
-	  ><a name="v:g" class="def"
-	    >g</a
+	  ><a name="v:f" class="def"
+	    >f</a
 	    > :: ()</p
 	  ><div class="doc"
-	  ><pre
-	    > But this one
- should not
-</pre
-	    ><pre
-	    >this should
-be dropped</pre
-	    ><pre
-	    >and so should this
-because there's a space before closing @
-</pre
+	  ><p
+	    >Links to <code
+	      ><a href=""
+		><^></a
+		></code
+	      > and <code
+	      ><a href=""
+		>^></a
+		></code
+	      >, <code
+	      ><a href=""
+		><^</a
+		></code
+	      > and <code
+	      ><a href=""
+		>⋆^</a
+		></code
+	      >.</p
 	    ></div
 	  ></div
 	></div
@@ -95,7 +126,7 @@ because there's a space before closing @
     ><p
       >Produced by <a href=""
 	>Haddock</a
-	> version 2.14.2</p
+	> version 2.15.0</p
       ></div
     ></body
   ></html
diff --git a/html-test/src/Bug298.hs b/html-test/src/Bug298.hs
new file mode 100644
index 0000000..07d6fa0
--- /dev/null
+++ b/html-test/src/Bug298.hs
@@ -0,0 +1,22 @@
+-- We introduced a regression in 2.14.x where we don't consider
+-- identifiers with ^ as valid. We test that the regression goes away
+-- here. It's a silly typo in the parser, really. Same with ★ which is a valid
+-- symbol according to the 2010 report.
+module Bug298 where
+
+
+(<^>) :: (a -> a) -> a -> a
+x <^> y = x y
+
+(<^) :: a -> a -> a
+x <^ y = x
+
+(^>) :: a -> a -> a
+x ^> y = y
+
+(⋆^) :: a -> a -> a
+x ⋆^ y = y
+
+-- | Links to '<^>' and '^>', '<^' and '⋆^'.
+f :: ()
+f = ()
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index bd5cd20..ece9291 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -419,13 +419,21 @@ autoUrl = mkLink <$> url
 -- characters and does no actual validation itself.
 parseValid :: Parser String
 parseValid = do
-  vs <- many' $ satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:") <|> digit <|> letter_ascii
+  vs' <- many' $ utf8String "⋆" <|> return <$> idChar
+  let vs = concat vs'
   c <- peekChar
   case c of
     Just '`' -> return vs
     Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
                  <|> return vs
     _ -> fail "outofvalid"
+  where
+    idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
+             <|> digit <|> letter_ascii
+
+-- | Parses UTF8 strings from ByteString streams.
+utf8String :: String -> Parser String
+utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
 
 -- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
 -- string it deems valid.



More information about the ghc-commits mailing list