[commit: haddock] master: Drop leading whitespace in @-style blocks. (d6cf6f9)

Mon Mar 31 17:30:44 UTC 2014

Repository : ssh://git@git.haskell.org/haddock

On branch  : master
Link       : http://git.haskell.org/haddock.git/commitdiff/d6cf6f9c75e08ce1760c2dbdee81775ba97a5f0c

>---------------------------------------------------------------

commit d6cf6f9c75e08ce1760c2dbdee81775ba97a5f0c
Author: Mateusz Kowalczyk <fuuzetsu at fuuzetsu.co.uk>
Date:   Mon Mar 31 18:29:04 2014 +0100

    Drop leading whitespace in @-style blocks.
    
    Fixes #201.


>---------------------------------------------------------------

d6cf6f9c75e08ce1760c2dbdee81775ba97a5f0c
 CHANGES                                           |    2 +
 html-test/ref/{IgnoreExports.html => Bug201.html} |   53 +++++++++++----------
 html-test/src/Bug201.hs                           |   28 +++++++++++
 src/Haddock/Parser.hs                             |   31 ++++++++++--
 test/Haddock/ParserSpec.hs                        |   28 +++++++++--
 5 files changed, 111 insertions(+), 31 deletions(-)

diff --git a/CHANGES b/CHANGES
index 31851e5..5f01902 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,8 @@ Changes in version 2.15.0
 
  * Print kind signatures GADTs (#85)
 
+ * Drop single leading whitespace when reasonable from @-style blocks (#201)
+
 Changes in version 2.14.1
 
  * Render * and -> with their UnicodeSyntax equivalents if -U is enabled
diff --git a/html-test/ref/IgnoreExports.html b/html-test/ref/Bug201.html
similarity index 75%
copy from html-test/ref/IgnoreExports.html
copy to html-test/ref/Bug201.html
index f64c4ec..893ccbe 100644
--- a/html-test/ref/IgnoreExports.html
+++ b/html-test/ref/Bug201.html
@@ -3,13 +3,13 @@
 ><head
   ><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"
      /><title
-    >IgnoreExports</title
+    >Bug201</title
     ><link href="ocean.css" rel="stylesheet" type="text/css" title="Ocean"
      /><script src="haddock-util.js" type="text/javascript"
     ></script
     ><script type="text/javascript"
     >//<![CDATA[
-window.onload = function () {pageLoad();setSynopsis("mini_IgnoreExports.html");};
+window.onload = function () {pageLoad();setSynopsis("mini_Bug201.html");};
 //]]>
 </script
     ></head
@@ -39,7 +39,7 @@ window.onload = function () {pageLoad();setSynopsis("mini_IgnoreExports.html");}
 	    ></tr
 	  ></table
 	><p class="caption"
-	>IgnoreExports</p
+	>Bug201</p
 	></div
       ><div id="synopsis"
       ><p id="control.syn" class="caption expander" onclick="toggleSection('syn')"
@@ -47,16 +47,12 @@ window.onload = function () {pageLoad();setSynopsis("mini_IgnoreExports.html");}
 	><ul id="section.syn" class="hide" onclick="toggleSection('syn')"
 	><li class="src short"
 	  ><a href=""
-	    >foo</a
-	    > :: <a href=""
-	    >Int</a
-	    ></li
+	    >f</a
+	    > :: ()</li
 	  ><li class="src short"
 	  ><a href=""
-	    >bar</a
-	    > :: <a href=""
-	    >Int</a
-	    ></li
+	    >g</a
+	    > :: ()</li
 	  ></ul
 	></div
       ><div id="interface"
@@ -64,26 +60,33 @@ window.onload = function () {pageLoad();setSynopsis("mini_IgnoreExports.html");}
 	>Documentation</h1
 	><div class="top"
 	><p class="src"
-	  ><a name="v:foo" class="def"
-	    >foo</a
-	    > :: <a href=""
-	    >Int</a
-	    ></p
+	  ><a name="v:f" class="def"
+	    >f</a
+	    > :: ()</p
 	  ><div class="doc"
-	  ><p
-	    >documentation for foo</p
+	  ><pre
+	    >This leading whitespace
+should be dropped
+</pre
 	    ></div
 	  ></div
 	><div class="top"
 	><p class="src"
-	  ><a name="v:bar" class="def"
-	    >bar</a
-	    > :: <a href=""
-	    >Int</a
-	    ></p
+	  ><a name="v:g" class="def"
+	    >g</a
+	    > :: ()</p
 	  ><div class="doc"
-	  ><p
-	    >documentation for bar</p
+	  ><pre
+	    > But this one
+ should not
+</pre
+	    ><pre
+	    >this should
+be dropped</pre
+	    ><pre
+	    >and so should this
+because there's a space before closing @
+</pre
 	    ></div
 	  ></div
 	></div
diff --git a/html-test/src/Bug201.hs b/html-test/src/Bug201.hs
new file mode 100644
index 0000000..bf6cb9a
--- /dev/null
+++ b/html-test/src/Bug201.hs
@@ -0,0 +1,28 @@
+-- We test that leading whitespace gets properly dropped (or not!)
+-- from codeblocks
+module Bug201 where
+
+-- |
+-- @
+-- This leading whitespace
+-- should be dropped
+-- @
+f :: ()
+f = ()
+
+{-|
+@
+ But this one
+ should not
+@
+
+> this should
+> be dropped
+
+@
+ and so should this
+ because there's a space before closing @
+ @
+-}
+g :: ()
+g = ()
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index cd7bb02..bd5cd20 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -2,6 +2,7 @@
 {-# LANGUAGE StandaloneDeriving
              , FlexibleInstances, UndecidableInstances
              , IncoherentInstances #-}
+{-# LANGUAGE LambdaCase #-}
 -- |
 -- Module      :  Haddock.Parser
 -- Copyright   :  (c) Mateusz Kowalczyk 2013,
@@ -21,7 +22,7 @@ import           Control.Applicative
 import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
 import qualified Data.ByteString.Char8 as BS
 import           Data.Char (chr, isAsciiUpper)
-import           Data.List (stripPrefix, intercalate)
+import           Data.List (stripPrefix, intercalate, unfoldr)
 import           Data.Maybe (fromMaybe)
 import           Data.Monoid
 import           DynFlags
@@ -59,7 +60,8 @@ parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
     p :: Parser (Doc RdrName)
     p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n")
 
--- | Parse a text paragraph.
+-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
+-- drops leading whitespace and encodes the string to UTF8 first.
 parseString :: DynFlags -> String -> Doc RdrName
 parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace
 
@@ -366,8 +368,31 @@ property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')
 -- for markup.
 codeblock :: DynFlags -> Parser (Doc RdrName)
 codeblock d =
-  DocCodeBlock . parseStringBS d <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
+  DocCodeBlock . parseStringBS d . dropSpaces
+  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
   where
+    dropSpaces xs =
+      let rs = decodeUtf8 xs
+      in case splitByNl rs of
+        [] -> xs
+        ys -> case last ys of
+          ' ':_ -> case mapM dropSpace ys of
+            Nothing -> xs
+            Just zs -> encodeUtf8 $ intercalate "\n" zs
+          _ -> xs
+
+    -- This is necessary because ‘lines’ swallows up a trailing newline
+    -- and we lose information about whether the last line belongs to @ or to
+    -- text which we need to decide whether we actually want to be dropping
+    -- anything at all.
+    splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
+                               _ -> Nothing)
+                . ('\n' :)
+
+    dropSpace "" = Just ""
+    dropSpace (' ':xs) = Just xs
+    dropSpace _ = Nothing
+
     block' = scan False p
       where
         p isNewline c
diff --git a/test/Haddock/ParserSpec.hs b/test/Haddock/ParserSpec.hs
index db843cc..f44b7d0 100644
--- a/test/Haddock/ParserSpec.hs
+++ b/test/Haddock/ParserSpec.hs
@@ -432,12 +432,34 @@ spec = before initStaticOpts $ do
           ] `shouldParseTo` DocCodeBlock "foo\n@\nbar\n"
 
       it "accepts horizontal space before the @" $ do
+        unlines [ "     @"
+                , "foo"
+                , ""
+                , "bar"
+                , "@"
+                ] `shouldParseTo` DocCodeBlock "foo\n\nbar\n"
+
+      it "strips a leading space from a @ block if present" $ do
+        unlines [ " @"
+                , " hello"
+                , " world"
+                , " @"
+                ] `shouldParseTo` DocCodeBlock "hello\nworld\n"
+
         unlines [ " @"
-                , " foo"
+                , " hello"
                 , ""
-                , " bar"
+                , " world"
                 , " @"
-                ] `shouldParseTo` DocCodeBlock " foo\n\n bar\n "
+                ] `shouldParseTo` DocCodeBlock "hello\n\nworld\n"
+
+      it "only drops whitespace if there's some before closing @" $ do
+        unlines [ "@"
+                , "    Formatting"
+                , "        matters."
+                , "@"
+                ]
+          `shouldParseTo` DocCodeBlock "    Formatting\n        matters.\n"
 
       it "accepts unicode" $ do
         "@foo 灼眼のシャナ bar@" `shouldParseTo` DocCodeBlock "foo 灼眼のシャナ bar"