[commit: haddock] master: Create basic token classification method. (6fb8d5a)
git at git.haskell.org
git at git.haskell.org
Wed Jul 8 08:39:38 UTC 2015
Repository : ssh://git@git.haskell.org/haddock
On branch : master
Link : http://git.haskell.org/haddock.git/commitdiff/6fb8d5abbcc92f5155fdc9596ca1c87fe87f6187
>---------------------------------------------------------------
commit 6fb8d5abbcc92f5155fdc9596ca1c87fe87f6187
Author: Łukasz Hanuszczak <lukasz.hanuszczak at gmail.com>
Date: Thu Jun 4 23:21:17 2015 +0200
Create basic token classification method.
>---------------------------------------------------------------
6fb8d5abbcc92f5155fdc9596ca1c87fe87f6187
.../src/Haddock/Backends/Hyperlinker/Parser.hs | 104 +++++++++++++++++++--
1 file changed, 98 insertions(+), 6 deletions(-)
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index 4e0d738..be6b7ce 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -20,11 +20,18 @@ data Span = Span
}
data TokenType
- = Identifier
- | Comment
- | Whitespace
- | Operator
- | Symbol
+ = TkIdentifier -- ^ text accepted by 'isIdentifier'
+ | TkKeyword -- ^ text listed in 'keywords'
+ | TkString -- ^ text whose first character is a double quote
+ | TkChar -- ^ text whose first character is a single quote
+ | TkNumber -- ^ text whose first character is a digit
+ | TkOperator -- ^ text made only of characters from 'symbols'
+ | TkGlyph -- ^ text listed in 'glyphs'
+ | TkSpecial -- ^ text whose first character is one of 'special'
+ | TkSpace -- ^ text whose first character is whitespace
+ | TkComment -- ^ text that 'classify' matches on a @--@ or @{-@ prefix
+ | TkCpp -- ^ text whose first character is @#@
+ | TkUnknown -- ^ fallback when no other rule in 'classify' applies
parse :: String -> [Token]
parse = tokenize . tag . chunk
@@ -66,4 +73,89 @@ tag =
in (pos', (Span pos pos', c):cs)
tokenize :: [(Span, String)] -> [Token]
-tokenize = undefined
+-- Wrap every positioned chunk in a 'Token': the text is kept verbatim as the
+-- token value, its span is carried over unchanged, and the token type comes
+-- from running 'classify' on the text.
+tokenize = fmap $ \(loc, text) -> Token
+    { tkType = classify text
+    , tkValue = text
+    , tkSpan = loc
+    }
+
+-- | Decide the 'TokenType' of a single chunk of source text.
+--
+-- The checks run in order and the first match wins:
+--
+-- 1. the empty string is 'TkUnknown';
+-- 2. a @{-@ prefix is a block comment (tested before the first-character
+--    rules because @{@ is also one of the 'special' characters);
+-- 3. the first character selects whitespace, number, special, CPP, string
+--    and character tokens;
+-- 4. otherwise the whole text is matched against 'keywords', 'glyphs',
+--    dash-only line comments, operators, @--@ comments and identifiers,
+--    falling back to 'TkUnknown'.
+classify :: String -> TokenType
+classify [] = TkUnknown
+classify str
+    | "{-" `isPrefixOf` str = TkComment
+classify (c:_)
+    | isSpace c = TkSpace
+    | isDigit c = TkNumber
+    | c `elem` special = TkSpecial
+    | c == '#' = TkCpp
+    | c == '"' = TkString
+    | c == '\'' = TkChar
+classify str
+    | str `elem` keywords = TkKeyword
+    | str `elem` glyphs = TkGlyph
+    -- Two or more dashes and nothing else form a line comment; without this
+    -- guard such a token would be taken by the operator check below.
+    | isDashRun = TkComment
+    | all (`elem` symbols) str = TkOperator
+    | "--" `isPrefixOf` str = TkComment
+    | isIdentifier str = TkIdentifier
+    | otherwise = TkUnknown
+  where
+    isDashRun = "--" `isPrefixOf` str && all (== '-') str
+
+-- | Words that 'classify' reports as 'TkKeyword'.
+--
+-- Contains the Haskell 2010 reserved identifiers (apart from the wildcard
+-- @_@), the import-related special identifiers @as@, @hiding@ and
+-- @qualified@, and the extension keywords @forall@ and @mdo@.
+keywords :: [String]
+keywords =
+    [ "as"
+    , "case"
+    , "class"
+    , "data"
+    , "default"
+    , "deriving"
+    , "do"
+    , "else"
+    , "foreign"
+    , "hiding"
+    , "if"
+    , "import"
+    , "in"
+    , "infix"
+    , "infixl"
+    , "infixr"
+    , "instance"
+    , "let"
+    , "module"
+    , "newtype"
+    , "of"
+    , "qualified"
+    , "then"
+    , "type"
+    , "where"
+    , "forall"
+    , "mdo"
+    ]
+
+-- | Symbolic tokens that 'classify' reports as 'TkGlyph' rather than as an
+-- ordinary operator. The match is on the whole token text.
+glyphs :: [String]
+glyphs =
+    [ "..", ":", "::", "=", "\\", "|"
+    , "<-", "->", "@", "~", "~#", "=>"
+    , "-", "!"
+    ]
+
+-- | Characters that make 'classify' report 'TkSpecial' when a token starts
+-- with one of them: brackets, braces, parentheses, comma, semicolon and
+-- backquote.
+special :: [Char]
+special = ['(', ')', '[', ']', '{', '}', ',', ';', '`']
+
+-- | Characters an operator may be built from: 'classify' reports
+-- 'TkOperator' for a token consisting solely of these.
+--
+-- TODO: Add support for any Unicode symbol or punctuation.
+-- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators
+symbols :: [Char]
+symbols = "!#$%&*+./" ++ "<=>?@\\^|-~:"
+
+-- | Check whether a string has the lexical shape of a Haskell identifier.
+--
+-- The first character must be a letter or an underscore; every following
+-- character must be alphanumeric, an underscore or a prime (@'@). The empty
+-- string is not an identifier.
+isIdentifier :: String -> Bool
+isIdentifier (c:str) = isIdentStart c && all isIdentRest str
+  where
+    isIdentStart x = isLetter x || x == '_'
+    -- The prime test must look at the character being examined, not at the
+    -- first character of the identifier.
+    isIdentRest x = isAlphaNum x || x == '_' || x == '\''
+isIdentifier [] = False
More information about the ghc-commits
mailing list