[Haskell-cafe] Need help to solve this question

Fri Mar 11 22:26:59 CET 2011

Both sections relate to the case study: Index for a document of text.

SECTION A:

Given the attached Haskell code, which produces an index of words, make the
following alterations by modifying existing functions and adding new
functions where necessary:

3) Treat a capitalised word (one or more capitals) as being different from
the word in all lower case (but they should still be sorted alphabetically)
– unless it is at the start of a sentence with only the initial letter
capitalised. A sentence is terminated by a ‘.’, ‘?’ or ‘!’.

-- Recent versions of the Prelude export a numeric type named 'Word'. It is
-- hidden here so that the local alias below can be used without an
-- "ambiguous occurrence" error.
import Prelude hiding (Word)

-- | A whole document, held as a single string.
type Doc = String

-- | A single line of a document.
type Line = String

-- | A single word taken from a line.
type Word = String

-- | Build the index of a document: each word longer than four characters,
-- sorted by word, paired with the numbers of the lines on which it occurs.
--
-- The stages are composed right to left, so the document enters at 'splitUp'
-- (bottom of the pipeline) and the finished index leaves 'shorten' (top).
-- Each stage sits on its own line so that its trailing type comment cannot
-- swallow the next stage.
makeIndex :: Doc -> [([Int], Word)]
makeIndex
  = shorten       -- [([Int], Word)] -> [([Int], Word)]
  . amalgamate    -- [([Int], Word)] -> [([Int], Word)]
  . makeLists     -- [(Int, Word)]   -> [([Int], Word)]
  . sortLs        -- [(Int, Word)]   -> [(Int, Word)]
  . allNumWords   -- [(Int, Line)]   -> [(Int, Word)]
  . numLines      -- [Line]          -> [(Int, Line)]
  . splitUp       -- Doc             -> [Line]

-- | Split a document into lines at newline characters.
--
-- A run of consecutive newlines is treated as one separator, so blank lines
-- inside the text produce no entries. A document that starts with a newline
-- yields an empty first line.
splitUp :: Doc -> [Line]
splitUp []   = []
splitUp text = firstLine : splitUp (dropWhile (== '\n') rest)
  where
    -- 'break' stops at the first newline, which stays at the head of 'rest'
    -- and is removed (with any newlines following it) before recursing.
    (firstLine, rest) = break (== '\n') text

-- | Pair every line with its line number, counting from 1.
--
-- The open-ended enumeration lets 'zip' stop at the end of the input, so the
-- list is not traversed a second time just to compute its length.
numLines :: [Line] -> [(Int, Line)]
numLines = zip [1 ..]

-- For each line:
--   a) split it into words
--   b) attach the line number to each word

-- | Split a line into words (step a).
--
-- A word is a maximal run of the letters @a@-@z@ / @A@-@Z@; every other
-- character is a separator, and a run of separators counts as one. A line
-- that starts with a separator yields an empty first word.
splitWords :: Line -> [Word]
splitWords []   = []
splitWords line = firstWord : splitWords (dropWhile (not . isLetter) rest)
  where
    -- 'span' takes the leading letters and leaves the remainder, which
    -- begins with the separators to be discarded before recursing.
    (firstWord, rest) = span isLetter line

    -- A character is a letter if it falls in either the lower-case or the
    -- upper-case range.
    isLetter ch = ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')

-- | Tag every word of a numbered line with that line's number (step b).
numWords :: (Int, Line) -> [(Int, Word)]
numWords (lineNo, text) = [(lineNo, w) | w <- splitWords text]

-- | Produce the (line number, word) pairs for a whole document by applying
-- 'numWords' to every numbered line and joining the results in order.
allNumWords :: [(Int, Line)] -> [(Int, Word)]
allNumWords = concatMap numWords

-- | Sort (line number, word) pairs by word, and by line number for pairs
-- that share a word.
--
-- This is a quicksort around the first element. Both partitions use the
-- strict ordering 'precedes', so a pair identical to the pivot lands in
-- neither of them: repeated occurrences of a word on the same line are
-- dropped, leaving one entry per (line, word).
sortLs :: [(Int, Word)] -> [(Int, Word)]
sortLs []             = []
sortLs (pivot : rest)
  =  sortLs [p | p <- rest, p `precedes` pivot]   -- entries before the pivot
  ++ [pivot]                                      -- pivot in the middle
  ++ sortLs [p | p <- rest, pivot `precedes` p]   -- entries after the pivot
  where
    -- Strictly-before test: the word decides first, the line number breaks
    -- ties. Named 'precedes' to avoid shadowing the Prelude's 'compare'.
    precedes (n1, w1) (n2, w2)
      = w1 < w2 || (w1 == w2 && n1 < n2)

-- | Wrap the line number of every pair in a singleton list, giving each
-- entry the shape that 'amalgamate' works on.
makeLists :: [(Int, Word)] -> [([Int], Word)]
makeLists = map (\(lineNo, w) -> ([lineNo], w))

-- | Merge neighbouring entries that carry the same word into one entry
-- whose line-number list is the concatenation of theirs.
--
-- Only adjacent entries are compared.
amalgamate :: [([Int], Word)] -> [([Int], Word)]
amalgamate (first : second : others)
  -- Same word: fold the second entry's numbers into the first and retry,
  -- so a longer run of the same word keeps collapsing.
  | snd first == snd second
      = amalgamate ((fst first ++ fst second, snd first) : others)
  -- Different words: the first entry is finished.
  | otherwise
      = first : amalgamate (second : others)
-- Zero or one entry: nothing left to merge.
amalgamate shortList = shortList

-- | Keep only the entries whose word is longer than four characters.
shorten :: [([Int], Word)] -> [([Int], Word)]
shorten entries = [entry | entry@(_, w) <- entries, length w > 4]
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.haskell.org/pipermail/haskell-cafe/attachments/20110311/105a5e22/attachment.htm>