-- final1_prob3.hs

-- A regular expression matcher
module RegularExp where

import Char

type Instream = [Char]

-- We use a nondeterministic version of the Reader monad, which we
-- name NReader. This monad has elements of the List monad, which
-- can be interpreted as representing nondeterministic computations.

-- We define NReader as a datatype rather than as an abbreviation
-- so we can define a Monad instance for it
data NReader a = NR (Instream -> [(a, Instream)])

-- Reader as a monad
instance Monad NReader where
  return v = NR (\ instream -> [(v,instream)])
  NR p >>= f = NR (\ instream ->
                     foldl (\prev -> \(v,instream') -> 
                               let NR q = f v in (prev ++ q instream'))
                           [] (p instream))
  fail s = NR (\ instream -> [])

inputc :: NReader Char
inputc = NR (\ instream -> 
             case instream of
               [] -> []
               (c:cs) -> [(c, cs)])

performNRdr :: NReader a -> (Instream -> [(a, Instream)])
performNRdr (NR p) = p

nchoice :: NReader a -> NReader a -> NReader a
nchoice (NR p) (NR q) =
  NR (\instream -> p instream ++ q instream)

-- infix operator for nchoice
(+++) = nchoice

-- A "reader transform" that corresponds to the Kleene star.
nstar :: NReader a -> NReader [a]
nstar r = nstarPlus r +++ return []

nstarPlus :: NReader a -> NReader [a]
nstarPlus r = r >>= (\v -> nstar r >>= (\ vs -> return (v:vs)))

-- match a designated character
checkChar :: Char -> NReader [Char]
checkChar c = inputc >>= (\c' -> if c' == c then return [c] else fail "")

-- regular expressions over the "alphabet" (atomic symbols) of characters

data RE = Empty       -- the empty r.e., matches immediately
                      --   without consuming any characters
        | C Char      -- singleton character: C c matches the
                      --   character c
        | Seq RE RE   -- sequencial composition: Seq A B matches A
                       --   followed by B  (normally written AB)
        | Alt RE RE   -- alternative composition: Alt A B matches 
                      --   either A or B (normally written A|B)
        | Star RE     -- Kleene star: Star R matches zero or more
                      --   repetitions of R (normally written R*)

-- reToNReader produces an NReader that nondeterministically finds
-- initial segments of an input stream ([Char]) that match the regular
-- expression argument. It produces a list of all matches, in the form
-- of a list of pairs of strings [(match,remainder),...] where the first
-- string is the segment of the input that matched, and the second string
-- is the unconsumed remainder of the input stream.
reToNReader :: RE -> NReader [Char]

reToNReader Empty = return ""
reToNReader (C c) = checkChar c
reToNReader (Seq re1 re2) = 
   do s1 <- reToNReader re1
      s2 <- reToNReader re2
      return (s1 ++ s2)
reToNReader (Alt re1 re2) = reToNReader re1 +++ reToNReader re2
reToNReader (Star re) =
   do strs <- nstar (reToNReader re)
      return (concat strs)

-- To get a reader that produces the longest possible matches of the
-- regular expression, we define longNReader.

maxlength :: [[Char]] -> Int -> Int
maxlength [] n = n 
maxlength (x:xs) n =
  let l = length x
   in if l > n then maxlength xs l else maxlength xs n

filterLongest :: [([Char], Instream)] -> [([Char], Instream)]
filterLongest states =
   let m = maxlength (map fst states) 0
    in filter (\(s @ (str,ins)) -> length str == m) states

longNReader :: RE -> NReader [Char]
longNReader re = NR(\instream ->
                    filterLongest(performNRdr (reToNReader re) instream))

-- test cases
test1 :: RE
test1 = Empty

test2 :: RE
test2 = C 'a'

test3 :: RE
test3 = Seq (C 'a') (C 'b')

test4 :: RE
test4 = Alt (C 'a') (C 'b')

test5 :: RE
test5 = Star (C 'a')

test6 :: RE
test6 = Seq test5 test3

-- testing functions
runTest :: RE -> Instream -> [([Char], Instream)]
runTest re instream = performNRdr (reToNReader re) instream

runTestL :: RE -> Instream -> [([Char], Instream)]
runTestL re instream = performNRdr (longNReader re) instream


