{-# LANGUAGE FlexibleInstances #-}

module Parser
  ( lexPrlg
  , parsePrlg
  , shuntPrlg
  ) where

import Control.Monad (void)
import Data.Char
  ( isAlpha
  , isAlphaNum
  , isMark
  , isNumber
  , isPunctuation
  , isSpace
  , isSymbol
  )
import Data.List.NonEmpty (NonEmpty(..))
import Data.List.Split (splitOn)
import Data.Void (Void)
import Text.Megaparsec
  ( Parsec
  , PosState(..)
  , SourcePos(..)
  , TraversableStream(..)
  , VisualStream(..)
  , (<|>)
  , choice
  , eof
  , many
  , mkPos
  , notFollowedBy
  , oneOf
  , satisfy
  , single
  , some
  , try
  , unPos
  )
import Text.Megaparsec.Char (string)

import IR (PrlgStr(..))
import Operators (ArgKind(..), Fixity(..), Op(..), Ops)

singleToks = ",;|()[]"

identParts = "_"

notOpToks = "\'%" ++ identParts

isOperatorlike x =
  (isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)

isIdentStart x = (x `elem` identParts) || isAlpha x

isIdentOther x = isIdentStart x || isAlphaNum x || isMark x

type Lexer = Parsec Void String

data Lexeme
  = Blank String
  | Tok String
  | QTok String String -- unquoted quoted
  deriving (Show, Eq, Ord)

blank :: Lexer Lexeme
blank = Blank <$> some (satisfy isSpace)

tok :: Lexer Lexeme
tok =
  Tok <$>
  choice
    [ pure <$> oneOf singleToks
    , some $ satisfy isOperatorlike
    , (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
    , some (satisfy isNumber)
    ]

qtok :: Lexer Lexeme
qtok = do
  x <- string "'"
  y <- many $ satisfy (/= '\'')
  z <- string "'"
  return $ QTok y (x ++ y ++ z)

cmt :: Lexer Lexeme
cmt =
  Blank . concat <$>
  sequence
    [string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]

lexeme :: Lexer Lexeme
lexeme = choice [blank, tok, qtok, cmt]

lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof)

showTok (Blank x) = x
showTok (Tok x) = x
showTok (QTok _ x) = x

instance VisualStream [Lexeme] where
  showTokens _ (a :| b) = concatMap showTok (a : b)
  tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b)

instance TraversableStream [Lexeme] where
  reachOffset o pst = go
    where
      handleEmpty "" = "<empty line>"
      handleEmpty x = x
      go
        | o <= pstateOffset pst =
          ( Just . handleEmpty $
            pstateLinePrefix pst ++
            takeWhile (/= '\n') (concatMap showTok $ pstateInput pst)
          , pst)
        | o > pstateOffset pst =
          let (tok:rest) = pstateInput pst
              stok = showTok tok
              lines = splitOn "\n" stok
              nls = length lines - 1
              sp = pstateSourcePos pst
           in reachOffset
                o
                pst
                  { pstateInput = rest
                  , pstateOffset = pstateOffset pst + 1
                  , pstateLinePrefix =
                      if nls > 0
                        then last lines
                        else pstateLinePrefix pst ++ last lines
                  , pstateSourcePos =
                      sp
                        { sourceLine = mkPos $ unPos (sourceLine sp) + nls
                        , sourceColumn =
                            mkPos $
                            (if nls > 0
                               then 1
                               else unPos (sourceColumn sp)) +
                            length (last lines)
                        }
                  }

data PAST
  = Call String [[PAST]]
  | Seq [PAST]
  | List [[PAST]] (Maybe [PAST])
  | Literal String
  deriving (Show, Eq)

type Parser = Parsec Void [Lexeme]

isBlank (Blank _) = True
isBlank _ = False

ws = many $ satisfy isBlank

free = (<* ws) -- we eat blanks _after_ the token.

isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])

isNormalTok (Tok x) = isNormalTokStr x
isNormalTok (QTok x _) = isNormalTokStr x
isNormalTok _ = False

unTok (Tok t) = t
unTok (QTok t _) = t

literal :: Parser PAST
literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)

call = do
  fn <- unTok <$> satisfy isNormalTok -- not free
  Seq inner <- free parens
  return $ Call fn $ splitOn [Literal ","] inner

parens = Seq <$> (free lParen *> some seqItem <* free rParen)

list = do
  free lBracket
  choice
    [ List [] Nothing <$ free rBracket
    , do items <- splitOn [Literal ","] <$> some seqItem
         choice
           [ List items Nothing <$ free rBracket
           , List items . Just <$>
             (free listTail *> some seqItem <* free rBracket)
           ]
    ]

seqItem = choice [try call, literal, parens, list]

simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s)

comma = simpleTok "."

lParen = simpleTok "("

rParen = simpleTok ")"

lBracket = simpleTok "["

listTail = simpleTok "|"

rBracket = simpleTok "]"

clause :: Parser PAST
clause = Seq <$> some (free seqItem) <* free comma

parsePrlg :: Parser [PAST]
parsePrlg = ws *> many clause <* eof

type ShuntError = String

type ShuntResult = Either ShuntError PrlgStr

err :: ShuntError -> Either ShuntError a
err = Left

shuntPrlg :: Ops -> PAST -> ShuntResult
shuntPrlg ot = shuntPrlg' (("", Op 0 $ Infix X Y) : ot)

shuntPrlg' :: Ops -> PAST -> ShuntResult
shuntPrlg' ot (List hs t) =
  ListS <$> traverse (shunt ot) hs <*> traverse (shunt ot) t
shuntPrlg' ot (Seq ss) = shunt ot ss
shuntPrlg' ot (Literal s) = pure (LiteralS s)
shuntPrlg' ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss

shunt :: Ops -> [PAST] -> ShuntResult
shunt optable = start
  where
    start :: [PAST] -> ShuntResult
    start [x] = rec x --singleton, possibly either a single operator name or a single value
    start [] = err "empty parentheses?"
    start xs = wo [] [] xs
    resolve = foldr1 (<|>)
    {- "want operand" state, incoming literal -}
    wo :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult
    wo ops vs (l@(Literal x):xs) =
      resolve
        [ do getPrefix x
             (ops', vs') <- pushPrefix ops vs x
             wo ops' vs' xs
        , do getOperand x
             l' <- rec l
             ho ops (l' : vs) xs
        , err "expected operand"
        ]
    {- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
    wo ops vs (x:xs) = do
      x' <- rec x
      ho ops (x' : vs) xs
    {- end of stream, but the operand is missing -}
    wo ops vs [] = err "expected final operand"
    {- "have operand" state, expecting an operator -}
    ho :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult
    ho ops vs xs'@(Literal x:xs) =
      resolve
        [ do getSuffix x
             (ops', vs') <- pushSuffix ops vs x
             ho ops' vs' xs
        , do getInfix x
             (ops', vs') <- pushInfix ops vs x
             wo ops' vs' xs
        , do getOperand x
             ho ops vs (Literal "" : xs') -- app (see below)
        , do getPrefix x
             ho ops vs (Literal "" : xs') -- also app!
        , err "expected infix or suffix operator"
        ]
    {- incoming non-literal operand; there must be an app in between -}
    ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
    {- the last operand was last, pop until finished -}
    ho [] [res] [] = pure res
    ho ops vs [] = do
      (ops', vs') <- pop ops vs
      ho ops' vs' []
    {- recurse to delimited subexpression -}
    rec :: PAST -> ShuntResult
    rec = shuntPrlg' optable
    {- pop a level, possibly uncovering a higher prio -}
    pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs))
    pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
    pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
    pop _ _ = err "internal err: pop borked"
    {- Operator checks -}
    uniq [x] = pure x
    uniq _ = err "ambiguous operator"
    getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
    getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
    getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
    getOperand x
      | null [op | (s, op) <- optable, s == x] = pure ()
      | otherwise = err "expected an operand"
    {- actual pushery -}
    canPush :: Ops -> Op -> Either ShuntError Bool
    canPush [] op = pure True
    canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
        {- helper -}
      where
        prioLtOp X = (<)
        prioLtOp Y = (<=)
        {- pushing a prefix -}
        go prio (Infix _ l) nprio (Prefix _) =
          if prioLtOp l nprio prio
            then pure True
            else err "prefix on infix"
        go prio (Prefix l) nprio (Prefix r) =
          if prioLtOp l nprio prio
            then pure True
            else err "prefix on prefix"
        go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash
        {- pushing a suffix -}
        go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
        go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio
        go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
        {- pushing an infix -}
        go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
        go prio (Suffix _) nprio (Infix r _) =
          if prioLtOp r nprio prio
            then err "infix on suffix"
            else pure False
        go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
        {- helper for cases that look like: a `xfy` b `yfx` c -}
        clash p l np r
          | p < np = pure False
          | p > np = pure True
          | p == np
          , r == Y = pure False
          | p == np
          , l == Y
          , r == X = pure True
          | otherwise = err "priority clash"
    {- actual shunting -}
    pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
    pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
    pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
    shunt1 ops vs x op = do
      cp <- canPush ops op
      if cp
        then pure ((x, op) : ops, vs)
        else do
          (ops', vs') <- pop ops vs
          shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush