{-# LANGUAGE FlexibleInstances #-} module Parser where import Control.Applicative (liftA2) import Control.Monad (void) import Data.Char import Data.List (intercalate) import Data.List.NonEmpty (NonEmpty(..)) import Data.List.Split (splitOn) import Data.Void import Text.Megaparsec import Text.Megaparsec.Char import Compiler (PrlgStr(..)) import Debug.Trace singleToks = ",;|()[]" identParts = "_" notOpToks = "\'%" ++ identParts isOperatorlike x = (isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks) isIdentStart x = (x `elem` identParts) || isAlpha x isIdentOther x = isIdentStart x || isAlphaNum x || isMark x type Lexer = Parsec Void String data Lexeme = Blank String | Tok String | QTok String String -- unquoted quoted deriving (Show, Eq, Ord) blank :: Lexer Lexeme blank = Blank <$> some (satisfy isSpace) tok :: Lexer Lexeme tok = Tok <$> choice [ pure <$> oneOf singleToks , some $ satisfy isOperatorlike , (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther) , some (satisfy isNumber) ] qtok :: Lexer Lexeme qtok = do x <- string "'" y <- many $ satisfy (/= '\'') z <- string "'" return $ QTok y (x ++ y ++ z) cmt :: Lexer Lexeme cmt = Blank . concat <$> sequence [string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]] lexeme :: Lexer Lexeme lexeme = choice [blank, tok, qtok, cmt] lexPrlg :: Lexer [Lexeme] lexPrlg = many lexeme <* (many blank >> eof) showTok (Blank x) = x showTok (Tok x) = x showTok (QTok _ x) = x instance VisualStream [Lexeme] where showTokens _ (a :| b) = concatMap showTok (a : b) tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b) instance TraversableStream [Lexeme] where reachOffset o pst = go where handleEmpty "" = "" handleEmpty x = x go | o <= pstateOffset pst = ( Just . handleEmpty $ pstateLinePrefix pst ++ takeWhile (/= '\n') (concatMap showTok $ pstateInput pst) , pst) | o > pstateOffset pst = let (tok:rest) = pstateInput pst stok = showTok tok lines = splitOn "\n" stok nls = length lines - 1 sp = pstateSourcePos pst in reachOffset o pst { pstateInput = rest , pstateOffset = pstateOffset pst + 1 , pstateLinePrefix = if nls > 0 then last lines else pstateLinePrefix pst ++ last lines , pstateSourcePos = sp { sourceLine = mkPos $ unPos (sourceLine sp) + nls , sourceColumn = mkPos $ (if nls > 0 then 1 else unPos (sourceColumn sp)) + length (last lines) } } data AST = Call String [[AST]] | Seq [AST] | List [AST] (Maybe [AST]) | Literal String deriving (Show, Eq) type Parser = Parsec Void [Lexeme] isBlank (Blank _) = True isBlank _ = False ws = many $ satisfy isBlank free = (<* ws) -- we eat blanks _after_ the token. isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"]) isNormalTok (Tok x) = isNormalTokStr x isNormalTok (QTok x _) = isNormalTokStr x isNormalTok _ = False unTok (Tok t) = t unTok (QTok t _) = t literal :: Parser AST literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen) call = do fn <- unTok <$> satisfy isNormalTok -- not free Seq inner <- free parens return $ Call fn $ splitOn [Literal ","] inner parens = Seq <$> (free lParen *> some seqItem <* free rParen) list = do free lBracket choice [ List [] Nothing <$ free rBracket , do items <- some seqItem choice [ List items Nothing <$ free rBracket , List items . Just <$> (free listTail *> some seqItem <* free rBracket) ] ] seqItem = choice [try call, literal, parens, list] simpleTok :: String -> Parser () simpleTok s = void $ single (Tok s) comma = simpleTok "." lParen = simpleTok "(" rParen = simpleTok ")" lBracket = simpleTok "[" listTail = simpleTok "|" rBracket = simpleTok "]" clause :: Parser AST clause = Seq <$> some (free seqItem) <* free comma parsePrlg :: Parser [AST] parsePrlg = ws *> many clause <* eof data Op = Op Int Fixity deriving (Show, Eq) data ArgKind = X | Y deriving (Show, Eq) data Fixity = Infix ArgKind ArgKind | Prefix ArgKind | Suffix ArgKind deriving (Show, Eq) isPrefix (Prefix _) = True isPrefix _ = False numArgs :: Op -> Int numArgs (Op _ f) = go f where go (Infix _ _) = 2 go _ = 1 type Ops = [(String, Op)] defaultOps :: Ops defaultOps = [ ("", Op 0 $ Infix X Y) , ("+", Op 100 $ Prefix X) , ("!", Op 100 $ Suffix Y) , ("-", Op 100 $ Prefix Y) , ("*", Op 100 $ Infix Y X) , ("+", Op 200 $ Infix Y X) , ("++", Op 200 $ Infix X Y) , ("-", Op 200 $ Infix Y X) , ("<", Op 300 $ Infix X X) , (">", Op 300 $ Infix X X) , ("=", Op 400 $ Infix X X) , (",", Op 800 $ Infix X Y) , (";", Op 900 $ Infix X Y) , (":-", Op 1000 $ Infix X X) ] type PrlgError = String type PrlgResult = Either PrlgError PrlgStr err :: PrlgError -> Either PrlgError a err = Left ast2prlg :: Ops -> AST -> PrlgResult ast2prlg ot (List _ _) = err "no lists yet" ast2prlg ot (Seq ss) = shunt ot ss ast2prlg ot (Literal s) = pure (LiteralS s) ast2prlg ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss shunt :: Ops -> [AST] -> PrlgResult shunt optable = start where start :: [AST] -> PrlgResult start [x] = rec x --singleton, possibly either a single operator name or a single value start [] = err "empty parentheses?" start xs = wo [] [] xs {- "want operand" state, incoming literal -} wo :: Ops -> [PrlgStr] -> [AST] -> PrlgResult wo ops vs (l@(Literal x):xs) | Right _ <- getPrefix x , Right (ops', vs') <- pushPrefix ops vs x = wo ops' vs' xs | isOperand x = do l' <- rec l ho ops (l' : vs) xs | otherwise = err "expected operand" {- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -} wo ops vs (x:xs) = do x' <- rec x ho ops (x' : vs) xs {- end of stream, but the operand is missing -} wo ops vs [] = err "expected final operand" {- "have operand" state, incoming operator -} ho :: Ops -> [PrlgStr] -> [AST] -> PrlgResult ho ops vs xs'@(Literal x:xs) | Right _ <- getSuffix x , Right (ops', vs') <- pushSuffix ops vs x = ho ops' vs' xs | Right _ <- getInfix x , Right (ops', vs') <- pushInfix ops vs x = wo ops' vs' xs | isOperand x = ho ops vs (Literal "" : xs') -- app (see below) | Right _ <- getPrefix x = ho ops vs (Literal "" : xs') -- also app! | otherwise = err "expected infix or suffix operator" {- incoming operand; there's an app between -} ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs) {- the last operand was last, pop until finished -} ho [] [res] [] = pure res ho ops vs [] = do (ops', vs') <- pop ops vs ho ops' vs' [] {- recurse to delimited subexpression -} rec :: AST -> PrlgResult rec = ast2prlg optable {- pop a level, possibly uncovering a higher prio -} pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs)) pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs)) pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs)) pop _ _ = err "internal err: pop borked" {- Operator checks -} uniq [x] = pure x uniq _ = err "ambiguous operator" getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x] getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x] getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x] isOperand x = null [op | (s, op) <- optable, s == x] {- actual pushery -} canPush :: Ops -> Op -> Either PrlgError Bool canPush [] op = pure True canPush ((_, Op p f):ops) (Op np nf) = go p f np nf {- helper -} where prioLtOp X = (<) prioLtOp Y = (<=) {- pushing a prefix -} go prio (Infix _ l) nprio (Prefix _) = if prioLtOp l nprio prio then pure True else err "prefix on infix" go prio (Prefix l) nprio (Prefix r) = if prioLtOp l nprio prio then pure True else err "prefix on prefix" go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash {- pushing a suffix -} go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r {- pushing an infix -} go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r go prio (Suffix _) nprio (Infix r _) = if prioLtOp r nprio prio then err "infix on suffix" else pure False go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r {- helper for cases that look like: a `xfy` b `yfx` c -} clash p l np r | p < np = pure False | p > np = pure True | p == np , r == Y = pure False | p == np , l == Y , r == X = pure True | otherwise = err "priority clash" {- actual shunting -} pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x pushInfix ops vs x = getInfix x >>= shunt1 ops vs x shunt1 ops vs x op = do cp <- canPush ops op if cp then pure ((x, op) : ops, vs) else do (ops', vs') <- pop ops vs shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush