{-# LANGUAGE FlexibleInstances #-} module Parser where import Control.Applicative (liftA2) import Control.Monad (void) import Data.Char import Data.List (intercalate) import Data.List.NonEmpty (NonEmpty(..)) import Data.List.Split (splitOn) import Data.Void import Text.Megaparsec import Text.Megaparsec.Char import Compiler (PrlgStr(..)) singleToks = ",;|()[]" identParts = "_" notOpToks = "\'%" ++ identParts isOperatorlike x = (isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks) isIdentStart x = (x `elem` identParts) || isAlpha x isIdentOther x = isIdentStart x || isAlphaNum x || isMark x type Lexer = Parsec Void String data Lexeme = Blank String | Tok String | QTok String String -- unquoted quoted deriving (Show, Eq, Ord) blank :: Lexer Lexeme blank = Blank <$> some (satisfy isSpace) tok :: Lexer Lexeme tok = Tok <$> choice [ pure <$> oneOf singleToks , some $ satisfy isOperatorlike , (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther) , some (satisfy isNumber) ] qtok :: Lexer Lexeme qtok = do x <- string "'" y <- many $ satisfy (/= '\'') z <- string "'" return $ QTok y (x ++ y ++ z) cmt :: Lexer Lexeme cmt = Blank . concat <$> sequence [string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]] lexeme :: Lexer Lexeme lexeme = choice [blank, tok, qtok, cmt] lexPrlg :: Lexer [Lexeme] lexPrlg = many lexeme <* (many blank >> eof) showTok (Blank x) = x showTok (Tok x) = x showTok (QTok _ x) = x instance VisualStream [Lexeme] where showTokens _ (a :| b) = concatMap showTok (a : b) tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b) instance TraversableStream [Lexeme] where reachOffset o pst = go where handleEmpty "" = "" handleEmpty x = x go | o <= pstateOffset pst = ( Just . handleEmpty $ pstateLinePrefix pst ++ takeWhile (/= '\n') (concatMap showTok $ pstateInput pst) , pst) | o > pstateOffset pst = let (tok:rest) = pstateInput pst stok = showTok tok lines = splitOn "\n" stok nls = length lines - 1 sp = pstateSourcePos pst in reachOffset o pst { pstateInput = rest , pstateOffset = pstateOffset pst + 1 , pstateLinePrefix = if nls > 0 then last lines else pstateLinePrefix pst ++ last lines , pstateSourcePos = sp { sourceLine = mkPos $ unPos (sourceLine sp) + nls , sourceColumn = mkPos $ (if nls > 0 then 1 else unPos (sourceColumn sp)) + length (last lines) } } data AST = Call String [[AST]] | Seq [AST] | List [AST] (Maybe [AST]) | Literal String deriving (Show, Eq) type Parser = Parsec Void [Lexeme] isBlank (Blank _) = True isBlank _ = False ws = many $ satisfy isBlank free = (<* ws) -- we eat blanks _after_ the token. isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"]) isNormalTok (Tok x) = isNormalTokStr x isNormalTok (QTok x _) = isNormalTokStr x isNormalTok _ = False unTok (Tok t) = t unTok (QTok t _) = t literal :: Parser AST literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen) call = do fn <- unTok <$> satisfy isNormalTok -- not free Seq inner <- free parens return $ Call fn $ splitOn [Literal ","] inner parens = Seq <$> (free lParen *> some seqItem <* free rParen) list = do free lBracket choice [ List [] Nothing <$ free rBracket , do items <- some seqItem choice [ List items Nothing <$ free rBracket , List items . Just <$> (free listTail *> some seqItem <* free rBracket) ] ] seqItem = choice [try call, literal, parens, list] simpleTok :: String -> Parser () simpleTok s = void $ single (Tok s) comma = simpleTok "." lParen = simpleTok "(" rParen = simpleTok ")" lBracket = simpleTok "[" listTail = simpleTok "|" rBracket = simpleTok "]" clause :: Parser AST clause = Seq <$> some (free seqItem) <* free comma parsePrlg :: Parser [AST] parsePrlg = ws *> many clause <* eof data Op = Op Int Fixity deriving (Show, Eq) data ArgKind = X | Y deriving (Show, Eq) data Fixity = Infix ArgKind ArgKind | Prefix ArgKind | Suffix ArgKind deriving (Show, Eq) isPrefix (Prefix _) = True isPrefix _ = False numArgs :: Op -> Int numArgs (Op _ f) = go f where go (Infix _ _) = 2 go _ = 1 type Ops = [(String, Op)] type PrlgError = String type PrlgResult = Either PrlgError PrlgStr err :: PrlgError -> Either PrlgError a err = Left ast2prlg :: Ops -> AST -> PrlgResult ast2prlg ot = ast2prlg' (("", Op 0 $ Infix X Y) : ot) ast2prlg' :: Ops -> AST -> PrlgResult ast2prlg' ot (List _ _) = err "no lists yet" ast2prlg' ot (Seq ss) = shunt ot ss ast2prlg' ot (Literal s) = pure (LiteralS s) ast2prlg' ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss shunt :: Ops -> [AST] -> PrlgResult shunt optable = start where start :: [AST] -> PrlgResult start [x] = rec x --singleton, possibly either a single operator name or a single value start [] = err "empty parentheses?" start xs = wo [] [] xs resolve = foldr1 (<|>) {- "want operand" state, incoming literal -} wo :: Ops -> [PrlgStr] -> [AST] -> PrlgResult wo ops vs (l@(Literal x):xs) = resolve [ do getPrefix x (ops', vs') <- pushPrefix ops vs x wo ops' vs' xs , do getOperand x l' <- rec l ho ops (l' : vs) xs , err "expected operand" ] {- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -} wo ops vs (x:xs) = do x' <- rec x ho ops (x' : vs) xs {- end of stream, but the operand is missing -} wo ops vs [] = err "expected final operand" {- "have operand" state, expecting an operator -} ho :: Ops -> [PrlgStr] -> [AST] -> PrlgResult ho ops vs xs'@(Literal x:xs) = resolve [ do getSuffix x (ops', vs') <- pushSuffix ops vs x ho ops' vs' xs , do getInfix x (ops', vs') <- pushInfix ops vs x wo ops' vs' xs , do getOperand x ho ops vs (Literal "" : xs') -- app (see below) , do getPrefix x ho ops vs (Literal "" : xs') -- also app! , err "expected infix or suffix operator" ] {- incoming non-literal operand; there must be an app in between -} ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs) {- the last operand was last, pop until finished -} ho [] [res] [] = pure res ho ops vs [] = do (ops', vs') <- pop ops vs ho ops' vs' [] {- recurse to delimited subexpression -} rec :: AST -> PrlgResult rec = ast2prlg' optable {- pop a level, possibly uncovering a higher prio -} pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs)) pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs)) pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs)) pop _ _ = err "internal err: pop borked" {- Operator checks -} uniq [x] = pure x uniq _ = err "ambiguous operator" getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x] getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x] getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x] getOperand x | null [op | (s, op) <- optable, s == x] = pure () | otherwise = err "expected an operand" {- actual pushery -} canPush :: Ops -> Op -> Either PrlgError Bool canPush [] op = pure True canPush ((_, Op p f):ops) (Op np nf) = go p f np nf {- helper -} where prioLtOp X = (<) prioLtOp Y = (<=) {- pushing a prefix -} go prio (Infix _ l) nprio (Prefix _) = if prioLtOp l nprio prio then pure True else err "prefix on infix" go prio (Prefix l) nprio (Prefix r) = if prioLtOp l nprio prio then pure True else err "prefix on prefix" go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash {- pushing a suffix -} go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r {- pushing an infix -} go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r go prio (Suffix _) nprio (Infix r _) = if prioLtOp r nprio prio then err "infix on suffix" else pure False go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r {- helper for cases that look like: a `xfy` b `yfx` c -} clash p l np r | p < np = pure False | p > np = pure True | p == np , r == Y = pure False | p == np , l == Y , r == X = pure True | otherwise = err "priority clash" {- actual shunting -} pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x pushInfix ops vs x = getInfix x >>= shunt1 ops vs x shunt1 ops vs x op = do cp <- canPush ops op if cp then pure ((x, op) : ops, vs) else do (ops', vs') <- pop ops vs shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush