{-# LANGUAGE FlexibleInstances #-} module Parser ( lexPrlg , parsePrlg , shuntPrlg , PAST , Lexeme(..) , PrlgStr(..) ) where import Control.Monad (void) import Data.Char ( isAlpha , isAlphaNum , isMark , isNumber , isPunctuation , isSpace , isSymbol ) import Data.List.NonEmpty (NonEmpty(..)) import Data.List.Split (splitOn) import Data.Void (Void) import Text.Megaparsec ( Parsec , PosState(..) , SourcePos(..) , TraversableStream(..) , VisualStream(..) , (<|>) , choice , eof , many , mkPos , notFollowedBy , oneOf , satisfy , single , some , try , unPos ) import Text.Megaparsec.Char (string) import Operators (ArgKind(..), Fixity(..), Op(..), Ops) singleToks = ",;|()[]{}!" identParts = "_" notOpToks = "'\"" ++ identParts isOperatorlike x = (isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks) isIdentStart x = (x `elem` identParts) || isAlpha x isIdentOther x = isIdentStart x || isAlphaNum x || isMark x type Lexer = Parsec Void String data Lexeme = Blank String | Tok String | QTok String String -- unquoted quoted | DQTok String String -- unquoted quoted deriving (Show, Eq, Ord) blank :: Lexer Lexeme blank = Blank <$> some (satisfy isSpace) tok :: Lexer Lexeme tok = Tok <$> choice [ pure <$> oneOf singleToks , some $ satisfy isOperatorlike , (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther) , some (satisfy isNumber) ] qtok :: Lexer Lexeme qtok = do x <- string "'" y <- many $ satisfy (/= '\'') z <- string "'" return $ QTok y (x ++ y ++ z) dqtok :: Lexer Lexeme dqtok = do x <- string "\"" y <- many $ satisfy (/= '\"') -- TODO actual escaping z <- string "\"" return $ DQTok y (x ++ y ++ z) lexeme :: Lexer Lexeme lexeme = choice [blank, tok, qtok, dqtok] lexPrlg :: Lexer [Lexeme] lexPrlg = many lexeme <* (many blank >> eof) showTok (Blank x) = x showTok (Tok x) = x showTok (QTok _ x) = x showTok (DQTok _ x) = x instance VisualStream [Lexeme] where showTokens _ (a :| b) = concatMap showTok (a : b) tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b) instance TraversableStream [Lexeme] where reachOffset o pst = go where handleEmpty "" = "" handleEmpty x = x go | o <= pstateOffset pst = ( Just . handleEmpty $ pstateLinePrefix pst ++ takeWhile (/= '\n') (concatMap showTok $ pstateInput pst) , pst) | o > pstateOffset pst = let (tok:rest) = pstateInput pst stok = showTok tok lines = splitOn "\n" stok nls = length lines - 1 sp = pstateSourcePos pst in reachOffset o pst { pstateInput = rest , pstateOffset = pstateOffset pst + 1 , pstateLinePrefix = if nls > 0 then last lines else pstateLinePrefix pst ++ last lines , pstateSourcePos = sp { sourceLine = mkPos $ unPos (sourceLine sp) + nls , sourceColumn = mkPos $ (if nls > 0 then 1 else unPos (sourceColumn sp)) + length (last lines) } } data PAST = Call String [[PAST]] | Group [PAST] | List [[PAST]] (Maybe [PAST]) | Literal Lexeme deriving (Show, Eq) type Parser = Parsec Void [Lexeme] isBlank (Blank _) = True isBlank _ = False ws = many $ satisfy isBlank free = (<* ws) -- we eat blanks _after_ the token. isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")", "{", "}"]) isNormalTok :: Lexeme -> Bool isNormalTok (Tok x) = isNormalTokStr x isNormalTok (QTok _ _) = True isNormalTok (DQTok _ _) = True isNormalTok _ = False isCallTok :: Lexeme -> Bool isCallTok (Tok x) = all (\c -> not (isSymbol c) && not (isPunctuation c) || c `elem` identParts) x isCallTok (QTok _ _) = True isCallTok _ = False unTok (Tok t) = t unTok (QTok t _) = t unTok (DQTok t _) = t literal :: Parser PAST literal = Literal <$> free (choice [ satisfy isCallTok <* notFollowedBy (choice [lParen, lBrace]) , satisfy (\x -> not (isCallTok x) && isNormalTok x) ]) makeParams (Group inner) = splitOn [Literal (Tok ",")] inner call eb contents fmod = do fn <- fmod . unTok <$> satisfy isCallTok -- not free (Call fn [] <$ try eb) <|> (Call fn . makeParams <$> free contents) parens = Group <$> (free lParen *> some seqItem <* free rParen) braces = Group <$> (free lBrace *> some seqItem <* free rBrace) emptyParens = Literal (QTok "()" "()") <$ (free lParen >> free rParen) emptyBraces = Literal (QTok "{}" "{}") <$ (free lBrace >> free rBrace) list = do free lBracket (List [] Nothing <$ free rBracket) <|> do items <- splitOn [Literal (Tok ",")] <$> some seqItem (List items Nothing <$ free rBracket) <|> (List items . Just <$> (free listTail *> some seqItem <* free rBracket)) seqItem = choice [ try $ call emptyParens parens id , try $ call emptyBraces braces (++ "{}") , literal , try emptyParens , parens , try emptyBraces , Call "{}" . makeParams <$> braces , list ] simpleTok :: String -> Parser () simpleTok s = void $ single (Tok s) period = simpleTok "." lParen = simpleTok "(" rParen = simpleTok ")" lBracket = simpleTok "[" listTail = simpleTok "|" rBracket = simpleTok "]" lBrace = simpleTok "{" rBrace = simpleTok "}" clause :: Parser PAST clause = Group <$> some (free seqItem) <* free period parsePrlg :: Parser [PAST] parsePrlg = ws *> many clause <* eof type ShuntError = String type ShuntResult = Either ShuntError PrlgStr data PrlgStr = CallS String [PrlgStr] | LiteralS Lexeme deriving (Show) err :: ShuntError -> Either ShuntError a err = Left shuntPrlg :: Ops -> PAST -> ShuntResult shuntPrlg ot = shuntPrlg' (("", Op 0 $ Infix Y X) : ot) shuntPrlg' :: Ops -> PAST -> ShuntResult shuntPrlg' ot (List hs t) = foldr (\x y -> CallS "[]" [x, y]) <$> (maybe (LiteralS $ Tok "[]") id <$> traverse (shunt ot) t) <*> traverse (shunt ot) hs shuntPrlg' ot (Group ss) = shunt ot ss shuntPrlg' ot (Literal s) = pure (LiteralS s) shuntPrlg' ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss shunt :: Ops -> [PAST] -> ShuntResult shunt optable = start where start :: [PAST] -> ShuntResult start [x] = rec x --singleton, possibly either a single operator name or a single value start [] = err "empty parentheses?" start xs = wo [] [] xs resolve = foldr1 (<|>) {- "want operand" state, incoming literal -} wo :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult wo ops vs (l@(Literal x):xs) = resolve [ do getPrefix x (ops', vs') <- pushPrefix ops vs x wo ops' vs' xs , do getOperand x l' <- rec l ho ops (l' : vs) xs , err "expected operand" ] {- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -} wo ops vs (x:xs) = do x' <- rec x ho ops (x' : vs) xs {- end of stream, but the operand is missing -} wo ops vs [] = err "expected final operand" {- "have operand" state, expecting an operator -} ho :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult ho ops vs xs'@(Literal x:xs) = resolve [ do getSuffix x (ops', vs') <- pushSuffix ops vs x ho ops' vs' xs , do getInfix x (ops', vs') <- pushInfix ops vs x wo ops' vs' xs , do getOperand x ho ops vs (Literal (Tok "") : xs') -- app (see below) , do getPrefix x ho ops vs (Literal (Tok "") : xs') -- also app! , err "expected infix or suffix operator" ] {- incoming non-literal operand; there must be an app in between -} ho ops vs xs@(_:_) = ho ops vs (Literal (Tok "") : xs) {- the last operand was last, pop until finished -} ho [] [res] [] = pure res ho ops vs [] = do (ops', vs') <- pop ops vs ho ops' vs' [] {- recurse to delimited subexpression -} rec :: PAST -> ShuntResult rec = shuntPrlg' optable {- pop a level, possibly uncovering a higher prio -} pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs)) pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs)) pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs)) pop _ _ = err "internal err: pop borked" {- Operator checks -} uniq [x] = pure x uniq _ = err "ambiguous operator" getPrefix t = uniq [op | Tok x <- [t], (s, op@(Op _ (Prefix _))) <- optable, s == x] getSuffix t = uniq [op | Tok x <- [t], (s, op@(Op _ (Suffix _))) <- optable, s == x] getInfix t = uniq [op | Tok x <- [t], (s, op@(Op _ (Infix _ _))) <- optable, s == x] getOperand t | null [op | Tok x <- [t], (s, op) <- optable, s == x] = pure () | otherwise = err "expected an operand" {- actual pushery -} canPush :: Ops -> Op -> Either ShuntError Bool canPush [] op = pure True canPush ((_, Op p f):ops) (Op np nf) = go p f np nf {- helper -} where prioLtOp X = (<) prioLtOp Y = (<=) {- pushing a prefix -} go prio (Infix _ l) nprio (Prefix _) = if prioLtOp l nprio prio then pure True else err "prefix on infix" go prio (Prefix l) nprio (Prefix r) = if prioLtOp l nprio prio then pure True else err "prefix on prefix" go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash {- pushing a suffix -} go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r {- pushing an infix -} go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r go prio (Suffix _) nprio (Infix r _) = if prioLtOp r nprio prio then err "infix on suffix" else pure False go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r {- helper for cases that look like: a `xfy` b `yfx` c -} clash p l np r | p < np = pure False | p > np = pure True | p == np , r == Y = pure False | p == np , l == Y , r == X = pure True | otherwise = err "priority clash" {- actual shunting -} pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x pushInfix ops vs x = getInfix x >>= shunt1 ops vs x shunt1 ops vs x op = do cp <- canPush ops op if cp then pure ((unTok x, op) : ops, vs) else do (ops', vs') <- pop ops vs shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush