module Parser where import Control.Applicative (liftA2) import Control.Monad (void) import Data.Char import Data.List.Split (splitOn) import Data.Void import Text.Megaparsec import Text.Megaparsec.Char import Compiler (PrlgStr(..)) singleToks = ",;|()[]" identParts = "_" notOpToks = "\'%" ++ identParts isOperatorlike x = (isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks) isIdentStart x = (x `elem` identParts) || isAlpha x isIdentOther x = isIdentStart x || isAlphaNum x || isMark x type Lexer = Parsec Void String data Lexeme = Blank String | Tok String | QTok String String -- unquoted quoted deriving (Show, Eq, Ord) blank :: Lexer Lexeme blank = Blank <$> some (satisfy isSpace) tok :: Lexer Lexeme tok = Tok <$> choice [ pure <$> oneOf singleToks , some $ satisfy isOperatorlike , (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther) , some (satisfy isNumber) ] qtok :: Lexer Lexeme qtok = do x <- string "'" y <- many $ satisfy (/= '\'') z <- string "'" return $ QTok y (x ++ y ++ z) cmt :: Lexer Lexeme cmt = Blank . concat <$> sequence [string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]] lexeme :: Lexer Lexeme lexeme = choice [blank, tok, qtok, cmt] lexPrlg :: Lexer [Lexeme] lexPrlg = many lexeme <* (many blank >> eof) data AST = Call String [[AST]] | Seq [AST] | List [AST] (Maybe [AST]) | Literal String deriving (Show, Eq) type Parser = Parsec Void [Lexeme] isBlank (Blank _) = True isBlank _ = False ws = many $ satisfy isBlank free = (<* ws) -- we eat blanks _after_ the token. isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"]) isNormalTok (Tok x) = isNormalTokStr x isNormalTok (QTok x _) = isNormalTokStr x isNormalTok _ = False unTok (Tok t) = t unTok (QTok t _) = t literal :: Parser AST literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen) call = do fn <- unTok <$> satisfy isNormalTok -- not free Seq inner <- free parens return $ Call fn $ splitOn [Literal ","] inner parens = Seq <$> (free lParen *> some seqItem <* free rParen) list = do free lBracket choice [ List [] Nothing <$ free rBracket , do items <- some seqItem choice [ List items Nothing <$ free rBracket , List items . Just <$> (free listTail *> some seqItem <* free rBracket) ] ] seqItem = choice [try call, literal, parens, list] simpleTok :: String -> Parser () simpleTok s = void $ single (Tok s) comma = simpleTok "." lParen = simpleTok "(" rParen = simpleTok ")" lBracket = simpleTok "[" listTail = simpleTok "|" rBracket = simpleTok "]" clause :: Parser AST clause = Seq <$> some (free seqItem) <* free comma parsePrlg :: Parser [AST] parsePrlg = ws *> many clause <* eof data Op = Op Int Fixity deriving (Show, Eq) data ArgKind = X | Y deriving (Show, Eq) data Fixity = Infix ArgKind ArgKind | Prefix ArgKind | Suffix ArgKind deriving (Show, Eq) isPrefix (Prefix _) = True isPrefix _ = False numArgs :: Op -> Int numArgs (Op _ f) = go f where go (Infix _ _) = 2 go _ = 1 type Ops = [(String, Op)] defaultOps :: Ops defaultOps = [ ("", Op 0 $ Infix X Y) , ("+", Op 100 $ Prefix X) , ("!", Op 100 $ Suffix Y) , ("-", Op 100 $ Prefix Y) , ("*", Op 100 $ Infix Y X) , ("+", Op 200 $ Infix Y X) , ("++", Op 200 $ Infix X Y) , ("-", Op 200 $ Infix Y X) , ("<", Op 300 $ Infix X X) , (">", Op 300 $ Infix X X) , ("=", Op 400 $ Infix X X) , (",", Op 800 $ Infix X Y) , (";", Op 900 $ Infix X Y) , (":-", Op 1000 $ Infix X X) ] ast2prlg :: Ops -> AST -> PrlgStr ast2prlg ot (List _ _) = error "no lists yet" ast2prlg ot (Seq ss) = shunt ot ss ast2prlg ot (Literal s) = LiteralS s ast2prlg ot (Call fn ss) = CallS fn $ map (shunt ot) ss shunt :: Ops -> [AST] -> PrlgStr shunt optable = start where start :: [AST] -> PrlgStr start [x] = rec x --singleton, possibly either a single operator name or a single value start [] = error "wat seq" start xs = wo [] [] xs {- "want operand" state, incoming literal -} wo :: Ops -> [PrlgStr] -> [AST] -> PrlgStr wo ops vs (l@(Literal x):xs) | Just _ <- getPrefix x , Just (ops', vs') <- pushPrefix ops vs x = wo ops' vs' xs | isOperand x = ho ops (rec l : vs) xs | otherwise = error $ "want valid operand " ++ show (ops,vs,l,xs) {- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -} wo ops vs (x:xs) = ho ops (rec x : vs) xs {- end of stream, but the operand is missing -} wo ops vs [] = error "missing final operand" {- "have operand" state, incoming operator -} ho :: Ops -> [PrlgStr] -> [AST] -> PrlgStr ho ops vs xs'@(Literal x:xs) | Just _ <- getSuffix x , Just (ops', vs') <- pushSuffix ops vs x = ho ops' vs' xs | Just _ <- getInfix x , Just (ops', vs') <- pushInfix ops vs x = wo ops' vs' xs | isOperand x = ho ops vs (Literal "" : xs') -- app (see below) | Just _ <- getPrefix x = ho ops vs (Literal "" : xs') -- also app! | otherwise = error "want valid infix" {- incoming operand; there's an app between -} ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs) {- the last operand was last, pop until finished -} ho [] [res] [] = res ho ops vs [] = let (ops', vs') = pop ops vs in ho ops' vs' [] {- recurse to delimited subexpression -} rec :: AST -> PrlgStr rec = ast2prlg optable {- pop a level, possibly uncovering a higher prio -} pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = (ops, (CallS x [l, r] : vs)) pop ((x, Op _ (Prefix _)):ops) (p:vs) = (ops, (CallS x [p] : vs)) pop ((x, Op _ (Suffix _)):ops) (p:vs) = (ops, (CallS x [p] : vs)) pop _ _ = error "pop borked" {- Operator checks -} uniq [x] = Just x uniq _ = Nothing getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x] getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x] getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x] isOperand x = null [op | (s, op) <- optable, s == x] {- actual pushery -} canPush [] op = Just True canPush ((_, Op p f):ops) (Op np nf) = go p f np nf where {- helper -} prioLtOp X = (<) prioLtOp Y = (<=) {- pushing a prefix -} go prio (Infix _ l) nprio (Prefix _) = if prioLtOp l nprio prio then Just True else Nothing go prio (Prefix l) nprio (Prefix r) = if prioLtOp l nprio prio then Just True else Nothing go prio (Suffix l) nprio (Prefix r) = error "wat sufix" --not just a normal prio clash {- pushing a suffix -} go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r go prio (Suffix _) nprio (Suffix r) = Just $ prioLtOp r nprio prio go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r {- pushing an infix -} go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r go prio (Suffix _) nprio (Infix r _) = if prioLtOp r nprio prio then Nothing else Just False go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r {- helper for cases that look like: a `xfy` b `yfx` c -} clash p l np r | p < np = Just False | p > np = Just True | p == np , r == Y = Just False | p == np , l == Y , r == X = Just True | otherwise = Nothing {- actual shunting -} pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x pushInfix ops vs x = getInfix x >>= shunt1 ops vs x shunt1 ops vs x op = do cp <- canPush ops op if cp then return ((x, op) : ops, vs) else let (ops', vs') = pop ops vs in shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush