prlg/app/Parser.hs

341 lines
9.9 KiB
Haskell

{-# LANGUAGE FlexibleInstances #-}
module Parser where
import Control.Applicative (liftA2)
import Control.Monad (void)
import Data.Char
import Data.List (intercalate)
import Data.List.NonEmpty (NonEmpty(..))
import Data.List.Split (splitOn)
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Compiler (PrlgStr(..))
import Debug.Trace
singleToks = ",;|()[]"
identParts = "_"
notOpToks = "\'%" ++ identParts
isOperatorlike x =
(isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)
isIdentStart x = (x `elem` identParts) || isAlpha x
isIdentOther x = isIdentStart x || isAlphaNum x || isMark x
type Lexer = Parsec Void String
data Lexeme
= Blank String
| Tok String
| QTok String String -- unquoted quoted
deriving (Show, Eq, Ord)
blank :: Lexer Lexeme
blank = Blank <$> some (satisfy isSpace)
tok :: Lexer Lexeme
tok =
Tok <$>
choice
[ pure <$> oneOf singleToks
, some $ satisfy isOperatorlike
, (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
, some (satisfy isNumber)
]
qtok :: Lexer Lexeme
qtok = do
x <- string "'"
y <- many $ satisfy (/= '\'')
z <- string "'"
return $ QTok y (x ++ y ++ z)
cmt :: Lexer Lexeme
cmt =
Blank . concat <$>
sequence
[string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]
lexeme :: Lexer Lexeme
lexeme = choice [blank, tok, qtok, cmt]
lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof)
showTok (Blank x) = x
showTok (Tok x) = x
showTok (QTok _ x) = x
instance VisualStream [Lexeme] where
showTokens _ (a :| b) = concatMap showTok (a : b)
tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b)
instance TraversableStream [Lexeme] where
reachOffset o pst = go
where
handleEmpty "" = "<empty line>"
handleEmpty x = x
go
| o <= pstateOffset pst =
( Just . handleEmpty $ pstateLinePrefix pst ++
takeWhile (/= '\n') (concatMap showTok $ pstateInput pst)
, pst)
| o > pstateOffset pst =
let (tok:rest) = pstateInput pst
stok = showTok tok
lines = splitOn "\n" stok
nls = length lines - 1
sp = pstateSourcePos pst
in reachOffset
o
pst
{ pstateInput = rest
, pstateOffset = pstateOffset pst + 1
, pstateLinePrefix =
if nls > 0
then last lines
else pstateLinePrefix pst ++ last lines
, pstateSourcePos =
sp
{ sourceLine = mkPos $ unPos (sourceLine sp) + nls
, sourceColumn =
mkPos $
(if nls > 0
then 1
else unPos (sourceColumn sp)) +
length (last lines)
}
}
data AST
= Call String [[AST]]
| Seq [AST]
| List [AST] (Maybe [AST])
| Literal String
deriving (Show, Eq)
type Parser = Parsec Void [Lexeme]
isBlank (Blank _) = True
isBlank _ = False
ws = many $ satisfy isBlank
free = (<* ws) -- we eat blanks _after_ the token.
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])
isNormalTok (Tok x) = isNormalTokStr x
isNormalTok (QTok x _) = isNormalTokStr x
isNormalTok _ = False
unTok (Tok t) = t
unTok (QTok t _) = t
literal :: Parser AST
literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)
call = do
fn <- unTok <$> satisfy isNormalTok -- not free
Seq inner <- free parens
return $ Call fn $ splitOn [Literal ","] inner
parens = Seq <$> (free lParen *> some seqItem <* free rParen)
list = do
free lBracket
choice
[ List [] Nothing <$ free rBracket
, do items <- some seqItem
choice
[ List items Nothing <$ free rBracket
, List items . Just <$>
(free listTail *> some seqItem <* free rBracket)
]
]
seqItem = choice [try call, literal, parens, list]
simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s)
comma = simpleTok "."
lParen = simpleTok "("
rParen = simpleTok ")"
lBracket = simpleTok "["
listTail = simpleTok "|"
rBracket = simpleTok "]"
clause :: Parser AST
clause = Seq <$> some (free seqItem) <* free comma
parsePrlg :: Parser [AST]
parsePrlg = ws *> many clause <* eof
data Op =
Op Int Fixity
deriving (Show, Eq)
data ArgKind
= X
| Y
deriving (Show, Eq)
data Fixity
= Infix ArgKind ArgKind
| Prefix ArgKind
| Suffix ArgKind
deriving (Show, Eq)
isPrefix (Prefix _) = True
isPrefix _ = False
numArgs :: Op -> Int
numArgs (Op _ f) = go f
where
go (Infix _ _) = 2
go _ = 1
type Ops = [(String, Op)]
defaultOps :: Ops
defaultOps =
[ ("", Op 0 $ Infix X Y)
, ("+", Op 100 $ Prefix X)
, ("!", Op 100 $ Suffix Y)
, ("-", Op 100 $ Prefix Y)
, ("*", Op 100 $ Infix Y X)
, ("+", Op 200 $ Infix Y X)
, ("++", Op 200 $ Infix X Y)
, ("-", Op 200 $ Infix Y X)
, ("<", Op 300 $ Infix X X)
, (">", Op 300 $ Infix X X)
, ("=", Op 400 $ Infix X X)
, (",", Op 800 $ Infix X Y)
, (";", Op 900 $ Infix X Y)
, (":-", Op 1000 $ Infix X X)
]
type PrlgError = String
type PrlgResult = Either PrlgError PrlgStr
err :: PrlgError -> Either PrlgError a
err = Left
ast2prlg :: Ops -> AST -> PrlgResult
ast2prlg ot (List _ _) = err "no lists yet"
ast2prlg ot (Seq ss) = shunt ot ss
ast2prlg ot (Literal s) = pure (LiteralS s)
ast2prlg ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss
shunt :: Ops -> [AST] -> PrlgResult
shunt optable = start
where
start :: [AST] -> PrlgResult
start [x] = rec x --singleton, possibly either a single operator name or a single value
start [] = err "empty parentheses?"
start xs = wo [] [] xs
{- "want operand" state, incoming literal -}
wo :: Ops -> [PrlgStr] -> [AST] -> PrlgResult
wo ops vs (l@(Literal x):xs)
| Right _ <- getPrefix x
, Right (ops', vs') <- pushPrefix ops vs x = wo ops' vs' xs
| isOperand x = do l' <- rec l
ho ops (l' : vs) xs
| otherwise = err "expected operand"
{- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
wo ops vs (x:xs) = do x' <- rec x
ho ops (x' : vs) xs
{- end of stream, but the operand is missing -}
wo ops vs [] = err "expected final operand"
{- "have operand" state, incoming operator -}
ho :: Ops -> [PrlgStr] -> [AST] -> PrlgResult
ho ops vs xs'@(Literal x:xs)
| Right _ <- getSuffix x
, Right (ops', vs') <- pushSuffix ops vs x = ho ops' vs' xs
| Right _ <- getInfix x
, Right (ops', vs') <- pushInfix ops vs x = wo ops' vs' xs
| isOperand x = ho ops vs (Literal "" : xs') -- app (see below)
| Right _ <- getPrefix x = ho ops vs (Literal "" : xs') -- also app!
| otherwise = err "expected infix or suffix operator"
{- incoming operand; there's an app between -}
ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
{- the last operand was last, pop until finished -}
ho [] [res] [] = pure res
ho ops vs [] = do (ops', vs') <- pop ops vs
ho ops' vs' []
{- recurse to delimited subexpression -}
rec :: AST -> PrlgResult
rec = ast2prlg optable
{- pop a level, possibly uncovering a higher prio -}
pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs))
pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
pop _ _ = err "internal err: pop borked"
{- Operator checks -}
uniq [x] = pure x
uniq _ = err "ambiguous operator"
getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
isOperand x = null [op | (s, op) <- optable, s == x]
{- actual pushery -}
canPush :: Ops -> Op -> Either PrlgError Bool
canPush [] op = pure True
canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
{- helper -}
where
prioLtOp X = (<)
prioLtOp Y = (<=)
{- pushing a prefix -}
go prio (Infix _ l) nprio (Prefix _) =
if prioLtOp l nprio prio
then pure True
else err "prefix on infix"
go prio (Prefix l) nprio (Prefix r) =
if prioLtOp l nprio prio
then pure True
else err "prefix on prefix"
go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash
{- pushing a suffix -}
go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio
go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
{- pushing an infix -}
go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
go prio (Suffix _) nprio (Infix r _) =
if prioLtOp r nprio prio
then err "infix on suffix"
else pure False
go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
{- helper for cases that look like: a `xfy` b `yfx` c -}
clash p l np r
| p < np = pure False
| p > np = pure True
| p == np
, r == Y = pure False
| p == np
, l == Y
, r == X = pure True
| otherwise = err "priority clash"
{- actual shunting -}
pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
shunt1 ops vs x op = do
cp <- canPush ops op
if cp
then pure ((x, op) : ops, vs)
else do (ops', vs') <- pop ops vs
shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush