prlg/app/Parser.hs
2023-02-08 20:22:28 +01:00

366 lines
10 KiB
Haskell

{-# LANGUAGE FlexibleInstances #-}
module Parser
( lexPrlg
, parsePrlg
, shuntPrlg
, PAST
, Lexeme
) where
import Control.Monad (void)
import Data.Char
( isAlpha
, isAlphaNum
, isMark
, isNumber
, isPunctuation
, isSpace
, isSymbol
)
import Data.List.NonEmpty (NonEmpty(..))
import Data.List.Split (splitOn)
import Data.Void (Void)
import Text.Megaparsec
( Parsec
, PosState(..)
, SourcePos(..)
, TraversableStream(..)
, VisualStream(..)
, (<|>)
, choice
, eof
, many
, mkPos
, notFollowedBy
, oneOf
, satisfy
, single
, some
, try
, unPos
)
import Text.Megaparsec.Char (string)
import IR (PrlgStr(..))
import Operators (ArgKind(..), Fixity(..), Op(..), Ops)
singleToks = ",;|()[]{}!"
identParts = "_"
notOpToks = "\'%" ++ identParts
isOperatorlike x =
(isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)
isIdentStart x = (x `elem` identParts) || isAlpha x
isIdentOther x = isIdentStart x || isAlphaNum x || isMark x
type Lexer = Parsec Void String
data Lexeme
= Blank String
| Tok String
| QTok String String -- unquoted quoted
deriving (Show, Eq, Ord)
blank :: Lexer Lexeme
blank = Blank <$> some (satisfy isSpace)
tok :: Lexer Lexeme
tok =
Tok <$>
choice
[ pure <$> oneOf singleToks
, some $ satisfy isOperatorlike
, (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
, some (satisfy isNumber)
]
qtok :: Lexer Lexeme
qtok = do
x <- string "'"
y <- many $ satisfy (/= '\'')
z <- string "'"
return $ QTok y (x ++ y ++ z)
cmt :: Lexer Lexeme
cmt =
Blank . concat <$>
sequence
[string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]
lexeme :: Lexer Lexeme
lexeme = choice [blank, tok, qtok, cmt]
lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof)
showTok (Blank x) = x
showTok (Tok x) = x
showTok (QTok _ x) = x
instance VisualStream [Lexeme] where
showTokens _ (a :| b) = concatMap showTok (a : b)
tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b)
instance TraversableStream [Lexeme] where
reachOffset o pst = go
where
handleEmpty "" = "<empty line>"
handleEmpty x = x
go
| o <= pstateOffset pst =
( Just . handleEmpty $
pstateLinePrefix pst ++
takeWhile (/= '\n') (concatMap showTok $ pstateInput pst)
, pst)
| o > pstateOffset pst =
let (tok:rest) = pstateInput pst
stok = showTok tok
lines = splitOn "\n" stok
nls = length lines - 1
sp = pstateSourcePos pst
in reachOffset
o
pst
{ pstateInput = rest
, pstateOffset = pstateOffset pst + 1
, pstateLinePrefix =
if nls > 0
then last lines
else pstateLinePrefix pst ++ last lines
, pstateSourcePos =
sp
{ sourceLine = mkPos $ unPos (sourceLine sp) + nls
, sourceColumn =
mkPos $
(if nls > 0
then 1
else unPos (sourceColumn sp)) +
length (last lines)
}
}
data PAST
= Call String [[PAST]]
| Seq [PAST]
| List [[PAST]] (Maybe [PAST])
| Literal String
deriving (Show, Eq)
type Parser = Parsec Void [Lexeme]
isBlank (Blank _) = True
isBlank _ = False
ws = many $ satisfy isBlank
free = (<* ws) -- we eat blanks _after_ the token.
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")", "{", "}"])
isNormalTok (Tok x) = isNormalTokStr x
isNormalTok (QTok x _) = isNormalTokStr x
isNormalTok _ = False
unTok (Tok t) = t
unTok (QTok t _) = t
literal :: Parser PAST
literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)
makeParams (Seq inner) = splitOn [Literal ","] inner
call eb contents fmod = do
fn <- fmod . unTok <$> satisfy isNormalTok -- not free
(Call fn [] <$ try eb) <|> (Call fn . makeParams <$> free contents)
parens = Seq <$> (free lParen *> some seqItem <* free rParen)
braces = Seq <$> (free lBrace *> some seqItem <* free rBrace)
emptyParens = Literal "()" <$ (free lParen >> free rParen)
emptyBraces = Literal "{}" <$ (free lBrace >> free rBrace)
list = do
free lBracket
(List [] Nothing <$ free rBracket) <|> do
items <- splitOn [Literal ","] <$> some seqItem
(List items Nothing <$ free rBracket) <|>
(List items . Just <$> (free listTail *> some seqItem <* free rBracket))
seqItem =
choice
[ try $ call emptyParens parens id
, try $ call emptyBraces braces (++ "{}")
, literal
, try emptyParens
, parens
, try emptyBraces
, Call "{}" . makeParams <$> braces
, list
]
simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s)
period = simpleTok "."
lParen = simpleTok "("
rParen = simpleTok ")"
lBracket = simpleTok "["
listTail = simpleTok "|"
rBracket = simpleTok "]"
lBrace = simpleTok "{"
rBrace = simpleTok "}"
clause :: Parser PAST
clause = Seq <$> some (free seqItem) <* free period
parsePrlg :: Parser [PAST]
parsePrlg = ws *> many clause <* eof
type ShuntError = String
type ShuntResult = Either ShuntError PrlgStr
err :: ShuntError -> Either ShuntError a
err = Left
shuntPrlg :: Ops -> PAST -> ShuntResult
shuntPrlg ot = shuntPrlg' (("", Op 0 $ Infix Y X) : ot)
shuntPrlg' :: Ops -> PAST -> ShuntResult
shuntPrlg' ot (List hs t) =
ListS <$> traverse (shunt ot) hs <*> traverse (shunt ot) t
shuntPrlg' ot (Seq ss) = shunt ot ss
shuntPrlg' ot (Literal s) = pure (LiteralS s)
shuntPrlg' ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss
shunt :: Ops -> [PAST] -> ShuntResult
shunt optable = start
where
start :: [PAST] -> ShuntResult
start [x] = rec x --singleton, possibly either a single operator name or a single value
start [] = err "empty parentheses?"
start xs = wo [] [] xs
resolve = foldr1 (<|>)
{- "want operand" state, incoming literal -}
wo :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult
wo ops vs (l@(Literal x):xs) =
resolve
[ do getPrefix x
(ops', vs') <- pushPrefix ops vs x
wo ops' vs' xs
, do getOperand x
l' <- rec l
ho ops (l' : vs) xs
, err "expected operand"
]
{- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
wo ops vs (x:xs) = do
x' <- rec x
ho ops (x' : vs) xs
{- end of stream, but the operand is missing -}
wo ops vs [] = err "expected final operand"
{- "have operand" state, expecting an operator -}
ho :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult
ho ops vs xs'@(Literal x:xs) =
resolve
[ do getSuffix x
(ops', vs') <- pushSuffix ops vs x
ho ops' vs' xs
, do getInfix x
(ops', vs') <- pushInfix ops vs x
wo ops' vs' xs
, do getOperand x
ho ops vs (Literal "" : xs') -- app (see below)
, do getPrefix x
ho ops vs (Literal "" : xs') -- also app!
, err "expected infix or suffix operator"
]
{- incoming non-literal operand; there must be an app in between -}
ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
{- the last operand was last, pop until finished -}
ho [] [res] [] = pure res
ho ops vs [] = do
(ops', vs') <- pop ops vs
ho ops' vs' []
{- recurse to delimited subexpression -}
rec :: PAST -> ShuntResult
rec = shuntPrlg' optable
{- pop a level, possibly uncovering a higher prio -}
pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs))
pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
pop _ _ = err "internal err: pop borked"
{- Operator checks -}
uniq [x] = pure x
uniq _ = err "ambiguous operator"
getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
getOperand x
| null [op | (s, op) <- optable, s == x] = pure ()
| otherwise = err "expected an operand"
{- actual pushery -}
canPush :: Ops -> Op -> Either ShuntError Bool
canPush [] op = pure True
canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
{- helper -}
where
prioLtOp X = (<)
prioLtOp Y = (<=)
{- pushing a prefix -}
go prio (Infix _ l) nprio (Prefix _) =
if prioLtOp l nprio prio
then pure True
else err "prefix on infix"
go prio (Prefix l) nprio (Prefix r) =
if prioLtOp l nprio prio
then pure True
else err "prefix on prefix"
go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash
{- pushing a suffix -}
go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio
go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
{- pushing an infix -}
go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
go prio (Suffix _) nprio (Infix r _) =
if prioLtOp r nprio prio
then err "infix on suffix"
else pure False
go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
{- helper for cases that look like: a `xfy` b `yfx` c -}
clash p l np r
| p < np = pure False
| p > np = pure True
| p == np
, r == Y = pure False
| p == np
, l == Y
, r == X = pure True
| otherwise = err "priority clash"
{- actual shunting -}
pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
shunt1 ops vs x op = do
cp <- canPush ops op
if cp
then pure ((x, op) : ops, vs)
else do
(ops', vs') <- pop ops vs
shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush