366 lines
10 KiB
Haskell
366 lines
10 KiB
Haskell
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
module Parser
|
|
( lexPrlg
|
|
, parsePrlg
|
|
, shuntPrlg
|
|
, PAST
|
|
, Lexeme
|
|
) where
|
|
|
|
import Control.Monad (void)
|
|
import Data.Char
|
|
( isAlpha
|
|
, isAlphaNum
|
|
, isMark
|
|
, isNumber
|
|
, isPunctuation
|
|
, isSpace
|
|
, isSymbol
|
|
)
|
|
import Data.List.NonEmpty (NonEmpty(..))
|
|
import Data.List.Split (splitOn)
|
|
import Data.Void (Void)
|
|
import Text.Megaparsec
|
|
( Parsec
|
|
, PosState(..)
|
|
, SourcePos(..)
|
|
, TraversableStream(..)
|
|
, VisualStream(..)
|
|
, (<|>)
|
|
, choice
|
|
, eof
|
|
, many
|
|
, mkPos
|
|
, notFollowedBy
|
|
, oneOf
|
|
, satisfy
|
|
, single
|
|
, some
|
|
, try
|
|
, unPos
|
|
)
|
|
import Text.Megaparsec.Char (string)
|
|
|
|
import IR (PrlgStr(..))
|
|
import Operators (ArgKind(..), Fixity(..), Op(..), Ops)
|
|
|
|
singleToks = ",;|()[]{}!"
|
|
|
|
identParts = "_"
|
|
|
|
notOpToks = "\'%" ++ identParts
|
|
|
|
isOperatorlike x =
|
|
(isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)
|
|
|
|
isIdentStart x = (x `elem` identParts) || isAlpha x
|
|
|
|
isIdentOther x = isIdentStart x || isAlphaNum x || isMark x
|
|
|
|
type Lexer = Parsec Void String
|
|
|
|
data Lexeme
|
|
= Blank String
|
|
| Tok String
|
|
| QTok String String -- unquoted quoted
|
|
deriving (Show, Eq, Ord)
|
|
|
|
blank :: Lexer Lexeme
|
|
blank = Blank <$> some (satisfy isSpace)
|
|
|
|
tok :: Lexer Lexeme
|
|
tok =
|
|
Tok <$>
|
|
choice
|
|
[ pure <$> oneOf singleToks
|
|
, some $ satisfy isOperatorlike
|
|
, (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
|
|
, some (satisfy isNumber)
|
|
]
|
|
|
|
qtok :: Lexer Lexeme
|
|
qtok = do
|
|
x <- string "'"
|
|
y <- many $ satisfy (/= '\'')
|
|
z <- string "'"
|
|
return $ QTok y (x ++ y ++ z)
|
|
|
|
cmt :: Lexer Lexeme
|
|
cmt =
|
|
Blank . concat <$>
|
|
sequence
|
|
[string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]
|
|
|
|
lexeme :: Lexer Lexeme
|
|
lexeme = choice [blank, tok, qtok, cmt]
|
|
|
|
lexPrlg :: Lexer [Lexeme]
|
|
lexPrlg = many lexeme <* (many blank >> eof)
|
|
|
|
showTok (Blank x) = x
|
|
showTok (Tok x) = x
|
|
showTok (QTok _ x) = x
|
|
|
|
instance VisualStream [Lexeme] where
|
|
showTokens _ (a :| b) = concatMap showTok (a : b)
|
|
tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b)
|
|
|
|
instance TraversableStream [Lexeme] where
|
|
reachOffset o pst = go
|
|
where
|
|
handleEmpty "" = "<empty line>"
|
|
handleEmpty x = x
|
|
go
|
|
| o <= pstateOffset pst =
|
|
( Just . handleEmpty $
|
|
pstateLinePrefix pst ++
|
|
takeWhile (/= '\n') (concatMap showTok $ pstateInput pst)
|
|
, pst)
|
|
| o > pstateOffset pst =
|
|
let (tok:rest) = pstateInput pst
|
|
stok = showTok tok
|
|
lines = splitOn "\n" stok
|
|
nls = length lines - 1
|
|
sp = pstateSourcePos pst
|
|
in reachOffset
|
|
o
|
|
pst
|
|
{ pstateInput = rest
|
|
, pstateOffset = pstateOffset pst + 1
|
|
, pstateLinePrefix =
|
|
if nls > 0
|
|
then last lines
|
|
else pstateLinePrefix pst ++ last lines
|
|
, pstateSourcePos =
|
|
sp
|
|
{ sourceLine = mkPos $ unPos (sourceLine sp) + nls
|
|
, sourceColumn =
|
|
mkPos $
|
|
(if nls > 0
|
|
then 1
|
|
else unPos (sourceColumn sp)) +
|
|
length (last lines)
|
|
}
|
|
}
|
|
|
|
data PAST
|
|
= Call String [[PAST]]
|
|
| Seq [PAST]
|
|
| List [[PAST]] (Maybe [PAST])
|
|
| Literal String
|
|
deriving (Show, Eq)
|
|
|
|
type Parser = Parsec Void [Lexeme]
|
|
|
|
isBlank (Blank _) = True
|
|
isBlank _ = False
|
|
|
|
ws = many $ satisfy isBlank
|
|
|
|
free = (<* ws) -- we eat blanks _after_ the token.
|
|
|
|
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")", "{", "}"])
|
|
|
|
isNormalTok (Tok x) = isNormalTokStr x
|
|
isNormalTok (QTok x _) = isNormalTokStr x
|
|
isNormalTok _ = False
|
|
|
|
unTok (Tok t) = t
|
|
unTok (QTok t _) = t
|
|
|
|
literal :: Parser PAST
|
|
literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)
|
|
|
|
makeParams (Seq inner) = splitOn [Literal ","] inner
|
|
|
|
call eb contents fmod = do
|
|
fn <- fmod . unTok <$> satisfy isNormalTok -- not free
|
|
(Call fn [] <$ try eb) <|> (Call fn . makeParams <$> free contents)
|
|
|
|
parens = Seq <$> (free lParen *> some seqItem <* free rParen)
|
|
|
|
braces = Seq <$> (free lBrace *> some seqItem <* free rBrace)
|
|
|
|
emptyParens = Literal "()" <$ (free lParen >> free rParen)
|
|
|
|
emptyBraces = Literal "{}" <$ (free lBrace >> free rBrace)
|
|
|
|
list = do
|
|
free lBracket
|
|
(List [] Nothing <$ free rBracket) <|> do
|
|
items <- splitOn [Literal ","] <$> some seqItem
|
|
(List items Nothing <$ free rBracket) <|>
|
|
(List items . Just <$> (free listTail *> some seqItem <* free rBracket))
|
|
|
|
seqItem =
|
|
choice
|
|
[ try $ call emptyParens parens id
|
|
, try $ call emptyBraces braces (++ "{}")
|
|
, literal
|
|
, try emptyParens
|
|
, parens
|
|
, try emptyBraces
|
|
, Call "{}" . makeParams <$> braces
|
|
, list
|
|
]
|
|
|
|
simpleTok :: String -> Parser ()
|
|
simpleTok s = void $ single (Tok s)
|
|
|
|
period = simpleTok "."
|
|
|
|
lParen = simpleTok "("
|
|
|
|
rParen = simpleTok ")"
|
|
|
|
lBracket = simpleTok "["
|
|
|
|
listTail = simpleTok "|"
|
|
|
|
rBracket = simpleTok "]"
|
|
|
|
lBrace = simpleTok "{"
|
|
|
|
rBrace = simpleTok "}"
|
|
|
|
clause :: Parser PAST
|
|
clause = Seq <$> some (free seqItem) <* free period
|
|
|
|
parsePrlg :: Parser [PAST]
|
|
parsePrlg = ws *> many clause <* eof
|
|
|
|
type ShuntError = String
|
|
|
|
type ShuntResult = Either ShuntError PrlgStr
|
|
|
|
err :: ShuntError -> Either ShuntError a
|
|
err = Left
|
|
|
|
shuntPrlg :: Ops -> PAST -> ShuntResult
|
|
shuntPrlg ot = shuntPrlg' (("", Op 0 $ Infix Y X) : ot)
|
|
|
|
shuntPrlg' :: Ops -> PAST -> ShuntResult
|
|
shuntPrlg' ot (List hs t) =
|
|
ListS <$> traverse (shunt ot) hs <*> traverse (shunt ot) t
|
|
shuntPrlg' ot (Seq ss) = shunt ot ss
|
|
shuntPrlg' ot (Literal s) = pure (LiteralS s)
|
|
shuntPrlg' ot (Call fn ss) = CallS fn <$> traverse (shunt ot) ss
|
|
|
|
shunt :: Ops -> [PAST] -> ShuntResult
|
|
shunt optable = start
|
|
where
|
|
start :: [PAST] -> ShuntResult
|
|
start [x] = rec x --singleton, possibly either a single operator name or a single value
|
|
start [] = err "empty parentheses?"
|
|
start xs = wo [] [] xs
|
|
resolve = foldr1 (<|>)
|
|
{- "want operand" state, incoming literal -}
|
|
wo :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult
|
|
wo ops vs (l@(Literal x):xs) =
|
|
resolve
|
|
[ do getPrefix x
|
|
(ops', vs') <- pushPrefix ops vs x
|
|
wo ops' vs' xs
|
|
, do getOperand x
|
|
l' <- rec l
|
|
ho ops (l' : vs) xs
|
|
, err "expected operand"
|
|
]
|
|
{- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
|
|
wo ops vs (x:xs) = do
|
|
x' <- rec x
|
|
ho ops (x' : vs) xs
|
|
{- end of stream, but the operand is missing -}
|
|
wo ops vs [] = err "expected final operand"
|
|
{- "have operand" state, expecting an operator -}
|
|
ho :: Ops -> [PrlgStr] -> [PAST] -> ShuntResult
|
|
ho ops vs xs'@(Literal x:xs) =
|
|
resolve
|
|
[ do getSuffix x
|
|
(ops', vs') <- pushSuffix ops vs x
|
|
ho ops' vs' xs
|
|
, do getInfix x
|
|
(ops', vs') <- pushInfix ops vs x
|
|
wo ops' vs' xs
|
|
, do getOperand x
|
|
ho ops vs (Literal "" : xs') -- app (see below)
|
|
, do getPrefix x
|
|
ho ops vs (Literal "" : xs') -- also app!
|
|
, err "expected infix or suffix operator"
|
|
]
|
|
{- incoming non-literal operand; there must be an app in between -}
|
|
ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
|
|
{- the last operand was last, pop until finished -}
|
|
ho [] [res] [] = pure res
|
|
ho ops vs [] = do
|
|
(ops', vs') <- pop ops vs
|
|
ho ops' vs' []
|
|
{- recurse to delimited subexpression -}
|
|
rec :: PAST -> ShuntResult
|
|
rec = shuntPrlg' optable
|
|
{- pop a level, possibly uncovering a higher prio -}
|
|
pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = pure (ops, (CallS x [l, r] : vs))
|
|
pop ((x, Op _ (Prefix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
|
|
pop ((x, Op _ (Suffix _)):ops) (p:vs) = pure (ops, (CallS x [p] : vs))
|
|
pop _ _ = err "internal err: pop borked"
|
|
{- Operator checks -}
|
|
uniq [x] = pure x
|
|
uniq _ = err "ambiguous operator"
|
|
getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
|
|
getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
|
|
getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
|
|
getOperand x
|
|
| null [op | (s, op) <- optable, s == x] = pure ()
|
|
| otherwise = err "expected an operand"
|
|
{- actual pushery -}
|
|
canPush :: Ops -> Op -> Either ShuntError Bool
|
|
canPush [] op = pure True
|
|
canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
|
|
{- helper -}
|
|
where
|
|
prioLtOp X = (<)
|
|
prioLtOp Y = (<=)
|
|
{- pushing a prefix -}
|
|
go prio (Infix _ l) nprio (Prefix _) =
|
|
if prioLtOp l nprio prio
|
|
then pure True
|
|
else err "prefix on infix"
|
|
go prio (Prefix l) nprio (Prefix r) =
|
|
if prioLtOp l nprio prio
|
|
then pure True
|
|
else err "prefix on prefix"
|
|
go prio (Suffix l) nprio (Prefix r) = err "wat suffix?!" --not just a normal prio clash
|
|
{- pushing a suffix -}
|
|
go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
|
|
go prio (Suffix _) nprio (Suffix r) = pure $ prioLtOp r nprio prio
|
|
go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
|
|
{- pushing an infix -}
|
|
go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
|
|
go prio (Suffix _) nprio (Infix r _) =
|
|
if prioLtOp r nprio prio
|
|
then err "infix on suffix"
|
|
else pure False
|
|
go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
|
|
{- helper for cases that look like: a `xfy` b `yfx` c -}
|
|
clash p l np r
|
|
| p < np = pure False
|
|
| p > np = pure True
|
|
| p == np
|
|
, r == Y = pure False
|
|
| p == np
|
|
, l == Y
|
|
, r == X = pure True
|
|
| otherwise = err "priority clash"
|
|
{- actual shunting -}
|
|
pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
|
|
pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
|
|
pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
|
|
shunt1 ops vs x op = do
|
|
cp <- canPush ops op
|
|
if cp
|
|
then pure ((x, op) : ops, vs)
|
|
else do
|
|
(ops', vs') <- pop ops vs
|
|
shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush
|