prlg/app/Parser.hs
2022-10-23 23:38:13 +02:00

284 lines
7.9 KiB
Haskell

module Parser where
import Control.Applicative (liftA2)
import Control.Monad (void)
import Data.Char
import Data.List.Split (splitOn)
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Compiler (PrlgStr(..))
singleToks = ",;|()[]"
identParts = "_"
notOpToks = "\'%" ++ identParts
isOperatorlike x =
(isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)
isIdentStart x = (x `elem` identParts) || isAlpha x
isIdentOther x = isIdentStart x || isAlphaNum x || isMark x
type Lexer = Parsec Void String
data Lexeme
= Blank String
| Tok String
| QTok String String -- unquoted quoted
deriving (Show, Eq, Ord)
blank :: Lexer Lexeme
blank = Blank <$> some (satisfy isSpace)
tok :: Lexer Lexeme
tok =
Tok <$>
choice
[ pure <$> oneOf singleToks
, some $ satisfy isOperatorlike
, (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
, some (satisfy isNumber)
]
qtok :: Lexer Lexeme
qtok = do
x <- string "'"
y <- many $ satisfy (/= '\'')
z <- string "'"
return $ QTok y (x ++ y ++ z)
cmt :: Lexer Lexeme
cmt =
Blank . concat <$>
sequence
[string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]
lexeme :: Lexer Lexeme
lexeme = choice [blank, tok, qtok, cmt]
lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof)
data AST
= Call String [[AST]]
| Seq [AST]
| List [AST] (Maybe [AST])
| Literal String
deriving (Show, Eq)
type Parser = Parsec Void [Lexeme]
isBlank (Blank _) = True
isBlank _ = False
ws = many $ satisfy isBlank
free = (<* ws) -- we eat blanks _after_ the token.
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])
isNormalTok (Tok x) = isNormalTokStr x
isNormalTok (QTok x _) = isNormalTokStr x
isNormalTok _ = False
unTok (Tok t) = t
unTok (QTok t _) = t
literal :: Parser AST
literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)
call = do
fn <- unTok <$> satisfy isNormalTok -- not free
Seq inner <- free parens
return $ Call fn $ splitOn [Literal ","] inner
parens = Seq <$> (free lParen *> some seqItem <* free rParen)
list = do
free lBracket
choice
[ List [] Nothing <$ free rBracket
, do items <- some seqItem
choice
[ List items Nothing <$ free rBracket
, List items . Just <$>
(free listTail *> some seqItem <* free rBracket)
]
]
seqItem = choice [try call, literal, parens, list]
simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s)
comma = simpleTok "."
lParen = simpleTok "("
rParen = simpleTok ")"
lBracket = simpleTok "["
listTail = simpleTok "|"
rBracket = simpleTok "]"
clause :: Parser AST
clause = Seq <$> some (free seqItem) <* free comma
parsePrlg :: Parser [AST]
parsePrlg = ws *> many clause <* eof
data Op =
Op Int Fixity
deriving (Show, Eq)
data ArgKind
= X
| Y
deriving (Show, Eq)
data Fixity
= Infix ArgKind ArgKind
| Prefix ArgKind
| Suffix ArgKind
deriving (Show, Eq)
isPrefix (Prefix _) = True
isPrefix _ = False
numArgs :: Op -> Int
numArgs (Op _ f) = go f
where
go (Infix _ _) = 2
go _ = 1
type Ops = [(String, Op)]
defaultOps :: Ops
defaultOps =
[ ("", Op 0 $ Infix X Y)
, ("+", Op 100 $ Prefix X)
, ("!", Op 100 $ Suffix Y)
, ("-", Op 100 $ Prefix Y)
, ("*", Op 100 $ Infix Y X)
, ("+", Op 200 $ Infix Y X)
, ("++", Op 200 $ Infix X Y)
, ("-", Op 200 $ Infix Y X)
, ("<", Op 300 $ Infix X X)
, (">", Op 300 $ Infix X X)
, ("=", Op 400 $ Infix X X)
, (",", Op 800 $ Infix X Y)
, (";", Op 900 $ Infix X Y)
, (":-", Op 1000 $ Infix X X)
]
ast2prlg :: Ops -> AST -> PrlgStr
ast2prlg ot (List _ _) = error "no lists yet"
ast2prlg ot (Seq ss) = shunt ot ss
ast2prlg ot (Literal s) = LiteralS s
ast2prlg ot (Call fn ss) = CallS fn $ map (shunt ot) ss
shunt :: Ops -> [AST] -> PrlgStr
shunt optable = start
where
start :: [AST] -> PrlgStr
start [x] = rec x --singleton, possibly either a single operator name or a single value
start [] = error "wat seq"
start xs = wo [] [] xs
{- "want operand" state, incoming literal -}
wo :: Ops -> [PrlgStr] -> [AST] -> PrlgStr
wo ops vs (l@(Literal x):xs)
| Just _ <- getPrefix x
, Just (ops', vs') <- pushPrefix ops vs x = wo ops' vs' xs
| isOperand x = ho ops (rec l : vs) xs
| otherwise = error $ "want valid operand " ++ show (ops,vs,l,xs)
{- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
wo ops vs (x:xs) = ho ops (rec x : vs) xs
{- end of stream, but the operand is missing -}
wo ops vs [] = error "missing final operand"
{- "have operand" state, incoming operator -}
ho :: Ops -> [PrlgStr] -> [AST] -> PrlgStr
ho ops vs xs'@(Literal x:xs)
| Just _ <- getSuffix x
, Just (ops', vs') <- pushSuffix ops vs x = ho ops' vs' xs
| Just _ <- getInfix x
, Just (ops', vs') <- pushInfix ops vs x = wo ops' vs' xs
| isOperand x = ho ops vs (Literal "" : xs') -- app (see below)
| Just _ <- getPrefix x = ho ops vs (Literal "" : xs') -- also app!
| otherwise = error "want valid infix"
{- incoming operand; there's an app between -}
ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
{- the last operand was last, pop until finished -}
ho [] [res] [] = res
ho ops vs [] =
let (ops', vs') = pop ops vs
in ho ops' vs' []
{- recurse to delimited subexpression -}
rec :: AST -> PrlgStr
rec = ast2prlg optable
{- pop a level, possibly uncovering a higher prio -}
pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = (ops, (CallS x [l, r] : vs))
pop ((x, Op _ (Prefix _)):ops) (p:vs) = (ops, (CallS x [p] : vs))
pop ((x, Op _ (Suffix _)):ops) (p:vs) = (ops, (CallS x [p] : vs))
pop _ _ = error "pop borked"
{- Operator checks -}
uniq [x] = Just x
uniq _ = Nothing
getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
isOperand x = null [op | (s, op) <- optable, s == x]
{- actual pushery -}
canPush [] op = Just True
canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
where
{- helper -}
prioLtOp X = (<)
prioLtOp Y = (<=)
{- pushing a prefix -}
go prio (Infix _ l) nprio (Prefix _) =
if prioLtOp l nprio prio
then Just True
else Nothing
go prio (Prefix l) nprio (Prefix r) =
if prioLtOp l nprio prio
then Just True
else Nothing
go prio (Suffix l) nprio (Prefix r) = error "wat sufix" --not just a normal prio clash
{- pushing a suffix -}
go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
go prio (Suffix _) nprio (Suffix r) = Just $ prioLtOp r nprio prio
go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
{- pushing an infix -}
go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
go prio (Suffix _) nprio (Infix r _) =
if prioLtOp r nprio prio
then Nothing
else Just False
go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
{- helper for cases that look like: a `xfy` b `yfx` c -}
clash p l np r
| p < np = Just False
| p > np = Just True
| p == np
, r == Y = Just False
| p == np
, l == Y
, r == X = Just True
| otherwise = Nothing
{- actual shunting -}
pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
shunt1 ops vs x op = do
cp <- canPush ops op
if cp
then return ((x, op) : ops, vs)
else let (ops', vs') = pop ops vs
in shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush