prlg/app/Parser.hs
2022-10-24 23:43:35 +02:00

334 lines
9.5 KiB
Haskell

{-# LANGUAGE FlexibleInstances #-}
module Parser where
import Control.Applicative (liftA2)
import Control.Monad (void)
import Data.Char
import Data.List (intercalate)
import Data.List.NonEmpty (NonEmpty(..))
import Data.List.Split (splitOn)
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Compiler (PrlgStr(..))
import Debug.Trace
singleToks = ",;|()[]"
identParts = "_"
notOpToks = "\'%" ++ identParts
isOperatorlike x =
(isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)
isIdentStart x = (x `elem` identParts) || isAlpha x
isIdentOther x = isIdentStart x || isAlphaNum x || isMark x
type Lexer = Parsec Void String
data Lexeme
= Blank String
| Tok String
| QTok String String -- unquoted quoted
deriving (Show, Eq, Ord)
blank :: Lexer Lexeme
blank = Blank <$> some (satisfy isSpace)
tok :: Lexer Lexeme
tok =
Tok <$>
choice
[ pure <$> oneOf singleToks
, some $ satisfy isOperatorlike
, (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
, some (satisfy isNumber)
]
qtok :: Lexer Lexeme
qtok = do
x <- string "'"
y <- many $ satisfy (/= '\'')
z <- string "'"
return $ QTok y (x ++ y ++ z)
cmt :: Lexer Lexeme
cmt =
Blank . concat <$>
sequence
[string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]
lexeme :: Lexer Lexeme
lexeme = choice [blank, tok, qtok, cmt]
lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof)
showTok (Blank x) = x
showTok (Tok x) = x
showTok (QTok _ x) = x
instance VisualStream [Lexeme] where
showTokens _ (a :| b) = concatMap showTok (a : b)
tokensLength _ (a :| b) = sum $ map (length . showTok) (a : b)
instance TraversableStream [Lexeme] where
reachOffset o pst = go
where
handleEmpty "" = "<empty line>"
handleEmpty x = x
go
| o <= pstateOffset pst =
( Just . handleEmpty $ pstateLinePrefix pst ++
takeWhile (/= '\n') (concatMap showTok $ pstateInput pst)
, pst)
| o > pstateOffset pst =
let (tok:rest) = pstateInput pst
stok = showTok tok
lines = splitOn "\n" stok
nls = length lines - 1
sp = pstateSourcePos pst
in reachOffset
o
pst
{ pstateInput = rest
, pstateOffset = pstateOffset pst + 1
, pstateLinePrefix =
if nls > 0
then last lines
else pstateLinePrefix pst ++ last lines
, pstateSourcePos =
sp
{ sourceLine = mkPos $ unPos (sourceLine sp) + nls
, sourceColumn =
mkPos $
(if nls > 0
then 1
else unPos (sourceColumn sp)) +
length (last lines)
}
}
data AST
= Call String [[AST]]
| Seq [AST]
| List [AST] (Maybe [AST])
| Literal String
deriving (Show, Eq)
type Parser = Parsec Void [Lexeme]
isBlank (Blank _) = True
isBlank _ = False
ws = many $ satisfy isBlank
free = (<* ws) -- we eat blanks _after_ the token.
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])
isNormalTok (Tok x) = isNormalTokStr x
isNormalTok (QTok x _) = isNormalTokStr x
isNormalTok _ = False
unTok (Tok t) = t
unTok (QTok t _) = t
literal :: Parser AST
literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)
call = do
fn <- unTok <$> satisfy isNormalTok -- not free
Seq inner <- free parens
return $ Call fn $ splitOn [Literal ","] inner
parens = Seq <$> (free lParen *> some seqItem <* free rParen)
list = do
free lBracket
choice
[ List [] Nothing <$ free rBracket
, do items <- some seqItem
choice
[ List items Nothing <$ free rBracket
, List items . Just <$>
(free listTail *> some seqItem <* free rBracket)
]
]
seqItem = choice [try call, literal, parens, list]
simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s)
comma = simpleTok "."
lParen = simpleTok "("
rParen = simpleTok ")"
lBracket = simpleTok "["
listTail = simpleTok "|"
rBracket = simpleTok "]"
clause :: Parser AST
clause = Seq <$> some (free seqItem) <* free comma
parsePrlg :: Parser [AST]
parsePrlg = ws *> many clause <* eof
data Op =
Op Int Fixity
deriving (Show, Eq)
data ArgKind
= X
| Y
deriving (Show, Eq)
data Fixity
= Infix ArgKind ArgKind
| Prefix ArgKind
| Suffix ArgKind
deriving (Show, Eq)
isPrefix (Prefix _) = True
isPrefix _ = False
numArgs :: Op -> Int
numArgs (Op _ f) = go f
where
go (Infix _ _) = 2
go _ = 1
type Ops = [(String, Op)]
defaultOps :: Ops
defaultOps =
[ ("", Op 0 $ Infix X Y)
, ("+", Op 100 $ Prefix X)
, ("!", Op 100 $ Suffix Y)
, ("-", Op 100 $ Prefix Y)
, ("*", Op 100 $ Infix Y X)
, ("+", Op 200 $ Infix Y X)
, ("++", Op 200 $ Infix X Y)
, ("-", Op 200 $ Infix Y X)
, ("<", Op 300 $ Infix X X)
, (">", Op 300 $ Infix X X)
, ("=", Op 400 $ Infix X X)
, (",", Op 800 $ Infix X Y)
, (";", Op 900 $ Infix X Y)
, (":-", Op 1000 $ Infix X X)
]
ast2prlg :: Ops -> AST -> PrlgStr
ast2prlg ot (List _ _) = error "no lists yet"
ast2prlg ot (Seq ss) = shunt ot ss
ast2prlg ot (Literal s) = LiteralS s
ast2prlg ot (Call fn ss) = CallS fn $ map (shunt ot) ss
shunt :: Ops -> [AST] -> PrlgStr
shunt optable = start
where
start :: [AST] -> PrlgStr
start [x] = rec x --singleton, possibly either a single operator name or a single value
start [] = error "wat seq"
start xs = wo [] [] xs
{- "want operand" state, incoming literal -}
wo :: Ops -> [PrlgStr] -> [AST] -> PrlgStr
wo ops vs (l@(Literal x):xs)
| Just _ <- getPrefix x
, Just (ops', vs') <- pushPrefix ops vs x = wo ops' vs' xs
| isOperand x = ho ops (rec l : vs) xs
| otherwise = error $ "want valid operand"
{- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
wo ops vs (x:xs) = ho ops (rec x : vs) xs
{- end of stream, but the operand is missing -}
wo ops vs [] = error "missing final operand"
{- "have operand" state, incoming operator -}
ho :: Ops -> [PrlgStr] -> [AST] -> PrlgStr
ho ops vs xs'@(Literal x:xs)
| Just _ <- getSuffix x
, Just (ops', vs') <- pushSuffix ops vs x = ho ops' vs' xs
| Just _ <- getInfix x
, Just (ops', vs') <- pushInfix ops vs x = wo ops' vs' xs
| isOperand x = ho ops vs (Literal "" : xs') -- app (see below)
| Just _ <- getPrefix x = ho ops vs (Literal "" : xs') -- also app!
| otherwise = error "want valid infix"
{- incoming operand; there's an app between -}
ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
{- the last operand was last, pop until finished -}
ho [] [res] [] = res
ho ops vs [] =
let (ops', vs') = pop ops vs
in ho ops' vs' []
{- recurse to delimited subexpression -}
rec :: AST -> PrlgStr
rec = ast2prlg optable
{- pop a level, possibly uncovering a higher prio -}
pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = (ops, (CallS x [l, r] : vs))
pop ((x, Op _ (Prefix _)):ops) (p:vs) = (ops, (CallS x [p] : vs))
pop ((x, Op _ (Suffix _)):ops) (p:vs) = (ops, (CallS x [p] : vs))
pop _ _ = error "pop borked"
{- Operator checks -}
uniq [x] = Just x
uniq _ = Nothing
getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
isOperand x = null [op | (s, op) <- optable, s == x]
{- actual pushery -}
canPush [] op = Just True
canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
{- helper -}
where
prioLtOp X = (<)
prioLtOp Y = (<=)
{- pushing a prefix -}
go prio (Infix _ l) nprio (Prefix _) =
if prioLtOp l nprio prio
then Just True
else Nothing
go prio (Prefix l) nprio (Prefix r) =
if prioLtOp l nprio prio
then Just True
else Nothing
go prio (Suffix l) nprio (Prefix r) = error "wat sufix" --not just a normal prio clash
{- pushing a suffix -}
go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
go prio (Suffix _) nprio (Suffix r) = Just $ prioLtOp r nprio prio
go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
{- pushing an infix -}
go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
go prio (Suffix _) nprio (Infix r _) =
if prioLtOp r nprio prio
then Nothing
else Just False
go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
{- helper for cases that look like: a `xfy` b `yfx` c -}
clash p l np r
| p < np = Just False
| p > np = Just True
| p == np
, r == Y = Just False
| p == np
, l == Y
, r == X = Just True
| otherwise = Nothing
{- actual shunting -}
pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
shunt1 ops vs x op = do
cp <- canPush ops op
if cp
then return ((x, op) : ops, vs)
else let (ops', vs') = pop ops vs
in shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush