This commit is contained in:
Mirek Kratochvil 2022-10-23 23:38:13 +02:00
parent 60f5eb274c
commit f0d6558df9
3 changed files with 179 additions and 34 deletions

View file

@ -37,3 +37,7 @@ main = do
] ]
ppr interp ppr interp
ppr res ppr res
{-
ppr $ let { Right l = parse lexPrlg "stdin" "c(cc(X)) :- a, b. " ; Right p = parse parsePrlg "stdin" l; clause:_ = map (ast2prlg defaultOps) p; ((StrTable _ strfwd strback),codei) = strtablizePrlg emptystrtable clause } in compileRule (Id (strfwd M.! ":-") 2) (Id (strfwd M.! ",") 2) codei
-}

View file

@ -3,19 +3,25 @@ module Parser where
import Control.Applicative (liftA2) import Control.Applicative (liftA2)
import Control.Monad (void) import Control.Monad (void)
import Data.Char import Data.Char
import Data.List.Split (splitOn)
import Data.Void import Data.Void
import Text.Megaparsec import Text.Megaparsec
import Text.Megaparsec.Char import Text.Megaparsec.Char
import Compiler (PrlgStr (..)) import Compiler (PrlgStr(..))
specialOp = (`elem` ",;|") singleToks = ",;|()[]"
specialChar = (`elem` "()[]") identParts = "_"
specialName = (`elem` "_") notOpToks = "\'%" ++ identParts
specialUnused = (`elem` "\'%") isOperatorlike x =
(isSymbol x || isPunctuation x) && not (x `elem` singleToks ++ notOpToks)
isIdentStart x = (x `elem` identParts) || isAlpha x
isIdentOther x = isIdentStart x || isAlphaNum x || isMark x
type Lexer = Parsec Void String type Lexer = Parsec Void String
@ -32,20 +38,9 @@ tok :: Lexer Lexeme
tok = tok =
Tok <$> Tok <$>
choice choice
[ pure <$> satisfy specialOp [ pure <$> oneOf singleToks
, pure <$> satisfy specialChar , some $ satisfy isOperatorlike
, some $ , (:) <$> satisfy isIdentStart <*> many (satisfy isIdentOther)
satisfy $ \x ->
all
($ x)
[ not . specialOp
, not . specialChar
, not . specialUnused
, not . specialName
, liftA2 (||) isSymbol isPunctuation
]
, (:) <$> satisfy (\x -> any ($ x) [specialName, isAlpha]) <*>
many (satisfy $ \x -> any ($ x) [specialName, isAlphaNum, isMark])
, some (satisfy isNumber) , some (satisfy isNumber)
] ]
@ -69,11 +64,11 @@ lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof) lexPrlg = many lexeme <* (many blank >> eof)
data AST data AST
= Call String [AST] = Call String [[AST]]
| Seq [AST] | Seq [AST]
| List [AST] (Maybe [AST]) | List [AST] (Maybe [AST])
| Literal String | Literal String
deriving (Show) deriving (Show, Eq)
type Parser = Parsec Void [Lexeme] type Parser = Parsec Void [Lexeme]
@ -82,7 +77,7 @@ isBlank _ = False
ws = many $ satisfy isBlank ws = many $ satisfy isBlank
free = (ws >>) free = (<* ws) -- we eat blanks _after_ the token.
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"]) isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])
@ -94,17 +89,17 @@ unTok (Tok t) = t
unTok (QTok t _) = t unTok (QTok t _) = t
literal :: Parser AST literal :: Parser AST
literal = Literal . unTok <$> (satisfy isNormalTok <* notFollowedBy lParen) literal = Literal . unTok <$> free (satisfy isNormalTok <* notFollowedBy lParen)
call = do call = do
fn <- unTok <$> satisfy isNormalTok fn <- unTok <$> satisfy isNormalTok -- not free
Seq inner <- parens -- not free! Seq inner <- free parens
return $ Call fn inner return $ Call fn $ splitOn [Literal ","] inner
parens = Seq <$> (lParen *> some seqItem <* free rParen) parens = Seq <$> (free lParen *> some seqItem <* free rParen)
list = do list = do
lBracket free lBracket
choice choice
[ List [] Nothing <$ free rBracket [ List [] Nothing <$ free rBracket
, do items <- some seqItem , do items <- some seqItem
@ -115,7 +110,7 @@ list = do
] ]
] ]
seqItem = free $ choice [try call, literal, parens, list] seqItem = choice [try call, literal, parens, list]
simpleTok :: String -> Parser () simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s) simpleTok s = void $ single (Tok s)
@ -133,10 +128,156 @@ listTail = simpleTok "|"
rBracket = simpleTok "]" rBracket = simpleTok "]"
clause :: Parser AST clause :: Parser AST
clause = Seq <$> some seqItem <* free comma clause = Seq <$> some (free seqItem) <* free comma
parsePrlg :: Parser [AST] parsePrlg :: Parser [AST]
parsePrlg = many clause <* free eof parsePrlg = ws *> many clause <* eof
operatorize :: [AST] -> [PrlgStr] data Op =
operatorize = undefined Op Int Fixity
deriving (Show, Eq)
data ArgKind
= X
| Y
deriving (Show, Eq)
data Fixity
= Infix ArgKind ArgKind
| Prefix ArgKind
| Suffix ArgKind
deriving (Show, Eq)
isPrefix (Prefix _) = True
isPrefix _ = False
numArgs :: Op -> Int
numArgs (Op _ f) = go f
where
go (Infix _ _) = 2
go _ = 1
type Ops = [(String, Op)]
defaultOps :: Ops
defaultOps =
[ ("", Op 0 $ Infix X Y)
, ("+", Op 100 $ Prefix X)
, ("!", Op 100 $ Suffix Y)
, ("-", Op 100 $ Prefix Y)
, ("*", Op 100 $ Infix Y X)
, ("+", Op 200 $ Infix Y X)
, ("++", Op 200 $ Infix X Y)
, ("-", Op 200 $ Infix Y X)
, ("<", Op 300 $ Infix X X)
, (">", Op 300 $ Infix X X)
, ("=", Op 400 $ Infix X X)
, (",", Op 800 $ Infix X Y)
, (";", Op 900 $ Infix X Y)
, (":-", Op 1000 $ Infix X X)
]
ast2prlg :: Ops -> AST -> PrlgStr
ast2prlg ot (List _ _) = error "no lists yet"
ast2prlg ot (Seq ss) = shunt ot ss
ast2prlg ot (Literal s) = LiteralS s
ast2prlg ot (Call fn ss) = CallS fn $ map (shunt ot) ss
shunt :: Ops -> [AST] -> PrlgStr
shunt optable = start
where
start :: [AST] -> PrlgStr
start [x] = rec x --singleton, possibly either a single operator name or a single value
start [] = error "wat seq"
start xs = wo [] [] xs
{- "want operand" state, incoming literal -}
wo :: Ops -> [PrlgStr] -> [AST] -> PrlgStr
wo ops vs (l@(Literal x):xs)
| Just _ <- getPrefix x
, Just (ops', vs') <- pushPrefix ops vs x = wo ops' vs' xs
| isOperand x = ho ops (rec l : vs) xs
| otherwise = error $ "want valid operand " ++ show (ops,vs,l,xs)
{- incoming non-literal (i.e., surely operand), push it and switch to "have operand" -}
wo ops vs (x:xs) = ho ops (rec x : vs) xs
{- end of stream, but the operand is missing -}
wo ops vs [] = error "missing final operand"
{- "have operand" state, incoming operator -}
ho :: Ops -> [PrlgStr] -> [AST] -> PrlgStr
ho ops vs xs'@(Literal x:xs)
| Just _ <- getSuffix x
, Just (ops', vs') <- pushSuffix ops vs x = ho ops' vs' xs
| Just _ <- getInfix x
, Just (ops', vs') <- pushInfix ops vs x = wo ops' vs' xs
| isOperand x = ho ops vs (Literal "" : xs') -- app (see below)
| Just _ <- getPrefix x = ho ops vs (Literal "" : xs') -- also app!
| otherwise = error "want valid infix"
{- incoming operand; there's an app between -}
ho ops vs xs@(_:_) = ho ops vs (Literal "" : xs)
{- the last operand was last, pop until finished -}
ho [] [res] [] = res
ho ops vs [] =
let (ops', vs') = pop ops vs
in ho ops' vs' []
{- recurse to delimited subexpression -}
rec :: AST -> PrlgStr
rec = ast2prlg optable
{- pop a level, possibly uncovering a higher prio -}
pop ((x, Op _ (Infix _ _)):ops) (r:l:vs) = (ops, (CallS x [l, r] : vs))
pop ((x, Op _ (Prefix _)):ops) (p:vs) = (ops, (CallS x [p] : vs))
pop ((x, Op _ (Suffix _)):ops) (p:vs) = (ops, (CallS x [p] : vs))
pop _ _ = error "pop borked"
{- Operator checks -}
uniq [x] = Just x
uniq _ = Nothing
getPrefix x = uniq [op | (s, op@(Op _ (Prefix _))) <- optable, s == x]
getSuffix x = uniq [op | (s, op@(Op _ (Suffix _))) <- optable, s == x]
getInfix x = uniq [op | (s, op@(Op _ (Infix _ _))) <- optable, s == x]
isOperand x = null [op | (s, op) <- optable, s == x]
{- actual pushery -}
canPush [] op = Just True
canPush ((_, Op p f):ops) (Op np nf) = go p f np nf
where
{- helper -}
prioLtOp X = (<)
prioLtOp Y = (<=)
{- pushing a prefix -}
go prio (Infix _ l) nprio (Prefix _) =
if prioLtOp l nprio prio
then Just True
else Nothing
go prio (Prefix l) nprio (Prefix r) =
if prioLtOp l nprio prio
then Just True
else Nothing
go prio (Suffix l) nprio (Prefix r) = error "wat sufix" --not just a normal prio clash
{- pushing a suffix -}
go prio (Prefix l) nprio (Suffix r) = clash prio l nprio r
go prio (Suffix _) nprio (Suffix r) = Just $ prioLtOp r nprio prio
go prio (Infix _ l) nprio (Suffix r) = clash prio l nprio r
{- pushing an infix -}
go prio (Prefix l) nprio (Infix r _) = clash prio l nprio r
go prio (Suffix _) nprio (Infix r _) =
if prioLtOp r nprio prio
then Nothing
else Just False
go prio (Infix _ l) nprio (Infix r _) = clash prio l nprio r
{- helper for cases that look like: a `xfy` b `yfx` c -}
clash p l np r
| p < np = Just False
| p > np = Just True
| p == np
, r == Y = Just False
| p == np
, l == Y
, r == X = Just True
| otherwise = Nothing
{- actual shunting -}
pushPrefix ops vs x = getPrefix x >>= shunt1 ops vs x
pushSuffix ops vs x = getSuffix x >>= shunt1 ops vs x
pushInfix ops vs x = getInfix x >>= shunt1 ops vs x
shunt1 ops vs x op = do
cp <- canPush ops op
if cp
then return ((x, op) : ops, vs)
else let (ops', vs') = pop ops vs
in shunt1 ops' vs' x op --prefix would behave differently here but that's impossible by canPush

View file

@ -29,6 +29,6 @@ executable prlg
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
-- other-extensions: -- other-extensions:
build-depends: base >=4.16, containers, megaparsec, haskeline, pretty-simple build-depends: base >=4.16, containers, megaparsec, haskeline, pretty-simple, split
hs-source-dirs: app hs-source-dirs: app
default-language: Haskell2010 default-language: Haskell2010