143 lines
2.9 KiB
Haskell
143 lines
2.9 KiB
Haskell
module Parser where
|
|
|
|
import Control.Applicative (liftA2)
|
|
import Control.Monad (void)
|
|
import Data.Char
|
|
import Data.Void
|
|
import Text.Megaparsec
|
|
import Text.Megaparsec.Char
|
|
|
|
import Compiler (PrlgStr (..))
|
|
|
|
specialOp = (`elem` ",;|")
|
|
|
|
specialChar = (`elem` "()[]")
|
|
|
|
specialName = (`elem` "_")
|
|
|
|
specialUnused = (`elem` "\'%")
|
|
|
|
type Lexer = Parsec Void String
|
|
|
|
data Lexeme
|
|
= Blank String
|
|
| Tok String
|
|
| QTok String String -- unquoted quoted
|
|
deriving (Show, Eq, Ord)
|
|
|
|
blank :: Lexer Lexeme
|
|
blank = Blank <$> some (satisfy isSpace)
|
|
|
|
tok :: Lexer Lexeme
|
|
tok =
|
|
Tok <$>
|
|
choice
|
|
[ pure <$> satisfy specialOp
|
|
, pure <$> satisfy specialChar
|
|
, some $
|
|
satisfy $ \x ->
|
|
all
|
|
($ x)
|
|
[ not . specialOp
|
|
, not . specialChar
|
|
, not . specialUnused
|
|
, not . specialName
|
|
, liftA2 (||) isSymbol isPunctuation
|
|
]
|
|
, (:) <$> satisfy (\x -> any ($ x) [specialName, isAlpha]) <*>
|
|
many (satisfy $ \x -> any ($ x) [specialName, isAlphaNum, isMark])
|
|
, some (satisfy isNumber)
|
|
]
|
|
|
|
qtok :: Lexer Lexeme
|
|
qtok = do
|
|
x <- string "'"
|
|
y <- many $ satisfy (/= '\'')
|
|
z <- string "'"
|
|
return $ QTok y (x ++ y ++ z)
|
|
|
|
cmt :: Lexer Lexeme
|
|
cmt =
|
|
Blank . concat <$>
|
|
sequence
|
|
[string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]
|
|
|
|
lexeme :: Lexer Lexeme
|
|
lexeme = choice [blank, tok, qtok, cmt]
|
|
|
|
lexPrlg :: Lexer [Lexeme]
|
|
lexPrlg = many lexeme <* (many blank >> eof)
|
|
|
|
data AST
|
|
= Call String [AST]
|
|
| Seq [AST]
|
|
| List [AST] (Maybe [AST])
|
|
| Literal String
|
|
deriving (Show)
|
|
|
|
type Parser = Parsec Void [Lexeme]
|
|
|
|
isBlank (Blank _) = True
|
|
isBlank _ = False
|
|
|
|
ws = many $ satisfy isBlank
|
|
|
|
free = (ws >>)
|
|
|
|
isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])
|
|
|
|
isNormalTok (Tok x) = isNormalTokStr x
|
|
isNormalTok (QTok x _) = isNormalTokStr x
|
|
isNormalTok _ = False
|
|
|
|
unTok (Tok t) = t
|
|
unTok (QTok t _) = t
|
|
|
|
literal :: Parser AST
|
|
literal = Literal . unTok <$> (satisfy isNormalTok <* notFollowedBy lParen)
|
|
|
|
call = do
|
|
fn <- unTok <$> satisfy isNormalTok
|
|
Seq inner <- parens -- not free!
|
|
return $ Call fn inner
|
|
|
|
parens = Seq <$> (lParen *> some seqItem <* free rParen)
|
|
|
|
list = do
|
|
lBracket
|
|
choice
|
|
[ List [] Nothing <$ free rBracket
|
|
, do items <- some seqItem
|
|
choice
|
|
[ List items Nothing <$ free rBracket
|
|
, List items . Just <$>
|
|
(free listTail *> some seqItem <* free rBracket)
|
|
]
|
|
]
|
|
|
|
seqItem = free $ choice [try call, literal, parens, list]
|
|
|
|
simpleTok :: String -> Parser ()
|
|
simpleTok s = void $ single (Tok s)
|
|
|
|
comma = simpleTok "."
|
|
|
|
lParen = simpleTok "("
|
|
|
|
rParen = simpleTok ")"
|
|
|
|
lBracket = simpleTok "["
|
|
|
|
listTail = simpleTok "|"
|
|
|
|
rBracket = simpleTok "]"
|
|
|
|
clause :: Parser AST
|
|
clause = Seq <$> some seqItem <* free comma
|
|
|
|
parsePrlg :: Parser [AST]
|
|
parsePrlg = many clause <* free eof
|
|
|
|
operatorize :: [AST] -> [PrlgStr]
|
|
operatorize = undefined
|