summaryrefslogtreecommitdiff
path: root/app/Parser.hs
blob: 2e7020c6c520728c5354a07f2ca6aa91a8e44848 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
module Parser where

import Control.Applicative (liftA2)
import Control.Monad (void)
import Data.Char
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char

import Compiler (PrlgStr (..))

specialOp = (`elem` ",;|")

specialChar = (`elem` "()[]")

specialName = (`elem` "_")

specialUnused = (`elem` "\'%")

type Lexer = Parsec Void String

data Lexeme
  = Blank String
  | Tok String
  | QTok String String -- unquoted quoted
  deriving (Show, Eq, Ord)

blank :: Lexer Lexeme
blank = Blank <$> some (satisfy isSpace)

tok :: Lexer Lexeme
tok =
  Tok <$>
  choice
    [ pure <$> satisfy specialOp
    , pure <$> satisfy specialChar
    , some $
      satisfy $ \x ->
        all
          ($ x)
          [ not . specialOp
          , not . specialChar
          , not . specialUnused
          , not . specialName
          , liftA2 (||) isSymbol isPunctuation
          ]
    , (:) <$> satisfy (\x -> any ($ x) [specialName, isAlpha]) <*>
      many (satisfy $ \x -> any ($ x) [specialName, isAlphaNum, isMark])
    , some (satisfy isNumber)
    ]

qtok :: Lexer Lexeme
qtok = do
  x <- string "'"
  y <- many $ satisfy (/= '\'')
  z <- string "'"
  return $ QTok y (x ++ y ++ z)

cmt :: Lexer Lexeme
cmt =
  Blank . concat <$>
  sequence
    [string "%", many $ satisfy (/= '\n'), choice [string "\n", "" <$ eof]]

lexeme :: Lexer Lexeme
lexeme = choice [blank, tok, qtok, cmt]

lexPrlg :: Lexer [Lexeme]
lexPrlg = many lexeme <* (many blank >> eof)

data AST
  = Call String [AST]
  | Seq [AST]
  | List [AST] (Maybe [AST])
  | Literal String
  deriving (Show)

type Parser = Parsec Void [Lexeme]

isBlank (Blank _) = True
isBlank _ = False

ws = many $ satisfy isBlank

free = (ws >>)

isNormalTokStr = (`notElem` [".", "[", "]", "(", "|", ")"])

isNormalTok (Tok x) = isNormalTokStr x
isNormalTok (QTok x _) = isNormalTokStr x
isNormalTok _ = False

unTok (Tok t) = t
unTok (QTok t _) = t

literal :: Parser AST
literal = Literal . unTok <$> (satisfy isNormalTok <* notFollowedBy lParen)

call = do
  fn <- unTok <$> satisfy isNormalTok
  Seq inner <- parens -- not free!
  return $ Call fn inner

parens = Seq <$> (lParen *> some seqItem <* free rParen)

list = do
  lBracket
  choice
    [ List [] Nothing <$ free rBracket
    , do items <- some seqItem
         choice
           [ List items Nothing <$ free rBracket
           , List items . Just <$>
             (free listTail *> some seqItem <* free rBracket)
           ]
    ]

seqItem = free $ choice [try call, literal, parens, list]

simpleTok :: String -> Parser ()
simpleTok s = void $ single (Tok s)

comma = simpleTok "."

lParen = simpleTok "("

rParen = simpleTok ")"

lBracket = simpleTok "["

listTail = simpleTok "|"

rBracket = simpleTok "]"

clause :: Parser AST
clause = Seq <$> some seqItem <* free comma

parsePrlg :: Parser [AST]
parsePrlg = many clause <* free eof

operatorize :: [AST] -> [PrlgStr]
operatorize = undefined