improve loop checks
1 | 21 |
module CSTParser |
2 |
global debug = true |
|
3 |
|
|
4 |
using Tokenize |
|
5 |
import Base: length, first, last, getindex, setindex! |
|
6 |
import Tokenize.Tokens |
|
7 |
import Tokenize.Tokens: RawToken, AbstractToken, iskeyword, isliteral, isoperator, untokenize |
|
8 |
import Tokenize.Lexers: Lexer, peekchar, iswhitespace |
|
9 |
|
|
10 |
export ParseState, parse_expression |
|
11 |
|
|
12 |
include("lexer.jl") |
|
13 |
include("spec.jl") |
|
14 |
include("utils.jl") |
|
15 |
include("recovery.jl") |
|
16 |
include("components/internals.jl") |
|
17 |
include("components/keywords.jl") |
|
18 |
include("components/lists.jl") |
|
19 |
include("components/operators.jl") |
|
20 |
include("components/strings.jl") |
|
21 |
include("conversion.jl") |
|
22 |
include("display.jl") |
|
23 |
include("interface.jl") |
|
24 |
|
|
25 |
"""
|
|
26 |
parse_expression(ps)
|
|
27 |
|
|
28 |
Parses an expression until `closer(ps) == true`. Expects to enter the
|
|
29 |
`ParseState` the token before the beginning of the expression and ends
|
|
30 |
on the last token.
|
|
31 |
|
|
32 |
Acceptable starting tokens are:
|
|
33 |
+ A keyword
|
|
34 |
+ An opening parenthesis, square bracket or brace.
|
|
35 |
+ An operator.
|
|
36 |
+ An instance (e.g. identifier, number, etc.)
|
|
37 |
+ An `@`.
|
|
38 |
|
|
39 |
"""
|
|
40 |
function parse_expression(ps::ParseState)
    # If the upcoming token's kind is in `term_c` it cannot start an expression
    # (except `END` while `ps.closer.square` is set): consume it and report it.
    upcoming = kindof(ps.nt)
    if upcoming ∈ term_c && (upcoming !== Tokens.END || !ps.closer.square)
        return mErrorToken(ps, INSTANCE(next(ps)), UnexpectedToken)
    end

    # Step onto the first token of the expression and dispatch on its kind.
    next(ps)
    tokkind = kindof(ps.t)
    if iskeyword(tokkind) && tokkind != Tokens.DO
        ret = parse_kw(ps)
    elseif tokkind === Tokens.LPAREN
        ret = parse_paren(ps)
    elseif tokkind === Tokens.LSQUARE
        ret = @default ps parse_array(ps)
    elseif tokkind === Tokens.LBRACE
        ret = @default ps @closebrace ps parse_braces(ps)
    elseif isinstance(ps.t) || isoperator(ps.t)
        ret = both_symbol_and_op(ps.t) ? mIDENTIFIER(ps) : INSTANCE(ps)
        if is_colon(ret) && !iscomma(ps.nt) && kindof(ps.ws) != SemiColonWS
            # A colon not followed by a comma or semicolon whitespace is
            # handed to the unary parser.
            ret = parse_unary(ps, ret)
        elseif isoperator(ret) && precedence(ret) == AssignmentOp &&
               kindof(ret) !== Tokens.APPROX
            # An assignment-precedence operator (other than `APPROX`) cannot
            # open an expression.
            ret = mErrorToken(ps, ret, UnexpectedAssignmentOp)
        end
    elseif tokkind === Tokens.AT_SIGN
        ret = parse_macrocall(ps)
    else
        ret = mErrorToken(ps, INSTANCE(ps), UnexpectedToken)
    end

    # Keep extending the parsed head until a closer is reached.
    return parse_compound_recur(ps, ret)
end
|
|
73 |
|
|
74 | 23 |
"""
    parse_compound_recur(ps, ret)

Repeatedly extend `ret` with `parse_compound(ps, ret)` until `closer(ps)` is
true, then return the accumulated expression.

Implemented as a loop instead of self-recursion: Julia does not eliminate tail
calls, so the recursive form consumed one stack frame per compound step.
"""
function parse_compound_recur(ps, ret)
    while !closer(ps)
        ret = parse_compound(ps, ret)
    end
    return ret
end
75 |
|
|
76 |
"""
|
|
77 |
parse_compound(ps::ParseState, ret::EXPR)
|
|
78 |
|
|
79 |
Attempts to parse a compound expression given the preceding expression `ret`.
|
|
80 |
"""
|
|
81 |
function parse_compound(ps::ParseState, ret::EXPR)
    # NOTE: the order of the branches below is significant; the first matching
    # condition decides how `ret` is extended.
    if kindof(ps.nt) === Tokens.FOR
        # `ret` followed by `for`.
        ret = parse_generator(ps, ret)
    elseif kindof(ps.nt) === Tokens.DO
        # `ret` followed by `do`.
        ret = @default ps @closer ps :block parse_do(ps, ret)
    elseif isajuxtaposition(ps, ret)
        # Juxtaposition is parsed as an operator call with an implicit `*`.
        if disallowednumberjuxt(ret)
            ret = mErrorToken(ps, ret, CannotJuxtapose)
        end
        ret = parse_operator(ps, ret, mOPERATOR(0, 0, Tokens.STAR, false))
    elseif (typof(ret) === x_Str || typof(ret) === x_Cmd) && isidentifier(ps.nt)
        # An identifier directly after a string/cmd macro is appended to it as
        # a `STRING` literal.
        suffix = mIDENTIFIER(next(ps))
        push!(ret, mLITERAL(suffix.fullspan, suffix.span, val(ps.t, ps), Tokens.STRING))
    elseif (isidentifier(ret) || is_getfield(ret)) && isemptyws(ps.ws) &&
           isprefixableliteral(ps.nt)
        # Identifier (or getfield) immediately followed by a prefixable literal.
        next(ps)
        literal = parse_string_or_cmd(ps, ret)
        wrapper = if kindof(literal) === Tokens.CMD || kindof(literal) === Tokens.TRIPLE_CMD
            x_Cmd
        elseif valof(ret) == "var" && VERSION > v"1.3.0-"
            NONSTDIDENTIFIER
        else
            x_Str
        end
        ret = EXPR(wrapper, EXPR[ret, literal])
    elseif kindof(ps.nt) === Tokens.LPAREN
        # Record whether whitespace precedes `(` before the call is parsed.
        has_ws = !isemptyws(ps.ws)
        ret = @closeparen ps parse_call(ps, ret)
        if has_ws && !isunarycall(ret)
            ret = mErrorToken(ps, ret, UnexpectedWhiteSpace)
        end
    elseif kindof(ps.nt) === Tokens.LBRACE
        # Same approach as for `(`: parse first, then flag preceding whitespace.
        has_ws = !isemptyws(ps.ws)
        ret = @default ps @nocloser ps :inwhere @closebrace ps parse_curly(ps, ret)
        if has_ws
            ret = mErrorToken(ps, ret, UnexpectedWhiteSpace)
        end
    elseif kindof(ps.nt) === Tokens.LSQUARE && isemptyws(ps.ws) && !isoperator(ret)
        ret = @default ps @nocloser ps :block parse_ref(ps, ret)
    elseif iscomma(ps.nt)
        ret = parse_tuple(ps, ret)
    elseif isunaryop(ret) && kindof(ps.nt) != Tokens.EQ
        ret = parse_unary(ps, ret)
    elseif isoperator(ps.nt)
        binop = mOPERATOR(next(ps))
        ret = parse_operator(ps, ret, binop)
    elseif isunarycall(ret) && is_prime(ret.args[2])
        # prime operator followed by an identifier has an implicit multiplication
        rhs = @precedence ps TimesOp parse_expression(ps)
        ret = mBinaryOpCall(ret, mOPERATOR(0, 0, Tokens.STAR, false), rhs)
    # ###############################################################################
    # Everything below here is an error
    # ###############################################################################
    else
        ps.errored = true
        # A stray closing bracket is consumed as erroneous punctuation;
        # anything else is parsed as a fresh expression.
        stray = if kindof(ps.nt) ∈ (Tokens.RPAREN, Tokens.RSQUARE, Tokens.RBRACE)
            mErrorToken(ps, mPUNCTUATION(next(ps)), Unknown)
        else
            parse_expression(ps)
        end
        ret = EXPR(ErrorToken, EXPR[ret, stray])
    end
    return ret
end
|
|
144 |
|
|
145 |
"""
|
|
146 |
    parse_paren(ps::ParseState)
|
|
147 |
|
|
148 |
Parses an expression starting with a `(`.
|
|
149 |
"""
|
|
150 |
function parse_paren(ps::ParseState)
    # `args` starts with the punctuation built from the current token; the
    # comma-separated contents are appended by `parse_comma_sep`.
    args = EXPR[mPUNCTUATION(ps)]
    @closeparen ps @default ps @nocloser ps :inwhere parse_comma_sep(ps, args, false, true, true)

    # Decide between brackets and tuple BEFORE `accept_rparen` is called: the
    # test reads `ps.ws` and `length(args)`, and that call receives both `ps`
    # and `args`.
    if length(args) == 2
        inner = args[2]
        isbracketed = (kindof(ps.ws) !== SemiColonWS || typof(inner) === Block) &&
                      typof(inner) !== Parameters
    else
        isbracketed = false
    end

    accept_rparen(ps, args)
    return EXPR(isbracketed ? InvisBrackets : TupleH, args)
end
|
|
163 |
|
|
164 |
"""
|
|
165 |
parse(str, cont = false)
|
|
166 |
|
|
167 |
Parses the passed string. If `cont` is true, parsing continues until the end of the string and the resulting expressions are returned in a `FileH` expression.
|
|
168 |
"""
|
|
169 |
function parse(str::String, cont=false)
    # Delegate to the `ParseState` method; only the parsed expression is
    # returned, the final parse state is discarded.
    parsed, _ = parse(ParseState(str), cont)
    return parsed
end
|
|
174 |
|
|
175 |
"""
|
|
176 |
parse_doc(ps::ParseState)
|
|
177 |
|
|
178 |
Used for top-level parsing - attaches documentation (such as this) to expressions.
|
|
179 |
"""
|
|
180 |
function parse_doc(ps::ParseState)
    upcoming = kindof(ps.nt)
    if (upcoming === Tokens.STRING || upcoming === Tokens.TRIPLE_STRING) &&
       !isemptyws(ps.nws)
        docstr = mLITERAL(next(ps))

        # The string stands alone when it is followed by the end marker, an
        # `end`, or when the next token starts more than one line further down.
        following = kindof(ps.nt)
        if following === Tokens.ENDMARKER || following === Tokens.END ||
           ps.t.endpos[1] + 1 < ps.nt.startpos[1]
            return docstr
        end

        # A string followed by a binary operator is an ordinary operand.
        if isbinaryop(ps.nt) && !closer(ps)
            return parse_compound(ps, docstr)
        end

        # Otherwise the string documents the expression that follows it.
        documented = parse_expression(ps)
        return EXPR(MacroCall, EXPR[GlobalRefDOC(), docstr, documented])
    end

    if nexttokenstartsdocstring(ps)
        # Prefixed string used as documentation: build the string-macro
        # expression first, then attach it to the following expression.
        prefix = mIDENTIFIER(next(ps))
        literal = parse_string_or_cmd(next(ps), prefix)
        docstr = EXPR(x_Str, EXPR[prefix, literal])
        documented = parse_expression(ps)
        return EXPR(MacroCall, EXPR[GlobalRefDOC(), docstr, documented])
    end

    # No documentation ahead: parse a plain expression.
    return parse_expression(ps)
end
|
|
203 |
|
|
204 |
"""
    parse(ps::ParseState, cont=false)

Parse from `ps` and return the tuple `(top, ps)`.

If the lexer input is empty (`ps.l.io.size == 0`), `top` is an empty `FileH`
expression when `cont` is true and `nothing` otherwise. With `cont` true,
expressions are parsed until the end marker and collected in a `FileH`
expression. With `cont` false, one top-level item is parsed.
"""
function parse(ps::ParseState, cont=false)
    if ps.l.io.size == 0
        return (cont ? EXPR(FileH, EXPR[]) : nothing), ps
    end
    top = cont ? _parse_all_toplevel(ps) : _parse_one_toplevel(ps)
    return top, ps
end

# Parse every expression up to the end marker into a `FileH` expression.
function _parse_all_toplevel(ps::ParseState)
    top = EXPR(FileH, EXPR[])
    if kindof(ps.nt) === Tokens.WHITESPACE || kindof(ps.nt) === Tokens.COMMENT
        # Leading whitespace/comment: consume it and record an empty
        # `NOTHING` literal as the first child.
        next(ps)
        push!(top, mLITERAL(ps.nt.startbyte, ps.nt.startbyte, "", Tokens.NOTHING))
    end

    last_line = 0
    prevpos = position(ps)
    while kindof(ps.nt) !== Tokens.ENDMARKER
        curr_line = ps.nt.startpos[1]
        ret = parse_doc(ps)
        if _continue_doc_parse(ps, ret)
            push!(ret, parse_expression(ps))
        end
        # join semicolon sep items
        if curr_line == last_line && typof(last(top.args)) === TopLevel
            push!(last(top.args), ret)
            # `ret` was added to a child, so `top`'s spans are updated by hand.
            top.fullspan += ret.fullspan
            top.span = top.fullspan - (ret.fullspan - ret.span)
        elseif kindof(ps.ws) == SemiColonWS
            push!(top, EXPR(TopLevel, EXPR[ret]))
        else
            push!(top, ret)
        end
        last_line = curr_line
        prevpos = loop_check(ps, prevpos)
    end
    return top
end

# Parse a single top-level item; `;`-separated expressions that follow on the
# same line are gathered into a `TopLevel` expression.
function _parse_one_toplevel(ps::ParseState)
    if kindof(ps.nt) === Tokens.WHITESPACE || kindof(ps.nt) === Tokens.COMMENT
        next(ps)
        return mLITERAL(ps.nt.startbyte, ps.nt.startbyte, "", Tokens.NOTHING)
    elseif ps.done || kindof(ps.nt) === Tokens.ENDMARKER
        # Nothing left to parse.
        return EXPR(ErrorToken, EXPR[], 0, 0)
    end

    top = parse_doc(ps)
    if _continue_doc_parse(ps, top)
        push!(top, parse_expression(ps))
    end

    if kindof(ps.ws) == SemiColonWS
        last_line = ps.nt.startpos[1]
        top = EXPR(TopLevel, EXPR[top])
        prevpos = position(ps)
        while kindof(ps.ws) == SemiColonWS && ps.nt.startpos[1] == last_line &&
              kindof(ps.nt) != Tokens.ENDMARKER
            push!(top, parse_doc(ps))
            last_line = ps.nt.startpos[1]
            prevpos = loop_check(ps, prevpos)
        end
    end
    return top
end
|
|
267 |
|
|
268 |
"""
    _continue_doc_parse(ps::ParseState, x::EXPR)

Return `true` when `x` is a `MacroCall` whose first argument is a two-part
`MacroName` with second part `"doc"`, `x` has fewer than three arguments, and
the next token starts on a later line than the one the current token ends on.
"""
function _continue_doc_parse(ps::ParseState, x::EXPR)
    typof(x) === MacroCall || return false
    macroname = x.args[1]
    typof(macroname) === MacroName || return false
    length(macroname) == 2 || return false
    valof(macroname.args[2]) == "doc" || return false
    length(x.args) < 3 || return false
    return ps.t.endpos[1] + 1 <= ps.nt.startpos[1]
end
|
|
276 |
|
|
277 |
include("precompile.jl") |
|
278 |
_precompile() |
|
279 |
end
|
Read our documentation on viewing source code .