module CSTParser
global debug = true

using Tokenize
import Base: length, first, last, getindex, setindex!
import Tokenize.Tokens
import Tokenize.Tokens: RawToken, AbstractToken, iskeyword, isliteral, isoperator, untokenize
import Tokenize.Lexers: Lexer, peekchar, iswhitespace

export ParseState, parse_expression

include("lexer.jl")
include("spec.jl")
include("utils.jl")
include("recovery.jl")
include("components/internals.jl")
include("components/keywords.jl")
include("components/lists.jl")
include("components/operators.jl")
include("components/strings.jl")
include("conversion.jl")
include("display.jl")
include("interface.jl")

"""
    parse_expression(ps)

Parses an expression until `closer(ps) == true`. Expects to enter with the
`ParseState` on the token before the beginning of the expression and ends
on the expression's last token.

Acceptable starting tokens are:
+ A keyword (other than `do`).
+ An opening parenthesis, square bracket or brace.
+ An operator.
+ An instance (e.g. identifier, number, etc.)
+ An `@`.

Any other starting token yields an `ErrorToken` expression tagged
`UnexpectedToken`. The same error is produced, without attempting to parse any
trailing compound expression, when the next token's kind is in `term_c`
(except for `end` while `ps.closer.square` is set).
"""
function parse_expression(ps::ParseState)
    # A token from `term_c` cannot start an expression. `end` is let through while
    # `ps.closer.square` is set -- presumably so it can appear inside `[...]`
    # indexing; confirm against the closer definitions.
    if kindof(ps.nt) in term_c && !(kindof(ps.nt) === Tokens.END && ps.closer.square)
        ret = mErrorToken(ps, INSTANCE(next(ps)), UnexpectedToken)
    else
        next(ps)
        if iskeyword(kindof(ps.t)) && kindof(ps.t) != Tokens.DO
            ret = parse_kw(ps)
        elseif kindof(ps.t) === Tokens.LPAREN
            ret = parse_paren(ps)
        elseif kindof(ps.t) === Tokens.LSQUARE
            ret = @default ps parse_array(ps)
        elseif kindof(ps.t) === Tokens.LBRACE
            ret = @default ps @closebrace ps parse_braces(ps)
        elseif isinstance(ps.t) || isoperator(ps.t)
            if both_symbol_and_op(ps.t)
                ret = mIDENTIFIER(ps)
            else
                ret = INSTANCE(ps)
            end
            if is_colon(ret) && !(iscomma(ps.nt) || kindof(ps.ws) == SemiColonWS)
                # A leading `:` not followed by a comma or semicolon is parsed as a
                # unary operator applied to what follows.
                ret = parse_unary(ps, ret)
            elseif isoperator(ret) && precedence(ret) == AssignmentOp && kindof(ret) !== Tokens.APPROX
                # An assignment-precedence operator (other than APPROX) cannot start
                # an expression.
                ret = mErrorToken(ps, ret, UnexpectedAssignmentOp)
            end
        elseif kindof(ps.t) === Tokens.AT_SIGN
            ret = parse_macrocall(ps)
        else
            ret = mErrorToken(ps, INSTANCE(ps), UnexpectedToken)
        end
        # Keep extending the head expression until a closer is reached.
        ret = parse_compound_recur(ps, ret)
    end
    return ret
end
# Repeatedly extends `ret` through `parse_compound` until `closer(ps)` reports
# true, then returns the accumulated expression.
function parse_compound_recur(ps, ret)
    while !closer(ps)
        ret = parse_compound(ps, ret)
    end
    return ret
end
"""
    parse_compound(ps::ParseState, ret::EXPR)

Attempts to parse a compound expression given the preceding expression `ret`.

The next token (`ps.nt`), the whitespace before it (`ps.ws`) and the shape of
`ret` select how `ret` is extended: generator (`for`), `do` block,
juxtaposition, string/command macro prefix or suffix, call, curly, ref, tuple,
unary operator or binary operator. Returns the extended expression.

If no rule applies, `ps.errored` is set and an `ErrorToken` expression holding
`ret` and whatever follows it is returned.
"""
function parse_compound(ps::ParseState, ret::EXPR)
    if kindof(ps.nt) === Tokens.FOR
        ret = parse_generator(ps, ret)
    elseif kindof(ps.nt) === Tokens.DO
        ret = @default ps @closer ps :block parse_do(ps, ret)
    elseif isajuxtaposition(ps, ret)
        if disallowednumberjuxt(ret)
            ret = mErrorToken(ps, ret, CannotJuxtapose)
        end
        # Juxtaposition is parsed as a binary operation with a synthesized `*`.
        op = mOPERATOR(0, 0, Tokens.STAR, false)
        ret = parse_operator(ps, ret, op)
    elseif (typof(ret) === x_Str || typof(ret) === x_Cmd) && isidentifier(ps.nt)
        # An identifier following a string/command macro is appended to it as a
        # STRING literal.
        arg = mIDENTIFIER(next(ps))
        push!(ret, mLITERAL(arg.fullspan, arg.span, val(ps.t, ps), Tokens.STRING))
    elseif (isidentifier(ret) || is_getfield(ret)) && isemptyws(ps.ws) && isprefixableliteral(ps.nt)
        # `ret` is immediately followed (no whitespace) by a prefixable literal.
        next(ps)
        arg = parse_string_or_cmd(ps, ret)
        if kindof(arg) === Tokens.CMD || kindof(arg) === Tokens.TRIPLE_CMD
            ret = EXPR(x_Cmd, EXPR[ret, arg])
        elseif valof(ret) == "var" && VERSION > v"1.3.0-"
            ret = EXPR(NONSTDIDENTIFIER, EXPR[ret, arg])
        else
            ret = EXPR(x_Str, EXPR[ret, arg])
        end
    elseif kindof(ps.nt) === Tokens.LPAREN
        # Record the whitespace state before `parse_call` advances `ps`.
        has_ws = !isemptyws(ps.ws)
        ret = @closeparen ps parse_call(ps, ret)
        if has_ws && !isunarycall(ret)
            ret = mErrorToken(ps, ret, UnexpectedWhiteSpace)
        end
    elseif kindof(ps.nt) === Tokens.LBRACE
        # Whitespace before `{` is an error, but the curly expression is still
        # parsed so it can be wrapped in the error token.
        has_ws = !isemptyws(ps.ws)
        ret = @default ps @nocloser ps :inwhere @closebrace ps parse_curly(ps, ret)
        if has_ws
            ret = mErrorToken(ps, ret, UnexpectedWhiteSpace)
        end
    elseif kindof(ps.nt) === Tokens.LSQUARE && isemptyws(ps.ws) && !isoperator(ret)
        ret = @default ps @nocloser ps :block parse_ref(ps, ret)
    elseif iscomma(ps.nt)
        ret = parse_tuple(ps, ret)
    elseif isunaryop(ret) && kindof(ps.nt) != Tokens.EQ
        ret = parse_unary(ps, ret)
    elseif isoperator(ps.nt)
        op = mOPERATOR(next(ps))
        ret = parse_operator(ps, ret, op)
    elseif isunarycall(ret) && is_prime(ret.args[2])
        # prime operator followed by an identifier has an implicit multiplication
        nextarg = @precedence ps TimesOp parse_expression(ps)
        ret = mBinaryOpCall(ret, mOPERATOR(0, 0, Tokens.STAR, false), nextarg)
# ###############################################################################
# Everything below here is an error
# ###############################################################################
    else
        ps.errored = true
        if kindof(ps.nt) in (Tokens.RPAREN, Tokens.RSQUARE, Tokens.RBRACE)
            # A stray closing bracket is consumed on its own as an error token.
            nextarg = mErrorToken(ps, mPUNCTUATION(next(ps)), Unknown)
        else
            nextarg = parse_expression(ps)
        end
        ret = EXPR(ErrorToken, EXPR[ret, nextarg])
    end
    return ret
end
"""
    parse_paren(ps)

Parses an expression starting with a `(`. Expects `ps.t` to be the opening
parenthesis.

Returns an `InvisBrackets` expression when the parentheses enclose a single
argument that is not a `Parameters` expression and is either a `Block` or not
followed by semicolon whitespace; otherwise returns a `TupleH` expression.
"""
function parse_paren(ps::ParseState)
    args = EXPR[mPUNCTUATION(ps)]
    @closeparen ps @default ps @nocloser ps :inwhere parse_comma_sep(ps, args, false, true, true)

    # Decide the expression kind before `accept_rparen` touches `ps` and `args`:
    # at this point `args` holds the `(` plus whatever was parsed inside it.
    is_brackets = length(args) == 2 && ((kindof(ps.ws) !== SemiColonWS || typof(args[2]) === Block) && typof(args[2]) !== Parameters)
    accept_rparen(ps, args)
    if is_brackets
        ret = EXPR(InvisBrackets, args)
    else
        ret = EXPR(TupleH, args)
    end
    return ret
end
"""
    parse(str, cont = false)

Parses the passed string and returns the resulting expression. If `cont` is
true, parsing continues until the end of the string and the resulting
expressions are returned together in a single `FileH` expression. For an empty
input with `cont` false, `nothing` is returned.
"""
function parse(str::String, cont=false)
    # The `ParseState` method also hands back the state; only the tree is wanted.
    parsed, _ = parse(ParseState(str), cont)
    return parsed
end
"""
    parse_doc(ps::ParseState)

Used for top-level parsing - attaches documentation (such as this) to expressions.

A leading string literal (or a prefixed string detected by
`nexttokenstartsdocstring`) is treated as documentation for the expression
that follows, and the pair is returned as a `MacroCall` headed by
`GlobalRefDOC()`. A string that is followed by the end of input, an `end`, or
a gap of at least one line is returned on its own; a string followed by a
binary operator is parsed as an ordinary compound expression.
"""
function parse_doc(ps::ParseState)
    next_kind = kindof(ps.nt)
    if (next_kind === Tokens.STRING || next_kind === Tokens.TRIPLE_STRING) && !isemptyws(ps.nws)
        docstr = mLITERAL(next(ps))
        # Nothing to document: input ends, block ends, or the next token starts
        # more than one line below the string.
        if kindof(ps.nt) === Tokens.ENDMARKER || kindof(ps.nt) === Tokens.END || ps.t.endpos[1] + 1 < ps.nt.startpos[1]
            return docstr
        end
        # The string is the left operand of a binary operation, not a docstring.
        if isbinaryop(ps.nt) && !closer(ps)
            return parse_compound(ps, docstr)
        end
        documented = parse_expression(ps)
        return EXPR(MacroCall, EXPR[GlobalRefDOC(), docstr, documented])
    end

    if nexttokenstartsdocstring(ps)
        prefix = mIDENTIFIER(next(ps))
        body = parse_string_or_cmd(next(ps), prefix)
        docstr = EXPR(x_Str, EXPR[prefix, body])
        documented = parse_expression(ps)
        return EXPR(MacroCall, EXPR[GlobalRefDOC(), docstr, documented])
    end

    return parse_expression(ps)
end
"""
    parse(ps::ParseState, cont = false)

Parses from the current position of `ps` and returns the tuple `(x, ps)`.

With `cont` true, parsing continues until the end marker and `x` is a `FileH`
expression holding every top-level expression; expressions that start on the
same line after a semicolon are grouped into a `TopLevel` expression.

With `cont` false, a single expression is parsed. If it is followed by
semicolon whitespace, it is wrapped in a `TopLevel` expression that also
collects the following semicolon-separated expressions on the same line. If
the next token is whitespace or a comment, `x` is an empty `NOTHING` literal;
if the input is already exhausted, `x` is an empty `ErrorToken` expression.

When the underlying buffer is empty, `x` is an empty `FileH` (`cont` true) or
`nothing` (`cont` false).
"""
function parse(ps::ParseState, cont=false)
    if ps.l.io.size == 0
        return (cont ? EXPR(FileH, EXPR[]) : nothing), ps
    end

    if cont
        top = EXPR(FileH, EXPR[])
        if kindof(ps.nt) === Tokens.WHITESPACE || kindof(ps.nt) === Tokens.COMMENT
            next(ps)
            push!(top, mLITERAL(ps.nt.startbyte, ps.nt.startbyte, "", Tokens.NOTHING))
        end

        # Line on which the previous / current top-level expression started.
        last_line = 0
        curr_line = 0
        prevpos = position(ps)
        while kindof(ps.nt) !== Tokens.ENDMARKER
            curr_line = ps.nt.startpos[1]
            ret = parse_doc(ps)
            if _continue_doc_parse(ps, ret)
                push!(ret, parse_expression(ps))
            end
            # join semicolon sep items
            if curr_line == last_line && typof(last(top.args)) === TopLevel
                push!(last(top.args), ret)
                # `ret` was pushed into the nested TopLevel, so the spans of `top`
                # are adjusted by hand here.
                top.fullspan += ret.fullspan
                top.span = top.fullspan - (ret.fullspan - ret.span)
            elseif kindof(ps.ws) == SemiColonWS
                push!(top, EXPR(TopLevel, EXPR[ret]))
            else
                push!(top, ret)
            end
            last_line = curr_line
            prevpos = loop_check(ps, prevpos)
        end
    else
        if kindof(ps.nt) === Tokens.WHITESPACE || kindof(ps.nt) === Tokens.COMMENT
            next(ps)
            top = mLITERAL(ps.nt.startbyte, ps.nt.startbyte, "", Tokens.NOTHING)
        elseif !(ps.done || kindof(ps.nt) === Tokens.ENDMARKER)
            top = parse_doc(ps)
            if _continue_doc_parse(ps, top)
                push!(top, parse_expression(ps))
            end
            last_line = ps.nt.startpos[1]
            if kindof(ps.ws) == SemiColonWS
                top = EXPR(TopLevel, EXPR[top])
                prevpos = position(ps)
                # Keep collecting while the next expression is separated by a
                # semicolon and starts on the line recorded in `last_line`.
                while kindof(ps.ws) == SemiColonWS && ps.nt.startpos[1] == last_line && kindof(ps.nt) != Tokens.ENDMARKER
                    ret = parse_doc(ps)
                    push!(top, ret)
                    last_line = ps.nt.startpos[1]
                    prevpos = loop_check(ps, prevpos)
                end
            end
        else
            top = EXPR(ErrorToken, EXPR[], 0, 0)
        end
    end

    return top, ps
end
# Returns `true` when `x` is a macro call whose name is a two-part `MacroName`
# ending in `doc`, that has fewer than three arguments so far, and whose next
# token starts at least one line below the end of the current token.
# `parse` uses this to decide whether to parse one more expression and push it
# onto `x`.
function _continue_doc_parse(ps::ParseState, x::EXPR)
    typof(x) === MacroCall || return false
    macroname = x.args[1]
    typof(macroname) === MacroName || return false
    length(macroname) == 2 || return false
    valof(macroname.args[2]) == "doc" || return false
    length(x.args) < 3 || return false
    return ps.t.endpos[1] + 1 <= ps.nt.startpos[1]
end

include("precompile.jl")
_precompile()
end