1
import Tokenize.Lexers: peekchar, readchar, iswhitespace, emit, emit_error,  accept_batch, eof
2

3
# Shorthand aliases for the whitespace-trivia token kinds produced by the lexer.
const EmptyWS = Tokens.EMPTY_WS
const SemiColonWS = Tokens.SEMICOLON_WS
const NewLineWS = Tokens.NEWLINE_WS
const WS = Tokens.WS
# Sentinel used when a token has no trailing whitespace/comment trivia.
const EmptyWSToken = RawToken(EmptyWS, (0, 0), (0, 0), -1, -1)
8

9
"""
    Closer

Mutable set of flags describing the syntactic context the parser is currently
inside; they determine which tokens terminate ("close") the expression being
parsed (e.g. a newline, a comma, a closing bracket).
"""
mutable struct Closer
    newline::Bool    # a newline ends the current expression
    semicolon::Bool  # a semicolon ends the current expression
    tuple::Bool      # inside a tuple
    comma::Bool      # a comma ends the current expression
    paren::Bool      # inside parentheses
    brace::Bool      # inside braces
    inmacro::Bool    # inside a macro call
    insquare::Bool   # inside square brackets
    inref::Bool      # inside an indexing expression
    inwhere::Bool    # inside a `where` clause
    square::Bool     # a closing square bracket ends the current expression
    block::Bool      # inside a block
    ifop::Bool       # inside a ternary/conditional operator
    range::Bool      # inside a range expression
    ws::Bool         # whitespace ends the current expression
    wsop::Bool       # whitespace-sensitive operator context
    unary::Bool      # unary-operator context
    precedence::Int  # current operator-precedence bound
end
# Default top-level context: only newlines and semicolons act as closers.
Closer() = Closer(true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, -1)
30

31
"""
    ParseState

Wraps the lexer together with a sliding four-token window
(last/current/next/next-next) and, for each of those tokens, the
whitespace/comment trivia that follows it, plus the current `Closer` context.
"""
mutable struct ParseState
    l::Lexer{Base.GenericIOBuffer{Array{UInt8,1}},RawToken}
    done::Bool # Remove this
    lt::RawToken    # last token
    t::RawToken     # current token
    nt::RawToken    # next token (lookahead 1)
    nnt::RawToken   # next-next token (lookahead 2)
    lws::RawToken   # trivia following `lt`
    ws::RawToken    # trivia following `t`
    nws::RawToken   # trivia following `nt`
    nnws::RawToken  # trivia following `nnt`
    closer::Closer  # current closing-token context
    errored::Bool   # set when a parse error has been encountered
end
45
# Build a ParseState over `str` and advance twice so that the current/next/
# next-next token slots (and their trivia) are populated before parsing begins.
function ParseState(str::Union{IO,String})
    ps = ParseState(tokenize(str, RawToken), false, RawToken(), RawToken(), RawToken(), RawToken(), RawToken(), RawToken(), RawToken(), RawToken(), Closer(), false)
    return next(next(ps))
end
49

50 0
"""
    ParseState(str, loc)

Construct a `ParseState` over `str` and advance it until the next token
starts at or beyond byte offset `loc`.
"""
function ParseState(str::Union{IO,String}, loc::Int)
    ps = ParseState(str)
    guard = position(ps)
    while ps.nt.startbyte < loc
        next(ps)
        # `loop_check` guards against the lexer failing to make progress.
        guard = loop_check(ps, guard)
    end
    return ps
end
59

60 0
# Render a three-row summary of the token window (kind, token, trivia class).
function Base.show(io::IO, ps::ParseState)
    println(io, "ParseState at $(position(ps.l.io))")
    rows = (("last    : ", ps.lt, ps.lws),
            ("current : ", ps.t, ps.ws),
            ("next    : ", ps.nt, ps.nws))
    for (label, tok, trivia) in rows
        println(io, label, kindof(tok), " ($(tok))", "    ($(wstype(trivia)))")
    end
end
66 0
# Peek at the next character in the underlying lexer without consuming it.
peekchar(ps::ParseState) = peekchar(ps.l)
67 0
# Human-readable description of a whitespace token's kind (used by `show`).
function wstype(t::AbstractToken)
    k = kindof(t)
    if k == EmptyWS
        return "empty"
    elseif k == NewLineWS
        return "ws w/ newline"
    elseif k == SemiColonWS
        return "ws w/ semicolon"
    else
        return "ws"
    end
end
70

71
"""
    next(ps::ParseState)

Advance the token window by one position: shift each token (and its trailing
trivia) one slot down, lex one fresh token into `nnt`, and lex any trailing
whitespace/comment/semicolon run into `nnws`. Returns `ps`.
"""
function next(ps::ParseState)
    # shift old tokens down the window (order matters: oldest slot first)
    ps.lt = ps.t
    ps.t = ps.nt
    ps.nt = ps.nnt
    ps.lws = ps.ws
    ps.ws = ps.nws
    ps.nws = ps.nnws

    ps.nnt = Tokenize.Lexers.next_token(ps.l)

    # combines whitespace, comments and semicolons into a single trivia token
    if iswhitespace(peekchar(ps.l)) || peekchar(ps.l) == '#' || peekchar(ps.l) == ';'
        ps.nnws = lex_ws_comment(ps.l, readchar(ps.l))
    else
        ps.nnws = EmptyWSToken
    end

    return ps
end
91

92 0
# Reposition the lexer at byte `offset` and re-prime the two-token lookahead.
function Base.seek(ps::ParseState, offset)
    seek(ps.l, offset)
    ps = next(ps)
    next(ps)
end
96

97 21
# The parser's current byte position: where the next token starts.
Base.position(ps::ParseState) = ps.nt.startbyte
98

99
"""
    read_ws_comment(l, c::Char)

Having hit an initial whitespace/comment/semicolon character `c`, continues
collecting similar `Chars` until they end. Returns a `(newline, semicolon)`
`Bool` pair recording whether the run contained a line break or a semicolon.
When emitting the trivia token, a semicolon takes precedence over a line
break as the former is equivalent to the latter in most cases.
"""
function read_ws_comment(l, c::Char)
    newline = c == '\n'
    semicolon = c == ';'
    if c == '#'
        newline = read_comment(l)
    else
        newline, semicolon = read_ws(l, newline, semicolon)
    end
    # Keep consuming as long as the run of whitespace/comments/semicolons continues.
    while iswhitespace(peekchar(l)) || peekchar(l) == '#' || peekchar(l) == ';'
        c = readchar(l)
        if c == '#'
            read_comment(l)
            # NOTE(review): these peek at the char *after* the comment —
            # presumably to catch the newline/semicolon that terminates it.
            newline = newline || peekchar(l) == '\n'
            semicolon = semicolon || peekchar(l) == ';'
        elseif c == ';'
            semicolon = true
        else
            newline, semicolon = read_ws(l, newline, semicolon)
        end
    end
    return newline, semicolon
end
127

128
# Consume a run of whitespace/comments/semicolons starting at `c` and emit a
# single trivia token; a semicolon in the run outranks a newline, which
# outranks plain whitespace.
function lex_ws_comment(l::Lexer, c::Char)
    sawnewline, sawsemicolon = read_ws_comment(l, c)
    kind = WS
    if sawsemicolon
        kind = SemiColonWS
    elseif sawnewline
        kind = NewLineWS
    end
    return emit(l, kind)
end
133

134

135
# Consume consecutive whitespace characters, recording whether a newline or a
# semicolon was seen, and return the updated `(newline, semicolon)` pair.
function read_ws(l, newline, semicolon)
    while iswhitespace(peekchar(l))
        ch = readchar(l)
        if ch == '\n'
            newline = true
        elseif ch == ';'
            semicolon = true
        end
    end
    return newline, semicolon
end
143

144
# Consume a comment that started with a '#' already read by the caller.
# Line comment (`# ...`): scan up to, but not through, the newline/EOF and
# return `true` (a line comment always ends the line).
# Block comment (`#= ... =#`): consume through the matching terminator,
# honouring nesting, and return `false`; returns `false` too on unterminated
# input (EOF inside the comment).
function read_comment(l)
    if peekchar(l) != '='
        while true
            pc = peekchar(l)
            if pc == '\n' || eof(pc)
                return true
            end
            readchar(l)
        end
    else
        c = readchar(l) # consume the '='
        # Nesting counters: n_start counts `#=` openers, n_end counts `=#` closers.
        n_start, n_end = 1, 0
        while true
            if eof(c)
                return false
            end
            nc = readchar(l)
            # Examine each adjacent pair (c, nc) for opener/closer sequences.
            if c == '#' && nc == '='
                n_start += 1
            elseif c == '=' && nc == '#'
                n_end += 1
            end
            if n_start == n_end
                return true
            end
            c = nc
        end
    end
end
173

174
# Functions relating to tokens
175 23
# Predicates and accessors on raw tokens.
isemptyws(t::AbstractToken) = kindof(t) == EmptyWS
isnewlinews(t::AbstractToken) = kindof(t) === NewLineWS
# Trivia that terminates a statement: a semicolon or a newline.
isendoflinews(t::AbstractToken) = kindof(t) == SemiColonWS || kindof(t) == NewLineWS
# Source text of `token`; byte offsets are 0-based, hence the `+ 1`s.
@inline val(token::AbstractToken, ps::ParseState) = String(ps.l.io.data[token.startbyte + 1:token.endbyte + 1])
# Keywords that can also appear as binary operators.
both_symbol_and_op(t::AbstractToken) = kindof(t) === Tokens.WHERE || kindof(t) === Tokens.IN || kindof(t) === Tokens.ISA
# String/cmd literals that may carry a prefix (e.g. `r"..."`, custom cmd macros).
isprefixableliteral(t::AbstractToken) = (kindof(t) === Tokens.STRING || kindof(t) === Tokens.TRIPLE_STRING || kindof(t) === Tokens.CMD || kindof(t) === Tokens.TRIPLE_CMD)
isassignment(t::AbstractToken) = Tokens.begin_assignments < kindof(t) < Tokens.end_assignments
182

183 23
isidentifier(t::AbstractToken) = kindof(t) === Tokens.IDENTIFIER
isliteral(t::AbstractToken) = Tokens.begin_literal < kindof(t) < Tokens.end_literal
# FIX: the two `≤` operators had been lost (`Tokens.TRUE  kindof(t)  Tokens.FALSE`
# does not parse); TRUE/FALSE form a contiguous kind range, so test inclusively.
isbool(t::AbstractToken) = Tokens.TRUE ≤ kindof(t) ≤ Tokens.FALSE
iscomma(t::AbstractToken) = kindof(t) === Tokens.COMMA
iscolon(t::AbstractToken) = kindof(t) === Tokens.COLON
iskw(t::AbstractToken) = Tokens.iskeyword(kindof(t))
isinstance(t::AbstractToken) = isidentifier(t) || isliteral(t) || isbool(t) || iskw(t)
# FIX: restore the missing `≤` operators in the bracket-kind range test
# (LSQUARE..RPAREN covers the bracket/paren punctuation kinds).
ispunctuation(t::AbstractToken) = iscomma(t) || kindof(t) === Tokens.END || Tokens.LSQUARE ≤ kindof(t) ≤ Tokens.RPAREN || kindof(t) === Tokens.AT_SIGN
191

Read our documentation on viewing source code .

Loading