improve loop checks
1 |
"""
    longest_common_prefix(prefixa, prefixb)

Return the longest leading run of characters shared by `prefixa` and `prefixb`.

Returns `""` when either argument is empty. The comparison walks both strings
character by character (using valid string indices of `prefixa`), so inputs
containing multi-byte characters are handled without raising `StringIndexError`.
"""
function longest_common_prefix(prefixa, prefixb)
    (isempty(prefixa) || isempty(prefixb)) && return ""
    # Index (in `prefixa`) of the last character that matched; 0 means no match.
    lastmatch = 0
    for (i, cb) in zip(eachindex(prefixa), prefixb)
        prefixa[i] == cb || break
        lastmatch = i
    end
    # A range ending on a character's start index includes that whole character.
    return prefixa[1:lastmatch]
end
|
|
8 |
|
|
9 |
"""
    skip_to_nl(str, idxend)

Advance from index `idxend` through `str` until a `'\\n'` is found or the index
is no longer below `sizeof(str)`, and return the resulting index. If the scan
overshoots `sizeof(str)`, the preceding valid index is returned instead.
"""
function skip_to_nl(str, idxend)
    nbytes = sizeof(str)
    pos = idxend
    while pos < nbytes
        str[pos] == '\n' && break
        pos = nextind(str, pos)
    end
    # Stepping over a trailing multi-byte character can land past the end.
    if pos > nbytes
        return prevind(str, pos)
    end
    return pos
end
|
|
15 |
|
|
16 | 23 |
"""
    tostr(buf::IOBuffer)

Take the contents accumulated in `buf` as a `String` (emptying the buffer via
`take!`) and pass that string through `_unescape_string`.
"""
function tostr(buf::IOBuffer)
    raw = String(take!(buf))
    return _unescape_string(raw)
end
17 |
|
|
18 |
"""
|
|
19 |
parse_string_or_cmd(ps)
|
|
20 |
|
|
21 |
When trying to make an `INSTANCE` from a string token we must check for
|
|
22 |
interpolating operators.
|
|
23 |
"""
|
|
24 |
function parse_string_or_cmd(ps::ParseState, prefixed=false)
    # Convert the string/command token held in `ps.t` into an EXPR.
    #
    # Arguments:
    #   ps       - parse state; `ps.t` is the string/cmd token, `ps.nt` the next token.
    #   prefixed - anything other than `false` (or a command token) selects the
    #              single-literal path, where `\$` is not scanned for.
    #
    # Returns either a single literal EXPR or a `StringH` EXPR whose args are the
    # literal pieces and `\$` interpolation calls.
    # Throws `CSTInfiniteLoop` if the indentation scan in `adjust_lcp` stops advancing.
    sfullspan = ps.nt.startbyte - ps.t.startbyte
    sspan = 1 + ps.t.endbyte - ps.t.startbyte

    istrip = (kindof(ps.t) === Tokens.TRIPLE_STRING) || (kindof(ps.t) === Tokens.TRIPLE_CMD)
    iscmd = kindof(ps.t) === Tokens.CMD || kindof(ps.t) === Tokens.TRIPLE_CMD

    # `lcp` accumulates the longest common whitespace prefix seen after newlines in
    # the literal pieces of a triple-quoted token; `nothing` means "none seen yet".
    lcp = nothing
    exprs_to_adjust = EXPR[]
    function adjust_lcp(expr::EXPR, last=false)
        # Keep the working variables private to this closure. Without this, names
        # that are also assigned in the enclosing function (`prevpos`, `str`) would
        # be the *same* variables, and each call would clobber the `prevpos` used
        # by the enclosing loop check. `lcp` and `exprs_to_adjust` are shared on
        # purpose.
        local str, prefix, idxstart, idxend, prevpos, prevpos1
        if isliteral(expr)
            push!(exprs_to_adjust, expr)
            str = valof(expr)
            (isempty(str) || (lcp !== nothing && isempty(lcp))) && return
            (last && str[end] == '\n') && return (lcp = "")
            idxstart, idxend = 2, 1
            prevpos = idxend
            while nextind(str, idxend) - 1 < sizeof(str) && (lcp === nothing || !isempty(lcp))
                idxend = skip_to_nl(str, idxend)
                idxstart = nextind(str, idxend)
                prevpos1 = idxend
                while nextind(str, idxend) - 1 < sizeof(str)
                    c = str[nextind(str, idxend)]
                    if c == ' ' || c == '\t'
                        idxend += 1
                    elseif c == '\n'
                        # All whitespace lines in the middle are ignored
                        idxend += 1
                        idxstart = idxend + 1
                    else
                        prefix = str[idxstart:idxend]
                        lcp = lcp === nothing ? prefix : longest_common_prefix(lcp, prefix)
                        break
                    end
                    if idxend <= prevpos1
                        throw(CSTInfiniteLoop("Infinite loop in adjust_lcp"))
                    else
                        prevpos1 = idxend
                    end
                end
                if idxend < prevpos
                    throw(CSTInfiniteLoop("Infinite loop in adjust_lcp"))
                else
                    prevpos = idxend
                end
            end
            if idxstart != nextind(str, idxend)
                prefix = str[idxstart:idxend]
                lcp = lcp === nothing ? prefix : longest_common_prefix(lcp, prefix)
            end
        end
    end

    # Prefixed strings and commands: the token text becomes one literal and is
    # not scanned for `\$` interpolation.
    if prefixed != false || iscmd
        t_str = val(ps.t, ps)
        # Strip the delimiters: three characters each side for triple-quoted, one otherwise.
        _val = istrip ? t_str[4:prevind(t_str, sizeof(t_str), 3)] : t_str[2:prevind(t_str, sizeof(t_str))]
        if iscmd
            _val = replace(_val, "\\\\" => "\\")
            _val = replace(_val, "\\`" => "`")
        else
            if endswith(_val, "\\\\")
                _val = _val[1:end - 1]
            end
            _val = replace(_val, "\\\"" => "\"")
        end
        expr = mLITERAL(sfullspan, sspan, _val, kindof(ps.t))
        if istrip
            adjust_lcp(expr)
            ret = EXPR(StringH, EXPR[expr], sfullspan, sspan)
        else
            return expr
        end
    else
        ret = EXPR(StringH, EXPR[], sfullspan, sspan)
        str2 = val(ps.t, ps)
        input = IOBuffer(str2)
        # Skip the opening delimiter; reset to 0 once the first piece has been emitted.
        startbytes = istrip ? 3 : 1
        seek(input, startbytes)
        # `b` collects the characters of the literal piece currently being read.
        b = IOBuffer()
        prevpos = position(input)
        while !eof(input)
            c = read(input, Char)
            if c == '\\'
                # Copy the backslash and the character it escapes verbatim.
                write(b, c)
                write(b, read(input, Char))
            elseif c == '$'
                # Flush the literal text gathered so far as its own piece.
                lspan = position(b)
                str = tostr(b)
                ex = mLITERAL(lspan + startbytes, lspan + startbytes, str, Tokens.STRING)
                push!(ret, ex)
                istrip && adjust_lcp(ex)
                startbytes = 0
                op = mOPERATOR(1, 1, Tokens.EX_OR, false)
                if peekchar(input) == '('
                    skip(input, 1) # skip past '('
                    lpfullspan = -position(input)
                    if iswhitespace(peekchar(input)) || peekchar(input) === '#'
                        read_ws_comment(input, readchar(input))
                    end
                    lparen = mPUNCTUATION(Tokens.LPAREN, lpfullspan + position(input) + 1, 1)
                    rparen = mPUNCTUATION(Tokens.RPAREN, 1, 1)

                    ps1 = ParseState(input)

                    if kindof(ps1.nt) === Tokens.RPAREN
                        call = mUnaryOpCall(op, EXPR(InvisBrackets, EXPR[lparen, rparen]))
                        push!(ret, call)
                        skip(input, 1)
                    else
                        interp = @closer ps1 :paren parse_expression(ps1)
                        call = mUnaryOpCall(op, EXPR(InvisBrackets, EXPR[lparen, interp, rparen]))
                        push!(ret, call)
                        seek(input, ps1.nt.startbyte + 1)
                    end
                    # Compared to flisp/JuliaParser, we have an extra lookahead token,
                    # so we need to back up one here
                elseif Tokenize.Lexers.iswhitespace(peekchar(input)) || peekchar(input) === '#'
                    push!(ret, mErrorToken(ps, op, StringInterpolationWithTrailingWhitespace))
                else
                    pos = position(input)
                    ps1 = ParseState(input)
                    next(ps1)
                    if kindof(ps1.t) === Tokens.WHITESPACE
                        error("Unexpecte whitespace after \$ in String")
                    else
                        t = INSTANCE(ps1)
                    end
                    # Attribute trailing whitespace to the string
                    t = adjustspan(t)
                    call = mUnaryOpCall(op, t)
                    push!(ret, call)
                    seek(input, pos + t.fullspan)
                end
            else
                write(b, c)
            end
            prevpos = loop_check(input, prevpos)
        end

        # handle last String section
        lspan = position(b)
        if b.size == 0
            ex = mErrorToken(ps, Unknown)
        else
            str = tostr(b)
            if istrip
                # Drop the three closing delimiter characters.
                str = str[1:prevind(str, lastindex(str), 3)]
                # only mark non-interpolated triple strings
                ex = mLITERAL(lspan + ps.nt.startbyte - ps.t.endbyte - 1 + startbytes, lspan + startbytes, str, length(ret) == 0 ? Tokens.TRIPLE_STRING : Tokens.STRING)
                adjust_lcp(ex, true)
            else
                # Drop the single closing delimiter character.
                str = str[1:prevind(str, lastindex(str))]
                ex = mLITERAL(lspan + ps.nt.startbyte - ps.t.endbyte - 1 + startbytes, lspan + startbytes, str, Tokens.STRING)
            end
        end
        push!(ret, ex)

    end

    single_string_T = (Tokens.STRING, kindof(ps.t))
    if istrip
        if lcp !== nothing && !isempty(lcp)
            # Remove the common indentation after every newline in each recorded literal.
            for expr in exprs_to_adjust
                for (i, a) in enumerate(ret.args)
                    if expr == a
                        ret.args[i].val = replace(valof(expr), "\n$lcp" => "\n")
                        break
                    end
                end
            end
        end
        # Drop leading newline
        if isliteral(ret.args[1]) && kindof(ret.args[1]) in single_string_T &&
                !isempty(valof(ret.args[1])) && valof(ret.args[1])[1] == '\n'
            ret.args[1] = dropleadlingnewline(ret.args[1])
        end
    end

    # A result consisting of one plain literal is returned as that literal itself.
    if (length(ret.args) == 1 && isliteral(ret.args[1]) && kindof(ret.args[1]) in single_string_T)
        ret = ret.args[1]
    end
    update_span!(ret)

    return ret
end
|
|
211 |
|
|
212 |
"""
    adjustspan(x::EXPR)

Overwrite `x.fullspan` with `x.span` (mutating `x`) and return `x`.
"""
adjustspan(x::EXPR) = (x.fullspan = x.span; x)
|
|
216 |
|
|
217 | 23 |
"""
    dropleadlingnewline(x::EXPR)

Build a new literal via `mLITERAL` with the same `fullspan`, `span` and kind as
`x`, whose value is `valof(x)` from index 2 onwards.
"""
function dropleadlingnewline(x::EXPR)
    trimmed = valof(x)[2:end]
    return mLITERAL(x.fullspan, x.span, trimmed, kindof(x))
end
Read our documentation on viewing source code.