Internally, these would treat the cast the same as a normal conversion from `int[7]` to `int[]`, which allows code to erroneously succeed at CTFE where it would raise a SEGV at run time.
1 |
/**
|
|
2 |
* Implements the lexical analyzer, which converts source code into lexical tokens.
|
|
3 |
*
|
|
4 |
* Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
|
|
5 |
*
|
|
6 |
* Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
|
|
7 |
* Authors: $(LINK2 http://www.digitalmars.com, Walter Bright)
|
|
8 |
* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
|
|
9 |
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
|
|
10 |
* Documentation: https://dlang.org/phobos/dmd_lexer.html
|
|
11 |
* Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
|
|
12 |
*/
|
|
13 |
|
|
14 |
module dmd.lexer; |
|
15 |
|
|
16 |
import core.stdc.ctype; |
|
17 |
import core.stdc.errno; |
|
18 |
import core.stdc.stdarg; |
|
19 |
import core.stdc.stdio; |
|
20 |
import core.stdc.stdlib : getenv; |
|
21 |
import core.stdc.string; |
|
22 |
import core.stdc.time; |
|
23 |
|
|
24 |
import dmd.entity; |
|
25 |
import dmd.errors; |
|
26 |
import dmd.globals; |
|
27 |
import dmd.id; |
|
28 |
import dmd.identifier; |
|
29 |
import dmd.root.ctfloat; |
|
30 |
import dmd.root.outbuffer; |
|
31 |
import dmd.root.port; |
|
32 |
import dmd.root.rmem; |
|
33 |
import dmd.root.string; |
|
34 |
import dmd.tokens; |
|
35 |
import dmd.utf; |
|
36 |
import dmd.utils; |
|
37 |
|
|
38 |
nothrow: |
|
39 |
|
|
40 |
private enum LS = 0x2028; // UTF line separator |
|
41 |
private enum PS = 0x2029; // UTF paragraph separator |
|
42 |
|
|
43 |
/********************************************
 * Character classification table, built at compile time.
 *
 * Entry `cmtable[c]` is the bitwise OR of the `CM*` flags that apply to
 * the byte value `c`.
 */
private static immutable cmtable = () {
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        ubyte flags;

        // digit and identifier classes
        if ('0' <= c && c <= '7')
            flags |= CMoctal;
        if (c_isxdigit(c))
            flags |= CMhex;
        if (c_isalnum(c) || c == '_')
            flags |= CMidchar;

        // characters flagged as CMzerosecond and/or CMdigitsecond
        switch (c)
        {
            case '0': .. case '9':
            case 'e', 'E', 'f', 'F', 'l', 'L', 'p', 'P', 'u', 'U':
            case 'i', '.', '_':
                flags |= CMzerosecond | CMdigitsecond;
                break;

            case 'x', 'X', 'b', 'B':
                flags |= CMzerosecond;
                break;

            default:
                break;
        }

        // every 7-bit character except these six is flagged CMsinglechar
        const bool excluded = c == '\\' || c == '\n' || c == '\r'
                           || c == 0 || c == 0x1A || c == '\'';
        if (!excluded && c < 0x80)
            flags |= CMsinglechar;

        table[c] = flags;
    }
    return table;
}();
|
|
97 |
|
|
98 |
// Bit flags combined into the entries of `cmtable`.
private enum CMoctal       = 1 << 0; // 0x01
private enum CMhex         = 1 << 1; // 0x02
private enum CMidchar      = 1 << 2; // 0x04
private enum CMzerosecond  = 1 << 3; // 0x08
private enum CMdigitsecond = 1 << 4; // 0x10
private enum CMsinglechar  = 1 << 5; // 0x20
|
|
107 |
|
|
108 |
/// Returns: `true` if `cmtable` carries the `CMoctal` flag for `c`.
private bool isoctal(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMoctal) ? true : false;
}
|
|
112 |
|
|
113 |
/// Returns: `true` if `cmtable` carries the `CMhex` flag for `c`.
private bool ishex(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMhex) ? true : false;
}
|
|
117 |
|
|
118 |
/// Returns: `true` if `cmtable` carries the `CMidchar` flag for `c`.
private bool isidchar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMidchar) ? true : false;
}
|
|
122 |
|
|
123 |
/// Returns: `true` if `cmtable` carries the `CMzerosecond` flag for `c`.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMzerosecond) ? true : false;
}
|
|
127 |
|
|
128 |
/// Returns: `true` if `cmtable` carries the `CMdigitsecond` flag for `c`.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMdigitsecond) ? true : false;
}
|
|
132 |
|
|
133 |
/// Returns: `true` if `cmtable` carries the `CMsinglechar` flag for `c`.
private bool issinglechar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMsinglechar) ? true : false;
}
|
|
137 |
|
|
138 |
/// Returns: `true` if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
private bool c_isxdigit(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;
        default:
            return false;
    }
}
|
|
144 |
|
|
145 |
/// Returns: `true` if `c` is an ASCII letter or decimal digit.
private bool c_isalnum(const int c) pure @nogc @safe
{
    switch (c)
    {
        case '0': .. case '9':
        case 'a': .. case 'z':
        case 'A': .. case 'Z':
            return true;
        default:
            return false;
    }
}
|
|
151 |
|
|
152 |
unittest
{
    // Smoke test: a lone `int` keyword lexes to TOK.int32, after which the
    // lexer keeps reporting end of file on every further call.
    string text = "int"; // We rely on the implicit null-terminator
    scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);

    assert(lex1.nextToken() == TOK.int32);
    foreach (_; 0 .. 3)
        assert(lex1.nextToken() == TOK.endOfFile);
}
|
|
170 |
|
|
171 |
unittest
{
    // Silence the Lexer's diagnostics for the duration of this test.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    // Malformed input must still terminate with TOK.endOfFile.
    // Every testcase ends with 0 or 0x1A.
    static immutable char[][] testcases =
    [
        [0],               // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0],
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0);
        TOK tok = lex2.nextToken();

        // Bound the number of extra calls by the input length so a lexer
        // that never reaches the end cannot hang the test.
        for (size_t iterations = 1;
             tok != TOK.endOfFile && iterations < testcase.length;
             iterations++)
        {
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);

        // and it must stay at end of file
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
|
|
205 |
|
|
206 |
/***********************************************************
|
|
207 |
*/
|
|
208 |
class Lexer |
|
209 |
{
|
|
210 |
private __gshared OutBuffer stringbuffer; |
|
211 |
|
|
212 |
Loc scanloc; // for error messages |
|
213 |
Loc prevloc; // location of token before current |
|
214 |
|
|
215 |
const(char)* p; // current character |
|
216 |
|
|
217 |
Token token; |
|
218 |
|
|
219 |
private
|
|
220 |
{
|
|
221 |
const(char)* base; // pointer to start of buffer |
|
222 |
const(char)* end; // pointer to last element of buffer |
|
223 |
const(char)* line; // start of current line |
|
224 |
|
|
225 |
bool doDocComment; // collect doc comment information |
|
226 |
bool anyToken; // seen at least one token |
|
227 |
bool commentToken; // comments are TOK.comment's |
|
228 |
int inTokenStringConstant; // can be larger than 1 when in nested q{} strings |
|
229 |
int lastDocLine; // last line of previous doc comment |
|
230 |
|
|
231 |
Token* tokenFreelist; |
|
232 |
}
|
|
233 |
|
|
234 |
nothrow: |
|
235 |
|
|
236 |
/*********************
 * Creates a Lexer for the source code base[begoffset..endoffset+1].
 * The last character, base[endoffset], must be null (0) or EOF (0x1A).
 *
 * When the text at `begoffset` begins with `#!`, that whole first line is
 * skipped before any token is scanned.
 *
 * Params:
 *  filename = used for error messages
 *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
 *  begoffset = starting offset into base[]
 *  endoffset = the last offset to read into base[]
 *  doDocComment = handle documentation comments
 *  commentToken = comments become TOK.comment's
 */
this(const(char)* filename, const(char)* base, size_t begoffset,
    size_t endoffset, bool doDocComment, bool commentToken) pure
{
    scanloc = Loc(filename, 1, 1);
    token = Token.init;

    // buffer pointers
    this.base = base;
    this.end = base + endoffset;
    p = base + begoffset;
    line = p;

    // options and bookkeeping
    this.doDocComment = doDocComment;
    this.commentToken = commentToken;
    this.inTokenStringConstant = 0;
    this.lastDocLine = 0;

    // If the first line starts with '#!', ignore the line
    if (p && p[0] == '#' && p[1] == '!')
    {
        p += 2;
        while (true)
        {
            const char c = *p++;
            if (c == '\n')
                break;
            if (c == 0 || c == 0x1A)
            {
                // leave `p` on the terminator
                p--;
                break;
            }
        }
        endOfLine();
    }
}
|
|
288 |
|
|
289 |
/// Returns: a `Token` popped from the freelist when one is available,
/// otherwise a newly allocated `Token`.
Token* allocateToken() pure nothrow @safe
{
    Token* recycled = tokenFreelist;
    if (recycled is null)
        return new Token();

    // unlink the head of the freelist and hand it out
    tokenFreelist = recycled.next;
    recycled.next = null;
    return recycled;
}
|
|
301 |
|
|
302 |
/// Puts `token` back at the head of the freelist so that `allocateToken`
/// can hand it out again.
private void releaseToken(Token* token) pure nothrow @nogc @safe
{
    // The token is reset only when the GC is enabled.
    // NOTE(review): presumably this drops stale references for the GC — confirm.
    if (mem.isGCEnabled)
    {
        *token = Token.init;
    }

    token.next = tokenFreelist;
    tokenFreelist = token;
}
|
|
310 |
|
|
311 |
/// Advances to the next token and stores it in `token`.
/// A token already queued on `token.next` (by `peek`) is consumed first;
/// otherwise a new token is scanned from the source.
/// Returns: the `TOK` value of the new current token.
final TOK nextToken()
{
    prevloc = token.loc;

    Token* queued = token.next;
    if (queued is null)
    {
        scan(&token);
    }
    else
    {
        // take over the look-ahead token, then recycle its storage
        memcpy(&token, queued, Token.sizeof);
        releaseToken(queued);
    }
    //printf(token.toChars());
    return token.value;
}
|
|
327 |
|
|
328 |
/***********************
 * Look ahead at the value of the token following the current one,
 * without consuming it.
 */
final TOK peekNext()
{
    Token* ahead = peek(&token);
    return ahead.value;
}
|
|
335 |
|
|
336 |
/***********************
 * Look two tokens ahead of the current one and return that token's value,
 * without consuming anything.
 */
final TOK peekNext2()
{
    return peek(peek(&token)).value;
}
|
|
344 |
|
|
345 |
/****************************
 * Turn next token in buffer into a token.
 *
 * Scanning starts at `p`. White space is skipped, and so are comments
 * unless `commentToken` is set, in which case they are returned as
 * `TOK.comment`. When `doDocComment` is set, documentation comments are
 * handed to `getDocComment`.
 * At the terminating 0 or 0x1A character `t.value` becomes `TOK.endOfFile`
 * and `p` is not advanced, so repeated calls keep returning end of file.
 *
 * Params:
 *  t = the token to fill in
 */
final void scan(Token* t)
{
    const lastLine = scanloc.linnum;
    Loc startLoc;
    t.blockComment = null;
    t.lineComment = null;

    while (1)
    {
        t.ptr = p;
        //printf("p = %p, *p = '%c'\n",p,*p);
        t.loc = loc();
        switch (*p)
        {
        case 0:
        case 0x1A:
            t.value = TOK.endOfFile; // end of file
            // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
            return;
        case ' ':
        case '\t':
        case '\v':
        case '\f':
            p++;
            continue; // skip white space
        case '\r':
            p++;
            if (*p != '\n') // if CR stands by itself
            {
                endOfLine();
                goto skipFourSpaces;
            }
            continue; // skip white space
        case '\n':
            p++;
            endOfLine();
        skipFourSpaces:
            // Skip indentation four spaces at a time. The bytes are compared
            // one by one: the short-circuit stops at the first non-space, so
            // this never reads past the 0/0x1A terminator and never performs
            // an unaligned 4-byte load.
            while (p[0] == ' ' && p[1] == ' ' && p[2] == ' ' && p[3] == ' ')
            {
                p += 4;
            }
            continue; // skip white space
        case '0':
            if (!isZeroSecond(p[1])) // if numeric literal does not continue
            {
                ++p;
                t.unsvalue = 0;
                t.value = TOK.int32Literal;
                return;
            }
            goto Lnumber;

        case '1': .. case '9':
            if (!isDigitSecond(p[1])) // if numeric literal does not continue
            {
                t.unsvalue = *p - '0';
                ++p;
                t.value = TOK.int32Literal;
                return;
            }
        Lnumber:
            t.value = number(t);
            return;

        case '\'':
            if (issinglechar(p[1]) && p[2] == '\'')
            {
                t.unsvalue = p[1]; // simple one character literal
                t.value = TOK.charLiteral;
                p += 3;
            }
            else
                t.value = charConstant(t);
            return;
        case 'r':
            if (p[1] != '"')
                goto case_ident;
            p++;
            goto case '`';
        case '`':
            wysiwygStringConstant(t);
            return;
        case 'x':
            if (p[1] != '"')
                goto case_ident;
            p++;
            auto start = p;
            auto hexString = new OutBuffer();
            t.value = hexStringConstant(t);
            hexString.write(start[0 .. p - start]);
            error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
            return;
        case 'q':
            if (p[1] == '"')
            {
                p++;
                delimitedStringConstant(t);
                return;
            }
            else if (p[1] == '{')
            {
                p++;
                tokenStringConstant(t);
                return;
            }
            else
                goto case_ident;
        case '"':
            escapeStringConstant(t);
            return;
        // Identifier start characters; 'q', 'r' and 'x' have their own
        // cases above because they may introduce string literals.
        case 'a': .. case 'p':
        case 's': .. case 'w':
        case 'y':
        case 'z':
        case 'A': .. case 'Z':
        case '_':
        case_ident:
            {
                while (1)
                {
                    const c = *++p;
                    if (isidchar(c))
                        continue;
                    else if (c & 0x80)
                    {
                        const s = p;
                        const u = decodeUTF();
                        if (isUniAlpha(u))
                            continue;
                        error("char 0x%04x not allowed in identifier", u);
                        p = s;
                    }
                    break;
                }
                Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
                t.ident = id;
                t.value = cast(TOK)id.getValue();
                anyToken = true;
                if (*t.ptr == '_') // if special identifier token
                {
                    // Lazy initialization
                    TimeStampInfo.initialize(t.loc);

                    if (id == Id.DATE)
                    {
                        t.ustring = TimeStampInfo.date.ptr;
                        goto Lstr;
                    }
                    else if (id == Id.TIME)
                    {
                        t.ustring = TimeStampInfo.time.ptr;
                        goto Lstr;
                    }
                    else if (id == Id.VENDOR)
                    {
                        t.ustring = global.vendor.xarraydup.ptr;
                        goto Lstr;
                    }
                    else if (id == Id.TIMESTAMP)
                    {
                        t.ustring = TimeStampInfo.timestamp.ptr;
                    Lstr:
                        t.value = TOK.string_;
                        t.postfix = 0;
                        t.len = cast(uint)strlen(t.ustring);
                    }
                    else if (id == Id.VERSIONX)
                    {
                        t.value = TOK.int64Literal;
                        t.unsvalue = global.versionNumber();
                    }
                    else if (id == Id.EOFX)
                    {
                        t.value = TOK.endOfFile;
                        // Advance scanner to end of file
                        while (!(*p == 0 || *p == 0x1A))
                            p++;
                    }
                }
                //printf("t.value = %d\n",t.value);
                return;
            }
        case '/':
            p++;
            switch (*p)
            {
            case '=':
                p++;
                t.value = TOK.divAssign;
                return;
            case '*':
                p++;
                startLoc = loc();
                while (1)
                {
                    // advance to the next '/', tracking line ends on the way
                    while (1)
                    {
                        const c = *p;
                        switch (c)
                        {
                        case '/':
                            break;
                        case '\n':
                            endOfLine();
                            p++;
                            continue;
                        case '\r':
                            p++;
                            if (*p != '\n')
                                endOfLine();
                            continue;
                        case 0:
                        case 0x1A:
                            error("unterminated /* */ comment");
                            p = end;
                            t.loc = loc();
                            t.value = TOK.endOfFile;
                            return;
                        default:
                            if (c & 0x80)
                            {
                                const u = decodeUTF();
                                if (u == PS || u == LS)
                                    endOfLine();
                            }
                            p++;
                            continue;
                        }
                        break;
                    }
                    p++;
                    // the comment ends at "*/", but the '*' of the opening
                    // "/*" does not count (so "/*/" is not a full comment)
                    if (p[-2] == '*' && p - 3 != t.ptr)
                        break;
                }
                if (commentToken)
                {
                    t.loc = startLoc;
                    t.value = TOK.comment;
                    return;
                }
                else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
                {
                    // if /** but not /**/
                    getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                    lastDocLine = scanloc.linnum;
                }
                continue;
            case '/': // do // style comments
                startLoc = loc();
                while (1)
                {
                    const c = *++p;
                    switch (c)
                    {
                    case '\n':
                        break;
                    case '\r':
                        if (p[1] == '\n')
                            p++;
                        break;
                    case 0:
                    case 0x1A:
                        if (commentToken)
                        {
                            p = end;
                            t.loc = startLoc;
                            t.value = TOK.comment;
                            return;
                        }
                        if (doDocComment && t.ptr[2] == '/')
                        {
                            getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                            lastDocLine = scanloc.linnum;
                        }
                        p = end;
                        t.loc = loc();
                        t.value = TOK.endOfFile;
                        return;
                    default:
                        if (c & 0x80)
                        {
                            const u = decodeUTF();
                            if (u == PS || u == LS)
                                break;
                        }
                        continue;
                    }
                    break;
                }
                if (commentToken)
                {
                    p++;
                    endOfLine();
                    t.loc = startLoc;
                    t.value = TOK.comment;
                    return;
                }
                if (doDocComment && t.ptr[2] == '/')
                {
                    getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                    lastDocLine = scanloc.linnum;
                }
                p++;
                endOfLine();
                continue;
            case '+':
                {
                    int nest;
                    startLoc = loc();
                    p++;
                    nest = 1; // depth of /+ +/ nesting
                    while (1)
                    {
                        char c = *p;
                        switch (c)
                        {
                        case '/':
                            p++;
                            if (*p == '+')
                            {
                                p++;
                                nest++;
                            }
                            continue;
                        case '+':
                            p++;
                            if (*p == '/')
                            {
                                p++;
                                if (--nest == 0)
                                    break;
                            }
                            continue;
                        case '\r':
                            p++;
                            if (*p != '\n')
                                endOfLine();
                            continue;
                        case '\n':
                            endOfLine();
                            p++;
                            continue;
                        case 0:
                        case 0x1A:
                            error("unterminated /+ +/ comment");
                            p = end;
                            t.loc = loc();
                            t.value = TOK.endOfFile;
                            return;
                        default:
                            if (c & 0x80)
                            {
                                uint u = decodeUTF();
                                if (u == PS || u == LS)
                                    endOfLine();
                            }
                            p++;
                            continue;
                        }
                        break;
                    }
                    if (commentToken)
                    {
                        t.loc = startLoc;
                        t.value = TOK.comment;
                        return;
                    }
                    if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
                    {
                        // if /++ but not /++/
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                        lastDocLine = scanloc.linnum;
                    }
                    continue;
                }
            default:
                break;
            }
            t.value = TOK.div;
            return;
        case '.':
            p++;
            if (isdigit(*p))
            {
                /* Note that we don't allow ._1 and ._ as being
                 * valid floating point numbers.
                 */
                p--;
                t.value = inreal(t);
            }
            else if (p[0] == '.')
            {
                if (p[1] == '.')
                {
                    p += 2;
                    t.value = TOK.dotDotDot;
                }
                else
                {
                    p++;
                    t.value = TOK.slice;
                }
            }
            else
                t.value = TOK.dot;
            return;
        case '&':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.andAssign;
            }
            else if (*p == '&')
            {
                p++;
                t.value = TOK.andAnd;
            }
            else
                t.value = TOK.and;
            return;
        case '|':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.orAssign;
            }
            else if (*p == '|')
            {
                p++;
                t.value = TOK.orOr;
            }
            else
                t.value = TOK.or;
            return;
        case '-':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.minAssign;
            }
            else if (*p == '-')
            {
                p++;
                t.value = TOK.minusMinus;
            }
            else
                t.value = TOK.min;
            return;
        case '+':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.addAssign;
            }
            else if (*p == '+')
            {
                p++;
                t.value = TOK.plusPlus;
            }
            else
                t.value = TOK.add;
            return;
        case '<':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.lessOrEqual; // <=
            }
            else if (*p == '<')
            {
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.leftShiftAssign; // <<=
                }
                else
                    t.value = TOK.leftShift; // <<
            }
            else
                t.value = TOK.lessThan; // <
            return;
        case '>':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.greaterOrEqual; // >=
            }
            else if (*p == '>')
            {
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.rightShiftAssign; // >>=
                }
                else if (*p == '>')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.unsignedRightShiftAssign; // >>>=
                    }
                    else
                        t.value = TOK.unsignedRightShift; // >>>
                }
                else
                    t.value = TOK.rightShift; // >>
            }
            else
                t.value = TOK.greaterThan; // >
            return;
        case '!':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.notEqual; // !=
            }
            else
                t.value = TOK.not; // !
            return;
        case '=':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.equal; // ==
            }
            else if (*p == '>')
            {
                p++;
                t.value = TOK.goesTo; // =>
            }
            else
                t.value = TOK.assign; // =
            return;
        case '~':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.concatenateAssign; // ~=
            }
            else
                t.value = TOK.tilde; // ~
            return;
        case '^':
            p++;
            if (*p == '^')
            {
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.powAssign; // ^^=
                }
                else
                    t.value = TOK.pow; // ^^
            }
            else if (*p == '=')
            {
                p++;
                t.value = TOK.xorAssign; // ^=
            }
            else
                t.value = TOK.xor; // ^
            return;
        case '(':
            p++;
            t.value = TOK.leftParentheses;
            return;
        case ')':
            p++;
            t.value = TOK.rightParentheses;
            return;
        case '[':
            p++;
            t.value = TOK.leftBracket;
            return;
        case ']':
            p++;
            t.value = TOK.rightBracket;
            return;
        case '{':
            p++;
            t.value = TOK.leftCurly;
            return;
        case '}':
            p++;
            t.value = TOK.rightCurly;
            return;
        case '?':
            p++;
            t.value = TOK.question;
            return;
        case ',':
            p++;
            t.value = TOK.comma;
            return;
        case ';':
            p++;
            t.value = TOK.semicolon;
            return;
        case ':':
            p++;
            t.value = TOK.colon;
            return;
        case '$':
            p++;
            t.value = TOK.dollar;
            return;
        case '@':
            p++;
            t.value = TOK.at;
            return;
        case '*':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.mulAssign;
            }
            else
                t.value = TOK.mul;
            return;
        case '%':
            p++;
            if (*p == '=')
            {
                p++;
                t.value = TOK.modAssign;
            }
            else
                t.value = TOK.mod;
            return;
        case '#':
            {
                p++;
                // scan the token after '#' to recognise `#line`
                Token n;
                scan(&n);
                if (n.value == TOK.identifier)
                {
                    if (n.ident == Id.line)
                    {
                        poundLine();
                        continue;
                    }
                    else
                    {
                        const locx = loc();
                        warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
                    }
                }
                else if (n.value == TOK.if_)
                {
                    error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
                }
                t.value = TOK.pound;
                return;
            }
        default:
            {
                dchar c = *p;
                if (c & 0x80)
                {
                    c = decodeUTF();
                    // Check for start of unicode identifier
                    if (isUniAlpha(c))
                        goto case_ident;
                    if (c == PS || c == LS)
                    {
                        endOfLine();
                        p++;
                        continue;
                    }
                }
                if (c < 0x80 && isprint(c))
                    error("character '%c' is not a valid token", c);
                else
                    error("character 0x%02x is not a valid token", c);
                p++;
                continue;
            }
        }
    }
}
|
|
1088 |
|
|
1089 |
/// Returns the token that follows `ct`.
/// If no token is linked after `ct` yet, one is allocated, scanned from the
/// source and linked as `ct.next`.
final Token* peek(Token* ct)
{
    if (ct.next is null)
    {
        Token* fresh = allocateToken();
        scan(fresh);
        ct.next = fresh;
    }
    return ct.next;
}
|
|
1102 |
|
|
1103 |
/*********************************
 * tk is on the opening (.
 * Look ahead and return token that is past the closing ).
 *
 * The search also stops, returning the token it stopped on, at an
 * unbalanced `}`, at a `;` outside of any `{ }`, and at end of file.
 */
final Token* peekPastParen(Token* tk)
{
    //printf("peekPastParen()\n");
    int parens = 1;
    int curlynest = 0;
    while (true)
    {
        tk = peek(tk);
        //tk.print();
        switch (tk.value)
        {
        case TOK.leftParentheses:
            parens++;
            break;
        case TOK.rightParentheses:
            // the matching close paren: answer is the token after it
            if (--parens == 0)
                return peek(tk);
            break;
        case TOK.leftCurly:
            curlynest++;
            break;
        case TOK.rightCurly:
            if (--curlynest < 0)
                return tk;
            break;
        case TOK.semicolon:
            if (curlynest == 0)
                return tk;
            break;
        case TOK.endOfFile:
            return tk;
        default:
            break;
        }
    }
}
|
|
1146 |
|
|
1147 |
/*******************************************
 * Parse the escape sequence at the current position `p`.
 *
 * Forwards to the static overload with the current token's location;
 * `p` is passed by reference to that overload.
 */
private uint escapeSequence()
{
    const uint decoded = Lexer.escapeSequence(token.loc, p);
    return decoded;
}
|
|
1154 |
|
|
1155 |
/**
Parse the given string literal escape sequence into a single character.

Handles the single-character escapes, `\x`, `\u` and `\U` hex escapes,
`\&name;` named character entities and octal escapes. Malformed sequences
are reported with `.error` at `loc`.
Params:
    loc = the location of the current token
    sequence = pointer to string with escape sequence to parse (the character
               after the backslash). This is a reference variable that is
               also used to return the position after the sequence
Returns:
    the escaped sequence as a single character
*/
private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence)
{
    const(char)* p = sequence; // cache sequence reference on stack
    scope(exit) sequence = p;  // publish the final position on every exit path

    uint c = *p;
    int ndigits;
    switch (c)
    {
    case '\'':
    case '"':
    case '?':
    case '\\':
    Lconsume:
        p++;
        break;
    case 'a':
        c = 7;
        goto Lconsume;
    case 'b':
        c = 8;
        goto Lconsume;
    case 'f':
        c = 12;
        goto Lconsume;
    case 'n':
        c = 10;
        goto Lconsume;
    case 'r':
        c = 13;
        goto Lconsume;
    case 't':
        c = 9;
        goto Lconsume;
    case 'v':
        c = 11;
        goto Lconsume;
    case 'u':
        ndigits = 4;
        goto Lhex;
    case 'U':
        ndigits = 8;
        goto Lhex;
    case 'x':
        ndigits = 2;
    Lhex:
        p++;
        c = *p;
        if (ishex(cast(char)c))
        {
            uint v = 0;
            int n = 0;
            while (1)
            {
                // convert the current hex digit to its numeric value
                if (isdigit(cast(char)c))
                    c -= '0';
                else if (islower(c))
                    c -= 'a' - 10;
                else
                    c -= 'A' - 10;
                v = v * 16 + c;
                c = *++p;
                if (++n == ndigits)
                    break;
                if (!ishex(cast(char)c))
                {
                    .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
                    break;
                }
            }
            // \x may produce any byte value; \u and \U must be valid code points
            if (ndigits != 2 && !utf_isValidDchar(v))
            {
                .error(loc, "invalid UTF character \\U%08x", v);
                v = '?'; // recover with valid UTF character
            }
            c = v;
        }
        else
        {
            .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
            // Skip the offending character, but never step over the
            // end-of-file marker: like `case 0: case 0x1A:` below, leave it
            // for the caller to see.
            if (c != 0 && c != 0x1A)
                p++;
        }
        break;
    case '&':
        // named character entity
        for (const idstart = ++p; 1; p++)
        {
            switch (*p)
            {
            case ';':
                c = HtmlNamedEntity(idstart, p - idstart);
                if (c == ~0)
                {
                    .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
                    c = '?';
                }
                p++;
                break;
            default:
                // entity names are letters, with digits allowed after the first character
                if (isalpha(*p) || (p != idstart && isdigit(*p)))
                    continue;
                .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
                c = '?';
                break;
            }
            break;
        }
        break;
    case 0:
    case 0x1A:
        // end of file
        c = '\\';
        break;
    default:
        if (isoctal(cast(char)c))
        {
            // up to three octal digits
            uint v = 0;
            int n = 0;
            do
            {
                v = v * 8 + (c - '0');
                c = *++p;
            }
            while (++n < 3 && isoctal(cast(char)c));
            c = v;
            if (c > 0xFF)
                .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
        }
        else
        {
            .error(loc, "undefined escape sequence \\%c", c);
            p++;
        }
        break;
    }
    return c;
}
|
|
1301 |
|
|
1302 |
/**
Lex a wysiwyg string. `p` must be pointing to the first character before the
contents of the string literal. The character pointed to by `p` will be used as
the terminating character (i.e. backtick or double-quote).

No escape processing is done. A lone `\r` is stored as `\n` and the `\r` of a
`\r\n` pair is dropped.
Params:
    result = pointer to the token that accepts the result
*/
private void wysiwygStringConstant(Token* result)
{
    result.value = TOK.string_;
    const Loc start = loc();
    const terminator = *p++;
    stringbuffer.setsize(0);
    for (;;)
    {
        dchar c = *p++;
        if (c == '\n')
        {
            endOfLine();
        }
        else if (c == '\r')
        {
            if (*p == '\n')
                continue; // ignore
            c = '\n'; // treat EndOfLine as \n character
            endOfLine();
        }
        else if (c == 0 || c == 0x1A)
        {
            error("unterminated string constant starting at %s", start.toChars());
            result.setString();
            // rewind `p` so it points to the EOF character
            p--;
            return;
        }
        else if (c == terminator)
        {
            result.setString(stringbuffer);
            stringPostfix(result);
            return;
        }
        else if (c & 0x80)
        {
            // non-ASCII: decode the whole code point starting at the lead byte
            p--;
            const u = decodeUTF();
            p++;
            if (u == PS || u == LS)
                endOfLine();
            stringbuffer.writeUTF8(u);
            continue;
        }
        stringbuffer.writeByte(c);
    }
}
|
|
1360 |
|
|
1361 |
/**************************************
 * Lex hex strings:
 *      x"0A ae 34FE BD"
 *
 * Pairs of hex digits are combined into bytes and collected in
 * `stringbuffer`. White space and line ends between digits are skipped.
 * Params:
 *      t = the token that receives the string
 * Returns:
 *      TOK.hexadecimalString
 */
private TOK hexStringConstant(Token* t)
{
    Loc start = loc();
    uint n = 0;  // number of hex digits seen so far
    uint v = ~0; // dead assignment, needed to suppress warning
    p++;
    stringbuffer.setsize(0);
    while (1)
    {
        dchar c = *p++;
        switch (c)
        {
        case ' ':
        case '\t':
        case '\v':
        case '\f':
            continue; // skip white space
        case '\r':
            if (*p == '\n')
                continue; // ignore '\r' if followed by '\n'
            // Treat isolated '\r' as if it were a '\n'
            goto case '\n';
        case '\n':
            endOfLine();
            continue;
        case 0:
        case 0x1A:
            error("unterminated string constant starting at %s", start.toChars());
            t.setString();
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            return TOK.hexadecimalString;
        case '"':
            if (n & 1)
            {
                error("odd number (%d) of hex characters in hex string", n);
                stringbuffer.writeByte(v);
            }
            t.setString(stringbuffer);
            stringPostfix(t);
            return TOK.hexadecimalString;
        default:
            if (c >= '0' && c <= '9')
                c -= '0';
            else if (c >= 'a' && c <= 'f')
                c -= 'a' - 10;
            else if (c >= 'A' && c <= 'F')
                c -= 'A' - 10;
            else if (c & 0x80)
            {
                p--;
                const u = decodeUTF();
                p++;
                if (u == PS || u == LS)
                {
                    // A Unicode line/paragraph separator is an end of line,
                    // exactly like '\n' above: it carries no data, so it must
                    // not be folded into the current byte or counted as a digit.
                    endOfLine();
                    continue;
                }
                error("non-hex character \\u%04x in hex string", u);
            }
            else
                error("non-hex character '%c' in hex string", c);
            if (n & 1)
            {
                // second nibble: complete the byte
                v = (v << 4) | c;
                stringbuffer.writeByte(v);
            }
            else
                v = c; // first nibble
            n++;
            break;
        }
    }
    assert(0); // see bug 15731
}
|
|
1438 |
|
|
1439 |
/**
Lex a delimited string. Some examples of delimited strings are:
---
q"(foo(xxx))"      // "foo(xxx)"
q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
q"/foo]/"          // "foo]"
q"HERE
foo
HERE"              // "foo\n"
---
It is assumed that `p` points to the opening double-quote '"'.
Params:
    result = pointer to the token that accepts the result
*/
private void delimitedStringConstant(Token* result)
{
    result.value = TOK.string_;
    Loc start = loc();
    dchar delimleft = 0;
    dchar delimright = 0;
    uint nest = 1;               // 1 while the delimiters are a nesting bracket pair
    uint nestcount = ~0;         // dead assignment, needed to suppress warning
    Identifier hereid = null;    // heredoc identifier, if the delimiter is one
    uint blankrol = 0;           // set while the rest of the heredoc opening line must be blank
    uint startline = 0;          // set when the next character is the first on its line
    p++;
    stringbuffer.setsize(0);
    while (1)
    {
        dchar c = *p++;
        //printf("c = '%c'\n", c);
        switch (c)
        {
        case '\n':
        Lnextline:
            endOfLine();
            startline = 1;
            if (blankrol)
            {
                blankrol = 0;
                continue;
            }
            if (hereid)
            {
                stringbuffer.writeUTF8(c);
                continue;
            }
            break;
        case '\r':
            if (*p == '\n')
                continue; // ignore
            c = '\n'; // treat EndOfLine as \n character
            goto Lnextline;
        case 0:
        case 0x1A:
            error("unterminated delimited string constant starting at %s", start.toChars());
            result.setString();
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            return;
        default:
            if (c & 0x80)
            {
                p--;
                c = decodeUTF();
                p++;
                if (c == PS || c == LS)
                    goto Lnextline;
            }
            break;
        }
        if (delimleft == 0)
        {
            // first character after the quote: it determines the delimiter
            delimleft = c;
            nest = 1;
            nestcount = 1;
            if (c == '(')
                delimright = ')';
            else if (c == '{')
                delimright = '}';
            else if (c == '[')
                delimright = ']';
            else if (c == '<')
                delimright = '>';
            // `c` may be a decoded code point well above 0xFF; the C ctype
            // functions are only defined for unsigned char values, so they
            // are consulted for ASCII only.
            else if (c < 0x80 ? (isalpha(c) || c == '_') : isUniAlpha(c))
            {
                // Start of identifier; must be a heredoc
                Token tok;
                p--;
                scan(&tok); // read in heredoc identifier
                if (tok.value != TOK.identifier)
                {
                    error("identifier expected for heredoc, not %s", tok.toChars());
                    delimright = c;
                }
                else
                {
                    hereid = tok.ident;
                    //printf("hereid = '%s'\n", hereid.toChars());
                    blankrol = 1;
                }
                nest = 0;
            }
            else
            {
                delimright = c;
                nest = 0;
                // same ctype domain restriction as above
                if (c < 0x80 && isspace(c))
                    error("delimiter cannot be whitespace");
            }
        }
        else
        {
            if (blankrol)
            {
                error("heredoc rest of line should be blank");
                blankrol = 0;
                continue;
            }
            if (nest == 1)
            {
                if (c == delimleft)
                    nestcount++;
                else if (c == delimright)
                {
                    nestcount--;
                    if (nestcount == 0)
                        goto Ldone;
                }
            }
            else if (c == delimright)
                goto Ldone;
            if (startline && (c < 0x80 ? (isalpha(c) || c == '_') : isUniAlpha(c)) && hereid)
            {
                // an identifier at the start of a line may be the closing heredoc identifier
                Token tok;
                auto psave = p;
                p--;
                scan(&tok); // read in possible heredoc identifier
                //printf("endid = '%s'\n", tok.ident.toChars());
                if (tok.value == TOK.identifier && tok.ident is hereid)
                {
                    /* should check that rest of line is blank
                     */
                    goto Ldone;
                }
                p = psave; // not the terminator: rescan it as ordinary content
            }
            stringbuffer.writeUTF8(c);
            startline = 0;
        }
    }
Ldone:
    if (*p == '"')
        p++;
    else if (hereid)
        error("delimited string must end in %s\"", hereid.toChars());
    else
        error("delimited string must end in %c\"", delimright);
    result.setString(stringbuffer);
    stringPostfix(result);
}
|
|
1600 |
|
|
1601 |
/**
Lex a token string. Some examples of token strings are:
---
q{ foo(xxx) }    // " foo(xxx) "
q{foo$(LPAREN)}  // "foo$(LPAREN)"
q{{foo}"}"}      // "{foo}"}""
---
It is assumed that `p` points to the opening curly-brace '{'.

The contents are scanned token by token until the matching closing brace;
the string value is the source text between the braces.
Params:
    result = pointer to the token that accepts the result
*/
private void tokenStringConstant(Token* result)
{
    result.value = TOK.string_;

    uint depth = 1;
    const start = loc();
    const contents = ++p;
    inTokenStringConstant++;
    scope(exit) inTokenStringConstant--;
    for (;;)
    {
        Token tok;
        scan(&tok);
        if (tok.value == TOK.leftCurly)
        {
            depth++;
        }
        else if (tok.value == TOK.rightCurly)
        {
            if (--depth == 0)
            {
                // `p` is just past the closing brace; exclude the brace itself
                result.setString(contents, p - 1 - contents);
                stringPostfix(result);
                return;
            }
        }
        else if (tok.value == TOK.endOfFile)
        {
            error("unterminated token string constant starting at %s", start.toChars());
            result.setString();
            return;
        }
    }
}
|
|
1647 |
|
|
1648 |
/**
Scan a double-quoted string while building the processed string value by
handling escape sequences. The result is returned in the given `t` token.
This function assumes that `p` currently points to the opening double-quote
of the string.
Params:
    t = the token to set the resulting string to
*/
private void escapeStringConstant(Token* t)
{
    t.value = TOK.string_;

    const start = loc();
    p++;
    stringbuffer.setsize(0);
    for (;;)
    {
        dchar c = *p++;
        switch (c)
        {
        case '\\':
        {
            // \u, \U and \& escapes are stored UTF-8 encoded;
            // every other escape is stored as a single byte
            const introducer = *p;
            c = escapeSequence();
            if (introducer == 'u' || introducer == 'U' || introducer == '&')
            {
                stringbuffer.writeUTF8(c);
                continue;
            }
            break;
        }
        case '\n':
            endOfLine();
            break;
        case '\r':
            if (*p == '\n')
                continue; // ignore
            c = '\n'; // treat EndOfLine as \n character
            endOfLine();
            break;
        case '"':
            t.setString(stringbuffer);
            stringPostfix(t);
            return;
        case 0:
        case 0x1A:
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            error("unterminated string constant starting at %s", start.toChars());
            t.setString();
            return;
        default:
            if (c & 0x80)
            {
                // non-ASCII: decode the whole code point starting at the lead byte
                p--;
                c = decodeUTF();
                if (c == LS || c == PS)
                {
                    c = '\n';
                    endOfLine();
                }
                p++;
                stringbuffer.writeUTF8(c);
                continue;
            }
            break;
        }
        stringbuffer.writeByte(c);
    }
}
|
|
1721 |
|
|
1722 |
/**************************************
 * Lex a character literal. `p` points to the opening single quote.
 *
 * The character value is stored in `t.unsvalue`; on a malformed literal an
 * error is reported and '?' is stored instead.
 * Params:
 *      t = the token that receives the character value
 * Returns:
 *      TOK.charLiteral, TOK.wcharLiteral or TOK.dcharLiteral
 */
private TOK charConstant(Token* t)
{
    TOK tk = TOK.charLiteral;
    //printf("Lexer::charConstant\n");
    p++;
    dchar c = *p++;
    switch (c)
    {
    case '\\':
    {
        // the escape introducer decides the literal's character type
        const introducer = *p;
        t.unsvalue = escapeSequence();
        if (introducer == 'u')
            tk = TOK.wcharLiteral;
        else if (introducer == 'U' || introducer == '&')
            tk = TOK.dcharLiteral;
        break;
    }
    case '\n':
    Lnewline:
        endOfLine();
        goto Lunterminated;
    case 0:
    case 0x1A:
        // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
        p--;
        goto Lunterminated;
    case '\r':
    case '\'':
    Lunterminated:
        error("unterminated character constant");
        t.unsvalue = '?';
        return tk;
    default:
        if (c & 0x80)
        {
            p--;
            c = decodeUTF();
            p++;
            if (c == LS || c == PS)
                goto Lnewline;
            if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                tk = TOK.wcharLiteral;
            else
                tk = TOK.dcharLiteral;
        }
        t.unsvalue = c;
        break;
    }

    // normal case: the closing quote follows immediately
    if (*p == '\'')
    {
        p++;
        return tk;
    }

    // error recovery: skip ahead to a closing quote or a likely end of the literal
    while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
           *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
    {
        if (*p & 0x80)
        {
            const s = p;
            c = decodeUTF();
            if (c == LS || c == PS)
            {
                p = s; // stop in front of the line separator
                break;
            }
        }
        p++;
    }

    if (*p == '\'')
    {
        error("character constant has multiple characters");
        p++;
    }
    else
        error("unterminated character constant");
    t.unsvalue = '?';
    return tk;
}
|
|
1811 |
|
|
1812 |
/***************************************
 * Get postfix of string literal.
 *
 * If the character at `p` is 'c', 'w' or 'd' it is stored in `t.postfix`
 * and consumed; otherwise `t.postfix` is set to 0 and `p` is left alone.
 * Params:
 *      t = the string token to update
 */
private void stringPostfix(Token* t) pure @nogc
{
    const suffix = *p;
    if (suffix == 'c' || suffix == 'w' || suffix == 'd')
    {
        t.postfix = suffix;
        p++;
    }
    else
    {
        t.postfix = 0;
    }
}
|
|
1830 |
|
|
1831 |
/**************************************
 * Read in a number.
 * If it's an integer, store it in `t.unsvalue`.
 *      integers can be decimal, octal, binary or hex
 *      Handle the suffixes U, UL, LU, L, etc.
 * If it turns out to be a floating point literal, rewind to the start and
 * let `inreal` lex it.
 * Params:
 *      t = the token that receives the value
 * Returns:
 *      the kind of literal token that was read
 */
private TOK number(Token* t)
{
    int base = 10;
    const start = p;
    uinteger_t n = 0; // unsigned >=64 bit integer type
    int d;
    bool err = false;
    bool overflow = false;
    bool anyBinaryDigitsNoSingleUS = false;
    bool anyHexDigitsNoSingleUS = false;
    dchar c = *p;
    if (c == '0')
    {
        // what follows a leading zero selects the radix or a floating point literal
        c = *++p;
        switch (c)
        {
        case '0': .. case '9':
            base = 8;
            break;
        case 'x':
        case 'X':
            ++p;
            base = 16;
            break;
        case 'b':
        case 'B':
            ++p;
            base = 2;
            break;
        case '.':
            if (p[1] == '.')
                goto Ldone; // if ".."
            if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
                goto Ldone; // if ".identifier" or ".unicode"
            goto Lreal; // '.' is part of current token
        case 'i':
        case 'f':
        case 'F':
            goto Lreal;
        case '_':
            ++p;
            base = 8;
            break;
        case 'L':
            if (p[1] == 'i')
                goto Lreal;
            break;
        default:
            break;
        }
    }
    while (1)
    {
        c = *p;
        switch (c)
        {
        case '0': .. case '9':
            ++p;
            d = c - '0';
            break;
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            ++p;
            // outside hex, 'e'/'E' start an exponent and 'f'/'F' are float suffixes
            if (base != 16 && (c == 'e' || c == 'E' || c == 'f' || c == 'F'))
                goto Lreal;
            d = c + 10 - (c >= 'a' ? 'a' : 'A');
            break;
        case 'L':
            if (p[1] == 'i')
                goto Lreal;
            goto Ldone;
        case '.':
            if (p[1] == '.')
                goto Ldone; // if ".."
            if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
                goto Ldone; // if ".identifier" or ".unicode"
            if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                goto Ldone; // if ".identifier" or ".unicode"
            if (base == 2)
                goto Ldone; // if ".identifier" or ".unicode"
            goto Lreal; // otherwise as part of a floating point literal
        case 'p':
        case 'P':
        case 'i':
        Lreal:
            // rescan from the beginning as a floating point literal
            p = start;
            return inreal(t);
        case '_':
            ++p;
            continue;
        default:
            goto Ldone;
        }
        // got a digit here, set any necessary flags, check for errors
        anyHexDigitsNoSingleUS = true;
        anyBinaryDigitsNoSingleUS = true;
        if (!err && d >= base)
        {
            error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
                                                 base == 8 ? "octal".ptr :
                                                 "decimal".ptr, c);
            err = true;
        }
        // Avoid expensive overflow check if we aren't at risk of overflow
        if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
            n = n * base + d;
        else
        {
            import core.checkedint : mulu, addu;

            n = mulu(n, base, overflow);
            n = addu(n, d, overflow);
        }
    }
Ldone:
    if (overflow && !err)
    {
        error("integer overflow");
        err = true;
    }
    // "0x" or "0b" with no digits after it
    if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
        (base == 16 && !anyHexDigitsNoSingleUS))
        error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
    enum FLAGS : int
    {
        none = 0,
        decimal = 1, // decimal
        unsigned = 2, // u or U suffix
        long_ = 4, // L suffix
    }

    FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
    // Parse trailing 'u', 'U', 'l' or 'L' in any combination
    const psuffix = p;
    for (;;)
    {
        FLAGS f;
        if (*p == 'U' || *p == 'u')
            f = FLAGS.unsigned;
        else if (*p == 'l')
        {
            f = FLAGS.long_;
            error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
        }
        else if (*p == 'L')
            f = FLAGS.long_;
        else
            break;
        p++;
        // the same suffix kind given twice
        if ((flags & f) && !err)
        {
            error("unrecognized token");
            err = true;
        }
        flags = cast(FLAGS)(flags | f);
    }
    if (base == 8 && n >= 8)
    {
        if (err)
            // can't translate invalid octal value, just show a generic message
            error("octal literals larger than 7 are no longer supported");
        else
            error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
                n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
    }
    TOK result;
    switch (flags)
    {
    case FLAGS.none:
        /* Octal or Hexadecimal constant.
         * First that fits: int, uint, long, ulong
         */
        if (n & 0x8000000000000000L)
            result = TOK.uns64Literal;
        else if (n & 0xFFFFFFFF00000000L)
            result = TOK.int64Literal;
        else if (n & 0x80000000)
            result = TOK.uns32Literal;
        else
            result = TOK.int32Literal;
        break;
    case FLAGS.decimal:
        /* First that fits: int, long, long long
         */
        if (n & 0x8000000000000000L)
            result = TOK.uns64Literal;
        else if (n & 0xFFFFFFFF80000000L)
            result = TOK.int64Literal;
        else
            result = TOK.int32Literal;
        break;
    case FLAGS.unsigned:
    case FLAGS.decimal | FLAGS.unsigned:
        /* First that fits: uint, ulong
         */
        result = (n & 0xFFFFFFFF00000000L) ? TOK.uns64Literal : TOK.uns32Literal;
        break;
    case FLAGS.decimal | FLAGS.long_:
        if (n & 0x8000000000000000L)
        {
            if (!err)
            {
                error("signed integer overflow");
                err = true;
            }
            result = TOK.uns64Literal;
        }
        else
            result = TOK.int64Literal;
        break;
    case FLAGS.long_:
        result = (n & 0x8000000000000000L) ? TOK.uns64Literal : TOK.int64Literal;
        break;
    case FLAGS.unsigned | FLAGS.long_:
    case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
        result = TOK.uns64Literal;
        break;
    default:
        debug
        {
            printf("%x\n", flags);
        }
        assert(0);
    }
    t.unsvalue = n;
    return result;
}
|
|
2118 |
|
|
2119 |
/**************************************
 * Read in characters, converting them to real.
 *
 * The literal text, with '_' separators removed, is collected in
 * `stringbuffer` and parsed with `CTFloat.parse`; the value is stored in
 * `t.floatvalue`.
 * Bugs:
 *      Exponent overflow not detected.
 *      Too much requested precision is not detected.
 * Params:
 *      t = the token that receives the value
 * Returns:
 *      the float or imaginary literal token kind selected by the suffix
 */
private TOK inreal(Token* t)
{
    //printf("Lexer::inreal()\n");
    debug
    {
        assert(*p == '.' || isdigit(*p));
    }
    bool isWellformedString = true;
    stringbuffer.setsize(0);
    auto pstart = p;
    bool hex = false;
    dchar c = *p++;
    // Leading '0x'
    if (c == '0')
    {
        c = *p++;
        if (c == 'x' || c == 'X')
        {
            hex = true;
            c = *p++;
        }
    }
    // Digits to left of '.'
    while (isdigit(c) || (hex && isxdigit(c)) || c == '_')
        c = *p++;
    // The '.' itself, if present
    if (c == '.')
        c = *p++;
    // Digits to right of '.'
    while (isdigit(c) || (hex && isxdigit(c)) || c == '_')
        c = *p++;
    if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
    {
        // exponent: optional sign, then digits with optional '_' separators
        c = *p++;
        if (c == '-' || c == '+')
            c = *p++;
        bool anyexp = false;
        for (; isdigit(c) || c == '_'; c = *p++)
        {
            if (c != '_')
                anyexp = true;
        }
        if (!anyexp)
        {
            error("missing exponent");
            isWellformedString = false;
        }
    }
    else if (hex)
    {
        error("exponent required for hex float");
        isWellformedString = false;
    }
    // `c` was read one character too far; step back onto it
    --p;
    // copy the literal text without the '_' separators
    for (; pstart < p; ++pstart)
    {
        if (*pstart != '_')
            stringbuffer.writeByte(*pstart);
    }
    stringbuffer.writeByte(0);
    auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
    TOK result;
    bool isOutOfRange = false;
    t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
    switch (*p)
    {
    case 'F':
    case 'f':
        if (isWellformedString && !isOutOfRange)
            isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
        result = TOK.float32Literal;
        p++;
        break;
    case 'l':
        error("use 'L' suffix instead of 'l'");
        goto case 'L';
    case 'L':
        result = TOK.float80Literal;
        p++;
        break;
    default:
        if (isWellformedString && !isOutOfRange)
            isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
        result = TOK.float64Literal;
        break;
    }
    if (*p == 'i' || *p == 'I')
    {
        // imaginary suffix: map each float kind to its imaginary counterpart
        if (*p == 'I')
            error("use 'i' suffix instead of 'I'");
        p++;
        switch (result)
        {
        case TOK.float32Literal:
            result = TOK.imaginary32Literal;
            break;
        case TOK.float64Literal:
            result = TOK.imaginary64Literal;
            break;
        case TOK.float80Literal:
            result = TOK.imaginary80Literal;
            break;
        default:
            break;
        }
    }
    const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
    if (isOutOfRange && !isLong)
    {
        const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
        error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
    }
    debug
    {
        switch (result)
        {
        case TOK.float32Literal:
        case TOK.float64Literal:
        case TOK.float80Literal:
        case TOK.imaginary32Literal:
        case TOK.imaginary64Literal:
        case TOK.imaginary80Literal:
            break;
        default:
            assert(0);
        }
    }
    return result;
}
|
|
2283 |
|
|
2284 |
/// Returns: `scanloc`, after setting its `charnum` to the 1-based offset of `p` from `line`
final Loc loc() pure @nogc
{
    const column = cast(uint)((p - line) + 1);
    scanloc.charnum = column;
    return scanloc;
}
|
|
2289 |
|
|
2290 |
/**
 * Report an error at the location of the current token.
 * Params:
 *      format = printf-style format string
 *      ... = arguments for `format`
 */
final void error(const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    .verror(token.loc, format, ap);
    va_end(ap);
}
|
|
2297 |
|
|
2298 |
/**
 * Report an error at an explicitly given location.
 * Params:
 *      loc = location to report the error at
 *      format = printf-style format string
 *      ... = arguments for `format`
 */
final void error(const ref Loc loc, const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    .verror(loc, format, ap);
    va_end(ap);
}
|
|
2305 |
|
|
2306 |
/**
 * Report a deprecation at the location of the current token.
 * Params:
 *      format = printf-style format string
 *      ... = arguments for `format`
 */
final void deprecation(const(char)* format, ...)
{
    va_list ap;
    va_start(ap, format);
    .vdeprecation(token.loc, format, ap);
    va_end(ap);
}
|
|
2313 |
|
|
2314 |
/*********************************************
 * parse:
 *      #line linnum [filespec]
 * also allow __LINE__ for linnum, and __FILE__ for filespec
 *
 * On success the new line number (and file name, if one was given) is
 * stored into `scanloc`, unless `inTokenStringConstant` is set, in which
 * case `scanloc` is left unchanged. On malformed input an error is
 * reported at the location where the directive started.
 */
private void poundLine()
{
    auto linnum = this.scanloc.linnum;
    const(char)* filespec = null;
    const loc = this.loc();
    Token tok;
    scan(&tok);
    if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
    {
        // The stored line number is one less than the number written
        const lin = cast(int)(tok.unsvalue - 1);
        if (lin != tok.unsvalue - 1)
            // the argument is unsigned, so it must be formatted with %llu
            error("line number `%llu` out of range", cast(ulong)tok.unsvalue);
        else
            linnum = lin;
    }
    else if (tok.value == TOK.line)
    {
        // __LINE__: keep the current line number
    }
    else
        goto Lerr;
    while (1)
    {
        switch (*p)
        {
        case 0:
        case 0x1A:
        case '\n':
        Lnewline:
            // End of the directive: commit the collected values
            if (!inTokenStringConstant)
            {
                this.scanloc.linnum = linnum;
                if (filespec)
                    this.scanloc.filename = filespec;
            }
            return;
        case '\r':
            p++;
            if (*p != '\n')
            {
                // lone \r terminates the directive by itself
                p--;
                goto Lnewline;
            }
            continue;
        case ' ':
        case '\t':
        case '\v':
        case '\f':
            p++;
            continue; // skip white space
        case '_':
            if (memcmp(p, "__FILE__".ptr, 8) == 0)
            {
                p += 8;
                filespec = mem.xstrdup(scanloc.filename);
                continue;
            }
            goto Lerr;
        case '"':
            if (filespec)
                goto Lerr; // only one filespec is allowed
            stringbuffer.setsize(0);
            p++;
            while (1)
            {
                uint c;
                c = *p;
                switch (c)
                {
                case '\n':
                case '\r':
                case 0:
                case 0x1A:
                    goto Lerr; // unterminated filespec
                case '"':
                    stringbuffer.writeByte(0);
                    filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
                    p++;
                    break;
                default:
                    if (c & 0x80)
                    {
                        const(char)* seqStart = p;
                        uint u = decodeUTF();
                        if (u == PS || u == LS)
                            goto Lerr;
                        // decodeUTF leaves p on the last byte of the sequence.
                        // Copy the bytes before it too, so that the whole
                        // multi-byte sequence ends up in the file name
                        // instead of only its first byte.
                        for (; seqStart < p; ++seqStart)
                            stringbuffer.writeByte(*seqStart);
                        c = *p;
                    }
                    stringbuffer.writeByte(c);
                    p++;
                    continue;
                }
                break;
            }
            continue;
        default:
            if (*p & 0x80)
            {
                uint u = decodeUTF();
                if (u == PS || u == LS)
                    goto Lnewline;
            }
            goto Lerr;
        }
    }
Lerr:
    error(loc, "#line integer [\"filespec\"]\\n expected");
}
|
|
2424 |
|
|
2425 |
/********************************************
 * Decode the UTF-8 sequence that starts at `p`.
 * An error is issued when `utf_decodeChar` reports a problem.
 * `p` is advanced so that it points at the last byte consumed.
 * Returns: the decoded character
 */
private uint decodeUTF()
{
    const start = p;
    assert(*start & 0x80);
    // Count the bytes available for decoding: at most 4, stopping early at a 0 byte
    size_t avail = 1;
    while (avail < 4 && start[avail])
        ++avail;
    size_t consumed = 0;
    dchar decoded;
    const errmsg = utf_decodeChar(start[0 .. avail], consumed, decoded);
    p += consumed - 1;
    if (errmsg)
        error("%.*s", cast(int)errmsg.length, errmsg.ptr);
    return decoded;
}
|
|
2449 |
|
|
2450 |
/***************************************************
 * Parse the doc comment that lies between t.ptr and p and attach it
 * to the token:
 *  - trailing blanks and tabs are removed from every line
 *  - every kind of line ending is replaced with \n
 *  - the leading comment character of each line is removed
 *  - the text is stored as a line comment or a block comment
 *  - it is appended to a comment already recorded for this token
 *
 * Params:
 *      t            = token whose text is the comment and which receives the result
 *      lineComment  = non-zero to store into `t.lineComment` (only when `anyToken`
 *                     is also set); otherwise `t.blockComment` is used
 *      newParagraph = if true, an extra newline is added between adjoining doc comments
 */
private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
{
    // The third character tells which kind of comment this is: '/', '*', or '+'
    const marker = t.ptr[2];
    // Comment text starts after the three opening characters
    const(char)* cur = t.ptr + 3;
    const(char)* stop = p;
    if (marker == '*' || marker == '+')
        stop -= 2; // presumably the two closing characters of a block comment

    // Skip the initial row of ****'s, ++++'s or ////'s
    while (cur < stop && *cur == marker)
        ++cur;

    // Skip leading blanks (line comment) or one leading line ending (block comment)
    bool atLineStart = false;
    if (marker == '/')
    {
        while (cur < stop && (*cur == ' ' || *cur == '\t'))
            ++cur;
    }
    else if (cur < stop && (*cur == '\r' || *cur == '\n'))
    {
        const wasCR = (*cur == '\r');
        ++cur;
        if (wasCR && cur < stop && *cur == '\n')
            ++cur;
        atLineStart = true;
    }

    // Drop the trailing row of ****'s or ++++'s
    if (marker != '/')
    {
        while (cur < stop && stop[-1] == marker)
            --stop;
    }

    // The comment is now [cur .. stop]; canonicalize it into buf[]
    OutBuffer buf;

    // Shrink buf so that it no longer ends in spaces or tabs
    void stripLineTail()
    {
        const text = buf[];
        size_t keep = text.length;
        while (keep > 0)
        {
            const last = text[keep - 1];
            if (last != ' ' && last != '\t')
                break;
            --keep;
        }
        buf.setsize(keep);
    }

    for (; cur < stop; ++cur)
    {
        char ch = *cur;
        bool newline = false;
        if (ch == '*' || ch == '+')
        {
            if (atLineStart && ch == marker)
            {
                // Leading comment character of a line: drop it together
                // with the whitespace that precedes it on that line
                atLineStart = false;
                stripLineTail();
                continue;
            }
        }
        else if (ch == ' ' || ch == '\t')
        {
            // blanks are copied and do not end the "start of line" state
        }
        else if (ch == '\r')
        {
            if (cur[1] == '\n')
                continue; // skip the \r, the \n is handled next
            newline = true;
        }
        else if (ch == '\n')
        {
            newline = true;
        }
        else if (ch == 226 && cur[1] == 128 && (cur[2] == 168 || cur[2] == 169))
        {
            // UTF-8 encoding of LS or PS
            cur += 2;
            newline = true;
        }
        else
        {
            atLineStart = false;
        }

        if (newline)
        {
            ch = '\n'; // replace all newlines with \n
            atLineStart = true;
            stripLineTail();
        }
        buf.writeByte(ch);
    }

    // The last line may lack a newline; trim it as well
    stripLineTail();

    // Always end with a newline
    const text = buf[];
    if (text.length == 0 || text[$ - 1] != '\n')
        buf.writeByte('\n');

    // It's a line comment if the start of the doc comment comes
    // after other non-whitespace on the same line.
    auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
    // Combine with previous doc comment, if any
    if (*dc)
        *dc = combineComments(*dc, buf[], newParagraph).toDString();
    else
        *dc = buf.extractSlice(true);
}
|
|
2591 |
|
|
2592 |
/********************************************
 * Combine two document comments into one.
 *
 * A '\n' is inserted after `c1` when `c1` is non-empty and does not end
 * in one, and one more '\n' is inserted when `newParagraph` is true.
 *
 * Params:
 *      c1 = first comment
 *      c2 = second comment
 *      newParagraph = whether to separate the comments by an extra newline
 * Returns:
 *      `c2.ptr` if `c1` is null, `c1.ptr` if `c2` is null, otherwise a
 *      newly allocated, 0-terminated string holding the combination
 */
static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
{
    if (!c1)
        return c2.ptr;
    if (!c2)
        return c1.ptr;

    // Number of '\n' characters placed between the two comments
    const size_t lineGap = (c1.length && c1[$ - 1] != '\n') ? 1 : 0;
    const size_t paragraphGap = newParagraph ? 1 : 0;

    const total = c1.length + lineGap + paragraphGap + c2.length;
    auto dst = cast(char*)mem.xmalloc_noscan(total + 1);

    size_t at = 0;
    dst[at .. at + c1.length] = c1[];
    at += c1.length;
    if (lineGap)
        dst[at++] = '\n';
    if (paragraphGap)
        dst[at++] = '\n';
    dst[at .. at + c2.length] = c2[];
    dst[total] = 0;
    return dst;
}
|
|
2619 |
|
|
2620 |
private: |
|
2621 |
/// Record that a new line starts at `p`: remember its start and bump the line number.
void endOfLine() pure @nogc @safe
{
    line = p;
    ++scanloc.linnum;
}
|
|
2626 |
}
|
|
2627 |
|
|
2628 |
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    /// Set once `initialize` has filled in the buffers below
    private __gshared bool initdone = false;

    // Note: Those properties need to be guarded by a call to `initialize`
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;
    __gshared char[8 + 1] time;
    __gshared char[24 + 1] timestamp;

    /**
     * Fill `date`, `time` and `timestamp`; only the first call does any work.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the system clock.
     *
     * Params:
     *      loc = location used for the error issued when
     *            `SOURCE_DATE_EPOCH` cannot be parsed
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;

        initdone = true;
        time_t ct;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        if (auto p = getenv("SOURCE_DATE_EPOCH"))
        {
            if (!ct.parseDigits(p.toDString()))
                error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", p);
        }
        else
            .time(&ct);
        // ctime yields text of the form "Www Mmm dd hh:mm:ss yyyy\n";
        // the offsets below pick the pieces out of that layout
        const p = ctime(&ct);
        assert(p);
        sprintf(&date[0], "%.6s %.4s", p + 4, p + 20);
        sprintf(&time[0], "%.8s", p + 11);
        sprintf(&timestamp[0], "%.24s", p);
    }
}
|
|
2663 |
|
|
2664 |
// Valid escape sequences: `Lexer.escapeSequence` must return the expected
// character, consume the whole input, and never report a diagnostic.
unittest
{
    import dmd.console;
    // Any diagnostic is a test failure
    nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                   const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &assertDiagnosticHandler;

    static void test(T)(string sequence, T expected)
    {
        auto p = cast(const(char)*)sequence.ptr;
        assert(expected == Lexer.escapeSequence(Loc.initial, p));
        assert(p == sequence.ptr + sequence.length);
    }

    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    test(`U0001f603`, '\U0001f603');

    // named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    diagnosticHandler = null;
}
|
|
2716 |
// Invalid escape sequences: `Lexer.escapeSequence` must report exactly the
// expected error text, return the expected value, and consume the expected
// number of characters.
unittest
{
    import dmd.console;
    string expected;
    bool gotError;

    // Records that an error was issued and checks its formatted text
    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        auto actual = buffer[0 .. vsprintf(buffer.ptr, format, ap)];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;

    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength)
    {
        // global.errors is saved here and restored at the end of the call
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        global.errors = errors;
    }

    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);

    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}
|
Read our documentation on viewing source code .