1
/**
2
 * Implements the lexical analyzer, which converts source code into lexical tokens.
3
 *
4
 * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
5
 *
6
 * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
7
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
10
 * Documentation:  https://dlang.org/phobos/dmd_lexer.html
11
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
12
 */
13

14
module dmd.lexer;
15

16
import core.stdc.ctype;
17
import core.stdc.errno;
18
import core.stdc.stdarg;
19
import core.stdc.stdio;
20
import core.stdc.stdlib : getenv;
21
import core.stdc.string;
22
import core.stdc.time;
23

24
import dmd.entity;
25
import dmd.errors;
26
import dmd.globals;
27
import dmd.id;
28
import dmd.identifier;
29
import dmd.root.ctfloat;
30
import dmd.root.outbuffer;
31
import dmd.root.port;
32
import dmd.root.rmem;
33
import dmd.root.string;
34
import dmd.tokens;
35
import dmd.utf;
36
import dmd.utils;
37

38
nothrow:
39

40
private enum LS = 0x2028;       // UTF line separator
41
private enum PS = 0x2029;       // UTF paragraph separator
42

43
/********************************************
44
 * Do our own char maps
45
 */
46
private static immutable cmtable = () {
    // Built once at compile time: one flag byte per possible `char` value.
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        ubyte flags = 0;

        // Digit / identifier classes.
        if (c >= '0' && c <= '7')
            flags |= CMoctal;
        if (c_isxdigit(c))
            flags |= CMhex;
        if (c == '_' || c_isalnum(c))
            flags |= CMidchar;

        // Characters that may follow the first digit of a numeric literal.
        switch (c)
        {
            // Radix prefixes: only meaningful directly after a leading '0'.
            case 'b': case 'B':
            case 'x': case 'X':
                flags |= CMzerosecond;
                break;

            // Digits, separators, exponent markers and suffixes: valid after any leading digit.
            case '0': .. case '9':
            case '.':
            case '_':
            case 'i':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
                flags |= CMzerosecond | CMdigitsecond;
                break;

            default:
                break;
        }

        // A character qualifies as the body of a simple 'x' literal unless it is
        // a backslash, a line terminator, an end-of-file marker, a quote, or non-ASCII.
        const excluded = c == '\\' || c == '\n' || c == '\r'
                      || c == 0 || c == 0x1A || c == '\'';
        if (!excluded && (c & 0x80) == 0)
            flags |= CMsinglechar;

        table[c] = flags;
    }
    return table;
}();
97

98
private
{
    // Bit flags stored in each `cmtable` entry.
    enum CMoctal       = 1 << 0; // octal digit '0' .. '7'
    enum CMhex         = 1 << 1; // hexadecimal digit
    enum CMidchar      = 1 << 2; // ASCII letter, digit or '_'
    enum CMzerosecond  = 1 << 3; // may follow a leading '0' in a numeric literal
    enum CMdigitsecond = 1 << 4; // may follow a leading '1' .. '9' in a numeric literal
    enum CMsinglechar  = 1 << 5; // may be the sole character of a simple character literal
}
107

108
/// Returns: `true` if `cmtable` marks `c` as an octal digit.
private bool isoctal(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMoctal) == CMoctal;
}
112

113
/// Returns: `true` if `cmtable` marks `c` as a hexadecimal digit.
private bool ishex(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMhex) == CMhex;
}
117

118
/// Returns: `true` if `cmtable` marks `c` as an ASCII identifier character
/// (letter, digit or underscore).
private bool isidchar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMidchar) == CMidchar;
}
122

123
/// Returns: `true` if `c` may continue a numeric literal whose first character is `'0'`.
private bool isZeroSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMzerosecond) == CMzerosecond;
}
127

128
/// Returns: `true` if `c` may continue a numeric literal whose first character is `'1'` .. `'9'`.
private bool isDigitSecond(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMdigitsecond) == CMdigitsecond;
}
132

133
/// Returns: `true` if `c` can stand alone between quotes as a simple character literal,
/// i.e. it is ASCII and is not a backslash, quote, line terminator or end-of-file marker.
private bool issinglechar(const char c) pure @nogc @safe
{
    const flags = cmtable[c];
    return (flags & CMsinglechar) == CMsinglechar;
}
137

138
/// Returns: `true` if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
private bool c_isxdigit(const int c) pure @nogc @safe
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'f')
        return true;
    return c >= 'A' && c <= 'F';
}
144

145
/// Returns: `true` if `c` is an ASCII letter or decimal digit.
private bool c_isalnum(const int c) pure @nogc @safe
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
151

152
unittest
{
    /* Smoke test: a lone keyword lexes to its token, after which the lexer
     * keeps reporting end of file on every further request.
     */
    string text = "int"; // We rely on the implicit null-terminator
    scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);

    assert(lex1.nextToken() == TOK.int32);

    // Reaching the end must be sticky: ask three more times.
    foreach (_; 0 .. 3)
        assert(lex1.nextToken() == TOK.endOfFile);
}
170

171
unittest
{
    // Suppress Lexer diagnostics while feeding it bad input; restore on scope exit.
    uint gagged = global.startGagging();
    scope(exit) global.endGagging(gagged);

    // Malformed input must still terminate with TOK.endOfFile.
    // Every testcase ends with 0 or 0x1A.
    static immutable char[][] testcases =
    [
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0], // repeated entry, kept as in the original list
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0);

        // Pull tokens until end of file, but never more of them than there are
        // input characters, so a lexer that fails to advance cannot hang the test.
        TOK tok = lex2.nextToken();
        for (size_t iterations = 1;
             tok != TOK.endOfFile && iterations < testcase.length;
             ++iterations)
        {
            tok = lex2.nextToken();
        }
        assert(tok == TOK.endOfFile);

        // End of file must be sticky.
        assert(lex2.nextToken() == TOK.endOfFile);
    }
}
205

206
/***********************************************************
207
 */
208
class Lexer
209
{
210
    private __gshared OutBuffer stringbuffer;
211

212
    Loc scanloc;            // for error messages
213
    Loc prevloc;            // location of token before current
214

215
    const(char)* p;         // current character
216

217
    Token token;
218

219
    private
220
    {
221
        const(char)* base;      // pointer to start of buffer
222
        const(char)* end;       // pointer to last element of buffer
223
        const(char)* line;      // start of current line
224

225
        bool doDocComment;      // collect doc comment information
226
        bool anyToken;          // seen at least one token
227
        bool commentToken;      // comments are TOK.comment's
228
        int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
229
        int lastDocLine;        // last line of previous doc comment
230

231
        Token* tokenFreelist;
232
    }
233

234
  nothrow:
235

236
    /*********************
237
     * Creates a Lexer for the source code base[begoffset..endoffset+1].
238
     * The last character, base[endoffset], must be null (0) or EOF (0x1A).
239
     *
240
     * Params:
241
     *  filename = used for error messages
242
     *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
243
     *  begoffset = starting offset into base[]
244
     *  endoffset = the last offset to read into base[]
245
     *  doDocComment = handle documentation comments
246
     *  commentToken = comments become TOK.comment's
247
     */
248 1
    this(const(char)* filename, const(char)* base, size_t begoffset,
        size_t endoffset, bool doDocComment, bool commentToken) pure
    {
        // Scanning starts at line 1, column 1 of `filename`.
        scanloc = Loc(filename, 1, 1);
        token = Token.init;

        // Buffer bounds and the initial read position.
        this.base = base;
        this.end = base + endoffset;
        p = base + begoffset;
        line = p;

        // Comment handling options and doc-comment bookkeeping.
        this.doDocComment = doDocComment;
        this.commentToken = commentToken;
        this.inTokenStringConstant = 0;
        this.lastDocLine = 0;

        // A leading "#!" line is not D code: skip it entirely.
        if (p && p[0] == '#' && p[1] == '!')
        {
            p += 2;
            // Advance to the newline, or stop on the end-of-file marker.
            while (*p != '\n' && *p != 0 && *p != 0x1A)
                ++p;
            // Consume the newline itself; an end-of-file marker is left in
            // place for the scanner to see.
            if (*p == '\n')
                ++p;
            endOfLine();
        }
    }
288

289
    /// Returns: a `Token` ready for use, taken from the freelist when one is
    /// available and freshly allocated otherwise.
    Token* allocateToken() pure nothrow @safe
    {
        if (tokenFreelist is null)
            return new Token();

        // Pop the head of the freelist and detach it from the chain.
        Token* recycled = tokenFreelist;
        tokenFreelist = recycled.next;
        recycled.next = null;
        return recycled;
    }
301

302
    /// Hands `token` back to the lexer by pushing it onto the freelist,
    /// from which `allocateToken` can reuse it.
    private void releaseToken(Token* token) pure nothrow @nogc @safe
    {
        // Under the GC the token is reset first.
        // NOTE(review): presumably so stale references inside it do not keep
        // other allocations alive - confirm.
        if (mem.isGCEnabled)
        {
            *token = Token.init;
        }

        // Push onto the head of the freelist.
        token.next = tokenFreelist;
        tokenFreelist = token;
    }
310

311
    /// Advances to the next token, making it the current `token`.
    /// Returns: the `TOK` value of the new current token.
    final TOK nextToken()
    {
        prevloc = token.loc;

        if (Token* queued = token.next)
        {
            // A token is already chained after the current one: promote it
            // to current and recycle its storage.
            memcpy(&token, queued, Token.sizeof);
            releaseToken(queued);
        }
        else
        {
            // Nothing queued: lex a fresh token from the buffer.
            scan(&token);
        }

        return token.value;
    }
327

328
    /***********************
     * Looks one token past the current one without consuming anything.
     * Returns: the `TOK` value of that token.
     */
    final TOK peekNext()
    {
        Token* ahead = peek(&token);
        return ahead.value;
    }
335

336
    /***********************
     * Looks two tokens past the current one without consuming anything.
     * Returns: the `TOK` value of that token.
     */
    final TOK peekNext2()
    {
        return peek(peek(&token)).value;
    }
344

345
    /****************************
346
     * Turn next token in buffer into a token.
347
     */
348
    final void scan(Token* t)
349
    {
350 1
        const lastLine = scanloc.linnum;
351 1
        Loc startLoc;
352 1
        t.blockComment = null;
353 1
        t.lineComment = null;
354

355 1
        while (1)
356
        {
357 1
            t.ptr = p;
358
            //printf("p = %p, *p = '%c'\n",p,*p);
359 1
            t.loc = loc();
360 1
            switch (*p)
361
            {
362 1
            case 0:
363 1
            case 0x1A:
364 1
                t.value = TOK.endOfFile; // end of file
365
                // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
366 1
                return;
367 1
            case ' ':
368 1
            case '\t':
369 1
            case '\v':
370 1
            case '\f':
371 1
                p++;
372 1
                continue; // skip white space
373 1
            case '\r':
374 1
                p++;
375 1
                if (*p != '\n') // if CR stands by itself
376
                {
377 0
                    endOfLine();
378 0
                    goto skipFourSpaces;
379
                }
380 1
                continue; // skip white space
381 1
            case '\n':
382 1
                p++;
383 1
                endOfLine();
384
                skipFourSpaces:
385 1
                while (*(cast(uint*)p) == 0x20202020) //' ' == 0x20
386
                {
387 1
                    p+=4;
388
                }
389 1
                continue; // skip white space
390 1
            case '0':
391 1
                if (!isZeroSecond(p[1]))        // if numeric literal does not continue
392
                {
393 1
                    ++p;
394 1
                    t.unsvalue = 0;
395 1
                    t.value = TOK.int32Literal;
396 1
                    return;
397
                }
398 1
                goto Lnumber;
399

400 1
            case '1': .. case '9':
401 1
                if (!isDigitSecond(p[1]))       // if numeric literal does not continue
402
                {
403 1
                    t.unsvalue = *p - '0';
404 1
                    ++p;
405 1
                    t.value = TOK.int32Literal;
406 1
                    return;
407
                }
408
            Lnumber:
409 1
                t.value = number(t);
410 1
                return;
411

412 1
            case '\'':
413 1
                if (issinglechar(p[1]) && p[2] == '\'')
414
                {
415 1
                    t.unsvalue = p[1];        // simple one character literal
416 1
                    t.value = TOK.charLiteral;
417 1
                    p += 3;
418
                }
419
                else
420 1
                    t.value = charConstant(t);
421 1
                return;
422 1
            case 'r':
423 1
                if (p[1] != '"')
424 1
                    goto case_ident;
425 1
                p++;
426 1
                goto case '`';
427 1
            case '`':
428 1
                wysiwygStringConstant(t);
429 1
                return;
430 1
            case 'x':
431 1
                if (p[1] != '"')
432 1
                    goto case_ident;
433 1
                p++;
434 1
                auto start = p;
435 1
                auto hexString = new OutBuffer();
436 1
                t.value = hexStringConstant(t);
437 1
                hexString.write(start[0 .. p - start]);
438 1
                error("Built-in hex string literals are obsolete, use `std.conv.hexString!%s` instead.", hexString.extractChars());
439 1
                return;
440 1
            case 'q':
441 1
                if (p[1] == '"')
442
                {
443 1
                    p++;
444 1
                    delimitedStringConstant(t);
445 1
                    return;
446
                }
447 1
                else if (p[1] == '{')
448
                {
449 1
                    p++;
450 1
                    tokenStringConstant(t);
451 1
                    return;
452
                }
453
                else
454 1
                    goto case_ident;
455 1
            case '"':
456 1
                escapeStringConstant(t);
457 1
                return;
458 1
            case 'a':
459 1
            case 'b':
460 1
            case 'c':
461 1
            case 'd':
462 1
            case 'e':
463 1
            case 'f':
464 1
            case 'g':
465 1
            case 'h':
466 1
            case 'i':
467 1
            case 'j':
468 1
            case 'k':
469 1
            case 'l':
470 1
            case 'm':
471 1
            case 'n':
472 1
            case 'o':
473 1
            case 'p':
474
                /*case 'q': case 'r':*/
475 1
            case 's':
476 1
            case 't':
477 1
            case 'u':
478 1
            case 'v':
479 1
            case 'w':
480
                /*case 'x':*/
481 1
            case 'y':
482 1
            case 'z':
483 1
            case 'A':
484 1
            case 'B':
485 1
            case 'C':
486 1
            case 'D':
487 1
            case 'E':
488 1
            case 'F':
489 1
            case 'G':
490 1
            case 'H':
491 1
            case 'I':
492 1
            case 'J':
493 1
            case 'K':
494 1
            case 'L':
495 1
            case 'M':
496 1
            case 'N':
497 1
            case 'O':
498 1
            case 'P':
499 1
            case 'Q':
500 1
            case 'R':
501 1
            case 'S':
502 1
            case 'T':
503 1
            case 'U':
504 1
            case 'V':
505 1
            case 'W':
506 1
            case 'X':
507 1
            case 'Y':
508 1
            case 'Z':
509 1
            case '_':
510
            case_ident:
511
                {
512 1
                    while (1)
513
                    {
514 1
                        const c = *++p;
515 1
                        if (isidchar(c))
516 1
                            continue;
517 1
                        else if (c & 0x80)
518
                        {
519 1
                            const s = p;
520 1
                            const u = decodeUTF();
521 1
                            if (isUniAlpha(u))
522 1
                                continue;
523 0
                            error("char 0x%04x not allowed in identifier", u);
524 0
                            p = s;
525
                        }
526 1
                        break;
527
                    }
528 1
                    Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
529 1
                    t.ident = id;
530 1
                    t.value = cast(TOK)id.getValue();
531 1
                    anyToken = 1;
532 1
                    if (*t.ptr == '_') // if special identifier token
533
                    {
534
                        // Lazy initialization
535 1
                        TimeStampInfo.initialize(t.loc);
536

537 1
                        if (id == Id.DATE)
538
                        {
539 1
                            t.ustring = TimeStampInfo.date.ptr;
540 1
                            goto Lstr;
541
                        }
542 1
                        else if (id == Id.TIME)
543
                        {
544 1
                            t.ustring = TimeStampInfo.time.ptr;
545 1
                            goto Lstr;
546
                        }
547 1
                        else if (id == Id.VENDOR)
548
                        {
549 0
                            t.ustring = global.vendor.xarraydup.ptr;
550 0
                            goto Lstr;
551
                        }
552 1
                        else if (id == Id.TIMESTAMP)
553
                        {
554 1
                            t.ustring = TimeStampInfo.timestamp.ptr;
555
                        Lstr:
556 1
                            t.value = TOK.string_;
557 1
                            t.postfix = 0;
558 1
                            t.len = cast(uint)strlen(t.ustring);
559
                        }
560 1
                        else if (id == Id.VERSIONX)
561
                        {
562 1
                            t.value = TOK.int64Literal;
563 1
                            t.unsvalue = global.versionNumber();
564
                        }
565 1
                        else if (id == Id.EOFX)
566
                        {
567 0
                            t.value = TOK.endOfFile;
568
                            // Advance scanner to end of file
569 0
                            while (!(*p == 0 || *p == 0x1A))
570 0
                                p++;
571
                        }
572
                    }
573
                    //printf("t.value = %d\n",t.value);
574 1
                    return;
575
                }
576 1
            case '/':
577 1
                p++;
578 1
                switch (*p)
579
                {
580 1
                case '=':
581 1
                    p++;
582 1
                    t.value = TOK.divAssign;
583 1
                    return;
584 1
                case '*':
585 1
                    p++;
586 1
                    startLoc = loc();
587 1
                    while (1)
588
                    {
589 1
                        while (1)
590
                        {
591 1
                            const c = *p;
592 1
                            switch (c)
593
                            {
594 1
                            case '/':
595 1
                                break;
596 1
                            case '\n':
597 1
                                endOfLine();
598 1
                                p++;
599 1
                                continue;
600 0
                            case '\r':
601 0
                                p++;
602 0
                                if (*p != '\n')
603 0
                                    endOfLine();
604 0
                                continue;
605 0
                            case 0:
606 0
                            case 0x1A:
607 0
                                error("unterminated /* */ comment");
608 0
                                p = end;
609 0
                                t.loc = loc();
610 0
                                t.value = TOK.endOfFile;
611 0
                                return;
612 1
                            default:
613 1
                                if (c & 0x80)
614
                                {
615 1
                                    const u = decodeUTF();
616 1
                                    if (u == PS || u == LS)
617 0
                                        endOfLine();
618
                                }
619 1
                                p++;
620 1
                                continue;
621
                            }
622 1
                            break;
623
                        }
624 1
                        p++;
625 1
                        if (p[-2] == '*' && p - 3 != t.ptr)
626 1
                            break;
627
                    }
628 1
                    if (commentToken)
629
                    {
630 1
                        t.loc = startLoc;
631 1
                        t.value = TOK.comment;
632 1
                        return;
633
                    }
634 1
                    else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
635
                    {
636
                        // if /** but not /**/
637 1
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
638 1
                        lastDocLine = scanloc.linnum;
639
                    }
640 1
                    continue;
641 1
                case '/': // do // style comments
642 1
                    startLoc = loc();
643 1
                    while (1)
644
                    {
645 1
                        const c = *++p;
646 1
                        switch (c)
647
                        {
648 1
                        case '\n':
649 1
                            break;
650 0
                        case '\r':
651 0
                            if (p[1] == '\n')
652 0
                                p++;
653 0
                            break;
654 0
                        case 0:
655 0
                        case 0x1A:
656 0
                            if (commentToken)
657
                            {
658 0
                                p = end;
659 0
                                t.loc = startLoc;
660 0
                                t.value = TOK.comment;
661 0
                                return;
662
                            }
663 0
                            if (doDocComment && t.ptr[2] == '/')
664
                            {
665 0
                                getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
666 0
                                lastDocLine = scanloc.linnum;
667
                            }
668 0
                            p = end;
669 0
                            t.loc = loc();
670 0
                            t.value = TOK.endOfFile;
671 0
                            return;
672 1
                        default:
673 1
                            if (c & 0x80)
674
                            {
675 1
                                const u = decodeUTF();
676 1
                                if (u == PS || u == LS)
677 0
                                    break;
678
                            }
679 1
                            continue;
680
                        }
681 1
                        break;
682
                    }
683 1
                    if (commentToken)
684
                    {
685 1
                        p++;
686 1
                        endOfLine();
687 1
                        t.loc = startLoc;
688 1
                        t.value = TOK.comment;
689 1
                        return;
690
                    }
691 1
                    if (doDocComment && t.ptr[2] == '/')
692
                    {
693 1
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
694 1
                        lastDocLine = scanloc.linnum;
695
                    }
696 1
                    p++;
697 1
                    endOfLine();
698 1
                    continue;
699 1
                case '+':
700
                    {
701 1
                        int nest;
702 1
                        startLoc = loc();
703 1
                        p++;
704 1
                        nest = 1;
705 1
                        while (1)
706
                        {
707 1
                            char c = *p;
708 1
                            switch (c)
709
                            {
710 1
                            case '/':
711 1
                                p++;
712 1
                                if (*p == '+')
713
                                {
714 0
                                    p++;
715 0
                                    nest++;
716
                                }
717 1
                                continue;
718 1
                            case '+':
719 1
                                p++;
720 1
                                if (*p == '/')
721
                                {
722 1
                                    p++;
723 1
                                    if (--nest == 0)
724 1
                                        break;
725
                                }
726 1
                                continue;
727 0
                            case '\r':
728 0
                                p++;
729 0
                                if (*p != '\n')
730 0
                                    endOfLine();
731 0
                                continue;
732 1
                            case '\n':
733 1
                                endOfLine();
734 1
                                p++;
735 1
                                continue;
736 0
                            case 0:
737 0
                            case 0x1A:
738 0
                                error("unterminated /+ +/ comment");
739 0
                                p = end;
740 0
                                t.loc = loc();
741 0
                                t.value = TOK.endOfFile;
742 0
                                return;
743 1
                            default:
744 1
                                if (c & 0x80)
745
                                {
746 1
                                    uint u = decodeUTF();
747 1
                                    if (u == PS || u == LS)
748 0
                                        endOfLine();
749
                                }
750 1
                                p++;
751 1
                                continue;
752
                            }
753 1
                            break;
754
                        }
755 1
                        if (commentToken)
756
                        {
757 1
                            t.loc = startLoc;
758 1
                            t.value = TOK.comment;
759 1
                            return;
760
                        }
761 1
                        if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
762
                        {
763
                            // if /++ but not /++/
764 1
                            getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
765 1
                            lastDocLine = scanloc.linnum;
766
                        }
767 1
                        continue;
768
                    }
769 1
                default:
770 1
                    break;
771
                }
772 1
                t.value = TOK.div;
773 1
                return;
774 1
            case '.':
775 1
                p++;
776 1
                if (isdigit(*p))
777
                {
778
                    /* Note that we don't allow ._1 and ._ as being
779
                     * valid floating point numbers.
780
                     */
781 1
                    p--;
782 1
                    t.value = inreal(t);
783
                }
784 1
                else if (p[0] == '.')
785
                {
786 1
                    if (p[1] == '.')
787
                    {
788 1
                        p += 2;
789 1
                        t.value = TOK.dotDotDot;
790
                    }
791
                    else
792
                    {
793 1
                        p++;
794 1
                        t.value = TOK.slice;
795
                    }
796
                }
797
                else
798 1
                    t.value = TOK.dot;
799 1
                return;
800 1
            case '&':
801 1
                p++;
802 1
                if (*p == '=')
803
                {
804 1
                    p++;
805 1
                    t.value = TOK.andAssign;
806
                }
807 1
                else if (*p == '&')
808
                {
809 1
                    p++;
810 1
                    t.value = TOK.andAnd;
811
                }
812
                else
813 1
                    t.value = TOK.and;
814 1
                return;
815 1
            case '|':
816 1
                p++;
817 1
                if (*p == '=')
818
                {
819 1
                    p++;
820 1
                    t.value = TOK.orAssign;
821
                }
822 1
                else if (*p == '|')
823
                {
824 1
                    p++;
825 1
                    t.value = TOK.orOr;
826
                }
827
                else
828 1
                    t.value = TOK.or;
829 1
                return;
830 1
            case '-':
831 1
                p++;
832 1
                if (*p == '=')
833
                {
834 1
                    p++;
835 1
                    t.value = TOK.minAssign;
836
                }
837 1
                else if (*p == '-')
838
                {
839 1
                    p++;
840 1
                    t.value = TOK.minusMinus;
841
                }
842
                else
843 1
                    t.value = TOK.min;
844 1
                return;
845 1
            case '+':
846 1
                p++;
847 1
                if (*p == '=')
848
                {
849 1
                    p++;
850 1
                    t.value = TOK.addAssign;
851
                }
852 1
                else if (*p == '+')
853
                {
854 1
                    p++;
855 1
                    t.value = TOK.plusPlus;
856
                }
857
                else
858 1
                    t.value = TOK.add;
859 1
                return;
860 1
            case '<':
861 1
                p++;
862 1
                if (*p == '=')
863
                {
864 1
                    p++;
865 1
                    t.value = TOK.lessOrEqual; // <=
866
                }
867 1
                else if (*p == '<')
868
                {
869 1
                    p++;
870 1
                    if (*p == '=')
871
                    {
872 1
                        p++;
873 1
                        t.value = TOK.leftShiftAssign; // <<=
874
                    }
875
                    else
876 1
                        t.value = TOK.leftShift; // <<
877
                }
878
                else
879 1
                    t.value = TOK.lessThan; // <
880 1
                return;
881 1
            case '>':
882 1
                p++;
883 1
                if (*p == '=')
884
                {
885 1
                    p++;
886 1
                    t.value = TOK.greaterOrEqual; // >=
887
                }
888 1
                else if (*p == '>')
889
                {
890 1
                    p++;
891 1
                    if (*p == '=')
892
                    {
893 1
                        p++;
894 1
                        t.value = TOK.rightShiftAssign; // >>=
895
                    }
896 1
                    else if (*p == '>')
897
                    {
898 1
                        p++;
899 1
                        if (*p == '=')
900
                        {
901 1
                            p++;
902 1
                            t.value = TOK.unsignedRightShiftAssign; // >>>=
903
                        }
904
                        else
905 1
                            t.value = TOK.unsignedRightShift; // >>>
906
                    }
907
                    else
908 1
                        t.value = TOK.rightShift; // >>
909
                }
910
                else
911 1
                    t.value = TOK.greaterThan; // >
912 1
                return;
913 1
            case '!':
914 1
                p++;
915 1
                if (*p == '=')
916
                {
917 1
                    p++;
918 1
                    t.value = TOK.notEqual; // !=
919
                }
920
                else
921 1
                    t.value = TOK.not; // !
922 1
                return;
923 1
            case '=':
924 1
                p++;
925 1
                if (*p == '=')
926
                {
927 1
                    p++;
928 1
                    t.value = TOK.equal; // ==
929
                }
930 1
                else if (*p == '>')
931
                {
932 1
                    p++;
933 1
                    t.value = TOK.goesTo; // =>
934
                }
935
                else
936 1
                    t.value = TOK.assign; // =
937 1
                return;
938 1
            case '~':
939 1
                p++;
940 1
                if (*p == '=')
941
                {
942 1
                    p++;
943 1
                    t.value = TOK.concatenateAssign; // ~=
944
                }
945
                else
946 1
                    t.value = TOK.tilde; // ~
947 1
                return;
948 1
            case '^':
949 1
                p++;
950 1
                if (*p == '^')
951
                {
952 1
                    p++;
953 1
                    if (*p == '=')
954
                    {
955 1
                        p++;
956 1
                        t.value = TOK.powAssign; // ^^=
957
                    }
958
                    else
959 1
                        t.value = TOK.pow; // ^^
960
                }
961 1
                else if (*p == '=')
962
                {
963 1
                    p++;
964 1
                    t.value = TOK.xorAssign; // ^=
965
                }
966
                else
967 1
                    t.value = TOK.xor; // ^
968 1
                return;
969 1
            case '(':
970 1
                p++;
971 1
                t.value = TOK.leftParentheses;
972 1
                return;
973 1
            case ')':
974 1
                p++;
975 1
                t.value = TOK.rightParentheses;
976 1
                return;
977 1
            case '[':
978 1
                p++;
979 1
                t.value = TOK.leftBracket;
980 1
                return;
981 1
            case ']':
982 1
                p++;
983 1
                t.value = TOK.rightBracket;
984 1
                return;
985 1
            case '{':
986 1
                p++;
987 1
                t.value = TOK.leftCurly;
988 1
                return;
989 1
            case '}':
990 1
                p++;
991 1
                t.value = TOK.rightCurly;
992 1
                return;
993 1
            case '?':
994 1
                p++;
995 1
                t.value = TOK.question;
996 1
                return;
997 1
            case ',':
998 1
                p++;
999 1
                t.value = TOK.comma;
1000 1
                return;
1001 1
            case ';':
1002 1
                p++;
1003 1
                t.value = TOK.semicolon;
1004 1
                return;
1005 1
            case ':':
1006 1
                p++;
1007 1
                t.value = TOK.colon;
1008 1
                return;
1009 1
            case '$':
1010 1
                p++;
1011 1
                t.value = TOK.dollar;
1012 1
                return;
1013 1
            case '@':
1014 1
                p++;
1015 1
                t.value = TOK.at;
1016 1
                return;
1017 1
            case '*':
1018 1
                p++;
1019 1
                if (*p == '=')
1020
                {
1021 1
                    p++;
1022 1
                    t.value = TOK.mulAssign;
1023
                }
1024
                else
1025 1
                    t.value = TOK.mul;
1026 1
                return;
1027 1
            case '%':
1028 1
                p++;
1029 1
                if (*p == '=')
1030
                {
1031 1
                    p++;
1032 1
                    t.value = TOK.modAssign;
1033
                }
1034
                else
1035 1
                    t.value = TOK.mod;
1036 1
                return;
1037 1
            case '#':
1038
                {
1039 1
                    p++;
1040 1
                    Token n;
1041 1
                    scan(&n);
1042 1
                    if (n.value == TOK.identifier)
1043
                    {
1044 1
                        if (n.ident == Id.line)
1045
                        {
1046 1
                            poundLine();
1047 1
                            continue;
1048
                        }
1049
                        else
1050
                        {
1051 1
                            const locx = loc();
1052 1
                            warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
1053
                        }
1054
                    }
1055 1
                    else if (n.value == TOK.if_)
1056
                    {
1057 1
                        error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
1058
                    }
1059 1
                    t.value = TOK.pound;
1060 1
                    return;
1061
                }
1062 1
            default:
1063
                {
1064 1
                    dchar c = *p;
1065 1
                    if (c & 0x80)
1066
                    {
1067 1
                        c = decodeUTF();
1068
                        // Check for start of unicode identifier
1069 1
                        if (isUniAlpha(c))
1070 1
                            goto case_ident;
1071 0
                        if (c == PS || c == LS)
1072
                        {
1073 0
                            endOfLine();
1074 0
                            p++;
1075 0
                            continue;
1076
                        }
1077
                    }
1078 1
                    if (c < 0x80 && isprint(c))
1079 1
                        error("character '%c' is not a valid token", c);
1080
                    else
1081 0
                        error("character 0x%02x is not a valid token", c);
1082 1
                    p++;
1083 1
                    continue;
1084
                }
1085
            }
1086
        }
1087
    }
1088

1089
    /**
     * Return the token that follows `ct`, lexing it on demand.
     *
     * If `ct.next` is already set it is returned as is; otherwise a new token
     * is allocated, filled in by `scan`, linked in as `ct.next` and returned.
     *
     * Params:
     *  ct = token to look past
     * Returns:
     *  the token following `ct`
     */
    final Token* peek(Token* ct)
    {
        if (Token* cached = ct.next)
            return cached;

        Token* fresh = allocateToken();
        scan(fresh);
        ct.next = fresh;
        return fresh;
    }
1102

1103
    /*********************************
     * tk is on the opening (.
     * Look ahead and return the token that is past the matching closing ).
     *
     * The look-ahead also stops, returning the offending token itself, at
     * end of file, at a `;` outside of any curly braces, and at a `}` that
     * has no matching `{` inside the parentheses.
     */
    final Token* peekPastParen(Token* tk)
    {
        //printf("peekPastParen()\n");
        int openParens = 1;
        int openCurlies = 0;
        while (true)
        {
            tk = peek(tk);
            //tk.print();
            switch (tk.value)
            {
            case TOK.leftParentheses:
                ++openParens;
                break;
            case TOK.rightParentheses:
                // matching close found: hand back the token after it
                if (--openParens == 0)
                    return peek(tk);
                break;
            case TOK.leftCurly:
                ++openCurlies;
                break;
            case TOK.rightCurly:
                // unbalanced `}` ends the look-ahead
                if (--openCurlies < 0)
                    return tk;
                break;
            case TOK.semicolon:
                // a `;` only ends the look-ahead outside of braces
                if (openCurlies == 0)
                    return tk;
                break;
            case TOK.endOfFile:
                return tk;
            default:
                break;
            }
        }
    }
1146

1147
    /*******************************************
     * Parse the escape sequence at the current lexer position `p`,
     * advancing `p` past it. Errors are reported at the current token's
     * location.
     * Returns: the decoded character value
     */
    private uint escapeSequence()
    {
        const decoded = Lexer.escapeSequence(token.loc, p);
        return decoded;
    }
1154

1155
    /**
    Parse the given string literal escape sequence into a single character.

    `sequence` must point at the character that follows the backslash.
    Simple escapes, `\x`/`\u`/`\U` hex escapes, `\&name;` named entities and
    octal escapes of up to three digits are handled. Malformed sequences are
    reported through `.error` and a substitute value is returned.

    The end-of-file sentinel (0 or 0x1A) is never consumed, so the caller's
    own end-of-file handling still sees it.

    Params:
        loc = the location of the current token
        sequence = pointer to string with escape sequence to parse. this is a reference
                   variable that is also used to return the position after the sequence
    Returns:
        the escaped sequence as a single character
    */
    private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence)
    {
        const(char)* p = sequence; // cache sequence reference on stack
        scope(exit) sequence = p;

        uint c = *p;
        int ndigits;
        switch (c)
        {
        case '\'':
        case '"':
        case '?':
        case '\\':
        Lconsume:
            p++;
            break;
        case 'a':
            c = 7;
            goto Lconsume;
        case 'b':
            c = 8;
            goto Lconsume;
        case 'f':
            c = 12;
            goto Lconsume;
        case 'n':
            c = 10;
            goto Lconsume;
        case 'r':
            c = 13;
            goto Lconsume;
        case 't':
            c = 9;
            goto Lconsume;
        case 'v':
            c = 11;
            goto Lconsume;
        case 'u':
            ndigits = 4;
            goto Lhex;
        case 'U':
            ndigits = 8;
            goto Lhex;
        case 'x':
            ndigits = 2;
        Lhex:
            p++;
            c = *p;
            if (ishex(cast(char)c))
            {
                uint v = 0;
                int n = 0;
                while (1)
                {
                    // convert the hex digit in `c` to its numeric value
                    if (isdigit(cast(char)c))
                        c -= '0';
                    else if (islower(c))
                        c -= 'a' - 10;
                    else
                        c -= 'A' - 10;
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits)
                        break;
                    if (!ishex(cast(char)c))
                    {
                        .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
                // \x may encode any byte; \u and \U must be valid code points
                if (ndigits != 2 && !utf_isValidDchar(v))
                {
                    .error(loc, "invalid UTF character \\U%08x", v);
                    v = '?'; // recover with valid UTF character
                }
                c = v;
            }
            else
            {
                .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
                // Skip the offending character, but never step over the
                // end-of-file sentinel: the caller still has to see it.
                if (c != 0 && c != 0x1A)
                    p++;
            }
            break;
        case '&':
            // named character entity
            for (const idstart = ++p; 1; p++)
            {
                switch (*p)
                {
                case ';':
                    c = HtmlNamedEntity(idstart, p - idstart);
                    if (c == ~0)
                    {
                        .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
                        c = '?';
                    }
                    p++;
                    break;
                default:
                    // names are letters, with digits allowed after the first character
                    if (isalpha(*p) || (p != idstart && isdigit(*p)))
                        continue;
                    .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
                    c = '?';
                    break;
                }
                break;
            }
            break;
        case 0:
        case 0x1A:
            // end of file
            c = '\\';
            break;
        default:
            if (isoctal(cast(char)c))
            {
                uint v = 0;
                int n = 0;
                // at most three octal digits
                do
                {
                    v = v * 8 + (c - '0');
                    c = *++p;
                }
                while (++n < 3 && isoctal(cast(char)c));
                c = v;
                if (c > 0xFF)
                    .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
            }
            else
            {
                .error(loc, "undefined escape sequence \\%c", c);
                p++;
            }
            break;
        }
        return c;
    }
1301

1302
    /**
    Lex a wysiwyg ("what you see is what you get") string, in which no escape
    sequences are interpreted. `p` must point at the opening delimiter; that
    same character (i.e. backtick or double-quote) terminates the literal.

    A lone `\r` is stored as `\n` and the `\r` of a `\r\n` pair is dropped.
    Reaching end of file reports an error and leaves `p` on the end-of-file
    character.

    Params:
        result = pointer to the token that accepts the result
    */
    private void wysiwygStringConstant(Token* result)
    {
        result.value = TOK.string_;
        Loc start = loc();
        const closer = *p++;
        stringbuffer.setsize(0);
        while (true)
        {
            dchar ch = *p++;
            if (ch == '\n')
            {
                endOfLine();
            }
            else if (ch == '\r')
            {
                if (*p == '\n')
                    continue; // ignore
                ch = '\n'; // treat EndOfLine as \n character
                endOfLine();
            }
            else if (ch == 0 || ch == 0x1A)
            {
                error("unterminated string constant starting at %s", start.toChars());
                result.setString();
                // rewind `p` so it points to the EOF character
                --p;
                return;
            }
            else if (ch == closer)
            {
                result.setString(stringbuffer);
                stringPostfix(result);
                return;
            }
            else if (ch & 0x80)
            {
                // multi-byte UTF-8: decode from the lead byte, then step past it
                --p;
                const decoded = decodeUTF();
                ++p;
                if (decoded == PS || decoded == LS)
                    endOfLine();
                stringbuffer.writeUTF8(decoded);
                continue;
            }
            stringbuffer.writeByte(ch);
        }
    }
1360

1361
    /**************************************
     * Lex hex strings:
     *      x"0A ae 34FE BD"
     *
     * On entry `p` is advanced by one before reading the contents.
     * White space and line breaks between the hex digits are skipped,
     * including the Unicode line separator (U+2028) and paragraph separator
     * (U+2029). Each pair of hex digits produces one byte of the string.
     *
     * Params:
     *  t = token that receives the string value and postfix
     * Returns:
     *  TOK.hexadecimalString
     */
    private TOK hexStringConstant(Token* t)
    {
        Loc start = loc();
        uint n = 0;  // number of hex digits seen so far
        uint v = ~0; // dead assignment, needed to suppress warning
        p++;
        stringbuffer.setsize(0);
        while (1)
        {
            dchar c = *p++;
            switch (c)
            {
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                continue; // skip white space
            case '\r':
                if (*p == '\n')
                    continue; // ignore '\r' if followed by '\n'
                // Treat isolated '\r' as if it were a '\n'
                goto case '\n';
            case '\n':
                endOfLine();
                continue;
            case 0:
            case 0x1A:
                error("unterminated string constant starting at %s", start.toChars());
                t.setString();
                // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
                p--;
                return TOK.hexadecimalString;
            case '"':
                if (n & 1)
                {
                    error("odd number (%d) of hex characters in hex string", n);
                    stringbuffer.writeByte(v);
                }
                t.setString(stringbuffer);
                stringPostfix(t);
                return TOK.hexadecimalString;
            default:
                if (c >= '0' && c <= '9')
                    c -= '0';
                else if (c >= 'a' && c <= 'f')
                    c -= 'a' - 10;
                else if (c >= 'A' && c <= 'F')
                    c -= 'A' - 10;
                else if (c & 0x80)
                {
                    p--;
                    const u = decodeUTF();
                    p++;
                    if (u == PS || u == LS)
                    {
                        // A Unicode line break is white space like '\n':
                        // it must not be counted as a hex digit.
                        endOfLine();
                        continue;
                    }
                    error("non-hex character \\u%04x in hex string", u);
                }
                else
                    error("non-hex character '%c' in hex string", c);
                if (n & 1)
                {
                    // second digit of a pair: emit the completed byte
                    v = (v << 4) | c;
                    stringbuffer.writeByte(v);
                }
                else
                    v = c;
                n++;
                break;
            }
        }
        assert(0); // see bug 15731
    }
1438

1439
    /**
    Lex a delimited string. Some examples of delimited strings are:
    ---
    q"(foo(xxx))"      // "foo(xxx)"
    q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
    q"/foo]/"          // "foo]"
    q"HERE
    foo
    HERE"              // "foo\n"
    ---
    It is assumed that `p` points to the opening double-quote '"'.

    The first character after the quote selects the delimiter: one of
    `(`, `{`, `[`, `<` pairs with its closing bracket and may nest; an
    identifier starts a heredoc that ends at the same identifier at the start
    of a line; any other character is its own closing delimiter.
    On end of file an error is reported, the token gets an empty string and
    `p` is left on the end-of-file character.

    Params:
        result = pointer to the token that accepts the result
    */
1453
    private void delimitedStringConstant(Token* result)
1454
    {
1455 1
        result.value = TOK.string_;
1456 1
        Loc start = loc();
1457 1
        // opening delimiter; 0 means it has not been read yet
        dchar delimleft = 0;
1458 1
        dchar delimright = 0;
1459 1
        // 1 when the delimiters are a bracket pair that can nest
        uint nest = 1;
1460 1
        uint nestcount = ~0; // dead assignment, needed to suppress warning
1461 1
        // heredoc identifier, or null when this is not a heredoc
        Identifier hereid = null;
1462 1
        // set after the heredoc identifier: the rest of that line must be blank
        uint blankrol = 0;
1463 1
        // set while positioned at the start of a line
        uint startline = 0;
1464 1
        p++;
1465 1
        stringbuffer.setsize(0);
1466 1
        while (1)
1467
        {
1468 1
            dchar c = *p++;
1469
            //printf("c = '%c'\n", c);
1470 1
            switch (c)
1471
            {
1472 1
            case '\n':
1473
            Lnextline:
1474 1
                endOfLine();
1475 1
                startline = 1;
1476 1
                if (blankrol)
1477
                {
1478 1
                    // the line break ending the heredoc identifier line is not part of the string
                    blankrol = 0;
1479 1
                    continue;
1480
                }
1481 1
                if (hereid)
1482
                {
1483 1
                    stringbuffer.writeUTF8(c);
1484 1
                    continue;
1485
                }
1486 1
                break;
1487 0
            case '\r':
1488 0
                if (*p == '\n')
1489 0
                    continue; // ignore
1490 0
                c = '\n'; // treat EndOfLine as \n character
1491 0
                goto Lnextline;
1492 1
            case 0:
1493 1
            case 0x1A:
1494 1
                error("unterminated delimited string constant starting at %s", start.toChars());
1495 1
                result.setString();
1496
                // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1497 1
                p--;
1498 1
                return;
1499 1
            default:
1500 1
                if (c & 0x80)
1501
                {
1502 0
                    // non-ASCII: decode starting from the lead byte
                    p--;
1503 0
                    c = decodeUTF();
1504 0
                    p++;
1505 0
                    if (c == PS || c == LS)
1506 0
                        goto Lnextline;
1507
                }
1508 1
                break;
1509
            }
1510 1
            if (delimleft == 0)
1511
            {
1512 1
                // first character after the quote: work out the delimiter kind
                delimleft = c;
1513 1
                nest = 1;
1514 1
                nestcount = 1;
1515 1
                if (c == '(')
1516 1
                    delimright = ')';
1517 1
                else if (c == '{')
1518 1
                    delimright = '}';
1519 1
                else if (c == '[')
1520 1
                    delimright = ']';
1521 1
                else if (c == '<')
1522 1
                    delimright = '>';
1523 1
                else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
1524
                {
1525
                    // Start of identifier; must be a heredoc
1526 1
                    Token tok;
1527 1
                    // NOTE(review): backs up a single byte; for a multi-byte `c` this may not
                    // reach the start of the character - confirm against decodeUTF/scan
                    p--;
1528 1
                    scan(&tok); // read in heredoc identifier
1529 1
                    if (tok.value != TOK.identifier)
1530
                    {
1531 0
                        error("identifier expected for heredoc, not %s", tok.toChars());
1532 0
                        delimright = c;
1533
                    }
1534
                    else
1535
                    {
1536 1
                        hereid = tok.ident;
1537
                        //printf("hereid = '%s'\n", hereid.toChars());
1538 1
                        blankrol = 1;
1539
                    }
1540 1
                    nest = 0;
1541
                }
1542
                else
1543
                {
1544 1
                    // any other character closes the string when it appears again
                    delimright = c;
1545 1
                    nest = 0;
1546 1
                    if (isspace(c))
1547 1
                        error("delimiter cannot be whitespace");
1548
                }
1549
            }
1550
            else
1551
            {
1552 1
                if (blankrol)
1553
                {
1554 1
                    error("heredoc rest of line should be blank");
1555 1
                    blankrol = 0;
1556 1
                    continue;
1557
                }
1558 1
                if (nest == 1)
1559
                {
1560 1
                    // bracket delimiters: track depth, finish when the outermost one closes
                    if (c == delimleft)
1561 1
                        nestcount++;
1562 1
                    else if (c == delimright)
1563
                    {
1564 1
                        nestcount--;
1565 1
                        if (nestcount == 0)
1566 1
                            goto Ldone;
1567
                    }
1568
                }
1569 1
                else if (c == delimright)
1570 1
                    goto Ldone;
1571 1
                if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
1572
                {
1573 1
                    // an identifier at the start of a heredoc line may be the terminator
                    Token tok;
1574 1
                    auto psave = p;
1575 1
                    p--;
1576 1
                    scan(&tok); // read in possible heredoc identifier
1577
                    //printf("endid = '%s'\n", tok.ident.toChars());
1578 1
                    if (tok.value == TOK.identifier && tok.ident is hereid)
1579
                    {
1580
                        /* should check that rest of line is blank
1581
                         */
1582 1
                        goto Ldone;
1583
                    }
1584 1
                    // not the terminator: resume right after `c`
                    p = psave;
1585
                }
1586 1
                stringbuffer.writeUTF8(c);
1587 1
                startline = 0;
1588
            }
1589
        }
1590
    Ldone:
1591 1
        // the closing delimiter must be followed directly by the closing quote
        if (*p == '"')
1592 1
            p++;
1593 1
        else if (hereid)
1594 1
            error("delimited string must end in %s\"", hereid.toChars());
1595
        else
1596 1
            error("delimited string must end in %c\"", delimright);
1597 1
        result.setString(stringbuffer);
1598 1
        stringPostfix(result);
1599
    }
1600

1601
    /**
    Lex a token string. Some examples of token strings are:
    ---
    q{ foo(xxx) }    // " foo(xxx) "
    q{foo$(LPAREN)}  // "foo$(LPAREN)"
    q{{foo}"}"}      // "{foo}"}""
    ---
    It is assumed that `p` points to the opening curly-brace '{'.

    The contents are scanned as tokens so that curly braces can be balanced;
    the string value is the source text between the outermost braces.
    `inTokenStringConstant` is incremented for the duration of the call.

    Params:
        result = pointer to the token that accepts the result
    */
    private void tokenStringConstant(Token* result)
    {
        result.value = TOK.string_;

        uint depth = 1;
        const start = loc();
        ++p;
        const contents = p;
        ++inTokenStringConstant;
        scope(exit) --inTokenStringConstant;
        for (;;)
        {
            Token current;
            scan(&current);
            if (current.value == TOK.endOfFile)
            {
                error("unterminated token string constant starting at %s", start.toChars());
                result.setString();
                return;
            }
            if (current.value == TOK.leftCurly)
            {
                ++depth;
            }
            else if (current.value == TOK.rightCurly && --depth == 0)
            {
                // `p` is just past the closing brace; exclude it from the text
                result.setString(contents, p - 1 - contents);
                stringPostfix(result);
                return;
            }
        }
    }
1647

1648
    /**
    Scan a double-quoted string, resolving escape sequences while the string
    value is accumulated. The result is stored in the token `t`.
    This function assumes that `p` currently points to the opening double-quote
    of the string.

    `\u`, `\U` and `\&` escapes are written as UTF-8; every other escape is
    written as a single byte. On end of file an error is reported and `p` is
    left on the end-of-file character.

    Params:
        t = the token to set the resulting string to
    */
    private void escapeStringConstant(Token* t)
    {
        t.value = TOK.string_;

        const start = loc();
        ++p;
        stringbuffer.setsize(0);
        for (;;)
        {
            dchar ch = *p++;
            if (ch == '\\')
            {
                // remember which kind of escape this is before it is consumed
                const selector = *p;
                const asCodePoint = selector == 'u' || selector == 'U' || selector == '&';
                ch = escapeSequence();
                if (asCodePoint)
                {
                    stringbuffer.writeUTF8(ch);
                    continue;
                }
            }
            else if (ch == '\n')
            {
                endOfLine();
            }
            else if (ch == '\r')
            {
                if (*p == '\n')
                    continue; // ignore
                ch = '\n'; // treat EndOfLine as \n character
                endOfLine();
            }
            else if (ch == '"')
            {
                t.setString(stringbuffer);
                stringPostfix(t);
                return;
            }
            else if (ch == 0 || ch == 0x1A)
            {
                // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
                --p;
                error("unterminated string constant starting at %s", start.toChars());
                t.setString();
                return;
            }
            else if (ch & 0x80)
            {
                --p;
                ch = decodeUTF();
                if (ch == LS || ch == PS)
                {
                    ch = '\n';
                    endOfLine();
                }
                ++p;
                stringbuffer.writeUTF8(ch);
                continue;
            }
            stringbuffer.writeByte(ch);
        }
    }
1721

1722
    /**************************************
     * Lex a character literal and store its value in `t.unsvalue`.
     *
     * `p` is advanced by one before the character is read (presumably past
     * the opening single quote - confirm against the caller).
     * On any error `t.unsvalue` is set to '?'.
     *
     * Params:
     *  t = token that receives the character value
     * Returns:
     *  TOK.charLiteral, TOK.wcharLiteral or TOK.dcharLiteral, depending on
     *  the escape form used or on the size of the decoded code point
     */
1724
    private TOK charConstant(Token* t)
1725
    {
1726 1
        TOK tk = TOK.charLiteral;
1727
        //printf("Lexer::charConstant\n");
1728 1
        p++;
1729 1
        dchar c = *p++;
1730 1
        switch (c)
1731
        {
1732 1
        case '\\':
1733 1
            // the escape form decides the literal's type
            switch (*p)
1734
            {
1735 1
            case 'u':
1736 1
                t.unsvalue = escapeSequence();
1737 1
                tk = TOK.wcharLiteral;
1738 1
                break;
1739 1
            case 'U':
1740 1
            case '&':
1741 1
                t.unsvalue = escapeSequence();
1742 1
                tk = TOK.dcharLiteral;
1743 1
                break;
1744 1
            default:
1745 1
                t.unsvalue = escapeSequence();
1746 1
                break;
1747
            }
1748 1
            break;
1749 1
        case '\n':
1750
        L1:
1751 1
            endOfLine();
1752 1
            // falls into the '\r' case, which jumps to the error report below
            goto case;
1753 1
        case '\r':
1754 1
            goto case '\'';
1755 0
        case 0:
1756 0
        case 0x1A:
1757
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
1758 0
            p--;
1759 0
            goto case;
1760 1
        case '\'':
1761 1
            // a line break, end of file or an immediate closing quote: no character was given
            error("unterminated character constant");
1762 1
            t.unsvalue = '?';
1763 1
            return tk;
1764 1
        default:
1765 1
            if (c & 0x80)
1766
            {
1767 1
                // non-ASCII: decode starting from the lead byte
                p--;
1768 1
                c = decodeUTF();
1769 1
                p++;
1770 1
                if (c == LS || c == PS)
1771 0
                    goto L1;
1772 1
                // outside the surrogate range and below 0xFFFE it is a wchar literal, otherwise dchar
                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
1773 1
                    tk = TOK.wcharLiteral;
1774
                else
1775 1
                    tk = TOK.dcharLiteral;
1776
            }
1777 1
            t.unsvalue = c;
1778 1
            break;
1779
        }
1780 1
        if (*p != '\'')
1781
        {
1782 1
            // error recovery: skip ahead to a closing quote, or stop at a line break,
            // end of file or one of ; ) ] }
            while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
1783 1
                    *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
1784
            {
1785 1
                if (*p & 0x80)
1786
                {
1787 0
                    const s = p;
1788 0
                    c = decodeUTF();
1789 0
                    if (c == LS || c == PS)
1790
                    {
1791 0
                        // leave the Unicode line break unconsumed
                        p = s;
1792 0
                        break;
1793
                    }
1794
                }
1795 1
                p++;
1796
            }
1797

1798 1
            if (*p == '\'')
1799
            {
1800 1
                error("character constant has multiple characters");
1801 1
                p++;
1802
            }
1803
            else
1804 1
                error("unterminated character constant");
1805 1
            t.unsvalue = '?';
1806 1
            return tk;
1807
        }
1808 1
        // consume the closing quote
        p++;
1809 1
        return tk;
1810
    }
1811

1812
    /***************************************
     * Read the optional postfix character of a string literal.
     *
     * If `*p` is `c`, `w` or `d` it is stored in `t.postfix` and consumed;
     * otherwise `t.postfix` is set to 0 and `p` is left untouched.
     *
     * Params:
     *      t = token whose `postfix` field is filled in
     */
    private void stringPostfix(Token* t) pure @nogc
    {
        const ch = *p;
        if (ch == 'c' || ch == 'w' || ch == 'd')
        {
            t.postfix = ch;
            p++;
        }
        else
        {
            t.postfix = 0;
        }
    }
1830

1831
    /**************************************
     * Read in a number.
     *
     * An integer may be decimal, binary (`0b`), hexadecimal (`0x`) or
     * legacy octal (leading `0`); `_` separators are skipped. The suffixes
     * `u`/`U` and `L` are accepted in any combination. The value is stored
     * in `t.unsvalue`.
     *
     * If the text turns out to be a floating point literal, `p` is reset to
     * the start of the number and lexing is delegated to `inreal`.
     *
     * Params:
     *      t = token that receives the value
     * Returns:
     *      one of the integer literal tokens (`TOK.int32Literal`,
     *      `TOK.uns32Literal`, `TOK.int64Literal`, `TOK.uns64Literal`),
     *      or whatever `inreal` returns
     */
    private TOK number(Token* t)
    {
        int base = 10;
        const start = p;
        uinteger_t n = 0; // unsigned >=64 bit integer type
        int d;
        bool err = false;
        bool overflow = false;
        bool sawDigit = false; // at least one digit followed the base prefix
        dchar c = *p;
        if (c == '0')
        {
            // Inspect the character after the leading zero to pick the base.
            ++p;
            c = *p;
            switch (c)
            {
            case '0': .. case '9':
                base = 8;
                break;
            case 'x', 'X':
                ++p;
                base = 16;
                break;
            case 'b', 'B':
                ++p;
                base = 2;
                break;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
                    goto Ldone; // if ".identifier" or ".unicode"
                goto Lreal; // '.' is part of current token
            case 'i', 'f', 'F':
                goto Lreal;
            case '_':
                ++p;
                base = 8;
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                break;
            default:
                break;
            }
        }
        while (1)
        {
            c = *p;
            switch (c)
            {
            case '0': .. case '9':
                ++p;
                d = c - '0';
                break;
            case 'a': .. case 'f':
            case 'A': .. case 'F':
                ++p;
                // Outside hex, e/E/f/F mark a floating point literal.
                if (base != 16 && (c == 'e' || c == 'E' || c == 'f' || c == 'F'))
                    goto Lreal;
                d = (c >= 'a') ? c + 10 - 'a' : c + 10 - 'A';
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                goto Ldone;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (base == 10 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 2)
                    goto Ldone; // if ".identifier" or ".unicode"
                goto Lreal; // otherwise as part of a floating point literal
            case 'p', 'P', 'i':
            Lreal:
                // Rescan the whole literal as a real.
                p = start;
                return inreal(t);
            case '_':
                ++p;
                continue;
            default:
                goto Ldone;
            }
            // got a digit here, set any necessary flags, check for errors
            sawDigit = true;
            if (!err && d >= base)
            {
                const(char)* baseName = base == 2 ? "binary".ptr :
                                        base == 8 ? "octal".ptr :
                                        "decimal".ptr;
                error("%s digit expected, not `%c`", baseName, c);
                err = true;
            }
            // Avoid expensive overflow check if we aren't at risk of overflow
            if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
            {
                n = n * base + d;
            }
            else
            {
                import core.checkedint : mulu, addu;

                n = mulu(n, base, overflow);
                n = addu(n, d, overflow);
            }
        }
    Ldone:
        if (overflow && !err)
        {
            error("integer overflow");
            err = true;
        }
        // A bare "0x" / "0b" prefix with no digits is not a literal.
        if ((base == 2 || base == 16) && !sawDigit)
            error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
        enum FLAGS : int
        {
            none = 0,
            decimal = 1, // decimal
            unsigned = 2, // u or U suffix
            long_ = 4, // L suffix
        }

        FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
        // Parse trailing 'u', 'U', 'l' or 'L' in any combination
        const psuffix = p;
        while (1)
        {
            FLAGS seen;
            const sc = *p;
            if (sc == 'U' || sc == 'u')
                seen = FLAGS.unsigned;
            else if (sc == 'L')
                seen = FLAGS.long_;
            else if (sc == 'l')
            {
                seen = FLAGS.long_;
                error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
            }
            else
                break;

            p++;
            // The same suffix given twice is an error.
            if ((flags & seen) && !err)
            {
                error("unrecognized token");
                err = true;
            }
            flags = cast(FLAGS)(flags | seen);
        }
        if (base == 8 && n >= 8)
        {
            if (err)
                // can't translate invalid octal value, just show a generic message
                error("octal literals larger than 7 are no longer supported");
            else
                error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!%llo%.*s` instead",
                    n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
        }

        // Pick the token type from the suffix flags and the magnitude of n.
        const bool topBitSet = (n & 0x8000000000000000L) != 0;
        const bool needs64 = (n & 0xFFFFFFFF00000000L) != 0;
        TOK result;
        switch (flags)
        {
        case FLAGS.none:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            result = topBitSet ? TOK.uns64Literal
                   : needs64 ? TOK.int64Literal
                   : (n & 0x80000000) ? TOK.uns32Literal
                   : TOK.int32Literal;
            break;
        case FLAGS.decimal:
            /* First that fits: int, long, long long
             */
            result = topBitSet ? TOK.uns64Literal
                   : (n & 0xFFFFFFFF80000000L) ? TOK.int64Literal
                   : TOK.int32Literal;
            break;
        case FLAGS.unsigned:
        case FLAGS.decimal | FLAGS.unsigned:
            /* First that fits: uint, ulong
             */
            result = needs64 ? TOK.uns64Literal : TOK.uns32Literal;
            break;
        case FLAGS.decimal | FLAGS.long_:
            if (topBitSet && !err)
            {
                error("signed integer overflow");
                err = true;
            }
            result = topBitSet ? TOK.uns64Literal : TOK.int64Literal;
            break;
        case FLAGS.long_:
            result = topBitSet ? TOK.uns64Literal : TOK.int64Literal;
            break;
        case FLAGS.unsigned | FLAGS.long_:
        case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
            result = TOK.uns64Literal;
            break;
        default:
            debug
            {
                printf("%x\n", flags);
            }
            assert(0);
        }
        t.unsvalue = n;
        return result;
    }
2118

2119
    /**************************************
     * Read in characters, converting them to real.
     *
     * The literal text (with `_` separators removed) is collected in
     * `stringbuffer` and parsed with `CTFloat.parse`; the value is stored in
     * `t.floatvalue`. Suffixes `f`/`F`, `L` and `i` select the token type.
     *
     * Params:
     *      t = token that receives the value
     * Returns:
     *      one of the float or imaginary literal tokens
     * Bugs:
     *      Exponent overflow not detected.
     *      Too much requested precision is not detected.
     */
    private TOK inreal(Token* t)
    {
        //printf("Lexer::inreal()\n");
        debug
        {
            assert(*p == '.' || isdigit(*p));
        }
        bool isWellformedString = true;
        stringbuffer.setsize(0);
        auto pstart = p;
        bool hex = false;

        // True for characters that may appear in the mantissa digits.
        bool isMantissaChar(dchar ch)
        {
            return isdigit(ch) || (hex && isxdigit(ch)) || ch == '_';
        }

        dchar c = *p++;
        // Leading '0x'
        if (c == '0')
        {
            c = *p++;
            if (c == 'x' || c == 'X')
            {
                hex = true;
                c = *p++;
            }
        }
        // Digits to left of '.'
        while (isMantissaChar(c))
            c = *p++;
        if (c == '.')
            c = *p++;
        // Digits to right of '.'
        while (isMantissaChar(c))
            c = *p++;

        if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
        {
            c = *p++;
            if (c == '-' || c == '+')
                c = *p++;
            // Exponent digits; underscores are allowed but do not count as digits.
            bool anyexp = false;
            for (; isdigit(c) || c == '_'; c = *p++)
            {
                if (c != '_')
                    anyexp = true;
            }
            if (!anyexp)
            {
                error("missing exponent");
                isWellformedString = false;
            }
        }
        else if (hex)
        {
            error("exponent required for hex float");
            isWellformedString = false;
        }
        // `c` holds one character of lookahead; give it back.
        --p;
        // Copy the literal text, minus separators, into stringbuffer.
        for (; pstart < p; ++pstart)
        {
            if (*pstart != '_')
                stringbuffer.writeByte(*pstart);
        }
        stringbuffer.writeByte(0);
        auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
        TOK result;
        bool isOutOfRange = false;
        t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);

        // Precision suffix.
        const suffixChar = *p;
        if (suffixChar == 'F' || suffixChar == 'f')
        {
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
            result = TOK.float32Literal;
            p++;
        }
        else if (suffixChar == 'L' || suffixChar == 'l')
        {
            if (suffixChar == 'l')
                error("use 'L' suffix instead of 'l'");
            result = TOK.float80Literal;
            p++;
        }
        else
        {
            if (isWellformedString && !isOutOfRange)
                isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
            result = TOK.float64Literal;
        }

        // Imaginary suffix.
        if (*p == 'i' || *p == 'I')
        {
            if (*p == 'I')
                error("use 'i' suffix instead of 'I'");
            p++;
            if (result == TOK.float32Literal)
                result = TOK.imaginary32Literal;
            else if (result == TOK.float64Literal)
                result = TOK.imaginary64Literal;
            else if (result == TOK.float80Literal)
                result = TOK.imaginary80Literal;
        }
        const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
        if (isOutOfRange && !isLong)
        {
            const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
            error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
        }
        debug
        {
            switch (result)
            {
            case TOK.float32Literal, TOK.float64Literal, TOK.float80Literal:
            case TOK.imaginary32Literal, TOK.imaginary64Literal, TOK.imaginary80Literal:
                break;
            default:
                assert(0);
            }
        }
        return result;
    }
2283

2284
    /**
     * Update `scanloc.charnum` from the current scan pointer and return `scanloc`.
     *
     * Returns:
     *      `scanloc`, with `charnum` set to the 1-based offset of `p` from `line`
     */
    final Loc loc() pure @nogc
    {
        const column = cast(uint)(1 + p - line);
        scanloc.charnum = column;
        return scanloc;
    }
2289

2290
    /**
     * Report an error at the location of the current token (`token.loc`).
     *
     * Params:
     *      format = printf-style format string
     *      ... = arguments for `format`
     */
    final void error(const(char)* format, ...)
    {
        va_list ap;
        va_start(ap, format);
        scope (exit) va_end(ap);
        .verror(token.loc, format, ap);
    }
2297

2298
    /**
     * Report an error at an explicit source location.
     *
     * Params:
     *      loc = location to report
     *      format = printf-style format string
     *      ... = arguments for `format`
     */
    final void error(const ref Loc loc, const(char)* format, ...)
    {
        va_list ap;
        va_start(ap, format);
        scope (exit) va_end(ap);
        .verror(loc, format, ap);
    }
2305

2306
    /**
     * Report a deprecation at the location of the current token (`token.loc`).
     *
     * Params:
     *      format = printf-style format string
     *      ... = arguments for `format`
     */
    final void deprecation(const(char)* format, ...)
    {
        va_list ap;
        va_start(ap, format);
        scope (exit) va_end(ap);
        .vdeprecation(token.loc, format, ap);
    }
2313

2314
    /*********************************************
     * parse:
     *      #line linnum [filespec]
     * also allow __LINE__ for linnum, and __FILE__ for filespec
     *
     * The new line number and file name only take effect once the end of
     * the directive line is reached, and only when not inside a token
     * string (`inTokenStringConstant`). Anything else on the line yields
     * the error "#line integer ["filespec"]\n expected".
     *
     * A quoted filespec is copied byte for byte, so multi-byte UTF-8
     * sequences in the file name are preserved in full.
     */
    private void poundLine()
    {
        auto linnum = this.scanloc.linnum;
        const(char)* filespec = null;
        const loc = this.loc();
        Token tok;
        scan(&tok);
        if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
        {
            // Store one less than the requested number: the value must survive
            // the int round trip, otherwise it is out of range.
            const lin = cast(int)(tok.unsvalue - 1);
            if (lin != tok.unsvalue - 1)
                error("line number `%lld` out of range", cast(ulong)tok.unsvalue);
            else
                linnum = lin;
        }
        else if (tok.value == TOK.line)
        {
            // TOK.line: keep the current line number
        }
        else
            goto Lerr;
        while (1)
        {
            switch (*p)
            {
            case 0:
            case 0x1A:
            case '\n':
            Lnewline:
                // End of the directive: commit the new location.
                if (!inTokenStringConstant)
                {
                    this.scanloc.linnum = linnum;
                    if (filespec)
                        this.scanloc.filename = filespec;
                }
                return;
            case '\r':
                p++;
                if (*p != '\n')
                {
                    // lone '\r' acts as the line terminator
                    p--;
                    goto Lnewline;
                }
                continue;
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                p++;
                continue; // skip white space
            case '_':
                if (memcmp(p, "__FILE__".ptr, 8) == 0)
                {
                    p += 8;
                    filespec = mem.xstrdup(scanloc.filename);
                    continue;
                }
                goto Lerr;
            case '"':
                if (filespec)
                    goto Lerr; // only one filespec allowed
                stringbuffer.setsize(0);
                p++;
                while (1)
                {
                    uint c;
                    c = *p;
                    switch (c)
                    {
                    case '\n':
                    case '\r':
                    case 0:
                    case 0x1A:
                        goto Lerr; // unterminated filespec
                    case '"':
                        stringbuffer.writeByte(0);
                        filespec = mem.xstrdup(cast(const(char)*)stringbuffer[].ptr);
                        p++;
                        break;
                    default:
                    {
                        const(char)* seqStart = p;
                        if (c & 0x80)
                        {
                            uint u = decodeUTF();
                            if (u == PS || u == LS)
                                goto Lerr;
                        }
                        stringbuffer.writeByte(c);
                        // decodeUTF() leaves `p` on the last byte of a multi-byte
                        // sequence; copy the continuation bytes too so that the
                        // file name stays valid UTF-8 instead of being cut down
                        // to its lead byte.
                        for (auto q = seqStart + 1; q <= p; ++q)
                            stringbuffer.writeByte(*q);
                        p++;
                        continue;
                    }
                    }
                    break;
                }
                continue;
            default:
                if (*p & 0x80)
                {
                    // Unicode line/paragraph separators also end the directive.
                    uint u = decodeUTF();
                    if (u == PS || u == LS)
                        goto Lnewline;
                }
                goto Lerr;
            }
        }
    Lerr:
        error(loc, "#line integer [\"filespec\"]\\n expected");
    }
2424

2425
    /********************************************
     * Decode the UTF-8 sequence starting at `p`.
     *
     * `*p` must have its high bit set. An error is issued when
     * `utf_decodeChar` reports a problem with the sequence.
     * On return `p` has been advanced by one less than the number of bytes
     * `utf_decodeChar` consumed, i.e. to the last byte of the sequence.
     *
     * Returns:
     *      the decoded character
     */
    private uint decodeUTF()
    {
        const first = p;
        assert(*first & 0x80);
        // Offer the decoder at most 4 bytes, stopping early at a 0 byte
        size_t avail = 1;
        while (avail < 4 && first[avail])
            ++avail;
        size_t consumed = 0;
        dchar decoded;
        const problem = utf_decodeChar(first[0 .. avail], consumed, decoded);
        p += consumed - 1;
        if (problem)
            error("%.*s", cast(int)problem.length, problem.ptr);
        return decoded;
    }
2449

2450
    /***************************************************
     * Parse doc comment embedded between t.ptr and p.
     * Remove trailing blanks and tabs from lines.
     * Replace all newlines with \n.
     * Remove leading comment character from each line.
     * Decide if it's a lineComment or a blockComment.
     * Append to previous one for this token.
     *
     * Params:
     *      t = token whose `ptr` marks the start of the comment and whose
     *          `lineComment` / `blockComment` receives the text
     *      lineComment = non-zero if the comment may be a line comment; it is
     *          stored as one only when `anyToken` is also set
     *      newParagraph = if true, an extra newline is added between
     *          adjoining doc comments
     */
    private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
    {
        /* ct tells us which kind of comment it is: '/', '*', or '+'
         */
        const ct = t.ptr[2];
        /* Start of comment text skips over / * *, / + +, or / / /
         */
        const(char)* q = t.ptr + 3; // start of comment text
        const(char)* qend = p;
        if (ct == '*' || ct == '+')
            qend -= 2; // drop the closing delimiter

        /* Scan over initial row of ****'s or ++++'s or ////'s
         */
        while (q < qend && *q == ct)
            ++q;

        /* Remove leading spaces until start of the comment
         */
        bool atLineStart = false;
        if (ct == '/')
        {
            while (q < qend && (*q == ' ' || *q == '\t'))
                ++q;
        }
        else if (q < qend)
        {
            // A block comment that opens with a line break starts on a fresh line.
            if (*q == '\r')
            {
                ++q;
                if (q < qend && *q == '\n')
                    ++q;
                atLineStart = true;
            }
            else if (*q == '\n')
            {
                ++q;
                atLineStart = true;
            }
        }

        /* Remove trailing row of ****'s or ++++'s
         */
        if (ct != '/')
        {
            while (q < qend && qend[-1] == ct)
                --qend;
        }

        /* Comment is now [q .. qend].
         * Canonicalize it into buf[].
         */
        OutBuffer buf;

        // Drop blanks and tabs from the end of buf.
        void trimTrailingWhitespace()
        {
            const text = buf[];
            auto keep = text.length;
            while (keep && (text[keep - 1] == ' ' || text[keep - 1] == '\t'))
                --keep;
            buf.setsize(keep);
        }

        for (; q < qend; q++)
        {
            char c = *q;
            bool isNewline = false;

            if (c == '*' || c == '+')
            {
                if (atLineStart && c == ct)
                {
                    // Leading comment character of a line: drop it together
                    // with the whitespace that preceded it.
                    atLineStart = false;
                    trimTrailingWhitespace();
                    continue;
                }
            }
            else if (c == ' ' || c == '\t')
            {
                // whitespace is copied and does not end the "line start" state
            }
            else if (c == '\r')
            {
                if (q[1] == '\n')
                    continue; // skip the \r
                isNewline = true;
            }
            else if (c == '\n')
            {
                isNewline = true;
            }
            else if (c == 226 && q[1] == 128 && (q[2] == 168 || q[2] == 169))
            {
                // UTF-8 encoded LS or PS
                q += 2;
                isNewline = true;
            }
            else
            {
                atLineStart = false;
            }

            if (isNewline)
            {
                c = '\n'; // replace all newlines with \n
                atLineStart = true;
                trimTrailingWhitespace();
            }
            buf.writeByte(c);
        }
        /* Trim trailing whitespace (if the last line does not have newline)
         */
        trimTrailingWhitespace();

        // Always end with a newline
        const text = buf[];
        if (text.length == 0 || text[$ - 1] != '\n')
            buf.writeByte('\n');

        // It's a line comment if the start of the doc comment comes
        // after other non-whitespace on the same line.
        auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
        // Combine with previous doc comment, if any
        if (*dc)
            *dc = combineComments(*dc, buf[], newParagraph).toDString();
        else
            *dc = buf.extractSlice(true);
    }
2591

2592
    /********************************************
     * Combine two document comments into one,
     * separated by an extra newline if newParagraph is true.
     *
     * A newline is also inserted after `c1` when it is non-empty and does
     * not already end with one.
     *
     * Params:
     *      c1 = first comment
     *      c2 = second comment
     *      newParagraph = whether to add a separating newline
     * Returns:
     *      `c2.ptr` if `c1` is null, `c1.ptr` if `c2` is null, otherwise a
     *      newly allocated, 0-terminated concatenation
     */
    static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
    {
        //printf("Lexer::combineComments('%s', '%s', '%i')\n", c1, c2, newParagraph);
        if (!c1)
            return c2.ptr;
        if (!c2)
            return c1.ptr;

        const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n'
        const int insertNewLine = (c1.length && c1[$ - 1] != '\n') ? 1 : 0;
        const retSize = c1.length + insertNewLine + newParagraphSize + c2.length;

        auto merged = cast(char*)mem.xmalloc_noscan(retSize + 1);

        // Fill the buffer front to back using a running write position.
        size_t pos = c1.length;
        merged[0 .. pos] = c1[];
        if (insertNewLine)
            merged[pos++] = '\n';
        if (newParagraph)
            merged[pos++] = '\n';
        merged[pos .. pos + c2.length] = c2[];
        pos += c2.length;
        merged[pos] = 0;
        return merged;
    }
2619

2620
private:
2621
    /// Record a line break: `line` is set to `p` and `scanloc.linnum` is incremented.
    void endOfLine() pure @nogc @safe
    {
        line = p;
        ++scanloc.linnum;
    }
2626
}
2627

2628
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    /// Set once `initialize` has run
    private __gshared bool initdone = false;

    // Note: Those properties need to be guarded by a call to `init`
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;
    __gshared char[8 + 1] time;
    __gshared char[24 + 1] timestamp;

    /**
     * Fill in `date`, `time` and `timestamp`. Only the first call does any work.
     *
     * The time is taken from the `SOURCE_DATE_EPOCH` environment variable
     * when it is set, otherwise from the system clock. The three strings
     * are slices of the text returned by C `ctime`.
     *
     * Params:
     *      loc = location used when reporting a malformed `SOURCE_DATE_EPOCH`
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
        {
            return;
        }
        initdone = true;

        time_t now;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        auto epochText = getenv("SOURCE_DATE_EPOCH");
        if (!epochText)
        {
            .time(&now);
        }
        else if (!now.parseDigits(epochText.toDString()))
        {
            error(loc, "Value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epochText);
        }

        const stamp = ctime(&now);
        assert(stamp);
        // Month and day from offset 4, year from offset 20.
        sprintf(&date[0], "%.6s %.4s", stamp + 4, stamp + 20);
        // Time of day from offset 11.
        sprintf(&time[0], "%.8s", stamp + 11);
        // First 24 characters of the ctime text.
        sprintf(&timestamp[0], "%.24s", stamp);
    }
}
2663

2664
// Well-formed escape sequences: each one must decode to the expected value,
// consume its whole input, and never reach the diagnostic handler.
unittest
{
    import dmd.console;

    // Any diagnostic reported while these inputs are scanned is a failure.
    nothrow bool failOnDiagnostic(const ref Loc loc, Color headerColor, const(char)* header,
                                  const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &failOnDiagnostic;

    // `input` is the text that follows the backslash of the escape.
    static void check(T)(string input, T want)
    {
        auto cursor = cast(const(char)*)input.ptr;
        assert(want == Lexer.escapeSequence(Loc.initial, cursor));
        // The scan must stop exactly at the end of the input.
        assert(cursor == input.ptr + input.length);
    }

    // Single-character escapes
    check(`'`, '\'');
    check(`"`, '"');
    check(`?`, '?');
    check(`\`, '\\');
    check(`0`, '\0');
    check(`a`, '\a');
    check(`b`, '\b');
    check(`f`, '\f');
    check(`n`, '\n');
    check(`r`, '\r');
    check(`t`, '\t');
    check(`v`, '\v');

    // Two-digit hex escapes
    check(`x00`, 0x00);
    check(`xff`, 0xff);
    check(`xFF`, 0xff);
    check(`xa7`, 0xa7);
    check(`x3c`, 0x3c);
    check(`xe2`, 0xe2);

    // Octal escapes
    check(`1`, '\1');
    check(`42`, '\42');
    check(`357`, '\357');

    // Four-digit and eight-digit Unicode escapes
    check(`u1234`, '\u1234');
    check(`uf0e4`, '\uf0e4');

    check(`U0001f603`, '\U0001f603');

    // Named character entities
    check(`&quot;`, '"');
    check(`&lt;`, '<');
    check(`&gt;`, '>');

    diagnosticHandler = null;
}
2716
// Malformed escape sequences: each one must report exactly the expected
// error message, return the expected value, and consume the expected
// number of characters.
unittest
{
    import dmd.console;
    string expectedMessage;
    bool sawError;

    // Formats the reported diagnostic and compares it with `expectedMessage`.
    nothrow bool compareDiagnostic(const ref Loc loc, Color headerColor, const(char)* header,
                                   const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        sawError = true;
        char[100] text = void;
        const written = vsprintf(text.ptr, format, ap);
        auto actualMessage = text[0 .. written];
        assert(expectedMessage == actualMessage);
        return true;
    }

    diagnosticHandler = &compareDiagnostic;

    void checkError(string input, string message, dchar wantValue, uint wantScanLength)
    {
        // Remember the global error count so this test leaves it unchanged.
        uint savedErrors = global.errors;
        sawError = false;
        expectedMessage = message;

        auto cursor = cast(const(char)*)input.ptr;
        auto gotValue = Lexer.escapeSequence(Loc.initial, cursor);
        assert(sawError);
        assert(wantValue == gotValue);

        auto gotScanLength = cursor - input.ptr;
        assert(wantScanLength == gotScanLength);
        global.errors = savedErrors;
    }

    // Unknown escape characters
    checkError("c", `undefined escape sequence \c`, 'c', 1);
    checkError("!", `undefined escape sequence \!`, '!', 1);

    // Too few hex digits
    checkError("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    checkError("u1", `escape hex sequence has 1 hex digits instead of 4`, 0x1, 2);
    checkError("u12", `escape hex sequence has 2 hex digits instead of 4`, 0x12, 3);
    checkError("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    checkError("U0", `escape hex sequence has 1 hex digits instead of 8`, 0x0, 2);
    checkError("U00", `escape hex sequence has 2 hex digits instead of 8`, 0x00, 3);
    checkError("U000", `escape hex sequence has 3 hex digits instead of 8`, 0x000, 4);
    checkError("U0000", `escape hex sequence has 4 hex digits instead of 8`, 0x0000, 5);
    checkError("U0001f", `escape hex sequence has 5 hex digits instead of 8`, 0x0001f, 6);
    checkError("U0001f6", `escape hex sequence has 6 hex digits instead of 8`, 0x0001f6, 7);
    checkError("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    // Values that are not valid UTF characters
    checkError("ud800", `invalid UTF character \U0000d800`, '?', 5);
    checkError("udfff", `invalid UTF character \U0000dfff`, '?', 5);
    checkError("U00110000", `invalid UTF character \U00110000`, '?', 9);

    // Hex escapes whose first digit is not a hex digit
    checkError("xg0", `undefined escape hex sequence \xg`, 'g', 2);
    checkError("ug000", `undefined escape hex sequence \ug`, 'g', 2);
    checkError("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    // Bad named entities
    checkError("&BAD;", `unnamed character entity &BAD;`, '?', 5);
    checkError("&quot", `unterminated named entity &quot;`, '?', 5);

    // Octal value above \377
    checkError("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}

Read our documentation on viewing source code.

Loading