1
/**
2
 * Check the arguments to `printf` and `scanf` against the `format` string.
3
 *
4
 * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8
 * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
9
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10
 */
11
module dmd.chkformat;
12

13
//import core.stdc.stdio : printf, scanf;
14
import core.stdc.ctype : isdigit;
15

16
import dmd.errors;
17
import dmd.expression;
18
import dmd.globals;
19
import dmd.mtype;
20
import dmd.target;
21

22

23
/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Incompatibilities are reported through `deprecation`.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string requires more arguments than `args`
 *      supplies (checking stops at that point); `false` otherwise. Type
 *      mismatches and invalid specifiers are reported but do not change
 *      the return value.
 * References:
 * C99 7.19.6.1
 * http://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);

    // Target properties do not change while one format string is checked.
    const c_longsize = target.c.longsize;
    const is64bit = global.params.is64bit;

    size_t n = 0;       // index of the next unconsumed element of `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, for diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Hands out the next argument, or reports the shortage and yields `null`.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Reports that `arg` has type `tactual` where `texpect` is required.
        void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)specifier.length, specifier.ptr, texpect, tactual.toChars());
        }

        // Consumes the argument that supplies a `*` width or precision and
        // checks that it is an `int`. Yields `false` when no argument is left.
        bool checkStarArg(const char* prefix)
        {
            auto starArg = getNextArg();
            if (!starArg)
                return false;
            auto starType = starArg.type.toBasetype();
            if (starType.ty != Tint32 && starType.ty != Tuns32)
                errorMsg(prefix, slice, starArg, "int", starType);
            return true;
        }

        if (widthStar && !checkStarArg("width "))
            return true;

        if (precisionStar && !checkStarArg("precision "))
            return true;

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // only dereferenced after `t.ty == Tpointer` holds

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, slice, e, "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, slice, e, "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int" : "long"), t);
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, slice, e, "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t", t);
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, slice, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == (is64bit ? 8 : 4)))
                    errorMsg(null, slice, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, slice, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, slice, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, slice, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, slice, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, slice, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // The pointee is a single byte; a 16-bit pointee would let
                // `short*` through and reject the `byte*` the message asks for.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, slice, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, slice, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, slice, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, slice, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, slice, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.isWindows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, slice, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);      // "%%" was skipped before any argument was fetched
        }
    }
    return false;
}
269

270
/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Incompatibilities are reported through `deprecation`.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string requires more arguments than `args`
 *      supplies (checking stops at that point); `false` otherwise. Type
 *      mismatches and invalid specifiers are reported but do not change
 *      the return value.
 * References:
 * C99 7.19.6.2
 * http://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    // Target properties do not change while one format string is checked.
    const c_longsize = target.c.longsize;
    const is64bit = global.params.is64bit;

    size_t n = 0;       // index of the next unconsumed element of `args`
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, for diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Hands out the next argument, or reports the shortage and yields `null`.
        // `asterisk` is always false here: suppressed conversions were skipped above.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Reports that `arg` has type `tactual` where `texpect` is required.
        void errorMsg(const char* prefix, const char[] specifier, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)specifier.length, specifier.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // only dereferenced after `t.ty == Tpointer` holds

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, slice, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // The pointee is a single byte; a 16-bit pointee would let
                // `short*` through and reject the `byte*` the message asks for.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, slice, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, slice, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, slice, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tuns64 : Tuns32)))
                    errorMsg(null, slice, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.ty == (is64bit ? Tint64 : Tint32)))
                    errorMsg(null, slice, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, slice, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, slice, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, slice, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                // Select the pointee by the C `long` size, exactly as the
                // message does; pointer width and `long` width differ on
                // targets where `long` stays 4 bytes in 64-bit mode.
                if (!(t.ty == Tpointer && tnext.ty == (c_longsize == 4 ? Tuns32 : Tuns64)))
                    errorMsg(null, slice, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, slice, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                // Always a 64-bit pointee, mirroring the `jd` case and the
                // `ulong*` named in the message.
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, slice, e, "ulong*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, slice, e, "float*", t);
                break;
            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, slice, e, "double*", t);
                break;
            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, slice, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, slice, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                const twchar_t = global.params.isWindows ? Twchar : Tdchar;
                if (!(t.ty == Tpointer && tnext.ty == twchar_t))
                    errorMsg(null, slice, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, slice, e, "void**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.percent:
                assert(0);      // "%%" was skipped before any argument was fetched
        }
    }
    return false;
}
479

480
private:
481

482
/**************************************
 * Parse one scanf *format specifier*, which has the form:
 *
 * `%[*][width][length]specifier`
 *
 * An optional `[...]` scanset is accepted between the width and the
 * length/specifier part; only the presence of the closing `]` is checked.
 *
 * Params:
 *      format = format string
 *      idx = on entry, the index of the `%` that starts the specifier;
 *          on exit, the index just past the text that was consumed,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      the recognised `Format`, `Format.percent` for `%%`, or
 *      `Format.error` when the text cannot be parsed
 */
pure nothrow @safe
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk)
{
    assert(format[idx] == '%');
    const length = format.length;
    size_t pos = idx;

    // Records how far parsing got and signals failure.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Moves one character forward; `true` means the string is exhausted.
    bool stepHitsEnd()
    {
        ++pos;
        return pos == length;
    }

    // Skip the leading `%`.
    if (stepHitsEnd())
        return fail();

    // `%%` stands for a literal percent sign.
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // Optional `*` sub-specifier; only recorded when something follows it.
    if (format[pos] == '*')
    {
        if (stepHitsEnd())
            return fail();
        asterisk = true;
    }

    // Optional field width: a run of decimal digits.
    while (isdigit(format[pos]))
    {
        if (stepHitsEnd())
            return fail();
    }

    /* Optional scanset. Its contents are not interpreted; it merely has to
     * be closed by a `]`.
     * NOTE(review): a length/specifier is still required after the `]`;
     * confirm that this is the intended treatment of `%[...]`.
     */
    if (format[pos] == '[')
    {
        while (pos < length && format[pos] != ']')
            ++pos;

        // ran off the end without meeting `]`
        if (pos == length)
            return fail();

        // Step over `]`. This is a separate check from the one above so
        // that `idx` reports the position actually reached.
        if (stepHitsEnd())
            return fail();
    }

    /* Length modifier and conversion specifier. `idx` is moved to the
     * position the generic parser stopped at whether or not it succeeded,
     * so the result can be passed straight through.
     */
    char genSpec;
    const parsed = parseGenericFormatSpecifier(format, pos, genSpec);
    idx = pos;
    return parsed;
}
570

571
/**************************************
 * Parse one printf *format specifier*, which has the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Params:
 *      format = format string
 *      idx = on entry, the index of the `%` that starts the specifier;
 *          on exit, the index just past the text that was consumed,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 * Returns:
 *      the recognised `Format`, `Format.percent` for `%%`, or
 *      `Format.error` when the text cannot be parsed or combines a
 *      flag, width or precision with a specifier that rejects it
 */
pure nothrow @safe
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar)
{
    assert(format[idx] == '%');
    const length = format.length;
    size_t pos = idx;

    // What has been seen so far; consulted once the specifier is known.
    bool sawHash;           // `#`
    bool sawZero;           // `0`
    bool sawOtherFlag;      // `-`, `+` or space
    bool sawWidth;
    bool sawPrecision;

    // Records how far parsing got and signals failure.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Moves one character forward; `true` means the string is exhausted.
    bool stepHitsEnd()
    {
        ++pos;
        return pos == length;
    }

    // Skip the leading `%`.
    if (stepHitsEnd())
        return fail();

    // `%%` stands for a literal percent sign.
    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    /* Flags: any number of them, in any order.
     */
    flagScan: while (true)
    {
        switch (format[pos])
        {
            case '-', '+', ' ':
                sawOtherFlag = true;
                break;

            case '#':
                sawHash = true;
                break;

            case '0':
                sawZero = true;
                break;

            default:
                break flagScan;     // first non-flag character
        }
        if (stepHitsEnd())
            return fail();
    }

    /* Field width: either `*` or a decimal number that does not start
     * with `0` (a leading `0` was taken as a flag above).
     */
    const widthLead = format[pos];
    if (widthLead == '*')
    {
        sawWidth = true;
        widthStar = true;
        if (stepHitsEnd())
            return fail();
    }
    else if ('1' <= widthLead && widthLead <= '9')
    {
        sawWidth = true;
        do
        {
            if (stepHitsEnd())
                return fail();
        }
        while ('0' <= format[pos] && format[pos] <= '9');
    }

    /* Precision: a `.` followed by `*`, by digits, or by nothing at all.
     */
    if (format[pos] == '.')
    {
        sawPrecision = true;
        if (stepHitsEnd())
            return fail();

        const precisionLead = format[pos];
        if (precisionLead == '*')
        {
            precisionStar = true;
            if (stepHitsEnd())
                return fail();
        }
        else if ('0' <= precisionLead && precisionLead <= '9')
        {
            do
            {
                if (stepHitsEnd())
                    return fail();
            }
            while ('0' <= format[pos] && format[pos] <= '9');
        }
    }

    /* Length modifier and conversion specifier.
     */
    char genSpec;
    const parsed = parseGenericFormatSpecifier(format, pos, genSpec);
    if (parsed == Format.error)
        return fail();

    /* Reject the flag/width/precision combinations this checker does not
     * accept for the given generic specifier.
     */
    bool rejected = false;
    if (genSpec == 'c' || genSpec == 's')
        rejected = sawHash || sawZero;
    else if (genSpec == 'd' || genSpec == 'i')
        rejected = sawHash;
    else if (genSpec == 'n')
        rejected = sawHash || sawZero || sawPrecision || sawWidth || sawOtherFlag;

    if (rejected)
        return fail();

    idx = pos;
    return parsed;  // success
}
731

732
/* The kinds of format specification the checkers tell apart.
   Variations that make no difference to argument checking share one
   member (for example f, e, g, a, etc. are all `g`).

   The comments name the printf argument type; for `scanf`, every
   format takes a pointer to that type instead.
 */
enum Format
{
    // signed integers
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t
    td,         // ptrdiff_t

    // unsigned integers
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t

    // floating point
    g,          // double for printf, float for scanf
    lg,         // double (scanf)
    Lg,         // long double (printf and scanf)

    // strings and characters
    s,          // char string (printf and scanf)
    ls,         // wchar_t string (printf and scanf)
    c,          // char (printf)
    lc,         // wint_t (printf)

    // pointers
    p,          // pointer

    // `n` family: where the character count is stored
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t

    // no argument / parse result markers
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}
773

774
/**************************************
 * Parse the *length specifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *           format is `hhd`. It is left at `char.init` when `format` ends
 *           before a conversion character is reached.
 * Returns:
 *      Format, or `Format.error` if `format` ends prematurely, the conversion
 *      character is unknown, or the length modifier is not valid for it
 */
pure @safe nothrow
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier)
{
    const length = format.length;

    /* Nothing left to parse: report an error instead of indexing
     * past the end of `format`.
     */
    if (idx >= length)
        return Format.error;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    bool lm1;        // if jztL
    bool lm2;        // if `hh` or `ll`
    if (lm == 'j' ||
        lm == 'z' ||
        lm == 't' ||
        lm == 'L')
    {
        ++idx;
        if (idx == length)
            return Format.error;
        lm1 = true;
    }
    else if (lm == 'h' || lm == 'l')
    {
        ++idx;
        if (idx == length)
            return Format.error;
        // a repeated `h` or `l` forms the two-character modifier
        lm2 = lm == format[idx];
        if (lm2)
        {
            ++idx;
            if (idx == length)
                return Format.error;
        }
    }

    /* Read the `specifier`
     *
     * If no length modifier was present, `lm` holds the conversion
     * character itself, so none of the `lm == ...` tests below match
     * and the plain variant is selected.
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'h' && lm2 ? Format.hhd :
                            lm == 'h'        ? Format.hd  :
                            lm == 'l' && lm2 ? Format.lld :
                            lm == 'l'        ? Format.ld  :
                            lm == 'j'        ? Format.jd  :
                            lm == 'z'        ? Format.zd  :
                            lm == 't'        ? Format.td  :
                                               Format.d;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            if (lm == 'L')
                specifier = Format.error;
            else
                // `z` and `t` share the members used by the signed conversions
                specifier = lm == 'h' && lm2 ? Format.hhu :
                            lm == 'h'        ? Format.hu  :
                            lm == 'l' && lm2 ? Format.llu :
                            lm == 'l'        ? Format.lu  :
                            lm == 'j'        ? Format.ju  :
                            lm == 'z'        ? Format.zd  :
                            lm == 't'        ? Format.td  :
                                               Format.u;
            break;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'a':
        case 'A':
            // `L` must be tested first because it also sets `lm1`
            if (lm == 'L')
                specifier = Format.Lg;
            else if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.lg : Format.g;
            break;

        case 'c':
            if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.lc : Format.c;
            break;

        case 's':
            if (lm1 || lm2 || lm == 'h')
                specifier = Format.error;
            else
                specifier = lm == 'l' ? Format.ls : Format.s;
            break;

        case 'p':
            // no length modifier at all is accepted for `p`
            if (lm1 || lm2 || lm == 'h' || lm == 'l')
                specifier = Format.error;
            else
                specifier = Format.p;
            break;

        case 'n':
            if (lm == 'L')
                specifier = Format.error;
            else
                specifier = lm == 'l' && lm2 ? Format.lln :
                            lm == 'l'        ? Format.ln  :
                            lm == 'h' && lm2 ? Format.hhn :
                            lm == 'h'        ? Format.hn  :
                            lm == 'j'        ? Format.jn  :
                            lm == 'z'        ? Format.zn  :
                            lm == 't'        ? Format.tn  :
                                               Format.n;
            break;

        default:
            specifier = Format.error;
            break;
    }

    // step over the conversion character, also when it was rejected
    ++idx;
    return specifier; // success, or Format.error set by the switch above
}
921

922
/* Exercises the three format parsers: every `Format` member, the synonym
 * conversion characters, width / precision / assignment-suppression handling,
 * and the rejected (too short, undefined, malformed) formats. For the
 * `parsePrintfFormatSpecifier` and `parseScanfFormatSpecifier` cases it also
 * checks the index left in `idx` after the call.
 */
unittest
{
    /* parseGenericFormatSpecifier
     */

    char genSpecifier;
    size_t idx;

    assert(parseGenericFormatSpecifier("hhd", idx, genSpecifier) == Format.hhd);
    assert(genSpecifier == 'd');

    idx = 0;
    assert(parseGenericFormatSpecifier("hn", idx, genSpecifier) == Format.hn);
    assert(genSpecifier == 'n');

    idx = 0;
    assert(parseGenericFormatSpecifier("ji", idx, genSpecifier) == Format.jd);
    assert(genSpecifier == 'i');

    idx = 0;
    assert(parseGenericFormatSpecifier("lu", idx, genSpecifier) == Format.lu);
    assert(genSpecifier == 'u');

    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);

    /* parsePrintfFormatSpecifier
     */

    bool widthStar;
    bool precisionStar;

    // one for each Format
    idx = 0;
    assert(parsePrintfFormatSpecifier("%d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 2);
    assert(!widthStar && !precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ld", idx, widthStar, precisionStar) == Format.ld);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lld", idx, widthStar, precisionStar) == Format.lld);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%jd", idx, widthStar, precisionStar) == Format.jd);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%zd", idx, widthStar, precisionStar) == Format.zd);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%td", idx, widthStar, precisionStar) == Format.td);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%g", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%Lg", idx, widthStar, precisionStar) == Format.Lg);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%p", idx, widthStar, precisionStar) == Format.p);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%n", idx, widthStar, precisionStar) == Format.n);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ln", idx, widthStar, precisionStar) == Format.ln);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lln", idx, widthStar, precisionStar) == Format.lln);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%hn", idx, widthStar, precisionStar) == Format.hn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%hhn", idx, widthStar, precisionStar) == Format.hhn);
    assert(idx == 4);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%jn", idx, widthStar, precisionStar) == Format.jn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%zn", idx, widthStar, precisionStar) == Format.zn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%tn", idx, widthStar, precisionStar) == Format.tn);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%c", idx, widthStar, precisionStar) == Format.c);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lc", idx, widthStar, precisionStar) == Format.lc);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%s", idx, widthStar, precisionStar) == Format.s);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%ls", idx, widthStar, precisionStar) == Format.ls);
    assert(idx == 3);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%%", idx, widthStar, precisionStar) == Format.percent);
    assert(idx == 2);

    // Synonyms
    idx = 0;
    assert(parsePrintfFormatSpecifier("%i", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%u", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%o", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%x", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%X", idx, widthStar, precisionStar) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%f", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%F", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%G", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%a", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%A", idx, widthStar, precisionStar) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%lg", idx, widthStar, precisionStar) == Format.lg);
    assert(idx == 3);

    // width, precision
    idx = 0;
    assert(parsePrintfFormatSpecifier("%*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 3);
    assert(widthStar && !precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%.*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 4);
    assert(!widthStar && precisionStar);

    idx = 0;
    assert(parsePrintfFormatSpecifier("%*.*d", idx, widthStar, precisionStar) == Format.d);
    assert(idx == 5);
    assert(widthStar && precisionStar);

    // Too short formats
    foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
        assert(idx == s.length);
    }

    // Undefined format combinations
    foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(s, idx, widthStar, precisionStar) == Format.error);
        assert(idx == s.length);
    }

    /* parseScanfFormatSpecifier
     */

    bool asterisk;

    // one for each Format
    idx = 0;
    assert(parseScanfFormatSpecifier("%d", idx, asterisk) == Format.d);
    assert(idx == 2);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hhd", idx, asterisk) == Format.hhd);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hd", idx, asterisk) == Format.hd);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%ld", idx, asterisk) == Format.ld);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%lld", idx, asterisk) == Format.lld);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%jd", idx, asterisk) == Format.jd);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%zd", idx, asterisk) == Format.zd);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%td", idx, asterisk) == Format.td);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%u", idx, asterisk) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hhu", idx, asterisk) == Format.hhu);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%hu", idx, asterisk) == Format.hu);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%lu", idx, asterisk) == Format.lu);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%llu", idx, asterisk) == Format.llu);
    assert(idx == 4);

    idx = 0;
    assert(parseScanfFormatSpecifier("%ju", idx, asterisk) == Format.ju);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%g", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%lg", idx, asterisk) == Format.lg);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%Lg", idx, asterisk) == Format.Lg);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%p", idx, asterisk) == Format.p);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%s", idx, asterisk) == Format.s);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%ls", idx, asterisk) == Format.ls);
    assert(idx == 3);

    idx = 0;
    assert(parseScanfFormatSpecifier("%%", idx, asterisk) == Format.percent);
    assert(idx == 2);

    // Synonyms
    idx = 0;
    assert(parseScanfFormatSpecifier("%i", idx, asterisk) == Format.d);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%n", idx, asterisk) == Format.n);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%o", idx, asterisk) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%x", idx, asterisk) == Format.u);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%f", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%e", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%a", idx, asterisk) == Format.g);
    assert(idx == 2);

    idx = 0;
    assert(parseScanfFormatSpecifier("%c", idx, asterisk) == Format.c);
    assert(idx == 2);

    // asterisk
    idx = 0;
    assert(parseScanfFormatSpecifier("%*d", idx, asterisk) == Format.d);
    assert(idx == 3);
    assert(asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%9ld", idx, asterisk) == Format.ld);
    assert(idx == 4);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%*25984hhd", idx, asterisk) == Format.hhd);
    assert(idx == 10);
    assert(asterisk);

    // scansets
    idx = 0;
    assert(parseScanfFormatSpecifier("%[a-zA-Z]s", idx, asterisk) == Format.s);
    assert(idx == 10);
    assert(!asterisk);

    idx = 0;
    assert(parseScanfFormatSpecifier("%*25[a-z]hhd", idx, asterisk) == Format.hhd);
    assert(idx == 12);
    assert(asterisk);

    // Too short formats
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }

    // Undefined format combinations
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }

    // Invalid scansets
    foreach (s; ["%[]", "%[s", "%[0-9lld", "%[", "%[a-z]"])
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(s, idx, asterisk) == Format.error);
        assert(idx == s.length);
    }
}

Read our documentation on viewing source code .

Loading