1
/**
2
Command line tool that prints TSV data aligned for easier reading on consoles
3
and traditional command-line environments.
4

5
Copyright (c) 2017-2020, eBay Inc.
6
Initially written by Jon Degenhardt
7

8
License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
9
*/
10
module tsv_utils.tsv_pretty;
11

12
import std.exception : enforce;
13
import std.range;
14
import std.stdio;
15
import std.typecons : Flag, Yes, No, tuple;
16

17
/* druntime configuration embedded in the executable. The single entry sets the GC
 * option "cleanup:none". NOTE(review): presumably this is to skip the garbage
 * collector's final cleanup pass at program exit so the tool terminates faster --
 * confirm against the druntime GC configuration documentation. The static if
 * restricts the declaration to compiler front-end versions 2085 (2.085) and later.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
18

19
version(unittest)
{
    // Unit test builds take their entry point from the -main compiler switch.
}
else
{
    /** Program entry point.
     *
     * Hands the command line to TsvPrettyOptions.processArgs and, when that reports
     * that execution should continue, passes every argument after the program name
     * to tsvPretty as the list of input files.
     *
     * Returns: The exit code supplied by processArgs when it asks to stop; 1 if
     * tsvPretty throws an Exception (the message is written to stderr); 0 otherwise.
     */
    int main(string[] cmdArgs)
    {
        /* Merge coverage reports across runs when built for DMD code coverage. */
        version(D_Coverage) version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }

        TsvPrettyOptions options;
        auto argsResult = options.processArgs(cmdArgs);
        immutable bool shouldContinue = argsResult[0];
        if (!shouldContinue) return argsResult[1];

        int exitCode = 0;
        try
        {
            tsvPretty(options, cmdArgs[1 .. $]);
        }
        catch (Exception exc)
        {
            stderr.writefln("Error [%s]: %s", options.programName, exc.msg);
            exitCode = 1;
        }
        return exitCode;
    }
}
49

50
/** Long-form help text. Printed ahead of the generated option list when the
 * '--help-verbose' option is given.
 */
auto helpTextVerbose = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a format intended to be more human readable when
working on the command line. This is done primarily by lining up data into
fixed-width columns. Text is left aligned, numbers are right aligned. Floating
point numbers are aligned on the decimal point when feasible.

Processing begins by reading the initial set of lines into memory to determine
the field widths and data types of each column. This look-ahead buffer is used
for header detection as well. Output begins after this processing is complete.

By default, only the alignment is changed, the actual values are not modified.
Several of the formatting options do modify the values.

Features:

* Floating point numbers: Floats can be printed in fixed-width precision, using
  the same precision for all floats in a column. This makes them line up nicely.
  Precision is determined by values seen during look-ahead processing. The max
  precision defaults to 9, this can be changed when smaller or larger values are
  desired. See the '--f|format-floats' and '--p|precision' options.

* Header lines: Headers are detected automatically when possible. This can be
  overridden when automatic detection doesn't work as desired. Headers can be
  underlined and repeated at regular intervals.

* Missing values: A substitute value can be used for empty fields. This is often
  less confusing than spaces. See '--e|replace-empty' and '--E|empty-replacement'.

* Exponential notation: As part of float formatting, '--f|format-floats'
  re-formats columns where exponential notation is found so all the values in
  the column are displayed using exponential notation with the same precision.

* Preamble: A number of initial lines can be designated as a preamble and output
  unchanged. The preamble is before the header, if a header is present. Preamble
  lines can be auto-detected via the heuristic that they lack field delimiters.
  This works well when the field delimiter is a TAB.

* Fonts: Fixed-width fonts are assumed. CJK characters are assumed to be double
  width. This is not always correct, but works well in most cases.

Options:
EOS";
94

95
/** Short help text. Printed ahead of the generated option list when getopt reports
 * that help was requested.
 */
auto helpText = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a more human readable format. This is done by lining
up data into fixed-width columns. Text is left aligned, numbers are right aligned.
Floating point numbers are aligned on the decimal point when feasible.

Options:
EOS";
104

105
/** TsvPrettyOptions is used to process and store command line options.
 *
 * Fields annotated "Derived" are not set directly from the command line; they are
 * computed by processArgs from the other options.
 */
struct TsvPrettyOptions
{
    string programName;
    bool helpVerbose = false;           // --help-verbose
    bool hasHeader = false;             // --H|header (Note: Default false assumed by validation code)
    bool autoDetectHeader = true;       // Derived (Note: Default true assumed by validation code)
    bool noHeader = false;              // --x|no-header (Note: Default false assumed by validation code)
    size_t lookahead = 1000;            // --l|lookahead
    size_t repeatHeader = 0;            // --r|repeat-header num (zero means no repeat)
    bool underlineHeader = false;       // --u|underline-header
    bool formatFloats = false;          // --f|format-floats
    size_t floatPrecision = 9;          // --p|precision num (max precision when formatting floats.)
    bool replaceEmpty = false;          // --e|replace-empty
    string emptyReplacement = "";       // --E|empty-replacement
    size_t emptyReplacementPrintWidth = 0;    // Derived
    char delim = '\t';                  // --d|delimiter
    size_t spaceBetweenFields = 2;      // --s|space-between-fields num
    size_t maxFieldPrintWidth = 40;     // --m|max-text-width num; Max width for variable width text fields.
    bool autoDetectPreamble = false;    // --a|auto-preamble
    size_t preambleLines = 0;           // --b|preamble; Number of preamble lines.
    bool versionWanted = false;         // --V|version

    /** Parses the command line, validates the options, and computes derived values.
     *
     * Params:
     *   cmdArgs = The program's command line. Passed by reference to std.getopt,
     *             which removes the options it recognizes.
     *
     * Returns: A tuple (bool, int). The first value is true if the arguments were
     * processed successfully and execution should continue. It is false if an error
     * occurred or if help or version information was requested and printed. When
     * false, the second value is the exit code to use: 0 for help/version, 1 for an
     * error. Errors are reported on stderr; no exception escapes this method.
     *
     * When true is returned the following derived values have been set:
     *   - autoDetectHeader is false if either --H|header or --x|no-header was given.
     *   - replaceEmpty is true if a non-empty --E|empty-replacement was given.
     *   - emptyReplacement is "--" if --e|replace-empty was given without a
     *     replacement string.
     *   - emptyReplacementPrintWidth holds the print width of emptyReplacement.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options
            auto r = getopt(
                cmdArgs,
                "help-verbose",           "       Print full help.", &helpVerbose,
                std.getopt.config.caseSensitive,
                "H|header",               "       Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "x|no-header",            "       Assume no header. Turns off automatic header detection.", &noHeader,
                "l|lookahead",            "NUM    Lines to read to interpret data before generating output. Default: 1000", &lookahead,

                "r|repeat-header",        "NUM    Lines to print before repeating the header. Default: No repeating header", &repeatHeader,

                "u|underline-header",     "       Underline the header.", &underlineHeader,
                "f|format-floats",        "       Format floats for better readability. Default: No", &formatFloats,
                "p|precision",            "NUM    Max floating point precision. Implies --format-floats. Default: 9", &floatPrecisionOptionHandler,
                std.getopt.config.caseSensitive,
                "e|replace-empty",        "       Replace empty fields with '--'.", &replaceEmpty,
                "E|empty-replacement",    "STR    Replace empty fields with a string.", &emptyReplacement,
                std.getopt.config.caseInsensitive,
                "d|delimiter",            "CHR    Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,
                "s|space-between-fields", "NUM    Spaces between each field (Default: 2)", &spaceBetweenFields,
                "m|max-text-width",       "NUM    Max reserved field width for variable width text fields. Default: 40", &maxFieldPrintWidth,
                "a|auto-preamble",        "       Treat initial lines in a file as a preamble if the line contains no field delimiters.", &autoDetectPreamble,
                "b|preamble",             "NUM    Treat the first NUM lines as a preamble and output them unchanged.", &preambleLines,
                std.getopt.config.caseSensitive,
                "V|version",              "       Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-pretty"));
                return tuple(false, 0);
            }

            /* Validation and derivations. */
            enforce(!(noHeader && hasHeader),
                    "Cannot specify both --H|header and --x|no-header.");

            if (noHeader || hasHeader) autoDetectHeader = false;

            /* Zero look-ahead has limited utility unless the first line is known to
             * be a header. Good chance the user will get an unintended behavior.
             * autoDetectHeader is still true here only when neither --H|header nor
             * --x|no-header was given, so that combination is rejected directly.
             */
            enforce(!(lookahead == 0 && autoDetectHeader),
                    "Cannot auto-detect header with zero look-ahead. Specify either '--H|header' or '--x|no-header' when using '--l|lookahead 0'.");

            enforce(!(autoDetectPreamble && preambleLines != 0),
                    "Do not use '--b|preamble NUM' and '--a|auto-preamble' together. ('--b|preamble 0' is okay.)");

            /* A replacement string implies --e|replace-empty. Asking for replacement
             * without supplying a string selects the default "--".
             */
            if (emptyReplacement.length != 0) replaceEmpty = true;
            else if (replaceEmpty) emptyReplacement = "--";

            if (emptyReplacement.length != 0)
            {
                emptyReplacementPrintWidth = emptyReplacement.monospacePrintWidth;
            }
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }

    /* Option handler for --p|precision. Stores the precision and also turns on
     * --f|format-floats. A value that cannot be converted to size_t makes the
     * conversion throw; processArgs reports that as a command line error.
     */
    private void floatPrecisionOptionHandler(string option, string optionVal) @safe pure
    {
        import std.conv : to;
        floatPrecision = optionVal.to!size_t;
        formatFloats = true;
    }
}
234

235
/** tsvPretty is the main loop. It reads the input files line by line and passes
 * each line to a TsvPrettyProcessor instance.
 *
 * This keeps physical I/O sources and sinks separate from the processing
 * algorithm, which works on an output range. A new lockingTextWriter on stdout is
 * wrapped in an output range object for every input line. (Per the original
 * author's note, this has the effect of flushing standard output every line, which
 * is desirable in command line tools.)
 *
 * This routine also decides which lines are preamble lines, which keeps that
 * concern out of the TsvPrettyProcessor code:
 *  - With options.autoDetectPreamble, every line of a file before the first line
 *    containing options.delim is a preamble line.
 *  - Otherwise the first options.preambleLines lines of each file are preamble.
 *
 * Params:
 *   options = Validated command line options.
 *   files   = Input file names. "-" means standard input. If empty, standard input
 *             is read.
 */
void tsvPretty(const ref TsvPrettyOptions options, const string[] files)
{
    import std.algorithm : canFind;

    /* How a single input line is routed to the processor. */
    enum LineKind { preamble, firstData, data }

    immutable size_t firstDataLineNum = options.preambleLines + 1;
    auto processor = TsvPrettyProcessor(options);

    foreach (filename; (files.length > 0) ? files : ["-"])
    {
        bool preambleScanDone = false;   // Only used with auto-detected preambles.
        auto inputStream = (filename == "-") ? stdin : filename.File();

        foreach (lineNum, line; inputStream.byLine.enumerate(1))
        {
            LineKind kind = LineKind.data;

            if (options.autoDetectPreamble)
            {
                if (!preambleScanDone)
                {
                    /* The first line holding a delimiter ends the preamble. */
                    if (line.canFind(options.delim))
                    {
                        preambleScanDone = true;
                        kind = LineKind.firstData;
                    }
                    else
                    {
                        kind = LineKind.preamble;
                    }
                }
            }
            else if (lineNum < firstDataLineNum)
            {
                kind = LineKind.preamble;
            }
            else if (lineNum == firstDataLineNum)
            {
                kind = LineKind.firstData;
            }

            final switch (kind)
            {
            case LineKind.preamble:
                processor.processPreambleLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
                break;

            case LineKind.firstData:
                processor.processFileFirstLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
                break;

            case LineKind.data:
                processor.processLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
                break;
            }
        }
    }

    /* Emit anything still held in the look-ahead cache. */
    processor.finish(outputRangeObject!(char, char[])(stdout.lockingTextWriter));
}
302

303
/** TsvPrettyProcessor maintains the state of processing and exposes operations for
 * processing individual input lines.
 *
 * TsvPrettyProcessor knows that input is file-based, but doesn't deal with actual
 * files or with reading lines from input. That is the job of the caller. Output is
 * written to an output range. The caller is expected to pass lines in the order
 * received; the processing logic is built on that assumption.
 *
 * In addition to the constructor, there are four API methods:
 *  - processPreambleLine - Called to process a preamble line occurring before
 *    the header line or first line of data.
 *  - processFileFirstLine - Called to process the first line of each file. This
 *    enables header processing.
 *  - processLine - Called to process all lines except for the first line of a file.
 *  - finish - Called at the end of all processing. This is needed in case the
 *    look-ahead cache is still being filled when input terminates.
 *
 * NOTE(review): the 'private:' label directly below also applies to the constructor
 * and to the four API methods. They are reachable from this module only.
 */

struct TsvPrettyProcessor
{
    import std.array : appender;

private:
    private enum AutoDetectHeaderResult { none, hasHeader, noHeader };

    private TsvPrettyOptions _options;
    private size_t _fileCount = 0;              // Files whose first (non-preamble) line has been seen
    private size_t _dataLineOutputCount = 0;    // Data lines written so far
    private bool _stillCaching = true;          // True until the look-ahead cache has been output
    private string _candidateHeaderLine;        // First line of the first file, when auto-detecting
    private auto _lookaheadCache = appender!(string[])();
    private FieldFormat[] _fieldVector;         // One entry per column seen so far
    private AutoDetectHeaderResult _autoDetectHeaderResult = AutoDetectHeaderResult.none;

    /** Constructor. Copies the options. Caching is switched off right away when
     * there is no header and the look-ahead is zero.
     */
    this(const TsvPrettyOptions options) @safe pure nothrow @nogc
    {
        _options = options;
        if (options.noHeader && options.lookahead == 0) _stillCaching = false;
    }

    invariant
    {
        assert(_options.hasHeader || _options.noHeader || _options.autoDetectHeader);
        assert((_options.lookahead == 0 && _lookaheadCache.data.length == 0) ||
               _lookaheadCache.data.length < _options.lookahead);
    }

    /** Called to process a preamble line occurring before the header line or first
     * line of data. The line is copied to the output unchanged, but only while no
     * file's first line has been processed yet. Later preamble lines are dropped.
     */
    void processPreambleLine(OutputRange!char outputStream, const char[] line)
    {
        if (_fileCount != 0) return;

        put(outputStream, line);
        put(outputStream, '\n');
    }

    /** Called to process the first line of each file. This enables header processing.
     *
     * Depending on the header mode the line is treated as data, recorded as the
     * header, remembered as a candidate header, or ignored (a header line of a
     * second or later file).
     */
    void processFileFirstLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        _fileCount++;

        if (_options.noHeader)
        {
            /* No header: the first line is ordinary data. */
            processLine(outputStream, line);
            return;
        }

        if (_options.hasHeader)
        {
            /* Only the first file's header is used. */
            if (_fileCount != 1) return;

            setHeaderLine(line);
            if (_options.lookahead == 0) outputLookaheadCache(outputStream);
            return;
        }

        assert(_options.autoDetectHeader);

        final switch (_autoDetectHeaderResult)
        {
        case AutoDetectHeaderResult.noHeader:
            assert(_fileCount > 1);
            processLine(outputStream, line);
            break;

        case AutoDetectHeaderResult.hasHeader:
            assert(_fileCount > 1);
            break;

        case AutoDetectHeaderResult.none:
            if (_fileCount == 1)
            {
                /* Remember the line. The decision is made later. */
                assert(_candidateHeaderLine.length == 0);
                _candidateHeaderLine = line.to!string;
            }
            else if (_fileCount == 2)
            {
                immutable bool repeatsCandidate = (_candidateHeaderLine == line);

                if (!repeatsCandidate)
                {
                    /* First lines differ: both are data. */
                    _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
                    updateFieldFormatsForLine(_candidateHeaderLine);
                    processLine(outputStream, line);
                }
                else
                {
                    /* Second file starts with the same line: it is a header. */
                    _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                    setHeaderLine(_candidateHeaderLine);

                    /* Edge case: First file has only a header line and look-ahead set to zero. */
                    if (_stillCaching && _options.lookahead == 0) outputLookaheadCache(outputStream);
                }
            }
            break;
        }
    }

    /** Called to process all lines except for the first line of a file. The line is
     * added to the look-ahead cache while caching is active and written out directly
     * afterwards.
     */
    void processLine(OutputRange!char outputStream, const char[] line)
    {
        if (!_stillCaching) outputDataLine(outputStream, line);
        else cacheDataLine(outputStream, line);
    }

    /** Called at the end of all processing. This is needed in case the look-ahead cache
     * is still being filled when input terminates.
     */
    void finish(OutputRange!char outputStream)
    {
        if (_stillCaching)
        {
            outputLookaheadCache(outputStream);
        }
    }

private:
    /* outputLookaheadCache finalizes processing of the look-ahead cache. This includes
     * finalizing the auto-detect header decision, finalizing the format of each field,
     * and outputting the header (if any) and all lines in the cache. Caching is turned
     * off afterwards.
     */
    void outputLookaheadCache(OutputRange!char outputStream)
    {
        assert(_stillCaching);

        immutable bool haveCandidate = _candidateHeaderLine.length != 0;

        /* Settle a still-open auto-detect decision using the cached data. */
        if (_options.autoDetectHeader &&
            _autoDetectHeaderResult == AutoDetectHeaderResult.none &&
            haveCandidate)
        {
            _autoDetectHeaderResult = candidateHeaderLooksLikeHeader()
                ? AutoDetectHeaderResult.hasHeader
                : AutoDetectHeaderResult.noHeader;

            if (_autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader)
            {
                setHeaderLine(_candidateHeaderLine);
            }
        }

        immutable bool headerDetected =
            _options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader;
        immutable bool candidateIsData =
            _options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.noHeader &&
            haveCandidate;

        if (_options.hasHeader || headerDetected)
        {
            finalizeFieldFormatting();
            outputHeader(outputStream);
        }
        else if (candidateIsData)
        {
            /* The candidate turned out to be data: include it in the formats and
             * write it ahead of the cached lines.
             */
            updateFieldFormatsForLine(_candidateHeaderLine);
            finalizeFieldFormatting();
            outputDataLine(outputStream, _candidateHeaderLine);
        }
        else
        {
            finalizeFieldFormatting();
        }

        foreach (cachedLine; _lookaheadCache.data)
        {
            outputDataLine(outputStream, cachedLine);
        }
        _lookaheadCache.clear;
        _stillCaching = false;
    }

    /* Returns true if the candidate header line looks like a header when compared to
     * the field formats gathered so far.
     */
    bool candidateHeaderLooksLikeHeader() @safe
    {
        import std.algorithm : splitter;

        /* The candidate header is declared as the header if the look-ahead cache has at least
         * one numeric field that is text in the candidate header.
         */
        foreach (fieldIndex, fieldValue; _candidateHeaderLine.splitter(_options.delim).enumerate)
        {
            auto candidateFieldFormat = FieldFormat(fieldIndex);
            candidateFieldFormat.updateForFieldValue(fieldValue, _options);

            /* Columns not present in the cached data cannot decide anything. */
            if (fieldIndex >= _fieldVector.length) continue;
            if (candidateFieldFormat.fieldType != FieldType.text) continue;

            immutable cachedType = _fieldVector[fieldIndex].fieldType;
            if (cachedType == FieldType.integer ||
                cachedType == FieldType.floatingPoint ||
                cachedType == FieldType.exponent)
            {
                return true;
            }
        }

        return false;
    }

    /* Splits the line on the delimiter and records each piece as the header of the
     * corresponding field, adding field entries as needed.
     */
    void setHeaderLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach (fieldIndex, headerText; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length)
            {
                _fieldVector ~= FieldFormat(fieldIndex);
            }
            assert(fieldIndex < _fieldVector.length);
            _fieldVector[fieldIndex].setHeader(headerText);
        }
    }

    /* Adds a data line to the look-ahead cache and folds its values into the field
     * formats. The cache is output as soon as it holds 'lookahead' lines.
     */
    void cacheDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        assert(_lookaheadCache.data.length < _options.lookahead);

        _lookaheadCache ~= line.to!string;
        updateFieldFormatsForLine(line);

        immutable bool cacheIsFull = (_lookaheadCache.data.length == _options.lookahead);
        if (cacheIsFull) outputLookaheadCache(outputStream);
    }

    /* Splits the line on the delimiter and passes each value to the corresponding
     * field format as a sample, adding field entries as needed.
     */
    void updateFieldFormatsForLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach (fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length)
            {
                _fieldVector ~= FieldFormat(fieldIndex);
            }
            assert(fieldIndex < _fieldVector.length);
            _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
        }
    }

    /* Finalizes every field format in column order. Each field is given its start
     * position; the next field starts at the value returned by finalizeFormatting
     * plus the configured space between fields.
     */
    void finalizeFieldFormatting() @safe pure @nogc nothrow
    {
        size_t nextFieldStart = 0;
        foreach (ref field; _fieldVector)
        {
            nextFieldStart = field.finalizeFormatting(nextFieldStart, _options);
            nextFieldStart += _options.spaceBetweenFields;
        }
    }

    /* Writes the header line. If the underline option is set, a second line with the
     * underline for each field follows.
     */
    void outputHeader(OutputRange!char outputStream)
    {
        size_t outputPosition = 0;
        foreach (fieldIndex, ref field; _fieldVector.enumerate)
        {
            /* Pad up to the field's start, then write the header text. */
            size_t padding = field.startPosition - outputPosition;
            put(outputStream, repeat(" ", padding));
            outputPosition += padding;
            outputPosition += field.writeHeader(outputStream, _options);
        }
        put(outputStream, '\n');

        if (!_options.underlineHeader) return;

        outputPosition = 0;
        foreach (fieldIndex, ref field; _fieldVector.enumerate)
        {
            size_t padding = field.startPosition - outputPosition;
            put(outputStream, repeat(" ", padding));
            outputPosition += padding;
            outputPosition += field.writeHeader!(Yes.writeUnderline)(outputStream, _options);
        }
        put(outputStream, '\n');
    }

    /* Writes one formatted data line. Before the line, the header is repeated (after
     * a blank line) when the repeat-header interval has been reached.
     */
    void outputDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.algorithm : splitter;

        /* Repeating header option. */
        immutable bool headerInUse =
            _options.hasHeader ||
            (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader);

        if (_options.repeatHeader != 0 && _dataLineOutputCount != 0 && headerInUse &&
            _dataLineOutputCount % _options.repeatHeader == 0)
        {
            put(outputStream, '\n');
            outputHeader(outputStream);
        }

        _dataLineOutputCount++;

        size_t outputPosition = 0;
        foreach (fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length)
            {
                /* Line is longer than any seen while caching. Add a new FieldFormat entry
                 * and set the line formatting based on this field value.
                 */
                _fieldVector ~= FieldFormat(fieldIndex);

                size_t startPosition = 0;
                if (fieldIndex != 0)
                {
                    startPosition = _fieldVector[fieldIndex - 1].endPosition + _options.spaceBetweenFields;
                }

                _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
                _fieldVector[fieldIndex].finalizeFormatting(startPosition, _options);
            }

            assert(fieldIndex < _fieldVector.length);

            FieldFormat fieldFormat = _fieldVector[fieldIndex];
            size_t fieldStart = fieldFormat.startPosition;

            size_t padding;
            if (outputPosition < fieldStart)
            {
                padding = fieldStart - outputPosition;
            }
            else if (fieldIndex == 0)
            {
                padding = 0;
            }
            else
            {
                padding = 1;    // Previous field went long. One space between fields
            }

            put(outputStream, repeat(" ", padding));
            outputPosition += padding;
            outputPosition += fieldFormat.writeFieldValue(outputStream, outputPosition, fieldValue, _options);
        }
        put(outputStream, '\n');
    }
}
633

634
/** Field types recognized and tracked by tsv-pretty processing. */
enum FieldType
{
    unknown,        // Initial type of a FieldFormat
    text,
    integer,
    floatingPoint,
    exponent,
}
636

637
/** Field alignments used by tsv-pretty processing. */
enum FieldAlignment
{
    left,       // Initial alignment of a FieldFormat
    right,
}
639

640
/** FieldFormat holds all the formatting info needed to format data values in a specific
 * column. e.g. Field 1 may be text, field 2 may be a float, etc. This is calculated
 * during the caching phase. Each FieldFormat instance is part of a vector representing
 * the full row, so each includes the start position on the line and similar data.
 *
 * APIs used during the caching phase to gather field value samples:
 *  - this - Initial construction. Takes the field index.
 *  - setHeader - Used to set the header text.
 *  - updateForFieldValue - Used to add the next field value sample.
 *  - finalizeFormatting - Used at the end of caching to finalize the format choices.
 *
 * APIs used after caching is finished (after finalizeFormatting):
 *  - startPosition - Returns the expected start position for the field.
 *  - endPosition - Returns the expected end position for the field.
 *  - fieldType - Returns the type chosen for the field.
 *  - writeHeader - Outputs the header, properly aligned.
 *  - writeFieldValue - Outputs the current field value, properly aligned.
 */
struct FieldFormat
{
private:
    size_t _fieldIndex;                  // Zero-based index in the line
    string _header = "";                 // Original field header
    size_t _headerPrintWidth = 0;        // Print width of the header text
    FieldType _type = FieldType.unknown;
    FieldAlignment _alignment = FieldAlignment.left;
    size_t _startPosition = 0;           // Set by finalizeFormatting
    size_t _printWidth = 0;              // Set by finalizeFormatting
    size_t _precision = 0;               // Number of digits after the decimal point

    /* These are used while doing initial type and print format detection. */
    size_t _minRawPrintWidth = 0;        // Zero doubles as "no sample seen yet"
    size_t _maxRawPrintWidth = 0;
    size_t _maxDigitsBeforeDecimal = 0;
    size_t _maxDigitsAfterDecimal = 0;
    size_t _maxSignificantDigits = 0;    // Digits to include in exponential notation

public:

    /** Initial construction. Takes a field index. */
    this(size_t fieldIndex) @safe pure nothrow @nogc
    {
        _fieldIndex = fieldIndex;
    }

    /** Sets the header text and records its print width. */
    void setHeader(const char[] header) @safe
    {
        import std.conv : to;

        _header = header.to!string;
        _headerPrintWidth = _header.monospacePrintWidth;
    }

    /** Returns the expected start position for the field. */
    size_t startPosition() nothrow pure @safe @property
    {
        return _startPosition;
    }

    /** Returns the expected end position for the field. */
    size_t endPosition() nothrow pure @safe @property
    {
        return _startPosition + _printWidth;
    }

    /** Returns the type of field. */
    FieldType fieldType() nothrow pure @safe @property
    {
        return _type;
    }

    /** Writes the field header or underline characters to the output stream.
     *
     * The current output position should have been written up to the field's start position,
     * including any spaces between fields. Unlike data fields, there is no need to correct
     * for previous fields that have run long. This routine does not output trailing spaces.
     * This makes it simpler for lines to avoid unnecessary trailing spaces.
     *
     * Underlines can either be written the full width of the field or just under the
     * text of the header. At present this is a template parameter (compile-time).
     *
     * Nothing is written when the header is empty. The options argument is not
     * consulted by this routine.
     *
     * The print width of the output is returned.
     */
    size_t writeHeader (Flag!"writeUnderline" writeUnderline = No.writeUnderline,
                        Flag!"fullWidthUnderline" fullWidthUnderline = No.fullWidthUnderline)
        (OutputRange!char outputStream, const ref TsvPrettyOptions options)
    {
        import std.range : repeat;

        size_t positionsWritten = 0;
        if (_headerPrintWidth > 0)
        {
            static if (writeUnderline)
            {
                static if (fullWidthUnderline)
                {
                    put(outputStream, repeat("-", _printWidth));
                    positionsWritten += _printWidth;
                }
                else  // Underline beneath the header text only
                {
                    if (_alignment == FieldAlignment.right)
                    {
                        put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
                        positionsWritten += _printWidth - _headerPrintWidth;
                    }
                    put(outputStream, repeat("-", _headerPrintWidth));
                    positionsWritten += _headerPrintWidth;
                }
            }
            else
            {
                if (_alignment == FieldAlignment.right)
                {
                    put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
                    positionsWritten += _printWidth - _headerPrintWidth;
                }
                put(outputStream, _header);
                positionsWritten += _headerPrintWidth;
            }
        }
        return positionsWritten;
    }

    /** Writes the field value for the current column.
     *
     * The caller needs to generate output at least to the column's start position, but
     * can go beyond if previous fields have run long.
     *
     * The field value is aligned properly in the field. Either left aligned (text) or
     * right aligned (numeric). Floating point fields are both right aligned and
     * decimal point aligned. The print width of the output (leading spaces plus the
     * value) is returned; this is not a byte count. Trailing spaces are not added, the
     * caller must add any necessary trailing spaces prior to printing the next field.
     */
    size_t writeFieldValue(OutputRange!char outputStream, size_t currPosition,
                           const char[] fieldValue, const ref TsvPrettyOptions options)
    in
    {
        assert(currPosition >= _startPosition);   // Caller responsible for advancing to field start position.
        assert(_type == FieldType.text || _type == FieldType.integer ||
               _type == FieldType.floatingPoint || _type == FieldType.exponent);
    }
    do
    {
        import std.algorithm : find, max;
        import std.conv : to;

        /* Create the print version of the string. Either the raw value or a formatted
         * version of a float.
         */
        string printValue;
        if (!options.formatFloats || _type == FieldType.text || _type == FieldType.integer)
        {
            printValue = fieldValue.to!string;
        }
        else
        {
            assert(options.formatFloats);
            assert(_type == FieldType.exponent || _type == FieldType.floatingPoint);

            if (_type == FieldType.exponent)
            {
                printValue = fieldValue.formatExponentValue(_precision);
            }
            else
            {
                printValue = fieldValue.formatFloatingPointValue(_precision);
            }
        }

        if (printValue.length == 0 && options.replaceEmpty) printValue = options.emptyReplacement;
        size_t printValuePrintWidth = printValue.monospacePrintWidth;

        /* Calculate leading spaces needed for right alignment. */
        size_t leadingSpaces = 0;
        if (_alignment == FieldAlignment.right)
        {
            /* Target width adjusts the column width to account for overrun by the previous field. */
            size_t targetWidth;
            if (currPosition == _startPosition)
            {
                targetWidth = _printWidth;
            }
            else
            {
                size_t startGap = currPosition - _startPosition;
                targetWidth = max(printValuePrintWidth,
                                  startGap < _printWidth ? _printWidth - startGap : 0);
            }

            leadingSpaces = (printValuePrintWidth < targetWidth) ?
                targetWidth - printValuePrintWidth : 0;

            /* The above calculation assumes the print value is fully right aligned.
             * This is not correct when raw value floats are being used rather than
             * formatted floats, as different values will have different precision.
             * The next adjustment accounts for this, dropping leading spaces as
             * needed to align the decimal point. Note that text and exponential
             * values get aligned strictly against right boundaries.
             */
            if (leadingSpaces > 0 && _precision > 0 &&
                _type == FieldType.floatingPoint && !options.formatFloats)
            {
                import std.algorithm : canFind;
                import std.string : isNumeric;

                if (printValue.isNumeric && !printValue.canFind!(x => x == 'e' || x == 'E'))
                {
                    /* Length of the decimal point plus the digits following it. */
                    size_t decimalAndDigitsLength = printValue.find(".").length;
                    size_t trailingSpaces =
                        (decimalAndDigitsLength == 0) ? _precision + 1 :
                        (decimalAndDigitsLength > _precision) ? 0 :
                        _precision + 1 - decimalAndDigitsLength;

                    leadingSpaces = (leadingSpaces > trailingSpaces) ?
                        leadingSpaces - trailingSpaces : 0;
                }
            }
        }
        put(outputStream, repeat(' ', leadingSpaces));
        put(outputStream, printValue);
        return printValuePrintWidth + leadingSpaces;
    }

    /** Updates type and format given a new field value.
     *
     * This is called during look-ahead caching to register a new sample value for the
     * column. The key components updated are field type and print width.
     *
     * Type precedence: text overrides everything, exponent overrides floating point and
     * integer, floating point overrides integer. Empty values never change the type.
     */
    void updateForFieldValue(const char[] fieldValue, const ref TsvPrettyOptions options) @safe
    {
        import std.algorithm : findAmong, findSplit, max, min;
        import std.conv : to, ConvException;
        import std.string : isNumeric;

        size_t fieldValuePrintWidth = fieldValue.monospacePrintWidth;
        size_t fieldValuePrintWidthWithEmpty =
            (fieldValuePrintWidth == 0 && options.replaceEmpty) ?
            options.emptyReplacementPrintWidth :
            fieldValuePrintWidth;

        _maxRawPrintWidth = max(_maxRawPrintWidth, fieldValuePrintWidthWithEmpty);
        _minRawPrintWidth = (_minRawPrintWidth == 0) ?
            fieldValuePrintWidthWithEmpty :
            min(_minRawPrintWidth, fieldValuePrintWidthWithEmpty);

        if (_type == FieldType.text)
        {
            /* Already text, can't become anything else. */
        }
        else if (fieldValuePrintWidth == 0)
        {
            /* Don't let an empty field override a numeric field type. */
        }
        else if (!fieldValue.isNumeric)
        {
            /* Not parsable as a number. Switch from unknown or numeric type to text. */
            _type = FieldType.text;
        }
        else
        {
            /* Field type is currently unknown or numeric, and current field parses as numeric.
             * See if it parses as integer or float. Integers will parse as floats, so try
             * integer types first.
             */
            FieldType parsesAs = FieldType.unknown;
            long longValue;
            ulong ulongValue;
            double doubleValue;
            try
            {
                longValue = fieldValue.to!long;
                parsesAs = FieldType.integer;
            }
            catch (ConvException)
            {
                try
                {
                    ulongValue = fieldValue.to!ulong;
                    parsesAs = FieldType.integer;
                }
                catch (ConvException)
                {
                    try
                    {
                        doubleValue = fieldValue.to!double;
                        parsesAs = (fieldValue.findAmong("eE").length == 0) ?
                            FieldType.floatingPoint : FieldType.exponent;
                    }
                    catch (ConvException)
                    {
                        /* Note: This means isNumeric thinks it's a number, but conversions all failed. */
                        parsesAs = FieldType.text;
                    }
                }
            }

            if (parsesAs == FieldType.text)
            {
                /* Not parsable as a number (despite isNumeric result). Switch to text type. */
                _type = FieldType.text;
            }
            else if (parsesAs == FieldType.exponent)
            {
                /* Exponential notation supersedes both vanilla floats and integers. */
                _type = FieldType.exponent;
                _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);

                if (auto decimalSplit = fieldValue.findSplit("."))
                {
                    auto fromExponent = decimalSplit[2].findAmong("eE");
                    size_t numDigitsAfterDecimal = decimalSplit[2].length - fromExponent.length;
                    _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
                    _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, numDigitsAfterDecimal);
                }
                else
                {
                    /* Exponent without a decimal point. */
                    auto fromExponent = fieldValue.findAmong("eE");
                    assert(fromExponent.length > 0);
                    size_t numDigits = fieldValue.length - fromExponent.length;
                    _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, numDigits);
                }
            }
            else if (parsesAs == FieldType.floatingPoint)
            {
                /* Floating point supersedes integer but not exponential. */
                if (_type != FieldType.exponent) _type = FieldType.floatingPoint;
                _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);

                if (auto decimalSplit = fieldValue.findSplit("."))
                {
                    _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
                    _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, decimalSplit[2].length);
                }
            }
            else
            {
                assert(parsesAs == FieldType.integer);

                /* An integer must not downgrade a column already identified as floating
                 * point or exponential. Without the exponent check the final type would
                 * depend on the order in which samples were seen.
                 */
                if (_type != FieldType.floatingPoint && _type != FieldType.exponent)
                {
                    _type = FieldType.integer;
                }
                _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
                _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, fieldValue.length);
            }
        }
    }

    /** Updates field formatting info based on the current state. It is expected to be
     * called after adding field entries via updateForFieldValue(). It returns its new
     * end position.
     *
     * A field whose type is still unknown becomes text. Numeric types are right
     * aligned, text is left aligned. The print width is never less than one and never
     * less than the header's print width.
     */
    size_t finalizeFormatting (size_t startPosition, const ref TsvPrettyOptions options) @safe pure @nogc nothrow
    {
        import std.algorithm : max, min;
        _startPosition = startPosition;
        if (_type == FieldType.unknown) _type = FieldType.text;
        _alignment = (_type == FieldType.integer || _type == FieldType.floatingPoint
                      || _type == FieldType.exponent) ?
            FieldAlignment.right :
            FieldAlignment.left;

        if (_type == FieldType.floatingPoint)
        {
            size_t precision = min(options.floatPrecision, _maxDigitsAfterDecimal);
            size_t maxValueWidth = _maxDigitsBeforeDecimal + precision;
            if (precision > 0) maxValueWidth++;  // Account for the decimal point.
            _printWidth = max(1, _headerPrintWidth, maxValueWidth);
            _precision = precision;
        }
        else if (_type == FieldType.exponent)
        {
            size_t maxPrecision = (_maxSignificantDigits > 0) ? _maxSignificantDigits - 1 : 0;
            _precision = min(options.floatPrecision, maxPrecision);

            size_t maxValuePrintWidth = !options.formatFloats ? _maxRawPrintWidth : _precision + 7;
            _printWidth = max(1, _headerPrintWidth, maxValuePrintWidth);
        }
        else if (_type == FieldType.integer)
        {
            _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, _maxRawPrintWidth);
            _precision = 0;
        }
        else
        {
            /* Text: the widest samples are capped by the configured maximum field width. */
            _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth,
                              min(options.maxFieldPrintWidth, _maxRawPrintWidth));
            _precision = 0;
        }

        return _startPosition + _printWidth;
    }
}
1035

1036
/** formatFloatingPointValue produces the fixed-precision printed form of a raw value.
 * Trailing digits are zero padded or rounded away as needed to reach the requested
 * precision.
 *
 * The raw value is returned unchanged when it is written in exponential notation,
 * when it cannot be converted to a double, or when it is not finite (NaN, infinity).
 *
 * This routine is used to format values in columns identified as floating point.
 */
string formatFloatingPointValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    /* Exponential notation. Use the raw value. */
    if (value.canFind!(c => c == 'e' || c == 'E')) return value.to!string;

    double parsed;
    try parsed = value.to!double;
    catch (ConvException) return value.to!string;

    if (!parsed.isFinite) return value.to!string;   // NaN or Infinity

    immutable size_t rawDigits = value.precisionDigits;

    /* Enough digits in the raw value: let format round to the target precision. */
    if (rawDigits >= precision) return format("%.*f", precision, parsed);

    /* Too few digits: print what the raw value has, then pad with zeros. Padding
     * text rather than asking format for more digits keeps binary floating point
     * noise out of the output.
     */
    immutable string unpadded = format("%.*f", rawDigits, parsed);
    immutable string zeroPad = repeat("0", precision - rawDigits).join;
    return (rawDigits == 0) ? unpadded ~ "." ~ zeroPad : unpadded ~ zeroPad;
}

@safe unittest
{
    static struct Case { string raw; size_t precision; string expected; }

    immutable Case[] cases = [
        Case("", 3, ""),
        Case(" ", 3, " "),
        Case("abc", 3, "abc"),
        Case("nan", 3, "nan"),
        Case("0", 0, "0"),
        Case("1", 0, "1"),
        Case("1.", 0, "1"),
        Case("1", 3, "1.000"),
        Case("1000", 3, "1000.000"),
        Case("1000.001", 5, "1000.00100"),
        Case("1000.001", 3, "1000.001"),
        Case("1000.001", 2, "1000.00"),
        Case("1000.006", 2, "1000.01"),
        Case("-0.1", 1, "-0.1"),
        Case("-0.1", 3, "-0.100"),
        Case("-0.001", 3, "-0.001"),
        Case("-0.006", 2, "-0.01"),
        Case("-0.001", 1, "-0.0"),
        Case("-0.001", 0, "-0"),
        Case("0e+00", 0, "0e+00"),
        Case("0.00e+00", 0, "0.00e+00"),
        Case("1e+06", 1, "1e+06"),
        Case("1e+06", 2, "1e+06"),
        Case("1E-06", 1, "1E-06"),
        Case("1.1E+6", 2, "1.1E+6"),
        Case("1.1E+100", 2, "1.1E+100"),
    ];

    foreach (ref tc; cases)
    {
        assert(tc.raw.formatFloatingPointValue(tc.precision) == tc.expected);
    }
}
1118

1119
/** formatExponentValue produces the printed form of a raw value in exponential
 * notation with a specific precision. A copy of the original value is returned when
 * it cannot be converted to a double or is not finite (NaN, infinity).
 *
 * This routine is used to format values in columns identified as having exponent format.
 */
string formatExponentValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind, findSplit;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    double parsed;
    try parsed = value.to!double;
    catch (ConvException) return value.to!string;

    if (!parsed.isFinite) return value.to!string;   // NaN or Infinity

    /* Digits after the decimal point needed to show the raw value exactly. */
    immutable size_t sigDigits = value.significantDigits;
    immutable size_t rawPrecision = (sigDigits == 0) ? 0 : sigDigits - 1;

    /* Enough digits in the raw value: let format round to the target precision. */
    if (rawPrecision >= precision) return format("%.*e", precision, parsed);

    /* Too few digits: format at the raw precision, then zero pad the mantissa. */
    auto parts = format("%.*e", rawPrecision, parsed).findSplit("e");   // Same exponent case as the format call.
    string mantissa = parts[0];
    if (rawPrecision == 0)
    {
        /* No decimal point was printed, one is needed ahead of the padding. */
        assert(precision != 0);
        assert(!mantissa.canFind("."));
        mantissa ~= ".";
    }
    return mantissa ~ repeat("0", precision - rawPrecision).join ~ parts[1] ~ parts[2];
}

@safe unittest
{
    static struct Case { string raw; size_t precision; string expected; }

    immutable Case[] cases = [
        Case("", 3, ""),
        Case(" ", 3, " "),
        Case("abc", 3, "abc"),
        Case("nan", 3, "nan"),
        Case("0", 0, "0e+00"),
        Case("1", 0, "1e+00"),
        Case("1.", 0, "1e+00"),
        Case("1", 3, "1.000e+00"),
        Case("1000", 3, "1.000e+03"),
        Case("1000.001", 5, "1.00000e+03"),
        Case("1000.001", 3, "1.000e+03"),
        Case("1000.001", 6, "1.000001e+03"),
        Case("1000.006", 5, "1.00001e+03"),
        Case("-0.1", 1, "-1.0e-01"),
        Case("-0.1", 3, "-1.000e-01"),
        Case("-0.001", 3, "-1.000e-03"),
        Case("-0.001", 1, "-1.0e-03"),
        Case("-0.001", 0, "-1e-03"),
        Case("0e+00", 0, "0e+00"),
        Case("0.00e+00", 0, "0e+00"),
        Case("1e+06", 1, "1.0e+06"),
        Case("1e+06", 2, "1.00e+06"),
        Case("1.0001e+06", 1, "1.0e+06"),
        Case("1.0001e+06", 5, "1.00010e+06"),
    ];

    foreach (ref tc; cases)
    {
        assert(tc.raw.formatExponentValue(tc.precision) == tc.expected);
    }
}
1196

1197
/** Returns the number of significant digits in a numeric string.
 *
 * Significant digits are those needed to represent a number in exponential notation.
 * Leading zeros are never counted. Trailing zeros are dropped only when the mantissa
 * has a decimal point. An exponent suffix is never counted.
 * Examples:
 *   22.345 - 5 digits
 *   10.010 - 4 digits
 *   0.0032 - 2 digits
 *   0.10   - 1 digit
 *   0e+15  - 1 digit
 *
 * Zero is returned for non-finite values (NaN, infinity). Finite values always
 * have at least one significant digit.
 */
size_t significantDigits(const char[] numericString) @safe pure
{
    import std.algorithm : canFind, count, find, findAmong, stripRight;
    import std.ascii : isDigit;
    import std.conv : to;
    import std.math : isFinite;
    import std.string : isNumeric;

    assert (numericString.isNumeric);

    if (!numericString.to!double.isFinite) return 0;

    /* Cut the exponent off first. Otherwise an all-zero mantissa (e.g. "0e+15") would
     * let the search for the first non-zero digit run into the exponent's digits.
     */
    auto mantissa = numericString[0 .. $ - numericString.findAmong("eE").length];
    auto digitsPart = mantissa.find!(x => x.isDigit && x != '0');

    /* Trailing zeros are insignificant whenever the mantissa has a decimal point,
     * including when the point precedes the first non-zero digit (e.g. "0.10").
     */
    if (mantissa.canFind('.')) digitsPart = digitsPart.stripRight('0');

    /* Count digits only, any decimal point left in the slice is skipped. */
    immutable size_t numDigits = digitsPart.count!(x => x.isDigit);
    return (numDigits == 0) ? 1 : numDigits;
}

@safe pure unittest
{
    assert("0".significantDigits == 1);
    assert("10".significantDigits == 2);
    assert("0.0".significantDigits == 1);
    assert("-10.0".significantDigits == 2);
    assert("-.01".significantDigits == 1);
    assert("-.5401".significantDigits == 4);
    assert("1010.010".significantDigits == 6);
    assert("0.0003003".significantDigits == 4);
    assert("6e+06".significantDigits == 1);
    assert("6.0e+06".significantDigits == 1);
    assert("6.5e+06".significantDigits == 2);
    assert("6.005e+06".significantDigits == 4);

    /* Trailing zeros when the decimal point precedes the first non-zero digit. */
    assert("0.10".significantDigits == 1);
    assert("0.0500".significantDigits == 1);
    assert("-.250".significantDigits == 2);

    /* Exponent digits are not counted when the mantissa is zero. */
    assert("0e+15".significantDigits == 1);
    assert("0.00e+10".significantDigits == 1);
}
1253

1254
/** Counts the digits to the right of the decimal point in a numeric string.
 * Trailing zeros are included in the count, an exponent suffix is not. Zero is
 * returned when there is no decimal point or the value is not finite.
 */
size_t precisionDigits(const char[] numericString) @safe pure
{
    import std.algorithm : findAmong, findSplit;
    import std.conv : to;
    import std.math : isFinite;
    import std.string : isNumeric;

    assert (numericString.isNumeric);

    if (!numericString.to!double.isFinite) return 0;

    auto parts = numericString.findSplit(".");
    if (parts[1].length == 0) return 0;   // No decimal point

    /* Everything after the point, less any exponent suffix. */
    auto fraction = parts[2];
    return fraction.length - fraction.findAmong("eE").length;
}

@safe pure unittest
{
    static struct Case { string raw; size_t expected; }

    immutable Case[] cases = [
        Case("0", 0),
        Case("10", 0),
        Case("0.0", 1),
        Case("-10.0", 1),
        Case("-.01", 2),
        Case("-.5401", 4),
    ];

    foreach (ref tc; cases) assert(tc.raw.precisionDigits == tc.expected);
}
1289

1290
/** Estimates how many columns a string occupies in a monospace (fixed-width) font.
 *
 * Each grapheme counts as one column, or two when its first code point lies in the
 * range U+3000 through U+9FFF. If the string cannot be decoded (invalid UTF-8) the
 * length in bytes is returned instead.
 */
size_t monospacePrintWidth(const char[] str) @safe nothrow
{
    import std.uni : byGrapheme;

    size_t printWidth = 0;
    try
    {
        foreach (grapheme; str.byGrapheme)
        {
            immutable dchar lead = grapheme[0];
            immutable bool isWide = (lead >= '\u3000' && lead <= '\u9fff');
            printWidth += isWide ? 2 : 1;
        }
    }
    catch (Exception)
    {
        /* Invalid utf-8 sequence. Catch avoids program failure. */
        printWidth = str.length;
    }

    return printWidth;
}

unittest
{
    immutable string[] samples = ["", " ", "abc", "林檎", "æble", "ვაშლი", "größten"];
    immutable size_t[] widths = [0, 1, 3, 4, 4, 5, 7];

    foreach (i, sample; samples) assert(sample.monospacePrintWidth == widths[i]);
}

Read our documentation on viewing source code .

Loading