1
/**
2
Command line tool that prints TSV data aligned for easier reading on consoles
3
and traditional command-line environments.
4

5
Copyright (c) 2017-2020, eBay Inc.
6
Initially written by Jon Degenhardt
7

8
License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
9
*/
10
module tsv_utils.tsv_pretty;
11

12
import std.exception : enforce;
13
import std.range;
14
import std.stdio;
15
import std.typecons : Flag, Yes, No, tuple;
16

17
/* Embeds a default D runtime option string ("gcopt=cleanup:none") in the executable.
 * The declaration is only compiled for compiler front-end versions 2085 and later.
 * NOTE(review): presumably this tells the garbage collector to skip its cleanup pass
 * at program exit - confirm against the druntime GC configuration documentation.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
18

19
version(unittest)
{
    // Unit test builds take their entry point from the compiler's -main switch.
}
else
{
    /** Program entry point.
     *
     * Parses the command line into a TsvPrettyOptions instance. When argument
     * processing says execution should continue, tsvPretty is run with every
     * argument after the program name. Exceptions thrown by tsvPretty are
     * reported on stderr.
     *
     * Returns: 0 on success and 1 when tsvPretty throws. When argument processing
     * says to stop, the exit code it supplied is returned instead.
     */
    int main(string[] cmdArgs)
    {
        /* Merge coverage reports across runs when built by DMD in coverage mode. */
        version(D_Coverage) version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }

        TsvPrettyOptions options;
        auto argsResult = options.processArgs(cmdArgs);
        immutable bool keepGoing = argsResult[0];
        if (!keepGoing) return argsResult[1];

        try
        {
            tsvPretty(options, cmdArgs[1 .. $]);
        }
        catch (Exception exc)
        {
            stderr.writefln("Error [%s]: %s", options.programName, exc.msg);
            return 1;
        }

        return 0;
    }
}
49

50
/** Long-form help text, printed ahead of the option list for --help-verbose. */
auto helpTextVerbose = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a format intended to be more human readable when
working on the command line. This is done primarily by lining up data into
fixed-width columns. Text is left aligned, numbers are right aligned. Floating
point numbers are aligned on the decimal point when feasible.

Processing begins by reading the initial set of lines into memory to determine
the field widths and data types of each column. This look-ahead buffer is used
for header detection as well. Output begins after this processing is complete.

By default, only the alignment is changed, the actual values are not modified.
Several of the formatting options do modify the values.

Features:

* Floating point numbers: Floats can be printed in fixed-width precision, using
  the same precision for all floats in a column. This makes them line up nicely.
  Precision is determined by values seen during look-ahead processing. The max
  precision defaults to 9, this can be changed when smaller or larger values are
  desired. See the '--f|format-floats' and '--p|precision' options.

* Header lines: Headers are detected automatically when possible. This can be
  overridden when automatic detection doesn't work as desired. Headers can be
  underlined and repeated at regular intervals.

* Missing values: A substitute value can be used for empty fields. This is often
  less confusing than spaces. See '--e|replace-empty' and '--E|empty-replacement'.

* Exponential notation: As part of float formatting, '--f|format-floats' re-formats
  columns where exponential notation is found so all the values in the column
  are displayed using exponential notation with the same precision.

* Preamble: A number of initial lines can be designated as a preamble and output
  unchanged. The preamble is before the header, if a header is present. Preamble
  lines can be auto-detected via the heuristic that they lack field delimiters.
  This works well when the field delimiter is a TAB.

* Fonts: Fixed-width fonts are assumed. CJK characters are assumed to be double
  width. This is not always correct, but works well in most cases.

Options:
EOS";
94

95
/** Short help text, printed ahead of the option list for --help. */
auto helpText = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a more human readable format. This is done by lining
up data into fixed-width columns. Text is left aligned, numbers are right aligned.
Floating point numbers are aligned on the decimal point when feasible.

Options:
EOS";
104

105
/** TsvPrettyOptions stores the tsv-pretty command line options together with the
 * values derived from them. Call processArgs to fill it in from the command line.
 */
struct TsvPrettyOptions
{
    string programName;
    bool helpVerbose = false;           // --help-verbose
    bool hasHeader = false;             // --H|header (Note: Default false assumed by validation code)
    bool autoDetectHeader = true;       // Derived (Note: Default true assumed by validation code)
    bool noHeader = false;              // --x|no-header (Note: Default false assumed by validation code)
    size_t lookahead = 1000;            // --l|lookahead
    size_t repeatHeader = 0;            // --r|repeat-header num (zero means no repeat)
    bool underlineHeader = false;       // --u|underline-header
    bool formatFloats = false;          // --f|format-floats
    size_t floatPrecision = 9;          // --p|precision num (max precision when formatting floats.)
    bool replaceEmpty = false;          // --e|replace-empty
    string emptyReplacement = "";       // --E|empty-replacement
    size_t emptyReplacementPrintWidth = 0;    // Derived
    char delim = '\t';                  // --d|delimiter
    size_t spaceBetweenFields = 2;      // --s|space-between-fields num
    size_t maxFieldPrintWidth = 40;     // --m|max-text-width num; Max width for variable width text fields.
    bool autoDetectPreamble = false;    // --a|auto-preamble
    size_t preambleLines = 0;           // --b|preamble; Number of preamble lines.
    bool versionWanted = false;         // --V|version

    /** Parses the command line, validates option combinations and computes the
     * derived settings.
     *
     * Params:
     *   cmdArgs = The command line, program name first. It is handed to getopt by
     *             reference.
     *
     * Returns: A tuple (bool, int). The first value is true when the arguments were
     * processed successfully and execution should continue. It is false when an
     * error occurred or when help or version output was requested; the second
     * value is then the exit code to use (0 for help/version, 1 for an error).
     *
     * When true is returned the following derived values have been set:
     * autoDetectHeader (turned off when either header option was given),
     * replaceEmpty / emptyReplacement (each implies the other) and
     * emptyReplacementPrintWidth.
     *
     * Exceptions raised during parsing or validation are caught here and reported
     * on stderr; they are not propagated to the caller.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options
            auto r = getopt(
                cmdArgs,
                "help-verbose",           "       Print full help.", &helpVerbose,
                std.getopt.config.caseSensitive,
                "H|header",               "       Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "x|no-header",            "       Assume no header. Turns off automatic header detection.", &noHeader,
                "l|lookahead",            "NUM    Lines to read to interpret data before generating output. Default: 1000", &lookahead,
                "r|repeat-header",        "NUM    Lines to print before repeating the header. Default: No repeating header", &repeatHeader,
                "u|underline-header",     "       Underline the header.", &underlineHeader,
                "f|format-floats",        "       Format floats for better readability. Default: No", &formatFloats,
                "p|precision",            "NUM    Max floating point precision. Implies --format-floats. Default: 9", &floatPrecisionOptionHandler,
                std.getopt.config.caseSensitive,
                "e|replace-empty",        "       Replace empty fields with '--'.", &replaceEmpty,
                "E|empty-replacement",    "STR    Replace empty fields with a string.", &emptyReplacement,
                std.getopt.config.caseInsensitive,
                "d|delimiter",            "CHR    Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,
                "s|space-between-fields", "NUM    Spaces between each field (Default: 2)", &spaceBetweenFields,
                "m|max-text-width",       "NUM    Max reserved field width for variable width text fields. Default: 40", &maxFieldPrintWidth,
                "a|auto-preamble",        "       Treat initial lines in a file as a preamble if the line contains no field delimiters.", &autoDetectPreamble,
                "b|preamble",             "NUM    Treat the first NUM lines as a preamble and output them unchanged.", &preambleLines,
                std.getopt.config.caseSensitive,
                "V|version",              "       Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            /* Informational requests: print and stop with a zero exit code. */
            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-pretty"));
                return tuple(false, 0);
            }

            /* Validation and derivations. */
            enforce(!(noHeader && hasHeader),
                    "Cannot specify both --H|header and --x|no-header.");

            if (noHeader || hasHeader) autoDetectHeader = false;

            /* Zero look-ahead has limited utility unless the first line is known to
             * be a header. Good chance the user will get an unintended behavior.
             * At this point autoDetectHeader can only still be true when neither
             * header option was given, so a single check is sufficient.
             */
            enforce(!(lookahead == 0 && autoDetectHeader),
                    "Cannot auto-detect header with zero look-ahead. Specify either '--H|header' or '--x|no-header' when using '--l|lookahead 0'.");

            enforce(!(autoDetectPreamble && preambleLines != 0),
                    "Do not use '--b|preamble NUM' and '--a|auto-preamble' together. ('--b|preamble 0' is okay.)");

            /* A replacement string implies replacing; replacing without a string
             * uses the default "--".
             */
            if (emptyReplacement.length != 0) replaceEmpty = true;
            else if (replaceEmpty) emptyReplacement = "--";

            if (emptyReplacement.length != 0)
            {
                emptyReplacementPrintWidth = emptyReplacement.monospacePrintWidth;
            }
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }

    /* Option handler for --p|precision. Stores the precision and also turns on
     * --f|format-floats. The conversion throws if optionVal is not a valid size_t.
     */
    private void floatPrecisionOptionHandler(string option, string optionVal) @safe pure
    {
        import std.conv : to;
        floatPrecision = optionVal.to!size_t;
        formatFloats = true;
    }
}
234

235
/** tsvPretty is the driver loop. It reads the input files line by line and hands
 * each line to a TsvPrettyProcessor instance.
 *
 * Keeping file handling here separates physical I/O sources and sinks from the
 * processing algorithm, which writes to generic output ranges. A new
 * lockingTextWriter is obtained from stdout for every input line.
 * NOTE(review): the original comment states this has the effect of flushing
 * standard output on every line - confirm.
 *
 * Preamble lines are identified here as well, which keeps that concern out of
 * the TsvPrettyProcessor code.
 *
 * Params:
 *   options = Command line options, as set up by TsvPrettyOptions.processArgs.
 *   files   = Input file names. "-" means standard input. Standard input is also
 *             used when the list is empty.
 */
void tsvPretty(const ref TsvPrettyOptions options, const string[] files)
{
    import std.algorithm : canFind;

    /* How a given input line is handed to the processor. */
    enum LineRole { preamble, firstData, data }

    immutable firstDataLineNum = options.preambleLines + 1;
    auto tpp = TsvPrettyProcessor(options);

    foreach (filename; (files.length > 0) ? files : ["-"])
    {
        /* Auto-detection starts over for every file. */
        bool preambleEndSeen = false;
        auto inputStream = (filename == "-") ? stdin : filename.File();

        foreach (lineNum, line; inputStream.byLine.enumerate(1))
        {
            auto role = LineRole.data;

            if (options.autoDetectPreamble)
            {
                /* Lines without a delimiter are preamble until the first line
                 * containing one is seen.
                 */
                if (!preambleEndSeen)
                {
                    if (line.canFind(options.delim))
                    {
                        preambleEndSeen = true;
                        role = LineRole.firstData;
                    }
                    else
                    {
                        role = LineRole.preamble;
                    }
                }
            }
            else if (lineNum < firstDataLineNum)
            {
                role = LineRole.preamble;
            }
            else if (lineNum == firstDataLineNum)
            {
                role = LineRole.firstData;
            }

            final switch (role)
            {
            case LineRole.preamble:
                tpp.processPreambleLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
                break;

            case LineRole.firstData:
                tpp.processFileFirstLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
                break;

            case LineRole.data:
                tpp.processLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
                break;
            }
        }
    }

    tpp.finish(outputRangeObject!(char, char[])(stdout.lockingTextWriter));
}
302

303
/** TsvPrettyProcessor holds the state of a tsv-pretty run and provides the
 * operations used to feed it input lines one at a time.
 *
 * The processor knows that input comes from a sequence of files, but it does not
 * open files or read lines itself. That is the caller's job. Output is written to
 * an output range supplied by the caller. Lines must be passed in the order they
 * were read; the processing logic depends on it.
 *
 * In addition to the constructor there are four entry points:
 *  - processPreambleLine - For a preamble line occurring before the header line
 *    or first line of data.
 *  - processFileFirstLine - For the first (non-preamble) line of each file. This
 *    is where header handling happens.
 *  - processLine - For every line other than the first line of a file.
 *  - finish - Called once all input has been passed. Needed because the
 *    look-ahead cache may still be filling when input ends.
 *
 * NOTE(review): every member, including the entry points above, is declared under
 * a `private:` label. They are reachable from this module only - confirm that is
 * intended.
 */

struct TsvPrettyProcessor
{
    import std.array : appender;

private:
    /* State of automatic header detection: undecided, header found, no header. */
    enum AutoDetectHeaderResult { none, hasHeader, noHeader }

    TsvPrettyOptions _options;
    size_t _fileCount = 0;              // Files for which a first line has been seen
    size_t _dataLineOutputCount = 0;    // Data lines written so far
    bool _stillCaching = true;          // True until the look-ahead cache is output
    string _candidateHeaderLine;        // First line of first file, during auto-detect
    auto _lookaheadCache = appender!(string[])();
    FieldFormat[] _fieldVector;         // One entry per column
    AutoDetectHeaderResult _autoDetectHeaderResult = AutoDetectHeaderResult.none;

    /** Constructor. Caching is skipped from the start when there is no header and
     * the look-ahead is zero.
     */
    this(const TsvPrettyOptions options) @safe pure nothrow @nogc
    {
        _options = options;
        if (options.lookahead == 0 && options.noHeader) _stillCaching = false;
    }

    invariant
    {
        /* One of the three header modes is always active. */
        assert(_options.hasHeader || _options.noHeader || _options.autoDetectHeader);

        /* The cache never holds as many lines as the look-ahead setting. */
        immutable cachedLineCount = _lookaheadCache.data.length;
        assert((_options.lookahead == 0 && cachedLineCount == 0) ||
               cachedLineCount < _options.lookahead);
    }

    /** Handles a preamble line, one occurring before the header line or first line
     * of data. The line is copied to the output unchanged, but only while no file's
     * first line has been processed yet. Later preamble lines are dropped.
     */
    void processPreambleLine(OutputRange!char outputStream, const char[] line)
    {
        if (_fileCount != 0) return;

        put(outputStream, line);
        put(outputStream, '\n');
    }

    /** Handles the first line of each file. This is where header processing occurs.
     *
     * With --no-header the line is plain data. With --header the line from the
     * first file becomes the header and the first lines of later files are dropped.
     * Otherwise the header is auto-detected: the first file's first line is held as
     * a candidate and compared with the second file's first line if detection has
     * not been settled by then.
     */
    void processFileFirstLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        _fileCount++;

        if (_options.noHeader)
        {
            processLine(outputStream, line);
            return;
        }

        if (_options.hasHeader)
        {
            if (_fileCount == 1)
            {
                setHeaderLine(line);
                if (_options.lookahead == 0) outputLookaheadCache(outputStream);
            }
            return;
        }

        assert(_options.autoDetectHeader);

        final switch (_autoDetectHeaderResult)
        {
        case AutoDetectHeaderResult.noHeader:
            /* Already decided there is no header: this is a data line. */
            assert(_fileCount > 1);
            processLine(outputStream, line);
            break;

        case AutoDetectHeaderResult.hasHeader:
            /* Already decided there is a header: drop the repeated header. */
            assert(_fileCount > 1);
            break;

        case AutoDetectHeaderResult.none:
            if (_fileCount == 1)
            {
                assert(_candidateHeaderLine.length == 0);
                _candidateHeaderLine = line.to!string;
            }
            else if (_fileCount == 2)
            {
                immutable bool sameFirstLine = (_candidateHeaderLine == line);
                if (sameFirstLine)
                {
                    _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                    setHeaderLine(_candidateHeaderLine);

                    /* Edge case: First file has only a header line and look-ahead set to zero. */
                    if (_stillCaching && _options.lookahead == 0) outputLookaheadCache(outputStream);
                }
                else
                {
                    _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
                    updateFieldFormatsForLine(_candidateHeaderLine);
                    processLine(outputStream, line);
                }
            }
            break;
        }
    }

    /** Handles every line other than the first line of a file. The line is cached
     * while look-ahead is in progress and written directly afterwards.
     */
    void processLine(OutputRange!char outputStream, const char[] line)
    {
        if (!_stillCaching)
        {
            outputDataLine(outputStream, line);
        }
        else
        {
            cacheDataLine(outputStream, line);
        }
    }

    /** Called when all input has been passed. Outputs the look-ahead cache if input
     * ended while it was still being filled.
     */
    void finish(OutputRange!char outputStream)
    {
        if (_stillCaching) outputLookaheadCache(outputStream);
    }

private:
    /* outputLookaheadCache ends the caching phase. It settles the auto-detect header
     * decision if still open, finalizes the format of each field, writes the header
     * (or the candidate header as a data line) and then writes all cached lines.
     */
    void outputLookaheadCache(OutputRange!char outputStream)
    {
        assert(_stillCaching);

        immutable bool haveCandidate = _candidateHeaderLine.length != 0;

        if (_options.autoDetectHeader &&
            _autoDetectHeaderResult == AutoDetectHeaderResult.none &&
            haveCandidate)
        {
            if (candidateHeaderLooksLikeHeader())
            {
                _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                setHeaderLine(_candidateHeaderLine);
            }
            else
            {
                _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
            }
        }

        immutable bool headerPresent =
            _options.hasHeader ||
            (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader);

        /* The candidate turned out to be data: it must be counted and written
         * ahead of the cached lines.
         */
        immutable bool candidateIsData =
            !headerPresent &&
            _options.autoDetectHeader &&
            _autoDetectHeaderResult == AutoDetectHeaderResult.noHeader &&
            haveCandidate;

        if (candidateIsData) updateFieldFormatsForLine(_candidateHeaderLine);

        finalizeFieldFormatting();

        if (headerPresent)
        {
            outputHeader(outputStream);
        }
        else if (candidateIsData)
        {
            outputDataLine(outputStream, _candidateHeaderLine);
        }

        foreach (cachedLine; _lookaheadCache.data) outputDataLine(outputStream, cachedLine);
        _lookaheadCache.clear;
        _stillCaching = false;
    }

    /* Returns true if the candidate header line should be treated as a header. That
     * is the case when at least one field is text in the candidate while the same
     * field is numeric (integer, floating point or exponent) in the cached data.
     */
    bool candidateHeaderLooksLikeHeader() @safe
    {
        import std.algorithm : splitter;

        foreach (fieldIndex, fieldValue; _candidateHeaderLine.splitter(_options.delim).enumerate)
        {
            auto candidateFormat = FieldFormat(fieldIndex);
            candidateFormat.updateForFieldValue(fieldValue, _options);

            if (fieldIndex >= _fieldVector.length) continue;
            if (candidateFormat.fieldType != FieldType.text) continue;

            immutable dataType = _fieldVector[fieldIndex].fieldType;
            if (dataType == FieldType.integer ||
                dataType == FieldType.floatingPoint ||
                dataType == FieldType.exponent)
            {
                return true;
            }
        }

        return false;
    }

    /* Records the header text of each field of line, adding field entries as needed. */
    void setHeaderLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach (fieldIndex, headerText; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length) _fieldVector ~= FieldFormat(fieldIndex);
            assert(fieldIndex < _fieldVector.length);
            _fieldVector[fieldIndex].setHeader(headerText);
        }
    }

    /* Adds a line to the look-ahead cache and updates the field formats with its
     * values. The cache is output as soon as it reaches the look-ahead size.
     */
    void cacheDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        assert(_lookaheadCache.data.length < _options.lookahead);

        _lookaheadCache ~= line.to!string;
        updateFieldFormatsForLine(line);

        immutable bool cacheFull = (_lookaheadCache.data.length == _options.lookahead);
        if (cacheFull) outputLookaheadCache(outputStream);
    }

    /* Feeds each field value of line to its field format, adding entries as needed. */
    void updateFieldFormatsForLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach (fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length) _fieldVector ~= FieldFormat(fieldIndex);
            assert(fieldIndex < _fieldVector.length);
            _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
        }
    }

    /* Finalizes each field in turn. Every field starts where the previous one
     * ended plus the configured space between fields.
     */
    void finalizeFieldFormatting() @safe pure @nogc nothrow
    {
        size_t fieldStart = 0;
        foreach (ref field; _fieldVector)
        {
            immutable fieldEnd = field.finalizeFormatting(fieldStart, _options);
            fieldStart = fieldEnd + _options.spaceBetweenFields;
        }
    }

    /* Writes the header line, followed by an underline line when that option is on. */
    void outputHeader(OutputRange!char outputStream)
    {
        size_t outputPosition = 0;
        foreach (field; _fieldVector)
        {
            /* Pad up to the start of the field, then write the header text. */
            immutable size_t padding = field.startPosition - outputPosition;
            put(outputStream, repeat(" ", padding));
            outputPosition += padding;
            outputPosition += field.writeHeader(outputStream, _options);
        }
        put(outputStream, '\n');

        if (!_options.underlineHeader) return;

        outputPosition = 0;
        foreach (field; _fieldVector)
        {
            immutable size_t padding = field.startPosition - outputPosition;
            put(outputStream, repeat(" ", padding));
            outputPosition += padding;
            outputPosition += field.writeHeader!(Yes.writeUnderline)(outputStream, _options);
        }
        put(outputStream, '\n');
    }

    /* Writes one data line with each field value placed at its field's position.
     * When header repeating is on, the header is written again (after a blank line)
     * every repeatHeader data lines.
     */
    void outputDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.algorithm : splitter;

        /* Repeating header option. */
        immutable bool headerInUse =
            _options.hasHeader ||
            (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader);

        if (_options.repeatHeader != 0 &&
            _dataLineOutputCount != 0 &&
            headerInUse &&
            _dataLineOutputCount % _options.repeatHeader == 0)
        {
            put(outputStream, '\n');
            outputHeader(outputStream);
        }

        _dataLineOutputCount++;

        size_t outputPosition = 0;
        foreach (fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length)
            {
                /* This line has more fields than any line seen while caching. Add a
                 * FieldFormat entry formatted from this one value.
                 */
                _fieldVector ~= FieldFormat(fieldIndex);

                size_t newFieldStart = 0;
                if (fieldIndex != 0)
                {
                    newFieldStart = _fieldVector[fieldIndex - 1].endPosition + _options.spaceBetweenFields;
                }

                _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
                _fieldVector[fieldIndex].finalizeFormatting(newFieldStart, _options);
            }

            assert(fieldIndex < _fieldVector.length);

            FieldFormat fieldFormat = _fieldVector[fieldIndex];
            immutable size_t fieldStart = fieldFormat.startPosition;

            size_t padding;
            if (outputPosition < fieldStart)
            {
                padding = fieldStart - outputPosition;
            }
            else
            {
                /* Previous field went long. One space between fields. */
                padding = (fieldIndex == 0) ? 0 : 1;
            }

            put(outputStream, repeat(" ", padding));
            outputPosition += padding;
            outputPosition += fieldFormat.writeFieldValue(outputStream, outputPosition, fieldValue, _options);
        }
        put(outputStream, '\n');
    }
}
633

634
/** The kinds of field content that tsv-pretty processing recognizes and tracks. */
enum FieldType
{
    unknown,
    text,
    integer,
    floatingPoint,
    exponent,
}
636

637
/** The field alignments used by tsv-pretty processing. */
enum FieldAlignment
{
    left,
    right,
}
639

640
/** FieldFormat holds all the formatting info needed to format data values in a specific
641
 * column. e.g. Field 1 may be text, field 2 may be a float, etc. This is calculated
642
 * during the caching phase. Each FieldFormat instance is part of a vector representing
643
 * the full row, so each includes the start position on the line and similar data.
644
 *
645
 * APIs used during the caching phase to gather field value samples
646
 *  - this - Initial construction. Takes the field index.
647
 *  - setHeader - Used to set the header text.
648
 *  - updateForFieldValue - Used to add the next field value sample.
649
 *  - finalizeFormatting - Used at the end of caching to finalize the format choices.
650
 *
651
 * APIs used after caching is finished (after finalizeFormatting):
652
 *  - startPosition - Returns the expected start position for the field.
653
 *  - endPosition - Returns the expected end position for the field.
654
 *  - writeHeader - Outputs the header, properly aligned.
655
 *  - writeFieldValue - Outputs the current field value, properly aligned.
656
 */
657

658
struct FieldFormat
{
private:
    size_t _fieldIndex;                  // Zero-based index in the line
    string _header = "";                 // Original field header
    size_t _headerPrintWidth = 0;        // Monospace print width of _header (see setHeader)
    FieldType _type = FieldType.unknown;
    FieldAlignment _alignment = FieldAlignment.left;
    size_t _startPosition = 0;           // Assigned by finalizeFormatting
    size_t _printWidth = 0;              // Column width, assigned by finalizeFormatting
    size_t _precision = 0;               // Number of digits after the decimal point

    /* These are used while doing initial type and print format detection. */
    size_t _minRawPrintWidth = 0;        // Zero doubles as "no sample recorded yet"
    size_t _maxRawPrintWidth = 0;
    size_t _maxDigitsBeforeDecimal = 0;  // Length of the text preceding the decimal point
    size_t _maxDigitsAfterDecimal = 0;
    size_t _maxSignificantDigits = 0;    // Digits to include in exponential notation

public:

    /** Initial construction. Takes a field index. */
    this(size_t fieldIndex) @safe pure nothrow @nogc
    {
        _fieldIndex = fieldIndex;
    }

    /** Sets the header text and records its monospace print width. */
    void setHeader(const char[] header) @safe
    {
        import std.conv : to;

        _header = header.to!string;
        _headerPrintWidth = _header.monospacePrintWidth;
    }

    /** Returns the expected start position for the field. */
    size_t startPosition() nothrow pure @safe @property
    {
        return _startPosition;
    }

    /** Returns the expected end position for the field (start position plus print width). */
    size_t endPosition() nothrow pure @safe @property
    {
        return _startPosition + _printWidth;
    }

    /** Returns the type of field. */
    FieldType fieldType() nothrow pure @safe @property
    {
        return _type;
    }

    /** Writes the field header or underline characters to the output stream.
     *
     * The current output position should have been written up to the field's start position,
     * including any spaces between fields. Unlike data fields, there is no need to correct
     * for previous fields that have run long. This routine does not output trailing spaces.
     * This makes it simpler for lines to avoid unnecessary trailing spaces.
     *
     * Underlines can either be written the full width of the field or just under the
     * text of the header. At present this is a template parameter (compile-time).
     *
     * Nothing is written when the header is empty (zero print width).
     *
     * The print width of the output is returned.
     */
    size_t writeHeader (Flag!"writeUnderline" writeUnderline = No.writeUnderline,
                        Flag!"fullWidthUnderline" fullWidthUnderline = No.fullWidthUnderline)
        (OutputRange!char outputStream, const ref TsvPrettyOptions options)
    {
        import std.range : repeat;

        size_t positionsWritten = 0;
        if (_headerPrintWidth > 0)
        {
            static if (writeUnderline && fullWidthUnderline)
            {
                put(outputStream, repeat("-", _printWidth));
                positionsWritten += _printWidth;
            }
            else
            {
                /* Header text and header-width underlines share the same placement:
                 * right-aligned columns are padded on the left so the text (or its
                 * underline) ends at the column's right edge.
                 */
                if (_alignment == FieldAlignment.right)
                {
                    put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
                    positionsWritten += _printWidth - _headerPrintWidth;
                }

                static if (writeUnderline) put(outputStream, repeat("-", _headerPrintWidth));
                else put(outputStream, _header);

                positionsWritten += _headerPrintWidth;
            }
        }
        return positionsWritten;
    }

    /** Writes the field value for the current column.
     *
     * The caller needs to generate output at least to the column's start position, but
     * can go beyond if previous fields have run long.
     *
     * The field value is aligned properly in the field. Either left aligned (text) or
     * right aligned (numeric). Floating point fields are both right aligned and
     * decimal point aligned. The print width of the output (leading spaces plus the
     * printed value) is returned; this is a count of print positions, not bytes.
     * Trailing spaces are not added, the caller must add any necessary trailing
     * spaces prior to printing the next field.
     */
    size_t writeFieldValue(OutputRange!char outputStream, size_t currPosition,
                           const char[] fieldValue, const ref TsvPrettyOptions options)
    in
    {
        assert(currPosition >= _startPosition);   // Caller responsible for advancing to field start position.
        assert(_type == FieldType.text || _type == FieldType.integer ||
               _type == FieldType.floatingPoint || _type == FieldType.exponent);
    }
    do
    {
        import std.algorithm : find, max;
        import std.conv : to;

        /* Create the print version of the string. Either the raw value or a formatted
         * version of a float.
         */
        string printValue;
        if (!options.formatFloats || _type == FieldType.text || _type == FieldType.integer)
        {
            printValue = fieldValue.to!string;
        }
        else
        {
            assert(options.formatFloats);
            assert(_type == FieldType.exponent || _type == FieldType.floatingPoint);

            if (_type == FieldType.exponent)
            {
                printValue = fieldValue.formatExponentValue(_precision);
            }
            else
            {
                printValue = fieldValue.formatFloatingPointValue(_precision);
            }
        }

        if (printValue.length == 0 && options.replaceEmpty) printValue = options.emptyReplacement;
        size_t printValuePrintWidth = printValue.monospacePrintWidth;

        /* Calculate leading spaces needed for right alignment. */
        size_t leadingSpaces = 0;
        if (_alignment == FieldAlignment.right)
        {
            /* Target width adjusts the column width to account for overrun by the previous field. */
            size_t targetWidth;
            if (currPosition == _startPosition)
            {
                targetWidth = _printWidth;
            }
            else
            {
                size_t startGap = currPosition - _startPosition;
                targetWidth = max(printValuePrintWidth,
                                  startGap < _printWidth ? _printWidth - startGap : 0);
            }

            leadingSpaces = (printValuePrintWidth < targetWidth) ?
                targetWidth - printValuePrintWidth : 0;

            /* The above calculation assumes the print value is fully right aligned.
             * This is not correct when raw value floats are being used rather than
             * formatted floats, as different values will have different precision.
             * The next adjustment accounts for this, dropping leading spaces as
             * needed to align the decimal point. Note that text and exponential
             * values get aligned strictly against right boundaries.
             */
            if (leadingSpaces > 0 && _precision > 0 &&
                _type == FieldType.floatingPoint && !options.formatFloats)
            {
                import std.algorithm : canFind;
                import std.string : isNumeric;

                if (printValue.isNumeric && !printValue.canFind!(x => x == 'e' || x == 'E'))
                {
                    /* Length of the decimal point plus the digits following it (zero if no point). */
                    size_t decimalAndDigitsLength = printValue.find(".").length;
                    size_t trailingSpaces =
                        (decimalAndDigitsLength == 0) ? _precision + 1 :
                        (decimalAndDigitsLength > _precision) ? 0 :
                        _precision + 1 - decimalAndDigitsLength;

                    leadingSpaces = (leadingSpaces > trailingSpaces) ?
                        leadingSpaces - trailingSpaces : 0;
                }
            }
        }
        put(outputStream, repeat(' ', leadingSpaces));
        put(outputStream, printValue);
        return printValuePrintWidth + leadingSpaces;
    }

    /** Updates type and format given a new field value.
     *
     * This is called during look-ahead caching to register a new sample value for the
     * column. The key components updated are field type and print width.
     *
     * Type precedence, independent of the order samples arrive in:
     * text > exponent > floatingPoint > integer. Empty values never change the type.
     */
    void updateForFieldValue(const char[] fieldValue, const ref TsvPrettyOptions options) @safe
    {
        import std.algorithm : findAmong, findSplit, max, min;
        import std.conv : to, ConvException;
        import std.string : isNumeric;

        size_t fieldValuePrintWidth = fieldValue.monospacePrintWidth;
        size_t fieldValuePrintWidthWithEmpty =
            (fieldValuePrintWidth == 0 && options.replaceEmpty) ?
            options.emptyReplacementPrintWidth :
            fieldValuePrintWidth;

        _maxRawPrintWidth = max(_maxRawPrintWidth, fieldValuePrintWidthWithEmpty);
        _minRawPrintWidth = (_minRawPrintWidth == 0) ?
            fieldValuePrintWidthWithEmpty :
            min(_minRawPrintWidth, fieldValuePrintWidthWithEmpty);

        if (_type == FieldType.text)
        {
            /* Already text, can't become anything else. */
        }
        else if (fieldValuePrintWidth == 0)
        {
            /* Don't let an empty field override a numeric field type. */
        }
        else if (!fieldValue.isNumeric)
        {
            /* Not parsable as a number. Switch from unknown or numeric type to text. */
            _type = FieldType.text;
        }
        else
        {
            /* Field type is currently unknown or numeric, and current field parses as numeric.
             * See if it parses as integer or float. Integers will parse as floats, so try
             * integer types first.
             */
            FieldType parsesAs = FieldType.unknown;
            long longValue;
            ulong ulongValue;
            double doubleValue;
            try
            {
                longValue = fieldValue.to!long;
                parsesAs = FieldType.integer;
            }
            catch (ConvException)
            {
                try
                {
                    ulongValue = fieldValue.to!ulong;
                    parsesAs = FieldType.integer;
                }
                catch (ConvException)
                {
                    try
                    {
                        doubleValue = fieldValue.to!double;
                        parsesAs = (fieldValue.findAmong("eE").length == 0) ?
                            FieldType.floatingPoint : FieldType.exponent;
                    }
                    catch (ConvException)
                    {
                        /* Note: This means isNumeric thinks it's a number, but conversions all failed. */
                        parsesAs = FieldType.text;
                    }
                }
            }

            if (parsesAs == FieldType.text)
            {
                /* Not parsable as a number (despite isNumeric result). Switch to text type. */
                _type = FieldType.text;
            }
            else if (parsesAs == FieldType.exponent)
            {
                /* Exponential notation supersedes both vanilla floats and integers. */
                _type = FieldType.exponent;
                _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);

                if (auto decimalSplit = fieldValue.findSplit("."))
                {
                    auto fromExponent = decimalSplit[2].findAmong("eE");
                    size_t numDigitsAfterDecimal = decimalSplit[2].length - fromExponent.length;
                    _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
                    _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, numDigitsAfterDecimal);
                }
                else
                {
                    /* Exponent without a decimal point. */
                    auto fromExponent = fieldValue.findAmong("eE");
                    assert(fromExponent.length > 0);
                    size_t numDigits = fieldValue.length - fromExponent.length;
                    _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, numDigits);
                }
            }
            else if (parsesAs == FieldType.floatingPoint)
            {
                /* Floating point supersedes integer but not exponential. */
                if (_type != FieldType.exponent) _type = FieldType.floatingPoint;
                _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);

                if (auto decimalSplit = fieldValue.findSplit("."))
                {
                    _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
                    _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, decimalSplit[2].length);
                }
            }
            else
            {
                assert(parsesAs == FieldType.integer);

                /* An integer sample must not downgrade a column already identified as
                 * floating point or exponential. Without the exponent check the detected
                 * type depended on the order of the samples ("1e6" then "5" ended up as
                 * integer, "5" then "1e6" as exponent).
                 */
                if (_type != FieldType.floatingPoint && _type != FieldType.exponent)
                {
                    _type = FieldType.integer;
                }
                _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
                _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, fieldValue.length);
            }
        }
    }

    /** Updates field formatting info based on the current state. It is expected to be
     * called after adding field entries via updateForFieldValue(). It returns its new
     * end position.
     *
     * A column that is still of unknown type (no typed samples) becomes a text column.
     * Numeric columns are right aligned, text columns left aligned. The print width is
     * never smaller than one position or than the header's print width.
     */
    size_t finalizeFormatting (size_t startPosition, const ref TsvPrettyOptions options) @safe pure @nogc nothrow
    {
        import std.algorithm : max, min;
        _startPosition = startPosition;
        if (_type == FieldType.unknown) _type = FieldType.text;
        _alignment = (_type == FieldType.integer || _type == FieldType.floatingPoint
                      || _type == FieldType.exponent) ?
            FieldAlignment.right :
            FieldAlignment.left;

        if (_type == FieldType.floatingPoint)
        {
            size_t precision = min(options.floatPrecision, _maxDigitsAfterDecimal);
            size_t maxValueWidth = _maxDigitsBeforeDecimal + precision;
            if (precision > 0) maxValueWidth++;  // Account for the decimal point.
            _printWidth = max(1, _headerPrintWidth, maxValueWidth);
            _precision = precision;
        }
        else if (_type == FieldType.exponent)
        {
            size_t maxPrecision = (_maxSignificantDigits > 0) ? _maxSignificantDigits - 1 : 0;
            _precision = min(options.floatPrecision, maxPrecision);

            /* Raw values use the widest sample seen. For formatted values the width is
             * the precision plus seven positions - presumably sign, leading digit,
             * decimal point and a four character exponent such as "e+06".
             */
            size_t maxValuePrintWidth = !options.formatFloats ? _maxRawPrintWidth : _precision + 7;
            _printWidth = max(1, _headerPrintWidth, maxValuePrintWidth);
        }
        else if (_type == FieldType.integer)
        {
            _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, _maxRawPrintWidth);
            _precision = 0;
        }
        else
        {
            /* Text: the widest sample is capped at the configured maximum field width,
             * but the column is never narrower than its header or its shortest sample.
             */
            _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth,
                              min(options.maxFieldPrintWidth, _maxRawPrintWidth));
            _precision = 0;
        }

        return _startPosition + _printWidth;
    }
}
1035

1036
/** formatFloatingPointValue returns the printed representation of a raw value
1037
 * formatted as a fixed precision floating number. This includes zero padding or
1038
 * truncation of trailing digits as necessary to meet the desired precision.
1039
 *
1040
 * If the value cannot be interpreted as a double then the raw value is returned.
1041
 * Similarly, values in exponential notation are returned without reformatting.
1042
 *
1043
 * This routine is used to format values in columns identified as floating point.
1044
 */
1045
string formatFloatingPointValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    /* Exponential notation. Hand back a copy of the raw value. */
    if (value.canFind!(c => c == 'e' || c == 'E')) return value.to!string;

    try
    {
        immutable double parsed = value.to!double;

        if (!parsed.isFinite) return value.to!string;   // NaN or Infinity

        /* Digits the raw text carries after its decimal point. */
        immutable size_t rawDigits = value.precisionDigits;

        /* Raw value has at least the requested digits: let format round it. */
        if (rawDigits >= precision) return format("%.*f", precision, parsed);

        /* Raw value is short on digits: print what it has, then zero pad. A value
         * without fractional digits also needs the decimal point added.
         */
        string shortForm = format("%.*f", rawDigits, parsed);
        if (rawDigits == 0) return shortForm ~ "." ~ repeat("0", precision).join;
        return shortForm ~ repeat("0", precision - rawDigits).join;
    }
    catch (ConvException)
    {
        /* Not interpretable as a double. Return the raw text. */
        return value.to!string;
    }
}
1088

1089
@safe unittest
{
    /* Each case: raw input, requested precision, expected printed form. */
    static struct Case { string raw; size_t precision; string expected; }

    immutable Case[] cases = [
        /* Inputs that are not finite numbers pass through unchanged. */
        Case("", 3, ""),
        Case(" ", 3, " "),
        Case("abc", 3, "abc"),
        Case("nan", 3, "nan"),

        /* Padding, truncation and rounding of plain decimals. */
        Case("0", 0, "0"),
        Case("1", 0, "1"),
        Case("1.", 0, "1"),
        Case("1", 3, "1.000"),
        Case("1000", 3, "1000.000"),
        Case("1000.001", 5, "1000.00100"),
        Case("1000.001", 3, "1000.001"),
        Case("1000.001", 2, "1000.00"),
        Case("1000.006", 2, "1000.01"),
        Case("-0.1", 1, "-0.1"),
        Case("-0.1", 3, "-0.100"),
        Case("-0.001", 3, "-0.001"),
        Case("-0.006", 2, "-0.01"),
        Case("-0.001", 1, "-0.0"),
        Case("-0.001", 0, "-0"),

        /* Exponential notation is never reformatted. */
        Case("0e+00", 0, "0e+00"),
        Case("0.00e+00", 0, "0.00e+00"),
        Case("1e+06", 1, "1e+06"),
        Case("1e+06", 2, "1e+06"),
        Case("1E-06", 1, "1E-06"),
        Case("1.1E+6", 2, "1.1E+6"),
        Case("1.1E+100", 2, "1.1E+100"),
    ];

    foreach (c; cases)
    {
        assert(c.raw.formatFloatingPointValue(c.precision) == c.expected);
    }
}
1118

1119
/** formatExponentValue returns the printed representation of a raw value formatted
1120
 * using exponential notation and a specific precision. If the value cannot be interpreted
1121
 * as a double then a copy of the original value is returned.
1122
 *
1123
 * This routine is used to format values in columns identified as having exponent format.
1124
 */
1125
string formatExponentValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind, findSplit;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    try
    {
        immutable double parsed = value.to!double;

        if (!parsed.isFinite) return value.to!string;   // NaN or Infinity

        /* Digits the raw value can supply after the leading mantissa digit. */
        immutable size_t sigDigits = value.significantDigits;
        immutable size_t mantissaDigits = (sigDigits == 0) ? 0 : sigDigits - 1;

        /* Enough digits available: format rounds to the requested precision. */
        if (mantissaDigits >= precision) return format("%.*e", precision, parsed);

        /* Too few digits: print what the value has, then zero pad the mantissa. */
        string unpadded = format("%.*e", mantissaDigits, parsed);
        auto pieces = unpadded.findSplit("e");   // Uses the same exponent case as format call.

        string mantissa = pieces[0];
        if (mantissaDigits == 0)
        {
            /* No fractional digits were printed, so the decimal point is missing too. */
            assert(precision != 0);
            assert(!pieces[0].canFind("."));
            mantissa = mantissa ~ ".";
        }

        return mantissa ~ repeat("0", precision - mantissaDigits).join ~ pieces[1] ~ pieces[2];
    }
    catch (ConvException)
    {
        /* Not interpretable as a double. Return a copy of the raw text. */
        return value.to!string;
    }
}
1168

1169
@safe unittest
{
    /* Each case: raw input, requested precision, expected printed form. */
    static struct Case { string raw; size_t precision; string expected; }

    immutable Case[] cases = [
        /* Inputs that are not finite numbers pass through unchanged. */
        Case("", 3, ""),
        Case(" ", 3, " "),
        Case("abc", 3, "abc"),
        Case("nan", 3, "nan"),

        /* Plain integers and decimals are converted to exponential form. */
        Case("0", 0, "0e+00"),
        Case("1", 0, "1e+00"),
        Case("1.", 0, "1e+00"),
        Case("1", 3, "1.000e+00"),
        Case("1000", 3, "1.000e+03"),
        Case("1000.001", 5, "1.00000e+03"),
        Case("1000.001", 3, "1.000e+03"),
        Case("1000.001", 6, "1.000001e+03"),
        Case("1000.006", 5, "1.00001e+03"),
        Case("-0.1", 1, "-1.0e-01"),
        Case("-0.1", 3, "-1.000e-01"),
        Case("-0.001", 3, "-1.000e-03"),
        Case("-0.001", 1, "-1.0e-03"),
        Case("-0.001", 0, "-1e-03"),

        /* Values already in exponential form are normalized to the precision. */
        Case("0e+00", 0, "0e+00"),
        Case("0.00e+00", 0, "0e+00"),
        Case("1e+06", 1, "1.0e+06"),
        Case("1e+06", 2, "1.00e+06"),
        Case("1.0001e+06", 1, "1.0e+06"),
        Case("1.0001e+06", 5, "1.00010e+06"),
    ];

    foreach (c; cases)
    {
        assert(c.raw.formatExponentValue(c.precision) == c.expected);
    }
}
1196

1197
/** Returns the number of significant digits in a numeric string.
1198
 *
1199
 * Significant digits are those needed to represent a number in exponential notation.
1200
 * Examples:
1201
 *   22.345 - 5 digits
1202
 *   10.010 - 4 digits
1203
 *   0.0032 - 2 digits
1204
 */
1205
size_t significantDigits(const char[] numericString) @safe pure
{
    import std.algorithm : canFind, find, findAmong, stripRight;
    import std.ascii : isDigit;
    import std.math : isFinite;
    import std.string : isNumeric;
    import std.conv : to;

    assert (numericString.isNumeric);

    /* Non-finite values (NaN, infinity) have no significant digits. The conversion
     * throws a ConvException if the text cannot be converted to a double.
     */
    size_t significantDigits = 0;
    if (numericString.to!double.isFinite)
    {
        /* Remove the exponent before looking for digits. Searching first would let an
         * all-zero mantissa ("0e+10") pick up the exponent's digits as significant.
         */
        auto exponentPart = numericString.findAmong("eE");
        auto mantissa = numericString[0 .. $ - exponentPart.length];

        /* Significant digits start at the first non-zero digit of the mantissa. */
        auto digitsPart = mantissa.find!(x => x.isDigit && x != '0');

        if (digitsPart.canFind('.'))
        {
            /* Zeros trailing the fraction are not significant. The decimal point
             * itself is still in the slice, hence the minus one.
             */
            digitsPart = digitsPart.stripRight('0');
            significantDigits = digitsPart.length - 1;
        }
        else
        {
            significantDigits = digitsPart.length;
        }

        /* A value of zero is still written with one digit. */
        if (significantDigits == 0) significantDigits = 1;
    }

    return significantDigits;
}

@safe pure unittest
{
    assert("0".significantDigits == 1);
    assert("10".significantDigits == 2);
    assert("0.0".significantDigits == 1);
    assert("-10.0".significantDigits == 2);
    assert("-.01".significantDigits == 1);
    assert("-.5401".significantDigits == 4);
    assert("1010.010".significantDigits == 6);
    assert("0.0003003".significantDigits == 4);
    assert("6e+06".significantDigits == 1);
    assert("6.0e+06".significantDigits == 1);
    assert("6.5e+06".significantDigits == 2);
    assert("6.005e+06".significantDigits == 4);

    /* Zero mantissa: exponent digits must not be counted. */
    assert("0e+00".significantDigits == 1);
    assert("0e+10".significantDigits == 1);
    assert("0E-12".significantDigits == 1);
    assert("0.00e+123".significantDigits == 1);
}
1253

1254
/** Returns the number of digits to the right of the decimal point in a numeric string.
1255
 * This routine includes trailing zeros in the count.
1256
 */
1257
size_t precisionDigits(const char[] numericString) @safe pure
{
    import std.algorithm : findAmong, findSplit;
    import std.conv : to;
    import std.math : isFinite;
    import std.string : isNumeric;

    assert (numericString.isNumeric);

    /* NaN and infinity carry no precision digits. */
    if (!numericString.to!double.isFinite) return 0;

    if (auto pieces = numericString.findSplit("."))
    {
        /* Everything after the decimal point, minus any exponent suffix. */
        auto afterPoint = pieces[2];
        return afterPoint.length - afterPoint.findAmong("eE").length;
    }

    /* No decimal point, so no digits to its right. */
    return 0;
}
1279

1280
@safe pure unittest
{
    /* Each case: numeric text and the expected count of digits after the decimal point. */
    static struct Case { string text; size_t expected; }

    immutable Case[] cases = [
        Case("0", 0),
        Case("10", 0),
        Case("0.0", 1),
        Case("-10.0", 1),
        Case("-.01", 2),
        Case("-.5401", 4),
    ];

    foreach (c; cases) assert(c.text.precisionDigits == c.expected);
}
1289

1290
/** Calculates the expected print width of a string in monospace (fixed-width) fonts.
1291
 */
1292
size_t monospacePrintWidth(const char[] str) @safe nothrow
{
    import std.uni : byGrapheme;

    /* Code points in the range U+3000 .. U+9FFF are counted as two print positions. */
    bool isDoubleWidth(dchar ch)
    {
        return '\u3000' <= ch && ch <= '\u9fff';
    }

    size_t total = 0;
    try
    {
        /* One grapheme is one printed character; its first code point decides the width. */
        foreach (grapheme; str.byGrapheme)
        {
            total += isDoubleWidth(grapheme[0]) ? 2 : 1;
        }
    }
    catch (Exception)
    {
        /* Invalid utf-8 sequence. Fall back to the byte length rather than failing. */
        total = str.length;
    }

    return total;
}
1307

1308
unittest
{
    /* Each case: text and its expected width in monospace print positions. */
    static struct Case { string text; size_t expected; }

    immutable Case[] cases = [
        Case("", 0),
        Case(" ", 1),
        Case("abc", 3),
        Case("林檎", 4),
        Case("æble", 4),
        Case("ვაშლი", 5),
        Case("größten", 7),
    ];

    foreach (c; cases) assert(c.text.monospacePrintWidth == c.expected);
}

Read our documentation on viewing source code .

Loading