brodieG / fansi
Showing 24 of 63 files from the diff.

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -126,33 +126,44 @@
Loading
126 126
// nocov end
127 127
128 128
/*
129 -
 * Confirm encoding is not obviously wrong
129 +
 * Confirm encoding is not obviously wrong, and length okay.
130 130
 */
131 131
132 -
void FANSI_check_enc(SEXP x, R_xlen_t i) {
132 +
void FANSI_check_chrsxp(SEXP x, R_xlen_t i) {
133 +
  if(TYPEOF(x) != CHARSXP)
134 +
    error("Internal Error: expected CHARSXP.");  // nocov
133 135
  cetype_t type = getCharCE(x);
134 136
  if(type != CE_NATIVE && type != CE_UTF8) {
135 137
    if(type == CE_BYTES)
136 138
      error(
137 -
        "%s at index %.0f. %s.",
138 -
        "Byte encoded string encountered", (double) i + 1,
139 +
        "%s at index %jd. %s.",
140 +
        "Byte encoded string encountered", FANSI_ind(i),
139 141
        "Byte encoded strings are not supported"
140 142
      );
141 143
    else
142 144
      // this should only happen if somehow a string not converted to UTF8
143 145
      // sneaks in.
144 146
      error(
145 -
        "%s %d encountered at index %.0f. %s.",
147 +
        "%s %d encountered at index %jd. %s.",
146 148
        "Internal Error: unexpected encoding", type,
147 -
        (double) i + 1, "Contact maintainer"
149 +
        FANSI_ind(i), "Contact maintainer"
148 150
      );
149 151
  }
152 +
  if(LENGTH(x) > FANSI_int_max) {
153 +
    error(
154 +
      "Strings longer than INT_MAX not supported (length %jd at index %jd).",
155 +
      (intmax_t)(LENGTH(x)), FANSI_ind(i)
156 +
    );
157 +
  }
150 158
}
159 +
151 160
/*
152 161
 * Testing interface
153 162
 */
154 163
SEXP FANSI_check_enc_ext(SEXP x, SEXP i) {
155 -
  FANSI_check_enc(STRING_ELT(x, asInteger(i) - 1), asInteger(i) - 1);
164 +
  if(TYPEOF(x) != STRSXP)
165 +
    error("Internal Error: expected character input."); // nocov
166 +
  FANSI_check_chrsxp(STRING_ELT(x, asInteger(i) - 1), asInteger(i) - 1);
156 167
  return ScalarLogical(1);
157 168
}
158 169

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -47,14 +47,14 @@
Loading
47 47
  for(R_xlen_t i = 0; i < len; ++i) {
48 48
    FANSI_interrupt(i);
49 49
    SEXP chrsxp = STRING_ELT(x, i);
50 -
    FANSI_check_enc(chrsxp, i);
50 +
    FANSI_check_chrsxp(chrsxp, i);
51 51
    int res_tmp = FANSI_has_int(chrsxp, ctl_int);
52 52
    // not great, but need to watch out for NA_LOGICAL == INT_MIN
53 53
    if(res_tmp == -1 && warn_int) {
54 54
      res_tmp = -res_tmp;
55 55
      warning(
56 -
        "Encountered invalid ESC sequence at index [%.0f], %s%s",
57 -
        (double) i + 1,
56 +
        "Encountered invalid ESC sequence at index [%jd], %s%s",
57 +
        FANSI_ind(i),
58 58
        "see `?unhandled_ctl`; you can use `warn=FALSE` to turn ",
59 59
        "off these warnings."
60 60
      );

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##
@@ -23,7 +23,7 @@
Loading
23 23
## A version of unique that isn't terrible for very long strings that are
24 24
## actually the same
25 25
26 -
unique_chr <- function(x) .Call(FANSI_unique_chr, x)
26 +
unique_chr <- function(x) .Call(FANSI_unique_chr, enc2utf8(x))
27 27
28 28
## Testing interface for color code to HTML conversion
29 29

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -53,7 +53,7 @@
Loading
53 53
    SEXP chrsxp = STRING_ELT(x, i);
54 54
55 55
    if(chrsxp != NA_STRING && LENGTH(chrsxp)) {
56 -
      FANSI_check_enc(chrsxp, i);
56 +
      FANSI_check_chrsxp(chrsxp, i);
57 57
      const char * string, * string_start;
58 58
59 59
      string = string_start = CHAR(chrsxp);

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 *  This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -587,13 +587,10 @@
Loading
587 587
 *
588 588
 * _basic is used just for the 1-9 SGR codes plus colors.
589 589
 */
590 -
int FANSI_state_comp_basic(
590 +
int FANSI_state_comp_color(
591 591
  struct FANSI_state target, struct FANSI_state current
592 592
) {
593 -
  // 1023 is '11 1111 1111' in binary, so this will grab the last ten bits
594 -
  // of the styles which are the 1-9 styles
595 593
  return !(
596 -
    (target.style & 1023) == (current.style & 1023) &&
597 594
    target.color == current.color &&
598 595
    target.bg_color == current.bg_color &&
599 596
    target.color_extra[0] == current.color_extra[0] &&
@@ -606,6 +603,14 @@
Loading
606 603
    target.bg_color_extra[3] == current.bg_color_extra[3]
607 604
  );
608 605
}
606 +
int FANSI_state_comp_basic(
607 +
  struct FANSI_state target, struct FANSI_state current
608 +
) {
609 +
  // 1023 is '11 1111 1111' in binary, so this will grab the last ten bits
610 +
  // of the styles which are the 1-9 styles
611 +
  return FANSI_state_comp_color(target, current) ||
612 +
    (target.style & 1023) != (current.style & 1023);
613 +
}
609 614
int FANSI_state_comp(struct FANSI_state target, struct FANSI_state current) {
610 615
  return !(
611 616
    !FANSI_state_comp_basic(target, current) &&
@@ -620,9 +625,7 @@
Loading
620 625
    state.style || state.color >= 0 || state.bg_color >= 0 ||
621 626
    state.font || state.border || state.ideogram;
622 627
}
623 -
int FANSI_state_has_style_basic(struct FANSI_state state) {
624 -
  return state.style || state.color >= 0 || state.bg_color >= 0;
625 -
}
628 +
626 629
/*
627 630
 * Copy the style members from current to target
628 631
 */
@@ -718,7 +721,7 @@
Loading
718 721
  SEXP res_chr, res_chr_prev = PROTECT(mkChar(""));
719 722
  // PROTECT should not be needed here, but rchk complaining
720 723
  SEXP text_chr = STRING_ELT(text, 0);
721 -
  FANSI_check_enc(text_chr, 0);
724 +
  FANSI_check_chrsxp(text_chr, 0);
722 725
  const char * string = CHAR(text_chr); // Should already be UTF-8 if needed
723 726
724 727
  SEXP R_true = PROTECT(ScalarLogical(1));

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -73,16 +73,15 @@
Loading
73 73
    if(chr_len > mem_req) mem_req = chr_len;
74 74
  }
75 75
  // Now strip
76 -
77 76
  int invalid_ansi = 0;
78 -
  int invalid_idx = 0;
77 +
  R_xlen_t invalid_idx = 0;
79 78
  char * chr_buff;
80 79
81 80
  for(i = 0; i < len; ++i) {
82 81
    FANSI_interrupt(i);
83 82
    SEXP x_chr = STRING_ELT(x, i);
84 83
    if(x_chr == NA_STRING) continue;
85 -
    FANSI_check_enc(x_chr, i);
84 +
    FANSI_check_chrsxp(x_chr, i);
86 85
87 86
    int has_ansi = 0;
88 87
    const char * chr = CHAR(x_chr);
@@ -107,7 +106,7 @@
Loading
107 106
        )
108 107
      ) {
109 108
        invalid_ansi = 1;
110 -
        invalid_idx = i + 1;
109 +
        invalid_idx = i;
111 110
      }
112 111
      if(csi.len) {
113 112
        has_ansi = 1;
@@ -177,6 +176,8 @@
Loading
177 176
          res_track += chr_end - chr_track;
178 177
      } }
179 178
      *res_track = '\0';
179 +
180 +
      FANSI_check_chr_size(res_start, res_track, i);
180 181
      SEXP chr_sexp = PROTECT(
181 182
        mkCharLenCE(
182 183
          res_start, res_track - res_start, getCharCE(x_chr)
@@ -189,9 +190,9 @@
Loading
189 190
    switch(warn_int) {
190 191
      case 1: {
191 192
        warning(
192 -
          "Encountered %s index [%.0f], %s%s",
193 +
          "Encountered %s index [%jd], %s%s",
193 194
          "invalid or possibly incorreclty handled ESC sequence at ",
194 -
          (double) invalid_idx,
195 +
          FANSI_ind(invalid_idx),
195 196
          "see `?unhandled_ctl`; you can use `warn=FALSE` to turn ",
196 197
          "off these warnings."
197 198
        );
@@ -231,7 +232,7 @@
Loading
231 232
  for(R_xlen_t i = 0; i < len; ++i) {
232 233
    FANSI_interrupt(i);
233 234
    SEXP chrsxp = STRING_ELT(res, i);
234 -
    FANSI_check_enc(chrsxp, i);
235 +
    FANSI_check_chrsxp(chrsxp, i);
235 236
    const char * string = CHAR(chrsxp);
236 237
    const char * string_start = string;
237 238
    char * buff_track;
@@ -384,14 +385,7 @@
Loading
384 385
    }
385 386
    if(strip_this) {
386 387
      *(buff_track) = 0;
387 -
      if(buff_track - buff->buff > FANSI_int_max)
388 -
        // nocov start
389 -
        error(
390 -
          "%s%s",
391 -
          "Internal Error: attempting to write string longer than INT_MAX; ",
392 -
          "contact maintainer."
393 -
        );
394 -
        // nocov end
388 +
      FANSI_check_chr_size(buff->buff, buff_track, i);
395 389
      SEXP chrsxp = PROTECT(
396 390
        mkCharLenCE(
397 391
          buff->buff, buff_track - buff->buff, getCharCE(STRING_ELT(input, i))

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -69,7 +69,7 @@
Loading
69 69
70 70
    SEXP chr = STRING_ELT(vec, i);
71 71
    if(chr == NA_STRING) continue;
72 -
    FANSI_check_enc(chr, i);
72 +
    FANSI_check_chrsxp(chr, i);
73 73
74 74
    source = CHAR(chr);
75 75
@@ -165,14 +165,7 @@
Loading
165 165
166 166
      cetype_t chr_type = CE_NATIVE;
167 167
      if(state.has_utf8) chr_type = CE_UTF8;
168 -
      if(buff_track - buff_start > FANSI_int_max)
169 -
        // nocov start
170 -
        error(
171 -
          "%s%s",
172 -
          "Internal Error: attempting to write string longer than INT_MAX; ",
173 -
          "contact maintainer (2)."
174 -
        );
175 -
        // nocov end
168 +
      FANSI_check_chr_size(buff_start, buff_track, i);
176 169
      SEXP chr_sxp = PROTECT(
177 170
        mkCharLenCE(buff_start, (int) (buff_track - buff_start), chr_type)
178 171
      );

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##
@@ -68,7 +68,11 @@
Loading
68 68
#'   interpreted, particularly if you are getting unexpected results.
69 69
#' @param x a character vector or object that can be coerced to character.
70 70
#' @param type character(1L) partial matching `c("chars", "width")`, although
71 -
#'   `type="width"` only works correctly with R >= 3.2.2.
71 +
#'   `type="width"` only works correctly with R >= 3.2.2.  With "width", whether
72 +
#'   C0 and C1 are treated as zero width may depend on R version and locale in
73 +
#'   addition to what the `ctl` parameter is set to.  For example, for R4.1 in
74 +
#'   UTF-8 locales C0 and C1 will be zero width even if the value of `ctl` is
75 +
#'   such that they wouldn't be so in other circumstances.
72 76
#' @param round character(1L) partial matching
73 77
#'   `c("start", "stop", "both", "neither")`, controls how to resolve
74 78
#'   ambiguities when a `start` or `stop` value in "width" `type` mode falls
@@ -96,11 +100,11 @@
Loading
96 100
#'   assumptions `fansi` makes about how strings are rendered on your display
97 101
#'   to be incorrect, for example by moving the cursor (see [fansi]).
98 102
#' @param term.cap character a vector of the capabilities of the terminal, can
99 -
#'   be any combination "bright" (SGR codes 90-97, 100-107), "256" (SGR codes
103 +
#'   be any combination of "bright" (SGR codes 90-97, 100-107), "256" (SGR codes
100 104
#'   starting with "38;5" or "48;5"), and "truecolor" (SGR codes starting with
101 -
#'   "38;2" or "48;2"). Changing this parameter changes how `fansi` interprets
102 -
#'   escape sequences, so you should ensure that it matches your terminal
103 -
#'   capabilities. See [term_cap_test] for details.
105 +
#'   "38;2" or "48;2"). Changing this parameter changes how `fansi`
106 +
#'   interprets escape sequences, so you should ensure that it matches your
107 +
#'   terminal capabilities. See [term_cap_test] for details.
104 108
#' @examples
105 109
#' substr_ctl("\033[42mhello\033[m world", 1, 9)
106 110
#' substr_ctl("\033[42mhello\033[m world", 3, 9)

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -17,6 +17,14 @@
Loading
17 17
 */
18 18
19 19
#include "fansi.h"
20 +
// Which styles actually produce HTML
21 +
22 +
static const unsigned int css_html_style[8] = {
23 +
  1, 2, 3, 4, 5, 6,
24 +
  // 7,                // Inverse doesn't actually produce a style
25 +
  8, 9
26 +
};
27 +
static unsigned int css_html_mask = 0;
20 28
/*
21 29
 * Looks like we don't need to worry about C0 sequences, however we must
22 30
 * parse all ESC sequences as HTML gladly displays everything right after the
@@ -27,43 +35,121 @@
Loading
27 35
 */
28 36
struct FANSI_css {const char * css; int len;};
29 37
38 +
// .len leftover from when we used pre-computed widths
39 +
30 40
static const struct FANSI_css css_style[9] = {
31 41
  // Code 1: bold
32 -
  {.css="font-weight: bold;", .len=18},
42 +
  {.css="font-weight: bold", .len=-1},
33 43
  // Code 2: lighter
34 -
  {.css="font-weight: 100;", .len=17},
44 +
  {.css="font-weight: 100", .len=-1},
35 45
  // Code 3: italic
36 -
  {.css="font-style: italic;", .len=19},
46 +
  {.css="font-style: italic", .len=-1},
37 47
  // Code 4: underline
38 -
  {.css="text-decoration: underline;", .len=27},
48 +
  {.css="text-decoration: underline", .len=-1},
39 49
  // Code 5: blink
40 -
  {.css="text-decoration: blink;", .len=23},
50 +
  {.css="text-decoration: blink", .len=-1},
41 51
  // Code 6: blink
42 -
  {.css="text-decoration: blink;", .len=23},
43 -
  // Code 7: invert; unused
52 +
  {.css="text-decoration: blink", .len=-1},
53 +
  // Code 7: invert; unused, but needs to be here for offset lookups to work;
44 54
  {.css="", .len=0},
45 55
  // Code 8: conceal
46 -
  {.css="color: transparent;", .len=19},
56 +
  {.css="color: transparent", .len=-1},
47 57
  // Code 9: line-through
48 -
  {.css="text-decoration: line-through;", .len=30},
58 +
  {.css="text-decoration: line-through", .len=-1},
49 59
};
60 +
// Generate mask for html styles in first pass
61 +
62 +
static unsigned int style_html_mask() {
63 +
  if(!css_html_mask) {
64 +
    int style_n = sizeof(css_html_style) / sizeof(unsigned int);
65 +
    for(int i = 0; i < style_n; ++i)
66 +
      css_html_mask |= 1U << css_html_style[i];
67 +
  }
68 +
  return css_html_mask;
69 +
}
70 +
static int state_has_color(struct FANSI_state state) {
71 +
  return state.color >= 0 || state.bg_color >= 0;
72 +
}
73 +
static int state_has_style_html(struct FANSI_state state) {
74 +
  // generate mask first time around.
75 +
  return (state.style & style_html_mask()) ||
76 +
    state.color >= 0 || state.bg_color >= 0;
77 +
}
78 +
static int state_comp_html(
79 +
  struct FANSI_state target, struct FANSI_state current
80 +
) {
81 +
  return
82 +
    // Colors are Different
83 +
    FANSI_state_comp_color(target, current) ||
84 +
    // HTML rendered styles are different
85 +
    (
86 +
      (target.style & style_html_mask()) !=
87 +
      (current.style & style_html_mask())
88 +
    ) ||
89 +
    // If one has color both have the same color, but we need to check
90 +
    // whether they have different invert status
91 +
    (
92 +
      (state_has_color(target)) &&
93 +
      (target.style & (1U << 7)) ^ (current.style & (1U << 7))
94 +
    );
95 +
}
96 +
97 +
/*
98 +
 * Converts basic, bright and 8 bit colors to a range of 0:255.
99 +
 * Returns -1 for other values including no color.
100 +
 *
101 +
 * For use with color classes.
102 +
 *
103 +
 * Recall colors in 30:39 and 40:49 already converted to 0:9.
104 +
 */
105 +
static int color_to_8bit(int color, int* color_extra) {
106 +
  int col256 = -1;
107 +
  if (color >= 0 && color <= 7) {
108 +
    // Basic colors
109 +
    col256 = color % 10;
110 +
  } else if ((color >= 100 && color <= 107) || (color >= 90 && color <= 97)) {
111 +
    // Brights
112 +
    col256 = color % 10 + 8;
113 +
  } else if ((color == 8) && color_extra[0] == 5) {
114 +
    // 8 Bit colors
115 +
    col256 = color_extra[1];
116 +
    if(col256 < 0 || col256 > 255)
117 +
      error("Internal Error: 0-255 color outside of that range."); // nocov
118 +
  }
119 +
  return col256;
120 +
}
121 +
/*
122 +
 * Given CSI SGR Color Codes and User Provided color classes,
123 +
 * return the corresponding color class if the color can be mapped to one of the
124 +
 * 8 bit color codes in 0:255, or NULL if not.
125 +
 *
126 +
 * CAUTION: test result before dereferencing: could be a pointer to NULL.
127 +
 *
128 +
 * @param bg whether to return foreground (0) or background (1) styles
129 +
 */
130 +
131 +
static const char * get_color_class(
132 +
  int color, int* color_extra, SEXP color_classes, int bg
133 +
) {
134 +
  int col8bit = color_to_8bit(color, color_extra);
135 +
  if(col8bit >= 0 && XLENGTH(color_classes) / 2 > (R_xlen_t) col8bit)
136 +
    return CHAR(STRING_ELT(color_classes, col8bit * 2 + bg));
137 +
  else return NULL;
138 +
}
50 139
/*
51 140
 * All color conversions taken from
52 141
 *
53 142
 * <https://en.wikipedia.org/wiki/ANSI_escape_code>
54 143
 *
55 -
 * @param color an integer expected to be between 0 and 9
144 +
 * @param color an integer expected to be in 0:9, 90:97, 100:107. NB: ranges
145 +
 *   30:39 and 40:49 already converted to 0:9.
56 146
 * @param color_extra a pointer to a 4 long integer array as you would get in
57 147
 *   struct FANSI_state.color_extra
58 -
 * @param buff must be pre-allocated to be able to hold the color in format
59 -
 *   #FFFFFF including the null terminator (so at least 8 bytes)
60 -
 * @return how many bytes were written, guaranteed to be 7 bytes, does not
61 -
 *   include the NULL terminator that is also written just in case.
148 +
 * @param buff a buffer with at least 8 bytes allocated.
149 +
 * @return the *buff pointer
62 150
 */
63 151
64 -
static int color_to_html(
65 -
  int color, int * color_extra, char * buff
66 -
) {
152 +
static char * color_to_html(int color, int * color_extra, char * buff) {
67 153
  // CAREFUL: DON'T WRITE MORE THAN 7 BYTES + NULL TERMINATOR
68 154
69 155
  const char * dectohex = "0123456789ABCDEF";
@@ -157,200 +243,241 @@
Loading
157 243
    error("Internal Error: invalid color code %d", color); // nocov
158 244
  }
159 245
  *buff_track = 0;
160 -
  return (int) (buff_track - buff);
161 -
}
246 +
  int dist = (int) (buff_track - buff);
247 +
  if(dist != 7) error("Internal Error: unexpected byte count for color.");
162 248
163 -
static int state_as_html(struct FANSI_state state, int first, char * buff) {
249 +
  return buff;
250 +
}
251 +
// Central error function.
252 +
253 +
static void overflow_err(const char * type, R_xlen_t i) {
254 +
  error(
255 +
    "%s %s %s %jd%s",
256 +
    "Expanding SGR sequences into HTML will create a string longer than",
257 +
    type, "at position", FANSI_ind(i), ". Try again with smaller strings."
258 +
  );
259 +
}
260 +
static void overflow_err2(R_xlen_t i) {
261 +
  error(
262 +
    "%s %s %s %jd%s",
263 +
    "Escaping HTML special characters will create a string longer than",
264 +
    "INT_MAX", "at position", FANSI_ind(i), ". Try again with smaller strings."
265 +
  );
266 +
}
267 +
/*
268 +
 * If *buff is not NULL, copy tmp into it and advance, else measure tmp
269 +
 * and advance length
270 +
 *
271 +
 *   vvvvvvvv
272 +
 * !> DANGER <!
273 +
 *   ^^^^^^^^
274 +
 *
275 +
 * This advances *buff so that it points to the NULL terminator at
276 +
 * the end of what was written, so the string is ready to append to.
277 +
 *
278 +
 * @param i index in overall character vector, needed to report overflow string.
279 +
 */
280 +
static unsigned int copy_or_measure(
281 +
  char ** buff, const char * tmp, unsigned int len, R_xlen_t i
282 +
) {
283 +
  size_t tmp_len = strlen(tmp);
284 +
  // strictly it's possible for len > FANSI_int_max, but shouldn't happen even
285 +
  // in testing since we only grow len by first checking.
286 +
  if(tmp_len > FANSI_int_max - len) overflow_err("INT_MAX", i);
287 +
  if(*buff) {
288 +
    strcpy(*buff, tmp);
289 +
    *buff += tmp_len;
290 +
    **buff = 0;  // not necessary, but helps to debug
291 +
  }
292 +
  return tmp_len;
293 +
}
294 +
/*
295 +
 * Compute HTML Size of Each Individual State, Or Write It
296 +
 *
297 +
 * This used to be two distinct functions, but the risk of getting out of sync
298 +
 * was too high so we merged them into one.  We try to minimize the
299 +
 * differences between size calculation vs. writing modes to avoid mistakes, but
300 +
 * this means the code is less efficient than could be, possibly repeating
301 +
 * calculations, overflow checks, or computing compile time knowable string
302 +
 * widths.
303 +
 *
304 +
 * @param buff the buffer to write to; if it is NULL only computes the size
305 +
 *   instead of also writing.
306 +
 */
307 +
static int state_size_and_write_as_html(
308 +
  struct FANSI_state state,
309 +
  struct FANSI_state state_prev,
310 +
  char * buff,
311 +
  SEXP color_classes, R_xlen_t i,
312 +
  int bytes_html
313 +
) {
164 314
  /****************************************************\
165 315
  | IMPORTANT: KEEP THIS ALIGNED WITH FANSI_csi_write  |
166 316
  | although right now ignoring rare escapes in html   |
167 317
  \****************************************************/
168 318
169 -
  // Styles
319 +
  // Not all basic styles are html styles (e.g. invert), so state only changes
320 +
  // on invert when current or previous also has a color style
321 +
322 +
  int has_cur_state = state_has_style_html(state);
323 +
  int has_prev_state = state_has_style_html(state_prev);
324 +
  int state_change = state_comp_html(state, state_prev);
325 +
170 326
  const char * buff_start = buff;
171 -
  if(!FANSI_state_has_style_basic(state)) {
172 -
    if(first)
173 -
      // nocov start
174 -
      error("Internal Error: no state in first span; contact maintainer.");
175 -
      // nocov end
176 -
    if(state.string[state.pos_byte]) {
177 -
      memcpy(buff, "</span><span>", 13);
178 -
      buff += 13;
179 -
    }
180 -
  } else {
181 -
    if(first) {
182 -
      memcpy(buff, "<span style='", 13);
183 -
      buff += 13;
327 +
  unsigned int len = bytes_html;  // this is for overflow check
328 +
329 +
  if(state_change) {
330 +
    if(!has_cur_state) {
331 +
      len += copy_or_measure(&buff, "</span>", len, i);
184 332
    } else {
185 -
      memcpy(buff, "</span><span style='", 20);
186 -
      buff += 20;
187 -
    }
188 -
    // Colors color: #FFFFFF; background-color: #FFFFFF;
189 -
190 -
    int invert = state.style & (1 << 7);
191 -
    int color = invert ? state.bg_color : state.color;
192 -
    int * color_extra = invert ? state.bg_color_extra : state.color_extra;
193 -
    int bg_color = invert ? state.color : state.bg_color;
194 -
    int * bg_color_extra = invert ? state.color_extra : state.bg_color_extra;
195 -
196 -
    if(color >= 0) {
197 -
      memcpy(buff, "color: ", 7);
198 -
      buff += 7;
199 -
      buff += color_to_html(color, color_extra, buff);
200 -
      *(buff++) = ';';
201 -
    }
202 -
    if(bg_color >= 0) {
203 -
      memcpy(buff, "background-color: ", 18);
204 -
      buff += 18;
205 -
      buff += color_to_html(bg_color, bg_color_extra, buff);
206 -
      *(buff++) = ';';
207 -
    }
208 -
    // Styles (need to go after color for transparent to work)
333 +
      if (!has_prev_state) {
334 +
        len += copy_or_measure(&buff, "<span", len, i);
335 +
      } else {
336 +
        len += copy_or_measure(&buff, "</span><span", len, i);
337 +
      }
338 +
      // Styles
339 +
      int invert = state.style & (1 << 7);
340 +
      int color = invert ? state.bg_color : state.color;
341 +
      int * color_extra = invert ? state.bg_color_extra : state.color_extra;
342 +
      int bg_color = invert ? state.color : state.bg_color;
343 +
      int * bg_color_extra = invert ? state.color_extra : state.bg_color_extra;
344 +
345 +
      // Use provided classes instead of inline styles?
346 +
      const char * color_class =
347 +
        get_color_class(color, color_extra, color_classes, 0);
348 +
      const char * bgcol_class =
349 +
        get_color_class(bg_color, bg_color_extra, color_classes, 1);
350 +
351 +
      // Class based colors e.g. " class='fansi-color-06 fansi-bgcolor-04'"
352 +
      // Brights remapped to 8-15
353 +
354 +
      if(color_class || bgcol_class) {
355 +
        len += copy_or_measure(&buff, " class='", len, i);
356 +
        if(color_class) len += copy_or_measure(&buff, color_class, len, i);
357 +
        if(color_class && bgcol_class) len += copy_or_measure(&buff, " ", len, i);
358 +
        if(bgcol_class) len += copy_or_measure(&buff, bgcol_class, len, i);
359 +
        len += copy_or_measure(&buff, "'", len, i);
360 +
      }
361 +
      // inline style and/or colors
362 +
      if(
363 +
        state.style & css_html_mask ||
364 +
        (color >= 0 && (!color_class)) ||
365 +
        (bg_color >= 0 && (!bgcol_class))
366 +
      ) {
367 +
        len += copy_or_measure(&buff, " style='", len, i);
368 +
        unsigned int len_start = len;
369 +
        char color_tmp[8];
370 +
        if(color >= 0 && (!color_class)) {
371 +
          len += copy_or_measure(&buff, "color: ", len, i);
372 +
          len += copy_or_measure(
373 +
            &buff, color_to_html(color, color_extra, color_tmp), len, i
374 +
          );
375 +
        }
376 +
        if(bg_color >= 0 && (!bgcol_class)) {
377 +
          if(len_start < len) len += copy_or_measure(&buff, "; ", len, i);
378 +
          len += copy_or_measure(&buff,  "background-color: ", len, i);
379 +
          len += copy_or_measure(
380 +
            &buff, color_to_html(bg_color, bg_color_extra, color_tmp), len, i
381 +
          );
382 +
        }
383 +
        // Styles (need to go after color for transparent to work)
384 +
        for(int i = 1; i < 10; ++i)
385 +
          if(state.style & css_html_mask & (1 << i)) {
386 +
            if(len_start < len) len += copy_or_measure(&buff, "; ", len, i);
387 +
            len += copy_or_measure(&buff, css_style[i - 1].css, len, i);
388 +
          }
209 389
210 -
    for(int i = 1; i < 10; ++i) {
211 -
      if(state.style & (1 << i)) {
212 -
        memcpy(buff, css_style[i - 1].css, css_style[i - 1].len);
213 -
        buff += css_style[i - 1].len;
390 +
        len += copy_or_measure(&buff, ";'", len, i);
214 391
      }
215 -
    }
216 -
    *(buff++) = '\'';
217 -
    *(buff++) = '>';
392 +
      len += copy_or_measure(&buff, ">", len, i);
393 +
  } }
394 +
  len -= bytes_html;
395 +
  if(buff) {
218 396
    *buff = 0;
397 +
    if((unsigned int)(buff - buff_start) != len)
398 +
      // nocov start
399 +
      error(
400 +
        "Internal Error: buffer length mismatch in html generation (%ud vs %ud).",
401 +
        len, (unsigned int)(buff - buff_start)
402 +
      );
403 +
      // nocov end
219 404
  }
220 -
  return (int)(buff - buff_start);
405 +
  // We've checked len at every step, so it cannot overflow INT_MAX.
406 +
407 +
  return (int)len;
221 408
}
222 409
/*
223 -
 * Compute size of each state
410 +
 * Final checks for unusual size, and include space for terminator.
224 411
 */
225 -
static int state_size_as_html(struct FANSI_state state, int first) {
226 -
  int size = 0;
227 -
  if(!FANSI_state_has_style_basic(state)) {
228 -
    if(first)
229 -
      // nocov start
230 -
      error("Internal Error: no state in first span; contact maintainer.");
231 -
      // nocov end
232 -
233 -
    // Only need to re-open tag if not at end of string
234 -
    if(state.string[state.pos_byte]) {
235 -
      size = 13;  // </span><span>
236 -
    }
237 -
  } else {
238 -
    if(first) {
239 -
      size = 15;  // <span style="">
240 -
    } else {
241 -
      size = 22;  // </span><span style="">
242 -
    }
243 -
    // Styles
244 412
245 -
    for(int i = 1; i < 10; ++i) {
246 -
      if(state.style & (1 << i)) size += css_style[i - 1].len;
247 -
    }
248 -
    // Colors color: #FFFFFF; background-color: #FFFFFF;
413 +
static size_t final_string_size(int bytes, R_xlen_t i) {
414 +
  // In the extremely unlikely case we're on a system with weird integer sizes
415 +
  // or R changes what R_len_t is.  >= SIZE_MAX b/c we need room for the extra NULL
416 +
  // terminator byte
417 +
  if(INT_MAX >= SIZE_MAX && (unsigned int) bytes >= SIZE_MAX)
418 +
    overflow_err("SIZE_MAX", i);     // nocov
419 +
  if(INT_MAX > R_LEN_T_MAX && bytes > R_LEN_T_MAX)
420 +
    overflow_err("R_LEN_T_MAX", i);  // nocov
249 421
250 -
    int invert = state.style & (1 << 7);
251 -
    if(state.color >= 0) size += invert ? 26 : 15;
252 -
    if(state.bg_color >= 0) size += invert ? 15 : 26;
253 -
  }
254 -
  return size;
422 +
  return (size_t) bytes + 1;   // include terminator
255 423
}
256 424
/*
257 -
 * Helper functions to process size and write the HTML split off
258 -
 * for clarity
425 +
 * Check for overall overflow, recall that R allows up to R_LEN_T_MAX long
426 +
 * strings (which currently is INT_MAX) excluding the NULL.
427 +
 *
428 +
 * However, need size_t return so we can allocate one extra byte for the NULL
429 +
 * terminator.  Strictly speaking we could avoid this as R accepts character
430 +
 * buffers of known length via mkCharLenCE or some such, but it feels
431 +
 * uncomfortable having an unterminated string floating around.
432 +
 *
433 +
 * @return the size of the string **including** the NULL terminator
259 434
 */
260 -
261 -
static int html_compute_size(
262 -
  struct FANSI_state state, int bytes_extra, int bytes_esc_start, int first,
263 -
  R_xlen_t i
435 +
static size_t html_check_overflow(
436 +
  int bytes_html, int bytes_esc, int bytes_init, int span_extra, R_xlen_t i
264 437
) {
265 -
  // bytes_esc cannot overflow int because the input is supposed to be an
266 -
  // R sourced string
438 +
  if(bytes_init < 0 || span_extra < 0)
439 +
    error("Internal Error: illegal -ve lengths in overflow check."); // nocov
267 440
268 -
  int bytes_esc = state.pos_byte - bytes_esc_start;
269 -
  int bytes_html = state_size_as_html(state, first);
270 -
  int bytes_net = bytes_html - bytes_esc;
271 -
272 -
  if(bytes_net >= 0) {
273 -
    if(bytes_extra > FANSI_int_max - bytes_net) {
274 -
      error(
275 -
        "%s%s %.0f %s",
276 -
        "Expanding SGR sequences into CSS will create a string longer ",
277 -
        "than INT_MAX at position", (double) (i + 1),
278 -
        "which is not allowed by R."
279 -
      );
280 -
    }
281 -
    bytes_extra += bytes_net;
282 -
  } else {
283 -
    if(bytes_extra < FANSI_int_min - bytes_net) {
284 -
      // This should actually be impossible as a string that is only ESC
285 -
      // sequences with no SGR is the only way to get that big a decrease,
286 -
      // and if it doesn't have SGR then it wouldn't enter this loop
287 -
      // nocov start
288 -
      error(
289 -
        "%s%s",
290 -
        "Internal Error: unexpectedly large byte shrinking when ",
291 -
        "converting ESC sequences to CSS; contact maintainer."
292 -
      );
293 -
      // nocov end
294 -
    }
295 -
    bytes_extra += bytes_net;
296 -
  }
297 -
  return bytes_extra;
298 -
}
299 -
// Check for overall overflow, recall that R allows up to INT_MAX long strings
300 -
// excluding the NULL
301 -
//
302 -
// However, need size_t return since including the NULL terminator we could need
303 -
// over int size.
441 +
  // both bytes_html and bytes_esc are non-negative, so this cannot overflow
304 442
305 -
static size_t html_check_overflow(
306 -
  int bytes_extra, int bytes_init, int span_extra, R_xlen_t i
307 -
) {
308 -
  size_t bytes_final;
309 -
  if(bytes_init < 0) error("Internal error: bytes_init must be positive.");
443 +
  int bytes_extra = bytes_html - bytes_esc;
310 444
  if(
311 -
    bytes_extra >= 0 && (
312 -
      bytes_init > FANSI_int_max - bytes_extra - span_extra
445 +
    (
446 +
       bytes_extra >= 0 &&
447 +
       bytes_init > FANSI_int_max - bytes_extra - span_extra
313 448
    )
314 -
  ) {
449 +
    ||
450 +
    (
451 +
       bytes_extra < 0 &&
452 +
       bytes_init + bytes_extra > FANSI_int_max - span_extra
453 +
    )
454 +
  ) overflow_err("INT_MAX", i);
455 +
456 +
  if(bytes_init + bytes_extra + span_extra < 0) {
457 +
    // nocov start
315 458
    error(
316 -
      "%s%s %.0f %s",
317 -
      "String with SGR sequences as CSS is longer ",
318 -
      "than INT_MAX at position", (double) (i + 1),
319 -
      "which is not allowed by R."
459 +
      "%s%s",
460 +
      "Internal Error: CSS would translate to negative length string; ",
461 +
      "this should not happen."
320 462
    );
321 -
  } else if(bytes_extra < 0) {
322 -
    if(bytes_extra <= FANSI_int_min + span_extra) {
323 -
      // nocov start
324 -
      error(
325 -
        "%s%s%s",
326 -
        "Internal error: integer overflow when trying to compute net ",
327 -
        "additional bytes requires by conversion of SGR to HTML. ",
328 -
        "Contact maintainer"
329 -
      );
330 -
      // nocov end
331 -
    }
332 -
    int bytes_extra_extra = bytes_extra + span_extra;
333 -
334 -
    if(bytes_init + bytes_extra_extra < 0)
335 -
      // nocov start
336 -
      error(
337 -
        "%s%s",
338 -
        "Internal Error: CSS would translate to negative length string; ",
339 -
        "this should not happen."
340 -
      );
341 -
      // nocov end
463 +
    // nocov end
342 464
  }
343 -
  bytes_final = (size_t) bytes_init + bytes_extra + span_extra + 1;
344 -
  return bytes_final;
465 +
  int bytes_final = bytes_init + bytes_extra + span_extra;
466 +
  return final_string_size(bytes_final, i);
345 467
}
346 -
SEXP FANSI_esc_to_html(SEXP x, SEXP warn, SEXP term_cap) {
468 +
469 +
SEXP FANSI_esc_to_html(SEXP x, SEXP warn, SEXP term_cap, SEXP color_classes) {
347 470
  if(TYPEOF(x) != STRSXP)
348 471
    error("Internal Error: `x` must be a character vector");  // nocov
472 +
  if(TYPEOF(color_classes) != STRSXP)
473 +
    error("Internal Error: `color_classes` must be a character vector");  // nocov
349 474
350 475
  R_xlen_t x_len = XLENGTH(x);
351 476
  struct FANSI_buff buff = {.len=0};
352 477
  struct FANSI_state state, state_prev, state_init;
353 478
  state = state_prev = state_init = FANSI_state_init("", warn, term_cap);
479 +
  const char * span_end = "</span>";
480 +
  int span_end_len = (int) strlen(span_end);
354 481
355 482
  SEXP res = x;
356 483
  // Reserve spot on protection stack
@@ -361,173 +488,150 @@
Loading
361 488
    FANSI_interrupt(i);
362 489
363 490
    SEXP chrsxp = STRING_ELT(x, i);
364 -
    FANSI_check_enc(chrsxp, i);
491 +
    if(chrsxp == NA_STRING) continue;
492 +
    FANSI_check_chrsxp(chrsxp, i);
493 +
    const char * string = CHAR(chrsxp);
365 494
366 -
    const char * string_start = CHAR(chrsxp);
367 -
    const char * string = string_start;
368 -
369 -
    // Reset position info and string; we want to preserve the rest of the state
370 -
    // info so that SGR styles can spill across lines
371 -
372 -
    state = FANSI_reset_pos(state);
495 +
    // Reset position info and string; rest of state info is preserved from
496 +
    // prior line so that the state can be continued on new line.
497 +
    state = FANSI_reset_pos(state_prev);
373 498
    state.string = string;
374 499
    struct FANSI_state state_start = FANSI_reset_pos(state);
500 +
    state_prev = state_init;  // but there are no styles in the string yet
375 501
376 -
    // Save what the state was at the end of the prior string
377 -
378 -
    R_len_t bytes_init = LENGTH(chrsxp);
502 +
    int bytes_init = (int) LENGTH(chrsxp);
503 +
    int bytes_html = 0;
504 +
    int bytes_esc = 0;
379 505
380 -
    int bytes_extra = 0;   // Net bytes being add via tags (css - ESC)
381 506
    size_t bytes_final = 0;
382 -
    int has_esc, any_esc;
383 -
    has_esc = any_esc = 0;
507 +
508 +
    // Some ESCs may not produce any HTML, and some strings may gain HTML from
509 +
    // an ESC from a prior element even if they have no ESCs.
510 +
    int has_esc = 0;
511 +
    int has_state = state_has_style_html(state);
512 +
    int trail_span = 0;
384 513
385 514
    // Process the strings in two passes, in pass 1 we compute how many bytes
386 515
    // we'll need to store the string, and in the second we actually write it.
387 -
    // This is obviously a bit wasteful as we parse the ESC sequences twice, but
388 -
    // the alternative is to track a growing list or some such of accrued parsed
389 -
    // sequences.  The latter might be faster, but more work for us so we'll
390 -
    // leave it and see if it becomes a major issue.
516 +
    // We trade efficiency for convenience.
517 +
518 +
    // We cheat by only using FANSI_read_next to read escape sequences as we
519 +
    // don't care about display width, etc.  Normally we would _read_next over
520 +
    // all characters, not just skip from ESC to ESC.
391 521
392 -
    // It is possible for a state to be left over from prior string.
522 +
    // - Pass 1: Measure -------------------------------------------------------
393 523
394 -
    if(FANSI_state_has_style_basic(state)) {
395 -
      bytes_extra = html_compute_size(
396 -
        state, bytes_extra, state.pos_byte, 0, i
524 +
    // Leftover from prior element (only if can't be merged with new)
525 +
    if(*string && *string != 0x1b && state_has_style_html(state)) {
526 +
      bytes_html += state_size_and_write_as_html(
527 +
        state, state_prev,  NULL, color_classes, i, bytes_html
397 528
      );
398 -
      has_esc = any_esc = 1;
529 +
      state_prev = state;
399 530
    }
400 -
    state_prev = state;
401 -
402 -
    // Now check string proper
403 -
404 -
    while(*string && (string = strchr(string, 0x1b))) {
405 -
      if(!any_esc) any_esc = 1;
406 -
407 -
      // Since we don't care about width, etc, we only use the state objects to
408 -
      // parse the ESC sequences, so we don't have to worry about UTF8
409 -
      // conversions.
410 -
411 -
      state.pos_byte = (string - string_start);
412 -
413 -
      // read all sequential ESC tags and compute the net change in size to hold
414 -
      // them
415 -
416 -
      int esc_start = state.pos_byte;
417 -
      state = FANSI_read_next(state);
418 -
      if(FANSI_state_comp_basic(state, state_prev)) {
419 -
        bytes_extra =
420 -
          html_compute_size(state, bytes_extra, esc_start, !has_esc, i);
421 -
        if(!has_esc) has_esc = 1;
531 +
    // New in this element
532 +
    while(1) {
533 +
      trail_span = state_has_style_html(state_prev);
534 +
      string = strchr(string, 0x1b);
535 +
      if(!string) string = state.string + bytes_init;
536 +
      else {
537 +
        has_esc = 1;
538 +
        state.pos_byte = (string - state.string);
422 539
      }
423 -
      state_prev = state;
424 -
      ++string;
540 +
      // State as html, skip if at end of string
541 +
      if(*string) {
542 +
        int esc_start = state.pos_byte;
543 +
        state = FANSI_read_next(state);
544 +
        string = state.string + state.pos_byte;
545 +
        bytes_esc += state.pos_byte - esc_start;  // cannot overflow int
546 +
        if(*string) {
547 +
          bytes_html += state_size_and_write_as_html(
548 +
            state, state_prev,  NULL, color_classes, i, bytes_html
549 +
          );
550 +
        }
551 +
        state_prev = state;
552 +
        has_state |= state_has_style_html(state);
553 +
        if(!*string) break; // nothing after state, so done
554 +
      } else break;
425 555
    }
426 -
    if(any_esc) {
427 -
      // we will use an extra <span></span> to simplify logic
428 -
429 -
      int span_end = has_esc * 7;
430 -
431 -
      bytes_final = html_check_overflow(bytes_extra, bytes_init, span_end, i);
556 +
    // - Pass 2: Write ---------------------------------------------------------
432 557
558 +
    if(has_esc || has_state) {
559 +
      bytes_final = html_check_overflow(
560 +
        bytes_html, bytes_esc, bytes_init,
561 +
        span_end_len * trail_span, // Last non-terminal state has style?
562 +
        i
563 +
      );
564 +
      trail_span = 0;
433 565
      // Allocate target vector if it hasn't been yet
434 -
435 566
      if(res == x) REPROTECT(res = duplicate(x), ipx);
436 567
437 -
      // Allocate buffer and do second pass
438 -
568 +
      // Allocate buffer and do second pass, bytes_final includes space for NULL
439 569
      FANSI_size_buff(&buff, bytes_final);
440 -
      string = string_start;
570 +
      string = state.string;  // always points to first byte
441 571
      state_start.warn = state.warn;
442 572
      state = state_start;
573 +
      state_prev = state_init;
443 574
444 -
      // Rprintf("string is '%s'\n", state.string);
445 -
446 -
      int first_esc = 1;
447 575
      char * buff_track = buff.buff;
448 576
449 -
      // Handle state left-over from previous char elem
577 +
      // Very similar to pass 1 loop, but different enough it would be annoying
578 +
      // to make a common function
450 579
451 -
      if(FANSI_state_has_style_basic(state)) {
452 -
        int bytes_html = state_as_html(state, first_esc, buff_track);
453 -
        buff_track += bytes_html;
454 -
        first_esc = 0;
580 +
      if(*string && *string != 0x1b && state_has_style_html(state)) {
581 +
        buff_track += state_size_and_write_as_html(
582 +
          state, state_prev,  buff_track, color_classes, i, 0
583 +
        );
584 +
        state_prev = state;
455 585
      }
456 -
      state_prev = state;
457 -
458 -
      // Deal with state changes in this string
459 -
460 -
      while(*string && (string = strchr(string, 0x1b))) {
461 -
        state.pos_byte = (string - string_start);
462 -
463 -
        // read all sequential ESC tags
464 -
465 -
        state = FANSI_read_next(state);
586 +
      while(1) {
587 +
        const char * string_prev = string;
588 +
        trail_span = state_has_style_html(state_prev);
589 +
        string = strchr(string, 0x1b);
590 +
        if(!string) string = state.string + bytes_init;
591 +
        else state.pos_byte = (string - state.string);
466 592
467 593
        // The text since the last ESC
468 -
469 -
        // Rprintf("prev_byte: %d\n", state_prev.pos_byte);
470 -
        const char * string_last = string_start + state_prev.pos_byte;
471 -
        int bytes_prev = string - string_last;
472 -
        // Rprintf("bytes prev: %d\n", bytes_prev);
473 -
        // Rprintf("write prev: '%.*s'\n", bytes_prev, string_last);
474 -
        memcpy(buff_track, string_last, bytes_prev);
594 +
        int bytes_prev = string - string_prev;
595 +
        memcpy(buff_track, string_prev, bytes_prev);
475 596
        buff_track += bytes_prev;
476 -
477 -
        // If we have a change from the previous tag, write html/css
478 -
479 -
        if(FANSI_state_comp_basic(state, state_prev)) {
480 -
          int bytes_html = state_as_html(state, first_esc, buff_track);
481 -
          // Rprintf("write html: '%.*s'\n", bytes_html, buff_track);
482 -
          buff_track += bytes_html;
483 -
          if(first_esc) first_esc = 0;
484 -
        }
485 -
        state_prev = state;
486 -
        string = state.string + state.pos_byte;
597 +
        state.pos_byte = (string - state.string);
598 +
599 +
        // State as html, skip if at end of string
600 +
        if(*string) {
601 +
          state = FANSI_read_next(state);
602 +
          string = state.string + state.pos_byte;
603 +
          if(*string) {
604 +
            buff_track += state_size_and_write_as_html(
605 +
              state, state_prev,  buff_track, color_classes, i, 0
606 +
            );
607 +
          }
608 +
          state_prev = state;
609 +
          if(!*string) break; // nothing after state, so done
610 +
        } else break;
487 611
      }
488 -
      // Last hunk left to write and trailing SPAN
489 -
490 -
      const char * string_last = state_prev.string + state_prev.pos_byte;
491 -
      int bytes_stub = bytes_init - (string_last - string_start);
492 -
      // Rprintf("last: '%s'\n", string_last);
493 -
      // Rprintf("stub %d string %d\n", bytes_stub, (string_last - string_start));
494 -
495 -
      memcpy(buff_track, string_last, bytes_stub);
496 -
      buff_track += bytes_stub;
497 -
498 -
      if(has_esc) {
499 -
        // Always close (I think, I'm writing this over a year after I wrote the
500 -
        // code) tag.
501 -
502 -
        /*--------------------------------------------------------------------*\
503 -
        // WARNING: we're relying on this behavior to deal with the            |
504 -
        // black friday business, see #59)                                     |
505 -
        \*--------------------------------------------------------------------*/
506 -
507 -
        // Old comment: odd case where the only thing in the string is a null
508 -
        // SGR (from looking at code this will always be require, so not sure
509 -
        // what I mean by "odd case" as this is the only place we close tags
510 -
        // without immediately reopening another).
511 -
512 -
        memcpy(buff_track, "</span>", span_end);
513 -
        buff_track += span_end;
612 +
      // Trailing SPAN if needed
613 +
      if(trail_span) {
614 +
        memcpy(buff_track,span_end, span_end_len);
615 +
        buff_track += span_end_len;
514 616
      }
515 617
      *(buff_track) = '0';  // not strictly needed
516 618
517 -
      // Now create the charsxp what encoding to use.
518 -
519 -
      if(buff_track - buff.buff > FANSI_int_max)
619 +
      // Final check that we're not out of sync (recall buff.len includes NULL)
620 +
      if(buff_track - buff.buff != (int)(bytes_final - 1))
520 621
        // nocov start
521 622
        error(
522 -
          "%s%s",
523 -
          "Internal Error: attempting to write string longer than INT_MAX; ",
524 -
          "contact maintainer (3)."
623 +
          "Internal Error: %s (%td vs %zu).",
624 +
          "buffer length mismatch in html generation (2)",
625 +
          buff_track - buff.buff, bytes_final - 1
525 626
        );
526 627
        // nocov end
527 628
629 +
      // Now create the charsxp with the original encoding.  Since we're only
630 +
      // removing SGR and adding FANSI, it should be okay.
631 +
528 632
      cetype_t chr_type = getCharCE(chrsxp);
529 633
      SEXP chrsxp = PROTECT(
530 -
        mkCharLenCE(buff.buff, (int) (buff_track - buff.buff), chr_type)
634 +
        mkCharLenCE(buff.buff, (R_len_t)(buff_track - buff.buff), chr_type)
531 635
      );
532 636
      SET_STRING_ELT(res, i, chrsxp);
533 637
      UNPROTECT(1);
@@ -559,9 +663,8 @@
Loading
559 663
  SEXP res = PROTECT(allocVector(STRSXP, len / 5));
560 664
561 665
  for(R_xlen_t i = 0; i < len; i += 5) {
562 -
    int size = color_to_html(x_int[i], x_int + (i + 1), buff.buff);
563 -
    if(size < 1) error("Internal Error: size should be at least one");
564 -
    SEXP chrsxp = PROTECT(mkCharLenCE(buff.buff, size, CE_BYTES));
666 +
    color_to_html(x_int[i], x_int + (i + 1), buff.buff);
667 +
    SEXP chrsxp = PROTECT(mkCharLenCE(buff.buff, 7, CE_BYTES));
565 668
    SET_STRING_ELT(res, i / 5, chrsxp);
566 669
    UNPROTECT(1);
567 670
  }
@@ -569,4 +672,118 @@
Loading
569 672
  return res;
570 673
}
571 674
675 +
/*
676 +
 * Escape special HTML characters.
677 +
 */
678 +
679 +
SEXP FANSI_esc_html(SEXP x) {
  /*
   * Replace the five HTML special characters (& " ' < >) in each element of
   * a character vector with their entity equivalents.
   *
   * @param x a STRSXP; NA elements are left as they are.
   * @return `x` itself if no element required escaping, otherwise a duplicate
   *   of `x` in which each element that contained a special character has
   *   been replaced by its escaped version, with the element's original
   *   encoding mark preserved.
   *
   * Errors (via `overflow_err2`) if an escaped element would grow past
   * FANSI_int_max bytes.
   */
  if(TYPEOF(x) != STRSXP)
    error("Internal Error: `x` must be a character vector");  // nocov

  R_xlen_t x_len = XLENGTH(x);
  SEXP res = x;
  // Reserve spot on protection stack
  PROTECT_INDEX ipx;
  PROTECT_WITH_INDEX(res, &ipx);

  // Declared once, outside of the loop, as is done in FANSI_esc_to_html, so
  // that the same buffer object is handed to FANSI_size_buff for every
  // element instead of starting from an empty one each time.
  struct FANSI_buff buff = {.len=0};

  for(R_xlen_t i = 0; i < x_len; ++i) {
    FANSI_interrupt(i);

    SEXP chrsxp = STRING_ELT(x, i);
    if(chrsxp == NA_STRING) continue;
    FANSI_check_chrsxp(chrsxp, i);
    int bytes = (int) LENGTH(chrsxp);
    const char * string = CHAR(chrsxp);

    // - Pass 1: Measure -------------------------------------------------------

    // `bytes` starts at the input length and grows by the net number of
    // extra bytes each entity needs relative to the single byte it replaces.

    while(*string) {
      if(*string > '>') { // All specials are less than this
        ++string;
        continue;
      }
      switch(*string) {
        case '&': // &amp;
          if(bytes <= FANSI_int_max - 4) bytes += 4;
          else overflow_err2(i);
          break;
        case '"':   // &quot;
        case '\'':  // &#039;
          if(bytes <= FANSI_int_max - 5) bytes += 5;
          else overflow_err2(i);
          break;
        case '<':   // &lt;
        case '>':   // &gt;
          if(bytes <= FANSI_int_max - 3) bytes += 3;
          else overflow_err2(i);
          break;
        default:
          break;
      }
      ++string;
    }
    // - Pass 2: Write ---------------------------------------------------------

    // Only elements that actually grew contain something to escape.
    if(bytes > LENGTH(chrsxp)) {
      // Allocate target vector if it hasn't been yet
      if(res == x) REPROTECT(res = duplicate(x), ipx);

      // Size the buffer for the escaped string and do the second pass
      FANSI_size_buff(&buff, final_string_size(bytes, i));

      char * buff_track = buff.buff;
      string = CHAR(chrsxp);

      while(*string) {
        if(*string > '>') { // All specials are less than this
          *(buff_track++) = *(string++);
          continue;
        }
        switch(*string) {
          case '&':
            memcpy(buff_track, "&amp;", 5);
            buff_track += 5;
            break;
          case '"':
            memcpy(buff_track, "&quot;", 6);
            buff_track += 6;
            break;
          case '\'':
            memcpy(buff_track, "&#039;", 6);
            buff_track += 6;
            break;
          case '<':
            memcpy(buff_track, "&lt;", 4);
            buff_track += 4;
            break;
          case '>':
            memcpy(buff_track, "&gt;", 4);
            buff_track += 4;
            break;
          default:
            *(buff_track++) = *string;
        }
        ++string;
      }
      *buff_track = 0;

      // Sanity check that the two passes agree.  `bytes` is an int, so it
      // must be formatted with %d (the pointer difference uses %td).
      if(buff_track - buff.buff != bytes)
        // nocov start
        error(
          "Internal Error: %s (%td vs %d).",
          "buffer length mismatch in html escaping",
          buff_track - buff.buff, bytes
        );
        // nocov end

      cetype_t chr_type = getCharCE(chrsxp);
      SEXP reschr = PROTECT(mkCharLenCE(buff.buff, (R_len_t)(bytes), chr_type));
      SET_STRING_ELT(res, i, reschr);
      UNPROTECT(1);
    }
  }
  UNPROTECT(1);
  return res;
}
572 789

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##
@@ -16,24 +16,134 @@
Loading
16 16
17 17
#' Convert ANSI CSI SGR Escape Sequence to HTML Equivalents
18 18
#'
19 -
#' Only the colors, background-colors, and basic styles (CSI SGR codes 1-9) are
20 -
#' translated.  Others are dropped silently.
19 +
#' Interprets CSI SGR sequences and produces a string with equivalent
20 +
#' formats applied with SPAN elements and inline CSS styles.  Optionally for
21 +
#' colors, the SPAN elements may be assigned classes instead of inline styles,
22 +
#' in which case it is the user's responsibility to provide a style sheet.
23 +
#' Input that contains special HTML characters ("<", ">", "&", "'", and "\""),
24 +
#' particularly the first two, should be escaped with [`html_esc`].
25 +
#'
26 +
#' Only "observable" styles are translated.  These include colors,
27 +
#' background-colors, and basic styles (CSI SGR codes 1-6, 8, 9).  Style 7, the
28 +
#' "inverse" style, is implemented by explicitly switching foreground and
29 +
#' background colors, if there are any.  Styles 5-6 (blink) are rendered as
30 +
#' "text-decoration" but likely will do nothing in the browser.  Style 8
31 +
#' (conceal) sets the color to transparent.
32 +
#'
33 +
#' Each element of the input vector is translated into a stand-alone valid HTML
34 +
#' string.  In particular, any open SPAN tags are closed at the end of an
35 +
#' element and re-opened on the subsequent element with the same style.  This
36 +
#' allows safe combination of HTML translated strings, for example by
37 +
#' [`paste`]ing them together.  The trade-off is that there may be redundant
38 +
#' HTML produced.  To reduce redundancy you can first collapse the input vector
39 +
#' into one string, being mindful that very large strings may exceed maximum
40 +
#' string size when converted to HTML.
41 +
#'
42 +
#' Active SPAN tags are closed and new ones open anytime the "observable"
43 +
#' state changes.  `sgr_to_html` never produces nested SPAN tags, even if at
44 +
#' times that might produce more compact output.  This is because ANSI CSI SGR
45 +
#' is a state based formatting system and is not constrained by the semantics of
46 +
#' a nested one like HTML, so dealing with the complexity of nesting when it
47 +
#' cannot reproduce all inputs anyway does not seem worthwhile.
21 48
#'
22 49
#' @note Non-ASCII strings are converted to and returned in UTF-8 encoding.
23 50
#' @export
51 +
#' @family HTML functions
24 52
#' @inheritParams substr_ctl
25 -
#' @seealso [fansi] for details on how _Control Sequences_ are
53 +
#' @seealso [`fansi`] for details on how _Control Sequences_ are
26 54
#'   interpreted, particularly if you are getting unexpected results,
27 -
#'   [set_knit_hooks()] for how to use ANSI CSI styled text with knitr and HTML
28 -
#'   output.
29 -
#' @return a character vector with all escape sequences removed and any basic
30 -
#'   ANSI CSI SGR escape sequences applied via SPAN html objects with
31 -
#'   inline css styles.
55 +
#'   [`set_knit_hooks`] for how to use ANSI CSI styled text with knitr and HTML
56 +
#'   output, [`sgr_256`] to generate a demo string with all 256 8-bit colors.
57 +
#' @param classes FALSE (default), TRUE, or character vector of either 16,
58 +
#'   32, or 512 class names.  Character strings may only contain ASCII
59 +
#'   characters corresponding to letters, numbers, the hyphen, or the
60 +
#'   underscore.  It is the user's responsibility to provide values that are
61 +
#'   legal class names.
62 +
#'
63 +
#'   * FALSE: All colors rendered as inline CSS styles.
64 +
#'   * TRUE: Each of the 256 basic colors is mapped to a class in form
65 +
#'     "fansi-color-###" (or "fansi-bgcol-###" for background colors)
66 +
#'     where "###" is a zero padded three digit number in 0:255.  Basic colors
67 +
#'     specified with SGR codes 30-37 (or 40-47) map to 000:007, and bright ones
68 +
#'     specified with 90-97 (or 100-107) map to 008:015.  8 bit colors specified
69 +
#'     with SGR codes 38;5;### or 48;5;### map directly based on the value of
70 +
#'     "###".  Implicitly, this maps the 8 bit colors in 0:7 to the basic
71 +
#'     colors, and those in 8:15 to the bright ones even though these are not
72 +
#'     exactly the same when using inline styles.  "truecolor"s specified with
73 +
#'     38;2;#;#;# or 48;2;#;#;# do not map to classes and are rendered as inline
74 +
#'     styles.
75 +
#'   * character(16): The eight basic colors are mapped to the string values in
76 +
#'     the vector, all others are rendered as inline CSS styles.  Basic colors
77 +
#'     are mapped irrespective of whether they are encoded as the basic colors
78 +
#'     or as 8-bit colors.  Sixteen elements are needed because there must be
79 +
#'     eight classes for foreground colors, and eight classes for background
80 +
#'     colors.  Classes should be ordered in ascending order of color number,
81 +
#'     with foreground and background classes alternating starting with
82 +
#'     foreground (see examples).
83 +
#'   * character(32): Like character(16), except the basic and bright colors are
84 +
#'     mapped.
85 +
#'   * character(512): Like character(16), except the basic, bright, and all
86 +
#'     other 8-bit colors are mapped.
87 +
#'
88 +
#' @return A character vector of the same length as `x` with all escape
89 +
#'   sequences removed and any basic ANSI CSI SGR escape sequences applied via
90 +
#'   SPAN HTML tags.
32 91
#' @examples
33 92
#' sgr_to_html("hello\033[31;42;1mworld\033[m")
93 +
#' sgr_to_html("hello\033[31;42;1mworld\033[m", classes=TRUE)
94 +
#'
95 +
#' ## Input contains HTML special chars
96 +
#' x <- "<hello \033[42m'there' \033[34m &amp;\033[m \"moon\"!"
97 +
#' writeLines(x)
98 +
#' \dontrun{
99 +
#' in_html(
100 +
#'   c(
101 +
#'     sgr_to_html(html_esc(x)),  # Good
102 +
#'     sgr_to_html(x)             # Bad!
103 +
#' ) )
104 +
#' }
105 +
#' ## Generate some class names for basic colors
106 +
#' classes <- expand.grid(
107 +
#'   "myclass",
108 +
#'   c("fg", "bg"),
109 +
#'   c("black", "red", "green", "yellow", "blue", "magenta", "cyan", "white")
110 +
#' )
111 +
#' classes  # order is important!
112 +
#' classes <- do.call(paste, c(classes, sep="-"))
113 +
#' ## We only provide 16 classes, so only basic colors are
114 +
#' ## mapped to classes; others styled inline.
115 +
#' sgr_to_html(
116 +
#'   "\033[94mhello\033[m \033[31;42;1mworld\033[m",
117 +
#'   classes=classes
118 +
#' )
119 +
#' ## Create a whole web page with a style sheet for 256 colors and
120 +
#' ## the colors shown in a table.
121 +
#' class.256 <- do.call(paste, c(expand.grid(c("fg", "bg"), 0:255), sep="-"))
122 +
#' sgr.256 <- sgr_256()     # A demo of all 256 colors
123 +
#' writeLines(sgr.256[1:8]) # SGR formatting
124 +
#'
125 +
#' ## Convert to HTML using classes instead of inline styles:
126 +
#' html.256 <- sgr_to_html(sgr.256, classes=class.256)
127 +
#' writeLines(html.256[1])  # No inline colors
128 +
#'
129 +
#' ## Generate different style sheets.  See `?make_styles` for details.
130 +
#' default <- make_styles(class.256)
131 +
#' mix <- matrix(c(.6,.2,.2, .2,.6,.2, .2,.2,.6), 3)
132 +
#' desaturated <- make_styles(class.256, mix)
133 +
#' writeLines(default[1:4])
134 +
#' writeLines(desaturated[1:4])
135 +
#'
136 +
#' ## Embed in HTML page and display; only CSS changing
137 +
#' \dontrun{
138 +
#' in_html(html.256)                  # no CSS
139 +
#' in_html(html.256, css=default)     # default CSS
140 +
#' in_html(html.256, css=desaturated) # desaturated CSS
141 +
#' }
34 142
35 143
sgr_to_html <- function(
36 -
  x, warn=getOption('fansi.warn'), term.cap=getOption('fansi.term.cap')
144 +
  x, warn=getOption('fansi.warn'),
145 +
  term.cap=getOption('fansi.term.cap'),
146 +
  classes=FALSE
37 147
) {
38 148
  if(!is.character(x)) x <- as.character(x)
39 149
  if(!is.logical(warn)) warn <- as.logical(warn)
@@ -48,6 +158,179 @@
Loading
48 158
      deparse(VALID.TERM.CAP)
49 159
    )
50 160
51 -
  .Call(FANSI_esc_to_html, enc2utf8(x), warn, term.cap.int)
161 +
  classes <- if(isTRUE(classes)) {
162 +
    FANSI.CLASSES
163 +
  } else if (identical(classes, FALSE)) {
164 +
    character()
165 +
  } else if (is.character(classes)) {
166 +
    check_classes(classes)
167 +
  } else
168 +
    stop("Argument `classes` must be TRUE, FALSE, or a character vector.")
169 +
170 +
  .Call(FANSI_esc_to_html, enc2utf8(x), warn, term.cap.int, classes)
171 +
}
172 +
#' Generate CSS Mapping Classes to Colors
173 +
#'
174 +
#' Given a set of class names, produce the CSS that maps them to the default
175 +
#' 8-bit colors.  This is a helper function to generate style sheets for use
176 +
#' in examples with either default or remixed `fansi` colors.  In practice users
177 +
#' will create their own style sheets mapping their classes to their preferred
178 +
#' styles.
179 +
#'
180 +
#' @family HTML functions
181 +
#' @importFrom grDevices col2rgb rgb
182 +
#' @export
183 +
#' @param classes a character vector of either 16, 32, or 512 class names, or a
184 +
#'   scalar integer with value 8, 16, or 256.  The character vectors are
185 +
#'   described in [`sgr_to_html`].  The scalar integers will cause this function
186 +
#'   to generate classes for the basic colors (8), basic + bright (16), or all
187 +
#'   256 8-bit colors (256), with class names in "fansi-color-###" (or
188 +
#'   "fansi-bgcol-###" for background colors), which is what [`sgr_to_html`]
189 +
#'   generates when user-defined classes are not provided.  TRUE is also a valid
190 +
#'   input and is equivalent to 256.
191 +
#' @param rgb.mix 3 x 3 numeric matrix to remix color channels.  Given a N x 3
192 +
#'   matrix of numeric RGB colors `rgb`, the colors used in the style sheet will
193 +
#'   be `rgb %*% rgb.mix`.  Out of range values are clipped to the nearest bound
194 +
#'   of the range.
195 +
#' @return A character vector that can be used as the contents of a style sheet.
196 +
#' @examples
197 +
#' ## Generate some class strings; order matters
198 +
#' classes <- do.call(paste, c(expand.grid(c("fg", "bg"), 0:7), sep="-"))
199 +
#' writeLines(classes[1:4])
200 +
#'
201 +
#' ## Some Default CSS
202 +
#' css0 <- "span {font-size: 60pt; padding: 10px; display: inline-block}"
203 +
#'
204 +
#' ## Associate class strings with styles
205 +
#' css1 <- make_styles(classes)
206 +
#' writeLines(css1[1:4])
207 +
#'
208 +
#' ## Generate SGR-derived HTML, mapping to classes
209 +
#' string <- "\033[43mYellow\033[m\n\033[45mMagenta\033[m\n\033[46mCyan\033[m"
210 +
#' html <- sgr_to_html(string, classes=classes)
211 +
#' writeLines(html)
212 +
#'
213 +
#' ## Combine in a page with styles and display in browser
214 +
#' \dontrun{
215 +
#' in_html(html, css=c(css0, css1))
216 +
#' }
217 +
#'
218 +
#' ## Change CSS by remixing colors, and apply to exact same HTML
219 +
#' mix <- matrix(
220 +
#'   c(
221 +
#'     0, 1, 0,  # red output is green input
222 +
#'     0, 0, 1,  # green output is blue input
223 +
#'     1, 0, 0   # blue output is red input
224 +
#'   ),
225 +
#'   nrow=3, byrow=TRUE
226 +
#' )
227 +
#' css2 <- make_styles(classes, rgb.mix=mix)
228 +
#' ## Display in browser: same HTML but colors changed by CSS
229 +
#' \dontrun{
230 +
#' in_html(html, css=c(css0, css2))
231 +
#' }
232 +
233 +
make_styles <- function(classes, rgb.mix=diag(3)) {
234 +
  if(!is.character(classes)) stop("Argument `classes` is not character.")
235 +
  if(
236 +
    !is.matrix(rgb.mix) || !is.numeric(rgb.mix) ||
237 +
    !identical(dim(rgb.mix), c(3L, 3L)) ||
238 +
    anyNA(rgb.mix)
239 +
  )
240 +
    stop("Argument `rgb.mix` must be a 3 x 3 numeric matrix with no NAs.")
241 +
242 +
  classes <- check_classes(classes)
243 +
244 +
  colors <- rep(seq_len(length(classes) / 2) - 1L, each=2)
245 +
  colors.hex <- esc_color_code_to_html(rbind(8L, 5L, colors, 0L, 0L))
246 +
247 +
  if(!identical(rgb.mix, diag(3))) {
248 +
    color.vals <- t(col2rgb(colors.hex)) %*% rgb.mix
249 +
    color.vals[color.vals > 255] <- 255
250 +
    color.vals[color.vals < 0] <- 0
251 +
    colors.hex <- rgb(color.vals, maxColorValue=255)
252 +
  }
253 +
  paste0(
254 +
    ".", classes,
255 +
    " {", c("color", "background-color"), ": ", colors.hex, ";}"
256 +
  )
257 +
}
258 +
259 +
check_classes <- function(classes) {
260 +
  class.len <- length(classes)
261 +
  if(!class.len %in% c(16L, 32L, 512L)) {
262 +
    stop(
263 +
      "Argument `classes` must be length 16, 32, or 512 if it is a ",
264 +
      "character vector (is ", class.len, ")."
265 +
    )
266 +
  }
267 +
  if(anyNA(classes))
268 +
    stop("Argument `classes` contains NA values.")
269 +
  if(!all(grepl("^[0-9a-zA-Z_\\-]*$", classes)))
270 +
    stop(
271 +
      "Argument `classes` contains charcters other than ASCII letters, ",
272 +
      "numbers, the hyphen, and underscore."
273 +
    )
274 +
  classes
275 +
}
276 +
#' Frame HTML in a Web Page And Display
277 +
#'
278 +
#' Helper function that assembles user provided HTML and CSS into a temporary
279 +
#' text file, and by default displays it in the browser.  Intended for use in
280 +
#' examples.
281 +
#'
282 +
#' @export
283 +
#' @importFrom utils browseURL
284 +
#' @family HTML functions
285 +
#' @param x character vector of html encoded strings.
286 +
#' @param css character vector of css styles.
287 +
#' @param display TRUE or FALSE, whether to display the resulting page in a
288 +
#'   browser window.  If TRUE, will sleep for one second before returning, and
289 +
#'   will delete the temporary file used to store the HTML.
290 +
#' @param clean TRUE or FALSE, if TRUE and `display == TRUE`, will delete the
291 +
#'   temporary file used for the web page, otherwise will leave it.
292 +
#' @param pre TRUE (default) or FALSE, whether to wrap `x` in PRE tags.
293 +
#' @return character(1L) the file location of the page, invisibly, but keep in
294 +
#'   mind it will have been deleted if `clean=TRUE`.
295 +
#' @seealso [make_styles()].
296 +
#' @examples
297 +
#' txt <- "\033[31;42mHello \033[7mWorld\033[m"
298 +
#' writeLines(txt)
299 +
#' html <- sgr_to_html(txt)
300 +
#' \dontrun{
301 +
#' in_html(html) # spawns a browser window
302 +
#' }
303 +
#' writeLines(readLines(in_html(html, display=FALSE)))
304 +
#' css <- "SPAN {text-decoration: underline;}"
305 +
#' writeLines(readLines(in_html(html, css=css, display=FALSE)))
306 +
#' \dontrun{
307 +
#' in_html(html, css)
308 +
#' }
309 +
310 +
in_html <- function(x, css=character(), pre=TRUE, display=TRUE, clean=display) {
311 +
  html <- c(
312 +
    "<!DOCTYPE html>",
313 +
    "<html>",
314 +
    if(any(nzchar(css))) c("<style>", css, "</style>"),
315 +
    "<body>",
316 +
    if(pre) "<pre>",
317 +
    x,
318 +
    if(pre) "</pre>",
319 +
    "</body>", "</html>"
320 +
  )
321 +
  f <- tempfile()
322 +
  writeLines(html, f)
323 +
  if(display) browseURL(f)  # nocov, can't do this in tests
324 +
  if(clean) {
325 +
    Sys.sleep(1)
326 +
    unlink(f)
327 +
  }
328 +
  invisible(f)
52 329
}
53 330
331 +
FANSI.CLASSES <- do.call(
332 +
  paste,
333 +
  c(
334 +
    expand.grid('fansi', c('color', 'bgcol'), sprintf("%03d", 0:255)),
335 +
    sep="-"
336 +
) )

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -30,7 +30,7 @@
Loading
30 30
  {"digits_in_int", (DL_FUNC) &FANSI_digits_in_int_ext, 1},
31 31
  {"tabs_as_spaces", (DL_FUNC) &FANSI_tabs_as_spaces_ext, 5},
32 32
  {"color_to_html", (DL_FUNC) &FANSI_color_to_html_ext, 1},
33 -
  {"esc_to_html", (DL_FUNC) &FANSI_esc_to_html, 3},
33 +
  {"esc_to_html", (DL_FUNC) &FANSI_esc_to_html, 4},
34 34
  {"unhandled_esc", (DL_FUNC) &FANSI_unhandled_esc, 2},
35 35
  {"unique_chr", (DL_FUNC) &FANSI_unique_chr, 1},
36 36
  {"nzchar_esc", (DL_FUNC) &FANSI_nzchar, 5},
@@ -44,6 +44,7 @@
Loading
44 44
  {"get_int_max", (DL_FUNC) &FANSI_get_int_max, 0},
45 45
  {"check_enc", (DL_FUNC) &FANSI_check_enc_ext, 2},
46 46
  {"ctl_as_int", (DL_FUNC) &FANSI_ctl_as_int_ext, 1},
47 +
  {"esc_html", (DL_FUNC) &FANSI_esc_html, 1},
47 48
  {NULL, NULL, 0}
48 49
};
49 50

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 *  This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -41,6 +41,7 @@
Loading
41 41
    for(R_xlen_t i = 1; i < x_len; ++i) {
42 42
      SEXP x_cur;
43 43
      x_cur = STRING_ELT(x_srt, i);
44 +
      FANSI_check_chrsxp(x_cur, i);
44 45
      if(x_prev != x_cur) {
45 46
        ++u_count;
46 47
        x_prev = x_cur;

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##
@@ -30,15 +30,13 @@
Loading
30 30
#'
31 31
#' @inheritParams substr_ctl
32 32
#' @inheritParams base::nchar
33 +
#' @inheritParams strip_ctl
33 34
#' @inheritSection substr_ctl _ctl vs. _sgr
34 35
#' @note the `keepNA` parameter is ignored for R < 3.2.2.
35 36
#' @export
36 -
#' @param type character string, one of "chars", or "width".  For byte counts
37 -
#'   use [base::nchar].
38 -
#' @param strip deprecated in favor of `ctl`.
39 37
#' @seealso [fansi] for details on how _Control Sequences_ are
40 38
#'   interpreted, particularly if you are getting unexpected results,
41 -
#'   [strip_ctl] for removing _Control Sequences_.
39 +
#'   [`strip_ctl`] for removing _Control Sequences_.
42 40
#' @examples
43 41
#' nchar_ctl("\033[31m123\a\r")
44 42
#' ## with some wide characters
@@ -49,17 +47,16 @@
Loading
49 47
#' ## Remember newlines are not counted by default
50 48
#' nchar_ctl("\t\n\r")
51 49
#'
52 -
#' ## The 'c0' value for the `ctl` argument does
53 -
#' ## not include newlines.
50 +
#' ## The 'c0' value for the `ctl` argument does not include 
51 +
#' ## newlines.
54 52
#' nchar_ctl("\t\n\r", ctl="c0")
55 53
#' nchar_ctl("\t\n\r", ctl=c("c0", "nl"))
56 54
#'
57 55
#' ## The _sgr flavor only treats SGR sequences as zero width
58 -
#'
59 56
#' nchar_sgr("\033[31m123")
60 57
#' nchar_sgr("\t\n\n123")
61 58
#'
62 -
#' ## All of the following are Control Sequences
59 +
#' ## All of the following are Control Sequences or C0 controls
63 60
#' nzchar_ctl("\n\033[42;31m\033[123P\a")
64 61
65 62
nchar_ctl <- function(

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 *  This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -40,10 +40,10 @@
Loading
40 40
41 41
  SEXP res = PROTECT(allocVector(LGLSXP, x_len));
42 42
43 -
  for(R_len_t i = 0; i < x_len; ++i) {
43 +
  for(R_xlen_t i = 0; i < x_len; ++i) {
44 44
    FANSI_interrupt(i);
45 45
    SEXP string_elt = STRING_ELT(x, i);
46 -
    FANSI_check_enc(string_elt, i);
46 +
    FANSI_check_chrsxp(string_elt, i);
47 47
48 48
    if(string_elt == R_NaString) {
49 49
      if(keepNA_int == 1) {
@@ -61,9 +61,9 @@
Loading
61 61
        ) {
62 62
          warned = 1;
63 63
          warning(
64 -
            "Encountered %s ESC sequence at index [%.0f], %s%s",
64 +
            "Encountered %s ESC sequence at index [%jd], %s%s",
65 65
            !pos.valid ? "invalid" : "possibly incorrectly handled",
66 -
            (double) i + 1,
66 +
            FANSI_ind(i),
67 67
            "see `?unhandled_ctl`; you can use `warn=FALSE` to turn ",
68 68
            "off these warnings."
69 69
          );

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -38,7 +38,7 @@
Loading
38 38
39 39
static struct FANSI_prefix_dat make_pre(SEXP x) {
40 40
  SEXP chrsxp = STRING_ELT(x, 0);
41 -
  FANSI_check_enc(chrsxp, 0);
41 +
  FANSI_check_chrsxp(chrsxp, 0);
42 42
  const char * x_utf8 = CHAR(chrsxp);
43 43
  // ideally we would IS_ASCII(x), but that's not available to extensions
44 44
  int x_has_utf8 = FANSI_has_utf8(x_utf8);
@@ -164,9 +164,14 @@
Loading
164 164
  // If we are going to pad the end, adjust sizes and widths
165 165
166 166
  if(target_size > (size_t) FANSI_int_max)
167 +
    // Not possible for this to be longer than INT_MAX as we check on
168 +
    // entry with FANSI_check_chrsxp and we're not expanding anything.
169 +
    // nocov start, but just in case
167 170
    error(
168 -
      "Substring to write (%.0f) is longer than INT_MAX.", (double) target_size
171 +
      "Substring to write (%ju) is longer than INT_MAX.",
172 +
      (uintmax_t) target_size
169 173
    );
174 +
    // nocov end
170 175
171 176
  if(target_width <= (size_t) tar_width && *pad_chr) {
172 177
    target_pad = tar_width - target_width;
@@ -649,7 +654,7 @@
Loading
649 654
    FANSI_interrupt(i);
650 655
    SEXP chr = STRING_ELT(x, i);
651 656
    if(chr == NA_STRING) continue;
652 -
    FANSI_check_enc(chr, i);
657 +
    FANSI_check_chrsxp(chr, i);
653 658
    const char * chr_utf8 = CHAR(chr);
654 659
655 660
    SEXP str_i = PROTECT(

@@ -1,5 +1,5 @@
Loading
1 1
/*
2 -
 * Copyright (C) 2020  Brodie Gaslam
2 +
 * Copyright (C) 2021  Brodie Gaslam
3 3
 *
4 4
 * This file is part of "fansi - ANSI Control Sequence Aware String Functions"
5 5
 *
@@ -216,11 +216,12 @@
Loading
216 216
        size = 128;  // in theory little penalty to ask this minimum
217 217
      else if(size > (size_t) FANSI_int_max + 1) {
218 218
        // nocov start
219 +
        // assumptions check that  SIZE_T fits INT_MAX + 1
219 220
        // too difficult to test, all the code pretty much checks for overflow
220 221
        // before requesting memory
221 222
        error(
222 -
          "Internal Error: requested buff size %.0f greater than INT_MAX + 1.",
223 -
          (double) size
223 +
          "Internal Error: requested buff size %zu greater than INT_MAX + 1.",
224 +
           size
224 225
        );
225 226
        // nocov end
226 227
      }
@@ -242,9 +243,9 @@
Loading
242 243
        // this can't really happen unless size starts off bigger than
243 244
        // INT_MAX + 1
244 245
        error(
245 -
          "%s  Requesting %.0f",
246 +
          "%s  Requesting %zu",
246 247
          "Internal Error: max allowed buffer size is INT_MAX + 1.",
247 -
          (double) tmp_double_size
248 +
           tmp_double_size
248 249
        );
249 250
        // nocov end
250 251
      buff->len = tmp_double_size;
@@ -504,3 +505,29 @@
Loading
504 505
  }
505 506
  return res;
506 507
}
508 +
/*
509 +
 * So we can use a consistent integer type in printing possibly large indices.
510 +
 *
511 +
 * Returns the 1-based index, or -1 in the unlikely case that i == INTMAX_MAX.
512 +
 */
513 +
514 +
intmax_t FANSI_ind(R_xlen_t i) {
515 +
  intmax_t ind = i >= INTMAX_MAX ? -2 : i; // i == INTMAX_MAX is the issue
516 +
  return ind + 1;
517 +
}
518 +
519 +
void FANSI_check_chr_size(char * start, char * end, R_xlen_t i) {
520 +
  if(end - start > FANSI_int_max) {
521 +
    // Can't get to this point with a string that violates, AFAICT
522 +
    // nocov start
523 +
    error(
524 +
      "Internal Error: %s at index [%jd] (3).",
525 +
      "attempting to write string longer than INT_MAX",
526 +
      FANSI_ind(i)
527 +
    );
528 +
    // nocov end
529 +
  }
530 +
}
531 +
532 +
533 +

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##
@@ -167,14 +167,16 @@
Loading
167 167
}
168 168
#' Escape Characters With Special HTML Meaning
169 169
#'
170 -
#' This allows displaying strings that contain them in HTML without disrupting
171 -
#' the HTML.  It is assumed that the string to be escaped does not contain
172 -
#' actual HTML as this function would destroy it.
170 +
#' Arbitrary text may contain characters with special meaning in HTML, which may
171 +
#' cause HTML display to be corrupted if they are included unescaped in a web
172 +
#' page.  This function escapes those special characters so they do not
173 +
#' interfere with the HTML markup generated by e.g. [`sgr_to_html`].
173 174
#'
174 175
#' @export
176 +
#' @family HTML functions
175 177
#' @param x character vector
176 -
#' @return character vector consisting of `x`, but with the "<", ">", and "&"
177 -
#'   characters replaced by their HTML entity codes.
178 +
#' @return `x`, but with "<", ">", "&", "'", and "\"" characters replaced by
179 +
#'   their HTML entity codes, and Encoding set to UTF-8.
178 180
#' @examples
179 181
#' html_esc("day > night")
180 182
#' html_esc("<SPAN>hello world</SPAN>")
@@ -182,7 +184,7 @@
Loading
182 184
html_esc <- function(x) {
183 185
  if(!is.character(x))
184 186
    stop("Argument `x` must be character, is ", typeof(x), ".")
185 -
  gsub("<", "&lt;", gsub(">", "&gt;", gsub("&", "&amp;", x)))
187 +
  .Call(FANSI_esc_html, enc2utf8(x))
186 188
}
187 189
188 190
#' Format Character Vector for Display as Code in HTML
@@ -197,8 +199,8 @@
Loading
197 199
#' @param class character vectors of classes to apply to the PRE HTML tags.  It
198 200
#'   is the users responsibility to ensure the classes are valid CSS class
199 201
#'   names.
200 -
#' @return character(1L) `x`, with <PRE> and <CODE> HTML tags applied and
201 -
#'   collapsed into one line with newlines as the line separator.
202 +
#' @return character(1L) `x`, with &lt;PRE&gt; and &lt;CODE&gt; HTML tags
203 +
#'   applied and collapsed into one line with newlines as the line separator.
202 204
#' @examples
203 205
#' html_code_block(c("hello world"))
204 206
#' html_code_block(c("hello world"), class="pretty")
@@ -221,8 +223,8 @@
Loading
221 223
#' document.  It overrides the `knitr` output hooks by using
222 224
#' `knitr::knit_hooks$set`.  It replaces the hooks with ones that convert ANSI
223 225
#' CSI SGR sequences into HTML.  In addition to replacing the hook functions,
224 -
#' this will output a <STYLE> HTML block to stdout.  These two actions are side
225 -
#' effects as a result of which R chunks in the `rmarkdown` document that
226 +
#' this will output a &lt;STYLE&gt; HTML block to stdout.  These two actions are
227 +
#' side effects as a result of which R chunks in the `rmarkdown` document that
226 228
#' contain ANSI CSI SGR are shown in their HTML equivalent form.
227 229
#'
228 230
#' The replacement hook function tests for the presence of ANSI CSI SGR
@@ -257,9 +259,9 @@
Loading
257 259
#' @param proc.fun function that will be applied to output that contains ANSI
258 260
#'   CSI SGR sequences.  Should accept parameters `x` and `class`, where `x` is
259 261
#'   the output, and `class` is the CSS class that should be applied to
260 -
#'   the <PRE><CODE> blocks the output will be placed in.
262 +
#'   the &lt;PRE&gt;&lt;CODE&gt; blocks the output will be placed in.
261 263
#' @param style character a vector of CSS styles; these will be output inside
262 -
#'   HTML <STYLE> tags as a side effect.  The default value is designed to
264 +
#'   HTML &lt;STYLE&gt; tags as a side effect.  The default value is designed to
263 265
#'   ensure that there is no visible gap in background color with lines with
264 266
#'   height 1.5 (as is the default setting in `rmarkdown` documents v1.1).
265 267
#' @param split.nl TRUE or FALSE (default), set to TRUE to split input strings
@@ -424,3 +426,48 @@
Loading
424 426
  if(.test) list(old.hooks=old.hook.list, new.hooks=new.hook.list, res=set.res)
425 427
  else old.hook.list
426 428
}
429 +
#' Show 8 Bit ANSI CSI SGR Colors
430 +
#'
431 +
#' Generates text with each 8 bit SGR code (e.g. the "###" in "38;5;###") with
432 +
#' the background colored by itself, and the foreground in a contrasting
433 +
#' and interesting color (we sacrifice some contrast for interest as this is
434 +
#' intended for demo rather than reference purposes).
435 +
#'
436 +
#' @seealso [make_styles()].
437 +
#' @export
438 +
#' @return character vector with SGR codes with background color set as
439 +
#'   themselves.
440 +
#' @examples
441 +
#' writeLines(sgr_256())
442 +
443 +
sgr_256 <- function() {
444 +
  tpl <- "\033[38;5;%d;48;5;%dm%s\033[m"
445 +
446 +
  # Basic, bright, grayscale
447 +
  basic <- paste0(sprintf(tpl, 15, 0:7, format(0:7, width=3)), collapse=" ")
448 +
  bright <- paste0(sprintf(tpl, 0, 8:15, format(8:15, width=3)), collapse=" ")
449 +
  gs1 <-
450 +
    paste0(sprintf(tpl, 15, 232:243, format(232:243, width=3)), collapse=" ")
451 +
  gs2 <-
452 +
    paste0(sprintf(tpl, 0, 244:255, format(244:255, width=3)), collapse=" ")
453 +
454 +
  # Color parts
455 +
  fg <- 231:16
456 +
  bg <- rev(fg)  # reverse fg/bg so we can read the numbers
457 +
458 +
  table <- matrix(sprintf(tpl, fg, bg, format(bg)), 36)
459 +
  part.a <- do.call(paste0, c(split(table[1:18,], row(table[1:18,]))))
460 +
  part.b <- do.call(paste0, c(split(table[-(1:18),], row(table[-(1:18),]))))
461 +
462 +
  ## Output
463 +
  c(
464 +
    "Standard", basic, "",
465 +
    "High-Intensity", bright, "",
466 +
    "216 Colors (Dark)",
467 +
    part.a, "",
468 +
    "216 Colors (Light)",
469 +
    part.b, "",
470 +
    "Grayscale",
471 +
    gs1, gs2
472 +
  )
473 +
}

@@ -1,4 +1,4 @@
Loading
1 -
## Copyright (C) 2020  Brodie Gaslam
1 +
## Copyright (C) 2021  Brodie Gaslam
2 2
##
3 3
## This file is part of "fansi - ANSI Control Sequence Aware String Functions"
4 4
##
Files Coverage
R 100.00%
src 100.00%
Project Totals (24 files) 100.00%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading