r-lib / cli
1

2
#' Working around the bad Unicode character widths
3
#'
4
#' R 3.6.2 and also the coming 3.6.3 and 4.0.0 versions use the Unicode 8
5
#' standard to calculate the display width of Unicode characters.
6
#' Unfortunately the widths of most emojis are incorrect in this standard,
7
#' and width 1 is reported instead of the correct 2 value.
8
#'
9
#' See more about this here: https://github.com/brodieG/fansi/issues/62
10
#'
11
#' cli implements a workaround for this. The package contains a table that
12
#' contains all Unicode ranges that have wide characters (display width 2).
13
#'
14
#' On first use of one of the workaround wrappers (`strwrap2_fixed()`, etc.)
15
#' we check what the current version of R thinks about the width of these
16
#' characters, and then create a regex that matches the ones that R
17
#' is wrong about (`re_bad_char_width`).
18
#'
19
#' Then we use this regex to duplicate all of the problematic characters
20
#' in the input string to the wrapper function, before calling the real
21
#' string manipulation function (nchar, strwrap, etc.). At the end we undo the
22
#' duplication before we return the result.
23
#'
24
#' This workaround is fine for `nchar()` and `strwrap()` (& co in fansi).
25
#' It is potentially not fine for `substr()`, but we don't currently use
26
#' that...
27
#'
28
#' @keywords internal
29
#' @name unicode-width-workaround
30
NULL
31

32
setup_unicode_width_fix <- function() {
33 1
  bad <- fansi::nchar_ctl(wide_chars$test, type = "width") == 1
34 1
  re <- paste0(wide_chars$regex[bad], collapse = "")
35 1
  clienv$re_bad_char_width <- paste0("([", re, "])")
36 1
  clienv$re_bad_char_width_fix <- paste0("([", re, "])\\1")
37
}
38

39
#' @importFrom fansi strwrap_ctl
40

41
strwrap_fixed <- function(x, ...) {
42 0
  if (.Platform$OS.type == "windows") return(strwrap_ctl(x, ...))
43 1
  if (is.null(clienv$re_bad_char_width)) setup_unicode_width_fix()
44 0
  if (clienv$re_bad_char_width == "([])") return(strwrap_ctl(x, ...))
45 1
  x <- gsub(clienv$re_bad_char_width, "\\1\\1", x)
46 1
  ret <- strwrap_ctl(x, ...)
47 1
  gsub(clienv$re_bad_char_width_fix, "\\1", ret)
48
}
49

50
#' @importFrom fansi strwrap2_ctl
51

52
strwrap2_fixed <- function(x, ...) {
53 0
  if (.Platform$OS.type == "windows") return(strwrap2_ctl(x, ...))
54 0
  if (is.null(clienv$re_bad_char_width)) setup_unicode_width_fix()
55 0
  if (clienv$re_bad_char_width == "([])") return(strwrap2_ctl(x, ...))
56 1
  x <- gsub(clienv$re_bad_char_width, "\\1\\1", x)
57 1
  ret <- strwrap2_ctl(x, ...)
58 1
  gsub(clienv$re_bad_char_width_fix, "\\1", ret)
59
}
60

61
#' @importFrom fansi nchar_ctl
62

63
nchar_fixed <- function(x, type = "chars", ...) {
64 0
  if (.Platform$OS.type == "windows") return(nchar_ctl(x, type = type, ...))
65 1
  if (type != "width") return(nchar_ctl(x, type = type, ...))
66 0
  if (is.null(clienv$re_bad_char_width)) setup_unicode_width_fix()
67 0
  if (clienv$re_bad_char_width == "([])") return(nchar_ctl(x, type = type, ...))
68 1
  x <- gsub(clienv$re_bad_char_width, "\\1\\1", x)
69 1
  nchar_ctl(x, type = type, ...)
70
}

Read our documentation on viewing source code .

Loading