r-lib / vctrs
Showing 3 of 7 files from the diff.

@@ -135,6 +135,8 @@
Loading
135 135
extern SEXP vctrs_fill_missing(SEXP, SEXP, SEXP);
136 136
extern SEXP vctrs_chr_paste_prefix(SEXP, SEXP, SEXP);
137 137
extern r_obj* vctrs_rank(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*);
138 +
extern r_obj* vctrs_integer64_proxy(r_obj*);
139 +
extern r_obj* vctrs_integer64_restore(r_obj*);
138 140
139 141
140 142
// Maturing
@@ -288,6 +290,8 @@
Loading
288 290
  {"vctrs_fill_missing",               (DL_FUNC) &vctrs_fill_missing, 3},
289 291
  {"vctrs_chr_paste_prefix",           (DL_FUNC) &vctrs_chr_paste_prefix, 3},
290 292
  {"vctrs_rank",                       (DL_FUNC) &vctrs_rank, 7},
293 +
  {"vctrs_integer64_proxy",            (DL_FUNC) &vctrs_integer64_proxy, 1},
294 +
  {"vctrs_integer64_restore",          (DL_FUNC) &vctrs_integer64_restore, 1},
291 295
  {NULL, NULL, 0}
292 296
};
293 297

@@ -1,7 +1,14 @@
Loading
1 -
2 1
#' @export
3 -
vec_proxy_compare.integer64 <- function(x, ...) {
4 -
  bit64::rank.integer64(x)
2 +
vec_proxy_equal.integer64 <- function(x, ...) {
  # Inputs without dimensions are proxied directly
  if (!is.array(x)) {
    return(integer64_proxy(x))
  }

  # Stopgap for arrays: convert them to a data frame, then send that data
  # frame through `vec_proxy_equal()` again so each column gets proxied
  vec_proxy_equal(as_data_frame_from_array(x))
}
6 13
7 14
# Print -------------------------------------------------------------------
@@ -108,3 +115,46 @@
Loading
108 115
vec_cast.double.integer64 <- function(x, to, ...) {
  # `to` and `...` are accepted but not used; the conversion itself is
  # delegated to `as.double()`
  out <- as.double(x)
  out
}
118 +
119 +
# ------------------------------------------------------------------------------
120 +
121 +
# Passes `x` to the native `vctrs_integer64_proxy` routine and returns
# its result.
integer64_proxy <- function(x) {
  proxy <- .Call(vctrs_integer64_proxy, x)
  proxy
}
124 +
# Passes `x` to the native `vctrs_integer64_restore` routine and returns
# its result.
integer64_restore <- function(x) {
  restored <- .Call(vctrs_integer64_restore, x)
  restored
}
127 +
128 +
# ------------------------------------------------------------------------------
129 +
130 +
as_data_frame_from_array <- function(x) {
  # Replacement for `as.data.frame.array()` in which 1-D arrays are always
  # stripped of their dimensions. In contrast to `as.data.frame2()`, `x` is
  # never unclassed, so every column keeps the class of the input.
  # No attempt is made to carry over the names of `x`.

  shape <- dim(x)

  # The first dimension supplies the rows and all remaining dimensions are
  # flattened into columns. A 1-D array has no remaining dimensions, and
  # `prod()` of an empty vector is 1, so it is treated as a 1 column matrix.
  n_row <- shape[[1L]]
  n_col <- prod(shape[-1L])

  dim(x) <- c(n_row, n_col)

  # `drop = TRUE` turns each extracted column into a plain vector
  cols <- lapply(seq_len(n_col), function(j) x[, j, drop = TRUE])
  names(cols) <- as_unique_names(rep("", n_col), quiet = TRUE)

  new_data_frame(cols, n = n_row)
}
160 +

@@ -0,0 +1,158 @@
Loading
1 +
#include <rlang.h>
2 +
#include "vctrs.h"
3 +
#include "utils.h"
4 +
#include "decl/type-integer64-decl.h"
5 +
6 +
#define r_na_llong LLONG_MIN
7 +
8 +
9 +
static
10 +
const char* v_integer64_proxy_df_names_c_strings[] = {
11 +
  "left",
12 +
  "right"
13 +
};
14 +
static
15 +
const enum r_type v_integer64_proxy_df_types[] = {
16 +
  R_TYPE_double,
17 +
  R_TYPE_double
18 +
};
19 +
enum integer64_proxy_df_locs {
20 +
  INTEGER64_PROXY_DF_LOCS_left,
21 +
  INTEGER64_PROXY_DF_LOCS_right
22 +
};
23 +
#define INTEGER64_PROXY_DF_SIZE R_ARR_SIZEOF(v_integer64_proxy_df_types)
24 +
25 +
26 +
// [[ register() ]]
27 +
r_obj* vctrs_integer64_proxy(r_obj* x) {
  // Builds a data frame with two double columns, `left` and `right`, from
  // `x`. A missing element is stored as `NA` in both columns; every other
  // element is split across the two columns by `int64_unpack()`.

  if (r_typeof(x) != R_TYPE_double) {
    r_stop_internal("vctrs_integer64_proxy", "`x` must be a double.");
  }
  if (r_attrib_get(x, R_DimSymbol) != r_null) {
    r_stop_internal("vctrs_integer64_proxy", "`x` should not have a `dim` attribute.");
  }

  const r_ssize n = r_length(x);

  // Casting `const double*` to `const long long*` is UB, but we are mimicking
  // what bit64 is doing, so if this ever breaks it means that bit64 is broken.
  const long long* v_elts = (const long long*) r_dbl_cbegin(x);

  r_obj* names = KEEP(r_chr_n(
    v_integer64_proxy_df_names_c_strings,
    INTEGER64_PROXY_DF_SIZE
  ));
  r_obj* proxy = KEEP(r_alloc_df_list(
    n,
    names,
    v_integer64_proxy_df_types,
    INTEGER64_PROXY_DF_SIZE
  ));
  r_init_data_frame(proxy, n);

  double* v_left = r_dbl_begin(r_list_get(proxy, INTEGER64_PROXY_DF_LOCS_left));
  double* v_right = r_dbl_begin(r_list_get(proxy, INTEGER64_PROXY_DF_LOCS_right));

  for (r_ssize i = 0; i < n; ++i) {
    const long long elt = v_elts[i];

    if (elt != r_na_llong) {
      int64_unpack((int64_t) elt, i, v_left, v_right);
    } else {
      // Missing values are marked in both columns
      v_left[i] = r_globals.na_dbl;
      v_right[i] = r_globals.na_dbl;
    }
  }

  FREE(2);
  return proxy;
}
77 +
78 +
// [[ register() ]]
79 +
r_obj* vctrs_integer64_restore(r_obj* x) {
  // Rebuilds a double vector with class "integer64" from a two column data
  // frame. A row whose `left` value is NaN becomes the missing value; every
  // other row is recombined with `int64_pack()`.

  if (!is_data_frame(x)) {
    r_stop_internal("vctrs_integer64_restore", "`x` must be a data frame.");
  }
  if (r_length(x) != 2) {
    r_stop_internal("vctrs_integer64_restore", "`x` must have two columns.");
  }

  r_obj* left_col = r_list_get(x, INTEGER64_PROXY_DF_LOCS_left);
  r_obj* right_col = r_list_get(x, INTEGER64_PROXY_DF_LOCS_right);

  const double* v_left = r_dbl_cbegin(left_col);
  const double* v_right = r_dbl_cbegin(right_col);

  // The output size is taken from the `left` column
  const r_ssize n = r_length(left_col);

  r_obj* out = KEEP(r_alloc_double(n));

  // Casting `double*` to `long long*` is UB, but it mirrors the way the
  // data of `x` is read when the proxy is built
  long long* v_out = (long long*) r_dbl_begin(out);

  r_attrib_poke_class(out, r_chr("integer64"));

  for (r_ssize i = 0; i < n; ++i) {
    const double upper = v_left[i];
    const double lower = v_right[i];

    v_out[i] = isnan(upper) ? r_na_llong : (long long) int64_pack(upper, lower);
  }

  FREE(1);
  return out;
}
116 +
117 +
// -----------------------------------------------------------------------------
118 +
119 +
/*
120 +
 * This pair of functions facilitates:
121 +
 * - Splitting an `int64_t` into two `uint32_t` values, maintaining order
122 +
 * - Combining those two `uint32_t` values back into the original `int64_t`
123 +
 *
124 +
 * The two `uint32_t` values are stored in two doubles. This allows us to store
125 +
 * it in a two column data frame that vctrs knows how to work with, and we can
126 +
 * use the standard `NA_real_` as the missing value without fear of conflicting
127 +
 * with any other valid `int64_t` value.
128 +
 *
129 +
 * Unsigned 32-bit integers are used because bit shifting is undefined on signed
130 +
 * types.
131 +
 *
132 +
 * An arithmetic offset of `- INT64_MIN` is applied to remap the int64_t value
133 +
 * into uint64_t space, while maintaining order. This relies on unsigned
134 +
 * arithmetic overflow behavior, which is well-defined.
135 +
 */
136 +
137 +
static inline
138 +
void int64_unpack(int64_t x, r_ssize i, double* v_left, double* v_right) {
139 +
  const uint64_t x_u64 = ((uint64_t) x) - INT64_MIN;
140 +
141 +
  const uint32_t left_u32 = (uint32_t) (x_u64 >> 32);
142 +
  const uint32_t right_u32 = (uint32_t) x_u64;
143 +
144 +
  v_left[i] = (double) left_u32;
145 +
  v_right[i] = (double) right_u32;
146 +
}
147 +
148 +
static inline
int64_t int64_pack(double left, double right) {
  // `left` holds the upper 32 bits and `right` the lower 32 bits of the
  // offset value. Join them, then add `INT64_MIN` in unsigned arithmetic to
  // undo the offset before converting to `int64_t`.
  const uint64_t upper = (uint64_t) (uint32_t) left;
  const uint64_t lower = (uint64_t) (uint32_t) right;

  const uint64_t biased = (upper << 32) | lower;

  return (int64_t) (biased + INT64_MIN);
}
Files Coverage
R 87.13%
src 83.22%
Project Totals (184 files) 83.93%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
        informational: true
10
    patch:
11
      default:
12
        target: auto
13
        threshold: 1%
14
        informational: true
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading