TommyJones / mvrsquared

Compare 76ba5f5 ... +0 ... a310abc

Coverage Reach

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -1,47 +1,63 @@
Loading
1 -
1 +
// [[Rcpp::plugins(cpp11)]]
2 +
// [[Rcpp::depends(RcppThread)]]
2 3
// [[Rcpp::depends(RcppArmadillo)]]
4 +
#include "RcppThread.h"
3 5
#include <RcppArmadillo.h>
4 6
#include <cmath>
5 7
#define ARMA_64BIT_WORD
6 8
using namespace Rcpp ;
7 9
8 10
// [[Rcpp::export]]
9 -
NumericVector calc_sum_squares_latent(arma::sp_mat Y, NumericMatrix X, NumericMatrix W, NumericVector ybar) {
10 -
11 -
  int n_obs = Y.n_rows; // number of observations
12 -
  int n_latent_vars = X.ncol(); // number of latent variables
13 -
  int n_explicit_vars = W.ncol(); // number of explicit variables
11 +
NumericVector calc_sum_squares_latent(
12 +
    arma::sp_mat Y,
13 +
    NumericMatrix X,
14 +
    NumericMatrix W,
15 +
    NumericVector ybar,
16 +
    int threads
17 +
) {
18 +
19 +
  int n_obs = Y.n_cols; // number of observations
14 20
  NumericVector result(2); // final result
15 21
  double SSE = 0; // sum of squared errors across all documents
16 22
  double SST = 0; // total sum of squares across all documents
17 23
18 24
19 25
  // for each observations...
20 -
  for(int d = 0; d < n_obs; d++){
26 +
  RcppThread::parallelFor(
27 +
    0,
28 +
    n_obs,
29 +
    [&Y,
30 +
     &X,
31 +
     &W,
32 +
     &ybar,
33 +
     &SSE,
34 +
     &SST
35 +
    ] (unsigned int d){
36 +
      RcppThread::checkUserInterrupt();
21 37
22 -
    R_CheckUserInterrupt();
38 +
      // Yhat = X %*% W. But doing it funny below to optimize calculation
39 +
      double sse = 0;
40 +
      double sst = 0;
23 41
24 -
    // Yhat = X %*% W. But doing it funny below to optimize calculation
25 -
    double sse = 0;
26 -
    double sst = 0;
42 +
      for(int v = 0; v < W.ncol(); v++ ){
43 +
        double Yhat = 0;
27 44
28 -
    for(int v = 0; v < n_explicit_vars; v++ ){
29 -
      double Yhat = 0;
45 +
        for(int k = 0; k < X.ncol(); k++ ){
46 +
          Yhat = Yhat + X(d , k ) * W(k , v );
47 +
        }
30 48
31 -
      for(int k = 0; k < n_latent_vars; k++ ){
32 -
        Yhat = Yhat + X(d , k ) * W(k , v );
33 -
      }
49 +
        sse = sse + ((Y(v, d) - Yhat) * (Y(v, d) - Yhat));
34 50
35 -
      sse = sse + ((Y(d , v) - Yhat) * (Y(d , v) - Yhat));
51 +
        sst = sst + ((Y(v, d) - ybar[ v ]) * (Y(v, d) - ybar[ v ]));
36 52
37 -
      sst = sst + ((Y(d , v) - ybar[ v ]) * (Y(d , v) - ybar[ v ]));
53 +
      }
38 54
39 -
    }
55 +
      SSE = SSE + sse;
40 56
41 -
    SSE = SSE + sse;
57 +
      SST = SST + sst;
58 +
    },
59 +
    threads);
42 60
43 -
    SST = SST + sst;
44 -
  }
45 61
46 62
  result[ 0 ] = SSE;
47 63
  result[ 1 ] = SST;

@@ -12,6 +12,7 @@
Loading
12 12
#'             computation in batches.
13 13
#' @param return_ss_only Logical. Do you want to forego calculating R-squared and
14 14
#'                       only return the sums of squares?
15 +
#' @param threads Integer number of threads for parallelism; defaults to 1.
15 16
#' @return
16 17
#'     If \code{return_ss_only = FALSE}, \code{calc_rsqured} returns a numeric
17 18
#'     scalar R-squared. If \code{return_ss_only = TRUE}, \code{calc_rsqured}
@@ -67,34 +68,14 @@
Loading
67 68
#'
68 69
#' # multivariate r-squared with yhat as a linear reconstruction of two matrices
69 70
#' calc_rsquared(y = cbind(y, y), yhat = list(x, cbind(w,w)))
70 -
#'
71 -
#' # multivariate r-squared calculated in parallel
72 -
#' \donttest{
73 -
#' library(furrr)
74 -
#'
75 -
#' plan(multiprocess)
76 -
#'
77 -
#' batches <- list(list(y = cbind(y[1:16], y[1:16]), yhat = cbind(yhat[1:16], yhat[1:16])),
78 -
#'                 list(y = cbind(y[17:32], y[17:32]), yhat = cbind(yhat[17:32], yhat[17:32])))
79 -
#'
80 -
#' ybar <- c(mean(y), mean(y))
81 -
#'
82 -
#' ss <- future_map(.x = batches,
83 -
#'                  .f = function(ybatch){
84 -
#'                    calc_rsquared(y = ybatch$y, yhat = ybatch$yhat,
85 -
#'                                  ybar = ybar, return_ss_only = TRUE)
86 -
#'                  },
87 -
#'                  .options = future_options(scheduling = 2))
88 -
#'
89 -
#'
90 -
#' sse <- sum(sapply(ss, function(x) x["sse"]))
91 -
#'
92 -
#' sst <- sum(sapply(ss, function(x) x["sst"]))
93 -
#'
94 -
#' 1 - sse / sst # final r-squared value here
95 -
#'}
96 71
#' @export
97 -
calc_rsquared <- function(y, yhat, ybar = NULL, return_ss_only = FALSE) {
72 +
calc_rsquared <- function(
73 +
  y,
74 +
  yhat,
75 +
  ybar = NULL,
76 +
  return_ss_only = FALSE,
77 +
  threads = 1
78 +
) {
98 79
99 80
  ### Preliminary input checking ----
100 81
  if (! is.logical(return_ss_only) || is.na(return_ss_only))
@@ -121,10 +102,13 @@
Loading
121 102
  if (is.null(ybar))
122 103
    ybar <- Matrix::colMeans(Y)
123 104
124 -
  result <- calc_sum_squares_latent(Y = Y,
125 -
                                    X = Yhat[[1]],
126 -
                                    W = Yhat[[2]],
127 -
                                    ybar = ybar)
105 +
  result <- calc_sum_squares_latent(
106 +
    Y = t(Y), # transpose so we can take advantage of column major form
107 +
    X = Yhat[[1]],
108 +
    W = Yhat[[2]],
109 +
    ybar = ybar,
110 +
    threads = threads
111 +
  )
128 112
129 113
  if (return_ss_only) {
130 114

Everything is accounted for!

No changes detected that need to be reviewed.
What changes does Codecov check for?
Lines, not adjusted in diff, that have changed coverage data.
Files that introduced coverage data that had none before.
Files that have missing coverage data that once were tracked.
Files Coverage
R/calc_rsquared.R 0.05% 97.87%
src/calc_sum_squares_latent.cpp 100.00%
Project Totals (2 files) 98.28%
Loading