mmollina / MAPpoly
Showing 2 of 7 files from the diff.

@@ -499,4 +499,78 @@
Loading
499 499
  delete [] G;
500 500
  return(out);
501 501
}
502 +
RcppExport SEXP pairwise_given_0rf(SEXP m_R,
503 +
                                   SEXP mrk_pairs_R,
504 +
                                   SEXP geno_R,
505 +
                                   SEXP dP_R,
506 +
                                   SEXP dQ_R,
507 +
                                   SEXP count_cache_R)
508 +
{
509 +
  Rcpp::NumericMatrix mrk_pairs = Rcpp::as<Rcpp::NumericMatrix>(mrk_pairs_R);
510 +
  Rcpp::NumericMatrix geno = Rcpp::as<Rcpp::NumericMatrix>(geno_R);
511 +
  Rcpp::NumericVector dP = Rcpp::as<Rcpp::NumericVector>(dP_R);
512 +
  Rcpp::NumericVector dQ = Rcpp::as<Rcpp::NumericVector>(dQ_R);
513 +
  Rcpp::List count_cache = Rcpp::as<Rcpp::List>(count_cache_R);
514 +
  Rcpp::NumericVector d_pair(4);
515 +
  Rcpp::List out(mrk_pairs.ncol());
516 +
  int m = Rcpp::as<int>(m_R);
517 +
  int n_ind = geno.ncol();
518 +
  for(int k=0; k < mrk_pairs.ncol(); k++)
519 +
  {
520 +
    //Rcpp::Rcout << mrk_pairs(0,k) << " - " << mrk_pairs(1,k) <<  std::endl;
521 +
    int id = (m+1)*(m+1)*(m+1)*dQ[mrk_pairs(1,k)]+(m+1)*(m+1)*dQ[mrk_pairs(0,k)]+(m+1)*dP[mrk_pairs(1,k)]+dP[mrk_pairs(0,k)]+1;
522 +
    //Rcpp::Rcout << "id: " << id <<  std::endl;
523 +
    Rcpp::List temp_list = count_cache[(id-1)];
524 +
    //Rcpp::List temp_list = count_cache[k];
525 +
    if(temp_list.size() > 1)
526 +
    {
527 +
      NumericVector gen_1 = geno( mrk_pairs(0,k), _);
528 +
      NumericVector gen_2 = geno( mrk_pairs(1,k), _);
529 +
      Rcpp::NumericMatrix res(3, temp_list.size());
530 +
      Rcpp::CharacterVector zn = temp_list.attr( "names" ) ;
531 +
      colnames(res)=zn;
532 +
      for(int i=0; i < temp_list.size(); i++)
533 +
      {
534 +
        Rcpp::NumericMatrix count_mat = temp_list[i] ;
535 +
        Rcpp::List dimnames = count_mat.attr( "dimnames" ) ;
536 +
        Rcpp::CharacterVector z = dimnames[0];
537 +
        Rcpp::NumericVector dk(z.size()), dk1(z.size());
538 +
        std::string delimiter = " ";
539 +
        for(int j=0; j < z.size(); j++)
540 +
        {
541 +
          std::string lnames = Rcpp::as<std::string>(z(j));
542 +
          dk(j) = std::stoi(lnames.substr(0,lnames.find(delimiter)));
543 +
          dk1(j) = std::stoi(lnames.substr(lnames.find(delimiter)+1, lnames.length()));
544 +
        }
545 +
        vector<double> x{0.0, 0.2, 0.5};
546 +
        double lltemp, ll, curx;
547 +
        ll = twopt_likelihood_dosage(x[0], m, n_ind, dP[mrk_pairs(0,k)], dQ[mrk_pairs(0,k)], dk, dk1, gen_1, gen_2, count_mat);
548 +
        curx = x[0];
549 +
        for(int j=1; j < x.size(); j++)
550 +
        {
551 +
          lltemp = twopt_likelihood_dosage(x[j], m, n_ind, dP[mrk_pairs(0,k)], dQ[mrk_pairs(0,k)], dk, dk1, gen_1, gen_2, count_mat);
552 +
          if(ll - lltemp < 0.0) break;
553 +
          else{
554 +
            curx = x[j];
555 +
            ll = lltemp;
556 +
          }
557 +
        }
558 +
          res(0,i) = curx;
559 +
          res(1,i) = ll;
560 +
          res(2,i) = ll;
561 +
      }
562 +
      out(k)=res;
563 +
    }
564 +
    else
565 +
    {
566 +
      Rcpp::NumericVector d_out(4);
567 +
      d_out(0)=dP[mrk_pairs(0,k)];
568 +
      d_out(1)=dP[mrk_pairs(1,k)];
569 +
      d_out(2)=dQ[mrk_pairs(0,k)];
570 +
      d_out(3)=dQ[mrk_pairs(1,k)];
571 +
      out(k)=d_out;
572 +
    }
573 +
  }
574 +
  return(out);
575 +
}
502 576
//end of file two_pts_est.cpp

@@ -22,7 +22,10 @@
Loading
22 22
#'    time. However, it will require less memory. See examples.
23 23
#'    
24 24
#' @param est.type  Indicates whether to use the discrete ("disc") or the probabilistic ("prob") dosage scoring 
25 -
#'                  when estimating the two-point recombination fractions. 
25 +
#'                  when estimating the two-point recombination fractions. The option 
26 +
#'                  "sparse_grid" is for development purposes use only. 
27 +
#'                  It compares the likelihoods of a set of recombination fractions and returns the most likely.
28 +
#'                  Currently, it tests rf = c(0.0, 0.2, 0.5).
26 29
#'
27 30
#' @param verbose If \code{TRUE} (default), current progress is shown; if
28 31
#'     \code{FALSE}, no output is produced
@@ -84,7 +87,7 @@
Loading
84 87
#' @importFrom dplyr filter arrange
85 88
est_pairwise_rf <- function(input.seq, count.cache = NULL, ncpus = 1L,
86 89
                            mrk.pairs = NULL, n.batches = 1L,
87 -
                            est.type = c("disc","prob"),
90 +
                            est.type = c("disc","prob","sparse_grid"),
88 91
                            verbose = TRUE, memory.warning = TRUE, 
89 92
                            parallelization.type = c("PSOCK", "FORK"), 
90 93
                            tol = .Machine$double.eps^0.25)
@@ -130,12 +133,12 @@
Loading
130 133
  }
131 134
  est.type = match.arg(est.type)
132 135
  ## Checking for genotype probability 
133 -
  if(!exists('geno', where = get(input.seq$data.name, pos = 1)) & est.type != "disc"){
136 +
  if(!exists('geno', where = get(input.seq$data.name, pos = 1)) & est.type == "prob"){
134 137
    warning("There is no probabilistic dosage scoring in the dataset. Using est.type = 'disc'")
135 138
    est.type <- "disc"
136 139
  }
137 140
  ## get genotypes
138 -
  if(est.type  ==  "disc"){
141 +
  if(est.type  ==  "disc" || est.type == "sparse_grid"){
139 142
    geno <- as.matrix(get(input.seq$data.name, pos = 1)$geno.dose)
140 143
  } else {
141 144
    d1 <- get(input.seq$data.name, pos = 1)$geno 
@@ -169,6 +172,8 @@
Loading
169 172
        parallel::clusterExport(cl, "paralell_pairwise_discrete")
170 173
      if(est.type  ==  "prob")
171 174
        parallel::clusterExport(cl, "paralell_pairwise_probability")
175 +
      if(est.type  ==  "sparse_grid")
176 +
        parallel::clusterExport(cl, "paralell_pairwise_given_sparse_grid")
172 177
      on.exit(parallel::stopCluster(cl))
173 178
      if(est.type  ==  "disc"){
174 179
        res <- parallel::parLapply(cl,
@@ -192,6 +197,17 @@
Loading
192 197
                                   count.cache = count.cache,
193 198
                                   tol = tol)
194 199
      } 
200 +
      else if(est.type  ==  "sparse_grid"){
201 +
        res <- parallel::parLapply(cl,
202 +
                                   input.list,
203 +
                                   paralell_pairwise_given_sparse_grid,
204 +
                                   input.seq = input.seq,
205 +
                                   geno = geno,
206 +
                                   dP = get(input.seq$data.name)$dosage.p1,
207 +
                                   dQ = get(input.seq$data.name)$dosage.p2,
208 +
                                   count.cache = count.cache)
209 +
      } 
210 +
      
195 211
      end <- proc.time()
196 212
      if (verbose) {
197 213
        cat("INFO: Done with",
@@ -228,6 +244,14 @@
Loading
228 244
                      dQ = get(input.seq$data.name)$dosage.p2,
229 245
                      count.cache = count.cache,
230 246
                      tol = tol)
247 +
      } else if(est.type  ==  "sparse_grid"){
248 +
        res <- lapply(input.list,
249 +
                      paralell_pairwise_given_sparse_grid,
250 +
                      input.seq = input.seq,
251 +
                      geno = geno,
252 +
                      dP = get(input.seq$data.name)$dosage.p1,
253 +
                      dQ = get(input.seq$data.name)$dosage.p2,
254 +
                      count.cache = count.cache)
231 255
      } 
232 256
    }
233 257
    res <- unlist(res,
@@ -362,13 +386,28 @@
Loading
362 386
  return(lapply(res, format_rf))
363 387
}
364 388
365 -
366 -
367 -
368 -
369 -
370 -
371 -
389 +
#' Wrapper function to discrete-based pairwise likelihood computation given rf = 1e-5
390 +
#'
391 +
#' @param void internal function to be documented
392 +
#' @keywords internal
393 +
#' @export
394 +
paralell_pairwise_given_sparse_grid <- function(mrk.pairs,
395 +
                                       input.seq,
396 +
                                       geno,
397 +
                                       dP,
398 +
                                       dQ,
399 +
                                       count.cache)
400 +
{
401 +
  res <- .Call("pairwise_given_0rf",
402 +
               input.seq$ploidy,
403 +
               as.matrix(mrk.pairs),
404 +
               as.matrix(geno),
405 +
               as.vector(dP),
406 +
               as.vector(dQ),
407 +
               count.cache$cond,
408 +
               PACKAGE = "mappoly")
409 +
  return(lapply(res, format_rf))
410 +
}
372 411
373 412
#' Format results from pairwise two-point estimation in C++
374 413
#'
Files Coverage
R 69.07%
src 74.53%
Project Totals (59 files) 70.32%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading