mmollina / MAPpoly

Compare 95236d0 ... +0 ... fb39440

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then setup custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -130,6 +130,7 @@
Loading
130 130
#'
131 131
#' @param void internal function to be documented
132 132
#' @keywords internal
133 +
#' @export
133 134
create_map <- function(input.map, step = 0,
134 135
                     phase.config = "best")
135 136
{

@@ -0,0 +1,413 @@
Loading
1 +
/*
2 +
 * MAPpoly: a package to construct genetic maps in autopolyploids
3 +
 Copyright (C) 2014-2020 Marcelo Mollinari
4 +
 
5 +
 This file is part of MAPpoly.
6 +
 
7 +
 MAPpoly is free software: you can redistribute it and/or modify
8 +
 it under the terms of the GNU General Public License as published by
9 +
 the Free Software Foundation, either version 3 of the License, or
10 +
 (at your option) any later version.
11 +
 
12 +
 This program is distributed in the hope that it will be useful,
13 +
 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 +
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 +
 GNU General Public License for more details.
16 +
 
17 +
 For a copy of the GNU General Public License, please visit
18 +
 <http://www.gnu.org/licenses/>.
19 +
 
20 +
 */ 
21 +
22 +
/*
23 +
 File: hmm_one_parent.cpp
24 +
 
25 +
 Description: Set of functions to be used with software R
26 +
 
27 +
 Implements the methodology of Hidden Markov Models (HMM) to
28 +
 construct multipoint linkage maps in full-sib populations in
29 +
 autopolyploid species.
30 +
 
31 +
 Functions Written by Marcelo Mollinari.
32 +
 
33 +
 Bioinformatics Research Center
34 +
 Department of Horticultural Science
35 +
 North Carolina State University
36 +
 Contact: mmollin@ncsu.edu
37 +
 First version:       2021
38 +
 Last update: Sep 14, 2021
39 +
 */
40 +
41 +
#include <Rcpp.h>
42 +
#include <algorithm>
43 +
#include <iostream>
44 +
#include <vector>
45 +
#include <math.h>
46 +
#include <R.h>
47 +
#include <Rmath.h>
48 +
#include <R_ext/PrtUtil.h>
49 +
#include "combinatorial.h"
50 +
#include "est_hmm_map_based_on_phased_mrk_blocks.h"
51 +
#include "hmm_elements.h"
52 +
using namespace std;
53 +
using namespace Rcpp;
54 +
55 +
RcppExport SEXP est_hmm_map_one_parent(SEXP ploidyR,
56 +
                                       SEXP n_mrkR,
57 +
                                       SEXP n_indR,
58 +
                                       SEXP haploR,
59 +
                                       SEXP emitR,
60 +
                                       SEXP rfR,
61 +
                                       SEXP verboseR,
62 +
                                       SEXP tolR,
63 +
                                       SEXP ret_H0R)
64 +
{
65 +
  //convert input to C++ types
66 +
  int m = Rcpp::as<int>(ploidyR);
67 +
  int n_mrk = Rcpp::as<int>(n_mrkR);
68 +
  int n_ind = Rcpp::as<int>(n_indR);
69 +
  Rcpp::List haplo(haploR);
70 +
  Rcpp::List emit(emitR);
71 +
  Rcpp::NumericVector rf(rfR);
72 +
  int verbose = Rcpp::as<int>(verboseR);
73 +
  int ret_H0 = Rcpp::as<int>(ret_H0R);
74 +
  double tol = Rcpp::as<double>(tolR);
75 +
  
76 +
  //Initializing some variables
77 +
  int k, k1,  maxit = 1000, flag=0;
78 +
  double s, loglike=0.0, nr=0.0, temp=0.0;
79 +
  std::vector<double> rf_cur(rf.size());
80 +
  std::vector<double> term(n_ind);
81 +
  std::fill(term.begin(), term.end(), 0.0);
82 +
  
83 +
  if(verbose)
84 +
  {
85 +
    Rcpp::Rcout << "\tPloidy level: " << m << "\n" ;
86 +
    Rcpp::Rcout << "\tNumber of markers: " << n_mrk << "\n" ;
87 +
    Rcpp::Rcout << "\tNumber of individuals: " << n_ind << "\n" ;
88 +
  }
89 +
  
90 +
  //Initializing v: states hmm should visit for each marker
91 +
  //Initializing e: emission probabilities associated to the states hmm should visit for each marker
92 +
  std::vector<std::vector<std::vector<int> > > v;
93 +
  std::vector<std::vector<std::vector<double> > > e;
94 +
  for(int i=0; i < haplo.size(); i++) // i: number of markers
95 +
  {
96 +
    Rcpp::List haplo_temp(haplo(i)); //states hmm should visit for marker i
97 +
    Rcpp::List emit_temp(emit(i)); //emission probs. for states hmm should visit for marker i
98 +
    std::vector<std::vector<int> > v1;
99 +
    std::vector<std::vector<double> > e1;
100 +
    for(int j=0; j < haplo_temp.size(); j++) //iterate for all j individuals
101 +
    {
102 +
      Rcpp::NumericVector M_temp = haplo_temp(j);
103 +
      Rcpp::NumericVector E_temp = emit_temp(j);
104 +
      std::vector<int> v2 = Rcpp::as<std::vector<int> >(M_temp);
105 +
      std::vector<double> e2 = Rcpp::as<std::vector<double> >(E_temp);
106 +
      v1.push_back(v2);
107 +
      e1.push_back(e2);
108 +
    }
109 +
    v.push_back(v1);
110 +
    e.push_back(e1);
111 +
  }
112 +
  
113 +
  //Initializing alpha and beta
114 +
  std::vector<std::vector<std::vector<double> > > alpha(n_ind);
115 +
  std::vector<std::vector<std::vector<double> > > beta(n_ind);
116 +
  for(int ind=0; ind < n_ind; ind++)
117 +
  {
118 +
    for(int i=0; i < n_mrk; i++)
119 +
    {
120 +
      std::vector<double> temp3(v[i][ind].size());
121 +
      alpha[ind].push_back(temp3);
122 +
      beta[ind].push_back(temp3);
123 +
    }
124 +
  }
125 +
  
126 +
  //Initializing recombination number matrix
127 +
  std::vector< std::vector<double> > R;
128 +
  R = rec_num(m);
129 +
130 +
  //begin EM algorithm
131 +
  for(int it=0; it<maxit; it++)
132 +
  {
133 +
    //Initializing recombination fraction vector for Baum-Welch
134 +
    for(int j=0; j<n_mrk-1; j++)
135 +
    {
136 +
      rf_cur[j] = rf[j];
137 +
      rf[j] = 0.0;
138 +
    }
139 +
    //Initializing transition matrices
140 +
    std::vector< std::vector< std::vector<double> > > T;
141 +
    for(int i=0; i < n_mrk-1; i++)
142 +
    {
143 +
      T.push_back(transition(m, rf_cur[i]));
144 +
    }
145 +
    //Loop over all individuals
146 +
    for(int ind=0; ind < n_ind; ind++)
147 +
    {
148 +
      R_CheckUserInterrupt();
149 +
      for(int j=0; (unsigned)j < e[0][ind].size(); j++)
150 +
      {
151 +
        alpha[ind][0][j] = e[0][ind][j];
152 +
      }
153 +
      std::fill(beta[ind][n_mrk-1].begin(), beta[ind][n_mrk-1].end(), 1);
154 +
      //forward-backward
155 +
      for(k=1,k1=n_mrk-2; k < n_mrk; k++, k1--)
156 +
      {
157 +
        std::vector<double> temp4 (v[k][ind].size());
158 +
        temp4 = forward_emit_one_parent(m, alpha[ind][k-1], v[k-1][ind], v[k][ind], e[k][ind], T[k-1]);
159 +
        for(int j=0; (unsigned)j < temp4.size(); j++)
160 +
        {
161 +
          alpha[ind][k][j]=temp4[j];
162 +
        }
163 +
        std::vector<double> temp5 (v[k1][ind].size());
164 +
        temp5=backward_emit_one_parent(m, beta[ind][k1+1], v[k1][ind], v[k1+1][ind], e[k1+1][ind], T[k1]);
165 +
        for(int j=0; (unsigned)j < temp5.size(); j++)
166 +
        {
167 +
          beta[ind][k1][j]=temp5[j];
168 +
        }
169 +
      }
170 +
      if(ret_H0 == 0)
171 +
      {
172 +
        //Updating recombination fraction
173 +
        for(int k = 0; k < n_mrk-1; k++)
174 +
        {
175 +
          vector<vector<double> > gamma(alpha[ind][k].size(), vector<double>(beta[ind][k+1].size()));
176 +
          s=0.0;
177 +
          int ngeni = alpha[ind][k].size();
178 +
          int ngenj = beta[ind][k+1].size();
179 +
          for(int i = 0; i < ngeni; i++)
180 +
          {
181 +
            for(int j = 0; j < ngenj; j++)
182 +
            {
183 +
              gamma[i][j] = alpha[ind][k][i] * beta[ind][k+1][j] *
184 +
                T[k][v[k][ind][i]][v[k+1][ind][j]];
185 +
              if(i==0 && j==0) s = gamma[i][j];
186 +
              else s += gamma[i][j];
187 +
            }
188 +
          }
189 +
          for(int i=0; i < ngeni; i++)
190 +
          {
191 +
            for(int j=0; j < ngenj; j++)
192 +
            {
193 +
              nr = R[v[k][ind][i]][v[k+1][ind][j]] * 2;
194 +
              if(s > 0) // Verify theoretical implications of this condition
195 +
              rf[k] +=  nr * gamma[i][j]/s;
196 +
            }
197 +
          }
198 +
        }
199 +
      }
200 +
      //Termination
201 +
      for(int j=0; (unsigned)j < alpha[ind][n_mrk-1].size(); j++)
202 +
      {
203 +
        term[ind] +=  alpha[ind][n_mrk-1][j];
204 +
      }
205 +
    } // loop over individuals
206 +
207 +
    //Likelihood using a specific recombination fraction vector
208 +
    //Usually, this is used to compute LOD Score under H0: rf=0.5
209 +
    if(ret_H0 == 1)
210 +
    {
211 +
      //Loglike computation
212 +
      for(int i=0; (unsigned)i < alpha.size(); i++)
213 +
      {
214 +
        temp=0.0;
215 +
        for(int j=0; (unsigned)j < alpha[i][n_mrk-1].size(); j++)
216 +
          temp += alpha[i][n_mrk-1][j];
217 +
          if(temp > 0)
218 +
            loglike += log10(temp);
219 +
      }
220 +
      if(verbose)
221 +
        Rcpp::Rcout << "\n";
222 +
      List z = List::create(wrap(loglike), rf_cur);
223 +
      return(z);
224 +
    }
225 +
    // re-scale
226 +
    for(int j=0; j<n_mrk-1; j++)
227 +
    {
228 +
      rf[j] /= (double)n_ind;
229 +
      if(rf[j] < tol/100.0) rf[j] = tol/100.0;
230 +
      else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0;
231 +
    }
232 +
    // check convergence
233 +
    flag=0;
234 +
    for(int j=0; j < n_mrk-1; j++)
235 +
    {
236 +
      if(fabs(rf[j] - rf_cur[j]) > tol*(rf_cur[j]+tol*100.0))
237 +
      {
238 +
        flag = 1;
239 +
        break;
240 +
      }
241 +
    }
242 +
    if(verbose)
243 +
    {
244 +
      Rcpp::Rcout << "\t\n Iter: " << it+1 << "\t";
245 +
      for(int j = 0; j < n_mrk-1; j++)
246 +
      {
247 +
        Rcpp::Rcout.precision(3);
248 +
        Rcpp::Rcout << std::fixed << rf[j] << " ";
249 +
      }
250 +
    }
251 +
    if(!flag) break;
252 +
  }//end of EM algorithm
253 +
  if(flag && verbose) Rcpp::Rcout << "Didn't converge!\n";
254 +
255 +
  //Loglike computation
256 +
  for(int i=0; (unsigned)i < alpha.size(); i++)
257 +
  {
258 +
    temp=0.0;
259 +
    for(int j=0; (unsigned)j < alpha[i][n_mrk-1].size(); j++)
260 +
      temp += alpha[i][n_mrk-1][j];
261 +
      if(temp > 0)
262 +
        loglike += log10(temp);
263 +
  }
264 +
  
265 +
  
266 +
  // for(int k = 0; k < n_mrk; k++)
267 +
  // {
268 +
  //   for(int j = 0; j < alpha[1][k].size(); j++){
269 +
  //     Rcpp::Rcout.precision(10);
270 +
  //     Rcpp::Rcout << std::fixed << alpha[1][k][j] << " ";
271 +
  //   }
272 +
  //   Rcpp::Rcout << "\n" ;
273 +
  // }
274 +
  
275 +
  
276 +
  
277 +
  if(verbose)
278 +
    Rcpp::Rcout << "\n";
279 +
  List z = List::create(wrap(loglike), rf);
280 +
  return(z);
281 +
}
282 +
283 +
RcppExport SEXP calc_genprob_one_parent(SEXP ploidyR,
284 +
                                        SEXP n_mrkR,
285 +
                                        SEXP n_indR,
286 +
                                        SEXP haploR,
287 +
                                        SEXP emitR,
288 +
                                        SEXP rfR,
289 +
                                        SEXP probsR,
290 +
                                        SEXP verboseR)
291 +
{
292 +
  //convert input to C++ types
293 +
  int m = Rcpp::as<int>(ploidyR);
294 +
  int n_mrk = Rcpp::as<int>(n_mrkR);
295 +
  int n_ind = Rcpp::as<int>(n_indR);
296 +
  Rcpp::List haplo(haploR);
297 +
  Rcpp::List emit(emitR);
298 +
  Rcpp::NumericVector rf(rfR);
299 +
  int verbose = Rcpp::as<int>(verboseR);
300 +
  std::vector<long double> probs = Rcpp::as<std::vector<long double> >(probsR);
301 +
  
302 +
  //Initializing some variables
303 +
  int g = nChoosek(m, m/2), k, k1, count = 0;
304 +
  
305 +
  if(verbose)
306 +
  {
307 +
    Rcpp::Rcout << "\tPloidy level: " << m << "\n" ;
308 +
    Rcpp::Rcout << "\tNumber of markers: " << n_mrk << "\n" ;
309 +
    Rcpp::Rcout << "\tNumber of individuals: " << n_ind << "\n" ;
310 +
  }
311 +
  //Initializing v: states hmm should visit for each marker
312 +
  //Initializing e: emission probabilities associated to the states hmm should visit for each marker
313 +
  std::vector<std::vector<std::vector<int> > > v;
314 +
  std::vector<std::vector<std::vector<double> > > e;
315 +
  for(int i=0; i < haplo.size(); i++) // i: number of markers
316 +
  {
317 +
    Rcpp::List haplo_temp(haplo(i)); //states hmm should visit for marker i
318 +
    Rcpp::List emit_temp(emit(i)); //emission probs. for states hmm should visit for marker i
319 +
    std::vector<std::vector<int> > v1;
320 +
    std::vector<std::vector<double> > e1;
321 +
    for(int j=0; j < haplo_temp.size(); j++) //iterate for all j individuals
322 +
    {
323 +
      Rcpp::NumericVector M_temp = haplo_temp(j);
324 +
      Rcpp::NumericVector E_temp = emit_temp(j);
325 +
      std::vector<int> v2 = Rcpp::as<std::vector<int> >(M_temp);
326 +
      std::vector<double> e2 = Rcpp::as<std::vector<double> >(E_temp);
327 +
      v1.push_back(v2);
328 +
      e1.push_back(e2);
329 +
    }
330 +
    v.push_back(v1);
331 +
    e.push_back(e1);
332 +
  }
333 +
  //Initializing alpha and beta
334 +
  std::vector<std::vector<std::vector<double> > > alpha(n_ind);
335 +
  std::vector<std::vector<std::vector<double> > > beta(n_ind);
336 +
  for(int ind=0; ind < n_ind; ind++)
337 +
  {
338 +
    for(int i=0; i < n_mrk; i++)
339 +
    {
340 +
      std::vector<double> temp3(v[i][ind].size());
341 +
      alpha[ind].push_back(temp3);
342 +
      beta[ind].push_back(temp3);
343 +
    }
344 +
  }
345 +
  //Initializing transition matrices
346 +
  std::vector< std::vector< std::vector<double> > > T;
347 +
  for(int i=0; i < n_mrk-1; i++)
348 +
  {
349 +
    T.push_back(transition(m, rf[i]));
350 +
  }
351 +
  //Loop over all individuals
352 +
  for(int ind=0; ind < n_ind; ind++)
353 +
  {
354 +
    R_CheckUserInterrupt();
355 +
    for(int j=0; (unsigned)j < e[0][ind].size(); j++)
356 +
    {
357 +
      alpha[ind][0][j] = e[0][ind][j];
358 +
    }
359 +
    std::fill(beta[ind][n_mrk-1].begin(), beta[ind][n_mrk-1].end(), 1);
360 +
    
361 +
    //forward-backward
362 +
    for(k=1,k1=n_mrk-2; k < n_mrk; k++, k1--)
363 +
    {
364 +
      std::vector<double> temp4 (v[k][ind].size());
365 +
      temp4 = forward_emit_one_parent(m, alpha[ind][k-1], v[k-1][ind], v[k][ind], e[k][ind], T[k-1]);
366 +
      for(int j=0; (unsigned)j < temp4.size(); j++)
367 +
      {
368 +
        alpha[ind][k][j]=temp4[j];
369 +
      }
370 +
      std::vector<double> temp5 (v[k1][ind].size());
371 +
      temp5=backward_emit_one_parent(m, beta[ind][k1+1], v[k1][ind], v[k1+1][ind], e[k1+1][ind], T[k1]);
372 +
      for(int j=0; (unsigned)j < temp5.size(); j++)
373 +
      {
374 +
        beta[ind][k1][j]=temp5[j];
375 +
      }
376 +
    }
377 +
  } // loop over individuals
378 +
  
379 +
380 +
  // for(int k = 0; k < n_mrk; k++)
381 +
  // {
382 +
  //   for(int j = 0; j < alpha[1][k].size(); j++){
383 +
  //     Rcpp::Rcout.precision(10);
384 +
  //     Rcpp::Rcout << std::fixed << alpha[1][k][j] << " ";
385 +
  //   }
386 +
  //   Rcpp::Rcout << "\n" ;
387 +
  // }
388 +
  
389 +
  
390 +
  
391 +
  for(int ind=0; ind < n_ind; ind++)
392 +
  {
393 +
    std::vector<double> gamma(g);
394 +
    for(int k=0; k < n_mrk; k++)
395 +
    {
396 +
      std::fill(gamma.begin(), gamma.end(), 0);
397 +
      long double w = 0.0;
398 +
      for(int j=0; (unsigned)j < alpha[ind][k].size(); j++)
399 +
        w += alpha[ind][k][j]*beta[ind][k][j];
400 +
      for(int j=0; (unsigned)j < alpha[ind][k].size(); j++)
401 +
        gamma[v[k][ind][j]] = alpha[ind][k][j]*beta[ind][k][j]/w;
402 +
      for(int j=0; j < g; j++)
403 +
      {
404 +
        probs[count] = gamma[j];
405 +
        count++;
406 +
      }
407 +
    }
408 +
  }
409 +
  List z  = List::create(probs);
410 +
  return z ;
411 +
}
412 +
413 +

@@ -89,7 +89,7 @@
Loading
89 89
  pair_input <- input.twopt$pairwise
90 90
  marnames <- rownames(get(input.twopt$data.name, pos = 1)$geno.dose)[sort(input.twopt$seq.num)]
91 91
  marindex <- sort(input.twopt$seq.num)
92 -
  lod.mat <- rec.mat <- matrix(NA, input.twopt$n.mrk, input.twopt$n.mrk)
92 +
  lod.ph.mat <- lod.mat <- rec.mat <- matrix(NA, input.twopt$n.mrk, input.twopt$n.mrk)
93 93
  if(shared.alleles)
94 94
    ShP <- ShQ <- lod.mat
95 95
  #### UPDATE: instead of recovering the order from names, provide using the object 'input.twopt'
@@ -121,39 +121,21 @@
Loading
121 121
    if (verbose) {
122 122
      cat("INFO: Going singlemode. Using one CPU.\n")
123 123
    }
124 -
    rf.lod.mat <- sapply(pair_input, function(x, thresh.LOD.ph, thresh.LOD.rf, thresh.rf, shared.alleles)
125 -
    {
126 -
      if(any(is.na(x)))
127 -
      {
128 -
        if(shared.alleles){return(c(NA,NA,NA,NA))} else return(c(NA,NA))
129 -
      }
130 -
      if((nrow(x)  ==  1 || abs(x[2 , 1]) >= thresh.LOD.ph) &&
131 -
         abs(x[1, 3]) >= thresh.LOD.rf &&
132 -
         abs(x[1, 2]) <= thresh.rf)
133 -
      {
134 -
        if(shared.alleles){
135 -
          y <- strsplit(rownames(x), "-")
136 -
          return(c(x[1,2:3], as.numeric(y[[1]][1]), as.numeric(y[[1]][2])))
137 -
        } else {
138 -
          return(x[1,2:3])          
139 -
        }
140 -
      }
141 -
      else{
142 -
        {
143 -
          if(shared.alleles){return(c(NA,NA,NA,NA))} else return(c(NA,NA))
144 -
        }
145 -
      }
146 -
    }, thresh.LOD.ph, thresh.LOD.rf, thresh.rf, shared.alleles)
124 +
    rf.lod.mat <- sapply(pair_input, select_rf, thresh.LOD.ph, thresh.LOD.rf, thresh.rf, shared.alleles)
147 125
  }
148 126
  rec.mat[lower.tri(rec.mat)] <- as.numeric(rf.lod.mat[1,])
149 127
  rec.mat[upper.tri(rec.mat)] <- t(rec.mat)[upper.tri(rec.mat)]
150 128
  lod.mat[lower.tri(lod.mat)] <- as.numeric(rf.lod.mat[2,])
151 129
  lod.mat[upper.tri(lod.mat)] <- t(lod.mat)[upper.tri(lod.mat)]
152 -
  dimnames(rec.mat) <- dimnames(lod.mat) <- list(marnames, marnames)
130 +
  lod.ph.mat[lower.tri(lod.ph.mat)] <- as.numeric(rf.lod.mat[3,])
131 +
  lod.ph.mat[upper.tri(lod.ph.mat)] <- t(lod.ph.mat)[upper.tri(lod.ph.mat)]
132 +
  
133 +
  
134 +
  dimnames(lod.ph.mat) <- dimnames(rec.mat) <- dimnames(lod.mat) <- list(marnames, marnames)
153 135
  if(shared.alleles){
154 -
    ShP[lower.tri(ShP)] <- as.numeric(rf.lod.mat[3,])
136 +
    ShP[lower.tri(ShP)] <- as.numeric(rf.lod.mat[4,])
155 137
    ShP[upper.tri(ShP)] <- t(ShP)[upper.tri(ShP)]
156 -
    ShQ[lower.tri(ShQ)] <- as.numeric(rf.lod.mat[4,])
138 +
    ShQ[lower.tri(ShQ)] <- as.numeric(rf.lod.mat[5,])
157 139
    ShQ[upper.tri(ShQ)] <- t(ShQ)[upper.tri(ShQ)]
158 140
    dimnames(ShP) <- dimnames(ShQ) <- list(marindex, marindex)
159 141
  } else{
@@ -164,6 +146,7 @@
Loading
164 146
                 thresh.rf = thresh.rf,
165 147
                 rec.mat = rec.mat,
166 148
                 lod.mat = abs(lod.mat),
149 +
                 lod.ph.mat = abs(lod.ph.mat),
167 150
                 ShP = ShP,
168 151
                 ShQ = ShQ,
169 152
                 data.name  = input.twopt$data.name,
@@ -269,22 +252,22 @@
Loading
269 252
{
270 253
  if(any(is.na(x)))
271 254
  {
272 -
    if(shared.alleles){return(c(NA,NA,NA,NA))} else return(c(NA,NA))
255 +
    if(shared.alleles){return(c(NA,NA,NA,NA,NA))} else return(c(NA,NA,NA))
273 256
  }
274 257
  if((nrow(x)  ==  1 || abs(x[2 , 1]) >= thresh.LOD.ph) &&
275 258
     abs(x[1, 3]) >= thresh.LOD.rf &&
276 259
     abs(x[1, 2]) <= thresh.rf)
277 260
  {
278 261
    if(shared.alleles){
279 262
      y <- strsplit(rownames(x), "-")
280 -
      return(c(x[1,2:3], as.numeric(y[[1]][1]), as.numeric(y[[1]][2])))
263 +
      return(c(x[1,2:3], x[2,1], as.numeric(y[[1]][1]), as.numeric(y[[1]][2])))
281 264
    } else {
282 -
      return(x[1,2:3])          
265 +
      return(c(x[1,2:3], x[2,1]))          
283 266
    }
284 267
  }
285 268
  else{
286 269
    {
287 -
      if(shared.alleles){return(c(NA,NA,NA,NA))} else return(c(NA,NA))
270 +
      if(shared.alleles){return(c(NA,NA,NA,NA,NA))} else return(c(NA,NA,NA))
288 271
    }
289 272
  }
290 273
}

@@ -30,7 +30,15 @@
Loading
30 30
#'
31 31
#' @param verbose if \code{TRUE}, current progress is shown; if
32 32
#'     \code{FALSE} (default), no output is produced
33 -
#'
33 +
#'     
34 +
#' @param info.parent Parent which will be considered informative in case of
35 +
#'     computing separate maps in both parents. If NULL (default), returns 
36 +
#'     the integrated map for both parents. 
37 +
#' 
38 +
#' @param uninfo.parent Parent which will be considered uninformative in case of
39 +
#'     computing separate maps in both parents. If NULL (default), returns 
40 +
#'     the integrated map for both parents. 
41 +
#' 
34 42
#' @param tol the desired accuracy (default = 1e-04)
35 43
#'
36 44
#' @param est.given.0.rf logical. If TRUE returns a map forcing all
@@ -142,6 +150,8 @@
Loading
142 150
est_rf_hmm <- function(input.seq, input.ph = NULL,
143 151
                       thres = 0.5, twopt = NULL,
144 152
                       verbose = FALSE, 
153 +
                       info.parent = NULL,
154 +
                       uninfo.parent = NULL,
145 155
                       tol = 1e-04,
146 156
                       est.given.0.rf = FALSE,
147 157
                       reestimate.single.ph.configuration = TRUE,
@@ -153,6 +163,14 @@
Loading
153 163
  }
154 164
  if(length(input.seq$seq.num)  ==  1)
155 165
    stop("Input sequence contains only one marker.", call. = FALSE)
166 +
  if(!is.null(info.parent)){
167 +
    if(is.null(uninfo.parent))
168 +
      stop("Input sequence contains only one marker.", call. = FALSE)
169 +
    P <- do.call(cbind, input.seq[grep(pattern = "seq.dose.p", names(input.seq))])
170 +
    id <- P[,info.parent] != 0 & apply(P[,uninfo.parent, drop = FALSE], 1, function(x) all(x==0))
171 +
    input.seq <- make_seq_mappoly(get(input.seq$data.name, pos = 1), names(which(id)), 
172 +
                                  data.name = input.seq$data.name)
173 +
  }
156 174
  if (is.null(input.ph)) {
157 175
    if (is.null(twopt))
158 176
      stop("Two-point analysis not found!")
@@ -166,8 +184,8 @@
Loading
166 184
  if(length(input.seq$seq.num)  ==  2 & !est.given.0.rf){
167 185
    maps <- vector("list", 1)
168 186
    res.temp <- twopt$pairwise[[paste(sort(input.seq$seq.num), collapse = "-")]]
169 -
    dp <- get(input.seq$data.name, pos  = 1)$dosage.p1[input.seq$seq.num]
170 -
    dq <- get(input.seq$data.name, pos  = 1)$dosage.p2[input.seq$seq.num]
187 +
    dp <- input.seq$seq.dose.p1
188 +
    dq <- input.seq$seq.dose.p2
171 189
    sh <- as.numeric(unlist(strsplit(rownames(res.temp)[1], split = "-")))
172 190
    res.ph <- list(P = c(list(0),list(0)), Q = c(list(0),list(0)))
173 191
    if(dp[1] != 0)
@@ -193,13 +211,13 @@
Loading
193 211
                                      n.mrk = length(input.seq$seq.num),
194 212
                                      seq.num = input.seq$seq.num,
195 213
                                      mrk.names = input.seq$seq.mrk.names,
196 -
                                      seq.dose.p1 = get(input.seq$data.name, pos  = 1)$dosage.p1[input.seq$seq.mrk.names],
197 -
                                      seq.dose.p2 = get(input.seq$data.name, pos  = 1)$dosage.p2[input.seq$seq.mrk.names],
198 -
                                      chrom = get(input.seq$data.name, pos  = 1)$chrom[input.seq$seq.mrk.names],
199 -
                                      genome.pos = get(input.seq$data.name, pos  = 1)$genome.pos[input.seq$seq.mrk.names],
200 -
                                      seq.ref = get(input.seq$data.name, pos  = 1)$seq.ref[input.seq$seq.mrk.names],
201 -
                                      seq.alt = get(input.seq$data.name, pos  = 1)$seq.alt[input.seq$seq.mrk.names],
202 -
                                      chisq.pval = get(input.seq$data.name, pos  = 1)$chisq.pval[input.seq$seq.mrk.names],
214 +
                                      seq.dose.p1 = input.seq$seq.dose.p1,
215 +
                                      seq.dose.p2 = input.seq$seq.dose.p2,
216 +
                                      chrom = input.seq$chrom,
217 +
                                      genome.pos = input.seq$genome.pos,
218 +
                                      seq.ref = input.seq$seq.ref,
219 +
                                      seq.alt = input.seq$seq.alt,
220 +
                                      chisq.pval = input.seq$chisq.pval,
203 221
                                      data.name = input.seq$data.name,
204 222
                                      ph.thresh = abs(res.temp[2,1])),
205 223
                          maps = maps),
@@ -209,10 +227,6 @@
Loading
209 227
  ret.map.no.rf.estimation <- FALSE
210 228
  if(n.ph  ==  1 && !reestimate.single.ph.configuration)
211 229
    ret.map.no.rf.estimation <- TRUE
212 -
  #if (verbose) {
213 -
  #  cat("\n    Number of linkage phase configurations: ")
214 -
  #cat("\n---------------------------------------------\n|\n|--->")
215 -
  #}
216 230
  maps <- vector("list", n.ph)
217 231
  if (verbose){
218 232
    txt <- paste0("       ",n.ph, " phase(s): ")
@@ -233,31 +247,45 @@
Loading
233 247
    }
234 248
    ph <- list(P = input.ph$config.to.test[[i]]$P,
235 249
               Q = input.ph$config.to.test[[i]]$Q)
236 -
    maps[[i]] <- est_rf_hmm_single(input.seq = input.seq,
237 -
                                   input.ph.single = ph,
238 -
                                   rf.temp = rf.temp,
239 -
                                   tol = tol,
240 -
                                   verbose = FALSE,
241 -
                                   ret.map.no.rf.estimation = ret.map.no.rf.estimation, 
242 -
                                   high.prec = high.prec)
250 +
    if(!is.null(info.parent)){
251 +
      map.temp <- est_rf_hmm_single_one_parent(input.seq = input.seq,
252 +
                                                input.ph.single = ph,
253 +
                                                info.parent = info.parent,
254 +
                                                uninfo.parent = uninfo.parent,
255 +
                                                rf.vec = rf.temp,
256 +
                                                global.err = 0.0,
257 +
                                                tol = tol,
258 +
                                                verbose = FALSE,
259 +
                                                ret.map.no.rf.estimation = ret.map.no.rf.estimation)
260 +
    maps[[i]] <- map.temp$maps[[1]]
261 +
    } else{
262 +
      maps[[i]] <- est_rf_hmm_single(input.seq = input.seq,
263 +
                                     input.ph.single = ph,
264 +
                                     rf.temp = rf.temp,
265 +
                                     tol = tol,
266 +
                                     verbose = FALSE,
267 +
                                     ret.map.no.rf.estimation = ret.map.no.rf.estimation, 
268 +
                                     high.prec = high.prec)
269 +
      
270 +
    }
243 271
  }
244 272
  #cat("\n")
245 273
  id <- order(sapply(maps, function(x) x$loglike), decreasing = TRUE)
246 274
  maps <- maps[id]
247 275
  if (verbose)
248 276
    cat("\n")
249 277
  structure(list(info = list(ploidy = input.seq$ploidy,
250 -
                             n.mrk = length(input.seq$seq.num),
251 -
                             seq.num = input.seq$seq.num,
252 -
                             mrk.names = input.seq$seq.mrk.names,
253 -
                             seq.dose.p1 = get(input.seq$data.name, pos  = 1)$dosage.p1[input.seq$seq.mrk.names],
254 -
                             seq.dose.p2 = get(input.seq$data.name, pos  = 1)$dosage.p2[input.seq$seq.mrk.names],
255 -
                             chrom = get(input.seq$data.name, pos  = 1)$chrom[input.seq$seq.mrk.names],
256 -
                             genome.pos = get(input.seq$data.name, pos  = 1)$genome.pos[input.seq$seq.mrk.names],
257 -
                             seq.ref = get(input.seq$data.name, pos  = 1)$seq.ref[input.seq$seq.mrk.names],
258 -
                             seq.alt = get(input.seq$data.name, pos  = 1)$seq.alt[input.seq$seq.mrk.names],
259 -
                             chisq.pval = get(input.seq$data.name, pos  = 1)$chisq.pval[input.seq$seq.mrk.names],
260 -
                             data.name = input.seq$data.name, ph.thresh = input.ph$thres),
278 +
                            n.mrk = length(input.seq$seq.num),
279 +
                            seq.num = input.seq$seq.num,
280 +
                            mrk.names = input.seq$seq.mrk.names,
281 +
                            seq.dose.p1 = input.seq$seq.dose.p1,
282 +
                            seq.dose.p2 = input.seq$seq.dose.p2,
283 +
                            chrom = input.seq$chrom,
284 +
                            genome.pos = input.seq$genome.pos,
285 +
                            seq.ref = input.seq$seq.ref,
286 +
                            seq.alt = input.seq$seq.alt,
287 +
                            chisq.pval = input.seq$chisq.pval,
288 +
                            data.name = input.seq$data.name),
261 289
                 maps = maps),
262 290
            class = "mappoly.map")
263 291
}

@@ -31,13 +31,13 @@
Loading
31 31
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
32 32
#' @export est_rf_hmm_single
33 33
est_rf_hmm_single <- function(input.seq,
34 -
                            input.ph.single,
35 -
                            rf.temp = NULL,
36 -
                            tol,
37 -
                            verbose = FALSE,
38 -
                            ret.map.no.rf.estimation = FALSE,
39 -
                            high.prec = TRUE,
40 -
                            max.rf.to.break.EM = 0.5)
34 +
                              input.ph.single,
35 +
                              rf.temp = NULL,
36 +
                              tol,
37 +
                              verbose = FALSE,
38 +
                              ret.map.no.rf.estimation = FALSE,
39 +
                              high.prec = TRUE,
40 +
                              max.rf.to.break.EM = 0.5)
41 41
{
42 42
  input_classes <- c("mappoly.sequence")
43 43
  if (!inherits(input.seq, input_classes[1])) {
@@ -57,35 +57,35 @@
Loading
57 57
    dq <- get(input.seq$data.name)$dosage.p2[input.seq$seq.num]
58 58
    for (j in 1:nrow(D))
59 59
      D[j, D[j, ]  ==  input.seq$ploidy + 1] <- dp[j] + dq[j] + 1 + as.numeric(dp[j] == 0 || dq[j] == 0)
60 -
60 +
    
61 61
    if(high.prec)
62 62
    {
63 63
      res.temp <- .Call("est_map_hmm_highprec",
64 -
                      input.seq$ploidy,
65 -
                      t(D),
66 -
                      lapply(input.ph.single$P, function(x) x-1),
67 -
                      lapply(input.ph.single$Q, function(x) x-1),
68 -
                      rf.temp,
69 -
                      verbose = verbose,
70 -
                      max.rf.to.break.EM,
71 -
                      tol,
72 -
                      PACKAGE = "mappoly")
64 +
                        input.seq$ploidy,
65 +
                        t(D),
66 +
                        lapply(input.ph.single$P, function(x) x-1),
67 +
                        lapply(input.ph.single$Q, function(x) x-1),
68 +
                        rf.temp,
69 +
                        verbose = verbose,
70 +
                        max.rf.to.break.EM,
71 +
                        tol,
72 +
                        PACKAGE = "mappoly")
73 73
    } else{
74 74
      res.temp <- .Call("est_map_hmm",
75 -
                      input.seq$ploidy,
76 -
                      t(D),
77 -
                      lapply(input.ph.single$P, function(x) x-1),
78 -
                      lapply(input.ph.single$Q, function(x) x-1),
79 -
                      rf.temp,
80 -
                      verbose = verbose,
81 -
                      max.rf.to.break.EM,
82 -
                      tol,
83 -
                      PACKAGE = "mappoly")
75 +
                        input.seq$ploidy,
76 +
                        t(D),
77 +
                        lapply(input.ph.single$P, function(x) x-1),
78 +
                        lapply(input.ph.single$Q, function(x) x-1),
79 +
                        rf.temp,
80 +
                        verbose = verbose,
81 +
                        max.rf.to.break.EM,
82 +
                        tol,
83 +
                        PACKAGE = "mappoly")
84 84
    }
85 85
  } else res.temp <- list(1, rf.temp)
86 86
  map <- list(seq.num = input.seq$seq.num,
87 -
            seq.rf = res.temp[[2]],
88 -
            seq.ph = input.ph.single,
89 -
            loglike = res.temp[[1]])
87 +
              seq.rf = res.temp[[2]],
88 +
              seq.ph = input.ph.single,
89 +
              loglike = res.temp[[1]])
90 90
  map
91 91
}

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Click to load this diff.
Loading diff...

Learn more Showing 3 files with coverage changes found.

New file R/one_paprent_single_map_hmm.R
New
Loading file...
New file R/calc_genoprob_one_parent.R
New
Loading file...
New file src/hmm_one_parent.cpp
New
Loading file...
Files Coverage
R -1.56% 67.81%
src -8.63% 68.07%
Project Totals (62 files) 67.88%
Loading