1 ```#' Count the number/proportion of present/missing sites in each sample ``` 2 ```#' ``` 3 ```#' @param data EIGENSTRAT data object. ``` 4 ```#' @param prop Calculate the proportion instead of counts? ``` 5 ```#' @param missing Count present SNPs or missing SNPs? ``` 6 ```#' ``` 7 ```#' @return A data.frame object with SNP counts/proportions. ``` 8 ```#' ``` 9 ```#' @examples ``` 10 ```#' \dontrun{snps <- eigenstrat(download_data(dirname = tempdir())) ``` 11 ```#' ``` 12 ```#' present_count <- count_snps(snps) ``` 13 ```#' missing_count <- count_snps(snps, missing = TRUE) ``` 14 ```#' ``` 15 ```#' present_proportion <- count_snps(snps, prop = TRUE) ``` 16 ```#' missing_proportion <- count_snps(snps, missing = TRUE, prop = TRUE) ``` 17 ```#' } ``` 18 ```#' ``` 19 ```#' @export ``` 20 ```#' @import rlang ``` 21 ```count_snps <- function(data, missing = FALSE, prop = FALSE) { ``` 22 1 ``` summary_fun <- ifelse(prop, mean, sum) ``` 23 1 ``` if (missing) { ``` 24 1 ``` fun <- function(x) as.integer(is.na(x)) ``` 25 1 ``` col <- "missing" ``` 26 ``` } else { ``` 27 1 ``` fun <- function(x) as.integer(!is.na(x)) ``` 28 1 ``` col <- "present" ``` 29 ``` } ``` 30 1 ``` geno <- read_geno(data) ``` 31 1 ``` result <- read_ind(data) ``` 32 1 ``` result[[col]] <- as.vector(t(dplyr::summarise_all(geno, list(~ summary_fun(fun(.)))))) ``` 33 1 ``` result ``` 34 ```} ``` 35 36 37 38 ```# Run a specified ADMIXTOOLS command. ``` 39 ```run_cmd <- function(cmd, par_file, log_file) { ``` 40 1 ``` system(paste(cmd, "-p", par_file, ">", log_file)) ``` 41 ```} ``` 42 43 44 45 ```# Create either specified or a temporary directory. ``` 46 ```get_dir <- function(dir_name = NULL) { ``` 47 1 ``` if (!is.null(dir_name)) { ``` 48 1 ``` dir.create(dir_name, showWarnings = FALSE) ``` 49 ``` } else { ``` 50 1 ``` dir_name <- tempdir() ``` 51 ``` } ``` 52 53 1 ``` path.expand(dir_name) ``` 54 ```} ``` 55 56 57 58 ```# Generate paths to the population file, parameter file and log file ``` 59 ```# based on a specified directory. ``` 60 ```get_files <- function(dir_name, prefix) { ``` 61 1 ``` directory <- get_dir(dir_name) ``` 62 1 ``` list( ``` 63 1 ``` pop_file = file.path(directory, paste0(prefix, ".pop")), ``` 64 1 ``` par_file = file.path(directory, paste0(prefix, ".par")), ``` 65 1 ``` log_file = file.path(directory, paste0(prefix, ".log")) ``` 66 ``` ) ``` 67 ```} ``` 68 69 70 71 ```# Check that the provided object is of the required type ``` 72 ```check_type <- function(x, type) { ``` 73 1 ``` if (!inherits(x, type)) { ``` 74 1 ``` stop(glue::glue("Object is not of the type {type}"), call. = FALSE) ``` 75 ``` } ``` 76 ```} ``` 77 78 79 80 ```# Check for the presence of a given set of labels in an 'ind' file. ``` 81 ```# Fail if there a sample was not found. ``` 82 ```check_presence <- function(labels, data) { ``` 83 1 ``` not_present <- setdiff(labels, suppressMessages(read_ind(data)\$label)) ``` 84 1 ``` if (length(not_present) > 0) { ``` 85 1 ``` stop("The following samples are not present in the data: ", ``` 86 1 ``` paste(not_present, collapse = ", "), call. = FALSE) ``` 87 ``` } ``` 88 ```} ``` 89 90 91 92 ```#' Download example SNP data. ``` 93 ```#' ``` 94 ```#' The data is downloaded to a temporary directory by default. ``` 95 ```#' ``` 96 ```#' @param dirname Directory in which to put the data (EIGENSTRAT trio ``` 97 ```#' of snp/geno/ind files). ``` 98 ```#' ``` 99 ```#' @export ``` 100 ```download_data <- function(dirname = tempdir()) { ``` 101 1 ``` dest <- file.path(dirname, "snps.tgz") ``` 102 1 ``` utils::download.file( ``` 103 1 ``` "https://bioinf.eva.mpg.de/admixr/snps.tar.gz", ``` 104 1 ``` destfile = dest, ``` 105 1 ``` method = "wget", ``` 106 1 ``` quiet = TRUE ``` 107 ``` ) ``` 108 1 ``` system(paste0("cd ", dirname, "; tar xf ", dest, "; rm snps.tgz")) ``` 109 1 ``` file.path(dirname, "snps", "snps") ``` 110 ```} ``` 111 112 113 114 ```# this looks incredibly hacky, but seems to be a solution to my R CMD check ``` 115 ```# "missing global variable" NOTE woes caused by dplyr code (the following ``` 116 ```# are not actually global variables) ``` 117 ```utils::globalVariables( ``` 118 ``` names = c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", ``` 119 ``` "FORMAT", "chrom", "pos", "snp_id", "ref", "alt", "gen_dist", ``` 120 ``` "sample_id", "name", "target", ".", "start", "end", ``` 121 ``` "model", "noutgroups", "outgroups", "pattern", "pvalue"), ``` 122 ``` package = "admixr") ``` 123 124 125 126 ```#' Pipe operator ``` 127 ```#' ``` 128 ```#' Added via usethis::use_pipe(). ``` 129 ```#' ``` 130 ```#' ``` 131 ```#' @name %>% ``` 132 ```#' @rdname pipe ``` 133 ```#' @keywords internal ``` 134 ```#' @export ``` 135 ```#' @importFrom magrittr %>% ``` 136 ```#' @usage lhs \%>\% rhs ``` 137 ```NULL ```

Read our documentation on viewing source code .