1
#' Count the number/proportion of present/missing sites in each sample
2
#'
3
#' @param data EIGENSTRAT data object.
4
#' @param prop Calculate the proportion instead of counts?
5
#' @param missing Count present SNPs or missing SNPs?
6
#'
7
#' @return A data.frame object with SNP counts/proportions.
8
#'
9
#' @examples
10
#' \dontrun{snps <- eigenstrat(download_data(dirname = tempdir()))
11
#' 
12
#' present_count <- count_snps(snps)
13
#' missing_count <- count_snps(snps, missing = TRUE)
14
#'
15
#' present_proportion <- count_snps(snps, prop = TRUE)
16
#' missing_proportion <- count_snps(snps, missing = TRUE, prop = TRUE)
17
#' }
18
#'
19
#' @export
20
#' @import rlang
21
count_snps <- function(data, missing = FALSE, prop = FALSE) {
22 1
  summary_fun <- ifelse(prop, mean, sum)
23 1
  if (missing) {
24 1
    fun <- function(x) as.integer(is.na(x))
25 1
    col <- "missing"
26
  } else {
27 1
    fun <- function(x) as.integer(!is.na(x))
28 1
    col <- "present"
29
  }
30 1
  geno <- read_geno(data)
31 1
  result <- read_ind(data)
32 1
  result[[col]] <- as.vector(t(dplyr::summarise_all(geno, list(~ summary_fun(fun(.))))))
33 1
  result
34
}
35

36

37

38
# Run a specified ADMIXTOOLS command.
39
run_cmd <- function(cmd, par_file, log_file) {
40 1
  system(paste(cmd, "-p", par_file, ">", log_file))
41
}
42

43

44

45
# Create either specified or a temporary directory.
46
get_dir <- function(dir_name = NULL) {
47 1
  if (!is.null(dir_name)) {
48 1
    dir.create(dir_name, showWarnings = FALSE)
49
  } else {
50 1
    dir_name <- tempdir()
51
  }
52

53 1
  path.expand(dir_name)
54
}
55

56

57

58
# Generate paths to the population file, parameter file and log file
59
# based on a specified directory.
60
get_files <- function(dir_name, prefix) {
61 1
  directory <- get_dir(dir_name)
62 1
  list(
63 1
    pop_file = file.path(directory, paste0(prefix, ".pop")),
64 1
    par_file = file.path(directory, paste0(prefix, ".par")),
65 1
    log_file = file.path(directory, paste0(prefix, ".log"))
66
  )
67
}
68

69

70

71
# Check that the provided object is of the required type
72
check_type <- function(x, type) {
73 1
    if (!inherits(x, type)) {
74 1
        stop(glue::glue("Object is not of the type {type}"), call. = FALSE)
75
    }
76
}
77

78

79

80
# Check for the presence of a given set of labels in an 'ind' file.
81
# Fail if there a sample was not found.
82
check_presence <- function(labels, data) {
83 1
  not_present <- setdiff(labels, suppressMessages(read_ind(data)$label))
84 1
  if (length(not_present) > 0) {
85 1
    stop("The following samples are not present in the data: ",
86 1
         paste(not_present, collapse = ", "), call. = FALSE)
87
  }
88
}
89

90

91

92
#' Download example SNP data.
93
#'
94
#' The data is downloaded to a temporary directory by default.
95
#' 
96
#' @param dirname Directory in which to put the data (EIGENSTRAT trio
97
#'     of snp/geno/ind files).
98
#'
99
#' @export
100
download_data <- function(dirname = tempdir()) {
101 1
  dest <- file.path(dirname, "snps.tgz")
102 1
  utils::download.file(
103 1
    "https://bioinf.eva.mpg.de/admixr/snps.tar.gz",
104 1
    destfile = dest,
105 1
    method = "wget",
106 1
    quiet = TRUE
107
  )
108 1
  system(paste0("cd ", dirname, "; tar xf ", dest, "; rm snps.tgz"))
109 1
  file.path(dirname, "snps", "snps")
110
}
111

112

113

114
# this looks incredibly hacky, but seems to be a solution to my R CMD check
115
# "missing global variable" NOTE woes caused by dplyr code (the following
116
# are not actually global variables)
117
utils::globalVariables(
118
  names = c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",
119
            "FORMAT", "chrom", "pos", "snp_id", "ref", "alt", "gen_dist",
120
            "sample_id", "name", "target", ".", "start", "end",
121
            "model", "noutgroups", "outgroups", "pattern", "pvalue"),
122
            package = "admixr")
123

124

125

126
#' Pipe operator
127
#' 
128
#' Added via usethis::use_pipe().
129
#'
130
#'
131
#' @name %>%
132
#' @rdname pipe
133
#' @keywords internal
134
#' @export
135
#' @importFrom magrittr %>%
136
#' @usage lhs \%>\% rhs
137
NULL

Read our documentation on viewing source code .

Loading