For more details: https://ropensci.org/technotes/2019/06/07/ropensci-docs/
1 |
#' @include ccTable.R
|
|
2 |
ccTable$methods( |
|
3 |
get_missingness = function() { |
|
4 | 1 |
miss_count <- function(tb_) { |
5 | 1 |
cmplt <- function(vec) { |
6 | 1 |
length(which(vec!="NA"))/length(vec) * 100 |
7 |
}
|
|
8 | 1 |
items_ <- names(tb_)[!names(tb_) %in% c("site", "episode_id", "time")] |
9 | 1 |
flags <- tb_[, cmplt(.SD[[items_[1]]]), .(site, episode_id)] |
10 | 1 |
setnames(flags, c('site', 'episode_id', items_[1])) |
11 | 1 |
flags |
12 |
}
|
|
13 |
|
|
14 | 1 |
.self$dquality[['missingness']] <- .self$torigin[, 1, by=c("site", "episode_id")] |
15 | 1 |
.self$dquality[['missingness']][, V1:=NULL] |
16 | 1 |
setkey(.self$dquality[['missingness']], site, episode_id) |
17 |
|
|
18 | 1 |
for (i in names(.self$conf)) { |
19 | 1 |
missconf <- .self$conf[[i]][["missingness"]][["labels"]] |
20 | 1 |
if(!is.null(missconf)) { |
21 | 1 |
for (c in seq(missconf)) { |
22 | 1 |
col_name <- names(missconf[c]) |
23 | 1 |
colr <- missconf[[c]] |
24 | 1 |
tbq <- ccd_select_table(.self$record, items_opt=i, freq=colr) |
25 | 1 |
setkey(tbq, episode_id, site) |
26 | 1 |
oldnm <- names(.self$dquality[['missingness']]) |
27 | 1 |
.self$dquality[['missingness']] <- |
28 | 1 |
merge(.self$dquality[['missingness']], miss_count(tbq)) |
29 | 1 |
setnames(.self$dquality[['missingness']], c(oldnm, paste(i, col_name, sep="."))) |
30 |
}
|
|
31 |
}
|
|
32 |
}
|
|
33 |
})
|
|
34 |
|
|
35 |
|
|
36 |
#' Data missing filter
|
|
37 |
#'
|
|
38 |
#' Deal with data when insufficient data points are supported. There are
|
|
39 |
#' two key items to be set in the YAML configuration file.
|
|
40 |
#' 1) labels -- time interval. 2) accept_2d -- the accept present ratio.
|
|
41 |
#' So if we set the labels is 24, and accept_2d is 70. It means we accept
|
|
42 |
#' all the missing rate that is lower than 30% every 24 data points.
|
|
43 |
#' @name ccTable_filter_missingness
|
|
44 |
#' @param recount logical value. Recount the missingness if TRUE.
|
|
45 |
NULL
|
|
46 |
ccTable$methods( |
|
47 |
filter_missingness = function(recount=FALSE){ |
|
48 | 1 |
"filter out the where missingness is too low."
|
49 | 1 |
if (recount || is.null(.self$dquality[['missingness']]) || |
50 | 1 |
nrow(.self$dquality[['missingness']]) == 0) |
51 | 1 |
.self$get_missingness() |
52 |
|
|
53 | 1 |
if (is.null(.self$tclean) || nrow(.self$tclean) == 0) |
54 | 1 |
.self$tclean <- .self$torigin |
55 |
|
|
56 | 1 |
thresholds <-
|
57 | 1 |
unlist(lapply(.self$conf, |
58 | 1 |
function(x) x[["missingness"]][["accept_2d"]])) |
59 |
|
|
60 | 1 |
select_index <- rep(TRUE, nrow(.self$dquality[['missingness']])) |
61 | 1 |
for (nt in names(thresholds)) |
62 | 1 |
select_index <-
|
63 | 1 |
select_index & as.vector(.self$dquality[['missingness']][, nt, with=FALSE] > thresholds[nt]) |
64 |
|
|
65 | 1 |
.self$dfilter$missingness <- list() |
66 | 1 |
.self$dfilter$missingness$episode <- |
67 | 1 |
data.table(.self$dquality$missingness[, c('site', 'episode_id'), |
68 | 1 |
with=FALSE], select_index) |
69 |
})
|
|
70 |
|
|
71 |
#' No data filter
|
|
72 |
#'
|
|
73 |
#' Remove the episode when a particular field is not presented.
|
|
74 |
#' It need to be set up in the YAML configuration file.
|
|
75 |
#' @name ccTable_filter_nodata
|
|
76 |
NULL
|
|
77 |
ccTable$methods( |
|
78 |
filter_nodata = function() { |
|
79 | 1 |
"Exclude episodes when no data is presented in certain fields"
|
80 | 1 |
data <- .self$get.data.column("nodata") |
81 | 1 |
nodata <- function(x, ...) { |
82 | 1 |
!all(x %in% c("NA", NA)) |
83 |
}
|
|
84 | 1 |
.self$dfilter$nodata <- getfilter(data, nodata) |
85 |
}
|
|
86 |
)
|
Read our documentation on viewing source code .