1
#' @include ccTable.R
2
ccTable$methods(
3
    get_missingness = function() {
4 1
        miss_count <- function(tb_) { 
5 1
            cmplt <- function(vec) {
6 1
                length(which(vec!="NA"))/length(vec) * 100 
7
            }
8 1
            items_ <- names(tb_)[!names(tb_) %in% c("site", "episode_id", "time")]
9 1
            flags <- tb_[, cmplt(.SD[[items_[1]]]), .(site, episode_id)]
10 1
            setnames(flags, c('site', 'episode_id', items_[1]))
11 1
            flags
12
        }
13

14 1
        .self$dquality[['missingness']] <- .self$torigin[, 1, by=c("site", "episode_id")]
15 1
        .self$dquality[['missingness']][, V1:=NULL]
16 1
        setkey(.self$dquality[['missingness']], site, episode_id)
17

18 1
        for (i in names(.self$conf)) {
19 1
            missconf <- .self$conf[[i]][["missingness"]][["labels"]]
20 1
            if(!is.null(missconf)) {
21 1
                for (c in seq(missconf)) {
22 1
                    col_name <- names(missconf[c])
23 1
                    colr <- missconf[[c]]
24 1
                    tbq <- ccd_select_table(.self$record, items_opt=i, freq=colr)
25 1
                    setkey(tbq, episode_id, site)
26 1
                    oldnm <- names(.self$dquality[['missingness']])
27 1
                    .self$dquality[['missingness']] <- 
28 1
                        merge(.self$dquality[['missingness']], miss_count(tbq))
29 1
                    setnames(.self$dquality[['missingness']], c(oldnm, paste(i, col_name, sep=".")))
30
                }
31
            }
32
        }    
33
})
34

35

36
#' Data missing filter
37
#'
38
#' Deal with data when insufficient data points are supported. There are 
39
#' two key items to be set in the YAML configuration file. 
40
#' 1) labels -- time interval. 2) accept_2d -- the accept present ratio. 
41
#' So if we set the labels is 24, and accept_2d is 70. It means we accept 
42
#' all the missing rate that is lower than 30% every 24 data points.
43
#' @name ccTable_filter_missingness
44
#' @param recount logical value. Recount the missingness if TRUE. 
45
NULL
46
ccTable$methods(
47
    filter_missingness = function(recount=FALSE){
48 1
        "filter out the where missingness is too low."
49 1
        if (recount || is.null(.self$dquality[['missingness']]) ||
50 1
            nrow(.self$dquality[['missingness']]) == 0)
51 1
            .self$get_missingness()
52

53 1
        if (is.null(.self$tclean) || nrow(.self$tclean) == 0)
54 1
            .self$tclean <- .self$torigin
55

56 1
        thresholds <- 
57 1
            unlist(lapply(.self$conf, 
58 1
                          function(x) x[["missingness"]][["accept_2d"]]))
59

60 1
        select_index <- rep(TRUE, nrow(.self$dquality[['missingness']]))
61 1
        for (nt in names(thresholds))
62 1
            select_index <- 
63 1
                select_index & as.vector(.self$dquality[['missingness']][, nt, with=FALSE] > thresholds[nt])
64
        
65 1
        .self$dfilter$missingness <- list()
66 1
        .self$dfilter$missingness$episode <-
67 1
            data.table(.self$dquality$missingness[, c('site', 'episode_id'),
68 1
                       with=FALSE], select_index)
69
})
70

71
#' No data filter
72
#' 
73
#' Remove the episode when a particular field is not presented.
74
#' It need to be set up in the YAML configuration file. 
75
#' @name ccTable_filter_nodata
76
NULL
77
ccTable$methods(
78
    filter_nodata = function() {
79 1
        "Exclude episodes when no data is presented in certain fields"
80 1
        data <- .self$get.data.column("nodata")
81 1
        nodata <- function(x, ...) {
82 1
            !all(x %in% c("NA", NA))
83
        }
84 1
        .self$dfilter$nodata <- getfilter(data, nodata)
85
    }
86
)

Read our documentation on viewing source code .

Loading