@@ -0,0 +1,194 @@
Loading
1 +
#' Complexity meta-features
2 +
#'
3 +
#' The complexity group is a set of measures to characterize the complexity of 
4 +
#' classification problems based on aspects that quantify the linearity of the 
5 +
#' data, the presence of informative feature, the sparsity and dimensionality 
6 +
#' of the datasets.
7 +
#'
8 +
#' @family meta-features
9 +
#' @param x A data.frame containing only the input attributes.
10 +
#' @param y A factor response vector with one label for each row/component of x.
11 +
#' @param features A list of features names or \code{"all"} to include all them.
12 +
#'  The supported values are described in the details section. (Default: 
13 +
#'  \code{"all"})
14 +
#' @param summary A list of summarization functions or empty for all values. See
15 +
#'  \link{post.processing} method to more information. (Default: 
16 +
#'  \code{c("mean", "sd")})
17 +
#' @param formula A formula to define the class column.
18 +
#' @param data A data.frame dataset contained the input attributes and class
19 +
#' @param ... Not used.
20 +
#' 
21 +
#' @details
22 +
#'  The following features are allowed for classification problems:
23 +
#'  \describe{
24 +
#'    \item{"C1"}{Entropy of class proportions.}
25 +
#'    \item{"C2"}{Multi-class imbalance ratio.}
26 +
#'    \item{"F1"}{Fisher's discriminant ratio.}
27 +
#'    \item{"F1v"}{The directional-vector Fisher's discriminant ratio.}
28 +
#'    \item{"F2"}{Overlapping of the per-class bounding boxes.}
29 +
#'    \item{"F3"}{Maximum individual feature efficiency.}
30 +
#'    \item{"F4"}{Collective feature efficiency.}
31 +
#'    \item{"L1"}{Distance of erroneous instances to a linear classifier.}
32 +
#'    \item{"L2"}{Training error of a linear classifier.}
33 +
#'    \item{"L3"}{Nonlinearity of a linear classifier.}
34 +
#'    \item{"LSC"}{Local-Set cardinality average.}
35 +
#'    \item{"N1"}{Fraction of points lying on the class boundary.}
36 +
#'    \item{"N2"}{Average intra/inter class nearest neighbor distances.}
37 +
#'    \item{"N3"}{Leave-one-out error rate of the 1-nearest neighbor algorithm.}
38 +
#'    \item{"N4"}{Nonlinearity of the one-nearest neighbor classifier.}
39 +
#'    \item{"T1"}{Fraction of maximum covering spheres on data.}
40 +
#'    \item{"T2"}{Average number of samples per dimension.}
41 +
#'    \item{"T3"}{Average intrinsic dimensionality per number of examples.}
42 +
#'    \item{"T4"}{Intrinsic dimensionality proportion.}
43 +
#'  }
44 +
#'  Also it is possible to ask for a subgroup of features:
45 +
#'  \describe{
46 +
#'     \item{"balance"}{Include the measures C1 and C2.}
47 +
#'     \item{"dimensionality"}{Include the measures T2, T3 and T4.}
48 +
#'     \item{"linearity"}{Include the measures L1, L2 and L3.}
49 +
#'     \item{"neighborhood"}{Include the measures N1, N2, N3, N4, T1 and LSC.}
50 +
#'     \item{"network"}{Include the measures Density, ClsCoef and Hubs.}
51 +
#'     \item{"overlapping"}{Include the measures F1, F1v, F2, F3 and F4.}
52 +
#'  }
53 +
#' @return A list named by the requested meta-features.
54 +
#'
55 +
#' @references
56 +
#'  Ana C. Lorena, Luis P. F. Garcia, Jens Lehmann, Marcilio C. P. Souto, and 
57 +
#'  Tin Kam Ho. 2019. How Complex Is Your Classification Problem?: A Survey on 
58 +
#'  Measuring Classification Complexity. ACM Comput. Surv. 52, 5.
59 +
#'
60 +
#'  Lorena, A. C., Maciel, A. I., de Miranda, P. B. C., Costa, I. G., and 
61 +
#'  Prudencio, R. B. C. (2018). Data complexity meta-features for regression 
62 +
#'  problems. Machine Learning, 107(1):209-246.
63 +
#'
64 +
#'  Ho, T., and Basu, M. (2002). Complexity measures of supervised 
65 +
#'  classification problems. IEEE Transactions on Pattern Analysis and 
66 +
#'  Machine Intelligence, 24(3):289-300.
67 +
#'
68 +
#' @examples
69 +
#' ## Extract all metafeatures
70 +
#' complexity(Species ~ ., iris)
71 +
#'
72 +
#' ## Extract some metafeatures
73 +
#' complexity(iris[30:120, 1:4], iris[30:120, 5], c("F1", "F2", "linearity"))
74 +
#' @export
75 +
complexity <- function(...) {
  # S3 generic: dispatches to complexity.default (x/y interface) or
  # complexity.formula (formula/data interface) based on the first argument.
  UseMethod("complexity")
}
78 +
79 +
#' @rdname complexity
80 +
#' @export
81 +
complexity.default <- function(x, y, features="all", 
                               summary=c("mean", "sd"), ...) {
  # Extracts the requested complexity measures by delegating each group of
  # measures to the corresponding ECoL function and summarizing the results.
  if(!is.data.frame(x)) {
    stop("data argument must be a data.frame")
  }

  if(is.data.frame(y)) {
    y <- y[, 1]
  }

  y <- as.factor(y)

  # Validate the x/y pairing before inspecting the class distribution so the
  # reported error matches the actual problem.
  if(nrow(x) != length(y)) {
    stop("x and y must have same number of rows")
  }

  if(min(table(y)) < 2) {
    stop("number of examples in the minority class should be >= 2")
  }

  if(features[1] == "all") {
    features <- ls.complexity()
  }

  # Group names (e.g. "linearity") are expanded to their member measures;
  # match.arg with several.ok=TRUE silently drops the group names afterwards.
  if(any(features %in% ls.complexity.groups("class"))) {
    features <- unique(c(features, unlist(sapply(features, 
                                                 ls.complexity.groups))))
  }
  features <- match.arg(features, ls.complexity(), TRUE)
  colnames(x) <- make.names(colnames(x), unique=TRUE)

  # An empty summary means "return the raw, non-aggregated values", mirroring
  # the behavior of the other meta-feature extractors in this package.
  if(length(summary) == 0) {
    summary <- "non.aggregated"
  }

  # Keep only the groups that contain at least one requested measure.
  groups <- names(which(sapply(ls.complexity.groups("class"), 
                 function(g) any(features %in% ls.complexity.groups(g)))))

  do.call(c, lapply(groups, function(group) {
    # Each group maps to an identically named extraction function in ECoL,
    # which is asked for the raw ("return") values of its measures.
    fmethod <- get(group, asNamespace("ECoL"))
    measures <- intersect(features, ls.complexity.groups(group))
    subgroups <- do.call(fmethod, list(x=x, y=y, measures=measures, 
                                       summary="return", ...))
    sapply(names(subgroups), function(measure) {
      post.processing(subgroups[[measure]], summary, 
                      measure %in% ls.complexity.multiples(), ...)
    }, simplify=FALSE)
  }))[features]
}
125 +
126 +
#' @rdname complexity
127 +
#' @export
128 +
complexity.formula <- function(formula, data, features="all", 
                               summary=c("mean", "sd"), ...) {
  # Formula interface: builds the model frame and forwards to the
  # data.frame/response method.
  if(!inherits(formula, "formula")) {
    stop("method is only for formula objects")
  }

  if(!is.data.frame(data)) {
    stop("data argument must be a data.frame")
  }

  modFrame <- stats::model.frame(formula, data)
  # Drop the terms attribute so the model frame behaves as a plain data.frame.
  attr(modFrame, "terms") <- NULL

  # Column 1 of the model frame is the response; the remaining columns are
  # the predictors.
  complexity.default(modFrame[-1], modFrame[1], features, summary, ...)
}
143 +
144 +
#' List the complexity meta-features
145 +
#'
146 +
#' @return A list of complexity meta-features names
147 +
#' @export
148 +
#'
149 +
#' @examples
150 +
#' ls.complexity()
151 +
ls.complexity <- function() {
  # Collect the measures of every classification complexity group and return
  # them as a single, alphabetically sorted vector.
  class.groups <- ls.complexity.groups("class")
  measures <- lapply(class.groups, ls.complexity.groups)
  sort(unlist(measures))
}
154 +
155 +
ls.complexity.multiples <- function() {
  # Measures that yield multiple values (and are therefore summarized),
  # listed by the family they belong to. The dimensionality and balance
  # families contribute no multi-valued measure.
  overlapping <- c("F1", "F1v", "F2", "F3", "F4")
  neighborhood <- c("N2", "N3", "N4", "T1")
  linearity <- c("L1", "L2", "L3")
  network <- "Hubs"
  c(overlapping, neighborhood, linearity, network)
}
164 +
165 +
ls.complexity.groups <- function(type) {
  # Maps a group name to its member measures; "class" and "regr" map to the
  # group names available for each learning task. An unknown name yields
  # NULL, matching the behavior of a switch with no matching branch.
  catalog <- list(
    class = c("overlapping", "neighborhood", "linearity", "dimensionality",
              "balance", "network"),
    regr = c("correlation", "linearity", "smoothness", "dimensionality"),
    overlapping = c("F1", "F1v", "F2", "F3", "F4"),
    neighborhood = c("N1", "N2", "N3", "N4", "T1", "LSC"),
    linearity = c("L1", "L2", "L3"),
    dimensionality = c("T2", "T3", "T4"),
    balance = c("C1", "C2"),
    network = c("Density", "ClsCoef", "Hubs")
  )
  catalog[[type]]
}

@@ -0,0 +1,186 @@
Loading
1 +
#' Concept Meta-features
2 +
#'
3 +
#' Concept characterization features measure the sparsity of the input space 
#' and the irregularity of the input-output distribution.
6 +
#'
7 +
#' @family meta-features
8 +
#' @param x A data.frame contained only the input attributes.
9 +
#' @param y A factor response vector with one label for each row/component of x.
10 +
#' @param features A list of features names or \code{"all"} to include all them.
11 +
#' @param summary A list of summarization functions or empty for all values. See
12 +
#'  \link{post.processing} method to more information. (Default: 
13 +
#'  \code{c("mean", "sd")})
14 +
#' @param transform A logical value indicating if the categorical attributes
15 +
#'  should be transformed. If \code{FALSE} they will be ignored. (Default: 
16 +
#'  \code{TRUE})
17 +
#' @param formula A formula to define the class column.
18 +
#' @param data A data.frame dataset contained the input attributes and class.
19 +
#'  The details section describes the valid values for this group.
20 +
#' @param ... Further arguments passed to the summarization functions.
21 +
#' @details
22 +
#'  The following features are allowed for this method:
23 +
#'  \describe{
24 +
#'    \item{"cohesiveness"}{Example Cohesiveness is a different version of the
25 +
#'      wgDist measure.}
26 +
#'    \item{"conceptvar"}{Concept variation estimates the variability of class 
27 +
#'      labels among examples.}
28 +
#'    \item{"impconceptvar"}{Improved concept variation is a different version
29 +
#'       of the conceptvar measure.}
30 +
#'    \item{"wgDist"}{Weighted distance captures how dense or sparse is the 
31 +
#'      example distribution.}
32 +
#'  }
33 +
#' @return A list named by the requested meta-features.
34 +
#'
35 +
#' @references
36 +
#' Vilalta, R., & Drissi, Y. (2002). A characterization of difficult 
37 +
#'  problems in classification. In M. A. Wani, H. R. Arabnia, K. J. 
38 +
#'  Cios, K. Hafeez, G. Kendall (Eds.), Proceedings of the 2002 
39 +
#'  international conference on machine learning and applications - 
40 +
#'  ICMLA 2002, June 24-27, 2002, Las Vegas, Nevada (pp. 133-138).
41 +
#'  
42 +
#' Vilalta, R., 1999. Understanding accuracy performance through 
43 +
#'  concept characterization and algorithm analysis. In: ECML 
44 +
#'  Workshop on Recent Advances in Meta-Learning and Future Work. 
45 +
#'  pp. 3-9.
46 +
#'
47 +
#' @examples
48 +
#' ## Extract all meta-features using formula
49 +
#' concept(Species ~ ., iris)
50 +
#'
51 +
#' ## Extract some meta-features
52 +
#' concept(iris[1:4], iris[5], c("conceptvar"))
53 +
#'
54 +
#' ## Use another summarization function
55 +
#' concept(Species ~ ., iris, summary=c("min", "median", "max"))
56 +
#' @export
57 +
concept <- function(...) {
  # S3 generic: dispatches to concept.default (x/y interface) or
  # concept.formula (formula/data interface) based on the first argument.
  UseMethod("concept")
}
60 +
61 +
#' @rdname concept
62 +
#' @export
63 +
concept.default <- function(x, y, features="all",
                               summary=c("mean", "sd"),
                               transform=TRUE, ...) {
  # Computes the requested concept meta-features on a range-normalized
  # pairwise distance matrix of the examples.
  if(!is.data.frame(x)) {
    stop("data argument must be a data.frame")
  }

  if(is.data.frame(y)) {
    y <- y[, 1]
  }
  y <- as.factor(y)

  # Validate the x/y pairing before inspecting the class distribution so the
  # reported error matches the actual problem.
  if(nrow(x) != length(y)) {
    stop("x and y must have same number of rows")
  }

  if(min(table(y)) < 2) {
    stop("number of examples in the minority class should be >= 2")
  }

  if(features[1] == "all") {
    features <- ls.concept()
  }
  features <- match.arg(features, ls.concept(), TRUE)
  colnames(x) <- make.names(colnames(x), unique=TRUE)

  # An empty summary means "return the raw, non-aggregated values".
  if (length(summary) == 0) {
    summary <- "non.aggregated"
  }

  # Either binarize the categorical attributes or drop them entirely.
  if(transform) {
    x <- binarize(x)
  } else {
    x <- x[sapply(x, is.numeric)]
  }

  x <- as.matrix(x)
  y <- as.integer(y)

  alpha <- 1 #Hyperparameter
  # Pairwise Euclidean distances with every attribute scaled by its range,
  # so each distance lies in [0, sqrt(ncol(x))] as expected by the weight
  # functions (which use sqrt(ncol(x)) as the maximum distance).
  # Note: (row - t(x)) is p x n, so dividing by the length-p range vector
  # nfs recycles correctly along the attribute dimension; the differences
  # are squared BEFORE summing (the original squared the sum instead).
  nfs <- apply(x, 2, function(col) max(col) - min(col))
  d <- apply(x, 1, function(row) sqrt(colSums(((row - t(x)) / nfs)^2)))

  sapply(features, function(f) {
    fn <- paste("m", f, sep=".")
    measure <- do.call(fn, list(x=x, y=y, d=d, alpha=alpha))
    post.processing(measure, summary, f %in% ls.concept.multiples(), ...)
  }, simplify=FALSE)
}
112 +
113 +
#' @rdname concept
114 +
#' @export
115 +
concept.formula <- function(formula, data, features="all",
                                   summary=c("mean", "sd"),
                                   transform=TRUE, ...) {
  # Formula interface: builds the model frame and forwards to the
  # data.frame/response method.
  if(!inherits(formula, "formula")) {
    stop("method is only for formula objects")
  }

  if(!is.data.frame(data)) {
    stop("data argument must be a data.frame")
  }

  modFrame <- stats::model.frame(formula, data)
  # Drop the terms attribute so the model frame behaves as a plain data.frame.
  attr(modFrame, "terms") <- NULL

  # Column 1 of the model frame is the response; the rest are predictors.
  concept.default(modFrame[-1], modFrame[1], features, summary, transform, 
    ...)
}
132 +
133 +
#' List the best concept meta-features
134 +
#'
135 +
#' @return A list of concept meta-features names.
136 +
#' @export
137 +
#'
138 +
#' @examples
139 +
#' ls.concept()
140 +
ls.concept <- function() {
  # Names of all supported concept meta-features, in canonical order.
  concept.measures <- c("cohesiveness",
                        "conceptvar",
                        "impconceptvar",
                        "wgDist")
  concept.measures
}
143 +
144 +
ls.concept.multiples <- function() {
  # Every concept measure yields one value per example, so all of them are
  # subject to summarization.
  ls.concept()
}
147 +
148 +
m.cohesiveness <- function(y, d, alpha, ...) {
  # Example cohesiveness: for each example, the neighbors are grouped into
  # distance rings (ceiling of the normalized distance, with ring 0 promoted
  # to ring 1) and each ring's neighbor count is discounted by
  # 1/2^(alpha * ring). Denser neighborhoods yield larger values.
  # Note: the class labels y are not used by this measure; the parameter is
  # kept for interface uniformity with the other m.* functions.
  sapply(seq(ncol(d)), function(i) {
    radius <- ceiling(d[i, -i])
    radius[radius == 0] <- 1
    counts <- table(radius)
    # table() orders its cells by sorted ring value, so the weights must be
    # derived from the table names; pairing them with unique(radius)
    # (first-occurrence order) would misalign weights and counts.
    rings <- as.numeric(names(counts))
    sum((1 / 2^(alpha * rings)) * counts)
  })
}
157 +
158 +
m.conceptvar <- function(x, y, d, alpha, ...) {
  # Concept variation: for each example, the weighted fraction of
  # differently-labeled examples, where closer examples receive
  # exponentially larger weights.
  max.dist <- sqrt(ncol(x))
  weights <- 1 / (2 ^ (alpha * (d / (max.dist - d))))
  # An example does not vote on itself; infinite weights (d == max.dist)
  # are zeroed out. TODO: revisit the handling of these degenerate cases.
  diag(weights) <- 0
  weights[is.infinite(weights)] <- 0

  label.diff <- outer(y, y, "!=")
  rowSums(weights * label.diff) / rowSums(weights)
}
166 +
167 +
m.impconceptvar <- function(y, d, alpha=1, ...) {
  # Improved concept variation: for each example, the average label
  # disagreement within each (ceiled) distance ring, with each ring
  # discounted by 1/2^(alpha * ring).
  label.diff <- outer(y, y, "!=")
  sapply(seq(ncol(d)), function(i) {
    others <- label.diff[i, -i]
    rings <- ceiling(d[i, -i])
    # Ring 0 (coincident points) is promoted to ring 1.
    rings[rings == 0] <- 1
    ring.scores <- vapply(unique(rings), function(r) {
      mean(others[rings == r]) / 2^(alpha * r)
    }, numeric(1))
    sum(ring.scores)
  })
}
178 +
179 +
m.wgDist <- function(x, d, alpha, ...) {
  # Weighted distance: for each example, the weighted mean distance to all
  # other examples, with closer neighbors weighted exponentially higher —
  # a measure of how dense or sparse the example distribution is.
  max.dist <- sqrt(ncol(x))
  weights <- 1 / (2 ^ (alpha * (d / (max.dist - d))))
  # An example's distance to itself is excluded; infinite weights
  # (d == max.dist) are zeroed out. TODO: revisit these degenerate cases.
  diag(weights) <- 0
  weights[is.infinite(weights)] <- 0

  rowSums(weights * d) / rowSums(weights)
}

@@ -0,0 +1,154 @@
Loading
1 +
#' Itemset Meta-features
2 +
#'
3 +
#' Itemset characterization features measure the distribution of values 
4 +
#' of both single attributes and pairs of attributes.
5 +
#'
6 +
#' @family meta-features
7 +
#' @param x A data.frame contained only the input attributes.
8 +
#' @param y A factor response vector with one label for each row/component of x.
9 +
#' @param features A list of features names or \code{"all"} to include all them.
10 +
#' @param summary A list of summarization functions or empty for all values. See
11 +
#'  \link{post.processing} method to more information. (Default: 
12 +
#'  \code{c("mean", "sd")})
13 +
#' @param formula A formula to define the class column.
14 +
#' @param data A data.frame dataset contained the input attributes and class.
15 +
#'  The details section describes the valid values for this group.
16 +
#' @param ... Further arguments passed to the summarization functions.
17 +
#' @details
18 +
#'  The following features are allowed for this method:
19 +
#'  \describe{
20 +
#'    \item{"oneitemset"}{Individual frequency of each attributes' value.}
21 +
#'    \item{"twoitemset"}{Correlation information of the two attributes' 
22 +
#'      value pairs.}
23 +
#'    \item{"classitemset"}{A two-itemset computed using a predictive 
24 +
#'      attribute and the target.}
25 +
#'  }
26 +
#' @return A list named by the requested meta-features.
27 +
#'
28 +
#' @references
29 +
#' Song, Q., Wang, G., & Wang, C. (2012). Automatic recommendation of 
30 +
#'   classification algorithms based on data set characteristics. Pattern 
31 +
#'   Recognition, 45(7), 2672-2689.
32 +
#'   
33 +
#' Wang, G., Song, Q., & Zhu, X. (2015). An improved data characterization 
34 +
#'   method and its application in classification algorithm recommendation. 
35 +
#'   Applied Intelligence, 43(4), 892-912.
36 +
#'
37 +
#' @examples
38 +
#' ## Extract all meta-features using formula
39 +
#' itemset(Species ~ ., iris)
40 +
#'
41 +
#' ## Extract some meta-features
42 +
#' itemset(iris[1:4], iris[5], c("oneitemset"))
43 +
#'
44 +
#' ## Use another summarization function
45 +
#' itemset(Species ~ ., iris, summary=c("min", "median", "max"))
46 +
#' @export
47 +
itemset <- function(...) {
  # S3 generic: dispatches to itemset.default (x/y interface) or
  # itemset.formula (formula/data interface) based on the first argument.
  UseMethod("itemset")
}
50 +
51 +
#' @rdname itemset
52 +
#' @export
53 +
itemset.default <- function(x, y, features="all",
                               summary=c("mean", "sd"),
                               ...) {
  # Computes the requested itemset meta-features on a discretized copy of
  # the input attributes.
  if(!is.data.frame(x)) {
    stop("data argument must be a data.frame")
  }

  if(is.data.frame(y)) {
    y <- y[, 1]
  }
  y <- as.factor(y)

  # Validate the x/y pairing before inspecting the class distribution so the
  # reported error matches the actual problem.
  if(nrow(x) != length(y)) {
    stop("x and y must have same number of rows")
  }

  if(min(table(y)) < 2) {
    stop("number of examples in the minority class should be >= 2")
  }

  if(features[1] == "all") {
    features <- ls.itemset()
  }
  features <- match.arg(features, ls.itemset(), TRUE)
  colnames(x) <- make.names(colnames(x), unique=TRUE)

  # An empty summary means "return the raw, non-aggregated values".
  if (length(summary) == 0) {
    summary <- "non.aggregated"
  }

  # The itemset measures are defined over discrete values only.
  x <- categorize(x)

  sapply(features, function(f) {
    fn <- paste("m", f, sep=".")
    measure <- do.call(fn, list(x=x, y=y))
    post.processing(measure, summary, f %in% ls.itemset.multiples(), ...)
  }, simplify=FALSE)
}
91 +
92 +
#' @rdname itemset
93 +
#' @export
94 +
itemset.formula <- function(formula, data, features="all",
                                   summary=c("mean", "sd"),
                                   ...) {
  # Formula interface: builds the model frame and forwards to the
  # data.frame/response method.
  if(!inherits(formula, "formula")) {
    stop("method is only for formula objects")
  }

  if(!is.data.frame(data)) {
    stop("data argument must be a data.frame")
  }

  modFrame <- stats::model.frame(formula, data)
  # Drop the terms attribute so the model frame behaves as a plain data.frame.
  attr(modFrame, "terms") <- NULL

  # Column 1 of the model frame is the response; the rest are predictors.
  itemset.default(modFrame[-1], modFrame[1], features, summary, ...)
}
110 +
111 +
#' List the itemset meta-features
112 +
#'
113 +
#' @return A list of itemset meta-features names.
114 +
#' @export
115 +
#'
116 +
#' @examples
117 +
#' ls.itemset()
118 +
ls.itemset <- function() {
  # Names of all supported itemset meta-features, in canonical order.
  itemset.measures <- c("classitemset",
                        "oneitemset",
                        "twoitemset")
  itemset.measures
}
121 +
122 +
ls.itemset.multiples <- function() {
  # Every itemset measure yields one value per value (or value pair), so all
  # of them are subject to summarization.
  ls.itemset()
}
125 +
126 +
m.oneitemset <- function(x, ...) {
  # One-itemset: relative frequency of each distinct value of each
  # attribute, named "attribute.value".
  # lapply is used instead of apply(x, 2, table): apply coerces the
  # data.frame to a matrix and, when every attribute happens to have the
  # same number of distinct values, returns a matrix whose names are then
  # silently dropped by c()/unlist.
  unlist(lapply(x, table)) / nrow(x)
}
129 +
130 +
m.twoitemset <- function(x, ...) {
  # Two-itemset: for every pair of attributes and every combination of their
  # values, computes P(v1) + P(v2) - 2*P(v1, v2), i.e. the relative
  # frequency with which exactly one of the two values occurs.
  # NOTE(review): assumes x has at least two columns (combn(n, 2) errors
  # otherwise) — confirm callers guarantee this.
  unlist(c(apply(utils::combn(seq(ncol(x)), 2), 2, function(pair){
    # Marginal relative frequencies of each attribute's values.
    v1 <- table(x[,as.numeric(pair[1])])/nrow(x)
    v2 <- table(x[,as.numeric(pair[2])])/nrow(x)
    
    # Joint relative frequency of each observed value pair, keyed "v1_v2".
    v12 <- table(apply(x[,as.numeric(pair)], 1, paste, collapse='_'))/nrow(x)
    
    apply(expand.grid(names(v1), names(v2)), 1, function(twop){
      # Value pairs never observed together are absent from v12; their
      # joint mass is treated as zero.
      val <- v12[paste(twop, collapse='_')]
      (v1[twop[1]] + v2[twop[2]]) - ifelse(is.na(val), 0, 2*val)
    })
  })))
}
143 +
144 +
m.classitemset <- function(x, y, ...) {
  # Class-itemset: the two-itemset measure computed between each predictive
  # attribute and the class labels, i.e. P(v) + P(c) - 2*P(v, c) for every
  # (attribute value, class) combination.
  # Marginal relative frequency of each class.
  v2 <- table(y)/nrow(x)
  unlist(c(apply(x, 2, function(col){
    # Marginal relative frequency of this attribute's values.
    v1 <- table(col)/nrow(x)
    # Joint relative frequency of (value, class), keyed "value_class".
    v12 <- table(paste(col, y, sep='_'))/nrow(x)
    apply(expand.grid(names(v1), names(v2)), 1, function(twop){
      # Combinations never observed together carry zero joint mass.
      val <- v12[paste(twop, collapse='_')]
      (v1[twop[1]] + v2[twop[2]]) - ifelse(is.na(val), 0, 2*val)
    })
  })))
}

@@ -5,8 +5,9 @@
Loading
5 5
#'
6 6
#' @param x A data.frame contained only the input attributes.
7 7
#' @param y A factor response vector with one label for each row/component of x.
8 -
#' @param groups A list of meta-features groups or \code{"all"} to include all
9 -
#'  them. The details section describes the valid values for this parameter.
8 +
#' @param groups A list of meta-features groups, \code{"default"} for traditional
9 +
#'  groups of meta-features or \code{"all"} to include all them. The details 
10 +
#'  section describes the valid values for this parameter.
10 11
#' @param summary A list of summarization functions or empty for all values. See
11 12
#'  \link{post.processing} method to more information. (Default: 
12 13
#'  \code{c("mean", "sd")})
@@ -29,6 +30,12 @@
Loading
29 30
#'      \link{statistical} for more details.}
30 31
#'    \item{"clustering"}{Include all clustering meta-features. See
31 32
#'      \link{clustering} for more details.}
33 +
#'    \item{"complexity"}{Include all complexity meta-features. See
34 +
#'      \link{complexity} for more details.}
35 +
#'    \item{"concept"}{Include all concept variation meta-features. See
36 +
#'      \link{concept} for more details.}
37 +
#'    \item{"itemset"}{Include all itemset meta-features. See
38 +
#'      \link{itemset} for more details.}
32 39
#'  }
33 40
#'
34 41
#' @return A numeric vector named by the meta-features from the specified 
@@ -50,7 +57,7 @@
Loading
50 57
51 58
#' @rdname metafeatures
52 59
#' @export
53 -
metafeatures.default <- function(x, y, groups="all",
60 +
metafeatures.default <- function(x, y, groups="default",
54 61
                                 summary=c("mean", "sd"), ...) {
55 62
  if(!is.data.frame(x)) {
56 63
    stop("data argument must be a data.frame")
@@ -68,6 +75,9 @@
Loading
68 75
  if(groups[1] == "all") {
69 76
    groups <- ls.metafeatures()
70 77
  }
78 +
  else if(groups[1] == "default") {
79 +
    groups <- c("general", "statistical", "infotheo", "model.based", "landmarking")
80 +
  }
71 81
  groups <- match.arg(groups, ls.metafeatures(), TRUE)
72 82
  
73 83
  if (length(summary) == 0) {
@@ -82,7 +92,7 @@
Loading
82 92
83 93
#' @rdname metafeatures
84 94
#' @export
85 -
metafeatures.formula <- function(formula, data, groups="all",
95 +
metafeatures.formula <- function(formula, data, groups="default",
86 96
                                 summary=c("mean", "sd"), ...) {
87 97
  if(!inherits(formula, "formula")) {
88 98
    stop("method is only for formula datas")
@@ -107,5 +117,5 @@
Loading
107 117
#' ls.metafeatures()
108 118
ls.metafeatures <- function() {
  # Every meta-feature group supported by the package, including the groups
  # that are not part of the "default" set (relative, clustering,
  # complexity, concept and itemset).
  groups <- c("general", "statistical", "infotheo", "model.based",
              "landmarking", "relative", "clustering", "complexity",
              "concept", "itemset")
  groups
}
Files Coverage
R 90.04%
Project Totals (13 files) 90.04%

No yaml found.

Create your codecov.yml to customize your Codecov experience

Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading