1
#' (faster) Linear SVM classifier for texts
2
#'
3
#' Fit a fast linear SVM classifier for sparse text matrices, using svmlin C++
4
#' code written by Vikas Sindhwani and S. Sathiya Keerthi.  This method
5
#' implements the modified finite Newton L2-SVM method (L2-SVM-MFN) method
6
#' described in Sindhwani and Keerthi (2006). Currently,
7
#' `textmodel_svmlin()` only works for two-class problems.
8
#'
9
#' @param x the [dfm] on which the model will be fit.  Does not need to contain
10
#'   only the training documents.
11
#' @param y vector of training labels associated with each document identified
12
#'   in `train`.  (These will be converted to factors if not already factors.)
13
#' @param intercept logical; if `TRUE`, add an intercept to the data
14
#' @param lambda numeric; regularization parameter lambda (default 1)
15
#' @param cp numeric; Relative cost for "positive" examples (the second factor
16
#'   level)
17
#' @param cn numeric; Relative cost for "negative" examples (the first factor
18
#'   level)
19
#' @param scale	logical; if `TRUE`, normalize the feature counts
20
#' @param center logical; if `TRUE`, centre the feature counts
21
#' @return a fitted model object of class `textmodel_svmlin`
22
#' @references
23
#' Vikas Sindhwani and S. Sathiya Keerthi (2006).  [Large Scale Semi-supervised
24
#' Linear SVMs](https://vikas.sindhwani.org/sk_sigir06.pdf). *Proceedings of ACM
25
#' SIGIR*. August 6–11, 2006, Seattle.
26
#'
27
#' V. Sindhwani and S. Sathiya Keerthi (2006).  Newton Methods for Fast Solution
28
#' of Semi-supervised Linear SVMs. Book Chapter in *Large Scale Kernel
29
#' Machines*, MIT Press, 2006.
30
#'
31
#' @seealso [predict.textmodel_svmlin()]
32
#' @examples
33
#' # use Lenihan for govt class and Bruton for opposition
34
#' quanteda::docvars(data_corpus_irishbudget2010, "govtopp") <-
35
#'     c("Govt", "Opp", rep(NA, 12))
36
#' dfmat <- quanteda::dfm(data_corpus_irishbudget2010)
37
#'
38
#' tmod <- textmodel_svmlin(dfmat, y = quanteda::docvars(dfmat, "govtopp"))
39
#' predict(tmod)
40
#' @importFrom quanteda dfm_group as.dfm
41
#' @importFrom stats na.omit predict
42
#' @keywords textmodel
43
#' @export
44
textmodel_svmlin <- function(x, y, intercept = TRUE, # x_u = NULL,
45
                             lambda = 1,
46
                             # algorithm = 1,
47
                             # lambda_u = 1, max_switch = 10000, # pos_frac = 0.5,
48
                             cp = 1, cn = 1,
49
                             scale = FALSE, center = FALSE) {
50 1
    UseMethod("textmodel_svmlin")
51
}
52

53
#' @export
54
textmodel_svmlin.default <-  function(x, y, intercept = TRUE, # x_u = NULL,
55
                                      lambda = 1,
56
                                      # algorithm = 1,
57
                                      # lambda_u = 1, max_switch = 10000, # pos_frac = 0.5,
58
                                      cp = 1, cn = 1,
59
                                      scale = FALSE, center = FALSE) {
60 0
  stop(friendly_class_undefined_message(class(x), "textmodel_svmlin"))
61
}
62

63
#' @export
64
#' @importFrom methods as
65
textmodel_svmlin.dfm <-  function(x, y, intercept = TRUE, # x_u = NULL,
66
                                  lambda = 1,
67
                                  # algorithm = 1,
68
                                  # lambda_u = 1, max_switch = 10000, # pos_frac = 0.5,
69
                                  cp = 1, cn = 1,
70
                                  scale = FALSE, center = FALSE) {
71 1
    x <- as.dfm(x)
72 0
    if (!sum(x)) stop(message_error("dfm_empty"))
73 1
    call <- match.call()
74

75 1
    y <- factor(y)
76 1
    if (length(levels(y)) != 2) stop("y must contain two values only")
77

78 1
    temp <- x[!is.na(y), ]
79 1
    class <- y[!is.na(y)]
80 1
    temp <- dfm_group(temp, class, force = TRUE)
81

82 1
    svmlinfitted <- svmlin(X = as(temp, "dgCMatrix"), #X_u = x_u,
83 1
                           y = factor(docnames(temp), levels = docnames(temp)),
84 1
                           intercept = intercept,
85 1
                           algorithm = 1, lambda = lambda,
86
                           # lambda_u = lambda_u,
87
                           # max_switch = max_switch,
88
                           # pos_frac = pos_frac,
89 1
                           Cp = cp, Cn = cn, verbose = FALSE,
90 1
                           scale = scale, x_center = center)
91

92 1
    result <- list(
93 1
        x = x, y = y,
94 1
        weights = svmlinfitted$weights,
95
        # algorithm = factor(svmlinfitted$algorithm, levels = 0:3,
96
        #                    labels = c("Regularized Least Squares Classification",
97
        #                               "SVM",
98
        #                               "Multi-switch Transductive SVM",
99
        #                               "Deterministic Annealing Semi-supervised SVM")),
100
        # classnames = svmlinfitted$classnames,
101 1
        intercept = intercept,
102 1
        call = call
103
    )
104 1
    weightnames <- featnames(temp)
105 1
    if (intercept) weightnames <- c("intercept", weightnames)
106 1
    names(result$weights) <- weightnames
107 1
    class(result) <- c("textmodel_svmlin", "textmodel", "list")
108 1
    result
109
}
110

111
# helper methods ----------------
112

113
#' Prediction from a fitted textmodel_svmlin object
114
#'
115
#' `predict.textmodel_svmlin()` implements class predictions from a fitted
116
#' linear SVM model.
117
#' @param object a fitted linear SVM textmodel
118
#' @param newdata dfm on which prediction should be made
119
#' @param type the type of predicted values to be returned; see Value
120
#' @param force logical, if `TRUE`, make newdata's feature set conformant to the
121
#'   model terms
122
#' @param ... not used
123
#' @return `predict.textmodel_svmlin` returns either a vector of class
124
#'   predictions for each row of `newdata` (when `type = "class"`), or
125
#'   a document-by-class matrix of class probabilities (when `type =
126
#'   "probability"`).
127
#' @seealso [textmodel_svmlin()]
128
#' @importFrom stats predict
129
#' @method predict textmodel_svmlin
130
#' @keywords textmodel internal
131
#' @export
132
predict.textmodel_svmlin <- function(object, newdata = NULL,
133
                                  type = c("class", "probability"),
134
                                  force = FALSE, ...) {
135 1
    unused_dots(...)
136

137 1
    type <- match.arg(type)
138

139 1
    if (!is.null(newdata)) {
140 0
        data <- as.dfm(newdata)
141
    } else {
142 1
        data <- as.dfm(object$x)
143
    }
144

145 1
    if (object$intercept) {
146 1
        data <- cbind(1, data)
147 1
        colnames(data)[1] <- "intercept"
148
    }
149 1
    data <- force_conformance(data, names(object$weights), force)
150

151 1
    pred_y <- as.numeric(data %*% object$weights)
152 1
    names(pred_y) <- docnames(data)
153

154 1
    classnames <- levels(object$y)
155 1
    if (type == "class") {
156 1
        pred_y <- ifelse(pred_y < 0, classnames[1], classnames[2])
157 0
    } else if (type == "probability") {
158 0
        stop("probability type not implemented yet")
159
    }
160

161 1
    pred_y
162
}
163

164
#' @export
165
#' @method print textmodel_svmlin
166
print.textmodel_svmlin <- function(x, ...) {
167 1
    cat("\nCall:\n")
168 1
    print(x$call)
169 1
    cat("\n",
170 1
        length(na.omit(x$y)), " training documents; ",
171 1
        nfeat(na.omit(x)), " fitted features.",
172 1
        "\n",
173 1
        "Method: ", x$algorithm, "\n",
174 1
        sep = "")
175
}
176

177
#' summary method for textmodel_svmlin objects
178
#' @param object output from [textmodel_svmlin()]
179
#' @param n how many coefficients to print before truncating
180
#' @param ... additional arguments not used
181
#' @keywords textmodel internal
182
#' @method summary textmodel_svmlin
183
#' @importFrom utils head
184
#' @export
185
summary.textmodel_svmlin <- function(object, n = 30, ...) {
186 1
    result <- list(
187 1
        "call" = object$call,
188 1
        "estimated.feature.scores" = as.coefficients_textmodel(head(coef(object), n))
189
    )
190 1
    as.summary.textmodel(result)
191
}
192

193
#' @noRd
194
#' @method coef textmodel_svmlin
195
#' @export
196
coef.textmodel_svmlin <- function(object, ...) {
197 1
    object$weights
198
}
199

200
#' @noRd
201
#' @method coefficients textmodel_svmlin
202
#' @export
203
coefficients.textmodel_svmlin <- function(object, ...) {
204 0
    UseMethod("coef")
205
}
206

207
#' @export
208
#' @method print predict.textmodel_svmlin
209
print.predict.textmodel_svmlin <- function(x, ...) {
210 0
    print(unclass(x))
211
}
212

213

214

215
# adapted from RSSL -----------------
216

217
svmlin <- function(X, y, X_u = NULL, algorithm = 1, lambda = 1, lambda_u = 1, max_switch = 10000,
218
                   pos_frac = 0.5, Cp = 1.0, Cn = 1.0, verbose = FALSE,
219
                   intercept = TRUE, scale = FALSE, x_center = FALSE) {
220

221 1
  use_Xu_for_scaling <- TRUE
222 1
  classnames <- levels(y)
223

224 1
    if (scale || x_center) {
225 0
        if (intercept) {
226 0
            cols <- 2:ncol(X) # do not scale the intercept column
227
        } else {
228 0
            cols <- 1:ncol(X)
229
        }
230

231 0
        if (!is.null(X_u) && use_Xu_for_scaling) {
232 0
            Xe <- rbind(X, X_u)
233 0
            scaling <- scaleMatrix(Xe[, cols, drop = FALSE], center = TRUE, scale = scale)
234 0
            X[, cols] <- predict(scaling, as.matrix(X[, cols, drop=FALSE]))
235 0
            X_u[, cols] <- predict(scaling, as.matrix(X_u[, cols, drop=FALSE]))
236
        } else {
237 0
            scaling <- scaleMatrix(X[, cols, drop = FALSE], center = TRUE, scale = scale)
238 0
            X[, cols] <- predict(scaling, as.matrix(X[, cols, drop=FALSE]))
239 0
            if (!is.null(X_u)) {
240 0
                X_u[, cols] <- predict(scaling,as.matrix(X_u[, cols, drop=FALSE]))
241
            }
242
        }
243
    } else {
244 1
        scaling = NULL
245
    }
246

247 1
    y <- as.numeric(y) * 2 - 3
248

249
    # Combine feature matrices, add intercept and transpose them to conform to the C++ datastructure
250 1
    if (is.null(X_u) || algorithm < 2) {
251 1
        if (intercept) {
252 1
            X <- cbind(X, 1)
253 1
            X_u <- cbind(X_u, 1)
254
        }
255 1
        Xall <- Matrix::t(X)
256
    } else {
257 0
        if (intercept) {
258 0
            X <- cbind(X, 1)
259 0
            X_u <- cbind(X_u, 1)
260
        }
261 0
        Xall <- Matrix::t(Matrix::rbind2(X, X_u))
262 0
        y <- c(y, rep(0,nrow(X_u)))
263
    }
264

265
    # Determine costs
266 1
    costs <- rep(1, ncol(Xall))
267 1
    if (algorithm < 1) {
268 0
        costs[y < 0] <- Cn
269 0
        costs[y > 0] <- Cp
270
    }
271

272 1
    res <- svmlin_rcpp(X = Xall, y = y, l = nrow(X), algorithm = algorithm,
273 1
                       lambda = lambda, lambda_u = lambda_u, max_switch = max_switch,
274 1
                       pos_frac = pos_frac, Cp = Cp, Cn = Cn, costs = costs,
275 1
                       verbose = verbose)
276

277 1
    list(classnames = classnames,
278 1
         weights = res$Weights,
279 1
         algorithm = algorithm,
280 1
         scaling = scaling,
281 1
         intercept = intercept)
282
}
283

284
setClass("scaleMatrix",
285
         representation(mean = "ANY", scale = "ANY"))
286

287
scaleMatrix <- function(x, center = TRUE, scale = TRUE) {
288 0
  if (center) {
289 0
    mean <- quanteda::colMeans(x)
290 0
    x <- sweep(x, 2, mean)
291
  } else {
292 0
    mean <- NULL
293
  }
294

295 0
  if (scale) {
296 0
    scale <- sqrt(colSums(sweep(x, 2, quanteda::colMeans(x)) ^ 2) / (nrow(x) - 1))
297 0
    x <- sweep(x, 2, scale, "/")
298
  } else {
299 0
    scale <- NULL
300
  }
301

302 0
  object <- new(Class="scaleMatrix", mean = mean, scale = scale)
303 0
  return(object)
304
}
305

306
setMethod("predict", signature=c("scaleMatrix"), function(object, newdata, ...) {
307 0
    if (!is.matrix(newdata)) stop("Incorrect newdata")
308 0
    if (!is.null(object@mean)) {
309 0
        newdata <- sweep(newdata, 2, object@mean)
310
    }
311 0
    if (!is.null(object@scale)) {
312 0
        newdata <- sweep(newdata, 2, object@scale, "/")
313
    }
314 0
    return(newdata)
315
})

Read our documentation on viewing source code .

Loading