Re-implement textmodel_svmlin() without dependencies
/* Copyright 2006 Vikas Sindhwani (vikass@cs.uchicago.edu)
   SVM-lin: Fast SVM Solvers for Supervised and Semi-supervised Learning

   This file is part of SVM-lin.

   SVM-lin is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   SVM-lin is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with SVM-lin (see gpl.txt); if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <Rcpp.h>
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <cstring>
#include "ssl.h"
using namespace Rcpp;

struct options *Options = new options[1];
struct data *Data = new data[1];
struct vector_double *Weights = new vector_double[1];
struct vector_double *Outputs = new vector_double[1];
struct vector_double *Labels = new vector_double[1];

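// Exported interface, in brief. The parameter meanings below are inferred
// from the SVM-lin reference documentation and ssl.h; treat them as a
// best-effort reading rather than an authoritative spec:
//   X          sparse dgCMatrix holding labeled examples followed by unlabeled ones
//   y          labels (+1/-1 for labeled examples, 0 for unlabeled)
//   l          number of labeled examples; the remaining m - l are treated as unlabeled
//   algorithm  0 = RLSC, 1 = L2-SVM-MFN, 2 = multi-switch TSVM,
//              3 = deterministic-annealing semi-supervised SVM
//   lambda     regularization parameter (lambda_u: the same, for unlabeled examples)
//   max_switch maximum number of label switches in the TSVM loop
//   pos_frac   assumed fraction of positives among the unlabeled examples
//   Cp, Cn     class-specific cost weights for positive/negative examples
//   costs      per-example cost vector, copied into Data->C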
// [[Rcpp::export]]
List svmlin_rcpp(S4 X,
                 NumericVector y,
                 int l,
                 int algorithm,
                 double lambda,
                 double lambda_u,
                 int max_switch,
                 double pos_frac,
                 double Cp,
                 double Cn,
                 NumericVector costs,
                 bool verbose) {
  // Set options
  Options->algo = algorithm;
  Options->lambda = lambda;
  Options->lambda_u = lambda_u;
  Options->S = max_switch;
  Options->R = pos_frac;
  Options->epsilon = EPSILON;
  Options->cgitermax = CGITERMAX;
  Options->mfnitermax = MFNITERMAX;
  Options->Cp = Cp;
  Options->Cn = Cn;
  Options->verbose = verbose;

  // Work on copies of y and costs: the solver may modify these buffers,
  // and the originals belong to R.
  NumericVector ycop(y.begin(), y.end());
  NumericVector costcop(costs.begin(), costs.end());
  // Rprintf("Step 1\n");
  // size_t size = ((DoubleVector)X.slot("x")).length()+((DoubleVector)Xu.slot("x")).length();
  // std::vector<double> vals(size);
  // std::vector<double>::iterator it = vals.begin();
  // vals.insert(it,((DoubleVector)X.slot("x")).begin(),((DoubleVector)Xu.slot("x")).end());
  // it = vals.begin()+((DoubleVector)X.slot("x")).length();
  // vals.insert(it,((DoubleVector)Xu.slot("x")).begin(),((DoubleVector)Xu.slot("x")).end());
  //
  // Rprintf("Step 2\n");
  //
  // size = ((IntegerVector)X.slot("i")).length()+((IntegerVector)Xu.slot("i")).length();
  // std::vector<int> colinds(size);
  // std::vector<int>::iterator it2 = colinds.begin();
  // colinds.insert(it2,((IntegerVector)X.slot("i")).begin(),((IntegerVector)X.slot("i")).end());
  // it2 = colinds.begin() + ((IntegerVector)X.slot("i")).length();
  // colinds.insert(it2,((IntegerVector)Xu.slot("i")).begin(),((IntegerVector)Xu.slot("i")).end());
  //
  // size = ((IntegerVector)X.slot("p")).length()+((IntegerVector)Xu.slot("p")).length();
  // std::vector<int> rowpts(size);
  // it2 = rowpts.begin();
  // rowpts.insert(it2,((IntegerVector)X.slot("p")).begin(),((IntegerVector)X.slot("p")).end());
  // it2 = rowpts.begin() + ((IntegerVector)X.slot("p")).length();
  // rowpts.insert(it2,((IntegerVector)Xu.slot("p")).begin(),((IntegerVector)Xu.slot("p")).end());

  // R data to svmlin data structure.
  // X is a column-compressed dgCMatrix; Data->m is taken from Dim[1], so the
  // matrix is expected to hold one example per column, which lets the "p" and
  // "i" slots act directly as svmlin's example-pointer and feature-index arrays.
  Data->m = ((IntegerVector)X.slot("Dim"))[1];
  Data->l = l;
  Data->u = Data->m - Data->l;
  Data->n = ((IntegerVector)X.slot("Dim"))[0];
  Data->nz = ((DoubleVector)X.slot("x")).size();
  Data->val = ((DoubleVector)X.slot("x")).begin();
  Data->rowptr = ((IntegerVector)X.slot("p")).begin();
  Data->colind = ((IntegerVector)X.slot("i")).begin();
  Data->Y = ycop.begin();
  Data->C = costcop.begin();

  // TODO: load correct costs for unlabeled data.
  if (Options->verbose) {
    Rcout << " Input Data Matrix Statistics:" << endl;
    Rcout << " Examples: " << Data->m << endl;
    Rcout << " Features: " << Data->n << " (including bias feature)" << endl;
    Rcout << " Non-zeros: " << Data->nz << " (including bias features)" << endl;
    Rcout << " Average sparsity: " << Data->nz*1.0/Data->m << " non-zero features per example." << endl;
  }
  // for (int i = 0; i<((DoubleVector)X.slot("x")).length();i++) {
  //   Rprintf("val: %f \n",Data->val[i]);
  // }
  // for (int i = 0; i<((IntegerVector)X.slot("i")).length();i++) {
  //   Rprintf("col: %d \n",Data->colind[i]);
  // }
  // for (int i = 0; i<((IntegerVector)X.slot("p")).length();i++) {
  //   Rprintf("row: %d \n",Data->rowptr[i]);
  // }

  // Run
  ssl_train(Data, Options, Weights, Outputs);
  //Clear(Data);

  return Rcpp::List::create(
    Rcpp::Named("Weights") = Rcpp::NumericVector(Weights->vec, Weights->vec + Weights->d),
    Rcpp::Named("Outputs") = Rcpp::NumericVector(Outputs->vec, Outputs->vec + Outputs->d)
  );
}
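
// Minimal illustration (not part of the original SVM-lin sources): how the
// CSR-style arrays assembled in svmlin_rcpp() would be traversed to score a
// single example against a trained weight vector w. score_example() is a
// hypothetical helper included only to document the data layout; it assumes
// rowptr holds m + 1 offsets into val/colind and that colind stores 0-based
// feature indices, as produced by the dgCMatrix slots above.
static double score_example(const data *D, const double *w, int i) {
  double s = 0.0;
  for (int j = D->rowptr[i]; j < D->rowptr[i + 1]; j++)
    s += w[D->colind[j]] * D->val[j];  // accumulate w . x_i over the non-zeros of example i
  return s;
}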