Re-implement textmodel_svmlim() without dependencies
1 |
#include "lib.h" |
|
2 |
using namespace quanteda; |
|
3 |
using namespace RcppParallel; |
|
4 |
using namespace Rcpp; |
|
5 |
using namespace arma; |
|
6 |
|
|
7 |
//# define RESIDUALS_LIM 0.5
|
|
8 |
# define NUMSVD 1
|
|
9 |
# define OUTERITER 100
|
|
10 |
# define INNERITER 10
|
|
11 |
# define LASTLP -2000000000000.0
|
|
12 |
# define SA 0.99 //simulated annealing algorithm |
|
13 |
|
|
14 |
//find the principle elements for the sparse residual matrix
|
|
15 | 1 |
void create_residual(const std::size_t row_num, const arma::sp_mat& wfm, const arma::colvec &rsum, const arma::rowvec &csum, const double &asum, |
16 |
const double residual_floor, const std::size_t K, Triplets &residual_tri) { |
|
17 | 1 |
for (std::size_t k = 0; k < K; k++) { |
18 | 1 |
double residual = (wfm(row_num, k) - rsum(row_num) * csum(k) / asum) / sqrt(rsum(row_num) * csum(k) / asum); |
19 | 1 |
if (fabs(residual) > residual_floor) { |
20 | 1 |
Triplet mat_triplet = std::make_tuple(row_num, k, residual); |
21 | 1 |
residual_tri.push_back(mat_triplet); |
22 |
}
|
|
23 |
}
|
|
24 |
}
|
|
25 |
//Create the residual matrix
|
|
26 | 1 |
struct Residual : public Worker { |
27 |
const arma::sp_mat& wfm; |
|
28 |
const arma::colvec &rsum; |
|
29 |
const arma::rowvec &csum; |
|
30 |
const double asum; |
|
31 |
const double residual_floor; |
|
32 |
const std::size_t K; |
|
33 |
|
|
34 |
// output: residual[index0, index1, residual_value]
|
|
35 |
Triplets &residual_tri; |
|
36 |
|
|
37 |
//constructor
|
|
38 | 1 |
Residual(const arma::sp_mat& wfm, const arma::colvec &rsum, const arma::rowvec &csum, const double asum, |
39 |
const double residual_floor, const std::size_t K, Triplets &residual_tri): |
|
40 | 1 |
wfm(wfm), rsum(rsum), csum(csum), asum(asum), residual_floor(residual_floor), |
41 | 1 |
K(K), residual_tri(residual_tri) {} |
42 |
|
|
43 |
|
|
44 | 1 |
void operator() (std::size_t begin, std::size_t end) { |
45 | 1 |
for (std::size_t i = begin; i < end; i++) { |
46 | 1 |
create_residual(i, wfm, rsum, csum, asum, residual_floor, K, residual_tri); |
47 |
}
|
|
48 |
}
|
|
49 |
};
|
|
50 |
|
|
51 |
//Compute LOG-POSTERIOR
|
|
52 | 1 |
struct LogPos : public Worker { |
53 |
const arma::colvec& alpha; |
|
54 |
const arma::rowvec& psi; |
|
55 |
const arma::rowvec& beta; |
|
56 |
const arma::colvec& theta; |
|
57 |
const arma::sp_mat& wfm; |
|
58 |
const std::size_t K; |
|
59 |
|
|
60 |
// accumulated value
|
|
61 |
double lp; |
|
62 |
|
|
63 |
// constructors
|
|
64 | 1 |
LogPos(const arma::colvec& alpha, const arma::rowvec& psi, const arma::rowvec& beta, |
65 |
const arma::colvec& theta, const arma::sp_mat& wfm, const std::size_t K) : |
|
66 | 1 |
alpha(alpha), psi(psi), beta(beta), theta(theta), wfm(wfm), K(K), lp(0) {} |
67 |
|
|
68 | 1 |
LogPos(const LogPos& sum, Split) : |
69 | 1 |
alpha(sum.alpha), psi(sum.psi), beta(sum.beta), |
70 | 1 |
theta(sum.theta), wfm(sum.wfm), K(sum.K), lp(0) {} |
71 |
|
|
72 |
|
|
73 | 1 |
void operator() (std::size_t begin, std::size_t end) { |
74 | 1 |
for (std::size_t i = begin; i < end; i++) { |
75 | 1 |
double temp = lp; |
76 | 1 |
for (std::size_t k = 0; k < K; k++){ |
77 | 1 |
double loglambdaik = alpha(i) + psi(k) + beta(k) * theta(i); |
78 | 1 |
temp += loglambdaik * wfm(i,k) - exp(loglambdaik); |
79 |
}
|
|
80 | 1 |
lp = temp; |
81 |
}
|
|
82 |
}
|
|
83 |
|
|
84 |
// join lp with that of another LogPos
|
|
85 | 1 |
void join(const LogPos& rhs) { |
86 | 1 |
lp += rhs.lp; |
87 |
}
|
|
88 |
};
|
|
89 |
|
|
90 |
// UPDATE WORD PARAMETERS
|
|
91 | 1 |
struct WordPar : public Worker { |
92 |
const arma::colvec& alpha; |
|
93 |
RVector<double> psi; |
|
94 |
RVector<double> beta; |
|
95 |
const arma::colvec& theta; |
|
96 |
const arma::rowvec& phi; |
|
97 |
const arma::sp_mat& wfm; |
|
98 |
const NumericVector& tolvec; |
|
99 |
const int outeriter; |
|
100 |
const double priorprecpsi; |
|
101 |
const double priorprecbeta; |
|
102 |
double stepsize; |
|
103 |
|
|
104 | 1 |
WordPar(const arma::colvec& alpha, NumericVector& psi, NumericVector& beta, |
105 |
const arma::colvec& theta, const arma::rowvec& phi, const arma::sp_mat& wfm, |
|
106 |
const NumericVector& tolvec, const int outeriter, |
|
107 |
const double priorprecpsi, const double priorprecbeta, double stepsize) : |
|
108 | 1 |
alpha(alpha), psi(psi), beta(beta), theta(theta), phi(phi), wfm(wfm), tolvec(tolvec), |
109 | 1 |
outeriter(outeriter), priorprecpsi(priorprecpsi), priorprecbeta(priorprecbeta), stepsize(stepsize) {} |
110 |
|
|
111 | 1 |
void operator() (std::size_t begin, std::size_t end) { |
112 | 1 |
for (std::size_t k = begin; k < end; k++) { |
113 | 1 |
double cc = 1; |
114 | 1 |
int inneriter = 0; |
115 | 1 |
if (outeriter == 1) stepsize = 0.5; |
116 | 1 |
arma::colvec col_k(wfm.col(k)); |
117 | 1 |
arma::colvec lambdak(alpha.size()); |
118 | 1 |
arma::mat pars(2,1); |
119 | 1 |
arma::mat newpars(2,1); |
120 | 1 |
arma::mat G(2,1); |
121 | 1 |
arma::mat H(2,2); |
122 | 1 |
while ((cc > tolvec(1)) && inneriter < INNERITER){ |
123 | 1 |
inneriter++; |
124 | 1 |
lambdak = exp(alpha + psi[k] + beta[k] * theta); |
125 | 1 |
G(0,0) = accu(col_k - lambdak) / phi(k) - psi[k] * priorprecpsi; |
126 | 1 |
G(1,0) = accu(theta % (col_k - lambdak)) / phi(k) - beta[k] * priorprecbeta; |
127 | 1 |
H(0,0) = -accu(lambdak) / phi(k) - priorprecpsi; |
128 | 1 |
H(1,0) = -accu(theta % lambdak) / phi(k); |
129 | 1 |
H(0,1) = H(1,0); |
130 | 1 |
H(1,1) = -accu((theta % theta) % lambdak) / phi(k) - priorprecbeta; |
131 | 1 |
pars(0,0) = psi[k]; |
132 | 1 |
pars(1,0) = beta[k]; |
133 | 1 |
newpars(0,0) = pars(0,0) - stepsize * (H(1,1) * G(0,0) - H(0,1) * G(1,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0)); |
134 | 1 |
newpars(1,0) = pars(1,0) - stepsize * (H(0,0) * G(1,0) - H(1,0) * G(0,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0)); |
135 | 1 |
newpars(0,0) *= SA; //simulated annealing algorithm |
136 | 1 |
newpars(1,0) *= SA; |
137 | 1 |
psi[k] = newpars(0,0); |
138 | 1 |
beta[k] = newpars(1,0); |
139 | 1 |
cc = abs(newpars - pars).max(); |
140 | 1 |
stepsize = 1.0; |
141 |
}
|
|
142 |
}
|
|
143 | 1 |
} //End of operator |
144 |
};
|
|
145 |
|
|
146 |
// UPDATE DOCUMENT PARAMETERS
|
|
147 | 1 |
struct DocPar : public Worker { |
148 |
RVector<double> alpha; |
|
149 |
const arma::rowvec& psi; |
|
150 |
const arma::rowvec& beta; |
|
151 |
RVector<double> theta; |
|
152 |
const arma::rowvec& phi; |
|
153 |
const arma::sp_mat& wfm; |
|
154 |
const NumericVector& tolvec; |
|
155 |
const int outeriter; |
|
156 |
const double priorprecalpha; |
|
157 |
const double priorprectheta; |
|
158 |
double stepsize; |
|
159 |
|
|
160 | 1 |
DocPar(NumericVector& alpha, const arma::rowvec& psi, const arma::rowvec& beta, |
161 |
NumericVector& theta, const arma::rowvec& phi, const arma::sp_mat& wfm, |
|
162 |
const NumericVector& tolvec, const int outeriter, |
|
163 |
const double priorprecalpha, const double priorprectheta, double stepsize) : |
|
164 | 1 |
alpha(alpha), psi(psi), beta(beta), theta(theta), phi(phi), wfm(wfm), tolvec(tolvec), |
165 | 1 |
outeriter(outeriter), priorprecalpha(priorprecalpha), priorprectheta(priorprectheta), stepsize(stepsize) {} |
166 |
|
|
167 | 1 |
void operator() (std::size_t begin, std::size_t end) { |
168 | 1 |
for (std::size_t i = begin; i < end; i++) { |
169 | 1 |
double cc = 1; |
170 | 1 |
int inneriter = 0; |
171 | 1 |
if (outeriter == 1) stepsize = 0.5; |
172 | 1 |
arma::rowvec row_i(wfm.row(i)); |
173 | 1 |
arma::rowvec lambdai(alpha.size()); |
174 | 1 |
arma::mat pars(2,1); |
175 | 1 |
arma::mat newpars(2,1); |
176 | 1 |
arma::mat G(2,1); |
177 | 1 |
arma::mat H(2,2); |
178 | 1 |
while ((cc > tolvec(1)) && inneriter < INNERITER){ |
179 | 1 |
inneriter++; |
180 | 1 |
lambdai = exp(alpha[i] + psi + beta * theta[i]); |
181 | 1 |
G(0,0) = accu((row_i - lambdai) / phi) - alpha[i] * priorprecalpha; |
182 | 1 |
G(1,0) = accu((beta % (row_i - lambdai)) / phi) - theta[i] * priorprectheta; |
183 | 1 |
H(0,0) = -accu(lambdai / phi) - priorprecalpha; |
184 | 1 |
H(1,0) = -accu((beta % lambdai) / phi); |
185 | 1 |
H(0,1) = H(1,0); |
186 | 1 |
H(1,1) = -accu(((beta % beta) % lambdai) / phi) - priorprectheta; |
187 | 1 |
pars(0,0) = alpha[i]; |
188 | 1 |
pars(1,0) = theta[i]; |
189 | 1 |
newpars(0,0) = pars(0,0) - stepsize*(H(1,1) * G(0,0) - H(0,1) * G(1,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0)); |
190 | 1 |
newpars(1,0) = pars(1,0) - stepsize*(H(0,0) * G(1,0) - H(1,0) * G(0,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0)); |
191 | 1 |
newpars(0,0) *= SA; |
192 | 1 |
newpars(0,0) *= SA; |
193 | 1 |
alpha[i] = newpars(0,0); |
194 | 1 |
theta[i] = newpars(1,0); |
195 | 1 |
cc = abs(newpars - pars).max(); |
196 | 1 |
stepsize = 1.0; |
197 |
}
|
|
198 |
}
|
|
199 | 1 |
} //End of operator |
200 |
};
|
|
201 |
|
|
202 |
// Update dispersion parameters -- single dispersion parameter for all words
|
|
203 |
struct DispPar : public Worker { |
|
204 |
const arma::colvec& alpha; |
|
205 |
const arma::rowvec& psi; |
|
206 |
const arma::rowvec& beta; |
|
207 |
const arma::colvec& theta; |
|
208 |
const arma::sp_mat& wfm; |
|
209 |
const std::size_t& N; |
|
210 |
|
|
211 |
// accumulated value
|
|
212 |
double phitmp; |
|
213 |
|
|
214 |
// constructors
|
|
215 |
DispPar(const arma::colvec& alpha, const arma::rowvec& psi, const arma::rowvec& beta, |
|
216 |
const arma::colvec& theta, const arma::sp_mat& wfm, const std::size_t& N) : |
|
217 |
alpha(alpha), psi(psi), beta(beta), theta(theta), wfm(wfm), N(N), phitmp(0) {} |
|
218 |
|
|
219 |
DispPar(const DispPar& sum, Split) : |
|
220 |
alpha(sum.alpha), psi(sum.psi), beta(sum.beta), |
|
221 |
theta(sum.theta), wfm(sum.wfm), N(sum.N), phitmp(0) {} |
|
222 |
|
|
223 |
|
|
224 |
void operator() (std::size_t begin, std::size_t end) { |
|
225 |
for (std::size_t k = begin; k < end; k++) { |
|
226 |
float temp = phitmp; |
|
227 |
for (std::size_t i = 0; i < N; i++){ |
|
228 |
double mutmp = exp(alpha(i) + psi(k) + beta(k) * theta(i)); |
|
229 |
temp += (wfm(i,k) - mutmp) * (wfm(i,k) - mutmp) / mutmp; |
|
230 |
}
|
|
231 |
phitmp = temp; |
|
232 |
}
|
|
233 |
}
|
|
234 |
|
|
235 |
// join phitmp with that of another DispPar
|
|
236 |
void join(const DispPar& rhs) { |
|
237 |
phitmp += rhs.phitmp; |
|
238 |
}
|
|
239 |
};
|
|
240 |
|
|
241 |
// Update dispersion parameters -- individual dispersion parameter for each word
|
|
242 | 1 |
struct DispPar2 : public Worker { |
243 |
const arma::colvec& alpha; |
|
244 |
const arma::rowvec& psi; |
|
245 |
const arma::rowvec& beta; |
|
246 |
const arma::colvec& theta; |
|
247 |
const arma::sp_mat& wfm; |
|
248 |
const IntegerVector& disptype; |
|
249 |
const NumericVector& dispmin; |
|
250 |
const std::size_t& N; |
|
251 |
const std::size_t& K; |
|
252 |
// output vector
|
|
253 |
RVector<double> phi; |
|
254 |
|
|
255 |
// constructors
|
|
256 | 1 |
DispPar2(const arma::colvec& alpha, const arma::rowvec& psi, const arma::rowvec& beta, |
257 |
const arma::colvec& theta, const arma::sp_mat& wfm, const IntegerVector& disptype, |
|
258 |
const NumericVector& dispmin, const std::size_t& N, const std::size_t& K, NumericVector& phi) : |
|
259 | 1 |
alpha(alpha), psi(psi), beta(beta), theta(theta), wfm(wfm), disptype(disptype), |
260 | 1 |
dispmin(dispmin), N(N), K(K), phi(phi){} |
261 |
|
|
262 | 1 |
void operator() (std::size_t begin, std::size_t end) { |
263 | 1 |
for (std::size_t k = begin; k < end; k++) { |
264 | 1 |
double phitmp = 0.0; |
265 | 1 |
for (std::size_t i = 0; i < N; i++){ |
266 | 1 |
double mutmp = exp(alpha(i) + psi(k) + beta(k) * theta(i)); |
267 | 1 |
phitmp += (wfm(i,k) - mutmp) * (wfm(i,k) - mutmp) / mutmp; |
268 |
}
|
|
269 | 1 |
phitmp = ((K)*phitmp)/(N * K - 2 * N - 2 * K); |
270 | 1 |
if (disptype(0) == 4) { |
271 |
phi[k] = fmax(dispmin(0), phitmp); |
|
272 |
}else{ |
|
273 | 1 |
phi[k] = phitmp; |
274 |
}
|
|
275 |
}
|
|
276 |
}
|
|
277 |
};
|
|
278 |
|
|
279 |
// Calculate Document Standard Errors
|
|
280 | 1 |
struct DocErr : public Worker { |
281 |
const arma::colvec& alpha; |
|
282 |
const arma::rowvec& psi; |
|
283 |
const arma::rowvec& beta; |
|
284 |
const arma::colvec& theta; |
|
285 |
const arma::rowvec& phi; |
|
286 |
const double& priorprecalpha; |
|
287 |
const double& priorprectheta; |
|
288 |
// output vector
|
|
289 |
RVector<double> thetaSE; |
|
290 |
|
|
291 |
// constructors
|
|
292 | 1 |
DocErr(const arma::colvec& alpha, const arma::rowvec& psi, const arma::rowvec& beta, |
293 |
const arma::colvec& theta, const arma::rowvec& phi, |
|
294 |
const double& priorprecalpha, const double& priorprectheta, NumericVector& thetaSE) : |
|
295 | 1 |
alpha(alpha), psi(psi), beta(beta), theta(theta), phi(phi), |
296 | 1 |
priorprecalpha(priorprecalpha), priorprectheta(priorprectheta), thetaSE(thetaSE){} |
297 |
|
|
298 | 1 |
void operator() (std::size_t begin, std::size_t end) { |
299 | 1 |
arma::rowvec lambdai(alpha.size()); |
300 | 1 |
for (std::size_t i = begin; i < end; i++) { |
301 | 1 |
lambdai = exp(alpha(i) + psi + beta * theta(i)); |
302 | 1 |
arma::mat H(2,2); |
303 | 1 |
H(0,0) = -accu(lambdai / phi) - priorprecalpha; |
304 | 1 |
H(1,0) = -accu((beta % lambdai) / phi); |
305 | 1 |
H(0,1) = H(1,0); |
306 | 1 |
H(1,1) = -accu(((beta % beta) % lambdai) / phi) - priorprectheta; |
307 | 1 |
thetaSE[i] = sqrt(-1.0 * H(0,0) / (H(0,0) * H(1,1) - H(1,0) * H(0,1))); |
308 |
}
|
|
309 |
}
|
|
310 |
};
|
|
311 |
// [[Rcpp::export]]
|
|
312 |
|
|
313 | 1 |
Rcpp::List qatd_cpp_wordfish(arma::sp_mat &wfm, |
314 |
IntegerVector& dirvec, |
|
315 |
NumericVector& priorvec, |
|
316 |
NumericVector& tolvec, |
|
317 |
IntegerVector& disptype, |
|
318 |
NumericVector& dispmin, |
|
319 |
bool ABS,bool svd_sparse, |
|
320 |
double residual_floor){ |
|
321 |
|
|
322 |
// DEFINE INPUTS
|
|
323 | 1 |
double priorprecalpha = priorvec(0); |
324 | 1 |
double priorprecpsi = priorvec(1); |
325 | 1 |
double priorprecbeta = priorvec(2); |
326 | 1 |
double priorprectheta = priorvec(3); |
327 |
|
|
328 |
// std::random_device rd; // random engine
|
|
329 |
// std::mt19937 mt(rd());
|
|
330 | 1 |
std::mt19937 mt(time(0)); // issue #1063 |
331 | 1 |
std::uniform_real_distribution<double> dist(0.0, 1.0); |
332 |
|
|
333 | 1 |
std::size_t N = wfm.n_rows; |
334 | 1 |
std::size_t K = wfm.n_cols; |
335 |
|
|
336 |
// Set initial values
|
|
337 | 1 |
arma::colvec alpha(N); |
338 | 1 |
arma::rowvec psi(K); |
339 | 1 |
arma::rowvec beta(K); |
340 | 1 |
arma::colvec theta(N); |
341 | 1 |
arma::colvec thetaSE(N); // document position standard errors |
342 | 1 |
arma::rowvec phi(K); // word-level dispersion parameters |
343 | 1 |
phi.fill(1.0); |
344 |
|
|
345 |
// Construct Chi-Sq Residuals
|
|
346 | 1 |
arma::colvec rsum(sum(wfm,1)); |
347 | 1 |
arma::rowvec csum(sum(wfm,0)); |
348 | 1 |
double asum = accu(wfm); |
349 | 1 |
if (svd_sparse == true){ |
350 |
|
|
351 |
//create the residual matrix
|
|
352 | 1 |
Triplets residual_tri; |
353 |
|
|
354 |
#if QUANTEDA_USE_TBB
|
|
355 | 1 |
Residual residual(wfm, rsum, csum, asum, residual_floor, K, residual_tri); |
356 | 1 |
parallelFor(0, N, residual); |
357 |
//Rcout<<"used TBB"<<std::endl;
|
|
358 |
#else
|
|
359 |
for (std::size_t i = 0; i < N; i++) { |
|
360 |
create_residual(i, wfm, rsum, csum, asum, residual_floor, K, residual_tri); |
|
361 |
}
|
|
362 |
//Rcout<<"not use TBB"<<std::endl;
|
|
363 |
#endif
|
|
364 |
// Convert to Rcpp objects
|
|
365 | 1 |
std::size_t mat_size = residual_tri.size(); |
366 | 1 |
arma::umat index_mat(2, mat_size, arma::fill::zeros); |
367 | 1 |
arma::vec w_values(mat_size, arma::fill::zeros); |
368 | 1 |
for (std::size_t k = 0; k < residual_tri.size(); k++) { |
369 | 1 |
index_mat(0,k) = std::get<0>(residual_tri[k]); |
370 | 1 |
index_mat(1,k) = std::get<1>(residual_tri[k]); |
371 | 1 |
w_values(k) = std::get<2>(residual_tri[k]); |
372 |
}
|
|
373 |
|
|
374 |
// constract the sparse matrix
|
|
375 | 1 |
arma::sp_mat C(index_mat, w_values, N, K); |
376 |
|
|
377 | 1 |
const int svdk = NUMSVD; |
378 | 1 |
arma::mat U(N, svdk); |
379 | 1 |
arma::vec s(svdk); |
380 | 1 |
arma::mat V(K, svdk); |
381 | 1 |
arma::svds(U, s, V, C, svdk); |
382 | 1 |
for (std::size_t i = 0; i < N; i++) theta(i) = pow(rsum(i) / asum, -0.5) * U(i, 0); |
383 |
} else { |
|
384 |
// Load initial values
|
|
385 |
for (std::size_t i=0; i < N; i++) theta(i) = pow(rsum(i) / asum, -0.5) - dist(mt);//* U(i, 0); |
|
386 |
}
|
|
387 | 1 |
for (std::size_t k = 0; k < K; k++) beta(k) = 0;// pow(csum(k)/asum,-0.5) * V(k,0); |
388 |
//beta.fill(0.0);
|
|
389 | 1 |
alpha = log(rsum); |
390 | 1 |
psi = log(csum/N); |
391 |
|
|
392 | 1 |
alpha = alpha - log(mean(rsum)); |
393 | 1 |
theta = (theta - mean(theta)) / stddev(theta); |
394 |
|
|
395 |
// Create temporary variables
|
|
396 | 1 |
arma::mat pars(2,1); |
397 | 1 |
arma::mat newpars(2,1); |
398 | 1 |
arma::mat G(2,1); |
399 | 1 |
arma::mat H(2,2); |
400 | 1 |
arma::rowvec lambdai(K); |
401 | 1 |
arma::colvec lambdak(N); |
402 | 1 |
double stepsize = 1.0; |
403 | 1 |
int outeriter = 0; |
404 |
|
|
405 |
//Initialize LOG-POSTERIOR
|
|
406 | 1 |
double lastlp = LASTLP; |
407 | 1 |
double lp = -1.0 * (accu(0.5 * ((alpha % alpha) * priorprecalpha)) + accu(0.5 * ((psi % psi) * priorprecpsi)) |
408 | 1 |
+ accu(0.5 * ((beta % beta) * priorprecbeta)) + accu(0.5 * ((theta % theta) * priorprectheta))); |
409 |
|
|
410 |
// compute LOG posterior
|
|
411 | 1 |
LogPos logPos(alpha, psi, beta, theta, wfm, K); |
412 | 1 |
parallelReduce(0, N, logPos); |
413 | 1 |
lp += logPos.lp; |
414 |
|
|
415 |
|
|
416 |
// BEGIN WHILE LOOP
|
|
417 | 1 |
while (((lp - lastlp) > tolvec(0)) && outeriter < OUTERITER) { |
418 | 1 |
outeriter++; |
419 |
//Rcout<<"alphs_b="<<mean(alpha)<<" theta_B="<<mean(theta)<<std::endl;
|
|
420 |
|
|
421 |
// UPDATE WORD PARAMETERS
|
|
422 | 1 |
NumericVector psi_N(psi.begin(), psi.end()); |
423 | 1 |
NumericVector beta_N(beta.begin(), beta.end()); |
424 | 1 |
WordPar wordPar(alpha, psi_N, beta_N, theta, phi, wfm, tolvec, |
425 | 1 |
outeriter, priorprecpsi, priorprecbeta, stepsize); |
426 | 1 |
parallelFor(0, K, wordPar); |
427 |
|
|
428 | 1 |
psi = as<arma::rowvec>(psi_N); |
429 | 1 |
beta = as<arma::rowvec>(beta_N); |
430 |
|
|
431 |
// UPDATE DOCUMENT PARAMETERS
|
|
432 | 1 |
NumericVector alpha_N(alpha.begin(), alpha.end()); |
433 | 1 |
NumericVector theta_N(theta.begin(), theta.end()); |
434 | 1 |
DocPar docPar(alpha_N, psi, beta, theta_N, phi, wfm, tolvec, |
435 | 1 |
outeriter, priorprecalpha, priorprectheta, stepsize); |
436 | 1 |
parallelFor(0, N, docPar); |
437 |
|
|
438 | 1 |
alpha = as<arma::colvec>(alpha_N); |
439 | 1 |
theta = as<arma::colvec>(theta_N); |
440 |
|
|
441 |
|
|
442 |
// UPDATE DISPERSION PARAMETERS
|
|
443 |
|
|
444 | 1 |
if (disptype(0) == 2) { // single dispersion parameter for all words |
445 |
DispPar dispPar(alpha, psi, beta, theta, wfm, N); |
|
446 |
parallelReduce(0, K, dispPar); |
|
447 |
double phitmp = dispPar.phitmp; |
|
448 |
phitmp /= N * K - 2 * N - 2 * K; |
|
449 |
phi.fill(phitmp); |
|
450 |
}
|
|
451 |
|
|
452 | 1 |
if (disptype(0) >= 3) { // individual dispersion parameter for each word |
453 | 1 |
NumericVector phi_N(phi.begin(), phi.end()); |
454 | 1 |
DispPar2 dispPar2(alpha, psi, beta, theta, wfm, disptype, dispmin, N, K, phi_N); |
455 | 1 |
parallelFor(0, K, dispPar2); |
456 | 1 |
phi = as<arma::rowvec>(phi_N); |
457 |
}
|
|
458 |
|
|
459 | 1 |
alpha = alpha - mean(alpha); |
460 | 1 |
theta = (theta - mean(theta))/stddev(theta); |
461 |
|
|
462 |
// CHECK LOG-POSTERIOR FOR CONVERGENCE
|
|
463 | 1 |
lastlp = lp; |
464 | 1 |
lp = -1.0 * (accu(0.5 * ((alpha % alpha) * priorprecalpha)) + accu(0.5 * ((psi % psi) * priorprecpsi)) + |
465 | 1 |
accu(0.5 * ((beta % beta) * priorprecbeta)) + accu(0.5 * ((theta % theta) * priorprectheta))); |
466 |
|
|
467 | 1 |
LogPos logPos2(alpha, psi, beta, theta, wfm, K); |
468 | 1 |
parallelReduce(0, N, logPos2); |
469 | 1 |
lp += logPos2.lp; |
470 |
// END WHILE LOOP
|
|
471 |
}
|
|
472 |
|
|
473 |
// Fix Global Polarity
|
|
474 |
//Rcout<<"outeriter="<<outeriter<<" lp - lastlp= "<<lp - lastlp<<std::endl;
|
|
475 |
// added the -1 because C counts from ZERO... -- KB
|
|
476 | 1 |
if (theta(dirvec(0) - 1) > theta(dirvec(1) - 1)) { |
477 | 1 |
beta = -beta; |
478 | 1 |
theta = -theta; |
479 |
}
|
|
480 |
|
|
481 |
// COMPUTE DOCUMENT STANDARD ERRORS
|
|
482 | 1 |
NumericVector thetaSE_N(thetaSE.begin(), thetaSE.end()); |
483 | 1 |
DocErr docErr(alpha, psi, beta, theta, phi, priorprecalpha, priorprectheta, thetaSE_N); |
484 | 1 |
parallelFor(0, N, docErr); |
485 | 1 |
thetaSE = as<arma::colvec>(thetaSE_N); |
486 |
|
|
487 |
// DEFINE OUTPUT
|
|
488 | 1 |
return Rcpp::List::create(Rcpp::Named("theta") = theta, |
489 | 1 |
Rcpp::Named("alpha") = alpha, |
490 | 1 |
Rcpp::Named("psi") = psi, |
491 | 1 |
Rcpp::Named("beta") = beta, |
492 | 1 |
Rcpp::Named("phi") = phi, |
493 | 1 |
Rcpp::Named("thetaSE") = thetaSE); |
494 |
|
|
495 |
}
|
Read our documentation on viewing source code .