1
#include "lib.h"
2
#include <math.h>
3
using namespace Rcpp;
4

5
// [[Rcpp::export]]
6 1
Rcpp::List qatd_cpp_wordfish_dense(SEXP wfm, SEXP dir, SEXP priors, SEXP tol, SEXP disp, SEXP dispfloor, bool abs_err){
7

8
    // DEFINE INPUTS
9

10 1
    Rcpp::NumericMatrix Y(wfm);
11 1
    Rcpp::NumericVector priorvec(priors);
12 1
    Rcpp::NumericVector tolvec(tol);
13 1
    Rcpp::IntegerVector dirvec(dir);
14 1
    Rcpp::IntegerVector disptype(disp);
15 1
    Rcpp::NumericVector dispmin(dispfloor);
16

17 1
    double priorprecalpha = priorvec(0);
18 1
    double priorprecpsi = priorvec(1);
19 1
    double priorprecbeta = priorvec(2);
20 1
    double priorprectheta = priorvec(3);
21

22 1
    int N = Y.nrow();
23 1
    int K = Y.ncol();
24

25
    // SET INITIAL VALUES
26 1
    Rcpp::NumericVector alpha(N);
27 1
    Rcpp::NumericVector psi(K);
28 1
    Rcpp::NumericVector beta(K);
29 1
    Rcpp::NumericVector theta(N);
30

31 1
    Rcpp::NumericVector thetaSE(N); // document position standard errors
32 1
    Rcpp::NumericVector phi(K,1.0); // word-level dispersion parameters
33

34
    // Construct Chi-Sq Residuals
35 1
    arma::mat C(Y.begin(), N, K);
36 1
    arma::colvec rsum = sum(C,1);
37 1
    arma::rowvec csum = sum(C,0);
38 1
    double asum = sum(rsum);
39 1
    for (int i = 0; i < N; i++){
40 1
        for (int k=0; k < K; k++){
41 1
            C(i,k) = (Y(i,k) - rsum(i)*csum(k)/asum)/sqrt(rsum(i)*csum(k)/asum);
42
        }
43
    }
44

45
    // Singular Value Decomposition of Chi-Sq Residuals
46 1
    arma::mat U(N,N);
47 1
    arma::vec s(N);
48 1
    arma::mat V(K,N);
49 1
    svd(U,s,V,C);
50

51
    // Load initial values
52 1
    for (int i = 0; i < N; i++) {
53 1
        theta(i) = pow(rsum(i) / asum,-0.5) * U(i,0);
54
        //Rcout<<"theta starting values:"<<theta(i)<<std::endl;
55
    }
56 1
    for (int k=0; k < K; k++) beta(k) = 0; // pow(csum(k)/asum,-0.5) * V(k,0);
57 1
    for (int i=0; i < N; i++) alpha = log(rsum);
58 1
    psi = log(csum/N);
59

60 1
    alpha = alpha - log(mean(rsum));
61 1
    theta = (theta - mean(theta)) / sd(theta);
62

63
    // Create temporary variables
64 1
    Rcpp::NumericMatrix pars(2,1);
65 1
    Rcpp::NumericMatrix newpars(2,1);
66 1
    Rcpp::NumericMatrix G(2,1);
67 1
    Rcpp::NumericMatrix H(2,2);
68
    double loglambdaik;
69
    double mutmp;
70
    double phitmp;
71 1
    Rcpp::NumericVector lambdai(K);
72 1
    Rcpp::NumericVector lambdak(N);
73 1
    double stepsize = 1.0;
74 1
    double cc = 0.0;
75 1
    int inneriter = 0;
76 1
    int outeriter = 0;
77

78 1
    double lastlp = -2000000000000.0;
79 1
    double lp = -1.0 * (sum(0.5 * ((alpha*alpha) * (priorprecalpha))) +
80 1
                        sum(0.5 * ((psi * psi) * (priorprecpsi))) +
81 1
                        sum(0.5 * ((beta*beta)*(priorprecbeta))) +
82 1
                        sum(0.5 * ((theta*theta)*(priorprectheta))));
83

84 1
    for (int i = 0; i < N; i++){
85 1
        for (int k=0; k < K; k++){
86 1
            loglambdaik = alpha(i) + psi(k) + beta(k) * theta(i);
87 1
            lp = lp + loglambdaik * Y(i,k) - exp(loglambdaik);
88
        }
89
    }
90

91
    // BEGIN WHILE LOOP
92 1
    double err = (abs_err == true) ? fabs(lp - lastlp) : (lp - lastlp);
93 1
    while ((err > tolvec(0)) && outeriter < 100) {
94 1
        outeriter++;
95

96
        // UPDATE WORD PARAMETERS
97 1
        for (int k = 0; k < K; k++) {
98 1
            cc = 1;
99 1
            inneriter = 0;
100 1
            if (outeriter == 1) stepsize = 0.5;
101 1
            while ((cc > tolvec(1)) && inneriter < 10){
102 1
                inneriter++;
103 1
                lambdak = exp(alpha + psi(k) + beta(k) * theta);
104 1
                G(0,0) = sum(Y(_,k) - lambdak) / phi(k) - psi(k) * (priorprecpsi);
105 1
                G(1,0) = sum(theta * (Y(_,k) - lambdak)) / phi(k) - beta(k) * (priorprecbeta);
106 1
                H(0,0) = -sum(lambdak) / phi(k) - priorprecpsi;
107 1
                H(1,0) = -sum(theta * lambdak)/phi(k);
108 1
                H(0,1) = H(1,0);
109 1
                H(1,1) = -sum((theta * theta) * lambdak) / phi(k) - priorprecbeta;
110 1
                pars(0,0) = psi(k);
111 1
                pars(1,0) = beta(k);
112 1
                newpars(0,0) = pars(0,0) - stepsize * (H(1,1) * G(0,0) - H(0,1) * G(1,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0));
113 1
                newpars(1,0) = pars(1,0) - stepsize * (H(0,0) * G(1,0) - H(1,0) * G(0,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0));
114 1
                psi(k) = newpars(0,0);
115 1
                beta(k) = newpars(1,0);
116 1
                cc = max(abs(newpars - pars));
117 1
                stepsize = 1.0;
118
            }
119
        }
120

121

122
        // UPDATE DOCUMENT PARAMETERS
123 1
        for (int i = 0; i < N; i++){
124 1
            cc = 1;
125 1
            inneriter = 0;
126 1
            if (outeriter == 1) stepsize = 0.5;
127 1
            while ((cc > tolvec(1)) && inneriter < 10){
128 1
                inneriter++;
129 1
                lambdai = exp(alpha(i) + psi + beta * theta(i));
130 1
                G(0,0) = sum((Y(i,_) - lambdai) / phi) - alpha(i) * priorprecalpha;
131 1
                G(1,0) = sum((beta * (Y(i,_) - lambdai)) / phi) - theta(i) * priorprectheta;
132 1
                H(0,0) = -sum(lambdai/phi) - priorprecalpha;
133 1
                H(1,0) = -sum((beta * lambdai) / phi);
134 1
                H(0,1) = H(1,0);
135 1
                H(1,1) = -sum(((beta * beta) * lambdai) / phi) - priorprectheta;
136 1
                pars(0,0) = alpha(i);
137 1
                pars(1,0) = theta(i);
138 1
                newpars(0,0) = pars(0,0) - stepsize * (H(1,1) * G(0,0) - H(0,1) * G(1,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0));
139 1
                newpars(1,0) = pars(1,0) - stepsize * (H(0,0) * G(1,0) - H(1,0) * G(0,0)) / (H(0,0) * H(1,1) - H(0,1) * H(1,0));
140 1
                alpha(i) = newpars(0,0);
141 1
                theta(i) = newpars(1,0);
142 1
                cc = max(abs(newpars - pars));
143 1
                stepsize = 1.0;
144
            }
145
        }
146

147
        // UPDATE DISPERSION PARAMETERS
148

149 1
        if (disptype(0) == 2) { // single dispersion parameter for all words
150 1
            phitmp = 0.0;
151 1
            for (int k = 0; k < K; k++){
152 1
                for (int i=0; i < N; i++){
153 1
                    mutmp = exp(alpha(i) + psi(k) + beta(k) * theta(i));
154 1
                    phitmp = phitmp + (Y(i,k) - mutmp) * (Y(i,k) - mutmp) / mutmp;
155
                }
156
            }
157 1
            phitmp = phitmp / (N * K - 2 * N - 2 * K);
158 1
            for (int k = 0; k < K; k++) phi(k) = phitmp;
159
        }
160

161 1
        if (disptype(0) >= 3) { // individual dispersion parameter for each word
162 1
            for (int k = 0; k < K; k++){
163 1
                phitmp = 0.0;
164 1
                for (int i = 0; i < N; i++){
165 1
                    mutmp = exp(alpha(i) + psi(k) + beta(k) * theta(i));
166 1
                    phitmp = phitmp + (Y(i,k) - mutmp) * (Y(i,k) - mutmp) / mutmp;
167
                }
168 1
                phitmp = (K * phitmp) / (N * K - 2 * N - 2 * K);
169 1
                phi(k) = phitmp;
170
                // set ceiling on underdispersion
171 1
                if (disptype(0) == 4) phi(k) = fmax(dispmin(0), phi(k));
172
            }
173
        }
174

175 1
        alpha = alpha - mean(alpha);
176 1
        theta = (theta - mean(theta))/sd(theta);
177

178
        // CHECK LOG-POSTERIOR FOR CONVERGENCE
179 1
        lastlp = lp;
180 1
        lp = -1.0 * (sum(0.5 * ((alpha*alpha) * (priorprecalpha))) + sum(0.5 * ((psi*psi) * (priorprecpsi))) + sum(0.5 * ((beta*beta)*(priorprecbeta))) + sum(0.5 * ((theta*theta)*(priorprectheta))));
181 1
        for (int i = 0; i < N; i++){
182 1
            for (int k = 0; k < K; k++){
183 1
                loglambdaik = alpha(i) + psi(k) + beta(k) * theta(i);
184 1
                lp = lp + loglambdaik * Y(i,k) - exp(loglambdaik);
185
            }
186
        }
187
        // Rprintf("%d: %f2\\n",outeriter,lp);
188
        //Rcout<<"outeriter="<<outeriter<<"  lp - lastlp= "<<lp - lastlp<<std::endl;
189 1
        err = (abs_err == true) ? fabs(lp - lastlp) : (lp - lastlp);
190
        // END WHILE LOOP
191
    }
192

193

194
    // Fix Global Polarity
195

196
    // added the -1 because C counts from ZERO...  -- KB
197 1
    if (theta(dirvec(0) - 1) > theta(dirvec(1) - 1)) {
198 1
        beta = -beta;
199 1
        theta = -theta;
200
    }
201

202
    // COMPUTE DOCUMENT STANDARD ERRORS
203 1
    for (int i = 0; i < N; i++) {
204 1
        lambdai = exp(alpha(i) + psi + beta*theta(i));
205 1
        H(0,0) = -sum(lambdai / phi) - priorprecalpha;
206 1
        H(1,0) = -sum((beta * lambdai) / phi);
207 1
        H(0,1) = H(1,0);
208 1
        H(1,1) = -sum(((beta* beta) * lambdai)/phi) - priorprectheta;
209 1
        thetaSE(i) = sqrt(-1.0 * H(0,0) / (H(0,0) * H(1,1) - H(1,0) * H(0,1)));
210
    }
211

212
    // DEFINE OUTPUT
213

214 1
    return Rcpp::List::create(Rcpp::Named("theta") = theta,
215 1
                              Rcpp::Named("alpha") = alpha,
216 1
                              Rcpp::Named("psi") = psi,
217 1
                              Rcpp::Named("beta") = beta,
218 1
                              Rcpp::Named("phi") = phi,
219 1
                              Rcpp::Named("thetaSE") = thetaSE);
220

221
}

Read our documentation on viewing source code .

Loading