kurtis-s / overture
Showing 2 of 13 files from the diff.

@@ -102,6 +102,34 @@
Loading
102 102
    })
103 103
}
104 104
105 +
RunMcmc <- function(samps, b.start, expr_q, env, n.save, backing.path, thin,
106 +
                    exclude, overwrite) {
107 +
    # If file-backed, save the call so the MCMC can be resumed
108 +
    if(!is.na(backing.path)) {
109 +
        call.lst <- as.list(environment())
110 +
        call.lst[["env"]] <- NULL
111 +
        saveRDS(call.lst, file.path(backing.path, "call_lst.rds"))
112 +
    }
113 +
114 +
    env.len <- -1
115 +
    for(b in b.start:n.save) {
116 +
        for(t in 1:thin) {
117 +
            eval(expr_q, envir=env)
118 +
        }
119 +
        if(env.len != length(env)) {
120 +
            samps <- InitSampMats(env, samps, n.save, backing.path, exclude,
121 +
                                  overwrite)
122 +
            env.len <- length(env)
123 +
        }
124 +
        for(var.name in names(samps)) {
125 +
            samps[[var.name]][b, ] <- c(env[[var.name]])
126 +
        }
127 +
    }
128 +
    FlushMats(samps)
129 +
130 +
    return(samps)
131 +
}
132 +
105 133
#' Initialize a Markov chain Monte Carlo run
106 134
#'
107 135
#' Eliminates much of the "boilerplate" code needed for MCMC implementations by
@@ -180,24 +208,8 @@
Loading
180 208
    function(expr, overwrite=over.write) {
181 209
        expr_q <- substitute(expr)
182 210
        env <- new.env(parent=parent.frame(1))
183 -
        env.len <- -1
184 211
        samps <- list()
185 -
        for(b in 1:n.save) {
186 -
            for(t in 1:thin) {
187 -
                eval(expr_q, envir=env)
188 -
            }
189 -
            if(env.len != length(env)) {
190 -
                samps <- InitSampMats(env, samps, n.save, backing.path, exclude,
191 -
                                      overwrite)
192 -
                env.len <- length(env)
193 -
            }
194 -
            for(var.name in names(samps)) {
195 -
                samps[[var.name]][b, ] <- c(env[[var.name]])
196 -
            }
197 -
        }
198 -
        FlushMats(samps)
199 -
200 -
        return(samps)
212 +
        RunMcmc(samps, 1, expr_q, env, n.save, backing.path, thin, exclude, overwrite)
201 213
    }
202 214
}
203 215
@@ -239,7 +251,9 @@
Loading
239 251
    return(samps)
240 252
}
241 253
242 -
RemoveMissingDraws <- function(samps) {
254 +
FirstMissIdxs <- function(samps) {
255 +
# Get a vector with the row index of the first MCMC sample with incomplete
256 +
# samples, i.e. the first row that has some NA values for the parameter
243 257
    first.miss.idxs <- vector(length=length(samps))
244 258
    for(i in 1:length(samps)) {
245 259
        param.curr <- samps[[i]]
@@ -254,13 +268,23 @@
Loading
254 268
        }
255 269
    }
256 270
271 +
    return(first.miss.idxs)
272 +
}
273 +
274 +
LastSampIdx <- function(first.miss.idxs) {
275 +
# Get row index of the last sample that was fully completed for all parameters
276 +
    min(first.miss.idxs, na.rm=TRUE) - 1
277 +
}
278 +
279 +
RemoveMissingDraws <- function(samps) {
280 +
    first.miss.idxs <- FirstMissIdxs(samps)
257 281
    if(all(is.na(first.miss.idxs))) { # No missing values, MCMC is finished
258 282
        samps.subsetted <- samps
259 283
    }
260 284
    else {
261 -
        last.available.sample <- min(first.miss.idxs) - 1
285 +
        last.samp.idx <- LastSampIdx(first.miss.idxs)
262 286
        samps.subsetted <- lapply(samps, function(x) {
263 -
            bigmemory::sub.big.matrix(x, lastRow=last.available.sample)
287 +
            bigmemory::sub.big.matrix(x, lastRow=last.samp.idx)
264 288
        })
265 289
    }
266 290
@@ -307,3 +331,45 @@
Loading
307 331
        in.mem.mat
308 332
    })
309 333
}
334 +
335 +
#' Resumes an interrupted file-backed MCMC
336 +
#'
337 +
#' \code{Resume} will finish a file-backed MCMC that was interrupted.  To resume
338 +
#' an MCMC run, specify the MCMC's backing path and the sampling will continue
339 +
#' from the last completed sample in the chain.  Note, however, that the random
340 +
#' number generator state from when the MCMC was interrupted is \emph{not}
341 +
#' restored, so the resulting chain my not be reproducible, even if a seed was
342 +
#' specified before the sampling was interrupted.
343 +
#'
344 +
#' @param backing.path directory path where the (partially completed) MCMC
345 +
#'   samples were saved
346 +
#' @return A list of either \code{\link{matrix}} or \code{\link{big.matrix}}
347 +
#'   with the MCMC samples.  Each row in the matrices corresponds to one sample
348 +
#'   from the MCMC chain.
349 +
#' @example examples/example-Resume.R
350 +
#' @export
351 +
#' @seealso \code{\link{InitMcmc}}
352 +
Resume <- function(backing.path) {
353 +
    samps <- LoadMcmc(backing.path)
354 +
    first.miss.idxs <- FirstMissIdxs(samps)
355 +
356 +
    # Run the rest of the MCMC if the MCMC isn't already complete
357 +
    if(!all(is.na(first.miss.idxs))) {
358 +
        env <- new.env(parent=parent.frame(1))
359 +
360 +
        # Re-assign the last completed parameter samples to the env
361 +
        last.samp.idx <- LastSampIdx(first.miss.idxs)
362 +
        for(param.name in names(samps)) {
363 +
            assign(param.name, samps[[param.name]][last.samp.idx,], pos=env)
364 +
        }
365 +
366 +
        call.lst <- readRDS(file.path(backing.path, "call_lst.rds"))
367 +
        call.lst[["b.start"]] <- last.samp.idx + 1
368 +
        call.lst[["env"]] <- env
369 +
        call.lst[["samps"]] <- samps
370 +
371 +
        samps <- do.call("RunMcmc", call.lst, quote=TRUE)
372 +
    }
373 +
374 +
    return(samps)
375 +
}

@@ -26,29 +26,6 @@
Loading
26 26
    log(u) <= (log.prop - log.curr + log.prop.to.curr - log.curr.to.prop)
27 27
}
28 28
29 -
#' Determine if a Metropolis–Hastings step should be accepted
30 -
#'
31 -
#' \code{AcceptProposal} is deprecated.  Please use \code{\link{AcceptProp}}
32 -
#' instead.
33 -
#'
34 -
#' @param log.curr log density of the target at the current value,
35 -
#'   \eqn{log(P(x))}
36 -
#' @param log.prop log density of the target at the proposed value,
37 -
#'   \eqn{log(P(x'))}
38 -
#' @param log.curr.to.prop log of transition distribution from current value to
39 -
#'   proposed value, \eqn{log(g(x'|x))}
40 -
#' @param log.prop.to.curr log of transition distribution from proposed value to
41 -
#'   current value, \eqn{log(g(x|x'))}
42 -
#' @return \code{TRUE/FALSE} for whether the proposal should be accepted or
43 -
#'   rejected, respectively
44 -
#' @example examples/example-AcceptProp.R
45 -
#' @export
46 -
AcceptProposal <- function(log.curr, log.prop, log.curr.to.prop=0,
47 -
                           log.prop.to.curr=0) {
48 -
    .Deprecated("AcceptProp")
49 -
    AcceptProp(log.curr, log.prop, log.curr.to.prop=0,  log.prop.to.curr=0)
50 -
}
51 -
52 29
DeltaNDefault <- function(n) {
53 30
    # Default proposal sd delta from Roberts & Rosenthal (2009)
54 31
    min(0.01, n^(-1/2))
@@ -56,14 +33,14 @@
Loading
56 33
57 34
#' Turn a non-adaptive Metropolis sampler into an adaptive Metropolis sampler
58 35
#'
59 -
#' Given a non-adpative sampler of the form f(..., s), \code{Amwg} will return a
36 +
#' Given a non-adaptive sampler of the form f(..., s), \code{Amwg} will return a
60 37
#' function g(...) that automatically adapts the Metropolis proposal standard
61 -
#' deviation s to try and acheive a target acceptance rate.
38 +
#' deviation s to try and achieve a target acceptance rate.
62 39
#'
63 40
#' \code{Amwg} uses the Adaptive Metropolis-Within-Gibbs algorithm from Roberts
64 41
#' & Rosenthal (2009), which re-scales the proposal standard deviation after a
65 42
#' fixed number of MCMC iterations have elapsed.  The goal of the algorithm is
66 -
#' to acheive a target acceptance rate for the Metropolis step.  After the
43 +
#' to achieve a target acceptance rate for the Metropolis step.  After the
67 44
#' n\emph{th} batch of MCMC iterations the log of the proposal standard
68 45
#' deviation, \eqn{log(s)}, is increased/decreased by \eqn{\delta(n)}.
69 46
#' \eqn{log(s)} is increased by \eqn{\delta(n)} if the observed acceptance rate
@@ -72,7 +49,12 @@
Loading
72 49
#' \code{Amwg} keeps track of the the acceptance rate by comparing the
73 50
#' previously sampled value from \code{f} to the next value.  If the two values
74 51
#' are equal, the proposal is considered to be rejected, whereas if the two
75 -
#' values are different the proposal is considered accepted.
52 +
#' values are different the proposal is considered accepted.  \code{Amwg} will
53 +
#' optionally stop adapting the proposal standard deviation after
54 +
#' \code{stop.after} iterations.  Setting \code{stop.after} can be used, for
55 +
#' example, to stop adapting the proposal standard deviation after some burn-in
56 +
#' period.  If \code{stop.after=NA} (the default), \code{Amwg} will continue to
57 +
#' modify the proposal standard deviation throughout the entire MCMC.
76 58
#'
77 59
#' \code{DeltaN} is set to \eqn{\delta(n) = min(0.01, n^{-1/2})} unless
78 60
#' re-specified in the function call. Some care should be taken if re-specifying
@@ -89,7 +71,7 @@
Loading
89 71
#' by \eqn{f}.  This functionality can be used, for example, if \eqn{f} samples
90 72
#' each of its returned elements individually, updating each element using a
91 73
#' Metropolis step.  See the examples for an illustration of this use case.  In
92 -
#' such settings, \eqn{f} should be contructed to receive \eqn{s} as a vector
74 +
#' such settings, \eqn{f} should be constructed to receive \eqn{s} as a vector
93 75
#' argument.
94 76
#'
95 77
#' @param f non-adaptive Metropolis sampler of the form f(..., s)
@@ -97,7 +79,9 @@
Loading
97 79
#' @param batch.size number of iterations before proposal SD is adapted
98 80
#' @param target target acceptance rate
99 81
#' @param DeltaN function of the form f(n) which returns the adaptation amount
100 -
#'   based on the number of elapsed iterations, n
82 +
#'   based on the number of elapsed iterations, n.  Defaults to \eqn{\delta(n) =
83 +
#'   min(0.01, n^{-1/2})}
84 +
#' @param stop.after stop adapting proposal SD after this many iterations
101 85
#' @return Adaptive Metropolis sampler function of the form g(...).
102 86
#' @references  Gareth O. Roberts & Jeffrey S. Rosenthal (2009) Examples of
103 87
#'   Adaptive MCMC, Journal of Computational and Graphical Statistics, 18:2,
@@ -105,7 +89,16 @@
Loading
105 89
#'
106 90
#' @example examples/example-Amwg.R
107 91
#' @export
108 -
Amwg <- function(f, s, batch.size=50, target=0.44, DeltaN) {
92 +
Amwg <- function(f, s, batch.size=50, target=0.44, DeltaN, stop.after=NA) {
93 +
    if(!IsPositiveInteger(batch.size)) {
94 +
        stop("'batch.size' must be an integer > 0")
95 +
    }
96 +
    if(!(IsPositiveInteger(stop.after) || is.na(stop.after))) {
97 +
        stop("'stop.after' must be an integer > 0")
98 +
    }
99 +
    if(!((target>0) && (target<1))) stop("'target' must be 0<target<1")
100 +
101 +
    if(is.na(stop.after)) stop.after <- Inf
109 102
    if(missing(DeltaN)) DeltaN <- DeltaNDefault
110 103
    n.iters <- 0
111 104
    n.accepted <- rep(0, length(s))
@@ -130,7 +123,7 @@
Loading
130 123
        }
131 124
        accept.rate <<- n.accepted/n.iters
132 125
        prev <<- ret
133 -
        if(n.iters %% batch.size == 0) {
126 +
        if((n.iters %% batch.size == 0) && (n.iters <= stop.after)){
134 127
            delta.n <- DeltaN(n.iters)
135 128
            s <<- ifelse(accept.rate > target,
136 129
                         s*exp(delta.n),
Files Coverage
R 98.14%
Project Totals (2 files) 98.14%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
    patch:
10
      default:
11
        target: auto
12
        threshold: 1%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading