1
utils::globalVariables(c("name", "time", "value"))
2

3
#' Fit a kinetic model to data with one or more state variables
4
#'
5
#' This function maximises the likelihood of the observed data using the Port
6
#' algorithm [stats::nlminb()], and the specified initial or fixed
7
#' parameters and starting values.  In each step of the optimisation, the
8
#' kinetic model is solved using the function [mkinpredict()], except
9
#' if an analytical solution is implemented, in which case the model is solved
10
#' using the degradation function in the [mkinmod] object. The
11
#' parameters of the selected error model are fitted simultaneously with the
12
#' degradation model parameters, as both of them are arguments of the
13
#' likelihood function.
14
#'
15
#' Per default, parameters in the kinetic models are internally transformed in
16
#' order to better satisfy the assumption of a normal distribution of their
17
#' estimators.
18
#'
19
#' @param mkinmod A list of class [mkinmod], containing the kinetic
20
#'   model to be fitted to the data, or one of the shorthand names ("SFO",
21
#'   "FOMC", "DFOP", "HS", "SFORB", "IORE"). If a shorthand name is given, a
22
#'   parent only degradation model is generated for the variable with the
23
#'   highest value in \code{observed}.
24
#' @param observed A dataframe with the observed data.  The first column called
25
#'   "name" must contain the name of the observed variable for each data point.
26
#'   The second column must contain the times of observation, named "time".
27
#'   The third column must be named "value" and contain the observed values.
28
#'   Zero values in the "value" column will be removed, with a warning, in
29
#'   order to avoid problems with fitting the two-component error model. This
30
#'   is not expected to be a problem, because in general, values of zero are
31
#'   not observed in degradation data, because there is a lower limit of
32
#'   detection.
33
#' @param parms.ini A named vector of initial values for the parameters,
34
#'   including parameters to be optimised and potentially also fixed parameters
35
#'   as indicated by \code{fixed_parms}.  If set to "auto", initial values for
36
#'   rate constants are set to default values.  Using parameter names that are
37
#'   not in the model gives an error.
38
#'
39
#'   It is possible to only specify a subset of the parameters that the model
40
#'   needs. You can use the parameter lists "bparms.ode" from a previously
41
#'   fitted model, which contains the differential equation parameters from
42
#'   this model.  This works nicely if the models are nested. An example is
43
#'   given below.
44
#' @param state.ini A named vector of initial values for the state variables of
45
#'   the model. In case the observed variables are represented by more than one
46
#'   model variable, the names will differ from the names of the observed
47
#'   variables (see \code{map} component of [mkinmod]). The default
48
#'   is to set the initial value of the first model variable to the mean of the
49
#'   time zero values for the variable with the maximum observed value, and all
50
#'   others to 0.  If this variable has no time zero observations, its initial
51
#'   value is set to 100.
52
#' @param err.ini A named vector of initial values for the error model
53
#'   parameters to be optimised.  If set to "auto", initial values are set to
54
#'   default values.  Otherwise, inital values for all error model parameters
55
#'   must be given.
56
#' @param fixed_parms The names of parameters that should not be optimised but
57
#'   rather kept at the values specified in \code{parms.ini}. Alternatively,
58
#'   a named numeric vector of parameters to be fixed, regardless of the values
59
#'   in parms.ini.
60
#' @param fixed_initials The names of model variables for which the initial
61
#'   state at time 0 should be excluded from the optimisation. Defaults to all
62
#'   state variables except for the first one.
63
#' @param from_max_mean If this is set to TRUE, and the model has only one
64
#'   observed variable, then data before the time of the maximum observed value
65
#'   (after averaging for each sampling time) are discarded, and this time is
66
#'   subtracted from all remaining time values, so the time of the maximum
67
#'   observed mean value is the new time zero.
68
#' @param solution_type If set to "eigen", the solution of the system of
69
#'   differential equations is based on the spectral decomposition of the
70
#'   coefficient matrix in cases that this is possible. If set to "deSolve", a
71
#'   numerical [ode solver from package deSolve][deSolve::ode()] is used. If
72
#'   set to "analytical", an analytical solution of the model is used. This is
73
#'   only implemented for relatively simple degradation models.  The default is
74
#'   "auto", which uses "analytical" if possible, otherwise "deSolve" if a
75
#'   compiler is present, and "eigen" if no compiler is present and the model
76
#'   can be expressed using eigenvalues and eigenvectors.
77
#' @param method.ode The solution method passed via [mkinpredict()]
78
#'   to [deSolve::ode()] in case the solution type is "deSolve". The default
79
#'   "lsoda" is performant, but sometimes fails to converge.
80
#' @param use_compiled If set to \code{FALSE}, no compiled version of the
81
#'   [mkinmod] model is used in the calls to [mkinpredict()] even if a compiled
82
#'   version is present.
83
#' @param control A list of control arguments passed to [stats::nlminb()].
84
#' @param transform_rates Boolean specifying if kinetic rate constants should
85
#'   be transformed in the model specification used in the fitting for better
86
#'   compliance with the assumption of normal distribution of the estimator. If
87
#'   TRUE, also alpha and beta parameters of the FOMC model are
88
#'   log-transformed, as well as k1 and k2 rate constants for the DFOP and HS
89
#'   models and the break point tb of the HS model.  If FALSE, zero is used as
90
#'   a lower bound for the rates in the optimisation.
91
#' @param transform_fractions Boolean specifying if formation fractions
92
#'   should be transformed in the model specification used in the fitting for
93
#'   better compliance with the assumption of normal distribution of the
94
#'   estimator. The default (TRUE) is to do transformations. If TRUE,
95
#'   the g parameter of the DFOP model is also transformed. Transformations
96
#'   are described in [transform_odeparms].
97
#' @param quiet Suppress printing out the current value of the negative
98
#'   log-likelihood after each improvement?
99
#' @param atol Absolute error tolerance, passed to [deSolve::ode()]. Default
100
#'   is 1e-8, which is lower than the default in the [deSolve::lsoda()]
101
#'   function which is used per default.
102
#' @param rtol Absolute error tolerance, passed to [deSolve::ode()]. Default
103
#'   is 1e-10, much lower than in [deSolve::lsoda()].
104
#' @param error_model If the error model is "const", a constant standard
105
#'   deviation is assumed.
106
#'
107
#'   If the error model is "obs", each observed variable is assumed to have its
108
#'   own variance.
109
#'
110
#'   If the error model is "tc" (two-component error model), a two component
111
#'   error model similar to the one described by Rocke and Lorenzato (1995) is
112
#'   used for setting up the likelihood function.  Note that this model
113
#'   deviates from the model by Rocke and Lorenzato, as their model implies
114
#'   that the errors follow a lognormal distribution for large values, not a
115
#'   normal distribution as assumed by this method.
116
#' @param error_model_algorithm If "auto", the selected algorithm depends on
117
#'   the error model.  If the error model is "const", unweighted nonlinear
118
#'   least squares fitting ("OLS") is selected. If the error model is "obs", or
119
#'   "tc", the "d_3" algorithm is selected.
120
#'
121
#'   The algorithm "d_3" will directly minimize the negative log-likelihood
122
#'   and independently also use the three step algorithm described below.
123
#'   The fit with the higher likelihood is returned.
124
#'
125
#'   The algorithm "direct" will directly minimize the negative log-likelihood.
126
#'
127
#'   The algorithm "twostep" will minimize the negative log-likelihood after an
128
#'   initial unweighted least squares optimisation step.
129
#'
130
#'   The algorithm "threestep" starts with unweighted least squares, then
131
#'   optimizes only the error model using the degradation model parameters
132
#'   found, and then minimizes the negative log-likelihood with free
133
#'   degradation and error model parameters.
134
#'
135
#'   The algorithm "fourstep" starts with unweighted least squares, then
136
#'   optimizes only the error model using the degradation model parameters
137
#'   found, then optimizes the degradation model again with fixed error model
138
#'   parameters, and finally minimizes the negative log-likelihood with free
139
#'   degradation and error model parameters.
140
#'
141
#'   The algorithm "IRLS" (Iteratively Reweighted Least Squares) starts with
142
#'   unweighted least squares, and then iterates optimization of the error
143
#'   model parameters and subsequent optimization of the degradation model
144
#'   using those error model parameters, until the error model parameters
145
#'   converge.
146
#' @param reweight.tol Tolerance for the convergence criterion calculated from
147
#'   the error model parameters in IRLS fits.
148
#' @param reweight.max.iter Maximum number of iterations in IRLS fits.
149
#' @param trace_parms Should a trace of the parameter values be listed?
150
#' @param test_residuals Should the residuals be tested for normal distribution?
151
#' @param \dots Further arguments that will be passed on to
152
#'   [deSolve::ode()].
153
#' @importFrom stats nlminb aggregate dist shapiro.test
154
#' @return A list with "mkinfit" in the class attribute.
155
#' @note When using the "IORE" submodel for metabolites, fitting with
156
#'   "transform_rates = TRUE" (the default) often leads to failures of the
157
#'   numerical ODE solver. In this situation it may help to switch off the
158
#'   internal rate transformation.
159
#' @author Johannes Ranke
160
#' @seealso [summary.mkinfit], [plot.mkinfit], [parms] and [lrtest].
161
#'
162
#'   Comparisons of models fitted to the same data can be made using
163
#'   \code{\link{AIC}} by virtue of the method \code{\link{logLik.mkinfit}}.
164
#'
165
#'   Fitting of several models to several datasets in a single call to
166
#'   \code{\link{mmkin}}.
167
#' @references Rocke DM and Lorenzato S (1995) A two-component model
168
#'   for measurement error in analytical chemistry. *Technometrics* 37(2), 176-184.
169
#'
170
#'   Ranke J and Meinecke S (2019) Error Models for the Kinetic Evaluation of Chemical
171
#'   Degradation Data. *Environments* 6(12) 124
172
#'   [doi:10.3390/environments6120124](https://doi.org/10.3390/environments6120124).
173
#' @examples
174
#'
175
#' # Use shorthand notation for parent only degradation
176
#' fit <- mkinfit("FOMC", FOCUS_2006_C, quiet = TRUE)
177
#' summary(fit)
178
#'
179
#' # One parent compound, one metabolite, both single first order.
180
#' # We remove zero values from FOCUS dataset D in order to avoid warnings
181
#' FOCUS_D <- subset(FOCUS_2006_D, value != 0)
182
#' # Use mkinsub for convenience in model formulation. Pathway to sink included per default.
183
#' SFO_SFO <- mkinmod(
184
#'   parent = mkinsub("SFO", "m1"),
185
#'   m1 = mkinsub("SFO"))
186
#'
187
#' # Fit the model quietly to the FOCUS example dataset D using defaults
188
#' fit <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE)
189
#' plot_sep(fit)
190
#' # As lower parent values appear to have lower variance, we try an alternative error model
191
#' fit.tc <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc")
192
#' # This avoids the warning, and the likelihood ratio test confirms it is preferable
193
#' lrtest(fit.tc, fit)
194
#' # We can also allow for different variances of parent and metabolite as error model
195
#' fit.obs <- mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "obs")
196
#' # The two-component error model has significantly higher likelihood
197
#' lrtest(fit.obs, fit.tc)
198
#' parms(fit.tc)
199
#' endpoints(fit.tc)
200
#'
201
#' # We can show a quick (only one replication) benchmark for this case, as we
202
#' # have several alternative solution methods for the model. We skip
203
#' # uncompiled deSolve, as it is so slow. More benchmarks are found in the
204
#' # benchmark vignette
205
#' \dontrun{
206
#' if(require(rbenchmark)) {
207
#'   benchmark(replications = 1, order = "relative", columns = c("test", "relative", "elapsed"),
208
#'     deSolve_compiled = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc",
209
#'       solution_type = "deSolve", use_compiled = TRUE),
210
#'     eigen = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc",
211
#'       solution_type = "eigen"),
212
#'     analytical = mkinfit(SFO_SFO, FOCUS_D, quiet = TRUE, error_model = "tc",
213
#'       solution_type = "analytical"))
214
#' }
215
#' }
216
#'
217
#' # Use stepwise fitting, using optimised parameters from parent only fit, FOMC-SFO
218
#' \dontrun{
219
#' FOMC_SFO <- mkinmod(
220
#'   parent = mkinsub("FOMC", "m1"),
221
#'   m1 = mkinsub("SFO"))
222
#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE)
223
#' # Again, we get a warning and try a more sophisticated error model
224
#' fit.FOMC_SFO.tc <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE, error_model = "tc")
225
#' # This model has a higher likelihood, but not significantly so
226
#' lrtest(fit.tc, fit.FOMC_SFO.tc)
227
#' # Also, the missing standard error for log_beta and the t-tests for alpha
228
#' # and beta indicate overparameterisation
229
#' summary(fit.FOMC_SFO.tc, data = FALSE)
230
#'
231
#' # We can easily use starting parameters from the parent only fit (only for illustration)
232
#' fit.FOMC = mkinfit("FOMC", FOCUS_2006_D, quiet = TRUE, error_model = "tc")
233
#' fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_D, quiet = TRUE,
234
#'   parms.ini = fit.FOMC$bparms.ode, error_model = "tc")
235
#' }
236
#' @export
237
mkinfit <- function(mkinmod, observed,
238
  parms.ini = "auto",
239
  state.ini = "auto",
240
  err.ini = "auto",
241
  fixed_parms = NULL,
242
  fixed_initials = names(mkinmod$diffs)[-1],
243
  from_max_mean = FALSE,
244
  solution_type = c("auto", "analytical", "eigen", "deSolve"),
245
  method.ode = "lsoda",
246
  use_compiled = "auto",
247
  control = list(eval.max = 300, iter.max = 200),
248
  transform_rates = TRUE,
249
  transform_fractions = TRUE,
250
  quiet = FALSE,
251
  atol = 1e-8, rtol = 1e-10,
252
  error_model = c("const", "obs", "tc"),
253
  error_model_algorithm = c("auto", "d_3", "direct", "twostep", "threestep", "fourstep", "IRLS", "OLS"),
254
  reweight.tol = 1e-8, reweight.max.iter = 10,
255
  trace_parms = FALSE,
256
  test_residuals = FALSE,
257
  ...)
258
{
259 2
  call <- match.call()
260

261 2
  summary_warnings <- character()
262

263
  # Derive the name used for the model
264 2
  if (is.character(mkinmod)) mkinmod_name <- mkinmod
265 2
  else mkinmod_name <- deparse(substitute(mkinmod))
266

267
  # Check mkinmod and generate a model for the variable whith the highest value
268
  # if a suitable string is given
269 2
  parent_models_available = c("SFO", "FOMC", "DFOP", "HS", "SFORB", "IORE", "logistic")
270 2
  if (class(mkinmod) != "mkinmod") {
271 2
    presumed_parent_name = observed[which.max(observed$value), "name"]
272 2
    if (mkinmod[[1]] %in% parent_models_available) {
273 2
      speclist <- list(list(type = mkinmod, sink = TRUE))
274 2
      names(speclist) <- presumed_parent_name
275 2
      mkinmod <- mkinmod(speclist = speclist, use_of_ff = "max")
276
    } else {
277 2
      stop("Argument mkinmod must be of class mkinmod or a string containing one of\n  ",
278 2
           paste(parent_models_available, collapse = ", "))
279
    }
280
  }
281

282
  # Get the names of the state variables in the model
283 2
  mod_vars <- names(mkinmod$diffs)
284

285
  # Get the names of observed variables
286 2
  obs_vars <- names(mkinmod$spec)
287

288
  # Subset observed data with names of observed data in the model and remove NA values
289 2
  observed <- subset(observed, name %in% obs_vars)
290 2
  observed <- subset(observed, !is.na(value))
291

292
  # Also remove zero values to avoid instabilities (e.g. of the 'tc' error model)
293 2
  if (any(observed$value == 0)) {
294 2
    zero_warning <- "Observations with value of zero were removed from the data"
295 2
    summary_warnings <- c(summary_warnings, Z = zero_warning)
296 2
    warning(zero_warning)
297 2
    observed <- subset(observed, value != 0)
298
  }
299

300
  # Sort observed values for efficient analytical predictions
301 2
  observed$name <- ordered(observed$name, levels = obs_vars)
302 2
  observed <- observed[order(observed$name, observed$time), ]
303

304
  # Obtain data for decline from maximum mean value if requested
305 2
  if (from_max_mean) {
306
    # This is only used for simple decline models
307 2
    if (length(obs_vars) > 1)
308 2
      stop("Decline from maximum is only implemented for models with a single observed variable")
309 2
    observed$name <- as.character(observed$name)
310

311 2
    means <- aggregate(value ~ time, data = observed, mean, na.rm=TRUE)
312 2
    t_of_max <- means[which.max(means$value), "time"]
313 2
    observed <- subset(observed, time >= t_of_max)
314 2
    observed$time <- observed$time - t_of_max
315
  }
316

317
  # Number observations used for fitting
318 2
  n_observed <- nrow(observed)
319

320
  # Define starting values for parameters where not specified by the user
321 2
  if (parms.ini[[1]] == "auto") parms.ini = vector()
322

323
  # Override parms.ini for parameters given as a numeric vector in
324
  # fixed_parms
325 2
  if (is.numeric(fixed_parms)) {
326 2
    fixed_parm_names <- names(fixed_parms)
327 2
    parms.ini[fixed_parm_names] <- fixed_parms
328 2
    fixed_parms <- fixed_parm_names
329
  }
330

331
  # Warn for inital parameter specifications that are not in the model
332 2
  wrongpar.names <- setdiff(names(parms.ini), mkinmod$parms)
333 2
  if (length(wrongpar.names) > 0) {
334 2
    warning("Initial parameter(s) ", paste(wrongpar.names, collapse = ", "),
335 2
         " not used in the model")
336 2
    parms.ini <- parms.ini[setdiff(names(parms.ini), wrongpar.names)]
337
  }
338

339
  # Warn that the sum of formation fractions may exceed one if they are not
340
  # fitted in the transformed way
341 2
  if (mkinmod$use_of_ff == "max" & transform_fractions == FALSE) {
342 2
    warning("The sum of formation fractions may exceed one if you do not use ",
343 2
            "transform_fractions = TRUE." )
344 2
    for (box in mod_vars) {
345
      # Stop if formation fractions are not transformed and we have no sink
346 2
      if (mkinmod$spec[[box]]$sink == FALSE) {
347 2
        stop("If formation fractions are not transformed during the fitting, ",
348 2
             "it is not supported to turn off pathways to sink.\n ",
349 2
             "Consider turning on the transformation of formation fractions or ",
350 2
             "setting up a model with use_of_ff = 'min'.\n")
351
      }
352
    }
353
  }
354

355
  # Do not allow fixing formation fractions if we are using the ilr transformation,
356
  # this is not supported
357 2
  if (transform_fractions == TRUE && length(fixed_parms) > 0) {
358 2
    if (any(grepl("^f_", fixed_parms))) {
359 2
      stop("Fixing formation fractions is not supported when using the ilr ",
360 2
           "transformation.")
361
    }
362
  }
363

364
  # Set initial parameter values, including a small increment (salt)
365
  # to avoid linear dependencies (singular matrix) in Eigenvalue based solutions
366 2
  k_salt = 0
367 2
  defaultpar.names <- setdiff(mkinmod$parms, names(parms.ini))
368 2
  for (parmname in defaultpar.names) {
369
    # Default values for rate constants, depending on the parameterisation
370 2
    if (grepl("^k", parmname)) {
371 2
      parms.ini[parmname] = 0.1 + k_salt
372 2
      k_salt = k_salt + 1e-4
373
    }
374
    # Default values for rate constants for reversible binding
375 2
    if (grepl("free_bound$", parmname)) parms.ini[parmname] = 0.1
376 2
    if (grepl("bound_free$", parmname)) parms.ini[parmname] = 0.02
377
    # Default values for IORE exponents
378 2
    if (grepl("^N", parmname)) parms.ini[parmname] = 1.1
379
    # Default values for the FOMC, DFOP and HS models
380 2
    if (parmname == "alpha") parms.ini[parmname] = 1
381 2
    if (parmname == "beta") parms.ini[parmname] = 10
382 2
    if (parmname == "k1") parms.ini[parmname] = 0.1
383 2
    if (parmname == "k2") parms.ini[parmname] = 0.01
384 2
    if (parmname == "tb") parms.ini[parmname] = 5
385 2
    if (parmname == "g") parms.ini[parmname] = 0.5
386 2
    if (parmname == "kmax") parms.ini[parmname] = 0.1
387 2
    if (parmname == "k0") parms.ini[parmname] = 0.0001
388 2
    if (parmname == "r") parms.ini[parmname] = 0.2
389
  }
390
  # Default values for formation fractions in case they are present
391 2
  for (obs_var in obs_vars) {
392 2
    origin <- mkinmod$map[[obs_var]][[1]]
393 2
    f_names <- mkinmod$parms[grep(paste0("^f_", origin), mkinmod$parms)]
394 2
    if (length(f_names) > 0) {
395
      # We need to differentiate between default and specified fractions
396
      # and set the unspecified to 1 - sum(specified)/n_unspecified
397 2
      f_default_names <- intersect(f_names, defaultpar.names)
398 2
      f_specified_names <- setdiff(f_names, defaultpar.names)
399 2
      sum_f_specified = sum(parms.ini[f_specified_names])
400 2
      if (sum_f_specified > 1) {
401 2
        stop("Starting values for the formation fractions originating from ",
402 2
             origin, " sum up to more than 1.")
403
      }
404 2
      if (mkinmod$spec[[obs_var]]$sink) n_unspecified = length(f_default_names) + 1
405
      else {
406 2
        n_unspecified = length(f_default_names)
407
      }
408 2
      parms.ini[f_default_names] <- (1 - sum_f_specified) / n_unspecified
409
    }
410
  }
411

412
  # Set default for state.ini if appropriate
413 2
  parent_name = names(mkinmod$spec)[[1]]
414 2
  parent_time_0 = subset(observed, time == 0 & name == parent_name)$value
415 2
  parent_time_0_mean = mean(parent_time_0, na.rm = TRUE)
416 2
  if (is.na(parent_time_0_mean)) {
417 0
    state.ini_auto = c(100, rep(0, length(mkinmod$diffs) - 1))
418
  } else {
419 2
    state.ini_auto = c(parent_time_0_mean, rep(0, length(mkinmod$diffs) - 1))
420
  }
421 2
  names(state.ini_auto) <- mod_vars
422

423 2
  if (state.ini[1] == "auto") {
424 2
    state.ini_used <- state.ini_auto
425
  } else {
426 2
    state.ini_used <- state.ini_auto
427 2
    state.ini_good <- intersect(names(mkinmod$diffs), names(state.ini))
428 2
    state.ini_used[state.ini_good] <- state.ini[state.ini_good]
429
  }
430 2
  state.ini <- state.ini_used
431

432
  # Name the inital state variable values if they are not named yet
433 0
  if(is.null(names(state.ini))) names(state.ini) <- mod_vars
434

435
  # Transform initial parameter values for fitting
436 2
  transparms.ini <- transform_odeparms(parms.ini, mkinmod,
437 2
                                       transform_rates = transform_rates,
438 2
                                       transform_fractions = transform_fractions)
439

440
  # Parameters to be optimised:
441
  # Kinetic parameters in parms.ini whose names are not in fixed_parms
442 2
  parms.fixed <- parms.ini[fixed_parms]
443 2
  parms.optim <- parms.ini[setdiff(names(parms.ini), fixed_parms)]
444

445 2
  transparms.fixed <- transform_odeparms(parms.fixed, mkinmod,
446 2
                                       transform_rates = transform_rates,
447 2
                                       transform_fractions = transform_fractions)
448 2
  transparms.optim <- transform_odeparms(parms.optim, mkinmod,
449 2
                                       transform_rates = transform_rates,
450 2
                                       transform_fractions = transform_fractions)
451

452
  # Inital state variables in state.ini whose names are not in fixed_initials
453 2
  state.ini.fixed <- state.ini[fixed_initials]
454 2
  state.ini.optim <- state.ini[setdiff(names(state.ini), fixed_initials)]
455

456
  # Preserve names of state variables before renaming initial state variable
457
  # parameters
458 2
  state.ini.optim.boxnames <- names(state.ini.optim)
459 2
  state.ini.fixed.boxnames <- names(state.ini.fixed)
460 2
  if(length(state.ini.optim) > 0) {
461 2
    names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_")
462
  }
463 2
  if(length(state.ini.fixed) > 0) {
464 2
    names(state.ini.fixed) <- paste(names(state.ini.fixed), "0", sep="_")
465
  }
466

467
  # Decide if the solution of the model can be based on a simple analytical
468
  # formula, the spectral decomposition of the matrix (fundamental system)
469
  # or a numeric ode solver from the deSolve package
470
  # Prefer deSolve over eigen if a compiled model is present and use_compiled
471
  # is not set to FALSE
472 2
  solution_type = match.arg(solution_type)
473 2
  if (solution_type == "analytical" && !is.function(mkinmod$deg_func))
474 2
     stop("Analytical solution not implemented for this model.")
475 2
  if (solution_type == "eigen" && !is.matrix(mkinmod$coefmat))
476 2
     stop("Eigenvalue based solution not possible, coefficient matrix not present.")
477 2
  if (solution_type == "auto") {
478 2
    if (length(mkinmod$spec) == 1 || is.function(mkinmod$deg_func)) {
479 2
      solution_type = "analytical"
480
    } else {
481 2
      if (!is.null(mkinmod$cf) & use_compiled[1] != FALSE) {
482 2
        solution_type = "deSolve"
483
      } else {
484 0
        if (is.matrix(mkinmod$coefmat)) {
485 0
          solution_type = "eigen"
486 0
          if (max(observed$value, na.rm = TRUE) < 0.1) {
487 0
            stop("The combination of small observed values (all < 0.1) and solution_type = eigen is error-prone")
488
          }
489
        } else {
490 0
          solution_type = "deSolve"
491
        }
492
      }
493
    }
494
  }
495

496
  # Get the error model and the algorithm for fitting
497 2
  err_mod <- match.arg(error_model)
498 2
  error_model_algorithm = match.arg(error_model_algorithm)
499 2
  if (error_model_algorithm == "OLS") {
500 0
    if (err_mod != "const") stop("OLS is only appropriate for constant variance")
501
  }
502 2
  if (error_model_algorithm == "auto") {
503 2
    error_model_algorithm = switch(err_mod,
504 2
      const = "OLS", obs = "d_3", tc = "d_3")
505
  }
506 2
  errparm_names <- switch(err_mod,
507 2
    "const" = "sigma",
508 2
    "obs" = paste0("sigma_", obs_vars),
509 2
    "tc" = c("sigma_low", "rsd_high"))
510 2
  errparm_names_optim <- if (error_model_algorithm == "OLS") NULL else errparm_names
511

512
  # Define starting values for the error model
513 2
  if (err.ini[1] != "auto") {
514 0
    if (!identical(names(err.ini), errparm_names)) {
515 0
      stop("Please supply initial values for error model components ", paste(errparm_names, collapse = ", "))
516
    } else {
517 0
      errparms = err.ini
518
    }
519
  } else {
520 2
    if (err_mod == "const") {
521 2
      errparms = 3
522
    }
523 2
    if (err_mod == "obs") {
524 2
      errparms = rep(3, length(obs_vars))
525
    }
526 2
    if (err_mod == "tc") {
527 2
      errparms <- c(sigma_low = 0.1, rsd_high = 0.1)
528
    }
529 2
    names(errparms) <- errparm_names
530
  }
531 2
  if (error_model_algorithm == "OLS") {
532 2
    errparms_optim <- NULL
533
  } else {
534 2
    errparms_optim <- errparms
535
  }
536

537
  # Unique outtimes for model solution.
538 2
  outtimes <- sort(unique(observed$time))
539

540
  # Define the objective function for optimisation, including (back)transformations
541 2
  cost_function <- function(P, trans = TRUE, OLS = FALSE, fixed_degparms = FALSE, fixed_errparms = FALSE, update_data = TRUE, ...)
542
  {
543 2
    assign("calls", calls + 1, inherits = TRUE) # Increase the model solution counter
544

545
    # Trace parameter values if requested and if we are actually optimising
546 0
    if(trace_parms & update_data) cat(format(P, width = 10, digits = 6), "\n")
547

548
    # Determine local parameter values for the cost estimation
549 2
    if (is.numeric(fixed_degparms)) {
550 2
      cost_degparms <- fixed_degparms
551 2
      cost_errparms <- P
552 2
      degparms_fixed = TRUE
553
    } else {
554 2
      degparms_fixed = FALSE
555
    }
556

557 2
    if (is.numeric(fixed_errparms)) {
558 2
      cost_degparms <- P
559 2
      cost_errparms <- fixed_errparms
560 2
      errparms_fixed = TRUE
561
    } else {
562 2
      errparms_fixed = FALSE
563
    }
564

565 2
    if (OLS) {
566 2
      cost_degparms <- P
567 2
      cost_errparms <- numeric(0)
568
    }
569

570 2
    if (!OLS & !degparms_fixed & !errparms_fixed) {
571 2
      cost_degparms <- P[1:(length(P) - length(errparms))]
572 2
      cost_errparms <- P[(length(cost_degparms) + 1):length(P)]
573
    }
574

575
    # Initial states for t0
576 2
    if(length(state.ini.optim) > 0) {
577 2
      odeini <- c(cost_degparms[1:length(state.ini.optim)], state.ini.fixed)
578 2
      names(odeini) <- c(state.ini.optim.boxnames, state.ini.fixed.boxnames)
579
    } else {
580 0
      odeini <- state.ini.fixed
581 0
      names(odeini) <- state.ini.fixed.boxnames
582
    }
583

584 2
    odeparms.optim <- cost_degparms[(length(state.ini.optim) + 1):length(cost_degparms)]
585

586 2
    if (trans == TRUE) {
587 2
      odeparms <- c(odeparms.optim, transparms.fixed)
588 2
      parms <- backtransform_odeparms(odeparms, mkinmod,
589 2
                                      transform_rates = transform_rates,
590 2
                                      transform_fractions = transform_fractions)
591
    } else {
592 2
      parms <- c(odeparms.optim, parms.fixed)
593
    }
594

595
    # Solve the system with current parameter values
596 2
    if (solution_type == "analytical") {
597 2
      observed$predicted <- mkinmod$deg_func(observed, odeini, parms)
598
    } else {
599 2
      out <- mkinpredict(mkinmod, parms,
600 2
                         odeini, outtimes,
601 2
                         solution_type = solution_type,
602 2
                         use_compiled = use_compiled,
603 2
                         method.ode = method.ode,
604 2
                         atol = atol, rtol = rtol, ...)
605

606 2
      observed_index <- cbind(as.character(observed$time), as.character(observed$name))
607 2
      observed$predicted <- out[observed_index]
608
    }
609

610
    # Define standard deviation for each observation
611 2
    if (err_mod == "const") {
612 2
      observed$std <- if (OLS) NA else cost_errparms["sigma"]
613
    }
614 2
    if (err_mod == "obs") {
615 2
      std_names <- paste0("sigma_", observed$name)
616 2
      observed$std <- cost_errparms[std_names]
617
    }
618 2
    if (err_mod == "tc") {
619 2
      observed$std <- sqrt(cost_errparms["sigma_low"]^2 + observed$predicted^2 * cost_errparms["rsd_high"]^2)
620
    }
621

622
    # Calculate model cost
623 2
    if (OLS) {
624
      # Cost is the sum of squared residuals
625 2
      cost <- with(observed, sum((value - predicted)^2))
626
    } else {
627
      # Cost is the negative log-likelihood
628 2
      cost <- - with(observed,
629 2
        sum(dnorm(x = value, mean = predicted, sd = std, log = TRUE)))
630
    }
631

632
    # We update the current cost and data during the optimisation, not
633
    # during hessian calculations
634 2
    if (update_data) {
635

636 2
      assign("current_data", observed, inherits = TRUE)
637

638 2
      if (cost < cost.current) {
639 2
        assign("cost.current", cost, inherits = TRUE)
640 0
        if (!quiet) cat(ifelse(OLS, "Sum of squared residuals", "Negative log-likelihood"),
641 0
                        " at call ", calls, ": ", signif(cost.current, 6), "\n", sep = "")
642
      }
643
    }
644 2
    return(cost)
645
  }
646

647 2
  names_optim <- c(names(state.ini.optim),
648 2
                   names(transparms.optim),
649 2
                   errparm_names_optim)
650 2
  n_optim <- length(names_optim)
651

652
  # Define lower and upper bounds other than -Inf and Inf for parameters
653
  # for which no internal transformation is requested in the call to mkinfit
654
  # and for optimised error model parameters
655 2
  lower <- rep(-Inf, n_optim)
656 2
  upper <- rep(Inf, n_optim)
657 2
  names(lower) <- names(upper) <- names_optim
658

659
  # IORE exponents are not transformed, but need a lower bound
660 2
  index_N <- grep("^N", names(lower))
661 2
  lower[index_N] <- 0
662

663 2
  if (!transform_rates) {
664 2
    index_k <- grep("^k_", names(lower))
665 2
    lower[index_k] <- 0
666 2
    index_k__iore <- grep("^k__iore_", names(lower))
667 2
    lower[index_k__iore] <- 0
668 2
    other_rate_parms <- intersect(c("alpha", "beta", "k1", "k2", "tb", "r"), names(lower))
669 2
    lower[other_rate_parms] <- 0
670
  }
671

672 2
  if (!transform_fractions) {
673 2
    index_f <- grep("^f_", names(upper))
674 2
    lower[index_f] <- 0
675 2
    upper[index_f] <- 1
676 2
    other_fraction_parms <- intersect(c("g"), names(upper))
677 2
    lower[other_fraction_parms] <- 0
678 2
    upper[other_fraction_parms] <- 1
679
  }
680

681 2
  if (err_mod == "const") {
682 2
    if (error_model_algorithm != "OLS") {
683 0
      lower["sigma"] <- 0
684
    }
685
  }
686 2
  if (err_mod == "obs") {
687 2
    index_sigma <- grep("^sigma_", names(lower))
688 2
    lower[index_sigma] <- 0
689
  }
690 2
  if (err_mod == "tc") {
691 2
    lower["sigma_low"] <- 0
692 2
    lower["rsd_high"] <- 0
693
  }
694

695
  # Counter for cost function evaluations
696 2
  calls = 0
697 2
  cost.current <- Inf
698 2
  out_predicted <- NA
699 2
  current_data <- NA
700

701
  # Show parameter names if tracing is requested
702 0
  if(trace_parms) cat(format(names_optim, width = 10), "\n")
703

704
  #browser()
705

706
  # Do the fit and take the time until the hessians are calculated
707 2
  fit_time <- system.time({
708 2
    degparms <- c(state.ini.optim, transparms.optim)
709 2
    n_degparms <- length(degparms)
710 2
    degparms_index <- seq(1, n_degparms)
711 2
    errparms_index <- seq(n_degparms + 1, length.out = length(errparms))
712

713 2
    if (error_model_algorithm == "d_3") {
714 0
      if (!quiet) message("Directly optimising the complete model")
715 2
      parms.start <- c(degparms, errparms)
716 2
      fit_direct <- try(nlminb(parms.start, cost_function,
717 2
        lower = lower[names(parms.start)],
718 2
        upper = upper[names(parms.start)],
719 2
        control = control, ...))
720 2
      if (!inherits(fit_direct, "try-error")) {
721 2
        fit_direct$logLik <- - cost.current
722 2
        cost.current <- Inf # reset to avoid conflict with the OLS step
723 2
        data_direct <- current_data # We need this later if it was better
724 2
        direct_failed = FALSE
725
      } else {
726 0
        direct_failed = TRUE
727
      }
728
    }
729 2
    if (error_model_algorithm != "direct") {
730 0
      if (!quiet) message("Ordinary least squares optimisation")
731 2
      fit <- nlminb(degparms, cost_function, control = control,
732 2
        lower = lower[names(degparms)],
733 2
        upper = upper[names(degparms)], OLS = TRUE, ...)
734 2
      degparms <- fit$par
735

736
      # Get the maximum likelihood estimate for sigma at the optimum parameter values
737 2
      current_data$residual <- current_data$value - current_data$predicted
738 2
      sigma_mle <- sqrt(sum(current_data$residual^2)/nrow(current_data))
739

740
      # Use that estimate for the constant variance, or as first guess if err_mod = "obs"
741 2
      if (err_mod != "tc") {
742 2
        errparms[names(errparms)] <- sigma_mle
743
      }
744 2
      fit$par <- c(fit$par, errparms)
745

746 2
      cost.current <- cost_function(c(degparms, errparms), OLS = FALSE)
747 2
      fit$logLik <- - cost.current
748
    }
749 2
    if (error_model_algorithm %in% c("threestep", "fourstep", "d_3")) {
750 0
      if (!quiet) message("Optimising the error model")
751 2
      fit <- nlminb(errparms, cost_function, control = control,
752 2
        lower = lower[names(errparms)],
753 2
        upper = upper[names(errparms)],
754 2
        fixed_degparms = degparms, ...)
755 2
      errparms <- fit$par
756
    }
757 2
    if (error_model_algorithm == "fourstep") {
758 0
      if (!quiet) message("Optimising the degradation model")
759 2
      fit <- nlminb(degparms, cost_function, control = control,
760 2
        lower = lower[names(degparms)],
761 2
        upper = upper[names(degparms)],
762 2
        fixed_errparms = errparms, ...)
763 2
      degparms <- fit$par
764
    }
765 2
    if (error_model_algorithm %in%
766 2
      c("direct", "twostep", "threestep", "fourstep", "d_3")) {
767 0
      if (!quiet) message("Optimising the complete model")
768 2
      parms.start <- c(degparms, errparms)
769 2
      fit <- nlminb(parms.start, cost_function,
770 2
        lower = lower[names(parms.start)],
771 2
        upper = upper[names(parms.start)],
772 2
        control = control, ...)
773 2
      degparms <- fit$par[degparms_index]
774 2
      errparms <- fit$par[errparms_index]
775 2
      fit$logLik <- - cost.current
776

777 2
      if (error_model_algorithm == "d_3") {
778 2
        d_3_messages = c(
779 2
           direct_failed = "Direct fitting failed, results of three-step fitting are returned",
780 2
           same = "Direct fitting and three-step fitting yield approximately the same likelihood",
781 2
           threestep = "Three-step fitting yielded a higher likelihood than direct fitting",
782 2
           direct = "Direct fitting yielded a higher likelihood than three-step fitting")
783 2
        if (direct_failed) {
784 0
          if (!quiet) message(d_3_messages["direct_failed"])
785 0
          fit$d_3_message <- d_3_messages["direct_failed"]
786
        } else {
787 2
          rel_diff <- abs((fit_direct$logLik - fit$logLik))/-mean(c(fit_direct$logLik, fit$logLik))
788 2
          if (rel_diff < 0.0001) {
789 0
            if (!quiet) message(d_3_messages["same"])
790 2
            fit$d_3_message <- d_3_messages["same"]
791
          } else {
792 2
            if (fit$logLik > fit_direct$logLik) {
793 0
              if (!quiet) message(d_3_messages["threestep"])
794 2
              fit$d_3_message <- d_3_messages["threestep"]
795
            } else {
796 0
              if (!quiet) message(d_3_messages["direct"])
797 2
              fit <- fit_direct
798 2
              fit$d_3_message <- d_3_messages["direct"]
799 2
              degparms <- fit$par[degparms_index]
800 2
              errparms <- fit$par[errparms_index]
801 2
              current_data  <- data_direct
802
            }
803
          }
804
        }
805
      }
806
    }
807 2
    if (err_mod != "const" & error_model_algorithm == "IRLS") {
808 2
      reweight.diff <- 1
809 2
      n.iter <- 0
810 2
      errparms_last <- errparms
811

812 2
      while (reweight.diff > reweight.tol &
813 2
             n.iter < reweight.max.iter) {
814

815 0
        if (!quiet) message("Optimising the error model")
816 2
        fit <- nlminb(errparms, cost_function, control = control,
817 2
          lower = lower[names(errparms)],
818 2
          upper = upper[names(errparms)],
819 2
          fixed_degparms = degparms, ...)
820 2
        errparms <- fit$par
821

822 0
        if (!quiet) message("Optimising the degradation model")
823 2
        fit <- nlminb(degparms, cost_function, control = control,
824 2
          lower = lower[names(degparms)],
825 2
          upper = upper[names(degparms)],
826 2
          fixed_errparms = errparms, ...)
827 2
        degparms <- fit$par
828

829 2
        reweight.diff <- dist(rbind(errparms, errparms_last))
830 2
        errparms_last <- errparms
831

832 2
        fit$par <- c(fit$par, errparms)
833 2
        cost.current <- cost_function(c(degparms, errparms), OLS = FALSE)
834 2
        fit$logLik <- - cost.current
835
      }
836
    }
837

838 2
    fit$hessian <- try(numDeriv::hessian(cost_function, c(degparms, errparms), OLS = FALSE,
839 2
        update_data = FALSE), silent = TRUE)
840 2
    dimnames(fit$hessian) <- list(names(c(degparms, errparms)),
841 2
      names(c(degparms, errparms)))
842

843
    # Backtransform parameters
844 2
    bparms.optim = backtransform_odeparms(degparms, mkinmod,
845 2
      transform_rates = transform_rates,
846 2
      transform_fractions = transform_fractions)
847 2
    bparms.fixed = c(state.ini.fixed, parms.fixed)
848 2
    bparms.all = c(bparms.optim, parms.fixed)
849

850 2
    fit$hessian_notrans <- try(numDeriv::hessian(cost_function, c(bparms.optim, errparms),
851 2
        OLS = FALSE, trans = FALSE, update_data = FALSE), silent = TRUE)
852

853 2
    dimnames(fit$hessian_notrans) <- list(names(c(bparms.optim, errparms)),
854 2
      names(c(bparms.optim, errparms)))
855
  })
856

857 2
  fit$call <- call
858

859 2
  fit$error_model_algorithm <- error_model_algorithm
860

861 2
  if (fit$convergence != 0) {
862 2
    convergence_warning = paste0("Optimisation did not converge:\n", fit$message)
863 2
    summary_warnings <- c(summary_warnings, C = convergence_warning)
864 2
    warning(convergence_warning)
865
  } else {
866 0
    if(!quiet) message("Optimisation successfully terminated.\n")
867
  }
868

869
  # We need to return some more data for summary and plotting
870 2
  fit$solution_type <- solution_type
871 2
  fit$transform_rates <- transform_rates
872 2
  fit$transform_fractions <- transform_fractions
873 2
  fit$reweight.tol <- reweight.tol
874 2
  fit$reweight.max.iter <- reweight.max.iter
875 2
  fit$control <- control
876 2
  fit$calls <- calls
877 2
  fit$time <- fit_time
878

879
  # We also need the model and a model name for summary and plotting
880 2
  fit$mkinmod <- mkinmod
881 2
  fit$mkinmod$name <- mkinmod_name
882 2
  fit$obs_vars <- obs_vars
883

884
  # Residual sum of squares as a function of the fitted parameters
885 2
  fit$rss <- function(P) cost_function(P, OLS = TRUE, update_data = FALSE)
886

887
  # Log-likelihood with possibility to fix degparms or errparms
888 2
  fit$ll <- function(P, fixed_degparms = FALSE, fixed_errparms = FALSE) {
889 2
    - cost_function(P, trans = FALSE, fixed_degparms = fixed_degparms,
890 2
      fixed_errparms = fixed_errparms, OLS = FALSE, update_data = FALSE)
891
  }
892

893
  # Collect initial parameter values in three dataframes
894 2
  fit$start <- data.frame(value = c(state.ini.optim,
895 2
                                    parms.optim, errparms_optim))
896 2
  fit$start$type = c(rep("state", length(state.ini.optim)),
897 2
                     rep("deparm", length(parms.optim)),
898 2
                     rep("error", length(errparms_optim)))
899

900 2
  fit$start_transformed = data.frame(
901 2
      value = c(state.ini.optim, transparms.optim, errparms_optim),
902 2
      lower = lower,
903 2
      upper = upper)
904

905 2
  fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed))
906 2
  fit$fixed$type = c(rep("state", length(state.ini.fixed)),
907 2
                     rep("deparm", length(parms.fixed)))
908

909 2
  fit$data <- data.frame(time = current_data$time,
910 2
                         variable = current_data$name,
911 2
                         observed = current_data$value,
912 2
                         predicted = current_data$predicted)
913

914 2
  fit$data$residual <- fit$data$observed - fit$data$predicted
915

916 2
  fit$atol <- atol
917 2
  fit$rtol <- rtol
918 2
  fit$err_mod <- err_mod
919

920
  # Return different sets of backtransformed parameters for summary and plotting
921 2
  fit$bparms.optim <- bparms.optim
922 2
  fit$bparms.fixed <- bparms.fixed
923

924
  # Return ode and state parameters for further fitting
925 2
  fit$bparms.ode <- bparms.all[mkinmod$parms]
926 2
  fit$bparms.state <- c(bparms.all[setdiff(names(bparms.all), names(fit$bparms.ode))],
927 2
                        state.ini.fixed)
928 2
  names(fit$bparms.state) <- gsub("_0$", "", names(fit$bparms.state))
929

930 2
  fit$errparms <- errparms
931 2
  fit$df.residual <- n_observed - length(c(degparms, errparms))
932

933
  # Assign the class here so method dispatch works for residuals
934 2
  class(fit) <- c("mkinfit")
935

936 2
  if (test_residuals) {
937
    # Check for normal distribution of residuals
938 2
    fit$shapiro.p <- shapiro.test(residuals(fit, standardized = TRUE))$p.value
939 2
    if (fit$shapiro.p < 0.05) {
940 2
      shapiro_warning <- paste("Shapiro-Wilk test for standardized residuals: p = ", signif(fit$shapiro.p, 3))
941 2
      warning(shapiro_warning)
942 2
      summary_warnings <- c(summary_warnings, S = shapiro_warning)
943
    }
944
  }
945

946 2
  fit$summary_warnings <- summary_warnings
947

948 2
  fit$date <- date()
949 2
  fit$version <- as.character(utils::packageVersion("mkin"))
950 2
  fit$Rversion <- paste(R.version$major, R.version$minor, sep=".")
951

952 2
  return(fit)
953
}

Read our documentation on viewing source code .

Loading