Commit 85ffdb79 authored by Chris Jewell's avatar Chris Jewell
Browse files

Interim commit -- interface restructure.

parent 3d904df0
......@@ -110,7 +110,7 @@
#' \tab i.e. \eqn{theta = sum(Human_itl) / sum(lambda_ijtl / theta)}}
#' }
#'
#' \item{\code{fit_params(n_iter = 1000, burn_in = 0, thin = 1,
#' \item{\code{mcmc_params(n_iter = 1000, burn_in = 0, thin = 1,
#' n_r = ceiling(private$nTypes * 0.2), params_fix = NULL)}}{when called, sets the mcmc
#' parameters.
#'
......@@ -130,7 +130,7 @@
#' \item{\code{update(n_iter, append = TRUE)}}{when called, updates the \code{HaldDP}
#' model by running \code{n_iter} iterations.
#'
#' If missing \code{n_iter}, the \code{n_iter} last set using \code{fit_params()}
#' If missing \code{n_iter}, the \code{n_iter} last set using \code{mcmc_params()}
#' or \code{update()} is used.
#'
#' \code{append}
......@@ -156,7 +156,7 @@
#' \code{theta} (an array \code{theta[types, iters]}), \code{s} (an array
#' \code{s[types, iters]}), and \code{r} (an array \code{r[types, sources, times]})).}
#'
#' \item{\code{print_fit_params}}{returns a list of fitting parameters (\code{n_iter},
#' \item{\code{print_mcmc_params}}{returns a list of fitting parameters (\code{n_iter},
#' \code{append}, \code{burn_in}, \code{thin}, \code{params_fix} (R6 class with members
#' \code{alpha}, \code{q}, \code{r})).}
#'
......@@ -249,14 +249,14 @@
#' Location = rep("A", 6))
#' priors <- list(a_alpha = 1, a_r = 0.1, a_theta = 0.01, b_theta = 0.00001)
#' res <- HaldDP$new(data = campy, k = prevs, priors = priors, a_q = 0.1)
#' res$fit_params(n_iter = 100, burn_in = 10, thin = 1)
#' res$mcmc_params(n_iter = 100, burn_in = 10, thin = 1)
#' res$update()
#'
#' dat <- res$print_data()
#' init <- res$print_inits()
#' prior <- res$print_priors()
#' acceptance <- res$print_acceptance()
#' fit_params <- res$print_fit_params()
#' mcmc_params <- res$print_mcmc_params()
#'
#' res$plot_heatmap(iters = 10:100, hclust_method = "complete")
#'
......@@ -389,143 +389,31 @@ HaldDP <- R6::R6Class(
}
private$updaters = steps
},
set_data = function(data)
checkin_data = function(y,x,k)
{
if (!is.data.frame(data))
stop("data must be a data frame.")
if (!all(c("Human", "Type", "Time", "Location") %in% colnames(data)))
stop("The data must have columns names Human, Type, Time, and Location.")
source_names <-
colnames(data)[!(colnames(data) %in% c("Human", "Type", "Time", "Location"))]
data <- na.omit(data)
## check all data values are positive, numeric, and whole numbers
check_data <-
all(sapply(data[, c("Human", source_names)], function(x)
return(all(
isFiniteInteger(x)
) & all(x >= 0))))
if (!check_data)
stop("All human and source data values must be positive integers.")
## check that source data is the same for each location within time
for (times in 1:length(unique(data$Time))) {
if (length(unique(data$Location)) > 1) {
tmp <- list()
for (locations in 1:length(unique(data$Location))) {
tmp[[locations]] <- subset(data,
Time == unique(data$Time)[times] &
Location == unique(data$Location)[locations])[, !(names(data) %in% c("Human", "Location", "Time"))]
}
combs <- combn(length(unique(data$Location)), 2)
if (!all(sapply(1:ncol(combs) , function(x)
nrow(setdiff(tmp[[combs[1, x]]], tmp[[combs[2, x]]])) == 0)))
stop("Source data must be identical for each location within a time.")
}
}
## check all types have at least 1 source case over all times
check_non0 <-
matrix(NA, nrow = length(unique(data$Type)), ncol = length(unique(data$Time)))
for (times in 1:length(unique(data$Time))) {
tmp <-
subset(data,
Time == unique(data$Time)[times] &
Location == unique(data$Location)[1])[, c(source_names)]
tmp <- apply(tmp, 1, function(x)
sum(x) <= 0)
names(tmp) <- NULL
if (times > 1)
tmp <- tmp[match(
subset(
data,
Time == unique(data$Time)[1] &
Location == unique(data$Location)[1]
)$Type,
subset(
data,
Time == unique(data$Time)[times] &
Location == unique(data$Location)[1]
)$Type
)] # same order of types
check_non0[, times] <- tmp
}
## if all source data for a particular time, type combo are 0, then stop
if (!all(!apply(check_non0, 1, function(x)
sum(x) >= length(unique(data$Time)))))
stop("One or more type-time combinations has 0 source cases for all sources.")
data$Time <- as.factor(data$Time)
data$Location <- as.factor(data$Location)
data$Type <- as.factor(data$Type)
data$Type <- as.factor(data$Type)
data$Time <- as.factor(data$Time)
data$Location <- as.factor(data$Location)
private$nTypes <- length(unique(data$Type))
private$nTimes <- length(unique(data$Time))
private$nLocations <- length(unique(data$Location))
private$nSources <- length(source_names)
if (dim(unique(data[, c("Type", "Time", "Location")]))[1] != (private$nTypes * private$nTimes * private$nLocations))
stop(
"data must have a non-NA count for the human and source sample columns for each time, type and location combination."
)
private$namesTypes <-
as.factor(paste(gtools::mixedsort(unique(data$Type))))
private$namesTimes <-
as.factor(paste(gtools::mixedsort(unique(data$Time))))
private$namesLocations <-
as.factor(paste(gtools::mixedsort(unique(data$Location))))
private$namesSources <-
as.factor(paste(gtools::mixedsort(source_names)))
# Construct X
# Source data does not have location, so
# take from 1st Location only
dx = split(data[, c(as.character(private$namesSources)
, 'Type', 'Time')], data$Location)[[1]]
dx = melt(dx,
measure.vars = as.character(private$namesSources),
variable.name = 'Source')
private$X = tryCatch(
reshape2::acast(dx, Type ~ Source ~ Time, value.var = 'value'),
condition = function(c)
stop('Irregular source cases. Check Type/Source/Time for consistency')
)
names(dimnames(private$X)) = c('Type', 'Source', 'Time')
if(any(is.na(private$X))) stop('NA in source data')
# Construct y
private$y = tryCatch(
reshape2::acast(data[, c('Human', 'Type', 'Time', 'Location')],
Type ~ Time ~ Location, value.var = 'Human'),
condition = function(c)
stop('Irregular human cases. Check Type/Time/Location consistency.')
)
names(dimnames(private$y)) = c('Type', 'Time', 'Location')
if (any(is.na(private$y)))
stop('NA in human data')
},
set_k = function(k)
{
if (!is.data.frame(k))
stop("k must be a data frame.")
if (!all(c("Value", "Source", "Time") %in% colnames(k)))
stop("k must have columns named Value, Source and Time.")
if (!setequal(unique(k$Time), private$namesTimes))
stop("The times in the Time column of k must be the same as those in the data.")
if (!setequal(unique(k$Source), private$namesSources))
stop("The sources in the Source column of k must be the same as those in the data.")
private$k = tryCatch(
acast(k, Source ~ Time, value.var = 'Value'),
condition = function(c)
stop('k must have a single number for each time, source combination. Check Source/Time combinations for regularity.')
)
if (!all(is.finite(private$k) & private$k >=0 & private$k <=1))
stop("Prevalence value outside [0,1]. Check each Source/Time combination has a value.")
# Check dimension mappings
if(!identical(dimnames(y)$type, dimnames(x)$type))
stop("Types in x and y do not match")
if(!identical(dimnames(y)$time, dimnames(x)$time))
stop("Times in x and y do not match")
if(!identical(dimnames(x)$time, dimnames(k)$time))
stop("Times in x and k do not match")
if(!identical(dimnames(x)$source, dimnames(k)$source))
stop("Sources in x and k do not match")
private$y = y$x
private$X = x$x
private$k = k$x
private$nTypes = dim(private$y)[1]
private$nTimes = dim(private$y)[2]
private$nLocations = dim(private$y)[3]
private$nSources = dim(private$x)[2]
private$namesTypes = dimnames(private$y)$type
private$namesTimes = dimnames(private$y)$time
private$namesLocations = dimnames(private$y)$location
private$namesSources = dimnames(private$x)$source
},
set_a_q = function(a_q)
{
......@@ -584,7 +472,7 @@ HaldDP <- R6::R6Class(
!isFinitePositive(priors$a_r))
stop("priors$a_r must be a data frame or a single number.")
}
browser()
a_r <- array(
NA,
dim = c(private$nTypes, private$nSources, private$nTimes),
......@@ -860,90 +748,6 @@ HaldDP <- R6::R6Class(
n_r <= private$nTypes)
private$n_r <- n_r
},
flatten_alpha = function(object)
{
res <- NULL
ncol_old = 0
for (times in 1:dim(object)[2]) {
for (locations in 1:dim(object)[3]) {
res <- cbind(res, t(object[, times, locations,]))
colnames(res)[(ncol_old + 1):dim(res)[2]] <-
paste(
"alpha",
colnames(res[, (ncol_old + 1):dim(res)[2]]),
dimnames(object)$time[times],
dimnames(object)$location[locations],
sep = "_"
)
ncol_old <- dim(res)[2]
}
}
return(res)
},
flatten_q_s = function(object, names)
{
res <- t(object)
colnames(res) <- paste(names, colnames(res), sep = "_")
return(res)
},
flatten_r = function(object)
{
res <- NULL
ncol_old = 0
for (times in 1:dim(object)[3]) {
for (types in 1:dim(object)[1]) {
res <- cbind(res, t(object[types, , times,]))
colnames(res)[(ncol_old + 1):dim(res)[2]] <-
paste("r",
colnames(res[, (ncol_old + 1):dim(res)[2]]),
dimnames(object)$time[times],
dimnames(object)$type[types],
sep = "_")
ncol_old <- dim(res)[2]
}
}
return(res)
},
flatten_lambda_i = function(object)
{
res <- NULL
ncol_old = 0
for (times in 1:dim(object)[2]) {
for (locations in 1:dim(object)[3]) {
res <- cbind(res, t(object[, times, locations,]))
colnames(res)[(ncol_old + 1):dim(res)[2]] <-
paste(
"lambda",
colnames(res[, (ncol_old + 1):dim(res)[2]]),
dimnames(object)$time[times],
dimnames(object)$location[locations],
sep = "_"
)
ncol_old <- dim(res)[2]
}
}
return(res)
},
flatten_lambda_j = function(object)
{
res <- NULL
ncol_old = 0
for (times in 1:dim(object)[2]) {
for (locations in 1:dim(object)[3]) {
res <- cbind(res, t(object[, times, locations,]))
colnames(res)[(ncol_old + 1):dim(res)[2]] <-
paste(
"lambda",
colnames(res[, (ncol_old + 1):dim(res)[2]]),
dimnames(object)$time[times],
dimnames(object)$location[locations],
sep = "_"
)
ncol_old <- dim(res)[2]
}
}
return(res)
},
calc_CI = function(x, alpha, CI_type)
{
# x is the MCMC output
......@@ -1246,11 +1050,14 @@ HaldDP <- R6::R6Class(
),
public = list(
#TODO : change r init to an x/colsums(x) and remove from model!
initialize = function(data, k, priors, a_q, inits = NULL)
initialize = function(y, x, k, priors, a_q, inits = NULL)
{
## read in data
private$set_data(data) # sets y, X, names and number of sources, types, times and locations
private$set_k(k) # sets prevalences
if(!identical(class(y), c("Y","Data","R6")) )
stop("y must be of type sourceR::Y")
if(!identical(class(x), c("X","Data","R6")) )
stop("x must be of type sourceR::X")
if(!identical(class(x), c("Prev","Data","R6")))
private$checkin_data(y,x,k)
## read in priors
private$set_priors(priors)
......@@ -1279,9 +1086,9 @@ HaldDP <- R6::R6Class(
theta = private$inits$theta,
alpha = private$inits$alpha
)
self$fit_params()
self$mcmc_params()
},
fit_params = function(n_iter = 1000,
mcmc_params = function(n_iter = 1000,
burn_in = 0,
thin = 1,
n_r = ceiling(private$nTypes * 0.2),
......@@ -1295,7 +1102,6 @@ HaldDP <- R6::R6Class(
private$set_update_schema(update_schema) # Builds the schema here
private$reset_posterior() # Resets posterior counters
},
# Run the MCMC. If n_iter is specified,
# runs the algorithm until n_iter iterations
# have been added to the posterior, taking into
......@@ -1369,7 +1175,7 @@ HaldDP <- R6::R6Class(
{
return(private$inits)
},
print_fit_params = function()
print_mcmc_params = function()
{
return(
list(
......@@ -1457,25 +1263,7 @@ HaldDP <- R6::R6Class(
})
if (isTRUE(flatten)) {
res <- do.call(cbind, lapply(params, function(x) {
switch(
x,
"alpha" = private$flatten_alpha(res$alpha),
"q" = private$flatten_q_s(res$q, names='q'),
"r" = private$flatten_r(res$r),
"lambda_i" = private$flatten_lambda_i(res$lambda_i),
"lambda_j" = private$flatten_lambda_j(res$lambda_j),
"lambda_j_prop" = private$flatten_lambda_j(res$lambda_j),
"s" = private$flatten_q_s(res$s, names='s'),
stop("Unrecognised model component")
)
}))
res <- as.data.frame(res)
# if ("s" %in% params) {
# tmp <-
# as.data.frame(private$flatten_q_s(res$s, names = "s"))
# res <- cbind(res, tmp)
# }
res = lapply(res, melt)
}
res
},
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment