Commit e5ba6da3 authored by Chris Jewell's avatar Chris Jewell
Browse files

Resolved commit merge in R/interface.R

parent 614b5ad1
......@@ -165,7 +165,8 @@ HaldDP <- R6::R6Class(
private = list(
## DATA
y = NULL,
X = NULL, ## 3D array [sources, type, time] giving the number of positive samples
X = NULL,
## 3D array [sources, type, time] giving the number of positive samples
k = NULL,
nTypes = NULL,
......@@ -189,8 +190,10 @@ HaldDP <- R6::R6Class(
initialize = function(priors) {
self$a_theta <- priors$a_theta ## single number
self$b_theta <- priors$b_theta ## single number
self$a_r <- priors$a_r ## DP prior for each type, source, time, location
self$a_alpha <- priors$a_alpha ## DP prior for each source, time, location
self$a_r <-
priors$a_r ## DP prior for each type, source, time, location
self$a_alpha <-
priors$a_alpha ## DP prior for each source, time, location
}
)
),
......@@ -204,10 +207,13 @@ HaldDP <- R6::R6Class(
alpha = NULL,
r = NULL,
initialize = function(inits) {
self$theta <- inits$theta ## vector of length number of groups (i.e. no of unique values in s)
self$theta <-
inits$theta ## vector of length number of groups (i.e. no of unique values in s)
self$s <- inits$s ## implement uuid thing
self$alpha <- inits$alpha ## 3D array [source, time, location]
self$r <- inits$r ## 3D array [source, type, time] giving the relative prevalences
self$alpha <-
inits$alpha ## 3D array [source, time, location]
self$r <-
inits$r ## 3D array [source, type, time] giving the relative prevalences
}
)
),
......@@ -241,251 +247,117 @@ HaldDP <- R6::R6Class(
## MODEL and RESULTS
a_q = NULL,
DPModel_impl = NULL,
posterior_class = R6::R6Class(
"posterior_class",
public = list(
q = NULL,
s = NULL,
alpha = NULL,
r = NULL,
lambda_i = NULL,
lambda_j = NULL,
lambda_j_prop = NULL,
initialize = function(nSources, nTimes,
nLocations, nTypes, n_iter,
namesSources, namesTimes,
namesLocations, namesTypes,
namesIters) {
self$alpha <- array(
dim = c(nSources,
nTimes,
nLocations,
n_iter),
dimnames = list(
source = namesSources,
time = namesTimes,
location = namesLocations,
iter = namesIters
)
)
self$q <- array(
dim = c(
nTypes,
n_iter
),
dimnames = list(
type = namesTypes, iter = namesIters
)
)
self$s <- array(
dim = c(
nTypes,
n_iter
),
dimnames = list(
type = namesTypes, iter = namesIters
)
)
self$r <- array(
dim = c(nTypes,
nSources,
nTimes,
n_iter),
dimnames = list(
type = namesTypes, source = namesSources, time = namesTimes, iter = namesIters
)
)
},
calc_lambda_i = function(n_iter, nTimes,
nLocations, nTypes,
namesTimes,
namesLocations, namesTypes,
namesIters,
k) {
self$lambda_i <- array(
dim = c(nTypes,
nTimes,
nLocations,
n_iter),
dimnames = list(
type = namesTypes,
time = namesTimes,
location = namesLocations,
iter = namesIters
)
)
if (!is.null(self$q)) { # don't try to calculate before any iterations have occured
for (i in 1:n_iter) {
for (times in 1:nTimes) {
for (locations in 1:nLocations) {
self$lambda_i[, times, locations, i] <- self$q[, i] * self$r[, , times, i] %*% (k[, times] * self$alpha[, times, locations, i])
}
}
}
} else {
self$lambda_i <- NULL
}
},
calc_lambda_j = function(n_iter, nSources, nTimes,
nLocations,
namesSources, namesTimes,
namesLocations,
namesIters,
k) {
self$lambda_j <- array(
dim = c(nSources,
nTimes,
nLocations,
n_iter),
dimnames = list(
source = namesSources,
time = namesTimes,
location = namesLocations,
iter = namesIters
)
)
if (!is.null(self$q)) { # don't try to calculate before any iterations have occured
for (i in 1:n_iter) {
for (times in 1:nTimes) {
for (locations in 1:nLocations) {
self$lambda_j[, times, locations, i] <- self$alpha[, times, locations, i] * colSums(self$r[, , times, i] * self$q[, i]) * k[, times]
}
}
}
} else {
self$lambda_j <- NULL
}
},
calc_lambda_j_prop = function(nTimes, nLocations) {
# TODO: check with multiple times and locations
if (is.null(self$lambda_j)) self$calc_lambda_j()
self$lambda_j_prop <- self$lambda_j
for (iter in 1:dim(self$lambda_j)[4]) {
for (times in 1:nTimes) {
for (locations in 1:nLocations) {
self$lambda_j_prop[,times,locations,iter] <- self$lambda_j[,times,locations,iter] / sum(self$lambda_j[,times,locations,iter])
}
}
}
}
)
),
posterior = NULL,
acceptance_class = R6::R6Class(
"acceptance",
public = list(
alpha = NULL,
r = NULL,
initialize = function(nSources = private$nSources,
nTimes = private$nTimes,
nLocations = private$nLocations,
nTypes = private$nTypes,
namesSources = private$namesSources,
namesTimes = private$namesTimes,
namesLocations = private$namesLocations,
namesTypes = private$namesTypes) {
self$alpha <- array(
dim = c(nSources,
nTimes,
nLocations),
dimnames = list(
source = namesSources,
time = namesTimes,
location = namesLocations
)
)
self$r <- array(
dim = c(nTypes,
nSources,
nTimes),
dimnames = list(
type = namesTypes, source = namesSources, time = namesTimes
)
)
}
)
),
acceptance = NULL,
## MCMC updaters for each parameter
r_updaters = NULL,
q_updaters = NULL,
alpha_updaters = NULL,
save_chain_state = function(iter) # FINISHED
save_chain_state = function(iter)
# FINISHED
{
for (time in 1:private$nTimes) {
for (location in 1:private$nLocations) {
private$posterior$alpha[, time, location, iter] <- private$DPModel_impl$alphaNodes[[time]][[location]]$getData()
private$posterior$alpha[, time, location, iter] <-
private$DPModel_impl$alphaNodes[[time]][[location]]$getData()
}
for (sources in 1:private$nSources) { # TODO: change to an lapply?
private$posterior$r[ , sources, time, iter] <- private$DPModel_impl$rNodes[[time]][[sources]]$getData()
for (sources in 1:private$nSources) {
# TODO: change to an lapply?
private$posterior$r[, sources, time, iter] <-
private$DPModel_impl$rNodes[[time]][[sources]]$getData()
}
}
private$posterior$q[, iter] <- private$DPModel_impl$qNodes$getData()
private$posterior$q[, iter] <-
private$DPModel_impl$qNodes$getData()
private$posterior$s[, iter] <- private$DPModel_impl$qNodes$s
},
create_posterior = function() # FINISHED
create_posterior = function()
# FINISHED
{
if (private$append == FALSE | isTRUE(all.equal(private$n_iter_old, 0)))
if (private$append == FALSE |
isTRUE(all.equal(private$n_iter_old, 0)))
{
## create posterior if no posterior exists or if append = FALSE
private$posterior <- private$posterior_class$new(nSources = private$nSources, nTimes = private$nTimes,
nLocations = private$nLocations, nTypes = private$nTypes,
n_iter = private$n_iter,
namesSources = private$namesSources, namesTimes = private$namesTimes,
namesLocations = private$namesLocations, namesTypes = private$namesTypes,
namesIters = 1:private$n_iter)
private$posterior <-
Posterior$new(
nSources = private$nSources,
nTimes = private$nTimes,
nLocations = private$nLocations,
nTypes = private$nTypes,
n_iter = private$n_iter,
namesSources = private$namesSources,
namesTimes = private$namesTimes,
namesLocations = private$namesLocations,
namesTypes = private$namesTypes,
namesIters = 1:private$n_iter
)
} else {
new_post <- private$posterior_class$new(nSources = private$nSources,
nTimes = private$nTimes,
nLocations = private$nLocations,
nTypes = private$nTypes,
n_iter = private$n_iter - private$n_iter_old,
namesSources = private$namesSources, namesTimes = private$namesTimes,
namesLocations = private$namesLocations, namesTypes = private$namesTypes,
namesIters = (private$n_iter_old + 1) : private$n_iter)
private$posterior$alpha <- abind::abind(private$posterior$alpha, new_post$alpha, along = 4)
names(dimnames(private$posterior$alpha)) <- names(dimnames(new_post$alpha))
new_post <- Posterior$new(
nSources = private$nSources,
nTimes = private$nTimes,
nLocations = private$nLocations,
nTypes = private$nTypes,
n_iter = private$n_iter - private$n_iter_old,
namesSources = private$namesSources,
namesTimes = private$namesTimes,
namesLocations = private$namesLocations,
namesTypes = private$namesTypes,
namesIters = (private$n_iter_old + 1):private$n_iter
)
private$posterior$alpha <-
abind::abind(private$posterior$alpha, new_post$alpha, along = 4)
names(dimnames(private$posterior$alpha)) <-
names(dimnames(new_post$alpha))
new_post$alpha <- NULL
private$posterior$q <- abind::abind(private$posterior$q, new_post$q, along = 2)
names(dimnames(private$posterior$q)) <- names(dimnames(new_post$q))
private$posterior$q <-
abind::abind(private$posterior$q, new_post$q, along = 2)
names(dimnames(private$posterior$q)) <-
names(dimnames(new_post$q))
new_post$q <- NULL
private$posterior$s <- abind::abind(private$posterior$s, new_post$s, along = 2)
names(dimnames(private$posterior$s)) <- names(dimnames(new_post$s))
private$posterior$s <-
abind::abind(private$posterior$s, new_post$s, along = 2)
names(dimnames(private$posterior$s)) <-
names(dimnames(new_post$s))
new_post$s <- NULL
private$posterior$r <- abind::abind(private$posterior$r, new_post$r, along = 4)
names(dimnames(private$posterior$r)) <- names(dimnames(new_post$r))
private$posterior$r <-
abind::abind(private$posterior$r, new_post$r, along = 4)
names(dimnames(private$posterior$r)) <-
names(dimnames(new_post$r))
new_post <- NULL
}
},
assign_updaters = function() # FINISHED
assign_updaters = function()
# FINISHED
{
private$r_updaters <- list()
private$alpha_updaters <- list()
# sapply(1:private$nTimes, function(time) {
# private$alpha_updates[[time]] <- list()
# sapply(1:private$nLocations, function(location) {
# is.
# })
# })
for (time in 1:private$nTimes) {
private$alpha_updaters[[time]] <- list()
for (location in 1:private$nLocations) {
if (isTRUE(private$params_fix$alpha)) {
private$alpha_updaters[[time]][[location]] <- function() return(NULL)
private$alpha_updaters[[time]][[location]] <-
function()
return(NULL)
} else {
private$alpha_updaters[[time]][[location]] <- AdaptiveDirMRW$new(private$DPModel_impl$alphaNodes[[time]][[location]],
tune = rep(0.4, private$nSources)) # User specificed starting variance?
private$alpha_updaters[[time]][[location]] <-
AdaptiveDirMRW$new(private$DPModel_impl$alphaNodes[[time]][[location]],
tune = rep(0.4, private$nSources)) # User specificed starting variance?
}
}
private$r_updaters[[time]] <- list()
for (sources in 1:private$nSources) { # TODO: change to an lapply?
for (sources in 1:private$nSources) {
# TODO: change to an lapply?
if (isTRUE(private$params_fix$r)) {
private$r_updaters[[time]][[sources]] <- function() return(NULL)
private$r_updaters[[time]][[sources]] <- function()
return(NULL)
} else {
private$r_updaters[[time]][[sources]] <- AdaptiveLogDirMRW$new(private$DPModel_impl$rNodes[[time]][[sources]],
toupdate = function() sample(private$nTypes, private$n_r),
......@@ -495,37 +367,32 @@ HaldDP <- R6::R6Class(
}
}
if (isTRUE(private$params_fix$q)) {
private$q_updaters <- function() return(NULL)
private$q_updaters <- function()
return(NULL)
} else {
private$q_updaters <- PoisGammaDPUpdate$new(private$DPModel_impl$qNodes)
}
},
test_integer = function(a) # FINISHED
{
return(is.finite(a) & isTRUE(all.equal(a, as.integer(a))))
},
test_logical = function(a) # FINISHED
{
return(is.finite(a) & is.logical(a))
},
test_positive_number = function(a) # FINISHED
{
return(is.finite(a) & a > 0)
},
set_data = function(data) # FINISHED
set_data = function(data)
# FINISHED
{
if (!is.data.frame(data)) stop("data must be a data frame.")
if (!all(c("Human", "Type", "Time", "Location") %in% colnames(data))) stop("The data must have columns names Human, Type, Time, and Location.")
source_names <- colnames(data)[!(colnames(data) %in% c("Human", "Type", "Time", "Location"))]
if (!is.data.frame(data))
stop("data must be a data frame.")
if (!all(c("Human", "Type", "Time", "Location") %in% colnames(data)))
stop("The data must have columns names Human, Type, Time, and Location.")
source_names <-
colnames(data)[!(colnames(data) %in% c("Human", "Type", "Time", "Location"))]
data <- na.omit(data)
## check all data values are positive, numeric, and whole numbers
check_data <- all(sapply(data[, c("Human", source_names)], function(x) return(all(private$test_integer(x)) & all(x >= 0))))
if (!check_data) stop("All human and source data values must be positive integers.")
check_data <-
all(sapply(data[, c("Human", source_names)], function(x)
return(all(
isFiniteInteger(x)
) & all(x >= 0))))
if (!check_data)
stop("All human and source data values must be positive integers.")
data$Time <- as.factor(data$Time)
data$Location <- as.factor(data$Location)
data$Type <- as.factor(data$Type)
......@@ -540,161 +407,229 @@ HaldDP <- R6::R6Class(
private$nSources <- length(source_names)
if (dim(unique(data[, c("Type", "Time", "Location")]))[1] != (private$nTypes * private$nTimes * private$nLocations))
stop("data must have a non-NA count for the human and source sample columns for each time, type and location combination.")
stop(
"data must have a non-NA count for the human and source sample columns for each time, type and location combination."
)
private$namesTypes <- as.factor(paste(gtools::mixedsort(unique(data$Type))))
private$namesTimes <- as.factor(paste(gtools::mixedsort(unique(data$Time))))
private$namesLocations <- as.factor(paste(gtools::mixedsort(unique(data$Location))))
private$namesSources <- as.factor(paste(gtools::mixedsort(source_names)))
private$namesTypes <-
as.factor(paste(gtools::mixedsort(unique(data$Type))))
private$namesTimes <-
as.factor(paste(gtools::mixedsort(unique(data$Time))))
private$namesLocations <-
as.factor(paste(gtools::mixedsort(unique(data$Location))))
private$namesSources <-
as.factor(paste(gtools::mixedsort(source_names)))
private$y <- array(
NA, dim = c(private$nTypes, private$nTimes, private$nLocations),
dimnames = list(type = private$namesTypes,
time = private$namesTimes,
location = private$namesLocations)
NA,
dim = c(private$nTypes, private$nTimes, private$nLocations),
dimnames = list(
type = private$namesTypes,
time = private$namesTimes,
location = private$namesLocations
)
)
private$X <- array(
NA, dim = c(private$nTypes, private$nSources, private$nTimes),
dimnames = list(type = private$namesTypes,
source = private$namesSources,
time = private$namesTimes)
NA,
dim = c(private$nTypes, private$nSources, private$nTimes),
dimnames = list(
type = private$namesTypes,
source = private$namesSources,
time = private$namesTimes
)
)
## Extract values from data frame and put into arrays. Surely this is very slow!! TODO: find better way!
for (time in private$namesTimes) {
for (type in private$namesTypes) {
for (sources in private$namesSources) {
tmp_X <- unique(data[which((data$Type == type) & (data$Time == time)),
sources])
if (length(tmp_X) != 1L) stop("The data must have a non-NA row for each time and type for each source.")
tmp_X <-
unique(data[which((data$Type == type) & (data$Time == time)),
sources])
if (length(tmp_X) != 1L)
stop("The data must have a non-NA row for each time and type for each source.")
private$X[which(private$namesTypes == type),
which(private$namesSources == sources),
which(private$namesTimes == time)] <- tmp_X
}
for (location in private$namesLocations) {
tmp_y <- data$Human[which((data$Type == type) & (data$Time == time) & (data$Location == location))]
if (length(tmp_y) != 1L) stop("The data must have a single non-NA row for each time, type, and location for each source.")
tmp_y <-
data$Human[which((data$Type == type) &
(data$Time == time) & (data$Location == location))]
if (length(tmp_y) != 1L)
stop(
"The data must have a single non-NA row for each time, type, and location for each source."
)
private$y[which(private$namesTypes == type),
which(private$namesTimes == time),
which(private$namesLocations == location)] <- tmp_y
which(private$namesLocations == location)] <-
tmp_y
}
}
}
},
set_k = function(k) # FINISHED
set_k = function(k)
# FINISHED
{
if (!is.data.frame(k)) stop("k must be a data frame.")
if (!is.data.frame(k))
stop("k must be a data frame.")
k <- na.omit(k)
if (!all(c("Value", "Source", "Time") %in% colnames(k))) stop("k must have columns named Value, Source and Time.")
if (!all(c("Value", "Source", "Time") %in% colnames(k)))
stop("k must have columns named Value, Source and Time.")
k$Source <- as.factor(k$Source)
k$Time <- as.factor(k$Time)
if (all(paste(gtools::mixedsort(unique(k$Time))) != private$namesTimes)) stop("The times in the Time column of k must be the same as those in the data.")
if (all(paste(gtools::mixedsort(unique(k$Source))) != private$namesSources)) stop("The sources in the Source column of k must be the same as those in the data.")
if (all(paste(gtools::mixedsort(unique(k$Time))) != private$namesTimes))
stop("The times in the Time column of k must be the same as those in the data.")
if (all(paste(gtools::mixedsort(unique(k$Source))) != private$namesSources))
stop("The sources in the Source column of k must be the same as those in the data.")
if (dim(unique(k[, c("Time", "Source")]))[1] != (private$nTimes * private$nSources))
stop("k must have a single number for each time, source combination.")
## check all data values are positive, numeric, and whole numbers
if (!all(is.finite(k$Value)) | !all(k$Value >= 0) | !all(k$Value <= 1))
if (!all(is.finite(k$Value)) |
!all(k$Value >= 0) | !all(k$Value <= 1))
stop("All prevalence values must be numbers between 0 and 1.")
private$k <- array(
NA, dim = c(private$nSources, private$nTimes),
dimnames = list(source = private$namesSources,
time = private$namesTimes)
NA,
dim = c(private$nSources, private$nTimes),
dimnames = list(
source = private$namesSources,
time = private$namesTimes
)
)
## Extract values from data frame and put into arrays. Surely this is very slow!! TODO: find better way!
for (time in private$namesTimes) {
for (sources in private$namesSources) {
tmp_k <- k$Value[which(k$Time == time & k$Source == sources)]
if (length(tmp_k) == 0L) stop("k must have a non-NA row for each time and source.")
private$k[which(private$namesSources == sources), which(private$namesTimes == time)] <- tmp_k
if (length(tmp_k) == 0L)
stop("k must have a non-NA row for each time and source.")
private$k[which(private$namesSources == sources), which(private$namesTimes == time)] <-
tmp_k
}
}
},
set_a_q = function(a_q) # FINISHED
set_a_q = function(a_q)
# FINISHED
{
if (length(a_q) != 1 | !private$test_positive_number(a_q) | a_q <= 0) stop("a_q should be a single positive number")
if (length(a_q) != 1 |
!isFinitePositive(a_q) |
a_q <= 0)
stop("a_q should be a single positive number")
private$a_q <- a_q
},
set_priors = function(priors) # FINISHED
set_priors = function(priors)
# FINISHED
{
if (!is.list(priors) | !all(c("a_alpha", "a_r", "a_theta", "b_theta") %in% names(priors)))
if (!is.list(priors) |
!all(c("a_alpha", "a_r", "a_theta", "b_theta") %in% names(priors)))
stop("The priors must be a list with names a_alpha, a_r, a_theta, and b_theta.")
## theta priors
if (length(priors$a_theta) != 1 | !private$test_positive_number(priors$a_theta))
if (length(priors$a_theta) != 1 |