Commit 44c53fa1 authored by Poppy Miller's avatar Poppy Miller
Browse files

Added code to check source data does not have all 0's for a type for all...

Added code to check source data does not have all 0's for a type for all times, and to check it was identical for each location (within a time).
parent 08675db7
......@@ -12,6 +12,9 @@
#' @docType class
#' @name HaldDP
#' @importFrom R6 R6Class
#' @importFrom grDevices col2rgb colorRampPalette
#' @importFrom stats median
#' @import dplyr
#' @export
#'
#' @return Object of \code{\link{HaldDP}} with methods for creating a HaldDP model,
......@@ -225,13 +228,15 @@
#' \deqn{r_{ijt}} is the unknown relative occurrence of type \eqn{i} on source \eqn{j}.
#'
#' \emph{Priors}
#' \deqn{r_{.jt}\sim Dirichlet(a_r_{1jt},..., a_r_{njt})}
#' \deqn{a_{tl}\sim Dirichlet(a_alpha_{1tl},..., a_alpha_{mtl})}
#' \deqn{r_{.jt}\sim Dirichlet(a\_r_{1jt},..., a\_r_{njt})}
#' \deqn{a_{tl}\sim Dirichlet(a\_alpha_{1tl},..., a\_alpha_{mtl})}
#' \deqn{q\sim DP(a_q, Gamma(a_{theta},b_{theta}))}
#' }
#'
#' @references Chen, M.-H. and Shao, Q.-M. (1998). Monte Carlo estimation of Bayesian credible and HPD intervals, \emph{Journal of Computational and Graphical Statistics}, 7.
#' @references Liu Y, Gelman A, Zheng T (2015). "Simulation-efficient shortest probability intervals." Statistics and Computing.
#' @references Chen, M.-H. and Shao, Q.-M. (1998). Monte Carlo estimation of Bayesian
#' credible and HPD intervals, \emph{Journal of Computational and Graphical Statistics}, 7.
#' @references Liu Y, Gelman A, Zheng T (2015). "Simulation-efficient shortest probability
#' intervals." Statistics and Computing.
#' @author Chris Jewell and Poppy Miller \email{p.miller at lancaster.ac.uk}
#'
#' @examples
......@@ -248,19 +253,19 @@
#' res$fit_params(n_iter = 100, burn_in = 10, thin = 1)
#' res$update()
#'
#' res$print_data()
#' res$print_inits()
#' res$print_priors()
#' res$print_acceptance()
#' res$print_fit_params()
#' dat <- res$print_data()
#' init <- res$print_inits()
#' prior <- res$print_priors()
#' acceptance <- res$print_acceptance()
#' fit_params <- res$print_fit_params()
#'
#' res$plot_heatmap(iters = 10:100, hclust_method = "complete")
#'
#' res$summary(params = c("alpha", "q", "lambda_i"),
#' summarys <- res$summary(params = c("alpha", "q", "lambda_i"),
#' times = "1", sources = c("ChickenA", "Bovine"),
#' iters = 10:100, flatten = TRUE, CI_type = "chen-shao")
#'
#' res$extract(params = c("alpha", "r", "q", "lambda_j"),
#' posteriors <- res$extract(params = c("alpha", "r", "q", "lambda_j"),
#' sources = c("ChickenB", "Ovine"),
#' types = c("474", "52"),
#' iters = 50:100, drop = FALSE, flatten = FALSE)
......@@ -477,11 +482,42 @@ HaldDP <- R6::R6Class(
## check all data values are positive, numeric, and whole numbers
check_data <-
all(sapply(data[, c("Human", source_names)], function(x)
return(all(
isFiniteInteger(x)
) & all(x >= 0))))
return(
all(isFiniteInteger(x)) & all(x >= 0)
)
))
if (!check_data)
stop("All human and source data values must be positive integers.")
## check that source data is the same for each location within time
for (times in 1:length(unique(data$Time))) {
if (length(unique(data$Location)) > 1) {
tmp <- list()
for (locations in 1:length(unique(data$Location))) {
tmp[[locations]] <- subset(data, Time == unique(data$Time)[times] &
Location == unique(data$Location)[locations])[, !(names(data) %in% c("Human", "Location", "Time"))]
}
combs <- combn(length(unique(data$Location)), 2)
if (!all(sapply(1:ncol(combs) , function(x)
nrow(setdiff(tmp[[combs[1, x]]], tmp[[combs[2, x]]])) == 0)))
stop("Source data must be identical for each location within a time.")
}
}
## check all types have at least 1 source case over all times
check_non0 <- matrix(NA, nrow = length(unique(data$Type)), ncol = length(unique(data$Time)))
for (times in 1:length(unique(data$Time))) {
tmp <- subset(data, Time == unique(data$Time)[times] & Location == unique(data$Location)[1])[, c(source_names)]
tmp <- apply(tmp, 1, function(x) sum(x) <= 0)
names(tmp) <- NULL
if (times > 1) tmp <- tmp[match(subset(data, Time == unique(data$Time)[1] &
Location == unique(data$Location)[1])$Type,
subset(data, Time == unique(data$Time)[times] &
Location == unique(data$Location)[1])$Type)] # same order of types
check_non0[, times] <- tmp
}
## if all source data for a particular time, type combo are 0, then stop
if (!all(!apply(check_non0, 1, function(x) sum(x) >= length(unique(data$Time)))))
stop("One or more type-time combinations has 0 source cases for all sources.")
data$Time <- as.factor(data$Time)
data$Location <- as.factor(data$Location)
data$Type <- as.factor(data$Type)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment