Commit c2aa89d4 authored by Poppy Miller's avatar Poppy Miller
Browse files

Added man files

parent 7a4c1d73
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/model.R
\docType{data}
\name{DPModel_impl}
\alias{DPModel_impl}
\title{Builds the source attribution model. Is not intended to be used by a regular user. Developers only here!}
\format{An object of class \code{R6ClassGenerator} of length 24.}
\usage{
DPModel_impl
}
\arguments{
\item{y}{3D array of [type, time, location] of the number of human cases}
\item{R}{3D array of normalised relative prevalences for each timepoint [type, source, time]}
\item{Time}{a character vector of timepoint ids matching time dimension in y and R}
\item{Location}{a character vector of location ids matching location dimension in y}
\item{Prev}{a 2D array (matrix) of [source, time].}
\item{a_q}{concentration parameter for the DP}
\item{a_theta}{shape parameter for the Gamma base distribution for the DP}
\item{b_theta}{rate parameter for the Gamma base distribution for the DP}
\item{s}{vector giving group allocation for each type for the DP}
\item{theta}{vector giving values for each group in the DP}
\item{a_r}{3D array of [type, src, time] for the hyperprior on the relative prevalences R}
}
\description{
Builds the source attribution model. Is not intended to be used by a regular user. Developers only here!
}
\keyword{datasets}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/interface.R
\docType{class}
\name{HaldDP}
\alias{HaldDP}
\title{Runs the HaldDP source attribution model}
\format{Object of \code{\link{R6Class}} with methods for creating a HaldDP model,
running the model, and accessing and plotting the results.}
\usage{
HaldDP
}
\value{
Object of \code{\link{HaldDP}} with methods for creating a HaldDP model,
running the model, and accessing and plotting the results.
}
\description{
Runs the HaldDP source attribution model
}
\section{Description}{
This function fits a non-parametric Poisson source attribution model for human cases of
disease. It supports multiple types, sources, times and locations. The number of
human cases for each type, time and location follow a Poisson likelihood.
\deqn{y_{itl}\sim\textsf{Poisson}(\lambda_{itl})}
}
\section{Methods}{
\describe{
\item{\code{new(data, k, priors, a_q, inits = NULL)}}{Constructor takes
\code{data} dataframe (with columns containing the number of human cases named
\code{Human}, columns containing the number of positive source samples
(one column per source), a column with the time id's named \code{Time},
a column with the type id's named \code{Type}, and a column with the source
location id's \code{Location}). The data for the human cases and source
counts must be integers. The data for the time, location and type columns
must be factors. The source counts are currently only allowed to vary over time,
hence they must be repeated for each location within each time.
\code{k} prevalence dataframe (with columns named \code{Value, Time,
Location and Source}). Prevalences must be between 0 and 1 as they are the
proportion of samples that were positive for any type for a given source and time.
\code{priors} list with elements named \code{a_r}, \code{a_alpha}, \code{a_theta} and \code{b_theta},
corresponding to the prior parameters for the \code{r}, \code{alpha}, and base
distribution for the DP parameters respectively.
\tabular{lllll}{
\emph{Parameter} \tab \emph{Prior Distribution} \tab \emph{Prior Parameters}\cr
\code{a_r} \tab Dirichlet(concentration) \tab A single positive number or a data \cr
\tab \tab frame with columns giving the prior values \cr
\tab \tab (named \code{Value}), times (named Time) \cr
\tab \tab and source ids (named \code{Source}). If a\cr
\tab \tab single number is supplied, it will be used for\cr
\tab \tab all times, sources and locations. \cr
\code{a_alpha} \tab Dirichlet(concentration) \tab A single positive number or a dataframe \cr
\tab \tab with columns giving the prior values (named \cr
\tab \tab \code{value}), times (named the name of the \cr
\tab \tab time column in the data), locations (named the \cr
\tab \tab name of the location column in the data) and \cr
\tab \tab the source id (named \code{source_id}). \cr
Type effects base \tab DPM(Gamma(shape, rate), \tab Numerical vector of length 2 for the shape and \cr
distribution (\code{theta}) \tab alpha)\tab rate of the Gamma base distribution.\cr
}
\code{a_q} the Dirichlet Process concentration parameter.
\code{inits} (optional) initial values for the mcmc algorithm. This is a list
that may contain any of the following items: \code{alpha} (a data frame with
columns named \code{Value} containing the initial values, \code{Time},
\code{Location}, \code{Source}), \code{q} (a data frame with columns named
\code{Value} containing the initial values and \code{Type}), and \code{r} (a data
frame a column with the initial r values named \code{Value} (note these must
sum to 1 for each source-time combination), a column with the source id's
named \code{Source}, a column with the time id's named \code{Time}, a column
with the type id's named \code{Type}.)
An optional list giving the starting values for the parameters.
\tabular{lll}{
\emph{Parameter} \tab \emph{Description} \cr
\code{r}
\tab A data frame with columns giving the initial values (named \code{Value}),\cr
\tab times (named \code{Time}) and source and type id's (named \code{Source} and \code{Type}). \cr
\tab DEFAULT: the default initial values are the maximum likelihood point \cr
\tab estimates of \code{r} from the source matrix (i.e. \eqn{r_{ij} = x_{ij} / \sum_{i=1}^{n} x_{ij}}).\cr
Source effects (\code{alpha})
\tab A data frame with columns named \code{Value} (containing the initial values), \cr
\tab \code{Source} (containing the source names) and columns giving the time and \cr
\tab location for each parameter (named Location). DEFAULT: The default initial values\cr
\tab for the source effects are drawn from the prior distribution (Dirichlet). \cr
Type effects (\code{q})
\tab A data frame with columns giving the initial values (named \code{Value})\cr
\tab and the type ids (named Type). DEFAULT: initialise all type effects to be in \cr
\tab a single group with a theta value calculated as \cr
\tab \eqn{\theta = \sum(Human_{itl}) / \sum_{l=1}^{L} \sum_{t=1}^{T} \sum_{i=1}^{n} \sum_{j=1}^{m} (\alpha_{jtl} r_{ijt} k_{jt})}. \cr
\tab i.e. \eqn{\theta = \sum(Human_{itl}) / \sum(\lambda_{ijtl} / \theta)}}
}
\item{\code{fit_params(n_iter = 1000, burn_in = 0, thin = 1,
n_r = ceiling(private$nTypes * 0.2), params_fix = NULL)}}{when called, sets the mcmc
parameters.
\code{n_iter} sets the number of iterations returned (after removing
\code{burn_in} and thinning results by \code{thin} i.e. a total of
(n_iter * thin) + burn_in iterations are run)
\code{n_r} is a positive
integer that sets the total number of \code{r_{ijtl}} parameters to be updated
at each time-location-source combination (the default is 20 percent updated
per iteration)
\code{params_fix} is a list with a logical value for any of the model parameters
(any of \code{"alpha", "r", or "q"}). For each parameter, if set to \code{TRUE},
the parameters will be fixed at their initial values and not updated.}
\item{\code{update(n_iter, append = TRUE)}}{when called, updates the \code{HaldDP}
model by running \code{n_iter} iterations.
If missing \code{n_iter}, the \code{n_iter} last set using \code{fit_params()}
or \code{update()} is used.
\code{append}
is a logical value which determines whether the next \code{n_iter} iterations
are appended to any previous iterations, or overwrites them. When
\code{append = TRUE}, the starting values are the last iteration and no
\code{burn_in} is removed. Running the model for the first time, or changing any
model or fitting parameters will set \code{append = FALSE}.}
\item{\code{print_data}}{returns a list containing the human data \code{y}
(an array y[types, times, locations]), the source data \code{X} (an array X[types, sources, times]),
the prevalence data (an array k[sources, times]), the type names, source names,
time names, location names and number of different types, sources, times and locations.
}
\item{\code{print_priors}}{returns a list containing the DP concentration
parameter \code{a_q}, and the priors (R6 class with members named \code{a_alpha}
(members are array \code{a_alpha[sources, times, locations]}), \code{a_r} (an array \code{a_r[types, sources, times]}),
\code{a_theta} and \code{b_theta}).}
\item{\code{print_inits}}{returns an R6 class holding the initial values
(members are \code{alpha} (an array \code{alpha[sources, times, locations]}),
\code{theta} (an array \code{theta[types, iters]}), \code{s} (an array
\code{s[types, iters]}), and \code{r} (an array \code{r[types, sources, times]})).}
\item{\code{print_fit_params}}{returns a list of fitting parameters (\code{n_iter},
\code{append}, \code{burn_in}, \code{thin}, \code{params_fix} (R6 class with members
\code{alpha}, \code{q}, \code{r})).}
\item{\code{print_acceptance}}{returns an R6 class containing the acceptance
rates for each parameter (members are \code{alpha} (an array \code{alpha[sources, times, locations]}),
and \code{r} (an array \code{r[types, sources, times]})).}
\item{\code{extract(params = c("alpha", "q", "s", "r", "lambda_i", "lambda_j", "lambda_j_prop"),
times = NULL, locations = NULL, sources = NULL, types = NULL, iters = NULL,
flatten = FALSE, drop = TRUE)}}{returns a list containing a subset of the parameters
(determined by the \code{params} vector, \code{times}, \code{locations}, \code{sources}, \code{types} and \code{iters}).
If \code{flatten} is set to \code{TRUE}, it returns a dataframe with 1 column per
parameter, otherwise it returns a list containing \code{params} containing a
subset of the following arrays: \code{alpha[Sources, Times, Locations, iters]}, \code{q[Types, iters]},
\code{s[Types, iters]}, \code{r[Types, Sources, Times, iters]}, \code{lambda_i[Types, Times, Locations, iters]},
\code{lambda_j[Sources, Times, Locations, iters]}.
\code{drop}
determines whether to delete the dimensions of an array which have only one
level when \code{flatten = FALSE}.}
\item{\code{summary(alpha = 0.05, params = c("alpha", "q", "s", "r", "lambda_i",
"lambda_j" ,"lambda_j_prop"), times = NULL, locations = NULL, sources = NULL,
types = NULL, iters = NULL, flatten = FALSE, drop = TRUE, CI_type = "c-s")}}{
returns a list containing the
median and credible intervals for a subset of the parameters. The default credible
interval type is Chen-Shao highest posterior density intervals (alternatives
are \code{"percentiles"} and \code{"spin"}).
See \code{extract} for details on the subsetting. \code{lambda_j_prop} returns the
proportion of cases attributed to each source \code{j} and is calculated by dividing
each iteration of \code{lambda_{jtl}} values by their sum within each time \code{t} and location \code{l}.}
\item{\code{plot_heatmap(iters, cols = c("blue","white"), hclust_method = "complete")}}{
Creates a dendrogram and heatmap for the type effect groupings (\code{s} parameter in the model).
This uses the heatmap.2 function from gplots.
\code{iters} is a vector containing the iterations to be used in constructing
the graph. Default is all iterations in posterior.
\code{hclust_method} allows the user to select the method used by \code{stats::hclust} to
cluster the type effect groupings \code{s}.
\code{cols} gives the colours for completely dissimilar (dissimilarity value
of 1), and identical (dissimilarity value of 0). All other values will be in
between the two chosen colours. See ?colorRampPalette for more details.}
}
}
\examples{
data(campy)
zero_rows <- which(apply(campy[,c(2:7)], 1, sum) == 0)
campy <- campy[-zero_rows,]
prevs <- data.frame(Value = 1/c(181/ 239, 113/196, 109/127,
97/595, 165/552, 86/524),
Source = colnames(campy[, 2:7]),
Time = rep(1, 6),
Location = rep("A", 6))
priors <- list(a_alpha = 1, a_r = 1, a_theta = 0.01, b_theta = 0.00001)
res <- HaldDP$new(data = campy, k = prevs, priors = priors, a_q = 1)
res$fit_params(n_iter = 100, burn_in = 10, thin = 1)
res$update()
res$print_data()
res$print_inits()
res$print_priors()
res$print_acceptance()
res$print_fit_params()
res$plot_heatmap(iters = 10:100, hclust_method = "complete")
res$summary(params = c("alpha", "q", "lambda_i"),
times = "1", sources = c("ChickenA", "Bovine"),
iters = 10:100, flatten = TRUE, CI_type = "c-s")
res$extract(params = c("alpha", "r", "q", "lambda_j"),
sources = c("ChickenB", "Ovine"),
types = c("474", "52"),
iters = 50:100, drop = FALSE, flatten = FALSE)
}
\author{
Chris Jewell and Poppy Miller \email{p.miller@lancaster.ac.uk}
}
\references{
Chen, M.-H. and Shao, Q.-M. (1998). Monte Carlo estimation of Bayesian credible and HPD intervals, \emph{Journal of Computational and Graphical Statistics}, 7.
Liu Y, Gelman A, Zheng T (2015). "Simulation-efficient shortest probability intervals." Statistics and Computing.
}
\keyword{datasets}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment