Commit a5ecb881 authored by Chris Jewell's avatar Chris Jewell
Browse files

Implemented flat HDF5 data structure for parameters and occults. This is now...

Implemented flat HDF5 data structure for parameters and occults.  This is now compatible with rhdf5 (see InFERreader) and h5py.
parent 561c7cc6
......@@ -7,6 +7,6 @@ Author: Chris Jewell (c) 2013
Maintainer: Chris Jewell <c.p.jewell@massey.ac.nz>
Description: This package performs Bayesian Inference on epidemic models, and hence provides real-time risk prediction for disease outbreaks.
License: GPLv3
Depends: Rcpp (>= 0.9.10), methods
LinkingTo: Rcpp
Packaged: 2013-03-07 00:41:57 UTC; cpjewell
Depends: R (>= 3.1.0)
Imports: Rcpp (>= 0.9.10), methods, InFERreader
useDynLib(inferfmd)
exportClasses(Posterior,
HD5ParamProxy,
HD5InfecProxy,
SpatPointSINR)
exportClasses(SpatPointSINR)
export(Posterior,
SpatPointSINR,
read.posterior,
write.posterior)
export(SpatPointSINR)
exportMethods(show,
names,
dim,
length,
nrow,
ncol,
"[",
"$",
as.data.frame,
poccult,
summary,
plot,
berp.fit,
exportMethods(berp.fit,
berp.sim)
# Auxilliary helper functions
crn <- function(n)
{
# Takes a number n, and returns a pair
# of integers (x,y) describing a near-square
# grid containing n + m elements, minimising
# m.
rows <- floor(sqrt(n))
cols <- rows
m <- rows*cols - n
if(m == 0) return(as.integer(c(rows,cols)))
m <- -Inf
# Add columns until we have a positive remainder
while(m < 0) {
cols <- cols + 1
m <- rows*cols - n
}
# Now adjust the aspect ratio to minimise m
while(TRUE)
{
mprime <- (cols+1)*(rows-1) - n
if(mprime < m & mprime >= 0 & rows/cols > 0.4)
{
cols <- cols + 1
rows <- rows - 1
m <- mprime
}
else break
}
d <- c(rows,cols)
return(as.integer(d))
}
# GPLv3 here
# Helper functions
getPosteriorLen <- function(filename)
{
return(.Call("getPosteriorLen", filename))
}
getPosteriorModel <- function(filename)
{
return(.Call("getPosteriorModel", filename))
}
#getPosteriorParam <- function(filename, i, j)
#{
# rows <- NULL
# cols <- NULL
#
# # Assess missingness of operators
# if(!missing(i)) rows <- i
# if(!missing(j)) cols <- j
#
# return(.Call("getPosteriorParam", filename))
#}
#
#getPosteriorInfec <- function(filename, i, j)
#{
# rows <- NULL
# cols <- NULL
#
# # Assess missingness of operators
# if(!missing(i)) rows <- i
# if(!missing(j)) cols <- j
#
# return(.Call("getPosteriorInfec", filename))
#}
lookupInfIdxNames <- function(theVector, tags)
{
names(theVector) <- tags[as.numeric(names(theVector))+1]
theVector
}
# Constructor functions
Posterior <- function(filename)
{
new("Posterior", filename=filename)
}
read.posterior <- function(filename)
{
Posterior(filename);
}
write.posterior <- function(posterior, filename, ...)
{
file.copy(posterior@filename, filename, ...)
}
HD5ParamProxy <- function(filename)
{
new("HD5ParamProxy", filename=filename)
}
HD5InfecProxy <- function(filename)
{
new("HD5InfecProxy", filename=filename)
}
.initPosterior <- function() {
# Proxy classes -- these classes provide an interface to the posterior in
# the underlying disc storage.
setClass("HD5ParamProxy",representation(filename="character",tags="character"))
setMethod("initialize","HD5ParamProxy",
function(.Object,filename) {
.Object@filename <- filename
info <- .Call("getPosteriorParamInfo", filename)
.Object@tags <- info$tags
return(.Object)
}
)
setClass("HD5InfecProxy",representation(filename="character",tags="character"))
setMethod("initialize","HD5InfecProxy",
function(.Object,filename) {
.Object@filename <- filename
info <- .Call("getPosteriorInfecInfo", filename)
.Object@tags <- info$tags
return(.Object)
}
)
setClass("Posterior",representation(filename="character", model="character", param="HD5ParamProxy",infec="HD5InfecProxy"))
setMethod("initialize", "Posterior", function(.Object, filename) {
.Object@filename <- filename
.Object@model <- getPosteriorModel(filename)
.Object@param <- HD5ParamProxy(filename)
.Object@infec <- HD5InfecProxy(filename)
return(.Object)
})
# Metadata methods
setMethod("show","Posterior", function(object) cat("Instance of Posterior for model '",object@model,"', length=",length(object),"\n",sep=''))
setMethod("show","HD5ParamProxy", function(object) cat("Instance of HD5ParamProxy\n"))
setMethod("show","HD5InfecProxy", function(object) cat("Instance of HD5InfecProxy\n"))
setMethod("names","Posterior", function(x) c("param","infec","model","filename"))
setMethod("names","HD5ParamProxy", function(x) x@tags)
setMethod("names","HD5InfecProxy", function(x) x@tags)
#setMethod("length", "HD5ParamProxy", function(x) getPosteriorLen(x@filename))
#setMethod("length", "HD5InfecProxy", function(x) getPosteriorLen(x@filename))
setMethod("length", "Posterior", function(x) getPosteriorLen(x@filename))
setGeneric("nrow")
setMethod("nrow", "HD5ParamProxy", function(x) getPosteriorLen(x@filename))
setMethod("nrow", "HD5InfecProxy", function(x) getPosteriorLen(x@filename))
setMethod("dim", "HD5ParamProxy", function(x) c(nrow(x), length(x@tags)))
setMethod("dim", "HD5InfecProxy", function(x) c(nrow(x), length(x@tags)))
setGeneric("ncol")
setMethod("ncol", "HD5ParamProxy", function(x) dim(x)[2])
setMethod("ncol", "HD5InfecProxy", function(x) dim(x)[2])
# Data accessor methods
setMethod("$","Posterior", function(x,name) switch(name,param=x@param, infec=x@infec, filename=x@filename, model=x@model))
setMethod("[","HD5ParamProxy",
function(x,i,j) {
rows <- integer(0)
cols <- integer(0)
if(!missing(i)) {
rows <- (0:(nrow(x)-1))[i] # May seem odd, but it delegates bounds checking to R's internals
}
else rows <- 0:(nrow(x)-1)
if(!missing(j)) {
if(class(j) == "character") {
cols <- match(j,x@tags) - 1
if(any(is.na(cols))) stop("Invalid column specification")
}
else cols <- (0:(length(x@tags)-1))[j]
}
else cols <- 0:(length(x@tags)-1)
.Call("getPosteriorParams",x@filename,rows,cols)
}
)
setMethod("$","HD5ParamProxy",
function(x,name) {
if(name %in% names(x)) x[,name]
else NULL
}
)
setMethod("[","HD5InfecProxy",
function(x,i) {
rows <- integer(0)
cols <- integer(0)
if(!missing(i)) {
rows <- (0:(nrow(x)-1))[i]
}
else rows <- 0:(nrow(x)-1)
# if(!missing(j)) {
# cols <- (1:length(x@tags))[j]
# if(any(cols < 1) | any(cols > length(x@tags))) stop("Col subscript of of bounds")
# }
# else cols <- 1:length(x@tags)
# cols is currently unimplemented
.Call("getPosteriorInfecs",x@filename,rows,cols)
}
)
# Occult probs
setGeneric("poccult", function(x, from, to) standardGeneric("poccult"))
setMethod("poccult", "Posterior", function(x, from, to) .Call("getOccultProb", x@filename, from=1, to=length(x)))
# Coercion
setGeneric("as.data.frame")
setMethod("as.data.frame","HD5ParamProxy", function(x) x[])
setMethod("as.data.frame","HD5InfecProxy", function(x) x[])
# Summary methods
setGeneric("summary")
setMethod("summary","Posterior",
function(object) {
cat("Model:", object@model, "\n")
cat("Num samples:", length(object@param), "\n")
cat("Parameters:\n")
print(summary(object@param))
cat("\nInfections:\n")
print(summary(object@infec))
}
)
setMethod("summary","HD5ParamProxy",
function(object) {
np <- dim(object)[2]
z <- list()
for(i in 1:np) z[[names(object)[i]]] <- summary(object[,i])
fields <- names(z[[1]])
nms <- names(z)
z <- unlist(z,use.names=TRUE)
dim(z) <- c(length(fields),length(nms))
dimnames(z) <- list(fields,nms)
as.table(z)
})
setMethod("summary","HD5InfecProxy",
function(object) {
"To be implemented -- tell me what you want to know about!"
})
# Plot methods
setGeneric("plot")
setMethod("plot","HD5ParamProxy",
function(x) {
np <- dim(x)[2]
gridDim <- crn(np)
par(mfrow=gridDim)
for(i in names(x))
{
plot(x[,i],type='l',main=i)
}
})
} # .initPosterior
......@@ -8,6 +8,5 @@
.initClasses()
.initFittingMethods()
.initPosterior()
.initSimMethods()
}
library(inferfmd)
library(InFERreader)
pop<-read.csv("test.pop")
epi<-read.csv("test.epi")
......
......@@ -9,7 +9,7 @@ MCMCOBJS = mcmc/SmpSirMcmc.o mcmc/GpuLikelihood.o mcmc/GpuLikelihood_gpu.o mcmc
FRAMEWORKOBJS = Framework/Random.o Framework/SpatMetaPop.o Framework/SmpIndividual.o Framework/stlStrTok.o
IFACEOBJS = interface/posterior.o interface/SmpIface.o interface/SpSINRMcmc.o interface/RData.o
IFACEOBJS = interface/SmpIface.o interface/SpSINRMcmc.o interface/RData.o
SIMOBJS = sim/SmpSirSim.o
......
......@@ -2,7 +2,7 @@ AM_CPPFLAGS = -I. -I../data -I../Framework -I../sim -I../mcmc @PKG_CPPFLAGS@
EXT = cpp
OBJS = posterior.o SmpIface.o SpSINRMcmc.o RData.o
OBJS = SmpIface.o SpSINRMcmc.o RData.o
all: $(OBJS)
......
......@@ -334,7 +334,7 @@ RcppExport SEXP SpSINRMcmc(const SEXP population,
// Make output directory
string outputFile(_outputfile[0]);
EpiRisk::PosteriorHDF5Writer output(outputFile, likelihood);
EpiRisk::PosteriorHDF5Writer output(outputFile, likelihood, numIter[0]);
output.AddParameter(epsilon1); output.AddParameter(epsilon2);
output.AddParameter(gamma1);
output.AddParameter(gamma2);
......@@ -376,12 +376,10 @@ RcppExport SEXP SpSINRMcmc(const SEXP population,
mcmc.Update();
output.write();
if(k % 500 == 0)
if(k % 512 == 0)
{
Rcpp::Rcout << "Iteration " << k << std::endl;
output.flush();
}
}
// Wrap up
......
#include "posterior.hpp"
#include <iostream>
#include <cstdlib>
#include <stdexcept>
#include <H5Cpp.h>
#include <H5PacketTable.h>
#include <H5Exception.h>
static char paramPath[] = "posterior/parameters";
static char infecpath[] = "posterior/infections";
static char idsPath[] = "posterior/ids";
typedef struct
{
int idx;
float val;
} ipTuple_t;
void
readTags(H5::DataSet& dataset, Rcpp::CharacterVector& rTags)
{
H5::Attribute tags;
tags = dataset.openAttribute("tags");
H5::DataSpace tagDS = tags.getSpace();
hsize_t* aDim = new hsize_t[tagDS.getSimpleExtentNdims()];
tagDS.getSimpleExtentDims(aDim);
char** readBuff = new char*[aDim[0]];
H5::DataType paramTag_t = tags.getDataType();
tags.read(paramTag_t, readBuff);
for (int i = 0; i < aDim[0]; ++i)
{
rTags.push_back(readBuff[i]);
}
H5Dvlen_reclaim(paramTag_t.getId(), tagDS.getId(), H5P_DEFAULT, readBuff);
delete[] readBuff;
delete[] aDim;
}
hsize_t
getFLPTwidth(H5::DataSet& dataset)
{
H5::ArrayType pType = dataset.getArrayType();
hsize_t* dims = new hsize_t[pType.getArrayNDims()]; // Check is 1!
pType.getArrayDims(dims); // Stored
hsize_t numFields = dims[0];
delete[] dims;
return numFields;
}
RcppExport SEXP
getPosteriorParams(SEXP filename, SEXP rows, SEXP cols)
{
// Rows and cols are 0-based!
// No subscript boundary checking is performed.
Rcpp::CharacterVector _filename(filename);
Rcpp::IntegerVector _rows(rows);
Rcpp::IntegerVector _cols(cols);
Rcpp::List dataList(_cols.length());
try
{
H5::H5File file(_filename[0], H5F_ACC_RDONLY, H5P_DEFAULT, H5P_DEFAULT);
H5::DataSet pds = file.openDataSet(paramPath);
// Open parameters as a packet list
FL_PacketTable parameters(file.getId(), paramPath);
hsize_t totalRecords = parameters.GetPacketCount();
hsize_t numFields = getFLPTwidth(pds);
// Get parameter names
Rcpp::CharacterVector rTags;
Rcpp::CharacterVector allTags;
readTags(pds, allTags);
for (size_t i = 0; i < _cols.length(); ++i)
{
rTags.push_back(allTags[_cols[i]]);
}
// Construct R data.frame
for (int i = 0; i < _cols.length(); ++i)
{
Rcpp::NumericVector myVector(_rows.length());
dataList[i] = myVector;
}
dataList.attr("names") = rTags;
// Copy in data
float* record = new float[numFields];
for (int i = 0; i < _rows.length(); ++i)
{
parameters.GetPacket(_rows[i], record);
for (int j = 0; j < _cols.length(); ++j)
{
Rcpp::NumericVector col = dataList[j];
col[i] = record[_cols[j]];
}
}
delete[] record;
file.close();
}
catch (std::exception& __ex__)
{
forward_exception_to_r(__ex__);
}
catch (H5::Exception& e)
{
::Rf_error(e.getCDetailMsg());
}
catch (...)
{
::Rf_error("c++ exception (unknown reason)");
}
if(_cols.length() == 1) return dataList[0];
else return Rcpp::DataFrame(dataList);
}
RcppExport SEXP
getPosteriorInfecs(SEXP filename, SEXP rows, SEXP cols)
{
// Rows and cols are 0-based!
// No subscript boundary checking is performed
// cols is currently unused
Rcpp::CharacterVector _filename(filename);
Rcpp::IntegerVector _rows(rows);
Rcpp::List data(_rows.length());
Rcpp::List info = getPosteriorInfecInfo(filename);
Rcpp::CharacterVector tags = info[1];
try
{
H5::H5File file(_filename[0], H5F_ACC_RDONLY, H5P_DEFAULT, H5P_DEFAULT);
FL_PacketTable infections(file.getId(), infecpath);
size_t totalRecords = infections.GetPacketCount();
hvl_t buff;
for (size_t i = 0; i < _rows.length(); ++i)
{
infections.GetPacket(_rows[i], &buff);
Rcpp::CharacterVector ids(buff.len);
Rcpp::NumericVector val(buff.len);
ipTuple_t* records = (ipTuple_t*) buff.p;
for (size_t j = 0; j < buff.len; ++j)
{
ids[j] = tags[records[j].idx];
val[j] = records[j].val;
}
free(buff.p);
val.attr("names") = ids;
data[i] = val;
}
file.close();
}
catch (std::exception& __ex__)
{
forward_exception_to_r(__ex__);
}
catch (H5::Exception& e)
{
::Rf_error(e.getCDetailMsg());
}
catch (...)
{
::Rf_error("c++ exception (unknown reason)");
}
return data;
}
RcppExport SEXP
getPosteriorParamInfo(SEXP filename)
{
Rcpp::CharacterVector _filename(filename);
Rcpp::List info(2);
Rcpp::CharacterVector infoNames(2);
infoNames[0] = "length";
infoNames[1] = "tags";
info.attr("names") = infoNames;
try
{
H5::H5File file(_filename[0], H5F_ACC_RDONLY, H5P_DEFAULT, H5P_DEFAULT);
FL_PacketTable parameters(file.getId(), paramPath);
// Extract length
Rcpp::NumericVector length(1);
length[0] = parameters.GetPacketCount();
info[0] = length;
// Extract tags
Rcpp::CharacterVector tags;
H5::DataSet pds = file.openDataSet(paramPath);
readTags(pds, tags);
info[1] = tags;
file.close();
}
catch (std::exception& __ex__)
{
forward_exception_to_r(__ex__);
}
catch (H5::Exception& e)
{
::Rf_error(e.getCDetailMsg());
}
catch (...)
{
::Rf_error("c++ exception (unknown reason)");
}
return info;
}
RcppExport SEXP
getPosteriorInfecInfo(SEXP filename)
{
Rcpp::CharacterVector _filename(filename);
Rcpp::List info(2);