Commit 8eb0ab69 authored by Chris Jewell's avatar Chris Jewell
Browse files

Added inference summary metrics

CHANGES:

1. Added `data/example_cases.py`, `data/uk_clip.py`, `covid/summary.py`, `summary.py`
2. `summary.py` computes summary values required by reports to SAGE
3. Small changes to `inference.py` and `simulate.py` to reflect baked-in `nu` parameter in `model_spec.py`.
parent 744ee455
"""Epidemic summary measure functions"""
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from covid.impl.util import compute_state
def mean_and_ci(arr, q=(0.025, 0.975), axis=0, name=None):
    """Summarise an array with its mean and credibility-interval quantiles.

    :param arr: the array (or tensor) to summarise
    :param q: quantiles bounding the credible interval
    :param axis: axis over which to summarise
    :param name: optional prefix for the result keys
    :returns: a dict mapping `<name>_mean` and `<name>_<q>` to tensors
    """
    prefix = "" if name is None else name + "_"
    quantiles = np.array(q)
    percentiles = tfp.stats.percentile(arr, q=quantiles * 100.0, axis=axis)
    summary = {prefix + "mean": tf.reduce_mean(arr, axis=axis)}
    for i, quantile in enumerate(quantiles):
        summary[prefix + str(quantile)] = percentiles[i]
    return summary
# Reproduction number calculation
def calc_R_it(theta, xi, events, init_state, covar_data):
    """Calculates effective reproduction number for batches of metapopulations

    :param theta: a tensor of batched theta parameters [B] + theta.shape
    :param xi: a tensor of batched xi parameters [B] + xi.shape
    :param events: a [B, M, T, X] batched events tensor
    :param init_state: the initial state of the epidemic at earliest inference date
    :param covar_data: the covariate data
    :return a batched vector of R_it estimates
    """

    def r_fn(args):
        # Per-sample slices supplied by vectorized_map.
        theta_, xi_, events_ = args
        t = events_.shape[-2] - 1
        state = compute_state(init_state, events_, model_spec.STOICHIOMETRY)
        state = tf.gather(state, t - 1, axis=-2)  # State on final inference day
        # BUG FIX: use the unbatched per-sample theta_/xi_ rather than the
        # batched theta/xi captured from the enclosing scope.  `nu` is also
        # dropped: next_generation_matrix_fn reads only beta1/beta2/xi/gamma.
        par = dict(beta1=theta_[0], beta2=theta_[1], gamma=theta_[2], xi=xi_)
        ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
        ngm = ngm_fn(t, state)
        return ngm

    return tf.vectorized_map(r_fn, elems=(theta, xi, events))
This diff is collapsed.
......@@ -91,14 +91,7 @@ if __name__ == "__main__":
# $\pi(\theta, \xi, y^{se}, y^{ei} | y^{ir})$
def logp(theta, xi, events):
    """Joint log posterior density of parameters and censored events.

    `nu` is no longer passed: it is baked into `model_spec` as a constant.
    (The scraped diff showed both the old and new bodies; this is the
    post-commit version.)
    """
    return model.log_prob(
        dict(beta1=theta[0], beta2=theta[1], gamma=theta[2], xi=xi, seir=events,)
    )
# Build Metropolis within Gibbs sampler
......@@ -243,7 +236,7 @@ if __name__ == "__main__":
# Initial chain state: [theta, xi, events].  The scraped diff contained the
# xi line twice (single- and double-quoted); only one assignment belongs here.
current_state = [
    np.array([0.85, 0.3, 0.25], dtype=DTYPE),
    np.zeros(model.model["xi"]().event_shape[-1], dtype=DTYPE),
    events,
]
......@@ -258,6 +251,10 @@ if __name__ == "__main__":
# Shape of the saved events chunk: one slot per retained MCMC sample.
event_size = [NUM_SAVED_SAMPLES] + list(current_state[2].shape)

posterior.create_dataset("initial_state", data=initial_state)
# Ideally we insert the inference period into the posterior file
# as this allows us to post-attribute it to the data. Maybe better
# to simply save the data into it as well.
# NOTE(review): dates are hard-coded — presumably the [start, end) of the
# inference window; confirm they match the analysed data epoch.
posterior.create_dataset(
    "inference_period", data=[b"2020-06-16", b"2020-09-08"]
).attrs["description"] = "inference period [start, end)"
......
......@@ -5,16 +5,17 @@ import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from covid.config import floatX
from covid.model import DiscreteTimeStateTransitionModel
from covid.util import impute_previous_cases
tfd = tfp.distributions

# Model-wide constants.  The scraped diff retained superseded assignments
# (`DTYPE = floatX`, a bare `XI_FREQ = 14`); only the post-commit values
# are kept here.
DTYPE = np.float64
STOICHIOMETRY = tf.constant([[-1, 1, 0, 0], [0, -1, 1, 0], [0, 0, -1, 1]])
TIME_DELTA = 1.0
XI_FREQ = 14  # baseline transmission changes every 14 days
NU = tf.constant(0.5, dtype=DTYPE)  # E->I rate assumed known.
def read_covariates(paths):
"""Loads covariate data
......@@ -64,7 +65,6 @@ def impute_censored_events(cases):
def CovidUK(covariates, initial_state, initial_step, num_steps):
def beta1():
return tfd.Gamma(
concentration=tf.constant(1.0, dtype=DTYPE),
......@@ -84,24 +84,17 @@ def CovidUK(covariates, initial_state, initial_step, num_steps):
idx_pts = tf.cast(tf.range(num_steps // XI_FREQ) * XI_FREQ, dtype=DTYPE)
return tfd.GaussianProcess(kernel, index_points=idx_pts[:, tf.newaxis])
def nu():
return tfd.Gamma(
concentration=tf.constant(1.0, dtype=DTYPE),
rate=tf.constant(1.0, dtype=DTYPE),
)
def gamma():
return tfd.Gamma(
concentration=tf.constant(100.0, dtype=DTYPE),
rate=tf.constant(400.0, dtype=DTYPE),
)
def seir(beta1, beta2, xi, nu, gamma):
def seir(beta1, beta2, xi, gamma):
beta1 = tf.convert_to_tensor(beta1, DTYPE)
beta2 = tf.convert_to_tensor(beta2, DTYPE)
xi = tf.convert_to_tensor(xi, DTYPE)
nu = tf.convert_to_tensor(nu, DTYPE)
gamma = tf.convert_to_tensor(gamma, DTYPE)
def transition_rate_fn(t, state):
......@@ -128,7 +121,7 @@ def CovidUK(covariates, initial_state, initial_step, num_steps):
)
infec_rate = infec_rate / tf.squeeze(N) + 0.000000001 # Vector of length nc
ei = tf.broadcast_to([nu], shape=[state.shape[0]]) # Vector of length nc
ei = tf.broadcast_to([NU], shape=[state.shape[0]]) # Vector of length nc
ir = tf.broadcast_to([gamma], shape=[state.shape[0]]) # Vector of length nc
return [infec_rate, ei, ir]
......@@ -143,5 +136,43 @@ def CovidUK(covariates, initial_state, initial_step, num_steps):
)
return tfd.JointDistributionNamed(
dict(beta1=beta1, beta2=beta2, xi=xi, nu=nu, gamma=gamma, seir=seir)
dict(beta1=beta1, beta2=beta2, xi=xi, gamma=gamma, seir=seir)
)
def next_generation_matrix_fn(covar_data, param):
    r"""The next generation matrix calculates the force of infection from
    individuals in metapopulation i to all other metapopulations j during
    a typical infectious period (1/gamma). i.e.

    \[ A_{ij} = S_j * \beta_1 ( 1 + \beta_2 * w_t * C_{ij} / N_i) / N_j / gamma \]

    :param covar_data: a dictionary of covariate data (keys "C", "W", "N")
    :param param: a dictionary of parameters (keys "beta1", "beta2", "xi", "gamma")
    :returns: a function taking arguments `t` and `state` giving the time and
              epidemic state (SEIR) for which the NGM is to be calculated.  This
              function in turn returns an MxM next generation matrix.
    """

    def fn(t, state):
        # Symmetrise the contact matrix and zero its diagonal, so C carries
        # only between-metapopulation mixing.
        C = tf.convert_to_tensor(covar_data["C"], dtype=DTYPE)
        C = tf.linalg.set_diag(C + tf.transpose(C), tf.zeros(C.shape[0], dtype=DTYPE))
        W = tf.constant(covar_data["W"], dtype=DTYPE)
        N = tf.constant(covar_data["N"], dtype=DTYPE)

        # Commute volume for day t, clamped into the observed range of W.
        w_idx = tf.clip_by_value(tf.cast(t, tf.int64), 0, W.shape[0] - 1)
        commute_volume = tf.gather(W, w_idx)
        # xi changes every XI_FREQ days; clamp to the last available value
        # when t runs past the inference window.
        xi_idx = tf.cast(
            tf.clip_by_value(t // XI_FREQ, 0, param["xi"].shape[0] - 1), dtype=tf.int64,
        )
        xi = tf.gather(param["xi"], xi_idx)
        beta = param["beta1"] * tf.math.exp(xi)

        ngm = beta * (
            tf.eye(C.shape[0], dtype=state.dtype)
            + param["beta2"] * commute_volume * C / N[tf.newaxis, :]
        )
        # Scale rows by susceptibles (state[..., 0]) and by the infectious
        # period 1/gamma to get expected infections per infective.
        ngm = ngm * state[..., 0][..., tf.newaxis] / (N[:, tf.newaxis] * param["gamma"])
        return ngm

    return fn
......@@ -32,11 +32,11 @@ with open(options.config, "r") as f:
# Load in covariate data.  (The scraped diff held each assignment twice,
# with single- and double-quoted keys; one copy of each is kept.)
covar_data = model_spec.read_covariates(config["data"])

# We load in cases and impute missing infections first, since this sets the
# time epoch which we are analysing.
cases = model_spec.read_cases(config["data"]["reported_cases"])

# Single imputation of censored data
events = model_spec.impute_censored_events(cases)
......@@ -44,9 +44,7 @@ events = model_spec.impute_censored_events(cases)
# Initial conditions S(0), E(0), I(0), R(0) are calculated
# by calculating the state at the beginning of the inference period.
# Everyone starts in S (population size N); E/I/R start at zero.
state = compute_state(
    initial_state=tf.concat(
        [covar_data["N"], tf.zeros_like(events[:, 0, :])], axis=-1
    ),
    events=events,
    stoichiometry=model_spec.STOICHIOMETRY,
)
......@@ -56,13 +54,9 @@ events = events[:, start_time:, :]
# Build model and sample.  The scraped diff kept both the old call
# (with `xi_freq=14` and `nu=0.5`) and the new one; this is the
# post-commit form where both are baked into model_spec.
full_probability_model = model_spec.CovidUK(
    covariates=covar_data,
    initial_state=initial_state,
    initial_step=0,
    num_steps=80,
)
seir = full_probability_model.model["seir"](
    beta1=0.35, beta2=0.65, xi=[0.0] * 5, gamma=0.49
)
sim = seir.sample()
"""Calculate Rt given a posterior"""
import yaml
import h5py
import numpy as np
import geopandas as gp
import tensorflow as tf
from covid.model import (
rayleigh_quotient,
power_iteration,
)
from covid.impl.util import compute_state
from covid.summary import mean_and_ci
import model_spec
DTYPE = model_spec.DTYPE
CONFIG_FILE = "example_config.yaml"
GIS_TEMPLATE = "data/uk_clip.gpkg"
GIS_OUTPUT = "example_gis_2020-09-25.gpkg"
# Reproduction number calculation
def calc_R_it(theta, xi, events, init_state, covar_data):
    """Calculates effective reproduction number for batches of metapopulations

    :param theta: a tensor of batched theta parameters [B] + theta.shape
    :param xi: a tensor of batched xi parameters [B] + xi.shape
    :param events: a [B, M, T, X] batched events tensor
    :param init_state: the initial state of the epidemic at earliest inference date
    :param covar_data: the covariate data
    :return a batched vector of R_it estimates
    """
    # (Leftover debug `print` of theta.shape removed.)

    def r_fn(args):
        # Per-sample slices supplied by vectorized_map.
        theta_, xi_, events_ = args
        t = events_.shape[-2] - 1
        state = compute_state(init_state, events_, model_spec.STOICHIOMETRY)
        state = tf.gather(state, t - 1, axis=-2)  # State on final inference day
        par = dict(beta1=theta_[0], beta2=theta_[1], gamma=theta_[2], xi=xi_)
        ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
        ngm = ngm_fn(t, state)
        return ngm

    return tf.vectorized_map(r_fn, elems=(theta, xi, events))
@tf.function
def predicted_incidence(theta, xi, init_state, init_step, num_steps):
    """Runs the simulation forward in time from `init_state` at time `init_step`
    for `num_steps`.

    :param theta: a tensor of batched theta parameters [B] + theta.shape
    :param xi: a tensor of batched xi parameters [B] + xi.shape
    :param init_state: a [B, M, S] batched state tensor
    :param init_step: the initial time step
    :param num_steps: the number of steps to simulate
    :returns: a tensor of shape [B, M, num_steps, X] where X is the number of state
              transitions

    NOTE(review): reads the module-level `covar_data`, which must be loaded
    before this function is traced — confirm call order in `__main__`.
    """

    def sim_fn(args):
        # One posterior sample: its parameters and its own initial state.
        theta_, xi_, init_ = args

        par = dict(beta1=theta_[0], beta2=theta_[1], gamma=theta_[2], xi=xi_)

        model = model_spec.CovidUK(
            covar_data,
            initial_state=init_,
            initial_step=init_step,
            num_steps=num_steps,
        )
        sim = model.sample(**par)
        return sim["seir"]

    # NOTE(review): map_fn runs samples sequentially rather than
    # vectorized_map — presumably sampling cannot be vectorised; confirm.
    events = tf.map_fn(
        sim_fn, elems=(theta, xi, init_state), fn_output_signature=(tf.float64),
    )
    return events
# Today's prevalence
def prevalence(predicted_state, population_size, name=None):
    """Computes prevalence of E and I individuals

    :param predicted_state: the state at a particular timepoint [batch, M, S]
    :param population_size: the size of the population
    :param name: optional key prefix passed through to mean_and_ci
    :returns: a dict of mean and 95% credibility intervals for prevalence
              in units of infections per person
    """
    infected = tf.reduce_sum(predicted_state[:, :, 1:3], axis=-1)
    per_capita = infected / tf.squeeze(population_size)
    return mean_and_ci(per_capita, name=name)
def predicted_events(events, name=None):
    """Summarise predicted event counts.

    Sums events over the final axis, then returns the mean and 95%
    credibility interval via mean_and_ci.
    """
    totals = tf.reduce_sum(events, axis=-1)
    return mean_and_ci(totals, name=name)
if __name__ == "__main__":

    # Get general config
    with open(CONFIG_FILE, "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # Load covariate data
    covar_data = model_spec.read_covariates(config["data"])

    # Load posterior file (large chunk cache to speed repeated slicing)
    posterior = h5py.File(
        config["output"]["posterior"], "r", rdcc_nbytes=1024 ** 3, rdcc_nslots=1e6,
    )

    # Pre-determined thinning of posterior (better done in MCMC?)
    idx = slice(posterior["samples/theta"].shape[0])  # range(6000, 10000, 10)
    theta = posterior["samples/theta"][idx]
    xi = posterior["samples/xi"][idx]
    events = posterior["samples/events"][idx]
    init_state = posterior["initial_state"][:]
    state_timeseries = compute_state(init_state, events, model_spec.STOICHIOMETRY)

    # Build model
    model = model_spec.CovidUK(
        covar_data, initial_state=init_state, initial_step=0, num_steps=events.shape[1],
    )

    # Rt summarised via power iteration / Rayleigh quotient on the NGMs
    ngms = calc_R_it(theta, xi, events, init_state, covar_data)
    b, _ = power_iteration(ngms)
    rt = rayleigh_quotient(ngms, b)
    q = np.arange(0.05, 1.0, 0.05)
    rt_quantiles = np.stack([q, np.quantile(rt, q)], axis=-1)

    # Prediction requires simulation from the last available timepoint for 28 + 4 + 1 days
    # Note a 4 day recording lag in the case timeseries data requires that
    # now = state_timeseries.shape[-2] + 4
    prediction = predicted_incidence(
        theta,
        xi,
        init_state=state_timeseries[..., -1, :],
        init_step=state_timeseries.shape[-2] - 1,
        num_steps=33,
    )
    predicted_state = compute_state(
        state_timeseries[..., -1, :], prediction, model_spec.STOICHIOMETRY
    )

    # Prevalence now
    prev_now = prevalence(predicted_state[..., 4, :], covar_data["N"], name="prev")

    # Incidence of detections now
    cases_now = predicted_events(prediction[..., 4:5, 2], name="cases")

    # Incidence from now to now+7, +14, +21, +28 (offset by the 4-day lag)
    cases_7 = predicted_events(prediction[..., 4:11, 2], name="cases7")
    cases_14 = predicted_events(prediction[..., 4:18, 2], name="cases14")
    cases_21 = predicted_events(prediction[..., 4:25, 2], name="cases21")
    cases_28 = predicted_events(prediction[..., 4:32, 2], name="cases28")

    # Prevalence at day 7, 14, 21, 28
    prev_7 = prevalence(predicted_state[..., 11, :], covar_data["N"], name="prev7")
    prev_14 = prevalence(predicted_state[..., 18, :], covar_data["N"], name="prev14")
    prev_21 = prevalence(predicted_state[..., 25, :], covar_data["N"], name="prev21")
    # BUG FIX: now+28 is index 4 + 28 = 32, matching the lag-offset pattern
    # of prev_7/prev_14/prev_21 above (was 28, i.e. now+24).
    prev_28 = prevalence(predicted_state[..., 32, :], covar_data["N"], name="prev28")

    def geosummary(geodata, summaries):
        # Attach each summary statistic as a column of the GeoDataFrame.
        for summary in summaries:
            for k, v in summary.items():
                arr = v
                if isinstance(v, tf.Tensor):
                    arr = v.numpy()
                geodata[k] = arr

    ## GIS here
    ltla = gp.read_file(GIS_TEMPLATE)
    ltla = ltla[ltla["lad19cd"].str.startswith("E")]  # England only, for now.
    ltla = ltla.sort_values("lad19cd")

    # Per-metapopulation Rt: row sums of the next generation matrices.
    rti = tf.reduce_sum(ngms, axis=-1)

    geosummary(
        ltla,
        (
            mean_and_ci(rti, name="Rt"),
            prev_now,
            cases_now,
            prev_7,
            prev_14,
            prev_21,
            prev_28,
            cases_7,
            cases_14,
            cases_21,
            cases_28,
        ),
    )

    # Posterior probability that Rt exceeds 1 in each metapopulation.
    ltla["Rt_exceed"] = np.mean(rti > 1.0, axis=0)
    # Non-capturing group avoids pandas' "match groups" UserWarning in
    # str.contains; the selected columns are unchanged.
    ltla = ltla.loc[
        :,
        ltla.columns.str.contains(
            "(?:lad19cd|lad19nm$|prev|cases|Rt|popsize|geometry)", regex=True
        ),
    ]
    ltla.to_file(GIS_OUTPUT, driver="GPKG")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment