Commit 297fe828 authored by Chris Jewell's avatar Chris Jewell
Browse files

Major changes:

1. Bugfix in initial condition setting for MCMC and prediction.

2. Rationalised data import.  Now uses a load_data function in the covid.model module.

3. Updated model doc to reflect loss of background infectious pressure, implementation of commuting frequency.

4. Fixes in covid_ode.py reflecting updated model interface.
parent e315c086
......@@ -4,11 +4,15 @@ import tensorflow_probability as tfp
from tensorflow_probability.python.internal import dtype_util
import numpy as np
from covid.rdata import load_mobility_matrix, load_population, load_age_mixing
from covid.pydata import load_commute_volume
from covid.impl.chainbinom_simulate import chain_binomial_simulate
tode = tfp.math.ode
tla = tf.linalg
DTYPE = np.float64
def power_iteration(A, tol=1e-3):
b_k = tf.random.normal([A.shape[1], 1], dtype=A.dtype)
......@@ -38,6 +42,30 @@ def dense_to_block_diagonal(A, n_blocks):
return A_block
def load_data(paths, settings, dtype=DTYPE):
M_tt, age_groups = load_age_mixing(paths['age_mixing_matrix_term'])
M_hh, _ = load_age_mixing(paths['age_mixing_matrix_hol'])
C, la_names = load_mobility_matrix(paths['mobility_matrix'])
np.fill_diagonal(C, 0.)
w_period = [settings['inference_period'][0], settings['prediction_period'][1]]
W = load_commute_volume(paths['commute_volume'], w_period)['percent']
pop = load_population(paths['population_size'])
M_tt = M_tt.astype(DTYPE)
M_hh = M_hh.astype(DTYPE)
C = C.astype(DTYPE)
W = W.astype(DTYPE)
pop['n'] = pop['n'].astype(DTYPE)
return {'M_tt': M_tt, 'M_hh': M_hh,
'C': C, 'la_names': la_names,
'age_groups': age_groups,
'W': W, 'pop': pop}
class CovidUK:
def __init__(self,
M_tt: np.float64,
......@@ -103,17 +131,17 @@ class CovidUK:
def create_initial_state(self, init_matrix=None):
if init_matrix is None:
I = np.zeros(self.N.shape, dtype=np.float64)
I = np.zeros(self.N.shape, dtype=DTYPE)
I[149*17+10] = 30. # Middle-aged in Surrey
else:
np.testing.assert_array_equal(init_matrix.shape, [self.n_lads, self.n_ages],
err_msg=f"init_matrix does not have shape [<num lads>,<num ages>] \
({self.n_lads},{self.n_ages})")
I = init_matrix.flatten()
I = tf.reshape(init_matrix, [-1])
S = self.N - I
E = np.zeros(self.N.shape, dtype=np.float64)
R = np.zeros(self.N.shape, dtype=np.float64)
return np.stack([S, E, I, R], axis=-1)
E = tf.zeros(self.N.shape, dtype=DTYPE)
R = tf.zeros(self.N.shape, dtype=DTYPE)
return tf.stack([S, E, I, R], axis=-1)
class CovidUKODE(CovidUK):
......@@ -127,8 +155,7 @@ class CovidUKODE(CovidUK):
def h_fn(t, state):
state_ = tf.transpose(state)
S, E, I, R = tf.unstack(state_, axis=0)
S, E, I, R = tf.unstack(state, axis=-1)
# Integrator may produce time values outside the range desired, so
# we clip, implicitly assuming the outside dates have the same
# holiday status as their nearest neighbors in the desired range.
......@@ -156,7 +183,7 @@ class CovidUKODE(CovidUK):
def simulate(self, param, state_init, solver_state=None):
h = self.make_h(param)
t = np.arange(self.times.shape[0])
results = self.solver.solve(ode_fn=h, initial_time=t[0], initial_state=state_init * param['I0'],
results = self.solver.solve(ode_fn=h, initial_time=t[0], initial_state=state_init,
solution_times=t, previous_solver_internal_state=solver_state)
return results.times, results.states, results.solver_internal_state
......
......@@ -38,4 +38,4 @@ def load_population(rds_file: str):
raw = pyr.read_r(rds_file)
df = list(raw.values())[0]
df = df.sort_values(by=['LA.code', 'age'])
return df['n'].to_numpy(dtype=np.float32), df[['name', 'Area.name.2']]
return df[['LA.code', 'name', 'Area.name.2', 'age', 'n']]
......@@ -29,7 +29,7 @@ def seed_areas(N, names, age_group=8, num_la=152, num_age=17, n_seed=30.):
'West Midlands (Met County)',
'Greater Manchester (Met County)']
names_matrix = names['Area.name.2'].to_numpy().reshape([num_la, num_age])
names_matrix = names.to_numpy().reshape([num_la, num_age])
seed_areas = np.in1d(names_matrix[:, age_group], areas)
N_matrix = N.reshape([num_la, num_age]) # LA x Age
......@@ -110,7 +110,7 @@ def save_sims(dates, sims, la_names, age_groups, filename):
dset_sim = f.create_dataset('prediction', data=sims, compression='gzip', compression_opts=4)
la_long = np.repeat(la_names, age_groups.shape[0]).astype(np.string_)
age_long = np.tile(age_groups, la_names.shape[0]).astype(np.string_)
dset_dims = f.create_dataset("dimnames", data=[b'sim_id', b't', b'state', b'la_age'])
dset_dims = f.create_dataset("dimnames", data=[b'sim_id', b't', b'la_age', b'state'])
dset_la = f.create_dataset('la_names', data=la_long)
dset_age = f.create_dataset('age_names', data=age_long)
dset_times = f.create_dataset('date', data=dates.astype(np.string_))
......
import optparse
import time
import sys
import h5py
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import yaml
from covid.model import CovidUKODE
from covid.rdata import *
from covid.pydata import load_commute_volume
from covid.model import load_data
from covid.util import sanitise_parameter, sanitise_settings, seed_areas
......@@ -113,8 +113,8 @@ def draw_figs(sim, N):
# TotalUK epidemic curve by age-group
fig, ax = plt.subplots(1, 2, figsize=[24, 12])
plot_by_la(sim, la_names, ax=ax[0])
plot_by_age(sim, age_groups, ax=ax[1])
plot_by_la(sim, data['la_names'], ax=ax[0])
plot_by_age(sim, data['age_groups'], ax=ax[1])
ax[1].legend()
plt.xticks(rotation=45, horizontalalignment="right")
fig.autofmt_xdate()
......@@ -123,7 +123,7 @@ def draw_figs(sim, N):
# Plot attack rate
plt.figure(figsize=[4, 2])
plt.plot(age_groups, attack_rate, 'o-')
plt.plot(data['age_groups'], attack_rate, 'o-')
plt.xticks(rotation=90)
plt.title('Age-specific attack rate')
plt.savefig('age_attack_rate.pdf')
......@@ -144,7 +144,7 @@ def plot_age_attack_rate(ax, sim, N, label):
Ns = N.reshape([152, 17]).sum(axis=0)
fs = final_size(sim.numpy())
attack_rate = fs / Ns
ax.plot(age_groups, attack_rate, 'o-', label=label)
ax.plot(data['age_groups'], attack_rate, 'o-', label=label)
if __name__ == '__main__':
......@@ -157,31 +157,38 @@ if __name__ == '__main__':
with open(options.config, 'r') as ymlfile:
config = yaml.load(ymlfile)
K_tt, age_groups = load_age_mixing(config['data']['age_mixing_matrix_term'])
K_hh, _ = load_age_mixing(config['data']['age_mixing_matrix_hol'])
T, la_names = load_mobility_matrix(config['data']['mobility_matrix'])
np.fill_diagonal(T, 0.)
N, n_names = load_population(config['data']['population_size'])
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
model = CovidUKODE(K_tt, K_hh, T, N, settings['start'], settings['end'], settings['holiday'],
settings['bg_max_time'], 1)
data = load_data(config['data'], settings)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
model = CovidUKODE(M_tt=data['M_tt'],
M_hh=data['M_hh'],
C=data['C'],
N=data['pop']['n'].to_numpy(),
W=data['W'].to_numpy(),
date_range=settings['prediction_period'],
holidays=settings['holiday'],
lockdown=settings['lockdown'],
time_step=1)
seeding = seed_areas(data['pop']['n'].to_numpy(),
data['pop']['Area.name.2']) # Seed 40-44 age group, 30 seeds by popn size
state_init = model.create_initial_state(init_matrix=seeding)
print('R0_term=', model.eval_R0(param))
@tf.function(autograph=False, experimental_compile=True)
def compiled_sim():
return model.simulate(param, state_init)
start = time.perf_counter()
t, sim, _ = model.simulate(param, state_init)
t, sim, _ = compiled_sim()
end = time.perf_counter()
print(f'Complete in {end - start} seconds')
dates = settings['start'] + t.numpy().astype(np.timedelta64)
dates = np.arange(settings['prediction_period'][0], settings['prediction_period'][1],
np.timedelta64(1, 'D'))
dt = doubling_time(dates, sim.numpy(), '2020-03-01', '2020-03-31')
print(f"Doubling time: {dt}")
......@@ -189,7 +196,7 @@ if __name__ == '__main__':
fig_attack = plt.figure()
fig_uk = plt.figure()
plot_age_attack_rate(fig_attack.gca(), sim, N, "Attack Rate")
plot_age_attack_rate(fig_attack.gca(), sim, data['pop']['n'].to_numpy(), "Attack Rate")
fig_attack.suptitle("Attack Rate")
plot_infec_curve(fig_uk.gca(), sim.numpy(), "Infections")
fig_uk.suptitle("UK Infections")
......
%% LyX 2.2.4 created this file. For more info, see http://www.lyx.org/.
% LyX 2.2.4 created this file. For more info, see http://www.lyx.org/.
%% Do not edit unless you really know what you are doing.
\documentclass[english]{article}
\usepackage[T1]{fontenc}
......@@ -94,30 +94,65 @@ number of individual in each age-group-LAD combination at time $t$
by the vectors $\vec{S}(t),\vec{E}(t),\vec{I}(t),\vec{R}(t)$. We
therefore have
\begin{align*}
\frac{\mathrm{d\vec{S}(t)}}{dt} & =-\beta_{1}\left[M^{\star}\vec{I}(t)+\beta_{2}\bar{M}C^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}\\
\frac{\mathrm{d}\vec{E}(t)}{dt} & =\beta_{1}\left[M^{\star}\vec{I}(t)+\beta_{2}\bar{M}C^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}-\nu\vec{E}(t)\\
\frac{\mathrm{d\vec{S}(t)}}{dt} & =-\epsilon\frac{\vec{S}(t)}{N}-\beta_{t}\left[M^{\star}\vec{I}(t)+\beta_{2}w_{t}\bar{M}C^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}\\
\frac{\mathrm{d}\vec{E}(t)}{dt} & =\epsilon\frac{\vec{S}(t)}{N}+\beta_{t}\left[M^{\star}\vec{I}(t)+\beta_{2}w_{t}\bar{M}C^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}-\nu\vec{E}(t)\\
\frac{\mathrm{d}\vec{I}(t)}{dt} & =\nu\vec{E}(t)-\gamma\vec{I}(t)\\
\frac{\mathrm{d}\vec{R}(t)}{dt} & =\gamma\vec{I}(t)
\end{align*}
where $\bar{M}$ is the global mean person-person contact rate. Parameters
are: baseline infection rate $\beta_{1}$, commuting infection ratio
$\beta_{2}$, latent period $\frac{1}{\nu}$, and infectious period
$\frac{1}{\gamma}$. Typically, we assume that contact with commuters
is $\beta_{2}=\frac{1}{3}$ of that between members of the same age-LAD
combination assuming an 8 hour working day.
where $\bar{M}$ is the global mean person-person contact rate, and
$w_{t}$ is the total rail ticket sales in the UK expressed as a fraction
of the 2019 mean (a proxy for reduction in travel). Parameters are:
\subsection{Noise model (under construction)}
\[
\epsilon_{t}=\begin{cases}
\epsilon_{0} & \mbox{if}t<T_{b}\\
0 & \mbox{otherwise}
\end{cases}
\]
with $T_{b}$ a time beyond which imports to the UK are considered
to cease;
\begin{align*}
\beta_{t} & =\begin{cases}
\beta_{1} & \mbox{if }t<T_{L}\\
\beta_{1}\beta_{3} & \mbox{otherwise }
\end{cases}\\
\end{align*}
with $T_{L}$ the date of lock-down restrictions, $\beta_{1}$ a baseline
transmisison rate, and $\beta_{3}$ giving the ratio of post-lockdown
to pre-lockdown transmission; commuting infection ratio $\beta_{2}$;
latent period $\frac{1}{\nu}=4$days; and infectious period $\frac{1}{\gamma}$.
Typically, we assume that contact with commuters is $\beta_{2}=\frac{1}{3}$
of that between members of the same age-LAD combination assuming an
8 hour working day.
\subsection{Noise model}
Currently, and subject to discussion, we assume that all detected
cases are synonymous with individuals transitioning $I\rightarrow R$.
We assume the number of new cases in each age-LAD combination are
given by
\[
y_{ik}(t)\sim\mbox{Poisson}\left(R_{ik}(t)-R_{ik}(t-1)\right)
y_{ik}(t)\sim\mbox{Negative Binomial}\left(r,\phi(R_{ik}(t)-R_{ik}(t-1))\right)
\]
This could be relaxed to a Negative Binomial distribution to account
for Poisson overdispersion.
where $\phi$ is the case reporting fraction (i.e. proportion of infections
that are eventually detected) and $r$ is an overdispersion parameter.
\subsection{Inference}
We are interested in making inference on $\epsilon_{0},\beta_{1}$,
and $\gamma$. Priors are currently:
\begin{align*}
\epsilon_{0} & \sim\mbox{Gamma}(1,1)\\
\beta_{1} & \sim\mbox{Gamma}(1,1)\\
\beta_{3} & \sim\mbox{Gamma}(20,20)\\
\gamma & \sim\mbox{Gamma}(10,40)\\
r & \sim\mbox{\mbox{Gamma}(}0.1,0.1)
\end{align*}
specified to express relative ignorance about $\epsilon_{0}$ , $\beta_{1}$,
and $r$, strong information on $\beta_{3}$ and $r$, and a belief
that the infectious period should be approximately 4 days.
\subsection{Implementation}
......@@ -127,4 +162,6 @@ class provided by Tensorflow Probability 0.9. Code may be found at
\url{http://fhm-chicas-code.lancs.ac.uk/jewell/covid19uk}.
The code is a work in progress \textendash{} see README.
\section{Results}
\end{document}
import optparse
import yaml
import time
import pickle as pkl
from tensorflow_probability import distributions as tfd
from tensorflow_probability import bijectors as tfb
import matplotlib.pyplot as plt
import time
import matplotlib.pyplot as plt
import yaml
from tensorflow_probability import bijectors as tfb
from tensorflow_probability import distributions as tfd
from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.pydata import load_commute_volume
from covid.model import CovidUKODE, covid19uk_logp
from covid.model import CovidUKODE, covid19uk_logp, load_data
from covid.util import *
DTYPE = np.float64
......@@ -43,23 +41,7 @@ if __name__ == '__main__':
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
K_tt, age_groups = load_age_mixing(config['data']['age_mixing_matrix_term'])
K_hh, _ = load_age_mixing(config['data']['age_mixing_matrix_hol'])
T, la_names = load_mobility_matrix(config['data']['mobility_matrix'])
np.fill_diagonal(T, 0.)
W = load_commute_volume(config['data']['commute_volume'], settings['inference_period'])['percent']
N, n_names = load_population(config['data']['population_size'])
K_tt = K_tt.astype(DTYPE)
K_hh = K_hh.astype(DTYPE)
W = W.to_numpy().astype(DTYPE)
T = T.astype(DTYPE)
N = N.astype(DTYPE)
data = load_data(config['data'], settings)
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
......@@ -68,18 +50,17 @@ if __name__ == '__main__':
y = case_reports['CumCases'].to_numpy()
y_incr = np.round((y[1:] - y[:-1]) * 0.8)
simulator = CovidUKODE(
M_tt=K_tt,
M_hh=K_hh,
C=T,
W=W,
N=N,
date_range=[date_range[0]-np.timedelta64(1,'D'), date_range[1]],
holidays=settings['holiday'],
lockdown=settings['lockdown'],
time_step=int(settings['time_step']))
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
simulator = CovidUKODE(M_tt=data['M_tt'],
M_hh=data['M_hh'],
C=data['C'],
N=data['pop']['n'].to_numpy(),
W=data['W'].to_numpy(),
date_range=[date_range[0] - np.timedelta64(1, 'D'), date_range[1]],
holidays=settings['holiday'],
lockdown=settings['lockdown'],
time_step=int(settings['time_step']))
seeding = seed_areas(data['pop']['n'].to_numpy(), data['pop']['Area.name.2']) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
def logp(par):
......@@ -90,11 +71,12 @@ if __name__ == '__main__':
p['I0'] = par[3]
p['r'] = par[4]
beta_logp = tfd.Gamma(concentration=tf.constant(1., dtype=DTYPE), rate=tf.constant(1., dtype=DTYPE)).log_prob(p['beta1'])
beta3_logp = tfd.Gamma(concentration=tf.constant(20., dtype=DTYPE),
rate=tf.constant(20., dtype=DTYPE)).log_prob(p['beta3'])
beta3_logp = tfd.Gamma(concentration=tf.constant(200., dtype=DTYPE),
rate=tf.constant(200., dtype=DTYPE)).log_prob(p['beta3'])
gamma_logp = tfd.Gamma(concentration=tf.constant(100., dtype=DTYPE), rate=tf.constant(400., dtype=DTYPE)).log_prob(p['gamma'])
I0_logp = tfd.Gamma(concentration=tf.constant(1.5, dtype=DTYPE), rate=tf.constant(0.05, dtype=DTYPE)).log_prob(p['I0'])
r_logp = tfd.Gamma(concentration=tf.constant(0.1, dtype=DTYPE), rate=tf.constant(0.1, dtype=DTYPE)).log_prob(p['gamma'])
state_init = simulator.create_initial_state(init_matrix=seeding * p['I0'])
t, sim, solve = simulator.simulate(p, state_init)
y_logp = covid19uk_logp(y_incr, sim, 0.1, p['r'])
logp = beta_logp + beta3_logp + gamma_logp + I0_logp + r_logp + tf.reduce_sum(y_logp)
......
......@@ -11,9 +11,7 @@ from tensorflow_probability import stats as tfs
import matplotlib.pyplot as plt
import h5py
from covid.model import CovidUKODE
from covid.rdata import load_age_mixing, load_mobility_matrix, load_population
from covid.pydata import load_commute_volume
from covid.model import CovidUKODE, load_data
from covid.util import sanitise_settings, sanitise_parameter, seed_areas, doubling_time, save_sims
from covid.plotting import plot_prediction, plot_case_incidence
......@@ -33,21 +31,7 @@ if __name__ == '__main__':
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
W = load_commute_volume(config['data']['commute_volume'], settings['prediction_period'])
K_tt, age_groups = load_age_mixing(config['data']['age_mixing_matrix_term'])
K_hh, _ = load_age_mixing(config['data']['age_mixing_matrix_hol'])
T, la_names = load_mobility_matrix(config['data']['mobility_matrix'])
np.fill_diagonal(T, 0.)
N, n_names = load_population(config['data']['population_size'])
K_tt = K_tt.astype(DTYPE)
K_hh = K_hh.astype(DTYPE)
T = T.astype(DTYPE)
N = N.astype(DTYPE)
W = W.to_numpy().astype(DTYPE)
data = load_data(config['data'], settings)
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
......@@ -63,17 +47,18 @@ if __name__ == '__main__':
data_dates = np.arange(date_range[0],
date_range[1]+np.timedelta64(1,'D'),
np.timedelta64(1, 'D'))
simulator = CovidUKODE(M_tt=K_tt,
M_hh=K_hh,
C=T,
W=W,
N=N,
simulator = CovidUKODE(M_tt=data['M_tt'],
M_hh=data['M_hh'],
C=data['C'],
N=data['pop']['n'].to_numpy(),
W=data['W'].to_numpy(),
date_range=[settings['prediction_period'][0],
settings['prediction_period'][1]],
holidays=settings['holiday'],
lockdown=settings['lockdown'],
time_step=1)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
seeding = seed_areas(data['pop']['n'].to_numpy(), data['pop']['Area.name.2']) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
@tf.function
......@@ -87,6 +72,7 @@ if __name__ == '__main__':
p['gamma'] = gamma[i]
p['I0'] = I0[i]
p['r'] = r_[i]
state_init = simulator.create_initial_state(seeding * p['I0'])
t, sim, solver_results = simulator.simulate(p, state_init)
r = simulator.eval_R0(p)
R0 = R0.write(i, r[0])
......@@ -97,14 +83,12 @@ if __name__ == '__main__':
with tf.device('/CPU:0'):
sims, R0 = prediction(draws[:, 0], draws[:, 1], draws[:, 2], draws[:, 3], draws[:, 4])
sims = tf.stack(sims) # shape=[n_sims, n_times, n_metapops, n_states]
save_sims(simulator.times, sims, la_names, age_groups, 'pred_2020-03-29.h5')
save_sims(simulator.times, sims, data['la_names'], data['age_groups'], 'pred_2020-03-29.h5')
dub_time = [doubling_time(simulator.times, sim, '2020-03-01', '2020-04-01') for sim in sims.numpy()]
plot_prediction(settings['prediction_period'], sims, case_reports)
plot_case_incidence(settings['prediction_period'], sims.numpy())
# R0
R0_ci = tfs.percentile(R0, q=[2.5, 50, 97.5])
print("R0:", R0_ci)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment