Commits (2)
......@@ -82,7 +82,7 @@ class Homogeneous:
def dense_to_block_diagonal(A, n_blocks):
A_dense = tf.linalg.LinearOperatorFullMatrix(A)
eye = tf.linalg.LinearOperatorIdentity(n_blocks)
eye = tf.linalg.LinearOperatorIdentity(n_blocks, dtype=tf.float64)
A_block = tf.linalg.LinearOperatorKronecker([eye, A_dense])
return A_block
......@@ -101,24 +101,25 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
self.n_ages = M_tt.shape[0]
self.n_lads = C.shape[0]
self.Kbar = tf.reduce_mean(M_tt)
self.M = tf.tuple([dense_to_block_diagonal(M_tt, self.n_lads),
dense_to_block_diagonal(M_hh, self.n_lads)])
self.Kbar = tf.reduce_mean(tf.cast(M_tt, tf.float64))
self.M = tf.tuple([dense_to_block_diagonal(tf.cast(M_tt, tf.float64), self.n_lads),
dense_to_block_diagonal(tf.cast(M_hh, tf.float64), self.n_lads)])
C = tf.cast(C, tf.float64)
self.C = tf.linalg.LinearOperatorFullMatrix(C + tf.transpose(C))
shp = tf.linalg.LinearOperatorFullMatrix(np.ones_like(M_tt, dtype=np.float32))
shp = tf.linalg.LinearOperatorFullMatrix(np.ones_like(M_tt, dtype=np.float64))
self.C = tf.linalg.LinearOperatorKronecker([self.C, shp])
self.N = tf.constant(N, dtype=tf.float32)
self.N = tf.constant(N, dtype=tf.float64)
N_matrix = tf.reshape(self.N, [self.n_lads, self.n_ages])
N_sum = tf.reduce_sum(N_matrix, axis=1)
N_sum = N_sum[:, None] * tf.ones([1, self.n_ages])
N_sum = N_sum[:, None] * tf.ones([1, self.n_ages], dtype=tf.float64)
self.N_sum = tf.reshape(N_sum, [-1])
self.times = np.arange(start, end, np.timedelta64(t_step, 'D'))
m_select = (np.less_equal(holidays[0], self.times) & np.less(self.times, holidays[1])).astype(np.int64)
self.m_select = tf.constant(m_select, dtype=tf.int64)
self.bg_select = tf.constant(np.less(bg_max_t, self.times), dtype=tf.int64)
self.bg_select = tf.constant(np.less(self.times, bg_max_t), dtype=tf.int64)
self.solver = tode.DormandPrince()
def make_h(self, param):
......@@ -128,7 +129,7 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
S, E, I, R = state
t = tf.clip_by_value(tf.cast(t, tf.int64), 0, self.m_select.shape[0]-1)
m_switch = tf.gather(self.m_select, t)
epsilon = param['epsilon'] * tf.cast(tf.gather(self.bg_select, t), tf.float32)
epsilon = param['epsilon'] * tf.cast(tf.gather(self.bg_select, t), tf.float64)
if m_switch == 0:
infec_rate = param['beta1'] * tf.linalg.matvec(self.M[0], I)
......@@ -148,7 +149,7 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
def create_initial_state(self, init_matrix=None):
if init_matrix is None:
I = np.zeros(self.N.shape, dtype=np.float32)
I = np.zeros(self.N.shape, dtype=np.float64)
I[149*17+10] = 30. # Middle-aged in Surrey
np.testing.assert_array_equal(init_matrix.shape, [self.n_lads, self.n_ages],
......@@ -156,8 +157,8 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
I = init_matrix.flatten()
S = self.N - I
E = np.zeros(self.N.shape, dtype=np.float32)
R = np.zeros(self.N.shape, dtype=np.float32)
E = np.zeros(self.N.shape, dtype=np.float64)
R = np.zeros(self.N.shape, dtype=np.float64)
return np.stack([S, E, I, R])
......@@ -25,29 +25,28 @@ of Covid-19 across the UK, respecting the availability of human mobility
data as well as known contact behaviour between individuals of different
A deterministic SEIR model is posited in which infection rate is written
A deterministic SEIR state transition model is posited in which individuals
transition from Susceptible to Exposed (i.e. infected but not yet
infectious) to Infectious to Removed (i.e. quarantined, got better,
or died).
We model the infection rate (rate of S$\rightarrow$E transition)
as a function of known age-structured contact from Polymod, known
human mobility between MSOAs (Middle Super Output Area), and Census-derived
age structured population density in regions across the UK.
Currently, this model is populated with data for England only, though
we are in the process of extending this to Scotland and Wales.
Noise in daily case numbers $y_{it}$ for age-group $i$ in location
$k$ on day $t$ is assumed to be Poisson-distributed such that
we are in the process of extending this to Scotland, Wales, and Northern
Standard Polymod data for the UK are used, with 17 5-year age groups
$[0-5),[5-10),\dots,[75-80),[80-\infty)$. Estimated contact matrices
for term-time $M_{tt}$ and school-holidays $M_{hh}$ were extracted
of dimension $n_{m}\times n_{m}$ where $n_{m}=17$.
Standard Polymod social mixing data for the UK are used, with 17 5-year
age groups $[0-5),[5-10),\dots,[75-80),[80-\infty)$. Estimated contact
matrices for term-time $M_{tt}$ and school-holidays $M_{hh}$ were
extracted of dimension $n_{m}\times n_{m}$ where $n_{m}=17$.
\subsection{Human mobility}
......@@ -67,8 +66,8 @@ with 0 diagonal.
Age-structured population size within each LAD is taken from publicly
available 2019 Local Authority data giving a vector $N$ of length
$n_{m}n_{c}$, i.e. population for each of $n_{m}$ age groups and
$n_{c}$ LADs.
$n_{m}n_{c}=2584$, i.e. population for each of $n_{m}$ age groups
and $n_{c}$ LADs.
......@@ -77,16 +76,16 @@ $n_{c}$ LADs.
We assemble a country-wide connectivity matrices as Kronecker products,
such that
K^{\star}=I_{n_{c}}\bigotimes M
M^{\star}=I_{n_{c}}\bigotimes M
T^{\star}=C\bigotimes\bm{1}_{n_{m}\times n_{c}}
C^{\star}=C\bigotimes\bm{1}_{n_{m}\times n_{c}}
giving two matrices of dimension $n_{m}n_{c}$. $K^{\star}$ is block
diagonal with Polymod mixing matrices. $T^{\star}$ expands the mobility
matrix $C$ such that a block structure of connectivity between LADs
giving two matrices of dimension $n_{m}n_{c}\times n_{m}n_{c}$. $M^{\star}$
is block diagonal with Polymod mixing matrices. $C^{\star}$ expands
the mobility matrix $C$ such that a block structure of connectivity
between LADs results.
\subsection{Disease progression model}
......@@ -95,16 +94,19 @@ number of individual in each age-group-LAD combination at time $t$
by the vectors $\vec{S}(t),\vec{E}(t),\vec{I}(t),\vec{R}(t)$. We
therefore have
\frac{\mathrm{d\vec{S}(t)}}{dt} & =-\beta_{1}\left[K^{\star}\vec{I}(t)+\beta_{2}\bar{K}T^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}\\
\frac{\mathrm{d}\vec{E}(t)}{dt} & =\beta_{1}\left[K^{\star}\vec{I}(t)+\beta_{2}\bar{K}T^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}-\nu\vec{E}(t)\\
\frac{\mathrm{d\vec{S}(t)}}{dt} & =-\beta_{1}\left[M^{\star}\vec{I}(t)+\beta_{2}\bar{M}C^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}\\
\frac{\mathrm{d}\vec{E}(t)}{dt} & =\beta_{1}\left[M^{\star}\vec{I}(t)+\beta_{2}\bar{M}C^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}-\nu\vec{E}(t)\\
\frac{\mathrm{d}\vec{I}(t)}{dt} & =\nu\vec{E}(t)-\gamma\vec{I}(t)\\
\frac{\mathrm{d}\vec{R}(t)}{dt} & =\gamma\vec{I}(t)
with parameters: baseline infection rate $\beta_{1}$, commuting infection
ratio $\beta_{2}$, latent period $\frac{1}{\nu}$, and infectious
period $\frac{1}{\gamma}$.
where $\bar{M}$ is the global mean person-person contact rate. Parameters
are: baseline infection rate $\beta_{1}$, commuting infection ratio
$\beta_{2}$, latent period $\frac{1}{\nu}$, and infectious period
$\frac{1}{\gamma}$. Typically, we assume that contact with commuters
is $\beta_{2}=\frac{1}{3}$ of that between members of the same age-LAD
combination assuming an 8 hour working day.
\subsection{Noise model}
\subsection{Noise model (under construction)}
Currently, and subject to discussion, we assume that all detected
cases are synonymous with individuals transitioning $I\rightarrow R$.
......@@ -122,9 +124,7 @@ for Poisson overdispersion.
The model is currently implemented in Python3, using Tensorflow 2.1
with the RK5 differential equation solver implemented in the \texttt{DormandPrince}
class provided by Tensorflow Probability 0.9. Code may be found at
\section{Quick results}
See attached SPI-M report.
The code is a work in progress \textendash{} see README.
......@@ -8,6 +8,7 @@ import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tensorflow_probability import bijectors as tfb
import matplotlib.pyplot as plt
from tensorflow_probability.python.util import SeedStream
from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.model import CovidUKODE, covid19uk_logp
......@@ -36,6 +37,18 @@ def plotting(dates, sim):
def random_walk_mvnorm_fn(covariance, name=None):
"""Returns callable that adds Multivariate Normal noise to the input"""
rv = tfp.distributions.MultivariateNormalTriL(loc=tf.zeros(covariance.shape[0], dtype=tf.float64),
scale_tril=tf.linalg.cholesky(tf.convert_to_tensor(covariance, dtype=tf.float64)))
def _fn(state_parts, seed):
with tf.name_scope(name or 'random_walk_mvnorm_fn'):
seed_stream = SeedStream(seed, salt='RandomWalkNormalFn')
return rv.sample() + state_parts
if __name__ == '__main__':
parser = optparse.OptionParser()
......@@ -68,16 +81,18 @@ if __name__ == '__main__':
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
def logp(epsilon, beta):
def logp(par):
p = param
p['epsilon'] = epsilon
p['beta1'] = beta
epsilon_logp = tfd.Gamma(concentration=1., rate=1.).log_prob(p['epsilon'])
beta_logp = tfd.Gamma(concentration=1., rate=1.).log_prob(p['beta1'])
p['epsilon'] = par[0]
p['beta1'] = par[1]
p['gamma'] = par[2]
epsilon_logp = tfd.Gamma(concentration=tf.constant(1., tf.float64), rate=tf.constant(1., tf.float64)).log_prob(p['epsilon'])
beta_logp = tfd.Gamma(concentration=tf.constant(1., tf.float64), rate=tf.constant(1., tf.float64)).log_prob(p['beta1'])
gamma_logp = tfd.Gamma(concentration=tf.constant(100., tf.float64), rate=tf.constant(400., tf.float64)).log_prob(p['gamma'])
t, sim, solve = simulator.simulate(p, state_init)
y_logp = covid19uk_logp(y_incr, sim, 0.1)
logp = epsilon_logp + beta_logp + tf.reduce_sum(y_logp)
logp = epsilon_logp + beta_logp + gamma_logp + tf.reduce_sum(y_logp)
return logp
def trace_fn(_, pkr):
......@@ -87,48 +102,51 @@ if __name__ == '__main__':
unconstraining_bijector = [tfb.Exp(), tfb.Exp()]
initial_mcmc_state = [0.00001, 0.03]
print("Initial log likelihood:", logp(*initial_mcmc_state))
unconstraining_bijector = [tfb.Exp()]
initial_mcmc_state = np.array([0.001, 0.036, 0.25], dtype=np.float64)
print("Initial log likelihood:", logp(initial_mcmc_state))
def sample(n_samples, step_size=[0.01, 0.1]):
def sample(n_samples, init_state, scale):
return tfp.mcmc.sample_chain(
trace_fn=lambda _, pkr: pkr.inner_results.inner_results.accepted_results.step_size)
trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)
with tf.device("/CPU:0"):
cov = np.diag([0.001, 0.03, 1.02]) * 2.38**2 / 3
start = time.perf_counter()
pi_beta, results = sample(1000, [0.1, 0.1])
# sd = tfp.stats.stddev(pi_beta, sample_axis=1)
# sd = sd / tf.reduce_sum(sd)
# new_step_size = [results[-1] * sd[0], results[-1] * sd[1]]
# print("New step size:",new_step_size)
# pi_beta, results = sample(1000, step_size=new_step_size)
joint_posterior, results = sample(100, init_state=initial_mcmc_state, scale=cov)
for i in range(10):
cov = tfp.stats.covariance(tf.math.log(joint_posterior)) * 2.38**2 / joint_posterior.shape[0] + tf.eye(cov.shape[0], dtype=tf.float64)*1.e-7
posterior_new, results = sample(100, joint_posterior[-1, :], cov)
joint_posterior = tf.concat([joint_posterior, posterior_new], axis=0)
posterior_new, results = sample(2000, init_state=joint_posterior[-1, :], scale=cov)
joint_posterior = tf.concat([joint_posterior, posterior_new], axis=0)
end = time.perf_counter()
print(f"Simulation complete in {end-start} seconds")
print("Acceptance: ", np.mean(results.numpy()))
print(tfp.stats.covariance(tf.math.log(joint_posterior[1000:, :])))
fig, ax = plt.subplots(1, 2)
fig, ax = plt.subplots(1, 3)
ax[0].plot(joint_posterior[:, 0])
ax[1].plot(joint_posterior[:, 1])
ax[2].plot(joint_posterior[:, 2])
print(f"Posterior mean: {np.mean(pi_beta[0])}")
print(f"Posterior mean: {np.mean(joint_posterior, axis=0)}")
with open('pi_beta_2020-03-15.pkl', 'wb') as f:
pkl.dump(pi_beta, f)
pkl.dump(joint_posterior, f)
#dates = settings['start'] + t.numpy().astype(np.timedelta64)
#plotting(dates, sim)
"""Prediction functions"""
import optparse
import seaborn
import yaml
import pickle as pkl
import numpy as np
......@@ -7,10 +9,22 @@ import pandas as pd
import tensorflow as tf
from tensorflow_probability import stats as tfs
import matplotlib.pyplot as plt
import h5py
from covid.model import CovidUKODE
from covid.rdata import load_age_mixing, load_mobility_matrix, load_population
from covid.util import sanitise_settings, sanitise_parameter, seed_areas
from covid.util import sanitise_settings, sanitise_parameter, seed_areas, doubling_time
def save_sims(sims, la_names, age_groups, filename):
f = h5py.File(filename, 'w')
dset_sim = f.create_dataset('prediction', data=sims)
la_long = np.repeat(la_names, age_groups.shape[0]).astype(np.string_)
age_long = np.tile(age_groups, la_names.shape[0]).astype(np.string_)
dset_dims = f.create_dataset("dimnames", data=[b'sim_id', b't', b'state', b'la_age'])
dset_la = f.create_dataset('la_names', data=la_long)
dset_age = f.create_dataset('age_names', data=age_long)
if __name__ == '__main__':
......@@ -48,25 +62,42 @@ if __name__ == '__main__':
np.timedelta64(1, 'D'))
simulator = CovidUKODE(K_tt, K_hh, T, N, date_range[0] - np.timedelta64(1, 'D'),
np.datetime64('2020-05-01'), settings['holiday'], settings['bg_max_time'], 1)
np.datetime64('2020-09-01'), settings['holiday'], settings['bg_max_time'], 1)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
def prediction(beta):
def prediction(epsilon, beta):
sims = tf.TensorArray(tf.float32, size=beta.shape[0])
R0 = tf.TensorArray(tf.float32, size=beta.shape[0])
#d_time = tf.TensorArray(tf.float32, size=beta.shape[0])
for i in tf.range(beta.shape[0]):
p = param
p['epsilon'] = epsilon[i]
p['beta1'] = beta[i]
t, sim, solver_results = simulator.simulate(p, state_init)
sim_aggr = tf.reduce_sum(sim, axis=2)
sims = sims.write(i, sim_aggr)
return sims.gather(range(beta.shape[0]))
r = simulator.eval_R0(p)
R0 = R0.write(i, r[0])
#d_time = d_time.write(i, doubling_time(t, sim, '2002-03-01', '2002-04-01'))
#sim_aggr = tf.reduce_sum(sim, axis=2)
sims = sims.write(i, sim)
return sims.gather(range(beta.shape[0])), R0.gather(range(beta.shape[0]))
draws = [pi_beta[0].numpy()[np.arange(500, pi_beta[0].shape[0], 10)],
pi_beta[1].numpy()[np.arange(500, pi_beta[1].shape[0], 10)]]
with tf.device('/CPU:0'):
sims, R0 = prediction(draws[0], draws[1])
sims = tf.stack(sims) # shape=[n_sims, n_times, n_states, n_metapops]
save_sims(sims, la_names, age_groups, 'pred_2020-03-15.h5')
draws = pi_beta[0].numpy()[np.arange(0, pi_beta[0].shape[0], 20)]
sims = prediction(draws)
dub_time = [doubling_time(simulator.times, sim, '2020-03-01', '2020-04-01') for sim in sims.numpy()]
dates = np.arange(date_range[0]-np.timedelta64(1, 'D'), np.datetime64('2020-05-01'),
# Sum over country
sims = tf.reduce_sum(sims, axis=3)
print("Plotting...", flush=True)
dates = np.arange(date_range[0]-np.timedelta64(1, 'D'), np.datetime64('2020-09-01'),
np.timedelta64(1, 'D'))
total_infected = tfs.percentile(tf.reduce_sum(sims[:, :, 1:3], axis=2), q=[2.5, 50, 97.5], axis=0)
removed = tfs.percentile(sims[:, :, 3], q=[2.5, 50, 97.5], axis=0)
......@@ -98,3 +129,15 @@ if __name__ == '__main__':
plt.ylabel("Incidence per 10,000")
# R0
R0_ci = tfs.percentile(R0, q=[2.5, 50, 97.5])
print("R0:", R0_ci)
fig = plt.figure()
seaborn.kdeplot(R0.numpy(), ax=fig.gca())
# Doubling time
dub_ci = tfs.percentile(dub_time, q=[2.5, 50, 97.5])
print("Doubling time:", dub_ci)