...
 
Commits (3)
......@@ -80,35 +80,44 @@ class Homogeneous:
time_lims[1], 1., self.stoichiometry)
class CovidUKODE:
def dense_to_block_diagonal(A, n_blocks):
A_dense = tf.linalg.LinearOperatorFullMatrix(A)
eye = tf.linalg.LinearOperatorIdentity(n_blocks)
A_block = tf.linalg.LinearOperatorKronecker([eye, A_dense])
return A_block
def __init__(self, K, T, N):
class CovidUKODE: # TODO: add background case importation rate to the UK, e.g. \epsilon term.
def __init__(self, M_tt, M_hh, C, N, start, end, holidays, t_step):
"""Represents a CovidUK ODE model
:param K: a MxM matrix of age group mixing in term time
:param T: a n_ladsxn_lads matrix of inter-LAD commuting
:param K_tt: a MxM matrix of age group mixing in term time
:param K_hh: a MxM matrix of age group mixing in holiday time
:param holidays: a list of length-2 tuples containing dates of holidays
:param C: a n_ladsxn_lads matrix of inter-LAD commuting
:param N: a vector of population sizes in each LAD
:param n_lads: the number of LADS
"""
self.n_ages = K.shape[0]
self.n_lads = T.shape[0]
self.n_ages = M_tt.shape[0]
self.n_lads = C.shape[0]
self.Kbar = tf.reduce_mean(K)
self.K = tf.linalg.LinearOperatorFullMatrix(K)
eye = tf.linalg.LinearOperatorIdentity(self.n_lads)
self.K = tf.linalg.LinearOperatorKronecker([eye, self.K])
self.Kbar = tf.reduce_mean(M_tt)
self.M = tf.tuple([dense_to_block_diagonal(M_tt, self.n_lads),
dense_to_block_diagonal(M_hh, self.n_lads)])
self.T = tf.linalg.LinearOperatorFullMatrix(T + tf.transpose(T))
shp = tf.linalg.LinearOperatorFullMatrix(np.ones_like(K, dtype=np.float32))
self.T = tf.linalg.LinearOperatorKronecker([self.T, shp])
self.C = tf.linalg.LinearOperatorFullMatrix(C + tf.transpose(C))
shp = tf.linalg.LinearOperatorFullMatrix(np.ones_like(M_tt, dtype=np.float32))
self.C = tf.linalg.LinearOperatorKronecker([self.C, shp])
self.N = tf.constant(N, dtype=tf.float32)
N_matrix = tf.reshape(self.N, [self.n_lads, self.n_ages])
N_sum = tf.reduce_sum(N_matrix, axis=1)
N_sum = N_sum[:, None] * tf.ones([1, self.n_ages])
self.N_sum = tf.reshape(N_sum, [-1])
self.times = np.arange(start, end, np.timedelta64(t_step, 'D'))
m_select = (np.less_equal(holidays[0], self.times) & np.less(self.times, holidays[1])).astype(np.int64)
self.m_select = tf.constant(m_select)
self.solver = tode.DormandPrince()
def make_h(self, param):
......@@ -116,9 +125,12 @@ class CovidUKODE:
def h_fn(t, state):
state = tf.unstack(state, axis=0)
S, E, I, R = state
infec_rate = param['beta1'] * tf.linalg.matvec(self.K, I)
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * tf.linalg.matvec(self.T, I / self.N_sum)
m_switch = tf.gather(self.m_select, tf.cast(t, tf.int64))
if m_switch == 0:
infec_rate = param['beta1'] * tf.linalg.matvec(self.M[0], I)
else:
infec_rate = param['beta1'] * tf.linalg.matvec(self.M[1], I)
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * tf.linalg.matvec(self.C, I / self.N_sum)
infec_rate = S / self.N * infec_rate
dS = -infec_rate
......@@ -146,19 +158,16 @@ class CovidUKODE:
return np.stack([S, E, I, R])
@tf.function
def simulate(self, param, state_init, t_start, t_end, t_step=1., solver_state=None):
def simulate(self, param, state_init, solver_state=None):
h = self.make_h(param)
t0 = 0.
t1 = (t_end - t_start) / np.timedelta64(1, 'D')
t = np.arange(start=t0, stop=t1, step=t_step)[1:]
results = self.solver.solve(ode_fn=h, initial_time=t0, initial_state=state_init, solution_times=t,
previous_solver_internal_state=solver_state)
t = np.arange(self.times.shape[0])
results = self.solver.solve(ode_fn=h, initial_time=t[0], initial_state=state_init,
solution_times=t, previous_solver_internal_state=solver_state)
return results.times, results.states, results.solver_internal_state
def ngm(self, param):
infec_rate = param['beta1'] * self.K.to_dense()
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * self.T.to_dense() / self.N_sum[None, :]
infec_rate = param['beta1'] * self.M[0].to_dense()
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * self.C.to_dense() / self.N_sum[None, :]
ngm = infec_rate / param['gamma']
return ngm
......@@ -168,3 +177,11 @@ class CovidUKODE:
dom_eigen_vec, i = power_iteration(ngm, tol=tol)
R0 = rayleigh_quotient(ngm, dom_eigen_vec)
return tf.squeeze(R0), i
def covid19uk_logp(y, sim, phi):
# Sum daily increments in removed
r_incr = sim[1:, 3, :] - sim[:-1, 3, :]
r_incr = tf.reduce_sum(r_incr, axis=1)
y_ = tfp.distributions.Poisson(rate=phi*r_incr)
return y_.log_prob(y)
DateVal,CMODateCount,CumCases
2020-01-31,2,2
2020-02-01,0,2
2020-02-02,0,2
2020-02-03,0,2
2020-02-04,0,2
2020-02-05,0,2
2020-02-06,1,3
2020-02-07,0,3
2020-02-08,0,3
2020-02-09,1,4
2020-02-10,4,8
2020-02-11,0,8
2020-02-12,0,8
2020-02-13,1,9
2020-02-14,0,9
2020-02-15,0,9
2020-02-16,0,9
2020-02-17,0,9
2020-02-18,0,9
2020-02-19,0,9
2020-02-20,0,9
2020-02-21,0,9
2020-02-22,0,9
2020-02-23,0,9
2020-02-24,4,13
2020-02-25,0,13
2020-02-26,0,13
2020-02-27,0,13
2020-02-28,6,19
2020-02-29,4,23
2020-03-01,12,35
2020-03-02,5,40
2020-03-03,11,51
2020-03-04,34,85
2020-03-05,29,114
2020-03-06,46,160
2020-03-07,46,206
2020-03-08,65,271
2020-03-09,50,321
2020-03-10,52,373
2020-03-11,83,456
2020-03-12,139,590
2020-03-13,207,797
2020-03-14,343,1140
File added
This source diff could not be displayed because it is too large. You can view the blob instead.
%% LyX 2.2.4 created this file. For more info, see http://www.lyx.org/.
%% Do not edit unless you really know what you are doing.
\documentclass[english]{article}
\usepackage[T1]{fontenc}
\usepackage[latin9]{inputenc}
\usepackage{geometry}
\geometry{verbose,tmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm,rmargin=2.5cm}
\setlength{\parskip}{\medskipamount}
\setlength{\parindent}{0pt}
\usepackage{url}
\usepackage{bm}
\usepackage{amsmath}
\usepackage{babel}
\begin{document}
\title{UK Age and Space structured Covid-19 model}
\author{Chris Jewell, Barry Rowlingson, Jon Read}
\maketitle
\section{Concept}
We wish to develop a model that will enable us to assess spatial spread
of Covid-19 across the UK, respecting the availability of human mobility
data as well as known contact behaviour between individuals of different
ages.
A deterministic SEIR model is posited in which infection rate is written
as a function of known age-structured contact from Polymod, known
human mobility between MSOAs (Middle Super Output Area), and Census-derived
age structured population density in regions across the UK.
Currently, this model is populated with data for England only, though
we are in the process of extending this to Scotland and Wales.
Noise in daily case numbers $y_{it}$ for age-group $i$ in location
$k$ on day $t$ is assumed to be Poisson-distributed such that
\[
y_{ikt}\sim\mbox{Poisson}(R_{ikt}-R_{ikt-1})
\]
\section{Data}
\subsection{Age-mixing}
Standard Polymod data for the UK are used, with 17 5-year age groups
$[0-5),[5-10),\dots,[75-80),[80-\infty)$. Estimated contact matrices
for term-time $M_{tt}$ and school-holidays $M_{hh}$ were extracted
of dimension $n_{m}\times n_{m}$ where $n_{m}=17$.
\subsection{Human mobility}
2011 Census data from ONS on daily mean numbers of commuters moving
from each Residential MSOA to Workplace MSOA. MSOAs are aggregated
to Local Authority Districts (LADs) for which we have age-structured
population density. The resulting matrix $C$ is of dimension $n_{c}\times n_{c}$
where $n_{c}=152$. Since this matrix is for Residence to Workplace
movement only, we assume that the mean number of journeys between
each LAD is given by
\[
T=C+C^{T}
\]
with 0 diagonal.
\subsection{Population size}
Age-structured population size within each LAD is taken from publicly
available 2019 Local Authority data giving a vector $N$ of length
$n_{m}n_{c}$, i.e. population for each of $n_{m}$ age groups and
$n_{c}$ LADs.
\section{Model}
\subsection{Connectivity matrix}
We assemble a country-wide connectivity matrices as Kronecker products,
such that
\[
K^{\star}=I_{n_{c}}\bigotimes M
\]
and
\[
T^{\star}=C\bigotimes\bm{1}_{n_{m}\times n_{c}}
\]
giving two matrices of dimension $n_{m}n_{c}$. $K^{\star}$ is block
diagonal with Polymod mixing matrices. $T^{\star}$ expands the mobility
matrix $C$ such that a block structure of connectivity between LADs
results.
\subsection{Disease progression model}
We assume an SEIR model described as a system of ODEs. We denote the
number of individual in each age-group-LAD combination at time $t$
by the vectors $\vec{S}(t),\vec{E}(t),\vec{I}(t),\vec{R}(t)$. We
therefore have
\begin{align*}
\frac{\mathrm{d\vec{S}(t)}}{dt} & =-\beta_{1}\left[K^{\star}\vec{I}(t)+\beta_{2}\bar{K}T^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}\\
\frac{\mathrm{d}\vec{E}(t)}{dt} & =\beta_{1}\left[K^{\star}\vec{I}(t)+\beta_{2}\bar{K}T^{\star}\frac{{\vec{I}(t)}}{N}\right]\frac{\vec{S}(t)}{N}-\nu\vec{E}(t)\\
\frac{\mathrm{d}\vec{I}(t)}{dt} & =\nu\vec{E}(t)-\gamma\vec{I}(t)\\
\frac{\mathrm{d}\vec{R}(t)}{dt} & =\gamma\vec{I}(t)
\end{align*}
with parameters: baseline infection rate $\beta_{1}$, commuting infection
ratio $\beta_{2}$, latent period $\frac{1}{\nu}$, and infectious
period $\frac{1}{\gamma}$.
\subsection{Noise model}
Currently, and subject to discussion, we assume that all detected
cases are synonymous with individuals transitioning $I\rightarrow R$.
We assume the number of new cases in each age-LAD combination are
given by
\[
y_{ik}(t)\sim\mbox{Poisson}\left(R_{ik}(t)-R_{ik}(t-1)\right)
\]
This could be relaxed to a Negative Binomial distribution to account
for Poisson overdispersion.
\subsection{Implementation}
The model is currently implemented in Python3, using Tensorflow 2.1
with the RK5 differential equation solver implemented in the \texttt{DormandPrince}
class provided by Tensorflow Probability 0.9. Code may be found at
\url{http://fhm-chicas-code.lancs.ac.uk/jewell/covid19uk}.
\section{Quick results}
See attached SPI-M report.
\end{document}
import optparse
import yaml
import time
import pickle as pkl
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tensorflow_probability import bijectors as tfb
import matplotlib.pyplot as plt
from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.model import CovidUKODE, covid19uk_logp
from covid.util import *
def plotting(dates, sim):
print("Initial R0:", simulator.eval_R0(param))
print("Doubling time:", doubling_time(dates, sim.numpy(), '2020-02-27','2020-03-13'))
fig = plt.figure()
removals = tf.reduce_sum(sim[:, 3, :], axis=1)
infected = tf.reduce_sum(sim[:, 1:3, :], axis=[1,2])
exposed = tf.reduce_sum(sim[:, 1, :], axis=1)
date = np.squeeze(np.where(dates == np.datetime64('2020-03-13'))[0])
print("Daily incidence 2020-03-13:", exposed[date]-exposed[date-1])
plt.plot(dates, removals*0.10, '-', label='10% reporting')
plt.plot(dates, infected, '-', color='red', label='Total infected')
plt.plot(dates, removals, '-', color='gray', label='Total recovered/detected/died')
plt.scatter(np.datetime64('2020-03-13'), 600, label='gov.uk cases 13th March 2020')
plt.legend()
plt.grid(True)
fig.autofmt_xdate()
plt.show()
if __name__ == '__main__':
parser = optparse.OptionParser()
parser.add_option("--config", "-c", dest="config",
help="configuration file")
options, args = parser.parse_args()
with open(options.config, 'r') as ymlfile:
config = yaml.load(ymlfile)
K_tt, age_groups = load_age_mixing(config['data']['age_mixing_matrix_term'])
K_hh, _ = load_age_mixing(config['data']['age_mixing_matrix_hol'])
T, la_names = load_mobility_matrix(config['data']['mobility_matrix'])
np.fill_diagonal(T, 0.)
N, n_names = load_population(config['data']['population_size'])
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
date_range = [case_reports['DateVal'].min(), case_reports['DateVal'].max()]
y = case_reports['CumCases'].to_numpy()
y_incr = np.round((y[1:] - y[:-1]) * 0.8)
simulator = CovidUKODE(K_tt, K_hh, T, N, date_range[0]-np.timedelta64(1,'D'),
date_range[1], settings['holiday'], int(settings['time_step']))
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
#@tf.function
def logp(beta1):
p = param
p['beta1'] = beta1
beta_logp = tfd.Gamma(concentration=1., rate=1.).log_prob(p['beta1'])
t, sim, solve = simulator.simulate(p, state_init)
y_logp = covid19uk_logp(y_incr, sim, 0.1)
return beta_logp + tf.reduce_sum(y_logp)
unconstraining_bijector = [tfb.Log()]
initial_mcmc_state = [0.03]
print("Initial log likelihood:", logp(0.03))
@tf.function
def sample():
return tfp.mcmc.sample_chain(
num_results=2000,
num_burnin_steps=500,
current_state=initial_mcmc_state,
kernel=tfp.mcmc.SimpleStepSizeAdaptation(
tfp.mcmc.TransformedTransitionKernel(
inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
target_log_prob_fn=logp,
step_size=0.0001,
num_leapfrog_steps=5),
bijector=unconstraining_bijector),
num_adaptation_steps=400),
trace_fn=lambda _, pkr: pkr.inner_results.inner_results.is_accepted)
start = time.perf_counter()
pi_beta, accept = sample()
end = time.perf_counter()
print(f"Simulation complete in {end-start} seconds")
plt.plot(pi_beta[0])
plt.show()
print(f"Posterior mean: {np.mean(pi_beta[0])}")
with open('pi_beta_2020-03-15.pkl','wb') as f:
pkl.dump(pi_beta, f)
#dates = settings['start'] + t.numpy().astype(np.timedelta64)
#plotting(dates, sim)
......@@ -5,6 +5,7 @@ data:
age_mixing_matrix_hol: data/polymod_no_school_df.rds
mobility_matrix: data/movement.rds
population_size: data/pop.rds
reported_cases: data/DailyConfirmedCases.csv
parameter:
beta1: 0.035 # R0 2.4
......
"""Prediction functions"""
import optparse
import yaml
import pickle as pkl
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow_probability import stats as tfs
import matplotlib.pyplot as plt
from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.model import CovidUKODE
from covid.util import *
from covid.rdata import load_age_mixing, load_mobility_matrix, load_population
from covid.util import sanitise_settings, sanitise_parameter, seed_areas
if __name__ == '__main__':
......@@ -28,47 +33,67 @@ if __name__ == '__main__':
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
model_term = CovidUKODE(K_tt, T, N)
model_holiday = CovidUKODE(K_hh, T, N)
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
date_range = [case_reports['DateVal'].min(), case_reports['DateVal'].max()]
y = case_reports['CumCases'].to_numpy()
y_incr = y[1:] - y[-1:]
with open('pi_beta_2020-03-15.pkl', 'rb') as f:
pi_beta = pkl.load(f)
# Predictive distribution of epidemic spread
data_dates = np.arange(date_range[0],
date_range[1]+np.timedelta64(1,'D'),
np.timedelta64(1, 'D'))
simulator = CovidUKODE(K_tt, K_hh, T, N, date_range[0] - np.timedelta64(1, 'D'),
np.datetime64('2020-07-01'), settings['holiday'], 1)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = model_term.create_initial_state(init_matrix=seeding)
@tf.function()
def simulate():
t0, sim_0, solve0 = model_term.simulate(param, state_init,
settings['start'], settings['holiday'][0],
settings['time_step'])
t1, sim_1, solve1 = model_holiday.simulate(param, sim_0[-1, :, :],
settings['holiday'][0], settings['holiday'][1],
settings['time_step'], solver_state=None)
t2, sim_2, _ = model_term.simulate(param, sim_1[-1, :, :],
settings['holiday'][1], settings['end'],
settings['time_step'], solver_state=None)
t = tf.concat([t0, t1 + t0[-1], t2 + t0[-1] + t1[-1]], axis=0)
sim = tf.concat([tf.expand_dims(state_init, axis=0), sim_0, sim_1, sim_2], axis=0)
return t, sim
t, sim = simulate()
dates = settings['start'] + t.numpy().astype(np.timedelta64)
print("Initial R0:", model_term.eval_R0(param))
print("Doubling time:", doubling_time(dates, sim.numpy(), '2020-02-27','2020-03-13'))
state_init = simulator.create_initial_state(init_matrix=seeding)
#@tf.function
def prediction(beta):
sims = tf.TensorArray(tf.float32, size=beta.shape[0])
for i in tf.range(beta.shape[0]):
p = param
p['beta1'] = beta[i]
t, sim, solver_results = simulator.simulate(p, state_init)
sim_aggr = tf.reduce_sum(sim, axis=2)
sims = sims.write(i, sim_aggr)
return sims.gather(range(beta.shape[0]))
draws = pi_beta[0].numpy()[np.arange(0, pi_beta[0].shape[0], 20)]
sims = prediction(draws)
dates = np.arange(date_range[0]-np.timedelta64(1, 'D'), np.datetime64('2020-07-01'),
np.timedelta64(1, 'D'))
total_infected = tfs.percentile(tf.reduce_sum(sims[:, :, 1:3], axis=2), q=[2.5, 50, 97.5], axis=0)
removed = tfs.percentile(sims[:, :, 3], q=[2.5, 50, 97.5], axis=0)
removed_observed = tfs.percentile(removed * 0.1, q=[2.5, 50, 97.5], axis=0)
fig = plt.figure()
removals = tf.reduce_sum(sim[:, 3, :], axis=1)
infected = tf.reduce_sum(sim[:, 1:3, :], axis=[1,2])
exposed = tf.reduce_sum(sim[:, 1, :], axis=1)
date = np.squeeze(np.where(dates == np.datetime64('2020-03-13'))[0])
print("Daily incidence 2020-03-13:", exposed[date])
plt.plot(dates, removals[:-1]*0.10, '-', label='10% reporting')
plt.plot(dates, infected[:-1], '-', color='red', label='Total infected')
plt.plot(dates, removals[:-1], '-', color='gray', label='Total recovered/detected/died')
plt.scatter(np.datetime64('2020-03-13'), 600, label='gov.uk cases 13th March 2020')
plt.legend()
plt.grid(True)
filler = plt.fill_between(dates, total_infected[0, :], total_infected[2, :], color='lightgray', label="95% credible interval")
plt.fill_between(dates, removed[0, :], removed[2, :], color='lightgray')
plt.fill_between(dates, removed_observed[0, :], removed_observed[2, :], color='lightgray')
ti_line = plt.plot(dates, total_infected[1, :], '-', color='red', alpha=0.4, label="Infected")
rem_line = plt.plot(dates, removed[1, :], '-', color='blue', label="Removed")
ro_line = plt.plot(dates, removed_observed[1, :], '-', color='orange', label='Predicted detections')
marks = plt.plot(data_dates, y, '+', label='Observed cases')
plt.legend([ti_line[0], rem_line[0], ro_line[0], filler, marks[0]],
["Infected", "Removed", "Predicted detections", "95% credible interval", "Observed counts"])
plt.grid()
plt.xlabel("Date")
plt.ylabel("$10^7$ individuals")
fig.autofmt_xdate()
plt.show()
# Number of new cases per day
new_cases = tfs.percentile(removed[:, 1:] - removed[:, :-1], q=[2.5, 50, 97.5], axis=0)/10000.
fig = plt.figure()
plt.fill_between(dates[:-1], new_cases[0, :], new_cases[2, :], color='lightgray', label="95% credible interval")
plt.plot(dates[:-1], new_cases[1, :], '-', alpha=0.2, label='New cases')
plt.grid()
plt.xlabel("Date")
plt.ylabel("Incidence per 10,000")
fig.autofmt_xdate()
plt.show()
......@@ -45,7 +45,7 @@ if __name__ == '__main__':
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
model_term = CovidUKODE(K_tt, T, N)
model_term = CovidUKODE(K_tt, K_hh, T, N)
model_holiday = CovidUKODE(K_hh, T, N)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
......