Commit f8e38742 authored by Chris Jewell's avatar Chris Jewell
Browse files

Commit after constructing MCMC and prediction functions.

parent 7586e553
......@@ -80,35 +80,44 @@ class Homogeneous:
time_lims[1], 1., self.stoichiometry)
class CovidUKODE:
def dense_to_block_diagonal(A, n_blocks):
A_dense = tf.linalg.LinearOperatorFullMatrix(A)
eye = tf.linalg.LinearOperatorIdentity(n_blocks)
A_block = tf.linalg.LinearOperatorKronecker([eye, A_dense])
return A_block
def __init__(self, K, T, N):
class CovidUKODE: # TODO: add background case importation rate to the UK, e.g. \epsilon term.
def __init__(self, M_tt, M_hh, C, N, start, end, holidays, t_step):
"""Represents a CovidUK ODE model
:param K: a MxM matrix of age group mixing in term time
:param T: a n_ladsxn_lads matrix of inter-LAD commuting
:param K_tt: a MxM matrix of age group mixing in term time
:param K_hh: a MxM matrix of age group mixing in holiday time
:param holidays: a list of length-2 tuples containing dates of holidays
:param C: a n_ladsxn_lads matrix of inter-LAD commuting
:param N: a vector of population sizes in each LAD
:param n_lads: the number of LADS
"""
self.n_ages = K.shape[0]
self.n_lads = T.shape[0]
self.n_ages = M_tt.shape[0]
self.n_lads = C.shape[0]
self.Kbar = tf.reduce_mean(K)
self.K = tf.linalg.LinearOperatorFullMatrix(K)
eye = tf.linalg.LinearOperatorIdentity(self.n_lads)
self.K = tf.linalg.LinearOperatorKronecker([eye, self.K])
self.Kbar = tf.reduce_mean(M_tt)
self.M = tf.tuple([dense_to_block_diagonal(M_tt, self.n_lads),
dense_to_block_diagonal(M_hh, self.n_lads)])
self.T = tf.linalg.LinearOperatorFullMatrix(T + tf.transpose(T))
shp = tf.linalg.LinearOperatorFullMatrix(np.ones_like(K, dtype=np.float32))
self.T = tf.linalg.LinearOperatorKronecker([self.T, shp])
self.C = tf.linalg.LinearOperatorFullMatrix(C + tf.transpose(C))
shp = tf.linalg.LinearOperatorFullMatrix(np.ones_like(M_tt, dtype=np.float32))
self.C = tf.linalg.LinearOperatorKronecker([self.C, shp])
self.N = tf.constant(N, dtype=tf.float32)
N_matrix = tf.reshape(self.N, [self.n_lads, self.n_ages])
N_sum = tf.reduce_sum(N_matrix, axis=1)
N_sum = N_sum[:, None] * tf.ones([1, self.n_ages])
self.N_sum = tf.reshape(N_sum, [-1])
self.times = np.arange(start, end, np.timedelta64(t_step, 'D'))
m_select = (np.less_equal(holidays[0], self.times) & np.less(self.times, holidays[1])).astype(np.int64)
self.m_select = tf.constant(m_select)
self.solver = tode.DormandPrince()
def make_h(self, param):
......@@ -116,9 +125,12 @@ class CovidUKODE:
def h_fn(t, state):
state = tf.unstack(state, axis=0)
S, E, I, R = state
infec_rate = param['beta1'] * tf.linalg.matvec(self.K, I)
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * tf.linalg.matvec(self.T, I / self.N_sum)
m_switch = tf.gather(self.m_select, tf.cast(t, tf.int64))
if m_switch == 0:
infec_rate = param['beta1'] * tf.linalg.matvec(self.M[0], I)
else:
infec_rate = param['beta1'] * tf.linalg.matvec(self.M[1], I)
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * tf.linalg.matvec(self.C, I / self.N_sum)
infec_rate = S / self.N * infec_rate
dS = -infec_rate
......@@ -146,19 +158,16 @@ class CovidUKODE:
return np.stack([S, E, I, R])
@tf.function
def simulate(self, param, state_init, t_start, t_end, t_step=1., solver_state=None):
def simulate(self, param, state_init, solver_state=None):
h = self.make_h(param)
t0 = 0.
t1 = (t_end - t_start) / np.timedelta64(1, 'D')
t = np.arange(start=t0, stop=t1, step=t_step)[1:]
results = self.solver.solve(ode_fn=h, initial_time=t0, initial_state=state_init, solution_times=t,
previous_solver_internal_state=solver_state)
t = np.arange(self.times.shape[0])
results = self.solver.solve(ode_fn=h, initial_time=t[0], initial_state=state_init,
solution_times=t, previous_solver_internal_state=solver_state)
return results.times, results.states, results.solver_internal_state
def ngm(self, param):
infec_rate = param['beta1'] * self.K.to_dense()
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * self.T.to_dense() / self.N_sum[None, :]
infec_rate = param['beta1'] * self.M[0].to_dense()
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * self.C.to_dense() / self.N_sum[None, :]
ngm = infec_rate / param['gamma']
return ngm
......@@ -168,3 +177,11 @@ class CovidUKODE:
dom_eigen_vec, i = power_iteration(ngm, tol=tol)
R0 = rayleigh_quotient(ngm, dom_eigen_vec)
return tf.squeeze(R0), i
def covid19uk_logp(y, sim, phi):
# Sum daily increments in removed
r_incr = sim[1:, 3, :] - sim[:-1, 3, :]
r_incr = tf.reduce_sum(r_incr, axis=1)
y_ = tfp.distributions.Poisson(rate=phi*r_incr)
return y_.log_prob(y)
import optparse
import yaml
import time
import pickle as pkl
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from tensorflow_probability import bijectors as tfb
import matplotlib.pyplot as plt
from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.model import CovidUKODE, covid19uk_logp
from covid.util import *
def plotting(dates, sim):
print("Initial R0:", simulator.eval_R0(param))
print("Doubling time:", doubling_time(dates, sim.numpy(), '2020-02-27','2020-03-13'))
fig = plt.figure()
removals = tf.reduce_sum(sim[:, 3, :], axis=1)
infected = tf.reduce_sum(sim[:, 1:3, :], axis=[1,2])
exposed = tf.reduce_sum(sim[:, 1, :], axis=1)
date = np.squeeze(np.where(dates == np.datetime64('2020-03-13'))[0])
print("Daily incidence 2020-03-13:", exposed[date]-exposed[date-1])
plt.plot(dates, removals*0.10, '-', label='10% reporting')
plt.plot(dates, infected, '-', color='red', label='Total infected')
plt.plot(dates, removals, '-', color='gray', label='Total recovered/detected/died')
plt.scatter(np.datetime64('2020-03-13'), 600, label='gov.uk cases 13th March 2020')
plt.legend()
plt.grid(True)
fig.autofmt_xdate()
plt.show()
if __name__ == '__main__':
parser = optparse.OptionParser()
parser.add_option("--config", "-c", dest="config",
help="configuration file")
options, args = parser.parse_args()
with open(options.config, 'r') as ymlfile:
config = yaml.load(ymlfile)
K_tt, age_groups = load_age_mixing(config['data']['age_mixing_matrix_term'])
K_hh, _ = load_age_mixing(config['data']['age_mixing_matrix_hol'])
T, la_names = load_mobility_matrix(config['data']['mobility_matrix'])
np.fill_diagonal(T, 0.)
N, n_names = load_population(config['data']['population_size'])
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
date_range = [case_reports['DateVal'].min(), case_reports['DateVal'].max()]
y = case_reports['CumCases'].to_numpy()
y_incr = np.round((y[1:] - y[:-1]) * 0.8)
simulator = CovidUKODE(K_tt, K_hh, T, N, date_range[0]-np.timedelta64(1,'D'),
date_range[1], settings['holiday'], int(settings['time_step']))
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
#@tf.function
def logp(beta1):
p = param
p['beta1'] = beta1
beta_logp = tfd.Gamma(concentration=1., rate=1.).log_prob(p['beta1'])
t, sim, solve = simulator.simulate(p, state_init)
y_logp = covid19uk_logp(y_incr, sim, 0.1)
return beta_logp + tf.reduce_sum(y_logp)
unconstraining_bijector = [tfb.Log()]
initial_mcmc_state = [0.03]
print("Initial log likelihood:", logp(0.03))
@tf.function
def sample():
return tfp.mcmc.sample_chain(
num_results=2000,
num_burnin_steps=500,
current_state=initial_mcmc_state,
kernel=tfp.mcmc.SimpleStepSizeAdaptation(
tfp.mcmc.TransformedTransitionKernel(
inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
target_log_prob_fn=logp,
step_size=0.0001,
num_leapfrog_steps=5),
bijector=unconstraining_bijector),
num_adaptation_steps=400),
trace_fn=lambda _, pkr: pkr.inner_results.inner_results.is_accepted)
start = time.perf_counter()
pi_beta, accept = sample()
end = time.perf_counter()
print(f"Simulation complete in {end-start} seconds")
plt.plot(pi_beta[0])
plt.show()
print(f"Posterior mean: {np.mean(pi_beta[0])}")
with open('pi_beta_2020-03-15.pkl','wb') as f:
pkl.dump(pi_beta, f)
#dates = settings['start'] + t.numpy().astype(np.timedelta64)
#plotting(dates, sim)
......@@ -5,6 +5,7 @@ data:
age_mixing_matrix_hol: data/polymod_no_school_df.rds
mobility_matrix: data/movement.rds
population_size: data/pop.rds
reported_cases: data/DailyConfirmedCases.csv
parameter:
beta1: 0.035 # R0 2.4
......
"""Prediction functions"""
import optparse
import yaml
import pickle as pkl
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow_probability import stats as tfs
import matplotlib.pyplot as plt
from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.model import CovidUKODE
from covid.util import *
from covid.rdata import load_age_mixing, load_mobility_matrix, load_population
from covid.util import sanitise_settings, sanitise_parameter, seed_areas
if __name__ == '__main__':
......@@ -28,47 +33,67 @@ if __name__ == '__main__':
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
model_term = CovidUKODE(K_tt, T, N)
model_holiday = CovidUKODE(K_hh, T, N)
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
date_range = [case_reports['DateVal'].min(), case_reports['DateVal'].max()]
y = case_reports['CumCases'].to_numpy()
y_incr = y[1:] - y[-1:]
with open('pi_beta_2020-03-15.pkl', 'rb') as f:
pi_beta = pkl.load(f)
# Predictive distribution of epidemic spread
data_dates = np.arange(date_range[0],
date_range[1]+np.timedelta64(1,'D'),
np.timedelta64(1, 'D'))
simulator = CovidUKODE(K_tt, K_hh, T, N, date_range[0] - np.timedelta64(1, 'D'),
np.datetime64('2020-07-01'), settings['holiday'], 1)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = model_term.create_initial_state(init_matrix=seeding)
@tf.function()
def simulate():
t0, sim_0, solve0 = model_term.simulate(param, state_init,
settings['start'], settings['holiday'][0],
settings['time_step'])
t1, sim_1, solve1 = model_holiday.simulate(param, sim_0[-1, :, :],
settings['holiday'][0], settings['holiday'][1],
settings['time_step'], solver_state=None)
t2, sim_2, _ = model_term.simulate(param, sim_1[-1, :, :],
settings['holiday'][1], settings['end'],
settings['time_step'], solver_state=None)
t = tf.concat([t0, t1 + t0[-1], t2 + t0[-1] + t1[-1]], axis=0)
sim = tf.concat([tf.expand_dims(state_init, axis=0), sim_0, sim_1, sim_2], axis=0)
return t, sim
t, sim = simulate()
dates = settings['start'] + t.numpy().astype(np.timedelta64)
print("Initial R0:", model_term.eval_R0(param))
print("Doubling time:", doubling_time(dates, sim.numpy(), '2020-02-27','2020-03-13'))
state_init = simulator.create_initial_state(init_matrix=seeding)
#@tf.function
def prediction(beta):
sims = tf.TensorArray(tf.float32, size=beta.shape[0])
for i in tf.range(beta.shape[0]):
p = param
p['beta1'] = beta[i]
t, sim, solver_results = simulator.simulate(p, state_init)
sim_aggr = tf.reduce_sum(sim, axis=2)
sims = sims.write(i, sim_aggr)
return sims.gather(range(beta.shape[0]))
draws = pi_beta[0].numpy()[np.arange(0, pi_beta[0].shape[0], 20)]
sims = prediction(draws)
dates = np.arange(date_range[0]-np.timedelta64(1, 'D'), np.datetime64('2020-07-01'),
np.timedelta64(1, 'D'))
total_infected = tfs.percentile(tf.reduce_sum(sims[:, :, 1:3], axis=2), q=[2.5, 50, 97.5], axis=0)
removed = tfs.percentile(sims[:, :, 3], q=[2.5, 50, 97.5], axis=0)
removed_observed = tfs.percentile(removed * 0.1, q=[2.5, 50, 97.5], axis=0)
fig = plt.figure()
removals = tf.reduce_sum(sim[:, 3, :], axis=1)
infected = tf.reduce_sum(sim[:, 1:3, :], axis=[1,2])
exposed = tf.reduce_sum(sim[:, 1, :], axis=1)
date = np.squeeze(np.where(dates == np.datetime64('2020-03-13'))[0])
print("Daily incidence 2020-03-13:", exposed[date])
plt.plot(dates, removals[:-1]*0.10, '-', label='10% reporting')
plt.plot(dates, infected[:-1], '-', color='red', label='Total infected')
plt.plot(dates, removals[:-1], '-', color='gray', label='Total recovered/detected/died')
plt.scatter(np.datetime64('2020-03-13'), 600, label='gov.uk cases 13th March 2020')
plt.legend()
plt.grid(True)
filler = plt.fill_between(dates, total_infected[0, :], total_infected[2, :], color='lightgray', label="95% credible interval")
plt.fill_between(dates, removed[0, :], removed[2, :], color='lightgray')
plt.fill_between(dates, removed_observed[0, :], removed_observed[2, :], color='lightgray')
ti_line = plt.plot(dates, total_infected[1, :], '-', color='red', alpha=0.4, label="Infected")
rem_line = plt.plot(dates, removed[1, :], '-', color='blue', label="Removed")
ro_line = plt.plot(dates, removed_observed[1, :], '-', color='orange', label='Predicted detections')
marks = plt.plot(data_dates, y, '+', label='Observed cases')
plt.legend([ti_line[0], rem_line[0], ro_line[0], filler, marks[0]],
["Infected", "Removed", "Predicted detections", "95% credible interval", "Observed counts"])
plt.grid()
plt.xlabel("Date")
plt.ylabel("$10^7$ individuals")
fig.autofmt_xdate()
plt.show()
# Number of new cases per day
new_cases = tfs.percentile(removed[:, 1:] - removed[:, :-1], q=[2.5, 50, 97.5], axis=0)/10000.
fig = plt.figure()
plt.fill_between(dates[:-1], new_cases[0, :], new_cases[2, :], color='lightgray', label="95% credible interval")
plt.plot(dates[:-1], new_cases[1, :], '-', alpha=0.2, label='New cases')
plt.grid()
plt.xlabel("Date")
plt.ylabel("Incidence per 10,000")
fig.autofmt_xdate()
plt.show()
......@@ -45,7 +45,7 @@ if __name__ == '__main__':
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
model_term = CovidUKODE(K_tt, T, N)
model_term = CovidUKODE(K_tt, K_hh, T, N)
model_holiday = CovidUKODE(K_hh, T, N)
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment