Commit 15938bd6 authored by Chris Jewell's avatar Chris Jewell
Browse files

Performance enhancements

parent 150aae00
"""Functions for infection rates"""
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability.python.internal import dtype_util
import numpy as np
from covid.impl.chainbinom_simulate import chain_binomial_simulate
......@@ -10,8 +11,8 @@ tla = tf.linalg
def power_iteration(A, tol=1e-3):
b_k = tf.random.normal([A.shape[1], 1], dtype=tf.float64)
epsilon = tf.constant(1., dtype=tf.float64)
b_k = tf.random.normal([A.shape[1], 1], dtype=A.dtype)
epsilon = tf.constant(1., dtype=A.dtype)
i = 0
while tf.greater(epsilon, tol):
b_k1 = tf.matmul(A, b_k)
......@@ -32,7 +33,7 @@ def rayleigh_quotient(A, b):
def dense_to_block_diagonal(A, n_blocks):
    """Return a block-diagonal linear operator with ``n_blocks`` copies of ``A``.

    Implemented as the Kronecker product ``I_{n_blocks} (x) A``, so the dense
    blocks only occupy memory of A's shape while the identity factor stays
    diagonal/implicit.

    :param A: a (batch of) square dense matrix/tensor; must expose ``.dtype``
    :param n_blocks: the number of diagonal blocks
    :returns: a ``tf.linalg.LinearOperatorKronecker`` representing the
        block-diagonal matrix
    """
    A_dense = tf.linalg.LinearOperatorFullMatrix(A)
    # Use A's own dtype (rather than hard-coded float64) so the operator
    # works for whatever precision the caller supplies.
    eye = tf.linalg.LinearOperatorIdentity(n_blocks, dtype=A.dtype)
    A_block = tf.linalg.LinearOperatorKronecker([eye, A_dense])
    return A_block
......@@ -48,29 +49,46 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
:param N: a vector of population sizes in each LAD
:param n_lads: the number of LADS
"""
dtype = dtype_util.common_dtype([M_tt, M_hh, C, N], dtype_hint=np.float64)
self.n_ages = M_tt.shape[0]
self.n_lads = C.shape[0]
self.M_tt = tf.convert_to_tensor(M_tt, dtype=tf.float64)
self.M_hh = tf.convert_to_tensor(M_hh, dtype=tf.float64)
self.Kbar = tf.reduce_mean(self.M_tt)
C = tf.cast(C, tf.float64)
self.C = tla.LinearOperatorFullMatrix(C + tf.transpose(C))
shp = tla.LinearOperatorFullMatrix(np.ones_like(M_tt, dtype=np.float64))
self.C = tla.LinearOperatorKronecker([self.C, shp])
self.N = tf.constant(N, dtype=tf.float64)
# Create one linear operator comprising both the term and holiday
# matrices. This is nice because
# - the dense "M" parts will take up memory of shape [2, M, M]
# - the identity matirix will only take up memory of shape [M]
# - matmuls/matvecs will be quite efficient because of the
# LinearOperatorKronecker structure and diagonal structure of the
# identity piece thereof.
# It should be sufficiently efficient that we can just get rid of the
# control flow switching between the two operators, and instead just do
# both matmuls in one big (vectorized!) pass, and pull out what we want
# after the fact with tf.gather.
self.M = dense_to_block_diagonal(
np.stack([M_tt, M_hh], axis=0), self.n_lads)
self.Kbar = tf.reduce_mean(M_tt)
self.C = tf.linalg.LinearOperatorFullMatrix(C + tf.transpose(C))
shp = tf.linalg.LinearOperatorFullMatrix(tf.ones_like(M_tt, dtype=dtype))
self.C = tf.linalg.LinearOperatorKronecker([self.C, shp])
self.N = tf.constant(N, dtype=dtype)
N_matrix = tf.reshape(self.N, [self.n_lads, self.n_ages])
N_sum = tf.reduce_sum(N_matrix, axis=1)
N_sum = N_sum[:, None] * tf.ones([1, self.n_ages], dtype=tf.float64)
N_sum = N_sum[:, None] * tf.ones([1, self.n_ages], dtype=dtype)
self.N_sum = tf.reshape(N_sum, [-1])
self.times = np.arange(start, end, np.timedelta64(t_step, 'D'))
self.school_hols = [tf.constant((holidays[0] - start) // np.timedelta64(1, 'D'), dtype=tf.float64),
tf.constant((holidays[1] - start) // np.timedelta64(1, 'D'), dtype=tf.float64)]
self.bg_max_t = tf.convert_to_tensor(bg_max_t, dtype=tf.float64)
self.m_select = np.int64((self.times >= holidays[0]) &
(self.times < holidays[1]))
self.max_t = self.m_select.shape[0] - 1
self.bg_select = tf.constant(self.times < bg_max_t, dtype=dtype)
self.bg_max_t = tf.convert_to_tensor(bg_max_t, dtype=dtype)
self.solver = tode.DormandPrince()
def make_h(self, param):
......@@ -78,16 +96,16 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
def h_fn(t, state):
state = tf.unstack(state, axis=0)
S, E, I, R = state
M = tf.where(tf.less_equal(self.school_hols[0], t) & tf.less(t, self.school_hols[1]),
self.M_hh, self.M_tt)
M = dense_to_block_diagonal(M, self.n_lads)
epsilon = tf.where(t < self.bg_max_t, param['epsilon'], tf.constant(0., dtype=tf.float64))
infec_rate = param['beta1'] * tla.matvec(M, I)
infec_rate += param['beta1'] * param['beta2'] * self.Kbar * tla.matvec(self.C, I / self.N_sum)
# Integrator may produce time values outside the range desired, so
# we clip, implicitly assuming the outside dates have the same
# holiday status as their nearest neighbors in the desired range.
t_idx = tf.clip_by_value(tf.cast(t, tf.int64), 0, self.max_t)
m_switch = tf.gather(self.m_select, t_idx)
epsilon = param['epsilon'] * tf.gather(self.bg_select, t_idx)
infec_rate = param['beta1'] * (
tf.gather(self.M.matvec(I), m_switch) +
param['beta2'] * self.Kbar * self.C.matvec(I / self.N_sum))
infec_rate = S / self.N * (infec_rate + epsilon)
dS = -infec_rate
......@@ -122,8 +140,10 @@ class CovidUKODE: # TODO: add background case importation rate to the UK, e.g.
return results.times, results.states, results.solver_internal_state
def ngm(self, param):
    """Compute the next generation matrix (NGM) of the epidemic model.

    :param param: a dict of scalar parameters with keys ``'beta1'``,
        ``'beta2'`` and ``'gamma'``
    :returns: the infection-rate matrix divided by the removal rate
        ``gamma`` -- expected secondary cases per infective.
    """
    # NOTE(review): two dead statements recomputing infec_rate from
    # self.M_tt were removed here -- they were immediately overwritten
    # below, and self.M_tt is no longer stored by the updated __init__.
    # self.M stacks [term-time, holiday] mixing matrices; slice 0 is
    # the term-time matrix -- presumably the intended basis for R0.
    infec_rate = param['beta1'] * (
        self.M.to_dense()[0, ...] +
        (param['beta2'] * self.Kbar * self.C.to_dense() /
         self.N_sum[np.newaxis, :]))
    ngm = infec_rate / param['gamma']
    return ngm
......
DateVal,CMODateCount,CumCases
2020-01-31,2,2
2020-02-01,0,2
2020-02-02,0,2
2020-02-03,0,2
2020-02-04,0,2
2020-02-05,0,2
2020-02-06,1,3
2020-02-07,0,3
2020-02-08,0,3
2020-02-09,1,4
2020-02-10,4,8
2020-02-11,0,8
2020-02-12,0,8
2020-02-13,1,9
2020-02-14,0,9
2020-02-15,0,9
2020-02-16,0,9
2020-02-17,0,9
2020-02-18,0,9
2020-02-19,0,9
2020-02-20,0,9
2020-02-21,0,9
2020-02-22,0,9
2020-02-23,0,9
2020-02-24,4,13
2020-02-25,0,13
2020-02-26,0,13
2020-02-27,0,13
2020-02-28,6,19
2020-02-29,4,23
2020-03-01,12,35
2020-03-02,5,40
2020-03-03,11,51
2020-03-04,34,85
2020-03-05,29,114
2020-03-06,46,160
2020-03-07,46,206
2020-03-08,65,271
2020-03-09,50,321
2020-03-10,52,373
2020-03-11,83,456
2020-03-12,139,590
2020-03-13,207,797
2020-03-14,264,1061
2020-03-15,330,1391
2020-03-16,152,1543
2020-03-17,407,1950
2020-03-18,676,2626
2020-03-19,643,3269
2020-03-20,714,3983
......@@ -14,6 +14,8 @@ from covid.rdata import load_population, load_age_mixing, load_mobility_matrix
from covid.model import CovidUKODE, covid19uk_logp
from covid.util import *
DTYPE = np.float64
def plotting(dates, sim):
print("Initial R0:", simulator.eval_R0(param))
......@@ -40,11 +42,9 @@ def plotting(dates, sim):
def random_walk_mvnorm_fn(covariance, name=None):
"""Returns callable that adds Multivariate Normal noise to the input"""
covariance = covariance + tf.eye(covariance.shape[0], dtype=tf.float64) * 1.e-9
scale_tril = tf.linalg.cholesky(covariance)
rv = tfp.distributions.MultivariateNormalTriL(loc=tf.zeros(covariance.shape[0], dtype=tf.float64),
scale_tril=tf.linalg.cholesky(
tf.convert_to_tensor(covariance,
dtype=tf.float64)))
scale_tril=scale_tril)
def _fn(state_parts, seed):
with tf.name_scope(name or 'random_walk_mvnorm_fn'):
......@@ -72,17 +72,31 @@ if __name__ == '__main__':
N, n_names = load_population(config['data']['population_size'])
K_tt = K_tt.astype(DTYPE)
K_hh = K_hh.astype(DTYPE)
T = T.astype(DTYPE)
N = N.astype(DTYPE)
param = sanitise_parameter(config['parameter'])
settings = sanitise_settings(config['settings'])
case_reports = pd.read_csv(config['data']['reported_cases'])
case_reports['DateVal'] = pd.to_datetime(case_reports['DateVal'])
case_reports = case_reports[case_reports['DateVal'] >= '2020-02-19']
date_range = [case_reports['DateVal'].min(), case_reports['DateVal'].max()]
y = case_reports['CumCases'].to_numpy()
y_incr = np.round((y[1:] - y[:-1]) * 0.8)
simulator = CovidUKODE(K_tt, K_hh, T, N, date_range[0]-np.timedelta64(1,'D'),
date_range[1], settings['holiday'], settings['bg_max_time'], int(settings['time_step']))
simulator = CovidUKODE(
M_tt=K_tt,
M_hh=K_hh,
C=T,
N=N,
start=date_range[0]-np.timedelta64(1,'D'),
end=date_range[1],
holidays=settings['holiday'],
bg_max_t=settings['bg_max_time'],
t_step=int(settings['time_step']))
seeding = seed_areas(N, n_names) # Seed 40-44 age group, 30 seeds by popn size
state_init = simulator.create_initial_state(init_matrix=seeding)
......@@ -92,7 +106,7 @@ if __name__ == '__main__':
p['epsilon'] = par[0]
p['beta1'] = par[1]
p['gamma'] = par[2]
epsilon_logp = tfd.Gamma(concentration=tf.constant(1., tf.float64), rate=tf.constant(1., tf.float64)).log_prob(p['epsilon'])
epsilon_logp = tfd.Gamma(concentration=tf.constant(8., tf.float64), rate=tf.constant(1000., tf.float64)).log_prob(p['epsilon'])
beta_logp = tfd.Gamma(concentration=tf.constant(1., tf.float64), rate=tf.constant(1., tf.float64)).log_prob(p['beta1'])
gamma_logp = tfd.Gamma(concentration=tf.constant(100., tf.float64), rate=tf.constant(400., tf.float64)).log_prob(p['gamma'])
t, sim, solve = simulator.simulate(p, state_init)
......@@ -108,14 +122,14 @@ if __name__ == '__main__':
unconstraining_bijector = [tfb.Exp()]
initial_mcmc_state = np.array([0.001, 0.036, 0.25], dtype=np.float64)
initial_mcmc_state = np.array([0.008, 0.05, 0.25], dtype=np.float64)
print("Initial log likelihood:", logp(initial_mcmc_state))
@tf.function
def sample(n_samples, init_state, scale):
@tf.function(autograph=False, experimental_compile=True)
def sample(n_samples, init_state, scale, num_burnin_steps=0):
return tfp.mcmc.sample_chain(
num_results=n_samples,
num_burnin_steps=0,
num_burnin_steps=num_burnin_steps,
current_state=init_state,
kernel=tfp.mcmc.TransformedTransitionKernel(
inner_kernel=tfp.mcmc.RandomWalkMetropolis(
......@@ -125,15 +139,36 @@ if __name__ == '__main__':
bijector=unconstraining_bijector),
trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)
joint_posterior = tf.zeros([0] + list(initial_mcmc_state.shape), dtype=DTYPE)
scale = np.diag([0.00001, 0.00001, 0.00001])
overall_start = time.perf_counter()
num_covariance_estimation_iterations = 50
num_covariance_estimation_samples = 50
num_final_samples = 10000
with tf.device("/CPU:0"):
cov = np.diag([0.00001, 0.00001, 0.00001])
start = time.perf_counter()
joint_posterior, results = sample(50, init_state=initial_mcmc_state, scale=cov)
for i in range(200):
cov = tfp.stats.covariance(tf.math.log(joint_posterior)) * 2.38**2 / joint_posterior.shape[1]
for i in range(num_covariance_estimation_iterations):
step_start = time.perf_counter()
samples, results = sample(num_covariance_estimation_samples,
initial_mcmc_state,
scale)
step_end = time.perf_counter()
print(f'{i} time {step_end - step_start}')
print("Acceptance: ", results.numpy().mean())
joint_posterior = tf.concat([joint_posterior, samples], axis=0)
cov = tfp.stats.covariance(tf.math.log(joint_posterior))
print(cov.numpy())
posterior_new, results = sample(50, joint_posterior[-1, :].numpy(), cov)
joint_posterior = tf.concat([joint_posterior, posterior_new], axis=0)
scale = cov * 2.38**2 / joint_posterior.shape[1]
initial_mcmc_state = joint_posterior[-1, :]
step_start = time.perf_counter()
samples, results = sample(num_final_samples,
init_state=joint_posterior[-1, :], scale=scale,)
joint_posterior = tf.concat([joint_posterior, samples], axis=0)
step_end = time.perf_counter()
print(f'Sampling step time {step_end - step_start}')
end = time.perf_counter()
print(f"Simulation complete in {end-start} seconds")
print("Acceptance: ", np.mean(results.numpy()))
......
......@@ -5,7 +5,7 @@ data:
age_mixing_matrix_hol: data/polymod_no_school_df.rds
mobility_matrix: data/movement.rds
population_size: data/pop.rds
reported_cases: data/DailyConfirmedCases.csv
reported_cases: data/DailyConfirmedCases_2020-03-20.csv
parameter:
epsilon: 1.0e-6
......@@ -15,7 +15,7 @@ parameter:
gamma: 0.25
settings:
start: 2020-02-04
start: 2020-02-19
end: 2020-04-01
holiday:
- 2020-03-23
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment