Commit 150aae00 authored by Chris Jewell
Browse files

Added functions to process PHE Death data.

parent 8d779320
......@@ -39,5 +39,37 @@ def ingest_data(lad_shp, lad_pop):
return {'geo': lad, 'N': N}
def phe_death_timeseries(filename, date_range=('2020-02-02', '2020-03-21')):
    """Build daily per-region counts of hospital admissions and deaths.

    Rows of the line list are counted per (date, region) twice: once keyed
    on 'Hospital admission date (non-HCID)' and once keyed on
    'PATIENT_DEATH_DATE'.  The counts are laid out on a daily calendar, and
    days/regions with no records are filled with 0.0.

    :param filename: anything accepted by ``pandas.read_excel`` (path,
        buffer, ...), or a DataFrame that already holds the line list.
        The frame must contain the columns
        'Hospital admission date (non-HCID)', 'PATIENT_DEATH_DATE' and
        'Region'.
    :param date_range: pair ``(start, stop)`` of values convertible by
        ``numpy.datetime64``.  The calendar is produced with ``np.arange``
        and is therefore half-open: ``start`` is included, ``stop`` is not.
    :returns: dict with one entry per timeseries ('hospital' and 'deaths').
        Each value is a DataFrame indexed by day whose columns are a
        two-level index named ('timeseries', 'region').
    """
    bounds = [np.datetime64(x) for x in date_range]

    # Allow callers that have already parsed the workbook to reuse it.
    if isinstance(filename, pd.DataFrame):
        linelist = filename
    else:
        linelist = pd.read_excel(filename)

    by_admission = linelist.groupby(
        ['Hospital admission date (non-HCID)', 'Region']).size()
    by_death = linelist.groupby(['PATIENT_DEATH_DATE', 'Region']).size()

    # The two series are aligned on the union of their (date, region) keys;
    # keys present in only one of them become NaN in the other column.
    counts = pd.DataFrame({'hospital': by_admission, 'deaths': by_death})
    counts.index = counts.index.set_names(['date', 'region'])

    # One row per date, columns keyed by (timeseries, region).
    wide = counts.reset_index().pivot(index='date', columns='region')

    days = pd.DatetimeIndex(np.arange(*bounds, np.timedelta64(1, 'D')))

    # Equivalent to left-joining `wide` onto an empty daily frame, but
    # without merging frames whose column indexes have different numbers
    # of levels (recent pandas refuses such a merge) and without
    # flattening the column MultiIndex into tuples.
    combined = wide.reindex(days).fillna(0.0)
    combined.columns = combined.columns.set_names(['timeseries', 'region'])

    # A one-element list selects every region under timeseries `k` and
    # keeps both column levels.  NOTE(review): the previous selector was
    # `[k, None]`; `None` is not a column label, so this assumes the intent
    # was "all regions for k" -- confirm against downstream consumers.
    return {k: combined.loc[:, [k]] for k in combined.columns.levels[0]}
def phe_death_hosp_to_death(filename, date_range=('2020-02-02', '2020-03-21')):
    """Extract per-patient records with the admission-to-death delay.

    :param filename: anything accepted by ``pandas.read_excel`` (path,
        buffer, ...), or a DataFrame that already holds the line list.
        The frame must contain the columns 'Sex', 'Age',
        'Underlying medical condition?',
        'Hospital admission date (non-HCID)' and 'PATIENT_DEATH_DATE'.
    :param date_range: currently unused -- no date filtering is applied.
        Retained so existing callers keep working.
    :returns: DataFrame with columns 'sex', 'age', 'underlying_condition'
        (True where the source value equals 'Yes'), 'hosp_adm_date',
        'death_date' and 'adm_to_death' (death date minus admission date,
        in days).  Rows containing any missing value are dropped.
    """
    if isinstance(filename, pd.DataFrame):
        linelist = filename
    else:
        linelist = pd.read_excel(filename)

    column_names = {
        'Sex': 'sex',
        'Age': 'age',
        'Underlying medical condition?': 'underlying_condition',
        'Hospital admission date (non-HCID)': 'hosp_adm_date',
        'PATIENT_DEATH_DATE': 'death_date',
    }

    # Explicit copy so the assignments below never touch the caller's /
    # the freshly read frame.
    data = linelist.loc[:, list(column_names)].copy()
    data.columns = list(column_names.values())

    # Replace the column outright rather than writing booleans into the
    # existing text column, which depends on that column's dtype accepting
    # them.
    data['underlying_condition'] = data['underlying_condition'] == 'Yes'

    # Dividing a timedelta by one day yields the delay as a float.
    data['adm_to_death'] = (
        (data['death_date'] - data['hosp_adm_date']) / np.timedelta64(1, 'D'))

    return data.dropna(axis=0)
# Script entry-point guard: running this module directly does nothing;
# the body is a placeholder.
if __name__=='__main__':
pass
......@@ -44,3 +44,8 @@ DateVal,CMODateCount,CumCases
2020-03-13,207,797
2020-03-14,264,1061
2020-03-15,330,1391
2020-03-16,152,1543
2020-03-17,407,1950
2020-03-18,676,2626
2020-03-19,643,3269
2020-03-20,714,3983
......@@ -148,6 +148,3 @@ if __name__ == '__main__':
with open('pi_beta_2020-03-15.pkl', 'wb') as f:
pkl.dump(joint_posterior, f)
#dates = settings['start'] + t.numpy().astype(np.timedelta64)
#plotting(dates, sim)
......@@ -84,21 +84,21 @@ if __name__ == '__main__':
draws = pi_beta.numpy()[np.arange(5000, pi_beta.shape[0], 10), :]
with tf.device('/CPU:0'):
sims, R0 = prediction(draws[:, 0], draws[:, 1], draws[:, 2])
sims = tf.stack(sims) # shape=[n_sims, n_times, n_states, n_metapops]
sims = tf.stack(sims) # shape=[n_sims, n_times, n_states, n_metapops]
save_sims(sims, la_names, age_groups, 'pred_2020-03-15.h5')
save_sims(sims, la_names, age_groups, 'pred_2020-03-15.h5')
dub_time = [doubling_time(simulator.times, sim, '2020-03-01', '2020-04-01') for sim in sims.numpy()]
dub_time = [doubling_time(simulator.times, sim, '2020-03-01', '2020-04-01') for sim in sims.numpy()]
# Sum over country
sims = tf.reduce_sum(sims, axis=3)
# Sum over country
sims = tf.reduce_sum(sims, axis=3)
print("Plotting...", flush=True)
dates = np.arange(date_range[0]-np.timedelta64(1, 'D'), np.datetime64('2020-09-01'),
np.timedelta64(1, 'D'))
total_infected = tfs.percentile(tf.reduce_sum(sims[:, :, 1:3], axis=2), q=[2.5, 50, 97.5], axis=0)
removed = tfs.percentile(sims[:, :, 3], q=[2.5, 50, 97.5], axis=0)
removed_observed = tfs.percentile(removed * 0.1, q=[2.5, 50, 97.5], axis=0)
print("Plotting...", flush=True)
dates = np.arange(date_range[0]-np.timedelta64(1, 'D'), np.datetime64('2020-09-01'),
np.timedelta64(1, 'D'))
total_infected = tfs.percentile(tf.reduce_sum(sims[:, :, 1:3], axis=2), q=[2.5, 50, 97.5], axis=0)
removed = tfs.percentile(sims[:, :, 3], q=[2.5, 50, 97.5], axis=0)
removed_observed = tfs.percentile(removed * 0.1, q=[2.5, 50, 97.5], axis=0)
fig = plt.figure()
filler = plt.fill_between(dates, total_infected[0, :], total_infected[2, :], color='lightgray', alpha=0.8, label="95% credible interval")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment