Commit 15610577 authored by Chris Jewell's avatar Chris Jewell
Browse files

Addition of Dynamic Health Atlas data processing

parent 161c77b2
......@@ -28,6 +28,7 @@ from covid_pipeline.tasks import (
summary_geopackage,
summary_longformat,
crystalcast_output,
summary_dha,
)
__all__ = ["run_pipeline"]
......@@ -76,7 +77,10 @@ def run_pipeline(global_config, results_directory, cli_options):
assemble_data(output_file, config["ProcessData"])
@rf.transform(
process_data, rf.formatter(), wd("posterior.hd5"), global_config,
process_data,
rf.formatter(),
wd("posterior.hd5"),
global_config,
)
def run_mcmc(input_file, output_file, config):
mcmc(input_file, output_file, config["Mcmc"])
......@@ -98,7 +102,9 @@ def run_pipeline(global_config, results_directory, cli_options):
)(reproduction_number)
rf.transform(
input=reproduction_number, filter=rf.formatter(), output=wd("national_rt.xlsx"),
input=reproduction_number,
filter=rf.formatter(),
output=wd("national_rt.xlsx"),
)(overall_rt)
# In-sample prediction
......@@ -150,7 +156,9 @@ def run_pipeline(global_config, results_directory, cli_options):
# Summarisation
rf.transform(
input=reproduction_number, filter=rf.formatter(), output=wd("rt_summary.csv"),
input=reproduction_number,
filter=rf.formatter(),
output=wd("rt_summary.csv"),
)(summarize.rt)
rf.transform(
......@@ -223,9 +231,41 @@ def run_pipeline(global_config, results_directory, cli_options):
# DSTL Summary
rf.transform(
[[process_data, insample7, insample14, medium_term, reproduction_number,]],
[
[
process_data,
insample7,
insample14,
medium_term,
reproduction_number,
]
],
rf.formatter(),
wd("summary_longformat.xlsx"),
)(summary_longformat)
# DHA inputs
# Ruffus task that builds the Dynamic Health Atlas outputs under wd("dha").
# Its inputs are the five upstream tasks listed below, passed together as a
# single nested list, and handed on to summary_dha unchanged.
@rf.transform(
input=[
[
process_data,
insample7,
insample14,
medium_term,
reproduction_number,
]
],
filter=rf.formatter(),
output=wd("dha"),
)
def dha_inputs(input_files, output_path):
# Delegates all work to summary_dha; the summary settings are fixed here
# rather than read from the pipeline configuration, except for `config`,
# which is the "Geopackage" section of global_config.
summary_dha(
input_files,
output_path,
num_weeks=8,
ci_list=[0.05, 0.95],
config=global_config["Geopackage"],
# NOTE(review): this URL appears to correspond to the "bayesstm/latest"
# upload destination used by the deployment script -- confirm the two
# are kept in sync.
url="https://fhm-chicas-storage.lancs.ac.uk/bayesstm/latest/",
)
rf.cmdline.run(cli_options)
......@@ -9,6 +9,7 @@ from covid_pipeline.tasks.summary_geopackage import summary_geopackage
from covid_pipeline.tasks.summary_longformat import summary_longformat
import covid_pipeline.tasks.summarize as summarize
from covid_pipeline.tasks.crystalcast_output import crystalcast_output
from covid_pipeline.tasks.summary_dha import summary_dha
__all__ = [
"overall_rt",
......@@ -18,4 +19,5 @@ __all__ = [
"summary_longformat",
"summarize",
"crystalcast_output",
"summary_dha",
]
......@@ -7,7 +7,6 @@
# input_path = "H:/Downloads/2021-06-17_uk/"; input_files = [input_path + "inferencedata.nc", input_path + "insample7.nc", input_path + "insample14.nc", input_path + "medium_term.nc", input_path + "reproduction_number.nc"]
# config = {"base_geopackage":"data/UK2019mod_pop.gpkg", "base_layer":"UK2019mod_pop_xgen"}
from covid_pipeline.tasks.summary_longformat import xarray2summarydf
from covid_pipeline.tasks.summary_longformat import prevalence
from covid_pipeline.tasks import case_exceedance
......@@ -261,7 +260,7 @@ def write_csv(x, cis, folder, file_name):
axis="columns",
)
y = pd.DataFrame(y)
y.to_csv(folder + file_name + ".csv")
y.to_csv(Path(folder) / f"{file_name}.csv")
def write_xls(df1, df2, web_folder_data, name):
......@@ -271,7 +270,9 @@ def write_xls(df1, df2, web_folder_data, name):
:param web_folder_data: (str) output folder for geojson files e.g. "z:/dha_website_root/data/"
:param name: (str) name of layer
"""
writer = pd.ExcelWriter(web_folder_data + name + ".xlsx", engine="openpyxl")
writer = pd.ExcelWriter(
Path(web_folder_data) / f"{name}.xlsx", engine="openpyxl"
)
df1.to_excel(writer, sheet_name="Insample")
df2.to_excel(writer, sheet_name="Pr(pred<obs)")
writer.save()
......@@ -468,10 +469,13 @@ def summary_dha(input_files, output_folder, num_weeks, ci_list, config, url=""):
cases = cases.to_dataframe().reset_index()
dha_format = dha_format_dict()
layers = {}
output_folders = {"web_folder_data": output_folder + "data/",
"web_folder_js": output_folder + "js/"}
Path(output_folders["web_folder_data"]).mkdir(parents=True, exist_ok=True)
Path(output_folders["web_folder_js"]).mkdir(parents=True, exist_ok=True)
# Sub-folders of output_folder ("data" and "js") that the summary writes into.
output_folders = {
    "web_folder_data": Path(output_folder) / "data",
    "web_folder_js": Path(output_folder) / "js",
}
for folder in output_folders.values():
    # Interpolate the path: the previous f-string had no placeholder and
    # printed the literal text 'v' for every folder.
    print(f"Making folder: '{folder}'")
    folder.mkdir(parents=True, exist_ok=True)
# geopackage: load, select, transform and round
dec = re.compile(r"\d*\.\d+")
......@@ -720,4 +724,3 @@ def summary_dha(input_files, output_folder, num_weeks, ci_list, config, url=""):
utils.write_last_updated_time(
output_folders["web_folder_js"], "lastupdated.js", "last updated: "
)
......@@ -79,12 +79,19 @@ poetry run python -m covid_pipeline.pipeline \
# Push results to bucket
BASENAME=$(basename "${arg_staging_dir}")
BUCKET_DIR="${arg_results_dir}/${BASENAME}"
# `aws s3 cp` treats any path without an s3:// scheme as a local path and
# requires at least one side of the copy to be an S3 URI, so the destination
# must be spelled as a bucket URI.
DHA_DIR="s3://bayesstm/latest"

module add aws-cli
aws --endpoint-url https://fhm-chicas-storage.lancs.ac.uk s3 \
    --profile covid-pipeline-data \
    cp --recursive "${arg_staging_dir}" "${BUCKET_DIR}"

# Push dha to bayesstm bucket
aws --endpoint-url https://fhm-chicas-storage.lancs.ac.uk s3 \
    --profile bayesstm \
    cp --recursive "${arg_staging_dir}/dha" "${DHA_DIR}"
# Trigger report
REPORTBRANCH=master
GEOGRAPHY=`echo ${BASENAME} | cut -d"_" -f2`
......
......@@ -9,14 +9,14 @@ license = "MIT"
python = "^3.7"
xarray = {extras = ["netcdf4"], version = "^0.17.0"}
s3fs = "^2021.04.0"
covid19uk = {git = "https://github.com/chrism0dwk/covid19uk.git", tag="v0.8.0-alpha.3"}
ruffus = "^2.8.4"
openpyxl = "^3.0.7"
matplotlib = "^3.4.1"
descartes = "^1.1.0"
enum34 = "1.1.8"
mapclassify = "^2.4.2"
dhaconfig = {git = "https://gitlab.com/achale/dhaconfig.git"}
dhaconfig = {git = "https://gitlab.com/achale/dhaconfig.git", tag="v0.0.2"}
covid19uk = {git = "https://gitlab.com/chicas-covid19/covid19uk.git", tag="v0.8.0-alpha.4"}
[tool.poetry.dev-dependencies]
jedi = "^0.17.2"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment