Commit c8546479 authored by Chris Jewell
Browse files

Production pipeline brought up to date.

parent e5645b64
# Covid stochastic model configuration
ProcessData:
date_range:
- 2020-11-20
- 2021-02-12
mobility_matrix: data/mergedflows.csv
population_size: data/c2019modagepop.csv
commute_volume: # Can be replaced by DfT traffic flow data - contact authors <c.jewell@lancaster.ac.uk>
CasesData:
input: csv
address: data/Anonymised Combined Line List 20210419.csv
pillars:
- Pillar 1 # Capability to filter Pillar 1 and 2 testing data from PHE confidential line listing
measure: specimen # Capability to filter date of test report from PHE confidential line listing
format: phe
AreaCodeData:
input: json
address: "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_APR_2019_UK_NC/FeatureServer/0/query?where=1%3D1&outFields=LAD19CD,LAD19NM&returnGeometry=false&returnDistinctValues=true&orderByFields=LAD19CD&outSR=4326&f=json"
format: ons
regions:
- E # England
Mcmc:
dmax: 84 # Max distance to move events
nmax: 50 # Max num events per metapopulation/time to move
m: 1 # Number of metapopulations to move
occult_nmax: 15 # Max number of occults to add/delete per metapop/time
num_event_time_updates: 35 # Num event and occult updates per sweep of Gibbs MCMC sampler.
num_bursts: 200 # Number of MCMC bursts of `num_burst_samples`
num_burst_samples: 50 # Number of MCMC samples per burst
thin: 20 # Thin MCMC samples every `thin` iterations
ThinPosterior: # Post-process further chain thinning HDF5 -> .pkl.
start: 6000
end: 10000
by: 10
Geopackage: # covid.tasks.summary_geopackage
base_geopackage: data/UK2019mod_pop.gpkg
base_layer: UK2019mod_pop_xgen
\ No newline at end of file
# Covid stochastic model configuration
ProcessData:
date_range:
- 2020-11-20
- 2021-02-12
mobility_matrix: data/mergedflows.csv
population_size: data/c2019modagepop.csv
commute_volume: # Can be replaced by DfT traffic flow data - contact authors <c.jewell@lancaster.ac.uk>
CasesData:
input: csv
address: data/Anonymised Combined Line List 20210419.csv
pillars:
- Pillar 2
measure: specimen # Capability to filter date of test report from PHE confidential line listing
format: phe
AreaCodeData:
input: json
address: "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_APR_2019_UK_NC/FeatureServer/0/query?where=1%3D1&outFields=LAD19CD,LAD19NM&returnGeometry=false&returnDistinctValues=true&orderByFields=LAD19CD&outSR=4326&f=json"
format: ons
regions:
- E # England
Mcmc:
dmax: 84 # Max distance to move events
nmax: 50 # Max num events per metapopulation/time to move
m: 1 # Number of metapopulations to move
occult_nmax: 15 # Max number of occults to add/delete per metapop/time
num_event_time_updates: 35 # Num event and occult updates per sweep of Gibbs MCMC sampler.
num_bursts: 200 # Number of MCMC bursts of `num_burst_samples`
num_burst_samples: 50 # Number of MCMC samples per burst
thin: 20 # Thin MCMC samples every `thin` iterations
ThinPosterior: # Post-process further chain thinning HDF5 -> .pkl.
start: 6000
end: 10000
by: 10
Geopackage: # covid.tasks.summary_geopackage
base_geopackage: data/UK2019mod_pop.gpkg
base_layer: UK2019mod_pop_xgen
\ No newline at end of file
......@@ -41,7 +41,7 @@ done
source /usr/shared_apps/admin/etc/sge/switch-gpu.sh
# Environment variables for date and results staging
JOBNAME=c19_$arg_date_$arg_config
JOBNAME=c19_${arg_date}_${arg_config}
STDOUT=$arg_staging_dir/stdout.txt
# Final results dir
......
# Covid stochastic model configuration
ProcessData:
date_range:
- 2020-10-09
- 2021-01-01
mobility_matrix: data/mergedflows.csv
population_size: data/c2019modagepop.csv
commute_volume: # Can be replaced by DfT traffic flow data - contact authors <c.jewell@lancaster.ac.uk>
CasesData:
input: url
address: https://api.coronavirus.data.gov.uk/v2/data?areaType=ltla&metric=newCasesBySpecimenDate&format=json
# address: /home/hpc/39/jewellcp/Projects/covid-production-pipeline/data/cases_2021-04-12_uk.csv
pillars: None # Capability to filter Pillar 1 and 2 testing data from PHE confidential line listing
measure: None # Capability to filter date of test report from PHE confidential line listing
format: gov
AreaCodeData:
input: json
address: "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_APR_2019_UK_NC/FeatureServer/0/query?where=1%3D1&outFields=LAD19CD,LAD19NM&returnGeometry=false&returnDistinctValues=true&orderByFields=LAD19CD&outSR=4326&f=json"
format: ons
regions:
- N # Northern Ireland
Mcmc:
dmax: 84 # Max distance to move events
nmax: 50 # Max num events per metapopulation/time to move
m: 1 # Number of metapopulations to move
occult_nmax: 15 # Max number of occults to add/delete per metapop/time
num_event_time_updates: 35 # Num event and occult updates per sweep of Gibbs MCMC sampler.
num_bursts: 200 # Number of MCMC bursts of `num_burst_samples`
num_burst_samples: 50 # Number of MCMC samples per burst
thin: 20 # Thin MCMC samples every `thin` iterations
ThinPosterior: # Post-process further chain thinning HDF5 -> .pkl.
start: 6000
end: 10000
by: 10
Geopackage: # covid.tasks.summary_geopackage
base_geopackage: data/UK2019mod_pop.gpkg
base_layer: UK2019mod_pop_xgen
\ No newline at end of file
......@@ -72,8 +72,9 @@ echo Date range $DATELOW to $DATEHIGH
poetry run python -m covid.pipeline \
--config ${arg_config} \
--date-range $DATELOW $DATEHIGH \
--results-directory ${arg_staging_dir} ||
error_exit "Pipeline job failed"
--results-directory ${arg_staging_dir} \
--aws -v ||
error_exit "Pipeline job failed"
rsync -aP ${arg_staging_dir} ${arg_results_dir} ||
error_exit "Error copying ${arg_staging_dir} to ${arg_results_dir}"
......
......@@ -7,8 +7,15 @@ license = "MIT"
[tool.poetry.dependencies]
python = "^3.7"
covid19uk = {git = "git@fhm-chicas-code.lancs.ac.uk:jewell/covid19uk.git", rev="65bd815"}
covid19uk = {git = "git@fhm-chicas-code.lancs.ac.uk:jewell/covid19uk.git", rev="d9c52e4eee49d9befd97db60cca1c58835a73dbd"}
drmaa = "^0.7.9"
jupyter = "^1.0.0"
xarray = {extras = ["netcdf4"], version = "^0.17.0"}
s3fs = "^0.5.2"
jupyterlab = "^3.0.12"
ipympl = "^0.6.3"
zarr = "^2.7.0"
hide-code = "^0.6.0"
[tool.poetry.dev-dependencies]
ipython = "^7.20.0"
......
# Covid stochastic model configuration
ProcessData:
date_range:
- 2020-10-09
- 2021-01-01
mobility_matrix: data/mergedflows.csv
population_size: data/c2019modagepop.csv
commute_volume: # Can be replaced by DfT traffic flow data - contact authors <c.jewell@lancaster.ac.uk>
CasesData:
input: url
address: https://api.coronavirus.data.gov.uk/v2/data?areaType=ltla&metric=newCasesBySpecimenDate&format=json
# address: /home/hpc/39/jewellcp/Projects/covid-production-pipeline/data/cases_2021-04-12_uk.csv
pillars: None # Capability to filter Pillar 1 and 2 testing data from PHE confidential line listing
measure: None # Capability to filter date of test report from PHE confidential line listing
format: gov
AreaCodeData:
input: json
address: "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_APR_2019_UK_NC/FeatureServer/0/query?where=1%3D1&outFields=LAD19CD,LAD19NM&returnGeometry=false&returnDistinctValues=true&orderByFields=LAD19CD&outSR=4326&f=json"
format: ons
regions:
- S # Scotland
Mcmc:
dmax: 84 # Max distance to move events
nmax: 50 # Max num events per metapopulation/time to move
m: 1 # Number of metapopulations to move
occult_nmax: 15 # Max number of occults to add/delete per metapop/time
num_event_time_updates: 35 # Num event and occult updates per sweep of Gibbs MCMC sampler.
num_bursts: 200 # Number of MCMC bursts of `num_burst_samples`
num_burst_samples: 50 # Number of MCMC samples per burst
thin: 20 # Thin MCMC samples every `thin` iterations
ThinPosterior: # Post-process further chain thinning HDF5 -> .pkl.
start: 6000
end: 10000
by: 10
Geopackage: # covid.tasks.summary_geopackage
base_geopackage: data/UK2019mod_pop.gpkg
base_layer: UK2019mod_pop_xgen
AWSS3:  # AWS S3 settings; entries must be `key: value` pairs to form a mapping
  bucket: lancaster-covid-pipeline
  profile: covid-pipeline
......@@ -11,9 +11,10 @@ ProcessData:
CasesData:
input: url
address: https://api.coronavirus.data.gov.uk/v2/data?areaType=ltla&metric=newCasesBySpecimenDate&format=json
# address: /home/hpc/39/jewellcp/Projects/covid-production-pipeline/data/cases_2021-04-12_uk.csv
pillars: None # Capability to filter Pillar 1 and 2 testing data from PHE confidential line listing
measure: None # Capability to filter date of test report from PHE confidential line listing
format: json
format: gov
AreaCodeData:
input: json
......@@ -23,7 +24,7 @@ ProcessData:
- S # Scotland
- E # England
- W # Wales
- NI # Northern Ireland
- N # Northern Ireland
Mcmc:
dmax: 84 # Max distance to move events
......@@ -42,4 +43,8 @@ ThinPosterior: # Post-process further chain thinning HDF5 -> .pkl.
Geopackage: # covid.tasks.summary_geopackage
base_geopackage: data/UK2019mod_pop.gpkg
base_layer: UK2019mod_pop_xgen
\ No newline at end of file
base_layer: UK2019mod_pop_xgen
AWSS3:
bucket: lancaster-covid-pipeline
profile: covid-pipeline
# Covid stochastic model configuration
ProcessData:
date_range:
- 2020-10-09
- 2021-01-01
mobility_matrix: data/mergedflows.csv
population_size: data/c2019modagepop.csv
commute_volume: # Can be replaced by DfT traffic flow data - contact authors <c.jewell@lancaster.ac.uk>
CasesData:
input: url
address: https://api.coronavirus.data.gov.uk/v2/data?areaType=ltla&metric=newCasesBySpecimenDate&format=json
# address: /home/hpc/39/jewellcp/Projects/covid-production-pipeline/data/cases_2021-04-12_uk.csv
pillars: None # Capability to filter Pillar 1 and 2 testing data from PHE confidential line listing
measure: None # Capability to filter date of test report from PHE confidential line listing
format: gov
AreaCodeData:
input: json
address: "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_APR_2019_UK_NC/FeatureServer/0/query?where=1%3D1&outFields=LAD19CD,LAD19NM&returnGeometry=false&returnDistinctValues=true&orderByFields=LAD19CD&outSR=4326&f=json"
format: ons
regions:
- W # Wales
Mcmc:
dmax: 84 # Max distance to move events
nmax: 50 # Max num events per metapopulation/time to move
m: 1 # Number of metapopulations to move
occult_nmax: 15 # Max number of occults to add/delete per metapop/time
num_event_time_updates: 35 # Num event and occult updates per sweep of Gibbs MCMC sampler.
num_bursts: 200 # Number of MCMC bursts of `num_burst_samples`
num_burst_samples: 50 # Number of MCMC samples per burst
thin: 20 # Thin MCMC samples every `thin` iterations
ThinPosterior: # Post-process further chain thinning HDF5 -> .pkl.
start: 6000
end: 10000
by: 10
Geopackage: # covid.tasks.summary_geopackage
base_geopackage: data/UK2019mod_pop.gpkg
base_layer: UK2019mod_pop_xgen
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment