# util.py
"""Utility functions for COVID19 UK data"""

import os
import re
import datetime
import numpy as np
import pandas as pd


def prependDate(filename):
    """Return *filename* prefixed with today's local date as ``YYYY-MM-DD_``."""
    # The module is imported as ``import datetime``, so the class must be
    # reached as ``datetime.datetime``; ``datetime.now()`` does not exist.
    now = datetime.datetime.now()  # current local date and time
    date_time = now.strftime("%Y-%m-%d")
    return date_time + "_" + filename


def prependID(filename, config):
    """Return *filename* prefixed with ``config["Global"]["prependID_Str"]`` and ``_``."""
    prefix = config["Global"]["prependID_Str"]
    return "_".join((prefix, filename))


def format_input_filename(filename, config):
    """Return the input path with the configured ID prefixed to its basename.

    When ``config["Global"]["prependID"]`` is truthy, the basename is
    prefixed via ``prependID``. To load a file for a specific date, that
    date should be part of the configured ID string.

    The directory part is left untouched; a bare filename stays relative.
    """
    directory, basename = os.path.split(filename)
    if config["Global"]["prependID"]:
        basename = prependID(basename, config)
    # os.path.join avoids turning "name" into "/name" when there is no
    # directory component, and avoids "//name" for files under the root.
    return os.path.join(directory, basename)


def format_output_filename(filename, config):
    """Return the output path with optional ID and date prefixes on its basename.

    The ID prefix is applied first (when ``config["Global"]["prependID"]`` is
    truthy), then the date prefix (when ``config["Global"]["prependDate"]`` is
    truthy), so the date ends up outermost.

    The directory part is left untouched; a bare filename stays relative.
    """
    directory, basename = os.path.split(filename)
    if config["Global"]["prependID"]:
        basename = prependID(basename, config)
    if config["Global"]["prependDate"]:
        basename = prependDate(basename)
    # os.path.join avoids turning "name" into "/name" when there is no
    # directory component, and avoids "//name" for files under the root.
    return os.path.join(directory, basename)


def merge_lad_codes(lad19cd):
    """Map LAD19 codes that are reported jointly onto a combined code string.

    Each code listed below is replaced by the comma-joined pair it belongs
    to; every other value is returned unchanged.

    Parameters
    ----------
    lad19cd : pandas.Series
        Series of LAD19 code strings.

    Returns
    -------
    pandas.Series
        Series of the same length with merged codes substituted.
    """
    merging = {
        "E06000052": "E06000052,E06000053",  # Cornwall & Isles of Scilly
        "E06000053": "E06000052,E06000053",  # Cornwall & Isles of Scilly
        "E09000001": "E09000001,E09000033",  # City of London & Westminster
        "E09000033": "E09000001,E09000033",  # City of London & Westminster
    }
    # dict.get with the value itself as default leaves unknown codes as-is.
    lad19cd = lad19cd.apply(lambda code: merging.get(code, code))

    return lad19cd


def merge_lad_values(df):
    """Sum all rows sharing a ``lad19cd`` and return ``lad19cd`` as a column."""
    grouped = df.groupby("lad19cd")
    totals = grouped.sum()
    return totals.reset_index()


def get_date_low_high(config):
    """Return the entries of ``config["date_range"]`` as a tuple of ``np.datetime64``.

    The entries are converted in order; presumably the range is given as
    (low, high) -- confirm against the configuration schema.
    """
    date_range = [np.datetime64(x) for x in config["date_range"]]
    return tuple(date_range)


def check_date_format(df):
    """Validate that every ``date`` value parses as ``YYYY-MM-DD``.

    Index levels are moved into columns first, so ``date`` may be held in
    either the index or a column.

    Returns True when all values parse; raises ValueError otherwise.
    """
    frame = df.reset_index()

    # Unparseable entries are coerced to NaT, so any null marks a bad value.
    parsed = pd.to_datetime(frame["date"], format="%Y-%m-%d", errors="coerce")
    if parsed.isnull().any():
        raise ValueError("Invalid date format")

    return True


def check_date_bounds(df, date_low, date_high):
    """Validate that every ``date`` lies in the half-open range [date_low, date_high).

    Returns True when all dates are in range; raises ValueError otherwise.
    """
    dates = df["date"]
    not_too_early = date_low <= dates
    not_too_late = dates < date_high
    if (not_too_early & not_too_late).all():
        return True
    raise ValueError("Date out of bounds")


def check_lad19cd_format(df):
    """Validate that every ``lad19cd`` value is a well-formed LAD19 code.

    A valid code is exactly 9 characters: one region letter from ``ENSW``
    followed by 8 ASCII digits. Index levels are moved into columns first,
    so ``lad19cd`` may be held in either the index or a column.

    Returns True when all codes are valid; raises ValueError otherwise
    (including for non-string values and codes containing no digits).
    """
    codes = df.reset_index()["lad19cd"]

    # fullmatch enforces the whole 9-character shape, so trailing characters
    # are rejected and malformed values can never trigger an IndexError.
    is_valid = codes.apply(
        lambda code: isinstance(code, str)
        and re.fullmatch(r"[ENSW][0-9]{8}", code) is not None
    )
    if not is_valid.all():
        raise ValueError("Invalid lad19cd format")

    return True


def invalidInput(input):
    """Raise NotImplementedError naming the unsupported input type."""
    message = f'Input type "{input}" mode not implemented'
    raise NotImplementedError(message)