-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_trend.py
92 lines (73 loc) · 2.75 KB
/
process_trend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python
# coding: utf-8
# CDC: Monkeypox derived timeseries
import pandas as pd
import us
import urllib.request, json
import datetime as dt
# Take a single Pacific-time snapshot so the date and time stamps always agree
# (two separate now() calls could straddle midnight).
now_pacific = pd.Timestamp.now(tz="America/Los_Angeles")
# Run date as YYYY-MM-DD; used later to filter history and name the dated export.
today = now_pacific.strftime("%Y-%m-%d")
# 12-hour clock time without a leading zero, e.g. "9:05 AM". The "%-I" directive
# is a platform-specific strftime extension, so format with the portable "%I"
# and strip the zero padding instead.
time = now_pacific.strftime("%I:%M %p").lstrip("0")
## Get historical case timeseries
#### CDC only updates this weekly
# Column names applied in place of the CSV's own header row (header=0 discards it).
cdc_columns = [
    "epi_date",
    "cases",
    "weekly-average",
    "cumulative_cases",
    "asof",
    "cumulative_sum",
]
cdc_timeseries = pd.read_csv(
    "data/processed/monkeypox_cases_timeseries_cdc_latest.csv",
    names=cdc_columns,
    header=0,
    parse_dates=["epi_date"],
)
# Newest epi_date first, with a fresh 0..n-1 index.
cdc_timeseries = cdc_timeseries.sort_values("epi_date", ascending=False)
cdc_timeseries = cdc_timeseries.reset_index(drop=True)
# Add a YYYY-MM-DD string version of the epi date.
epi_dates = pd.to_datetime(cdc_timeseries["epi_date"])
cdc_timeseries["date"] = epi_dates.dt.strftime("%Y-%m-%d")
#### Previously saved derived timeseries, newest date first
historical_src = pd.read_csv(
    "data/processed/monkeypox_cases_timeseries_cdc_historical.csv",
    parse_dates=["date"],
)
historical_src = historical_src.sort_values("date", ascending=False)
historical_src = historical_src.reset_index(drop=True)
# Store dates as YYYY-MM-DD strings so they compare directly against `today`.
parsed_dates = pd.to_datetime(historical_src["date"])
historical_src["date"] = parsed_dates.dt.strftime("%Y-%m-%d")
# Keep only rows dated strictly before today, then renumber the index.
is_before_today = historical_src["date"] < today
historical_src = historical_src[is_before_today].reset_index(drop=True)
## CDC Monkeypox
#### Latest totals, aggregated by state
states_src = pd.read_csv(
    "https://www.cdc.gov/wcms/vizdata/poxvirus/monkeypox/data/USmap_counts.csv"
)
# Normalize headers: lower-case, with spaces replaced by underscores.
lowered_columns = states_src.columns.str.lower()
states_src.columns = lowered_columns.str.replace(" ", "_", regex=False)
# The case_range column is not needed; only the numeric count is kept.
states_src = states_src.drop(columns=["case_range"])
states_src["cases"] = states_src["cases"].astype(int)
# Exclude the 'Total' and 'Non-US Resident' rows from the per-location frame.
excluded_rows = states_src["location"].isin(["Total", "Non-US Resident"])
states = states_src[~excluded_rows].copy()
#### Aggregate totals among all states to add to timeseries
latest_total = states["cases"].sum()

# Cumulative total recorded on the most recent historical date. Select by
# position (.iloc) rather than by the label 0, so the lookup does not depend on
# how the frame happens to be indexed, and fail with a clear message when there
# is no history to compare against.
latest_history = historical_src.loc[
    historical_src["date"] == historical_src["date"].max(), "cumulative_sum"
]
if latest_history.empty:
    raise ValueError(
        "No historical rows dated before today; cannot compute the change in cases."
    )
historical_total = latest_history.iloc[0]

# New cases since the last recorded date = today's total minus the prior total.
change = latest_total - historical_total

# One-row frame for today, with the date formatted like the historical dates.
updated_data = {"date": today, "cases": change, "cumulative_sum": latest_total}
updated_data_df = pd.DataFrame(updated_data, index=[0])
updated_data_df["date"] = pd.to_datetime(updated_data_df["date"]).dt.strftime(
    "%Y-%m-%d"
)

# Append today's row to the history, keep one row per date, newest first.
df = (
    pd.concat([historical_src, updated_data_df])
    .drop_duplicates(subset="date")
    .sort_values("date", ascending=False)
    .copy()
)
## Exports
# The same frame is written to three CSVs, in this order: the rolling
# historical file, a snapshot stamped with today's date, and the
# "derived latest" file.
csv_targets = (
    "data/processed/monkeypox_cases_timeseries_cdc_historical.csv",
    f"data/processed/monkeypox_cases_timeseries_cdc_historical_{today}.csv",
    "data/processed/monkeypox_cases_derived_timeseries_latest.csv",
)
for csv_path in csv_targets:
    df.to_csv(csv_path, index=False)

# JSON copy of the derived timeseries, one object per row.
df.to_json(
    "data/processed/monkeypox_cases_derived_timeseries_latest.json",
    orient="records",
    indent=4,
)