# backend.py: data backend for the COVID-19 dashboard
# (forked from chschoenenberger/covid19_dashboard)
import datetime
import math
import os
import time
import unicodedata

import pandas as pd
import requests

def normalize_str(s):
    """ Normalize a name to upper-case ASCII (e.g. strip accents like áéíóú). """
    return unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii").upper()
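
# Illustrative example: normalize_str('Córdoba') == 'CORDOBA', so accented
# names can be matched against the upper-case ASCII location names used in
# the CSVs.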
FILES_TO_DOWNLOAD = {
'Argentina_Provinces.csv': 'https://raw.githubusercontent.com/mariano22/argcovidapi/master/csvs/Argentina_Provinces.csv',
'SantaFe_AllData.csv': 'https://raw.githubusercontent.com/mariano22/argcovidapi/master/csvs/SantaFe_AllData.csv',
}
DATA_DIR = './data/'

def _download_file(url, out_file):
    response = requests.get(url)
    assert response.status_code == 200, \
        'Wrong status code while downloading {}'.format(out_file)
    with open(out_file, "wb") as f:
        f.write(response.content)

def _download_expired_data():
    # Re-download each CSV if it is missing or older than 30 minutes.
    for csv_fn, csv_remote_fp in FILES_TO_DOWNLOAD.items():
        csv_fp = os.path.join(DATA_DIR, csv_fn)
        if (not os.path.isfile(csv_fp)) or (time.time() - os.stat(csv_fp).st_mtime > 30 * 60):
            print('Downloading', csv_fn)
            _download_file(csv_remote_fp, csv_fp)

def _load_National_data(csv_fp):
    df_arg = pd.read_csv(csv_fp)
    df_arg['LOCATION'] = 'ARGENTINA/' + df_arg['PROVINCIA']
    df_arg = df_arg.drop(columns=['PROVINCIA'])
    df_arg = df_arg.set_index(['TYPE', 'LOCATION'])
    # Column headers are day/month strings; parse them as dates in 2020.
    df_arg = df_arg.rename(columns=lambda colname: pd.to_datetime(colname, format='%d/%m').replace(year=2020))
    # Add a country-wide total under the 'ARGENTINA' location.
    total_arg = df_arg.groupby(level=[0]).sum()
    total_arg['LOCATION'] = 'ARGENTINA'
    total_arg = total_arg.reset_index().set_index(['TYPE', 'LOCATION'])
    df_arg = pd.concat([df_arg, total_arg]).sort_index()
    # Drop the last (most recent) date column.
    df_arg = df_arg[df_arg.columns[:-1]]
    return df_arg
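
# The resulting frame is indexed by (TYPE, LOCATION) with one column per day;
# for example, df_arg.loc[('CONFIRMADOS', 'ARGENTINA/SANTA FE')] would be the
# confirmed-cases series for Santa Fe, and ('CONFIRMADOS', 'ARGENTINA') the
# country-wide total.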

def _set_location_safe(row):
    # Build the hierarchical LOCATION path for a Santa Fe row.
    location_prefix = 'ARGENTINA/SANTA FE'
    if row['DEPARTMENT'] == '##TOTAL':
        return location_prefix
    # Department names carry a 3-character prefix in the CSV; strip it.
    location_prefix += '/' + row['DEPARTMENT'][3:]
    if row['PLACE'].startswith('#'):
        return location_prefix
    return location_prefix + '/' + row['PLACE']
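
# Resulting path scheme: the '##TOTAL' row maps to 'ARGENTINA/SANTA FE', a
# '#'-prefixed place to 'ARGENTINA/SANTA FE/<department>', and a regular place
# to 'ARGENTINA/SANTA FE/<department>/<place>'.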

def _load_SantaFe_data(csv_fp):
    df_safe = pd.read_csv(csv_fp)
    df_safe['LOCATION'] = df_safe.apply(_set_location_safe, axis=1)
    # Keep only confirmed-case rows and drop the province-wide total row.
    df_safe = df_safe[(df_safe['TYPE'] == 'CONFIRMADOS') & (df_safe['DEPARTMENT'] != '##TOTAL')]
    # Normalize a few alternative place-name spellings.
    df_safe['LOCATION'] = df_safe['LOCATION'].replace({
        'ARGENTINA/SANTA FE/IRIONDO/CLASSON': 'ARGENTINA/SANTA FE/IRIONDO/CLASON',
        'ARGENTINA/SANTA FE/ROSARIO/VILLA GOB. GALVEZ': 'ARGENTINA/SANTA FE/ROSARIO/VILLA GOBERNADOR GALVEZ',
        'ARGENTINA/SANTA FE/SAN LORENZO/PUERTO GRAL. SAN MARTIN': 'ARGENTINA/SANTA FE/SAN LORENZO/PUERTO GENERAL SAN MARTIN',
    })
    df_safe = df_safe.drop(columns=['DEPARTMENT', 'PLACE'])
    df_safe = df_safe.set_index(['TYPE', 'LOCATION'])
    df_safe = df_safe.rename(columns=lambda colname: pd.to_datetime(colname, format='%d/%m/%Y'))
    return df_safe

def _load_data_time_series(df_geoinfo):
    df_arg = _load_National_data(os.path.join(DATA_DIR, 'Argentina_Provinces.csv'))
    df_safe = _load_SantaFe_data(os.path.join(DATA_DIR, 'SantaFe_AllData.csv'))
    df = pd.concat([df_arg, df_safe])
    # Dates with no reported value count as 0.
    df = df.fillna(0).sort_index()
    # Add a day zero (one day before the first date) with all values 0.
    day_zero = df.columns[0] - pd.Timedelta(days=1)
    df[day_zero] = 0
    df = df[df.columns.sort_values()]
    # Add per-capita rows (cases per 100,000 inhabitants); locations without
    # population data are filled with math.inf so their rates come out as 0.
    df_per_capita = pd.merge((df * 100000).reset_index(), df_geoinfo[['LOCATION', 'POPULATION']], on='LOCATION', how='left')
    df_per_capita = df_per_capita.fillna(math.inf).set_index(['TYPE', 'LOCATION'])
    df_per_capita = df_per_capita.div(df_per_capita['POPULATION'], axis=0)
    df_per_capita = df_per_capita.drop(columns=['POPULATION'])
    df_per_capita.index = df_per_capita.index.map(lambda x: (x[0] + '_PER100K', x[1]))
    df = pd.concat([df, df_per_capita]).sort_index()
    # Count, per parent location, how many direct subregions have at least
    # one confirmed case.
    are_confirmados = df.loc['CONFIRMADOS'] > 0
    are_confirmados['PARENT_LOCATION'] = are_confirmados.index.map(lambda l: os.path.dirname(l))
    affected_subregions = are_confirmados.groupby('PARENT_LOCATION').sum()
    affected_subregions = affected_subregions.reset_index().rename(columns={'PARENT_LOCATION': 'LOCATION'})
    affected_subregions = affected_subregions[affected_subregions['LOCATION'] != '']
    affected_subregions['TYPE'] = 'AFFECTED_SUBREGIONS'
    affected_subregions = affected_subregions.set_index(['TYPE', 'LOCATION'])
    df = pd.concat([df, affected_subregions]).sort_index()
    # Calculate the difference and difference ratio with the previous day.
    df_shift = df.shift(axis=1).fillna(0)
    df_diff = df - df_shift
    df_diff.index = df_diff.index.map(lambda x: (x[0] + '_DIFF', x[1]))
    df_diff_ratio = ((df - df_shift) / df_shift).fillna(0)
    df_diff_ratio.index = df_diff_ratio.index.map(lambda x: (x[0] + '_DIFF_RATIO', x[1]))
    df = pd.concat([df, df_diff, df_diff_ratio]).sort_index()
    # Drop derived TYPE rows that make no sense.
    nonsense_types = ['ACTIVOS_PER100K_DIFF_RATIO',
                      'AFFECTED_SUBREGIONS_DIFF_RATIO',
                      'CONFIRMADOS_PER100K_DIFF_RATIO',
                      'MUERTOS_PER100K_DIFF_RATIO',
                      'RECUPERADOS_PER100K_DIFF_RATIO']
    df = df[df.index.map(lambda i: i[0] not in nonsense_types)]
    return df
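
# After the pipeline above, the TYPE index level holds, for each base series
# (e.g. CONFIRMADOS, MUERTOS), the derived rows <TYPE>_PER100K, <TYPE>_DIFF
# and <TYPE>_DIFF_RATIO, plus AFFECTED_SUBREGIONS and AFFECTED_SUBREGIONS_DIFF,
# all indexed by (TYPE, LOCATION) with one column per date.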

def _time_series_melt(df_time_series, df_geoinfo):
    # Long format: one row per (LOCATION, date) with one column per TYPE,
    # joined with the geographic metadata.
    df = pd.melt(df_time_series, id_vars=['TYPE', 'LOCATION'], value_vars=df_time_series.columns[2:], var_name='date')
    df = df.pivot_table(index=['LOCATION', 'date'], columns='TYPE', values='value').reset_index()
    df = pd.merge(df, df_geoinfo, on='LOCATION', how='left')
    return df

def _only_povs(df):
    # Keep only province-level rows (LOCATION of the form 'ARGENTINA/<province>')
    # and strip the leading 'ARGENTINA/' prefix (10 characters).
    df = df[df['LOCATION'].apply(lambda l: l.count('/') == 1)].copy()
    df['LOCATION'] = df['LOCATION'].apply(lambda l: l[10:])
    return df

def _soon_deprecated_data(df_time_series, df_info):
    # Legacy output, presumably kept for compatibility with the upstream
    # covid19_dashboard data format.
    df_time_series = _only_povs(df_time_series)
    df_info = _only_povs(df_info)
    # Artificial all-zero first date so the first real day gets a diff below.
    df_time_series['2020-03-02 00:00:00'] = 0.0
    df = pd.melt(df_time_series, id_vars=['TYPE', 'LOCATION'], value_vars=df_time_series.columns[2:], var_name='date')
    df = df[df['TYPE'].isin(['ACTIVOS', 'CONFIRMADOS', 'MUERTOS', 'RECUPERADOS'])]
    df['TYPE'] = df['TYPE'].replace({
        'ACTIVOS': 'active',
        'CONFIRMADOS': 'confirmed',
        'MUERTOS': 'deceased',
        'RECUPERADOS': 'recovered',
    })
    df = pd.merge(df, df_info, on='LOCATION')
    df['Province/State'] = df['LOCATION']
    df = df.rename(columns={
        'TYPE': 'var',
        'LAT': 'Lat',
        'LONG': 'Long',
        'LOCATION': 'Country/Region',
        'POPULATION': 'population',
    })
    df = df[['date', 'Country/Region', 'Province/State', 'var', 'value', 'Lat', 'Long', 'population']]
    df = df.sort_values(by=['Country/Region', 'date', 'var'])
    # With 4 var types per (location, date), diff(4) is the day-over-day
    # change of each var.
    df['value_new'] = df['value'].diff(4)
    df = df.sort_values(by=['date', 'Country/Region', 'var'])
    # Drop the artificial first date again.
    df = df[df['date'] != '2020-03-02']
    return df

def _calculate_global_status():
    # info_general.csv (coordinates and population per LOCATION) is expected
    # to already be in DATA_DIR; it is not among the downloaded files.
    df_geoinfo = pd.read_csv(os.path.join(DATA_DIR, 'info_general.csv'))
    df_time_series = _load_data_time_series(df_geoinfo).reset_index()
    df_time_series_melt = _time_series_melt(df_time_series, df_geoinfo)
    return {
        'timestamp': datetime.datetime.today().strftime('%Y-%m-%d-%H:%M:%S'),
        'geoinfo': df_geoinfo,
        'time_series': df_time_series,
        'time_series_melt': df_time_series_melt,
        'soon_deprecated': _soon_deprecated_data(df_time_series, df_geoinfo),
    }

_global_status = None


def backend_update_data():
    global _global_status
    print("Updating backend...")
    _download_expired_data()
    _global_status = _calculate_global_status()


def backend_global_status_getter(field):
    return _global_status[field]

def backend_data_at_date(date):
    # 'time_series' is stored with TYPE and LOCATION as columns (reset_index),
    # so restore the index before slicing out a LOCATION x TYPE table.
    df = _global_status['time_series'].set_index(['TYPE', 'LOCATION'])
    return df[date].swaplevel(0, 1).unstack()
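
# Illustrative use (after backend_update_data() has run):
#   backend_data_at_date(pd.Timestamp('2020-04-01'))
# would return a LOCATION x TYPE table with the values for that (hypothetical
# example) date.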

def backend_filter_location_by_level(df, level, extract_name=True):
    if level == 'LEAF':
        # Keep only locations that have no sub-locations.
        have_children = set(df['LOCATION'].apply(lambda l: os.path.dirname(l)))
        df = df[df['LOCATION'].apply(lambda l: l not in have_children)].copy()
    else:
        # Depth in the LOCATION path: 'ARGENTINA' is 0, 'ARGENTINA/SANTA FE'
        # is 1, and so on.
        to_level_map = {'COUNTRY': 0,
                        'PROVINCE': 1,
                        'DEPARTMENT': 2,
                        'CITY': 3}
        if isinstance(level, str):
            level = to_level_map[level]
        df = df[df['LOCATION'].apply(lambda l: l.count('/')) == level].copy()
    if extract_name:
        df['LOCATION'] = df['LOCATION'].apply(lambda l: os.path.basename(l))
    return df
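

# Minimal usage sketch (not part of the dashboard itself): refresh the data,
# then query a couple of the derived tables. Assumes DATA_DIR exists and
# already contains info_general.csv, which this module reads but never
# downloads.
if __name__ == '__main__':
    os.makedirs(DATA_DIR, exist_ok=True)
    backend_update_data()
    print('Data updated at', backend_global_status_getter('timestamp'))
    df_geoinfo = backend_global_status_getter('geoinfo')
    # Province-level names, e.g. 'SANTA FE', 'BUENOS AIRES', ...
    provinces = backend_filter_location_by_level(df_geoinfo.copy(), 'PROVINCE')
    print(provinces['LOCATION'].tolist())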