-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_service.py
More file actions
143 lines (115 loc) · 4.66 KB
/
Copy pathdata_service.py
File metadata and controls
143 lines (115 loc) · 4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pandas as pd
import requests
from datetime import datetime, timedelta
def load_data(countries=None, days=90):
    """
    Fetch historical COVID-19 data from the disease.sh API (Johns Hopkins CSSE data).

    Args:
        countries: List of country names. When None, a built-in list of ten
            countries is used.
        days: Number of days of historical data to request. Defaults to 90.

    Returns:
        A ``(df, errors)`` tuple. ``df`` is a pandas DataFrame with the columns
        ``date``, ``region``, ``cases``, ``deaths`` and ``recoveries``, one row
        per country per date. The columns are present even when no rows could
        be loaded. ``errors`` is a list of messages, one for each country that
        could not be loaded.

    Note: Recent data may not include recovery figures as disease.sh API
    no longer tracks recoveries for most regions.
    """
    columns = ['date', 'region', 'cases', 'deaths', 'recoveries']
    if countries is None:
        countries = ['USA', 'India', 'Brazil', 'UK', 'France', 'Germany', 'Italy', 'Spain', 'Canada', 'Australia']
    all_data = []
    errors = []
    for country in countries:
        # Best-effort per country: any failure is recorded and the loop moves on.
        try:
            url = f"https://disease.sh/v3/covid-19/historical/{country}?lastdays={days}"
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                errors.append(f"Failed to load {country} (HTTP {response.status_code})")
                continue
            data = response.json()
            if 'timeline' not in data:
                errors.append(f"No timeline data for {country}")
                continue
            timeline = data['timeline']
            if 'cases' not in timeline or 'deaths' not in timeline:
                errors.append(f"Incomplete timeline data for {country}")
                continue
            deaths = timeline['deaths']
            # A missing or null 'recovered' section means "no recovery figures".
            recovered = timeline.get('recovered') or {}
            # Stage the rows first so a failure part-way through does not leave
            # a partial set of rows for this country in the result.
            rows = [
                {
                    'date': date_str,
                    'region': country,
                    'cases': case_count,
                    'deaths': deaths.get(date_str, 0),
                    'recoveries': recovered.get(date_str, 0),
                }
                for date_str, case_count in timeline['cases'].items()
            ]
            all_data.extend(rows)
        except Exception as e:
            errors.append(f"Error loading {country}: {str(e)}")
    # Passing the schema explicitly keeps the columns available on an empty result.
    df = pd.DataFrame(all_data, columns=columns)
    return df, errors
def clean_data(df):
    """
    Clean the COVID-19 data: fill missing recoveries, parse dates, and sort.

    Args:
        df: DataFrame with 'recoveries', 'date' and 'region' columns. 'date'
            is parsed with the format '%m/%d/%y'.

    Returns:
        A new DataFrame sorted by region and then date, with a fresh 0..n-1
        index. Missing recoveries become 0 and dates that cannot be parsed
        become NaT. The input frame is not modified. An empty input is
        returned unchanged.
    """
    if df.empty:
        return df
    # assign() builds a new frame, so the caller's columns are left untouched.
    cleaned = df.assign(
        recoveries=df['recoveries'].fillna(0),
        date=pd.to_datetime(df['date'], format='%m/%d/%y', errors='coerce'),
    )
    return cleaned.sort_values(['region', 'date']).reset_index(drop=True)
def analyze_data(df):
    """
    Add per-row ratio columns and build a per-region summary.

    Adds two columns to ``df`` in place: 'recovery_ratio' (recoveries / cases)
    and 'death_rate' (deaths / cases). Both are 0 wherever cases is not
    greater than 0.

    Args:
        df: DataFrame with 'region', 'cases', 'deaths' and 'recoveries' columns.

    Returns:
        A ``(df, regional_summary)`` tuple. ``df`` is the same frame that was
        passed in, with the two ratio columns added. ``regional_summary`` holds
        'region', 'cases', 'deaths' and 'recoveries', taking the last non-null
        value of each column per region (the most recent figures when rows are
        in date order), sorted by cases in descending order. For an empty
        input, the input and an empty DataFrame are returned.
    """
    if df.empty:
        return df, pd.DataFrame()
    df['recovery_ratio'] = _ratio_where_cases_positive(df['recoveries'], df['cases'])
    df['death_rate'] = _ratio_where_cases_positive(df['deaths'], df['cases'])
    latest_per_region = df.groupby('region').last().reset_index()
    summary_columns = ['region', 'cases', 'deaths', 'recoveries']
    regional_summary = latest_per_region[summary_columns].sort_values('cases', ascending=False)
    return df, regional_summary


def _ratio_where_cases_positive(numerator, cases):
    """Return numerator / cases element-wise, with 0 wherever cases is not > 0."""
    has_cases = cases > 0
    # Blank out non-positive denominators before dividing so that no division
    # by zero takes place; those positions are then set to 0.
    return (numerator / cases.where(has_cases)).where(has_cases, 0.0)
def calculate_global_stats(regional_summary):
    """
    Aggregate the regional summary into overall totals.

    Sums the 'cases', 'deaths' and 'recoveries' columns and derives the death
    rate as a percentage of total cases. Returns a dict with the keys
    'total_cases', 'total_deaths', 'total_recoveries' and 'death_rate'. All
    values are zero when the summary is empty.
    """
    if regional_summary.empty:
        return {
            'total_cases': 0,
            'total_deaths': 0,
            'total_recoveries': 0,
            'death_rate': 0.0
        }

    cases_sum = regional_summary['cases'].sum()
    deaths_sum = regional_summary['deaths'].sum()
    recoveries_sum = regional_summary['recoveries'].sum()

    # Avoid dividing by zero when no cases were recorded at all.
    if cases_sum > 0:
        rate_percent = deaths_sum / cases_sum * 100
    else:
        rate_percent = 0

    stats = {}
    stats['total_cases'] = int(cases_sum)
    stats['total_deaths'] = int(deaths_sum)
    stats['total_recoveries'] = int(recoveries_sum)
    stats['death_rate'] = rate_percent
    return stats
def get_country_data(df, country):
    """
    Get data for a specific country.

    Args:
        df: DataFrame with a 'region' column.
        country: Region value to select.

    Returns:
        A copy of the rows whose 'region' equals ``country``. An empty input
        yields an empty copy, even if it has no 'region' column.
    """
    # An empty frame may have no columns at all; skip the 'region' lookup,
    # as the other filter helpers in this module do for empty input.
    if df.empty:
        return df.copy()
    return df[df['region'] == country].copy()
def get_date_range(df):
    """
    Return the earliest and latest values of the 'date' column.

    Gives (None, None) when the frame is empty or has no 'date' column.
    """
    has_dates = (not df.empty) and ('date' in df.columns)
    if not has_dates:
        return None, None
    dates = df['date']
    earliest = dates.min()
    latest = dates.max()
    return earliest, latest
def filter_by_date_range(df, start_date, end_date):
    """
    Keep only the rows whose 'date' lies between start_date and end_date.

    Both bounds are inclusive and are converted with pd.to_datetime. An empty
    frame is returned unchanged; otherwise a copy of the matching rows is
    returned.
    """
    if df.empty:
        return df
    on_or_after_start = df['date'] >= pd.to_datetime(start_date)
    on_or_before_end = df['date'] <= pd.to_datetime(end_date)
    within_range = on_or_after_start & on_or_before_end
    selected = df[within_range]
    return selected.copy()
def filter_by_countries(df, countries):
    """
    Keep only the rows whose 'region' is one of the given countries.

    The frame is returned unchanged when it is empty or when no countries are
    given; otherwise a copy of the matching rows is returned.
    """
    nothing_to_filter = df.empty or not countries
    if nothing_to_filter:
        return df
    is_selected = df['region'].isin(countries)
    selected = df[is_selected]
    return selected.copy()