293 Remove default value for out_folder #319

Open: wants to merge 3 commits into base: main

27 changes: 14 additions & 13 deletions pycode/examples/epidata/PlotCaseData.py
@@ -34,8 +34,9 @@

yesterday = pd.Timestamp(datetime.date.today()) - pd.DateOffset(days=1)

# folder where divi and case data can be found after downloading with default out_folder
data_folder = os.path.join(dd.defaultDict['out_folder'], "pydata", "Germany")
# folder where divi and case data can be found after downloading
data_folder = os.path.join(os.getcwd(), 'data', 'pydata')
data_folder_germany = os.path.join(data_folder, "pydata", "Germany")


def get_Data(endday_divi=yesterday, moving_average=True):
@@ -44,11 +45,11 @@ def get_Data(endday_divi=yesterday, moving_average=True):
@param moving_average Defines if moving average is used"""
print('Download case data from the Internet, takes some time')
getCaseData.get_case_data(
out_folder=dd.defaultDict['out_folder'],
data_folder,
moving_average=moving_average)
print('Download DIVI Data from the Internet, takes some time')
getDIVIData.get_divi_data(
out_folder=dd.defaultDict['out_folder'],
data_folder,
end_date=endday_divi)


@@ -60,7 +61,7 @@ def plot_cases(
@param daystart Day at which should be started in timestamp format
@param simulationperiod number in integer format of days for which data should be plotted
@param saveplot boolean value; says if plot should be saved """
df = pd.read_json(os.path.join(data_folder, "cases_infected.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_infected.json"))
if not (daystart + pd.DateOffset(days=simulationperiod) <= yesterday):
simulationperiod = (yesterday - daystart).days
mask = (
@@ -81,7 +82,7 @@ def plot_cases(
plt.savefig(os.path.join('Plots', fig_name+".png"))

if moving_average:
df = pd.read_json(os.path.join(data_folder, "cases_infected_ma7.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_infected_ma7.json"))
mask = (
df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod))
@@ -98,7 +99,7 @@ def plot_cases(
if saveplot:
plt.savefig('Plots/plot_cases_confirmed_infections_ma7.png')

df = pd.read_json(os.path.join(data_folder, "cases_deaths.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_deaths.json"))
mask = (
df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod))
@@ -117,7 +118,7 @@ def plot_cases(
plt.savefig(os.path.join('Plots', fig_name+".png"))

if moving_average:
df = pd.read_json(os.path.join(data_folder, "cases_deaths_ma7.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_deaths_ma7.json"))
mask = (
df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod))
@@ -145,7 +146,7 @@ def plot_cases_age(
@param saveplot boolean value; says if plot should be saved """
if not (daystart + pd.DateOffset(days=simulationperiod) <= yesterday):
simulationperiod = (yesterday - daystart).days
df = pd.read_json(os.path.join(data_folder, "cases_all_age.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_all_age.json"))
mask = (
df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod))
@@ -183,7 +184,7 @@ def plot_cases_age(
plt.savefig(os.path.join('Plots', fig_name+".png"))

if moving_average:
df = pd.read_json(os.path.join(data_folder, "cases_all_age_ma7.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_all_age_ma7.json"))
mask = (
df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod))
@@ -233,7 +234,7 @@ def plot_cases_county(
@param saveplot boolean value; says if plot should be saved """
if not (daystart + pd.DateOffset(days=simulationperiod) <= yesterday):
simulationperiod = (yesterday - daystart).days
df = pd.read_json(os.path.join(data_folder, "cases_all_county.json"))
df = pd.read_json(os.path.join(data_folder_germany, "cases_all_county.json"))
mask = (df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) & (df["County"] == county)
fig_name = 'cases_confirmed_infections_county_' + county.replace(" ", "_")
@@ -264,7 +265,7 @@ def plot_cases_county(

if moving_average:
df = pd.read_json(os.path.join(
data_folder, "cases_all_county_ma7.json"))
data_folder_germany, "cases_all_county_ma7.json"))
mask = (df['Date'] >= daystart) & (
df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) & (df["County"] == county)
fig_name = 'cases_confirmed_infections_county_' + county.replace(" ", "_") + '_ma7'
@@ -308,7 +309,7 @@ def plot_DIVI_data(
+ daystart.strftime("%d.%m.%Y") +
". Data for this date is not available.")
else:
df = pd.read_json(os.path.join(data_folder, "germany_divi.json"))
df = pd.read_json(os.path.join(data_folder_germany, "germany_divi.json"))
if not (daystart + pd.DateOffset(days=simulationperiod) <= endday_divi):
simulationperiod = (endday_divi - daystart).days
mask = (
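With the default gone, the example derives its folders from the current working directory instead of dd.defaultDict['out_folder']. A minimal sketch of the resulting path handling, assuming the script is started from the directory that also contains the downloaded data (the printed path is illustrative):

    import os

    # same construction as in the updated example above
    data_folder = os.path.join(os.getcwd(), 'data', 'pydata')
    data_folder_germany = os.path.join(data_folder, "pydata", "Germany")

    # the plotting helpers then read files such as cases_infected.json from here
    print(os.path.join(data_folder_germany, "cases_infected.json"))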
5 changes: 1 addition & 4 deletions pycode/memilio-epidata/memilio/epidata/README.rst
@@ -76,10 +76,7 @@ optional arguments working for all are:
+---------------------------------------------+-----------------------------------------------------------+
| -h, --help | show this help message and exit |
+---------------------------------------------+-----------------------------------------------------------+
| -r, --read-data | Reads the data from file "json" instead of downloading it.|
+---------------------------------------------+-----------------------------------------------------------+
| -o OUT_FOLDER, | Defines folder for output. |
| --out-folder OUT_FOLDER | |
| -r, --read-data | Reads the data from file "json" instead of downloading it.|
+---------------------------------------------+-----------------------------------------------------------+
| -ff {json,hdf5,json_timeasstring} | Defines output format for data files. |
| --file-format {json,hdf5,json_timeasstring} | Default is "json_timeasstring". |
11 changes: 3 additions & 8 deletions pycode/memilio-epidata/memilio/epidata/cleanData.py
@@ -211,10 +211,8 @@ def cli():
- delete all
- delete just cases, jh, population, divi, vaccination, commuter or testing
- choose file format: json or hdf5
- define path to files
"""

out_path_default = dd.defaultDict['out_folder']

parser = argparse.ArgumentParser()

@@ -247,24 +245,21 @@ def cli():
action='store_true')
parser.add_argument('-tx', '--txt', help='Deletes txt files.',
action='store_true')
parser.add_argument(
'-o', '--out_path', type=str, default=out_path_default,
help='Defines folder for output.')

args = parser.parse_args()

return_args = [args.all_data, args.cases, args.john_hopkins, args.population,
args.divi, args.vaccination, args.commuter, args.testing,
args.json, args.hdf5, args.txt, args.out_path]
args.json, args.hdf5, args.txt]

return return_args


def main():
"""! Main program entry."""

out_path = os.path.join(os.getcwd(), 'data', 'pydata')
[all_data, cases, john_hopkins, population, divi,
vaccination, commuter, testing, json, hdf5, txt, out_path] = cli()
vaccination, commuter, testing, json, hdf5, txt] = cli()

clean_data(all_data, cases, john_hopkins, population, divi,
vaccination, commuter, testing, json, hdf5, txt, out_path)
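Since the -o/--out_path option is removed, main() now fixes the cleanup folder to ./data/pydata under the current working directory. A sketch of calling clean_data directly with an explicit folder, following the argument order visible in main() above; the chosen flag values (delete only the case-data json files) are illustrative:

    import os

    from memilio.epidata import cleanData

    out_path = os.path.join(os.getcwd(), 'data', 'pydata')

    # all_data, cases, john_hopkins, population, divi, vaccination,
    # commuter, testing, json, hdf5, txt, out_path
    cleanData.clean_data(False, True, False, False, False, False,
                         False, False, True, False, False, out_path)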
7 changes: 0 additions & 7 deletions pycode/memilio-epidata/memilio/epidata/defaultDict.py
@@ -32,16 +32,9 @@
import os
from datetime import date

default_file_path = os.path.dirname(os.path.abspath(__file__))
dfp_vec = default_file_path.split('memilio')
if len(dfp_vec) > 0:
default_file_path = os.path.join(
dfp_vec[0], os.path.join('memilio', 'data/pydata'))

defaultDict = {
'read_data': False,
'make_plot': False,
'out_folder': default_file_path,
'start_date': date(2020, 4, 24),
'end_date': date.today(),
'split_berlin': False,
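Dropping the 'out_folder' key means look-ups like dd.defaultDict['out_folder'] now raise a KeyError, and the package-relative default path derivation is gone entirely. A minimal sketch of the replacement pattern used by the entry points in this PR, building the folder relative to the working directory:

    import os

    from memilio.epidata import defaultDict as dd

    # under this PR the key no longer exists
    assert 'out_folder' not in dd.defaultDict

    # callers construct the target folder themselves
    data_folder = os.path.join(os.getcwd(), 'data', 'pydata')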
13 changes: 7 additions & 6 deletions pycode/memilio-epidata/memilio/epidata/getCaseData.py
@@ -60,9 +60,9 @@ def check_for_completeness(df, merge_berlin=False, merge_eisenach=True):
return False


def get_case_data(read_data=dd.defaultDict['read_data'],
def get_case_data(data_folder,
read_data=dd.defaultDict['read_data'],
file_format=dd.defaultDict['file_format'],
out_folder=dd.defaultDict['out_folder'],
no_raw=dd.defaultDict['no_raw'],
impute_dates=dd.defaultDict['impute_dates'],
make_plot=dd.defaultDict['make_plot'],
@@ -76,7 +76,7 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
If the data is read from the internet, before changing anything the data is stored in CaseDataFull.json.
If data should be downloaded, it is checked if data contains all counties.
If not a different source is tried.
The file is read in or stored at the folder "out_folder"/Germany/.
The file is read in or stored at the folder "data_folder"/Germany/.
To store and change the data we use pandas.

While working with the data
@@ -105,9 +105,9 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
- Infected, deaths and recovered split for state and age are stored in "cases_all_state_age"
- Infected, deaths and recovered split for county and age are stored in "cases_all_county_age(_split_berlin)"

@param data_folder Path to folder where data is written in folder data_folder/Germany.
@param read_data False [Default] or True. Defines if data is read from file or downloaded.
@param file_format File format which is used for writing the data. Default defined in defaultDict.
@param out_folder Path to folder where data is written in folder out_folder/Germany.
@param no_raw True or False [Default]. Defines if unchanged raw data is saved or not.
@param impute_dates False [Default] or True. Defines if values for dates without new information are imputed.
@param make_plot False [Default] or True. Defines if plots are generated with matplotlib.
@@ -116,7 +116,7 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
@param split_berlin True or False [Default]. Defines if Berlin's disctricts are kept separated or get merged.
"""

directory = os.path.join(out_folder, 'Germany/')
directory = os.path.join(data_folder, 'Germany/')
gd.check_dir(directory)
filename = "CaseDataFull"

@@ -684,8 +684,9 @@ def get_case_data(read_data=dd.defaultDict['read_data'],
def main():
"""! Main program entry."""

path = os.path.join(os.getcwd(), 'data', 'pydata')
arg_dict = gd.cli("cases")
get_case_data(**arg_dict)
get_case_data(path, **arg_dict)


if __name__ == "__main__":
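get_case_data now takes the target folder as its first positional argument instead of an out_folder keyword with a package-level default. A usage sketch of the new signature; the keyword values shown are just the documented options, not requirements:

    import os

    from memilio.epidata import getCaseData

    data_folder = os.path.join(os.getcwd(), 'data', 'pydata')

    # files such as cases_infected.json are written to data_folder/Germany/
    getCaseData.get_case_data(data_folder, read_data=False, moving_average=True)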
@@ -37,9 +37,9 @@
from memilio.epidata import getJHData as gjd


def get_case_data_with_estimations(read_data=dd.defaultDict['read_data'],
def get_case_data_with_estimations(data_folder,
read_data=dd.defaultDict['read_data'],
file_format=dd.defaultDict['file_format'],
out_folder=dd.defaultDict['out_folder'],
no_raw=dd.defaultDict['no_raw'],
make_plot=dd.defaultDict['make_plot']):
"""! Function to estimate recovered and deaths from combination of case data from RKI and JH data
@@ -49,14 +49,14 @@ def get_case_data_with_estimations(read_data=dd.defaultDict['read_data'],
With this fraction every existing case data from RKI is scaled.
The new columns recovered_estimated and deaths_estimated are added.

@param data_folder Folder where data is written to.
@param read_data False [Default] or True. Defines if data is read from file or downloaded.
@param file_format json [Default]
@param out_folder Folder where data is written to.
@param no_raw True or False [Default]. Defines if unchanged raw data is saved or not.
@param make_plot [Optional] case data from RKI and estimated data can be compared by plots
"""

data_path = os.path.join(out_folder, 'Germany/')
data_path = os.path.join(data_folder, 'Germany/')

if not read_data:
impute_dates = False
@@ -66,11 +66,11 @@ def get_case_data_with_estimations(read_data=dd.defaultDict['read_data'],

# get case data
gcd.get_case_data(
read_data, file_format, out_folder, no_raw, impute_dates,
data_path, read_data, file_format, no_raw, impute_dates,
make_plot_cases, moving_average, no_raw, split_berlin)

# get data from John Hopkins University
gjd.get_jh_data(read_data, file_format, out_folder, no_raw)
gjd.get_jh_data(data_path, read_data, file_format, no_raw)

# Now we now which data is generated and we can use it
# read in jh data
@@ -379,8 +379,9 @@ def download_weekly_deaths_numbers(data_path):
def main():
"""! Main program entry."""

path = os.path.join(os.getcwd(), 'data', 'pydata')
arg_dict = gd.cli("cases_est")
get_case_data_with_estimations(**arg_dict)
get_case_data_with_estimations(path, **arg_dict)


if __name__ == "__main__":
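The estimation wrapper follows the same pattern: the caller-supplied folder is forwarded to get_case_data and get_jh_data rather than being read from defaultDict. A sketch of the new call; the module import path is an assumption based on the other epidata modules:

    import os

    # assumed module name, matching the function defined above
    from memilio.epidata import getCaseDataWithEstimations as gcde

    path = os.path.join(os.getcwd(), 'data', 'pydata')
    gcde.get_case_data_with_estimations(path, read_data=False, make_plot=False)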