diff --git a/pycode/examples/epidata/PlotCaseData.py b/pycode/examples/epidata/PlotCaseData.py index 6ba208d71c..0c8c4a645c 100644 --- a/pycode/examples/epidata/PlotCaseData.py +++ b/pycode/examples/epidata/PlotCaseData.py @@ -34,8 +34,9 @@ yesterday = pd.Timestamp(datetime.date.today()) - pd.DateOffset(days=1) -# folder where divi and case data can be found after downloading with default out_folder -data_folder = os.path.join(dd.defaultDict['out_folder'], "pydata", "Germany") +# base folder handed to the download functions; they store German data in <data_folder>/Germany +data_folder = os.path.join(os.getcwd(), 'data', 'pydata') +data_folder_germany = os.path.join(data_folder, "Germany") def get_Data(endday_divi=yesterday, moving_average=True): @@ -44,11 +45,11 @@ def get_Data(endday_divi=yesterday, moving_average=True): @param moving_average Defines if moving average is used""" print('Download case data from the Internet, takes some time') getCaseData.get_case_data( - out_folder=dd.defaultDict['out_folder'], + data_folder, moving_average=moving_average) print('Download DIVI Data from the Internet, takes some time') getDIVIData.get_divi_data( - out_folder=dd.defaultDict['out_folder'], + data_folder, end_date=endday_divi) @@ -60,7 +61,7 @@ def plot_cases( @param daystart Day at which should be started in timestamp format @param simulationperiod number in integer format of days for which data should be plotted @param saveplot boolean value; says if plot should be saved """ - df = pd.read_json(os.path.join(data_folder, "cases_infected.json")) + df = pd.read_json(os.path.join(data_folder_germany, "cases_infected.json")) if not (daystart + pd.DateOffset(days=simulationperiod) <= yesterday): simulationperiod = (yesterday - daystart).days mask = ( @@ -81,7 +82,7 @@ def plot_cases( plt.savefig(os.path.join('Plots', fig_name+".png")) if moving_average: - df = pd.read_json(os.path.join(data_folder, "cases_infected_ma7.json")) + df = pd.read_json(os.path.join(data_folder_germany, 
"cases_infected_ma7.json")) mask = ( df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) @@ -98,7 +99,7 @@ def plot_cases( if saveplot: plt.savefig('Plots/plot_cases_confirmed_infections_ma7.png') - df = pd.read_json(os.path.join(data_folder, "cases_deaths.json")) + df = pd.read_json(os.path.join(data_folder_germany, "cases_deaths.json")) mask = ( df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) @@ -117,7 +118,7 @@ def plot_cases( plt.savefig(os.path.join('Plots', fig_name+".png")) if moving_average: - df = pd.read_json(os.path.join(data_folder, "cases_deaths_ma7.json")) + df = pd.read_json(os.path.join(data_folder_germany, "cases_deaths_ma7.json")) mask = ( df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) @@ -145,7 +146,7 @@ def plot_cases_age( @param saveplot boolean value; says if plot should be saved """ if not (daystart + pd.DateOffset(days=simulationperiod) <= yesterday): simulationperiod = (yesterday - daystart).days - df = pd.read_json(os.path.join(data_folder, "cases_all_age.json")) + df = pd.read_json(os.path.join(data_folder_germany, "cases_all_age.json")) mask = ( df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) @@ -183,7 +184,7 @@ def plot_cases_age( plt.savefig(os.path.join('Plots', fig_name+".png")) if moving_average: - df = pd.read_json(os.path.join(data_folder, "cases_all_age_ma7.json")) + df = pd.read_json(os.path.join(data_folder_germany, "cases_all_age_ma7.json")) mask = ( df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) @@ -233,7 +234,7 @@ def plot_cases_county( @param saveplot boolean value; says if plot should be saved """ if not (daystart + pd.DateOffset(days=simulationperiod) <= yesterday): simulationperiod = (yesterday - daystart).days - df = pd.read_json(os.path.join(data_folder, "cases_all_county.json")) + df = 
pd.read_json(os.path.join(data_folder_germany, "cases_all_county.json")) mask = (df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) & (df["County"] == county) fig_name = 'cases_confirmed_infections_county_' + county.replace(" ", "_") @@ -264,7 +265,7 @@ def plot_cases_county( if moving_average: df = pd.read_json(os.path.join( - data_folder, "cases_all_county_ma7.json")) + data_folder_germany, "cases_all_county_ma7.json")) mask = (df['Date'] >= daystart) & ( df['Date'] <= daystart + pd.DateOffset(days=simulationperiod)) & (df["County"] == county) fig_name = 'cases_confirmed_infections_county_' + county.replace(" ", "_") + '_ma7' @@ -308,7 +309,7 @@ def plot_DIVI_data( + daystart.strftime("%d.%m.%Y") + ". Data for this date is not available.") else: - df = pd.read_json(os.path.join(data_folder, "germany_divi.json")) + df = pd.read_json(os.path.join(data_folder_germany, "germany_divi.json")) if not (daystart + pd.DateOffset(days=simulationperiod) <= endday_divi): simulationperiod = (endday_divi - daystart).days mask = ( diff --git a/pycode/memilio-epidata/memilio/epidata/README.rst b/pycode/memilio-epidata/memilio/epidata/README.rst index 276227dbc2..9d7f2ebe06 100644 --- a/pycode/memilio-epidata/memilio/epidata/README.rst +++ b/pycode/memilio-epidata/memilio/epidata/README.rst @@ -76,10 +76,7 @@ optional arguments working for all are: +---------------------------------------------+-----------------------------------------------------------+ | -h, --help | show this help message and exit | +---------------------------------------------+-----------------------------------------------------------+ -| -r, --read-data | Reads the data from file "json" instead of downloading it.| -+---------------------------------------------+-----------------------------------------------------------+ -| -o OUT_FOLDER, | Defines folder for output. 
| -| --out-folder OUT_FOLDER | | +| -r, --read-data | Reads the data from file "json" instead of downloading it.| | | +---------------------------------------------+-----------------------------------------------------------+ | -ff {json,hdf5,json_timeasstring} | Defines output format for data files. | | --file-format {json,hdf5,json_timeasstring} | Default is "json_timeasstring". | diff --git a/pycode/memilio-epidata/memilio/epidata/cleanData.py b/pycode/memilio-epidata/memilio/epidata/cleanData.py index bd5dfb968a..59280560cd 100644 --- a/pycode/memilio-epidata/memilio/epidata/cleanData.py +++ b/pycode/memilio-epidata/memilio/epidata/cleanData.py @@ -211,10 +211,8 @@ def cli(): - delete all - delete just cases, jh, population, divi, vaccination, commuter or testing - choose file format: json or hdf5 - - define path to files """ - out_path_default = dd.defaultDict['out_folder'] parser = argparse.ArgumentParser() @@ -247,15 +245,11 @@ def cli(): action='store_true') parser.add_argument('-tx', '--txt', help='Deletes txt files.', action='store_true') - parser.add_argument( - '-o', '--out_path', type=str, default=out_path_default, - help='Defines folder for output.') - args = parser.parse_args() return_args = [args.all_data, args.cases, args.john_hopkins, args.population, args.divi, args.vaccination, args.commuter, args.testing, - args.json, args.hdf5, args.txt, args.out_path] + args.json, args.hdf5, args.txt] return return_args @@ -263,8 +257,9 @@ def cli(): def main(): """! 
Main program entry.""" + out_path = os.path.join(os.getcwd(), 'data', 'pydata') [all_data, cases, john_hopkins, population, divi, - vaccination, commuter, testing, json, hdf5, txt, out_path] = cli() + vaccination, commuter, testing, json, hdf5, txt] = cli() clean_data(all_data, cases, john_hopkins, population, divi, vaccination, commuter, testing, json, hdf5, txt, out_path) diff --git a/pycode/memilio-epidata/memilio/epidata/defaultDict.py b/pycode/memilio-epidata/memilio/epidata/defaultDict.py index ae011614e3..45933a22f7 100644 --- a/pycode/memilio-epidata/memilio/epidata/defaultDict.py +++ b/pycode/memilio-epidata/memilio/epidata/defaultDict.py @@ -32,16 +32,9 @@ import os from datetime import date -default_file_path = os.path.dirname(os.path.abspath(__file__)) -dfp_vec = default_file_path.split('memilio') -if len(dfp_vec) > 0: - default_file_path = os.path.join( - dfp_vec[0], os.path.join('memilio', 'data/pydata')) - defaultDict = { 'read_data': False, 'make_plot': False, - 'out_folder': default_file_path, 'start_date': date(2020, 4, 24), 'end_date': date.today(), 'split_berlin': False, diff --git a/pycode/memilio-epidata/memilio/epidata/getCaseData.py b/pycode/memilio-epidata/memilio/epidata/getCaseData.py index 48efd2101a..1f18071031 100644 --- a/pycode/memilio-epidata/memilio/epidata/getCaseData.py +++ b/pycode/memilio-epidata/memilio/epidata/getCaseData.py @@ -60,9 +60,9 @@ def check_for_completeness(df, merge_berlin=False, merge_eisenach=True): return False -def get_case_data(read_data=dd.defaultDict['read_data'], +def get_case_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], impute_dates=dd.defaultDict['impute_dates'], make_plot=dd.defaultDict['make_plot'], @@ -76,7 +76,7 @@ def get_case_data(read_data=dd.defaultDict['read_data'], If the data is read from the internet, before changing anything the data is stored in 
CaseDataFull.json. If data should be downloaded, it is checked if data contains all counties. If not a different source is tried. - The file is read in or stored at the folder "out_folder"/Germany/. + The file is read in or stored at the folder "data_folder"/Germany/. To store and change the data we use pandas. While working with the data @@ -105,9 +105,9 @@ def get_case_data(read_data=dd.defaultDict['read_data'], - Infected, deaths and recovered split for state and age are stored in "cases_all_state_age" - Infected, deaths and recovered split for county and age are stored in "cases_all_county_age(_split_berlin)" + @param data_folder Path to folder where data is written in folder data_folder/Germany. @param read_data False [Default] or True. Defines if data is read from file or downloaded. @param file_format File format which is used for writing the data. Default defined in defaultDict. - @param out_folder Path to folder where data is written in folder out_folder/Germany. @param no_raw True or False [Default]. Defines if unchanged raw data is saved or not. @param impute_dates False [Default] or True. Defines if values for dates without new information are imputed. @param make_plot False [Default] or True. Defines if plots are generated with matplotlib. @@ -116,7 +116,7 @@ def get_case_data(read_data=dd.defaultDict['read_data'], @param split_berlin True or False [Default]. Defines if Berlin's disctricts are kept separated or get merged. """ - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) filename = "CaseDataFull" @@ -684,8 +684,9 @@ def get_case_data(read_data=dd.defaultDict['read_data'], def main(): """! 
Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("cases") - get_case_data(**arg_dict) + get_case_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py b/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py index 13676fefc9..4ca0dcf337 100644 --- a/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py +++ b/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py @@ -37,9 +37,9 @@ from memilio.epidata import getJHData as gjd -def get_case_data_with_estimations(read_data=dd.defaultDict['read_data'], +def get_case_data_with_estimations(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], make_plot=dd.defaultDict['make_plot']): """! Function to estimate recovered and deaths from combination of case data from RKI and JH data @@ -49,14 +49,14 @@ def get_case_data_with_estimations(read_data=dd.defaultDict['read_data'], With this fraction every existing case data from RKI is scaled. The new columns recovered_estimated and deaths_estimated are added. + @param data_folder Folder where data is written to. @param read_data False [Default] or True. Defines if data is read from file or downloaded. @param file_format json [Default] - @param out_folder Folder where data is written to. @param no_raw True or False [Default]. Defines if unchanged raw data is saved or not. 
@param make_plot [Optional] case data from RKI and estimated data can be compared by plots """ - data_path = os.path.join(out_folder, 'Germany/') + data_path = os.path.join(data_folder, 'Germany/') if not read_data: impute_dates = False @@ -66,11 +66,11 @@ def get_case_data_with_estimations(read_data=dd.defaultDict['read_data'], # get case data gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + data_folder, read_data, file_format, no_raw, impute_dates, make_plot_cases, moving_average, no_raw, split_berlin) # get data from John Hopkins University - gjd.get_jh_data(read_data, file_format, out_folder, no_raw) + gjd.get_jh_data(data_folder, read_data, file_format, no_raw) # Now we now which data is generated and we can use it # read in jh data @@ -379,8 +379,9 @@ def download_weekly_deaths_numbers(data_path): def main(): """! Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("cases_est") - get_case_data_with_estimations(**arg_dict) + get_case_data_with_estimations(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py b/pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py index 674c98b239..26ccd3f356 100644 --- a/pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py +++ b/pycode/memilio-epidata/memilio/epidata/getCommuterMobility.py @@ -127,10 +127,10 @@ def assign_geographical_entities(countykey_list, govkey_list): return countykey2govkey, countykey2localnumlist, gov_county_table, state_gov_table -def get_commuter_data(setup_dict='', +def get_commuter_data(data_path, + setup_dict='', read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], make_plot=dd.defaultDict['make_plot'], no_raw=dd.defaultDict['no_raw']): """! 
Computes DataFrame of commuter migration patterns based on the Federal @@ -159,7 +159,7 @@ def get_commuter_data(setup_dict='', 'rel_tol': rel_tol, 'path': path} - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_path, 'Germany/') gd.check_dir(directory) countykey_list = geoger.get_county_ids(merge_eisenach=False, zfill=True) @@ -169,7 +169,7 @@ def get_commuter_data(setup_dict='', # This is not very nice either to have the same file with either Eisenach merged or not... population = gPd.get_population_data( - out_folder=out_folder, merge_eisenach=False, read_data=read_data) + data_path, merge_eisenach=False, read_data=read_data) countypop_list = list(population[dd.EngEng["population"]]) @@ -201,22 +201,21 @@ def get_commuter_data(setup_dict='', # Using the 'Einpendler' sheet to correctly distribute summed values over counties of other gov. region # This File is in a zip folder so it has to be unzipped first before it can be read. param_dict={"sheet_name": 3, "engine": "pyxlsb"} - filepath = os.path.join(out_folder, 'Germany/') url = setup_dict['path'] + item.split('.')[0] + '.zip' # Unzip it - zipfile = wget.download(url, filepath) + zipfile = wget.download(url, directory) with ZipFile(zipfile, 'r') as zipObj: - zipObj.extractall(path = filepath) + zipObj.extractall(path = directory) # Read the file filename = item.split('-20')[0] + '.xlsb' file = filename.replace('-','_') - commuter_migration_file = pd.read_excel(filepath + file, **param_dict) + commuter_migration_file = pd.read_excel(directory + file, **param_dict) # pd.read_excel(os.path.join(setup_dict['path'], item), sheet_name=3) # delete zip folder after extracting - os.remove(os.path.join(filepath, item)) + os.remove(os.path.join(directory, item)) # delete file after reading - os.remove(os.path.join(filepath, file)) + os.remove(os.path.join(directory, file)) counties_done = [] # counties considered as 'migration from' # current_row = -1 # row of matrix that belongs to county 
migrated from @@ -462,8 +461,8 @@ def commuter_sanity_checks(df): def get_neighbors_mobility( - countyid, direction='both', abs_tol=0, rel_tol=0, tol_comb='or', - merge_eisenach=True, out_folder=dd.defaultDict['out_folder']): + data_path, countyid, direction='both', abs_tol=0, rel_tol=0, + tol_comb='or', merge_eisenach=True): '''! Returns the neighbors of a particular county ID depening on the commuter mobility and given absolute and relative thresholds on the number of commuters. @@ -487,7 +486,7 @@ def get_neighbors_mobility( commuters from and to the neighbors. ''' # This is not very nice either to have the same file with either Eisenach merged or not... - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_path, 'Germany/') gd.check_dir(directory) try: if merge_eisenach: @@ -498,7 +497,7 @@ def get_neighbors_mobility( directory, "migration_bfa_2020_dim401.json")) except ValueError: print("Commuter data was not found. Download and process it from the internet.") - commuter = get_commuter_data(out_folder=out_folder) + commuter = get_commuter_data(data_path) countykey_list = commuter.columns commuter.index = countykey_list @@ -521,8 +520,8 @@ def get_neighbors_mobility( def get_neighbors_mobility_all( - direction='both', abs_tol=0, rel_tol=0, tol_comb='or', - merge_eisenach=True, out_folder=dd.defaultDict['out_folder']): + data_path, direction='both', abs_tol=0, rel_tol=0, + tol_comb='or', merge_eisenach=True): '''! Returns the neighbors of all counties ID depening on the commuter mobility and given absolute and relative thresholds on the number of commuters. @@ -542,17 +541,16 @@ def get_neighbors_mobility_all( both ('and') @return Neighbors of all counties with respect to mobility. 
''' - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_path, 'Germany/') gd.check_dir(directory) countyids = geoger.get_county_ids(merge_eisenach=merge_eisenach) neighbors_table = [] for id in countyids: neighbors_table.append( get_neighbors_mobility( - id, direction=direction, abs_tol=abs_tol, - rel_tol=rel_tol, tol_comb=tol_comb, - merge_eisenach=merge_eisenach, - out_folder=out_folder)) + data_path, id, direction=direction, + abs_tol=abs_tol, rel_tol=rel_tol, + tol_comb=tol_comb, merge_eisenach=merge_eisenach)) return dict(zip(countyids, neighbors_table)) @@ -560,6 +558,9 @@ def get_neighbors_mobility_all( def main(): """! Main program entry.""" + + data_path = os.path.join(os.getcwd(), 'data', 'pydata') + arg_dict = gd.cli("commuter_official") ref_year = 2020 @@ -572,10 +573,10 @@ def main(): 'rel_tol': rel_tol, 'path': path} - get_neighbors_mobility(1001, abs_tol=0, rel_tol=0, tol_comb='or', - merge_eisenach=True, out_folder=dd.defaultDict['out_folder']) + get_neighbors_mobility(data_path, 1001, abs_tol=0, rel_tol=0, tol_comb='or', + merge_eisenach=True) - mat_commuter_migration = get_commuter_data(setup_dict, **arg_dict) + mat_commuter_migration = get_commuter_data(data_path, setup_dict, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getDIVIData.py b/pycode/memilio-epidata/memilio/epidata/getDIVIData.py index 41b5579bc9..ffd8fa8327 100644 --- a/pycode/memilio-epidata/memilio/epidata/getDIVIData.py +++ b/pycode/memilio-epidata/memilio/epidata/getDIVIData.py @@ -45,9 +45,9 @@ from memilio.epidata import modifyDataframeSeries -def get_divi_data(read_data=dd.defaultDict['read_data'], +def get_divi_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], end_date=dd.defaultDict['end_date'], start_date=dd.defaultDict['start_date'], @@ -58,7 +58,7 @@ def 
get_divi_data(read_data=dd.defaultDict['read_data'], Available data starts from 2020-04-24. If the given start_date is earlier, it is changed to this date and a warning is printed. - If it does not already exist, the folder Germany is generated in the given out_folder. + If it does not already exist, the folder Germany is generated in the given data_folder. If read_data == True and the file "FullData_DIVI.json" exists, the data is read form this file and stored in a pandas dataframe. If read_data = True and the file does not exist the program is stopped. @@ -69,10 +69,10 @@ def get_divi_data(read_data=dd.defaultDict['read_data'], stored in the files "county_divi".json", "state_divi.json" and "germany_divi.json" for counties, states and whole Germany, respectively. + @param data_folder Folder where data is written to. @param read_data False [Default] or True. Defines if data is read from file or downloaded. @param file_format File format which is used for writing the data. Default defined in defaultDict. "False [Default]" if it is downloaded for all dates from start_date to end_date. - @param out_folder Folder where data is written to. @param no_raw True or False [Default]. Defines if unchanged raw data is saved or not. @param start_date [Optional] Date of first date in dataframe. Default defined in defaultDict. @param end_date [Optional] Date of last date in dataframe. Default defined in defaultDict. 
@@ -87,7 +87,7 @@ def get_divi_data(read_data=dd.defaultDict['read_data'], "You asked for " + start_date.strftime("%Y-%m-%d") + ".") start_date = date(2020, 4, 24) - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) filename = "FullData_DIVI" @@ -221,8 +221,9 @@ def divi_data_sanity_checks(df=pd.DataFrame()): def main(): """ Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli('divi',) - get_divi_data(**arg_dict) + get_divi_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py b/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py index ac72b846a6..f8a0b1d847 100644 --- a/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py +++ b/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py @@ -164,7 +164,6 @@ def cli(what): The following default arguments are added to the parser: - read-from-disk - file-format, choices = ['json', 'hdf5', 'json_timeasstring'] - - out_path - no_raw The default values are defined in default dict. @@ -202,10 +201,6 @@ def cli(what): except KeyError: raise ValueError("Wrong key or cli_dict.") - out_path_default = dd.defaultDict['out_folder'] - - check_dir(out_path_default) - parser = argparse.ArgumentParser(description=what_list[0]) group = parser.add_mutually_exclusive_group() @@ -220,9 +215,7 @@ def cli(what): choices=['json', 'hdf5', 'json_timeasstring'], help='Defines output format for data files. 
Default is \"' + str(dd.defaultDict['file_format'] + "\".")) - parser.add_argument('-o', '--out-folder', type=str, - default=out_path_default, - help='Defines folder for output.') + parser.add_argument( '-n', '--no-raw', default=dd.defaultDict['no_raw'], help='Defines if raw data will be stored for further use.', diff --git a/pycode/memilio-epidata/memilio/epidata/getJHData.py b/pycode/memilio-epidata/memilio/epidata/getJHData.py index 29a449277f..1d25478b9a 100644 --- a/pycode/memilio-epidata/memilio/epidata/getJHData.py +++ b/pycode/memilio-epidata/memilio/epidata/getJHData.py @@ -30,9 +30,9 @@ from memilio.epidata import defaultDict as dd -def get_jh_data(read_data=dd.defaultDict['read_data'], +def get_jh_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw']): """! Download data from John Hopkins University @@ -47,16 +47,16 @@ def get_jh_data(read_data=dd.defaultDict['read_data'], - Germany, SouthKorea, Spain, France, Italy, US, China - furthermore, all countries, for which provinces are added, are written to a file + @param data_folder Path to directory where data is written in. Here data_folder/Germany. @param read_data False [Default] or True. Defines if data is read from file or downloaded. @param file_format File format which is used for writing the data. Default defined in defaultDict. - @param out_folder Path to folder where data is written in folder out_folder/Germany. @param no_raw True or False [Default]. Defines if unchanged raw data is saved or not. 
""" filename = "FullData_JohnHopkins" if read_data: - file_in = os.path.join(out_folder, filename + ".json") + file_in = os.path.join(data_folder, filename + ".json") # if once dowloaded just read json file try: df = pandas.read_json(file_in) @@ -77,7 +77,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'], # output data to not always download it if not no_raw: - gd.write_dataframe(df, out_folder, filename, "json") + gd.write_dataframe(df, data_folder, filename, "json") df.rename({'Country/Region': 'CountryRegion', 'Province/State': 'ProvinceState'}, axis=1, inplace=True) print("Available columns:", df.columns) @@ -86,13 +86,13 @@ def get_jh_data(read_data=dd.defaultDict['read_data'], df.loc[df['CountryRegion'] == "Korea, South", ['CountryRegion']] = 'SouthKorea' # Generate folders if needed - directory_ger = os.path.join(out_folder, 'Germany/') - directory_es = os.path.join(out_folder, 'Spain/') - directory_fr = os.path.join(out_folder, 'France/') - directory_it = os.path.join(out_folder, 'Italy/') - directory_us = os.path.join(out_folder, 'US/') - directory_rok = os.path.join(out_folder, 'SouthKorea/') - directory_prc = os.path.join(out_folder, 'China/') + directory_ger = os.path.join(data_folder, 'Germany/') + directory_es = os.path.join(data_folder, 'Spain/') + directory_fr = os.path.join(data_folder, 'France/') + directory_it = os.path.join(data_folder, 'Italy/') + directory_us = os.path.join(data_folder, 'US/') + directory_rok = os.path.join(data_folder, 'SouthKorea/') + directory_prc = os.path.join(data_folder, 'China/') # dictionary of countries countries = { @@ -109,7 +109,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'], gb = df.groupby(['CountryRegion', 'Date']).agg({"Confirmed": sum, "Recovered": sum, "Deaths": sum}) - gd.write_dataframe(gb.reset_index(), out_folder, "all_countries_jh", file_format) + gd.write_dataframe(gb.reset_index(), data_folder, "all_countries_jh", file_format) for key in countries: # get data for specific 
countries @@ -128,7 +128,7 @@ def get_jh_data(read_data=dd.defaultDict['read_data'], gb = dfD.groupby(['CountryRegion', 'ProvinceState', 'Date']).agg( {"Confirmed": sum, "Recovered": sum, "Deaths": sum}) - gd.write_dataframe(gb.reset_index(), out_folder, "all_provincestate_jh", file_format) + gd.write_dataframe(gb.reset_index(), data_folder, "all_provincestate_jh", file_format) # print(dfD[dfD.ProvinceState=="Saskatchewan"]) # print(gb.reset_index()[gb.reset_index().ProvinceState=="Saskatchewan"]) @@ -143,8 +143,9 @@ def get_jh_data(read_data=dd.defaultDict['read_data'], def main(): """! Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("jh") - get_jh_data(**arg_dict) + get_jh_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getPopulationData.py b/pycode/memilio-epidata/memilio/epidata/getPopulationData.py index 6d72af8d90..f847bd66a5 100644 --- a/pycode/memilio-epidata/memilio/epidata/getPopulationData.py +++ b/pycode/memilio-epidata/memilio/epidata/getPopulationData.py @@ -116,7 +116,7 @@ def get_new_counties(data): return data_temp -def load_population_data(out_folder=dd.defaultDict['out_folder'], +def load_population_data(data_folder, read_data=dd.defaultDict['read_data'], no_raw=dd.defaultDict['no_raw'], file_format=dd.defaultDict['file_format']): @@ -130,16 +130,16 @@ def load_population_data(out_folder=dd.defaultDict['out_folder'], - Zensus2011 data from opendata splitted for age and gender [stored in "zensus"] - Data is either downloaded or read from "out_folder"/Germany/. + Data is either downloaded or read from "data_folder"/Germany/. - @param out_folder Path to folder where data is written in folder out_folder/Germany. Default defined in defaultDict. + @param data_folder Path to folder where data is written in folder data_folder/Germany. @param read_data False or True. Defines if data is read from file or downloaded. Default defined in defaultDict. 
@param no_raw True or False. Defines if unchanged raw data is written or not. Default defined in defaultDict. @param file_format File format which is used for writing the data. Default defined in defaultDict. @return 3 Dataframes of migration, reg_key and zensus """ - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) filename_counties = 'migration' @@ -222,9 +222,9 @@ def load_population_data(out_folder=dd.defaultDict['out_folder'], return counties, zensus, reg_key -def get_population_data(read_data=dd.defaultDict['read_data'], +def get_population_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], split_gender=False, merge_eisenach=True): @@ -247,7 +247,7 @@ def get_population_data(read_data=dd.defaultDict['read_data'], Altersgruppen (17) - Stichtag 31.12. - regionale Tiefe: Kreise und krfr. Städte". - Download the xlsx file and put it under dd.defaultDict['out_folder'], + Download the xlsx file and put it under data_folder, this normally is Memilio/data/pydata/Germany. The folders 'pydata/Germany' have to be created if they do not exist yet. Then this script can be run. @@ -260,14 +260,14 @@ def get_population_data(read_data=dd.defaultDict['read_data'], represents the relative increase/decrease in population size between 2011 and 2019 for each county" This data can either be downloaded automatically or read from - "out_folder"/Germany/ if it was downloaded before. + "data_folder"/Germany/ if it was downloaded before. + @param data_folder Path to folder where data is written in folder + data_folder/Germany. @param read_data False or True. Defines if data is read from file or downloaded. Default defined in defaultDict. @param file_format File format which is used for writing the data. Default defined in defaultDict. 
- @param out_folder Path to folder where data is written in folder - out_folder/Germany. Default defined in defaultDict. @param no_raw True or False. Defines if unchanged raw data is written or not. Default defined in defaultDict. @param split_gender [Default: False] or True. Defines whether data is @@ -277,7 +277,8 @@ def get_population_data(read_data=dd.defaultDict['read_data'], combined as one entity 'Wartburgkreis'. @return DataFrame with adjusted population data for all ages to current level. """ - directory = os.path.join(dd.defaultDict['out_folder'], 'Germany') + directory = os.path.join(data_folder, 'Germany') + gd.check_dir(directory) filename = '12411-02-03-4' # '12411-09-01-4-B' new_data_file = os.path.join(directory, filename) new_data_avail = os.path.isfile(new_data_file + '.xlsx') @@ -409,9 +410,6 @@ def get_population_data(read_data=dd.defaultDict['read_data'], df_pop_export[dd.EngEng['population'] ] = df_pop_export.iloc[:, 2:].sum(axis=1) - directory = os.path.join(out_folder, 'Germany/') - gd.check_dir(directory) - filename = 'county_current_population_dim401' gd.write_dataframe(df_pop_export, directory, filename, file_format) @@ -429,7 +427,7 @@ def get_population_data(read_data=dd.defaultDict['read_data'], else: counties, zensus, reg_key = load_population_data( - out_folder, read_data=read_data, no_raw=no_raw, + data_folder, read_data=read_data, no_raw=no_raw, file_format=file_format) # find region keys for census population data @@ -520,9 +518,6 @@ def get_population_data(read_data=dd.defaultDict['read_data'], df_current = pd.DataFrame( np.round(data_current).astype(int), columns=columns) - directory = os.path.join(out_folder, 'Germany/') - gd.check_dir(directory) - if merge_eisenach == True: # Merge Eisenach and Wartburgkreis df_current = geoger.merge_df_counties_all( @@ -547,8 +542,9 @@ def get_population_data(read_data=dd.defaultDict['read_data'], def main(): """! 
Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("population") - get_population_data(**arg_dict) + get_population_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getSimulationData.py b/pycode/memilio-epidata/memilio/epidata/getSimulationData.py index 1f1134f483..f12bbe91fc 100644 --- a/pycode/memilio-epidata/memilio/epidata/getSimulationData.py +++ b/pycode/memilio-epidata/memilio/epidata/getSimulationData.py @@ -30,6 +30,7 @@ """ +import os from memilio.epidata import getDataIntoPandasDataFrame as gd from memilio.epidata import defaultDict as dd from memilio.epidata import getVaccinationData @@ -45,9 +46,9 @@ def print_error(text): ' data could not be stored correctly.') -def get_simulation_data(read_data=dd.defaultDict['read_data'], +def get_simulation_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], end_date=dd.defaultDict['end_date'], impute_dates=dd.defaultDict['impute_dates'], @@ -65,9 +66,9 @@ def get_simulation_data(read_data=dd.defaultDict['read_data'], - getDIVIData.get_divi_data Keyword arguments: + @param data_folder Path to folder where data is written in. @param read_data False [Default] or True. Defines if data is read from file or downloaded. @param file_format File format which is used for writing the data. Default defined in defaultDict. - @param out_folder Path to folder where data is written in folder out_folder/Germany. @param no_raw True or False [Default]. Defines if unchanged raw data is saved or not. @param end_date [Optional] Date to stop to download data [Default = today]. @param impute_dates False [Default] or True. Defines if dates where nothing changed are added. 
@@ -79,7 +80,7 @@ def get_simulation_data(read_data=dd.defaultDict['read_data'], arg_dict_all = { "read_data": read_data, "file_format": file_format, - "out_folder": out_folder, "no_raw": no_raw} + "no_raw": no_raw} arg_dict_cases = {**arg_dict_all, "make_plot": make_plot, "impute_dates": impute_dates, @@ -93,25 +94,25 @@ def get_simulation_data(read_data=dd.defaultDict['read_data'], "moving_average": moving_average} try: - getCaseData.get_case_data(**arg_dict_cases) + getCaseData.get_case_data(data_folder, **arg_dict_cases) except Exception as exp: print(str(type(exp).__name__) + ": " + str(exp)) print_error('case') try: - getPopulationData.get_population_data(**arg_dict_all) + getPopulationData.get_population_data(data_folder, **arg_dict_all) except Exception as exp: print(str(type(exp).__name__) + ": " + str(exp)) print_error('population') try: - getDIVIData.get_divi_data(**arg_dict_divi) + getDIVIData.get_divi_data(data_folder, **arg_dict_divi) except Exception as exp: print(str(type(exp).__name__) + ": " + str(exp)) print_error('DIVI') try: - getVaccinationData.get_vaccination_data(**arg_dict_vacc) + getVaccinationData.get_vaccination_data(data_folder, **arg_dict_vacc) except Exception as exp: print(str(type(exp).__name__) + ": " + str(exp)) print_error('vaccination') @@ -120,8 +121,9 @@ def get_simulation_data(read_data=dd.defaultDict['read_data'], def main(): """! 
Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("sim") - get_simulation_data(**arg_dict) + get_simulation_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getTestingData.py b/pycode/memilio-epidata/memilio/epidata/getTestingData.py index 82a8b27e68..f33c4dba8d 100644 --- a/pycode/memilio-epidata/memilio/epidata/getTestingData.py +++ b/pycode/memilio-epidata/memilio/epidata/getTestingData.py @@ -105,9 +105,9 @@ def transform_weeks_to_dates(df_test): # gets rki testing monitoring data resolved by federal states (which only # is a subset of the total conducted tests) # extrapolates the values for counties according to their population -def get_testing_data(read_data=dd.defaultDict['read_data'], +def get_testing_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], start_date=dd.defaultDict['start_date'], end_date=dd.defaultDict['end_date'], @@ -120,7 +120,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'], only positive rates are provided. The data is read from the internet. - The file is read in or stored at the folder "out_folder"/Germany/. + The file is read in or stored at the folder "data_folder"/Germany/. To store and change the data we use pandas. While working with the data @@ -143,12 +143,12 @@ def get_testing_data(read_data=dd.defaultDict['read_data'], - Start and end dates can be provided to define the length of the returned data frames. + @param data_folder Path to folder where data is written in folder + data_folder/Germany. @param read_data False [Default]. Data is always downloaded from the internet. @param file_format File format which is used for writing the data. Default defined in defaultDict. - @param out_folder Path to folder where data is written in folder - out_folder/Germany. 
@param no_raw True or False [Default]. Defines if raw data is saved or not. @param start_date [Default = '', taken from read data] Start date @@ -163,8 +163,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'], # data for all dates is automatically added impute_dates = True - directory = out_folder - directory = os.path.join(directory, 'Germany/') + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) df_test = download_testing_data() @@ -280,8 +279,9 @@ def get_testing_data(read_data=dd.defaultDict['read_data'], def main(): """! Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("testing") - get_testing_data(**arg_dict) + get_testing_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/getVaccinationData.py b/pycode/memilio-epidata/memilio/epidata/getVaccinationData.py index c7dde11055..437114ff5c 100644 --- a/pycode/memilio-epidata/memilio/epidata/getVaccinationData.py +++ b/pycode/memilio-epidata/memilio/epidata/getVaccinationData.py @@ -488,9 +488,9 @@ def extrapolate_age_groups( # gets rki vaccination monitoring data for all states and extrapolates the values for counties according to their population # Missing ratio values for the two different age groups are also estimated -def get_vaccination_data(read_data=dd.defaultDict['read_data'], +def get_vaccination_data(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], no_raw=dd.defaultDict['no_raw'], start_date=dd.defaultDict['start_date'], end_date=dd.defaultDict['end_date'], @@ -500,7 +500,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'], """! Downloads the RKI vaccination data and provides different kind of structured data. The data is read from the internet. - The file is read in or stored at the folder "out_folder"/Germany/. 
+ The file is read in or stored at the folder "data_folder"/Germany/. To store and change the data we use pandas. While working with the data @@ -519,9 +519,9 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'], - Start and end dates can be provided to define the length of the returned data frames. + @param data_folder Path to folder where data is written in folder data_folder/Germany. @param read_data False [Default]. Data is always downloaded from the internet. @param file_format File format which is used for writing the data. Default defined in defaultDict. - @param out_folder Path to folder where data is written in folder out_folder/Germany. @param no_raw True or False [Default]. Defines if raw data is saved or not. @param start_date [Default = '', taken from read data] Start date of stored data frames. @param end_date [Default = '', taken from read data] End date of stored data frames. @@ -545,7 +545,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'], # data for all dates is automatically added impute_dates = True - directory = os.path.join(out_folder, 'Germany/') + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) df_data = download_vaccination_data() @@ -618,7 +618,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'], except ValueError: print("Population data was not found. 
Download it from the internet.") population = gpd.get_population_data( - read_data=False, file_format=file_format, out_folder=out_folder, + data_folder, read_data=False, file_format=file_format, no_raw=no_raw, split_gender=False, merge_eisenach=True) min_age_pop = [] @@ -825,8 +825,7 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'], # commuter inflow from other counties as first weight to distribute # vaccinations from vaccination county to extrapolated home counties neighbors_mobility = gcm.get_neighbors_mobility_all( - direction='in', abs_tol=10, merge_eisenach=True, - out_folder=out_folder) + data_folder, direction='in', abs_tol=10, merge_eisenach=True) df_data_agevacc_county_cs = sanitizing_extrapolation_mobility(df_data_agevacc_county_cs, unique_age_groups_old, vacc_column_names, population_old_ages, neighbors_mobility) # compute the moving average @@ -1046,8 +1045,9 @@ def get_vaccination_data(read_data=dd.defaultDict['read_data'], def main(): """! Main program entry.""" + path = os.path.join(os.getcwd(), 'data', 'pydata') arg_dict = gd.cli("vaccination") - get_vaccination_data(**arg_dict) + get_vaccination_data(path, **arg_dict) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata/transformMobilityData.py b/pycode/memilio-epidata/memilio/epidata/transformMobilityData.py index 9e8264497b..7281562c7a 100644 --- a/pycode/memilio-epidata/memilio/epidata/transformMobilityData.py +++ b/pycode/memilio-epidata/memilio/epidata/transformMobilityData.py @@ -35,7 +35,7 @@ def getMobilityFromFile(directory, mobility_file): @return Mobility matrix data frame. """ mobility_matrix = gd.loadCsv( - '', directory + mobility_file, extension='.txt', + '', os.path.join(directory, mobility_file), extension='.txt', param_dict={'sep': ' ', 'header': None}) return mobility_matrix @@ -130,8 +130,9 @@ def updateMobility2022(directory, mobility_file): def main(): """! 
Main program entry.""" - directory = dd.defaultDict['out_folder'].split('/pydata')[0] - directory = os.path.join(directory, 'mobility/') + + path = os.path.join(os.getcwd(), 'data') + directory = os.path.join(path, 'mobility/') # Merge Eisenach and Wartbugkreis in Input Data if need be updateMobility2022(directory, mobility_file='twitter_scaled_1252') diff --git a/pycode/memilio-epidata/memilio/epidata/transformWeatherData.py b/pycode/memilio-epidata/memilio/epidata/transformWeatherData.py index 7e3bdf09dc..2cd6cc8c20 100644 --- a/pycode/memilio-epidata/memilio/epidata/transformWeatherData.py +++ b/pycode/memilio-epidata/memilio/epidata/transformWeatherData.py @@ -28,9 +28,9 @@ from memilio.epidata import modifyDataframeSeries -def transformWeatherData(read_data=dd.defaultDict['read_data'], +def transformWeatherData(data_folder, + read_data=dd.defaultDict['read_data'], file_format=dd.defaultDict['file_format'], - out_folder=dd.defaultDict['out_folder'], start_date=dd.defaultDict['start_date'], end_date=dd.defaultDict['end_date'], make_plot=dd.defaultDict['make_plot'], @@ -39,10 +39,10 @@ def transformWeatherData(read_data=dd.defaultDict['read_data'], merge_eisenach=False ): """! ... + @param data_folder Path to folder where data is written in folder + data_folder/Germany. @param file_format File format which is used for writing the data. Default defined in defaultDict. - @param out_folder Path to folder where data is written in folder - out_folder/Germany. @param start_date [Default = '', taken from read data] Start date of stored data frames. @param end_date [Default = '', taken from read data] End date of @@ -53,8 +53,7 @@ def transformWeatherData(read_data=dd.defaultDict['read_data'], days for which a centered moving average is computed. """ - directory = out_folder - directory = os.path.join(directory, 'Germany/') + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) if not read_data: @@ -190,7 +189,8 @@ def main(): """! 
Main program entry.""" # arg_dict = gd.cli("testing") - transformWeatherData(read_data=False, make_plot=True, moving_average=30) + path = os.path.join(os.getcwd(), 'data', 'pydata') + transformWeatherData(path, read_data=False, make_plot=True, moving_average=30) if __name__ == "__main__": diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_cleandata.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_cleandata.py index 23fec12b62..111c036493 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_cleandata.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_cleandata.py @@ -899,16 +899,14 @@ def test_no_files(self): def test_cli_default(self): - out_path_default = dd.defaultDict['out_folder'] - test_args = ["prog"] with patch.object(sys, 'argv', test_args): [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() - - print([all_data, cases, jh, popul, hdf5, out_path]) + testing, json, hdf5, txt] = cd.cli() + + print([all_data, cases, jh, popul, hdf5]) self.assertEqual(all_data, False) self.assertEqual(cases, False) @@ -921,41 +919,15 @@ def test_cli_default(self): self.assertEqual(json, False) self.assertEqual(hdf5, False) self.assertEqual(txt, False) - self.assertEqual(out_path, out_path_default) - - def test_cli_folder(self): - - folder = "some_folder" - test_args = ["prog", '--out_path', folder] - - with patch.object(sys, 'argv', test_args): - - [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() - - self.assertEqual(all_data, False) - self.assertEqual(cases, False) - self.assertEqual(jh, False) - self.assertEqual(popul, False) - self.assertEqual(divi, False) - self.assertEqual(vacc, False) - self.assertEqual(commuter, False) - self.assertEqual(testing, False) - self.assertEqual(json, False) - self.assertEqual(hdf5, False) - self.assertEqual(txt, False) - self.assertEqual(out_path, folder) def test_cli_all(self): - 
out_path_default = dd.defaultDict['out_folder'] - test_args = ["prog", '--all'] with patch.object(sys, 'argv', test_args): [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() + testing, json, hdf5, txt] = cd.cli() self.assertEqual(all_data, True) self.assertEqual(cases, False) @@ -968,18 +940,15 @@ def test_cli_all(self): self.assertEqual(json, False) self.assertEqual(hdf5, False) self.assertEqual(txt, False) - self.assertEqual(out_path, out_path_default) def test_cli_cases(self): - out_path_default = dd.defaultDict['out_folder'] - test_args = ["prog", '--cases', '--txt'] with patch.object(sys, 'argv', test_args): [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() + testing, json, hdf5, txt] = cd.cli() self.assertEqual(all_data, False) self.assertEqual(cases, True) @@ -992,18 +961,15 @@ def test_cli_cases(self): self.assertEqual(json, False) self.assertEqual(hdf5, False) self.assertEqual(txt, True) - self.assertEqual(out_path, out_path_default) def test_cli_jh(self): - out_path_default = dd.defaultDict['out_folder'] - test_args = ["prog", '-j', '--json', '--hdf5'] with patch.object(sys, 'argv', test_args): [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() + testing, json, hdf5, txt] = cd.cli() self.assertEqual(all_data, False) self.assertEqual(cases, False) @@ -1016,18 +982,15 @@ def test_cli_jh(self): self.assertEqual(json, True) self.assertEqual(hdf5, True) self.assertEqual(txt, False) - self.assertEqual(out_path, out_path_default) def test_cli_popul(self): - out_path_default = dd.defaultDict['out_folder'] - test_args = ['prog', '--population', '-js', '-h5', '-tx'] with patch.object(sys, 'argv', test_args): [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() + testing, json, hdf5, txt] = cd.cli() self.assertEqual(all_data, False) self.assertEqual(cases, False) @@ -1040,18 
+1003,15 @@ def test_cli_popul(self): self.assertEqual(json, True) self.assertEqual(hdf5, True) self.assertEqual(txt, True) - self.assertEqual(out_path, out_path_default) def test_cli_divi_vacc_commuter_testing(self): - out_path_default = dd.defaultDict['out_folder'] - test_args = ['prog', '-d', '-v', '-co', '-t'] with patch.object(sys, 'argv', test_args): [all_data, cases, jh, popul, divi, vacc, commuter, - testing, json, hdf5, txt, out_path] = cd.cli() + testing, json, hdf5, txt] = cd.cli() self.assertEqual(all_data, False) self.assertEqual(cases, False) @@ -1064,7 +1024,6 @@ def test_cli_divi_vacc_commuter_testing(self): self.assertEqual(json, False) self.assertEqual(hdf5, False) self.assertEqual(txt, False) - self.assertEqual(out_path, out_path_default) def test_clean_divi_vacc_commuter_testing_json(self): diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py index 283e2623f2..0526aa8698 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py @@ -19,6 +19,7 @@ ############################################################################# import unittest import pandas as pd +import numpy as np from unittest.mock import patch from pyfakefs import fake_filesystem_unittest from memilio.epidata import geoModificationGermany as geoger @@ -542,8 +543,9 @@ def test_get_intermediateregionid_to_countyids_map(self): def test_merge_df_counties(self): test_df = pd.DataFrame(self.eisenach_unmerged_data) group_columns = ['Date', 'labels'] + separated_ids = np.array( [16063, 16056],dtype=np.int64) result_df = geoger.merge_df_counties( - test_df, 16063, [16063, 16056], group_columns, group_columns) + test_df, 16063, separated_ids, group_columns, group_columns) pd.testing.assert_frame_equal(result_df, self.eisenach_merged_df) # 
the test dataframe should be unchanged as it is the input of the function pd.testing.assert_frame_equal( diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseData.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseData.py index 0c2e2da74f..c19e56a33f 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseData.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseData.py @@ -158,9 +158,9 @@ def write_case_data_not_all_states(self, out_folder): def test_get_case_data_read(self): # Test without downloading data + out_folder = self.path read_data = True file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = False make_plot = False @@ -176,7 +176,7 @@ def test_get_case_data_read(self): file_with_path = os.path.join(directory, file) with self.assertRaises(FileNotFoundError) as error: - gcd.get_case_data(read_data, file_format, out_folder, no_raw, + gcd.get_case_data(out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) @@ -191,7 +191,7 @@ def test_get_case_data_read(self): self.assertEqual(len(os.listdir(directory)), 1) gcd.get_case_data( - read_data, file_format, out_folder, impute_dates, make_plot, + out_folder, read_data, file_format, impute_dates, make_plot, moving_average, no_raw, split_berlin, rep_date) # check if expected files are written @@ -314,9 +314,9 @@ def test_get_case_data_read(self): @patch('memilio.epidata.getCaseData.gd.loadCsv') def test_get_case_data_download(self, mock_loadCsv, mock_loadGeojson): # Test with downloading data + out_folder = self.path read_data = False file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = False make_plot = False @@ -339,7 +339,7 @@ def test_get_case_data_download(self, mock_loadCsv, mock_loadGeojson): os.path.join(directory, "CaseDataNotFull.json")) with self.assertRaises(FileNotFoundError) as error: 
gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) self.assertEqual( str(error.exception), @@ -359,7 +359,7 @@ def test_get_case_data_download(self, mock_loadCsv, mock_loadGeojson): os.path.join(directory, "CaseDataArcgis.json")) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) mock_loadGeojson.assert_called() @@ -407,9 +407,9 @@ def test_get_case_data_download_split_berlin( self, mock_loadCsv, mock_loadGeojson): # Test case with downloading data where first csv-source is incomplete and second one is used # and split_berlin = True + out_folder = self.path read_data = False file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = False make_plot = False @@ -431,7 +431,7 @@ def test_get_case_data_download_split_berlin( mock_loadGeojson.return_value = pd.DataFrame() gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) mock_loadGeojson.assert_not_called() @@ -505,9 +505,9 @@ def test_get_case_data_download_split_berlin( def test_get_case_data_read_moving_average(self): # Test without downloading data + out_folder = self.path read_data = True file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = False make_plot = False @@ -524,7 +524,7 @@ def test_get_case_data_read_moving_average(self): self.assertEqual(len(os.listdir(directory)), 1) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) # check if expected files are written @@ -686,9 +686,9 @@ def 
test_get_case_data_read_moving_average(self): def test_get_case_data_read_impute_dates(self): # Test without downloading data + out_folder = self.path read_data = True file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = True make_plot = False @@ -705,7 +705,7 @@ def test_get_case_data_read_impute_dates(self): self.assertEqual(len(os.listdir(directory)), 1) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) # check if expected files are written @@ -816,9 +816,9 @@ def test_get_case_data_read_impute_dates(self): def test_get_case_data_read_moving_average_and_split_berlin(self): # test if split_berlin and moving_average = True are working together + out_folder = self.path read_data = True file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = False make_plot = False @@ -835,7 +835,7 @@ def test_get_case_data_read_moving_average_and_split_berlin(self): self.assertEqual(len(os.listdir(directory)), 1) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) # check if expected files are written (27 same number as with split_berlin=False) @@ -866,9 +866,9 @@ def test_get_case_data_read_moving_average_and_split_berlin(self): def test_get_case_data_read_all_dates_and_split_berlin(self): # test if split_berlin and moving_average = True are working together + out_folder = self.path read_data = True file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = True make_plot = False @@ -885,7 +885,7 @@ def test_get_case_data_read_all_dates_and_split_berlin(self): self.assertEqual(len(os.listdir(directory)), 1) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, 
file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) # check if expected files are written (27 same number as with split_berlin=False) @@ -906,9 +906,9 @@ def test_get_case_data_read_all_dates_and_split_berlin(self): @patch('memilio.epidata.getCaseData.gd.loadCsv') def test_no_raw(self, mock_loadCsv): # Test with downloading data + out_folder = self.path read_data = False file_format = 'json_timeasstring' - out_folder = self.path no_raw = True impute_dates = False make_plot = False @@ -924,7 +924,7 @@ def test_no_raw(self, mock_loadCsv): self.test_string_all_federal_states_and_counties_github) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) mock_loadCsv.assert_called() @@ -972,13 +972,12 @@ def test_main(self, mock_cli): mock_cli.return_value = {"read_data": True, "file_format": 'json_timeasstring', - "out_folder": self.path, "impute_dates": False, "make_plot": False, "moving_average": 0, "split_berlin": False, "no_raw": False, "rep_date": False} - out_folder = self.path - directory = os.path.join(out_folder, 'Germany/') + data_folder = self.path + directory = os.path.join(data_folder, 'Germany/') gd.check_dir(directory) # Test case where file does not exist @@ -989,7 +988,8 @@ def test_main(self, mock_cli): # check if expected file is written self.assertEqual(len(os.listdir(directory)), 1) - gcd.main() + arg_dict = gd.cli("cases") + gcd.get_case_data(data_folder, **arg_dict) # check if expected files are written self.assertEqual(len(os.listdir(directory)), 14) @@ -1003,9 +1003,9 @@ def test_rep_date(self, mocklcsv): mocklcsv.return_value = pd.read_json( self.test_string_all_federal_states_and_counties_github) + out_folder = self.path read_data = False file_format = 'json_timeasstring' - out_folder = self.path no_raw = False impute_dates = False make_plot = False @@ -1017,7 +1017,7 @@ def 
test_rep_date(self, mocklcsv): gd.check_dir(directory) gcd.get_case_data( - read_data, file_format, out_folder, no_raw, impute_dates, + out_folder, read_data, file_format, no_raw, impute_dates, make_plot, moving_average, split_berlin, rep_date) mocklcsv.assert_called() diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseDatawithEstimations.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseDatawithEstimations.py index 624ac54f2b..4bbf2e89ad 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseDatawithEstimations.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCaseDatawithEstimations.py @@ -230,8 +230,8 @@ def write_weekly_deaths_xlsx_data( def test_get_case_data_with_estimations(self): - [read_data, make_plot, file_format, out_folder, no_raw] \ - = [True, False, "json", self.path, False] + [out_folder, read_data, make_plot, file_format, no_raw] \ + = [self.path, True, False, "json", False] # write files which should be read in by program @@ -249,7 +249,7 @@ def test_get_case_data_with_estimations(self): 2 + len(self.case_files_to_change)) gcdwe.get_case_data_with_estimations( - read_data, file_format, out_folder, no_raw, make_plot) + out_folder, read_data, file_format, no_raw, make_plot) # check if expected files are written self.assertEqual(len(os.listdir(self.path)), 1) @@ -336,8 +336,8 @@ def test_get_case_data_with_estimations(self): def test_get_case_data_with_estimations_age_data(self): - [read_data, make_plot, file_format, out_folder, no_raw] \ - = [True, False, "json", self.path, False] + [out_folder, read_data, make_plot, file_format, no_raw] \ + = [self.path, True, False, "json", False] # write files which should be read in by program @@ -355,7 +355,7 @@ def test_get_case_data_with_estimations_age_data(self): 2 + len(self.case_files_to_change)) gcdwe.get_case_data_with_estimations( - read_data, file_format, out_folder, no_raw, make_plot) + out_folder, read_data, 
file_format, no_raw, make_plot) # check if expected files are written self.assertEqual(len(os.listdir(self.path)), 1) @@ -408,8 +408,8 @@ def test_get_case_data_with_estimations_download( self, mock_get_jh_data, mock_get_case_data, mock_download_weekly_deaths_numbers): - [read_data, make_plot, file_format, out_folder, no_raw] \ - = [False, False, "json", self.path, False] + [out_folder, read_data, make_plot, file_format, no_raw] \ + = [self.path, False, False, "json", False] directory = os.path.join(out_folder, 'Germany/') gd.check_dir(directory) @@ -421,9 +421,6 @@ def test_get_case_data_with_estimations_download( # write files which should be read in by program - directory = os.path.join(out_folder, 'Germany/') - gd.check_dir(directory) - case_files_to_change = [ "cases_all_germany", "cases_all_gender", "cases_all_age", "cases_all_state", "cases_all_state_gender", "cases_all_state_age", @@ -455,7 +452,7 @@ def test_get_case_data_with_estimations_download( 2 + len(case_files_to_change)) gcdwe.get_case_data_with_estimations( - read_data, file_format, out_folder, no_raw, make_plot) + out_folder, read_data, file_format, no_raw, make_plot) # check if expected files are written self.assertEqual(len(os.listdir(self.path)), 1) @@ -582,7 +579,7 @@ def test_except_non_existing_file(self, mock_print): self.write_jh_data(directory) gcdwe.get_case_data_with_estimations( - read_data, file_format, out_folder, no_raw, make_plot) + out_folder, read_data, file_format, no_raw, make_plot) # print is called 9 times, because no file exists self.assertEqual(len(mock_print.mock_calls), 9) diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py index 6ff343b004..a8ba24d4d8 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py @@ -321,7 +321,7 @@ 
def test_commuter_data(self): """! Tests migration data by some randomly chosen tests. """ - df_commuter_migration = gcm.get_commuter_data(out_folder=self.path) + df_commuter_migration = gcm.get_commuter_data(self.path) # the first column and first row are just the county IDs # mat_commuter_migration is the real Data that should be tested mat_commuter_migration = df_commuter_migration.iloc[:, 0:] @@ -341,7 +341,7 @@ def test_commuter_data(self): city_from = countykey2numlist['05316'] city_to = countykey2numlist['16069'] population = gpd.get_population_data( - out_folder=self.path, merge_eisenach=False) + self.path, merge_eisenach=False) countypop_list = list(population[dd.EngEng['population']]) self.assertEqual(countypop_list[city_from], 163905) self.assertAlmostEqual( @@ -370,8 +370,8 @@ def test_get_neighbors_mobility(self, mock_print): tci = testcountyid #direction = both (countykey_list, commuter_all) = gcm.get_neighbors_mobility( - tci, direction='both', abs_tol=0, rel_tol=0, - tol_comb='or', merge_eisenach=True, out_folder=self.path) + self.path, tci, direction='both', abs_tol=0, rel_tol=0, + tol_comb='or', merge_eisenach=True) self.assertEqual(len(countykey_list), 398) self.assertAlmostEqual(228, commuter_all[0], 2) self.assertAlmostEqual(2146, commuter_all[9], 2) @@ -380,8 +380,8 @@ def test_get_neighbors_mobility(self, mock_print): # direction = in (countykey_list, commuter_all) = gcm.get_neighbors_mobility( - tci, direction='in', abs_tol=0, rel_tol=0, - tol_comb='or', merge_eisenach=True, out_folder=self.path) + self.path, tci, direction='in', abs_tol=0, rel_tol=0, + tol_comb='or', merge_eisenach=True) self.assertEqual(len(countykey_list), 393) self.assertAlmostEqual(48, commuter_all[0], 2) self.assertAlmostEqual(842, commuter_all[9], 2) @@ -389,8 +389,8 @@ def test_get_neighbors_mobility(self, mock_print): # direction = out (countykey_list, commuter_all) = gcm.get_neighbors_mobility( - tci, direction='out', abs_tol=0, rel_tol=0, - tol_comb='or', 
merge_eisenach=True, out_folder=self.path) + self.path, tci, direction='out', abs_tol=0, rel_tol=0, + tol_comb='or', merge_eisenach=True) self.assertEqual(len(countykey_list), 375) self.assertAlmostEqual(180, commuter_all[0], 2) self.assertAlmostEqual(1304, commuter_all[9], 2) diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py index bdcb6e421e..c79cced056 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getDataIntoPandasDataFrame.py @@ -41,19 +41,23 @@ class Test_getDataIntoPandasDataFrame(fake_filesystem_unittest.TestCase): path = '/home/x/' + # path_data is used in test_call_functions() + # Windows: '//data//pydata'; Linux: '/data/pydata' + path_data = os.sep + 'data' + os.sep + 'pydata' + data = ("""{"type": "FeatureCollection",\ -"name": "Cases_COVID19",\ -"features": [\ -{ "type": "Feature", "properties": { "ObjectId": 1, "IdBundesland": 1, "Bundesland": "Schleswig-Holstein",\ -"Landkreis": "SK Flensburg", "Altersgruppe": "A15-A34", "Geschlecht": "M", "AnzahlFall": 1, "AnzahlTodesfall": 0,\ -"Meldedatum": "2021-03-26T00:00:00Z", "IdLandkreis": "01001", "Datenstand": "20.04.2021, 00:00 Uhr", "NeuerFall": 0, "NeuerTodesfall": -9, "Refdatum": "2021-03-22T00:00:00Z", "NeuGenesen": 0, "AnzahlGenesen": 1, "IstErkrankungsbeginn": 1, "Altersgruppe2": "Nicht übermittelt" }, "geometry": null },\ -{ "type": "Feature", "properties": { "ObjectId": 2, "IdBundesland": 1, "Bundesland": "Schleswig-Holstein", -"Landkreis": "SK Flensburg", "Altersgruppe": "A15-A34", "Geschlecht": "M", "AnzahlFall": 7, "AnzahlTodesfall": 0,\ -"Meldedatum": "2021-03-26T00:00:00Z", "IdLandkreis": "01001", "Datenstand": "20.04.2021, 00:00 Uhr", "NeuerFall": 0, "NeuerTodesfall": -9, "Refdatum": "2021-03-26T00:00:00Z", "NeuGenesen": 0, "AnzahlGenesen": 
7, "IstErkrankungsbeginn": 0, "Altersgruppe2": "Nicht übermittelt" }, "geometry": null },\ -{ "type": "Feature", "properties": { "ObjectId": 3, "IdBundesland": 1, "Bundesland": "Schleswig-Holstein",\ -"Landkreis": "SK Flensburg", "Altersgruppe": "A15-A34", "Geschlecht": "M", "AnzahlFall": 1, "AnzahlTodesfall": 0,\ -"Meldedatum": "2021-03-26T00:00:00Z", "IdLandkreis": "01001", "Datenstand": "20.04.2021, 00:00 Uhr", "NeuerFall": 0, "NeuerTodesfall": -9, "Refdatum": "2021-03-26T00:00:00Z", "NeuGenesen": -9, "AnzahlGenesen": 0, "IstErkrankungsbeginn": 0, "Altersgruppe2": "Nicht übermittelt" }, "geometry": null }\ -]}""") + "name": "Cases_COVID19",\ + "features": [\ + { "type": "Feature", "properties": { "ObjectId": 1, "IdBundesland": 1, "Bundesland": "Schleswig-Holstein",\ + "Landkreis": "SK Flensburg", "Altersgruppe": "A15-A34", "Geschlecht": "M", "AnzahlFall": 1, "AnzahlTodesfall": 0,\ + "Meldedatum": "2021-03-26T00:00:00Z", "IdLandkreis": "01001", "Datenstand": "20.04.2021, 00:00 Uhr", "NeuerFall": 0, "NeuerTodesfall": -9, "Refdatum": "2021-03-22T00:00:00Z", "NeuGenesen": 0, "AnzahlGenesen": 1, "IstErkrankungsbeginn": 1, "Altersgruppe2": "Nicht übermittelt" }, "geometry": null },\ + { "type": "Feature", "properties": { "ObjectId": 2, "IdBundesland": 1, "Bundesland": "Schleswig-Holstein", + "Landkreis": "SK Flensburg", "Altersgruppe": "A15-A34", "Geschlecht": "M", "AnzahlFall": 7, "AnzahlTodesfall": 0,\ + "Meldedatum": "2021-03-26T00:00:00Z", "IdLandkreis": "01001", "Datenstand": "20.04.2021, 00:00 Uhr", "NeuerFall": 0, "NeuerTodesfall": -9, "Refdatum": "2021-03-26T00:00:00Z", "NeuGenesen": 0, "AnzahlGenesen": 7, "IstErkrankungsbeginn": 0, "Altersgruppe2": "Nicht übermittelt" }, "geometry": null },\ + { "type": "Feature", "properties": { "ObjectId": 3, "IdBundesland": 1, "Bundesland": "Schleswig-Holstein",\ + "Landkreis": "SK Flensburg", "Altersgruppe": "A15-A34", "Geschlecht": "M", "AnzahlFall": 1, "AnzahlTodesfall": 0,\ + "Meldedatum": "2021-03-26T00:00:00Z", 
"IdLandkreis": "01001", "Datenstand": "20.04.2021, 00:00 Uhr", "NeuerFall": 0, "NeuerTodesfall": -9, "Refdatum": "2021-03-26T00:00:00Z", "NeuGenesen": -9, "AnzahlGenesen": 0, "IstErkrankungsbeginn": 0, "Altersgruppe2": "Nicht übermittelt" }, "geometry": null }\ + ]}""") def setUp(self): self.setUpPyfakefs() @@ -127,34 +131,27 @@ def test_load_csv_working(self, mock_csv): def test_cli_correct_default(self): - out_path_default = dd.defaultDict['out_folder'] - arg_dict = gd.cli("population") read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] - out_folder = arg_dict["out_folder"] no_raw = arg_dict["no_raw"] assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert no_raw == dd.defaultDict['no_raw'] arg_dict = gd.cli("jh") read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] - out_folder = arg_dict["out_folder"] no_raw = arg_dict["no_raw"] assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert no_raw == dd.defaultDict['no_raw'] arg_dict = gd.cli("cases") read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] - out_folder = arg_dict["out_folder"] impute_dates = arg_dict["impute_dates"] make_plot = arg_dict["make_plot"] moving_average = arg_dict["moving_average"] @@ -164,7 +161,6 @@ def test_cli_correct_default(self): assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert make_plot == dd.defaultDict['make_plot'] assert split_berlin == dd.defaultDict['split_berlin'] assert moving_average == dd.defaultDict['moving_average'] @@ -176,30 +172,25 @@ def test_cli_correct_default(self): read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] make_plot = arg_dict["make_plot"] - out_folder = arg_dict["out_folder"] no_raw = arg_dict["no_raw"] 
assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert no_raw == dd.defaultDict['no_raw'] assert make_plot == dd.defaultDict['make_plot'] arg_dict = gd.cli("commuter_official") read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] - out_folder = arg_dict["out_folder"] no_raw = arg_dict["no_raw"] assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert no_raw == dd.defaultDict['no_raw'] arg_dict = gd.cli("divi") read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] - out_folder = arg_dict["out_folder"] end_date = arg_dict["end_date"] start_date = arg_dict["start_date"] impute_dates = arg_dict["impute_dates"] @@ -208,7 +199,6 @@ def test_cli_correct_default(self): assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert end_date == dd.defaultDict['end_date'] assert start_date == dd.defaultDict['start_date'] assert impute_dates == dd.defaultDict['impute_dates'] @@ -216,11 +206,10 @@ def test_cli_correct_default(self): assert no_raw == dd.defaultDict['no_raw'] arg_dict = gd.cli("sim") - [read_data, file_format, out_folder, end_date, make_plot, impute_dates, + [read_data, file_format, end_date, make_plot, impute_dates, moving_average, split_berlin, start_date, no_raw] read_data = arg_dict["read_data"] file_format = arg_dict["file_format"] - out_folder = arg_dict["out_folder"] end_date = arg_dict["end_date"] make_plot = arg_dict["make_plot"] start_date = arg_dict["start_date"] @@ -231,7 +220,6 @@ def test_cli_correct_default(self): assert read_data == dd.defaultDict['read_data'] assert file_format == dd.defaultDict['file_format'] - assert out_folder == out_path_default assert no_raw == dd.defaultDict['no_raw'] assert end_date == dd.defaultDict['end_date'] assert 
impute_dates == dd.defaultDict['impute_dates'] @@ -300,48 +288,41 @@ def test_cli_correct_raise_exit(self, mock_stderr): def test_cli_set_different_values(self): - folder = "some_folder" - - test_args = ["prog", '--read-data', '--out-folder', - folder, '--file-format', 'hdf5', '--no-raw'] + test_args = ["prog", '--read-data', + '--file-format', 'hdf5', '--no-raw'] with patch.object(sys, 'argv', test_args): arg_dict = gd.cli("population") - [read_data, file_format, out_folder, no_raw] = [ + [read_data, file_format, no_raw] = [ arg_dict["read_data"], arg_dict["file_format"], - arg_dict["out_folder"], arg_dict["no_raw"]] assert read_data == True assert file_format == 'hdf5' - assert out_folder == "some_folder" assert no_raw == True arg_dict = gd.cli("jh") - [read_data, file_format, out_folder, no_raw] = [ + [read_data, file_format, no_raw] = [ arg_dict["read_data"], arg_dict["file_format"], - arg_dict["out_folder"], arg_dict["no_raw"]] assert read_data == True assert file_format == 'hdf5' - assert out_folder == "some_folder" assert no_raw == True - test_args = ["prog", '--read-data', '--out-folder', folder, + test_args = ["prog", '--read-data', '--file-format', 'hdf5', '--make-plot', '--split-berlin', '--moving-average', 0, '--no-raw', '--impute-dates'] with patch.object(sys, 'argv', test_args): arg_dict = gd.cli("cases") - [read_data, file_format, out_folder, impute_dates, make_plot, + [read_data, file_format, impute_dates, make_plot, moving_average, split_berlin, no_raw, rep_date] = [ arg_dict["read_data"], arg_dict["file_format"], - arg_dict["out_folder"], arg_dict["impute_dates"], arg_dict["make_plot"], arg_dict["moving_average"], @@ -351,7 +332,6 @@ def test_cli_set_different_values(self): assert read_data == True assert file_format == 'hdf5' - assert out_folder == "some_folder" assert impute_dates == True assert split_berlin == True assert moving_average == 0 @@ -359,57 +339,52 @@ def test_cli_set_different_values(self): assert no_raw == True assert rep_date == 
False - test_args = ["prog", '--read-data', '--out-folder', - folder, '--file-format', 'json', '--make-plot'] + test_args = ["prog", '--read-data', + '--file-format', 'json', '--make-plot'] with patch.object(sys, 'argv', test_args): arg_dict = gd.cli("cases_est") - [read_data, file_format, out_folder, no_raw, make_plot] = [ + [read_data, file_format, no_raw, make_plot] = [ arg_dict["read_data"], arg_dict["file_format"], - arg_dict["out_folder"], arg_dict["no_raw"], arg_dict["make_plot"]] assert read_data == True assert file_format == 'json' - assert out_folder == "some_folder" assert make_plot == True assert no_raw == False test_args = [ - "prog", '--out-folder', folder, '--file-format', 'json', + "prog", '--file-format', 'json', '--start-date', '2020-11-24', '--end-date', '2020-11-26', '-n'] with patch.object(sys, 'argv', test_args): arg_dict = gd.cli("divi") - [read_data, file_format, out_folder, end_date, start_date, + [read_data, file_format, end_date, start_date, no_raw] = [arg_dict["read_data"], arg_dict["file_format"], - arg_dict["out_folder"], arg_dict["end_date"], arg_dict["start_date"], arg_dict["no_raw"]] assert read_data == dd.defaultDict['read_data'] assert file_format == 'json' - assert out_folder == "some_folder" assert end_date == date(2020, 11, 26) assert start_date == date(2020, 11, 24) assert no_raw == True test_args = [ - "prog", '--out-folder', folder, '--file-format', 'json', + "prog", '--file-format', 'json', '--make-plot', '--start-date', '2020-11-24', '--end-date', '2020-11-26'] with patch.object(sys, 'argv', test_args): arg_dict = gd.cli("sim") - [read_data, file_format, out_folder, no_raw, end_date, + [read_data, file_format, no_raw, end_date, impute_dates, make_plot, moving_average, split_berlin, start_date] = [arg_dict["read_data"], arg_dict["file_format"], - arg_dict["out_folder"], arg_dict["no_raw"], arg_dict["end_date"], arg_dict["impute_dates"], @@ -420,7 +395,6 @@ def test_cli_set_different_values(self): assert read_data == 
dd.defaultDict['read_data'] assert file_format == 'json' - assert out_folder == "some_folder" assert end_date == date(2020, 11, 26) assert start_date == date(2020, 11, 24) assert make_plot == True @@ -544,7 +518,6 @@ def test_call_functions( arg_dict_all = { "read_data": dd.defaultDict['read_data'], "file_format": dd.defaultDict['file_format'], - "out_folder": os.path.join(dd.defaultDict['out_folder']), 'no_raw': dd.defaultDict["no_raw"]} arg_dict_vaccination = { @@ -573,27 +546,27 @@ def test_call_functions( getVaccinationData.main() mock_vaccination.assert_called() - mock_vaccination.assert_called_with(**arg_dict_vaccination) + mock_vaccination.assert_called_with(self.path_data, **arg_dict_vaccination) getPopulationData.main() mock_popul.assert_called() - mock_popul.assert_called_with(**arg_dict_all) + mock_popul.assert_called_with(self.path_data, **arg_dict_all) getCaseData.main() mock_cases.assert_called() - mock_cases.assert_called_with(**arg_dict_cases) + mock_cases.assert_called_with(self.path_data, **arg_dict_cases) getDIVIData.main() mock_divi.assert_called() - mock_divi.assert_called_with(**arg_dict_divi) + mock_divi.assert_called_with(self.path_data, **arg_dict_divi) getCaseDatawithEstimations.main() mock_caseswe.assert_called() - mock_caseswe.assert_called_with(**arg_dict_cases_est) + mock_caseswe.assert_called_with(self.path_data, **arg_dict_cases_est) getJHData.main() mock_jh.assert_called() - mock_jh.assert_called_with(**arg_dict_all) + mock_jh.assert_called_with(self.path_data, **arg_dict_all) if __name__ == '__main__': diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_divi_data.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_divi_data.py index 1d679725de..7276394b88 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_divi_data.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_divi_data.py @@ -72,7 +72,7 @@ def test_exit_strings(self, mocklcsv, mockrjson): # test 
read_data Error call if json file is not found mockrjson.side_effect = ValueError with self.assertRaises(FileNotFoundError) as error: - gdd.get_divi_data(read_data=True, out_folder=self.path) + gdd.get_divi_data(self.path, read_data=True) file_in = os.path.join(self.path, "Germany/FullData_DIVI.json") error_message = "Error: The file: " + file_in + " does not exist. "\ "Call program without -r flag to get it." @@ -81,7 +81,7 @@ def test_exit_strings(self, mocklcsv, mockrjson): # test loadCsv Error if file can't be downloaded mocklcsv.side_effect = Exception with self.assertRaises(FileNotFoundError) as error: - gdd.get_divi_data(read_data=False) + gdd.get_divi_data(self.path, read_data=False) error_message = "Error: Download link for Divi data has changed." self.assertEqual(str(error.exception), error_message) @@ -92,7 +92,7 @@ def test_df_empty(self, mocklcsv): # test Error for empty returned dataframe mocklcsv.value = pd.DataFrame() with self.assertRaises(gd.DataError) as error: - gdd.get_divi_data(read_data=False) + gdd.get_divi_data(self.path, read_data=False) error_message = "Something went wrong, dataframe is empty." self.assertEqual(str(error.exception), error_message) @@ -102,7 +102,7 @@ def test_df_empty(self, mocklcsv): def test_get_divi_data_prints(self, mock_print, mock_csv, mock_san): mock_csv.return_value = self.df_test # case with start_date before 2020-04-24 - gdd.get_divi_data(out_folder=self.path, start_date=date(2020, 1, 1)) + gdd.get_divi_data(self.path, start_date=date(2020, 1, 1)) expected_call = [ call( 'Warning: First data available on 2020-04-24. 
You asked for 2020-01-01.')] @@ -117,7 +117,7 @@ def test_get_divi_data_prints(self, mock_print, mock_csv, mock_san): def test_get_divi_data(self, mock_print, mock_csv, mock_san): mock_csv.return_value = self.df_test # test case with standard parameters - (df, df_county, df_states, df_ger) = gdd.get_divi_data(out_folder=self.path) + (df, df_county, df_states, df_ger) = gdd.get_divi_data(self.path) mock_san.assert_has_calls([call(self.df_test)]) pd.testing.assert_frame_equal(df, self.df_test) self.assertEqual( @@ -154,7 +154,7 @@ def test_gdd_ma(self, mock_print, mock_csv, mock_san): mock_csv.return_value = self.df_test # test case with moving average (df, df_county, df_states, df_ger) = gdd.get_divi_data( - out_folder=self.path, moving_average=3) + self.path, moving_average=3) mock_san.assert_has_calls([call(self.df_test)]) pd.testing.assert_frame_equal(df, self.df_test) self.assertAlmostEqual( @@ -191,7 +191,7 @@ def test_gdd_all_dates(self, mock_print, mock_csv, mock_san): mock_csv.return_value = self.df_test # test case with impute dates is True (df, df_county, df_states, df_ger) = gdd.get_divi_data( - out_folder=self.path, impute_dates=True) + self.path, impute_dates=True) mock_san.assert_has_calls([call(self.df_test)]) pd.testing.assert_frame_equal(df, self.df_test) self.assertEqual( @@ -273,7 +273,7 @@ def test_divi_data_sanity_checks(self, mockrjson3): # test if it works in main with self.assertRaises(gd.DataError) as error: - gdd.get_divi_data(read_data=True, out_folder=self.path) + gdd.get_divi_data(self.path, read_data=True) error_message = "Error: Number of data categories changed." 
self.assertEqual(str(error.exception), error_message) diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_jh_data.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_jh_data.py index f9bec3a6d4..1c4cbb9a20 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_jh_data.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_jh_data.py @@ -94,7 +94,7 @@ def test_get_JH_Data(self): file_with_path = os.path.join(out_folder, file) with self.assertRaises(FileNotFoundError) as error: - gJHD.get_jh_data(read_data, file_format, out_folder, no_raw) + gJHD.get_jh_data(out_folder, read_data, file_format, no_raw) self.assertEqual(str(error.exception), "Error: The file: " + file_with_path + \ " does not exist. Call program without -r " @@ -108,7 +108,7 @@ def test_get_JH_Data(self): # check if expected file is written self.assertEqual(len(os.listdir(self.path)), 1) - gJHD.get_jh_data(read_data, file_format, out_folder, no_raw) + gJHD.get_jh_data(out_folder, read_data, file_format, no_raw) # check if expected files are written # 7 country-folders+3 all countries-files @@ -197,7 +197,7 @@ def test_get_JH_Data_Download(self, mock_loadcsv): mock_loadcsv.return_value = pd.read_json(self.str_FullData_JohnHopkins) - gJHD.get_jh_data(read_data, file_format, out_folder, no_raw) + gJHD.get_jh_data(out_folder, read_data, file_format, no_raw) mock_loadcsv.assert_called_once() @@ -291,7 +291,7 @@ def test_get_JH_Data_Download_omit_raw(self, mock_loadcsv): mock_loadcsv.return_value = pd.read_json(self.str_FullData_JohnHopkins) - gJHD.get_jh_data(read_data, file_format, out_folder, no_raw) + gJHD.get_jh_data(out_folder, read_data, file_format, no_raw) mock_loadcsv.assert_called_once() diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py index dec22fd370..c044b1d02e 100644 --- 
a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_population_data.py @@ -132,7 +132,7 @@ def test_get_new_counties(self): def test_get_population(self, mock_data): gpd.get_population_data( - read_data=True, file_format='json', out_folder=self.path, + self.path, read_data=True, file_format='json', no_raw=False, split_gender=False, merge_eisenach=False) test_df = pd.read_json(os.path.join( @@ -147,7 +147,7 @@ def test_get_population(self, mock_data): def test_popul_split_gender(self, mock_data): test_df = gpd.get_population_data( - read_data=False, file_format='json', out_folder=self.path, + self.path, read_data=False, file_format='json', no_raw=False, split_gender=True, merge_eisenach=False) test_df = test_df.drop( @@ -165,11 +165,11 @@ def test_load_population_data( directory = os.path.join(self.path, 'Germany/') counties_write, zensus_write, reg_key_write = gpd.load_population_data( - out_folder=self.path, read_data=False) + self.path, read_data=False) self.assertEqual(len(os.listdir(directory)), 3) counties_read, zensus_read, reg_key_read = gpd.load_population_data( - out_folder=self.path, read_data=True) + self.path, read_data=True) pd.testing.assert_frame_equal( counties_read, counties_write, check_dtype=False) diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_simulation_data.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_simulation_data.py index 80568de998..9de2c8d02c 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_simulation_data.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_simulation_data.py @@ -41,9 +41,9 @@ def setUp(self): def test_get_call_sub_functions(self, mock_popul, mock_cases, mock_divi, mock_vaccination): - [read_data, file_format, out_folder, no_raw, end_date, impute_dates, - make_plot, moving_average, split_berlin, start_date] = [False, - 
"json_timeasstring", self.path, + [out_folder, read_data, file_format, no_raw, end_date, impute_dates, + make_plot, moving_average, split_berlin, start_date] = [self.path, False, + "json_timeasstring", False, dd.defaultDict['end_date'], dd.defaultDict['impute_dates'], dd.defaultDict['make_plot'], @@ -52,13 +52,12 @@ def test_get_call_sub_functions(self, mock_popul, mock_cases, dd.defaultDict['start_date']] gsd.get_simulation_data( - read_data, file_format, out_folder, no_raw, end_date, impute_dates, + out_folder, read_data, file_format, no_raw, end_date, impute_dates, make_plot, moving_average, split_berlin, start_date) arg_dict_all = { "read_data": dd.defaultDict['read_data'], "file_format": dd.defaultDict['file_format'], - "out_folder": self.path, 'no_raw': dd.defaultDict["no_raw"]} arg_dict_cases = { @@ -78,16 +77,16 @@ def test_get_call_sub_functions(self, mock_popul, mock_cases, "moving_average": dd.defaultDict['moving_average']} mock_popul.assert_called() - mock_popul.assert_called_with(**arg_dict_all) + mock_popul.assert_called_with(out_folder, **arg_dict_all) mock_cases.assert_called() - mock_cases.assert_called_with(**arg_dict_cases) + mock_cases.assert_called_with(out_folder, **arg_dict_cases) mock_divi.assert_called() - mock_divi.assert_called_with(**arg_dict_divi) + mock_divi.assert_called_with(out_folder, **arg_dict_divi) mock_vaccination.assert_called() - mock_vaccination.assert_called_with(**arg_dict_vaccination) + mock_vaccination.assert_called_with(out_folder, **arg_dict_vaccination) @patch('builtins.print') @patch('memilio.epidata.getVaccinationData.get_vaccination_data') @@ -101,7 +100,7 @@ def test_errors( mock_cases.side_effect = Exception mock_divi.side_effect = Exception mock_vaccination.side_effect = Exception - gsd.get_simulation_data() + gsd.get_simulation_data(self.path) populprint = call( 'Error: Something went wrong while getting ' + 'population' + ' data. 
This was likely caused by a changed file format' diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_vaccination_data.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_vaccination_data.py index ac3e744510..8f0f5061e8 100644 --- a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_vaccination_data.py +++ b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_get_vaccination_data.py @@ -102,14 +102,14 @@ def setUp(self): return_value=df_vacc_data_altern) def test_get_vaccination_data_alternative_ages_no_errors_with_plots( self, mockv): - gvd.get_vaccination_data(out_folder=self.path) + gvd.get_vaccination_data(self.path) @unittest.skip @patch('memilio.epidata.getVaccinationData.download_vaccination_data', return_value=df_vacc_data) def test_get_standard_vaccination_sanitize_3( self, mockv): - gvd.get_vaccination_data(out_folder=self.path, sanitize_data=3) + gvd.get_vaccination_data(self.path, sanitize_data=3) @patch('memilio.epidata.getVaccinationData.pd.read_csv', return_value=df_vacc_data_altern)