diff --git a/.gitignore b/.gitignore index 9286729..57df3c1 100644 --- a/.gitignore +++ b/.gitignore @@ -108,3 +108,103 @@ ENV/ notebooks/data/ docs/notebooks + +# Created by https://www.gitignore.io/api/pycharm +# Edit at https://www.gitignore.io/?templates=pycharm + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### PyCharm Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+# Sonarlint plugin
+.idea/**/sonarlint/
+
+# SonarQube Plugin
+.idea/**/sonarIssues.xml
+
+# Markdown Navigator plugin
+.idea/**/markdown-navigator.xml
+.idea/**/markdown-navigator/
+
+/.idea/.gitignore
+/.idea/misc.xml
+/.idea/modules.xml
+/.idea/inspectionProfiles/profiles_settings.xml
+/.idea/rSettings.xml
+/.idea/task-geo.iml
+/.idea/vcs.xml
+# End of https://www.gitignore.io/api/pycharm
diff --git a/task_geo/data_sources/covid/south_korea/__init__.py b/task_geo/data_sources/covid/south_korea/__init__.py
new file mode 100644
index 0000000..6c2ae1b
--- /dev/null
+++ b/task_geo/data_sources/covid/south_korea/__init__.py
@@ -0,0 +1,3 @@
+from task_geo.data_sources.covid.south_korea.kr_covid import kr_covid
+
+__all__ = ['kr_covid']
diff --git a/task_geo/data_sources/covid/south_korea/__main__.py b/task_geo/data_sources/covid/south_korea/__main__.py
new file mode 100644
index 0000000..42a957c
--- /dev/null
+++ b/task_geo/data_sources/covid/south_korea/__main__.py
@@ -0,0 +1,27 @@
+import argparse
+
+from kr_covid import kr_covid
+
+
+def get_argparser():
+    """Build the command line parser; its only option is the required output path."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        '-o', '--output', required=True,
+        help='Destination file to store the processed dataset.')
+
+    return parser
+
+
+def main():
+    """Retrieve the dataset and write it as CSV to the path given with --output."""
+    parser = get_argparser()
+    args = parser.parse_args()
+
+    dataset = kr_covid()
+    dataset.to_csv(args.output, index=False, header=True)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/task_geo/data_sources/covid/south_korea/audit.md b/task_geo/data_sources/covid/south_korea/audit.md
new file mode 100644
index 0000000..e69de29
diff --git a/task_geo/data_sources/covid/south_korea/kr_covid.py b/task_geo/data_sources/covid/south_korea/kr_covid.py
new file mode 100644
index 0000000..66a8909
--- /dev/null
+++ b/task_geo/data_sources/covid/south_korea/kr_covid.py
@@ -0,0 +1,60 @@
+import io
+
+import pandas as pd
+import requests
+
+
+def kr_covid_connector():
+    """Retrieves data from south_korea_patients.
+
+    Downloads the PatientInfo.csv file of the KrSuma/COVID19_Kr repository.
+
+    Returns:
+        pandas.DataFrame
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+    """
+    url = 'https://raw.githubusercontent.com/KrSuma/COVID19_Kr/master/Datasets/PatientInfo.csv'
+    response = requests.get(url)
+    # Fail loudly instead of parsing an HTML error page as if it were the CSV.
+    response.raise_for_status()
+    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))
+
+
+def kr_covid_formatter(df):
+    """Formats data retrieved from south_korea_patients.
+
+    Keeps only the columns of a fixed list, in that order (columns missing from
+    the input are created empty), and parses the date columns to datetime.
+
+    Arguments:
+        df(pandas.DataFrame): Data as returned by kr_covid_connector.
+
+    Returns:
+        pandas.DataFrame
+    """
+    cols_ordered = [
+        'country', 'state', 'province', 'confirmed_date',
+        'released_date', 'deceased_date', 'exposure_start',
+        'exposure_end', 'global_id', 'birth_year',
+        'local_id', 'sex', 'disease',
+        'group', 'infection_reason', 'infection_order',
+        'infected_by', 'contact_number'
+    ]
+    df = df.reindex(columns=cols_ordered)
+    date_columns = ['confirmed_date', 'released_date', 'deceased_date', 'exposure_start',
+                    'exposure_end']
+    # Pass the function itself (not a call): apply invokes it once per column.
+    df[date_columns] = df[date_columns].apply(pd.to_datetime)
+    return df
+
+
+def kr_covid():
+    """Data Source for south_korea_patients.
+
+    Returns:
+        pandas.DataFrame
+    """
+    data = kr_covid_connector()
+    return kr_covid_formatter(data)