Skip to content

Commit

Permalink
data download prep
Browse files Browse the repository at this point in the history
  • Loading branch information
SaeedShurrab committed May 2, 2021
1 parent 1cd319f commit 4b47bb7
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 2 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*
!kaggle.json
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
venv
data

kaggle.json
6 changes: 5 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
FROM python:3.8.3


# Upgrade pip, then create the working directory (/src) and the directory
# that kaggle.json is copied into (/root/.kaggle). Each command is chained
# with && so that a failure stops the build and so that "mkdir" is never
# passed as a directory name to the preceding mkdir.
RUN pip install -U pip && \
    mkdir /src && \
    mkdir /root/.kaggle

COPY kaggle.json /root/.kaggle

WORKDIR /src

Expand Down
40 changes: 40 additions & 0 deletions data-download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import os
import zipfile

# Root of the local data tree and the sub-directories created inside it.
data_dir = 'data'
sub_dirs = ['raw', 'intermediate', 'preprocessed']


# Build the data directory tree.
# Only "already exists" is treated as benign and reported; any other failure
# (e.g. a permission error) propagates instead of being misreported as an
# existing directory.
try:
    os.mkdir(data_dir)
except FileExistsError:
    print(f'{data_dir} directory exists!!')

# `sub_dir` rather than `dir`, so the builtin `dir` is not shadowed.
for sub_dir in sub_dirs:
    try:
        os.mkdir(os.path.join(os.curdir, data_dir, sub_dir))
    except FileExistsError:
        print(f'{sub_dir} directory exists!!')





#UN data download
#UN_URL = 'https://justdata91.s3.us-east-2.amazonaws.com/UNv1.0.ar-en.ar.tar.xz'
#response = requests.get(UN_URL)
#UN_dir = os.path.join(raw_dir,data_sources[0])

#if response.status_code == 200:
# with open(os.path.join(UN_dir,'UNv1.0.ar-en.ar.tar.xz'), "wb+") as file:
# file.write(response.content)
# print("Download completed")
#else:
# print("Download Failed!!")

#with tarfile.open(os.path.join(UN_dir,'UNv1.0.ar-en.ar.tar.xz'),'r') as archive:
# archive.extractall(UN_dir)
# os.remove(os.path.join(UN_dir,'UNv1.0.ar-en.ar.tar.xz'))

0 comments on commit 4b47bb7

Please sign in to comment.