Adapt gdown to be used as a Python library #244

Open · wants to merge 1 commit into base: main
adapt gdown to be used as python library
Skitionek committed Feb 20, 2023

commit ef0caa1c70808bf3d65f2c62660c57a62393810c (verified: created on GitHub.com and signed with GitHub's verified signature)
9 changes: 3 additions & 6 deletions gdown/cached_download.py

@@ -88,11 +88,8 @@ def cached_download(
             print("File exists: {}".format(path))
         return path
     elif osp.exists(path) and md5:
-        try:
-            assert_md5sum(path, md5, quiet=quiet)
-            return path
-        except AssertionError as e:
-            print(e, file=sys.stderr)
+        assert_md5sum(path, md5, quiet=quiet)
+        return path

     # download
     lock_path = osp.join(cache_root, "_dl_lock")
@@ -110,7 +107,7 @@ def cached_download(
                 msg = "{}: {}".format(msg, path)
             else:
                 msg = "{}...".format(msg)
-            print(msg, file=sys.stderr)
+            print(msg)

         download(url, temp_path, quiet=quiet, **kwargs)
         with filelock.FileLock(lock_path):
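With the try/except removed, a checksum mismatch on an already-cached file now propagates to the caller instead of being printed and swallowed. A minimal sketch of library-style use against this branch (the URL and md5 below are placeholders):

import gdown

try:
    path = gdown.cached_download(
        url="https://drive.google.com/uc?id=FILE_ID",  # placeholder file id
        md5="0123456789abcdef0123456789abcdef",        # placeholder checksum
    )
except AssertionError as e:
    # assert_md5sum raises AssertionError when the cached file's checksum
    # does not match the expected md5
    print("checksum mismatch:", e)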
59 changes: 32 additions & 27 deletions gdown/cli.py

@@ -134,33 +134,38 @@ def main():
         url = None
         id = args.url_or_id

-    if args.folder:
-        filenames = download_folder(
-            url=url,
-            id=id,
-            output=args.output,
-            quiet=args.quiet,
-            proxy=args.proxy,
-            speed=args.speed,
-            use_cookies=not args.no_cookies,
-            verify=not args.no_check_certificate,
-            remaining_ok=args.remaining_ok,
-        )
-        success = filenames is not None
-    else:
-        filename = download(
-            url=url,
-            output=args.output,
-            quiet=args.quiet,
-            proxy=args.proxy,
-            speed=args.speed,
-            use_cookies=not args.no_cookies,
-            verify=not args.no_check_certificate,
-            id=id,
-            fuzzy=args.fuzzy,
-            resume=args.continue_,
-        )
-        success = filename is not None
+    try:
+        if args.folder:
+            filenames = download_folder(
+                url=url,
+                id=id,
+                output=args.output,
+                quiet=args.quiet,
+                proxy=args.proxy,
+                speed=args.speed,
+                use_cookies=not args.no_cookies,
+                verify=not args.no_check_certificate,
+                remaining_ok=args.remaining_ok,
+            )
+            success = filenames is not None
+        else:
+            filename = download(
+                url=url,
+                output=args.output,
+                quiet=args.quiet,
+                proxy=args.proxy,
+                speed=args.speed,
+                use_cookies=not args.no_cookies,
+                verify=not args.no_check_certificate,
+                id=id,
+                fuzzy=args.fuzzy,
+                resume=args.continue_,
+            )
+            success = filename is not None
+    except Exception as e:
+        print(e, file=sys.stderr)
+        success = False

     if not success:
         sys.exit(1)
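The CLI now learns about failures by catching exceptions from the library calls instead of checking for a None return, so the same pattern works for any caller embedding gdown. A rough sketch against this branch (FILE_ID and the output name are placeholders):

import sys

import gdown
from requests import RequestException

try:
    filename = gdown.download(id="FILE_ID", output="model.bin", quiet=True)
except RequestException as e:
    # proxy errors, access-denied and resume conflicts now surface here
    print(e, file=sys.stderr)
    sys.exit(1)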
66 changes: 28 additions & 38 deletions gdown/download.py

@@ -5,14 +5,14 @@
 import os.path as osp
 import re
 import shutil
-import sys
 import tempfile
 import textwrap
 import time

 import requests
 import six
-import tqdm
+from requests import HTTPError, RequestException
+from requests.exceptions import ProxyError

 from .parse_url import parse_url
@@ -89,6 +89,7 @@ def download(
     id=None,
     fuzzy=False,
     resume=False,
+    progress=None,
 ):
     """Download file from URL.

@@ -117,6 +118,8 @@ def download(
     resume: bool
         Resume the download from existing tmp file if possible.
         Default is False.
+    progress: tqdm.tqdm
+        Callback implementing the tqdm progress interface.

     Returns
     -------
@@ -127,6 +130,9 @@ def download(
         raise ValueError("Either url or id has to be specified")
     if id is not None:
         url = "https://drive.google.com/uc?id={id}".format(id=id)
+    if progress is None:
+        import tqdm
+        progress = lambda total: tqdm.tqdm(total=total, unit="B", unit_scale=True)

     url_origin = url
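The new progress parameter lets a caller swap out the default tqdm bar, e.g. to report progress to a GUI or a logger. A sketch of a custom callback on this branch; per the added docstring, the callable receives total= and should return an object exposing the tqdm-style interface the download loop uses (update(), and presumably close()):

import gdown


class LogProgress:
    # hypothetical tqdm-like sink that just prints byte counts
    def __init__(self, total=None):
        self.total = total
        self.done = 0

    def update(self, n):
        self.done += n
        print("{} / {} bytes".format(self.done, self.total))

    def close(self):
        print("download finished")


gdown.download(id="FILE_ID", output="data.zip", progress=LogProgress)  # placeholders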

@@ -136,7 +142,7 @@ def download(

     if proxy is not None:
         sess.proxies = {"http": proxy, "https": proxy}
-        print("Using proxy:", proxy, file=sys.stderr)
+        print("Using proxy:", proxy)

     gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)

@@ -153,10 +159,8 @@ def download(
     while True:
         try:
             res = sess.get(url, headers=headers, stream=True, verify=verify)
-        except requests.exceptions.ProxyError as e:
-            print("An error has occurred using proxy:", proxy, file=sys.stderr)
-            print(e, file=sys.stderr)
-            return
+        except ProxyError as e:
+            raise ProxyError(f"An error has occurred using proxy: {proxy}") from e

         if use_cookies:
             if not osp.exists(osp.dirname(cookies_file)):
@@ -180,16 +184,14 @@ def download(
         try:
             url = get_url_from_gdrive_confirmation(res.text)
         except RuntimeError as e:
-            print("Access denied with the following error:")
-            error = "\n".join(textwrap.wrap(str(e)))
-            error = indent(error, "\t")
-            print("\n", error, "\n", file=sys.stderr)
-            print(
-                "You may still be able to access the file from the browser:",
-                file=sys.stderr,
-            )
-            print("\n\t", url_origin, "\n", file=sys.stderr)
-            return
+            raise HTTPError(
+                "Access denied with the following error:\n" +
+                indent("\n".join(textwrap.wrap(str(e))), "\t") +
+                "\n" +
+                "You may still be able to access the file from the browser:" +
+                "\n\t" + url_origin + "\n"
+            ) from e

     if gdrive_file_id and is_gdrive_download_link:
         content_disposition = six.moves.urllib_parse.unquote(
@@ -217,19 +219,11 @@ def download(
                 existing_tmp_files.append(osp.join(osp.dirname(output), file))
         if resume and existing_tmp_files:
             if len(existing_tmp_files) != 1:
-                print(
-                    "There are multiple temporary files to resume:",
-                    file=sys.stderr,
-                )
-                print("\n")
-                for file in existing_tmp_files:
-                    print("\t", file, file=sys.stderr)
-                print("\n")
-                print(
-                    "Please remove them except one to resume downloading.",
-                    file=sys.stderr,
+                raise RequestException(
+                    "There are multiple temporary files to resume:\n" +
+                    "\t".join(existing_tmp_files) + "\n" +
+                    "Please remove them except one to resume downloading."
                 )
-                return
                 tmp_file = existing_tmp_files[0]
         else:
             resume = False
@@ -250,22 +244,21 @@ def download(
         res = sess.get(url, headers=headers, stream=True, verify=verify)

     if not quiet:
-        print("Downloading...", file=sys.stderr)
+        print("Downloading...")
         if resume:
-            print("Resume:", tmp_file, file=sys.stderr)
-        print("From:", url_origin, file=sys.stderr)
+            print("Resume:", tmp_file)
+        print("From:", url_origin)
         print(
             "To:",
-            osp.abspath(output) if output_is_path else output,
-            file=sys.stderr,
+            osp.abspath(output) if output_is_path else output
         )

     try:
         total = res.headers.get("Content-Length")
         if total is not None:
             total = int(total)
         if not quiet:
-            pbar = tqdm.tqdm(total=total, unit="B", unit_scale=True)
+            pbar = progress(total=total)
         t_start = time.time()
         for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
             f.write(chunk)
@@ -281,9 +274,6 @@ def download(
         if tmp_file:
             f.close()
             shutil.move(tmp_file, output)
-    except IOError as e:
-        print(e, file=sys.stderr)
-        return
     finally:
         sess.close()
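Taken together, download() now signals failure by raising rather than by printing and returning None, so embedding code can distinguish error causes. A sketch of handling the exception types introduced in this file (all are requests exceptions, so a single broad handler also works):

import gdown
from requests import HTTPError, RequestException
from requests.exceptions import ProxyError

try:
    gdown.download(id="FILE_ID", output="out.bin")  # FILE_ID is a placeholder
except ProxyError:
    print("proxy is misconfigured or unreachable")
except HTTPError:
    print("access denied; the file may still be reachable from a browser")
except RequestException as e:
    print("download could not be completed:", e)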

22 changes: 10 additions & 12 deletions gdown/download_folder.py

@@ -12,6 +12,7 @@
 import warnings

 import bs4
+from requests import RequestException, HTTPError

 from .download import _get_session
 from .download import download
@@ -252,7 +253,7 @@ def download_folder(
     sess = _get_session(use_cookies=use_cookies)

     if not quiet:
-        print("Retrieving folder list", file=sys.stderr)
+        print("Retrieving folder list")
     try:
         return_code, gdrive_file = _download_and_parse_google_drive_link(
             sess,
@@ -262,17 +263,16 @@ def download_folder(
             verify=verify,
         )
     except RuntimeError as e:
-        print("Failed to retrieve folder contents:", file=sys.stderr)
-        error = "\n".join(textwrap.wrap(str(e)))
-        error = indent(error, "\t")
-        print("\n", error, "\n", file=sys.stderr)
-        return
+        raise RequestException(
+            "Failed to retrieve folder contents:" +
+            indent("\n".join(textwrap.wrap(str(e))), "\t")
+        ) from e

     if not return_code:
         return return_code
     if not quiet:
-        print("Retrieving folder list completed", file=sys.stderr)
-        print("Building directory structure", file=sys.stderr)
+        print("Retrieving folder list completed")
+        print("Building directory structure")
     if output is None:
         output = os.getcwd() + osp.sep
     if output.endswith(osp.sep):
@@ -303,10 +303,8 @@ def download_folder(
         )

         if filename is None:
-            if not quiet:
-                print("Download ended unsuccessfully", file=sys.stderr)
-            return
+            raise HTTPError("Download ended unsuccessfully")
         filenames.append(filename)
     if not quiet:
-        print("Download completed", file=sys.stderr)
+        print("Download completed")
     return filenames
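download_folder() follows the same pattern: a failed folder listing or a failed file download now raises instead of returning None. A minimal sketch of error handling on this branch (FOLDER_ID and the output directory are placeholders):

import gdown
from requests import RequestException

try:
    filenames = gdown.download_folder(id="FOLDER_ID", output="./dataset", quiet=True)
except RequestException as e:
    # raised when the folder listing cannot be retrieved or a file download fails
    print("folder download failed:", e)
else:
    print("downloaded:", filenames)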