From ef0caa1c70808bf3d65f2c62660c57a62393810c Mon Sep 17 00:00:00 2001 From: Dominik Mariusz Maszczyk Date: Mon, 20 Feb 2023 14:28:15 +0100 Subject: [PATCH] adapt gdown to be used as a Python library --- gdown/cached_download.py | 9 ++---- gdown/cli.py | 59 +++++++++++++++++++---------------- gdown/download.py | 66 +++++++++++++++++----------------------- gdown/download_folder.py | 22 ++++++-------- 4 files changed, 73 insertions(+), 83 deletions(-) diff --git a/gdown/cached_download.py b/gdown/cached_download.py index 94f1a7a9..dcebd361 100644 --- a/gdown/cached_download.py +++ b/gdown/cached_download.py @@ -88,11 +88,8 @@ def cached_download( print("File exists: {}".format(path)) return path elif osp.exists(path) and md5: - try: - assert_md5sum(path, md5, quiet=quiet) - return path - except AssertionError as e: - print(e, file=sys.stderr) + assert_md5sum(path, md5, quiet=quiet) + return path # download lock_path = osp.join(cache_root, "_dl_lock") @@ -110,7 +107,7 @@ def cached_download( msg = "{}: {}".format(msg, path) else: msg = "{}...".format(msg) - print(msg, file=sys.stderr) + print(msg) download(url, temp_path, quiet=quiet, **kwargs) with filelock.FileLock(lock_path): diff --git a/gdown/cli.py b/gdown/cli.py index f42fdab1..182d6304 100644 --- a/gdown/cli.py +++ b/gdown/cli.py @@ -134,33 +134,38 @@ def main(): url = None id = args.url_or_id - if args.folder: - filenames = download_folder( - url=url, - id=id, - output=args.output, - quiet=args.quiet, - proxy=args.proxy, - speed=args.speed, - use_cookies=not args.no_cookies, - verify=not args.no_check_certificate, - remaining_ok=args.remaining_ok, - ) - success = filenames is not None - else: - filename = download( - url=url, - output=args.output, - quiet=args.quiet, - proxy=args.proxy, - speed=args.speed, - use_cookies=not args.no_cookies, - verify=not args.no_check_certificate, - id=id, - fuzzy=args.fuzzy, - resume=args.continue_, - ) - success = filename is not None + + try: + if args.folder: + filenames = 
download_folder( + url=url, + id=id, + output=args.output, + quiet=args.quiet, + proxy=args.proxy, + speed=args.speed, + use_cookies=not args.no_cookies, + verify=not args.no_check_certificate, + remaining_ok=args.remaining_ok, + ) + success = filenames is not None + else: + filename = download( + url=url, + output=args.output, + quiet=args.quiet, + proxy=args.proxy, + speed=args.speed, + use_cookies=not args.no_cookies, + verify=not args.no_check_certificate, + id=id, + fuzzy=args.fuzzy, + resume=args.continue_, + ) + success = filename is not None + except Exception as e: + print(e, file=sys.stderr) + success = False if not success: sys.exit(1) diff --git a/gdown/download.py b/gdown/download.py index decf3e09..ab272c3a 100644 --- a/gdown/download.py +++ b/gdown/download.py @@ -5,14 +5,14 @@ import os.path as osp import re import shutil -import sys import tempfile import textwrap import time import requests import six -import tqdm +from requests import HTTPError, RequestException +from requests.exceptions import ProxyError from .parse_url import parse_url @@ -89,6 +89,7 @@ def download( id=None, fuzzy=False, resume=False, + progress=None ): """Download file from URL. @@ -117,6 +118,8 @@ def download( resume: bool Resume the download from existing tmp file if possible. Default is False. 
+ progress: tqdm.tqdm + callback implementing tqdm progress interface Returns ------- @@ -127,6 +130,9 @@ raise ValueError("Either url or id has to be specified") if id is not None: url = "https://drive.google.com/uc?id={id}".format(id=id) + if progress is None: + import tqdm + progress = lambda total: tqdm.tqdm(total=total, unit="B", unit_scale=True) url_origin = url @@ -136,7 +142,7 @@ if proxy is not None: sess.proxies = {"http": proxy, "https": proxy} - print("Using proxy:", proxy, file=sys.stderr) + print("Using proxy:", proxy) gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy) @@ -153,10 +159,8 @@ while True: try: res = sess.get(url, headers=headers, stream=True, verify=verify) - except requests.exceptions.ProxyError as e: - print("An error has occurred using proxy:", proxy, file=sys.stderr) - print(e, file=sys.stderr) - return + except ProxyError as e: + raise ProxyError(f"An error has occurred using proxy: {proxy}") from e if use_cookies: if not osp.exists(osp.dirname(cookies_file)): @@ -180,16 +184,14 @@ try: url = get_url_from_gdrive_confirmation(res.text) except RuntimeError as e: - print("Access denied with the following error:") - error = "\n".join(textwrap.wrap(str(e))) - error = indent(error, "\t") - print("\n", error, "\n", file=sys.stderr) - print( - "You may still be able to access the file from the browser:", - file=sys.stderr, - ) - print("\n\t", url_origin, "\n", file=sys.stderr) - return + raise HTTPError( + "Access denied with the following error:\n" + + indent("\n".join(textwrap.wrap(str(e))), "\t") + + "\n" + + "You may still be able to access the file from the browser:" + + "\n\t" + url_origin + "\n", + response=res + ) from e if gdrive_file_id and is_gdrive_download_link: content_disposition = six.moves.urllib_parse.unquote( @@ -217,19 +219,11 @@ existing_tmp_files.append(osp.join(osp.dirname(output), file)) if resume and 
existing_tmp_files: if len(existing_tmp_files) != 1: - print( - "There are multiple temporary files to resume:", - file=sys.stderr, - ) - print("\n") - for file in existing_tmp_files: - print("\t", file, file=sys.stderr) - print("\n") - print( - "Please remove them except one to resume downloading.", - file=sys.stderr, + raise RequestException( + "There are multiple temporary files to resume:\n" + + "\t".join(existing_tmp_files) + "\n" + + "Please remove them except one to resume downloading." ) - return tmp_file = existing_tmp_files[0] else: resume = False @@ -250,14 +244,13 @@ def download( res = sess.get(url, headers=headers, stream=True, verify=verify) if not quiet: - print("Downloading...", file=sys.stderr) + print("Downloading...") if resume: - print("Resume:", tmp_file, file=sys.stderr) - print("From:", url_origin, file=sys.stderr) + print("Resume:", tmp_file) + print("From:", url_origin) print( "To:", - osp.abspath(output) if output_is_path else output, - file=sys.stderr, + osp.abspath(output) if output_is_path else output ) try: @@ -265,7 +258,7 @@ def download( if total is not None: total = int(total) if not quiet: - pbar = tqdm.tqdm(total=total, unit="B", unit_scale=True) + pbar = progress(total=total) t_start = time.time() for chunk in res.iter_content(chunk_size=CHUNK_SIZE): f.write(chunk) @@ -281,9 +274,6 @@ def download( if tmp_file: f.close() shutil.move(tmp_file, output) - except IOError as e: - print(e, file=sys.stderr) - return finally: sess.close() diff --git a/gdown/download_folder.py b/gdown/download_folder.py index f61f2271..2022df24 100644 --- a/gdown/download_folder.py +++ b/gdown/download_folder.py @@ -12,6 +12,7 @@ import warnings import bs4 +from requests import RequestException, HTTPError from .download import _get_session from .download import download @@ -252,7 +253,7 @@ def download_folder( sess = _get_session(use_cookies=use_cookies) if not quiet: - print("Retrieving folder list", file=sys.stderr) + print("Retrieving folder list") 
try: return_code, gdrive_file = _download_and_parse_google_drive_link( sess, @@ -262,17 +263,16 @@ def download_folder( verify=verify, ) except RuntimeError as e: - print("Failed to retrieve folder contents:", file=sys.stderr) - error = "\n".join(textwrap.wrap(str(e))) - error = indent(error, "\t") - print("\n", error, "\n", file=sys.stderr) - return + raise RequestException( + "Failed to retrieve folder contents:" + + indent("\n".join(textwrap.wrap(str(e))), "\t") + ) from e if not return_code: return return_code if not quiet: - print("Retrieving folder list completed", file=sys.stderr) - print("Building directory structure", file=sys.stderr) + print("Retrieving folder list completed") + print("Building directory structure") if output is None: output = os.getcwd() + osp.sep if output.endswith(osp.sep): @@ -303,10 +303,8 @@ def download_folder( ) if filename is None: - if not quiet: - print("Download ended unsuccessfully", file=sys.stderr) - return + raise HTTPError("Download ended unsuccessfully") filenames.append(filename) if not quiet: - print("Download completed", file=sys.stderr) + print("Download completed") return filenames