Adapt gdown to be used as a Python library #244

Open · wants to merge 1 commit into base: main
adapt gdown to be used as python library
Skitionek committed Feb 20, 2023

commit ef0caa1c70808bf3d65f2c62660c57a62393810c (verified: created on GitHub.com and signed with GitHub's verified signature)
9 changes: 3 additions & 6 deletions gdown/cached_download.py

@@ -88,11 +88,8 @@ def cached_download(
             print("File exists: {}".format(path))
         return path
     elif osp.exists(path) and md5:
-        try:
-            assert_md5sum(path, md5, quiet=quiet)
-            return path
-        except AssertionError as e:
-            print(e, file=sys.stderr)
+        assert_md5sum(path, md5, quiet=quiet)
+        return path

     # download
     lock_path = osp.join(cache_root, "_dl_lock")
@@ -110,7 +107,7 @@ def cached_download(
                 msg = "{}: {}".format(msg, path)
             else:
                 msg = "{}...".format(msg)
-            print(msg, file=sys.stderr)
+            print(msg)

         download(url, temp_path, quiet=quiet, **kwargs)
         with filelock.FileLock(lock_path):
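With the try/except removed, a checksum mismatch on an already-cached file now propagates to the caller instead of being printed and swallowed. A minimal sketch of library-style use against this branch (the URL and md5 below are placeholders):

import gdown

try:
    path = gdown.cached_download(
        url="https://drive.google.com/uc?id=FILE_ID",  # placeholder file id
        md5="0123456789abcdef0123456789abcdef",        # placeholder checksum
    )
except AssertionError as e:
    # assert_md5sum raises AssertionError when the cached file's checksum
    # does not match the expected md5
    print("checksum mismatch:", e)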
59 changes: 32 additions & 27 deletions gdown/cli.py

@@ -134,33 +134,38 @@ def main():
         url = None
         id = args.url_or_id

-    if args.folder:
-        filenames = download_folder(
-            url=url,
-            id=id,
-            output=args.output,
-            quiet=args.quiet,
-            proxy=args.proxy,
-            speed=args.speed,
-            use_cookies=not args.no_cookies,
-            verify=not args.no_check_certificate,
-            remaining_ok=args.remaining_ok,
-        )
-        success = filenames is not None
-    else:
-        filename = download(
-            url=url,
-            output=args.output,
-            quiet=args.quiet,
-            proxy=args.proxy,
-            speed=args.speed,
-            use_cookies=not args.no_cookies,
-            verify=not args.no_check_certificate,
-            id=id,
-            fuzzy=args.fuzzy,
-            resume=args.continue_,
-        )
-        success = filename is not None
+    try:
+        if args.folder:
+            filenames = download_folder(
+                url=url,
+                id=id,
+                output=args.output,
+                quiet=args.quiet,
+                proxy=args.proxy,
+                speed=args.speed,
+                use_cookies=not args.no_cookies,
+                verify=not args.no_check_certificate,
+                remaining_ok=args.remaining_ok,
+            )
+            success = filenames is not None
+        else:
+            filename = download(
+                url=url,
+                output=args.output,
+                quiet=args.quiet,
+                proxy=args.proxy,
+                speed=args.speed,
+                use_cookies=not args.no_cookies,
+                verify=not args.no_check_certificate,
+                id=id,
+                fuzzy=args.fuzzy,
+                resume=args.continue_,
+            )
+            success = filename is not None
+    except Exception as e:
+        print(e, file=sys.stderr)
+        success = False

     if not success:
         sys.exit(1)
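The CLI now learns about failures by catching exceptions from the library calls instead of checking for a None return, so the same pattern works for any caller embedding gdown. A rough sketch against this branch (FILE_ID and the output name are placeholders):

import sys

import gdown
from requests import RequestException

try:
    filename = gdown.download(id="FILE_ID", output="model.bin", quiet=True)
except RequestException as e:
    # proxy errors, access-denied and resume conflicts now surface here
    print(e, file=sys.stderr)
    sys.exit(1)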
66 changes: 28 additions & 38 deletions gdown/download.py

@@ -5,14 +5,14 @@
 import os.path as osp
 import re
 import shutil
-import sys
 import tempfile
 import textwrap
 import time

 import requests
 import six
-import tqdm
+from requests import HTTPError, RequestException
+from requests.exceptions import ProxyError

 from .parse_url import parse_url
@@ -89,6 +89,7 @@ def download(
     id=None,
     fuzzy=False,
     resume=False,
+    progress=None,
 ):
     """Download file from URL.

@@ -117,6 +118,8 @@ def download(
     resume: bool
         Resume the download from existing tmp file if possible.
         Default is False.
+    progress: tqdm.tqdm
+        Callback implementing the tqdm progress interface.

     Returns
     -------
@@ -127,6 +130,9 @@ def download(
         raise ValueError("Either url or id has to be specified")
     if id is not None:
         url = "https://drive.google.com/uc?id={id}".format(id=id)
+    if progress is None:
+        import tqdm
+        progress = lambda total: tqdm.tqdm(total=total, unit="B", unit_scale=True)

     url_origin = url
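The new progress parameter lets a caller swap out the default tqdm bar, e.g. to report progress to a GUI or a logger. A sketch of a custom callback on this branch; per the added docstring, the callable receives total= and should return an object exposing the tqdm-style interface the download loop uses (update(), and presumably close()):

import gdown


class LogProgress:
    # hypothetical tqdm-like sink that just prints byte counts
    def __init__(self, total=None):
        self.total = total
        self.done = 0

    def update(self, n):
        self.done += n
        print("{} / {} bytes".format(self.done, self.total))

    def close(self):
        print("download finished")


gdown.download(id="FILE_ID", output="data.zip", progress=LogProgress)  # placeholders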

@@ -136,7 +142,7 @@ def download(

     if proxy is not None:
         sess.proxies = {"http": proxy, "https": proxy}
-        print("Using proxy:", proxy, file=sys.stderr)
+        print("Using proxy:", proxy)

     gdrive_file_id, is_gdrive_download_link = parse_url(url, warning=not fuzzy)

@@ -153,10 +159,8 @@ def download(
     while True:
         try:
             res = sess.get(url, headers=headers, stream=True, verify=verify)
-        except requests.exceptions.ProxyError as e:
-            print("An error has occurred using proxy:", proxy, file=sys.stderr)
-            print(e, file=sys.stderr)
-            return
+        except ProxyError as e:
+            raise ProxyError(f"An error has occurred using proxy: {proxy}") from e

         if use_cookies:
             if not osp.exists(osp.dirname(cookies_file)):
@@ -180,16 +184,14 @@ def download(
         try:
             url = get_url_from_gdrive_confirmation(res.text)
         except RuntimeError as e:
-            print("Access denied with the following error:")
-            error = "\n".join(textwrap.wrap(str(e)))
-            error = indent(error, "\t")
-            print("\n", error, "\n", file=sys.stderr)
-            print(
-                "You may still be able to access the file from the browser:",
-                file=sys.stderr,
-            )
-            print("\n\t", url_origin, "\n", file=sys.stderr)
-            return
+            raise HTTPError(
+                "Access denied with the following error:\n" +
+                indent("\n".join(textwrap.wrap(str(e))), "\t") +
+                "\n" +
+                "You may still be able to access the file from the browser:" +
+                "\n\t" + url_origin + "\n"
+            ) from e

     if gdrive_file_id and is_gdrive_download_link:
         content_disposition = six.moves.urllib_parse.unquote(
@@ -217,19 +219,11 @@ def download(
                 existing_tmp_files.append(osp.join(osp.dirname(output), file))
         if resume and existing_tmp_files:
             if len(existing_tmp_files) != 1:
-                print(
-                    "There are multiple temporary files to resume:",
-                    file=sys.stderr,
-                )
-                print("\n")
-                for file in existing_tmp_files:
-                    print("\t", file, file=sys.stderr)
-                print("\n")
-                print(
-                    "Please remove them except one to resume downloading.",
-                    file=sys.stderr,
+                raise RequestException(
+                    "There are multiple temporary files to resume:\n" +
+                    "\t".join(existing_tmp_files) + "\n" +
+                    "Please remove them except one to resume downloading."
                 )
-                return
                 tmp_file = existing_tmp_files[0]
         else:
             resume = False
@@ -250,22 +244,21 @@ def download(
         res = sess.get(url, headers=headers, stream=True, verify=verify)

     if not quiet:
-        print("Downloading...", file=sys.stderr)
+        print("Downloading...")
         if resume:
-            print("Resume:", tmp_file, file=sys.stderr)
-        print("From:", url_origin, file=sys.stderr)
+            print("Resume:", tmp_file)
+        print("From:", url_origin)
         print(
             "To:",
-            osp.abspath(output) if output_is_path else output,
-            file=sys.stderr,
+            osp.abspath(output) if output_is_path else output
         )

     try:
         total = res.headers.get("Content-Length")
         if total is not None:
             total = int(total)
         if not quiet:
-            pbar = tqdm.tqdm(total=total, unit="B", unit_scale=True)
+            pbar = progress(total=total)
         t_start = time.time()
         for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
             f.write(chunk)
@@ -281,9 +274,6 @@ def download(
         if tmp_file:
             f.close()
             shutil.move(tmp_file, output)
-    except IOError as e:
-        print(e, file=sys.stderr)
-        return
     finally:
         sess.close()
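Taken together, download() now signals failure by raising rather than by printing and returning None, so embedding code can distinguish error causes. A sketch of handling the exception types introduced in this file (all are requests exceptions, so a single broad handler also works):

import gdown
from requests import HTTPError, RequestException
from requests.exceptions import ProxyError

try:
    gdown.download(id="FILE_ID", output="out.bin")  # FILE_ID is a placeholder
except ProxyError:
    print("proxy is misconfigured or unreachable")
except HTTPError:
    print("access denied; the file may still be reachable from a browser")
except RequestException as e:
    print("download could not be completed:", e)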

22 changes: 10 additions & 12 deletions gdown/download_folder.py

@@ -12,6 +12,7 @@
 import warnings

 import bs4
+from requests import RequestException, HTTPError

 from .download import _get_session
 from .download import download
@@ -252,7 +253,7 @@ def download_folder(
     sess = _get_session(use_cookies=use_cookies)

     if not quiet:
-        print("Retrieving folder list", file=sys.stderr)
+        print("Retrieving folder list")
     try:
         return_code, gdrive_file = _download_and_parse_google_drive_link(
             sess,
@@ -262,17 +263,16 @@ def download_folder(
             verify=verify,
         )
     except RuntimeError as e:
-        print("Failed to retrieve folder contents:", file=sys.stderr)
-        error = "\n".join(textwrap.wrap(str(e)))
-        error = indent(error, "\t")
-        print("\n", error, "\n", file=sys.stderr)
-        return
+        raise RequestException(
+            "Failed to retrieve folder contents:" +
+            indent("\n".join(textwrap.wrap(str(e))), "\t")
+        ) from e

     if not return_code:
         return return_code
     if not quiet:
-        print("Retrieving folder list completed", file=sys.stderr)
-        print("Building directory structure", file=sys.stderr)
+        print("Retrieving folder list completed")
+        print("Building directory structure")
     if output is None:
         output = os.getcwd() + osp.sep
     if output.endswith(osp.sep):
@@ -303,10 +303,8 @@ def download_folder(
         )

         if filename is None:
-            if not quiet:
-                print("Download ended unsuccessfully", file=sys.stderr)
-            return
+            raise HTTPError("Download ended unsuccessfully")
         filenames.append(filename)
     if not quiet:
-        print("Download completed", file=sys.stderr)
+        print("Download completed")
     return filenames
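download_folder() follows the same pattern: a failed folder listing or a failed file download now raises instead of returning None. A minimal sketch of error handling on this branch (FOLDER_ID and the output directory are placeholders):

import gdown
from requests import RequestException

try:
    filenames = gdown.download_folder(id="FOLDER_ID", output="./dataset", quiet=True)
except RequestException as e:
    # raised when the folder listing cannot be retrieved or a file download fails
    print("folder download failed:", e)
else:
    print("downloaded:", filenames)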