|
14 | 14 | import signal
|
15 | 15 | import sys
|
16 | 16 | import tempfile
|
| 17 | +import urllib |
17 | 18 | import unicodedata
|
18 | 19 | import warnings
|
19 | 20 | from collections import defaultdict
|
|
23 | 24 |
|
24 | 25 | from pkg_resources import DistributionNotFound, get_distribution
|
25 | 26 |
|
26 |
| -try: |
| 27 | +# pylint: disable=no-name-in-module, import-error, wrong-import-position |
| 28 | +if sys.version_info >= (3,): |
27 | 29 | from urllib.parse import urlparse
|
28 |
| -except ImportError: |
| 30 | + from urllib.request import urlopen, FancyURLopener |
| 31 | +else: |
| 32 | + from urllib import urlopen, FancyURLopener |
29 | 33 | from urlparse import urlparse
|
30 | 34 |
|
31 | 35 |
|
@@ -582,6 +586,37 @@ def files_to_be_fetched(self):
|
582 | 586 | for url, file_size, filename in self.fetch_entries():
|
583 | 587 | yield filename
|
584 | 588 |
|
def fetch_files_to_be_fetched(self):
    """
    Fetches files from the fetch.txt

    Every entry yielded by ``self.fetch_entries()`` whose filename is not
    already reported by ``self.payload_files()`` is downloaded to
    ``join(self.path, filename)``.

    Raises BagError when the Content-Length header, or the number of bytes
    actually received, differs from the size declared in fetch.txt. A
    declared size of "-" (length unspecified) disables both size checks.
    A file whose download fails is removed again so that no truncated
    payload is left behind.
    """
    # The legacy urllib.urlopen() of Python 2 calls ``_urlopener.open(url)``,
    # so the hook must be an opener *instance*; assigning the class itself
    # makes that call fail. NOTE(review): urllib.request.urlopen() on
    # Python 3 does not appear to consult this hook at all - confirm.
    urllib._urlopener = BagFetcherURLOpener()  # pylint: disable=protected-access
    for url, expected_size, filename in self.fetch_entries():
        # fetch.txt may carry "-" instead of a number when the length is unknown
        if str(expected_size).strip() == "-":
            expected_size = None
        else:
            expected_size = int(expected_size)  # FIXME should be int in the first place
        if filename in self.payload_files():
            LOGGER.info(_("File already fetched: %s"), filename)
            continue
        target = join(self.path, filename)
        resp = urlopen(url)
        try:
            headers = resp.info()
            if "content-length" not in headers:
                LOGGER.warning(_("Server sent no content-length for <%s>"), url)
            elif expected_size is not None:
                content_length = int(headers['content-length'])
                if content_length != expected_size:
                    raise BagError(_("Inconsistent size of %s: Expected %s but Content-Length is %s") % (filename, expected_size, content_length))
            # fetch.txt entries may point into payload sub-directories that
            # do not exist yet in a bag with holes
            target_dir = os.path.dirname(target)
            if target_dir and not os.path.isdir(target_dir):
                os.makedirs(target_dir)
            read = 0
            try:
                with open(target, 'wb') as out:
                    while True:
                        block = resp.read(1024 * 8)
                        if not block:
                            break
                        read += len(block)
                        out.write(block)
                if expected_size is not None and read != expected_size:
                    raise BagError(_("Inconsistent size of %s: Expected %s but received %s") % (filename, expected_size, read))
            except Exception:
                # do not leave a partial or wrong-sized payload file in the bag
                if os.path.exists(target):
                    os.remove(target)
                raise
        finally:
            # release the connection even when a check above raised
            resp.close()
        LOGGER.info(_("Fetched %s from %s"), filename, url)
| 618 | + |
| 619 | + |
def has_oxum(self):
    """Return True when ``self.info`` contains a "Payload-Oxum" entry."""
    oxum_key = "Payload-Oxum"
    return oxum_key in self.info
|
587 | 622 |
|
@@ -767,6 +802,7 @@ def validate_fetch(self):
|
767 | 802 | # well formed:
|
768 | 803 | parsed_url = urlparse(url)
|
769 | 804 |
|
| 805 | + # ensure url is a remote URL, not file:// |
770 | 806 | if not all((parsed_url.scheme, parsed_url.netloc)):
|
771 | 807 | raise BagError(_("Malformed URL in fetch.txt: %s") % url)
|
772 | 808 |
|
@@ -937,6 +973,8 @@ def _path_is_dangerous(self, path):
|
937 | 973 | common = os.path.commonprefix((bag_path, real_path))
|
938 | 974 | return not (common == bag_path)
|
939 | 975 |
|
class BagFetcherURLOpener(FancyURLopener):
    """
    URL opener that announces bagit.py in its ``version`` string.

    The urllib opener classes send ``version`` as the User-Agent header.
    """
    # sys.version is the full build banner and can contain a line break,
    # which is not allowed inside an HTTP header value; use the plain
    # dotted release number from sys.version_info instead.
    version = "bagit.py/%s (Python/%s)" % (
        VERSION,
        ".".join(str(part) for part in sys.version_info[:3]),
    )
940 | 978 |
|
class BagError(Exception):
    """Error raised by this module when a bag is malformed or inconsistent."""
|
|
0 commit comments