diff --git a/src/pynorare/api.py b/src/pynorare/api.py index d6b10cd..6a43cda 100644 --- a/src/pynorare/api.py +++ b/src/pynorare/api.py @@ -18,7 +18,7 @@ from clldutils.apilib import API import pybtex.database -from pynorare.files import get_mappings, get_excel, download_file +from pynorare import files from pynorare.util import read_wellformed_tsv_or_die __all__ = ['NoRaRe'] @@ -130,7 +130,7 @@ def _run_function(self, name, *args, **kw): def map(self, concepticon=None, mappings=None): if not mappings: - mappings, _ = get_mappings(concepticon) + mappings, _ = files.get_mappings(concepticon) self._run_function('map', concepticon, mappings) def download(self): @@ -148,11 +148,7 @@ def download_file(self, url, target=None, overwrite=False): if not target: target = urllib.parse.urlparse(url).path.split('/')[-1] if (not self.raw_dir.joinpath(target).exists()) or overwrite: - try: - urllib.request.urlretrieve(url, str(self.raw_dir / target)) - except urllib.error.HTTPError: # pragma: no cover - # Try with requests: - download_file(url, self.raw_dir / target) + files.download_file(url, self.raw_dir / target) self.log.info('Downloaded {0} successfully.'.format(url)) return self.raw_dir / target @@ -161,7 +157,7 @@ def get_csv(self, path, delimiter="\t", dicts=True, coding="utf-8"): return list(reader(self.raw_dir / path, delimiter=delimiter, dicts=dicts, encoding=coding)) def get_excel(self, path, sidx=0, dicts=True): - sheet = get_excel(self.raw_dir.joinpath(path), sidx, dicts) + sheet = files.get_excel(self.raw_dir.joinpath(path), sidx, dicts) self.log.info('load data from {0}'.format(path)) return sheet diff --git a/src/pynorare/files.py b/src/pynorare/files.py index e450d89..6efa554 100644 --- a/src/pynorare/files.py +++ b/src/pynorare/files.py @@ -1,11 +1,12 @@ import collections +from urllib.request import Request, urlopen -import requests from cldfcatalog import Config from pyconcepticon import Concepticon import xlrd import openpyxl +import pynorare from pynorare.util import read_wellformed_tsv_or_die @@ -39,9 +40,10 @@ def get_excel(path, sheet_index, dicts=False): def download_file(url, path): # pragma: no cover - with requests.get(url, stream=True) as r: - r.raise_for_status() - with path.open('wb') as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) + user_agent = f'norare/{pynorare.__version__}' + request = Request(url, headers={'User-Agent': user_agent}) + with urlopen(request) as response: + with open(path, 'wb') as fp: + while (chunk := response.read(8192)): + fp.write(chunk) return path diff --git a/tests/test_api.py b/tests/test_api.py index cc25f31..a066e4a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -16,8 +16,8 @@ def test_NoRaRe(api): def test_Dataset_download_zip(api, mocker): mocker.patch( - 'pynorare.api.urllib.request', - mocker.Mock(urlretrieve=lambda u, t: 1)) + 'pynorare.api.files.download_file', + side_effect=lambda _url, path: path) ds = api.datasets['ds2'] ds.download_zip('x', 'f.zip', 'norare.xlsx') assert len(ds.get_excel('norare.xlsx')) == 2 diff --git a/tests/test_cli.py b/tests/test_cli.py index 23e5108..10463c2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -27,18 +27,27 @@ def test_stats(_main, capsys): assert out.strip().startswith('No.') +def make_pretend_data(_url, path): + with open(path, 'w', encoding='utf-8') as f: + f.write( + 'gloss,float,int,POS\n' + 'the gloss,1.2,3,noun\n' + 'other gloss,1.2,3') + return path + + def test_workflow(_main, mocker): - mocker.patch( - 'pynorare.api.urllib.request', - mocker.Mock(urlretrieve=lambda u, f: pathlib.Path(f).write_text( - 'gloss,float,int,POS\nthe gloss,1.2,3,noun\nother gloss,1.2,3', encoding='utf8'))) + mock_download = mocker.patch( + 'pynorare.api.files.download_file', + side_effect=make_pretend_data) _main('download', 'dsid') + mock_download.assert_called_once() _main('map', 'dsid') _main('validate', 'dsid') mocker.patch( - 'pynorare.api.urllib.request', - mocker.Mock(urlretrieve=lambda u, t: 1)) + 'pynorare.api.files.download_file', + side_effect=lambda _url, path: path) _main('download', 'ds2') _main('map', 'ds2')