Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions src/pynorare/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from clldutils.apilib import API
import pybtex.database

from pynorare.files import get_mappings, get_excel, download_file
from pynorare import files
from pynorare.util import read_wellformed_tsv_or_die

__all__ = ['NoRaRe']
Expand Down Expand Up @@ -130,7 +130,7 @@ def _run_function(self, name, *args, **kw):

def map(self, concepticon=None, mappings=None):
if not mappings:
mappings, _ = get_mappings(concepticon)
mappings, _ = files.get_mappings(concepticon)
self._run_function('map', concepticon, mappings)

def download(self):
Expand All @@ -148,11 +148,7 @@ def download_file(self, url, target=None, overwrite=False):
if not target:
target = urllib.parse.urlparse(url).path.split('/')[-1]
if (not self.raw_dir.joinpath(target).exists()) or overwrite:
try:
urllib.request.urlretrieve(url, str(self.raw_dir / target))
except urllib.error.HTTPError: # pragma: no cover
# Try with requests:
download_file(url, self.raw_dir / target)
files.download_file(url, self.raw_dir / target)
self.log.info('Downloaded {0} successfully.'.format(url))
return self.raw_dir / target

Expand All @@ -161,7 +157,7 @@ def get_csv(self, path, delimiter="\t", dicts=True, coding="utf-8"):
return list(reader(self.raw_dir / path, delimiter=delimiter, dicts=dicts, encoding=coding))

def get_excel(self, path, sidx=0, dicts=True):
sheet = get_excel(self.raw_dir.joinpath(path), sidx, dicts)
sheet = files.get_excel(self.raw_dir.joinpath(path), sidx, dicts)
self.log.info('load data from {0}'.format(path))
return sheet

Expand Down
14 changes: 8 additions & 6 deletions src/pynorare/files.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import collections
from urllib.request import Request, urlopen

import requests
from cldfcatalog import Config
from pyconcepticon import Concepticon
import xlrd
import openpyxl

import pynorare
from pynorare.util import read_wellformed_tsv_or_die


Expand Down Expand Up @@ -39,9 +40,10 @@ def get_excel(path, sheet_index, dicts=False):


def download_file(url, path): # pragma: no cover
with requests.get(url, stream=True) as r:
r.raise_for_status()
with path.open('wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
user_agent = f'norare/{pynorare.__version__}'
request = Request(url, headers={'User-Agent': user_agent})
with urlopen(request) as response:
with open(path, 'wb') as fp:
while (chunk := response.read(8192)):
fp.write(chunk)
return path
4 changes: 2 additions & 2 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def test_NoRaRe(api):

def test_Dataset_download_zip(api, mocker):
mocker.patch(
'pynorare.api.urllib.request',
mocker.Mock(urlretrieve=lambda u, t: 1))
'pynorare.api.files.download_file',
side_effect=lambda _url, path: path)
ds = api.datasets['ds2']
ds.download_zip('x', 'f.zip', 'norare.xlsx')
assert len(ds.get_excel('norare.xlsx')) == 2
Expand Down
21 changes: 15 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,27 @@ def test_stats(_main, capsys):
assert out.strip().startswith('No.')


def make_pretend_data(_url, path):
with open(path, 'w', encoding='utf-8') as f:
f.write(
'gloss,float,int,POS\n'
'the gloss,1.2,3,noun\n'
'other gloss,1.2,3')
return path


def test_workflow(_main, mocker):
mocker.patch(
'pynorare.api.urllib.request',
mocker.Mock(urlretrieve=lambda u, f: pathlib.Path(f).write_text(
'gloss,float,int,POS\nthe gloss,1.2,3,noun\nother gloss,1.2,3', encoding='utf8')))
mock_download = mocker.patch(
'pynorare.api.files.download_file',
side_effect=make_pretend_data)
_main('download', 'dsid')
mock_download.assert_called_once()
_main('map', 'dsid')
_main('validate', 'dsid')

mocker.patch(
'pynorare.api.urllib.request',
mocker.Mock(urlretrieve=lambda u, t: 1))
'pynorare.api.files.download_file',
side_effect=lambda _url, path: path)
_main('download', 'ds2')
_main('map', 'ds2')

Expand Down