diff --git a/.gitignore b/.gitignore index 6917232047e..39168c227d8 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ __pycache__ .coverage .pytest_cache/ .benchmarks +.htmlcov # documentation build artifacts diff --git a/Makefile b/Makefile index 1ed1a6b1098..7170fa9df2d 100644 --- a/Makefile +++ b/Makefile @@ -66,6 +66,13 @@ test-with-cov : --cov=$(SRC) \ --cov-report=xml +.PHONY : test-with-cov-html +test-with-cov-html : + pytest --color=yes -rf --durations=40 \ + --cov-config=.coveragerc \ + --cov=$(SRC) \ + --cov-report=html + .PHONY : gpu-test gpu-test : check-for-cuda pytest --color=yes -v -rf -m gpu diff --git a/tests/data/dataset_readers/huggingface_datasets_test.py b/tests/data/dataset_readers/huggingface_datasets_test.py index 2a7a79f4079..0cf96c21b3e 100644 --- a/tests/data/dataset_readers/huggingface_datasets_test.py +++ b/tests/data/dataset_readers/huggingface_datasets_test.py @@ -29,6 +29,13 @@ def test_read(self, dataset, config, split): # Confirm all features were mapped assert len(instance.fields) == len(entry) + def test_read_unsupported_sequence_nesting(self): + dataset = "diplomacy_detection" + split = "train" + huggingface_reader = HuggingfaceDatasetReader(dataset_name=dataset) + with pytest.raises(ValueError): + next(huggingface_reader.read(split)) + def test_read_with_tokenizer(self): dataset = "glue" config = "cola" @@ -50,6 +57,20 @@ def test_read_with_tokenizer(self): # Confirm it was tokenized assert len(instance["sentence"]) > 1 + def test_read_without_config(self): + dataset = "urdu_fake_news" + split = "train" + huggingface_reader = HuggingfaceDatasetReader(dataset_name=dataset) + instances = list(huggingface_reader.read(split)) + # Confirm instances were made for all rows + assert len(instances) == len(huggingface_reader.dataset[split]) + + entry = huggingface_reader.dataset[split][0] + instance = instances[0] + + # Confirm all features were mapped + assert len(instance.fields) == len(entry) + def 
test_read_with_preload(self): dataset = "glue" config = "cola" @@ -94,7 +115,7 @@ def test_non_supported_feature(self): config = "pqa_labeled" split = "train" with pytest.raises(ValueError): - list(HuggingfaceDatasetReader(dataset_name=dataset, config_name=config).read(split)) + next(HuggingfaceDatasetReader(dataset_name=dataset, config_name=config).read(split)) def test_non_available_dataset(self): with pytest.raises(ValueError): @@ -103,4 +124,4 @@ def test_non_available_dataset(self): @pytest.mark.parametrize("split", (None, "surely-such-a-split-does-not-exist")) def test_read_with_invalid_split(self, split): with pytest.raises(ValueError): - list(HuggingfaceDatasetReader(dataset_name="glue", config_name="cola").read(split)) + next(HuggingfaceDatasetReader(dataset_name="glue", config_name="cola").read(split))