Bring up coverage, add test-with-cov-html mk command for easier checking up of coverage for developers
allenai · Apr 14, 2021 · 867a23f · 867a23f
1 parent 960f9ad
commit 867a23f
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -45,6 +45,7 @@ __pycache__
 .coverage
 .pytest_cache/
 .benchmarks
+.htmlcov
 
 # documentation build artifacts
 

diff --git a/Makefile b/Makefile
@@ -66,6 +66,13 @@ test-with-cov :
 			--cov=$(SRC) \
 			--cov-report=xml
 
+.PHONY : test-with-cov-html
+test-with-cov-html :
+	pytest --color=yes -rf --durations=40 \
+			--cov-config=.coveragerc \
+			--cov=$(SRC) \
+			--cov-report=html
+
 .PHONY : gpu-test
 gpu-test : check-for-cuda
 	pytest --color=yes -v -rf -m gpu

diff --git a/tests/data/dataset_readers/huggingface_datasets_test.py b/tests/data/dataset_readers/huggingface_datasets_test.py
@@ -29,6 +29,13 @@ def test_read(self, dataset, config, split):
         # Confirm all features were mapped
         assert len(instance.fields) == len(entry)
 
+    def test_read_unsupported_sequence_nesting(self):
+        dataset = "diplomacy_detection"
+        split = "train"
+        huggingface_reader = HuggingfaceDatasetReader(dataset_name=dataset)
+        with pytest.raises(ValueError):
+            next(huggingface_reader.read(split))
+
     def test_read_with_tokenizer(self):
         dataset = "glue"
         config = "cola"
@@ -50,6 +57,20 @@ def test_read_with_tokenizer(self):
         # Confirm it was tokenized
         assert len(instance["sentence"]) > 1
 
+    def test_read_without_config(self):
+        dataset = "urdu_fake_news"
+        split = "train"
+        huggingface_reader = HuggingfaceDatasetReader(dataset_name=dataset)
+        instances = list(huggingface_reader.read(split))
+        # Confirm instances were made for all rows
+        assert len(instances) == len(huggingface_reader.dataset[split])
+
+        entry = huggingface_reader.dataset[split][0]
+        instance = instances[0]
+
+        # Confirm all features were mapped
+        assert len(instance.fields) == len(entry)
+
     def test_read_with_preload(self):
         dataset = "glue"
         config = "cola"
@@ -94,7 +115,7 @@ def test_non_supported_feature(self):
         config = "pqa_labeled"
         split = "train"
         with pytest.raises(ValueError):
-            list(HuggingfaceDatasetReader(dataset_name=dataset, config_name=config).read(split))
+            next(HuggingfaceDatasetReader(dataset_name=dataset, config_name=config).read(split))
 
     def test_non_available_dataset(self):
         with pytest.raises(ValueError):
@@ -103,4 +124,4 @@ def test_non_available_dataset(self):
     @pytest.mark.parametrize("split", (None, "surely-such-a-split-does-not-exist"))
     def test_read_with_invalid_split(self, split):
         with pytest.raises(ValueError):
-            list(HuggingfaceDatasetReader(dataset_name="glue", config_name="cola").read(split))
+            next(HuggingfaceDatasetReader(dataset_name="glue", config_name="cola").read(split))