Skip to content

Commit 7816e9d

Browse files
committed
add bool test to test_pattern_features
1 parent 0aaef9f commit 7816e9d

File tree

2 files changed

+7 -46 lines changed

2 files changed

+7 -46 lines changed

webstruct/tests/test_add_pattern_features.py

-40
This file was deleted.

webstruct/tests/test_pattern_features.py

+7 -6
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from __future__ import absolute_import
33
import unittest
44
from webstruct import GateLoader, HtmlTokenizer, HtmlFeatureExtractor
5-
from webstruct.features import token_lower, token_identity, Pattern
5+
from webstruct.features import token_lower, token_identity, looks_like_year, Pattern
66

77

88
class PatternTest(unittest.TestCase):
@@ -17,19 +17,20 @@ def _load_document(self):
1717
return html_tokens
1818

1919
def test_pattern(self):
20+
#, (0, 'looks_like_year')
2021
featextractor = HtmlFeatureExtractor(
21-
token_features = [token_lower, token_identity],
22+
token_features = [token_lower, token_identity, looks_like_year],
2223
global_features = [
23-
Pattern((-2, 'lower'), (-1, 'lower'))
24+
Pattern((-2, 'lower'), (-1, 'lower'), (-1, 'looks_like_year'))
2425
]
2526
)
2627
X = featextractor.transform_single(self.html_tokens)
27-
28-
key = 'lower[-2]/lower[-1]'
28+
key = 'lower[-2]/lower[-1]/looks_like_year[-1]'
2929
self.assertNotIn(key, X[0])
3030
self.assertListEqual(
3131
[feat[key] for feat in X[1:]],
32-
['?/hello', 'hello/john', 'john/doe', 'doe/mary'],
32+
['?/hello/False', 'hello/john/False', 'john/doe/False',
33+
'doe/mary/False'],
3334
)
3435

3536
def test_pattern_lookups(self):

0 commit comments

Comments (0)