Skip to content

Commit 9fe8988

Browse files
authored
Merge pull request #59 from scrapinghub/fix-boolean-bug
fix boolean bug
2 parents d5a7fcf + 7816e9d commit 9fe8988

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

webstruct/features/global_features.py

+1
Original file line number | Diff line number | Diff line change
@@ -99,4 +99,5 @@ def _add_pattern_features(feature_dicts, pattern, out_value, missing_value, sepa
9999

100100
# FIXME: there should be a cleaner/faster way
101101
if not all(v == out_value for v in values):
102+
values = [str(v) if type(v) == bool else v for v in values]
102103
featdict[separator.join(keys)] = separator.join(values)

webstruct/tests/test_pattern_features.py

+7 -6
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from __future__ import absolute_import
33
import unittest
44
from webstruct import GateLoader, HtmlTokenizer, HtmlFeatureExtractor
5-
from webstruct.features import token_lower, token_identity, Pattern
5+
from webstruct.features import token_lower, token_identity, looks_like_year, Pattern
66

77

88
class PatternTest(unittest.TestCase):
@@ -17,19 +17,20 @@ def _load_document(self):
1717
return html_tokens
1818

1919
def test_pattern(self):
20+
#, (0, 'looks_like_year')
2021
featextractor = HtmlFeatureExtractor(
21-
token_features = [token_lower, token_identity],
22+
token_features = [token_lower, token_identity, looks_like_year],
2223
global_features = [
23-
Pattern((-2, 'lower'), (-1, 'lower'))
24+
Pattern((-2, 'lower'), (-1, 'lower'), (-1, 'looks_like_year'))
2425
]
2526
)
2627
X = featextractor.transform_single(self.html_tokens)
27-
28-
key = 'lower[-2]/lower[-1]'
28+
key = 'lower[-2]/lower[-1]/looks_like_year[-1]'
2929
self.assertNotIn(key, X[0])
3030
self.assertListEqual(
3131
[feat[key] for feat in X[1:]],
32-
['?/hello', 'hello/john', 'john/doe', 'doe/mary'],
32+
['?/hello/False', 'hello/john/False', 'john/doe/False',
33+
'doe/mary/False'],
3334
)
3435

3536
def test_pattern_lookups(self):

0 commit comments

Comments
 (0)