Skip to content

Commit

Permalink
Fix tests, remove useless comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
ermanh committed Jun 22, 2020
1 parent 096a400 commit c5a6934
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 20 deletions.
25 changes: 8 additions & 17 deletions tests/test_regex.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
from typing import List
import unittest

from trieregex import TrieRegEx as TRE
Expand All @@ -21,30 +20,21 @@ def setUp(self):
'scallion', 'ginger', 'garlic', 'onion', 'galangal'
]

def findall(self, string: str, boundary: str) -> List[str]:
"""Helper function. The TrieRegEx.regex() function is called here and
the result of regex matching is returned
"""
pattern = re.compile(f'{boundary}{self.tre.regex()}{boundary}')
return sorted(pattern.findall(string))

def test_match_all_incrementals(self):
self.tre.add(*self.words)
found = self.findall(' '.join(self.words), '\\b')
found = re.findall(f'\\b{self.tre.regex()}\\b', ' '.join(self.words))

self.assertEqual(found, sorted(self.words))
self.assertEqual(sorted(found), sorted(self.words))

def test_does_not_match_larger_string(self):
self.tre.add('p')
found = self.findall('pe', '\\b')

found = re.findall(f'\\b{self.tre.regex()}\\b', 'pe')
self.assertEqual(found, [])

def test_does_not_match_substring(self):
my_words = self.words[1:] # leave out 'p'
self.tre.add(*my_words)
found = self.findall(' '.join(self.words), '\\b')

found = re.findall(f'\\b{self.tre.regex()}\\b', ' '.join(self.words))
self.assertEqual(
found,
sorted(my_words),
Expand All @@ -56,13 +46,14 @@ def test_empty_trie_returns_empty_string_regex(self):

def test_match_all_words(self):
self.tre.add(*self.more_words)
found = self.findall(' '.join(sorted(self.more_words)), '\\b')
self.assertEqual(found, sorted(self.more_words))
pattern = f'\\b{self.tre.regex()}\\b'
found = re.findall(pattern, ' '.join(self.more_words))
self.assertEqual(sorted(found), sorted(self.more_words))

def test_match_all_words_surrounded_by_spaces(self):
words = sorted(self.more_words)
self.tre.add(*words)
found = re.findall(f"(?<= ){'|'.join(words)}(?= )", ' '.join(words))
found = re.findall(f"(?<= ){self.tre.regex()}(?= )", ' '.join(words))
self.assertEqual(
found,
words[1:-1],
Expand Down
6 changes: 3 additions & 3 deletions trieregex/trieregex.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _adjust_initials_finals(self, word, increase=True):

@Memoizer
def add(self, *words: str) -> None:
self.regex.clear_cache() # better performance to clear just once
self.regex.clear_cache()
for word in words:
if word != '' and not self.has(word):
self._adjust_initials_finals(word)
Expand All @@ -36,7 +36,7 @@ def add(self, *words: str) -> None:
trie['**'] = {}

def remove(self, *words: str) -> None:
self.add.clear_cache() # better performance to clear just once
self.add.clear_cache()
self.regex.clear_cache()
for word in words:
remove_word = False
Expand Down Expand Up @@ -98,7 +98,7 @@ def regex(self, trie: dict = None, reset: bool = True) -> str:
else:
sequences = [f'{escape(key)}{self.regex(trie[key], False)}'
for key in trie if key != '**']
sequences.sort(key=lambda x: (-len(x), x)) # for easier inspection
sequences.sort(key=lambda x: (-len(x), x))

if len(sequences) == 1:
result = sequences[0]
Expand Down

0 comments on commit c5a6934

Please sign in to comment.