diff --git a/.gitignore b/.gitignore index 4db2726..da146ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ -__pycache__/** -.cache/** -.pytest_cache/** +__pycache__/ +.cache/ +.pytest_cache/ *.pyc - +*.egg-info/ +*.egg +dist/ \ No newline at end of file diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..781e903 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Herman Leung + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..af3b4d0 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include *.md +include LICENSE.txt \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..8f917f5 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +license_files = LICENSE.txt \ No newline at end of file diff --git a/setup.py b/setup.py index ea58a94..5ed5c20 100644 --- a/setup.py +++ b/setup.py @@ -2,12 +2,25 @@ setup( name='trieregex', - version='', - description='Compose efficient regexes from a large list of keywords', + version='1.0.0', + description='Compose efficient trie-based regexes from large word lists', long_description='', author='Herman Leung', - author_email='', + author_email='leung.hm@gmail.com', url='https://github.com/ermanh/trieregex', - license='', - packages=find_packages() # exclude=('tests', 'docs')) + packages=find_packages(exclude=('tests', 'tests.*')), + python_requires='>=3.6', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + ], + keywords=['python', 'regular expressions', 'regex', 'pattern', 'trie'], + license='MIT', ) diff --git a/trieregex/trieregex.py b/trieregex/trieregex.py index c88010e..58a2d96 100644 --- a/trieregex/trieregex.py +++ b/trieregex/trieregex.py @@ -14,20 +14,13 @@ def __init__(self, *words: str) -> None: self._finals = defaultdict(int) self.add(*words) - def _adjust_initials_finals(self, word, increase=True): - if increase: - self._initials[word[0]] += 1 - self._finals[word[-1]] += 1 - else: - 
self._initials[word[0]] -= 1 - self._finals[word[-1]] -= 1 - @Memoizer def add(self, *words: str) -> None: self.regex.clear_cache() for word in words: if word != '' and not self.has(word): - self._adjust_initials_finals(word) + self._initials[word[0]] += 1 + self._finals[word[-1]] += 1 trie = self._trie for char in word: if char not in trie: @@ -44,7 +37,8 @@ def remove(self, *words: str) -> None: is_end = i == len(word) if is_end and self.has(word[:i]): remove_word = True - self._adjust_initials_finals(word, increase=False) + self._initials[word[0]] -= 1 + self._finals[word[-1]] -= 1 if remove_word: node = self._trie for j in range(i-1): @@ -69,9 +63,7 @@ def has(self, word: str) -> bool: trie = trie[char] else: return False - if '**' not in trie: - return False - return True + return True if ('**' in trie) else False def initials(self) -> List[str]: result = [key for key in self._initials if self._initials[key] > 0]