Skip to content

Commit 813a787

Browse files
committed
typing: add initial types
1 parent 1a4f357 commit 813a787

File tree

11 files changed

+151
-95
lines changed

11 files changed

+151
-95
lines changed

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Source = "https://github.com/sloria/TextBlob"
2828
[project.optional-dependencies]
2929
docs = ["sphinx==8.1.3", "sphinx-issues==5.0.0", "PyYAML==6.0.2"]
3030
tests = ["pytest", "numpy"]
31-
dev = ["textblob[tests]", "tox", "pre-commit~=3.5"]
31+
dev = ["textblob[tests]", "tox", "pre-commit~=3.5", "pyright", "ruff"]
3232

3333
[build-system]
3434
requires = ["flit_core<4"]
@@ -96,3 +96,7 @@ markers = [
9696
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
9797
"numpy: marks tests that require numpy",
9898
]
99+
100+
[tool.pyright]
101+
include = ["src/**"]
102+
exclude = ["tests/**"]

src/textblob/_text.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def keys(self):
124124
def values(self):
125125
return self._lazy("values")
126126

127-
def update(self, *args):
127+
def update(self, *args, **kwargs):
128128
return self._lazy("update", *args)
129129

130130
def pop(self, *args):
@@ -324,10 +324,10 @@ def penntreebank2universal(token, tag):
324324
("cry", -1.00): set((":'(", ":'''(", ";'(")),
325325
}
326326

327-
RE_EMOTICONS = [
327+
TEMP_RE_EMOTICONS = [
328328
r" ?".join([re.escape(each) for each in e]) for v in EMOTICONS.values() for e in v
329329
]
330-
RE_EMOTICONS = re.compile(r"(%s)($|\s)" % "|".join(RE_EMOTICONS))
330+
RE_EMOTICONS = re.compile(r"(%s)($|\s)" % "|".join(TEMP_RE_EMOTICONS))
331331

332332
# Handle sarcasm punctuation (!).
333333
RE_SARCASM = re.compile(r"\( ?\! ?\)")
@@ -490,9 +490,9 @@ class Lexicon(lazydict):
490490
def __init__(
491491
self,
492492
path="",
493-
morphology=None,
494-
context=None,
495-
entities=None,
493+
morphology="",
494+
context="",
495+
entities="",
496496
NNP="NNP",
497497
language=None,
498498
):
@@ -724,7 +724,7 @@ def apply(self, tokens):
724724
t[i] = [t[i][0], r[1]]
725725
return t[len(o) : -len(o)]
726726

727-
def insert(self, i, tag1, tag2, cmd="prevtag", x=None, y=None):
727+
def insert(self, i, tag1, tag2, cmd="prevtag", x=None, y=None, *args):
728728
"""Inserts a new rule that updates words with tag1 to tag2,
729729
given constraints x and y, e.g., Context.append("TO < NN", "VB")
730730
"""
@@ -739,7 +739,7 @@ def insert(self, i, tag1, tag2, cmd="prevtag", x=None, y=None):
739739
def append(self, *args, **kwargs):
740740
self.insert(len(self) - 1, *args, **kwargs)
741741

742-
def extend(self, rules=None):
742+
def extend(self, rules=None, *args):
743743
if rules is None:
744744
rules = []
745745
for r in rules:
@@ -1570,9 +1570,8 @@ def parse(
15701570

15711571
TOKENS = "tokens"
15721572

1573-
15741573
class TaggedString(str):
1575-
def __new__(self, string, tags=None, language=None):
1574+
def __new__(cls, string, tags=None, language=None):
15761575
"""Unicode string with tags and language attributes.
15771576
For example: TaggedString("cat/NN/NP", tags=["word", "pos", "chunk"]).
15781577
"""
@@ -1588,7 +1587,7 @@ def __new__(self, string, tags=None, language=None):
15881587
for s in string
15891588
]
15901589
string = "\n".join(" ".join("/".join(token) for token in s) for s in string)
1591-
s = str.__new__(self, string)
1590+
s = str.__new__(cls, string)
15921591
s.tags = list(tags)
15931592
s.language = language
15941593
return s
@@ -1634,7 +1633,7 @@ def language(self):
16341633
return self._language
16351634

16361635
@classmethod
1637-
def train(self, s, path="spelling.txt"):
1636+
def train(cls, s, path="spelling.txt"):
16381637
"""Counts the words in the given string and saves the probabilities at the given path.
16391638
This can be used to generate a new model for the Spelling() constructor.
16401639
"""

src/textblob/base.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@
55
All base classes are defined in the same module, ``textblob.base``.
66
"""
77

8+
from __future__ import annotations
9+
810
from abc import ABCMeta, abstractmethod
11+
from typing import TYPE_CHECKING
912

1013
import nltk
1114

15+
if TYPE_CHECKING:
16+
from typing import Any, AnyStr
17+
1218
##### POS TAGGERS #####
1319

1420

@@ -19,11 +25,11 @@ class BaseTagger(metaclass=ABCMeta):
1925
"""
2026

2127
@abstractmethod
22-
def tag(self, text, tokenize=True):
28+
def tag(self, text: str, tokenize=True) -> list[tuple[str, str]]:
2329
"""Return a list of tuples of the form (word, tag)
2430
for a given set of text or BaseBlob instance.
2531
"""
26-
return
32+
raise NotImplementedError("Subclass must implement a tag method")
2733

2834

2935
##### NOUN PHRASE EXTRACTORS #####
@@ -36,29 +42,29 @@ class BaseNPExtractor(metaclass=ABCMeta):
3642
"""
3743

3844
@abstractmethod
39-
def extract(self, text):
45+
def extract(self, text: str) -> list[str]:
4046
"""Return a list of noun phrases (strings) for a body of text."""
41-
return
47+
raise NotImplementedError("Subclass must implement an extract method")
4248

4349

4450
##### TOKENIZERS #####
4551

4652

47-
class BaseTokenizer(nltk.tokenize.api.TokenizerI, metaclass=ABCMeta):
53+
class BaseTokenizer(nltk.tokenize.api.TokenizerI, metaclass=ABCMeta): # pyright: ignore
4854
"""Abstract base class from which all Tokenizer classes inherit.
4955
Descendant classes must implement a ``tokenize(text)`` method
5056
that returns a list of noun phrases as strings.
5157
"""
5258

5359
@abstractmethod
54-
def tokenize(self, text):
60+
def tokenize(self, text: str) -> list[str]:
5561
"""Return a list of tokens (strings) for a body of text.
5662
5763
:rtype: list
5864
"""
59-
return
65+
raise NotImplementedError("Subclass must implement a tokenize method")
6066

61-
def itokenize(self, text, *args, **kwargs):
67+
def itokenize(self, text: str, *args, **kwargs):
6268
"""Return a generator that generates tokens "on-demand".
6369
6470
.. versionadded:: 0.6.0
@@ -81,6 +87,8 @@ class BaseSentimentAnalyzer(metaclass=ABCMeta):
8187
results of analysis.
8288
"""
8389

90+
_trained: bool
91+
8492
kind = DISCRETE
8593

8694
def __init__(self):
@@ -91,7 +99,7 @@ def train(self):
9199
self._trained = True
92100

93101
@abstractmethod
94-
def analyze(self, text):
102+
def analyze(self, text) -> Any:
95103
"""Return the result of analysis. Typically returns either a
96104
tuple, float, or dictionary.
97105
"""
@@ -111,6 +119,6 @@ class BaseParser(metaclass=ABCMeta):
111119
"""
112120

113121
@abstractmethod
114-
def parse(self, text):
122+
def parse(self, text: AnyStr):
115123
"""Parses the text."""
116-
return
124+
raise NotImplementedError("Subclass must implement a parse method")

src/textblob/blob.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ def lemmatize(self, pos=None):
138138
lemmatizer = nltk.stem.WordNetLemmatizer()
139139
return lemmatizer.lemmatize(self.string, tag)
140140

141-
PorterStemmer = nltk.stem.porter.PorterStemmer()
142-
LancasterStemmer = nltk.stem.lancaster.LancasterStemmer()
143-
SnowballStemmer = nltk.stem.snowball.SnowballStemmer("english")
141+
PorterStemmer = nltk.stem.PorterStemmer()
142+
LancasterStemmer = nltk.stem.LancasterStemmer()
143+
SnowballStemmer = nltk.stem.SnowballStemmer("english")
144144

145145
# added 'stemmer' on lines of lemmatizer
146146
# based on nltk

src/textblob/classifiers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,8 @@ def update(
510510

511511

512512
class MaxEntClassifier(NLTKClassifier):
513-
__doc__ = nltk.classify.maxent.MaxentClassifier.__doc__
514-
nltk_class = nltk.classify.maxent.MaxentClassifier
513+
__doc__ = nltk.classify.MaxentClassifier.__doc__
514+
nltk_class = nltk.classify.MaxentClassifier
515515

516516
def prob_classify(self, text):
517517
"""Return the label probability distribution for classifying a string

src/textblob/decorators.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
11
"""Custom decorators."""
22

3+
from __future__ import annotations
4+
35
from functools import wraps
6+
from typing import TYPE_CHECKING
47

58
from textblob.exceptions import MissingCorpusError
69

10+
if TYPE_CHECKING:
11+
from collections.abc import Callable
12+
from typing import TypeVar
13+
14+
ReturnType = TypeVar("ReturnType")
15+
716

817
class cached_property:
918
"""A property that is only computed once per instance and then replaces
@@ -24,7 +33,9 @@ def __get__(self, obj, cls):
2433
return value
2534

2635

27-
def requires_nltk_corpus(func):
36+
def requires_nltk_corpus(
37+
func: Callable[..., ReturnType],
38+
) -> Callable[..., ReturnType]:
2839
"""Wraps a function that requires an NLTK corpus. If the corpus isn't found,
2940
raise a :exc:`MissingCorpusError`.
3041
"""

src/textblob/en/inflect.py

Lines changed: 57 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,15 @@
44
See here https://github.com/clips/pattern/blob/master/LICENSE.txt for
55
complete license information.
66
"""
7+
8+
from __future__ import annotations
9+
from collections.abc import MutableMapping
710
import re
11+
from typing import TYPE_CHECKING
12+
13+
if TYPE_CHECKING:
14+
from typing import AnyStr
15+
816

917
VERB, NOUN, ADJECTIVE, ADVERB = "VB", "NN", "JJ", "RB"
1018

@@ -523,7 +531,7 @@
523531
}
524532

525533

526-
def pluralize(word, pos=NOUN, custom=None, classical=True):
534+
def pluralize(word: str, pos=NOUN, custom=None, classical=True) -> str:
527535
"""Returns the plural of a given word.
528536
For example: child -> children.
529537
Handles nouns and adjectives, using classical inflection by default
@@ -584,6 +592,7 @@ def pluralize(word, pos=NOUN, custom=None, classical=True):
584592
):
585593
if suffix.search(word) is not None:
586594
return suffix.sub(inflection, word)
595+
return word
587596

588597

589598
#### SINGULARIZE ###################################################################################
@@ -607,55 +616,57 @@ def pluralize(word, pos=NOUN, custom=None, classical=True):
607616
# THIS SOFTWARE.
608617

609618
singular_rules = [
610-
["(?i)(.)ae$", "\\1a"],
611-
["(?i)(.)itis$", "\\1itis"],
612-
["(?i)(.)eaux$", "\\1eau"],
613-
["(?i)(quiz)zes$", "\\1"],
614-
["(?i)(matr)ices$", "\\1ix"],
615-
["(?i)(ap|vert|ind)ices$", "\\1ex"],
616-
["(?i)^(ox)en", "\\1"],
617-
["(?i)(alias|status)es$", "\\1"],
618-
["(?i)([octop|vir])i$", "\\1us"],
619-
["(?i)(cris|ax|test)es$", "\\1is"],
620-
["(?i)(shoe)s$", "\\1"],
621-
["(?i)(o)es$", "\\1"],
622-
["(?i)(bus)es$", "\\1"],
623-
["(?i)([m|l])ice$", "\\1ouse"],
624-
["(?i)(x|ch|ss|sh)es$", "\\1"],
625-
["(?i)(m)ovies$", "\\1ovie"],
626-
["(?i)(.)ombies$", "\\1ombie"],
627-
["(?i)(s)eries$", "\\1eries"],
628-
["(?i)([^aeiouy]|qu)ies$", "\\1y"],
619+
(re.compile("(?i)(.)ae$"), "\\1a"),
620+
(re.compile("(?i)(.)itis$"), "\\1itis"),
621+
(re.compile("(?i)(.)eaux$"), "\\1eau"),
622+
(re.compile("(?i)(quiz)zes$"), "\\1"),
623+
(re.compile("(?i)(matr)ices$"), "\\1ix"),
624+
(re.compile("(?i)(ap|vert|ind)ices$"), "\\1ex"),
625+
(re.compile("(?i)^(ox)en"), "\\1"),
626+
(re.compile("(?i)(alias|status)es$"), "\\1"),
627+
(re.compile("(?i)([octop|vir])i$"), "\\1us"),
628+
(re.compile("(?i)(cris|ax|test)es$"), "\\1is"),
629+
(re.compile("(?i)(shoe)s$"), "\\1"),
630+
(re.compile("(?i)(o)es$"), "\\1"),
631+
(re.compile("(?i)(bus)es$"), "\\1"),
632+
(re.compile("(?i)([m|l])ice$"), "\\1ouse"),
633+
(re.compile("(?i)(x|ch|ss|sh)es$"), "\\1"),
634+
(re.compile("(?i)(m)ovies$"), "\\1ovie"),
635+
(re.compile("(?i)(.)ombies$"), "\\1ombie"),
636+
(re.compile("(?i)(s)eries$"), "\\1eries"),
637+
(re.compile("(?i)([^aeiouy]|qu)ies$"), "\\1y"),
629638
# Certain words ending in -f or -fe take -ves in the plural (lives, wolves).
630-
["([aeo]l)ves$", "\\1f"],
631-
["([^d]ea)ves$", "\\1f"],
632-
["arves$", "arf"],
633-
["erves$", "erve"],
634-
["([nlw]i)ves$", "\\1fe"],
635-
["(?i)([lr])ves$", "\\1f"],
636-
["([aeo])ves$", "\\1ve"],
637-
["(?i)(sive)s$", "\\1"],
638-
["(?i)(tive)s$", "\\1"],
639-
["(?i)(hive)s$", "\\1"],
640-
["(?i)([^f])ves$", "\\1fe"],
639+
(re.compile("([aeo]l)ves$"), "\\1f"),
640+
(re.compile("([^d]ea)ves$"), "\\1f"),
641+
(re.compile("arves$"), "arf"),
642+
(re.compile("erves$"), "erve"),
643+
(re.compile("([nlw]i)ves$"), "\\1fe"),
644+
(re.compile("(?i)([lr])ves$"), "\\1f"),
645+
(re.compile("([aeo])ves$"), "\\1ve"),
646+
(re.compile("(?i)(sive)s$"), "\\1"),
647+
(re.compile("(?i)(tive)s$"), "\\1"),
648+
(re.compile("(?i)(hive)s$"), "\\1"),
649+
(re.compile("(?i)([^f])ves$"), "\\1fe"),
641650
# -es suffix.
642-
["(?i)(^analy)ses$", "\\1sis"],
643-
["(?i)((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "\\1\\2sis"],
644-
["(?i)(.)opses$", "\\1opsis"],
645-
["(?i)(.)yses$", "\\1ysis"],
646-
["(?i)(h|d|r|o|n|b|cl|p)oses$", "\\1ose"],
647-
["(?i)(fruct|gluc|galact|lact|ket|malt|rib|sacchar|cellul)ose$", "\\1ose"],
648-
["(?i)(.)oses$", "\\1osis"],
651+
(re.compile("(?i)(^analy)ses$"), "\\1sis"),
652+
(
653+
re.compile("(?i)((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$"),
654+
"\\1\\2sis",
655+
),
656+
(re.compile("(?i)(.)opses$"), "\\1opsis"),
657+
(re.compile("(?i)(.)yses$"), "\\1ysis"),
658+
(re.compile("(?i)(h|d|r|o|n|b|cl|p)oses$"), "\\1ose"),
659+
(
660+
re.compile("(?i)(fruct|gluc|galact|lact|ket|malt|rib|sacchar|cellul)ose$"),
661+
"\\1ose",
662+
),
663+
(re.compile("(?i)(.)oses$"), "\\1osis"),
649664
# -a
650-
["(?i)([ti])a$", "\\1um"],
651-
["(?i)(n)ews$", "\\1ews"],
652-
["(?i)s$", ""],
665+
(re.compile("(?i)([ti])a$"), "\\1um"),
666+
(re.compile("(?i)(n)ews$"), "\\1ews"),
667+
(re.compile("(?i)s$"), ""),
653668
]
654669

655-
# For performance, compile the regular expressions only once:
656-
for rule in singular_rules:
657-
rule[0] = re.compile(rule[0])
658-
659670
singular_uninflected = [
660671
"aircraft",
661672
"antelope",
@@ -833,7 +844,7 @@ def pluralize(word, pos=NOUN, custom=None, classical=True):
833844
}
834845

835846

836-
def singularize(word, pos=NOUN, custom=None):
847+
def singularize(word: str, pos=NOUN, custom: MutableMapping[str, str] | None = None):
837848
if custom is None:
838849
custom = {}
839850
if word in list(custom.keys()):

0 commit comments

Comments
 (0)