Commit 5765091

typing: add initial types (#488)

Parent: 909d4c8

11 files changed: 152 additions, 96 deletions

pyproject.toml

Lines changed: 5 additions & 1 deletion

@@ -28,7 +28,7 @@ Source = "https://github.com/sloria/TextBlob"
 [project.optional-dependencies]
 docs = ["sphinx==8.1.3", "sphinx-issues==5.0.0", "PyYAML==6.0.2"]
 tests = ["pytest", "numpy"]
-dev = ["textblob[tests]", "tox", "pre-commit>=3.5,<5.0"]
+dev = ["textblob[tests]", "tox", "pre-commit>=3.5,<5.0", "pyright", "ruff"]

 [build-system]
 requires = ["flit_core<4"]
@@ -86,6 +86,7 @@ select = [
   "I",  # isort
   "UP", # pyupgrade
   "W",  # pycodestyle warning
+  "TC", # flake8-typechecking
 ]

 [tool.ruff.lint.per-file-ignores]
@@ -96,3 +97,6 @@ markers = [
   "slow: marks tests as slow (deselect with '-m \"not slow\"')",
   "numpy: marks tests that require numpy",
 ]
+
+[tool.pyright]
+include = ["src/**", "tests/**"]
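
The new "TC" selector turns on ruff's flake8-type-checking rules, which flag imports that are only needed for annotations and push them into an if TYPE_CHECKING: block so they carry no runtime cost. A minimal sketch of the pattern those rules enforce (illustrative names, not code from this commit); the same shape appears below in base.py and decorators.py:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only while type checking; never executed at runtime.
    from collections.abc import Sequence


def first_word(words: Sequence[str]) -> str:
    # Under `from __future__ import annotations`, the annotation above is
    # stored as a string, so the guarded import is never needed when the
    # module actually runs.
    return words[0]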

src/textblob/_text.py

Lines changed: 11 additions & 12 deletions

@@ -124,7 +124,7 @@ def keys(self):
     def values(self):
         return self._lazy("values")

-    def update(self, *args):
+    def update(self, *args, **kwargs):
         return self._lazy("update", *args)

     def pop(self, *args):
@@ -324,10 +324,10 @@ def penntreebank2universal(token, tag):
     ("cry", -1.00): set((":'(", ":'''(", ";'(")),
 }

-RE_EMOTICONS = [
+TEMP_RE_EMOTICONS = [
     r" ?".join([re.escape(each) for each in e]) for v in EMOTICONS.values() for e in v
 ]
-RE_EMOTICONS = re.compile(r"(%s)($|\s)" % "|".join(RE_EMOTICONS))
+RE_EMOTICONS = re.compile(r"(%s)($|\s)" % "|".join(TEMP_RE_EMOTICONS))

 # Handle sarcasm punctuation (!).
 RE_SARCASM = re.compile(r"\( ?\! ?\)")
@@ -490,9 +490,9 @@ class Lexicon(lazydict):
     def __init__(
         self,
         path="",
-        morphology=None,
-        context=None,
-        entities=None,
+        morphology="",
+        context="",
+        entities="",
         NNP="NNP",
         language=None,
     ):
@@ -724,7 +724,7 @@ def apply(self, tokens):
             t[i] = [t[i][0], r[1]]
         return t[len(o) : -len(o)]

-    def insert(self, i, tag1, tag2, cmd="prevtag", x=None, y=None):
+    def insert(self, i, tag1, tag2, cmd="prevtag", x=None, y=None, *args):
         """Inserts a new rule that updates words with tag1 to tag2,
         given constraints x and y, e.g., Context.append("TO < NN", "VB")
         """
@@ -739,7 +739,7 @@ def insert(self, i, tag1, tag2, cmd="prevtag", x=None, y=None):
     def append(self, *args, **kwargs):
         self.insert(len(self) - 1, *args, **kwargs)

-    def extend(self, rules=None):
+    def extend(self, rules=None, *args):
         if rules is None:
             rules = []
         for r in rules:
@@ -1570,9 +1570,8 @@ def parse(

 TOKENS = "tokens"

-
 class TaggedString(str):
-    def __new__(self, string, tags=None, language=None):
+    def __new__(cls, string, tags=None, language=None):
         """Unicode string with tags and language attributes.
         For example: TaggedString("cat/NN/NP", tags=["word", "pos", "chunk"]).
         """
@@ -1588,7 +1587,7 @@ def __new__(self, string, tags=None, language=None):
                 for s in string
             ]
             string = "\n".join(" ".join("/".join(token) for token in s) for s in string)
-        s = str.__new__(self, string)
+        s = str.__new__(cls, string)
         s.tags = list(tags)
         s.language = language
         return s
@@ -1634,7 +1633,7 @@ def language(self):
         return self._language

     @classmethod
-    def train(self, s, path="spelling.txt"):
+    def train(cls, s, path="spelling.txt"):
         """Counts the words in the given string and saves the probabilities at the given path.
         This can be used to generate a new model for the Spelling() constructor.
         """

src/textblob/base.py

Lines changed: 19 additions & 11 deletions

@@ -5,10 +5,16 @@
 All base classes are defined in the same module, ``textblob.base``.
 """

+from __future__ import annotations
+
 from abc import ABCMeta, abstractmethod
+from typing import TYPE_CHECKING

 import nltk

+if TYPE_CHECKING:
+    from typing import Any, AnyStr
+
 ##### POS TAGGERS #####


@@ -19,11 +25,11 @@ class BaseTagger(metaclass=ABCMeta):
     """

     @abstractmethod
-    def tag(self, text, tokenize=True):
+    def tag(self, text: str, tokenize=True) -> list[tuple[str, str]]:
         """Return a list of tuples of the form (word, tag)
         for a given set of text or BaseBlob instance.
         """
-        return
+        ...


 ##### NOUN PHRASE EXTRACTORS #####
@@ -36,29 +42,29 @@ class BaseNPExtractor(metaclass=ABCMeta):
     """

     @abstractmethod
-    def extract(self, text):
+    def extract(self, text: str) -> list[str]:
         """Return a list of noun phrases (strings) for a body of text."""
-        return
+        ...


 ##### TOKENIZERS #####


-class BaseTokenizer(nltk.tokenize.api.TokenizerI, metaclass=ABCMeta):
+class BaseTokenizer(nltk.tokenize.api.TokenizerI, metaclass=ABCMeta):  # pyright: ignore
     """Abstract base class from which all Tokenizer classes inherit.
     Descendant classes must implement a ``tokenize(text)`` method
     that returns a list of noun phrases as strings.
     """

     @abstractmethod
-    def tokenize(self, text):
+    def tokenize(self, text: str) -> list[str]:
         """Return a list of tokens (strings) for a body of text.

         :rtype: list
         """
-        return
+        ...

-    def itokenize(self, text, *args, **kwargs):
+    def itokenize(self, text: str, *args, **kwargs):
         """Return a generator that generates tokens "on-demand".

         .. versionadded:: 0.6.0
@@ -81,6 +87,8 @@ class BaseSentimentAnalyzer(metaclass=ABCMeta):
     results of analysis.
     """

+    _trained: bool
+
     kind = DISCRETE

     def __init__(self):
@@ -91,7 +99,7 @@ def train(self):
         self._trained = True

     @abstractmethod
-    def analyze(self, text):
+    def analyze(self, text) -> Any:
         """Return the result of of analysis. Typically returns either a
         tuple, float, or dictionary.
         """
@@ -111,6 +119,6 @@ class BaseParser(metaclass=ABCMeta):
     """

     @abstractmethod
-    def parse(self, text):
+    def parse(self, text: AnyStr):
         """Parses the text."""
-        return
+        ...
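
With the methods annotated and the bare return bodies replaced by ... (which signals "abstract, no implementation" instead of "returns None"), a checker can verify implementations against the interface. A hedged sketch of a conforming subclass (illustrative only, not shipped with TextBlob):

from textblob.base import BaseTagger


class WhitespaceTagger(BaseTagger):
    def tag(self, text: str, tokenize=True) -> list[tuple[str, str]]:
        # Toy implementation: tag every whitespace-delimited token as a noun.
        return [(word, "NN") for word in text.split()]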

src/textblob/blob.py

Lines changed: 4 additions & 4 deletions

@@ -138,9 +138,9 @@ def lemmatize(self, pos=None):
         lemmatizer = nltk.stem.WordNetLemmatizer()
         return lemmatizer.lemmatize(self.string, tag)

-    PorterStemmer = nltk.stem.porter.PorterStemmer()
-    LancasterStemmer = nltk.stem.lancaster.LancasterStemmer()
-    SnowballStemmer = nltk.stem.snowball.SnowballStemmer("english")
+    PorterStemmer = nltk.stem.PorterStemmer()
+    LancasterStemmer = nltk.stem.LancasterStemmer()
+    SnowballStemmer = nltk.stem.SnowballStemmer("english")

     # added 'stemmer' on lines of lemmatizer
     # based on nltk
@@ -308,7 +308,7 @@ def _initialize_models(
         obj.tokenizer = _validated_param(
             tokenizer,
             "tokenizer",
-            base_class=(BaseTokenizer, nltk.tokenize.api.TokenizerI),
+            base_class=(BaseTokenizer, nltk.tokenize.api.TokenizerI),  # pyright: ignore
             default=BaseBlob.tokenizer,
             base_class_name="BaseTokenizer",
         )
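
The stemmer hunk swaps fully qualified submodule paths for the names nltk re-exports at package level; both resolve to the same classes at runtime, and the shorter spelling is presumably the one pyright resolves cleanly. A quick check, assuming nltk is installed (sketch, not part of the commit):

import nltk

# Same class object either way; the commit just switches to the
# package-level re-export.
assert nltk.stem.PorterStemmer is nltk.stem.porter.PorterStemmer
print(nltk.stem.PorterStemmer().stem("running"))  # prints: run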

src/textblob/classifiers.py

Lines changed: 2 additions & 2 deletions

@@ -510,8 +510,8 @@ def update(


 class MaxEntClassifier(NLTKClassifier):
-    __doc__ = nltk.classify.maxent.MaxentClassifier.__doc__
-    nltk_class = nltk.classify.maxent.MaxentClassifier
+    __doc__ = nltk.classify.MaxentClassifier.__doc__
+    nltk_class = nltk.classify.MaxentClassifier

     def prob_classify(self, text):
         """Return the label probability distribution for classifying a string

src/textblob/decorators.py

Lines changed: 12 additions & 1 deletion

@@ -1,9 +1,18 @@
 """Custom decorators."""

+from __future__ import annotations
+
 from functools import wraps
+from typing import TYPE_CHECKING

 from textblob.exceptions import MissingCorpusError

+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from typing import TypeVar
+
+    ReturnType = TypeVar("ReturnType")
+

 class cached_property:
     """A property that is only computed once per instance and then replaces
@@ -24,7 +33,9 @@ def __get__(self, obj, cls):
         return value


-def requires_nltk_corpus(func):
+def requires_nltk_corpus(
+    func: Callable[..., ReturnType],
+) -> Callable[..., ReturnType]:
     """Wraps a function that requires an NLTK corpus. If the corpus isn't found,
     raise a :exc:`MissingCorpusError`.
     """
