From c27324d9986fdfa56d4337c3bce952f2b057ceb4 Mon Sep 17 00:00:00 2001 From: Steven Loria Date: Thu, 15 Feb 2024 17:01:19 -0500 Subject: [PATCH] refactor: remove vendorized unicodecsv; remove mentions of translation (#428) --- CHANGELOG.rst | 7 + NOTICE | 67 -------- README.rst | 2 +- docs/index.rst | 2 +- pyproject.toml | 1 - src/textblob/blob.py | 2 +- src/textblob/unicodecsv/__init__.py | 249 ---------------------------- 7 files changed, 10 insertions(+), 320 deletions(-) delete mode 100644 src/textblob/unicodecsv/__init__.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d0de94e4..eaaf0974 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +0.19.0 (unreleased) +------------------- + +Other changes: + +- Remove vendorized ``unicodecsv`` module, as it's no longer used. + 0.18.0 (2024-02-15) ------------------- diff --git a/NOTICE b/NOTICE index e5269b81..2b004eb0 100644 --- a/NOTICE +++ b/NOTICE @@ -208,8 +208,6 @@ NLTK License limitations under the License. - - Pattern License =============== @@ -242,68 +240,3 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - -translate.py License -==================== - -"THE BEER-WARE LICENSE" (Revision 42): - wrote this file. As long as you retain this notice you -can do whatever you want with this stuff. If we meet some day, and you think -this stuff is worth it, you can buy me a beer in return to Terry Yin. - - - -unicodecsv License -================== - -Copyright 2010 Jeremy Dunck. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are -permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this list of - conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, this list - of conditions and the following disclaimer in the documentation and/or other materials - provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY JEREMY DUNCK ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JEREMY DUNCK OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -The views and conclusions contained in the software and documentation are those of the -authors and should not be interpreted as representing official policies, either expressed -or implied, of Jeremy Dunck. - - - -six License -=========== - -Copyright (c) 2010-2013 Benjamin Peterson - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.rst b/README.rst index a4a07e21..cdef58d2 100644 --- a/README.rst +++ b/README.rst @@ -13,7 +13,7 @@ TextBlob: Simplified Text Processing Homepage: `https://textblob.readthedocs.io/ <https://textblob.readthedocs.io/>`_ -`TextBlob` is a Python library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, translation, and more. +`TextBlob` is a Python library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, and more. .. code-block:: python diff --git a/docs/index.rst b/docs/index.rst index b4c64479..72a39f51 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,7 @@ TextBlob: Simplified Text Processing Release v\ |version|. (:ref:`Changelog`) -*TextBlob* is a Python library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, translation, and more. +*TextBlob* is a Python library for processing textual data. It provides a simple API for diving into common natural language processing (NLP) tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, and more. .. 
code-block:: python diff --git a/pyproject.toml b/pyproject.toml index e8abdb35..25aca859 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,6 @@ exclude = [ "venv", # Vendorized code "src/textblob/en", - "src/textblob/unicodecsv", "src/textblob/_text.py", ] diff --git a/src/textblob/blob.py b/src/textblob/blob.py index 4b2b3a77..0bea3325 100644 --- a/src/textblob/blob.py +++ b/src/textblob/blob.py @@ -67,7 +67,7 @@ def _penn_to_wordnet(tag): class Word(str): """A simple word representation. Includes methods for inflection, - translation, and WordNet integration. + and WordNet integration. """ def __new__(cls, string, pos_tag=None): diff --git a/src/textblob/unicodecsv/__init__.py b/src/textblob/unicodecsv/__init__.py deleted file mode 100644 index b32470f3..00000000 --- a/src/textblob/unicodecsv/__init__.py +++ /dev/null @@ -1,249 +0,0 @@ -import csv - - -# http://semver.org/ -VERSION = (0, 9, 4) -__version__ = ".".join(map(str, VERSION)) - -pass_throughs = [ - "register_dialect", - "unregister_dialect", - "get_dialect", - "list_dialects", - "field_size_limit", - "Dialect", - "excel", - "excel_tab", - "Sniffer", - "QUOTE_ALL", - "QUOTE_MINIMAL", - "QUOTE_NONNUMERIC", - "QUOTE_NONE", - "Error", -] -__all__ = [ - "reader", - "writer", - "DictReader", - "DictWriter", -] + pass_throughs - -for prop in pass_throughs: - globals()[prop] = getattr(csv, prop) - - -def _stringify(s, encoding, errors): - if s is None: - return "" - if isinstance(s, unicode): - return s.encode(encoding, errors) - elif isinstance(s, (int, float)): - pass # let csv.QUOTE_NONNUMERIC do its thing. 
- elif not isinstance(s, str): - s = str(s) - return s - - -def _stringify_list(l, encoding, errors="strict"): - try: - return [_stringify(s, encoding, errors) for s in iter(l)] - except TypeError as e: - raise csv.Error(str(e)) - - -def _unicodify(s, encoding): - if s is None: - return None - if isinstance(s, (unicode, int, float)): - return s - elif isinstance(s, str): - return s.decode(encoding) - return s - - -class UnicodeWriter: - """ - >>> import unicodecsv - >>> from cStringIO import StringIO - >>> f = StringIO() - >>> w = unicodecsv.writer(f, encoding='utf-8') - >>> w.writerow((u'é', u'ñ')) - >>> f.seek(0) - >>> r = unicodecsv.reader(f, encoding='utf-8') - >>> row = r.next() - >>> row[0] == u'é' - True - >>> row[1] == u'ñ' - True - """ - - def __init__( - self, f, dialect=csv.excel, encoding="utf-8", errors="strict", *args, **kwds - ): - self.encoding = encoding - self.writer = csv.writer(f, dialect, *args, **kwds) - self.encoding_errors = errors - - def writerow(self, row): - self.writer.writerow(_stringify_list(row, self.encoding, self.encoding_errors)) - - def writerows(self, rows): - for row in rows: - self.writerow(row) - - @property - def dialect(self): - return self.writer.dialect - - -writer = UnicodeWriter - - -class UnicodeReader: - def __init__(self, f, dialect=None, encoding="utf-8", errors="strict", **kwds): - format_params = [ - "delimiter", - "doublequote", - "escapechar", - "lineterminator", - "quotechar", - "quoting", - "skipinitialspace", - ] - if dialect is None: - if not any([kwd_name in format_params for kwd_name in kwds.keys()]): - dialect = csv.excel - self.reader = csv.reader(f, dialect, **kwds) - self.encoding = encoding - self.encoding_errors = errors - - def next(self): - row = self.reader.next() - encoding = self.encoding - encoding_errors = self.encoding_errors - float_ = float - unicode_ = unicode - return [ - ( - value - if isinstance(value, float_) - else unicode_(value, encoding, encoding_errors) - ) - for value in row - ] 
- - def __iter__(self): - return self - - @property - def dialect(self): - return self.reader.dialect - - @property - def line_num(self): - return self.reader.line_num - - -reader = UnicodeReader - - -class DictWriter(csv.DictWriter): - """ - >>> from cStringIO import StringIO - >>> f = StringIO() - >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î') - >>> w.writerow({'a':'1', u'ñ':'2'}) - >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'}) - >>> w.writerow({'a':u'é', u'ñ':'2'}) - >>> f.seek(0) - >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r') - >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']} - True - >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']} - True - >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']} - True - """ - - def __init__( - self, - csvfile, - fieldnames, - restval="", - extrasaction="raise", - dialect="excel", - encoding="utf-8", - errors="strict", - *args, - **kwds, - ): - self.encoding = encoding - csv.DictWriter.__init__( - self, csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds - ) - self.writer = UnicodeWriter( - csvfile, dialect, encoding=encoding, errors=errors, *args, **kwds - ) - self.encoding_errors = errors - - def writeheader(self): - _stringify_list(self.fieldnames, self.encoding, self.encoding_errors) - header = dict(zip(self.fieldnames, self.fieldnames)) - self.writerow(header) - - -class DictReader(csv.DictReader): - """ - >>> from cStringIO import StringIO - >>> f = StringIO() - >>> w = DictWriter(f, fieldnames=['name', 'place']) - >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'}) - >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'}) - >>> w.writerow({'name': u'Willam ø. 
Unicoder', 'place': u'éSpandland'}) - >>> f.seek(0) - >>> r = DictReader(f, fieldnames=['name', 'place']) - >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'} - True - >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'} - True - >>> print r.next() == {'name': u'Willam ø. Unicoder', 'place': u'éSpandland'} - True - """ - - def __init__( - self, - csvfile, - fieldnames=None, - restkey=None, - restval=None, - dialect="excel", - encoding="utf-8", - errors="strict", - *args, - **kwds, - ): - if fieldnames is not None: - fieldnames = _stringify_list(fieldnames, encoding) - csv.DictReader.__init__( - self, csvfile, fieldnames, restkey, restval, dialect, *args, **kwds - ) - self.reader = UnicodeReader( - csvfile, dialect, encoding=encoding, errors=errors, *args, **kwds - ) - if fieldnames is None and not hasattr(csv.DictReader, "fieldnames"): - # Python 2.5 fieldnames workaround. (http://bugs.python.org/issue3436) - reader = UnicodeReader(csvfile, dialect, encoding=encoding, *args, **kwds) - self.fieldnames = _stringify_list(reader.next(), reader.encoding) - self.unicode_fieldnames = [_unicodify(f, encoding) for f in self.fieldnames] - self.unicode_restkey = _unicodify(restkey, encoding) - - def next(self): - row = csv.DictReader.next(self) - result = dict( - (uni_key, row[str_key]) - for (str_key, uni_key) in zip(self.fieldnames, self.unicode_fieldnames) - ) - rest = row.get(self.restkey) - if rest: - result[self.unicode_restkey] = rest - return result