diff --git a/README.rst b/README.rst
index 17b0189..ab03983 100644
--- a/README.rst
+++ b/README.rst
@@ -36,6 +36,17 @@ Usage::
If you need different HTML output just subclass and override the ``format_*`` methods.
+"Broadcast" mentions and old-style retweets are available as well::
+
+ >>> from ttp import ttp
+ >>> p = ttp.Parser()
+ >>> result = p.parse(".@eadmundo has added broadcast mentions!")
+ >>> result.broadcast
+ 'eadmundo'
+ >>> result = p.parse("RT @eadmundo, also old-style retweets")
+ >>> result.retweet
+ 'eadmundo'
+
You can also ask for the span tags to be returned for each entity::
>>> p = ttp.Parser(include_spans=True)
diff --git a/ttp/tests.py b/ttp/tests.py
index 39aa5ab..6df8ca0 100644
--- a/ttp/tests.py
+++ b/ttp/tests.py
@@ -509,6 +509,50 @@ def test_username_non_reply(self):
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.reply, None)
+ # Broadcast mentions
+ def test_username_broadcast_mention_at_start(self):
+ result = self.parser.parse(u'.@username')
+ self.assertEqual(result.html, u'.@username')
+ self.assertEqual(result.users, [u'username'])
+ self.assertEqual(result.broadcast, u'username')
+
+ def test_username_broadcast_mention_in_middle(self):
+ result = self.parser.parse(u'something .@username')
+ self.assertEqual(result.html, u'something .@username')
+ self.assertEqual(result.users, [u'username'])
+ self.assertEqual(result.broadcast, u'username')
+
+ # Retweets
+ def test_username_old_style_retweet(self):
+ result = self.parser.parse(u'retweet RT @username something')
+ self.assertEqual(result.html, u'retweet RT @username something')
+ self.assertEqual(result.retweet, u'username')
+
+ def test_username_old_style_retweet_at_beginning(self):
+ result = self.parser.parse(u'RT @username something')
+ self.assertEqual(result.html, u'RT @username something')
+ self.assertEqual(result.retweet, u'username')
+
+ def test_username_quoted_retweet(self):
+ result = self.parser.parse(u'retweet "@username something"')
+ self.assertEqual(result.html, u'retweet "@username something"')
+ self.assertEqual(result.retweet, u'username')
+
+ def test_username_curly_quoted_retweet(self):
+ result = self.parser.parse(u'retweet “@username something”')
+ self.assertEqual(result.html, u'retweet “@username something”')
+ self.assertEqual(result.retweet, u'username')
+
+ def test_username_quoted_retweet_at_beginning(self):
+ result = self.parser.parse(u'"@username something"')
+ self.assertEqual(result.html, u'"@username something"')
+ self.assertEqual(result.retweet, u'username')
+
+ def test_username_curly_quoted_retweet_at_beginning(self):
+ result = self.parser.parse(u'“@username something”')
+ self.assertEqual(result.html, u'“@username something”')
+ self.assertEqual(result.retweet, u'username')
+
# List tests ---------------------------------------------------------------
# --------------------------------------------------------------------------
def test_list_preceeded(self):
diff --git a/ttp/ttp.py b/ttp/ttp.py
index ac7c79e..4d0fd8d 100644
--- a/ttp/ttp.py
+++ b/ttp/ttp.py
@@ -30,6 +30,7 @@
AT_SIGNS = ur'[@\uff20]'
UTF_CHARS = ur'a-z0-9_\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff'
SPACES = ur'[\u0020\u00A0\u1680\u180E\u2002-\u202F\u205F\u2060\u3000]'
+QUOTES = ur'[\u0022\u201C]'
# Lists
LIST_PRE_CHARS = ur'([^a-z0-9_]|^)'
@@ -41,6 +42,8 @@
USERNAME_REGEX = re.compile(ur'\B' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)
REPLY_REGEX = re.compile(ur'^(?:' + SPACES + ur')*' + AT_SIGNS
+ ur'([a-z0-9_]{1,20}).*', re.IGNORECASE)
+# Broadcast mention: a username immediately preceded by a literal dot (".@user").
+# The dot is escaped; a bare '.' would match any character before the at-sign.
+BROADCAST_REGEX = re.compile(ur'\.' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)
+# Old-style retweet: "RT @user" (RT as a whole word) or a mention opened by a quote mark.
+RETWEET_REGEX = re.compile(ur'(?:\bRT' + SPACES + ur'|' + QUOTES + ur')' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)
# Hashtags
HASHTAG_EXP = ur'(^|[^0-9A-Z&/]+)(#|\uff03)([0-9A-Z_]*[A-Z_]+[%s]*)' % UTF_CHARS
@@ -86,6 +89,15 @@ class ParseResult(object):
Note: It's generally better to rely on the Tweet JSON/XML in order to
find out if it's a reply or not.
+ - broadcast
+ A string containing the username this tweet was a broadcast mention to
+ (i.e. preceded by a dot to ensure that the mention is broadcast even to
+ non-followers of that username)
+
+ - retweet
+ A string containing the username this tweet was a retweet of (as indicated by
+ either an old-style RT prefix or a mention enclosed in quotes)
+
- lists
A list containing all the valid lists in the Tweet.
Each list item is a tuple in the format (username, listname).
@@ -100,11 +112,13 @@ class ParseResult(object):
'''
- def __init__(self, urls, users, reply, lists, tags, html):
+ def __init__(self, urls, users, reply, broadcast, retweet, lists, tags, html):
self.urls = urls if urls else []
self.users = users if users else []
self.lists = lists if lists else []
self.reply = reply if reply else None
+ self.broadcast = broadcast if broadcast else None
+ self.retweet = retweet if retweet else None
self.tags = tags if tags else []
self.html = html
@@ -127,8 +141,14 @@ def parse(self, text, html=True):
reply = REPLY_REGEX.match(text)
reply = reply.groups(0)[0] if reply is not None else None
+ broadcast = BROADCAST_REGEX.search(text)
+ broadcast = broadcast.groups(0)[0] if broadcast is not None else None
+
+ retweet = RETWEET_REGEX.search(text)
+ retweet = retweet.groups(0)[0] if retweet is not None else None
+
parsed_html = self._html(text) if html else self._text(text)
- return ParseResult(self._urls, self._users, reply,
+ return ParseResult(self._urls, self._users, reply, broadcast, retweet,
self._lists, self._tags, parsed_html)
def _text(self, text):