diff --git a/chatminer/chatparsers.py b/chatminer/chatparsers.py index 170eae8..8d7b59b 100644 --- a/chatminer/chatparsers.py +++ b/chatminer/chatparsers.py @@ -120,7 +120,7 @@ def _parse_message(self, mess: Any) -> Optional[ParsedMessage]: class SignalParser(Parser): def _read_raw_messages_from_file(self): def _is_new_message(line: str): - regex = r"^\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}\]" + regex = r"^\[\d{4}-\d{2}-\d{2}, \d{2}:\d{2}\]" return re.match(regex, line) with self._file.open(encoding="utf-8") as f: @@ -136,7 +136,8 @@ def _is_new_message(line: str): if buffer: buffer.append(line) buffer.reverse() - self._raw_messages.append(" ".join(buffer)) + joined_buffer = " ".join(buffer) + self._raw_messages.append("".join(joined_buffer.splitlines())) buffer.clear() else: self._raw_messages.append(line) @@ -221,7 +222,7 @@ def _parse_message(self, mess: Dict[str, Any]): self._logger.warning("Skipped message with unknown format: %s", mess) return None - time = dt.datetime.fromtimestamp(mess["timestamp_ms"] / 1000) + time = dt.datetime.utcfromtimestamp(mess["timestamp_ms"] / 1000) author = mess["sender_name"].encode("latin-1").decode("utf-8") body = body.encode("latin-1").decode("utf-8") return ParsedMessage(time, author, body) @@ -263,7 +264,7 @@ def _parse_message(self, mess: Dict[str, Any]): self._logger.warning("Skipped message with unknown format: %s", mess) return None - time = dt.datetime.fromtimestamp(mess["timestamp_ms"] / 1000) + time = dt.datetime.utcfromtimestamp(mess["timestamp_ms"] / 1000) author = mess["sender_name"].encode("latin-1").decode("utf-8") body = body.encode("latin-1").decode("utf-8") return ParsedMessage(time, author, body) @@ -313,7 +314,7 @@ def _parse_message(self, mess: Dict[str, Any]): else: raise ValueError(f"Unable to parse type {type(mess['text'])} in {mess}") - time = dt.datetime.fromtimestamp(int(mess["date_unixtime"])) + time = dt.datetime.utcfromtimestamp(int(mess["date_unixtime"])) author = mess["from"] return ParsedMessage(time, author, body) return None @@ -393,12 +394,8 @@ def _log_resulting_format(self): end = "]" if self.has_brackets else "" if self.is_yearfirst: date1 = "year" - if self.is_dayfirst: - date2 = "day" - date3 = "month" - else: - date2 = "month" - date3 = "day" + date2 = "month" + date3 = "day" elif self.is_dayfirst: date1 = "day" date2 = "month" diff --git a/setup.cfg b/setup.cfg index 44808ae..522bd5b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,6 +45,7 @@ NLP = [coverage:run] omit = + chatminer/cli.py chatminer/nlp.py chatminer/visualizations.py chatminer/__init__.py \ No newline at end of file diff --git a/test/instagram/target.json b/test/instagram/target.json new file mode 100644 index 0000000..002adbf --- /dev/null +++ b/test/instagram/target.json @@ -0,0 +1,12 @@ +[ + { + "timestamp": "2020-01-17T20:49:00", + "author": "John Doe", + "message": "Hello Jane!" + }, + { + "timestamp": "2019-12-24T11:23:00", + "author": "Jane Doe", + "message": "Hello John!" + } +] \ No newline at end of file diff --git a/test/instagram/test_export.json b/test/instagram/test_export.json new file mode 100644 index 0000000..ffd956a --- /dev/null +++ b/test/instagram/test_export.json @@ -0,0 +1,14 @@ +{ + "messages": [ + { + "sender_name": "John Doe", + "timestamp_ms": 1579294140000, + "content": "Hello Jane!" + }, + { + "sender_name": "Jane Doe", + "timestamp_ms": 1577186580000, + "content": "Hello John!" + } + ] +} \ No newline at end of file diff --git a/test/instagram/testlog.json b/test/instagram/testlog.json deleted file mode 100644 index e9cb44d..0000000 --- a/test/instagram/testlog.json +++ /dev/null @@ -1,141 +0,0 @@ -{ - "participants": [ - { - "name": "John Doe" - }, - { - "name": "J\u00e2\u009d\u00a4\u00ef\u00b8\u008fhn Doe" - }, - { - "name": "Jane Doe" - }, - { - "name": "Lionel Messi" - } - ], - "messages": [ - { - "sender_name": "John Doe", - "timestamp_ms": 1646025943621, - "reactions": [ - { - "reaction": "\u00e2\u009d\u00a4\u00ef\u00b8\u008f", - "actor": "J\u00e2\u009d\u00a4\u00ef\u00b8\u008fhn Doe" - }, - { - "reaction": "\u00f0\u009f\u0092\u0096", - "actor": "Jane Doe" - } - ] - }, - { - "sender_name": "John Doe", - "timestamp_ms": 1635088903000, - "content": "John Doe's poll has multiple updates." - }, - { - "sender_name": "Jane Doe", - "timestamp_ms": 1662849420000, - "content": "This poll is no longer available." - }, - { - "sender_name": "John Doe", - "timestamp_ms": 1577230200000, - "audio_files": [ - { - "uri": "messages/inbox/videocallwiththehomies_2765720812686121/audio/audioclip2765720812686121_27657208126861.mp4", - "creation_timestamp": 1577230200000 - } - ] - }, - { - "sender_name": "Jane Doe", - "timestamp_ms": 1569324600000, - "videos": [ - { - "uri": "messages/inbox/testlog_834308961406351/videos/834308961_834308961406351_834308961406351_n_834308961406351.mp4", - "creation_timestamp": 1569324600000 - } - ] - }, - { - "sender_name": "Lionel Messi", - "timestamp_ms": 1569367800000, - "content": "John Doe sent an attachment.", - "share": { - "link": "https://www.instagram.com/p/CmUv48DLvxd/?feed_type=reshare_chaining", - "share_text": "" - } - }, - { - "sender_name": "John Doe", - "timestamp_ms": 1500232380000, - "photos": [ - { - "uri": "messages/inbox/testlog_5409190972533416/photos/5409190972533416_5409190972533416_5409190972533416_n_5409190972533416.jpg", - "creation_timestamp": 1500232380000 - } - ] - }, - { - "sender_name": "Jane Doe", - "timestamp_ms": 1419440880000, - "content": "Jane Doe joined the group.", - "users": [ - { - "name": "Jane Doe" - } - ] - }, - { - "sender_name": "John Doe", - "timestamp_ms": 1499369100000, - "content": "Testing message with reactions.", - "reactions": [ - { - "reaction": "\u00e2\u009d\u00a4\u00ef\u00b8\u008f", - "actor": "J\u00e2\u009d\u00a4\u00ef\u00b8\u008fhn Doe" - }, - { - "reaction": "\u00f0\u009f\u0092\u0096", - "actor": "Jane Doe" - } - ] - }, - { - "sender_name": "\u00e2\u009d\u00a4\u00ef\u00b8\u008f", - "timestamp_ms": 1579359180000, - "content": "Testing username only emoji: \u00e2\u009d\u00a4\u00ef\u00b8\u008f" - }, - { - "sender_name": "J\u00e2\u009d\u00a4\u00ef\u00b8\u008fhn Doe", - "timestamp_ms": 1579359180000, - "content": "Testing username with emoji: J\u00e2\u009d\u00a4\u00ef\u00b8\u008fhn Doe" - }, - { - "sender_name": "John Doe", - "timestamp_ms": 1579294140000, - "content": "Testing message with emoji: \u00e2\u009d\u00a4\u00ef\u00b8\u008f" - }, - { - "sender_name": "John Doe", - "timestamp_ms": 1577186580000, - "content": "Testing normal message." - } - ], - "title": "Test Log", - "is_still_participant": true, - "thread_type": "RegularGroup", - "thread_path": "inbox/testlog_2765720812686121", - "magic_words": [ - - ], - "image": { - "uri": "messages/photos/968400688_050920249084043_0809974223100900748_n_1485937848424598.jpg", - "creation_timestamp": 1577186580000 - }, - "joinable_mode": { - "mode": 2, - "link": "https://ig.me/j/XNX75a8NCvbJSRg4/" - } -} \ No newline at end of file diff --git a/test/instagram/testlog_target.csv b/test/instagram/testlog_target.csv deleted file mode 100644 index 4408148..0000000 --- a/test/instagram/testlog_target.csv +++ /dev/null @@ -1,12 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2022-02-28 06:25:43.621,John Doe,disappearingmessage,Monday,6,1,19 -2019-12-25 00:30:00.000,John Doe,sentaudio,Wednesday,0,1,9 -2019-09-24 13:30:00.000,Jane Doe,sentvideo,Tuesday,13,1,9 -2019-09-25 01:30:00.000,Lionel Messi,sentshare,Wednesday,1,1,9 -2017-07-16 21:13:00.000,John Doe,sentphoto,Sunday,21,1,9 -2014-12-24 18:08:00.000,Jane Doe,Jane Doe joined the group.,Wednesday,18,5,26 -2017-07-06 21:25:00.000,John Doe,Testing message with reactions.,Thursday,21,4,31 -2020-01-18 15:53:00.000,❤️,Testing username only emoji: ❤️,Saturday,15,5,31 -2020-01-18 15:53:00.000,J❤️hn Doe,Testing username with emoji: J❤️hn Doe,Saturday,15,6,38 -2020-01-17 21:49:00.000,John Doe,Testing message with emoji: ❤️,Friday,21,5,30 -2019-12-24 12:23:00.000,John Doe,Testing normal message.,Tuesday,12,3,23 diff --git a/test/signal/target.json b/test/signal/target.json new file mode 100644 index 0000000..ca45339 --- /dev/null +++ b/test/signal/target.json @@ -0,0 +1,12 @@ +[ + { + "timestamp": "2020-01-17T21:49:00", + "author": "John Doe", + "message": "Hello Jane!" + }, + { + "timestamp": "2019-12-24T12:23:00", + "author": "Jane Doe", + "message": "Hello John!" + } +] \ No newline at end of file diff --git a/test/signal/test_export.txt b/test/signal/test_export.txt new file mode 100644 index 0000000..902fff3 --- /dev/null +++ b/test/signal/test_export.txt @@ -0,0 +1,3 @@ +[2019-12-24, 12:23] Jane Doe: Hello John! +[2020-01-17, 21:49] John Doe: Hello +Jane! diff --git a/test/telegram/target.json b/test/telegram/target.json new file mode 100644 index 0000000..127edfd --- /dev/null +++ b/test/telegram/target.json @@ -0,0 +1,12 @@ +[ + { + "timestamp": "2018-12-05T10:39:10", + "author": "Author 1", + "message": "Minimal Message" + }, + { + "timestamp": "2018-12-05T10:49:24", + "author": "Author 2", + "message": "Link: https://telegram.org/" + } +] \ No newline at end of file diff --git a/test/telegram/test_batch_export.json b/test/telegram/test_batch_export.json new file mode 100644 index 0000000..e90fba7 --- /dev/null +++ b/test/telegram/test_batch_export.json @@ -0,0 +1,32 @@ +{ + "chats": { + "list": [ + { + "name": "Chatname", + "messages": [ + { + "type": "message", + "date_unixtime": "1544006350", + "from": "Author 1", + "text": "Minimal Message" + }, + { + "type": "message", + "date_unixtime": "1544006964", + "from": "Author 2", + "text": [ + { + "type": "bold", + "text": "Link:" + }, + { + "type": "link", + "text": "https://telegram.org/" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/test/telegram/test_single_export.json b/test/telegram/test_single_export.json new file mode 100644 index 0000000..684bf80 --- /dev/null +++ b/test/telegram/test_single_export.json @@ -0,0 +1,25 @@ +{ + "messages": [ + { + "type": "message", + "date_unixtime": "1544006350", + "from": "Author 1", + "text": "Minimal Message" + }, + { + "type": "message", + "date_unixtime": "1544006964", + "from": "Author 2", + "text": [ + { + "type": "bold", + "text": "Link:" + }, + { + "type": "link", + "text": "https://telegram.org/" + } + ] + } + ] +} \ No newline at end of file diff --git a/test/test_instagram.py b/test/test_instagram.py index 852e88e..cf64395 100644 --- a/test/test_instagram.py +++ b/test/test_instagram.py @@ -1,18 +1,9 @@ -import pandas as pd -from pandas.testing import assert_frame_equal - -from chatminer.chatparsers import InstagramJsonParser +from chatminer.chatparsers import InstagramJsonParser, ParsedMessageCollection def test_instagram(): - parser = InstagramJsonParser("test/instagram/testlog.json") + target = ParsedMessageCollection() + target.read_from_json("test/instagram/target.json") + parser = InstagramJsonParser("test/instagram/test_export.json") parser.parse_file() - df_res = parser.parsed_messages.get_df() - df_test = pd.read_csv( - "test/instagram/testlog_target.csv", - parse_dates=["timestamp"], - ) - assert_frame_equal( - df_test[["author", "message", "words", "letters"]], - df_res[["author", "message", "words", "letters"]], - ) + assert parser.parsed_messages == target diff --git a/test/test_signal.py b/test/test_signal.py new file mode 100644 index 0000000..e6a7a70 --- /dev/null +++ b/test/test_signal.py @@ -0,0 +1,9 @@ +from chatminer.chatparsers import ParsedMessageCollection, SignalParser + + +def test_signal(): + target = ParsedMessageCollection() + target.read_from_json("test/signal/target.json") + parser = SignalParser("test/signal/test_export.txt") + parser.parse_file() + assert parser.parsed_messages == target diff --git a/test/test_telegram.py b/test/test_telegram.py new file mode 100644 index 0000000..fa58221 --- /dev/null +++ b/test/test_telegram.py @@ -0,0 +1,17 @@ +from chatminer.chatparsers import ParsedMessageCollection, TelegramJsonParser + + +def test_telegram_single_export(): + target = ParsedMessageCollection() + target.read_from_json("test/telegram/target.json") + parser = TelegramJsonParser("test/telegram/test_single_export.json") + parser.parse_file() + assert parser.parsed_messages == target + + +def test_telegram_batch_export(): + target = ParsedMessageCollection() + target.read_from_json("test/telegram/target.json") + parser = TelegramJsonParser("test/telegram/test_batch_export.json", "Chatname") + parser.parse_file() + assert parser.parsed_messages == target diff --git a/test/test_whatsapp.py b/test/test_whatsapp.py index 68b484c..61e3f20 100644 --- a/test/test_whatsapp.py +++ b/test/test_whatsapp.py @@ -1,39 +1,30 @@ -import pandas as pd -from pandas.testing import assert_frame_equal +from chatminer.chatparsers import ParsedMessageCollection, WhatsAppParser -from chatminer.chatparsers import WhatsAppParser - -def assert_equal_from_file(file): - parser = WhatsAppParser(f"test/whatsapp/test_{file}.txt") +def is_equal_to_target(sourcefile): + targetfile = "test/whatsapp/target.json" + target = ParsedMessageCollection() + target.read_from_json(targetfile) + parser = WhatsAppParser(sourcefile) parser.parse_file() - df_res = parser.parsed_messages.get_df() - df_test = pd.read_csv( - f"test/whatsapp/test_{file}_target.csv", - parse_dates=["timestamp"], - ) - assert_frame_equal(df_test, df_res, check_dtype=False) - - -def test_dateformat1(): - assert_equal_from_file("dateformat1") + return parser.parsed_messages == target -def test_dateformat2(): - assert_equal_from_file("dateformat2") +def test_mmddyy_24hrs(): + assert is_equal_to_target("test/whatsapp/test_mmddyy_24hrs.txt") -def test_dateformat3(): - assert_equal_from_file("dateformat3") +def test_ddmmyy_24hrs(): + assert is_equal_to_target("test/whatsapp/test_ddmmyy_24hrs.txt") -def test_dateformat4(): - assert_equal_from_file("dateformat4") +def test_mmddyyyy_12hrs(): + assert is_equal_to_target("test/whatsapp/test_mmddyyyy_12hrs.txt") -def test_dateformat5(): - assert_equal_from_file("dateformat5") +def test_yyyymmdd_24hrs(): + assert is_equal_to_target("test/whatsapp/test_yyyymmdd_24hrs.txt") -def test_unicode(): - assert_equal_from_file("unicode") +def test_mmddyy_brackets_24hrs(): + assert is_equal_to_target("test/whatsapp/test_[mmddyy]_24hrs.txt") diff --git a/test/whatsapp/target.json b/test/whatsapp/target.json new file mode 100644 index 0000000..2b833d4 --- /dev/null +++ b/test/whatsapp/target.json @@ -0,0 +1,47 @@ +[ + { + "timestamp": "2021-12-30T22:01:00", + "author": "John Doe", + "message": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero." + }, + { + "timestamp": "2021-12-20T12:10:00", + "author": "🤓", + "message": "Lorem ipsum 🤓" + }, + { + "timestamp": "2021-12-10T10:10:00", + "author": "John Doe 🤓", + "message": "Lorem ipsum 🤓" + }, + { + "timestamp": "2020-06-30T09:10:00", + "author": "System", + "message": "You were added" + }, + { + "timestamp": "2020-06-20T00:08:00", + "author": "System", + "message": "+12 345 578 created group \"Groupname\"" + }, + { + "timestamp": "2020-06-10T15:55:00", + "author": "John-John Doe", + "message": "Lorem ipsum : dolor sit amet." + }, + { + "timestamp": "2019-01-30T20:49:00", + "author": "Jahn Doe", + "message": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero." + }, + { + "timestamp": "2019-01-20T11:23:00", + "author": "John Doe", + "message": "" + }, + { + "timestamp": "2019-01-01T11:25:00", + "author": "John Doe", + "message": "Lorem ipsum dolor sit amet." + } +] \ No newline at end of file diff --git a/test/whatsapp/test_[mmddyy]_24hrs.txt b/test/whatsapp/test_[mmddyy]_24hrs.txt new file mode 100644 index 0000000..d315641 --- /dev/null +++ b/test/whatsapp/test_[mmddyy]_24hrs.txt @@ -0,0 +1,11 @@ +[01/01/19 11:25] John Doe: Lorem ipsum dolor sit amet. +[01/20/19 11:23] John Doe:. +[01/30/19 20:49] Jahn Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. +[06/10/20 15:55] John-John Doe: Lorem ipsum : dolor sit amet. +[06/20/20 00:08] +12 345 578 created group "Groupname" +[06/30/20 09:10] You were added +[12/10/21 10:10] John Doe 🤓: Lorem ipsum 🤓 +[12/20/21 12:10] 🤓: Lorem ipsum 🤓 +[12/30/21 22:01] John Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_dateformat1.txt b/test/whatsapp/test_dateformat1.txt deleted file mode 100644 index 74c85a4..0000000 --- a/test/whatsapp/test_dateformat1.txt +++ /dev/null @@ -1,16 +0,0 @@ -12/24/19, 11:23 - John Doe:. -12/24/19, 11:23 - John Doe: Lorem ipsum dolor sit amet. -12/24/19, 11:23 - John Doe: Lorem ipsum : dolor sit amet. -12/26/19, 11:55 - John-John Doe: Lorem ipsum : dolor sit amet. -1/17/20, 20:49 - Jahn Doe: Lorem ipsum dolor sit amet, -consectetur adipiscing elit. Duis libero. -1/18/20, 14:53 - Jahn Doe: Lorem ipsum dolor sit amet, -consectetur adipiscing elit. -Pellentesque non tincidunt erat. -Nullam suscipit diam dolor, non pretium leo semper eu. -Aliquam pellentesque facilisis nunc eu rhoncus. -Cras scelerisque neque. -7/6/17, 19:25 - Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them. Tap to learn more. -12/24/14, 17:08 - +12 345 578 created group "Groupname" -12/24/14, 17:08 - You were added -7/16/17, 19:13 - +12 345 578: Lorem ipsum. \ No newline at end of file diff --git a/test/whatsapp/test_dateformat1_target.csv b/test/whatsapp/test_dateformat1_target.csv deleted file mode 100644 index 345a5b6..0000000 --- a/test/whatsapp/test_dateformat1_target.csv +++ /dev/null @@ -1,11 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2017-07-16 19:13:00,+12 345 578,Lorem ipsum.,Sunday,19,2,12 -2014-12-24 17:08:00,System,You were added,Wednesday,17,3,14 -2014-12-24 17:08:00,System,"+12 345 578 created group ""Groupname""",Wednesday,17,6,37 -2017-07-06 19:25:00,System,"Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them. Tap to learn more.",Thursday,19,25,139 -2020-01-18 14:53:00,Jahn Doe,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque non tincidunt erat. Nullam suscipit diam dolor, non pretium leo semper eu. Aliquam pellentesque facilisis nunc eu rhoncus. Cras scelerisque neque.",Saturday,14,30,216 -2020-01-17 20:49:00,Jahn Doe,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero.",Friday,20,10,69 -2019-12-26 11:55:00,John-John Doe,Lorem ipsum : dolor sit amet.,Thursday,11,6,29 -2019-12-24 11:23:00,John Doe,Lorem ipsum : dolor sit amet.,Tuesday,11,6,29 -2019-12-24 11:23:00,John Doe,Lorem ipsum dolor sit amet.,Tuesday,11,5,27 -2019-12-24 11:23:00,John Doe,,Tuesday,11,2,22 diff --git a/test/whatsapp/test_dateformat2.txt b/test/whatsapp/test_dateformat2.txt deleted file mode 100644 index 4586c35..0000000 --- a/test/whatsapp/test_dateformat2.txt +++ /dev/null @@ -1,5 +0,0 @@ -9/24/2019 11:30 p.m. - John Doe: Lorem ipsum dolor sit amet. -11/27/2019 9:30 p. m. - John Doe: Lorem ipsum dolor sit amet. -11/17/2019 9:34 p. m. - John-John Doe: Lorem ipsum : dolor sit amet. -12/24/2019 11:30 a. m. - Jahn Doe: Lorem ipsum dolor sit amet, -consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_dateformat2_target.csv b/test/whatsapp/test_dateformat2_target.csv deleted file mode 100644 index 1bb346f..0000000 --- a/test/whatsapp/test_dateformat2_target.csv +++ /dev/null @@ -1,5 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2019-12-24 11:30:00,Jahn Doe,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero.",Tuesday,11,10,69 -2019-11-17 21:34:00,John-John Doe,Lorem ipsum : dolor sit amet.,Sunday,21,6,29 -2019-11-27 21:30:00,John Doe,Lorem ipsum dolor sit amet.,Wednesday,21,5,27 -2019-09-24 23:30:00,John Doe,Lorem ipsum dolor sit amet.,Tuesday,23,5,27 diff --git a/test/whatsapp/test_dateformat3.txt b/test/whatsapp/test_dateformat3.txt deleted file mode 100644 index 83ab3b4..0000000 --- a/test/whatsapp/test_dateformat3.txt +++ /dev/null @@ -1,5 +0,0 @@ -2022/12/14, 13:34. - John Doe: Lorem ipsum dolor sit amet. -2022/12/24, 11:33. - John Doe: Lorem ipsum : dolor sit amet. -2022/12/24, 11:35. - John-John Doe: Lorem ipsum : dolor sit amet. -2022/12/21, 17:02. - Jahn Doe: Lorem ipsum dolor sit amet, -consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_dateformat3_target.csv b/test/whatsapp/test_dateformat3_target.csv deleted file mode 100644 index 9109680..0000000 --- a/test/whatsapp/test_dateformat3_target.csv +++ /dev/null @@ -1,5 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2022-12-21 17:02:00,Jahn Doe,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero.",Wednesday,17,10,69 -2022-12-24 11:35:00,John-John Doe,Lorem ipsum : dolor sit amet.,Saturday,11,6,29 -2022-12-24 11:33:00,John Doe,Lorem ipsum : dolor sit amet.,Saturday,11,6,29 -2022-12-14 13:34:00,John Doe,Lorem ipsum dolor sit amet.,Wednesday,13,5,27 diff --git a/test/whatsapp/test_dateformat4.txt b/test/whatsapp/test_dateformat4.txt deleted file mode 100644 index 26f8116..0000000 --- a/test/whatsapp/test_dateformat4.txt +++ /dev/null @@ -1,5 +0,0 @@ -[09/10/22 11:37:00 p.m.] John Doe: Lorem ipsum dolor sit amet. -[10/10/22 12:37:00 p.m.] John Doe: Lorem ipsum dolor sit amet. -[11/10/22 13:37:00 p.m.] John-John Doe: Lorem ipsum : dolor sit amet. -[12/10/22 14:37:00 p.m.] Jahn Doe: Lorem ipsum dolor sit amet, -consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_dateformat4_target.csv b/test/whatsapp/test_dateformat4_target.csv deleted file mode 100644 index 9dc4a39..0000000 --- a/test/whatsapp/test_dateformat4_target.csv +++ /dev/null @@ -1,5 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2022-10-12 14:37:00,Jahn Doe,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero.",Wednesday,14,10,69 -2022-10-11 13:37:00,John-John Doe,Lorem ipsum : dolor sit amet.,Tuesday,13,6,29 -2022-10-10 12:37:00,John Doe,Lorem ipsum dolor sit amet.,Monday,12,5,27 -2022-10-09 23:37:00,John Doe,Lorem ipsum dolor sit amet.,Sunday,23,5,27 diff --git a/test/whatsapp/test_dateformat5.txt b/test/whatsapp/test_dateformat5.txt deleted file mode 100644 index 104aab6..0000000 --- a/test/whatsapp/test_dateformat5.txt +++ /dev/null @@ -1,5 +0,0 @@ -[10.21.21, 15:21:45] John Doe: Lorem ipsum dolor sit amet. -[10.22.21, 15:21:46] John Doe: Lorem ipsum dolor sit amet. -[10.23.21, 15:21:47] John-John Doe: Lorem ipsum : dolor sit amet. -[10.24.21, 15:21:48] Jahn Doe: Lorem ipsum dolor sit amet, -consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_dateformat5_target.csv b/test/whatsapp/test_dateformat5_target.csv deleted file mode 100644 index d755e7c..0000000 --- a/test/whatsapp/test_dateformat5_target.csv +++ /dev/null @@ -1,5 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2021-10-24 15:21:48,Jahn Doe,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis libero.",Sunday,15,10,69 -2021-10-23 15:21:47,John-John Doe,Lorem ipsum : dolor sit amet.,Saturday,15,6,29 -2021-10-22 15:21:46,John Doe,Lorem ipsum dolor sit amet.,Friday,15,5,27 -2021-10-21 15:21:45,John Doe,Lorem ipsum dolor sit amet.,Thursday,15,5,27 diff --git a/test/whatsapp/test_ddmmyy_24hrs.txt b/test/whatsapp/test_ddmmyy_24hrs.txt new file mode 100644 index 0000000..dc4d84c --- /dev/null +++ b/test/whatsapp/test_ddmmyy_24hrs.txt @@ -0,0 +1,11 @@ +01/01/19, 11:25 - John Doe: Lorem ipsum dolor sit amet. +20/01/19, 11:23 - John Doe:. +30/01/19, 20:49 - Jahn Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. +10/06/20, 15:55 - John-John Doe: Lorem ipsum : dolor sit amet. +20/06/20, 00:08 - +12 345 578 created group "Groupname" +30/06/20, 09:10 - You were added +10/12/21, 10:10 - John Doe 🤓: Lorem ipsum 🤓 +20/12/21, 12:10 - 🤓: Lorem ipsum 🤓 +30/12/21, 22:01 - John Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_mmddyy_24hrs.txt b/test/whatsapp/test_mmddyy_24hrs.txt new file mode 100644 index 0000000..043a0da --- /dev/null +++ b/test/whatsapp/test_mmddyy_24hrs.txt @@ -0,0 +1,11 @@ +01/01/19, 11:25 - John Doe: Lorem ipsum dolor sit amet. +01/20/19, 11:23 - John Doe:. +01/30/19, 20:49 - Jahn Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. +06/10/20, 15:55 - John-John Doe: Lorem ipsum : dolor sit amet. +06/20/20, 00:08 - +12 345 578 created group "Groupname" +06/30/20, 09:10 - You were added +12/10/21, 10:10 - John Doe 🤓: Lorem ipsum 🤓 +12/20/21, 12:10 - 🤓: Lorem ipsum 🤓 +12/30/21, 22:01 - John Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_mmddyyyy_12hrs.txt b/test/whatsapp/test_mmddyyyy_12hrs.txt new file mode 100644 index 0000000..57bb953 --- /dev/null +++ b/test/whatsapp/test_mmddyyyy_12hrs.txt @@ -0,0 +1,11 @@ +01/01/2019, 11:25 a. m. - John Doe: Lorem ipsum dolor sit amet. +01/20/2019, 11:23 a. m. - John Doe:. +01/30/2019, 08:49 p. m. - Jahn Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. +06/10/2020, 03:55 p. m. - John-John Doe: Lorem ipsum : dolor sit amet. +06/20/2020, 00:08 a. m. - +12 345 578 created group "Groupname" +06/30/2020, 09:10 a. m. - You were added +12/10/2021, 10:10 a. m. - John Doe 🤓: Lorem ipsum 🤓 +12/20/2021, 12:10 p. m. - 🤓: Lorem ipsum 🤓 +12/30/2021, 10:01 p. m. - John Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. \ No newline at end of file diff --git a/test/whatsapp/test_unicode.txt b/test/whatsapp/test_unicode.txt deleted file mode 100644 index 5d7ff0b..0000000 --- a/test/whatsapp/test_unicode.txt +++ /dev/null @@ -1,3 +0,0 @@ -09/10/22, 10:37 p. m. - John Doe: Testing abnormal unicode -09/10/22, 10:37 p. m. - John Doe 🤓: Testing emoji 🤓 -09/10/22, 10:37 p. m. - 🤓: Testing emoji 🤓 \ No newline at end of file diff --git a/test/whatsapp/test_unicode_target.csv b/test/whatsapp/test_unicode_target.csv deleted file mode 100644 index 44d7e84..0000000 --- a/test/whatsapp/test_unicode_target.csv +++ /dev/null @@ -1,4 +0,0 @@ -timestamp,author,message,weekday,hour,words,letters -2022-10-09 22:37:00,🤓,Testing emoji 🤓,Sunday,22,3,15 -2022-10-09 22:37:00,John Doe 🤓,Testing emoji 🤓,Sunday,22,3,15 -2022-10-09 22:37:00,John Doe,Testing abnormal unicode,Sunday,22,3,24 diff --git a/test/whatsapp/test_yyyymmdd_24hrs.txt b/test/whatsapp/test_yyyymmdd_24hrs.txt new file mode 100644 index 0000000..cb1a206 --- /dev/null +++ b/test/whatsapp/test_yyyymmdd_24hrs.txt @@ -0,0 +1,11 @@ +2019/01/01, 11:25 - John Doe: Lorem ipsum dolor sit amet. +2019/01/20, 11:23 - John Doe:. +2019/01/30, 20:49 - Jahn Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. +2020/06/10, 15:55 - John-John Doe: Lorem ipsum : dolor sit amet. +2020/06/20, 00:08 - +12 345 578 created group "Groupname" +2020/06/30, 09:10 - You were added +2021/12/10, 10:10 - John Doe 🤓: Lorem ipsum 🤓 +2021/12/20, 12:10 - 🤓: Lorem ipsum 🤓 +2021/12/30, 22:01 - John Doe: Lorem ipsum dolor sit amet, +consectetur adipiscing elit. Duis libero. \ No newline at end of file