Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve test coverage #123

Merged
16 commits merged on Dec 3, 2023
Merged
19 changes: 8 additions & 11 deletions chatminer/chatparsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _parse_message(self, mess: Any) -> Optional[ParsedMessage]:
class SignalParser(Parser):
def _read_raw_messages_from_file(self):
def _is_new_message(line: str):
regex = r"^\[\d{4}-\d{2}-\d{2} \d{2}:\d{2}\]"
regex = r"^\[\d{4}-\d{2}-\d{2}, \d{2}:\d{2}\]"
return re.match(regex, line)

with self._file.open(encoding="utf-8") as f:
Expand All @@ -136,7 +136,8 @@ def _is_new_message(line: str):
if buffer:
buffer.append(line)
buffer.reverse()
self._raw_messages.append(" ".join(buffer))
joined_buffer = " ".join(buffer)
self._raw_messages.append("".join(joined_buffer.splitlines()))
buffer.clear()
else:
self._raw_messages.append(line)
Expand Down Expand Up @@ -221,7 +222,7 @@ def _parse_message(self, mess: Dict[str, Any]):
self._logger.warning("Skipped message with unknown format: %s", mess)
return None

time = dt.datetime.fromtimestamp(mess["timestamp_ms"] / 1000)
time = dt.datetime.utcfromtimestamp(mess["timestamp_ms"] / 1000)
author = mess["sender_name"].encode("latin-1").decode("utf-8")
body = body.encode("latin-1").decode("utf-8")
return ParsedMessage(time, author, body)
Expand Down Expand Up @@ -263,7 +264,7 @@ def _parse_message(self, mess: Dict[str, Any]):
self._logger.warning("Skipped message with unknown format: %s", mess)
return None

time = dt.datetime.fromtimestamp(mess["timestamp_ms"] / 1000)
time = dt.datetime.utcfromtimestamp(mess["timestamp_ms"] / 1000)
author = mess["sender_name"].encode("latin-1").decode("utf-8")
body = body.encode("latin-1").decode("utf-8")
return ParsedMessage(time, author, body)
Expand Down Expand Up @@ -313,7 +314,7 @@ def _parse_message(self, mess: Dict[str, Any]):
else:
raise ValueError(f"Unable to parse type {type(mess['text'])} in {mess}")

time = dt.datetime.fromtimestamp(int(mess["date_unixtime"]))
time = dt.datetime.utcfromtimestamp(int(mess["date_unixtime"]))
author = mess["from"]
return ParsedMessage(time, author, body)
return None
Expand Down Expand Up @@ -393,12 +394,8 @@ def _log_resulting_format(self):
end = "]" if self.has_brackets else ""
if self.is_yearfirst:
date1 = "year"
if self.is_dayfirst:
date2 = "day"
date3 = "month"
else:
date2 = "month"
date3 = "day"
date2 = "month"
date3 = "day"
elif self.is_dayfirst:
date1 = "day"
date2 = "month"
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ NLP =

[coverage:run]
omit =
chatminer/cli.py
chatminer/nlp.py
chatminer/visualizations.py
chatminer/__init__.py
12 changes: 12 additions & 0 deletions test/instagram/target.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"timestamp": "2020-01-17T20:49:00",
"author": "John Doe",
"message": "Hello Jane!"
},
{
"timestamp": "2019-12-24T11:23:00",
"author": "Jane Doe",
"message": "Hello John!"
}
]
14 changes: 14 additions & 0 deletions test/instagram/test_export.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"messages": [
{
"sender_name": "John Doe",
"timestamp_ms": 1579294140000,
"content": "Hello Jane!"
},
{
"sender_name": "Jane Doe",
"timestamp_ms": 1577186580000,
"content": "Hello John!"
}
]
}
141 changes: 0 additions & 141 deletions test/instagram/testlog.json

This file was deleted.

12 changes: 0 additions & 12 deletions test/instagram/testlog_target.csv

This file was deleted.

12 changes: 12 additions & 0 deletions test/signal/target.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"timestamp": "2020-01-17T21:49:00",
"author": "John Doe",
"message": "Hello Jane!"
},
{
"timestamp": "2019-12-24T12:23:00",
"author": "Jane Doe",
"message": "Hello John!"
}
]
3 changes: 3 additions & 0 deletions test/signal/test_export.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[2019-12-24, 12:23] Jane Doe: Hello John!
[2020-01-17, 21:49] John Doe: Hello
Jane!
12 changes: 12 additions & 0 deletions test/telegram/target.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[
{
"timestamp": "2018-12-05T10:39:10",
"author": "Author 1",
"message": "Minimal Message"
},
{
"timestamp": "2018-12-05T10:49:24",
"author": "Author 2",
"message": "Link: https://telegram.org/"
}
]
32 changes: 32 additions & 0 deletions test/telegram/test_batch_export.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"chats": {
"list": [
{
"name": "Chatname",
"messages": [
{
"type": "message",
"date_unixtime": "1544006350",
"from": "Author 1",
"text": "Minimal Message"
},
{
"type": "message",
"date_unixtime": "1544006964",
"from": "Author 2",
"text": [
{
"type": "bold",
"text": "Link:"
},
{
"type": "link",
"text": "https://telegram.org/"
}
]
}
]
}
]
}
}
25 changes: 25 additions & 0 deletions test/telegram/test_single_export.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"messages": [
{
"type": "message",
"date_unixtime": "1544006350",
"from": "Author 1",
"text": "Minimal Message"
},
{
"type": "message",
"date_unixtime": "1544006964",
"from": "Author 2",
"text": [
{
"type": "bold",
"text": "Link:"
},
{
"type": "link",
"text": "https://telegram.org/"
}
]
}
]
}
19 changes: 5 additions & 14 deletions test/test_instagram.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,9 @@
import pandas as pd
from pandas.testing import assert_frame_equal

from chatminer.chatparsers import InstagramJsonParser
from chatminer.chatparsers import InstagramJsonParser, ParsedMessageCollection


def test_instagram():
parser = InstagramJsonParser("test/instagram/testlog.json")
target = ParsedMessageCollection()
target.read_from_json("test/instagram/target.json")
parser = InstagramJsonParser("test/instagram/test_export.json")
parser.parse_file()
df_res = parser.parsed_messages.get_df()
df_test = pd.read_csv(
"test/instagram/testlog_target.csv",
parse_dates=["timestamp"],
)
assert_frame_equal(
df_test[["author", "message", "words", "letters"]],
df_res[["author", "message", "words", "letters"]],
)
assert parser.parsed_messages == target
9 changes: 9 additions & 0 deletions test/test_signal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from chatminer.chatparsers import ParsedMessageCollection, SignalParser


def test_signal():
target = ParsedMessageCollection()
target.read_from_json("test/signal/target.json")
parser = SignalParser("test/signal/test_export.txt")
parser.parse_file()
assert parser.parsed_messages == target
Loading