Skip to content

Commit

Permalink
Introduced support for sentiment analysis (#81)
Browse files Browse the repository at this point in the history
* Create nlp.py & add_sentiment(df) function

* Added optional NLP dependencies

* Closes #60 

---------

Co-authored-by: joweich <[email protected]>
  • Loading branch information
bdfsaraiva and joweich authored Jan 28, 2023
1 parent e348a4c commit b1daa4e
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# VScode
.vscode/
46 changes: 46 additions & 0 deletions chatminer/nlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Optional

import pandas as pd
from transformers import pipeline


def add_sentiment(df: pd.DataFrame, lang: str = "en") -> pd.DataFrame:
    """
    Add sentiment column to the input dataframe

    The dataframe is modified in place (a "sentiment" column is assigned on
    it) and the same object is returned.

    Parameters:
        df (pd.DataFrame): The input dataframe; must contain a "message" column
        lang (str): Language of the messages, defaults to "en". Any value
            other than "en" selects the multilingual XLM-RoBERTa model.
    Returns:
        pd.DataFrame: The input dataframe with an additional column "sentiment"
    Raises:
        ValueError: If the dataframe has no "message" column
    """
    # Validate before building the pipeline so that a malformed dataframe
    # fails immediately instead of after the model has been loaded.
    if "message" not in df.columns:
        raise ValueError("Input dataframe does not contain a 'message' column")

    model_path = (
        "cardiffnlp/twitter-roberta-base-sentiment-latest"
        if lang == "en"
        else "cardiffnlp/twitter-xlm-roberta-base-sentiment"
    )
    sentiment_pipeline = pipeline("sentiment-analysis", model=model_path)

    def extract_sentiment(message: str) -> Optional[str]:
        """
        Extract sentiment from message
        Parameters:
            message (str): The input message
        Returns:
            Optional[str]: The label of the first pipeline result, or None
            if the pipeline raised an error for this message
        """
        try:
            return sentiment_pipeline(message)[0]["label"]
        except Exception:
            # Best effort: a single failing message must not abort the whole
            # run. Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate and the user can cancel the run.
            print(f"Error processing message: {message}")
            return None

    df["sentiment"] = df["message"].apply(extract_sentiment)

    return df
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ numpy
matplotlib
wordcloud
python-dateutil
tqdm
tqdm
transformers
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,7 @@ install_requires =
wordcloud
python-dateutil
tqdm

[options.extras_require]
NLP =
transformers

0 comments on commit b1daa4e

Please sign in to comment.