19 changes: 16 additions & 3 deletions .gitignore
@@ -3,6 +3,12 @@ __pycache__/
*.py[cod]
*$py.class

# logs
*.log
*.txt
*lsf.*
*/results/*

# C extensions
*.so

@@ -175,8 +181,15 @@ fabric.properties
.DS_Store
*/.DS_Store
.idea
*__MACOSX*

# ignore large files
saved_models/electra/
saved_models/deberta/

*saved_models*
*saved_datasets*
data/multinli*
*jsonl

# API keys
data/api_openai.txt
/codes_for_models/abhinav_experiments/processed_train_df.csv
/codes_for_models/abhinav_experiments/processed_val_df.csv
24 changes: 18 additions & 6 deletions README.md
@@ -1,14 +1,26 @@
# Logical Fallacy Detection
Repo for the paper "[Detecting Logical Fallacies: From Educational Examples to Climate Change News](https://bit.ly/logical-nlp)" (2021) by _Zhijing Jin, Tejas Vaidhya, Xiaoyu Shen, Yiwen Ding, Zhiheng Lyu, Mrinmaya Sachan, Rada Mihalcea, Bernhard Schoelkopf_.
Repo for the paper "[Logical Fallacy Detection
](https://arxiv.org/abs/2202.13758)" (2022) by _Abhinav Lalwani, Zhijing Jin, Tejas Vaidhya, Xiaoyu Shen, Yiwen Ding, Zhiheng Lyu, Mrinmaya Sachan, Rada Mihalcea, Bernhard Schoelkopf_.

To cite the paper:
```bibtex

@article{jin2021detecting,
title = {Detecting Logical Fallacies: {F}rom Educational Examples to Climate Change News},
author = {Zhijing Jin and Tejas Vaidhya and Xiaoyu Shen and Yiwen Ding and Zhiheng Lyu and Mrinmaya Sachan and Rada Mihalcea and Bernhard Schoelkopf},
year = {2021},
url = {https://bit.ly/logical-nlp},
@misc{jin2022logical,
  doi = {10.48550/ARXIV.2202.13758},
  url = {https://arxiv.org/abs/2202.13758},
  author = {Jin, Zhijing and Lalwani, Abhinav and Vaidhya, Tejas and Shen, Xiaoyu and Ding, Yiwen and Lyu, Zhiheng and Sachan, Mrinmaya and Mihalcea, Rada and Sch&ouml;lkopf, Bernhard},
  keywords = {Computation and Language (cs.CL), Artificial Intelligence (cs.AI), Computers and Society (cs.CY), Machine Learning (cs.LG), Logic in Computer Science (cs.LO), FOS: Computer and information sciences},
  title = {Logical Fallacy Detection},
  publisher = {arXiv},
  year = {2022},
  copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}
}
```

28 changes: 28 additions & 0 deletions codes_for_analysis/visualization/blue_score.ipynb
@@ -0,0 +1,28 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
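The `blue_score.ipynb` stub above only imports pandas, but its name suggests it is meant to compute BLEU scores. As a point of reference (this is a dependency-free sketch, not the repo's implementation; the `sentence_bleu` helper below is hypothetical), sentence-level BLEU with uniform n-gram weights can be computed as:

```python
import math
from collections import Counter


def ngrams(tokens, n):
    """All contiguous n-grams of a token list, as tuples."""
    return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]


def sentence_bleu(reference, hypothesis, max_n=4):
    """Sentence-level BLEU with uniform weights and a brevity penalty.

    No smoothing: any zero n-gram precision makes the whole score 0,
    which matches the default behaviour of most BLEU implementations.
    """
    ref = reference.split()
    hyp = hypothesis.split()
    log_precisions = []
    for n in range(1, max_n + 1):
        hyp_counts = Counter(ngrams(hyp, n))
        ref_counts = Counter(ngrams(ref, n))
        # Clipped overlap: each hypothesis n-gram counts at most as often
        # as it appears in the reference.
        overlap = sum(min(c, ref_counts[g]) for g, c in hyp_counts.items())
        total = max(sum(hyp_counts.values()), 1)
        if overlap == 0:
            return 0.0
        log_precisions.append(math.log(overlap / total))
    # Brevity penalty: punish hypotheses shorter than the reference.
    bp = 1.0 if len(hyp) > len(ref) else math.exp(1 - len(ref) / max(len(hyp), 1))
    return bp * math.exp(sum(log_precisions) / max_n)
```

A production notebook would more likely call `nltk.translate.bleu_score.sentence_bleu` or `sacrebleu`, but the arithmetic is the same.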
33 changes: 33 additions & 0 deletions codes_for_models/experiments_round2/analyze_datasets.py
@@ -0,0 +1,33 @@
import pandas as pd
import stanza
from logicedu import get_logger

nlp = stanza.Pipeline(lang='en', processors='tokenize', use_gpu=True)


def get_stats(df):
    """Return [row count, sentence count, word count, vocabulary size] for df['text']."""
    sent_count = 0
    word_count = 0
    words = set()
    i = 0
    for text in df['text']:
        if isinstance(text, float):  # skip NaN rows
            continue
        doc = nlp(text)
        logger.warning(i)  # progress marker; logger.warn() is deprecated
        i += 1
        sent_count += len(doc.sentences)
        for sent in doc.sentences:
            word_count += len(sent.words)
            for word in sent.words:
                words.add(word.text)
    return [len(df), sent_count, word_count, len(words)]


logger = get_logger(level='WARN')
train_df = pd.read_csv('~/PycharmProjects/kialoscraping/data/kialo_train.csv')
dev_df = pd.read_csv('~/PycharmProjects/kialoscraping/data/kialo_dev.csv')
test_df = pd.read_csv('~/PycharmProjects/kialoscraping/data/kialo_test.csv')
all_df = pd.concat([train_df, dev_df, test_df])
results = [get_stats(train_df), get_stats(dev_df), get_stats(test_df), get_stats(all_df)]
print(results)
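Running the full stanza pipeline over every split is slow; for a quick sanity check of the same statistics, a dependency-free approximation (the helper name `get_stats_simple` is hypothetical, and its naive punctuation/whitespace splitting will not exactly match stanza's tokenizer) could look like:

```python
import re


def get_stats_simple(texts):
    """Approximate [doc count, sentence count, word count, vocab size].

    Sentences are split on ., ! and ?; words on whitespace. This is a
    rough stand-in for the stanza tokenizer used in get_stats.
    """
    doc_count = 0
    sent_count = 0
    word_count = 0
    vocab = set()
    for text in texts:
        if not isinstance(text, str):  # mirror the NaN skip in get_stats
            continue
        doc_count += 1
        sentences = [s for s in re.split(r'[.!?]+', text) if s.strip()]
        sent_count += len(sentences)
        tokens = text.split()
        word_count += len(tokens)
        vocab.update(tokens)
    return [doc_count, sent_count, word_count, len(vocab)]
```

Note one deliberate difference: `get_stats` reports `len(df)` including NaN rows, while this sketch counts only the documents it actually processed.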
9 changes: 9 additions & 0 deletions codes_for_models/experiments_round2/class_distribution.py
@@ -0,0 +1,9 @@
import pandas as pd

train_df = pd.read_csv('../../data/climate_train.csv')
dev_df = pd.read_csv('../../data/climate_dev.csv')
test_df = pd.read_csv('../../data/climate_test.csv')
all_df = pd.concat([train_df, dev_df, test_df])
counts = all_df['logical_fallacies'].value_counts()
for label, count in counts.items():  # Series.iteritems() was removed in pandas 2.0
    print("%s & %.2f \\%% \\\\" % (label.title(), count / len(all_df) * 100))
16 changes: 16 additions & 0 deletions codes_for_models/experiments_round2/classwise_electra.csv
@@ -0,0 +1,16 @@
,Fallacy Name,Precision,Recall,F1,Number of Positive Labels for this Class in Test Set
0,fallacy of logic,0.4642857142857143,0.7647058823529411,0.5777777777777777,17
1,fallacy of relevance,0.7,0.3181818181818182,0.4375,22
2,appeal to normality,0.0,0.0,0.0,0
3,intentional,0.45454545454545453,0.45454545454545453,0.45454545454545453,11
4,faulty generalization,0.3220338983050847,0.9193548387096774,0.4769874476987448,62
5,miscellaneous,0.0,0.0,0.0,2
6,ad hominem,0.3333333333333333,0.925,0.490066225165563,40
7,fallacy of extension,0.45454545454545453,0.4,0.4255319148936171,25
8,circular reasoning,0.5,0.6875,0.5789473684210527,16
9,appeal to emotion,0.3157894736842105,0.6666666666666666,0.42857142857142855,18
10,false causality,0.28,0.7368421052631579,0.40579710144927544,19
11,false dilemma,0.6842105263157895,1.0,0.8125000000000001,13
12,fallacy of credibility,0.5294117647058824,0.5294117647058824,0.5294117647058824,17
13,ad populum,0.5178571428571429,0.90625,0.6590909090909092,32
14,equivocation,1.0,0.16666666666666666,0.2857142857142857,6
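The per-class rows in `classwise_electra.csv` can be rolled up into the macro and support-weighted averages usually reported in the paper's tables. A small sketch of the aggregation (helper names `macro_f1` and `weighted_f1` are hypothetical, not from the repo; in practice `sklearn.metrics.precision_recall_fscore_support` with `average='macro'` or `'weighted'` does the same):

```python
def macro_f1(rows):
    """Unweighted mean F1 over all classes; rows are (f1, support) pairs."""
    return sum(f1 for f1, _ in rows) / len(rows)


def weighted_f1(rows):
    """Support-weighted mean F1, skipping classes absent from the test set."""
    rows = [(f1, n) for f1, n in rows if n > 0]
    total = sum(n for _, n in rows)
    return sum(f1 * n for f1, n in rows) / total
```

Note the two averages treat the zero-support rows (e.g. "appeal to normality") differently: macro-F1 still counts them as 0.0, dragging the mean down, while the weighted average ignores them entirely.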