From a65098e5ca4d1c746554d530c62be5b51753dc72 Mon Sep 17 00:00:00 2001 From: Daniel Botros Date: Tue, 12 Dec 2023 12:43:18 -0500 Subject: [PATCH 1/4] Add PosNegIrony transformer and demo --- .../positive_negative_irony/posNegIrony.py | 178 ++++++++++ examples/pos-neg-irony/posNegIrony.ipynb | 321 ++++++++++++++++++ 2 files changed, 499 insertions(+) create mode 100644 convokit/positive_negative_irony/posNegIrony.py create mode 100644 examples/pos-neg-irony/posNegIrony.ipynb diff --git a/convokit/positive_negative_irony/posNegIrony.py b/convokit/positive_negative_irony/posNegIrony.py new file mode 100644 index 00000000..989aefcd --- /dev/null +++ b/convokit/positive_negative_irony/posNegIrony.py @@ -0,0 +1,178 @@ +import math +import nltk +from convokit.transformer import Transformer, Corpus +from inspect import signature +from nltk.sentiment import SentimentIntensityAnalyzer +nltk.download('vader_lexicon') + +class PosNegIronyTransformer(Transformer): + """ + A transformer to label all instances of the token "/s" (ironic utterances) + with a score indicating whether the irony is positive or negative, + based on the degree of sentiment of the utterance and its replies. + + :param obj_type: type of Corpus object to calculate: 'conversation', 'speaker', or 'utterance'; defaults to 'utterance' (currently only 'utterance' is supported) + :param input_field: input field from every utterance object. Will default to reading 'utt.text'. If a string is provided, then the metadata field with that name is read instead. + :param output_field: field for writing the computed output in metadata. Currently unused: results are written to the 'sentiment', 'replies_sentiment', and 'agree_score' utterance metadata fields. + :param input_filter: a boolean function of signature `input_filter(utterance, aux_input)`. Attributes will only be computed for utterances where `input_filter` returns `True`. By default, only utterances containing the '/s' marker and their replies are considered. + :param verbosity: frequency at which to print status messages when computing attributes.
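+ + fit() estimates the corpus-wide mean and standard deviation of VADER compound sentiment and collects the ids of '/s' utterances and their replies; transform() then writes 'sentiment', 'replies_sentiment' (the average compound sentiment of direct replies), and 'agree_score' to each '/s' utterance's metadata. + The accompanying demo notebook reads agree_score > 0 as positive irony, agree_score < 0 as negative irony, and 0 as neutral/unlabelled.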
+ """ + + def __init__( + self, + obj_type='utterance', + output_field=None, + input_field=None, + input_filter=filter, + verbosity=10000, + ): + if input_filter: + if len(signature(input_filter).parameters) == 1: + self.input_filter = lambda utt: input_filter(self, utt) + else: + self.input_filter = input_filter + else: + self.input_filter = lambda utt: True + self.obj_type = obj_type + self.input_field = input_field + self.output_field = output_field + self.verbosity = verbosity + self.sia = SentimentIntensityAnalyzer() + self.mean = 0 + self.sd = 0 + + def _print_output(self, i): + return (self.verbosity > 0) and (i > 0) and (i % self.verbosity == 0) + + + def fit(self, corpus: Corpus) -> Corpus: + corpus_sent = {} + corpus_sent["pos"] = 0 + corpus_sent["neg"] = 0 + corpus_sent["neu"] = 0 + corpus_sent["compound"] = 0 + l = 0 + values = [] + + whitelist(self, corpus) + + if self.obj_type == 'utterance': + for idx, utterance in enumerate(corpus.iter_utterances()): + if self._print_output(idx): + print(f"%03d {self.obj_type} processed" % (idx)) + + if self.input_field is None: + text_entry = utterance.text + elif isinstance(self.input_field, str): + text_entry = utterance.meta(self.input_field) + if text_entry is None: + continue + + l += 1 + sentiment = self.sia.polarity_scores(text_entry) + corpus_sent["pos"] += sentiment["pos"] + corpus_sent["neg"] += sentiment["neg"] + corpus_sent["neu"] += sentiment["neu"] + corpus_sent["compound"] += sentiment["compound"] + values.append(sentiment["compound"]) + + corpus_sent = {key: value / l for key, value in corpus_sent.items()} + self.mean = corpus_sent["compound"] + + squared_differences = [(x - self.mean) ** 2 for x in values] + variance = sum(squared_differences) / (len(values) - 1) + standard_deviation = math.sqrt(variance) + self.sd = standard_deviation + + return self + + def transform(self, corpus: Corpus) -> Corpus: + """ + + :param corpus: Corpus + :return: the corpus + """ + + if self.obj_type == 'utterance': + total = len(list(corpus.iter_utterances())) + + for idx, utterance in enumerate(corpus.iter_utterances()): + if self._print_output(idx): + print(f"%03d/%03d {self.obj_type} processed" % (idx, total)) + + if not self.input_filter(self, utterance): + continue + + if self.input_field is None: + if ">" in utterance.text: + try: + text_entry = utterance.text.split("\n")[1] + except: + text_entry = utterance.text.split(".")[1] + else: + text_entry = utterance.text + if " /s " in text_entry: + text_entry = text_entry.split(" \s ")[0] + elif "\n/s" in text_entry: + text_entry = text_entry.split("\n/s")[0] + else: + text_entry = text_entry + elif isinstance(self.input_field, str): + text_entry = utterance.meta(self.input_field) + if text_entry is None: + continue + + if " /s " in utterance.text or "\n/s" in utterance.text: + sentiment = self.sia.polarity_scores(text_entry) + convo = utterance.get_conversation() + replies = list(convo.get_subtree(utterance.id).children) + acc_sent = 0 + average_sent = 0 + + if len(replies) > 0: + for reply in replies: + reply_sent = self.sia.polarity_scores(reply.utt.text) + acc_sent += reply_sent["compound"] + reply.utt.add_meta("sentiment", reply_sent) + average_sent = acc_sent / len(replies) + + utterance.add_meta("sentiment", sentiment) + utterance.add_meta("replies_sentiment", average_sent) + agree_score = 0 + + if average_sent == 0: + agree_score = 0 + elif (average_sent <= (self.mean - self.sd*.5) and average_sent >= (self.mean - self.sd*2) and sentiment["compound"] <= (self.mean - self.sd*.5)) or 
(average_sent >= (self.mean + self.sd*.5) and average_sent <= (self.mean - self.sd*2) and sentiment["compound"] >= (self.mean + self.sd*.5)) or (sentiment["compound"] <= (self.mean - self.sd*.5) and sentiment["compound"] >= (self.mean - self.sd*2) and average_sent <= (self.mean - self.sd*.5)) or (sentiment["compound"] >= (self.mean + self.sd*.5) and sentiment["compound"] <= (self.mean - self.sd*2) and average_sent >= (self.mean + self.sd*.5)): + agree_score = (average_sent + sentiment["compound"])/2 + elif (average_sent < (self.mean - self.sd*2) and sentiment["compound"] < (self.mean - self.sd*2)) or (average_sent > (self.mean + self.sd*2) and sentiment["compound"] > (self.mean + self.sd*2)): + agree_score = -abs((average_sent + sentiment["compound"])/2) + elif (average_sent > (self.mean + self.sd*.5) and sentiment["compound"] < (self.mean - self.sd*.5)) or (average_sent < (self.mean - self.sd*.5) and sentiment["compound"] > (self.mean + self.sd*.5)): + agree_score = (average_sent + -sentiment["compound"])/2 + else: + agree_score = 0 + + utterance.add_meta("agree_score", agree_score) + else: + raise KeyError('obj_type must be utterance') + + + if self.verbosity > 0: + print(f"%03d/%03d {self.obj_type} processed" % (total, total)) + return corpus + +def whitelist(self, corpus: Corpus): + whitelist = [] + for convo in corpus.iter_conversations(): + for utt in convo.iter_utterances(): + if " /s " in utt.text or "\n/s" in utt.text: + whitelist.append(utt.id) + convo = utt.get_conversation() + replies = list(convo.get_subtree(utt.id).bfs_traversal()) + for reply in replies: + if reply.utt.id != utt.id: + whitelist.append(reply.utt.id) + + self.whitelist = whitelist + +def filter(self, utt): + return utt.id in self.whitelist \ No newline at end of file diff --git a/examples/pos-neg-irony/posNegIrony.ipynb b/examples/pos-neg-irony/posNegIrony.ipynb new file mode 100644 index 00000000..0c6ddd43 --- /dev/null +++ b/examples/pos-neg-irony/posNegIrony.ipynb @@ -0,0 +1,321 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'PosNegIronyTransformer' from 'convokit' (/Users/danielbotros/cs4300-env/lib/python3.7/site-packages/convokit/__init__.py)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Imports\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mconvokit\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mCorpus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPosNegIronyTransformer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'PosNegIronyTransformer' from 'convokit' (/Users/danielbotros/cs4300-env/lib/python3.7/site-packages/convokit/__init__.py)" + ] + } + ], 
+ "source": [ + "# Imports\n", + "from convokit import Corpus, download, PosNegIronyTransformer\n", + "import math\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparing Positive Irony In r/Ohio and r/Cleveland, PosNegIronyTransformer Example\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download Corpus', print summary statistics\n", + "cleveland = Corpus(filename=download(\"subreddit-Cleveland\"))\n", + "ohio = Corpus(filename=download(\"subreddit-Ohio\"))\n", + "\n", + "cleveland.print_summary_stats()\n", + "ohio.print_summary_stats()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# See the number of ironic comments in each subreddit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "ohio_cnt = []\n", + "cleveland_cnt = []\n", + "\n", + "for utt in ohio.iter_utterances():\n", + " if \" /s \" or \"\\n/s\" in utt.text:\n", + " ohio_cnt.append(1)\n", + "\n", + "for utt in cleveland.iter_utterances():\n", + " if \" /s \" or \"\\n/s\" in utt.text:\n", + " cleveland_cnt.append(1)\n", + "\n", + "cleveland_cnt = sum(cleveland_cnt)\n", + "ohio_cnt = sum(ohio_cnt)\n", + "\n", + "print(cleveland_cnt)\n", + "\n", + "labels = ['r/Ohio', 'r/Cleveland']\n", + "\n", + "plt.bar(labels, [80, 87])\n", + "plt.xlabel('Subreddit')\n", + "plt.ylabel('Irony Counts')\n", + "plt.title('Number of Ironic Comments in each Subreddit')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fit and transform both Corpus'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transformer = PosNegIronyTransformer(obj_type='utterance')\n", + "transformer.fit(cleveland)\n", + "transformer.transform(cleveland)\n", + "transformer.fit(ohio)\n", + "transformer.transform(ohio)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gather statistics about the average thread upvote score of positive ironic comments between r/Ohio and r/Cleveland" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "avg_ohio_score = 0\n", + "pos_irony_ohio_score = 0\n", + "\n", + "avg_cleveland_score = 0\n", + "pos_irony_cleveland_score = 0\n", + "\n", + "cleveland_size = 136087\n", + "ohio_size = 372075\n", + "\n", + "\n", + "\n", + "\n", + "avg_cleveland_length = 0\n", + "for convo in cleveland.iter_conversations():\n", + " for utt in convo.iter_utterances():\n", + " avg_cleveland_length += 1\n", + " avg_cleveland_score += utt.meta[\"score\"]\n", + "\n", + "avg_cleveland_score = (avg_cleveland_score / avg_cleveland_length) / math.log(cleveland_size)\n", + "\n", + "pos_irony_cleveland_length = 0\n", + "for convo in cleveland.iter_conversations():\n", + " for utt in convo.iter_utterances():\n", + " if \" /s \" in utt.text or \"\\n/s\" in utt.text:\n", + " if utt.meta[\"agree_score\"] > 0: # Positive irony\n", + " convo = utt.get_conversation()\n", + " replies = list(convo.get_subtree(utt.id).bfs_traversal())\n", + " for reply in replies:\n", + " if reply.utt.id != utt.id:\n", + " pos_irony_cleveland_length += 1\n", + " pos_irony_cleveland_score += utt.meta[\"score\"]\n", + "\n", + "pos_irony_cleveland_score = (pos_irony_cleveland_score/pos_irony_cleveland_length)/ math.log(cleveland_size) ## Normalize for community 
size\n", + "pos_irony_cleveland_score_adj = pos_irony_cleveland_score - avg_cleveland_score\n", + "print(\"Number of comments for all ironic threads\" + str(pos_irony_cleveland_length))\n", + "\n", + "\n", + "\n", + "avg_ohio_length = 0\n", + "for convo in ohio.iter_conversations():\n", + " for utt in convo.iter_utterances():\n", + " avg_ohio_length += 1\n", + " avg_ohio_score += utt.meta[\"score\"]\n", + "\n", + "avg_ohio_score = (avg_ohio_score / avg_ohio_length) / math.log(ohio_size)\n", + "\n", + "pos_irony_ohio_length = 0\n", + "for convo in ohio.iter_conversations():\n", + " for utt in convo.iter_utterances():\n", + " if \" /s \" in utt.text or \"\\n/s\" in utt.text:\n", + " if utt.meta[\"agree_score\"] > 0: # Positive irony\n", + " convo = utt.get_conversation()\n", + " replies = list(convo.get_subtree(utt.id).bfs_traversal())\n", + " for reply in replies:\n", + " if reply.utt.id != utt.id:\n", + " pos_irony_ohio_length += 1\n", + " pos_irony_ohio_score += utt.meta[\"score\"]\n", + "\n", + "pos_irony_ohio_score = (pos_irony_ohio_score/pos_irony_ohio_length)/ math.log(ohio_size) ## Normalize for community size\n", + "pos_irony_ohio_score_adj = pos_irony_ohio_score - avg_ohio_score\n", + "print(\"Number of comments for all ironic threads\" + str(pos_irony_ohio_length))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(pos_irony_cleveland_score)\n", + "print(pos_irony_cleveland_score_adj)\n", + "print()\n", + "print(pos_irony_ohio_score)\n", + "print(pos_irony_ohio_score_adj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compare the average score of positive ironic comments in each subreddit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "community = ['r/Ohio', 'r/Cleveland']\n", + "values = [pos_irony_ohio_score,pos_irony_cleveland_score]\n", + "colors = ['blue', 'orange']\n", + "# Create a bar plot\n", + "plt.bar(community, values, color=colors)\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('Community')\n", + "plt.ylabel('Average comment score of positive ironic comments')\n", + "\n", + "threshold1 = avg_ohio_score\n", + "threshold2 = avg_cleveland_score\n", + "plt.axhline(y=threshold1, color='blue', linestyle='--')\n", + "plt.axhline(y=threshold2, color='orange', linestyle='--')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compare the average score of positive ironic comments in each subreddit, adjusted for the community mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "community = ['r/Ohio', 'r/Cleveland']\n", + "values = [pos_irony_ohio_score_adj,pos_irony_cleveland_score_adj]\n", + "colors = ['blue', 'orange']\n", + "# Create a bar plot\n", + "plt.bar(community, values, color=colors)\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('Community')\n", + "plt.ylabel('Difference from community mean of positive irony')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Examine distribution of positive, negative, and neutral / unlabelled irony in both subreddits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pos = 0\n", + "neg = 0\n", + "neu = 0\n", + "\n", + "for utt in 
cleveland.iter_utterances():\n", + " if \" /s \" in utt.text or \"\\n/s\" in utt.text:\n", + " if utt.meta[\"agree_score\"] < 0:\n", + " neg += 1\n", + " elif utt.meta[\"agree_score\"] > 0:\n", + " pos += 1\n", + " else:\n", + " neu += 1\n", + "\n", + "for utt in ohio.iter_utterances():\n", + " if \" /s \" in utt.text or \"\\n/s\" in utt.text:\n", + " if utt.meta[\"agree_score\"] < 0:\n", + " neg += 1\n", + " elif utt.meta[\"agree_score\"] > 0:\n", + " pos += 1\n", + " else:\n", + " neu += 1\n", + "\n", + "labels = ['Neg', 'Neu', 'Pos']\n", + "plt.bar(labels, [neg, neu, pos])\n", + "plt.xlabel('Irony type')\n", + "plt.ylabel('Count')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cs4300-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 80518be34098cca19b07b128398ee80262112ea1 Mon Sep 17 00:00:00 2001 From: Daniel Botros Date: Tue, 12 Dec 2023 12:52:08 -0500 Subject: [PATCH 2/4] Minor quality changes / fixes --- convokit/positive_negative_irony/posNegIrony.py | 2 +- examples/pos-neg-irony/posNegIrony.ipynb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/convokit/positive_negative_irony/posNegIrony.py b/convokit/positive_negative_irony/posNegIrony.py index 989aefcd..0a500e4d 100644 --- a/convokit/positive_negative_irony/posNegIrony.py +++ b/convokit/positive_negative_irony/posNegIrony.py @@ -28,7 +28,7 @@ def __init__( ): if input_filter: if len(signature(input_filter).parameters) == 1: - self.input_filter = lambda utt: input_filter(self, utt) + self.input_filter = lambda utt: input_filter(utt) else: self.input_filter = input_filter else: diff --git a/examples/pos-neg-irony/posNegIrony.ipynb b/examples/pos-neg-irony/posNegIrony.ipynb index 0c6ddd43..792950b2 100644 --- a/examples/pos-neg-irony/posNegIrony.ipynb +++ b/examples/pos-neg-irony/posNegIrony.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -12,7 +12,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Imports\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mconvokit\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mCorpus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPosNegIronyTransformer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# 
Imports\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mconvokit\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mCorpus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPosNegIronyTransformer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mImportError\u001b[0m: cannot import name 'PosNegIronyTransformer' from 'convokit' (/Users/danielbotros/cs4300-env/lib/python3.7/site-packages/convokit/__init__.py)" ] } From 3f14a44064851548b58a7f6ec3067dbae98a352e Mon Sep 17 00:00:00 2001 From: Daniel Botros Date: Tue, 12 Dec 2023 12:53:02 -0500 Subject: [PATCH 3/4] Clearing demo output --- examples/pos-neg-irony/posNegIrony.ipynb | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/examples/pos-neg-irony/posNegIrony.ipynb b/examples/pos-neg-irony/posNegIrony.ipynb index 792950b2..885315aa 100644 --- a/examples/pos-neg-irony/posNegIrony.ipynb +++ b/examples/pos-neg-irony/posNegIrony.ipynb @@ -2,21 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'PosNegIronyTransformer' from 'convokit' (/Users/danielbotros/cs4300-env/lib/python3.7/site-packages/convokit/__init__.py)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Imports\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mconvokit\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mCorpus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mPosNegIronyTransformer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'PosNegIronyTransformer' from 'convokit' (/Users/danielbotros/cs4300-env/lib/python3.7/site-packages/convokit/__init__.py)" - ] - } - ], + "outputs": [], "source": [ "# Imports\n", "from convokit import Corpus, download, PosNegIronyTransformer\n", From b15eab3ebabe21232ee52cc87ae78f5544903f0c Mon Sep 17 00:00:00 2001 From: Daniel Botros Date: Mon, 18 Dec 2023 16:13:04 -0500 Subject: [PATCH 4/4] Use black formatter --- .../positive_negative_irony/posNegIrony.py | 269 ++++++++++-------- 1 file changed, 152 insertions(+), 117 deletions(-) diff --git a/convokit/positive_negative_irony/posNegIrony.py b/convokit/positive_negative_irony/posNegIrony.py 
index 0a500e4d..1c9ea290 100644 --- a/convokit/positive_negative_irony/posNegIrony.py +++ b/convokit/positive_negative_irony/posNegIrony.py @@ -3,7 +3,9 @@ from convokit.transformer import Transformer, Corpus from inspect import signature from nltk.sentiment import SentimentIntensityAnalyzer -nltk.download('vader_lexicon') + +nltk.download("vader_lexicon") + class PosNegIronyTransformer(Transformer): """ @@ -20,7 +22,7 @@ class PosNegIronyTransformer(Transformer): def __init__( self, - obj_type='utterance', + obj_type="utterance", output_field=None, input_field=None, input_filter=filter, @@ -44,47 +46,46 @@ def __init__( def _print_output(self, i): return (self.verbosity > 0) and (i > 0) and (i % self.verbosity == 0) - def fit(self, corpus: Corpus) -> Corpus: - corpus_sent = {} - corpus_sent["pos"] = 0 - corpus_sent["neg"] = 0 - corpus_sent["neu"] = 0 - corpus_sent["compound"] = 0 - l = 0 - values = [] - - whitelist(self, corpus) - - if self.obj_type == 'utterance': - for idx, utterance in enumerate(corpus.iter_utterances()): - if self._print_output(idx): - print(f"%03d {self.obj_type} processed" % (idx)) - - if self.input_field is None: - text_entry = utterance.text - elif isinstance(self.input_field, str): - text_entry = utterance.meta(self.input_field) - if text_entry is None: - continue - - l += 1 - sentiment = self.sia.polarity_scores(text_entry) - corpus_sent["pos"] += sentiment["pos"] - corpus_sent["neg"] += sentiment["neg"] - corpus_sent["neu"] += sentiment["neu"] - corpus_sent["compound"] += sentiment["compound"] - values.append(sentiment["compound"]) - - corpus_sent = {key: value / l for key, value in corpus_sent.items()} - self.mean = corpus_sent["compound"] - - squared_differences = [(x - self.mean) ** 2 for x in values] - variance = sum(squared_differences) / (len(values) - 1) - standard_deviation = math.sqrt(variance) - self.sd = standard_deviation - - return self + corpus_sent = {} + corpus_sent["pos"] = 0 + corpus_sent["neg"] = 0 + corpus_sent["neu"] = 0 + corpus_sent["compound"] = 0 + l = 0 + values = [] + + whitelist(self, corpus) + + if self.obj_type == "utterance": + for idx, utterance in enumerate(corpus.iter_utterances()): + if self._print_output(idx): + print(f"%03d {self.obj_type} processed" % (idx)) + + if self.input_field is None: + text_entry = utterance.text + elif isinstance(self.input_field, str): + text_entry = utterance.meta(self.input_field) + if text_entry is None: + continue + + l += 1 + sentiment = self.sia.polarity_scores(text_entry) + corpus_sent["pos"] += sentiment["pos"] + corpus_sent["neg"] += sentiment["neg"] + corpus_sent["neu"] += sentiment["neu"] + corpus_sent["compound"] += sentiment["compound"] + values.append(sentiment["compound"]) + + corpus_sent = {key: value / l for key, value in corpus_sent.items()} + self.mean = corpus_sent["compound"] + + squared_differences = [(x - self.mean) ** 2 for x in values] + variance = sum(squared_differences) / (len(values) - 1) + standard_deviation = math.sqrt(variance) + self.sd = standard_deviation + + return self def transform(self, corpus: Corpus) -> Corpus: """ @@ -93,86 +94,120 @@ def transform(self, corpus: Corpus) -> Corpus: :return: the corpus """ - if self.obj_type == 'utterance': - total = len(list(corpus.iter_utterances())) - - for idx, utterance in enumerate(corpus.iter_utterances()): - if self._print_output(idx): - print(f"%03d/%03d {self.obj_type} processed" % (idx, total)) - - if not self.input_filter(self, utterance): - continue - - if self.input_field is None: - if ">" in utterance.text: - 
try: - text_entry = utterance.text.split("\n")[1] - except: - text_entry = utterance.text.split(".")[1] - else: - text_entry = utterance.text - if " /s " in text_entry: - text_entry = text_entry.split(" \s ")[0] - elif "\n/s" in text_entry: - text_entry = text_entry.split("\n/s")[0] - else: - text_entry = text_entry - elif isinstance(self.input_field, str): - text_entry = utterance.meta(self.input_field) - if text_entry is None: - continue - - if " /s " in utterance.text or "\n/s" in utterance.text: - sentiment = self.sia.polarity_scores(text_entry) - convo = utterance.get_conversation() - replies = list(convo.get_subtree(utterance.id).children) - acc_sent = 0 - average_sent = 0 - - if len(replies) > 0: - for reply in replies: - reply_sent = self.sia.polarity_scores(reply.utt.text) - acc_sent += reply_sent["compound"] - reply.utt.add_meta("sentiment", reply_sent) - average_sent = acc_sent / len(replies) - - utterance.add_meta("sentiment", sentiment) - utterance.add_meta("replies_sentiment", average_sent) - agree_score = 0 - - if average_sent == 0: - agree_score = 0 - elif (average_sent <= (self.mean - self.sd*.5) and average_sent >= (self.mean - self.sd*2) and sentiment["compound"] <= (self.mean - self.sd*.5)) or (average_sent >= (self.mean + self.sd*.5) and average_sent <= (self.mean - self.sd*2) and sentiment["compound"] >= (self.mean + self.sd*.5)) or (sentiment["compound"] <= (self.mean - self.sd*.5) and sentiment["compound"] >= (self.mean - self.sd*2) and average_sent <= (self.mean - self.sd*.5)) or (sentiment["compound"] >= (self.mean + self.sd*.5) and sentiment["compound"] <= (self.mean - self.sd*2) and average_sent >= (self.mean + self.sd*.5)): - agree_score = (average_sent + sentiment["compound"])/2 - elif (average_sent < (self.mean - self.sd*2) and sentiment["compound"] < (self.mean - self.sd*2)) or (average_sent > (self.mean + self.sd*2) and sentiment["compound"] > (self.mean + self.sd*2)): - agree_score = -abs((average_sent + sentiment["compound"])/2) - elif (average_sent > (self.mean + self.sd*.5) and sentiment["compound"] < (self.mean - self.sd*.5)) or (average_sent < (self.mean - self.sd*.5) and sentiment["compound"] > (self.mean + self.sd*.5)): - agree_score = (average_sent + -sentiment["compound"])/2 - else: - agree_score = 0 - - utterance.add_meta("agree_score", agree_score) + if self.obj_type == "utterance": + total = len(list(corpus.iter_utterances())) + + for idx, utterance in enumerate(corpus.iter_utterances()): + if self._print_output(idx): + print(f"%03d/%03d {self.obj_type} processed" % (idx, total)) + + if not self.input_filter(self, utterance): + continue + + if self.input_field is None: + if ">" in utterance.text: + try: + text_entry = utterance.text.split("\n")[1] + except: + text_entry = utterance.text.split(".")[1] + else: + text_entry = utterance.text + if " /s " in text_entry: + text_entry = text_entry.split(" \s ")[0] + elif "\n/s" in text_entry: + text_entry = text_entry.split("\n/s")[0] + else: + text_entry = text_entry + elif isinstance(self.input_field, str): + text_entry = utterance.meta(self.input_field) + if text_entry is None: + continue + + if " /s " in utterance.text or "\n/s" in utterance.text: + sentiment = self.sia.polarity_scores(text_entry) + convo = utterance.get_conversation() + replies = list(convo.get_subtree(utterance.id).children) + acc_sent = 0 + average_sent = 0 + + if len(replies) > 0: + for reply in replies: + reply_sent = self.sia.polarity_scores(reply.utt.text) + acc_sent += reply_sent["compound"] + 
reply.utt.add_meta("sentiment", reply_sent) + average_sent = acc_sent / len(replies) + + utterance.add_meta("sentiment", sentiment) + utterance.add_meta("replies_sentiment", average_sent) + agree_score = 0 + + if average_sent == 0: + agree_score = 0 + elif ( + ( + average_sent <= (self.mean - self.sd * 0.5) + and average_sent >= (self.mean - self.sd * 2) + and sentiment["compound"] <= (self.mean - self.sd * 0.5) + ) + or ( + average_sent >= (self.mean + self.sd * 0.5) + and average_sent <= (self.mean - self.sd * 2) + and sentiment["compound"] >= (self.mean + self.sd * 0.5) + ) + or ( + sentiment["compound"] <= (self.mean - self.sd * 0.5) + and sentiment["compound"] >= (self.mean - self.sd * 2) + and average_sent <= (self.mean - self.sd * 0.5) + ) + or ( + sentiment["compound"] >= (self.mean + self.sd * 0.5) + and sentiment["compound"] <= (self.mean - self.sd * 2) + and average_sent >= (self.mean + self.sd * 0.5) + ) + ): + agree_score = (average_sent + sentiment["compound"]) / 2 + elif ( + average_sent < (self.mean - self.sd * 2) + and sentiment["compound"] < (self.mean - self.sd * 2) + ) or ( + average_sent > (self.mean + self.sd * 2) + and sentiment["compound"] > (self.mean + self.sd * 2) + ): + agree_score = -abs((average_sent + sentiment["compound"]) / 2) + elif ( + average_sent > (self.mean + self.sd * 0.5) + and sentiment["compound"] < (self.mean - self.sd * 0.5) + ) or ( + average_sent < (self.mean - self.sd * 0.5) + and sentiment["compound"] > (self.mean + self.sd * 0.5) + ): + agree_score = (average_sent + -sentiment["compound"]) / 2 + else: + agree_score = 0 + + utterance.add_meta("agree_score", agree_score) else: - raise KeyError('obj_type must be utterance') - + raise KeyError("obj_type must be utterance") if self.verbosity > 0: print(f"%03d/%03d {self.obj_type} processed" % (total, total)) return corpus - + + def whitelist(self, corpus: Corpus): - whitelist = [] - for convo in corpus.iter_conversations(): - for utt in convo.iter_utterances(): - if " /s " in utt.text or "\n/s" in utt.text: - whitelist.append(utt.id) - convo = utt.get_conversation() - replies = list(convo.get_subtree(utt.id).bfs_traversal()) - for reply in replies: - if reply.utt.id != utt.id: - whitelist.append(reply.utt.id) - - self.whitelist = whitelist + whitelist = [] + for convo in corpus.iter_conversations(): + for utt in convo.iter_utterances(): + if " /s " in utt.text or "\n/s" in utt.text: + whitelist.append(utt.id) + convo = utt.get_conversation() + replies = list(convo.get_subtree(utt.id).bfs_traversal()) + for reply in replies: + if reply.utt.id != utt.id: + whitelist.append(reply.utt.id) + + self.whitelist = whitelist + def filter(self, utt): - return utt.id in self.whitelist \ No newline at end of file + return utt.id in self.whitelist
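Usage sketch (not part of the patch): the snippet below shows how the transformer introduced in these commits might be applied, mirroring the demo notebook. It assumes the class is imported directly from its module path as added by this patch (the notebook's ImportError suggests it is not yet re-exported from convokit's top-level __init__), and "subreddit-Cleveland" is simply the corpus used in the demo; any threaded ConvoKit corpus should work.

from convokit import Corpus, download
# Direct module import, assuming the package layout added by this patch; the
# top-level `from convokit import PosNegIronyTransformer` fails until the class
# is registered in convokit/__init__.py.
from convokit.positive_negative_irony.posNegIrony import PosNegIronyTransformer

# Download and load the demo corpus.
corpus = Corpus(filename=download("subreddit-Cleveland"))

transformer = PosNegIronyTransformer(obj_type="utterance")
transformer.fit(corpus)        # corpus-wide mean/std of VADER compound scores + '/s' whitelist
transformer.transform(corpus)  # annotates '/s' utterances and their replies

# Each '/s' utterance now carries its own sentiment, the average sentiment of its
# direct replies, and an agree_score; the demo notebook reads agree_score > 0 as
# positive irony, agree_score < 0 as negative irony, and 0 as neutral/unlabelled.
for utt in corpus.iter_utterances():
    if " /s " in utt.text or "\n/s" in utt.text:
        print(utt.id, utt.meta["agree_score"], utt.meta["replies_sentiment"])

Note that fit() must run before transform(): transform() relies on the mean, standard deviation, and '/s' whitelist computed during fitting.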