diff --git a/convokit/coordination/coordination.py b/convokit/coordination/coordination.py
index 1d524479..a181dc35 100644
--- a/convokit/coordination/coordination.py
+++ b/convokit/coordination/coordination.py
@@ -486,9 +486,10 @@ def _scores_over_utterances(
target = utt1.speaker
if speaker == target:
continue
- speaker, target = Coordination._annot_speaker(
- speaker, utt2, split_by_attribs
- ), Coordination._annot_speaker(target, utt1, split_by_attribs)
+ speaker, target = (
+ Coordination._annot_speaker(speaker, utt2, split_by_attribs),
+ Coordination._annot_speaker(target, utt1, split_by_attribs),
+ )
speaker_filter = speaker_utterance_selector(utt2, utt1)
target_filter = target_utterance_selector(utt2, utt1)
diff --git a/convokit/forecaster/BERTCGAModel.py b/convokit/forecaster/BERTCGAModel.py
new file mode 100644
index 00000000..1142f348
--- /dev/null
+++ b/convokit/forecaster/BERTCGAModel.py
@@ -0,0 +1,302 @@
+import os
+import torch
+import torch.nn.functional as F
+import pandas as pd
+import numpy as np
+import json
+from tqdm import tqdm
+from sklearn.metrics import roc_curve
+from datasets import Dataset, DatasetDict
+from transformers import (
+    AutoConfig,
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    TrainingArguments,
+    Trainer,
+)
+from .forecasterModel import ForecasterModel
+import shutil
+
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+DEFAULT_CONFIG = {
+    "output_dir": "BERTCGAModel",
+    "per_device_batch_size": 4,
+    "num_train_epochs": 2,
+    "learning_rate": 6.7e-6,
+    "random_seed": 1,
+    "device": "cuda",
+}
+
+
+class BERTCGAModel(ForecasterModel):
+    """
+    ForecasterModel backend that fine-tunes a Huggingface Transformers sequence
+    classification model (e.g. BERT or RoBERTa) for conversational forecasting.
+
+    :param model_name_or_path: name of a Huggingface model, or path to a saved checkpoint.
+    :param config: dictionary of settings; keys missing from it take their value from
+        DEFAULT_CONFIG. The optional "context_mode" key selects what the model reads:
+        "normal" (the default) for the whole context, "no-context" for only the current
+        utterance.
+    """
+
+    def __init__(self, model_name_or_path, config=DEFAULT_CONFIG):
+        super().__init__()
+        # Merge into a fresh dict so the instance never shares (or mutates) DEFAULT_CONFIG or
+        # the caller's dictionary, and so that partial configs are completed with defaults.
+        self.config = {**DEFAULT_CONFIG, **(config or {})}
+        tokenizer_kwargs = {
+            "model_max_length": 512,
+            "truncation_side": "left",
+            "padding_side": "right",
+        }
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **tokenizer_kwargs)
+        except Exception:
+            # The checkpoint didn't save a tokenizer: fall back to the tokenizer of the model
+            # it was fine-tuned from, whose name is recorded in the checkpoint's config.json.
+            model_config_file = os.path.join(model_name_or_path, "config.json")
+            if not os.path.isfile(model_config_file):
+                # Not a local checkpoint, so there is nothing to fall back to.
+                raise
+            with open(model_config_file, "r") as file:
+                original_model = json.load(file)["_name_or_path"]
+            self.tokenizer = AutoTokenizer.from_pretrained(original_model, **tokenizer_kwargs)
+        self.best_threshold = None
+        model_config = AutoConfig.from_pretrained(
+            model_name_or_path, num_labels=2, problem_type="single_label_classification"
+        )
+        self.model = AutoModelForSequenceClassification.from_pretrained(
+            model_name_or_path, ignore_mismatched_sizes=True, config=model_config
+        ).to(self.config["device"])
+        os.makedirs(self.config["output_dir"], exist_ok=True)
+
+    def _tokenize(self, context):
+        """
+        Join the texts of the utterances in `context` with the tokenizer's separator token and
+        encode the result, padded and truncated to 512 tokens.
+        """
+        tokenized_context = self.tokenizer.encode_plus(
+            text=f" {self.tokenizer.sep_token} ".join([u.text for u in context]),
+            add_special_tokens=True,
+            padding="max_length",
+            truncation=True,
+            max_length=512,
+        )
+        return tokenized_context
+
+    def _context_to_bert_data(self, contexts):
+        """
+        Convert an iterable of context tuples into a Huggingface Dataset with the columns "id"
+        (id of the context's current utterance), "input_ids", "attention_mask" and "labels".
+
+        :raises ValueError: if config["context_mode"] is neither "normal" nor "no-context".
+        """
+        context_mode = self.config.get("context_mode", "normal")
+        if context_mode not in ("normal", "no-context"):
+            raise ValueError(
+                f'Unknown context_mode {context_mode!r}; expected "normal" or "no-context".'
+            )
+        pairs = {"id": [], "input_ids": [], "attention_mask": [], "labels": []}
+        for context in contexts:
+            convo = context.current_utterance.get_conversation()
+            label = self.labeler(convo)
+
+            if context_mode == "normal":
+                context_utts = context.context
+            else:
+                context_utts = [context.current_utterance]
+            tokenized_context = self._tokenize(context_utts)
+            pairs["input_ids"].append(tokenized_context["input_ids"])
+            pairs["attention_mask"].append(tokenized_context["attention_mask"])
+            pairs["labels"].append(label)
+            pairs["id"].append(context.current_utterance.id)
+        return Dataset.from_dict(pairs)
+
+    # Called with parentheses so decoration does not depend on bare-decorator support.
+    @torch.inference_mode()
+    def _predict(
+        self,
+        dataset,
+        model=None,
+        threshold=0.5,
+        forecast_prob_attribute_name=None,
+        forecast_attribute_name=None,
+    ):
+        """
+        Return predictions in DataFrame
+
+        :param dataset: torch-formatted dataset with "id", "input_ids" and "attention_mask".
+        :param model: model to run; defaults to self.model.
+        :param threshold: a score strictly above this value gives a positive forecast; None
+            falls back to 0.5 (e.g. when no threshold has been tuned or loaded yet).
+        :param forecast_prob_attribute_name: name of the score column (default "score").
+        :param forecast_attribute_name: name of the forecast column (default "pred").
+        :return: DataFrame indexed by utterance id with the forecast and score columns.
+        """
+        if not forecast_prob_attribute_name:
+            forecast_prob_attribute_name = "score"
+        if not forecast_attribute_name:
+            forecast_attribute_name = "pred"
+        if model is None:
+            model = self.model.to(self.config["device"])
+        if threshold is None:
+            threshold = 0.5
+        # Make sure dropout is disabled, whatever mode the model was left in.
+        model.eval()
+        utt_ids = []
+        preds = []
+        scores = []
+        for data in tqdm(dataset):
+            input_ids = (
+                data["input_ids"].to(self.config["device"], dtype=torch.long).reshape([1, -1])
+            )
+            attention_mask = (
+                data["attention_mask"].to(self.config["device"], dtype=torch.long).reshape([1, -1])
+            )
+            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            probs = F.softmax(outputs.logits, dim=-1)
+            utt_ids.append(data["id"])
+            raw_score = probs[0, 1].item()
+            preds.append(int(raw_score > threshold))
+            scores.append(raw_score)
+
+        return pd.DataFrame(
+            {forecast_attribute_name: preds, forecast_prob_attribute_name: scores}, index=utt_ids
+        )
+
+    def _tune_best_val_accuracy(self, val_dataset, val_contexts):
+        """
+        Select the best checkpoint and decision threshold on the validation set.
+
+        Every "checkpoint-*" directory in the output directory is scored on `val_dataset`; a
+        conversation's score is the highest score given to any of its contexts. The checkpoint
+        and threshold with the highest conversation-level accuracy are saved to self.model and
+        self.best_threshold, the validation predictions and the chosen settings are written to
+        val_predictions.csv and dev_config.json, and the other checkpoints are deleted.
+
+        :raises RuntimeError: if the output directory contains no checkpoint.
+        """
+        output_dir = self.config["output_dir"]
+        # Only look at checkpoint directories: the output directory can also contain files
+        # written by a previous run (e.g. dev_config.json) or unrelated sub-directories.
+        checkpoints = [
+            name
+            for name in sorted(os.listdir(output_dir))
+            if name.startswith("checkpoint-") and os.path.isdir(os.path.join(output_dir, name))
+        ]
+        if not checkpoints:
+            raise RuntimeError(f"No checkpoint found in {output_dir!r}; cannot tune the model.")
+
+        utt2convo = {}
+        val_labels_dict = {}
+        for context in val_contexts:
+            convo_id = context.conversation_id
+            utt2convo[context.current_utterance.id] = convo_id
+            val_labels_dict[convo_id] = self.labeler(context.current_utterance.get_conversation())
+        val_convo_ids = list(val_labels_dict)
+        val_labels = np.asarray([int(val_labels_dict[c]) for c in val_convo_ids])
+
+        best_checkpoint = None
+        best_val_accuracy = -1.0
+        for cp in checkpoints:
+            finetuned_model = AutoModelForSequenceClassification.from_pretrained(
+                os.path.join(output_dir, cp)
+            ).to(self.config["device"])
+            utt_scores = self._predict(val_dataset, model=finetuned_model)
+            # for each CONVERSATION, whether or not it triggers will be effectively determined
+            # by what the highest score it ever got was
+            highest_convo_scores = {convo_id: -1 for convo_id in val_convo_ids}
+            for utt_id, utt_score in utt_scores["score"].items():
+                convo_id = utt2convo[utt_id]
+                if utt_score > highest_convo_scores[convo_id]:
+                    highest_convo_scores[convo_id] = utt_score
+            convo_scores = np.asarray([highest_convo_scores[c] for c in val_convo_ids])
+            # use scikit learn to find candidate threshold cutoffs
+            _, _, thresholds = roc_curve(val_labels, convo_scores)
+            accs = [((convo_scores > t).astype(int) == val_labels).mean() for t in thresholds]
+            best_acc_idx = np.argmax(accs)
+
+            print("Accuracy:", cp, accs[best_acc_idx])
+            if accs[best_acc_idx] > best_val_accuracy:
+                best_checkpoint = cp
+                best_val_accuracy = float(accs[best_acc_idx])
+                self.best_threshold = float(thresholds[best_acc_idx])
+                self.model = finetuned_model
+
+        eval_forecasts_df = self._predict(val_dataset, threshold=self.best_threshold)
+        eval_forecasts_df.to_csv(os.path.join(output_dir, "val_predictions.csv"))
+
+        # Save the best config
+        best_config = {
+            "best_checkpoint": best_checkpoint,
+            "best_threshold": self.best_threshold,
+            "best_val_accuracy": best_val_accuracy,
+        }
+        with open(os.path.join(output_dir, "dev_config.json"), "w") as outfile:
+            json.dump(best_config, outfile, indent=4)
+
+        # Clean other checkpoints to save disk space. Only the exact checkpoint directories
+        # listed above are removed, so nothing else in (or below) the output directory is lost.
+        for cp in checkpoints:
+            if cp != best_checkpoint:
+                stale_checkpoint = os.path.join(output_dir, cp)
+                print("Deleting:", stale_checkpoint)
+                shutil.rmtree(stale_checkpoint)
+
+    def fit(self, contexts, val_contexts):
+        """
+        Description: Train the conversational forecasting model on the given data
+        Parameters:
+        contexts: an iterator over context tuples to train on
+        val_contexts: a second iterator over context tuples to be used as a separate held-out
+            validation set. It is required, because the best checkpoint and decision threshold
+            are selected on it. The generator for this must be the same as test generator
+        """
+        if val_contexts is None:
+            raise ValueError("BERTCGAModel.fit needs val_contexts to tune the model.")
+        val_contexts = list(val_contexts)
+        train_pairs = self._context_to_bert_data(contexts)
+        val_for_tuning_pairs = self._context_to_bert_data(val_contexts)
+        dataset = DatasetDict({"train": train_pairs, "val_for_tuning": val_for_tuning_pairs})
+        dataset.set_format("torch")
+
+        training_args = TrainingArguments(
+            output_dir=self.config["output_dir"],
+            per_device_train_batch_size=self.config["per_device_batch_size"],
+            num_train_epochs=self.config["num_train_epochs"],
+            learning_rate=self.config["learning_rate"],
+            logging_strategy="epoch",
+            weight_decay=0.01,
+            eval_strategy="no",
+            save_strategy="epoch",
+            prediction_loss_only=False,
+            seed=self.config["random_seed"],
+        )
+        trainer = Trainer(model=self.model, args=training_args, train_dataset=dataset["train"])
+        trainer.train()
+
+        self._tune_best_val_accuracy(dataset["val_for_tuning"], val_contexts)
+
+    def transform(self, contexts, forecast_attribute_name, forecast_prob_attribute_name):
+        """
+        Run the model on the given contexts and return a DataFrame, indexed by the id of each
+        context's current utterance, holding the forecast in column `forecast_attribute_name`
+        and its probability in column `forecast_prob_attribute_name`. The predictions are also
+        written to test_predictions.csv in the output directory.
+        """
+        test_pairs = self._context_to_bert_data(contexts)
+        dataset = DatasetDict({"test": test_pairs})
+        dataset.set_format("torch")
+        forecasts_df = self._predict(
+            dataset["test"],
+            threshold=self.best_threshold,
+            forecast_attribute_name=forecast_attribute_name,
+            forecast_prob_attribute_name=forecast_prob_attribute_name,
+        )
+
+        prediction_file = os.path.join(self.config["output_dir"], "test_predictions.csv")
+        forecasts_df.to_csv(prediction_file)
+
+        return forecasts_df
diff --git a/convokit/forecaster/README.md b/convokit/forecaster/README.md
new file mode 100644
index 00000000..8f85d0f1
--- /dev/null
+++ b/convokit/forecaster/README.md
@@ -0,0 +1,13 @@
+**Table 1: Forecasting derailment on CGA-CMV-large conversations.**
+The performance is measured in accuracy (Acc), precision (P), recall (R), F1, false positive rate (FPR), mean horizon (Mean H), and Forecast Recovery (Recovery) along with the correct and incorrect adjustment rates. The best performance across each metric is indicated in **bold**.
+| Model | Acc ↑ | P ↑ | R ↑ | F1 ↑ | FPR ↓ | Mean H ↑ | Recovery ↑ (CA/N - IA/N) |
+|--------------------------------|--------|-------|-------|-------|--------|---------|-------------------------|
+| Human (84 convos) round-1 | 62.2 | 67.8 | 48.9 | 54.6 | 24.4 | 3.64 | - |
+| Human (84 convos) round-2 | 70.0 | 75.9 | 55.6 | 63.9 | 15.6 | 3.13 | - |
+| RoBERTa-large | **68.4** | 67.5 | 71.1 | 69.2 | 34.3 | 4.14 | +1.1 (7.2 - 6.1) |
+| Gemma-2 27B-IT (finetuned) | **68.4** | 66.2 | 75.2 | **70.4** | 38.5 | 4.30 | +0.0 (10.7 - 10.7) |
+| GPT-4o (12/2024; zero-shot) | 66.6 | **71.0** | 56.3 | 62.8 | **23.0** | 3.78 | -1.5 (5.9 - 7.4) |
+| BERT-base | 65.2 | 63.5 | 72.0 | 67.4 | 41.6 | 4.45 | +2.1 (9.8 - 7.7) |
+| CRAFT | 62.8 | 59.4 | 81.1 | 68.5 | 55.5 | 4.69 | +4.9 (12.0 - 7.1) |
+| Gemma-2 27B-IT (zero-shot) | 59.4 | 55.7 | **92.2** | 69.4 | 73.5 | **5.27** | **+7.1** (12.2 - 5.1) |
+
diff --git a/convokit/forecaster/__init__.py b/convokit/forecaster/__init__.py
index 5bfd23d7..d0bcaced 100644
--- a/convokit/forecaster/__init__.py
+++ b/convokit/forecaster/__init__.py
@@ -5,4 +5,5 @@
if "torch" in sys.modules:
from .CRAFTModel import *
+ from .BERTCGAModel import *
from .CRAFT import *
diff --git a/convokit/forecaster/forecaster.py b/convokit/forecaster/forecaster.py
index 346c0110..c3c259ac 100644
--- a/convokit/forecaster/forecaster.py
+++ b/convokit/forecaster/forecaster.py
@@ -216,6 +216,7 @@ def summarize(
"label": [],
"score": [],
"forecast": [],
+ "last_utterance_forecast": [],
}
for convo in corpus.iter_conversations():
if selector(convo):
@@ -237,6 +238,7 @@ def summarize(
)
conversational_forecasts_df["score"].append(np.max(forecast_scores))
conversational_forecasts_df["forecast"].append(np.max(forecasts))
+ conversational_forecasts_df["last_utterance_forecast"].append(forecasts[-1])
conversational_forecasts_df = pd.DataFrame(conversational_forecasts_df).set_index(
"conversation_id"
)
@@ -260,21 +262,53 @@ def summarize(
(conversational_forecasts_df["label"] == 1)
& (conversational_forecasts_df["forecast"] == 0)
).sum()
+ # Correct Adjustments
+ ca = (
+ (conversational_forecasts_df["label"] == 0)
+ & (conversational_forecasts_df["forecast"] == 1)
+ & (conversational_forecasts_df["last_utterance_forecast"] == 0)
+ ).mean()
+ # Incorrect Adjustments
+ ia = (
+ (conversational_forecasts_df["label"] == 1)
+ & (conversational_forecasts_df["forecast"] == 1)
+ & (conversational_forecasts_df["last_utterance_forecast"] == 0)
+ ).mean()
+
p = tp / (tp + fp)
r = tp / (tp + fn)
fpr = fp / (fp + tn)
f1 = 2 / (((tp + fp) / tp) + ((tp + fn) / tp))
- metrics = {"Accuracy": acc, "Precision": p, "Recall": r, "FPR": fpr, "F1": f1}
-
- print(pd.Series(metrics))
comments_until_end = self._draw_horizon_plot(corpus, selector)
comments_until_end_vals = list(comments_until_end.values())
+ mean_h = np.mean(comments_until_end_vals) - 1
print(
"Horizon statistics (# of comments between first positive forecast and conversation end):"
)
- print(
- f"Mean = {np.mean(comments_until_end_vals)}, Median = {np.median(comments_until_end_vals)}"
- )
+ print(f"Mean = {mean_h}, Median = {np.median(comments_until_end_vals) - 1}")
+ leaderboard_string = (
+ f"| MODEL_NAME | "
+ f"{acc*100:.1f} | "
+ f"{p*100:.1f} | "
+ f"{r*100:.1f} | "
+ f"{f1*100:.1f} | "
+ f"{fpr*100:.1f} | "
+ f"{mean_h:.2f} | "
+ f"{(ca-ia)*100:.1f} ({ca*100:.1f} - {ia*100:.1f}) |"
+ )
+ metrics = {
+ "Accuracy": acc,
+ "Precision": p,
+ "Recall": r,
+ "FPR": fpr,
+ "F1": f1,
+ "Mean H": mean_h,
+ "Correct Adjustment": ca,
+ "Incorrect Adjustment": ia,
+ "Recovery": ca - ia,
+ "Leaderboard String": leaderboard_string,
+ }
+ print(pd.Series(metrics))
return conversational_forecasts_df, metrics
diff --git a/download_config.json b/download_config.json
index 5afa7cba..4c8f38f4 100644
--- a/download_config.json
+++ b/download_config.json
@@ -124,6 +124,12 @@
"https://zissou.infosci.cornell.edu/convokit/models/craft_cmv/craft_full.tar",
"https://zissou.infosci.cornell.edu/convokit/models/craft_cmv/index2word.json",
"https://zissou.infosci.cornell.edu/convokit/models/craft_cmv/word2index.json"
- ]
+ ],
+ "cga-cmv-large/roberta-large": ["https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-cmv-large/roberta-large.tar"],
+ "cga-cmv-large/bert-base-cased": ["https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-cmv-large/bert-base-cased.tar"],
+ "cga-cmv-legacy/roberta-large": ["https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-cmv-legacy/roberta-large.tar"],
+ "cga-cmv-legacy/bert-base-cased": ["https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-cmv-legacy/bert-base-cased.tar"],
+ "cga-wikiconv/roberta-large": ["https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-wikiconv/roberta-large.tar"],
+ "cga-wikiconv/bert-base-cased": ["https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-wikiconv/bert-base-cased.tar"]
}
}
diff --git a/examples/forecaster/BERTCGA Forecaster demo.ipynb b/examples/forecaster/BERTCGA Forecaster demo.ipynb
new file mode 100644
index 00000000..b5df54d6
--- /dev/null
+++ b/examples/forecaster/BERTCGA Forecaster demo.ipynb
@@ -0,0 +1,605 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a76caad0-a440-43cf-bfcd-af0ad6d68042",
+ "metadata": {},
+ "source": [
+ "# ConvoKit Forecaster framework: BERTCGA demo\n",
+ "\n",
+ "The `Forecaster` class provides a generic interface to *conversational forecasting models*, a class of models designed to computationally capture the trajectory of conversations in order to predict future events. Though individual conversational forecasting models can get quite complex, the `Forecaster` API abstracts away the implementation details into a standard fit-transform interface. To demonstrate the power of this framework, this notebook walks through an example of fine-tuning the `BERTCGA` (including all BERT-styled models: `BERT`, `RoBERTa`, `SpanBERT`, `DeBERTa`, ...) conversational forecasting model on the CGA-CMV corpus. You will see how the `Forecaster` API allows us to load the data, select training, validation, and testing samples, train the BERTCGA model, and perform evaluation - replicating the original paper's full pipeline (minus pre-training, which is considered outside the scope of ConvoKit) all in only a few lines of code!\n",
+ "\n",
+ "Let's start by importing the necessary ConvoKit classes and functions, and loading the CGA-CMV corpus."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "326d6337-43c1-48c4-90de-d907b5fa32b6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from convokit import download, Corpus, Forecaster, BERTCGAModel\n",
+ "from functools import partial\n",
+ "import json, os"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "3c530c49-80d9-455d-a062-cb31a7514d85",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset already exists at /home/sqt2/CALMpaper/ConvoKit/examples/forecaster/YOUR_DATA_DIRECTORY/conversations-gone-awry-cmv-corpus\n"
+ ]
+ }
+ ],
+ "source": [
+ "corpus = Corpus(filename=download(\"conversations-gone-awry-cmv-corpus\", data_dir=\"YOUR_DATA_DIRECTORY\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a4d27b1a-3d1f-4039-b10f-21b6dd230c10",
+ "metadata": {},
+ "source": [
+ "## Define selectors for the Forecaster\n",
+ "\n",
+ "Core to the flexibility of the `Forecaster` framework is the concept of *selectors*. \n",
+ "\n",
+ "To capture the temporal dimension of the conversational forecasting task, `Forecaster` iterates through conversations in chronological utterance order, at each step presenting to the backend forecasting model a \"context tuple\" containing both the comment itself and the full \"context\" preceding that comment. As a general framework, `Forecaster` on its own does not try to make any further assumptions about what \"context\" should contain or look like; it simply presents context as a chronologically ordered list of all utterances up to and including the current one. \n",
+ "\n",
+ "But in practice, we often want to be pickier about what we mean by \"context\". At a basic level, we might want to select only specific contexts during training versus during evaluation. The simplest version of this is the desire to split the conversations by training and testing splits, but more specifically, we might also want to select only certain contexts within a conversation. This is necessary for BERTCGA training, which works by taking only the chronologically last context (i.e., all utterances up to and not including the toxic comment, or up to the end of the conversation) as a labeled training instance. This is where selectors come in! A selector is a user-provided function that takes in a context and returns a boolean representing whether or not that context should be used. You can provide separate selectors for `fit` and `transform`, and `fit` also takes in a second selector that you can use to define validation data.\n",
+ "\n",
+ "Here we show how to implement the necessary selectors for BERTCGA."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "6bfdb2b9-757a-4a44-89dc-a6e3b6249ac8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generic_fit_selector(context_tuple, split):\n",
+ " \"\"\"\n",
+ " We use this generic function for both training and validation data.\n",
+ " In both cases, its job is to select only those contexts for which the\n",
+ " FUTURE context is empty. This is in accordance with how CRAFT Model was\n",
+ " originally trained on CGA-CMV, taking the last context from each\n",
+ " conversation (\"last\" defined as being up to and including the chronologically\n",
+ " last utterance as recorded in the corpus)\n",
+ " \"\"\"\n",
+ " matches_split = (context_tuple.current_utterance.get_conversation().meta[\"split\"] == split)\n",
+ " is_end = (len(context_tuple.future_context) == 0)\n",
+ " return (matches_split and is_end)\n",
+ "\n",
+ "def transform_selector(context_tuple):\n",
+ " \"\"\"\n",
+ " For transform we only need to check that the conversation is in the test split\n",
+ " \"\"\"\n",
+ " return (context_tuple.current_utterance.get_conversation().meta[\"split\"] == \"test\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9614aff5-843e-4b3b-b03f-6f57f8e76b8a",
+ "metadata": {},
+ "source": [
+ "## Initialize the Forecaster and BERTCGAModel backend\n",
+ "\n",
+ "Now the rest of the process is pretty straightforward! We simply need to:\n",
+ "1. Initialize a backend `ForecasterModel` for the `Forecaster` to use, in this case we choose ConvoKit's implementation of BERTCGA.\n",
+ "2. Initialize a `Forecaster` instance to wrap that `ForecasterModel` in a generic fit-transform API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "9ee62d7f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Runs on GPU by default; set DEVICE to \"cpu\" for CPU mode (noting that it will be slower)\n",
+ "DEVICE = \"cuda\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "653febf7-ef59-4e8b-8ce2-39b4a33c3747",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ }
+ ],
+ "source": [
+ "model_name_or_path = 'bert-base-cased'\n",
+ "# roberta-large can also be used with this class\n",
+ "config_dict = {\n",
+ " \"output_dir\": \"YOUR_SAVING_DIRECTORY\", \n",
+ " \"per_device_batch_size\": 4, \n",
+ " \"num_train_epochs\": 4, \n",
+ " \"learning_rate\": 6.7e-6,\n",
+ " \"random_seed\": 1,\n",
+ " \"device\": DEVICE\n",
+ "}\n",
+ "bert_model = BERTCGAModel(model_name_or_path, config=config_dict)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "c971ec31-964b-4df0-a7bb-d064ae62c5a3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "bert_forecaster = Forecaster(bert_model, \"has_removed_comment\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b840f526-dafd-4022-b5a1-90adecbd1591",
+ "metadata": {},
+ "source": [
+ "## Fine-tune the model using Forecaster.fit\n",
+ "\n",
+ "And now, just like any other ConvoKit Transformer, model training is done simply by calling `fit` (note how we pass in the selectors we previously defined!)..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8590e7b3-f633-4844-baea-18bee961eebd",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " [4108/4108 07:19, Epoch 4/4]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1027 | \n",
+ " 0.669800 | \n",
+ "
\n",
+ " \n",
+ " 2054 | \n",
+ " 0.615500 | \n",
+ "
\n",
+ " \n",
+ " 3081 | \n",
+ " 0.560400 | \n",
+ "
\n",
+ " \n",
+ " 4108 | \n",
+ " 0.512400 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1368/1368 [00:12<00:00, 110.08it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy: checkpoint-1027 0.6461988304093568\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1368/1368 [00:12<00:00, 110.04it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy: checkpoint-2054 0.6659356725146199\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1368/1368 [00:12<00:00, 109.91it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy: checkpoint-3081 0.6732456140350878\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1368/1368 [00:12<00:00, 107.93it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy: checkpoint-4108 0.6769005847953217\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1368/1368 [00:12<00:00, 108.49it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deleting: YOUR_SAVING_DIRECTORY/checkpoint-1027\n",
+ "Deleting: YOUR_SAVING_DIRECTORY/checkpoint-2054\n",
+ "Deleting: YOUR_SAVING_DIRECTORY/checkpoint-3081\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "bert_forecaster.fit(corpus, \n",
+ " partial(generic_fit_selector, split=\"train\"), \n",
+ " val_context_selector=partial(generic_fit_selector, split=\"val\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3238e632-4e1b-4ba1-938a-b9aeef07f0cf",
+ "metadata": {},
+ "source": [
+ "## Run the fitted model on the test set and perform evaluation\n",
+ "\n",
+ "...and inference is done simply by calling `transform`! (again, note the selector)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d5e34ca",
+ "metadata": {},
+ "source": [
+ "### Normal Model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "b1a978a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_dict = {\n",
+ " \"output_dir\": \"YOUR_SAVING_DIRECTORY\", \n",
+ " \"per_device_batch_size\": 4, \n",
+ " \"num_train_epochs\": 4, \n",
+ " \"learning_rate\": 6.7e-6,\n",
+ " \"random_seed\": 1,\n",
+ " \"context_mode\": \"normal\", # set to normal by default\n",
+ " \"device\": DEVICE\n",
+ "}\n",
+ "config_file = os.path.join(\"YOUR_SAVING_DIRECTORY\", \"dev_config.json\")\n",
+ "with open(config_file, 'r') as file:\n",
+ " model_config = json.load(file)\n",
+ "normal_bert = BERTCGAModel(os.path.join(\"YOUR_SAVING_DIRECTORY\", model_config['best_checkpoint']), config=config_dict)\n",
+ "normal_bert.best_threshold = model_config['best_threshold']\n",
+ "normal_bert_forecaster = Forecaster(normal_bert, \"has_removed_comment\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "4d3e4892-f6ef-4b6f-a9e4-d318f8cb96b9",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 8466/8466 [01:17<00:00, 108.85it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "corpus = normal_bert_forecaster.transform(corpus, transform_selector)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9f517cfb-3a1d-4279-bf09-96695938d30c",
+ "metadata": {},
+ "source": [
+ "Finally, to get a human-readable interpretation of model performance, we can use `summarize` to generate a table of standard performance metrics. It also returns a table of conversation-level predictions in case you want to do more complex analysis!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "c7fa3d49-39c1-4dd4-9f8e-ca31f421ac5a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.8336380255941496, Median = 3.0\n",
+ "Accuracy 0.614035\n",
+ "Precision 0.583156\n",
+ "Recall 0.799708\n",
+ "FPR 0.571637\n",
+ "F1 0.674476\n",
+ "Mean H 3.833638\n",
+ "Correct Adjustment 0.120614\n",
+ "Incorrect Adjustment 0.075292\n",
+ "Recovery 0.045322\n",
+ "Leaderboard String | MODEL_NAME | 61.4 | 58.3 | 80.0 | 67....\n",
+ "dtype: object\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "( label score forecast last_utterance_forecast\n",
+ " conversation_id \n",
+ " cus26gy 1 0.880532 1.0 1.0\n",
+ " cus37h0 1 0.953645 1.0 1.0\n",
+ " cus142u 0 0.932500 1.0 1.0\n",
+ " cus19ml 0 0.921860 1.0 0.0\n",
+ " cusxft0 1 0.166532 0.0 0.0\n",
+ " ... ... ... ... ...\n",
+ " e8qli0i 0 0.185570 0.0 0.0\n",
+ " e8qm4aj 0 0.093030 0.0 0.0\n",
+ " e8ql8ii 0 0.852890 1.0 1.0\n",
+ " e8qzjei 1 0.952173 1.0 1.0\n",
+ " e8r00ko 0 0.884986 1.0 0.0\n",
+ " \n",
+ " [1368 rows x 4 columns],\n",
+ " {'Accuracy': 0.6140350877192983,\n",
+ " 'Precision': 0.5831556503198294,\n",
+ " 'Recall': 0.7997076023391813,\n",
+ " 'FPR': 0.5716374269005848,\n",
+ " 'F1': 0.6744759556103577,\n",
+ " 'Mean H': 3.8336380255941496,\n",
+ " 'Correct Adjustment': 0.1206140350877193,\n",
+ " 'Incorrect Adjustment': 0.07529239766081872,\n",
+ " 'Recovery': 0.04532163742690058,\n",
+ " 'Leaderboard String': '| MODEL_NAME | 61.4 | 58.3 | 80.0 | 67.4 | 57.2 | 3.83 | 4.5 (12.1 - 7.5) |'})"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "normal_bert_forecaster.summarize(corpus, lambda c: c.meta['split'] == \"test\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1abddbd9",
+ "metadata": {},
+ "source": [
+ "### No-Context Model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e7c72044",
+ "metadata": {},
+ "source": [
+    "Capturing conversational context and dynamics is a critical aspect of conversational forecasting models. To evaluate whether a model possesses this capability, we introduce the No-Context Setting, where models are deprived of access to conversational history. Specifically, instead of providing the full context, we feed only the most recent utterance at each timestep, removing any contextual information about prior interactions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "cb7a6c82",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_dict = {\n",
+ " \"output_dir\": \"YOUR_SAVING_DIRECTORY/no-context\", \n",
+ " \"per_device_batch_size\": 4, \n",
+ " \"num_train_epochs\": 4, \n",
+ " \"learning_rate\": 6.7e-6,\n",
+ " \"random_seed\": 1,\n",
+    "    \"context_mode\": \"no-context\", # feed only the current utterance (the default is \"normal\")\n",
+ " \"device\": DEVICE\n",
+ "}\n",
+ "config_file = os.path.join(\"YOUR_SAVING_DIRECTORY\", \"dev_config.json\")\n",
+ "with open(config_file, 'r') as file:\n",
+ " model_config = json.load(file)\n",
+ "nocontext_bert = BERTCGAModel(os.path.join(\"YOUR_SAVING_DIRECTORY\", model_config['best_checkpoint']), config=config_dict)\n",
+ "nocontext_bert.best_threshold = model_config['best_threshold']\n",
+ "nocontext_bert_forecaster = Forecaster(nocontext_bert, \"has_removed_comment\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "2bd36200",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 8466/8466 [01:18<00:00, 107.36it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "corpus = nocontext_bert_forecaster.transform(corpus, transform_selector)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "b5a938e7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 4.0287539936102235, Median = 3.0\n",
+ "Accuracy 0.600146\n",
+ "Precision 0.561435\n",
+ "Recall 0.915205\n",
+ "FPR 0.714912\n",
+ "F1 0.695942\n",
+ "Mean H 4.028754\n",
+ "Correct Adjustment 0.207602\n",
+ "Incorrect Adjustment 0.164474\n",
+ "Recovery 0.043129\n",
+ "Leaderboard String | MODEL_NAME | 60.0 | 56.1 | 91.5 | 69....\n",
+ "dtype: object\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "( label score forecast last_utterance_forecast\n",
+ " conversation_id \n",
+ " cus26gy 1 0.948133 1.0 0.0\n",
+ " cus37h0 1 0.940318 1.0 1.0\n",
+ " cus142u 0 0.894471 1.0 0.0\n",
+ " cus19ml 0 0.921860 1.0 0.0\n",
+ " cusxft0 1 0.220547 0.0 0.0\n",
+ " ... ... ... ... ...\n",
+ " e8qli0i 0 0.185570 0.0 0.0\n",
+ " e8qm4aj 0 0.471338 1.0 1.0\n",
+ " e8ql8ii 0 0.864208 1.0 0.0\n",
+ " e8qzjei 1 0.951614 1.0 1.0\n",
+ " e8r00ko 0 0.920416 1.0 0.0\n",
+ " \n",
+ " [1368 rows x 4 columns],\n",
+ " {'Accuracy': 0.6001461988304093,\n",
+ " 'Precision': 0.5614349775784754,\n",
+ " 'Recall': 0.9152046783625731,\n",
+ " 'FPR': 0.7149122807017544,\n",
+ " 'F1': 0.6959421901056142,\n",
+ " 'Mean H': 4.0287539936102235,\n",
+ " 'Correct Adjustment': 0.20760233918128654,\n",
+ " 'Incorrect Adjustment': 0.16447368421052633,\n",
+ " 'Recovery': 0.04312865497076021,\n",
+ " 'Leaderboard String': '| MODEL_NAME | 60.0 | 56.1 | 91.5 | 69.6 | 71.5 | 4.03 | 4.3 (20.8 - 16.4) |'})"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nocontext_bert_forecaster.summarize(corpus, lambda c: c.meta['split'] == \"test\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.18"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/forecaster/Forecasting Models.ipynb b/examples/forecaster/Forecasting Models.ipynb
new file mode 100644
index 00000000..3f685451
--- /dev/null
+++ b/examples/forecaster/Forecasting Models.ipynb
@@ -0,0 +1,690 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook is for reproducing all results reported in the paper \"Every Comment, Every Conversation, All at Once: Evaluating Conversational Forecasting Models\".\n",
+ "The results include:\n",
+ "1. Performance of CRAFT models\n",
+ "2. Performance of BERT-base and RoBERTa-large models\n",
+ "3. Performance of zero-shot Gemma-2 and GPT-4o\n",
+ "4. Performance of fine-tuned Gemma-2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/reef/conda-envs/sqt-env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from .autonotebook import tqdm as notebook_tqdm\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "🦥 Unsloth Zoo will now patch everything to make training faster!\n"
+ ]
+ }
+ ],
+ "source": [
+ "from convokit import download, Corpus, Forecaster, BERTCGAModel\n",
+ "import tarfile\n",
+ "import json, os, shutil"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Define datasets and working directories "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Device to run on; change to \"cpu\" for CPU mode (noting that it will be slower)\n",
+ "DEVICE = \"cuda\"\n",
+ "\n",
+ "corpus_name = \"cga-wikiconv\"\n",
+ "# corpus_name = \"cga-cmv-legacy\"\n",
+ "# corpus_name = \"cga-cmv-large\"\n",
+ "label_metadata = \"has_removed_comment\" if 'cmv' in corpus_name else 'conversation_has_personal_attack'\n",
+ "\n",
+ "YOUR_MODEL_DIRECTORY = \"YOUR_MODEL_DIRECTORY\"\n",
+ "YOUR_DATA_DIRECTORY = \"YOUR_DATA_DIRECTORY\"\n",
+ "YOUR_SAVING_DIRECTORY = \"YOUR_SAVING_DIRECTORY\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading conversations-gone-awry-corpus to /reef/sqt2/DevTest-newConvokit/YOUR_DATA_DIRECTORY/conversations-gone-awry-corpus\n",
+ "Downloading conversations-gone-awry-corpus from http://zissou.infosci.cornell.edu/convokit/datasets/conversations-gone-awry-corpus/conversations-gone-awry-corpus.zip (45.2MB)... "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done\n"
+ ]
+ }
+ ],
+ "source": [
+ "if corpus_name == \"cga-wikiconv\":\n",
+ " corpus = Corpus(filename=download(\"conversations-gone-awry-corpus\", data_dir=YOUR_DATA_DIRECTORY))\n",
+ "elif corpus_name == \"cga-cmv-legacy\":\n",
+ " corpus = Corpus(filename=download(\"conversations-gone-awry-cmv-corpus\", data_dir=YOUR_DATA_DIRECTORY))\n",
+ "elif corpus_name == \"cga-cmv-large\":\n",
+ " raise ValueError(f\"The corpus {corpus_name} has not been published. This corpus is not available.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Download Fine-tuned Models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading cga-wikiconv/bert-base-cased to /reef/sqt2/DevTest-newConvokit/YOUR_MODEL_DIRECTORY/tar_files/cga-wikiconv/bert-base-cased\n",
+ "Downloading cga-wikiconv/bert-base-cased/bert-base-cased.tar from https://zissou.infosci.cornell.edu/convokit/models/forecaster_models/cga-wikiconv/bert-base-cased.tar (11460.7MB)... "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done\n"
+ ]
+ }
+ ],
+ "source": [
+ "model = \"bert-base-cased\"\n",
+ "# model = \"roberta-large\"\n",
+ "tarfile_path = download(f\"{corpus_name}/{model}\", data_dir=f\"{YOUR_MODEL_DIRECTORY}/tar_files\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/reef/sqt2/DevTest-newConvokit/YOUR_MODEL_DIRECTORY/forecasting_models/cga-wikiconv/bert-base-cased'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with tarfile.open(f\"{tarfile_path}/{model}.tar\", 'r') as tar:\n",
+ " tar.extractall()\n",
+ "forecasting_models_path = f\"{YOUR_MODEL_DIRECTORY}/forecasting_models/{corpus_name}/{model}\"\n",
+ "shutil.move(f\"{model}\", forecasting_models_path)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define selectors for the Forecaster\n",
+ "\n",
+ "Core to the flexibility of the `Forecaster` framework is the concept of *selectors*. \n",
+ "\n",
+ "To capture the temporal dimension of the conversational forecasting task, `Forecaster` iterates through conversations in chronological utterance order, at each step presenting to the backend forecasting model a \"context tuple\" containing both the comment itself and the full \"context\" preceding that comment. As a general framework, `Forecaster` on its own does not try to make any further assumptions about what \"context\" should contain or look like; it simply presents context as a chronologically ordered list of all utterances up to and including the current one. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "def transform_selector(context_tuple):\n",
+ " \"\"\"\n",
+ " For transform we check that the conversation is in the test split and skip the context that already contains every utterance of the conversation (i.e. the final utterance)\n",
+ " \"\"\"\n",
+ " convo = context_tuple.current_utterance.get_conversation()\n",
+ " convo_length = len(convo.get_chronological_utterance_list())\n",
+ "\n",
+ " matches_split = (context_tuple.current_utterance.get_conversation().meta[\"split\"] == \"test\")\n",
+ " is_end = (len(context_tuple.context) == convo_length)\n",
+ "\n",
+ " return (matches_split and not is_end)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def update_metrics(all_results, cur_metrics):\n",
+ " for metric in cur_metrics:\n",
+ " all_results[metric] = all_results.get(metric, []) + [cur_metrics[metric]]\n",
+ " return all_results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# BERT and RoBERTa Forecaster"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Evaluating Random Seed 1\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:47<00:00, 107.60it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.5488215488215484, Median = 3.0\n",
+ "Accuracy 0.683333\n",
+ "Precision 0.675\n",
+ "Recall 0.707143\n",
+ "FPR 0.340476\n",
+ "F1 0.690698\n",
+ "Mean H 3.548822\n",
+ "Correct Adjustment 0.036905\n",
+ "Incorrect Adjustment 0.053571\n",
+ "Recovery -0.016667\n",
+ "Leaderboard String | MODEL_NAME | 68.3 | 67.5 | 70.7 | 69....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 2\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:46<00:00, 109.17it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.5134099616858236, Median = 3.0\n",
+ "Accuracy 0.666667\n",
+ "Precision 0.683246\n",
+ "Recall 0.621429\n",
+ "FPR 0.288095\n",
+ "F1 0.650873\n",
+ "Mean H 3.51341\n",
+ "Correct Adjustment 0.05\n",
+ "Incorrect Adjustment 0.066667\n",
+ "Recovery -0.016667\n",
+ "Leaderboard String | MODEL_NAME | 66.7 | 68.3 | 62.1 | 65....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 3\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:47<00:00, 108.96it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.609427609427609, Median = 3.0\n",
+ "Accuracy 0.679762\n",
+ "Precision 0.670429\n",
+ "Recall 0.707143\n",
+ "FPR 0.347619\n",
+ "F1 0.688297\n",
+ "Mean H 3.609428\n",
+ "Correct Adjustment 0.041667\n",
+ "Incorrect Adjustment 0.060714\n",
+ "Recovery -0.019048\n",
+ "Leaderboard String | MODEL_NAME | 68.0 | 67.0 | 70.7 | 68....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 4\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:46<00:00, 109.29it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.5428571428571427, Median = 3.0\n",
+ "Accuracy 0.65\n",
+ "Precision 0.673077\n",
+ "Recall 0.583333\n",
+ "FPR 0.283333\n",
+ "F1 0.625\n",
+ "Mean H 3.542857\n",
+ "Correct Adjustment 0.05\n",
+ "Incorrect Adjustment 0.066667\n",
+ "Recovery -0.016667\n",
+ "Leaderboard String | MODEL_NAME | 65.0 | 67.3 | 58.3 | 62....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 5\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:47<00:00, 108.61it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkgAAAGwCAYAAABSN5pGAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAVvRJREFUeJzt3XlYVGX/P/D3sAwgq4BsSoBKgoqgoIiWWlCYPrmE5UKBS/jU4wpqSpm4lICpkY88kn5L65ckmUumRRquKaKiuAIqobiwuAQIJghz//7w8uQM6xgwQO/Xdc0Vc5/73OdzjjNn3p0554xMCCFARERERBItTRdARERE1NwwIBERERGpYEAiIiIiUsGARERERKSCAYmIiIhIBQMSERERkQoGJCIiIiIVOpouoKVSKBS4efMmjI2NIZPJNF0OERER1YMQAvfu3YOdnR20tGo+TsSA9JRu3rwJe3t7TZdBRERET+HatWvo0KFDjdMZkJ6SsbExgEcb2MTERMPVEBERUX0UFxfD3t5e+hyvCQPSU3r8tZqJiQkDEhERUQtT1+kxPEmbiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWgWASk2NhaOjo7Q19eHt7c3jh07VmPfdevW4fnnn0fbtm3Rtm1b+Pn5VekvhMCCBQtga2sLAwMD+Pn54dKlS0p97t69i8DAQJiYmMDMzAyTJk1CSUlJo6wfERERtSw6mi4gISEBYWFhiIuLg7e3N2JiYuDv74/MzExYWVlV6b9//36MHTsW/fr1g76+PqKjo/Hyyy/j/PnzaN++PQBg2bJlWLVqFb766is4OTnhww8/hL+/Py5cuAB9fX0AQGBgIHJzc7Fnzx48fPgQEyZMwOTJkxEfH9+k699aOM7bpekSnsqVqKGaLoGIiJohmRBCaLIAb29v9O7dG6tXrwYAKBQK2NvbY9q0aZg3b16d81dWVqJt27ZYvXo1goKCIISAnZ0dZs2ahdmzZwMAioqKYG1tjQ0bNmDMmDFIT09H165dcfz4cXh5eQEAEhMTMWTIEFy/fh12dnZVllNWVoaysjLpeXFxMezt7VFUVAQTE5OG2BQtGgMSERG1BMXFxTA1Na3z81ujX7GVl5cjNTUVfn5+UpuWlhb8/PyQnJxcrzHu37+Phw8fwtzcHACQnZ2NvLw8pTFNTU3h7e0tjZmcnAwzMzMpHAGAn58ftLS0kJKSUu1yIiMjYWpqKj3s7e3VXl8iIiJqGTQakG7fvo3KykpYW1srtVtbWyMvL69eY8ydOxd2dnZSIHo8X21j5uXlVfn6TkdHB+bm5jUuNzw8HEVFRdLj2rVr9aqPiIiIWh6Nn4P0d0RFRWHTpk3Yv3+/dG5RY9HT04Oenl6jLoOIiIiaB40eQbK0tIS2tjby8/OV2vPz82FjY1PrvMuXL0dUVBR2796NHj16SO2P56ttTBsbGxQUFChNr6iowN27d+tcLhEREbV+Gg1Icrkcnp6eSEpKktoUCgWSkpLg4+NT43zLli3DkiVLkJiYqHQeEQA4OTnBxsZGaczi4mKkpKRIY/r4+KCwsBCpqalSn71790KhUMDb27uhVo+IiIhaKI1/xRYWFobg4GB4eXmhT58+iImJQWlpKSZMmAAACAoKQvv27REZGQkAiI6OxoIFCxAfHw9HR0fpnCEjIyMYGRlBJpNh5syZ+Oijj+Ds7Cxd5m9nZ4cRI0YAAFxdXTF48GCEhIQgLi4ODx8+xNSpUzFmzJhqr2AjIiKifxaNB6TRo0fj1q1bWLBgAfLy8uDh4YHExETpJOucnBxoaf11oGvNmjUoLy/HqFGjlMaJiIjAwoULAQDvvfceSktLMXnyZBQWFuK5555DYmKi0nlKGzduxNSpU+Hr6wst
LS0EBARg1apVjb/CRERE1Oxp/D5ILVV976PwT8H7IBERUUvQIu6DRERERNQcMSARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlKho+kCiDTJcd4uTZegtitRQzVdAhFRq8cjSEREREQqGJCIiIiIVDAgEREREalgQCIiIiJSofGAFBsbC0dHR+jr68Pb2xvHjh2rse/58+cREBAAR0dHyGQyxMTEVOnzeJrqY8qUKVKfQYMGVZn+zjvvNMbqERERUQuk0avYEhISEBYWhri4OHh7eyMmJgb+/v7IzMyElZVVlf73799Hx44d8frrryM0NLTaMY8fP47Kykrp+blz5/DSSy/h9ddfV+oXEhKCxYsXS8/btGnTQGtF1Lh45R0RUePT6BGklStXIiQkBBMmTEDXrl0RFxeHNm3a4Msvv6y2f+/evfHJJ59gzJgx0NPTq7ZPu3btYGNjIz127tyJTp06YeDAgUr92rRpo9TPxMSkwdePiIiIWiaNBaTy8nKkpqbCz8/vr2K0tODn54fk5OQGW8Y333yDiRMnQiaTKU3buHEjLC0t0b17d4SHh+P+/fu1jlVWVobi4mKlBxEREbVOGvuK7fbt26isrIS1tbVSu7W1NTIyMhpkGdu3b0dhYSHGjx+v1D5u3Dg4ODjAzs4OZ86cwdy5c5GZmYmtW7fWOFZkZCQWLVrUIHURERFR89aq76T9xRdf4JVXXoGdnZ1S++TJk6W/3dzcYGtrC19fX2RlZaFTp07VjhUeHo6wsDDpeXFxMezt7RuncCIiItIojQUkS0tLaGtrIz8/X6k9Pz8fNjY2f3v8q1ev4tdff631qNBj3t7eAIDLly/XGJD09PRqPO+JiIiIWheNnYMkl8vh6emJpKQkqU2hUCApKQk+Pj5/e/z169fDysoKQ4fWffVMWloaAMDW1vZvL5eIiIhaPo1+xRYWFobg4GB4eXmhT58+iImJQWlpKSZMmAAACAoKQvv27REZGQng0UnXFy5ckP6+ceMG0tLSYGRkhM6dO0vjKhQKrF+/HsHBwdDRUV7FrKwsxMfHY8iQIbCwsMCZM2cQGhqKAQMGoEePHk205kRERNScaTQgjR49Grdu3cKCBQuQl5cHDw8PJCYmSidu5+TkQEvrr4NcN2/eRM+ePaXny5cvx/LlyzFw4EDs379fav/111+Rk5ODiRMnVlmmXC7Hr7/+KoUxe3t7BAQEYP78+Y23okRERNSiyIQQQtNFtETFxcUwNTVFUVER76GElnnzQmo6vFEkETUX9f381vhPjRARERE1NwxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFRoPSLGxsXB0dIS+vj68vb1x7NixGvueP38eAQEBcHR0hEwmQ0xMTJU+CxcuhEwmU3q4uLgo9Xnw4AGmTJkCCwsLGBkZISAgAPn5+Q29akRERNRCqR2QvvrqK+zatUt6/t5778HMzAz9+vXD1atX1RorISEBYWFhiIiIwMmTJ+Hu7g5/f38UFBRU2//+/fvo
2LEjoqKiYGNjU+O43bp1Q25urvT47bfflKaHhobixx9/xObNm3HgwAHcvHkTr732mlq1ExERUeuldkBaunQpDAwMAADJycmIjY3FsmXLYGlpidDQULXGWrlyJUJCQjBhwgR07doVcXFxaNOmDb788stq+/fu3RuffPIJxowZAz09vRrH1dHRgY2NjfSwtLSUphUVFeGLL77AypUr8eKLL8LT0xPr16/HkSNHcPToUbXqJyIiotZJ7YB07do1dO7cGQCwfft2BAQEYPLkyYiMjMShQ4fqPU55eTlSU1Ph5+f3VzFaWvDz80NycrK6ZSm5dOkS7Ozs0LFjRwQGBiInJ0ealpqaiocPHyot18XFBc8880ytyy0rK0NxcbHSg4iIiFontQOSkZER7ty5AwDYvXs3XnrpJQCAvr4+/vzzz3qPc/v2bVRWVsLa2lqp3draGnl5eeqWJfH29saGDRuQmJiINWvWIDs7G88//zzu3bsHAMjLy4NcLoeZmZlay42MjISpqan0sLe3f+oaiYiIqHnTUXeGl156CW+//TZ69uyJixcvYsiQIQAenUDt6OjY0PWp7ZVXXpH+7tGjB7y9veHg4IDvvvsOkyZNeupxw8PDERYWJj0vLi5mSCIiImql1D6CFBsbCx8fH9y6dQtbtmyBhYUFgEdfXY0dO7be41haWkJbW7vK1WP5+fm1noCtLjMzMzz77LO4fPkyAMDGxgbl5eUoLCxUa7l6enowMTFRehAREVHrpHZAMjMzw+rVq/HDDz9g8ODBUvuiRYvwwQcf1HscuVwOT09PJCUlSW0KhQJJSUnw8fFRt6walZSUICsrC7a2tgAAT09P6OrqKi03MzMTOTk5DbpcIiIiarnU/ooNAAoLC3Hs2DEUFBRAoVBI7TKZDG+99Va9xwkLC0NwcDC8vLzQp08fxMTEoLS0FBMmTAAABAUFoX379oiMjATw6MTuCxcuSH/fuHEDaWlpMDIykk4cnz17Nl599VU4ODjg5s2biIiIgLa2tnR0y9TUFJMmTUJYWBjMzc1hYmKCadOmwcfHB3379n2azUFEREStjNoB6ccff0RgYCBKSkpgYmICmUwmTVM3II0ePRq3bt3CggULkJeXBw8PDyQmJkonbufk5EBL66+DXDdv3kTPnj2l58uXL8fy5csxcOBA7N+/HwBw/fp1jB07Fnfu3EG7du3w3HPP4ejRo2jXrp0036effgotLS0EBASgrKwM/v7++N///qfupiAiIqJWSiaEEOrM8Oyzz2LIkCFYunQp2rRp01h1NXvFxcUwNTVFUVERz0cC4DhvV92d6B/rStRQTZdARASg/p/fap+DdOPGDUyfPv0fHY6IiIiodVM7IPn7++PEiRONUQsRERFRs6D2OUhDhw7FnDlzcOHCBbi5uUFXV1dp+rBhwxqsOCIiIiJNUDsghYSEAAAWL15cZZpMJkNlZeXfr4qIiIhIg9QOSE9e1k9ERETUGql9DhIRERFRa/dUAenAgQN49dVX0blzZ3Tu3BnDhg3DoUOHGro2IiIiIo1QOyB988038PPzQ5s2bTB9+nRMnz4dBgYG8PX1RXx8fGPUSERERNSk1L5RpKurKyZPnozQ0FCl9pUrV2LdunVIT09v0AKbK94oUhlvFEm14Y0iiai5aLQbRf7+++949dVXq7QPGzYM2dnZ6g5HRERE1OyoHZDs7e2RlJRUpf3XX3+Fvb19gxRFREREpElqX+Y/a9YsTJ8+HWlpaejXrx8A4PDhw9iwYQM+++yzBi+QiIiIqKmpHZDeffdd2NjYYMWKFfjuu+8APDovKSEhAcOHD2/wAomIiIiamtoBCQBGjhyJkSNHNnQtRERERM0CbxRJREREpKJeR5DMzc1x8eJFWFpaom3btpDJZDX2vXv3boMVR0RERKQJ9QpIn376KYyNjaW/awtIRERERC1dvQJScHCw9Pf48eMbqxYiIiKiZkHtc5C0tbVRUFBQpf3OnTvQ1tZu
kKKIiIiINEntgFTTL5OUlZVBLpf/7YKIiIiINK3el/mvWrUKACCTyfB///d/MDIykqZVVlbi4MGDcHFxafgKiajF42/1NQ3+5h1Rw6l3QPr0008BPDqCFBcXp/R1mlwuh6OjI+Li4hq+QiIiIqImVu+A9PiHaF944QVs3boVbdu2bbSiiIiIiDRJ7Ttp79u3rzHqICIiImo2nuqnRq5fv44dO3YgJycH5eXlStNWrlzZIIURERERaYraASkpKQnDhg1Dx44dkZGRge7du+PKlSsQQqBXr16NUSMRERFRk1L7Mv/w8HDMnj0bZ8+ehb6+PrZs2YJr165h4MCBeP311xujRiIiIqImpXZASk9PR1BQEABAR0cHf/75J4yMjLB48WJER0c3eIFERERETU3tgGRoaCidd2Rra4usrCxp2u3btxuuMiIiIiINUfscpL59++K3336Dq6srhgwZglmzZuHs2bPYunUr+vbt2xg1EhERETUptQPSypUrUVJSAgBYtGgRSkpKkJCQAGdnZ17BRkRERK2C2gGpY8eO0t+Ghoa8ezYRERG1Omqfg3Tt2jVcv35den7s2DHMnDkTa9eubdDCiIiIiDRF7YA0btw46W7aeXl58PPzw7Fjx/DBBx9g8eLFahcQGxsLR0dH6Ovrw9vbG8eOHaux7/nz5xEQEABHR0fIZDLExMRU6RMZGYnevXvD2NgYVlZWGDFiBDIzM5X6DBo0CDKZTOnxzjvvqF07ERERtU5qB6Rz586hT58+AIDvvvsObm5uOHLkCDZu3IgNGzaoNVZCQgLCwsIQERGBkydPwt3dHf7+/igoKKi2//3799GxY0dERUXBxsam2j4HDhzAlClTcPToUezZswcPHz7Eyy+/jNLSUqV+ISEhyM3NlR7Lli1Tq3YiIiJqvdQ+B+nhw4fQ09MDAPz6668YNmwYAMDFxQW5ublqjbVy5UqEhIRgwoQJAIC4uDjs2rULX375JebNm1elf+/evdG7d28AqHY6ACQmJio937BhA6ysrJCamooBAwZI7W3atKkxZFWnrKwMZWVl0vPi4uJ6z0tEREQti9pHkLp164a4uDgcOnQIe/bsweDBgwEAN2/ehIWFRb3HKS8vR2pqKvz8/P4qRksLfn5+SE5OVresGhUVFQEAzM3Nldo3btwIS0tLdO/eHeHh4bh//36t40RGRsLU1FR62NvbN1iNRERE1LyoHZCio6Px+eefY9CgQRg7dizc3d0BADt27JC+equP27dvo7KyEtbW1krt1tbWyMvLU7esaikUCsycORP9+/dH9+7dpfZx48bhm2++wb59+xAeHo7/9//+H958881axwoPD0dRUZH0uHbtWoPUSERERM2P2l+xDRo0CLdv30ZxcTHatm0rtU+ePBlt2rRp0OL+rilTpuDcuXP47bfflNonT54s/e3m5gZbW1v4+voiKysLnTp1qnYsPT096atFIiIiat3UPoIEAEIIpKam4vPPP8e9e/cAAHK5XK2AZGlpCW1tbeTn5yu15+fnq3VuUE2mTp2KnTt3Yt++fejQoUOtfb29vQEAly9f/tvLJSIiopZP7YB09epVuLm5Yfjw4ZgyZQpu3boF4NFXb7Nnz673OHK5HJ6enkhKSpLaFAoFkpKS4OPjo25ZEiEEpk6dim3btmHv3r1wcnKqc560tDQAj35bjoiIiEjtr9hmzJgBLy8vnD59Wumk7JEjRyIkJEStscLCwhAcHAwvLy/06dMHMTExKC0tla5qCwoKQvv27REZGQng0YndFy5ckP6+ceMG0tLSYGRkhM6dOwN49LVafHw8fvjhBxgbG0vnM5mamsLAwABZWVmIj4/HkCFDYGFhgTNnziA0NBQDBgxAjx491N0cRERE1AqpHZAOHTqEI0eOQC6XK7U7Ojrixo0bao01evRo3Lp1CwsWLEBeXh48PDyQmJgonbidk5MDLa2/DnLdvHkTPXv2lJ4vX74cy5cvx8CBA7F//34A
wJo1awA8OlfqSevXr8f48eMhl8vx66+/SmHM3t4eAQEBmD9/vlq1ExERUeuldkBSKBSorKys0n79+nUYGxurXcDUqVMxderUaqc9Dj2POTo6QghR63h1Tbe3t8eBAwfUqpGIiIj+WdQ+B+nll19W+okPmUyGkpISREREYMiQIQ1ZGxEREZFGqH0EacWKFfD390fXrl3x4MEDjBs3DpcuXYKlpSW+/fbbxqiRiIiIqEmpHZA6dOiA06dPY9OmTThz5gxKSkowadIkBAYGwsDAoDFqJCIiImpSagekBw8eQF9fv847TxMRERG1VGqfg2RlZYXg4GDs2bMHCoWiMWoiIiIi0ii1A9JXX32F+/fvY/jw4Wjfvj1mzpyJEydONEZtRERERBqhdkAaOXIkNm/ejPz8fCxduhQXLlxA37598eyzz2Lx4sWNUSMRERFRk3qq32IDAGNjY0yYMAG7d+/GmTNnYGhoiEWLFjVkbUREREQa8dQB6cGDB/juu+8wYsQI9OrVC3fv3sWcOXMasjYiIiIijVD7KrZffvkF8fHx2L59O3R0dDBq1Cjs3r0bAwYMaIz6iIiIiJqc2gFp5MiR+Ne//oWvv/4aQ4YMga6ubmPURURERKQxagek/Pz8p/rNNSIiIqKWQu2AZGxsDIVCgcuXL6OgoKDKvZD4VRsRERG1dGoHpKNHj2LcuHG4evUqhBBK02QyGSorKxusOCIiIiJNUDsgvfPOO/Dy8sKuXbtga2sLmUzWGHURERERaYzaAenSpUv4/vvv0blz58aoh4iIiEjj1L4Pkre3Ny5fvtwYtRARERE1C2ofQZo2bRpmzZqFvLw8uLm5VbnMv0ePHg1WHBEREZEmqB2QAgICAAATJ06U2mQyGYQQPEmbiIiIWgW1A1J2dnZj1EFERETUbKgdkBwcHBqjDiIiIqJmQ+2ABABZWVmIiYlBeno6AKBr166YMWMGOnXq1KDFEREREWmC2lex/fLLL+jatSuOHTuGHj16oEePHkhJSUG3bt2wZ8+exqiRiIiIqEmpfQRp3rx5CA0NRVRUVJX2uXPn4qWXXmqw4oiIiIg0Qe0jSOnp6Zg0aVKV9okTJ+LChQsNUhQRERGRJqkdkNq1a4e0tLQq7WlpabCysmqImoiIiIg0Su2v2EJCQjB58mT8/vvv6NevHwDg8OHDiI6ORlhYWIMXSERERNTU1A5IH374IYyNjbFixQqEh4cDAOzs7LBw4UJMnz69wQskIiIiampqBySZTIbQ0FCEhobi3r17AABjY+MGL4yIiIhIU57qTtoVFRVwdnZWCkaXLl2Crq4uHB0dG7I+IiIioian9kna48ePx5EjR6q0p6SkYPz48Q1RExEREZFGqR2QTp06hf79+1dp79u3b7VXtxERERG1NGoHJJlMJp179KSioiJUVlaqXUBsbCwcHR2hr68Pb29vHDt2rMa+58+fR0BAABwdHSGTyRATE/NUYz548ABTpkyBhYUFjIyMEBAQgPz8fLVrJyIiotZJ7YA0YMAAREZGKoWhyspKREZG4rnnnlNrrISEBISFhSEiIgInT56Eu7s7/P39UVBQUG3/+/fvo2PHjoiKioKNjc1TjxkaGooff/wRmzdvxoEDB3Dz5k289tpratVORERErZdMCCHUmeHChQsYMGAAzMzM8PzzzwMADh06hOLiYuzduxfdu3ev91je3t7o3bs3Vq9eDQBQKBSwt7fHtGnTMG/evFrndXR0xMyZMzFz5ky1xiwqKkK7du0QHx+PUaNGAQAyMjLg6uqK5ORk9O3bt161FxcXw9TUFEVFRTAxMan3OrdWjvN2aboEon+8K1FDNV0CUbNX389vtY8gde3aFWfOnMEbb7yBgoIC3Lt3D0FBQcjIyFArHJWXlyM1NRV+fn5/FaOlBT8/PyQnJ6tbVr3HTE1NxcOHD5X6uLi44Jlnnql1uWVlZSguLlZ6EBERUeuk9mX+wKMbQy5duvRvLfj27duorKyE
tbW1Uru1tTUyMjIabcy8vDzI5XKYmZlV6ZOXl1fj2JGRkVi0aNFT1UVEREQti9pHkP6pwsPDUVRUJD2uXbum6ZKIiIiokTzVEaSGYGlpCW1t7SpXj+Xn59d4AnZDjGljY4Py8nIUFhYqHUWqa7l6enrQ09N7qrqIiIioZdHYESS5XA5PT08kJSVJbQqFAklJSfDx8Wm0MT09PaGrq6vUJzMzEzk5OU+9XCIiImpd6nUEaceOHXjllVegq6vboAsPCwtDcHAwvLy80KdPH8TExKC0tBQTJkwAAAQFBaF9+/aIjIwE8Ogk7AsXLkh/37hxA2lpaTAyMkLnzp3rNaapqSkmTZqEsLAwmJubw8TEBNOmTYOPj0+9r2AjIiKi1q1eAWnkyJHIy8tDu3btoK2tjdzcXFhZWf3thY8ePRq3bt3CggULkJeXBw8PDyQmJkonWefk5EBL66+DXDdv3kTPnj2l58uXL8fy5csxcOBA7N+/v15jAsCnn34KLS0tBAQEoKysDP7+/vjf//73t9eHiIiIWod63QfJxsYG69atw6uvvgotLS3k5+ejXbt2TVFfs8X7ICnjfZCINI/3QSKqW30/v+t1BOmdd97B8OHDIZPJIJPJaj2Z+Wl+boSIiIioOalXQFq4cCHGjBmDy5cvY9iwYVi/fn2V+wgRERERtRb1vszfxcUFLi4uiIiIwOuvv442bdo0Zl1EREREGqP2fZAiIiIAALdu3UJmZiYAoEuXLv/4c5KIiIio9VD7Pkj379/HxIkTYWdnhwEDBmDAgAGws7PDpEmTcP/+/caokYiIiKhJqR2QQkNDceDAAezYsQOFhYUoLCzEDz/8gAMHDmDWrFmNUSMRERFRk1L7K7YtW7bg+++/x6BBg6S2IUOGwMDAAG+88QbWrFnTkPURERERNbmn+ortyZsuPmZlZcWv2IiIiKhVUDsg+fj4ICIiAg8ePJDa/vzzTyxatIi/ZUZEREStgtpfsX322Wfw9/dHhw4d4O7uDgA4ffo09PX18csvvzR4gURERERNTe2A1L17d1y6dAkbN25ERkYGAGDs2LEIDAyEgYFBgxdIRERE1NTUDkgA0KZNG4SEhDR0LURERETNgtrnIBERERG1dgxIRERERCoYkIiIiIhUMCARERERqVA7IHXs2BF37typ0l5YWIiOHTs2SFFEREREmqR2QLpy5QoqKyurtJeVleHGjRsNUhQRERGRJtX7Mv8dO3ZIf//yyy8wNTWVnldWViIpKQmOjo4NWhwRERGRJtQ7II0YMQIAIJPJEBwcrDRNV1cXjo6OWLFiRYMWR0RERKQJ9Q5ICoUCAODk5ITjx4/D0tKy0YoiIiIi0iS176SdnZ3dGHUQERERNRtP9VMjSUlJSEpKQkFBgXRk6bEvv/yyQQojIiIi0hS1A9KiRYuwePFieHl5wdbWFjKZrDHqIiIiItIYtQNSXFwcNmzYgLfeeqsx6iEiIiLSOLXvg1ReXo5+/fo1Ri1EREREzYLaAentt99GfHx8Y9RCRERE1Cyo/RXbgwcPsHbtWvz666/o0aMHdHV1laavXLmywYojIiIi0gS1A9KZM2fg4eEBADh37pzSNJ6wTURERK2B2gFp3759jVEHERERUbOh9jlIj12+fBm//PIL/vzzTwCAEKLBiiIiIiLSJLUD0p07d+Dr64tnn30WQ4YMQW5uLgBg0qRJmDVrVoMXSERERNTU1P6KLTQ0FLq6usjJyYGrq6vUPnr0aISFhfEHa4mINMRx3i5Nl6C2K1FDNV0CUbXUPoK0e/duREdHo0OHDkrtzs7OuHr16lMVERsbC0dHR+jr68Pb2xvHjh2rtf/mzZvh4uICfX19uLm54aefflKaLpPJqn188sknUh9HR8cq06Oiop6qfiIiImpd1A5IpaWlaNOmTZX2u3fvQk9PT+0CEhISEBYWhoiICJw8eRLu7u7w9/dHQUFBtf2PHDmCsWPHYtKkSTh16hRGjBiBESNG
KF1Rl5ubq/T48ssvIZPJEBAQoDTW4sWLlfpNmzZN7fqJiIio9VE7ID3//PP4+uuvpecymQwKhQLLli3DCy+8oHYBK1euREhICCZMmICuXbsiLi4Obdq0qfFHbz/77DMMHjwYc+bMgaurK5YsWYJevXph9erVUh8bGxulxw8//IAXXngBHTt2VBrL2NhYqZ+hoaHa9RMREVHro3ZAWrZsGdauXYtXXnkF5eXleO+999C9e3ccPHgQ0dHRao1VXl6O1NRU+Pn5/VWQlhb8/PyQnJxc7TzJyclK/QHA39+/xv75+fnYtWsXJk2aVGVaVFQULCws0LNnT3zyySeoqKiosdaysjIUFxcrPYiIiKh1Uvsk7e7du+PixYtYvXo1jI2NUVJSgtdeew1TpkyBra2tWmPdvn0blZWVsLa2Vmq3trZGRkZGtfPk5eVV2z8vL6/a/l999RWMjY3x2muvKbVPnz4dvXr1grm5OY4cOYLw8HDk5ubWeCfwyMhILFq0qL6rRkRERC2Y2gEJAExNTfHBBx80dC2N4ssvv0RgYCD09fWV2sPCwqS/e/ToAblcjn//+9+IjIys9lyq8PBwpXmKi4thb2/feIUTERGRxqgdkNavXw8jIyO8/vrrSu2bN2/G/fv3ERwcXO+xLC0toa2tjfz8fKX2/Px82NjYVDuPjY1NvfsfOnQImZmZSEhIqLMWb29vVFRU4MqVK+jSpUuV6Xp6ek91EjoRERG1PGqfgxQZGQlLS8sq7VZWVli6dKlaY8nlcnh6eiIpKUlqUygUSEpKgo+PT7Xz+Pj4KPUHgD179lTb/4svvoCnpyfc3d3rrCUtLQ1aWlqwsrJSax2IiIio9VH7CFJOTg6cnJyqtDs4OCAnJ0ftAsLCwhAcHAwvLy/06dMHMTExKC0txYQJEwAAQUFBaN++PSIjIwEAM2bMwMCBA7FixQoMHToUmzZtwokTJ7B27VqlcYuLi7F58+Zqb1yZnJyMlJQUvPDCCzA2NkZycjJCQ0Px5ptvom3btmqvAxEREbUuagckKysrnDlzBo6Ojkrtp0+fhoWFhdoFjB49Grdu3cKCBQuQl5cHDw8PJCYmSidi5+TkQEvrrwNd/fr1Q3x8PObPn4/3338fzs7O2L59O7p376407qZNmyCEwNixY6ssU09PD5s2bcLChQtRVlYGJycnhIaGKp1jRERERP9cMqHmr8zOnTsXCQkJWL9+PQYMGAAAOHDgACZOnIhRo0Zh+fLljVJoc1NcXAxTU1MUFRXBxMRE0+VoXEv8iQMi0jz+1Ag1tfp+fqt9BGnJkiW4cuUKfH19oaPzaHaFQoGgoCC1z0EiIiIiao7UCkhCCOTl5WHDhg346KOPkJaWBgMDA7i5ucHBwaGxaiQiIiJqUmoHpM6dO+P8+fNwdnaGs7NzY9VFREREpDFqXeavpaUFZ2dn3Llzp7HqISIiItI4te+DFBUVhTlz5uDcuXONUQ8RERGRxql9knZQUBDu378Pd3d3yOVyGBgYKE2/e/dugxVHREREpAlqB6SYmJhGKIOIiIio+VA7IKnzW2tERERELZHa5yABQFZWFubPn4+xY8eioKAAAPDzzz/j/PnzDVocERERkSaoHZAOHDgANzc3pKSkYOvWrSgpKQHw6KdGIiIiGrxAIiIioqamdkCaN28ePvroI+zZswdyuVxqf/HFF3H06NEGLY6IiIhIE9QOSGfPnsXIkSOrtFtZWeH27dsNUhQRERGRJqkdkMzMzJCbm1ul/dSpU2jfvn2DFEVERESkSWoHpDFjxmDu3LnIy8uDTCaDQqHA4cOHMXv2bAQFBTVGjURERERNSu2AtHTpUri4uMDe3h4lJSXo2rUrBgwYgH79+mH+/PmNUSMRERFRk1L7PkhyuRzr1q3DggULcPbsWZSUlKBnz5784VoiIiJqNeodkBQKBT755BPs2LED5eXl8PX1RURERJWfGiEiIiJq6er9FdvHH3+M999/H0ZGRmjf
vj0+++wzTJkypTFrIyIiItKIegekr7/+Gv/73//wyy+/YPv27fjxxx+xceNGKBSKxqyPiIiIqMnVOyDl5ORgyJAh0nM/Pz/IZDLcvHmzUQojIiIi0pR6B6SKigro6+srtenq6uLhw4cNXhQRERGRJtX7JG0hBMaPHw89PT2p7cGDB3jnnXdgaGgotW3durVhKyQiIiJqYvUOSMHBwVXa3nzzzQYthoiIiKg5qHdAWr9+fWPWQURERNRsqH0nbSIiIqLWTu07aRMRETUUx3m7NF2C2q5EDdV0CdQEeASJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLRLAJSbGwsHB0doa+vD29vbxw7dqzW/ps3b4aLiwv09fXh5uaGn376SWn6+PHjIZPJlB6DBw9W6nP37l0EBgbCxMQEZmZmmDRpEkpKShp83YiIiKjl0XhASkhIQFhYGCIiInDy5Em4u7vD398fBQUF1fY/cuQIxo4di0mTJuHUqVMYMWIERowYgXPnzin1Gzx4MHJzc6XHt99+qzQ9MDAQ58+fx549e7Bz504cPHgQkydPbrT1JCIiopZDJoQQmizA29sbvXv3xurVqwEACoUC9vb2mDZtGubNm1el/+jRo1FaWoqdO3dKbX379oWHhwfi4uIAPDqCVFhYiO3bt1e7zPT0dHTt2hXHjx+Hl5cXACAxMRFDhgzB9evXYWdnV2WesrIylJWVSc+Li4thb2+PoqIimJiYPPX6txYt8W64RERPg3fSbtmKi4thampa5+e3Ro8glZeXIzU1FX5+flKblpYW/Pz8kJycXO08ycnJSv0BwN/fv0r//fv3w8rKCl26dMG7776LO3fuKI1hZmYmhSMA8PPzg5aWFlJSUqpdbmRkJExNTaWHvb292utLRERELYNGA9Lt27dRWVkJa2trpXZra2vk5eVVO09eXl6d/QcPHoyvv/4aSUlJiI6OxoEDB/DKK6+gsrJSGsPKykppDB0dHZibm9e43PDwcBQVFUmPa9euqb2+RERE1DK0yh+rHTNmjPS3m5sbevTogU6dOmH//v3w9fV9qjH19PSgp6fXUCUSERFRM6bRI0iWlpbQ1tZGfn6+Unt+fj5sbGyqncfGxkat/gDQsWNHWFpa4vLly9IYqieBV1RU4O7du7WOQ0RERP8MGg1Icrkcnp6eSEpKktoUCgWSkpLg4+NT7Tw+Pj5K/QFgz549NfYHgOvXr+POnTuwtbWVxigsLERqaqrUZ+/evVAoFPD29v47q0REREStgMYv8w8LC8O6devw1VdfIT09He+++y5KS0sxYcIEAEBQUBDCw8Ol/jNmzEBiYiJWrFiBjIwMLFy4ECdOnMDUqVMBACUlJZgzZw6OHj2KK1euICkpCcOHD0fnzp3h7+8PAHB1dcXgwYMREhKCY8eO4fDhw5g6dSrGjBlT7RVsRERE9M+i8XOQRo8ejVu3bmHBggXIy8uDh4cHEhMTpROxc3JyoKX1V47r168f4uPjMX/+fLz//vtwdnbG9u3b0b17dwCAtrY2zpw5g6+++gqFhYWws7PDyy+/jCVLliidQ7Rx40ZMnToVvr6+0NLSQkBAAFatWtW0K09ERETNksbvg9RS1fc+Cv8UvA8SEf1T8D5ILVuLuA8SERERUXPEgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWgWASk2NhaOjo7Q19eHt7c3
jh07Vmv/zZs3w8XFBfr6+nBzc8NPP/0kTXv48CHmzp0LNzc3GBoaws7ODkFBQbh586bSGI6OjpDJZEqPqKioRlk/IiIialk0HpASEhIQFhaGiIgInDx5Eu7u7vD390dBQUG1/Y8cOYKxY8di0qRJOHXqFEaMGIERI0bg3LlzAID79+/j5MmT+PDDD3Hy5Els3boVmZmZGDZsWJWxFi9ejNzcXOkxbdq0Rl1XIiIiahlkQgihyQK8vb3Ru3dvrF69GgCgUChgb2+PadOmYd68eVX6jx49GqWlpdi5c6fU1rdvX3h4eCAuLq7aZRw/fhx9+vTB1atX8cwzzwB4dARp5syZmDlz5lPVXVxcDFNTUxQVFcHExOSpxmhNHOft0nQJRERN4krUUE2XQH9DfT+/NXoEqby8HKmpqfDz85PatLS04Ofnh+Tk5GrnSU5OVuoPAP7+/jX2B4CioiLIZDKYmZkptUdFRcHCwgI9e/bEJ598goqKihrHKCsrQ3FxsdKDiIiIWicdTS789u3bqKyshLW1tVK7tbU1MjIyqp0nLy+v2v55eXnV9n/w4AHmzp2LsWPHKiXF6dOno1evXjA3N8eRI0cQHh6O3NxcrFy5stpxIiMjsWjRInVWj4iIWqGWeMScR73Up9GA1NgePnyIN954A0IIrFmzRmlaWFiY9HePHj0gl8vx73//G5GRkdDT06syVnh4uNI8xcXFsLe3b7ziiYiISGM0GpAsLS2hra2N/Px8pfb8/HzY2NhUO4+NjU29+j8OR1evXsXevXvrPE/I29sbFRUVuHLlCrp06VJlup6eXrXBiYiIiFofjZ6DJJfL4enpiaSkJKlNoVAgKSkJPj4+1c7j4+Oj1B8A9uzZo9T/cTi6dOkSfv31V1hYWNRZS1paGrS0tGBlZfWUa0NERESthca/YgsLC0NwcDC8vLzQp08fxMTEoLS0FBMmTAAABAUFoX379oiMjAQAzJgxAwMHDsSKFSswdOhQbNq0CSdOnMDatWsBPApHo0aNwsmTJ7Fz505UVlZK5yeZm5tDLpcjOTkZKSkpeOGFF2BsbIzk5GSEhobizTffRNu2bTWzIYiIiKjZ0HhAGj16NG7duoUFCxYgLy8PHh4eSExMlE7EzsnJgZbWXwe6+vXrh/j4eMyfPx/vv/8+nJ2dsX37dnTv3h0AcOPGDezYsQMA4OHhobSsffv2YdCgQdDT08OmTZuwcOFClJWVwcnJCaGhoUrnGBEREdE/l8bvg9RS8T5IylriVR1ERP8UvIrtLy3iPkhEREREzREDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCp0NF0AERERNa6W+IPimv6BXR5BIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZEKBiQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZGKZhGQYmNj4ejoCH19fXh7e+PYsWO19t+8eTNcXFygr68PNzc3/PTTT0rThRBYsGABbG1tYWBgAD8/P1y6dEmpz927dxEYGAgTExOYmZlh0qRJKCkpafB1IyIiopZH4wEpISEBYWFhiIiIwMmTJ+Hu7g5/f38UFBRU2//IkSMYO3YsJk2ahFOnTmHEiBEYMWIEzp07J/VZtmwZVq1ahbi4OKSkpMDQ0BD+/v548OCB1CcwMBDnz5/Hnj17sHPnThw8eBCTJ09u9PUlIiKi5k8mhBCaLMDb2xu9e/fG6tWrAQAKhQL29vaYNm0a5s2bV6X/6NGjUVpaip07d0ptffv2hYeHB+Li4iCEgJ2dHWbNmoXZs2cDAIqKimBtbY0NGzZgzJgx
SE9PR9euXXH8+HF4eXkBABITEzFkyBBcv34ddnZ2ddZdXFwMU1NTFBUVwcTEpCE2RYvmOG+XpksgIqJW5ErU0EYZt76f3zqNsvR6Ki8vR2pqKsLDw6U2LS0t+Pn5ITk5udp5kpOTERYWptTm7++P7du3AwCys7ORl5cHPz8/abqpqSm8vb2RnJyMMWPGIDk5GWZmZlI4AgA/Pz9oaWkhJSUFI0eOrLLcsrIylJWVSc+LiooAPNrQBCjK7mu6BCIiakUa6/P18bh1HR/SaEC6ffs2KisrYW1trdRubW2NjIyMaufJy8urtn9eXp40/XFbbX2srKyUpuvo6MDc3FzqoyoyMhKLFi2q0m5vb1/T6hEREdFTMo1p3PHv3bsHU1PTGqdrNCC1JOHh4UpHrhQKBe7evQsLCwvIZDINVqZ5xcXFsLe3x7Vr1/h1YyPjtm4a3M5Ng9u5aXA7KxNC4N69e3WeTqPRgGRpaQltbW3k5+crtefn58PGxqbaeWxsbGrt//i/+fn5sLW1Verj4eEh9VE9CbyiogJ3796tcbl6enrQ09NTajMzM6t9Bf9hTExM+OZrItzWTYPbuWlwOzcNbue/1Hbk6DGNXsUml8vh6emJpKQkqU2hUCApKQk+Pj7VzuPj46PUHwD27Nkj9XdycoKNjY1Sn+LiYqSkpEh9fHx8UFhYiNTUVKnP3r17oVAo4O3t3WDrR0RERC2Txr9iCwsLQ3BwMLy8vNCnTx/ExMSgtLQUEyZMAAAEBQWhffv2iIyMBADMmDEDAwcOxIoVKzB06FBs2rQJJ06cwNq1awEAMpkMM2fOxEcffQRnZ2c4OTnhww8/hJ2dHUaMGAEAcHV1xeDBgxESEoK4uDg8fPgQU6dOxZgxY+p1BRsRERG1bhoPSKNHj8atW7ewYMEC5OXlwcPDA4mJidJJ1jk5OdDS+utAV79+/RAfH4/58+fj/fffh7OzM7Zv347u3btLfd577z2UlpZi8uTJKCwsxHPPPYfExETo6+tLfTZu3IipU6fC19cXWlpaCAgIwKpVq5puxVsRPT09REREVPkKkhoet3XT4HZuGtzOTYPb+elo/D5IRERERM2Nxu+kTURERNTcMCARERERqWBAIiIiIlLBgERERESkggGJnlpkZCR69+4NY2NjWFlZYcSIEcjMzNR0Wa1eVFSUdDsLalg3btzAm2++CQsLCxgYGMDNzQ0nTpzQdFmtTmVlJT788EM4OTnBwMAAnTp1wpIlS+r8bSyq3cGDB/Hqq6/Czs4OMplM+o3Sx4QQWLBgAWxtbWFgYAA/Pz9cunRJM8W2AAxI9NQOHDiAKVOm4OjRo9izZw8ePnyIl19+GaWlpZourdU6fvw4Pv/8c/To0UPTpbQ6f/zxB/r37w9dXV38/PPPuHDhAlasWIG2bdtqurRWJzo6GmvWrMHq1auRnp6O6OhoLFu2DP/97381XVqLVlpaCnd3d8TGxlY7fdmyZVi1ahXi4uKQkpICQ0ND+Pv748GDB01cacvAy/ypwdy6dQtWVlY4cOAABgwYoOlyWp2SkhL06tUL//vf//DRRx/Bw8MDMTExmi6r1Zg3bx4OHz6MQ4cOabqUVu9f//oXrK2t8cUXX0htAQEBMDAwwDfffKPByloPmUyGbdu2STdIFkLAzs4Os2bNwuzZswEARUVFsLa2xoYNGzBmzBgNVts88QgSNZiioiIAgLm5uYYraZ2mTJmCoUOHws/PT9OltEo7duyAl5cXXn/9dVhZWaFnz55Yt26dpstqlfr164ekpCRcvHgRAHD69Gn89ttveOWVVzRcWeuVnZ2NvLw8pf2HqakpvL29kZycrMHKmi+N30mbWgeFQoGZM2eif//+Snc1p4axadMmnDx5EsePH9d0Ka3W77//jjVr1iAsLAzvv/8+jh8/junTp0MulyM4OFjT5bUq8+bNQ3FxMVxcXKCtrY3Kykp8/PHHCAwM1HRprVZeXh4ASL9S8Zi1tbU0jZQx
IFGDmDJlCs6dO4fffvtN06W0OteuXcOMGTOwZ88epZ/LoYalUCjg5eWFpUuXAgB69uyJc+fOIS4ujgGpgX333XfYuHEj4uPj0a1bN6SlpWHmzJmws7PjtqZmg1+x0d82depU7Ny5E/v27UOHDh00XU6rk5qaioKCAvTq1Qs6OjrQ0dHBgQMHsGrVKujo6KCyslLTJbYKtra26Nq1q1Kbq6srcnJyNFRR6zVnzhzMmzcPY8aMgZubG9566y2EhoZKP0pODc/GxgYAkJ+fr9Sen58vTSNlDEj01IQQmDp1KrZt24a9e/fCyclJ0yW1Sr6+vjh79izS0tKkh5eXFwIDA5GWlgZtbW1Nl9gq9O/fv8ptKi5evAgHBwcNVdR63b9/X+lHyAFAW1sbCoVCQxW1fk5OTrCxsUFSUpLUVlxcjJSUFPj4+GiwsuaLX7HRU5syZQri4+Pxww8/wNjYWPoe29TUFAYGBhqurvUwNjaucl6XoaEhLCwseL5XAwoNDUW/fv2wdOlSvPHGGzh27BjWrl2LtWvXarq0VufVV1/Fxx9/jGeeeQbdunXDqVOnsHLlSkycOFHTpbVoJSUluHz5svQ8OzsbaWlpMDc3xzPPPIOZM2fio48+grOzM5ycnPDhhx/Czs5OutKNVAiipwSg2sf69es1XVqrN3DgQDFjxgxNl9Hq/Pjjj6J79+5CT09PuLi4iLVr12q6pFapuLhYzJgxQzzzzDNCX19fdOzYUXzwwQeirKxM06W1aPv27at2nxwcHCyEEEKhUIgPP/xQWFtbCz09PeHr6ysyMzM1W3QzxvsgEREREangOUhEREREKhiQiIiIiFQwIBERERGpYEAiIiIiUsGARERERKSCAYmIiIhIBQMSERERkQoGJCIiIiIV/7iAdOXKFchkMqSlpWm6FElGRgb69u0LfX19eHh4aLqcVmPDhg0wMzPTdBkaJZPJsH379r81RnPZjvV5nwghMHnyZJibm0vv80GDBmHmzJlNWmtjGT9+fJ0/C7F//37IZDIUFhY2ai2HDx+Gm5sbdHV1+VMVf4MmP5Pq83qqy/379xEQEAATE5Mmed01pSYPSOPHj4dMJkNUVJRS+/bt2yGTyZq6nGYhIiIChoaGyMzMVPohwX+y5vKhDDRMyNCU3NxcvPLKK5ouo0HU532SmJiIDRs2YOfOncjNzUX37t2xdetWLFmy5G8tu7m8Bj777DNs2LBBel5d+OvXrx9yc3NhamraqLWEhYXBw8MD2dnZSjW1NM1pX9MSffXVVzh06BCOHDnSJK+7pqSRI0j6+vqIjo7GH3/8oYnFN4ry8vKnnjcrKwvPPfccHBwcYGFh0YBV0T+djY0N9PT0NF1Gg6jP+yQrKwu2trbo168fbGxsoKOjA3NzcxgbG9c47t957zY1U1PTOj/M5XI5bGxsGv1/OLOysvDiiy+iQ4cOTx0wWtK2p+plZWXB1dUV3bt3b5LXXZNq6h9/Cw4OFv/617+Ei4uLmDNnjtS+bds28WQ5ERERwt3dXWneTz/9VDg4OCiNNXz4cPHxxx8LKysrYWpqKhYtWiQePnwoZs+eLdq2bSvat28vvvzyS2me7OxsAUB8++23wsfHR+jp6Ylu3bqJ/fv3Ky3r7NmzYvDgwcLQ0FBYWVmJN998U9y6dUuaPnDgQDFlyhQxY8YMYWFhIQYNGlTt+lZWVopFixaJ9u3bC7lcLtzd3cXPP/8sTYfKjwpGRETUOE50dLTo1KmTkMvlwt7eXnz00UfS9DNnzogXXnhB6OvrC3NzcxESEiLu3bvXINsqISFBPPfcc0JfX194eXmJzMxMcezYMeHp6SkMDQ3F4MGDRUFBgVK969atEy4uLkJPT0906dJFxMbGVhl3y5YtYtCgQcLAwED06NFDHDlyRAhR/Q8uPt4usbGxonPnzkJPT09YWVmJgICAareXEEKsX79emJqaim3btknzvPzyyyInJ0ep3/bt20XPnj2Fnp6ecHJy
EgsXLhQPHz4UQgjh4OCgVIeDg4MoLCwUWlpa4vjx49K/Tdu2bYW3t7c05v/7f/9PdOjQQXqek5MjXn/9dWFqairatm0rhg0bJrKzsxtsm9UEgNi2bZtaY6xfv17Y29sLAwMDMWLECLF8+XJhampa7222aNEiYWtrK27fvi31HzJkiBg0aJCorKysts6GeJ8EBwdX+bcSouoP+zo4OIjFixeLt956SxgbG4vg4GBRVlYmpkyZImxsbISenp545plnxNKlS6X+1Y2rqr77lv3794vevXsLuVwubGxsxNy5c6VtJ4QQmzdvFt27d5fey76+vqKkpERax+HDh1e7vgBEdna29P75448/RFFRkdDX1xc//fSTUg1bt24VRkZGorS0VAhRv9en6no++Xj8A9V1rVtN+8269rd17f/ee+894ezsLAwMDISTk5OYP3++KC8vl6anpaWJQYMGCSMjI2FsbCx69eoljh8/Xuu+RtXly5fFsGHDhJWVlTA0NBReXl5iz549Sn0cHBzExx9/LCZMmCCMjIyEvb29+Pzzz5X6pKSkCA8PD6Gnpyc8PT3F1q1bBQBx6tSpapcrhBAPHjwQs2bNEnZ2dqJNmzaiT58+Yt++fdL0x/u6xMRE4eLiIgwNDYW/v7+4efOm1KeiokKEhoYKU1NTYW5uLubMmSOCgoKk11NNvv/+e9G1a1chl8uFg4ODWL58uTRt4MCBSttu4MCBNY6zY8cO4eXlJfT09ISFhYUYMWKENO3u3bvirbfeEmZmZsLAwEAMHjxYXLx4sd7r98svvwg9PT3xxx9/KC1z+vTp4oUXXpCeHzp0SPos69Chg5g2bZr03hKi6r5BIwFp+PDhYuvWrUJfX19cu3ZNCPH0AcnY2FhMmTJFZGRkiC+++EIAEP7+/uLjjz8WFy9eFEuWLBG6urrSch6/uTt06CC+//57ceHCBfH2228LY2NjaYf+xx9/iHbt2onw8HCRnp4uTp48KV566SWlDT1w4EBhZGQk5syZIzIyMkRGRka167ty5UphYmIivv32W5GRkSHee+89oaurK/3j5+bmim7duolZs2aJ3NxcpVDzpPfee0+0bdtWbNiwQVy+fFkcOnRIrFu3TgghRElJibC1tRWvvfaaOHv2rEhKShJOTk7SLzj/3W3l4uIiEhMTxYULF0Tfvn2Fp6enGDRokPjtt9/EyZMnRefOncU777wjLeubb74Rtra2YsuWLeL3338XW7ZsEebm5mLDhg1Vxt25c6fIzMwUo0aNEg4ODuLhw4eirKxMxMTECBMTE5Gbmyttl+PHjwttbW0RHx8vrly5Ik6ePCk+++yz6l9o4tGbSldXV3h5eYkjR46IEydOiD59+oh+/fpJfQ4ePChMTEzEhg0bRFZWlti9e7dwdHQUCxcuFEIIUVBQIH0A5ObmSkGwV69e4pNPPhFCPNr5mpubC7lcLv37vf322yIwMFAIIUR5eblwdXUVEydOFGfOnBEXLlwQ48aNE126dJF+vfzvbrOaVBeQahvj6NGjQktLS0RHR4vMzEzx2WefCTMzM6WAVNc2q6ioED4+PtIOcPXq1cLMzExcvXq1xjob4n1SWFgoFi9eLDp06KD0b1VdQDIxMRHLly8Xly9fFpcvXxaffPKJsLe3FwcPHhRXrlwRhw4dEvHx8bW+BlTVZ99y/fp10aZNG/Gf//xHpKeni23btglLS0vpQ/nmzZtCR0dHrFy5UmRnZ4szZ86I2NhYaX2fDEiFhYXCx8dHhISESO+TiooKpYAkhBCjRo0Sb775plKtAQEBUlt9Xp9PqqioELm5ucLExETExMSI3Nxccf/+/TrX7fG/hep+sz7729r2f0IIsWTJEnH48GGRnZ0tduzYIaytrUV0dLQ0vVu3buLNN98U6enp4uLFi+K7774TaWlpNe5rqpOWlibi4uLE2bNnxcWLF8X8+fOFvr6+0uvawcFBmJubi9jYWHHp0iURGRkptLS0pM+He/fu
iXbt2olx48aJc+fOiR9//FF07NixzoD09ttvi379+omDBw9Kr1c9PT3p/fF4X+fn5yeOHz8uUlNThaurqxg3bpw0RnR0tGjbtq3YsmWLuHDhgpg0aZIwNjauNSCdOHFCaGlpicWLF4vMzEyxfv16YWBgIAXiO3fuiJCQEOHj4yNyc3PFnTt3qh1n586dQltbWyxYsEBcuHBBpKWlSf8DIoQQw4YNE66uruLgwYMiLS1N+Pv7i86dO0sht671q6ioENbW1uL//u//pDFV2y5fviwMDQ3Fp59+Ki5evCgOHz4sevbsKcaPH6/07/fkvkFjAUkIIfr27SsmTpwohHj6gOTg4KD0f6VdunQRzz//vPS8oqJCGBoaim+//VYI8ddOLCoqSurz8OFD0aFDB+kNtWTJEvHyyy8rLfvatWsCgMjMzBRCPHqj9+zZs871tbOzEx9//LFSW+/evcV//vMf6bm7u3uN/9cihBDFxcVCT09PaYfwpLVr14q2bdsqJeFdu3YJLS0tkZeXJ4T4e9vqyRfdt99+KwCIpKQkqS0yMlJ06dJFet6pUyfpw+WxJUuWCB8fnxrHPX/+vAAg0tPThRB//R/Dk7Zs2SJMTExEcXFxjdvqSevXrxcAxNGjR6W29PR0AUCkpKQIIYTw9fVVeqMK8ejoj62trfT8yZDxWFhYmBg6dKgQQoiYmBgxevRopaMenTt3FmvXrpXG69Kli1AoFNL8ZWVlwsDAQPzyyy9CiIbZZtWpLiDVNsbYsWPFkCFDlMYYPXq00r9FfbZZVlaWMDY2FnPnzhUGBgZi48aNNdYoRMO8T4Souo8QovqA9OT/vQohxLRp08SLL76o9G/0pOpeA6rqs295//33q7wWYmNjhZGRkaisrBSpqakCgLhy5Uq1y3hy/1ndugkhqgSkbdu2KR0tenxU6fFrtT6vz+qYmppKH5T1WbfH9aruN+va39a1/6vOJ598Ijw9PaXnxsbG0v9sqKpuX1Nf3bp1E//973+l5w4ODkphVKFQCCsrK7FmzRohhBCff/65sLCwEH/++afUZ82aNbUGpKtXrwptbW1x48YNpXZfX18RHh4urQMAcfnyZWl6bGyssLa2lp7b2tqKZcuWSc8fvzZrC0jjxo0TL730klLbnDlzRNeuXaXnM2bMqPXIkRBC+Pj4SP/DqOrixYsCgDh8+LDUdvv2bWFgYCC+++67eq/fjBkzxIsvvig9Vz2qNGnSJDF58mSlZR86dEhoaWlJ/x6q+waNXsUWHR2Nr776Cunp6U89Rrdu3aCl9ddqWFtbw83NTXqura0NCwsLFBQUKM3n4+Mj/a2jowMvLy+pjtOnT2Pfvn0wMjKSHi4uLgAefd/6mKenZ621FRcX4+bNm+jfv79Se//+/dVa5/T0dJSVlcHX17fG6e7u7jA0NFRahkKhQGZmptT2tNuqR48eSvMAUJrP2tpamqe0tBRZWVmYNGmS0vb76KOPlLad6ri2trYAUGXZT3rppZfg4OCAjh074q233sLGjRtx//79GvsDj/5te/fuLT13cXGBmZmZ0r/14sWLlWoNCQlBbm5urWMPHDgQv/32GyorK3HgwAEMGjQIgwYNwv79+3Hz5k1cvnwZgwYNkpZx+fJlGBsbS8swNzfHgwcPkJWV1ajbrDq1jZGeng5vb2+l/k++V+q7zTp27Ijly5cjOjoaw4YNw7hx42qsp6HeJ+rw8vJSej5+/HikpaWhS5cumD59Onbv3v3UY9e2b0lPT4ePj4/SeRr9+/dHSUkJrl+/Dnd3d/j6+sLNzQ2vv/461q1b97fP1RwyZAh0dXWxY8cOAMCWLVtgYmICPz8/AHW/PuurrnV7THW/Wdf+tq79HwAkJCSgf//+sLGxgZGREebPn4+cnBxpelhYGN5++234+fkhKipKrfV6rKSkBLNnz4arqyvMzMxgZGSE9PR0peUAyu8vmUwGGxsbpfdXjx49oK+vL/VRfX+pOnv2LCorK/Hs
s88qbaMDBw4orUebNm3QqVMn6bmtra203KKiIuTm5iq9tx+/NmuTnp5e7fvy0qVLqKysrHXeJ6WlpdX6+aWjo6NUm4WFBbp06aL0/q9t/QAgMDBQ2v8CwMaNGzF06FDp/LjTp09jw4YNStvQ398fCoUC2dnZ0jhPbhOdeq9hIxgwYAD8/f0RHh6O8ePHK03T0tKCEEKp7eHDh1XG0NXVVXouk8mqbVMoFPWuq6SkBK+++iqio6OrTHv8gQJAKZA0JgMDgwYZ52m31ZN9Hu/8VNsez1NSUgIAWLduXZUPWm1t7TrHre3fydjYGCdPnsT+/fuxe/duLFiwAAsXLsTx48ef+iTRkpISLFq0CK+99lqVaU/uxFQNGDAA9+7dw8mTJ3Hw4EEsXboUNjY2iIqKgru7O+zs7ODs7Cwtw9PTExs3bqwyTrt27Rp1m1Xn745R32128OBBaGtr48qVK6ioqICOjkZ3N0pU37u9evVCdnY2fv75Z/z6669444034Ofnh++//75J69LW1saePXtw5MgR7N69G//973/xwQcfICUlBU5OTk81plwux6hRoxAfH48xY8YgPj4eo0ePlv496np9NjTVbV/X/vb333+vdbzk5GQEBgZi0aJF8Pf3h6mpKTZt2oQVK1ZIfRYuXIhx48Zh165d+PnnnxEREYFNmzZh5MiR9a579uzZ2LNnD5YvX47OnTvDwMAAo0aNqnKi+d/9/FFVUlICbW1tpKamVtkfGBkZ1bpc1c9QTWmIz7C61q93797o1KkTNm3ahHfffRfbtm1TurqypKQE//73vzF9+vQqYz/zzDPS30++PjV+H6SoqCj8+OOPSE5OVmpv164d8vLylDZAQ94n4ujRo9LfFRUVSE1NhaurK4BHO8vz58/D0dERnTt3VnqoE4pMTExgZ2eHw4cPK7UfPnwYXbt2rfc4zs7OMDAwqPHSZldXV5w+fRqlpaVKy9DS0kKXLl3qvZyGYG1tDTs7O/z+++9Vtp06O3i5XF7t/6Ho6OjAz88Py5Ytw5kzZ3DlyhXs3bu3xnEqKipw4sQJ6XlmZiYKCwuV/q0zMzOr1Nq5c2fpaJuurm6VWszMzNCjRw+sXr0aurq6cHFxwYABA3Dq1Cns3LkTAwcOlPr26tULly5dgpWVVZVlmJqaNtg2awiurq5ISUlRanvyvQLUb5slJCRg69at2L9/P3Jycmq9zL6h3id/l4mJCUaPHo1169YhISEBW7Zswd27dwFU/xqoSW37FldXVyQnJyvt1w4fPgxjY2N06NABwKMdf//+/bFo0SKcOnUKcrkc27Ztq3ZZNb1PVAUGBiIxMRHnz5/H3r17ERgYKE2r6/VZX/VZt+rUtb+ta/935MgRODg44IMPPoCXlxecnZ1x9erVKv2effZZhIaGYvfu3Xjttdewfv16APXfhocPH8b48eMxcuRIuLm5wcbGBleuXKlzvie5urrizJkzePDggdSm+v5S1bNnT1RWVqKgoKDK9rGxsanXck1NTWFra6v03n782qyr3urel88++2yVsFabHj161Pr5VVFRoVTbnTt3kJmZqfb7PzAwEBs3bsSPP/4ILS0tDB06VJrWq1cvXLhwodr9llwur3Y8jQckNzc3BAYGYtWqVUrtgwYNwq1bt7Bs2TJkZWUhNjYWP//8c4MtNzY2Ftu2bUNGRgamTJmCP/74AxMnTgQATJkyBXfv3sXYsWNx/PhxZGVl4ZdffsGECRPUOqwIAHPmzEF0dDQSEhKQmZmJefPmIS0tDTNmzKj3GPr6+pg7dy7ee+89fP3118jKysLRo0fxxRdfAHj0otDX10dwcDDOnTuHffv2Ydq0aXjrrbekr8Sa0qJFixAZGYlVq1bh4sWLOHv2LNavX4+VK1fWewxHR0eUlJQgKSkJt2/fxv3797Fz506sWrUKaWlpuHr1Kr7++msoFIpaQ6Curi6mTZuGlJQUpKamYvz48ejbty/69OkDAFiwYAG+/vpr
LFq0COfPn0d6ejo2bdqE+fPnK9WSlJSEvLw8pa87Bg0ahI0bN0phyNzcHK6urkhISFAKSIGBgbC0tMTw4cNx6NAhZGdnY//+/Zg+fbr01UNDbLOGMH36dCQmJmL58uW4dOkSVq9ejcTERKU+dW2z69ev491330V0dDSee+45rF+/HkuXLq31g6Ah3id/x8qVK/Htt98iIyMDFy9exObNm2FjYyMdmazpNVCd2vYt//nPf3Dt2jVMmzYNGRkZ+OGHHxAREYGwsDBoaWkhJSUFS5cuxYkTJ5CTk4OtW7fi1q1bUsBS5ejoiJSUFFy5cgW3b9+u8UjFgAEDYGNjg8DAQDg5OSkdqazP67M+6lq3mtS1v61r/+fs7IycnBxs2rQJWVlZWLVqlVKg/PPPPzF16lTs378fV69exeHDh3H8+HFpm1a3r6mOs7Mztm7dirS0NJw+fRrjxo1T+8jQuHHjIJPJEBISggsXLuCnn37C8uXLa53n2WefRWBgIIKCgrB161ZkZ2fj2LFjiIyMxK5du+q97BkzZiAqKgrbt29HRkYG/vOf/9R5U8dZs2YhKSkJS5YswcWLF/HVV19h9erVmD17dr2XCzy6h9m3336LiIgIpKen4+zZs9IRQ2dnZwwfPhwhISH47bffcPr0abz55pto3749hg8frtZyAgMDcfLkSXz88ccYNWqU0i1O5s6diyNHjmDq1KlIS0vDpUuX8MMPP2Dq1Kk1D1jrmVWNQPUkQyEendwol8uFajlr1qwR9vb2wtDQUAQFBYmPP/642sv8n1TdSYsODg7i008/lZYFQMTHx4s+ffoIuVwuunbtKvbu3as0z8WLF8XIkSOlyw5dXFzEzJkzpRMQq1tOdSorK8XChQtF+/btha6ubpXLl4Wo38mnlZWV4qOPPhIODg5CV1dX6TJkIep/mf/TbKsnTx5UPQFUiOpPcty4caPw8PAQcrlctG3bVgwYMEBs3bq1xnH/+OMPAUDp0tV33nlHWFhYSJfeHjp0SAwcOFC0bdtWukQ9ISGhxm32uK4tW7aIjh07Cj09PeHn51flaqrExETRr18/YWBgIExMTESfPn2kE6yFeHR5aufOnYWOjo7S6+/xhQWPT8AU4tGJggCqXNWYm5srgoKChKWlpdDT0xMdO3YUISEhoqioqEG3mSpUc5J2XWN88cUXokOHDsLAwEC8+uqr1V7mX9M2UygUwtfXV/j7+yudrDtt2jTRqVOnGq8Qaqj3SX1P0n78Gn9s7dq1wsPDQxgaGgoTExPh6+srTp48KU2v6TXwpPruW2q7FP7ChQvC399ftGvXTujp6Ylnn31W6SRg1fdxZmam6Nu3rzAwMKj2Mv8nvffeewKAWLBgQZXa6/P6VKV6knZd6yZEzfvNuva3de3/5syZIywsLISRkZEYPXq0+PTTT6XXbFlZmRgzZoywt7cXcrlc2NnZialTpyqdKK26r6lOdna2eOGFF4SBgYGwt7cXq1evrtdrS/V1m5ycLNzd3YVcLhceHh5iy5YtdV7FVl5eLhYsWCAcHR2Frq6usLW1FSNHjhRnzpwRQlS/D1a98Onhw4dixowZwsTERJiZmYmwsDC1LvN/vN0fX737WH1O0hbi0UU2j/dvlpaW4rXXXpOmPb7M39TUVBgYGAh/f/9qL/Ovbf0e69OnjwBQ5X0nhBDHjh0TL730kjAyMhKGhoaiR48eSheHqP77yYRoJl9SEhG1YFeuXIGTkxNOnTrFnwwiagU0/hUbERERUXPDgERERESkgl+xEREREangESQiIiIiFQxIRERERCoYkIiIiIhUMCARERERqWBAIiIiIlLBgERERESkggGJiIiISAUDEhEREZGK/w9A4xLLEROwnwAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.517605633802817, Median = 3.0\n",
+ "Accuracy 0.665476\n",
+ "Precision 0.662005\n",
+ "Recall 0.67619\n",
+ "FPR 0.345238\n",
+ "F1 0.669022\n",
+ "Mean H 3.517606\n",
+ "Correct Adjustment 0.067857\n",
+ "Incorrect Adjustment 0.07619\n",
+ "Recovery -0.008333\n",
+ "Leaderboard String | MODEL_NAME | 66.5 | 66.2 | 67.6 | 66....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 6\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:46<00:00, 109.30it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.6198630136986303, Median = 3.0\n",
+ "Accuracy 0.679762\n",
+ "Precision 0.674365\n",
+ "Recall 0.695238\n",
+ "FPR 0.335714\n",
+ "F1 0.684642\n",
+ "Mean H 3.619863\n",
+ "Correct Adjustment 0.055952\n",
+ "Incorrect Adjustment 0.069048\n",
+ "Recovery -0.013095\n",
+ "Leaderboard String | MODEL_NAME | 68.0 | 67.4 | 69.5 | 68....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 7\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:47<00:00, 108.73it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.867507886435331, Median = 4.0\n",
+ "Accuracy 0.663095\n",
+ "Precision 0.637827\n",
+ "Recall 0.754762\n",
+ "FPR 0.428571\n",
+ "F1 0.691385\n",
+ "Mean H 3.867508\n",
+ "Correct Adjustment 0.075\n",
+ "Incorrect Adjustment 0.072619\n",
+ "Recovery 0.002381\n",
+ "Leaderboard String | MODEL_NAME | 66.3 | 63.8 | 75.5 | 69....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 8\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:46<00:00, 109.62it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.5703971119133575, Median = 3.0\n",
+ "Accuracy 0.678571\n",
+ "Precision 0.685644\n",
+ "Recall 0.659524\n",
+ "FPR 0.302381\n",
+ "F1 0.67233\n",
+ "Mean H 3.570397\n",
+ "Correct Adjustment 0.05\n",
+ "Incorrect Adjustment 0.075\n",
+ "Recovery -0.025\n",
+ "Leaderboard String | MODEL_NAME | 67.9 | 68.6 | 66.0 | 67....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 9\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:46<00:00, 109.24it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.6156462585034017, Median = 3.0\n",
+ "Accuracy 0.659524\n",
+ "Precision 0.647577\n",
+ "Recall 0.7\n",
+ "FPR 0.380952\n",
+ "F1 0.672769\n",
+ "Mean H 3.615646\n",
+ "Correct Adjustment 0.053571\n",
+ "Incorrect Adjustment 0.058333\n",
+ "Recovery -0.004762\n",
+ "Leaderboard String | MODEL_NAME | 66.0 | 64.8 | 70.0 | 67....\n",
+ "dtype: object\n",
+ "Evaluating Random Seed 10\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 5131/5131 [00:46<00:00, 109.96it/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Horizon statistics (# of comments between first positive forecast and conversation end):\n",
+ "Mean = 3.6433566433566433, Median = 3.0\n",
+ "Accuracy 0.659524\n",
+ "Precision 0.652968\n",
+ "Recall 0.680952\n",
+ "FPR 0.361905\n",
+ "F1 0.666667\n",
+ "Mean H 3.643357\n",
+ "Correct Adjustment 0.042857\n",
+ "Incorrect Adjustment 0.061905\n",
+ "Recovery -0.019048\n",
+ "Leaderboard String | MODEL_NAME | 66.0 | 65.3 | 68.1 | 66....\n",
+ "dtype: object\n",
+ "{'Accuracy': np.float64(0.6685714285714287), 'Precision': np.float64(0.6662137103526862), 'Recall': np.float64(0.6785714285714287), 'FPR': np.float64(0.3414285714285714), 'F1': np.float64(0.6711682543124684), 'Mean H': np.float64(3.604889281050231), 'Correct Adjustment': np.float64(0.052380952380952375), 'Incorrect Adjustment': np.float64(0.06607142857142859), 'Recovery': np.float64(-0.013690476190476192), 'Leaderboard String': ['| MODEL_NAME | 68.3 | 67.5 | 70.7 | 69.1 | 34.0 | 3.55 | -1.7 (3.7 - 5.4) |', '| MODEL_NAME | 66.7 | 68.3 | 62.1 | 65.1 | 28.8 | 3.51 | -1.7 (5.0 - 6.7) |', '| MODEL_NAME | 68.0 | 67.0 | 70.7 | 68.8 | 34.8 | 3.61 | -1.9 (4.2 - 6.1) |', '| MODEL_NAME | 65.0 | 67.3 | 58.3 | 62.5 | 28.3 | 3.54 | -1.7 (5.0 - 6.7) |', '| MODEL_NAME | 66.5 | 66.2 | 67.6 | 66.9 | 34.5 | 3.52 | -0.8 (6.8 - 7.6) |', '| MODEL_NAME | 68.0 | 67.4 | 69.5 | 68.5 | 33.6 | 3.62 | -1.3 (5.6 - 6.9) |', '| MODEL_NAME | 66.3 | 63.8 | 75.5 | 69.1 | 42.9 | 3.87 | 0.2 (7.5 - 7.3) |', '| MODEL_NAME | 67.9 | 68.6 | 66.0 | 67.2 | 30.2 | 3.57 | -2.5 (5.0 - 7.5) |', '| MODEL_NAME | 66.0 | 64.8 | 70.0 | 67.3 | 38.1 | 3.62 | -0.5 (5.4 - 5.8) |', '| MODEL_NAME | 66.0 | 65.3 | 68.1 | 66.7 | 36.2 | 3.64 | -1.9 (4.3 - 6.2) |']}\n"
+ ]
+ }
+ ],
+ "source": [
+ "all_results = {}\n",
+ "for seed in range(1,11):\n",
+ " print(f\"Evaluating Random Seed {seed}\")\n",
+ " config_dict = {\n",
+ " \"output_dir\": f\"{YOUR_SAVING_DIRECTORY}/seed{seed}\", \n",
+ " \"context_mode\": \"normal\", # set to normal by default\n",
+ " \"device\": DEVICE\n",
+ " }\n",
+ " saved_model_path = os.path.join(forecasting_models_path, f'seed-{seed}')\n",
+ "\n",
+    "    # Load the pre-tuned config (best checkpoint and decision threshold) saved for this seed\n",
+ " tuned_config_file = os.path.join(saved_model_path, \"dev_config.json\")\n",
+ " with open(tuned_config_file, 'r') as file:\n",
+ " tuned_config = json.load(file)\n",
+ " \n",
+ " normal_bert = BERTCGAModel(os.path.join(saved_model_path, tuned_config['best_checkpoint']), config=config_dict)\n",
+ " normal_bert.best_threshold = tuned_config['best_threshold']\n",
+ " normal_bert_forecaster = Forecaster(normal_bert, label_metadata)\n",
+ "\n",
+ " # corpus = copy.deepcopy(corpus)\n",
+ " corpus = normal_bert_forecaster.transform(corpus, transform_selector)\n",
+ " _, cur_metrics= normal_bert_forecaster.summarize(corpus, lambda c: c.meta['split'] == \"test\")\n",
+ "\n",
+ " update_metrics(all_results, cur_metrics)\n",
+ "\n",
+ "for metric in all_results:\n",
+ " if metric == \"Leaderboard String\":\n",
+ " continue\n",
+ " all_results[metric] = sum(all_results[metric]) / len(all_results[metric])\n",
+ "\n",
+ "print(all_results)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"Accuracy\": 0.6685714285714287,\n",
+ " \"Precision\": 0.6662137103526862,\n",
+ " \"Recall\": 0.6785714285714287,\n",
+ " \"FPR\": 0.3414285714285714,\n",
+ " \"F1\": 0.6711682543124684,\n",
+ " \"Mean H\": 3.604889281050231,\n",
+ " \"Correct Adjustment\": 0.052380952380952375,\n",
+ " \"Incorrect Adjustment\": 0.06607142857142859,\n",
+ " \"Recovery\": -0.013690476190476192,\n",
+ " \"Leaderboard String\": \"| BERT-base | 66.9 | 66.6 | 67.9 | 67.1 | 34.1 | 3.60 | -1.4 (5.2 - 6.6) |\"\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+ "leaderboard_string = (f\"| BERT-base | \"\n",
+ " f\"{all_results['Accuracy']*100:.1f} | \"\n",
+ " f\"{all_results['Precision']*100:.1f} | \"\n",
+ " f\"{all_results['Recall']*100:.1f} | \"\n",
+ " f\"{all_results['F1']*100:.1f} | \"\n",
+ " f\"{all_results['FPR']*100:.1f} | \"\n",
+ " f\"{all_results['Mean H']:.2f} | \"\n",
+ " f\"{(all_results['Correct Adjustment']-all_results['Incorrect Adjustment'])*100:.1f} \"\n",
+ " f\"({all_results['Correct Adjustment']*100:.1f} - {all_results['Incorrect Adjustment']*100:.1f}) |\")\n",
+ "all_results['Leaderboard String'] = leaderboard_string\n",
+ "print(json.dumps(all_results, indent=4))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "sqt_env",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/setup.py b/setup.py
index 8cf22b4e..13418dde 100644
--- a/setup.py
+++ b/setup.py
@@ -74,6 +74,7 @@
],
extras_require={
"craft": ["torch>=0.12"],
+ "forecaster": ["torch>=0.12", "datasets"],
},
classifiers=[
"Programming Language :: Python",