
Commit c1b2f8b

fix args & args loading, add train/inference scripts
1 parent 5ffb946 commit c1b2f8b

File tree

8 files changed: 375 additions, 10 deletions


CHANGELOG.md

Lines changed: 9 additions & 0 deletions
@@ -1,3 +1,12 @@
+1.0.1 (May 16, 2020)
+
+- Uploaded training / testing data
+- Uploaded pre-trained DistilBERT embeddings
+- Fixed requirements
+- Minor fixes to arg loading and paths
+
+Thanks to Prakhar Gupta for pointing out the issues in the codebase!
+
 1.0.0 (April 15, 2020)
 
 Initial Public Release of MaUde, an unreferenced metric for online dialog evaluation, to appear in ACL 2020

README.md

Lines changed: 21 additions & 2 deletions
@@ -4,9 +4,20 @@
 
 Contains code of the paper titled _"Learning an Unreferenced Metric for Online Dialogue Evaluation"_ to appear at **ACL 2020**, [Arxiv](https://arxiv.org/abs/2005.00583)
 
+## Installation
+
+- `pip install -r requirements.txt`
+- Install [ParlAI](https://github.com/facebookresearch/ParlAI#installing-parlai)
+
 ## Getting the data
 
-To get the trained models, [download the data from here](https://drive.google.com/file/d/1Ysso9hdzSenK13LjOFombyXYqA_kv-Vy/view?usp=sharing).
+- Get the `convai2` train and test data and pre-trained DistilBERT [embeddings here](https://drive.google.com/file/d/1VVcsxmUrDSRIfunPWe9UO1aeCz-lITNy/view?usp=sharing). Download and unzip into the folder `convai2_data`.
+- Get the trained model checkpoints [from here](https://drive.google.com/file/d/1Ysso9hdzSenK13LjOFombyXYqA_kv-Vy/view?usp=sharing). Download and unzip into the folder `full_acl_runs`.
+- For individual licensing reasons we cannot release the train/test data of MultiWoz, Frames and DailyDialog. Please [send me a mail](mailto:[email protected]) if you need them!
+- Run inference using `./run_inference.sh`
+
+**N.B.** - For model names and checkpoints, please refer to the `run_inference.sh` script.
+
 
 ## Computing Backtranslation
 
@@ -38,6 +49,8 @@ For baselines, add the appropriate flag:
 --train_baseline [infersent/ruber/bertnli]
 ```
 
+An example training script is provided at [`run_training.sh`](run_training.sh).
+
 ## Inference Script
 
 ```
@@ -48,7 +61,8 @@ For baselines, add the appropriate flag:
 --test_column true_response --results_file "results.jsonl"
 ```
 
-Outputs the results in a `jsonl` file. To measure human correaltion with [See et al 2019](https://parl.ai/projects/controllable_dialogue/), specify `--human_eval` flag and `--human_eval_file` location.
+- Outputs the results in a `jsonl` file. To measure human correlation with [See et al 2019](https://parl.ai/projects/controllable_dialogue/), specify the `--human_eval` flag and the `--human_eval_file` location.
+- We have also added a script to run inference on our trained checkpoints: [`run_inference.sh`](run_inference.sh).
 
 ## Acknowledgements
 
@@ -60,6 +74,11 @@
 - ParlAI - https://parl.ai/
 - See et al 2019 data - https://parl.ai/projects/controllable_dialogue/
 
+## Questions
+
+- Please send a mail to [[email protected]](mailto:[email protected]) for questions / clarifications.
+- Open an issue on GitHub.
+
 ## Citation
 
 If our work is useful for your research, consider citing it using the following bibtex:
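
Since the inference script writes its scores to `results.jsonl`, here is a minimal Python sketch for consuming that output, assuming the usual one-JSON-object-per-line jsonl convention; the emitted field names should be checked against what `codes/inference.py` actually writes.

```python
import json

def load_results(path="results.jsonl"):
    """Read a jsonl results file: one JSON object per line."""
    rows = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:  # tolerate trailing blank lines
                rows.append(json.loads(line))
    return rows

if __name__ == "__main__":
    rows = load_results()
    print(f"loaded {len(rows)} result rows")
    print(rows[0] if rows else "empty file")  # inspect the emitted keys
```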

args.py

Lines changed: 2 additions & 2 deletions
@@ -130,9 +130,9 @@ def get_args(command=None):
     )
     parser.add_argument(
         "--logger_dir",
-        default="/private/home/koustuvs/mlp/latentDialogAnalysis/logs/",
+        default="./",
         type=str,
-        help="batch size",
+        help="log directory (must be created)",
     )
     parser.add_argument("--log_interval", default=100, type=int, help="log interval")
     parser.add_argument(
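
For reference, a self-contained sketch of the corrected `--logger_dir` definition; the surrounding parser here is scaffolding, the real definition lives inside `get_args()` in `args.py`.

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--logger_dir",
    default="./",  # was a hard-coded /private/home/... cluster path
    type=str,
    help="log directory (must be created)",
)

# Parsing an empty argv shows the new default in effect.
args = parser.parse_args([])
print(args.logger_dir)  # -> ./
```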

codes/inference.py

Lines changed: 3 additions & 0 deletions
@@ -7,6 +7,9 @@
 #
 """
 # File to run various inferences
+import sys
+import os
+sys.path.append(os.getcwd())
 import torch
 from args import get_args
 from logbook.logbook import LogBook
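
The three added lines exist so that `python codes/inference.py`, launched from the repository root, can import top-level modules such as `args`: Python puts the script's own directory (`codes/`) on `sys.path`, not the working directory. A standalone sketch of the same pattern:

```python
import os
import sys

# Running `python codes/inference.py` puts codes/ on sys.path, so repo-root
# modules such as args.py are not importable by default. Appending the
# current working directory fixes that, provided the script is launched
# from the repository root.
sys.path.append(os.getcwd())

try:
    from args import get_args  # resolves only when cwd is the repo root
except ImportError:
    sys.exit("Launch from the repository root, e.g. `python codes/inference.py`")
```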

data.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 import random
 from parlai.core.params import ParlaiParser
 from parlai.agents.repeat_label.repeat_label import RepeatLabelAgent
-from parlai_internal.agents.ir_baseline.ir_baseline import IrBaselineAgent
+from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent
 from parlai.core.worlds import create_task
 from sklearn.decomposition import PCA
 import numpy as np
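
The fix swaps the Facebook-internal `parlai_internal` namespace for the public one. A guarded variant (our own suggestion, not part of the commit) gives a clearer error when ParlAI is missing:

```python
try:
    from parlai.agents.ir_baseline.ir_baseline import IrBaselineAgent
except ImportError as err:
    raise ImportError(
        "ParlAI is required; install it per "
        "https://github.com/facebookresearch/ParlAI#installing-parlai"
    ) from err
```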

requirements.txt

Lines changed: 9 additions & 5 deletions
@@ -1,5 +1,9 @@
-scikit-learn
-numpy
-matplotlib
-pytorch_transformers
-nltk
+torch==1.2.0
+scikit-learn==0.21.2
+numpy==1.16.4
+matplotlib==3.1.1
+pytorch-lightning==0.5.2.1
+transformers==2.1.1
+nltk==3.4.5
+wandb==0.8.5
+PyYAML==5.1.1
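
A small sketch for checking that the newly pinned versions are satisfied in the current environment, using `pkg_resources` from setuptools; the pin strings are copied from the file above.

```python
import pkg_resources

PINS = [
    "torch==1.2.0",
    "scikit-learn==0.21.2",
    "numpy==1.16.4",
    "matplotlib==3.1.1",
    "pytorch-lightning==0.5.2.1",
    "transformers==2.1.1",
    "nltk==3.4.5",
    "wandb==0.8.5",
    "PyYAML==5.1.1",
]

for pin in PINS:
    try:
        pkg_resources.require(pin)  # raises on missing or mismatched versions
        print(f"ok          {pin}")
    except (pkg_resources.DistributionNotFound,
            pkg_resources.VersionConflict) as err:
        print(f"unsatisfied {pin}: {err}")
```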

run_inference.sh

Lines changed: 306 additions & 0 deletions
Large diffs are not rendered by default.

run_training.sh

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+#!/bin/sh
+BATCH_SIZE=64
+MODEL_SAVE_DIR=full_acl_runs/
+DATA_NAME=convai2
+DATA_LOC=convai2_data/
+FINE_TUNE_MODEL=/convai2_data/distilbert_lm
+TRAIN_MODE=nce
+NUM_GPUS=8
+# Model
+# python codes/trainer.py --mode train --batch_size 64 --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --corrupt_type all_context
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --corrupt_type all
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --corrupt_type only_semantics
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --corrupt_type only_syntax
+# InferSent Baseline
+python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline infersent --corrupt_type all_context
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline infersent --corrupt_type all
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline infersent --corrupt_type only_semantics
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline infersent --corrupt_type only_syntax
+# BertNLI baseline
+python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline bertnli --corrupt_type all_context
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline bertnli --corrupt_type all
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline bertnli --corrupt_type only_semantics
+# python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --use_cluster --use_ddp --per_experiment_nb_gpus $NUM_GPUS --train_baseline bertnli --corrupt_type only_syntax
+python codes/trainer.py --mode train --batch_size $BATCH_SIZE --model_save_dir $MODEL_SAVE_DIR --data_name $DATA_NAME --data_loc $DATA_LOC --fine_tune_model $FINE_TUNE_MODEL --learn_down True --downsample True --down_dim 300 --optim adam,lr=0.0001 --dropout 0.2 --decoder_hidden 200 --load_fine_tuned --train_mode $TRAIN_MODE --gpus 0 --train_baseline bertnli --corrupt_type all_context
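
The script above repeats one long `codes/trainer.py` invocation while varying `--train_baseline` and `--corrupt_type`. A Python sketch generating the same sweep, with flag values copied from `run_training.sh`; the commented-out corruption variants are listed for reference.

```python
import subprocess

# Shared flags, copied from run_training.sh.
BASE = [
    "python", "codes/trainer.py", "--mode", "train",
    "--batch_size", "64",
    "--model_save_dir", "full_acl_runs/",
    "--data_name", "convai2",
    "--data_loc", "convai2_data/",
    "--fine_tune_model", "/convai2_data/distilbert_lm",
    "--learn_down", "True", "--downsample", "True", "--down_dim", "300",
    "--optim", "adam,lr=0.0001", "--dropout", "0.2",
    "--decoder_hidden", "200", "--load_fine_tuned",
    "--train_mode", "nce",
]

# The commented-out lines in the script sweep these corruption types.
CORRUPT_TYPES = ["all_context"]  # also: "all", "only_semantics", "only_syntax"

for baseline in ["infersent", "bertnli"]:
    for corrupt in CORRUPT_TYPES:
        cmd = BASE + [
            "--use_cluster", "--use_ddp", "--per_experiment_nb_gpus", "8",
            "--train_baseline", baseline, "--corrupt_type", corrupt,
        ]
        subprocess.run(cmd, check=True)  # run each training job in sequence
```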
