diff --git a/.gitignore b/.gitignore
index 70702445..70255b20 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,7 @@ _build/
mydask.png
dataframes/data
.idea/
+.devcontainer/
+.data/
+.vector_cache/
+__pycache__
\ No newline at end of file
diff --git a/machine-learning/Untitled1.ipynb b/machine-learning/Untitled1.ipynb
deleted file mode 100644
index 2fd64429..00000000
--- a/machine-learning/Untitled1.ipynb
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/machine-learning/model.py b/machine-learning/model.py
new file mode 100644
index 00000000..6f48270d
--- /dev/null
+++ b/machine-learning/model.py
@@ -0,0 +1,53 @@
+# more details can be found here: https://github.com/bentrevett/pytorch-sentiment-analysis/blob/master/4%20-%20Convolutional%20Sentiment%20Analysis.ipynb
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.utils.rnn import pad_sequence
+import torchtext
+import numpy as np
+
+
+class CNN(nn.Module):
+    def __init__(self, n_filters=100, filter_sizes=(2, 3, 4), output_dim=2, dropout=0.2, pretrained_embeddings=None, TEXT=None):
+        super().__init__()
+        self.TEXT = TEXT
+        # pretrained_embeddings is used to initialize the embedding layer
+        self.embedding = nn.Embedding.from_pretrained(pretrained_embeddings)
+        self.embedding.weight.requires_grad = False  # save some computation
+        embedding_dim = self.embedding.embedding_dim
+        # NB: these are 2-D convolutions over [sent len, emb dim], hence nn.Conv2d
+        self.conv_0 = nn.Conv2d(in_channels=1,
+                                out_channels=n_filters,
+                                kernel_size=(filter_sizes[0], embedding_dim))
+        self.conv_1 = nn.Conv2d(in_channels=1,
+                                out_channels=n_filters,
+                                kernel_size=(filter_sizes[1], embedding_dim))
+        self.conv_2 = nn.Conv2d(in_channels=1,
+                                out_channels=n_filters,
+                                kernel_size=(filter_sizes[2], embedding_dim))
+        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
+        self.dropout = nn.Dropout(dropout)
+
+    def forward(self, text):
+        # # bit of a hack to preprocess data inside the network
+        # if isinstance(text, np.ndarray):
+        #     text = self.TEXT.process(text)
+
+        # text = [batch size, sent len]
+        embedded = self.embedding(text)
+        # embedded = [batch size, sent len, emb dim]
+        embedded = embedded.unsqueeze(1)
+        # embedded = [batch size, 1, sent len, emb dim]
+        conved_0 = F.relu(self.conv_0(embedded).squeeze(3))
+        conved_1 = F.relu(self.conv_1(embedded).squeeze(3))
+        conved_2 = F.relu(self.conv_2(embedded).squeeze(3))
+        # conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
+        pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
+        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
+        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
+        # pooled_n = [batch size, n_filters]
+        cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim=1))
+        # cat = [batch size, n_filters * len(filter_sizes)]
+        logits = self.fc(cat)
+        # logits = [batch_size, output_dim]
+        return F.softmax(logits, dim=-1)  # NeuralNetClassifier applies the log (see notebook)
\ No newline at end of file
diff --git a/machine-learning/skorch-hyperparam-opt.ipynb b/machine-learning/skorch-hyperparam-opt.ipynb
new file mode 100644
index 00000000..d551c689
--- /dev/null
+++ b/machine-learning/skorch-hyperparam-opt.ipynb
@@ -0,0 +1,1784 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !pip install -q dask_cuda torch torchtext skorch\n",
+ "# !pip -q install dask[dataframe] --upgrade"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Hyperparameter optimization with Skorch\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup Dask Cluster"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import math\n",
+ "import random\n",
+ "import time\n",
+ "\n",
+ "import dask.array as da\n",
+ "from dask_cuda import LocalCUDACluster\n",
+ "from dask_ml.model_selection import HyperbandSearchCV\n",
+ "from distributed import Client\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from scipy.stats import loguniform\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "from sklearn.model_selection import RandomizedSearchCV\n",
+ "import skorch\n",
+ "from skorch import NeuralNetClassifier\n",
+ "from skorch.helper import SliceDataset\n",
+ "import torch\n",
+ "import torch.nn as nn\n",
+ "import torch.nn.functional as F\n",
+ "import torch.optim as optim\n",
+ "from torch.utils.data import Dataset, DataLoader\n",
+ "import torchtext\n",
+ "from torchtext import data\n",
+ "from torchtext import datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "\n",
+ "Client\n",
+ "\n",
+ " | \n",
+ "\n",
+ "Cluster\n",
+ "\n",
+ " - Workers: 1
\n",
+ " - Cores: 1
\n",
+ " - Memory: 31.63 GB
\n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# if you have GPU(s), use dask_cuda to automatically make use of them in your dask cluster\n",
+ "if torch.cuda.is_available():\n",
+ " cluster = LocalCUDACluster()\n",
+ " client = Client(cluster)\n",
+ "else:\n",
+ " client = Client(processes=False, threads_per_worker=4,\n",
+ " n_workers=1, memory_limit='2GB')\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# for reproducibility\n",
+ "# NB: enabling reproducibility can significantly slow down runtimes\n",
+ "reproducible = False\n",
+ "if reproducible:\n",
+ " SEED = 42\n",
+ " random.seed(SEED)\n",
+ " np.random.seed(SEED)\n",
+ " torch.manual_seed(SEED)\n",
+ " torch.backends.cudnn.deterministic = True"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# this solves many of our later problems but isn't an ideal solution\n",
+ "# accuracy will take a hit\n",
+ "FIX_LENGTH = 512"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# a few seconds to download IMDB dataset (84Mb, will be cached)\n",
+ "# approx. 10 minutes to download glove embeddings (862Mb, will be cached)\n",
+ "\n",
+ "# set up fields\n",
+ "TEXT = data.Field(lower=True, batch_first=True, fix_length=FIX_LENGTH)\n",
+ "LABEL = data.Field(sequential=False, unk_token=None)\n",
+ "\n",
+ "# make splits for data\n",
+ "train, test = datasets.IMDB.splits(TEXT, LABEL)\n",
+ "\n",
+ "# work with 5k datapoints for faster iteration times\n",
+ "split_ratio = 5_000 / len(train)\n",
+ "train, discard = train.split(split_ratio=split_ratio)\n",
+ "\n",
+ "split_ratio = 5_000 / len(test)\n",
+ "test, discard = test.split(split_ratio=split_ratio)\n",
+ "\n",
+ "# will be used to initialize model embeddings layer\n",
+ "vocab = torchtext.vocab.GloVe(name='6B', dim=100)\n",
+ "\n",
+ "# build the vocabulary\n",
+ "max_size = 25_000 # shorten for demonstrative purposes\n",
+ "TEXT.build_vocab(train, vectors=vocab, max_size=max_size)\n",
+ "LABEL.build_vocab(train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['', '', 'the', 'and', 'a']"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# itos := index-to-string\n",
+ "# note the 2 extra tokens added for us: '', ''\n",
+ "TEXT.vocab.itos[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defaultdict(None, {'pos': 0, 'neg': 1})"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# stoi := string-to-index\n",
+ "# check on the meaning of these zeroes and ones\n",
+ "LABEL.vocab.stoi"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "assert (len(TEXT.vocab.itos) == max_size + 2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['i', \"couldn't\", 'hold', 'back', 'the', 'tears', 'when', 'i', 'watched', 'this'] ...\n",
+ "\n",
+ "pos\n"
+ ]
+ }
+ ],
+ "source": [
+ "# peek at the data\n",
+ "print(train.examples[0].text[:10], '...')\n",
+ "print()\n",
+ "print(train.examples[0].label)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# custom dataset class required to work with Skorch\n",
+ "class TorchDataset(Dataset):\n",
+ " def __init__(self, dataset):\n",
+ " self.dataset = dataset\n",
+ "\n",
+ " def __getitem__(self, idx):\n",
+ " example = self.dataset.examples[idx]\n",
+ " return example.text, example.label\n",
+ " \n",
+ " def __len__(self):\n",
+ " return len(self.dataset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_dataset = TorchDataset(train)\n",
+ "test_dataset = TorchDataset(test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tokens, label = train_dataset[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['i', \"couldn't\", 'hold', 'back', 'the', 'tears', 'when', 'i', 'watched', 'this'] ...\n",
+ "\n",
+ "pos\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(tokens[:10], '...')\n",
+ "print()\n",
+ "print(label)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# custom collate function for DataLoader\n",
+ "def pad_batch(batch, TEXT, LABEL):\n",
+ " text, label = list(zip(*batch))\n",
+ " # numericalized and padded text representation\n",
+ " text_processed = TEXT.process(text)\n",
+ " label_processed = LABEL.process(label)\n",
+ " return text_processed, label_processed\n",
+ "\n",
+ "from functools import partial\n",
+ "\n",
+ "pad_batch_partial = partial(pad_batch, TEXT=TEXT, LABEL=LABEL)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=pad_batch_partial)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "batch = next(iter(train_dataloader))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "processed_examples, labels = batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([ 9, 20, 7, 3765, 23, 8, 54, 692, 2, 2384]) ...\n",
+ "\n",
+ "tensor(1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(processed_examples[0][:10], '...')\n",
+ "print()\n",
+ "print(labels[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define your network"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# would have defined the class in this notebook but was getting the following error from Hyperband\n",
+ "# PicklingError: Can't pickle : attribute lookup CNN on __main__ failed\n",
+ "from model import CNN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([[0.5000, 0.5000],\n",
+ " [0.5174, 0.4826],\n",
+ " [0.4599, 0.5401],\n",
+ " [0.3472, 0.6528],\n",
+ " [0.3888, 0.6112],\n",
+ " [0.4259, 0.5741],\n",
+ " [0.3734, 0.6266],\n",
+ " [0.3527, 0.6473],\n",
+ " [0.4275, 0.5725],\n",
+ " [0.4277, 0.5723],\n",
+ " [0.5281, 0.4719],\n",
+ " [0.4183, 0.5817],\n",
+ " [0.4409, 0.5591],\n",
+ " [0.4205, 0.5795],\n",
+ " [0.4820, 0.5180],\n",
+ " [0.3552, 0.6448],\n",
+ " [0.3843, 0.6157],\n",
+ " [0.3047, 0.6953],\n",
+ " [0.5312, 0.4688],\n",
+ " [0.4069, 0.5931],\n",
+ " [0.3691, 0.6309],\n",
+ " [0.3541, 0.6459],\n",
+ " [0.2763, 0.7237],\n",
+ " [0.4770, 0.5230],\n",
+ " [0.3749, 0.6251],\n",
+ " [0.4165, 0.5835],\n",
+ " [0.4208, 0.5792],\n",
+ " [0.5268, 0.4732],\n",
+ " [0.4046, 0.5954],\n",
+ " [0.5047, 0.4953],\n",
+ " [0.3795, 0.6205],\n",
+ " [0.4030, 0.5970]], device='cuda:0', grad_fn=)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# smoketest\n",
+ "model = CNN(pretrained_embeddings=TEXT.vocab.vectors).to(device)\n",
+ "gpu_batch = batch[0].to(device)\n",
+ "model_out = model(gpu_batch)\n",
+ "print(model_out)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "del model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "del gpu_batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "torch.cuda.empty_cache()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Quick attempt at model training to debug any issues"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# if you want to use a custom DataLoader, you must use NeuralNet\n",
+ "# also, not immediately obvious that for NeuralNet you are responsible for applying the log function\n",
+ "# whereas for NeuralNetClassifier, you are not\n",
+ "\n",
+ "# NB: not ideal to be using softmax + log + NLLLoss\n",
+ "# see discussion: https://github.com/skorch-dev/skorch/issues/637\n",
+ "skorch_model = NeuralNetClassifier(\n",
+ " CNN,\n",
+ " device=device,\n",
+ " max_epochs=2,\n",
+ " lr=0.001,\n",
+ " optimizer=optim.Adam,\n",
+ " criterion=nn.NLLLoss,\n",
+ " iterator_train=DataLoader,\n",
+ " iterator_train__shuffle=True,\n",
+ " iterator_train__batch_size=32,\n",
+ " iterator_train__collate_fn=pad_batch_partial,\n",
+ " iterator_train__num_workers=8,\n",
+ " iterator_valid=DataLoader,\n",
+ " iterator_valid__shuffle=False,\n",
+ " iterator_valid__batch_size=64,\n",
+ " iterator_valid__collate_fn=pad_batch_partial,\n",
+ " iterator_valid__num_workers=8,\n",
+ " train_split=skorch.dataset.CVSplit(.2), # NB: this witholds 20% of the training data for validation\n",
+ " module__n_filters=100,\n",
+ " module__filter_sizes=(2,3,4),\n",
+ " module__dropout=0.2,\n",
+ " module__pretrained_embeddings=TEXT.vocab.vectors,\n",
+ " verbose=2)\n",
+ "# getting the following error when trying to compute accuracy\n",
+ "# ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets\n",
+ "# callbacks=callbacks)"
+ ]
+ },
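+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an aside: a cleaner alternative, per the skorch discussion linked above, is to have `forward` return raw logits and pair them with `nn.CrossEntropyLoss`, which applies `log_softmax` internally. A minimal sketch, assuming a hypothetical `CNN` variant whose `forward` ends with `return self.fc(cat)` rather than a softmax:\n",
+    "\n",
+    "```python\n",
+    "# sketch only -- assumes CNN returns raw logits instead of softmax probabilities\n",
+    "skorch_model_logits = NeuralNetClassifier(\n",
+    "    CNN,\n",
+    "    criterion=nn.CrossEntropyLoss,  # = log_softmax + NLLLoss in one step\n",
+    "    optimizer=optim.Adam,\n",
+    "    lr=0.001,\n",
+    "    max_epochs=2,\n",
+    "    device=device,\n",
+    ")\n",
+    "```"
+   ]
+  },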
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " epoch train_loss valid_acc valid_loss dur\n",
+ "------- ------------ ----------- ------------ ------\n",
+ " 1 \u001b[36m0.6258\u001b[0m \u001b[32m0.7930\u001b[0m \u001b[35m0.5002\u001b[0m 2.0929\n",
+ " 2 \u001b[36m0.4405\u001b[0m \u001b[32m0.8250\u001b[0m \u001b[35m0.3986\u001b[0m 1.9010\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[initialized](\n",
+ " module_=CNN(\n",
+ " (embedding): Embedding(25002, 100)\n",
+ " (conv_0): Conv1d(1, 100, kernel_size=(2, 100), stride=(1,))\n",
+ " (conv_1): Conv1d(1, 100, kernel_size=(3, 100), stride=(1,))\n",
+ " (conv_2): Conv1d(1, 100, kernel_size=(4, 100), stride=(1,))\n",
+ " (fc): Linear(in_features=300, out_features=2, bias=True)\n",
+ " (dropout): Dropout(p=0.2, inplace=False)\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "skorch_model.fit(train_dataset, y=None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/skorch-dev/skorch/issues/641\n",
+ "\n",
+ "# skorch_model.score(test_dataset)\n",
+ "# TypeError: score() missing 1 required positional argument: 'y'\n",
+ "# skorch_model.score(test_dataset, y=None)\n",
+ "# ValueError: Expected array-like (array or non-string sequence), got None\n",
+ "\n",
+ "# can monkey patch skorch_model to achieve native scoring\n",
+ "# def score(self, X, y=None): \n",
+ "# ds = self.get_dataset(X) \n",
+ "# target_iterator = self.get_iterator(ds, training=False) \n",
+ " \n",
+ "# y_true = np.concatenate([skorch.utils.to_numpy(y) for _, y in target_iterator]) \n",
+ "# y_pred = self.predict(X)\n",
+ " \n",
+ "# return accuracy_score(y_true, y_pred) "
+ ]
+ },
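+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, attaching the monkey-patched `score` from the commented-out code above would look roughly like this (a sketch, not part of the recorded run):\n",
+    "\n",
+    "```python\n",
+    "import types\n",
+    "\n",
+    "def score(self, X, y=None):\n",
+    "    # iterate over the dataset once to collect the true labels\n",
+    "    ds = self.get_dataset(X)\n",
+    "    target_iterator = self.get_iterator(ds, training=False)\n",
+    "    y_true = np.concatenate([skorch.utils.to_numpy(y) for _, y in target_iterator])\n",
+    "    y_pred = self.predict(X)\n",
+    "    return accuracy_score(y_true, y_pred)\n",
+    "\n",
+    "# bind the function as a method on this particular instance\n",
+    "skorch_model.score = types.MethodType(score, skorch_model)\n",
+    "print(skorch_model.score(test_dataset))\n",
+    "```"
+   ]
+  },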
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# score manually\n",
+ "test_dataloader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False, collate_fn=pad_batch_partial, num_workers=8)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.8006\n"
+ ]
+ }
+ ],
+ "source": [
+ "# test set accuracy\n",
+ "test_preds = skorch_model.predict(test_dataset)\n",
+ "processed_test_data = next(iter(test_dataloader))\n",
+ "test_labels = processed_test_data[1].numpy()\n",
+ "print(accuracy_score(test_labels, test_preds))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(array([0, 1]), array([2554, 2446]))"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# random guessing would 50% accuracy so the model is indeed training well\n",
+ "np.unique(test_labels, return_counts=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NB: this has no effect on GPU memory usage. If I keyboard interrupt, the workers get\n",
+ "# restarted and memory usage goes down. Deleting these \"handler\" objects doesn't delete\n",
+ "# GPU memory references on the workers. \n",
+ "# del skorch_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Grid search with Skorch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# define parameter grid\n",
+ "params = {'module__filter_sizes': [(1, 2, 3), (2, 3, 4), (3, 4, 5)], \n",
+ " 'module__n_filters': [25, 50, 100],\n",
+ " 'module__dropout': loguniform(1e-1, 3e-1),\n",
+ " 'batch_size': [32, 64],\n",
+ " }\n",
+ "\n",
+ "skorch_search = RandomizedSearchCV(skorch_model, params, n_iter=2, cv=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This errors out with: TypeError: fit() missing 1 required positional argument: 'y'\n",
+ "# skorch_search.fit(train_dataset, y=None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/skorch-dev/skorch/issues/605#issuecomment-650580286"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ValueError: Dataset does not have consistent lengths.\n",
+ "# dummy_y = np.zeros((len(train_dataset)))\n",
+ "# skorch_search.fit(train_dataset, y=dummy_y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ValueError: Dataset does not have consistent lengths.\n",
+ "# y = torch.cat([LABEL.process([pair[1]]) for pair in train_dataset]).numpy()\n",
+ "# skorch_search.fit(train_dataset, y=y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ValueError: Dataset does not have consistent lengths.\n",
+ "# skorch_search.fit(train_dataset, y=SliceDataset(train_dataset, idx=1))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Grid search with Hyperband"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is a really unfortunate hack to make deep learning batching semantics work with `Skorch` and `Dask`. The downside here is that we're no longer padding to the longest sequence in the batch, rather we're padding to the longest sequence in the *dataset*, which results in signifcantly more computation and thus significantly more time to train a model.\n",
+ "\n",
+ "Our solution was to set a max sequence length but that's not an ideal solution since you're still performing extra computation and accuracy does suffer."
+ ]
+ },
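+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the trade-off concrete, here is a small illustrative sketch (the sequence lengths are made up):\n",
+    "\n",
+    "```python\n",
+    "# per-batch padding: each batch is only padded to its own longest sequence\n",
+    "batch_lengths = [40, 55, 62]          # token counts in one batch\n",
+    "per_batch_width = max(batch_lengths)  # 62 columns of convolution work\n",
+    "\n",
+    "# dataset-level padding (what the Dask/Skorch setup below forces on us):\n",
+    "# every batch is padded out to FIX_LENGTH, mostly with <pad> tokens\n",
+    "fixed_width = 512\n",
+    "print(fixed_width / per_batch_width)  # ~8x the computation for this batch\n",
+    "```"
+   ]
+  },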
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# train=True shuffles the data\n",
+ "train_iter_skorch = torchtext.data.Iterator(train, batch_size=len(train), train=True, sort=False, device='cpu')\n",
+ "test_iter_skorch = torchtext.data.Iterator(test, batch_size=len(test), train=False, sort=False, device='cpu')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# takes some time to numericalize the whole dataset\n",
+ "\n",
+ "# also notice that skorch and dask expect numpy arrays, which isn't ideal since it ties you to the cpu.\n",
+ "# meanwhile, projects like https://rapids.ai/ are moving toward all GPU computation, avoiding the cpu altogether.\n",
+ "for batch in train_iter_skorch:\n",
+ " X_train = batch.text.numpy()\n",
+ " y_train = batch.label.numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for batch in test_iter_skorch:\n",
+ " X_test = batch.text.numpy()\n",
+ " y_test = batch.label.numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5000, 512)"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# notice how awfully large the second dimension is\n",
+ "X_train.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://ml.dask.org/hyper-parameter-search.html#hyperband-parameters-rule-of-thumb\n",
+ "EPOCHS = 10\n",
+ "NUM_TRAINING_EXAMPLES = len(train)*.8\n",
+ "n_examples = EPOCHS * NUM_TRAINING_EXAMPLES\n",
+ "n_params = 12\n",
+ "\n",
+ "# it's not immediately obvious to beginners how all these parameters interact with each other\n",
+ "max_iter = n_params\n",
+ "chunk_size = n_examples // n_params"
+ ]
+ },
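+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Plugging in this run's numbers: `NUM_TRAINING_EXAMPLES = 5000 * 0.8 = 4000`, so `n_examples = 10 * 4000 = 40000`, and with `n_params = 12` we get `max_iter = 12` and `chunk_size = 40000 // 12 = 3333`. Intuitively: each `partial_fit` call feeds a model one chunk of ~3333 examples, and the longest-surviving models receive `max_iter` such calls, i.e. roughly the `n_examples` a single model needs to converge."
+   ]
+  },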
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# suppose we want to set max_iter to be the commensurate with the number of examples required\n",
+ "# for the model converge (as cited in the documentation)\n",
+ "\n",
+ "# it's a bit unclear how n_params relates to BOTH the number of data points required\n",
+ "# for the model to converge AND how many hyperparameters to try out (i.e. n_iter in RandomizedSearchCV)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Chunk size: 3333.0\n",
+ "Total chunks: 2\n",
+ "Last chunk size: 1667.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# choose chunk size so that the remainder is not a tiny number\n",
+ "print(f'Chunk size: {chunk_size}')\n",
+ "print(f'Total chunks: {math.ceil(len(train) / chunk_size)}')\n",
+ "last_chunk_size = len(train) % chunk_size\n",
+ "if last_chunk_size == 0: # i.e. chunk_size evenly divides X_train\n",
+ " last_chunk_size = chunk_size\n",
+ "print(f'Last chunk size: {last_chunk_size}')\n",
+ "\n",
+ "assert (len(train) % chunk_size > 10 or len(train) % chunk_size == 0), 'Choose another chunk size'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X = da.from_array(X_train, chunks=(chunk_size, X_train.shape[-1]))\n",
+ "y = da.from_array(y_train, chunks=(chunk_size))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " | Array | Chunk | \n",
+ " \n",
+ " \n",
+ " Bytes | 20.48 MB | 13.65 MB | \n",
+ " Shape | (5000, 512) | (3333, 512) | \n",
+ " Count | 3 Tasks | 2 Chunks | \n",
+ " Type | int64 | numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "\n",
+ "\n",
+ " | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "dask.array"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "TLDR; you can't use dask arrays with `torch.utils.data.Dataloader`, which means you have to do all your data preparation ahead of time"
+ ]
+ },
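+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sketch of the constraint and the workaround this notebook uses (illustrative, not part of the recorded run):\n",
+    "\n",
+    "```python\n",
+    "# slicing a dask array yields another lazy dask array, which\n",
+    "# DataLoader's default_collate cannot convert into a torch.Tensor\n",
+    "chunk = X[:32]  # still a dask.array; nothing is materialized yet\n",
+    "\n",
+    "# workaround: materialize everything to numpy ahead of time,\n",
+    "# then hand torch the concrete arrays\n",
+    "materialized = chunk.compute()            # numpy.ndarray\n",
+    "tensors = torch.from_numpy(materialized)  # this works\n",
+    "```"
+   ]
+  },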
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# raw_train_dataset = [x for x in train_dataset]\n",
+ "# raw_train_dataset_array = np.array(raw_train_dataset, dtype=object)\n",
+ "# dask_dataset = da.from_array(raw_train_dataset_array, chunks=(chunk_size))\n",
+ "# dask_dataset[0].compute()\n",
+ "\n",
+ "# TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \n",
+ "# data_iter = DataLoader(dask_dataset)\n",
+ "# next(iter(data_iter))\n",
+ "\n",
+ "# # TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found object\n",
+ "# np_data_iter = DataLoader(raw_train_dataset_array)\n",
+ "# next(iter(np_data_iter))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# reinitialize and set train_split=None to let hyperband handle validation set splitting\n",
+ "skorch_model = NeuralNetClassifier(\n",
+ " CNN,\n",
+ " device=device,\n",
+ " lr=0.001,\n",
+ " optimizer=optim.Adam,\n",
+ " criterion=nn.NLLLoss,\n",
+ " iterator_train__batch_size=32,\n",
+ " iterator_valid__batch_size=64,\n",
+ " train_split=None, # let hyperband handle it\n",
+ " module__n_filters=100,\n",
+ " module__filter_sizes=(2, 3, 4),\n",
+ " module__dropout=0.2,\n",
+ " module__pretrained_embeddings=TEXT.vocab.vectors,\n",
+ " # module__TEXT=TEXT,\n",
+ " batch_size=32,\n",
+ " verbose=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# define parameter grid\n",
+ "params = {'module__filter_sizes': [(1, 2, 3), (2, 3, 4), (3, 4, 5)], \n",
+ " 'module__n_filters': [25, 50, 100],\n",
+ " 'module__dropout': loguniform(1e-1, 3e-1),\n",
+ " 'batch_size': [32, 64],\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "search = HyperbandSearchCV(\n",
+ " skorch_model,\n",
+ " params,\n",
+ " max_iter=max_iter,\n",
+ " verbose=True,\n",
+ " test_size=0.2 # validation size\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "85"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.metadata[\"partial_fit_calls\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "17"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.metadata['n_models']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# to clear up any confusion, every time partial_fit is called, we're passing in chunk_size number of\n",
+ "# data points. Then skorch handles the batch size either by being set explicitly or as part of the param grid.\n",
+ "\n",
+ "# to compare this grid search to number of epochs, we have 26 partial_fit calls * 10k data points = 260k examples\n",
+ "# with a training set size of 25k * .8 = 20k data points, this is 13 epochs!\n",
+ "# considering that it takes approximately 5 epochs to train a model, you would get through less than 3 sets of \n",
+ "# hyperparameters if manually searching. Instead we'll search through ~5."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Running training on a Nvidia Tesla T4\\ "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV, bracket=2] creating 9 models\n",
+ "[CV, bracket=1] creating 5 models\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.7/site-packages/distributed/worker.py:3351: UserWarning: Large object of size 10.00 MB detected in task graph: \n",
+ " [[u ... .0000]]),\n",
+ "), 0]\n",
+ "Consider scattering large objects ahead of time\n",
+ "with client.scatter to reduce scheduler burden and \n",
+ "keep data on workers\n",
+ "\n",
+ " future = client.submit(func, big_data) # bad\n",
+ "\n",
+ " big_future = client.scatter(big_data) # good\n",
+ " future = client.submit(func, big_future) # good\n",
+ " % (format_bytes(len(b)), s)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV, bracket=0] creating 3 models\n",
+ "[CV, bracket=0] For training there are between 1333 and 2666 examples in each chunk\n",
+ "[CV, bracket=1] For training there are between 1333 and 2666 examples in each chunk\n",
+ "[CV, bracket=2] For training there are between 1333 and 2666 examples in each chunk\n",
+ "[CV, bracket=0] validation score of 0.7982 received after 1 partial_fit calls\n",
+ "[CV, bracket=1] validation score of 0.8032 received after 1 partial_fit calls\n",
+ "[CV, bracket=2] validation score of 0.7842 received after 1 partial_fit calls\n",
+ "[CV, bracket=0] validation score of 0.8551 received after 12 partial_fit calls\n",
+ "[CV, bracket=1] validation score of 0.8322 received after 4 partial_fit calls\n",
+ "[CV, bracket=2] validation score of 0.8212 received after 3 partial_fit calls\n",
+ "[CV, bracket=1] validation score of 0.8162 received after 12 partial_fit calls\n",
+ "[CV, bracket=2] validation score of 0.8062 received after 9 partial_fit calls\n",
+ "Time to complete grid search: 372.02 seconds\n"
+ ]
+ }
+ ],
+ "source": [
+ "# notice how the number of training datapoints relates to the chunk size and our test_size\n",
+ "# Train set chunk size: 800 = 1000*(1-.2)\n",
+ "# Validation set chunk size: 200 = 1000*.2\n",
+ "start = time.time()\n",
+ "search.fit(X, y)\n",
+ "end = time.time()\n",
+ "duration = round(end - start, 2)\n",
+ "print(f'Time to complete grid search: {duration} seconds')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Integration"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`HyperbandSearchCV` follows the Scikit-learn API and mirrors Scikit-learn's `RandomizedSearchCV`. This means that it \"just works\". All the Scikit-learn attributes and methods are available:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8551448551448552"
+ ]
+ },
+ "execution_count": 62,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.best_score_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[initialized](\n",
+ " module_=CNN(\n",
+ " (embedding): Embedding(25002, 100)\n",
+ " (conv_0): Conv1d(1, 50, kernel_size=(1, 100), stride=(1,))\n",
+ " (conv_1): Conv1d(1, 50, kernel_size=(2, 100), stride=(1,))\n",
+ " (conv_2): Conv1d(1, 50, kernel_size=(3, 100), stride=(1,))\n",
+ " (fc): Linear(in_features=150, out_features=2, bias=True)\n",
+ " (dropout): Dropout(p=0.12476236679704862, inplace=False)\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.best_estimator_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'param_module__n_filters': array([100, 100, 25, 50, 25, 25, 25, 50, 50, 25, 100, 25, 100,\n",
+ " 100, 50, 100, 100]),\n",
+ " 'param_batch_size': array([32, 64, 64, 32, 32, 32, 32, 32, 32, 32, 64, 64, 64, 32, 32, 64, 32]),\n",
+ " 'param_module__dropout': array([0.14233276, 0.15454961, 0.273504 , 0.26444226, 0.13400896,\n",
+ " 0.19302409, 0.15142034, 0.23147478, 0.28516434, 0.15385363,\n",
+ " 0.18147304, 0.29957988, 0.10990513, 0.29003914, 0.12476237,\n",
+ " 0.14516028, 0.10459089]),\n",
+ " 'std_partial_fit_time': array([0. , 1.42792781, 0. , 0. , 0.8889792 ,\n",
+ " 0.94623792, 0. , 0. , 0. , 0.84567356,\n",
+ " 1.6097213 , 0.79452193, 1.30084145, 1.54377656, 0.04432201,\n",
+ " 0.26346576, 0.31667006]),\n",
+ " 'param_module__filter_sizes': array([[3, 4, 5],\n",
+ " [2, 3, 4],\n",
+ " [1, 2, 3],\n",
+ " [2, 3, 4],\n",
+ " [2, 3, 4],\n",
+ " [3, 4, 5],\n",
+ " [1, 2, 3],\n",
+ " [3, 4, 5],\n",
+ " [1, 2, 3],\n",
+ " [2, 3, 4],\n",
+ " [2, 3, 4],\n",
+ " [1, 2, 3],\n",
+ " [1, 2, 3],\n",
+ " [3, 4, 5],\n",
+ " [1, 2, 3],\n",
+ " [3, 4, 5],\n",
+ " [3, 4, 5]]),\n",
+ " 'test_score': array([0.73626374, 0.80619381, 0.77822178, 0.77922078, 0.80619381,\n",
+ " 0.78821179, 0.75224775, 0.77322677, 0.77322677, 0.80819181,\n",
+ " 0.81718282, 0.7982018 , 0.82817183, 0.81618382, 0.85514486,\n",
+ " 0.84615385, 0.84515485]),\n",
+ " 'model_id': array(['bracket=2-0', 'bracket=2-1', 'bracket=2-2', 'bracket=2-3',\n",
+ " 'bracket=2-4', 'bracket=2-5', 'bracket=2-6', 'bracket=2-7',\n",
+ " 'bracket=2-8', 'bracket=1-0', 'bracket=1-1', 'bracket=1-2',\n",
+ " 'bracket=1-3', 'bracket=1-4', 'bracket=0-0', 'bracket=0-1',\n",
+ " 'bracket=0-2'], dtype='\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " param_module__n_filters | \n",
+ " param_batch_size | \n",
+ " param_module__dropout | \n",
+ " std_partial_fit_time | \n",
+ " param_module__filter_sizes | \n",
+ " test_score | \n",
+ " model_id | \n",
+ " bracket | \n",
+ " rank_test_score | \n",
+ " std_score_time | \n",
+ " params | \n",
+ " mean_partial_fit_time | \n",
+ " mean_score_time | \n",
+ " partial_fit_calls | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 100 | \n",
+ " 32 | \n",
+ " 0.142333 | \n",
+ " 0.000000 | \n",
+ " [3, 4, 5] | \n",
+ " 0.736264 | \n",
+ " bracket=2-0 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 0.000000 | \n",
+ " {'batch_size': 32, 'module__dropout': 0.142332... | \n",
+ " 3.082313 | \n",
+ " 0.165004 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 100 | \n",
+ " 64 | \n",
+ " 0.154550 | \n",
+ " 1.427928 | \n",
+ " [2, 3, 4] | \n",
+ " 0.806194 | \n",
+ " bracket=2-1 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0.003680 | \n",
+ " {'batch_size': 64, 'module__dropout': 0.154549... | \n",
+ " 4.203769 | \n",
+ " 0.176866 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 25 | \n",
+ " 64 | \n",
+ " 0.273504 | \n",
+ " 0.000000 | \n",
+ " [1, 2, 3] | \n",
+ " 0.778222 | \n",
+ " bracket=2-2 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 0.000000 | \n",
+ " {'batch_size': 64, 'module__dropout': 0.273504... | \n",
+ " 1.613707 | \n",
+ " 0.053710 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 50 | \n",
+ " 32 | \n",
+ " 0.264442 | \n",
+ " 0.000000 | \n",
+ " [2, 3, 4] | \n",
+ " 0.779221 | \n",
+ " bracket=2-3 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 0.000000 | \n",
+ " {'batch_size': 32, 'module__dropout': 0.264442... | \n",
+ " 1.775565 | \n",
+ " 0.075516 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 25 | \n",
+ " 32 | \n",
+ " 0.134009 | \n",
+ " 0.888979 | \n",
+ " [2, 3, 4] | \n",
+ " 0.806194 | \n",
+ " bracket=2-4 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 0.000247 | \n",
+ " {'batch_size': 32, 'module__dropout': 0.134008... | \n",
+ " 2.550697 | \n",
+ " 0.062964 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " param_module__n_filters param_batch_size param_module__dropout \\\n",
+ "0 100 32 0.142333 \n",
+ "1 100 64 0.154550 \n",
+ "2 25 64 0.273504 \n",
+ "3 50 32 0.264442 \n",
+ "4 25 32 0.134009 \n",
+ "\n",
+ " std_partial_fit_time param_module__filter_sizes test_score model_id \\\n",
+ "0 0.000000 [3, 4, 5] 0.736264 bracket=2-0 \n",
+ "1 1.427928 [2, 3, 4] 0.806194 bracket=2-1 \n",
+ "2 0.000000 [1, 2, 3] 0.778222 bracket=2-2 \n",
+ "3 0.000000 [2, 3, 4] 0.779221 bracket=2-3 \n",
+ "4 0.888979 [2, 3, 4] 0.806194 bracket=2-4 \n",
+ "\n",
+ " bracket rank_test_score std_score_time \\\n",
+ "0 2 9 0.000000 \n",
+ "1 2 1 0.003680 \n",
+ "2 2 5 0.000000 \n",
+ "3 2 4 0.000000 \n",
+ "4 2 1 0.000247 \n",
+ "\n",
+ " params mean_partial_fit_time \\\n",
+ "0 {'batch_size': 32, 'module__dropout': 0.142332... 3.082313 \n",
+ "1 {'batch_size': 64, 'module__dropout': 0.154549... 4.203769 \n",
+ "2 {'batch_size': 64, 'module__dropout': 0.273504... 1.613707 \n",
+ "3 {'batch_size': 32, 'module__dropout': 0.264442... 1.775565 \n",
+ "4 {'batch_size': 32, 'module__dropout': 0.134008... 2.550697 \n",
+ "\n",
+ " mean_score_time partial_fit_calls \n",
+ "0 0.165004 1 \n",
+ "1 0.176866 9 \n",
+ "2 0.053710 1 \n",
+ "3 0.075516 1 \n",
+ "4 0.062964 3 "
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cv_results = pd.DataFrame(search.cv_results_)\n",
+ "cv_results.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8106"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.score(X_test, y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ " \n",
+ " | Array | Chunk | \n",
+ " \n",
+ " \n",
+ " Bytes | 40.00 kB | 40.00 kB | \n",
+ " Shape | (5000,) | (5000,) | \n",
+ " Count | 2 Tasks | 1 Chunks | \n",
+ " Type | int64 | numpy.ndarray | \n",
+ " \n",
+ " \n",
+ " | \n",
+ "\n",
+ "\n",
+ " | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "dask.array<_predict, shape=(5000,), dtype=int64, chunksize=(5000,), chunktype=numpy.ndarray>"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.predict(X_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 1, 0, ..., 1, 0, 1])"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.predict(X_test).compute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It also has some other attributes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model_id | \n",
+ " params | \n",
+ " partial_fit_calls | \n",
+ " partial_fit_time | \n",
+ " score | \n",
+ " score_time | \n",
+ " elapsed_wall_time | \n",
+ " bracket | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " bracket=0-0 | \n",
+ " {'batch_size': 32, 'module__dropout': 0.124762... | \n",
+ " 1 | \n",
+ " 3.679612 | \n",
+ " 0.798202 | \n",
+ " 0.069165 | \n",
+ " 30.430613 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " bracket=0-1 | \n",
+ " {'batch_size': 64, 'module__dropout': 0.145160... | \n",
+ " 1 | \n",
+ " 6.269561 | \n",
+ " 0.791209 | \n",
+ " 0.167421 | \n",
+ " 30.430616 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " bracket=0-2 | \n",
+ " {'batch_size': 32, 'module__dropout': 0.104590... | \n",
+ " 1 | \n",
+ " 6.193064 | \n",
+ " 0.789211 | \n",
+ " 0.166522 | \n",
+ " 30.430617 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " bracket=1-0 | \n",
+ " {'batch_size': 32, 'module__dropout': 0.153853... | \n",
+ " 1 | \n",
+ " 1.683563 | \n",
+ " 0.787213 | \n",
+ " 0.062644 | \n",
+ " 36.104408 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " bracket=1-1 | \n",
+ " {'batch_size': 64, 'module__dropout': 0.181473... | \n",
+ " 1 | \n",
+ " 3.002587 | \n",
+ " 0.803197 | \n",
+ " 0.171636 | \n",
+ " 36.104410 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model_id params \\\n",
+ "0 bracket=0-0 {'batch_size': 32, 'module__dropout': 0.124762... \n",
+ "1 bracket=0-1 {'batch_size': 64, 'module__dropout': 0.145160... \n",
+ "2 bracket=0-2 {'batch_size': 32, 'module__dropout': 0.104590... \n",
+ "3 bracket=1-0 {'batch_size': 32, 'module__dropout': 0.153853... \n",
+ "4 bracket=1-1 {'batch_size': 64, 'module__dropout': 0.181473... \n",
+ "\n",
+ " partial_fit_calls partial_fit_time score score_time \\\n",
+ "0 1 3.679612 0.798202 0.069165 \n",
+ "1 1 6.269561 0.791209 0.167421 \n",
+ "2 1 6.193064 0.789211 0.166522 \n",
+ "3 1 1.683563 0.787213 0.062644 \n",
+ "4 1 3.002587 0.803197 0.171636 \n",
+ "\n",
+ " elapsed_wall_time bracket \n",
+ "0 30.430613 0 \n",
+ "1 30.430616 0 \n",
+ "2 30.430617 0 \n",
+ "3 36.104408 1 \n",
+ "4 36.104410 1 "
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "hist = pd.DataFrame(search.history_)\n",
+ "hist.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This illustrates the history after every `partial_fit` call. There's also an attributed `model_history_` that records the history for each model (it's a reorganization of `history_`)."
+ ]
+ },
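+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, a sketch of pulling one model's score trajectory out of `model_history_` (using the `'bracket=0-0'` model ID seen in the history above):\n",
+    "\n",
+    "```python\n",
+    "# model_history_ maps model_id -> list of history records for that model\n",
+    "records = search.model_history_['bracket=0-0']\n",
+    "scores = [r['score'] for r in records]  # validation score per partial_fit call\n",
+    "print(scores)\n",
+    "```"
+   ]
+  },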
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Learn more"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook covered basic usage `HyperbandSearchCV`. The following documentation and resources might be useful to learn more about `HyperbandSearchCV`, including some of the finer use cases:\n",
+ "\n",
+ "* [A talk](https://www.youtube.com/watch?v=x67K9FiPFBQ) introducing `HyperbandSearchCV` to the SciPy 2019 audience and the [corresponding paper](https://conference.scipy.org/proceedings/scipy2019/pdfs/scott_sievert.pdf)\n",
+ "* [HyperbandSearchCV's documentation](https://ml.dask.org/modules/generated/dask_ml.model_selection.HyperbandSearchCV.html)\n",
+ "\n",
+ "Performance comparisons can be found in the SciPy 2019 talk/paper."
+ ]
+ }
+ ],
+ "metadata": {
+ "environment": {
+ "name": "pytorch-gpu.1-4.m46",
+ "type": "gcloud",
+ "uri": "gcr.io/deeplearning-platform-release/pytorch-gpu.1-4:m46"
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}