diff --git a/.gitignore b/.gitignore index 70702445..70255b20 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ _build/ mydask.png dataframes/data .idea/ +.devcontainer/ +.data/ +.vector_cache/ +__pycache__ \ No newline at end of file diff --git a/machine-learning/Untitled1.ipynb b/machine-learning/Untitled1.ipynb deleted file mode 100644 index 2fd64429..00000000 --- a/machine-learning/Untitled1.ipynb +++ /dev/null @@ -1,6 +0,0 @@ -{ - "cells": [], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/machine-learning/model.py b/machine-learning/model.py new file mode 100644 index 00000000..6f48270d --- /dev/null +++ b/machine-learning/model.py @@ -0,0 +1,53 @@ +# more details can be found here: https://github.com/bentrevett/pytorch-sentiment-analysis/blob/master/4%20-%20Convolutional%20Sentiment%20Analysis.ipynb +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils.rnn import pad_sequence +import torchtext +import numpy as np + + +class CNN(nn.Module): + def __init__(self, n_filters=100, filter_sizes=(2,3,4), output_dim=2, dropout=0.2, pretrained_embeddings=None, TEXT=None): + + super().__init__() + self.TEXT = TEXT + # will be used to initialize model embeddings layer + self.embedding = nn.Embedding.from_pretrained(pretrained_embeddings) + self.embedding.weight.requires_grad = False # save some computation + embedding_dim = self.embedding.embedding_dim + self.conv_0 = nn.Conv1d(in_channels = 1, + out_channels = n_filters, + kernel_size = (filter_sizes[0], embedding_dim)) + self.conv_1 = nn.Conv1d(in_channels = 1, + out_channels = n_filters, + kernel_size = (filter_sizes[1], embedding_dim)) + self.conv_2 = nn.Conv1d(in_channels = 1, + out_channels = n_filters, + kernel_size = (filter_sizes[2], embedding_dim)) + self.fc = nn.Linear(len(filter_sizes) * n_filters, 2) + self.dropout = nn.Dropout(dropout) + + def forward(self, text): +# # bit of a hack to preprocess data inside the network +# if isinstance(text, np.ndarray): +# text = self.TEXT.process(text) + + #text = [batch size, sent len] + embedded = self.embedding(text) + #embedded = [batch size, sent len, emb dim] + embedded = embedded.unsqueeze(1) + #embedded = [batch size, 1, sent len, emb dim] + conved_0 = F.relu(self.conv_0(embedded).squeeze(3)) + conved_1 = F.relu(self.conv_1(embedded).squeeze(3)) + conved_2 = F.relu(self.conv_2(embedded).squeeze(3)) + #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1] + pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2) + pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2) + pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2) + #pooled_n = [batch size, n_filters] + cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim = 1)) + #cat = [batch size, n_filters * len(filter_sizes)] + logits = self.fc(cat) + #logits = [batch_size, output_dim] + return F.softmax(logits, dim=-1) \ No newline at end of file diff --git a/machine-learning/skorch-hyperparam-opt.ipynb b/machine-learning/skorch-hyperparam-opt.ipynb new file mode 100644 index 00000000..d551c689 --- /dev/null +++ b/machine-learning/skorch-hyperparam-opt.ipynb @@ -0,0 +1,1784 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -q dask_cuda torch torchtext skorch\n", + "# !pip -q install dask[dataframe] --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hyperparameter optimization with Skorch\n" + ] + }, 
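+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At a high level, this notebook wraps the PyTorch `CNN` module from `model.py` in a skorch `NeuralNetClassifier` and hands that estimator to Dask-ML's `HyperbandSearchCV`. The sketch below is only a condensed preview of the workflow built up cell by cell in the rest of the notebook; the names `CNN`, `TEXT`, `params`, `X` and `y` are all defined in later cells.\n", + "\n", + "```python\n", + "# condensed preview -- mirrors the cells below, not meant to be run at this point\n", + "from dask_ml.model_selection import HyperbandSearchCV\n", + "from skorch import NeuralNetClassifier\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "\n", + "from model import CNN\n", + "\n", + "net = NeuralNetClassifier(\n", + "    CNN,\n", + "    criterion=nn.NLLLoss,\n", + "    optimizer=optim.Adam,\n", + "    lr=0.001,\n", + "    train_split=None,  # Hyperband handles the validation split\n", + "    module__pretrained_embeddings=TEXT.vocab.vectors,\n", + ")\n", + "\n", + "search = HyperbandSearchCV(net, params, max_iter=12, test_size=0.2)\n", + "search.fit(X, y)  # X and y are dask arrays chunked along the sample axis\n", + "```" + ] + },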
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Dask Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import random\n", + "import time\n", + "\n", + "import dask.array as da\n", + "from dask_cuda import LocalCUDACluster\n", + "from dask_ml.model_selection import HyperbandSearchCV\n", + "from distributed import Client\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.stats import loguniform\n", + "from sklearn.metrics import accuracy_score\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "import skorch\n", + "from skorch import NeuralNetClassifier\n", + "from skorch.helper import SliceDataset\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torch.utils.data import Dataset, DataLoader\n", + "import torchtext\n", + "from torchtext import data\n", + "from torchtext import datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 1
  • \n", + "
  • Cores: 1
  • \n", + "
  • Memory: 31.63 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# if you have GPU(s), use dask_cuda to automatically make use of them in your dask cluster\n", + "if torch.cuda.is_available():\n", + " cluster = LocalCUDACluster()\n", + " client = Client(cluster)\n", + "else:\n", + " client = Client(processes=False, threads_per_worker=4,\n", + " n_workers=1, memory_limit='2GB')\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# for reproducibility\n", + "# NB: enabling reproducibility can significantly slow down runtimes\n", + "reproducible = False\n", + "if reproducible:\n", + " SEED = 42\n", + " random.seed(SEED)\n", + " np.random.seed(SEED)\n", + " torch.manual_seed(SEED)\n", + " torch.backends.cudnn.deterministic = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# this solves many of our later problems but isn't an ideal solution\n", + "# accuracy will take a hit\n", + "FIX_LENGTH = 512" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# a few seconds to download IMDB dataset (84Mb, will be cached)\n", + "# approx. 10 minutes to download glove embeddings (862Mb, will be cached)\n", + "\n", + "# set up fields\n", + "TEXT = data.Field(lower=True, batch_first=True, fix_length=FIX_LENGTH)\n", + "LABEL = data.Field(sequential=False, unk_token=None)\n", + "\n", + "# make splits for data\n", + "train, test = datasets.IMDB.splits(TEXT, LABEL)\n", + "\n", + "# work with 5k datapoints for faster iteration times\n", + "split_ratio = 5_000 / len(train)\n", + "train, discard = train.split(split_ratio=split_ratio)\n", + "\n", + "split_ratio = 5_000 / len(test)\n", + "test, discard = test.split(split_ratio=split_ratio)\n", + "\n", + "# will be used to initialize model embeddings layer\n", + "vocab = torchtext.vocab.GloVe(name='6B', dim=100)\n", + "\n", + "# build the vocabulary\n", + "max_size = 25_000 # shorten for demonstrative purposes\n", + "TEXT.build_vocab(train, vectors=vocab, max_size=max_size)\n", + "LABEL.build_vocab(train)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['', '', 'the', 'and', 'a']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# itos := index-to-string\n", + "# note the 2 extra tokens added for us: '', ''\n", + "TEXT.vocab.itos[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(None, {'pos': 0, 'neg': 1})" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# stoi := string-to-index\n", + "# check on the meaning of these zeroes and ones\n", + "LABEL.vocab.stoi" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "assert (len(TEXT.vocab.itos) == max_size + 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "['i', \"couldn't\", 'hold', 'back', 'the', 'tears', 'when', 'i', 'watched', 'this'] ...\n", + "\n", + "pos\n" + ] + } + ], + "source": [ + "# peek at the data\n", + "print(train.examples[0].text[:10], '...')\n", + "print()\n", + "print(train.examples[0].label)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# custom dataset class required to work with Skorch\n", + "class TorchDataset(Dataset):\n", + " def __init__(self, dataset):\n", + " self.dataset = dataset\n", + "\n", + " def __getitem__(self, idx):\n", + " example = self.dataset.examples[idx]\n", + " return example.text, example.label\n", + " \n", + " def __len__(self):\n", + " return len(self.dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataset = TorchDataset(train)\n", + "test_dataset = TorchDataset(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "tokens, label = train_dataset[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['i', \"couldn't\", 'hold', 'back', 'the', 'tears', 'when', 'i', 'watched', 'this'] ...\n", + "\n", + "pos\n" + ] + } + ], + "source": [ + "print(tokens[:10], '...')\n", + "print()\n", + "print(label)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# custom collate function for DataLoader\n", + "def pad_batch(batch, TEXT, LABEL):\n", + " text, label = list(zip(*batch))\n", + " # numericalized and padded text representation\n", + " text_processed = TEXT.process(text)\n", + " label_processed = LABEL.process(label)\n", + " return text_processed, label_processed\n", + "\n", + "from functools import partial\n", + "\n", + "pad_batch_partial = partial(pad_batch, TEXT=TEXT, LABEL=LABEL)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=pad_batch_partial)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "batch = next(iter(train_dataloader))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "processed_examples, labels = batch" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([ 9, 20, 7, 3765, 23, 8, 54, 692, 2, 2384]) ...\n", + "\n", + "tensor(1)\n" + ] + } + ], + "source": [ + "print(processed_examples[0][:10], '...')\n", + "print()\n", + "print(labels[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define your network" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# would have defined the class in this notebook but was getting the following error from Hyperband\n", + "# PicklingError: Can't pickle : attribute lookup CNN on __main__ failed\n", + "from model import CNN" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[0.5000, 0.5000],\n", + " [0.5174, 0.4826],\n", + " 
[0.4599, 0.5401],\n", + " [0.3472, 0.6528],\n", + " [0.3888, 0.6112],\n", + " [0.4259, 0.5741],\n", + " [0.3734, 0.6266],\n", + " [0.3527, 0.6473],\n", + " [0.4275, 0.5725],\n", + " [0.4277, 0.5723],\n", + " [0.5281, 0.4719],\n", + " [0.4183, 0.5817],\n", + " [0.4409, 0.5591],\n", + " [0.4205, 0.5795],\n", + " [0.4820, 0.5180],\n", + " [0.3552, 0.6448],\n", + " [0.3843, 0.6157],\n", + " [0.3047, 0.6953],\n", + " [0.5312, 0.4688],\n", + " [0.4069, 0.5931],\n", + " [0.3691, 0.6309],\n", + " [0.3541, 0.6459],\n", + " [0.2763, 0.7237],\n", + " [0.4770, 0.5230],\n", + " [0.3749, 0.6251],\n", + " [0.4165, 0.5835],\n", + " [0.4208, 0.5792],\n", + " [0.5268, 0.4732],\n", + " [0.4046, 0.5954],\n", + " [0.5047, 0.4953],\n", + " [0.3795, 0.6205],\n", + " [0.4030, 0.5970]], device='cuda:0', grad_fn=)\n" + ] + } + ], + "source": [ + "# smoketest\n", + "model = CNN(pretrained_embeddings=TEXT.vocab.vectors).to(device)\n", + "gpu_batch = batch[0].to(device)\n", + "model_out = model(gpu_batch)\n", + "print(model_out)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "del model" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "del gpu_batch" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "torch.cuda.empty_cache()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Quick attempt at model training to debug any issues" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# if you want to use a custom DataLoader, you must use NeuralNet\n", + "# also, not immediately obvious that for NeuralNet you are responsible for applying the log function\n", + "# whereas for NeuralNetClassifier, you are not\n", + "\n", + "# NB: not ideal to be using softmax + log + NLLLoss\n", + "# see discussion: https://github.com/skorch-dev/skorch/issues/637\n", + "skorch_model = NeuralNetClassifier(\n", + " CNN,\n", + " device=device,\n", + " max_epochs=2,\n", + " lr=0.001,\n", + " optimizer=optim.Adam,\n", + " criterion=nn.NLLLoss,\n", + " iterator_train=DataLoader,\n", + " iterator_train__shuffle=True,\n", + " iterator_train__batch_size=32,\n", + " iterator_train__collate_fn=pad_batch_partial,\n", + " iterator_train__num_workers=8,\n", + " iterator_valid=DataLoader,\n", + " iterator_valid__shuffle=False,\n", + " iterator_valid__batch_size=64,\n", + " iterator_valid__collate_fn=pad_batch_partial,\n", + " iterator_valid__num_workers=8,\n", + " train_split=skorch.dataset.CVSplit(.2), # NB: this witholds 20% of the training data for validation\n", + " module__n_filters=100,\n", + " module__filter_sizes=(2,3,4),\n", + " module__dropout=0.2,\n", + " module__pretrained_embeddings=TEXT.vocab.vectors,\n", + " verbose=2)\n", + "# getting the following error when trying to compute accuracy\n", + "# ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets\n", + "# callbacks=callbacks)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " epoch train_loss valid_acc valid_loss dur\n", + "------- ------------ ----------- ------------ ------\n", + " 1 \u001b[36m0.6258\u001b[0m \u001b[32m0.7930\u001b[0m \u001b[35m0.5002\u001b[0m 2.0929\n", + " 2 \u001b[36m0.4405\u001b[0m \u001b[32m0.8250\u001b[0m \u001b[35m0.3986\u001b[0m 
1.9010\n" + ] + }, + { + "data": { + "text/plain": [ + "[initialized](\n", + " module_=CNN(\n", + " (embedding): Embedding(25002, 100)\n", + " (conv_0): Conv1d(1, 100, kernel_size=(2, 100), stride=(1,))\n", + " (conv_1): Conv1d(1, 100, kernel_size=(3, 100), stride=(1,))\n", + " (conv_2): Conv1d(1, 100, kernel_size=(4, 100), stride=(1,))\n", + " (fc): Linear(in_features=300, out_features=2, bias=True)\n", + " (dropout): Dropout(p=0.2, inplace=False)\n", + " ),\n", + ")" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "skorch_model.fit(train_dataset, y=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/skorch-dev/skorch/issues/641\n", + "\n", + "# skorch_model.score(test_dataset)\n", + "# TypeError: score() missing 1 required positional argument: 'y'\n", + "# skorch_model.score(test_dataset, y=None)\n", + "# ValueError: Expected array-like (array or non-string sequence), got None\n", + "\n", + "# can monkey patch skorch_model to achieve native scoring\n", + "# def score(self, X, y=None): \n", + "# ds = self.get_dataset(X) \n", + "# target_iterator = self.get_iterator(ds, training=False) \n", + " \n", + "# y_true = np.concatenate([skorch.utils.to_numpy(y) for _, y in target_iterator]) \n", + "# y_pred = self.predict(X)\n", + " \n", + "# return accuracy_score(y_true, y_pred) " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "# score manually\n", + "test_dataloader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False, collate_fn=pad_batch_partial, num_workers=8)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8006\n" + ] + } + ], + "source": [ + "# test set accuracy\n", + "test_preds = skorch_model.predict(test_dataset)\n", + "processed_test_data = next(iter(test_dataloader))\n", + "test_labels = processed_test_data[1].numpy()\n", + "print(accuracy_score(test_labels, test_preds))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0, 1]), array([2554, 2446]))" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# random guessing would 50% accuracy so the model is indeed training well\n", + "np.unique(test_labels, return_counts=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# NB: this has no effect on GPU memory usage. If I keyboard interrupt, the workers get\n", + "# restarted and memory usage goes down. Deleting these \"handler\" objects doesn't delete\n", + "# GPU memory references on the workers. 
\n", + "# del skorch_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Grid search with Skorch" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# define parameter grid\n", + "params = {'module__filter_sizes': [(1, 2, 3), (2, 3, 4), (3, 4, 5)], \n", + "          'module__n_filters': [25, 50, 100],\n", + "          'module__dropout': loguniform(1e-1, 3e-1),\n", + "          'batch_size': [32, 64],\n", + "          }\n", + "\n", + "skorch_search = RandomizedSearchCV(skorch_model, params, n_iter=2, cv=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# This errors out with: TypeError: fit() missing 1 required positional argument: 'y'\n", + "# skorch_search.fit(train_dataset, y=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/skorch-dev/skorch/issues/605#issuecomment-650580286" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# ValueError: Dataset does not have consistent lengths.\n", + "# dummy_y = np.zeros((len(train_dataset)))\n", + "# skorch_search.fit(train_dataset, y=dummy_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# ValueError: Dataset does not have consistent lengths.\n", + "# y = torch.cat([LABEL.process([pair[1]]) for pair in train_dataset]).numpy()\n", + "# skorch_search.fit(train_dataset, y=y)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "# ValueError: Dataset does not have consistent lengths.\n", + "# skorch_search.fit(train_dataset, y=SliceDataset(train_dataset, idx=1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Grid search with Hyperband" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a really unfortunate hack to make deep learning batching semantics work with `Skorch` and `Dask`. The downside is that we're no longer padding to the longest sequence in the batch; rather, we're padding to the longest sequence in the *dataset*, which results in significantly more computation and thus significantly more time to train a model.\n", + "\n", + "Our solution was to set a max sequence length, but that's not an ideal solution, since you're still performing extra computation and accuracy does suffer."
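, + "\n", + "As a rough illustration of the trade-off (this assumes the `TEXT` field defined earlier in this notebook; the tiny batch below is made up purely for the example), every example is padded or truncated to `FIX_LENGTH` tokens, no matter how short the longest example in a given batch actually is:\n", + "\n", + "```python\n", + "# minimal sketch of what fix_length does -- illustrative only\n", + "short_batch = [['great', 'movie'], ['terrible']]\n", + "padded = TEXT.process(short_batch)\n", + "print(padded.shape)  # torch.Size([2, 512]): padded to fix_length, not to the longest example in the batch\n", + "```"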
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "# train=True shuffles the data\n", + "train_iter_skorch = torchtext.data.Iterator(train, batch_size=len(train), train=True, sort=False, device='cpu')\n", + "test_iter_skorch = torchtext.data.Iterator(test, batch_size=len(test), train=False, sort=False, device='cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# takes some time to numericalize the whole dataset\n", + "\n", + "# also notice that skorch and dask expect numpy arrays, which isn't ideal since it ties you to the CPU.\n", + "# meanwhile, projects like https://rapids.ai/ are moving toward all-GPU computation, avoiding the CPU altogether.\n", + "for batch in train_iter_skorch:\n", + "    X_train = batch.text.numpy()\n", + "    y_train = batch.label.numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "for batch in test_iter_skorch:\n", + "    X_test = batch.text.numpy()\n", + "    y_test = batch.label.numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5000, 512)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# notice how awfully large the second dimension is\n", + "X_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "# https://ml.dask.org/hyper-parameter-search.html#hyperband-parameters-rule-of-thumb\n", + "EPOCHS = 10\n", + "NUM_TRAINING_EXAMPLES = len(train)*.8\n", + "n_examples = EPOCHS * NUM_TRAINING_EXAMPLES\n", + "n_params = 12\n", + "\n", + "# it's not immediately obvious to beginners how all these parameters interact with each other\n", + "max_iter = n_params\n", + "chunk_size = n_examples // n_params" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "# suppose we want to set max_iter to be commensurate with the number of examples required\n", + "# for the model to converge (as cited in the documentation)\n", + "\n", + "# it's a bit unclear how n_params relates to BOTH the number of data points required\n", + "# for the model to converge AND how many hyperparameters to try out (i.e. n_iter in RandomizedSearchCV)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chunk size: 3333.0\n", + "Total chunks: 2\n", + "Last chunk size: 1667.0\n" + ] + } + ], + "source": [ + "# choose chunk size so that the remainder is not a tiny number\n", + "print(f'Chunk size: {chunk_size}')\n", + "print(f'Total chunks: {math.ceil(len(train) / chunk_size)}')\n", + "last_chunk_size = len(train) % chunk_size\n", + "if last_chunk_size == 0: # i.e. 
chunk_size evenly divides X_train\n", + " last_chunk_size = chunk_size\n", + "print(f'Last chunk size: {last_chunk_size}')\n", + "\n", + "assert (len(train) % chunk_size > 10 or len(train) % chunk_size == 0), 'Choose another chunk size'" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "X = da.from_array(X_train, chunks=(chunk_size, X_train.shape[-1]))\n", + "y = da.from_array(y_train, chunks=(chunk_size))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Array Chunk
Bytes 20.48 MB 13.65 MB
Shape (5000, 512) (3333, 512)
Count 3 Tasks 2 Chunks
Type int64 numpy.ndarray
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " 512\n", + " 5000\n", + "\n", + "
" + ], + "text/plain": [ + "dask.array" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TLDR; you can't use dask arrays with `torch.utils.data.Dataloader`, which means you have to do all your data preparation ahead of time" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "# raw_train_dataset = [x for x in train_dataset]\n", + "# raw_train_dataset_array = np.array(raw_train_dataset, dtype=object)\n", + "# dask_dataset = da.from_array(raw_train_dataset_array, chunks=(chunk_size))\n", + "# dask_dataset[0].compute()\n", + "\n", + "# TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found \n", + "# data_iter = DataLoader(dask_dataset)\n", + "# next(iter(data_iter))\n", + "\n", + "# # TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found object\n", + "# np_data_iter = DataLoader(raw_train_dataset_array)\n", + "# next(iter(np_data_iter))" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# reinitialize and set train_split=None to let hyperband handle validation set splitting\n", + "skorch_model = NeuralNetClassifier(\n", + " CNN,\n", + " device=device,\n", + " lr=0.001,\n", + " optimizer=optim.Adam,\n", + " criterion=nn.NLLLoss,\n", + " iterator_train__batch_size=32,\n", + " iterator_valid__batch_size=64,\n", + " train_split=None, # let hyperband handle it\n", + " module__n_filters=100,\n", + " module__filter_sizes=(2, 3, 4),\n", + " module__dropout=0.2,\n", + " module__pretrained_embeddings=TEXT.vocab.vectors,\n", + " # module__TEXT=TEXT,\n", + " batch_size=32,\n", + " verbose=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "# define parameter grid\n", + "params = {'module__filter_sizes': [(1, 2, 3), (2, 3, 4), (3, 4, 5)], \n", + " 'module__n_filters': [25, 50, 100],\n", + " 'module__dropout': loguniform(1e-1, 3e-1),\n", + " 'batch_size': [32, 64],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "search = HyperbandSearchCV(\n", + " skorch_model,\n", + " params,\n", + " max_iter=max_iter,\n", + " verbose=True,\n", + " test_size=0.2 # validation size\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "85" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.metadata[\"partial_fit_calls\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "17" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.metadata['n_models']" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "# to clear up any confusion, every time partial_fit is called, we're passing in chunk_size number of\n", + "# data points. 
Then skorch handles the batch size either by being set explicitly or as part of the param grid.\n", + "\n", + "# to compare this grid search to number of epochs, we have 26 partial_fit calls * 10k data points = 260k examples\n", + "# with a training set size of 25k * .8 = 20k data points, this is 13 epochs!\n", + "# considering that it takes approximately 5 epochs to train a model, you would get through less than 3 sets of \n", + "# hyperparameters if manually searching. Instead we'll search through ~5." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Running training on a Nvidia Tesla T4\\ " + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV, bracket=2] creating 9 models\n", + "[CV, bracket=1] creating 5 models\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.7/site-packages/distributed/worker.py:3351: UserWarning: Large object of size 10.00 MB detected in task graph: \n", + " [[u ... .0000]]),\n", + "), 0]\n", + "Consider scattering large objects ahead of time\n", + "with client.scatter to reduce scheduler burden and \n", + "keep data on workers\n", + "\n", + " future = client.submit(func, big_data) # bad\n", + "\n", + " big_future = client.scatter(big_data) # good\n", + " future = client.submit(func, big_future) # good\n", + " % (format_bytes(len(b)), s)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV, bracket=0] creating 3 models\n", + "[CV, bracket=0] For training there are between 1333 and 2666 examples in each chunk\n", + "[CV, bracket=1] For training there are between 1333 and 2666 examples in each chunk\n", + "[CV, bracket=2] For training there are between 1333 and 2666 examples in each chunk\n", + "[CV, bracket=0] validation score of 0.7982 received after 1 partial_fit calls\n", + "[CV, bracket=1] validation score of 0.8032 received after 1 partial_fit calls\n", + "[CV, bracket=2] validation score of 0.7842 received after 1 partial_fit calls\n", + "[CV, bracket=0] validation score of 0.8551 received after 12 partial_fit calls\n", + "[CV, bracket=1] validation score of 0.8322 received after 4 partial_fit calls\n", + "[CV, bracket=2] validation score of 0.8212 received after 3 partial_fit calls\n", + "[CV, bracket=1] validation score of 0.8162 received after 12 partial_fit calls\n", + "[CV, bracket=2] validation score of 0.8062 received after 9 partial_fit calls\n", + "Time to complete grid search: 372.02 seconds\n" + ] + } + ], + "source": [ + "# notice how the number of training datapoints relates to the chunk size and our test_size\n", + "# Train set chunk size: 800 = 1000*(1-.2)\n", + "# Validation set chunk size: 200 = 1000*.2\n", + "start = time.time()\n", + "search.fit(X, y)\n", + "end = time.time()\n", + "duration = round(end - start, 2)\n", + "print(f'Time to complete grid search: {duration} seconds')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Integration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`HyperbandSearchCV` follows the Scikit-learn API and mirrors Scikit-learn's `RandomizedSearchCV`. This means that it \"just works\". 
All the Scikit-learn attributes and methods are available:" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8551448551448552" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.best_score_" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[initialized](\n", + " module_=CNN(\n", + " (embedding): Embedding(25002, 100)\n", + " (conv_0): Conv1d(1, 50, kernel_size=(1, 100), stride=(1,))\n", + " (conv_1): Conv1d(1, 50, kernel_size=(2, 100), stride=(1,))\n", + " (conv_2): Conv1d(1, 50, kernel_size=(3, 100), stride=(1,))\n", + " (fc): Linear(in_features=150, out_features=2, bias=True)\n", + " (dropout): Dropout(p=0.12476236679704862, inplace=False)\n", + " ),\n", + ")" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.best_estimator_" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'param_module__n_filters': array([100, 100, 25, 50, 25, 25, 25, 50, 50, 25, 100, 25, 100,\n", + " 100, 50, 100, 100]),\n", + " 'param_batch_size': array([32, 64, 64, 32, 32, 32, 32, 32, 32, 32, 64, 64, 64, 32, 32, 64, 32]),\n", + " 'param_module__dropout': array([0.14233276, 0.15454961, 0.273504 , 0.26444226, 0.13400896,\n", + " 0.19302409, 0.15142034, 0.23147478, 0.28516434, 0.15385363,\n", + " 0.18147304, 0.29957988, 0.10990513, 0.29003914, 0.12476237,\n", + " 0.14516028, 0.10459089]),\n", + " 'std_partial_fit_time': array([0. , 1.42792781, 0. , 0. , 0.8889792 ,\n", + " 0.94623792, 0. , 0. , 0. , 0.84567356,\n", + " 1.6097213 , 0.79452193, 1.30084145, 1.54377656, 0.04432201,\n", + " 0.26346576, 0.31667006]),\n", + " 'param_module__filter_sizes': array([[3, 4, 5],\n", + " [2, 3, 4],\n", + " [1, 2, 3],\n", + " [2, 3, 4],\n", + " [2, 3, 4],\n", + " [3, 4, 5],\n", + " [1, 2, 3],\n", + " [3, 4, 5],\n", + " [1, 2, 3],\n", + " [2, 3, 4],\n", + " [2, 3, 4],\n", + " [1, 2, 3],\n", + " [1, 2, 3],\n", + " [3, 4, 5],\n", + " [1, 2, 3],\n", + " [3, 4, 5],\n", + " [3, 4, 5]]),\n", + " 'test_score': array([0.73626374, 0.80619381, 0.77822178, 0.77922078, 0.80619381,\n", + " 0.78821179, 0.75224775, 0.77322677, 0.77322677, 0.80819181,\n", + " 0.81718282, 0.7982018 , 0.82817183, 0.81618382, 0.85514486,\n", + " 0.84615385, 0.84515485]),\n", + " 'model_id': array(['bracket=2-0', 'bracket=2-1', 'bracket=2-2', 'bracket=2-3',\n", + " 'bracket=2-4', 'bracket=2-5', 'bracket=2-6', 'bracket=2-7',\n", + " 'bracket=2-8', 'bracket=1-0', 'bracket=1-1', 'bracket=1-2',\n", + " 'bracket=1-3', 'bracket=1-4', 'bracket=0-0', 'bracket=0-1',\n", + " 'bracket=0-2'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
param_module__n_filtersparam_batch_sizeparam_module__dropoutstd_partial_fit_timeparam_module__filter_sizestest_scoremodel_idbracketrank_test_scorestd_score_timeparamsmean_partial_fit_timemean_score_timepartial_fit_calls
0100320.1423330.000000[3, 4, 5]0.736264bracket=2-0290.000000{'batch_size': 32, 'module__dropout': 0.142332...3.0823130.1650041
1100640.1545501.427928[2, 3, 4]0.806194bracket=2-1210.003680{'batch_size': 64, 'module__dropout': 0.154549...4.2037690.1768669
225640.2735040.000000[1, 2, 3]0.778222bracket=2-2250.000000{'batch_size': 64, 'module__dropout': 0.273504...1.6137070.0537101
350320.2644420.000000[2, 3, 4]0.779221bracket=2-3240.000000{'batch_size': 32, 'module__dropout': 0.264442...1.7755650.0755161
425320.1340090.888979[2, 3, 4]0.806194bracket=2-4210.000247{'batch_size': 32, 'module__dropout': 0.134008...2.5506970.0629643
\n", + "" + ], + "text/plain": [ + " param_module__n_filters param_batch_size param_module__dropout \\\n", + "0 100 32 0.142333 \n", + "1 100 64 0.154550 \n", + "2 25 64 0.273504 \n", + "3 50 32 0.264442 \n", + "4 25 32 0.134009 \n", + "\n", + " std_partial_fit_time param_module__filter_sizes test_score model_id \\\n", + "0 0.000000 [3, 4, 5] 0.736264 bracket=2-0 \n", + "1 1.427928 [2, 3, 4] 0.806194 bracket=2-1 \n", + "2 0.000000 [1, 2, 3] 0.778222 bracket=2-2 \n", + "3 0.000000 [2, 3, 4] 0.779221 bracket=2-3 \n", + "4 0.888979 [2, 3, 4] 0.806194 bracket=2-4 \n", + "\n", + " bracket rank_test_score std_score_time \\\n", + "0 2 9 0.000000 \n", + "1 2 1 0.003680 \n", + "2 2 5 0.000000 \n", + "3 2 4 0.000000 \n", + "4 2 1 0.000247 \n", + "\n", + " params mean_partial_fit_time \\\n", + "0 {'batch_size': 32, 'module__dropout': 0.142332... 3.082313 \n", + "1 {'batch_size': 64, 'module__dropout': 0.154549... 4.203769 \n", + "2 {'batch_size': 64, 'module__dropout': 0.273504... 1.613707 \n", + "3 {'batch_size': 32, 'module__dropout': 0.264442... 1.775565 \n", + "4 {'batch_size': 32, 'module__dropout': 0.134008... 2.550697 \n", + "\n", + " mean_score_time partial_fit_calls \n", + "0 0.165004 1 \n", + "1 0.176866 9 \n", + "2 0.053710 1 \n", + "3 0.075516 1 \n", + "4 0.062964 3 " + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cv_results = pd.DataFrame(search.cv_results_)\n", + "cv_results.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8106" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.score(X_test, y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Array Chunk
Bytes 40.00 kB 40.00 kB
Shape (5000,) (5000,)
Count 2 Tasks 1 Chunks
Type int64 numpy.ndarray
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + " \n", + " 5000\n", + " 1\n", + "\n", + "
" + ], + "text/plain": [ + "dask.array<_predict, shape=(5000,), dtype=int64, chunksize=(5000,), chunktype=numpy.ndarray>" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0, ..., 1, 0, 1])" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.predict(X_test).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It also has some other attributes." + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
model_idparamspartial_fit_callspartial_fit_timescorescore_timeelapsed_wall_timebracket
0bracket=0-0{'batch_size': 32, 'module__dropout': 0.124762...13.6796120.7982020.06916530.4306130
1bracket=0-1{'batch_size': 64, 'module__dropout': 0.145160...16.2695610.7912090.16742130.4306160
2bracket=0-2{'batch_size': 32, 'module__dropout': 0.104590...16.1930640.7892110.16652230.4306170
3bracket=1-0{'batch_size': 32, 'module__dropout': 0.153853...11.6835630.7872130.06264436.1044081
4bracket=1-1{'batch_size': 64, 'module__dropout': 0.181473...13.0025870.8031970.17163636.1044101
\n", + "
" + ], + "text/plain": [ + "      model_id                                             params  \\n", + "0  bracket=0-0  {'batch_size': 32, 'module__dropout': 0.124762...   \n", + "1  bracket=0-1  {'batch_size': 64, 'module__dropout': 0.145160...   \n", + "2  bracket=0-2  {'batch_size': 32, 'module__dropout': 0.104590...   \n", + "3  bracket=1-0  {'batch_size': 32, 'module__dropout': 0.153853...   \n", + "4  bracket=1-1  {'batch_size': 64, 'module__dropout': 0.181473...   \n", + "\n", + "   partial_fit_calls  partial_fit_time     score  score_time  \\n", + "0                  1          3.679612  0.798202    0.069165   \n", + "1                  1          6.269561  0.791209    0.167421   \n", + "2                  1          6.193064  0.789211    0.166522   \n", + "3                  1          1.683563  0.787213    0.062644   \n", + "4                  1          3.002587  0.803197    0.171636   \n", + "\n", + "   elapsed_wall_time  bracket  \n", + "0          30.430613        0  \n", + "1          30.430616        0  \n", + "2          30.430617        0  \n", + "3          36.104408        1  \n", + "4          36.104410        1  " + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hist = pd.DataFrame(search.history_)\n", + "hist.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This illustrates the history after every `partial_fit` call. There's also a `model_history_` attribute that records the history for each model (it's a reorganization of `history_`)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learn more" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook covered basic usage of `HyperbandSearchCV`. The following documentation and resources might be useful to learn more about `HyperbandSearchCV`, including some of the finer use cases:\n", + "\n", + "* [A talk](https://www.youtube.com/watch?v=x67K9FiPFBQ) introducing `HyperbandSearchCV` to the SciPy 2019 audience and the [corresponding paper](https://conference.scipy.org/proceedings/scipy2019/pdfs/scott_sievert.pdf)\n", + "* [HyperbandSearchCV's documentation](https://ml.dask.org/modules/generated/dask_ml.model_selection.HyperbandSearchCV.html)\n", + "\n", + "Performance comparisons can be found in the SciPy 2019 talk/paper." + ] + } + ], + "metadata": { + "environment": { + "name": "pytorch-gpu.1-4.m46", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/pytorch-gpu.1-4:m46" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}