@@ -3,8 +3,8 @@
 with hyperparameter optimization using Optuna. In this example, we fine-tune a lightweight
 pre-trained BERT model on a small subset of the IMDb dataset to classify movie reviews as
 positive or negative. We optimize the validation accuracy by tuning the learning rate
-and batch size. To learn more about transformers' hyperparameter search,
-you can check the following documentation:
+and batch size. To learn more about transformers' hyperparameter search,
+you can check the following documentation:
 https://huggingface.co/docs/transformers/en/hpo_train.
 """
 
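The search described in this docstring maps onto Trainer.hyperparameter_search with the Optuna backend. Below is a minimal sketch of how the two tuned hyperparameters could be wired up; the checkpoint name, output directory, and trial ranges are illustrative assumptions rather than values from this script, while tokenized_train, tokenized_valid, and compute_metrics come from the surrounding file (compute_metrics is sketched after the next hunk).

# Sketch only: checkpoint name, output dir, and trial ranges are assumptions.
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments


def model_init():
    # Re-create the model for every trial so each run starts from the same
    # pre-trained weights.
    return AutoModelForSequenceClassification.from_pretrained(
        "prajjwal1/bert-tiny", num_labels=2  # assumed lightweight BERT checkpoint
    )


def optuna_hp_space(trial):
    # The two hyperparameters the docstring says are tuned.
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True),
        "per_device_train_batch_size": trial.suggest_categorical(
            "per_device_train_batch_size", [8, 16, 32]
        ),
    }


trainer = Trainer(
    model_init=model_init,
    args=TrainingArguments(output_dir="hpo_output", eval_strategy="epoch"),
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    compute_metrics=compute_metrics,
)

best_trial = trainer.hyperparameter_search(
    direction="maximize",  # maximize validation accuracy
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=10,
)
print(best_trial.hyperparameters)

Passing model_init instead of a built model matters here: hyperparameter_search trains once per trial, and the callback guarantees each trial starts fresh instead of continuing from the previous trial's weights.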
@@ -36,8 +36,12 @@ def tokenize(batch):
     return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
 
 
-tokenized_train = train_dataset.map(tokenize, batched=True).select_columns(["input_ids", "attention_mask", "label"])
-tokenized_valid = valid_dataset.map(tokenize, batched=True).select_columns(["input_ids", "attention_mask", "label"])
+tokenized_train = train_dataset.map(tokenize, batched=True).select_columns(
+    ["input_ids", "attention_mask", "label"]
+)
+tokenized_valid = valid_dataset.map(tokenize, batched=True).select_columns(
+    ["input_ids", "attention_mask", "label"]
+)
 
 
 metric = evaluate.load("accuracy")
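The accuracy metric loaded in the last context line is conventionally wrapped in a compute_metrics callback that the Trainer invokes on every evaluation pass. The script's own version is not visible in this hunk; a minimal sketch following the usual EvalPrediction convention:

import numpy as np


def compute_metrics(eval_pred):
    # Trainer passes (logits, label_ids); accuracy needs hard class predictions,
    # so take the argmax over the class dimension before scoring.
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)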