Skip to content

Commit a243fe5

Browse files
[nightshift] fix documentation drift in tutorials (#3966)
> *Pages left behind,*
> *code rewrites its own story—*
> *docs echo the old.*

- **`add-optimizer.md`**: Fixed `TrainLmConfig` import path (`levanter.trainer` → `levanter.main.train_lm`)
- **`first-experiment.md`**: Added required `gpu_type` arg to `ResourceConfig.with_gpu(count=1)` → `ResourceConfig.with_gpu("H100", count=1)`
- **`train-an-lm.md`**: Added required `gpu_type` arg to `ResourceConfig.with_gpu(count=4)`, added missing `EvalTaskConfig` import, fixed trailing comma
- **`train-dpo.md`**: `llama_3_1_8b` is a module-level `LlamaConfig` variable, not a callable — removed erroneous `()` that would cause `TypeError`
- **`train_test_overlap.md`**: Renamed `DedupeConfig` → `DeconConfig` and `DedupMode` → `DeconMode` to match actual class names in `marin.processing.classification.decon`
1 parent 60d4a0a commit a243fe5

5 files changed

Lines changed: 12 additions & 9 deletions

File tree

docs/tutorials/add-optimizer.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ In this guide, we’ll walk through adding an [AdaMax](https://optax.readthedocs
101101
and use it in `TrainLmConfig`:
102102

103103
```python
104-
from levanter.trainer import TrainLmConfig
104+
from levanter.main.train_lm import TrainLmConfig
105105

106106
trainer_config = TrainLmConfig(
107107
...

docs/tutorials/first-experiment.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ This class defines basic training configuration that is sufficient for most expe
105105

106106
nano_train_config = SimpleTrainConfig(
107107
# Here we define the hardware resources we need.
108-
resources=ResourceConfig.with_gpu(count=1),
108+
resources=ResourceConfig.with_gpu("H100", count=1),
109109
train_batch_size=32,
110110
num_train_steps=100,
111111
# set hyperparameters

docs/tutorials/train-an-lm.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ from levanter.models.llama import LlamaConfig
2929
# Import the executor framework for running experiments
3030
from marin.execution.executor import executor_main
3131

32+
# Import evaluation task configuration
33+
from marin.evaluation.evaluation_config import EvalTaskConfig
34+
3235
# Import logging utilities
3336
import logging
3437
```
@@ -70,7 +73,7 @@ Set up your training configuration by calculating the number of training steps a
7073
NUM_TRAIN_STEPS = NUM_TRAIN_TOKENS // (BATCH_SIZE * SEQ_LEN)
7174

7275
training_config = SimpleTrainConfig(
73-
resources=ResourceConfig.with_gpu(count=4), # Hardware configuration: 4 GPUs
76+
resources=ResourceConfig.with_gpu("H100", count=4), # Hardware configuration: 4 GPUs
7477
train_batch_size=BATCH_SIZE, # Sequences processed per step
7578
num_train_steps=NUM_TRAIN_STEPS, # Total optimization steps
7679
learning_rate=3e-3, # Peak learning rate
@@ -116,7 +119,7 @@ model = default_train(
116119
model_config=model_config, # Model architecture
117120
train_config=training_config, # Training hyperparameters
118121
tags=["${YOUR_TAG1}", "${YOUR_TAG2}"], # Tags for experiment tracking
119-
eval_harness_tasks = [EvalTaskConfig("mmlu", 0, task_alias="mmlu_0shot"), EvalTaskConfig("mmlu", 5, task_alias="mmlu_5shot")] # Evaluation Tasks to run on the checkpoint
122+
eval_harness_tasks = [EvalTaskConfig("mmlu", 0, task_alias="mmlu_0shot"), EvalTaskConfig("mmlu", 5, task_alias="mmlu_5shot")], # Evaluation Tasks to run on the checkpoint
120123
)
121124

122125
# Set up the experiment execution

docs/tutorials/train-dpo.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ from the loss by default.
5353
## Configuring the DPO Run
5454

5555
```python
56-
model_config = llama_3_1_8b()
56+
model_config = llama_3_1_8b
5757

5858
dpo_config = SimpleDPOConfig(
5959
resources=ResourceConfig.with_tpu("v5p-32"),

docs/tutorials/train_test_overlap.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,15 +148,15 @@ NGRAM_CONFIG = NGramConfig(
148148

149149

150150
def build_step(dataset_config: DatasetConfig) -> ExecutorStep:
151-
dedupe_config = DedupeConfig(
151+
dedupe_config = DeconConfig(
152152
input_path=dataset_config.path,
153153
output_path=this_output_path(),
154154
decontaminate_source=EVAL_DATASET_STEPS,
155155
attribute_name="ngram_overlap",
156156
false_positive_rate=1e-20,
157157
ngram=NGRAM_CONFIG,
158158
processes=1024,
159-
mode=DedupMode.TRAIN_TEST_OVERLAP,
159+
mode=DeconMode.TRAIN_TEST_OVERLAP,
160160
text_field=dataset_config.text_field,
161161
)
162162

@@ -188,7 +188,7 @@ Zephyr handles file discovery and parallelism automatically. You can control the
188188
```python
189189
# In build_step function - adjust processes for parallelism
190190
def build_step(dataset_config: DatasetConfig) -> ExecutorStep:
191-
dedupe_config = DedupeConfig(
191+
dedupe_config = DeconConfig(
192192
# ...
193193
processes=128, # Control parallelism level
194194
)
@@ -247,7 +247,7 @@ DEFAULT_NGRAM_CONFIG = NGramConfig(
247247

248248
# Edit the processes parameter in build_step
249249
def build_step(dataset_config: DatasetConfig) -> ExecutorStep:
250-
dedupe_config = DedupeConfig(
250+
dedupe_config = DeconConfig(
251251
# ...
252252
processes=32, # Increase for more parallelism
253253
)

0 commit comments

Comments (0)