# sl_loop.py
# Forked from thinking-machines-lab/tinker-cookbook.
"""
Minimal supervised fine-tuning script without abstractions.
Uses existing modules but with a simple, flat training loop.
"""
import logging
import time
import chz
import datasets
import tinker
from tinker_cookbook import checkpoint_utils, model_info, renderers
from tinker_cookbook.supervised.common import compute_mean_nll
from tinker_cookbook.supervised.data import conversation_to_datum
from tinker_cookbook.tokenizer_utils import get_tokenizer
from tinker_cookbook.utils import ml_log
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Raise the threshold of the "httpx" logger to WARNING
# (presumably to keep its lower-level output out of the training logs).
_httpx_logger = logging.getLogger("httpx")
_httpx_logger.setLevel(logging.WARNING)
@chz.chz
class Config:
    """Settings for the minimal supervised fine-tuning loop in this file."""

    # Forwarded as-is to tinker.ServiceClient(base_url=...).
    base_url: str | None = None
    # Directory used for logs and for looking up / writing checkpoints.
    log_path: str = "/tmp/tinker-examples/sl-loop"
    # Base model; also selects the tokenizer and the recommended renderer.
    model_name: str = "meta-llama/Llama-3.1-8B"
    # Number of conversations per training step.
    batch_size: int = 128
    # Starting learning rate; main() decays it linearly over the run.
    learning_rate: float = 1e-4
    # Passed to conversation_to_datum for every conversation.
    max_length: int = 32768
    # Passed to conversation_to_datum alongside max_length.
    train_on_what: renderers.TrainOnWhat = renderers.TrainOnWhat.ALL_ASSISTANT_MESSAGES
    # Rank handed to create_lora_training_client.
    lora_rank: int = 32
    # A state checkpoint is written whenever the step is a multiple of this.
    save_every: int = 20
def main(config: Config) -> None:
    """Run a single epoch of supervised LoRA fine-tuning on No Robots.

    Sets up metric logging, runs the training loop, and always closes the
    metric logger afterwards, even when training raises.

    Args:
        config: Run settings (model, batch size, learning rate, paths, ...).
    """
    # Setup logging
    ml_logger = ml_log.setup_logging(
        log_dir=config.log_path,
        wandb_project=None,
        wandb_name=None,
        config=config,
        do_configure_logging_module=True,
    )
    try:
        _run_training(config, ml_logger)
    finally:
        # Close the metric logger on failure too, so it is never left open
        # when a training step or the dataset download raises.
        ml_logger.close()
    logger.info("Training completed")


def _run_training(config: Config, ml_logger) -> None:
    """Build the training client and run the flat training loop.

    Args:
        config: Run settings.
        ml_logger: Object returned by ``ml_log.setup_logging``; only its
            ``log_metrics`` method is used here.
    """
    # Get tokenizer and renderer
    tokenizer = get_tokenizer(config.model_name)
    renderer_name = model_info.get_recommended_renderer_name(config.model_name)
    renderer = renderers.get_renderer(renderer_name, tokenizer)
    logger.info(f"Using renderer: {renderer_name}")

    # Load No Robots dataset
    logger.info("Loading dataset...")
    dataset = datasets.load_dataset("HuggingFaceH4/no_robots")
    assert isinstance(dataset, datasets.DatasetDict)  # narrows the type
    train_dataset = dataset["train"]
    # Floor division: a trailing partial batch is never trained on.
    n_train_batches = len(train_dataset) // config.batch_size
    logger.info(f"Train batches: {n_train_batches}")

    # Setup training client
    service_client = tinker.ServiceClient(base_url=config.base_url)

    # Check for resuming
    resume_info = checkpoint_utils.get_last_checkpoint(config.log_path)
    if resume_info:
        training_client = service_client.create_training_client_from_state(
            resume_info["state_path"]
        )
        start_batch = resume_info["batch"]
        logger.info(f"Resuming from batch {start_batch}")
    else:
        training_client = service_client.create_lora_training_client(
            base_model=config.model_name, rank=config.lora_rank
        )
        start_batch = 0

    # Training loop (single epoch)
    logger.info(f"Training for {n_train_batches} steps")

    # Shuffle dataset with a fixed seed so a resumed run sees the same order.
    train_dataset = train_dataset.shuffle(seed=0)

    for batch_idx in range(start_batch, n_train_batches):
        start_time = time.time()
        step = batch_idx

        # Save a periodic state checkpoint *before* training on this batch,
        # so loop_state["batch"] is the first batch still to be processed.
        # `step > start_batch` skips step 0 on a fresh run and also avoids
        # immediately re-saving the checkpoint a resumed run just loaded.
        if step % config.save_every == 0 and step > start_batch:
            checkpoint_utils.save_checkpoint(
                training_client=training_client,
                name=f"{step:06d}",
                log_path=config.log_path,
                kind="state",
                loop_state={"batch": batch_idx},
            )

        # Linear learning rate schedule: decays from the full rate towards 0.
        lr_mult = max(0.0, 1.0 - step / n_train_batches)
        current_lr = config.learning_rate * lr_mult
        adam_params = tinker.AdamParams(
            learning_rate=current_lr, beta1=0.9, beta2=0.95, eps=1e-8
        )

        # Get training batch and convert to datums online
        batch_start = batch_idx * config.batch_size
        batch_end = min(batch_start + config.batch_size, len(train_dataset))
        batch_rows = train_dataset.select(range(batch_start, batch_end))
        batch = [
            conversation_to_datum(
                row["messages"],  # type: ignore
                renderer,
                config.max_length,
                config.train_on_what,
            )
            for row in batch_rows
        ]

        # Training step: submit both requests before waiting on either.
        fwd_bwd_future = training_client.forward_backward(batch, loss_fn="cross_entropy")
        optim_step_future = training_client.optim_step(adam_params)
        fwd_bwd_result = fwd_bwd_future.result()
        # The optimizer result is not used; wait for the step to finish.
        optim_step_future.result()

        # Compute train metrics
        train_logprobs = [x["logprobs"] for x in fwd_bwd_result.loss_fn_outputs]
        train_weights = [d.loss_fn_inputs["weights"] for d in batch]
        train_nll = compute_mean_nll(train_logprobs, train_weights)

        # Log metrics
        metrics = {
            "num_sequences": len(batch),
            "num_tokens": sum(d.model_input.length for d in batch),
            "learning_rate": current_lr,
            "train_mean_nll": train_nll,
            "progress": step / n_train_batches,
            "time_total": time.time() - start_time,
        }
        ml_logger.log_metrics(metrics=metrics, step=step)

    # Save final checkpoint
    checkpoint_utils.save_checkpoint(
        training_client=training_client,
        name="final",
        log_path=config.log_path,
        kind="both",
        loop_state={"batch": n_train_batches},
    )
if __name__ == "__main__":
    # Script entry point: hand main to chz, which is expected to build the
    # Config from command-line arguments and invoke main with it.
    chz.nested_entrypoint(main)