-
Notifications
You must be signed in to change notification settings - Fork 134
Expand file tree
/
Copy pathcustomizer_gpt_oss_full_sft.yaml
More file actions
92 lines (90 loc) · 2.56 KB
/
customizer_gpt_oss_full_sft.yaml
File metadata and controls
92 lines (90 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Recipe to run, referenced by name.
recipe: TrainFinetuneRecipeForNextTokenPrediction

# Distributed process-group settings: communication backend and timeout.
dist_env:
  backend: nccl
  timeout_minutes: 30
# Random-number-generator object; `_target_` names the class to instantiate
# and the remaining keys are passed to it.
rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 1111
  ranked: true
# Model to fine-tune, loaded through the `from_pretrained` callable named in
# `_target_`.
model:
  pretrained_model_name_or_path: openai/gpt-oss-20b
  torch_dtype: auto
  trust_remote_code: false
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  attn_implementation: sdpa
  # Nested backend object. This must stay under `model`: at top level it
  # would collide with other `backend` / `_target_` keys in this file.
  backend:
    _target_: nemo_automodel.components.models.common.utils.BackendConfig
    enable_deepep: false
# Parallelism layout handled by the FSDP2 manager named in `_target_`.
# Only the data-parallel (dp) and expert-parallel (ep) sizes are above 1.
distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: 8
  tp_size: 1
  pp_size: 1
  cp_size: 1
  ep_size: 8
  sequence_parallel: false
# Batch sizes and step cadence for the run.
# NOTE(review): global_batch_size (8) equals local_batch_size (1) times the
# dp_size of 8 configured under `distributed` -- presumably intentional;
# keep them in sync when changing either.
step_scheduler:
  global_batch_size: 8
  local_batch_size: 1
  max_steps: 13
  num_epochs: 1
  # Validation and checkpointing share the same 12-step interval.
  val_every_steps: 12
  ckpt_every_steps: 12
# Optimizer class and its constructor arguments.
# NOTE(review): torch.optim.Adam applies `weight_decay` as an L2 penalty, not
# decoupled decay as in AdamW -- confirm this is the intended behaviour.
optimizer:
  _target_: torch.optim.Adam
  lr: 5.0e-06
  weight_decay: 0.01
  betas:
    - 0.9
    - 0.999
  eps: 1.0e-08
# Learning-rate schedule: cosine decay style with zero warmup steps.
lr_scheduler:
  lr_decay_style: cosine
  lr_warmup_steps: 0
# Checkpoint output: on/off switch, on-disk format, and target directory
# (relative to the working directory).
checkpoint:
  enabled: true
  model_save_format: safetensors
  checkpoint_dir: ./checkpoints
  save_consolidated: true
  dequantize_base_checkpoint: true
# Training dataset, read from a local JSONL file.
dataset:
  _target_: nemo_automodel.components.datasets.llm.column_mapped_text_instruction_dataset.ColumnMappedTextInstructionDataset
  path_or_dataset_id: ./sample-datasets/prompt_completion/train.jsonl
  split: train
  # Presumably maps the dataset's question/answer roles onto the `prompt`
  # and `completion` columns of the JSONL file -- verify against the
  # dataset class.
  column_mapping:
    question: prompt
    answer: completion
  # NOTE(review): the keys below are placed as dataset arguments, siblings of
  # `column_mapping`, not as entries inside it -- confirm against the
  # dataset class signature.
  seq_length: 2048
  answer_only_loss_mask: true
  padding: do_not_pad
  truncation: longest_first
# Validation dataset: same class and options as `dataset`, but with its own
# JSONL file and the `validation` split.
validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.column_mapped_text_instruction_dataset.ColumnMappedTextInstructionDataset
  path_or_dataset_id: ./sample-datasets/prompt_completion/validation.jsonl
  split: validation
  # Presumably maps the dataset's question/answer roles onto the `prompt`
  # and `completion` columns of the JSONL file -- verify against the
  # dataset class.
  column_mapping:
    question: prompt
    answer: completion
  seq_length: 2048
  answer_only_loss_mask: true
  padding: do_not_pad
  truncation: longest_first
# Training dataloader: a stateful dataloader with shuffling enabled.
# `collate_fn` is given as a dotted import path.
dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: true
# Validation dataloader: same class and collate function as `dataloader`,
# with no `shuffle` key set.
validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
# Loss function class; no extra arguments are set.
loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy
# Function used to parallelize the model, with activation checkpointing
# turned off.
# NOTE(review): `activation_checkpointing` is placed here as an argument of
# the parallelizer function -- confirm against its signature.
parallelizer:
  _target_: nemo_automodel.components.moe.parallelizer.parallelize_model
  activation_checkpointing: false
# CI-related settings.
# NOTE(review): `checkpoint_robustness` is nested under `ci` here -- confirm
# that the CI tooling reads it from this location.
ci:
  # Kept quoted: an unquoted digits-and-colons value is read as a
  # sexagesimal integer by YAML 1.1 loaders.
  time: "00:30:00"
  checkpoint_robustness:
    # Written with a decimal point: YAML 1.1 loaders such as PyYAML resolve
    # `5e-2` as a string, not a float.
    hf_kl_threshold: 5.0e-2
    tokenizer_name: openai/gpt-oss-20b
    no_check_resume: true
    check_phantom_keys: true