dpo_tiny_gpt2.yaml
67 lines (57 loc) · 1.39 KB
# Tiny DPO sanity-check config for train_dpo.py.
data:
  tokenizer: gpt2
  cache_dir: cache/dpo_tiny
  components:
    default:
      source:
        type: url
        train_urls:
          - config/data/tiny_preference.jsonl
      format:
        type: preference_chat
        chat_template: |
          {% for message in messages %}
          {% if message['role'] == 'user' %}
          {{ message['content'] }}
          {% elif message['role'] == 'assistant' %}
          {% generation %}{{ message['content'] }}{% endgeneration %}
          {% endif %}
          {% endfor %}
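  # The {% generation %} tags mark assistant turns; Hugging Face chat templates
  # use them to build the assistant-token mask, so (presumably) only the
  # chosen/rejected response tokens contribute to the DPO loss.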
  pack: false
  slice_strategy: raise
  shuffle: true
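  # pack: false keeps one preference pair per row (no sequence packing), and
  # slice_strategy: raise presumably errors out if an example exceeds
  # train_seq_len rather than silently truncating, the safer choice in a test.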
model:
  type: gpt2
  train_seq_len: 64
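# Trainer settings are sized for a quick accelerator-free run: full f32
# precision, a 16-example batch, and only 20 optimizer steps with an eval
# after every step.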
trainer:
  seed: 0
  mp: f32
  train_batch_size: 16
  num_train_steps: 20
  steps_per_eval: 1
  model_averaging: null
  ray:
    auto_start_cluster: false
  require_accelerator: false
  checkpointer:
    save_interval: 1m
    keep:
      - every: 5
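# A deliberately high learning rate with no warmup or weight decay, so the
# tiny model's loss moves visibly within the 20-step budget.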
optimizer:
  learning_rate: 1e-3
  weight_decay: 0.0
  warmup: 0.0
adapter:
  type: none
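# DPO scores completions against a frozen reference policy. type: separate
# loads it as its own model (a tiny random HF checkpoint here), presumably as
# opposed to reusing a snapshot of the policy's initial weights.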
reference:
  type: separate
  model_path: hf-internal-testing/tiny-random-gpt2
  is_hf: true
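# beta scales the log-probability-ratio margin inside the DPO sigmoid loss;
# 0.1 is the value used in the DPO paper (Rafailov et al., 2023).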
beta: 0.1
validation_split_fraction: 0.1
# Repeat the tiny dataset enough times to hit ~20 steps with batch size 16
# (1 train example after the 10% val split -> 320 epochs => 320/16 = 20 steps).
epoch: 320
initialize_from_hf: hf-internal-testing/tiny-random-gpt2
use_hf_model_config: true
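# Note: the policy initializes from the same checkpoint as the reference, so
# the log-ratios cancel at step 0 and the first DPO loss should be
# ln 2 ≈ 0.693, a quick way to confirm the wiring is correct.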