dpo_tiny_gpt2.yaml
67 lines (57 loc) · 1.39 KB
# Tiny DPO sanity-check config for train_dpo.py.
data:
  tokenizer: gpt2
  cache_dir: cache/dpo_tiny
  components:
    default:
      source:
        type: url
        train_urls:
          - config/data/tiny_preference.jsonl
      format:
        type: preference_chat
        chat_template: |
          {% for message in messages %}
          {% if message['role'] == 'user' %}
          {{ message['content'] }}
          {% elif message['role'] == 'assistant' %}
          {% generation %}{{ message['content'] }}{% endgeneration %}
          {% endif %}
          {% endfor %}
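  # The {% generation %} tags mark assistant turns; Hugging Face chat templates
  # use them to build the assistant-token mask, so (presumably) only the
  # chosen/rejected response tokens contribute to the DPO loss.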
  pack: false
  slice_strategy: raise
  shuffle: true
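  # pack: false keeps one preference pair per row (no sequence packing), and
  # slice_strategy: raise presumably errors out if an example exceeds
  # train_seq_len rather than silently truncating, the safer choice in a test.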
model:
  type: gpt2
  train_seq_len: 64
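# Trainer settings are sized for a quick accelerator-free run: full f32
# precision, a 16-example batch, and only 20 optimizer steps with an eval
# after every step.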
trainer:
  seed: 0
  mp: f32
  train_batch_size: 16
  num_train_steps: 20
  steps_per_eval: 1
  model_averaging: null
  ray:
    auto_start_cluster: false
  require_accelerator: false
  checkpointer:
    save_interval: 1m
    keep:
      - every: 5
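# A deliberately high learning rate with no warmup or weight decay, so the
# tiny model's loss moves visibly within the 20-step budget.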
optimizer:
  learning_rate: 1e-3
  weight_decay: 0.0
  warmup: 0.0
adapter:
  type: none
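# DPO scores completions against a frozen reference policy. type: separate
# loads it as its own model (a tiny random HF checkpoint here), presumably as
# opposed to reusing a snapshot of the policy's initial weights.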
reference:
  type: separate
  model_path: hf-internal-testing/tiny-random-gpt2
  is_hf: true
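# beta scales the log-probability-ratio margin inside the DPO sigmoid loss;
# 0.1 is the value used in the DPO paper (Rafailov et al., 2023).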
beta: 0.1
validation_split_fraction: 0.1
# Repeat the tiny dataset enough times to hit ~20 steps with batch size 16
# (1 train example after the 10% val split -> 320 epochs => 320/16 = 20 steps).
epoch: 320
initialize_from_hf: hf-internal-testing/tiny-random-gpt2
use_hf_model_config: true
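# Note: the policy initializes from the same checkpoint as the reference, so
# the log-ratios cancel at step 0 and the first DPO loss should be
# ln 2 ≈ 0.693, a quick way to confirm the wiring is correct.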