forked from EleutherAI/bergson
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_build.py
More file actions
112 lines (89 loc) · 3.02 KB
/
test_build.py
File metadata and controls
112 lines (89 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import pytest
from bergson.data import load_gradients
try:
import torch
HAS_CUDA = torch.cuda.is_available()
except Exception:
HAS_CUDA = False
if not HAS_CUDA:
pytest.skip(
"Skipping GPU-only tests: no CUDA/NVIDIA driver available.",
allow_module_level=True,
)
from pathlib import Path
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from bergson import (
DataConfig,
GradientProcessor,
HeadConfig,
IndexConfig,
collect_gradients,
)
from bergson.data import tokenize
def test_disk_build_linear(tmp_path: Path):
run_path = tmp_path / "example_with_heads"
run_path.mkdir(parents=True, exist_ok=True)
config = IndexConfig(
run_path=str(run_path),
model="RonenEldan/TinyStories-1M",
data=DataConfig(dataset="NeelNanda/pile-10k", truncation=True),
head_cfgs={
"h.0.attn.attention.out_proj": HeadConfig(
num_heads=16, head_size=4, head_dim=2
),
},
)
model = AutoModelForCausalLM.from_pretrained(
config.model, trust_remote_code=True, use_safetensors=True
)
tokenizer = AutoTokenizer.from_pretrained(config.model)
data = load_dataset(config.data.dataset, split="train[:1%]")
data = data.select(range(8)) # type: ignore
processor = GradientProcessor(projection_dim=config.projection_dim)
data = data.map(
tokenize,
batched=True,
fn_kwargs=dict(args=config.data, tokenizer=tokenizer),
remove_columns=data.column_names,
)
collect_gradients(
model=model,
data=data,
processor=processor,
path=config.run_path,
head_cfgs=config.head_cfgs,
)
assert any(run_path.iterdir()), "Expected artifacts in the temp run_path"
def test_disk_build_conv1d(tmp_path: Path):
run_path = tmp_path / "example_with_heads"
run_path.mkdir(parents=True, exist_ok=True)
config = IndexConfig(
run_path=str(run_path),
model="openai-community/gpt2",
data=DataConfig(dataset="NeelNanda/pile-10k", truncation=True),
)
model = AutoModelForCausalLM.from_pretrained(
config.model, trust_remote_code=True, use_safetensors=True
)
tokenizer = AutoTokenizer.from_pretrained(config.model)
data = load_dataset(config.data.dataset, split="train")
data = data.select(range(8)) # type: ignore
processor = GradientProcessor(projection_dim=config.projection_dim)
data = data.map(
tokenize,
batched=True,
fn_kwargs=dict(args=config.data, tokenizer=tokenizer),
remove_columns=data.column_names,
)
collect_gradients(
model=model,
data=data,
processor=processor,
path=config.run_path,
head_cfgs=config.head_cfgs,
)
assert any(run_path.iterdir()), "Expected artifacts in the temp run_path"
index = load_gradients(str(run_path))
assert len(modules := index.dtype.names) != 0
assert len(first_column := index[modules[0]]) != 0