Skip to content

Commit 2e55187

Browse files
authored
feat: add benchmark toolkit for comparing inference configs against f… (#1938)
Unified benchmark toolkit that runs gold-standard fp64 baseline inference, then evaluates a given InferenceSettings config on the same systems, measuring accuracy (energy/force/stress error vs. baseline) and performance (QPS, GPU memory, warmup time).

Default systems:
- Water box (60 atoms, omol)
- FCC crystal (200 atoms, omat)
- FCC crystal (1000 atoms, omat)

Integrates with the fairchem Hydra CLI; any setting can be overridden from the command line:

    fairchem -c configs/uma/benchmark/perf_check/benchmark.yaml
    fairchem -c ... runner.inference_settings.execution_mode=umas_fast_gpu
    fairchem -c ... runner.inference_settings.tf32=True
    fairchem -c ... runner.device=cpu
1 parent 4c5e7bf commit 2e55187

25 files changed

Lines changed: 3524 additions & 178 deletions
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Backbone settings for eSCNMDMoeBackbone.
# Values written as ${name} are interpolations; this file does not define them,
# so the config that includes it must supply: moe_layer_type, num_experts,
# max_neighbors, cutoff_radius, regress_stress, direct_forces, act_type, ff_type.
model: fairchem.core.models.uma.escn_moe.eSCNMDMoeBackbone
2+
# --- mixture-of-experts and embedding switches ---
moe_dropout: 0.05
3+
moe_layer_type: ${moe_layer_type}
4+
num_experts: ${num_experts}
5+
use_composition_embedding: true
6+
use_global_embedding: false
7+
8+
# --- representation sizes ---
max_num_elements: 100
9+
sphere_channels: 128
10+
lmax: 2
11+
mmax: 2
12+
13+
# --- graph construction / periodic boundary handling ---
otf_graph: True
14+
max_neighbors: ${max_neighbors}
15+
use_pbc: True
16+
use_pbc_single: True
17+
18+
# --- edge / distance featurisation ---
cutoff: ${cutoff_radius}
19+
edge_channels: 128
20+
distance_function: gaussian
21+
num_distance_basis: 64
22+
23+
# --- which outputs are produced; stress and direct_forces come from the parent config ---
regress_forces: True
24+
regress_stress: ${regress_stress}
25+
direct_forces: ${direct_forces}
26+
27+
# --- network depth, width, normalisation and activation choices ---
num_layers: 4
28+
hidden_channels: 128
29+
norm_type: rms_norm_sh
30+
act_type: ${act_type}
31+
ff_type: ${ff_type}
32+
33+
# --- charge/spin embedding options ---
chg_spin_emb_type: "rand_emb"
34+
cs_emb_grad: True
35+
# NOTE(review): only "oc20" and "omol" are listed here; confirm this matches the
# datasets/tasks of whichever training config composes this backbone.
dataset_list: ["oc20", "omol"]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Benchmark a single inference config against fp64 baseline
2+
#
3+
# Usage:
4+
# fairchem -c configs/uma/benchmark/perf_check/benchmark.yaml
5+
#
6+
# Override any setting from the command line:
7+
# fairchem -c configs/uma/benchmark/perf_check/benchmark.yaml \
8+
# runner.inference_settings.execution_mode=umas_fast_gpu
9+
# fairchem -c configs/uma/benchmark/perf_check/benchmark.yaml \
10+
# runner.inference_settings.tf32=True runner.inference_settings.compile=True
11+
# fairchem -c configs/uma/benchmark/perf_check/benchmark.yaml runner.device=cpu
12+
# fairchem -c configs/uma/benchmark/perf_check/benchmark.yaml runner.warmup_iters=20
13+
14+
# Hydra defaults list. `_self_` is listed last, so the keys defined in this file
# take precedence over anything contributed by the `job: local` group entry.
defaults:
15+
  - job: local
16+
  - _self_
17+
18+
# Checkpoint to benchmark, obtained by calling the `_target_` function with
# `model_name`. Shared with the runner through the ${checkpoint} interpolation.
checkpoint:
19+
  _target_: fairchem.core.calculate.pretrained_mlip.pretrained_checkpoint_path_from_name
20+
  model_name: "uma-s-1p2"
21+
22+
# Benchmark runner. Every key below can be overridden on the command line as
# `runner.<key>=<value>` (see the usage notes at the top of this file).
runner:
23+
  _target_: fairchem.core.components.benchmark.perf_check.PerfCheckRunner
24+
  checkpoint: ${checkpoint}
25+
  device: "cuda"
26+
  # Iteration counts — presumably untimed warm-up passes followed by the timed
  # passes used for the throughput numbers; confirm in PerfCheckRunner.
  warmup_iters: 10
27+
  timed_iters: 50
28+
  # The inference configuration under test, i.e. the one compared against the
  # fp64 baseline. Override individual fields as runner.inference_settings.<field>.
  inference_settings:
29+
    _target_: fairchem.core.units.mlip_unit.api.inference.InferenceSettings
30+
    tf32: False
31+
    activation_checkpointing: True
32+
    merge_mole: False
33+
    compile: False
34+
    execution_mode: "general"
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
2+
# Shared dataset settings. Other entries in this file read them through
# ${datasets.<name>}, so this file is expected to be mounted under the
# `datasets` key of the composed config.

# Directory prefix for every `src` path below (e.g. <root>/oc20/oc20_train.aselmdb).
data_root_dir: "."
3+
# Fed to `a2g_args.r_edges` of every dataset definition below.
cpu_graph: false
4+
5+
# Per-dataset name that the `forces` target is mapped to in each `key_mapping`.
oc20_forces_key: oc20_forces
6+
omol_forces_key: omol_forces
7+
omat_forces_key: omat_forces
8+
odac_forces_key: odac_forces
9+
omc_forces_key: omc_forces
10+
11+
# Atom-count limits handed to MaxAtomDistributedBatchSampler by both dataloaders.
max_atoms: 350
12+
min_atoms: 0
13+
14+
# Keys passed as `exclude_keys` to the collate function of both dataloaders.
exclude_keys: [
15+
  "id",
16+
  "fid",
17+
  "absolute_idx",
18+
  "target_pos",
19+
  "ref_energy",
20+
  "pbc",
21+
  "nads",
22+
  "oc22",
23+
  "formation_energy",
24+
  "total_charge",
25+
]
26+
27+
# OC20 training data: a single .aselmdb source registered under the `train` split.
oc20_train:
28+
  splits:
29+
    train:
30+
      src:
31+
        - ${datasets.data_root_dir}/oc20/oc20_train.aselmdb
32+
      format: ase_db
33+
      # Arguments for the atoms-to-graph conversion. `r_edges` follows
      # datasets.cpu_graph; `max_neigh` comes from the top-level max_neighbors.
      a2g_args:
34+
        r_energy: True
35+
        r_forces: True
36+
        r_stress: True
37+
        r_edges: ${datasets.cpu_graph}
38+
        r_data_keys: ['spin', 'charge']
39+
        max_neigh: ${max_neighbors}
40+
      # Maps the generic target names to OC20-specific names.
      key_mapping:
41+
        energy: oc20_energy
42+
        forces: ${datasets.oc20_forces_key}
43+
        stress: oc20_stress
44+
      transforms:
45+
        common_transform:
46+
          dataset_name: oc20
47+
        stress_reshape_transform:
48+
          dataset_name: oc20
49+
50+
# OC20 validation data; same settings as oc20_train apart from the source file.
# NOTE(review): the split key is `train` although it points at the validation
# file — confirm this is what the dataset loader expects.
oc20_val:
51+
  splits:
52+
    train:
53+
      src:
54+
        - ${datasets.data_root_dir}/oc20/oc20_val.aselmdb
55+
      format: ase_db
56+
      a2g_args:
57+
        r_energy: True
58+
        r_forces: True
59+
        r_stress: True
60+
        r_edges: ${datasets.cpu_graph}
61+
        r_data_keys: ['spin', 'charge']
62+
        max_neigh: ${max_neighbors}
63+
      key_mapping:
64+
        energy: oc20_energy
65+
        forces: ${datasets.oc20_forces_key}
66+
        stress: oc20_stress
67+
      transforms:
68+
        common_transform:
69+
          dataset_name: oc20
70+
        stress_reshape_transform:
71+
          dataset_name: oc20
72+
73+
# OMol training data: a single .aselmdb source registered under the `train` split.
omol_train:
74+
  splits:
75+
    train:
76+
      src:
77+
        - ${datasets.data_root_dir}/omol/omol_train.aselmdb
78+
      format: ase_db
79+
      # Arguments for the atoms-to-graph conversion. Unlike the other datasets in
      # this file, OMol also sets `molecule_cell_size` — presumably the cell
      # assigned to non-periodic molecules; confirm against the converter.
      a2g_args:
80+
        r_energy: True
81+
        r_forces: True
82+
        r_stress: True
83+
        r_edges: ${datasets.cpu_graph}
84+
        r_data_keys: ['spin', 'charge']
85+
        max_neigh: ${max_neighbors}
86+
        molecule_cell_size: 120.0
87+
      # Maps the generic target names to OMol-specific names.
      key_mapping:
88+
        energy: omol_energy
89+
        forces: ${datasets.omol_forces_key}
90+
        stress: omol_stress
91+
      transforms:
92+
        common_transform:
93+
          dataset_name: omol
94+
        stress_reshape_transform:
95+
          dataset_name: omol
96+
97+
# OMol validation data; same settings as omol_train apart from the source file.
# NOTE(review): the split key is `train` although it points at the validation
# file — confirm this is what the dataset loader expects.
omol_val:
98+
  splits:
99+
    train:
100+
      src:
101+
        - ${datasets.data_root_dir}/omol/omol_val.aselmdb
102+
      format: ase_db
103+
      a2g_args:
104+
        r_energy: True
105+
        r_forces: True
106+
        r_stress: True
107+
        r_edges: ${datasets.cpu_graph}
108+
        r_data_keys: ['spin', 'charge']
109+
        max_neigh: ${max_neighbors}
110+
        molecule_cell_size: 120.0
111+
      key_mapping:
112+
        energy: omol_energy
113+
        forces: ${datasets.omol_forces_key}
114+
        stress: omol_stress
115+
      transforms:
116+
        common_transform:
117+
          dataset_name: omol
118+
        stress_reshape_transform:
119+
          dataset_name: omol
120+
121+
# OMat training data: a single .aselmdb source registered under the `train` split.
omat_train:
122+
  splits:
123+
    train:
124+
      src:
125+
        - ${datasets.data_root_dir}/omat/omat_train.aselmdb
126+
      format: ase_db
127+
      # Arguments for the atoms-to-graph conversion. `r_edges` follows
      # datasets.cpu_graph; `max_neigh` comes from the top-level max_neighbors.
      a2g_args:
128+
        r_energy: True
129+
        r_forces: True
130+
        r_stress: True
131+
        r_edges: ${datasets.cpu_graph}
132+
        r_data_keys: ['spin', 'charge']
133+
        max_neigh: ${max_neighbors}
134+
      # Maps the generic target names to OMat-specific names.
      key_mapping:
135+
        energy: omat_energy
136+
        forces: ${datasets.omat_forces_key}
137+
        stress: omat_stress
138+
      transforms:
139+
        common_transform:
140+
          dataset_name: omat
141+
        stress_reshape_transform:
142+
          dataset_name: omat
143+
144+
# OMat validation data; same settings as omat_train apart from the source file.
# NOTE(review): the split key is `train` although it points at the validation
# file — confirm this is what the dataset loader expects.
omat_val:
145+
  splits:
146+
    train:
147+
      src:
148+
        - ${datasets.data_root_dir}/omat/omat_val.aselmdb
149+
      format: ase_db
150+
      a2g_args:
151+
        r_energy: True
152+
        r_forces: True
153+
        r_stress: True
154+
        r_edges: ${datasets.cpu_graph}
155+
        r_data_keys: ['spin', 'charge']
156+
        max_neigh: ${max_neighbors}
157+
      key_mapping:
158+
        energy: omat_energy
159+
        forces: ${datasets.omat_forces_key}
160+
        stress: omat_stress
161+
      transforms:
162+
        common_transform:
163+
          dataset_name: omat
164+
        stress_reshape_transform:
165+
          dataset_name: omat
166+
167+
# ODAC training data: a single .aselmdb source registered under the `train` split.
odac_train:
168+
  splits:
169+
    train:
170+
      src:
171+
        - ${datasets.data_root_dir}/odac/odac_train.aselmdb
172+
      format: ase_db
173+
      # Arguments for the atoms-to-graph conversion. `r_edges` follows
      # datasets.cpu_graph; `max_neigh` comes from the top-level max_neighbors.
      a2g_args:
174+
        r_energy: True
175+
        r_forces: True
176+
        r_stress: True
177+
        r_edges: ${datasets.cpu_graph}
178+
        r_data_keys: ['spin', 'charge']
179+
        max_neigh: ${max_neighbors}
180+
      # Maps the generic target names to ODAC-specific names.
      key_mapping:
181+
        energy: odac_energy
182+
        forces: ${datasets.odac_forces_key}
183+
        stress: odac_stress
184+
      transforms:
185+
        common_transform:
186+
          dataset_name: odac
187+
        stress_reshape_transform:
188+
          dataset_name: odac
189+
190+
# ODAC validation data; same settings as odac_train apart from the source file.
# NOTE(review): the split key is `train` although it points at the validation
# file — confirm this is what the dataset loader expects.
odac_val:
191+
  splits:
192+
    train:
193+
      src:
194+
        - ${datasets.data_root_dir}/odac/odac_val.aselmdb
195+
      format: ase_db
196+
      a2g_args:
197+
        r_energy: True
198+
        r_forces: True
199+
        r_stress: True
200+
        r_edges: ${datasets.cpu_graph}
201+
        r_data_keys: ['spin', 'charge']
202+
        max_neigh: ${max_neighbors}
203+
      key_mapping:
204+
        energy: odac_energy
205+
        forces: ${datasets.odac_forces_key}
206+
        stress: odac_stress
207+
      transforms:
208+
        common_transform:
209+
          dataset_name: odac
210+
        stress_reshape_transform:
211+
          dataset_name: odac
212+
213+
# OMC training data: a single .aselmdb source registered under the `train` split.
omc_train:
214+
  splits:
215+
    train:
216+
      src:
217+
        - ${datasets.data_root_dir}/omc/omc_train.aselmdb
218+
      format: ase_db
219+
      # Arguments for the atoms-to-graph conversion. `r_edges` follows
      # datasets.cpu_graph; `max_neigh` comes from the top-level max_neighbors.
      a2g_args:
220+
        r_energy: True
221+
        r_forces: True
222+
        r_stress: True
223+
        r_edges: ${datasets.cpu_graph}
224+
        r_data_keys: ['spin', 'charge']
225+
        max_neigh: ${max_neighbors}
226+
      # Maps the generic target names to OMC-specific names.
      key_mapping:
227+
        energy: omc_energy
228+
        forces: ${datasets.omc_forces_key}
229+
        stress: omc_stress
230+
      transforms:
231+
        common_transform:
232+
          dataset_name: omc
233+
        stress_reshape_transform:
234+
          dataset_name: omc
235+
236+
# OMC validation data; same settings as omc_train apart from the source file.
# NOTE(review): the split key is `train` although it points at the validation
# file — confirm this is what the dataset loader expects.
omc_val:
237+
  splits:
238+
    train:
239+
      src:
240+
        - ${datasets.data_root_dir}/omc/omc_val.aselmdb
241+
      format: ase_db
242+
      a2g_args:
243+
        r_energy: True
244+
        r_forces: True
245+
        r_stress: True
246+
        r_edges: ${datasets.cpu_graph}
247+
        r_data_keys: ['spin', 'charge']
248+
        max_neigh: ${max_neighbors}
249+
      key_mapping:
250+
        energy: omc_energy
251+
        forces: ${datasets.omc_forces_key}
252+
        stress: omc_stress
253+
      transforms:
254+
        common_transform:
255+
          dataset_name: omc
256+
        stress_reshape_transform:
257+
          dataset_name: omc
258+
259+
# Combined training dataset, built by create_concat_dataset from the five
# per-dataset training definitions in this file.
train_dataset:
260+
  _target_: fairchem.core.datasets.mt_concat_dataset.create_concat_dataset
261+
  dataset_configs:
262+
    omol: ${datasets.omol_train}
263+
    oc20: ${datasets.oc20_train}
264+
    omat: ${datasets.omat_train}
265+
    odac: ${datasets.odac_train}
266+
    omc: ${datasets.omc_train}
267+
  combined_dataset_config:
268+
    # Explicit per-source sampling weights. Each key is
    # <dataset_configs key>.<split name>, so it must match the `train` split
    # name used inside the referenced dataset definitions.
    sampling:
269+
      type: explicit
270+
      ratios:
271+
        omol.train: 4.0
272+
        omc.train: 2.0
273+
        omat.train: 2.0
274+
        oc20.train: 1.0
275+
        odac.train: 1.0
276+
277+
# Combined validation dataset, built by create_concat_dataset from the five
# per-dataset validation definitions in this file.
val_dataset:
278+
  _target_: fairchem.core.datasets.mt_concat_dataset.create_concat_dataset
279+
  dataset_configs:
280+
    omol: ${datasets.omol_val}
281+
    oc20: ${datasets.oc20_val}
282+
    omat: ${datasets.omat_val}
283+
    odac: ${datasets.odac_val}
284+
    omc: ${datasets.omc_val}
285+
  # Temperature-based sampling (temperature 1.0) instead of the explicit ratios
  # used by train_dataset, so no per-split keys are named here.
  combined_dataset_config: { sampling: {type: temperature, temperature: 1.0} }
286+
287+
# Training dataloader: wraps datasets.train_dataset with an atom-count-limited
# batch sampler and the multi-task collate adapter.
train_dataloader:
288+
  _target_: fairchem.core.components.common.dataloader_builder.get_dataloader
289+
  dataset: ${datasets.train_dataset}
290+
  # `_partial_: True` makes Hydra return a partially-applied constructor rather
  # than an instance; get_dataloader is expected to supply the remaining arguments.
  batch_sampler_fn:
291+
    _target_: fairchem.core.datasets.samplers.max_atom_distributed_sampler.MaxAtomDistributedBatchSampler
292+
    _partial_: True
293+
    max_atoms: ${datasets.max_atoms}
294+
    min_atoms: ${datasets.min_atoms}
295+
    shuffle: True
296+
    seed: 0
297+
  num_workers: 0
298+
  # `tasks` is a top-level interpolation that this file does not define.
  collate_fn:
299+
    _target_: fairchem.core.units.mlip_unit.mlip_unit.mt_collater_adapter
300+
    tasks: ${tasks}
301+
    exclude_keys: ${datasets.exclude_keys}
302+
303+
# Validation dataloader: wraps datasets.val_dataset with the same sampler and
# collate settings as train_dataloader.
val_dataloader:
304+
  _target_: fairchem.core.components.common.dataloader_builder.get_dataloader
305+
  dataset: ${datasets.val_dataset}
306+
  # `_partial_: True` makes Hydra return a partially-applied constructor rather
  # than an instance; get_dataloader is expected to supply the remaining arguments.
  batch_sampler_fn:
307+
    _target_: fairchem.core.datasets.samplers.max_atom_distributed_sampler.MaxAtomDistributedBatchSampler
308+
    _partial_: True
309+
    max_atoms: ${datasets.max_atoms}
310+
    min_atoms: ${datasets.min_atoms}
311+
    # NOTE(review): shuffling is enabled for validation as well; with the fixed
    # seed below the order should still be repeatable, but confirm this is
    # intentional rather than copied from train_dataloader.
    shuffle: True
312+
    seed: 0
313+
  num_workers: 0
314+
  # `tasks` is a top-level interpolation that this file does not define.
  collate_fn:
315+
    _target_: fairchem.core.units.mlip_unit.mlip_unit.mt_collater_adapter
316+
    tasks: ${tasks}
317+
    exclude_keys: ${datasets.exclude_keys}

0 commit comments

Comments
 (0)