Skip to content

Commit 44b1b03

Browse files
authored
Merge branch 'main' into patch-5
2 parents c484133 + 84bda5e commit 44b1b03

42 files changed

Lines changed: 5990 additions & 183 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
model: fairchem.core.models.allscaip.AllScAIP.AllScAIPBackbone
2+
3+
# Global Configs
4+
num_layers: 10
5+
hidden_size: 640
6+
direct_forces: ${direct_forces}
7+
regress_forces: True
8+
regress_stress: ${regress_stress}
9+
use_compile: ${use_compile}
10+
use_padding: ${direct_forces}
11+
dataset_list: ${dataset_list}
12+
13+
use_node_path: True
14+
use_freq_mask: False
15+
use_sincx_mask: False
16+
17+
18+
# Molecular Graph Configs
19+
max_num_elements: 110
20+
max_batch_size: 96 # set to the largest batch size that the sampler could produce
21+
max_atoms: ${max_atoms}
22+
max_radius: ${cutoff_radius}
23+
use_pbc: ${use_pbc}
24+
knn_k: ${max_neighbors}
25+
knn_soft: true
26+
knn_sigmoid_scale: 0.2
27+
knn_lse_scale: 0.1
28+
knn_use_low_mem: true
29+
knn_pad_size: ${max_neighbors_pad_size}
30+
31+
32+
# Graph Neural Network Configs
33+
atten_name: memory_efficient
34+
atten_num_heads: 10
35+
36+
37+
# Regularization Configs
38+
# atten_dropout: 0.1
39+
# mlp_dropout: 0.05
40+
# node_ffn_dropout: 0.05
41+
# edge_ffn_dropout: 0.05
42+
# scalar_output_dropout: 0.05
43+
# vector_output_dropout: 0.05
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
model: fairchem.core.models.allscaip.AllScAIP.AllScAIPBackbone
2+
3+
# Global Configs
4+
num_layers: 6
5+
hidden_size: 512
6+
direct_forces: ${direct_forces}
7+
regress_forces: True
8+
regress_stress: ${regress_stress}
9+
use_compile: ${use_compile}
10+
use_padding: ${direct_forces}
11+
dataset_list: ${dataset_list}
12+
13+
use_node_path: True
14+
use_freq_mask: True
15+
use_sincx_mask: True
16+
17+
18+
# Molecular Graph Configs
19+
max_num_elements: 110
20+
max_batch_size: 96 # set to the largest batch size that the sampler could produce
21+
max_atoms: ${max_atoms}
22+
max_radius: ${cutoff_radius}
23+
use_pbc: ${use_pbc}
24+
knn_k: ${max_neighbors}
25+
knn_soft: true
26+
knn_sigmoid_scale: 0.2
27+
knn_lse_scale: 0.1
28+
knn_use_low_mem: true
29+
knn_pad_size: ${max_neighbors_pad_size}
30+
31+
32+
# Graph Neural Network Configs
33+
atten_name: memory_efficient
34+
atten_num_heads: 8
35+
36+
37+
# Regularization Configs
38+
# atten_dropout: 0.1
39+
# mlp_dropout: 0.05
40+
# node_ffn_dropout: 0.05
41+
# edge_ffn_dropout: 0.05
42+
# scalar_output_dropout: 0.05
43+
# vector_output_dropout: 0.05
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
run_dir: /data/ericqu/rundir
2+
mode: LOCAL
3+
device: CUDA
4+
ranks_per_node: 1
5+
dataloader_workers: 0
6+
timeout_hr: 12
7+
debug: True
8+
mem_gb: 64
9+
cpus_per_task: 9
10+
account: null
11+
qos: gpu
12+
partition: gpu
13+
omol_all_path: /data/OMOL/ALL
14+
omol_4M_train_path: /data/OMOL/4M/train_4M
15+
data_root_dir: /data/ericqu
16+
additional_parameters:
17+
nodelist: germain

configs/allscaip/dataset/omol.yaml

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
omol_train:
2+
splits:
3+
train:
4+
src: ${cluster.omol_all_path}/train
5+
format: ase_db
6+
a2g_args:
7+
molecule_cell_size: 120.0
8+
r_energy: True
9+
r_forces: True
10+
r_data_keys: ['spin', 'charge']
11+
key_mapping:
12+
energy: omol_energy
13+
forces: ${omol_forces_key}
14+
transforms:
15+
common_transform:
16+
dataset_name: omol
17+
18+
omol_val:
19+
splits:
20+
val:
21+
sample_n: 20000
22+
src: ${cluster.omol_all_path}/val
23+
# metal_ligand_ood:
24+
# sample_n: 20000
25+
# src: ${cluster.omol_all_path}/metal_ligand_ood
26+
# elytes_ood:
27+
# sample_n: 20000
28+
# src: ${cluster.omol_all_path}/elytes_ood
29+
val_metal_complexes:
30+
sample_n: 20000
31+
src: ${cluster.omol_all_path}/val
32+
subset_to:
33+
- op: in
34+
metadata_key: data_ids
35+
rhv:
36+
- metal_complexes
37+
val_electrolytes:
38+
sample_n: 20000
39+
src: ${cluster.omol_all_path}/val
40+
subset_to:
41+
- op: in
42+
metadata_key: data_ids
43+
rhv:
44+
- elytes
45+
val_biomolecules:
46+
sample_n: 20000
47+
src: ${cluster.omol_all_path}/val
48+
subset_to:
49+
- op: in
50+
metadata_key: data_ids
51+
rhv:
52+
- biomolecules
53+
val_spice:
54+
src: ${cluster.omol_all_path}/val
55+
subset_to:
56+
- op: in
57+
metadata_key: data_ids
58+
rhv:
59+
- spice
60+
val_neutralorganics:
61+
sample_n: 20000
62+
src: ${cluster.omol_all_path}/val
63+
subset_to:
64+
- op: in
65+
metadata_key: data_ids
66+
rhv:
67+
- ani2x
68+
- orbnet_denali
69+
- geom_orca6
70+
- trans1x
71+
- rgd
72+
format: ase_db
73+
a2g_args:
74+
molecule_cell_size: 120.0
75+
r_energy: True
76+
r_forces: True
77+
r_data_keys: ['spin', 'charge']
78+
# r_edges: ${cpu_graph}
79+
# radius: ${cutoff_radius}
80+
# max_neigh: ${max_neighbors}
81+
key_mapping:
82+
energy: omol_energy
83+
forces: ${omol_forces_key}
84+
transforms:
85+
common_transform:
86+
dataset_name: omol
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
2+
3+
omol_train:
4+
splits:
5+
train:
6+
src: ${cluster.omol_4M_train_path}
7+
format: ase_db
8+
a2g_args:
9+
molecule_cell_size: 120.0
10+
r_energy: True
11+
r_forces: True
12+
r_data_keys: ['spin', 'charge']
13+
key_mapping:
14+
energy: omol_energy
15+
forces: ${omol_forces_key}
16+
transforms:
17+
common_transform:
18+
dataset_name: omol
19+
20+
omol_val:
21+
splits:
22+
val:
23+
sample_n: 20000
24+
src: ${cluster.omol_all_path}/val
25+
# metal_ligand_ood:
26+
# sample_n: 20000
27+
# src: ${cluster.omol_all_path}/metal_ligand_ood
28+
# elytes_ood:
29+
# sample_n: 20000
30+
# src: ${cluster.omol_all_path}/elytes_ood
31+
val_metal_complexes:
32+
sample_n: 20000
33+
src: ${cluster.omol_all_path}/val
34+
subset_to:
35+
- op: in
36+
metadata_key: data_ids
37+
rhv:
38+
- metal_complexes
39+
val_electrolytes:
40+
sample_n: 20000
41+
src: ${cluster.omol_all_path}/val
42+
subset_to:
43+
- op: in
44+
metadata_key: data_ids
45+
rhv:
46+
- elytes
47+
val_biomolecules:
48+
sample_n: 20000
49+
src: ${cluster.omol_all_path}/val
50+
subset_to:
51+
- op: in
52+
metadata_key: data_ids
53+
rhv:
54+
- biomolecules
55+
val_spice:
56+
src: ${cluster.omol_all_path}/val
57+
subset_to:
58+
- op: in
59+
metadata_key: data_ids
60+
rhv:
61+
- spice
62+
val_neutralorganics:
63+
sample_n: 20000
64+
src: ${cluster.omol_all_path}/val
65+
subset_to:
66+
- op: in
67+
metadata_key: data_ids
68+
rhv:
69+
- ani2x
70+
- orbnet_denali
71+
- geom_orca6
72+
- trans1x
73+
- rgd
74+
format: ase_db
75+
a2g_args:
76+
molecule_cell_size: 120.0
77+
r_energy: True
78+
r_forces: True
79+
r_data_keys: ['spin', 'charge']
80+
# r_edges: ${cpu_graph}
81+
# radius: ${cutoff_radius}
82+
# max_neigh: ${max_neighbors}
83+
key_mapping:
84+
energy: omol_energy
85+
forces: ${omol_forces_key}
86+
transforms:
87+
common_transform:
88+
dataset_name: omol

0 commit comments

Comments
 (0)