-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplants.yaml
More file actions
24 lines (21 loc) · 840 Bytes
/
plants.yaml
File metadata and controls
24 lines (21 loc) · 840 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Base config(s) this file is composed on top of.
# NOTE(review): looks like a Hydra-style defaults list — confirm against the loader.
defaults:
  - default

# Training dataset: Angiosperm 16 genomes
dataset_name: kuleshov-group/Angiosperm_16_genomes

# Batch size configuration
batch_size: 2048  # Total effective batch size
per_device_batch_size: 128  # Batch size per device (adjust based on GPU memory)
num_workers: 8

# Evaluation datasets: one list entry per evaluation.
# Every field of an entry must stay indented under its `- name:` item; at
# column 0 the per-eval `dataset_name` and `batch_size` would collide with
# (and, on last-wins parsers, overwrite) the top-level training keys above.
evals:
  - name: maize_af
    dataset_name: plantcad/maize-allele-frequency
    dataset_config: null
    split: validation
    genome_url: https://ftp.ensemblgenomes.ebi.ac.uk/pub/plants/release-62/fasta/zea_mays/dna/Zea_mays.Zm-B73-REFERENCE-NAM-5.0.dna_sm.toplevel.fa.gz
    filter_name: none  # Parsed as the plain string "none", not a YAML null
    window_size: 512
    batch_size: 128
    label_column: AF  # Allele frequency column
    transform: identity  # No transform for regression (default: identity)
    metrics: [pearson, spearman]  # Correlation metrics for regression task