Skip to content

Commit a243522

Browse files
authored
Merge pull request #59 from MTG/multifeat-improvements
Multifeat improvements
2 parents 41846ce + 2aafddb commit a243522

73 files changed

Lines changed: 1957 additions & 350 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ checkpoints/
1010
/figs/
1111

1212
/venv/
13+
14+
build/

cfg/config_text_audio_dev.gin

Lines changed: 0 additions & 59 deletions
This file was deleted.

cfg/downstream/chords.gin

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
dataset_name = "chords_data"
2+
embeddings_dir = "/gpfs/projects/upf97/embeddings_ssl/"
3+
4+
# Lightning Trainer parameters, overrides the training config
5+
predict.device_dict = {
6+
"accelerator": "gpu",
7+
"devices": 1,
8+
"num_nodes": 1,
9+
}
10+
11+
# Layers of the neural network from which the embeddings are extracted
12+
predict.embedding_layer = [6, 11]
13+
predict.overlap_ratio = 0.5
14+
15+
predict.embeddings_dir = %embeddings_dir
16+
predict.dataset_name = %dataset_name
17+
18+
# Audio Loader for embedding extraction
19+
AudioEmbeddingDataModule.data_dir = "/gpfs/projects/upf97/downstream_datasets/chords_data/"
20+
AudioEmbeddingDataModule.file_format = "mp3"
21+
AudioEmbeddingDataModule.num_workers = 20
22+
AudioEmbeddingDataModule.batch_size = 32
23+
AudioEmbeddingDataModule.overlap_ratio = 0.5
24+
AudioEmbeddingDataModule.n_seconds = 30
25+
AudioEmbeddingDataModule.last_chunk_ratio = 0.1
26+
27+
28+
build_module_and_datamodule.dataset_name = %dataset_name
29+
build_module_and_datamodule.embeddings_dir = %embeddings_dir
30+
31+
MTTEmbeddingLoadingDataModule.gt_path = "/data0/palonso/ssl-mtg/downstream_datasets/magnatagatune/metadata/mtat/binary.npy"
32+
MTTEmbeddingLoadingDataModule.train_filelist = "/data0/palonso/ssl-mtg/downstream_datasets/magnatagatune/metadata/mtat/train.npy"
33+
MTTEmbeddingLoadingDataModule.val_filelist = "/data0/palonso/ssl-mtg/downstream_datasets/magnatagatune/metadata/mtat/valid.npy"
34+
MTTEmbeddingLoadingDataModule.test_filelist = "/data0/palonso/ssl-mtg/downstream_datasets/magnatagatune/metadata/mtat/test.npy"
35+
MTTEmbeddingLoadingDataModule.batch_size = 64
36+
MTTEmbeddingLoadingDataModule.num_workers = 10
37+
MTTEmbeddingLoadingDataModule.layer_aggregation = "none"
38+
MTTEmbeddingLoadingDataModule.granularity = "chunk"
39+
MTTEmbeddingLoadingDataModule.time_aggregation = "mean"
40+
41+
train_probe.wandb_params = {
42+
"project": "magnatagatune",
43+
"offline": False,
44+
"entity": "mtg-upf",
45+
"save_dir": "/data0/palonso/ssl-mtg/logs/",
46+
}
47+
48+
train_probe.train_params = {
49+
"accelerator": "gpu",
50+
"devices": 1,
51+
"log_every_n_steps": 50,
52+
"max_steps": 20000,
53+
"num_sanity_val_steps": 0,
54+
"check_val_every_n_epoch": 1,
55+
}
56+
train_probe.monitor = "val-MAP-macro"
57+
train_probe.monitor_mode = "max"
58+
59+
optimize_probe.bound_conditions = {
60+
"hidden_size": (64, 1024),
61+
"dropout": (0.0, 0.5),
62+
"lr": (1e-5, 1e-3),
63+
}
64+
# Other parameters to optimize:
65+
# "max_epochs": (10, 100)
66+
# "batch_size": (32, 128)
67+
68+
optimize_probe.optim_process = False
69+
optimize_probe.init_points = 5
70+
optimize_probe.n_iter = 50
71+
optimize_probe.seed = 1
72+
73+
# Warning: these parameters are ignored when the Bayesian optimization is enabled
74+
SequenceMultiLabelClassificationProbe.num_layers = 2
75+
SequenceMultiLabelClassificationProbe.hidden_size = 512
76+
SequenceMultiLabelClassificationProbe.dropout = 0.2
77+
SequenceMultiLabelClassificationProbe.lr = 0.0001
78+
79+
SequenceMultiLabelClassificationProbe.activation = "relu"
80+
SequenceMultiLabelClassificationProbe.bias = True
81+
SequenceMultiLabelClassificationProbe.num_labels = 50
82+
SequenceMultiLabelClassificationProbe.labels = "/data0/palonso/ssl-mtg/downstream_datasets/magnatagatune/metadata/mtat/tags.npy"
83+
84+
# CosineAnnealing scheduler
85+
CosineAnnealingCallback.warmup_steps = 2000
86+
CosineAnnealingCallback.eta_min = 1e-7

cfg/downstream/gtzan_zsl.gin

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
dataset_name = "gtzan_zsl"
2+
embeddings_dir = "/home/upf/upf825634/embeddings/"
3+
4+
# Lightning Trainer parameters, overrides the training config
5+
predict.device_dict = {
6+
"accelerator": "gpu",
7+
"devices": 1,
8+
}
9+
10+
predict.overlap_ratio = 1
11+
12+
predict.embeddings_dir = %embeddings_dir
13+
predict.dataset_name = %dataset_name
14+
15+
# Audio Loader for embedding extraction
16+
AudioEmbeddingDataModule.data_dir = "/gpfs/home/upf/upf825634/datasets/gtzan/22kmono"
17+
AudioEmbeddingDataModule.file_format = "wav"
18+
AudioEmbeddingDataModule.orig_freq = 22050
19+
AudioEmbeddingDataModule.num_workers = 6
20+
AudioEmbeddingDataModule.batch_size = 64
21+
AudioEmbeddingDataModule.overlap_ratio = 0
22+
AudioEmbeddingDataModule.num_frames = 66150
23+
24+
25+
build_module_and_datamodule.dataset_name = %dataset_name
26+
build_module_and_datamodule.embeddings_dir = %embeddings_dir
27+
GTZANEmbeddingLoadingDataModule.filelist = "/gpfs/home/upf/upf825634/data/gtzan/metadata/gtzan_filelist.txt"
28+
GTZANEmbeddingLoadingDataModule.batch_size = 64
29+
GTZANEmbeddingLoadingDataModule.num_workers = 0
30+
GTZANEmbeddingLoadingDataModule.layer_aggregation = "none"
31+
GTZANEmbeddingLoadingDataModule.granularity = "chunk"
32+
GTZANEmbeddingLoadingDataModule.time_aggregation = "mean"
33+
34+
train_probe.train_params = {
35+
"accelerator": "gpu",
36+
"devices": 1,
37+
"log_every_n_steps": 50,
38+
"max_steps": 50000,
39+
"num_sanity_val_steps": 0,
40+
"check_val_every_n_epoch": 1,
41+
}

cfg/downstream/nsynth_pitch.gin

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
dataset_name = "nsynth"
2-
embeddings_dir = "/data0/palonso/ssl-mtg/embeddings/"
2+
embeddings_dir = "/gpfs/scratch/upf97/embeddings/"
33

44
# Lightning Trainer parameters, overrides the training config
55
predict.device_dict = {
66
"accelerator": "gpu",
77
"devices": 1,
8+
"num_nodes": 1,
89
}
910

1011
# Layers of the neural network from which the embeddings are extracted
@@ -15,39 +16,39 @@ predict.embeddings_dir = %embeddings_dir
1516
predict.dataset_name = %dataset_name
1617

1718
# Audio Loader for embedding extraction
18-
AudioEmbeddingDataModule.data_dir = "/data0/palonso/ssl-mtg/downstream_datasets/nsynth/"
19+
AudioEmbeddingDataModule.data_dir = "/gpfs/projects/upf97/downstream_datasets/nsynth/"
1920
AudioEmbeddingDataModule.file_format = "wav"
20-
AudioEmbeddingDataModule.orig_freq = 16000
2121
AudioEmbeddingDataModule.num_workers = 20
2222
AudioEmbeddingDataModule.batch_size = 128
2323
AudioEmbeddingDataModule.overlap_ratio = 0.5
24-
AudioEmbeddingDataModule.num_frames = 480000
24+
AudioEmbeddingDataModule.n_seconds = 4
25+
AudioEmbeddingDataModule.last_chunk_ratio = 0.1
2526

2627

2728
build_module_and_datamodule.dataset_name = %dataset_name
2829
build_module_and_datamodule.embeddings_dir = %embeddings_dir
2930

30-
NSynthPitchEmbeddingLoadingDataModule.train_filelist = "/data0/palonso/ssl-mtg/downstream_datasets/nsynth/metadata/nsynth_filelist_train.txt"
31-
NSynthPitchEmbeddingLoadingDataModule.val_filelist = "/data0/palonso/ssl-mtg/downstream_datasets/nsynth/metadata/nsynth_filelist_valid.txt"
32-
NSynthPitchEmbeddingLoadingDataModule.test_filelist = "/data0/palonso/ssl-mtg/downstream_datasets/nsynth/metadata/nsynth_filelist_test.txt"
33-
NSynthPitchEmbeddingLoadingDataModule.batch_size = 64
34-
NSynthPitchEmbeddingLoadingDataModule.num_workers = 0
31+
NSynthPitchEmbeddingLoadingDataModule.train_filelist = "/gpfs/projects/upf97/downstream_datasets/nsynth/metadata/nsynth_filelist_train.txt"
32+
NSynthPitchEmbeddingLoadingDataModule.val_filelist = "/gpfs/projects/upf97/downstream_datasets/nsynth/metadata/nsynth_filelist_valid.txt"
33+
NSynthPitchEmbeddingLoadingDataModule.test_filelist = "/gpfs/projects/upf97/downstream_datasets/nsynth/metadata/nsynth_filelist_test.txt"
34+
NSynthPitchEmbeddingLoadingDataModule.batch_size = 32
35+
NSynthPitchEmbeddingLoadingDataModule.num_workers = 10
3536
NSynthPitchEmbeddingLoadingDataModule.layer_aggregation = "none"
3637
NSynthPitchEmbeddingLoadingDataModule.granularity = "chunk"
3738
NSynthPitchEmbeddingLoadingDataModule.time_aggregation = "mean"
3839

3940
train_probe.wandb_params = {
4041
"project": "nsynth",
41-
"offline": False,
42+
"offline": True,
4243
"entity": "mtg-upf",
43-
"save_dir": "/data0/palonso/ssl-mtg/logs/",
44+
"save_dir": "/gpfs/projects/upf97/logs/",
4445
}
4546

4647
train_probe.train_params = {
4748
"accelerator": "gpu",
4849
"devices": 1,
4950
"log_every_n_steps": 50,
50-
"max_steps": 50000,
51+
"max_steps": 100000,
5152
"num_sanity_val_steps": 0,
5253
"check_val_every_n_epoch": 1,
5354
}
@@ -71,8 +72,9 @@ optimize_probe.seed = 1
7172
# Warning: these parameters are ignored when the Bayesian optimization is enabled
7273
SequenceClassificationProbe.num_layers = 2
7374
SequenceClassificationProbe.hidden_size = 512
74-
SequenceClassificationProbe.dropout = 0.2
75-
SequenceClassificationProbe.lr = 0.0001
75+
SequenceClassificationProbe.dropout = 0.0
76+
SequenceClassificationProbe.lr = 0.001
77+
7678
SequenceClassificationProbe.activation = "relu"
7779
SequenceClassificationProbe.bias = True
7880
SequenceClassificationProbe.num_labels = 128
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
dataset_name = "nsynth"
2+
embeddings_dir = "/gpfs/scratch/upf97/embeddings/"
3+
4+
# Lightning Trainer parameters, overrides the training config
5+
predict.device_dict = {
6+
"accelerator": "gpu",
7+
"devices": 1,
8+
"num_nodes": 1,
9+
}
10+
11+
# Layers of the neural network from which the embeddings are extracted
12+
predict.embedding_layer = [11]
13+
predict.overlap_ratio = 0.5
14+
15+
predict.embeddings_dir = %embeddings_dir
16+
predict.dataset_name = %dataset_name
17+
18+
# Audio Loader for embedding extraction
19+
AudioEmbeddingDataModule.data_dir = "/gpfs/projects/upf97/downstream_datasets/nsynth/"
20+
AudioEmbeddingDataModule.file_format = "wav"
21+
AudioEmbeddingDataModule.num_workers = 20
22+
AudioEmbeddingDataModule.batch_size = 128
23+
AudioEmbeddingDataModule.overlap_ratio = 0.5
24+
AudioEmbeddingDataModule.n_seconds = 4
25+
AudioEmbeddingDataModule.last_chunk_ratio = 0.1
26+
27+
28+
build_module_and_datamodule.dataset_name = %dataset_name
29+
build_module_and_datamodule.embeddings_dir = %embeddings_dir
30+
31+
NSynthPitchEmbeddingLoadingDataModule.train_filelist = "/gpfs/projects/upf97/downstream_datasets/nsynth/metadata/nsynth_filelist_train.txt"
32+
NSynthPitchEmbeddingLoadingDataModule.val_filelist = "/gpfs/projects/upf97/downstream_datasets/nsynth/metadata/nsynth_filelist_valid.txt"
33+
NSynthPitchEmbeddingLoadingDataModule.test_filelist = "/gpfs/projects/upf97/downstream_datasets/nsynth/metadata/nsynth_filelist_test.txt"
34+
NSynthPitchEmbeddingLoadingDataModule.batch_size = 64
35+
NSynthPitchEmbeddingLoadingDataModule.num_workers = 10
36+
NSynthPitchEmbeddingLoadingDataModule.layer_aggregation = "none"
37+
NSynthPitchEmbeddingLoadingDataModule.granularity = "chunk"
38+
NSynthPitchEmbeddingLoadingDataModule.time_aggregation = "mean"
39+
40+
train_probe.wandb_params = {
41+
"project": "nsynth",
42+
"offline": True,
43+
"entity": "mtg-upf",
44+
"save_dir": "/gpfs/projects/upf97/logs/",
45+
}
46+
47+
train_probe.train_params = {
48+
"accelerator": "gpu",
49+
"devices": 1,
50+
"log_every_n_steps": 50,
51+
"max_steps": 20000,
52+
"num_sanity_val_steps": 0,
53+
"check_val_every_n_epoch": 1,
54+
}
55+
train_probe.monitor = "val-acc"
56+
train_probe.monitor_mode = "max"
57+
58+
optimize_probe.bound_conditions = {
59+
"hidden_size": (64, 1024),
60+
"dropout": (0.0, 0.5),
61+
"lr": (1e-5, 1e-3),
62+
}
63+
# Other parameters to optimize:
64+
# "max_epochs": (10, 100)
65+
# "batch_size": (32, 128)
66+
67+
optimize_probe.optim_process = False
68+
optimize_probe.init_points = 5
69+
optimize_probe.n_iter = 50
70+
optimize_probe.seed = 1
71+
72+
# Warning: these parameters are ignored when the Bayesian optimization is enabled
73+
SequenceClassificationProbe.num_layers = 2
74+
SequenceClassificationProbe.hidden_size = 512
75+
SequenceClassificationProbe.dropout = 0.2
76+
SequenceClassificationProbe.lr = 0.0001
77+
78+
SequenceClassificationProbe.activation = "relu"
79+
SequenceClassificationProbe.bias = True
80+
SequenceClassificationProbe.num_labels = 128
81+
82+
# CosineAnnealing scheduler
83+
CosineAnnealingCallback.warmup_steps = 2000
84+
CosineAnnealingCallback.eta_min = 1e-7

0 commit comments

Comments
 (0)