Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
412a9d8
first commit
Aug 30, 2024
48d1120
predict working
Aug 30, 2024
6845126
split
Aug 30, 2024
abb3bed
avanzando con el dataset
Aug 30, 2024
f92fdc1
normalize labels from harmonix
Aug 30, 2024
faea1bf
faltan validation y test me piro a ver la tele que es diumenge pero t…
Sep 1, 2024
feaaf9c
test and val finished more or less
Sep 2, 2024
e5e26d9
before removing the multihot
Sep 2, 2024
1bcc28a
multihot_removed
Sep 2, 2024
69f2fcd
change to crossentropy and add a clean implemetnation of the confusio…
Sep 2, 2024
260dedd
working with crossentropy
Sep 4, 2024
61d7257
reformulated the task as predicting the expectation function of each …
Sep 4, 2024
aaf7442
two heads (THE PRVIOUS IS THE ONE I NEED FOR RHYTHM)
Sep 4, 2024
fd4c3e0
postprocessing and vlaidation of boundaries
Sep 5, 2024
9021308
restructuring infrastructure structure
Sep 5, 2024
cce916e
restructuring infrastructure structure
Sep 5, 2024
fe05e69
fixed bgs and added infrastrstructure for visualizing embeddings
Sep 5, 2024
8127a1d
Merge branch 'main' into pedro/structbrrrrr
Sep 6, 2024
1b3d273
before overlap_ratio data augmentation
Sep 6, 2024
3f77aa1
Merge branch 'main' into pedro/structbrrrrr
Sep 6, 2024
9d8bddd
tricks for using the model
Sep 6, 2024
5a53226
fix sigmoid and thresholding
Sep 6, 2024
62bc866
black
Sep 6, 2024
80d833a
me esta petando en memoria los 120 gb de data augmentation xd
Sep 9, 2024
bb5ea0c
confusion mtrix shows that weights are wrong
Sep 9, 2024
bea28c5
fix bug weights
Sep 10, 2024
6799101
some litte fixes
Sep 10, 2024
e875336
before the ctlloss
Sep 12, 2024
53b9ae4
fix global print
Sep 13, 2024
7492eb8
Support extraction from different layers
palonso Sep 13, 2024
4c637be
Organize class and use set for layer member
palonso Sep 13, 2024
6d2448b
Fix conformer imports
palonso Sep 13, 2024
2bf99ba
Merge branch 'main' into downstream_improvements
palonso Sep 16, 2024
671dba6
Implement Bayesian optimization
palonso Sep 16, 2024
01b925b
Load downstream datamodule in __init__
palonso Sep 16, 2024
00fa1c5
Add consine scheduler
palonso Sep 16, 2024
2494c1e
Do separate wandb experiments
palonso Sep 16, 2024
4885966
Set Bayesian optimization hyperparams
palonso Sep 16, 2024
2f42c3a
after pablo talk
Sep 16, 2024
151746c
after pablo talk
Sep 16, 2024
7219387
Merge branch 'main' into pedro/structbrrrrr
Sep 16, 2024
f5624e0
Add callback for best model
palonso Sep 17, 2024
1a6234a
Always compute test metrics
palonso Sep 17, 2024
458d934
Log probing params
palonso Sep 17, 2024
53869ae
Save best metric values
palonso Sep 17, 2024
277c7f3
early stopping
Sep 18, 2024
d4358eb
early stopping
Sep 18, 2024
3e419c5
Merge remote-tracking branch 'origin/downstream_improvements' into pe…
Sep 18, 2024
aac90f0
early stopping
Oct 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,7 @@ checkpoints/
.idea/

/figs/
/src/probe/visualize_probe/embedding_structure/
/src/probe/visualize_probe/embedding_structure/

/venv/
72 changes: 72 additions & 0 deletions cfg/downstream/structure.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
dataset_name = "harmonix"
overlap_ratio = 0.1
embeddings_dir = "/gpfs/scratch/upf97/embeddings/"

# Lightning Trainer parameters; these override the training config
predict.device_dict = {
"accelerator": "gpu",
"devices": 1,
}

# Layer(s) of the neural network from which the embeddings are taken
predict.embedding_layer = [6]

predict.embeddings_dir = %embeddings_dir
predict.dataset_name = %dataset_name
predict.overlap_ratio = %overlap_ratio


# Audio Loader for embedding extraction
AudioEmbeddingDataModule.data_dir = "/gpfs/projects/upf97/downstream_datasets/harmonix/tracks"
AudioEmbeddingDataModule.file_format = "mp3"
AudioEmbeddingDataModule.orig_freq = 44100
AudioEmbeddingDataModule.new_freq = 16000 # TODO read from train cfg
AudioEmbeddingDataModule.mono = True # TODO read from train cfg
AudioEmbeddingDataModule.half_precision = True # TODO read from train cfg
AudioEmbeddingDataModule.num_workers = 20
AudioEmbeddingDataModule.overlap_ratio = %overlap_ratio
AudioEmbeddingDataModule.patch_size_sec = 30


build_module_and_datamodule.dataset_name = %dataset_name
build_module_and_datamodule.embeddings_dir = %embeddings_dir

HarmonixEmbeddingLoadingDataModule.gt_path = "data/harmonix/segments_norm"
HarmonixEmbeddingLoadingDataModule.train_filelist = "data/harmonix/train.txt"
HarmonixEmbeddingLoadingDataModule.val_filelist = "data/harmonix/validation.txt"
HarmonixEmbeddingLoadingDataModule.test_filelist = "data/harmonix/test.txt"
HarmonixEmbeddingLoadingDataModule.batch_size = 128
HarmonixEmbeddingLoadingDataModule.num_frames_aggregate = 3
HarmonixEmbeddingLoadingDataModule.num_workers = 16
HarmonixEmbeddingLoadingDataModule.overlap = 0.1
HarmonixEmbeddingLoadingDataModule.precompute = True

train_probe.wandb_params = {
"project": "structure",
"name": "structure_50k_d001",
"offline": True,
"entity": "mtg-upf",
"save_dir": "/gpfs/projects/upf97/logs/"
}

train_probe.train_params = {
"accelerator": "gpu",
"devices": 1,
"log_every_n_steps": 10,
"max_steps": 50000,
"num_sanity_val_steps": 0,
"val_check_interval": 500,
"check_val_every_n_epoch": None
}


probe.modules.structure_probe.StructureClassProbe.num_classes = 7 # TODO
probe.modules.structure_probe.StructureClassProbe.hidden_size = 512
probe.modules.structure_probe.StructureClassProbe.bias = True
probe.modules.structure_probe.StructureClassProbe.dropout = 0.001
probe.modules.structure_probe.StructureClassProbe.lr = 0.0001
probe.modules.structure_probe.StructureClassProbe.num_aggregations = 3
probe.modules.structure_probe.StructureClassProbe.save_prediction = True



66 changes: 66 additions & 0 deletions cfg/downstream/structure_local.gin
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
dataset_name = "harmonix"
embeddings_dir = "/home/pedro/Documents/experimentos_bsc/"

# Lightning Trainer parameters; these override the training config
predict.device_dict = {
"accelerator": "gpu",
"devices": 1,
}

# Layer(s) of the neural network from which the embeddings are taken
predict.embedding_layer = [-1]

predict.embeddings_dir = %embeddings_dir
predict.dataset_name = %dataset_name
predict.overlap_ratio = 0.1


# Audio Loader for embedding extraction
AudioEmbeddingDataModule.data_dir = "/gpfs/projects/upf97/downstream_datasets/harmonix/tracks"
AudioEmbeddingDataModule.file_format = "mp3"
AudioEmbeddingDataModule.orig_freq = 44100
AudioEmbeddingDataModule.new_freq = 16000 # TODO read from train cfg
AudioEmbeddingDataModule.mono = True # TODO read from train cfg
AudioEmbeddingDataModule.half_precision = True # TODO read from train cfg
AudioEmbeddingDataModule.num_workers = 20


build_module_and_datamodule.dataset_name = %dataset_name
build_module_and_datamodule.embeddings_dir = %embeddings_dir

HarmonixEmbeddingLoadingDataModule.gt_path = "data/harmonix/segments_norm"
HarmonixEmbeddingLoadingDataModule.train_filelist = "data/harmonix/train.txt"
HarmonixEmbeddingLoadingDataModule.val_filelist = "data/harmonix/validation.txt"
HarmonixEmbeddingLoadingDataModule.test_filelist = "data/harmonix/test.txt"
HarmonixEmbeddingLoadingDataModule.batch_size = 16
HarmonixEmbeddingLoadingDataModule.num_frames_aggregate = 3
HarmonixEmbeddingLoadingDataModule.num_workers = 4
HarmonixEmbeddingLoadingDataModule.overlap = 0.1
HarmonixEmbeddingLoadingDataModule.precompute = False

train_probe.wandb_params = {
"project": "structure",
"name": "my_first_structure_classifier",
"offline": False,
"entity": "mtg-upf",
"save_dir": "/home/pedro/ssl-mtg/data/logs/"
}

train_probe.train_params = {
"accelerator": "gpu",
"devices": 1,
"log_every_n_steps": 10,
"max_epochs": 20,
"num_sanity_val_steps": 0,
}


probe.modules.structure_probe.StructureClassProbe.num_classes = 7 # TODO
probe.modules.structure_probe.StructureClassProbe.hidden_size = 512
probe.modules.structure_probe.StructureClassProbe.bias = True
probe.modules.structure_probe.StructureClassProbe.dropout = 0.1
probe.modules.structure_probe.StructureClassProbe.lr = 0.0001
probe.modules.structure_probe.StructureClassProbe.num_aggregations = 3
probe.modules.structure_probe.StructureClassProbe.save_prediction = True


36 changes: 28 additions & 8 deletions cfg/downstream/tagging.gin
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ predict.device_dict = {
}

# Layer(s) of the neural network from which the embeddings are taken
predict.embedding_layer = [-1]
predict.embedding_layer = [6]
predict.overlap_ratio = 0.5

predict.embeddings_dir = %embeddings_dir
Expand All @@ -22,6 +22,7 @@ AudioEmbeddingDataModule.new_freq = 16000 # TODO read from train cfg
AudioEmbeddingDataModule.mono = True # TODO read from train cfg
AudioEmbeddingDataModule.half_precision = True # TODO read from train cfg
AudioEmbeddingDataModule.num_workers = 20
AudioEmbeddingDataModule.pad_to30sec = True


build_module_and_datamodule.dataset_name = %dataset_name
Expand All @@ -31,7 +32,7 @@ MTTEmbeddingLoadingDataModule.gt_path = "/gpfs/projects/upf97/downstream_dataset
MTTEmbeddingLoadingDataModule.train_filelist = "/gpfs/projects/upf97/downstream_datasets/magnatagatune/metadata/mtat/train.npy"
MTTEmbeddingLoadingDataModule.val_filelist = "/gpfs/projects/upf97/downstream_datasets/magnatagatune/metadata/mtat/valid.npy"
MTTEmbeddingLoadingDataModule.test_filelist = "/gpfs/projects/upf97/downstream_datasets/magnatagatune/metadata/mtat/test.npy"
MTTEmbeddingLoadingDataModule.batch_size = 256
MTTEmbeddingLoadingDataModule.batch_size = 64
MTTEmbeddingLoadingDataModule.num_workers = 10
MTTEmbeddingLoadingDataModule.layer_aggregation = "none"
MTTEmbeddingLoadingDataModule.granularity = "chunk"
Expand All @@ -47,16 +48,35 @@ train_probe.wandb_params = {
train_probe.train_params = {
"accelerator": "gpu",
"devices": 1,
"log_every_n_steps": 10,
"max_epochs": 20,
"log_every_n_steps": 50,
"max_steps": 30000,
"num_sanity_val_steps": 0,
}

optimize_probe.bound_conditions = {
"num_layers": (1, 2),
"hidden_size": (128, 512),
"dropout": (0.1, 0.5),
"lr": (1e-5, 1e-3),
}
optimize_probe.init_points = 5
optimize_probe.n_iter = 25
optimize_probe.seed = 1

# "max_epochs": (10, 100),
# "batch_size": (32, 128),

# Warning: these parameters are ignored when Bayesian optimization is enabled
SequenceMultiLabelClassificationProbe.num_layers = 2
SequenceMultiLabelClassificationProbe.num_labels = 50 # TODO
SequenceMultiLabelClassificationProbe.hidden_size = 512
SequenceMultiLabelClassificationProbe.activation = "relu"
SequenceMultiLabelClassificationProbe.bias = True
SequenceMultiLabelClassificationProbe.dropout = 0.2
SequenceMultiLabelClassificationProbe.lr = 0.0001
SequenceMultiLabelClassificationProbe.labels = "/gpfs/projects/upf97/downstream_datasets/magnatagatune/metadata/mtat/tags.npy"

SequenceMultiLabelClassificationProbe.activation = "relu"
SequenceMultiLabelClassificationProbe.bias = True
SequenceMultiLabelClassificationProbe.num_labels = 50
SequenceMultiLabelClassificationProbe.labels = "/gpfs/projects/upf97/downstream_datasets/magnatagatune/metadata/mtat/tags.npy"

# CosineAnnealing scheduler
CosineAnnealingCallback.warmup_steps = 3000
CosineAnnealingCallback.eta_min = 1e-7
Loading