Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
fc6d2ee
velocity encoder
WojciechMat Oct 25, 2023
d43008a
fixed average dist, training for single token
WojciechMat Oct 25, 2023
c2ab363
lower resolution start in quantization
WojciechMat Oct 25, 2023
410d03f
run on cuda:0 by default
WojciechMat Oct 25, 2023
9a4e19b
new masked midi encoder for T5 unsupervised learning
WojciechMat Oct 25, 2023
82a6331
clean
WojciechMat Oct 25, 2023
ea29489
comment
WojciechMat Oct 25, 2023
6d7934a
cleaner better code, MaskedMidiDataset
WojciechMat Oct 25, 2023
b944ebc
denoising training, remove lr schedule
WojciechMat Oct 26, 2023
f0e4601
training on a large dataset
WojciechMat Oct 27, 2023
8a0c5ee
fix dataset building
WojciechMat Oct 27, 2023
5becd1d
optimization - use np and torch
WojciechMat Oct 27, 2023
f8dd742
fix AT record slicing...
WojciechMat Oct 27, 2023
89c9459
fix bug in masking
WojciechMat Oct 28, 2023
ba00168
remove printing
WojciechMat Oct 28, 2023
69f3104
filter incorrect sequences
WojciechMat Oct 28, 2023
5752447
denoise checkpoint dir
WojciechMat Oct 29, 2023
02cfcc2
denosing initial dashboard
WojciechMat Oct 29, 2023
4c29459
fix untokenize, add decode method to maskedmidiencoder
WojciechMat Oct 29, 2023
333f179
print tokens on a dashboard
WojciechMat Oct 31, 2023
c7251f4
decoder_start_id = start_id
WojciechMat Nov 1, 2023
1d89012
fix:
WojciechMat Nov 1, 2023
3ad7214
MaskedNoteEncoder
WojciechMat Nov 6, 2023
f4d44b8
fix dashboard
WojciechMat Nov 6, 2023
570f5c0
T5 dstart config
WojciechMat Nov 1, 2023
36b2968
apply quantization
WojciechMat Nov 6, 2023
731a0ef
fix cls token hiding from loss
WojciechMat Nov 6, 2023
3033a56
eos token = pad token, PEP 526 variable annotations
WojciechMat Nov 8, 2023
80f3717
download_model and masking
WojciechMat Nov 9, 2023
51e8f5f
change config
WojciechMat Nov 10, 2023
8fffb9c
pre-training
WojciechMat Nov 10, 2023
3358d6b
vocab size fix
WojciechMat Nov 10, 2023
6c92834
fix vocab size
WojciechMat Nov 11, 2023
5363b9d
remove prints
WojciechMat Nov 11, 2023
b289bfe
change default model path
WojciechMat Nov 11, 2023
87899a9
finetuned velocity model
WojciechMat Nov 12, 2023
29d1ec6
clean
WojciechMat Nov 12, 2023
192eeca
change model name, change run name for finetuning
WojciechMat Nov 13, 2023
079d020
change distance calculation
WojciechMat Nov 14, 2023
bbf94bf
legacy
WojciechMat Nov 15, 2023
9fcb65e
update dashboard
WojciechMat Nov 15, 2023
681dd0c
fix finetuning
WojciechMat Nov 15, 2023
ea22e1a
expand source on click
WojciechMat Nov 17, 2023
8bfacef
merge
WojciechMat Nov 16, 2023
f7b68a2
dashboard for denoising on single-token-per-note tokenization
WojciechMat Nov 20, 2023
ef8567a
fix base_lr when finetuning
WojciechMat Nov 22, 2023
e19d90e
cleanup, configs modification
WojciechMat Dec 6, 2023
e755672
fix dashboard, pre_defined_architectures added
WojciechMat Dec 8, 2023
becf0f9
denoise checkpoint
WojciechMat Dec 26, 2023
0bcf484
streamlit_pianoroll
WojciechMat Nov 13, 2023
011f4c9
change distance calculation
WojciechMat Nov 14, 2023
96444f4
better dashboard
WojciechMat Nov 14, 2023
d083ab6
validation every 1000*log_frequency steps
WojciechMat Nov 14, 2023
96379d7
change config
WojciechMat Nov 14, 2023
965d337
200*log_frequency
WojciechMat Nov 14, 2023
d6dd095
merge
WojciechMat Nov 15, 2023
5f4b377
pre-commit hooks
WojciechMat Dec 26, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added checkpoints/denoise/.keep
Empty file.
46 changes: 46 additions & 0 deletions configs/T5denoise-dstart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
train:
num_epochs: 5
accum_iter: 5
batch_size: 2
base_lr: 3e-5
warmup: 4000
finetune: false

model_name: T5
dataset_name: 'roszcz/maestro-v1-sustain'
target: denoise
seed: 26

overfit: false

tokens_per_note: "single"
time_quantization_method: dstart
masking_probability: 0.2
mask: tokens

encoder: velocity
time_bins: 100

dataset:
sequence_len: 128
sequence_step: 42

quantization:
dstart: 5
duration: 5
velocity: 3

device: "cuda:0"

log: true
log_frequency: 10
run_name: midi-T5-${now:%Y-%m-%d-%H-%M}
project: "midi-hf-transformer"

pre_defined_model: null
model:
d_model: 512
d_kv: 64
d_ff: 2048
num_layers: 6
num_heads: 8
47 changes: 47 additions & 0 deletions configs/T5denoise.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
train:
num_epochs: 5
accum_iter: 10
batch_size: 8
base_lr: 3e-5
warmup: 4000
finetune: false

model_name: T5
dataset_name: 'roszcz/maestro-v1-sustain'
target: denoise
seed: 26

overfit: false

tokens_per_note: "multiple"
time_quantization_method: start
masking_probability: 0.15
mask: notes

encoder: velocity
time_bins: 100

dataset:
sequence_duration: 5
sequence_step: 2

quantization:
start: 50
duration: 5
velocity: 3

device: "cuda:0"

log: true
log_frequency: 10
run_name: midi-T5-${now:%Y-%m-%d-%H-%M}
project: "midi-hf-transformer"

pre_defined_model: null

model:
d_model: 512
d_kv: 64
d_ff: 2048
num_layers: 6
num_heads: 8
5 changes: 4 additions & 1 deletion configs/T5start.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ seed: 26

overfit: False

tokens_per_note: multiple
time_quantization_method: start
dataset:
sequence_duration: 5
Expand All @@ -23,13 +24,15 @@ dataset:
duration: 3
velocity: 3

device: "cpu"
device: "cuda:0"

log: True
log_frequency: 10
run_name: midi-T5-${now:%Y-%m-%d-%H-%M}
project: "midi-hf-transformer"

pre_defined_model: null

model:
d_model: 512
d_kv: 64
Expand Down
45 changes: 45 additions & 0 deletions configs/T5velocity-dstart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
train:
num_epochs: 5
accum_iter: 10
batch_size: 8
base_lr: 3e-5
finetune: true
warmup: 4000


pretrained_checkpoint: midi-T5-2023-11-15-17-18.pt
model_name: T5
dataset_name: 'roszcz/maestro-v1-sustain'
target: velocity
seed: 26
time_bins: 100

overfit: false

tokens_per_note: "single"
time_quantization_method: dstart
dataset:
sequence_len: 128
sequence_step: 42

quantization:
dstart: 5
duration: 5
velocity: 3

device: "cuda:0"

log: true
log_frequency: 10
run_name: midi-T5-${now:%Y-%m-%d-%H-%M}
project: "midi-hf-transformer"

pre_defined_model: null

model:
d_model: 512
d_kv: 64
d_ff: 2048
num_layers: 6
num_decoder_layers: null
num_heads: 8
9 changes: 7 additions & 2 deletions configs/T5velocity.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,36 @@ train:
batch_size: 8
base_lr: 3e-4
warmup: 4000
finetune: true

pretrained_checkpoint: midi-T5-2023-11-11-10-29.pt
model_name: T5
dataset_name: 'roszcz/maestro-v1-sustain'
target: velocity
seed: 26

overfit: false

tokens_per_note: "multiple"
time_quantization_method: start
dataset:
sequence_duration: 5
sequence_step: 2

quantization:
start: 400
start: 20
duration: 3
velocity: 3

device: "cpu"
device: "cuda:0"

log: true
log_frequency: 10
run_name: midi-T5-${now:%Y-%m-%d-%H-%M}
project: "midi-hf-transformer"

pre_defined_model: null

model:
d_model: 512
d_kv: 64
Expand Down
5 changes: 5 additions & 0 deletions configs/architectures/large.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
d_model: 512
d_kv: 64
d_ff: 2048
num_layers: 6
num_heads: 8
5 changes: 5 additions & 0 deletions configs/architectures/mid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
d_model: 256
d_kv: 32
d_ff: 1024
num_layers: 6
num_heads: 8
5 changes: 5 additions & 0 deletions configs/architectures/small.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
d_model: 256
d_kv: 32
d_ff: 512
num_layers: 4
num_heads: 4
Loading