example.yml.example
training:
# --------------------------------------------- Data ------------------------------------------------
# Shape of the global patch
global_crop_size: [16, 112, 112]
# Whether to also take the brain-to-background ratio into account when cropping the global patch
wise_crop: False
# Number of small local views to generate. Set this parameter to 0 to disable multi-crop
local_crops_number: 0
# Shape of the local patch (not used)
local_crop_size: null
# Number of iterations to use during training (number of batches to sample)
n_iters: 9600
# Number of iterations to use during validation (number of batches to sample)
n_val_iters: 3200
# Whether the images provided are already cropped to the brain bbox or not
already_cropped: True
# If the images are not cropped to the brain bboxes, pass here the path to a json containing
# the bbox for each image. This file must be generated by means of the preprocessing pipeline.
bboxes_path: 'example_files/bbox_info.json'
# Thickness of the slab sampled along the z axis
slab_thickness: 24
# Path to the datasets csv; added so the AIS labels can be used for visualizations during training
all_datasets_csv_path: 'ssl/example_files/dataset.csv'
# When using the difference image as an extra channel, this should be True
multichannel_input: False
# Transformations configuration. Defaults:
transformations_cfg:
# Scale range used for large global view cropping, also when multi-crop is disabled (local_crops_number: 0)
global_scale: [0.8, 1.2]
# Scale range used for small local view cropping in multi-crop. Not used if local_crops_number = 0
local_scale: null
# Whether to use mirror data augmentation with a probability of 0.8 across the three axes
mirror: True
# Probability to use for the symmetry data augmentation; if provided, the dataset used should be
# selected accordingly
symmetry: 0.7
# Batch of standard augmentations
i_global:
g_noise: [1.0, 1.0]
g_blur: [0.8, 0.8]
mult_bright: [0.8, 1.0]
brightness: [0.8, 0.8]
contrast_augm: [1.0, 1.0]
gamma: [0.8, 1.0]
per_channel: False
i_local:
g_noise: null
g_blur: null
mult_bright: null
brightness: null
contrast_augm: null
gamma: null
per_channel: null
# ------------------------------------- Model Parameters --------------------------------------
# Dimensionality of the projection head output.
out_dim: 65536
# Whether or not to weight normalize the last layer of the DeSD head.
norm_last_layer: True
# Base EMA parameter for teacher update. The value is increased to 1 during training
# with cosine schedule.
momentum_teacher: 0.996
# Whether to use batch normalization in the projection head
use_bn_in_head: True
# Initial value for the teacher temperature.
warmup_teacher_temp: 0.04
# Final value (after linear warmup) of the teacher temperature.
teacher_temp: 0.07
# Number of warmup epochs for the teacher temperature.
warmup_teacher_temp_epochs: 10
#------------------------------ Training/Optimization parameters -----------------------------------
# Whether or not to use half precision for training.
use_fp16: True
# Initial value of the weight decay
weight_decay: 0.04
# Final value of the weight decay. We use a cosine schedule for WD
weight_decay_end: 0.000001
# Maximal parameter gradient norm if using gradient clipping. 0 for disabling.
clip_grad: 0
# Batch-size. Default: 32
batch_size: 64
# Epochs. Default: 100
epochs: 100
# Number of epochs during which we keep the output layer fixed.
freeze_last_layer: 1
# Learning rate at the end of linear warmup (highest LR used during training).
lr: 0.1
# Number of epochs for the linear learning-rate warm up.
warmup_epochs: 10
# This parameter is used to configure the initial learning rate value, originally set by
# a fixed rule. This should be equal to the batch size
lr_sch_den: 64
# Target LR at the end of optimization. We use a cosine LR schedule with linear warmup.
min_lr: 0.01
# Options: 'adamw', 'sgd', 'lars'. Type of optimizer.
optimizer: 'adamw'
# How the losses obtained at the different stages are combined. 'constant' means the weight is
# the same for all stages. Other options are: 'exp', 'fraction', 'linear', 'last' (decreasing
# from bottleneck to base).
weights: 'constant'
# ------------------------------ Misc -------------------------------------------------------
# Path to save logs and checkpoints.
output_dir: '<SOME_PATH>'
# Save checkpoint every x epochs.
saveckp_freq: 50
# Random seed.
seed: 0
# Number of data loading workers
num_workers: 6
# ------------------------------- Metrics_cfg -----------------------------------------------
metrics_cfg:
# Over which set to compute the metrics at each epoch's end. 'train' and/or 'val'
over: ['val']
# Frequency to log the RankMe measurement; null to skip it
rank_me_freq: 1
# Frequency to log the dimensionality-reduced projection scatter plot; null to skip it
projection_freq: 5
# ------------------------------- Pretrained SSL encoder -------------------------------------
# learning rate of the encoder weights (not projection head)
encoder_lr: 0.001
# minimum learning rate of the encoder weights (not projection head)
min_encoder_lr: 0.0001
# path to the pretrained weights
encoder_chkpt: '<SOME_PATH>/checkpoint.pth'
# ---------------------------------- nnUnet ---------------------------------------------------
# name of the nnUNet dataset to use
dataset: 'Dataset044_AIS'
# name of the configuration that was used to build the model
configuration: '3d_fullres'
# partition to use during training
fold: 0
# nnUNet trainer name
trainer: 'nnUNetTrainerCfg'
# nnUNet experiment planner name
exp_planner: 'nnUNetPlansSSL'
# Device to use ('cuda' to run on gpu)
device: 'cuda'
# These configurations are used to generate the config file that is later used to
# finetune nnUNet
# Number of epochs to finetune nnUNet afterwards
num_epochs: 100
# Unfreezing policy to use in supervised nnUNet finetuning
unfreeze_lr: null
unfreeze_epoch: null
# This flag indicates to the nnUNet pipeline that the model uses pretrained weights
ssl_pretrained: True
# Path from which to read the data partitions
split_path: '<SOME_PATH>/splits_final.json'
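
For context, below is a minimal sketch of how a config like this might be consumed in Python. The file name example.yml, the helper cosine_schedule, and the linear scaling of the peak learning rate via lr_sch_den are illustrative assumptions, not this repository's actual code; the cosine-with-linear-warmup shape simply follows what the comments above describe (and the public DINO reference implementation).

# Minimal, illustrative sketch (not repository code): load the training config and build the
# cosine schedules with linear warmup that the comments above refer to.
import math

import yaml  # PyYAML


with open('example.yml') as f:  # assumption: this example is saved as example.yml
    cfg = yaml.safe_load(f)['training']


def cosine_schedule(base, final, total_epochs, warmup_epochs=0):
    """Per-epoch values: linear warmup from 0 to `base`, then cosine decay to `final`."""
    values = []
    for epoch in range(total_epochs):
        if epoch < warmup_epochs:
            values.append(base * (epoch + 1) / warmup_epochs)
        else:
            progress = (epoch - warmup_epochs) / max(1, total_epochs - warmup_epochs)
            values.append(final + 0.5 * (base - final) * (1.0 + math.cos(math.pi * progress)))
    return values


# Assumption: DINO-style linear scaling of the peak LR, with lr_sch_den as the denominator.
peak_lr = cfg['lr'] * cfg['batch_size'] / cfg['lr_sch_den']

lr_schedule = cosine_schedule(peak_lr, cfg['min_lr'], cfg['epochs'], cfg['warmup_epochs'])
wd_schedule = cosine_schedule(cfg['weight_decay'], cfg['weight_decay_end'], cfg['epochs'])
# The teacher EMA momentum is annealed towards 1.0 over training, as the comment above notes.
momentum_schedule = cosine_schedule(cfg['momentum_teacher'], 1.0, cfg['epochs'])

Note that the DINO reference implementation builds these schedules per iteration rather than per epoch; per-epoch values are used here only to keep the sketch short.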