# bionemo-framework/bionemo-recipes/recipes/esm2_native_te/fp4_debugging_stats.yaml
# (NVIDIA/bionemo-framework at commit b6042f17b6c8e203b694d52cc999bbeee8e44054)

# Stanza enabling the `LogNvfp4TensorStats` feature (under `transformer_engine`)
# for the layers selected by the regex below.
example_fp4_tensor_stat_collection:
    enabled: true
    layers:
        # Regex over layer names: encoder layer indices 1 through 5, limited to
        # sub-modules whose name contains layernorm_qkv, proj, fc1 or fc2.
        # Per the original author's note, layer names are 1-indexed, so
        # layers.1 .. layers.5 correspond to layers 0-4 of the model.
        layer_name_regex_pattern: 'model\.esm\.encoder\.layers\.[1-5]\..*(layernorm_qkv|proj|fc1|fc2)'
    transformer_engine:
        LogNvfp4TensorStats:
            enabled: true
            # One entry per tensor kind; each lists the statistics to log and
            # the logging frequency setting.
            tensors_struct:
                - tensor: activation
                  stats:
                      - underflows%
                      - mse
                  freq: 100
                - tensor: gradient
                  stats:
                      - underflows%
                      - mse
                  freq: 100

# Stanza enabling the `LogFp8TensorStats` feature (under `transformer_engine`)
# with MXFP8-prefixed statistics, for the layers selected by the regex below.
example_fp8_tensor_stat_collection:
    enabled: true
    layers:
        # Regex over layer names: encoder layer indices 6 through 10, limited to
        # sub-modules whose name contains layernorm_qkv, proj, fc1 or fc2.
        # This matches: model.esm.encoder.layers.(6|7|8|9|10).*.(layernorm_qkv|proj|fc1|fc2)
        # NOTE(review): if layer names are 1-indexed (as the FP4 stanza's note
        # states), these are layers 5-9 of the model -- confirm.
        layer_name_regex_pattern: 'model\.esm\.encoder\.layers\.([6-9]|10)\..*(layernorm_qkv|proj|fc1|fc2)'
    transformer_engine:
        LogFp8TensorStats:
            enabled: true
            # One entry per tensor kind; each lists the statistics to log and
            # the logging frequency setting.
            tensors_struct:
                - tensor: activation
                  stats:
                      - mxfp8_underflows%
                      - mxfp8_scale_inv_min
                      - mxfp8_scale_inv_max
                      - mxfp8_mse
                  freq: 100
                - tensor: gradient
                  stats:
                      - mxfp8_underflows%
                      - mxfp8_scale_inv_min
                      - mxfp8_scale_inv_max
                      - mxfp8_mse
                  freq: 100