---
# Sample configuration for inference benchmarks
# NOTE(review): shape triples below are [m, k, n] per the inline comment on the
# first (commented-out) model; confirm against the benchmark runner's schema.
benchmark_mode: "inference"
# Quantization recipes to benchmark; names are recipe identifiers
# (presumably e.g. int8 weight-only — verify against the runner's recipe registry).
quantization_config_recipe_names:
  # Will run a baseline inference for model by default, without quantization for comparison
  # - "int4wo-32"
  # - "marlin"
  - "int8wo"
# sparsity_config_recipe_names:
  # Will run a baseline inference for model by default, without sparsity for comparison
  # - "semi-sparse"
  # - "block"
# Directory where benchmark result artifacts are written.
output_dir: "benchmarks/microbenchmarks/results"
# One entry per model/workload to benchmark.
model_params:
  # - name: "small_bf16_linear"
  #   matrix_shapes:
  #     - name: "custom"
  #       shapes: [
  #         [1024, 1024, 1024], # [m, k, n]
  #       ]
  #   high_precision_dtype: "torch.bfloat16"
  #   use_torch_compile: true
  #   torch_compile_mode: "max-autotune"
  #   device: "cuda"
  #   model_type: "linear"
  #   enable_profiler: true # Enable profiling for this model
  - name: "large_bf16_ln_linear"
    matrix_shapes:
      # "custom" shape source: explicit [m, k, n] triples listed under `shapes`.
      - name: "custom"
        shapes: [
          [2048, 4096, 1024],
          # [4096, 4096, 1024]
        ]
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "linear"
    enable_profiler: true  # Enable profiling for this model
    enable_memory_profile: true  # Enable memory profiling for this model
  # - name: "cpu_fp32_linear"
  #   matrix_shapes:
  #     - name: "custom"
  #       shapes: [
  #         [4096, 4096, 1024]
  #       ]
  #   high_precision_dtype: "torch.float32"
  #   use_torch_compile: false
  #   device: "cpu"
  #   model_type: "linear"
  #   enable_profiler: true # Enable profiling for this model
  - name: "bf16_rms_norm_linear_activation"
    matrix_shapes:
      - name: "custom"
        shapes: [
          [2048, 4096, 1024],
        ]
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "rms_norm_linear_activation"
    enable_profiler: true
    enable_memory_profile: true
  - name: "bf16_transformer_block"
    matrix_shapes:
      - name: "custom"
        shapes: [
          [2048, 4096, 1024], # For transformer_block, k is the hidden dimension
        ]
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "transformer_block"
    enable_profiler: true
    enable_memory_profile: true