---
# Sample configuration for inference benchmarks
# NOTE(review): shape triples below are [m, k, n] per the inline comment on the
# first (commented-out) model; confirm against the benchmark runner's schema.
benchmark_mode: "inference"
# Quantization recipes to benchmark; names are recipe identifiers
# (presumably e.g. int8 weight-only — verify against the runner's recipe registry).
quantization_config_recipe_names:
  # Will run a baseline inference for model by default, without quantization for comparison
  # - "int4wo-32"
  # - "marlin"
  - "int8wo"
# sparsity_config_recipe_names:
  # Will run a baseline inference for model by default, without sparsity for comparison
  # - "semi-sparse"
  # - "block"
# Directory where benchmark result artifacts are written.
output_dir: "benchmarks/microbenchmarks/results"
# One entry per model/workload to benchmark.
model_params:
  # - name: "small_bf16_linear"
  #   matrix_shapes:
  #     - name: "custom"
  #       shapes: [
  #         [1024, 1024, 1024], # [m, k, n]
  #       ]
  #   high_precision_dtype: "torch.bfloat16"
  #   use_torch_compile: true
  #   torch_compile_mode: "max-autotune"
  #   device: "cuda"
  #   model_type: "linear"
  #   enable_profiler: true # Enable profiling for this model
  - name: "large_bf16_ln_linear"
    matrix_shapes:
      # "custom" shape source: explicit [m, k, n] triples listed under `shapes`.
      - name: "custom"
        shapes: [
          [2048, 4096, 1024],
          # [4096, 4096, 1024]
        ]
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "linear"
    enable_profiler: true  # Enable profiling for this model
    enable_memory_profile: true  # Enable memory profiling for this model
  # - name: "cpu_fp32_linear"
  #   matrix_shapes:
  #     - name: "custom"
  #       shapes: [
  #         [4096, 4096, 1024]
  #       ]
  #   high_precision_dtype: "torch.float32"
  #   use_torch_compile: false
  #   device: "cpu"
  #   model_type: "linear"
  #   enable_profiler: true # Enable profiling for this model
  - name: "bf16_rms_norm_linear_activation"
    matrix_shapes:
      - name: "custom"
        shapes: [
          [2048, 4096, 1024],
        ]
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "rms_norm_linear_activation"
    enable_profiler: true
    enable_memory_profile: true
  - name: "bf16_transformer_block"
    matrix_shapes:
      - name: "custom"
        shapes: [
          [2048, 4096, 1024], # For transformer_block, k is the hidden dimension
        ]
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "transformer_block"
    enable_profiler: true
    enable_memory_profile: true