File tree 20 files changed +2920
-0
lines changed
vllm/model_executor/layers
quantization/utils/configs
20 files changed +2920
-0
lines changed Original file line number Diff line number Diff line change
1
+ {
2
+ "1" : {
3
+ "BLOCK_SIZE_M" : 16 ,
4
+ "BLOCK_SIZE_N" : 64 ,
5
+ "BLOCK_SIZE_K" : 128 ,
6
+ "GROUP_SIZE_M" : 1 ,
7
+ "num_warps" : 4 ,
8
+ "num_stages" : 5
9
+ },
10
+ "2" : {
11
+ "BLOCK_SIZE_M" : 16 ,
12
+ "BLOCK_SIZE_N" : 128 ,
13
+ "BLOCK_SIZE_K" : 128 ,
14
+ "GROUP_SIZE_M" : 16 ,
15
+ "num_warps" : 4 ,
16
+ "num_stages" : 3
17
+ },
18
+ "4" : {
19
+ "BLOCK_SIZE_M" : 64 ,
20
+ "BLOCK_SIZE_N" : 64 ,
21
+ "BLOCK_SIZE_K" : 128 ,
22
+ "GROUP_SIZE_M" : 64 ,
23
+ "num_warps" : 4 ,
24
+ "num_stages" : 4
25
+ },
26
+ "8" : {
27
+ "BLOCK_SIZE_M" : 64 ,
28
+ "BLOCK_SIZE_N" : 128 ,
29
+ "BLOCK_SIZE_K" : 128 ,
30
+ "GROUP_SIZE_M" : 64 ,
31
+ "num_warps" : 4 ,
32
+ "num_stages" : 3
33
+ },
34
+ "16" : {
35
+ "BLOCK_SIZE_M" : 16 ,
36
+ "BLOCK_SIZE_N" : 256 ,
37
+ "BLOCK_SIZE_K" : 64 ,
38
+ "GROUP_SIZE_M" : 32 ,
39
+ "num_warps" : 4 ,
40
+ "num_stages" : 3
41
+ },
42
+ "24" : {
43
+ "BLOCK_SIZE_M" : 64 ,
44
+ "BLOCK_SIZE_N" : 128 ,
45
+ "BLOCK_SIZE_K" : 128 ,
46
+ "GROUP_SIZE_M" : 16 ,
47
+ "num_warps" : 4 ,
48
+ "num_stages" : 3
49
+ },
50
+ "32" : {
51
+ "BLOCK_SIZE_M" : 64 ,
52
+ "BLOCK_SIZE_N" : 128 ,
53
+ "BLOCK_SIZE_K" : 128 ,
54
+ "GROUP_SIZE_M" : 1 ,
55
+ "num_warps" : 4 ,
56
+ "num_stages" : 3
57
+ },
58
+ "48" : {
59
+ "BLOCK_SIZE_M" : 64 ,
60
+ "BLOCK_SIZE_N" : 128 ,
61
+ "BLOCK_SIZE_K" : 128 ,
62
+ "GROUP_SIZE_M" : 32 ,
63
+ "num_warps" : 4 ,
64
+ "num_stages" : 3
65
+ },
66
+ "64" : {
67
+ "BLOCK_SIZE_M" : 64 ,
68
+ "BLOCK_SIZE_N" : 128 ,
69
+ "BLOCK_SIZE_K" : 128 ,
70
+ "GROUP_SIZE_M" : 32 ,
71
+ "num_warps" : 4 ,
72
+ "num_stages" : 3
73
+ },
74
+ "96" : {
75
+ "BLOCK_SIZE_M" : 64 ,
76
+ "BLOCK_SIZE_N" : 128 ,
77
+ "BLOCK_SIZE_K" : 128 ,
78
+ "GROUP_SIZE_M" : 32 ,
79
+ "num_warps" : 4 ,
80
+ "num_stages" : 3
81
+ },
82
+ "128" : {
83
+ "BLOCK_SIZE_M" : 64 ,
84
+ "BLOCK_SIZE_N" : 128 ,
85
+ "BLOCK_SIZE_K" : 128 ,
86
+ "GROUP_SIZE_M" : 16 ,
87
+ "num_warps" : 4 ,
88
+ "num_stages" : 3
89
+ },
90
+ "256" : {
91
+ "BLOCK_SIZE_M" : 64 ,
92
+ "BLOCK_SIZE_N" : 128 ,
93
+ "BLOCK_SIZE_K" : 128 ,
94
+ "GROUP_SIZE_M" : 32 ,
95
+ "num_warps" : 4 ,
96
+ "num_stages" : 3
97
+ },
98
+ "512" : {
99
+ "BLOCK_SIZE_M" : 64 ,
100
+ "BLOCK_SIZE_N" : 128 ,
101
+ "BLOCK_SIZE_K" : 128 ,
102
+ "GROUP_SIZE_M" : 32 ,
103
+ "num_warps" : 4 ,
104
+ "num_stages" : 3
105
+ },
106
+ "1024" : {
107
+ "BLOCK_SIZE_M" : 64 ,
108
+ "BLOCK_SIZE_N" : 128 ,
109
+ "BLOCK_SIZE_K" : 128 ,
110
+ "GROUP_SIZE_M" : 32 ,
111
+ "num_warps" : 4 ,
112
+ "num_stages" : 3
113
+ },
114
+ "1536" : {
115
+ "BLOCK_SIZE_M" : 64 ,
116
+ "BLOCK_SIZE_N" : 128 ,
117
+ "BLOCK_SIZE_K" : 128 ,
118
+ "GROUP_SIZE_M" : 32 ,
119
+ "num_warps" : 4 ,
120
+ "num_stages" : 3
121
+ },
122
+ "2048" : {
123
+ "BLOCK_SIZE_M" : 64 ,
124
+ "BLOCK_SIZE_N" : 128 ,
125
+ "BLOCK_SIZE_K" : 128 ,
126
+ "GROUP_SIZE_M" : 64 ,
127
+ "num_warps" : 4 ,
128
+ "num_stages" : 3
129
+ },
130
+ "3072" : {
131
+ "BLOCK_SIZE_M" : 128 ,
132
+ "BLOCK_SIZE_N" : 64 ,
133
+ "BLOCK_SIZE_K" : 128 ,
134
+ "GROUP_SIZE_M" : 16 ,
135
+ "num_warps" : 4 ,
136
+ "num_stages" : 4
137
+ },
138
+ "4096" : {
139
+ "BLOCK_SIZE_M" : 64 ,
140
+ "BLOCK_SIZE_N" : 128 ,
141
+ "BLOCK_SIZE_K" : 128 ,
142
+ "GROUP_SIZE_M" : 64 ,
143
+ "num_warps" : 4 ,
144
+ "num_stages" : 3
145
+ }
146
+ }
Original file line number Diff line number Diff line change
1
+ {
2
+ "1" : {
3
+ "BLOCK_SIZE_M" : 64 ,
4
+ "BLOCK_SIZE_N" : 32 ,
5
+ "BLOCK_SIZE_K" : 128 ,
6
+ "GROUP_SIZE_M" : 64 ,
7
+ "num_warps" : 4 ,
8
+ "num_stages" : 5
9
+ },
10
+ "2" : {
11
+ "BLOCK_SIZE_M" : 64 ,
12
+ "BLOCK_SIZE_N" : 32 ,
13
+ "BLOCK_SIZE_K" : 128 ,
14
+ "GROUP_SIZE_M" : 1 ,
15
+ "num_warps" : 4 ,
16
+ "num_stages" : 5
17
+ },
18
+ "4" : {
19
+ "BLOCK_SIZE_M" : 64 ,
20
+ "BLOCK_SIZE_N" : 32 ,
21
+ "BLOCK_SIZE_K" : 128 ,
22
+ "GROUP_SIZE_M" : 1 ,
23
+ "num_warps" : 4 ,
24
+ "num_stages" : 4
25
+ },
26
+ "8" : {
27
+ "BLOCK_SIZE_M" : 64 ,
28
+ "BLOCK_SIZE_N" : 32 ,
29
+ "BLOCK_SIZE_K" : 128 ,
30
+ "GROUP_SIZE_M" : 64 ,
31
+ "num_warps" : 4 ,
32
+ "num_stages" : 4
33
+ },
34
+ "16" : {
35
+ "BLOCK_SIZE_M" : 64 ,
36
+ "BLOCK_SIZE_N" : 32 ,
37
+ "BLOCK_SIZE_K" : 128 ,
38
+ "GROUP_SIZE_M" : 64 ,
39
+ "num_warps" : 4 ,
40
+ "num_stages" : 4
41
+ },
42
+ "24" : {
43
+ "BLOCK_SIZE_M" : 64 ,
44
+ "BLOCK_SIZE_N" : 32 ,
45
+ "BLOCK_SIZE_K" : 128 ,
46
+ "GROUP_SIZE_M" : 64 ,
47
+ "num_warps" : 4 ,
48
+ "num_stages" : 4
49
+ },
50
+ "32" : {
51
+ "BLOCK_SIZE_M" : 64 ,
52
+ "BLOCK_SIZE_N" : 32 ,
53
+ "BLOCK_SIZE_K" : 128 ,
54
+ "GROUP_SIZE_M" : 32 ,
55
+ "num_warps" : 4 ,
56
+ "num_stages" : 4
57
+ },
58
+ "48" : {
59
+ "BLOCK_SIZE_M" : 64 ,
60
+ "BLOCK_SIZE_N" : 32 ,
61
+ "BLOCK_SIZE_K" : 128 ,
62
+ "GROUP_SIZE_M" : 1 ,
63
+ "num_warps" : 4 ,
64
+ "num_stages" : 4
65
+ },
66
+ "64" : {
67
+ "BLOCK_SIZE_M" : 64 ,
68
+ "BLOCK_SIZE_N" : 32 ,
69
+ "BLOCK_SIZE_K" : 128 ,
70
+ "GROUP_SIZE_M" : 16 ,
71
+ "num_warps" : 4 ,
72
+ "num_stages" : 4
73
+ },
74
+ "96" : {
75
+ "BLOCK_SIZE_M" : 64 ,
76
+ "BLOCK_SIZE_N" : 32 ,
77
+ "BLOCK_SIZE_K" : 128 ,
78
+ "GROUP_SIZE_M" : 16 ,
79
+ "num_warps" : 4 ,
80
+ "num_stages" : 4
81
+ },
82
+ "128" : {
83
+ "BLOCK_SIZE_M" : 64 ,
84
+ "BLOCK_SIZE_N" : 32 ,
85
+ "BLOCK_SIZE_K" : 128 ,
86
+ "GROUP_SIZE_M" : 32 ,
87
+ "num_warps" : 4 ,
88
+ "num_stages" : 4
89
+ },
90
+ "256" : {
91
+ "BLOCK_SIZE_M" : 64 ,
92
+ "BLOCK_SIZE_N" : 64 ,
93
+ "BLOCK_SIZE_K" : 128 ,
94
+ "GROUP_SIZE_M" : 32 ,
95
+ "num_warps" : 4 ,
96
+ "num_stages" : 5
97
+ },
98
+ "512" : {
99
+ "BLOCK_SIZE_M" : 64 ,
100
+ "BLOCK_SIZE_N" : 32 ,
101
+ "BLOCK_SIZE_K" : 128 ,
102
+ "GROUP_SIZE_M" : 1 ,
103
+ "num_warps" : 4 ,
104
+ "num_stages" : 3
105
+ },
106
+ "1024" : {
107
+ "BLOCK_SIZE_M" : 64 ,
108
+ "BLOCK_SIZE_N" : 64 ,
109
+ "BLOCK_SIZE_K" : 128 ,
110
+ "GROUP_SIZE_M" : 32 ,
111
+ "num_warps" : 4 ,
112
+ "num_stages" : 3
113
+ },
114
+ "1536" : {
115
+ "BLOCK_SIZE_M" : 64 ,
116
+ "BLOCK_SIZE_N" : 64 ,
117
+ "BLOCK_SIZE_K" : 128 ,
118
+ "GROUP_SIZE_M" : 64 ,
119
+ "num_warps" : 4 ,
120
+ "num_stages" : 3
121
+ },
122
+ "2048" : {
123
+ "BLOCK_SIZE_M" : 64 ,
124
+ "BLOCK_SIZE_N" : 64 ,
125
+ "BLOCK_SIZE_K" : 128 ,
126
+ "GROUP_SIZE_M" : 16 ,
127
+ "num_warps" : 4 ,
128
+ "num_stages" : 3
129
+ },
130
+ "3072" : {
131
+ "BLOCK_SIZE_M" : 64 ,
132
+ "BLOCK_SIZE_N" : 64 ,
133
+ "BLOCK_SIZE_K" : 128 ,
134
+ "GROUP_SIZE_M" : 64 ,
135
+ "num_warps" : 4 ,
136
+ "num_stages" : 3
137
+ },
138
+ "4096" : {
139
+ "BLOCK_SIZE_M" : 64 ,
140
+ "BLOCK_SIZE_N" : 128 ,
141
+ "BLOCK_SIZE_K" : 128 ,
142
+ "GROUP_SIZE_M" : 16 ,
143
+ "num_warps" : 4 ,
144
+ "num_stages" : 3
145
+ }
146
+ }
You can’t perform that action at this time.
0 commit comments