@@ -126,6 +126,71 @@ python serving/submit_job.py \
126126
127127</details >
128128
129+ #### ` Mistral-Small-24B-Instruct-2501 `
130+
131+ <details >
132+
133+ <summary >SGLang, vLLM (tested ✅)</summary >
134+
135+ ``` bash
136+ python serving/submit_job.py \
137+ --slurm-nodes 1 \
138+ --serving-framework sglang \
139+ --worker-port 8080 \
140+ --slurm-environment $(pwd)/serving/envs/sglang.toml \
141+ --framework-args " --model-path mistralai/Mistral-Small-24B-Instruct-2501 \
142+ --host 0.0.0.0 \
143+ --port 8080 \
144+ --served-model-name mistralai/Mistral-Small-24B-Instruct-2501-$( whoami) \
145+ --dp-size 4"
146+ ```
147+
148+ </details >
149+
150+ #### ` Mistral-Large-3-675B-Instruct-2512 `
151+
152+ <details >
153+
154+ <summary >vLLM (tested ✅)</summary >
155+
156+ ``` bash
157+ python serving/submit_job.py \
158+ --slurm-nodes 4 \
159+ --serving-framework vllm \
160+ --worker-port 8080 \
161+ --slurm-environment $(pwd)/serving/envs/vllm.toml \
162+ --disable-ocf \
163+ --framework-args " --model mistralai/Mistral-Large-3-675B-Instruct-2512 \
164+ --host 0.0.0.0 \
165+ --port 8080 \
166+ --served-model-name mistralai/Mistral-Large-3-675B-Instruct-2512-$( whoami) \
167+ --tensor-parallel-size 16"
168+ ```
169+
170+ </details >
171+
172+ #### ` Mixtral-8x22B-Instruct-v0.1 `
173+
174+ <details >
175+
176+ <summary >SGLang, vLLM (tested ✅)</summary >
177+
178+ ``` bash
179+ python serving/submit_job.py \
180+ --slurm-nodes 2 \
181+ --serving-framework sglang \
182+ --disable-ocf \
183+ --worker-port 8080 \
184+ --slurm-environment $(pwd)/serving/envs/sglang.toml \
185+ --framework-args " --model-path mistralai/Mixtral-8x22B-Instruct-v0.1 \
186+ --host 0.0.0.0 \
187+ --port 8080 \
188+ --tp-size 8 \
189+ --served-model-name mistralai/Mixtral-8x22B-Instruct-v0.1-$( whoami) "
190+ ```
191+
192+ </details >
193+
129194### Snowflake
130195
131196#### ` snowflake-arctic-embed-l-v2.0 `
@@ -149,6 +214,48 @@ python serving/submit_job.py \
149214
150215### Qwen
151216
217+ #### ` Qwen3-8B `
218+
219+ <details >
220+
221+ <summary >SGLang, vLLM (tested ✅)</summary >
222+
223+ ``` bash
224+ python serving/submit_job.py \
225+ --slurm-nodes 1 \
226+ --serving-framework sglang \
227+ --worker-port 8080 \
228+ --slurm-environment $(pwd)/serving/envs/sglang.toml \
229+ --framework-args " --model-path Qwen/Qwen3-8B \
230+ --host 0.0.0.0 \
231+ --port 8080 \
232+ --served-model-name Qwen/Qwen3-8B-$( whoami) \
233+ --dp-size 4"
234+ ```
235+
236+ </details >
237+
238+ #### ` Qwen3-32B `
239+
240+ <details >
241+
242+ <summary >SGLang, vLLM (tested ✅)</summary >
243+
244+ ``` bash
245+ python serving/submit_job.py \
246+ --slurm-nodes 1 \
247+ --serving-framework sglang \
248+ --worker-port 8080 \
249+ --slurm-environment $(pwd)/serving/envs/sglang.toml \
250+ --framework-args " --model-path Qwen/Qwen3-32B \
251+ --host 0.0.0.0 \
252+ --port 8080 \
253+ --served-model-name Qwen/Qwen3-32B-$( whoami) \
254+ --dp-size 4"
255+ ```
256+
257+ </details >
258+
152259#### ` Qwen3-Next-80B-A3B-Instruct `
153260
154261<details >
@@ -168,6 +275,50 @@ python serving/submit_job.py \
168275
169276</details >
170277
278+ #### ` Qwen3-235B-A22B-Instruct-2507 `
279+
280+ <details >
281+
282+ <summary >SGLang, vLLM (tested ✅)</summary >
283+
284+ ``` bash
285+ python serving/submit_job.py \
286+ --slurm-nodes 2 \
287+ --serving-framework sglang \
288+ --worker-port 8080 \
289+ --slurm-environment $(pwd)/serving/envs/sglang.toml \
290+ --disable-ocf \
291+ --framework-args " --model-path Qwen/Qwen3-235B-A22B-Instruct-2507 \
292+ --host 0.0.0.0 \
293+ --port 8080 \
294+ --served-model-name Qwen/Qwen3-235B-A22B-Instruct-2507-$( whoami) \
295+ --tp-size 8"
296+ ```
297+
298+ </details >
299+
300+ #### ` Qwen3.5-397B-A17B `
301+
302+ <details >
303+
304+ <summary >vLLM (tested ✅)</summary >
305+
306+ ``` bash
307+ python serving/submit_job.py \
308+ --slurm-nodes 4 \
309+ --serving-framework vllm \
310+ --disable-ocf \
311+ --worker-port 8080 \
312+ --slurm-environment $(pwd)/serving/envs/vllm_qwen35.toml \
313+ --framework-args " --model Qwen/Qwen3.5-397B-A17B \
314+ --host 0.0.0.0 \
315+ --port 8080 \
316+ --tensor-parallel-size 16 \
317+ --served-model-name Qwen/Qwen3.5-397B-A17B-$( whoami) "
318+ ```
319+
320+ </details >
321+
171322### DeepSeek
172323
173324#### ` DeepSeek-V3.1 `
0 commit comments