Skip to content

Commit 1440b5c

Browse files
are-ces and claude committed
feat(aws): add --tool-call-parser and --chat-template flags for RHEL AI
Enable vLLM tool calling by adding --tool-call-parser and --chat-template flags. When --tool-call-parser is set, --enable-auto-tool-choice is automatically added. Chat template filename is resolved to /opt/app-root/template/. Updated Tekton task template with new params. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a43bcd4 commit 1440b5c

7 files changed

Lines changed: 51 additions & 5 deletions

File tree

cmd/mapt/cmd/aws/hosts/rhelai.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,7 @@ func getRHELAICreate() *cobra.Command {
7272
HFToken: viper.GetString(params.RhelAIHFToken),
7373
APIKey: viper.GetString(params.RhelAIAPIKey),
7474
AutoStart: viper.IsSet(params.RhelAIAutoStart),
75+
VLLMExtraArgs: viper.GetString(params.RhelAIVLLMExtraArgs),
7576
ExposePorts: viper.GetIntSlice(params.RhelAIExposePorts),
7677
})
7778
},
@@ -87,6 +88,7 @@ func getRHELAICreate() *cobra.Command {
8788
flagSet.StringP(params.RhelAIAPIKey, "", "", params.RhelAIAPIKeyDesc)
8889
flagSet.Bool(params.RhelAIAutoStart, false, params.RhelAIAutoStartDesc)
8990
flagSet.IntSlice(params.RhelAIExposePorts, nil, params.RhelAIExposePortsDesc)
91+
flagSet.StringP(params.RhelAIVLLMExtraArgs, "", "", params.RhelAIVLLMExtraArgsDesc)
9092
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
9193
params.AddComputeRequestFlags(flagSet)
9294
params.AddSpotFlags(flagSet)

cmd/mapt/cmd/params/params.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,8 @@ const (
131131
RhelAIAutoStartDesc string = "automatically configure and start RHAIIS after provisioning"
132132
RhelAIExposePorts string = "expose-ports"
133133
RhelAIExposePortsDesc string = "comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080)"
134+
RhelAIVLLMExtraArgs string = "vllm-extra-args"
135+
RhelAIVLLMExtraArgsDesc string = "extra vLLM arguments appended to the RHAIIS Exec line (e.g. '--enable-auto-tool-choice --tool-call-parser llama3_json --max-model-len 16384')"
134136

135137
// Serverless
136138
Timeout string = "timeout"

pkg/provider/aws/action/rhel-ai/rhelai.go

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ type rhelAIRequest struct {
4646
hfToken *string
4747
apiKey *string
4848
autoStart bool
49+
vllmExtraArgs *string
4950
exposePorts []int
5051
}
5152

@@ -85,6 +86,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
8586
hfToken: &args.HFToken,
8687
apiKey: &args.APIKey,
8788
autoStart: args.AutoStart,
89+
vllmExtraArgs: &args.VLLMExtraArgs,
8890
exposePorts: args.ExposePorts}
8991
if args.Spot != nil {
9092
r.spot = args.Spot.Spot
@@ -359,7 +361,7 @@ func (r *rhelAIRequest) lbTargetGroups() []int {
359361
}
360362

361363
func (r *rhelAIRequest) rhaiisSetupScript() string {
362-
confDir := "/etc/containers/systemd/rhaiis.container.d"
364+
confDir := "/etc/containers/systemd/rhaii.container.d"
363365
script := fmt.Sprintf(
364366
"sudo cp %s/install.conf.example %s/install.conf",
365367
confDir, confDir)
@@ -373,12 +375,27 @@ func (r *rhelAIRequest) rhaiisSetupScript() string {
373375
` && sudo sed -i 's|--model .*|--model %s \\|' %s/install.conf`,
374376
*r.model, confDir)
375377
}
378+
script += fmt.Sprintf(
379+
` && GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l) && [ "$GPU_COUNT" -gt 0 ] && sudo sed -i "s|--tensor-parallel-size 1|--tensor-parallel-size $GPU_COUNT|" %s/install.conf`,
380+
confDir)
381+
if len(*r.vllmExtraArgs) > 0 {
382+
extraArgs := *r.vllmExtraArgs
383+
if strings.Contains(extraArgs, "--max-model-len") {
384+
script += fmt.Sprintf(
385+
` && sudo sed -i 's|--max-model-len [0-9]*|%s|' %s/install.conf`,
386+
extraArgs, confDir)
387+
} else {
388+
script += fmt.Sprintf(
389+
` && sudo sed -i 's|--max-model-len 4096|--max-model-len 4096 \\\n %s|' %s/install.conf`,
390+
extraArgs, confDir)
391+
}
392+
}
376393
if len(*r.apiKey) > 0 {
377394
script += fmt.Sprintf(
378395
" && sudo sed -i '/\\[Install\\]/i Environment=VLLM_API_KEY=%s' %s/install.conf",
379396
*r.apiKey, confDir)
380397
}
381-
script += " && sudo systemctl daemon-reload && sudo systemctl start rhaiis"
398+
script += " && sudo systemctl daemon-reload && sudo systemctl start rhaii"
382399
return script
383400
}
384401

pkg/target/host/rhelai/api.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ type RHELAIArgs struct {
1919
Timeout string
2020
Model string
2121
HFToken string
22-
APIKey string
23-
AutoStart bool
24-
ExposePorts []int
22+
APIKey string
23+
AutoStart bool
24+
VLLMExtraArgs string
25+
ExposePorts []int
2526
}

tkn/infra-aws-rhel-ai.yaml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,9 @@ spec:
155155
- name: expose-ports
156156
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
157157
default: ""
158+
- name: vllm-extra-args
159+
description: Extra vLLM arguments appended to the RHAIIS Exec line (e.g. '--enable-auto-tool-choice --tool-call-parser llama3_json --max-model-len 16384').
160+
default: ""
158161

159162
# Network params
160163
- name: service-endpoints
@@ -317,6 +320,9 @@ spec:
317320
if [[ "$(params.expose-ports)" != "" ]]; then
318321
cmd+="--expose-ports '$(params.expose-ports)' "
319322
fi
323+
if [[ "$(params.vllm-extra-args)" != "" ]]; then
324+
cmd+="--vllm-extra-args '$(params.vllm-extra-args)' "
325+
fi
320326
cmd+="--tags '$(params.tags)' "
321327
fi
322328

tkn/infra-azure-rhel-ai.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,12 @@ spec:
8585
- name: disk-size
8686
description: Disk size in GB for the cloud instance
8787
default: "200"
88+
- name: gpus
89+
description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8)
90+
default: "8"
91+
- name: gpu-manufacturer
92+
description: GPU manufacturer name for instance filtering (e.g. NVIDIA, AMD)
93+
default: ""
8894
- name: compute-sizes
8995
description: Comma seperated list of sizes for the machines to be requested. If set this takes precedence over compute by args
9096
default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5"
@@ -229,6 +235,12 @@ spec:
229235
if [[ "$(params.compute-sizes)" != "" ]]; then
230236
cmd+="--compute-sizes '$(params.compute-sizes)' "
231237
fi
238+
if [[ "$(params.gpus)" != "" ]]; then
239+
cmd+="--gpus '$(params.gpus)' "
240+
fi
241+
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
242+
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
243+
fi
232244
if [[ "$(params.marketplace)" == "true" ]]; then
233245
cmd+="--marketplace "
234246
cmd+="--accelerator '$(params.accelerator)' "

tkn/template/infra-aws-rhel-ai.yaml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,9 @@ spec:
155155
- name: expose-ports
156156
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
157157
default: ""
158+
- name: vllm-extra-args
159+
description: Extra vLLM arguments appended to the RHAIIS Exec line (e.g. '--enable-auto-tool-choice --tool-call-parser llama3_json --max-model-len 16384').
160+
default: ""
158161

159162
# Network params
160163
- name: service-endpoints
@@ -317,6 +320,9 @@ spec:
317320
if [[ "$(params.expose-ports)" != "" ]]; then
318321
cmd+="--expose-ports '$(params.expose-ports)' "
319322
fi
323+
if [[ "$(params.vllm-extra-args)" != "" ]]; then
324+
cmd+="--vllm-extra-args '$(params.vllm-extra-args)' "
325+
fi
320326
cmd+="--tags '$(params.tags)' "
321327
fi
322328

0 commit comments

Comments
 (0)