Skip to content

Commit 98e4848

Browse files
are-ces and claude committed
feat(aws): add --tool-call-parser and --chat-template flags for RHEL AI
Enable vLLM tool calling by adding --tool-call-parser and --chat-template flags. When --tool-call-parser is set, --enable-auto-tool-choice is automatically added. Chat template filename is resolved to /opt/app-root/template/. Updated Tekton task template with new params. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a43bcd4 commit 98e4848

7 files changed

Lines changed: 62 additions & 3 deletions

File tree

cmd/mapt/cmd/aws/hosts/rhelai.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,8 @@ func getRHELAICreate() *cobra.Command {
7272
HFToken: viper.GetString(params.RhelAIHFToken),
7373
APIKey: viper.GetString(params.RhelAIAPIKey),
7474
AutoStart: viper.IsSet(params.RhelAIAutoStart),
75+
ToolCallParser: viper.GetString(params.RhelAIToolCallParser),
76+
ChatTemplate: viper.GetString(params.RhelAIChatTemplate),
7577
ExposePorts: viper.GetIntSlice(params.RhelAIExposePorts),
7678
})
7779
},
@@ -87,6 +89,8 @@ func getRHELAICreate() *cobra.Command {
8789
flagSet.StringP(params.RhelAIAPIKey, "", "", params.RhelAIAPIKeyDesc)
8890
flagSet.Bool(params.RhelAIAutoStart, false, params.RhelAIAutoStartDesc)
8991
flagSet.IntSlice(params.RhelAIExposePorts, nil, params.RhelAIExposePortsDesc)
92+
flagSet.StringP(params.RhelAIToolCallParser, "", "", params.RhelAIToolCallParserDesc)
93+
flagSet.StringP(params.RhelAIChatTemplate, "", "", params.RhelAIChatTemplateDesc)
9094
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
9195
params.AddComputeRequestFlags(flagSet)
9296
params.AddSpotFlags(flagSet)

cmd/mapt/cmd/params/params.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,10 @@ const (
131131
RhelAIAutoStartDesc string = "automatically configure and start RHAIIS after provisioning"
132132
RhelAIExposePorts string = "expose-ports"
133133
RhelAIExposePortsDesc string = "comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080)"
134+
RhelAIToolCallParser string = "tool-call-parser"
135+
RhelAIToolCallParserDesc string = "enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral)"
136+
RhelAIChatTemplate string = "chat-template"
137+
RhelAIChatTemplateDesc string = "chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja)"
134138

135139
// Serverless
136140
Timeout string = "timeout"

pkg/provider/aws/action/rhel-ai/rhelai.go

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,8 @@ type rhelAIRequest struct {
4646
hfToken *string
4747
apiKey *string
4848
autoStart bool
49+
toolCallParser *string
50+
chatTemplate *string
4951
exposePorts []int
5052
}
5153

@@ -85,6 +87,8 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
8587
hfToken: &args.HFToken,
8688
apiKey: &args.APIKey,
8789
autoStart: args.AutoStart,
90+
toolCallParser: &args.ToolCallParser,
91+
chatTemplate: &args.ChatTemplate,
8892
exposePorts: args.ExposePorts}
8993
if args.Spot != nil {
9094
r.spot = args.Spot.Spot
@@ -373,6 +377,15 @@ func (r *rhelAIRequest) rhaiisSetupScript() string {
373377
` && sudo sed -i 's|--model .*|--model %s \\|' %s/install.conf`,
374378
*r.model, confDir)
375379
}
380+
if len(*r.toolCallParser) > 0 {
381+
toolArgs := fmt.Sprintf(`--enable-auto-tool-choice \\\n --tool-call-parser %s`, *r.toolCallParser)
382+
if len(*r.chatTemplate) > 0 {
383+
toolArgs += fmt.Sprintf(` \\\n --chat-template /opt/app-root/template/%s`, *r.chatTemplate)
384+
}
385+
script += fmt.Sprintf(
386+
` && sudo sed -i 's|--max-model-len.*|--max-model-len 4096 \\\n %s|' %s/install.conf`,
387+
toolArgs, confDir)
388+
}
376389
if len(*r.apiKey) > 0 {
377390
script += fmt.Sprintf(
378391
" && sudo sed -i '/\\[Install\\]/i Environment=VLLM_API_KEY=%s' %s/install.conf",

pkg/target/host/rhelai/api.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,9 @@ type RHELAIArgs struct {
1919
Timeout string
2020
Model string
2121
HFToken string
22-
APIKey string
23-
AutoStart bool
24-
ExposePorts []int
22+
APIKey string
23+
AutoStart bool
24+
ToolCallParser string
25+
ChatTemplate string
26+
ExposePorts []int
2527
}

tkn/infra-aws-rhel-ai.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,12 @@ spec:
155155
- name: expose-ports
156156
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
157157
default: ""
158+
- name: tool-call-parser
159+
description: Enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral). Automatically adds --enable-auto-tool-choice.
160+
default: ""
161+
- name: chat-template
162+
description: Chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja).
163+
default: ""
158164

159165
# Network params
160166
- name: service-endpoints
@@ -317,6 +323,12 @@ spec:
317323
if [[ "$(params.expose-ports)" != "" ]]; then
318324
cmd+="--expose-ports '$(params.expose-ports)' "
319325
fi
326+
if [[ "$(params.tool-call-parser)" != "" ]]; then
327+
cmd+="--tool-call-parser '$(params.tool-call-parser)' "
328+
fi
329+
if [[ "$(params.chat-template)" != "" ]]; then
330+
cmd+="--chat-template '$(params.chat-template)' "
331+
fi
320332
cmd+="--tags '$(params.tags)' "
321333
fi
322334

tkn/infra-azure-rhel-ai.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,12 @@ spec:
8585
- name: disk-size
8686
description: Disk size in GB for the cloud instance
8787
default: "200"
88+
- name: gpus
89+
description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8)
90+
default: "8"
91+
- name: gpu-manufacturer
92+
description: GPU manufacturer name for instance filtering (e.g. NVIDIA, AMD)
93+
default: ""
8894
- name: compute-sizes
8995
description: Comma seperated list of sizes for the machines to be requested. If set this takes precedence over compute by args
9096
default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5"
@@ -229,6 +235,12 @@ spec:
229235
if [[ "$(params.compute-sizes)" != "" ]]; then
230236
cmd+="--compute-sizes '$(params.compute-sizes)' "
231237
fi
238+
if [[ "$(params.gpus)" != "" ]]; then
239+
cmd+="--gpus '$(params.gpus)' "
240+
fi
241+
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
242+
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
243+
fi
232244
if [[ "$(params.marketplace)" == "true" ]]; then
233245
cmd+="--marketplace "
234246
cmd+="--accelerator '$(params.accelerator)' "

tkn/template/infra-aws-rhel-ai.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,12 @@ spec:
155155
- name: expose-ports
156156
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
157157
default: ""
158+
- name: tool-call-parser
159+
description: Enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral). Automatically adds --enable-auto-tool-choice.
160+
default: ""
161+
- name: chat-template
162+
description: Chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja).
163+
default: ""
158164

159165
# Network params
160166
- name: service-endpoints
@@ -317,6 +323,12 @@ spec:
317323
if [[ "$(params.expose-ports)" != "" ]]; then
318324
cmd+="--expose-ports '$(params.expose-ports)' "
319325
fi
326+
if [[ "$(params.tool-call-parser)" != "" ]]; then
327+
cmd+="--tool-call-parser '$(params.tool-call-parser)' "
328+
fi
329+
if [[ "$(params.chat-template)" != "" ]]; then
330+
cmd+="--chat-template '$(params.chat-template)' "
331+
fi
320332
cmd+="--tags '$(params.tags)' "
321333
fi
322334

0 commit comments

Comments
 (0)