Skip to content

Commit 2d7165f

Browse files
are-ces and claude committed
feat(aws): add --tool-call-parser and --chat-template flags for RHEL AI
Enable vLLM tool calling by adding the --tool-call-parser and --chat-template flags. When --tool-call-parser is set, --enable-auto-tool-choice is added automatically. The chat template filename is resolved relative to /opt/app-root/template/ and is only applied together with --tool-call-parser. Also adds a --max-model-len flag to override the model context length. Updated the Tekton task templates with the new params. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a43bcd4 commit 2d7165f

7 files changed

Lines changed: 89 additions & 3 deletions

File tree

cmd/mapt/cmd/aws/hosts/rhelai.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ func getRHELAICreate() *cobra.Command {
7272
HFToken: viper.GetString(params.RhelAIHFToken),
7373
APIKey: viper.GetString(params.RhelAIAPIKey),
7474
AutoStart: viper.IsSet(params.RhelAIAutoStart),
75+
ToolCallParser: viper.GetString(params.RhelAIToolCallParser),
76+
ChatTemplate: viper.GetString(params.RhelAIChatTemplate),
77+
MaxModelLen: viper.GetInt(params.RhelAIMaxModelLen),
7578
ExposePorts: viper.GetIntSlice(params.RhelAIExposePorts),
7679
})
7780
},
@@ -87,6 +90,9 @@ func getRHELAICreate() *cobra.Command {
8790
flagSet.StringP(params.RhelAIAPIKey, "", "", params.RhelAIAPIKeyDesc)
8891
flagSet.Bool(params.RhelAIAutoStart, false, params.RhelAIAutoStartDesc)
8992
flagSet.IntSlice(params.RhelAIExposePorts, nil, params.RhelAIExposePortsDesc)
93+
flagSet.StringP(params.RhelAIToolCallParser, "", "", params.RhelAIToolCallParserDesc)
94+
flagSet.StringP(params.RhelAIChatTemplate, "", "", params.RhelAIChatTemplateDesc)
95+
flagSet.Int(params.RhelAIMaxModelLen, 0, params.RhelAIMaxModelLenDesc)
9096
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
9197
params.AddComputeRequestFlags(flagSet)
9298
params.AddSpotFlags(flagSet)

cmd/mapt/cmd/params/params.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,12 @@ const (
131131
RhelAIAutoStartDesc string = "automatically configure and start RHAIIS after provisioning"
132132
RhelAIExposePorts string = "expose-ports"
133133
RhelAIExposePortsDesc string = "comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080)"
134+
RhelAIToolCallParser string = "tool-call-parser"
135+
RhelAIToolCallParserDesc string = "enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral)"
136+
RhelAIChatTemplate string = "chat-template"
137+
RhelAIChatTemplateDesc string = "chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja)"
138+
RhelAIMaxModelLen string = "max-model-len"
139+
RhelAIMaxModelLenDesc string = "maximum model context length in tokens (default 4096)"
134140

135141
// Serverless
136142
Timeout string = "timeout"

pkg/provider/aws/action/rhel-ai/rhelai.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ type rhelAIRequest struct {
4646
hfToken *string
4747
apiKey *string
4848
autoStart bool
49+
toolCallParser *string
50+
chatTemplate *string
51+
maxModelLen int
4952
exposePorts []int
5053
}
5154

@@ -85,6 +88,9 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
8588
hfToken: &args.HFToken,
8689
apiKey: &args.APIKey,
8790
autoStart: args.AutoStart,
91+
maxModelLen: args.MaxModelLen,
92+
toolCallParser: &args.ToolCallParser,
93+
chatTemplate: &args.ChatTemplate,
8894
exposePorts: args.ExposePorts}
8995
if args.Spot != nil {
9096
r.spot = args.Spot.Spot
@@ -373,6 +379,23 @@ func (r *rhelAIRequest) rhaiisSetupScript() string {
373379
` && sudo sed -i 's|--model .*|--model %s \\|' %s/install.conf`,
374380
*r.model, confDir)
375381
}
382+
maxModelLen := 4096
383+
if r.maxModelLen > 0 {
384+
maxModelLen = r.maxModelLen
385+
}
386+
if len(*r.toolCallParser) > 0 {
387+
toolArgs := fmt.Sprintf(`--enable-auto-tool-choice \\\n --tool-call-parser %s`, *r.toolCallParser)
388+
if len(*r.chatTemplate) > 0 {
389+
toolArgs += fmt.Sprintf(` \\\n --chat-template /opt/app-root/template/%s`, *r.chatTemplate)
390+
}
391+
script += fmt.Sprintf(
392+
` && sudo sed -i 's|--max-model-len.*|--max-model-len %d \\\n %s|' %s/install.conf`,
393+
maxModelLen, toolArgs, confDir)
394+
} else if r.maxModelLen > 0 {
395+
script += fmt.Sprintf(
396+
` && sudo sed -i 's|--max-model-len.*|--max-model-len %d|' %s/install.conf`,
397+
maxModelLen, confDir)
398+
}
376399
if len(*r.apiKey) > 0 {
377400
script += fmt.Sprintf(
378401
" && sudo sed -i '/\\[Install\\]/i Environment=VLLM_API_KEY=%s' %s/install.conf",

pkg/target/host/rhelai/api.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ type RHELAIArgs struct {
1919
Timeout string
2020
Model string
2121
HFToken string
22-
APIKey string
23-
AutoStart bool
24-
ExposePorts []int
22+
APIKey string
23+
AutoStart bool
24+
ToolCallParser string
25+
ChatTemplate string
26+
MaxModelLen int
27+
ExposePorts []int
2528
}

tkn/infra-aws-rhel-ai.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ spec:
155155
- name: expose-ports
156156
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
157157
default: ""
158+
- name: tool-call-parser
159+
description: Enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral). Automatically adds --enable-auto-tool-choice.
160+
default: ""
161+
- name: chat-template
162+
description: Chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja).
163+
default: ""
164+
- name: max-model-len
165+
description: Maximum model context length in tokens (default 4096). Increase for tool calling or larger models.
166+
default: "0"
158167

159168
# Network params
160169
- name: service-endpoints
@@ -317,6 +326,15 @@ spec:
317326
if [[ "$(params.expose-ports)" != "" ]]; then
318327
cmd+="--expose-ports '$(params.expose-ports)' "
319328
fi
329+
if [[ "$(params.tool-call-parser)" != "" ]]; then
330+
cmd+="--tool-call-parser '$(params.tool-call-parser)' "
331+
fi
332+
if [[ "$(params.chat-template)" != "" ]]; then
333+
cmd+="--chat-template '$(params.chat-template)' "
334+
fi
335+
if [[ "$(params.max-model-len)" != "0" ]]; then
336+
cmd+="--max-model-len '$(params.max-model-len)' "
337+
fi
320338
cmd+="--tags '$(params.tags)' "
321339
fi
322340

tkn/infra-azure-rhel-ai.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ spec:
8585
- name: disk-size
8686
description: Disk size in GB for the cloud instance
8787
default: "200"
88+
- name: gpus
89+
description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8)
90+
default: "8"
91+
- name: gpu-manufacturer
92+
description: GPU manufacturer name for instance filtering (e.g. NVIDIA, AMD)
93+
default: ""
8894
- name: compute-sizes
8995
description: Comma-separated list of sizes for the machines to be requested. If set this takes precedence over compute by args
9096
default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5"
@@ -229,6 +235,12 @@ spec:
229235
if [[ "$(params.compute-sizes)" != "" ]]; then
230236
cmd+="--compute-sizes '$(params.compute-sizes)' "
231237
fi
238+
if [[ "$(params.gpus)" != "" ]]; then
239+
cmd+="--gpus '$(params.gpus)' "
240+
fi
241+
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
242+
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
243+
fi
232244
if [[ "$(params.marketplace)" == "true" ]]; then
233245
cmd+="--marketplace "
234246
cmd+="--accelerator '$(params.accelerator)' "

tkn/template/infra-aws-rhel-ai.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ spec:
155155
- name: expose-ports
156156
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
157157
default: ""
158+
- name: tool-call-parser
159+
description: Enable tool calling with the specified parser (e.g. llama3_json, hermes, mistral). Automatically adds --enable-auto-tool-choice.
160+
default: ""
161+
- name: chat-template
162+
description: Chat template jinja filename (e.g. tool_chat_template_llama3.2_json.jinja).
163+
default: ""
164+
- name: max-model-len
165+
description: Maximum model context length in tokens (default 4096). Increase for tool calling or larger models.
166+
default: "0"
158167

159168
# Network params
160169
- name: service-endpoints
@@ -317,6 +326,15 @@ spec:
317326
if [[ "$(params.expose-ports)" != "" ]]; then
318327
cmd+="--expose-ports '$(params.expose-ports)' "
319328
fi
329+
if [[ "$(params.tool-call-parser)" != "" ]]; then
330+
cmd+="--tool-call-parser '$(params.tool-call-parser)' "
331+
fi
332+
if [[ "$(params.chat-template)" != "" ]]; then
333+
cmd+="--chat-template '$(params.chat-template)' "
334+
fi
335+
if [[ "$(params.max-model-len)" != "0" ]]; then
336+
cmd+="--max-model-len '$(params.max-model-len)' "
337+
fi
320338
cmd+="--tags '$(params.tags)' "
321339
fi
322340

0 commit comments

Comments
 (0)