Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/mapt/cmd/aws/hosts/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func getRHELAICreate() *cobra.Command {
HFToken: viper.GetString(params.RhelAIHFToken),
APIKey: viper.GetString(params.RhelAIAPIKey),
AutoStart: viper.IsSet(params.RhelAIAutoStart),
VLLMExtraArgs: viper.GetString(params.RhelAIVLLMExtraArgs),
ExposePorts: viper.GetIntSlice(params.RhelAIExposePorts),
})
},
Expand All @@ -87,6 +88,7 @@ func getRHELAICreate() *cobra.Command {
flagSet.StringP(params.RhelAIAPIKey, "", "", params.RhelAIAPIKeyDesc)
flagSet.Bool(params.RhelAIAutoStart, false, params.RhelAIAutoStartDesc)
flagSet.IntSlice(params.RhelAIExposePorts, nil, params.RhelAIExposePortsDesc)
flagSet.StringP(params.RhelAIVLLMExtraArgs, "", "", params.RhelAIVLLMExtraArgsDesc)
flagSet.StringP(params.Timeout, "", "", params.TimeoutDesc)
params.AddComputeRequestFlags(flagSet)
params.AddSpotFlags(flagSet)
Expand Down
2 changes: 2 additions & 0 deletions cmd/mapt/cmd/params/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ const (
RhelAIAutoStartDesc string = "automatically configure and start RHAIIS after provisioning"
RhelAIExposePorts string = "expose-ports"
RhelAIExposePortsDesc string = "comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080)"
RhelAIVLLMExtraArgs string = "vllm-extra-args"
RhelAIVLLMExtraArgsDesc string = "extra vLLM arguments appended to the RHAIIS Exec line (e.g. '--enable-auto-tool-choice --tool-call-parser llama3_json --max-model-len 16384')"

// Serverless
Timeout string = "timeout"
Expand Down
21 changes: 19 additions & 2 deletions pkg/provider/aws/action/rhel-ai/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type rhelAIRequest struct {
hfToken *string
apiKey *string
autoStart bool
vllmExtraArgs *string
exposePorts []int
}

Expand Down Expand Up @@ -85,6 +86,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
hfToken: &args.HFToken,
apiKey: &args.APIKey,
autoStart: args.AutoStart,
vllmExtraArgs: &args.VLLMExtraArgs,
exposePorts: args.ExposePorts}
if args.Spot != nil {
r.spot = args.Spot.Spot
Expand Down Expand Up @@ -359,7 +361,7 @@ func (r *rhelAIRequest) lbTargetGroups() []int {
}

func (r *rhelAIRequest) rhaiisSetupScript() string {
confDir := "/etc/containers/systemd/rhaiis.container.d"
confDir := "/etc/containers/systemd/rhaii.container.d"
script := fmt.Sprintf(
"sudo cp %s/install.conf.example %s/install.conf",
confDir, confDir)
Expand All @@ -373,12 +375,27 @@ func (r *rhelAIRequest) rhaiisSetupScript() string {
` && sudo sed -i 's|--model .*|--model %s \\|' %s/install.conf`,
*r.model, confDir)
}
script += fmt.Sprintf(
` && GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l) && [ "$GPU_COUNT" -gt 0 ] && sudo sed -i "s|--tensor-parallel-size 1|--tensor-parallel-size $GPU_COUNT|" %s/install.conf`,
confDir)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
if len(*r.vllmExtraArgs) > 0 {
extraArgs := *r.vllmExtraArgs
if strings.Contains(extraArgs, "--max-model-len") {
script += fmt.Sprintf(
` && sudo sed -i 's|--max-model-len [0-9]*|%s|' %s/install.conf`,
extraArgs, confDir)
} else {
script += fmt.Sprintf(
` && sudo sed -i 's|--max-model-len 4096|--max-model-len 4096 \\\n %s|' %s/install.conf`,
extraArgs, confDir)
Comment on lines +381 to +390

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔒 Security & Privacy | 🟠 Major | ⚡ Quick win

Shell-quote and sed-escape vllmExtraArgs before interpolation.

vllm-extra-args is user-supplied via CLI/Tekton, but it is inserted into a single-quoted sed program. A value containing ', |, or & can break the shell command or corrupt install.conf.

Suggested direction
 	if len(*r.vllmExtraArgs) > 0 {
-		extraArgs := *r.vllmExtraArgs
+		extraArgs := sedReplacementEscape(*r.vllmExtraArgs)
 		if strings.Contains(extraArgs, "--max-model-len") {
 			script += fmt.Sprintf(
-				` && sudo sed -i 's|--max-model-len [0-9]*|%s|' %s/install.conf`,
-				extraArgs, confDir)
+				` && sudo sed -i %s %s/install.conf`,
+				shellSingleQuote(fmt.Sprintf(`s|--max-model-len [0-9]*|%s|`, extraArgs)), confDir)
 		} else {
 			script += fmt.Sprintf(
-				` && sudo sed -i 's|--max-model-len 4096|--max-model-len 4096 \\\n     %s|' %s/install.conf`,
-				extraArgs, confDir)
+				` && sudo sed -i %s %s/install.conf`,
+				shellSingleQuote(fmt.Sprintf(`s|--max-model-len 4096|--max-model-len 4096 \\\n     %s|`, extraArgs)), confDir)
 		}
 	}

Add small helpers near this script builder:

func shellSingleQuote(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}

func sedReplacementEscape(s string) string {
	return strings.NewReplacer(`\`, `\\`, `&`, `\&`, `|`, `\|`).Replace(s)
}

As per path instructions, “Focus on major issues impacting performance, readability, maintainability and security.”

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if len(*r.vllmExtraArgs) > 0 {
extraArgs := *r.vllmExtraArgs
if strings.Contains(extraArgs, "--max-model-len") {
script += fmt.Sprintf(
` && sudo sed -i 's|--max-model-len [0-9]*|%s|' %s/install.conf`,
extraArgs, confDir)
} else {
script += fmt.Sprintf(
` && sudo sed -i 's|--max-model-len 4096|--max-model-len 4096 \\\n %s|' %s/install.conf`,
extraArgs, confDir)
if len(*r.vllmExtraArgs) > 0 {
extraArgs := sedReplacementEscape(*r.vllmExtraArgs)
if strings.Contains(extraArgs, "--max-model-len") {
script += fmt.Sprintf(
` && sudo sed -i %s %s/install.conf`,
shellSingleQuote(fmt.Sprintf(`s|--max-model-len [0-9]*|%s|`, extraArgs)), confDir)
} else {
script += fmt.Sprintf(
` && sudo sed -i %s %s/install.conf`,
shellSingleQuote(fmt.Sprintf(`s|--max-model-len 4096|--max-model-len 4096 \\\n %s|`, extraArgs)), confDir)
}
}
func shellSingleQuote(s string) string {
return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}
func sedReplacementEscape(s string) string {
return strings.NewReplacer(`\`, `\\`, `&`, `\&`, `|`, `\|`).Replace(s)
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/provider/aws/action/rhel-ai/rhelai.go` around lines 381 - 390,
`rhelai.go` is interpolating user-supplied `vllmExtraArgs` directly into a shell
`sed` command in the script builder, which can break quoting or allow
command/script corruption. Update the `script` construction in the
`r.vllmExtraArgs` branch to properly shell-quote the argument and escape it for
the `sed` replacement/program before formatting it into the command, ideally via
small helpers near this logic such as `shellSingleQuote` and
`sedReplacementEscape`, and apply them in the `len(*r.vllmExtraArgs) > 0` block
that uses `strings.Contains(extraArgs, "--max-model-len")`.

Source: Path instructions

}
}
if len(*r.apiKey) > 0 {
script += fmt.Sprintf(
" && sudo sed -i '/\\[Install\\]/i Environment=VLLM_API_KEY=%s' %s/install.conf",
*r.apiKey, confDir)
}
script += " && sudo systemctl daemon-reload && sudo systemctl start rhaiis"
script += " && sudo systemctl daemon-reload && sudo systemctl start rhaii"
return script
}

Expand Down
7 changes: 4 additions & 3 deletions pkg/target/host/rhelai/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ type RHELAIArgs struct {
Timeout string
Model string
HFToken string
APIKey string
AutoStart bool
ExposePorts []int
APIKey string
AutoStart bool
VLLMExtraArgs string
ExposePorts []int
}
6 changes: 6 additions & 0 deletions tkn/infra-aws-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ spec:
- name: expose-ports
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
default: ""
- name: vllm-extra-args
description: Extra vLLM arguments appended to the RHAIIS Exec line (e.g. '--enable-auto-tool-choice --tool-call-parser llama3_json --max-model-len 16384').
default: ""

# Network params
- name: service-endpoints
Expand Down Expand Up @@ -317,6 +320,9 @@ spec:
if [[ "$(params.expose-ports)" != "" ]]; then
cmd+="--expose-ports '$(params.expose-ports)' "
fi
if [[ "$(params.vllm-extra-args)" != "" ]]; then
cmd+="--vllm-extra-args '$(params.vllm-extra-args)' "
fi
cmd+="--tags '$(params.tags)' "
fi

Expand Down
12 changes: 12 additions & 0 deletions tkn/infra-azure-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ spec:
- name: disk-size
description: Disk size in GB for the cloud instance
default: "200"
- name: gpus
description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8)
default: "8"
Comment on lines +88 to +90

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚀 Performance & Scalability | 🟠 Major | ⚡ Quick win

Preserve the Azure RHEL AI GPU default unless callers opt in.

With default: "8", the condition on Line 238 is always true for omitted Tekton params, so this task now always passes --gpus 8. That bypasses the CLI/provider default path where unset/0 GPUs are normalized to 1, which can unexpectedly select larger marketplace SKUs and hit quota/cost limits.

Proposed fix
     - name: gpus
       description: Number of GPUs for the cloud instance (valid marketplace values are 1, 2, 4, 8)
-      default: "8"
+      default: ""

As per path instructions, **: “Focus on major issues impacting performance, readability, maintainability and security. Avoid nitpicks and avoid verbosity.”

Also applies to: 238-240

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tkn/infra-azure-rhel-ai.yaml` around lines 88 - 90, The Tekton param default
for gpus is forcing the Azure RHEL AI task to always pass --gpus 8 when callers
omit the value, bypassing the provider’s unset/0 normalization path. Update the
gpus parameter in infra-azure-rhel-ai.yaml so it does not default to 8, and let
the existing CLI/provider handling in the task logic around the gpus condition
at the referenced block decide the effective GPU count unless a caller
explicitly opts in.

Source: Path instructions

- name: gpu-manufacturer
description: GPU manufacturer name for instance filtering (e.g. NVIDIA, AMD)
default: ""
- name: compute-sizes
description: Comma seperated list of sizes for the machines to be requested. If set this takes precedence over compute by args
default: "Standard_ND96is_MI300X_v5,Standard_ND96isr_MI300X_v5"
Expand Down Expand Up @@ -229,6 +235,12 @@ spec:
if [[ "$(params.compute-sizes)" != "" ]]; then
cmd+="--compute-sizes '$(params.compute-sizes)' "
fi
if [[ "$(params.gpus)" != "" ]]; then
cmd+="--gpus '$(params.gpus)' "
fi
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
fi
Comment on lines +238 to +243

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔒 Security & Privacy | 🟠 Major | ⚡ Quick win

Validate new params before adding them to the eval’d command.

These Tekton params are interpolated into cmd and executed via eval on Line 272. A value containing a single quote can break out of the intended argument and execute shell syntax.

Proposed hardening
           if [[ "$(params.gpus)" != "" ]]; then
-            cmd+="--gpus '$(params.gpus)' "
+            case "$(params.gpus)" in
+              *[!0-9]*) echo "Parameter gpus must be numeric"; exit 1 ;;
+            esac
+            cmd+="--gpus $(params.gpus) "
           fi
           if [[ "$(params.gpu-manufacturer)" != "" ]]; then
+            case "$(params.gpu-manufacturer)" in
+              *[!A-Za-z0-9._-]*) echo "Parameter gpu-manufacturer contains unsupported characters"; exit 1 ;;
+            esac
             cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
           fi

As per path instructions, **: “Focus on major issues impacting performance, readability, maintainability and security. Avoid nitpicks and avoid verbosity.”

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if [[ "$(params.gpus)" != "" ]]; then
cmd+="--gpus '$(params.gpus)' "
fi
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
fi
if [[ "$(params.gpus)" != "" ]]; then
case "$(params.gpus)" in
*[!0-9]*) echo "Parameter gpus must be numeric"; exit 1 ;;
esac
cmd+="--gpus $(params.gpus) "
fi
if [[ "$(params.gpu-manufacturer)" != "" ]]; then
case "$(params.gpu-manufacturer)" in
*[!A-Za-z0-9._-]*) echo "Parameter gpu-manufacturer contains unsupported characters"; exit 1 ;;
esac
cmd+="--gpu-manufacturer '$(params.gpu-manufacturer)' "
fi
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tkn/infra-azure-rhel-ai.yaml` around lines 238 - 243, The command-building
logic in the Tekton task is vulnerable because params like gpus and
gpu-manufacturer are interpolated into cmd and later executed with eval. Harden
the command assembly by validating or sanitizing these params before appending
them, or avoid eval entirely by passing arguments safely; update the logic in
the script section where cmd is built so single quotes or shell metacharacters
cannot break out of the intended argument.

Source: Path instructions

if [[ "$(params.marketplace)" == "true" ]]; then
cmd+="--marketplace "
cmd+="--accelerator '$(params.accelerator)' "
Expand Down
6 changes: 6 additions & 0 deletions tkn/template/infra-aws-rhel-ai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ spec:
- name: expose-ports
description: Comma-separated list of ports to expose through the load balancer and security group (e.g. 8000,8080).
default: ""
- name: vllm-extra-args
description: Extra vLLM arguments appended to the RHAIIS Exec line (e.g. '--enable-auto-tool-choice --tool-call-parser llama3_json --max-model-len 16384').
default: ""

# Network params
- name: service-endpoints
Expand Down Expand Up @@ -317,6 +320,9 @@ spec:
if [[ "$(params.expose-ports)" != "" ]]; then
cmd+="--expose-ports '$(params.expose-ports)' "
fi
if [[ "$(params.vllm-extra-args)" != "" ]]; then
cmd+="--vllm-extra-args '$(params.vllm-extra-args)' "
fi
cmd+="--tags '$(params.tags)' "
fi

Expand Down