Skip to content

Commit b9d4dae

Browse files
committed
[sc-147336] new help option to see available vllm parameters
1 parent 4b6c196 commit b9d4dae

File tree

4 files changed

+142
-2
lines changed

4 files changed

+142
-2
lines changed

cmd/aiservices/deployment/deployment_create.go

Lines changed: 78 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"os"
7+
"sort"
78
"strings"
89

910
exocmd "github.com/exoscale/cli/cmd"
@@ -18,14 +19,15 @@ type DeploymentCreateCmd struct {
1819

1920
_ bool `cli-cmd:"create"`
2021

21-
Name string `cli-arg:"#" cli-usage:"NAME"`
22+
Name string `cli-arg:"?" cli-usage:"NAME"`
2223
GPUType string `cli-flag:"gpu-type" cli-usage:"GPU type family (e.g., gpua5000, gpu3080ti)"`
2324
GPUCount int64 `cli-flag:"gpu-count" cli-usage:"Number of GPUs (1-8)"`
2425
Replicas int64 `cli-flag:"replicas" cli-usage:"Number of replicas (>=1)"`
2526

2627
ModelID string `cli-flag:"model-id" cli-usage:"Model ID (UUID)"`
2728
ModelName string `cli-flag:"model-name" cli-usage:"Model name (as created)"`
2829
InferenceEngineParameters string `cli-flag:"inference-engine-params" cli-usage:"Space-separated inference engine server CLI arguments (e.g., \"--gpu-memory-usage=0.8 --max-tokens=4096\")"`
30+
InferenceEngineHelp bool `cli-flag:"inference-engine-parameter-help" cli-usage:"Show inference engine parameters help"`
2931
Zone v3.ZoneName `cli-short:"z" cli-usage:"zone"`
3032
}
3133

@@ -38,13 +40,88 @@ func (c *DeploymentCreateCmd) CmdPreRun(cmd *cobra.Command, args []string) error
3840
exocmd.CmdSetZoneFlagFromDefault(cmd)
3941
return exocmd.CliCommandDefaultPreRun(c, cmd, args)
4042
}
43+
func (c *DeploymentCreateCmd) showInferenceEngineParameterHelp(ctx context.Context, client *v3.Client) error {
44+
resp, err := client.GetInferenceEngineHelp(ctx)
45+
if err != nil {
46+
return err
47+
}
48+
49+
sections := make(map[string][]v3.InferenceEngineParameterEntry)
50+
var sectionNames []string
51+
for _, p := range resp.Parameters {
52+
if _, ok := sections[p.Section]; !ok {
53+
sectionNames = append(sectionNames, p.Section)
54+
}
55+
sections[p.Section] = append(sections[p.Section], p)
56+
}
57+
sort.Strings(sectionNames)
58+
59+
for i, section := range sectionNames {
60+
if i > 0 {
61+
fmt.Println()
62+
}
63+
fmt.Printf("%s:\n", section)
64+
for _, p := range sections[section] {
65+
flags := strings.Join(p.Flags, ", ")
66+
if p.Type != "boolean" && p.Type != "enum" {
67+
flags += " " + strings.ToUpper(strings.ReplaceAll(p.Name, "-", "_"))
68+
}
69+
70+
fmt.Printf(" %s\n", flags)
71+
72+
desc := p.Description
73+
if p.Default != "" {
74+
// The description in example sometimes already includes default, but the example output
75+
// shows (default: ...) at the end.
76+
if !strings.Contains(desc, fmt.Sprintf("(default: %s)", p.Default)) {
77+
desc += fmt.Sprintf(" (default: %s)", p.Default)
78+
}
79+
}
80+
81+
// Simple wrapping
82+
words := strings.Fields(desc)
83+
if len(words) > 0 {
84+
line := " "
85+
for _, word := range words {
86+
if len(line)+len(word) > 160 {
87+
fmt.Println(line)
88+
line = " " + word
89+
} else {
90+
if line == " " {
91+
line += word
92+
} else {
93+
line += " " + word
94+
}
95+
}
96+
}
97+
fmt.Println(line)
98+
}
99+
}
100+
}
101+
102+
return nil
103+
}
104+
41105
func (c *DeploymentCreateCmd) CmdRun(_ *cobra.Command, _ []string) error {
42106
ctx := exocmd.GContext
107+
108+
if c.InferenceEngineHelp {
109+
client, err := exocmd.SwitchClientZoneV3(ctx, globalstate.EgoscaleV3Client, c.Zone)
110+
if err != nil {
111+
return err
112+
}
113+
return c.showInferenceEngineParameterHelp(ctx, client)
114+
}
115+
43116
client, err := exocmd.SwitchClientZoneV3(ctx, globalstate.EgoscaleV3Client, c.Zone)
44117
if err != nil {
45118
return err
46119
}
47120

121+
if c.Name == "" {
122+
return fmt.Errorf("NAME is required")
123+
}
124+
48125
if c.GPUType == "" || c.GPUCount == 0 {
49126
return fmt.Errorf("--gpu-type and --gpu-count are required")
50127
}

cmd/aiservices/deployment/deployment_create_test.go

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,3 +123,62 @@ func TestDeploymentCreateWithInferenceEngineParameters(t *testing.T) {
123123
}
124124
}
125125
}
126+
127+
func TestDeploymentCreateInferenceEngineHelp(t *testing.T) {
128+
mux := http.NewServeMux()
129+
mux.HandleFunc("/ai/deployment/inference-engine-help", func(w http.ResponseWriter, r *http.Request) {
130+
if r.Method != http.MethodGet {
131+
w.WriteHeader(http.StatusMethodNotAllowed)
132+
return
133+
}
134+
resp := v3.GetInferenceEngineHelpResponse{
135+
Parameters: []v3.InferenceEngineParameterEntry{
136+
{
137+
Name: "config-format",
138+
Flags: []string{"--config-format"},
139+
Type: "enum",
140+
Default: "auto",
141+
Section: "ModelConfig",
142+
Description: "The format of the model config to load.",
143+
},
144+
{
145+
Name: "max-model-len",
146+
Flags: []string{"--max-model-len"},
147+
Type: "integer",
148+
Default: "None",
149+
Section: "ModelConfig",
150+
Description: "Model context length.",
151+
},
152+
},
153+
}
154+
writeJSON(t, w, http.StatusOK, resp)
155+
})
156+
srv := httptest.NewServer(mux)
157+
defer srv.Close()
158+
159+
exocmd.GContext = context.Background()
160+
globalstate.Quiet = true
161+
creds := credentials.NewStaticCredentials("key", "secret")
162+
client, err := v3.NewClient(creds)
163+
if err != nil {
164+
t.Fatalf("new client: %v", err)
165+
}
166+
globalstate.EgoscaleV3Client = client.WithEndpoint(v3.Endpoint(srv.URL))
167+
168+
c := &DeploymentCreateCmd{
169+
CliCommandSettings: exocmd.DefaultCLICmdSettings(),
170+
InferenceEngineHelp: true,
171+
}
172+
if err := c.CmdRun(nil, nil); err != nil {
173+
t.Fatalf("deployment create help without name: %v", err)
174+
}
175+
176+
c = &DeploymentCreateCmd{
177+
CliCommandSettings: exocmd.DefaultCLICmdSettings(),
178+
Name: "test-deploy",
179+
InferenceEngineHelp: true,
180+
}
181+
if err := c.CmdRun(nil, nil); err != nil {
182+
t.Fatalf("deployment create help with name: %v", err)
183+
}
184+
}

vendor/github.com/exoscale/egoscale/v3/operations.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/exoscale/egoscale/v3/schemas.go

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)