Skip to content

Commit 3ca0b5e

Browse files
committed
Add annotations to instances in launcher
.. to carry data meaningful to clients but not launcher. Signed-off-by: Mike Spreitzer <mspreitz@us.ibm.com>
1 parent 976a361 commit 3ca0b5e

4 files changed

Lines changed: 33 additions & 26 deletions

File tree

inference_server/launcher/launcher.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class VllmConfig(BaseModel):
5959
options: str
6060
gpu_uuids: Optional[List[str]] = None
6161
env_vars: Optional[Dict[str, str]] = None
62+
annotations: Optional[Dict[str, str]] = None
6263

6364

6465
class HalfMade(Exception):

inference_server/launcher/tests/test_launcher.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@
5353
def vllm_config():
5454
"""Create a sample VllmConfig for testing"""
5555
return VllmConfig(
56-
options="--model test-model --port 8000", env_vars={"TEST_VAR": "test_value"}
56+
options="--model test-model --port 8000",
57+
env_vars={"TEST_VAR": "test_value"},
58+
annotations={"Foo": "bar"},
5759
)
5860

5961

pkg/controller/dual-pods/inference-server.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -631,12 +631,12 @@ func (ctl *controller) selectBestLauncherPod(
631631
hasSleepingInstance := false
632632
hasPortConflict := false
633633
for _, inst := range insts.Instances {
634-
instPort, err := getVLLMInstancePort(inst.Options)
634+
instPort, err := getVLLMInstancePort(inst.Spec.Options)
635635
if err != nil {
636636
logger.V(5).Info("Skipping launcher Pod because an instance has unparseable options",
637637
"name", launcherPod.Name,
638638
"instanceID", inst.InstanceID,
639-
"options", inst.Options,
639+
"options", inst.Spec.Options,
640640
"err", err)
641641
hasPortConflict = true
642642
break
@@ -695,11 +695,16 @@ func (ctl *controller) selectBestLauncherPod(
695695
}
696696

697697
func (ctl *controller) configInferenceServer(isc *fmav1alpha1.InferenceServerConfig, gpuUUIDs []string) (*VllmConfig, string, error) {
698-
options := isc.Spec.ModelServerConfig.Options + " --port " + strconv.Itoa(int(isc.Spec.ModelServerConfig.Port))
698+
portS := strconv.Itoa(int(isc.Spec.ModelServerConfig.Port))
699+
options := isc.Spec.ModelServerConfig.Options + " --port " + portS
699700
vllmCfg := VllmConfig{
700701
Options: options,
701702
GpuUUIDs: gpuUUIDs,
702703
EnvVars: make(map[string]string, len(isc.Spec.ModelServerConfig.EnvVars)),
704+
Annotations: map[string]string{
705+
"isc-name": isc.Name,
706+
"inference-port": portS,
707+
},
703708
}
704709
for k, v := range isc.Spec.ModelServerConfig.EnvVars {
705710
vllmCfg.EnvVars[k] = v
@@ -1322,7 +1327,7 @@ var podDecoder k8sruntime.Decoder
13221327
// syncLauncherInstances queries the launcher pod for its current instances,
13231328
// updates the controller's internal launcherData state, and returns the fresh
13241329
// launcher response used for the update.
1325-
func (ctl *controller) syncLauncherInstances(ctx context.Context, nodeDat *nodeData, launcherPod *corev1.Pod) (*AllInstancesStatus, error, bool) {
1330+
func (ctl *controller) syncLauncherInstances(ctx context.Context, nodeDat *nodeData, launcherPod *corev1.Pod) (*AllInstancesState, error, bool) {
13261331
logger := klog.FromContext(ctx)
13271332

13281333
if launcherPod.Status.PodIP == "" || !utils.IsPodReady(launcherPod) {

pkg/controller/dual-pods/launcherclient.go

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -50,25 +50,24 @@ func NewLauncherClient(baseURL string) (*LauncherClient, error) {
5050

5151
// VllmConfig matches the launcher API schema.
5252
type VllmConfig struct {
53-
Options string `json:"options"`
54-
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
55-
EnvVars map[string]string `json:"env_vars,omitempty"`
53+
Options string `json:"options"`
54+
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
55+
EnvVars map[string]string `json:"env_vars,omitempty"`
56+
Annotations map[string]string `json:"annotations,omitempty"`
5657
}
5758

58-
// InstanceStatus returned by status APIs.
59-
type InstanceStatus struct {
60-
InstanceID string `json:"instance_id"`
61-
Status string `json:"status"`
62-
Options string `json:"options"`
63-
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
64-
EnvVars map[string]string `json:"env_vars,omitempty"`
59+
// InstanceState returned by launcher API.
60+
type InstanceState struct {
61+
InstanceID string `json:"instance_id"`
62+
Status string `json:"status"`
63+
Spec VllmConfig `json:",inline"`
6564
}
6665

67-
// AllInstancesStatus response.
68-
type AllInstancesStatus struct {
69-
TotalInstances int `json:"total_instances"`
70-
RunningInstances int `json:"running_instances"`
71-
Instances []InstanceStatus `json:"instances"`
66+
// AllInstancesState is the response body of the launcher's instance listing;
// ListInstances decodes the GET /v2/vllm/instances reply into it.
67+
type AllInstancesState struct {
68+
// TotalInstances is read from the "total_instances" key; presumably the
// number of instances the launcher knows about — TODO confirm.
TotalInstances int `json:"total_instances"`
69+
// RunningInstances is read from the "running_instances" key; presumably
// the subset of instances currently running — TODO confirm.
RunningInstances int `json:"running_instances"`
70+
// Instances holds one InstanceState per entry of the "instances" array.
Instances []InstanceState `json:"instances"`
7271
}
7372

7473
// Generic response for creation and deletion.
@@ -95,13 +94,13 @@ func (c *LauncherClient) CreateNamedInstance(
9594
return c.create(ctx, path, http.MethodPut, cfg)
9695
}
9796

98-
// GetInstanceStatus returns the status of a single instance.
99-
func (c *LauncherClient) GetInstanceStatus(
97+
// GetInstanceState returns the state of a single instance.
98+
func (c *LauncherClient) GetInstanceState(
10099
ctx context.Context,
101100
instanceID string,
102-
) (*InstanceStatus, error) {
101+
) (*InstanceState, error) {
103102
path := fmt.Sprintf("/v2/vllm/instances/%s", instanceID)
104-
var out InstanceStatus
103+
var out InstanceState
105104
if err := c.do(ctx, http.MethodGet, path, nil, &out); err != nil {
106105
return nil, err
107106
}
@@ -111,8 +110,8 @@ func (c *LauncherClient) GetInstanceStatus(
111110
// ListInstances returns all instances with status.
112111
func (c *LauncherClient) ListInstances(
113112
ctx context.Context,
114-
) (*AllInstancesStatus, error) {
115-
var out AllInstancesStatus
113+
) (*AllInstancesState, error) {
114+
var out AllInstancesState
116115
if err := c.do(ctx, http.MethodGet, "/v2/vllm/instances", nil, &out); err != nil {
117116
return nil, err
118117
}

0 commit comments

Comments
 (0)