Skip to content

Commit 77f97d2

Browse files
Add annotations to instances in launcher (#399)
... to carry data that is meaningful to clients but not to the launcher. Signed-off-by: Mike Spreitzer <mspreitz@us.ibm.com>
1 parent b3209b4 commit 77f97d2

4 files changed

Lines changed: 38 additions & 26 deletions

File tree

inference_server/launcher/launcher.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ class VllmConfig(BaseModel):
5959
options: str
6060
gpu_uuids: Optional[List[str]] = None
6161
env_vars: Optional[Dict[str, str]] = None
62+
annotations: Optional[Dict[str, str]] = None
6263

6364

6465
class HalfMade(Exception):

inference_server/launcher/tests/test_launcher.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,9 @@
5353
def vllm_config():
5454
"""Create a sample VllmConfig for testing"""
5555
return VllmConfig(
56-
options="--model test-model --port 8000", env_vars={"TEST_VAR": "test_value"}
56+
options="--model test-model --port 8000",
57+
env_vars={"TEST_VAR": "test_value"},
58+
annotations={"Foo": "bar"},
5759
)
5860

5961

pkg/controller/dual-pods/inference-server.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -698,11 +698,16 @@ func (ctl *controller) selectBestLauncherPod(
698698
// `isc` and `gpuUUIDs` are deeply immutable.
699699
// The result is deeply immutable.
700700
func (ctl *controller) configInferenceServer(isc *fmav1alpha1.InferenceServerConfig, gpuUUIDs []string) (*VllmConfig, string, error) {
701-
options := isc.Spec.ModelServerConfig.Options + " --port " + strconv.Itoa(int(isc.Spec.ModelServerConfig.Port))
701+
portS := strconv.Itoa(int(isc.Spec.ModelServerConfig.Port))
702+
options := isc.Spec.ModelServerConfig.Options + " --port " + portS
702703
vllmCfg := VllmConfig{
703704
Options: options,
704705
GpuUUIDs: gpuUUIDs,
705706
EnvVars: isc.Spec.ModelServerConfig.EnvVars,
707+
Annotations: map[string]string{
708+
"isc-name": isc.Name,
709+
"inference-port": portS,
710+
},
706711
}
707712
iscBytes, err := yaml.Marshal(isc.Spec.ModelServerConfig)
708713
if err != nil {
@@ -1321,7 +1326,7 @@ var podDecoder k8sruntime.Decoder
13211326
// syncLauncherInstances queries the launcher pod for its current instances,
13221327
// updates the controller's internal launcherData state, and returns the fresh
13231328
// launcher response used for the update.
1324-
func (ctl *controller) syncLauncherInstances(ctx context.Context, nodeDat *nodeData, launcherPod *corev1.Pod) (*AllInstancesStatus, error, bool) {
1329+
func (ctl *controller) syncLauncherInstances(ctx context.Context, nodeDat *nodeData, launcherPod *corev1.Pod) (*AllInstancesState, error, bool) {
13251330
logger := klog.FromContext(ctx)
13261331

13271332
if launcherPod.Status.PodIP == "" || !utils.IsPodReady(launcherPod) {

pkg/controller/dual-pods/launcherclient.go

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import (
2525
"net/http"
2626
"net/url"
2727
"time"
28+
29+
"k8s.io/klog/v2"
2830
)
2931

3032
type LauncherClient struct {
@@ -50,25 +52,24 @@ func NewLauncherClient(baseURL string) (*LauncherClient, error) {
5052

5153
// VllmConfig matches the launcher API schema.
5254
type VllmConfig struct {
53-
Options string `json:"options"`
54-
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
55-
EnvVars map[string]string `json:"env_vars,omitempty"`
55+
Options string `json:"options"`
56+
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
57+
EnvVars map[string]string `json:"env_vars,omitempty"`
58+
Annotations map[string]string `json:"annotations,omitempty"`
5659
}
5760

58-
// InstanceStatus returned by status APIs.
59-
type InstanceStatus struct {
60-
InstanceID string `json:"instance_id"`
61-
Status string `json:"status"`
62-
Options string `json:"options"`
63-
GpuUUIDs []string `json:"gpu_uuids,omitempty"`
64-
EnvVars map[string]string `json:"env_vars,omitempty"`
61+
// InstanceState returned by launcher API.
62+
type InstanceState struct {
63+
InstanceID string `json:"instance_id"`
64+
Status string `json:"status"`
65+
VllmConfig `json:",inline"`
6566
}
6667

67-
// AllInstancesStatus response.
68-
type AllInstancesStatus struct {
69-
TotalInstances int `json:"total_instances"`
70-
RunningInstances int `json:"running_instances"`
71-
Instances []InstanceStatus `json:"instances"`
68+
// AllInstancesState response.
69+
type AllInstancesState struct {
70+
TotalInstances int `json:"total_instances"`
71+
RunningInstances int `json:"running_instances"`
72+
Instances []InstanceState `json:"instances"`
7273
}
7374

7475
// Generic response for creation and deletion.
@@ -95,13 +96,13 @@ func (c *LauncherClient) CreateNamedInstance(
9596
return c.create(ctx, path, http.MethodPut, cfg)
9697
}
9798

98-
// GetInstanceStatus returns the status of a single instance.
99-
func (c *LauncherClient) GetInstanceStatus(
99+
// GetInstanceState returns the state of a single instance.
100+
func (c *LauncherClient) GetInstanceState(
100101
ctx context.Context,
101102
instanceID string,
102-
) (*InstanceStatus, error) {
103+
) (*InstanceState, error) {
103104
path := fmt.Sprintf("/v2/vllm/instances/%s", instanceID)
104-
var out InstanceStatus
105+
var out InstanceState
105106
if err := c.do(ctx, http.MethodGet, path, nil, &out); err != nil {
106107
return nil, err
107108
}
@@ -111,8 +112,8 @@ func (c *LauncherClient) GetInstanceStatus(
111112
// ListInstances returns all instances with status.
112113
func (c *LauncherClient) ListInstances(
113114
ctx context.Context,
114-
) (*AllInstancesStatus, error) {
115-
var out AllInstancesStatus
115+
) (*AllInstancesState, error) {
116+
var out AllInstancesState
116117
if err := c.do(ctx, http.MethodGet, "/v2/vllm/instances", nil, &out); err != nil {
117118
return nil, err
118119
}
@@ -216,8 +217,11 @@ func (c *LauncherClient) do(
216217
}
217218

218219
if out != nil {
219-
return json.NewDecoder(resp.Body).Decode(out)
220+
respBytes, _ := io.ReadAll(resp.Body)
221+
_ = resp.Body.Close()
222+
err = json.NewDecoder(bytes.NewReader(respBytes)).Decode(out)
223+
klog.FromContext(ctx).V(6).Info("Decoded response body", "body", string(respBytes), "decoded", out, "err", err)
220224
}
221225

222-
return nil
226+
return err
223227
}

0 commit comments

Comments
 (0)