Skip to content

Commit 9be919c

Browse files
Fix: Update ModelCopies.TotalCopies for all model states (kserve#4676)
Signed-off-by: Hardik Menger <hardik.menger@gmail.com> Signed-off-by: Hardik Menger
1 parent 7c64e7b commit 9be919c

File tree

8 files changed

+145
-18
lines changed

8 files changed

+145
-18
lines changed

pkg/apis/serving/v1beta1/inference_service_status.go

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ type ModelCopies struct {
163163
// How many copies of this predictor's models failed to load recently
164164
// +kubebuilder:default=0
165165
FailedCopies int `json:"failedCopies"`
166-
// Total number copies of this predictor's models that are currently loaded
166+
// Total number of copies of this predictor's models across all states (Pending, Loading, Loaded, FailedToLoad).
167167
// +optional
168168
TotalCopies int `json:"totalCopies,omitempty"`
169169
}
@@ -609,7 +609,7 @@ func (ss *InferenceServiceStatus) ClearCondition(conditionType apis.ConditionTyp
609609
}
610610
}
611611

612-
func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelState, totalCopies int, info *FailureInfo) {
612+
func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelState, info *FailureInfo) {
613613
if ss.ModelStatus.ModelRevisionStates == nil {
614614
ss.ModelStatus.ModelRevisionStates = &ModelRevisionStates{TargetModelState: modelState}
615615
} else {
@@ -621,7 +621,6 @@ func (ss *InferenceServiceStatus) UpdateModelRevisionStates(modelState ModelStat
621621
ss.ModelStatus.TransitionStatus = InProgress
622622
case Loaded:
623623
ss.ModelStatus.TransitionStatus = UpToDate
624-
ss.ModelStatus.ModelCopies = &ModelCopies{TotalCopies: totalCopies}
625624
ss.ModelStatus.ModelRevisionStates.ActiveModelState = Loaded
626625
case FailedToLoad:
627626
ss.ModelStatus.TransitionStatus = BlockedByFailedLoad
@@ -654,24 +653,49 @@ func (ss *InferenceServiceStatus) SetModelFailureInfo(info *FailureInfo) bool {
654653
return true
655654
}
656655

656+
// countReadyPods counts the number of pods that are in Ready state and can serve inference requests
657+
func countReadyPods(podList *corev1.PodList) int {
658+
if podList == nil {
659+
return 0
660+
}
661+
readyCount := 0
662+
for _, pod := range podList.Items {
663+
if pod.Status.Phase == corev1.PodRunning {
664+
for _, cond := range pod.Status.Conditions {
665+
if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
666+
readyCount++
667+
break
668+
}
669+
}
670+
}
671+
}
672+
return readyCount
673+
}
674+
657675
func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatusSpec, podList *corev1.PodList, rawDeployment bool, serviceStatus *knservingv1.ServiceStatus) bool {
658676
// Check at least one pod is running for the latest revision of inferenceservice
677+
readyCopies := countReadyPods(podList)
659678
totalCopies := len(podList.Items)
679+
if ss.ModelStatus.ModelCopies == nil {
680+
ss.ModelStatus.ModelCopies = &ModelCopies{}
681+
}
682+
ss.ModelStatus.ModelCopies.TotalCopies = readyCopies
660683
if totalCopies == 0 {
661684
if !rawDeployment {
662685
// Make sure we haven't scaled down to 0 because of an error
663686
for _, knativeCond := range serviceStatus.Conditions {
664687
if knativeCond.Status == "False" {
665688
// If any of the knative statuses are False, the model failed
666689
// Hopefully the lastFailureInfo already has the info we need, so we don't update it here
667-
ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, nil)
690+
ss.UpdateModelRevisionStates(FailedToLoad, nil)
668691
return true
669692
}
670693
}
671694
}
672695

673696
// If we made it here then hopefully there are 0 pods because we're just getting started and therefore Pending seems appropriate
674-
ss.UpdateModelRevisionStates(Pending, totalCopies, nil)
697+
ss.UpdateModelRevisionStates(Pending, nil)
698+
675699
return true
676700
}
677701

@@ -689,19 +713,19 @@ func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatu
689713
return false
690714
} else {
691715
// If there is no previous error, we should be okay to move into the Loading state
692-
ss.UpdateModelRevisionStates(Loading, totalCopies, nil)
716+
ss.UpdateModelRevisionStates(Loading, nil)
693717
return true
694718
}
695719

696720
case cs.State.Terminated != nil && cs.State.Terminated.Reason == constants.StateReasonError:
697-
ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{
721+
ss.UpdateModelRevisionStates(FailedToLoad, &FailureInfo{
698722
Reason: ModelLoadFailed,
699723
Message: cs.State.Terminated.Message,
700724
ExitCode: cs.State.Terminated.ExitCode,
701725
})
702726
return true
703727
case cs.State.Waiting != nil && cs.State.Waiting.Reason == constants.StateReasonCrashLoopBackOff:
704-
ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{
728+
ss.UpdateModelRevisionStates(FailedToLoad, &FailureInfo{
705729
Reason: ModelLoadFailed,
706730
Message: cs.LastTerminationState.Terminated.Message,
707731
ExitCode: cs.LastTerminationState.Terminated.ExitCode,
@@ -715,10 +739,10 @@ func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatu
715739
// For serverless deployment, the latest created revision and the latest ready revision should be equal
716740
if ss.IsReady() {
717741
if rawDeployment {
718-
ss.UpdateModelRevisionStates(Loaded, totalCopies, nil)
742+
ss.UpdateModelRevisionStates(Loaded, nil)
719743
return true
720744
} else if statusSpec.LatestCreatedRevision == statusSpec.LatestReadyRevision {
721-
ss.UpdateModelRevisionStates(Loaded, totalCopies, nil)
745+
ss.UpdateModelRevisionStates(Loaded, nil)
722746
return true
723747
}
724748
}
@@ -729,19 +753,19 @@ func (ss *InferenceServiceStatus) PropagateModelStatus(statusSpec ComponentStatu
729753
if cs.Name == constants.InferenceServiceContainerName {
730754
switch {
731755
case cs.State.Terminated != nil && cs.State.Terminated.Reason == constants.StateReasonError:
732-
ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{
756+
ss.UpdateModelRevisionStates(FailedToLoad, &FailureInfo{
733757
Reason: ModelLoadFailed,
734758
Message: cs.State.Terminated.Message,
735759
ExitCode: cs.State.Terminated.ExitCode,
736760
})
737761
case cs.State.Waiting != nil && cs.State.Waiting.Reason == constants.StateReasonCrashLoopBackOff:
738-
ss.UpdateModelRevisionStates(FailedToLoad, totalCopies, &FailureInfo{
762+
ss.UpdateModelRevisionStates(FailedToLoad, &FailureInfo{
739763
Reason: ModelLoadFailed,
740764
Message: cs.LastTerminationState.Terminated.Message,
741765
ExitCode: cs.LastTerminationState.Terminated.ExitCode,
742766
})
743767
default:
744-
ss.UpdateModelRevisionStates(Pending, totalCopies, nil)
768+
ss.UpdateModelRevisionStates(Pending, nil)
745769
}
746770
}
747771
}

pkg/apis/serving/v1beta1/inference_service_status_test.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,6 +1413,97 @@ func TestInferenceServiceStatus_PropagateModelStatus(t *testing.T) {
14131413
}
14141414
}
14151415

1416+
func TestPropagateModelStatus_ReadyPodsCountingOnly(t *testing.T) {
1417+
g := gomega.NewGomegaWithT(t)
1418+
1419+
// Test case that specifically verifies PropagateModelStatus only counts ready pods
1420+
// regardless of model state transitions or other conditions
1421+
scenarios := map[string]struct {
1422+
name string
1423+
initialModelState ModelState
1424+
initialTransitionStatus TransitionStatus
1425+
podList *corev1.PodList
1426+
rawDeployment bool
1427+
serviceStatus *knservingv1.ServiceStatus
1428+
statusSpec ComponentStatusSpec
1429+
expectedReadyCount int
1430+
}{
1431+
"ready pods count independent of Loading state": {
1432+
initialModelState: Loading,
1433+
initialTransitionStatus: InProgress,
1434+
podList: &corev1.PodList{Items: []corev1.Pod{
1435+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}}, // Ready
1436+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}}, // Ready
1437+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionFalse}}}}, // Unready
1438+
}},
1439+
rawDeployment: true,
1440+
serviceStatus: &knservingv1.ServiceStatus{},
1441+
statusSpec: ComponentStatusSpec{LatestReadyRevision: "test", LatestCreatedRevision: "test"},
1442+
expectedReadyCount: 2,
1443+
},
1444+
"ready pods count independent of FailedToLoad state": {
1445+
initialModelState: FailedToLoad,
1446+
initialTransitionStatus: BlockedByFailedLoad,
1447+
podList: &corev1.PodList{Items: []corev1.Pod{
1448+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}}, // Ready
1449+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionFalse}}}}, // Unready
1450+
}},
1451+
rawDeployment: true,
1452+
serviceStatus: &knservingv1.ServiceStatus{},
1453+
statusSpec: ComponentStatusSpec{LatestReadyRevision: "test", LatestCreatedRevision: "test"},
1454+
expectedReadyCount: 1,
1455+
},
1456+
"ready pods count independent of Pending state": {
1457+
initialModelState: Pending,
1458+
initialTransitionStatus: InProgress,
1459+
podList: &corev1.PodList{Items: []corev1.Pod{
1460+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionTrue}}}}, // Ready
1461+
{Status: corev1.PodStatus{Phase: corev1.PodPending, Conditions: []corev1.PodCondition{}}}, // Unready (pending)
1462+
}},
1463+
rawDeployment: true,
1464+
serviceStatus: &knservingv1.ServiceStatus{},
1465+
statusSpec: ComponentStatusSpec{LatestReadyRevision: "test", LatestCreatedRevision: "test"},
1466+
expectedReadyCount: 1,
1467+
},
1468+
"no ready pods in any state": {
1469+
initialModelState: Loaded,
1470+
initialTransitionStatus: UpToDate,
1471+
podList: &corev1.PodList{Items: []corev1.Pod{
1472+
{Status: corev1.PodStatus{Phase: corev1.PodRunning, Conditions: []corev1.PodCondition{{Type: corev1.PodReady, Status: corev1.ConditionFalse}}}}, // Unready
1473+
{Status: corev1.PodStatus{Phase: corev1.PodPending, Conditions: []corev1.PodCondition{}}}, // Unready (pending)
1474+
}},
1475+
rawDeployment: true,
1476+
serviceStatus: &knservingv1.ServiceStatus{},
1477+
statusSpec: ComponentStatusSpec{LatestReadyRevision: "test", LatestCreatedRevision: "test"},
1478+
expectedReadyCount: 0,
1479+
},
1480+
}
1481+
1482+
for name, scenario := range scenarios {
1483+
t.Run(name, func(t *testing.T) {
1484+
isvcStatus := &InferenceServiceStatus{
1485+
ModelStatus: ModelStatus{
1486+
TransitionStatus: scenario.initialTransitionStatus,
1487+
ModelRevisionStates: &ModelRevisionStates{
1488+
ActiveModelState: scenario.initialModelState,
1489+
TargetModelState: scenario.initialModelState,
1490+
},
1491+
},
1492+
}
1493+
1494+
returnValue := isvcStatus.PropagateModelStatus(scenario.statusSpec, scenario.podList, scenario.rawDeployment, scenario.serviceStatus)
1495+
1496+
g.Expect(returnValue).To(gomega.BeTrue())
1497+
g.Expect(isvcStatus.ModelStatus.ModelCopies).ToNot(gomega.BeNil())
1498+
g.Expect(isvcStatus.ModelStatus.ModelCopies.TotalCopies).To(gomega.Equal(scenario.expectedReadyCount))
1499+
1500+
// Verify consistency with countReadyPods
1501+
directCount := countReadyPods(scenario.podList)
1502+
g.Expect(isvcStatus.ModelStatus.ModelCopies.TotalCopies).To(gomega.Equal(directCount))
1503+
})
1504+
}
1505+
}
1506+
14161507
func TestInferenceServiceStatus_UpdateModelRevisionStates(t *testing.T) {
14171508
g := gomega.NewGomegaWithT(t)
14181509

pkg/controller/v1beta1/inferenceservice/controller_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2283,6 +2283,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
22832283
ModelStatus: v1beta1.ModelStatus{
22842284
TransitionStatus: "InProgress",
22852285
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
2286+
ModelCopies: &v1beta1.ModelCopies{},
22862287
},
22872288
ServingRuntimeName: "tf-serving",
22882289
}

pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
575575
ModelStatus: v1beta1.ModelStatus{
576576
TransitionStatus: "InProgress",
577577
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
578+
ModelCopies: &v1beta1.ModelCopies{},
578579
},
579580
DeploymentMode: string(constants.Standard),
580581
ServingRuntimeName: "tf-serving-raw",
@@ -1132,6 +1133,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
11321133
ModelStatus: v1beta1.ModelStatus{
11331134
TransitionStatus: "InProgress",
11341135
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
1136+
ModelCopies: &v1beta1.ModelCopies{},
11351137
},
11361138
DeploymentMode: string(constants.Standard),
11371139
ServingRuntimeName: "tf-serving-raw",
@@ -1685,6 +1687,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
16851687
ModelStatus: v1beta1.ModelStatus{
16861688
TransitionStatus: "InProgress",
16871689
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
1690+
ModelCopies: &v1beta1.ModelCopies{},
16881691
},
16891692
ServingRuntimeName: "tf-serving-raw",
16901693
}
@@ -4823,6 +4826,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
48234826
ModelStatus: v1beta1.ModelStatus{
48244827
TransitionStatus: "InProgress",
48254828
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
4829+
ModelCopies: &v1beta1.ModelCopies{},
48264830
},
48274831
DeploymentMode: string(constants.Standard),
48284832
ServingRuntimeName: "tf-serving-raw",
@@ -5399,6 +5403,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
53995403
ModelStatus: v1beta1.ModelStatus{
54005404
TransitionStatus: "InProgress",
54015405
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
5406+
ModelCopies: &v1beta1.ModelCopies{},
54025407
},
54035408
DeploymentMode: string(constants.Standard),
54045409
ServingRuntimeName: "tf-serving-raw",
@@ -6241,6 +6246,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
62416246
ModelStatus: v1beta1.ModelStatus{
62426247
TransitionStatus: "InProgress",
62436248
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
6249+
ModelCopies: &v1beta1.ModelCopies{},
62446250
},
62456251
DeploymentMode: string(constants.Standard),
62466252
ServingRuntimeName: "tf-serving-raw",
@@ -7192,6 +7198,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
71927198
ModelStatus: v1beta1.ModelStatus{
71937199
TransitionStatus: "InProgress",
71947200
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
7201+
ModelCopies: &v1beta1.ModelCopies{},
71957202
},
71967203
DeploymentMode: string(constants.Standard),
71977204
ServingRuntimeName: "tf-serving-raw",
@@ -7883,6 +7890,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
78837890
ModelStatus: v1beta1.ModelStatus{
78847891
TransitionStatus: "InProgress",
78857892
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
7893+
ModelCopies: &v1beta1.ModelCopies{},
78867894
},
78877895
DeploymentMode: string(constants.Standard),
78887896
ServingRuntimeName: "tf-serving-raw",
@@ -8775,6 +8783,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
87758783
ModelStatus: v1beta1.ModelStatus{
87768784
TransitionStatus: "InProgress",
87778785
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
8786+
ModelCopies: &v1beta1.ModelCopies{},
87788787
},
87798788
DeploymentMode: string(constants.Standard),
87808789
ServingRuntimeName: "tf-serving-raw",
@@ -9818,6 +9827,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
98189827
ModelStatus: v1beta1.ModelStatus{
98199828
TransitionStatus: "InProgress",
98209829
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
9830+
ModelCopies: &v1beta1.ModelCopies{},
98219831
},
98229832
DeploymentMode: string(constants.Standard),
98239833
ServingRuntimeName: "tf-serving-raw",
@@ -10724,6 +10734,7 @@ var _ = Describe("v1beta1 inference service controller", func() {
1072410734
ModelStatus: v1beta1.ModelStatus{
1072510735
TransitionStatus: "InProgress",
1072610736
ModelRevisionStates: &v1beta1.ModelRevisionStates{TargetModelState: "Pending"},
10737+
ModelCopies: &v1beta1.ModelCopies{},
1072710738
},
1072810739
DeploymentMode: string(constants.Standard),
1072910740
ServingRuntimeName: "tf-serving-raw",

pkg/openapi/openapi_generated.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/openapi/swagger.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3803,7 +3803,7 @@
38033803
"default": 0
38043804
},
38053805
"totalCopies": {
3806-
"description": "Total number copies of this predictor's models that are currently loaded",
3806+
"description": "Total number of copies of this predictor's models across all states (Pending, Loading, Loaded, FailedToLoad).",
38073807
"type": "integer",
38083808
"format": "int32"
38093809
}

python/kserve/docs/V1beta1ModelCopies.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
Name | Type | Description | Notes
55
------------ | ------------- | ------------- | -------------
66
**failed_copies** | **int** | How many copies of this predictor&#39;s models failed to load recently | [default to 0]
7-
**total_copies** | **int** | Total number copies of this predictor&#39;s models that are currently loaded | [optional]
7+
**total_copies** | **int** | Total number of copies of this predictor&#39;s models across all states (Pending, Loading, Loaded, FailedToLoad). | [optional]
88

99
[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
1010

python/kserve/kserve/models/v1beta1_model_copies.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def failed_copies(self, failed_copies):
9999
def total_copies(self):
100100
"""Gets the total_copies of this V1beta1ModelCopies. # noqa: E501
101101
102-
Total number copies of this predictor's models that are currently loaded # noqa: E501
102+
Total number of copies of this predictor's models across all states (Pending, Loading, Loaded, FailedToLoad). # noqa: E501
103103
104104
:return: The total_copies of this V1beta1ModelCopies. # noqa: E501
105105
:rtype: int
@@ -110,7 +110,7 @@ def total_copies(self):
110110
def total_copies(self, total_copies):
111111
"""Sets the total_copies of this V1beta1ModelCopies.
112112
113-
Total number copies of this predictor's models that are currently loaded # noqa: E501
113+
Total number of copies of this predictor's models across all states (Pending, Loading, Loaded, FailedToLoad). # noqa: E501
114114
115115
:param total_copies: The total_copies of this V1beta1ModelCopies. # noqa: E501
116116
:type: int

0 commit comments

Comments
 (0)