Skip to content

Commit 21b578e

Browse files
authored
feat: add model source properties to store metadata about origin of a model artifact, fixes RHOAIENG-19885 (#838)
Signed-off-by: Dhiraj Bokde <[email protected]>
1 parent 0fa0e3e commit 21b578e

21 files changed

+1003
-38
lines changed

api/openapi/model-registry.yaml

+29
Original file line numberDiff line numberDiff line change
@@ -1550,6 +1550,35 @@ components:
15501550
serviceAccountName:
15511551
description: Name of the service account with storage secret.
15521552
type: string
1553+
modelSourceKind:
1554+
type: string
1555+
description: |-
1556+
A string identifier describing the source kind. It differentiates various sources of model artifacts.
1557+
This identifier should be agreed upon by producers and consumers of source model metadata.
1558+
It is not an enumeration to keep the source of model metadata open ended.
1559+
E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.
1560+
modelSourceClass:
1561+
type: string
1562+
description: |-
1563+
A subgroup within the source kind. It is a specific sub-component or instance within the source kind.
1564+
E.g. `pipelinerun` for a Kubeflow pipeline run.
1565+
modelSourceGroup:
1566+
type: string
1567+
description: |-
1568+
Unique identifier for a source group for models from source class.
1569+
It maps to a physical group of source models.
1570+
E.g. a Kubernetes namespace where the pipeline run was executed.
1571+
modelSourceId:
1572+
type: string
1573+
description: |-
1574+
A unique identifier for a source model within kind, class, and group.
1575+
It should be a url friendly string if source supports using URLs to locate source models.
1576+
E.g. a pipeline run ID.
1577+
modelSourceName:
1578+
type: string
1579+
description: |-
1580+
A human-readable name for the source model.
1581+
E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.
15531582
ModelArtifactCreate:
15541583
description: An ML model artifact.
15551584
properties:

clients/python/src/model_registry/_client.py

+15
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,11 @@ def register_model(
271271
storage_key: str | None = None,
272272
storage_path: str | None = None,
273273
service_account_name: str | None = None,
274+
model_source_kind: str | None = None,
275+
model_source_class: str | None = None,
276+
model_source_group: str | None = None,
277+
model_source_id: str | None = None,
278+
model_source_name: str | None = None,
274279
author: str | None = None,
275280
owner: str | None = None,
276281
description: str | None = None,
@@ -300,6 +305,11 @@ def register_model(
300305
storage_key: Storage key.
301306
storage_path: Storage path.
302307
service_account_name: Service account name.
308+
model_source_kind: A string identifier describing the source kind.
309+
model_source_class: A subgroup within the source kind.
310+
model_source_group: Unique identifier for a source group for models from source class.
311+
model_source_id: A unique identifier for a source model within kind, class, and group.
312+
model_source_name: A human-readable name for the source model.
303313
metadata: Additional version metadata. Defaults to values returned by `default_metadata()`.
304314
305315
Returns:
@@ -325,6 +335,11 @@ def register_model(
325335
storage_key=storage_key,
326336
storage_path=storage_path,
327337
service_account_name=service_account_name,
338+
model_source_kind=model_source_kind,
339+
model_source_class=model_source_class,
340+
model_source_group=model_source_group,
341+
model_source_id=model_source_id,
342+
model_source_name=model_source_name,
328343
)
329344
)
330345

clients/python/src/model_registry/types/artifacts.py

+15
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ class ModelArtifact(Artifact):
135135
storage_key: Storage secret name.
136136
storage_path: Storage path of the model.
137137
service_account_name: Name of the service account with storage secret.
138+
model_source_kind: A string identifier describing the source kind.
139+
model_source_class: A subgroup within the source kind.
140+
model_source_group: Unique identifier for a source group for models from source class.
141+
model_source_id: A unique identifier for a source model within kind, class, and group.
142+
model_source_name: A human-readable name for the source model.
138143
"""
139144

140145
# TODO: this could be an enum of valid formats
@@ -143,6 +148,11 @@ class ModelArtifact(Artifact):
143148
storage_key: str | None = None
144149
storage_path: str | None = None
145150
service_account_name: str | None = None
151+
model_source_kind: str | None = None
152+
model_source_class: str | None = None
153+
model_source_group: str | None = None
154+
model_source_id: str | None = None
155+
model_source_name: str | None = None
146156

147157
_model_version_id: str | None = None
148158

@@ -194,6 +204,11 @@ def from_basemodel(cls, source: ModelArtifactBaseModel) -> ModelArtifact:
194204
storage_key=source.storage_key,
195205
storage_path=source.storage_path,
196206
service_account_name=source.service_account_name,
207+
model_source_kind=source.model_source_kind,
208+
model_source_class=source.model_source_class,
209+
model_source_group=source.model_source_group,
210+
model_source_id=source.model_source_id,
211+
model_source_name=source.model_source_name,
197212
state=source.state,
198213
custom_properties=cls._unmap_custom_properties(source.custom_properties)
199214
if source.custom_properties

clients/python/src/mr_openapi/models/model_artifact.py

+35
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,31 @@ class ModelArtifact(BaseModel):
7070
service_account_name: StrictStr | None = Field(
7171
default=None, description="Name of the service account with storage secret.", alias="serviceAccountName"
7272
)
73+
model_source_kind: StrictStr | None = Field(
74+
default=None,
75+
description="A string identifier describing the source kind. It differentiates various sources of model artifacts. This identifier should be agreed upon by producers and consumers of source model metadata. It is not an enumeration to keep the source of model metadata open ended. E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.",
76+
alias="modelSourceKind",
77+
)
78+
model_source_class: StrictStr | None = Field(
79+
default=None,
80+
description="A subgroup within the source kind. It is a specific sub-component or instance within the source kind. E.g. `pipelinerun` for a Kubeflow pipeline run.",
81+
alias="modelSourceClass",
82+
)
83+
model_source_group: StrictStr | None = Field(
84+
default=None,
85+
description="Unique identifier for a source group for models from source class. It maps to a physical group of source models. E.g. a Kubernetes namespace where the pipeline run was executed.",
86+
alias="modelSourceGroup",
87+
)
88+
model_source_id: StrictStr | None = Field(
89+
default=None,
90+
description="A unique identifier for a source model within kind, class, and group. It should be a url friendly string if source supports using URLs to locate source models. E.g. a pipeline run ID.",
91+
alias="modelSourceId",
92+
)
93+
model_source_name: StrictStr | None = Field(
94+
default=None,
95+
description="A human-readable name for the source model. E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.",
96+
alias="modelSourceName",
97+
)
7398
__properties: ClassVar[list[str]] = [
7499
"customProperties",
75100
"description",
@@ -86,6 +111,11 @@ class ModelArtifact(BaseModel):
86111
"storagePath",
87112
"modelFormatVersion",
88113
"serviceAccountName",
114+
"modelSourceKind",
115+
"modelSourceClass",
116+
"modelSourceGroup",
117+
"modelSourceId",
118+
"modelSourceName",
89119
]
90120

91121
model_config = ConfigDict(
@@ -170,5 +200,10 @@ def from_dict(cls, obj: dict[str, Any] | None) -> Self | None:
170200
"storagePath": obj.get("storagePath"),
171201
"modelFormatVersion": obj.get("modelFormatVersion"),
172202
"serviceAccountName": obj.get("serviceAccountName"),
203+
"modelSourceKind": obj.get("modelSourceKind"),
204+
"modelSourceClass": obj.get("modelSourceClass"),
205+
"modelSourceGroup": obj.get("modelSourceGroup"),
206+
"modelSourceId": obj.get("modelSourceId"),
207+
"modelSourceName": obj.get("modelSourceName"),
173208
}
174209
)

clients/python/src/mr_openapi/models/model_artifact_create.py

+35
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,31 @@ class ModelArtifactCreate(BaseModel):
5959
service_account_name: StrictStr | None = Field(
6060
default=None, description="Name of the service account with storage secret.", alias="serviceAccountName"
6161
)
62+
model_source_kind: StrictStr | None = Field(
63+
default=None,
64+
description="A string identifier describing the source kind. It differentiates various sources of model artifacts. This identifier should be agreed upon by producers and consumers of source model metadata. It is not an enumeration to keep the source of model metadata open ended. E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.",
65+
alias="modelSourceKind",
66+
)
67+
model_source_class: StrictStr | None = Field(
68+
default=None,
69+
description="A subgroup within the source kind. It is a specific sub-component or instance within the source kind. E.g. `pipelinerun` for a Kubeflow pipeline run.",
70+
alias="modelSourceClass",
71+
)
72+
model_source_group: StrictStr | None = Field(
73+
default=None,
74+
description="Unique identifier for a source group for models from source class. It maps to a physical group of source models. E.g. a Kubernetes namespace where the pipeline run was executed.",
75+
alias="modelSourceGroup",
76+
)
77+
model_source_id: StrictStr | None = Field(
78+
default=None,
79+
description="A unique identifier for a source model within kind, class, and group. It should be a url friendly string if source supports using URLs to locate source models. E.g. a pipeline run ID.",
80+
alias="modelSourceId",
81+
)
82+
model_source_name: StrictStr | None = Field(
83+
default=None,
84+
description="A human-readable name for the source model. E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.",
85+
alias="modelSourceName",
86+
)
6287
__properties: ClassVar[list[str]] = [
6388
"customProperties",
6489
"description",
@@ -72,6 +97,11 @@ class ModelArtifactCreate(BaseModel):
7297
"storagePath",
7398
"modelFormatVersion",
7499
"serviceAccountName",
100+
"modelSourceKind",
101+
"modelSourceClass",
102+
"modelSourceGroup",
103+
"modelSourceId",
104+
"modelSourceName",
75105
]
76106

77107
model_config = ConfigDict(
@@ -147,5 +177,10 @@ def from_dict(cls, obj: dict[str, Any] | None) -> Self | None:
147177
"storagePath": obj.get("storagePath"),
148178
"modelFormatVersion": obj.get("modelFormatVersion"),
149179
"serviceAccountName": obj.get("serviceAccountName"),
180+
"modelSourceKind": obj.get("modelSourceKind"),
181+
"modelSourceClass": obj.get("modelSourceClass"),
182+
"modelSourceGroup": obj.get("modelSourceGroup"),
183+
"modelSourceId": obj.get("modelSourceId"),
184+
"modelSourceName": obj.get("modelSourceName"),
150185
}
151186
)

clients/python/src/mr_openapi/models/model_artifact_update.py

+35
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,31 @@ class ModelArtifactUpdate(BaseModel):
5555
service_account_name: StrictStr | None = Field(
5656
default=None, description="Name of the service account with storage secret.", alias="serviceAccountName"
5757
)
58+
model_source_kind: StrictStr | None = Field(
59+
default=None,
60+
description="A string identifier describing the source kind. It differentiates various sources of model artifacts. This identifier should be agreed upon by producers and consumers of source model metadata. It is not an enumeration to keep the source of model metadata open ended. E.g. Kubeflow pipelines could use `pipelines` to identify models it produces.",
61+
alias="modelSourceKind",
62+
)
63+
model_source_class: StrictStr | None = Field(
64+
default=None,
65+
description="A subgroup within the source kind. It is a specific sub-component or instance within the source kind. E.g. `pipelinerun` for a Kubeflow pipeline run.",
66+
alias="modelSourceClass",
67+
)
68+
model_source_group: StrictStr | None = Field(
69+
default=None,
70+
description="Unique identifier for a source group for models from source class. It maps to a physical group of source models. E.g. a Kubernetes namespace where the pipeline run was executed.",
71+
alias="modelSourceGroup",
72+
)
73+
model_source_id: StrictStr | None = Field(
74+
default=None,
75+
description="A unique identifier for a source model within kind, class, and group. It should be a url friendly string if source supports using URLs to locate source models. E.g. a pipeline run ID.",
76+
alias="modelSourceId",
77+
)
78+
model_source_name: StrictStr | None = Field(
79+
default=None,
80+
description="A human-readable name for the source model. E.g. `my-project/1`, `ibm-granite/granite-3.1-8b-base:2.1.2`.",
81+
alias="modelSourceName",
82+
)
5883
__properties: ClassVar[list[str]] = [
5984
"customProperties",
6085
"description",
@@ -67,6 +92,11 @@ class ModelArtifactUpdate(BaseModel):
6792
"storagePath",
6893
"modelFormatVersion",
6994
"serviceAccountName",
95+
"modelSourceKind",
96+
"modelSourceClass",
97+
"modelSourceGroup",
98+
"modelSourceId",
99+
"modelSourceName",
70100
]
71101

72102
model_config = ConfigDict(
@@ -141,5 +171,10 @@ def from_dict(cls, obj: dict[str, Any] | None) -> Self | None:
141171
"storagePath": obj.get("storagePath"),
142172
"modelFormatVersion": obj.get("modelFormatVersion"),
143173
"serviceAccountName": obj.get("serviceAccountName"),
174+
"modelSourceKind": obj.get("modelSourceKind"),
175+
"modelSourceClass": obj.get("modelSourceClass"),
176+
"modelSourceGroup": obj.get("modelSourceGroup"),
177+
"modelSourceId": obj.get("modelSourceId"),
178+
"modelSourceName": obj.get("modelSourceName"),
144179
}
145180
)

0 commit comments

Comments
 (0)