Skip to content

Commit 4daf40f

Browse files
committed
[FSTORE-1975] Add row count endpoint for feature groups
1 parent 940af23 commit 4daf40f

9 files changed

Lines changed: 139 additions & 8 deletions

File tree

python/hopsworks/client/__init__.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,17 @@
22
# Do not edit it manually!
33
# It is managed by hopsworks-apigen and is subject to periodical deletion.
44
import hopsworks_common.client
5-
6-
75
_is_external = hopsworks_common.client._is_external
86
get_connection = hopsworks_common.client.get_connection
97
get_instance = hopsworks_common.client.get_instance
108
get_knative_domain = hopsworks_common.client.get_knative_domain
11-
get_serving_num_instances_limits = (
12-
hopsworks_common.client.get_serving_num_instances_limits
13-
)
9+
get_serving_num_instances_limits = hopsworks_common.client.get_serving_num_instances_limits
1410
init = hopsworks_common.client.init
1511
is_kserve_installed = hopsworks_common.client.is_kserve_installed
1612
is_saas_connection = hopsworks_common.client.is_saas_connection
1713
is_scale_to_zero_required = hopsworks_common.client.is_scale_to_zero_required
1814
set_connection = hopsworks_common.client.set_connection
1915
set_knative_domain = hopsworks_common.client.set_knative_domain
2016
set_kserve_installed = hopsworks_common.client.set_kserve_installed
21-
set_serving_num_instances_limits = (
22-
hopsworks_common.client.set_serving_num_instances_limits
23-
)
17+
set_serving_num_instances_limits = hopsworks_common.client.set_serving_num_instances_limits
2418
stop = hopsworks_common.client.stop

python/hopsworks/core/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# This file is generated by hopsworks-apigen.
2+
# Do not edit it manually!
3+
# It is managed by hopsworks-apigen and is subject to periodical deletion.
4+
import hopsworks_common.core.sink_job_configuration
5+
FeatureColumnMapping = hopsworks_common.core.sink_job_configuration.FeatureColumnMapping
6+
FullLoadConfig = hopsworks_common.core.sink_job_configuration.FullLoadConfig
7+
LoadingConfig = hopsworks_common.core.sink_job_configuration.LoadingConfig
8+
SinkJobConfiguration = hopsworks_common.core.sink_job_configuration.SinkJobConfiguration

python/hopsworks/mcp/tools/feature_group.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ def __init__(self, mcp: FastMCP):
5454
self.mcp.tool(tags=[TAGS.FEATURE_GROUP, TAGS.READ, TAGS.STATEFUL])(
5555
self.get_features
5656
)
57+
self.mcp.tool(tags=[TAGS.FEATURE_GROUP, TAGS.READ, TAGS.STATEFUL])(
58+
self.get_feature_group_row_count
59+
)
5760

5861
def _get_feature_group_versions(self, name: str | None = None):
5962
# Get the current project and its feature groups
@@ -180,6 +183,20 @@ async def preview_feature_group( # docsig: disable
180183
f"Unable to convert preview to dictionary. Here's the raw preview:\n{preview}"
181184
)
182185

186+
async def get_feature_group_row_count( # docsig: disable
187+
self,
188+
ctx: Context,
189+
name: str,
190+
version: int | None = None,
191+
) -> int:
192+
"""Get the row count of a feature group with the specified name and version (latest by default)."""
193+
await ctx.info(
194+
f"Retrieving row count of {name}{f' v{version}' if version else ''} feature group..."
195+
)
196+
197+
fg = self._get_feature_group_version(name, version)
198+
return fg.get_row_count()
199+
183200
async def get_features( # docsig: disable
184201
self,
185202
ctx: Context,

python/hsfs/core/feature_group_api.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,33 @@ def get_generated_feature_groups(
635635
explicit_provenance.Links.Type.FEATURE_GROUP,
636636
)
637637

638+
def get_row_count(
639+
self,
640+
feature_group_instance: fg_mod.FeatureGroup
641+
| fg_mod.ExternalFeatureGroup
642+
| fg_mod.SpineGroup,
643+
) -> int:
644+
"""Get the row count of a feature group.
645+
646+
Parameters:
647+
feature_group_instance:
648+
metadata object of feature group.
649+
650+
Returns:
651+
The number of rows in the feature group.
652+
"""
653+
_client = client.get_instance()
654+
path_params = [
655+
"project",
656+
_client._project_id,
657+
"featurestores",
658+
feature_group_instance.feature_store_id,
659+
"featuregroups",
660+
feature_group_instance.id,
661+
"rowcount",
662+
]
663+
return _client._send_request("GET", path_params)
664+
638665
def _check_features(self, feature_group_instance) -> None:
639666
if not feature_group_instance._features:
640667
warnings.warn(

python/hsfs/core/feature_group_base_engine.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,17 @@ def __init__(self, feature_store_id):
4141
def delete(self, feature_group):
4242
self._feature_group_api.delete(feature_group)
4343

44+
def get_row_count(self, feature_group: FeatureGroup) -> int:
45+
"""Get the row count of a feature group.
46+
47+
Parameters:
48+
feature_group: The feature group to get the row count of.
49+
50+
Returns:
51+
The row count of the feature group.
52+
"""
53+
return self._feature_group_api.get_row_count(feature_group)
54+
4455
def add_tag(self, feature_group: FeatureGroup, name: str, value: Any):
4556
"""Attach a name/value tag to a feature group.
4657

python/hsfs/feature_group.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,28 @@ def get_generated_feature_groups(self) -> explicit_provenance.Links | None:
865865
"""
866866
return self._feature_group_engine.get_generated_feature_groups(self)
867867

868+
def get_row_count(self) -> int:
869+
"""Get the number of rows in this feature group.
870+
871+
Example:
872+
```python
873+
# connect to the Feature Store
874+
fs = ...
875+
876+
# get the Feature Group instance
877+
fg = fs.get_or_create_feature_group(...)
878+
879+
row_count = fg.get_row_count()
880+
```
881+
882+
Returns:
883+
The number of rows in the feature group.
884+
885+
Raises:
886+
hopsworks.client.exceptions.RestAPIError: If the backend encounters an error when handling the request.
887+
"""
888+
return self._feature_group_engine.get_row_count(self)
889+
868890
@public
869891
def get_feature(self, name: str) -> feature.Feature | None:
870892
"""Retrieve a `Feature` object from the schema of the feature group.

python/tests/core/test_feature_group_api.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,30 @@ def test_check_features(self, mocker, backend_fixtures):
6464
# Assert
6565
assert len(warning_record) == 0
6666

67+
def test_get_row_count(self, mocker, backend_fixtures):
68+
# Arrange
69+
fg_api = feature_group_api.FeatureGroupApi()
70+
71+
client_mock = Mock()
72+
client_mock.configure_mock(
73+
**{"_send_request.return_value": 42, "_project_id": 1}
74+
)
75+
mocker.patch(
76+
"hopsworks_common.client.get_instance",
77+
return_value=client_mock,
78+
)
79+
mocker.patch("hsfs.engine.get_instance")
80+
81+
json = backend_fixtures["feature_group"]["get"]["response"]
82+
fg = fg_mod.FeatureGroup.from_response_json(json)
83+
84+
# Act
85+
result = fg_api.get_row_count(fg)
86+
87+
# Assert
88+
assert result == 42
89+
assert client_mock._send_request.call_count == 1
90+
6791
def test_check_features_no_features(self, mocker, backend_fixtures):
6892
# Arrange
6993
fg_api = feature_group_api.FeatureGroupApi()

python/tests/core/test_feature_group_base_engine.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,22 @@ def test_get_tags(self, mocker):
9999
# Assert
100100
assert mock_tags_api.return_value.get.call_count == 1
101101

102+
def test_get_row_count(self, mocker):
103+
# Arrange
104+
feature_store_id = 99
105+
106+
mock_fg_api = mocker.patch("hsfs.core.feature_group_api.FeatureGroupApi")
107+
108+
fg_base_engine = feature_group_base_engine.FeatureGroupBaseEngine(
109+
feature_store_id=feature_store_id
110+
)
111+
112+
# Act
113+
fg_base_engine.get_row_count(feature_group=None)
114+
115+
# Assert
116+
assert mock_fg_api.return_value.get_row_count.call_count == 1
117+
102118
def test_update_statistics_config(self, mocker):
103119
# Arrange
104120
feature_store_id = 99

python/tests/test_feature_group.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,18 @@ def test_select_features(self):
385385
assert len(features) == 2
386386
assert {f.name for f in features} == {"f1", "f2"}
387387

388+
def test_get_row_count(self, mocker):
389+
mock_engine = mocker.patch(
390+
"hsfs.core.feature_group_base_engine.FeatureGroupBaseEngine.get_row_count",
391+
return_value=123,
392+
)
393+
394+
fg = get_test_feature_group()
395+
result = fg.get_row_count()
396+
397+
assert result == 123
398+
mock_engine.assert_called_once_with(fg)
399+
388400
def test_materialization_job(self, mocker):
389401
mock_job = mocker.Mock()
390402
mock_job_api = mocker.patch(

0 commit comments

Comments
 (0)