Skip to content

Commit 6217ada

Browse files
authored
Merge pull request #8 from XpressAI/fahreza/metrics-api
Metrics API Update
2 parents 1df6e22 + 3c3b3f0 commit 6217ada

5 files changed

Lines changed: 163 additions & 53 deletions

File tree

tests/pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ markers =
1010
analogy
1111
delete
1212
exception
13+
metrics
1314
toolbelt
1415
management
1516
vectorspace

tests/test_management.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,16 @@ def test_delete_vector_space():
161161
id_exists = any(vecto_vector_space.id == test_vector_space.id for vecto_vector_space in updated_vs_list)
162162
logger.info("Check if the vector space is deleted.")
163163
assert not id_exists
164+
165+
166+
@pytest.mark.metrics
def test_usage():
    '''Check that the usage endpoint reports activity for the current day.

    Requests the usage report for the current year and month and then
    verifies that both the lookup and the indexing counters for today are
    greater than zero.
    '''
    from datetime import datetime

    now = datetime.now()
    monthly_usage = user_vecto.usage(now.year, now.month)

    logger.info("Checking that usage returns a valid response.")
    assert monthly_usage is not None

    logger.info("Checking that usage for today is not empty.")
    # NOTE(review): assumes dailyMetrics holds one entry per day of the month,
    # starting at day 1, so today's entry sits at index day - 1 -- confirm
    # against the API.
    today_index = now.day - 1
    assert monthly_usage.usage.lookups.dailyMetrics[today_index].count > 0
    assert monthly_usage.usage.indexing.dailyMetrics[today_index].count > 0
176+

tests/test_sdk.py

Lines changed: 67 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@
4545
user_vecto = Vecto(token, vector_space_id, vecto_base_url=vecto_base_url)
4646
user_db_twin = DatabaseTwin()
4747

48+
# IDs for update apis
49+
ingest_text_ids = None
50+
ingest_image_ids = None
51+
4852
# Clear off vector space before start
4953
@pytest.mark.clear
5054
def test_clear_vector_space_entries():
@@ -126,6 +130,10 @@ def test_ingest_image_with_valid_source(self):
126130
# for f in files:
127131
# f.close()
128132
results = response.ids
133+
134+
global ingest_image_ids
135+
ingest_image_ids = response.ids
136+
129137
user_db_twin.update_database(results, data['data'])
130138
ref_db = user_db_twin.get_database()
131139

@@ -166,6 +174,10 @@ def test_ingest_text(self):
166174
attribute = TestDataset.get_text_attribute(batch.index.tolist()[:5], batch.tolist()[:5])
167175
response = user_vecto.ingest_text(batch.tolist()[:5], attribute)
168176
results = response.ids
177+
178+
global ingest_text_ids
179+
ingest_text_ids = response.ids
180+
169181
user_db_twin.update_database(results, attribute)
170182
ref_db = user_db_twin.get_database()
171183

@@ -344,30 +356,30 @@ class TestUpdating:
344356

345357
# Test updating a vector embedding using text on Vecto
346358
def test_update_single_text_vector_embedding(self):
    '''Update the embedding of one previously ingested text vector.

    Takes the first random text sample and the first ID stored in the
    module-level `ingest_text_ids`, then sends a single-entry update with
    the TEXT modality.
    '''
    sample_text = TestDataset.get_random_text(TestDataset.get_color_dataset)[0]
    target_id = ingest_text_ids[0]

    # One-element payload: the ID to overwrite and the new text as a stream.
    payload = [{
        'id': target_id,
        'data': io.StringIO(sample_text),
    }]

    user_vecto.update_vector_embeddings(payload, modality='TEXT')
359370

360371
# Test updating a vector embedding using image on Vecto
361372
def test_update_single_image_vector_embedding(self):
362-
image = TestDataset.get_random_image()
363-
vector_ids = random.sample(range(len(image)), len(image))
373+
image = TestDataset.get_random_image()[0]
374+
375+
global ingest_image_ids
376+
vector_id = ingest_image_ids[0]
364377

365378
updated_vector = []
366379

367-
for file, vector_id in zip(image, vector_ids):
368-
updated_vector.append({
380+
updated_vector.append({
369381
'id': vector_id,
370-
'data': open(file, 'rb')
382+
'data': open(image, 'rb')
371383
})
372384

373385
user_vecto.update_vector_embeddings(updated_vector, modality='IMAGE')
@@ -378,7 +390,9 @@ def test_update_single_image_vector_embedding(self):
378390
# Test updating multiple vector embeddings using text on Vecto
379391
def test_update_batch_text_vector_embedding(self):
380392
text = TestDataset.get_color_dataset()[:5]
381-
vector_ids = random.sample(range(len(text)), len(text))
393+
394+
global ingest_text_ids
395+
vector_ids = ingest_text_ids[:5]
382396

383397
updated_vector = []
384398

@@ -394,8 +408,9 @@ def test_update_batch_text_vector_embedding(self):
394408
# Test updating multiple vector embeddings using image on Vecto
395409
def test_update_batch_image_vector_embedding(self):
396410
image = TestDataset.get_image_dataset()[:5]
397-
vector_ids = random.sample(range(len(image)), len(image))
398411

412+
global ingest_image_ids
413+
vector_ids = ingest_image_ids[:5]
399414
updated_vector = []
400415

401416
for file, vector_id in zip(image, vector_ids):
@@ -411,7 +426,12 @@ def test_update_batch_image_vector_embedding(self):
411426

412427
# Test updating attribute of a vector embedding on Vecto
413428
def test_update_single_vector_attribute(self):
414-
vector_id = random.randrange(0, 10)
429+
430+
response = user_vecto.lookup(io.StringIO('blue'), modality='TEXT', top_k=100)
431+
old_results = {result.id: result for result in response}
432+
433+
global ingest_text_ids
434+
vector_id = ingest_text_ids[0]
415435
new_attribute = 'new_attribute'
416436

417437
updated_attribute = [{
@@ -420,8 +440,7 @@ def test_update_single_vector_attribute(self):
420440
}]
421441

422442
user_vecto.update_vector_attribute(updated_attribute)
423-
ref_db = user_db_twin.get_database()
424-
443+
425444
# Just a dummy lookup to return the specified ID - check specific entry
426445
f = io.StringIO('blue')
427446
lookup_response = user_vecto.lookup(f, modality='TEXT', top_k=1, ids=vector_id)
@@ -433,23 +452,28 @@ def test_update_single_vector_attribute(self):
433452
# Just a dummy lookup to return all the data in the vector space - check other entries
434453
f = io.StringIO('blue')
435454
lookup_response = user_vecto.lookup(f, modality='TEXT', top_k=100)
436-
lookup_attribute = []
437-
438-
#need to iterate though this object
439-
for result in lookup_response:
440-
if result.id != vector_id:
441-
lookup_attribute.append([result.id, result.attributes])
455+
lookup_attribute = {result.id: result for result in lookup_response}
456+
442457
logger.info("Checking if other attribute is not updated...")
443-
for result in lookup_attribute:
444-
id = result[0]
445-
attribute = result[1]
446-
assert attribute == ref_db.iloc[id]['attribute']
458+
459+
for id, result in old_results.items():
460+
if id != vector_id: # Skip the updated id
461+
assert id in lookup_attribute, f"ID {id} is missing in the new results."
462+
assert result.attributes == lookup_attribute[id].attributes, \
463+
f"Attributes for ID {id} have changed."
464+
447465
logger.info("All other attribute unchanged.")
448466

449467
# Test updating attribute of multiple vector embeddings on Vecto
450468
def test_update_vector_attribute(self):
469+
470+
response = user_vecto.lookup(io.StringIO('blue'), modality='TEXT', top_k=100)
471+
old_results = {result.id: result for result in response}
472+
451473
batch_len = 3
452-
vector_ids = random.sample(range(10), batch_len)
474+
475+
global ingest_text_ids
476+
vector_ids = ingest_text_ids[:3]
453477
new_attribute = ['new_attribute_{}'.format(i) for i in range(batch_len)]
454478

455479
updated_attribute = []
@@ -461,7 +485,6 @@ def test_update_vector_attribute(self):
461485
})
462486

463487
user_vecto.update_vector_attribute(updated_attribute)
464-
ref_db = user_db_twin.get_database()
465488

466489
# Just a dummy lookup to return all the data in the vector space - check other entries
467490
f = io.StringIO('blue')
@@ -478,17 +501,15 @@ def test_update_vector_attribute(self):
478501
# Just a dummy lookup to return all the data in the vector space - check other entries
479502
f = io.StringIO('blue')
480503
lookup_response = user_vecto.lookup(f, modality='TEXT', top_k=100)
481-
lookup_attribute = []
482-
for result in lookup_response:
483-
if result.id != vector_ids:
484-
lookup_attribute.append([result.id, result.attributes])
504+
lookup_attribute = {result.id: result for result in lookup_response}
505+
485506

486507
logger.info("Checking if other attribute is not updated...")
487-
for result in lookup_attribute:
488-
id = result[0]
489-
if id not in vector_ids:
490-
attribute = result[1]
491-
assert attribute == ref_db.iloc[id].attribute
508+
for id, result in old_results.items():
509+
if id not in vector_ids: # Correctly skip the updated ids
510+
assert id in lookup_attribute, f"ID {id} is missing in the new results."
511+
assert result.attributes == lookup_attribute[id].attributes, \
512+
f"Attributes for ID {id} have changed."
492513
logger.info("All other attribute unchanged.")
493514

494515
@pytest.mark.analogy
@@ -553,23 +574,19 @@ def test_delete_single_vector_embedding(self):
553574

554575
# Test deleting multiple vector embeddings from Vecto
555576
def test_delete_batch_vector_embedding(self):
    '''Delete the ingested text vectors and check the lookup result shrinks.

    Runs a lookup before and after deleting every ID stored in the
    module-level `ingest_text_ids`, and asserts that the number of results
    dropped by exactly the number of deleted IDs.
    '''
    original_response = user_vecto.lookup(io.StringIO('blue'), modality='TEXT', top_k=100)

    deleted_vector_ids = ingest_text_ids

    user_vecto.delete_vector_embeddings(deleted_vector_ids)

    # Use a fresh stream for the second query, as the other lookups in this
    # file do; a stream that has already been read may yield no query text.
    lookup_response = user_vecto.lookup(io.StringIO('blue'), modality='TEXT', top_k=100)

    expected_length = len(original_response) - len(deleted_vector_ids)
    logger.info("Checking if the length of result: " + str(len(lookup_response) == expected_length))
    # Compare by value: `is` tests object identity, which only happens to
    # work for small integers that CPython caches.
    assert len(lookup_response) == expected_length
573590

574591

575592
@pytest.mark.exception

vecto/schema.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import sys
22
from typing import IO, NamedTuple, List, Optional
3+
from datetime import date
34

45
if sys.version_info >= (3, 8):
56
from typing import TypedDict
@@ -97,4 +98,25 @@ class VectoAnalogy(NamedTuple):
9798
textAnalogyExampleIds: List
9899
createdAt: str
99100
updatedAt: str
100-
vectorSpaceId: int
101+
vectorSpaceId: int
102+
103+
class DailyUsageMetric(NamedTuple):
    '''Usage figures recorded for a single calendar day.'''
    # Day the figures belong to.
    date: date
    # Count reported for that day.
    count: int
    # Cumulative count reported alongside the daily one; presumably a running
    # total -- confirm against the API.
    cumulativeCount: int
108+
109+
class UsageMetric(NamedTuple):
    '''Usage figures for one kind of activity, broken down per day.'''
    # One DailyUsageMetric per reported day.
    dailyMetrics: List[DailyUsageMetric]
112+
113+
class VectoUsageMetrics(NamedTuple):
    '''Vecto usage figures, split into lookup and indexing activity.'''
    # Per-day figures for lookups.
    lookups: UsageMetric
    # Per-day figures for indexing.
    indexing: UsageMetric
117+
118+
class MonthlyUsageResponse(NamedTuple):
    '''Usage report for one vector space over one month.'''
    # Year the report covers.
    year: int
    # Month the report covers.
    month: int
    # Lookup and indexing figures for that month.
    usage: VectoUsageMetrics

vecto/vecto_requests.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@
2020
import io
2121
import os
2222
import pathlib
23+
from datetime import date
2324

24-
from typing import IO, List, Union
25+
from typing import IO, List, Union, Any, Dict
2526
from .exceptions import (UnpairedAnalogy, InvalidModality, ModelNotFoundException )
2627

2728
from .schema import (VectoIngestData, VectoEmbeddingData, VectoAttribute, VectoAnalogyStartEnd,
2829
IngestResponse, LookupResult, VectoModel, VectoVectorSpace, VectoUser,
29-
VectoToken, VectoNewTokenResponse, MODEL_MAP, VectoAnalogy)
30+
VectoToken, VectoNewTokenResponse, MODEL_MAP, VectoAnalogy,
31+
DailyUsageMetric, UsageMetric, VectoUsageMetrics, MonthlyUsageResponse)
3032

3133
from .client import Client
3234
import vecto
@@ -916,3 +918,58 @@ def delete_analogy(self, vector_space_id:int, **kwargs):
916918
'''
917919
url = f"/api/v0/account/space/{vector_space_id}/analogy"
918920
self._client.delete(url, **kwargs)
921+
922+
###############
923+
# Metrics API #
924+
###############
925+
926+
def _parse_daily_usage_metric(self, d: Dict[str, Any]) -> DailyUsageMetric:
    '''Convert one decoded daily-metric entry into a `DailyUsageMetric`.

    Args:
        d (dict): Entry with the keys 'date' (ISO-format date string),
            'count' and 'cumulativeCount'.
    Returns:
        DailyUsageMetric: Named tuple holding the parsed date and both counts.
    '''
    metric_date = date.fromisoformat(d['date'])
    day_count = d['count']
    cumulative_count = d['cumulativeCount']
    return DailyUsageMetric(
        date=metric_date,
        count=day_count,
        cumulativeCount=cumulative_count,
    )
932+
933+
def _parse_usage_metric(self, u: Dict[str, Any]) -> UsageMetric:
    '''Convert one decoded usage section into a `UsageMetric`.

    Args:
        u (dict): Section whose 'dailyMetrics' key holds the per-day entries.
    Returns:
        UsageMetric: Named tuple wrapping the parsed per-day entries.
    '''
    parsed_days = []
    for daily_entry in u['dailyMetrics']:
        parsed_days.append(self._parse_daily_usage_metric(daily_entry))
    return UsageMetric(dailyMetrics=parsed_days)
937+
938+
def _parse_vecto_usage_metrics(self, u: Dict[str, Any]) -> VectoUsageMetrics:
    '''Convert the decoded 'usage' payload into a `VectoUsageMetrics`.

    Args:
        u (dict): Payload with a 'lookups' and an 'indexing' section.
    Returns:
        VectoUsageMetrics: Named tuple holding both parsed sections.
    '''
    lookup_metric = self._parse_usage_metric(u['lookups'])
    indexing_metric = self._parse_usage_metric(u['indexing'])
    return VectoUsageMetrics(lookups=lookup_metric, indexing=indexing_metric)
943+
944+
def usage(self, year: int, month: int, vector_space_id: Union[int, None] = None, **kwargs) -> MonthlyUsageResponse:
    '''Return the usage metrics for the selected month

    Args:
        year (int): The year for the usage data.
        month (int): The month for the usage data.
        vector_space_id (int, optional): The ID of the vector space. Defaults to None,
            in which case the instance's `vector_space_id` attribute is used.
        **kwargs: Other keyword arguments for clients other than `requests`
    Returns:
        MonthlyUsageResponse: Named tuple that contains the usage metrics for a specified vector space and month.
    Raises:
        ValueError: If no vector space ID is passed and none is set on the instance.
    '''

    # Fall back to the instance only when the caller passed nothing. A
    # truthiness test (`or`) would also discard a falsy ID such as 0.
    if vector_space_id is None:
        vector_space_id = getattr(self, 'vector_space_id', None)

    # Raise an error if vector_space_id is still not available
    if vector_space_id is None:
        raise ValueError("A vector space ID must be provided either as a parameter or set in the instance.")

    url = f"/api/v0/space/{vector_space_id}/usage/{year}/{month}"
    response = self._client.get(url, **kwargs)
    response_data = response.json()

    # Parse the nested metrics before reading year and month, so a malformed
    # payload fails on the 'usage' section first.
    usage_metrics = self._parse_vecto_usage_metrics(response_data['usage'])

    return MonthlyUsageResponse(
        year=response_data['year'],
        month=response_data['month'],
        usage=usage_metrics,
    )

0 commit comments

Comments
 (0)