4545user_vecto = Vecto (token , vector_space_id , vecto_base_url = vecto_base_url )
4646user_db_twin = DatabaseTwin ()
4747
48+ # IDs returned by the ingest tests; reused by the update and delete tests
49+ ingest_text_ids = None
50+ ingest_image_ids = None
51+
4852# Clear off vector space before start
4953@pytest .mark .clear
5054def test_clear_vector_space_entries ():
@@ -126,6 +130,10 @@ def test_ingest_image_with_valid_source(self):
126130 # for f in files:
127131 # f.close()
128132 results = response .ids
133+
134+ global ingest_image_ids
135+ ingest_image_ids = response .ids
136+
129137 user_db_twin .update_database (results , data ['data' ])
130138 ref_db = user_db_twin .get_database ()
131139
@@ -166,6 +174,10 @@ def test_ingest_text(self):
166174 attribute = TestDataset .get_text_attribute (batch .index .tolist ()[:5 ], batch .tolist ()[:5 ])
167175 response = user_vecto .ingest_text (batch .tolist ()[:5 ], attribute )
168176 results = response .ids
177+
178+ global ingest_text_ids
179+ ingest_text_ids = response .ids
180+
169181 user_db_twin .update_database (results , attribute )
170182 ref_db = user_db_twin .get_database ()
171183
@@ -344,30 +356,30 @@ class TestUpdating:
344356
345357 # Test updating a vector embedding using text on Vecto
def test_update_single_text_vector_embedding(self):
    """Re-embed one previously ingested text vector from a new text sample.

    The target is the first ID stored in the module-level
    ``ingest_text_ids`` (assigned in ``test_ingest_text``). The test makes
    no assertions of its own: it passes as long as the update call does
    not raise.
    """
    text = TestDataset.get_random_text(TestDataset.get_color_dataset)[0]

    # Reading a module-level name does not require a ``global`` statement.
    vector_id = ingest_text_ids[0]

    # Single-entry payload: the ID to overwrite and a text stream to embed.
    updated_vector = [
        {
            'id': vector_id,
            'data': io.StringIO(text),
        },
    ]

    user_vecto.update_vector_embeddings(updated_vector, modality='TEXT')
359370
360371 # Test updating a vector embedding using image on Vecto
361372 def test_update_single_image_vector_embedding (self ):
362- image = TestDataset .get_random_image ()
363- vector_ids = random .sample (range (len (image )), len (image ))
373+ image = TestDataset .get_random_image ()[0 ]
374+
375+ global ingest_image_ids
376+ vector_id = ingest_image_ids [0 ]
364377
365378 updated_vector = []
366379
367- for file , vector_id in zip (image , vector_ids ):
368- updated_vector .append ({
380+ updated_vector .append ({
369381 'id' : vector_id ,
370- 'data' : open (file , 'rb' )
382+ 'data' : open (image , 'rb' )
371383 })
372384
373385 user_vecto .update_vector_embeddings (updated_vector , modality = 'IMAGE' )
@@ -378,7 +390,9 @@ def test_update_single_image_vector_embedding(self):
378390 # Test updating multiple vector embeddings using text on Vecto
379391 def test_update_batch_text_vector_embedding (self ):
380392 text = TestDataset .get_color_dataset ()[:5 ]
381- vector_ids = random .sample (range (len (text )), len (text ))
393+
394+ global ingest_text_ids
395+ vector_ids = ingest_text_ids [:5 ]
382396
383397 updated_vector = []
384398
@@ -394,8 +408,9 @@ def test_update_batch_text_vector_embedding(self):
394408 # Test updating multiple vector embeddings using image on Vecto
395409 def test_update_batch_image_vector_embedding (self ):
396410 image = TestDataset .get_image_dataset ()[:5 ]
397- vector_ids = random .sample (range (len (image )), len (image ))
398411
412+ global ingest_image_ids
413+ vector_ids = ingest_image_ids [:5 ]
399414 updated_vector = []
400415
401416 for file , vector_id in zip (image , vector_ids ):
@@ -411,7 +426,12 @@ def test_update_batch_image_vector_embedding(self):
411426
412427 # Test updating attribute of a vector embedding on Vecto
413428 def test_update_single_vector_attribute (self ):
414- vector_id = random .randrange (0 , 10 )
429+
430+ response = user_vecto .lookup (io .StringIO ('blue' ), modality = 'TEXT' , top_k = 100 )
431+ old_results = {result .id : result for result in response }
432+
433+ global ingest_text_ids
434+ vector_id = ingest_text_ids [0 ]
415435 new_attribute = 'new_attribute'
416436
417437 updated_attribute = [{
@@ -420,8 +440,7 @@ def test_update_single_vector_attribute(self):
420440 }]
421441
422442 user_vecto .update_vector_attribute (updated_attribute )
423- ref_db = user_db_twin .get_database ()
424-
443+
425444 # Just a dummy lookup to return the specified ID - check specific entry
426445 f = io .StringIO ('blue' )
427446 lookup_response = user_vecto .lookup (f , modality = 'TEXT' , top_k = 1 , ids = vector_id )
@@ -433,23 +452,28 @@ def test_update_single_vector_attribute(self):
433452 # Just a dummy lookup to return all the data in the vector space - check other entries
434453 f = io .StringIO ('blue' )
435454 lookup_response = user_vecto .lookup (f , modality = 'TEXT' , top_k = 100 )
436- lookup_attribute = []
437-
438- #need to iterate though this object
439- for result in lookup_response :
440- if result .id != vector_id :
441- lookup_attribute .append ([result .id , result .attributes ])
455+ lookup_attribute = {result .id : result for result in lookup_response }
456+
442457 logger .info ("Checking if other attribute is not updated..." )
443- for result in lookup_attribute :
444- id = result [0 ]
445- attribute = result [1 ]
446- assert attribute == ref_db .iloc [id ]['attribute' ]
458+
459+ for id , result in old_results .items ():
460+ if id != vector_id : # Skip the updated id
461+ assert id in lookup_attribute , f"ID { id } is missing in the new results."
462+ assert result .attributes == lookup_attribute [id ].attributes , \
463+ f"Attributes for ID { id } have changed."
464+
447465 logger .info ("All other attribute unchanged." )
448466
449467 # Test updating attribute of multiple vector embeddings on Vecto
450468 def test_update_vector_attribute (self ):
469+
470+ response = user_vecto .lookup (io .StringIO ('blue' ), modality = 'TEXT' , top_k = 100 )
471+ old_results = {result .id : result for result in response }
472+
451473 batch_len = 3
452- vector_ids = random .sample (range (10 ), batch_len )
474+
475+ global ingest_text_ids
476+ vector_ids = ingest_text_ids [:3 ]
453477 new_attribute = ['new_attribute_{}' .format (i ) for i in range (batch_len )]
454478
455479 updated_attribute = []
@@ -461,7 +485,6 @@ def test_update_vector_attribute(self):
461485 })
462486
463487 user_vecto .update_vector_attribute (updated_attribute )
464- ref_db = user_db_twin .get_database ()
465488
466489 # Just a dummy lookup to return all the data in the vector space - check other entries
467490 f = io .StringIO ('blue' )
@@ -478,17 +501,15 @@ def test_update_vector_attribute(self):
478501 # Just a dummy lookup to return all the data in the vector space - check other entries
479502 f = io .StringIO ('blue' )
480503 lookup_response = user_vecto .lookup (f , modality = 'TEXT' , top_k = 100 )
481- lookup_attribute = []
482- for result in lookup_response :
483- if result .id != vector_ids :
484- lookup_attribute .append ([result .id , result .attributes ])
504+ lookup_attribute = {result .id : result for result in lookup_response }
505+
485506
486507 logger .info ("Checking if other attribute is not updated..." )
487- for result in lookup_attribute :
488- id = result [ 0 ]
489- if id not in vector_ids :
490- attribute = result [ 1 ]
491- assert attribute == ref_db . iloc [ id ]. attribute
508+ for id , result in old_results . items () :
509+ if id not in vector_ids : # Correctly skip the updated ids
510+ assert id in lookup_attribute , f"ID { id } is missing in the new results."
511+ assert result . attributes == lookup_attribute [ id ]. attributes , \
512+ f"Attributes for ID { id } have changed."
492513 logger .info ("All other attribute unchanged." )
493514
494515@pytest .mark .analogy
@@ -553,23 +574,19 @@ def test_delete_single_vector_embedding(self):
553574
554575 # Test deleting multiple vector embeddings from Vecto
def test_delete_batch_vector_embedding(self):
    """Delete all vectors recorded by the text-ingest test and verify the count.

    Looks up the vector space before and after deleting the IDs stored in
    the module-level ``ingest_text_ids`` (assigned in ``test_ingest_text``)
    and asserts that the number of lookup results dropped by exactly the
    number of deleted IDs.
    """
    # Fail with a readable message instead of an opaque TypeError when the
    # ingest test has not run and the module-level IDs are still None.
    assert ingest_text_ids is not None, \
        "ingest_text_ids is not set; the text ingest test must run first."
    deleted_vector_ids = ingest_text_ids

    # Just a dummy lookup to count the entries before deletion.
    original_response = user_vecto.lookup(
        io.StringIO('blue'), modality='TEXT', top_k=100)

    user_vecto.delete_vector_embeddings(deleted_vector_ids)

    # Use a fresh stream for the second lookup: a StringIO that has already
    # been read is positioned at EOF and would yield an empty query.
    lookup_response = user_vecto.lookup(
        io.StringIO('blue'), modality='TEXT', top_k=100)

    expected_len = len(original_response) - len(deleted_vector_ids)

    logger.info("Checking if the length of result: " + str(len(lookup_response) == expected_len))
    # Compare by value; `is` on ints only works by accident of CPython's
    # small-integer cache.
    assert len(lookup_response) == expected_len
573590
574591
575592@pytest .mark .exception
0 commit comments