diff --git a/controller/config/reid-config.json b/controller/config/reid-config.json index dd513057e..d1b6a04d1 100644 --- a/controller/config/reid-config.json +++ b/controller/config/reid-config.json @@ -1,4 +1,5 @@ { + "similarity_metric": "L2", "stale_feature_timeout_secs": 5.0, "stale_feature_check_interval_secs": 1.0, "feature_accumulation_threshold": 12, diff --git a/controller/src/controller/data_source.py b/controller/src/controller/data_source.py index c854265f8..ef35ba7e2 100644 --- a/controller/src/controller/data_source.py +++ b/controller/src/controller/data_source.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod from pathlib import Path import json + from scene_common import log from scene_common.rest_client import RESTClient diff --git a/controller/src/controller/reid.py b/controller/src/controller/reid.py index 732128a78..32f255309 100644 --- a/controller/src/controller/reid.py +++ b/controller/src/controller/reid.py @@ -3,7 +3,59 @@ from abc import ABC, abstractmethod +import numpy as np + +from scene_common import log + class ReIDDatabase(ABC): + def prepareReidDict(self, embedding_vector, dimensions=None, + normalize_embeddings=False): + """Prepare a normalized/validated ReID payload from arbitrary vector shapes. + + Supports vectors shaped as (N,), (1, N), or any array-like object by + flattening to 1D. If dimensions is None, dimensions are inferred from the + flattened vector length. 
+ """ + if embedding_vector is None: + log.warning("prepareReidDict: Empty embedding vector, skipping this vector") + return None + + vec_array = np.asarray(embedding_vector, dtype="float32").reshape(-1) + inferred_dimensions = int(vec_array.shape[0]) + expected_dimensions = inferred_dimensions if dimensions is None else int(dimensions) + + if inferred_dimensions != expected_dimensions: + log.warning( + f"prepareReidDict: Expected vector shape ({expected_dimensions},) but got {vec_array.shape}, skipping this vector") + return None + + if not np.all(np.isfinite(vec_array)): + log.warning("prepareReidDict: Vector contains non-finite values, skipping this vector") + return None + + if normalize_embeddings: + norm = np.linalg.norm(vec_array) + if not np.isfinite(norm) or norm == 0.0: + log.warning(f"prepareReidDict: Invalid vector norm ({norm}), skipping this vector") + return None + vec_array = vec_array / norm + + return { + "embedded_vector": vec_array.astype("float32", copy=False), + "dimensions": expected_dimensions, + } + + def prepareReidVector(self, reid_vector, dimensions, + normalize_embeddings=False): + """Backward-compatible wrapper returning only the prepared vector.""" + prepared_reid = self.prepareReidDict( + reid_vector, + dimensions, + normalize_embeddings=normalize_embeddings) + if prepared_reid is None: + return None + return prepared_reid["embedded_vector"] + @abstractmethod def connect(self, hostname): """ diff --git a/controller/src/controller/scene.py b/controller/src/controller/scene.py index 9ebcc4988..e77e412a4 100644 --- a/controller/src/controller/scene.py +++ b/controller/src/controller/scene.py @@ -3,19 +3,21 @@ from types import SimpleNamespace from typing import Optional + import numpy as np + import robot_vision as rv -from controller.controller_mode import ControllerMode -from controller.moving_object import ChainData from scene_common import log from scene_common.camera import Camera from scene_common.earth_lla import 
convertLLAToECEF, calculateTRSLocal2LLAFromSurfacePoints -from scene_common.geometry import Line, Point, Region, Tripwire, getRegionEvents, getTripwireEvents +from scene_common.geometry import Point, Region, Tripwire, getRegionEvents, getTripwireEvents from scene_common.scene_model import SceneModel from scene_common.timestamp import get_epoch_time, get_iso_time from scene_common.transform import CameraPose from scene_common.mesh_util import getMeshAxisAlignedProjectionToXY, createRegionMesh, createObjectMesh +from controller.controller_mode import ControllerMode +from controller.moving_object import ChainData from controller.ilabs_tracking import IntelLabsTracking from controller.time_chunking import TimeChunkedIntelLabsTracking, DEFAULT_CHUNKING_RATE_FPS from controller.tracking import (MAX_UNRELIABLE_TIME, diff --git a/controller/src/controller/uuid_manager.py b/controller/src/controller/uuid_manager.py index c02d1884a..0358a4e61 100644 --- a/controller/src/controller/uuid_manager.py +++ b/controller/src/controller/uuid_manager.py @@ -4,16 +4,18 @@ import collections import concurrent.futures import threading +import math import numpy as np -from controller.vdms_adapter import VDMSDatabase +from controller.vdms_adapter import VDMSDatabase, COSINE_SIMILARITY_TOLERANCE from controller.moving_object import ReidState, MovingObject from scene_common import log from scene_common.timestamp import get_epoch_time DEFAULT_DATABASE = "VDMS" -DEFAULT_SIMILARITY_THRESHOLD = 40 +DEFAULT_SIMILARITY_THRESHOLD_L2 = 40.0 +DEFAULT_SIMILARITY_THRESHOLD_COSINE = 0.5 DEFAULT_MINIMUM_BBOX_AREA = 5000 DEFAULT_MINIMUM_FEATURE_COUNT = 12 DEFAULT_FEATURE_SLICE_SIZE = 10 @@ -21,11 +23,61 @@ DEFAULT_MAX_SIMILARITY_QUERIES_TRACKED = 10 DEFAULT_STALE_FEATURE_TIMEOUT_SECS = 5.0 DEFAULT_STALE_FEATURE_CHECK_INTERVAL_SECS = 1.0 +DEFAULT_SIMILARITY_METRIC = "L2" +SUPPORTED_SIMILARITY_METRICS = {"COSINE", "L2"} +# Tolerance applied to the theoretical [-1, 1] IP score bounds to absorb +# float32 
rounding errors from VDMS normalization and inner-product computation. available_databases = { "VDMS": VDMSDatabase, } class UUIDManager: + def _normalizeSimilarityMetric(self, metric): + normalized_metric = str(metric).strip().upper() + if normalized_metric not in SUPPORTED_SIMILARITY_METRICS: + log.warning( + f"Unsupported similarity_metric '{metric}', " + f"supported values are {sorted(SUPPORTED_SIMILARITY_METRICS)}; " + f"falling back to {DEFAULT_SIMILARITY_METRIC}") + return DEFAULT_SIMILARITY_METRIC + return normalized_metric + + def _resolveDatabaseSimilarityMetric(self, configured_metric): + """Translate controller-facing similarity metric to the VDMS descriptor metric.""" + metric = self._normalizeSimilarityMetric(configured_metric) + if metric == "COSINE": + return "IP" + return metric + + def _resolveDefaultSimilarityThreshold(self, similarity_metric): + """Return the default threshold for the configured similarity metric.""" + if self._normalizeSimilarityMetric(similarity_metric) == "COSINE": + return DEFAULT_SIMILARITY_THRESHOLD_COSINE + return DEFAULT_SIMILARITY_THRESHOLD_L2 + + def _validateSimilarityThreshold(self, similarity_threshold, similarity_metric): + """Normalize and validate the configured threshold for the active metric.""" + try: + normalized_threshold = float(similarity_threshold) + except (TypeError, ValueError) as err: + raise ValueError( + f"similarity_threshold must be a finite numeric value, got {similarity_threshold}") from err + + if not math.isfinite(normalized_threshold): + raise ValueError( + f"similarity_threshold must be a finite numeric value, got {similarity_threshold}") + + normalized_metric = self._normalizeSimilarityMetric(similarity_metric) + if normalized_metric == "COSINE": + if normalized_threshold < -1.0 or normalized_threshold > 1.0: + raise ValueError( + "similarity_threshold for COSINE must be within [-1.0, 1.0]") + return normalized_threshold + + if normalized_threshold < 0.0: + raise 
ValueError("similarity_threshold for L2 must be non-negative") + return normalized_threshold + def __init__(self, database=DEFAULT_DATABASE, reid_config_data=None): self.active_ids = {} self.active_ids_lock = threading.Lock() @@ -34,6 +86,7 @@ def __init__(self, database=DEFAULT_DATABASE, reid_config_data=None): self.features_for_database_timestamps = {} # Track when features were added self.quality_features = {} self.unique_id_count = 0 + self.stale_feature_timer = None self.unique_id_count_lock = threading.Lock() # ReID embedding dimensions are inferred from the first observed embedding. @@ -79,12 +132,21 @@ def _applyReidConfig(self, reid_config_data=None): 'stale_feature_check_interval_secs', DEFAULT_STALE_FEATURE_CHECK_INTERVAL_SECS) self.minimum_feature_count = reid_config_data.get( 'feature_accumulation_threshold', DEFAULT_MINIMUM_FEATURE_COUNT) - self.similarity_threshold = reid_config_data.get( - 'similarity_threshold', DEFAULT_SIMILARITY_THRESHOLD) + self.similarity_metric = self._normalizeSimilarityMetric(reid_config_data.get( + 'similarity_metric', DEFAULT_SIMILARITY_METRIC)) + configured_similarity_threshold = reid_config_data.get('similarity_threshold') + if configured_similarity_threshold is None: + configured_similarity_threshold = self._resolveDefaultSimilarityThreshold( + self.similarity_metric) + self.similarity_threshold = self._validateSimilarityThreshold( + configured_similarity_threshold, self.similarity_metric) self.minimum_bbox_area = reid_config_data.get( 'minimum_bbox_area', DEFAULT_MINIMUM_BBOX_AREA) self.feature_slice_size = reid_config_data.get( 'feature_slice_size', DEFAULT_FEATURE_SLICE_SIZE) + if hasattr(self, 'reid_database') and self.reid_database is not None: + self.reid_database.similarity_metric = self._resolveDatabaseSimilarityMetric( + self.similarity_metric) def _rescheduleStaleFeatureTimer(self): """Cancel any existing stale-feature timer and start a new one.""" @@ -448,49 +510,119 @@ def parseQueryResults(self, 
similarity_scores, threshold=None): """ Check database for any similar objects and return an ID and similarity score. Uses a majority-vote strategy: a candidate UUID must appear in at least half of the - per-vector best matches whose distance is below the threshold to be accepted. - When multiple candidates qualify, the one with the lowest distance is returned. + per-vector best matches that pass the metric-specific threshold test to be accepted. + When multiple candidates qualify, the one with the best metric value is returned + according to descriptor semantics (highest for IP/COSINE, lowest for L2). @param similarity_scores The similarity scores obtained from the database query - @param threshold The maximum distance between Re-ID vectors still considered - a valid match; defaults to self.similarity_threshold - @return database_id UUID of the matched entry if a majority-vote match is found; - otherwise None - @return similarity Minimum distance to the matched entry if found; otherwise None + @param threshold Similarity threshold interpreted according to metric semantics: + - L2-style distance: lower is better, candidate must be < threshold + - IP-style score: higher is better, candidate must be > threshold + @return database_id Returns the ID of the matched entry from the database if one + is found; otherwise, returns None + @return similarity Similarity value returned by VDMS (`_distance` field) for + the matched entry if one is found; otherwise, return None """ if threshold is None: threshold = self.similarity_threshold + if not self._hasValidSimilarityScoreShape(similarity_scores): + log.warning( + "parseQueryResults: Invalid similarity_scores shape; expected list[list[entity]]. 
" + f"Received type={type(similarity_scores)}") + return None, None + if similarity_scores: - minimum_distances = [self._findMinimumDistance(entities) + metric_candidates = [self._findBestMetricCandidate(entities) for entities in similarity_scores] - distances_below_threshold = [(uuid, distance) for (uuid, distance) in - minimum_distances if - distance is not None and distance < threshold] - - if distances_below_threshold: - counter = collections.Counter(item[0] for item in distances_below_threshold) + qualifying_candidates = [(uuid, metric_value) for (uuid, metric_value) in + metric_candidates if + metric_value is not None and + self._isSimilarityMatch(metric_value, threshold)] + if qualifying_candidates: + counter = collections.Counter(item[0] for item in qualifying_candidates) most_common_uuid, count = counter.most_common(1)[0] - if count >= (len(minimum_distances) / 2): - similarity = min(item[1] for item in distances_below_threshold - if item[0] == most_common_uuid) + if count >= (len(metric_candidates) / 2): + similarity = self._pickBestMetricValue( + [item[1] for item in qualifying_candidates if item[0] == most_common_uuid]) return most_common_uuid, similarity return None, None - def _findMinimumDistance(self, entities): + def _hasValidSimilarityScoreShape(self, similarity_scores): + """Validate that query results follow the strict list-of-lists contract.""" + if not similarity_scores: + return True + + if not isinstance(similarity_scores, list): + return False + + return all(isinstance(item, list) for item in similarity_scores) + + def _isHigherBetterMetric(self): + """Return True when the configured descriptor metric uses higher-is-better semantics.""" + metric = getattr(self.reid_database, 'similarity_metric', None) + if metric is None: + return False + return str(metric).strip().upper() == "IP" + + def _isSimilarityMatch(self, metric_value, threshold): + """Evaluate threshold semantics according to the active descriptor metric.""" + if metric_value is 
None: + return False + + if not math.isfinite(metric_value): + return False + + if self._isHigherBetterMetric(): + # For IP metrics, scores must lie within [-1, 1] (normalized embeddings). + # Allow a small tolerance to absorb float32 rounding from VDMS computation. + if metric_value < -(1.0 + COSINE_SIMILARITY_TOLERANCE) or metric_value > (1.0 + COSINE_SIMILARITY_TOLERANCE): + return False + return metric_value > threshold + return metric_value < threshold + + def _pickBestMetricValue(self, metric_values): + """Pick best metric value according to descriptor metric semantics.""" + if not metric_values: + return None + if self._isHigherBetterMetric(): + return max(metric_values) + return min(metric_values) + + def _findBestMetricCandidate(self, entities): """ - Find the uuid with the minimum distance and the corresponding distance value. + Find the best candidate uuid and metric value according to descriptor semantics. - VDMS returns entities sorted ascending by _distance (closest first), so entities[0] - is always the best match. + The best match is selected from the provided entities based on the configured + descriptor metric semantics: higher values are better for higher-is-better + metrics, and lower values are better otherwise. Structure of entities: [{'uuid': , 'rvid': , '_distance': }, ...] 
""" + is_higher_better = self._isHigherBetterMetric() if entities: - minimum_distance_entity = entities[0] - return (minimum_distance_entity['uuid'], minimum_distance_entity['_distance']) + filtered_entities = [] + for entity in entities: + metric_value = entity.get('_distance') + if metric_value is None or not math.isfinite(metric_value): + continue + if is_higher_better and (metric_value < -(1.0 + COSINE_SIMILARITY_TOLERANCE) or metric_value > (1.0 + COSINE_SIMILARITY_TOLERANCE)): + log.warning( + f"Ignoring out-of-range IP similarity score {metric_value} " + f"for uuid={entity.get('uuid')}") + continue + filtered_entities.append(entity) + + if not filtered_entities: + return (None, None) + + if is_higher_better: + best_entity = max(filtered_entities, key=lambda x: x['_distance']) + else: + best_entity = min(filtered_entities, key=lambda x: x['_distance']) + return (best_entity['uuid'], best_entity['_distance']) return (None, None) def _activeGidIndex(self): diff --git a/controller/src/controller/vdms_adapter.py b/controller/src/controller/vdms_adapter.py index cf77005e1..a41d7af4b 100644 --- a/controller/src/controller/vdms_adapter.py +++ b/controller/src/controller/vdms_adapter.py @@ -18,6 +18,9 @@ K_NEIGHBORS = 1 SCHEMA_NAME = "reid_vector" SIMILARITY_METRIC = "L2" +# Tolerance applied to the theoretical [-1, 1] IP score bounds to absorb +# float32 rounding errors from VDMS normalization and inner-product computation. 
+COSINE_SIMILARITY_TOLERANCE = 1e-6 class VDMSDatabase(ReIDDatabase): def __init__(self, set_name=SCHEMA_NAME, @@ -38,6 +41,28 @@ def __init__(self, set_name=SCHEMA_NAME, self._schema_ready = False return + def _usesInnerProductMetric(self): + """Return True when descriptor metric is Inner Product.""" + metric = str(self.similarity_metric).strip().upper() + return metric == "IP" + + def _isValidSimilarityScore(self, score): + """Validate similarity score according to active metric semantics.""" + try: + value = float(score) + except (TypeError, ValueError): + return False + + if not np.isfinite(value): + return False + + # With normalized embeddings, Inner Product must stay within [-1, 1]. + # Allow a small tolerance to absorb float32 rounding from VDMS. + if self._usesInnerProductMetric() and (value < -(1.0 + COSINE_SIMILARITY_TOLERANCE) or value > (1.0 + COSINE_SIMILARITY_TOLERANCE)): + return False + + return True + def sendQuery(self, query, blob=None): """ Helper function for handling the responses from sending queries to VDMS. There are three @@ -78,13 +103,23 @@ def connect(self, hostname=DEFAULT_HOSTNAME): self.db.connect(hostname) if self.dimensions is not None: expected_dimensions = int(self.dimensions) + expected_metric = str(self.similarity_metric).strip().upper() with self._schema_lock: - schema_exists, schema_dimensions = self.findSchemaDetails(self.set_name) + schema_exists, schema_dimensions, schema_metric = self.findSchemaMetadata(self.set_name) if schema_exists: if schema_dimensions is None: raise RuntimeError( f"connect: VDMS descriptor set '{self.set_name}' exists but returned no dimensions. " "Refusing to proceed; recreate the descriptor set to continue.") + if schema_metric is None: + raise RuntimeError( + f"connect: VDMS descriptor set '{self.set_name}' exists but returned no metric. 
" + "Refusing to proceed; recreate the descriptor set to continue.") + if str(schema_metric).strip().upper() != expected_metric: + raise RuntimeError( + f"connect: VDMS descriptor set '{self.set_name}' uses metric {schema_metric}, " + f"but controller is configured for {expected_metric}. " + "Refusing to proceed; recreate the descriptor set with matching metric.") if schema_dimensions != expected_dimensions: raise RuntimeError( f"connect: VDMS descriptor set '{self.set_name}' uses {schema_dimensions} dimensions, " @@ -135,6 +170,7 @@ def ensureSchema(self, dimensions): """ with self._schema_lock: requested_dimensions = int(dimensions) + expected_metric = str(self.similarity_metric).strip().upper() if self._schema_ready: if int(self.dimensions) != requested_dimensions: raise ValueError( @@ -142,12 +178,21 @@ def ensureSchema(self, dimensions): f"incoming vector has {requested_dimensions} dimensions. " f"Restart the controller and flush the VDMS descriptor set to change dimensions.") return - schema_exists, schema_dimensions = self.findSchemaDetails(self.set_name) + schema_exists, schema_dimensions, schema_metric = self.findSchemaMetadata(self.set_name) if schema_exists: if schema_dimensions is None: raise RuntimeError( f"ensureSchema: VDMS descriptor set '{self.set_name}' exists but dimensions were not returned. " "Refusing to proceed; recreate the descriptor set to continue.") + if schema_metric is None: + raise RuntimeError( + f"ensureSchema: VDMS descriptor set '{self.set_name}' exists but metric was not returned. " + "Refusing to proceed; recreate the descriptor set to continue.") + if str(schema_metric).strip().upper() != expected_metric: + raise RuntimeError( + f"ensureSchema: VDMS descriptor set '{self.set_name}' uses metric {schema_metric}, " + f"but controller is configured for {expected_metric}. 
" + "Refusing to proceed; recreate the descriptor set with matching metric.") if schema_dimensions != requested_dimensions: raise RuntimeError( f"ensureSchema: VDMS descriptor set '{self.set_name}' uses {schema_dimensions} dimensions, " @@ -199,27 +244,22 @@ def addEntry(self, uuid, rvid, object_type, reid_vectors, set_name=SCHEMA_NAME, # Store as string properties[key] = str(value) - query = { - "AddDescriptor": { - "set": f"{set_name}", - "properties": properties - } - } # Convert vectors to JSON-serializable format (float32 -> float) and to bytes # VDMS API expects: query([q1, q2, ...], [blob1, blob2, ...]) # Blobs are consumed sequentially, one per AddDescriptor query (flat list) descriptor_blobs = [] add_query = [] + normalize_embeddings = self._usesInnerProductMetric() + for reid_vector in reid_vectors: - # Decoded embeddings from decodeReIDEmbeddingVector are (1, N); flatten to - # (N,) so tobytes() produces the correct contiguous float32 byte sequence. - vec_array = np.asarray(reid_vector, dtype="float32").flatten() - if self.dimensions is None: - log.warning("addEntry: ReID dimensions not yet initialized, skipping vector") - continue - if vec_array.shape[0] != self.dimensions: - log.warning(f"addEntry: Expected vector shape ({self.dimensions},) but got {vec_array.shape}, skipping this vector") + prepared_reid = self.prepareReidDict( + reid_vector, + self.dimensions, + normalize_embeddings=normalize_embeddings) + if prepared_reid is None: continue + + vec_array = prepared_reid["embedded_vector"] descriptor_blobs.append(vec_array.tobytes()) # Create query dict for each vector add_query.append({ @@ -251,6 +291,10 @@ def findSchema(self, set_name): return schema_exists def findSchemaDetails(self, set_name): + schema_exists, schema_dimensions, _ = self.findSchemaMetadata(set_name) + return schema_exists, schema_dimensions + + def findSchemaMetadata(self, set_name): query = [{ "FindDescriptorSet": { "set": f"{set_name}" @@ -258,13 +302,14 @@ def 
findSchemaDetails(self, set_name): }] response, _ = self.sendQuery(query) if not response: - return False, None + return False, None, None first_response = response[0] if first_response.get('status') != 0 or first_response.get('returned', 0) <= 0: - return False, None + return False, None, None schema_dimensions = self._extractSchemaDimensions(first_response) - return True, schema_dimensions + schema_metric = self._extractSchemaMetric(first_response) + return True, schema_dimensions, schema_metric def _extractSchemaDimensions(self, find_descriptor_set_response): # VDMS responses may return descriptor set fields at the top level or nested under @@ -288,7 +333,24 @@ def _extractSchemaDimensions(self, find_descriptor_set_response): return None return None - def _build_query_constraints(self, object_type, **constraints): + def _extractSchemaMetric(self, find_descriptor_set_response): + # VDMS responses may return descriptor set fields at the top level or nested under + # common payload keys like "entities" or "content". + payloads = [find_descriptor_set_response] + for key in ['entities', 'entity', 'content', 'results', 'DescriptorSet']: + value = find_descriptor_set_response.get(key) + if isinstance(value, dict): + payloads.append(value) + elif isinstance(value, list): + payloads.extend(item for item in value if isinstance(item, dict)) + + for payload in payloads: + for key in ['metric', 'distance_metric', 'similarity_metric']: + if key in payload and payload[key] is not None: + return str(payload[key]) + return None + + def _buildQueryConstraints(self, object_type, **constraints): """ Build query constraints for TIER 1 metadata filtering. 
@@ -382,7 +444,7 @@ def findMatches(self, object_type, reid_vectors, set_name=SCHEMA_NAME, log.debug(f"[VDMS] findMatches constraints received: {constraints}") # TIER 1: Build dynamic constraints for metadata filtering - query_constraints = self._build_query_constraints(object_type, **constraints) + query_constraints = self._buildQueryConstraints(object_type, **constraints) find_query = { "FindDescriptor": { @@ -404,12 +466,21 @@ def findMatches(self, object_type, reid_vectors, set_name=SCHEMA_NAME, # TIER 2: Vector similarity search on filtered candidates blob = [] + normalize_embeddings = self._usesInnerProductMetric() for reid_vector in reid_vectors: - # Ensure vector is float32, then convert to bytes for VDMS - vec_array = np.array(reid_vector, dtype="float32") + vec_array = self.prepareReidVector( + reid_vector, + self.dimensions, + normalize_embeddings=normalize_embeddings) + if vec_array is None: + continue blob.append(vec_array.tobytes()) # Flat list of blobs - query = [find_query] * len(reid_vectors) + if len(blob) == 0: + log.warning("findMatches: No valid vectors for similarity search") + return None + + query = [find_query] * len(blob) response, _ = self.sendQuery(query, blob) log.debug(f"[VDMS] Raw VDMS response (truncated): status={response[0].get('status') if response else 'None'}, returned={response[0].get('returned') if response else 'None'}") @@ -417,12 +488,31 @@ def findMatches(self, object_type, reid_vectors, set_name=SCHEMA_NAME, log.debug(f"[VDMS] Full first response: {response[0]}") if response: - result = [ - item.get('entities') - for item in response - if (item.get('status') == 0 and item.get('returned') > 0) - ] - log.debug(f"[VDMS] findMatches returned {len(result)} result(s) from {len(reid_vectors)} vector(s)") + result = [] + for item in response: + if item.get('status') != 0 or item.get('returned') <= 0: + continue + + valid_entities = [] + for entity in item.get('entities', []): + similarity = entity.get('_distance') + if 
self._isValidSimilarityScore(similarity): + valid_entities.append(entity) + else: + log.warning( + f"findMatches: Discarding entity with invalid similarity score " + f"{similarity} for metric {self.similarity_metric}") + + # Preserve 1:1 correspondence between query vectors and per-vector responses. + # A successful query response with only invalid entities should still count as + # "no usable match" for downstream majority-vote logic. + result.append(valid_entities) + + log.debug( + "[VDMS] findMatches returned %d per-vector result item(s) from %d valid " + "query vector(s); VDMS response items=%d, input vectors=%d", + len(result), len(blob), len(response), len(reid_vectors)) + return result log.debug("[VDMS] findMatches returned None (no response from VDMS)") return None diff --git a/controller/tests/test_reid_state_tracking.py b/controller/tests/test_reid_state_tracking.py index 75d778e39..2e84c076d 100644 --- a/controller/tests/test_reid_state_tracking.py +++ b/controller/tests/test_reid_state_tracking.py @@ -537,5 +537,51 @@ def test_update_active_dict_never_sets_matched_with_null_similarity(self): assert not (obj.reid_state == ReidState.MATCHED and obj.similarity is None) +class TestUUIDManagerSimilarityThresholdValidation: + """Test metric-aware validation of configured similarity thresholds.""" + + def test_rejects_negative_l2_similarity_threshold(self): + with pytest.raises(ValueError, match="similarity_threshold for L2 must be non-negative"): + UUIDManager(reid_config_data={ + 'similarity_metric': 'L2', + 'similarity_threshold': -0.1, + 'stale_feature_check_interval_secs': 3600, + }) + + @pytest.mark.parametrize('invalid_threshold', [-1.1, 1.1]) + def test_rejects_out_of_range_cosine_similarity_threshold(self, invalid_threshold): + with pytest.raises( + ValueError, + match=r"similarity_threshold for COSINE must be within \[-1.0, 1.0\]", + ): + UUIDManager(reid_config_data={ + 'similarity_metric': 'COSINE', + 'similarity_threshold': invalid_threshold, + 
'stale_feature_check_interval_secs': 3600, + }) + + @pytest.mark.parametrize( + ('metric', 'threshold'), + [ + ('L2', 0.0), + ('L2', 40.0), + ('COSINE', -1.0), + ('COSINE', 0.5), + ('COSINE', 1.0), + ], + ) + def test_accepts_thresholds_at_valid_metric_boundaries(self, metric, threshold): + manager = UUIDManager(reid_config_data={ + 'similarity_metric': metric, + 'similarity_threshold': threshold, + 'stale_feature_check_interval_secs': 3600, + }) + + try: + assert manager.similarity_threshold == threshold + finally: + manager.shutdown() + + if __name__ == '__main__': pytest.main([__file__, '-v']) diff --git a/docs/adr/0006-build-time-model-selection-mapping-service.md b/docs/adr/0006-build-time-model-selection-mapping-service.md index daba2c98e..72044d196 100644 --- a/docs/adr/0006-build-time-model-selection-mapping-service.md +++ b/docs/adr/0006-build-time-model-selection-mapping-service.md @@ -1,6 +1,6 @@ -# ADR 6: Build-Time Model Selection for 3D Mapping Service +# ADR 6: 3D Mapping Service -- **Author(s)**: [Intel SceneScape Team](https://github.com/open-edge-platform/scenescape) +- **Author(s)**: Sarat Poluri - **Date**: 2025-10-30 - **Status**: `Accepted` diff --git a/docs/adr/0010-reid-metadata-storage-architecture.md b/docs/adr/0010-reid-metadata-storage-architecture.md index 77a84e48a..848937930 100644 --- a/docs/adr/0010-reid-metadata-storage-architecture.md +++ b/docs/adr/0010-reid-metadata-storage-architecture.md @@ -44,7 +44,6 @@ Detection → Metadata Extraction → Vector Generation → Storage ┌─────────────────────────┐ │ Binary Blob (Vector) │ │ • 256-dim float32 vec │ - │ • L2 similarity metric │ └─────────────────────────┘ Query → Extract Vector → Build Constraints → VDMS Search @@ -54,7 +53,7 @@ Query → Extract Vector → Build Constraints → VDMS Search TIER 1: Database-level metadata filtering ↓ - TIER 2: L2 distance + TIER 2: Vector similarity on filtered candidates ``` @@ -72,7 +71,7 @@ Query → Extract Vector → Build Constraints → VDMS Search - 
Executed inside VDMS before vector search - Reduces candidate set significantly - **TIER 2**: Vector similarity search on filtered candidates - - L2 distance on 256-dim embeddings + - Similarity search on 256-dim embeddings - Only processes constrained candidates - Returns top-k results with metadata diff --git a/docs/adr/0011-inner-product-reid-state-and-id-lineage.md b/docs/adr/0011-inner-product-reid-state-and-id-lineage.md new file mode 100644 index 000000000..ced88dc3c --- /dev/null +++ b/docs/adr/0011-inner-product-reid-state-and-id-lineage.md @@ -0,0 +1,137 @@ + + + +# ADR 11: Configurable ReID Similarity Metric and Track Lineage Output + +- **Author(s)**: Sarat Poluri, GitHub Copilot +- **Date**: 2026-04-22 +- **Status**: `Accepted` + +## Context + +The controller now supports choosing the ReID similarity metric via runtime configuration (`reid-config.json`, `similarity_metric`). The configuration default is `L2`. + +Because VDMS descriptor search uses Inner Product (`IP`) for this flow, the controller maps configured `COSINE` to VDMS `IP` at the database boundary. + +The configuration contract allows only `COSINE` and `L2` values. `IP` is an internal VDMS execution detail, not a user-facing config option. + +For `IP`, the controller normalizes ReID embedding vectors before storing and querying in VDMS. With normalized vectors, the metric value returned by VDMS is directly interpretable as cosine similarity and is expected to lie in the range `[-1, 1]`. + +This matters specifically for **visual embeddings** because most modern ReID models are trained so that identity similarity is expressed primarily through **vector direction**, not vector magnitude. After normalization, the useful signal is the angular agreement between two embeddings. In that setting, Inner Product directly measures the quantity we care about. 
+ +For unit-normalized vectors $x$ and $y$: + +$$ +\lVert x - y \rVert^2 = 2 - 2(x \cdot y) +$$ + +This means L2 distance and Inner Product induce the same ranking once vectors are normalized. However, they do not provide the same **operational semantics**. + +At the same time, downstream consumers need to distinguish between multiple operational states that were previously ambiguous in scene output: + +- A track that is still collecting embeddings and has not queried the database yet. +- A track that queried the database and found no match. +- A track that successfully matched a prior identity. +- A track for which ReID has been disabled. + +For post-mortem stitching analysis, operators also need more than the current `id` and `similarity` fields. They need a durable per-track history of which global IDs were assigned over time, when each assignment happened, and whether the assignment came from a successful ReID match or from the no-match path. + +## Decision + +We will make three related changes. + +1. Make ReID similarity metric **configurable**, with `L2` as the configuration default. +2. Expose explicit **ReID state** on tracked objects so scene output distinguishes query lifecycle from match outcome. +3. Persist and publish a **previous_ids_chain** for each track so identity transitions can be reconstructed after the fact. + +### Metric Decision + +- Similarity metric is configured at runtime (`similarity_metric`) and propagated into VDMS operations. +- Supported configured metrics are `COSINE` and `L2`. +- Unsupported configured metric values fall back to `L2`. +- Configured `COSINE` is translated to VDMS `IP` at the adapter boundary so configuration semantics stay model-friendly while database semantics stay VDMS-compatible. +- For `IP`, ReID vectors are normalized before storage/query and returned values are expected to be finite in `[-1, 1]`. 
+- For non-`IP` metrics (for example `L2`), vectors are not force-normalized by this controller path, and `[-1, 1]` range validation is not applied. +- Match threshold interpretation is metric-aware: + - `IP`/similarity-style metrics: higher values are better, match when value > threshold. + - Distance-style metrics (for example `L2`): lower values are better, match when value < threshold. + +### Why Keep `COSINE` Available (with `IP` in VDMS) + +- With normalized embeddings, `IP` is equivalent to cosine similarity, which matches how visual ReID embeddings are typically interpreted. +- L2 and `IP` produce the same neighbor ordering after normalization, but `IP` yields a score where: + - `1` means identical direction / strongest possible match, + - `0` means orthogonal / unrelated, + - `-1` means maximally opposed. +- That bounded and signed score is easier to reason about than L2 distance, where **smaller is better** and the practical range depends on normalization assumptions. +- The controller already exposes a `similarity` field and applies a `similarity_threshold`. `L2` uses distance-style (lower-is-better) interpretation by default, while `COSINE` remains available and is executed via equivalent VDMS `IP`. +- `IP` gives a stable contract for downstream systems, logs, tests, and operators: higher is always better, the range is bounded, and invalid values can be rejected with a simple `[-1, 1]` check. +- Because ranking can be equivalent under normalization, allowing configuration supports experimentation while keeping `L2` as the stable default contract and `COSINE` as an explicit opt-in path executed via VDMS `IP`. + +### ReID State Decision + +Tracked objects expose a `reid_state` field with these values: + +- `pending_collection`: the controller is still collecting quality embeddings and has not completed a database query. +- `query_no_match`: a query was made and no usable database match was selected. 
+- `matched`: a query produced a valid reusable database identity. +- `reid_disabled`: ReID is disabled and no query will be attempted. + +This state is carried on the `MovingObject`, updated by the UUID manager, and emitted in controller output for downstream logic. + +### ID Lineage Decision + +Each tracked object maintains `previous_ids_chain`, a chronological list of entries shaped as: + +```text +{ + "id": , + "timestamp": , + "similarity_score": +} +``` + +The chain is updated whenever UUID assignment is finalized: + +- On a successful ReID match, the matched database ID is appended with the associated similarity score. +- On a no-match outcome, the newly assigned controller-generated ID is appended with `similarity_score = null`. + +When serialized in controller output, timestamps are normalized to ISO 8601, and the field is omitted when no assignments have been recorded yet. + +## Alternatives Considered + +- Keep the previous metric behavior implicit. + - Rejected because downstream interpretation of `similarity` requires explicit metric semantics. +- Hard-code `IP` and disallow alternatives. + - Rejected because teams need to evaluate metric choices per model/domain without code changes. +- Emit only `similarity` without an explicit `reid_state`. + - Rejected because `null` similarity alone cannot distinguish "not queried yet", "no match", and "ReID disabled". +- Emit only the latest assigned ID. + - Rejected because it prevents post-mortem reconstruction of identity stitching decisions and removes valuable debugging data for false merges or missed matches. +- Store lineage only in logs. + - Rejected because logs are incomplete as an API contract and are harder for downstream systems and tests to consume deterministically. + +## Consequences + +### Positive + +- Similarity scoring semantics are explicit and metric-aware. 
+- For the `COSINE` (VDMS `IP`) path, normalized embeddings plus `IP` yields a bounded score in `[-1, 1]`, and invalid values are rejected early. +- Downstream consumers can branch reliably on `reid_state` instead of inferring intent from `similarity` alone. +- Operators can reconstruct identity evolution for a track using `previous_ids_chain`. +- Unit and functional verification become easier because query lifecycle and assignment outcomes are visible in structured output. + +### Negative + +- Controller output now carries additional state that downstream consumers must understand and preserve. +- `previous_ids_chain` increases payload size for long-lived tracks. +- Operators must understand that threshold direction depends on metric choice. +- The `IP` safety checks (`[-1, 1]`) are metric-specific and intentionally not applied to non-`IP` metrics. + +## References + +- `controller/src/controller/vdms_adapter.py` +- `controller/src/controller/uuid_manager.py` +- `controller/src/controller/moving_object.py` +- `controller/src/controller/detections_builder.py` +- `docs/user-guide/microservices/controller/data_formats.md` diff --git a/docs/user-guide/microservices/controller/Extended-ReID.md b/docs/user-guide/microservices/controller/Extended-ReID.md index 799f4468e..4b88d74a2 100644 --- a/docs/user-guide/microservices/controller/Extended-ReID.md +++ b/docs/user-guide/microservices/controller/Extended-ReID.md @@ -24,13 +24,30 @@ VDMS Query Flow: "Find entries where type='Person' AND gender='Female' AND age='22'" ↓ TIER 2: VDMS performs vector similarity on filtered candidates - "Compute L2 distance between query vector and filtered candidates" + "Compute configured similarity metric value between query vector and filtered candidates" ↓ Return top-k matches with metadata ``` ## Key Concepts +### Similarity Metric and Score Semantics + +The Re-ID metric is configured through `reid-config.json` (`similarity_metric`) and defaults to `L2`. 
+ +When `similarity_metric` is `COSINE`, Re-ID embedding vectors are normalized to unit length before they are: + +- stored in VDMS (`AddDescriptor`) +- used as query vectors in VDMS (`FindDescriptor`) + +For `COSINE`, SceneScape uses VDMS `IP` internally with normalized vectors, so similarity scores are expected to stay in the range `[-1, 1]`. + +- `1.0`: identical direction (most similar) +- `0.0`: orthogonal embeddings +- `-1.0`: opposite direction + +The controller validates returned similarity scores for the normalized-cosine path (`COSINE` mapped to VDMS `IP`) and discards out-of-range values. For non-cosine distance metrics (for example `L2`), vectors are not force-normalized and this `[-1, 1]` check is not applied. + ### Confidence-Based Constraint Filtering (AND-Only) The 2-tier implementation uses metadata confidence scores to determine which constraints are applied in TIER 1 filtering. **Only high-confidence (≥ 0.8) constraints are used for strict AND filtering**. Low-confidence constraints are skipped in TIER 1, allowing TIER 2 vector similarity to handle flexible matching: @@ -148,25 +165,29 @@ controller/config/reid-config.json ```json { + "similarity_metric": "L2", "stale_feature_timeout_secs": 5.0, "stale_feature_check_interval_secs": 1.0, "feature_accumulation_threshold": 12, "minimum_bbox_area": 5000, "feature_slice_size": 10, - "similarity_threshold": 30.0 + "similarity_threshold": 40.0 } ``` ### Configuration Parameters -| Parameter | Type | Default | Description | -| ----------------------------------- | ----- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `stale_feature_timeout_secs` | float | 5.0 | How long (seconds) to accumulate features in memory before flushing to VDMS. Features older than this threshold are persisted to the database for long-term storage. 
| -| `stale_feature_check_interval_secs` | float | 1.0 | How frequently (seconds) the background timer checks for stale features and flushes them to VDMS. More frequent checks ensure timely database updates. | -| `feature_accumulation_threshold` | int | 12 | Minimum number of quality features required before initiating a similarity query against the database. More features = higher statistical confidence in matching. | -| `minimum_bbox_area` | int | 5000 | Minimum bounding-box area in pixels required before a detected object contributes a ReID embedding to quality feature accumulation. | -| `feature_slice_size` | int | 10 | When persisting features to VDMS, sample every Nth feature vector from the accumulated set to reduce database bloat. Example: slice_size=10 stores every 10th vector. | -| `similarity_threshold` | float | 40.0 | Maximum L2 distance from VDMS for a candidate to be accepted as a match. Candidates with distance **below** this threshold are considered valid. Lower values = stricter matching. | +| Parameter | Type | Default | Description | +| ----------------------------------- | ------ | ------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `similarity_metric` | string | `L2` | Similarity metric for ReID matching. `L2` is the default distance-style metric (lower-is-better). `COSINE` is implemented using normalized vectors with VDMS `IP` (higher-is-better). | +| `stale_feature_timeout_secs` | float | 5.0 | How long (seconds) to accumulate features in memory before flushing to VDMS. Features older than this threshold are persisted to the database for long-term storage. | +| `stale_feature_check_interval_secs` | float | 1.0 | How frequently (seconds) the background timer checks for stale features and flushes them to VDMS. 
More frequent checks ensure timely database updates. | +| `feature_accumulation_threshold` | int | 12 | Minimum number of quality features required before initiating a similarity query against the database. More features = higher statistical confidence in matching. | +| `minimum_bbox_area` | int | 5000 | Minimum bounding-box area in pixels required before a detected object contributes a ReID embedding to quality feature accumulation. | +| `feature_slice_size` | int | 10 | When persisting features to VDMS, sample every Nth feature vector from the accumulated set to reduce database bloat. Example: slice_size=10 stores every 10th vector. | +| `similarity_threshold` | float | metric-dependent (`40.0` for `L2`, `0.5` for `COSINE`) | Match acceptance threshold interpreted using the configured metric semantics: for `COSINE`, candidates **above** the threshold match; for `L2`-style distance metrics, candidates **below** the threshold match. | + +**Similarity range note**: For `COSINE` (implemented via VDMS `IP`), scores are validated against `[-1, 1]` because embeddings are normalized before storage and query. This range check is metric-specific and is not applied to non-cosine distance metrics. ### Embedding Dimension Inference @@ -190,7 +211,7 @@ python scene_controller.py \ **Current Implementation Note**: -- `stale_feature_timeout_secs`, `stale_feature_check_interval_secs`, `feature_accumulation_threshold`, `minimum_bbox_area`, `feature_slice_size`, and `similarity_threshold` are fully implemented. +- `similarity_metric`, `stale_feature_timeout_secs`, `stale_feature_check_interval_secs`, `feature_accumulation_threshold`, `minimum_bbox_area`, `feature_slice_size`, and `similarity_threshold` are fully implemented - ReID embedding dimensions are inferred at runtime from the first received embedding; there is no configuration override for dimension. - All semantic metadata attributes are currently used for TIER 1 filtering. 
Selective metadata filtering is planned for Phase 2. @@ -201,7 +222,9 @@ python scene_controller.py \ - Decrease `stale_feature_timeout_secs`: 3.0 (flush features sooner, capture recent appearances) - Decrease `stale_feature_check_interval_secs`: 0.5 (check for stale features more frequently) - Decrease `feature_accumulation_threshold`: 8 (query sooner with fewer features) -- Increase `similarity_threshold`: 60.0 (accept less-perfect matches, wider distance budget) +- `similarity_threshold` — direction depends on the configured metric: + - **`L2` (default)**: _Increase_ the threshold (e.g., 50.0) to accept candidates further away → more matches + - **`COSINE`**: _Decrease_ the threshold (e.g., 0.2) to accept candidates with lower cosine similarity → more matches - Increase `feature_slice_size`: 20 (store more diverse samples) **For Higher Precision (only confident matches)**: @@ -209,7 +232,9 @@ python scene_controller.py \ - Increase `stale_feature_timeout_secs`: 8.0 (accumulate more features before persisting) - Increase `stale_feature_check_interval_secs`: 2.0 (check less frequently, reduce overhead) - Increase `feature_accumulation_threshold`: 16 (require more samples for statistical confidence) -- Decrease `similarity_threshold`: 20.0 (stricter matching, tighter distance budget) +- `similarity_threshold` — direction depends on the configured metric: + - **`L2` (default)**: _Decrease_ the threshold (e.g., 20.0) so only close-distance candidates match → fewer, more confident matches + - **`COSINE`**: _Increase_ the threshold (e.g., 0.8) to accept only high-cosine-similarity candidates → fewer, more confident matches - Decrease `feature_slice_size`: 5 (store every 5th feature for better coverage) ### Future Extensibility diff --git a/docs/user-guide/microservices/controller/controller.md b/docs/user-guide/microservices/controller/controller.md index a28673ac4..ab62ce228 100644 --- a/docs/user-guide/microservices/controller/controller.md +++ 
b/docs/user-guide/microservices/controller/controller.md @@ -42,7 +42,9 @@ To deploy the scene controller service, refer to the [Get Started](./get-started `--tracker_config_file`: Path to the JSON file containing the tracker configuration. This file is used to enable and manage time-based parameters for the tracker. -`--reid_config_file`: Path to the JSON file containing Re-ID (Re-Identification) configuration. This file controls Re-ID specific settings such as stale feature timeout, feature accumulation thresholds, and similarity scoring. See [Extended Re-ID](./Extended-ReID.md) for details. +`--reid_config_file`: Path to the JSON file containing Re-ID (Re-Identification) configuration. This file controls Re-ID specific settings such as stale feature timeout, feature accumulation thresholds, similarity metric selection, and similarity scoring. See [Extended Re-ID](./Extended-ReID.md) for details. + +When `similarity_metric` is `L2` (default), Re-ID vectors follow distance-style matching semantics (lower values are better). When `similarity_metric` is `COSINE`, Re-ID vectors are normalized before write/query and SceneScape uses VDMS `IP` internally; similarity scores are expected in `[-1, 1]`. `--schema_file`: Specifies the path to the JSON file that contains the metadata schema. By default, it uses [metadata.schema.json](https://github.com/open-edge-platform/scenescape/blob/main/controller/src/schema/metadata.schema.json). This schema outlines the structure and format of the messages processed by the service. 
diff --git a/docs/user-guide/microservices/controller/data_formats.md b/docs/user-guide/microservices/controller/data_formats.md index 9f7e52a32..5eed18bbc 100644 --- a/docs/user-guide/microservices/controller/data_formats.md +++ b/docs/user-guide/microservices/controller/data_formats.md @@ -199,7 +199,7 @@ tracked object contains the following fields: | `visibility` | array of string | Camera IDs currently observing this object | | `regions` | object | Map of region/sensor IDs to membership metadata. By default this is `{id: {entered: timestamp}}`. In region-scoped outputs, objects currently inside a region also include a live dwell time as `{id: {entered: timestamp, dwell: seconds}}`. | | `sensors` | object | Map of sensor IDs to timestamped readings (`{id: [[timestamp, value], ...]}`) | -| `similarity` | number or null | L2 distance to the matched ReID embedding in VDMS; lower values indicate a closer match. `null` when ReID is still collecting embeddings, when no database match was found, or when ReID is disabled. | +| `similarity` | number or null | Similarity/distance value to the matched ReID embedding in VDMS; higher-is-better for `COSINE`; lower is better for `L2`. `null` when ReID is still collecting embeddings, when no database match was found, or when ReID is disabled. | | `reid_state` | string | Re-ID processing state for the object. One of: `pending_collection`, `query_no_match`, `matched`, `reid_disabled` | | `previous_ids_chain` | array or absent | History of UUID reassignments for this track. Each element is `{"id": "", "timestamp": "", "similarity_score": }`. Present only when the object has been re-identified at least once; omitted otherwise. | | `first_seen` | string (ISO 8601) | Timestamp when the track was first created | @@ -213,9 +213,11 @@ tracked object contains the following fields: > camera input it is a base64-encoded string. `metadata` is absent when no semantic > analytics pipeline is configured. 
-> **Note on `similarity`**: This field holds an L2 distance returned by VDMS, not a -> cosine similarity score. Lower values mean the query embedding is closer to the stored -> embedding. A value of `null` means either the ReID query has not been submitted yet +> **Note on `similarity`**: This field holds the metric value returned by VDMS +> in `_distance` and is evaluated by the controller using configured metric semantics. +> For `COSINE` (implemented via VDMS `IP`), value must be above `similarity_threshold`; for distance-style metrics such as `L2`, +> value must be below `similarity_threshold`. +> A value of `null` means either the ReID query has not been submitted yet > (`pending_collection`), the query found no match below the configured > `similarity_threshold` (`query_no_match`), or ReID is disabled (`reid_disabled`). @@ -401,18 +403,18 @@ interest changes. The `{event_type}` segment is typically `objects`. ### Region Event Top-Level Fields -| Field | Type | Description | -| ------------- | --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `timestamp` | string (ISO 8601 UTC) | Event timestamp | -| `scene_id` | string | Scene identifier (UUID) | -| `scene_name` | string | Scene name | -| `region_id` | string | Region identifier (UUID) | -| `region_name` | string | Region name | -| `counts` | object | Map of category to object count currently inside the region (e.g. `{"person": 2}`) | -| `objects` | array | Tracked objects currently inside the region. Each object includes live `regions..dwell` in addition to [Common Output Track Fields](#common-output-track-fields) | -| `entered` | array | Objects that entered the region during this cycle; each element is a bare track object and may include live `regions..dwell`. 
Empty when no entry occurred | -| `exited` | array | Objects that exited the region during this cycle; each element is `{"object": , "dwell": }`. Empty when no exit occurred | -| `metadata` | object | Region geometry: `title`, `uuid`, `points` (polygon vertices in metres), `area` (`"poly"`), `fromSensor` (boolean) | +| Field | Type | Description | +| ------------- | --------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `timestamp` | string (ISO 8601 UTC) | Event timestamp | +| `scene_id` | string | Scene identifier (UUID) | +| `scene_name` | string | Scene name | +| `region_id` | string | Region identifier (UUID) | +| `region_name` | string | Region name | +| `counts` | object | Map of category to object count currently inside the region (e.g. `{"person": 2}`) | +| `objects` | array | Tracked objects currently inside the region (#common-output-track-fields) | +| `entered` | array | Objects that entered the region during this cycle; Empty when no entry occurred | +| `exited` | array | Objects that exited the region during this cycle; Empty when no exit occurred | +| `metadata` | object | Region geometry: `title`, `uuid`, `points` (polygon vertices in metres), `area` (`"poly"`), `fromSensor` (boolean) | ### Example Region Event Message diff --git a/docs/user-guide/other-topics/how-to-enable-reidentification.md b/docs/user-guide/other-topics/how-to-enable-reidentification.md index d9c7e30b6..5c3325050 100644 --- a/docs/user-guide/other-topics/how-to-enable-reidentification.md +++ b/docs/user-guide/other-topics/how-to-enable-reidentification.md @@ -149,12 +149,12 @@ The scene output includes `reid_state` for each tracked object. 
For canonical st ## Configuration Options -| Parameter | Purpose | Expected Value/Range | -| -------------------------------- | -------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | -| `DEFAULT_SIMILARITY_THRESHOLD` | Maximum L2 distance for a match to be accepted. Higher values increase matches (and false positives); lower values are stricter. | Float L2 distance (e.g., 20.0–60.0) | -| `DEFAULT_MINIMUM_BBOX_AREA` | Minimum bounding box size to consider a valid feature. | Pixel area (e.g., 400–1600) | -| `DEFAULT_MINIMUM_FEATURE_COUNT` | Minimum features needed before querying DB. | Integer (e.g., 5–20) | -| `DEFAULT_MAX_FEATURE_SLICE_SIZE` | Proportion of features stored to improve DB performance. | Float (e.g., 0.1–1.0) | +| Parameter | Purpose | Expected Value/Range | +| ------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `DEFAULT_SIMILARITY_THRESHOLD_L2` / `DEFAULT_SIMILARITY_THRESHOLD_COSINE` | Match-acceptance threshold defaults selected by `similarity_metric`: `L2` uses `DEFAULT_SIMILARITY_THRESHOLD_L2`, and `COSINE` (mapped to VDMS `IP`) uses `DEFAULT_SIMILARITY_THRESHOLD_COSINE`. | Float; tune per metric. For `COSINE`/`IP`, values such as `0.2–0.8` may be used. For `L2`, use a distance threshold appropriate to the embedding/model. | +| `DEFAULT_MINIMUM_BBOX_AREA` | Minimum bounding box size to consider a valid feature. | Pixel area (e.g., 400–1600) | +| `DEFAULT_MINIMUM_FEATURE_COUNT` | Minimum features needed before querying DB. 
| Integer (e.g., 5–20) | +| `DEFAULT_MAX_FEATURE_SLICE_SIZE` | Proportion of features stored to improve DB performance. | Float (e.g., 0.1–1.0) | To apply changes (include `--profile vdms` if ReID is enabled; see [Docker Compose Profiles](../get-started.md#docker-compose-profiles)): diff --git a/tests/Makefile.functional b/tests/Makefile.functional index c490ead83..ae8644eeb 100644 --- a/tests/Makefile.functional +++ b/tests/Makefile.functional @@ -263,17 +263,17 @@ REID_MATRIX_CONFIG ?= $(CURDIR)/functional/config/reid-threshold-scenario.json define run-reid-matrix @echo "Running $(1) matrix from $(REID_MATRIX_CONFIG)"; - @python3 -c 'import json, sys; scenarios = json.load(open(sys.argv[1], "r", encoding="utf-8")); [print("%s|%s|%s|%s" % (s["scenario_name"], s["variant"], s["threshold"], s.get("expect_exceed_max", False))) for s in scenarios]' "$(REID_MATRIX_CONFIG)" | \ - while IFS='|' read -r scenario_name variant threshold expect_exceed_max; do \ + @python3 -c 'import json, sys; scenarios = json.load(open(sys.argv[1], "r", encoding="utf-8")); [print("%s|%s|%s|%s|%s" % (s["scenario_name"], s["variant"], s["threshold"], s.get("expect_exceed_max", False), s.get("metric", "L2"))) for s in scenarios]' "$(REID_MATRIX_CONFIG)" | \ + while IFS='|' read -r scenario_name variant threshold expect_exceed_max metric; do \ tmp_reid_config="$(CURDIR)/functional/config/reid-config-$$scenario_name.json"; \ expect_arg="--expect_exceed_max=$$expect_exceed_max"; \ - python3 -c 'import json, sys; cfg_path, out_path, threshold = sys.argv[1], sys.argv[2], float(sys.argv[3]); cfg = json.load(open(cfg_path, "r", encoding="utf-8")); cfg["similarity_threshold"] = threshold; json.dump(cfg, open(out_path, "w", encoding="utf-8"), indent=2); print()' "$(CURDIR)/../controller/config/reid-config.json" "$$tmp_reid_config" "$$threshold" || exit $$?; \ + python3 -c 'import json, sys; cfg_path, out_path, threshold, metric = sys.argv[1], sys.argv[2], float(sys.argv[3]), sys.argv[4]; cfg = 
json.load(open(cfg_path, "r", encoding="utf-8")); cfg["similarity_threshold"] = threshold; cfg["similarity_metric"] = metric; json.dump(cfg, open(out_path, "w", encoding="utf-8"), indent=2); print()' "$(CURDIR)/../controller/config/reid-config.json" "$$tmp_reid_config" "$$threshold" "$$metric" || exit $$?; \ case "$$variant" in \ default) scene_compose="$(COMPOSE)/scene_reid.yml" ;; \ time-chunking) scene_compose="$(COMPOSE)/scene_reid_time_chunking.yml" ;; \ *) echo "Unsupported variant '$$variant' in $(REID_MATRIX_CONFIG)"; exit 1 ;; \ esac; \ - echo "=== $(1): scenario=$$scenario_name variant=$$variant threshold=$$threshold expect_exceed_max=$$expect_exceed_max ==="; \ + echo "=== $(1): scenario=$$scenario_name variant=$$variant threshold=$$threshold expect_exceed_max=$$expect_exceed_max metric=$$metric ==="; \ REID_CONFIG_FILE="$$tmp_reid_config" EXPECT_EXCEED_MAX_ARG="$$expect_arg" REID_SCENE_COMPOSE=$$scene_compose $(MAKE) $(1)-run; \ status=$$?; rm -f "$$tmp_reid_config"; \ if [ $$status -ne 0 ]; then exit $$status; fi; \ diff --git a/tests/functional/config/reid-threshold-scenario.json b/tests/functional/config/reid-threshold-scenario.json index 8d424a720..ca3b6149c 100644 --- a/tests/functional/config/reid-threshold-scenario.json +++ b/tests/functional/config/reid-threshold-scenario.json @@ -1,26 +1,58 @@ [ { "scenario_name": "reid-l2-tight-default", + "metric": "L2", "variant": "default", "threshold": 1, "expect_exceed_max": true }, { "scenario_name": "reid-l2-tight-time-chunking", + "metric": "L2", "variant": "time-chunking", "threshold": 1, "expect_exceed_max": true }, + { + "scenario_name": "reid-cosine-tight-default", + "metric": "cosine", + "variant": "default", + "threshold": 0.99, + "expect_exceed_max": true + }, + { + "scenario_name": "reid-cosine-tight-time-chunking", + "metric": "cosine", + "variant": "time-chunking", + "threshold": 0.99, + "expect_exceed_max": true + }, { "scenario_name": "reid-l2-loose-default", + "metric": "L2", "variant": 
"default", "threshold": 30, "expect_exceed_max": false }, { "scenario_name": "reid-l2-loose-time-chunking", + "metric": "L2", "variant": "time-chunking", "threshold": 30, "expect_exceed_max": false + }, + { + "scenario_name": "reid-cosine-loose-default", + "metric": "cosine", + "variant": "default", + "threshold": 0.01, + "expect_exceed_max": false + }, + { + "scenario_name": "reid-cosine-loose-time-chunking", + "metric": "cosine", + "variant": "time-chunking", + "threshold": 0.01, + "expect_exceed_max": false } ] diff --git a/tests/sscape_tests/scenescape/test_scene_controller.py b/tests/sscape_tests/scenescape/test_scene_controller.py index bd8ce2009..e43656624 100644 --- a/tests/sscape_tests/scenescape/test_scene_controller.py +++ b/tests/sscape_tests/scenescape/test_scene_controller.py @@ -158,9 +158,11 @@ def test_extracts_all_known_reid_config_fields(self): reid_config = { 'feature_accumulation_threshold': 8, + 'similarity_metric': 'L2', 'similarity_threshold': 55, 'stale_feature_timeout_secs': 7.5, 'stale_feature_check_interval_secs': 2.0, + 'minimum_bbox_area': 5000, 'feature_slice_size': 10, } with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: diff --git a/tests/sscape_tests/uuid_manager/test_uuid_manager.py b/tests/sscape_tests/uuid_manager/test_uuid_manager.py index 900503aad..7132fa663 100644 --- a/tests/sscape_tests/uuid_manager/test_uuid_manager.py +++ b/tests/sscape_tests/uuid_manager/test_uuid_manager.py @@ -13,7 +13,11 @@ import pytest -from controller.uuid_manager import UUIDManager +from controller.uuid_manager import ( + UUIDManager, + DEFAULT_SIMILARITY_THRESHOLD_L2, + DEFAULT_SIMILARITY_THRESHOLD_COSINE, +) def call_update_active_dict_locked(manager, sscape_object, database_id, similarity, query_timestamp=None): @@ -77,6 +81,30 @@ def test_active_ids_tracking_initialized(self, mock_vdms_db): assert isinstance(manager.active_ids, dict) assert len(manager.active_ids) == 0 + def 
test_default_similarity_threshold_uses_l2_value_when_metric_is_l2(self, mock_vdms_db): + """L2 metric should use the L2-specific default threshold when not configured.""" + + manager = UUIDManager(reid_config_data={'similarity_metric': 'L2'}) + + assert manager.similarity_metric == 'L2' + assert manager.similarity_threshold == DEFAULT_SIMILARITY_THRESHOLD_L2 + + def test_default_similarity_threshold_uses_cosine_value_when_metric_is_cosine(self, mock_vdms_db): + """COSINE metric should use the cosine-specific default threshold when not configured.""" + + manager = UUIDManager(reid_config_data={'similarity_metric': 'COSINE'}) + + assert manager.similarity_metric == 'COSINE' + assert manager.similarity_threshold == DEFAULT_SIMILARITY_THRESHOLD_COSINE + + def test_similarity_threshold_explicit_value_overrides_metric_default(self, mock_vdms_db): + """Explicit similarity_threshold should take precedence over metric-specific defaults.""" + + manager = UUIDManager(reid_config_data={'similarity_metric': 'COSINE', 'similarity_threshold': 0.77}) + + assert manager.similarity_metric == 'COSINE' + assert manager.similarity_threshold == 0.77 + class TestExtractReidEmbedding: """Test Re-ID embedding extraction from detection objects.""" @@ -529,6 +557,224 @@ def test_metadata_with_special_characters(self, mock_vdms_db): "confidence": 0.9 } +class TestUUIDManagerMetricAwareMatching: + """Verify parseQueryResults follows descriptor metric semantics.""" + + @patch('controller.uuid_manager.VDMSDatabase') + def test_parse_query_results_rejects_single_dimension_entity_list(self, mock_vdms_class): + """Flat entity lists violate contract and should be treated as no-match.""" + mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.5}) + manager.reid_database.similarity_metric = "L2" + + # Invalid input shape: one vector result returned as a flat entity list. 
+ similarity_scores = [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.2}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.6}, + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id is None + assert similarity is None + + @patch('controller.uuid_manager.VDMSDatabase') + def test_parse_query_results_ip_uses_higher_is_better(self, mock_vdms_class): + """IP metric should select max `_distance` and require values above threshold.""" + mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.5}) + manager.reid_database.similarity_metric = "IP" + + similarity_scores = [ + [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.7}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.6}, + ], + [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.8}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.4}, + ], + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id == 'a' + assert similarity == 0.8 + + @patch('controller.uuid_manager.VDMSDatabase') + def test_parse_query_results_l2_uses_lower_is_better(self, mock_vdms_class): + """L2 metric should select min `_distance` and require values below threshold.""" + mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.5}) + manager.reid_database.similarity_metric = "L2" + + similarity_scores = [ + [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.2}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.6}, + ], + [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.3}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.7}, + ], + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id == 'a' + assert similarity == 0.2 + + @patch('controller.uuid_manager.VDMSDatabase') + def test_parse_query_results_ip_ignores_out_of_range_scores(self, mock_vdms_class): + """IP matching must ignore candidates with `_distance` outside [-1, 1].""" + 
mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.5}) + manager.reid_database.similarity_metric = "IP" + + similarity_scores = [ + [ + {'uuid': 'a', 'rvid': '1', '_distance': 1.2}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.85}, + ], + [ + {'uuid': 'a', 'rvid': '1', '_distance': -1.2}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.9}, + ], + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id == 'b' + assert similarity == 0.9 + + @patch('controller.uuid_manager.VDMSDatabase') + def test_parse_query_results_ip_returns_no_match_when_all_scores_invalid(self, mock_vdms_class): + """IP matching must return no match if all candidate scores are out of range.""" + mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.5}) + manager.reid_database.similarity_metric = "IP" + + similarity_scores = [ + [ + {'uuid': 'a', 'rvid': '1', '_distance': 1.3}, + {'uuid': 'b', 'rvid': '2', '_distance': -1.4}, + ], + [ + {'uuid': 'a', 'rvid': '1', '_distance': 1.1}, + ], + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id is None + assert similarity is None + + @patch('controller.uuid_manager.VDMSDatabase') + def test_parse_query_results_ip_threshold_boundary_requires_strictly_greater(self, mock_vdms_class): + """IP matching should not accept values exactly equal to threshold.""" + mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.8}) + manager.reid_database.similarity_metric = "IP" + + similarity_scores = [ + [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.8}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.79}, + ] + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id is None + assert similarity is None + + @patch('controller.uuid_manager.VDMSDatabase') + def 
test_parse_query_results_l2_threshold_boundary_requires_strictly_less(self, mock_vdms_class): + """L2 matching should not accept values exactly equal to threshold.""" + mock_vdms_class.return_value = MagicMock() + + manager = UUIDManager(reid_config_data={'similarity_threshold': 0.2}) + manager.reid_database.similarity_metric = "L2" + + similarity_scores = [ + [ + {'uuid': 'a', 'rvid': '1', '_distance': 0.2}, + {'uuid': 'b', 'rvid': '2', '_distance': 0.21}, + ] + ] + + database_id, similarity = manager.parseQueryResults(similarity_scores) + + assert database_id is None + assert similarity is None + + +class TestUUIDManagerMetricAwareUpdateFlow: + """Verify parse->update flow produces correct states for both metric paths.""" + + @patch('controller.uuid_manager.VDMSDatabase') + def test_cosine_path_match_transitions_to_matched(self, mock_vdms_class): + """COSINE (mapped to IP) should produce MATCHED when best score is above threshold.""" + from controller.moving_object import MovingObject, ReidState + import time + + mock_vdms_class.return_value = MagicMock() + manager = UUIDManager(reid_config_data={'similarity_metric': 'COSINE', 'similarity_threshold': 0.8}) + manager.reid_database.similarity_metric = "IP" + + info = {'id': '1', 'confidence': 0.95} + obj = MovingObject(info, time.time(), None) + obj.rv_id = 1 + obj.reid = [0.1, 0.2, 0.3] + obj.category = 'person' + + with manager.active_ids_lock: + manager.active_ids[obj.rv_id] = [None, None] + manager.quality_features[obj.rv_id] = [[0.1, 0.2, 0.3]] + + similarity_scores = [[{'uuid': 'db_match_1', 'rvid': '1', '_distance': 0.92}]] + database_id, similarity = manager.parseQueryResults(similarity_scores) + call_update_active_dict_locked(manager, obj, database_id=database_id, similarity=similarity) + + assert obj.reid_state == ReidState.MATCHED + assert obj.gid == 'db_match_1' + assert obj.similarity == 0.92 + + @patch('controller.uuid_manager.VDMSDatabase') + def 
test_l2_path_equal_threshold_transitions_to_query_no_match(self, mock_vdms_class): + """L2 should produce QUERY_NO_MATCH when best score is equal to threshold.""" + from controller.moving_object import MovingObject, ReidState + import time + + mock_vdms_class.return_value = MagicMock() + manager = UUIDManager(reid_config_data={'similarity_metric': 'L2', 'similarity_threshold': 0.2}) + manager.reid_database.similarity_metric = "L2" + + info = {'id': '1', 'confidence': 0.95} + obj = MovingObject(info, time.time(), None) + obj.rv_id = 2 + obj.reid = [0.1, 0.2, 0.3] + obj.category = 'person' + + with manager.active_ids_lock: + manager.active_ids[obj.rv_id] = [None, None] + manager.quality_features[obj.rv_id] = [[0.1, 0.2, 0.3]] + + similarity_scores = [[{'uuid': 'db_match_2', 'rvid': '2', '_distance': 0.2}]] + database_id, similarity = manager.parseQueryResults(similarity_scores) + call_update_active_dict_locked(manager, obj, database_id=database_id, similarity=similarity) + + assert database_id is None + assert similarity is None + assert obj.reid_state == ReidState.QUERY_NO_MATCH + assert obj.gid is not None + assert obj.similarity is None class TestDimensionInference: """Test automatic ReID embedding dimension inference from first observed vector.""" diff --git a/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py b/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py index bae154323..134d36777 100644 --- a/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py +++ b/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py @@ -47,13 +47,14 @@ def test_initialization_creates_database_instance(self, mock_vdms): db = VDMSDatabase() assert db.db is not None + assert db.similarity_metric == "L2" mock_vdms.assert_called() @patch('controller.vdms_adapter.vdms.vdms') def test_initialization_with_custom_parameters(self, mock_vdms): """Verify VDMS can be initialized with custom schema parameters.""" custom_set_name = "custom_reid" - custom_metric = "L2" + custom_metric = "IP" 
custom_dims = 512 db = VDMSDatabase( @@ -115,6 +116,26 @@ def test_find_schema_details_extracts_nested_dimensions(self, mock_vdms_class): assert exists is True assert dimensions == 512 + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_schema_metadata_extracts_metric(self, mock_vdms_class): + """Verify FindDescriptorSet metric is extracted for schema compatibility checks.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'dimensions': 256, + 'metric': 'L2' + }], [])) + + exists, dimensions, metric = db.findSchemaMetadata(SCHEMA_NAME) + + assert exists is True + assert dimensions == 256 + assert metric == 'L2' + @patch('controller.vdms_adapter.vdms.vdms') def test_ensure_schema_raises_on_existing_dimension_mismatch(self, mock_vdms_class): """Verify ensureSchema fails fast when existing descriptor dimensions differ.""" @@ -125,7 +146,8 @@ def test_ensure_schema_raises_on_existing_dimension_mismatch(self, mock_vdms_cla db.sendQuery = Mock(return_value=([{ 'status': 0, 'returned': 1, - 'dimensions': 128 + 'dimensions': 128, + 'metric': 'L2' }], [])) with pytest.raises(RuntimeError, match="uses 128 dimensions"): @@ -153,6 +175,26 @@ def test_ensure_schema_raises_when_dimensions_not_reported(self, mock_vdms_class assert db._schema_ready is False assert db.dimensions is None + @patch('controller.vdms_adapter.vdms.vdms') + def test_ensure_schema_raises_on_existing_metric_mismatch(self, mock_vdms_class): + """Verify ensureSchema fails fast when existing descriptor metric differs.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP", dimensions=None) + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'dimensions': 256, + 'metric': 'L2' + }], [])) + + with pytest.raises(RuntimeError, match="uses metric L2"): + 
db.ensureSchema(256) + + assert db._schema_ready is False + assert db.dimensions is None + @patch('controller.vdms_adapter.vdms.vdms') def test_ensure_schema_accepts_matching_existing_dimensions(self, mock_vdms_class): """Verify ensureSchema succeeds when descriptor dimensions match requested size.""" @@ -163,7 +205,8 @@ def test_ensure_schema_accepts_matching_existing_dimensions(self, mock_vdms_clas db.sendQuery = Mock(return_value=([{ 'status': 0, 'returned': 1, - 'dimensions': 256 + 'dimensions': 256, + 'metric': 'L2' }], [])) db.ensureSchema(256) @@ -175,6 +218,73 @@ def test_ensure_schema_accepts_matching_existing_dimensions(self, mock_vdms_clas class TestAddEntry: """Test adding entries to VDMS.""" + @patch('controller.vdms_adapter.vdms.vdms') + def test_prepare_reid_dict_infers_dimensions_from_row_vector(self, mock_vdms_class): + """Verify prepareReidDict infers dimensions and flattens (1, N) input.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + row_vector = np.arange(12, dtype=np.float32).reshape(1, 12) + + prepared = db.prepareReidDict(row_vector, dimensions=None) + + assert prepared is not None + assert prepared['dimensions'] == 12 + assert prepared['embedded_vector'].shape == (12,) + assert np.array_equal(prepared['embedded_vector'], row_vector.reshape(-1)) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_prepare_reid_dict_rejects_dimension_mismatch(self, mock_vdms_class): + """Verify prepareReidDict returns None when expected dimensions do not match.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + row_vector = np.arange(16, dtype=np.float32).reshape(1, 16) + + prepared = db.prepareReidDict(row_vector, dimensions=32) + + assert prepared is None + + @patch('controller.vdms_adapter.vdms.vdms') + def test_prepare_reid_dict_normalizes_for_ip_metric(self, mock_vdms_class): + """Verify prepareReidDict normalizes 
vectors when normalize_embeddings=True (IP metric).""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP") + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + prepared = db.prepareReidDict(vec, dimensions=256, normalize_embeddings=True) + + assert prepared is not None + normalized = prepared['embedded_vector'] + assert np.isclose(np.linalg.norm(normalized), 1.0), "Vector should be normalized to unit norm" + assert np.isclose(normalized[0], 0.6), "Normalized [0] should be 3/5 = 0.6" + assert np.isclose(normalized[1], 0.8), "Normalized [1] should be 4/5 = 0.8" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_prepare_reid_dict_preserves_for_l2_metric(self, mock_vdms_class): + """Verify prepareReidDict preserves raw vectors when normalize_embeddings=False (L2 metric).""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="L2") + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + prepared = db.prepareReidDict(vec, dimensions=256, normalize_embeddings=False) + + assert prepared is not None + raw = prepared['embedded_vector'] + assert np.isclose(raw[0], 3.0), "Raw [0] should remain 3.0" + assert np.isclose(raw[1], 4.0), "Raw [1] should remain 4.0" + assert np.isclose(np.linalg.norm(raw), 5.0), "Raw norm should be 5.0 (3-4-5 triangle)" + @patch('controller.vdms_adapter.vdms.vdms') def test_add_entry_requires_standard_fields(self, mock_vdms_class): """Verify addEntry includes uuid, rvid, and type in properties.""" @@ -202,6 +312,26 @@ def test_add_entry_requires_standard_fields(self, mock_vdms_class): assert properties['rvid'] == test_rvid assert properties['type'] == test_type + @patch('controller.vdms_adapter.vdms.vdms') + def test_add_entry_accepts_row_vector_shape(self, mock_vdms_class): + """Verify addEntry accepts (1, N) vectors through prepareReidDict.""" + 
mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.dimensions = 256 + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + row_vector = np.random.randn(1, 256).astype(np.float32) + + db.addEntry("test-uuid", "rvid", "Person", [row_vector]) + + db.sendQuery.assert_called_once() + blob = db.sendQuery.call_args[0][1] + assert len(blob) == 1 + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (256,) + @patch('controller.vdms_adapter.vdms.vdms') def test_add_entry_handles_new_metadata_format(self, mock_vdms_class): """Verify addEntry extracts label from metadata dict for VDMS constraint matching.""" @@ -238,34 +368,50 @@ def test_add_entry_handles_new_metadata_format(self, mock_vdms_class): assert properties['age'] == "28" @patch('controller.vdms_adapter.vdms.vdms') - def test_add_entry_converts_vectors_to_bytes(self, mock_vdms_class): - """Verify addEntry converts numpy vectors to bytes for blob transmission.""" + def test_add_entry_normalizes_vectors_before_blob(self, mock_vdms_class): + """Verify addEntry normalizes vectors before sending them to VDMS when metric is IP.""" mock_vdms_instance = MagicMock() mock_vdms_class.return_value = mock_vdms_instance - db = VDMSDatabase() - db.dimensions = 256 - db.sendQuery = Mock(return_value=([{'status': 0}, {'status': 0}], [])) - - test_uuid = "test-uuid" - test_rvid = "rvid" - test_type = "Person" + db = VDMSDatabase(similarity_metric="IP") + db.sendQuery = Mock(return_value=([{'status': 0}], [])) - test_vectors = [ - np.random.randn(256).astype(np.float32), - np.random.randn(256).astype(np.float32) - ] + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 - db.addEntry(test_uuid, test_rvid, test_type, test_vectors) + db.addEntry("test-uuid", "rvid", "Person", [vec]) call_args = db.sendQuery.call_args blob = call_args[0][1] + normalized = np.frombuffer(blob[0], dtype=np.float32) - assert blob is not None - assert 
len(blob) == len(test_vectors), "Blob should have one entry per vector" + assert np.isclose(np.linalg.norm(normalized), 1.0) + assert np.isclose(normalized[0], 0.6) + assert np.isclose(normalized[1], 0.8) - for blob_item in blob: - assert isinstance(blob_item, bytes), "Blob item should be bytes" + @patch('controller.vdms_adapter.vdms.vdms') + def test_add_entry_preserves_raw_vectors_for_l2_metric(self, mock_vdms_class): + """Verify non-IP metrics do not force vector normalization before sending to VDMS.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="L2") + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + db.addEntry("test-uuid", "rvid", "Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + + assert np.isclose(stored[0], 3.0) + assert np.isclose(stored[1], 4.0) + assert np.isclose(np.linalg.norm(stored), 5.0) @patch('controller.vdms_adapter.vdms.vdms') def test_add_entry_handles_multiple_vectors(self, mock_vdms_class): @@ -388,6 +534,76 @@ def test_find_matches_tier2_vector_similarity_search(self, mock_vdms_class): for blob_item in blob: assert isinstance(blob_item, bytes), "TIER 2 requires vectors as bytes" + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_normalizes_query_vectors(self, mock_vdms_class): + """Verify findMatches normalizes query vectors before similarity search when metric is IP.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP") + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 0 + }], [])) + + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + db.findMatches("Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + normalized = 
np.frombuffer(blob[0], dtype=np.float32) + + assert np.isclose(np.linalg.norm(normalized), 1.0) + assert np.isclose(normalized[0], 0.6) + assert np.isclose(normalized[1], 0.8) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_preserves_query_vectors_for_l2_metric(self, mock_vdms_class): + """Verify findMatches preserves raw vectors for the L2 metric path.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="L2") + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 0 + }], [])) + + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + db.findMatches("Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + + assert np.isclose(stored[0], 3.0) + assert np.isclose(stored[1], 4.0) + assert np.isclose(np.linalg.norm(stored), 5.0) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_skips_zero_norm_vectors(self, mock_vdms_class): + """Verify findMatches ignores zero-norm vectors for IP metric and avoids empty VDMS queries.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP") + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 0 + }], [])) + + zero_vec = np.zeros(256, dtype=np.float32) + result = db.findMatches("Person", [zero_vec]) + + assert result is None + db.sendQuery.assert_not_called() + @patch('controller.vdms_adapter.vdms.vdms') def test_find_matches_returns_matched_entities(self, mock_vdms_class): """Verify findMatches returns matched entities from VDMS.""" @@ -413,6 +629,114 @@ def test_find_matches_returns_matched_entities(self, mock_vdms_class): assert len(result) == 1 assert result[0] == expected_entities + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_filters_invalid_ip_similarity_scores(self, mock_vdms_class): + 
"""Verify findMatches filters entities with IP scores outside [-1, 1].""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP") + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 3, + 'entities': [ + {'uuid': 'valid', 'rvid': 'rvid-1', '_distance': 0.9}, + {'uuid': 'too-high', 'rvid': 'rvid-2', '_distance': 1.2}, + {'uuid': 'too-low', 'rvid': 'rvid-3', '_distance': -1.1}, + ] + }], [])) + + test_vectors = [np.random.randn(256).astype(np.float32)] + result = db.findMatches("Person", test_vectors) + + assert result is not None + assert len(result) == 1 + assert len(result[0]) == 1 + assert result[0][0]['uuid'] == 'valid' + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_preserves_per_vector_slot_when_all_entities_invalid(self, mock_vdms_class): + """Verify successful per-vector responses with only invalid IP scores return an empty slot.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP") + db.sendQuery = Mock(return_value=([ + { + 'status': 0, + 'returned': 2, + 'entities': [ + {'uuid': 'too-high', 'rvid': 'rvid-2', '_distance': 1.4}, + {'uuid': 'too-low', 'rvid': 'rvid-3', '_distance': -1.2}, + ] + }, + { + 'status': 0, + 'returned': 1, + 'entities': [ + {'uuid': 'valid', 'rvid': 'rvid-1', '_distance': 0.9}, + ] + } + ], [])) + + test_vectors = [ + np.random.randn(256).astype(np.float32), + np.random.randn(256).astype(np.float32), + ] + result = db.findMatches("Person", test_vectors) + + assert result is not None + assert len(result) == 2 + assert result[0] == [] + assert len(result[1]) == 1 + assert result[1][0]['uuid'] == 'valid' + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_returns_none_when_all_ip_scores_invalid(self, mock_vdms_class): + """Verify findMatches returns no matches if all IP scores are out of range.""" + mock_vdms_instance = 
MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="IP") + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 2, + 'entities': [ + {'uuid': 'too-high', 'rvid': 'rvid-2', '_distance': 1.4}, + {'uuid': 'too-low', 'rvid': 'rvid-3', '_distance': -1.2}, + ] + }], [])) + + test_vectors = [np.random.randn(256).astype(np.float32)] + result = db.findMatches("Person", test_vectors) + + assert result == [[]] + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_keeps_out_of_range_scores_for_l2_metric(self, mock_vdms_class): + """Verify L2 path does not filter scores by the IP-only [-1, 1] rule.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(similarity_metric="L2") + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 2, + 'entities': [ + {'uuid': 'dist-high', 'rvid': 'rvid-1', '_distance': 1.4}, + {'uuid': 'dist-negative', 'rvid': 'rvid-2', '_distance': -1.2}, + ] + }], [])) + + test_vectors = [np.random.randn(256).astype(np.float32)] + result = db.findMatches("Person", test_vectors) + + assert result is not None + assert len(result) == 1 + assert len(result[0]) == 2 + assert result[0][0]['uuid'] == 'dist-high' + assert result[0][1]['uuid'] == 'dist-negative' + @patch('controller.vdms_adapter.vdms.vdms') def test_find_matches_handles_no_results(self, mock_vdms_class): """Verify findMatches handles case with no matches.""" @@ -473,7 +797,7 @@ def test_build_constraints_dict_metadata_high_confidence(self, mock_vdms_class): } } - result = db._build_query_constraints("Person", **constraints) + result = db._buildQueryConstraints("Person", **constraints) assert "gender" in result assert result["gender"] == ["==", "Female"] @@ -494,7 +818,7 @@ def test_build_constraints_dict_metadata_low_confidence(self, mock_vdms_class): } } - result = db._build_query_constraints("Person", **constraints) + result = 
db._buildQueryConstraints("Person", **constraints) assert result == {"type": ["==", "Person"]}, "Low-confidence constraints should be ignored" @@ -515,7 +839,7 @@ def test_build_constraints_mixed_dict_and_plain_values(self, mock_vdms_class): "color": "blue" } - result = db._build_query_constraints("Person", **constraints) + result = db._buildQueryConstraints("Person", **constraints) assert "gender" in result assert result["gender"] == ["==", "Male"] @@ -536,7 +860,7 @@ def test_build_constraints_dict_without_confidence(self, mock_vdms_class): } } - result = db._build_query_constraints("Person", **constraints) + result = db._buildQueryConstraints("Person", **constraints) assert result == {"type": ["==", "Person"]}, "Dict without confidence should be ignored" @@ -554,7 +878,7 @@ def test_build_constraints_dict_value_extraction(self, mock_vdms_class): "name": {"label": "John", "model_name": "name", "confidence": 0.99} } - result = db._build_query_constraints("Person", **constraints) + result = db._buildQueryConstraints("Person", **constraints) assert result["age"] == ["==", "28"], "High confidence (0.88 >= 0.8) should be AND" assert result["name"] == ["==", "John"], "High confidence (0.99 >= 0.8) should be AND" @@ -570,7 +894,7 @@ def test_build_constraints_object_type_always_and(self, mock_vdms_class): db = VDMSDatabase() test_type = "Person" - constraints = db._build_query_constraints(test_type) + constraints = db._buildQueryConstraints(test_type) assert "type" in constraints, "Object type must always be present" assert constraints["type"] == ["==", test_type], "Object type must be AND constraint format" @@ -589,7 +913,7 @@ def test_build_constraints_high_confidence_to_and(self, mock_vdms_class): "color": {"label": "blue", "model_name": "color_v1", "confidence": 0.8} } - constraints = db._build_query_constraints("Person", **high_confidence_constraints) + constraints = db._buildQueryConstraints("Person", **high_confidence_constraints) assert "gender" in constraints 
assert "age_range" in constraints @@ -613,7 +937,7 @@ def test_build_constraints_low_confidence_ignored(self, mock_vdms_class): "color": {"label": "blue", "model_name": "color", "confidence": 0.01} } - constraints = db._build_query_constraints("Person", **low_confidence_constraints) + constraints = db._buildQueryConstraints("Person", **low_confidence_constraints) assert constraints == {"type": ["==", "Person"]}, "Low-confidence constraints should all be ignored" @@ -625,7 +949,7 @@ def test_build_constraints_empty_constraints(self, mock_vdms_class): db = VDMSDatabase() - constraints = db._build_query_constraints("Vehicle") + constraints = db._buildQueryConstraints("Vehicle") assert constraints == {"type": ["==", "Vehicle"]}, \ "Empty constraints should only have type constraint" @@ -644,7 +968,7 @@ def test_build_constraints_none_values_ignored(self, mock_vdms_class): "color": "blue" } - constraints = db._build_query_constraints("Person", **constraints_with_none) + constraints = db._buildQueryConstraints("Person", **constraints_with_none) assert "age" not in constraints, "None values should be ignored" assert "gender" in constraints @@ -663,7 +987,7 @@ def test_build_constraints_boundary_confidence_0_8(self, mock_vdms_class): "attribute_exact": {"label": "test_value", "model_name": "model", "confidence": 0.8} } - constraints = db._build_query_constraints("Person", **boundary_constraints) + constraints = db._buildQueryConstraints("Person", **boundary_constraints) assert "attribute_exact" in constraints assert constraints["attribute_exact"] == ["==", "test_value"] @@ -674,7 +998,7 @@ class TestFindMatchesIntegration: @patch('controller.vdms_adapter.vdms.vdms') def test_find_matches_uses_constraint_builder(self, mock_vdms_class): - """Verify findMatches delegates to _build_query_constraints.""" + """Verify findMatches delegates to _buildQueryConstraints.""" mock_vdms_instance = MagicMock() mock_vdms_class.return_value = mock_vdms_instance @@ -697,6 +1021,79 @@ def 
test_find_matches_uses_constraint_builder(self, mock_vdms_class): assert query_constraints["gender"] == ["==", "Female"] +class TestConfigurationParameters: + """Test that VDMSDatabase respects configuration parameters.""" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_default_parameters_initialization(self, mock_vdms_class): + """Verify VDMSDatabase initializes with expected defaults.""" + from controller.vdms_adapter import SCHEMA_NAME, DIMENSIONS, K_NEIGHBORS, SIMILARITY_METRIC, DEFAULT_CONFIDENCE_THRESHOLD + + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + + assert db.set_name == SCHEMA_NAME, f"Expected set_name={SCHEMA_NAME}, got {db.set_name}" + assert db.dimensions == DIMENSIONS, f"Expected dimensions={DIMENSIONS}, got {db.dimensions}" + assert db.similarity_metric == SIMILARITY_METRIC, f"Expected metric={SIMILARITY_METRIC}, got {db.similarity_metric}" + assert db.confidence_threshold == DEFAULT_CONFIDENCE_THRESHOLD, f"Expected threshold={DEFAULT_CONFIDENCE_THRESHOLD}, got {db.confidence_threshold}" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_custom_confidence_threshold_in_constraints(self, mock_vdms_class): + """Verify custom confidence_threshold parameter is used in constraint building.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + custom_threshold = 0.95 + db = VDMSDatabase(confidence_threshold=custom_threshold) + db.sendQuery = Mock(return_value=([{'status': 0, 'returned': 0}], [])) + + # High-confidence constraint that meets custom threshold + constraints_high = { + "gender": {"label": "Female", "model_name": "gender_v2", "confidence": 0.96} + } + + result = db._buildQueryConstraints("Person", **constraints_high) + assert "gender" in result, "Confidence 0.96 should exceed custom threshold 0.95" + + # Medium-confidence constraint that fails custom threshold + constraints_medium = { + "age": {"label": 25, "model_name": 
"age_v2", "confidence": 0.90} + } + + result = db._buildQueryConstraints("Person", **constraints_medium) + assert "age" not in result, "Confidence 0.90 should fail custom threshold 0.95" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_similarity_metric_affects_normalization(self, mock_vdms_class): + """Verify similarity_metric parameter properly controls normalization in addEntry.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + vec = np.zeros(256, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + # Test IP metric (should normalize) + db_ip = VDMSDatabase(similarity_metric="IP") + db_ip.sendQuery = Mock(return_value=([{'status': 0}], [])) + db_ip.addEntry("uuid-ip", "rvid", "Person", [vec]) + + blob_ip = db_ip.sendQuery.call_args[0][1] + normalized_ip = np.frombuffer(blob_ip[0], dtype=np.float32) + assert np.isclose(np.linalg.norm(normalized_ip), 1.0), "IP metric should normalize" + + # Test L2 metric (should NOT normalize) + db_l2 = VDMSDatabase(similarity_metric="L2") + db_l2.sendQuery = Mock(return_value=([{'status': 0}], [])) + db_l2.addEntry("uuid-l2", "rvid", "Person", [vec]) + + blob_l2 = db_l2.sendQuery.call_args[0][1] + stored_l2 = np.frombuffer(blob_l2[0], dtype=np.float32) + assert np.isclose(np.linalg.norm(stored_l2), 5.0), "L2 metric should preserve raw vectors" + + class TestMetadataStorageQueryConsistency: """Test that stored metadata matches what is queried (no storage/query mismatch).""" @@ -817,3 +1214,298 @@ def test_metadata_consistency_multiple_types(self, mock_vdms_class): f"{attr_name}: Expected constraint '{expected_stored}' but got '{queried_value}'" assert stored_value == queried_value, \ f"{attr_name}: Storage/Query mismatch - stored='{stored_value}' vs queried='{queried_value}'" + + +class TestDimensionInferenceAndArbitraryDimensions: + """Test that ReID works with arbitrary dimensions, not just 256.""" + + @patch('controller.vdms_adapter.vdms.vdms') + def 
test_add_entry_with_128_dimensions(self, mock_vdms_class): + """Verify addEntry works with 128-dimension vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=128) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + test_vectors = [np.random.randn(128).astype(np.float32)] + db.addEntry("uuid", "rvid", "Person", test_vectors) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + assert len(blob) == 1 + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (128,), f"Expected (128,) but got {stored.shape}" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_add_entry_with_512_dimensions(self, mock_vdms_class): + """Verify addEntry works with 512-dimension vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=512) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + test_vectors = [np.random.randn(512).astype(np.float32)] + db.addEntry("uuid", "rvid", "Person", test_vectors) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (512,), f"Expected (512,) but got {stored.shape}" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_add_entry_with_1024_dimensions(self, mock_vdms_class): + """Verify addEntry works with 1024-dimension vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=1024) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + test_vectors = [np.random.randn(1024).astype(np.float32)] + db.addEntry("uuid", "rvid", "Person", test_vectors) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (1024,), f"Expected (1024,) but got {stored.shape}" + + 
@patch('controller.vdms_adapter.vdms.vdms') + def test_dimension_inference_none_with_128_vectors(self, mock_vdms_class): + """Verify dimension inference works when dimensions=None and vectors are 128D.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=None) + vec = np.random.randn(128).astype(np.float32) + + prepared = db.prepareReidDict(vec, dimensions=None) + + assert prepared is not None + assert prepared['dimensions'] == 128 + assert prepared['embedded_vector'].shape == (128,) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_dimension_inference_none_with_512_vectors(self, mock_vdms_class): + """Verify dimension inference works when dimensions=None and vectors are 512D.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=None) + vec = np.random.randn(512).astype(np.float32) + + prepared = db.prepareReidDict(vec, dimensions=None) + + assert prepared is not None + assert prepared['dimensions'] == 512 + assert prepared['embedded_vector'].shape == (512,) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_with_128_dimensions(self, mock_vdms_class): + """Verify findMatches works correctly with 128-dimension vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=128) + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'entities': [{'uuid': 'match-1', '_distance': 0.95}] + }], [])) + + test_vectors = [np.random.randn(128).astype(np.float32)] + result = db.findMatches("Person", test_vectors) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + query_vec = np.frombuffer(blob[0], dtype=np.float32) + assert query_vec.shape == (128,), f"Expected query vector (128,) but got {query_vec.shape}" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_matches_with_512_dimensions(self, 
mock_vdms_class): + """Verify findMatches works correctly with 512-dimension vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=512) + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'entities': [{'uuid': 'match-1', '_distance': 0.95}] + }], [])) + + test_vectors = [np.random.randn(512).astype(np.float32)] + result = db.findMatches("Person", test_vectors) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + query_vec = np.frombuffer(blob[0], dtype=np.float32) + assert query_vec.shape == (512,), f"Expected query vector (512,) but got {query_vec.shape}" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_dimension_mismatch_rejected_for_128(self, mock_vdms_class): + """Verify dimension mismatch is rejected for 128-dimension adapter.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=128) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + # Try to add 256-dimension vector to 128-dimension adapter + wrong_vec = np.random.randn(256).astype(np.float32) + db.addEntry("uuid", "rvid", "Person", [wrong_vec]) + + # Should not have sent query (vector was rejected) + db.sendQuery.assert_not_called() + + @patch('controller.vdms_adapter.vdms.vdms') + def test_dimension_mismatch_rejected_for_512(self, mock_vdms_class): + """Verify dimension mismatch is rejected for 512-dimension adapter.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=512) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + # Try to add 256-dimension vector to 512-dimension adapter + wrong_vec = np.random.randn(256).astype(np.float32) + db.addEntry("uuid", "rvid", "Person", [wrong_vec]) + + # Should not have sent query (vector was rejected) + db.sendQuery.assert_not_called() + + @patch('controller.vdms_adapter.vdms.vdms') + def 
test_normalization_works_for_non_256_dimensions(self, mock_vdms_class): + """Verify IP normalization works correctly for non-256 dimensions.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=128, similarity_metric="IP") + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + vec = np.zeros(128, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + db.addEntry("uuid", "rvid", "Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + normalized = np.frombuffer(blob[0], dtype=np.float32) + + assert np.isclose(np.linalg.norm(normalized), 1.0), "Should normalize to unit norm" + assert np.isclose(normalized[0], 0.6) + assert np.isclose(normalized[1], 0.8) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_l2_preservation_for_non_256_dimensions(self, mock_vdms_class): + """Verify L2 metric preserves raw vectors for non-256 dimensions.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=512, similarity_metric="L2") + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + vec = np.zeros(512, dtype=np.float32) + vec[0] = 3.0 + vec[1] = 4.0 + + db.addEntry("uuid", "rvid", "Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + + assert np.isclose(stored[0], 3.0) + assert np.isclose(stored[1], 4.0) + assert np.isclose(np.linalg.norm(stored), 5.0) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_row_vector_shape_inference_128(self, mock_vdms_class): + """Verify (1, 128) row vector is properly flattened and inferred.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=128) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + row_vector = np.random.randn(1, 128).astype(np.float32) + db.addEntry("uuid", "rvid", "Person", 
[row_vector]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (128,) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_row_vector_shape_inference_512(self, mock_vdms_class): + """Verify (1, 512) row vector is properly flattened and inferred.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=512) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + row_vector = np.random.randn(1, 512).astype(np.float32) + db.addEntry("uuid", "rvid", "Person", [row_vector]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (512,) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_mixed_dimension_vectors_rejected(self, mock_vdms_class): + """Verify mixed-dimension vectors are rejected in same addEntry call.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=256) + db.sendQuery = Mock(return_value=([{'status': 0}, {'status': 0}], [])) + + vectors = [ + np.random.randn(256).astype(np.float32), # Correct dimension + np.random.randn(512).astype(np.float32) # Wrong dimension (should be skipped) + ] + + db.addEntry("uuid", "rvid", "Person", vectors) + + call_args = db.sendQuery.call_args + query_list = call_args[0][0] + # Should only have one AddDescriptor query (the 512D vector was skipped) + assert len(query_list) == 1, "Expected 1 query for valid vector, got {}".format(len(query_list)) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_arbitrary_dimension_64(self, mock_vdms_class): + """Verify adapter works with small (64-dimension) vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=64) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + vec 
= np.random.randn(64).astype(np.float32) + db.addEntry("uuid", "rvid", "Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (64,) + + @patch('controller.vdms_adapter.vdms.vdms') + def test_arbitrary_dimension_2048(self, mock_vdms_class): + """Verify adapter works with large (2048-dimension) vectors.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase(dimensions=2048) + db.sendQuery = Mock(return_value=([{'status': 0}], [])) + + vec = np.random.randn(2048).astype(np.float32) + db.addEntry("uuid", "rvid", "Person", [vec]) + + call_args = db.sendQuery.call_args + blob = call_args[0][1] + stored = np.frombuffer(blob[0], dtype=np.float32) + assert stored.shape == (2048,)