diff --git a/controller/src/controller/detections_builder.py b/controller/src/controller/detections_builder.py index e45055b7e..03acdfa22 100644 --- a/controller/src/controller/detections_builder.py +++ b/controller/src/controller/detections_builder.py @@ -88,17 +88,18 @@ def prepareObjDict(scene, obj, update_visibility, include_sensors=False, if key != 'reid': obj_dict['metadata'][key] = value - # Output reid in metadata structure + # Output reid in metadata structure. + # embedding_vector is always a (1, N) ndarray after decodeReIDEmbeddingVector. if aobj.reid and 'embedding_vector' in aobj.reid: reid_embedding = aobj.reid['embedding_vector'] if reid_embedding is not None: if 'metadata' not in obj_dict: obj_dict['metadata'] = {} - if isinstance(reid_embedding, np.ndarray): - obj_dict['metadata']['reid'] = {'embedding_vector': reid_embedding.tolist()} - else: - obj_dict['metadata']['reid'] = {'embedding_vector': reid_embedding} - # Add model_name if available + reid_vec = np.asarray(reid_embedding, dtype=np.float32) + obj_dict['metadata']['reid'] = { + 'embedding_vector': reid_vec.tolist(), + 'embedding_dimensions': int(reid_vec.reshape(-1).shape[0]), + } if 'model_name' in aobj.reid: obj_dict['metadata']['reid']['model_name'] = aobj.reid['model_name'] diff --git a/controller/src/controller/moving_object.py b/controller/src/controller/moving_object.py index 727d626da..157d5444a 100644 --- a/controller/src/controller/moving_object.py +++ b/controller/src/controller/moving_object.py @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import base64 +import binascii import datetime -import struct import warnings from dataclasses import dataclass, field from threading import Lock @@ -26,6 +26,89 @@ DEFAULT_TRACKING_RADIUS = 2.0 LOCATION_LIMIT = 20 SPEED_THRESHOLD = 0.1 +REID_FLOAT_SIZE_BYTES = np.dtype(np.float32).itemsize +REID_EMBEDDING_DIMENSIONS_KEY = 'embedding_dimensions' + + +def _getReIDEmbeddingDimensions(reid): + if not isinstance(reid, dict): + return None + + for key in (REID_EMBEDDING_DIMENSIONS_KEY, 'dimensions'): + value = reid.get(key) + if value is None: + continue + try: + return int(value) + except (TypeError, ValueError) as err: + raise ValueError(f"Invalid ReID embedding dimensions: {value}") from err + + return None + + +def decodeReIDEmbeddingVector(embedding_data, dimensions=None): + if isinstance(embedding_data, str): + vector = base64.b64decode(embedding_data, validate=True) + if len(vector) % REID_FLOAT_SIZE_BYTES != 0: + raise ValueError( + f"Packed ReID vector size {len(vector)} is not divisible by {REID_FLOAT_SIZE_BYTES}") + + inferred_dimensions = len(vector) // REID_FLOAT_SIZE_BYTES + if dimensions is None: + dimensions = inferred_dimensions + elif int(dimensions) != inferred_dimensions: + raise ValueError( + f"Packed ReID vector contains {inferred_dimensions} floats, expected {dimensions}") + + return np.frombuffer(vector, dtype=np.float32).copy().reshape(1, dimensions) + + if isinstance(embedding_data, (np.ndarray, list)): + arr = np.asarray(embedding_data, dtype=np.float32).reshape(-1) + actual_length = arr.shape[0] + if dimensions is not None and int(dimensions) != actual_length: + raise ValueError( + f"ReID embedding vector has {actual_length} elements, expected {int(dimensions)}") + return arr.reshape(1, actual_length) + + return None + + +def serializeReIDPayload(reid): + if reid is None: + return None + + if isinstance(reid, dict): + serialized = dict(reid) + embedding_data = serialized.get('embedding_vector', None) + if embedding_data is None: + return serialized + + if isinstance(embedding_data, str): + try: + if REID_EMBEDDING_DIMENSIONS_KEY not in serialized and 'dimensions' not in serialized: + vector = base64.b64decode(embedding_data) + if len(vector) % REID_FLOAT_SIZE_BYTES != 0: + raise ValueError( + f"Packed ReID vector size {len(vector)} is not divisible by {REID_FLOAT_SIZE_BYTES}") + serialized[REID_EMBEDDING_DIMENSIONS_KEY] = len(vector) // REID_FLOAT_SIZE_BYTES + except (binascii.Error, TypeError, ValueError) as err: + log.warning(f"Failed to decode ReID embedding vector: {err}. Setting embedding_vector to None.") + serialized['embedding_vector'] = None + return serialized + + flat_vector = np.asarray(embedding_data, dtype=np.float32).reshape(-1) + serialized['embedding_vector'] = base64.b64encode(flat_vector.tobytes()).decode('utf-8') + serialized[REID_EMBEDDING_DIMENSIONS_KEY] = int(flat_vector.size) + return serialized + + if isinstance(reid, np.ndarray) or isinstance(reid, list): + flat_vector = np.asarray(reid, dtype=np.float32).reshape(-1) + return { + 'embedding_vector': base64.b64encode(flat_vector.tobytes()).decode('utf-8'), + REID_EMBEDDING_DIMENSIONS_KEY: int(flat_vector.size), + } + + return reid @dataclass class ChainData: @@ -134,29 +217,23 @@ def _decodeReIDVector(self, reid): @param reid The reid data in one of the supported formats """ try: + self.reid = {} + # Handle new format: dict with embedding_vector and model_name if isinstance(reid, dict) and 'embedding_vector' in reid: embedding_data = reid['embedding_vector'] - if 'model_name' in reid: - self.reid['model_name'] = reid['model_name'] + self.reid.update({k: v for k, v in reid.items() if k != 'embedding_vector'}) + embedding_dimensions = _getReIDEmbeddingDimensions(reid) else: embedding_data = reid + embedding_dimensions = None # Process the embedding data - if isinstance(embedding_data, str): - # Base64-encoded string format - vector = base64.b64decode(embedding_data) - self.reid['embedding_vector'] = np.array(struct.unpack("256f", vector)).reshape(1, -1) - elif isinstance(embedding_data, list): - # Direct list format - self.reid['embedding_vector'] = embedding_data - else: - # Unknown format, leave as None - self.reid['embedding_vector'] = None + self.reid['embedding_vector'] = decodeReIDEmbeddingVector(embedding_data, embedding_dimensions) # Clean up info dict self.info.pop('reid', None) - except (TypeError, ValueError) as e: + except (TypeError, ValueError, binascii.Error): self.reid['embedding_vector'] = None return @@ -364,7 +441,7 @@ def dump(self): 'bounding_box': self.boundingBox.asDict, 'gid': self.gid, 'frame_count': self.frameCount, - 'reid': self.reid, + 'reid': serializeReIDPayload(self.reid), 'first_seen': self.first_seen, 'location': [{'point': (v.point.x, v.point.y, v.point.z), 'timestamp': v.when, @@ -375,11 +452,6 @@ def dump(self): 'intersected': self.intersected, 'scene_loc': self.sceneLoc.asNumpyCartesian.tolist(), } - if 'reid' in dd and isinstance(dd['reid'], np.ndarray): - vector = dd['reid'].flatten().tolist() - vector = struct.pack("256f", *vector) - vector = base64.b64encode(vector).decode('utf-8') - dd['reid'] = vector if self.intersected: dd['adjusted'] = {'gid': self.adjusted[0], 'point': (self.adjusted[1].x, self.adjusted[1].y, self.adjusted[1].z)} @@ -391,9 +463,8 @@ def load(self, info, scene): self.gid = info['gid'] self.frameCount = info['frame_count'] self.reid = info['reid'] - if self.reid is not None and 'embedding_vector' in self.reid: - vector = base64.b64decode(self.reid['embedding_vector']) - self.reid['embedding_vector'] = np.array(struct.unpack("256f", vector)).reshape(1, -1) + if self.reid is not None: + self._decodeReIDVector(self.reid) self.first_seen = info['first_seen'] self.location = [Chronoloc(Point(v['point']), v['timestamp'], Rectangle(v['bounding_box'])) for v in info['location']] diff --git a/controller/src/controller/scene.py b/controller/src/controller/scene.py index 186b06fb0..32c64f888 100644 --- a/controller/src/controller/scene.py +++ b/controller/src/controller/scene.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: (C) 2025 - 2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import itertools from types import SimpleNamespace from typing import Optional import numpy as np diff --git a/controller/src/controller/time_chunking.py b/controller/src/controller/time_chunking.py index b3e0a5a94..a60a4a25b 100644 --- a/controller/src/controller/time_chunking.py +++ b/controller/src/controller/time_chunking.py @@ -26,7 +26,6 @@ """ import threading -import time from typing import Any, List from scene_common import log diff --git a/controller/src/controller/uuid_manager.py b/controller/src/controller/uuid_manager.py index 37adf258c..7fd0c21d6 100644 --- a/controller/src/controller/uuid_manager.py +++ b/controller/src/controller/uuid_manager.py @@ -3,10 +3,10 @@ import collections import concurrent.futures -import json import threading import time -from unittest import result + +import numpy as np from controller.vdms_adapter import VDMSDatabase from scene_common import log @@ -35,15 +35,18 @@ def __init__(self, database=DEFAULT_DATABASE, reid_config_data=None): self.features_for_database_timestamps = {} # Track when features were added self.quality_features = {} self.unique_id_count = 0 - self.reid_database = available_databases[database]() + # ReID embedding dimensions are inferred from the first observed embedding. + if reid_config_data is None: + reid_config_data = {} + self._inferred_dimensions = None + self._dimensions_lock = threading.Lock() + self.reid_database = available_databases[database](dimensions=None) self.pool = concurrent.futures.ThreadPoolExecutor() self.similarity_query_times = collections.deque( maxlen=DEFAULT_MAX_SIMILARITY_QUERIES_TRACKED) self.similarity_query_times_lock = threading.Lock() self.reid_enabled = True # Extract stale feature timeout from reid config, use default if not provided - if reid_config_data is None: - reid_config_data = {} self.stale_feature_timeout_secs = reid_config_data.get('stale_feature_timeout_secs', DEFAULT_STALE_FEATURE_TIMEOUT_SECS) self.stale_feature_check_interval_secs = reid_config_data.get('stale_feature_check_interval_secs', DEFAULT_STALE_FEATURE_CHECK_INTERVAL_SECS) self.stale_feature_timer = None @@ -99,16 +102,51 @@ def _flush_stale_features(self): def connectDatabase(self): self.pool.submit(self.reid_database.connect) + def _ensureReIDDimensions(self, embedding): + """ + Infer the ReID embedding dimension from the first observed vector and lazily + initialize the VDMS descriptor set schema with that dimension. + On subsequent calls, validate that the embedding dimension is consistent with + the first observed vector so that mixed-model or mis-configured producers are + caught early rather than producing silent data corruption in the DB. + + @param embedding Decoded ReID embedding (numpy array or list) + @return bool True if the embedding should be used; False if it must be discarded + """ + # Decoded embeddings from decodeReIDEmbeddingVector are (1, N); reshape(-1) + # flattens that to (N,) so we get the true element count regardless of shape. + dim = int(np.asarray(embedding).reshape(-1).shape[0]) + if dim <= 0: + log.warning( + f"_ensureReIDDimensions: Skipping empty or zero-length embedding (dim={dim}); " + "embedding will not be used.") + return False + with self._dimensions_lock: + if self._inferred_dimensions is None: + log.info(f"Inferred ReID embedding dimensions from first observed vector: {dim}") + try: + self.reid_database.ensureSchema(dim) + except (ValueError, RuntimeError) as err: + log.error(f"ReID schema initialization failed: {err}") + return False + self._inferred_dimensions = dim + return True + if dim != self._inferred_dimensions: + log.warning( + f"Discarding ReID embedding with inconsistent dimension {dim}; " + f"expected {self._inferred_dimensions} (inferred from first observed vector). " + f"Restart the controller to switch ReID models.") + return False + return True + def _extractReidEmbedding(self, sscape_object): """ Extract embedding vector from sscape_object's reid field. - Returns the decoded embedding vector (numpy array or list), not the base64 string. - Handles both formats: - 1. New format: dict with 'embedding_vector' and 'model_name' keys - 2. Legacy format: direct vector (list or numpy array) + decodeReIDEmbeddingVector guarantees that embedding_vector is a (1, N) + numpy array after _decodeReIDVector runs, so no string check is needed here. @param sscape_object The Scenescape object with detection data - @return embedding The decoded embedding vector (numpy array or list), or None if not available + @return embedding The decoded (1, N) ndarray, or None if not available """ try: reid = sscape_object.reid @@ -118,17 +156,13 @@ def _extractReidEmbedding(self, sscape_object): if reid is None: return None - # New format: dict with 'embedding_vector' key + # Standard path: dict populated by MovingObject._decodeReIDVector. + # embedding_vector is always an ndarray (1, N) or None at this point. if isinstance(reid, dict): - embedding = reid.get('embedding_vector', None) - # Only return if it's already decoded (numpy array or list, not a string) - if embedding is not None and not isinstance(embedding, str): - return embedding - return None + return reid.get('embedding_vector', None) - # Legacy format: direct vector (list or numpy array) - # Return if it's not a dict and not a string - if not isinstance(reid, str) and not isinstance(reid, dict): + # Safety net for callers that set reid directly to an ndarray or list. + if isinstance(reid, (np.ndarray, list)): return reid return None @@ -234,6 +268,8 @@ def gatherQualityVisualFeatures(self, sscape_object, reid_embedding = self._extractReidEmbedding(sscape_object) if reid_embedding is not None and self.reid_enabled: + if not self._ensureReIDDimensions(reid_embedding): + return bbox_area = sscape_object.boundingBoxPixels.area if hasattr(sscape_object, 'boundingBoxPixels') else 0 if bbox_area > minimum_bbox_area: if sscape_object.rv_id in self.quality_features: @@ -263,7 +299,7 @@ def pickBestID(self, sscape_object): sscape_object.similarity = result[1] reid_embedding = self._extractReidEmbedding(sscape_object) - if reid_embedding is not None: + if reid_embedding is not None and self._ensureReIDDimensions(reid_embedding): if sscape_object.rv_id in self.features_for_database: self.features_for_database[sscape_object.rv_id]['reid_vectors'].append( reid_embedding) diff --git a/controller/src/controller/vdms_adapter.py b/controller/src/controller/vdms_adapter.py index bdfd0daff..cf77005e1 100644 --- a/controller/src/controller/vdms_adapter.py +++ b/controller/src/controller/vdms_adapter.py @@ -34,6 +34,8 @@ def __init__(self, set_name=SCHEMA_NAME, self.dimensions = dimensions self.confidence_threshold = confidence_threshold self.lock = threading.Lock() + self._schema_lock = threading.Lock() + self._schema_ready = False return def sendQuery(self, query, blob=None): @@ -74,8 +76,28 @@ def sendQuery(self, query, blob=None): def connect(self, hostname=DEFAULT_HOSTNAME): try: self.db.connect(hostname) - if not self.findSchema(self.set_name): - self.addSchema(self.set_name, self.similarity_metric, self.dimensions) + if self.dimensions is not None: + expected_dimensions = int(self.dimensions) + with self._schema_lock: + schema_exists, schema_dimensions = self.findSchemaDetails(self.set_name) + if schema_exists: + if schema_dimensions is None: + raise RuntimeError( + f"connect: VDMS descriptor set '{self.set_name}' exists but returned no dimensions. " + "Refusing to proceed; recreate the descriptor set to continue.") + if schema_dimensions != expected_dimensions: + raise RuntimeError( + f"connect: VDMS descriptor set '{self.set_name}' uses {schema_dimensions} dimensions, " + f"but controller is configured for {expected_dimensions}. " + "Refusing to proceed; recreate the descriptor set with matching dimensions.") + else: + if not self.addSchema(self.set_name, self.similarity_metric, expected_dimensions): + log.warning("connect: Schema creation failed; _schema_ready left False") + return + self.dimensions = expected_dimensions + self._schema_ready = True + except RuntimeError as e: + log.error(f"Failed to initialize VDMS schema: {e}") except socket.error as e: log.warning(f"Failed to connect to VDMS container: {e}") return @@ -89,10 +111,55 @@ def addSchema(self, set_name, similarity_metric, dimensions): } }] response, _ = self.sendQuery(query) - if response and response[0].get('status') != 0: + if not response: + log.warning("addSchema: No response from VDMS when creating descriptor set") + return False + if response[0].get('status') != 0: log.warning( f"Failed to add the descriptor set to the database. Received response {response[0]}") - return + return False + return True + + def ensureSchema(self, dimensions): + """ + Initialize the VDMS descriptor set schema with the given dimensions. + Called lazily when the first ReID embedding is observed, removing the need + to pre-configure vector_dimensions before runtime. + Thread-safe; idempotent when called with consistent dimensions. + + @param dimensions Number of float32 elements in each embedding vector + @raises ValueError If called with dimensions inconsistent with a previously + initialized schema + @raises RuntimeError If schema creation fails; schema remains unavailable until + schema is flushed and the controller is restarted + """ + with self._schema_lock: + requested_dimensions = int(dimensions) + if self._schema_ready: + if int(self.dimensions) != requested_dimensions: + raise ValueError( + f"ReID schema already initialized with {self.dimensions} dimensions; " + f"incoming vector has {requested_dimensions} dimensions. " + f"Restart the controller and flush the VDMS descriptor set to change dimensions.") + return + schema_exists, schema_dimensions = self.findSchemaDetails(self.set_name) + if schema_exists: + if schema_dimensions is None: + raise RuntimeError( + f"ensureSchema: VDMS descriptor set '{self.set_name}' exists but dimensions were not returned. " + "Refusing to proceed; recreate the descriptor set to continue.") + if schema_dimensions != requested_dimensions: + raise RuntimeError( + f"ensureSchema: VDMS descriptor set '{self.set_name}' uses {schema_dimensions} dimensions, " + f"but incoming vectors use {requested_dimensions}. " + "Refusing to proceed; recreate the descriptor set with matching dimensions.") + else: + if not self.addSchema(self.set_name, self.similarity_metric, requested_dimensions): + raise RuntimeError( + f"ensureSchema: Failed to create VDMS descriptor set '{self.set_name}'; " + "schema not confirmed. ReID writes will be skipped until schema is available.") + self.dimensions = requested_dimensions + self._schema_ready = True def addEntry(self, uuid, rvid, object_type, reid_vectors, set_name=SCHEMA_NAME, **metadata): """ @@ -144,11 +211,14 @@ def addEntry(self, uuid, rvid, object_type, reid_vectors, set_name=SCHEMA_NAME, descriptor_blobs = [] add_query = [] for reid_vector in reid_vectors: - # Ensure vector is properly formatted as 1D array of float32 - # reid_vector might be shape (1, 256) from moving_object, need to flatten to (256,) + # Decoded embeddings from decodeReIDEmbeddingVector are (1, N); flatten to + # (N,) so tobytes() produces the correct contiguous float32 byte sequence. vec_array = np.asarray(reid_vector, dtype="float32").flatten() - if vec_array.shape[0] != 256: - log.warning(f"addEntry: Expected vector shape (256,) but got {vec_array.shape}, skipping this vector") + if self.dimensions is None: + log.warning("addEntry: ReID dimensions not yet initialized, skipping vector") + continue + if vec_array.shape[0] != self.dimensions: + log.warning(f"addEntry: Expected vector shape ({self.dimensions},) but got {vec_array.shape}, skipping this vector") continue descriptor_blobs.append(vec_array.tobytes()) # Create query dict for each vector @@ -159,6 +229,10 @@ def addEntry(self, uuid, rvid, object_type, reid_vectors, set_name=SCHEMA_NAME, } }) + if not add_query: + log.warning("addEntry: No valid vectors to add (all skipped due to dimension mismatch or uninitialized dimensions)") + return + response, _ = self.sendQuery(add_query, descriptor_blobs) # Flat list of blobs if response: success_count = 0 @@ -173,15 +247,46 @@ def addEntry(self, uuid, rvid, object_type, reid_vectors, set_name=SCHEMA_NAME, return def findSchema(self, set_name): + schema_exists, _ = self.findSchemaDetails(set_name) + return schema_exists + + def findSchemaDetails(self, set_name): query = [{ "FindDescriptorSet": { "set": f"{set_name}" } }] response, _ = self.sendQuery(query) - if response and response[0].get('status') == 0 and response[0].get('returned') > 0: - return True - return False + if not response: + return False, None + first_response = response[0] + if first_response.get('status') != 0 or first_response.get('returned', 0) <= 0: + return False, None + + schema_dimensions = self._extractSchemaDimensions(first_response) + return True, schema_dimensions + + def _extractSchemaDimensions(self, find_descriptor_set_response): + # VDMS responses may return descriptor set fields at the top level or nested under + # common payload keys like "entities" or "content". + payloads = [find_descriptor_set_response] + for key in ['entities', 'entity', 'content', 'results', 'DescriptorSet']: + value = find_descriptor_set_response.get(key) + if isinstance(value, dict): + payloads.append(value) + elif isinstance(value, list): + payloads.extend(item for item in value if isinstance(item, dict)) + + for payload in payloads: + for key in ['dimensions', 'dimension']: + if key in payload: + try: + return int(payload[key]) + except (TypeError, ValueError): + log.warning( + f"findSchemaDetails: Could not parse descriptor dimensions from key '{key}' value '{payload[key]}'") + return None + return None def _build_query_constraints(self, object_type, **constraints): """ diff --git a/controller/src/schema/metadata.schema.json b/controller/src/schema/metadata.schema.json index e2eab6430..a3d56b5fa 100644 --- a/controller/src/schema/metadata.schema.json +++ b/controller/src/schema/metadata.schema.json @@ -130,6 +130,10 @@ "embedding_vector": { "type": "string" }, + "embedding_dimensions": { + "type": "integer", + "minimum": 32 + }, "model_name": { "type": "string" } diff --git a/controller/tools/tracker/tracker b/controller/tools/tracker/tracker index 5652d0ce9..fdff5c62d 100755 --- a/controller/tools/tracker/tracker +++ b/controller/tools/tracker/tracker @@ -6,7 +6,6 @@ import base64 import orjson import os -import struct from argparse import ArgumentParser import cv2 @@ -57,10 +56,8 @@ def serializeVectors(objects, key): found = findResults(objects, key) for obj in found: if isinstance(obj[key], np.ndarray): - vector = obj[key].flatten().tolist() - vector = struct.pack("256f", *vector) - vector = base64.b64encode(vector).decode('utf-8') - obj[key] = vector + vector = np.asarray(obj[key], dtype=np.float32).reshape(-1) + obj[key] = base64.b64encode(vector.tobytes()).decode('utf-8') return def publishEvents(scene, ts_str): diff --git a/controller/tools/tracker/tracking_debug.py b/controller/tools/tracker/tracking_debug.py index 1cfb533c0..938a74974 100644 --- a/controller/tools/tracker/tracking_debug.py +++ b/controller/tools/tracker/tracking_debug.py @@ -8,6 +8,7 @@ from drawing import * from controller.ilabs_tracking import IntelLabsTracking +from controller.moving_object import decodeReIDEmbeddingVector from controller.scene import Scene from controller.tracking import Tracking from scene_common.geometry import DEFAULTZ, Line, Point @@ -35,8 +36,7 @@ def loadState(self, state, scene): self.reidExpired = [] reidExp = state['reid_expired'] for exp in reidExp: - vector = base64.b64decode(exp['reid']) - vector = np.array(struct.unpack("256f", vector)).reshape(1, -1) + vector = decodeReIDEmbeddingVector(exp['reid'], exp.get('embedding_dimensions')) self.reidExpired.append(Expired(exp['timestamp'], exp['gid'], vector, exp['frame_count'], exp['first_seen'])) diff --git a/dlstreamer-pipeline-server/user_scripts/gvapython/sscape/sscape_policies.py b/dlstreamer-pipeline-server/user_scripts/gvapython/sscape/sscape_policies.py index d1abc9fb6..082335419 100644 --- a/dlstreamer-pipeline-server/user_scripts/gvapython/sscape/sscape_policies.py +++ b/dlstreamer-pipeline-server/user_scripts/gvapython/sscape/sscape_policies.py @@ -37,7 +37,18 @@ def reidPolicy(pobj, item, fw, fh): name = tensor.get('name','') if name and ('reid' in name or 'embedding' in name): reid_vector = tensor.get('data', []) - v = struct.pack("256f",*reid_vector) + # Handle variable-length re-id vectors from different models + if not reid_vector: + continue + vector_len = len(reid_vector) + # Pack vector with its actual dimensions + format_string = f"{vector_len}f" + try: + v = struct.pack(format_string, *reid_vector) + except struct.error as e: + import sys + print(f"Failed to pack reid vector of length {vector_len}: {e}", file=sys.stderr) + continue # Move reid under metadata key if 'metadata' not in pobj: pobj['metadata'] = {} diff --git a/docs/user-guide/microservices/controller/Extended-ReID.md b/docs/user-guide/microservices/controller/Extended-ReID.md index ec9b2d8b3..7c4297f5d 100644 --- a/docs/user-guide/microservices/controller/Extended-ReID.md +++ b/docs/user-guide/microservices/controller/Extended-ReID.md @@ -166,6 +166,14 @@ controller/config/reid-config.json | `feature_slice_size` | int | 10 | When persisting features to VDMS, sample every Nth feature vector from the accumulated set to reduce database bloat. Example: slice_size=10 stores every 10th vector. | | `similarity_threshold` | int | 60 | Minimum similarity score (0-100) for a match to be considered valid. Higher values = stricter matching. | +### Embedding Dimension Inference + +The controller automatically infers the ReID embedding dimension from the first vector it receives at runtime: + +- **Runtime inference only**: On the first decoded embedding the controller reads the vector length from the payload, creates the VDMS descriptor set schema with that dimension, and locks that dimension for the process lifetime. All subsequent embeddings are validated against that inferred length; mismatches are discarded with a warning. +- **Switching ReID models**: Because the dimension is locked after the first embedding, switching to a model with a different output length requires restarting the controller. The VDMS descriptor set must also be recreated if the stored dimension differs (VDMS does not support in-place schema migration). +- **Base64 compatibility**: The controller decodes base64 embeddings using the payload byte length by default. Producers can also include an optional `embedding_dimensions` field alongside `embedding_vector`; if provided, it must match the packed float count. + ### Using the Configuration File Pass the reid-config file path to the Scene Controller: @@ -181,6 +189,7 @@ python scene_controller.py \ **Current Implementation Note**: - `stale_feature_timeout_secs`, `stale_feature_check_interval_secs`, `feature_accumulation_threshold`, `feature_slice_size`, and `similarity_threshold` are fully implemented +- ReID embedding dimensions are inferred at runtime from the first received embedding; there is no configuration override for dimension - All semantic metadata attributes are currently used for TIER 1 filtering. Selective metadata filtering is planned for Phase 2. ### Tuning Recommendations diff --git a/docs/user-guide/microservices/controller/data_formats.md b/docs/user-guide/microservices/controller/data_formats.md index 47f9f34e6..793a8cd74 100644 --- a/docs/user-guide/microservices/controller/data_formats.md +++ b/docs/user-guide/microservices/controller/data_formats.md @@ -106,6 +106,7 @@ omitted; `embedding_vector` truncated for readability): }, "reid": { "embedding_vector": "", + "embedding_dimensions": 256, "model_name": "torch-jit-export" } } @@ -263,7 +264,8 @@ objects of that category. "confidence": 0.904 }, "reid": { - "embedding_vector": "<256-element float array>", + "embedding_vector": "", + "embedding_dimensions": 256, "model_name": "torch-jit-export" } }, @@ -341,7 +343,8 @@ applications. "confidence": 0.963 }, "reid": { - "embedding_vector": "<256-element float array>", + "embedding_vector": "", + "embedding_dimensions": 256, "model_name": "torch-jit-export" } }, diff --git a/tests/sscape_tests/scenescape/test_moving_object.py b/tests/sscape_tests/scenescape/test_moving_object.py index ebfe4df28..6e935b4f1 100644 --- a/tests/sscape_tests/scenescape/test_moving_object.py +++ b/tests/sscape_tests/scenescape/test_moving_object.py @@ -8,7 +8,7 @@ import numpy as np -from controller.moving_object import ChainData, Chronoloc, MovingObject +from controller.moving_object import ChainData, Chronoloc, MovingObject, decodeReIDEmbeddingVector from scene_common.geometry import Point, Rectangle @@ -49,22 +49,51 @@ def test_init_extracts_reid_and_keeps_metadata(self): assert obj.metadata['age'] == 'adult' assert obj.reid['model_name'] == 'reid-v1' - assert obj.reid['embedding_vector'] == [0.1, 0.2] + assert np.allclose(obj.reid['embedding_vector'], np.array([[0.1, 0.2]], dtype=np.float32)) assert 'metadata' not in obj.info - def test_init_decodes_base64_reid_vector(self): + def test_init_decodes_base64_reid_vector_with_runtime_length(self): when = datetime.datetime.now(datetime.timezone.utc) - vector = np.zeros(256, dtype=np.float32) + vector = np.arange(192, dtype=np.float32) encoded = base64.b64encode(vector.tobytes()).decode('utf-8') metadata = { - 'reid': {'embedding_vector': encoded, 'model_name': 'reid-v2'} + 'reid': { + 'embedding_vector': encoded, + 'embedding_dimensions': 192, + 'model_name': 'reid-v2' + } } obj = MovingObject(_base_info(metadata=metadata), when, _camera()) assert obj.reid['model_name'] == 'reid-v2' - assert obj.reid['embedding_vector'].shape == (1, 256) - assert np.allclose(obj.reid['embedding_vector'], 0.0) + assert obj.reid['embedding_dimensions'] == 192 + assert obj.reid['embedding_vector'].shape == (1, 192) + assert np.allclose(obj.reid['embedding_vector'], vector.reshape(1, -1)) + + def test_dump_and_load_round_trip_reid_with_embedding_dimensions(self): + when = datetime.datetime.now(datetime.timezone.utc) + obj = MovingObject(_base_info(metadata={'age': 'adult'}), when, _camera()) + obj.gid = 'gid-1' + obj.reid = { + 'embedding_vector': np.arange(64, dtype=np.float32).reshape(1, -1), + 'model_name': 'reid-v3' + } + obj.location = [Chronoloc(Point(1.0, 2.0, 3.0), when, obj.boundingBox)] + obj.vectors = [SimpleNamespace(camera=_camera(), point=Point(1.0, 2.0, 3.0), last_seen=when)] + + dumped = obj.dump() + + assert dumped['reid']['embedding_dimensions'] == 64 + assert isinstance(dumped['reid']['embedding_vector'], str) + + loaded = MovingObject(_base_info(object_id='obj-2'), when, _camera()) + loaded.load(dumped, SimpleNamespace(cameras={'cam-1': _camera()})) + + assert loaded.reid['model_name'] == 'reid-v3' + assert loaded.reid['embedding_dimensions'] == 64 + assert loaded.reid['embedding_vector'].shape == (1, 64) + assert np.allclose(loaded.reid['embedding_vector'], obj.reid['embedding_vector']) def test_set_persistent_attributes_stores_full_and_partial_values(self): when = datetime.datetime.now(datetime.timezone.utc) @@ -142,3 +171,78 @@ def test_infer_rotation_from_velocity_skips_when_speed_below_threshold(self): mock_rotation_to_target.assert_not_called() assert obj.rotation == original_rotation + + +class TestDecodeReIDEmbeddingVector: + """Tests for decodeReIDEmbeddingVector validation and normalization.""" + + def test_list_without_dimensions_normalizes_to_2d_array(self): + result = decodeReIDEmbeddingVector([0.1, 0.2, 0.3]) + assert isinstance(result, np.ndarray) + assert result.shape == (1, 3) + assert result.dtype == np.float32 + + def test_ndarray_without_dimensions_normalizes_to_2d_array(self): + vec = np.arange(128, dtype=np.float32) + result = decodeReIDEmbeddingVector(vec) + assert result.shape == (1, 128) + + def test_list_with_matching_dimensions_succeeds(self): + vec = list(range(256)) + result = decodeReIDEmbeddingVector(vec, dimensions=256) + assert result.shape == (1, 256) + + def test_ndarray_with_matching_dimensions_succeeds(self): + vec = np.zeros(256, dtype=np.float32) + result = decodeReIDEmbeddingVector(vec, dimensions=256) + assert result.shape == (1, 256) + + def test_list_with_mismatched_dimensions_raises(self): + import pytest + with pytest.raises(ValueError, match="128 elements, expected 256"): + decodeReIDEmbeddingVector(list(range(128)), dimensions=256) + + def test_ndarray_with_mismatched_dimensions_raises(self): + import pytest + vec = np.zeros(64, dtype=np.float32) + with pytest.raises(ValueError, match="64 elements, expected 128"): + decodeReIDEmbeddingVector(vec, dimensions=128) + + def test_2d_ndarray_is_flattened_before_dimension_check(self): + vec = np.ones((4, 64), dtype=np.float32) + # 4*64 = 256 elements; should pass with dimensions=256 + result = decodeReIDEmbeddingVector(vec, dimensions=256) + assert result.shape == (1, 256) + + def test_2d_ndarray_mismatch_after_flatten_raises(self): + import pytest + vec = np.ones((2, 64), dtype=np.float32) # 128 elements + with pytest.raises(ValueError, match="128 elements, expected 256"): + decodeReIDEmbeddingVector(vec, dimensions=256) + + def test_invalid_base64_string_raises(self): + import pytest, binascii + # Characters outside the base64 alphabet (e.g. '!') are rejected when validate=True + with pytest.raises(binascii.Error): + decodeReIDEmbeddingVector("not!valid==base64") + + def test_invalid_base64_propagates_through_decode_reid_vector(self): + """Verify _decodeReIDVector catches binascii.Error from invalid base64 and sets embedding_vector to None.""" + import datetime + when = datetime.datetime.now(datetime.timezone.utc) + from types import SimpleNamespace + camera = SimpleNamespace(cameraID='cam-1') + info = { + 'id': 'obj-1', + 'category': 'person', + 'confidence': 0.9, + 'bounding_box': {'x': 0.1, 'y': 0.2, 'width': 0.3, 'height': 0.4}, + 'metadata': { + 'reid': { + 'embedding_vector': 'not!valid==base64', + 'model_name': 'test-model', + } + } + } + obj = MovingObject(info, when, camera) + assert obj.reid.get('embedding_vector') is None diff --git a/tests/sscape_tests/uuid_manager/test_uuid_manager.py b/tests/sscape_tests/uuid_manager/test_uuid_manager.py index 3abca2f22..3a83b60cc 100644 --- a/tests/sscape_tests/uuid_manager/test_uuid_manager.py +++ b/tests/sscape_tests/uuid_manager/test_uuid_manager.py @@ -15,14 +15,23 @@ from controller.uuid_manager import UUIDManager +@pytest.fixture(autouse=True) +def mock_vdms_db(): + """Patch UUIDManager database mapping so all tests use a fake VDMS backend.""" + mock_vdms_db = MagicMock() + + def fake_constructor(**kwargs): + return mock_vdms_db + + with patch.dict(UUIDManager.__init__.__globals__['available_databases'], {'VDMS': fake_constructor}): + yield mock_vdms_db + + class TestUUIDManagerInitialization: """Test UUIDManager initialization and basic setup.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_initialization_with_default_database(self, mock_vdms_class): + def test_initialization_with_default_database(self, mock_vdms_db): """Verify UUIDManager initializes with default VDMS database.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -32,33 +41,24 @@ def test_initialization_with_default_database(self, mock_vdms_class): assert manager.unique_id_count == 0 assert manager.reid_enabled is True - @patch('controller.uuid_manager.VDMSDatabase') - def test_initialization_with_custom_database(self, mock_vdms_class): + def test_initialization_with_custom_database(self, mock_vdms_db): """Verify UUIDManager can be initialized with custom database.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager(database="VDMS") assert manager is not None assert manager.reid_database is not None - @patch('controller.uuid_manager.VDMSDatabase') - def test_has_thread_pool_for_async_operations(self, mock_vdms_class): + def test_has_thread_pool_for_async_operations(self, mock_vdms_db): """Verify UUIDManager has thread pool for asynchronous database operations.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() assert hasattr(manager, 'pool'), "Should have thread pool" assert manager.pool is not None - @patch('controller.uuid_manager.VDMSDatabase') - def test_active_ids_tracking_initialized(self, mock_vdms_class): + def test_active_ids_tracking_initialized(self, mock_vdms_db): """Verify active_ids dictionary is initialized for tracking.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -70,11 +70,8 @@ def test_active_ids_tracking_initialized(self, mock_vdms_class): class TestExtractReidEmbedding: """Test Re-ID embedding extraction from detection objects.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_reid_from_new_format(self, mock_vdms_class): + def test_extract_reid_from_new_format(self, mock_vdms_db): """Verify extraction from new format: dict with 'embedding_vector' key.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -90,11 +87,8 @@ def test_extract_reid_from_new_format(self, mock_vdms_class): assert embedding is not None, "Should extract embedding from new format" assert len(embedding) == 4, "Embedding should have correct length" - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_reid_from_legacy_format(self, mock_vdms_class): + def test_extract_reid_from_legacy_format(self, mock_vdms_db): """Verify extraction from legacy format: direct vector.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -107,11 +101,8 @@ def test_extract_reid_from_legacy_format(self, mock_vdms_class): assert embedding is not None, "Should extract embedding from legacy format" assert len(embedding) == 4, "Embedding should have correct length" - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_reid_returns_none_when_missing(self, mock_vdms_class): + def test_extract_reid_returns_none_when_missing(self, mock_vdms_db): """Verify None is returned when reid field is missing.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -122,11 +113,8 @@ def test_extract_reid_returns_none_when_missing(self, mock_vdms_class): assert embedding is None, "Should return None when reid is missing" - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_reid_returns_none_when_none_value(self, mock_vdms_class): + def test_extract_reid_returns_none_when_none_value(self, mock_vdms_db): """Verify None is returned when reid value is None.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -142,11 +130,8 @@ def test_extract_reid_returns_none_when_none_value(self, mock_vdms_class): class TestExtractSemanticMetadata: """Test semantic metadata extraction from detection objects.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_semantic_metadata_new_format(self, mock_vdms_class): + def test_extract_semantic_metadata_new_format(self, mock_vdms_db): """Verify extraction from new metadata format: metadata attribute.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -171,11 +156,8 @@ def test_extract_semantic_metadata_new_format(self, mock_vdms_class): # Generic properties should not be in metadata assert "category" not in metadata, "Should not include generic properties" - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_semantic_metadata_skips_generic_properties(self, mock_vdms_class): + def test_extract_semantic_metadata_skips_generic_properties(self, mock_vdms_db): """Verify generic properties are excluded from metadata extraction.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -199,11 +181,8 @@ def test_extract_semantic_metadata_skips_generic_properties(self, mock_vdms_clas assert "custom_attribute" in metadata assert metadata["custom_attribute"] == {"label": "test", "model_name": "test_model", "confidence": 0.9} - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_semantic_metadata_skips_internal_fields(self, mock_vdms_class): + def test_extract_semantic_metadata_skips_internal_fields(self, mock_vdms_db): """Verify only metadata attribute is extracted, not internal fields.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -224,11 +203,8 @@ def test_extract_semantic_metadata_skips_internal_fields(self, mock_vdms_class): # Metadata contents should be extracted assert "public_attribute" in metadata - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_semantic_metadata_handles_none_values(self, mock_vdms_class): + def test_extract_semantic_metadata_handles_none_values(self, mock_vdms_db): """Verify None metadata is handled gracefully.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -241,11 +217,8 @@ def test_extract_semantic_metadata_handles_none_values(self, mock_vdms_class): # Should return empty dict when metadata is None assert metadata == {} - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_semantic_metadata_preserves_value_types(self, mock_vdms_class): + def test_extract_semantic_metadata_preserves_value_types(self, mock_vdms_db): """Verify extracted metadata preserves data types.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -266,11 +239,8 @@ def test_extract_semantic_metadata_preserves_value_types(self, mock_vdms_class): assert metadata["float_attr"] == {"label": 3.14, "model_name": "model", "confidence": 0.9} assert metadata["bool_attr"] == {"label": True, "model_name": "model", "confidence": 0.9} - @patch('controller.uuid_manager.VDMSDatabase') - def test_extract_semantic_metadata_handles_legacy_format(self, mock_vdms_class): + def test_extract_semantic_metadata_handles_legacy_format(self, mock_vdms_db): """Verify no metadata attribute returns empty dict (legacy objects).""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -291,11 +261,8 @@ def __init__(self): class TestIsNewTrackerID: """Test checking if tracker ID is new.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_is_new_tracker_id_when_not_seen_before(self, mock_vdms_class): + def test_is_new_tracker_id_when_not_seen_before(self, mock_vdms_db): """Verify isNewTrackerID returns True for unseen tracker IDs.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -307,11 +274,8 @@ def test_is_new_tracker_id_when_not_seen_before(self, mock_vdms_class): assert result is True, "Should return True for new tracker ID" - @patch('controller.uuid_manager.VDMSDatabase') - def test_is_new_tracker_id_when_seen_before(self, mock_vdms_class): + def test_is_new_tracker_id_when_seen_before(self, mock_vdms_db): """Verify isNewTrackerID returns False for known tracker IDs.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -330,11 +294,8 @@ def test_is_new_tracker_id_when_seen_before(self, mock_vdms_class): class TestAssignID: """Test ID assignment logic.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_increments_counter_when_no_reid(self, mock_vdms_class): + def test_assign_id_increments_counter_when_no_reid(self, mock_vdms_db): """Verify unique_id_count increments when tracker has no reid vector.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() initial_count = manager.unique_id_count @@ -350,11 +311,8 @@ def test_assign_id_increments_counter_when_no_reid(self, mock_vdms_class): assert manager.unique_id_count == initial_count + 1, "Should increment counter when assigning ID to tracker with no reid" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_does_not_increment_counter_when_reid_present(self, mock_vdms_class): + def test_assign_id_does_not_increment_counter_when_reid_present(self, mock_vdms_db): """Verify unique_id_count is not incremented when tracker has reid vector.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() initial_count = manager.unique_id_count @@ -372,11 +330,8 @@ def test_assign_id_does_not_increment_counter_when_reid_present(self, mock_vdms_ assert manager.unique_id_count == initial_count, "Should not increment counter when reid is present" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_initializes_tracking_for_new_tracker(self, mock_vdms_class): + def test_assign_id_initializes_tracking_for_new_tracker(self, mock_vdms_db): """Verify assignID initializes tracking for new tracker IDs.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -392,11 +347,8 @@ def test_assign_id_initializes_tracking_for_new_tracker(self, mock_vdms_class): assert "new_tracker" in manager.active_ids, "Should initialize tracking for new tracker" assert manager.active_ids["new_tracker"] == [None, None], "Should initialize with [None, None]" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_gathers_quality_features_for_new_tracker(self, mock_vdms_class): + def test_assign_id_gathers_quality_features_for_new_tracker(self, mock_vdms_db): """Verify assignID gathers quality visual features for new tracker.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -415,11 +367,8 @@ def test_assign_id_gathers_quality_features_for_new_tracker(self, mock_vdms_clas assert "new_tracker_with_features" in manager.quality_features, "Should gather quality features for new tracker" assert len(manager.quality_features["new_tracker_with_features"]) > 0, "Should have collected at least one feature" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_calls_pick_best_id_always(self, mock_vdms_class): + def test_assign_id_calls_pick_best_id_always(self, mock_vdms_db): """Verify assignID always calls pickBestID.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() # Mock pickBestID to verify it's called @@ -436,11 +385,8 @@ def test_assign_id_calls_pick_best_id_always(self, mock_vdms_class): manager.pickBestID.assert_called_once_with(obj), "Should call pickBestID" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_does_not_submit_query_without_sufficient_features(self, mock_vdms_class): + def test_assign_id_does_not_submit_query_without_sufficient_features(self, mock_vdms_db): """Verify assignID does not submit query if features are insufficient.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() manager.pool = MagicMock() @@ -459,11 +405,8 @@ def test_assign_id_does_not_submit_query_without_sufficient_features(self, mock_ # Only one feature gathered, less than minimum required assert manager.pool.submit.call_count == 0, "Should not submit query without sufficient features" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_submits_query_with_sufficient_features(self, mock_vdms_class): + def test_assign_id_submits_query_with_sufficient_features(self, mock_vdms_db): """Verify assignID submits similarity query when sufficient features are gathered.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() manager.pool = MagicMock() @@ -488,11 +431,8 @@ def test_assign_id_submits_query_with_sufficient_features(self, mock_vdms_class) assert manager.pool.submit.call_count >= 1, "Should submit query with sufficient features" assert "tracker_many_features" in manager.active_query, "Should mark query as submitted" - @patch('controller.uuid_manager.VDMSDatabase') - def test_assign_id_skips_feature_gathering_if_query_already_submitted(self, mock_vdms_class): + def test_assign_id_skips_feature_gathering_if_query_already_submitted(self, mock_vdms_db): """Verify assignID doesn't resubmit queries if one is already in progress.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() manager.pool = MagicMock() @@ -521,11 +461,8 @@ def test_assign_id_skips_feature_gathering_if_query_already_submitted(self, mock class TestConnectDatabase: """Test database connection.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_connect_database_submits_to_pool(self, mock_vdms_class): + def test_connect_database_submits_to_pool(self, mock_vdms_db): """Verify connectDatabase submits connection task to thread pool.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -541,11 +478,8 @@ def test_connect_database_submits_to_pool(self, mock_vdms_class): class TestDataTypes: """Test data type handling and preservation.""" - @patch('controller.uuid_manager.VDMSDatabase') - def test_metadata_with_unicode_strings(self, mock_vdms_class): + def test_metadata_with_unicode_strings(self, mock_vdms_db): """Verify Unicode strings in metadata are preserved.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -561,11 +495,8 @@ def test_metadata_with_unicode_strings(self, mock_vdms_class): assert metadata["emotion"] == {"label": "Happy", "model_name": "emotion-recognition-retail-0003", "confidence": 0.9} assert metadata["clothing_color"] == {"label": "Blue", "model_name": "clothing-attributes-recognition", "confidence": 0.85} - @patch('controller.uuid_manager.VDMSDatabase') - def test_metadata_with_special_characters(self, mock_vdms_class): + def test_metadata_with_special_characters(self, mock_vdms_db): """Verify special characters in metadata are preserved.""" - mock_vdms_instance = MagicMock() - mock_vdms_class.return_value = mock_vdms_instance manager = UUIDManager() @@ -586,3 +517,101 @@ def test_metadata_with_special_characters(self, mock_vdms_class): "model_name": "desc", "confidence": 0.9 } + + +class TestDimensionInference: + """Test automatic ReID embedding dimension inference from first observed vector.""" + + def _make_manager_with_mock_db(self, reid_config_data=None): + """Helper: build a UUIDManager that uses the shared mock VDMS backend fixture.""" + if reid_config_data is None: + reid_config_data = {} + return UUIDManager(database="VDMS", reid_config_data=reid_config_data) + + def test_infer_dimensions_from_first_embedding(self, mock_vdms_db): + """Verify _ensureReIDDimensions infers dimension from first embedding and calls ensureSchema.""" + manager = self._make_manager_with_mock_db() + assert manager._inferred_dimensions is None + + embedding = np.arange(192, dtype=np.float32) + result = manager._ensureReIDDimensions(embedding) + + assert result is True, "Should accept first embedding" + assert manager._inferred_dimensions == 192, "Should lock in inferred dimension" + mock_vdms_db.ensureSchema.assert_called_once_with(192) + + def test_infer_accepts_subsequent_embedding_with_same_dimension(self, mock_vdms_db): + """Verify _ensureReIDDimensions accepts all embeddings matching the inferred dimension.""" + manager = self._make_manager_with_mock_db() + first = np.arange(128, dtype=np.float32) + second = np.ones(128, dtype=np.float32) + + assert manager._ensureReIDDimensions(first) is True + assert manager._ensureReIDDimensions(second) is True + assert manager._inferred_dimensions == 128 + mock_vdms_db.ensureSchema.assert_called_once_with(128) + + def test_reject_embedding_with_inconsistent_dimension(self, mock_vdms_db): + """Verify _ensureReIDDimensions discards embeddings whose length differs from the inferred one.""" + manager = self._make_manager_with_mock_db() + first = np.arange(256, dtype=np.float32) + mismatched = np.arange(128, dtype=np.float32) + + manager._ensureReIDDimensions(first) + result = manager._ensureReIDDimensions(mismatched) + + assert result is False, "Should reject embedding with different dimension" + assert manager._inferred_dimensions == 256, "Locked dimension should remain unchanged" + + def test_ensure_schema_error_causes_false_return(self, mock_vdms_db): + """Verify False is returned and dimension remains unset when ensureSchema raises.""" + mock_vdms_db.ensureSchema.side_effect = ValueError("schema conflict") + manager = UUIDManager(database="VDMS", reid_config_data={}) + + result = manager._ensureReIDDimensions(np.arange(256, dtype=np.float32)) + + assert result is False, "Should return False when ensureSchema raises" + assert manager._inferred_dimensions is None, "Dimension should remain unset after failure" + + def test_zero_length_embedding_is_rejected_and_does_not_lock_dimensions(self, mock_vdms_db): + """Verify empty arrays are rejected early without calling ensureSchema or locking dimensions.""" + manager = self._make_manager_with_mock_db() + + result_empty_array = manager._ensureReIDDimensions(np.array([], dtype=np.float32)) + + assert result_empty_array is False, "Empty ndarray should be rejected" + assert manager._inferred_dimensions is None, "Dimension must not be locked to 0" + mock_vdms_db.ensureSchema.assert_not_called() + + def test_zero_length_embedding_does_not_block_valid_subsequent_embedding(self, mock_vdms_db): + """Verify that after an empty embedding is rejected, a valid embedding is still accepted.""" + manager = self._make_manager_with_mock_db() + + manager._ensureReIDDimensions(np.array([], dtype=np.float32)) + result = manager._ensureReIDDimensions(np.arange(256, dtype=np.float32)) + + assert result is True + assert manager._inferred_dimensions == 256 + mock_vdms_db.ensureSchema.assert_called_once_with(256) + + def test_gather_features_uses_inferred_dimension_gate(self, mock_vdms_db): + """Verify gatherQualityVisualFeatures silently drops embeddings with wrong dimension.""" + manager = self._make_manager_with_mock_db() + + good_obj = MagicMock() + good_obj.rv_id = "track_1" + good_obj.reid = {"embedding_vector": np.arange(64, dtype=np.float32).tolist()} + good_obj.boundingBoxPixels = MagicMock() + good_obj.boundingBoxPixels.area = 10000 + + bad_obj = MagicMock() + bad_obj.rv_id = "track_2" + bad_obj.reid = {"embedding_vector": np.arange(128, dtype=np.float32).tolist()} + bad_obj.boundingBoxPixels = MagicMock() + bad_obj.boundingBoxPixels.area = 10000 + + manager.gatherQualityVisualFeatures(good_obj) + manager.gatherQualityVisualFeatures(bad_obj) + + assert "track_1" in manager.quality_features, "64-dim embedding should be accepted" + assert "track_2" not in manager.quality_features, "128-dim embedding should be rejected after 64 inferred" diff --git a/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py b/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py index b95b3943f..b390c55f6 100644 --- a/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py +++ b/tests/sscape_tests/vdms_adapter/test_vdms_adapter.py @@ -73,6 +73,105 @@ def test_has_threading_lock(self, mock_vdms): assert hasattr(db, 'lock'), "VDMSDatabase must have a lock for thread safety" +class TestSchemaValidation: + """Test descriptor set schema validation and mismatch handling.""" + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_schema_details_extracts_top_level_dimensions(self, mock_vdms_class): + """Verify FindDescriptorSet top-level dimensions are parsed.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'dimensions': 256 + }], [])) + + exists, dimensions = db.findSchemaDetails(SCHEMA_NAME) + + assert exists is True + assert dimensions == 256 + + @patch('controller.vdms_adapter.vdms.vdms') + def test_find_schema_details_extracts_nested_dimensions(self, mock_vdms_class): + """Verify dimensions nested under entities are parsed.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'entities': [{ + 'name': SCHEMA_NAME, + 'dimensions': 512 + }] + }], [])) + + exists, dimensions = db.findSchemaDetails(SCHEMA_NAME) + + assert exists is True + assert dimensions == 512 + + @patch('controller.vdms_adapter.vdms.vdms') + def test_ensure_schema_raises_on_existing_dimension_mismatch(self, mock_vdms_class): + """Verify ensureSchema fails fast when existing descriptor dimensions differ.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'dimensions': 128 + }], [])) + + with pytest.raises(RuntimeError, match="uses 128 dimensions"): + db.ensureSchema(256) + + assert db._schema_ready is False + assert db.dimensions is None + + @patch('controller.vdms_adapter.vdms.vdms') + def test_ensure_schema_raises_when_dimensions_not_reported(self, mock_vdms_class): + """Verify ensureSchema refuses existing descriptor sets without dimension info.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'name': SCHEMA_NAME + }], [])) + + with pytest.raises(RuntimeError, match="dimensions were not returned"): + db.ensureSchema(256) + + assert db._schema_ready is False + assert db.dimensions is None + + @patch('controller.vdms_adapter.vdms.vdms') + def test_ensure_schema_accepts_matching_existing_dimensions(self, mock_vdms_class): + """Verify ensureSchema succeeds when descriptor dimensions match requested size.""" + mock_vdms_instance = MagicMock() + mock_vdms_class.return_value = mock_vdms_instance + + db = VDMSDatabase() + db.sendQuery = Mock(return_value=([{ + 'status': 0, + 'returned': 1, + 'dimensions': 256 + }], [])) + + db.ensureSchema(256) + + assert db._schema_ready is True + assert db.dimensions == 256 + + class TestAddEntry: """Test adding entries to VDMS.""" @@ -83,6 +182,7 @@ def test_add_entry_requires_standard_fields(self, mock_vdms_class): mock_vdms_class.return_value = mock_vdms_instance db = VDMSDatabase() + db.dimensions = 256 db.sendQuery = Mock(return_value=([{'status': 0}], [])) test_uuid = "test-uuid-123" @@ -109,6 +209,7 @@ def test_add_entry_handles_new_metadata_format(self, mock_vdms_class): mock_vdms_class.return_value = mock_vdms_instance db = VDMSDatabase() + db.dimensions = 256 db.sendQuery = Mock(return_value=([{'status': 0}], [])) test_uuid = "test-uuid" @@ -143,6 +244,7 @@ def test_add_entry_converts_vectors_to_bytes(self, mock_vdms_class): mock_vdms_class.return_value = mock_vdms_instance db = VDMSDatabase() + db.dimensions = 256 db.sendQuery = Mock(return_value=([{'status': 0}, {'status': 0}], [])) test_uuid = "test-uuid" @@ -172,6 +274,7 @@ def test_add_entry_handles_multiple_vectors(self, mock_vdms_class): mock_vdms_class.return_value = mock_vdms_instance db = VDMSDatabase() + db.dimensions = 256 db.sendQuery = Mock(return_value=([{'status': 0}, {'status': 0}, {'status': 0}], [])) test_uuid = "test-uuid" @@ -613,6 +716,7 @@ def test_metadata_stored_matches_constraint_query(self, mock_vdms_class): mock_vdms_class.return_value = mock_vdms_instance db = VDMSDatabase() + db.dimensions = 256 db.sendQuery = Mock(return_value=([{'status': 0}], [])) test_uuid = "test-uuid" @@ -668,6 +772,7 @@ def test_metadata_consistency_multiple_types(self, mock_vdms_class): mock_vdms_class.return_value = mock_vdms_instance db = VDMSDatabase() + db.dimensions = 256 db.sendQuery = Mock(return_value=([{'status': 0}], [])) # Test various data types in metadata labels