@@ -720,6 +720,94 @@ class Index {
720720 size_t getCurrentCount () const {
721721 return appr_alg->cur_element_count ;
722722 }
723+
724+
725+ py::dict checkIntegrity () {
726+ /* *
727+ * Python-friendly integrity check that returns detailed results
728+ * instead of crashing on assert failures.
729+ *
730+ * Returns a dict with:
731+ * - valid: bool - whether integrity check passed
732+ * - connections_checked: int - total connections verified
733+ * - min_inbound: int - minimum inbound connections per node
734+ * - max_inbound: int - maximum inbound connections per node
735+ * - errors: list[str] - list of any errors found
736+ */
737+ if (!appr_alg) {
738+ return py::dict (
739+ " valid" _a = false ,
740+ " connections_checked" _a = 0 ,
741+ " min_inbound" _a = 0 ,
742+ " max_inbound" _a = 0 ,
743+ " errors" _a = py::list (py::cast (std::vector<std::string>{" Index not initialized" }))
744+ );
745+ }
746+
747+ std::vector<std::string> errors;
748+ int connections_checked = 0 ;
749+ std::vector<int > inbound_connections_num (appr_alg->cur_element_count , 0 );
750+
751+ for (size_t i = 0 ; i < appr_alg->cur_element_count ; i++) {
752+ for (int l = 0 ; l <= appr_alg->element_levels_ [i]; l++) {
753+ hnswlib::linklistsizeint *ll_cur = appr_alg->get_linklist_at_level (i, l);
754+ int size = appr_alg->getListCount (ll_cur);
755+ hnswlib::tableint *data = (hnswlib::tableint *) (ll_cur + 1 );
756+ std::unordered_set<hnswlib::tableint> s;
757+
758+ for (int j = 0 ; j < size; j++) {
759+ // Check: connection points to valid element
760+ if (data[j] >= appr_alg->cur_element_count ) {
761+ errors.push_back (" Element " + std::to_string (i) + " at level " +
762+ std::to_string (l) + " has invalid connection to " + std::to_string (data[j]));
763+ }
764+ // Check: no self-loops
765+ if (data[j] == i) {
766+ errors.push_back (" Element " + std::to_string (i) + " at level " +
767+ std::to_string (l) + " has self-loop" );
768+ }
769+ // Track for duplicate check
770+ if (s.find (data[j]) != s.end ()) {
771+ errors.push_back (" Element " + std::to_string (i) + " at level " +
772+ std::to_string (l) + " has duplicate connection to " + std::to_string (data[j]));
773+ }
774+ s.insert (data[j]);
775+ if (data[j] < appr_alg->cur_element_count ) {
776+ inbound_connections_num[data[j]]++;
777+ }
778+ connections_checked++;
779+ }
780+ }
781+ }
782+
783+ // Check for orphan nodes (no inbound connections)
784+ int min_inbound = 0 , max_inbound = 0 ;
785+ if (appr_alg->cur_element_count > 1 ) {
786+ min_inbound = inbound_connections_num[0 ];
787+ max_inbound = inbound_connections_num[0 ];
788+ for (size_t i = 0 ; i < appr_alg->cur_element_count ; i++) {
789+ if (inbound_connections_num[i] == 0 ) {
790+ errors.push_back (" Element " + std::to_string (i) + " has no inbound connections (orphan)" );
791+ }
792+ min_inbound = std::min (inbound_connections_num[i], min_inbound);
793+ max_inbound = std::max (inbound_connections_num[i], max_inbound);
794+ }
795+ }
796+
797+ py::list error_list;
798+ for (const auto & err : errors) {
799+ error_list.append (err);
800+ }
801+
802+ return py::dict (
803+ " valid" _a = errors.empty (),
804+ " connections_checked" _a = connections_checked,
805+ " element_count" _a = (size_t )appr_alg->cur_element_count ,
806+ " min_inbound" _a = min_inbound,
807+ " max_inbound" _a = max_inbound,
808+ " errors" _a = error_list
809+ );
810+ }
723811};
724812
725813template <typename dist_t , typename data_t = float >
@@ -950,6 +1038,15 @@ PYBIND11_PLUGIN(hnswlib) {
9501038 .def (" resize_index" , &Index<float >::resizeIndex, py::arg (" new_size" ))
9511039 .def (" get_max_elements" , &Index<float >::getMaxElements)
9521040 .def (" get_current_count" , &Index<float >::getCurrentCount)
1041+ .def (" check_integrity" , &Index<float >::checkIntegrity,
1042+ " Check index integrity and return detailed results.\n\n "
1043+ " Returns a dict with:\n "
1044+ " - valid: bool - whether integrity check passed\n "
1045+ " - connections_checked: int - total connections verified\n "
1046+ " - element_count: int - number of elements in index\n "
1047+ " - min_inbound: int - minimum inbound connections per node\n "
1048+ " - max_inbound: int - maximum inbound connections per node\n "
1049+ " - errors: list[str] - list of any errors found\n " )
9531050 .def_readonly (" space" , &Index<float >::space_name)
9541051 .def_readonly (" dim" , &Index<float >::dim)
9551052 .def_readwrite (" num_threads" , &Index<float >::num_threads_default)
0 commit comments