88#include < memory>
99#include < vector>
1010#include < string>
11- #include < unordered_set >
11+ #include < set >
1212#include < rapidfuzz/distance/Indel.hpp>
1313#include < rapidfuzz/distance/Levenshtein.hpp>
1414
@@ -190,7 +190,7 @@ static inline std::vector<uint32_t> make_symlist(const std::vector<RF_String>& s
190190 return symlist;
191191 }
192192
193- std::unordered_set <uint32_t > symmap;
193+ std::set <uint32_t > symmap;
194194 for (const auto & string : strings) {
195195 visit (string, [&](auto first1, auto last1){
196196 for (; first1 != last1; ++first1) {
@@ -351,20 +351,16 @@ static inline double finish_distance_computations(size_t len1, uint32_t* string1
351351 const std::vector<double >& weights, std::vector<std::unique_ptr<size_t []>>& rows,
352352 std::unique_ptr<size_t []>& row)
353353{
354- size_t *end;
355- size_t i, j;
356- size_t offset; /* row[0]; offset + len1 give together real len of string1 */
357354 double distsum = 0.0 ; /* sum of distances */
358-
359355 /* catch trivial case */
360356 if (len1 == 0 ) {
361- for (j = 0 ; j < strings.size (); j++)
357+ for (size_t j = 0 ; j < strings.size (); j++)
362358 distsum += (double )rows[j][strings[j].length ]*weights[j];
363359 return distsum;
364360 }
365361
366362 /* iterate through the strings and sum the distances */
367- for (j = 0 ; j < strings.size (); j++) {
363+ for (size_t j = 0 ; j < strings.size (); j++) {
368364 visit (strings[j], [&](auto first1, auto last1){
369365 size_t * rowi = rows[j].get (); /* current row */
370366 size_t leni = (size_t )std::distance (first1, last1); /* current length */
@@ -381,17 +377,18 @@ static inline double finish_distance_computations(size_t len1, uint32_t* string1
381377 distsum += (double )rowi[leni]*weights[j];
382378 return ;
383379 }
384- offset = rowi[0 ];
380+ /* row[0]; offset + len1 give together real len of string1 */
381+ size_t offset = rowi[0 ];
385382 if (leni == 0 ) {
386383 distsum += (double )(offset + len)*weights[j];
387384 return ;
388385 }
389386
390387 /* complete the matrix */
391388 memcpy (row.get (), rowi, (leni + 1 )*sizeof (size_t ));
392- end = row.get () + leni;
389+ size_t * end = row.get () + leni;
393390
394- for (i = 1 ; i <= len; i++) {
391+ for (size_t i = 1 ; i <= len; i++) {
395392 size_t * p = row.get () + 1 ;
396393 const uint32_t char1 = string1[i - 1 ];
397394 auto char2p = first1;
@@ -447,18 +444,16 @@ static inline std::basic_string<uint32_t> lev_median_improve(const RF_String& st
447444 /* allocate and initialize per-string matrix rows and a common work buffer */
448445 std::vector<std::unique_ptr<size_t []>> rows (strings.size ());
449446 size_t maxlen = 0 ;
450- for (const auto & str : strings) {
451- maxlen = std::max (maxlen, (size_t )str.length );
452- }
453-
454447 for (size_t i = 0 ; i < strings.size (); i++) {
455448 size_t leni = (size_t )strings[i].length ;
449+ if (leni > maxlen)
450+ maxlen = leni;
456451 rows[i] = std::make_unique<size_t []>(leni + 1 );
457452 std::iota (rows[i].get (), rows[i].get () + leni + 1 , 0 );
458453 }
459454
460455 size_t stoplen = 2 *maxlen + 1 ;
461- auto row = std::make_unique<size_t []>(stoplen + 1 );
456+ auto row = std::make_unique<size_t []>(stoplen + 2 );
462457
463458 /* initialize median to given string */
464459 auto _median = std::make_unique<uint32_t []>(stoplen + 1 );
0 commit comments