66#include < numeric>
77#include < memory>
88#include < vector>
9+ #include < string>
910#include < unordered_set>
1011#include < rapidfuzz/distance/Indel.hpp>
1112#include < rapidfuzz/distance/Levenshtein.hpp>
@@ -129,24 +130,24 @@ static inline RF_String convert_string(PyObject* py_str)
129130/* Edit operation type
130131 * DON'T CHANGE! used as array indices and the bits are occasionally used
131132 * as flags */
132- typedef enum {
133+ enum LevEditType {
133134 LEV_EDIT_KEEP = 0 ,
134135 LEV_EDIT_REPLACE = 1 ,
135136 LEV_EDIT_INSERT = 2 ,
136137 LEV_EDIT_DELETE = 3 ,
137138 LEV_EDIT_LAST /* sometimes returned when an error occurs */
138- } LevEditType ;
139+ };
139140
140141/* Error codes returned by editop check functions */
141- typedef enum {
142+ enum LevEditOpError {
142143 LEV_EDIT_ERR_OK = 0 ,
143144 LEV_EDIT_ERR_TYPE, /* nonexistent edit type */
144145 LEV_EDIT_ERR_OUT, /* edit out of string bounds */
145146 LEV_EDIT_ERR_ORDER, /* ops are not ordered */
146147 LEV_EDIT_ERR_BLOCK, /* inconsistent block boundaries (block ops) */
147148 LEV_EDIT_ERR_SPAN, /* sequence is not a full transformation (block ops) */
148149 LEV_EDIT_ERR_LAST
149- } LevEditOpError ;
150+ };
150151
151152/* Edit operation (atomic).
152153 * This is the `native' atomic edit operation. It differs from the difflib
@@ -481,26 +482,23 @@ double finish_distance_computations(size_t len1, CharT* string1,
481482 * @strings: An array of strings, that may contain NUL characters.
482483 * @weights: The string weights (they behave exactly as multiplicities, though
483484 * any positive value is allowed, not just integers).
484- * @medlength: Where the new length of the median should be stored.
485485 *
486486 * Tries to make @s a better generalized median string of @strings with
487487 * small perturbations.
488488 *
489489 * It never returns a string with larger SOD than @s; in the worst case, a
490490 * string identical to @s is returned.
491491 *
492- * Returns: The improved generalized median, as a newly allocated string; its
493- * length is stored in @medlength.
492+ * Returns: The improved generalized median
494493 **/
495494template <typename CharT>
496- CharT* lev_median_improve (size_t len, const CharT* s, size_t n, const size_t * lengths,
497- const CharT** strings, const double *weights, size_t *medlength )
495+ std::basic_string< CharT> lev_median_improve (size_t len, const CharT* s, size_t n, const size_t * lengths,
496+ const CharT** strings, const double *weights)
498497{
499498 /* find all symbols */
500499 std::vector<CharT> symlist = make_symlist (n, lengths, strings);
501500 if (symlist.empty ()) {
502- *medlength = 0 ;
503- return (CharT*)calloc (1 , sizeof (CharT));
501+ return std::basic_string<CharT>();
504502 }
505503
506504 /* allocate and initialize per-string matrix rows and a common work buffer */
@@ -552,7 +550,7 @@ CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* le
552550 median[pos] = orig_symbol;
553551 }
554552 /* FOREACH symbol: try to add it at pos, if some lower the total
555- * distance, chooste the best (increase medlength )
553+ * distance, choose the best (increase medlen )
556554 * We simulate insertion by replacing the character at pos-1 */
557555 orig_symbol = *(median + pos - 1 );
558556 for (size_t j = 0 ; j < symlist.size (); j++) {
@@ -567,8 +565,8 @@ CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* le
567565 }
568566 }
569567 *(median + pos - 1 ) = orig_symbol;
570- /* IF pos < medlength : try to delete the symbol at pos, if it lowers
571- * the total distance remember it (decrease medlength ) */
568+ /* IF pos < medlen : try to delete the symbol at pos, if it lowers
569+ * the total distance remember it (decrease medlen ) */
572570 if (pos < medlen) {
573571 sum = finish_distance_computations (medlen - pos - 1 , median + pos + 1 ,
574572 n, lengths, strings,
@@ -624,29 +622,20 @@ CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* le
624622 }
625623 }
626624
627- /* return result */
628- CharT *result = (CharT*)safe_malloc (medlen, sizeof (CharT));
629- if (!result) {
630- return NULL ;
631- }
632- *medlength = medlen;
633- memcpy (result, median, medlen*sizeof (CharT));
634- return result;
625+ return std::basic_string<CharT>(median, medlen);
635626}
636627
637- lev_byte*
628+ std::basic_string< lev_byte>
638629lev_quick_median (size_t n,
639630 const size_t *lengths,
640631 const lev_byte *strings[],
641- const double *weights,
642- size_t *medlength);
632+ const double *weights);
643633
644- lev_wchar*
634+ std::basic_string< lev_wchar>
645635lev_u_quick_median (size_t n,
646636 const size_t *lengths,
647637 const lev_wchar *strings[],
648- const double *weights,
649- size_t *medlength);
638+ const double *weights);
650639
651640/* *
652641 * lev_set_median:
@@ -655,12 +644,10 @@ lev_u_quick_median(size_t n,
655644 * @strings: An array of strings, that may contain NUL characters.
656645 * @weights: The string weights (they behave exactly as multiplicities, though
657646 * any positive value is allowed, not just integers).
658- * @medlength: Where the length of the median string should be stored.
659647 *
660648 * Finds the median string of a string set @strings.
661649 *
662- * Returns: The set median as a newly allocate string, its length is stored
663- * in @medlength. %NULL in the case of failure.
650+ * Returns: The set median
664651 **/
665652static inline std::basic_string<uint32_t > lev_set_median (const std::vector<RF_String>& strings,
666653 const std::vector<double >& weights)
0 commit comments