Skip to content

Commit afffe65

Browse files
committed
reduce C usage
1 parent 62b7209 commit afffe65

File tree

3 files changed

+66
-117
lines changed

3 files changed

+66
-117
lines changed

src/Levenshtein/Levenshtein-c/_levenshtein.cpp

Lines changed: 24 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,15 @@ make_symlistset(size_t n, const size_t *lengths,
8787
return symlist;
8888
}
8989

90-
lev_byte*
90+
std::basic_string<lev_byte>
9191
lev_quick_median(size_t n,
9292
const size_t *lengths,
9393
const lev_byte *strings[],
94-
const double *weights,
95-
size_t *medlength)
94+
const double *weights)
9695
{
9796
size_t symlistlen, len, i, j, k;
9897
lev_byte *symlist;
99-
lev_byte *median; /* the resulting string */
98+
std::basic_string<lev_byte> median; /* the resulting string */
10099
double *symset;
101100

102101
/* first check whether the result would be an empty string
@@ -105,27 +104,25 @@ lev_quick_median(size_t n,
105104
double wl = std::accumulate( weights, weights + n, 0.0);
106105

107106
if (wl == 0.0)
108-
return (lev_byte*)calloc(1, sizeof(lev_byte));
107+
return std::basic_string<lev_byte>();
108+
109109
ml = floor(ml/wl + 0.499999);
110-
*medlength = len = (size_t)ml;
110+
len = (size_t)ml;
111111
if (!len)
112-
return (lev_byte*)calloc(1, sizeof(lev_byte));
113-
median = (lev_byte*)safe_malloc(len, sizeof(lev_byte));
114-
if (!median)
115-
return NULL;
112+
return std::basic_string<lev_byte>();
113+
114+
median.resize(len);
116115

117116
/* find the symbol set;
118117
* now an empty symbol set is really a failure */
119118
symset = (double*)calloc(0x100, sizeof(double));
120-
if (!symset) {
121-
free(median);
122-
return NULL;
123-
}
119+
if (!symset)
120+
throw std::bad_alloc();
121+
124122
symlist = make_symlistset(n, lengths, strings, &symlistlen, symset);
125123
if (!symlist) {
126-
free(median);
127124
free(symset);
128-
return NULL;
125+
throw std::bad_alloc();
129126
}
130127

131128
for (j = 0; j < len; j++) {
@@ -269,16 +266,15 @@ make_usymlistset(size_t n, const size_t *lengths,
269266
return symlist;
270267
}
271268

272-
lev_wchar*
269+
std::basic_string<lev_wchar>
273270
lev_u_quick_median(size_t n,
274271
const size_t *lengths,
275272
const lev_wchar *strings[],
276-
const double *weights,
277-
size_t *medlength)
273+
const double *weights)
278274
{
279275
size_t symlistlen, len, i, j, k;
280276
lev_wchar *symlist;
281-
lev_wchar *median; /* the resulting string */
277+
std::basic_string<lev_wchar> median; /* the resulting string */
282278
HQItem *symmap;
283279

284280
/* first check whether the result would be an empty string
@@ -287,27 +283,23 @@ lev_u_quick_median(size_t n,
287283
double wl = std::accumulate( weights, weights + n, 0.0);
288284

289285
if (wl == 0.0)
290-
return (lev_wchar*)calloc(1, sizeof(lev_wchar));
286+
return std::basic_string<lev_wchar>();
291287
ml = floor(ml/wl + 0.499999);
292-
*medlength = len = (size_t)ml;
288+
len = (size_t)ml;
293289
if (!len)
294-
return (lev_wchar*)calloc(1, sizeof(lev_wchar));
295-
median = (lev_wchar*)safe_malloc(len, sizeof(lev_wchar));
296-
if (!median)
297-
return NULL;
290+
return std::basic_string<lev_wchar>();
291+
median.resize(len);
298292

299293
/* find the symbol set;
300294
* now an empty symbol set is really a failure */
301295
symmap = (HQItem*)safe_malloc(0x100, sizeof(HQItem));
302-
if (!symmap) {
303-
free(median);
304-
return NULL;
305-
}
296+
if (!symmap)
297+
throw std::bad_alloc();
298+
306299
symlist = make_usymlistset(n, lengths, strings, &symlistlen, symmap);
307300
if (!symlist) {
308-
free(median);
309301
free_usymlistset_hash(symmap);
310-
return NULL;
302+
throw std::bad_alloc();
311303
}
312304

313305
for (j = 0; j < len; j++) {

src/Levenshtein/Levenshtein-c/_levenshtein.hpp

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <numeric>
77
#include <memory>
88
#include <vector>
9+
#include <string>
910
#include <unordered_set>
1011
#include <rapidfuzz/distance/Indel.hpp>
1112
#include <rapidfuzz/distance/Levenshtein.hpp>
@@ -129,24 +130,24 @@ static inline RF_String convert_string(PyObject* py_str)
129130
/* Edit operation type
130131
* DON'T CHANGE! used as array indices and the bits are occasionally used
131132
* as flags */
132-
typedef enum {
133+
enum LevEditType {
133134
LEV_EDIT_KEEP = 0,
134135
LEV_EDIT_REPLACE = 1,
135136
LEV_EDIT_INSERT = 2,
136137
LEV_EDIT_DELETE = 3,
137138
LEV_EDIT_LAST /* sometimes returned when an error occurs */
138-
} LevEditType;
139+
};
139140

140141
/* Error codes returned by editop check functions */
141-
typedef enum {
142+
enum LevEditOpError {
142143
LEV_EDIT_ERR_OK = 0,
143144
LEV_EDIT_ERR_TYPE, /* nonexistent edit type */
144145
LEV_EDIT_ERR_OUT, /* edit out of string bounds */
145146
LEV_EDIT_ERR_ORDER, /* ops are not ordered */
146147
LEV_EDIT_ERR_BLOCK, /* inconsistent block boundaries (block ops) */
147148
LEV_EDIT_ERR_SPAN, /* sequence is not a full transformation (block ops) */
148149
LEV_EDIT_ERR_LAST
149-
} LevEditOpError;
150+
};
150151

151152
/* Edit operation (atomic).
152153
* This is the `native' atomic edit operation. It differs from the difflib
@@ -481,26 +482,23 @@ double finish_distance_computations(size_t len1, CharT* string1,
481482
* @strings: An array of strings, that may contain NUL characters.
482483
* @weights: The string weights (they behave exactly as multiplicities, though
483484
* any positive value is allowed, not just integers).
484-
* @medlength: Where the new length of the median should be stored.
485485
*
486486
* Tries to make @s a better generalized median string of @strings with
487487
* small perturbations.
488488
*
489489
* It never returns a string with larger SOD than @s; in the worst case, a
490490
* string identical to @s is returned.
491491
*
492-
* Returns: The improved generalized median, as a newly allocated string; its
493-
* length is stored in @medlength.
492+
* Returns: The improved generalized median
494493
**/
495494
template <typename CharT>
496-
CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* lengths,
497-
const CharT** strings, const double *weights, size_t *medlength)
495+
std::basic_string<CharT> lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* lengths,
496+
const CharT** strings, const double *weights)
498497
{
499498
/* find all symbols */
500499
std::vector<CharT> symlist = make_symlist(n, lengths, strings);
501500
if (symlist.empty()) {
502-
*medlength = 0;
503-
return (CharT*)calloc(1, sizeof(CharT));
501+
return std::basic_string<CharT>();
504502
}
505503

506504
/* allocate and initialize per-string matrix rows and a common work buffer */
@@ -552,7 +550,7 @@ CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* le
552550
median[pos] = orig_symbol;
553551
}
554552
/* FOREACH symbol: try to add it at pos, if some lower the total
555-
* distance, chooste the best (increase medlength)
553+
* distance, choose the best (increase medlen)
556554
* We simulate insertion by replacing the character at pos-1 */
557555
orig_symbol = *(median + pos - 1);
558556
for (size_t j = 0; j < symlist.size(); j++) {
@@ -567,8 +565,8 @@ CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* le
567565
}
568566
}
569567
*(median + pos - 1) = orig_symbol;
570-
/* IF pos < medlength: try to delete the symbol at pos, if it lowers
571-
* the total distance remember it (decrease medlength) */
568+
/* IF pos < medlen: try to delete the symbol at pos, if it lowers
569+
* the total distance remember it (decrease medlen) */
572570
if (pos < medlen) {
573571
sum = finish_distance_computations(medlen - pos - 1, median + pos + 1,
574572
n, lengths, strings,
@@ -624,29 +622,20 @@ CharT* lev_median_improve(size_t len, const CharT* s, size_t n, const size_t* le
624622
}
625623
}
626624

627-
/* return result */
628-
CharT *result = (CharT*)safe_malloc(medlen, sizeof(CharT));
629-
if (!result) {
630-
return NULL;
631-
}
632-
*medlength = medlen;
633-
memcpy(result, median, medlen*sizeof(CharT));
634-
return result;
625+
return std::basic_string<CharT>(median, medlen);
635626
}
636627

637-
lev_byte*
628+
std::basic_string<lev_byte>
638629
lev_quick_median(size_t n,
639630
const size_t *lengths,
640631
const lev_byte *strings[],
641-
const double *weights,
642-
size_t *medlength);
632+
const double *weights);
643633

644-
lev_wchar*
634+
std::basic_string<lev_wchar>
645635
lev_u_quick_median(size_t n,
646636
const size_t *lengths,
647637
const lev_wchar *strings[],
648-
const double *weights,
649-
size_t *medlength);
638+
const double *weights);
650639

651640
/**
652641
* lev_set_median:
@@ -655,12 +644,10 @@ lev_u_quick_median(size_t n,
655644
* @strings: An array of strings, that may contain NUL characters.
656645
* @weights: The string weights (they behave exactly as multiplicities, though
657646
* any positive value is allowed, not just integers).
658-
* @medlength: Where the length of the median string should be stored.
659647
*
660648
* Finds the median string of a string set @strings.
661649
*
662-
* Returns: The set median as a newly allocate string, its length is stored
663-
* in @medlength. %NULL in the case of failure.
650+
* Returns: The set median
664651
**/
665652
static inline std::basic_string<uint32_t> lev_set_median(const std::vector<RF_String>& strings,
666653
const std::vector<double>& weights)

0 commit comments

Comments
 (0)