Skip to content

Commit 1f003e3

Browse files
committed
Cleanup parse loop
1 parent 84f3718 commit 1f003e3

File tree

2 files changed

+6
-13
lines changed

2 files changed

+6
-13
lines changed

src/calibre/ebooks/html_entities.c

+5 -13
Original file line number | Diff line number | Diff line change
@@ -114,17 +114,15 @@ add_entity(const char *entity, const size_t elen, char *output) {
114114

115115
static size_t
116116
process_entity(const char *input, size_t input_sz, char *output, size_t *output_pos) {
117-
size_t input_pos = 0;
117+
size_t input_pos = 1; // ignore leading &
118118
while (input_pos < input_sz) {
119119
char ch = input[input_pos++];
120120
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || (ch == '#' && input_pos == 1));
121-
else if (ch == ';') *output_pos += add_entity(input, input_pos-1, output + *output_pos);
122-
else {
123-
output[(*output_pos)++] = '&';
124-
memcpy(output + *output_pos, input, input_pos);
125-
*output_pos += input_pos;
126-
}
121+
else if (ch == ';') { *output_pos += add_entity(input, input_pos-1, output + *output_pos); return input_pos; }
122+
else break;
127123
}
124+
memcpy(output + *output_pos, input, input_pos);
125+
*output_pos += input_pos;
128126
return input_pos;
129127
}
130128

@@ -134,12 +132,6 @@ replace(const char *input, size_t input_sz, char *output, int keep_xml_entities)
134132
while (input_pos < input_sz) {
135133
const char *p = (const char*)memchr(input + input_pos, '&', input_sz - input_pos);
136134
if (p) {
137-
if (p > input + input_pos) {
138-
size_t sz = p - (input + input_pos);
139-
memcpy(output + output_pos, input + input_pos, sz);
140-
output_pos += sz;
141-
input_pos += sz;
142-
}
143135
input_pos += process_entity(p, input_sz - (p - input), output, &output_pos);
144136
} else {
145137
memcpy(output + output_pos, input + input_pos, input_sz - input_pos);

src/calibre/ebooks/html_entities.py

+1
Original file line number | Diff line number | Diff line change
@@ -2142,6 +2142,7 @@ def test_html_entity_replacement(self):
21422142
from calibre_extensions.fast_html_entities import replace_entities
21432143
def t(inp, exp):
21442144
self.assertEqual(exp, replace_entities(inp), f'Failed for input: {inp!r}')
2145+
t('&amp', '&amp')
21452146
t('', '')
21462147
t('a', 'a')
21472148

0 commit comments

Comments
 (0)