Skip to content

Commit

Permalink
Cleanup parse loop
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Sep 13, 2024
1 parent 84f3718 commit 1f003e3
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 13 deletions.
18 changes: 5 additions & 13 deletions src/calibre/ebooks/html_entities.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,15 @@ add_entity(const char *entity, const size_t elen, char *output) {

 static size_t
 process_entity(const char *input, size_t input_sz, char *output, size_t *output_pos) {
-size_t input_pos = 0;
+size_t input_pos = 1; // ignore leading &
 while (input_pos < input_sz) {
 char ch = input[input_pos++];
 if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || (ch == '#' && input_pos == 1));
-else if (ch == ';') *output_pos += add_entity(input, input_pos-1, output + *output_pos);
-else {
-output[(*output_pos)++] = '&';
-memcpy(output + *output_pos, input, input_pos);
-*output_pos += input_pos;
-}
+else if (ch == ';') { *output_pos += add_entity(input, input_pos-1, output + *output_pos); return input_pos; }
+else break;
 }
+memcpy(output + *output_pos, input, input_pos);
+*output_pos += input_pos;
 return input_pos;
 }

Expand All @@ -134,12 +132,6 @@ replace(const char *input, size_t input_sz, char *output, int keep_xml_entities)
 while (input_pos < input_sz) {
 const char *p = (const char*)memchr(input + input_pos, '&', input_sz - input_pos);
 if (p) {
-if (p > input + input_pos) {
-size_t sz = p - (input + input_pos);
-memcpy(output + output_pos, input + input_pos, sz);
-output_pos += sz;
-input_pos += sz;
-}
 input_pos += process_entity(p, input_sz - (p - input), output, &output_pos);
 } else {
 memcpy(output + output_pos, input + input_pos, input_sz - input_pos);
Expand Down
1 change: 1 addition & 0 deletions src/calibre/ebooks/html_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -2142,6 +2142,7 @@ def test_html_entity_replacement(self):
 from calibre_extensions.fast_html_entities import replace_entities
 def t(inp, exp):
 self.assertEqual(exp, replace_entities(inp), f'Failed for input: {inp!r}')
+t('&amp', '&amp')
 t('', '')
 t('a', 'a')

Expand Down

0 comments on commit 1f003e3

Please sign in to comment.