From 234c272b1cedfedeec94eb286dc5f87e45552db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Mon, 23 Oct 2017 23:59:29 +0100 Subject: [PATCH 01/15] encoding --- entities.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/entities.c b/entities.c index a81098a..fd5e6e5 100644 --- a/entities.c +++ b/entities.c @@ -390,3 +390,32 @@ size_t decode_html_entities_utf8(char *dest, const char *src) return (size_t)(to - dest); } +int encode_html_entities(char *dest, const char *src) { + char *to = dest; + for( const char *from = src ; *from ; from++ ) { + int i = 9999; + if ( *from <= '+' ) { + sprintf(to,"%%%02x",*from); + to += 3; + continue; + } + if ( *from<='z' ) { + *to = *from; + to++; + continue; + } + //if ( *from=='\r' || *from=='\n' ) continue; + for( i=0 ; i Date: Tue, 24 Oct 2017 00:02:21 +0100 Subject: [PATCH 02/15] stdio added --- entities.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/entities.c b/entities.c index fd5e6e5..cb33e41 100644 --- a/entities.c +++ b/entities.c @@ -11,6 +11,7 @@ #include #include #include +#include #define UNICODE_MAX 0x10FFFFul @@ -418,4 +419,4 @@ int encode_html_entities(char *dest, const char *src) { } *to = 0; return strlen(dest); -} \ No newline at end of file +} From 92cd2bcd6fd7d1f71a8f528d29e425efa3425d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Tue, 24 Oct 2017 00:07:30 +0100 Subject: [PATCH 03/15] encode declaration --- entities.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/entities.h b/entities.h index d8e58ef..aeb556b 100644 --- a/entities.h +++ b/entities.h @@ -20,5 +20,9 @@ extern size_t decode_html_entities_utf8(char *dest, const char *src); The function returns the length of the decoded string. 
*/ + + +int encode_html_entities(char *dest, const char *src); + #endif From 087ef2bd297a8b6205f3763b86d9ea0ee8309f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Wed, 27 Dec 2017 22:10:44 +0000 Subject: [PATCH 04/15] size_t return --- entities.c | 6 +++--- entities.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/entities.c b/entities.c index cb33e41..3dfd356 100644 --- a/entities.c +++ b/entities.c @@ -391,10 +391,9 @@ size_t decode_html_entities_utf8(char *dest, const char *src) return (size_t)(to - dest); } -int encode_html_entities(char *dest, const char *src) { +size_t encode_html_entities(char *dest, const char *src) { char *to = dest; for( const char *from = src ; *from ; from++ ) { - int i = 9999; if ( *from <= '+' ) { sprintf(to,"%%%02x",*from); to += 3; @@ -406,6 +405,7 @@ int encode_html_entities(char *dest, const char *src) { continue; } //if ( *from=='\r' || *from=='\n' ) continue; + unsigned i; for( i=0 ; i Date: Thu, 28 Dec 2017 09:42:16 +0000 Subject: [PATCH 05/15] const cast in cmp() to improve compatibility --- entities.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/entities.c b/entities.c index 3dfd356..18be7e6 100644 --- a/entities.c +++ b/entities.c @@ -273,8 +273,8 @@ static const char *const NAMED_ENTITIES[][2] = { static int cmp(const void *key, const void *value) { - return strncmp((const char *)key, *(const char **)value, - strlen(*(const char **)value)); + return strncmp((const char *)key, *(const char *const *)value, + strlen(*(const char *const *)value)); } static const char *get_named_entity(const char *name) From 20d7f5ebc713eb5bfdbfb70f4b0a5ccabf7c724d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Thu, 28 Dec 2017 18:30:28 +0000 Subject: [PATCH 06/15] encoding with full test --- entities.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/entities.c b/entities.c index 18be7e6..452a9ee 100644 --- 
a/entities.c +++ b/entities.c @@ -394,23 +394,24 @@ size_t decode_html_entities_utf8(char *dest, const char *src) size_t encode_html_entities(char *dest, const char *src) { char *to = dest; for( const char *from = src ; *from ; from++ ) { - if ( *from <= '+' ) { - sprintf(to,"%%%02x",*from); - to += 3; - continue; - } - if ( *from<='z' ) { - *to = *from; - to++; - continue; - } + if ( *from>=0 ) { + if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) { + *to = *from; + to++; + continue; + } + } //if ( *from=='\r' || *from=='\n' ) continue; unsigned i; for( i=0 ; i Date: Thu, 28 Dec 2017 18:44:01 +0000 Subject: [PATCH 07/15] added commat; entitie --- entities.c | 1 + 1 file changed, 1 insertion(+) diff --git a/entities.c b/entities.c index 452a9ee..fc8e50f 100644 --- a/entities.c +++ b/entities.c @@ -101,6 +101,7 @@ static const char *const NAMED_ENTITIES[][2] = { { "chi;", "χ" }, { "circ;", "ˆ" }, { "clubs;", "♣" }, + { "commat;", "@" }, { "cong;", "≅" }, { "copy;", "©" }, { "crarr;", "↵" }, From 1e13e72389c14e23cf914191f7b7d2d3bd9a00a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Thu, 28 Dec 2017 18:57:18 +0000 Subject: [PATCH 08/15] encoding test --- t-entities.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/t-entities.c b/t-entities.c index 6dd8c34..3c55602 100644 --- a/t-entities.c +++ b/t-entities.c @@ -30,6 +30,21 @@ int main(void) assert(strcmp(buffer, SAMPLE) == 0); } + { + static const char INPUT[] = "Miguel Leitão\ntest@example.org\n

<p>Hello!!</p>

"; + static char GOAL[] = + "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>"; + char OUTPUT[sizeof GOAL]; + char REVERT[sizeof GOAL]; + assert(encode_html_entities(OUTPUT, INPUT) == sizeof GOAL - 1); + // printf("output: %s\n", OUTPUT); + assert(strcmp(OUTPUT,GOAL) == 0); + decode_html_entities_utf8(REVERT,OUTPUT); + + assert(strcmp(INPUT,REVERT) == 0); + } + + fprintf(stdout, "All tests passed :-)\n"); return EXIT_SUCCESS; } From e6a49b585332439be0cd2968b63766ddbddc820b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Mon, 23 Oct 2017 23:59:29 +0100 Subject: [PATCH 09/15] encoding --- entities.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/entities.c b/entities.c index 6fb6aa3..76ea019 100644 --- a/entities.c +++ b/entities.c @@ -391,3 +391,32 @@ size_t decode_html_entities_utf8(char *dest, const char *src) return (size_t)(to - dest); } +int encode_html_entities(char *dest, const char *src) { + char *to = dest; + for( const char *from = src ; *from ; from++ ) { + int i = 9999; + if ( *from <= '+' ) { + sprintf(to,"%%%02x",*from); + to += 3; + continue; + } + if ( *from<='z' ) { + *to = *from; + to++; + continue; + } + //if ( *from=='\r' || *from=='\n' ) continue; + for( i=0 ; i Date: Tue, 24 Oct 2017 00:02:21 +0100 Subject: [PATCH 10/15] stdio added --- entities.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/entities.c b/entities.c index 76ea019..0f2afac 100644 --- a/entities.c +++ b/entities.c @@ -11,6 +11,7 @@ #include #include #include +#include #define UNICODE_MAX 0x10FFFFul @@ -419,4 +420,4 @@ int encode_html_entities(char *dest, const char *src) { } *to = 0; return strlen(dest); -} \ No newline at end of file +} From d33c9829d19e3efb4e753f0f82062f2c5ba76725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Tue, 24 Oct 2017 00:07:30 +0100 Subject: [PATCH 11/15] encode declaration --- entities.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/entities.h 
b/entities.h index d8e58ef..aeb556b 100644 --- a/entities.h +++ b/entities.h @@ -20,5 +20,9 @@ extern size_t decode_html_entities_utf8(char *dest, const char *src); The function returns the length of the decoded string. */ + + +int encode_html_entities(char *dest, const char *src); + #endif From 0fe8c7ff37dcdbe872446050911c8cd7b3896966 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Leit=C3=A3o?= Date: Wed, 27 Dec 2017 22:10:44 +0000 Subject: [PATCH 12/15] size_t return --- entities.c | 6 +++--- entities.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/entities.c b/entities.c index 0f2afac..cbdd385 100644 --- a/entities.c +++ b/entities.c @@ -392,10 +392,9 @@ size_t decode_html_entities_utf8(char *dest, const char *src) return (size_t)(to - dest); } -int encode_html_entities(char *dest, const char *src) { +size_t encode_html_entities(char *dest, const char *src) { char *to = dest; for( const char *from = src ; *from ; from++ ) { - int i = 9999; if ( *from <= '+' ) { sprintf(to,"%%%02x",*from); to += 3; @@ -407,6 +406,7 @@ int encode_html_entities(char *dest, const char *src) { continue; } //if ( *from=='\r' || *from=='\n' ) continue; + unsigned i; for( i=0 ; i Date: Thu, 28 Dec 2017 18:30:28 +0000 Subject: [PATCH 13/15] encoding with full test --- entities.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/entities.c b/entities.c index cbdd385..fc8e50f 100644 --- a/entities.c +++ b/entities.c @@ -395,23 +395,24 @@ size_t decode_html_entities_utf8(char *dest, const char *src) size_t encode_html_entities(char *dest, const char *src) { char *to = dest; for( const char *from = src ; *from ; from++ ) { - if ( *from <= '+' ) { - sprintf(to,"%%%02x",*from); - to += 3; - continue; - } - if ( *from<='z' ) { - *to = *from; - to++; - continue; - } + if ( *from>=0 ) { + if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) { + *to = *from; + to++; + continue; + } + } //if ( *from=='\r' || *from=='\n' 
) continue; unsigned i; for( i=0 ; i Date: Thu, 28 Dec 2017 18:57:18 +0000 Subject: [PATCH 14/15] encoding test --- t-entities.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/t-entities.c b/t-entities.c index 6dd8c34..3c55602 100644 --- a/t-entities.c +++ b/t-entities.c @@ -30,6 +30,21 @@ int main(void) assert(strcmp(buffer, SAMPLE) == 0); } + { + static const char INPUT[] = "Miguel Leitão\ntest@example.org\n

<p>Hello!!</p>

"; + static char GOAL[] = + "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>"; + char OUTPUT[sizeof GOAL]; + char REVERT[sizeof GOAL]; + assert(encode_html_entities(OUTPUT, INPUT) == sizeof GOAL - 1); + // printf("output: %s\n", OUTPUT); + assert(strcmp(OUTPUT,GOAL) == 0); + decode_html_entities_utf8(REVERT,OUTPUT); + + assert(strcmp(INPUT,REVERT) == 0); + } + + fprintf(stdout, "All tests passed :-)\n"); return EXIT_SUCCESS; } From eeb211f375a92734cf5cb73f355fe77b01f21bda Mon Sep 17 00:00:00 2001 From: ooxi Date: Fri, 29 Dec 2017 15:38:41 +0100 Subject: [PATCH 15/15] Misc stylistic unification --- entities.c | 55 ++++++++++++++++++++++++++-------------------------- t-entities.c | 20 +++++++++++-------- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/entities.c b/entities.c index fc8e50f..00c107f 100644 --- a/entities.c +++ b/entities.c @@ -9,9 +9,9 @@ #include #include +#include #include #include -#include #define UNICODE_MAX 0x10FFFFul @@ -393,33 +393,32 @@ size_t decode_html_entities_utf8(char *dest, const char *src) } size_t encode_html_entities(char *dest, const char *src) { - char *to = dest; - for( const char *from = src ; *from ; from++ ) { - if ( *from>=0 ) { - if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) { - *to = *from; - to++; - continue; - } - } - //if ( *from=='\r' || *from=='\n' ) continue; - unsigned i; - for( i=0 ; i=0 ) { + if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) { + *to = *from; + to++; + continue; + } + } + unsigned i; + for( i=0 ; iHello!!

"; + static char GOAL[] = "" + "Miguel Leitão\n" + "test@example.org\n" + "<p>Hello!!</p>"; char OUTPUT[sizeof GOAL]; char REVERT[sizeof GOAL]; assert(encode_html_entities(OUTPUT, INPUT) == sizeof GOAL - 1); - // printf("output: %s\n", OUTPUT); - assert(strcmp(OUTPUT,GOAL) == 0); - decode_html_entities_utf8(REVERT,OUTPUT); - - assert(strcmp(INPUT,REVERT) == 0); + assert(strcmp(OUTPUT, GOAL) == 0); + + decode_html_entities_utf8(REVERT, OUTPUT); + assert(strcmp(INPUT, REVERT) == 0); }