From 234c272b1cedfedeec94eb286dc5f87e45552db4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Mon, 23 Oct 2017 23:59:29 +0100
Subject: [PATCH 01/15] encoding
---
entities.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/entities.c b/entities.c
index a81098a..fd5e6e5 100644
--- a/entities.c
+++ b/entities.c
@@ -390,3 +390,32 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
return (size_t)(to - dest);
}
+int encode_html_entities(char *dest, const char *src) {
+ char *to = dest;
+ for( const char *from = src ; *from ; from++ ) {
+ int i = 9999;
+ if ( *from <= '+' ) {
+ sprintf(to,"%%%02x",*from);
+ to += 3;
+ continue;
+ }
+ if ( *from<='z' ) {
+ *to = *from;
+ to++;
+ continue;
+ }
+ //if ( *from=='\r' || *from=='\n' ) continue;
+ for( i=0 ; i
Date: Tue, 24 Oct 2017 00:02:21 +0100
Subject: [PATCH 02/15] stdio added
---
entities.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/entities.c b/entities.c
index fd5e6e5..cb33e41 100644
--- a/entities.c
+++ b/entities.c
@@ -11,6 +11,7 @@
#include
#include
#include
+#include <stdio.h>
#define UNICODE_MAX 0x10FFFFul
@@ -418,4 +419,4 @@ int encode_html_entities(char *dest, const char *src) {
}
*to = 0;
return strlen(dest);
-}
\ No newline at end of file
+}
From 92cd2bcd6fd7d1f71a8f528d29e425efa3425d85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Tue, 24 Oct 2017 00:07:30 +0100
Subject: [PATCH 03/15] encode declaration
---
entities.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/entities.h b/entities.h
index d8e58ef..aeb556b 100644
--- a/entities.h
+++ b/entities.h
@@ -20,5 +20,9 @@ extern size_t decode_html_entities_utf8(char *dest, const char *src);
The function returns the length of the decoded string.
*/
+
+
+int encode_html_entities(char *dest, const char *src);
+
#endif
From 087ef2bd297a8b6205f3763b86d9ea0ee8309f79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Wed, 27 Dec 2017 22:10:44 +0000
Subject: [PATCH 04/15] size_t return
---
entities.c | 6 +++---
entities.h | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/entities.c b/entities.c
index cb33e41..3dfd356 100644
--- a/entities.c
+++ b/entities.c
@@ -391,10 +391,9 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
return (size_t)(to - dest);
}
-int encode_html_entities(char *dest, const char *src) {
+size_t encode_html_entities(char *dest, const char *src) {
char *to = dest;
for( const char *from = src ; *from ; from++ ) {
- int i = 9999;
if ( *from <= '+' ) {
sprintf(to,"%%%02x",*from);
to += 3;
@@ -406,6 +405,7 @@ int encode_html_entities(char *dest, const char *src) {
continue;
}
//if ( *from=='\r' || *from=='\n' ) continue;
+ unsigned i;
for( i=0 ; i
Date: Thu, 28 Dec 2017 09:42:16 +0000
Subject: [PATCH 05/15] const cast in cmp() to improve compatibility
---
entities.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/entities.c b/entities.c
index 3dfd356..18be7e6 100644
--- a/entities.c
+++ b/entities.c
@@ -273,8 +273,8 @@ static const char *const NAMED_ENTITIES[][2] = {
static int cmp(const void *key, const void *value)
{
- return strncmp((const char *)key, *(const char **)value,
- strlen(*(const char **)value));
+ return strncmp((const char *)key, *(const char *const *)value,
+ strlen(*(const char *const *)value));
}
static const char *get_named_entity(const char *name)
From 20d7f5ebc713eb5bfdbfb70f4b0a5ccabf7c724d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Thu, 28 Dec 2017 18:30:28 +0000
Subject: [PATCH 06/15] encoding with full test
---
entities.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/entities.c b/entities.c
index 18be7e6..452a9ee 100644
--- a/entities.c
+++ b/entities.c
@@ -394,23 +394,24 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
size_t encode_html_entities(char *dest, const char *src) {
char *to = dest;
for( const char *from = src ; *from ; from++ ) {
- if ( *from <= '+' ) {
- sprintf(to,"%%%02x",*from);
- to += 3;
- continue;
- }
- if ( *from<='z' ) {
- *to = *from;
- to++;
- continue;
- }
+ if ( *from>=0 ) {
+ if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) {
+ *to = *from;
+ to++;
+ continue;
+ }
+ }
//if ( *from=='\r' || *from=='\n' ) continue;
unsigned i;
for( i=0 ; i
Date: Thu, 28 Dec 2017 18:44:01 +0000
Subject: [PATCH 07/15] added commat; entity
---
entities.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/entities.c b/entities.c
index 452a9ee..fc8e50f 100644
--- a/entities.c
+++ b/entities.c
@@ -101,6 +101,7 @@ static const char *const NAMED_ENTITIES[][2] = {
{ "chi;", "χ" },
{ "circ;", "ˆ" },
{ "clubs;", "♣" },
+ { "commat;", "@" },
{ "cong;", "≅" },
{ "copy;", "©" },
{ "crarr;", "↵" },
From 1e13e72389c14e23cf914191f7b7d2d3bd9a00a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Thu, 28 Dec 2017 18:57:18 +0000
Subject: [PATCH 08/15] encoding test
---
t-entities.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/t-entities.c b/t-entities.c
index 6dd8c34..3c55602 100644
--- a/t-entities.c
+++ b/t-entities.c
@@ -30,6 +30,21 @@ int main(void)
assert(strcmp(buffer, SAMPLE) == 0);
}
+ {
+ static const char INPUT[] = "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>";
+ static char GOAL[] =
+ "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>";
+ char OUTPUT[sizeof GOAL];
+ char REVERT[sizeof GOAL];
+ assert(encode_html_entities(OUTPUT, INPUT) == sizeof GOAL - 1);
+ // printf("output: %s\n", OUTPUT);
+ assert(strcmp(OUTPUT,GOAL) == 0);
+ decode_html_entities_utf8(REVERT,OUTPUT);
+
+ assert(strcmp(INPUT,REVERT) == 0);
+ }
+
+
fprintf(stdout, "All tests passed :-)\n");
return EXIT_SUCCESS;
}
From e6a49b585332439be0cd2968b63766ddbddc820b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Mon, 23 Oct 2017 23:59:29 +0100
Subject: [PATCH 09/15] encoding
---
entities.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/entities.c b/entities.c
index 6fb6aa3..76ea019 100644
--- a/entities.c
+++ b/entities.c
@@ -391,3 +391,32 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
return (size_t)(to - dest);
}
+int encode_html_entities(char *dest, const char *src) {
+ char *to = dest;
+ for( const char *from = src ; *from ; from++ ) {
+ int i = 9999;
+ if ( *from <= '+' ) {
+ sprintf(to,"%%%02x",*from);
+ to += 3;
+ continue;
+ }
+ if ( *from<='z' ) {
+ *to = *from;
+ to++;
+ continue;
+ }
+ //if ( *from=='\r' || *from=='\n' ) continue;
+ for( i=0 ; i
Date: Tue, 24 Oct 2017 00:02:21 +0100
Subject: [PATCH 10/15] stdio added
---
entities.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/entities.c b/entities.c
index 76ea019..0f2afac 100644
--- a/entities.c
+++ b/entities.c
@@ -11,6 +11,7 @@
#include
#include
#include
+#include <stdio.h>
#define UNICODE_MAX 0x10FFFFul
@@ -419,4 +420,4 @@ int encode_html_entities(char *dest, const char *src) {
}
*to = 0;
return strlen(dest);
-}
\ No newline at end of file
+}
From d33c9829d19e3efb4e753f0f82062f2c5ba76725 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Tue, 24 Oct 2017 00:07:30 +0100
Subject: [PATCH 11/15] encode declaration
---
entities.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/entities.h b/entities.h
index d8e58ef..aeb556b 100644
--- a/entities.h
+++ b/entities.h
@@ -20,5 +20,9 @@ extern size_t decode_html_entities_utf8(char *dest, const char *src);
The function returns the length of the decoded string.
*/
+
+
+int encode_html_entities(char *dest, const char *src);
+
#endif
From 0fe8c7ff37dcdbe872446050911c8cd7b3896966 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miguel=20Leit=C3=A3o?=
Date: Wed, 27 Dec 2017 22:10:44 +0000
Subject: [PATCH 12/15] size_t return
---
entities.c | 6 +++---
entities.h | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/entities.c b/entities.c
index 0f2afac..cbdd385 100644
--- a/entities.c
+++ b/entities.c
@@ -392,10 +392,9 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
return (size_t)(to - dest);
}
-int encode_html_entities(char *dest, const char *src) {
+size_t encode_html_entities(char *dest, const char *src) {
char *to = dest;
for( const char *from = src ; *from ; from++ ) {
- int i = 9999;
if ( *from <= '+' ) {
sprintf(to,"%%%02x",*from);
to += 3;
@@ -407,6 +406,7 @@ int encode_html_entities(char *dest, const char *src) {
continue;
}
//if ( *from=='\r' || *from=='\n' ) continue;
+ unsigned i;
for( i=0 ; i
Date: Thu, 28 Dec 2017 18:30:28 +0000
Subject: [PATCH 13/15] encoding with full test
---
entities.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/entities.c b/entities.c
index cbdd385..fc8e50f 100644
--- a/entities.c
+++ b/entities.c
@@ -395,23 +395,24 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
size_t encode_html_entities(char *dest, const char *src) {
char *to = dest;
for( const char *from = src ; *from ; from++ ) {
- if ( *from <= '+' ) {
- sprintf(to,"%%%02x",*from);
- to += 3;
- continue;
- }
- if ( *from<='z' ) {
- *to = *from;
- to++;
- continue;
- }
+ if ( *from>=0 ) {
+ if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) {
+ *to = *from;
+ to++;
+ continue;
+ }
+ }
//if ( *from=='\r' || *from=='\n' ) continue;
unsigned i;
for( i=0 ; i
Date: Thu, 28 Dec 2017 18:57:18 +0000
Subject: [PATCH 14/15] encoding test
---
t-entities.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/t-entities.c b/t-entities.c
index 6dd8c34..3c55602 100644
--- a/t-entities.c
+++ b/t-entities.c
@@ -30,6 +30,21 @@ int main(void)
assert(strcmp(buffer, SAMPLE) == 0);
}
+ {
+ static const char INPUT[] = "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>";
+ static char GOAL[] =
+ "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>";
+ char OUTPUT[sizeof GOAL];
+ char REVERT[sizeof GOAL];
+ assert(encode_html_entities(OUTPUT, INPUT) == sizeof GOAL - 1);
+ // printf("output: %s\n", OUTPUT);
+ assert(strcmp(OUTPUT,GOAL) == 0);
+ decode_html_entities_utf8(REVERT,OUTPUT);
+
+ assert(strcmp(INPUT,REVERT) == 0);
+ }
+
+
fprintf(stdout, "All tests passed :-)\n");
return EXIT_SUCCESS;
}
From eeb211f375a92734cf5cb73f355fe77b01f21bda Mon Sep 17 00:00:00 2001
From: ooxi
Date: Fri, 29 Dec 2017 15:38:41 +0100
Subject: [PATCH 15/15] Misc stylistic unification
---
entities.c | 55 ++++++++++++++++++++++++++--------------------------
t-entities.c | 20 +++++++++++--------
2 files changed, 39 insertions(+), 36 deletions(-)
diff --git a/entities.c b/entities.c
index fc8e50f..00c107f 100644
--- a/entities.c
+++ b/entities.c
@@ -9,9 +9,9 @@
#include
#include
+#include <stdio.h>
#include
#include
-#include <stdio.h>
#define UNICODE_MAX 0x10FFFFul
@@ -393,33 +393,32 @@ size_t decode_html_entities_utf8(char *dest, const char *src)
}
size_t encode_html_entities(char *dest, const char *src) {
- char *to = dest;
- for( const char *from = src ; *from ; from++ ) {
- if ( *from>=0 ) {
- if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) {
- *to = *from;
- to++;
- continue;
- }
- }
- //if ( *from=='\r' || *from=='\n' ) continue;
- unsigned i;
- for( i=0 ; i=0 ) {
+ if ( (*from>='A' && *from<='Z') || (*from>='a' && *from<='z') ) {
+ *to = *from;
+ to++;
+ continue;
+ }
+ }
+ unsigned i;
+ for( i=0 ; iHello!!
";
- static char GOAL[] =
- "Miguel Leitão\ntest@example.org\n<p>Hello!!</p>";
+ static const char INPUT[] = ""
+ "Miguel Leitão\n"
+ "test@example.org\n"
+ "<p>Hello!!</p>";
+ static char GOAL[] = ""
+ "Miguel Leitão\n"
+ "test@example.org\n"
+ "<p>Hello!!</p>";
char OUTPUT[sizeof GOAL];
char REVERT[sizeof GOAL];
assert(encode_html_entities(OUTPUT, INPUT) == sizeof GOAL - 1);
- // printf("output: %s\n", OUTPUT);
- assert(strcmp(OUTPUT,GOAL) == 0);
- decode_html_entities_utf8(REVERT,OUTPUT);
-
- assert(strcmp(INPUT,REVERT) == 0);
+ assert(strcmp(OUTPUT, GOAL) == 0);
+
+ decode_html_entities_utf8(REVERT, OUTPUT);
+ assert(strcmp(INPUT, REVERT) == 0);
}