Skip to content

Commit

Permalink
Use simdutf to validate utf8 in websocket server (#11140)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jarred-Sumner authored May 17, 2024
1 parent dfb935d commit 902c258
Showing 1 changed file with 4 additions and 42 deletions.
46 changes: 4 additions & 42 deletions packages/bun-uws/src/WebSocketProtocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
#include <cstdlib>
#include <string_view>

// bun-specific
#include "simdutf.h"

namespace uWS {

/* We should not overcomplicate these */
Expand Down Expand Up @@ -109,50 +112,9 @@ T cond_byte_swap(T value) {
return value;
}

/* Reports whether the `length` bytes starting at `s` form valid UTF-8.
 *
 * Validation is delegated to simdutf (pulled in by the bun-specific
 * "simdutf.h" include above) instead of a hand-written byte loop.
 *
 * s      - first byte of the buffer to check; the bytes are only read, and the
 *          pointer stays non-const solely to keep the existing signature.
 * length - number of bytes to check.
 *
 * Returns the result of simdutf::validate_utf8 for that byte range. */
static bool isValidUtf8(unsigned char *s, size_t length)
{
    /* simdutf takes a `const char *`; the cast reinterprets the same bytes. */
    return simdutf::validate_utf8(reinterpret_cast<const char *>(s), length);
}

struct CloseFrame {
Expand Down

1 comment on commit 902c258

@uNetworkingAB
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. But this commit runs 40% faster than simdutf: uNetworking/uWebSockets@124cacb

Please sign in to comment.