Skip to content

Commit

Permalink
fine tuning of zsetstr/psetstr functions and added pad character trimming
Browse files Browse the repository at this point in the history
  • Loading branch information
suiginsoft committed Mar 1, 2017
1 parent 03f3679 commit 605202c
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 52 deletions.
2 changes: 0 additions & 2 deletions TODO
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ Other:
local cache invalidation with an atomic counter
* add secure versions of functions for explicitly clearing memory
and constant-time comparison
* add support for non-ASCII/UTF-8 character sets to pgetstr/psetstr
* add base32/base64 alphabet support
* more testing on other Linux and BSD distributions
* more testing with different C compilers and with older versions of compilers
* perform an audit on floating-point values, make sure we aren't using any
Expand Down
7 changes: 4 additions & 3 deletions hebimath.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,10 @@ enum {
HEBI_STR_RFC4648_BASE64 = 0x05,
HEBI_STR_ALPHABET_COUNT = 0x06,
HEBI_STR_ALPHABET_MASK = 0x0F,
HEBI_STR_RADIX = 0x10,
HEBI_STR_SIGN = 0x20,
HEBI_STR_TRIM = 0x40
HEBI_STR_PAD = 0x10,
HEBI_STR_RADIX = 0x20,
HEBI_STR_SIGN = 0x40,
HEBI_STR_TRIM = 0x80
};

struct hebi_psetstrstate
Expand Down
128 changes: 92 additions & 36 deletions src/p/psetstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,67 @@ static const struct alphabet_decoder decoders[HEBI_STR_ALPHABET_COUNT] = {
{ &rfc4648base64readradix, rfc4648base64digitlut, 64, '-', '\0', '=', 'A' }
};

/*
 * Skips a leading run of the character 'c' within str[start, end).
 *
 * Returns the index of the first character in the range that is not 'c',
 * or 'end' when every character in the range equals 'c'. If the range is
 * empty (start >= end), 'start' is returned unchanged.
 */
static inline size_t
trimleft(const char *restrict str, size_t start, size_t end, char c)
{
	size_t pos;

	for (pos = start; pos < end; pos++) {
		if (str[pos] != c)
			break;
	}

	return pos;
}

/*
 * Skips a trailing run of the character 'c' within str[start, end).
 *
 * Returns the new exclusive end index: one past the last character in the
 * range that is not 'c', or 'start' when every character in the range
 * equals 'c'. If the range is empty (start >= end), 'end' is returned
 * unchanged.
 */
static inline size_t
trimright(const char *restrict str, size_t start, size_t end, char c)
{
	size_t pos;

	for (pos = end; pos > start; pos--) {
		if (str[pos - 1] != c)
			break;
	}

	return pos;
}

/*
 * Narrows the range str[*start, *end) so that it neither begins nor ends
 * with a whitespace character, as classified by isspace().
 *
 * On entry *start and *end delimit the half-open range to examine; on
 * return they delimit the trimmed range. If the range consists solely of
 * whitespace, both are set to the same index (an empty range).
 */
static inline void
trimwhitespace(const char *restrict str, size_t *restrict start, size_t *restrict end)
{
	size_t first = *start;
	size_t last = *end;

	/*
	 * The <ctype.h> classifiers require an argument representable as
	 * unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
	 * would otherwise be passed as negative values, which is undefined.
	 */
	while (first < last && isspace((unsigned char)str[first]))
		first++;

	while (first < last && isspace((unsigned char)str[last - 1]))
		last--;

	*start = first;
	*end = last;
}

/*
 * Consumes an optional sign character at str[start].
 *
 * If the range str[start, end) is non-empty and its first character equals
 * the decoder's minus character, state->hm_sign is set to -1; if it equals
 * the decoder's plus character, state->hm_sign is set to 1. A decoder sign
 * character of '\0' is never matched. The minus character is tested first.
 *
 * Returns start + 1 when a sign character was consumed, otherwise 'start'
 * (in which case state is left untouched).
 */
static inline size_t
readsign(
		const struct alphabet_decoder *restrict decoder,
		struct hebi_psetstrstate *restrict state,
		const char *restrict str,
		size_t start,
		size_t end )
{
	char c;

	/* nothing to read from an empty range */
	if (start >= end)
		return start;

	c = str[start];

	if (decoder->minus != '\0' && c == decoder->minus)
		state->hm_sign = -1;
	else if (decoder->plus != '\0' && c == decoder->plus)
		state->hm_sign = 1;
	else
		return start;

	/* exactly one sign character was consumed */
	return start + 1;
}

static inline unsigned int
chartodigit(const unsigned char *digitlut, char c)
{
Expand Down Expand Up @@ -558,6 +619,9 @@ hebi_psetstr(
ASSERT(0 < n && n <= HEBI_PACKET_MAXLEN);

/* read and validate state */
if (state->hm_errcode != HEBI_ENONE)
return SIZE_MAX;

str = state->hm_str;
end = state->hm_end;
cur = state->hm_cur;
Expand All @@ -568,10 +632,7 @@ hebi_psetstr(
decoder = &decoders[state->hm_alphabet];

radix = state->hm_radix;
ASSERT(radix <= decoder->maxradix);

ASSERT(state->hm_errcode == HEBI_ENONE);
state->hm_errcode = HEBI_ENONE;
ASSERT(2 <= radix && radix <= decoder->maxradix);

/* setup to start reading digits */
size = 0;
Expand Down Expand Up @@ -655,47 +716,37 @@ hebi_psetstrprepare(
size_t end;
size_t space;

ASSERT(!base || (2 <= base && base <= 64));

cur = 0;
end = len;

/* trim whitespace */
if (flags & HEBI_STR_TRIM) {
while (cur < end && isspace(str[cur]))
cur++;
while (cur < end && isspace(str[end - 1]))
end--;
}

/* setup state */
/* setup initial state */
state->hm_str = str;
state->hm_len = len;
state->hm_start = cur;
state->hm_end = end;
state->hm_start = 0;
state->hm_end = len;
state->hm_radix = 0;
state->hm_sign = 1;
state->hm_errcode = HEBI_ENONE;

cur = 0;
end = len;

/* determine alphabet index and make sure it's valid */
state->hm_alphabet = flags & HEBI_STR_ALPHABET_MASK;
if (UNLIKELY(state->hm_alphabet >= HEBI_STR_ALPHABET_COUNT))
return error(state, cur, HEBI_EBADVALUE);
decoder = &decoders[state->hm_alphabet];

/* read sign character */
if ((flags & HEBI_STR_SIGN) && cur < end) {
if (str[cur] == decoder->minus &&
decoder->minus != '\0') {
state->hm_sign = -1;
cur++;
} else if (str[cur] == decoder->plus
&& decoder->plus != '\0') {
state->hm_sign = 1;
cur++;
}
/* trim whitespace & padding, adjust start & end positions */
if (flags & (HEBI_STR_TRIM | HEBI_STR_PAD)) {
if (flags & HEBI_STR_TRIM)
trimwhitespace(str, &cur, &end);
if ((flags & HEBI_STR_PAD) && decoder->pad != '\0')
end = trimright(str, cur, end, decoder->pad);
state->hm_start = cur;
state->hm_end = len;
}

/* read sign character */
if ((flags & HEBI_STR_SIGN) && cur < end)
cur = readsign(decoder, state, str, cur, end);

/* determine radix, reading optional radix prefix if present */
radix = base;
if (!radix || (flags & HEBI_STR_RADIX))
Expand All @@ -704,20 +755,25 @@ hebi_psetstrprepare(
if (UNLIKELY(radix < 2 || decoder->maxradix < radix))
return error(state, cur, HEBI_EBADVALUE);

/* consume leading zero characters and estimate remaining space */
/* check for empty digit sequence */
if (UNLIKELY(cur >= end))
return error(state, cur, HEBI_EBADSYNTAX);
while (cur < end && str[cur] == decoder->zero)
cur++;
state->hm_cur = cur;

/* consume leading zero characters and estimate remaining space */
cur = trimleft(str, cur, end, decoder->zero);
if (UNLIKELY(cur >= end)) {
state->hm_cur = end;
state->hm_sign = 0;
state->hm_errcode = HEBI_ENONE;
return 0;
}

/* estimate space needed for packet sequence */
space = estimatespace(cur, end, radix);
if (UNLIKELY(space >= HEBI_PACKET_MAXLEN))
return error(state, cur, HEBI_EBADLENGTH);

state->hm_cur = cur;
state->hm_errcode = HEBI_ENONE;
return space + 1;
}
25 changes: 14 additions & 11 deletions src/z/zsetstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "../../internal.h"
#include <string.h>

enum { FLAGS = HEBI_STR_RADIX | HEBI_STR_SIGN | HEBI_STR_TRIM };
enum { FLAGS = HEBI_STR_RADIX | HEBI_STR_SIGN };

HEBI_API
int
Expand All @@ -17,35 +17,38 @@ hebi_zsetstr(
unsigned int base,
unsigned int flags )
{
hebi_z q;
struct hebi_psetstrstate state;
size_t len;
size_t space;
size_t used;

hebi_zinit_push__(q, hebi_zallocator(r));

len = strlen(str);
space = hebi_psetstrprepare(&state, str, len, base, flags | FLAGS);

if (UNLIKELY(space == SIZE_MAX)) {
used = SIZE_MAX;
} else if (space > 0) {
hebi_zgrow__(r, space);
used = hebi_psetstr(r->hz_packs, space, &state);
if (0 < space && space != SIZE_MAX) {
hebi_zexpand__(q, space, 0);
used = hebi_psetstr(q->hz_packs, space, &state);
} else {
used = 0;
used = space;
}

if (endptr)
*endptr = (char *)str + state.hm_cur;

if (UNLIKELY(used == SIZE_MAX)) {
hebi_zsetzero(r);
if (state.hm_errcode != HEBI_EBADSYNTAX)
hebi_error_raise(HEBI_ERRDOM_HEBI, state.hm_errcode);
hebi_zsetzero(r);
hebi_zdestroy_pop__(q);
return 0;
}

ASSERT(used <= space);
r->hz_used = used;
r->hz_sign = state.hm_sign;
q->hz_used = used;
q->hz_sign = state.hm_sign;
hebi_zswap(q, r);
hebi_zdestroy_pop__(q);
return 1;
}

0 comments on commit 605202c

Please sign in to comment.