Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 74 additions & 51 deletions src/entry.c
Original file line number Diff line number Diff line change
@@ -1,39 +1,60 @@
#include <stdbool.h>
#include "server.h"
#include "serverassert.h"
#include "entry.h"

#include <stdbool.h>

/*-----------------------------------------------------------------------------
* Entry API
* Entry Implementation
*----------------------------------------------------------------------------*/

/* The entry pointer is the field sds. We encode the entry layout type
* in the field SDS header. Field type SDS_TYPE_5 doesn't have any spare bits to
* encode this so we use it only for the first layout type.
/* There are 3 different formats for the "entry". In all cases, the "entry" pointer points into the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are 4, right? The presence of an expiration only multiplies the previous 2 encoding types (embedded value and non-embedded value).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In types 2 and 3, the expiration is optional. I didn't consider/document these as fundamentally different types. If we do that, there would be 5 types.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I understand it better now. LGTM.

* allocation and is identical to the "field" sds pointer.
*
* Type 1: Field sds type is an SDS_TYPE_5
* With this type, both the key and value are embedded in the entry. Expiration is not allowed
* as the SDS_TYPE_5 (on field) doesn't contain any aux bits to encode the existence of an
* expiration. Extra padding is included in the value to the size of the physical block.
*
* entry
* |
* +---------V------------+----------------------------+
* | Field | Value |
* | sdshdr5 | "foo" \0 | sdshdr8 "bar" \0 (padding) |
* +---------+------------+----------------------------+
*
* Identified by: field sds type is SDS_TYPE_5
*
*
* Entry with embedded value, used for small sizes. The value is stored as
* SDS_TYPE_8. The field can use any SDS type.
* Type 2: Field sds type is an SDS_TYPE_8
* With this type, both the key and value are embedded. Extra bits in the sdshdr8 (on field)
* are used to encode aux flags which may indicate the presence of an optional expiration.
* Extra padding is included in the value to the size of the physical block.
*
* Entry can also have expiration timestamp, which is the UNIX timestamp for it to be expired.
* For aligned fast access, we keep the expiry timestamp prior to the start of the sds header.
* entry
* |
* +--------------+---------V------------+----------------------------+
* | Expire (opt) | Field | Value |
* | long long | sdshdr8 | "foo" \0 | sdshdr8 "bar" \0 (padding) |
* +--------------+---------+------------+----------------------------+
*
* +--------------+--------------+---------------+
* | Expiration | field | value |
* | 1234567890LL | hdr "foo" \0 | hdr8 "bar" \0 |
* +--------------+--------------+---------------+
* Identified by: sds type is SDS_TYPE_8 AND has embedded value
*
* Entry with value pointer, used for larger fields and values. The field is SDS
* type 8 or higher.
*
* +--------------+-------+--------------+
* | Expiration | value | field |
* | 1234567890LL | ptr | hdr "foo" \0 |
* +--------------+---^---+--------------+
* |
* |
* value pointer = value sds
* Type 3: Value is an sds, referenced by pointer
* With this type, the key is embedded, and the value is an sds, referenced by pointer. Extra
* bits in the sdshdr8(+) are used to encode aux flags which indicate the presence of a value by
* pointer. An aux bit may indicate the presence of an optional expiration. Note that the
* "field" is not padded, so there's no direct way to identify the length of the allocation.
*
* entry
* |
* +--------------+---------------+----------V----------+--------+
* | Expire (opt) | Value | Field | / / / /|
* | long long | sds (pointer) | sdshdr8+ | "foo" \0 |/ / / / |
* +--------------+-------+-------+----------+----------+--------+
* |
* +-> sds value
*
* Identified by: Aux bit FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR
*/

enum {
Expand All @@ -47,10 +68,15 @@ enum {
};
/* Compile-time guard: the entry aux bits must fit in the bits of one char that are left over
 * after SDS_TYPE_BITS. The bound has to be expressed in bits (8 per char here; CHAR_BIT from
 * <limits.h> is the portable spelling). Writing `sizeof(char) - SDS_TYPE_BITS` subtracts a bit
 * count from a byte count of 1 in unsigned (size_t) arithmetic, which wraps to a huge value for
 * any SDS_TYPE_BITS greater than 1, so that form of the check could never fail. */
static_assert(FIELD_SDS_AUX_BIT_MAX < (sizeof(char) * 8) - SDS_TYPE_BITS, "too many sds bits are used for entry metadata");

/* The entry pointer is the field sds, but that's an implementation detail. */
sds entryGetField(const entry *entry) {
return (sds)entry;
}

/* Returns true in case the entry's value is not embedded in the entry, i.e. the
* value is a separate sds referenced by a pointer.
* Returns false otherwise.
*
* The answer is read from the FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR aux bit of
* the field sds. */
static inline bool entryHasValuePtr(const entry *entry) {
return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR);
static bool entryHasValuePtr(const entry *entry) {
return sdsGetAuxBit(entryGetField(entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR);
}

/* Returns true in case the entry's value is embedded in the entry.
Expand All @@ -62,19 +88,14 @@ bool entryHasEmbeddedValue(entry *entry) {
/* Returns true in case the entry has an expiration timestamp.
* Returns false otherwise.
*
* The answer is read from the FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY aux bit of the
* field sds. */
bool entryHasExpiry(const entry *entry) {
return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY);
}

/* The entry pointer is the field sds, but that's an implementation detail. */
sds entryGetField(const entry *entry) {
return (sds)entry;
return sdsGetAuxBit(entryGetField(entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY);
}

/* Returns the location of a pointer to a separately allocated value. Only for
* an entry without an embedded value (asserted below). */
static sds *entryGetValueRef(const entry *entry) {
serverAssert(entryHasValuePtr(entry));
char *field_data = sdsAllocPtr(entry);
char *field_data = sdsAllocPtr(entryGetField(entry));
/* The value pointer occupies the sizeof(sds) bytes immediately before the
* address returned by sdsAllocPtr (presumably the start of the field's sds
* header - confirm against sds.c). */
field_data -= sizeof(sds);
return (sds *)field_data;
}
Expand All @@ -85,8 +106,8 @@ sds entryGetValue(const entry *entry) {
return *entryGetValueRef(entry);
} else {
/* Skip field content, field null terminator and value sds8 hdr. */
size_t offset = sdslen(entry) + 1 + sdsHdrSize(SDS_TYPE_8);
return (char *)entry + offset;
size_t offset = sdslen(entryGetField(entry)) + 1 + sdsHdrSize(SDS_TYPE_8);
return (sds)((char *)entry + offset);
}
}

Expand All @@ -106,7 +127,7 @@ entry *entrySetValue(entry *e, sds value) {

/* Returns the address of the entry allocation. */
void *entryGetAllocPtr(const entry *entry) {
char *buf = sdsAllocPtr(entry);
char *buf = sdsAllocPtr(entryGetField(entry));
if (entryHasValuePtr(entry)) buf -= sizeof(sds);
if (entryHasExpiry(entry)) buf -= sizeof(long long);
return buf;
Expand Down Expand Up @@ -243,16 +264,17 @@ static entry *entryWrite(char *buf,
buf_size -= embedded_value_sds_size;
}
}
/* Set the field data */
entry *new_entry = sdswrite(buf, embedded_field_sds_size, embedded_field_sds_type, field, sdslen(field));
/* Set the field data. When we write the field into the buffer, the entry pointer is the returned
* sds (after the sds header). */
entry *new_entry = (entry *)sdswrite(buf, embedded_field_sds_size, embedded_field_sds_type, field, sdslen(field));

/* Field sds aux bits are zero, which we use for this entry encoding. */
sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1);
sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0);
sdsSetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1);
sdsSetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0);

/* Check that the new entry was built correctly */
debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1));
debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0));
debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1));
debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0));
return new_entry;
}

Expand All @@ -275,7 +297,7 @@ entry *entryCreate(const_sds field, sds value, long long expiry) {
* Note that the value ownership is moved to this function and the caller should assume the
* value is no longer usable after calling this function. */
entry *entryUpdate(entry *e, sds value, long long expiry) {
sds field = (sds)e;
sds field = entryGetField(e);
entry *new_entry = NULL;

bool update_value = value ? true : false;
Expand Down Expand Up @@ -354,8 +376,8 @@ entry *entryUpdate(entry *e, sds value, long long expiry) {
entryFree(e);
}
/* Check that the new entry was built correctly */
debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1));
debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0));
debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1));
debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0));
serverAssert(new_entry);
return new_entry;
}
Expand All @@ -370,7 +392,7 @@ size_t entryMemUsage(entry *entry) {
* header could be too small for holding the real allocation size. */
mem += zmalloc_usable_size(entryGetAllocPtr(entry));
} else {
mem += sdsReqSize(sdslen(entry), sdsType(entry));
mem += sdsReqSize(sdslen(entryGetField(entry)), sdsType(entryGetField(entry)));
if (entryHasExpiry(entry)) mem += sizeof(long long);
}
mem += sdsAllocSize(entryGetValue(entry));
Expand All @@ -384,18 +406,19 @@ size_t entryMemUsage(entry *entry) {
* of sds strings.
* If the location of the entry changed we return the new location,
* otherwise we return NULL. */
entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) {
if (entryHasValuePtr(entry)) {
sds *value_ref = entryGetValueRef(entry);
entry *entryDefrag(entry *e, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) {
/* A value held by pointer is a separate sds: pass it to sdsdefragfn first and,
* if a non-NULL replacement comes back, store it in the entry's value slot. */
if (entryHasValuePtr(e)) {
sds *value_ref = entryGetValueRef(e);
sds new_value = sdsdefragfn(*value_ref);
if (new_value) *value_ref = new_value;
}
/* Now pass the allocation that holds the entry itself to defragfn. */
char *allocation = entryGetAllocPtr(entry);
char *allocation = entryGetAllocPtr(e);
char *new_allocation = defragfn(allocation);
if (new_allocation != NULL) {
/* Return the same offset into the new allocation as the entry's offset
* in the old allocation. */
return new_allocation + ((char *)entry - allocation);
/* NOTE(review): a pointer difference has type ptrdiff_t; storing it in an
* int narrows it. The offset looks small here (entry pointer minus the
* start of its own allocation), but ptrdiff_t would match the expression. */
int entry_pointer_offset = (char *)e - allocation;
return (entry *)(new_allocation + entry_pointer_offset);
}
/* defragfn returned NULL: report "not moved" to the caller. */
return NULL;
}
Expand Down
38 changes: 5 additions & 33 deletions src/entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,11 @@
*----------------------------------------------------------------------------*/

/*
* The entry pointer is the field `sds`. We encode the entry layout type
* in the SDS header.
*
* An entry represents a key–value pair with an optional expiration timestamp.
* The pointer of type `entry *` always points to the VALUE `sds`.
*
* Layout 1: Embedded Field and Value (Compact Form)
*
* +-------------------+-------------------+-------------------+
* | Expiration (opt) | Field (sds) | Value (sds) |
* | 8 bytes (int64_t) | "field" + header | "value" + header |
* +-------------------+-------------------+-------------------+
* ^
* |
* entry pointer
*
* - Both field and value are small and embedded.
* - The expiration is stored just before the first sds.
*
*
* Layout 2: Pointer-Based Value (Large Values)
*
* +-------------------+-------------------+------------------+
* | Expiration (opt) | Value pointer | Field (sds) |
* | 8 bytes (int64_t) | 8 bytes (void *) | "field" + header |
* +-------------------+-------------------+------------------+
* ^
* |
* entry pointer
*
* - The value is stored separately via a pointer.
* - Used for large value sizes. */
typedef void entry;
* An "entry" is a field/value sds pair, with an optional expiration time. The
* entry is used as part of the HASH datatype, and supports hash field expiration.
*/

typedef struct _entry entry;

/* The maximum allocation size we want to use for entries with embedded
* values. */
Expand Down
4 changes: 2 additions & 2 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -3432,8 +3432,8 @@ robj *setTypeDup(robj *o);
#define HASH_SET_COPY 0


void hashTypeFreeVolatileSet(robj *o); /* needed only for freeHashObject */
void hashTypeTrackEntry(robj *o, void *entry); /* needed only for rdbLoadObject */
void hashTypeFreeVolatileSet(robj *o); /* needed only for freeHashObject */
void hashTypeTrackEntry(robj *o, entry *entry); /* needed only for rdbLoadObject */
size_t hashTypeScanDefrag(robj *ob, size_t cursor, void *(*defragAlloc)(void *));
size_t hashTypeDeleteExpiredFields(robj *o, mstime_t now, unsigned long max_fields, robj **out_fields);

Expand Down
21 changes: 13 additions & 8 deletions src/t_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ typedef enum {
EXPIRATION_MODIFICATION_EXPIRE_ASAP = 2, /* if apply of the expiration modification was set to a time in the past (i.e field is immediately expired) */
} expiryModificationResult;

/* A vsetGetExpiryFunc: takes the element as an untyped pointer, so convert it
 * back to an entry before asking for its expiry. */
static long long entryGetExpiryVsetFunc(const void *e) {
    const entry *typed_entry = e;
    return entryGetExpiry(typed_entry);
}

/*-----------------------------------------------------------------------------
* Hash type Expiry API
*----------------------------------------------------------------------------*/
Expand Down Expand Up @@ -103,28 +108,28 @@ void hashTypeFreeVolatileSet(robj *o) {
hashTypeIgnoreTTL(o, true);
}

/* Adds `entry` to the volatile set of hash object `o`. When the hash has no
* volatile fields yet, the set is obtained via hashTypeGetOrcreateVolatileSet.
* Asserts that the entry was actually added. */
void hashTypeTrackEntry(robj *o, void *entry) {
void hashTypeTrackEntry(robj *o, entry *entry) {
vset *set;
if (hashTypeHasVolatileFields(o)) {
set = hashTypeGetVolatileSet(o);
} else {
set = hashTypeGetOrcreateVolatileSet(o);
}
bool added = vsetAddEntry(set, entryGetExpiry, entry);
bool added = vsetAddEntry(set, entryGetExpiryVsetFunc, entry);
serverAssert(added);
}

/* Removes `entry` from the volatile set of hash object `o`. Does nothing when
* the entry has no expiry. Asserts that the removal succeeded, and releases the
* volatile set through hashTypeFreeVolatileSet once it becomes empty. */
static void hashTypeUntrackEntry(robj *o, void *entry) {
static void hashTypeUntrackEntry(robj *o, entry *entry) {
if (!entryHasExpiry(entry)) return;
vset *set = hashTypeGetVolatileSet(o);
debugServerAssert(set);
serverAssert(vsetRemoveEntry(set, entryGetExpiry, entry));
serverAssert(vsetRemoveEntry(set, entryGetExpiryVsetFunc, entry));
if (vsetIsEmpty(set)) {
hashTypeFreeVolatileSet(o);
}
}

static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) {
static void hashTypeTrackUpdateEntry(robj *o, entry *old_entry, entry *new_entry, long long old_expiry, long long new_expiry) {
int old_tracked = (old_entry && old_expiry != EXPIRY_NONE);
int new_tracked = (new_entry && new_expiry != EXPIRY_NONE);
/* If entry was not tracked before and not going to be tracked now, we can simply return */
Expand All @@ -134,14 +139,14 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry,
vset *set = hashTypeGetOrcreateVolatileSet(o);
debugServerAssert(!old_tracked || !vsetIsEmpty(set));

serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1);
serverAssert(vsetUpdateEntry(set, entryGetExpiryVsetFunc, old_entry, new_entry, old_expiry, new_expiry) == 1);

if (vsetIsEmpty(set)) {
hashTypeFreeVolatileSet(o);
}
}

bool hashHashtableTypeValidate(hashtable *ht, void *entry) {
bool hashHashtableTypeValidate(hashtable *ht, entry *entry) {
UNUSED(ht);
expirationPolicy policy = getExpirationPolicyWithFlags(0);
if (policy == POLICY_IGNORE_EXPIRE) return true;
Expand Down Expand Up @@ -2156,7 +2161,7 @@ size_t hashTypeDeleteExpiredFields(robj *o, mstime_t now, unsigned long max_fiel
/* skip TTL checks temporarily (to allow hashtable pops) */
hashTypeIgnoreTTL(o, true);
expiryContext ctx = {.key = o, .fields = out_entries, .n_fields = 0};
size_t expired = vsetRemoveExpired(vset, entryGetExpiry, hashTypeExpireEntry, now, max_fields, &ctx);
size_t expired = vsetRemoveExpired(vset, entryGetExpiryVsetFunc, hashTypeExpireEntry, now, max_fields, &ctx);
serverAssert(ctx.n_fields <= max_fields);
if (vsetIsEmpty(vset)) {
hashTypeFreeVolatileSet(o);
Expand Down
Loading