diff --git a/src/entry.c b/src/entry.c index 097a36387c..cc9819e41c 100644 --- a/src/entry.c +++ b/src/entry.c @@ -1,39 +1,60 @@ -#include #include "server.h" #include "serverassert.h" #include "entry.h" -#include - /*----------------------------------------------------------------------------- - * Entry API + * Entry Implementation *----------------------------------------------------------------------------*/ -/* The entry pointer is the field sds. We encode the entry layout type - * in the field SDS header. Field type SDS_TYPE_5 doesn't have any spare bits to - * encode this so we use it only for the first layout type. +/* There are 3 different formats for the "entry". In all cases, the "entry" pointer points into the + * allocation and is identical to the "field" sds pointer. + * + * Type 1: Field sds type is an SDS_TYPE_5 + * With this type, both the key and value are embedded in the entry. Expiration is not allowed + * as the SDS_TYPE_5 (on field) doesn't contain any aux bits to encode the existence of an + * expiration. Extra padding is included in the value to the size of the physical block. + * + * entry + * | + * +---------V------------+----------------------------+ + * | Field | Value | + * | sdshdr5 | "foo" \0 | sdshdr8 "bar" \0 (padding) | + * +---------+------------+----------------------------+ + * + * Identified by: field sds type is SDS_TYPE_5 + * * - * Entry with embedded value, used for small sizes. The value is stored as - * SDS_TYPE_8. The field can use any SDS type. + * Type 2: Field sds type is an SDS_TYPE_8 type + * With this type, both the key and value are embedded. Extra bits in the sdshdr8 (on field) + * are used to encode aux flags which may indicate the presence of an optional expiration. + * Extra padding is included in the value to the size of the physical block. * - * Entry can also have expiration timestamp, which is the UNIX timestamp for it to be expired. 
- * For aligned fast access, we keep the expiry timestamp prior to the start of the sds header. + * entry + * | + * +--------------+---------V------------+----------------------------+ + * | Expire (opt) | Field | Value | + * | long long | sdshdr8 | "foo" \0 | sdshdr8 "bar" \0 (padding) | + * +--------------+---------+------------+----------------------------+ * - * +--------------+--------------+---------------+ - * | Expiration | field | value | - * | 1234567890LL | hdr "foo" \0 | hdr8 "bar" \0 | - * +--------------+--------------+---------------+ + * Identified by: sds type is SDS_TYPE_8 AND has embedded value * - * Entry with value pointer, used for larger fields and values. The field is SDS - * type 8 or higher. * - * +--------------+-------+--------------+ - * | Expiration | value | field | - * | 1234567890LL | ptr | hdr "foo" \0 | - * +--------------+---^---+--------------+ - * | - * | - * value pointer = value sds + * Type 3: Value is an sds, referenced by pointer + * With this type, the key is embedded, and the value is an sds, referenced by pointer. Extra + * bits in the sdshdr8(+) are used to encode aux flags which indicate the presence of a value by + * pointer. An aux bit may indicate the presence of an optional expiration. Note that the + * "field" is not padded, so there's no direct way to identify the length of the allocation. + * + * entry + * | + * +--------------+---------------+----------V----------+--------+ + * | Expire (opt) | Value | Field | / / / /| + * | long long | sds (pointer) | sdshdr8+ | "foo" \0 |/ / / / | + * +--------------+-------+-------+----------+----------+--------+ + * | + * +-> sds value + * + * Identified by: Aux bit FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR */ enum { @@ -47,10 +68,15 @@ enum { }; -static_assert(FIELD_SDS_AUX_BIT_MAX < sizeof(char) - SDS_TYPE_BITS, "too many sds bits are used for entry metadata"); +static_assert(FIELD_SDS_AUX_BIT_MAX < sizeof(char) * 8 - SDS_TYPE_BITS, "too many sds bits are used for entry metadata"); /* compare bit counts; a byte count minus a bit count wraps in size_t and never fires */ +/* The entry pointer is the field sds, but that's an implementation detail. 
*/ +sds entryGetField(const entry *entry) { + return (sds)entry; +} + /* Returns true in case the entry's value is not embedded in the entry. * Returns false otherwise. */ -static inline bool entryHasValuePtr(const entry *entry) { - return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); +static bool entryHasValuePtr(const entry *entry) { + return sdsGetAuxBit(entryGetField(entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); } /* Returns true in case the entry's value is embedded in the entry. @@ -62,19 +88,14 @@ bool entryHasEmbeddedValue(entry *entry) { /* Returns true in case the entry has expiration timestamp. * Returns false otherwise. */ bool entryHasExpiry(const entry *entry) { - return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY); -} - -/* The entry pointer is the field sds, but that's an implementation detail. */ -sds entryGetField(const entry *entry) { - return (sds)entry; + return sdsGetAuxBit(entryGetField(entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY); } /* Returns the location of a pointer to a separately allocated value. Only for * an entry without an embedded value. */ static sds *entryGetValueRef(const entry *entry) { serverAssert(entryHasValuePtr(entry)); - char *field_data = sdsAllocPtr(entry); + char *field_data = sdsAllocPtr(entryGetField(entry)); field_data -= sizeof(sds); return (sds *)field_data; } @@ -85,8 +106,8 @@ sds entryGetValue(const entry *entry) { return *entryGetValueRef(entry); } else { /* Skip field content, field null terminator and value sds8 hdr. */ - size_t offset = sdslen(entry) + 1 + sdsHdrSize(SDS_TYPE_8); - return (char *)entry + offset; + size_t offset = sdslen(entryGetField(entry)) + 1 + sdsHdrSize(SDS_TYPE_8); + return (sds)((char *)entry + offset); } } @@ -106,7 +127,7 @@ entry *entrySetValue(entry *e, sds value) { /* Returns the address of the entry allocation. 
*/ void *entryGetAllocPtr(const entry *entry) { - char *buf = sdsAllocPtr(entry); + char *buf = sdsAllocPtr(entryGetField(entry)); if (entryHasValuePtr(entry)) buf -= sizeof(sds); if (entryHasExpiry(entry)) buf -= sizeof(long long); return buf; @@ -243,16 +264,17 @@ static entry *entryWrite(char *buf, buf_size -= embedded_value_sds_size; } } - /* Set the field data */ - entry *new_entry = sdswrite(buf, embedded_field_sds_size, embedded_field_sds_type, field, sdslen(field)); + /* Set the field data. When we write the field into the buffer, the entry pointer is the returned + * sds (after the sds header). */ + entry *new_entry = (entry *)sdswrite(buf, embedded_field_sds_size, embedded_field_sds_type, field, sdslen(field)); /* Field sds aux bits are zero, which we use for this entry encoding. */ - sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1); - sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0); + sdsSetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1); + sdsSetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0); /* Check that the new entry was built correctly */ - debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); - debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); + debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); + debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); return new_entry; } @@ -275,7 +297,7 @@ entry *entryCreate(const_sds field, sds value, long long expiry) { * Note that the value ownership is moved to this function and the caller should assume the * value is no longer usable after calling this function. 
*/ entry *entryUpdate(entry *e, sds value, long long expiry) { - sds field = (sds)e; + sds field = entryGetField(e); entry *new_entry = NULL; bool update_value = value ? true : false; @@ -354,8 +376,8 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { entryFree(e); } /* Check that the new entry was built correctly */ - debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); - debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); + debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); + debugServerAssert(sdsGetAuxBit(entryGetField(new_entry), FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); serverAssert(new_entry); return new_entry; } @@ -370,7 +392,7 @@ size_t entryMemUsage(entry *entry) { * header could be too small for holding the real allocation size. */ mem += zmalloc_usable_size(entryGetAllocPtr(entry)); } else { - mem += sdsReqSize(sdslen(entry), sdsType(entry)); + mem += sdsReqSize(sdslen(entryGetField(entry)), sdsType(entryGetField(entry))); if (entryHasExpiry(entry)) mem += sizeof(long long); } mem += sdsAllocSize(entryGetValue(entry)); @@ -384,18 +406,19 @@ size_t entryMemUsage(entry *entry) { * of sds strings. * If the location of the entry changed we return the new location, * otherwise we return NULL. 
*/ -entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) { - if (entryHasValuePtr(entry)) { - sds *value_ref = entryGetValueRef(entry); +entry *entryDefrag(entry *e, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) { + if (entryHasValuePtr(e)) { + sds *value_ref = entryGetValueRef(e); sds new_value = sdsdefragfn(*value_ref); if (new_value) *value_ref = new_value; } - char *allocation = entryGetAllocPtr(entry); + char *allocation = entryGetAllocPtr(e); char *new_allocation = defragfn(allocation); if (new_allocation != NULL) { /* Return the same offset into the new allocation as the entry's offset * in the old allocation. */ - return new_allocation + ((char *)entry - allocation); + size_t entry_pointer_offset = (size_t)((char *)e - allocation); /* e never precedes its allocation; keep full pointer width instead of narrowing to int */ + return (entry *)(new_allocation + entry_pointer_offset); } return NULL; } diff --git a/src/entry.h b/src/entry.h index f23f3dfc7b..d4a24daacb 100644 --- a/src/entry.h +++ b/src/entry.h @@ -9,39 +9,11 @@ *----------------------------------------------------------------------------*/ /* - * The entry pointer is the field `sds`. We encode the entry layout type - * in the SDS header. - * - * An entry represents a key–value pair with an optional expiration timestamp. - * The pointer of type `entry *` always points to the VALUE `sds`. - * - * Layout 1: Embedded Field and Value (Compact Form) - * - * +-------------------+-------------------+-------------------+ - * | Expiration (opt) | Field (sds) | Value (sds) | - * | 8 bytes (int64_t) | "field" + header | "value" + header | - * +-------------------+-------------------+-------------------+ - * ^ - * | - * entry pointer - * - * - Both field and value are small and embedded. - * - The expiration is stored just before the first sds. 
- * - * - * Layout 2: Pointer-Based Value (Large Values) - * - * +-------------------+-------------------+------------------+ - * | Expiration (opt) | Value pointer | Field (sds) | - * | 8 bytes (int64_t) | 8 bytes (void *) | "field" + header | - * +-------------------+-------------------+------------------+ - * ^ - * | - * entry pointer - * - * - The value is stored separately via a pointer. - * - Used for large value sizes. */ -typedef void entry; + * An "entry" is a field/value sds pair, with an optional expiration time. The + * entry is used as part of the HASH datatype, and supports hash field expiration. + */ + +typedef struct _entry entry; /* The maximum allocation size we want to use for entries with embedded * values. */ diff --git a/src/server.h b/src/server.h index 25f01a31e4..3fb0eecc27 100644 --- a/src/server.h +++ b/src/server.h @@ -3432,8 +3432,8 @@ robj *setTypeDup(robj *o); #define HASH_SET_COPY 0 -void hashTypeFreeVolatileSet(robj *o); /* needed only for freeHashObject */ -void hashTypeTrackEntry(robj *o, void *entry); /* needed only for rdbLoadObject */ +void hashTypeFreeVolatileSet(robj *o); /* needed only for freeHashObject */ +void hashTypeTrackEntry(robj *o, entry *entry); /* needed only for rdbLoadObject */ size_t hashTypeScanDefrag(robj *ob, size_t cursor, void *(*defragAlloc)(void *)); size_t hashTypeDeleteExpiredFields(robj *o, mstime_t now, unsigned long max_fields, robj **out_fields); diff --git a/src/t_hash.c b/src/t_hash.c index f5200ea342..bc8dc9cdc1 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -52,6 +52,11 @@ typedef enum { EXPIRATION_MODIFICATION_EXPIRE_ASAP = 2, /* if apply of the expiration modification was set to a time in the past (i.e field is immediately expired) */ } expiryModificationResult; +/* Adapter used as a vsetGetExpiryFunc: receives the entry as an untyped pointer and forwards it to entryGetExpiry(). */ +static long long entryGetExpiryVsetFunc(const void *e) { + return entryGetExpiry((const entry *)e); +} + /*----------------------------------------------------------------------------- * Hash type Expiry API 
*----------------------------------------------------------------------------*/ @@ -103,28 +108,28 @@ void hashTypeFreeVolatileSet(robj *o) { hashTypeIgnoreTTL(o, true); } -void hashTypeTrackEntry(robj *o, void *entry) { +void hashTypeTrackEntry(robj *o, entry *entry) { vset *set; if (hashTypeHasVolatileFields(o)) { set = hashTypeGetVolatileSet(o); } else { set = hashTypeGetOrcreateVolatileSet(o); } - bool added = vsetAddEntry(set, entryGetExpiry, entry); + bool added = vsetAddEntry(set, entryGetExpiryVsetFunc, entry); serverAssert(added); } -static void hashTypeUntrackEntry(robj *o, void *entry) { +static void hashTypeUntrackEntry(robj *o, entry *entry) { if (!entryHasExpiry(entry)) return; vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); - serverAssert(vsetRemoveEntry(set, entryGetExpiry, entry)); + serverAssert(vsetRemoveEntry(set, entryGetExpiryVsetFunc, entry)); if (vsetIsEmpty(set)) { hashTypeFreeVolatileSet(o); } } -static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +static void hashTypeTrackUpdateEntry(robj *o, entry *old_entry, entry *new_entry, long long old_expiry, long long new_expiry) { int old_tracked = (old_entry && old_expiry != EXPIRY_NONE); int new_tracked = (new_entry && new_expiry != EXPIRY_NONE); /* If entry was not tracked before and not going to be tracked now, we can simply return */ @@ -134,14 +139,14 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, vset *set = hashTypeGetOrcreateVolatileSet(o); debugServerAssert(!old_tracked || !vsetIsEmpty(set)); - serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); + serverAssert(vsetUpdateEntry(set, entryGetExpiryVsetFunc, old_entry, new_entry, old_expiry, new_expiry) == 1); if (vsetIsEmpty(set)) { hashTypeFreeVolatileSet(o); } } -bool hashHashtableTypeValidate(hashtable *ht, void *entry) { +bool hashHashtableTypeValidate(hashtable 
*ht, entry *entry) { UNUSED(ht); expirationPolicy policy = getExpirationPolicyWithFlags(0); if (policy == POLICY_IGNORE_EXPIRE) return true; @@ -2156,7 +2161,7 @@ size_t hashTypeDeleteExpiredFields(robj *o, mstime_t now, unsigned long max_fiel /* skip TTL checks temporarily (to allow hashtable pops) */ hashTypeIgnoreTTL(o, true); expiryContext ctx = {.key = o, .fields = out_entries, .n_fields = 0}; - size_t expired = vsetRemoveExpired(vset, entryGetExpiry, hashTypeExpireEntry, now, max_fields, &ctx); + size_t expired = vsetRemoveExpired(vset, entryGetExpiryVsetFunc, hashTypeExpireEntry, now, max_fields, &ctx); serverAssert(ctx.n_fields <= max_fields); if (vsetIsEmpty(vset)) { hashTypeFreeVolatileSet(o);