Redis系列（九）：資料結構Hash原始碼解析和HSET、HGET命令

阿新 • • 發佈：2020-07-01

2.原始碼解析

1.相關命令如下：

    /* Hash command rows of the server command table.
     * Each row: name, handler, arity, flag string, then numeric bookkeeping
     * fields. A negative arity is how the variadic commands (hmset, hmget,
     * hdel, hscan) are declared.
     * NOTE(review): the numeric fields were lost when the article was
     * extracted; they are restored here from the upstream Redis table
     * (arity, flags=0, first key=1, last key=1, key step=1, microseconds=0,
     * calls=0) -- confirm against the exact Redis version being discussed. */
    {"hset",hsetCommand,4,"wmF",0,NULL,1,1,1,0,0},
    {"hsetnx",hsetnxCommand,4,"wmF",0,NULL,1,1,1,0,0},
    {"hget",hgetCommand,3,"rF",0,NULL,1,1,1,0,0},
    {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
    {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
    {"hincrby",hincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
    {"hincrbyfloat",hincrbyfloatCommand,4,"wmF",0,NULL,1,1,1,0,0},
    {"hdel",hdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
    {"hlen",hlenCommand,2,"rF",0,NULL,1,1,1,0,0},
    {"hstrlen",hstrlenCommand,3,"rF",0,NULL,1,1,1,0,0},
    {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
    {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
    {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
    {"hexists",hexistsCommand,3,"rF",0,NULL,1,1,1,0,0},
    {"hscan",hscanCommand,-3,"rR",0,NULL,1,1,1,0,0},

2.ziplist資料結構

/* Decoded view of a single ziplist entry.
 *
 * We use this structure to receive information about a ziplist entry.
 * Note that this is not how the data is actually encoded, is just what we
 * get filled by a function in order to operate more easily. */
typedef struct zlentry {
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
    unsigned int prevrawlen;     /* Previous entry len. */
    unsigned int lensize;        /* Bytes used to encode this entry type/len.
                                    For example strings have a 1, 2 or 5 bytes
                                    header. Integers always use a single byte.*/
    unsigned int len;            /* Bytes used to represent the actual entry.
                                    For strings this is just the string length
                                    while for integers it is 1, 2, 3, 4, 8 or
                                    0 (for 4 bit immediate) depending on the
                                    number range. */
    unsigned int headersize;     /* prevrawlensize + lensize. */
    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on
                                    the entry encoding. However for 4 bits
                                    immediate integers this can assume a range
                                    of values and must be range-checked. */
    unsigned char *p;            /* Pointer to the very start of the entry, that
                                    is, this points to prev-entry-len field. */
} zlentry;

3.hashtable資料結構

/* One key/value slot of a hash table. The value is stored in a union so it
 * can be a pointer, a 64 bit unsigned/signed integer or a double. 'next'
 * links to another dictEntry. */
typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry;

/* Table of callbacks that customise a dictionary: hashing, key/value
 * duplication, key comparison and key/value destruction. */
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht;

/* A dictionary: its callbacks, the private data handed to them, and the
 * pair of hash tables used while rehashing incrementally. */
typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2];   /* two tables: the old one and the new one (see dictht) */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

hset

/* t_hash.c -- HSET key field value
 *
 * argv[1] is the key, argv[2] the field and argv[3] the value. Replies with
 * shared.czero when an existing field was overwritten and shared.cone when
 * a new field was created. */
void hsetCommand(client *c) {
    int update;
    robj *o;

    /* 1. Look up the hash stored at the key, creating it when it does not
     *    exist yet. A NULL return means the helper already handled the
     *    failure, so there is nothing more to do here. */
    if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;

    /* 2. Check arguments 2..3 (field and value): if either is too long the
     *    compact ziplist representation is converted to a real hash table.
     *    The converted table is visible through o->ptr. */
    hashTypeTryConversion(o,c->argv,2,3);

    /* 3. Store the field/value pair into the hash held by 'o'. */
    update = hashTypeSet(o,c->argv[2]->ptr,c->argv[3]->ptr,HASH_SET_COPY);
    addReply(c, update ? shared.czero : shared.cone);

    /* Publish the modification: signal the key as modified, emit the
     * keyspace event and count the change in server.dirty. */
    signalModifiedKey(c->db,c->argv[1]);
    notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
    server.dirty++;
}

/* Step 1 (t_hash.c): fetch the hash object stored at 'key' for writing.
 *
 * When the key is missing a fresh hash object is created and added to the
 * database. When the key holds a value whose type is not OBJ_HASH, the
 * shared wrong-type error is sent to the client and NULL is returned. */
robj *hashTypeLookupWriteOrCreate(client *c, robj *key) {
    robj *found = lookupKeyWrite(c->db,key);

    if (found != NULL) {
        /* An existing key must already be a hash; it is never overwritten. */
        if (found->type == OBJ_HASH) return found;
        addReply(c,shared.wrongtypeerr);
        return NULL;
    }

    /* Not found: createHashObject() gives us a new hash object. */
    found = createHashObject();
    dbAdd(c->db,key,found);
    return found;
}

/* object.c: build a new hash object backed by a freshly created ziplist and
 * tagged with the OBJ_ENCODING_ZIPLIST encoding. */
robj *createHashObject(void) {
    robj *hash = createObject(OBJ_HASH, ziplistNew());

    hash->encoding = OBJ_ENCODING_ZIPLIST;
    return hash;
}

/* ziplist.c: sample helper that builds a small ziplist by pushing four
 * entries; each push passes the byte length of the string being added.
 * NOTE(review): the length arguments and the digits of the last literal
 * were lost when the article was extracted. They are restored from the
 * upstream ziplist.c helper ("1024", length 4) -- confirm against the
 * Redis version being discussed. */
static unsigned char *createList() {
    unsigned char *zl = ziplistNew();
    zl = ziplistPush(zl, (unsigned char*)"foo", 3, ZIPLIST_TAIL);
    zl = ziplistPush(zl, (unsigned char*)"quux", 4, ZIPLIST_TAIL);
    zl = ziplistPush(zl, (unsigned char*)"hello", 5, ZIPLIST_HEAD);
    zl = ziplistPush(zl, (unsigned char*)"1024", 4, ZIPLIST_TAIL);
    return zl;
}

/* Step 2: decide whether a ziplist encoded hash must become a real hash
 * table because of the size of the arguments about to be stored.
 *
 * Check the length of a number of objects to see if we need to convert a
 * ziplist to a real hash. Note that we only check string encoded objects
 * as their string length can be queried in constant time.
 *
 * argv[start..end] (both inclusive) are inspected. */
void hashTypeTryConversion(robj *o, robj **argv, int start, int end) {
    int idx;

    /* Only a ziplist encoded hash can be converted. */
    if (o->encoding != OBJ_ENCODING_ZIPLIST) return;

    for (idx = start; idx <= end; idx++) {
        /* Skip arguments that are not sds encoded strings. */
        if (!sdsEncodedObject(argv[idx])) continue;

        /* Still within server.hash_max_ziplist_value: keep looking. */
        if (sdslen(argv[idx]->ptr) <= server.hash_max_ziplist_value) continue;

        /* One oversized argument is enough: convert once and stop, there
         * is no point in checking the remaining arguments. */
        hashTypeConvert(o, OBJ_ENCODING_HT);
        return;
    }
}

/* t_hash.c: convert the hash 'o' to encoding 'enc', dispatching on the
 * current encoding of the object. Only a ziplist source is implemented;
 * any other current encoding ends in serverPanic(). */
void hashTypeConvert(robj *o, int enc) {
    switch (o->encoding) {
    case OBJ_ENCODING_ZIPLIST:
        /* The only conversion actually handled (ziplist -> enc). */
        hashTypeConvertZiplist(o, enc);
        break;
    case OBJ_ENCODING_HT:
        serverPanic("Not implemented");
        break;
    default:
        serverPanic("Unknown hash encoding");
        break;
    }
}

/* t_hash.c: convert a ziplist encoded hash to the target encoding 'enc'.
 *
 * OBJ_ENCODING_ZIPLIST is a no-op, OBJ_ENCODING_HT rebuilds the content as
 * a dict, anything else ends in serverPanic(). */
void hashTypeConvertZiplist(robj *o, int enc) {
    hashTypeIterator *iter;
    dict *table;

    serverAssert(o->encoding == OBJ_ENCODING_ZIPLIST);

    if (enc == OBJ_ENCODING_ZIPLIST) {
        /* Nothing to do... */
        return;
    }
    if (enc != OBJ_ENCODING_HT) {
        serverPanic("Unknown hash encoding");
        return;
    }

    /* Walk the current content with an iterator and copy every pair into a
     * newly created dict. */
    iter = hashTypeInitIterator(o);
    table = dictCreate(&hashDictType, NULL);

    while (hashTypeNext(iter) != C_ERR) {
        /* Fresh sds copies of the field and value at the cursor. */
        sds field = hashTypeCurrentObjectNewSds(iter,OBJ_HASH_KEY);
        sds val = hashTypeCurrentObjectNewSds(iter,OBJ_HASH_VALUE);

        if (dictAdd(table, field, val) == DICT_OK) continue;

        /* dictAdd() refused the pair: dump the ziplist and panic. */
        serverLogHexDump(LL_WARNING,"ziplist with dup elements dump",
            o->ptr,ziplistBlobLen(o->ptr));
        serverPanic("Ziplist corruption detected");
    }

    hashTypeReleaseIterator(iter);

    /* Release the old ziplist and publish the dict through the object. */
    zfree(o->ptr);
    o->encoding = OBJ_ENCODING_HT;
    o->ptr = table;
}

/* Step 2.1 (t_hash.c): advance a hash iterator.
 *
 * Move to the next entry in the hash. Return C_OK when the next entry
 * could be found and C_ERR when the iterator reaches the end. */
int hashTypeNext(hashTypeIterator *hi) {
    if (hi->encoding == OBJ_ENCODING_ZIPLIST) {
        unsigned char *zl;
        unsigned char *fptr, *vptr;

        /* Always start from the ziplist held by the iterated object; the
         * cursor itself is the (fptr, vptr) pair saved in the iterator. */
        zl = hi->subject->ptr;
        fptr = hi->fptr;
        vptr = hi->vptr;

        /* The first call locates the entry at index 0; later calls step
         * forward from the value of the previous pair. */
        if (fptr == NULL) {
            /* Initialize cursor */
            serverAssert(vptr == NULL);
            fptr = ziplistIndex(zl, 0);
        } else {
            /* Advance cursor */
            serverAssert(vptr != NULL);
            fptr = ziplistNext(zl, vptr);
        }
        if (fptr == NULL) return C_ERR;

        /* Grab pointer to the value (fptr points to the field) */
        vptr = ziplistNext(zl, fptr);
        serverAssert(vptr != NULL);

        /* fptr, vptr now point to the first or next pair */
        hi->fptr = fptr;
        hi->vptr = vptr;
    } else if (hi->encoding == OBJ_ENCODING_HT) {
        if ((hi->de = dictNext(hi->di)) == NULL) return C_ERR;
    } else {
        serverPanic("Unknown hash encoding");
    }
    return C_OK;
}

// ziplist.c, 查詢 index 的元素

/* Returns an offset to use for iterating with ziplistNext. When the given

 * index is negative, the list is traversed back to front. When the list

 * doesn't contain an element at the provided index, NULL is returned. */

unsigned char *ziplistIndex(unsigned char *zl, int index) {

    unsigned char *p;

    unsigned int prevlensize, prevlen = ;

    if (index < ) {

        // 小於0時，反向查詢

        index = (-index)-;

        p = ZIPLIST_ENTRY_TAIL(zl);

        if (p[] != ZIP_END) {

            ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);

            while (prevlen >  && index--) {

                p -= prevlen;

                ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);

            }

        }

    } else {

        p = ZIPLIST_ENTRY_HEAD(zl);

        while (p[] != ZIP_END && index--) {

            p += zipRawEntryLength(p);

        }

    }

    // 迭代完成還沒找到元素 p[0]=ZIP_END

    // index 超出整體ziplist大小則遍歷完成後 index>0

    return (p[] == ZIP_END || index > ) ? NULL : p;

}

// ziplist.c, 由 fptr,vptr 進行迭代元素

/* Return pointer to next entry in ziplist.

 *

 * zl is the pointer to the ziplist

 * p is the pointer to the current element

 *

 * The element after 'p' is returned, otherwise NULL if we are at the end. */

unsigned char *ziplistNext(unsigned char *zl, unsigned char *p) {

    ((void) zl);

    /* "p" could be equal to ZIP_END, caused by ziplistDelete,

     * and we should return NULL. Otherwise, we should return NULL

     * when the *next* element is ZIP_END (there is no next entry). */

    if (p[] == ZIP_END) {

        return NULL;

    }

    // 當前指標偏移當前元素長度（根據ziplist協議），即到下一元素指標位置

    p += zipRawEntryLength(p);

    if (p[] == ZIP_END) {

        return NULL;

    }

    return p;

}

/* Total size in bytes of the entry starting at 'p': the bytes of the
 * previous-length field, plus the bytes of the encoding/length field, plus
 * the payload length. */
static unsigned int zipRawEntryLength(unsigned char *p) {
    unsigned int prevlensize;
    unsigned int encoding;
    unsigned int lensize;
    unsigned int len;
    unsigned int total;

    ZIP_DECODE_PREVLENSIZE(p, prevlensize);
    /* The encoding/length field starts right after the prevlen field. */
    ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);

    total = prevlensize + lensize + len;
    return total;
}

/* Step 2.2 (t_hash.c): return the field or the value (selected by 'what')
 * at the current iterator position as a newly created SDS string.
 *
 * When hashTypeCurrentObject() hands back a string pointer the new SDS is
 * built from it; otherwise it is built from the integer result. */
sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what) {
    unsigned char *str;
    unsigned int len;
    long long num;

    hashTypeCurrentObject(hi,what,&str,&len,&num);
    return str ? sdsnewlen(str,len) : sdsfromlonglong(num);
}

/* Encoding independent accessor for the element under the iterator cursor.
 *
 * The element is returned by reference: as a string through *vstr / *vlen,
 * or as a number through *vll. On the ziplist path *vstr is reset to NULL
 * before the lookup, so a caller can tell a number was returned by
 * checking vstr == NULL. On the hash table path the element is always
 * returned as a string. */
void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll) {
    switch (hi->encoding) {
    case OBJ_ENCODING_ZIPLIST:
        *vstr = NULL;
        hashTypeCurrentFromZiplist(hi, what, vstr, vlen, vll);
        break;
    case OBJ_ENCODING_HT: {
        sds ele = hashTypeCurrentFromHashTable(hi, what);
        *vstr = (unsigned char*) ele;
        *vlen = sdslen(ele);
        break;
    }
    default:
        serverPanic("Unknown hash encoding");
        break;
    }
}

/* t_hash.c: read the element under the cursor of an iterator over a
 * ziplist encoded hash; the result is written through vstr/vlen/vll.
 *
 * Get the field or value at iterator cursor, for an iterator on a hash value
 * encoded as a ziplist. Prototype is similar to `hashTypeGetFromZiplist`. */
void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
                                unsigned char **vstr,
                                unsigned int *vlen,
                                long long *vll)
{
    unsigned char *cursor;
    int ret;

    serverAssert(hi->encoding == OBJ_ENCODING_ZIPLIST);

    /* OBJ_HASH_KEY selects the field pointer, anything else the value. */
    cursor = (what & OBJ_HASH_KEY) ? hi->fptr : hi->vptr;

    ret = ziplistGet(cursor, vstr, vlen, vll);
    serverAssert(ret);
}

// ziplist.c,

/* Get entry pointed to by 'p' and store in either '*sstr' or 'sval' depending

 * on the encoding of the entry. '*sstr' is always set to NULL to be able

 * to find out whether the string pointer or the integer value was set.

 * Return 0 if 'p' points to the end of the ziplist, 1 otherwise. */

unsigned int ziplistGet(unsigned char *p, unsigned char **sstr, unsigned int *slen, long long *sval) {

    zlentry entry;

    if (p == NULL || p[] == ZIP_END) return ;

    if (sstr) *sstr = NULL;

    // 按照ziplist的編碼協議, 獲取頭部資訊

    zipEntry(p, &entry);

    if (ZIP_IS_STR(entry.encoding)) {

        if (sstr) {

            *slen = entry.len;

            *sstr = p+entry.headersize;

        }

    } else {

        if (sval) {

            *sval = zipLoadInteger(p+entry.headersize,entry.encoding);

        }

    }

    return ;

}

/* ziplist.c: decode the raw bytes at 'p' into the zlentry pointed to by 'e',
 * filling in every field of the structure. */
static void zipEntry(unsigned char *p, zlentry *e) {
    /* Remember where the entry starts. */
    e->p = p;

    /* First field: size of the previous-length field and its value. */
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);

    /* Second field, right after the first: encoding, size of the length
     * field and payload length. */
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);

    /* The payload begins after both header fields. */
    e->headersize = e->prevrawlensize + e->lensize;
}

header

/* ziplist.c
 *
 * Decode the length of the previous element, from the perspective of the entry
 * pointed to by 'ptr'.
 *
 * ZIP_DECODE_PREVLENSIZE first tells how many bytes the field uses. With a
 * 1 byte field the length is that byte itself. With a 5 byte field the
 * first byte is only a marker and the length is the 4 byte integer stored
 * after it (byte-swapped on big endian hosts by memrev32ifbe).
 *
 * Explanatory notes live in this comment on purpose: a line comment inside
 * the body, on a line without a trailing backslash, would end the macro. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else if ((prevlensize) == 5) {                                           \
        assert(sizeof((prevlensize)) == 4);                                    \
        memcpy(&(prevlen), ((char*)(ptr)) + 1, 4);                             \
        memrev32ifbe(&(prevlen));                                              \
    }                                                                          \
} while(0)

/* Decode the number of bytes required to store the length of the previous
 * element, from the perspective of the entry pointed to by 'ptr'.
 *
 * A first byte below ZIP_BIGLEN means the field is that single byte;
 * otherwise the field takes 5 bytes. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    if ((ptr)[0] < ZIP_BIGLEN) {                                               \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)

/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the
 * entries encoding, the 'lensize' variable will hold the number of bytes
 * required to encode the entries length, and the 'len' variable will hold the
 * entries length.
 *
 * ZIP_ENTRY_ENCODING extracts the encoding from the first byte. Encodings
 * below ZIP_STR_MASK (0xc0) are strings:
 *   ZIP_STR_06B (0 << 6): 1 byte header, length in the low 6 bits.
 *   ZIP_STR_14B (1 << 6): 2 byte header, length in 14 bits.
 *   ZIP_STR_32B (2 << 6): 5 byte header, length in the 4 bytes that follow
 *                         the first one.
 * Everything else is an integer: 1 byte header, and the payload size is
 * given by zipIntSize().
 *
 * The bytes of the 32 bit length are widened to unsigned int before being
 * shifted so that a byte >= 0x80 is never shifted into the sign bit of an
 * int. Explanatory notes are kept out of the macro body because a line
 * comment on a line without a trailing backslash would end the macro. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do {                    \
    ZIP_ENTRY_ENCODING((ptr), (encoding));                                     \
    if ((encoding) < ZIP_STR_MASK) {                                           \
        if ((encoding) == ZIP_STR_06B) {                                       \
            (lensize) = 1;                                                     \
            (len) = (ptr)[0] & 0x3f;                                           \
        } else if ((encoding) == ZIP_STR_14B) {                                \
            (lensize) = 2;                                                     \
            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1];                       \
        } else if ((encoding) == ZIP_STR_32B) {                                \
            (lensize) = 5;                                                     \
            (len) = ((unsigned int)(ptr)[1] << 24) |                           \
                    ((unsigned int)(ptr)[2] << 16) |                           \
                    ((unsigned int)(ptr)[3] <<  8) |                           \
                    ((unsigned int)(ptr)[4]);                                  \
        } else {                                                               \
            assert(NULL);                                                      \
        }                                                                      \
    } else {                                                                   \
        (lensize) = 1;                                                         \
        (len) = zipIntSize(encoding);                                          \
    }                                                                          \
} while(0)

/* Extract the encoding from the byte pointed by 'ptr' and set it into
 * 'encoding'. For values below ZIP_STR_MASK (string encodings) only the two
 * high bits are kept; other values are left as read. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do {  \
    (encoding) = ((ptr)[0]); \
    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)

/* Different encoding/length possibilities */
#define ZIP_STR_MASK 0xc0
#define ZIP_INT_MASK 0x30
#define ZIP_STR_06B (0 << 6)        /* 0x00 */
#define ZIP_STR_14B (1 << 6)        /* 0x40 */
#define ZIP_STR_32B (2 << 6)        /* 0x80 */
#define ZIP_INT_16B (0xc0 | 0<<4)    /* 0xc0 */
#define ZIP_INT_32B (0xc0 | 1<<4)    /* 0xd0 */
#define ZIP_INT_64B (0xc0 | 2<<4)    /* 0xe0 */
#define ZIP_INT_24B (0xc0 | 3<<4)    /* 0xf0 */
#define ZIP_INT_8B 0xfe                /* 0xfe */

新增kv到對應的key例項中：

/* Step 3 (t_hash.c): store a field/value pair into the hash 'o'.
 *
 * Returns 1 when the field already existed and its value was replaced,
 * 0 when a new pair was added.
 *
 * 'flags' may contain HASH_SET_TAKE_FIELD / HASH_SET_TAKE_VALUE. With
 * those flags the hash table path stores the caller's SDS strings directly
 * instead of duplicating them, and whatever was not stored is freed before
 * returning. Without them the strings are left untouched. */
int hashTypeSet(robj *o, sds field, sds value, int flags) {
    int update = 0;

    /* The ziplist and the hash table encodings are handled separately. */
    if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        unsigned char *zl, *fptr, *vptr;

        zl = o->ptr;
        /* Start from the head entry of the ziplist. */
        fptr = ziplistIndex(zl, ZIPLIST_HEAD);
        if (fptr != NULL) {
            /* Search for the field, skipping one entry between
             * comparisons so that only fields (never values) are
             * compared. */
            fptr = ziplistFind(fptr, (unsigned char*)field, sdslen(field), 1);
            if (fptr != NULL) {
                /* Grab pointer to the value (fptr points to the field) */
                vptr = ziplistNext(zl, fptr);
                /* A field is always followed by its value. */
                serverAssert(vptr != NULL);
                update = 1;

                /* Delete value */
                zl = ziplistDelete(zl, &vptr);

                /* Insert new value */
                zl = ziplistInsert(zl, vptr, (unsigned char*)value,
                        sdslen(value));
            }
        }

        /* Field not present: append the new pair at the tail. */
        if (!update) {
            /* Push new field/value pair onto the tail of the ziplist */
            zl = ziplistPush(zl, (unsigned char*)field, sdslen(field),
                    ZIPLIST_TAIL);
            zl = ziplistPush(zl, (unsigned char*)value, sdslen(value),
                    ZIPLIST_TAIL);
        }
        /* The ziplist calls return the list pointer; store it back. */
        o->ptr = zl;

        /* Check if the ziplist needs to be converted to a hash table */
        if (hashTypeLength(o) > server.hash_max_ziplist_entries)
            hashTypeConvert(o, OBJ_ENCODING_HT);
    } else if (o->encoding == OBJ_ENCODING_HT) {
        dictEntry *de = dictFind(o->ptr,field);
        if (de) {
            /* Existing field: drop the old value and install the new one,
             * either taking the caller's string or duplicating it. */
            sdsfree(dictGetVal(de));
            if (flags & HASH_SET_TAKE_VALUE) {
                dictGetVal(de) = value;
                value = NULL;
            } else {
                dictGetVal(de) = sdsdup(value);
            }
            update = 1;
        } else {
            sds f,v;
            if (flags & HASH_SET_TAKE_FIELD) {
                f = field;
                field = NULL;
            } else {
                f = sdsdup(field);
            }
            if (flags & HASH_SET_TAKE_VALUE) {
                v = value;
                value = NULL;
            } else {
                v = sdsdup(value);
            }
            dictAdd(o->ptr,f,v);
        }
    } else {
        serverPanic("Unknown hash encoding");
    }

    /* Free SDS strings we did not referenced elsewhere if the flags
     * want this function to be responsible. */
    if (flags & HASH_SET_TAKE_FIELD && field) sdsfree(field);
    if (flags & HASH_SET_TAKE_VALUE && value) sdsfree(value);
    return update;
}

// 3.1. 使用ziplist進行儲存 field -> value

// ziplist.c, 查詢某個 field 是否存在於ziplist中

/* Find pointer to the entry equal to the specified entry. Skip 'skip' entries

 * between every comparison. Returns NULL when the field could not be found. */

unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip) {

    int skipcnt = ;

    unsigned char vencoding = ;

    long long vll = ;

    while (p[] != ZIP_END) {

        unsigned int prevlensize, encoding, lensize, len;

        unsigned char *q;

        // 解析整個字串p的 prevlensize,encoding,lensize,len

        ZIP_DECODE_PREVLENSIZE(p, prevlensize);

        ZIP_DECODE_LENGTH(p + prevlensize, encoding, lensize, len);

        q = p + prevlensize + lensize;

        // 傳入1, 代表要跳過一個元素, 比如: 查詢key時,跳過1個v,然後繼續迭代

        // 跳過了n個元素後，再從此開始key的比對過程

        if (skipcnt == ) {

            /* Compare current entry with specified entry */

            // 針對不同的編碼使用不同的比較方式

            if (ZIP_IS_STR(encoding)) {

                // 找到相應的元素，直接返回 p 指標

                if (len == vlen && memcmp(q, vstr, vlen) == ) {

                    return p;

                }

            } else {

                /* Find out if the searched field can be encoded. Note that

                 * we do it only the first time, once done vencoding is set

                 * to non-zero and vll is set to the integer value. */

                if (vencoding == ) {

                    if (!zipTryEncoding(vstr, vlen, &vll, &vencoding)) {

                        /* If the entry can't be encoded we set it to

                         * UCHAR_MAX so that we don't retry again the next

                         * time. */

                        vencoding = UCHAR_MAX;

                    }

                    /* Must be non-zero by now */

                    assert(vencoding);

                }

                /* Compare current entry with specified entry, do it only

                 * if vencoding != UCHAR_MAX because if there is no encoding

                 * possible for the field it can't be a valid integer. */

                if (vencoding != UCHAR_MAX) {

                    long long ll = zipLoadInteger(q, encoding);

                    if (ll == vll) {

                        return p;

                    }

                }

            }

            /* Reset skip count */

            // 查詢一次，跳過skip次

            skipcnt = skip;

        } else {

            /* Skip entry */

            skipcnt--;

        }

        /* Move to next entry */

        p = q + len;

    }

    return NULL;

}

/* ziplist.c: public entry point for inserting an entry into a ziplist.
 *
 * zl:   the ziplist
 * p:    the position at which the new entry is inserted ("Insert an entry
 *       at p")
 * s:    bytes of the entry to insert
 * slen: number of bytes in 's'
 *
 * Returns whatever __ziplistInsert() returns for the same arguments. */
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    unsigned char *updated = __ziplistInsert(zl,p,s,slen);

    return updated;
}

/* Insert item at "p".
 *
 * zl is the ziplist, p the position inside it where the new entry goes,
 * s/slen the bytes of the new entry. The ziplist is resized, so the
 * returned pointer must be used in place of 'zl' afterwards. */
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    if (p[0] != ZIP_END) {
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        if (ptail[0] != ZIP_END) {
            prevlen = zipRawEntryLength(ptail);
        }
    }

    /* See if the entry can be encoded */
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipEncodeLength will use the
         * string length to figure out how to encode it. */
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */
    reqlen += zipPrevEncodeLength(NULL,prevlen);
    reqlen += zipEncodeLength(NULL,encoding,slen);

    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;

    /* Store offset because a realloc may change the address of zl.
     * 'p' is rebuilt from the saved offset once the list is resized. */
    offset = p-zl;
    zl = ziplistResize(zl,curlen+reqlen+nextdiff);
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        zipPrevEncodeLength(p+reqlen,reqlen);

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        zipEntry(p+reqlen, &tail);
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry: previous-length field, encoding/length field, then
     * the payload (raw bytes for a string, the encoded integer otherwise). */
    p += zipPrevEncodeLength(p,prevlen);
    p += zipEncodeLength(p,encoding,slen);
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}

/* ziplist.c: add an entry at one end of the ziplist. This is what the hash
 * code uses to append a new field/value pair at the tail when the field
 * did not exist yet.
 *
 * 'where' equal to ZIPLIST_HEAD inserts at the head entry; any other value
 * inserts at the end of the list. The work itself is delegated to
 * __ziplistInsert(). */
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
    unsigned char *pos;

    if (where == ZIPLIST_HEAD) {
        pos = ZIPLIST_ENTRY_HEAD(zl);
    } else {
        pos = ZIPLIST_ENTRY_END(zl);
    }
    return __ziplistInsert(zl,pos,s,slen);
}

深入理解ziplist

　看起來ziplist好像沒那麼簡單呢，為啥還要搞這麼複雜呢？其實以上程式碼，僅是在人看來複雜，對機器來說就是更多的移位計算操作，多消耗點cpu就換來了空間上的節省，是可以的。軟體本身的複雜性帶來了效益，是軟體的價值體現，所以，並非所有的東西都是簡單即美。

　　接下來，我們來看一下使用 HT 的編碼又如何儲存field->value呢？

// 3.2. Adding field -> value with the OBJ_ENCODING_HT encoding
// (excerpt of the hash table branch of hashTypeSet)
    if (o->encoding == OBJ_ENCODING_HT) {
        /* Look the field up in the hash table. */
        dictEntry *de = dictFind(o->ptr,field);
        if (de) {
            /* Existing field: free the old value before replacing it. */
            sdsfree(dictGetVal(de));
            /* hset passes HASH_SET_COPY, so it goes through sdsdup(). */
            if (flags & HASH_SET_TAKE_VALUE) {
                dictGetVal(de) = value;
                value = NULL;
            } else {
                dictGetVal(de) = sdsdup(value);
            }
            update = 1;
        } else {
            /* New field: add field -> value. */
            sds f,v;
            if (flags & HASH_SET_TAKE_FIELD) {
                f = field;
                field = NULL;
            } else {
                f = sdsdup(field);
            }
            if (flags & HASH_SET_TAKE_VALUE) {
                v = value;
                value = NULL;
            } else {
                v = sdsdup(value);
            }
            /* Insert the pair into the dict. */
            dictAdd(o->ptr,f,v);
        }
    }

如此看來，OBJ_ENCODING_HT 的實現反而簡單了哦。

總結下 hash 的插入過程：hash 初始建立時都是使用 ziplist 來容納元素的，在特定情況下會觸發 ziplist 轉換為 ht 的編碼方式，比如：

　　　　1. hset時 field 或 value 引數的長度大於設定值(預設: 64)時直接轉換 ziplist -> ht;

　　　　2. hash表的元素數量大於設定值(預設: 512)時轉換 ziplist -> ht;

　　這麼設計的原因是，元素較少且佔用空間較小時，使用ziplist會節省空間，且時間消耗與hash表相差並不大，所以 ziplist 是優先的選擇了。但是大量資料還是必須要使用hash表儲存的。

Redis系列（九）：資料結構Hash原始碼解析和HSET、HGET命令

2.原始碼解析 1.相關命令如下： {\"hset\",hsetCommand,,\"wmF\",,NULL,,,,,}, {\"hsetnx\",hsetnxCommand,,\"wmF\",,NULL,,,,,},

Redis系列（十）：資料結構Set原始碼解析和SADD、SINTER、SDIFF、SUNION、SPOP命令

1.介紹 Hash是以K->V形式儲存，而Set則是K儲存,空間節省了很多 Redis中Set是String型別的無序集合；集合成員是唯一的。

Redis系列（九）：資料結構Hash之HDEL、HEXISTS、HGETALL、HKEYS、HLEN、HVALS命令

1.HDEL 從 key 指定的雜湊集中移除指定的域。在雜湊集中不存在的域將被忽略。

Redis系列（六）：資料結構List雙向連結串列LPUSH、LPOP、RPUSH、RPOP、LLEN命令

1.介紹 redis中的list既實現了棧（先進後出）又實現了佇列（先進先出） 1.示意圖

Redis系列（六）：資料結構QuickList（快速列表）原始碼解析

1.介紹 Redis在3.2版本之前List的底層編碼是ZipList和LinkedList實現的在3.2版本之後，重新引入了QuickList的資料結構，列表的底層都是QuickList實現

Redis系列（八）：資料結構List雙向連結串列中阻塞版本之BLPOP、BRPOP和LINDEX、LINSERT、LRANGE命令詳解

1.BRPOP、BLPOP BLPOP： BLPOP是阻塞式列表的彈出原語。它是命令LPOP的阻塞版本，這是因為當給定列表內沒有任何元素可供彈出的時候，

Quartz.Net系列（九）：Trigger之DailyTimeIntervalScheduleBuilder詳解

1.介紹中文意義就是每日時間間隔計劃生成 2.API講解 (1)WithInterval、WithIntervalInHours、WithIntervalInMinutes、WithIntervalInSeconds

Quartz.Net系列（九）：Trigger之CronScheduleBuilder和Cron表示式詳解

1.使用 var scheduler =await StdSchedulerFactory.GetDefaultScheduler(); await scheduler.Start(); var job = JobBuilder.Create<FirstJob>().Build();

從0到1使用Kubernetes系列（六）：資料持久化實戰

上一篇介紹了 Kubernetes 排程器如何進行資源排程，本文將為大家介紹幾種常用儲存型別：secret、configMap、emptyDir、hostPath、nfs、persistentVolumeClaim。

Spark原始碼系列（九）Spark SQL初體驗之解析過程詳解

好久沒更新部落格了，之前學了一些R語言和機器學習的內容，做了一些筆記，之後也會放到部落格上面來給大家共享。一個月前就打算更新Spark Sql的內容了，因為一些別的事情耽誤了，今天就簡單寫點，Spark1.2馬上就要出

Redis系列（十二）：資料結構SortedSet跳躍表中基本操作命令和原始碼解析

1.SkipList Redis的sortedSet資料結構是有序不重複的（索引為唯一的，資料(score)卻可以重複），

C#資料結構與算法系列（十）：中綴表示式轉字尾表示式

1.具體步驟 1）初始化兩個棧：運算子棧s1和儲存中間結果的棧s2；2）從左至右掃描中綴表示式；3）遇到運算元時，將其壓s2；4）遇到運算子時，比較其與s1棧頂運算子的優先順序：（1）如果s1為空，或棧頂運算子為左括號

儲存引擎系列（二）：資料庫索引底層資料結構 —— B+ 樹

索引原理只要是稍微瞭解 MySQL 資料庫的同學都應該知道，合理設定索引欄位可以有效提高資料庫的查詢效能，資料庫索引也是底層的儲存引擎維護的，那麼為什麼設定索引可以提升資料庫查詢效能？MySQL 資料庫底層又是如

SpringBoot + Vue + ElementUI 實現後臺管理系統模板 -- 後端篇（五）：資料表設計、使用 jwt、redis、sms 工具類完善註冊登入邏輯

（1）相關博文地址： SpringBoot + Vue + ElementUI 實現後臺管理系統模板 -- 前端篇（一）：搭建基本環境：https://www.cnblogs.com/l-y-h/p/12930895.html

Hadoop基礎（二十九）：資料清洗（ETL）（二）複雜解析版

資料清洗案例實操-複雜解析版 1．需求對Web訪問日誌中的各欄位識別切分，去除日誌中不合法的記錄。根據清洗規則，輸出過濾後的資料。

Docker 系列（四）：Docker 容器資料卷簡單使用

開始之前如果你有一些需要持續更新的資料並且希望持久化資料，或者需要在不同的容器之間共享資料，再者需要主機與容器之間共享資料，那麼你可以使用資料捲來滿足這些需求。

儲存引擎系列（四）：不同型別的查詢語句如何設定索引（上）—— 資料表初始化

B+ 索引樹回顧上篇教程學院君給大家介紹了不同型別的資料庫索引對應的 B+ 樹是如何維護的，這其實是對資料庫表記錄進行更新時底層所做的（插入、修改、刪除）事情，我們來簡單回顧下 B+ 索引樹：

Pandas系列教程（2）Pandas資料結構

Pandas資料結構 DataFrame: 二維陣列，整個表格，多行多列 Series: 一維資料，一行或一列

Prometheus環境搭建系列（三）：監控redis伺服器（redis_exporter）

redis叢集環境搭建：https://www.cnblogs.com/uncleyong/p/13196936.html 在需要監控的redis上安裝 node_exporter和redis_exporter

Java SE基礎鞏固（九）：註解

官方檔案是這麼描述註解的： Annotations,a form of metadata,provide data about a program that is not part of the program itself. Annotations have no direct effect on the operation of the code they anno

Redis系列（九）：資料結構Hash原始碼解析和HSET、HGET命令

相關推薦