redis原始碼分析1------dict的實現

阿新 • • 發佈：2018-11-17

1. 總體結構

redis的dict就是hash表，使用鏈式結構來解決key值衝突，典型的資料結構

結構體的定義如下：



/* One key/value node stored in a hash table bucket. Entries that fall into
 * the same bucket are chained together through 'next'. */
typedef struct dictEntry {
    void *key;
    /* The value slot: a pointer or an inline 64-bit integer / double,
     * all sharing the same storage. */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next; /* next entry in the same bucket, or NULL */
} dictEntry;

/* Table of callbacks that customizes a dictionary for a given key/value
 * type. Every callback except hashFunction takes a 'privdata' pointer as
 * its first argument.
 * NOTE(review): the macros that invoke these callbacks (dictHashKey,
 * dictCompareKeys, dictSetKey, dictSetVal) are not shown here; the per-field
 * notes below are inferred from the signatures and names - confirm. */
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key); /* maps a key to a 64-bit hash */
    void *(*keyDup)(void *privdata, const void *key); /* presumably returns a copy of the key */
    void *(*valDup)(void *privdata, const void *obj); /* presumably returns a copy of the value */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2); /* presumably non-zero when the keys are equal */
    void (*keyDestructor)(void *privdata, void *key); /* presumably releases a key */
    void (*valDestructor)(void *privdata, void *obj); /* presumably releases a value */
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    /* Bucket array: an array of pointers to entries, so it only needs
     * (pointer size * size) bytes of contiguous memory rather than one
     * large contiguous region for the entries themselves. */
    dictEntry **table;
    unsigned long size;  // number of buckets in 'table'
    unsigned long sizemask;  // size - 1; a bucket index is computed as (hash & sizemask)
    unsigned long used; // number of key/value entries currently stored in this table
} dictht;
//當used >= size且允許resize（dict_can_resize為真）時，就會觸發擴容並開始rehash；即使不允許resize，只要used/size > dict_force_resize_ratio(預設值是5)，也會強制擴容。可以理解為一個hash桶後面平均掛載的衝突佇列個數超過5的時候，一定會觸發rehash


/* The dictionary itself: a callback table plus two hash tables used for
 * incremental rehashing. */
typedef struct dict {
    dictType *type;   /* type-specific callbacks */
    void *privdata;   /* presumably handed to the 'type' callbacks as their privdata argument - confirm */
    dictht ht[2];     /* while rehashing, entries move from ht[0] to ht[1] */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

如下圖所示：

2. API介面分析

2.1 建立

API介面函式：

int dictAdd(dict *d, void *key, void *val)

在d中增加一個k-v對，實現程式碼如下：

/* Insert a new key/value pair into dictionary 'd'.
 *
 * The entry itself is created by dictAddRaw(). When that call returns NULL
 * (for example because the key is already present) nothing is stored and
 * DICT_ERR is returned. Otherwise the value is attached to the new entry
 * with dictSetVal() and DICT_OK is returned. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d, key, NULL);

    if (added != NULL) {
        dictSetVal(d, added, val);
        return DICT_OK;
    }
    return DICT_ERR;
}



/* Low-level insert: create an entry for 'key' and link it into the table,
 * leaving the value unset so that the caller can fill it in.
 *
 * Returns the new entry, or NULL when _dictKeyIndex() reports -1 (the key
 * already exists, or the table could not be expanded). 'existing' is passed
 * straight through to _dictKeyIndex(), which stores the matching entry there
 * when the key is found and the pointer is non-NULL. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    /* While a rehash is in progress, every operation contributes one step. */
    if (dictIsRehashing(d)) {
        _dictRehashStep(d);
    }

    /* Bucket index for the new element, or -1 if it must not be inserted. */
    long slot = _dictKeyIndex(d, key, dictHashKey(d,key), existing);
    if (slot == -1) {
        return NULL;
    }

    /* Choose the destination table only after the index lookup: when a
     * rehash is in progress the index refers to the new table, ht[1]. */
    dictht *target;
    if (dictIsRehashing(d)) {
        target = &d->ht[1];
    } else {
        target = &d->ht[0];
    }

    /* Link the new entry at the head of the bucket's chain, on the
     * assumption that in a database system recently added entries are
     * more likely to be accessed frequently. */
    dictEntry *node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    /* Store the key in the freshly linked entry. */
    dictSetKey(d, node, key);
    return node;
}


/* Compute the bucket index at which a new entry for 'key' can be linked.
 *
 * Returns -1 when the key is already present (in which case '*existing',
 * if the pointer is non-NULL, is set to the entry found) or when
 * _dictExpandIfNeeded() fails. '*existing' is cleared to NULL on entry.
 *
 * While the dictionary is being rehashed both tables are searched, and the
 * returned index is always relative to the second (new) hash table. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long slot, t;

    if (existing != NULL) *existing = NULL;

    /* Give the table a chance to grow before computing an index into it. */
    if (_dictExpandIfNeeded(d) == DICT_ERR) {
        return -1;
    }

    for (t = 0; t <= 1; t++) {
        dictht *cur = &d->ht[t];

        /* The bucket index is the hash masked by the table's sizemask. */
        slot = hash & cur->sizemask;

        /* Walk the whole collision chain to make sure the key is new. */
        for (dictEntry *he = cur->table[slot]; he != NULL; he = he->next) {
            if (key == he->key || dictCompareKeys(d, key, he->key)) {
                if (existing != NULL) *existing = he;
                return -1;
            }
        }

        /* Without a rehash in progress only the first table is in use. */
        if (!dictIsRehashing(d)) break;
    }
    return slot;
}

dictEntry *dictFind(dict *d, const void *key)
根據key在d中尋找值，這個邏輯和add差不多，程式碼很簡單，這裡就不做解釋了


/* Look up 'key' in dictionary 'd'.
 *
 * Returns the entry holding the key, or NULL when the dictionary is empty
 * or the key is not present. If a rehash is in progress, one rehash step is
 * performed first and both tables are searched. */
dictEntry *dictFind(dict *d, const void *key)
{
    /* Nothing stored in either table: the dict is empty. */
    if (d->ht[0].used + d->ht[1].used == 0) {
        return NULL;
    }

    /* Same policy as on insertion: contribute one step to a running rehash. */
    if (dictIsRehashing(d)) {
        _dictRehashStep(d);
    }

    uint64_t hash = dictHashKey(d, key);
    for (uint64_t t = 0; t <= 1; t++) {
        uint64_t slot = hash & d->ht[t].sizemask;
        dictEntry *he;

        for (he = d->ht[t].table[slot]; he != NULL; he = he->next) {
            if (key == he->key || dictCompareKeys(d, key, he->key)) {
                return he;
            }
        }

        /* The second table only matters while rehashing. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}

3. rehash過程
redis對於dict支援兩種rehash的方式：按照時間，或者按照操作進行rehash（每次對dict的操作，例如上面的dictAddRaw、dictFind，都會順帶執行一步）。每一步rehash都會把舊hash表中一個桶內的整條衝突連結串列遷移到新的hash表中。
rehash 程式碼如下：

/* Perform a single step of incremental rehashing, but only when no
 * iterators are currently running on the dictionary. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) {
        return;
    }
    dictRehash(d, 1);
}


/* Run up to 'n' steps of incremental rehashing.
 *
 * Returns 1 if there are still keys to move from the old table (ht[0]) to
 * the new one (ht[1]), and 0 when no rehash is in progress or the migration
 * has completed.
 *
 * One step moves one bucket, which may hold several keys because collisions
 * are chained. Since the old table can contain long runs of empty buckets,
 * at most n*10 empty buckets are visited per call; once that budget is used
 * up the function returns early, so the amount of work stays bounded and a
 * call is not guaranteed to migrate even a single bucket. */
int dictRehash(dict *d, int n) {
    if (!dictIsRehashing(d)) return 0;

    int empty_budget = n*10; /* Max number of empty buckets to visit. */
    dictht *from = &d->ht[0];
    dictht *to = &d->ht[1];

    while (n-- && from->used != 0) {
        /* rehashidx cannot run past the table: ht[0].used != 0 guarantees
         * that a non-empty bucket still lies ahead. */
        assert(from->size > (unsigned long)d->rehashidx);

        /* Skip empty buckets, giving up once the budget is exhausted. */
        while (from->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_budget == 0) return 1;
        }

        /* Move every entry of this bucket to the new table. Each key's
         * index is recomputed against the new table's sizemask and the
         * entry is linked at the head of its new chain. */
        dictEntry *node = from->table[d->rehashidx];
        while (node != NULL) {
            dictEntry *following = node->next;
            uint64_t slot = dictHashKey(d, node->key) & to->sizemask;

            node->next = to->table[slot];
            to->table[slot] = node;
            from->used--;
            to->used++;
            node = following;
        }
        from->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Old table not yet drained: more to rehash. */
    if (from->used != 0) {
        return 1;
    }

    /* The whole table has been migrated: release the old bucket array,
     * promote the new table to ht[0], reset ht[1] and leave rehashing
     * mode. */
    zfree(from->table);
    *from = *to;
    _dictReset(to);
    d->rehashidx = -1;
    return 0;
}

redis原始碼分析1------dict的實現

1. 總體結構 redis的dict就是hash表，使用鏈式結構來解決key值衝突，典型的資料結構結構體的定義如下： typedef struct dictEntry { void *key; union { void *val; uint64_t

Redis原始碼分析（dict）

一、dict 簡介 dict (dictionary 字典)，通常的儲存結構是Key-Value形式的，通過Hash函式對key求Hash值來確定Value的位置，因此也叫Hash表，是一種用來解決演算法中查詢問題的資料結構，預設的演算法複雜度接近O(1)，Redis本

Redis原始碼分析（三十四）--- redis.h服務端的實現分析（1）

上次剛剛分析過了客戶端的結構體分析，思路比較簡單，清晰，最後學習的是服務端的實現，服務端在Redis可是重中之重，裡面基本上囊括了之前模組中涉及到的所有知識點，從redis的標頭檔案就可以看出了，redis.h程式碼量就已經破1000+行了，而且都還只是一些變

redis原始碼分析與思考（十七）——有序集合型別的命令實現(t_zset.c)

有序集合是集合的延伸，它儲存著集合元素的不可重複性，但不同的是，它是有序的，它利用每一個元素的分數來作為有序集合的排序依據，現在列出有序集合的命令：有序集合命令命令對應操作時

redis原始碼分析與思考（十六）——集合型別的命令實現(t_set.c)

集合型別是用來儲存多個字串的，與列表型別不一樣，集合中不允許有重複的元素，也不能以索引的方式來通過下標獲取值，集合中的元素還是無序的。在普通的集合上增刪查改外，集合型別還實現了多個集合的取交集、並集、差集，集合的命令如下表所示：集合命

redis原始碼分析與思考（十五）——雜湊型別的命令實現(t_hash.c)

雜湊型別又叫做字典，在redis中，雜湊型別本身是一個鍵值對，而雜湊型別裡面也存貯著鍵值對，其對應關係是，每個雜湊型別的值對應著一個鍵值對或多對鍵值對，如圖所示：雜湊型別命令命令對應操

redis原始碼分析與思考（十四）——列表型別的命令實現(t_list.c)

列表型別是用來存貯多個字串物件的結構。一個列表可以存貯2^32-1個元素，可以對列表兩端進行插入(push)、彈出(pop)，還可以獲取指定範圍內的元素列表、獲取指定索引的元素等等，它可以靈活的充當棧和佇列的角色。下面列出列表的命令：列

redis原始碼分析與思考（十三）——字串型別的命令實現(t_string.c)

在對字串操作的命令中，主要有增刪查改、批處理操作以及編碼的轉換命令，現在列出對字串物件操作的主要常用命令：常用命令表命令對應操作時間複雜度

redis原始碼分析與思考（十七）——有序集合型別的命令實現(t_set.c)

有序集合是集合的延伸，它儲存著集合元素的不可重複性，但不同的是，它是有序的，它利用每一個元素的分數來作為有序集合的排序依據，現在列出有序集合的命令：有序集合命令命令對應操作時間複

Redis網路庫原始碼分析(1)之介紹篇

一、前言 Redis網路庫是一個單執行緒EPOLL模型的網路庫，和Memcached使用的libevent相比，它沒有那麼龐大，程式碼一共2000多行，因此比較容易分析。其實網上已經有非常多有關這個網

Redis原始碼分析（三十五）--- redis.c服務端的實現分析（2）

在Redis服務端的程式碼量真的是比較大，如果一個一個API的學習怎麼實現，無疑是一種效率很低的做法，所以我今天對服務端的實現程式碼的學習，重在他的執行流程上，而對於他的模組設計在上一篇中我已經分析過了，不明白的同學可以接著看上篇。所以我學習分析redis服務

結合redis設計與實現的redis原始碼學習-1-記憶體分配（zmalloc）

在進入公司後的第一個任務就是使用redis的快取功能實現伺服器的雲託管功能，在瞭解了大致需求後，依靠之前對redis的瞭解封裝了常用的redis命令，並使用單例的連線池來維護與redis的連線，使用連線池來獲取redis的連線物件，依靠這些功能基本可以實現要求的

redis cluster叢集的原始碼分析(1)

對於cluster.c的原始碼分析，我將會分兩部分介紹。本文主要分析叢集通訊和通訊故障。先大致歸納下cluster的主要函式 void clusterCron(void);//叢集的定時任務 int clusterProcessPacket(clusterLink *l

Netty Pipeline原始碼分析(1)

原文連結：wangwei.one/posts/netty… 前面，我們分析了Netty EventLoop的建立與啟動原理，接下里我們來分析Netty中另外兩個重要元件—— ChannelHandler 與 Pipeline。Netty中I/O事件的傳播機制均由它負責，下面我們來看看它是如

【Android】原始碼分析 - LRUCache快取實現原理

一、Android中的快取策略一般來說，快取策略主要包含快取的新增、獲取和刪除這三類操作。如何新增和獲取快取這個比較好理解，那麼為什麼還要刪除快取呢？這是因為不管是記憶體快取還是硬碟快取，它們的快取大小都是有限的。當快取滿了之後，再想其新增快取，這個時候就需要刪除一些舊的快取

redis原始碼分析與思考（十九）——AOF持久化

為了解決持久化檔案很龐大以及會阻塞伺服器的情況，redis提出一種新的持久化方案：AOF持久化。AOF持久化是redis儲存資料的另外一種方式，全稱Append Only File，與RDB持久化不同的是，AOF持久化是隻儲存從客戶端鍵入

redis原始碼分析與思考（十八）——RDB持久化

redis是一個鍵值對的資料庫伺服器，伺服器中包含著若干個非空的資料庫，每個非空資料庫裡又包含著若干個鍵值對。因為redis是一個基於記憶體存貯的資料庫，他將自己所存的資料存於記憶體中，如果不將這些資料及時的儲存在硬碟中，當電腦關機或者進行

vue原始碼分析1-new Vue做了哪些操作

首先我們可以看到vue的原始碼在github上有，大家可以克隆下來。 git地址我們主要看src下的內容。 1.現在我們來分析下 new Vue都做了哪些操作 var app = new Vue({ el: '#app', mounted:{ console.log(t

Netty原始碼分析:1.4伺服器啟動流程

第一章節是主要是伺服器啟動的程式碼分析。章節目錄有： |———1.1初始化NioEventLoopGroup |———1.2初始化NioEventLoop |———1.3初始化NioServerSocketChannel |———1.4伺服器啟動流程為什麼先從初始化開

redis原始碼分析1------dict的實現

相關推薦