MySQL InnoDB Engine--自適應雜湊索引程式碼瞎猜02

阿新 • • 發佈：2021-07-09

自適應雜湊索引資料結構

對B+樹的資料查詢，需要遍歷從根節點到葉子節點之間的每一層的節點，因此B+樹的樹高過高時，會影響B+樹的查詢效率。

Innodb儲存引擎根據查詢模式，對活躍的資料頁中的記錄進行雜湊索引，以實現快速查詢，解決B+樹樹高問題。

為標識查詢模式和熱點頁，需要在索引和資料頁上有相應的資料結構來存放訪問資訊。

每個索引物件有一個btr_search_t物件來保持索引的訪問模式和相關資訊。
每個資料頁物件有一個buf_block_t物件來保持資料頁的訪問模式和相關資訊。

btr_search_t物件

btr_search_t物件來保持索引的訪問模式以及相關AHI資訊：

hash_analysis：通過索引訪問的次數；當該計數達到BTR_SEARCH_HASH_ANALYSIS(硬編碼為17)時，才開始對索引上的訪問模式進行統計。

n_hash_potential：按照相同查詢模式連續成功(或本可成功)使用雜湊索引的次數；當該次數達到BTR_SEARCH_BUILD_LIMIT(硬編碼為100)時，才可能開始對資料頁中的記錄建立HASH索引。
n_fields：自適應雜湊索引字首列的數量
n_bytes：自適應雜湊索引字首中最後一個非完整列所使用的位元組數
left_side：訪問資料方向，用於判斷對重複列進行查詢的方向(向左還是向右)，即具有相同字首的多條記錄中是否將最左邊的記錄加入雜湊索引

/** Adaptive hash index search info kept for one index tree: the number of
blocks that currently have a hash index built for it, a set of unlatched
heuristic counters that decide when hash analysis starts, and the
recommended prefix (n_fields, n_bytes, left_side) to use for hash search. */
struct btr_search_t{
    ulint    ref_count;    /*!< Number of blocks in this index tree
                that have search index built
                i.e. block->index points to this index.
                Protected by search latch except
                when during initialization in
                btr_search_info_create(). */

    /* @{ The following fields are not protected by any latch.
    Unfortunately, this means that they must be aligned to
    the machine word, i.e., they cannot be turned into bit-fields. */
    buf_block_t* root_guess;/*!< the root page frame when it was last time
                fetched, or NULL */
    ulint    withdraw_clock;    /*!< the withdraw clock value of the buffer
                pool when root_guess was stored */
    ulint    hash_analysis;    /*!< when this exceeds
                BTR_SEARCH_HASH_ANALYSIS, the hash
                analysis starts; this is reset if no
                success noticed */
    ibool    last_hash_succ;    /*!< TRUE if the last search would have
                succeeded, or did succeed, using the hash
                index; NOTE that the value here is not exact:
                it is not calculated for every search, and the
                calculation itself is not always accurate! */
    ulint    n_hash_potential;
                /*!< number of consecutive searches
                which would have succeeded, or did succeed,
                using the hash index;
                the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
    /* @} */
    /*---------------------- @{ */
    /* Recommended hash prefix parameters. */
    ulint    n_fields;    /*!< recommended prefix length for hash search:
                number of full fields */
    ulint    n_bytes;    /*!< recommended prefix: number of bytes in
                an incomplete field
                @see BTR_PAGE_MAX_REC_SIZE */
    ibool    left_side;    /*!< TRUE or FALSE, depending on whether
                the leftmost record of several records with
                the same prefix should be indexed in the
                hash index */
    /*---------------------- @} */
#ifdef UNIV_SEARCH_PERF_STAT
    /* Statistics counters, compiled in only with UNIV_SEARCH_PERF_STAT. */
    ulint    n_hash_succ;    /*!< number of successful hash searches thus
                far */
    ulint    n_hash_fail;    /*!< number of failed hash searches */
    ulint    n_patt_succ;    /*!< number of successful pattern searches thus
                far */
    ulint    n_searches;    /*!< number of searches */
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
    ulint    magic_n;    /*!< magic number @see BTR_SEARCH_MAGIC_N */
/** value of btr_search_t::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N    1112765
#endif /* UNIV_DEBUG */
};

buf_block_t 物件

每個資料頁有一個buf_block_t物件用來存放頁的訪問資訊，判斷該資料頁是否需要對該頁記錄建立自適應雜湊索引。

n_hash_helps：用來控制是否建立雜湊索引的計數器
n_fields：自適應雜湊索引字首列的數量
n_bytes：自適應雜湊索引字首中最後一個非完整列所使用的位元組數

/** The buffer control block structure: bookkeeping for one buffer frame,
including the unlatched counters and prefix parameters that drive hash
search on the page and the parameters of the hash index currently built
for it. */

struct buf_block_t{

    /** @name General fields */
    /* @{ */

    buf_page_t    page;        /*!< page information; this must
                    be the first field, so that
                    buf_pool->page_hash can point
                    to buf_page_t or buf_block_t */
    byte*        frame;        /*!< pointer to buffer frame which
                    is of size UNIV_PAGE_SIZE, and
                    aligned to an address divisible by
                    UNIV_PAGE_SIZE */
#ifndef UNIV_HOTBACKUP
    BPageLock    lock;        /*!< read-write lock of the buffer
                    frame */
    UT_LIST_NODE_T(buf_block_t) unzip_LRU;
                    /*!< node of the decompressed LRU list;
                    a block is in the unzip_LRU list
                    if page.state == BUF_BLOCK_FILE_PAGE
                    and page.zip.data != NULL */
#ifdef UNIV_DEBUG
    ibool        in_unzip_LRU_list;/*!< TRUE if the page is in the
                    decompressed LRU list;
                    used in debugging */
    ibool        in_withdraw_list;
                    /*!< NOTE(review): undocumented in the
                    original; presumably TRUE while the
                    block is on a buffer pool withdraw
                    list, used in debugging - confirm */
#endif /* UNIV_DEBUG */
    unsigned    lock_hash_val:32;/*!< hashed value of the page address
                    in the record lock hash table;
                    protected by buf_block_t::lock
                    (or buf_block_t::mutex, buf_pool->mutex
                        in buf_page_get_gen(),
                    buf_page_init_for_read()
                    and buf_page_create()) */
    /* @} */
    /** @name Optimistic search field */
    /* @{ */

    ib_uint64_t    modify_clock;    /*!< this clock is incremented every
                    time a pointer to a record on the
                    page may become obsolete; this is
                    used in the optimistic cursor
                    positioning: if the modify clock has
                    not changed, we know that the pointer
                    is still valid; this field may be
                    changed if the thread (1) owns the
                    pool mutex and the page is not
                    bufferfixed, or (2) the thread has an
                    x-latch on the block */
    /* @} */
    /** @name Hash search fields (unprotected)
    NOTE that these fields are NOT protected by any semaphore! */
    /* @{ */

    ulint        n_hash_helps;    /*!< counter which controls building
                    of a new hash index for the page */
    volatile ulint    n_bytes;    /*!< recommended prefix length for hash
                    search: number of bytes in
                    an incomplete last field */
    volatile ulint    n_fields;    /*!< recommended prefix length for hash
                    search: number of full fields */
    volatile bool    left_side;    /*!< true or false, depending on
                    whether the leftmost record of several
                    records with the same prefix should be
                    indexed in the hash index */
    /* @} */

    /** @name Hash search fields
    These 5 fields (presumably n_pointers, curr_n_fields, curr_n_bytes,
    curr_left_side and index - TODO confirm) may only be modified when:
    we are holding the appropriate x-latch in btr_search_latches[], and
    one of the following holds:
    (1) the block state is BUF_BLOCK_FILE_PAGE, and
    we are holding an s-latch or x-latch on buf_block_t::lock, or
    (2) buf_block_t::buf_fix_count == 0, or
    (3) the block state is BUF_BLOCK_REMOVE_HASH.

    An exception to this is when we init or create a page
    in the buffer pool in buf0buf.cc.

    Another exception for buf_pool_clear_hash_index() is that
    assigning block->index = NULL (and block->n_pointers = 0)
    is allowed whenever btr_search_own_all(RW_LOCK_X).

    Another exception is that ha_insert_for_fold_func() may
    decrement n_pointers without holding the appropriate latch
    in btr_search_latches[]. Thus, n_pointers must be
    protected by atomic memory access.

    This implies that the fields may be read without race
    condition whenever any of the following hold:
    - the btr_search_latches[] s-latch or x-latch is being held, or
    - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
    and holding some latch prevents the state from changing to that.

    Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
    is prone to race conditions while buf_pool_clear_hash_index() is
    executing (the adaptive hash index is being disabled). Such use
    is explicitly commented. */

    /* @{ */

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
    ulint        n_pointers;    /*!< used in debugging: the number of
                    pointers in the adaptive hash index
                    pointing to this frame;
                    protected by atomic memory access
                    or btr_search_own_all(). */
/* The assertion helpers below read n_pointers through an atomic increment
by zero rather than a plain load. */
# define assert_block_ahi_empty(block)                    \
    ut_a(os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
# define assert_block_ahi_empty_on_init(block) do {            \
    UNIV_MEM_VALID(&(block)->n_pointers, sizeof (block)->n_pointers); \
    assert_block_ahi_empty(block);                    \
} while (0)
# define assert_block_ahi_valid(block)                    \
    ut_a((block)->index                        \
         || os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/* Without the debug counter the assertion helpers expand to nothing. */
# define assert_block_ahi_empty(block) /* nothing */
# define assert_block_ahi_empty_on_init(block) /* nothing */
# define assert_block_ahi_valid(block) /* nothing */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
    unsigned    curr_n_fields:10;/*!< prefix length for hash indexing:
                    number of full fields */
    unsigned    curr_n_bytes:15;/*!< number of bytes in hash
                    indexing */
    unsigned    curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
    dict_index_t*    index;        /*!< Index for which the
                    adaptive hash index has been
                    created, or NULL if the page
                    does not exist in the
                    index. Note that it does not
                    guarantee that the index is
                    complete, though: there may
                    have been hash collisions,
                    record deletions, etc. */
    /* @} */
    bool        made_dirty_with_no_latch;
                    /*!< true if block has been made dirty
                    without acquiring X/SX latch as the
                    block belongs to temporary tablespace
                    and block is always accessed by a
                    single thread. */
    bool        skip_flush_check;
                    /*!< Skip check in buf_dblwr_check_block
                    during bulk load, protected by lock.*/
# ifdef UNIV_DEBUG
    /** @name Debug fields */
    /* @{ */
    rw_lock_t    debug_latch;    /*!< in the debug version, each thread
                    which bufferfixes the block acquires
                    an s-latch here; so we can use the
                    debug utilities in sync0rw */
    /* @} */
# endif
    BPageMutex    mutex;        /*!< mutex protecting this block:
                    state (also protected by the buffer
                    pool mutex), io_fix, buf_fix_count,
                    and accessed; we introduce this new
                    mutex in InnoDB-5.1 to relieve
                    contention on the buffer pool mutex */
#endif /* !UNIV_HOTBACKUP */
};

判斷索引物件是否滿足自適應雜湊索引

在判斷是否需要對索引物件建立自適應雜湊索引前，需要先判斷索引是否被訪問17次，btr_search_t物件中hash_analysis統計索引使用次數


/** Counts one more search on the index and, once the counter has reached
BTR_SEARCH_HASH_ANALYSIS, hands the cursor to btr_search_info_update_slow().
Nothing is counted for spatial indexes or while btr_search_enabled is
false. The caller must not own the index's search latch in S or X mode
(checked with ut_ad). */
UNIV_INLINE
void btr_search_info_update(
    dict_index_t *index, /*!< in: index of the cursor */
    btr_cur_t *cursor)   /*!< in: cursor which was just positioned */
{
  ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
  ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));

  if (!dict_index_is_spatial(index) && btr_search_enabled) {
    btr_search_t *info = btr_search_get_info(index);

    /* The counter is bumped on every eligible search, whether or not the
    threshold has been reached yet. */
    info->hash_analysis += 1;

    if (info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
      /* This path is not expected for a cursor positioned via the hash. */
      ut_ad(cursor->flag != BTR_CUR_HASH);

      btr_search_info_update_slow(info, cursor);
    }
  }
}

BTR_SEARCH_HASH_ANALYSIS被硬編碼在程式碼中：

/** After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index.
btr_search_info_update() increments info->hash_analysis on each call and
returns early while the counter is still below this value. */
#define BTR_SEARCH_HASH_ANALYSIS 17

判斷資料頁物件是否滿足自適應雜湊索引

當索引被訪問17次後，則開始對索引上的資料頁進行訪問模式統計，當滿足下列條件時則對相應的資料頁建立自適應雜湊索引：

索引上按照相同查詢模式連續訪問100次
資料頁上按照相同查詢模式訪問超過"當前資料頁記錄數的1/16" 次

/** The global limit for consecutive potentially successful hash searches,
before hash index building is started.
btr_search_update_block_hash_info() recommends a build only when
info->n_hash_potential is at least this value. */
#define BTR_SEARCH_BUILD_LIMIT 100

/** If the number of records on the page divided by this parameter
would have been successfully accessed using a hash index, the index
is then built on the page, assuming the global limit has been reached.
In btr_search_update_block_hash_info() this means block->n_hash_helps must
exceed page_get_n_recs(block->frame) / BTR_SEARCH_PAGE_BUILD_LIMIT. */
#define BTR_SEARCH_PAGE_BUILD_LIMIT 16

/** Refreshes the per-block hash statistics after a search and decides
whether a hash index should be (re)built on the block.
NOTE: info and block->n_hash_helps, n_fields, n_bytes, left_side are
deliberately NOT protected by any semaphore, to save CPU time, so the
values read here may be mutually inconsistent.
The caller must not own the index's search latch and must own block->lock
in S or X mode (checked with ut_ad).
@param[in,out]    info    search info
@param[in,out]    block    buffer block
@param[in]    cursor    cursor
@return TRUE if building a (new) hash index on the block is recommended */
static ibool btr_search_update_block_hash_info(btr_search_t *info,
                                               buf_block_t *block,
                                               const btr_cur_t *cursor) {
  ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_S));
  ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X));
  ut_ad(rw_lock_own(&block->lock, RW_LOCK_S) ||
        rw_lock_own(&block->lock, RW_LOCK_X));

  /* Assume failure; overwritten below if the hash index would have hit. */
  info->last_hash_succ = FALSE;

  ut_a(buf_block_state_valid(block));
  ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);

  if (block->n_hash_helps == 0 || info->n_hash_potential == 0 ||
      block->n_fields != info->n_fields ||
      block->n_bytes != info->n_bytes ||
      block->left_side != !!info->left_side) {
    /* No running streak, or the recommended prefix differs from the one
    recorded on the block: restart the count with the current prefix. */
    block->n_hash_helps = 1;
    block->n_fields = info->n_fields;
    block->n_bytes = info->n_bytes;
    block->left_side = info->left_side;
  } else {
    if (block->index && block->curr_n_fields == info->n_fields &&
        block->curr_n_bytes == info->n_bytes &&
        block->curr_left_side == info->left_side) {
      /* A hash index with exactly these parameters already exists on the
      block, so the search would presumably have succeeded through it. */
      info->last_hash_succ = TRUE;
    }

    block->n_hash_helps++;
  }

#ifdef UNIV_DEBUG
  if (cursor->index->table->does_not_fit_in_memory) {
    block->n_hash_helps = 0;
  }
#endif /* UNIV_DEBUG */

  /* Per-page threshold: the helps counter must exceed the page's record
  count divided by BTR_SEARCH_PAGE_BUILD_LIMIT. */
  if (block->n_hash_helps <=
      page_get_n_recs(block->frame) / BTR_SEARCH_PAGE_BUILD_LIMIT) {
    return (FALSE);
  }

  /* Global threshold on consecutive potentially successful searches. */
  if (info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT) {
    return (FALSE);
  }

  /* Recommend a build when the block has no hash index, when the counter
  has grown past twice the record count, or when the existing hash index
  uses different parameters than the ones now recorded on the block. */
  return ((!block->index ||
           block->n_hash_helps > 2 * page_get_n_recs(block->frame) ||
           block->n_fields != block->curr_n_fields ||
           block->n_bytes != block->curr_n_bytes ||
           block->left_side != block->curr_left_side)
              ? TRUE
              : FALSE);
}