MySQL InnoDB Engine--自適應雜湊索引程式碼瞎猜02
阿新 • • 發佈:2021-07-09
自適應雜湊索引資料結構
對B+樹的資料查詢,需要遍歷從根節點到葉子節點之間的每一層的節點,因此B+樹的樹高過高時,會影響B+樹的查詢效率。
Innodb儲存引擎根據查詢模式,對活躍的資料頁中的記錄進行雜湊索引,以實現快速查詢,解決B+樹樹高問題。
為標識查詢模式和熱點頁,需要在索引和資料頁上有相應的資料結構來存放訪問資訊。
- 每個索引物件有一個btr_search_t物件來保持索引的訪問模式和相關資訊。
- 每個資料頁物件有一個buf_block_t物件來保持資料頁的訪問模式和相關資訊。
btr_search_t物件
btr_search_t物件來保持索引的訪問模式以及相關AHI資訊:
- hash_analysis:通過索引訪問的次數,當通過索引訪問的次數達到BTR_SEARCH_HASH_ANALYSIS(硬編碼為17)時,則開始對索引上的訪問模式進行統計。
- n_hash_potential:按照相同查詢模式連續查詢成功(或可能成功)的次數,當該次數達到BTR_SEARCH_BUILD_LIMIT(硬編碼為100)時,則開始對索引中記錄建立HASH索引。
- n_fields:自適應雜湊索引字首列的數量
- n_bytes:自適應雜湊索引的位元組數(非完整列)
- left_side:訪問資料方向,用於判斷對重複列進行查詢的方向(向左還是向右)
/** Search info kept for an index: a reference count of hashed blocks, the
counters that decide when hash analysis and hash index building start, and
the currently recommended hash prefix (n_fields, n_bytes, left_side). */
struct btr_search_t{
  ulint ref_count; /*!< Number of blocks in this index tree
                   that have search index built
                   i.e. block->index points to this index.
                   Protected by search latch except
                   when during initialization in
                   btr_search_info_create().*/

  /* @{ The following fields are not protected by any latch.
  Unfortunately, this means that they must be aligned to
  the machine word, i.e., they cannot be turned into bit-fields. */
  buf_block_t* root_guess;/*!< the root page frame when it was last time
                   fetched, or NULL*/
  ulint withdraw_clock; /*!< the withdraw clock value of the buffer
                   pool when root_guess was stored */
  ulint hash_analysis; /*!< when this exceeds
                   BTR_SEARCH_HASH_ANALYSIS, the hash
                   analysis starts; this is reset if no
                   success noticed.
                   btr_search_info_update() increments this
                   counter and only calls
                   btr_search_info_update_slow() once it is
                   no longer below BTR_SEARCH_HASH_ANALYSIS */
  ibool last_hash_succ; /*!< TRUE if the last search would have
                   succeeded, or did succeed, using the hash
                   index; NOTE that the value here is not exact:
                   it is not calculated for every search, and the
                   calculation itself is not always accurate!
                   btr_search_update_block_hash_info() clears
                   it to FALSE on entry and sets it to TRUE
                   when the block's current hash parameters
                   match this info */
  ulint n_hash_potential;
                   /*!< number of consecutive searches
                   which would have succeeded, or did succeed,
                   using the hash index;
                   the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
  /* @} */
  /*---------------------- @{ */
  ulint n_fields; /*!< recommended prefix length for hash search:
                   number of full fields */
  ulint n_bytes; /*!< recommended prefix: number of bytes in
                   an incomplete field
                   @see BTR_PAGE_MAX_REC_SIZE */
  ibool left_side; /*!< TRUE or FALSE, depending on whether
                   the leftmost record of several records with
                   the same prefix should be indexed in the
                   hash index */
  /*---------------------- @} */
#ifdef UNIV_SEARCH_PERF_STAT
  /* Performance counters, compiled in only with UNIV_SEARCH_PERF_STAT. */
  ulint n_hash_succ; /*!< number of successful hash searches thus
                   far */
  ulint n_hash_fail; /*!< number of failed hash searches */
  ulint n_patt_succ; /*!< number of successful pattern searches thus
                   far */
  ulint n_searches; /*!< number of searches */
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
  ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
/** value of btr_search_t::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
buf_block_t 物件
每個資料頁有一個buf_block_t物件用來存放頁的訪問資訊,判斷該資料頁是否需要對該頁記錄建立自適應雜湊索引。
- n_hash_helps:用來控制是否建立雜湊索引的計數器
- n_fields:自適應雜湊索引字首列的數量
- n_bytes:自適應雜湊索引的位元組數(非完整列)
/** The buffer control block structure.
Besides the general page bookkeeping, it carries the per-page statistics
(n_hash_helps, n_fields, n_bytes, left_side) and the parameters of the hash
index currently built on the page (curr_n_fields, curr_n_bytes,
curr_left_side, index). */
struct buf_block_t{

  /** @name General fields */
  /* @{ */

  buf_page_t page; /*!< page information; this must
                   be the first field, so that
                   buf_pool->page_hash can point
                   to buf_page_t or buf_block_t */
  byte* frame; /*!< pointer to buffer frame which
                   is of size UNIV_PAGE_SIZE, and
                   aligned to an address divisible by
                   UNIV_PAGE_SIZE */
#ifndef UNIV_HOTBACKUP
  BPageLock lock; /*!< read-write lock of the buffer
                   frame */
  UT_LIST_NODE_T(buf_block_t) unzip_LRU;
                   /*!< node of the decompressed LRU list;
                   a block is in the unzip_LRU list
                   if page.state == BUF_BLOCK_FILE_PAGE
                   and page.zip.data != NULL */
#ifdef UNIV_DEBUG
  ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
                   decompressed LRU list;
                   used in debugging */
  ibool in_withdraw_list;
#endif /* UNIV_DEBUG */
  unsigned lock_hash_val:32;/*!< hashed value of the page address
                   in the record lock hash table;
                   protected by buf_block_t::lock
                   (or buf_block_t::mutex, buf_pool->mutex
                   in buf_page_get_gen(),
                   buf_page_init_for_read()
                   and buf_page_create()) */
  /* @} */

  /** @name Optimistic search field */
  /* @{ */

  ib_uint64_t modify_clock; /*!< this clock is incremented every
                   time a pointer to a record on the
                   page may become obsolete; this is
                   used in the optimistic cursor
                   positioning: if the modify clock has
                   not changed, we know that the pointer
                   is still valid; this field may be
                   changed if the thread (1) owns the
                   pool mutex and the page is not
                   bufferfixed, or (2) the thread has an
                   x-latch on the block */
  /* @} */

  /** @name Hash search fields (unprotected)
  NOTE that these fields are NOT protected by any semaphore!
  They are read and updated by btr_search_update_block_hash_info(). */
  /* @{ */

  ulint n_hash_helps; /*!< counter which controls building
                   of a new hash index for the page;
                   btr_search_update_block_hash_info()
                   increments it while the recommended
                   prefix stays unchanged and restarts it
                   at 1 otherwise */
  volatile ulint n_bytes; /*!< recommended prefix length for hash
                   search: number of bytes in
                   an incomplete last field */
  volatile ulint n_fields; /*!< recommended prefix length for hash
                   search: number of full fields */
  volatile bool left_side; /*!< true or false, depending on
                   whether the leftmost record of several
                   records with the same prefix should be
                   indexed in the hash index */
  /* @} */

  /** @name Hash search fields
  These 5 fields may only be modified when:
  we are holding the appropriate x-latch in btr_search_latches[], and
  one of the following holds:
  (1) the block state is BUF_BLOCK_FILE_PAGE, and
  we are holding an s-latch or x-latch on buf_block_t::lock, or
  (2) buf_block_t::buf_fix_count == 0, or
  (3) the block state is BUF_BLOCK_REMOVE_HASH.

  An exception to this is when we init or create a page
  in the buffer pool in buf0buf.cc.

  Another exception for buf_pool_clear_hash_index() is that
  assigning block->index = NULL (and block->n_pointers = 0)
  is allowed whenever btr_search_own_all(RW_LOCK_X).

  Another exception is that ha_insert_for_fold_func() may
  decrement n_pointers without holding the appropriate latch
  in btr_search_latches[]. Thus, n_pointers must be
  protected by atomic memory access.

  This implies that the fields may be read without race
  condition whenever any of the following hold:
  - the btr_search_latches[] s-latch or x-latch is being held, or
  - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
  and holding some latch prevents the state from changing to that.

  Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
  is prone to race conditions while buf_pool_clear_hash_index() is
  executing (the adaptive hash index is being disabled). Such use
  is explicitly commented. */

  /* @{ */

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
  ulint n_pointers; /*!< used in debugging: the number of
                   pointers in the adaptive hash index
                   pointing to this frame;
                   protected by atomic memory access
                   or btr_search_own_all(). */
  /* The macros below read n_pointers by atomically adding 0 to it. */
# define assert_block_ahi_empty(block) \
  ut_a(os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
# define assert_block_ahi_empty_on_init(block) do { \
  UNIV_MEM_VALID(&(block)->n_pointers, sizeof (block)->n_pointers); \
  assert_block_ahi_empty(block); \
} while (0)
# define assert_block_ahi_valid(block) \
  ut_a((block)->index \
       || os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
  /* Without the debug options the assertion macros expand to nothing. */
# define assert_block_ahi_empty(block) /* nothing */
# define assert_block_ahi_empty_on_init(block) /* nothing */
# define assert_block_ahi_valid(block) /* nothing */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
  unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
                   number of full fields */
  unsigned curr_n_bytes:15;/*!< number of bytes in hash
                   indexing */
  unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
  dict_index_t* index; /*!< Index for which the
                   adaptive hash index has been
                   created, or NULL if the page
                   does not exist in the
                   index. Note that it does not
                   guarantee that the index is
                   complete, though: there may
                   have been hash collisions,
                   record deletions, etc. */
  /* @} */
  bool made_dirty_with_no_latch;
                   /*!< true if block has been made dirty
                   without acquiring X/SX latch as the
                   block belongs to temporary tablespace
                   and block is always accessed by a
                   single thread. */
  bool skip_flush_check;
                   /*!< Skip check in buf_dblwr_check_block
                   during bulk load, protected by lock.*/
# ifdef UNIV_DEBUG
  /** @name Debug fields */
  /* @{ */
  rw_lock_t debug_latch; /*!< in the debug version, each thread
                   which bufferfixes the block acquires
                   an s-latch here; so we can use the
                   debug utilities in sync0rw */
  /* @} */
# endif
  BPageMutex mutex; /*!< mutex protecting this block:
                   state (also protected by the buffer
                   pool mutex), io_fix, buf_fix_count,
                   and accessed; we introduce this new
                   mutex in InnoDB-5.1 to relieve
                   contention on the buffer pool mutex */
#endif /* !UNIV_HOTBACKUP */
};
判斷索引物件是否滿足自適應雜湊索引
在判斷是否需要對索引物件建立自適應雜湊索引前,需要先判斷索引是否被訪問17次,btr_search_t物件中hash_analysis統計索引使用次數
/** Counts one more access through the index in its search info and, once
that counter is no longer below BTR_SEARCH_HASH_ANALYSIS, passes the search
info and the cursor on to btr_search_info_update_slow().
Nothing is done for a spatial index or while btr_search_enabled is false.
@param[in]	index	index of the cursor
@param[in]	cursor	cursor which was just positioned */
UNIV_INLINE
void
btr_search_info_update(
	dict_index_t *index,
	btr_cur_t *cursor)
{
	/* Debug assertions: this thread is expected not to own the
	search latch of the index in either S or X mode. */
	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
	ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));

	if (!dict_index_is_spatial(index) && btr_search_enabled) {
		btr_search_t *search_info = btr_search_get_info(index);

		search_info->hash_analysis++;

		/* Below the threshold only the counter is maintained. */
		if (search_info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
			ut_ad(cursor->flag != BTR_CUR_HASH);

			btr_search_info_update_slow(search_info, cursor);
		}
	}
}
BTR_SEARCH_HASH_ANALYSIS被硬編碼在程式碼中:
/** After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index.
btr_search_info_update() returns early, without calling
btr_search_info_update_slow(), while info->hash_analysis is still below
this value. */
#define BTR_SEARCH_HASH_ANALYSIS 17
判斷資料頁物件是否滿足自適應雜湊索引
當索引被訪問17次後,則開始對索引上的資料頁進行訪問模式統計,當滿足下列條件時則對相應的資料頁建立自適應雜湊索引:
- 索引上按照相同查詢模式連續訪問100次
- 資料頁上按照相同查詢模式訪問超過"當前資料頁記錄數的1/16" 次
/** The global limit for consecutive potentially successful hash searches, before hash index building is started */ #define BTR_SEARCH_BUILD_LIMIT 100 /** If the number of records on the page divided by this parameter would have been successfully accessed using a hash index, the index is then built on the page, assuming the global limit has been reached */ #define BTR_SEARCH_PAGE_BUILD_LIMIT 16 /** Update the block search info on hash successes. NOTE that info and block->n_hash_helps, n_fields, n_bytes, left_side are NOT protected by any semaphore, to save CPU time! Do not assume the fields are consistent. @return true if building a (new) hash index on the block is recommended @param[in,out] info search info @param[in,out] block buffer block @param[in] cursor cursor */ static ibool btr_search_update_block_hash_info(btr_search_t *info, buf_block_t *block, const btr_cur_t *cursor) { ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_S)); ut_ad(!rw_lock_own(btr_get_search_latch(cursor->index), RW_LOCK_X)); ut_ad(rw_lock_own(&block->lock, RW_LOCK_S) || rw_lock_own(&block->lock, RW_LOCK_X)); info->last_hash_succ = FALSE; ut_a(buf_block_state_valid(block)); ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N); if ((block->n_hash_helps > 0) && (info->n_hash_potential > 0) && (block->n_fields == info->n_fields) && (block->n_bytes == info->n_bytes) && (block->left_side == !!info->left_side)) { if ((block->index) && (block->curr_n_fields == info->n_fields) && (block->curr_n_bytes == info->n_bytes) && (block->curr_left_side == info->left_side)) { /* The search would presumably have succeeded using the hash index */ info->last_hash_succ = TRUE; } block->n_hash_helps++; } else { block->n_hash_helps = 1; block->n_fields = info->n_fields; block->n_bytes = info->n_bytes; block->left_side = info->left_side; } #ifdef UNIV_DEBUG if (cursor->index->table->does_not_fit_in_memory) { block->n_hash_helps = 0; } #endif /* UNIV_DEBUG */ if ((block->n_hash_helps > 
page_get_n_recs(block->frame) / BTR_SEARCH_PAGE_BUILD_LIMIT) && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) { if ((!block->index) || (block->n_hash_helps > 2 * page_get_n_recs(block->frame)) || (block->n_fields != block->curr_n_fields) || (block->n_bytes != block->curr_n_bytes) || (block->left_side != block->curr_left_side)) { /* Build a new hash index on the page */ return (TRUE); } } return (FALSE); }