MySQL啟動過程詳解五:GTID的處理
阿新 • • 發佈:2022-04-14
MySQL啟動過程中 GTID 的處理:
1. 在核心模組啟動函式 init_server_components() 會呼叫 gtid_server_init() 初始化 gtid server
2. 呼叫 init_server_auto_options() 初始化並獲取資料庫的 server_uuid,從 auto.cnf 檔案中讀取,如果沒有則重新生成
3. 呼叫 gtid_state->init() 將 server_uuid 新增到 sid_map 中。
4. 呼叫 gtid_state->read_gtid_executed_from_table() 讀取 mysql.gtid_executed 表,用表中的值初始化 executed_gtids
5. 接下來,要從 mysql.gtid_executed 表和 binlog 檔案中初始化 gtid_executed 和 gtid_purged 的值
6. 將只存在於 binlog 而不在 mysql.gtid_executed 表中的 GTID 補寫入該表並加入 executed_gtids,然後更新 gtids_only_in_table & lost_gtids & previous_gtids_logged 的值
7. 將 previous_gtids_logged 寫入最新的binlog檔案
涉及到的原始碼如下:
// 在init_server_components() 函式中會呼叫 gtid_server_init(), 建立 global_sid_lock & global_mode_lock & global_sid_map & gtid_state & gtid_table_persistor
if (init_server_components()) unireg_abort(MYSQLD_ABORT_EXIT); /* Each server should have one UUID. We will create it automatically, if it does not exist.
初始化並獲取資料庫的 server_uuid, 從 auto.cnf 檔案中獲取,如果沒有則重新生成。 */ if (init_server_auto_options()) { sql_print_error("Initialization of the server's UUID failed because it could" " not be read from the auto.cnf file. If this is a new" " server, the initialization failed because it was not" " possible to generate a new UUID."); unireg_abort(MYSQLD_ABORT_EXIT); } /* Add server_uuid to the sid_map. This must be done after server_uuid has been initialized in init_server_auto_options and after the binary log (and sid_map file) has been initialized in init_server_components(). No error message is needed: init_sid_map() prints a message. Strictly speaking, this is not currently needed when opt_bin_log==0, since the variables that gtid_state->init initializes are not currently used in that case. But we call it regardless to avoid possible future bugs if gtid_state ever needs to do anything else. */ global_sid_lock->wrlock();
// 將 server_uuid 新增到 sid_map 中 int gtid_ret = gtid_state->init(); global_sid_lock->unlock(); if (gtid_ret) unireg_abort(MYSQLD_ABORT_EXIT); // Initialize executed_gtids from mysql.gtid_executed table. if (gtid_state->read_gtid_executed_from_table() == -1) unireg_abort(1); if (opt_bin_log) { /* Initialize GLOBAL.GTID_EXECUTED and GLOBAL.GTID_PURGED from gtid_executed table and binlog files during server startup. */ // 獲取 gtid_state.executed_gtids, gtid_state.lost_gtids, gtid_state.gtids_only_in_table, gtid_state.previous_gtids_logged的指標 Gtid_set *executed_gtids = const_cast<Gtid_set *>(gtid_state->get_executed_gtids()); Gtid_set *lost_gtids = const_cast<Gtid_set *>(gtid_state->get_lost_gtids()); Gtid_set *gtids_only_in_table = const_cast<Gtid_set *>(gtid_state->get_gtids_only_in_table()); Gtid_set *previous_gtids_logged = const_cast<Gtid_set *>(gtid_state->get_previous_gtids_logged()); // 定義中間變數, 包括 binlog中purge掉的 gtids, binlog中包含的 gtids, binlog中包含但是不在表中的 gtids Gtid_set purged_gtids_from_binlog(global_sid_map, global_sid_lock); Gtid_set gtids_in_binlog(global_sid_map, global_sid_lock); Gtid_set gtids_in_binlog_not_in_table(global_sid_map, global_sid_lock); // 從 binlog 檔案中讀取 gtids_in_binlog 和 purged_gtids_from_binlog if (mysql_bin_log.init_gtid_sets(>ids_in_binlog, &purged_gtids_from_binlog, opt_master_verify_checksum, true /*true=need lock*/, NULL /*trx_parser*/, NULL /*gtid_partial_trx*/, true /*is_server_starting*/)) unireg_abort(MYSQLD_ABORT_EXIT); global_sid_lock->wrlock(); purged_gtids_from_binlog.dbug_print("purged_gtids_from_binlog"); gtids_in_binlog.dbug_print("gtids_in_binlog"); // 如果 gtids_in_binlog 不是空的, 並且從表中讀取的 executed_gtids 是 gtids_in_binlog 的子集 if (!gtids_in_binlog.is_empty() && !gtids_in_binlog.is_subset(executed_gtids)) { gtids_in_binlog_not_in_table.add_gtid_set(>ids_in_binlog); if (!executed_gtids->is_empty()) // 更新 gtids_in_binlog_not_in_table gtids_in_binlog_not_in_table.remove_gtid_set(executed_gtids); /* Save unsaved GTIDs into gtid_executed 
table, in the following four cases: 1. the upgrade case. 2. the case that a slave is provisioned from a backup of the master and the slave is cleaned by RESET MASTER and RESET SLAVE before this. 3. the case that no binlog rotation happened from the last RESET MASTER on the server before it crashes. 4. The set of GTIDs of the last binlog is not saved into the gtid_executed table if server crashes, so we save it into gtid_executed table and executed_gtids during recovery from the crash. */ // 將 gtids_in_binlog_not_in_table 儲存到 mysql.gtid_executed 表中 if (gtid_state->save(>ids_in_binlog_not_in_table) == -1) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } // 在記憶體中的 executed_gtids 中加入 gtids_in_binlog_not_in_table gtid 集合 executed_gtids->add_gtid_set(>ids_in_binlog_not_in_table); } /* gtids_only_in_table= executed_gtids - gtids_in_binlog */ if (gtids_only_in_table->add_gtid_set(executed_gtids) != RETURN_STATUS_OK) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } // gtids_only_in_table gtids_only_in_table->remove_gtid_set(>ids_in_binlog); /* lost_gtids = executed_gtids - (gtids_in_binlog - purged_gtids_from_binlog) = gtids_only_in_table + purged_gtids_from_binlog; */ assert(lost_gtids->is_empty()); // 獲取 lost_gtids, 也就是 gtid_purged 的值 if (lost_gtids->add_gtid_set(gtids_only_in_table) != RETURN_STATUS_OK || lost_gtids->add_gtid_set(&purged_gtids_from_binlog) != RETURN_STATUS_OK) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } /* Prepare previous_gtids_logged for next binlog previous_gtids_logged */ if (previous_gtids_logged->add_gtid_set(>ids_in_binlog) != RETURN_STATUS_OK) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } /* Write the previous set of gtids at this point because during the creation of the binary log this is not done as we cannot move the init_gtid_sets() to a place before openning the binary log. This requires some investigation. 
將 previous_gtids_logged 寫入最新的 binlog。 /Alfranio */ Previous_gtids_log_event prev_gtids_ev(>ids_in_binlog); global_sid_lock->unlock(); (prev_gtids_ev.common_footer)->checksum_alg = static_cast<enum_binlog_checksum_alg>(binlog_checksum_options); if (prev_gtids_ev.write(mysql_bin_log.get_log_file())) unireg_abort(MYSQLD_ABORT_EXIT); mysql_bin_log.add_bytes_written( prev_gtids_ev.common_header->data_written); if (flush_io_cache(mysql_bin_log.get_log_file()) || mysql_file_sync(mysql_bin_log.get_log_file()->file, MYF(MY_WME))) unireg_abort(MYSQLD_ABORT_EXIT); mysql_bin_log.update_binlog_end_pos(); #ifdef HAVE_REPLICATION if (opt_bin_log && expire_logs_days) { time_t purge_time = server_start_time - expire_logs_days * 24 * 60 * 60; DBUG_EXECUTE_IF("expire_logs_always_at_start", { purge_time = my_time(0); }); if (purge_time >= 0) mysql_bin_log.purge_logs_before_date(purge_time, true); } #endif (void)RUN_HOOK(server_state, after_engine_recovery, (NULL)); }
從 binlog 檔案中讀取 gtids_in_binlog 和 purged_gtids_from_binlog的init_gtid_sets() 函式程式碼解析如下:
bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids, bool verify_checksum, bool need_lock, Transaction_boundary_parser *trx_parser, Gtid *gtid_partial_trx, bool is_server_starting) { /* Acquires the necessary locks to ensure that logs are not either removed or updated when we are reading from it. */ if (need_lock) { // We don't need LOCK_log if we are only going to read the initial // Prevoius_gtids_log_event and ignore the Gtid_log_events. if (all_gtids != NULL) mysql_mutex_lock(&LOCK_log); mysql_mutex_lock(&LOCK_index); global_sid_lock->wrlock(); } else { if (all_gtids != NULL) mysql_mutex_assert_owner(&LOCK_log); mysql_mutex_assert_owner(&LOCK_index); global_sid_lock->assert_some_wrlock(); } // Gather the set of files to be accessed. list<string> filename_list; LOG_INFO linfo; int error; list<string>::iterator it; list<string>::reverse_iterator rit; bool reached_first_file = false; /* Initialize the sid_map to be used in read_gtids_from_binlog */ Sid_map *sid_map = NULL; if (all_gtids) sid_map = all_gtids->get_sid_map(); else if (lost_gtids) sid_map = lost_gtids->get_sid_map(); // 將當前的 binlog name加入到 filename_list 中 for (error = find_log_pos(&linfo, NULL, false /*need_lock_index=false*/); !error; error = find_next_log(&linfo, false /*need_lock_index=false*/)) { DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name)); filename_list.push_back(string(linfo.log_file_name)); } if (error != LOG_INFO_EOF) { DBUG_PRINT("error", ("Error reading %s index", is_relay_log ? "relaylog" : "binlog")); goto end; } /* On server starting, one new empty binlog file is created and its file name is put into index file before initializing GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the last binlog file before the server restarts, so we remove its file name from filename_list. 
在 server 啟動時, 會建立一個新的空 binlog 檔案, 並將他的檔名加入 binlog index 檔案, 因為這個檔案不是 mysql restart之前的最後一個binlog檔案, 所以需要從 filename list中移除。 */ if (is_server_starting && !is_relay_log && !filename_list.empty()) filename_list.pop_back(); error = 0; // 資料庫啟動時不會是 NULL, 但是 purge binary logs等刪除 binlog 檔案的命令時是 NULL if (all_gtids != NULL) { DBUG_PRINT("info", ("Iterating backwards through %s logs, " "looking for the last %s log that contains " "a Previous_gtids_log_event.", is_relay_log ? "relay" : "binary", is_relay_log ? "relay" : "binary")); // Iterate over all files in reverse order until we find one that // contains a Previous_gtids_log_event. // 反向迭代指向 filename list的尾部。 rit = filename_list.rbegin(); bool can_stop_reading = false; // 如果只有一個 binlog 檔案則為 true reached_first_file = (rit == filename_list.rend()); // DBUG_PRINT("info", ("filename='%s' reached_first_file=%d", reached_first_file ? "" : rit->c_str(), reached_first_file)); // 通過反向迴圈掃描來獲取 gtids_in_binlog 集合。 while (!can_stop_reading && !reached_first_file) { // 獲取檔名 const char *filename = rit->c_str(); assert(rit != filename_list.rend()); rit++; // 如果到達第一個檔案則為 true, 表示掃描完成 reached_first_file = (rit == filename_list.rend()); DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d " "reached_first_file=%d, ", filename, can_stop_reading, reached_first_file)); // 通過 read_gtids_from_binlog 來讀取 binlog 檔案 switch (read_gtids_from_binlog(filename, all_gtids, reached_first_file ? lost_gtids : NULL, NULL /* first_gtid */, sid_map, verify_checksum, is_relay_log)) { case ERROR: { error = 1; goto end; } case GOT_GTIDS: { //如果掃描本binlog有PREVIOUS GTID EVENT和GTID EVENT 則跳出迴圈直達end can_stop_reading = true; break; } case GOT_PREVIOUS_GTIDS: { // 如果掃描本binlog有PREVIOUS GTID EVENT, 但是沒有 GTID EVENT /* If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS. If this is a relaylog file, we need to find at least one GTID to start parsing the relay log to add GTID of transactions that might have spanned in distinct relaylog files. 
如果這是一個 binlog 檔案, 那麼存在 PREVIOUS GTID EVENT 就足夠了。 如果這是 relay log檔案, 我們需要找到至少一個 GTID 來解析 relay log, 以新增到 可能跨越不同 relay log檔案的事務中。 */ if (!is_relay_log) can_stop_reading = true; break; } case NO_GTIDS: { // 這裡如果binlog不包含GTID EVENT和PREVIOUS GTID EVENT /* Mysql server iterates backwards through binary logs, looking for the last binary log that contains a Previous_gtids_log_event for gathering the set of gtid_executed on server start. This may take very long time if it has many binary logs and almost all of them are out of filesystem cache. So if the binlog_gtid_simple_recovery is enabled, and the last binary log does not contain any GTID event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and GLOBAL.GTID_PURGED should be empty in the case. 如果 binlog_gtid_simple_recovery 是 on, 並且最後一個 binary log 檔案中沒有 任何 GTID event, 那麼不需要再讀取binlog 檔案。global.gtid_executed 和 gtid_purged 都應該是空的。 */ if (binlog_gtid_simple_recovery && is_server_starting && !is_relay_log) { assert(all_gtids->is_empty()); assert(lost_gtids->is_empty()); goto end; } /*FALLTHROUGH*/ } case TRUNCATED: { break; } } } /* If we use GTIDs and have partial transactions on the relay log, must check if it ends on next relay log files. We also need to feed the boundary parser with the rest of the relay log to put it in the correct state before receiving new events from the master in the case of GTID auto positioning be disabled. 如果我們使用 GTID 並且在 relay log中存在部分事務, 那麼必須檢查事務是否在下一個relay log 檔案中結束。 */ if (is_relay_log && filename_list.size() > 0) { /* Suppose the following relaylog: rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004 ---------------+---------------+---------------+--------------- PREV_GTIDS | PREV_GTIDS | PREV_GTIDS | PREV_GTIDS (empty) | (UUID:1) | (UUID:1) | (UUID:1) ---------------+---------------+---------------+--------------- GTID(UUID:1) | QUERY(INSERT) | QUERY(INSERT) | XID ---------------+---------------+---------------+--------------- QUERY(CREATE | TABLE t1 ...) 
| ---------------+ GTID(UUID:2) | ---------------+ QUERY(BEGIN) | ---------------+ As it is impossible to determine the current Retrieved_Gtid_Set by only looking to the PREVIOUS_GTIDS on the last relay log file, and scanning events on it, we tried to find a relay log file that contains at least one GTID event during the backwards search. In the example, we will find a GTID only in rl-bin.000001, as the UUID:2 transaction was spanned across 4 relay log files. The transaction spanning can be caused by "FLUSH RELAY LOGS" commands on slave while it is queuing the transaction. So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need to parse the relay log starting on the file we found the last GTID queued to know if the transaction was fully retrieved or not. */ /* Adjust the reverse iterator to point to the relaylog file we need to start parsing, as it was incremented after generating the relay log file name. */ assert(rit != filename_list.rbegin()); rit--; assert(rit != filename_list.rend()); /* Reset the transaction parser before feeding it with events */ trx_parser->reset(); gtid_partial_trx->clear(); DBUG_PRINT("info", ("Iterating forwards through relay logs, " "updating the Retrieved_Gtid_Set and updating " "IO thread trx parser before start.")); for (it = find(filename_list.begin(), filename_list.end(), *rit); it != filename_list.end(); it++) { const char *filename = it->c_str(); DBUG_PRINT("info", ("filename='%s'", filename)); if (read_gtids_and_update_trx_parser_from_relaylog(filename, all_gtids, true, trx_parser, gtid_partial_trx)) { error = 1; goto end; } } } } /** 正向迴圈查詢 purged_gtids_from_binlog */ // 如果前面的反向迴圈沒有迭代到第一個 binlog 檔案 if (lost_gtids != NULL && !reached_first_file) { /* This branch is only reacheable by a binary log. The relay log don't need to get lost_gtids information. A 5.6 server sets GTID_PURGED by rotating the binary log. 
A 5.6 server that had recently enabled GTIDs and set GTID_PURGED would have a sequence of binary logs like: master-bin.N : No PREVIOUS_GTIDS (GTID wasn't enabled) master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE (GTID was enabled on startup) master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a SET @@GLOBAL.GTID_PURGED + has GTIDs of some transactions. If this 5.6 server be upgraded to 5.7 keeping its binary log files, this routine will have to find the first binary log that contains a PREVIOUS_GTIDS + a GTID event to ensure that the content of the GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery is not enabled). */ DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for " "the first binary log that contains both a " "Previous_gtids_log_event and a Gtid_log_event.")); assert(!is_relay_log); // 正向迴圈查詢 for (it = filename_list.begin(); it != filename_list.end(); it++) { /* We should pass a first_gtid to read_gtids_from_binlog when binlog_gtid_simple_recovery is disabled, or else it will return right after reading the PREVIOUS_GTIDS event to avoid stall on reading the whole binary log. */ Gtid first_gtid = {0, 0}; const char *filename = it->c_str(); DBUG_PRINT("info", ("filename='%s'", filename)); // 通過函式 read_gtids_from_binlog 讀取 binlog 檔案 switch (read_gtids_from_binlog(filename, NULL, lost_gtids, binlog_gtid_simple_recovery ? NULL : &first_gtid, sid_map, verify_checksum, is_relay_log)) { case ERROR: { error = 1; /*FALLTHROUGH*/ } case GOT_GTIDS: { // 讀取 binlog 檔案掃描到了 PREVIOUS GTID EVENT和GTID EVENT, 跳出迴圈 goto end; } case NO_GTIDS: // 沒有找到 PREVIOUS GTID EVENT 和 GTID EVENT則和 GOT_PREVIOUS_GTIDS的處理方式一致。 case GOT_PREVIOUS_GTIDS: { /* Mysql server iterates forwards through binary logs, looking for the first binary log that contains both Previous_gtids_log_event and gtid_log_event for gathering the set of gtid_purged on server start. 
It also iterates forwards through binary logs, looking for the first binary log that contains both Previous_gtids_log_event and gtid_log_event for gathering the set of gtid_purged when purging binary logs. This may take very long time if it has many binary logs and almost all of them are out of filesystem cache. So if the binlog_gtid_simple_recovery is enabled, we just initialize GLOBAL.GTID_PURGED from the first binary log, do not read any more binary logs. 如果只有 PREVIOUS GTID EVENT 並且設定了 binlog_gtid_simple_recovery, 只讀取第一個 binlog 檔案來 初始化 GLOBAL.GTID_PURGED。 */ if (binlog_gtid_simple_recovery) goto end; /*FALLTHROUGH*/ } case TRUNCATED: { break; } } } } end: if (all_gtids) all_gtids->dbug_print("all_gtids"); if (lost_gtids) lost_gtids->dbug_print("lost_gtids"); if (need_lock) { global_sid_lock->unlock(); mysql_mutex_unlock(&LOCK_index); if (all_gtids != NULL) mysql_mutex_unlock(&LOCK_log); } filename_list.clear(); DBUG_PRINT("info", ("returning %d", error)); DBUG_RETURN(error != 0 ? true : false); }