f2fs系列文章fill_super(三)
這篇文章完成f2fs的segment管理結構f2fs_sm_info的建立和恢復。
build_segment_manager:首先分配容納f2fs_sm_info的空間,然後用f2fs_super_block中的資料對f2fs_sm_info的一些關於segment數量的資訊進行初始化。接著初始化其中的三個連結串列discard_list、wait_list、sit_entry_set。然後呼叫build_sit_info構建sit_info,主要是sit_info以及管理的結構的空間的分配。接著呼叫build_free_segmap構建free_segmap_info,這裡主要完成空間的分配,並將所有的segment和section設定為髒,其修改過程在後面再完成。然後呼叫build_curseg來構建各種curseg_info,並完成curren segment的恢復。接著呼叫build_sit_entries來恢復所有的seg_entry和sec_entry,然後呼叫init_free_segmap來恢復free_segmap和free_secmap,接著呼叫build_dirty_segmap來構建dirty_seglist_info。最後更新sit_info中的min和max的mtime。
int build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_sm_info *sm_info; int err; sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); if (!sm_info) return -ENOMEM; sbi->sm_info = sm_info; sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); sm_info->segment_count = le32_to_cpu(raw_super->segment_count); sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count); sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; if (!test_opt(sbi, LFS)) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; INIT_LIST_HEAD(&sm_info->discard_list); INIT_LIST_HEAD(&sm_info->wait_list); sm_info->nr_discards = 0; sm_info->max_discards = 0; sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) return err; } err = build_sit_info(sbi); if (err) return err; err = build_free_segmap(sbi); if (err) return err; err = build_curseg(sbi); if (err) return err; build_sit_entries(sbi); init_free_segmap(sbi); err = build_dirty_segmap(sbi); if (err) return err; init_min_max_mtime(sbi); return 0; }
build_sit_info:主要完成f2fs_sm_info中的sit_info的空間的分配和其中的幾個欄位和點陣圖的空間的分配,這些包括所有f2fs_sit_entry對應的記憶體結構seg_entry的空間、記錄當前的sit點陣圖的cur_valid_map和上次cp的sit點陣圖的ckpt_valid_map、記錄discard塊的點陣圖discard_map、臨時點陣圖tmp_map、所有section的相關資訊的sec_entries。還有sit_info的一些欄位的賦值。
static int build_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
char *src_bitmap, *dst_bitmap;
unsigned int bitmap_size;
sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
if (!sit_i)
return -ENOMEM;
SM_I(sbi)->sit_info = sit_i;
sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL);
if (!sit_i->sentries)
return -ENOMEM;
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
if (!sit_i->dirty_sentries_bitmap)
return -ENOMEM;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
sit_i->sentries[start].cur_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].cur_valid_map || !sit_i->sentries[start].ckpt_valid_map)
return -ENOMEM;
if (f2fs_discard_en(sbi)) {
sit_i->sentries[start].discard_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].discard_map)
return -ENOMEM;
}
}
sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->tmp_map)
return -ENOMEM;
if (sbi->segs_per_sec > 1) {
sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL);
if (!sit_i->sec_entries)
return -ENOMEM;
}
sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
if (!dst_bitmap)
return -ENOMEM;
sit_i->s_ops = &default_salloc_ops;
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
mutex_init(&sit_i->sentry_lock);
return 0;
}
build_free_segmap:首先分配一個free_segmap_info的空間,然後分配記錄所有的segment的free_segmap點陣圖,再分配記錄所有的section的free_secmap的點陣圖。接著將這兩個點陣圖初始化為全是1,表示全部都不是空閒的,然後初始化free_segmap_info的記錄segment起始地segno的start_segno,將空閒的segment和section的個數賦值為0。
static int build_free_segmap(struct f2fs_sb_info *sbi)
{
struct free_segmap_info *free_i;
unsigned int bitmap_size, sec_bitmap_size;
free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
if (!free_i)
return -ENOMEM;
SM_I(sbi)->free_info = free_i;
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
if (!free_i->free_segmap)
return -ENOMEM;
sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
if (!free_i->free_secmap)
return -ENOMEM;
memset(free_i->free_segmap, 0xff, bitmap_size);
memset(free_i->free_secmap, 0xff, sec_bitmap_size);
free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
free_i->free_segments = 0;
free_i->free_sections = 0;
spin_lock_init(&free_i->segmap_lock);
return 0;
}
build_curseg首先分配NR_CURSEG_TYPE個curseg_info的空間,然後對每個curseg_info進行空間的分配,首先是分配一個f2fs_summary,然後初始化管理journal的讀寫鎖,接著分配f2fs_journal,然後將segno和next_blkoff分別初始化為NULL_SEGNO和0。然後呼叫函式restore_curseg_summaries對curseg_info進行恢復。
static int build_curseg(struct f2fs_sb_info *sbi)
{
struct curseg_info *array;
int i;
array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
if (!array)
return -ENOMEM;
SM_I(sbi)->curseg_array = array;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
return -ENOMEM;
init_rwsem(&array[i].journal_rwsem);
array[i].journal = kzalloc(sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
return -ENOMEM;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
}
return restore_curseg_summaries(sbi);
}
restore_curseg_summaries:根據do_checkpoint時的curseg_info的兩種寫入方式,這裡首先要判斷那種方式進行恢復。首先檢查是否設定了CP_COMPACT_SUM_FLAG,如果設定了那麼就採用複雜的方式read_compacted對data的summaries進行讀取恢復。然後檢查之前是否將node的summaries也寫入裝置了,然後將node採用普通的方式read_normal_summaries對sumamies進行讀取恢復。這裡如果之前沒有進行復雜的讀取恢復,那麼這裡會將data和node一起以普通的方式read_normal_summaries讀取恢復。
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
int type = CURSEG_HOT_DATA;
int err;
if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
int npages = npages_for_summary_flush(sbi, true);
if (npages >= 2)
ra_meta_pages(sbi, start_sum_block(sbi), npages, META_CP, true);
if (read_compacted_summaries(sbi))
return -EINVAL;
type = CURSEG_HOT_NODE;
}
if (__exist_node_summaries(sbi))
ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
NR_CURSEG_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
if (err)
return err;
}
return 0;
}
build_sit_entries:首先將所有的f2fs_sit_entry讀取出來恢復seg_entry。首先對f2fs_sit_block進行預讀,然後遍歷所有的segment,先獲取當前segment的seg_entry,然後獲取當前段的f2fs_sit_entry,接著呼叫check_block_count檢查f2fs_sit_entry中的有效塊數不能大於512,還有就是segno不能大於總的段數。然後呼叫seg_info_from_raw_sit將f2fs_sit_entry的資訊同步到seg_entry中,接著更新seg_entry的discard_map,這個map跟seg_entry的cur_valid_map一致,其discard_blks跟seg_entry中的free的block的塊數。另外由於sit的最新資料可能是放置在curseg_info的sit_journal中的,所以還需要讀取這些f2fs_sit_entry來獲取最新的f2fs_sit_entry,通過遍歷其中的f2fs_journal陣列,其恢復方式跟上面的是一致的。
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
struct f2fs_journal *journal = curseg->journal;
struct seg_entry *se;
struct f2fs_sit_entry sit;
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
do {
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
for (; start < end && start < MAIN_SEGS(sbi); start++) {
struct f2fs_sit_block *sit_blk;
struct page *page;
se = &sit_i->sentries[start];
page = get_current_sit_page(sbi, start);
sit_blk = (struct f2fs_sit_block *)page_address(page);
sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
f2fs_put_page(page, 1);
check_block_count(sbi, start, &sit);
seg_info_from_raw_sit(se, &sit);
if (f2fs_discard_en(sbi)) {
memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;
}
if (sbi->segs_per_sec > 1)
get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks;
}
start_blk += readed;
} while (start_blk < sit_blk_cnt);
down_read(&curseg->journal_rwsem);
for (i = 0; i < sits_in_cursum(journal); i++) {
unsigned int old_valid_blocks;
start = le32_to_cpu(segno_in_journal(journal, i));
se = &sit_i->sentries[start];
sit = sit_in_journal(journal, i);
old_valid_blocks = se->valid_blocks;
check_block_count(sbi, start, &sit);
seg_info_from_raw_sit(se, &sit);
if (f2fs_discard_en(sbi)) {
memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
sbi->discard_blks += old_valid_blocks - se->valid_blocks;
}
if (sbi->segs_per_sec > 1)
get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks - old_valid_blocks;
}
up_read(&curseg->journal_rwsem);
}
init_free_segmap:對所有的main area的segment進行遍歷,檢查其seg_entry中的有效塊數valid_blocks是否為零,如果滿足,則呼叫__set_free將相應的segment設定為free,同時如果該段所在的section都沒有有效塊數的話,也將section設定為free。再對所有的curseg_info進行遍歷,將所有的current segment對應的segno和躲在的section都從free的map中清除。
static void init_free_segmap(struct f2fs_sb_info *sbi)
{
unsigned int start;
int type;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
struct seg_entry *sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
}
for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
struct curseg_info *curseg_t = CURSEG_I(sbi, type);
__set_test_and_inuse(sbi, curseg_t->segno);
}
}
__set_free:將segno所對應的free_segmap_info中的管理segment的free_segmap的位消掉,表示這個segno是空閒的。同時對該segno所在的section在free_segmap_info中 的所有位進行檢查,如果都是空閒的,那麼就將free_segmap_info中的管理section的free_secmap的位消掉,表示這個section是空閒的。在這個過程中free_segmap_info中的相關的數量統計free_segments和free_sections也隨之更新。
static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int next;
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
free_i->free_segments++;
next = find_next_bit(free_i->free_segmap, start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
spin_unlock(&free_i->segmap_lock);
}
__set_test_and_inuse:檢查segno對應的free_segmap_info中的管理segment的free_segmap的位是否是空閒的,如果是,那就置位標誌不再空閒,然後檢查segno所在的section對應的free_segmap_info中的管理section的free_secmap的位是否是空閒的,如果是,那就置位標誌不再空閒。在這個過程中free_segmap_info中的相關的數量統計free_segments和free_sections也隨之更新。
static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
spin_lock(&free_i->segmap_lock);
if (!test_and_set_bit(segno, free_i->free_segmap)) {
free_i->free_segments--;
if (!test_and_set_bit(secno, free_i->free_secmap))
free_i->free_sections--;
}
spin_unlock(&free_i->segmap_lock);
}
build_dirty_segno:首先分配dirty_seglist_info的空間,然後分配NR_DIRTY_TYPE個相關的管理不同型別的dirty的segment的點陣圖。然後呼叫init_dirty_segmap利用free_segmap對dirty_segmap進行更新,最後呼叫init_victim_sectim分配dirty_seglist_info中的victim_secmap,初始化為全部都是零。
static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i;
unsigned int bitmap_size, i;
dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
if (!dirty_i)
return -ENOMEM;
SM_I(sbi)->dirty_info = dirty_i;
mutex_init(&dirty_i->seglist_lock);
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
for (i = 0; i < NR_DIRTY_TYPE; i++) {
dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
if (!dirty_i->dirty_segmap[i])
return -ENOMEM;
}
init_dirty_segmap(sbi);
return init_victim_secmap(sbi);
}
init_dirty_segmap:首先在free_segmap_info的free_segmap中查詢到不是空閒的segment,如果對應的有效塊數是512或者0,那就不是dirty的,其他情況下呼叫函式__locate_dirty_segment將該segno在dirty_seglist_info的點陣圖中置位。
static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno = 0, offset = 0;
unsigned short valid_blocks;
while (1) {
segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
if (segno >= MAIN_SEGS(sbi))
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, 0);
if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
continue;
if (valid_blocks > sbi->blocks_per_seg) {
f2fs_bug_on(sbi, 1);
continue;
}
mutex_lock(&dirty_i->seglist_lock);
__locate_dirty_segment(sbi, segno, DIRTY);
mutex_unlock(&dirty_i->seglist_lock);
}
}
__locate_dirty_segment:首先檢查segno是不是current segment,如果是就不進行操作了。然後在檢查dirty_segmap [DIRTY]的中是否有置位,沒有就置位並更新數量。然後在獲取segno對應的seg_entry,獲取其type,然後檢查相應的dirty_segmap [type]中是否有置位,沒有就置位並更新數量。
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
if (IS_CURSEG(sbi, segno))
return;
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]++;
if (dirty_type == DIRTY) {
struct seg_entry *sentry = get_seg_entry(sbi, segno);
enum dirty_type t = sentry->type;
if (unlikely(t >= DIRTY)) {
f2fs_bug_on(sbi, 1);
return;
}
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]++;
}
}
init_victim_secmap:分配平衡gc時的victim_select的section的點陣圖的空間。
static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
if (!dirty_i->victim_secmap)
return -ENOMEM;
return 0;
}