【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs
阿新 • • 發佈:2018-11-04
ADNI Series
1、【ADNI】資料預處理(1)SPM,CAT12
2、【ADNI】資料預處理(2)獲取 subject slices
3、【ADNI】資料預處理(3)CNNs
4、【ADNI】資料預處理(4)Get top k slices according to CNNs
5、【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs
6、【ADNI】資料預處理(6)ADNI_slice_dataloader ||| show image
Author: Chaoqun Hou
where: hcq_research
[ [email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$ ]
date: 20180407
get_top_k_slices_MCI.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Build the pMCI/sMCI train/validation/test slice dataset.

For each class (pMCI, sMCI) the script:

1. reads the list of subject slice folders and extracts the unique subject ids;
2. shuffles the subjects and partitions them into train/validation/test;
3. copies, for every subject, the slices named in ``top_k_slices.txt`` into
   ``<root_new_path>/<dataset_name>/<split>/<label>/``.

All paths are configured by the module-level constants below.
"""
import os
import re
import time
import datetime
import shutil
import random

# NOTE(review): hcq_write and hcq_create_dir are not defined in this file and
# are presumably provided by this star import -- confirm against hcq_lib.
from hcq_lib import *

# Fractions of the subjects assigned to each split.
train_percentage = 0.75
val_percentage = 0.2
# Not read by the code: the test split is whatever remains after the train and
# validation splits have been filled.
test_percentage = 0.05

## AD/NC
# root_txt_path = "/home/hcq/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/AD_NC/AD_NC_GM_subject_id"
# dataset_path = os.path.join(root_txt_path, "AD_NC_GM_subject_id_20180403")

## pMCI/sMCI
root_txt_path = "/home/hcq/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset"
dataset_path = os.path.join(root_txt_path, "sMCI_pMCI_GM_subject_id_20180405")

top_k_silces_id_txt = os.path.join(root_txt_path, "top_k_slices.txt")
log_path = os.path.join(root_txt_path, "random_log", "random_log.txt")

root_new_path = "/home/hcq/alzheimer_disease/ADNI_825/experiments_FineTunning/"
dataset_name = "majority_select_slices_folder_01_pMCI_sMCI"


def _read_subject_ids(silce_txt):
    """Return the unique subject ids listed in *silce_txt*, in file order.

    Each non-blank line is split on '/' and the component at index 3 is taken
    as the subject id (e.g. "./<dataset>/<class>/<subject_id>/YSlice").
    Blank lines are skipped.

    Raises:
        IndexError: if a non-blank line has fewer than four '/'-separated
            components.
    """
    subject_ids = []
    seen = set()
    with open(silce_txt, "r") as path_file:
        for line in path_file:
            line = line.replace("\n", "").replace("\r", "")
            if not line.strip():
                continue
            subject_id = line.split('/')[3]
            if subject_id not in seen:
                seen.add(subject_id)
                subject_ids.append(subject_id)
    return subject_ids


def _read_top_k_slice_files(top_k_txt):
    """Return ``(axis_folder, file_name)`` pairs parsed from *top_k_txt*.

    The text before the first '|||' on each non-blank line is the slice id;
    ".jpg" is appended to form the file name. The axis folder ("XSlice",
    "YSlice" or "ZSlice") is chosen by the first of "X", "Y", "Z" that occurs
    in the file name. Blank lines are skipped.

    Raises:
        ValueError: if a file name contains none of "X", "Y", "Z".
    """
    slice_files = []
    with open(top_k_txt, "r") as top_k_file:
        for line in top_k_file:
            line = line.replace("\n", "").replace("\r", "")
            if not line.strip():
                continue
            slice_id = line.split('|||')[0] + ".jpg"
            for axis in ("X", "Y", "Z"):
                if axis in slice_id:
                    axis_folder = axis + "Slice"
                    break
            else:
                # Fail loudly: without a known axis there is no source folder,
                # and guessing would copy the wrong image under this name.
                raise ValueError(
                    "cannot determine slice axis (X/Y/Z) from entry {!r} in {}".format(
                        line, top_k_txt))
            slice_files.append((axis_folder, slice_id))
    return slice_files


def partition_slice_train_val_test(silce_txt, dataset_dir, label):
    """Randomly split the subjects of one class and copy their top-k slices.

    Step 1: collect the unique subject ids from *silce_txt*, shuffle them and
    partition them into train/validation/test (0.75 : 0.20 : remainder, per
    ``train_percentage`` / ``val_percentage``). The shuffle order and the split
    sizes are written to ``log_path`` through ``hcq_write``.

    Step 2: for every split, copy the slices listed in ``top_k_silces_id_txt``
    via :func:`move_slice`.

    Source file layout: ``dataset_dir/<subject_id>/<X|Y|Z>Slice/<slice_id>.jpg``.

    Args:
        silce_txt: path of the text file listing slice folder paths, one per
            line.
        dataset_dir: root folder holding one sub-folder per subject.
        label: class name (e.g. "pMCI"); used in the log and as the output
            sub-folder name.
    """
    subject_id_list = _read_subject_ids(silce_txt)
    len_slice_list = len(subject_id_list)

    # A random permutation of the subject indices; logged so that a split can
    # be traced back afterwards.
    rondom_list = random.sample(range(0, len_slice_list), len_slice_list)
    hcq_write(log_path, True, True, "rondom_list [{}]".format(label))
    hcq_write(log_path, False, False, rondom_list)

    train_limit = int(len_slice_list * train_percentage)
    val_limit = int(len_slice_list * val_percentage)

    # First `train_limit` shuffled subjects -> train, next `val_limit` ->
    # validation, everything left over -> test.
    shuffled = [subject_id_list[random_id] for random_id in rondom_list]
    train_subject_id = shuffled[:train_limit]
    val_subject_id = shuffled[train_limit:train_limit + val_limit]
    test_subject_id = shuffled[train_limit + val_limit:]

    hcq_write(log_path, True, True, "[len_slice_list] {}".format(len_slice_list))
    hcq_write(log_path, True, True, "[num_train] {}".format(len(train_subject_id)))
    hcq_write(log_path, True, True, "[num_val] {}".format(len(val_subject_id)))
    hcq_write(log_path, True, True, "[num_test] {} \n".format(len(test_subject_id)))

    move_slice(train_subject_id, dataset_dir, "train", label)
    move_slice(val_subject_id, dataset_dir, "validation", label)
    move_slice(test_subject_id, dataset_dir, "test", label)


def move_slice(subject_id_folder_list, dataset_dir, folder_name, label):
    """Copy the top-k slices of the given subjects into one split folder.

    The destination folder is
    ``root_new_path/dataset_name/<folder_name>/<label>`` and is created through
    ``hcq_create_dir``. Each slice is copied (not moved) from
    ``dataset_dir/<subject_id>/<axis>Slice/<slice_id>.jpg`` to
    ``<destination>/<subject_id>_<slice_id>.jpg``; every destination path is
    written to ``log_path``.

    Args:
        subject_id_folder_list: subject ids belonging to this split.
        dataset_dir: root folder holding one sub-folder per subject.
        folder_name: split name ("train", "validation" or "test").
        label: class name used as the last destination path component.

    Raises:
        ValueError: if an entry of ``top_k_silces_id_txt`` names no X/Y/Z axis.
    """
    new_name_path = os.path.join(root_new_path, dataset_name, folder_name, label)
    hcq_create_dir(new_name_path)

    if not subject_id_folder_list:
        return

    # The slice list is the same for every subject, so parse it only once.
    slice_files = _read_top_k_slice_files(top_k_silces_id_txt)

    for subject_id in subject_id_folder_list:
        for axis_folder, slice_id in slice_files:
            old_name = os.path.join(dataset_dir, subject_id, axis_folder, slice_id)
            slice_name = subject_id + "_" + slice_id
            new_name = os.path.join(new_name_path, slice_name)
            hcq_write(log_path, True, True, new_name)
            shutil.copyfile(old_name, new_name)


if __name__ == "__main__":
    hcq_write(log_path, True, True, "=" * 40)

    pMCI_silce_txt = os.path.join(root_txt_path, "pMCI_gray_matter_Slices_path.txt")
    sMCI_silce_txt = os.path.join(root_txt_path, "sMCI_gray_matter_Slices_path.txt")
    print("pMCI_silce_txt = {}".format(pMCI_silce_txt))
    print("sMCI_silce_txt = {}".format(sMCI_silce_txt))

    dataset_pMCI = os.path.join(dataset_path, "pMCI_gray_matter_Slices")
    dataset_sMCI = os.path.join(dataset_path, "sMCI_gray_matter_Slices")

    partition_slice_train_val_test(pMCI_silce_txt, dataset_pMCI, "pMCI")
    partition_slice_train_val_test(sMCI_silce_txt, dataset_sMCI, "sMCI")
[email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$ ls
get_top_k_slices_MCI.py hcq_lib.pyc random_log sMCI_pMCI_GM_subject_id_20180405 top_k_slices.txt
hcq_lib.py pMCI_gray_matter_Slices_path.txt sMCI_gray_matter_Slices_path.txt sMCI_pMCI_GM_subject_id_20180405.zip
[email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$
pMCI_gray_matter_Slices_path.txt
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/136_S_0695/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/136_S_0695/ZSlice
[email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset/sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices$ tree -L 2
.
├── 002_S_0729
│ ├── XSlice
│ ├── YSlice
│ └── ZSlice
├── 002_S_0954
│ ├── XSlice
│ ├── YSlice
│ └── ZSlice
├── 002_S_1070
│ ├── XSlice
│ ├── YSlice
│ └── ZSlice
├── 003_S_1057
[email protected]:~/alzheimer_disease/ADNI_825/experiments_FineTunning/majority_select_slices_folder_01_pMCI_sMCI$ tree -L 2
.
├── test
│ ├── pMCI
│ └── sMCI
├── train
│ ├── pMCI
│ └── sMCI
└── validation
    ├── pMCI
    └── sMCI
9 directories, 0 files