1. 程式人生 > >【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs

【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs

ADNI Series

1、【ADNI】資料預處理(1)SPM,CAT12

2、【ADNI】資料預處理(2)獲取 subject slices

3、【ADNI】資料預處理(3)CNNs

4、【ADNI】資料預處理(4)Get top k slices according to CNNs

5、【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs

6、【ADNI】資料預處理(6)ADNI_slice_dataloader ||| show image


Author: Chaoqun Hou

where: hcq_research 

[ [email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$ ]

date: 20180407

 

get_top_k_slices_MCI.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime

import shutil
import random

from hcq_lib import *

# Fraction of the subjects assigned to each split.
# NOTE: partition_slice_train_val_test() only reads train_percentage and
# val_percentage; the test split receives whatever subjects remain, so
# test_percentage is informational only.
train_percentage = 0.75
val_percentage = 0.2
test_percentage = 0.05


## AD/NC
# root_txt_path = "/home/hcq/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/AD_NC/AD_NC_GM_subject_id"
# dataset_path = os.path.join(root_txt_path, "AD_NC_GM_subject_id_20180403")

## pMCI/sMCI
# Working directory holding the slice-path lists, top_k_slices.txt and the
# random_log folder.
root_txt_path = "/home/hcq/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset"
# Folder containing the per-class slice directories
# (pMCI_gray_matter_Slices / sMCI_gray_matter_Slices).
dataset_path = os.path.join(root_txt_path, "sMCI_pMCI_GM_subject_id_20180405")


# List of selected slices; move_slice() uses the text before "|||" on each
# line as the slice name.
top_k_silces_id_txt = os.path.join(root_txt_path, "top_k_slices.txt")
# Log file handed to hcq_write() for the random permutation, split sizes and
# every copied file name.
log_path = os.path.join(root_txt_path, "random_log", "random_log.txt")

# Output location: slices are copied to
# <root_new_path>/<dataset_name>/<train|validation|test>/<label>/.
root_new_path = "/home/hcq/alzheimer_disease/ADNI_825/experiments_FineTunning/"
dataset_name = "majority_select_slices_folder_01_pMCI_sMCI"

def _read_subject_ids(silce_txt):
	"""Return the distinct subject ids listed in ``silce_txt``, in first-seen order.

	The subject id is taken as the fourth '/'-separated component of each
	line, e.g. ``./<dataset>/<class_folder>/<subject_id>/XSlice``.
	Blank lines are ignored.
	"""
	subject_ids = []
	seen = set()
	with open(silce_txt, "r") as slice_txt_file:
		for line in slice_txt_file:
			line = line.replace("\n", "").replace("\r", "")
			# A blank (e.g. trailing) line has no path components; skip it
			# rather than failing on the index lookup below.
			if not line.strip():
				continue
			subject_id = line.split('/')[3]
			if subject_id not in seen:
				seen.add(subject_id)
				subject_ids.append(subject_id)
	return subject_ids


def partition_slice_train_val_test(silce_txt, dataset_dir, label):
	"""Randomly split the subjects of one class and copy their top-k slices.

	Step 1: collect the distinct subject ids from ``silce_txt`` and shuffle
	them. The first ``int(n * train_percentage)`` subjects go to the train
	split, the next ``int(n * val_percentage)`` to the validation split and
	all remaining subjects to the test split (``test_percentage`` is not
	consulted). Splitting is done per subject, so all slices of one subject
	end up in the same split.

	Step 2: for each split, ``move_slice`` copies the slices named in
	``top_k_silces_id_txt`` into the corresponding output folder.

	The permutation and the split sizes are written to ``log_path``. No
	random seed is set in this function, so the split may differ from run
	to run.

	Args:
		silce_txt: path of the text file listing slice folders, one per line.
		dataset_dir: folder containing one sub-folder per subject id.
		label: class name (e.g. "pMCI"); used in the log and as the output
			sub-folder name.
	"""
	## step1: split the subjects into train/val/test
	subject_id_list = _read_subject_ids(silce_txt)
	len_slice_list = len(subject_id_list)

	# A random permutation of the subject indices; it is logged alongside the
	# split sizes.
	rondom_list = random.sample(range(0, len_slice_list), len_slice_list)
	hcq_write(log_path, True, True, "rondom_list [{}]".format(label))
	hcq_write(log_path, False, False, rondom_list)

	shuffled_subjects = [subject_id_list[index] for index in rondom_list]
	train_end = int(len_slice_list * train_percentage)
	val_end = train_end + int(len_slice_list * val_percentage)

	train_subject_id = shuffled_subjects[:train_end]
	val_subject_id = shuffled_subjects[train_end:val_end]
	# Everything left over after train and validation is the test split.
	test_subject_id = shuffled_subjects[val_end:]

	hcq_write(log_path, True, True, "[len_slice_list] {}".format(len_slice_list))
	hcq_write(log_path, True, True, "[num_train] {}".format(len(train_subject_id)))
	hcq_write(log_path, True, True, "[num_val] {}".format(len(val_subject_id)))
	hcq_write(log_path, True, True, "[num_test] {} \n".format(len(test_subject_id)))

	## step2: copy the top-k slices of every subject into its split folder
	move_slice(train_subject_id, dataset_dir, "train", label)
	move_slice(val_subject_id, dataset_dir, "validation", label)
	move_slice(test_subject_id, dataset_dir, "test", label)


def _resolve_top_k_slices(txt_path):
	"""Return ``(axis_folder, slice_file)`` pairs for the entries in ``txt_path``.

	The slice name is the text before ``|||`` on each line, with ".jpg"
	appended. The axis folder ("XSlice", "YSlice" or "ZSlice") is chosen by
	the first of the letters X, Y, Z found in that file name. Blank lines
	are ignored.

	Raises:
		ValueError: if a non-blank entry contains none of X, Y, Z, because
			its source folder cannot be determined.
	"""
	resolved = []
	with open(txt_path, "r") as txt_file:
		for line in txt_file:
			line = line.replace("\n", "").replace("\r", "")
			if not line.strip():
				continue
			slice_file = line.split('|||')[0] + ".jpg"
			# X is checked first, then Y, then Z.
			for axis in ("X", "Y", "Z"):
				if axis in slice_file:
					resolved.append((axis + "Slice", slice_file))
					break
			else:
				# Without an axis letter there is no source folder; fail
				# loudly instead of copying some other file in its place.
				raise ValueError(
					"cannot tell slice axis (X/Y/Z) from entry {!r} in {}".format(line, txt_path))
	return resolved


def move_slice(subject_id_folder_list, dataset_dir, folder_name, label):
	"""Copy the top-k slices of the given subjects into one split/label folder.

	Despite the name, files are copied (``shutil.copyfile``), not moved.
	For every subject and every slice listed in ``top_k_silces_id_txt``,
	``<dataset_dir>/<subject_id>/<X|Y|Z>Slice/<slice>.jpg`` is copied to
	``<root_new_path>/<dataset_name>/<folder_name>/<label>/<subject_id>_<slice>.jpg``.
	Each destination path is written to ``log_path`` before the copy.

	Args:
		subject_id_folder_list: subject ids whose slices should be copied.
		dataset_dir: folder containing one sub-folder per subject id.
		folder_name: split name used as output sub-folder (e.g. "train").
		label: class name used as the innermost output folder.

	Raises:
		ValueError: if an entry of the top-k list names no X/Y/Z axis.
	"""
	new_name_path = os.path.join(root_new_path, dataset_name, folder_name, label)
	hcq_create_dir(new_name_path)

	# Nothing to copy: leave the top-k list untouched.
	if not subject_id_folder_list:
		return

	# The top-k list is the same for every subject, so parse it only once.
	top_k_slices = _resolve_top_k_slices(top_k_silces_id_txt)

	for subject_id in subject_id_folder_list:
		for axis_folder, slice_id in top_k_slices:
			old_name = os.path.join(dataset_dir, subject_id, axis_folder, slice_id)
			# Prefix with the subject id so slices of different subjects do
			# not collide in the shared output folder.
			new_name = os.path.join(new_name_path, subject_id + "_" + slice_id)

			hcq_write(log_path, True, True, new_name)
			shutil.copyfile(old_name, new_name)



if __name__ == "__main__":

	# Separator line written to the log at the start of each run.
	hcq_write(log_path, True, True, "=" * 40)

	# Per-class text files listing the slice folders.
	slice_txt_by_label = {
		"pMCI": os.path.join(root_txt_path, "pMCI_gray_matter_Slices_path.txt"),
		"sMCI": os.path.join(root_txt_path, "sMCI_gray_matter_Slices_path.txt"),
	}
	print("pMCI_silce_txt = {}".format(slice_txt_by_label["pMCI"]))
	print("sMCI_silce_txt = {}".format(slice_txt_by_label["sMCI"]))

	# Per-class folders holding one sub-folder per subject.
	slice_dir_by_label = {
		"pMCI": os.path.join(dataset_path, "pMCI_gray_matter_Slices"),
		"sMCI": os.path.join(dataset_path, "sMCI_gray_matter_Slices"),
	}

	# pMCI is processed first, then sMCI.
	for class_label in ("pMCI", "sMCI"):
		partition_slice_train_val_test(
			slice_txt_by_label[class_label],
			slice_dir_by_label[class_label],
			class_label,
		)
[email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$ ls
get_top_k_slices_MCI.py  hcq_lib.pyc                       random_log                        sMCI_pMCI_GM_subject_id_20180405      top_k_slices.txt
hcq_lib.py               pMCI_gray_matter_Slices_path.txt  sMCI_gray_matter_Slices_path.txt  sMCI_pMCI_GM_subject_id_20180405.zip
[email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$

pMCI_gray_matter_Slices_path.txt

./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/136_S_0695/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/136_S_0695/ZSlice

 

[email protected]:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset/sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices$ tree -L 2
.
├── 002_S_0729
│   ├── XSlice
│   ├── YSlice
│   └── ZSlice
├── 002_S_0954
│   ├── XSlice
│   ├── YSlice
│   └── ZSlice
├── 002_S_1070
│   ├── XSlice
│   ├── YSlice
│   └── ZSlice
├── 003_S_1057

 

[email protected]:~/alzheimer_disease/ADNI_825/experiments_FineTunning/majority_select_slices_folder_01_pMCI_sMCI$ tree -L 2
.
├── test
│   ├── pMCI
│   └── sMCI
├── train
│   ├── pMCI
│   └── sMCI
└── validation
    ├── pMCI
    └── sMCI

9 directories, 0 files