將自己的dcm資料轉換成LUNA16資料集格式的程式碼整理
阿新 • • 發佈:2018-12-29
1.獲取mhd和raw
import os

import cv2  # kept from the original script; the conversion itself does not use it
import numpy
import pydicom
import SimpleITK

# Root folder; the driver below walks its immediate sub-folders.
rootpath = "E:/DcmData/xlc/Fracture_data/Me/"
# Example folder holding the DICOM files of one series.
PathDicom = "E:/DcmData/xlc/Fracture_data/Me/3004291153/3307885/"
# Folder that receives the generated .mhd/.raw files.
SaveRawDicom = "E:/DcmData/xlc/Fracture_data/mhd_raw/"


def getSubPaths(dir):
    """Return the paths of the immediate sub-directories of *dir*.

    An empty list is returned when *dir* does not exist.
    """
    if not os.path.exists(dir):
        return []
    entries = (os.path.join(dir, name) for name in os.listdir(dir))
    return [path for path in entries if os.path.isdir(path)]


def get_mhd_raw(PathDicom, SaveRawDicom):
    """Stack the DICOM slices found under *PathDicom* into one .mhd/.raw volume.

    Every file whose name contains ".dcm" (case-insensitive) anywhere below
    *PathDicom* is treated as one slice of the same volume.  The slices are
    ordered by the z component of ImagePositionPatient, stacked into a
    (z, y, x) array and written to ``<SaveRawDicom>/<SeriesInstanceUID>.mhd``.

    Args:
        PathDicom: Folder that is searched recursively for DICOM files.
        SaveRawDicom: Output folder; created when it does not exist yet.

    Returns:
        None.  When no DICOM file is found a message is printed and nothing
        is written.
    """
    lstFilesDCM = [
        os.path.join(dirName, filename)
        for dirName, _subdirs, fileList in os.walk(PathDicom)
        for filename in fileList
        if ".dcm" in filename.lower()
    ]
    if not lstFilesDCM:
        print("No DICOM files found under " + str(PathDicom) + "; nothing written.")
        return

    # os.walk gives no spatial ordering, so read the headers only and sort the
    # files by slice position before stacking them.
    # NOTE(review): sorting on ImagePositionPatient[2] assumes axial slices
    # that all belong to one series -- confirm for your data.
    headers = [
        (pydicom.dcmread(path, stop_before_pixels=True), path)
        for path in lstFilesDCM
    ]
    headers.sort(key=lambda item: float(item[0].ImagePositionPatient[2]))

    # The first slice after sorting is the reference; all slices are assumed
    # to share its dimensions.
    RefDs = headers[0][0]
    print(RefDs.SOPInstanceUID)

    # Z spacing is the real distance between neighbouring slices; the
    # SliceThickness tag is only used when that distance cannot be derived.
    slice_count = len(headers)
    z_first = float(headers[0][0].ImagePositionPatient[2])
    z_last = float(headers[-1][0].ImagePositionPatient[2])
    spacing_z = (z_last - z_first) / (slice_count - 1) if slice_count > 1 else 0.0
    if spacing_z <= 0:
        spacing_z = float(RefDs.SliceThickness)

    # NOTE(review): the in-plane order (PixelSpacing[0], PixelSpacing[1]) is
    # kept exactly as the original script wrote it; it only matters when the
    # pixels are not square -- confirm against the DICOM/SimpleITK axis
    # conventions before using anisotropic data.
    ConstPixelSpacing = (
        float(RefDs.PixelSpacing[0]),
        float(RefDs.PixelSpacing[1]),
        spacing_z,
    )
    # Origin of the volume = position of the slice stored at array index 0.
    Origin = tuple(float(value) for value in RefDs.ImagePositionPatient)
    # The series UID names the output file.
    Seriesname = RefDs.SeriesInstanceUID

    # Fill a (z, y, x) array directly; the dtype is taken from the first
    # slice's pixel data.
    # NOTE(review): stored pixel values are copied as-is (no RescaleSlope /
    # RescaleIntercept is applied) -- confirm whether HU values are expected.
    ArrayDicom = None
    for index, (_header, path) in enumerate(headers):
        pixels = pydicom.dcmread(path).pixel_array
        if ArrayDicom is None:
            ArrayDicom = numpy.zeros(
                (slice_count, int(RefDs.Rows), int(RefDs.Columns)),
                dtype=pixels.dtype,
            )
        ArrayDicom[index] = pixels

    # Convert the array to a SimpleITK image and write the .mhd/.raw pair.
    sitk_img = SimpleITK.GetImageFromArray(ArrayDicom, isVector=False)
    sitk_img.SetSpacing(ConstPixelSpacing)
    sitk_img.SetOrigin(Origin)
    os.makedirs(SaveRawDicom, exist_ok=True)
    SimpleITK.WriteImage(sitk_img, os.path.join(SaveRawDicom, Seriesname + ".mhd"))


def main():
    """Convert the first sub-folder of every folder directly under rootpath."""
    for class_dir in getSubPaths(rootpath):
        series_dirs = getSubPaths(class_dir)
        if not series_dirs:
            print("No sub-folder in " + class_dir + "; skipped.")
            continue
        get_mhd_raw(series_dirs[0], SaveRawDicom)


if __name__ == "__main__":
    main()
2.根據標註檔產生轉換後的csv(這裡的標註檔副檔名是dec,這是解碼造成的;用pandas讀取時與csv完全一致)
import os

import numpy as np
import pandas as pd
import pydicom

# Root folder; the driver below walks its immediate sub-folders and the
# per-series csv files are written here.
rootpath = 'E:/DcmData/xlc/Fracture_data/Me/'

# Positional columns of the annotation file that csv_ch reads.
_COL_SOP_UID = 5   # SOPInstanceUID of the annotated slice
_COL_LABEL = 6     # class description text
_COL_XS = 9        # ';'-separated x pixel coordinates
_COL_YS = 10       # ';'-separated y pixel coordinates

# Class description -> integer class id (the position in this tuple).
_CLASS_IDS = {
    label: index
    for index, label in enumerate(
        ('正常', '隱匿型', '無錯位', '有錯位', '有骨痂', '畸形癒合')
    )
}

_OUTPUT_COLUMNS = [
    'seriesuid', 'coordX', 'coordY', 'coordZ',
    'DistanceX_mm', 'DistanceY_mm', 'class',
]


def getSubPaths(dir):
    """Return the paths of the immediate sub-directories of *dir*.

    An empty list is returned when *dir* does not exist.
    """
    if not os.path.exists(dir):
        return []
    entries = (os.path.join(dir, name) for name in os.listdir(dir))
    return [path for path in entries if os.path.isdir(path)]


def dcm_rename(dir):
    """Rename every DICOM file in *dir* to ``<SOPInstanceUID>.DCM``.

    Files whose name contains ".dcm" (case-insensitive) are considered.
    A file is left untouched when a different file already occupies the
    target name, so nothing is overwritten.  Does nothing when *dir* does
    not exist.
    """
    if not os.path.exists(dir):
        return
    for file in os.listdir(dir):
        if ".dcm" not in file.lower():
            continue
        source = os.path.join(dir, file)
        # Only the header is needed to learn the UID.
        uid = pydicom.dcmread(source, stop_before_pixels=True).SOPInstanceUID
        target = os.path.join(dir, uid + ".DCM")
        if source == target:
            continue
        if os.path.exists(target) and not os.path.samefile(source, target):
            print("Skip rename, target already exists: " + target)
            continue
        os.rename(source, target)


def _parse_coords(text):
    """Parse a ';'-separated coordinate string into a list of floats.

    Empty tokens (for example the one after a trailing ';') are ignored.
    Non-string input such as a missing value yields an empty list.
    """
    if not isinstance(text, str):
        return []
    return [float(token) for token in text.split(';') if token.strip()]


def csv_ch(PathDicom, rootpath):
    """Convert the annotations of one series into a per-series csv file.

    Reads ``RibFracture.dec`` from *PathDicom*.  For every annotation row
    whose slice file ``<SOPInstanceUID>.DCM`` exists in *PathDicom*, the
    bounding box of the annotated points is converted from pixel indices to
    patient coordinates and one output row is produced.  Rows with an
    unknown class description or without coordinates are skipped with a
    message so that all output columns stay aligned.

    Args:
        PathDicom: Folder with the renamed DICOM files and the annotation file.
        rootpath: Folder that receives ``<SeriesInstanceUID>.csv``.

    Returns:
        The path of the written csv file, or None when no usable annotation
        was found (nothing is written in that case).
    """
    candidates = os.path.join(PathDicom, 'RibFracture.dec')
    candidatesList = pd.read_csv(candidates)

    rows = []
    for record in candidatesList.itertuples(index=False):
        slice_path = os.path.join(PathDicom, str(record[_COL_SOP_UID]) + '.DCM')
        # Annotations whose SOPInstanceUID has no matching file are useless.
        if not os.path.isfile(slice_path):
            continue

        label = record[_COL_LABEL]
        if label not in _CLASS_IDS:
            print("Skip annotation with unknown class: " + str(label))
            continue

        xs = _parse_coords(record[_COL_XS])
        ys = _parse_coords(record[_COL_YS])
        if not xs or not ys:
            print("Skip annotation without coordinates: " + slice_path)
            continue

        RefDs = pydicom.dcmread(slice_path, stop_before_pixels=True)
        origin = [float(value) for value in RefDs.ImagePositionPatient]
        # NOTE(review): x uses PixelSpacing[0] and y uses PixelSpacing[1],
        # exactly as the original script did; this only matters for
        # non-square pixels -- confirm against how the volume spacing is
        # written.
        spacing = [float(value) for value in RefDs.PixelSpacing]

        minx = np.min(xs) * spacing[0] + origin[0]
        maxx = np.max(xs) * spacing[0] + origin[0]
        miny = np.min(ys) * spacing[1] + origin[1]
        maxy = np.max(ys) * spacing[1] + origin[1]

        rows.append({
            'seriesuid': RefDs.SeriesInstanceUID,
            'coordX': minx,
            'coordY': miny,
            'coordZ': origin[2],
            'DistanceX_mm': maxx - minx,
            'DistanceY_mm': maxy - miny,
            'class': _CLASS_IDS[label],
        })

    if not rows:
        print("No usable annotation in " + candidates + "; nothing written.")
        return None

    # The file is named after the series of the last processed annotation.
    csv_name = os.path.join(rootpath, rows[-1]['seriesuid'] + '.csv')
    # columns= fixes the column order of the output file.
    dataframe = pd.DataFrame(rows, columns=_OUTPUT_COLUMNS)
    dataframe.to_csv(csv_name, index=False, sep=',')
    return csv_name


csv_path = os.path.join(rootpath, 'candidates.csv')


def main():
    """Rename and convert the first sub-folder of every folder under rootpath."""
    for class_dir in getSubPaths(rootpath):
        series_dirs = getSubPaths(class_dir)
        if not series_dirs:
            print("No sub-folder in " + class_dir + "; skipped.")
            continue
        dcm_rename(series_dirs[0])
        csv_ch(series_dirs[0], rootpath)


if __name__ == "__main__":
    main()
3.將這些csv合併
import glob
import os

import pandas as pd

# Per-series csv files to merge.
CSV_PATTERN = 'E:/DcmData/xlc/Fracture_data/Me/*.csv'
# Merged output file (relative to the current working directory).
OUTPUT_CSV = 'annotations.csv'
# Columns, in order, of the merged file.
COLUMNS = [
    'seriesuid', 'coordX', 'coordY', 'coordZ',
    'DistanceX_mm', 'DistanceY_mm', 'class',
]


def merge_annotation_csvs(pattern=CSV_PATTERN, output_path=OUTPUT_CSV,
                          remove_inputs=True):
    """Append all csv files matching *pattern* into one annotations file.

    The rows of every input file are concatenated in sorted file-name order,
    restricted to COLUMNS (missing columns become empty) and written to
    *output_path* without an index column.  The input files are deleted only
    after the merged file has been written, so a failure cannot lose data.

    Args:
        pattern: Glob pattern selecting the input csv files.
        output_path: Path of the merged csv file; a match of *pattern* that
            is this very file is not treated as an input.
        remove_inputs: Delete the input files after a successful write.

    Returns:
        The merged DataFrame that was written.
    """
    output_key = os.path.normcase(os.path.abspath(output_path))
    csv_files = sorted(
        path for path in glob.glob(pattern)
        if os.path.normcase(os.path.abspath(path)) != output_key
    )

    # Header-only files contribute no rows; leaving them out keeps the
    # column dtypes of the real data.
    frames = [frame for frame in map(pd.read_csv, csv_files) if not frame.empty]
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=COLUMNS)

    df_to_save = df.reindex(columns=COLUMNS)
    df_to_save.to_csv(output_path, index=False)

    if remove_inputs:
        for path in csv_files:
            os.remove(path)
    return df_to_save


if __name__ == "__main__":
    merge_annotation_csvs()