python解析並修改xml檔案
阿新 • • 發佈:2019-01-31
使用labelImg標註圖片後需要統一修改圖片label的名稱和圖片名,所以用python批量修改圖片名和xml檔案。
首先批量複製並修改圖片名稱:
# -*- coding: utf-8 -*-
# 將二級目錄下的圖片/標籤檔案重新命名到同一個新資料夾下
import os, shutil
def batchRenameFile(srcDirName, destDirName, speciesPrefix='1', extension='.jpg'):
    """Copy the files found one level below ``srcDirName`` into ``destDirName``.

    Every copy is named ``<speciesPrefix>_<subdir>_<n><extension>``, where
    ``<subdir>`` is the name of the sub-directory the file came from and
    ``<n>`` counts from 1 inside that sub-directory.

    Args:
        srcDirName: Directory whose immediate sub-directories hold the data
            files.
        destDirName: Directory that receives the renamed copies. It is
            created when it does not exist yet.
        speciesPrefix: Leading tag of every generated name (default ``'1'``).
        extension: Suffix appended to every generated name (default
            ``'.jpg'``); the source file's own extension is not consulted.
    """
    if not os.path.isdir(destDirName):
        os.makedirs(destDirName)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # generated numbering reproducible between runs and machines.
    for subDirName in sorted(os.listdir(srcDirName)):
        subDirPath = os.path.join(srcDirName, subDirName)
        if not os.path.isdir(subDirPath):
            # A stray file next to the data folders cannot be listed.
            continue

        fileNames = [
            fileName
            for fileName in sorted(os.listdir(subDirPath))
            if os.path.isfile(os.path.join(subDirPath, fileName))
        ]
        for index, fileName in enumerate(fileNames, start=1):
            newName = speciesPrefix + '_' + subDirName + '_' + str(index) + extension
            destPath = os.path.join(destDirName, newName)
            shutil.copy(os.path.join(subDirPath, fileName), destPath)
            print(destPath)
接著從 txt 檔案(每行格式為「編號<Tab>標籤名稱」)生成編號與標籤名稱對應的字典;其中狗的編號會先加上 42 再作為字典的鍵:
def creatDic(catFile=r'E:\Cats&Dogs\CatList.txt',
             dogFile=r'E:\Cats&Dogs\DogList.txt',
             dogOffset=42):
    """Build the number -> label dictionary from the cat and dog list files.

    Each non-blank line of a list file is split on tabs; the first field is
    the number and the last field is the label. Cat numbers are used as keys
    unchanged. Dog numbers are parsed as integers, shifted by ``dogOffset``
    and stored as strings.

    Args:
        catFile: Path of the cat list file.
        dogFile: Path of the dog list file.
        dogOffset: Value added to every dog number before it becomes a key.

    Returns:
        dict mapping number strings to label strings (without line endings).

    Raises:
        ValueError: If a dog number is not an integer.
    """
    txtDict = {}
    for number, label in _readLabelFile(catFile):
        txtDict[number] = label
    for number, label in _readLabelFile(dogFile):
        txtDict[str(int(number) + dogOffset)] = label
    return txtDict


def _readLabelFile(path):
    """Yield ``(first_field, last_field)`` for each non-blank line of ``path``."""
    # 'utf-8-sig' drops a leading UTF-8 byte order mark while decoding, which
    # is what the byte-wise '\xef\xbb\xbf' check was trying to achieve.
    with open(path, 'r', encoding='utf-8-sig') as handle:
        for line in handle:
            # Also remove a BOM that appears anywhere else in the line, and
            # the line ending so labels never carry a trailing newline.
            line = line.replace('\ufeff', '').rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split('\t')
            yield fields[0], fields[-1]
最後,批量修改 xml 檔案中的標籤名稱(folder 以及每個 object 的 name),並把 xml 中記錄的圖片名稱(filename 與 path)改為與該 xml 檔名對應的圖片名稱:
def batchRenameFile1(DirName, txtDict,
                     imagePathPrefix='E:\\myVOCdevkit\\VOC2007\\images\\',
                     dogOffset=42):
    """Rewrite the image name and label fields of every XML file in ``DirName``.

    File names are expected to look like ``<species>_<label>_<n>.xml``. For
    each ``.xml`` file, in place:

    * ``filename`` becomes the file's base name with a ``.jpg`` suffix;
    * ``path`` becomes ``imagePathPrefix`` followed by that image name;
    * when ``<species>`` is ``"1"`` (cat) the ``folder`` text and the ``name``
      of every ``object`` become ``txtDict[<label>]``;
    * when ``<species>`` is ``"2"`` (dog) they become
      ``txtDict[str(int(<label>) + dogOffset)]``.

    Files with any other species tag only get ``filename`` and ``path``
    updated. Entries that do not end in ``.xml`` are ignored.

    Args:
        DirName: Directory containing the XML files.
        txtDict: Mapping from number strings to label strings.
        imagePathPrefix: Text placed in front of the image name in ``path``.
        dogOffset: Value added to a dog's label number before the lookup.

    Raises:
        IndexError: If an XML file name contains no underscore.
        KeyError: If the computed key is missing from ``txtDict``.
    """
    # Imported locally: the surrounding file only imports os and shutil.
    from xml.etree.ElementTree import parse

    for FileName in os.listdir(DirName):
        if not FileName.lower().endswith('.xml'):
            # Anything else in the folder would make the XML parser fail.
            continue

        FilePath = os.path.join(DirName, FileName)
        print(FilePath)
        doc = parse(FilePath)
        root = doc.getroot()

        imageName = FileName.split('.')[0] + '.jpg'
        root.find('filename').text = imageName
        root.find('path').text = imagePathPrefix + imageName

        nameParts = FileName.split('_')
        species = nameParts[0]
        label = nameParts[1]

        if species == '1':  # cat
            key = label
        elif species == '2':  # dog
            key = str(int(label) + dogOffset)
        else:
            key = None

        if key is not None:
            labelText = txtDict[key]
            root.find('folder').text = labelText
            # A file may hold several <object> nodes; relabel all of them.
            for objectNode in root.findall('object'):
                objectNode.find('name').text = labelText

        doc.write(FilePath)