過濾xml檔案內容
阿新 • • 發佈:2020-07-16
在指定的目錄下獲取所有 .xml 檔案,逐行讀取並只保留包含 <ANNOTATION_VALUE> 標籤的行,通過正則表示式去掉其中的 XML 標籤,然後把保留的文字內容寫到指定目錄下同名的 .txt 檔案中。
"""Extract <ANNOTATION_VALUE> text from XML files into plain-text files.

For every ``.xml`` file found directly inside ``file_dir``, each line that
contains the ``<ANNOTATION_VALUE>`` marker has its tags stripped and the
remaining text is appended, one entry per line, to ``<output_dir>/<stem>.txt``.
"""
import codecs
import xml.etree.ElementTree as ET
import sys
import re
import csv
import os

# Directory scanned for input .xml files.  Input files are also READ from
# here, so the script no longer depends on the current working directory
# happening to contain a matching "./fcc" folder.
file_dir = 'D:/untitled/test/fcc'
# Directory that receives one .txt file per input file (relative to the CWD).
output_dir = './csv'

_XML_SUFFIX = '.xml'
_ANNOTATION_MARKER = '<ANNOTATION_VALUE>'
# Non-greedy so that each tag is removed separately and the text between an
# opening and a closing tag survives.
_TAG_PATTERN = re.compile('<.*?>')


def file_name(file_dir):
    """Return the names of the files located directly inside *file_dir*.

    Args:
        file_dir: Path of the directory to list.

    Returns:
        A list of bare file names (no directory part, sub-directories are
        not descended into).  An empty list is returned when *file_dir*
        does not exist or cannot be listed.
    """
    # os.walk is top-down by default, so its first result describes
    # file_dir itself.  Returning right away avoids picking up the file
    # list of whichever nested directory happens to be visited last.
    for _root, _dirs, files in os.walk(file_dir):
        return files
    return []


def xml_stems(names):
    """Return the base names of the ``.xml`` entries in *names*.

    Only names that END with ``.xml`` are kept, and only that trailing
    suffix is removed; other files are skipped.

    Args:
        names: Iterable of file names.

    Returns:
        A list of names with the trailing ``.xml`` removed, in input order.
    """
    return [name[:-len(_XML_SUFFIX)] for name in names
            if name.endswith(_XML_SUFFIX)]


def extract_annotations(xml_path, txt_path):
    """Append the text of every annotation line in *xml_path* to *txt_path*.

    A line is an annotation line when it contains ``<ANNOTATION_VALUE>``.
    All tags are removed from such a line, surrounding whitespace is
    stripped, and the result is written followed by a single ``\\n``.
    The output file is opened in append mode, so it is created (possibly
    empty) if missing and earlier content is preserved.

    NOTE: this is a line-based text filter, not an XML parse; XML entities
    such as ``&amp;`` are written out unchanged.

    Args:
        xml_path: Path of the UTF-8 encoded input file.
        txt_path: Path of the UTF-8 output file to append to.

    Returns:
        The number of annotation lines written.

    Raises:
        OSError: If either file cannot be opened.
    """
    written = 0
    # newline='' keeps the '\n' terminator untranslated on every platform.
    with open(xml_path, 'r', encoding='utf-8') as xmlfile, \
            open(txt_path, 'a', encoding='utf-8', newline='') as txtfile:
        for line in xmlfile:
            if _ANNOTATION_MARKER not in line:
                continue
            txtfile.write(_TAG_PATTERN.sub('', line).strip() + '\n')
            print("成功")
            written += 1
    return written


def main():
    """Convert every .xml file in ``file_dir`` into a .txt file in ``output_dir``."""
    stems = xml_stems(file_name(file_dir))
    print(stems)
    # Create the destination up front instead of failing on the first open().
    os.makedirs(output_dir, exist_ok=True)
    for stem in stems:
        xml_path = os.path.join(file_dir, stem + _XML_SUFFIX)
        txt_path = os.path.join(output_dir, stem + '.txt')
        extract_annotations(xml_path, txt_path)


if __name__ == '__main__':
    main()