1. 程式人生 > 實用技巧 >過濾xml檔案內容

過濾xml檔案內容

  在指定的目錄下獲取所有 .xml 檔案，通過正則表示式只保留含有 <ANNOTATION_VALUE> 標籤的行，並去除其中的 XML 標籤，然後把保留的內容寫到指定目錄下的 txt 檔案中。

import codecs
import xml.etree.ElementTree as ET
import sys,re
import csv
import os


def file_name(file_dir):
    """Return the names of the files located directly inside *file_dir*.

    Only the top-level directory is inspected; files that live in
    sub-directories are not included.

    Args:
        file_dir: Path of the directory to list.

    Returns:
        A list of file names (no directory components). The list is empty
        when ``os.walk`` yields nothing for *file_dir*, e.g. because the
        directory does not exist.
    """
    # os.walk is top-down by default, so the first triple it yields describes
    # file_dir itself. Taking only that triple avoids returning the files of
    # whichever sub-directory happened to be visited last, and the default
    # value avoids an UnboundLocalError when nothing is yielded at all.
    _root, _dirs, files = next(os.walk(file_dir), (file_dir, [], []))
    return files

# Directory that holds the source .xml files.
file_dir = 'D:/untitled/test/fcc'
list_name = []

# Base names (without the ".xml" extension) of the XML files to process.
# Filtering on the suffix skips unrelated files, and splitext removes only the
# trailing extension; the previous str()/replace()/eval() round-trip stripped
# ".xml" anywhere inside a name and evaluated text built from file names.
xml_stems = [
    os.path.splitext(name)[0]
    for name in file_name(file_dir)
    if name.endswith('.xml')
]
print(xml_stems)

# Matches any XML tag; compiled once because it is applied to every kept line.
tag_re = re.compile(r'<.*?>')

for stem in xml_stems:
    # Read from the same directory that was listed above, so the script does
    # not depend on the working directory being the parent of "fcc".
    xml_path = os.path.join(file_dir, stem + '.xml')
    # Output goes to ./csv/<stem>.txt; the file is opened in append mode, so
    # re-running the script adds to existing output instead of replacing it.
    txt_path = "./csv/{}.txt".format(stem)

    # Context managers close both files even if reading or writing fails.
    with codecs.open(xml_path, 'r', 'utf-8') as xmlfile:
        with open(txt_path, 'a+', encoding='utf-8', newline='') as txtfile:
            for line in xmlfile:
                # Only lines that carry an annotation value are of interest.
                if '<ANNOTATION_VALUE>' not in line:
                    continue
                # Drop every tag and surrounding whitespace, keeping the text.
                text = tag_re.sub("", line).strip()
                print("成功")
                txtfile.write(text + '\n')