python3基礎教程 專案3:萬能的XML
阿新 • • 發佈:2019-02-05
模組介紹:
在python中使用sax方式處理xml要先引入xml.sax中的parse函式,還有xml.sax.handler中的ContentHandler。
parse函式:用於解析xml檔案
幾個注意點:
getattr()函式:用於返回一個物件屬性值。
callable() 函式:用於檢查一個物件是否是可呼叫的。
os.path.join()函式:使用作業系統對應的路徑分隔符(例如 POSIX 上的 '/')將多個路徑片段合併為一個路徑。
os.makedirs()函式:在指定的路徑中建立必要的目錄。
以下是程式碼及註釋:
"""Generate a small static website from an XML description using SAX.

The XML input uses three special elements: ``<website>`` (root),
``<directory name="...">`` (creates a sub-directory) and
``<page name="..." title="...">`` (creates ``<name>.html``).  Everything
found inside a ``<page>`` element is copied to the output file as markup.
"""
import os
from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape, quoteattr


class Dispatcher:
    """Mix-in that routes SAX element events to per-element handler methods.

    For an element called ``foo`` the start event is sent to ``startFoo(attrs)``
    and the end event to ``endFoo()``.  When no such method exists the event
    falls back to ``defaultStart(name, attrs)`` / ``defaultEnd(name)``, if the
    subclass defines them.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Find the handler for an event and call it with the right arguments.

        Args:
            prefix: ``'start'`` or ``'end'``.
            name: The element name reported by the parser.
            attrs: The element attributes (only used for start events).
        """
        # NOTE(review): an element whose capitalized name collides with an
        # existing handler method (e.g. <element> or <document>) would be
        # routed to that method with the wrong arguments.
        method = getattr(self, prefix + name.capitalize(), None)
        if callable(method):
            # A specific handler already knows its element name.
            args = ()
        else:
            # The default handlers receive the element name as first argument.
            method = getattr(self, 'default' + prefix.capitalize(), None)
            args = (name,)
        if prefix == 'start':
            args += (attrs,)
        if callable(method):
            method(*args)

    # The two basic SAX event handlers simply delegate to dispatch().
    def startElement(self, name, attrs):
        """SAX callback: forward a start-tag event to dispatch()."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: forward an end-tag event to dispatch()."""
        self.dispatch('end', name)


class WebsiteConstructor(Dispatcher, ContentHandler):
    """SAX handler that writes one HTML file per ``<page>`` element."""

    # True while the parser is inside a <page>; content is then copied
    # through to the currently open output file.
    passthrough = False

    def __init__(self, directory):
        """Create the handler and make sure the root output directory exists.

        Args:
            directory: Path of the root directory the site is written to.
        """
        super().__init__()
        self.directory = [directory]  # stack of path components
        self.out = None  # file object of the page being written, if any
        self.ensureDirectory()

    def ensureDirectory(self):
        """Create the directory named by the current path stack if needed."""
        path = os.path.join(*self.directory)
        os.makedirs(path, exist_ok=True)

    def close(self):
        """Close the current output file if one is still open.

        Needed when parsing aborts inside a <page>, where endPage() is never
        reached.
        """
        if self.out is not None:
            self.out.close()
            self.out = None

    def characters(self, chars):
        """SAX callback: copy character data into the current page."""
        if self.passthrough:
            # The parser hands over unescaped text, so re-escape it to keep
            # '&', '<' and '>' valid in the generated HTML.
            self.out.write(escape(chars))

    def defaultStart(self, name, attrs):
        """Copy the start tag of any element without a specific handler."""
        if self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # quoteattr() escapes the value and adds the surrounding quotes.
                self.out.write(' {}={}'.format(key, quoteattr(val)))
            self.out.write('>')

    def defaultEnd(self, name):
        """Copy the end tag of any element without a specific handler."""
        if self.passthrough:
            self.out.write('</{}>'.format(name))

    def startDirectory(self, attrs):
        """Enter a <directory>: push its name and create it on disk."""
        self.directory.append(attrs['name'])
        self.ensureDirectory()

    def endDirectory(self):
        """Leave a <directory>: pop its name from the path stack."""
        self.directory.pop()

    def startPage(self, attrs):
        """Enter a <page>: open ``<name>.html`` and write the HTML header."""
        filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
        # Explicit UTF-8 so that any character the XML parser delivers can be
        # written, independent of the platform's locale encoding.
        self.out = open(filename, 'w', encoding='utf-8')
        self.writeHeader(attrs['title'])
        self.passthrough = True

    def endPage(self):
        """Leave a <page>: write the HTML footer and close the file."""
        self.passthrough = False
        self.writeFooter()
        self.close()

    def writeHeader(self, title):
        """Write the opening HTML boilerplate, including the page title."""
        self.out.write('<html>\n <head>\n')
        # Declares the encoding the file is actually written in.
        self.out.write(' <meta charset="utf-8">\n')
        self.out.write(' <title>')
        self.out.write(escape(title))
        self.out.write('</title>\n </head>\n <body>\n')

    def writeFooter(self):
        """Write the closing HTML boilerplate."""
        self.out.write('\n </body>\n</html>\n')


def main(source='website.xml', directory='public_html'):
    """Parse *source* and build the website below *directory*."""
    constructor = WebsiteConstructor(directory)
    try:
        parse(source, constructor)
    finally:
        # Do not leak a half-written page if parsing fails midway.
        constructor.close()


if __name__ == '__main__':
    main()
website.xml:
<website> <page name = "index" title = "Home Page"> <h1> Welcome to My Home Page</h1> <p>Hi, there. My name is Mr.Gumby, and this is my home page. Here are some of my interests </p> <ul> <li><a href = "interests/shouting.html">Shouting</a></li> <li><a href = "interests/sleeping.html">Sleeping</a></li> <li><a href = "interests/eating.html">Eating</a></li> </ul> </page> <directory name="interests"> <page name="shouting" title="Shouting"> <h1>Mr.Gumby's Shouting Page</h1> <p>...</p> </page> <page name="sleeping" title="Sleeping"> <h1>Mr.Gumby's Sleeping Page</h1> <p>...</p> </page> <page name="eating" title="Eating"> <h1>Mr.Gumby's Eating Page</h1> <p>...</p> </page> </directory> </website>