Androguard的部分原始碼(一)——androaxml.py
廢話少說,上程式碼。
# Command-line option specifications.  Each dict holds the flag names under
# 'name'; every other key is passed to OptionParser.add_option() as a keyword.
option_0 = {
    'name': ('-i', '--input'),
    'help': 'filename input (APK or android\'s binary xml)',
    'nargs': 1,
}
option_1 = {
    'name': ('-o', '--output'),
    'help': 'filename output of the xml',
    'nargs': 1,
}
option_2 = {
    'name': ('-v', '--version'),
    'help': 'version of the API',
    'action': 'count',
}

options = [option_0, option_1, option_2]


def main(options, arguments):
    """Decode an APK manifest or a binary XML file into readable XML.

    :param options: parsed command-line options; ``input``, ``output`` and
        ``version`` are read.
    :param arguments: remaining positional arguments (unused here).

    When ``options.input`` is set, the decoded XML is written to
    ``options.output`` if given, otherwise printed.  When only
    ``options.version`` is set, the Androguard version is printed.
    """
    if options.input is not None:
        # Inspect the file to decide how it has to be decoded.
        ret_type = androconf.is_android(options.input)
        if ret_type == "APK":
            a = apk.APK(options.input)
            manifest = a.get_android_manifest_xml()
            if manifest is None:
                # get_android_manifest_xml() returns None when the manifest
                # is missing or could not be parsed; report it instead of
                # failing with an AttributeError on None.
                print("Unable to read AndroidManifest.xml")
                return
            buff = manifest.toprettyxml(encoding="utf-8")
        elif ".xml" in options.input:
            ap = apk.AXMLPrinter(read(options.input))
            buff = minidom.parseString(ap.get_buff()).toprettyxml(encoding="utf-8")
        else:
            print("Unknown file type")
            return

        if options.output is not None:
            # buff is already UTF-8 encoded (toprettyxml was given an
            # encoding), so it is written as raw bytes.  Sending it through
            # a UTF-8 encoding writer would try to encode it a second time
            # and fail on non-ASCII content.  The with-block also closes
            # the file if the write raises.
            with open(options.output, "wb") as fd:
                fd.write(buff)
        else:
            # No output file requested: dump to the screen.
            print(buff)

    elif options.version is not None:
        print("Androaxml version %s" % androconf.ANDROGUARD_VERSION)


if __name__ == "__main__":
    parser = OptionParser()
    for option in options:
        # Work on a copy so the module-level option dicts stay intact.
        kwargs = dict(option)
        param = kwargs.pop('name')
        parser.add_option(*param, **kwargs)

    options, arguments = parser.parse_args()
    sys.argv[:] = arguments
    main(options, arguments)
這是androaxml.py的全部原始碼。幾個內容
第一,引數。一個input,可以是apk,或者AndroidManifest.xml。一個output,這是指定的輸出檔名,如果不指定輸出檔名,則輸出到螢幕。
第二,如果為apk,則使用APK()解析
def get_android_manifest_xml(self):
    """
        Return the xml object which corresponds to the AndroidManifest.xml file

        :rtype: object (None when ``self.xml`` has no "AndroidManifest.xml" entry)
    """
    try:
        return self.xml["AndroidManifest.xml"]
    except KeyError:
        return None
如果是AndroidManifest.xml,則使用AXMLPrinter
而在APK.__init__函式中有這樣一段
if zipmodule == 0: self.zip = ChilkatZip(self.__raw) elif zipmodule == 2: from androguard.patch import zipfile self.zip = zipfile.ZipFile(StringIO.StringIO(self.__raw), mode=mode) else: import zipfile self.zip = zipfile.ZipFile(StringIO.StringIO(self.__raw), mode=mode) for i in self.zip.namelist(): if i == "AndroidManifest.xml": self.axml[i] = AXMLPrinter(self.zip.read(i)) try: self.xml[i] = minidom.parseString(self.axml[i].get_buff()) except: self.xml[i] = None
對apk檔案利用ChilkatZip或者ZipFile進行解壓,然後從解壓後的檔案列表當中遍歷獲取AndroidManifest.xml,再對AndroidManifest.xml
呼叫AXMLPrinter,所以核心的處理在AXMLPrinter當中。
AXMLPrinter則是用AXMLParser對檔案進行解析。
所以處理流程就清晰了
APK: 生成APK class例項 ——> 解壓檔案 ——> 遍歷獲取AndroidManifest.xml ——> AXMLPrinter例項 ——> AXMLParser例項解析
XML: AXMLPrinter例項 ——> AXMLParser例項解析
class AXMLParser(object):
    """Event-style parser over a raw binary XML buffer."""

    def __init__(self, raw_buff):
        """Wrap *raw_buff* and check its header.

        :param raw_buff: raw content of the binary XML file

        On an unrecognised header ``valid_axml`` is set to False and a
        warning is emitted; the string pool and namespace tables are then
        not created.
        """
        self.reset()

        self.valid_axml = True
        self.buff = bytecode.BuffHandle(raw_buff)

        # The first little-endian 32-bit word is the file header.
        axml_file = unpack('<L', self.buff.read(4))[0]

        if axml_file == CHUNK_AXML_FILE:
            # Skip the next 4 bytes (presumably the file size; not used here).
            self.buff.read(4)

            self.sb = StringBlock(self.buff)  # string pool

            self.m_resourceIDs = []
            self.m_prefixuri = {}
            self.m_uriprefix = {}
            self.m_prefixuriL = []

            self.visited_ns = []
        else:
            self.valid_axml = False
            androconf.warning("Not a valid xml file")
AXMLParser.buff結構
self.__buff儲存內容
self.__idx儲存已解析的長度,也就是下次解析的起點
class AXMLPrinter(object):
    """Render a binary XML buffer as text by driving an AXMLParser."""

    def __init__(self, raw_buff):
        """Parse *raw_buff* and accumulate the textual XML in ``self.buff``.

        :param raw_buff: raw content of the binary XML file
        """
        self.axml = AXMLParser(raw_buff)
        self.xmlns = False

        self.buff = u''

        # Main loop: pull one parser event at a time and append its text.
        # Nothing is emitted when the parser rejected the file header.
        while self.axml.is_valid():
            _type = self.axml.next()

            if _type == START_DOCUMENT:
                self.buff += u'<?xml version="1.0" encoding="utf-8"?>\n'
            elif _type == START_TAG:
                self.buff += u'<' + self.getPrefix(self.axml.getPrefix()) + self.axml.getName() + u'\n'
                self.buff += self.axml.getXMLNS()

                # One line per attribute, written as prefix + name="value".
                for i in range(0, self.axml.getAttributeCount()):
                    self.buff += "%s%s=\"%s\"\n" % (
                        self.getPrefix(self.axml.getAttributePrefix(i)),
                        self.axml.getAttributeName(i),
                        self._escape(self.getAttributeValue(i)))

                self.buff += u'>\n'
            elif _type == END_TAG:
                self.buff += "</%s%s>\n" % (self.getPrefix(self.axml.getPrefix()), self.axml.getName())
            elif _type == TEXT:
                self.buff += "%s\n" % self.axml.getText()
            elif _type == END_DOCUMENT:
                break
AXMLParser例項化完成後進入主處理邏輯
在前一篇文章反編譯編譯後的AndroidManifest 當中也有一段類似的處理邏輯,實現大同小異,都是讀取tag,判斷是什麼chunk,然後處理,可以對比一下。
def next(self):
    """Advance the parser by one event and return the new event code."""
    self.doNext()
    return self.m_event
next函式呼叫doNext
def doNext(self):
    """Read chunks from ``self.buff`` until one yields a parser event.

    The resulting event code is stored in ``self.m_event``.  Resource-id
    and namespace chunks are consumed into the parser's tables without
    producing an event.  Once END_DOCUMENT has been reached, further calls
    do nothing.
    """
    if self.m_event == END_DOCUMENT:
        return

    event = self.m_event

    self.reset()
    while True:
        chunkType = -1

        if event == START_DOCUMENT:
            # The previous call already consumed the type word of the first
            # start-tag chunk before reporting START_DOCUMENT, so it must
            # not be read again.
            chunkType = CHUNK_XML_START_TAG
        else:
            if self.buff.end():
                # No more data: report the end of the document.
                self.m_event = END_DOCUMENT
                break
            chunkType = unpack('<L', self.buff.read(4))[0]

        if chunkType == CHUNK_RESOURCEIDS:
            chunkSize = unpack('<L', self.buff.read(4))[0]
            # FIXME
            if chunkSize < 8 or chunkSize % 4 != 0:
                androconf.warning("Invalid chunk size")

            # The two header words (type, size) are not resource ids.
            # Floor division keeps the bound an integer on every Python
            # version.
            for i in range(0, chunkSize // 4 - 2):
                self.m_resourceIDs.append(unpack('<L', self.buff.read(4))[0])

            continue

        # FIXME
        if chunkType < CHUNK_XML_FIRST or chunkType > CHUNK_XML_LAST:
            androconf.warning("invalid chunk type")

        # Fake START_DOCUMENT event: the first start-tag chunk seen while no
        # event has been produced yet (event == -1) is reported as
        # START_DOCUMENT; the tag itself is parsed on the next call.
        if chunkType == CHUNK_XML_START_TAG and event == -1:
            self.m_event = START_DOCUMENT
            break

        self.buff.read(4)  # /*chunkSize*/
        lineNumber = unpack('<L', self.buff.read(4))[0]
        self.buff.read(4)  # 0xFFFFFFFF

        if chunkType == CHUNK_XML_START_NAMESPACE or chunkType == CHUNK_XML_END_NAMESPACE:
            if chunkType == CHUNK_XML_START_NAMESPACE:
                prefix = unpack('<L', self.buff.read(4))[0]
                uri = unpack('<L', self.buff.read(4))[0]

                self.m_prefixuri[prefix] = uri
                self.m_uriprefix[uri] = prefix
                self.m_prefixuriL.append((prefix, uri))
                self.ns = uri
            else:
                self.ns = -1
                self.buff.read(4)
                self.buff.read(4)
                # Only the namespace stack is unwound; the prefix/uri
                # lookup tables keep their entries.
                self.m_prefixuriL.pop()

            continue

        self.m_lineNumber = lineNumber

        if chunkType == CHUNK_XML_START_TAG:
            self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
            self.m_name = unpack('<L', self.buff.read(4))[0]

            # FIXME
            self.buff.read(4)  # flags

            # One 32-bit word packs two fields: high 16 bits and low 16 bits.
            attributeCount = unpack('<L', self.buff.read(4))[0]
            self.m_idAttribute = (attributeCount >> 16) - 1
            attributeCount = attributeCount & 0xFFFF
            self.m_classAttribute = unpack('<L', self.buff.read(4))[0]
            self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

            self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

            # Each attribute occupies ATTRIBUTE_LENGHT consecutive words.
            for i in range(0, attributeCount * ATTRIBUTE_LENGHT):
                self.m_attributes.append(unpack('<L', self.buff.read(4))[0])

            # Keep only the top byte of every value-type word.
            for i in range(ATTRIBUTE_IX_VALUE_TYPE, len(self.m_attributes), ATTRIBUTE_LENGHT):
                self.m_attributes[i] = self.m_attributes[i] >> 24

            self.m_event = START_TAG
            break

        if chunkType == CHUNK_XML_END_TAG:
            self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
            self.m_name = unpack('<L', self.buff.read(4))[0]
            self.m_event = END_TAG
            break

        if chunkType == CHUNK_XML_TEXT:
            self.m_name = unpack('<L', self.buff.read(4))[0]

            # FIXME
            self.buff.read(4)
            self.buff.read(4)

            self.m_event = TEXT
            break
doNext函式很長。關注的重點在while迴圈中。當讀取到ResourceId Chunk和Namespace Chunk
則continue。而第一次讀到Start Tag Chunk的時候則會退出,返回到AXMLPrinter的主處理邏輯當中。
再看一下AXMLPrinter
while True and self.axml.is_valid():
_type = self.axml.next()
# print "tagtype = ", _type
if _type == START_DOCUMENT:
self.buff += u'<?xml version="1.0" encoding="utf-8"?>\n'
elif _type == START_TAG:
self.buff += u'<' + self.getPrefix(self.axml.getPrefix()) + self.axml.getName() + u'\n'
self.buff += self.axml.getXMLNS()
for i in range(0, self.axml.getAttributeCount()):
self.buff += "%s%s=\"%s\"\n" % (self.getPrefix(
self.axml.getAttributePrefix(i)), self.axml.getAttributeName(i), self._escape(self.getAttributeValue(i)))
self.buff += u'>\n'
elif _type == END_TAG:
self.buff += "</%s%s>\n" % (self.getPrefix(self.axml.getPrefix()), self.axml.getName())
elif _type == TEXT:
self.buff += "%s\n" % self.axml.getText()
elif _type == END_DOCUMENT:
break
self.buff是準備寫入解析後的xml檔案的字串。第一次執行next函式,成功讀取ResourceId Chunk和Namespace Chunk
之後遇到Start Tag Chunk,修改m_event之後退出。在buff字串寫入u'<?xml version="1.0" encoding="utf-8"?>\n'
然後繼續執行next函式,之後再遇到Start Tag Chunk時不會直接退出,而是執行相應的解析操作。
從理論上講,字串池、ResourceId Chunk和Namespace Chunk都位於第一個Start Tag Chunk之前
而這些chunk也並不會直接出現在解析後的xml檔案中。
所以先將他們解析,放入準備好的變數容器。第一次遇到Start Tag Chunk說明之前的內容已經處理完畢了,之後就可以將
Start Tag Chunk解析後的結果寫入結果字串。
當結果字串構造完畢之後,輸出到檔案或者螢幕即可。