python-re模組(92)
阿新 • • 發佈:2018-12-12
r"""Interactive tour of the most commonly used ``re`` functions.

>>> import re
>>> m = re.match('f..', 'food')    # match() only matches at the start; returns a match object
>>> print(re.match('f..', 'seafood'))    # nothing matches at the start, so None is returned
None
>>> m.group()    # the text that was matched
'foo'
>>> m = re.search('f..', 'seafood')    # search() may match anywhere in the string
>>> m.group()
'foo'
>>> re.findall('f..', 'seafood is food')    # list of every matched string
['foo', 'foo']
>>> result = re.finditer('f..', 'seafood is food')    # iterator of match objects
>>> for m in result:    # pull the match objects out one by one
...     print(m.group())
...
foo
foo
>>> re.sub('f..', 'abc', 'fish is food')    # replace every match
'abch is abcd'
>>> re.split(r'\.|-', 'hello-word-.tar.gz')    # split on "." or "-"
['hello', 'word', '', 'tar', 'gz']
>>> patt = re.compile('f..')    # compile the pattern once so it can be reused
>>> m = patt.search('seafood')    # choose the string to search in
>>> m.group()
'foo'
"""

# ---------------------------------------------------------------------------
# Example 1: count occurrences of a pattern in a file
# ---------------------------------------------------------------------------
import re


def count_patt(fname, patt):
    """Count the first regex match of every line in a file.

    Args:
        fname: Path of the text file to scan.
        patt: Regular expression (as a string) to search for in each line.

    Returns:
        A dict mapping each matched string to the number of lines whose
        first match was that string. Lines without a match are ignored.
    """
    cpatt = re.compile(patt)
    result = {}

    with open(fname) as fobj:
        for line in fobj:
            m = cpatt.search(line)  # None when the line does not match
            if m:
                key = m.group()
                # get() yields the current count, or 0 for a key not seen yet
                result[key] = result.get(key, 0) + 1

    return result


if __name__ == '__main__':
    fname = 'access_log'  # Apache access log
    ip = r'^(\d+\.){3}\d+'  # IP address at the start of a log line
    print(count_patt(fname, ip))
    br = 'Firefox|MSIE|Chrome'  # client browser named in the log line
    print(count_patt(fname, br))


# ---------------------------------------------------------------------------
# Example 2: the same task, written as a class on top of collections.Counter
# ---------------------------------------------------------------------------
import re
from collections import Counter  # dict subclass for tallying; offers most_common()


class CountPatt:
    """Count regex matches in one file, one pattern per call."""

    def __init__(self, fname):
        """Remember the path of the file that count_patt() will scan."""
        self.fname = fname

    def count_patt(self, patt):
        """Count the first regex match of every line in ``self.fname``.

        Args:
            patt: Regular expression (as a string) to search for in each line.

        Returns:
            A Counter mapping each matched string to the number of lines
            whose first match was that string.
        """
        cpatt = re.compile(patt)
        result = Counter()

        with open(self.fname) as fobj:
            for line in fobj:
                m = cpatt.search(line)  # None when the line does not match
                if m:
                    result[m.group()] += 1

        return result


if __name__ == '__main__':
    c = CountPatt('access_log')
    ip = r'^(\d+\.){3}\d+'
    br = 'Firefox|MSIE|Chrome'
    a = c.count_patt(ip)
    print(a)
    print(a.most_common(3))  # the three most frequent matches
    print(c.count_patt(br))