python3迭代器和生成器
阿新 • • 發佈:2018-12-29
1、手動訪問迭代器中的元素
#要手動訪問迭代器中的元素,可以使用next()函式 In [3]: with open('/etc/passwd') as f: ...: try: ...: while True: ...: print(next(f)) #next()函式訪問迭代器中的下一個元素 ...: except StopIteration: #捕獲結束異常 ...: print('None') #通過指定返回結束值來判斷迭代結束 In [28]: with open('/etc/passwd') as f: ...: while True: ...: line = next(f,None) ...: if line is None: ...: break ...: print(line)
2、委託迭代
class node(object):
    """Container node that delegates iteration to its list of children."""

    def __init__(self, value):
        self._value = value
        self._children = []

    def __repr__(self):
        return f'Node({self._value!r})'

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        self._children.append(node)

    def __iter__(self):
        # Delegate: iterating a node walks the underlying children list.
        return iter(self._children)


if __name__ == '__main__':
    root = node(0)
    child1, child2 = node(1), node(2)
    for kid in (child1, child2):
        root.add_child(kid)
    for child in root:
        print(child)
    # Output:
    # Node(1)
    # Node(2)
3、用生成器建立新的迭代模式
def frange(start, stop, setup):
    """Yield values from ``start`` towards ``stop`` in increments of ``setup``.

    Works like ``range()`` but accepts non-integer steps. ``stop`` itself is
    never yielded.

    Args:
        start: first value produced.
        stop: exclusive bound.
        setup: step added after each value; may be negative to count down.

    Raises:
        ValueError: if ``setup`` is zero (the sequence would never end).
            Raised on the first ``next()`` call, since this is a generator.
    """
    if setup == 0:
        raise ValueError('frange() step must not be zero')
    if setup > 0:
        while start < stop:
            yield start
            start += setup
    else:
        # Negative step: count down, mirroring range() semantics.
        while start > stop:
            yield start
            start += setup


for i in frange(0, 10, 0.8):
    print(i)
4、實現迭代協議
class Node:
    """Tree node supporting child iteration and depth-first traversal."""

    def __init__(self, value):
        self._value = value
        self._children = []

    def __repr__(self):
        return f'Node({self._value!r})'

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        self._children.append(node)

    def __iter__(self):
        return iter(self._children)

    def depth_first(self):
        """Yield this node first, then every descendant in depth-first order."""
        yield self
        for child in self:
            for descendant in child.depth_first():
                yield descendant


if __name__ == '__main__':
    root = Node(123)
    child1 = Node(10)
    child2 = Node(20)
    for kid in (child1, child2):
        root.add_child(kid)
    for leaf_value in (111, 222):
        child1.add_child(Node(leaf_value))
    child2.add_child(Node(333))
    for visited in root.depth_first():
        print(visited)
    # Output:
    # Node(123)
    # Node(10)
    # Node(111)
    # Node(222)
    # Node(20)
    # Node(333)
5、反向迭代
In [29]: a = [1,2,3,4] In [30]: for i in reversed(a): #反向迭代 ...: print(i) ...: 4 3 2 1
6、對迭代器做切片操作
#要對迭代器和生成器做切片處理,可以使用itertools.islice()函式 In [31]: def count(n): ...: while True: ...: yield n ...: n += 1 ...: In [33]: c = count(0) #生成器物件是不可切片操作的 In [34]: c[10:20] --------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-34-d27b6259daf3> in <module>() ----> 1 c[10:20] TypeError: 'generator' object is not subscriptable #只有使用itertools.islice()函式才可切片 In [35]: import itertools In [36]: for i in itertools.islice(c,10,20): ...: print(i,end=' ') ...: 10 11 12 13 14 15 16 17 18 19 #迭代器和生成器是沒法執行普通的切片操作的,這是因為不知道它們的長度和索引,而islice()產生的結果是一個迭代器,它可以產生出所需要的切片元素,它是通過訪問並丟棄所有起始索引之前的元素來實現的,
#之後的元素會由islice物件來產生直到結束索引為止。並且islice()會消耗掉提供的迭代器中的資料,它只能訪問一次的
7、跳過可迭代物件中的前一部分元素
#itertools模組中有一個函式dropwhile()它會迭代丟棄需要過濾的元素,但只丟棄開頭的過濾元素 #cat test.txt #aaa #bbb ccc ddd #eee from itertools import dropwhile #使用dropwhile函式過濾開始元素,startswith函式指定判斷元素的值 with open('test.txt') as f: for i in dropwhile(lambda x:x.startswith('#'),f): print(i,end='') #輸出結果:只有開頭連續以#號開頭的行會被過濾掉 ccc ddd #eee #也可以使用itertools模組中的islice函式來指定跳過多少個元素 from itertools import islice items = ['a','b','c',1,2,3] for x in islice(items,4,None): print(x) #輸出將跳過前面的4個元素,只輸出後面的2,3 #如果需要跳過所有需要過濾的元素,只需要指定判定規則值即可 with open('test.txt') as f: lines = (line for line in f if not line.startswith('#')) for line in lines: print(line,end='') #輸出就只有不以#號開頭的行了 ccc ddd
8、迭代所有可能的組合或排列
#itertools模組中提供了3個函式來解決所有元素的重排列的可能情況,先來看第一個itertools.permutations()使用方法: In [6]: from itertools import permutations In [7]: items = ['a','b','c'] In [8]: for i in permutations(items): ...: print(i) ...: ('a', 'b', 'c') ('a', 'c', 'b') ('b', 'a', 'c') ('b', 'c', 'a') ('c', 'a', 'b') ('c', 'b', 'a') #如果要限定排序的長度可用指定長度引數 In [9]: for i in permutations(items,2): ...: print(i) .... ('c', 'a') ('c', 'b') #使用第二個函式itertools.combinations()它將輸出序列中所有元素的全部組合形式,但元素之間是不考慮順序的,比如(a,b)和(b,a)是一種組合形式將只出現一次 In [12]: for j in combinations(range(4),3): ...: print(j) ...: (0, 1, 2) (0, 1, 3) (0, 2, 3) (1, 2, 3) In [13]: for j in combinations(range(4),4): ...: print(j) ...: (0, 1, 2, 3) #第三個函式itertools.combinations_with_replacement()它允許一個元素可被選擇多次進行排序 In [14]: from itertools import combinations_with_replacement In [15]: for i in combinations_with_replacement(range(1,3),3): ...: print(i) ...: (1, 1, 1) (1, 1, 2) (1, 2, 2) (2, 2, 2)
9、以索引-值對的形式迭代序列
#內建的enumerate()函式可用輸出元素序列,可用指定序列的開始值 In [20]: for k,v in enumerate(my_list,1): ...: print(k,'==',v) ...: 1 == a 2 == b 3 == c #列印巢狀陣列 In [21]: list1 = [(1,2),(11,22),(33,44)] In [22]: for n,(k,v) in enumerate(list1,1): ...: print(n,'<===>',k,v) ...: 1 <===> 1 2 2 <===> 11 22 3 <===> 33 44
10、同時迭代多個序列
#同時迭代多個序列可用使用zip()函式,它將迭代物件產生出一個元組,整個迭代的長度取其中最短的序列長度 In [23]: list1 = [1,2,3,4,5] In [24]: list2 = ['a','b','c','d'] In [25]: list3 = ['jar','py','jc'] In [26]: for x,y,z in zip(list1,list2,list3): ...: print(x,y,z) ...: 1 a jar 2 b py 3 c jc #如果需要輸出完整的序列可以使用itertools模組中的zip_longest函式 In [27]: from itertools import zip_longest In [28]: for i in zip_longest(list1,list2,list3): ...: print(i) ...: (1, 'a', 'jar') (2, 'b', 'py') (3, 'c', 'jc') (4, 'd', None) (5, None, None) #zip()通常用於處理資料配對的,如將以字典形式配對名稱和值 In [29]: heard = ['name','age','salely'] In [30]: values = ['zhangsan',30,99999] In [31]: s = dict(zip(heard,values)) In [32]: s Out[32]: {'name': 'zhangsan', 'age': 30, 'salely': 99999}
11、在不同的容器中進行迭代
#itertools.chain()方法可以在多個容器中迭代物件 In [33]: from itertools import chain In [34]: a = [1,2,3] In [35]: b = ['a','b','c'] In [36]: for i in chain(a,b):print(i) 1 2 3 a b c
12、建立處理資料的管道
import os
import fnmatch
import gzip
import bz2
import re


def gen_find(filepat, top):
    """Yield the path of every file under ``top`` whose name matches ``filepat``.

    ``filepat`` is a shell-style wildcard pattern (see ``fnmatch``); the
    directory tree is walked recursively with ``os.walk``.
    """
    for path, _dirs, filelist in os.walk(top):
        for name in fnmatch.filter(filelist, filepat):
            yield os.path.join(path, name)


def gen_opener(filenames):
    """Open each file in ``filenames`` in text mode and yield the file object.

    ``.gz`` and ``.bz2`` files are opened with the matching decompressor.
    Each file is closed when the consumer advances to the next one, and also
    when the generator is closed early or an exception propagates through it,
    so no handle is leaked if iteration stops part-way.
    """
    for filename in filenames:
        if filename.endswith('.gz'):
            f = gzip.open(filename, 'rt')
        elif filename.endswith('.bz2'):
            f = bz2.open(filename, 'rt')
        else:
            f = open(filename, 'rt')
        # The with-block guarantees close() even on GeneratorExit/errors.
        with f:
            yield f


def gen_concatenate(iterators):
    """Chain a sequence of iterables into one flat stream of items."""
    for it in iterators:
        yield from it


def gen_grep(pattern, lines):
    """Yield only those ``lines`` in which regex ``pattern`` is found."""
    pat = re.compile(pattern)
    for line in lines:
        if pat.search(line):
            yield line


if __name__ == '__main__':
    file_name = input('please in to file:')
    directory_name = input('pease in to directory:')
    lognames = gen_find(file_name, directory_name)
    files = gen_opener(lognames)
    lines = gen_concatenate(files)
    pylines = gen_grep('(?i)python', lines)
    for line in pylines:
        print(line)
13、扁平化處理巢狀型的序列
# ``yield from`` lets a generator recurse into nested iterables and emit all of
# their values, producing a single flat sequence with no nesting.
from collections.abc import Iterable  # the ``collections.Iterable`` alias was removed in Python 3.10


def flatten(items, ignore_types=(str, bytes)):
    """Yield every leaf value of an arbitrarily nested iterable.

    Args:
        items: iterable that may contain further iterables.
        ignore_types: iterable types to treat as leaves rather than
            recursing into them (strings and bytes by default, which would
            otherwise be split into single characters).

    Yields:
        The non-iterable (or ignored) elements, in depth-first order.
    """
    for item in items:
        if isinstance(item, Iterable) and not isinstance(item, ignore_types):
            # Pass ignore_types down so a custom setting applies at every depth.
            yield from flatten(item, ignore_types)
        else:
            yield item


items = [1, 2, [3, 4, [5, 6], 7], 8]
for i in flatten(items):
    print(i, end=' ')
14、合併多個有序序列,再對整個有序序列進行迭代
#要把多個已經排好序的序列合併成一個有序序列,可以使用heapq.merge()函式(每個輸入序列必須各自已經有序,否則結果不會有序;下面示例的輸入未排序,所以輸出也不是有序的),它不會將所有的資料讀取到堆中,也不會做排序操作,
#它只是簡單的檢查每個輸入序列中的第一個元素,將最小的傳送出去,然後再重複執行這一步操作,直到序列耗盡為止 In [8]: import heapq In [9]: a = [12,3,4,9] In [10]: b = [5,32,15,1] In [11]: c = heapq.merge(a,b) In [12]: c Out[12]: <generator object merge at 0x7f781eb934f8> In [13]: d = list(c) In [14]: d Out[14]: [5, 12, 3, 4, 9, 32, 15, 1]
15、用迭代器取代while迴圈
# When handling I/O we often write a loop that keeps receiving data and checks
# whether the peer has finished sending.
import codecs
import sys


def readder(s):
    """Drain ``s`` by calling ``s.recv(8192)`` until it returns ``b''``."""
    while True:
        data = s.recv(8192)
        if data == b'':
            break


# The loop above can be replaced with iter(): given a zero-argument callable
# and a sentinel value, iter() builds an iterator that keeps calling the
# callable until it returns the sentinel.
def reader(s):
    """Receive chunks from ``s`` until ``b''`` and echo them to stdout.

    After each chunk is written, the number of characters written is printed.
    ``recv()`` returns bytes, which ``sys.stdout.write`` rejects, so each
    chunk is decoded first. An incremental decoder is used so a multi-byte
    character split across two chunks is still decoded correctly.

    NOTE(review): the payload is assumed to be UTF-8; undecodable bytes are
    replaced rather than raising. Confirm against the sender's encoding.
    """
    decoder = codecs.getincrementaldecoder('utf-8')(errors='replace')
    for i in iter(lambda: s.recv(8192), b''):
        data = sys.stdout.write(decoder.decode(i))
        print(data)
    # Flush any bytes of an incomplete trailing character.
    tail = decoder.decode(b'', final=True)
    if tail:
        sys.stdout.write(tail)