用帶 batch_size 的生成器分批讀取檔案,解決大資料處理時記憶體不足的問題(完美解決)
阿新 • • 發佈:2020-07-24
https://github.com/zhangbo2008/perfect_batch_generator_for_pyton
核心程式碼如下:
def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    Only one batch is held in memory at a time, so files larger than
    available memory can be processed.

    Args:
        fimename: Path of the file to read (the spelling of the parameter
            name is kept so existing keyword callers keep working).
        batchsize: Number of lines per batch. If it is less than 1 the
            batch-full condition never triggers, so all lines are returned
            together as a single batch.

    Yields:
        Lists of lines, each line keeping its trailing newline. Every batch
        has ``batchsize`` lines except possibly the last, which holds the
        leftover lines. No empty batch is ever yielded.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                # Start a fresh list so the batch already handed out is not mutated.
                out = []
    # Flush the final partial batch; skip it when nothing is left over so an
    # empty file (or an exact multiple of batchsize) does not produce [].
    if out:
        yield out


if __name__ == "__main__":
    # `read` is a generator object; each iteration pulls the next batch on demand.
    read = bylineread('1', batchsize=2)
    for batch in read:
        print(batch)
    print('over')
def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    Only one batch is held in memory at a time, so files larger than
    available memory can be processed.

    Args:
        fimename: Path of the file to read (the spelling of the parameter
            name is kept so existing keyword callers keep working).
        batchsize: Number of lines per batch. If it is less than 1 the
            batch-full condition never triggers, so all lines are returned
            together as a single batch.

    Yields:
        Lists of lines, each line keeping its trailing newline. Every batch
        has ``batchsize`` lines except possibly the last, which holds the
        leftover lines. No empty batch is ever yielded.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                # Start a fresh list so the batch already handed out is not mutated.
                out = []
    # Flush the final partial batch; skip it when nothing is left over so an
    # empty file (or an exact multiple of batchsize) does not produce [].
    if out:
        yield out
# `read` is a generator object; each iteration pulls the next batch on demand.
if __name__ == "__main__":
    read = bylineread('1', batchsize=2)
    # A for loop stops cleanly on exhaustion and, unlike a bare `except:`
    # around next(), lets real errors (e.g. a missing file) surface.
    for batch in read:
        print(batch)
    print('over')