1. 程式人生 > 實用技巧 >帶batch_size的迭代器讀取檔案,解決記憶體不足的大資料處理問題!!!!!!!!!!完美解決

帶 batch_size 的迭代器讀取檔案,解決記憶體不足的大資料處理問題(完美解決)

https://github.com/zhangbo2008/perfect_batch_generator_for_pyton

核心程式碼如下:

def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    Only the batch currently being built is held in memory, so files that
    are too large to load at once can still be processed.

    Args:
        fimename: Path of the file to read. It is opened in text mode with
            the platform default encoding.
        batchsize: Number of lines per batch. A value below 1 never equals
            the number of collected lines, so the whole file is then
            returned as one single batch.

    Yields:
        Lists of raw lines (line endings are kept). Every batch holds
        ``batchsize`` lines except possibly the last one, which carries the
        leftover lines. An empty batch is never produced.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                # Start a fresh list so the batch already handed out is
                # not mutated by later appends.
                out = []
        # Flush the final, incomplete batch; skip it when nothing is left
        # so callers never receive an empty list.
        if out:
            yield out


if __name__ == "__main__":
    # `read` is a generator object: the file is only opened and read once
    # iteration starts.
    read = bylineread('1', batchsize=2)
    for batch in read:
        print(batch)
    print('over')

def bylineread(fimename, batchsize=1):
    """Lazily read a text file and yield its lines in batches.

    Only the batch currently being built is held in memory, so files that
    are too large to load at once can still be processed.

    Args:
        fimename: Path of the file to read. It is opened in text mode with
            the platform default encoding.
        batchsize: Number of lines per batch. A value below 1 never equals
            the number of collected lines, so the whole file is then
            returned as one single batch.

    Yields:
        Lists of raw lines (line endings are kept). Every batch holds
        ``batchsize`` lines except possibly the last one, which carries the
        leftover lines. An empty batch is never produced.
    """
    with open(fimename) as f:
        out = []
        for line in f:
            out.append(line)
            if len(out) == batchsize:
                yield out
                # Start a fresh list so the batch already handed out is
                # not mutated by later appends.
                out = []
        # Flush the final, incomplete batch; skip it when nothing is left
        # so callers never receive an empty list.
        if out:
            yield out
if __name__ == "__main__":
    # `read` is a generator object: the file is only opened and read once
    # iteration starts.
    read = bylineread('1', batchsize=2)
    # A for loop stops cleanly when the generator is exhausted and lets
    # real errors (missing file, Ctrl-C) propagate instead of being
    # reported as a normal end of data.
    for batch in read:
        print(batch)
    print('over')