環球新聞資料拆分為單篇後不易命名,因此建立索引目錄
阿新 • 發佈:2020-12-26
import os
def split_news(source_dir='./original', output_dir='./single',
               index_path='index.txt'):
    """Split '^'-delimited news dumps into one file per article plus an index.

    Every regular file in ``source_dir`` is read line by line.  Each
    non-blank line is split on ``'^'``; field 1 is written (followed by a
    newline) to ``<output_dir>/single<N>.txt`` and field 2 is recorded in
    the index as ``N<TAB><TAB>title``.  Field 0 is ignored.  ``N`` starts at
    1 and keeps increasing across all input files.

    Args:
        source_dir: Directory holding the raw dump files.
        output_dir: Directory receiving the per-article files; created if
            it does not exist.
        index_path: Path of the index file; overwritten on every run.

    Returns:
        The number of articles written.

    Raises:
        OSError: If ``source_dir`` cannot be listed or a file cannot be
            opened or written.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    # The per-article files cannot be created unless their directory exists.
    os.makedirs(output_dir, exist_ok=True)

    count = 0
    # The context manager guarantees the index is flushed and closed even if
    # processing stops part-way through.
    with open(index_path, 'w', encoding='utf-8') as index:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # article numbering reproducible from run to run.
        for filename in sorted(os.listdir(source_dir)):
            source_path = os.path.join(source_dir, filename)
            if not os.path.isfile(source_path):
                # Sub-directories cannot be opened as text files.
                continue
            print(filename)
            with open(source_path, 'r', encoding='utf-8') as news:
                for line_no, raw_line in enumerate(news, 1):
                    line = raw_line.strip()
                    if not line:
                        # A blank line is not end-of-file: skip it instead
                        # of abandoning the rest of this input file.
                        continue
                    fields = line.split('^')
                    if len(fields) < 3:
                        # Report and skip records without both a content and
                        # a title field rather than dying with IndexError.
                        print('skipping malformed line {} in {}'.format(
                            line_no, filename))
                        continue
                    content, title = fields[1], fields[2]
                    count += 1
                    index.write(str(count) + '\t\t' + title + '\n')
                    single_path = os.path.join(
                        output_dir, 'single' + str(count) + '.txt')
                    with open(single_path, 'w',
                              encoding='utf-8') as newssplit:
                        newssplit.write(content + '\n')
    return count


if __name__ == '__main__':
    split_news()