阿新 • • 發佈:2019-01-07
# -*- coding: utf-8 -*-
# @Date    : 2018-11-02 17:38:53
# @Author  : Jimy_Fengqi ([email protected])
# @Link    : https://blog.csdn.net/qiqiyingse
# @Version : V1.0

'''
Split a txt novel into one file per chapter, each named after its chapter title.

Runs on Python 3. The novel must be saved as UTF-8 (with or without a BOM);
text saved in an ANSI code page will fail to decode.
'''

import re
import os
import sys

# txt book's path.
novel_name = ''  # file name of the novel, looked up in the current directory
source_path = os.path.join(os.getcwd(), novel_name)
path_pieces = os.path.split(source_path)
# Title = file name with everything from the first dot onwards removed.
novel_title = re.sub(r'(\..*$)|($)', '', path_pieces[1])
target_path = os.path.join(path_pieces[0], novel_title)  # per-chapter output directory
# A chapter heading looks like "第...章 <title>".
section_re = re.compile(r'^\s*第.+章\s+.*$')

# Runs of characters that must not appear in a file name: the ones the
# script always replaced (~ * , ?) plus the rest of the set Windows rejects.
_unsafe_name_re = re.compile(r'[~*,?\\/:"<>|]+')


def _safe_filename(title):
    '''Return *title* with each run of unsafe characters replaced by "_".'''
    return _unsafe_name_re.sub('_', title)


def _write_lines(path, lines):
    '''Write *lines* to *path* as UTF-8, closing the file even on error.'''
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(lines)


# entry of the script
def main(source=None, target=None):
    '''Split the novel into a preface, one file per chapter, and a menu.

    Lines before the first chapter heading go to ``前言.txt``; every line
    matching ``section_re`` starts a new ``<chapter title>.txt``; the list of
    chapter titles is written to ``目錄.txt``.

    Args:
        source: path of the UTF-8 novel. Defaults to the module-level
            ``source_path``.
        target: output directory. Defaults to the module-level
            ``target_path`` (or, when *source* is given, a directory named
            after the novel next to it).

    Returns:
        The number of chapters written.

    Raises:
        FileNotFoundError: if *source* is not an existing file.
    '''
    if source is None:
        source, title, default_target = source_path, novel_title, target_path
    else:
        title = re.sub(r'\..*$', '', os.path.basename(source))
        default_target = os.path.join(os.path.dirname(source), title)
    if target is None:
        target = default_target

    # Fail early with a clear message (e.g. when novel_name was left empty)
    # instead of creating an output directory and then crashing on open().
    if not os.path.isfile(source):
        raise FileNotFoundError(
            'novel file not found: %r (set novel_name to the txt file)' % source)

    os.makedirs(target, exist_ok=True)

    sec_count = 0
    titles = []
    # The section currently being collected: its file name and its lines.
    section_file = '前言.txt'
    section_lines = ['%s 前言\n' % title]

    # utf-8-sig drops a leading BOM so a heading on line 1 still matches.
    with open(source, 'r', encoding='utf-8-sig') as book:
        for line in book:
            if not section_re.match(line):
                section_lines.append(line)
                continue

            # A new chapter begins: flush the section collected so far.
            _write_lines(os.path.join(target, section_file), section_lines)

            # Collapse inner whitespace and drop the newline/edge spaces so
            # the heading is usable both as text and as a file name.
            heading = re.sub(r'\s+', ' ', line).strip()
            print('converting %s...' % heading)
            sec_count += 1
            titles.append(heading + '\n')
            section_file = _safe_filename(heading) + '.txt'
            section_lines = [heading + '\n']

    # Flush the last section (or the preface when there were no chapters).
    _write_lines(os.path.join(target, section_file), section_lines)

    # write the menu
    _write_lines(os.path.join(target, '目錄.txt'),
                 ['%s 目錄\n' % title] + titles)

    print('completed. %d chapter(s) in total.' % sec_count)
    return sec_count


if __name__ == '__main__':
    main()