Python轉碼&解壓&多程序
Python批量轉換檔案編碼格式
Eclipse中看ANSI編碼的檔案有亂碼,所以希望通過python將相關檔案轉換成utf-8編碼。
源:https://www.cnblogs.com/tsbc/p/4450675.html
''' 遍歷資料夾 如果檔名是.cpp .h 如果原來的編碼不是utf-8,將檔案編碼格式改成utf-8 '''
import os,sys import chardet
def convert(filename, out_enc="UTF8"):
    """Re-encode the file at *filename* in place using *out_enc*.

    The current encoding is guessed with ``chardet.detect``. A file that is
    already reported as UTF-8, or whose encoding cannot be guessed (for
    example an empty file), is left untouched.

    Args:
        filename: Path of the file to convert.
        out_enc: Name of the target encoding; defaults to UTF-8.

    I/O failures are reported by printing " error" instead of being raised.
    """
    try:
        # The context manager closes the handle even when decoding or
        # writing fails part-way through.
        with open(filename, 'rb+') as fp:
            content = fp.read()
            coding = chardet.detect(content)['encoding']  # detected encoding name
            # No guess means nothing sensible to decode with; decode(None)
            # would raise TypeError.
            if coding is None or coding.lower() == 'utf-8':
                return
            new_content = content.decode(coding, "ignore").encode(out_enc)
            fp.seek(0)
            fp.write(new_content)
            # The re-encoded data can be shorter than the original (e.g. a
            # stripped BOM or UTF-16 input); cut off the stale tail.
            fp.truncate()
    except IOError:
        print(" error")
def explore(dir):
    """Walk *dir* recursively and convert every ``.cpp`` / ``.h`` file found.

    Args:
        dir: Root directory to traverse.
    """
    for root, _dirs, files in os.walk(dir):
        for name in files:
            # Compare the real suffix: a substring test would also pick up
            # names such as "page.html", "header.hpp" or "main.cpp.bak".
            if name.endswith(('.cpp', '.h')):
                convert(os.path.join(root, name))
# Root directory handed to explore().
fiePath = r'E:\Code'


def main():
    """Run the conversion over the directory tree rooted at ``fiePath``."""
    explore(fiePath)


if __name__ == "__main__":
    main()
Python解壓
https://www.cnblogs.com/Oliva/p/8824040.html 多執行緒字典破解加密zip
https://www.cnblogs.com/fyqq0403/p/9710420.html 解壓加密的zip
https://www.cnblogs.com/flyhigh1860/p/3884842.html 解壓zip
Python多執行緒&多程序
https://www.cnblogs.com/yeayee/p/4952022.html 基礎介紹
https://www.cnblogs.com/kellyseeme/p/5525017.html 鎖的應用
https://www.cnblogs.com/znicy/p/6234522.html 通過多程序的方式解決了解壓縮的效能問題
https://www.cnblogs.com/xybaby/p/6510941.html python效能優化,介紹了GIL導致多執行緒的問題
https://www.cnblogs.com/SuKiWX/p/8804974.html python GIL解釋
用 Python 解壓多個壓縮檔案(環境中約有 6000 個壓縮檔案)時遇到瓶頸,解壓過程非常慢。嘗試用多執行緒解壓,處理時間不僅沒有減少,反而增加了(受 GIL 影響,多執行緒無法真正並行執行)。參考上述部落格後,改用多程序解壓以縮短處理時間。
import zipfile import tarfile import gzip import os from time import ctime from multiprocessing import Pool from multiprocessing import cpu_count
# Directory scanned for the daily .zip / .tar.gz archives.
dayZipsPath = r'.'
# Directory the daily archives are extracted into; the second pass reads
# its zip files from here.
quarterZipsPath = r'./tmp'
# Password passed to ZipFile.extractall for encrypted archives.
# NOTE(review): hard-coded; presumably a placeholder -- confirm before use.
zipPassWord = b'password'
# Directory that receives the contents of the second-pass zip files.
mrFilePath = r'./data'
def unzipDayFile():
    """Extract every ``.zip`` archive in ``dayZipsPath`` into ``quarterZipsPath``.

    Each archive name is printed before extraction. Archives are opened with
    ``zipPassWord`` and are left in place afterwards.
    """
    for file_name in os.listdir(dayZipsPath):
        if os.path.splitext(file_name)[1] != '.zip':
            continue
        print(file_name)
        # listdir() yields bare names; join them with the directory so the
        # archive is found even when dayZipsPath is not the working directory.
        archive_path = os.path.join(dayZipsPath, file_name)
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(archive_path, 'r') as file_zip:
            file_zip.extractall(path=quarterZipsPath, pwd=zipPassWord)
def untarDayFile():
    """Extract every ``.tar.gz`` archive in ``dayZipsPath`` into ``quarterZipsPath``.

    Each archive name is printed before extraction. Archives are left in
    place afterwards.
    """
    for file_name in os.listdir(dayZipsPath):
        # Compare the real suffix so names that merely contain ".tar.gz"
        # (e.g. "x.tar.gz.bak") are not opened as tarballs.
        if not file_name.endswith('.tar.gz'):
            continue
        print(file_name)
        # listdir() yields bare names; join them with the directory so the
        # archive is found even when dayZipsPath is not the working directory.
        archive_path = os.path.join(dayZipsPath, file_name)
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(archive_path) as file_tar:
            file_tar.extractall(path=quarterZipsPath)
def unzip(zipsList):
    """Extract the listed zip files from ``quarterZipsPath`` into ``mrFilePath``.

    Entries whose extension is not ``.zip`` are skipped. Each archive is
    deleted once it has been extracted successfully.

    Args:
        zipsList: File names (not full paths) located in ``quarterZipsPath``.
    """
    for file_name in zipsList:
        if os.path.splitext(file_name)[1] != '.zip':
            continue
        zipFileName = os.path.join(quarterZipsPath, file_name)
        # The context manager closes the archive even if extraction fails,
        # and guarantees it is closed before the file is removed.
        with zipfile.ZipFile(zipFileName, 'r') as file_zip:
            file_zip.extractall(path=mrFilePath, pwd=zipPassWord)
        os.remove(zipFileName)
if __name__ == '__main__':
    print('Begin:%s' % ctime())
    # Number of CPU cores; the file list is split into this many slices.
    cpuNum = cpu_count()
    print(cpuNum)

    # First pass: unpack the daily archives.
    unzipDayFile()
    untarDayFile()

    # Second pass: extract in worker processes, which greatly shortens the
    # processing time.
    quarterZipsList = os.listdir(quarterZipsPath)
    zipFileNum = len(quarterZipsList)
    print("total zip files num:%d" % (zipFileNum))
    print("begin unzip:%s" % ctime())
    p = Pool()
    pending = []
    for i in range(cpuNum):
        # Contiguous slice [beginPos, endPos) of the file list for worker i.
        beginPos = i * zipFileNum // cpuNum
        endPos = min((i + 1) * zipFileNum // cpuNum, zipFileNum)
        print("proc %d - %d" % (beginPos, endPos))
        pending.append(
            p.apply_async(unzip, args=(quarterZipsList[beginPos:endPos],))
        )
    print("waiting for unzip quarter mr data ...")
    p.close()
    p.join()
    # apply_async() stores a worker's exception in its result object; report
    # it here instead of letting a failed slice go unnoticed.
    for result in pending:
        try:
            result.get()
        except Exception as exc:
            print("unzip failed: %r" % (exc,))
    print("end unzip:%s" % ctime())
    print("End:%s" % ctime())