1. 程式人生 > >python實現pdf格式轉換圖片格式

python實現pdf格式轉換圖片格式

使用python程式碼實現pdf轉換圖片格式

核心程式碼:

import io
from wand.image import Image
from wand.color import Color
from PyPDF2 import PdfFileReader, PdfFileWriter

memo = {}

def getPdfReader(filename):
	reader = memo.get(filename, None)

	if reader is None:

		reader = PdfFileReader(filename, strict=False)

		memo[filename] = reader

	return reader

def _run_convert(filename, page, res=120):

	idx = page + 1

	pdfile = getPdfReader(filename)

	pageObj = pdfile.getPage(page)

	dst_pdf = PdfFileWriter()

	dst_pdf.addPage(pageObj)

	pdf_bytes = io.BytesIO()

	dst_pdf.write(pdf_bytes)

	pdf_bytes.seek(0)

	img = Image(file=pdf_bytes, resolution=res)

	img.format = 'png'

	img.compression_quality = 90

	img.background_color = Color("white")

	img_path = '%s%d.png' % (filename[:filename.rindex('.')], idx)

	img.save(filename=img_path)
	print(img_path)
	img.destroy()

實現功能程式碼:

#coding:utf-8
import io
import os
import glob
from wand.image import Image
from wand.color import Color
from PyPDF2 import PdfFileReader, PdfFileWriter

memo = {}

def getPdfReader(filename):
	reader = memo.get(filename, None)
	if reader is None:
		reader = PdfFileReader(filename, strict=False)
		memo[filename] = reader
	return reader

def _run_convert(pdfile, savedfilename, page_index, index, res=120):
	pageObj = pdfile.getPage(page_index)#獲取pdf的第page_index頁
	dst_pdf = PdfFileWriter()
	dst_pdf.addPage(pageObj)
	pdf_bytes = io.BytesIO()
	dst_pdf.write(pdf_bytes)
	pdf_bytes.seek(0)
	img = Image(file=pdf_bytes, resolution=res)

	img.format = 'png'

	img.compression_quality = 90
	img.background_color = Color("white")

	img_path = '%s%04d.jpg' % (savedfilename, index)
	img.save(filename=img_path)
	print(img_path)
	img.destroy()

def dealPerPdf(path, file, index):
	savedfilename = path.split('/')[-1].split('-')[0] + '_'
	savedfilename = path + '/2_' + savedfilename#要儲存的圖片檔名

	new_path = os.path.join(path, file)
	pdfile = getPdfReader(new_path)  # 開啟pdf檔案控制代碼
	page_nums = pdfile.getNumPages()  # 獲取pdf總頁數

	for page_index in range(page_nums):
		# print(index)
		_run_convert(pdfile, savedfilename, page_index, index)
		index = index + 1
	return index

def getAllfiles(path):
	files = os.listdir(path)
	files.sort()
	index = 0
	for file in files:
		new_path = path + '/' + file;
		if os.path.isdir(new_path):
			getAllfiles(new_path)
		elif os.path.isfile(new_path):
			is_pdf = file.split('.')[-1]
			if is_pdf != 'pdf':
				continue
			index = dealPerPdf(path, file, index)
			index = index+1

def DealBatchPdf(path):
	getAllfiles(path)


if __name__ == '__main__':
	# path = os.getcwd()
	path = '/data/tfsong2/dangan'
	is_batch_deal = True
	if is_batch_deal:
		DealBatchPdf(path)
	else:
		filename = '001.pdf' #要處理的pdf檔名
		dealPerPdf(path, filename, 0)

即可實現pdf轉圖片,就不貼實際操作結果了!