1. 程式人生 > 其它 >python read pdf and display in console

Python: read a PDF and display it in the console

# coding=utf-8
import curses
#pip install pdfminer.six

from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO

def convert_pdf_to_txt(path):
    """Return the text extracted from the PDF file at *path*.

    Every page yielded by ``PDFPage.get_pages`` is run through a pdfminer
    ``TextConverter`` that writes into an in-memory buffer; the buffer's
    contents are returned as a single string.

    Args:
        path: Filesystem path of the PDF to read (opened in binary mode).

    Returns:
        The accumulated text output of the converter.

    Raises:
        OSError: If *path* cannot be opened.
        Any exception raised by pdfminer while parsing is propagated; the
        file, the converter device and the buffer are still closed.
    """
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    password = ""
    # NOTE(review): 0 and the empty set are presumably pdfminer's
    # "no page limit" / "all pages" values -- confirm against its docs.
    maxpages = 0
    caching = True
    pagenos = set()

    with StringIO() as retstr:
        device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=laparams)
        try:
            interpreter = PDFPageInterpreter(rsrcmgr, device)
            with open(path, 'rb') as fp:
                for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages,
                                              password=password, caching=caching,
                                              check_extractable=True):
                    interpreter.process_page(page)
            # Read the buffer before the device and buffer are closed.
            return retstr.getvalue()
        finally:
            device.close()

def main(lines):
    """Show *lines* one at a time in a curses screen, navigated by arrow keys.

    Keys:
        DOWN / UP    -- move forward / back by one line
        RIGHT / LEFT -- move forward / back by ten lines
        q            -- quit

    The current index is clamped to ``[0, len(lines) - 1]``; with an empty
    *lines* the arrow keys do nothing. The terminal state is restored on
    exit, including when an exception is raised.

    Args:
        lines: Sequence of strings to display.
    """
    last = len(lines) - 1

    # How far each navigation key moves the current index.
    steps = {
        curses.KEY_RIGHT: 10,
        curses.KEY_LEFT: -10,
        curses.KEY_UP: -1,
        curses.KEY_DOWN: 1,
    }

    # get the curses screen window
    screen = curses.initscr()
    try:
        # turn off input echoing
        curses.noecho()
        # respond to keys immediately (don't wait for enter)
        curses.cbreak()
        # map arrow keys to special values
        screen.keypad(True)

        start = 0
        screen.addstr(0, 0, 'ready')
        while True:
            char = screen.getch()
            if char == ord('q'):
                break

            step = steps.get(char)
            if step is None or last < 0:
                # Not a navigation key, or there is nothing to show.
                continue

            # Clamp to the valid index range so lines[start] cannot go
            # out of bounds at either end.
            start = max(0, min(start + step, last))
            screen.clear()
            try:
                screen.addstr(0, 0, lines[start])
            except curses.error:
                # addstr raises when the text does not fit in the window;
                # keep whatever was drawn instead of aborting the viewer.
                pass
            screen.refresh()
    finally:
        # shut down cleanly
        curses.nocbreak()
        screen.keypad(False)
        curses.echo()
        curses.endwin()

if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default sample PDF.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else 'c:/test/d_to_m.pdf'

    s = convert_pdf_to_txt(pdf_path)
    lines = s.split("\n")
    print(len(lines))

    # Drop blank lines by building a new list: removing items from a list
    # while iterating over it skips the element after each removal, which
    # leaves some blank lines behind.
    lines = [line for line in lines if line.strip()]

    print(len(lines))
    main(lines)