程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
您现在的位置: 程式師世界 >> 編程語言 >  >> 更多編程語言 >> Python

Python script for converting PDF files to PNG images

編輯:Python

Function: convert PDF to PNG

#coding:utf-8
'''
Function: convert the pages of a PDF file to pictures
author : Song Tengfei
'''
import io
import os
import glob
from wand.image import Image
from wand.color import Color
from PyPDF2 import PdfFileReader, PdfFileWriter
# Cache of already-constructed readers, keyed by the filename string passed in.
memo = {}


def getPdfReader(filename):
    """Return the ``PdfFileReader`` for *filename*, building it on first use.

    Readers are stored in the module-level ``memo`` dict under the exact
    *filename* string, so a repeated request for the same name returns the
    stored object instead of constructing a new reader.

    Args:
        filename: value handed to ``PdfFileReader`` and used as the cache key.

    Returns:
        The cached or newly created reader.
    """
    cached = memo.get(filename)
    if cached is not None:
        return cached
    fresh = PdfFileReader(filename, strict=False)
    memo[filename] = fresh
    return fresh
def _run_convert(pdfile, savedfilename, page_index, index, res=120):
    """Render one page of a PDF to a PNG image file and print its path.

    Args:
        pdfile: reader object; ``getPage(page_index)`` is called on it.
        savedfilename: path prefix of the output file.
        page_index: page number passed to ``pdfile.getPage``.
        index: number appended to *savedfilename*, zero-padded to 4 digits.
        res: resolution passed to ``wand.image.Image`` (default 120).

    The output path is ``<savedfilename><index:04d>.png``. The extension is
    ``.png`` so that the file name agrees with the PNG format selected on
    the image below.
    """
    # Copy the requested page into a one-page in-memory PDF so that only
    # that page is handed to the image library.
    single_page = PdfFileWriter()
    single_page.addPage(pdfile.getPage(page_index))
    pdf_bytes = io.BytesIO()
    single_page.write(pdf_bytes)
    pdf_bytes.seek(0)

    img_path = '%s%04d.png' % (savedfilename, index)
    # The context manager releases the image even when saving fails.
    with Image(file=pdf_bytes, resolution=res) as img:
        img.format = 'png'
        img.compression_quality = 90
        img.background_color = Color("white")
        img.save(filename=img_path)
    print(img_path)
def dealPerPdf(path, file, index):
    """Convert every page of one PDF file into a numbered image.

    Args:
        path: directory containing the PDF; also where the images are saved.
        file: name of the PDF file inside *path*.
        index: number used for the first page's image.

    Returns:
        *index* increased by one for each page converted.
    """
    # Output prefix: "<path>/2_<tag>_", where <tag> is the text after the
    # last '/' in *path*, cut at its first '-'.
    folder_tag = path.rsplit('/', 1)[-1].partition('-')[0]
    savedfilename = path + '/2_' + folder_tag + '_'

    pdfile = getPdfReader(os.path.join(path, file))
    page_total = pdfile.getNumPages()

    next_index = index
    for page_no in range(page_total):
        _run_convert(pdfile, savedfilename, page_no, next_index)
        next_index = next_index + 1
    return next_index
def getAllfiles(path):
    """Convert every PDF found under *path*, descending into sub-directories.

    Directory entries are visited in sorted name order. Sub-directories are
    processed recursively with their own numbering starting at 0; regular
    files whose name ends in ``.pdf`` (any letter case) are passed to
    ``dealPerPdf``. Everything else is skipped.

    Args:
        path: directory to scan.

    Returns:
        None.
    """
    index = 0
    for file in sorted(os.listdir(path)):
        # Joined with '/' on purpose: dealPerPdf splits the path on '/'.
        new_path = path + '/' + file
        if os.path.isdir(new_path):
            getAllfiles(new_path)
            continue
        if not os.path.isfile(new_path):
            continue
        # Match on the suffix so that an extension-less file called "pdf"
        # is not mistaken for a PDF, and "X.PDF" is not ignored.
        if not file.lower().endswith('.pdf'):
            continue
        index = dealPerPdf(path, file, index)
        # NOTE(review): dealPerPdf already returns the next unused number, so
        # this leaves one number unused between files. Kept so that existing
        # output numbering does not change; confirm whether it is intended.
        index = index + 1
def DealBatchPdf(path):
    """Batch entry point: pass *path* to ``getAllfiles`` for conversion."""
    getAllfiles(path)
if __name__ == '__main__':
    # Script entry point: convert either one hard-coded PDF or every PDF
    # under `path`, depending on `is_batch_deal`.
    # (os.getcwd() could be used here in place of './'.)
    path = './'
    is_batch_deal = False
    if not is_batch_deal:
        # Single-file mode: numbering starts at 0.
        filename = 'AFCZ10893963ZY.pdf'  # name of the PDF file to convert
        dealPerPdf(path, filename, 0)
    else:
        DealBatchPdf(path)
#
def test():
    """Convert every ``*.pdf`` in the current working directory to images.

    Prints the number of matching files, then for each file prints the part
    of its name before the first '.' and first '-', and converts all of its
    pages with ``_run_convert``. Images share the fixed prefix ``2_001_``
    and one running number across all files.

    Returns:
        None.
    """
    pdf_files_list = glob.glob('*.pdf')
    print(len(pdf_files_list))
    savedfilename = '2_001_'
    index = 0
    for pdf_name in pdf_files_list:
        # Shown for progress only. It may be empty (e.g. "-a.pdf"), so it
        # must not be indexed.
        print(pdf_name.split('.')[0].split('-')[0])
        pdfile = getPdfReader(pdf_name)
        for page_index in range(pdfile.getNumPages()):
            _run_convert(pdfile, savedfilename, page_index, index)
            index = index + 1


  1. 上一篇文章:
  2. 下一篇文章:
Copyright © 程式師世界 All Rights Reserved