A fast way to create thumbnails from pdf files with python
One of my favorite things about Python is that repetitive tasks can be easily and reliably automated with simple syntax. A PDF-to-thumbnail converter is presented below. Since pure line drawings from Creo can only be exported to PDF, it was necessary to find a solution that would easily create JPEG thumbnails from hundreds of PDFs. The data team can later use these for the website. The desired size was 300x300 pixels. To do this, there is a good module that is capable of reading PDF files: the fitz module. To be able to use the fitz module, the best way is to install PyMuPDF. I used the Image module of the well-known PIL library to convert the scanned PDF files into images.
import fitz
from PIL import Image
import os
# Folder that holds the source PDF files.
main_folder = r"C:/pdf_to_thumbnail"
# Folder that receives the full-resolution JPEG files.
# NOTE: the 'fulres_folder' spelling is the actual directory name used on disk.
fullres_folder = r"C:/pdf_to_thumbnail/fulres_folder"
# Folder that receives the thumbnail JPEG files.
thumbnail_folder = r"C:/pdf_to_thumbnail/thumbnails"
# Convert a PDF to a full-resolution JPEG named '<same file name>_fr.jpeg'
def pdf_to_fullres(filename):
    """Render the first page of a PDF to a full-resolution JPEG.

    The PDF is read from ``main_folder`` and the picture is written to
    ``fullres_folder`` as '<PDF name without extension>_fr.jpeg'.

    Args:
        filename: Name of the PDF file inside ``main_folder``,
            e.g. 'b00000078_for_thumbn.pdf'.
    """
    pdf_path = os.path.join(main_folder, filename)
    # splitext removes the extension whatever its length or case, instead of
    # blindly slicing off the last four characters.
    stem = os.path.splitext(filename)[0]
    fullres_filepath = os.path.join(fullres_folder, stem + "_fr" + ".jpeg")
    # The context manager closes the document even if rendering fails, so
    # file handles do not pile up when hundreds of PDFs are converted.
    with fitz.open(pdf_path) as doc:
        # Only the first page is rendered.
        page = doc[0]
        fullres_pic = page.get_pixmap()
        fullres_pic.pil_save(fullres_filepath)
# Convert a full-resolution JPEG to a thumbnail named '<same file name>_tn.jpeg'.
# Cut 4 pixels from every side because the Creo PDF export leaves a thin paper contour boundary line.
def jpeg_to_thumbnail(filename):
    """Crop a full-resolution JPEG and save it as a thumbnail.

    The picture is read from ``fullres_folder``, trimmed by 4 pixels on
    every side, shrunk in place to fit within 300x300 pixels and written
    to ``thumbnail_folder`` with the '_fr.jpeg' ending replaced by
    '_tn.jpeg'.

    Args:
        filename: Name of the full-resolution picture inside
            ``fullres_folder``, e.g. 'pic078_for_thumbn_fr.jpeg'. It is
            expected to end with '_fr.jpeg'.
    """
    fulrespic_path = os.path.join(fullres_folder, filename)
    # Strip the trailing '_fr.jpeg' (8 characters) before adding the
    # thumbnail suffix.
    thumbnail_filepath = os.path.join(
        thumbnail_folder, filename[:-8] + "_tn" + ".jpeg"
    )
    # Number of pixels removed from each side. The crop box is
    # (left, upper, right, lower) with exclusive right/lower edges, so using
    # the same constant on all four coordinates trims every side equally.
    crop_const = 4
    size = 300, 300
    # The context manager releases the source file once the thumbnail has
    # been written.
    with Image.open(fulrespic_path) as image:
        w, h = image.size
        croped_im = image.crop(
            (crop_const, crop_const, w - crop_const, h - crop_const)
        )
        # thumbnail() resizes in place and keeps the aspect ratio.
        croped_im.thumbnail(size)
        croped_im.save(thumbnail_filepath)
# Make sure both output folders exist before anything is written to them.
os.makedirs(fullres_folder, exist_ok=True)
os.makedirs(thumbnail_folder, exist_ok=True)

# Read every PDF file from the main folder. The extension check ignores
# case so that 'DRAWING.PDF' is picked up as well.
main_folder_files = os.listdir(main_folder)
pdf_list = [
    name for name in main_folder_files if name.lower().endswith('.pdf')
]

# Convert the PDFs to full-resolution pictures.
for pdf in pdf_list:
    pdf_to_fullres(pdf)

# Read every full-resolution picture from fullres_folder. Only names ending
# in '_fr.jpeg' are accepted, because jpeg_to_thumbnail strips exactly that
# suffix when it builds the thumbnail name.
fullres_folder_files = os.listdir(fullres_folder)
jpg_fr_list = [
    name for name in fullres_folder_files if name.endswith('_fr.jpeg')
]

# Convert the full-resolution pictures to thumbnails.
for file in jpg_fr_list:
    jpeg_to_thumbnail(file)

