thumbnail picture editing

PDF to thumbnail picture

A fast way to create thumbnails from pdf files with python

One of my favorite things about Python is that repetitive tasks can be easily and reliably automated with simple syntax. A pdf-to-thumbnail converter is presented below. Since pure line drawings from Creo can only be exported to pdf, it was necessary to find a solution that would easily create jpeg thumbnails from hundreds of pdf files. The data team can later use these for the website. The desired size was 300x300 pixels. To do this, we need a module that is capable of reading pdf files: the fitz module. The fitz module is provided by the PyMuPDF package, so install PyMuPDF to be able to import it. I used the Image module of the well-known PIL library to turn the rendered pdf pages into thumbnail images.

import fitz
from PIL import Image
import os

# Folder that contains the source pdf files
main_folder = r"C:/pdf_to_thumbnail"

# Folder that receives the full resolution jpeg files
# (closing quote fixed: the literal was opened with " but closed with ')
fullres_folder = r"C:/pdf_to_thumbnail/fulres_folder"

# Folder that receives the thumbnail jpeg files
# (closing quote fixed: the literal was opened with " but closed with ')
thumbnail_folder = r"C:/pdf_to_thumbnail/thumbnails"

#Convert a pdf to a full resolution jpeg named 'same file name + _fr + .jpeg'
def pdf_to_fullres(filename):
    """Render the first page of a pdf from main_folder as a jpeg in fullres_folder.

    The output keeps the pdf's base name and gets the suffix "_fr.jpeg",
    e.g. 'b00000078_for_thumbn.pdf' -> 'b00000078_for_thumbn_fr.jpeg'.

    Args:
        filename: Name of the pdf file inside main_folder (not a full path).

    Raises:
        IndexError: presumably raised by ``doc[0]`` when the pdf has no
            pages -- NOTE(review): confirm against the PyMuPDF version in use.
    """
    #concatenate mainfolder path and filename
    pdf_path = os.path.join(main_folder, filename)

    #declare the new full res filename and path; splitext removes the
    #extension whatever its length instead of a fixed 4-character slice
    base_name = os.path.splitext(filename)[0]
    fullres_filepath = os.path.join(fullres_folder, base_name + "_fr" + ".jpeg")

    #open the pdf inside a context manager so the document is closed even
    #if rendering or saving fails (matters when looping over many pdfs)
    with fitz.open(pdf_path) as doc:
        #Select the first page of pdf
        page = doc[0]

        #Render the page to a pixmap with get_pixmap's default settings
        fullres_pic = page.get_pixmap()

        #Write the full res picture
        fullres_pic.pil_save(fullres_filepath)


#Convert a full resolution jpeg to a thumbnail named 'same file name + _tn + .jpeg'
#The border is cropped because Creo pdf export leaves a thin paper contour boundary line
def jpeg_to_thumbnail(filename):
    """Crop a full resolution jpeg and save a thumbnail of it in thumbnail_folder.

    The "_fr" marker and the extension are removed from the input name and
    "_tn.jpeg" is appended, e.g. 'pic078_for_thumbn_fr.jpeg' ->
    'pic078_for_thumbn_tn.jpeg'. A name without the "_fr" marker only loses
    its extension instead of being cut at a fixed character count.

    Args:
        filename: Name of the jpeg file inside fullres_folder (not a full path).
    """
    #concatenate fullres_folder path and filename
    fulrespic_path = os.path.join(fullres_folder, filename)

    #declare the new thumbnail filename and path
    base_name = os.path.splitext(filename)[0]
    if base_name.endswith("_fr"):
        base_name = base_name[:-len("_fr")]
    thumbnail_filepath = os.path.join(thumbnail_folder, base_name + "_tn" + ".jpeg")

    #crop margins in pixels
    #NOTE(review): the original description says "4 pixel every side", but the
    #right/bottom margin has always been 5 -- kept as is, confirm which is intended
    left_top_margin = 4
    crop_const = 5

    #set thumbnail size (upper bound passed to Image.thumbnail)
    size = 300, 300

    #open full resolution jpeg; the context manager closes the file afterwards
    with Image.open(fulrespic_path) as image:
        #get image size
        w, h = image.size

        #crop image: box is (left, upper, right, lower)
        croped_im = image.crop(
            (left_top_margin, left_top_margin, w - crop_const, h - crop_const)
        )

        #Create thumbnail picture (resizes croped_im in place)
        croped_im.thumbnail(size)

        #Save the thumbnail picture
        croped_im.save(thumbnail_filepath)

# Make sure both output folders exist, otherwise the first save (or the
# listing of fullres_folder below) fails on a fresh machine
os.makedirs(fullres_folder, exist_ok=True)
os.makedirs(thumbnail_folder, exist_ok=True)

# Read every pdf file from main folder (extension matched case-insensitively,
# so 'DRAWING.PDF' is picked up as well)
main_folder_files = os.listdir(main_folder)
pdf_list = [name for name in main_folder_files if name.lower().endswith('.pdf')]

# convert pdf to full resolution pictures
for pdf in pdf_list:
    pdf_to_fullres(pdf)

# Read every fullres pic from fullres_folder; this is listed only after the
# conversion loop above so the freshly written jpegs are included
fullres_folder_files = os.listdir(fullres_folder)
jpg_fr_list = [name for name in fullres_folder_files if name.lower().endswith('.jpeg')]

# convert full resolution pictures to thumbnails
for file in jpg_fr_list:
    jpeg_to_thumbnail(file)