"""Convert the cover page of every PDF file in a folder to a PNG image.

PyMuPDF (imported as ``fitz``) renders the first page of each PDF found under
the folder given on the command line; the PNG files are written to the
``PDF2PNG`` directory. Both this script and the folder containing the PDF
files should be in the same directory. Every PNG is then resized to a common
width with Pillow, and HTML tags for adding the PNG files to a web page are
stored in a text file named ``imageHTML.txt``. The ALT text of each image is
based on the theme taken from the folder name, with a suffix added for the
image number.

Syntax: py pdf-png-CoverPage.py folder_name
"""
import glob
import html
import os
import subprocess
import sys

import fitz
from PIL import Image

OUTPUT_DIR = "PDF2PNG"       # directory that receives the PNG files
HTML_FILE = "imageHTML.txt"  # text file that receives the HTML tags
IMAGE_WIDTH = 600            # desired width, in pixels, of all the images


def _prepare_output_dir(output_dir):
    """Create *output_dir* if needed and delete PNG files left by earlier runs."""
    os.makedirs(output_dir, exist_ok=True)
    for stale_png in glob.glob(output_dir + "/" + "*.png"):
        os.remove(stale_png)


def _find_pdfs(root):
    """Return the paths of all PDF files below *root*.

    Directories are visited in ``os.walk`` order and the files of each
    directory in sorted order. The extension test is case-insensitive.
    """
    pdf_paths = []
    for path, _subdirs, files in os.walk(root):
        for name in sorted(files):
            if os.path.splitext(name)[-1].lower() == ".pdf":
                pdf_paths.append(os.path.join(path, name))
    return pdf_paths


def _render_cover(pdf_path, output_dir):
    """Save the first page of *pdf_path* as a PNG in *output_dir*.

    Returns the path of the PNG that was written.
    """
    # splitext removes only the extension; str.strip(".pdf") would also eat
    # leading/trailing 'p', 'd', 'f' and '.' characters of the file name.
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    png_path = output_dir + "/" + stem + ".png"
    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        doc.load_page(0).get_pixmap().save(png_path)
    return png_path


def _resize_to_width(png_path, width):
    """Resize the image at *png_path* in place to *width*, keeping its aspect ratio.

    Returns the resulting height in pixels.
    """
    with Image.open(png_path) as img:
        scale = width / float(img.size[0])
        height = int(float(img.size[1]) * scale)
        # resize() returns a new image; it must be kept in a variable and
        # that new image is the one to save.
        resized = img.resize((width, height), Image.Resampling.LANCZOS)
    # Save only after the source file handle has been released.
    resized.save(png_path)
    resized.close()
    return height


def _img_tag(png_path, topic, number, width, height):
    """Build the HTML image tag for one PNG file.

    NOTE(review): the original tag markup was not recoverable from the file
    as received (the string literal was empty and unterminated). This layout
    follows the module docstring: ALT text is the folder theme plus an image
    number suffix. Confirm against the web page that consumes imageHTML.txt.
    """
    src = html.escape(png_path, quote=True)
    alt = html.escape("{}-{:03}".format(topic, number), quote=True)
    return '<img src="{}" alt="{}" width="{}" height="{}">'.format(
        src, alt, width, height
    )


def main(argv=None):
    """Run the conversion for the folder named in *argv* (default ``sys.argv``).

    Exits with status 1 when no folder is given or the folder does not exist.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = os.path.basename(argv[0]) if argv else "pdf-png-CoverPage.py"
        print("\n Error! Usage: py {} folder".format(prog))
        print("\n Please specify the folder name")
        sys.exit(1)

    root = str(argv[1])
    if not os.path.isdir(root):
        print("\n Error! Folder '{}' not found".format(root))
        sys.exit(1)

    # Remove first 3 characters from the folder name
    topic = root.split('/')[0][3:]

    _prepare_output_dir(OUTPUT_DIR)

    # Only PDF files are counted, so the progress totals match the work done.
    pdf_paths = _find_pdfs(root)
    png_paths = []
    for number, pdf_path in enumerate(pdf_paths, start=1):
        png_paths.append(_render_cover(pdf_path, OUTPUT_DIR))
        print("File " + '{0:03}'.format(number) + " of " + str(len(pdf_paths))
              + " processed!")

    # PDFs with the same file name in different sub-folders map to one PNG;
    # handle each output file once, in sorted order.
    png_paths = sorted(set(png_paths))
    with open(HTML_FILE, "w", encoding="utf-8") as out:
        for number, png_path in enumerate(png_paths, start=1):
            height = _resize_to_width(png_path, IMAGE_WIDTH)
            out.write(
                _img_tag(png_path, topic, number, IMAGE_WIDTH, height) + '\n'
            )
            print("Image " + '{0:03}'.format(number) + " of "
                  + str(len(png_paths)) + " processed!")


if __name__ == "__main__":
    main()