"""
Convert coloured JPEG images to black-and-white and merge them into one PDF.

Every ``*.jpg`` file in the current working directory is loaded as grayscale
with OpenCV, binarised with a fixed threshold, and has a margin of pixels along
each of its four borders painted white (to remove black patches near the
borders). Each processed image becomes one page of the merged PDF.

The user needs to choose the threshold value used for the black-and-white
conversion. It has to be tuned for each set of images, because the quality of
the colours varies from image to image.

Side effects: writes (and overwrites) the intermediate files ``BW.png`` and
``X1.pdf`` and the final ``Merged-PDF.pdf`` in the current working directory.
"""
import glob
import os
import sys

import cv2
import fitz
import numpy as np
from PIL import Image

# Output PDF containing one page per processed image.
pdfFile = "Merged-PDF.pdf"
# Intermediate single-page PDF, overwritten for every image.
pdfx = "X1.pdf"
# Intermediate black-and-white PNG, overwritten for every image.
bwFile = "BW.png"
mergedPDF = fitz.open()

# Pixels brighter than thresh become white (255), all others black (0), so a
# higher value turns more of the image black. There is no optimum value.
thresh = 100
# Per-pixel equivalent of the thresholding rule; not referenced below.
fn = lambda x: 255 if x > thresh else 0

# Number of rows of pixels at the top to convert into white
nT = 10
# Number of rows of pixels at the bottom to convert into white
nB = 20
# Number of columns of pixels on the left to convert into white
nL = 60
# Number of columns of pixels on the right to convert into white
nR = 60


def _whiten_borders(img, top, bottom, left, right):
    """Paint the outer margins of a 2-D image array white (255), in place.

    Args:
        img: image array indexed as ``img[row, column]``; modified in place.
        top: number of rows to whiten at the top.
        bottom: number of rows to whiten at the bottom.
        left: number of columns to whiten on the left.
        right: number of columns to whiten on the right.

    Margins larger than the image simply whiten the whole image, and
    non-positive margins are ignored.
    """
    h, w = img.shape[:2]
    # max(..., 0) keeps a negative margin from being read as a negative
    # (from-the-end) slice bound, and keeps h - bottom / w - right from
    # wrapping around when the margin exceeds the image size.
    img[:max(top, 0)] = 255
    img[max(h - max(bottom, 0), 0):] = 255
    img[:, :max(left, 0)] = 255
    img[:, max(w - max(right, 0), 0):] = 255


# PyMuPDF renamed insertPDF to insert_pdf; use whichever this version offers.
_insert_pdf = getattr(mergedPDF, "insert_pdf", None) or mergedPDF.insertPDF

# Sort so that the page order of the merged PDF is deterministic.
for f in sorted(glob.glob("*.jpg")):
    # Load the image directly as grayscale.
    imgGray = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
    if imgGray is None:
        # cv2.imread signals an unreadable file by returning None.
        print("Skipping unreadable image:", f, file=sys.stderr)
        continue

    # Convert the image to black and white. The first return value (the
    # threshold actually used) is discarded so the module-level setting is
    # not overwritten between images.
    _, bwImg = cv2.threshold(imgGray, thresh, 255, cv2.THRESH_BINARY)

    _whiten_borders(bwImg, nT, nB, nL, nR)

    if not cv2.imwrite(bwFile, bwImg):
        # Without this check a stale BW.png from the previous image would be
        # silently reused for this page.
        raise OSError("Could not write intermediate image: " + bwFile)

    # Round-trip through Pillow to turn the PNG into a single-page PDF.
    with Image.open(bwFile) as bwPng:
        bwPng.convert("RGB").save(pdfx, "PDF", resolution=100.0)

    p = fitz.open(pdfx)
    try:
        _insert_pdf(p)
    finally:
        p.close()

# A document without pages cannot be saved, so only save when at least one
# image was processed.
if len(mergedPDF) > 0:
    mergedPDF.save(pdfFile)
else:
    print("No readable *.jpg images found; nothing written to", pdfFile,
          file=sys.stderr)
mergedPDF.close()