'''
This Python script uses OpenCV to convert colour images to grayscale and then
to black-and-white format. It further removes black patches near the 4
borders and merges the resulting pages into a single PDF.

The user needs to specify the threshold value of the pixel intensity used for
the black-and-white conversion. This value has to be tuned for each set of
images, because the colour quality varies from image to image.
'''
from PIL import Image
import cv2
import fitz
import glob
import sys, os
import numpy as np

# Document that collects the page produced for every processed image; it is
# written out as `pdfFile` once all images have been handled.
mergedPDF = fitz.open()

# Name of the final, merged PDF.
pdfFile = "Merged-PDF.pdf"

# Scratch files that are overwritten for every image: the black-and-white
# bitmap, and the single-page PDF generated from that bitmap.
bwFile = "BW.png"
pdfx = "X1.pdf"

# Binarisation cut-off: a pixel whose gray value is strictly greater than
# `thresh` becomes white (255), every other pixel becomes black (0). Raising
# the value therefore turns more of the image black. There is no universally
# optimal value; tune it per batch of images.
thresh = 100

# The same rule expressed for a single pixel value.
fn = lambda x: 255 if x > thresh else 0

# Margins, in pixels, that are painted white after thresholding:
#   nT - number of pixel rows at the top edge
#   nB - number of pixel rows at the bottom edge
nT, nB = 10, 20
#   nL - number of pixel columns at the left edge
#   nR - number of pixel columns at the right edge
nL, nR = 60, 60

def _whiten_borders(img, top, bottom, left, right):
    """Paint the outer margins of a 2-D image array white (255), in place.

    Args:
        img: array indexed as ``img[row, column]``.
        top: number of pixel rows to whiten at the top edge.
        bottom: number of pixel rows to whiten at the bottom edge.
        left: number of pixel columns to whiten at the left edge.
        right: number of pixel columns to whiten at the right edge.

    Each margin is clamped to the image size, so a margin of 0 (or a negative
    one) leaves that edge untouched and an oversized margin whitens the whole
    image instead of raising IndexError.

    Returns:
        The same array object, for convenience.
    """
    height, width = img.shape[:2]
    top = max(0, min(top, height))
    bottom = max(0, min(bottom, height))
    left = max(0, min(left, width))
    right = max(0, min(right, width))

    # Slice assignment replaces the per-pixel Python loops.
    img[:top, :] = 255
    # Written as height - bottom (never a bare negative index) so that
    # bottom == 0 selects an empty slice rather than the whole image.
    img[height - bottom:, :] = 255
    img[:, :left] = 255
    img[:, width - right:] = 255
    return img


# PyMuPDF renamed Document.insertPDF to Document.insert_pdf; prefer the new
# name and fall back to the old one on releases that predate it.
_insert_pdf = getattr(mergedPDF, "insert_pdf", None) or mergedPDF.insertPDF

try:
    # glob returns names in arbitrary order; sort them so the page order of
    # the merged PDF is reproducible.
    for f in sorted(glob.glob("*.jpg")):
        # Load the image directly as single-channel grayscale.
        imgGray = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if imgGray is None:
            # imread signals an unreadable/corrupt file by returning None.
            sys.stderr.write("Skipping unreadable image: %s\n" % f)
            continue

        # Binarise: pixels brighter than `thresh` become 255, the rest 0.
        # The returned threshold value is discarded so that the global
        # `thresh` setting is not rebound on every iteration.
        _, bwImg = cv2.threshold(imgGray, thresh, 255, cv2.THRESH_BINARY)

        # Remove the dark patches along the four edges.
        _whiten_borders(bwImg, nT, nB, nL, nR)

        # Round-trip through scratch files: bitmap -> one-page PDF.
        cv2.imwrite(bwFile, bwImg)
        with Image.open(bwFile) as bwPage:
            bwPage.convert("RGB").save(pdfx, "PDF", resolution=100.0)

        # Append the one-page PDF to the merged document.
        p = fitz.open(pdfx)
        try:
            _insert_pdf(p)
        finally:
            p.close()

    if len(mergedPDF) == 0:
        # Nothing was converted; report that clearly instead of letting the
        # save of a page-less document fail.
        sys.exit("No readable *.jpg images found; %s was not written." % pdfFile)

    mergedPDF.save(pdfFile)
finally:
    mergedPDF.close()