#------------------------------------------------------------------------------
#Whitens every connected group of dark pixels whose area exceeds a size limit.
#It uses CCL: Connected Component Labeling method to find noisy pixels.
#
#Usage: python3 <script> image_file threshold
#  image_file : image to clean (read in grayscale mode)
#  threshold  : gray level; pixels above it become white, all others black
#
#Files written (input path with its extension replaced):
#  <name>-GR.png  grayscale copy of the input
#  <name>-BW.png  binarised image (bright pixels white, dark pixels black)
#  <name>-WB.png  inverse of the binarised image, fed to the labelling step
#  <name>-CL.png  binarised image with the large components turned white
#------------------------------------------------------------------------------
import sys
import os
import numpy as np
import cv2
#------------------------------------------------------------------------------
if (len(sys.argv) < 3):
    print("\nUsage: python3 {} image_file threshold \n".format(sys.argv[0]))
    sys.exit(1)
#------------------------------------------------------------------------------
#Get input file and binarisation threshold
inFile = str(sys.argv[1])
try:
    threshold = int(sys.argv[2])
except ValueError:
    #Fail with a readable message instead of a traceback
    sys.exit("Error: threshold must be an integer, got '{}'".format(sys.argv[2]))

#Define names of the output files. os.path.join keeps an absolute input path
#intact, and os.path.splitext removes only the extension. (str.strip(extn)
#would strip any of the extension's characters from BOTH ends of the path,
#e.g. "pic.png" -> "ic".)
file_path = os.path.join(os.getcwd(), inFile)
base, extn = os.path.splitext(file_path)
outFile0 = base + "-GR.png"
outFile1 = base + "-BW.png"
outFile2 = base + "-WB.png"
outFile3 = base + "-CL.png"

black = 0
white = 255

#Define size (number of pixels) above which a connected component is wiped
#out. Note that text may be connected, and a smaller number will wipe out text
#from the image. This value needs to be worked out by trial-and-error for each
#type of image.
arsz = 2000

#Open input image in grayscale mode. cv2.imread returns None (it does not
#raise) when the file is missing or cannot be decoded, so check explicitly.
imgGray = cv2.imread(inFile, cv2.IMREAD_GRAYSCALE)
if imgGray is None:
    sys.exit("Error: cannot read image file '{}'".format(inFile))
cv2.imwrite(outFile0, imgGray)

#Change pixels above threshold to white (255) and every other pixel to black
#(0). A single np.where guarantees a strictly two-level image: pixels exactly
#equal to the threshold become black instead of keeping their gray value.
pixels = np.where(imgGray > threshold, white, black).astype("uint8")
imgBW = pixels
cv2.imwrite(outFile1, imgBW)

#Invert so that the dark pixels become non-zero: the labelling step treats
#non-zero pixels as foreground and zero pixels as background.
imgWB = cv2.bitwise_not(imgBW)
cv2.imwrite(outFile2, imgWB)

# Apply the Component analysis function, CV_32S is output image label type
analysis = cv2.connectedComponentsWithStats(imgWB, 8, cv2.CV_32S)

#nLabels is the total number of labels where 0 represents the background label.
#A label is assigned to each pixel based on its location and neighbours. If a
#pixel is black (value = 0), it is skipped as default label is '0'. Thus, each
#connected region shall be labeled 1, 2, 3... nLabels-1.
(nLabels, labels, values, centroid) = analysis
#nLabels = analysis[0], labels = analysis[1], values = analysis[2]...
#labels is matrix of the size of input image, each element has value = its label

#Flag, per label, whether its area exceeds the size limit. Label 0 is the
#background and is never whitened, whatever its area.
areas = values[:, cv2.CC_STAT_AREA]
isLarge = areas > arsz
isLarge[0] = False

#Indexing the per-label flags with the label matrix yields a boolean mask of
#the image's shape that is True on every pixel of a large component. This
#covers all large components at once instead of one full-image mask per label.
imgBW = np.where(isLarge[labels], white, imgBW).astype("uint8")
cv2.imwrite(outFile3, imgBW)