# detect checkbox/square
def extract_quads(self, image_arr, name_arr):
    """Locate four-cornered contours in each image and collect their crops.

    Each image is converted to grayscale when it has more than two
    dimensions, preprocessed with ``self.do_quad_imageprocessing`` and
    scanned for contours. A contour is accepted when

    * its area lies strictly between ``self.rec_min_area`` and
      ``self.rec_max_area``,
    * the width/height ratio of its bounding box lies within
      ``self.aspect_ratio[0]`` .. ``self.aspect_ratio[1]`` (inclusive), and
    * its polygon approximation (tolerance ``self.epsilon`` times the
      perimeter) has exactly four vertices.

    Accepted contours are outlined in green directly on the input image and
    the bounding-box region is cropped from that same image.

    Args:
        image_arr: Sequence of images to scan. The images are drawn on in
            place.
        name_arr: Names matching ``image_arr`` by position.

    Returns:
        None. One ``[image, crops, name]`` record per input image is
        appended to ``self.extracted_img_data``.

    NOTE(review): the original formatting of this block was lost, so the
    nesting of the final ``append`` and of the "No image is found" branch
    was reconstructed (one record per image; message on empty input).
    Confirm against the callers.
    """
    if len(image_arr) == 0:
        print('No image is found during the extraction process')
        return

    for index, original_image in enumerate(image_arr):
        # crops cut out of the current image
        extracted_quad = []

        # grayscale only if it is not already single-channel
        if len(original_image.shape) > 2:
            gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
        else:
            gray = original_image.copy()

        # image preprocessing for quadrilaterals
        img_dilate = self.do_quad_imageprocessing(
            gray, self.blocksize, self.thresh_const, self.kernelsize
        )

        if len(img_dilate) > 0:
            # Detection is best-effort: a failure on one image is reported
            # and the remaining images are still processed.
            try:
                # The copy is kept from the original code; older OpenCV
                # releases modified the image passed to findContours.
                cnts = cv2.findContours(
                    img_dilate.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
                )
                cnts = imutils.grab_contours(cnts)

                for c in cnts:
                    peri = cv2.arcLength(c, True)
                    approx = cv2.approxPolyDP(c, self.epsilon * peri, True)

                    # bounding rectangle coordinates
                    x, y, w, h = cv2.boundingRect(approx)

                    # explicit true division, independent of int semantics
                    aspectratio = w / float(h)
                    area = cv2.contourArea(c)

                    if not (self.rec_min_area < area < self.rec_max_area):
                        continue
                    if not (self.aspect_ratio[0] <= aspectratio <= self.aspect_ratio[1]):
                        continue
                    # keep only polygons with exactly four corners
                    if len(approx) != 4:
                        continue

                    cv2.drawContours(original_image, [c], 0, (0, 255, 0), 2)
                    # The crop is sliced from the image after the outline
                    # was drawn, so it includes the green border.
                    roi = original_image[y:y + h, x:x + w]
                    extracted_quad.append(roi)
            except Exception as e:
                print('The following exception occured during quad shape detection: ', e)

        self.extracted_img_data.append([original_image, extracted_quad, name_arr[index]])
# Convert the PDF to images with pdf2image:
def Img2Pdf(dirname, dpi=300,
            poppler_path=r'C:\Program Files (x86)\poppler-0.68.0_x86\poppler-0.68.0\bin'):
    """Render every page of the PDF file(s) at ``dirname`` as an image array.

    Despite the name, the conversion direction is PDF -> images.

    Args:
        dirname: Directory to scan for ``*.pdf`` files (extension matched
            case-insensitively). A path to a single PDF file is also
            accepted.
        dpi: Rendering resolution handed to ``convert_from_path``.
        poppler_path: Directory holding the Poppler binaries, handed to
            ``convert_from_path``. The default is the original hard-coded
            Windows location; pass a different path (or ``None`` to let
            pdf2image look Poppler up on ``PATH``) on other machines.

    Returns:
        A list with one ``np.array`` per rendered page, in the order the
        PDFs are listed by ``os.listdir`` and the pages are returned by
        ``convert_from_path``. Empty when no PDF is found.
    """
    def _is_pdf(path):
        # Check the extension of the candidate itself, not of the directory.
        return os.path.splitext(path)[1].lower() == '.pdf'

    if os.path.isfile(dirname):
        pdf_paths = [dirname] if _is_pdf(dirname) else []
    else:
        candidates = (os.path.join(dirname, entry) for entry in os.listdir(dirname))
        pdf_paths = [path for path in candidates
                     if _is_pdf(path) and os.path.isfile(path)]

    images = []
    for pdf_path in pdf_paths:
        pages = convert_from_path(pdf_path, dpi=dpi, poppler_path=poppler_path)
        images.extend(np.array(page) for page in pages)
    return images