import cv2
import matplotlib.pyplot as plt
import numpy as np

# Document "scanner" script: find the largest outline in a photo, and if it is
# a quadrilateral, warp it to a flat 800x1200 top-down view and display the
# original and the warped result side by side.

document_path = 'path/to/your/document.jpg'

# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of an opaque cvtColor error later.
document_image = cv2.imread(document_path)
if document_image is None:
    raise FileNotFoundError(f"Could not read image: {document_path}")

# Grayscale -> 5x5 Gaussian blur -> Canny edges (thresholds 50/150).
gray_document = cv2.cvtColor(document_image, cv2.COLOR_BGR2GRAY)
blurred_document = cv2.GaussianBlur(gray_document, (5, 5), 0)
edges_document = cv2.Canny(blurred_document, 50, 150)

contours, _ = cv2.findContours(
    edges_document, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
if not contours:
    # max() on an empty sequence would raise an uninformative ValueError.
    raise ValueError(f"No contours found in image: {document_path}")

# Approximate the largest contour (by area) with a polygon, using a tolerance
# of 2% of its perimeter.
largest_contour = max(contours, key=cv2.contourArea)
perimeter = cv2.arcLength(largest_contour, True)
approx = cv2.approxPolyDP(largest_contour, 0.02 * perimeter, True)

if len(approx) == 4:
    # getPerspectiveTransform requires float32 points; approxPolyDP yields
    # integer coordinates.
    corners = approx.reshape(4, 2).astype(np.float32)

    # The contour's starting corner and winding are arbitrary, so sort the
    # corners into top-left, top-right, bottom-right, bottom-left to match the
    # destination rectangle. x+y is smallest at top-left and largest at
    # bottom-right; y-x is smallest at top-right and largest at bottom-left.
    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1).ravel()
    source_corners = np.float32([
        corners[np.argmin(coord_sum)],
        corners[np.argmin(coord_diff)],
        corners[np.argmax(coord_sum)],
        corners[np.argmax(coord_diff)],
    ])
    target_corners = np.float32([[0, 0], [800, 0], [800, 1200], [0, 1200]])

    transformed_document = cv2.warpPerspective(
        document_image,
        cv2.getPerspectiveTransform(source_corners, target_corners),
        (800, 1200),
    )

    # OpenCV images are BGR; matplotlib expects RGB.
    plt.figure(figsize=(10, 5))

    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(document_image, cv2.COLOR_BGR2RGB))
    plt.title('Original Document')
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(transformed_document, cv2.COLOR_BGR2RGB))
    plt.title('Transformed Document')
    plt.axis('off')

    plt.show()
else:
    # Previously this case produced no output at all; say why nothing is shown.
    print(
        f"Largest contour has {len(approx)} corners, not 4; "
        "skipping perspective transform."
    )
import os

import cv2
import matplotlib.pyplot as plt
import pytesseract
from PIL import Image

# OCR script: binarize an image, extract its text with Tesseract, show the
# original image, and print the extracted text.

# Default Windows install location of the Tesseract binary. Only override
# pytesseract's command when that file actually exists, so machines that have
# tesseract on PATH (or installed elsewhere) keep working.
tesseract_path = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(tesseract_path):
    pytesseract.pytesseract.tesseract_cmd = tesseract_path

text_image_path = 'path/to/your/text_image.jpg'

# cv2.imread returns None (rather than raising) when the file cannot be read.
text_image = cv2.imread(text_image_path)
if text_image is None:
    raise FileNotFoundError(f"Could not read image: {text_image_path}")

# Grayscale, then a fixed binary threshold: pixels above 150 become 255,
# everything else 0.
gray_text_image = cv2.cvtColor(text_image, cv2.COLOR_BGR2GRAY)
_, thresholded_text = cv2.threshold(gray_text_image, 150, 255, cv2.THRESH_BINARY)

# OCR runs on the thresholded image, not the original.
text = pytesseract.image_to_string(Image.fromarray(thresholded_text))

# Display the original image; OpenCV images are BGR, matplotlib expects RGB.
plt.figure(figsize=(8, 6))
plt.imshow(cv2.cvtColor(text_image, cv2.COLOR_BGR2RGB))
plt.title('Original Image')
plt.axis('off')
plt.show()

print("Extracted Text:")
print(text)