##################### Making Essential Imports ############################
import sklearn
import os
import sys
import matplotlib.pyplot as plt
import cv2
import pytesseract
import numpy as np
import pandas as pd
import tensorflow as tf

# Tesseract configuration string.
# BUG FIX: the original was r'-- oem 2' (space between the dashes and "oem");
# Tesseract only recognizes the flag spelled '--oem'.
conf = r'--oem 2'

#####################################
#    Defining a skeleton for our    #
#             DataFrame             #
#####################################
DataFrame = {
    'photo_name': [],    # file name of each processed image
    'flattenPhoto': [],  # flattened ResNet50 feature vector per image
    'text': [],          # OCR text extracted by pytesseract per image
}

#######################################################################################
#     The Approach is to apply transfer learning hence using Resnet50 as my           #
#     pretrained model                                                                #
#######################################################################################
MyModel = tf.keras.models.Sequential()
MyModel.add(tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
))
# Freeze the pretrained backbone so its ImageNet weights are not updated.
MyModel.layers[0].trainable = False


### Now defining dataloading Function
def LoadDataAndDoEssentials(path, h, w):
    """Read one image, OCR it, and append its ResNet50 feature vector.

    Appends the OCR text to DataFrame['text'] and the flattened feature
    vector to DataFrame['flattenPhoto'] (module-level side effects).

    BUG FIX: the original source read ``defLoadDataAndDoEssentials(...)``
    (missing space after ``def``), which is a syntax error.

    Parameters
    ----------
    path : str
        Path to the image file on disk.
    h, w : int
        Target height and width for resizing.

    Raises
    ------
    FileNotFoundError
        If the image cannot be read by OpenCV.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None for unreadable/missing files; fail loudly
        # here instead of crashing later inside pytesseract or resize.
        raise FileNotFoundError(f'Could not read image: {path}')
    DataFrame['text'].append(pytesseract.image_to_string(img, config=conf))
    # BUG FIX: cv2.resize expects dsize as (width, height); the original
    # passed (h, w). Callers use 224x224 so results were unchanged, but the
    # argument order was wrong for any non-square target.
    img = cv2.resize(img, (w, h))
    # Expanding image dims so this represents 1 sample.
    # (The original had a harmless duplicated assignment ``img = img = ...``.)
    img = np.expand_dims(img, 0)
    img = tf.keras.applications.resnet50.preprocess_input(img)
    extractedFeatures = MyModel.predict(img)
    extractedFeatures = np.array(extractedFeatures)
    DataFrame['flattenPhoto'].append(extractedFeatures.flatten())
### with this all done lets write the iterative loop
def ReadAndStoreMyImages(path):
    """Iterate over every file in `path` and extract features from each.

    Records each file name in DataFrame['photo_name'] and delegates OCR and
    feature extraction to LoadDataAndDoEssentials (module-level side effects).

    BUG FIX: the original source read ``defReadAndStoreMyImages(...)``
    (missing space after ``def``), which is a syntax error.

    Parameters
    ----------
    path : str
        Directory containing the images to process.
    """
    for mem in os.listdir(path):
        DataFrame['photo_name'].append(mem)
        # os.path.join is portable across platforms, unlike manual
        # '/' string concatenation.
        imagePath = os.path.join(path, mem)
        LoadDataAndDoEssentials(imagePath, 224, 224)
### lets give the address of our Parent directory and start
# BUG FIX: the original literal 'enter your data's path here' contained an
# unescaped apostrophe inside single quotes — a syntax error. Double quotes
# fix it without changing the placeholder text.
path = "enter your data's path here"
ReadAndStoreMyImages(path)

######################################################
#                lets now do clustering              #
######################################################
Training_Feature_vector = np.array(DataFrame['flattenPhoto'], dtype='float64')

from sklearn.cluster import AgglomerativeClustering

# NOTE(review): the name `kmeans` is misleading — this is agglomerative
# (hierarchical) clustering, not k-means. The name is kept so any external
# code referring to `kmeans` keeps working; prefer renaming it project-wide.
kmeans = AgglomerativeClustering(n_clusters=2)
kmeans.fit(Training_Feature_vector)

# A little explanation for the above code:
# (trailing prose from the original source, preserved as a comment so the
# file parses)