diff --git a/CNN_model.py b/CNN_model.py new file mode 100644 index 0000000..092c2cc --- /dev/null +++ b/CNN_model.py @@ -0,0 +1,64 @@ +from __future__ import print_function +import keras +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers import Dense, Dropout, Flatten +from keras.layers import Conv2D, MaxPooling2D +from keras import backend as K + +batch_size = 128 +num_classes = 10 +epochs = 10 + +# input image dimensions +img_rows, img_cols = 28, 28 + +# the data, shuffled and split between train and test sets +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +if K.image_data_format() == 'channels_first': + x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) + x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) + input_shape = (1, img_rows, img_cols) +else: + x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) + x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) + input_shape = (img_rows, img_cols, 1) + +x_train = x_train.astype('float32') +x_test = x_test.astype('float32') +x_train /= 255 +x_test /= 255 +print('x_train shape:', x_train.shape) +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') + +# convert class vectors to binary class matrices +y_train = keras.utils.to_categorical(y_train, num_classes) +y_test = keras.utils.to_categorical(y_test, num_classes) + +model = Sequential() +model.add(Conv2D(32, kernel_size=(3, 3), + activation='relu', + input_shape=input_shape)) +model.add(Conv2D(64, (3, 3), activation='relu')) +model.add(MaxPooling2D(pool_size=(2, 2))) +model.add(Dropout(0.25)) +model.add(Flatten()) +model.add(Dense(128, activation='relu')) +model.add(Dropout(0.5)) +model.add(Dense(num_classes, activation='softmax')) + +model.compile(loss=keras.losses.categorical_crossentropy, + optimizer=keras.optimizers.Adadelta(), + metrics=['accuracy']) + +model.fit(x_train, y_train, + batch_size=batch_size, + epochs=epochs, + verbose=1, + validation_data=(x_test, y_test)) +score = model.evaluate(x_test, y_test, verbose=0) +model.save('digit_model.h5') +print('Test loss:', score[0]) +print('Test accuracy:', score[1]) diff --git a/digit_model.h5 b/digit_model.h5 new file mode 100644 index 0000000..21ee771 Binary files /dev/null and b/digit_model.h5 differ diff --git a/digits_cls.pkl b/digits_cls.pkl index fc3db2a..aa692d8 100644 Binary files a/digits_cls.pkl and b/digits_cls.pkl differ diff --git a/generateClassifier.py b/generateClassifier.py index 2ae0a95..4efd3a2 100755 --- a/generateClassifier.py +++ b/generateClassifier.py @@ -27,7 +27,7 @@ pp = preprocessing.StandardScaler().fit(hog_features) hog_features = pp.transform(hog_features) -print "Count of digits in dataset", Counter(labels) +print("Count of digits in dataset", Counter(labels)) # Create an linear SVM object clf = LinearSVC() diff --git a/performRecognition.py b/performRecognition.py index c7d02c2..5254737 100755 --- a/performRecognition.py +++ b/performRecognition.py @@ -2,55 +2,78 @@ # Import the modules import cv2 -from sklearn.externals import joblib -from skimage.feature import hog +from keras.models import load_model import numpy as np import argparse as ap + +def show_image(im): + cv2.namedWindow("Resulting Image with Rectangular ROIs", cv2.WINDOW_NORMAL) + cv2.imshow("Resulting Image with Rectangular ROIs", im) + cv2.waitKey(10000) + cv2.destroyAllWindows() + + # Get the path of the training set parser = ap.ArgumentParser() -parser.add_argument("-c", "--classiferPath", help="Path to Classifier File", required="True") +# parser.add_argument("-c", "--classiferPath", help="Path to Classifier File", required="True") parser.add_argument("-i", "--image", help="Path to Image", required="True") args = vars(parser.parse_args()) # Load the classifier -clf, pp = joblib.load(args["classiferPath"]) +# clf, pp = joblib.load(args["classiferPath"]) +model = load_model('digit_model.h5') -# Read the input image +# Read the input image im = cv2.imread(args["image"]) - -# Convert to grayscale and apply Gaussian filtering im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) -im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0) -# Threshold the image -ret, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV) +im_for_ctr = cv2.GaussianBlur(im_gray, (5, 5), 0) +im_th = cv2.Canny(im_for_ctr, 100, 200) +# show_image(im_th) # Find contours in the image -ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) +ret, binary = cv2.threshold(im_gray, 210, 255, cv2.THRESH_BINARY) + +_, ctrs, _ = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # Get rectangles contains each contour rects = [cv2.boundingRect(ctr) for ctr in ctrs] -# For each rectangular region, calculate HOG features and predict -# the digit using Linear SVM. + for rect in rects: + if 15 < rect[2] < 50 and 25 < rect[3] < 50: + pass + else: + continue + + if 440 < rect[0] < 620 and 125 < rect[1] < 165: + pass + else: + continue + # Draw the rectangles - cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3) + # rect[0], rect[1] : top left coor , rect[2], rect[3] : width and height + cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3) # Make the rectangular region around the digit - leng = int(rect[3] * 1.6) + leng = int(rect[3] * 1.2) pt1 = int(rect[1] + rect[3] // 2 - leng // 2) pt2 = int(rect[0] + rect[2] // 2 - leng // 2) - roi = im_th[pt1:pt1+leng, pt2:pt2+leng] + roi = binary[pt1:pt1+leng, pt2:pt2+leng] # Resize the image roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA) roi = cv2.dilate(roi, (3, 3)) - # Calculate the HOG features - roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False) - roi_hog_fd = pp.transform(np.array([roi_hog_fd], 'float64')) - nbr = clf.predict(roi_hog_fd) - cv2.putText(im, str(int(nbr[0])), (rect[0], rect[1]),cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3) + # show_image(roi) + + roi = np.array(roi, dtype='float32') + roi = roi.reshape((1, 28, 28, 1)) + + # predict + nbr = model.predict(roi) + result = np.argmax(nbr) + + cv2.putText(im, str(result), (rect[0], rect[1]), cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3) cv2.namedWindow("Resulting Image with Rectangular ROIs", cv2.WINDOW_NORMAL) cv2.imshow("Resulting Image with Rectangular ROIs", im) -cv2.waitKey() +cv2.waitKey(10000) diff --git a/test.png b/test.png new file mode 100644 index 0000000..128bd08 Binary files /dev/null and b/test.png differ