bikz05 · gogameboy1 · Jan 6, 2017 · Oct 22, 2017 · Oct 25, 2017
diff --git a/CNN_model.py b/CNN_model.py
@@ -0,0 +1,64 @@
+from __future__ import print_function
+import keras
+from keras.datasets import mnist
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Flatten
+from keras.layers import Conv2D, MaxPooling2D
+from keras import backend as K
+
+batch_size = 128  # forwarded to model.fit(batch_size=...)
+num_classes = 10  # one-hot label width and unit count of the final softmax layer
+epochs = 10  # forwarded to model.fit(epochs=...)
+
+# input image dimensions
+img_rows, img_cols = 28, 28
+
+# the data, shuffled and split between train and test sets
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+if K.image_data_format() == 'channels_first':  # put the channel axis where the backend expects it
+    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
+    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
+    input_shape = (1, img_rows, img_cols)
+else:
+    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
+    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
+    input_shape = (img_rows, img_cols, 1)
+
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255  # divide by 255 (presumably maps 8-bit pixels into [0, 1])
+x_test /= 255
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
+
+# convert integer class vectors to one-hot (binary) class matrices
+y_train = keras.utils.to_categorical(y_train, num_classes)
+y_test = keras.utils.to_categorical(y_test, num_classes)
+
+model = Sequential()  # layers below are stacked in the order they are added
+model.add(Conv2D(32, kernel_size=(3, 3),
+                 activation='relu',
+                 input_shape=input_shape))
+model.add(Conv2D(64, (3, 3), activation='relu'))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+model.add(Dropout(0.25))
+model.add(Flatten())
+model.add(Dense(128, activation='relu'))
+model.add(Dropout(0.5))
+model.add(Dense(num_classes, activation='softmax'))  # one output per class
+
+model.compile(loss=keras.losses.categorical_crossentropy,
+              optimizer=keras.optimizers.Adadelta(),
+              metrics=['accuracy'])
+
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=1,
+          validation_data=(x_test, y_test))  # NOTE: the test split doubles as validation data
+score = model.evaluate(x_test, y_test, verbose=0)  # [loss, accuracy], as printed below
+model.save('digit_model.h5')  # same file name that performRecognition.py loads
+print('Test loss:', score[0])
+print('Test accuracy:', score[1])
diff --git a/digit_model.h5 b/digit_model.h5
diff --git a/digits_cls.pkl b/digits_cls.pkl
diff --git a/generateClassifier.py b/generateClassifier.py
@@ -27,7 +27,7 @@
 pp = preprocessing.StandardScaler().fit(hog_features)
 hog_features = pp.transform(hog_features)
 
-print "Count of digits in dataset", Counter(labels)
+print("Count of digits in dataset", Counter(labels))
 
 # Create an linear SVM object
 clf = LinearSVC()

diff --git a/performRecognition.py b/performRecognition.py
@@ -2,55 +2,78 @@
 
 # Import the modules
 import cv2
-from sklearn.externals import joblib
-from skimage.feature import hog
+from keras.models import load_model
 import numpy as np
 import argparse as ap
 
+
+def show_image(im):  # display `im` in a resizable window; only called from commented-out lines here
+    cv2.namedWindow("Resulting Image with Rectangular ROIs", cv2.WINDOW_NORMAL)
+    cv2.imshow("Resulting Image with Rectangular ROIs", im)
+    cv2.waitKey(10000)  # block until a key press or 10000 ms, whichever comes first
+    cv2.destroyAllWindows()
+
+
 # Get the path of the training set
 parser = ap.ArgumentParser()
-parser.add_argument("-c", "--classiferPath", help="Path to Classifier File", required="True")
+# parser.add_argument("-c", "--classiferPath", help="Path to Classifier File", required="True")
 parser.add_argument("-i", "--image", help="Path to Image", required="True")
 args = vars(parser.parse_args())
 
 # Load the classifier
-clf, pp = joblib.load(args["classiferPath"])
+# clf, pp = joblib.load(args["classiferPath"])
+model = load_model('digit_model.h5')
 
-# Read the input image 
+# Read the input image
 im = cv2.imread(args["image"])
-
-# Convert to grayscale and apply Gaussian filtering
 im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
-im_gray = cv2.GaussianBlur(im_gray, (5, 5), 0)
 
-# Threshold the image
-ret, im_th = cv2.threshold(im_gray, 90, 255, cv2.THRESH_BINARY_INV)
+im_for_ctr = cv2.GaussianBlur(im_gray, (5, 5), 0)
+im_th = cv2.Canny(im_for_ctr, 100, 200)
+# show_image(im_th)
 
 # Find contours in the image
-ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ret, binary = cv2.threshold(im_gray, 210, 255, cv2.THRESH_BINARY)
+# findContours returns 3 values on OpenCV 3.x but 2 on 2.x/4.x; the contour list is always second-to-last.
+ctrs = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
 
 # Get rectangles contains each contour
 rects = [cv2.boundingRect(ctr) for ctr in ctrs]
 
-# For each rectangular region, calculate HOG features and predict
-# the digit using Linear SVM.
+
 for rect in rects:
+    # Size filter: keep only boxes whose width is strictly between 15 and 50
+    # and whose height is strictly between 25 and 50 (hard-coded pixel limits).
+    if not (15 < rect[2] < 50 and 25 < rect[3] < 50):
+        continue
+
+    # Position filter: keep only boxes whose top-left corner satisfies
+    # 440 < x < 620 and 125 < y < 165 (hard-coded search window).
+    if not (440 < rect[0] < 620 and 125 < rect[1] < 165):
+        continue
+
     # Draw the rectangles
-    cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3) 
+    # rect is (x, y, w, h): rect[0], rect[1] = top-left corner; rect[2], rect[3] = width and height
+    cv2.rectangle(im, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
     # Make the rectangular region around the digit
-    leng = int(rect[3] * 1.6)
+    leng = int(rect[3] * 1.2)
     pt1 = int(rect[1] + rect[3] // 2 - leng // 2)
     pt2 = int(rect[0] + rect[2] // 2 - leng // 2)
-    roi = im_th[pt1:pt1+leng, pt2:pt2+leng]
+    roi = binary[pt1:pt1+leng, pt2:pt2+leng]
     # Resize the image
     roi = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
     roi = cv2.dilate(roi, (3, 3))
-    # Calculate the HOG features
-    roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(14, 14), cells_per_block=(1, 1), visualise=False)
-    roi_hog_fd = pp.transform(np.array([roi_hog_fd], 'float64'))
-    nbr = clf.predict(roi_hog_fd)
-    cv2.putText(im, str(int(nbr[0])), (rect[0], rect[1]),cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)
+    # show_image(roi)
+
+    # Match training: CNN_model.py casts to float32 and divides pixels by 255.
+    roi = np.array(roi, dtype='float32').reshape((1, 28, 28, 1)) / 255
+
+    # model.predict returns one row of class scores; its arg-max index is the digit
+    nbr = model.predict(roi)
+    result = np.argmax(nbr)
+
+    cv2.putText(im, str(result), (rect[0], rect[1]), cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)
 
 cv2.namedWindow("Resulting Image with Rectangular ROIs", cv2.WINDOW_NORMAL)
 cv2.imshow("Resulting Image with Rectangular ROIs", im)
-cv2.waitKey()
+cv2.waitKey(10000)
diff --git a/test.png b/test.png