Using CNNs to Classify Shapes¶
Load the required libraries¶
In [2]:
import glob
import os

import cv2
import numpy as np
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Model
from keras.models import Sequential
from keras.utils import to_categorical, set_random_seed
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
Load dataset¶
In [3]:
# Root directory of the dataset; it is expected to contain one folder per split
# with one sub-folder per shape. The default is the original author's local
# path -- set the SHAPES_DATASET_ROOT environment variable to run the notebook
# on another machine without editing this cell.
path_to_root_dataset = os.environ.get(
    'SHAPES_DATASET_ROOT',
    '/run/media/sparsharay/groot/Codes/Workspace/Courses/IDC409_IDS/dataset',
)
shape_of_image = (126, 126) # resize all images to this shape
In [4]:
# Upper bound on the number of images read from each shape folder (applied to
# every split) so the stacked arrays fit in memory.
max_images_per_shape = 5000

# Class names; the position in this list is the integer label of the class.
shapes = ['semicircle', 'triangle', 'circle', 'square', 'rhombus', 'rectangle', 'parallelogram', 'oval', 'trapezoid', 'pentagon']

# datasets[split] -> {'images': (N, H, W, 1) float16 in [0, 1],
#                     'labels': (N, len(shapes)) one-hot}
datasets = {}
for dataset in ('train', 'validation', 'test'):
    images, labels = [], []
    for label, shape in enumerate(shapes):
        # Sort the paths so the capped subset is the same on every run, and
        # slice per shape: the cap must not depend on how many images earlier
        # shapes contributed.
        image_paths = sorted(glob.glob(f'{path_to_root_dataset}/{dataset}/{shape}/*.png'))
        for image_path in image_paths[:max_images_per_shape]:
            # The context manager releases the file handle as soon as the
            # pixels have been converted to grayscale and resized.
            # NOTE(review): PIL's resize takes (width, height); harmless here
            # because shape_of_image is square.
            with Image.open(image_path) as image_file:
                image = image_file.convert('L').resize(shape_of_image)
            # Scale 8-bit pixel values to [0, 1]; float16 halves memory vs float32.
            images.append((np.array(image) / 255).astype('float16'))
            labels.append(label)
    datasets[dataset] = {
        'images': np.array(images).reshape(-1, shape_of_image[0], shape_of_image[1], 1),
        'labels': to_categorical(np.array(labels), num_classes=len(shapes)),
    }
In [5]:
# Report the array shapes of every split. The f-string is double-quoted so the
# single-quoted dictionary keys inside the replacement fields are also valid on
# Python versions older than 3.12.
for title, split in (('Training', 'train'), ('Validation', 'validation'), ('Test', 'test')):
    print(f"{title} dataset -> \t Images: {datasets[split]['images'].shape}, Labels: {datasets[split]['labels'].shape}")
Training dataset -> Images: (50000, 126, 126, 1), Labels: (50000, 10) Validation dataset -> Images: (10000, 126, 126, 1), Labels: (10000, 10) Test dataset -> Images: (10000, 126, 126, 1), Labels: (10000, 10)
Define the CNN model¶
In [6]:
# Seed before any layer is created so the run is repeatable.
set_random_seed(1)

# Architecture: five Conv2D(3x3) + MaxPooling2D(2x2) stages whose filter count
# doubles each time, followed by two ELU dense layers and a softmax classifier.
conv_filter_counts = (8, 16, 32, 64, 128)
dense_widths = (128, 64)

model = Sequential()
model.add(Input(shape=(*shape_of_image, 1)))
for n_filters in conv_filter_counts:
    model.add(Conv2D(n_filters, (3, 3), activation='relu', kernel_initializer='he_uniform', kernel_regularizer='l2'))
    model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
for width in dense_widths:
    model.add(Dense(width, activation='elu'))
# One output unit per shape class.
model.add(Dense(len(shapes), activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ conv2d (Conv2D) │ (None, 124, 124, 8) │ 80 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d (MaxPooling2D) │ (None, 62, 62, 8) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_1 (Conv2D) │ (None, 60, 60, 16) │ 1,168 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_1 (MaxPooling2D) │ (None, 30, 30, 16) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_2 (Conv2D) │ (None, 28, 28, 32) │ 4,640 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_2 (MaxPooling2D) │ (None, 14, 14, 32) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_3 (Conv2D) │ (None, 12, 12, 64) │ 18,496 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_3 (MaxPooling2D) │ (None, 6, 6, 64) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ conv2d_4 (Conv2D) │ (None, 4, 4, 128) │ 73,856 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ max_pooling2d_4 (MaxPooling2D) │ (None, 2, 2, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ flatten (Flatten) │ (None, 512) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ (None, 128) │ 65,664 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 64) │ 8,256 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_2 (Dense) │ (None, 10) │ 650 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 172,810 (675.04 KB)
Trainable params: 172,810 (675.04 KB)
Non-trainable params: 0 (0.00 B)
Train the model¶
In [7]:
# Fit on the training split and evaluate on the validation split after every
# epoch; `history` keeps the per-epoch metrics.
train_split = datasets['train']
validation_split = datasets['validation']
history = model.fit(
    x=train_split['images'],
    y=train_split['labels'],
    batch_size=25,
    epochs=9,
    verbose='auto',
    shuffle=True,
    validation_data=(validation_split['images'], validation_split['labels']),
)
Epoch 1/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 25s 11ms/step - accuracy: 0.6013 - loss: 2.3634 - val_accuracy: 0.9460 - val_loss: 0.6240 Epoch 2/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9407 - loss: 0.5941 - val_accuracy: 0.9695 - val_loss: 0.4292 Epoch 3/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9644 - loss: 0.4300 - val_accuracy: 0.9784 - val_loss: 0.3355 Epoch 4/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 21s 11ms/step - accuracy: 0.9713 - loss: 0.3478 - val_accuracy: 0.9909 - val_loss: 0.2546 Epoch 5/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9790 - loss: 0.2800 - val_accuracy: 0.9744 - val_loss: 0.2713 Epoch 6/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 21s 11ms/step - accuracy: 0.9786 - loss: 0.2505 - val_accuracy: 0.9885 - val_loss: 0.1984 Epoch 7/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9822 - loss: 0.2080 - val_accuracy: 0.9834 - val_loss: 0.1935 Epoch 8/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 21s 11ms/step - accuracy: 0.9846 - loss: 0.1893 - val_accuracy: 0.9917 - val_loss: 0.1579 Epoch 9/9 2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9847 - loss: 0.1798 - val_accuracy: 0.9931 - val_loss: 0.1539
In [8]:
# Side-by-side training/validation curves: loss on the left, accuracy on the right.
plt.figure(figsize=(15, 5))

# (history key, display name, optional y-axis limits) for each panel
panels = (
    ('loss', 'Loss', None),
    ('accuracy', 'Accuracy', (0.75, 1.0)),
)
for position, (metric, display_name, y_limits) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'train {metric}')
    plt.plot(history.history[f'val_{metric}'], label=f'val {metric}')
    if y_limits is not None:
        plt.ylim(*y_limits)
    plt.title(f'{display_name} vs Epochs')
    plt.xlabel('Epochs')
    plt.ylabel(display_name)
    plt.legend()
    plt.grid()

plt.suptitle('Training and Validation Metrics', fontsize=15)
plt.subplots_adjust(left=0.1, right=0.9, top=0.8, bottom=0.1, wspace=0.25)
plt.show()
Evaluate the model¶
In [9]:
# Score the held-out test split: per-class report plus a confusion matrix.
test_images = datasets['test']['images']
test_labels = datasets['test']['labels']

result = model.predict(test_images, verbose='auto')
# Collapse softmax outputs / one-hot labels to integer class indices.
predicted_classes = result.argmax(axis=1)
true_classes = test_labels.argmax(axis=1)
print(classification_report(true_classes, predicted_classes, target_names=shapes))

cm = confusion_matrix(true_classes, predicted_classes)
cm_plot = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=shapes)
cm_plot.plot(cmap=plt.cm.Blues, xticks_rotation='vertical')
cm_plot.ax_.set(title='Confusion Matrix')
cm_plot.figure_.set_size_inches(8, 6)
plt.show()
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step precision recall f1-score support semicircle 0.99 0.99 0.99 1000 triangle 0.98 1.00 0.99 1000 circle 1.00 0.99 0.99 1000 square 0.99 1.00 0.99 1000 rhombus 0.99 1.00 1.00 1000 rectangle 1.00 0.99 0.99 1000 parallelogram 1.00 1.00 1.00 1000 oval 1.00 0.99 1.00 1000 trapezoid 1.00 0.99 0.99 1000 pentagon 0.99 0.99 0.99 1000 accuracy 0.99 10000 macro avg 0.99 0.99 0.99 10000 weighted avg 0.99 0.99 0.99 10000
Visualize the filters¶
In [10]:
# List every convolutional layer with its index in model.layers and the shape
# of its kernel tensor. `enumerate` supplies the index directly, and the kernel
# is bound to `kernel` so the builtin `filter` is not shadowed in the notebook's
# global namespace.
for layer_idx, layer in enumerate(model.layers):
    if 'conv' in layer.name:
        kernel = layer.get_weights()[0]
        print(layer.name, layer_idx, kernel.shape)
conv2d 0 (3, 3, 1, 8) conv2d_1 2 (3, 3, 8, 16) conv2d_2 4 (3, 3, 16, 32) conv2d_3 6 (3, 3, 32, 64) conv2d_4 8 (3, 3, 64, 128)
In [11]:
# Show every kernel of the first convolutional layer (single input channel).
filters, biases = model.layers[0].get_weights()
print(filters.shape)

n_filters = filters.shape[3]
# squeeze=False always returns a 2-D axes array, so the loop below also works
# when the layer has a single filter.
fig, axs = plt.subplots(1, n_filters, squeeze=False)
fig.set_size_inches(8, 4)
# There is exactly one axis per filter, so indexing cannot go out of range and
# no exception needs to be swallowed.
for i, ax in enumerate(axs[0]):
    ax.axis('off')
    ax.imshow(filters[:, :, 0, i], cmap='gray', interpolation='none')
    ax.set_title(f'F{i}')
plt.tight_layout()
plt.suptitle('Some of the First Layer Filters', fontsize=15)
plt.show()
(3, 3, 1, 8)
In [12]:
# Show an 8x8 grid of second-layer kernels: row i is the filter (output
# channel) index, column j is the input-channel index.
filters, biases = model.layers[2].get_weights()
print(filters.shape)

fig, axs = plt.subplots(nrows=8, ncols=8)
fig.set_size_inches(8, 10)
# ndenumerate walks the axes grid row by row, yielding (row, column) and the axis.
for (i, j), sub_ax in np.ndenumerate(axs):
    sub_ax.axis('off')
    sub_ax.imshow(filters[:, :, j, i], cmap='gray', interpolation='none')
    sub_ax.set_title(f'F{i}C{j}')
plt.suptitle('Some of the Second Layer Filters', fontsize=15)
plt.show()
(3, 3, 8, 16)
In [13]:
# Show the first 8 filters x first 8 input channels of the last conv layer's
# kernels: row i is the filter index, column j is the input-channel index.
filters, biases = model.layers[8].get_weights()
print(filters.shape)

fig, axs = plt.subplots(nrows=8, ncols=8)
fig.set_size_inches(8, 10)
# ndindex visits the grid positions in row-major order.
for i, j in np.ndindex(axs.shape):
    sub_ax = axs[i, j]
    sub_ax.axis('off')
    sub_ax.imshow(filters[:, :, j, i], cmap='gray', interpolation='none')
    sub_ax.set_title(f'F{i}C{j}')
plt.suptitle('Some of the Last Layer Filters', fontsize=15)
plt.show()
(3, 3, 64, 128)
Visualize the feature maps¶
In [26]:
# Draw one test image at random, show it, and print the model's prediction.
test_images = datasets['test']['images']
random_index = np.random.randint(0, test_images.shape[0])
# Add a leading batch axis so the image can be fed to the model directly.
test_img = test_images[random_index].reshape(1, shape_of_image[0], shape_of_image[1], 1)
print(test_img.shape)

plt.figure(figsize=(3, 3))
plt.imshow(test_img[0], cmap='gray', interpolation='none')
plt.axis('off')
plt.title('Random Test Image')
plt.show()

class_probabilities = model.predict(test_img)
print("Predicted Class:", shapes[np.argmax(class_probabilities)])
(1, 126, 126, 1)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step Predicted Class: pentagon
In [27]:
# Visualise the first 16 channels of the activations that `test_img` produces
# at selected layers (indices into model.layers).
view_layers = [2, 4, 6, 8, 9]
n_columns = 16  # channels shown per layer

fig, all_axs = plt.subplots(len(view_layers), n_columns)
fig.set_size_inches(16, 8)

# A single model with one output per inspected layer: the image is pushed
# through the network once instead of building and running a separate
# throwaway model for every layer.
temp_model = Model(inputs=model.inputs,
                   outputs=[model.layers[layer_idx].output for layer_idx in view_layers])
all_feature_maps = temp_model.predict(test_img, verbose='auto')

for plt_idx, (layer_idx, feature_maps) in enumerate(zip(view_layers, all_feature_maps)):
    print(layer_idx, feature_maps.shape)
    for i, ax in enumerate(all_axs[plt_idx]):
        ax.axis('off')
        # Leave the axis blank if the layer has fewer channels than columns,
        # rather than relying on a swallowed IndexError.
        if i < feature_maps.shape[-1]:
            ax.imshow(feature_maps[0, :, :, i], cmap='gray', interpolation='none')
        ax.set_title(f'L{layer_idx}C{i}')
plt.suptitle('Feature Maps of Different Layers for the Test Image', fontsize=15)
plt.show()
2 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step (1, 60, 60, 16) 4 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step (1, 28, 28, 32) 6 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step (1, 12, 12, 64) 8 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step (1, 4, 4, 128) 9 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 51ms/step (1, 2, 2, 128)
Segment and classify shapes from an image containing multiple shapes¶
In [ ]:
# Segment each shape out of a multi-shape picture, normalise it to the model's
# input format, and classify it.
source_image_path = "shapes_used.jpeg"
binary_threshold = 200   # gray levels above this are treated as background
min_contour_area = 100   # contours smaller than this (in pixels) are ignored as noise
target_size = 256        # side of the square canvas each shape is centred on
padding = 10             # margin kept around each bounding box, in pixels

image = cv2.imread(source_image_path)
if image is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise surface as an obscure error inside cvtColor.
    raise FileNotFoundError(f"Could not read image file: {source_image_path}")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
plt.imshow(gray, cmap='gray')
plt.title('Image containing multiple shapes')
plt.show()

# Inverted threshold: pixels at or below the threshold become foreground (255).
_, thresh = cv2.threshold(gray, binary_threshold, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for contour in contours:
    if cv2.contourArea(contour) < min_contour_area:
        continue

    # Padded bounding box, clipped to the image borders.
    x, y, w, h = cv2.boundingRect(contour)
    x_start = max(0, x - padding)
    y_start = max(0, y - padding)
    x_end = min(image.shape[1], x + w + padding)
    y_end = min(image.shape[0], y + h + padding)
    segment = image[y_start:y_end, x_start:x_end]

    # Fill the contour into a mask in crop coordinates, then copy only the
    # masked pixels onto a white background so neighbouring shapes inside the
    # padded box are dropped. The mask is created from the crop's own height
    # and width, so the two always match.
    mask = np.zeros(segment.shape[:2], dtype=np.uint8)
    contour_offset = contour - np.array([x_start, y_start], dtype=contour.dtype)
    cv2.drawContours(mask, [contour_offset], -1, 255, thickness=cv2.FILLED)
    isolated_shape_image = np.full_like(segment, 255)
    isolated_shape_image[mask == 255] = segment[mask == 255]
    isolated_shape_image = cv2.cvtColor(isolated_shape_image, cv2.COLOR_BGR2GRAY)

    # Scale the crop so its longer side equals target_size (aspect ratio kept)
    # and centre it on a white square canvas.
    crop_h, crop_w = isolated_shape_image.shape[:2]
    scale = float(target_size) / max(crop_h, crop_w)
    # max(1, ...) keeps cv2.resize from receiving a zero dimension for very
    # thin crops.
    new_w = max(1, round(crop_w * scale))
    new_h = max(1, round(crop_h * scale))
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_CUBIC
    resized_image = cv2.resize(isolated_shape_image, (new_w, new_h), interpolation=interpolation)
    new_image = np.full((target_size, target_size), 255, dtype=np.uint8)
    x_offset = (target_size - new_w) // 2
    y_offset = (target_size - new_h) // 2
    new_image[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_image

    # Binarise (inverted), shrink to the network's input size, then invert
    # again while scaling to [0, 1]: dark source pixels end up near 0.0 and the
    # white background near 1.0.
    # NOTE(review): presumably this matches the polarity of the training
    # images -- confirm against the dataset.
    _, new_image = cv2.threshold(new_image, binary_threshold, 255, cv2.THRESH_BINARY_INV)
    new_image = Image.fromarray(new_image).resize(shape_of_image)
    new_image = ((255 - np.array(new_image))/255.0).astype('float16')

    model_input = new_image.reshape(1, shape_of_image[0], shape_of_image[1], 1)
    predicted_shape = shapes[np.argmax(model.predict(model_input))]
    plt.title(f"Segmented Image, predicted to be: {predicted_shape}", fontsize=10)
    plt.imshow(new_image, cmap='gray')
    plt.show()