import numpy as np
from matplotlib import pyplot as plt

import glob

from PIL import Image
import cv2

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report

from keras.models import Model
from keras.utils import to_categorical, set_random_seed
from keras.models import Sequential
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

# Root folder of the dataset on disk.  Images are looked up below it as
# <root>/<split>/<shape>/*.png.
path_to_root_dataset = '/run/media/sparsharay/groot/Codes/Workspace/Courses/IDC409_IDS/dataset'

# Every image is resized to this shape before it is used.
shape_of_image = (126, 126)

# One entry per split.  Each split starts with its own pair of empty lists,
# which the loading code fills and later replaces with arrays.
datasets = {
    split_name : {'images' : [], 'labels' : []}
    for split_name in ('train', 'validation', 'test')
}

# Class names.  The position of a name in this list is used as its integer
# label, so the order must not change between training and prediction.
shapes = [
    'semicircle',
    'triangle',
    'circle',
    'square',
    'rhombus',
    'rectangle',
    'parallelogram',
    'oval',
    'trapezoid',
    'pentagon',
]

# Upper bound on the number of images loaded for one shape within one split,
# so the in-memory arrays do not exhaust memory.  The cap is applied per shape
# by slicing the file list; a running total across shapes would only behave
# as a per-shape cap when every shape has at least this many files.
max_images_per_shape = 5000

for dataset in datasets :
    for shape in shapes :
        # Sorted so the same files are selected on every run: glob returns
        # matches in arbitrary order.
        image_paths = sorted(glob.glob(f'{path_to_root_dataset}/{dataset}/{shape}/*.png'))
        label = shapes.index(shape)

        for image_path in image_paths[:max_images_per_shape] :
            # The context manager closes the file handle as soon as the pixels
            # have been converted, instead of leaving it to the garbage collector.
            with Image.open(image_path) as image_file :
                # PIL sizes are (width, height) while the arrays below are laid
                # out as (rows, cols) = shape_of_image, hence the swapped order.
                image = image_file.convert('L').resize((shape_of_image[1], shape_of_image[0]))

            # Scale 8-bit grey values to [0, 1]; float16 halves the memory of float32.
            image = (np.array(image)/255).astype('float16')
            datasets[dataset]['images'].append(image)
            datasets[dataset]['labels'].append(label)

    # Stack the per-image arrays into one (N, rows, cols, 1) tensor and turn
    # the integer labels into one-hot rows with one column per shape.
    datasets[dataset]['images'] = np.array(datasets[dataset]['images']).reshape(-1, shape_of_image[0], shape_of_image[1], 1)
    datasets[dataset]['labels'] = to_categorical(np.array(datasets[dataset]['labels']), num_classes=len(shapes))

# Report the array shapes of every split.  The dictionary keys use double
# quotes inside the single-quoted f-string because reusing the enclosing quote
# character inside a replacement field is a SyntaxError before Python 3.12.
for split_title, split_name in (('Training', 'train'), ('Validation', 'validation'), ('Test', 'test')) :
    split_data = datasets[split_name]
    print(f'{split_title} dataset -> \t Images: {split_data["images"].shape}, Labels: {split_data["labels"].shape}')

Training dataset -> 	 Images: (50000, 126, 126, 1), Labels: (50000, 10)
Validation dataset -> 	 Images: (10000, 126, 126, 1), Labels: (10000, 10)
Test dataset -> 	 Images: (10000, 126, 126, 1), Labels: (10000, 10)

# Seed the random number generators before any layer is created so the
# initial weights are repeatable.
set_random_seed(1)

model = Sequential()
model.add(Input(shape=(shape_of_image[0], shape_of_image[1], 1)))

# Feature extractor: five conv + max-pool stages, doubling the filter count
# at every stage.  Each convolution uses 3x3 kernels, ReLU, He-uniform
# initialisation and L2 weight regularisation.
for filter_count in (8, 16, 32, 64, 128) :
    model.add(Conv2D(filters=filter_count,
                     kernel_size=(3, 3),
                     activation='relu',
                     kernel_initializer='he_uniform',
                     kernel_regularizer='l2'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: two ELU hidden layers, then one softmax unit per shape.
model.add(Flatten())
for unit_count in (128, 64) :
    model.add(Dense(units=unit_count, activation='elu'))
model.add(Dense(units=len(shapes), activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                 │ (None, 124, 124, 8)    │            80 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 62, 62, 8)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 60, 60, 16)     │         1,168 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 30, 30, 16)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 28, 28, 32)     │         4,640 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_2 (MaxPooling2D)  │ (None, 14, 14, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 12, 12, 64)     │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_3 (MaxPooling2D)  │ (None, 6, 6, 64)       │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_4 (Conv2D)               │ (None, 4, 4, 128)      │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_4 (MaxPooling2D)  │ (None, 2, 2, 128)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten (Flatten)               │ (None, 512)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 128)            │        65,664 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 64)             │         8,256 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 10)             │           650 │
└─────────────────────────────────┴────────────────────────┴───────────────┘

 Total params: 172,810 (675.04 KB)

 Trainable params: 172,810 (675.04 KB)

 Non-trainable params: 0 (0.00 B)

# Train on the training split and evaluate on the validation split after
# every epoch; the returned history is used for the training curves.
validation_pair = (datasets['validation']['images'], datasets['validation']['labels'])

history = model.fit(
    x=datasets['train']['images'],
    y=datasets['train']['labels'],
    batch_size=25,
    epochs=9,
    verbose='auto',
    validation_data=validation_pair,
    shuffle=True,
)

Epoch 1/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 25s 11ms/step - accuracy: 0.6013 - loss: 2.3634 - val_accuracy: 0.9460 - val_loss: 0.6240
Epoch 2/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9407 - loss: 0.5941 - val_accuracy: 0.9695 - val_loss: 0.4292
Epoch 3/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9644 - loss: 0.4300 - val_accuracy: 0.9784 - val_loss: 0.3355
Epoch 4/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 21s 11ms/step - accuracy: 0.9713 - loss: 0.3478 - val_accuracy: 0.9909 - val_loss: 0.2546
Epoch 5/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9790 - loss: 0.2800 - val_accuracy: 0.9744 - val_loss: 0.2713
Epoch 6/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 21s 11ms/step - accuracy: 0.9786 - loss: 0.2505 - val_accuracy: 0.9885 - val_loss: 0.1984
Epoch 7/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9822 - loss: 0.2080 - val_accuracy: 0.9834 - val_loss: 0.1935
Epoch 8/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 21s 11ms/step - accuracy: 0.9846 - loss: 0.1893 - val_accuracy: 0.9917 - val_loss: 0.1579
Epoch 9/9
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 22s 11ms/step - accuracy: 0.9847 - loss: 0.1798 - val_accuracy: 0.9931 - val_loss: 0.1539

plt.figure(figsize=(15, 5))

# One row per panel: subplot slot, the two history series with their legend
# labels, the panel title, the y-axis label and optional fixed y-limits.
panels = [
    (1, ('loss', 'train loss'), ('val_loss', 'val loss'), 'Loss vs Epochs', 'Loss', None),
    (2, ('accuracy', 'train accuracy'), ('val_accuracy', 'val accuracy'), 'Accuracy vs Epochs', 'Accuracy', (0.75, 1.0)),
]

for slot, train_series, val_series, panel_title, y_label, y_limits in panels :
    plt.subplot(1, 2, slot)
    for history_key, legend_label in (train_series, val_series) :
        plt.plot(history.history[history_key], label=legend_label)
    if y_limits is not None :
        plt.ylim(*y_limits)
    plt.title(panel_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid()

plt.suptitle('Training and Validation Metrics', fontsize=15)
plt.subplots_adjust(left=0.1, right=0.9, top=0.8, bottom=0.1, wspace=0.25)
plt.show()

# Class scores for every test image, one row per image.
result = model.predict(datasets['test']['images'], verbose='auto')

# Reduce the predicted scores and the one-hot ground truth to class indices.
predicted_classes = result.argmax(axis=1)
true_classes = datasets['test']['labels'].argmax(axis=1)

report = classification_report(true_classes, predicted_classes, target_names=shapes)
print(report)

# Confusion matrix with the shape names on both axes.
cm = confusion_matrix(true_classes, predicted_classes)
cm_plot = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=shapes)
cm_plot.plot(cmap=plt.cm.Blues, xticks_rotation='vertical')
cm_plot.ax_.set_title('Confusion Matrix')
cm_plot.figure_.set_size_inches(8, 6)
plt.show()

313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step
               precision    recall  f1-score   support

   semicircle       0.99      0.99      0.99      1000
     triangle       0.98      1.00      0.99      1000
       circle       1.00      0.99      0.99      1000
       square       0.99      1.00      0.99      1000
      rhombus       0.99      1.00      1.00      1000
    rectangle       1.00      0.99      0.99      1000
parallelogram       1.00      1.00      1.00      1000
         oval       1.00      0.99      1.00      1000
    trapezoid       1.00      0.99      0.99      1000
     pentagon       0.99      0.99      0.99      1000

     accuracy                           0.99     10000
    macro avg       0.99      0.99      0.99     10000
 weighted avg       0.99      0.99      0.99     10000

# Print name, position in model.layers and kernel shape of every
# convolutional layer.  enumerate supplies the position directly instead of a
# linear model.layers.index() search per layer, and the weights are bound to
# `kernel` so the builtin `filter` is not shadowed at module level.
for layer_index, layer in enumerate(model.layers) :
    if 'conv' not in layer.name : continue
    # get_weights() returns the kernel first (the bias follows it).
    kernel = layer.get_weights()[0]
    print(layer.name, layer_index, kernel.shape)

conv2d 0 (3, 3, 1, 8)
conv2d_1 2 (3, 3, 8, 16)
conv2d_2 4 (3, 3, 16, 32)
conv2d_3 6 (3, 3, 32, 64)
conv2d_4 8 (3, 3, 64, 128)

# Kernels of the first convolutional layer; the last axis indexes the filters.
filters, biases = model.layers[0].get_weights()
print(filters.shape)

# One column per filter.
fig, axs = plt.subplots(1, filters.shape[3])
fig.set_size_inches(8, 4)

# plt.subplots returns a bare Axes (not an array) when there is exactly one
# filter; atleast_1d makes the loop below work in that case too.
axs = np.atleast_1d(axs)

# The number of axes equals the number of filters, so every index is valid
# and no IndexError needs to be swallowed here: a real indexing bug should
# surface instead of silently leaving a panel empty.
for i, ax in enumerate(axs):
    ax.axis('off')
    ax.imshow(filters[:, :, 0, i], cmap='gray', interpolation='none')
    ax.set_title(f'F{i}')

plt.tight_layout()
plt.suptitle('Some of the First Layer Filters', fontsize=15)
plt.show()

(3, 3, 1, 8)

# Kernels of the layer at index 2 of model.layers.
filters, biases = model.layers[2].get_weights()
print(filters.shape)

fig, axs = plt.subplots(8, 8)
fig.set_size_inches(8, 10)

# Grid row i selects the filter (last kernel axis) and grid column j the
# input channel (third kernel axis); only the first 8 of each are drawn.
for (i, j), sub_ax in np.ndenumerate(axs):
    sub_ax.axis('off')
    kernel_slice = filters[:, :, j, i]
    sub_ax.imshow(kernel_slice, cmap='gray', interpolation='none')
    sub_ax.set_title(f'F{i}C{j}')

plt.suptitle('Some of the Second Layer Filters', fontsize=15)
plt.show()

(3, 3, 8, 16)

# Kernels of the layer at index 8 of model.layers.
filters, biases = model.layers[8].get_weights()
print(filters.shape)

fig, axs = plt.subplots(8, 8)
fig.set_size_inches(8, 10)

# Walk the 8x8 grid by position: row i is the filter, column j the input
# channel, so only the first 8 filters and first 8 channels are shown.
grid_rows, grid_cols = axs.shape
for i in range(grid_rows):
    for j in range(grid_cols):
        sub_ax = axs[i, j]
        sub_ax.axis('off')
        sub_ax.imshow(filters[:, :, j, i], cmap='gray', interpolation='none')
        sub_ax.set_title(f'F{i}C{j}')

plt.suptitle('Some of the Last Layer Filters', fontsize=15)
plt.show()

(3, 3, 64, 128)

# Draw one test image at random and give it a leading batch axis of size 1
# so it can be passed to the model directly.
test_images = datasets['test']['images']
random_index = np.random.randint(0, test_images.shape[0])
test_img = test_images[random_index].reshape(1, shape_of_image[0], shape_of_image[1], 1)
print(test_img.shape)

plt.figure(figsize=(3, 3))
plt.imshow(test_img[0], cmap='gray', interpolation='none')
plt.axis('off')
plt.title('Random Test Image')
plt.show()

# The class with the highest score is reported as the prediction.
predicted_index = np.argmax(model.predict(test_img))
print("Predicted Class:", shapes[predicted_index])

(1, 126, 126, 1)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step
Predicted Class: pentagon

# Indices into model.layers whose outputs are visualised, one grid row each.
view_layers = [2, 4, 6, 8, 9]

# squeeze=False keeps the axes grid two-dimensional even when view_layers
# holds a single entry, so all_axs[plt_idx] is always a row of axes.
fig, all_axs = plt.subplots(len(view_layers), 16, squeeze=False)
fig.set_size_inches(16, 8)

for plt_idx, layer_idx in enumerate(view_layers):
    print(layer_idx)

    # Sub-model that maps the network input to the output of the chosen layer.
    temp_model = Model(inputs=model.inputs, outputs=model.layers[layer_idx].output)
    feature_maps = temp_model.predict(test_img, verbose='auto')
    print(feature_maps.shape)

    # A panel is drawn only when the output is a (batch, rows, cols, channels)
    # tensor that actually has channel i.  This explicit test replaces a
    # blanket `except IndexError: pass`, which left the same panels empty but
    # would also have hidden any unrelated indexing bug.
    has_spatial_maps = feature_maps.ndim == 4

    axs = all_axs[plt_idx]
    for i, ax in enumerate(axs):
        ax.axis('off')
        if has_spatial_maps and i < feature_maps.shape[-1]:
            ax.imshow(feature_maps[0, :, :, i], cmap='gray', interpolation='none')
        ax.set_title(f'L{layer_idx}C{i}')

plt.suptitle('Feature Maps of Different Layers for the Test Image', fontsize=15)
plt.show()

2
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
(1, 60, 60, 16)
4
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step
(1, 28, 28, 32)
6
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 43ms/step
(1, 12, 12, 64)
8
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step
(1, 4, 4, 128)
9
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 51ms/step
(1, 2, 2, 128)

import cv2

multi_shape_image_path = "shapes_used.jpeg"

# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, which would otherwise fail later inside cvtColor with an
# unrelated-looking error.
image = cv2.imread(multi_shape_image_path)
if image is None :
    raise FileNotFoundError(f"Could not read image file: {multi_shape_image_path}")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

plt.imshow(gray, cmap='gray')
plt.title('Image containing multiple shapes')
plt.show()

# Inverse binary threshold: pixels brighter than 200 become 0 and all others
# 255, so the shapes are the non-zero regions that findContours traces
# (assumes dark shapes on a light background - confirm against the image).
_, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV) 

# RETR_EXTERNAL keeps only the outermost contour of each region.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Side length of the square canvas each shape is centred on before it is
# shrunk to the network input size, and the margin kept around its bounding box.
target_size = 256
padding = 10

for contour in contours :

    # Skip tiny contours, which are treated as noise.
    if cv2.contourArea(contour) < 100 : continue

    x, y, w, h = cv2.boundingRect(contour)

    # Padded bounding box, clamped to the image borders.
    x_start = max(0, x - padding)
    y_start = max(0, y - padding)
    x_end = min(image.shape[1], x + w + padding)
    y_end = min(image.shape[0], y + h + padding)

    segment = image[y_start:y_end, x_start:x_end]

    # Filled mask of this contour, expressed in the coordinates of the crop.
    mask = np.zeros_like(gray[y_start:y_end, x_start:x_end])
    contour_offset = contour - np.array([x_start, y_start]) 
    cv2.drawContours(mask, [contour_offset], -1, 255, thickness=cv2.FILLED)

    # Copy only the pixels inside the contour onto a white background, so
    # neighbouring shapes that fall inside the padded box are removed.  The
    # mask and the crop come from the same slice, so their sizes always match.
    isolated_shape_image = np.full((segment.shape[0], segment.shape[1], 3), 255, dtype=np.uint8)
    inside_contour = mask == 255
    isolated_shape_image[inside_contour] = segment[inside_contour]

    isolated_shape_image = cv2.cvtColor(isolated_shape_image, cv2.COLOR_BGR2GRAY)
    crop_h, crop_w = isolated_shape_image.shape[:2]

    # Scale the crop so its longer side fills the canvas, keeping the aspect
    # ratio.  Each side is kept at 1 pixel or more because cv2.resize rejects
    # a zero-sized target, which rounding could produce for a very thin crop.
    scale = float(target_size) / max(crop_h, crop_w)
    new_w = max(1, round(crop_w * scale))
    new_h = max(1, round(crop_h * scale))

    # INTER_AREA when shrinking, INTER_CUBIC when enlarging.
    interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_CUBIC
    resized_image = cv2.resize(isolated_shape_image, (new_w, new_h), interpolation=interpolation)

    # Centre the resized crop on a white square canvas.
    new_image = np.full((target_size, target_size), 255, dtype=np.uint8)
    x_offset = (target_size - new_w) // 2
    y_offset = (target_size - new_h) // 2
    new_image[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_image

    # Binarise (inverted: bright pixels become 0, the rest 255).
    _, new_image = cv2.threshold(new_image, 200, 255, cv2.THRESH_BINARY_INV)

    # PIL sizes are (width, height) while the model input is laid out as
    # (rows, cols) = shape_of_image, hence the swapped order.
    new_image = Image.fromarray(new_image).resize((shape_of_image[1], shape_of_image[0]))
    print(new_image.size)

    # Undo the inversion and scale to [0, 1]: the background ends up near 1
    # and the shape near 0 (presumably the polarity of the training images -
    # confirm against the dataset).
    new_image = ((255 - np.array(new_image))/255.0).astype('float16')

    # Classify the segmented shape; the highest-scoring class is reported.
    model_input = new_image.reshape(1, shape_of_image[0], shape_of_image[1], 1)
    predicted_shape = shapes[np.argmax(model.predict(model_input))]

    plt.title(f"Segmented Image, predicted to be: {predicted_shape}", fontsize=10)
    plt.imshow(new_image, cmap='gray')
    plt.show()

Using CNNs to Classify Shapes¶

Load the required libraries¶

Load dataset¶

Define the CNN model¶

Train the model¶

Evaluate the model¶

Visualize the filters¶

Visualize the feature maps¶

Segment and classify shapes from an image containing multiple shapes¶