# Convolutional neural network (CNN) for Fashion-MNIST classification.
#
# The script loads the Fashion-MNIST dataset, trains a small CNN, reports the
# test accuracy, and then plots the training curves, a confusion matrix, and
# a random sample of test images with their predicted and true labels.

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l2

# Shape of a single model input: 28x28 pixels with one (grayscale) channel.
INPUT_SHAPE = (28, 28, 1)
# Number of output classes predicted by the final softmax layer.
NUM_CLASSES = 10

# Load the Fashion-MNIST dataset.
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Normalize pixel values to between 0 and 1.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Conv2D expects an explicit channel axis, so build 4D views of the images
# once and reuse them for training, evaluation, and prediction. The original
# x_train / x_test arrays are kept unchanged because plt.imshow below is
# given the images without the trailing channel axis.
x_train_cnn = x_train.reshape(-1, *INPUT_SHAPE)
x_test_cnn = x_test.reshape(-1, *INPUT_SHAPE)

# Define the CNN model.
#
# 1. Conv2D: 32 filters of size 3x3 with a ReLU activation. input_shape is
#    (28, 28, 1), i.e. a 28x28 image with one color channel. The layer
#    produces 32 feature maps.
# 2. MaxPooling2D: pool size 2x2. Downsamples the feature maps by taking the
#    maximum value in each 2x2 window, reducing their spatial dimensions.
# 3. Flatten: flattens the pooled feature maps into a 1D vector so they can
#    be fed into the fully connected layers.
# 4. Dense: 128 units with a ReLU activation and an L2 kernel regularizer
#    (factor 0.01).
# 5. Dense: 10 units with a softmax activation. This is the output layer and
#    produces a probability distribution over the 10 classes.
model = keras.Sequential(
    [
        layers.Conv2D(32, (3, 3), activation="relu", input_shape=INPUT_SHAPE),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation="relu", kernel_regularizer=l2(0.01)),
        layers.Dense(NUM_CLASSES, activation="softmax"),
    ]
)

# Compile the model. The sparse loss is used because the labels are passed
# as integer class ids rather than one-hot vectors.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train the model, holding out 20% of the training data for validation.
history = model.fit(
    x_train_cnn,
    y_train,
    epochs=25,
    batch_size=32,
    validation_split=0.2,
)

# Evaluate the model on the test set.
test_loss, test_acc = model.evaluate(x_test_cnn, y_test)
print("Test accuracy:", test_acc)

# Plot the training and validation accuracy and loss over time.
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"])
plt.plot(history.history["val_accuracy"])
plt.title("Model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Train", "Validation"], loc="lower right")

plt.subplot(1, 2, 2)
plt.plot(history.history["loss"])
plt.plot(history.history["val_loss"])
plt.title("Model loss")
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.legend(["Train", "Validation"], loc="upper right")

plt.show()

# Predict the test set once; the predicted class is the index of the largest
# softmax output. The result is reused by both plots below.
test_preds = np.argmax(model.predict(x_test_cnn), axis=1)

# Plot a confusion matrix of the test set predictions.
conf_mat = tf.math.confusion_matrix(y_test, test_preds)
plt.imshow(conf_mat, cmap="Blues")
plt.xlabel("Predicted labels")
plt.ylabel("True labels")
plt.xticks(np.arange(NUM_CLASSES))
plt.yticks(np.arange(NUM_CLASSES))
plt.colorbar()
plt.show()

# Plot a random sample of 16 test set images and their predicted labels.
random_indices = np.random.choice(x_test.shape[0], 16, replace=False)
plt.figure(figsize=(10, 10))
for i, index in enumerate(random_indices):
    plt.subplot(4, 4, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(x_test[index], cmap=plt.cm.binary)
    plt.xlabel(f"Predicted: {test_preds[index]}\nTrue: {y_test[index]}")
plt.show()