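# Convolutional neural network (CNN) image classifier for the Fashion-MNIST
# dataset: trains the model, evaluates it on the test set, and plots the results.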
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
import numpy as np

# Fetch the Fashion-MNIST training and test splits through the Keras
# dataset helper.
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Convert both image arrays to float32 and divide by 255 so the pixel values
# are normalized to between 0 and 1. The labels are left untouched.
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

# Define the CNN model
# The model is a Sequential stack applied to single-channel 28x28 images:
#   1. Input: declares the per-sample input shape (28, 28, 1), i.e. a 2D
#      image of size 28x28 with one color channel. Declaring the shape with
#      an explicit Input object replaces the `input_shape=` argument on the
#      first layer.
#   2. Conv2D: 32 filters of size 3x3 with a ReLU activation. It applies the
#      32 filters to the input image and generates 32 feature maps.
#   3. MaxPooling2D: pool size 2x2. It downsamples the feature maps by taking
#      the maximum value in each 2x2 window, which reduces their spatial
#      dimensions.
#   4. Flatten: flattens the pooled feature maps into a 1D vector so they can
#      be fed to the fully connected layers.
#   5. Dense: 128 units with a ReLU activation. Its kernel weights carry an
#      L2 regularization penalty with factor 0.01, which is added to the
#      training loss.
#   6. Dense: 10 units with a softmax activation. This is the output layer
#      and produces a probability distribution over the 10 possible classes.
model = keras.Sequential(
    [
        keras.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation="relu", kernel_regularizer=l2(0.01)),
        layers.Dense(10, activation="softmax"),
    ]
)

# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# (the labels are passed as-is, not one-hot encoded), and accuracy as the
# reported metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Fit for 25 epochs in batches of 32, holding back 20% of the training data
# for validation. A trailing channel axis is appended so the images match
# the (28, 28, 1) input shape the model expects.
history = model.fit(
    np.expand_dims(x_train, axis=-1),
    y_train,
    epochs=25,
    batch_size=32,
    validation_split=0.2,
)

# Measure loss and accuracy on the held-out test set and report the accuracy.
test_loss, test_acc = model.evaluate(np.expand_dims(x_test, axis=-1), y_test)
print("Test accuracy:", test_acc)

# Draw the per-epoch training and validation curves side by side:
# accuracy in the left panel, loss in the right panel.
plt.figure(figsize=(10, 4))

# One entry per panel: (history key, panel title, y-axis label, legend corner).
panels = (
    ("accuracy", "Model accuracy", "Accuracy", "lower right"),
    ("loss", "Model loss", "Loss", "upper right"),
)
for position, (metric, title, y_label, legend_loc) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    # Training curve first, then its validation counterpart ("val_<metric>"),
    # so the legend entries below line up with the plotted lines.
    plt.plot(history.history[metric])
    plt.plot(history.history[f"val_{metric}"])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel("Epoch")
    plt.legend(["Train", "Validation"], loc=legend_loc)

plt.show()

# Build and display a confusion matrix for the test set.
# The predicted class of each image is the index of its largest softmax
# output.
class_probs = model.predict(x_test.reshape(-1, 28, 28, 1))
test_preds = np.argmax(class_probs, axis=1)
conf_mat = tf.math.confusion_matrix(y_test, test_preds)

# Show the matrix as a heat map with one tick per class (0-9) on both axes.
class_ticks = np.arange(10)
plt.imshow(conf_mat, cmap="Blues")
plt.xlabel("Predicted labels")
plt.ylabel("True labels")
plt.xticks(class_ticks)
plt.yticks(class_ticks)
plt.colorbar()
plt.show()

# Plot a random sample of 16 test set images in a 4x4 grid, each captioned
# with its predicted and true label.
# test_preds (computed earlier for the confusion matrix) already holds the
# predicted class of every test image, so it is reused here instead of
# running model.predict over the whole test set a second time.
random_indices = np.random.choice(x_test.shape[0], 16, replace=False)
plt.figure(figsize=(10, 10))
for i, index in enumerate(random_indices):
    plt.subplot(4, 4, i + 1)
    # Hide ticks and grid lines so only the image and its caption are shown.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(x_test[index], cmap=plt.cm.binary)
    plt.xlabel(f"Predicted: {test_preds[index]}\nTrue: {y_test[index]}")
plt.show()