# In this example, we use the TensorFlow library to load the MNIST data,
# define a Multi-layer Perceptron (MLP) with three dense layers, compile
# the model, train it for 10 epochs, evaluate it on the test set, and
# make predictions on the test set. Finally, we plot some examples of
# the predictions made by the model.

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize the pixel values to be between 0 and 1
x_train = x_train / 255
x_test = x_test / 255

# Flatten the 2D images into 1D arrays
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Convert the labels into one-hot encoded arrays
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Define the model
# The number of parameters depends on the shapes and sizes of the layers.
# In the commented-out model below, the first layer
# Dense(512, activation='relu', input_shape=(784,)) has 784 input nodes
# and 512 output nodes. Therefore, the number of parameters in this layer
# is (784 * 512) + 512 = 401,920, where the +512 term is for the bias terms.
# The second layer also has 512 input nodes and 512 output nodes, which
# makes (512 * 512) + 512 = 262,656 parameters. The third and last layer
# has 512 input nodes and 10 output nodes, which makes
# (512 * 10) + 10 = 5,130 parameters.
#model = tf.keras.models.Sequential()
#model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)))
#model.add(tf.keras.layers.Dense(512, activation='relu'))
#model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Overcome overfitting with L2 regularization (and a smaller network)
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(0.001),
                          input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model and record the history
history = model.fit(x_train, y_train, epochs=10, batch_size=64,
                    validation_data=(x_test, y_test))
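# As an optional sanity check on the parameter arithmetic above, this is a
# minimal sketch that recomputes each Dense layer's count by hand, using the
# formula (inputs * outputs) + outputs (weight matrix plus bias vector),
# and compares the total against Keras's own model.count_params().
for layer in model.layers:
    n_in, n_out = layer.kernel.shape
    print(layer.name, (n_in * n_out) + n_out)
print('Total parameters:', model.count_params())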
# Get the weights of the output Dense layer and plot them as a heatmap,
# where the weights are represented as pixel values.
# model.layers[2].get_weights()[0] returns only the weights of the third
# layer. If you wanted to get the biases, you would use
# model.layers[2].get_weights()[1].
dense_weights = model.layers[2].get_weights()[0]

# Plot the weights as a heatmap
plt.imshow(dense_weights, cmap='coolwarm')
plt.colorbar()
plt.title('weights in the output layer')
plt.show()

# Plot loss and accuracy
plt.figure(figsize=(12, 4))

# Plot the loss during training
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Plot the accuracy during training
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='training accuracy')
plt.plot(history.history['val_accuracy'], label='validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)

# Make predictions on the test set
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)

# Plot some examples from the test set and their predictions
fig, axes = plt.subplots(4, 4, figsize=(14, 14))
for i, ax in enumerate(axes.ravel()):
    ax.matshow(x_test[i].reshape(28, 28), cmap='gray')
    ax.set_title("True: %d\nPredict: %d" % (np.argmax(y_test[i]), y_pred[i]))
    ax.axis("off")
plt.show()
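# A small optional follow-up sketch: confirm the shapes returned by
# get_weights(), where index [0] is the (64, 10) weight matrix of the
# output layer and index [1] is its 10-element bias vector, and count
# how many test images the model misclassified using the predictions above.
weights, biases = model.layers[2].get_weights()
print('Output-layer weights shape:', weights.shape)  # (64, 10)
print('Output-layer biases shape:', biases.shape)    # (10,)
n_wrong = np.sum(y_pred != np.argmax(y_test, axis=1))
print('Misclassified test images:', n_wrong, 'of', len(y_pred))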