# In this example, we use the TensorFlow library to load the MNIST data,
# define a Multi-layer Perceptron (MLP) with three dense layers, compile
# the model, train it for 10 epochs, evaluate it on the test set, and
# make predictions on the test set. Finally, we plot some examples of
# the predictions made by the model.

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize the pixel values to be between 0 and 1
x_train = x_train / 255
x_test = x_test / 255

# Flatten the 2D images into 1D arrays
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Convert the labels into one-hot encoded arrays
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Define the model
# The number of parameters depends on the shapes and sizes of the layers.
# In the commented-out model below, the first layer
# Dense(512, activation='relu', input_shape=(784,)) has 784 input nodes
# and 512 output nodes. Therefore, the number of parameters in this layer
# is (784 * 512) + 512 = 401,920, where the +512 term is for the bias terms.
# The second layer also has 512 input nodes and 512 output nodes, which
# makes (512 * 512) + 512 = 262,656 parameters. The third and last layer
# has 512 input nodes and 10 output nodes, which makes
# (512 * 10) + 10 = 5,130 parameters.
#model = tf.keras.models.Sequential()
#model.add(tf.keras.layers.Dense(512, activation='relu', input_shape=(784,)))
#model.add(tf.keras.layers.Dense(512, activation='relu'))
#model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Overcome overfitting with L2 regularization (and a smaller network)
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(0.001),
                          input_shape=(784,)),
    tf.keras.layers.Dense(64, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.l2(0.001)),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model and record the history
history = model.fit(x_train, y_train, epochs=10, batch_size=64,
                    validation_data=(x_test, y_test))
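# As an optional sanity check on the parameter arithmetic above, this is a
# minimal sketch that recomputes each Dense layer's count by hand, using the
# formula (inputs * outputs) + outputs (weight matrix plus bias vector),
# and compares the total against Keras's own model.count_params().
for layer in model.layers:
    n_in, n_out = layer.kernel.shape
    print(layer.name, (n_in * n_out) + n_out)
print('Total parameters:', model.count_params())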
# Get the weights of the output Dense layer and plot them as a heatmap,
# where the weights are represented as pixel values.
# model.layers[2].get_weights()[0] returns only the weights of the third
# layer. If you wanted to get the biases, you would use
# model.layers[2].get_weights()[1].
dense_weights = model.layers[2].get_weights()[0]

# Plot the weights as a heatmap
plt.imshow(dense_weights, cmap='coolwarm')
plt.colorbar()
plt.title('weights in the output layer')
plt.show()

# Plot loss and accuracy
plt.figure(figsize=(12, 4))

# Plot the loss during training
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Plot the accuracy during training
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='training accuracy')
plt.plot(history.history['val_accuracy'], label='validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test accuracy:', test_acc)

# Make predictions on the test set
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)

# Plot some examples from the test set and their predictions
fig, axes = plt.subplots(4, 4, figsize=(14, 14))
for i, ax in enumerate(axes.ravel()):
    ax.matshow(x_test[i].reshape(28, 28), cmap='gray')
    ax.set_title("True: %d\nPredict: %d" % (np.argmax(y_test[i]), y_pred[i]))
    ax.axis("off")
plt.show()
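# A small optional follow-up sketch: confirm the shapes returned by
# get_weights(), where index [0] is the (64, 10) weight matrix of the
# output layer and index [1] is its 10-element bias vector, and count
# how many test images the model misclassified using the predictions above.
weights, biases = model.layers[2].get_weights()
print('Output-layer weights shape:', weights.shape)  # (64, 10)
print('Output-layer biases shape:', biases.shape)    # (10,)
n_wrong = np.sum(y_pred != np.argmax(y_test, axis=1))
print('Misclassified test images:', n_wrong, 'of', len(y_pred))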