Training a Simple Neural Network with TensorFlow

In this post, we use TensorFlow's eager execution to build and train a model. Eager execution eliminates the need to create Graphs and Sessions explicitly, which makes neural network training more convenient and faster to iterate on. Below, we train a neural network on the Iris dataset as an example; the code comes from Google's tutorial.
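
Before diving in, here is a minimal standalone sketch (run on its own, in a fresh session) of what eager execution changes: operations execute immediately and return concrete values, with no Graph or Session involved.

import tensorflow as tf

tf.enable_eager_execution()

x = tf.constant([[2.0, 3.0]])
w = tf.constant([[1.0], [4.0]])
print(tf.matmul(x, w))  # Runs immediately: tf.Tensor([[14.]], shape=(1, 1), dtype=float32)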

# First, import the necessary libraries
from __future__ import absolute_import, division, print_function
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()  # Enable eager execution
# Check the TensorFlow version and whether eager execution is enabled
print('TensorFlow Version:{}'.format(tf.VERSION))
print('Eager execution:{}'.format(tf.executing_eagerly()))

TensorFlow Version:1.8.0
Eager execution:True

# Get the dataset and display the local save location
train_dataset_url = 'http://download.tensorflow.org/data/iris_training.csv'
train_dataset_fp = tf.keras.utils.get_file(fname=os.path.basename(train_dataset_url), origin=train_dataset_url)
print('Local copy of the dataset file:{}'.format(train_dataset_fp))

Downloading data from http://download.tensorflow.org/data/iris_training.csv
8192/2194 [================================================================================================================] - 0s 0us/step
Local copy of the dataset file: C:\Users\Frank\.keras\datasets\iris_training.csv
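
Before writing the parser, it helps to peek at the raw file. The sketch below just prints the first few lines (the exact header contents depend on the downloaded file; in this dataset the first line is a header, and each following row holds 4 numeric features plus an integer label):

# Peek at the first few lines of the raw CSV as a sanity check
with open(train_dataset_fp) as f:
    for _ in range(3):
        print(f.readline().strip())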

# Parse one CSV row: each row has 5 elements; the first 4 are floats (the features), the last is an integer label
def parse_csv(line):
    example_defaults = [[0.], [0.], [0.], [0.], [0]]
    parsed_line = tf.decode_csv(line, example_defaults)
    features = tf.reshape(parsed_line[:-1], shape=(4,))
    label = tf.reshape(parsed_line[-1], shape=())
    return features, label

train_dataset = tf.data.TextLineDataset(train_dataset_fp)  # Read csv and convert to dataset
train_dataset = train_dataset.skip(1)  # Skip the header row
train_dataset = train_dataset.map(parse_csv)  # Map each row
train_dataset = train_dataset.shuffle(buffer_size=1000)  # Shuffle randomly
train_dataset = train_dataset.batch(32)  # Batch
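
As a quick sanity check on the parser itself, parse_csv can also be called directly on a single line (the sample values below are made up for illustration):

# Run the parser on one hand-written CSV line
feats, lab = parse_csv(tf.constant('6.4,2.8,5.6,2.2,2'))
print(feats, lab)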

# Print one example from the first batch of training data
features, label = next(iter(train_dataset))
print('example features:', features[0])
print('example label:', label[0])

example features: tf.Tensor([6.8 3. 5.5 2.1], shape=(4,), dtype=float32)
example label: tf.Tensor(2, shape=(), dtype=int32)

# Build a neural network model with two hidden layers
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(3)
])
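
To verify the layer shapes, Keras can print a summary (a quick check, not part of the original tutorial code). Since input_shape is given, the model is already built: the first Dense layer has 4*10 + 10 = 50 parameters, the second 10*10 + 10 = 110, and the output layer 10*3 + 3 = 33, for 193 in total.

model.summary()  # Expect 50 + 110 + 33 = 193 trainable parameters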

# Define the loss as softmax cross-entropy; returns the scalar loss tensor
def loss(model, x, y):
    y_ = model(x)  # Forward pass: raw logits for each class
    return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)

# Compute gradients of the loss with respect to all model variables
def grad(model, inputs, targets):
    with tf.GradientTape() as tape:  # Record the forward pass on the tape
        loss_value = loss(model, inputs, targets)
    return tape.gradient(loss_value, model.variables)  # Differentiate the recorded loss w.r.t. each variable
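
To see what GradientTape does in isolation, here is a minimal standalone sketch (using the tfe alias imported above): the tape records operations involving trainable variables, and tape.gradient differentiates the result. For v = 3, d(v^2)/dv = 2v = 6.

v = tfe.Variable(3.0)  # Trainable variables are watched by the tape automatically
with tf.GradientTape() as tape:
    y = v * v
print(tape.gradient(y, v))  # tf.Tensor(6.0, shape=(), dtype=float32)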

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

train_loss_results = []
train_accuracy_results = []
num_epochs = 201

# Run the training loop for 201 epochs, tracking loss and accuracy per epoch
for epoch in range(num_epochs):
    epoch_loss_avg = tfe.metrics.Mean()  # Average error object for cross-entropy
    epoch_accuracy = tfe.metrics.Accuracy()  # Accuracy object

    for x, y in train_dataset:
        grads = grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.variables),  # Group gradients with corresponding model variables
                                  global_step=tf.train.get_or_create_global_step())
        epoch_loss_avg(loss(model, x, y))
        epoch_accuracy(tf.argmax(model(x), axis=1, output_type=tf.int32), y)
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())
    if epoch % 50 == 0:
        print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, epoch_loss_avg.result(), epoch_accuracy.result()))

Epoch 000: Loss: 1.217, Accuracy: 30.833%
Epoch 050: Loss: 0.524, Accuracy: 93.333%
Epoch 100: Loss: 0.261, Accuracy: 96.667%
Epoch 150: Loss: 0.169, Accuracy: 97.500%
Epoch 200: Loss: 0.133, Accuracy: 97.500%

# Visualize the training process loss function and accuracy
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')
axes[0].set_ylabel("Loss", fontsize=14)
axes[0].plot(train_loss_results)

axes[1].set_ylabel("Accuracy", fontsize=14)
axes[1].set_xlabel("Epoch", fontsize=14)
axes[1].plot(train_accuracy_results)

plt.show()

# Evaluate model performance on the test set
test_url = "http://download.tensorflow.org/data/iris_test.csv"

test_fp = tf.keras.utils.get_file(fname=os.path.basename(test_url),
                                  origin=test_url)

test_dataset = tf.data.TextLineDataset(test_fp)
test_dataset = test_dataset.skip(1)
test_dataset = test_dataset.map(parse_csv)
test_dataset = test_dataset.shuffle(1000)
test_dataset = test_dataset.batch(32)

test_accuracy = tfe.metrics.Accuracy()

for (x, y) in test_dataset:
    prediction = tf.argmax(model(x), axis=1, output_type=tf.int32)
    test_accuracy(prediction, y)

print("Test set accuracy: {:.3%}".format(test_accuracy.result()))

Test set accuracy: 100.000%
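
A single accuracy number can hide which classes get confused with which, so a per-class confusion matrix is a useful extra check (a small addition, not in the original tutorial). Iterating the dataset again starts a fresh pass over the test data:

# Accumulate a 3x3 confusion matrix over the test set
confusion = tf.zeros((3, 3), dtype=tf.int32)
for (x, y) in test_dataset:
    prediction = tf.argmax(model(x), axis=1, output_type=tf.int32)
    confusion += tf.confusion_matrix(y, prediction, num_classes=3)
print(confusion)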

# Use the model to make predictions
class_ids = ["Iris setosa", "Iris versicolor", "Iris virginica"]
predict_dataset = tf.convert_to_tensor([
    [5.1, 3.3, 1.7, 0.5],
    [5.9, 3.0, 4.2, 1.5],
    [6.9, 3.1, 5.4, 2.1]
])

predictions = model(predict_dataset)

for i, logits in enumerate(predictions):
    class_idx = tf.argmax(logits).numpy()
    name = class_ids[class_idx]
    print("Example {} prediction: {}".format(i, name))

Example 0 prediction: Iris setosa
Example 1 prediction: Iris versicolor
Example 2 prediction: Iris virginica
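
The raw outputs above are logits. To see how confident each prediction is, they can be converted to probabilities with softmax (a small addition to the tutorial code):

# Convert logits to class probabilities and report the confidence
for i, logits in enumerate(predictions):
    probs = tf.nn.softmax(logits)
    class_idx = tf.argmax(logits).numpy()
    confidence = probs[class_idx].numpy()
    print("Example {}: {} ({:.1%})".format(i, class_ids[class_idx], confidence))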