Training a Simple Neural Network with TensorFlow

Here we build the model with TensorFlow’s Eager Execution, so we no longer have to create a Graph and a Session as before; this makes writing and training a neural network more convenient and quicker to iterate on. Below we train a neural network on the Iris dataset as an example; the code is taken from Google’s tutorial.

Preparing the Environment

# First import the relevant libraries
from __future__ import absolute_import,division,print_function
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Turn on eager execution for the rest of the program.
tf.enable_eager_execution()

# Report the TensorFlow version and confirm that eager execution is active.
version_report = 'TensorFlow Version:{}'.format(tf.VERSION)
eager_report = 'Eager execution:{}'.format(tf.executing_eagerly())
print(version_report)
print(eager_report)

TensorFlow Version:1.8.0
Eager execution:True

Loading the Dataset

# Fetch the Iris training CSV and report where the local copy is stored.
train_dataset_url = 'http://download.tensorflow.org/data/iris_training.csv'
train_file_name = os.path.basename(train_dataset_url)
train_dataset_fp = tf.keras.utils.get_file(
    fname=train_file_name,
    origin=train_dataset_url,
)
print('Local copy of the dataset file:{}'.format(train_dataset_fp))

Downloading data from http://download.tensorflow.org/data/iris_training.csv
8192/2194 [================================================================================================================] - 0s 0us/step
Local copy of the dataset file:C:\Users\Frank\.keras\datasets\iris_training.csv

def parse_csv(line):
    """Parse one CSV text line into a ``(features, label)`` pair.

    Each row holds five fields: the first four are the features and the
    last one is the label.

    Args:
        line: A single CSV row as a string tensor.

    Returns:
        A tuple ``(features, label)`` where ``features`` is reshaped to
        ``(4,)`` from the first four fields and ``label`` is reshaped to a
        scalar from the last field.
    """
    # One default per column: four float columns followed by one int column.
    column_defaults = [[0.], [0.], [0.], [0.], [0]]
    columns = tf.decode_csv(line, column_defaults)

    feature_columns = columns[:-1]
    label_column = columns[-1]
    return (
        tf.reshape(feature_columns, shape=(4,)),
        tf.reshape(label_column, shape=()),
    )

# Training input pipeline: read the CSV as text lines, drop the header row,
# parse every remaining row, shuffle, and group the examples into batches of 32.
train_dataset = (
    tf.data.TextLineDataset(train_dataset_fp)
    .skip(1)
    .map(parse_csv)
    .shuffle(buffer_size=1000)
    .batch(32)
)

# Pull one batch from the training dataset and show its first example.
# The built-in next() is used rather than the iterator's .next() method:
# .next() is the Python 2 iterator protocol, while next() works on both
# Python 2 and Python 3 iterators.
features, label = next(iter(train_dataset))
print('example features:', features[0])
print('example label:', label[0])

example features: tf.Tensor([6.8 3. 5.5 2.1], shape=(4,), dtype=float32)
example label: tf.Tensor(2, shape=(), dtype=int32)

Building and Training the Model

# Feed-forward classifier: 4 input features -> two hidden ReLU layers with
# 10 units each -> 3 outputs (the last layer has no activation).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, activation='relu', input_shape=(4,)))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(3))

def loss(model, x, y):
    """Compute the softmax cross-entropy loss of ``model`` on one batch.

    Args:
        model: Callable that maps a batch of features to per-class logits.
        x: Batch of input features passed to ``model``.
        y: Class labels for the batch, given as class indices.

    Returns:
        The value of ``tf.losses.sparse_softmax_cross_entropy`` for the
        labels ``y`` and the logits produced by ``model(x)``.
    """
    logits = model(x)
    return tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)

def grad(model, inputs, targets):
    """Compute the gradients of the loss with respect to the model variables.

    Args:
        model: The model whose variables are differentiated.
        inputs: Batch of input features.
        targets: Labels for the batch.

    Returns:
        The result of ``tape.gradient`` for the batch loss with respect to
        ``model.variables``.
    """
    # Run the forward pass inside the tape so it can be differentiated.
    with tf.GradientTape() as tape:
        batch_loss = loss(model, inputs, targets)

    trainable = model.variables
    return tape.gradient(batch_loss, trainable)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)

# Per-epoch history of the training loss and accuracy.
train_loss_results = []
train_accuracy_results = []
num_epochs = 201

# The global step does not change identity between batches, so look it up once.
global_step = tf.train.get_or_create_global_step()

# Run num_epochs (201) passes over the training dataset.
for epoch in range(num_epochs):
    epoch_loss_avg = tfe.metrics.Mean()      # running mean of the batch losses
    epoch_accuracy = tfe.metrics.Accuracy()  # running accuracy over the epoch

    for x, y in train_dataset:
        # One optimisation step: pair each gradient with its variable and apply.
        grads = grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.variables),
                                  global_step=global_step)

        # Metrics are measured after the update, i.e. with the new weights.
        epoch_loss_avg(loss(model, x, y))
        epoch_accuracy(tf.argmax(model(x), axis=1, output_type=tf.int32), y)

    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(
            epoch, epoch_loss_avg.result(), epoch_accuracy.result()))

Epoch 000: Loss: 1.217, Accuracy: 30.833%
Epoch 050: Loss: 0.524, Accuracy: 93.333%
Epoch 100: Loss: 0.261, Accuracy: 96.667%
Epoch 150: Loss: 0.169, Accuracy: 97.500%
Epoch 200: Loss: 0.133, Accuracy: 97.500%

# Plot the recorded training loss (top) and accuracy (bottom) against a
# shared x axis.
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')

loss_axis, accuracy_axis = axes

loss_axis.set_ylabel("Loss", fontsize=14)
loss_axis.plot(train_loss_results)

accuracy_axis.set_ylabel("Accuracy", fontsize=14)
accuracy_axis.set_xlabel("Epoch", fontsize=14)
accuracy_axis.plot(train_accuracy_results)

plt.show()

Evaluation and Prediction

# Measure the trained model's accuracy on the held-out Iris test set.
test_url = "http://download.tensorflow.org/data/iris_test.csv"
test_file_name = os.path.basename(test_url)
test_fp = tf.keras.utils.get_file(fname=test_file_name, origin=test_url)

# Same preprocessing as the training pipeline: skip the header row, parse,
# shuffle, and batch.
test_dataset = (
    tf.data.TextLineDataset(test_fp)
    .skip(1)
    .map(parse_csv)
    .shuffle(1000)
    .batch(32)
)

test_accuracy = tfe.metrics.Accuracy()

for (x, y) in test_dataset:
    # The predicted class is the index of the largest output per example.
    prediction = tf.argmax(model(x), axis=1, output_type=tf.int32)
    test_accuracy(prediction, y)

print("Test set accuracy: {:.3%}".format(test_accuracy.result()))

Test set accuracy: 100.000%

# Classify three unlabeled examples with the trained model.
# class_ids maps a predicted class index to its species name.
class_ids = ["Iris setosa", "Iris versicolor", "Iris virginica"]

predict_dataset = tf.convert_to_tensor([
    [5.1, 3.3, 1.7, 0.5],
    [5.9, 3.0, 4.2, 1.5],
    [6.9, 3.1, 5.4, 2.1],
])

predictions = model(predict_dataset)

for i, logits in enumerate(predictions):
    # The index of the largest output is the predicted class.
    class_idx = tf.argmax(logits).numpy()
    name = class_ids[class_idx]
    print("Example {} prediction: {}".format(i, name))

Example 0 prediction: Iris setosa
Example 1 prediction: Iris versicolor
Example 2 prediction: Iris virginica