How to apply dropout in TensorFlow to improve the accuracy of a neural network?


Dropout is a regularization technique, and I want to apply it to the notMNIST data to reduce overfitting and finish my Udacity Deep Learning course assignment. I've read the TensorFlow documentation on how to call tf.nn.dropout. Here is my code:

# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np  
import tensorflow as tf
from six.moves import cPickle as pickle


pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)


image_size = 28
num_labels = 10

def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])


# ReLU neuron
# param
training_epochs = 30
batch_size = 521
display_step = 1
n_input = 784 # img shape: 28*28
n_classes = 10 # MNIST total classes (0-9 digits)

# hyper-parameter
n_hidden_1 = 256 
learning_rate = 0.05
lambda_term = 0.01


graph = tf.Graph()
with graph.as_default():
    # init weights
    weights_hiden =  tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=np.sqrt(n_input)))
    weights_out = tf.Variable(tf.random_normal([n_hidden_1, n_classes], stddev=np.sqrt(n_hidden_1)))

    biases_hidden = tf.Variable(tf.random_normal([n_hidden_1]))
    biases_out = tf.Variable(tf.random_normal([n_classes]))

    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out):
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(layer_1, weights_out) + biases_out
        return out_layer

    # Construct model
    pred = model(x, weights_hiden, weights_out, biases_hidden, biases_out)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y) +
                          lambda_term * tf.nn.l2_loss(weights_hiden) + 
                          lambda_term * tf.nn.l2_loss(weights_out) +
                          lambda_term * tf.nn.l2_loss(biases_hidden) + 
                          lambda_term * tf.nn.l2_loss(biases_out))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)


# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(train_dataset.shape[0]/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x = train_dataset[(i*batch_size):((i*batch_size) + batch_size), :]
            batch_y = train_labels[(i*batch_size):((i*batch_size) + batch_size), :]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels}))

tf.nn.dropout is called inside the model() function, but after applying dropout to the neural network the accuracy doesn't seem to change at all. Here is the output:

Epoch: 0001 cost= 579980.086977807
Epoch: 0002 cost= 238859.802382506
Epoch: 0003 cost= 90672.733752856
Epoch: 0004 cost= 32649.040985028
Epoch: 0005 cost= 11325.878361874
Epoch: 0006 cost= 3866.805511076
Epoch: 0007 cost= 1357.785540469
Epoch: 0008 cost= 519.381747333
Epoch: 0009 cost= 225.359804119
Epoch: 0010 cost= 110.099476707
Epoch: 0011 cost= 55.212384386
Epoch: 0012 cost= 28.469241683
Epoch: 0013 cost= 14.511494627
Epoch: 0014 cost= 6.567228943
Epoch: 0015 cost= 3.186372240
Epoch: 0016 cost= 1.701917576
Epoch: 0017 cost= 1.041632473
Epoch: 0018 cost= 0.843376874
Epoch: 0019 cost= 0.786183911
Epoch: 0020 cost= 0.775412846
Epoch: 0021 cost= 0.782965020
Epoch: 0022 cost= 0.796788171
Epoch: 0023 cost= 0.814522117
Epoch: 0024 cost= 0.832090579
Epoch: 0025 cost= 0.849197715
Epoch: 0026 cost= 0.867473578
Epoch: 0027 cost= 0.889561496
Epoch: 0028 cost= 0.921837020
Epoch: 0029 cost= 16.655304543
Epoch: 0030 cost= 1.421570476
Optimization Finished!
Test data accuracy: 0.8775
Valid data accuracy: 0.8069

How can I apply TensorFlow's dropout to improve the accuracy of the network? Thanks!

Author: GoingMyWay, 2016-11-30

2 answers

Within the graph, I'd suggest moving keep_prob = tf.placeholder(tf.float32) out of the model function to make it global:

with graph.as_default():
    ...
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    keep_prob = tf.placeholder(tf.float32)

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out, keep_prob):
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(drop_out, weights_out) + biases_out
        return out_layer
    ...

When running the session, feed your desired keep_prob value during training, and feed 1.0 to keep_prob at inference time (validation and/or testing):

# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    ...
    for epoch in range(training_epochs):
        ...
        for i in range(total_batch):
            batch_x = ...
            batch_y = ...
            # Run optimization op (backprop) and cost op (to get loss value)
            # Feed a value < 1.0 for keep prob during training
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob : 0.5})
    ...
    # Feed 1.0 for keep prob during testing
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels, keep_prob : 1.0}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels, keep_prob : 1.0}))
Author: Zhongyu Kuang, 2017-07-19

The key point here is that this:

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
    # apply DropOut to hidden layer
    keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
    drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
    # output layer with linear activation
    out_layer = tf.matmul(layer_1, weights_out) + biases_out

becomes:

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
    # apply DropOut to hidden layer
    drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
    # output layer with linear activation
    out_layer = tf.matmul(drop_out, weights_out) + biases_out

where drop_out is used on the final line instead of layer_1, since using layer_1 there would bypass the dropout entirely.
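
A quick way to verify that dropout is actually active (a hedged sketch; it assumes keep_prob has been made a graph-level placeholder as in the first answer, and reuses the question's sess, pred, x and batch_x) is to run the forward pass twice on the same batch: with keep_prob < 1.0 the logits should differ between runs because a fresh random mask is drawn each time, while with keep_prob = 1.0 they should be identical.

# Hypothetical sanity check, reusing the question's sess, pred, x and batch_x
logits_a = sess.run(pred, feed_dict={x: batch_x, keep_prob: 0.5})
logits_b = sess.run(pred, feed_dict={x: batch_x, keep_prob: 0.5})
print(np.allclose(logits_a, logits_b))   # expected False: different dropout masks
logits_c = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.0})
logits_d = sess.run(pred, feed_dict={x: batch_x, keep_prob: 1.0})
print(np.allclose(logits_c, logits_d))   # expected True: dropout disabled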

Author: James Shiztar, 2017-08-01