¿Cómo aplicar Drop Out en Tensorflow para mejorar la precisión de la red neuronal?
El dropout es una técnica de regularización, y quiero aplicarla a los datos de notMNIST para reducir el sobreajuste y así terminar mi tarea del curso de Aprendizaje Profundo de Udacity. He leído la documentación de TensorFlow sobre cómo llamar a tf.nn.dropout, y aquí está mi código:
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
pickle_file = 'notMNIST.pickle'

# NOTE: unpickling can execute arbitrary code; only load a pickle file that
# you generated yourself or otherwise trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the train / validation / test splits out of the loaded mapping.
train_dataset = save['train_dataset']
train_labels = save['train_labels']
valid_dataset = save['valid_dataset']
valid_labels = save['valid_labels']
test_dataset = save['test_dataset']
test_labels = save['test_labels']
del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10


def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels.

    Args:
        dataset: array that can be reshaped to
            (-1, image_size * image_size).
        labels: 1-D array of integer class ids; each id is compared
            against np.arange(num_labels).

    Returns:
        A (dataset, labels) tuple of float32 arrays: the flattened images
        and one row of length num_labels per label.
    """
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
# Flatten every split and one-hot encode its labels.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the resulting shapes, one line per split.
for split_name, split_data, split_labels in (
    ('Training set', train_dataset, train_labels),
    ('Validation set', valid_dataset, valid_labels),
    ('Test set', test_dataset, test_labels),
):
    print(split_name, split_data.shape, split_labels.shape)
def accuracy(predictions, labels):
    """Return the percentage of rows where predictions and labels agree.

    A row counts as correct when the argmax along axis 1 is the same in
    both arrays; the count is divided by predictions.shape[0].
    """
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])
# ReLU neuron
# param
training_epochs = 30
batch_size = 521
display_step = 1
n_input = 784  # img shape: 28*28
n_classes = 10  # number of output classes
# hyper-parameter
n_hidden_1 = 256
learning_rate = 0.05
lambda_term = 0.01

graph = tf.Graph()
with graph.as_default():
    # init weights
    weights_hiden = tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=np.sqrt(n_input)))
    weights_out = tf.Variable(tf.random_normal([n_hidden_1, n_classes], stddev=np.sqrt(n_hidden_1)))
    biases_hidden = tf.Variable(tf.random_normal([n_hidden_1]))
    biases_out = tf.Variable(tf.random_normal([n_classes]))
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out):
        """Build a one-hidden-layer network and return its output logits."""
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        # NOTE(review): this multiplies layer_1, not drop_out, so the dropout
        # op above never reaches the output. Left as posted because this is
        # the behaviour the question asks about.
        out_layer = tf.matmul(layer_1, weights_out) + biases_out
        return out_layer

    # Construct model
    pred = model(x, weights_hiden, weights_out, biases_hidden, biases_out)
    # Define loss and optimizer: mean cross-entropy plus an L2 penalty on
    # every weight and bias tensor, each scaled by lambda_term.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y) +
                          lambda_term * tf.nn.l2_loss(weights_hiden) +
                          lambda_term * tf.nn.l2_loss(weights_out) +
                          lambda_term * tf.nn.l2_loss(biases_hidden) +
                          lambda_term * tf.nn.l2_loss(biases_out))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Only full batches are used; a trailing partial batch is skipped.
        total_batch = int(train_dataset.shape[0]/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x = train_dataset[(i*batch_size):((i*batch_size) + batch_size), :]
            batch_y = train_labels[(i*batch_size):((i*batch_size) + batch_size), :]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    # NOTE: this rebinds the module-level name `accuracy`, which until now
    # referred to the NumPy helper function defined earlier in the script.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels}))
tf.nn.dropout se llama dentro de la función model(), pero después de aplicar dropout a la red neuronal la precisión no pareció cambiar en absoluto. Este es el resultado:
Epoch: 0001 cost= 579980.086977807
Epoch: 0002 cost= 238859.802382506
Epoch: 0003 cost= 90672.733752856
Epoch: 0004 cost= 32649.040985028
Epoch: 0005 cost= 11325.878361874
Epoch: 0006 cost= 3866.805511076
Epoch: 0007 cost= 1357.785540469
Epoch: 0008 cost= 519.381747333
Epoch: 0009 cost= 225.359804119
Epoch: 0010 cost= 110.099476707
Epoch: 0011 cost= 55.212384386
Epoch: 0012 cost= 28.469241683
Epoch: 0013 cost= 14.511494627
Epoch: 0014 cost= 6.567228943
Epoch: 0015 cost= 3.186372240
Epoch: 0016 cost= 1.701917576
Epoch: 0017 cost= 1.041632473
Epoch: 0018 cost= 0.843376874
Epoch: 0019 cost= 0.786183911
Epoch: 0020 cost= 0.775412846
Epoch: 0021 cost= 0.782965020
Epoch: 0022 cost= 0.796788171
Epoch: 0023 cost= 0.814522117
Epoch: 0024 cost= 0.832090579
Epoch: 0025 cost= 0.849197715
Epoch: 0026 cost= 0.867473578
Epoch: 0027 cost= 0.889561496
Epoch: 0028 cost= 0.921837020
Epoch: 0029 cost= 16.655304543
Epoch: 0030 cost= 1.421570476
Optimization Finished!
Test data accuracy: 0.8775
Valid data accuracy: 0.8069
¿Cómo puedo aplicar DropOut de Tensorflow para mejorar la precisión de la red? ¡Gracias!
2 answers
En el grafo, sugeriría mover keep_prob = tf.placeholder(tf.float32) fuera de la función model para hacerlo global.
with graph.as_default():
    ...
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    # Defined at graph level so the session code can feed it.
    keep_prob = tf.placeholder(tf.float32)

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out, keep_prob):
        """Build the one-hidden-layer network with dropout and return logits."""
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation, fed from the dropout output
        out_layer = tf.matmul(drop_out, weights_out) + biases_out
        return out_layer
    ...
Al ejecutar la sesión, pasa el valor deseado de keep_prob durante el entrenamiento y pasa 1.0 a keep_prob durante la inferencia (validación y/o pruebas).
# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    ...
    for epoch in range(training_epochs):
        ...
        for i in range(total_batch):
            batch_x = ...
            batch_y = ...
            # Run optimization op (backprop) and cost op (to get loss value)
            # Feed a value < 1.0 for keep prob during training
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob : 0.5})
    ...
    # Feed 1.0 for keep prob during testing
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels, keep_prob : 1.0}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels, keep_prob : 1.0}))
2017-07-19 01:07:59
El punto clave aquí es que:
# Original (buggy) body of model(), quoted from the question.
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
# apply DropOut to hidden layer
keep_prob = tf.placeholder(tf.float32) # DROP-OUT here
drop_out = tf.nn.dropout(layer_1, keep_prob) # DROP-OUT here
# output layer with linear activation
# NOTE: multiplies layer_1, not drop_out, so the dropout op is never used.
out_layer = tf.matmul(layer_1, weights_out) + biases_out
Se convierte en:
# Corrected body of model(): keep_prob is no longer created here.
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
# apply DropOut to hidden layer
drop_out = tf.nn.dropout(layer_1, keep_prob) # DROP-OUT here
# output layer with linear activation
# Multiplies drop_out, so the dropout op feeds the output layer.
out_layer = tf.matmul(drop_out, weights_out) + biases_out
Es decir, en la línea final se usa drop_out en lugar de layer_1, ya que usar layer_1 haría que se ignorara la línea de dropout.
2017-08-01 13:54:17