So I tried to implement a convolutional neural network on the MNIST dataset, similar to this one: https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
However, while doing so I noticed that for some reason my second max_pool operation is not being applied. I also don't understand how the code in the link above works; more specifically, how can the nn_layer method be reused, given that the weights only exist inside that scope, and wouldn't calling it twice change them?
My code is below:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
from tensorflow.contrib.tensorboard.plugins import projector

current_path = os.path.dirname(os.path.realpath(__file__))
current_path = current_path + "/logs"

def train():
    mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

    def initializer(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, [1, 1, 1, 1], padding="SAME")

    def max_pool(x):
        return tf.nn.max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME")

    def conv_layer(x, length, width, input_channels, output_channels, layer_name, act=tf.nn.relu):
        with tf.name_scope(layer_name):
            with tf.name_scope('weights'):
                weights = initializer([length, width, input_channels, output_channels])
                tf.summary.histogram(layer_name + "_weights", weights)
            with tf.name_scope('biases'):
                biases = initializer([output_channels])
                tf.summary.histogram(layer_name + "_biases", biases)
            with tf.name_scope('activations'):
                activations = act(conv2d(x, weights) + biases)
                activations = max_pool(activations)
                tf.summary.histogram(layer_name + "_activations", activations)
        return activations

    def dense_layer(x, input_size, output_size, layer_name, act=tf.nn.relu):
        with tf.name_scope(layer_name):
            with tf.name_scope('weights'):
                weights = initializer([input_size, output_size])
                tf.summary.histogram(layer_name + "_weights", weights)
            with tf.name_scope('biases'):
                biases = initializer([output_size])
                tf.summary.histogram(layer_name + "_biases", biases)
            with tf.name_scope('activations'):
                activations = act(tf.matmul(x, weights) + biases)
                tf.summary.histogram(layer_name + "_activations", activations)
        return activations

    def dropout(x, keep_prob):
        with tf.name_scope('Dropout'):
            dropped = tf.nn.dropout(x, keep_prob)
        return dropped

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='image_inputs')
        y = tf.placeholder(tf.float32, [None, 10], name='image_labels')
        keep_prob = tf.placeholder(tf.float32, name='keep_probability')

    with tf.name_scope('input_reshape'):
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', x_image, 50)

    h1 = conv_layer(x_image, 3, 3, 1, 32, "first_convolution_layer")
    h2 = conv_layer(h1, 3, 3, 32, 64, "second_convolution_layer")
    h2 = tf.reshape(h1, [-1, 7*7*64])
    h2 = dropout(h2, keep_prob)
    h3 = dense_layer(h2, 7*7*64, 1024, "first_dense_layer")
    h3 = dropout(h3, keep_prob)
    h4 = dense_layer(h3, 1024, 1024, "second_dense_layer")
    h4 = dropout(h4, keep_prob)
    h_out = dense_layer(h4, 1024, 10, "output_dense_layer", act=tf.nn.sigmoid)

    with tf.name_scope("Loss"):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_out))
        tf.summary.scalar('Loss', cost)

    train = tf.train.AdamOptimizer().minimize(cost)

    with tf.name_scope("Accuracy"):
        correct_pred = tf.equal(tf.argmax(h_out, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    sess = tf.InteractiveSession()
    sess.run(init)
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(current_path, sess.graph)

    for i in range(500):
        batch = mnist.train.next_batch(500)
        if i % 100 == 0:
            summary_str = sess.run(summary, feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
            summary_writer.add_summary(summary_str, i)
            summary_writer.flush()
            train_accuracy = accuracy.eval(feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
            saver.save(sess, os.path.join(current_path, 'model.ckpt'), i)
            print("Step %d Training Accuracy: %f" % ((i/100 + 1), train_accuracy))
        train.run(feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})

    sum = 0.0
    for i in range(10):
        batch_x = mnist.test.images[(i*1000):((i+1)*1000)-1]
        batch_y = mnist.test.labels[(i*1000):((i+1)*1000)-1]
        sum = sum + accuracy.eval(feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
    print("Test Accuracy: %f" % (sum/10.0))

if tf.gfile.Exists(current_path):
    tf.gfile.DeleteRecursively(current_path)
tf.gfile.MakeDirs(current_path)
train()
Answer:
This is a simple typo.
Change this
h2 = tf.reshape(h1,[-1,7*7*64])
to this
h2 = tf.reshape(h2,[-1,7*7*64])
and the error
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[1000,10] labels_size=[500,10] [[Node: Loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Loss/Reshape, Loss/Reshape_1)]]
goes away.
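The sizes in that message follow directly from the shapes. After one 2x2 max-pool, h1 is [batch, 14, 14, 32], i.e. 14*14*32 = 6272 values per image, while the reshape asks for rows of 7*7*64 = 3136 values; reshaping h1 therefore turns a batch of 500 images into 1000 rows, which is exactly the logits_size=[1000,10] versus labels_size=[500,10] complaint. A minimal sketch of the shape arithmetic (plain NumPy with dummy arrays, not part of your graph):

import numpy as np

batch = 500
h1 = np.zeros((batch, 14, 14, 32))   # shape of the first conv + max_pool output
h2 = np.zeros((batch, 7, 7, 64))     # shape of the second conv + max_pool output

wrong = h1.reshape(-1, 7 * 7 * 64)   # the typo: reshaping h1 instead of h2
right = h2.reshape(-1, 7 * 7 * 64)   # the fix

print(wrong.shape)   # (1000, 3136) -> logits end up with 1000 rows for 500 labels
print(right.shape)   # (500, 3136)  -> logits line up with the 500 labels

This is also why your second max_pool appears to be skipped: h2 (the second conv layer plus its pool) is built but then thrown away, because the reshape feeds h1 into the rest of the network.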
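As for the other question in the post, about how nn_layer can be called twice: tf.name_scope only prefixes the names of the ops and variables created inside it; each call to such a helper still constructs brand-new tf.Variable objects, so the two calls build two independent layers rather than touching the same weights (sharing would require tf.variable_scope with tf.get_variable and reuse=True). A minimal sketch, not taken from the linked tutorial, illustrating this under TF 1.x semantics:

import tensorflow as tf

def tiny_layer(x, size, layer_name):
    # same pattern as nn_layer / dense_layer above: plain tf.Variable inside a name_scope
    with tf.name_scope(layer_name):
        w = tf.Variable(tf.truncated_normal([int(x.shape[1]), size], stddev=0.1))
        b = tf.Variable(tf.zeros([size]))
        return tf.nn.relu(tf.matmul(x, w) + b)

inp = tf.placeholder(tf.float32, [None, 4])
a = tiny_layer(inp, 8, "layer")   # creates one weight/bias pair under "layer/"
b = tiny_layer(a, 8, "layer")     # creates a second, independent pair under "layer_1/"

print([v.name for v in tf.global_variables()])
# four distinct variables, e.g. ['layer/Variable:0', 'layer/Variable_1:0',
#                                'layer_1/Variable:0', 'layer_1/Variable_1:0']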