1500字范文 > DL-3利用MNIST搭建神经网络模型（三种方法）：1.用CNN 2.用CNN+RNN 3.用自编码网络autoencoder

DL-3利用MNIST搭建神经网络模型（三种方法）：1.用CNN 2.用CNN+RNN 3.用自编码网络autoencoder

时间：2024-03-11 05:57:40

`Author：吾爱北方的母老虎`

`原创链接：/weixin_41010198/article/details/80286216`

import tensorflow as tf
import numpy as np

MNIST的分类问题

加载数据、构建回归模型、训练模型、模型评估、训练过程的可视化

用逻辑回归（softmax 回归）构建 MNIST 的简单网络，其实相当于一个单层感知机：输入直接连接到输出层，没有隐藏层

1.加载数据

from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST data set from /tmp/data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.Extracting /tmp/data/train-images-idx3-ubyte.gzSuccessfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.Extracting /tmp/data/train-labels-idx1-ubyte.gzSuccessfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.Extracting /tmp/data/t10k-images-idx3-ubyte.gzSuccessfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.Extracting /tmp/data/t10k-labels-idx1-ubyte.gz

2. 构建回归模型

# Every handwritten-digit image is 28 x 28 = 784 pixels.
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b  # predicted value; this defines the regression model

# Define the loss function and the optimizer.
y_ = tf.placeholder(tf.float32, [None, 10])  # placeholder for the true labels

# Cross-entropy between the logits `y` and the labels `y_`, averaged over
# the batch.
cross_entrop = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))

# Use SGD with a learning rate of 0.5 to minimize the cross-entropy.
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entrop)

3. 训练模型

# Variables are initialized through an interactive session here; the
# difference from tf.Session() can be ignored for this example.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
# The following form works as well:
# init = tf.global_variables_initializer()
# sess.run(init)

# Train with mini-batches: every iteration fetches 100 examples and feeds
# them in place of the placeholders defined earlier.
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

4. 评估模型

tf.argmax(vector, 1)：返回的是vector中的最大值的索引号，如果vector是一个向量，那就返回一个值，如果是一个矩阵，那就返回一个向量，这个向量的每一个维度都是相对应矩阵行的最大值元素的索引号。

Markdown插入代码的时候用的符号不是单引号，而是~键上的那个反引号（`）

"`代码块`"

``` c 加一个c会有高亮显示

输出结果： [4] [2 1]

# tf.argmax(y, 1) is the label the model predicts for each input x;
# tf.argmax(y_, 1) is the correct label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

# Cast the booleans to floats and average them to obtain the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Accuracy on the test set.
print(sess.run(accuracy,
               feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

输出准确率为0.9175

下面用卷积神经网络，并结合Tensorboard进行可视化训练MNIST

# The code below lives in mnist_with_summaries.py; running
# `more mnist_with_summaries.py` prints the contents of that file.
"""A simple MNIST classifier which displays summaries in TensorBoard.

This is an unimpressive MNIST model, but it is a good example of using
tf.name_scope to make a graph legible in the TensorBoard graph explorer, and of
naming summary tags so that they are grouped meaningfully in TensorBoard.

It demonstrates the functionality of every TensorBoard dashboard.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys

import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data

FLAGS = None


def train():
    """Build the two-layer network, train it, and write TensorBoard summaries.

    Reads its configuration (data_dir, fake_data, learning_rate, dropout,
    max_steps, log_dir) from the module-level FLAGS object.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir,
                                      fake_data=FLAGS.fake_data)

    sess = tf.InteractiveSession()
    # Create a multilayer model.

    # Input placeholders
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_ = tf.placeholder(tf.int64, [None], name='y-input')

    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input, 10)

    # We can't initialize these variables to 0 - the network will get stuck.
    def weight_variable(shape):
        """Create a weight variable with appropriate initialization."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        """Create a bias variable with appropriate initialization."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization).

        Adds mean, stddev, max, min and histogram summaries for one tensor.
        """
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
            with tf.name_scope('stddev'):
                stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    def nn_layer(input_tensor, input_dim, output_dim, layer_name,
                 act=tf.nn.relu):
        """Reusable code for making a simple neural net layer.

        It does a matrix multiply, bias add, and then uses ReLU to
        nonlinearize. It also sets up name scoping so that the resultant graph
        is easy to read, and adds a number of summary ops.
        """
        # Adding a name scope ensures logical grouping of the layers in the
        # graph.
        with tf.name_scope(layer_name):
            # This Variable will hold the state of the weights for the layer
            with tf.name_scope('weights'):
                weights = weight_variable([input_dim, output_dim])
                variable_summaries(weights)
            with tf.name_scope('biases'):
                biases = bias_variable([output_dim])
                variable_summaries(biases)
            with tf.name_scope('Wx_plus_b'):
                preactivate = tf.matmul(input_tensor, weights) + biases
                tf.summary.histogram('pre_activations', preactivate)
            activations = act(preactivate, name='activation')
            tf.summary.histogram('activations', activations)
            return activations

    hidden1 = nn_layer(x, 784, 500, 'layer1')

    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        tf.summary.scalar('dropout_keep_probability', keep_prob)
        dropped = tf.nn.dropout(hidden1, keep_prob)

    # Do not apply softmax activation yet, see below.
    y = nn_layer(dropped, 500, 10, 'layer2', act=tf.identity)

    with tf.name_scope('cross_entropy'):
        # The raw formulation of cross-entropy,
        #
        # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)),
        #                               reduction_indices=[1]))
        #
        # can be numerically unstable.
        #
        # So here we use tf.losses.sparse_softmax_cross_entropy on the
        # raw logit outputs of the nn_layer above, and then average across
        # the batch.
        with tf.name_scope('total'):
            cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                labels=y_, logits=y)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
            cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), y_)
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all the summaries and write them out to
    # /tmp/tensorflow/mnist/logs/mnist_with_summaries (by default)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    # Train the model, and also write summaries.
    # Every 10th step, measure test-set accuracy, and write test summaries
    # All other steps, run train_step on training data, & add training
    # summaries

    def feed_dict(train):
        """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
        if train or FLAGS.fake_data:
            xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
            k = FLAGS.dropout
        else:
            xs, ys = mnist.test.images, mnist.test.labels
            k = 1.0
        return {x: xs, y_: ys, keep_prob: k}

    for i in range(FLAGS.max_steps):
        if i % 10 == 0:  # Record summaries and test-set accuracy
            summary, acc = sess.run([merged, accuracy],
                                    feed_dict=feed_dict(False))
            test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run([merged, train_step],
                                      feed_dict=feed_dict(True),
                                      options=run_options,
                                      run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run([merged, train_step],
                                      feed_dict=feed_dict(True))
                train_writer.add_summary(summary, i)
    train_writer.close()
    test_writer.close()


def main(_):
    """Recreate an empty log directory, then run training."""
    if tf.gfile.Exists(FLAGS.log_dir):
        tf.gfile.DeleteRecursively(FLAGS.log_dir)
    tf.gfile.MakeDirs(FLAGS.log_dir)
    train()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # NOTE(review): with type=bool any non-empty value, including the string
    # "False", parses as True; only omitting the flag yields False.
    parser.add_argument('--fake_data', nargs='?', const=True, type=bool,
                        default=False,
                        help='If true, uses fake data for unit testing.')
    parser.add_argument('--max_steps', type=int, default=1000,
                        help='Number of steps to run trainer.')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Initial learning rate')
    parser.add_argument('--dropout', type=float, default=0.9,
                        help='Keep probability for training dropout.')
    parser.add_argument(
        '--data_dir',
        type=str,
        default=os.path.join(os.getenv('TEST_TMPDIR', '/tmp'),
                             'tensorflow/mnist/input_data'),
        help='Directory for storing input data')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=os.path.join(os.getenv('TEST_TMPDIR', '/tmp'),
                             'tensorflow/mnist/logs/mnist_with_summaries'),
        help='Summaries log directory')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

不知道如何打开TensorFlow，参考我的另外一篇博客：

/AlvinSui/p/8982483.html

MNIST的卷积神经网络

1.加载数据

先导入必要的库

# Read MNIST from MNIST_data/ with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# trainX, trainY, testX and testY are the data in matrix form.
trainX, trainY, testX, testY = (mnist.train.images, mnist.train.labels,
                                mnist.test.images, mnist.test.labels)

print(trainX)
print("--------------------------")
print(trainY)
print("--------------------------")
print(testX)
print("--------------------------")
print(testY)
print("训练数据:", trainX.shape)
print("训练数据标签:", trainY.shape)
print("测试数据:", testX.shape)
print("测试数据标签:", testY.shape)

[[0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.]...[0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.]]--------------------------[[0. 0. 0. ... 1. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.]...[0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 1. 0.]]--------------------------[[0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.]...[0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.]]--------------------------[[0. 0. 0. ... 1. 0. 0.][0. 0. 1. ... 0. 0. 0.][0. 1. 0. ... 0. 0. 0.]...[0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.][0. 0. 0. ... 0. 0. 0.]]训练数据: (55000, 784)训练数据标签: (55000, 10)测试数据: (10000, 784)测试数据标签: (10000, 10)

处理输入的数据

把上述的trainX和testX的形状变成[-1,28,28,1],-1表示不考虑输入图片的数量，1是图片的通道数，所以图片是黑白的如果是RGB数据则通道数则为3

# Reshape the flat 784-pixel rows into 28x28 single-channel images; -1 lets
# the number of images be inferred.
trainX = trainX.reshape(-1, 28, 28, 1)
print("训练数据:", trainX.shape)
testX = testX.reshape(-1, 28, 28, 1)
print("测试数据:", testX.shape)

# The number of input images is not known yet, so the first dimension is None.
X = tf.placeholder("float", [None, 28, 28, 1])
Y = tf.placeholder("float", [None, 10])

初始化权重参数和定义网络结构

由3个卷积层、3个池化层、1个全连接层和1个输出层组成的神经网络

def init_weights(shape):
    """Return a tf.Variable of the given shape initialized by tf.random_normal."""
    return tf.Variable(tf.random_normal(shape))

# Initialize the weights; the convolution kernels are 3x3.
w = init_weights([3, 3, 1, 32])  # 3x3 patch, 1 input channel, 32 output channels
w2 = init_weights([3, 3, 32, 64])
w3 = init_weights([3, 3, 64, 128])
# Fully connected layer: the input size 128*4*4 is the previous layer's
# output flattened to one dimension; the output size is 625.
w4 = init_weights([128 * 4 * 4, 625])
# Output layer: 625 inputs, 10 outputs, one per label class.
w_o = init_weights([625, 10])

def model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    """Build the convolutional network and return its output logits.

    Args:
        X: input image tensor.
        w, w2, w3: kernels of the three convolution layers.
        w4: weights of the fully connected layer.
        w_o: weights of the output layer.
        p_keep_conv: dropout keep probability applied after each conv stage.
        p_keep_hidden: dropout keep probability applied after the fully
            connected layer.

    Returns:
        The un-normalized predictions, tf.matmul(l4, w_o).
    """
    # First convolution + pooling stage, followed by dropout.
    l1a = tf.nn.relu(tf.nn.conv2d(X, w, strides=[1, 1, 1, 1],
                                  padding="SAME"))  # l1a shape=(?,28,28,32)
    l1 = tf.nn.max_pool(l1a, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                        padding="SAME")  # l1 shape=(?,14,14,32)
    l1 = tf.nn.dropout(l1, p_keep_conv)

    # Second convolution + pooling stage.
    l2a = tf.nn.relu(tf.nn.conv2d(l1, w2, strides=[1, 1, 1, 1],
                                  padding="SAME"))
    l2 = tf.nn.max_pool(l2a, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                        padding="SAME")
    l2 = tf.nn.dropout(l2, p_keep_conv)

    # Third convolution + pooling stage, flattened to match w4's input size.
    l3a = tf.nn.relu(tf.nn.conv2d(l2, w3, strides=[1, 1, 1, 1],
                                  padding="SAME"))
    l3 = tf.nn.max_pool(l3a, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                        padding="SAME")
    l3 = tf.reshape(l3, [-1, w4.get_shape().as_list()[0]])  # reshape to (?,2048)
    l3 = tf.nn.dropout(l3, p_keep_conv)

    # Fully connected layer, followed by dropout.
    l4 = tf.nn.relu(tf.matmul(l3, w4))
    l4 = tf.nn.dropout(l4, p_keep_hidden)

    # Output layer.
    pyx = tf.matmul(l4, w_o)
    return pyx  # predicted values

# Dropout placeholders: the fraction of a layer's neurons that is kept.
p_keep_conv = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")

# Build the network and obtain the predictions.
py_x = model(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden)

# Loss function.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
# Learning rate 0.01, decay 0.9.
train_op = tf.train.RMSPropOptimizer(0.01, 0.9).minimize(cost)
predict_op = tf.argmax(py_x, 1)

print("训练数据:", trainX.shape)
print("训练数据标签:", trainY.shape)
print("测试数据:", testX.shape)
print("测试数据标签:", testY.shape)

训练模型和评估模型

# Batch sizes used for training and for evaluation.
batch_size = 128
test_size = 256

# Create a session and launch the graph in it. The variables defined so far
# must be initialized inside the session before any op is run.
with tf.Session() as sess:
    # Remember the parentheses: global_variables_initializer must be called.
    initlize = tf.global_variables_initializer()
    sess.run(initlize)

    for i in range(1000):
        # zip() pairs up matching start/end offsets, giving (start, end)
        # tuples that each delimit one full batch.
        training_batch = zip(range(0, len(trainX), batch_size),
                             range(batch_size, len(trainX) + 1, batch_size))
        for start, end in training_batch:
            sess.run(train_op,
                     feed_dict={X: trainX[start:end],
                                Y: trainY[start:end],
                                p_keep_conv: 0.8,
                                p_keep_hidden: 0.5})

        # Evaluate on a random sample of test_size test images, with dropout
        # disabled (keep probabilities of 1.0).
        test_indices = np.arange(len(testX))
        np.random.shuffle(test_indices)
        test_indices = test_indices[0:test_size]

        print(i, np.mean(np.argmax(testY[test_indices], axis=1) ==
                         sess.run(predict_op,
                                  feed_dict={X: testX[test_indices],
                                             p_keep_conv: 1.0,
                                             p_keep_hidden: 1.0})))

# Author's note: even this network of just a few convolutional layers took
# about two hours to train without a GPU.

下面是训练结果的部分截图：

搭建一个RNN循环神经网络模型用于MNIST的训练

下面直接给出代码的github地址：

RNN构建MNIST模型

加载数据的方式和上面的CNN网络构建是一样的

训练时间十多分钟，但是CPU的占用率太高，感觉我的电脑有点吃不消呀，最终的测试准确率只有百分之八十多，明显效果不是很好

""" Recurrent Neural Network.

A Recurrent Neural Network (LSTM) implementation example using TensorFlow library.
This example is using the MNIST database of handwritten digits (/exdb/mnist/)

Links:
    [Long Short Term Memory](http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf)
    [MNIST Dataset](/exdb/mnist/).

Author: Aymeric Damien
Project: /aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import rnn

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

'''
To classify images using a recurrent neural network, we consider every image
row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then
handle 28 sequences of 28 steps for every sample.
'''

# Training Parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200

# Network Parameters
num_input = 28  # MNIST data input (img shape: 28*28)
timesteps = 28  # timesteps
num_hidden = 128  # hidden layer num of features
num_classes = 10  # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])

# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}


def RNN(x, weights, biases):
    """Run an LSTM over the input rows and return logits from the last output."""
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']


logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, training_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape data to get 28 seq of 28 elements
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            loss, acc = sess.run([loss_op, accuracy],
                                 feed_dict={X: batch_x, Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape(
        (-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))

训练结果的部分截图：

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。