1500字范文 > 【吴恩达深度学习编程作业】4.4特殊应用——人脸识别和神经风格转换（问题未解决）

【吴恩达深度学习编程作业】4.4特殊应用——人脸识别和神经风格转换（问题未解决）

时间：2021-12-28 05:29:19

参考文章：1.人脸识别与神经风格转换 2.神经风格转换编程作业

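神经风格转换中遇到的问题已经解决了，并将解决方案写在了代码注释里面；但是人脸识别那里运行到构建database这一步就出错了，目前仍没有找到解决方案。报错信息说明MaxPool在CPU上只支持NHWC（channels_last）格式，而代码里设置的是channels_first，也就是说TensorFlow实际上是在CPU上运行的。我觉得是因为我电脑的CUDA版本是9.1.84，而2.3.1版本的tensorflow-gpu需要CUDA 10.1，所以GPU没有被启用；不想再卸载重装tf了。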

报错信息：

Traceback (most recent call last):
  File "G:/Project/PYTHON/Demo01/Deep_Learning/test4_4/人脸识别.py", line 108, in <module>
    database["danielle"] = Deep_Learning.test4_4.fr_utils.img_to_encoding("image1/danielle.png", FRmodel)
  File "G:\Project\PYTHON\Demo01\Deep_Learning\test4_4\fr_utils.py", line 198, in img_to_encoding
    embedding = model.predict_on_batch(x_train)
  File "F:\Python\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 1214, in predict_on_batch
    outputs = self.predict_function(inputs)
  File "F:\Python\lib\site-packages\tensorflow\python\keras\backend.py", line 3822, in __call__
    self._make_callable(feed_arrays, feed_symbols, symbol_vals, session)
  File "F:\Python\lib\site-packages\tensorflow\python\keras\backend.py", line 3759, in _make_callable
    callable_fn = session._make_callable_from_options(callable_opts)
  File "F:\Python\lib\site-packages\tensorflow\python\client\session.py", line 1505, in _make_callable_from_options
    return BaseSession._Callable(self, callable_options)
  File "F:\Python\lib\site-packages\tensorflow\python\client\session.py", line 1460, in __init__
    session._session, options_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Default MaxPoolingOp only supports NHWC on device type CPU
	 [[{{node max_pooling2d/MaxPool}}]]

1.人脸验证与人脸识别？？？

main.py

"""Face verification and face recognition with a pre-trained FaceNet model.

Steps:
1. Implement the triplet loss function.
2. Use a pre-trained model to map a face image to a 128-dimensional encoding.
3. Use these encodings to perform face verification and face recognition.
"""
from keras.models import Sequential, Model
from keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.merge import Concatenate
from keras.layers.core import Lambda, Flatten, Dense
from keras.initializers import glorot_uniform
from keras.engine.topology import Layer
from keras import backend as K
# Used to draw the model details
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
# The pre-trained weights expect channels-first (NCHW) inputs.
K.set_image_data_format('channels_first')
import time
import warnings
import cv2
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # select the GPU device
import numpy as np
from numpy import genfromtxt
import pandas as pd
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import Deep_Learning.test4_4.fr_utils
from Deep_Learning.test4_4.inception_blocks_v2 import *

# TensorFlow's CPU MaxPool kernel only supports NHWC. Because this model is
# built channels-first, running it without a usable GPU fails at the first
# forward pass with
# "Default MaxPoolingOp only supports NHWC on device type CPU".
# Warn early instead of failing after the (slow) weight loading.
if not tf.config.list_physical_devices('GPU'):
    warnings.warn(
        "No GPU is visible to TensorFlow; the channels_first model will fail "
        "in MaxPool on CPU. Install a CUDA/cuDNN version matching this "
        "TensorFlow build, or rebuild the model with channels_last.",
        RuntimeWarning,
    )

# Print arrays in full (older NumPy versions accepted threshold=np.nan).
np.set_printoptions(threshold=np.inf)

# A ConvNet encodes each face image as a 128-dimensional vector:
# input (m, n_C, n_H, n_W) = (m, 3, 96, 96), output (m, 128).
FRmodel = faceRecoModel(input_shape=(3, 96, 96))

# Total number of parameters; the original run printed 3743280.
print("参数数量：" + str(FRmodel.count_params()))

# Draw the model details.
plot_model(FRmodel, to_file="FRmodel.png")
SVG(model_to_dot(FRmodel).create(prog='dot', format='svg'))


def triplet_loss(y_true, y_pred, alpha=0.2):
    """Compute the triplet loss.

    :param y_true: true labels; required by the Keras loss signature but
        not used here.
    :param y_pred: sequence of three tensors (anchor, positive, negative),
        the encodings of the anchor, positive and negative images. The test
        below uses shape (3, 128); the last axis is the encoding axis.
    :param alpha: margin hyper-parameter.
    :return: scalar tensor, the loss value.
    """
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    # Squared L2 distance between encodings: sum (not mean) over the
    # encoding axis, as in the FaceNet triplet-loss formula.
    pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
    neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)

    # Difference of the two distances plus the margin.
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)

    # Clamp at zero and sum over the training examples.
    loss = tf.reduce_sum(tf.maximum(basic_loss, 0))
    return loss


# Test triplet_loss.
# NOTE: the value 4.522995 recorded for an earlier run was produced by a
# variant that averaged over the encoding axis; it no longer applies.
print("=====================测试triplet_loss===============")
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    y_true = (None, None, None)
    y_pred = (
        tf.compat.v1.random_normal([3, 128], mean=6, stddev=0.1, seed=1),
        tf.compat.v1.random_normal([3, 128], mean=1, stddev=1, seed=1),
        tf.compat.v1.random_normal([3, 128], mean=3, stddev=4, seed=1),
    )
    loss = triplet_loss(y_true, y_pred)
    print("loss = " + str(loss.eval()))

# Load the pre-trained model and time it.
# time.clock() was removed in Python 3.8; perf_counter() is the replacement.
start_time = time.perf_counter()
# Compile the model.
FRmodel.compile(optimizer='adam', loss=triplet_loss, metrics=['accuracy'])
# Load the weights.
Deep_Learning.test4_4.fr_utils.load_weights_from_FaceNet(FRmodel)
end_time = time.perf_counter()
# Elapsed time; the original run reported 1 min 16 s.
minium = end_time - start_time
print("执行了：" + str(int(minium / 60)) + "分" + str(int(minium % 60)) + "秒")

# Build the face verification database. img_to_encoding(image_path, model)
# runs a forward pass of the model on the image and returns its encoding.
IMAGE_DIR = "image1"
DATABASE_IMAGES = {
    "danielle": "danielle.png",
    "younes": "younes.jpg",
    "tian": "tian.jpg",
    "andrew": "andrew.jpg",
    "kian": "kian.jpg",
    "dan": "dan.jpg",
    "sebastiano": "sebastiano.jpg",
    "bertrand": "bertrand.jpg",
    "kevin": "kevin.jpg",
    "felix": "felix.jpg",
    "benoit": "benoit.jpg",
    "arnaud": "arnaud.jpg",
}
database = {}
for person, filename in DATABASE_IMAGES.items():
    database[person] = Deep_Learning.test4_4.fr_utils.img_to_encoding(
        IMAGE_DIR + "/" + filename, FRmodel)


def verity(image_path, identity, database, model, threshold=0.7):
    """Verify that the face in ``image_path`` belongs to ``identity``.

    :param image_path: path of the camera picture.
    :param identity: string, name of the person to verify against.
    :param database: dict mapping member names to their encodings.
    :param model: Keras model instance used to compute the encoding.
    :param threshold: maximum L2 distance accepted as a match.
    :return: (dist, is_door_open) -- the L2 distance between the picture's
        encoding and the stored encoding, and whether the door should open.
    :raises KeyError: if ``identity`` is not a key of ``database``.
    """
    # 1. Encode the picture.
    encoding = Deep_Learning.test4_4.fr_utils.img_to_encoding(image_path, model)
    # 2. L2 distance to the encoding stored for this identity.
    dist = np.linalg.norm(encoding - database[identity])
    # 3. Decide whether to open the door.
    is_door_open = bool(dist < threshold)
    if is_door_open:
        print("欢迎" + str(identity) + "回家")
    else:
        print("经验证，您与" + str(identity) + "不符")
    return dist, is_door_open


print("======================测试verity===================")
# NOTE(review): these calls previously read from "images1/", which does not
# match the "image1/" directory used for the database. Presumably that was a
# typo: if fr_utils loads images with cv2.imread, a missing file yields None
# and the later slicing fails -- confirm the directory name on disk.
verity(IMAGE_DIR + "/camera_0.jpg", "younes", database, FRmodel)
verity(IMAGE_DIR + "/camera_2.jpg", "kian", database, FRmodel)


def who_is_it(image_path, database, model, threshold=0.7):
    """Find the database entry closest to the face in ``image_path``.

    :param image_path: path of the picture.
    :param database: dict mapping names to encodings.
    :param model: Keras model instance used to compute the encoding.
    :param threshold: distance above which the face is reported as unknown.
    :return: (min_dist, identity) -- the smallest L2 distance found and the
        matching name. ``identity`` is None when no entry is closer than the
        initial distance of 100 (e.g. the database is empty).
    """
    # 1. Encode the picture.
    encoding = Deep_Learning.test4_4.fr_utils.img_to_encoding(image_path, model)

    # 2. Scan the database for the closest encoding. min_dist starts at a
    #    large value; identity starts as None so it is always bound.
    min_dist = 100
    identity = None
    for name, db_enc in database.items():
        dist = np.linalg.norm(encoding - db_enc)
        if dist < min_dist:
            min_dist = dist
            identity = name

    # 3. Report whether the closest match is close enough.
    if min_dist > threshold:
        print("抱歉，您的信息不在数据库中")
    else:
        print("姓名" + str(identity) + " 差距：" + str(min_dist))
    return min_dist, identity


print("======================测试who_is_it===================")
who_is_it(IMAGE_DIR + "/camera_0.jpg", database, FRmodel)

2.神经风格转换

main.py

"""Neural style transfer.

Steps:
1. Implement the neural style transfer algorithm.
2. Use it to generate a new artistic image.

Earlier exercises optimised a cost function to obtain parameter values; here
the cost function is optimised to obtain pixel values.
"""
import os

import imageio
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import Deep_Learning.test4_4.nst_utils

# Load the VGG-19 model. If this raises
# "TypeError: conv2d_v2() got an unexpected keyword argument 'filter'",
# change line 112 of nst_utils to call tf.compat.v1.nn.conv2d.
model = Deep_Learning.test4_4.nst_utils.load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
# Prints e.g. {'input': <tf.Variable 'Variable:0' shape=(1, 300, 400, 3) dtype=float32>...
print(model)

# ---- Content cost ----
# imageio.imread replaces the removed scipy.misc.imread.
content_image = imageio.imread("image2/louvre.jpg")
imshow(content_image)
plt.show()


def compute_content_cost(a_C, a_G):
    """Compute the content cost.

    :param a_C: tensor of shape (1, n_H, n_W, n_C), hidden-layer activations
        for the content image C.
    :param a_G: tensor of shape (1, n_H, n_W, n_C), hidden-layer activations
        for the generated image G.
    :return: J_content, scalar tensor.
    """
    # Dimensions are read from a_G.
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll both activations to 2-D matrices of shape (n_C, n_H * n_W).
    a_C_unrolled = tf.transpose(tf.reshape(a_C, [n_H * n_W, n_C]))
    a_G_unrolled = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

    J_content = 1 / (4 * n_H * n_W * n_C) * tf.reduce_sum(
        tf.square(tf.subtract(a_C_unrolled, a_G_unrolled)))
    return J_content


tf.compat.v1.reset_default_graph()
# The `with` block closes the session; no explicit close() is needed.
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    a_C = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_content = compute_content_cost(a_C, a_G)
    # The original run printed J_content = 7.6410217
    print("J_content = " + str(J_content.eval()))

# ---- Style cost ----
style_image = imageio.imread("image2/monet_800600.jpg")
imshow(style_image)
plt.show()


def gram_matrix(A):
    """Compute the style (Gram) matrix GA = A * A^T.

    :param A: matrix of shape (n_C, n_H * n_W).
    :return: GA, the Gram matrix of A, shape (n_C, n_C).
    """
    GA = tf.matmul(A, tf.transpose(A))
    return GA


tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    A = tf.compat.v1.random_normal([3, 2 * 1], mean=1, stddev=4)
    GA = gram_matrix(A)
    print("GA = " + str(GA.eval()))
# Output of the original run:
# GA = [[ 15.615461  12.248833 -29.87157 ]
#       [ 12.248833  10.877857 -19.879116]
#       [-29.87157  -19.879116  67.08007 ]]


def compute_layer_style_cost(a_S, a_G):
    """Compute the style cost of a single layer.

    :param a_S: tensor of shape (1, n_H, n_W, n_C), hidden-layer activations
        for the style image.
    :param a_G: tensor of shape (1, n_H, n_W, n_C), hidden-layer activations
        for the generated image.
    :return: J_style_layer, scalar tensor.
    """
    # Dimensions are read from a_G.
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll the activations into 2-D matrices of shape (n_H * n_W, n_C).
    a_S = tf.reshape(a_S, (n_H * n_W, n_C))
    a_G = tf.reshape(a_G, (n_H * n_W, n_C))

    # Gram matrices are computed on the (n_C, n_H * n_W) transposes.
    GS = gram_matrix(tf.transpose(a_S))
    GG = gram_matrix(tf.transpose(a_G))

    J_style_layer = 1 / (4 * (n_C ** 2) * ((n_H * n_W) ** 2)) \
        * tf.reduce_sum(tf.square(tf.subtract(GS, GG)))
    return J_style_layer


tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as test:
    tf.compat.v1.set_random_seed(1)
    a_S = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    a_G = tf.compat.v1.random_normal([1, 4, 4, 3], mean=1, stddev=4)
    J_style_layer = compute_layer_style_cost(a_S, a_G)
    # The original run printed J_style_layer = 2.2849257
    print("J_style_layer = " + str(J_style_layer.eval()))

# Style weights: combine the style cost of several layers.
STYLE_LAYERS = [
    ('conv1_1', 0.2),
    ('conv2_1', 0.2),
    ('conv3_1', 0.2),
    ('conv4_1', 0.2),
    ('conv5_1', 0.2),
]


def compute_style_cost(model, STYLE_LAYERS):
    """Compute the overall style cost from several chosen layers.

    Relies on the module-level ``sess`` (created further down) and on the
    style image having already been assigned to the model input.

    :param model: the TensorFlow model (dict of layer name -> tensor).
    :param STYLE_LAYERS: list of (layer_name, coefficient) pairs.
    :return: J_style, tensor, the style cost.
    """
    J_style = 0
    for layer_name, coeff in STYLE_LAYERS:
        # Output tensor of the currently selected layer.
        out = model[layer_name]
        # a_S: activations evaluated now, with the style image as input.
        a_S = sess.run(out)
        # a_G: the same layer's tensor, not evaluated yet; it is evaluated
        # later, once the generated image G is the model input.
        a_G = out
        J_style_layer = compute_layer_style_cost(a_S, a_G)
        # Add this layer's weighted contribution.
        J_style += coeff * J_style_layer
    return J_style


def total_cost(J_content, J_style, alpha=10, beta=40):
    """Compute the total cost.

    :param J_content: content cost.
    :param J_style: style cost.
    :param alpha: hyper-parameter, weight of the content cost.
    :param beta: hyper-parameter, weight of the style cost.
    :return: J, the total cost.
    """
    J = alpha * J_content + beta * J_style
    return J


# The inputs are drawn from NumPy, so NumPy (not TensorFlow) must be seeded
# for a reproducible check; with seed 3 this prints J = 35.34667875478276.
np.random.seed(3)
J_content = np.random.randn()
J_style = np.random.randn()
J = total_cost(J_content, J_style)
print("J = " + str(J))

# ---- Putting it all together ----
tf.compat.v1.reset_default_graph()

# 1. Create an InteractiveSession. Unlike a regular session it installs
#    itself as the default session, so tensors can be evaluated without
#    passing the session object around.
sess = tf.compat.v1.InteractiveSession()

# 2. Load, reshape and normalise the content image.
content_image = imageio.imread("image2/dks.png")
content_image = Deep_Learning.test4_4.nst_utils.reshape_and_normalize_image(content_image)

# 3. Load, reshape and normalise the style image.
style_image = imageio.imread("image2/xingkong.jpg")
style_image = Deep_Learning.test4_4.nst_utils.reshape_and_normalize_image(style_image)

# 4. Initialise the generated image as a noisy version of the content image.
#    Mostly noise but still slightly correlated with the content image, which
#    helps the generated image match the content more quickly.
generated_image = Deep_Learning.test4_4.nst_utils.generate_noise_image(content_image)
imshow(generated_image[0])
plt.show()

# 5. Load the VGG-19 model.
model = Deep_Learning.test4_4.nst_utils.load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")

# 6. Build the TensorFlow graph.
# 6.1 Run the content image through the model and compute the content cost.
# 6.1.1 Assign the content image as the input of the VGG model.
sess.run(model['input'].assign(content_image))
# 6.1.2 a_C: evaluated activations of layer "conv4_2".
out = model['conv4_2']
a_C = sess.run(out)
# 6.1.3 a_G: the same layer's tensor. It is not evaluated here; it is
#       evaluated and updated on every iteration inside model_nn().
a_G = out
J_content = compute_content_cost(a_C, a_G)

# 6.2 Run the style image through the model and compute the style cost.
sess.run(model['input'].assign(style_image))
J_style = compute_style_cost(model, STYLE_LAYERS)

# 6.3 Total cost.
J = total_cost(J_content, J_style, 10, 40)

# 6.4 Adam optimiser with learning rate 2.0.
optimizer = tf.compat.v1.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)


def model_nn(sess, input_image, num_iterations=500):
    """Run the optimisation loop and return the generated image.

    Initialises the graph variables, assigns ``input_image`` (the initial
    generated image) as the VGG-19 input, and runs ``train_step`` repeatedly.
    Uses the module-level ``model``, ``train_step``, ``J``, ``J_content`` and
    ``J_style``. Every 20 iterations the costs are printed and a snapshot is
    saved under ``output/``.

    :param sess: the session to run in.
    :param input_image: the initial generated image.
    :param num_iterations: number of optimisation steps.
    :return: the final generated image.
    """
    # Make sure the snapshot directory exists before anything is saved.
    os.makedirs("output", exist_ok=True)

    # Initialise global variables.
    sess.run(tf.compat.v1.global_variables_initializer())

    # Feed the noisy initial image through the model.
    generated_image = sess.run(model["input"].assign(input_image))

    for i in range(num_iterations):
        # One optimisation step on the total cost.
        sess.run(train_step)
        # Current generated image = current value of the model input.
        generated_image = sess.run(model["input"])

        if i % 20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration" + str(i) + ":")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))
            Deep_Learning.test4_4.nst_utils.save_image("output/" + str(i) + ".png", generated_image)

    # Save the final generated image.
    Deep_Learning.test4_4.nst_utils.save_image('output/generated_image.jpg', generated_image)
    return generated_image


model_nn(sess, generated_image)
# To use your own pictures, resize them to 400*300 and replace the image files.

运行结果

Iteration20:
total cost = 945719.5
content cost = 12107.339
style cost = 20616.152
Iteration40:
total cost = 311538.5
content cost = 10928.709
style cost = 5056.2847
Iteration60:
total cost = 200105.4
content cost = 10215.002
style cost = 2448.885
...
total cost = 51185.676
content cost = 4928.8657
style cost = 47.425514