【機器學習】使用CNN神經網絡實現對圖片分類識別及模型轉換


僅做記錄,後面慢慢整理。

訓練代碼:

from skimage import io, transform  # skimage模塊下的io transform(圖像的形變與縮放)模塊
import glob  # glob 文件通配符模塊
import os  # os 處理文件和目錄的模塊
import tensorflow as tf
import numpy as np  # 多維數據處理模塊
import time

# Root folder of the dataset (passed to read_img below)
path = 'E:/tensor_data/powerpoint/test_database/'
# Checkpoint prefix handed to the saver when the model is written out
model_path = 'E:/tensor_data/powerpoint/model/fc_model.ckpt'

# Every image is resized to w x h; c is the channel count of the input placeholder
w, h, c = 100, 100, 3

print("開始執行讀取圖片和數據處理")

# Read the images and build the matching label array
def read_img(path):
    """Load every ``*.jpg`` found in the class sub-folders of *path*.

    Each immediate sub-directory of *path* is treated as one class. Its
    label is the sub-directory's position in alphabetical order, so the
    class -> label mapping is the same on every run and every machine.
    Images are resized to the module-level ``(w, h)``.

    Args:
        path: Dataset root directory.

    Returns:
        A tuple ``(images, labels)``: a float32 array of the resized images
        and an int32 array of class indices, aligned by position.
    """
    entries = os.listdir(path)
    print(entries)

    # os.listdir returns names in arbitrary order; sort before enumerating so
    # that labels are reproducible. os.path.join also works when *path* has no
    # trailing separator.
    cate = [os.path.join(path, name) for name in sorted(entries)
            if os.path.isdir(os.path.join(path, name))]
    print("數據集地址:"+path)
    imgs = []
    labels = []
    for idx, folder in enumerate(cate):
        for im in sorted(glob.glob(os.path.join(folder, '*.jpg'))):
            print('reading the images:%s' % (im))
            # NOTE(review): assumes every file decodes to the same channel
            # layout; a grayscale or RGBA image would give a different shape
            # and break the final array conversion - confirm against dataset.
            img = io.imread(im)
            img = transform.resize(img, (w, h))
            imgs.append(img)
            # labels[i] is the class index of imgs[i]
            labels.append(idx)
    return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)


# Load the whole dataset: images and their integer labels
data, label = read_img(path)

# Shuffle images and labels with one shared permutation so pairs stay aligned
# Number of images (size of the first axis)
num_example = data.shape[0]
arr = np.arange(num_example)
np.random.shuffle(arr)
data = data[arr]
label = label[arr]

# Split into a training set and a validation set
ratio = 0.8
# Builtin int: np.int was only an alias of it and was removed in NumPy 1.24
s = int(num_example * ratio)
x_train = data[:s]
y_train = label[:s]
x_val = data[s:]
y_val = label[s:]

# ----------------- build the network ----------------------
# The CNN has 7 weight layers: four convolutional layers (each followed by a
# ReLU activation and max-pooling) and then three fully connected layers.
# Placeholders fix the shape and dtype of the network inputs
x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x')
y_ = tf.placeholder(tf.int32, shape=[None, ], name='y_')


def _conv_relu(scope, inputs, kernel_shape):
    """Build a stride-1 SAME convolution + bias + ReLU inside variable scope *scope*."""
    with tf.variable_scope(scope):
        weights = tf.get_variable("weight", kernel_shape,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        # One bias per output channel (last entry of the kernel shape)
        biases = tf.get_variable("bias", [kernel_shape[-1]], initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, biases))


def _max_pool_2x2(inputs):
    """Apply 2x2 max-pooling with stride 2 and VALID padding."""
    return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')


def _fc_params(in_dim, out_dim, regularizer):
    """Create weight/bias variables of a dense layer in the current variable scope."""
    weights = tf.get_variable("weight", [in_dim, out_dim],
                              initializer=tf.truncated_normal_initializer(stddev=0.1))
    if regularizer is not None:
        # Collected here; the caller decides how to fold 'losses' into the objective
        tf.add_to_collection('losses', regularizer(weights))
    biases = tf.get_variable("bias", [out_dim], initializer=tf.constant_initializer(0.1))
    return weights, biases


def inference(input_tensor, train, regularizer, num_classes=5):
    """Build the CNN graph and return the un-normalised class scores.

    Architecture: four conv+ReLU+max-pool stages (32, 64, 128, 128 filters)
    followed by dense layers of 1024, 512 and ``num_classes`` units.
    Variable scopes and the output op name (``layer11-fc3/output``) are kept
    stable because checkpoints and the export scripts refer to them.

    Args:
        input_tensor: Image batch tensor; the first convolution expects
            3 input channels.
        train: When truthy, ``tf.nn.dropout(..., 0.5)`` is inserted after the
            first two dense layers.
        regularizer: Callable applied to each dense weight matrix, with the
            result added to the ``'losses'`` collection; ``None`` disables it.
        num_classes: Width of the output layer (defaults to 5).

    Returns:
        The logits tensor named ``output`` inside scope ``layer11-fc3``.
    """
    # ----------------------- conv stage 1 ----------------------------
    relu1 = _conv_relu('layer1-conv1', input_tensor, [5, 5, 3, 32])
    with tf.name_scope("layer2-pool1"):
        pool1 = _max_pool_2x2(relu1)
    # ----------------------- conv stage 2 ----------------------------
    relu2 = _conv_relu("layer3-conv2", pool1, [5, 5, 32, 64])
    with tf.name_scope("layer4-pool2"):
        pool2 = _max_pool_2x2(relu2)
    # ----------------------- conv stage 3 ----------------------------
    relu3 = _conv_relu("layer5-conv3", pool2, [3, 3, 64, 128])
    with tf.name_scope("layer6-pool3"):
        pool3 = _max_pool_2x2(relu3)
    # ----------------------- conv stage 4 ----------------------------
    relu4 = _conv_relu("layer7-conv4", pool3, [3, 3, 128, 128])
    with tf.name_scope("layer8-pool4"):
        pool4 = _max_pool_2x2(relu4)
        # Flattened feature size comes from the static shape, so input sizes
        # other than 100x100 work too (100 -> 50 -> 25 -> 12 -> 6 gives 6*6*128).
        # If the spatial size is unknown at graph-build time, keep the original
        # 100x100 assumption.
        dims = pool4.get_shape().as_list()[1:]
        if None in dims:
            nodes = 6 * 6 * 128
        else:
            nodes = dims[0] * dims[1] * dims[2]
        reshaped = tf.reshape(pool4, [-1, nodes])
    # ----------------------- dense layer 1 ---------------------------
    with tf.variable_scope('layer9-fc1'):
        fc1_weights, fc1_biases = _fc_params(nodes, 1024, regularizer)
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        # Dropout is a guard against over-fitting; only wanted while training
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)
    # ----------------------- dense layer 2 ---------------------------
    with tf.variable_scope('layer10-fc2'):
        fc2_weights, fc2_biases = _fc_params(1024, 512, regularizer)
        fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_biases)
        if train:
            fc2 = tf.nn.dropout(fc2, 0.5)
    # ----------------------- output layer ----------------------------
    with tf.variable_scope('layer11-fc3'):
        fc3_weights, fc3_biases = _fc_params(512, num_classes, regularizer)
        # The explicit name lets export tools address the output node
        logit = tf.add(tf.matmul(fc2, fc3_weights), fc3_biases, name="output")

    return logit


# --------------------------- end of network ---------------------------
# L2 regularizer with scale 0.0001
regularizer = tf.contrib.layers.l2_regularizer(0.0001)
# Instantiate the network on the input placeholder.
# NOTE(review): train=False means the dropout branches in inference() are
# never built, not even for training - confirm this is intended.
logits = inference(x, False, regularizer)

# Multiplying by 1 exists only to give the output a stable name
# ('logits_eval') that can be looked up when the model is reloaded.
b = tf.constant(value=1, dtype=tf.float32)
logits_eval = tf.multiply(logits, b, name='logits_eval')

# Objective: mean cross-entropy over the batch plus the regularization terms
# that inference() put into the 'losses' collection. Previously those terms
# were collected but never optimized, and the loss was a per-example vector.
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_))
loss = tf.add_n([cross_entropy] + tf.get_collection('losses'))
# Adam optimizer with learning rate 0.001
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# Accuracy: fraction of examples whose arg-max class equals the label
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# Generator that walks the data one batch at a time
def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
    """Yield ``(inputs, targets)`` slices of exactly *batch_size* items.

    Only full batches are produced; leftover items that do not fill a
    complete batch are skipped. With ``shuffle=True`` the items are drawn
    through a random permutation created once per call.
    """
    assert len(inputs) == len(targets)
    total = len(inputs)

    order = None
    if shuffle:
        order = np.arange(total)
        np.random.shuffle(order)

    for begin in range(0, total - batch_size + 1, batch_size):
        end = begin + batch_size
        # Index through the permutation when shuffling, otherwise take a plain slice
        picker = order[begin:end] if shuffle else slice(begin, end)
        yield inputs[picker], targets[picker]


# Train and validate; n_epoch can be raised for longer training


# Number of passes over the training set
n_epoch = 20  # 10
# Number of images fed per step
batch_size = 64
# max_to_keep=4: only the four most recent checkpoints stay on disk
saver = tf.train.Saver(max_to_keep=4)
with tf.Session() as sess:
    # Initialise all variables
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epoch):
        start_time = time.time()

        # training set
        train_loss, train_acc, n_batch = 0, 0, 0
        for x_train_a, y_train_a in minibatches(x_train, y_train, batch_size, shuffle=True):
            _, err, ac = sess.run([train_op, loss, acc], feed_dict={x: x_train_a, y_: y_train_a})
            train_loss += err
            train_acc += ac
            n_batch += 1
            # np.sum collapses the accumulated loss to a single number (the
            # fetched loss may be a per-example vector); printed as a running
            # average over the batches seen so far in this epoch
            print("   train loss: %f" % (np.sum(train_loss) / n_batch))
            print("   train acc: %f" % (np.sum(train_acc) / n_batch))

        # validation set
        val_loss, val_acc, n_batch = 0, 0, 0
        for x_val_a, y_val_a in minibatches(x_val, y_val, batch_size, shuffle=False):
            err, ac = sess.run([loss, acc], feed_dict={x: x_val_a, y_: y_val_a})
            val_loss += err
            val_acc += ac
            n_batch += 1
            print("   validation loss: %f" % (np.sum(val_loss) / n_batch))
            print("   validation acc: %f" % (np.sum(val_acc) / n_batch))

        # start_time was recorded but never reported before
        print("epoch %d of %d took %.2fs" % (epoch + 1, n_epoch, time.time() - start_time))

        # Save the model and its parameters on every second epoch
        if epoch % 2 == 0:
            # saver.save returns the checkpoint prefix it wrote; report that
            # instead of printing the repr of the graph's name_scope method
            saved_path = saver.save(sess, model_path, global_step=epoch)
            print("model saved to: %s" % saved_path)

測試代碼:

from skimage import io, transform
import tensorflow as tf
import numpy as np
import os  # os 處理文件和目錄的模塊
import glob  # glob 文件通配符模塊

# Simple prediction script that runs the trained model on a handful of images.
# For larger sets, load the data the same way cnn.py does.


path = 'E:/tensor_data/powerpoint/test_powerpoint/'
# Class index -> display name
flower_dict = dict(enumerate(['其他', '文檔', '幻燈片', '黑板', '不可能出現的類別']))

# Target image size (w x h) and channel count
w, h, c = 100, 100, 3


# Read the images to classify
def read_img(path):
    """Load every ``*.jpg`` found in the sub-folders of *path*.

    Sub-folders and the files inside them are visited in alphabetical order,
    so the order of the returned images (and of the predictions printed for
    them) is reproducible. Images are resized to the module-level ``(w, h)``.

    Args:
        path: Directory whose immediate sub-directories contain the images.

    Returns:
        A float32 array holding the resized images.
    """
    # os.path.join also works when *path* has no trailing separator
    cate = [os.path.join(path, name) for name in sorted(os.listdir(path))
            if os.path.isdir(os.path.join(path, name))]
    imgs = []
    for folder in cate:
        for im in sorted(glob.glob(os.path.join(folder, '*.jpg'))):
            print('reading the images:%s' % (im))
            # NOTE(review): assumes every file decodes to the same channel
            # layout; a grayscale or RGBA image would give a different shape
            # and break the final array conversion - confirm against the data.
            img = io.imread(im)
            img = transform.resize(img, (w, h))
            imgs.append(img)
    return np.asarray(imgs, np.float32)


# Load the images to classify
data = read_img(path)
with tf.Session() as sess:
    # Take graph structure and weights from the same (newest) checkpoint.
    # The .meta file used to be hard-coded to step 18 while the weights came
    # from latest_checkpoint, so the two could drift apart.
    ckpt = tf.train.latest_checkpoint('E:/tensor_data/powerpoint/model/')
    if ckpt is None:
        raise IOError('no checkpoint found in E:/tensor_data/powerpoint/model/')
    saver = tf.train.import_meta_graph(ckpt + '.meta')
    # Restores the saved values into this session
    saver.restore(sess, ckpt)

    graph = tf.get_default_graph()
    # Input placeholder and named output tensor, looked up by name
    x = graph.get_tensor_by_name("x:0")
    feed_dict = {x: data}

    logits = graph.get_tensor_by_name("logits_eval:0")

    classification_result = sess.run(logits, feed_dict)

    # Raw score matrix: one row per image
    print(classification_result)
    # Index of the highest score in every row. np.argmax works directly on the
    # fetched array, so no new ops are added to the graph and no extra session
    # runs are needed.
    output = np.argmax(classification_result, 1)
    print(output)
    # Map every index to its class name
    for i, class_idx in enumerate(output):
        print("", i + 1, "張圖片預測:" + flower_dict[int(class_idx)])

這裡生成的模型是ckpt格式。參考的CNN代碼中沒有指定輸入輸出結點名稱,這裡直接在源碼第11層(layer11-fc3)為輸出運算加上 name="output" 即可;輸入結點就是名為 x 的佔位符。

使用Netron可以快速查看模型結構,找到輸入輸出結點名稱。

也可以使用代碼打印ckpt中保存的全部變量名稱(注意:這裡列出的是模型變量,而不是計算圖中的運算結點):

import os
import tensorflow as tf
# Checkpoint prefix to inspect
checkpoint_path = 'E:/tensor_data/powerpoint/model/fc_model.ckpt-18'
# pywrap_tensorflow was referenced without ever being imported (NameError);
# tf.train.NewCheckpointReader is the public entry point to the same reader.
reader = tf.train.NewCheckpointReader(checkpoint_path)
var_to_shape_map = reader.get_variable_to_shape_map()
# Sorted so the listing is stable between runs
for key in sorted(var_to_shape_map):
    print('tensor_name: ', key)

拿到輸出結點名稱后,就可以使用腳本對ckpt模型轉換了,轉成pb格式

第一個參數是ckpt模型地址(不含 .meta 後綴),第二個是pb模型輸出地址,第三個是輸出tensor的名稱(例如 layer11-fc3/output:0)。

import tensorflow as tf


def read_graph_from_ckpt(ckpt_path, out_pb_path, output_name):
    """Freeze a checkpoint into a single ``.pb`` GraphDef file.

    Args:
        ckpt_path: Checkpoint prefix; ``ckpt_path + '.meta'`` must exist.
        out_pb_path: Path of the frozen graph to write.
        output_name: Name of the output tensor, e.g. ``'layer11-fc3/output:0'``.
    """
    # Import into a private graph so that repeated calls, or other graphs built
    # in the same process, do not pile up in the default graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load the network structure from the .meta file
        saver = tf.train.import_meta_graph(ckpt_path + '.meta', clear_devices=True)
    with tf.Session(graph=graph) as sess:
        # Load the parameters; restore assigns the saved variables, so running
        # an initializer beforehand is unnecessary.
        saver.restore(sess, ckpt_path)
        output_tf = graph.get_tensor_by_name(output_name)

        # Freeze: replace variables by constants, keeping what the output needs
        pb_graph = tf.graph_util.convert_variables_to_constants(sess, graph.as_graph_def(), [output_tf.op.name])

        # Write the serialized GraphDef (GFile replaces the deprecated FastGFile)
        with tf.gfile.GFile(out_pb_path, mode='wb') as f:
            f.write(pb_graph.SerializeToString())


read_graph_from_ckpt('E:/tensor_data/powerpoint/model/fc_model.ckpt-18', 'E:/tensor_data/powerpoint/model/idcard_seg.pb', 'layer11-fc3/output:0')

拿到pb模型後,再使用Netron查看就清晰了很多~~~~

由於我訓練模型是為了手機使用的,因此還需要將pb模型轉成tflite格式

查看官方文檔發現已經提供了轉換的py接口,直接使用就好啦~

input_arrays 是輸入結點名稱,output_arrays 是輸出結點名稱,使用Netron打開pb模型看一下就能找到。

生成的 converted_model.tflite 會保存在運行腳本時的當前工作目錄下(通常就是工程根目錄)。

import tensorflow as tf

# Frozen graph produced by the ckpt -> pb step
graph_def_file = "E:/tensor_data/powerpoint/model/idcard_seg.pb"
# Node names (without the ':0' tensor suffix) of the model input and output
input_arrays = ["x"]
output_arrays = ["layer11-fc3/output"]

converter = tf.lite.TFLiteConverter.from_frozen_graph(
    graph_def_file, input_arrays, output_arrays)
tflite_model = converter.convert()
# The with-block guarantees the file is flushed and closed
with open("converted_model.tflite", "wb") as f:
    f.write(tflite_model)

 

那個啥,完全沒有測試模型的準確率emmm先試試看吧!


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM