神經網絡實現Discuz驗證碼識別


南京老門東一隅

最近自己嘗試了網上的驗證碼識別代碼項目,該小項目見以下鏈接: >https://cuijiahua.com/blog/2018/01/dl_5.html

數據也就用了作者上傳的60000張Discuz驗證碼。作者是創建了一個類,封裝了所有的變量和函數;我看了他的代碼之后,自己嘗試着不用類去實現該網絡。

作者說自己可以訓練到90%以上的精度。然而我看了他的代碼后發現,作者是用訓練過的數據來進行測試,即訓練集和測試集是一樣的。

我想着,測試集應該是不能參與訓練過程中的,比如說我們在做mnist手寫數字識別的時候,訓練集與測試集就一定是不一樣的。

from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from /tmp/data/ with integer (non one-hot) labels; the log printed
# below lists separate "train" and "t10k" archives, i.e. distinct train/test data.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz

於是我在自己實現的過程中,將數據集打亂后取10000個作為測試集,不參與訓練,剩余的50000張驗證碼作為訓練集。

訓練過程中發現只有將學習率設置為0.001時,loss才會降下去,太高,loss會卡在0.07;其次,我的訓練精度最多只能到50%左右,但是我用訓練數據來測試保存的模型,精度確實達到了90%,即作者看到的精度。不過這個模型不具有泛化能力,它在沒見過的測試集上只有50%的精確度。

同時這個代碼還有問題:測算精確度時是按單個字符計算的,一張圖中4個字符有兩個識別錯誤的話,正確率是50%而不是0。但實際上,一張圖的4個字符中只要有一個識別錯誤,該驗證碼的識別就應該算失敗。因此這個精確度實在是有相當大的水分。

於是要考慮解決辦法。首先我嘗試着下調學習率,發現永遠還是到50%就上不去了。

接下來我在原來的3層卷積層上,又加了一層卷積層。然而這並沒有提升多少精度。

隨后我又加入了一層全連接層,期望可以擬合得更好一些,但是這樣讓我陷入了麻煩。

我的loss值卡在了0.07,無論我的學習率是0.1還是0.00001,哪怕迭代一百萬次也是如此。這時候的測試精度只有······3%。

我不知道是什么問題更不知道如何改進。

這更讓我覺得沒有人帶我,多么地難受;同時也更深刻地體驗到理論知識是多么地重要(當然我一直知道)。

我自己的代碼附上,大家可以相互交流。數據可以在文章頂部的鏈接里下載,作者壓縮好的。

以下是訓練腳本:(理論上python3和python2應該都能跑。我是用2寫的)
訓練中我使用了學習率衰減,本來還想用dropout結果發現這個訓練基本不給我過擬合的機會所以訓練加了沒有意義。

from __future__ import print_function, division, absolute_import
import tensorflow as tf
import os
import cv2
import matplotlib.pyplot as plt 
import random
import numpy as np
from optparse import OptionParser

path = 'Discuz/'  # directory that holds the captcha images
imgs = os.listdir(path)  # every file name in the data directory
random.shuffle(imgs)  # randomise before splitting into train / test
max_steps = 1000000  # maximum number of training iterations
save_path = 'model4cnn-1fcn'  # directory the checkpoints are written to
dropout = 1  # fed as keep_prob, but the network never applies dropout

# Requested sizes of the training and test splits.
trainnum = 50000
testnum = 10000

# File names of each split; the label is the file name up to the first '.'.
traindatas = imgs[:trainnum]
trainlabels = [name.split('.')[0] for name in traindatas]

# Cap the test split at testnum so that extra files in the directory cannot
# make the list longer than the size next_batch() wraps around on.
testdatas = imgs[trainnum:trainnum + testnum]
testlabels = [name.split('.')[0] for name in testdatas]

# Re-derive the sizes from the real lists: if the directory holds fewer files
# than requested, the batching arithmetic must use the actual lengths.
trainnum = len(traindatas)
testnum = len(testdatas)

# Read positions of next_batch() inside the training and test splits.
train_ptr = 0
test_ptr = 0

def next_batch(batch=100, train_flag=True):
    """Return the next `batch` samples of the training or test split.

    The module-level pointers `train_ptr` / `test_ptr` remember where the
    previous call stopped; when the end of a split is reached the batch wraps
    around to the beginning of that split.

    Args:
        batch: number of samples to return.
        train_flag: True to read from the training split, False for the
            test split.

    Returns:
        (batch_x, batch_y): `batch_x` has shape (batch, 30*100) and holds the
        channel-averaged image pixels scaled by 1/255; `batch_y` has shape
        (batch, 4*63) and holds the labels encoded by text2vec().
    """
    global train_ptr
    global test_ptr

    if train_flag:
        names, name_labels = traindatas, trainlabels
        start, total = train_ptr, trainnum
    else:
        names, name_labels = testdatas, testlabels
        start, total = test_ptr, testnum

    end = start + batch
    if end < total:
        batch_names = names[start:end]
        batch_labels = name_labels[start:end]
        new_ptr = end
    else:
        # Wrap around.  Labels must come from the label list: the original
        # code took them from the file-name list here, which fed names such
        # as "abcd.jpg" into text2vec().
        new_ptr = end % total
        batch_names = names[start:] + names[:new_ptr]
        batch_labels = name_labels[start:] + name_labels[:new_ptr]

    if train_flag:
        train_ptr = new_ptr
    else:
        test_ptr = new_ptr

    batch_x = np.zeros([batch, 30 * 100])
    batch_y = np.zeros([batch, 4 * 63])
    for index, (name, label) in enumerate(zip(batch_names, batch_labels)):
        # Average over the colour channels to get a single-channel image.
        img = np.mean(cv2.imread(path + name), -1)
        batch_x[index, :] = img.flatten() / 255
        batch_y[index, :] = text2vec(label)

    return batch_x, batch_y

def text2vec(text):
    """Encode a captcha label as a flat one-hot vector of length 4*63.

    Each character position owns a 63-slot segment: slots 0-9 are the digits,
    10-35 the uppercase letters, 36-61 the lowercase letters and 62 the
    underscore.  Labels shorter than four characters leave the remaining
    segments all-zero.

    Args:
        text: label of at most four characters drawn from [0-9A-Za-z_].

    Returns:
        numpy array of shape (4*63,) with a 1 at each character's slot.

    Raises:
        ValueError: 'too long captcha' if `text` has more than 4 characters,
            'No Map' if a character is outside the supported alphabet.
    """
    if len(text) > 4:
        raise ValueError('too long captcha')

    def char2pos(c):
        # Explicit range checks: the previous arithmetic-only mapping let
        # characters such as ':' or '[' fall through to a wrong slot and
        # characters below '0' produce a negative index.
        if c == '_':
            return 62
        if '0' <= c <= '9':
            return ord(c) - 48
        if 'A' <= c <= 'Z':
            return ord(c) - 55
        if 'a' <= c <= 'z':
            return ord(c) - 61
        raise ValueError('No Map')

    vector = np.zeros(4 * 63)
    for i, c in enumerate(text):
        vector[i * 63 + char2pos(c)] = 1

    return vector

# Graph inputs: flattened images, flattened one-hot labels, the learning rate
# and the dropout keep probability (all fed at run time).
X = tf.placeholder(dtype=tf.float32, shape=[None, 30 * 100])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 4 * 63])
_lr = tf.placeholder(dtype=tf.float32)
keep_prob = tf.placeholder(dtype=tf.float32)

def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias and apply ReLU.

    Args:
        x: input tensor passed to tf.nn.conv2d.
        W: convolution kernel.
        b: bias added to the convolution output.
        strides: stride used for both spatial dimensions.

    Returns:
        The activated feature map.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

def max_pool2d(x, k=2):
    """Max-pool `x` with a k-by-k window and a stride of k (SAME padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

# Trainable kernels / matrices, drawn from a normal distribution scaled by 0.01.
# 'wc*' are the four 3x3 convolution kernels, 'wf1' the fully connected layer
# fed by the flattened conv4 output, 'wout' the 4*63 output layer.
# 'wf2' is created but not referenced by conv_net() in this script.
# NOTE(review): the variables carry no explicit names, so presumably TensorFlow
# names them by creation order; keep the order (and count) unchanged or saved
# checkpoints may no longer line up with the graph -- confirm before editing.
weights = {
        'wc1': tf.Variable(0.01*tf.random_normal([3, 3, 1, 32])),
        'wc2': tf.Variable(0.01*tf.random_normal([3, 3, 32, 64])),
        'wc3': tf.Variable(0.01*tf.random_normal([3, 3, 64, 64])),
        'wc4': tf.Variable(0.01*tf.random_normal([3, 3, 64, 64])),
        'wf1': tf.Variable(0.01*tf.random_normal([2 * 7 * 64, 1024])),
        'wf2': tf.Variable(0.01*tf.random_normal([1024, 1024])),
        'wout': tf.Variable(0.01*tf.random_normal([1024, 4*63]))
        }

# Matching bias vectors, drawn from a normal distribution scaled by 0.1.
# 'bf2' is created but not referenced by conv_net() in this script.
biases = {
        'bc1': tf.Variable(0.1*tf.random_normal([32])),
        'bc2': tf.Variable(0.1*tf.random_normal([64])),
        'bc3': tf.Variable(0.1*tf.random_normal([64])),
        'bc4': tf.Variable(0.1*tf.random_normal([64])),
        'bf1': tf.Variable(0.1*tf.random_normal([1024])),
        'bf2': tf.Variable(0.1*tf.random_normal([1024])),
        'bout': tf.Variable(0.1*tf.random_normal([4*63]))
    }

def conv_net(x, weights, biases, dropout):
    """Build the captcha network and return its 4*63 output logits.

    The input is reshaped to a single-channel image, passed through four
    conv + 2x2 max-pool stages, one ReLU fully connected layer and a linear
    output layer.

    Args:
        x: batch of flattened images.
        weights: dict with the 'wc1'..'wc4', 'wf1' and 'wout' variables.
        biases: dict with the 'bc1'..'bc4', 'bf1' and 'bout' variables.
        dropout: accepted for interface compatibility; not used.

    Returns:
        Logits tensor produced by the output layer.
    """
    # NOTE(review): the images are flattened row by row elsewhere in this
    # file; if they are 30 rows x 100 columns, a 100x30 reshape does not
    # restore the original 2-D layout.  Confirm whether [-1, 30, 100, 1] was
    # intended, and change training and inference code together.
    net = tf.reshape(x, [-1, 100, 30, 1])

    # Four identical stages: convolution (stride 1) followed by 2x2 pooling.
    for stage in ('1', '2', '3', '4'):
        net = conv2d(net, weights['wc' + stage], biases['bc' + stage], 1)
        net = max_pool2d(net, 2)

    # Flatten to the input width expected by the fully connected layer.
    flat_dim = weights['wf1'].get_shape().as_list()[0]
    flat = tf.reshape(net, shape=[-1, flat_dim])
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wf1']), biases['bf1']))

    return tf.add(tf.matmul(hidden, weights['wout']), biases['bout'])


# Build the graph: logits, loss, optimizer and accuracy metrics.
output = conv_net(X, weights, biases, keep_prob)

# Every one of the 4*63 logits is scored as an independent sigmoid target.
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=output, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=_lr).minimize(loss_op)

# View logits and labels as (batch, 4 characters, 63 classes).
y = tf.reshape(output, [-1, 4, 63])
y_ = tf.reshape(Y, [-1, 4, 63])

correct_pred = tf.equal(tf.argmax(y, 2), tf.argmax(y_, 2))
# Per-character accuracy: a captcha with 2 of 4 characters right scores 0.5.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Whole-captcha accuracy: a captcha only counts when all 4 characters match,
# which is the figure that matters for actually solving a captcha.
captcha_accuracy = tf.reduce_mean(
    tf.cast(tf.reduce_all(correct_pred, axis=1), tf.float32))
init = tf.global_variables_initializer()
lr = 0.001
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    for step in range(1, 1 + max_steps):
        batch_x, batch_y = next_batch(100, True)
        loss_value, _ = sess.run(
            [loss_op, optimizer],
            feed_dict={X: batch_x, Y: batch_y, keep_prob: dropout, _lr: lr})

        # Every 10 steps, measure accuracy on a batch of held-out images.
        if step % 10 == 0:
            batch_x_test, batch_y_test = next_batch(100, False)
            acc, captcha_acc = sess.run(
                [accuracy, captcha_accuracy],
                feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1})
            print('step{}, loss={}, accuracy={}, captcha_accuracy={}'.format(
                step, loss_value, acc, captcha_acc))

        # Reshuffle the training files and rebuild the matching labels.
        if step % 500 == 0:
            random.shuffle(traindatas)
            trainlabels = [name.split('.')[0] for name in traindatas]

        # Learning-rate decay.
        if step % 3000 == 0:
            lr *= 0.9

        if step % 10000 == 0:
            saver.save(sess, save_path + "/model.ckpt-%d" % step)
            print('model saved!')

接下來是我寫的一個直觀觀察訓練效果的,新建一個腳本,添加如下代碼,然后運行該腳本,將會隨機展示4張驗證碼和你的預測結果,終端還會顯示本次預測的精確度。

from __future__ import print_function, division, absolute_import
import tensorflow as tf
import os
import cv2
import matplotlib.pyplot as plt 
import random
import numpy as np
from datasplit import use
#from optparse import OptionParser


testnumber = 4  # changing this also requires changing the plotting code, or it will break
path = 'Discuz/'
imgs = os.listdir(path)
model_path = 'model4cnn-1fcn/model.ckpt-500000'  # checkpoint of the trained model to load
# NOTE(review): the samples are drawn from every file in the directory, so
# they may include images the model was trained on -- confirm if that matters.
testdatas = random.sample(imgs, testnumber)
testlabels = [name.split('.')[0] for name in testdatas]

# Graph inputs: flattened images, flattened one-hot labels, keep probability.
X = tf.placeholder(dtype=tf.float32, shape=[None, 30 * 100])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 4 * 63])
keep_prob = tf.placeholder(dtype=tf.float32)

def text2vec(text):
    """Encode a captcha label as a flat one-hot vector of length 4*63.

    Each character position owns a 63-slot segment: slots 0-9 are the digits,
    10-35 the uppercase letters, 36-61 the lowercase letters and 62 the
    underscore.  Labels shorter than four characters leave the remaining
    segments all-zero.

    Args:
        text: label of at most four characters drawn from [0-9A-Za-z_].

    Returns:
        numpy array of shape (4*63,) with a 1 at each character's slot.

    Raises:
        ValueError: 'too long captcha' if `text` has more than 4 characters,
            'No Map' if a character is outside the supported alphabet.
    """
    if len(text) > 4:
        raise ValueError('too long captcha')

    def char2pos(c):
        # Explicit range checks: the previous arithmetic-only mapping let
        # characters such as ':' or '[' fall through to a wrong slot and
        # characters below '0' produce a negative index.
        if c == '_':
            return 62
        if '0' <= c <= '9':
            return ord(c) - 48
        if 'A' <= c <= 'Z':
            return ord(c) - 55
        if 'a' <= c <= 'z':
            return ord(c) - 61
        raise ValueError('No Map')

    vector = np.zeros(4 * 63)
    for i, c in enumerate(text):
        vector[i * 63 + char2pos(c)] = 1

    return vector

def vec2text(vec):
    """Decode a one-hot label vector back into its captcha text.

    Every non-zero entry contributes one character, in index order; the
    entry's index modulo 63 selects the character (0-9 digits, 10-35
    uppercase letters, 36-61 lowercase letters, 62 underscore).
    """
    # Position k in this string is the character stored in slot k.
    alphabet = (
        '0123456789'
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz'
        '_'
    )
    return "".join(alphabet[pos % 63] for pos in vec.nonzero()[0])

# Build the evaluation batch: one row of scaled grey pixels and one row of
# encoded label per sampled file.
batch_x = np.zeros([testnumber, 30 * 100])
batch_y = np.zeros([testnumber, 4 * 63])

for row, (fname, label) in enumerate(zip(testdatas, testlabels)):
    # Average over the colour channels to get a single-channel image.
    gray = np.mean(cv2.imread(path + fname), -1)
    batch_x[row, :] = gray.flatten() / 255
    batch_y[row, :] = text2vec(label)

def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias and apply ReLU.

    Args:
        x: input tensor passed to tf.nn.conv2d.
        W: convolution kernel.
        b: bias added to the convolution output.
        strides: stride used for both spatial dimensions.

    Returns:
        The activated feature map.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

def max_pool2d(x, k=2):
    """Max-pool `x` with a k-by-k window and a stride of k (SAME padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

# Variables that receive the restored checkpoint values; the random initial
# values here are only placeholders for the shapes.
# 'wc*' are the four 3x3 convolution kernels, 'wf1' the fully connected layer
# fed by the flattened conv4 output, 'wout' the 4*63 output layer.
# 'wf2' is created but not referenced by conv_net() in this script.
# NOTE(review): the variables carry no explicit names, so presumably TensorFlow
# names them by creation order; keep the order (and count) identical to the
# script that wrote the checkpoint or restoring may mismatch -- confirm.
weights = {
        'wc1': tf.Variable(0.01*tf.random_normal([3, 3, 1, 32])),
        'wc2': tf.Variable(0.01*tf.random_normal([3, 3, 32, 64])),
        'wc3': tf.Variable(0.01*tf.random_normal([3, 3, 64, 64])),
        'wc4': tf.Variable(0.01*tf.random_normal([3, 3, 64, 64])),
        'wf1': tf.Variable(0.01*tf.random_normal([2 * 7 * 64, 1024])),
        'wf2': tf.Variable(0.01*tf.random_normal([1024, 1024])),
        'wout': tf.Variable(0.01*tf.random_normal([1024, 4*63]))
        }

# Matching bias vectors.
# 'bf2' is created but not referenced by conv_net() in this script.
biases = {
        'bc1': tf.Variable(0.1*tf.random_normal([32])),
        'bc2': tf.Variable(0.1*tf.random_normal([64])),
        'bc3': tf.Variable(0.1*tf.random_normal([64])),
        'bc4': tf.Variable(0.1*tf.random_normal([64])),
        'bf1': tf.Variable(0.1*tf.random_normal([1024])),
        'bf2': tf.Variable(0.1*tf.random_normal([1024])),
        'bout': tf.Variable(0.1*tf.random_normal([4*63]))
    }

def conv_net(x, weights, biases, dropout):
    """Build the captcha network and return its 4*63 output logits.

    The input is reshaped to a single-channel image, passed through four
    conv + 2x2 max-pool stages, one ReLU fully connected layer and a linear
    output layer.

    Args:
        x: batch of flattened images.
        weights: dict with the 'wc1'..'wc4', 'wf1' and 'wout' variables.
        biases: dict with the 'bc1'..'bc4', 'bf1' and 'bout' variables.
        dropout: accepted for interface compatibility; not used.

    Returns:
        Logits tensor produced by the output layer.
    """
    # NOTE(review): the images are flattened row by row elsewhere in this
    # file; if they are 30 rows x 100 columns, a 100x30 reshape does not
    # restore the original 2-D layout.  Confirm whether [-1, 30, 100, 1] was
    # intended, and change training and inference code together.
    net = tf.reshape(x, [-1, 100, 30, 1])

    # Four identical stages: convolution (stride 1) followed by 2x2 pooling.
    for stage in ('1', '2', '3', '4'):
        net = conv2d(net, weights['wc' + stage], biases['bc' + stage], 1)
        net = max_pool2d(net, 2)

    # Flatten to the input width expected by the fully connected layer.
    flat_dim = weights['wf1'].get_shape().as_list()[0]
    flat = tf.reshape(net, shape=[-1, flat_dim])
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights['wf1']), biases['bf1']))

    return tf.add(tf.matmul(hidden, weights['wout']), biases['bout'])

# Build the inference graph and the accuracy metrics.
output = conv_net(X, weights, biases, keep_prob)

# View logits and labels as (batch, 4 characters, 63 classes).
y = tf.reshape(output, [-1, 4, 63])
y_ = tf.reshape(Y, [-1, 4, 63])

predict = tf.argmax(y, 2)
correct_pred = tf.equal(predict, tf.argmax(y_, 2))
# Per-character accuracy: a captcha with 2 of 4 characters right scores 0.5.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Whole-captcha accuracy: a captcha only counts when all 4 characters match.
captcha_accuracy = tf.reduce_mean(
    tf.cast(tf.reduce_all(correct_pred, axis=1), tf.float32))
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, model_path)

    pred, acc, captcha_acc = sess.run(
        [predict, accuracy, captcha_accuracy],
        feed_dict={X: batch_x, Y: batch_y, keep_prob: 1})
    print('accuracy={}'.format(acc))
    print('captcha accuracy={}'.format(captcha_acc))
    for i in range(1, testnumber + 1):

        plt.subplot(2, 2, i)
        img = cv2.imread(path + testdatas[i - 1])
        # OpenCV loads images in BGR order while matplotlib expects RGB;
        # convert so the captcha is shown in its real colours.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title('number%d' % i)
        plt.xticks([])
        plt.yticks([])

        # Re-encode the predicted class indices as a one-hot vector so that
        # vec2text() can turn them back into characters.
        vect = np.zeros([4 * 63])
        for ind, j in enumerate(pred[i - 1]):
            vect[ind * 63 + j] = 1

        xlabel = 'True label:{};Pred label:{}'.format(testlabels[i-1], vec2text(vect))
        plt.xlabel(xlabel)

    plt.show()

有任何問題歡迎討論。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM