背景:
看了許多網上關於Flask多進程的介紹,但根本行不通(有直接用flask實現多進程的朋友,麻煩教教我哈)
開始:
下面是基於flask的基礎web服務實現的多進程並發思路(該思路已經證明是可行的)
功能:提取圖像上的文字,實現並發
1. 寫一個flask基本服務,用於接收請求:將傳來的圖像隨機命名為rand.jpg並存到test文件夾,然後不斷檢測result文件夾中是否存在rand.txt文件;若存在,解析rand.txt將結果返回,並刪除result/rand.txt;若超時則返回超時錯誤;
2. 使用python的多進程策略:將圖像提取文字的功能封裝成函數,視為工人,用多進程開啟多個工人,用循環讓工人一直等待,直到test文件夾中存在圖像文件,再讓工人去搶工作(圖像)並開始處理。這裡需要用Lock的邏輯,不然工人會混亂並且浪費資源:當工人發現圖像,立即嘗試鎖定文件lock.txt,若發現該文件已被鎖定,則稍等片刻再次嘗試鎖定,直到文件沒有被鎖定;這時候這個工人先自己把lock.txt鎖定,並且獲取test文件夾中的圖像列表imglist,與lock.txt的內容進行對比:遍歷imglist,如果發現圖像A.jpg在lock.txt中不存在,則將A.jpg添加到lock.txt中,並對lock.txt解除鎖定,然後跳出遍歷開始對A.jpg進行處理,處理完成後將處理結果存在result/A.txt中,並刪除test中的A.jpg。此時A.txt會被步驟1檢測到,至此大功告成。
注意:需要將lock.txt定期刪除,否則會增加列表遍歷時間;當檢測到文件存在時,不要立即讀取文件,稍等片刻再讀取,否則文件還在寫入的過程中就去讀取,通常會報錯。
代碼:
功能部分tyocr.py
import cv2
import os, fcntl, time
import model
from apphelper.image import union_rbox,adjust_box_to_origin
# Directory the workers list to find images waiting to be processed.
imgroot = 'test'
# File that records already-claimed image names; it is also the file the
# workers take the flock on while claiming.
locktxt = 'lock.txt'
def find_word(imgpath):
    """Run text recognition on one image and publish the recognised text.

    The recognised text lines are written, one per line, to
    ``result/<image name without extension>.txt`` and the source image is
    removed afterwards.

    Args:
        imgpath: Path of the image to process, e.g. ``test/abc.jpg``.

    Raises:
        ValueError: If OpenCV cannot read or decode the image.
    """
    img = cv2.imread(imgpath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError('cannot read image: {}'.format(imgpath))

    _, result, angle = model.model(
        img,
        detectAngle=True,
        config=dict(
            MAX_HORIZONTAL_GAP=50,
            MIN_V_OVERLAPS=0.6,
            MIN_SIZE_SIM=0.6,
            TEXT_PROPOSALS_MIN_SCORE=0.1,
            TEXT_PROPOSALS_NMS_THRESH=0.3,
            TEXT_LINE_NMS_THRESH=0.7,
        ),
        leftAdjust=True,
        rightAdjust=True,
        alph=0.01,
    )
    result = union_rbox(result, 0.2)
    res = [
        {
            'text': x['text'],
            'name': str(i),
            'box': {
                'cx': x['cx'],
                'cy': x['cy'],
                'w': x['w'],
                'h': x['h'],
                'angle': x['degree'],
            },
        }
        for i, x in enumerate(result)
    ]
    res = adjust_box_to_origin(img, angle, res)  # correct the boxes
    print(res)

    # Derive the result name from the file name only, so the function does
    # not depend on the path having exactly one '/' in it.
    stem = os.path.splitext(os.path.basename(imgpath))[0]
    txtpath = os.path.join('result', stem + '.txt')

    # Write to a temporary name and rename into place: whoever is polling for
    # txtpath then never observes a partially written result file.
    tmppath = txtpath + '.tmp'
    with open(tmppath, 'w') as f:
        f.writelines(n['text'] + '\n' for n in res)
    os.replace(tmppath, txtpath)

    try:
        os.remove(imgpath)
    except FileNotFoundError:
        # Already gone; nothing left to clean up.
        pass
def _lock_file_is_current(handle, path):
    """Return True if *path* still names the very file that *handle* has open."""
    try:
        on_disk = os.stat(path)
    except FileNotFoundError:
        return False
    opened = os.fstat(handle.fileno())
    return (opened.st_dev, opened.st_ino) == (on_disk.st_dev, on_disk.st_ino)


def get_worklist():
    """Claim one unclaimed image from ``imgroot`` for the calling worker.

    Claims are recorded as one file name per line in ``locktxt``; an exclusive,
    non-blocking ``flock`` on that file serialises the workers. When
    ``imgroot`` is empty the claim file is deleted so it does not grow forever.

    Returns:
        The path (``imgroot`` joined with the file name) of the image this
        worker has just claimed, or ``None`` when there is nothing to claim,
        the lock is held by another worker, or a file-system error occurred.
    """
    # Fast path for the idle case: nothing queued and nothing to clean up.
    if not os.listdir(imgroot) and not os.path.exists(locktxt):
        return None

    # 'a+' creates the file if needed, lets us read existing claims through
    # the same (locked) handle, and always appends on write.
    with open(locktxt, 'a+') as f:
        try:
            fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # Another worker holds the lock; the caller simply retries.
            return None
        try:
            # The claim file may have been deleted between our open() and
            # flock(); a lock on an unlinked file protects nothing.
            if not _lock_file_is_current(f, locktxt):
                return None

            pending = os.listdir(imgroot)
            if not pending:
                # Delete only while holding the lock, so no worker can be
                # recording a claim in the file being removed.
                os.remove(locktxt)
                return None

            f.seek(0)
            claimed = set(f.read().splitlines())
            for name in pending:
                if name not in claimed:
                    f.write(name + '\n')
                    # The claim must reach the file BEFORE the lock is
                    # released, otherwise another worker can read a stale
                    # list and claim the same image.
                    f.flush()
                    return os.path.join(imgroot, name)
            return None
        except OSError:
            # Best effort: on any file-system problem give up this round
            # (the lock is still released below) rather than crash.
            return None
        finally:
            fcntl.flock(f, fcntl.LOCK_UN)
if __name__ == "__main__":
    # Worker entry point: keep claiming images and processing them forever.
    print('======= s t a r t =========')
    while True:
        imgpath = get_worklist()
        if imgpath is None:
            # Nothing to claim (or the lock was busy): back off briefly
            # instead of spinning at full CPU and hammering the file system.
            time.sleep(0.01)
            continue
        try:
            find_word(imgpath)
        except Exception as exc:
            # One bad image must not take the whole worker down; report it
            # and carry on with the next job.
            print('failed to process {}: {!r}'.format(imgpath, exc))
開啟多進程main.py
import os
from multiprocessing import Process
def fun1():
    """Run one ``tyocr.py`` worker as a child process and wait for it to exit."""
    # Imported locally so this function does not depend on new file-level imports.
    import subprocess
    import sys

    # sys.executable is the interpreter running this script, so the worker
    # uses the same Python/environment; the argument list avoids a shell.
    subprocess.run([sys.executable, "tyocr.py"])
if __name__ == "__main__":
    # Build all four worker processes first, then launch them one by one.
    workers = [Process(target=fun1) for _ in range(4)]
    for worker in workers:
        worker.start()
web服務server.py
import os, requests, time
from flask import Flask,render_template,request
import base64
import random
def getRandomSet(bits):
    """Return a random string of *bits* characters drawn from ``0-9`` and ``a-z``.

    Characters are drawn independently, so they may repeat and any
    non-negative length is supported (sampling without replacement would
    fail for lengths above 36, the size of the alphabet).

    Args:
        bits: Number of characters to generate.

    Returns:
        The generated string; empty when *bits* is 0.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    return "".join(random.choices(alphabet, k=bits))
# Flask application object that the /tyocr route below is registered on.
app = Flask(__name__)
# Directory uploaded images are saved into.
imgroot = 'test'
# Directory polled for the result text file matching an uploaded image.
txtroot = 'result'
@app.route("/tyocr", methods=['GET', 'POST'])
def tyocr():
    """Accept a base64 image, queue it, and wait for its recognised text.

    On POST the ``imgbase64`` form field is decoded and stored as
    ``<imgroot>/<random name>.jpg``. The handler then polls for
    ``<txtroot>/<random name>.txt`` for up to 20 seconds, returns its content
    and deletes it.

    Returns:
        POST: ``{'sign': 0, 'text': <result>}`` on success,
        ``{'sign': -1, 'text': 'time out'}`` if no result appears in time, or
        a ``sign: -1`` body with HTTP 400 when ``imgbase64`` is missing or is
        not valid base64. Any other method: a short HTML hint.
    """
    if request.method != "POST":
        return "<h1>Image find words! please use post</h1>"

    imgbase64 = request.form.get('imgbase64')
    if not imgbase64:
        return {'sign': -1, 'text': 'missing imgbase64'}, 400
    try:
        imgdata = base64.b64decode(imgbase64)
    except ValueError:
        # binascii.Error (bad padding etc.) is a ValueError subclass.
        return {'sign': -1, 'text': 'invalid imgbase64'}, 400

    randname = getRandomSet(15)
    imgrandpath = os.path.join(imgroot, randname + '.jpg')
    txtrandpath = os.path.join(txtroot, randname + '.txt')

    # Stage the upload outside the queue directory and move it in with one
    # atomic rename, so a consumer listing imgroot never sees a half-written
    # image. NOTE: assumes txtroot and imgroot are on the same file system.
    stagedpath = os.path.join(txtroot, randname + '.jpg.part')
    with open(stagedpath, 'wb') as staged:
        staged.write(imgdata)
    os.replace(stagedpath, imgrandpath)

    # Use a wall-clock deadline: counting sleeps ignores the time spent on
    # the file-system checks and lets the real timeout drift.
    deadline = time.monotonic() + 20
    while time.monotonic() < deadline:
        time.sleep(0.01)
        if os.path.exists(txtrandpath):
            # Give the writer a moment to finish before reading.
            time.sleep(0.1)
            with open(txtrandpath) as result_file:
                str1 = result_file.read()
            print(str1)
            os.remove(txtrandpath)
            return {'sign': 0, 'text': str1}
    return {'sign': -1, 'text': 'time out'}
if __name__ == "__main__":
    host = '0.0.0.0'
    port = 8090
    # The server listens on every interface, and Flask's debug mode enables
    # an interactive debugger that can execute arbitrary code. Keep it off
    # unless explicitly requested, e.g. FLASK_DEBUG=1 on a trusted machine.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(debug=debug, host=host, port=port)
