Python識別圖片中的文字

本文轉載自查看原文 2020-07-04 11:24 1465 基礎知識

 1 import os,glob
 2 def photo_compression(original_imgage,tmp_image_path):
 3     '''圖片備份、壓縮；param original_imgage:原始圖片路徑；param tmp_imgage_path:臨時圖片路徑，備份路徑；return'''
 4     from PIL import Image
 5     img = Image.open(original_imgage)
 6     width,height = img.size
 7     while (width*height>4000000):#該數值壓縮后的圖片大約200多k
 8         width = width//2
 9         height = height//2
10     e_img = img.resize((width,height),Image.BILINEAR)
11     save_path = os.path.join(tmp_image_path,os.path.basename(original_imgage))
12     e_img.save(save_path)
13     return save_path
14 
def ocr(original_image):
    """Recognise the text in one image with the Baidu OCR client.

    The image bytes are sent to the general recognition endpoint first; if
    that call raises, the high-accuracy endpoint is tried instead.
    According to the original author's notes the service accepts JPG,
    JPEG, PNG and BMP images, the general endpoint is free for 50000 calls
    per day and the high-accuracy one for 500 calls per day.

    Args:
        original_image: Path of the image to recognise.

    Returns:
        Whatever the client call returns. The caller in this file reads
        its ``'words_result'`` entry.
    """
    from aip import AipOcr

    filename = os.path.basename(original_image)
    # Fill in your own Baidu AI account credentials.
    # Reference: https://m.toutiaocdn.com/i6704242394566492684/
    APP_ID = '******'
    API_KEY = '*******'
    SECRECT_KEY = '*********'

    client = AipOcr(APP_ID, API_KEY, SECRECT_KEY)

    # Read the bytes and close the file before the network call so the
    # handle is not held open while waiting for the service.
    with open(original_image, 'rb') as picfile_read:
        img = picfile_read.read()

    print('正在識別圖片：{0}......'.format(filename))
    try:
        result = client.basicGeneral(img)  # general text recognition
    except Exception:
        # Catch Exception rather than using a bare except, so Ctrl-C and
        # SystemExit are not swallowed and turned into a second request.
        result = client.basicAccurate(img)  # high-accuracy recognition
    return result
34 
def run_ocr(original_image, tmp_image_path, result_file_path='identify_results.txt'):
    """Run OCR over every image matching a pattern and write a text report.

    Each matching image is first copied (downscaled) into
    ``tmp_image_path`` by ``photo_compression``. Every copy is then passed
    to ``ocr``, its recognised lines are appended to ``result_file_path``
    under a separator and a header naming the image, and the copy is
    deleted.

    Args:
        original_image: Glob pattern selecting the source images.
        tmp_image_path: Directory for the temporary copies. It is created,
            including missing parent directories, when absent.
        result_file_path: UTF-8 text file receiving the report. A file
            left over from an earlier run is deleted first.

    Returns:
        None.
    """
    # Discard the report of an earlier run so results are not mixed.
    if os.path.exists(result_file_path):
        os.remove(result_file_path)

    # makedirs also creates missing parent directories, which os.mkdir
    # would fail on; exist_ok makes a separate existence check unnecessary.
    os.makedirs(tmp_image_path, exist_ok=True)

    # glob patterns are shell-style wildcards (*, ?, [...]), not regular
    # expressions.
    tmp_file_path = [
        photo_compression(picfile, tmp_image_path)
        for picfile in glob.glob(original_image)
    ]

    for picfile in tmp_file_path:
        try:
            result = ocr(picfile)
            words_result = result.get('words_result')
            if words_result is None:
                # No 'words_result' in the response: record the raw
                # response in the report instead of failing on None.
                lines = [str(result) + '\n']
            else:
                lines = [(text.get('words') or '') + '\n' for text in words_result]

            with open(result_file_path, 'a+', encoding='utf-8') as fo:
                fo.write("=" * 100 + '\n')
                fo.write("【識別圖片】：{0} \n".format(os.path.basename(picfile)))
                fo.write("【文本內容】： \n")
                fo.writelines(lines)
        finally:
            # Remove the temporary copy even when recognition or writing
            # raised, so a failure does not leave it behind.
            os.remove(picfile)
57 
if __name__ == '__main__':
    # Script entry point: recognise every PNG in the current working
    # directory, using a "tmp" sub-directory for the temporary copies.
    # os.path.join is used instead of a literal backslash so the paths are
    # also valid on systems whose separator is not "\".
    working_dir = os.getcwd()
    tmp_image_path = os.path.join(working_dir, 'tmp')
    original_image = os.path.join(working_dir, '*.png')
    run_ocr(original_image, tmp_image_path)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 利用python庫識別圖片中的文字 Python如何識別圖片中的文字【轉】 python識別圖片上的文字並返回文字在圖片中的坐標 java從圖片中識別文字 Python通過百度Ai識別圖片中的文字 Python實例001：實現識別圖片中的文字 使用Python進行OCR -- 識別圖片中的文字 python 識別圖片中的漢字 python識別圖片中的信息 pytesseract提取識別圖片中的文字