代碼實現
"""Log in to the shgt.com trade site with Selenium, solving the image captcha
through Baidu's OCR HTTP API.

Flow: open the login page, screenshot the captcha element to ``vcode.png``,
send that image to the OCR endpoint, type the recognised text into the form
and submit; repeat while the page title still reads as the login page.
"""
import base64
import json
import os
import ssl
import sys
import time
from typing import Sized
from urllib.error import URLError
from urllib.parse import quote_plus
from urllib.parse import urlencode
from urllib.request import Request
from urllib.request import urlopen

from docx import Document
from docx.shared import Mm
from docxtpl import DocxTemplate, InlineImage, RichText
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By

# Initialise the browser driver (module-level: the functions below use it).
driver = webdriver.Chrome()
driver.set_window_size(1280, 800, driver.window_handles[0])
driver.maximize_window()

# NOTE(review): this switches off TLS certificate verification for every HTTPS
# request made through urllib's default context in this process, including the
# request that carries SECRET_KEY. Kept because the original relies on it;
# remove once the host's CA bundle is known to be valid.
ssl._create_default_https_context = ssl._create_unverified_context

# Baidu OCR credentials and endpoints.
# NOTE(review): the fallback literals are real-looking credentials committed in
# source. Prefer supplying them through the environment and rotating the keys.
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'fqe83vwceOl3A87umYHATbaB')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY',
                            'UFjtlGbBvhLAh1VSDok1apCuDx6AceRG')
OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'

# Kept as a module-level name for anything that still reads it. The original
# Python 2 branch was an empty ``pass`` (so ``urlencode`` etc. were undefined
# there); the urllib imports above are therefore simply Python 3 imports.
IS_PY3 = sys.version_info.major == 3


def getimage():
    """Screenshot the captcha element of the current page to ``vcode.png``."""
    ele_vcode = driver.find_element(By.XPATH, "//*[@id='captchaImgU']")
    # Click before capturing (presumably this reloads the captcha image --
    # confirm against the page), then give it two seconds to render.
    ele_vcode.click()
    time.sleep(2)
    ele_vcode.screenshot('vcode.png')


def fetch_token():
    """Request an OAuth access token from ``TOKEN_URL``.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        SystemExit: if the request fails, if the response lacks
            ``access_token``/``scope``, or if ``brain_all_scope`` is not among
            the granted scopes. A message is printed first in each case.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params).encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        with urlopen(req, timeout=5) as resp:
            result_str = resp.read()
    except URLError as err:
        # The original fell through here and crashed on an unbound
        # ``result_str``; stop explicitly like the other failure paths do.
        print(err)
        sys.exit(1)

    result = json.loads(result_str.decode())
    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print('please ensure has check the ability')
            sys.exit(1)
        return result['access_token']

    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)


def read_file(image_path):
    """Return the raw bytes of ``image_path``, or ``None`` if it cannot be read.

    A message is printed when reading fails.
    """
    try:
        with open(image_path, 'rb') as f:
            return f.read()
    except OSError:
        print('read image file fail')
        return None


def request(url, data):
    """POST ``data`` (a ``str``, sent UTF-8 encoded) to ``url``.

    Returns:
        The decoded response body, or ``None`` if a ``URLError`` occurred
        (the error is printed).
    """
    req = Request(url, data.encode('utf-8'))
    try:
        with urlopen(req) as resp:
            return resp.read().decode()
    except URLError as err:
        print(err)
        return None


def get_code():
    """Run OCR on ``vcode.png`` and return the recognised text.

    Returns:
        The concatenation of every ``words`` entry in the response's
        ``words_result`` list, or ``""`` when the image could not be read, the
        request failed, or the response carries no ``words_result``.
    """
    # Fetch an access token and build the high-accuracy OCR URL.
    token = fetch_token()
    image_url = OCR_URL + "?access_token=" + token

    file_content = read_file('vcode.png')
    if file_content is None:
        return ""

    # The image goes in a form-encoded body as base64.
    result = request(
        image_url, urlencode({'image': base64.b64encode(file_content)}))
    if result is None:
        return ""

    result_json = json.loads(result)
    # Error responses have no "words_result"; treat that as "nothing read".
    return "".join(
        item["words"] for item in result_json.get("words_result", []))


def _fill(xpath, value):
    """Clear the input located by ``xpath`` and type ``value`` into it."""
    field = driver.find_element(By.XPATH, xpath)
    field.clear()
    field.send_keys(value)


def phsc_login(username="username", password="password", max_attempts=None):
    """Open the login page and submit the form until the title changes.

    Args:
        username: Text typed into the ``user`` field.
        password: Text typed into the ``pass`` field.
        max_attempts: Maximum number of captcha attempts; ``None`` (the
            default) retries without limit, as the original script did.

    Returns:
        ``True`` once the page title is no longer the login title, ``False``
        if ``max_attempts`` was exhausted first.
    """
    driver.get("https://www.shgt.com/trade-web/login")
    time.sleep(5)

    attempts = 0
    # While the title is still the login title the login has not succeeded:
    # fetch a new captcha and try again.
    while driver.title == '登錄':
        if max_attempts is not None and attempts >= max_attempts:
            return False
        attempts += 1

        getimage()
        vcode = get_code()
        if not vcode:
            # Nothing recognised; submitting an empty code cannot succeed.
            continue

        _fill("//*[@name='user']", username)
        _fill("//*[@name='pass']", password)
        _fill("//*[@name='validateCode']", vcode)
        driver.find_element(
            By.XPATH,
            "//*[@class='el-button btn_login el-button--button']").click()
        time.sleep(5)
    return True


try:
    phsc_login()
finally:
    # Always release the browser, even when login aborts with an error.
    driver.quit()
參考文章:
百度OCR接口入門:https://ai.baidu.com/ai-doc/OCR/dk3iqnq51
如何用代碼調用百度OCR服務:https://cloud.baidu.com/doc/OCR/s/Pkrwx9ye4
【Python+selenium】帶圖片驗證碼的登錄自動化實戰:https://www.jianshu.com/p/6755a40d961f
5行Python實現驗證碼識別(識別率一般):https://jishuin.proginn.com/p/763bfbd60bb1