一、一些用到的技術
1.1解析驗證碼(聯眾-收費,兩分錢一個)
驗證碼識別涉及機器學習,沒有耗費時間去實現,所以採用了第三方(聯眾)的在線打碼,註冊充值便可使用,識別率挺高的,驗證碼識別種類不少,充了五十塊玩了好久還剩不少。具體地址:http://v1-http-api.jsdama.com/api.php?mod=php
1.2 具體實現代碼,此部分負責登錄的部分邏輯(__init__.py)
# -*- coding: utf-8 -*-
"""Login flow for kyfw.12306.cn built on top of ``common.base.Base``.

The flow is: fetch and decode the captcha, submit the captcha answer,
post the credentials, then exchange the ``uamtk`` token for a client
token.  Running the module as a script logs in and dumps the user-info
page to ``__tmp/userInfo.html``.
"""
import json
import os
import sys

# Make the sibling ``common`` package importable when run from this folder.
sys.path.append("..")

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

from common.base import Base

# Requests below are sent with verify=False; silence the resulting warning.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

__author__ = 'Rachel feng'

PATH = 'https://kyfw.12306.cn'
CAPTCHA_URL = ("https://kyfw.12306.cn/passport/captcha/captcha-image"
               "?login_site=E&module=login&rand=sjrand")

__all__ = ['LoginTic']


class LoginTic(Base):
    """Performs the captcha check and the login requests."""

    def __init__(self):
        super(LoginTic, self).__init__()

    def checkYanZheng(self, solution):
        """Submit a captcha answer and report whether it was accepted.

        Args:
            solution: click coordinates as returned by the decoding
                service, e.g. ``'225,83|181,31|35,67'``.

        Returns:
            True when the response's ``result_code`` equals 4, else False.
            A missing or non-string solution is rejected without a request.
        """
        if not solution or not hasattr(solution, 'replace'):
            # decode_captcha() yields None when recognition failed.
            return False

        # 225,83|181,31|35,67 => 225,83,181,31,35,67
        yanStr = solution.replace('|', ',')
        print('校驗驗證碼......')
        checkUrl = "https://kyfw.12306.cn/passport/captcha/captcha-check"
        data = {
            'login_site': 'E',   # fixed value
            'rand': 'sjrand',    # fixed value
            # Coordinates of the selected pictures, two numbers per click,
            # in click order.
            'answer': yanStr,
        }
        print(data)
        cont = self.requests.post(checkUrl, data=data, verify=False)
        dic = cont.json()
        # Per the original author's note: 4 = success, 5 = check failed,
        # 7 = expired.
        if str(dic.get('result_code')) == '4':
            return True
        print(dic)
        return False

    def loginTo(self, userName, pwd, url=CAPTCHA_URL):
        """Log in with the given credentials.

        Args:
            userName: 12306 account name.
            pwd: 12306 account password.
            url: captcha image URL; defaults to the login captcha so the
                method no longer depends on a module-level ``url`` global.

        Returns:
            True on success, otherwise a ``(600, message)`` tuple.  Every
            failure path returns a tuple, so callers can reliably tell the
            two outcomes apart.
        """
        yan = self.decode_captcha(url)
        # Login is only attempted after the captcha has been accepted.
        if not self.checkYanZheng(yan):
            return 600, '驗證失敗,請重新驗證'

        loginUrl = "https://kyfw.12306.cn/passport/web/login"
        data = {
            'username': userName,
            'password': pwd,
            'appid': 'otn',
        }
        print(data)
        headers = {
            'Origin': 'https://kyfw.12306.cn',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
            'Referer': 'https://kyfw.12306.cn/otn/login/init',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
        }
        result = self.requests.post(loginUrl, data=data, headers=headers)
        dic = result.json()
        print(dic)
        mes = dic.get('result_message')
        # The decoded JSON text is unicode, hence the u'' literal.
        # NOTE(review): this literal must match the server's message
        # character for character - confirm against a live response.
        if mes != u'登錄成功':
            print(mes)
            return 600, mes

        # Called for its effect on the session; the response body is unused.
        self.requests.post('https://kyfw.12306.cn/otn/login/userLogin')

        r = self.requests.post(
            'https://kyfw.12306.cn/passport/web/auth/uamtk',
            data={'appid': 'otn'})
        if r.status_code != 200:
            return 600, '登錄驗證不通過'
        d = r.json()
        if d.get('result_code') != 0:
            # Previously this path fell through and returned None, which
            # the caller mistook for a successful login.
            return 600, d.get('result_message') or '登錄驗證不通過'

        r = self.requests.post(
            'https://kyfw.12306.cn/otn/uamauthclient',
            data={'tk': d.get('newapptk')})
        d = r.json()
        print(d)
        if d.get('result_code') == 0:
            print('恭喜你,登錄成功,可以購票!')
            return True
        return 600, d.get('result')

    def get_info(self):
        """Fetch the user-info page and save it to ``__tmp/userInfo.html``.

        Nothing is written when the server does not answer with HTTP 200.
        """
        data = {'_json_att': ''}
        headers = {
            'Referer': 'https://kyfw.12306.cn/otn/index/initMy12306',
            'Origin': 'https://kyfw.12306.cn',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        r = self.requests.post(
            'https://kyfw.12306.cn/otn/modifyUser/initQueryUserInfo',
            data=data, headers=headers)
        if r.status_code != 200:
            print('獲取購票人信息異常')
            return

        # The output directory is not guaranteed to exist yet.
        if not os.path.isdir('__tmp'):
            os.makedirs('__tmp')
        with open('__tmp/userInfo.html', 'wb') as f:
            f.write(r.content)
        print('獲取信息成功')


if __name__ == '__main__':
    userName = ''  # 12306 account name
    pwd = ''  # 12306 account password
    url = CAPTCHA_URL
    login = LoginTic()
    d = login.loginTo(userName, pwd, url)
    if isinstance(d, tuple):
        print(d)
    else:
        login.get_info()
1.2 base.py(在common目錄底下共用,對應代碼中的 from common.base import Base)
# -*- coding: utf-8 -*-
"""Shared HTTP session and captcha-recognition helpers."""
import codecs
import datetime
import importlib
import json
import logging
import time

import requests
import web
from jinja2 import Template
from pyquery import PyQuery as pq

# The previous value listed "cache_session", a name this module does not
# define, which makes ``from base import *`` raise AttributeError.
__all__ = ["Base"]

# API_PATH = 'http://bbb4.hyslt.com/api.php?mod=php'
API_PATH = 'http://v1-http-api.jsdama.com/api.php?mod=php'
USERNAME = ''  # recognition-service account name
PASSWORD = ''  # recognition-service account password
TOKEN = ''


class Base(object):
    """Base class providing a retrying HTTP session and captcha decoding.

    Attributes set by ``__init__``:
        session: a plain ``requests`` session.
        requests: a ``requests.Session`` with a browser User-Agent and an
            adapter retrying up to 3 times on 500/502/503/504.
    """

    def __init__(self):
        self.session = requests.session()

        net = requests.Session()
        # NOTE(review): requests does not document a session-wide
        # ``timeout`` attribute, so this is presumably not applied to
        # outgoing requests; timeouts are passed per call below.
        net.timeout = 30
        net.headers.update({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
        })
        retries = requests.packages.urllib3.util.retry.Retry(
            total=3,
            backoff_factor=0.1,
            status_forcelist=[500, 502, 503, 504])
        adapter = requests.adapters.HTTPAdapter(max_retries=retries)
        net.mount('http://', adapter)
        net.mount('https://', adapter)
        self.requests = net

    def decode(self, img, codetype=1303, minlength=1, maxlength=8):
        """Upload a captcha image to the recognition service.

        Args:
            img: captcha image content.
            codetype: captcha type (see https://www.jsdati.com/price):
                1001: four letters/digits
                1201: arithmetic question
                1008: letters only
                1009: digits only
                1013: 5 letters and digits (or 5 letters)
                1014: 6 letters and digits (or 6 letters)
                1015: 7 letters and digits (or 7 letters)
                1313: coordinates, 3 clicks
                1314: coordinates, 4 clicks
            minlength: minimum captcha length.
            maxlength: maximum captcha length.

        Returns:
            The decoded JSON response on success; ``(504, message)`` when
            the request fails; ``(500, message)`` when the response body
            is not valid JSON.
        """
        data = {
            'user_name': USERNAME,
            'user_pw': PASSWORD,
            'zztool_token': TOKEN,
            'yzmtype_mark': codetype,
            'yzm_minlen': minlength,
            'yzm_maxlen': maxlength,
        }
        files = {'upload': img}
        try:
            r = self.requests.post(API_PATH, params={'act': 'upload'},
                                   data=data, files=files, timeout=20)
        except requests.RequestException:
            # Only network-level failures are mapped to the timeout result;
            # programming errors are no longer hidden by a bare except.
            return 504, '連接超時'
        try:
            return r.json()
        except ValueError:
            return 500, '識別結果解析失敗'

    def decode_captcha(self, url):
        """Download the captcha at ``url`` and have it recognised.

        The image is also saved to ``vcode.png`` in the working directory.

        Returns:
            The recognised value (``d['data']['val']``) when the service
            reports a truthy ``result``; the raw value when ``decode``
            returns a string; otherwise None.
        """
        print('get code....')
        try:
            response = self.requests.get(url, verify=False, timeout=30)
        except requests.RequestException:
            print('獲取驗證碼失敗')
            return None

        content = response.content
        if response.status_code != 200 or not content:
            print('獲取驗證碼失敗')
            return None

        # Keep a local copy of the captcha image.
        with open('vcode.png', 'wb') as f:
            f.write(content)

        print('decode...')
        d = self.decode(content)
        if isinstance(d, dict) and d.get('result'):
            code = d['data']['val']
            print(code)
            return code
        if isinstance(d, str):
            return d
        # Recognition failed; show the service reply to aid diagnosis.
        print(d)
        return None
1.3一些亂七八糟補充的:
項目目錄
common
---__init__.py
---base.py
test
---__init__.py
驗證碼識別有時候識別不準確,導致校驗有時候會不通過,得檢查一下請求頭什麼的。
2.運行結果