python 實現（附帶驗證碼識別）的模擬登陸

本文轉載自查看原文 2016-02-27 14:14 3709 C++/python/ 網絡編程

python的requests模塊是個神器，這裡用requests模塊實現模擬登陸：

#coding:utf-8
import sys
import requests
from bs4 import BeautifulSoup
import re
from pylsy import pylsytable
# --- captcha recognition (OCR) ---
import os
# Raw string: the backslashes are literal Windows path separators, not
# escape sequences.
# NOTE(review): the working directory is changed before importing pytesser,
# presumably so that pytesser can locate its bundled files -- confirm. As a
# consequence every relative path used later in this script resolves inside
# this directory.
os.chdir(r"C:\Python27\Lib\site-packages")
from pytesser import *
# pytesser is the library used for captcha recognition

# Base address of the site; every endpoint below is derived from it.
pre_url = 'http://mis.teach.ustc.edu.cn/'
login_url = pre_url + 'userinit.do'    # first request: page carrying the captcha image
a_url = pre_url + 'login.do'           # target of the login form post
grades_url = pre_url + 'querycjxx.do'  # target of the grades query post

# Browser-like request headers sent with every POST in this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36',
    'Referer': login_url,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    'Connection': 'keep-alive',
}

# Form payload for the initial request to login_url.
pre_data = dict(userbz='s')

# Static part of the login form; user id, password and captcha text are
# filled in at login time.
login_data = dict(userbz='s', hidjym='')

# Form payload for the grades query.
# NOTE(review): 'chaxun' is already percent-encoded (it appears to be the
# GBK bytes of the query button label). The HTTP library will presumably
# encode the '%' and '+' characters again when sending form data -- confirm
# the server accepts the value as sent.
grades_data = dict(
    xuenian='',
    chaxun='+%B2%E9++%D1%AF+',
    px='1',
    zd='0',
)

#提交post指令

def judging(name, divide=125):
    """Recognise the text of a captcha image with OCR.

    The image is converted to greyscale, binarised against a fixed
    threshold and then passed to ``image_to_string``.

    Args:
        name: Path of the captcha image file.
        divide: Grey-level threshold. Levels below it map to 0, all other
            levels map to 1. Tune it for the captcha being decoded.

    Returns:
        Whatever ``image_to_string`` returns for the binarised image.
    """
    # One output value per possible grey level (0-255), used as the lookup
    # table for Image.point().
    lookup = [0 if level < divide else 1 for level in range(256)]

    # ``Image`` and ``image_to_string`` are not defined in this file;
    # presumably they come from the ``from pytesser import *`` star import.
    grey = Image.open(name).convert('L')

    # Binarise with the threshold table. The table built above must be the
    # one passed here: the earlier code passed an unrelated, undefined name
    # and failed with NameError.
    binary = grey.point(lookup, '1')

    # Recognition accuracy still leaves room for improvement.
    return image_to_string(binary)

def getGrades(filename):
    """Log in interactively and save the grades page to *filename*.

    Prompts for a user name and password, requests the login page,
    downloads the captcha image to ``c.png``, decodes it with ``judging``,
    posts the login form and finally posts the grades query. The text of
    the grades response is written to *filename*.

    Args:
        filename: Path of the file that receives the grades page text.
    """
    userid = raw_input("name:")
    password = raw_input("password:")

    # One session for all requests so cookies set by the server are reused.
    s = requests.Session()
    login_r = s.post(login_url, headers=headers, data=pre_data)

    # The captcha is the <img id="random"> element; its src is joined onto
    # the site base URL.
    soup = BeautifulSoup(login_r.text, "html.parser")
    img_src = pre_url + soup.find('img', id='random')['src']

    # Download first, then write: a failed request no longer leaves an open
    # (and empty) file behind, and the handle is always closed.
    img = s.get(img_src)
    with open('c.png', 'wb') as f:
        f.write(img.content)
    code = judging('c.png')

    # Build the form from a copy so the module-level ``login_data`` template
    # is not modified and does not retain the credentials.
    form = dict(login_data, userCode=userid, passWord=password, check=code)
    s.post(a_url, headers=headers, data=form)
    grades = s.post(grades_url, headers=headers, data=grades_data)

    # Python 2 only: switch the process-wide default encoding to UTF-8 so
    # the unicode response text can be written to a plain file object.
    # NOTE(review): this is a global side effect; other implicit
    # unicode -> str conversions later in the script presumably rely on it,
    # so confirm before removing.
    reload(sys)
    sys.setdefaultencoding('utf8')
    with open(filename, 'w') as f:
        # write() rather than writelines(): the payload is a single string.
        f.write(grades.text)


def sousa(filename):
    """Extract course names, grades and grade points from a saved page.

    Args:
        filename: Path of the HTML file to parse.

    Returns:
        A tuple ``(courseName, courseGrades, courseGPA)`` of three lists
        with one entry per parsed table row.

    Raises:
        IndexError: If a parsed row has fewer than seven ``td.bg`` cells.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filename) as f:
        text = f.read()

    soup = BeautifulSoup(text, "html.parser")
    trs = soup.find_all('tr', class_='bg')

    courseName = []
    courseGrades = []
    courseGPA = []
    # The first matching row is skipped (presumably the table header --
    # confirm against the page). Slicing instead of ``del trs[0]`` means a
    # page without any matching row yields empty lists rather than raising
    # IndexError.
    for course in trs[1:]:
        tds = course.find_all('td', class_='bg')
        courseName.append(tds[2].string)
        courseGrades.append(tds[4].string)
        courseGPA.append(tds[6].string)
    return (courseName, courseGrades, courseGPA)
def writeGrades(filename, source='test.txt'):
    """Write the parsed grades to *filename*, one course per line.

    Args:
        filename: Path of the text file to write.
        source: Path of the saved grades page handed to ``sousa``. The
            default is the file name the ``__main__`` block saves the page
            under.
    """
    # ``sousa`` requires the path of the saved page; calling it with no
    # argument raised TypeError.
    courseName, courseGrades, courseGPA = sousa(source)
    with open(filename, 'w') as f:
        for name, grade, gpa in zip(courseName, courseGrades, courseGPA):
            f.write('%s %s %s \n' % (name, grade, gpa))

if __name__ == '__main__':
    # Download the grades page, then parse the saved copy.
    getGrades('test.txt')
    courseName, courseGrades, courseGPA = sousa('test.txt')

    # Render the three columns as a text table.
    attributes = ['courseName', 'courseGrades', 'coursePoints']
    table = pylsytable(attributes)
    for column, values in zip(attributes, (courseName, courseGrades, courseGPA)):
        table.add_data(column, values)
    print(table)

　　利用requests.Session()並構造post指令，具體情況具體分析。

圖像處理用到了PIL和pytesser庫；pytesser調用的tesseract是谷歌維護的開源光學字符識別（OCR）引擎，可用於數字、字母、漢字識別（識別率需要優化）。

相關主要代碼：

    image = Image.open(name)  
    
    image_text = image2.point(table,'1')  
    
    return image_to_string(image_text)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 Python 模擬驗證碼登陸 python 識別驗證碼自動登陸 python 識別登陸驗證碼圖片（完整代碼） python模擬網站登陸-滑動驗證碼 python模擬網站登陸-滑動驗證碼 python模擬登陸帶弱圖片驗證碼的網站 [ 轉]c# 使用 HttpWebRequest模擬登陸（附帶驗證碼） java模擬有驗證碼的Http登陸模擬Post登陸帶驗證碼的網站驗證碼破解 | Selenium模擬登陸微博