Python 實現的模擬登陸(附帶驗證碼識別)


Python 的 requests 模塊是個神器,這裡用 requests 模塊實現模擬登陸:

 

#coding:utf-8
import sys
import requests
from bs4 import BeautifulSoup
import re
from pylsy import pylsytable
# --- CAPTCHA recognition setup ---
import os
# Switch the process working directory to site-packages before importing
# pytesser.
# NOTE(review): presumably pytesser locates its tesseract executable/data
# relative to the current directory -- confirm. Side effect: relative paths
# used later in this script ('c.png', 'test.txt') resolve inside this
# directory.
os.chdir("C:\Python27\Lib\site-packages")
from pytesser import *
# pytesser is the CAPTCHA (OCR) recognition library. Its star import is
# presumably also what provides Image and image_to_string to judging(),
# since nothing else in this file imports them -- confirm.

# All endpoints live under the same host, so they are derived from pre_url.
pre_url = 'http://mis.teach.ustc.edu.cn/'
login_url = pre_url + 'userinit.do'      # first POST; its response is parsed for the CAPTCHA <img>
a_url = pre_url + 'login.do'             # credential + CAPTCHA submission
grades_url = pre_url + 'querycjxx.do'    # grades query

# Browser-like headers sent with every POST in getGrades().
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36'
    ),
    'Referer': login_url,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    'Connection': 'keep-alive',
}

# Form body for the initial request to login_url.
pre_data = {'userbz': 's'}

# Base form body for a_url; getGrades() adds userCode, passWord and check.
login_data = {'userbz': 's', 'hidjym': ''}

# Form body for grades_url.
# NOTE(review): 'chaxun' is already percent-encoded; requests will encode the
# '%' characters again when it form-encodes this dict -- confirm the server
# accepts that.
grades_data = {
    'xuenian': '',
    'chaxun': '+%B2%E9++%D1%AF+',
    'px': '1',
    'zd': '0',
}

#提交post指令

def judging(name, divide=125):
    """Binarize a CAPTCHA image and return the text recognised by OCR.

    The image is opened, converted to 8-bit greyscale ('L'), thresholded
    into a bilevel ('1') image and handed to ``image_to_string``.

    name   -- path of the image file to open.
    divide -- greyscale threshold: levels below it map to 0, all others
              to 1. The default of 125 was chosen by trial; tune it for
              the CAPTCHA being read.

    Returns the value produced by ``image_to_string`` for the binarized
    image.
    """
    # One output value per possible grey level (0..255).
    lookup = [0 if level < divide else 1 for level in range(256)]

    grey = Image.open(name).convert('L')

    # Threshold using the table built above. This must be ``lookup``: the
    # name ``table`` is not defined when this function runs, so passing it
    # raises NameError.
    binary = grey.point(lookup, '1')

    # Recognition accuracy still has room for improvement.
    return image_to_string(binary)

def getGrades(filename):
    """Log in interactively and save the grades page HTML to ``filename``.

    Steps, all on one ``requests.Session`` so cookies carry over:
      1. prompt for the user id and password on stdin;
      2. POST ``pre_data`` to ``login_url`` and locate the CAPTCHA
         ``<img id="random">`` in the response;
      3. download the CAPTCHA to ``c.png`` and read it with ``judging``;
      4. POST the credentials plus CAPTCHA text to ``a_url``;
      5. POST ``grades_data`` to ``grades_url`` and write the response
         text to ``filename``.

    filename -- path the grades HTML is written to (overwritten).

    NOTE(review): the login response is not inspected, so a misread
    CAPTCHA is not detected here -- confirm what the grades request
    returns in that case.
    """
    userid = raw_input("name:")
    password = raw_input("password:")

    s = requests.Session()
    login_r = s.post(login_url, headers=headers, data=pre_data)

    soup = BeautifulSoup(login_r.text, "html.parser")
    img_src = pre_url + soup.find('img', id='random')['src']

    # Fetch first, then write inside ``with`` so the handle is always closed.
    img = s.get(img_src)
    with open('c.png', 'wb') as f:
        f.write(img.content)
    code = judging('c.png')

    # Work on a copy so the credentials are not left behind in the
    # module-level login_data dict after the call.
    payload = dict(login_data)
    payload['userCode'] = userid
    payload['passWord'] = password
    payload['check'] = code
    s.post(a_url, headers=headers, data=payload)

    grades = s.post(grades_url, headers=headers, data=grades_data)

    # Python 2 workaround kept from the original: make UTF-8 the implicit
    # codec so the unicode response text can be written to a byte-mode file.
    # It is process-wide, and later code may rely on it.
    reload(sys)
    sys.setdefaultencoding('utf8')
    with open(filename, 'w') as f:
        # A single write; writelines() on a string would iterate it
        # character by character.
        f.write(grades.text)


def sousa(filename):
    """Parse a saved grades page into three parallel lists.

    Every ``<tr class="bg">`` except the first is treated as one course;
    within it the ``<td class="bg">`` cells at index 2, 4 and 6 supply the
    course name, grade and GPA respectively (each taken via ``.string``).

    filename -- path of the HTML file to read.

    Returns a tuple ``(courseName, courseGrades, courseGPA)`` of lists with
    one entry per course row. A page with no matching rows yields three
    empty lists.

    Raises IndexError if a course row has fewer than seven matching cells.
    """
    # ``with`` guarantees the handle is closed; the original never closed it.
    with open(filename) as f:
        text = f.read()

    soup = BeautifulSoup(text, "html.parser")
    rows = soup.find_all('tr', class_='bg')

    courseName = []
    courseGrades = []
    courseGPA = []
    # Skip the first matching row (presumably the table header -- confirm).
    # Slicing instead of ``del rows[0]`` avoids an IndexError when the page
    # contains no matching rows at all.
    for course in rows[1:]:
        tds = course.find_all('td', class_='bg')
        courseName.append(tds[2].string)
        courseGrades.append(tds[4].string)
        courseGPA.append(tds[6].string)
    return (courseName, courseGrades, courseGPA)
def writeGrades(filename, source=None):
    """Write one ``name grade gpa`` line per course to ``filename``.

    filename -- output path (overwritten).
    source   -- path of the saved grades HTML passed to ``sousa``. Defaults
                to ``filename``; in that case the page is read and parsed
                completely first and the same file is then overwritten
                with the summary.

    The previous version called ``sousa()`` without its required path
    argument, which always raised TypeError.
    """
    if source is None:
        source = filename
    courseName, courseGrades, courseGPA = sousa(source)

    with open(filename, 'w') as f:
        # The three lists are built in lockstep by sousa(), so zip pairs
        # each course's fields.
        for row in zip(courseName, courseGrades, courseGPA):
            f.write('%s %s %s \n' % row)

if __name__ == '__main__':
    # Fetch the grades page into test.txt, then parse that same file.
    getGrades('test.txt')
    courseName, courseGrades, courseGPA = sousa('test.txt')

    # Build a three-column text table: one column per parsed list.
    attributes = ['courseName', 'courseGrades', 'coursePoints']
    table = pylsytable(attributes)
    for column, values in zip(attributes, (courseName, courseGrades, courseGPA)):
        table.add_data(column, values)

    # Parenthesised form prints the same text as the Python 2 statement form.
    print(table)
    
    

  利用 requests.Session() 保持會話(cookie),並構造 POST 請求完成登陸;各網站的表單字段不同,需具體情況具體分析。

      圖像處理用到了 PIL 和 pytesser 庫;pytesser 調用的 Tesseract 是谷歌維護的開源 OCR(光學字符識別)引擎,可用於數字、字母、漢字識別(識別率仍需優化)。

相關主要代碼:

    image = Image.open(name)  
    
    image_text = image2.point(table,'1')  
    
    return image_to_string(image_text)

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM