電子科技大學 易查分網站 爬蟲 批量爬取成績


暑假一個人在寢室,閑來無事。

某天,輔導員恰好發了學年查分的鏈接,一看,發現是易查分平台,再加上手頭的數據,有搞頭啊,遂開始設計爬蟲。
易查分這網站很怪,PC版需要輸入驗證碼,手機版就不需要了。為了方便爬取,果斷選擇手機版。(本來還想訓練個自動填充驗證碼的神經網絡的,可難度有些大,有空了以后補上吧)

該爬蟲使用selenium的webdriver技術實現。速度……只能說可以接受吧

數據准備:需查同學的姓名、學號和身份證號。 (具體獲取方式……自行解決)

這段代碼還融合了Excel的讀寫。

爬蟲有風險,請遵守法律法規哦!

import xlrd
import xlwt
from selenium import webdriver
import time

allstu = []


class stu():
    def __init__(self, name, sex, number, psw):
        self.name = name
        self.sex = sex
        self.number = number
        self.psw = psw[-7:-1]
        self.dic = {}
        self.classify = ''


def readData():
    global allstu
    workbook = xlrd.open_workbook('data.xlsx')
    booksheet = workbook.sheet_by_index(0)
    col = booksheet.ncols
    row = booksheet.nrows
    print(row, col)
    for i in range(row):
        allstu.append(stu(booksheet.cell_value(i, 0), booksheet.cell_value(i, 1),
                          booksheet.cell_value(i, 3), booksheet.cell_value(i, 2)))


def writeData():
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)

    sheet = book.add_sheet('Out', cell_overwrite_ok=True)
    for j in range(len(allsubjects)):
        sheet.write(0, 4 + j, allsubjects[j])

    for i in range(len(allstu)):
        sheet.write(i + 1, 0, allstu[i].name)
        sheet.write(i + 1, 1, allstu[i].sex)
        sheet.write(i + 1, 2, allstu[i].number)
        sheet.write(i + 1, 3, allstu[i].classify)
        for j in range(len(allsubjects)):
            sheet.write(i + 1, 4 + j, allstu[i].dic.get(allsubjects[j], ''))

    book.save(r'out.xls')


allsubjects = []
readData()
urls = ['http://241374.yichafen.com/mobile/queryscore/sqcode/MsTcInwmMjkwfDViN2EzZDI0NTllYzAO0O0O.html',
        'http://241374.yichafen.com/mobile/queryscore/sqcode/MsTcInwmMzAxfDViN2E2MGQwNTVkM2UO0O0O.html',
        'http://241374.yichafen.com/mobile/queryscore/sqcode/MsTcInwmMzAyfDViN2E2MTVhY2E2MDQO0O0O.html']
classes = ['通信工程', '網絡工程', '物聯網工程']
driver = webdriver.Chrome()

# i = 0
i = 15
while(i < len(allstu)):
# while(i < 20):
    # time.sleep(0.5)
    found = False
    for k in range(3):
        url = urls[k]
        driver.implicitly_wait(1)
        driver.get(url)
        driver.refresh()
        number = driver.find_element_by_xpath("//input[@name='s_xuehao']")
        number.clear()
        number.send_keys(allstu[i].number)
        name = driver.find_element_by_xpath("//input[@name='s_xingming']")
        name.clear()
        name.send_keys(allstu[i].name)
        psw = driver.find_element_by_xpath("//input[@name='s_2c54d23b18177aabe8759f1f551451f3']")
        psw.clear()
        psw.send_keys(allstu[i].psw)
        button = driver.find_element_by_xpath("//a[@id='submitBtn']")
        button.click()
        flag = False
        try:
            driver.implicitly_wait(0.5)
            errormsg = driver.find_element_by_xpath("//div[@class='weui-dialog__bd']")
        # print(errormsg.text)
        except:
            flag = True

        if flag:
            allstu[i].classify = classes[k]
            found = True
            subnames = driver.find_elements_by_class_name('left_cell')
            grades = driver.find_elements_by_class_name('right_cell')
            for j in range(3, len(subnames)):
                if not subnames[j].text in allsubjects:
                    allsubjects.append(subnames[j].text)
                allstu[i].dic[subnames[j].text] = grades[j].text
            break
    print('{} {} : {}, finished'.format(str(i + 1), allstu[i].name, allstu[i].classify))
    i += 1
writeData()

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM