序言
本篇文章讲解如何爬取强智科技教务系统平台
因只能以本校为例,侵删请联系 nepenthic@163.com
仅供学习参考
正文
如果你有心抓取强智科技的教务系统登入页面,你会发现输入完账号密码会把表单参数加密发送到登入校验页
通过查看网页源代码我们可以一路追溯到加密方法
因为我写这篇文章的时候教务系统已经崩了(请求返回 502),所以这里不讲解如何追溯
这个所谓的“加密”方法其实只是 Base64 编码(可逆,并非真正的加密)
使用 Python 的 base64 库就能直接转换,下面是转换方法:
import base64


def strToBase64(s):
    """Return the Base64 encoding of the text ``s`` as a ``str``.

    ``s`` is encoded to UTF-8 bytes, Base64-encoded, and the resulting
    ASCII bytes are decoded back to text.
    """
    return base64.b64encode(s.encode('utf8')).decode('utf8')
得到加密数据,我们提交给login页
def get_class(name, passwd):
    """Log in and request this week's timetable page (article excerpt).

    ``strToBase64``, ``rq`` (today's date formatted as ``%Y-%m-%d``) and
    ``requests`` come from the full script shown further below.  The
    "listing line" tags in the comments are the line numbers the article
    text refers to.
    """
    # The login form expects base64(account) + '%%%' + base64(password).
    encoded = strToBase64(name) + '%%%' + strToBase64(passwd)
    param = {  # login form fields
        'userAccount': name,
        'userPassword': passwd,
        'encoded': encoded,  # the Base64-encoded credential string
    }
    head = {  # request headers
        "rq": rq,
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
        "Referer": "https://www.baidu.com/",
        "accept-encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "cache-control": "max-age=0",
        "Connection": "keep-alive",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    }
    session = requests.session()  # a session keeps the login cookies

    # Listing line 21: POST the encoded credentials to the login endpoint.
    # The fields go in the form body (data=) so that the password does not
    # end up in the URL query string.
    jsess = session.post(
        url="http://jiaowu.jvtc.jx.cn/jsxsd/xk/LoginToXk",
        data=param,
        headers=head,
        timeout=10,
    )

    # Listing lines 22-28 (the HTTP status check, see the full script below)
    # are omitted in this excerpt.

    # Listing line 33: request the timetable JSP for the week containing rq.
    # The Referer is sent as a real header: the request claims to come from
    # the main page.
    r = session.post(
        "http://jiaowu.jvtc.jx.cn/jsxsd/framework/main_index_loadkb.jsp",
        data={"rq": rq},
        headers={"Referer": "http://jiaowu.jvtc.jx.cn/jsxsd/framework/xsMain_new.jsp?t1=1"},
        timeout=10,
    )
    # Listing line 34: the HTML of this week's timetable.
    soundCode = r.text
    # NOTE: the full-term timetable is served by
    # http://jiaowu.jvtc.jx.cn/jsxsd/xskb/xskb_list.do; the original author
    # did not manage to parse that page, so it is not requested here.
上面的代码中,第 21 行把编码后的账号密码 POST 给登录页面,第 33 行请求课表的 JSP 页面,把第 34 行得到的 soundCode 打印出来就是你的课表页面
下面我直接上传整个代码(包含了qq及wx的推送功能)
# -*- coding: utf-8 -*-
# @TIME : 2021/5/15 3:56
# @Author : 幼稚鬼 (Naive)
# @what are you to do? : push the timetable of class xxxxxx,
#                        Jiujiang Vocational and Technical College
import base64
import datetime
import linecache
import os
import re

import pandas as pd
import pytz
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame

# --- configuration: fill in your own values ---------------------------------
QMSG_KEY = 'qmsg酱的key'          # key of the Qmsg (QQ push) service
SERVERCHAN_KEY = 'server酱的key'  # key of the ServerChan (WeChat push) service
PUSH_QQ_LIST = ['推送的QQ']        # QQ numbers (as strings) that receive the push

LOGIN_URL = "http://jiaowu.jvtc.jx.cn/jsxsd/xk/LoginToXk"
MAIN_PAGE_URL = "http://jiaowu.jvtc.jx.cn/jsxsd/framework/xsMain_new.jsp?t1=1"
TIMETABLE_URL = "http://jiaowu.jvtc.jx.cn/jsxsd/framework/main_index_loadkb.jsp"
EXCEL_PATH_TEMPLATE = r'xxxx班第{}周课表.xlsx'
LOG_PATH = 'log.txt'
REQUEST_TIMEOUT = 10  # seconds; never wait forever on a dead server

WEEKDAY_NAMES = ['星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日']
PERIOD_COLUMNS = ['第一大节', '第二大节', '第三大节', '第四大节', '第五大节', '第六大节']
PERIOD_NAMES = dict(zip(
    ['01-02', '03-04', '05-06', '07-08', '09-10', '11-12'],
    PERIOD_COLUMNS,
))
SERVER_DOWN_MESSAGE = "\n有内鬼,停止交易!\n\n服务器都炸了你还在这里吟诗作对,"
REST_DAY_MESSAGE = "休息日 无课\n\n业精于勤,荒于嬉;行成于思,毁于随——韩愈"

# --- "today" in UTC+8, taken from a single clock reading ---------------------
tz = pytz.timezone('Asia/Shanghai')
_now = datetime.datetime.now(tz)
rq = _now.strftime('%Y-%m-%d')  # date sent to the timetable page
xq = _now.isoweekday()          # 1 = Monday ... 7 = Sunday
cn = WEEKDAY_NAMES[xq - 1]      # weekday name used in the push title


def strToBase64(s):
    """Return the Base64 encoding of the text ``s`` as a ``str``."""
    return base64.b64encode(s.encode('utf8')).decode('utf8')


def _first_group(pattern, text):
    """Return group 1 of the first match of ``pattern`` in ``text``, or None."""
    found = re.search(pattern, text)
    return found.group(1) if found else None


def _fail_over(message):
    """Push ``message`` to QQ and WeChat, then fall back to plan B (which exits)."""
    calls(message)
    wx(message)
    planB()


def get_class(name, passwd):
    """Log in with ``name``/``passwd``, fetch this week's timetable and process it.

    Any connection problem or non-200 login response triggers the plan-B
    fallback (push of the last saved timetable), which ends the process.
    """
    # The login form expects base64(account) + '%%%' + base64(password).
    encoded = strToBase64(name) + '%%%' + strToBase64(passwd)
    param = {  # login form fields
        'userAccount': name,
        'userPassword': passwd,
        'encoded': encoded,
    }
    head = {  # request headers
        "rq": rq,
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36",
        "Referer": "https://www.baidu.com/",
        "accept-encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "cache-control": "max-age=0",
        "Connection": "keep-alive",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    }
    session = requests.session()  # a session keeps the login cookies

    try:
        # Form body (data=), not query string: keeps the password out of URLs.
        jsess = session.post(url=LOGIN_URL, data=param, headers=head,
                             timeout=REQUEST_TIMEOUT)
        login_ok = jsess.status_code == 200
    except requests.RequestException as err:
        print("login request failed:", err)
        login_ok = False

    if not login_ok:
        print("链接失败!")
        _fail_over(SERVER_DOWN_MESSAGE + "切")
        return
    print("链接成功!")

    try:
        # Ask for the timetable of the week containing rq; the Referer header
        # makes the request look like a jump from the main page.
        r = session.post(TIMETABLE_URL, data={"rq": rq},
                         headers={"Referer": MAIN_PAGE_URL},
                         timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print("timetable request failed:", err)
        _fail_over(SERVER_DOWN_MESSAGE + "切")
        return

    # NOTE: the full-term timetable is served by
    # http://jiaowu.jvtc.jx.cn/jsxsd/xskb/xskb_list.do; the original author
    # did not manage to parse that page, so it is not requested here.
    findall(r.text)


def _format_lesson(html):
    """Turn one lesson ``<p>`` (as HTML text) into 'period\\ncourse\\nroom'.

    Returns None when the paragraph carries no ``[xx-yy]节`` period marker,
    i.e. when it does not look like a lesson entry.
    """
    period = _first_group(r"\[(.*?)\]节<", html)
    if period is None:
        return None
    course = _first_group(r"课程名称:(.*?)<br/>", html) or ''
    room = _first_group(r'上课地点:(.*?)">', html) or ''
    return PERIOD_NAMES.get(period, period) + "\n" + course + "\n" + room


def findall(soundCode):
    """Parse the timetable HTML, push today's lessons and save the week.

    ``soundCode`` is the HTML text returned by the timetable page.  A page
    that has a ``<title>`` is not the timetable fragment (e.g. it is still
    the login page), so plan B is used instead.
    """
    demand = BeautifulSoup(soundCode, 'lxml')
    if demand.select('title'):
        if str(demand.title.string) == '登录':
            # Still on the login page: the login did not take effect.
            print("链接成功,当依然在登入页面!")
            _fail_over(SERVER_DOWN_MESSAGE + "切切")
        else:
            print("len(demand.select('title')) != 0 的未知错误,请处理!")
            _fail_over(SERVER_DOWN_MESSAGE + "切切切")
        return

    print("进入主页成功,等待加载 * * * * * * ")
    week = _first_group(r'上课时间:第(.*?)周 星期', str(soundCode))
    if week is None:
        # Without a week number nothing can be saved; use the history instead
        # of crashing with an IndexError.
        print("未能从页面中解析出周数,改用历史课表")
        planB()
        return

    # log.txt: line 1 = week number, line 2 = date of the last update.
    with open(LOG_PATH, "w", encoding="utf-8") as log_file:
        log_file.write(week + "\n" + rq)

    # Every lesson of the week sits in its own <p> tag.
    paragraphs = [str(p) for p in demand.select('p')]
    answer = [[day, []] for day in WEEKDAY_NAMES]
    for day, lessons in answer:
        for html in paragraphs:
            if day not in html:
                continue
            lesson = _format_lesson(html)
            if lesson is not None:
                lessons.append(lesson)

    msg(answer)
    toexcl(answer, week)


def msg(answer):
    """Push today's lessons (or a rest-day note) to QQ and WeChat.

    ``answer`` is the list of ``[weekday name, [lesson text, ...]]`` pairs
    built by ``findall``, Monday first.
    """
    todays_lessons = answer[int(xq) - 1][1]
    if not todays_lessons:
        print(REST_DAY_MESSAGE)
        calls("\n" + REST_DAY_MESSAGE)
        wx("\n" + REST_DAY_MESSAGE)
        return
    lifeline = "".join('\n' + str(lesson) + '\n' for lesson in todays_lessons)
    print(lifeline)
    wx(lifeline)
    calls(lifeline)


def calls(strs):
    """Send ``strs`` to every QQ number in PUSH_QQ_LIST via Qmsg (best effort)."""
    print("qq:", strs)
    url = 'https://qmsg.zendee.cn/send/' + QMSG_KEY
    for qq in PUSH_QQ_LIST:
        try:
            # params= URL-encodes the message (newlines, '&', non-ASCII text).
            response = requests.get(url, params={'msg': strs, 'qq': qq},
                                    timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            # A failed push must not prevent the remaining work (e.g. plan B).
            print("QQ推送失败:", err)
        else:
            print("QQ推送成功")


def wx(strs):
    """Send ``strs`` to WeChat via ServerChan (best effort)."""
    urlwxqy = "https://sctapi.ftqq.com/" + SERVERCHAN_KEY + ".send"
    parmwxqy = {
        'title': rq + ' ' + cn,
        'desp': strs,
    }
    try:
        response = requests.post(url=urlwxqy, data=parmwxqy,
                                 timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print("企业微信推送失败:", err)
    else:
        print("企业微信推送成功")


def _find_history_file(week):
    """Return the saved workbook for ``week`` or the week before it, else None."""
    candidates = [week]
    try:
        candidates.append(int(week) - 1)
    except ValueError:
        pass  # log.txt missing or empty: there is no previous week to try
    for candidate in candidates:
        path = EXCEL_PATH_TEMPLATE.format(candidate)
        if os.path.exists(path):
            return path
    return None


def planB():
    """Push today's lessons from the last saved workbook, then exit.

    Used whenever the live timetable cannot be fetched.  Always ends the
    process with exit status 0.
    """
    print("开始执行方案B")
    week = linecache.getline(LOG_PATH, 1).strip()
    lastdate = linecache.getline(LOG_PATH, 2)[0:10]

    path = _find_history_file(week)
    if path is None:
        strs = "\n无历史课表\n\n生而为人,我很抱歉\n"
        print(strs)
        calls(strs)
        wx(strs)
        raise SystemExit(0)

    # Read the workbook that was actually found (this week's or last week's).
    pdarry = pd.read_excel(path)
    todays_cells = pdarry[xq].values  # columns are the weekday numbers 1..7

    # '芜湖' is the filler toexcl writes into empty periods.
    body = "".join('' if str(cell) == '芜湖' else str(cell)
                   for cell in todays_cells)
    if not body:
        body = REST_DAY_MESSAGE + "\n"
    strs = ('\n执行方案B\n数据来源:{}\n\n'.format(lastdate)
            + body
            + "\n\n历史课表,仅供参考!!!")
    print(strs)
    calls(strs)
    wx(strs)
    print("执行方案B成功")
    raise SystemExit(0)


def toexcl(anwser, week):
    """Save the week's timetable to ``xxxx班第<week>周课表.xlsx``.

    ``anwser`` is the ``[weekday name, [lesson text, ...]]`` list built by
    ``findall``.  Every day is padded to six periods; empty periods are
    stored as '芜湖'.  A day with more than six entries makes pandas raise
    ValueError because only six period columns exist.
    """
    print("开始执行存xlsx")
    width = len(PERIOD_COLUMNS)
    data = [list(day[1]) + [None] * (width - len(day[1])) for day in anwser[:7]]
    pdarrays = DataFrame(data, index=[1, 2, 3, 4, 5, 6, 7],
                         columns=PERIOD_COLUMNS, dtype=str)
    filled = pdarrays.fillna('芜湖')  # mark empty periods
    filled.T.to_excel(EXCEL_PATH_TEMPLATE.format(week))  # periods as rows, days as columns
    print("存入xlsx成功")


if __name__ == '__main__':
    get_class('账号', '密码')
代码有些臃肿,但能运行就没再精益求精了
需要注意的是,请求参数 rq 的日期格式是 YYYY-MM-DD,例如 2021-05-05
其中的qq和wx推送方式自行注册 QQ推送 : qmsg酱 微信推送 : server酱 获得key填入对应代码位置即可
再次声明,如果学校在意请联系我删除