這兩天從早上寫代碼到半夜,終於找回一點做程序員的感覺。人閑太久了真沒勁,所以沒事可以多定定計劃,找一找奮鬥的感覺挺好。
閑話不多說,今天筆記主要是記錄對微信爬取的整個過程
爬取分為幾個步驟:
1.實現登錄
2.實現用戶初始化,獲取最近聯繫人以及所有用戶信息
3.實現對用戶發送消息
4.實現對消息的接收
本文主要使用 Flask 框架、requests 庫以及 bs4(BeautifulSoup)來實現爬蟲
代碼結構:

步驟:
1.創建 Flask 項目 Wechat,在 manage.py 裡面定義登錄函數 login:
#-*-coding:utf-8-*-
from flask import Flask,request,render_template,session,jsonify
import time
import requests
import re
import json
from bs4 import BeautifulSoup

app = Flask(__name__)
app.debug = True
# NOTE(review): hard-coded session-signing key and debug mode are only
# acceptable for a local demo; load the key from configuration before
# exposing this app anywhere.
app.secret_key='abcdefghigklmn'


@app.route('/login',methods=['GET','POST'])
def login():
    '''
    Start a WeChat web login: request a fresh login uuid and render the QR page.

    On GET, the uuid is requested from the jslogin endpoint, stored in the
    session under 'qcode' and passed to login.html, which builds the QR-code
    image URL from it.

    :return: the rendered login page; a 405 response for non-GET requests;
             a 502 response when no uuid can be extracted from the reply.
    '''
    if request.method != 'GET':
        # The original fell through to `pass` and returned None, which Flask
        # turns into a 500 error. Answer explicitly instead.
        return 'Method Not Allowed', 405

    # Millisecond timestamp used as the cache-busting `_` query parameter.
    ctime = str(int(time.time()*1000))
    qcode_url = 'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}'.format(ctime)
    res = requests.get(qcode_url)

    # The reply is a JavaScript snippet; the uuid sits in `uuid = "...";`.
    matches = re.findall('uuid = "(.*?)";',res.text)
    if not matches:
        # Avoid an IndexError when the upstream reply has an unexpected shape.
        return 'Failed to obtain a login uuid from WeChat', 502

    qcode = matches[0]
    session['qcode'] = qcode
    return render_template('login.html',qcode=qcode)
@app.route('/check_login')
def check_login():
    '''
    Poll the WeChat login endpoint to find out whether the QR code has been
    scanned and whether the login has been confirmed.

    Example of the polled URL:
    https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=gbG3TQrkaA==&tip=0&r=-925318273&_=1529933650035

    :return: JSON object with a 'code' field:
             408 - nothing happened yet (default),
             201 - QR code scanned; 'src' carries the user's avatar,
             200 - login confirmed; credentials were stored in the session.
    '''
    response = {'code':408}
    qcode = session.get('qcode')
    ctime = str(int(time.time() * 1000))
    check_url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip=0&r=-925318273&_={1}'.format(qcode,ctime)
    req = requests.get(check_url)

    if "code=201" in req.text:
        # The user has scanned the code: extract the avatar.
        avatars = re.findall("userAvatar = '(.*)';",req.text)
        if avatars:
            # Only report 201 when an avatar was actually found; the original
            # indexed [0] unconditionally and could raise IndexError.
            response['code'] = 201
            response['src'] = avatars[0]
    elif 'code=200' in req.text:
        # The user confirmed the login: follow the redirect to get credentials.
        redirect_uris = re.findall('redirect_uri="(.*)";',req.text)
        if redirect_uris:
            redirect_uri = redirect_uris[0] + '&fun=new&version=v2'
            ticket_ret = requests.get(redirect_uri)
            # The reply is an XML document; keep its fields and the cookies
            # in the session for the later API calls.
            ticket_dict = xml_parser(ticket_ret.text)
            session['ticket_dict']=ticket_dict
            session['ticket_cookie'] = ticket_ret.cookies.get_dict()
            response['code'] = 200
    return jsonify(response)
def xml_parser(text):
    '''
    Flatten the direct children of the <error> element into a dict.

    :param text: markup returned by the login redirect request.
    :return: dict mapping each direct child tag name of <error> to its text;
             an empty dict when the document has no <error> element.
    '''
    soup = BeautifulSoup(text,'html.parser')
    root = soup.find(name='error')
    if root is None:
        # find() returns None when the tag is absent; the original then
        # crashed with AttributeError on .find_all().
        return {}
    return {child.name: child.text for child in root.find_all(recursive=False)}
創建login.html,代碼如下:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Login page: shows the QR code and polls /check_login until the login completes. #}
<div style="width:200px;margin:0 auto">
    <h1 style="text-align: center"> 登陸</h1>
    {# Standard <img> element instead of the non-standard <image>. #}
    <img id="img" style="height:200px;width:200px;" src="https://login.wx.qq.com/qrcode/{{qcode}}">
</div>
<script src="/static/jquery-1.12.4.min.js"></script>
<script>
    $(function () {
        checkLogin();
    })

    // Poll the backend; every answer except 200 triggers another poll.
    function checkLogin(){
        $.ajax({
            url:'/check_login',
            type:'GET',
            dataType:'JSON',
            success:function(arg){
                if (arg.code === 201){
                    // QR code scanned: show the user's avatar and keep polling.
                    console.log('src:',arg.src)
                    $('#img').attr('src',arg.src);
                    checkLogin();
                }else if(arg.code === 200){
                    // Login confirmed: go to the user page.
                    location.href = '/index'
                }else{
                    checkLogin();
                }
            },
            error:function(){
                // Without this handler a single failed request would stop
                // the polling for good; retry after a short pause.
                setTimeout(checkLogin, 1000);
            }
        })
    }
</script>
</body>
</html>
2.用戶初始化並獲取用戶頭像代碼實現:
# User initialisation step
@app.route('/index')
def index():
    '''
    Initialise the logged-in user via webwxinit and render the landing page.

    Stores the 'User' and 'SyncKey' parts of the reply in the session (as
    'current_user' and 'synckey') for the other views.

    :return: the rendered index page, or a redirect to /login when the
             session holds no login credentials.
    '''
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not ticket_dict:
        # Not logged in: the original crashed with AttributeError on None.
        return '', 302, {'Location': '/login'}

    # Example endpoint: https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-2115319983
    init_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-2132117709&pass_ticket={0}'.format(ticket_dict.get('pass_ticket'))
    data_dict = {
        'BaseRequest': {
            'DeviceID': "e292711087499063",
            'Sid': ticket_dict.get('wxsid'),
            # The stored key is 'wxuin' (as read by get_msg); the original
            # looked up 'wxuid' here and therefore always sent Uin=None.
            'Uin': ticket_dict.get('wxuin'),
            'Skey': ticket_dict.get('skey'),
        },
    }
    # json= serialises the dict and sets the JSON content type.
    init_ret = requests.post(
        url=init_url,
        json=data_dict,
        cookies=ticket_cookie,
    )
    init_ret.encoding = 'utf-8'
    user_dict = init_ret.json()
    session['current_user'] = user_dict['User']
    session['synckey'] = user_dict['SyncKey']
    return render_template('index.html',user_dict=user_dict)


@app.route('/get_img')
def get_img():
    '''
    Proxy the current user's avatar image from wx.qq.com.

    Example upstream URL:
    https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgeticon?seq=1182160498&username=@f04bb7e4d7821f504a4992ca85be95aa3e9957c7e3dfb224dc467af8639450e7&skey=@crypt_a1d89414_e0cf3503fac08d5ac1bf9fadcae86c0d

    :return: the raw image bytes with the upstream content type, or 404 when
             no user has been initialised in this session.
    '''
    current_user = session.get('current_user')
    if not current_user:
        # The original raised KeyError here when /index had not run yet.
        return '', 404
    ticket_cookie = session.get('ticket_cookie')
    head_url = "https://wx.qq.com"+current_user["HeadImgUrl"]
    img_ret = requests.get(head_url,cookies=ticket_cookie,headers={'Content-Type':'image/jpg'})
    # Return the bytes with an image content type; a bare bytes return value
    # would be served with Flask's default text/html type.
    content_type = img_ret.headers.get('Content-Type', 'image/jpeg')
    return img_ret.content, 200, {'Content-Type': content_type}
前端代碼index.html如下:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Landing page: current user's profile plus the recent-contact list from webwxinit. #}
<h1>歡迎登陸{{ user_dict.User.NickName }}</h1>
<div>
    {# Avatar is proxied by the /get_img view. #}
    <img src="/get_img" alt="">
    <h2>{{ user_dict.User.NickName }}</h2>
    <h2>{{ user_dict.User.UserName }}</h2>
</div>
<h3>最近登錄聯系人</h3>
<ul>
    {% for user in user_dict.ContactList %}
    <li>{{ user.NickName }}</li>
    {% endfor %}
</ul>
<a href="/user_list">查看所有聯系人</a>
</body>
</html>
3.到這裡就能夠實現自動登錄並獲取到最近聯繫人,接著我們獲取所有聯繫人及其信息
@app.route('/user_list')
def user_list():
    '''
    Fetch the complete contact list via webwxgetcontact and render it.

    :return: the rendered user_list page, or a redirect to /login when the
             session holds no login credentials.
    '''
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not ticket_dict:
        # Not logged in: the original crashed with AttributeError on None.
        return '', 302, {'Location': '/login'}

    ctime = int(time.time()*1000)
    skey = ticket_dict.get('skey')
    pass_ticket = ticket_dict.get('pass_ticket')
    user_list_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}".format(pass_ticket,ctime,skey)
    r1 = requests.get(user_list_url,cookies=ticket_cookie)
    # Force UTF-8 before decoding so the nicknames decode as UTF-8.
    r1.encoding = 'utf-8'
    wx_user_dict = r1.json()
    return render_template('user_list.html',wx_user_dict=wx_user_dict)
前端代碼如下:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Full contact list: member count followed by one line per contact. #}
<div>
    <div style="width:30%;float:left;">
        <h3>{{wx_user_dict.MemberCount}}</h3>
        <ul>
            {% for item in wx_user_dict.MemberList %}
            <li>{{ item.NickName }} ===== {{item.UserName}}</li>
            {% endfor %}
        </ul>
    </div>
    {# The style attribute's closing quote was missing, which swallowed the markup after it. #}
    <div style="width:7%;float:right;">
    </div>
</div>
</body>
</html>
4.接下來可以實現發送消息的功能
首先創建前端send.html頁面
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Message form: posts the recipient ("to") and the text ("content") back to the same URL. #}
<form action="" method="post">
    <input type="text" name="to">
    <input type="text" name="content">
    <input type="submit" value="發送">
</form>
</body>
</html>
後台實現邏輯如下:
@app.route('/send',methods=['GET','POST'])
def send():
    '''
    Show the message form (GET) or send a text message via webwxsendmsg (POST).

    Form fields read on POST: 'to' (recipient UserName) and 'content'.

    :return: the rendered form on GET; on POST the raw reply text of the
             webwxsendmsg call, or a redirect to /login when the session
             holds no login data.
    '''
    if request.method == "GET":
        return render_template('send.html')

    current_user = session.get('current_user')
    ticket_dict = session.get('ticket_dict')
    if not current_user or not ticket_dict:
        # Not logged in: the original crashed with KeyError/AttributeError.
        return '', 302, {'Location': '/login'}
    ticket_cookie = session.get('ticket_cookie')
    pass_ticket = ticket_dict.get('pass_ticket')

    from_user = current_user["UserName"]
    to = request.form.get('to')
    content = request.form.get('content')

    # Integer millisecond timestamp, as in the other views. The original
    # str(time.time()*1000) produced a float string (exponent notation on
    # Python 2), which is not a usable message id.
    ctime = str(int(time.time()*1000))
    msg_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?pass_ticket={0}'.format(pass_ticket)
    data_dict = {
        'BaseRequest':{
            'DeviceID': "e956888515941054",
            'Sid': ticket_dict.get('wxsid'),
            # The stored key is 'wxuin' (as read by get_msg); 'wxuid' does
            # not exist and always yielded None.
            'Uin': ticket_dict.get('wxuin'),
            'Skey': ticket_dict.get('skey'),
        },
        'Msg':{
            'ClientMsgId':ctime,
            'LocalID':ctime,
            'FromUserName':from_user,
            'ToUserName':to,
            'Content':content,
            'Type':1
        },
        'scene':0
    }
    # ensure_ascii=False keeps non-ASCII text literal instead of \uXXXX
    # escapes; encode explicitly to UTF-8 bytes so the body does not depend
    # on the HTTP library's default string encoding.
    body = json.dumps(data_dict,ensure_ascii=False).encode('utf-8')
    ret = requests.post(
        url = msg_url,
        data = body,
        headers = {'Content-Type': 'application/json; charset=UTF-8'},
        cookies = ticket_cookie,
    )
    return ret.text
5.實現獲取消息代碼:
首先定義get_msg.html文件
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
{# Message page: lists the fetched messages and keeps polling /get_msg. #}
<ul>
    {% for item in content.AddMsgList %}
    <li>
        <!--{{ item }}-->
        {{ item['Content']}}
        From--> {{item['FromUserName']}}
        To--> {{item['ToUserName']}}
    </li>
    {% endfor %}
</ul>
{# The tag was misspelled "scrip", so jQuery never loaded and "$" was undefined. #}
{# Path aligned with the jQuery file that login.html loads. #}
<script src="/static/jquery-1.12.4.min.js"></script>
<script>
    $(function(){
        fetchMessage();
    });

    // Poll again after every successful reply.
    // NOTE(review): the reply itself (arg) is not rendered into the page.
    function fetchMessage(){
        $.ajax({
            url:'/get_msg',
            type: 'GET',
            success:function(arg){
                fetchMessage();
            }
        })
    }
</script>
</body>
</html>
後台實現邏輯如下:
@app.route('/get_msg')
def get_msg():
    '''
    Check for new messages (synccheck) and, when there are some, fetch them
    (webwxsync) and render them.

    :return: the rendered get_msg page (with an empty message list when
             nothing new arrived), or a 401 response when the session holds
             no login data.
    '''
    ticket_dict = session.get('ticket_dict')
    sync_key = session.get('synckey')
    if not ticket_dict or not sync_key:
        # Not logged in / not initialised: the original crashed with KeyError.
        return 'Not logged in', 401
    ticket_cookie = session.get('ticket_cookie')

    # Step 1: ask whether anything new has arrived.
    sync_url = "https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck"
    # The sync key is sent as "Key_Val" pairs joined with "|".
    sync_data_str = "|".join(
        "%s_%s"%(item['Key'],item['Val']) for item in sync_key['List']
    )
    sync_dict = {
        "r":int(time.time()),
        "skey": ticket_dict['skey'],
        "sid":ticket_dict['wxsid'],
        "uin":ticket_dict['wxuin'],
        "deviceid":"e590082815481369",
        "synckey":sync_data_str,
    }
    response_sync = requests.get(sync_url,params=sync_dict,cookies=ticket_cookie)

    # Default so the template always has something to iterate over; the
    # original left `content` unbound when there was no new message.
    content = {'AddMsgList': []}

    # Step 2: fetch the message bodies when the check reports selector "2".
    if 'selector:"2"' in response_sync.text:
        pass_ticket = ticket_dict.get('pass_ticket')
        fetch_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid={0}&skey={1}&lang=zh_CN&pass_ticket={2}".format(ticket_dict['wxsid'],ticket_dict['skey'],pass_ticket)
        form_data = {
            'BaseRequest':{
                'DeviceID':"e616487029833324",
                'Sid':ticket_dict['wxsid'],
                'Skey':ticket_dict['skey'],
                'Uin':ticket_dict['wxuin'],
            },
            'SyncKey':sync_key,
            'rr':str(time.time())
        }
        # Send the session cookies like every other API call in this app;
        # the original omitted them here.
        response_fetch_msg = requests.post(fetch_msg_url,json=form_data,cookies=ticket_cookie)
        response_fetch_msg.encoding = 'utf-8'
        content = response_fetch_msg.json()

        # Advance the stored sync key so the next poll does not ask for the
        # same messages again.
        # NOTE(review): assumes the reply carries the next key under
        # 'SyncKey' with the same shape as the webwxinit one - confirm.
        new_key = content.get('SyncKey')
        if new_key and new_key.get('List'):
            session['synckey'] = new_key

    return render_template('get_msg.html',content=content)
最後:
# Start Flask's built-in development server only when run as a script.
if __name__ == '__main__':
    app.run()
執行代碼即可實現微信網頁版自動登錄、獲取聯繫人信息,以及發送並接收消息
