為了能夠模擬登錄QQ並獲取信息,先對微信的掃碼登錄流程進行了分析,並簡單地用Django將獲取到的信息顯示到頁面上。(python3+pycharm)
主要過程就是:
1、獲取二維碼
2、掃碼登錄(有三種狀態)
3、獲取聯系人信息(index頁面獲取的是個人信息、最近聯系人信息、公眾號)
4、獲取所有的聯系人
5、發送和接收消息(接收消息打印到了后台)
創建Django項目、導入JQuery(發送AJax請求)、創建APP、創建模板(簡單的弄一下,能合理顯示得到的數據就好)
urls.py
from django.contrib import admin
# NOTE: django.urls.path is the newer alternative; this file uses regex
# routes via url() throughout.
from django.conf.urls import url

from app01 import views

# URL routes of the WeChat web-login demo.
urlpatterns = [
    # url() takes a regular expression, so the pattern is anchored with "^";
    # the unanchored 'admin/' would also match paths that merely contain
    # "admin/" somewhere in the middle.
    url(r'^admin/', admin.site.urls),
    url(r'^$', views.login),  # show the login QR code
    url(r'^polling/$', views.long_polling),  # long polling for the scan state
    url(r'^index/$', views.index),  # profile, recent contacts, official accounts
    url(r'^contact_list/$', views.contact_list),  # fetch all contacts
    url(r'^send_msg/$', views.send_msg),  # send a message
    url(r'^get_msg/$', views.get_msg),  # receive messages
]
templates
login.html(對應的是views中函數login和long_polling)
掃碼前

掃碼后、尚未點擊登錄按鈕時,頁面顯示的是你的頭像

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    {# Login QR code; "code" is the uuid passed in by the login view. #}
    <div style="margin: 0 auto;width: 300px">
        <img id="qcode" style="width: 300px;height: 300px" src="https://login.weixin.qq.com/qrcode/{{ code }}" alt="掃碼登錄">
    </div>

    <script src="/static/jquery-1.12.4.js"></script>
    <script>
        // Start the long-polling loop once the DOM is ready.
        $(document).ready(function () {
            polling();
        });

        // React to one answer of /polling/ and decide whether to poll again.
        function handlePollResult(arg) {
            if (arg.status == 200) {
                // Login confirmed: move on to the index page.
                window.location.href = '/index/';
                return;
            }
            if (arg.status == 201) {
                // Code scanned but not confirmed yet: show the returned avatar.
                $('#qcode').attr('src', arg.data);
                polling();
                return;
            }
            if (arg.status == 408) {
                // Nothing happened during this poll: ask again.
                console.log(1);
                polling();
            }
        }

        // Issue a single long-poll request to the server.
        function polling() {
            $.ajax({
                url: '/polling/',
                type: 'GET',
                dataType: 'json',
                success: handlePollResult
            });
        }
    </script>
</body>
</html>
index.html(個人信息、最近聯系人、公眾號頁面)
點擊登錄后的頁面:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <h1>個人信息</h1>
    <div>
        {# Alternative avatar source based on the init data, kept for reference: #}
        {# <img src="https://wx.qq.com{{ data.User.HeadImgUrl }}">#}
        {# "code" is the avatar string captured during the scan step; the attribute is quoted so its characters cannot break the tag. #}
        <img style="width: 40px;height: 40px" src="{{ code }}">
    </div>
    <div>
        {{ data.User.NickName }} - {{ data.User.UserName }}
    </div>

    <h1>最近聯系人列表</h1>
    <ul>
        {% for row in data.ContactList %}
            <li>{{ row.UserName }} - {{ row.NickName }}</li>
        {% endfor %}
        <li><a href="/contact_list/">獲取更多聯系人</a></li>
    </ul>

    <h1>公眾號</h1>
    {% for row in data.MPSubscribeMsgList %}
        <div style="font-weight: bolder">{{ row.NickName }}</div>
        {% for i in row.MPArticleList %}
            <div>
                <div><a href="{{ i.Url }}">{{ i.Title }}</a></div>
                <div style="color: #dddddd">{{ i.Digest }}</div>
            </div>
        {% endfor %}
    {% endfor %}
</body>
</html>
contact_list.html(全部聯系人、發送和接收消息頁面)
頁面:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <h1>發送消息</h1>
    <div>
        <p><input id="user_id" type="text" placeholder="請輸入用戶唯一ID" /></p>
        <p><input id='user_msg' type="text" placeholder="請輸入內容" /></p>
        <input id="sendMsg" type="button" value="提交" />
    </div>

    {# Every contact returned by the contact_list view. #}
    <ul>
        {% for row in obj.MemberList %}
            <li>{{ row.NickName }} - {{ row.UserName }} -{{ row.Province }}</li>
        {% endfor %}
    </ul>

    <script src="/static/jquery-1.12.4.js"></script>
    <script>
        $(function () {
            bindSendMessage();
            fetchMessage();
        });

        // Post the recipient id and the message text to /send_msg/ on click.
        function bindSendMessage() {
            $('#sendMsg').click(function () {
                $.ajax({
                    url: '/send_msg/',
                    type: 'POST',
                    data: {
                        'user_id': $('#user_id').val(),
                        'user_msg': $('#user_msg').val(),
                        // Django's CSRF middleware rejects a POST without a
                        // token; sending it is harmless when the middleware
                        // is disabled.
                        'csrfmiddlewaretoken': '{{ csrf_token }}'
                    },
                    success: function () {
                    }
                })
            });
        }

        // Poll /get_msg/ in a loop; received messages are printed by the
        // server, so the response itself is not used here.
        function fetchMessage() {
            $.ajax({
                url: '/get_msg/',
                type: 'GET',
                success: function (arg) {
                    fetchMessage();
                }
            })
        }
    </script>
</body>
</html>

views.py(邏輯層)
import re
import time
import json

import requests
from bs4 import BeautifulSoup
from django.shortcuts import render, HttpResponse

# Create your views here.

# Module-level state shared by all views. It lives in process globals, so
# only one login session is tracked at a time.

# Timestamp string generated when the QR code was requested.
CURRENT_TIME = None
# uuid of the current login QR code.
QCODE = None
# Avatar string returned by the login endpoint after the code was scanned.
PICTURE = None
# Value of the "tip" query parameter: 1 initially, 0 once a scan was seen.
TIP = 1
# LOGININ cookies
# all_cookie_dict = {}
# Cookies returned by the login call and by the ticket redirect.
LOGIN_COOLIES_DICT = {}
TICKET_COOKIES_DICT = {}
# Tag name -> text of the ticket response (skey, wxsid, wxuin, pass_ticket...).
TICKET_DICT = {}
# JSON returned by webwxinit (user profile, SyncKey, recent contacts...).
USER_INIT_DATA = {}

# Device id sent with every authenticated request.
_DEVICE_ID = 'e531777446530354'


def _all_cookies():
    """Return a fresh dict merging the login cookies and the ticket cookies."""
    cookies = {}
    cookies.update(LOGIN_COOLIES_DICT)
    cookies.update(TICKET_COOKIES_DICT)
    return cookies


def _base_request():
    """Build the ``BaseRequest`` object from the stored ticket values."""
    return {
        'DeviceID': _DEVICE_ID,
        'Sid': TICKET_DICT['wxsid'],
        'Skey': TICKET_DICT['skey'],
        'Uin': TICKET_DICT['wxuin'],
    }


def login(request):
    """
    Fetch a login QR code uuid and render the login page.

    :param request: incoming Django request
    :return: rendered ``login.html`` with the uuid as ``code``
    """
    global QCODE
    global CURRENT_TIME
    global TIP
    url = 'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}'
    CURRENT_TIME = str(time.time())
    # A new QR code starts from the initial state again; without this reset
    # the value left behind by a previous scan would be reused.
    TIP = 1
    q_code_url = url.format(CURRENT_TIME)
    response = requests.get(q_code_url)
    # Example response body:
    # window.QRLogin.code = 200; window.QRLogin.uuid = "4c5VeLH00g==";
    code = re.findall('uuid = "(.*)";', response.text)[0]
    QCODE = code
    return render(request, 'login.html', {'code': code})


def long_polling(request):
    """
    Long-poll the login endpoint; answers with status 408, 201 or 200.

    408 is the default (nothing happened), 201 means the code was scanned
    (``data`` then carries the avatar), 200 means the login was confirmed.

    :param request: incoming Django request
    :return: ``HttpResponse`` whose body is the JSON ``{'status', 'data'}``
    """
    global TIP
    global PICTURE
    print('polling')
    # Status reported to the AJAX caller defaults to 408.
    ret = {'status': 408, 'data': None}
    # "_" carries the timestamp (third argument); it previously repeated
    # placeholder {1}, which silently dropped CURRENT_TIME.
    base_login_url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip={1}&r=970980966&_={2}'
    login_url = base_login_url.format(QCODE, TIP, CURRENT_TIME)
    response = requests.get(login_url)
    if 'window.code=201' in response.text:
        TIP = 0
        avatar = re.findall("userAvatar = '(.*)';", response.text)[0]
        ret['data'] = avatar
        ret['status'] = 201
        # Kept for the index view, which shows it as the profile picture
        # (the image could not be loaded from the link in the init data).
        PICTURE = avatar
    elif 'window.code=200' in response.text:
        # Remember the cookies set by the login call.
        LOGIN_COOLIES_DICT.update(response.cookies.get_dict())
        # Extract the redirect URL from the response body.
        redirect_uri = re.findall('redirect_uri="(.*)";', response.text)[0]
        redirect_uri += '&fun=new&version=v2'
        # Fetch the ticket values needed by all later requests.
        response_ticket = requests.get(redirect_uri, cookies=LOGIN_COOLIES_DICT)
        TICKET_COOKIES_DICT.update(response_ticket.cookies.get_dict())
        soup2 = BeautifulSoup(response_ticket.text, 'html.parser')
        # Iterating the first tag yields its children; each child's tag name
        # and text are stored as one ticket entry.
        for tag in soup2.find():
            TICKET_DICT[tag.name] = tag.string
        ret['status'] = 200
    return HttpResponse(json.dumps(ret))


def index(request):
    """
    Initialise the user and show profile, recent contacts, official accounts.

    :param request: incoming Django request
    :return: rendered ``index.html`` with the init data and the avatar
    """
    user_init_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=855409185&pass_ticket=%s' % TICKET_DICT['pass_ticket']
    form_data = {'BaseRequest': _base_request()}
    # json= makes requests serialise the body and set the JSON header itself.
    response_init = requests.post(user_init_url, json=form_data, cookies=_all_cookies())
    response_init.encoding = 'utf-8'
    user_init_data = json.loads(response_init.text)
    USER_INIT_DATA.update(user_init_data)
    return render(request, 'index.html', {'data': user_init_data, 'code': PICTURE})


def contact_list(request):
    """
    Fetch the complete contact list and render it.

    :param request: incoming Django request
    :return: rendered ``contact_list.html`` with the decoded JSON as ``obj``
    """
    url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket=%s&r=%s&seq=0&skey=%s" % (
        TICKET_DICT['pass_ticket'], str(time.time()), TICKET_DICT['skey'])
    response = requests.get(url, cookies=_all_cookies())
    response.encoding = 'utf-8'
    contact_list_dict = json.loads(response.text)
    return render(request, 'contact_list.html', {'obj': contact_list_dict})


def send_msg(request):
    """
    Send a text message to the user id given in the POST data.

    Reads ``user_id`` and ``user_msg`` from ``request.POST``.

    :param request: incoming Django request
    :return: ``HttpResponse('OK')``; the upstream answer is only printed
    """
    from_user_id = USER_INIT_DATA['User']['UserName']
    to_user_id = request.POST.get('user_id')
    msg = request.POST.get('user_msg')
    send_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg'
    form_data = {
        'BaseRequest': _base_request(),
        'Msg': {
            "ClientMsgId": str(time.time()),
            "Content": msg,
            "FromUserName": from_user_id,
            "LocalID": str(time.time()),
            "ToUserName": to_user_id,
            "Type": 1
        },
        'Scene': 0
    }
    # ensure_ascii=False keeps non-ASCII text as real UTF-8 instead of \uXXXX
    # escapes. The message is serialised together with the rest of the
    # payload, so quotes, backslashes or "%" in it cannot corrupt the JSON
    # the way the former %-substitution into the finished string could.
    form_data_bytes = json.dumps(form_data, ensure_ascii=False).encode('utf-8')
    response = requests.post(send_url, data=form_data_bytes, cookies=_all_cookies(), headers={
        'Content-Type': 'application/json'})
    print(response.text)
    return HttpResponse('OK')


def get_msg(request):
    """
    Check for new messages and print any that arrived.

    Calls ``synccheck``; when the answer contains ``selector:"2"`` the
    messages are fetched via ``webwxsync``, the stored SyncKey is replaced
    and each message is printed to the server console.

    :param request: incoming Django request
    :return: ``HttpResponse('ok')``
    """
    sync_url = 'https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck'
    # The sync key is sent as "Key_Val" pairs joined by "|".
    sync_data_str = "|".join(
        "%s_%s" % (item['Key'], item['Val'])
        for item in USER_INIT_DATA['SyncKey']['List']
    )
    nid = int(time.time())
    sync_dict = {
        "r": nid,
        "skey": TICKET_DICT['skey'],
        "sid": TICKET_DICT['wxsid'],
        "uin": TICKET_DICT['wxuin'],
        "deviceid": _DEVICE_ID,
        "synckey": sync_data_str
    }
    all_cookie = _all_cookies()
    response_sync = requests.get(sync_url, params=sync_dict, cookies=all_cookie)
    print(response_sync.text)
    if 'selector:"2"' in response_sync.text:
        fetch_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid=%s&skey=%s&lang=zh_CN&pass_ticket=%s" % (
            TICKET_DICT['wxsid'], TICKET_DICT['skey'], TICKET_DICT['pass_ticket'])
        form_data = {
            'BaseRequest': _base_request(),
            'SyncKey': USER_INIT_DATA['SyncKey'],
            'rr': str(time.time())
        }
        # Send the session cookies like every other authenticated call does.
        response_fetch_msg = requests.post(fetch_msg_url, json=form_data, cookies=all_cookie)
        response_fetch_msg.encoding = 'utf-8'
        res_fetch_msg_dict = json.loads(response_fetch_msg.text)
        # The next check must use the sync key returned by this fetch.
        USER_INIT_DATA['SyncKey'] = res_fetch_msg_dict['SyncKey']
        for item in res_fetch_msg_dict['AddMsgList']:
            print(item['Content'], ":::::", item['FromUserName'], "---->", item['ToUserName'],)
    return HttpResponse('ok')
爬蟲入門簡單,想要深入果然很難。繼續努力,加油!
