爬取微信自動登錄並初始化


這兩天從早上寫代碼到半夜,終於找回一點做程序員的感覺,人閒太久了真沒勁,所以沒事可以多定定計劃,找一找奮鬥的感覺挺好。

閑話不多說,今天筆記主要是記錄對微信爬取的整個過程

爬取分為幾個步驟:

1.實現登錄

2.實現用戶初始化,獲取最近聯繫人以及所有用戶信息

3.實現對用戶發送消息

4.實現對消息的接收

本文主要使用flask框架、requests庫以及bs4來實現爬蟲

 

代碼結構:

步驟:

1.創建flask項目Wechat,在manage.py裡面定義登錄函數login:

#-*-coding:utf-8-*-
import json
import os
import re
import time

import requests
from bs4 import BeautifulSoup
from flask import Flask, jsonify, redirect, render_template, request, session
# Flask application object shared by every route in this file.
app = Flask(__name__)
# Debug mode is kept on as in the original tutorial; switch it off before
# exposing the server to anything but localhost.
app.debug = True

# Key used to sign the session cookie.  It can be supplied through the
# WECHAT_SECRET_KEY environment variable; the original literal is kept as the
# fallback so the tutorial still runs without any configuration.
app.secret_key = os.environ.get('WECHAT_SECRET_KEY', 'abcdefghigklmn')
@app.route('/login', methods=['GET', 'POST'])
def login():
    """Render the QR-code login page.

    On GET, request a fresh login ``uuid`` from the WeChat ``jslogin``
    endpoint, store it in the session under ``qcode`` and render
    ``login.html`` with it.

    :return: the rendered login page; a 405 response for non-GET requests;
        a 502 response when the upstream reply contains no uuid.
    """
    if request.method != 'GET':
        # The original fell through and returned None here, which Flask
        # rejects as an invalid response.
        return 'Method Not Allowed', 405

    ctime = str(int(time.time() * 1000))
    qcode_url = 'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}'.format(ctime)
    res = requests.get(qcode_url)
    # Logged instead of printed so the code runs on both Python 2 and 3.
    app.logger.debug(res.text)

    match = re.search('uuid = "(.*)";', res.text)
    if match is None:
        # Upstream did not hand out a uuid; report it instead of raising
        # IndexError.
        return 'Failed to obtain a login uuid from WeChat', 502

    qcode = match.group(1)
    session['qcode'] = qcode
    return render_template('login.html', qcode=qcode)


@app.route('/check_login')
def check_login():
    """Poll WeChat to find out whether the QR code was scanned / confirmed.

    Sends a GET request such as
    https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=gbG3TQrkaA==&tip=0&r=-925318273&_=1529933650035
    using the ``qcode`` stored in the session by ``login``.

    :return: JSON with a ``code`` field:
        408 - default, nothing recognised in the upstream reply;
        201 - the code was scanned, ``src`` carries the user's avatar;
        200 - login confirmed; the ticket fields and cookies returned by the
              redirect URL are stored in the session as ``ticket_dict`` and
              ``ticket_cookie``.
    """
    response = {'code': 408}
    qcode = session.get('qcode')
    ctime = str(int(time.time() * 1000))
    check_url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip=0&r=-925318273&_={1}'.format(qcode, ctime)
    req = requests.get(check_url)

    if "code=201" in req.text:
        # The user has scanned the code: extract the avatar.
        src = re.findall("userAvatar = '(.*)';", req.text)[0]
        # Logged instead of printed so the code runs on both Python 2 and 3.
        app.logger.debug('src== %s', src)
        response['code'] = 201
        response['src'] = src
    elif 'code=200' in req.text:
        # The user confirmed the login: follow the redirect URL to obtain
        # the ticket values and cookies.
        redirect_uri = re.findall('redirect_uri="(.*)";', req.text)[0]
        redirect_uri = redirect_uri + '&fun=new&version=v2'
        ticket_ret = requests.get(redirect_uri)
        ticket_dict = xml_parser(ticket_ret.text)

        session['ticket_dict'] = ticket_dict
        session['ticket_cookie'] = ticket_ret.cookies.get_dict()
        response['code'] = 200

    return jsonify(response)



def xml_parser(text):
    """Turn the direct children of the ``<error>`` element into a dict.

    :param text: markup to parse (handed to BeautifulSoup's ``html.parser``).
    :return: ``{tag_name: tag_text}`` for every direct child of the first
        ``<error>`` element, or an empty dict when there is no such element.
    """
    soup = BeautifulSoup(text, 'html.parser')
    root = soup.find(name='error')
    if root is None:
        # No <error> element: return nothing rather than raising
        # AttributeError on None.
        return {}
    return {child.name: child.text for child in root.find_all(recursive=False)}
 

創建login.html,代碼如下:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<div style="width:200px;margin:0 auto">
   <h1 style="text-align: center">  登陸</h1>
    <!-- QR code for the uuid passed in as `qcode`; replaced by the avatar after scanning. -->
    <img id="img" style="height:200px;width:200px;" src="https://login.wx.qq.com/qrcode/{{qcode}}">
</div>
<script src="/static/jquery-1.12.4.min.js"></script>
<script>
    $(function () {
        checkLogin();
    })
    // Poll /check_login until the server reports a confirmed login (code 200).
    function checkLogin(){
        $.ajax({
            url:'/check_login',
            type:'GET',
            dataType:'JSON',
            success:function(arg){
                if (arg.code === 201){
                    // Scanned: show the avatar and keep polling for confirmation.
                    console.log('src:',arg.src)

                    $('#img').attr('src',arg.src);
                    checkLogin();
                }else if(arg.code === 200){
                    // Confirmed: go to the user page.
                    location.href = '/index'
                }else{
                    checkLogin();
                }

            },
            error:function(){
                // A failed request must not end the polling loop; retry after a short pause.
                setTimeout(checkLogin, 1000);
            }
        })
    }
</script>

</body>
</html>

2.用戶初始化並獲取用戶頭像代碼實現:

# User initialisation step
@app.route('/index')
def index():
    """Initialise the WeChat web session and show the landing page.

    Posts the ticket values stored in the session by ``check_login`` to the
    ``webwxinit`` endpoint, stores the returned ``User`` and ``SyncKey`` in
    the session (``current_user`` / ``synckey``) and renders ``index.html``
    with the full response as ``user_dict``.

    :return: the rendered page, or a redirect to ``/login`` when the session
        holds no ticket.
    """
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not ticket_dict:
        # Visited without logging in first: there is nothing to initialise.
        return redirect('/login')

    init_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-2132117709&pass_ticket={0}'.format(ticket_dict.get('pass_ticket'))
    data_dict = {
        'BaseRequest': {
            'DeviceID': "e292711087499063",
            'Sid': ticket_dict.get('wxsid'),
            # The ticket field is 'wxuin' (as read by get_msg); the original
            # 'wxuid' never matched and always sent None.
            'Uin': ticket_dict.get('wxuin'),
            'Skey': ticket_dict.get('skey'),
        },
    }
    # json= serialises the dict and sets the JSON content type, i.e. the
    # same as data=json.dumps(data_dict) plus an explicit Content-Type header.
    init_ret = requests.post(
        url=init_url,
        json=data_dict,
        cookies=ticket_cookie,
    )

    init_ret.encoding = 'utf-8'
    user_dict = init_ret.json()
    session['current_user'] = user_dict['User']
    session['synckey'] = user_dict['SyncKey']
    return render_template('index.html', user_dict=user_dict)

@app.route('/get_img')
def get_img():
    """Proxy the logged-in user's avatar image from wx.qq.com.

    Example upstream URL:
    https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgeticon?seq=1182160498&username=@f04bb7e4d7821f504a4992ca85be95aa3e9957c7e3dfb224dc467af8639450e7&skey=@crypt_a1d89414_e0cf3503fac08d5ac1bf9fadcae86c0d

    :return: the image bytes labelled with the upstream Content-Type
        (``image/jpeg`` when upstream sends none), or an empty 404 response
        when no user is stored in the session.
    """
    current_user = session.get('current_user')
    if not current_user:
        return '', 404
    ticket_cookie = session.get('ticket_cookie')
    head_url = "https://wx.qq.com" + current_user["HeadImgUrl"]
    # The request header is kept from the original code; it describes the
    # outgoing request only and does not label the response sent to the browser.
    img_ret = requests.get(head_url, cookies=ticket_cookie, headers={'Content-Type': 'image/jpg'})
    # Set the type on *our* response; otherwise Flask serves the bytes with
    # its default HTML content type.
    content_type = img_ret.headers.get('Content-Type', 'image/jpeg')
    return img_ret.content, 200, {'Content-Type': content_type}

前端代碼index.html如下:

<!DOCTYPE html>
<!-- Landing page rendered by the /index view; expects a `user_dict` template variable. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<h1>歡迎登陸{{user_dict.User.NickName}}</h1>
<div>
    <!-- The avatar is served by this application's own /get_img route. -->
    <img src="/get_img" alt="">
    <h2>{{user_dict.User.NickName}}</h2>
    <h2>{{user_dict.User.UserName}}</h2>

</div>
<h3>最近登錄聯系人</h3>
<!-- One list item per entry of user_dict.ContactList, showing its NickName. -->
<ul>
    {%  for user in user_dict.ContactList %}
    <li>{{ user.NickName}}</li>
    {% endfor %}
</ul>
<!-- Link to the full contact list view. -->
<a href="/user_list">查看所有聯系人</a>
</body>
</html>

3.到這裡就能夠實現自動登錄並獲取到最近聯繫人,接著我們獲取所有聯繫人及其信息

@app.route('/user_list')
def user_list():
    """Fetch every contact of the logged-in user and render them.

    Calls the ``webwxgetcontact`` endpoint with the ticket values and
    cookies kept in the session and passes the decoded JSON to
    ``user_list.html`` as ``wx_user_dict``.

    :return: the rendered page, or a redirect to ``/login`` when the session
        holds no ticket.
    """
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not ticket_dict:
        # Without a ticket the .get() calls below would fail on None.
        return redirect('/login')

    ctime = int(time.time() * 1000)
    skey = ticket_dict.get('skey')
    pass_ticket = ticket_dict.get('pass_ticket')
    user_list_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}".format(pass_ticket, ctime, skey)

    r1 = requests.get(user_list_url, cookies=ticket_cookie)
    r1.encoding = 'utf-8'
    wx_user_dict = r1.json()

    return render_template('user_list.html', wx_user_dict=wx_user_dict)

前端代碼如下:

<!DOCTYPE html>
<!-- Contact list page rendered by the /user_list view; expects `wx_user_dict`. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
     <div>
         <div style="width:30%;float:left;">
             <!-- Total number of contacts, then one line per contact. -->
             <h3>{{wx_user_dict.MemberCount}}</h3>
             <ul>
                 {% for item in wx_user_dict.MemberList %}
                 <li>{{ item.NickName }} ===== {{item.UserName}}</li>
                 {% endfor %}
             </ul>
         </div>
         <!-- The closing quote of this style attribute was missing in the original. -->
         <div style="width:7%;float:right;">

         </div>
     </div>
</body>
</html>

4.接下來可以實現發送消息的功能

首先創建前端send.html頁面

<!DOCTYPE html>
<!-- Message form rendered by the /send view on GET. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Empty action: the form posts back to the URL it was loaded from. -->
<form action="" method="post">
    <!-- `to` and `content` are the field names the /send view reads from request.form. -->
    <input type="text" name="to">
    <input type="text" name="content">
    <input type="submit" value="發送">
</form>
</body>
</html>

後台實現邏輯如下:

@app.route('/send', methods=['GET', 'POST'])
def send():
    """Show the message form (GET) or send a text message (POST).

    On POST the form fields ``to`` and ``content`` are wrapped in a
    ``webwxsendmsg`` request that is authenticated with the ticket values and
    cookies kept in the session.

    :return: the form on GET; on POST the raw upstream response text, the
        form again with status 400 when a field is missing, or a redirect to
        ``/login`` when the session holds no login data.
    """
    if request.method == "GET":
        return render_template('send.html')

    current_user = session.get('current_user')
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not current_user or not ticket_dict:
        return redirect('/login')

    to = request.form.get('to')
    content = request.form.get('content')
    if not to or not content:
        # Nothing sensible can be sent without a recipient and a text.
        return render_template('send.html'), 400

    pass_ticket = ticket_dict.get('pass_ticket')
    from_user = current_user["UserName"]
    # Integer millisecond timestamp, matching the other requests in this
    # file; the original produced a float string such as '1529933650035.12'.
    ctime = str(int(time.time() * 1000))
    msg_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?pass_ticket={0}'.format(pass_ticket)
    data_dict = {
        'BaseRequest': {
            'DeviceID': "e956888515941054",
            'Sid': ticket_dict.get('wxsid'),
            # The ticket field is 'wxuin' (as read by get_msg); the original
            # 'wxuid' never matched and always sent None.
            'Uin': ticket_dict.get('wxuin'),
            'Skey': ticket_dict.get('skey'),
        },
        'Msg': {
            'ClientMsgId': ctime,
            'LocalID': ctime,
            'FromUserName': from_user,
            'ToUserName': to,
            'Content': content,
            'Type': 1,
        },
        'scene': 0,
    }
    # ensure_ascii=False keeps non-ASCII text readable in the payload, so the
    # body must be encoded as UTF-8 bytes explicitly before it is handed to
    # requests, and the content type declared to match.
    ret = requests.post(
        url=msg_url,
        data=json.dumps(data_dict, ensure_ascii=False).encode('utf-8'),
        headers={'Content-Type': 'application/json; charset=UTF-8'},
        cookies=ticket_cookie,
    )
    return ret.text

5.實現獲取消息代碼:

首先定義get_msg.html文件

<!DOCTYPE html>
<!-- Message page rendered by the /get_msg view; expects a `content` template variable. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<ul>
    {% for item in content.AddMsgList %}
        <li>
            {# Debug aid: the whole item. Kept as a Jinja comment because an HTML comment would still render it into the page. #}
            {{ item['Content']}} From-->  {{item['FromUserName']}}  To-->  {{item['ToUserName']}}
        </li>
    {% endfor %}
</ul>
<!-- The original tag was misspelled <scrip>, so jQuery never loaded and `$` was undefined. -->
<script src="/static/jquery-1.12.4.js"></script>
<script>
    $(function(){
        fetchMessage();
    });
    // Keep requesting /get_msg; each successful reply immediately starts the next request.
    // NOTE(review): the reply itself is ignored, so the list above is not refreshed - confirm intent.
    function fetchMessage(){
        $.ajax({
            url:'/get_msg',
            type: 'GET',
            success:function(arg){
                fetchMessage();
            }
        })
    }
</script>
</body>
</html>

後台實現邏輯如下:

@app.route('/get_msg')
def get_msg():
    """Check for new messages and render them.

    First asks the ``synccheck`` endpoint whether anything changed, using
    the ``synckey`` stored in the session by ``index``.  When the reply
    contains ``selector:"2"`` the messages are fetched from ``webwxsync``.

    :return: ``get_msg.html`` rendered with the fetched payload as
        ``content`` (an empty ``AddMsgList`` when nothing new arrived), or a
        redirect to ``/login`` when the session holds no login data.
    """
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    sync_key = session.get('synckey')
    if not ticket_dict or not sync_key:
        return redirect('/login')

    # Step 1: ask whether new messages have arrived.
    sync_url = "https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck"
    # The sync key is sent as "Key_Val" pairs joined with "|".
    sync_data_str = "|".join(
        "%s_%s" % (item['Key'], item['Val']) for item in sync_key['List']
    )
    sync_dict = {
        "r": int(time.time()),
        "skey": ticket_dict['skey'],
        "sid": ticket_dict['wxsid'],
        "uin": ticket_dict['wxuin'],
        "deviceid": "e590082815481369",
        "synckey": sync_data_str,
    }
    response_sync = requests.get(sync_url, params=sync_dict, cookies=ticket_cookie)

    # Default payload so the view always returns a page; the original
    # returned None (an invalid Flask response) when nothing was new.
    content = {'AddMsgList': []}

    # Step 2: fetch the message bodies.
    if 'selector:"2"' in response_sync.text:
        pass_ticket = ticket_dict.get('pass_ticket')
        fetch_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid={0}&skey={1}&lang=zh_CN&pass_ticket={2}".format(ticket_dict['wxsid'], ticket_dict['skey'], pass_ticket)
        form_data = {
            'BaseRequest': {
                'DeviceID': "e616487029833324",
                'Sid': ticket_dict['wxsid'],
                'Skey': ticket_dict['skey'],
                'Uin': ticket_dict['wxuin'],
            },
            'SyncKey': sync_key,
            'rr': str(time.time()),
        }
        # Send the login cookies like every other request in this file does.
        response_fetch_msg = requests.post(fetch_msg_url, json=form_data, cookies=ticket_cookie)
        response_fetch_msg.encoding = 'utf-8'
        content = response_fetch_msg.json()
        # NOTE(review): the session's synckey is never refreshed from this
        # reply, so later calls presumably resend the same key - confirm
        # against the protocol before changing.

    return render_template('get_msg.html', content=content)

 

最後:

# Start Flask's built-in server only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    app.run()

執行代碼即可實現微信網頁版自動登錄,獲取聯繫人信息,發送並接收消息

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM