python3.7 爬取QQ空间好友


使用 selenium 库自动登录 QQ 空间,并记录登录后的 Cookie。以下 URL 分别对应不同的数据接口(留言、好友、说说、兴趣爱好),本文代码只用到了其中一部分。

留言:
https://user.qzone.qq.com/proxy/domain/m.qzone.qq.com/cgi-bin/new/get_msgb?
uin=1612893772&hostUin=1148639090&start=0&s=0.8883444517176473&format=jsonp&num=10
&inCharset=utf-8&outCharset=utf-8&g_tk=2208268
&qzonetoken=bede67d5ca4dc0944791e45f795beeb346e50a23b20df9b4152a142232a7f7cd40e26b929798e3b74bab&g_tk=2208268
好友:
https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?
uin=1612893772&do=1&rd=0.19169828437926406&fupdate=1&clean=1&g_tk=108064521
&qzonetoken=77bdd3f44636c7b403a6462f493a2e6e02e6b8cd1772fe928bf511442e491315df84454ad4455093f2&g_tk=108064521

说说:
https://h5.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_module?i_uin=1148639090&i_login_uin=1612893772
&mode=4&previewV8=1&style=25&version=8&needDelOpr=true&transparence=true&hideExtend=false
&showcount=5&MORE_FEEDS_CGI=http://ic2.qzone.qq.com/cgi-bin/feeds/feeds_html_act_all&refer=2&paramstring=os-winxp|100

兴趣爱好:
https://h5.qzone.qq.com/proxy/domain/page.qq.com/cgi-bin/profile/interest_get?
uin=851676467&vuin=1612893772&flag=1&rd=0.7835457101159748&fupdate=1&
g_tk=896484925&qzonetoken=38bcb8fb59e772a31ff4ca2358781258d1c7f4e2c8f640e537d6bf52ccc4ab48c7614fa3a57a5cabf0

以下是具体代码:

import datetime
import json
import os
import time
from json import loads
from urllib import parse

import pymssql
import requests
from selenium import webdriver
 10 def get_key_values(body,key,end =';'):  11     """提取body中不包括的key,分片操作  12 
 13  :param body: 父字符串  14  :param key: 子字符串  15  :param end: 结束字符串,默认为;  16  :return: 不包括子字符串的字符串  17     """
 18     return body[body.find(key) + len(key): body.find(';', body.find(key))]  19 
 20 def get_key(cookies):  21     """获取cookie中的相关键的值  22  解密  23 
 24  :param cookies: 缓存  25  :return: 相关键的值  26     """
 27     key = get_key_values(cookies,'p_skey=')  28     h = 5381
 29     for i in key:  30         h += (h << 5) + ord(i)  31     return h & 2147483647
 32 
 33 def web_login_cookie():  34     """url = 'https://user.qzone.qq.com/QQ号相关的缓存  35  实现自动化登录  36 
 37  :return: 浏览器的缓存  38     """
 39     driver = webdriver.Chrome()  40     qq_account = '1612893772'
 41     qq_password = '13974162858x'
 42  login(driver,qq_account,qq_password)  43     time.sleep(10)  44     driver.get('https://user.qzone.qq.com/{}'.format(qq_account))  45     cookie = ''
 46     for elem in driver.get_cookies(): # 记录相关的Cookie
 47         # elem 为 dict类型
 48         cookie += elem["name"] + "=" + elem["value"] + ";"
 49     # cookies = cookie
 50     return cookie  51 
 52 def login(driver,qq_account,qq_password):  53     """登录  54 
 55  :param driver: 浏览器对象  56  :param qq_account: QQ账号  57  :param qq_password: QQ密码  58  :return:  59     """
 60  driver.maximize_window()  61     driver.get('http://user.qzone.qq.com')  62     driver.switch_to.frame('login_frame')  63     time.sleep(1)  64     driver.find_element_by_id("switcher_plogin").click()  65     driver.find_element_by_id("u").send_keys(qq_account)  66     time.sleep(2)  67     driver.find_element_by_id("p").send_keys(qq_password)  68     time.sleep(2)  69     driver.find_element_by_id("login_button").click()  70 
 71 def send_requests(req,headers,url,params=None):  72     """url_friend = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?'  73  url_friend携带以下参数:uin(QQ号)、do(没有它,返回空,默认值为:1)  74  rd,g_t,qzonetoken(每次登录都发生变化,从Cookiezz中获取)  75  fupdate,clean(默认值为:1)  76 
 77  :param req: 请求(Request),该请求为会话  78  :param headers: 请求头  79  :param params: 请求参数  80  :return: JSONP数据  81     """
 82     if None != params:  83         url = url + parse.urlencode(params)  84     # url = url+'&offset='+str(0)
 85     page = req.get(url=url, headers=headers)  86     return page.text  87 
 88 def get_each_str(req,uin,headers):  89     each_url = 'https://user.qzone.qq.com/{}'.format(uin)  90     page = req.get(url=each_url, headers=headers)  91 
 92 def friend_db(dicts,name=''):  93     """操作DB  94 
 95  :param dicts: 数据字典信息  96  :param name: 备注名  97  :return: void  98     """
 99     if len(str(dicts['birthyear'])) < 4: 100         dicts['birthyear'] = '1900'
101     if dicts['birthday'][1:2] == '0': 102         dicts['birthday'] = '01-01'
103     if len(dicts['signature']) > 70: 104         dicts['signature'] = ''
105     friend_db_dict = { 106         'friendInfo': [ 107         dicts['uin'], name, dicts['age'], '' if dicts['sex'] == 1 else ''
108         , datetime.datetime.strptime(str(dicts['birthyear']) + '-' + str(dicts['birthday']), '%Y-%m-%d')], 109         'friendPlace': [ 110         dicts['uin'], dicts['company'],dicts['career'], dicts['hco'] + dicts['hp'] + dicts['hc'], 111         dicts['country'] + dicts['province'] + dicts['city'],dicts['cco'] + dicts['cp'] + dicts['cc'], dicts['cb']], 112         'friendNet': [ 113         dicts['uin'], dicts['nickname'], dicts['spacename'], dicts['desc'], dicts['signature']] 114  } 115     conn = pymssql.connect(host='localhost', user='sa', password='123456', database='friendDB', 116                             charset='utf8') 117     cur = conn.cursor() 118     sql = "begin tran insertData insert into friendInfo values({},'{}',{},'{}','{}');" \ 119           "insert into friendPlace values({},'{}','{}','{}','{}','{}','{}');" \ 120           "insert into friendNet values({},'{}','{}','{}','{}');" \ 121           "commit tran insertData".\ 122         format(friend_db_dict['friendInfo'][0],friend_db_dict['friendInfo'][1],friend_db_dict['friendInfo'][2] 123                 ,friend_db_dict['friendInfo'][3],friend_db_dict['friendInfo'][4],friend_db_dict['friendPlace'][0], 124                 friend_db_dict['friendPlace'][1],friend_db_dict['friendPlace'][2],friend_db_dict['friendPlace'][3], 125                 friend_db_dict['friendPlace'][4],friend_db_dict['friendPlace'][5],friend_db_dict['friendPlace'][6], 126                 friend_db_dict['friendNet'][0],friend_db_dict['friendNet'][1],friend_db_dict['friendNet'][2], 127                 friend_db_dict['friendNet'][3],friend_db_dict['friendNet'][4]) 128     print('sql: ',sql) 129  cur.execute(sql) 130  conn.commit() 131  cur.close() 132  conn.close() 133 
def _parse_jsonp(text):
    """Decode a ``_Callback(...)`` style JSONP response into a Python object."""
    start = text.find('(')
    end = text.rfind(')')
    if start != -1 and end > start:
        text = text[start + 1:end]
    return loads(text)


def _record_failure(line):
    """Append one line to ``leak.txt``, the persistent log of skipped friends."""
    with open('leak.txt', 'a', encoding='utf8') as file:
        file.write(line + "\n")


def main():
    """Log in, download the friend list and store each friend's profile.

    The login and friend-list request are repeated every 10 seconds until
    the server answers with ``code == 0``. This is a loop rather than the
    earlier recursive call, so a long run of failures cannot exhaust the
    recursion limit. Friends whose profile cannot be decoded or accessed
    are written to ``leak.txt`` and skipped.

    :return: None
    """
    url_friend = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?'
    each_url = 'https://h5.qzone.qq.com/proxy/domain/base.qzone.qq.com/cgi-bin/user/cgi_userinfo_get_all?'

    while True:
        req = requests.session()
        headers = {'host': 'h5.qzone.qq.com',
                   'accept-encoding': 'gzip, deflate, br',
                   'accept-language': 'zh-CN,zh;q=0.8',
                   'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                   'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                                 '59.0.3071.115 Safari/537.36',
                   'connection': 'keep-alive'}
        cookie = web_login_cookie()
        print('cookie', cookie)
        g_tk = get_key(cookie)
        qzonetoken_friend = get_key_values(cookie, 'ptcz=')
        uin_friend = get_key_values(cookie, 'ptui_loginuin=')
        rd_friend = get_key_values(cookie, '_qpsvr_localtk=')
        print('friend_data', 'qzontoken:%s;uin:%s;rd:%s' % (qzonetoken_friend, uin_friend, rd_friend))
        headers['Cookie'] = cookie
        params_friend = {"uin": uin_friend, "fupdate": 1, "action": 1, "do": 1, "g_tk": g_tk,
                         "rd": rd_friend, 'qzonetoken': qzonetoken_friend}
        data_friend_str = send_requests(req, headers, url_friend, params=params_friend)
        data_friend_dict = _parse_jsonp(data_friend_str)
        print('data_friend_dict: ', data_friend_dict)
        if data_friend_dict['code'] == 0:
            break
        # Per the original author, code -3000 means "please log in first".
        # Wait, then log in again.
        time.sleep(10)

    for friend in data_friend_dict['data']['items_list']:
        each_uin = friend['uin']
        params_each = {"uin": each_uin, "fupdate": 1, "vuin": uin_friend, "g_tk": g_tk,
                       "rd": rd_friend, 'qzonetoken': qzonetoken_friend}
        time.sleep(1)  # pause between profile requests
        data_each_str = send_requests(req, headers, each_url, params_each)
        try:
            data_each_dict = _parse_jsonp(data_each_str)
        except json.JSONDecodeError as e:
            _record_failure('except: ' + str(each_uin) + " " + friend['name'] + " " + e.msg)
            continue
        print('data_each_dict: ', data_each_dict)
        if data_each_dict['code'] == 0:
            friend_db(data_each_dict['data'], name=friend['name'])
        else:
            # Per the original author, code -4009 means "no access permission".
            _record_failure('没有访问权限: ' + str(each_uin) + " " + friend['name'])


if __name__ == '__main__':
    main()


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM