python 爬取王者榮耀英雄皮膚代碼


import os, time, requests, json, re, sys
from retrying import retry
from urllib import parse

"""
文章描述:爬取王者榮耀英雄壁紙+封面
使用說明:直接在最底下輸入下載地址,然后運行
作者:Felix(2020/7/30 14:42)
最新修改時間:2021-4-5
公眾號:【全面資源集】
博客:https://blog.csdn.net/weixin_49012647
說明:沒有使用進程,面向對象加過程,使用控制台輸出顯示進度;目標網站沒有反爬機制,不識別UA,此文章調試了兩天才趨近完美
"""

class HonorOfKings:
    """Download Honor of Kings hero covers and skin wallpapers, one at a time.

    Covers are written to ``<save_path>/英雄封面/<hero>.png`` and wallpapers to
    ``<save_path>/<hero>/<skin>.jpg``.  Files that already exist are skipped
    and directories are created on demand, so an interrupted run can simply
    be started again.
    """

    # Hero list endpoint (captured from app traffic by the original author).
    HERO_LIST_URL = 'http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8'
    # Characters removed from hero / skin names before they are used in paths.
    _NAME_JUNK = re.compile(r'[【】:.<>|·@#$%^&() ]')
    # Format numbers offered by the menu that run() prints.
    _SIZE_CHOICES = range(1, 9)
    _DEFAULT_SIZE = 6

    def __init__(self, save_path='./heros'):
        """Remember the download root and build the wallpaper list URL template.

        :param save_path: root directory for all downloaded images.
        """
        self.save_path = save_path
        self.time = str(time.time()).split('.')
        # ``{}`` is filled with the page index later; ``%s`` receives the
        # current epoch seconds for the trailing ``_`` query parameter.
        self.url = 'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s' % \
                   self.time[0]

    def hello(self):
        """Print the banner and return ``self`` so calls can be chained."""
        print("*" * 50)
        print(' ' * 18 + '王者榮耀壁紙下載')
        print(' ' * 5 + '公眾號:【全面資源集】')
        print("*" * 50)
        return self

    def run(self):
        """Ask for a wallpaper format, then download all covers and wallpapers.

        Any download that still fails after the retries in :meth:`request`
        aborts the run with that exception.
        """
        print(' 格式選擇: ')
        print('1.縮略圖 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        size = self._parse_size(input('請輸入您想下載的格式序號,默認6:'))
        print()

        hero_names = self._download_covers()
        print()  # finish the in-place progress line

        # Page 0 doubles as the source of the total page count.
        first_page = self.request(self.url.format(0)).json()
        total_page = int(first_page['iTotalPages'])
        print('→ → → →開始爬取皮膚...(總共 {} 頁)'.format(total_page))
        # Pages are requested 0-based (page 0 is the first request above), so
        # the last index is total_page - 1.
        # NOTE(review): confirm the endpoint has no data at index total_page.
        for page in range(total_page):
            if page == 0:
                result = first_page
            else:
                result = self.request(self.url.format(page)).json()
            for now, item in enumerate(result.get('List') or [], 1):
                self._save_skin(item, size, hero_names)
                sys.stdout.write('\r第%s頁 |第%s張' % (page, now))
                sys.stdout.flush()
        print('\n下載完成!')

    @classmethod
    def _parse_size(cls, raw):
        """Turn the user's menu answer into a format number, defaulting to 6."""
        try:
            choice = int(raw)
        except ValueError:  # empty answer or not a number
            return cls._DEFAULT_SIZE
        return choice if choice in cls._SIZE_CHOICES else cls._DEFAULT_SIZE

    @classmethod
    def _clean(cls, name):
        """Strip the characters in ``_NAME_JUNK`` from *name*."""
        return cls._NAME_JUNK.sub('', name)

    @staticmethod
    def _canonical_hero(name, hero_names):
        """Map e.g. ``張良幽蘭居士`` to ``張良`` so all skins of one hero share a folder."""
        for known in hero_names:
            if known in name:
                return known
        return name

    def _download_covers(self):
        """Save every hero cover that is not on disk yet; return all hero names."""
        heroes = self.request(self.HERO_LIST_URL).json()['list']
        cover_dir = os.path.join(self.save_path, '英雄封面')
        os.makedirs(cover_dir, exist_ok=True)
        total = len(heroes)
        hero_names = []
        for num, hero in enumerate(heroes, 1):
            hero_names.append(hero['name'])
            cover_path = os.path.join(cover_dir, hero['name'] + '.png')
            if not os.path.exists(cover_path):
                # Fetch before opening the file so a failed request cannot
                # leave an empty file that later runs would skip.
                content = self.request(hero['cover']).content
                with open(cover_path, 'wb') as f:
                    f.write(content)
            sys.stdout.write('\r→ → → →正在爬取封面....爬取進度:%s|%s張' % (num, total))
            sys.stdout.flush()
        return hero_names

    def _save_skin(self, item, size, hero_names):
        """Download the wallpaper described by *item* unless it already exists.

        :param item: one entry of the page's ``List``; ``sProdName`` and
            ``sProdImgNo_<size>`` are URL-encoded.
        :param size: format number selecting which ``sProdImgNo_<size>`` to use.
        :param hero_names: known hero names used to pick the target folder.
        """
        name_parts = parse.unquote(item['sProdName']).split('-')
        hero_name = self._canonical_hero(self._clean(name_parts[0]), hero_names)
        # Not every product name has a "-<skin>" part; reuse the hero name then.
        skin_name = self._clean(name_parts[1]) if len(name_parts) > 1 else hero_name
        hero_dir = os.path.join(self.save_path, hero_name)
        target = os.path.join(hero_dir, skin_name + '.jpg')
        if os.path.exists(target):
            return
        image_url = parse.unquote(item['sProdImgNo_{}'.format(size)])
        # The listed URL contains "/200"; "/0" presumably selects the
        # full-size image -- TODO confirm against the site.
        content = self.request(image_url.replace("/200", "/0")).content
        os.makedirs(hero_dir, exist_ok=True)
        with open(target, 'wb') as f:
            f.write(content)

    @retry(stop_max_attempt_number=3)
    def request(self, url):
        """GET *url* with a 10 second timeout, trying up to three times.

        :param url: address to fetch.
        :return: the ``requests`` response object.
        :raises requests.HTTPError: if the status code is not 200.  This is an
            explicit check because ``assert`` is removed under ``python -O``.
        """
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            raise requests.HTTPError(
                'unexpected status %s for %s' % (response.status_code, url),
                response=response)
        return response


if __name__ == "__main__":
    # Root directory that receives every downloaded image; edit as needed.
    download_root = r'E:\win10\Pictures\電腦圖片\王者榮耀壁紙'
    downloader = HonorOfKings(download_root).hello()
    downloader.run()

加線程代碼

import os, time, requests, json, re, sys

import threadpool
from retrying import retry
from urllib import parse
from tqdm import tqdm

"""
文章描述:爬取王者榮耀英雄壁紙+封面
使用說明:直接在最底下輸入下載地址,然后運行
作者:Felix(2020/7/30 14:42)
最新修改時間:2021-4-4
公眾號:【全面資源集】
博客:https://blog.csdn.net/weixin_49012647
說明:(1)使用線程爬取,但是感覺沒有快多少,網址圖片加載速度不是很快,而且服務器會沒有響應。
     (2)使用tqdm顯示進度,但是該模塊也會出問題,比如單位,img/s,結果變成s/img,而且加重程序負擔
      (3)因為是二次更改,在函數里嵌套函數,非常不專業,所以盡量少用
"""

class HonorOfKings:
    """Threaded downloader for Honor of Kings hero covers and skin wallpapers.

    Covers are written to ``<save_path>/英雄封面/<hero>.png`` and wallpapers to
    ``<save_path>/<hero>/<skin>.jpg``.  Downloads run on a thread pool; files
    that already exist are skipped and directories are created on demand, so
    the script can be re-run after an interruption.
    """

    # Hero list endpoint (captured from app traffic by the original author).
    HERO_LIST_URL = 'http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8'
    # Characters removed from hero / skin names before they are used in paths.
    _NAME_JUNK = re.compile(r'[【】:.<>|·@#$%^&() ]')
    # Format numbers offered by the menu that run() prints.
    _SIZE_CHOICES = range(1, 9)
    _DEFAULT_SIZE = 6
    # Number of worker threads per pool() call.
    _WORKERS = 20

    def __init__(self, save_path='./heros'):
        """Remember the download root and build the wallpaper list URL template.

        :param save_path: root directory for all downloaded images.
        """
        self.save_path = save_path
        self.time = str(time.time()).split('.')
        # ``{}`` is filled with the page index later; ``%s`` receives the
        # current epoch seconds for the trailing ``_`` query parameter.
        self.url = 'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s' % \
                   self.time[0]

    def hello(self):
        """
        Print the welcome banner.

        :return: self, so calls can be chained
        """
        print("*" * 50)
        print(' ' * 18 + '王者榮耀壁紙下載')
        print(' ' * 5 + '公眾號:【全面資源集】')
        print("*" * 50)
        return self

    def pool(self, function, arg):
        """
        Call ``function`` once per item of ``arg`` on a pool of worker threads.

        Blocks until every item has been processed.

        :param function: callable taking one item
        :param arg: list of items; each one is passed as the single argument
        """
        workers = threadpool.ThreadPool(self._WORKERS)
        for req in threadpool.makeRequests(function, arg):
            workers.putRequest(req)
        workers.wait()
        # A new pool is built on every call; dismiss its workers so idle
        # threads do not pile up across pages.
        workers.dismissWorkers(self._WORKERS)

    def run(self):
        """
        Program entry: ask for a wallpaper format, then download all covers
        and wallpapers.
        """
        print(' 格式選擇: ')
        print('1.縮略圖 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        size = self._parse_size(input('請輸入您想下載的格式序號,默認6:'))
        print()

        heroes = self.request(self.HERO_LIST_URL).json()['list']
        hero_names = [hero['name'] for hero in heroes]
        cover_dir = os.path.join(self.save_path, '英雄封面')
        os.makedirs(cover_dir, exist_ok=True)
        for name in hero_names:
            os.makedirs(os.path.join(self.save_path, name), exist_ok=True)

        total = len(heroes)
        done = 0

        def fetch_cover(hero):
            # Display-only counter: updates from worker threads are not
            # synchronised, so the printed figure is approximate.
            nonlocal done
            self._save_cover(hero, cover_dir)
            done += 1
            sys.stdout.write('\r→ → → →正在爬取封面....爬取進度:%s|%s張' % (done, total))

        self.pool(fetch_cover, heroes)
        print()  # finish the in-place progress line

        # Page 0 doubles as the source of the total page count.
        first_page = self.request(self.url.format(0)).json()
        total_page = int(first_page['iTotalPages'])
        print('→ → → →開始爬取皮膚(總共 {} 頁)...'.format(total_page))

        # Pages are requested 0-based (page 0 is the first request above), so
        # the last index is total_page - 1.
        # NOTE(review): confirm the endpoint has no data at index total_page.
        for page in range(total_page):
            if page == 0:
                result = first_page
            else:
                result = self.request(self.url.format(page)).json()
            # Both the product name and the image URL arrive URL-encoded.
            jobs = [
                {'name': parse.unquote(item['sProdName']),
                 'url': parse.unquote(item['sProdImgNo_{}'.format(size)])}
                for item in result.get('List') or []
            ]
            with tqdm(total=len(jobs), leave=False, unit='img', ncols=100) as tq:
                tq.set_description('第%s頁' % page)

                def fetch_skin(job, bar=tq):
                    try:
                        self._save_skin(job, hero_names)
                    finally:
                        # Advance the bar even when this image failed.
                        bar.update(1)
                    time.sleep(0.4)  # pause between downloads in each worker

                self.pool(fetch_skin, jobs)
        print('下載完成!')

    @classmethod
    def _parse_size(cls, raw):
        """Turn the user's menu answer into a format number, defaulting to 6."""
        try:
            choice = int(raw)
        except ValueError:  # empty answer or not a number
            return cls._DEFAULT_SIZE
        return choice if choice in cls._SIZE_CHOICES else cls._DEFAULT_SIZE

    @classmethod
    def _clean(cls, name):
        """Strip the characters in ``_NAME_JUNK`` from *name*."""
        return cls._NAME_JUNK.sub('', name)

    @staticmethod
    def _canonical_hero(name, hero_names):
        """Map e.g. ``張良幽蘭居士`` to ``張良`` so all skins of one hero share a folder."""
        for known in hero_names:
            if known in name:
                return known
        return name

    def _save_cover(self, hero, cover_dir):
        """Save one hero's cover image into *cover_dir* unless it already exists.

        :param hero: hero-list entry providing ``name`` and ``cover`` (image URL)
        :param cover_dir: directory that receives ``<name>.png``
        """
        cover_path = os.path.join(cover_dir, hero['name'] + '.png')
        if os.path.exists(cover_path):
            return
        # Fetch before opening the file so a failed request cannot leave an
        # empty file that later runs would skip.
        content = self.request(hero['cover']).content
        with open(cover_path, 'wb') as f:
            f.write(content)

    def _save_skin(self, job, hero_names):
        """Download one wallpaper unless it already exists.

        :param job: dict with the decoded product ``name`` ("hero-skin" or just
            a name) and the decoded image ``url``
        :param hero_names: known hero names used to pick the target folder
        """
        name_parts = job['name'].split('-')
        hero_name = self._canonical_hero(self._clean(name_parts[0]), hero_names)
        # Not every product name has a "-<skin>" part; reuse the hero name then.
        skin_name = self._clean(name_parts[1]) if len(name_parts) > 1 else hero_name
        hero_dir = os.path.join(self.save_path, hero_name)
        target = os.path.join(hero_dir, skin_name + '.jpg')
        if os.path.exists(target):
            return
        # The listed URL contains "/200"; "/0" presumably selects the
        # full-size image -- TODO confirm against the site.
        content = self.request(job['url'].replace("/200", "/0")).content
        # Names that match no known hero have no pre-created folder.
        os.makedirs(hero_dir, exist_ok=True)
        with open(target, 'wb') as f:
            f.write(content)

    @retry(stop_max_attempt_number=3)
    def request(self, url):
        """
        Send a GET request with a 10 second timeout, trying up to three times.

        :param url: the url of request
        :return: the ``requests`` response object
        :raises requests.HTTPError: if the status code is not 200 (an explicit
            check, because ``assert`` is removed under ``python -O``)
        """
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            raise requests.HTTPError(
                'unexpected status %s for %s' % (response.status_code, url),
                response=response)
        return response


if __name__ == "__main__":
    # Root directory that receives every downloaded image; edit as needed.
    target_dir = r'E:\win10\Pictures\電腦圖片\王者榮耀壁紙'
    app = HonorOfKings(save_path=target_dir)
    app.hello().run()

 更多資源請關注:【全面資源集】


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM