"""Crawl Honor of Kings hero wallpapers and cover images.

Usage: set the download directory in the ``__main__`` guard at the bottom of
this file, then run the script.

Author: Felix (2020/7/30 14:42)
Last modified: 2021-4-5
WeChat official account: 【全面資源集】
Blog: https://blog.csdn.net/weixin_49012647

Notes from the original author: this version is single-threaded, mixes
object-oriented and procedural code, and reports progress on the console.
According to the author the site has no anti-crawling measures and does not
check the User-Agent header.
"""
import json
import os
import re
import sys
import time
from urllib import parse

import requests
from retrying import retry


class HonorOfKings:
    """Download Honor of Kings hero covers and skin wallpapers.

    Covers are written to ``<save_path>/英雄封面`` and every skin wallpaper to
    ``<save_path>/<hero name>/<skin name>.jpg``. Files that already exist are
    never downloaded again, so an interrupted run can simply be restarted.
    """

    # Hero roster endpoint (captured from app traffic by the original author).
    HERO_LIST_URL = 'http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8'

    # Characters removed from hero/skin names before they become file names.
    _NAME_JUNK = re.compile(r'[【】:.<>|·@#$%^&() ]')

    # Resolution used when the user just presses Enter or types nonsense.
    _DEFAULT_SIZE = 6

    def __init__(self, save_path='./heros'):
        """Remember the download root and build the wallpaper list URL.

        :param save_path: root directory for everything that is downloaded.
        """
        self.save_path = save_path
        self.time = str(time.time()).split('.')
        # ``{}`` is filled with the page number later; ``%s`` is the current
        # Unix timestamp (whole seconds) sent as the ``_`` query parameter.
        self.url = (
            'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi'
            '?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0'
            '&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true'
            '&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s'
        ) % self.time[0]

    def hello(self):
        """Print the welcome banner.

        :return: ``self`` so calls can be chained.
        """
        print("*" * 50)
        print(' ' * 18 + '王者榮耀壁紙下載')
        print(' ' * 5 + '公眾號:【全面資源集】')
        print("*" * 50)
        return self

    def run(self):
        """Ask for a resolution, then download all covers and all skins."""
        print('↓' * 20 + ' 格式選擇: ' + '↓' * 20)
        print('1.縮略圖 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        size = self._parse_size(input('請輸入您想下載的格式序號,默認6:'))
        print()
        hero_names = self._download_covers()
        self._download_skins(size, hero_names)
        print('\n下載完成!')

    @classmethod
    def _parse_size(cls, raw):
        """Turn the user's answer into a resolution index between 1 and 8.

        Anything that is not an integer in that range (including an empty
        answer) yields the default. The result is always an ``int`` so that
        input such as ``"06"`` cannot leak into the JSON key built from it.
        """
        try:
            choice = int(raw)
        except ValueError:
            return cls._DEFAULT_SIZE
        return choice if 1 <= choice <= 8 else cls._DEFAULT_SIZE

    def _clean(self, name):
        """Strip the punctuation listed in ``_NAME_JUNK`` from *name*."""
        return self._NAME_JUNK.sub('', name)

    @staticmethod
    def _progress(text):
        """Overwrite the current console line with *text*."""
        sys.stdout.write('\r' + text)
        sys.stdout.flush()  # without a flush the line may never appear

    def _download_covers(self):
        """Save every hero's cover image and return the list of hero names."""
        heroes = self.request(self.HERO_LIST_URL).json()['list']
        cover_dir = os.path.join(self.save_path, '英雄封面')
        # exist_ok: a second run must not crash on the existing directory.
        os.makedirs(cover_dir, exist_ok=True)

        total = len(heroes)
        hero_names = []
        for done, hero in enumerate(heroes, start=1):
            hero_names.append(hero['name'])
            cover_path = os.path.join(cover_dir, hero['name'] + '.png')
            if not os.path.exists(cover_path):
                # Fetch first, write second: a failed request must not leave
                # an empty file that later runs would treat as downloaded.
                content = self.request(hero['cover']).content
                with open(cover_path, 'wb') as f:
                    f.write(content)
            self._progress('→ → → →正在爬取封面....爬取進度:%s|%s張' % (done, total))
        print()  # finish the progress line before the next message
        return hero_names

    def _download_skins(self, size, hero_names):
        """Walk every wallpaper page and save each skin image.

        :param size: resolution index (1-8) selecting ``sProdImgNo_<size>``.
        :param hero_names: known hero names, used to group skins per hero.
        """
        # Page 0 is requested once up front only to learn the page count.
        first_page = json.loads(self.request(self.url.format(0)).text)
        total_page = int(first_page['iTotalPages'])
        print('→ → → →開始爬取皮膚...(總共 {} 頁)'.format(total_page))

        image_key = 'sProdImgNo_{}'.format(size)
        # Start at page 0 (the original started at 20 and skipped most pages).
        # The upper bound stays inclusive as in the original loop; a page
        # without a "List" entry is simply skipped.
        for page in range(total_page + 1):
            result = self.request(self.url.format(page)).json()
            for now, item in enumerate(result.get('List') or [], start=1):
                self._save_skin(item, image_key, hero_names)
                self._progress('第%s頁 %s|第%s張' % (page, '▋' * 2 * now, now))

    def _save_skin(self, item, image_key, hero_names):
        """Download one wallpaper described by *item* unless it already exists."""
        split_name = parse.unquote(item['sProdName']).split('-')
        hero_name = self._clean(split_name[0])
        # Names such as "張良·幽蘭居士" are folded onto the plain hero name so
        # that all skins of one hero land in the same directory.
        for known in hero_names:
            if known in hero_name:
                hero_name = known

        # Not every product name contains a "-"; fall back to the hero name.
        if len(split_name) > 1:
            pic_name = self._clean(split_name[1]) + '.jpg'
        else:
            pic_name = hero_name + '.jpg'

        save_dir = os.path.join(self.save_path, hero_name)
        save_name = os.path.join(save_dir, pic_name)
        os.makedirs(save_dir, exist_ok=True)
        if os.path.exists(save_name):
            return

        hero_url = parse.unquote(item[image_key])  # URLs arrive percent-encoded
        content = self.request(hero_url.replace("/200", "/0")).content
        with open(save_name, 'wb') as f:
            f.write(content)

    @retry(stop_max_attempt_number=3)
    def request(self, url):
        """GET *url* with a 10 second timeout, trying up to three times.

        :param url: address to fetch.
        :return: the ``requests`` response object.
        :raises AssertionError: if the status code is not 200 on the last try.
        """
        response = requests.get(url, timeout=10)
        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # the exception type is kept for backward compatibility.
        if response.status_code != 200:
            raise AssertionError(
                'unexpected HTTP status %s for %s' % (response.status_code, url))
        return response


if __name__ == "__main__":
    # Set the download root directory here.
    HonorOfKings(r'E:\win10\Pictures\電腦圖片\王者榮耀壁紙').hello().run()
加線程代碼
"""Crawl Honor of Kings hero wallpapers and cover images (threaded version).

Usage: set the download directory in the ``__main__`` guard at the bottom of
this file, then run the script.

Author: Felix (2020/7/30 14:42)
Last modified: 2021-4-4
WeChat official account: 【全面資源集】
Blog: https://blog.csdn.net/weixin_49012647

Notes from the original author:
(1) Downloads run on a thread pool, but the author saw little speed-up: the
    image host is slow and sometimes does not respond.
(2) Progress is shown with tqdm, which the author found imperfect (for
    example the unit flips from img/s to s/img) and somewhat heavy.
(3) This is a second revision of the script, so it relies on functions nested
    inside ``run``.
"""
import json
import os
import re
import sys
import time
from urllib import parse

import requests
import threadpool
from retrying import retry
from tqdm import tqdm


class HonorOfKings:
    """Download Honor of Kings hero covers and skin wallpapers with threads.

    Covers are written to ``<save_path>/英雄封面`` and every skin wallpaper to
    ``<save_path>/<hero name>/<skin name>.jpg``. Files that already exist are
    never downloaded again, so an interrupted run can simply be restarted.
    """

    # Hero roster endpoint (captured from app traffic by the original author).
    HERO_LIST_URL = 'http://gamehelper.gm825.com/wzry/hero/list?channel_id=90009a&app_id=h9044j&game_id=7622&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80&vcode=12.0.3&version_code=1203&cuid=2654CC14D2D3894DBF5808264AE2DAD7&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D&mno=0&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D&mcc=0&clientversion=&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D&os_level=23&os_id=dc451556fc0eeadb&resolution=1080_1920&dpi=480&client_ip=192.168.0.198&pdunid=a83d20d8'

    # Characters removed from hero/skin names before they become file names.
    _NAME_JUNK = re.compile(r'[【】:.<>|·@#$%^&() ]')

    # Resolution used when the user just presses Enter or types nonsense.
    _DEFAULT_SIZE = 6

    def __init__(self, save_path='./heros'):
        """Remember the download root and build the wallpaper list URL.

        :param save_path: root directory for everything that is downloaded.
        """
        self.save_path = save_path
        self.time = str(time.time()).split('.')
        # ``{}`` is filled with the page number later; ``%s`` is the current
        # Unix timestamp (whole seconds) sent as the ``_`` query parameter.
        self.url = (
            'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi'
            '?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0'
            '&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true'
            '&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=%s'
        ) % self.time[0]

    def hello(self):
        """Print the welcome banner.

        :return: ``self`` so calls can be chained.
        """
        print("*" * 50)
        print(' ' * 18 + '王者榮耀壁紙下載')
        print(' ' * 5 + '公眾號:【全面資源集】')
        print("*" * 50)
        return self

    def pool(self, function, arg):
        """Call *function* once per item of *arg* on 20 worker threads.

        Blocks until every queued call has finished.

        :param function: callable taking one item of *arg*.
        :param arg: list of items, one per call.
        """
        workers = 20
        pool = threadpool.ThreadPool(workers)
        for req in threadpool.makeRequests(function, arg):
            pool.putRequest(req)
        pool.wait()
        # This method runs once per page; without dismissing them, each call
        # would leave another 20 idle worker threads behind.
        pool.dismissWorkers(workers)

    def run(self):
        """Ask for a resolution, then download all covers and all skins."""
        print('↓' * 20 + ' 格式選擇: ' + '↓' * 20)
        print('1.縮略圖 2.1024x768 3.1280x720 4.1280x1024 5.1440x900 6.1920x1080 7.1920x1200 8.1920x1440')
        size = self._parse_size(input('請輸入您想下載的格式序號,默認6:'))
        print()

        heroes = self.request(self.HERO_LIST_URL).json()['list']
        hero_names = [hero['name'] for hero in heroes]
        # One job per hero, with exactly the keys ``_save_cover`` reads.
        cover_jobs = [{'name': hero['name'], 'cover': hero['cover']} for hero in heroes]

        cover_dir = os.path.join(self.save_path, '英雄封面')
        # exist_ok: a second run must not crash on the existing directories.
        os.makedirs(cover_dir, exist_ok=True)
        for name in hero_names:
            os.makedirs(os.path.join(self.save_path, name), exist_ok=True)

        total = len(cover_jobs)
        num = 0  # covers handled so far; only used for the progress line

        def down_corver(job):
            """Save one cover and refresh the progress line."""
            nonlocal num  # ``num`` lives in ``run``, it is not a module global
            self._save_cover(cover_dir, job)
            # Not lock-protected: a lost update only affects the displayed count.
            num += 1
            sys.stdout.write('\r→ → → →正在爬取封面....爬取進度:%s|%s張' % (num, total))
            sys.stdout.flush()

        self.pool(down_corver, cover_jobs)
        print()  # finish the progress line before the next message

        # Page 0 is requested once up front only to learn the page count.
        total_res = self.request(self.url.format(0)).json()
        total_page = int(total_res['iTotalPages'])
        print('→ → → →開始爬取皮膚(總共 {} 頁)...'.format(total_page))

        image_key = 'sProdImgNo_{}'.format(size)
        # The upper bound stays inclusive as in the original loop; a page
        # without a "List" entry is simply skipped.
        for offset in range(total_page + 1):
            result = json.loads(self.request(self.url.format(offset)).text)
            # Pages are not sorted by hero, so each page mixes many heroes.
            jobs = [
                {
                    'name': parse.unquote(item['sProdName']),
                    'url': parse.unquote(item[image_key]),
                }
                for item in result.get('List') or []
            ]
            with tqdm(total=len(jobs), leave=False, unit='img', ncols=100) as tq:
                tq.set_description('第%s頁' % offset)

                def down(job, bar=tq):
                    """Save one skin and advance this page's progress bar."""
                    self._save_skin(job, hero_names)
                    bar.update(1)

                # Must run inside the ``with`` so the bar is still open.
                self.pool(down, jobs)
        print('下載完成!')

    @classmethod
    def _parse_size(cls, raw):
        """Turn the user's answer into a resolution index between 1 and 8.

        Anything that is not an integer in that range (including an empty
        answer) yields the default. The result is always an ``int`` so that
        input such as ``"06"`` cannot leak into the JSON key built from it.
        """
        try:
            choice = int(raw)
        except ValueError:
            return cls._DEFAULT_SIZE
        return choice if 1 <= choice <= 8 else cls._DEFAULT_SIZE

    def _clean(self, name):
        """Strip the punctuation listed in ``_NAME_JUNK`` from *name*."""
        return self._NAME_JUNK.sub('', name)

    def _save_cover(self, cover_dir, job):
        """Download ``job['cover']`` to ``<cover_dir>/<job['name']>.png`` if missing."""
        cover_path = os.path.join(cover_dir, job['name'] + '.png')
        if os.path.exists(cover_path):
            return
        # Fetch first, write second: a failed request must not leave an empty
        # file that later runs would treat as already downloaded.
        content = self.request(job['cover']).content
        with open(cover_path, 'wb') as f:
            f.write(content)

    def _save_skin(self, job, hero_names):
        """Download one wallpaper unless it already exists.

        :param job: dict with ``name`` (decoded product name) and ``url``.
        :param hero_names: known hero names, used to group skins per hero.
        """
        parts = job['name'].split('-')
        hero_name = self._clean(parts[0])
        # Names such as "張良·幽蘭居士" are folded onto the plain hero name so
        # that all skins of one hero land in the same directory.
        for known in hero_names:
            if known in hero_name:
                hero_name = known

        # Not every product name contains a "-"; fall back to the hero name.
        if len(parts) > 1:
            pic_name = self._clean(parts[1]) + '.jpg'
        else:
            pic_name = hero_name + '.jpg'

        save_dir = os.path.join(self.save_path, hero_name)
        save_name = os.path.join(save_dir, pic_name)
        # The hero may be missing from the roster, so create the directory here.
        os.makedirs(save_dir, exist_ok=True)
        if os.path.exists(save_name):
            return

        content = self.request(job['url'].replace("/200", "/0")).content
        with open(save_name, 'wb') as f:
            f.write(content)
        time.sleep(0.4)  # brief pause after each real download to go easy on the server

    @retry(stop_max_attempt_number=3)
    def request(self, url):
        """GET *url* with a 10 second timeout, trying up to three times.

        :param url: address to fetch.
        :return: the ``requests`` response object.
        :raises AssertionError: if the status code is not 200 on the last try.
        """
        response = requests.get(url, timeout=10)
        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # the exception type is kept for backward compatibility.
        if response.status_code != 200:
            raise AssertionError(
                'unexpected HTTP status %s for %s' % (response.status_code, url))
        return response


if __name__ == "__main__":
    # Set the download root directory here.
    HonorOfKings(save_path=r'E:\win10\Pictures\電腦圖片\王者榮耀壁紙').hello().run()
更多資源請關注:【全面資源集】