背景
由於測試時需要上傳一些圖片,而自己保存的圖片很少。
為了讓測試數據看起來不那麼重複,所以在網上找了一個爬蟲腳本,以下是源碼:

import requests
import os


class Image():
    """Baidu image-search thumbnail scraper.

    Creating an instance runs the whole job: it asks for a page count and a
    keyword, collects thumbnail URLs, then saves them under ``./images``.
    """

    url = 'https://image.baidu.com/search/acjson'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.9 Safari/537.36'
    }
    varlist = []
    dir = './images'
    params = {}

    def __init__(self):
        """Read the page count and keyword from stdin, then scrape and save."""
        global page_num, keywords
        page_num = int(input('請輸入要抓取的頁數:\n'))
        keywords = input('請輸入關鍵字:\n')
        if not self.catch_page():
            print('抓取頁面失敗')
            return
        self.writeData()

    def catch_page(self):
        """Append 30 thumbnail URLs per requested page to ``self.varlist``.

        Returns True when ``self.varlist`` is not None, otherwise False.
        """
        for page_index in range(page_num):
            start = page_index * 30
            self.params = {
                'tn': 'resultjson_com',
                'ipn': 'rj',
                'ct': '201326592',
                'is': '',
                'fp': 'result',
                'queryWord': keywords,
                'cl': '2',
                'lm': '-1',
                'ie': 'utf-8',
                'oe': 'utf-8',
                'adpicid': '',
                'st': '-1',
                'z': '',
                'ic': '0',
                'hd': '',
                'latest': '',
                'copyright': '',
                'word': keywords,
                's': '',
                'se': '',
                'tab': '',
                'width': '',
                'height': '',
                'face': '0',
                'istype': '2',
                'qc': '',
                'nc': '1',
                'fr': '',
                'expermode': '',
                'force': '',
                'cg': 'girl',
                'pn': start,
                'rn': '30',
                'gsm': '',
                '1584010126096': ''
            }
            # NOTE: self.headers is defined on the class but is not sent with
            # this request.
            reply = requests.get(url=self.url, params=self.params)
            entries = reply.json()['data']
            for slot in range(30):
                entry = entries[slot]
                self.varlist.append(entry['thumbURL'])
        return self.varlist is not None

    def writeData(self):
        """Download every collected thumbnail to ``./images/<index>.jpg``."""
        # Create the output directory when it does not exist yet.
        if not os.path.exists(self.dir):
            os.mkdir(self.dir)

        total = page_num * 30
        for idx in range(total):
            print(f'正在下載第{idx}條數據')
            picture = requests.get(url=self.varlist[idx])
            target = f'./images/{idx}.jpg'
            open(target, 'wb').write(picture.content)


if __name__ == '__main__':
    Image()
這段代碼可能作者當時跑是OK的,但我跑的時候失敗了(報錯:requests.exceptions.TooManyRedirects: Exceeded 30 redirects.)。排查了一下,原因是源碼裡雖然定義了headers,但請求時並沒有把它傳給requests.get;請求時加上headers參數就OK了。
圖片是保存到當前路徑下的,要是把圖片存儲換一個目錄,就需要移動這個爬蟲文件,當然你也可以改代碼裡面的路徑,但是換一次路徑就改一下代碼?感覺不太優雅。
那能不能寫個Windows的批處理腳本(xxx.bat):py文件固定放在一個地方不用動,想把圖片存到哪個目錄,就把.bat文件放到哪個目錄裡?
解決方案
首先,這方法肯定是可行的
其次,我得確認py文件的路徑
接著,我可以執行這個py文件
然後,執行的時候把當前.bat所在的路徑傳給py文件
最後,在py代碼裡把圖片保存在傳入的路徑下
大功告成!.bat文件內容如下:
@echo off
rem D:\spider is the directory that holds drink_pic.py; adjust it to your setup.
rem "cd /d" switches the drive and the directory in a single step.
cd /d D:\spider

rem %~dp0 is the directory of this .bat file; it always ends with a backslash.
echo 當前路徑:%~dp0
rem Quote the path so a directory containing spaces reaches Python as ONE
rem argument. The extra backslash is required: %~dp0 ends with "\", and a
rem single backslash before the closing quote would escape that quote.
python drink_pic.py "%~dp0\"
pause
exit
其中:
%cd% 代表的是當前工作目錄(current working directory,variable,會隨cd命令改變);
%~dp0 代表的是當前批處理文件所在完整目錄(the batch file's directory,fixed,固定不變)
以下是修改後的圖片爬蟲py源碼:

import argparse

import requests
import os
import sys


class Image():
    """Download Baidu image-search thumbnails into ``<pt>/images``.

    Creating an instance runs the whole job: it prompts for a page count and
    a keyword, collects thumbnail URLs page by page, then downloads them.

    Args:
        pt: Base directory; images are written to its ``images`` subfolder.
    """

    url = 'https://image.baidu.com/search/acjson'
    # Browser-like headers sent with every search request. The Cookie and
    # Referer values are hard-coded; presumably they were captured from a
    # browser session and may need refreshing if the search starts failing.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cookie': 'BDqhfp=%E8%BD%AF%E4%BB%B6%E6%B5%8B%E8%AF%95logo%26%26NaN-1undefined-1undefined%26%262928%26%266; BAIDUID=50559E09CC89BCB4A35AE534A4AFBD93:FG=1; PSTM=1613793192; BIDUPSID=994A62B2BBC179C9D5FDDD4576FD1138; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; __yjs_duid=1_b93b073db4b3095e4b6ca8bdad9666671613879345923; H_PS_PSSID=33512_33241_33257_33344_31254_33601_33585_26350_33264; delPer=0; PSINO=5; ZD_ENTRY=baidu; BA_HECTOR=2081a48k040k852hlm1g3c5g40r; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=www.baidu.com; indexPageSugList=%5B%22%E9%85%92%22%5D; cleanHistoryStatus=0',
        'Host': 'image.baidu.com',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=%E9%85%92',
        'Upgrade-Insecure-Requests': '1',
    }
    # Default relative output directory; writeData derives its target from
    # its own argument and does not read this attribute.
    dir = './images'
    # Number of results requested per search page (the 'rn' parameter).
    PAGE_SIZE = 30
    # Seconds to wait for any single HTTP request before giving up.
    TIMEOUT = 10

    def __init__(self, pt):
        """Prompt for input, collect thumbnail URLs and download them."""
        # Per-instance state, so two instances never share collected URLs.
        self.varlist = []
        self.params = {}
        self.page_num = int(input('請輸入要抓取的頁數:\n'))
        self.keywords = input('請輸入關鍵字:\n')
        if self.catch_page():
            self.writeData(pt)
        else:
            print('抓取頁面失敗')

    def catch_page(self):
        """Collect thumbnail URLs for every requested result page.

        A page that cannot be fetched or parsed is reported and skipped, and
        entries without a ``thumbURL`` are ignored.

        Returns:
            bool: True when at least one thumbnail URL was collected.
        """
        last_offset = self.page_num * self.PAGE_SIZE
        for offset in range(0, last_offset, self.PAGE_SIZE):
            self.params = {
                'tn': 'resultjson_com',
                'ipn': 'rj',
                'ct': '201326592',
                'is': '',
                'fp': 'result',
                'queryWord': self.keywords,
                'cl': '2',
                'lm': '-1',
                'ie': 'utf-8',
                'oe': 'utf-8',
                'adpicid': '',
                'st': '-1',
                'z': '',
                'ic': '0',
                'hd': '',
                'latest': '',
                'copyright': '',
                'word': self.keywords,
                's': '',
                'se': '',
                'tab': '',
                'width': '',
                'height': '',
                'face': '0',
                'istype': '2',
                'qc': '',
                'nc': '1',
                'fr': '',
                'expermode': '',
                'force': '',
                'cg': 'girl',
                'pn': offset,
                'rn': str(self.PAGE_SIZE),
                'gsm': '',
                '1584010126096': '',
            }
            try:
                response = requests.get(
                    self.url,
                    headers=self.headers,
                    params=self.params,
                    timeout=self.TIMEOUT,
                )
                response.raise_for_status()
                payload = response.json()
            except (requests.RequestException, ValueError) as exc:
                # ValueError covers a body that is not valid JSON.
                page_no = offset // self.PAGE_SIZE + 1
                print(f'第{page_no}頁抓取失敗:{exc}')
                continue

            entries = payload.get('data') if isinstance(payload, dict) else None
            # Iterate over what was actually returned instead of assuming
            # exactly PAGE_SIZE well-formed entries.
            for entry in entries or []:
                thumb_url = entry.get('thumbURL') if isinstance(entry, dict) else None
                if thumb_url:
                    self.varlist.append(thumb_url)

        return bool(self.varlist)

    def writeData(self, pt):
        """Download every collected thumbnail to ``<pt>/images/<index>.jpg``.

        Args:
            pt: Base directory; the ``images`` subfolder is created if needed.
        """
        target_dir = os.path.join(pt, 'images')
        # Create the folder (and any missing parents) when it does not exist.
        os.makedirs(target_dir, exist_ok=True)
        print(target_dir)
        for i, thumb_url in enumerate(self.varlist):
            print(f'正在下載第{i}條數據')
            try:
                reply = requests.get(thumb_url, timeout=self.TIMEOUT)
                reply.raise_for_status()
            except requests.RequestException as exc:
                # One broken thumbnail should not abort the remaining ones.
                print(f'第{i}條數據下載失敗:{exc}')
                continue
            with open(os.path.join(target_dir, f'{i}.jpg'), 'wb') as out_file:
                out_file.write(reply.content)


def main():
    """Run the scraper, saving under the directory given on the command line."""
    # sys.argv[1] is the first argument passed on the command line; further
    # arguments would be separated by spaces.
    if len(sys.argv) > 1:
        pt = sys.argv[1]
        print("入參[1]為:", pt)
    else:
        # No path supplied: save next to wherever the script was started.
        pt = os.getcwd()
    pt1 = pt.replace('\\', '/')
    print('path', pt1)
    Image(pt1)


if __name__ == '__main__':
    main()