事先申明一點,我這個人人品沒有什麼問題,只是朋友發來一段 Python 源碼,在這裡分享給大家。
import os
from multiprocessing.dummy import Pool as ThreadPool

import requests
from lxml import etree
from lxml import html

# Seconds to wait on any single HTTP request, so that a stalled connection
# cannot block a worker thread forever.
REQUEST_TIMEOUT = 10

# Failures that skip one gallery or one image instead of aborting the run.
_SKIPPABLE_ERRORS = (
    requests.RequestException,  # network failure, timeout, bad HTTP status
    etree.ParserError,          # empty / unparseable HTML document
    IndexError,                 # expected element missing from the page
    OSError,                    # cannot create the folder or write the file
)


def header(referer):
    """Build the HTTP headers sent with an image download request.

    Args:
        referer: Value placed in the ``Referer`` header.

    Returns:
        dict: Header names mapped to header values.
    """
    headers = {
        'Host': 'i.meizitu.net',
        'Pragma': 'no-cache',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/59.0.3071.115 Safari/537.36',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': '{}'.format(referer),
    }
    return headers


# Fetch the index page listing
def getPage(pageNum):
    """Return the detail-page links found on one index page.

    Each link is also printed as it is collected.

    Args:
        pageNum: Index page number; it is formatted into the page URL.

    Returns:
        list: The ``href`` values of the entries under ``ul#pins``.

    Raises:
        requests.RequestException: If the index page cannot be fetched.
    """
    baseUrl = 'http://www.mzitu.com/page/{}'.format(pageNum)
    response = requests.get(baseUrl, timeout=REQUEST_TIMEOUT)
    selector = html.fromstring(response.content)
    urls = []
    for i in selector.xpath('//ul[@id="pins"]/li/a/@href'):
        urls.append(i)
        print(i)
    return urls


# Image link list and title
# url is the detail-page link
def getPiclink(url):
    """Download every image of one gallery into its own folder.

    The folder is created in the current working directory and named
    ``【<total>P】<title>``. Images are saved as ``1.jpg``, ``2.jpg``, ...
    numbered in the order they were successfully saved. A gallery or image
    that cannot be processed is reported and skipped, so one failure does
    not abort the other galleries handled by the thread pool.

    Args:
        url: Link to the gallery's detail page.

    Returns:
        None
    """
    try:
        sel = html.fromstring(
            requests.get(url, timeout=REQUEST_TIMEOUT).content)
        # Total number of images
        total = sel.xpath(
            '//div[@class="pagenavi"]/a[last()-1]/span/text()')[0]
        # Title
        title = sel.xpath('//h2[@class="main-title"]/text()')[0]
        pageCount = int(total)
    except _SKIPPABLE_ERRORS + (ValueError,) as exc:
        print('Skipping gallery {}: cannot read detail page ({!r})'.format(
            url, exc))
        return

    # Folder name format. The title comes from a remote page, so path
    # separators are replaced to keep the folder inside the working directory.
    dirName = u"【{}P】{}".format(total, title)
    dirName = dirName.replace('/', '_').replace('\\', '_')
    saveDir = os.path.abspath(dirName)
    try:
        # exist_ok lets a re-run reuse the folder instead of crashing.
        os.makedirs(saveDir, exist_ok=True)
    except OSError as exc:
        print('Skipping gallery {}: cannot create folder ({!r})'.format(
            url, exc))
        return

    n = 1
    for page in range(1, pageCount + 1):
        # One image per sub-page
        link = '{}/{}'.format(url, page)
        try:
            s = html.fromstring(
                requests.get(link, timeout=REQUEST_TIMEOUT).content)
            # The image address is in the src attribute
            jpgLink = s.xpath('//div[@class="main-image"]/p/a/img/@src')[0]
            print(u'開始下載圖片:%s 第%s張' % (dirName, n))
            image = requests.get(
                jpgLink, headers=header(jpgLink), timeout=REQUEST_TIMEOUT)
            # Do not save an HTTP error body as if it were an image.
            image.raise_for_status()
            # Output path: current directory / folder / file name.
            # The file is opened only after the download succeeded, so a
            # failed request never leaves an empty .jpg behind.
            filename = os.path.join(saveDir, '{}.jpg'.format(n))
            with open(filename, "wb") as jpg:
                jpg.write(image.content)
        except _SKIPPABLE_ERRORS as exc:
            print('Skipping {}: {!r}'.format(link, exc))
            continue
        n += 1


if __name__ == '__main__':
    pageNum = input(u'請輸入頁碼:').strip()
    p = getPage(pageNum)
    with ThreadPool(4) as pool:
        pool.map(getPiclink, p)
至於爬取出來的效果圖,我就不發布了。