Python多線程Threading爬取圖片,保存本地,openpyxl批量插入圖片到Excel表中


之前用openpyxl庫新建Excel文件保存數據時寫入不了,因此保存數據改用xlsxwriter,openpyxl只用來讀取現有表格和插入圖片

批量插入圖片到Excel表中

 1 import os
 2 import requests
 3 import re
 4 from openpyxl import load_workbook
 5 import xlsxwriter
 6 from multiprocessing.dummy import Pool as ThreadPool
 7 from openpyxl.drawing.image import Image
 8 
 9 def spider(url):
10     headers = {
11         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'}
12     html = requests.get(url, headers, timeout=None)
13     pic_url = re.findall('class="product-image">.*?<img src="(.*?)"  height="', html.text, re.S)
14     sku = re.findall('q=(\d+)', url, re.S)#正則匹配鏈接后面的sku
15     if pic_url != []:
16         print('正在下載'+sku[0]+'圖片,圖片地址:' + pic_url[0])
17         pic = requests.get(pic_url[0])
18         dir = cwd + '\\images\\' + sku[0] + '.jpg'
19         # print(dir)
20         with open(dir, 'wb') as file:
21             file.write(pic.content)
22     else:
23         if sku !=[]:
24             print('沒有找到' + sku[0]+'產品')
25             No_images.append(sku[0])
# openpyxl was tried first for creating the new Excel file but the data could
# not be written, so xlsxwriter is used to save this report instead.
def save_excel(sku):
    """Write the SKUs that have no image to ``No_images.xlsx``.

    The file is created in the directory held by the module-level ``cwd``.
    Cell A1 holds the header ``No_images_sku``; each SKU follows in
    column A, one per row.

    Args:
        sku: Sequence of SKU values to record.
    """
    print(sku)
    workbook = xlsxwriter.Workbook(cwd + '\\' + 'No_images.xlsx')
    sheet = workbook.add_worksheet()
    sheet.write(0, 0, 'No_images_sku')
    # Row 0 is the header, so data rows start at 1.
    for row, value in enumerate(sku, start=1):
        sheet.write(row, 0, value)
    workbook.close()
    print('保存沒有圖片的sku成功!')
36 
# Insert a downloaded image into the Excel sheet.
def insert_images(sku):
    """Insert ``<cwd>/images/<sku>.jpg`` next to every row holding *sku*.

    Opens the workbook at the module-level ``path``, looks for *sku* in
    column B of the first worksheet and anchors the image in column H of
    each matching row.  The workbook is saved once, and only when at least
    one image was inserted.

    Relies on the module-level globals ``cwd`` and ``path``.

    Args:
        sku: SKU as a string; also the base name of the image file.
    """
    img_path = os.path.join(cwd, 'images', sku + '.jpg')
    print(img_path)
    # Skip the workbook load entirely when there is nothing to insert.
    if not os.path.isfile(img_path):
        print('找不到圖片: ' + img_path)
        return

    wb = load_workbook(path)
    try:
        ws = wb.worksheets[0]
        ws.column_dimensions['H'].width = 11  # column width
        inserted = False
        for i in range(1, ws.max_row + 1):
            value = ws.cell(i, 2).value
            # Compare as text so SKUs stored as numbers and SKUs stored as
            # strings both match; int(sku) would miss the latter.
            if value is None or str(value) != sku:
                continue
            # A fresh Image per row: add_image() stores the anchor on the
            # image object, so reusing one object for several rows would
            # leave every copy on the last anchor.
            img = Image(img_path)
            img.width = 80
            img.height = 96
            ws.row_dimensions[i].height = 88  # row height
            ws.add_image(img, 'H' + str(i))
            inserted = True
        if inserted:
            wb.save(path)
    except FileNotFoundError as e:
        # The image may disappear between the check above and the save.
        print(e)
    finally:
        wb.close()
58     
if __name__ == '__main__':
    # cwd, path and No_images are read as module-level globals by
    # spider(), save_excel() and insert_images().
    cwd = os.getcwd()
    path = os.path.join(cwd, '圖片測試.xlsx')
    No_images = []
    # The download target directory must exist before any worker writes.
    os.makedirs(os.path.join(cwd, 'images'), exist_ok=True)

    # Read every non-empty SKU from column B once, remembering its row.
    # Duplicates are dropped so a SKU is downloaded and inserted only once
    # (two workers would otherwise write the same file concurrently).
    wb = load_workbook(path)
    try:
        ws = wb.worksheets[0]
        skus = []
        seen = set()
        for i in range(1, ws.max_row + 1):
            sku = ws.cell(i, 2).value
            if sku is None:
                continue
            sku = str(sku)
            if sku in seen:
                continue
            seen.add(sku)
            print('正在爬取第' + str(i) + '個sku圖片')
            skus.append((i, sku))
    finally:
        wb.close()

    urls = ['http://www.fulchic.com/catalogsearch/result/?q=' + sku
            for _, sku in skus]

    # multiprocessing.dummy provides a pool of threads (not processes);
    # the work is network-bound, so many threads can wait in parallel.
    pool = ThreadPool(50)
    try:
        pool.map(spider, urls)  # spider is the crawler, urls the link list
    finally:
        pool.close()
        pool.join()

    # Save the SKUs for which no image was found.
    save_excel(No_images)

    # Insert each downloaded image; insert_images() also sets the cell
    # and image sizes.
    for i, sku in skus:
        print('正在保存第' + str(i) + '個sku圖片')
        insert_images(sku)

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM