使用BeautifulSoup去爬取豆瓣圖片

本文轉載自查看原文 2018-05-23 11:34 1109 爬蟲

#要安裝bs4和requests,用pip install beautifulsoup4 requests
from bs4 import BeautifulSoup
import requests
#用request下載網頁圖片
import urllib.request

# Running image counter, kept at module level rather than inside the function;
# getDbImage() reads and increments it to name the saved files.
x = 0
# Fetch one listing page and download the images on it.
# The default for ``page`` may be omitted or given as a literal such as 1, but
# it cannot be the caller's loop variable ``i``: that name is not defined yet
# when the function is created.
def getDbImage(page=1):
    """Download every ``<img>`` found on one listing page into ``images/``.

    Each file is saved as ``images/<x>.jpg``, where ``x`` is the module-level
    counter. The counter is global on purpose: a local counter would restart
    at 0 on every call and files from later pages would overwrite earlier ones.

    Args:
        page: Value substituted into the ``pager_offset`` query parameter of
            the listing URL (e.g. ``https://www.dbmeinv.com/?pager_offset=3``).
    """
    import os
    from urllib.parse import urljoin

    # Declare the shared counter once, before any use inside the function.
    global x

    # Fetch the page source for the requested page number.
    page_url = 'https://www.dbmeinv.com/?pager_offset={}'.format(page)
    response = requests.get(page_url)

    # Parse with the built-in 'html.parser'; another parser such as lxml
    # could be used instead.
    soup = BeautifulSoup(response.text, 'html.parser')

    # urlretrieve opens the target path directly and does not create missing
    # directories, so make sure the output folder exists first.
    os.makedirs('images', exist_ok=True)

    # find_all returns a list of tags. On this site the wanted photos are
    # expected to be the only <img> tags; other sites may need a narrower
    # selector.
    for img in soup.find_all('img'):
        link = img.get('src')
        if not link:
            # An <img> without a usable src has nothing to download.
            continue

        # Resolve relative and protocol-relative src values against the page
        # URL; absolute URLs pass through unchanged.
        link = urljoin(page_url, link)

        # urlretrieve(url, path): first argument is the link, second is where
        # to save it (inside the images folder, named after the counter).
        urllib.request.urlretrieve(link, 'images/%s.jpg' % x)

        # Advance the counter so the next file gets a new name, then report
        # progress using the 1-based count.
        x += 1
        print('正在下載第%s張圖片' % x)
    
# Download a range of pages, here the first ten (1 through 10).
# Comparing the URLs of consecutive pages shows that only the trailing number
# changes, so it can be driven by the loop variable:
#   https://www.dbmeinv.com/?pager_offset=3
#   https://www.dbmeinv.com/?pager_offset=4
for i in range(1, 11):
    print("正在下載第{}頁圖片".format(i))
    # The function's ``page`` parameter receives the current page number.
    getDbImage(page=i)

除去注釋后簡約版代碼：

from bs4 import BeautifulSoup
import requests
import urllib.request

# Running image counter shared across getDbImage() calls; it names the saved files.
x = 0
def getDbImage(page=1):
    """Download every ``<img>`` found on one listing page into ``images/``.

    Files are saved as ``images/<x>.jpg`` using the module-level counter
    ``x``, which keeps increasing across calls so that successive pages do
    not overwrite each other's files.

    Args:
        page: Value substituted into the ``pager_offset`` query parameter of
            the listing URL.
    """
    import os
    from urllib.parse import urljoin

    # Declare the shared counter once, before any use inside the function.
    global x

    page_url = 'https://www.dbmeinv.com/?pager_offset={}'.format(page)
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # urlretrieve does not create missing directories.
    os.makedirs('images', exist_ok=True)

    for img in soup.find_all('img'):
        link = img.get('src')
        if not link:
            # An <img> without a usable src has nothing to download.
            continue
        # Resolve relative and protocol-relative src values against the page
        # URL; absolute URLs pass through unchanged.
        link = urljoin(page_url, link)
        urllib.request.urlretrieve(link, 'images/%s.jpg' % x)
        # Increment before printing so the progress message is 1-based.
        x += 1
        print('正在下載第%s張圖片' % x)
    
# Download listing pages 1 through 10, announcing each page before fetching it.
for i in range(1, 11):
    print("正在下載第{}頁圖片".format(i))
    getDbImage(page=i)

效果如下：

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 python3爬蟲-6.使用requests和BeautifulSoup爬取豆瓣Top250電影 Scrapy教程--豆瓣電影圖片爬取使用beautifulsoup與requests爬取數據使用 BeautifulSoup 和 Selenium 進行網頁爬取 Python使用BeautifulSoup爬取網頁信息爬蟲初識之BeautifulSoup庫的使用-爬取某圖片站的image 爬取豆瓣電影 Python爬蟲之利用BeautifulSoup爬取豆瓣小說（一）——設置代理IP 爬取豆瓣網頁上的電影(包括圖片，評分，和簡介等） python網絡爬蟲之解析網頁的BeautifulSoup(爬取電影圖片)[三]