Python 爬虫-爬取京东手机页面的图片

本文转载自查看原文 2017-06-13 16:00 1365 Python

具体代码如下：

__author__ = 'Fred Zhao'

import requests
from bs4 import BeautifulSoup
import os
from urllib.request import urlretrieve

class Picture():

    def __init__(self):
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'}
        self.base_url = 'https://list.jd.com/list.html?cat=9987,653,655&page='
        self.base_path = os.path.dirname(__file__)

    def makedir(self, name):
        path = os.path.join(self.base_path, name)
        isExist = os.path.exists(path)
        if not isExist:
            os.makedirs(path)
            print("File has been created.")
        else:
            print('OK!The file is existed. You do not need create a new one.')
        os.chdir(path)

    def request(self, url):
        r = requests.get(url, headers=self.headers)
        return r

    def get_img(self, page):
        r = self.request(self.base_url + str(page))
        plist = BeautifulSoup(r.text, 'lxml').find('div', id='plist')
        item = plist.find_all('li', class_='gl-item')
        print(len(item))
        self.makedir('pictures')
        num = 0
        for i in item:
            num += 1
            imglist = i.find('div', class_='p-img')
            print(num)
            img = imglist.find('img')
            print('This is %s picture' %num)
            if img.get('src'):
                url = 'https:' + img.get('src')
                fileName = img.get('src').split('/')[-1]
                urlretrieve(url, filename=fileName)

            elif img.get('data-lazy-img'):
                url = 'https:' + img.get('data-lazy-img')
                fileName = img.get('data-lazy-img').split('/')[-1]
                urlretrieve(url, filename=fileName)



if __name__ == '__main__':
    picture = Picture()
    for i in range(2): #控制爬取的页数
        picture.get_img(i+1)

免责声明！

本站转载的文章为个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱yoyou2525@163.com删除。

猜您在找 京东某商品页面的简单爬取 --Pyhon网络爬虫与信息获取分布式爬虫系统设计、实现与实战：爬取京东、苏宁易购全网手机商品数据+MySQL、HBase存储使用Selenium爬取京东电商数据(以手机商品为例) python爬取华为商城所有的手机参数爬取淘宝“手机信息” 网络爬虫之scrapy爬取某招聘网手机APP发布信息 python爬虫：爬取京东商品信息 python爬虫爬取京东商品信息 Python爬虫——爬取网页图片 Python新手爬虫四：爬取视频