從百度圖片批量獲取自己想要的圖片

本文轉載自查看原文 2018-06-27 10:56 2014 Python

# -*- coding: utf-8 -*-
# @Time    : 19-1-10 下午9:44
# @Author  : Felix Wang

import re
import requests
import json
import random
from multiprocessing import Pool


def translate(content, tolang='zh', fromlang=None, timeout=10):
    """Translate ``content`` through Baidu's mobile translation endpoint.

    Args:
        content: Text to translate; sent as the ``query`` form field.
        tolang: Target language code, sent as the ``to`` form field.
        fromlang: Source language code, sent as the ``from`` form field.
            When falsy, the language is first looked up by posting the
            text to the ``langdetect`` endpoint and reading its ``lan``
            field.
        timeout: Seconds to wait for each HTTP request before giving up,
            so a stalled connection cannot block the caller forever.

    Returns:
        The ``dst`` value of the first entry of the response's ``trans``
        list, or ``None`` when the request or the parsing of its reply
        fails (the error is printed, not raised).
    """
    # A mobile User-Agent is picked at random for every call.
    user_agents = [
        'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    ]
    headers = {
        'User-Agent': random.choice(user_agents)
    }

    try:
        # Language detection lives inside the try block so that a failure
        # here is reported the same way as a failure of the translation
        # request itself.
        if not fromlang:
            detected = requests.post(
                'https://fanyi.baidu.com/langdetect',
                data={'query': content},
                headers=headers,
                timeout=timeout,
            )
            fromlang = json.loads(detected.text)['lan']

        form = {
            'from': fromlang,
            'to': tolang,
            'query': content,
        }
        res = requests.post(
            url='https://fanyi.baidu.com/basetrans',
            data=form,
            headers=headers,
            timeout=timeout,
        )
        result = json.loads(res.text)
        return result['trans'][0]['dst']
    except (requests.RequestException, ValueError, KeyError,
            IndexError, TypeError) as e:
        # Network errors, non-JSON replies and unexpected reply shapes all
        # end up here; translation is best-effort for the caller.
        print('翻譯出錯')
        print(e)
        return None


# Reference table of language codes, one per line: the code followed by the
# language's Chinese name. Presumably these are the values accepted for the
# `fromlang` / `tolang` arguments of translate() -- confirm against the
# service. The string is a bare expression and has no effect at runtime.
'''
zh    中文
en    英語
yue    粵語
wyw    文言文
jp    日語
kor    韓語
fra    法語
spa    西班牙語
th    泰語
ara    阿拉伯語
ru    俄語
pt    葡萄牙語
de    德語
it    意大利語
el    希臘語
nl    荷蘭語
pl    波蘭語
bul    保加利亞語
est    愛沙尼亞語
dan    丹麥語
fin    芬蘭語
cs    捷克語
rom    羅馬尼亞語
slo    斯洛文尼亞語
swe    瑞典語
hu    匈牙利語
cht    繁體中文
vie    越南語
'''


def mkdir(path):
    """Create the directory ``path`` (and missing parents) if it is absent.

    Surrounding whitespace and trailing backslashes are removed from
    ``path`` before it is used.

    Args:
        path: Directory path to create.

    Returns:
        ``True`` if the directory was created by this call, ``False`` if
        the path already existed.
    """
    import os

    path = path.strip()
    path = path.rstrip("\\")

    if os.path.exists(path):
        print(path + ' 目錄已存在')
        return False

    try:
        os.makedirs(path)
    except FileExistsError:
        # Another process created the path between the existence check and
        # makedirs(); report it as already existing instead of crashing.
        print(path + ' 目錄已存在')
        return False

    print(path + ' 創建成功')
    return True


def get_data(name, pn, timeout=10):
    """Request one page of Baidu image-search results.

    Args:
        name: Search keyword; sent as the ``word`` query parameter.
        pn: Result offset; sent as the ``pn`` query parameter. The page
            size requested is 30 (``rn``), so callers step this by 30
            starting from 0.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The ``requests.Response`` of the search request, unparsed.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    params = {
        'tn': 'baiduimage',
        'word': str(name),
        'pn': str(pn),
        'rn': '30',
    }
    return requests.get(
        'https://image.baidu.com/search/index',
        params=params,
        timeout=timeout,
    )


def get_img_url(response):
    """Extract thumbnail URLs from a search response.

    Every ``"thumbURL":"..."`` value in ``response.text`` that ends in
    ``jpg`` is collected, in order of appearance. The list is also printed.

    Args:
        response: Object exposing the page body as ``.text``.

    Returns:
        List of the matched URL strings (empty when nothing matches).
    """
    body = str(response.text)
    found = re.findall('"thumbURL":"(.*?jpg)"', body, re.S)
    print(found)
    return found


def get_one_img(url, img_path, img_name, timeout=10):
    """Download one image and save it as ``<img_path>/<img_name>.jpg``.

    Args:
        url: Address of the image to download.
        img_path: Existing directory to save into.
        img_name: File name without extension.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status. Nothing is
            written in that case, so an error page is never stored as an
            image.
        OSError: If the file cannot be written.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    with open('{}/{}.jpg'.format(img_path, img_name), 'wb') as f:
        f.write(response.content)
    # Report success only once the bytes are actually on disk.
    print(img_name + '下載成功')


def get_file_count(path, type):
    """Summarise the files found under ``path``, including subdirectories.

    :param path: directory to walk
    :param type: file extension to count, including the dot (e.g. ``'.jpg'``)
    :return: dict with ``counts`` -- how many files have exactly that
        extension -- and ``filenames`` -- the base names of all files
        found, whatever their extension
    """
    import os.path

    all_names = []
    for _parent, _subdirs, names in os.walk(path):
        all_names.extend(names)

    matching = sum(
        1 for name in all_names if os.path.splitext(name)[1] == type
    )
    return {'counts': matching, 'filenames': all_names}


def get_needs_imgs(imgs_needs, img_type):
    """Download images for one keyword until a folder holds enough of them.

    The keyword is translated to English and lower-cased to name the
    folder ``imgs/<name>`` and the files ``<name><index>.jpg``. Result
    pages are fetched 30 entries at a time and each thumbnail is saved
    until the folder contains ``imgs_needs`` ``.jpg`` files.

    Args:
        imgs_needs: Number of ``.jpg`` files the folder should end up with.
        img_type: Search keyword.
    """
    translated = translate(img_type, 'en')
    # A failed translation yields None; fall back to the keyword itself
    # rather than naming the folder after the literal string "none".
    tran_img_type = str(translated if translated else img_type).lower()

    img_path = 'imgs/' + tran_img_type
    mkdir(img_path)

    img_pg = 0  # result offset, advanced in steps of 30
    while True:
        # Re-count from disk on every page so files already present
        # (e.g. from an earlier run) are taken into account.
        count = get_file_count(img_path, '.jpg')['counts']
        if count >= imgs_needs:
            print('指定文件夾下已經有{}張圖片了'.format(str(imgs_needs)))
            break

        urls = get_img_url(get_data(img_type, img_pg))
        if not urls:
            # No results on this page: stop instead of requesting further
            # pages forever.
            print('沒有更多圖片了，目前共{}張'.format(count))
            break

        for url in urls:
            try:
                get_one_img(url, img_path, tran_img_type + str(count))
            except Exception as e:
                # Best effort: one bad image must not abort the run, but
                # the failure is reported instead of silently dropped.
                print('{} 下載失敗: {}'.format(url, e))
            else:
                count += 1
            if count >= imgs_needs:
                break
        img_pg += 30


def main(img_type):
    """Worker entry point used with the process pool.

    Downloads images for a single keyword, taking the target count from
    the module-level ``imgs_needs``.
    """
    get_needs_imgs(imgs_needs, img_type)


# Number of images wanted for each keyword.
imgs_needs = 800
# Search keywords; each one is handled by its own worker process.
img_types = ['汽車', '兔子', '吉他', '房子']


if __name__ == '__main__':
    # The context manager releases the worker processes when the block
    # exits; map() blocks until every keyword has been processed.
    with Pool() as pool:
        pool.map(main, img_types)

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱 yoyou2525@163.com 刪除。

猜您在找：python爬蟲——批量下載百度圖片 · (二)批量下載百度網站圖片 · 百度獲取圖片 json格式解析 · 百度編輯器批量上傳圖片自動排版 · 從百度地圖API接口批量獲取地點的經緯度 · 調取百度地圖接口，實現取自己的實時位置，然後可以在百度地圖上添加信息標註 · 爬蟲下載百度貼吧圖片 · 如何使用百度圖片搜索API · 百度api識別圖片文字 · 百度WebUploader上傳圖片