Python 爬取b站专栏图片

本文转载自查看原文 2020-09-16 14:17 432 Python

当 olinr 学会了爬虫……
嘿嘿嘿

import urllib.request as urqt
import urllib.parse as urps
import sys
import os
import re
import shutil
# Number of images saved so far; getpng() increments it and the main loop
# compares it with the requested amount.
tot = 0
def gethtml(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a desktop Firefox User-Agent header instead of
    urllib's default one.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"}
    request = urqt.Request(url, headers=header)
    # Close the response deterministically instead of leaving the
    # connection open until the object is garbage collected.
    with urqt.urlopen(request) as response:
        return response.read().decode("utf-8")
def GetIntoPlace(string, base_dir=r"D:\信息\python\一些成品\b站专栏图片爬虫"):
    """Create a fresh, empty folder named *string* and make it the cwd.

    The process first changes into *base_dir*. If an entry called *string*
    already exists there it is deleted, then a new directory with that name
    is created and entered.

    Args:
        string: Name of the folder to (re)create inside *base_dir*.
        base_dir: Directory that holds the per-keyword folders. Defaults to
            the location the script originally hard-coded.

    Raises:
        OSError: If *base_dir* cannot be entered or the folder cannot be
            removed or created.
    """
    os.chdir(base_dir)
    # Only names that are direct entries of base_dir are ever deleted, so a
    # value such as ".." can never trigger a removal.
    if string in os.listdir():
        if os.path.isdir(string) and not os.path.islink(string):
            shutil.rmtree(string)
        else:
            # rmtree refuses plain files and symlinks; remove those directly.
            os.remove(string)
    os.mkdir(string)
    os.chdir(string)
def getpng(url):
    """Download one image and save it as ``<counter>.jpg`` in the cwd.

    On success the module-level counter ``tot`` is incremented and used as
    the file name. A download that fails is skipped silently so that one bad
    link does not stop the crawl. When ``tot`` reaches the module-level
    ``num`` the whole process is terminated through ``sys.exit()``.

    Args:
        url: Address of the image to download.

    Raises:
        SystemExit: Once the requested number of images has been saved.
    """
    global tot, num
    # Function-scope import: only needed to name HTTPException below.
    import http.client

    try:
        with urqt.urlopen(url) as response:
            res = response.read()
    except (OSError, ValueError, http.client.HTTPException):
        # Network/HTTP failures and malformed URLs are skipped; unlike the
        # former BaseException handler this lets Ctrl-C through.
        return
    tot += 1
    with open(str(tot) + '.jpg', 'wb') as f:
        f.write(res)
    print("正在下载第 " + str(tot) + " 张")
    if tot == num:
        sys.exit()
def getans(html):
    """Download every ``.jpg`` image referenced by ``img data-src`` in *html*.

    Each protocol-relative address (``//host/path.jpg``) found in a
    ``data-src`` attribute is prefixed with ``http:`` and passed to
    ``getpng``. Anything after ``.jpg`` in the attribute is dropped.

    Args:
        html: Markup of an article page.
    """
    # Raw string avoids the invalid "\." escape. "[^\"]" keeps the match
    # inside a single attribute value, so a non-jpg image earlier on the
    # line can no longer be glued to a later .jpg address.
    key = re.compile(r'img data-src="(//[^"]+?\.jpg)')
    for address in key.findall(html):
        getpng("http:" + address)
def work(html):
    """Visit every article linked from a search-result page.

    Anchors of the form ``a title... href="..."`` are located in *html*. For
    each one, the part of the link starting at ``//`` is prefixed with
    ``http:``, the page is fetched with ``gethtml`` and handed to ``getans``.
    Links without ``//`` are skipped.

    Args:
        html: Markup of a search-result page.
    """
    # Capture only the href value: the closing quote is not part of the URL
    # and a "//" inside the title attribute must not be taken for the link.
    link_re = re.compile(r'a title.+? href="([^"]+)"')
    for href in link_re.findall(html):
        start = href.find("//")
        if start == -1:
            # No host part to build an absolute URL from.
            continue
        now = "http:" + href[start:]
        getans(gethtml(now))
# Ask for the search keyword, the number of images wanted and the first
# result page to crawl.
now = input("请输入想要的图片：")
num = int(input("请输入想要爬取的图片数量："))
frm = int(input("请输入爬取起始页码："))

# The output folder is named after the raw keyword; only afterwards is the
# keyword percent-encoded for use in the search URL.
GetIntoPlace(now)
now = urps.quote(now, encoding="utf-8")

# Walk the result pages one after another until enough images are saved.
while tot < num:
    work(gethtml(f"https://search.bilibili.com/article?keyword={now}&page={frm}"))
    frm += 1

免责声明！

本站转载的文章为个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱yoyou2525@163.com删除。

猜您在找：Python 自动爬取B站视频；python B站弹幕爬取；Python如何实现爬取B站视频；Python爬取b站视频；python 爬虫爬取B站api接口返回的json数据，分页存储csv以及下载图片；b站评论爬取；python爬取b站排行榜；Python爬虫一爬取B站小视频源码；Python实战爬虫——B站封面爬取；python爬虫——爬取B站用户在线人数