python3.5.2爬蟲

本文轉載自查看原文 2016-08-30 21:50 1761 爬蟲/ py/ 3.5.2

話不多說，都在代碼裡

#下載斗魚顏值欄目主播照片

#author:ives

#date:2016-8-28 21:58

#e-mail:renhanlinbsl@163.com

import urllib.request

import string

import re

import json

import sys,os

# Listing endpoint prefix; the paging offset value is appended directly after it.
url = "http://capi.douyucdn.cn/api/v1/getColumnRoom/8?offset="

# Remainder of the query string, appended after the offset value.
urlAfter = "&limit=30&client_sys=android"

# 1-based ordinal of the next image to download (printed, then incremented).
count = 1

# Paging offset sent to the endpoint; the first page starts at 0.
offset = 0

#獲取當前腳本路徑

def cur_file_dir():
    """Return the directory associated with the running script.

    ``sys.path[0]`` is inspected: when it names an existing directory it is
    returned unchanged; when it names an existing file, that file's parent
    directory is returned.

    Returns:
        str: A directory path. When ``sys.path[0]`` is missing or is neither
        an existing directory nor an existing file (for example the empty
        string), the current working directory is returned, so the result
        is never ``None``.
    """
    path = sys.path[0] if sys.path else ""
    if os.path.isdir(path):
        return path
    if os.path.isfile(path):
        return os.path.dirname(path)
    # Neither a directory nor a file: fall back to the working directory so
    # callers that concatenate the result with "/" never receive None.
    return os.getcwd()

#獲取網頁json

def getHtml(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request; passed straight to
            ``urllib.request.urlopen``.

    Returns:
        str: The complete response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response even if read() raises; the
    # previous version left the connection open.
    with urllib.request.urlopen(url) as page:
        html = page.read()
    # Decode explicitly so callers receive str instead of bytes.
    return html.decode("UTF-8")

#下載圖片

def downLoadImg(url):
    """Download the resource at *url* into the download directory.

    The target path is the module-level ``downLoadUrl`` prefix followed by
    the last "/"-separated segment of *url*. An existing file with the same
    name is overwritten.

    Args:
        url: Address of the image to download.

    Returns:
        None

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        OSError: If the target file cannot be written.
    """
    # Fetch first so that a failed request does not leave an empty file
    # behind; the context manager also closes the response.
    with urllib.request.urlopen(url) as img:
        buf = img.read()
    # The file name is the last path segment of the URL.
    target = downLoadUrl + url.split("/")[-1]
    # "with" guarantees the data is flushed and the handle released; the
    # previous version never closed the file.
    with open(target, 'wb') as out:
        out.write(buf)
    return

#動態生成路徑

def getLink(url):
    """Download every image listed in the JSON document served at *url*.

    The document is fetched with ``getHtml`` and parsed as JSON. For each
    entry under its ``"data"`` key, the entry's ``"vertical_src"`` URL is
    passed to ``downLoadImg``, a progress line is printed, and the
    module-level ``count`` is incremented.

    Args:
        url: Address of the JSON listing to process.

    Returns:
        None

    Raises:
        ValueError: If the response is not valid JSON.
        KeyError: If ``"data"`` or an entry's ``"vertical_src"`` is missing.
    """
    # Declared once at the top; the function rebinds the module-level counter.
    global count

    response = getHtml(url)
    # json.loads decodes \uXXXX escapes itself. The former
    # encode('latin-1').decode('unicode_escape') round-trip raised
    # UnicodeEncodeError on any non-Latin-1 character and could turn an
    # escaped quote (\") into a bare quote, corrupting the JSON.
    jsonText = json.loads(response)

    for entry in jsonText["data"]:
        # URL of the image for this entry.
        src = entry["vertical_src"]
        downLoadImg(src)
        # Progress line: running ordinal, image URL and current paging offset.
        print("已下載"+str(count)+"張"+src+"-*-"+str(offset))
        count = count + 1
    return

######################################################

#獲取下載目錄

downLoadUrl = cur_file_dir() + "/"

# Walk the listing page by page, advancing the offset, until a page yields
# no downloads.
while True:
    finalUrl = url + str(offset) + urlAfter
    downloaded_before = count
    # Request the paged URL. The previous code passed the bare ``url``
    # prefix, so the computed offset and limit were never sent.
    getLink(finalUrl)
    if count == downloaded_before:
        # getLink increments ``count`` once per image; no change means the
        # page had no entries, so stop instead of looping forever.
        break
    # NOTE(review): urlAfter requests limit=30 while the offset advances by
    # 20, so consecutive pages may overlap by 10 entries - confirm the
    # intended page size.
    offset = offset + 20

聯繫我:renhanlinbsl@163.com

2016-8-30

21:49

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 源代碼編譯安裝Python3.5.2 python環境搭建-在Windows上安裝python3.5.2 windows + python3.5.2 + anaconda3 + dlib 安裝配置 Python 3.5.2建立與DB2的連接 ubuntu 16.04.1 LTS python 3.5.2安裝 Centos 6.4 python 2.6 升級到 3.5.2 PyQt5+Python3.5.2-32bit開發環境搭建 Python 爬蟲（一）：爬蟲偽裝 python爬蟲--爬蟲介紹 Python爬蟲教程—爬蟲