import urllib.request
import re
import os
import urllib
from lxml import etree
from lxml.html import fromstring
def get_img(path, urllist):
    """Download every URL in ``urllist`` into the directory ``path``.

    Files are saved as ``0.jpg``, ``1.jpg``, ... The counter only advances
    after a successful download, so a URL that fails does not leave a gap
    in the numbering.

    Args:
        path: Target directory; created (including parents) when missing.
        urllist: Iterable of image URLs to fetch.

    Returns:
        ``urllist``, unchanged.
    """
    # exist_ok avoids the separate "is it there yet?" check.
    os.makedirs(path, exist_ok=True)

    x = 0  # index used for the next file name
    for url in urllist:
        print('开始下载', url, 'NUM', x)
        # os.path.join picks the right separator for the current platform.
        target = os.path.join(path, '{0}.jpg'.format(x))
        try:
            urllib.request.urlretrieve(url, target)
        except OSError as err:
            # Best effort: report the failure and carry on with the next URL.
            print('下载失败', url, err)
        else:
            x += 1
    # Return the argument itself rather than relying on a module-level global.
    return urllist
def get_urltext(url, xpag, pos, reg):
    """Fetch a page and return all regex matches inside one XPath result.

    Args:
        url: Address of the page to download.
        xpag: XPath expression evaluated against the parsed HTML.
        pos: Index of the XPath result whose text is searched.
        reg: Regular expression pattern applied with ``findall``.

    Returns:
        The list produced by ``re.findall`` on the selected XPath result.

    Raises:
        IndexError: If the XPath expression yields fewer than ``pos + 1``
            results.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as page:
        html_1 = page.read()
    html_2 = etree.HTML(html_1, etree.HTMLParser())
    re1 = re.compile(reg)
    # NOTE(review): assumes the selected XPath result is a string (e.g. a
    # text() node); findall would fail on an element -- confirm with callers.
    list1 = re1.findall(html_2.xpath(xpag)[pos])
    return list1
# Script body: ask for a chapter page URL, collect the image URLs found in
# the page and download them into a folder named after the chapter.
url = input("3x3url:")

# The chapter path is looked up in the second script text node (index 1).
chapter_matches = get_urltext(url, '//script/text()', 1, r'chapterPath = "[0-9a-zA-Z\/_]{22}"')
if not chapter_matches:
    raise SystemExit('页面中未找到chapterPath: ' + url)
# The match has the form: chapterPath = "<path>"; keep only the quoted part.
imgpath = 'https://img001.1fi4b.cn/' + chapter_matches[0].split('"')[1]

# The dot is escaped so that only a literal ".jpg" suffix is accepted.
imglist = get_urltext(url, '//script/text()', 1, r'[0-9a-zA-Z\_]{26}\.jpg')
# Turn the bare file names into absolute image URLs.
imglist = [imgpath + imgvalue for imgvalue in imglist]

# The chapter title (e.g. a two-digit chapter number) names the output folder.
picpathname = get_urltext(url, '//text()', 8, r'第\d\d话')
if not picpathname:
    raise SystemExit('页面中未找到章节标题: ' + url)
save_path = 'E:\\download\\' + picpathname[0]  # directory the images are saved to

get_img(save_path, imglist)
print('下载完成:', save_path)