# -*- coding:utf-8 -*-
import logging
import os
import urllib.request as rqst
from urllib.parse import urljoin, urlsplit

from bs4 import BeautifulSoup as BS
# Page whose CSS, JS and image files are downloaded into the current
# directory. Placeholder value -- replace with the real page URL.
url = 'http://xxxxxxx'
# Headers sent with every request. The User-Agent value is a placeholder
# (its parenthetical says any User-Agent string found online will do);
# replace it with a real one before running.
# NOTE(review): 'utf-8' is a charset name, not an HTTP content-coding --
# confirm whether Accept-Encoding is really the intended header.
headers = {'User-Agent': 'xxxxxx(這個網上隨便找一個都可以)', 'Accept-Encoding': 'utf-8'}


def _local_name(asset_url):
    """Return the file name to save *asset_url* under ('' if it has none)."""
    # Take the last segment of the URL *path* only, so a query string or
    # fragment never ends up in the file name.
    return urlsplit(asset_url).path.rsplit('/', 1)[-1]


def _save_asset(link):
    """Download the asset referenced by *link* into the current directory.

    *link* is the raw ``href``/``src`` value; relative values are resolved
    against the page ``url``. A file that already exists is left untouched.
    Any failure is logged and the asset is skipped, so one bad link does
    not abort the rest of the run.
    """
    asset_url = urljoin(url, link)
    name = _local_name(asset_url)
    if not name or os.path.exists(name):
        return
    try:
        # Fetch first and open the file afterwards, so a failed download
        # does not leave an empty file that blocks later retries.
        with rqst.urlopen(rqst.Request(asset_url, headers=headers)) as res:
            data = res.read()
        # res.read() returns bytes, so every asset is written in binary mode.
        with open(name, 'wb') as f:
            f.write(data)
    except Exception as exc:  # best-effort download: report and move on
        logging.warning('could not save %s: %s', asset_url, exc)


r = rqst.Request(url, headers=headers)
# Open the Request object (not the bare URL) so the headers are sent.
with rqst.urlopen(r) as resp:
    html = resp.read()

# Parse the page with BeautifulSoup.
bs = BS(html, 'lxml')

# Collect the tags that reference CSS, JS and image files.
elc = bs.find_all('link', type='text/css')
elj = bs.find_all('script')
eli = bs.find_all('img')

# Save the CSS, JS and image files. Tags without the attribute (for
# example inline <script> blocks) have nothing to download.
for tags, attr in ((elc, 'href'), (elj, 'src'), (eli, 'src')):
    for tag in tags:
        if tag.has_attr(attr):
            _save_asset(tag[attr])