因為一些工作原因需要用到安全設備掃描器的漏洞數據,但是安全設備掃描器本身導出的漏洞數據是加密的,所以只能使用爬蟲進行爬取。
代碼如下:
# -*-coding:utf-8 -*-
import requests, re
import sys
from bs4 import BeautifulSoup
import re,sys,os
import xlsxwriter
from xlrd import open_workbook
from xlutils.copy import copy
put_name = 'loudong'
def login(login_url, username, password):
# 請求頭
my_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip',
'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
'Origin': 'https://10.10.10.10',
'Referer': 'https://10.10.10.10/accounts/login_view/'
}
# 獲取token
sss = requests.Session()
r = sss.get(url='https://10.10.10.10/accounts/login/', headers=my_headers, verify=False)
# <input type=\'hidden\' name=\'csrfmiddlewaretoken\' value="mvTgwjCx1iTzAdRROOPvk8YctcbO9uXV">'
pattern = re.compile(r'<input type=\'hidden\' name=\'csrfmiddlewaretoken\' value="(.*)">')
result = pattern.findall(r.text)
token = result[0]
# postdata
my_data = {
# 'commit' : '登錄',
'username': username,
'password': password,
'csrfmiddlewaretoken': token
}
# 登錄后k
r = sss.post(login_url,headers=my_headers,data=my_data,verify=False)
#print(r.text)
return sss
def get_date(url,sss):
my_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Encoding': 'gzip',
'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
'Origin': 'https://10.10.10.10',
'Referer': 'https://10.10.10.10/template/show_template?temp_id=12&temp_name=%E5%85%A8%E9%83%A8%E6%BC%8F%E6%B4%9E%E6%89%AB%E6%8F%8F&temp_desc=%E6%9C%AC%E6%A8%A1%E6%9D%BF%E6%89%AB%E6%8F%8F%E6%89%80%E6%9C%89%E6%8F%92%E4%BB%B6&vlun_count_allundefined'
}
my_data = {
'val': 'System',
'temp_id': '12',
'conditions': 'is_dangerous =',
'op_type':'showStemp'
}
r = sss.get(url, headers=my_headers, data=my_data, verify=False,timeout=5)
#判斷頁面是否為空
if r.apparent_encoding == 'utf-8':
pass
CVE_id = ''
cvss_score=''
CNCVE_id=''
risk_score=''
print('頁面不為空....')
else:
print(url,'--> !!頁面為空 ')
return
soup=BeautifulSoup(r.text,"html.parser")
#class 為 odd
tables=soup.find_all('tr',class_='odd')
#print(tables)
for i,env in enumerate(tables):
#漏洞名稱
if i == 0:
leak_name=env.get_text()
#解決方法
if i == 1:
str_env=env.get_text()
solution=str_env[5:]
solution =solution.replace('\n','')
# #危險插件
# if i ==2:
# str_env=env.get_text()
# danger_plug=str_env[6]
#CVE編號
if i == 3:
str_env = env.get_text()
CVE_id=str_env[7:]
#CVSS評分
if i == 5:
str_env = env.get_text()
cvss_score=str_env[7:]
#class 為even
tables2=soup.find_all('tr',class_='even')
for i,env in enumerate(tables2):
#漏洞描述
if i ==0:
str_env=env.get_text()
leak_desc=str_env[6:].strip()
leak_desc=leak_desc.replace('\n', '')
#危險分值
if i ==1:
str_env=env.get_text()
risk_score=str_env[5:]
#發現日期
if i ==2:
str_env=env.get_text()
data_discovery=str_env[5:]
#CNCVE編號
if i ==3:
str_env=env.get_text()
CNCVE_id=str_env[9:]
#CNVD
tables3 = soup.find_all('td')
#print(tables3)
cnvd_id=''
if "CNVD" in tables3[-1].get_text():
cnvd_id=tables3[-1].get_text()
else:
pass
cnnvd_id=''
if "CNNVD" in tables3[-6].get_text():
cnnvd_id=tables3[-6].get_text()
else:
pass
print('數據返回---> succcess')
log_file(url)
return leak_name,solution,CVE_id,cvss_score,leak_desc,data_discovery,CNCVE_id,cnvd_id,cnnvd_id,risk_score
def w_file(leak_name,solution,CVE_id,cvss_score,leak_desc,data_discovery,CNCVE_id,cnvd_id,cnnvd_id,risk_score):
if not os.path.exists(put_name+".xls"):
workbook = xlsxwriter.Workbook(put_name+".xls") # 建立
worksheet = workbook.add_worksheet('employee')
workbook.close()
else:
r_xls = open_workbook(put_name+".xls") # 讀取excel文件
row = r_xls.sheets()[0].nrows # 獲取已有的行數
excel = copy(r_xls) # 將xlrd的對象轉化為xlwt的對象
table = excel.get_sheet(0) # 獲取要操作的sheet
# 對excel表追加一行內容
table.write(row, 0, leak_name)
table.write(row, 1, solution)
table.write(row, 2, CVE_id)
table.write(row, 3, cvss_score)
table.write(row, 4, leak_desc)
table.write(row, 5, data_discovery)
table.write(row, 6, CNCVE_id)
table.write(row, 7, cnvd_id)
table.write(row, 8, cnnvd_id)
table.write(row, 8, risk_score)
excel.save(put_name+".xls")
def e_file(str_f):
f=open('error.txt','a+')
str_f=str(str_f)
f.write(str_f+'\n')
f.close()
def log_file(str_f):
f=open('w_file.txt','a+')
str_f=str(str_f)
f.write(str_f+'\n')
f.close()
if __name__ == '__main__':
login_success = login("https://10.10.10.10/accounts/login_view/", "username", "password")
for i in range(50000,60000):
url = "https://10.10.10.10/template/show_vul_desc?id=%s"%(i)
#url='https://10.10.10.10/template/show_vul_desc?id=50123'
try:
leak_name,solution,CVE_id,cvss_score,leak_desc,data_discovery,CNCVE_id,cnvd_id,cnnvd_id,risk_score=get_date(url,login_success)
# #print(leak_name,solution,CVE_id,cvss_score,leak_desc,data_discovery,CNCVE_id,cnvd_id,cnnvd_id)
w_file(leak_name,solution,CVE_id,cvss_score,leak_desc,data_discovery,CNCVE_id,cnvd_id,cnnvd_id,risk_score)
except Exception as e:
e_f=url+str(e)
e_file(e_f)
print(url,e)
else:
print(url,"爬取結束end")
總結:在進行登錄時遇到了 token 的問題,想了很長時間才解決,但是由於漏洞信息頁的頁碼沒有規律,所以只能是窮舉了。
代碼本身還有很大的優化空間,以後有時間再完善。