2021.05.10更新:https://github.com/adezz/fofa-api-search
代碼實現:
# coding=utf-8
import base64
import configparser
import os

import requests
from lxml import etree
class FofaSpider:
    """Scrape FOFA web search result pages and record each hit's URL and title.

    Settings are read from a ``fofa.config`` file that sits next to this
    script.  Hits are appended to ``fofa_res.txt`` in the same directory and
    echoed to stdout, one ``"<url> <title>"`` pair per line.
    """

    SEARCH_URL = "https://fofa.so/result"
    # Seconds to wait for the server before giving up; without a timeout
    # requests.get() may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, search_keyword, page=5):
        """Store the query and page count, then load settings from fofa.config.

        Args:
            search_keyword: FOFA query, either ``str`` or UTF-8 encoded ``bytes``.
            page: Number of result pages to fetch, counted from page 1.
        """
        self.search_keyword = search_keyword
        self.page = page
        self.getConfig()

    @staticmethod
    def _local_path(filename):
        """Return the absolute path of *filename* inside this script's directory."""
        # Resolved from __file__ instead of slicing a fixed number of characters
        # off it, so the script can be renamed or launched from any directory.
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    @staticmethod
    def _clean(text):
        """Trim *text* and drop embedded CR/LF so it fits on one output line."""
        return text.strip().replace('\n', '').replace('\r', '')

    def _build_params(self, page_no):
        """Return the query-string parameters for result page *page_no*."""
        keyword = self.search_keyword
        if isinstance(keyword, str):
            keyword = keyword.encode('utf-8')
        return {
            'q': keyword.decode('utf-8'),
            'qbase64': base64.b64encode(keyword).decode('ascii'),
            'page': page_no,
        }

    def goSpider(self):
        """Fetch pages 1..``self.page`` and save/print every URL-title pair.

        Raises:
            SystemExit: if the server answers a page request with HTTP 304.
        """
        headers = {
            "Connection": "keep-alive",
            "Cookie": "_fofapro_ars_session=" + self.cookie,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/65.0.3325.181 Safari/537.36 '
        }
        result_path = self._local_path('fofa_res.txt')
        for page_no in range(1, self.page + 1):
            # Hand the parameters to requests so that '&', '"', '+', '=' and
            # spaces in the query are percent-encoded instead of breaking the URL.
            resp = requests.get(
                url=self.SEARCH_URL,
                params=self._build_params(page_no),
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if resp.status_code == 304:
                print("程序停止, 可能的情況是你當前會員與要爬去的頁數不相匹配!")
                # Same effect as exit(), without relying on the site module's helper.
                raise SystemExit
            print("開始解析...")
            lxml_tree = etree.HTML(resp.content.decode('utf-8'))
            # Entries rendered with a link.
            url_list = lxml_tree.xpath('//div[@class="list_mod_t"]//a[@target="_blank"]/@href')
            # Entries rendered without an <a> tag (class "ip-no-url").
            url_list.extend(
                lxml_tree.xpath('//div[@class="list_mod_t"]//div[@class="ip-no-url"]//text()')
            )
            url_title = lxml_tree.xpath('//ul[@class="list_sx1"]//li[1]//text()')
            # NOTE(review): link-less entries are appended after the linked ones,
            # so pairing with titles by position may be off when a page mixes
            # both kinds - confirm against the page markup.
            with open(result_path, 'a', encoding='utf-8') as f:
                for url_res, title_res in zip(url_list, url_title):
                    line = self._clean(url_res) + " " + self._clean(title_res)
                    f.write(line + '\n')
                    print(line)
            print("解析結束...")

    def getConfig(self):
        """Load ``fofa_key``, ``fofa_email`` and ``cookie`` from fofa.config.

        The values are read from the ``[config]`` section of the file.

        Raises:
            configparser.Error: if the file, the section or an option is missing.
        """
        conf = configparser.ConfigParser()
        # NOTE: ConfigParser's default interpolation treats '%' specially, so a
        # literal '%' in a value has to be written as '%%' in the file.
        conf.read(self._local_path('fofa.config'))
        self.fofa_key = conf.get('config', 'fofa_key')
        self.fofa_email = conf.get('config', 'fofa_email')
        self.cookie = conf.get('config', 'cookie')
if __name__ == '__main__':
    # Script entry point: ask for the query and page count, then run the spider.
    # The keyword is handed over as UTF-8 bytes, the form FofaSpider base64-encodes.
    search_keyword = input("輸入你要的Fofa搜索語法: ").encode("utf-8")
    # Strip the answer so that a blank or whitespace-only reply selects the
    # default page count instead of crashing in int().
    page = input("輸入你要爬取的頁數(默認為5): ").strip()
    if page:
        fofa = FofaSpider(search_keyword, int(page))
    else:
        fofa = FofaSpider(search_keyword)
    fofa.goSpider()
效果: