2021.05.10更新:https://github.com/adezz/fofa-api-search
代码实现:
# coding=utf-8
import base64
import configparser
import os
from urllib.parse import urlencode

import requests
from lxml import etree
class FofaSpider:
    """Scrape FOFA web search result pages and append the hits to a text file.

    The session cookie (plus the ``fofa_key`` / ``fofa_email`` settings) is
    read from ``fofa.config`` located next to this script; results are
    appended to ``fofa_res.txt`` in the same directory and echoed to stdout.
    """

    # Seconds to wait on a single page request before giving up, so that a
    # stalled connection cannot hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self, search_keyword, page=5):
        """Store the query and load the configuration file.

        Args:
            search_keyword: FOFA search expression, as UTF-8 ``bytes`` or as
                ``str``.
            page: Number of result pages to fetch, starting at page 1.
        """
        self.search_keyword = search_keyword
        self.page = page
        self.getConfig()

    @staticmethod
    def _script_path(filename):
        """Return the absolute path of *filename* next to this script."""
        # Derive the directory from the real script location instead of
        # slicing a fixed number of characters off ``__file__``.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(script_dir, filename)

    @staticmethod
    def _clean(text):
        """Strip *text* and drop any embedded CR/LF characters."""
        return text.strip().replace('\n', '').replace('\r', '')

    def _build_url(self, page_number):
        """Return the result-page URL for *page_number* of the stored query.

        The keyword is sent both verbatim (``q``) and base64-encoded
        (``qbase64``); every value is percent-encoded so that characters such
        as ``&``, ``=``, ``+`` or quotes cannot break the query string.
        """
        keyword = self.search_keyword
        if isinstance(keyword, str):
            keyword = keyword.encode('utf-8')
        query = urlencode({
            'q': keyword.decode('utf-8'),
            'qbase64': base64.b64encode(keyword).decode('ascii'),
            'page': page_number,
        })
        return "https://fofa.so/result?" + query

    def goSpider(self):
        """Fetch pages 1..``self.page`` and record each ``URL title`` pair.

        Raises:
            SystemExit: when a page request answers with HTTP 304, after
                printing an explanatory message.
        """
        headers = {
            "Connection": "keep-alive",
            "Cookie": "_fofapro_ars_session=" + self.cookie,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/65.0.3325.181 Safari/537.36 '
        }
        result_path = self._script_path('fofa_res.txt')

        for page_number in range(1, self.page + 1):
            resp = requests.get(
                url=self._build_url(page_number),
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            if resp.status_code == 304:
                print("程序停止, 可能的情况是你当前会员与要爬去的页数不相匹配!")
                # Same effect as exit(), without relying on the helper that
                # the ``site`` module injects into builtins.
                raise SystemExit

            print("开始解析...")
            lxml_tree = etree.HTML(resp.content.decode('utf-8'))
            # Regular results: the entry is wrapped in an <a> tag.
            url_list = lxml_tree.xpath('//div[@class="list_mod_t"]//a[@target="_blank"]/@href')
            # Entries without an <a> tag are rendered as "ip-no-url" text nodes.
            url_list2 = lxml_tree.xpath('//div[@class="list_mod_t"]//div[@class="ip-no-url"]//text()')
            url_list.extend(url_list2)
            url_title = lxml_tree.xpath('//ul[@class="list_sx1"]//li[1]//text()')

            # NOTE(review): the "ip-no-url" entries are appended after all the
            # <a> entries, so zip() may pair them with the wrong titles when a
            # page mixes both kinds — confirm against the real page layout.
            with open(result_path, 'a+', encoding='utf-8') as f:
                for url_res, title_res in zip(url_list, url_title):
                    line = self._clean(url_res) + " " + self._clean(title_res)
                    f.write(line + '\n')
                    print(line)
            print("解析结束...")

    def getConfig(self):
        """Load ``fofa_key``, ``fofa_email`` and ``cookie`` from ``fofa.config``.

        The values are read from the ``[config]`` section of the file.

        Raises:
            configparser.NoSectionError: if the file is missing (``read``
                silently skips unreadable files) or has no ``[config]``
                section.
            configparser.NoOptionError: if one of the three options is absent.
        """
        conf = configparser.ConfigParser()
        conf.read(self._script_path('fofa.config'))
        self.fofa_key = conf.get('config', 'fofa_key')
        self.fofa_email = conf.get('config', 'fofa_email')
        self.cookie = conf.get('config', 'cookie')
def parse_page_count(raw):
    """Convert the user's page-count answer into an ``int``.

    Args:
        raw: Text typed at the prompt.

    Returns:
        The parsed integer, or ``None`` when the answer is empty or contains
        only whitespace (meaning "use the spider's default").

    Raises:
        ValueError: if the answer is non-blank but not an integer.
    """
    text = raw.strip()
    return int(text) if text else None


# Script entry point: ask for the query and page count, then run the spider.
if '__main__' == __name__:
    # The keyword is handed over as UTF-8 bytes.
    search_keyword = input("输入你要的Fofa搜索语法: ").encode("utf-8")
    page = parse_page_count(input("输入你要爬取的页数(默认为5): "))
    if page is None:
        # Blank answer: let FofaSpider apply its own default page count.
        fofa = FofaSpider(search_keyword)
    else:
        fofa = FofaSpider(search_keyword, page)
    fofa.goSpider()
效果: