python3 fofa爬取類

本文轉載自查看原文 2020-03-31 17:27 1202 python

2021.05.10更新：https://github.com/adezz/fofa-api-search

代碼實現：

# coding=utf-8

import base64
import configparser
import os
from urllib.parse import quote

import requests
from lxml import etree

class FofaSpider:
    """Scrape FOFA web search result pages and save URL/title pairs.

    The session cookie, API key and e-mail are read from a ``fofa.config``
    file located next to this script. Results are appended to
    ``fofa_res.txt`` in the same directory and echoed to stdout.
    """

    # Result-page endpoint; the query string is appended by _build_url().
    SEARCH_URL = "https://fofa.so/result"
    # Seconds to wait on a request before giving up instead of hanging forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, search_keyword, page=5):
        """Store the query and load the credentials from the config file.

        Args:
            search_keyword: FOFA query, either UTF-8 encoded ``bytes`` or ``str``.
            page: Number of result pages to fetch, counted from page 1.
        """
        self.search_keyword = search_keyword
        self.page = page
        self.getConfig()

    @staticmethod
    def _base_dir():
        """Return the directory that contains this script."""
        # Derived from the real path rather than slicing a fixed number of
        # characters off __file__, which only works for one filename length.
        return os.path.dirname(os.path.abspath(__file__))

    @staticmethod
    def _clean(text):
        """Strip surrounding whitespace and remove embedded line breaks."""
        return text.strip().replace('\n', '').replace('\r', '')

    def _build_url(self, page_number):
        """Return the result-page URL for the stored query and *page_number*."""
        keyword = self.search_keyword
        # Accept both bytes (what the script entry point passes) and str.
        raw = keyword if isinstance(keyword, bytes) else str(keyword).encode('utf-8')
        encoded = base64.b64encode(raw).decode('ascii')
        # Percent-encode both values: queries may contain '&', '#', '=' or
        # spaces, and base64 output may contain '+', '/' and '='.
        return (self.SEARCH_URL
                + '?q=' + quote(raw, safe='')
                + '&qbase64=' + quote(encoded, safe='')
                + '&page=' + str(page_number))

    def goSpider(self):
        """Fetch every requested page and append the parsed results to disk.

        Raises:
            SystemExit: When the server answers with HTTP 304 for a page.
        """
        headers = {
            "Connection": "keep-alive",
            "Cookie": "_fofapro_ars_session=" + self.cookie,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/65.0.3325.181 Safari/537.36 '
        }
        output_path = os.path.join(self._base_dir(), 'fofa_res.txt')

        for page_number in range(1, self.page + 1):
            resp = requests.get(url=self._build_url(page_number), headers=headers,
                                timeout=self.REQUEST_TIMEOUT)

            # The original author treats 304 as "this account may not be
            # allowed to view that many pages" and stops the program.
            if resp.status_code == 304:
                print("程序停止， 可能的情況是你當前會員與要爬去的頁數不相匹配!")
                raise SystemExit

            print("開始解析...")
            lxml_tree = etree.HTML(resp.content.decode('utf-8'))
            # Entries rendered as a link.
            url_list = lxml_tree.xpath('//div[@class="list_mod_t"]//a[@target="_blank"]/@href')
            # Entries without an <a> tag, rendered in an "ip-no-url" div.
            url_list2 = lxml_tree.xpath('//div[@class="list_mod_t"]//div[@class="ip-no-url"]//text()')
            url_list.extend(url_list2)
            url_title = lxml_tree.xpath('//ul[@class="list_sx1"]//li[1]//text()')

            # NOTE(review): link-less entries are appended after all linked
            # ones, so positional pairing with titles may not line up on pages
            # that mix both kinds - verify against the live markup.
            with open(output_path, 'a+', encoding='utf-8') as f:
                for url_res, title_res in zip(url_list, url_title):
                    url_text = self._clean(url_res)
                    title_text = self._clean(title_res)
                    f.write(url_text + "    " + title_text + '\n')
                    print(url_text + "   " + title_text)
            print("解析結束...")

    def getConfig(self):
        """Load ``fofa_key``, ``fofa_email`` and ``cookie`` from fofa.config.

        The values come from the ``[config]`` section of the ``fofa.config``
        file that sits next to this script.
        """
        conf = configparser.ConfigParser()
        conf.read(os.path.join(self._base_dir(), 'fofa.config'))
        self.fofa_key = conf.get('config', 'fofa_key')
        self.fofa_email = conf.get('config', 'fofa_email')
        self.cookie = conf.get('config', 'cookie')


if __name__ == '__main__':
    # Ask for the FOFA query and the page count; both are kept as UTF-8 bytes.
    keyword = input("輸入你要的Fofa搜索語法: ").encode("utf-8")
    page_count = input("輸入你要爬取的頁數(默認為5): ").encode("utf-8")
    # An empty answer leaves the page count to the spider's own default.
    spider_args = (keyword, int(page_count)) if page_count else (keyword,)
    fofa = FofaSpider(*spider_args)
    fofa.goSpider()

效果：

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 FOFA鏈接爬蟲爬取fofa spider 利用Python爬取fofa網頁端數據 python3爬取淘寶商品(失效) python3 爬取知乎模擬登錄 Python3——爬取淘寶評論 python3爬取高清壁紙(1) python3爬蟲爬取動漫視頻 python3爬取1024圖片 python3爬取全民K歌 python3爬取拉鈎招聘數據