about

Web攻防中一個非常關鍵的技術就是Web目錄的掃描。
目錄掃描可以讓我們發現這個網站存在多少個目錄、多少個頁面，探索出網站的整體結構。通過目錄掃描我們還能發現敏感文件、後台文件、數據庫文件和信息泄露文件等。
目錄遍歷（路徑遍歷）是由於web服務器或者web應用程序對用戶輸入的文件名稱的安全性驗證不足而導致的一種安全漏洞，使得攻擊者通過利用一些特殊字符就可以繞過服務器的安全限制，訪問任意的文件（可以是web根目錄以外的文件），甚至執行系統命令。

漏洞原理：
程序在實現上沒有充分過濾用戶輸入的 ../ 之類的目錄跳轉符，導致惡意用戶可以通過提交目錄跳轉符來遍歷服務器上的任意文件。

掃描工具：
可以使用相關的工具，如御劍掃描、Dirbuster等，也可以去GitHub上尋找相關資源。

目錄掃描原理：
通過收集大量常見的網站路徑，與目標服務器地址拼接得到一個url，然後對該url進行訪問；訪問成功則說明該網站存在該路徑，那麼該路徑就可能存在安全漏洞。
採用這種撞庫的原理對目標服務器進行掃描，得到一個可能存在漏洞的url列表。

Python代碼實現

撞庫文件鏈接：https://pan.baidu.com/s/158brUNIoZ7f4h5HoUnCoFA 提取碼：t3lh

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time    : 2020/12/8 10:02
# @Author  : 張開
# File      : 01 web目錄掃描.py

import time
import threading
import argparse
import requests
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import cpu_count


class Work(object):
    """Dictionary-based web directory scanner.

    Reads candidate paths from ``args.scan_dict``, appends each one to
    ``args.scan_site`` and appends every URL that answers with HTTP 200 to
    ``args.scan_output``.
    """

    def __init__(self, args):
        """Store the parsed command-line options.

        :param args: namespace providing ``scan_site``, ``scan_dict`` and
            ``scan_output``; ``thread_num`` is optional and bounds the
            number of worker threads used by :meth:`run`.
        """
        self.args = args
        # Serialises appends to the output file across worker threads.
        self.lock = threading.Lock()

    def _read_file(self):
        """Return all lines of the dictionary file (line endings kept)."""
        with open(file=self.args.scan_dict, mode='r', encoding='utf-8') as f:
            return f.readlines()

    def _write_file(self, content):
        """Append ``content`` to the output file while holding the lock."""
        # ``with`` releases the lock even if opening or writing fails.
        with self.lock:
            with open(file=self.args.scan_output, mode='a', encoding='utf-8') as f:
                f.write(content)

    def _send_msg(self, line):
        """Build the URL for one dictionary entry and request it.

        URLs that answer with status 200 are written to the output file and
        printed. Any error is printed together with the URL and swallowed,
        so a single bad entry never stops the scan.

        :param line: one dictionary entry; surrounding whitespace (including
            the trailing newline from the file) is stripped.
        """
        line = line.strip()
        site = self.args.scan_site
        if "://" not in site:
            # No scheme supplied on the command line: default to plain HTTP.
            site = "http://" + site
        url = "{}{}".format(site, line)
        try:
            response = requests.get(url=url, timeout=60, allow_redirects=False)
            if response.status_code == 200:
                self._write_file('{}\n'.format(response.url))
                print(response.url, response.status_code)
        except Exception as e:
            # Per-URL boundary: report and carry on with the next entry.
            print(e, url)

    def run(self):
        """Scan every non-blank dictionary entry once, using a bounded pool.

        The pool size is ``args.thread_num`` when present and positive,
        otherwise ``cpu_count() * 5``. On Ctrl+C the entries that have not
        started yet are cancelled and the method waits for in-flight requests
        to finish before returning.
        """
        paths = [entry.strip() for entry in self._read_file()]
        paths = [entry for entry in paths if entry]

        workers = getattr(self.args, 'thread_num', None) or cpu_count() * 5
        pool = ThreadPoolExecutor(max_workers=max(1, workers))
        futures = [pool.submit(self._send_msg, entry) for entry in paths]
        try:
            for future in futures:
                future.result()
        except KeyboardInterrupt:
            # Drop the work that has not started; running requests finish.
            for future in futures:
                future.cancel()
            print('\n[WARNING] 請等待所有的子線程執行結束 當前線程數為%d' % threading.active_count())
        finally:
            pool.shutdown(wait=True)


if __name__ == '__main__':
    # Command-line entry point: parse the options and run the scanner.
    parse = argparse.ArgumentParser()
    # --site is mandatory: without it scan_site would be None and every
    # request would fail with a TypeError while building the URL.
    parse.add_argument('--site', dest='scan_site', help='要掃描的服務器', type=str, required=True)
    parse.add_argument('--dict', dest='scan_dict', help="撞庫文件", default='dict.txt', type=str)
    parse.add_argument('--output', dest='scan_output', help="存儲撞庫成功的路徑", default='./output.txt', type=str)
    parse.add_argument('--thread', dest='thread_num', help='設置線程數量', default=cpu_count() * 5, type=int)

    obj = Work(args=parse.parse_args())
    obj.run()
"""
D:\tmp>python "01 web目錄掃描.py" --site www.baidu.com
http://www.baidu.com/#%E8%84%9A%E6%9C%AC%E8%AF%86%E5%88%AB 200
http://www.baidu.com/robots.txt 200
http://www.baidu.com/index.htm 200
http://www.baidu.com/index.html 200
http://www.baidu.com/index.php 200
"""

或者參考:

另一個版本

import time
import queue
import argparse
import requests
import threading

class Dirscan(object):
	"""Queue-driven web directory scanner.

	Candidate paths are loaded from a dictionary file into a thread-safe
	queue. ``run`` is meant to be used as the target of several threads,
	each of which pulls paths from the queue until it is empty or
	``STOP_ME`` is set.
	"""

	def __init__(self, scanSite, scanDict, scanOutput,threadNum):
		"""Prepare the scanner and load the dictionary.

		:param scanSite: target site; ``http://`` is prepended when no
			scheme is given.
		:param scanDict: path of the dictionary file (one path per line).
		:param scanOutput: output file path; a falsy value (the CLI default
			is ``0``) derives ``<site without scheme>.txt`` from scanSite.
		:param threadNum: number of worker threads the caller intends to
			start; only stored here.
		:raises SystemExit: when the dictionary holds no usable entries.
		"""
		print('Dirscan is running!')
		# Default to plain HTTP when the site is given without a scheme.
		self.scanSite = scanSite if '://' in scanSite else 'http://%s' % scanSite
		print('Scan target:',self.scanSite,"<=====>")

		self.scanDict = scanDict
		# e.g. https://www.oldboyedu.com/ => www.oldboyedu.com.txt
		if scanOutput:
			self.scanOutput = scanOutput
		else:
			self.scanOutput = scanSite.rstrip('/').replace('https://', '').replace('http://', '')+'.txt'
		self.threadNum = threadNum
		# Serialises appends to the output file across worker threads.
		self.lock = threading.Lock()
		# Set to True (by the caller, on Ctrl+C) to make workers stop early.
		self.STOP_ME = False
		self._loadHeaders()
		self._loadDict(self.scanDict)

	def _loadHeaders(self):
		"""Set the browser-like request headers sent with every request."""
		self.headers = {
			'Accept': '*/*',  # accept any content type
			'Referer': 'http://www.baidu.com',  # referring page reported to the server
			'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36', # present as a desktop browser
			'Cache-Control': 'no-cache',    # do not use cached responses
		}

	def _loadDict(self, dict_list):
		"""Fill ``self.q`` with the paths found in the dictionary file.

		Blank lines and lines starting with ``#`` are skipped.

		:param dict_list: path of the dictionary file.
		:raises SystemExit: when no usable entry was found.
		"""
		self.q = queue.Queue()
		with open(dict_list ,mode="r", encoding="utf-8") as fp:
			for line in fp:
				path = line.strip()
				if path and not path.startswith('#'):
					self.q.put(path)

		if self.q.qsize() > 0:
			print('Total Dictionary:',self.q.qsize())
		else:
			# Nothing to scan: report and exit (same SystemExit as quit()).
			print('Dict is Null ???')
			raise SystemExit

	def _writeOutput(self, result):
		"""Append ``result`` plus a newline to the output file under the lock."""
		# ``with`` releases the lock even if opening or writing fails.
		with self.lock:
			with open(self.scanOutput, mode = 'a+',encoding="utf-8") as f:
				f.write(result + '\n')

	def _scan(self, url):
		"""Request ``url`` and record it when the server answers 200.

		Redirects are not followed and the request times out after 60
		seconds. Any request failure (connection error, timeout, invalid
		URL, ...) is reported and the URL is skipped.
		"""
		try:
			html_result = requests.get(url, headers=self.headers, allow_redirects=False, timeout=60)
		except requests.exceptions.RequestException:
			# No response object exists on failure, so stop here.
			print('Request Timeout:%s' % url)
			return
		if html_result.status_code == 200:
			# e.g. [200]https://www.oldboyedu.com/about_21.shtml.bak
			hit = '[%d]%s' % (html_result.status_code, html_result.url)
			print(hit)
			self._writeOutput(hit)

	def run(self):
		"""Worker loop: scan queued paths until none are left or STOP_ME is set."""
		# Join site and path with exactly one slash, whatever either side has.
		base = self.scanSite.rstrip('/')
		while not self.STOP_ME:
			try:
				# Non-blocking: another worker may have taken the last item
				# since any emptiness check, and a blocking get() would hang.
				path = self.q.get_nowait()
			except queue.Empty:
				break
			self._scan(base + '/' + path.lstrip('/'))

if __name__ == '__main__':
	# ### part1: command-line options
	parser = argparse.ArgumentParser()
	# Required positional argument: the site to scan.
	parser.add_argument('scanSite', help="The website to be scanned", type=str)
	# Optional: dictionary file holding the candidate paths.
	parser.add_argument('-d', '--dict'  , dest="scanDict"  , help="要瀏覽的目錄", type=str,   default="dict/dict.txt")
	# Optional: output file; 0 lets Dirscan derive a name from the site.
	parser.add_argument('-o', '--output', dest="scanOutput", help="保存的文件"    , type=str, default=0)
	# Optional: number of worker threads.
	parser.add_argument('-t', '--thread', dest="threadNum" , help="設置線程的數量", type=int, default=60)
	args = parser.parse_args()

	# ### part2: build the scanner (loads the dictionary in its constructor)
	scan = Dirscan(args.scanSite, args.scanDict, args.scanOutput, args.threadNum)

	# ### part3: start the worker threads, keeping a handle on each one
	workers = []
	for _ in range(args.threadNum):
		t = threading.Thread(target=scan.run)
		t.start()
		workers.append(t)

	# ### part4: wait for the workers, turning Ctrl+C into a stop request
	# Tracking our own threads avoids assuming the main thread is the only
	# other thread alive in the process.
	while any(t.is_alive() for t in workers):
		try:
			# Polling interval only; this does not throttle the workers.
			time.sleep(0.1)
		except KeyboardInterrupt:
			print('\n[WARNING] 請等待所有的子線程執行結束 當前線程數為%d' % threading.active_count())
			scan.STOP_ME = True

	print('Scan end!!!')

that's all, see also:

動手實現簡易網站目錄掃描器——WebScanner | Web滲透之信息收集——目錄掃描從御劍到Dirbuster

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 滲透測試之目錄掃描-Dirbuster 滲透測試工具篇之目錄掃描工具dirmap web滲透——nmap掃描滲透測試之弱點掃描小白日記29：kali滲透測試之Web滲透-掃描工具-Vega 小白日記31：kali滲透測試之Web滲透-掃描工具-Arachni 小白日記28：kali滲透測試之Web滲透-掃描工具-Nikto 小白日記30：kali滲透測試之Web滲透-掃描工具-Skipfish 小白日記32：kali滲透測試之Web滲透-掃描工具-QWASP_ZAP 小白日記33：kali滲透測試之Web滲透-掃描工具-Burpsuite（一）