YOLOV5——使用 k-means 聚類 anchorbox 數據


訓練的標注數據格式如下:

[
    {
        "name": "235_2_t20201127123021723_CAM2.jpg",
        "image_height": 6000,
        "image_width": 8192,
        "category": 5,
        "bbox": [
            1876.06,
            998.04,
            1883.06,
            1004.04
        ]
    },
    {
        "name": "235_2_t20201127123021723_CAM2.jpg",
        "image_height": 6000,
        "image_width": 8192,
        "category": 5,
        "bbox": [
            1655.06,
            1094.04,
            1663.06,
            1102.04
        ]
    }
]

聚類 anchor box 只需要 bbox 中左上角與右下角的坐標(即 [x_min, y_min, x_max, y_max]),由此計算出每個標注框的寬和高。

 

k-means 聚類代碼:

import numpy as np
import json
import os
from PIL import Image


def iou(box, clusters):
    """
    Compute the IoU between one box and every cluster centre.

    Both the box and the clusters are treated as (width, height) pairs that
    share the same origin, so the overlap is min(width) * min(height).

    param:
        box: tuple or array, shifted to the origin (i. e. width and height)
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        numpy array of shape (k,) holding the IoU against each cluster
    raise:
        ValueError: if any overlap width or overlap height is exactly zero
    """
    box_w, box_h = box[0], box[1]
    cluster_w = clusters[:, 0]
    cluster_h = clusters[:, 1]

    overlap_w = np.minimum(cluster_w, box_w)
    overlap_h = np.minimum(cluster_h, box_h)
    if np.any(overlap_w == 0) or np.any(overlap_h == 0):
        raise ValueError("Box has no area")

    overlap = overlap_w * overlap_h
    union = box_w * box_h + cluster_w * cluster_h - overlap
    # The small epsilon guards the division against a zero denominator.
    return overlap / (union + 1e-10)


# Compute the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.
def avg_iou(boxes, clusters):
    """
    Average, over all boxes, of each box's best IoU against the clusters.

    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        clusters: numpy array of shape (k, 2) where k is the number of clusters
    return:
        average IoU as a single float
    """
    row_count = boxes.shape[0]
    best_per_box = []
    for row in range(row_count):
        # Each box is scored by the cluster it overlaps the most.
        overlaps = iou(boxes[row], clusters)
        best_per_box.append(np.max(overlaps))
    return np.mean(best_per_box)


# Translate all boxes to the origin, keeping only each box's width and height.
def translate_boxes(boxes):
    """
    Reduce corner-format boxes to their absolute width and height.

    Columns 0 and 1 are subtracted from columns 2 and 3 respectively, the
    absolute value is taken, and the first two columns are then dropped.
    The input array is not modified.

    param:
        boxes: numpy array of shape (r, 4)
    return:
        numpy array of shape (r, 2)
    """
    shifted = boxes.copy()
    # Work column-wise on the copy instead of visiting every row.
    shifted[:, 2] = np.abs(shifted[:, 2] - shifted[:, 0])
    shifted[:, 3] = np.abs(shifted[:, 3] - shifted[:, 1])
    return np.delete(shifted, [0, 1], axis=1)


# Run k-means clustering using the Intersection over Union (IoU) metric.
def kmeans(boxes, k, dist=np.median):
    """
    Cluster (width, height) pairs with k-means, using 1 - IoU as the distance.

    param:
        boxes: numpy array of shape (r, 2), where r is the number of rows
        k: number of clusters
        dist: reducer called as dist(members, axis=0) to turn the boxes
            assigned to a cluster into that cluster's new centre
    return:
        numpy array of shape (k, 2)
    raise:
        ValueError: from np.random.choice when k exceeds the number of rows,
            or from iou() when an overlap width or height is zero
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the very first assignment can
    # never be mistaken for convergence. Starting from all zeros made the
    # loop exit before any centre update whenever every box was first
    # assigned to cluster 0 (always the case for k == 1).
    last_clusters = np.full((rows,), -1)

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    # Initialise the k centres by sampling k distinct rows of the data.
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            # Distance metric: d(box, centroid) = 1 - IoU(box, centroid).
            # A larger IoU is better, so 1 - IoU makes a smaller distance
            # correspond to a better match.
            distances[row] = 1 - iou(boxes[row], clusters)
        # Assign every box to the centre it is closest to.
        nearest_clusters = np.argmin(distances, axis=1)
        # Converged once no box changes its assignment.
        if (last_clusters == nearest_clusters).all():
            break
        # Move each centre to the reduction (median by default) of its members.
        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # A cluster that lost all its members keeps its previous centre;
            # reducing an empty selection would yield NaN and corrupt every
            # later IoU computation.
            if members.shape[0] > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters


# 獲取圖片寬高
def get_image_width_high(full_image_name):
    """
    Return the pixel size of an image file.

    param:
        full_image_name: path of the image to open
    return:
        tuple (image_width, image_high)
    """
    # The context manager closes the underlying file handle; without it the
    # handle stays open, which adds up when this is called once per label file.
    with Image.open(full_image_name) as image:
        image_width, image_high = image.size[0], image.size[1]
    return image_width, image_high


# 讀取 json 文件中的標注數據
def parse_label_json(label_path):
    """
    Load box sizes from a JSON annotation file.

    The file must contain a list of objects, each with a 'bbox' entry whose
    first four values are read as x_min, y_min, x_max, y_max.

    param:
        label_path: path of the JSON annotation file
    return:
        numpy array of shape (n, 2) with one [width, height] row per
        annotation; shape (0, 2) when the file holds no annotations
    raise:
        AssertionError: if a box has a non-positive width or height
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(label_path, 'r', encoding='utf-8') as f:
        label = json.load(f)
    result = []
    for line in label:
        bbox = line['bbox']
        x_label_min, y_label_min, x_label_max, y_label_max = bbox[0], bbox[1], bbox[2], bbox[3]
        # Size of the bounding box.
        width = x_label_max - x_label_min
        height = y_label_max - y_label_min
        # Raised explicitly (instead of using `assert`) so the check still
        # runs under `python -O`; the exception type is kept for callers.
        if not width > 0:
            raise AssertionError(f'non-positive box width in bbox {bbox}')
        if not height > 0:
            raise AssertionError(f'non-positive box height in bbox {bbox}')
        result.append([width, height])
    # reshape keeps the documented (n, 2) layout even when there are no rows.
    return np.asarray(result).reshape(-1, 2)


# 讀取 txt 標注數據文件
def parse_label_txt(label_path):
    """
    Load box sizes in pixels from a directory of txt label files.

    For every file in label_path, the matching image is looked up as
    '<stem>.jpg' in the path obtained by replacing 'labels' with 'images'.
    Each non-blank label line is split on whitespace; fields 1-4 are read as
    x_center, y_center, width, height and scaled by the image size.

    param:
        label_path: directory containing the txt label files
    return:
        numpy array of shape (n, 2) with one [width, height] row per label
        line, rounded to 2 decimals; shape (0, 2) when there are no labels
    raise:
        AssertionError: if a box has a non-positive width or height
    """
    image_dir = label_path.replace('labels', 'images')
    result = []
    for label_file in os.listdir(label_path):
        full_label_name = os.path.join(label_path, label_file)
        print(full_label_name)
        # Split the file name from its extension to find the matching image.
        label_name, _ = os.path.splitext(label_file)
        full_image_name = os.path.join(image_dir, label_name + '.jpg')
        image_width, image_high = get_image_width_high(full_image_name)
        # `with` closes the label file; it was previously left open.
        with open(full_label_name, mode="r") as fp:
            for line in fp:
                array = line.split()
                if not array:
                    # Blank (e.g. trailing) lines carry no annotation.
                    continue
                x_label_min = (float(array[1]) - float(array[3]) / 2) * image_width
                x_label_max = (float(array[1]) + float(array[3]) / 2) * image_width
                y_label_min = (float(array[2]) - float(array[4]) / 2) * image_high
                y_label_max = (float(array[2]) + float(array[4]) / 2) * image_high
                # Size of the bounding box.
                width = x_label_max - x_label_min
                height = y_label_max - y_label_min
                # Raised explicitly (instead of using `assert`) so the check
                # still runs under `python -O`; the exception type is kept.
                if not width > 0:
                    raise AssertionError(f'non-positive box width in {full_label_name}')
                if not height > 0:
                    raise AssertionError(f'non-positive box height in {full_label_name}')
                result.append([round(width, 2), round(height, 2)])

    # reshape keeps the documented (n, 2) layout even when there are no rows.
    return np.asarray(result).reshape(-1, 2)


def get_kmeans(label, cluster_num=9):
    """
    Cluster the label sizes into anchors and report how well they fit.

    param:
        label: numpy array of [width, height] rows, passed to kmeans()
        cluster_num: number of anchors to produce
    return:
        tuple (anchors, ave_iou): anchors is a list of [width, height]
        integer pairs sorted by ascending area; ave_iou is the value
        returned by avg_iou() for the un-truncated cluster centres
    """
    centres = kmeans(label, cluster_num)
    # The average IoU is measured before the centres are cast to integers.
    mean_iou = avg_iou(label, centres)

    def area(anchor):
        return anchor[0] * anchor[1]

    int_anchors = centres.astype('int').tolist()
    int_anchors.sort(key=area)

    return int_anchors, mean_iou


if __name__ == '__main__':
    # Default input: annotations stored in JSON format.
    label_path = "tile_round1_train_20201231/train_annos.json"
    label_result = parse_label_json(label_path)

    # Alternative input: annotations stored as txt files
    # (seed/images/ holds the matching image files).
    # label_path = "../image_data/seed/labels/"
    # label_result = parse_label_txt(label_path)

    anchors, ave_iou = get_kmeans(label_result, 9)

    # Render every anchor as "w,h, " and trim the trailing separator.
    anchor_string = ''.join(
        '{},{}, '.format(anchor[0], anchor[1]) for anchor in anchors
    )[:-2]

    print(f'anchors are: {anchor_string}')
    print(f'the average iou is: {ave_iou}')

  

  

由於初始聚類中心是隨機選取的(np.random.seed() 沒有固定種子),每次運行得到的 anchors 和平均 IoU 都會略有不同。

參考:https://blog.csdn.net/zuliang001/article/details/90551798


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM