By default, YOLOv5's detect.py only saves the test images annotated with the predicted boxes. What should you do if you want the predictions exported as a JSON file as well?
The JSON annotation file contains the following fields for each detection:
name: the image file name
category: the class id
bbox: the box coordinates in xyrb format, i.e. [top-left x, top-left y, bottom-right x, bottom-right y]
score: the predicted confidence
[ { "name": "226_46_t20201125133518273_CAM1.jpg", "category": 4, "bbox": [ 5662, 2489, 5671, 2497 ], "score": 0.130577 } ]
Writing the detection records
content_json = []

# On Windows:
file_name = save_path.split('\\')
# On Linux:
# file_name = save_path.split('/')
content_dic = {
    "name": file_name[len(file_name) - 1],  # keep only the file name
    "category": (names[int(cls)]),  # class name; use int(cls) if a numeric class id is required
    "bbox": torch.tensor(xyxy).view(1, 4).view(-1).tolist(),  # xyxy box as a plain list
    "score": conf.tolist()  # confidence as a Python float
}
content_json.append(content_dic)

# Write the JSON data to a file
with open(os.path.join(Path(out), 'result.json'), 'w') as f:
    json.dump(content_json, f)
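The split on '\\' or '/' has to be switched by hand depending on the platform. A minimal alternative sketch uses Path(save_path).name (pathlib is already imported in detect.py), which extracts the file name regardless of the OS separator:

# Platform-independent alternative: let pathlib extract the file name
content_dic = {
    "name": Path(save_path).name,  # file name only, no separator handling needed
    "category": names[int(cls)],
    "bbox": torch.tensor(xyxy).view(-1).tolist(),  # equivalent to .view(1, 4).view(-1)
    "score": conf.tolist()
}
content_json.append(content_dic)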
The complete detect.py file
import argparse
import os
import shutil
import time
from pathlib import Path
import json

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords,
    xyxy2xywh, plot_one_box, strip_optimizer, set_logging)
from utils.torch_utils import select_device, load_classifier, time_synchronized


def detect(save_img=False):
    # Read the configured options
    out, source, weights, view_img, save_txt, imgsz = \
        opt.save_dir, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):  # output dir
        shutil.rmtree(out)  # delete dir
    os.makedirs(out)  # make new dir
    # Use float16 when running on GPU
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model; make sure the requested input resolution is divisible by 32
    # (if not, it is adjusted and the new size is returned)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader: choose the loader according to the input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # Class names
    names = model.module.names if hasattr(model, 'module') else model.names
    # Colors for drawing boxes
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    # Run one forward pass to check that everything works
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # JSON output
    save_json = True
    content_json = []

    # path: image/video path
    # img: image after resize + pad
    # img0: image at its original size
    # cap: None when reading images, the video source when reading video
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        # Cast the image to float16 as well
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # Add a batch axis if there is none
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        """
        Forward pass; pred has shape (1, num_boxes, 5 + num_class).
        h, w are the height and width of the network input. Note that the dataset
        uses rectangular inference at detection time, so h is not necessarily equal to w.
        num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
        pred[..., 0:4] are the box coordinates in xywh (center + width/height) format
        pred[..., 4] is the objectness confidence
        pred[..., 5:] are the classification scores
        """
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        """
        pred: output of the forward pass
        conf_thres: confidence threshold
        iou_thres: IoU threshold
        classes: keep only certain classes, if set
        agnostic: whether NMS also suppresses boxes across different classes
        After NMS the box format changes from xywh to xyxy (top-left + bottom-right).
        pred is a list[torch.Tensor] of length batch_size;
        each tensor has shape (num_boxes, 6) containing box + conf + cls.
        """
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply second-stage classifier (disabled by default)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections for each image
        for i, det in enumerate(pred):  # detections per image
            # With a webcam source batch_size may be > 1; take one image from the batch
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            # Path for saving the image/video
            save_path = str(Path(out) / Path(p).name)
            # Path for saving the txt file with box coordinates
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            # Info string (image height x width)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size:
                # map coordinates from the resized + padded image back to the original image.
                # Coordinates are in xyxy format here.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results: number of detections per class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # Convert xyxy (top-left + bottom-right) to xywh (center + width/height),
                        # normalize by w, h, convert to a list and save
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, conf, *xywh) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    if save_img or view_img:  # Add bbox to image: draw the box on the original image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                    # Collect the JSON records
                    if save_json:
                        # On Windows:
                        file_name = save_path.split('\\')
                        # On Linux:
                        # file_name = save_path.split('/')
                        content_dic = {
                            "name": file_name[len(file_name) - 1],
                            "category": (names[int(cls)]),
                            "bbox": torch.tensor(xyxy).view(1, 4).view(-1).tolist(),
                            "score": conf.tolist()
                        }
                        content_json.append(content_dic)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results: show the image/video if requested
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'images':
            #         cv2.imwrite(save_path, im0)
            #     else:
            #         if vid_path != save_path:  # new video
            #             vid_path = save_path
            #             if isinstance(vid_writer, cv2.VideoWriter):
            #                 vid_writer.release()  # release previous video writer
            #
            #             fourcc = 'mp4v'  # output video codec
            #             fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            #         vid_writer.write(im0)

    if save_txt or save_img or save_json:
        print('Results saved to %s' % Path(out))

    # Write the JSON data to a file
    with open(os.path.join(Path(out), 'result.json'), 'w') as f:
        json.dump(content_json, f)

    # Print total time
    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    """
    weights: trained weights
    source: test data; an image/video path, '0' (built-in webcam), or an rtsp or similar video stream
    output: where to save the predicted images/videos
    img-size: network input image size
    conf-thres: confidence threshold
    iou-thres: IoU threshold for NMS
    device: which device to use
    view-img: whether to display the predicted images/videos, default False
    save-txt: whether to save the predicted box coordinates as txt files, default False
    classes: keep only certain classes, e.g. 0 or 0 2 3
    agnostic-nms: whether NMS also suppresses boxes across different classes, default False
    augment: multi-scale, flip, etc. (TTA) inference
    update: if True, run strip_optimizer on all models to remove the optimizer state from the .pt files, default False
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='best.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='../tile/testA_imgs', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=1600, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.1, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-dir', type=str, default='detect_img/output', help='directory to save results')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                # Strip the optimizer state from the .pt file
                strip_optimizer(opt.weights)
        else:
            detect()
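With the defaults above, a run looks like python detect.py --weights best.pt --source ../tile/testA_imgs, and result.json ends up under the default --save-dir, detect_img/output. A quick sanity check of the output (a sketch, assuming that default save directory):

import json
from collections import Counter

with open('detect_img/output/result.json') as f:
    records = json.load(f)

print('%d detections' % len(records))
# Number of detections per category
print(Counter(rec['category'] for rec in records))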
Code comments adapted from: https://blog.csdn.net/weixin_44152895/article/details/110009680