By default, YOLOv5's detect.py only saves the test images annotated with the predicted boxes. What should you do if you want the predictions exported as a JSON file as well?
The JSON annotation file contains the following fields for each detection:
name: the image file name
category: the class id
bbox: the box coordinates in xyrb format, i.e. [top-left x, top-left y, bottom-right x, bottom-right y]
score: the predicted confidence
[ { "name": "226_46_t20201125133518273_CAM1.jpg", "category": 4, "bbox": [ 5662, 2489, 5671, 2497 ], "score": 0.130577 } ]
Writing the detection records
content_json = []

# On Windows:
file_name = save_path.split('\\')
# On Linux:
# file_name = save_path.split('/')
content_dic = {
    "name": file_name[len(file_name) - 1],  # keep only the file name
    "category": (names[int(cls)]),  # class name; use int(cls) if a numeric class id is required
    "bbox": torch.tensor(xyxy).view(1, 4).view(-1).tolist(),  # xyxy box as a plain list
    "score": conf.tolist()  # confidence as a Python float
}
content_json.append(content_dic)

# Write the JSON data to a file
with open(os.path.join(Path(out), 'result.json'), 'w') as f:
    json.dump(content_json, f)
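The split on '\\' or '/' has to be switched by hand depending on the platform. A minimal alternative sketch uses Path(save_path).name (pathlib is already imported in detect.py), which extracts the file name regardless of the OS separator:

# Platform-independent alternative: let pathlib extract the file name
content_dic = {
    "name": Path(save_path).name,  # file name only, no separator handling needed
    "category": names[int(cls)],
    "bbox": torch.tensor(xyxy).view(-1).tolist(),  # equivalent to .view(1, 4).view(-1)
    "score": conf.tolist()
}
content_json.append(content_dic)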
The complete detect.py file
import argparse
import os
import shutil
import time
from pathlib import Path
import json

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import (
    check_img_size, non_max_suppression, apply_classifier, scale_coords,
    xyxy2xywh, plot_one_box, strip_optimizer, set_logging)
from utils.torch_utils import select_device, load_classifier, time_synchronized


def detect(save_img=False):
    # Read the configured options
    out, source, weights, view_img, save_txt, imgsz = \
        opt.save_dir, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):  # output dir
        shutil.rmtree(out)  # delete dir
    os.makedirs(out)  # make new dir
    # Use float16 when running on GPU
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model; make sure the requested input resolution is divisible by 32
    # (if not, it is adjusted and the new size is returned)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader: choose the loader according to the input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # Class names
    names = model.module.names if hasattr(model, 'module') else model.names
    # Colors for drawing boxes
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    # Run one forward pass to check that everything works
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    # JSON output
    save_json = True
    content_json = []

    # path: image/video path
    # img: image after resize + pad
    # img0: image at its original size
    # cap: None when reading images, the video source when reading video
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        # Cast the image to float16 as well
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # Add a batch axis if there is none
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        """
        Forward pass; pred has shape (1, num_boxes, 5 + num_class).
        h, w are the height and width of the network input. Note that the dataset
        uses rectangular inference at detection time, so h is not necessarily equal to w.
        num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
        pred[..., 0:4] are the box coordinates in xywh (center + width/height) format
        pred[..., 4] is the objectness confidence
        pred[..., 5:] are the classification scores
        """
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        """
        pred: output of the forward pass
        conf_thres: confidence threshold
        iou_thres: IoU threshold
        classes: keep only certain classes, if set
        agnostic: whether NMS also suppresses boxes across different classes
        After NMS the box format changes from xywh to xyxy (top-left + bottom-right).
        pred is a list[torch.Tensor] of length batch_size;
        each tensor has shape (num_boxes, 6) containing box + conf + cls.
        """
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply second-stage classifier (disabled by default)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections for each image
        for i, det in enumerate(pred):  # detections per image
            # With a webcam source batch_size may be > 1; take one image from the batch
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            # Path for saving the image/video
            save_path = str(Path(out) / Path(p).name)
            # Path for saving the txt file with box coordinates
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            # Info string (image height x width)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size:
                # map coordinates from the resized + padded image back to the original image.
                # Coordinates are in xyxy format here.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results: number of detections per class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # Convert xyxy (top-left + bottom-right) to xywh (center + width/height),
                        # normalize by w, h, convert to a list and save
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, conf, *xywh) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    if save_img or view_img:  # Add bbox to image: draw the box on the original image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                    # Collect the JSON records
                    if save_json:
                        # On Windows:
                        file_name = save_path.split('\\')
                        # On Linux:
                        # file_name = save_path.split('/')
                        content_dic = {
                            "name": file_name[len(file_name) - 1],
                            "category": (names[int(cls)]),
                            "bbox": torch.tensor(xyxy).view(1, 4).view(-1).tolist(),
                            "score": conf.tolist()
                        }
                        content_json.append(content_dic)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results: show the image/video if requested
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'images':
            #         cv2.imwrite(save_path, im0)
            #     else:
            #         if vid_path != save_path:  # new video
            #             vid_path = save_path
            #             if isinstance(vid_writer, cv2.VideoWriter):
            #                 vid_writer.release()  # release previous video writer
            #
            #             fourcc = 'mp4v'  # output video codec
            #             fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            #         vid_writer.write(im0)

    if save_txt or save_img or save_json:
        print('Results saved to %s' % Path(out))

    # Write the JSON data to a file
    with open(os.path.join(Path(out), 'result.json'), 'w') as f:
        json.dump(content_json, f)

    # Print total time
    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    """
    weights: trained weights
    source: test data; an image/video path, '0' (built-in webcam), or an rtsp or similar video stream
    output: where to save the predicted images/videos
    img-size: network input image size
    conf-thres: confidence threshold
    iou-thres: IoU threshold for NMS
    device: which device to use
    view-img: whether to display the predicted images/videos, default False
    save-txt: whether to save the predicted box coordinates as txt files, default False
    classes: keep only certain classes, e.g. 0 or 0 2 3
    agnostic-nms: whether NMS also suppresses boxes across different classes, default False
    augment: multi-scale, flip, etc. (TTA) inference
    update: if True, run strip_optimizer on all models to remove the optimizer state from the .pt files, default False
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='best.pt', help='model.pt path(s)')
    parser.add_argument('--source', type=str, default='../tile/testA_imgs', help='source')  # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=1600, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.1, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-dir', type=str, default='detect_img/output', help='directory to save results')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    opt = parser.parse_args()
    print(opt)

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                # Strip the optimizer state from the .pt file
                strip_optimizer(opt.weights)
        else:
            detect()
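With the defaults above, a run looks like python detect.py --weights best.pt --source ../tile/testA_imgs, and result.json ends up under the default --save-dir, detect_img/output. A quick sanity check of the output (a sketch, assuming that default save directory):

import json
from collections import Counter

with open('detect_img/output/result.json') as f:
    records = json.load(f)

print('%d detections' % len(records))
# Number of detections per category
print(Counter(rec['category'] for rec in records))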
Code comments adapted from: https://blog.csdn.net/weixin_44152895/article/details/110009680