第十一節，利用yolov3訓練自己的數據集

本文轉載自查看原文 2019-04-02 18:01 1794 計算機視覺

1、環境配置

tensorflow1.12.0

Opencv3.4.2

keras

pycharm

2、配置yolov3

下載yolov3代碼：https://github.com/qqwweee/keras-yolo3
下載權重：https://pjreddie.com/media/files/yolov3.weights，並將權重文件放在keras-yolo3-master文件下
執行如下命令將darknet下的yolov3配置文件轉換成keras適用的h5文件。

python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5

更改了一下代碼：重新編寫了一個測試代碼object_detection_yolo.py

# This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html

# Usage example:  python3 object_detection_yolo.py --video=run.mp4
#                 python3 object_detection_yolo.py --image=bird.jpg

import cv2 as cv
import argparse
import sys
import numpy as np
import os.path

# Initialize the parameters
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4   #Non-maximum suppression threshold
inpWidth = 416       #Width of network's input image
inpHeight = 416      #Height of network's input image

parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
args = parser.parse_args()
        
# Load names of classes
classesFile = "model_data/coco_classes.txt";
classes = None
# with open(classesFile, 'rt') as f:
#     classes = f.read().rstrip('\n').split('\n')
classes_path = os.path.expanduser(classesFile)
with open(classes_path) as f:
    class_names = f.readlines()
    classes = [c.strip() for c in class_names]

# Give the configuration and weight files for the model and load the network using them.
modelConfiguration = "yolov3.cfg";
modelWeights = "yolov3.weights";

net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Get the names of the output layers
def getOutputsNames(net):
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

# Draw the predicted bounding box
def drawPred(classId, conf, left, top, right, bottom):
    # Draw a bounding box.
    cv.rectangle(frame, (left, top), (right, bottom), (255, 178, 50), 3)
    
    label = '%.2f' % conf
        
    # Get the label for the class name and its confidence
    if classes:
        assert(classId < len(classes))
        label = '%s:%s' % (classes[classId], label)

    #Display the label at the top of the bounding box
    labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv.rectangle(frame, (left, top - round(1.5*labelSize[1])), (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv.FILLED)
    cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

# Remove the bounding boxes with low confidence using non-maxima suppression
def postprocess(frame, outs):
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    classIds = []
    confidences = []
    boxes = []
    # Scan through all the bounding boxes output from the network and keep only the
    # ones with high confidence scores. Assign the box's class label as the class with the highest score.
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences.
    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = boxes[i]
        left = box[0]
        top = box[1]
        width = box[2]
        height = box[3]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

# Process inputs
winName = 'Deep learning object detection in OpenCV'
#cv.namedWindow(winName, cv.WINDOW_NORMAL)

outputFile = "yolo_out_py.avi"
if (args.image):
    # Open the image file
    if not os.path.isfile(args.image):
        print("Input image file ", args.image, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.image)
    outputFile = args.image[:-4]+'_yolo_out_py.jpg'
elif (args.video):
    # Open the video file
    if not os.path.isfile(args.video):
        print("Input video file ", args.video, " doesn't exist")
        sys.exit(1)
    cap = cv.VideoCapture(args.video)
    outputFile = args.video[:-4]+'_yolo_out_py.avi'
else:
    # Webcam input
    cap = cv.VideoCapture(0)

# Get the video writer initialized to save the output video
if (not args.image):
    vid_writer = cv.VideoWriter(outputFile, cv.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))

while cv.waitKey(1) < 0:
    
    # get frame from the video
    hasFrame, frame = cap.read()
    
    # Stop the program if reached end of video
    if not hasFrame:
        print("Done processing !!!")
        print("Output file is stored as ", outputFile)
        cv.waitKey(3000)
        break

    # Create a 4D blob from a frame.
    blob = cv.dnn.blobFromImage(frame, 1/255, (inpWidth, inpHeight), [0,0,0], 1, crop=False)

    # Sets the input to the network
    net.setInput(blob)

    # Runs the forward pass to get output of the output layers
    outs = net.forward(getOutputsNames(net))

    # Remove the bounding boxes with low confidence
    postprocess(frame, outs)

    # Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    # Write the frame with the detection boxes
    if (args.image):
        cv.imwrite(outputFile, frame.astype(np.uint8));
    else:
        vid_writer.write(frame.astype(np.uint8))

    #cv.imshow(winName, frame)

View Code

3、用自己的數據集訓練

在工程下新建一個文件夾VOCdevkit，結構與VOC數據集格式保持一致，目錄結構如下所示：

將自己的數據圖片放入JPEFImages文件中，

生成Annotation下的文件，安裝工具labelImg。安裝過程可參照：

https://blog.csdn.net/u012746060/article/details/81016993，結果如下圖：

生成ImageSet/Main/4個文件，在VOC2007下新建一個test.py文件：

import os

import random

trainval_percent = 0.2

train_percent = 0.8

xmlfilepath = 'Annotations'

txtsavepath = 'ImageSets\Main'

total_xml = os.listdir(xmlfilepath)

num = len(total_xml)

list = range(num)

tv = int(num * trainval_percent)

tr = int(tv * train_percent)

trainval = random.sample(list, tv)

train = random.sample(trainval, tr)

ftrainval = open('ImageSets/Main/trainval.txt', 'w')

ftest = open('ImageSets/Main/test.txt', 'w')

ftrain = open('ImageSets/Main/train.txt', 'w')

fval = open('ImageSets/Main/val.txt', 'w')

for i in list:

    name = total_xml[i][:-4] + '\n'

    if i in trainval:

        ftrainval.write(name)

        if i in train:

            ftest.write(name)

        else:

            fval.write(name)

    else:

        ftrain.write(name)

ftrainval.close()

ftrain.close()

fval.close()

ftest.close()

View Code

運行代碼之后，生成如下文件，VOC2007數據集制作完成。

生成yolo3所需的train.txt,val.txt,test.txt

生成的數據集不能供yolov3直接使用。需要運行voc_annotation.py(遷移項目時必須重新運行，涉及路徑問題），classes以檢測兩個類為例（redlight和greenlight），在voc_annotation.py需改你的數據集為：

運行之后，生成如下三個文件：

文件內容如圖所示：

修改參數文件yolo3.cfg

打開yolo3.cfg文件。搜索yolo(共出現三次)，每次按下圖都要修改：

filter：3*（5+len（classes））

classes:你要訓練的類別數（我這里是訓練兩類）

random：原來是1，顯存小改為0

修改model_data下的voc_classes.txt為自己訓練的類別

修改train.py代碼（用下面代碼直接替換原來的代碼）

"""

Retrain the YOLO model for your own dataset.

"""

import numpy as np

import keras.backend as K

from keras.layers import Input, Lambda

from keras.models import Model

from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping



from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss

from yolo3.utils import get_random_data





def _main():

    annotation_path = '2007_train.txt'

    log_dir = 'logs/000/'

    classes_path = 'model_data/voc_classes.txt'

    anchors_path = 'model_data/yolo_anchors.txt'

    class_names = get_classes(classes_path)

    anchors = get_anchors(anchors_path)

    input_shape = (416,416) # multiple of 32, hw

    model = create_model(input_shape, anchors, len(class_names) )

    train(model, annotation_path, input_shape, anchors, len(class_names), log_dir=log_dir)



def train(model, annotation_path, input_shape, anchors, num_classes, log_dir='logs/'):

    model.compile(optimizer='adam', loss={

        'yolo_loss': lambda y_true, y_pred: y_pred})

    logging = TensorBoard(log_dir=log_dir)

    checkpoint = ModelCheckpoint(log_dir + "ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5",

        monitor='val_loss', save_weights_only=True, save_best_only=True, period=1)

    batch_size = 10

    val_split = 0.1

    with open(annotation_path) as f:

        lines = f.readlines()

    np.random.shuffle(lines)

    num_val = int(len(lines)*val_split)

    num_train = len(lines) - num_val

    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))



    model.fit_generator(data_generator_wrap(lines[:num_train], batch_size, input_shape, anchors, num_classes),

            steps_per_epoch=max(1, num_train//batch_size),

            validation_data=data_generator_wrap(lines[num_train:], batch_size, input_shape, anchors, num_classes),

            validation_steps=max(1, num_val//batch_size),

            epochs=500,

            initial_epoch=0)

    model.save_weights(log_dir + 'trained_weights.h5')



def get_classes(classes_path):

    with open(classes_path) as f:

        class_names = f.readlines()

    class_names = [c.strip() for c in class_names]

    return class_names



def get_anchors(anchors_path):

    with open(anchors_path) as f:

        anchors = f.readline()

    anchors = [float(x) for x in anchors.split(',')]

    return np.array(anchors).reshape(-1, 2)



def create_model(input_shape, anchors, num_classes, load_pretrained=False, freeze_body=False,

            weights_path='model_data/yolo_weights.h5'):

    K.clear_session() # get a new session

    image_input = Input(shape=(None, None, 3))

    h, w = input_shape

    num_anchors = len(anchors)

    y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \

        num_anchors//3, num_classes+5)) for l in range(3)]



    model_body = yolo_body(image_input, num_anchors//3, num_classes)

    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))



    if load_pretrained:

        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)

        print('Load weights {}.'.format(weights_path))

        if freeze_body:

            # Do not freeze 3 output layers.

            num = len(model_body.layers)-7

            for i in range(num): model_body.layers[i].trainable = False

            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))



    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',

        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(

        [*model_body.output, *y_true])

    model = Model([model_body.input, *y_true], model_loss)

    return model

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):

    n = len(annotation_lines)

    np.random.shuffle(annotation_lines)

    i = 0

    while True:

        image_data = []

        box_data = []

        for b in range(batch_size):

            i %= n

            image, box = get_random_data(annotation_lines[i], input_shape, random=True)

            image_data.append(image)

            box_data.append(box)

            i += 1

        image_data = np.array(image_data)

        box_data = np.array(box_data)

        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)

        yield [image_data, *y_true], np.zeros(batch_size)



def data_generator_wrap(annotation_lines, batch_size, input_shape, anchors, num_classes):

    n = len(annotation_lines)

    if n==0 or batch_size<=0: return None

    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)



if __name__ == '__main__':

    _main()

View Code

替換完成后，千萬千萬值得注意的是，因為程序中有logs/000/目錄，你需要創建這樣一個目錄，這個目錄的作用就是存放自己的數據集訓練得到的模型。不然程序運行到最后會因為找不到該路徑而發生錯誤。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 如何使用yolov3訓練自己的數據集 yolov3訓練自己的數據集 Yolov3 訓練自己的數據集 YoLov3訓練自己的數據集 tensorflow yolov3訓練自己的數據集，詳細教程 Yolov3代碼分析與訓練自己數據集 pytorch版yolov3訓練自己數據集 pythonTensorFlow實現yolov3訓練自己的目標檢測探測自定義數據集 Ubuntu16.04中用yolov3訓練自己的數據集 win10 下的YOLOv3 訓練 wider_face 數據集檢測人臉