有效地讀取圖像，對比opencv、PIL、turbojpeg、lmdb、tfrecords

本文轉載自查看原文 2020-06-24 16:36 951 opencv

opencv和PIL都是很常見的圖像處理庫了，就不介紹了，主要介紹後面三個：

turbojpeg：libjpeg-turbo的python包裝器，用於jpeg圖像的解碼和編碼。

基本用法：

import cv2
from turbojpeg import TurboJPEG, TJPF_GRAY, TJSAMP_GRAY, TJFLAG_PROGRESSIVE
# Use the default libjpeg-turbo library installation.
jpeg = TurboJPEG()

# Decode input.jpg into a BGR array. The context manager closes the file
# even when reading or decoding raises.
with open('input.jpg', 'rb') as in_file:
    bgr_array = jpeg.decode(in_file.read())

# Show the decoded image and block until a key is pressed.
cv2.imshow('bgr_array', bgr_array)
cv2.waitKey(0)

更多信息參考：https://www.cnpython.com/pypi/pyturbojpeg

lmdb：LMDB的全稱是Lightning Memory-Mapped Database(快如閃電的內存映射數據庫)。LMDB文件可以同時由多個進程打開，具有極高的數據存取速度，訪問簡單，不需要運行單獨的數據庫管理進程，只要在訪問數據的代碼里引用LMDB庫，訪問時給文件路徑即可。讓系統訪問大量小文件的開銷很大，而LMDB使用內存映射的方式訪問文件，使得文件內尋址的開銷非常小，使用指針運算就能實現。數據庫單文件還能減少數據集復制/傳輸過程的開銷。

基本用法：

# -*- coding: utf-8 -*-
import lmdb
  
# If the ./train folder has no data.mdb or lock.mdb file, empty ones are
# created; existing files are not overwritten.
# map_size is the maximum size the database may grow to, in bytes:
# 1099511627776 bytes = 2**40 bytes = 1 TiB.
env = lmdb.open("./train", map_size=1099511627776)
env.close()

更多信息參考：https://blog.csdn.net/weixin_41874599/article/details/86631186

tfrecords：tfrecords是一種二進制編碼的文件格式，tensorflow專用。能將任意數據轉換為tfrecords，更好地利用內存，更方便複製和移動，並且不需要單獨的標籤文件。

將圖像轉換為lmdb格式的數據：

import os
from argparse import ArgumentParser

import cv2
import lmdb
import numpy as np

from tools import get_images_paths


def store_many_lmdb(images_list, save_path):
    """Write the raw bytes of every image file into one LMDB environment.

    Each file is stored undecoded under a zero-padded, 8-digit ASCII key
    derived from its position in ``images_list`` ("00000000", "00000001", ...).

    Args:
        images_list: Sequence of paths to the image files to store.
        save_path: Directory of the LMDB environment to create or open.

    Raises:
        ValueError: If ``images_list`` is empty, or if OpenCV cannot decode
            the largest file (its decoded size is needed to size the map).
    """
    num_images = len(images_list)  # number of images to store
    if num_images == 0:
        raise ValueError("images_list is empty, there is nothing to store")

    # The first file with the largest on-disk size serves as the worst case.
    largest_path = max(images_list, key=os.path.getsize)
    largest_image = cv2.imread(largest_path)
    if largest_image is None:  # cv2.imread reports failure by returning None
        raise ValueError(
            f"cannot decode {largest_path!r} to estimate the database size",
        )

    # Upper bound for the database size in bytes: every image is assumed to
    # be as large as the decoded worst case, with a 10x safety margin.
    map_size = num_images * largest_image.nbytes * 10

    env = lmdb.open(save_path, map_size=map_size)  # create lmdb environment
    try:
        with env.begin(write=True) as txn:  # one write transaction for all items
            for i, image in enumerate(images_list):
                with open(image, "rb") as file:
                    data = file.read()  # raw (still encoded) image bytes
                txn.put(f"{i:08}".encode("ascii"), data)
    finally:
        env.close()  # release the environment even if a write failed


if __name__ == "__main__":
    # Command-line entry point: pack every image found under --path into the
    # LMDB environment directory given by --output.
    cli = ArgumentParser()
    cli.add_argument(
        "--path", "-p", type=str, required=True,
        help="path to the images folder to collect",
    )
    cli.add_argument(
        "--output", "-o", type=str, required=True,
        help='path to the output environment directory file i.e. "path/to/folder/env/"',
    )
    options = cli.parse_args()

    # Create the output directory on first use.
    output_dir = options.output
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    store_many_lmdb(get_images_paths(options.path), output_dir)

將圖像轉換為tfrecords格式的數據：

import os
from argparse import ArgumentParser

import tensorflow as tf

from tools import get_images_paths


def _byte_feature(value):
    """Wrap a string / bytes value in a ``bytes_list`` feature."""
    # BytesList cannot unpack a string held by an EagerTensor, so such a
    # value is first converted through its .numpy() method.
    eager_tensor_type = type(tf.constant(0))
    payload = value.numpy() if isinstance(value, eager_tensor_type) else value
    bytes_list = tf.train.BytesList(value=[payload])
    return tf.train.Feature(bytes_list=bytes_list)


def _int64_feature(value):
    """Wrap a bool / enum / int / uint value in an ``int64_list`` feature."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def image_example(image_string, label):
    """Build a ``tf.train.Example`` holding one raw image and its label.

    The example has two features: "label" (int64) and "image_raw" (bytes).
    """
    label_feature = _int64_feature(label)
    image_feature = _byte_feature(image_string)
    features = tf.train.Features(
        feature={"label": label_feature, "image_raw": image_feature},
    )
    return tf.train.Example(features=features)


def store_many_tfrecords(images_list, save_file):
    """Write the raw bytes of every image file into one TFRecords file.

    Each record is a serialized ``tf.train.Example`` built by
    ``image_example``; the label stored with an image is its index in
    ``images_list``.

    Args:
        images_list: Sequence of paths to the image files to store.
        save_file: Output file path; must end with ".tfrecords".

    Raises:
        AssertionError: If ``save_file`` does not end with ".tfrecords".
    """
    assert save_file.endswith(
        ".tfrecords",
    ), 'File path is wrong, it should contain "*myname*.tfrecords"'

    # dirname is "" for a bare file name such as "data.tfrecords";
    # os.makedirs("") raises, so only create a directory when one is named.
    directory = os.path.dirname(save_file)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with tf.io.TFRecordWriter(save_file) as writer:  # start writer
        for label, filename in enumerate(images_list):  # cycle by each image path
            with open(filename, "rb") as image_file:  # closed after each read
                image_string = image_file.read()  # raw (still encoded) bytes
            tf_example = image_example(image_string, label)
            writer.write(tf_example.SerializeToString())  # append the record


if __name__ == "__main__":
    # Command-line entry point: pack every image found under --path into the
    # TFRecords file given by --output.
    cli = ArgumentParser()
    cli.add_argument(
        "--path", "-p", type=str, required=True,
        help="path to the images folder to collect",
    )
    cli.add_argument(
        "--output", "-o", type=str, required=True,
        help='path to the output tfrecords file i.e. "path/to/folder/myname.tfrecords"',
    )
    options = cli.parse_args()

    store_many_tfrecords(get_images_paths(options.path), options.output)

使用不同的方式讀取圖像，同時默認是以BGR的格式讀取：

import os
from abc import abstractmethod
from timeit import default_timer as timer

import cv2
import lmdb
import numpy as np
import tensorflow as tf
from PIL import Image
from turbojpeg import TurboJPEG

# Set TensorFlow's native log threshold to "3" to quiet its C++ logging.
# NOTE(review): this assignment runs after `import tensorflow`; presumably
# the variable has to be set before that import to take effect - TODO confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"


class ImageLoader:
    """Base class for the image loaders; subclasses implement ``__next__``.

    ``path`` may be a single image or ``.tfrecords`` file, a folder of
    images, or an LMDB environment folder (a folder holding ``*.mdb`` files).
    ``mode`` is the colour order subclasses should return, "BGR" or "RGB".

    Note: ``__next__`` is marked with ``@abstractmethod``, but the class does
    not derive from ``abc.ABC``, so instantiation is not blocked.
    """

    # File extensions accepted when ``path`` points to a single file.
    extensions: tuple = (".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".tfrecords")

    def __init__(self, path: str, mode: str = "BGR"):
        self.path = path
        self.mode = mode
        self.dataset = self.parse_input(self.path)
        self.sample_idx = 0  # index of the next item to read from dataset

    def parse_input(self, path):
        """Resolve ``path`` into the dataset description used by loaders.

        Returns:
            ``[path]`` for a single file, ``path`` itself for an LMDB
            environment folder, or the list of all entries of a plain folder
            (in ``os.listdir`` order, not filtered by extension).

        Raises:
            AssertionError: If a single file has an unsupported extension.
            FileNotFoundError: If ``path`` is neither a file nor a directory.
        """
        # single image or tfrecords file
        if os.path.isfile(path):
            assert path.lower().endswith(
                self.extensions,
            ), f"Unsupportable extension, please, use one of {self.extensions}"
            return [path]

        if os.path.isdir(path):
            entries = os.listdir(path)
            # lmdb environment
            if any(entry.endswith(".mdb") for entry in entries):
                return path
            # folder with images
            return [os.path.join(path, entry) for entry in entries]

        # Fail here instead of returning None and breaking later in len().
        raise FileNotFoundError(f"{path!r} is neither a file nor a directory")

    def __iter__(self):
        self.sample_idx = 0  # restart from the first item
        return self

    def __len__(self):
        return len(self.dataset)

    @abstractmethod
    def __next__(self):
        """Return ``(image, seconds_spent_loading)`` for the next item."""
        raise NotImplementedError


class CV2Loader(ImageLoader):
    """Loader that reads each image with ``cv2.imread``."""

    def __next__(self):
        """Read the next image and return ``(image, seconds_spent)``.

        The image is converted from BGR to RGB when ``self.mode`` is "RGB".

        Raises:
            StopIteration: When every path in the dataset has been read.
        """
        # Honour the iterator protocol instead of leaking an IndexError.
        if self.sample_idx >= len(self.dataset):
            raise StopIteration
        start = timer()
        path = self.dataset[self.sample_idx]  # get image path by index from the dataset
        image = cv2.imread(path)  # read the image
        full_time = timer() - start
        if self.mode == "RGB":
            start = timer()
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # change color mode
            full_time += timer() - start
        self.sample_idx += 1
        return image, full_time


class PILLoader(ImageLoader):
    """Loader that reads each image with ``PIL.Image.open``."""

    def __next__(self):
        """Read the next image and return ``(image, seconds_spent)``.

        The array is converted with ``COLOR_RGB2BGR`` when ``self.mode`` is
        "BGR".

        Raises:
            StopIteration: When every path in the dataset has been read.
        """
        # Honour the iterator protocol instead of leaking an IndexError.
        if self.sample_idx >= len(self.dataset):
            raise StopIteration
        start = timer()
        path = self.dataset[self.sample_idx]  # get image path by index from the dataset
        image = np.asarray(Image.open(path))  # read the image as numpy array
        full_time = timer() - start
        if self.mode == "BGR":
            start = timer()
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # change color mode
            full_time += timer() - start
        self.sample_idx += 1
        return image, full_time


class TurboJpegLoader(ImageLoader):
    """Loader that decodes JPEG files with PyTurboJPEG."""

    # Pixel-format codes passed to TurboJPEG.decode(); 0 and 1 are the values
    # of turbojpeg.TJPF_RGB and turbojpeg.TJPF_BGR respectively.
    _PIXEL_FORMATS = {"RGB": 0, "BGR": 1}

    def __init__(self, path, **kwargs):
        super().__init__(path, **kwargs)
        self.jpeg_reader = TurboJPEG()  # create TurboJPEG object for image reading

    def __next__(self):
        """Decode the next file and return ``(image, seconds_spent)``.

        Raises:
            StopIteration: When every path in the dataset has been read.
            ValueError: If ``self.mode`` is neither "RGB" nor "BGR".
        """
        # Honour the iterator protocol instead of leaking an IndexError.
        if self.sample_idx >= len(self.dataset):
            raise StopIteration
        try:
            pixel_format = self._PIXEL_FORMATS[self.mode]
        except KeyError:
            raise ValueError(
                f"Unsupported mode {self.mode!r}, use one of {sorted(self._PIXEL_FORMATS)}",
            ) from None

        start = timer()
        # The context manager closes the file once its bytes are in memory.
        with open(self.dataset[self.sample_idx], "rb") as file:
            raw = file.read()
        image = self.jpeg_reader.decode(raw, pixel_format)  # decode raw image
        full_time = timer() - start
        self.sample_idx += 1
        return image, full_time


class LmdbLoader(ImageLoader):
    """Loader that reads encoded images from an LMDB environment.

    ``self.dataset`` is an LMDB cursor; every value it yields is decoded
    with ``cv2.imdecode``.
    """

    def __init__(self, path, **kwargs):
        super().__init__(path, **kwargs)
        self.path = path
        self._dataset_size = 0
        self.dataset = self.open_database()
        # Position the cursor on the first record so that next() also works
        # without a preceding iter(); first() reports whether a record exists.
        self._has_item = self.dataset.first()

    # we need to open the database to read images from it
    def open_database(self):
        """Open the environment, record its entry count, return a cursor."""
        lmdb_env = lmdb.open(self.path)  # open the environment by path
        lmdb_txn = lmdb_env.begin()  # start reading
        lmdb_cursor = lmdb_txn.cursor()  # create cursor to iterate through the database
        self._dataset_size = lmdb_env.stat()["entries"]  # number of items in full dataset
        return lmdb_cursor

    def __iter__(self):
        # return the cursor to the first database element
        self._has_item = self.dataset.first()
        return self

    def __next__(self):
        """Decode the current record, advance, return ``(image, seconds)``.

        The image is converted from BGR to RGB when ``self.mode`` is "RGB".

        Raises:
            StopIteration: When the cursor has moved past the last record.
        """
        if not self._has_item:
            raise StopIteration
        start = timer()
        raw_image = self.dataset.value()  # get raw image
        image = np.frombuffer(raw_image, dtype=np.uint8)  # convert it to numpy
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)  # decode image
        full_time = timer() - start
        if self.mode == "RGB":
            start = timer()
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            full_time += timer() - start
        start = timer()
        # step to the next element; the cursor returns False after the last one
        self._has_item = self.dataset.next()
        full_time += timer() - start
        return image, full_time

    def __len__(self):
        return self._dataset_size  # get dataset length


class TFRecordsLoader(ImageLoader):
    """Loader that reads JPEG-encoded images from a TFRecords file.

    Records are expected to carry an int64 "label" and a bytes "image_raw"
    feature.
    """

    def __init__(self, path, **kwargs):
        super().__init__(path, **kwargs)
        self._dataset = self.open_database()
        self._length = None  # filled in by the first __len__ call
        # The base class leaves self.dataset as a list of paths; replace it
        # with a live iterator so next() also works without a prior iter().
        self.dataset = self._dataset.as_numpy_iterator()

    def open_database(self):
        """Open the TFRecords file and return the parsed ``tf.data`` dataset."""
        # dataset structure description
        image_feature_description = {
            "label": tf.io.FixedLenFeature([], tf.int64),
            "image_raw": tf.io.FixedLenFeature([], tf.string),
        }

        def _parse_image_function(example_proto):
            return tf.io.parse_single_example(example_proto, image_feature_description)

        raw_image_dataset = tf.data.TFRecordDataset(self.path)  # open dataset by path
        # parse dataset using structure description
        return raw_image_dataset.map(_parse_image_function)

    def __iter__(self):
        self.dataset = self._dataset.as_numpy_iterator()  # restart from the first record
        return self

    def __next__(self):
        """Decode the next record and return ``(image, seconds_spent)``.

        The image is converted with ``COLOR_RGB2BGR`` when ``self.mode`` is
        "BGR". ``StopIteration`` propagates from the underlying iterator
        once the records are exhausted.
        """
        start = timer()
        # step to the next element in database and get new image
        value = next(self.dataset)["image_raw"]
        image = tf.image.decode_jpeg(value).numpy()  # decode raw image
        full_time = timer() - start
        if self.mode == "BGR":
            start = timer()
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            full_time += timer() - start
        return image, full_time

    def __len__(self):
        # Counting needs a full pass over the file, so do it once and cache.
        if self._length is None:
            count = self._dataset.reduce(np.int64(0), lambda x, _: x + 1)
            self._length = int(count.numpy())
        return self._length


# Registry mapping each loading-method name to the loader class that
# implements it.
methods = dict(
    cv2=CV2Loader,
    pil=PILLoader,
    turbojpeg=TurboJpegLoader,
    lmdb=LmdbLoader,
    tfrecords=TFRecordsLoader,
)

顯示圖像：

from argparse import ArgumentParser

import cv2

from loader import (
    CV2Loader,
    LmdbLoader,
    PILLoader,
    TFRecordsLoader,
    TurboJpegLoader,
    methods,
)


def show_image(method, image):
    """Show ``image`` in a window named after ``method`` and wait for a key.

    Returns:
        True if the pressed key was ESC (code 27), otherwise False.
    """
    esc_key = 27
    cv2.imshow(f"{method} image", image)
    pressed = cv2.waitKey(0) & 0xFF  # keep only the low byte of the key code
    return pressed == esc_key


def show_images(loader):
    """Display every image of ``loader`` in turn, printing its load time.

    Stops early, closing all windows, as soon as ``show_image`` reports
    that ESC was pressed.
    """
    remaining = len(loader)
    source = iter(loader)
    while remaining > 0:
        remaining -= 1
        image, elapsed = next(source)
        print_info(image, elapsed)
        if show_image(type(source).__name__, image):
            cv2.destroyAllWindows()
            return


def print_info(image, time):
    """Print the first two dimensions of ``image`` and its loading time."""
    first_dim, second_dim = image.shape[0], image.shape[1]
    message = f"Image with {first_dim}x{second_dim} size has been loading for {time} seconds"
    print(message)


def demo(method, path):
    """Build the loader registered under ``method`` for ``path`` and show its images."""
    loader_cls = methods[method]  # look up the image loader class
    show_images(loader_cls(path))


if __name__ == "__main__":
    # Command-line entry point: show the images found at --path using the
    # loader selected with --method.
    parser = ArgumentParser()

    parser.add_argument(
        "--path",
        "-p",
        type=str,
        # Without a path the loader receives None and fails deep inside
        # os.path, so let argparse report the missing option instead.
        required=True,
        help="path to image, folder of images, lmdb environment path or tfrecords database path",
    )
    parser.add_argument(
        "--method",
        required=True,
        choices=list(methods),  # stays in sync with the loader registry
        help="Image loading methods to use in benchmark",
    )

    args = parser.parse_args()

    demo(args.method, args.path)

更多細節請參考：

https://github.com/spmallick/learnopencv/tree/master/Efficient-image-loading

https://www.learnopencv.com/efficient-image-loading/

這里就只看結果了：

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 如何有效地記錄和管理筆記（一）如何有效地進行代碼 Review？ Flutter如何有效地退出程序如何有效地閱讀文獻並做筆記如何有效地做算法題 CV2和PIL讀取圖像方法與區別對比 MySQL如何有效地創建基於 INNODB 引擎的表如何更有效地學習開源項目的代碼？如何快速有效地找出程序錯誤（C語言篇） Redis連接數需有效地釋放