LMDB 是輕量級數據庫,刪除和修改操作就沒什麼必要了,這裡只介紹寫入和讀取。
1,將圖片數據寫入lmdb。
def image_dataset_to_lmdb(image_folder_path, lmdb_path, label=1,
                          map_size=int(1e12)):
    """Store the color images found directly in a folder as Caffe datums in LMDB.

    Only files located directly in ``image_folder_path`` are processed;
    sub-directories are ignored. Each 3-D image array has its channel axis
    reversed and is transposed to channel-first layout before being
    converted with ``caffe.io.array_to_datum``. Images that do not decode to
    a 3-D array (e.g. grayscale) are reported and skipped.

    Records are keyed by the zero-padded (10 digits) position of the file in
    the sorted file listing, so skipped files leave gaps in the key sequence.

    Args:
        image_folder_path: Folder containing the images. A trailing path
            separator is optional.
        lmdb_path: Location of the LMDB environment to create or open.
        label: Value stored in the ``label`` field of every datum.
        map_size: Maximum size in bytes the LMDB environment may grow to.
    """
    # create db
    db = lmdb.open(lmdb_path, map_size=map_size)
    try:
        # start transaction (committed on normal exit, aborted on exception)
        with db.begin(write=True) as db_txn:
            # os.walk yields the top directory first; taking only that entry
            # restricts the scan to the folder itself without descending into
            # sub-directories. A missing folder yields nothing at all.
            _, _, filenames = next(os.walk(image_folder_path), (None, [], []))

            # Sorted so that the index -> file mapping is reproducible
            # instead of depending on the OS directory order.
            for index, filename in enumerate(sorted(filenames)):
                # os.path.join works with or without a trailing separator
                image_path = os.path.join(image_folder_path, filename)

                # read image into numpy.ndarray
                image_array = numpy.array(Image.open(image_path))

                # image type recognition: anything that is not
                # height x width x channels cannot be channel-split below
                if image_array.ndim != 3:
                    print(image_path + ' => image type : grayscale, this function can not handle this case, skip.')
                    continue
                print(image_path + ' => image type : color, go.')
                # NOTE(review): images with an alpha channel are 3-D as well
                # and take this path with four channels - confirm that is
                # acceptable for the consumer of this database.

                # RGB format to BGR format (assumes the decoder returns RGB)
                image_array = image_array[:, :, ::-1]

                # channel split by transpose matrix operation
                image_array = image_array.transpose((2, 0, 1))

                # convert numpy ndarray to caffe datum format
                im_dat = caffe.io.array_to_datum(image_array)

                # the y value (label) stored with each image
                im_dat.label = label

                # key: zero-padded index; LMDB keys must be bytes on Python 3
                key = '{:0>10d}'.format(index).encode('ascii')
                # value: serialized datum
                db_txn.put(key, im_dat.SerializeToString())
    finally:
        # release the environment even when reading or converting fails
        db.close()
2,讀取數據庫
def read_lmdb(lmdb_path):
    """Print the label and data of every Caffe datum stored in an LMDB database.

    For each record the function prints the datum's label, then each
    first-axis slice of the array returned by ``caffe.io.datum_to_array``.

    Args:
        lmdb_path: Location of an existing LMDB environment.
    """
    # open lmdb read-only: this function never writes, and a read-only open
    # fails on a wrong path instead of silently creating an empty database
    lmdb_env = lmdb.open(lmdb_path, readonly=True)
    try:
        # begin transaction; the context manager ends it when the loop is done
        with lmdb_env.begin() as lmdb_txn:
            # get data object (reused: ParseFromString overwrites its content)
            datum = caffe.proto.caffe_pb2.Datum()

            # iterate over all records through a cursor
            for _, value in lmdb_txn.cursor():
                # parse back to datum
                datum.ParseFromString(value)

                # get y value
                label = datum.label
                print('label = ' + str(label))

                data_array = caffe.io.datum_to_array(datum)
                print('data is numpy.ndarray :')
                for data in data_array:
                    print(str(data) + '\n')
    finally:
        # close the environment even if parsing or printing raises
        lmdb_env.close()