1. Python:文件夾下圖片去重(參考:刪除重複文件或圖片(去重)的 Python 代碼)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Remove duplicate files (for example images) found below ``filedir``.
# Two files count as duplicates when their MD5 digests are equal.
# Original author's note: the script is meant to be placed in the directory
# whose duplicates should be removed; the directory actually processed is
# whatever ``filedir`` points at.
import os
import hashlib

filedir = '/home/xx/xx/xx'


def filecount(DIR):
    """Return the number of regular files directly inside ``DIR``.

    Sub-directories, and anything inside them, are not counted.
    """
    return sum(
        1 for name in os.listdir(DIR)
        if os.path.isfile(os.path.join(DIR, name))
    )


def md5sum(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    ``filename`` is either a name relative to ``filedir`` (the original
    calling convention) or an absolute path; ``os.path.join`` discards
    ``filedir`` when its second argument is absolute.
    """
    md5 = hashlib.md5()
    # ``with`` closes the handle even when a read fails part-way through.
    with open(os.path.join(filedir, filename), 'rb') as f:
        for chunk in iter(lambda: f.read(8096), b''):
            md5.update(chunk)
    return md5.hexdigest()


def delfile(directory=None):
    """Delete every file below ``directory`` whose content was already seen.

    The tree is walked top-down and names are visited in sorted order, so
    the copy that survives is deterministic: the first one encountered.
    The path of each removed file, relative to ``directory``, is printed.

    Args:
        directory: Root of the tree to clean. Defaults to ``filedir``.

    Returns:
        The number of files that were removed.
    """
    if directory is None:
        directory = filedir

    seen_digests = set()
    removed = 0
    for root, dirs, files in os.walk(directory):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            # Build the path from the directory being walked. Joining the
            # bare name onto the top-level directory would hash (and
            # delete) the wrong file for anything inside a sub-directory.
            path = os.path.abspath(os.path.join(root, name))
            digest = md5sum(path)
            if digest in seen_digests:
                os.remove(path)
                removed += 1
                print(os.path.relpath(path, os.path.abspath(directory)))
            else:
                seen_digests.add(digest)
    return removed


if __name__ == '__main__':
    oldf = filecount(filedir)
    print('去重前有', oldf, '個文件\n請稍等正在刪除重復文件...')
    # filecount() only looks at the top level, while delfile() also cleans
    # sub-directories, so report the number it actually removed.
    removed_total = delfile()
    print('\n\n去重后剩', filecount(filedir), '個文件')
    print('\n\n一共刪除了', removed_total, '個文件\n\n')
2. 圖片批量修改為同一尺寸(參考:https://www.cnblogs.com/neo-T/p/6477378.html)
# -*- coding: utf-8 -*-
# Resize every .jpg below a directory, in place, to a square of
# IMAGE_SIZE x IMAGE_SIZE pixels, padding the short side with black first.
import os

import cv2

IMAGE_SIZE = 256


def resize_image(image, height=IMAGE_SIZE, width=IMAGE_SIZE):
    """Pad ``image`` to a square with black borders, then resize it.

    Args:
        image: Image array as returned by ``cv2.imread``; only the first
            two entries of ``image.shape`` (rows, columns) are read.
        height: Height of the result in pixels.
        width: Width of the result in pixels.

    Returns:
        The padded image resized to ``height`` rows by ``width`` columns.
    """
    h, w = image.shape[:2]

    # Pad whichever side is shorter until both match the longest edge.
    # The deficit is zero for the longer side, so its padding is zero.
    longest_edge = max(h, w)
    dh = longest_edge - h
    dw = longest_edge - w
    top = dh // 2
    bottom = dh - top
    left = dw // 2
    right = dw - left

    # Border colour: black.
    BLACK = [0, 0, 0]

    # cv2.BORDER_CONSTANT fills the added border with the colour in `value`.
    constant = cv2.copyMakeBorder(image, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=BLACK)

    # cv2.resize expects dsize as (width, height); passing (height, width)
    # transposes the result whenever the two differ.
    return cv2.resize(constant, (width, height))


# Read the training data (translated from the original comment).
# NOTE(review): neither list is used by the code visible here - confirm
# whether something outside this snippet fills or reads them.
images = []
labels = []


def read_path(path_name):
    """Recursively resize every ``.jpg`` file below ``path_name`` in place.

    Each directory entry is printed as it is visited; a file's name is
    printed a second time once its resized version has been written back.
    Files that OpenCV cannot decode are reported and skipped.

    Args:
        path_name: Directory to process.
    """
    for dir_item in os.listdir(path_name):
        print(dir_item)
        # Build an absolute path starting from the directory being listed.
        full_path = os.path.abspath(os.path.join(path_name, dir_item))

        if os.path.isdir(full_path):
            # Directory: recurse into it.
            read_path(full_path)
        elif dir_item.endswith('.jpg'):
            image = cv2.imread(full_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None
                # instead of raising, which would crash at image.shape.
                print('skipped unreadable image:', full_path)
                continue
            image = resize_image(image, IMAGE_SIZE, IMAGE_SIZE)
            cv2.imwrite(full_path, image)
            print(dir_item)


if __name__ == '__main__':
    read_path('/home/xx/xx/xx')
list去重
1.
# De-duplicate a flat list of hashable items by round-tripping through a set.
# A set does not promise to keep the original order of the elements.
a = [1, 2, 3, 1, 5, 1, 6]
print([*set(a)])
2.
# De-duplicate a list of lists. Lists are unhashable, so each inner list is
# converted to a tuple, the tuples are collapsed through a set, and the
# survivors are converted back to lists.
a = [[1, 2], [5, 6], [1, 2]]
b = list(map(tuple, a))
c = list(map(list, set(b)))
print(c)