"""
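Requirements:
1. Read every sub-folder of the CDM directory and pick a single file with the
mp3 suffix from each of them. Walk through all sub-folders and save the picked
mp3 files into a new folder. The new folder is named after the year folder the
files come from plus the suffix _single, e.g. files taken from the 2000 folder
are stored in 2000_single.
NOTE: create_target_file currently names the folder <year>_new, not <year>_single.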
"""
import shutil
import pathlib
import pprint
from typing import List, Generator, Iterable, Tuple
from itertools import chain, islice
import os
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from functools import partial
import traceback
import time
from multiprocessing import cpu_count
# Root folder of the music collection; year folders are resolved relative to it.
DIR_PATH = pathlib.Path("/Users/chennan/CDM")
# Worker count handed to the executor, derived from the number of CPUs.
MAX_CONCCURENT = 4 * cpu_count() - 2
def multiple_file_types(file_path: pathlib.Path, *patterns: str) \
        -> Generator[Iterable[pathlib.Path], None, None]:
    """Glob ``file_path`` once per pattern.

    ``Path.glob`` accepts a single pattern only, so this adapter runs it for
    every pattern that is passed in.

    :param file_path: directory whose entries are matched
    :param patterns: glob patterns such as ``"*.mp3"``
    :return: a generator yielding, in the order of ``patterns``, one lazy
        iterable of matching paths per pattern (nothing is yielded when no
        pattern is given)
    """
    # Deliberately a generator expression: callers may rely on generator
    # methods (e.g. ``send``), so do not replace it with a list or a chain.
    return (file_path.glob(pattern) for pattern in patterns)
def get_all_list_by_year(y: str) -> Generator:
    """List every entry located directly inside the folder of one year.

    :param y: name of the year folder below ``DIR_PATH`` (for example ``"2000"``)
    :return: lazy generator over the entries of ``DIR_PATH / y``
    """
    return (DIR_PATH / y).glob("*")
def get_mp3_list(cdm_path: pathlib.Path) -> Generator:
    """Collect the audio files (mp3, ape, wav, flac) found directly in a folder.

    :param cdm_path: path object of the album folder (``glob`` is called on it,
        so a plain ``str`` does not work)
    :return: generator produced by ``multiple_file_types``: one iterable of
        matching paths per pattern, in the order mp3, ape, wav, flac
    """
    return multiple_file_types(cdm_path, "*.mp3", "*.ape", "*.wav", "*.flac")
def select_mp3(y: str) -> List[pathlib.Path]:
    """Pick one audio file from every entry of a year folder.

    For each entry returned by ``get_all_list_by_year(y)`` the pattern groups
    from ``get_mp3_list`` are tried in order and the first matching file is
    kept. Entries without any matching file are skipped.

    :param y: name of the year folder (for example ``"2000"``)
    :return: list with at most one selected path per entry
    """
    selected_mp3_list = []
    for dir_path in get_all_list_by_year(y):
        # Flatten the per-pattern matches so a folder without ``*.mp3`` falls
        # back to the remaining patterns instead of being skipped silently.
        candidates = chain.from_iterable(get_mp3_list(dir_path))
        # ``next`` with a default stops at the first hit and needs no
        # exception handling for empty folders.
        first_match = next(candidates, None)
        if first_match is not None:
            selected_mp3_list.append(first_match)
    return selected_mp3_list
def create_target_file(y):
    """Create the output folder for one year if needed and return its path.

    :param y: year label used to build the folder name
    :return: ``DIR_PATH / f"{y}_new"`` as a path object
    :raises FileExistsError: if the target path exists but is not a directory
    """
    # NOTE(review): the module docstring asks for a ``_single`` suffix while
    # this uses ``_new`` -- confirm which one is intended.
    target = DIR_PATH / f"{y}_new"
    # ``exist_ok=True`` replaces the race-prone "check, then create" sequence.
    target.mkdir(parents=True, exist_ok=True)
    return target
def copy_file_to_new_path(source, y, target):
    """Copy ``source`` into the ``target`` folder, keeping its file name.

    Any ``Exception`` raised while copying is caught and its traceback is
    printed, so a failing file does not propagate an error to the caller.

    :param source: path object of the file to copy (its ``name`` is reused)
    :param y: not used by this function; kept in the signature for callers
    :param target: path object of the destination folder
    :return: None
    """
    try:
        destination = target / source.name
        shutil.copy(source, destination)
    except Exception:
        print(traceback.format_exc())
if __name__ == '__main__':
    # Script entry point: select one file per album of the chosen year, copy
    # the selection into the year's output folder and report the elapsed time.
    start = time.time()
    year = "2000"
    mp3_list = select_mp3(year)
    target = create_target_file(year)
    if target:
        # Bind the arguments shared by every copy so ``map`` only has to
        # supply the source path.
        copy_job = partial(copy_file_to_new_path, y=year, target=target)
        with ThreadPoolExecutor(max_workers=MAX_CONCCURENT) as executor:
            # Leaving the ``with`` block waits for all submitted copies.
            executor.map(copy_job, mp3_list)
        print(f"並發{MAX_CONCCURENT}次,用時", time.time() - start)
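# The effect of the concurrency level still needs further observation...
# Thread pool results
# concurrency 2, elapsed 6.518146991729736
# concurrency 10, elapsed 7.732945919036865
# concurrency 30, elapsed 7.956831932067871
# concurrency 100, elapsed 7.956831932067871
# Process pool results
# concurrency 2, elapsed 6.013077974319458
# concurrency 8, elapsed 6.863225221633911
# concurrency 14, elapsed 4.6188788414001465
# concurrency 30, elapsed 4.858534812927246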