最近在項目上需要批量把txt文件轉成csv文件格式,以前是手動打開excel文件,然後導入txt來生成csv文件,由於這已經變成每周需要做的事情,決定用python自動化腳本來實現,思路:
- 讀取文件夾中所有txt文件,保存到list中
- 針對每個txt文件,自動生成同文件名的csv文件
- 對每個txt文件,根據分隔符來保存為csv文件,分隔符為分號“;”,在轉換之前先把文件編碼統一成'utf-8',因為在實現過程中,發現總會有編碼報錯問題出現
- 新建txt文件夾來存放所有txt文件
完整代碼如下:
import csv
import os
import shutil

from chardet.universaldetector import UniversalDetector


def get_encode_info(file):
    """Detect the text encoding of *file* with chardet.

    The file is fed to the detector line by line, and reading stops as soon
    as the detector reports that it is done.

    Args:
        file: Path of the file to inspect.

    Returns:
        The encoding name reported by chardet, or ``None`` when chardet
        could not determine one (for example, for an empty file).
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file lazily so a large file is not loaded into memory
        # just to sniff its encoding.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']


def read_file(file):
    """Return the raw bytes stored in *file*."""
    with open(file, 'rb') as f:
        return f.read()


def write_file(content, file):
    """Overwrite *file* with the bytes in *content*."""
    with open(file, 'wb') as f:
        f.write(content)


def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from *original_encode* to *des_encode*.

    Bytes that cannot be decoded with *original_encode* are dropped
    (``errors='ignore'``), so the conversion can be lossy.

    Args:
        file: Path of the file to rewrite.
        original_encode: Name of the encoding the file is currently in.
        des_encode: Name of the encoding to write the file back in.
    """
    file_content = read_file(file)
    file_decode = file_content.decode(original_encode, 'ignore')
    file_encode = file_decode.encode(des_encode)
    write_file(file_encode, file)


def move2txtfolder(path, txt_file_list):
    """Move every file in *txt_file_list* into the ``txt`` sub-folder of *path*.

    The sub-folder is created when it does not exist yet.

    Args:
        path: Folder in which the ``txt`` sub-folder lives.
        txt_file_list: Paths of the files to move.
    """
    # os.path.join keeps the script usable on non-Windows systems, where a
    # hard-coded backslash would become part of the folder name.
    txt_folder_path = os.path.join(path, 'txt')
    os.makedirs(txt_folder_path, exist_ok=True)

    for file in txt_file_list:
        des_path = os.path.join(txt_folder_path, os.path.basename(file))
        shutil.move(file, des_path)


def findtxt(path, txt_file_list):
    """Recursively collect the ``*.txt`` files below *path*.

    Args:
        path: Folder to search.
        txt_file_list: List that the matching file paths are appended to;
            it is modified in place.
    """
    for filename in os.listdir(path):
        de_path = os.path.join(path, filename)
        if os.path.isfile(de_path):
            if de_path.endswith(".txt"):  # Only *.txt files are collected.
                txt_file_list.append(de_path)
        elif os.path.isdir(de_path):
            # Recurse only into real directories; broken symlinks and other
            # special entries would make os.listdir raise.
            findtxt(de_path, txt_file_list)


def txt2csv(txt_file):
    """Convert a semicolon-separated text file into a CSV file.

    The text file is first rewritten in place as UTF-8 (unless it already is
    UTF-8), then every line is split on ``;`` and written as one row of a CSV
    file that has the same name with a ``.csv`` extension.

    Args:
        txt_file: Path of the text file to convert.
    """
    # Normalise the encoding first so the text can always be read as UTF-8.
    encode_info = get_encode_info(txt_file)
    if encode_info is None:
        # chardet could not decide (e.g. the file is empty). Treat the file
        # as UTF-8 and drop undecodable bytes instead of crashing on
        # bytes.decode(None).
        convert_encode2utf8(txt_file, 'utf-8', 'utf-8')
    elif encode_info.lower() != 'utf-8':
        convert_encode2utf8(txt_file, encode_info, 'utf-8')

    csv_file = os.path.splitext(txt_file)[0] + '.csv'
    with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile, \
            open(txt_file, 'r', encoding='utf-8') as txtfile:
        writer = csv.writer(csvfile, dialect='excel')
        for line in txtfile:
            writer.writerow(line.strip('\n').split(';'))


def main():
    """Convert every ``*.txt`` file in the target folder, then archive them."""
    folder_path = r'C:\Details'

    # If the folder contains sub-folders, build the list with findtxt():
    #     txt_file_list = []
    #     findtxt(folder_path, txt_file_list)
    # Without sub-folders a comprehension over the folder is enough. The
    # isfile check skips directories whose name happens to end in ".txt".
    txt_file_list = [
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.endswith('.txt')
        and os.path.isfile(os.path.join(folder_path, name))
    ]

    for txt_file in txt_file_list:
        txt2csv(txt_file)

    # Move the sources only after all conversions succeeded.
    move2txtfolder(folder_path, txt_file_list)


if __name__ == '__main__':
    main()