# 原文:https://www.jianshu.com/p/4f38fe021fb3
# -*- coding: utf-8 -*-
# # 找出非excel格式的文件
# import pandas as pd
# import os
# from shutil import copyfile
# excel_dir = 'D:/工作/數據轉換/訪客數據/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# root_dir = ""
# print(filename)
# try:
# frame = pd.read_excel(excel_dir+filename)
# root_dir = "D:/工作/數據轉換/excel格式/"
# except Exception as e:
# print("異常:", e)
# root_dir = "D:/工作/數據轉換/html格式/"
# copyfile(excel_dir+filename, root_dir+filename)
# # html的table轉換為excel
# # 原文:https://www.jianshu.com/p/4f38fe021fb3
# import pandas
# import os
# from shutil import copyfile
# excel_dir = 'D:/工作/數據轉換/html格式/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# print(filename)
# try:
# with open(excel_dir+filename, 'rb') as f:
# df = pandas.read_html(f.read(),encoding='utf-8')
# bb = pandas.ExcelWriter("D:/工作/數據轉換/轉換數據/" + filename)
# df[0].to_excel(bb,index=False)
# bb.close()
# except Exception as e:
# print("異常:", e)
# # copyfile(excel_dir+filename, "D:/工作/數據轉換/出錯數據/" + filename)
# # 單文件測試
# import pandas
# import os
# with open('訪客-2020-09-18 23_45_16.xlsx', 'rb') as f:
# df = pandas.read_html(f.read(),encoding='utf-8')
# bb = pandas.ExcelWriter('訪訪客-2020-09-18 23_45_16-1.xlsx')
# df[0].to_excel(bb,index=False)
# bb.close()
# # 批量文件改名
# import os
# excel_dir = 'D:/工作/數據轉換/1/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# print(filename)
# oldFullName = excel_dir+filename
# newFullName = excel_dir+"1店-"+filename
# os.rename(oldFullName, newFullName)
# # excel轉換為csv文件
# import pandas as pd
# import os
# import re
# excel_dir = 'D:/工作/數據轉換/轉換數據/'
# os.chdir(excel_dir)
# for filename in os.listdir(excel_dir):
# print(filename)
# # filename = "訪客-2020-07-30 23_11_30.xlsx"
# df = pd.read_excel(filename,index_col=0)
# # 訪問時間列 增加 日期
# m = re.search(r"(\d{4}-\d{1,2}-\d{1,2})", filename)
# strdate = m.group(1)
# df["訪問時間"] =['%s %s' % (strdate, s) for s in df["訪問時間"]]
# col_name = df.columns.tolist()
# if '搜索關鍵字' not in col_name:
# # 插入列
# index = col_name.index('入店來源') + 1
# col_name.insert(index, '搜索關鍵字')
# df = df.reindex(columns = col_name)
# # 修改值
# df.loc[df['入店來源'].str.find('手淘搜索') > -1 , '搜索關鍵字'] = df['入店來源'].str.replace('手淘搜索','')
# df.loc[df['入店來源'].str.find('手淘搜索') > -1 , '入店來源'] = '手淘搜索'
# file_name = os.path.splitext(filename)[0]
# df.to_csv('D:/工作/數據轉換/CSV數據/'+file_name+'.csv',encoding='utf-8')
# # 數據合並
# import pandas as pd
# import os
# path = "./2店/"
# files = os.listdir(path)
# list_excel = []
# for filename in files:
# fullname = path + filename  # CSV文件的相對路徑
# df = pd.read_csv(fullname)  # 讀取CSV文件
# list_excel.append(df)  # 把讀取到的數據追加到list中
# writer = pd.ExcelWriter('合並后的數據.xlsx')
# pd.concat(list_excel).to_excel(writer, sheet_name='sheet1', index=False)
# writer.close()
# print('合並完成')