import os
import os.path

from win32com import client as wc

# Every file path found under the roots; filled in by the walk at the bottom.
c = []
# Directories to scan; this path is the one used for the experiment.
rootdir = ["d:/77"]


def txt(j, c):
    """Save the Word document at ``c[j]`` as text and delete the original.

    A Word application is started through COM, the document is opened and
    saved next to the original under the original path minus its last five
    characters (the ``.docx`` suffix) plus ``"(translate txt)"``, using save
    format 4 (``wdFormatDOSText``). The source file is removed only after the
    save succeeded and Word was shut down.

    Args:
        j: Index into ``c`` of the document to convert.
        c: List of file paths.

    Raises:
        Whatever the COM calls or ``os.remove`` raise; nothing is swallowed,
        and the source file is left in place when the conversion fails.
    """
    # Hand Word an absolute, normalised path so it opens the same file that
    # Python sees. NOTE(review): Word presumably resolves relative paths
    # against its own working directory -- confirm.
    source = os.path.abspath(c[j])
    # No extension is given here. NOTE(review): Word is expected to append
    # the one matching the save format -- confirm.
    newname = source[:-5] + "(translate txt)"

    word = wc.Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(source)
        try:
            doc.SaveAs(newname, 4)
        finally:
            # Release the document even when SaveAs fails.
            doc.Close()
    finally:
        # Always shut Word down so a failure does not leave a stray process.
        word.Quit()

    # Reached only when the text copy was written successfully.
    os.remove(source)
    print("完成")


def wordt(c):
    """Convert every ``.docx`` file listed in ``c``.

    The extension is matched case-insensitively. Files whose name starts with
    ``~$`` are skipped: Word creates those owner/lock files next to open
    documents and they are not real documents.

    Args:
        c: List of file paths; each matching entry is passed to ``txt``.
    """
    for j, path in enumerate(c):
        if os.path.basename(path).startswith("~$"):
            continue
        if path.lower().endswith(".docx"):
            txt(j, c)


# Collect every file below each root directory, then convert the documents.
for i in rootdir:
    for parent, dirnames, filenames in os.walk(i):
        for filename in filenames:
            c.append(os.path.join(parent, filename))
wordt(c)
將 docx 另存為 txt,並且刪除源文件。
涉及到 Office 中 docx 文檔的打開與另存為命令。
相關參考
from win32com import client as wc

# Minimal reference example: open a document in Word through COM, save it
# under another name with save format 2 (wdFormatText), then close the
# document and quit Word.
source_path = 'c:/test'
target_path = 'c:/test.text'
text_format = 2

word = wc.Dispatch('Word.Application')
documents = word.Documents
doc = documents.Open(source_path)
doc.SaveAs(target_path, text_format)
doc.Close()
word.Quit()
# Opens the path c:\text for reading; the returned file object is neither
# stored nor closed.
# NOTE(review): this looks like a leftover illustration of reading the
# converted text back -- confirm the intended path and whether it is needed.
open(r'c:\text','r')

# Numeric save-format codes accepted as the second argument of SaveAs.
# NOTE(review): these appear to mirror Word's WdSaveFormat enumeration --
# verify against the Office documentation before relying on them.
# Several names deliberately share a value (0, 1 and 7 each appear twice).
wdFormatDocument = 0
wdFormatDocument97 = 0
wdFormatDocumentDefault = 16
wdFormatDOSText = 4
wdFormatDOSTextLineBreaks = 5
wdFormatEncodedText = 7
wdFormatFilteredHTML = 10
wdFormatFlatXML = 19
wdFormatFlatXMLMacroEnabled = 20
wdFormatFlatXMLTemplate = 21
wdFormatFlatXMLTemplateMacroEnabled = 22
wdFormatHTML = 8
wdFormatPDF = 17
wdFormatRTF = 6
wdFormatTemplate = 1
wdFormatTemplate97 = 1
wdFormatText = 2
wdFormatTextLineBreaks = 3
wdFormatUnicodeText = 7
wdFormatWebArchive = 9
wdFormatXML = 11
wdFormatXMLDocument = 12
wdFormatXMLDocumentMacroEnabled = 13
wdFormatXMLTemplate = 14
wdFormatXMLTemplateMacroEnabled = 15
wdFormatXPS = 18
over!