注意:此方法只能直接用於 docx 文檔,doc 文檔不行。對於 doc 文檔,需先在 Word 中將其另存為 HTML 文件,然後進入該網頁文件所在的文件夾,你會發現有一個名稱為“文件名.files”的文件夾(如“圖片.files”),進入其中,Word 文檔中的圖片就一一被保存在裡面了。
在解壓的時候遇到了幾個問題:1. 不能解壓;2. 不能刪除文件夾;3. 不能移動文件。這些問題都用 try 捕獲異常的方式解決了。
doc 文檔也不是完全不行:可以先使用 VBA 把 doc 文檔自動另存為 docx 格式,VBA 代碼如下:
Sub doc2docx()
    ' Convert the .doc files picked in a file dialog to .docx.
    '
    ' Each selected document is opened, saved next to the original with an
    ' "x" appended to its ".doc" extension, and closed again. The original
    ' .doc file is left untouched.
    Dim myDialog As FileDialog
    Dim oFile As Object
    Dim oFilePath As Variant
    Dim newPath As String

    Set myDialog = Application.FileDialog(msoFileDialogFilePicker)

    With myDialog
        .Filters.Clear                                   ' remove every existing filter entry
        .Filters.Add "所有 WORD2007 文件", "*.doc", 1     ' offer only *.doc files
        .AllowMultiSelect = True                         ' allow picking several files at once

        If .Show = -1 Then                               ' -1: the user confirmed the dialog
            For Each oFilePath In .SelectedItems
                ' Only touch the extension. A plain Replace(path, "doc", "docx")
                ' would also rewrite "doc" inside folder or file names
                ' (e.g. C:\docs\a.doc -> C:\docxs\a.docx) and would turn an
                ' already-converted ".docx" into ".docxx".
                If LCase(Right(oFilePath, 4)) = ".doc" Then
                    newPath = oFilePath & "x"
                    Set oFile = Documents.Open(oFilePath)
                    ' 16 = wdFormatDocumentDefault (the .docx format)
                    oFile.SaveAs FileName:=newPath, FileFormat:=16
                    oFile.Close
                End If
            Next
        End If
    End With
End Sub
然後再使用下面的代碼提取 docx 格式文檔裡面的圖片:
#revised by Stephen Shen 2020-3-8 19:51:46
#to extract pics from *.docx files
"""Extract the embedded pictures from every *.docx file under a folder.

A .docx file is a zip archive whose pictures live under ``word/media/``.
For each document the pictures are unpacked into a scratch directory and
then moved to ``<outDir>/<document name>/``.
"""
import zipfile
import os
import shutil
import datetime

# Extensions (lower case) that are treated as zip-based Word documents.
DOCX_EXTENSIONS = ('.docx',)
# Archive path prefix of the embedded pictures (zip names always use '/').
MEDIA_PREFIX = 'word/media/'
# Longest output directory name that will be created.
MAX_DIRNAME_LENGTH = 200


def _timestamp():
    """Return the current local time as a zero-padded ``YYYYMMDDHHMM`` string."""
    # Zero padding keeps the suffix unambiguous: without it 2020-1-12 and
    # 2020-11-2 both collapse to "2020112".
    return datetime.datetime.now().strftime('%Y%m%d%H%M')


def renameFile(dstpath):
    """Return *dstpath* with ``-<timestamp>`` inserted before the extension.

    Meant for the case where a file of the same name (but different content)
    already exists at the destination.
    """
    fdirname, fbasename = os.path.split(dstpath)
    fname, fext = os.path.splitext(fbasename)
    return os.path.join(fdirname, fname + '-' + _timestamp() + fext)


def splitPath(doc_inpath):
    """Split *doc_inpath* into ``(directory, file name without extension, extension)``."""
    dirPath = os.path.dirname(doc_inpath)
    fname, fext = os.path.splitext(os.path.basename(doc_inpath))
    return dirPath, fname, fext


def _prepareOutDir(outDir, dirName):
    """Create and return a fresh output directory for one document.

    Returns ``None`` (after printing a message) when *dirName* is too long.
    When ``<outDir>/<dirName>`` already exists, a timestamped sibling is
    created instead so that earlier results are never overwritten.
    """
    candidate = os.path.join(outDir, dirName)
    if os.path.exists(candidate):
        stamped = candidate + '-' + _timestamp()
        candidate = stamped
        counter = 1
        # Several runs within the same minute share a timestamp.
        while os.path.exists(candidate):
            candidate = stamped + '-' + str(counter)
            counter += 1
    elif len(dirName) > MAX_DIRNAME_LENGTH:
        print('dirname is too long :' + candidate)
        return None
    os.makedirs(candidate)
    return candidate


def _clearDir(dirPath):
    """Best-effort removal of everything inside *dirPath* (the directory stays)."""
    for entry in os.listdir(dirPath):
        entryPath = os.path.join(dirPath, entry)
        if os.path.isfile(entryPath):
            try:
                os.remove(entryPath)
            except OSError:
                print(entryPath + ' cannot be removed')
        elif os.path.isdir(entryPath):
            shutil.rmtree(entryPath, True)


def extractPics(doc_inpath, outDir, zipDir):
    """Move the pictures embedded in one .docx file into its own folder.

    Args:
        doc_inpath: Path of the candidate document. Files whose extension is
            not ``.docx`` (case-insensitive) are ignored silently.
        outDir: Root folder for results; pictures end up in
            ``<outDir>/<document name>/`` or a timestamped variant of it when
            that folder already exists.
        zipDir: Scratch directory used while unpacking. Its contents are
            deleted before returning.

    Unreadable archives and pictures that cannot be moved are reported on
    stdout and skipped; no exception is raised for them.
    """
    _, fname, fext = splitPath(doc_inpath)
    if fext.lower() not in DOCX_EXTENSIONS:
        return

    print(doc_inpath + ' is extracting...')
    out_dir = _prepareOutDir(outDir, fname.strip())
    if out_dir is None:
        return

    os.makedirs(zipDir, exist_ok=True)
    try:
        try:
            with zipfile.ZipFile(doc_inpath, 'r') as archive:
                members = [name for name in archive.namelist()
                           if name.startswith(MEDIA_PREFIX)]
                # Extract to an explicit path instead of relying on the
                # caller having changed the current working directory.
                archive.extractall(zipDir, members)
        except (zipfile.BadZipFile, OSError, RuntimeError):
            print(doc_inpath + ' cannot be extracted')
            return

        picsDir = os.path.join(zipDir, 'word', 'media')
        if not os.path.isdir(picsDir):
            return  # the document contains no pictures
        for pic in os.listdir(picsDir):
            try:
                shutil.move(os.path.join(picsDir, pic),
                            os.path.join(out_dir, pic))
            except (shutil.Error, OSError):
                print(doc_inpath + ' is skipped')
    finally:
        # Always leave the scratch directory empty for the next document.
        _clearDir(zipDir)


outDir = r'k:/fileExtracted/imagesFromDocs'
zipDir = r'k:/fileExtracted/zip'
docDir = r'K:/docs'


def main():
    """Walk ``docDir`` and extract the pictures of every document found."""
    os.makedirs(outDir, exist_ok=True)
    os.makedirs(zipDir, exist_ok=True)
    for folder, _subFolders, files in os.walk(docDir):
        for file in files:
            extractPics(os.path.join(folder, file), outDir, zipDir)
    print('done')


if __name__ == '__main__':
    main()