前段時間項目上遇到這麼一個需求,需要將多個OCR識別的Word文檔合併成一個,於是就在網上找了找,自己修改了一下。在這裡跟大家分享一下,希望對用得到的人有所幫助。
要做多Word文檔合併,首先要導入Microsoft.Office.Interop.Word這個dll。我覺得很多人會在這個問題上糾結很久,因為找不到這個dll,當然我也找了很久才找到,最終我的解決辦法是這樣的:如果在VS的引用下的COM組件中找不到這個dll,那麼,你就在你的電腦上查找這個文件,如果找到了,你可以直接把它複製到你的項目中,然後用瀏覽的方式直接找到這個dll引用就可以了。另外網上還有人說添加Microsoft Office 11.0 Object Library這個引用,看你具體需要吧,如果用得著,那就添加,用不著就沒必要。如果你電腦上查找不到Microsoft.Office.Interop.Word.dll,那麼可能是你的Office安裝有問題,就往這方面解決。好了,引用了這個dll後就可以寫代碼了。
這裡網上有一個封裝好的類,我稍微修改了一下,還不錯,所以就直接拿來給大家參考。其實我覺得開發沒必要什麼都親力親為,我們程序員的工作任務是解決實際的問題,而並不是寫代碼。
/// <summary>
/// Helper around Word automation (Microsoft.Office.Interop.Word) that merges
/// several Word documents into one, plus a plain-file helper that concatenates
/// all .txt files of a folder.
/// </summary>
/// <remarks>
/// The merge methods (<c>CopyMerge</c>, <c>InsertMerge</c>) always quit the Word
/// instance when they finish, successfully or not. A new Word instance is started
/// on demand by <see cref="Open"/>, so the same object can be used for several merges.
/// </remarks>
public class WordClass
{
    // Word application owned by this object; null after a merge has quit it.
    Microsoft.Office.Interop.Word.Application objApp = null;
    // Document currently being worked on (the one SaveAs writes out).
    Document objDocLast = null;
    // Document that was current before the last Merge call (CopyMerge only).
    Document objDocBeforeLast = null;

    /// <summary>
    /// Starts a Word application instance for this helper.
    /// </summary>
    public WordClass()
    {
        objApp = new Microsoft.Office.Interop.Word.Application();
    }

    #region Open a document
    /// <summary>
    /// Opens <paramref name="tempDoc"/> in Word, makes it the active document and
    /// remembers it as the current document of this helper.
    /// </summary>
    /// <param name="tempDoc">Path of the document to open.</param>
    public void Open(string tempDoc)
    {
        // A previous merge quits Word and clears objApp; start a new instance if needed.
        EnsureApplication();

        object objTempDoc = tempDoc;
        object objMissing = System.Reflection.Missing.Value;
        objDocLast = objApp.Documents.Open(
            ref objTempDoc,  // FileName
            ref objMissing,  // ConfirmConversions
            ref objMissing,  // ReadOnly
            ref objMissing,  // AddToRecentFiles
            ref objMissing,  // PasswordDocument
            ref objMissing,  // PasswordTemplate
            ref objMissing,  // Revert
            ref objMissing,  // WritePasswordDocument
            ref objMissing,  // WritePasswordTemplate
            ref objMissing,  // Format
            ref objMissing,  // Encoding
            ref objMissing,  // Visible
            ref objMissing,  // OpenAndRepair
            ref objMissing,  // DocumentDirection
            ref objMissing,  // NoEncodingDialog
            ref objMissing   // XMLTransform
        );
        objDocLast.Activate();
    }
    #endregion

    #region Save the current document to the output file
    /// <summary>
    /// Saves the current document under the name <paramref name="outDoc"/>.
    /// </summary>
    /// <param name="outDoc">Path of the output document.</param>
    /// <exception cref="System.InvalidOperationException">
    /// No document has been opened yet.
    /// </exception>
    public void SaveAs(string outDoc)
    {
        if (objDocLast == null)
        {
            throw new InvalidOperationException("No document is open; call Open before SaveAs.");
        }

        object objMissing = System.Reflection.Missing.Value;
        object objOutDoc = outDoc;
        objDocLast.SaveAs(
            ref objOutDoc,   // FileName
            ref objMissing,  // FileFormat
            ref objMissing,  // LockComments
            ref objMissing,  // Password
            ref objMissing,  // AddToRecentFiles
            ref objMissing,  // WritePassword
            ref objMissing,  // ReadOnlyRecommended
            ref objMissing,  // EmbedTrueTypeFonts
            ref objMissing,  // SaveNativePictureFormat
            ref objMissing,  // SaveFormsData
            ref objMissing,  // SaveAsAOCELetter
            ref objMissing,  // Encoding
            ref objMissing,  // InsertLineBreaks
            ref objMissing,  // AllowSubstitutions
            ref objMissing,  // LineEnding
            ref objMissing   // AddBiDiMarks
        );
    }
    #endregion

    #region Merge several files in a loop (Document.Merge)
    /// <summary>
    /// Opens the template document, merges every file of <paramref name="arrCopies"/>
    /// into it with <c>Document.Merge</c>, saves the result and quits Word.
    /// </summary>
    /// <param name="tempDoc">Template document that is opened first.</param>
    /// <param name="arrCopies">Paths of the documents to merge, in merge order.</param>
    /// <param name="outDoc">Path of the merged output document.</param>
    public void CopyMerge(string tempDoc, string[] arrCopies, string outDoc)
    {
        object objMissing = System.Reflection.Missing.Value;
        object objFalse = false;
        object objTarget = WdMergeTarget.wdMergeTargetSelected;
        object objUseFormatFrom = WdUseFormattingFrom.wdFormattingFromSelected;
        try
        {
            // Validated inside the try so that Word is still shut down on bad input.
            if (arrCopies == null)
            {
                throw new ArgumentNullException("arrCopies");
            }

            // Open the template document.
            Open(tempDoc);
            foreach (string strCopy in arrCopies)
            {
                objDocLast.Merge(
                    strCopy,               // FileName
                    ref objTarget,         // MergeTarget
                    ref objMissing,        // DetectFormatChanges
                    ref objUseFormatFrom,  // UseFormattingFrom
                    ref objMissing         // AddToRecentFiles
                );
                objDocBeforeLast = objDocLast;
                objDocLast = objApp.ActiveDocument;

                // Close the document we merged from, but never the one we continue with:
                // if Word reports the same document as active, closing it would leave
                // nothing to save.
                if (objDocBeforeLast != null && !ReferenceEquals(objDocBeforeLast, objDocLast))
                {
                    objDocBeforeLast.Close(
                        ref objFalse,    // SaveChanges
                        ref objMissing,  // OriginalFormat
                        ref objMissing   // RouteDocument
                    );
                }
            }
            // Save to the output file.
            SaveAs(outDoc);
            CloseAllDocuments();
        }
        finally
        {
            QuitApplication();
        }
    }

    /// <summary>
    /// Merges every file found directly in <paramref name="strCopyFolder"/> into the
    /// template document (see the array overload). Files are processed in
    /// case-insensitive ordinal name order so that the result is deterministic.
    /// </summary>
    /// <param name="tempDoc">Template document that is opened first.</param>
    /// <param name="strCopyFolder">Folder whose files are merged.</param>
    /// <param name="outDoc">Path of the merged output document.</param>
    public void CopyMerge(string tempDoc, string strCopyFolder, string outDoc)
    {
        string[] arrFiles;
        try
        {
            arrFiles = Directory.GetFiles(strCopyFolder);
        }
        catch
        {
            // The array overload always quits Word; do the same when listing the folder fails.
            QuitApplication();
            throw;
        }

        // Directory.GetFiles does not guarantee any order.
        Array.Sort(arrFiles, StringComparer.OrdinalIgnoreCase);
        CopyMerge(tempDoc, arrFiles, outDoc);
    }
    #endregion

    #region Merge several files in a loop (Selection.InsertFile)
    /// <summary>
    /// Opens the template document, inserts every file of <paramref name="arrCopies"/>
    /// at the current selection with <c>Selection.InsertFile</c>, saves the result
    /// and quits Word.
    /// </summary>
    /// <param name="tempDoc">Template document that is opened first.</param>
    /// <param name="arrCopies">Paths of the documents to insert, in insertion order.</param>
    /// <param name="outDoc">Path of the merged output document.</param>
    public void InsertMerge(string tempDoc, List<string> arrCopies, string outDoc)
    {
        object objMissing = System.Reflection.Missing.Value;
        object confirmConversion = false;
        object link = false;
        object attachment = false;
        try
        {
            // Validated inside the try so that Word is still shut down on bad input.
            if (arrCopies == null)
            {
                throw new ArgumentNullException("arrCopies");
            }

            // Open the template document.
            Open(tempDoc);
            foreach (string strCopy in arrCopies)
            {
                objApp.Selection.InsertFile(
                    strCopy,                // FileName
                    ref objMissing,         // Range
                    ref confirmConversion,  // ConfirmConversions
                    ref link,               // Link
                    ref attachment          // Attachment
                );
            }
            // Save to the output file.
            SaveAs(outDoc);
            CloseAllDocuments();
        }
        finally
        {
            QuitApplication();
        }
    }

    /// <summary>
    /// Inserts every Word document (.doc, .docx, .docm) found directly in
    /// <paramref name="strCopyFolder"/> into the template document (see the list
    /// overload). Word lock files ("~$...") are skipped and files are processed in
    /// case-insensitive ordinal name order.
    /// </summary>
    /// <param name="tempDoc">Template document that is opened first.</param>
    /// <param name="strCopyFolder">Folder whose Word documents are inserted.</param>
    /// <param name="outDoc">Path of the merged output document.</param>
    public void InsertMerge(string tempDoc, string strCopyFolder, string outDoc)
    {
        string[] arrFiles;
        try
        {
            arrFiles = Directory.GetFiles(strCopyFolder);
        }
        catch
        {
            // The list overload always quits Word; do the same when listing the folder fails.
            QuitApplication();
            throw;
        }

        // Directory.GetFiles does not guarantee any order.
        Array.Sort(arrFiles, StringComparer.OrdinalIgnoreCase);

        List<string> files = new List<string>();
        foreach (string file in arrFiles)
        {
            if (IsWordDocument(file))
            {
                files.Add(file);
            }
        }
        InsertMerge(tempDoc, files, outDoc);
    }
    #endregion

    #region Concatenate all txt files of a folder
    /// <summary>
    /// Concatenates the raw bytes of every "*.txt" file found directly in
    /// <paramref name="filePath"/> into <paramref name="outfileName"/>, in
    /// case-insensitive ordinal name order. A file that cannot be read is reported
    /// on the console and skipped.
    /// </summary>
    /// <param name="filePath">Folder containing the .txt files.</param>
    /// <param name="outfileName">Path of the output file (created or overwritten).</param>
    public void CombineFile(string filePath, string outfileName)
    {
        string[] infileName = Directory.GetFiles(filePath, "*.txt");
        Array.Sort(infileName, StringComparer.OrdinalIgnoreCase);

        string outFullPath = Path.GetFullPath(outfileName);
        byte[] buffer = new byte[81920];

        using (FileStream fileOut = new FileStream(outfileName, FileMode.Create))
        {
            foreach (string inFile in infileName)
            {
                // An existing output file inside the same folder must not be read as input.
                if (string.Equals(Path.GetFullPath(inFile), outFullPath, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                try
                {
                    // Read-only access so read-only input files can be combined too;
                    // "using" closes the stream even when opening or copying fails.
                    using (FileStream fileIn = new FileStream(inFile, FileMode.Open, FileAccess.Read))
                    {
                        int read;
                        while ((read = fileIn.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fileOut.Write(buffer, 0, read);
                        }
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: report the problem and continue with the next file.
                    Console.WriteLine(ex.Message);
                }
            }
        }
    }
    #endregion

    #region Private helpers
    // Starts a new Word instance when the previous one has been quit.
    private void EnsureApplication()
    {
        if (objApp == null)
        {
            objApp = new Microsoft.Office.Interop.Word.Application();
        }
    }

    // Closes every open document without saving, in one call on the collection
    // (closing documents while enumerating the collection would modify it mid-iteration).
    private void CloseAllDocuments()
    {
        object objMissing = System.Reflection.Missing.Value;
        object objDoNotSave = WdSaveOptions.wdDoNotSaveChanges;
        objApp.Documents.Close(
            ref objDoNotSave,  // SaveChanges
            ref objMissing,    // OriginalFormat
            ref objMissing     // RouteDocument
        );
    }

    // Quits Word without saving and forgets all references held by this object.
    private void QuitApplication()
    {
        if (objApp == null)
        {
            return;
        }

        object objMissing = System.Reflection.Missing.Value;
        // Explicitly discard changes: after a failed merge a document may still be
        // dirty, and quitting without this option can make Word ask whether to save.
        object objDoNotSave = WdSaveOptions.wdDoNotSaveChanges;
        try
        {
            objApp.Quit(
                ref objDoNotSave,  // SaveChanges
                ref objMissing,    // OriginalFormat
                ref objMissing     // RouteDocument
            );
        }
        finally
        {
            objApp = null;
            objDocLast = null;
            objDocBeforeLast = null;
        }
    }

    // True when the path has a Word document extension and is not a "~$" lock file.
    private static bool IsWordDocument(string path)
    {
        string name = Path.GetFileName(path);
        if (name.StartsWith("~$", StringComparison.Ordinal))
        {
            return false;
        }

        string extension = Path.GetExtension(path);
        return string.Equals(extension, ".doc", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase)
            || string.Equals(extension, ".docm", StringComparison.OrdinalIgnoreCase);
    }
    #endregion
}
使用這個類,我們可以把一個目錄下的所有Word文檔合併成一個,也可以把一個目錄下的所有txt文件合併成一個。