.NET Core 下把 Word 轉成 HTML 的方案很少見,特別是能連同圖片一起轉成 HTML 的。
查了很多資料做了些嘗試
DocX 的免費版已經不支持 .NET Core 了。
後來在 NuGet 上查找,發現可以引用 DotNetCore.NPOI(再後來發現其實直接用 NPOI 就可以了)。
然後就開始嘗試。
獲取圖片的部分有點坑,踩了好一陣子,找了很久才找到對應關係;另外,兩張一樣的圖片只會顯示一張。
GitHub 地址如下,如果對你有幫助,麻煩幫忙 Star 一下,謝謝。
https://github.com/ToolsByXLG/NPOI.Word2Html
首先透過 NuGet 引用 NPOI:
Install-Package NPOI
理論上如果不是 .NET Core 的話,直接引用 NPOI 就好。
代碼可能不太完善,可以再稍微改進一下。
總之已經可以在 CentOS 上正常把 Word 轉成 HTML 了。
// Open the sample document "1.docx" read-only. The code that opens the stream owns it
// and should dispose it once it is no longer needed.
FileStream stream = File.OpenRead("1.docx");
代碼已更新,現在我的項目中用的就是這個類;上傳阿里雲的部分請自行修改。
using NPOI.OpenXmlFormats.Wordprocessing; using NPOI.XWPF.UserModel;
/// <summary>
/// Converts a Word (.docx) document into an HTML fragment using NPOI.
/// Paragraphs become &lt;p&gt; elements, tables become &lt;table&gt; elements and inline
/// pictures become &lt;img&gt; elements whose source is either an uploaded URL or a
/// base64 data URI.
/// </summary>
public class NpoiDocHelper
{
    private readonly IBinaryObjectManager _binaryObjectManager;

    /// <summary>
    /// Creates the helper.
    /// </summary>
    /// <param name="binaryObjectManager">Storage service used to upload pictures when uploading is requested.</param>
    public NpoiDocHelper(IBinaryObjectManager binaryObjectManager)
    {
        _binaryObjectManager = binaryObjectManager;
    }

    /// <summary>
    /// Reads a .docx document from <paramref name="stream"/> and renders its body as HTML.
    /// </summary>
    /// <param name="stream">Stream containing the .docx document.</param>
    /// <param name="isImgUploadAliYun">
    /// When true, pictures are uploaded through the binary object manager and referenced by URL;
    /// otherwise (or when the upload fails) they are embedded as base64 data URIs.
    /// </param>
    /// <returns>The generated HTML with empty <c>style=''</c> attributes removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public async Task<string> NpoiDoc(Stream stream, bool isImgUploadAliYun = false)
    {
        if (stream == null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        // XWPFDocument reads the Word 2007+ (.docx) format.
        var myDocx = new XWPFDocument(stream);
        var picInfoList = await PicturesHandleAsync(myDocx, isImgUploadAliYun);

        var sb = new StringBuilder();
        foreach (var para in myDocx.BodyElements)
        {
            switch (para.ElementType)
            {
                case BodyElementType.PARAGRAPH:
                    sb.Append(ParaGraphHandle((XWPFParagraph) para, picInfoList));
                    break;
                case BodyElementType.TABLE:
                    sb.Append(TableHandle((XWPFTable) para, picInfoList));
                    break;
            }
        }

        // The handlers always open a style attribute; drop the ones that ended up empty.
        return sb.Replace(" style=''", "").ToString();
    }

    /// <summary>
    /// Collects every picture of the document together with the relationship id that the
    /// document body uses to reference it.
    /// </summary>
    /// <param name="myDocx">The opened document.</param>
    /// <param name="isImgUploadAliYun">Whether pictures should be uploaded instead of embedded.</param>
    /// <returns>One <see cref="PicInfo"/> per picture, each with a non-empty <see cref="PicInfo.Url"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="myDocx"/> is null.</exception>
    public async Task<List<PicInfo>> PicturesHandleAsync(XWPFDocument myDocx, bool isImgUploadAliYun = false)
    {
        if (myDocx == null)
        {
            throw new ArgumentNullException(nameof(myDocx));
        }

        var picInfoList = new List<PicInfo>();
        foreach (var pictures in myDocx.AllPictures)
        {
            var pData = pictures.Data;
            var picInfo = new PicInfo
            {
                Id = pictures.GetPackageRelationship().Id,
                PicType = pictures.GetPackagePart().ContentType
            };

            if (isImgUploadAliYun)
            {
                try
                {
                    // Upload the picture bytes to obtain a URL; replace this call if another
                    // storage backend is used.
                    picInfo.Url = await _binaryObjectManager.SaveAsync(new BinaryObject
                    {
                        Bytes = pData,
                        FileName = pictures.FileName,
                        FileType = picInfo.PicType
                    });
                }
                catch (Exception)
                {
                    // Deliberate best effort: a failed upload must not abort the conversion.
                    // The picture falls back to an embedded data URI below.
                }
            }

            if (string.IsNullOrWhiteSpace(picInfo.Url))
            {
                picInfo.Url = $"data:{picInfo.PicType};base64,{Convert.ToBase64String(pData)}";
            }

            picInfoList.Add(picInfo);
        }

        return picInfoList;
    }

    /// <summary>
    /// Renders a Word table as an HTML &lt;table&gt;.
    /// </summary>
    /// <param name="paraTable">The table to render.</param>
    /// <param name="picInfoList">Pictures of the document, used for images inside cells.</param>
    /// <returns>The HTML of the table.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="paraTable"/> is null.</exception>
    public StringBuilder TableHandle(XWPFTable paraTable, List<PicInfo> picInfoList)
    {
        if (paraTable == null)
        {
            throw new ArgumentNullException(nameof(paraTable));
        }

        var sb = new StringBuilder();
        sb.Append("<table border='1' cellspacing='0'>");
        foreach (var row in paraTable.Rows)
        {
            // No row-level styling is produced; the empty attribute is stripped by NpoiDoc.
            sb.Append("<tr style=''>");
            foreach (var cell in row.GetTableCells())
            {
                // A cell without explicit properties has no tcPr element, so guard against null.
                var tcPr = cell.GetCTTc().tcPr;

                sb.Append("<td style='");

                // NOTE(review): the raw w:tcW value is emitted as px. The unit of that value
                // presumably depends on tcW.type (e.g. twentieths of a point) - confirm and convert.
                var width = tcPr?.tcW?.w;
                if (!string.IsNullOrWhiteSpace(width))
                {
                    sb.Append($"width:{HtmlEncode(width)}px;");
                }

                var fill = tcPr?.shd?.fill;
                if (HasExplicitColor(fill))
                {
                    sb.Append($"background-color: #{HtmlEncode(fill)};");
                }

                sb.Append("'>");

                foreach (var cellParagraph in cell.Paragraphs)
                {
                    sb.Append(ParaGraphHandle(cellParagraph, picInfoList));
                }

                sb.Append("</td>");
            }

            sb.Append("</tr>");
        }

        sb.Append("</table>");
        return sb;
    }

    /// <summary>
    /// Renders the text of a run as one &lt;span&gt; per text node, styled from the run properties
    /// (colour, highlight, italic, bold, size and font family).
    /// </summary>
    /// <param name="ctr">The run to render.</param>
    /// <returns>The HTML of the run's text; empty when the run has no text nodes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ctr"/> is null.</exception>
    public StringBuilder FontHandle(CT_R ctr)
    {
        if (ctr == null)
        {
            throw new ArgumentNullException(nameof(ctr));
        }

        var sb = new StringBuilder();

        // The style depends only on the run properties, so build it once for all text nodes.
        var style = BuildRunStyle(ctr);

        foreach (var text in ctr.GetTList())
        {
            sb.Append("<span style='");
            sb.Append(style);
            sb.Append("'>");
            // Document text is untrusted input: encode it so that '<', '&' and quotes
            // cannot break or inject markup.
            sb.Append(HtmlEncode(text.Value));
            sb.Append("</span>");
        }

        return sb;
    }

    /// <summary>
    /// Renders the inline pictures of a run as &lt;img&gt; elements.
    /// </summary>
    /// <param name="ctr">The run to inspect.</param>
    /// <param name="picInfoList">Pictures of the document; a null list is treated as empty.</param>
    /// <returns>The HTML of the pictures; empty when the run references no known picture.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ctr"/> is null.</exception>
    public StringBuilder DrawingHandle(CT_R ctr, List<PicInfo> picInfoList)
    {
        if (ctr == null)
        {
            throw new ArgumentNullException(nameof(ctr));
        }

        var sb = new StringBuilder();
        if (picInfoList == null)
        {
            return sb;
        }

        foreach (var drawing in ctr.GetDrawingList())
        {
            var inlineList = drawing.GetInlineList();
            if (inlineList == null)
            {
                continue;
            }

            foreach (var inline in inlineList)
            {
                var anyList = inline.graphic?.graphicData?.Any;
                if (anyList == null)
                {
                    continue;
                }

                foreach (var any1 in anyList)
                {
                    if (any1 == null)
                    {
                        continue;
                    }

                    // The raw graphic XML references its picture through the relationship id
                    // in the blip's r:embed attribute; match it against the collected pictures.
                    var picture = picInfoList.FirstOrDefault(x =>
                        any1.IndexOf("a:blip r:embed=\"" + x.Id + "\"", StringComparison.Ordinal) > -1);

                    if (picture != null && !string.IsNullOrWhiteSpace(picture.Url))
                    {
                        sb.Append("<img src='" + HtmlEncode(picture.Url) + "' />");
                    }
                }
            }
        }

        return sb;
    }

    /// <summary>
    /// Produces the opening &lt;p&gt; tag of a paragraph, including alignment and first-line indent.
    /// </summary>
    /// <param name="paragraph">The paragraph to describe.</param>
    /// <returns>The opening tag, e.g. <c>&lt;p style='text-align:center;'&gt;</c>.</returns>
    public StringBuilder TagPHandle(XWPFParagraph paragraph)
    {
        var sb = new StringBuilder();
        sb.Append("<p style='");
        try
        {
            // Horizontal alignment.
            var fontAlignment = paragraph.FontAlignment;
            string fontAlignmentName;
            switch (fontAlignment)
            {
                case 1:
                    fontAlignmentName = "left";
                    break;
                case 2:
                    fontAlignmentName = "center";
                    break;
                case 3:
                    fontAlignmentName = "right";
                    break;
                default:
                    fontAlignmentName = "auto";
                    break;
            }

            // Values 0 and 1 (mapped to "auto" and "left") need no explicit style.
            if (fontAlignment > 1)
            {
                sb.Append($"text-align:{fontAlignmentName};");
            }

            // Integer division: every full 240 units of first-line indent becomes 1em.
            var em = paragraph.IndentationFirstLine / 240;
            if (em > 0)
            {
                sb.Append($"text-indent:{em}em;");
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: if the paragraph properties cannot be read,
            // the paragraph is emitted with whatever style was produced so far.
        }

        sb.Append("'>");
        return sb;
    }

    /// <summary>
    /// Renders a paragraph as a &lt;p&gt; element containing the pictures and text of its runs.
    /// </summary>
    /// <param name="paragraph">The paragraph to render.</param>
    /// <param name="picInfoList">Pictures of the document.</param>
    /// <returns>The HTML of the paragraph.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="paragraph"/> is null.</exception>
    public StringBuilder ParaGraphHandle(XWPFParagraph paragraph, List<PicInfo> picInfoList)
    {
        if (paragraph == null)
        {
            throw new ArgumentNullException(nameof(paragraph));
        }

        var sb = new StringBuilder();
        sb.Append(TagPHandle(paragraph));

        foreach (var run in paragraph.Runs)
        {
            var ctr = run.GetCTR();
            // Pictures of a run are emitted before its text.
            sb.Append(DrawingHandle(ctr, picInfoList));
            sb.Append(FontHandle(ctr));
        }

        sb.Append("</p>");
        return sb;
    }

    /// <summary>
    /// Builds the inline CSS declarations for the properties of a run.
    /// </summary>
    private static string BuildRunStyle(CT_R ctr)
    {
        var rPr = ctr.rPr;
        var style = new StringBuilder();
        if (rPr == null)
        {
            return string.Empty;
        }

        var color = rPr.color?.val;
        if (HasExplicitColor(color))
        {
            style.Append($"color:#{HtmlEncode(color)};");
        }

        var highlight = rPr.highlight?.val.ToString();
        if (!string.IsNullOrWhiteSpace(highlight)
            && !string.Equals(highlight, "none", StringComparison.OrdinalIgnoreCase))
        {
            style.Append($"background-color: {highlight};");
        }

        if (rPr.i?.val == true)
        {
            style.Append("font-style:italic;");
        }

        if (rPr.b?.val == true)
        {
            style.Append("font-weight:bold;");
        }

        if (rPr.sz != null)
        {
            // w:sz is expressed in half-points, so halve it and emit points.
            var points = (rPr.sz.val / 2.0).ToString(System.Globalization.CultureInfo.InvariantCulture);
            style.Append("font-size:" + points + "pt;");
        }

        var fontFamily = rPr.rFonts?.ascii;
        if (!string.IsNullOrWhiteSpace(fontFamily))
        {
            style.Append($"font-family:{HtmlEncode(fontFamily)};");
        }

        return style.ToString();
    }

    /// <summary>
    /// Returns true when <paramref name="value"/> holds a colour value rather than
    /// being empty or the keyword "auto", which is not a valid CSS hex colour.
    /// </summary>
    private static bool HasExplicitColor(string value)
    {
        return !string.IsNullOrWhiteSpace(value)
               && !string.Equals(value, "auto", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// HTML-encodes a value so it is safe inside element content and single-quoted attributes.
    /// </summary>
    private static string HtmlEncode(string value)
    {
        return System.Net.WebUtility.HtmlEncode(value);
    }

    /// <summary>
    /// Describes one picture of the document.
    /// </summary>
    public class PicInfo
    {
        /// <summary>
        /// Relationship id under which the document body references the picture.
        /// </summary>
        public string Id { get; set; }

        /// <summary>
        /// Content type of the picture.
        /// </summary>
        public string PicType { get; set; }

        /// <summary>
        /// Uploaded URL, or a base64 data URI when the picture is embedded.
        /// </summary>
        public string Url { get; set; }
    }
}