NetCore中帶圖片的word轉html (NPOI.Word2Html)


NetCore的word轉html很少見,特別是帶圖片轉換為html的

查了很多資料做了些嘗試

DocX 免費的不支持NetCore了

後來在 NuGet 上查找,發現 DotNetCore.NPOI 可以引用(後來發現直接引用 NPOI 其實就可以了)

然後就開始嘗試

獲取圖片有點坑,踩了好一會兒,找了好久才找到圖片與文檔內容的對應關係。另外還有一個已知問題:兩張一樣的圖片只會顯示一張。

 

github地址如下 如果有幫助 麻煩幫忙Star一下謝謝

https://github.com/ToolsByXLG/NPOI.Word2Html

 

首先nuget引用 NPOI

Install-Package NPOI

 

理論上如果不是netcore的話 直接引用NPOI就好

可能不太完善 可以稍微改進下

反正可以正常在centos上word轉成html了

 

// Open the sample document read-only; NpoiDocHelper.NpoiDoc below accepts any readable Stream.
var stream = File.OpenRead("1.docx");

更新了代碼,現在我項目中就是用的這個類,上傳阿里雲部分自己改改

using NPOI.OpenXmlFormats.Wordprocessing;
using NPOI.XWPF.UserModel;

 

 

  /// <summary>
  ///     Converts the body of a .docx document, read through NPOI's XWPF model, into an HTML fragment.
  ///     Paragraphs become &lt;p&gt; elements, tables become &lt;table&gt; elements and inline pictures
  ///     become &lt;img&gt; elements whose source is either an uploaded URL or a base64 data URI.
  /// </summary>
  public class NpoiDocHelper
    {
        private readonly IBinaryObjectManager _binaryObjectManager;


        /// <summary>
        ///     Creates the helper.
        /// </summary>
        /// <param name="binaryObjectManager">Storage service used to upload pictures when uploading is requested.</param>
        public NpoiDocHelper(IBinaryObjectManager binaryObjectManager)
        {
            _binaryObjectManager = binaryObjectManager;
        }

        /// <summary>
        ///     Reads a .docx document from <paramref name="stream" /> and renders its body elements as HTML.
        /// </summary>
        /// <param name="stream">Readable stream positioned at the start of a .docx (Word 2007+) file.</param>
        /// <param name="isImgUploadAliYun">
        ///     When true, each picture is saved through the injected <c>IBinaryObjectManager</c> and the value
        ///     it returns is used as the image source; otherwise pictures are embedded as data URIs.
        /// </param>
        /// <returns>The generated HTML fragment with empty <c>style=''</c> attributes stripped.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream" /> is null.</exception>
        public async Task<string> NpoiDoc(Stream stream, bool isImgUploadAliYun = false)
        {
            if (stream == null) throw new ArgumentNullException(nameof(stream));

            var myDocx = new XWPFDocument(stream); // opens Word 2007+ (.docx) documents

            var picInfoList = await PicturesHandleAsync(myDocx, isImgUploadAliYun);

            var sb = new StringBuilder();

            // Body elements are visited in the order NPOI exposes them; other element types are ignored.
            foreach (var para in myDocx.BodyElements)
            {
                switch (para.ElementType)
                {
                    case BodyElementType.PARAGRAPH:
                        sb.Append(ParaGraphHandle((XWPFParagraph) para, picInfoList));
                        break;

                    case BodyElementType.TABLE:
                        sb.Append(TableHandle((XWPFTable) para, picInfoList));
                        break;
                }
            }

            // The handlers always open a style attribute; drop the ones that ended up empty.
            // Document text is HTML-encoded (quotes become &#39;), so this cannot match user text.
            return sb.Replace(" style=''", "").ToString();
        }

        /// <summary>
        ///     Collects every picture of the document together with the source that an &lt;img&gt; should use.
        /// </summary>
        /// <param name="myDocx">The opened document.</param>
        /// <param name="isImgUploadAliYun">Whether pictures should be uploaded instead of embedded.</param>
        /// <returns>One <see cref="PicInfo" /> per entry of <c>AllPictures</c>, keyed by its relationship id.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="myDocx" /> is null.</exception>
        public async Task<List<PicInfo>> PicturesHandleAsync(XWPFDocument myDocx, bool isImgUploadAliYun = false)
        {
            if (myDocx == null) throw new ArgumentNullException(nameof(myDocx));

            var picInfoList = new List<PicInfo>();
            foreach (var pictures in myDocx.AllPictures)
            {
                var pData = pictures.Data;
                var picInfo = new PicInfo
                {
                    Id = pictures.GetPackageRelationship().Id,
                    PicType = pictures.GetPackagePart().ContentType
                };

                if (isImgUploadAliYun)
                {
                    try
                    {
                        // Upload hook: replace this call to target a different storage backend.
                        var url = await _binaryObjectManager.SaveAsync(new BinaryObject
                            {Bytes = pData, FileName = pictures.FileName, FileType = picInfo.PicType});
                        picInfo.Url = url;
                    }
                    catch (Exception ex)
                    {
                        // Uploading is best-effort: a failure must not abort the conversion, the picture
                        // is embedded as a data URI below instead. Leave a trace so the failure is visible.
                        System.Diagnostics.Trace.TraceWarning(
                            "Picture upload failed for relationship {0}; embedding it as a data URI. {1}",
                            picInfo.Id, ex);
                    }
                }

                // No upload requested, upload failed, or the service returned nothing: embed the bytes.
                if (string.IsNullOrWhiteSpace(picInfo.Url))
                    picInfo.Url = $"data:{picInfo.PicType};base64,{Convert.ToBase64String(pData)}";

                picInfoList.Add(picInfo);
            }

            return picInfoList;
        }

        /// <summary>
        ///     Renders a Word table as an HTML &lt;table&gt;.
        /// </summary>
        /// <param name="paraTable">The table to render.</param>
        /// <param name="picInfoList">Picture lookup produced by <see cref="PicturesHandleAsync" />.</param>
        /// <returns>The table markup.</returns>
        public StringBuilder TableHandle(XWPFTable paraTable, List<PicInfo> picInfoList)
        {
            var sb = new StringBuilder();

            sb.Append("<table border='1' cellspacing='0'>");
            foreach (var row in paraTable.Rows)
            {
                // Rows carry no styling yet; the empty attribute is stripped later by NpoiDoc.
                sb.Append("<tr style=''>");

                foreach (var cell in row.GetTableCells())
                {
                    // Cell properties are optional in the document, so tcPr may be null.
                    var tcPr = cell.GetCTTc().tcPr;

                    sb.Append("<td style='");

                    // NOTE(review): the raw width is emitted as px, as before. Its real unit depends on
                    // tcW.type (presumably twentieths of a point or a percentage) - confirm and convert.
                    var width = tcPr?.tcW?.w;
                    if (!string.IsNullOrWhiteSpace(width))
                        sb.Append($"width:{Encode(width)}px;");

                    // Shading fill may be the keyword "auto", which is not a usable CSS hex colour.
                    var fill = tcPr?.shd?.fill;
                    if (IsHexColor(fill))
                        sb.Append($"background-color: #{fill};");

                    sb.Append("'>");
                    foreach (var cellParagraph in cell.Paragraphs)
                        sb.Append(ParaGraphHandle(cellParagraph, picInfoList));

                    sb.Append("</td>");
                }

                sb.Append("</tr>");
            }

            sb.Append("</table>");
            return sb;
        }

        /// <summary>
        ///     Renders the text nodes of a run as &lt;span&gt; elements carrying the run's character formatting.
        /// </summary>
        /// <param name="ctr">The underlying run element.</param>
        /// <returns>One span per text node of the run; empty when the run has no text.</returns>
        public StringBuilder FontHandle(CT_R ctr)
        {
            var sb = new StringBuilder();
            var rPr = ctr.rPr;

            // The formatting belongs to the run, so build the style once and reuse it for every text node.
            var style = new StringBuilder();

            // Colour may be the keyword "auto"; only real RGB hex values are valid after '#'.
            var color = rPr?.color?.val;
            if (IsHexColor(color))
                style.Append($"color:#{color};");

            // NOTE(review): Word highlight names such as darkYellow are not CSS colour keywords - confirm
            // whether a mapping table is wanted. "none" means no highlight and is skipped.
            var highlight = rPr?.highlight?.val.ToString();
            if (!string.IsNullOrWhiteSpace(highlight) &&
                !string.Equals(highlight, "none", StringComparison.OrdinalIgnoreCase))
                style.Append($"background-color: {highlight};");

            if (rPr?.i?.val == true)
                style.Append("font-style:italic;");
            if (rPr?.b?.val == true)
                style.Append("font-weight:bold;");

            // w:sz stores the size in half-points, so halve it and emit points rather than pixels.
            if (rPr?.sz != null)
            {
                var points = (rPr.sz.val / 2.0).ToString(System.Globalization.CultureInfo.InvariantCulture);
                style.Append($"font-size:{points}pt;");
            }

            var fontFamily = rPr?.rFonts?.ascii;
            if (!string.IsNullOrWhiteSpace(fontFamily))
                style.Append($"font-family:{Encode(fontFamily)};");

            foreach (var text in ctr.GetTList())
            {
                sb.Append("<span style='").Append(style).Append("'>");
                // Document text is untrusted content: encode it so '<', '&' and quotes stay literal.
                sb.Append(Encode(text.Value));
                sb.Append("</span>");
            }

            return sb;
        }

        /// <summary>
        ///     Renders the inline drawings of a run as &lt;img&gt; elements.
        /// </summary>
        /// <param name="ctr">The underlying run element.</param>
        /// <param name="picInfoList">Picture lookup produced by <see cref="PicturesHandleAsync" />.</param>
        /// <returns>One &lt;img&gt; per graphic fragment that references a known picture.</returns>
        public StringBuilder DrawingHandle(CT_R ctr, List<PicInfo> picInfoList)
        {
            var sb = new StringBuilder();
            foreach (var drawing in ctr.GetDrawingList())
            {
                foreach (var inline in drawing.GetInlineList())
                {
                    var fragments = inline.graphic?.graphicData?.Any;
                    if (fragments == null) continue;

                    foreach (var fragment in fragments)
                    {
                        // The graphic data is raw XML text; a picture is matched by the relationship id
                        // in its a:blip element. The closing quote keeps rId1 from matching rId10.
                        var picture = picInfoList.FirstOrDefault(x =>
                            fragment.IndexOf("a:blip r:embed=\"" + x.Id + "\"", StringComparison.Ordinal) > -1);
                        if (picture != null && !string.IsNullOrWhiteSpace(picture.Url))
                            sb.Append($"<img src='{Encode(picture.Url)}' />");
                    }
                }
            }

            return sb;
        }

        /// <summary>
        ///     Builds the opening &lt;p&gt; tag for a paragraph, including alignment and first-line indent.
        /// </summary>
        /// <param name="paragraph">The paragraph being rendered.</param>
        /// <returns>The opening tag only; the caller appends the content and the closing tag.</returns>
        public StringBuilder TagPHandle(XWPFParagraph paragraph)
        {
            var sb = new StringBuilder();
            sb.Append("<p style='");

            try
            {
                // Horizontal alignment. Values follow the POI ParagraphAlignment numbering
                // (1 left, 2 center, 3 right, 4 both). Left is the HTML default and needs no style;
                // anything else has no direct CSS equivalent and is left unstyled.
                switch (paragraph.FontAlignment)
                {
                    case 2:
                        sb.Append("text-align:center;");
                        break;
                    case 3:
                        sb.Append("text-align:right;");
                        break;
                    case 4:
                        sb.Append("text-align:justify;");
                        break;
                }

                // The indent is reported in twentieths of a point. 240 (12pt) is treated as one em,
                // as in the original code; floating-point division keeps fractional indents.
                var em = paragraph.IndentationFirstLine / 240.0;
                if (em > 0)
                {
                    var emText = em.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture);
                    sb.Append($"text-indent:{emText}em;");
                }
            }
            catch (Exception)
            {
                // Paragraph styling is best-effort: if the properties cannot be read,
                // the paragraph is still emitted, just without (all of) its styling.
            }

            sb.Append("'>");
            return sb;
        }

        /// <summary>
        ///     Renders one paragraph: the opening tag, then the pictures and text of each run in order.
        /// </summary>
        /// <param name="paragraph">The paragraph to render.</param>
        /// <param name="picInfoList">Picture lookup produced by <see cref="PicturesHandleAsync" />.</param>
        /// <returns>The complete &lt;p&gt; element.</returns>
        public StringBuilder ParaGraphHandle(XWPFParagraph paragraph, List<PicInfo> picInfoList)
        {
            var sb = new StringBuilder();

            sb.Append(TagPHandle(paragraph));

            foreach (var run in paragraph.Runs)
            {
                var ctr = run.GetCTR();

                // Within a run, pictures are emitted before the text.
                sb.Append(DrawingHandle(ctr, picInfoList));
                sb.Append(FontHandle(ctr));
            }

            sb.Append("</p>");
            return sb;
        }

        /// <summary>
        ///     HTML-encodes a value taken from the document so it is safe inside text and
        ///     single-quoted attribute values.
        /// </summary>
        private static string Encode(string value)
        {
            return System.Net.WebUtility.HtmlEncode(value);
        }

        /// <summary>
        ///     Returns true when <paramref name="value" /> is exactly six hexadecimal digits (RRGGBB).
        /// </summary>
        private static bool IsHexColor(string value)
        {
            if (value == null || value.Length != 6) return false;

            foreach (var c in value)
            {
                var isHexDigit = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
                if (!isHexDigit) return false;
            }

            return true;
        }

        /// <summary>
        ///     Describes one picture of the document and where an &lt;img&gt; should load it from.
        /// </summary>
        public class PicInfo
        {
            /// <summary>
            ///     Relationship id of the picture (the value referenced by <c>r:embed</c>).
            /// </summary>
            public string Id { get; set; }

            /// <summary>
            ///     Content type of the picture part.
            /// </summary>
            public string PicType { get; set; }

            /// <summary>
            ///     Uploaded address, or a base64 data URI when the picture was not uploaded.
            /// </summary>
            public string Url { get; set; }
        }
    }

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM