Java 把 Word 文件转换成 HTML 字符串并返回


1、需求:解析前端上传的 Word 文件,生成 HTML 字符串并返回给前端展示。Word 里面的图片可以忽略不显示,所以下面这段代码去掉了解析图片的部分。

package com.lieni.core.util; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.springframework.web.multipart.MultipartFile; import org.w3c.dom.Document; import com.itextpdf.text.log.Logger; import com.itextpdf.text.log.LoggerFactory; /** * Created by LTmei on 2018/10/10 10:00 */
/**
 * Converts uploaded Microsoft Word files into an HTML string.
 *
 * <p>{@link #Word2007ToHtml(MultipartFile)} handles {@code .docx} uploads and
 * {@link #Word2003ToHtml(MultipartFile)} handles {@code .doc} uploads. Both
 * methods log an error and return {@code null} when the upload is missing,
 * empty, or does not carry the expected file extension.
 *
 * <p>No converter options or picture handling are configured here; the
 * converters run with their defaults.
 */
public class Word2HtmlUtil {

    /**
     * Logger used to report rejected uploads.
     * NOTE(review): this is iText's logging facade (see the file's imports),
     * not the application's usual logging API - confirm that is intended.
     */
    private static final Logger logger = LoggerFactory.getLogger(Word2HtmlUtil.class);

    /** Encoding used both to serialize and to decode the HTML of .doc files. */
    private static final String HTML_ENCODING = "utf-8";

    /**
     * Converts an uploaded {@code .docx} file into an XHTML string.
     *
     * @param file the uploaded Word 2007+ document
     * @return the generated markup, or {@code null} if the upload is missing,
     *         empty, or its original filename does not end with {@code .docx}
     *         (compared case-insensitively)
     * @throws IOException if the upload cannot be read or converted
     */
    public static String Word2007ToHtml(MultipartFile file) throws IOException {
        if (isMissing(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file, ".docx")) {
            logger.error("Enter only MS Office 2007+ files");
            return null;
        }

        // Close the upload stream even when parsing or conversion fails.
        try (InputStream in = file.getInputStream()) {
            // 1) Load the Word document into an XWPFDocument.
            XWPFDocument document = new XWPFDocument(in);

            // 2) Render it into an in-memory buffer; null means "no converter options".
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(document, baos, null);

            // NOTE(review): decoded with the platform default charset, exactly as
            // before. Which charset XHTMLConverter writes is not visible from this
            // file - confirm it before switching this to an explicit charset.
            return baos.toString();
        }
    }

    /**
     * Converts an uploaded {@code .doc} file into an HTML string.
     *
     * @param file the uploaded Word 97-2003 document
     * @return the generated markup, or {@code null} if the upload is missing,
     *         empty, or its original filename does not end with {@code .doc}
     *         (compared case-insensitively)
     * @throws IOException if the upload cannot be read
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException if the DOM cannot be serialized to HTML
     */
    public static String Word2003ToHtml(MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {
        if (isMissing(file)) {
            logger.error("Sorry File does not Exists!");
            return null;
        }
        if (!hasExtension(file, ".doc")) {
            logger.error("Enter only MS Office 2003 files");
            return null;
        }

        // Close the upload stream even when parsing or conversion fails.
        try (InputStream input = file.getInputStream()) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

            // Walk the Word document and build the HTML DOM.
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            // Serialize the DOM as indented HTML into an in-memory buffer.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, HTML_ENCODING);
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(baos));

            // Decode with the same encoding the serializer wrote; relying on the
            // platform default corrupts non-ASCII text on non-UTF-8 JVMs.
            return baos.toString(HTML_ENCODING);
        }
    }

    /** Returns true when there is no upload or the upload has no content. */
    private static boolean isMissing(MultipartFile file) {
        return file == null || file.isEmpty() || file.getSize() <= 0;
    }

    /**
     * Returns true when the upload's original filename ends with the given
     * extension, ignoring case. A null filename never matches.
     */
    private static boolean hasExtension(MultipartFile file, String extension) {
        String name = file.getOriginalFilename();
        if (name == null) {
            return false;
        }
        int offset = name.length() - extension.length();
        return offset >= 0
                && name.regionMatches(true, offset, extension, 0, extension.length());
    }
}

 


免责声明!

本站转载的文章为个人学习借鉴使用,本站对版权不负任何法律责任。如果侵犯了您的隐私权益,请联系本站邮箱yoyou2525@163.com删除。



 
粤ICP备18138465号  © 2018-2025 CODEPRJ.COM