1.添加需要的jar包:
<!-- xdocreport core document module -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- Apache POI core -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>
<!-- Apache POI scratchpad (HWPF, used for .doc files) -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.15</version>
</dependency>
<!-- xdocreport XWPF (.docx) to XHTML converter -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
    <version>2.0.1</version>
</dependency>
2.來一個小demo吧。
對於該demo,描述幾個我覺得需要注意的點:
2.1:如果遇到了jar包衝突的現象,可以考慮修改一下jar包的版本號,基本上就沒什麼問題了;
2.2:word文檔的後綴有.doc和.docx,兩者的轉換方法並不一樣。所以,對於不同的文檔,我們需要先知道其文檔後綴是什麼,才能進行下一步操作;
2.3:此demo,我選擇通過接口直接返回動態的html,當然,如果想生成一個靜態的html,可以自己修改輸出方式;
2.4:對於文檔中的圖片如何轉化的問題,暫時選擇用base64編碼後嵌入到html中;
2.5:最後,此demo中測試轉化的文檔,目前只測試了簡單的文本加圖片,所以可能還有別的問題有待發現並解決。
/**
 * HTTP GET endpoint for converting a Word document to HTML.
 *
 * <p>Delegates entirely to {@code demoService.convertWordToHtml}, handing it the
 * request parameter and the servlet response.
 *
 * @param id                  required request parameter forwarded to the service
 *                            (presumably identifies the document to convert)
 * @param httpServletResponse servlet response forwarded to the service
 * @throws Exception if the service call fails
 */
@ApiOperation("將word轉成html")
@GetMapping("/convertWordToHtml")
public void convertWordToHtml(@RequestParam String id,
                              HttpServletResponse httpServletResponse) throws Exception {
    demoService.convertWordToHtml(id, httpServletResponse);
}
// Some unimportant code is omitted here: the document to be converted only needs to be
// made available as an InputStream.
// NOTE(review): inputStream is a null placeholder in this snippet and must be assigned
// a real stream before either converter below is called.
InputStream inputStream = null;
OutputStream outputStream = httpServletResponse.getOutputStream();
/**
 * Converts a .docx document to HTML and writes the result to the given stream.
 *
 * <p>The document is read from {@code inputStream}, which is declared outside this
 * method. Images are handled by {@code Base64EmbedImgManager}. Both {@code inputStream}
 * and {@code outputStream} are closed before this method returns, including when the
 * conversion fails.
 *
 * @param outputStream stream that receives the generated HTML; closed by this method
 * @throws Exception if the document cannot be loaded or converted
 */
public static void convertDocxFileToHtml(OutputStream outputStream) throws Exception {
    // try-with-resources releases the document and both streams even when loading or
    // conversion throws; the original only closed them on the success path.
    try (InputStream in = inputStream;
         OutputStream out = outputStream;
         XWPFDocument document = new XWPFDocument(in)) {
        XHTMLOptions options = XHTMLOptions.create();
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);
        // Embed images as base64 instead of writing them out as separate files.
        options.setImageManager(new Base64EmbedImgManager());
        // Convert to HTML.
        XHTMLConverter.getInstance().convert(document, out, options);
        out.flush();
    }
}
/**
 * Converts a .doc document to HTML and writes the result to the given stream.
 *
 * <p>The document is read from {@code inputStream}, which is declared outside this
 * method. Pictures are inlined as base64 data URIs labelled with the MIME type that
 * POI reports for each picture. The HTML is serialized as UTF-8 with indentation.
 * Both {@code inputStream} and {@code outputStream} are closed before this method
 * returns, including when the conversion fails, matching {@code convertDocxFileToHtml}.
 *
 * @param outputStream stream that receives the generated HTML; closed by this method
 * @throws Exception if the document cannot be loaded, converted, or serialized
 */
public static void convertDocFileToHtml(OutputStream outputStream) throws Exception {
    // try-with-resources closes both streams on success and on failure.
    try (InputStream in = inputStream; OutputStream out = outputStream) {
        // If the stream is non-null but loading the document fails, check whether the
        // file was actually saved by Office Word.
        HWPFDocument wordDocument = (HWPFDocument) WordToHtmlUtils.loadDoc(in);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        );
        // Inline each picture as a base64 data URI using its own MIME type rather than
        // labelling every picture as PNG.
        PicturesManager picturesManager =
                (content, pictureType, suggestedName, widthInches, heightInches) ->
                        "data:" + pictureType.getMime() + ";base64," + Base64.encodeBase64String(content);
        wordToHtmlConverter.setPicturesManager(picturesManager);
        // Walk the Word document and build the HTML DOM.
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        // Serialize the DOM to the output stream as HTML.
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        out.flush();
    }
}