Word文档转换为图片

本文转载自查看原文 2021-09-02 11:38 126 JAVA/ word

实现方式是先将 Word 转换为 PDF，然后再将 PDF 转换为图片

用到的依赖包（除下面列出的 documents4j 外，代码还用到了 PDFBox 2.x、Lombok 和 SLF4J，需要自行引入）

 compile "com.documents4j:documents4j-local:1.0.3"
 compile "com.documents4j:documents4j-transformer-msoffice-word:1.0.3"

实现

package com.viewhigh.epro.svc.util;

import com.documents4j.api.DocumentType;
import com.documents4j.api.IConverter;
import com.documents4j.job.LocalConverter;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * @author qiao
 * @version 1.0
 * @date 2021/8/30 9:43
 */
@Slf4j
public class WordUtil {

    /**
     * word转pdf
     *
     * @param wordInputStream word输入流
     * @return pdf字节数据
     */
    public static byte[] wordToPdf(InputStream wordInputStream) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        IConverter converter = LocalConverter.builder().build();
        converter.convert(wordInputStream).as(DocumentType.DOC).to(byteArrayOutputStream).as(DocumentType.PDF).execute();
        return byteArrayOutputStream.toByteArray();
    }

    /**
     * pdf转图片
     *
     * @param pdfData pdf字节数据
     * @param scale   图片比例(A scale of 1 will render at 72 DPI.)
     *                根据需求自行调整，数值过大转换会慢
     * @return 转换后图片数据的字节数组
     */
    @SneakyThrows
    public static List<byte[]> pdfToImage(byte[] pdfData, float scale, boolean isMultiply, Boolean isHorizontal) {
        try (PDDocument doc = PDDocument.load(pdfData)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            List<BufferedImage> bufferedImageList = new ArrayList<>();
            List<byte[]> retList = new ArrayList<>();
            for (int i = 0; i < pageCount; i++) {
                // 第二个参数越大生成图片分辨率越高，转换时间也就越长
                BufferedImage image = renderer.renderImage(i, scale);
                bufferedImageList.add(image);

            }
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            if (isMultiply) {
                retList = bufferedImageList.stream().map(item -> {
                    byte[] retBytes = new byte[0];
                    try {
                        ImageIO.write(item, "PNG", byteArrayOutputStream);
                        retBytes = byteArrayOutputStream.toByteArray();
                        byteArrayOutputStream.reset();
                    } catch (IOException e) {
                        log.error("读取单个图片流失败", e);
                    }
                    return retBytes;
                }).collect(Collectors.toList());
            } else {
                //合成图片
                ImageIO.write(mergeImage(isHorizontal, bufferedImageList.toArray(new BufferedImage[0])), "PNG", byteArrayOutputStream);
                retList.add(byteArrayOutputStream.toByteArray());
            }

            return retList;
        }
    }

    /**
     * word文档转换为多张图（每页一张）
     * 说明：如果word文档过大可能会有oom风险，请自行限制大小
     * @param wordInputStream word文档输入流
     * @param scale 图片比例(A scale of 1 will render at 72 DPI.)
     * @return 转换后图片数据的字节数组
     */
    public static List<byte[]> wordToMultipleImage(InputStream wordInputStream, float scale) {
        return pdfToImage(wordToPdf(wordInputStream), scale, true, null);
    }

    /**
     * word文档转为单张图
     * 说明：如果word文档过大可能会有oom风险，请自行限制大小
     * @param wordInputStream word文档输入流
     * @param scale 图片比例(A scale of 1 will render at 72 DPI.)
     * @param isHorizontal true-水平 false-垂直
     * @return 图片数据字节数组
     */
    public static byte[] wordToMergeImage(InputStream wordInputStream, float scale, Boolean isHorizontal) {
        return pdfToImage(wordToPdf(wordInputStream), scale, false, isHorizontal)
                .stream()
                .findFirst()
                .orElse(null);
    }

    /**
     * 合成多个图片
     *
     * @param isHorizontal   true-水平 false-垂直
     * @param bufferedImages 多张图片buffer
     * @return 合成后图片buffer
     */
    private static BufferedImage mergeImage(Boolean isHorizontal, BufferedImage... bufferedImages) {
        if (isHorizontal == null) {
            isHorizontal = true;
        }
        // 生成新图片
        BufferedImage destImage;
        // 计算新图片的长和高
        int allw = 0, allh = 0, maxallw = 0, maxallh = 0;
        // 获取总长、总宽、最长、最宽
        for (BufferedImage img : bufferedImages) {
            allw += img.getWidth();
            allh += img.getHeight();
            if (img.getWidth() > maxallw) {
                maxallw = img.getWidth();
            }
            if (img.getHeight() > maxallh) {
                maxallh = img.getHeight();
            }
        }
        // 创建新图片
        if (isHorizontal) {
            destImage = new BufferedImage(allw, maxallh, BufferedImage.TYPE_INT_RGB);
        } else {
            destImage = new BufferedImage(maxallw, allh, BufferedImage.TYPE_INT_RGB);
        }
        // 合并所有子图片到新图片
        int wx = 0, wy = 0;
        for (BufferedImage img : bufferedImages) {
            int w1 = img.getWidth();
            int h1 = img.getHeight();
            // 从图片中读取RGB
            int[] ImageArrayOne = new int[w1 * h1];
            ImageArrayOne = img.getRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); // 逐行扫描图像中各个像素的RGB到数组中
            if (isHorizontal) { // 水平方向合并
                destImage.setRGB(wx, 0, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB
            } else { // 垂直方向合并
                destImage.setRGB(0, wy, w1, h1, ImageArrayOne, 0, w1); // 设置上半部分或左半部分的RGB
            }
            wx += w1;
            wy += h1;
        }
        return destImage;
    }


    public static void main(String[] args) {
        File inputWord = new File("f:/b.doc");
        InputStream docxInputStream = null;
        try {
            //合成图
//            docxInputStream = new FileInputStream(inputWord);
//            ImageIO.write(ImageIO.read(new ByteArrayInputStream(wordToMergeImage(docxInputStream, 1.25f, false))), "PNG", new File("F:/HHH.PNG"));

            //多图
            docxInputStream = new FileInputStream(inputWord);
            for (byte[] data : wordToMultipleImage(docxInputStream, 1.25f)) {
                ImageIO.write(ImageIO.read(new ByteArrayInputStream(data)), "PNG", new File("F:/png/" + System.currentTimeMillis() + ".PNG"));
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (docxInputStream != null) {
                try {
                    docxInputStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

免责声明！

本站转载的文章为个人学习借鉴使用，本站对版权不负任何法律责任。如果侵犯了您的隐私权益，请联系本站邮箱yoyou2525@163.com删除。

猜您在找： 图片转换为word公式与word公式转换为latex | 将html转换为word文档的几种方式 | 利用pandoc将markdown转换为word文档 | 使用Aspose.Words将Word文档转换为Tiff格式图片文件 | 将PDF文档转换为图片的方法 | c#把word文档转换为html页面 | Java 使用 jacob 将 word 文档转换为 pdf 文件 | 使用phpword读取word文档，并转换为HTML | 将word文件转换为富文本编辑支持图片 | Word转换为markdown