Java 解析大數據量 Excel 文件時報 OOM 異常的問題及解決方法。


在解析 Excel 時,Java 通常使用 POI 包下的 XSSFWorkbook 對象。但是,遇到海量數據(比如十幾萬條),或者 JVM 堆內存設置得比較小時,就會拋出 OOM 異常,因為 XSSFWorkbook 會把整個工作簿一次載入內存。下面給出解決方法:改用 POI 的事件模型(XSSFReader + SAX)逐行讀取表格內容。

直接上代碼:

package com.xxx.xxx.xxx.common.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Parser for large {@code .xlsx} files built on the Apache POI event model
 * ({@link XSSFReader} + SAX) instead of {@code XSSFWorkbook}, so that the sheet
 * is consumed row by row rather than being materialised as a whole workbook.
 *
 * <p>The instance keeps the rows of the most recent {@code parse} call; every
 * new {@code parse} call clears them. Because the class is registered as a
 * Spring {@code @Component} and holds that state in a field, callers that share
 * one instance must make sure that {@code parse} and {@code getDatas} of
 * different requests do not interleave.
 *
 * @author Jimmy Shan
 * @date 2021-04-09
 */
@Component
public class ExcelParserBigData {
    private static final Logger logger = LoggerFactory.getLogger(ExcelParserBigData.class);

    /** Handler that receives the row/cell events; defaults to {@link DefaultSheetHandler}. */
    private ISheetContentHandler contentHandler = new DefaultSheetHandler();

    /** Rows collected by the most recent parse, header row first. */
    private final List<String[]> datas = new ArrayList<String[]>();

    /**
     * Parses the sheet with id 1 of the given workbook stream.
     *
     * @param stream the {@code .xlsx} content; it is not closed by this method
     * @return this parser, so that {@link #getDatas()} can be chained
     * @throws InvalidFormatException propagated from POI when opening the package or the sheet
     * @throws IOException            propagated from POI / the underlying streams
     * @throws ParseException         if the reader cannot be created or the sheet XML cannot be parsed
     */
    public ExcelParserBigData parse(InputStream stream)
            throws InvalidFormatException, IOException, ParseException {
        return parse(stream, 1);
    }

    /**
     * Parses one sheet of the given workbook stream and stores its rows in this instance.
     *
     * <p>NOTE(review): {@code OPCPackage.open(InputStream)} is documented by POI as using
     * more memory than the file/path based overloads; consider those when a file is available.
     *
     * @param stream  the {@code .xlsx} content; it is not closed by this method
     * @param sheetId numeric suffix of the sheet's relationship id; the sheet is looked up as
     *                {@code "rId" + sheetId}. NOTE(review): relationship ids are not guaranteed
     *                to match the visual sheet order - confirm for the files being processed.
     * @return this parser, so that {@link #getDatas()} can be chained
     * @throws InvalidFormatException propagated from POI when opening the package or the sheet
     * @throws IOException            propagated from POI / the underlying streams
     * @throws ParseException         if the reader cannot be created or the sheet XML cannot be parsed
     */
    public synchronized ExcelParserBigData parse(InputStream stream, int sheetId)
            throws InvalidFormatException, IOException, ParseException {
        // Drop the rows of any previous parse before collecting new ones.
        datas.clear();

        OPCPackage pkg = OPCPackage.open(stream);
        try {
            XSSFReader reader;
            try {
                reader = new XSSFReader(pkg);
            } catch (OpenXML4JException e) {
                // Pass the exception itself as cause: fillInStackTrace() would overwrite
                // the original stack trace and hide where the failure really happened.
                logger.error("讀取表格出錯", e);
                throw new ParseException(e);
            }

            InputStream sheetStream = reader.getSheet("rId" + sheetId);
            try {
                InputSource sheetSource = new InputSource(sheetStream);
                StylesTable styles = reader.getStylesTable();
                ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
                // Hand the target list to the handler before any SAX event is fired.
                getContentHandler().init(datas);
                XMLReader parser = getSheetParser(styles, strings);
                parser.parse(sheetSource);
            } catch (SAXException e) {
                logger.error("讀取表格出錯", e);
                throw new ParseException(e);
            } finally {
                sheetStream.close();
            }
        } finally {
            pkg.close();
        }

        return this;
    }

    /**
     * Returns the rows of the most recent parse without the first (header) row.
     *
     * @return a new list holding every row except the first one
     * @see #getDatas(boolean)
     */
    public List<String[]> getDatas() {
        return getDatas(true);
    }

    /**
     * Returns the rows of the most recent parse; {@code parse} must have been called before.
     *
     * <p>The stored rows are never modified by this method, so it can be called repeatedly
     * (with either flag value) and always reflects the same parse result.
     *
     * @param dropFirstRow {@code true} to leave out the first row (treated as the header)
     * @return when {@code dropFirstRow} is {@code true}, a new list without the first row;
     *         otherwise the internal list itself, which is cleared by the next {@code parse}
     */
    public List<String[]> getDatas(boolean dropFirstRow) {
        if (!dropFirstRow) {
            return datas;
        }
        if (datas.isEmpty()) {
            return new ArrayList<String[]>();
        }
        // Copy instead of datas.remove(0): removing would silently drop one more
        // data row on every additional call.
        return new ArrayList<String[]>(datas.subList(1, datas.size()));
    }

    /**
     * Creates the SAX reader that feeds sheet XML events into the content handler.
     *
     * @param styles  the workbook styles table handed to the POI sheet handler
     * @param strings the shared strings table handed to the POI sheet handler
     * @return a reader whose content handler is an {@link XSSFSheetXMLHandler}
     * @throws SAXException if the XML reader cannot be created
     */
    protected XMLReader getSheetParser(StylesTable styles, ReadOnlySharedStringsTable strings) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // The workbook may come from an untrusted upload: refuse DOCTYPE declarations and
        // external entities (XXE). Spreadsheet XML does not need either.
        trySetFeature(parser, "http://apache.org/xml/features/disallow-doctype-decl", true);
        trySetFeature(parser, "http://xml.org/sax/features/external-general-entities", false);
        trySetFeature(parser, "http://xml.org/sax/features/external-parameter-entities", false);
        // Last argument is POI's "formulasNotResults" flag; false is kept from the original code.
        parser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, getContentHandler(), false));

        return parser;
    }

    /** Sets a parser feature on a best-effort basis; unsupported features are logged, not fatal. */
    private static void trySetFeature(XMLReader parser, String feature, boolean enabled) {
        try {
            parser.setFeature(feature, enabled);
        } catch (SAXException e) {
            logger.warn("XML reader does not support feature {}", feature, e);
        }
    }

    /**
     * @return the handler that receives row/cell events during {@code parse}
     */
    public ISheetContentHandler getContentHandler() {
        return contentHandler;
    }

    /**
     * @param contentHandler the handler to use for subsequent {@code parse} calls
     */
    public void setContentHandler(ISheetContentHandler contentHandler) {
        this.contentHandler = contentHandler;
    }

    /**
     * Signals that the workbook could not be read or the sheet XML could not be parsed.
     * Declared static so the exception does not carry a reference to the parser instance.
     */
    public static class ParseException extends Exception {
        private static final long serialVersionUID = -2451526411018517607L;

        /**
         * @param t the underlying POI / SAX failure
         */
        public ParseException(Throwable t) {
            super("表格轉換錯誤", t);
        }
    }

    /**
     * A POI sheet contents handler that additionally accepts the list into which
     * parsed rows are to be stored.
     */
    public interface ISheetContentHandler extends SheetContentsHandler {
        /**
         * Supplies the list that receives the parsed rows; called before each parse.
         *
         * @param datas target list for the rows
         */
        void init(List<String[]> datas);
    }

    /**
     * Default handler: turns every row into a {@code String[]} indexed by column.
     *
     * <p>The first row delivered after {@link #init(List)} is treated as the header and
     * fixes the minimum array length for all following rows; a later row that has more
     * cells than the header yields a longer array.
     */
    static class DefaultSheetHandler implements ISheetContentHandler {
        /** Target list supplied through {@link #init(List)}. */
        private List<String[]> datas;
        /** Number of columns of the header row; minimum length of each row array. */
        private int columnCount;
        /** Whether the header row of the current parse has been seen. */
        private boolean headerSeen;
        /** Column index of the cell written last in the current row, -1 if none. */
        private int lastColumn = -1;
        /** Cells of the row currently being read, indexed by column; gaps are {@code null}. */
        private final List<String> rowCells = new ArrayList<String>();

        @Override
        public void init(List<String[]> datas) {
            this.datas = datas;
            // Forget everything from an earlier (possibly aborted) parse.
            this.columnCount = 0;
            this.headerSeen = false;
            resetRow();
        }

        @Override
        public void startRow(int rowNum) {
            resetRow();
        }

        @Override
        public void endRow(int rowNum) {
            if (!headerSeen) {
                columnCount = rowCells.size();
                headerSeen = true;
            }
            // toArray pads with null up to columnCount and returns a longer array
            // when the row has more cells than the header.
            datas.add(rowCells.toArray(new String[columnCount]));
            resetRow();
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            int index = -1;
            if (cellReference != null) {
                index = getCellIndex(cellReference); // "A1", "B1", ... -> 0, 1, ...
            }
            if (index < 0) {
                // POI may deliver a cell without a usable reference; place it after the previous cell.
                index = lastColumn + 1;
            }
            // Pad skipped (empty) columns so the value lands at its real column index.
            while (rowCells.size() <= index) {
                rowCells.add(null);
            }
            rowCells.set(index, formattedValue);
            lastColumn = index;
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Headers and footers are not part of the row data.
        }

        /**
         * Converts the column letters of a cell reference into a zero-based column index,
         * e.g. {@code "A1" -> 0}, {@code "Z9" -> 25}, {@code "AA3" -> 26}.
         *
         * @param cellReference a reference such as {@code "C12"}; must not be {@code null}
         * @return the zero-based column index, or {@code -1} if the reference does not
         *         start with a letter
         */
        public int getCellIndex(String cellReference) {
            int result = 0;
            for (int i = 0; i < cellReference.length(); i++) {
                char ch = Character.toUpperCase(cellReference.charAt(i));
                if (ch < 'A' || ch > 'Z') {
                    break; // reached the row number
                }
                // Bijective base-26: A=1 ... Z=26, most significant letter first.
                result = result * 26 + (ch - 'A' + 1);
            }
            return result - 1;
        }

        /** Clears the per-row buffer and column cursor. */
        private void resetRow() {
            rowCells.clear();
            lastColumn = -1;
        }
    }
}

 

調用方:

package com.xxx.xxx.xxx.service.impl;

import com.xxx.xxx.xxx.common.util.ExcelParserBigData;
import com.xxx.xxx.xxx.service.DemoBaseService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.FileInputStream;
import java.util.List;

/**
 * Demo service implementation that parses a large xlsx file with
 * {@link ExcelParserBigData} and logs the first two columns of every data row.
 *
 * @author Jimmy Shan
 * @date 2021-04-07
 */
@Service("demoBaseService")
public class DemoBaseServiceImpl implements DemoBaseService {
    private static final Logger LOGGER = LoggerFactory.getLogger(DemoBaseServiceImpl.class);
    @Autowired
    private ExcelParserBigData excelParserBigData;

    /**
     * Parses the xlsx file at the given path and logs each data row (header row excluded).
     * Failures are logged and not propagated, matching the interface's {@code void} contract.
     *
     * <p>NOTE(review): the injected parser keeps the parsed rows in a field; if this service
     * is called concurrently, {@code parse} and {@code getDatas} of different calls could
     * interleave - confirm whether callers are serialized.
     *
     * @param upPathName path of the xlsx file to read
     */
    @Override
    public void parseBigXlsx(String upPathName) {
        long start = System.currentTimeMillis();
        FileInputStream inf = null;
        try {
            inf = new FileInputStream(upPathName);
            // Each element is one row, stored as an array indexed by column.
            List<String[]> dataList = excelParserBigData.parse(inf).getDatas();
            for (int i = 0; i < dataList.size(); i++) {
                String[] str = dataList.get(i);
                LOGGER.info("第 {} 行", (i + 1));
                LOGGER.info("oppoId = {}, oppoName = {}", valueAt(str, 0), valueAt(str, 1));
            }
        } catch (Exception e) {
            // Boundary of the demo: report through the logger instead of printStackTrace().
            LOGGER.error("Failed to parse xlsx file {}", upPathName, e);
        } finally {
            if (inf != null) {
                try {
                    inf.close();
                } catch (java.io.IOException e) {
                    LOGGER.warn("Failed to close xlsx file {}", upPathName, e);
                }
            }
            // Logged outside the close attempt so a close failure cannot suppress it.
            LOGGER.info("解析總耗時:{} 毫秒", (System.currentTimeMillis() - start));
        }
    }

    /** Returns the cell at {@code index}, or {@code null} when the row is missing or too short. */
    private static String valueAt(String[] row, int index) {
        return (row != null && index < row.length) ? row[index] : null;
    }
}

 

接口定義:

package com.xxx.xxx.xxx.service;

/**
 * Demo service.
 *
 * @author Jimmy Shan
 * @date 2021-04-07
 */
public interface DemoBaseService {
    /**
     * Parses a large-volume Excel (xlsx) file.
     *
     * @param upPathName path of the file to parse
     */
    void parseBigXlsx(String upPathName);
}

 

OK,記錄到此。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM