在 Java 中解析 Excel 時,通常使用 POI 包下的 XSSFWorkbook 對象。但是 XSSFWorkbook 會把整個工作簿一次性載入內存,遇到海量數據(比如十幾萬行),或者 JVM 堆內存設置得比較小時,就會拋出 OOM 異常。下面給出基於 POI 事件模型(XSSFReader + SAX)逐行流式讀取的解決方法。
直接上代碼:
package com.xxx.xxx.xxx.common.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Parses large .xlsx files with POI's SAX/event API ({@link XSSFReader}) instead of
 * building an in-memory workbook, so that big sheets do not exhaust the heap.
 *
 * <p>Usage: call {@link #parse(InputStream)} and then {@link #getDatas()}.
 *
 * <p>NOTE(review): this component is stateful. The rows of the most recent parse are kept
 * in a field and cleared by the next {@code parse} call. {@code parse} and {@code getDatas}
 * are individually synchronized, but a caller sharing one instance between threads must
 * still treat "parse then getDatas" as one critical section, or use a private instance.
 *
 * @author Jimmy Shan
 * @date 2021-04-09
 */
@Component
public class ExcelParserBigData {

    private static final Logger logger = LoggerFactory.getLogger(ExcelParserBigData.class);

    /** Receives the SAX row/cell callbacks; defaults to {@link DefaultSheetHandler}. */
    private ISheetContentHandler contentHandler = new DefaultSheetHandler();

    /** Rows collected by the most recent parse; each row is one String array. */
    private final List<String[]> datas = new ArrayList<>();

    /** True once the header row has been removed from {@link #datas} for the current parse. */
    private boolean headerDropped;

    /**
     * Parses the sheet whose relationship id is {@code rId1}.
     *
     * @param stream the .xlsx content; the caller stays responsible for closing it
     * @return this parser, so that {@link #getDatas()} can be chained
     * @throws InvalidFormatException if the stream is not a valid OOXML package
     * @throws IOException            if reading the stream fails
     * @throws ParseException         if the package or the sheet XML cannot be read
     */
    public ExcelParserBigData parse(InputStream stream)
            throws InvalidFormatException, IOException, ParseException {
        return parse(stream, 1);
    }

    /**
     * Parses one sheet of the workbook and stores its rows for {@link #getDatas()}.
     *
     * @param stream  the .xlsx content; the caller stays responsible for closing it
     * @param sheetId number appended to {@code "rId"} to form the relationship id of the
     *                sheet to read. NOTE(review): relationship ids are not guaranteed to
     *                match the visible sheet order in every workbook; verify for files
     *                that were not produced by Excel itself.
     * @return this parser
     * @throws InvalidFormatException if the stream is not a valid OOXML package
     * @throws IOException            if reading the stream fails
     * @throws ParseException         if the package or the sheet XML cannot be read
     */
    public synchronized ExcelParserBigData parse(InputStream stream, int sheetId)
            throws InvalidFormatException, IOException, ParseException {
        // Start every parse from a clean state.
        datas.clear();
        headerDropped = false;

        try (OPCPackage pkg = OPCPackage.open(stream)) {
            XSSFReader reader;
            try {
                reader = new XSSFReader(pkg);
            } catch (OpenXML4JException e) {
                logger.error("讀取表格出錯", e);
                // Pass the exception as-is: fillInStackTrace() would overwrite its origin.
                throw new ParseException(e);
            }

            try (InputStream sheetStream = reader.getSheet("rId" + sheetId)) {
                InputSource sheetSource = new InputSource(sheetStream);
                StylesTable styles = reader.getStylesTable();
                ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
                // Hand the handler the list it should fill.
                getContentHandler().init(datas);
                XMLReader parser = getSheetParser(styles, strings);
                parser.parse(sheetSource);
            } catch (SAXException e) {
                logger.error("讀取表格出錯", e);
                throw new ParseException(e);
            }
        }
        return this;
    }

    /**
     * Returns the rows of the last parse without the first (header) row.
     *
     * @return the parsed rows, header removed
     */
    public List<String[]> getDatas() {
        return getDatas(true);
    }

    /**
     * Returns the rows of the last parse.
     *
     * <p>The header row is removed from the stored list at most once per parse, so calling
     * this method repeatedly no longer discards data rows.
     *
     * @param dropFirstRow whether the first row should be removed from the result
     * @return the internal, mutable row list (not a copy)
     */
    public synchronized List<String[]> getDatas(boolean dropFirstRow) {
        if (dropFirstRow && !headerDropped && !datas.isEmpty()) {
            datas.remove(0);
            headerDropped = true;
        }
        return datas;
    }

    /**
     * Creates the SAX reader that feeds sheet XML into the current content handler.
     *
     * @param styles  cell styles used by POI to format values
     * @param strings shared-strings table of the workbook
     * @return a reader with external entity resolution disabled where the parser allows it
     * @throws SAXException if no XML reader can be created
     */
    protected XMLReader getSheetParser(StylesTable styles, ReadOnlySharedStringsTable strings)
            throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader();
        // Uploaded workbooks are untrusted XML: switch off DTDs and external entities (XXE).
        trySetFeature(parser, "http://javax.xml.XMLConstants/feature/secure-processing", true);
        trySetFeature(parser, "http://apache.org/xml/features/disallow-doctype-decl", true);
        trySetFeature(parser, "http://xml.org/sax/features/external-general-entities", false);
        trySetFeature(parser, "http://xml.org/sax/features/external-parameter-entities", false);
        trySetFeature(parser, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        parser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, getContentHandler(), false));
        return parser;
    }

    /** Sets a parser feature, logging instead of failing when the parser does not know it. */
    private static void trySetFeature(XMLReader reader, String feature, boolean value) {
        try {
            reader.setFeature(feature, value);
        } catch (SAXException e) {
            logger.warn("XML解析器不支持安全特性:{}", feature, e);
        }
    }

    public ISheetContentHandler getContentHandler() {
        return contentHandler;
    }

    public void setContentHandler(ISheetContentHandler contentHandler) {
        this.contentHandler = contentHandler;
    }

    /**
     * Signals that the workbook or sheet could not be read; always wraps the original cause.
     */
    public static class ParseException extends Exception {
        private static final long serialVersionUID = -2451526411018517607L;

        public ParseException(Throwable t) {
            super("表格轉換錯誤", t);
        }
    }

    /**
     * Sheet callback handler that additionally accepts the list it must fill with rows.
     */
    public interface ISheetContentHandler extends SheetContentsHandler {

        /**
         * Supplies the list that will receive one String array per parsed row.
         *
         * @param datas destination list for the parsed rows
         */
        void init(List<String[]> datas);
    }

    /**
     * Default handler: turns every row into a String array indexed by column.
     *
     * <p>The width of row 0 is remembered and used as the minimum array length of all later
     * rows; a later row with more cells than row 0 yields a longer array.
     */
    static class DefaultSheetHandler implements ISheetContentHandler {

        /** Destination list supplied through {@link #init(List)}. */
        private List<String[]> datas;

        /** Number of columns seen in row 0; 0 until row 0 has been read. */
        private int columsLength;

        /** Column index of the last cell seen in the current row, -1 at row start. */
        private int lastColumn = -1;

        /** Cells of the row currently being read, indexed by column. */
        private final List<String> currentRow = new ArrayList<>();

        @Override
        public void init(List<String[]> datas) {
            this.datas = datas;
            // Forget the header width of a previously parsed sheet.
            this.columsLength = 0;
        }

        @Override
        public void startRow(int rowNum) {
            currentRow.clear();
            lastColumn = -1;
        }

        @Override
        public void endRow(int rowNum) {
            if (rowNum == 0) {
                columsLength = currentRow.size();
            }
            // Pad short rows with nulls up to the header width; never truncate long rows.
            int length = Math.max(columsLength, currentRow.size());
            datas.add(currentRow.toArray(new String[length]));
            currentRow.clear();
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            // POI may pass a null reference for cells without an address; in that case
            // place the value in the column right after the previous cell.
            int index = (cellReference == null) ? -1 : getCellIndex(cellReference);
            if (index < 0) {
                index = lastColumn + 1;
            }
            while (currentRow.size() <= index) {
                currentRow.add(null);
            }
            currentRow.set(index, formattedValue);
            lastColumn = index;
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Headers and footers are not part of the extracted data.
        }

        /**
         * Converts a cell reference such as {@code A1}, {@code B7} or {@code AA3} into a
         * zero-based column index ({@code A -> 0}, {@code Z -> 25}, {@code AA -> 26}).
         *
         * @param cellReference the cell address; {@code $} markers are ignored
         * @return the zero-based column index, or -1 if the reference contains no letters
         */
        public int getCellIndex(String cellReference) {
            int column = 0;
            for (int i = 0; i < cellReference.length(); i++) {
                char ch = Character.toUpperCase(cellReference.charAt(i));
                if (ch >= 'A' && ch <= 'Z') {
                    // Base-26 accumulation with digits A=1 .. Z=26.
                    column = column * 26 + (ch - 'A' + 1);
                } else if (ch >= '0' && ch <= '9') {
                    // The row part starts here; the column letters are complete.
                    break;
                }
            }
            return column - 1;
        }
    }
}
調用方:
package com.xxx.xxx.xxx.service.impl;
import com.xxx.xxx.xxx.common.util.ExcelParserBigData;
import com.xxx.xxx.xxx.service.DemoBaseService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.FileInputStream;
import java.util.List;
/**
 * Sample service implementation that reads a large .xlsx file through
 * {@link ExcelParserBigData} and logs the first two columns of every data row.
 *
 * @author Jimmy Shan
 * @date 2021-04-07
 */
@Service("demoBaseService")
public class DemoBaseServiceImpl implements DemoBaseService {

    private static final Logger LOGGER = LoggerFactory.getLogger(DemoBaseServiceImpl.class);

    // NOTE(review): the injected parser keeps the rows of its last parse in a field, so
    // concurrent calls to parseBigXlsx share that state; confirm callers are serialized.
    @Autowired
    private ExcelParserBigData excelParserBigData;

    /**
     * Parses the .xlsx file at the given path and logs each data row.
     *
     * <p>Failures are logged and not propagated; the elapsed time is always logged.
     *
     * @param upPathName path of the .xlsx file to parse
     */
    @Override
    public void parseBigXlsx(String upPathName) {
        long start = System.currentTimeMillis();
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream inf = new FileInputStream(upPathName)) {
            ExcelParserBigData parseBigData = excelParserBigData.parse(inf);
            // Each element is one row stored as a String array.
            List<String[]> dataList = parseBigData.getDatas();
            for (int i = 0; i < dataList.size(); i++) {
                String[] str = dataList.get(i);
                LOGGER.info("第 {} 行", (i + 1));
                LOGGER.info("oppoId = {}, oppoName = {}", columnOrNull(str, 0), columnOrNull(str, 1));
            }
        } catch (Exception e) {
            // Boundary of the demo service: report through the logger, keep the cause.
            LOGGER.error("解析Excel文件失敗:{}", upPathName, e);
        } finally {
            LOGGER.info("解析總耗時:{} 毫秒", (System.currentTimeMillis() - start));
        }
    }

    /** Returns the value at {@code index}, or null when the row has fewer columns. */
    private static String columnOrNull(String[] row, int index) {
        return (row != null && index < row.length) ? row[index] : null;
    }
}
接口定義:
package com.xxx.xxx.xxx.service; /** * @author Jimmy Shan * @date 2021-04-07 * @desc 示例 服務 */ public interface DemoBaseService { /** * @desc 解析大數據量excel文件 */ void parseBigXlsx(String upPathName); }
OK,記錄到此。