poi 通過驅動模式(SAX)解析 防止讀取大數據excel文件 導致的內存溢出


  前幾天項目中遇到了導入大數據的excel表格導致jvm內存溢出的情況,現記錄下解決方案。

poi 解析文件分為用戶模式(usermodel)和事件驅動模式(eventusermodel)兩種。用戶模式無非就是 new HSSFWorkbook(is),直接通過文件流把整個工作簿一次讀入內存,這種方式佔用的內存比較大,大數據量的文件不建議採用此方式解析。

驅動模式:以 SAX 事件的方式逐條記錄讀取文件,將其解析成類似 csv 的按行數據;與用戶模式一樣,也要分 03 版(xls)和 07 版(xlsx)的 excel 分別進行解析。下面直接貼代碼,後面的 main 方法為測試方法。

XLS2CSV.java

  

/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.ExtendedFormatRecord;
import org.apache.poi.hssf.record.FontRecord;
import org.apache.poi.hssf.record.FormatRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

/**
 * 用sax解析xls 格式文檔 轉成csv格式
 * 
 *
 */
public class XLS2CSV implements HSSFListener {
    private int minColumns;
    private POIFSFileSystem fs;
    private PrintStream output;
    
    
    public List<ArrayList<String>> getData() {
        return data;
    }

    // 當前行
    private int curRow = 0;

    // 存儲行記錄的容器
    private List<String> rowlist = new ArrayList<String>();
    
    //樣式記錄容器
    private List<String> rowType = new ArrayList<String>();
    @SuppressWarnings("unchecked")
//    private ArrayList boundSheetRecords = new ArrayList();
    
    private String sheetName;
  //Excel數據
    private List<ArrayList<String>> data = new ArrayList<ArrayList<String>>();

    private int lastRowNumber;
    private int lastColumnNumber;

     private int currentSheetChildPage = 1;
    /** Should we output the formula, or the value it has? */
    private boolean outputFormulaValues = true;

    /** For parsing Formulas */
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;

    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;
    
    /** So we known which sheet we're on */
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;
    private List boundSheetRecords = new ArrayList<>();

    // For handling formulas with string results
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;

    /**
     * Creates a new XLS -> CSV converter
     * @param fs The POIFSFileSystem to process
     * @param output The PrintStream to output the CSV to
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
        this.fs = fs;
        this.output = output;
        this.minColumns = minColumns;
    }

    /**
     * Creates a new XLS -> CSV converter
     * @param filename The file to process
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public XLS2CSV(InputStream is, int minColumns) throws IOException, FileNotFoundException {
        this(
                new POIFSFileSystem(is),
                System.out, minColumns
        );
    }

    /**
     * Initiates the processing of the XLS file to CSV
     */
    public void process() throws IOException {
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();

        if(outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }

        factory.processWorkbookEvents(request, fs);
    }

    /**
     * Main HSSFListener method, processes events, and outputs the
     *  CSV as the file is processed.
     */
    @Override
    public void processRecord(Record record) {
         int thisRow = -1;
            int thisColumn = -1;
            String thisStr = null;
            String value = null;
            
            
            switch (record.getSid()) {
            //---------add start---------
            case FontRecord.sid://字體記錄
                /*FontRecord font = (FontRecord) record;
                
                short boldWeight = font.getBoldWeight();
                short fontHeight = font.getFontHeight();
                short colorPaletteIndex = font.getColorPaletteIndex();
                cellStyle = "style='";index++;
                cellStyle += "font-weight:" + boldWeight + ";"; // 
                cellStyle += "font-size: " + fontHeight / 2 + "%;"; // 
    */            break;
            case FormatRecord.sid://單元格樣式記錄
                /*FormatRecord format = (FormatRecord) record;*/
                break;
            case ExtendedFormatRecord.sid://擴展單元格樣式記錄
                /*ExtendedFormatRecord extendedFormat = (ExtendedFormatRecord) record;
                short borderTop = extendedFormat.getBorderTop();
                short borderRight = extendedFormat.getBorderRight();
                short borderBottom = extendedFormat.getBorderBottom();
                short leftBorderPaletteIdx = extendedFormat.getLeftBorderPaletteIdx();
                
                short alignment = extendedFormat.getAlignment();
                short verticalAlignment = extendedFormat.getVerticalAlignment();
                
                index++;
                alignStyle = "align='" + convertAlignToHtml(alignment) + "' ";
                alignStyle += "valign='" + convertVerticalAlignToHtml(verticalAlignment) + "' ";// 
                
                StringBuffer sb = new StringBuffer();
                sb.append(getBorderStyle(0, borderTop));
                sb.append(getBorderStyle(1, borderRight));
                sb.append(getBorderStyle(2, borderBottom));
                sb.append(getBorderStyle(3, leftBorderPaletteIdx));
                cellStyle += sb.toString();*/
                break;
            //---------add end---------
            case BoundSheetRecord.sid://遍歷所有boundSheetRecord,每個sheet對應一個boundSheetRecord
                boundSheetRecords.add(record);
                break;
            case BOFRecord.sid://type=5為workbook的開始
                BOFRecord br = (BOFRecord) record;
                if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                    // 如果有需要,則建立子工作薄
                    if (workbookBuildingListener != null && stubWorkbook == null) {
                        stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                    }

                    sheetIndex++;
                    if (orderedBSRs == null) {
                        orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
                    }
                    sheetName = orderedBSRs[sheetIndex].getSheetname();
                    /*if(currentSheetIndex!=-1 && sheetIndex > currentSheetIndex){
                        if(data.size()>0){
                            String writeSheetName = orderedBSRs[sheetIndex-1].getSheetname();
                            String sheetDir = dirPath + "/" + writeSheetName;
                            String htmlPath = sheetDir + "/" + fileName.substring(0, fileName.lastIndexOf(".")) + "_"
                                    + writeSheetName + "_" + currentSheetChildPage + ".html";
                            writeHtml(writeSheetName, htmlPath);
                            data.clear();
                            currentSheetChildPage=1;
                        }
                    }
                    currentSheetIndex = sheetIndex;*/
                }
                break;
                
            case EOFRecord.sid:
                /*if(sheetIndex!=-1){
                    if(data.size()>0){
                        String sheetDir = dirPath + "/_a"+ (sheetIndex+1) + "-" + sheetName;
                        String htmlPath = sheetDir + "/" + fileName.substring(0, fileName.lastIndexOf(".")) + "_"
                                + sheetName + "_" + currentSheetChildPage + ".html";
                        boolean writeHtml = writeHtml(orderedBSRs[sheetIndex].getSheetname(), htmlPath);
                        data.clear();
                        if(writeHtml) currentSheetChildPage++;
                    }
                }*/
                currentSheetChildPage=1;
                break;
            case SSTRecord.sid://存儲了xls所有文本單元格值,通過索引獲取
                sstRecord = (SSTRecord) record;
                break;

            case BlankRecord.sid:
                BlankRecord brec = (BlankRecord) record;
                thisRow = brec.getRow();
                thisColumn = brec.getColumn();
                thisStr = "";
                rowlist.add(thisColumn, thisStr);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            case BoolErrRecord.sid: // 單元格為布爾類型
                BoolErrRecord berec = (BoolErrRecord) record;
                thisRow = berec.getRow();
                thisColumn = berec.getColumn();
                thisStr = berec.getBooleanValue() + "";
                rowlist.add(thisColumn, thisStr);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;

            case FormulaRecord.sid: // 單元格為公式類型
                FormulaRecord frec = (FormulaRecord) record;
                thisRow = frec.getRow();
                thisColumn = frec.getColumn();
                if (outputFormulaValues) {
                    if (Double.isNaN(frec.getValue())) {
                        // Formula result is a string
                        // This is stored in the next record
                        outputNextStringRecord = true;
                        nextRow = frec.getRow();
                        nextColumn = frec.getColumn();
                    } else {
                        thisStr = formatListener.formatNumberDateCell(frec);
                    }
                } else {
                    thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
                }
                rowlist.add(thisColumn, thisStr);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            case StringRecord.sid:// 單元格中公式的字符串
                if (outputNextStringRecord) {
                    // String for formula
                    StringRecord srec = (StringRecord) record;
                    thisStr = srec.getString();
                    thisRow = nextRow;
                    thisColumn = nextColumn;
                    outputNextStringRecord = false;
                }
                break;
            case LabelRecord.sid:
                LabelRecord lrec = (LabelRecord) record;
                curRow = thisRow = lrec.getRow();
                thisColumn = lrec.getColumn();
                value = lrec.getValue().trim();
                value = value.equals("") ? " " : value;
                this.rowlist.add(thisColumn, value);
               //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            case LabelSSTRecord.sid: // 單元格為字符串類型
                LabelSSTRecord lsrec = (LabelSSTRecord) record;
                curRow = thisRow = lsrec.getRow();
                thisColumn = lsrec.getColumn();
                if (sstRecord == null) {
                    rowlist.add(thisColumn, " ");
                    //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                } else {
                    value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
                    value = value.equals("") ? " " : value;
                    rowlist.add(thisColumn, value);
                    //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                }
                break;
            case NumberRecord.sid: // 單元格為數字類型
                NumberRecord numrec = (NumberRecord) record;
                curRow = thisRow = numrec.getRow();
                thisColumn = numrec.getColumn();
                value = formatListener.formatNumberDateCell(numrec).trim();
                value = value.equals("") ? " " : value;
                // 向容器加入列值
                rowlist.add(thisColumn, value);
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
                break;
            default:
                break;
            }

            // 遇到新行的操作
            if (thisRow != -1 && thisRow != lastRowNumber) {
                lastColumnNumber = -1;
            }

            // 空值的操作
            if (record instanceof MissingCellDummyRecord) {
                MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
                curRow = thisRow = mc.getRow();
                thisColumn = mc.getColumn();
                rowlist.add(thisColumn, " ");
                //rowType.add(thisColumn,cellStyle + "' " + alignStyle);
            }

            // 更新行和列的值
            if (thisRow > -1)
                lastRowNumber = thisRow;
            if (thisColumn > -1)
                lastColumnNumber = thisColumn;

            // 行結束時的操作
            if (record instanceof LastCellOfRowDummyRecord) {
                if (minColumns > 0) {
                    // 列值重新置空
                    if (lastColumnNumber == -1) {
                        lastColumnNumber = 0;
                    }
                }
                lastColumnNumber = -1;

                // 每行結束時, 調用getRows() 方法(打印內容)
                //rowReader.getRows(sheetIndex, curRow, rowlist);
                
                ArrayList<String> list = new ArrayList<>();
                list.addAll(rowlist);
                data.add(list);
               /* if(data.size()==2000){
                    String sheetDir = dirPath + "/_a"+ (sheetIndex+1)+ "-" + sheetName;
                    String htmlPath = sheetDir + "/" + fileName.substring(0, fileName.lastIndexOf(".")) + "_"
                            + sheetName + "_" + currentSheetChildPage + ".html";
                    boolean writeHtml = writeHtml(orderedBSRs[sheetIndex].getSheetname(), htmlPath);
                    data.clear();
                    if(writeHtml) currentSheetChildPage++;
                }*/
                /*List<String> styleList = new ArrayList<>();
                styleList.addAll(rowType);
                styleData.add(styleList);
    */            
                // 清空容器
                rowlist.clear();
            }
    }

    public static void main(String[] args) throws Exception {


        XLS2CSV xls2csv = new XLS2CSV(new FileInputStream("數據xls.xls"), 20);
        xls2csv.process();
        List<ArrayList<String>> data2 = xls2csv.getData();
        for (ArrayList<String> arrayList : data2) {
            System.out.println(arrayList.toString());
        }
        
    }
}

 

XLSX2CSV.java

  


import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;  
  
/**
 * A rudimentary XLSX reader modeled on the POI sample program XLS2CSVmra from the
 * package org.apache.poi.hssf.eventusermodel.examples. As with the HSSF version, this
 * tries to spot missing rows and cells, and stores empty entries for them.
 * <p>
 * Data sheets are read using a SAX parser to keep the memory footprint relatively
 * small. The styles table and the shared-string table are still loaded up front; the
 * read-only shared-strings class is used instead of the standard POI
 * {@code SharedStringsTable}.
 * <p>
 * Instead of printing CSV text, every row is collected as a list of cell strings.
 * Rows of all sheets are appended, in sheet order, to one list that is available from
 * {@link #get_output()} after {@link #process()} has returned.
 */
public class XLSX2CSV {
    /**
     * Receives the row/cell callbacks of {@link XSSFSheetXMLHandler} for one sheet and
     * appends the rows to the enclosing converter's output list.
     */
    private class SheetToCSV implements SheetContentsHandler {
        private int currentRow = -1;
        private int currentCol = -1;

        /** Appends {@code number} blank rows, each {@code minColumns} wide. */
        private void outputMissingRows(int number) {
            for (int i = 0; i < number; i++) {
                // Use a local list: the shared current-row list must not be replaced
                // here, or the next data row would be written into a blank row.
                ArrayList<String> blankRow = new ArrayList<String>();
                for (int j = 0; j < minColumns; j++) {
                    blankRow.add("");
                }
                output.add(blankRow);
            }
        }

        @Override
        public void startRow(int rowNum) {
            // If there were gaps, output the missing rows
            outputMissingRows(rowNum - currentRow - 1);
            // Prepare for this row
            curstr = new ArrayList<String>();
            currentRow = rowNum;
            currentCol = -1;
        }

        @Override
        public void endRow(int rowNum) {
            // Ensure the minimum number of columns (exactly minColumns, matching blank rows)
            while (curstr.size() < minColumns) {
                curstr.add("");
            }
            output.add(curstr);
        }

        @Override
        public void cell(String cellReference, String formattedValue,
                         XSSFComment comment) {
            // A cell without a reference is taken to be the one right after the previous cell.
            if (cellReference == null) {
                cellReference = new CellAddress(currentRow, currentCol + 1).formatAsString();
            }

            // Did we miss any cells?
            int thisCol = (new CellReference(cellReference)).getCol();
            int missedCols = thisCol - currentCol - 1;
            for (int i = 0; i < missedCols; i++) {
                curstr.add("");
            }
            currentCol = thisCol;

            // Numbers and strings are stored alike; a null value becomes an empty entry
            // (presumably seen for comment-only cells — confirm against the POI handler).
            curstr.add(formattedValue == null ? "" : formattedValue);
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Skip, headers and footers are not part of the row data
        }
    }


    ///////////////////////////////////////

    private final OPCPackage xlsxPackage;

    /**
     * Minimum number of columns per row; shorter rows are padded with empty strings.
     */
    private final int minColumns;

    /**
     * Destination for data: all rows read by the last call to {@link #process()}.
     */
    private List<ArrayList<String>> output = new ArrayList<ArrayList<String>>();

    /** Row currently being filled by the sheet handler. */
    private ArrayList<String> curstr;

    /**
     * Returns the rows collected by the last call to {@link #process()}.
     *
     * @return the internal row list; empty if {@code process()} has not been called
     */
    public List<ArrayList<String>> get_output() {
        return output;
    }

    /**
     * Creates a new XLSX reader.
     *
     * @param pkg        The XLSX package to process
     * @param minColumns The minimum number of columns per row, or -1 for no minimum
     */
    public XLSX2CSV(OPCPackage pkg, int minColumns) {
        this.xlsxPackage = pkg;
        this.minColumns = minColumns;
    }


    /**
     * Parses the content of one sheet using the specified styles and shared-strings
     * tables, reporting rows and cells to the given handler.
     *
     * @param styles           styles table of the workbook
     * @param strings          shared-strings table of the workbook
     * @param sheetHandler     receiver of the row/cell callbacks
     * @param sheetInputStream raw XML of the sheet; not closed by this method
     * @throws IOException  if the sheet XML cannot be read
     * @throws SAXException if the sheet XML cannot be parsed
     * @throws RuntimeException if no SAX parser can be configured
     */
    public void processSheet(
            StylesTable styles,
            ReadOnlySharedStringsTable strings,
            SheetContentsHandler sheetHandler,
            InputStream sheetInputStream)
            throws IOException, ParserConfigurationException, SAXException {
        DataFormatter formatter = new DataFormatter();
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            ContentHandler handler = new XSSFSheetXMLHandler(
                    styles, null, strings, sheetHandler, formatter, false);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            // Keep the original exception as the cause so the failure can be diagnosed.
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
        }
    }

    /**
     * Reads every sheet of the workbook, in order, into a fresh output list.
     *
     * @throws IOException                  if the package cannot be read
     * @throws OpenXML4JException           if the package structure is invalid
     * @throws ParserConfigurationException if no SAX parser can be configured
     * @throws SAXException                 if a part cannot be parsed
     */
    public void process()
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

        // One list for the whole workbook: recreating it per sheet would discard every
        // sheet but the last (e.g. when the data sheet is followed by empty sheets).
        output = new ArrayList<ArrayList<String>>();
        while (iter.hasNext()) {
            try (InputStream stream = iter.next()) {
                processSheet(styles, strings, new SheetToCSV(), stream);
            }
        }
    }
}

 

下面提供一個excel讀取工具類進行讀取,返回的數據類型為 List<ArrayList<String>>,

Excel_reader.java

    

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
 
/**
 * Convenience entry points that read an Excel workbook with the streaming converters
 * {@code XLSX2CSV} and {@code XLS2CSV} and return its rows as lists of cell strings.
 */
public class Excel_reader {

    /** Minimum row width requested from the converters. */
    private static final int MIN_COLUMNS = 20;

    /**
     * Reads an .xlsx workbook and projects every row onto the requested columns.
     *
     * <p>Each element of {@code args} produces one output column: an {@code Integer} is
     * the zero-based index of the source column to copy, a {@code String} is a constant
     * copied into every row. A source column beyond the end of a row yields an empty
     * string.
     *
     * @param xlsxFile stream containing the .xlsx workbook
     * @param args     column selectors ({@code Integer} index or {@code String} constant)
     * @return one list per source row, or {@code null} if {@code args} contains an
     *         element that is neither {@code Integer} nor {@code String} and at least
     *         one row was read
     * @throws IOException if the workbook cannot be opened or parsed
     */
    public static List<ArrayList<String>> xlsx_reader(InputStream xlsxFile, ArrayList<Object> args)
            throws IOException {
        List<ArrayList<String>> sheetRows;
        // try-with-resources releases the package even when parsing fails.
        try (OPCPackage pkg = OPCPackage.open(xlsxFile)) {
            XLSX2CSV converter = new XLSX2CSV(pkg, MIN_COLUMNS);
            converter.process();
            sheetRows = converter.get_output();
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            // Report the failure instead of returning an empty result that looks like
            // an empty workbook.
            throw new IOException("Failed to read xlsx workbook", e);
        }
        if (sheetRows == null) {
            sheetRows = new ArrayList<ArrayList<String>>();
        }

        List<ArrayList<String>> ans = new ArrayList<ArrayList<String>>();
        for (ArrayList<String> sourceRow : sheetRows) {
            ArrayList<String> projected = new ArrayList<String>();
            for (Object selector : args) {
                if (selector instanceof String) {
                    projected.add(selector.toString());
                } else if (selector instanceof Integer) {
                    int column = ((Integer) selector).intValue();
                    // Rows can be shorter than the requested column; treat that as blank.
                    projected.add(column < sourceRow.size() ? sourceRow.get(column) : "");
                } else {
                    System.out.print("類型錯誤!");
                    return null;
                }
            }
            ans.add(projected);
        }

        return ans;
    }

    /**
     * Reads an .xls workbook and returns all of its rows.
     *
     * @param xlsxFile stream containing the .xls workbook (the parameter name is
     *                 historical)
     * @return the rows collected by {@code XLS2CSV}
     * @throws IOException if the workbook cannot be opened or parsed
     */
    public static List<ArrayList<String>> xls_reader(InputStream xlsxFile)
            throws IOException {
        try {
            XLS2CSV converter = new XLS2CSV(xlsxFile, MIN_COLUMNS);
            converter.process();
            return converter.getData();
        } catch (IOException e) {
            throw e;
        } catch (RuntimeException e) {
            // Surface parser failures through the declared exception type instead of
            // printing them and returning an empty result.
            throw new IOException("Failed to read xls workbook", e);
        }
    }

    /**
     * Manual test: reads the sample .xls workbook from the working directory and prints
     * each row.
     */
    public static void main(String[] args) throws FileNotFoundException {
        try (InputStream in = new FileInputStream("數據xls.xls")) {
            XLS2CSV xls2csv = new XLS2CSV(in, 20);
            xls2csv.process();
            for (ArrayList<String> row : xls2csv.getData()) {
                System.out.println(row.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

 

解析成的格式為  

[小紅, 男, , , , , , , ]
[小明, 男, , , , , , , ]

沒數據的單元格顯示為空;解析 xlsx 文件時,需要傳入需要讀取的列的集合。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM