import lombok.Data; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; import java.util.ArrayList; import java.util.List; /** * @author: * @descripition: 快速讀取excel * @date: created in 15:30 2020/10/13 * @modify: Copyright (c) Supermap All Rights Reserved. */ @Data public class Excel07Parser extends DefaultHandler { //取SST 的索引對應的值 private SharedStringsTable sst; //解析結果保存 private List<List<String>> container; // 開始行 private Integer startRow=0; // 結束行 private Integer endRow=0; // 當前行 private Integer row; // 是否是查詢數據 private Boolean is=false; private Excel07Parser.CellDataType nextDataType = Excel07Parser.CellDataType.SSTINDEX; private final DataFormatter formatter = new DataFormatter(); private short formatIndex; private String formatString; private StylesTable stylesTable; //用一個enum表示單元格可能的數據類型 enum CellDataType{ BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL } public Excel07Parser(SharedStringsTable sst,StylesTable stylesTable, List<List<String>> container) { this.sst = sst; this.container = container; this.stylesTable = stylesTable; } public Excel07Parser(SharedStringsTable sst, List<List<String>> container, Integer startRow, Integer endRow, Boolean is) { this.sst = sst; this.container = container; this.startRow = startRow; this.endRow = endRow; this.is = is; } /** * 存儲cell標簽下v標簽包裹的字符文本內容 * 在v標簽開始后,解析器自動調用characters()保存到 lastContents * 【但】當cell標簽的屬性 s是 t時, 表示取到的lastContents是 SharedStringsTable 的index值 * 需要在v標簽結束時根據 index(lastContents)獲取一次真正的值 */ private String lastContents; //有效數據矩形區域,A1:Y2 private String dimension; //根據dimension得出每行的數據長度 private int longest; //上個有內容的單元格id,判斷空單元格 private String lastCellid; //上一行id, 判斷空行 private String lastRowid; // 判斷單元格cell的c標簽下是否有v,否則可能數據錯位 private boolean hasV = false; //行數據保存 private List<String> currentRow; //單元格內容是SST 的索引 private boolean isSSTIndex = false; @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { lastContents = ""; if (qName.equals("dimension")) { dimension = attributes.getValue("ref"); longest = covertRowIdtoInt(dimension.substring(dimension.indexOf(":") + 1)); } //行開始 if (qName.equals("row")) { String rowNum = attributes.getValue("r"); row = Integer.parseInt(rowNum); //判斷空行 if (lastRowid != null) { //與上一行相差2, 說明中間有空行 int gap = Integer.parseInt(rowNum) - Integer.parseInt(lastRowid); if (gap > 1) { gap -= 1; while (gap > 0) { container.add(new ArrayList<>()); gap--; } } } lastRowid = attributes.getValue("r"); currentRow = new ArrayList<>(); } if (qName.equals("c")) { // 設置單元格的數據類型 this.setNextDataType(attributes); String rowId = attributes.getValue("r"); //空單元判斷,添加空字符到list if (lastCellid != null) { int gap = covertRowIdtoInt(rowId) - covertRowIdtoInt(lastCellid); for (int i = 0; i < gap - 1; i++) { currentRow.add(""); } } else { //第一個單元格可能不是在第一列 if (!"A1".equals(rowId)) { for (int i = 0; i < covertRowIdtoInt(rowId) - 1; i++) { currentRow.add(""); } } } lastCellid = rowId; //判斷單元格的值是SST 的索引,不能直接characters方法取值 if (attributes.getValue("t") != null && attributes.getValue("t").equals("s")) { isSSTIndex = true; } else { isSSTIndex = false; } } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { //行結束,存儲一行數據 if (qName.equals("row")) { //判斷最后一個單元格是否在最后,補齊列數 //【注意】有的單元格只修改單元格格式,而沒有內容,會出現c標簽下沒有v標簽,導致currentRow少 if (covertRowIdtoInt(lastCellid) < longest) { int min = Math.min(currentRow.size(), covertRowIdtoInt(lastCellid)); for (int i = 0; i < longest - min; i++) { currentRow.add(""); } } if (is){ if (row==1 || row==2 ||(row > startRow && row < endRow)){ container.add(currentRow); } }else { container.add(currentRow); } lastCellid = null; } //單元格結束,沒有v時需要補位 if (qName.equals("c")){ if (!hasV) currentRow.add(""); hasV = false; } //單元格內容標簽結束,characters方法會被調用處理內容 if (qName.equals("v")) { hasV = true; //單元格的值是SST 的索引 if (isSSTIndex) { String sstIndex = lastContents.toString(); try { int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString( sst.getEntryAt(idx)); lastContents = rtss.toString(); currentRow.add(lastContents); } catch (NumberFormatException ex) { System.out.println(lastContents); } } else { lastContents = this.getDataValue(lastContents.trim(), ""); currentRow.add(lastContents); } } } /** * 獲取element的文本數據 * * @see org.xml.sax.ContentHandler#characters */ @Override public void characters(char[] ch, int start, int length) throws SAXException { lastContents += new String(ch, start, length); } /** * 列號轉數字 AB7-->28 第28列 * * @param cellId 單元格定位id,行列號,AB7 * @return */ public static int covertRowIdtoInt(String cellId) { StringBuilder sb = new StringBuilder(); String column = ""; //從cellId中提取列號 for(char c:cellId.toCharArray()){ if (Character.isAlphabetic(c)){ sb.append(c); }else{ column = sb.toString(); } } //列號字符轉數字 int result = 0; for (char c : column.toCharArray()) { result = result * 26 + (c - 'A') + 1; } return result; } /** * 根據element屬性設置數據類型 * @param attributes */ public void setNextDataType(Attributes attributes){ nextDataType = Excel07Parser.CellDataType.NUMBER; formatIndex = -1; formatString = null; String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); if ("b".equals(cellType)){ nextDataType = Excel07Parser.CellDataType.BOOL; }else if ("e".equals(cellType)){ nextDataType = Excel07Parser.CellDataType.ERROR; }else if ("inlineStr".equals(cellType)){ nextDataType = Excel07Parser.CellDataType.INLINESTR; }else if ("s".equals(cellType)){ nextDataType = Excel07Parser.CellDataType.SSTINDEX; }else if ("str".equals(cellType)){ nextDataType = Excel07Parser.CellDataType.FORMULA; } if (cellStyleStr != null){ int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); formatIndex = style.getDataFormat(); formatString = style.getDataFormatString(); if ("m/d/yy" == formatString){ nextDataType = Excel07Parser.CellDataType.DATE; //full format is "yyyy-MM-dd hh:mm:ss.SSS"; formatString = "yyyy-MM-dd"; } if (formatString == null){ nextDataType = Excel07Parser.CellDataType.NULL; formatString = BuiltinFormats.getBuiltinFormat(formatIndex); } } } /** * 根據數據類型獲取數據 * @param value * @param thisStr * @return */ public String getDataValue(String value, String thisStr) { switch (nextDataType) { //這幾個的順序不能隨便交換,交換了很可能會導致數據錯誤 case BOOL: char first = value.charAt(0); thisStr = first == '0' ? "FALSE" : "TRUE"; break; case ERROR: thisStr = "\"ERROR:" + value.toString() + '"'; break; case FORMULA: thisStr = '"' + value.toString() + '"'; break; case INLINESTR: XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); thisStr = rtsi.toString(); rtsi = null; break; case SSTINDEX: String sstIndex = value.toString(); thisStr = value.toString(); break; case NUMBER: if (formatString != null){ thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim(); }else{ thisStr = value; } thisStr = thisStr.replace("_", "").trim(); break; case DATE: try{ thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString); }catch(NumberFormatException ex){ thisStr = value.toString(); } thisStr = thisStr.replace(" ", ""); break; default: thisStr = ""; break; } return thisStr; } }
如何調用
import com.sgis.common.testutils.Excel07Parser; import org.apache.commons.io.IOUtils; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.junit4.SpringRunner; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLReaderFactory; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; @Test public void method_19() throws OpenXML4JException, IOException, SAXException { String filePath = "H:\\Project\\test\\測試.xlsx"; // 讀取excel數據 OPCPackage pkg = OPCPackage.open(filePath); XSSFReader r = new XSSFReader(pkg); InputStream is = r.getSheet("rId1"); //debug 查看轉換的xml原始文件,方便理解后面解析時的處理, byte[] isBytes = IOUtils.toByteArray(is); //下面是SST 的索引會用到的 SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); List<List<String>> container = new ArrayList<>(); StylesTable stylesTable = r.getStylesTable(); parser.setContentHandler(new Excel07Parser(sst, stylesTable, container)); InputSource inputSource = new InputSource(new ByteArrayInputStream(isBytes)); parser.parse(inputSource); is.close(); System.out.println(container); }