import lombok.Data;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.util.ArrayList;
import java.util.List;
/**
 * SAX {@link DefaultHandler} that reads one worksheet XML part of an .xlsx file and
 * collects its cells as rows of strings.
 *
 * <p>Each {@code <row>} element becomes one {@code List<String>} appended to the supplied
 * container. Skipped row numbers are filled with empty lists, skipped or value-less cells are
 * filled with empty strings, and rows are padded up to the column count taken from the
 * sheet's {@code <dimension>} element when one is present.
 *
 * <p>created in 15:30 2020/10/13
 *
 * <p>Copyright (c) Supermap All Rights Reserved.
 */
@Data
public class Excel07Parser extends DefaultHandler {

    /** Possible data types of a cell, derived from its {@code t} and {@code s} attributes. */
    enum CellDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
    }

    /** Shared strings table used to resolve shared-string indexes to their text. */
    private SharedStringsTable sst;
    /** Destination of the parsed rows. */
    private List<List<String>> container;
    /** Exclusive lower bound of the row window used when {@link #is} is true. */
    private Integer startRow = 0;
    /** Exclusive upper bound of the row window used when {@link #is} is true. */
    private Integer endRow = 0;
    /** Number of the row currently being parsed (value of the row's {@code r} attribute). */
    private Integer row;
    /** When true, only rows 1, 2 and those strictly between startRow and endRow are kept. */
    private Boolean is = false;
    private Excel07Parser.CellDataType nextDataType = Excel07Parser.CellDataType.SSTINDEX;
    private final DataFormatter formatter = new DataFormatter();
    private short formatIndex;
    private String formatString;
    /** Styles table used to look up number formats; null when built via the 5-argument constructor. */
    private StylesTable stylesTable;

    /**
     * Text accumulated by {@link #characters(char[], int, int)} for the current element.
     * For a cell whose {@code t} attribute is {@code s} this text is an index into the
     * shared strings table and is resolved to the real value when {@code </v>} is reached.
     */
    private String lastContents;
    /** Used data range of the sheet as reported by {@code <dimension ref="...">}, e.g. A1:Y2. */
    private String dimension;
    /** Number of columns per row, derived from {@link #dimension}. */
    private int longest;
    /** Reference of the previous cell in the current row; used to detect skipped cells. */
    private String lastCellid;
    /** Number of the previous row; used to detect skipped rows. */
    private String lastRowid;
    /** Whether the current {@code <c>} contained a {@code <v>}; without it a blank is inserted. */
    private boolean hasV = false;
    /** Cells of the row currently being parsed. */
    private List<String> currentRow;
    /** Whether the current cell's value is an index into the shared strings table. */
    private boolean isSSTIndex = false;

    /**
     * Creates a parser that keeps every row and formats numbers using the given styles.
     *
     * @param sst         shared strings table of the workbook
     * @param stylesTable styles table of the workbook
     * @param container   list that receives the parsed rows
     */
    public Excel07Parser(SharedStringsTable sst, StylesTable stylesTable, List<List<String>> container) {
        this.sst = sst;
        this.container = container;
        this.stylesTable = stylesTable;
    }

    /**
     * Creates a parser that can restrict the collected rows to a window. No styles table is
     * set by this constructor, so style-based number/date formatting is skipped.
     *
     * @param sst       shared strings table of the workbook
     * @param container list that receives the parsed rows
     * @param startRow  exclusive lower bound of the row window
     * @param endRow    exclusive upper bound of the row window
     * @param is        when true only rows 1, 2 and rows inside the window are collected
     */
    public Excel07Parser(SharedStringsTable sst, List<List<String>> container, Integer startRow, Integer endRow, Boolean is) {
        this.sst = sst;
        this.container = container;
        this.startRow = startRow;
        this.endRow = endRow;
        this.is = is;
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        // Every element starts with an empty text buffer.
        lastContents = "";
        if ("dimension".equals(qName)) {
            onDimension(attributes);
        } else if ("row".equals(qName)) {
            onRowStart(attributes);
        } else if ("c".equals(qName)) {
            onCellStart(attributes);
        }
    }

    /** Records the sheet's used range and derives the column count from its last cell. */
    private void onDimension(Attributes attributes) {
        dimension = attributes.getValue("ref");
        if (dimension != null) {
            // For "A1:Y2" this is the column of "Y2"; for a single-cell ref the whole string is used.
            longest = covertRowIdtoInt(dimension.substring(dimension.indexOf(':') + 1));
        }
    }

    /** Starts a new row, first emitting an empty list for every row number that was skipped. */
    private void onRowStart(Attributes attributes) {
        String rowNum = attributes.getValue("r");
        row = Integer.parseInt(rowNum);
        if (lastRowid != null) {
            int skippedRows = row - Integer.parseInt(lastRowid) - 1;
            for (int i = 0; i < skippedRows; i++) {
                container.add(new ArrayList<>());
            }
        }
        lastRowid = rowNum;
        currentRow = new ArrayList<>();
    }

    /** Prepares for a new cell: determines its data type and pads any skipped columns. */
    private void onCellStart(Attributes attributes) {
        setNextDataType(attributes);
        String cellRef = attributes.getValue("r");
        int column = covertRowIdtoInt(cellRef);
        // For the first cell of a row the "previous column" is 0, so leading blanks are added
        // when the row does not start in column A.
        int previousColumn = lastCellid != null ? covertRowIdtoInt(lastCellid) : 0;
        for (int i = 0; i < column - previousColumn - 1; i++) {
            currentRow.add("");
        }
        lastCellid = cellRef;
        // Shared-string cells hold an index, not the text itself.
        isSSTIndex = "s".equals(attributes.getValue("t"));
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if ("row".equals(qName)) {
            onRowEnd();
        } else if ("c".equals(qName)) {
            // A cell without <v> (e.g. only formatted) still occupies a column.
            if (!hasV) {
                currentRow.add("");
            }
            hasV = false;
        } else if ("v".equals(qName)) {
            onValueEnd();
        }
    }

    /** Pads the finished row to the sheet width and stores it if it passes the row filter. */
    private void onRowEnd() {
        // covertRowIdtoInt returns 0 for null, which covers rows that contain no cells at all.
        int lastColumn = covertRowIdtoInt(lastCellid);
        if (lastColumn < longest) {
            int filled = Math.min(currentRow.size(), lastColumn);
            for (int i = 0; i < longest - filled; i++) {
                currentRow.add("");
            }
        }
        if (!Boolean.TRUE.equals(is)
                || row == 1 || row == 2 || (row > startRow && row < endRow)) {
            container.add(currentRow);
        }
        lastCellid = null;
    }

    /** Converts the text collected inside {@code <v>} into the cell's string value and stores it. */
    private void onValueEnd() {
        hasV = true;
        if (isSSTIndex) {
            String rawIndex = lastContents;
            try {
                int idx = Integer.parseInt(rawIndex);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            } catch (NumberFormatException ex) {
                // Not a valid index: keep the raw text so the following columns stay aligned.
                lastContents = rawIndex;
            }
        } else {
            lastContents = this.getDataValue(lastContents.trim(), "");
        }
        currentRow.add(lastContents);
    }

    /**
     * Appends element text to {@link #lastContents}.
     *
     * @see org.xml.sax.ContentHandler#characters
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        lastContents += new String(ch, start, length);
    }

    /**
     * Converts the column letters of a cell reference to a 1-based column number,
     * e.g. {@code AB7} gives 28.
     *
     * @param cellId cell reference such as {@code AB7}; a bare column such as {@code AB} is
     *               accepted as well
     * @return the 1-based column number, or 0 when {@code cellId} is null or has no column letters
     */
    public static int covertRowIdtoInt(String cellId) {
        if (cellId == null) {
            return 0;
        }
        int result = 0;
        for (int i = 0; i < cellId.length(); i++) {
            char c = cellId.charAt(i);
            if (c == '$') {
                // Absolute-reference marker, not part of the column name.
                continue;
            }
            if (!Character.isAlphabetic(c)) {
                // The column letters end where the row digits begin.
                break;
            }
            result = result * 26 + (Character.toUpperCase(c) - 'A') + 1;
        }
        return result;
    }

    /**
     * Determines the data type and number format of the cell that is about to be read.
     *
     * @param attributes attributes of the {@code <c>} element ({@code t} = type, {@code s} = style index)
     */
    public void setNextDataType(Attributes attributes) {
        nextDataType = Excel07Parser.CellDataType.NUMBER;
        formatIndex = -1;
        formatString = null;
        String cellType = attributes.getValue("t");
        String cellStyleStr = attributes.getValue("s");
        if ("b".equals(cellType)) {
            nextDataType = Excel07Parser.CellDataType.BOOL;
        } else if ("e".equals(cellType)) {
            nextDataType = Excel07Parser.CellDataType.ERROR;
        } else if ("inlineStr".equals(cellType)) {
            nextDataType = Excel07Parser.CellDataType.INLINESTR;
        } else if ("s".equals(cellType)) {
            nextDataType = Excel07Parser.CellDataType.SSTINDEX;
        } else if ("str".equals(cellType)) {
            nextDataType = Excel07Parser.CellDataType.FORMULA;
        }
        // The styles table is absent when the 5-argument constructor was used.
        if (cellStyleStr == null || stylesTable == null) {
            return;
        }
        XSSFCellStyle style = stylesTable.getStyleAt(Integer.parseInt(cellStyleStr));
        if (style == null) {
            return;
        }
        formatIndex = style.getDataFormat();
        formatString = style.getDataFormatString();
        if ("m/d/yy".equals(formatString)) {
            nextDataType = Excel07Parser.CellDataType.DATE;
            // full format is "yyyy-MM-dd hh:mm:ss.SSS";
            formatString = "yyyy-MM-dd";
        }
        if (formatString == null) {
            nextDataType = Excel07Parser.CellDataType.NULL;
            formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
        }
    }

    /**
     * Converts the raw text of a cell into its display string according to the data type
     * chosen by {@link #setNextDataType(Attributes)}.
     *
     * @param value   raw text of the cell's {@code <v>} element
     * @param thisStr ignored on input; kept for interface compatibility
     * @return the converted value, or an empty string for the {@code NULL} type
     */
    public String getDataValue(String value, String thisStr) {
        switch (nextDataType) {
            case BOOL:
                if (value.isEmpty()) {
                    thisStr = "";
                } else {
                    thisStr = value.charAt(0) == '0' ? "FALSE" : "TRUE";
                }
                break;
            case ERROR:
                thisStr = "\"ERROR:" + value + '"';
                break;
            case FORMULA:
                thisStr = '"' + value + '"';
                break;
            case INLINESTR:
                thisStr = new XSSFRichTextString(value).toString();
                break;
            case SSTINDEX:
                thisStr = value;
                break;
            case NUMBER:
                thisStr = value;
                if (formatString != null) {
                    try {
                        thisStr = formatter.formatRawCellContents(
                                Double.parseDouble(value), formatIndex, formatString).trim();
                    } catch (NumberFormatException ex) {
                        // Not numeric: fall back to the raw text, as the DATE branch does.
                        thisStr = value;
                    }
                }
                thisStr = thisStr.replace("_", "").trim();
                break;
            case DATE:
                try {
                    thisStr = formatter.formatRawCellContents(
                            Double.parseDouble(value), formatIndex, formatString);
                } catch (NumberFormatException ex) {
                    thisStr = value;
                }
                thisStr = thisStr.replace(" ", "");
                break;
            default:
                thisStr = "";
                break;
        }
        return thisStr;
    }
}
import com.sgis.common.testutils.Excel07Parser;
import org.apache.commons.io.IOUtils;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Manual smoke test: parses the sheet with relationship id {@code rId1} of a local .xlsx
 * file using {@link Excel07Parser} and prints the resulting rows.
 *
 * @throws OpenXML4JException if the package cannot be opened or read
 * @throws IOException        if reading the workbook parts fails
 * @throws SAXException       if the XML reader cannot be created or the sheet XML is malformed
 */
@Test
public void method_19() throws OpenXML4JException, IOException, SAXException {
    String filePath = "H:\\Project\\test\\測試.xlsx";
    List<List<String>> container = new ArrayList<>();
    // Open read-only: the workbook is never modified here, and closing a read/write
    // package would write it back to disk.
    try (OPCPackage pkg = OPCPackage.open(filePath, org.apache.poi.openxml4j.opc.PackageAccess.READ)) {
        XSSFReader r = new XSSFReader(pkg);
        // Buffer the sheet XML so the raw document can be inspected while debugging.
        byte[] isBytes;
        try (InputStream is = r.getSheet("rId1")) {
            isBytes = IOUtils.toByteArray(is);
        }
        // Needed to resolve shared-string indexes.
        SharedStringsTable sst = r.getSharedStringsTable();
        StylesTable stylesTable = r.getStylesTable();
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        parser.setContentHandler(new Excel07Parser(sst, stylesTable, container));
        parser.parse(new InputSource(new ByteArrayInputStream(isBytes)));
    }
    System.out.println(container);
}