本文使用的 Apache POI jar 包需要自行下載。
要讀取的 Word 文檔中含有多張圖片,所以程式分為兩個部分:一部分讀取各個表格中的內容,另一部分將所有圖片擷取出來:
/**
 * Prints the text of every run of every paragraph in the document and writes
 * the document's embedded pictures to disk.
 *
 * <p>Each picture from {@code document.getAllPictures()} is written to
 * {@code uploadPic + "/" + <picture file name>}. The picture named
 * {@code image1.jpeg} is skipped.
 *
 * @param document     the parsed .docx document; {@code null} yields {@code "false"}
 * @param docxReadPath path of the source document (not used by this method)
 * @param uploadPic    directory the pictures are written into
 * @param picFile      location of the saved pictures (not used by this method)
 * @return {@code "sucess"} (spelling kept as-is for existing callers) when
 *         everything was processed, {@code "false"} if any exception occurred
 */
public static String readPar(XWPFDocument document, String docxReadPath, String uploadPic, String picFile) {
    String fail = "sucess";
    if (document == null) {
        // Report failure through the return value instead of a NullPointerException.
        return "false";
    }
    try {
        // Dump the text of the whole document, run by run.
        Iterator<XWPFParagraph> itPara = document.getParagraphsIterator();
        while (itPara.hasNext()) {
            XWPFParagraph paragraph = itPara.next();
            // A run is a stretch of characters that share the same formatting.
            for (XWPFRun run : paragraph.getRuns()) {
                // getText(int) expects the index of the text node inside the run.
                // getTextPosition() is the vertical raise/lower offset, not an
                // index, so the first text node is requested explicitly.
                String oneparaString = run.getText(0);
                System.out.println(oneparaString);
            }
        }

        // Export the embedded pictures.
        for (XWPFPictureData pic : document.getAllPictures()) {
            byte[] bytev = pic.getData();
            String imgName = pic.getFileName();
            System.out.println("=====圖片生成中========" + imgName);
            // NOTE(review): image1.jpeg is deliberately not exported; the reason
            // is not visible in this file -- confirm it is still wanted.
            if (!"image1.jpeg".equals(imgName)) {
                FileOutputStream fos = new FileOutputStream(uploadPic + "/" + imgName);
                try {
                    fos.write(bytev);
                } finally {
                    // Always release the file handle, even if the write fails.
                    fos.close();
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("=====錯誤信息====" + e.getMessage());
        fail = "false";
    }
    return fail;
}
/** * 遍歷所有表格的內容 * @param document * @throws FileNotFoundException */ public static void readTableContent(XWPFDocument document) { Iterator<XWPFTable> itTable = document.getTablesIterator(); int ind = 0; while (itTable.hasNext()){ ind++; XWPFTable table = (XWPFTable) itTable.next(); //行 int rcount = table.getNumberOfRows(); for (int i = 0; i < rcount; i++){ XWPFTableRow row = table.getRow(i); //列 List<XWPFTableCell> cells = row.getTableCells(); int len = cells.size(); for(int j = 0;j < len;j++){ XWPFTableCell xc = cells.get(j); String sc = xc.getText(); System.out.println("第"+ ind +"個表格,第"+ (i+1) +"行,第"+ (j+1) +"列:" +sc); } } } }
/** * 讀取文件 * @param srcPath * @return XWPFDocument */ public static XWPFDocument read_file(String srcPath) { String[] sp = srcPath.split("\\."); if ((sp.length > 0) && sp[sp.length - 1].equalsIgnoreCase("docx")) { try { FileInputStream fis = new FileInputStream(srcPath); XWPFDocument xdoc = new XWPFDocument(fis); XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc); // OPCPackage pack = POIXMLDocument.openPackage(srcPath); // XWPFDocument doc = new XWPFDocument(pack); return xdoc; } catch (IOException e) { System.out.println("讀取文件出錯!"); e.printStackTrace(); return null; } } return null; }
/**
 * Demo entry point: reads a .docx file, prints its paragraph text, exports
 * its pictures and prints the contents of its tables.
 *
 * @param args optional overrides: {@code args[0]} is the .docx path (default
 *             {@code F:\bb.docx}), {@code args[1]} is the directory the
 *             pictures are written to (default {@code F:})
 * @throws IOException declared for compatibility with the original signature
 */
public static void main(String[] args) throws IOException {
    String docx = args.length > 0 ? args[0] : "F:\\bb.docx";
    // Directory that receives the extracted pictures; adjust for your environment.
    String picDir = args.length > 1 ? args[1] : "F:";

    XWPFDocument document = read_file(docx);
    if (document == null) {
        // read_file returns null when the path is not a .docx or cannot be read.
        return;
    }
    // readPar takes four arguments; the picture directory is passed for both
    // the upload location and the saved-picture location.
    readPar(document, docx, picDir, picDir);
    readTableContent(document);
}