package read.document; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.sql.Connection; import java.util.ArrayList; import java.util.List; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Range; import pers.mysql.DBUtil; import pers.mysql.MysqlDao; import pers.mysql.MysqlDaoImp; public class WordReading { public static void main(String[] args) { String filePath = "*****.doc"; readOnWord(filePath); } public static void readOnWord(String filePath) { if (filePath.endsWith(".doc")) { // 輸入流-基類 InputStream is = null; try { is = new FileInputStream(filePath); } catch (FileNotFoundException e) { e.printStackTrace(); System.out.println("文件打開失敗。"); } // 加載doc文檔 try { HWPFDocument doc = new HWPFDocument(is); Range text = doc.getRange();// 整個文檔 /* * 分解word:文本 ->小節 ->段落 ->characterRun(理解為小單元) * section -小節; paragraph - 段落 */ //1分出內容節點 Range hotWord = text.getSection(2);// 0-封面,1-目錄,2-文本;第3小節 //2段落處理 /* * 維護兩個變量 * * 熱詞和解釋區別 :大小-word:26,explaining:18 * */ String word = ""; String explaining = ""; int wordOK = 0; int explainOK = 0;// 判斷當前word&explain是否可以填入數據庫 int count = 24;// 讀取幾條數據到數據庫 int begin = 2;// 段落讀取位置 for (int i = 0; i < count;) { Range para = hotWord.getParagraph(begin); CharacterRun field = para.getCharacterRun(0); int fontSize = field.getFontSize(); if (fontSize == 26) { word = para.text(); wordOK = 1; begin++; } else { while (fontSize < 26) { explaining += para.text(); begin++; para = hotWord.getParagraph(begin); field = para.getCharacterRun(0); fontSize = field.getFontSize(); } explainOK = 1; } // 判斷word&explain是否可以填入數據庫 if (wordOK == 1 && explainOK == 1) { MysqlDaoImp.addData(word, explaining); i++; //填入數據庫后,一切歸"0" wordOK = 0; explainOK = 0; word=""; explaining=""; } } // 輸出測試 // System.out.println("讀取:" + "head:"); } catch (IOException e) { e.printStackTrace(); System.out.println("IO錯誤。"); } } else { System.out.println("文件格式 error:not .doc"); } }