JAVA - 實現 - 利用POI讀取word文檔實例


package read.document;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;

import pers.mysql.DBUtil;
import pers.mysql.MysqlDao;
import pers.mysql.MysqlDaoImp;

public class WordReading {

    /**
     * Command-line entry point: runs {@link #readOnWord(String)} on a fixed
     * document path.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        readOnWord("*****.doc");
    }

    /**
     * Reads word/explanation pairs from a binary Word ({@code .doc}) file and
     * stores each pair through {@code MysqlDaoImp.addData}.
     *
     * <p>Only the section at index 2 of the document is scanned, starting at
     * its paragraph at index 2. The font size reported by the first character
     * run of a paragraph decides the paragraph's role:
     * <ul>
     *   <li>exactly 26 - the paragraph is a word (heading);</li>
     *   <li>below 26 - the paragraph is explanation text; consecutive
     *       explanation paragraphs are concatenated;</li>
     *   <li>above 26 - the paragraph only ends the current explanation and is
     *       otherwise skipped.</li>
     * </ul>
     * A pair is stored once a word has been followed by at least one
     * explanation paragraph and the explanation has ended (next heading or end
     * of the section). Explanation text that appears before any word is
     * discarded. At most 24 pairs are stored.
     *
     * <p>Failures are reported on standard output rather than thrown: a path
     * that is {@code null} or does not end with {@code .doc}, a file that
     * cannot be opened, an I/O error while reading, or a document with fewer
     * than three sections.
     *
     * @param filePath path of the {@code .doc} file to read
     */
    public static void readOnWord(String filePath) {

        // Reject a missing path the same way as a wrong extension instead of
        // failing with a NullPointerException.
        if (filePath == null || !filePath.endsWith(".doc")) {
            System.out.println("文件格式 error:not .doc");
            return;
        }

        final int contentSection = 2; // 0 - cover, 1 - table of contents, 2 - body text
        final int firstParagraph = 2; // paragraph index at which reading starts
        final int wordFontSize = 26;  // original note: word is size 26, explanation is size 18
        final int maxEntries = 24;    // number of pairs to write to the database

        // try-with-resources closes the stream on every path; a file that
        // cannot be opened is reported and the method returns without
        // touching the document loader.
        try (InputStream is = new FileInputStream(filePath)) {

            // Load the .doc document.
            HWPFDocument doc = new HWPFDocument(is);

            // Document structure: whole text -> sections -> paragraphs -> character runs.
            Range text = doc.getRange();

            if (text.numSections() <= contentSection) {
                System.out.println("文檔格式 error:缺少正文小節");
                return;
            }
            Range hotWord = text.getSection(contentSection);
            int paragraphCount = hotWord.numParagraphs();

            String word = null; // heading still waiting for its explanation
            StringBuilder explaining = new StringBuilder();
            boolean hasExplanation = false;
            int stored = 0;

            for (int index = firstParagraph;
                    index < paragraphCount && stored < maxEntries;
                    index++) {
                Range para = hotWord.getParagraph(index);
                if (para.numCharacterRuns() == 0) {
                    continue; // nothing to classify this paragraph by
                }
                int fontSize = para.getCharacterRun(0).getFontSize();

                if (fontSize < wordFontSize) {
                    // Explanation text; only kept when a word is pending so
                    // that it is never attached to the wrong heading.
                    if (word != null) {
                        explaining.append(para.text());
                        hasExplanation = true;
                    }
                    continue;
                }

                // A paragraph at heading size or larger ends the current
                // explanation: store the finished pair and reset the state.
                if (word != null && hasExplanation) {
                    MysqlDaoImp.addData(word, explaining.toString());
                    stored++;
                    word = null;
                    explaining.setLength(0);
                    hasExplanation = false;
                }

                // Only the exact heading size starts a new word; larger text
                // is skipped (always advancing, so the loop terminates).
                if (fontSize == wordFontSize) {
                    word = para.text();
                }
            }

            // The section ended while a pair was still open: store it too.
            if (word != null && hasExplanation) {
                MysqlDaoImp.addData(word, explaining.toString());
            }

        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.out.println("文件打開失敗。");
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("IO錯誤。");
        }

    }

   


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱 yoyou2525@163.com 刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM