Lucene8學習：工具類

本文轉載自查看原文 2019-11-15 13:02 302 Lucene
1.1. Lucene工具類

為了後面的開發、測試方便，這裡編寫一個工具類：
  1 import java.io.IOException;
  2 
  3 import java.nio.file.Paths;
  4 
  5 import java.util.List;
  6 
  7  
  8 
  9 import org.apache.lucene.analysis.Analyzer;
 10 
 11 import org.apache.lucene.document.Document;
 12 
 13 import org.apache.lucene.index.DirectoryReader;
 14 
 15 import org.apache.lucene.index.IndexReader;
 16 
 17 import org.apache.lucene.index.IndexWriter;
 18 
 19 import org.apache.lucene.index.IndexWriterConfig;
 20 
 21 import org.apache.lucene.index.IndexableField;
 22 
 23 import org.apache.lucene.search.IndexSearcher;
 24 
 25 import org.apache.lucene.search.Query;
 26 
 27 import org.apache.lucene.search.ScoreDoc;
 28 
 29 import org.apache.lucene.search.TopDocs;
 30 
 31 import org.apache.lucene.search.highlight.Formatter;
 32 
 33 import org.apache.lucene.search.highlight.Highlighter;
 34 
 35 import org.apache.lucene.search.highlight.QueryScorer;
 36 
 37 import org.apache.lucene.search.highlight.Scorer;
 38 
 39 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 40 
 41 import org.apache.lucene.store.Directory;
 42 
 43 import org.apache.lucene.store.FSDirectory;
 44 
 45 import org.slf4j.Logger;
 46 
 47 import org.slf4j.LoggerFactory;
 48 
 49 import org.wltea.analyzer.lucene.IKAnalyzer;
 50 
 51  
 52 
 53 import cn.lmc.myworld.common.utils.PropertyUtil;
 54 
 55  
 56 
 57  
 58 
 59 /**
 60 
 61  * 全文檢索工具類
 62 
 63  * @author limingcheng
 64 
 65  *
 66 
 67  */
 68 
 69 public class LuceneUtils {
 70 
 71     // 打印日志
 72 
 73 private static final Logger LOGGER = LoggerFactory.getLogger(LuceneUtils.class);
 74 
 75  
 76 
 77     private static Directory directory; // 索引文件存放目錄對象
 78 
 79     private static IndexWriter indexWriter; // 索引寫對象,線程安全
 80 
 81     private static IndexReader indexReader; // 索引讀對象，線程安全
 82 
 83     private static IndexSearcher indexSearcher; // 索引搜索對象，線程安全
 84 
 85     private static Analyzer analyzer; // 分詞器對象
 86 
 87     public static IndexWriterConfig indexWriterConfig; // 索引配置
 88 
 89 //    public static Version matchVersion; // 索引版本(Lucene4.0之前需要用到，4.0之后被取消)
 90 
 91     
 92 
 93 static{
 94 
 95 try {
 96 
 97      //初始化索引文件存放目錄對象
 98 
 99 // directory =
100 
101 // FSDirectory.open(Paths.get((String)PropertyUtil.getParamFromConfig("lucene.index.directory")));
102 
103 directory = FSDirectory.open(Paths.get("E://index"));
104 
105 // 虛擬機退出時關閉
106 
107 Runtime.getRuntime().addShutdownHook(new Thread(){
108 
109 @Override
110 
111 public void run() {
112 
113 LOGGER.info("--------Lucene釋放關閉資源中....");
114 
115 try{
116 
117 //釋放關閉資源
118 
119 if(null!=indexWriter){
120 
121 indexWriter.close();
122 
123 }
124 
125 if(null!=indexReader){
126 
127 indexReader.close();
128 
129 }
130 
131 if(null!=directory){
132 
133 directory.close();
134 
135 }
136 
137 if(null!=analyzer){
138 
139 analyzer.close();
140 
141 }
142 
143 } catch (IOException e) {
144 
145 e.printStackTrace();
146 
147 }
148 
149 LOGGER.info("--------Lucene釋放關閉資源成功....");
150 
151 }
152 
153 });
154 
155        
156 
157 } catch (Exception e) {
158 
159        e.printStackTrace();
160 
161     }
162 
163 }
164 
165     
166 
167 /**
168 
169      *
170 
171      * @return 返回用於操作索引的對象
172 
173      * @throws IOException
174 
175      */
176 
177     public static IndexWriter getIndexWriter() throws IOException{
178 
179      if(null==indexWriter){
180 
181             // 初始化IK分詞器
182 
183             Analyzer analyzer = getAnalyzer();
184 
185             // 初始化索引的寫配置對象
186 
187             indexWriterConfig = new IndexWriterConfig(analyzer);
188 
189             // 初始化索引的寫對象
190 
191             indexWriter=new IndexWriter(directory, indexWriterConfig);
192 
193          }
194 
195          return indexWriter;
196 
197     }
198 
199     
200 
201     /**
202 
203      *
204 
205      * @return 返回用於操作索引的對象
206 
207      * @throws IOException
208 
209      */
210 
211     public static IndexReader getIndexReader() throws IOException{
212 
213      indexReader = DirectoryReader.open(directory);
214 
215         return indexReader;
216 
217     }
218 
219     
220 
221     /**
222 
223      *
224 
225      * @return 返回用於讀取索引的對象
226 
227      * @throws IOException
228 
229      */
230 
231     public static IndexSearcher getIndexSearcher() throws IOException{
232 
233         indexReader = DirectoryReader.open(directory);
234 
235         indexSearcher = new IndexSearcher(indexReader);
236 
237         return indexSearcher;
238 
239     }
240 
241     
242 
243     /**
244 
245      *
246 
247      * @return 返回用於讀取索引的對象
248 
249      * @throws IOException
250 
251      */
252 
253     public static IndexSearcher getIndexSearcher(Directory directory) throws IOException{
254 
255      indexReader = DirectoryReader.open(directory);
256 
257         indexSearcher = new IndexSearcher(indexReader);
258 
259         return indexSearcher;
260 
261     }
262 
263  
264 
265     /**
266 
267      *
268 
269      * @return 返回版本信息
270 
271      */
272 
273 //    public static Version getMatchVersion() {
274 
275 //        return matchVersion;
276 
277 //    }
278 
279  
280 
281     /**
282 
283      *
284 
285      * @return 返回分詞器
286 
287      */
288 
289     public static Analyzer getAnalyzer() {
290 
291      // Lucene4以前的版本需要用到版本配置
292 
293      // matchVersion = Version.LUCENE_44;
294 
295      // 分詞器
296 
297      // analyzer = new StandardAnalyzer(); // 標准分詞
298 
299      if(analyzer == null) {
300 
301      System.out.println("創建新的分析器");
302 
303      analyzer = new IKAnalyzer();
304 
305      }
306 
307         return analyzer;
308 
309     }
310 
311     
312 
313     /**
314 
315      * 打印一個文檔的所有字段的內容
316 
317      * @param
318 
319      */
320 
321     public static void printDocument(Document document){
322 
323      //打印具體字段
324 
325      List<IndexableField> fieldList = document.getFields();
326 
327      //遍歷列表
328 
329      for (IndexableField field : fieldList){
330 
331      //打印出所有的字段的名字和值（必須存儲了的）
332 
333      LOGGER.info(field.name()+":"+field.stringValue());
334 
335      }
336 
337      //文檔詳情
338 
339      LOGGER.info(document.toString());
340 
341     }
342 
343  
344 
345     /**
346 
347      * 打印ScoreDoc
348 
349      * @param scoreDoc
350 
351      * @throws IOException
352 
353      */
354 
355     public static void printScoreDoc(ScoreDoc scoreDoc) throws IOException{
356 
357      //獲取文檔的編號（類似索引主鍵）
358 
359      int docId = scoreDoc.doc;
360 
361      LOGGER.info("======文檔編號："+docId);
362 
363      // 取出文檔得分
364 
365      LOGGER.info("得分： " + scoreDoc.score);
366 
367      //獲取具體文檔
368 
369      Document document = indexSearcher.doc(docId);
370 
371      //打印具體字段
372 
373      printDocument(document);
374 
375     }
376 
377  
378 
379     /**
380 
381      * 打印命中的文檔（帶得分）的詳情
382 
383      * @param topDocs
384 
385      */
386 
387     public static void printTopDocs(TopDocs topDocs) throws IOException {
388 
389      // 1)打印總記錄數（命中數）：類似於百度為您找到相關結果約100,000,000個
390 
391      long totalHits = topDocs.totalHits.value;
392 
393      LOGGER.info("查詢（命中）總的文檔條數："+totalHits);
394 
395 //      LOGGER.info("查詢（命中）文檔最大分數："+topDocs.getMaxScore());
396 
397      //2)獲取指定的最大條數的、命中的查詢結果的文檔對象集合
398 
399      ScoreDoc[] scoreDocs = topDocs.scoreDocs;
400 
401      //打印具體文檔
402 
403      for (ScoreDoc scoreDoc : scoreDocs) {
404 
405      printScoreDoc(scoreDoc);
406 
407      }
408 
409     }
410 
411  
412 
413     public static void printTopDocsByQueryForHighlighter(Query query, int n) throws Exception{
414 
415  
416 
417        //=========1.創建一個高亮工具對象
418 
419        // 格式化器：參數1：前置標簽，參數2：后置標簽
420 
421        Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
422 
423        //打分對象，參數：query里面的條件，條件里面有搜索關鍵詞
424 
425        Scorer fragmentScorer = new QueryScorer(query);
426 
427        //高亮工具
428 
429        //參數1.需要高亮什么顏色, 參數2.將哪些關鍵詞進行高亮
430 
431        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
432 
433        //=======搜索相關
434 
435        IndexSearcher indexSearcher = getIndexSearcher();
436 
437        // 搜索數據,兩個參數：查詢條件對象要查詢的最大結果條數
438 
439        // 返回的結果是 按照匹配度排名得分前N名的文檔信息（包含查詢到的總條數信息、所有符合條件的文檔的編號信息）
440 
441        TopDocs topDocs = indexSearcher.search(query, n);
442 
443        // 打印命中的總條數
444 
445 //     LOGGER.info("本次搜索共" + topDocs.totalHits + "條數據,最高分："+topDocs.getMaxScore());
446 
447  
448 
449        // 獲取得分文檔對象（ScoreDoc）數組.SocreDoc中包含：文檔的編號、文檔的得分
450 
451        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
452 
453  
454 
455        //循環
456 
457        for (ScoreDoc scoreDoc : scoreDocs) {
458 
459         // 取出文檔編號
460 
461         int docID = scoreDoc.doc;
462 
463         System.out.println("=========文檔的編號是："+docID);
464 
465         // 取出文檔得分
466 
467         System.out.println("當前文檔得分： " + scoreDoc.score);
468 
469         // 根據編號去找文檔
470 
471         Document document = indexSearcher.doc(docID);
472 
473         //獲取文檔的所有字段對象
474 
475         List<IndexableField> fieldList= document.getFields();
476 
477         //遍歷列表
478 
479         for (IndexableField field : fieldList) {
480 
481         String highlighterValue = highlighter.getBestFragment(getAnalyzer(), field.name(), field.stringValue());
482 
483         //如果沒有得到高亮的值
484 
485         if (null==highlighterValue) {
486 
487         //則讓高亮結果等不高亮的值
488 
489         highlighterValue = field.stringValue();
490 
491         }
492 
493         //打印出所有的字段的名字和值（必須存儲了的）
494 
495         LOGGER.info(field.name()+":"+highlighterValue);
496 
497         }
498 
499  
500 
501         }
502 
503     }
504 
505     
506 
507 }
免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。
猜您在找 Guava工具類學習 Apache Commons工具類學習（一）-----CSV C#編寫了一個基於Lucene.Net的搜索引擎查詢通用工具類：SearchEngineUtil SFTP工具類 restTemplate工具類 RedisUtil工具類 BigDecimalUtil 工具類 JsonUtil工具類 RandomStringUtils工具類 OkHttp工具類