Lucene学习：工具类

Posted 2020-11-23 bestlmc
tags:
篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Lucene学习：工具类相关的知识，希望对你有一定的参考价值。
1.1. Lucene工具类

为了后面的开发、测试方便，这里编写一个工具类：
  1 import java.io.IOException;
  2 
  3 import java.nio.file.Paths;
  4 
  5 import java.util.List;
  6 
  7  
  8 
  9 import org.apache.lucene.analysis.Analyzer;
 10 
 11 import org.apache.lucene.document.Document;
 12 
 13 import org.apache.lucene.index.DirectoryReader;
 14 
 15 import org.apache.lucene.index.IndexReader;
 16 
 17 import org.apache.lucene.index.IndexWriter;
 18 
 19 import org.apache.lucene.index.IndexWriterConfig;
 20 
 21 import org.apache.lucene.index.IndexableField;
 22 
 23 import org.apache.lucene.search.IndexSearcher;
 24 
 25 import org.apache.lucene.search.Query;
 26 
 27 import org.apache.lucene.search.ScoreDoc;
 28 
 29 import org.apache.lucene.search.TopDocs;
 30 
 31 import org.apache.lucene.search.highlight.Formatter;
 32 
 33 import org.apache.lucene.search.highlight.Highlighter;
 34 
 35 import org.apache.lucene.search.highlight.QueryScorer;
 36 
 37 import org.apache.lucene.search.highlight.Scorer;
 38 
 39 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 40 
 41 import org.apache.lucene.store.Directory;
 42 
 43 import org.apache.lucene.store.FSDirectory;
 44 
 45 import org.slf4j.Logger;
 46 
 47 import org.slf4j.LoggerFactory;
 48 
 49 import org.wltea.analyzer.lucene.IKAnalyzer;
 50 
 51  
 52 
 53 import cn.lmc.myworld.common.utils.PropertyUtil;
 54 
 55  
 56 
 57  
 58 
 59 /**
 60 
 61  * 全文检索工具类
 62 
 63  * @author limingcheng
 64 
 65  *
 66 
 67  */
 68 
 69 public class LuceneUtils {
 70 
 71     // 打印日志
 72 
 73 private static final Logger LOGGER = LoggerFactory.getLogger(LuceneUtils.class);
 74 
 75  
 76 
 77     private static Directory directory; // 索引文件存放目录对象
 78 
 79     private static IndexWriter indexWriter; // 索引写对象,线程安全
 80 
 81     private static IndexReader indexReader; // 索引读对象，线程安全
 82 
 83     private static IndexSearcher indexSearcher; // 索引搜索对象，线程安全
 84 
 85     private static Analyzer analyzer; // 分词器对象
 86 
 87     public static IndexWriterConfig indexWriterConfig; // 索引配置
 88 
 89 //    public static Version matchVersion; // 索引版本(Lucene4.0之前需要用到，4.0之后被取消)
 90 
 91     
 92 
 93 static{
 94 
 95 try {
 96 
 97      //初始化索引文件存放目录对象
 98 
 99 // directory =
100 
101 // FSDirectory.open(Paths.get((String)PropertyUtil.getParamFromConfig("lucene.index.directory")));
102 
103 directory = FSDirectory.open(Paths.get("E://index"));
104 
105 // 虚拟机退出时关闭
106 
107 Runtime.getRuntime().addShutdownHook(new Thread(){
108 
109 @Override
110 
111 public void run() {
112 
113 LOGGER.info("--------Lucene释放关闭资源中....");
114 
115 try{
116 
117 //释放关闭资源
118 
119 if(null!=indexWriter){
120 
121 indexWriter.close();
122 
123 }
124 
125 if(null!=indexReader){
126 
127 indexReader.close();
128 
129 }
130 
131 if(null!=directory){
132 
133 directory.close();
134 
135 }
136 
137 if(null!=analyzer){
138 
139 analyzer.close();
140 
141 }
142 
143 } catch (IOException e) {
144 
145 e.printStackTrace();
146 
147 }
148 
149 LOGGER.info("--------Lucene释放关闭资源成功....");
150 
151 }
152 
153 });
154 
155        
156 
157 } catch (Exception e) {
158 
159        e.printStackTrace();
160 
161     }
162 
163 }
164 
165     
166 
167 /**
168 
169      *
170 
171      * @return 返回用于操作索引的对象
172 
173      * @throws IOException
174 
175      */
176 
177     public static IndexWriter getIndexWriter() throws IOException{
178 
179      if(null==indexWriter){
180 
181             // 初始化IK分词器
182 
183             Analyzer analyzer = getAnalyzer();
184 
185             // 初始化索引的写配置对象
186 
187             indexWriterConfig = new IndexWriterConfig(analyzer);
188 
189             // 初始化索引的写对象
190 
191             indexWriter=new IndexWriter(directory, indexWriterConfig);
192 
193          }
194 
195          return indexWriter;
196 
197     }
198 
199     
200 
201     /**
202 
203      *
204 
205      * @return 返回用于操作索引的对象
206 
207      * @throws IOException
208 
209      */
210 
211     public static IndexReader getIndexReader() throws IOException{
212 
213      indexReader = DirectoryReader.open(directory);
214 
215         return indexReader;
216 
217     }
218 
219     
220 
221     /**
222 
223      *
224 
225      * @return 返回用于读取索引的对象
226 
227      * @throws IOException
228 
229      */
230 
231     public static IndexSearcher getIndexSearcher() throws IOException{
232 
233         indexReader = DirectoryReader.open(directory);
234 
235         indexSearcher = new IndexSearcher(indexReader);
236 
237         return indexSearcher;
238 
239     }
240 
241     
242 
243     /**
244 
245      *
246 
247      * @return 返回用于读取索引的对象
248 
249      * @throws IOException
250 
251      */
252 
253     public static IndexSearcher getIndexSearcher(Directory directory) throws IOException{
254 
255      indexReader = DirectoryReader.open(directory);
256 
257         indexSearcher = new IndexSearcher(indexReader);
258 
259         return indexSearcher;
260 
261     }
262 
263  
264 
265     /**
266 
267      *
268 
269      * @return 返回版本信息
270 
271      */
272 
273 //    public static Version getMatchVersion() {
274 
275 //        return matchVersion;
276 
277 //    }
278 
279  
280 
281     /**
282 
283      *
284 
285      * @return 返回分词器
286 
287      */
288 
289     public static Analyzer getAnalyzer() {
290 
291      // Lucene4以前的版本需要用到版本配置
292 
293      // matchVersion = Version.LUCENE_44;
294 
295      // 分词器
296 
297      // analyzer = new StandardAnalyzer(); // 标准分词
298 
299      if(analyzer == null) {
300 
301      System.out.println("创建新的分析器");
302 
303      analyzer = new IKAnalyzer();
304 
305      }
306 
307         return analyzer;
308 
309     }
310 
311     
312 
313     /**
314 
315      * 打印一个文档的所有字段的内容
316 
317      * @param
318 
319      */
320 
321     public static void printDocument(Document document){
322 
323      //打印具体字段
324 
325      List<IndexableField> fieldList = document.getFields();
326 
327      //遍历列表
328 
329      for (IndexableField field : fieldList){
330 
331      //打印出所有的字段的名字和值（必须存储了的）
332 
333      LOGGER.info(field.name()+":"+field.stringValue());
334 
335      }
336 
337      //文档详情
338 
339      LOGGER.info(document.toString());
340 
341     }
342 
343  
344 
345     /**
346 
347      * 打印ScoreDoc
348 
349      * @param scoreDoc
350 
351      * @throws IOException
352 
353      */
354 
355     public static void printScoreDoc(ScoreDoc scoreDoc) throws IOException{
356 
357      //获取文档的编号（类似索引主键）
358 
359      int docId = scoreDoc.doc;
360 
361      LOGGER.info("======文档编号："+docId);
362 
363      // 取出文档得分
364 
365      LOGGER.info("得分： " + scoreDoc.score);
366 
367      //获取具体文档
368 
369      Document document = indexSearcher.doc(docId);
370 
371      //打印具体字段
372 
373      printDocument(document);
374 
375     }
376 
377  
378 
379     /**
380 
381      * 打印命中的文档（带得分）的详情
382 
383      * @param topDocs
384 
385      */
386 
387     public static void printTopDocs(TopDocs topDocs) throws IOException {
388 
389      // 1)打印总记录数（命中数）：类似于百度为您找到相关结果约100,000,000个
390 
391      long totalHits = topDocs.totalHits.value;
392 
393      LOGGER.info("查询（命中）总的文档条数："+totalHits);
394 
395 //      LOGGER.info("查询（命中）文档最大分数："+topDocs.getMaxScore());
396 
397      //2)获取指定的最大条数的、命中的查询结果的文档对象集合
398 
399      ScoreDoc[] scoreDocs = topDocs.scoreDocs;
400 
401      //打印具体文档
402 
403      for (ScoreDoc scoreDoc : scoreDocs) {
404 
405      printScoreDoc(scoreDoc);
406 
407      }
408 
409     }
410 
411  
412 
413     public static void printTopDocsByQueryForHighlighter(Query query, int n) throws Exception{
414 
415  
416 
417        //=========1.创建一个高亮工具对象
418 
419        // 格式化器：参数1：前置标签，参数2：后置标签
420 
421        Formatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
422 
423        //打分对象，参数：query里面的条件，条件里面有搜索关键词
424 
425        Scorer fragmentScorer = new QueryScorer(query);
426 
427        //高亮工具
428 
429        //参数1.需要高亮什么颜色, 参数2.将哪些关键词进行高亮
430 
431        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
432 
433        //=======搜索相关
434 
435        IndexSearcher indexSearcher = getIndexSearcher();
436 
437        // 搜索数据,两个参数：查询条件对象要查询的最大结果条数
438 
439        // 返回的结果是 按照匹配度排名得分前N名的文档信息（包含查询到的总条数信息、所有符合条件的文档的编号信息）
440 
441        TopDocs topDocs = indexSearcher.search(query, n);
442 
443        // 打印命中的总条数
444 
445 //     LOGGER.info("本次搜索共" + topDocs.totalHits + "条数据,最高分："+topDocs.getMaxScore());
446 
447  
448 
449        // 获取得分文档对象（ScoreDoc）数组.SocreDoc中包含：文档的编号、文档的得分
450 
451        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
452 
453  
454 
455        //循环
456 
457        for (ScoreDoc scoreDoc : scoreDocs) {
458 
459         // 取出文档编号
460 
461         int docID = scoreDoc.doc;
462 
463         System.out.println("=========文档的编号是："+docID);
464 
465         // 取出文档得分
466 
467         System.out.println("当前文档得分： " + scoreDoc.score);
468 
469         // 根据编号去找文档
470 
471         Document document = indexSearcher.doc(docID);
472 
473         //获取文档的所有字段对象
474 
475         List<IndexableField> fieldList= document.getFields();
476 
477         //遍历列表
478 
479         for (IndexableField field : fieldList) {
480 
481         String highlighterValue = highlighter.getBestFragment(getAnalyzer(), field.name(), field.stringValue());
482 
483         //如果没有得到高亮的值
484 
485         if (null==highlighterValue) {
486 
487         //则让高亮结果等不高亮的值
488 
489         highlighterValue = field.stringValue();
490 
491         }
492 
493         //打印出所有的字段的名字和值（必须存储了的）
494 
495         LOGGER.info(field.name()+":"+highlighterValue);
496 
497         }
498 
499  
500 
501         }
502 
503     }
504 
505     
506 
507 }
以上是关于Lucene学习：工具类的主要内容，如果未能解决你的问题，请参考以下文章