Lucene系列:搜索结果摘要
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Lucene系列:搜索结果摘要相关的知识,希望对你有一定的参考价值。
如果搜索结果的内容太多,而我们只想显示其中的一小段文字(摘要),可以使用 Fragmenter 截取片段;摘要功能必须与高亮(Highlighter)一起使用。
TestFragment.java
package com.rk.lucene.e_fragment;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;

import com.rk.lucene.entity.Article;
import com.rk.lucene.utils.LuceneUtils;

/**
 * Demonstrates search-result summaries: each hit's title and content are
 * reduced to the best-matching fragment, with the matched terms highlighted.
 */
public class TestFragment {

    /** Indexes the seven sample articles used by {@link #testSearch()}. */
    @Test
    public void testAdd() throws Exception {
        List<Article> list = new ArrayList<Article>();
        list.add(new Article(1, "疾风之刃", "《疾风之刃》是一款超动作3D动漫风网游。作为新一代动作游戏,《疾风之刃》呈现出极致华丽的动作表演,精心打磨出的打击感震撼人心。"));
        list.add(new Article(2, "月光疾风", "月光疾风,日本动漫《火影忍者》中的人物,比较个人主义,性格温和。火之国木叶村的特别上忍,中忍考试正赛预选的考官,体质似乎很不好,有着严重的黑眼圈、脸色苍白且经常咳嗽,善用剑术。"));
        list.add(new Article(3, "疾风航班中文版下载", "《疾风航班》是一款优质的动作模拟游戏。游戏中包括亚欧美洲,乃至飞往太空的5条航线,共计50个循序渐进的关卡,以及具有挑战性的Expert级别评定,每个关卡结束后还可进入商店对主角和飞机进..."));
        list.add(new Article(4, "八神疾风", "八神疾风(CV:植田佳奈)是日本动漫《魔法少女奈叶A‘s》首次登场的女角色。暗之书事件中心人物,时空管理局魔导师,擅长贝尔卡式广域·远程魔法。"));
        list.add(new Article(5, "逝去的疾风", "大战中飞得最快的日本飞机,恐怕要数“疾风”战斗机了,它由中岛飞机厂研制生产,制式型号为: 四式单(座)战(斗机),代号キ-84(读作 Ki-84)。"));
        list.add(new Article(6, "疾风剑豪 亚索", "亚索是一个百折不屈的男人,还是一名身手敏捷的剑客,能够运用风的力量来斩杀敌人。这位曾经春风得意的战士因为诬告而身败名裂,并且被迫卷入了一场令人绝望的生存之..."));
        list.add(new Article(7, "疾风知劲草", "疾风知劲草,谓在猛烈的大风中,可看出什么样的草是强劲的。比喻意志坚定,经得起考验。出自《东观汉记·王霸传》:“上谓霸曰:‘颍川从我者皆逝,而子独留,始验疾风知劲草。..."));
        LuceneUtils.addAll(list);
    }

    /**
     * Searches the "content" field for the keyword, replaces each hit's title
     * and content with a highlighted fragment, and prints the resulting articles.
     */
    @Test
    public void testSearch() throws Exception {
        List<Article> list = new ArrayList<Article>();
        String keyword = "疾风";

        QueryParser queryParser = new QueryParser(LuceneUtils.getVersion(), "content", LuceneUtils.getAnalyzer());
        Query query = queryParser.parse(keyword);

        // Matched terms are wrapped in a red <font> tag; the attribute value
        // must use plain ASCII quotes to be valid HTML.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter titleHighlighter = new Highlighter(formatter, scorer);
        Highlighter contentHighlighter = new Highlighter(formatter, scorer);

        // The fragmenter controls the summary length: 6 for titles, 20 for content.
        Fragmenter titleFragmenter = new SimpleFragmenter(6);
        Fragmenter contentFragmenter = new SimpleFragmenter(20);
        titleHighlighter.setTextFragmenter(titleFragmenter);
        contentHighlighter.setTextFragmenter(contentFragmenter);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query, 10000);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println("编号为" + document.get("id") + "号的文章得分是" + scoreDoc.score);

                applyHighlight(document, "title", titleHighlighter);
                applyHighlight(document, "content", contentHighlighter);

                list.add(LuceneUtils.document2javabean(document, Article.class));
            }
        } finally {
            // Release the searcher even when highlighting or conversion fails.
            indexSearcher.close();
        }

        for (Article article : list) {
            System.out.println(article);
        }
    }

    /**
     * Replaces the stored value of {@code fieldName} with its best highlighted fragment.
     *
     * <p>The stored value is left untouched when the document has no such field
     * or when the highlighter returns {@code null} (no query term in the text),
     * so a non-matching field is never overwritten with {@code null}.
     */
    private static void applyHighlight(Document document, String fieldName, Highlighter highlighter) throws Exception {
        String original = document.get(fieldName);
        if (original == null) {
            return;
        }
        String fragment = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), fieldName, original);
        if (fragment != null) {
            document.getField(fieldName).setValue(fragment);
        }
    }
}
关键代码
// Matched terms are wrapped in a red <font> tag (plain ASCII quotes keep the HTML valid).
Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
Scorer scorer = new QueryScorer(query);
Highlighter titleHighlighter = new Highlighter(formatter, scorer);
Highlighter contentHighlighter = new Highlighter(formatter, scorer);

// The fragmenter controls the summary length: 6 for titles, 20 for content.
Fragmenter titleFragmenter = new SimpleFragmenter(6);
Fragmenter contentFragmenter = new SimpleFragmenter(20);
titleHighlighter.setTextFragmenter(titleFragmenter);
contentHighlighter.setTextFragmenter(contentFragmenter);

String titleValue = titleHighlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", document.get("title"));
String contentValue = contentHighlighter.getBestFragment(LuceneUtils.getAnalyzer(), "content", document.get("content"));

// getBestFragment returns null when the text contains no query term;
// in that case keep the stored value instead of overwriting it with null.
if (titleValue != null) {
    document.getField("title").setValue(titleValue);
}
if (contentValue != null) {
    document.getField("content").setValue(contentValue);
}
LuceneUtils.java
package com.rk.lucene.utils;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.rk.lucene.entity.Page;

/**
 * Static helpers around a single file-system Lucene index: add, update,
 * delete, search and paginate JavaBeans, converting them to and from
 * {@link Document}s by reflection.
 */
public class LuceneUtils {

    private static Directory directory;
    private static Version version;
    private static Analyzer analyzer;
    private static MaxFieldLength maxFieldLength;

    private static final String LUCENE_DIRECTORY = "D:/rk/indexDB";

    static {
        try {
            directory = FSDirectory.open(new File(LUCENE_DIRECTORY));
            version = Version.LUCENE_30;
            analyzer = new StandardAnalyzer(version);
            maxFieldLength = MaxFieldLength.LIMITED;
        } catch (Exception e) {
            // Rethrow with the cause attached; no separate stack-trace print is needed.
            throw new RuntimeException("Failed to initialise Lucene index at " + LUCENE_DIRECTORY, e);
        }
    }

    /** Utility class; not instantiable. */
    private LuceneUtils() {
    }

    /**
     * Runs {@code keyword} against {@code field} and fills {@code page} with the
     * beans of the current page, plus the total page and item counts.
     *
     * <p>Only as many hits as the requested page needs are fetched, and the loop
     * is bounded by the number of hits actually returned, so requesting a page
     * deep in the result set cannot run past the end of the hit array. A current
     * page below 1 is treated as page 1.
     *
     * @param page    supplies the current page and page size; receives items and totals
     * @param field   default field for the query parser
     * @param keyword query text
     * @param clazz   bean type to convert each hit into
     * @throws IllegalArgumentException if the page size is not positive
     * @throws Exception on parse, index or conversion failure
     */
    public static <T> void pagination(Page<T> page, String field, String keyword, Class<T> clazz) throws Exception {
        int pageSize = page.getPageSize();
        if (pageSize <= 0) {
            throw new IllegalArgumentException("pageSize must be positive: " + pageSize);
        }
        int curPage = Math.max(page.getCurPage(), 1);

        QueryParser queryParser = new QueryParser(getVersion(), field, getAnalyzer());
        Query query = queryParser.parse(keyword);

        IndexSearcher indexSearcher = new IndexSearcher(getDirectory());
        try {
            // Fetch enough hits to reach the end of the requested page, never
            // more than the index holds, and always at least one.
            long wanted = (long) curPage * pageSize;
            int topN = (int) Math.max(1L, Math.min(wanted, (long) indexSearcher.maxDoc()));
            TopDocs topDocs = indexSearcher.search(query, topN);
            ScoreDoc[] hits = topDocs.scoreDocs;

            int totalHits = topDocs.totalHits;
            int totalPages = totalHits / pageSize + (totalHits % pageSize == 0 ? 0 : 1);

            int startIndex = (int) Math.min((long) (curPage - 1) * pageSize, (long) hits.length);
            int stopIndex = (int) Math.min((long) startIndex + pageSize, (long) hits.length);

            List<T> list = page.getItems();
            if (list == null) {
                list = new ArrayList<T>();
                page.setItems(list);
            }
            list.clear();

            for (int i = startIndex; i < stopIndex; i++) {
                Document document = indexSearcher.doc(hits[i].doc);
                list.add(document2javabean(document, clazz));
            }

            page.setTotalPages(totalPages);
            page.setTotalItems(totalHits);
        } finally {
            indexSearcher.close();
        }
    }

    /** Adds one bean to the index. */
    public static <T> void add(T t) throws Exception {
        Document document = javabean2document(t);
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.addDocument(document);
        } finally {
            indexWriter.close();
        }
    }

    /** Adds every bean of {@code list} to the index using a single writer. */
    public static <T> void addAll(List<T> list) throws Exception {
        IndexWriter indexWriter = openWriter();
        try {
            for (T t : list) {
                indexWriter.addDocument(javabean2document(t));
            }
        } finally {
            indexWriter.close();
        }
    }

    /** Replaces the documents matching the term {@code field:value} with {@code t}. */
    public static <T> void update(String field, String value, T t) throws Exception {
        Document document = javabean2document(t);
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.updateDocument(new Term(field, value), document);
        } finally {
            indexWriter.close();
        }
    }

    /** Deletes the documents matching the term {@code field:value}. */
    public static <T> void delete(String field, String value) throws Exception {
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.deleteDocuments(new Term(field, value));
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Deletes every document in the index.
     */
    public static void deleteAll() throws Exception {
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.deleteAll();
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Searches {@code field} for {@code keyword} and returns up to {@code topN}
     * hits converted to beans. Each hit's document number and score are printed.
     */
    public static <T> List<T> search(String field, String keyword, int topN, Class<T> clazz) throws Exception {
        List<T> list = new ArrayList<T>();
        QueryParser queryParser = new QueryParser(getVersion(), field, getAnalyzer());
        Query query = queryParser.parse(keyword);

        IndexSearcher indexSearcher = new IndexSearcher(getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query, topN);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docIndex = scoreDoc.doc;
                System.out.println("文档索引号" + docIndex + ",文档得分:" + scoreDoc.score);
                Document document = indexSearcher.doc(docIndex);
                list.add(document2javabean(document, clazz));
            }
        } finally {
            indexSearcher.close();
        }
        return list;
    }

    /**
     * Prints each element of {@code list}; does nothing for a null or empty list.
     */
    public static <T> void printList(List<T> list) {
        if (list == null) {
            return;
        }
        for (T t : list) {
            System.out.println(t);
        }
    }

    /**
     * Converts a JavaBean into a {@link Document}: every declared field becomes a
     * stored, analyzed Lucene field named after it, holding {@code toString()}
     * of its value. Fields whose value is {@code null} are skipped, because a
     * Lucene field cannot carry a null value.
     */
    public static Document javabean2document(Object obj) throws Exception {
        Document document = new Document();
        Class<?> clazz = obj.getClass();
        for (java.lang.reflect.Field reflectField : clazz.getDeclaredFields()) {
            // Declared fields are normally private; open them up for reading.
            reflectField.setAccessible(true);
            Object value = reflectField.get(obj);
            if (value == null) {
                continue;
            }
            document.add(new Field(reflectField.getName(), value.toString(), Store.YES, Index.ANALYZED));
        }
        return document;
    }

    /**
     * Converts a {@link Document} into a new instance of {@code clazz}: for every
     * declared field of the class, the document's value of the same name is
     * assigned through {@link BeanUtils#setProperty}.
     */
    public static <T> T document2javabean(Document document, Class<T> clazz) throws Exception {
        T obj = clazz.newInstance();
        for (java.lang.reflect.Field reflectField : clazz.getDeclaredFields()) {
            reflectField.setAccessible(true);
            String name = reflectField.getName();
            String value = document.get(name);
            BeanUtils.setProperty(obj, name, value);
        }
        return obj;
    }

    /** Opens a writer on the shared directory with the configured analyzer and field-length limit. */
    private static IndexWriter openWriter() throws IOException {
        return new IndexWriter(getDirectory(), getAnalyzer(), getMaxFieldLength());
    }

    public static Directory getDirectory() {
        return directory;
    }

    public static void setDirectory(Directory directory) {
        LuceneUtils.directory = directory;
    }

    public static Version getVersion() {
        return version;
    }

    public static void setVersion(Version version) {
        LuceneUtils.version = version;
    }

    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    public static void setAnalyzer(Analyzer analyzer) {
        LuceneUtils.analyzer = analyzer;
    }

    public static MaxFieldLength getMaxFieldLength() {
        return maxFieldLength;
    }

    public static void setMaxFieldLength(MaxFieldLength maxFieldLength) {
        LuceneUtils.maxFieldLength = maxFieldLength;
    }
}
Article.java
package com.rk.lucene.entity; public class Article { private Integer id; private String title;//标题 private String content;//内容 public Article() { } public Article(Integer id, String title, String content) { this.id = id; this.title = title; this.content = content; } public Integer getId() { return id; } public void setId(Integer id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } @Override public String toString() { return "编号: " + id + "\n标题: " + title + "\n内容: " + content + "\n------------------------------------------------------------------\n"; } }
输出结果:
编号为1号的文章得分是0.32994816 编号为7号的文章得分是0.28870463 编号为4号的文章得分是0.23330858 编号为5号的文章得分是0.23330858 编号为2号的文章得分是0.20414501 编号为3号的文章得分是0.20414501 编号: 1 标题: <font color='red'>疾</font><font color='red'>风</font>之刃 内容: 《<font color='red'>疾</font><font color='red'>风</font>之刃》是一款超动作3D动漫风网游 ------------------------------------------------------------------ 编号: 7 标题: <font color='red'>疾</font><font color='red'>风</font>知劲草 内容: <font color='red'>疾</font><font color='red'>风</font>知劲草,谓在猛烈的大风中,可看出什 ------------------------------------------------------------------ 编号: 4 标题: 八神<font color='red'>疾</font><font color='red'>风</font> 内容: 八神<font color='red'>疾</font><font color='red'>风</font>(CV:植田佳奈)是日本动漫 ------------------------------------------------------------------ 编号: 5 标题: 逝去的<font color='red'>疾</font><font color='red'>风</font> 内容: 大战中飞得最快的日本飞机,恐怕要数“<font color='red'>疾</font> ------------------------------------------------------------------ 编号: 2 标题: 月光<font color='red'>疾</font><font color='red'>风</font> 内容: 月光<font color='red'>疾</font><font color='red'>风</font>,日本动漫《火影忍者》中的人物 ------------------------------------------------------------------ 编号: 3 标题: <font color='red'>疾</font><font color='red'>风</font>航班中 内容: 《<font color='red'>疾</font><font color='red'>风</font>航班》是一款优质的动作模拟游戏 ------------------------------------------------------------------
以上是关于Lucene系列:搜索结果摘要的主要内容,如果未能解决你的问题,请参考以下文章
毕业论文“基于Ajax+Lucene构建搜索引擎的设计与实现”中如何写好摘要
搜索引擎系列五:Lucene索引详解(IndexWriter详解、Document详解、索引更新)
论文参考基于Ajax+Lucene构建搜索引擎的设计与实现(源代码+论文)免费下载
搜索引擎系列二:Lucene(Lucene介绍、Lucene架构、Lucene集成)