lucene 索引搜索 BlogIndex.java
Posted 帅的土掉渣
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了lucene 索引搜索 BlogIndex.java相关的知识,希望对你有一定的参考价值。
package com.blog.lucene;
import com.blog.entity.Blog; import com.blog.utils.DateUtils; import com.blog.utils.StringUtils; import org.apache.commons.lang.StringEscapeUtils; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.*; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.search.highlight.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import java.io.IOException; import java.io.StringReader; import java.nio.file.Paths; import java.util.Date; import java.util.LinkedList; import java.util.List; /** * 使用lucene对博客实现增删改查 */ @SuppressWarnings("all") public class BlogIndex { private Directory dir = null; private String lucenePath = "e://lucene"; /** * 获取luene的写入方法 * @return * @throws IOException */ private IndexWriter getWriter() throws IOException { this.dir = FSDirectory.open(Paths.get(this.lucenePath, new String[0])); SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir,iwc); return writer; } /** * 增加索引 */ public void addIndex(Blog blog) throws IOException { IndexWriter writer = getWriter(); Document document = new Document(); document.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES)); document.add(new TextField("title", blog.getTitle(), Field.Store.YES)); document.add(new StringField("releaseDate",DateUtils.formatDate(new Date(),"yyyy-MM-dd"), Field.Store.YES)); document.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES)); document.add(new StringField("keyWord", blog.getKeyWord(), Field.Store.YES)); writer.addDocument(document); writer.close(); } /** * 跟新索引 */ public void 
updateIndex(Blog blog) throws IOException { IndexWriter writer = getWriter(); Document document = new Document(); document.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES)); document.add(new TextField("title", blog.getTitle(), Field.Store.YES)); document.add(new StringField("releaseDate", DateUtils.formatDate(new Date(),"yyyy-MM-dd"), Field.Store.YES)); document.add(new StringField("content", blog.getContentNoTag(), Field.Store.YES)); document.add(new StringField("keyWord", blog.getKeyWord(), Field.Store.YES)); writer.updateDocument(new Term("id", String.valueOf(blog.getId())),document); writer.close(); } /** * 删除索引 */ public void deleteIndex(String blogId) throws IOException { IndexWriter writer = getWriter(); writer.deleteDocuments(new Term[]{new Term("id", blogId)}); writer.forceMergeDeletes(); writer.commit(); writer.close(); } /** * 搜索索引 */ public List<Blog> searchBlog(String q) throws Exception { List<Blog> blogList = new LinkedList<Blog>(); dir = FSDirectory.open(Paths.get(this.lucenePath, new String[0])); //获取reader IndexReader reader = DirectoryReader.open(this.dir); //获取流 IndexSearcher is = new IndexSearcher(reader); //放入查询条件 BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("title", analyzer); Query query = parser.parse(q); QueryParser parser2 = new QueryParser("content", analyzer); Query query2 = parser2.parse(q); QueryParser parser3 = new QueryParser("keyWord", analyzer); Query query3 = parser3.parse(q); booleanQuery.add(query, BooleanClause.Occur.SHOULD); booleanQuery.add(query2, BooleanClause.Occur.SHOULD); booleanQuery.add(query3, BooleanClause.Occur.SHOULD); //最多返回100条数据 TopDocs hits = is.search(booleanQuery.build(), 100); //高亮搜索字 QueryScorer scorer = new QueryScorer(query); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); SimplehtmlFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font 
color=‘red‘>","</font></b>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter,scorer); highlighter.setTextFragmenter(fragmenter); //遍历查询结果,放入blogList for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); Blog blog = new Blog(); blog.setId(Integer.valueOf(Integer.parseInt(doc.get("id")))); blog.setReleaseDateStr(doc.get("releaseDate")); String title = doc.get("title"); String content = StringEscapeUtils.escapeHtml(doc.get("content")); String keyWord = doc.get("keyWord"); if (title != null){ TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title)); String hTitle = highlighter.getBestFragment(tokenStream, title); if (StringUtils.isEmpty(hTitle)){ blog.setTitle(title); }else { blog.setTitle(hTitle); } } if (content != null){ TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content)); String hContent = highlighter.getBestFragment(tokenStream, content); if (StringUtils.isEmpty(hContent)){ if (content.length()<=200){ blog.setContent(content); }else { blog.setContent(content.substring(0, 200)); } }else { blog.setTitle(hContent); } } if (keyWord != null){ TokenStream tokenStream = analyzer.tokenStream("keyWord", new StringReader(keyWord)); String hKeyWord = highlighter.getBestFragment(tokenStream, keyWord); if (StringUtils.isEmpty(hKeyWord)){ blog.setTitle(keyWord); }else { blog.setTitle(hKeyWord); } } blogList.add(blog); } return blogList; } }
以上是关于lucene 索引搜索 BlogIndex.java的主要内容,如果未能解决你的问题,请参考以下文章
搜索引擎系列五:Lucene索引详解(IndexWriter详解、Document详解、索引更新)