lucene 索引搜索 BlogIndex.java

Posted 帅的土掉渣

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了lucene 索引搜索 BlogIndex.java相关的知识,希望对你有一定的参考价值。

package com.blog.lucene;
import com.blog.entity.Blog;
import com.blog.utils.DateUtils;
import com.blog.utils.StringUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;


/**
 * Maintains and queries a Lucene full-text index of blog posts: add, update,
 * delete and search.
 *
 * <p>Every operation opens the on-disk index, does its work and releases all
 * Lucene resources (analyzer, directory, writer/reader) before returning, even
 * when the operation fails.
 */
public class BlogIndex {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_LUCENE_PATH = "e://lucene";

    /** Upper bound on the number of hits returned by {@link #searchBlog(String)}. */
    private static final int MAX_HITS = 100;

    /** Length of the plain content preview used when no highlighted fragment exists. */
    private static final int CONTENT_PREVIEW_LENGTH = 200;

    /** Fields that a search query is matched against. */
    private static final String[] SEARCH_FIELDS = {"title", "content", "keyWord"};

    /** File-system path of the Lucene index directory. */
    private final String lucenePath;

    /**
     * Creates an index helper that works on the default index location.
     */
    public BlogIndex() {
        this(DEFAULT_LUCENE_PATH);
    }

    /**
     * Creates an index helper that works on the given index location.
     *
     * @param lucenePath file-system path of the Lucene index directory
     * @throws IllegalArgumentException if {@code lucenePath} is {@code null}
     */
    public BlogIndex(String lucenePath) {
        if (lucenePath == null) {
            throw new IllegalArgumentException("lucenePath must not be null");
        }
        this.lucenePath = lucenePath;
    }

    /**
     * Opens the index directory. The caller is responsible for closing it.
     *
     * @return the opened directory
     * @throws IOException if the directory cannot be opened
     */
    private Directory openDirectory() throws IOException {
        return FSDirectory.open(Paths.get(this.lucenePath));
    }

    /**
     * Builds the Lucene document for a blog post. Shared by add and update so
     * both always index every field the same way.
     *
     * @param blog the post to convert
     * @return the document to be written to the index
     */
    private Document toDocument(Blog blog) {
        Document document = new Document();
        document.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));

        // Lucene field constructors reject null values, so absent values are
        // simply left out of the document; searchBlog tolerates missing fields.
        String title = blog.getTitle();
        if (title != null) {
            document.add(new TextField("title", title, Field.Store.YES));
        }

        // NOTE(review): this stamps the moment of indexing, not a date taken
        // from the blog itself, so an update overwrites it with today's date.
        // Kept as in the original; confirm this is intended.
        document.add(new StringField("releaseDate",
                DateUtils.formatDate(new Date(), "yyyy-MM-dd"), Field.Store.YES));

        // Content must be a tokenized TextField: a StringField is indexed as a
        // single term and would make the body unsearchable by word.
        String content = blog.getContentNoTag();
        if (content != null) {
            document.add(new TextField("content", content, Field.Store.YES));
        }

        String keyWord = blog.getKeyWord();
        if (keyWord != null) {
            document.add(new StringField("keyWord", keyWord, Field.Store.YES));
        }
        return document;
    }

    /**
     * Adds a blog post to the index.
     *
     * @param blog the post to index
     * @throws IOException if the index cannot be opened or written
     */
    public void addIndex(Blog blog) throws IOException {
        // Resources close in reverse order: writer (which commits), directory, analyzer.
        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             Directory directory = openDirectory();
             IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            writer.addDocument(toDocument(blog));
        }
    }

    /**
     * Replaces the indexed document whose {@code id} matches the given post.
     *
     * @param blog the post carrying the new field values
     * @throws IOException if the index cannot be opened or written
     */
    public void updateIndex(Blog blog) throws IOException {
        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             Directory directory = openDirectory();
             IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            writer.updateDocument(new Term("id", String.valueOf(blog.getId())), toDocument(blog));
        }
    }

    /**
     * Removes the document with the given id from the index.
     *
     * @param blogId id of the post to remove, as stored in the {@code id} field
     * @throws IOException if the index cannot be opened or written
     */
    public void deleteIndex(String blogId) throws IOException {
        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             Directory directory = openDirectory();
             IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            writer.deleteDocuments(new Term("id", blogId));
            writer.forceMergeDeletes();
            writer.commit();
        }
    }

    /**
     * Searches title, content and keyword for the given query and returns the
     * matching posts with the matched terms highlighted.
     *
     * @param q query string in Lucene classic query-parser syntax
     * @return at most 100 matching posts in the order Lucene returned them;
     *         empty when {@code q} is {@code null} or blank
     * @throws Exception if the query cannot be parsed, the index cannot be
     *                   read, or highlighting fails
     */
    public List<Blog> searchBlog(String q) throws Exception {
        List<Blog> blogList = new LinkedList<Blog>();
        if (q == null || q.trim().isEmpty()) {
            // The query parser cannot parse an empty query; nothing to search for.
            return blogList;
        }

        try (SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
             Directory directory = openDirectory();
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // A document matches when any of the searched fields matches.
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            for (String field : SEARCH_FIELDS) {
                builder.add(new QueryParser(field, analyzer).parse(q), BooleanClause.Occur.SHOULD);
            }
            Query combinedQuery = builder.build();
            TopDocs hits = searcher.search(combinedQuery, MAX_HITS);

            // Highlight against the same query that was searched.
            QueryScorer scorer = new QueryScorer(combinedQuery);
            Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>"), scorer);
            highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                blogList.add(toBlog(searcher.doc(scoreDoc.doc), highlighter, analyzer));
            }
        }
        return blogList;
    }

    /**
     * Converts one search hit into a {@code Blog}, highlighting each text field.
     *
     * @param doc         the stored document of the hit
     * @param highlighter highlighter configured for the current query
     * @param analyzer    analyzer used to tokenize the stored text
     * @return the populated blog
     * @throws IOException                  if tokenizing the stored text fails
     * @throws InvalidTokenOffsetsException if the highlighter meets inconsistent token offsets
     */
    private Blog toBlog(Document doc, Highlighter highlighter, SmartChineseAnalyzer analyzer)
            throws IOException, InvalidTokenOffsetsException {
        Blog blog = new Blog();
        blog.setId(Integer.valueOf(doc.get("id")));
        blog.setReleaseDateStr(doc.get("releaseDate"));

        // NOTE(review): unlike content, the title is not HTML-escaped before the
        // highlight markup is added (unchanged from the original) — confirm the
        // view layer handles this.
        String title = doc.get("title");
        if (title != null) {
            String highlighted = highlight(highlighter, analyzer, "title", title);
            blog.setTitle(highlighted != null ? highlighted : title);
        }

        // Content is escaped first so that only the highlighter's own tags are
        // live markup in the result. StringEscapeUtils.escapeHtml maps null to
        // null, so the null check still detects a missing field.
        String content = StringEscapeUtils.escapeHtml(doc.get("content"));
        if (content != null) {
            String highlighted = highlight(highlighter, analyzer, "content", content);
            if (highlighted != null) {
                blog.setContent(highlighted);
            } else if (content.length() <= CONTENT_PREVIEW_LENGTH) {
                blog.setContent(content);
            } else {
                blog.setContent(content.substring(0, CONTENT_PREVIEW_LENGTH));
            }
        }

        String keyWord = doc.get("keyWord");
        if (keyWord != null) {
            String highlighted = highlight(highlighter, analyzer, "keyWord", keyWord);
            blog.setKeyWord(highlighted != null ? highlighted : keyWord);
        }
        return blog;
    }

    /**
     * Returns the best highlighted fragment of {@code text}, or {@code null}
     * when the highlighter produces nothing for it.
     *
     * @param highlighter highlighter configured for the current query
     * @param analyzer    analyzer used to tokenize {@code text}
     * @param field       name of the field the text belongs to
     * @param text        the text to highlight
     * @return the highlighted fragment, or {@code null} if there is none
     * @throws IOException                  if tokenizing the text fails
     * @throws InvalidTokenOffsetsException if the highlighter meets inconsistent token offsets
     */
    private String highlight(Highlighter highlighter, SmartChineseAnalyzer analyzer,
                             String field, String text)
            throws IOException, InvalidTokenOffsetsException {
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokenStream, text);
        return StringUtils.isEmpty(fragment) ? null : fragment;
    }
}

 

以上是关于lucene 索引搜索 BlogIndex.java的主要内容,如果未能解决你的问题,请参考以下文章:

搜索引擎系列五:Lucene索引详解(IndexWriter详解、Document详解、索引更新)

搜索引擎为什么这么快?Lucene 倒排索引介绍

lucene学习笔记一:lucene是什么、实现步骤以及索引的创建、查询、修改、删除

Lucene

Lucene搜索流程(上)

使用Lucene.Net做一个简单的搜索引擎-全文索引