Lucene查询并高亮显示

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Lucene查询并高亮显示相关的知识,希望对你有一定的参考价值。

1.导入jar包

技术分享


2.创建实体Bean

package com.zhishang.lucene;

/**
 * Plain data holder describing one HTML page: its title, its text content and its URL.
 *
 * <p>All three properties are {@code null} until the corresponding setter is called.
 * The class name is {@code HtmlBean} (capital H) because that is the name every other
 * class in this package refers to.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBean {
    /** Page title. */
    private String title;
    /** Page text content. */
    private String content;
    /** Location of the page. */
    private String url;

    /**
     * @param title the page title to store
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @param content the page text content to store
     */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * @param url the page location to store
     */
    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * @return the stored title, or {@code null} if none was set
     */
    public String getTitle() {
        return title;
    }

    /**
     * @return the stored content, or {@code null} if none was set
     */
    public String getContent() {
        return content;
    }

    /**
     * @return the stored URL, or {@code null} if none was set
     */
    public String getUrl() {
        return url;
    }
}


3.创建工具Bean

package com.zhishang.lucene;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

/**
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBeanUtil {


    public static HtmlBean parseHtml(File file){
        try {
            Source sc = new Source(file);
            Element element = sc.getFirstElement(HTMLElementName.TITLE);
            if (element == null || element.getTextExtractor() == null){
                return null;
            }

            HtmlBean htmlBean = new HtmlBean();
            htmlBean.setTitle(element.getTextExtractor().toString());
            htmlBean.setContent(sc.getTextExtractor().toString());
            htmlBean.setUrl(file.getAbsolutePath());

            return htmlBean;
        } catch (IOException e) {
            e.printStackTrace();
        }

        return null;
    }
}


4.创建操作Bean

package com.zhishang.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Created by Administrator on 2017/7/7.
 */
public class SearchIndex {

    public List<HtmlBean> search(String keyword){
        Directory dir = null;
        try {
            dir = FSDirectory.open(new File(CreateIndex.indexDir));
            IndexReader reader = DirectoryReader.open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new IKAnalyzer();
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(Version.LUCENE_4_9,new String[]{"title","content"},analyzer);
            Query query = multiFieldQueryParser.parse(keyword);
            TopDocs search = searcher.search(query,10);
            ScoreDoc[] scoreDocs = search.scoreDocs;
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=‘red‘>","</font>");
            QueryScorer queryScorer = new QueryScorer(query,"title");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,queryScorer);
            List<HtmlBean> htmlBeanList = new ArrayList<HtmlBean>();
            for (ScoreDoc scoreDoc:scoreDocs){
                Document document = reader.document(scoreDoc.doc);
                String title = highlighter.getBestFragment(new IKAnalyzer(),"title",document.get("title"));
                String content = highlighter.getBestFragments(new IKAnalyzer().tokenStream("content",document.get("content")),document.get("content"),3,"...");
                String url = document.get("url");
                HtmlBean htmlBean = new HtmlBean();
                htmlBean.setTitle(title);
                htmlBean.setContent(content);
                htmlBean.setUrl(url);
                htmlBeanList.add(htmlBean);
            }

            return htmlBeanList;
//            System.out.println(search.totalHits);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }

        return null;
    }
}


5.创建测试Bean

package com.zhishang.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import java.io.File;
import java.util.List;

/**
 * Created by Administrator on 2017/7/8.
 */
public class LuceneBean {

    @Test
    public void search(){
        SearchIndex searchIndex = new SearchIndex();
        List<HtmlBean> htmlBeanList = searchIndex.search("java");
        for (HtmlBean bean:htmlBeanList){
            System.out.println(bean.getTitle());
            System.out.println(bean.getContent());
            System.out.println(bean.getUrl());
            System.out.println("-----------------------------------------------------");
        }
    }

    /*
    创建索引
     */
    @Test
    public void createIndex(){
        File file = new File(CreateIndex.indexDir);
        if (file.exists()){
            file.delete();
            file.mkdirs();
        }
        CreateIndex createIndex = new CreateIndex();
        createIndex.createIndex();
    }
}


本文出自 “素颜” 博客,请务必保留此出处http://suyanzhu.blog.51cto.com/8050189/1945606

以上是关于Lucene查询并高亮显示的主要内容,如果未能解决你的问题,请参考以下文章

Lucene —— 搜索结果高亮显示

Lucene三个高亮显示模块的简单示例-Highlighter

如何使用 Lucene 做网站高亮搜索功能?

3.6 Lucene基本检索+关键词高亮+分页

Lucene系列:搜索关键字高亮

【Elasticsearch】elasticsearch 查询 高亮