Lucene参与项目持久层中对于索引库的增删改查

Posted 2022-11-24 Frank Q

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Lucene参与项目持久层中对于索引库的增删改查相关的知识，希望对你有一定的参考价值。

本文主要介绍Lucene参与项目持久层时，对索引库进行增删改查的详细用法。（为了更好地使用Lucene，本文使用的版本是Lucene 4.4.0，使用的分词器是IKAnalyzer2012FF）

1、Lucene实现增删改查准备工作
2、Lucene持久层详细实现

1、Lucene实现增删改查准备工作

第一步：创建Java工程，也可以创建Maven（pom.xml）工程，需要以下五个jar包：
commons-io.jar
IKAnalyzer2012FF_u1.jar
lucene-analyzers-common-4.4.0.jar
lucene-core-4.4.0.jar
lucene-queryparser-4.4.0.jar
第二步：我们需要一个JavaBean对象用于存储网上的新闻或者文章。网上的新闻或者文章主要由文章题目、文章作者、文章内容、文章链接四个方面组成，所以创建Article文章Bean类如下：

Article.java

/**
 * JAVABEAN用于方便存储抓取的文章索引库中的单个对象
 */
public class Article 

    private int id;

    private String title; 

    private String author;

    private String content;

    private String link;

    public int getId() 
        return id;
    
    public void setId(int id) 
        this.id = id;
    

    public String getTitle() 
        return title;
    
    public void setTitle(String title) 
        this.title = title;
    

    public String getAuthor() 
        return author;
    
    public void setAuthor(String author) 
        this.author = author;
    

    public String getContent() 
        return content;
    
    public void setContent(String content) 
        this.content = content;
    

    public String getLink() 
        return link;
    
    public void setLink(String link) 
        this.link = link;
    

    @Override
    public String toString() 
        return "Article [id=" + id + ", title=" + title + ", author=" + author
                + ", content=" + content + ", link=" + link + "]";

第三步：创建Lucene工具类（用于准备Lucene中最重要的IndexWriter和IndexSearcher两个对象）：

LuceneUtils.java

public class LuceneUtils 

    private static Directory directory;

    private static IndexWriterConfig indexWriterConfig;

    private static Version matchVersion = Version.LUCENE_44;

    private static Analyzer analyzer = new IKAnalyzer();

    static 

        try 

            directory = FSDirectory.open(new File(Contants.INDEXURL));

            indexWriterConfig = new IndexWriterConfig(matchVersion, analyzer);

         catch (IOException e) 
            e.printStackTrace();
        

    

    /**
     * 获取返回用于操作索引的对象
     * @return
     * @throws IOException
     */
    public static IndexWriter getIndexWriter() throws IOException 

        IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);

        return indexWriter;
    

    /**
     * 获取用于查询索引库的对象
     * @return
     * @throws Exception
     */
    public static IndexSearcher getIndexSearcher() throws Exception 

        IndexReader indexReader = DirectoryReader.open(directory);

        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        return indexSearcher;
    

    // 获取索引库地址
    public static Directory getDirectory() 
        return directory;
    

    // 获取当前的版本
    public static Version getMatchVersion() 
        return matchVersion;
    

    // 获取分词分析器
    public static Analyzer getAnalyzer() 
        return analyzer;

Contants.java

/**
 * Constants shared by the Lucene helper classes.
 */
public interface Contants {
    /** Path of the index directory; relative, so resolved against the working directory. */
    String INDEXURL = "index/news";
}

第四步：创建Article向Document转化的工具类：

ArticleUtils.java

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

import com.lucene.bean.Article;

/**
 * article 的转化类
 */
public class ArticleUtils 

    /**
     * 将article转化为document
     * @param article
     * @return
     */
    public static Document articleDocument(Article article) 

        Document document = new Document();

        IntField IDField = new IntField("id", article.getId(), Store.YES);

        StringField titleField = new StringField("title", article.getTitle(), Store.YES);

        TextField contentField = new TextField("content", article.getContent(), Store.YES);

        StringField authorField= new StringField("author", article.getAuthor(), Store.YES);

        StringField urlField   = new StringField("link", article.getLink(), Store.YES);

        document.add(IDField);
        document.add(titleField);
        document.add(contentField);
        document.add(authorField);
        document.add(urlField);

        return document;

2、Lucene持久层详细实现

LuceneDao.java

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import com.lucene.bean.Article;
import com.lucene.utils.ArticleUtils;
import com.lucene.utils.LuceneUtils;

/**
 * Lucene操作索引库（Dao层操作）
 */
public class LuceneDao 
    /**
     * 增删改索引都通过indexWriter完成
     * @throws IOException 
     */
    /**
     * 添加索引库
     * @param article
     * @throws IOException
     */
    @Test
    public void addIndex(Article article) throws IOException 

        IndexWriter indexWriter = LuceneUtils.getIndexWriter();

        Document document = ArticleUtils.articleDocument(article);

        indexWriter.addDocument(document);

        indexWriter.close();
    

    /**
     * 根据字段删除索引，删除对应的值
     * @param fieldName
     * @param fieldValue
     * @throws Exception
     */
    public void delIndex(String fieldName, String fieldValue) throws Exception 
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();

        Term term = new Term(fieldName, fieldValue);

        indexWriter.deleteDocuments(term);
        indexWriter.commit();
        indexWriter.close();
    

    /**
     * 更新索引库中的内容
     * @param fieldName
     * @param fieldValue
     * @param article
     * @throws IOException
     */
    public void updateIndex(String fieldName, String fieldValue,Article article) throws IOException 
        IndexWriter indexWriter = LuceneUtils.getIndexWriter();

        Term term = new Term(fieldName, fieldValue);

        Document document = ArticleUtils.articleDocument(article);

        indexWriter.updateDocument(term, document);

        indexWriter.commit();

        indexWriter.close();
    

    /**
     * 
     * 分页数据
     * 
     * 显示数据进行分页 0 , 10
     * 
     * 显示数据进行分页11 , 20 
     * 
     * 在索引库中根据关键字查找
     * @param keywords
     * @return
     * @throws Exception
     */
    public List<Article> findIndex(String keywords, int start, int row) throws Exception 

        IndexSearcher indexSearcher = LuceneUtils.getIndexSearcher();

        // 需要根据那几个字段进行检索
        String fields[] = "title","content";
//      String fields[] = "author";

        QueryParser queryParser = new MultiFieldQueryParser(LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());

        // 不同的规则构造不同的子类
        // title：keywords ， content：keywords
        Query query = queryParser.parse(keywords);

        TopDocs topDocs = indexSearcher.search(query, start+row);

        System.out.println("总记录数====total===="+topDocs.totalHits);

        ScoreDoc scoreDocs[] = topDocs.scoreDocs;

        Article article = null;

        List<Article> articlelist = new ArrayList<Article>();

        int endResult = Math.min(scoreDocs.length, start+row);

        for (int i = start; i < endResult; i++) 
            int docID = scoreDocs[i].doc;
            article = new Article();
            Document document = indexSearcher.doc(docID);
            article.setId(Integer.parseInt(document.get("id")));
            article.setTitle(document.get("title"));
            article.setContent(document.get("content"));
            article.setLink(document.get("link"));
            article.setAuthor(document.get("author"));

            articlelist.add(article);
        

        return articlelist;

以上是关于Lucene参与项目持久层中对于索引库的增删改查的主要内容，如果未能解决你的问题，请参考以下文章