lucene 5.2.0学习笔记
Posted duenboa
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了lucene 5.2.0学习笔记相关的知识,希望对你有一定的参考价值。
package com.bc.cas.manager;
import com.bc.cas.dao.BookDao;
import com.bc.cas.model.entity.Book;
import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import org.apache.log4j.helpers.LogLog;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;
import org.springframework.util.CollectionUtils;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
* @Copyright 2012-2016 duenboa 版权所有
* @Author Created by Administrator on 2016/11/29.
* @Version V 1.0.0
* @Desc 索引管理器
*/
@Repository
public class IndexManager {
@Autowired
private BookDao bookDao;
static FSDirectory dir;
static {
try {
dir = FSDirectory.open(Paths.get("d:/lucene/lucene052_index05"));
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 创建索引 -common
*/
@Test
public void createIndex(List<Document> docs) throws IOException {
if (docs == null) {
return;
}
// 自定义停用词
List<String> strings = Arrays.asList("的", "在", "了", "呢", ",", "0", ":", ",", "是", "这", "那", "么");
CharArraySet unUsefulWorlds = new CharArraySet(strings, true);
// 加入系统默认停用词
Iterator<Object> itor = SmartChineseAnalyzer.getDefaultStopSet().iterator();
while (itor.hasNext()) unUsefulWorlds.add(itor.next());
//指定了停用词的分词器
Analyzer analyzer = new SmartChineseAnalyzer(unUsefulWorlds);
IndexWriterConfig config = new IndexWriterConfig(analyzer);
IndexWriter writer = new IndexWriter(dir, config);
writer.addDocuments(docs);
writer.commit();
writer.close();
}
/**
* 执行查询 -common
*
* @param query
* @throws IOException
* @throws ParseException
*/
private static void doQuery(Query query) throws IOException, ParseException {
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
//执行query,获取指定条数的顶行记录
TopDocs topDocs = searcher.search(query, 10);
System.out.println("搜索出的总记录数为: " + topDocs.totalHits);
//评分文档集合
ScoreDoc[] docs = topDocs.scoreDocs;
for (ScoreDoc doc : docs) {
//获取文档id
int id = doc.doc;
//根据文档id查询文档对象
Document document = searcher.doc(id);
//打印信息
System.out.println(
Objects.toStringHelper("docuemnt")
.add("文档id", document.get("id"))
.add("文档名称", document.get("name"))
.add("文档图片", document.get("pic"))
.add("文档描述", document.get("description"))
.toString());
}
reader.close();
}
/**
* 测试新建索引
*/
public void testCreateIndex() {
List<Book> bookList = bookDao.findAll();
if (CollectionUtils.isEmpty(bookList)) return;
List<Document> docList = Lists.newArrayList();
Document doc;
for (Book book : bookList) {
doc = new Document();
doc.add(new StoredField("id", book.getId()));
doc.add(new StringField("name", book.getName(), Field.Store.YES));
doc.add(new TextField("pic", book.getPic(), Field.Store.YES));
doc.add(new TextField("description", book.getDescription(), Field.Store.YES));
docList.add(doc);
}
try {
createIndex(docList);
} catch (IOException e) {
LogLog.error(e.getMessage(), e);
}
}
/**
*
* 查询
*
* @throws IOException
*/
@Test
public static void testQuery() throws IOException, ParseException {
QueryParser parser = new QueryParser("description", new SmartChineseAnalyzer());
Query query = parser.parse("description:java AND lucene");
doQuery(query);
}
}
Field类的子类和说明, 以及用法:
StoredField(FieldName, FieldValue) | 重载,支持多种类型 | 不分词 | 不索引 | 示例: |
StringField(FieldName, String FieldValue, Store.YES) | 字符串类型数据 | 不分词 | 索引 | 示例: 订单号, id, 手机号等 |
LongField(FieldName, Long FieldValue, Store.YES) | Long型数据 | 分词 | 索引 | 示例: 价格 |
TextField(FieldName, FieldValue, Store.YES) 或 TextField(FieldName, Reader) | 字符串或者流 | 分词 | 索引 | |
以上是关于lucene 5.2.0学习笔记的主要内容,如果未能解决你的问题,请参考以下文章