Lucene

Posted 2021-01-15 laodang

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了Lucene的基础用法（从数据库读取数据创建索引，以及索引的删除、更新与搜索），希望对你有一定的参考价值。

核心示例代码：

 1 package xyz.zerone.lucene.pojo;
 2 
 /**
  * Plain data holder for one book record.
  *
  * <p>All properties are nullable boxed/reference types and default to
  * {@code null} until set.
  */
 public class Book {
     private Integer id;
     private String name;
     private Float price;
     private String pic;
     private String description;

     /** @return the book id, or {@code null} if not set */
     public Integer getId() {
         return id;
     }

     /** @param id the book id */
     public void setId(Integer id) {
         this.id = id;
     }

     /** @return the book name, or {@code null} if not set */
     public String getName() {
         return name;
     }

     /** @param name the book name */
     public void setName(String name) {
         this.name = name;
     }

     /** @return the book price, or {@code null} if not set */
     public Float getPrice() {
         return price;
     }

     /** @param price the book price */
     public void setPrice(Float price) {
         this.price = price;
     }

     /** @return the picture value, or {@code null} if not set */
     public String getPic() {
         return pic;
     }

     /** @param pic the picture value */
     public void setPic(String pic) {
         this.pic = pic;
     }

     /** @return the book description, or {@code null} if not set */
     public String getDescription() {
         return description;
     }

     /** @param description the book description */
     public void setDescription(String description) {
         this.description = description;
     }

     /**
      * Renders every property as {@code Book [id=..., name=..., price=..., pic=..., description=...]}.
      */
     @Override
     public String toString() {
         return "Book [id=" + id + ", name=" + name + ", price=" + price + ", pic=" + pic + ", description="
                 + description + "]";
     }

 }

 1 package xyz.zerone.lucene.dao;
 2 
 3 import java.sql.Connection;
 4 import java.sql.DriverManager;
 5 import java.sql.ResultSet;
 6 import java.util.ArrayList;
 7 import java.util.List;
 8 import java.sql.PreparedStatement;
 9 
10 
11 import xyz.zerone.lucene.pojo.Book;
12 
13 public class BookDaoImp implements BookDao {
14     private static String sql = "select * from book";
15 
16     @Override
17     public List<Book> findBookAll() throws Exception {
18         Connection connection = null;
19         PreparedStatement preparedStatement = null;
20         ResultSet resultSet = null;
21         List<Book> list = new ArrayList<Book>();
22         try {
23             Class.forName("com.mysql.jdbc.Driver");
24             connection = DriverManager.getConnection("jdbc:mysql://192.168.3.8:3306/lucenedb", "root", "123456");
25             preparedStatement = connection.prepareStatement(sql);
26             ResultSet rs = preparedStatement.executeQuery();
27             while (rs.next()) {
28                 Book book = new Book();
29                 book.setId(rs.getInt("id"));
30                 book.setName(rs.getString("name"));
31                 book.setPrice(rs.getFloat("price"));
32                 book.setPic(rs.getString("pic"));
33                 book.setDescription(rs.getString("description"));
34                 list.add(book);
35             }
36         } catch (Exception e) {
37             e.printStackTrace();
38         }
39         return list;
40     }
41 
42 }

  1 package xyz.zerone.lucene.first;
  2 
  3 import java.io.File;
  4 import java.io.IOException;
  5 import java.util.ArrayList;
  6 import java.util.List;
  7 
  8 import org.apache.lucene.analysis.Analyzer;
  9 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 10 import org.apache.lucene.document.Document;
 11 import org.apache.lucene.document.Field.Store;
 12 import org.apache.lucene.index.IndexReader;
 13 import org.apache.lucene.index.IndexWriter;
 14 import org.apache.lucene.index.IndexWriterConfig;
 15 import org.apache.lucene.queryparser.classic.ParseException;
 16 import org.apache.lucene.queryparser.classic.QueryParser;
 17 import org.apache.lucene.search.IndexSearcher;
 18 import org.apache.lucene.search.Query;
 19 import org.apache.lucene.search.ScoreDoc;
 20 import org.apache.lucene.search.TopDocs;
 21 import org.apache.lucene.document.FloatField;
 22 import org.apache.lucene.document.StoredField;
 23 import org.apache.lucene.document.StringField;
 24 import org.apache.lucene.document.TextField;
 25 import org.apache.lucene.store.Directory;
 26 import org.apache.lucene.store.FSDirectory;
 27 import org.apache.lucene.util.Version;
 28 import org.apache.lucene.index.Term;
 29 import org.junit.Test;
 30 
 31 import xyz.zerone.lucene.dao.BookDaoImp;
 32 import xyz.zerone.lucene.pojo.Book;
 33 
 34 public class LuceneCore {
 35     // 创建索引库生成到指定目录中
 36     @Test
 37     public void createIndexFromDb() throws Exception {
 38         // 定义List存储document
 39         List<Document> docs = new ArrayList<Document>();
 40         // 从数据库获取book表的集合数据
 41         List<Book> bookList = new BookDaoImp().findBookAll();
 42         for (Book book : bookList) {
 43             // 创建Document
 44             Document doc = new Document();
 45             // 创建Field域
 46             // -创建book表字段id的field域
 47             // --参数：域名、域中的value值、是否存储到文件系统
 48             /**
 49              * 根据实际的需求： 这里的Field属性设置可以有三种（是否分词、是否索引、是否存储）； 详见createAutoIndexDate();
 50              */
 51             TextField id = new TextField("id", book.getId().toString(), Store.YES);
 52             TextField name = new TextField("name", book.getName(), Store.YES);
 53             FloatField price = new FloatField("price", book.getPrice(), Store.YES);
 54             TextField pic = new TextField("pic", book.getPic(), Store.YES);
 55             TextField description = new TextField("description", book.getDescription(), Store.YES);
 56             // 将field域加入到documet中
 57             doc.add(id);
 58             doc.add(name);
 59             doc.add(price);
 60             doc.add(pic);
 61             doc.add(description);
 62             // 将document加入集合
 63             docs.add(doc);
 64         }
 65         // 创建一个标准的分词器
 66         Analyzer analyzer = new StandardAnalyzer();
 67         // 创建索引目录的流对象，指定索引库存放位置
 68         Directory d = FSDirectory.open(new File("C:\Users\dangxiaopeng\Desktop\LuceneIndexData"));
 69         // IndexWriter配置对象
 70         // 参数：lucene版本、分词器
 71         IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
 72         // 创建索引操作对象，提供了很多方法操作索引文件（添加、修改、删除）
 73         IndexWriter indexWriter = new IndexWriter(d, conf);
 74         // 通过indexWriter创建索引
 75         for (Document doc : docs) {
 76             indexWriter.addDocument(doc);
 77         }
 78         indexWriter.commit();
 79         indexWriter.close();
 80     }
 81 
 82     // 根据实际需求创建索引库生成到指定目录中
 83     @Test
 84     public void createAutoIndexFromDb() throws Exception {
 85         // 定义List存储document
 86         List<Document> docs = new ArrayList<Document>();
 87         // 从数据库获取book表的集合数据
 88         List<Book> bookList = new BookDaoImp().findBookAll();
 89         for (Book book : bookList) {
 90             // 创建Document
 91             Document doc = new Document();
 92             // 创建Field域
 93             // -创建book表字段id的field域
 94             // --参数：域名、域中的value值、是否存储到文件系统
 95             // 图书id，不要分词，要索引，要存储
 96             StringField id = new StringField("id", book.getId().toString(), Store.YES);
 97             // 图书名称，要分词，要索引，要存储
 98             TextField name = new TextField("name", book.getName(), Store.YES);
 99             // 图书价格，要分词（Lucene需要的对价格数字特殊处理分词，以便处理范围查询），要索引，要存储
100             FloatField price = new FloatField("price", book.getPrice(), Store.YES);
101             // 图书图片，不要分词，不要索引，要存储
102             StoredField pic = new StoredField("pic", book.getPic());
103             // 图书描述，要分词，要索引，不要存储
104             TextField description = new TextField("description", book.getDescription(), Store.YES);
105             // 将field域加入到documet中
106             doc.add(id);
107             doc.add(name);
108             doc.add(price);
109             doc.add(pic);
110             doc.add(description);
111             // 将document加入集合
112             docs.add(doc);
113         }
114         // 创建一个标准的分词器
115         Analyzer analyzer = new StandardAnalyzer();
116         // 创建索引目录的流对象，指定索引库存放位置
117         Directory d = FSDirectory.open(new File("C:\Users\dangxiaopeng\Desktop\LuceneIndexData"));
118         // IndexWriter配置对象
119         // 参数：lucene版本、分词器
120         IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
121         // 创建索引操作对象，提供了很多方法操作索引文件（添加、修改、删除）
122         IndexWriter indexWriter = new IndexWriter(d, conf);
123         // 通过indexWriter创建索引
124         for (Document doc : docs) {
125             indexWriter.addDocument(doc);
126         }
127         indexWriter.commit();
128         indexWriter.close();
129     }
130 
131     // 删除索引
132     @Test
133     public void deleteIndex() throws IOException, ParseException {
134 
135         // 调用indexWriter中删除索引方法
136 
137         // 创建分词器
138         Analyzer analyzer = new StandardAnalyzer();
139 
140         // 创建索引目录 的流对象,指定索引目录 的位置
141         Directory d = FSDirectory.open(new File("C:\Users\dangxiaopeng\Desktop\LuceneIndexData"));
142 
143         // IndexWriter配置对象
144         // 参数：lucene的版本，分词器
145         IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
146 
147         // 创建indexWriter
148         IndexWriter indexWriter = new IndexWriter(d, conf);
149 
150         // 删除全部索引
151         // indexWriter.deleteAll();
152 
153         // 查询分析器
154         // 第一个参数：指定默认搜索的域,第二个：分词器
155         QueryParser queryParser = new QueryParser("description", analyzer);
156 
157         // 创建查询对象
158         Query query = queryParser.parse("description:java");
159         // 删除符合条件的索引，删除符合query查询的所有document
160         indexWriter.deleteDocuments(query);
161         // 提交
162         indexWriter.commit();
163         // 关闭资源
164         indexWriter.close();
165 
166     }
167 
168     // 更新索引
169     @Test
170     public void updateIndex() throws IOException {
171         // 调用indexWriter中更新索引方法
172         // 创建分词器
173         Analyzer analyzer = new StandardAnalyzer();
174 
175         // 创建索引目录 的流对象,指定索引目录 的位置
176         Directory d = FSDirectory.open(new File("C:\Users\dangxiaopeng\Desktop\LuceneIndexData"));
177 
178         // IndexWriter配置对象
179         // 参数：lucene的版本，分词器
180         IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
181 
182         // 创建indexWriter
183         IndexWriter indexWriter = new IndexWriter(d, conf);
184 
185         // 创建一个term作为查询条件
186         // 参数：field域名称，值
187         Term term = new Term("id", "1");
188 
189         // 创建document，新的document，替换id等的1的document
190         Document doc = new Document();
191         // 图书id，不要分词，要索引、要存储
192         StringField id = new StringField("id", "1".toString(), Store.YES);
193 
194         // 图书名称：要分词，要索引，要存储
195         TextField name = new TextField("name", "java编程思想第三版", Store.YES);
196 
197         doc.add(id);
198         doc.add(name);
199 
200         // 更新思路：根据term的信息查询document，找到之后删除document，添加doc中的添加到索引库
201         // 总之：先查询、再删除、再添加，建议根据主键field查询document。
202         indexWriter.updateDocument(term, doc);
203         // 提交
204         indexWriter.commit();
205         // 关闭资源
206         indexWriter.close();
207     }
208 
209     // 搜索索引
210     @Test
211     public void searchIndex() throws ParseException, IOException {
212         // 分词，搜索过程使用的分词器要和创建索引时使用的分词器一致
213         Analyzer analyzer = new StandardAnalyzer();
214         // 查询分析器
215         // 参数：指定默认搜索field域、分词器
216         QueryParser queryParser = new QueryParser("description", analyzer);
217         // 创建查询对象(这里是使用了查询语法指定了description域如果不指定按照上面默认的field域)
218         Query query = queryParser.parse("description:java");
219         // 创建索引目录的流对象，指定索引目录位置
220         Directory d = FSDirectory.open(new File("C:\Users\dangxiaopeng\Desktop\LuceneIndexData"));
221         // 索引读取对象
222         // 指定读取索引的目录
223         IndexReader indexReader = IndexReader.open(d);
224         // 索引搜索对象
225         IndexSearcher indexSearcher = new IndexSearcher(indexReader);
226         // 执行搜索
227         // 参数：query查询对象、取出匹配度高的前n条记录
228         TopDocs topDocs = indexSearcher.search(query, 100);
229         // 取出匹配上的文档
230         ScoreDoc[] scoreDocs = topDocs.scoreDocs;
231         for (ScoreDoc scoreDoc : scoreDocs) {
232             // document的id
233             int docId = scoreDoc.doc;
234             // 从indexReader根据docId获取document
235             Document doc = indexReader.document(docId);
236             // 取出doc中field域的内容
237             // 参数指定field域名
238             String id = doc.get("id");
239             String name = doc.get("name");
240             Float price = Float.parseFloat(doc.get("price"));
241             String pic = doc.get("pic");
242 
243             System.out.println(id + "-" + name + "-" + price + "-" + pic);
244         }
245         // 关闭资源
246         indexReader.close();
247     }
248 }

Lucene深入（Luke、中文分词器、查询语句等内容见相关文档）

以上是关于Lucene的主要内容，如果未能解决你的问题，请参考以下文章