lucene 学习总结

Posted zhuanzhuan

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了lucene 学习总结相关的知识,希望对你有一定的参考价值。

最近在研究全文检索。Lucene 高版本的 API 不兼容低版本的 API,所以使用不同版本时写法都不一样。花了两三个晚上,至少现在把 Lucene 的原理弄明白了。

先分享两个很不错的链接:

大家可以从上面两个博客中学习

下面是一个为数据库记录建立索引的 demo,使用的 Lucene jar 包版本是 2.4.0。

package com.dy.test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
 * Builds a Lucene index from the rows of the {@code sysAdmin} table.
 *
 * <p>Each row becomes one document with a stored-only {@code id} field
 * (from column {@code adminId}) and a stored, tokenized {@code name} field
 * (from column {@code adminName}). The index directory is recreated from
 * scratch on every run.
 *
 * <p>An instance holds the open JDBC resources in fields, so it is meant to
 * be used for a single {@link #createIndex()} call at a time.
 */
public class DBIndexer {

        private String driver="com.microsoft.sqlserver.jdbc.SQLServerDriver";
        private String url="jdbc:sqlserver://localhost:1433;databaseName=yun4jbook;";
        private String user="sa";
        private String pass="dyz";
        private Connection conn=null;
        private Statement st=null;
        private ResultSet rs=null;
        private String indexUrl="E:\\workspace\\mylunceDemo\\myIndex";

        /**
         * Opens the database connection and runs the query that supplies the
         * rows to index.
         *
         * <p>The connection, statement and result set are kept in the instance
         * fields so that {@link #close()} can release them later.
         *
         * @return the open result set; never {@code null}
         * @throws Exception if the driver cannot be loaded, the connection
         *         fails, or the query fails; anything opened so far is
         *         released before the exception propagates
         */
        private ResultSet getResult() throws Exception{
            try {
                Class.forName(driver);
                conn = DriverManager.getConnection(url, user, pass);
                String sql = "select * from sysAdmin";
                st = conn.createStatement();
                rs = st.executeQuery(sql);
                return rs;
            }
            catch (Exception e) {
                // Do not hide the failure behind a null result set: release
                // whatever was opened and let the caller see the real cause.
                try {
                    this.close();
                }
                catch (Exception ignored) {
                    // The original failure is the one worth reporting.
                }
                throw e;
            }
        }

        /**
         * Adds one document per row of {@code rs} to {@code indexWriter} and
         * prints the number of rows processed.
         *
         * <p>The JDBC resources held by this instance are released when the
         * loop ends, whether it completes or throws.
         *
         * @param rs          rows to index; reads columns {@code adminId} and {@code adminName}
         * @param indexWriter writer that receives the documents; not closed here
         * @throws Exception if reading a row or adding a document fails
         */
        private void executeIndex(ResultSet rs, IndexWriter indexWriter) throws Exception{
            int i=0;
            try {
                while(rs.next()){
                    int id = rs.getInt("adminId");
                    String name = rs.getString("adminName");
                    Document doc = new Document();

                    // The id is only stored for retrieval, it is not searchable.
                    doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

                    // getString returns null for SQL NULL, and Lucene's Field
                    // constructor is documented to reject a null value, so a
                    // row without a name is indexed without the name field.
                    if (name != null) {
                        doc.add(new Field("name", name, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                    }

                    indexWriter.addDocument(doc);

                    i++;
                }
            }
            finally {
                this.close();
            }
            System.out.println("共处理记录:"+i);
        }

        /**
         * Releases the result set, statement and connection held by this
         * instance.
         *
         * <p>Resources that were never opened are skipped, every resource is
         * attempted even if an earlier one fails to close, and calling this
         * method again afterwards is a no-op.
         *
         * @throws Exception if closing any of the resources fails
         */
        private void close() throws Exception{
            ResultSet openRs = this.rs;
            Statement openSt = this.st;
            Connection openConn = this.conn;
            // Clear the fields first so that a repeated call does nothing.
            this.rs = null;
            this.st = null;
            this.conn = null;
            try {
                if (openRs != null) {
                    openRs.close();
                }
            }
            finally {
                try {
                    if (openSt != null) {
                        openSt.close();
                    }
                }
                finally {
                    if (openConn != null) {
                        openConn.close();
                    }
                }
            }
        }

        /**
         * Rebuilds the index at {@code indexUrl} from the database and prints
         * the start time, end time and elapsed milliseconds.
         *
         * @throws Exception if the database query or any index operation fails;
         *         the index writer and the JDBC resources are released in
         *         either case
         */
        public void createIndex() throws Exception{
            try {
                // get data ResultSet
                ResultSet rs=this.getResult();

                // StandardAnalyzer is used here; an analyzer dedicated to
                // Chinese text could be substituted.
                Analyzer analyzer = new StandardAnalyzer();
                // 'true' creates a new index, replacing any existing one.
                IndexWriter indexWriter = new IndexWriter(this.indexUrl, analyzer, true);

                java.util.Date startDate;
                try {
                    indexWriter.setMergeFactor(100);
                    indexWriter.setMaxBufferedDocs(100);

                    startDate=new java.util.Date();

                    System.out.println("开始索引时间:" + startDate);

                    executeIndex(rs, indexWriter);

                    indexWriter.optimize();
                }
                finally {
                    // Always close the writer so that it does not stay open
                    // after a failure.
                    indexWriter.close();
                }

                java.util.Date endDate=new java.util.Date();

                System.out.println("索引结束时间:" + endDate);
                System.out.println("共花费:" + (endDate.getTime()-startDate.getTime()) + "ms");
            }
            finally {
                // Covers failures that happen before executeIndex runs (for
                // example when the index writer cannot be created); a no-op
                // when the resources were already released.
                this.close();
            }
        }

        /**
         * Command-line entry point: builds the index once.
         *
         * @param args ignored
         * @throws Exception if indexing fails
         */
        public static void main (String args[]) throws Exception {
            DBIndexer oIndexer = new DBIndexer();
            oIndexer.createIndex();
        }

}

 

 

package com.dy.test;

import java.util.Iterator;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

/**
 * Searches the {@code name} field of the index at {@code indexUrl} for a fixed
 * query string and prints the matching documents' {@code name} fields.
 */
public class DBSearcher {
    private static final String indexUrl="E:\\workspace\\mylunceDemo\\myIndex";

    /**
     * Runs the search and prints the hit count followed by one line per hit.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, the query cannot be
     *         parsed, or the search fails
     */
    public static void main(String[] args) throws Exception {
           IndexSearcher searcher = new IndexSearcher(indexUrl);
           try {
               QueryParser qp = new QueryParser("name", new StandardAnalyzer());

               String searchText = "中国";

               Query query = qp.parse(searchText);
               Hits hits = searcher.search(query);
               System.out.println("Search "+ searchText + " total " + hits.length() + " result ! ");
               for (Iterator it = hits.iterator(); it.hasNext(); ) {
                   Hit hit = (Hit) it.next();
                   // Prints the Field object itself (its toString form), not
                   // just the stored value; use stringValue() for the value.
                   System.out.println(hit.getDocument().getField("name"));
               }
           }
           finally {
               // Release the searcher even when parsing or searching fails.
               searcher.close();
           }
       }
    }

 

以上是关于lucene 学习总结的主要内容,如果未能解决你的问题,请参考以下文章

Lucene的学习与总结

Javalucene4.0学习心得

Lucene的学习与总结

Lucene学习总结之二:Lucene的总体架构

Lucene学习总结之二:Lucene的总体架构

Lucene学习总结