Lucene

Posted whtt

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Lucene相关的知识,希望对你有一定的参考价值。

分析器

/**
 * Tokenizes a sample English sentence with {@code StandardAnalyzer} and prints
 * every resulting term on its own line.
 *
 * @param args unused
 * @throws IOException if the token stream fails while being consumed
 */
public static void main(String[] args) throws IOException {
  // try-with-resources closes the token stream first and then the analyzer,
  // even when tokenization throws part-way through.
  try (Analyzer analyzer = new StandardAnalyzer();
       TokenStream tokenStream = analyzer.tokenStream("",
           "The spring Framework provides a comprehensive programming and configuration model.")) {
    // Attribute that exposes the text of whichever token the stream is currently positioned on
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    // reset() must be called before the first incrementToken(); skipping it raises an error
    tokenStream.reset();
    // incrementToken() returns true while tokens remain and false once the stream is exhausted
    while (tokenStream.incrementToken()) {
      System.out.println(charTermAttribute.toString());
    }
    // Signal end-of-stream before closing, as the TokenStream consumer workflow requires
    tokenStream.end();
  }
}

中文分析器

  <!-- https://mvnrepository.com/artifact/com.jianggujin/IKAnalyzer-lucene -->
    <dependency>
    <groupId>com.jianggujin</groupId>
    <artifactId>IKAnalyzer-lucene</artifactId>
    <version>8.0.0</version>
    </dependency>

/**
 * Tokenizes a sample Chinese sentence with {@code IKAnalyzer} and prints
 * every resulting term on its own line.
 *
 * @param args unused
 * @throws IOException if the token stream fails while being consumed
 */
public static void main(String[] args) throws IOException {
  // try-with-resources closes the token stream first and then the analyzer,
  // even when tokenization throws part-way through.
  try (Analyzer analyzer = new IKAnalyzer();
       TokenStream tokenStream = analyzer.tokenStream("", "五道口课工场梅川酷子梅川酷子梅川酷子呵呵")) {
    // Attribute that exposes the text of whichever token the stream is currently positioned on
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    // reset() must be called before the first incrementToken(); skipping it raises an error
    tokenStream.reset();
    // incrementToken() returns true while tokens remain and false once the stream is exhausted
    while (tokenStream.incrementToken()) {
      System.out.println(charTermAttribute.toString());
    }
    // Signal end-of-stream before closing, as the TokenStream consumer workflow requires
    tokenStream.end();
  }
}
  
  // Builds an IndexWriter whose IndexWriterConfig uses IKAnalyzer as the analyzer.
  // NOTE(review): `directory` is not defined in this snippet; presumably an already-opened
  // index Directory. Confirm against the surrounding code.
  IndexWriter indexWriter=new IndexWriter(directory,new IndexWriterConfig(new IKAnalyzer()));

索引添加

@Test
public void createDocument() throws IOException {
  //创建IndexWriter对象   参数一:索引库位置   参数二:指定配置
  IndexWriter indexWriter=new IndexWriter(FSDirectory.open(new File("D:\Y3\0225\LuncenIndex").toPath()),
    new IndexWriterConfig(new IKAnalyzer()));
  //创建一个文档对象
  Document document=new Document();
  document.add(new TextField("fieldName","hehe.txt", Field.Store.YES));
  document.add(new StoredField("fieldPath","c://hehe.txt"));
  document.add(new LongPoint("fieldSize",123));
  document.add(new StoredField("fieldSize",123));
  document.add(new TextField("fieldContent","爱上风神股份代号你哦按名单来看积分不到的解决办法梅川酷子是一个开源基于JAVA语言的 .", Field.Store.YES));
  //创建索引,将文档添加到索引库当中
  indexWriter.addDocument(document);
  //关闭
  indexWriter.close();
}

索引修改:原理-先删除再添加

@Test
public void updateDocument() throws IOException {
  //创建IndexWriter对象   参数一:索引库位置   参数二:指定配置
  IndexWriter indexWriter=new IndexWriter(FSDirectory.open(new File("D:\Y3\0225\LuncenIndex").toPath()),
    new IndexWriterConfig(new IKAnalyzer()));
  //创建文档
  Document document=new Document();
  document.add(new TextField("fieldName","new.txt", Field.Store.YES));
  document.add(new StoredField("fieldPath","c://new.txt"));
  document.add(new LongPoint("fieldSize",456));
  document.add(new StoredField("fieldSize",456));
  document.add(new TextField("fieldContent","修改fieldName为全文检索的文档,进行文档替换,先删除掉fieldName为全文检索的两个文档,再添加一个fileName为new的新文档", Field.Store.YES));
  //修改  参数一为条件  参数二为修改的文档值
  indexWriter.updateDocument(new Term("fieldName","全文检索"),document);
  //关闭
  indexWriter.close();
}

索引删除

@Test
public void deleteAllDocument() throws IOException {
  //创建IndexWriter对象   参数一:索引库位置   参数二:指定配置
  IndexWriter indexWriter=new IndexWriter(FSDirectory.open(new File("D:\Y3\0225\LuncenIndex").toPath()),
    new IndexWriterConfig(new IKAnalyzer()));
  //删除索引
  indexWriter.deleteAll();
  //关闭
  indexWriter.close();
}

根据域和关键词删除(注意:下面的示例代码实际演示的是 TermQuery 查询;按域和关键词删除应调用 indexWriter.deleteDocuments(new Term("域名","关键词")))

/**
 * Searches the {@code fieldName} field for the term {@code new} and prints the
 * stored fields of at most ten matching documents, then closes the reader.
 *
 * @throws IOException if the index cannot be read
 */
@Test
public void termQuery() throws IOException {
  // Term match on fieldName, limited to the top 10 hits
  Term keyword = new Term("fieldName", "new");
  TopDocs result = indexSearcher.search(new TermQuery(keyword), 10);
  System.out.println("返回的文档个数:" + result.totalHits);

  // Walk the hits by position, loading each document and printing its fields
  ScoreDoc[] hits = result.scoreDocs;
  for (int i = 0; i < hits.length; i++) {
    Document found = indexSearcher.doc(hits[i].doc);
    System.out.println("fieldName:" + found.get("fieldName"));
    System.out.println("fieldPath:" + found.get("fieldPath"));
    System.out.println("fieldSize:" + found.get("fieldSize"));
    System.out.println("fieldContent:" + found.get("fieldContent"));
  }

  // Release the reader
  indexReader.close();
}

范围查找

/**
 * Runs a range search on the {@code fieldSize} field with bounds 0 and 10 and prints
 * the stored fields of at most ten matching documents, then closes the reader.
 *
 * @throws IOException if the index cannot be read
 */
@Test
public void RangeQuery() throws IOException {
    // First argument names the field the range applies to
    Query sizeRange = LongPoint.newRangeQuery("fieldSize", 0, 10);
    TopDocs found = indexSearcher.search(sizeRange, 10);
    System.out.println("返回的文档个数:" + found.totalHits);

    // Load each matching document and print its fields
    for (ScoreDoc match : found.scoreDocs) {
        int docId = match.doc;
        Document stored = indexSearcher.doc(docId);
        System.out.println("fieldName:" + stored.get("fieldName"));
        System.out.println("fieldPath:" + stored.get("fieldPath"));
        System.out.println("fieldSize:" + stored.get("fieldSize"));
        System.out.println("fieldContent:" + stored.get("fieldContent"));
    }

    // Release the reader
    indexReader.close();
}

TermQuery:根据域和关键词进行搜索

        /**
         * Looks up documents whose {@code fieldName} field contains the term {@code new},
         * prints the stored fields of at most ten of them, then closes the reader.
         *
         * @throws IOException if the index cannot be read
         */
        @Test
        public void termQuery() throws IOException {
            // Build the term condition and run the search for the top 10 hits
            Query condition = new TermQuery(new Term("fieldName", "new"));
            TopDocs top = indexSearcher.search(condition, 10);
            System.out.println("返回的文档个数:" + top.totalHits);

            // Step through the hit array until it is exhausted
            ScoreDoc[] matches = top.scoreDocs;
            int position = 0;
            while (position < matches.length) {
                Document current = indexSearcher.doc(matches[position].doc);
                System.out.println("fieldName:" + current.get("fieldName"));
                System.out.println("fieldPath:" + current.get("fieldPath"));
                System.out.println("fieldSize:" + current.get("fieldSize"));
                System.out.println("fieldContent:" + current.get("fieldContent"));
                position++;
            }

            // Release the reader
            indexReader.close();
        }

RangeQuery:范围搜索

 

  前提:创建文档时保存范围域(如 LongPoint 类型的 fieldSize)

 

      /**
       * Runs a range search on the {@code fieldSize} field with bounds 0 and 50 and prints
       * the stored fields of at most ten matching documents, then closes the reader.
       *
       * @throws IOException if the index cannot be read
       */
      @Test
      public void RangeQuery() throws IOException {
          // Range condition; the first argument is the field the range applies to
          TopDocs outcome = indexSearcher.search(LongPoint.newRangeQuery("fieldSize", 0, 50), 10);
          System.out.println("返回的文档个数:" + outcome.totalHits);

          // Visit every hit by index and print the document's fields
          ScoreDoc[] entries = outcome.scoreDocs;
          for (int idx = 0; idx < entries.length; idx++) {
              ScoreDoc entry = entries[idx];
              Document loaded = indexSearcher.doc(entry.doc);
              System.out.println("fieldName:" + loaded.get("fieldName"));
              System.out.println("fieldPath:" + loaded.get("fieldPath"));
              System.out.println("fieldSize:" + loaded.get("fieldSize"));
              System.out.println("fieldContent:" + loaded.get("fieldContent"));
          }

          // Release the reader
          indexReader.close();
      }

QueryParser:匹配一行数据

  <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
  <dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.4.0</version>
  </dependency>
                    /**
                     * Parses a sentence with a {@code QueryParser} bound to the {@code fieldContent}
                     * field and {@code IKAnalyzer}, prints the stored fields of at most ten matching
                     * documents, then closes the reader.
                     *
                     * @throws IOException    if the index cannot be read
                     * @throws ParseException if the query text cannot be parsed
                     */
                    @Test
                    public void queryparser() throws IOException, ParseException {
                        // First argument: default field to search; second: analyzer applied to the query text
                        QueryParser contentParser = new QueryParser("fieldContent", new IKAnalyzer());
                        Query parsed = contentParser.parse("Lucene是一个开源的基于Java的搜索库");

                        // Run the search for the top 10 hits
                        TopDocs answer = indexSearcher.search(parsed, 10);
                        System.out.println("返回的文档个数:" + answer.totalHits);

                        // Load each matching document and print its fields
                        for (ScoreDoc scored : answer.scoreDocs) {
                            Document hitDocument = indexSearcher.doc(scored.doc);
                            System.out.println("fieldName:" + hitDocument.get("fieldName"));
                            System.out.println("fieldPath:" + hitDocument.get("fieldPath"));
                            System.out.println("fieldSize:" + hitDocument.get("fieldSize"));
                            System.out.println("fieldContent:" + hitDocument.get("fieldContent"));
                        }

                        // Release the reader
                        indexReader.close();
                    }

 

以上是关于Lucene的主要内容,如果未能解决你的问题,请参考以下文章

lucene demo代码

三搜索引擎篇-lucene入门代码示例

ElasticSearch 学习笔记一 简介

Lucene学习总结

Lucene分析

1. 初识 Lucene