Lucene入门示例
Posted 手边星辰
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Lucene入门示例相关的知识,希望对你有一定的参考价值。
注意:本示例使用的 Lucene 版本(5.0.0)需要在 JDK 7 及以上版本中使用。
一、pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>cd.jeryzhi</groupId>
  <artifactId>luceneDemo</artifactId>
  <version>1.0</version>
  <name>${project.artifactId}</name>

  <properties>
    <!-- The Java sources contain non-ASCII string literals and comments; fix the
         encoding so the build does not depend on the platform default. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Compile for Java 7, the minimum this example states it needs. -->
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <!-- Single place to change the version shared by all Lucene modules. -->
    <lucene.version>5.0.0</lucene.version>
  </properties>

  <dependencies>
    <!-- Index writing/reading and search. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <!-- Classic QueryParser used to parse the search string. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <!-- StandardAnalyzer. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
  </dependencies>
</project>
二、代码:
package luceneDemo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Minimal Lucene example: indexes the text of every file with a given extension
 * found directly inside a directory into an in-memory index, runs one query
 * against the indexed content and prints the paths of the matching files.
 */
public class FindTxtManager {

    /** Upper bound on the number of hits fetched for one query. */
    private static final int MAX_HITS = 1000;

    /** Line printed before and after every matching path. */
    private static final String SEPARATOR = "____________________________";

    /**
     * Runs the example against a hard-coded directory, query and extension.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        find("C:\\Users\\Administrator\\Desktop\\新建文件夹", "Application Strategy and Integration", "pdf");
    }

    /**
     * Indexes the matching files of {@code dirPath} (non-recursive) and prints the
     * number of hits for {@code findStr} followed by the path of each hit.
     *
     * <p>Failures are reported on standard error and never propagated to the caller.
     *
     * @param dirPath  directory whose direct children are scanned
     * @param findStr  query in Lucene classic query-parser syntax, run against the
     *                 {@code content} field
     * @param fileType file extension without the leading dot, e.g. {@code "pdf"}
     */
    public static void find(String dirPath, String findStr, String fileType) {
        try (Directory directory = new RAMDirectory();
             Analyzer analyzer = new StandardAnalyzer()) {

            File[] entries = new File(dirPath).listFiles();
            if (entries == null) {
                // listFiles() returns null when the path is not a directory or cannot be read.
                System.err.println("Not a readable directory: " + dirPath);
                return;
            }

            // The writer must be closed (and thereby committed) before a reader is opened.
            try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
                for (File file : selectByExtension(entries, fileType)) {
                    writer.addDocument(toDocument(file));
                }
            }

            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new QueryParser("content", analyzer).parse(findStr);
                printHits(searcher, searcher.search(query, MAX_HITS).scoreDocs);
            }
        } catch (Exception e) {
            // Top-level boundary of the example: report the failure instead of propagating it.
            e.printStackTrace();
        }
    }

    /**
     * Reads a file line by line using the platform default charset.
     *
     * @param file file to read; {@code null} yields an empty string
     * @return the file's lines, each one preceded by {@code '\n'}; if reading fails,
     *         the error is printed and whatever was read so far is returned
     */
    public static String txt2String(File file) {
        StringBuilder text = new StringBuilder();
        if (file == null) {
            return text.toString();
        }
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append("\n").append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return text.toString();
    }

    /** Returns the regular files among {@code entries} whose name ends with {@code "." + fileType}. */
    private static List<File> selectByExtension(File[] entries, String fileType) {
        String suffix = "." + fileType;
        List<File> selected = new ArrayList<File>();
        for (File entry : entries) {
            String name = entry.getName();
            // Require at least one character before the dot, so a file named just ".pdf" is skipped.
            if (entry.isFile() && name.length() > suffix.length() && name.endsWith(suffix)) {
                selected.add(entry);
            }
        }
        return selected;
    }

    /** Builds the Lucene document (file name, content, path) for one file. */
    private static Document toDocument(File file) {
        Document document = new Document();
        document.add(new TextField("filename", file.getName(), Store.YES));
        document.add(new TextField("content", txt2String(file), Store.YES));
        document.add(new TextField("path", file.getPath(), Store.YES));
        return document;
    }

    /** Prints the hit count, then the stored path of each hit between separator lines. */
    private static void printHits(IndexSearcher searcher, ScoreDoc[] hits) throws IOException {
        System.out.println(hits.length);
        for (ScoreDoc hit : hits) {
            Document hitDoc = searcher.doc(hit.doc);
            System.out.println(SEPARATOR);
            System.out.println(hitDoc.get("path"));
            System.out.println(SEPARATOR);
        }
    }
}
输出:
1
____________________________
C:\Users\Administrator\Desktop\新建文件夹\java.pdf
____________________________
以上是关于Lucene入门示例的主要内容,如果未能解决你的问题,请参考以下文章
LeCun 推荐!50 行 PyTorch 代码搞定 GAN