ForkJoin统计文件夹中包含关键词的数量
Posted q651231292
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ForkJoin统计文件夹中包含关键词的数量相关的知识,希望对你有一定的参考价值。
2018-06-09总结:
ForkJoin确实可以很快地解析文件并统计关键词的数量,但由于程序会先把文件夹中所有文件的内容一次性加载到内存中,文件过大时就会出现内存溢出。是否可以通过虚拟内存的方式解决内存溢出的问题呢?
package com.oxygen.forkjoin.model;
import java.util.List;
/**
 * A document, represented in memory as the list of its text lines.
 *
 * @author renguanyu
 */
public class Document {

    /** The document's lines; the list reference is stored and handed out as-is (no copy). */
    private List<String> lines;

    /**
     * Creates a document backed by the given list of lines.
     *
     * @param lines the lines of the document
     */
    public Document(List<String> lines) {
        this.lines = lines;
    }

    /**
     * Returns the list of lines currently held by this document.
     *
     * @return the same list instance that was last supplied to this document
     */
    public List<String> getLines() {
        return this.lines;
    }

    /**
     * Replaces the list of lines held by this document.
     *
     * @param lines the new lines of the document
     */
    public void setLines(List<String> lines) {
        this.lines = lines;
    }
}
package com.oxygen.forkjoin.model;
import java.util.List;
/**
 * A folder, represented in memory as its direct sub-folders plus the documents it contains.
 *
 * @author renguanyu
 */
public class Folder {

    /** Direct child folders; the list reference is stored and returned without copying. */
    private List<Folder> subFolders;

    /** Documents located directly in this folder; stored and returned without copying. */
    private List<Document> documents;

    /**
     * Creates a folder from its child folders and its documents.
     *
     * @param subFolders the direct sub-folders
     * @param documents  the documents directly inside this folder
     */
    public Folder(List<Folder> subFolders, List<Document> documents) {
        this.documents = documents;
        this.subFolders = subFolders;
    }

    /**
     * Returns the direct sub-folders.
     *
     * @return the list of sub-folders held by this folder
     */
    public List<Folder> getSubFolders() {
        return this.subFolders;
    }

    /**
     * Replaces the direct sub-folders.
     *
     * @param subFolders the new list of sub-folders
     */
    public void setSubFolders(List<Folder> subFolders) {
        this.subFolders = subFolders;
    }

    /**
     * Returns the documents directly inside this folder.
     *
     * @return the list of documents held by this folder
     */
    public List<Document> getDocuments() {
        return this.documents;
    }

    /**
     * Replaces the documents directly inside this folder.
     *
     * @param documents the new list of documents
     */
    public void setDocuments(List<Document> documents) {
        this.documents = documents;
    }
}
package com.oxygen.forkjoin.service;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.oxygen.forkjoin.model.Document;
/**
 * Document service: builds {@link Document} objects from files on disk.
 *
 * @author renguanyu
 */
public class DocumentService {

    /**
     * Reads all lines of the given file into a document.
     *
     * <p>If the file cannot be opened or a read fails, the stack trace is printed and the
     * lines collected up to that point (possibly none) are still returned.
     *
     * @param file the file to read
     * @return a document holding the lines that were read
     */
    public static Document fromFile(File file) {
        List<String> content = new ArrayList<>();
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            for (String text = in.readLine(); text != null; text = in.readLine()) {
                content.add(text);
            }
        } catch (IOException e) {
            // FileNotFoundException is a subclass of IOException, so this one handler
            // covers both the "cannot open" and the "read failed" cases.
            e.printStackTrace();
        }
        return new Document(content);
    }
}
package com.oxygen.forkjoin.service;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import com.oxygen.forkjoin.model.Document;
import com.oxygen.forkjoin.model.Folder;
import com.oxygen.forkjoin.task.FolderSearchTask;
/**
 * Folder service: loads a directory tree into memory and counts keyword occurrences in it.
 *
 * @author renguanyu
 */
public class FolderService {

    /**
     * Recursively loads a directory into an in-memory {@link Folder} structure.
     *
     * <ol>
     *   <li>Mirrors the directory structure in memory.</li>
     *   <li>Loads every file's content into that structure, ready for the search step.</li>
     * </ol>
     *
     * <p>If {@code dir} cannot be listed ({@link File#listFiles()} returns {@code null},
     * e.g. it is not a directory or an I/O error occurs), an empty folder is returned
     * instead of failing with a {@code NullPointerException}.
     *
     * @param dir the directory to load
     * @return the folder with all of its sub-folders and documents
     */
    public static Folder fromDirectory(File dir) {
        List<Document> documents = new ArrayList<>();
        List<Folder> subFolders = new ArrayList<>();
        // listFiles() returns null (not an empty array) when the path cannot be listed.
        File[] entries = dir.listFiles();
        if (entries != null) {
            for (File entry : entries) {
                if (entry.isDirectory()) {
                    subFolders.add(FolderService.fromDirectory(entry));
                } else {
                    documents.add(DocumentService.fromFile(entry));
                }
            }
        }
        return new Folder(subFolders, documents);
    }

    /**
     * Counts how many times the keyword occurs in all files under the target folder.
     *
     * <p>The whole folder is loaded into memory first and then searched with a fork/join
     * task. The pool created for the search is shut down before this method returns.
     *
     * @param targetFolder path of the folder to search
     * @param keyword      the keyword to count
     * @return the total number of occurrences found
     */
    public static long getKeywordTotal(String targetFolder, String keyword) {
        // Load the folder's data into memory before starting any worker threads.
        Folder folder = FolderService.fromDirectory(new File(targetFolder));
        ForkJoinPool forkJoinPool = new ForkJoinPool();
        try {
            // Create the search task and run it to completion on the pool.
            return forkJoinPool.invoke(new FolderSearchTask(folder, keyword));
        } finally {
            // Release the pool's worker threads; the pool is private to this call.
            forkJoinPool.shutdown();
        }
    }
}
package com.oxygen.forkjoin.task;
import java.util.List;
import java.util.concurrent.RecursiveTask;
import com.oxygen.forkjoin.model.Document;
/**
* 文档搜索任务
* @author renguanyu
*
*/
public class DocumentSearchTask extends RecursiveTask<Long> {
private static final long serialVersionUID = 1L;
private Document document;
private String searchedWord;
public DocumentSearchTask(Document document, String searchedWord) {
super();
this.document = document;
this.searchedWord = searchedWord;
}
@Override
protected Long compute() {
long count = 0;
List<String> lines = document.getLines();
for (String line : lines) {
String[] words = line.trim().split("(\s|\p{Punct})+");
for (String word : words) {
if (searchedWord.equals(word)) {
count = count + 1;
}
}
}
return count;
}
}
package com.oxygen.forkjoin.task;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.RecursiveTask;
import com.oxygen.forkjoin.model.Document;
import com.oxygen.forkjoin.model.Folder;
/**
 * Folder search task: counts the occurrences of a word in a folder, including all of its
 * sub-folders and documents.
 *
 * @author renguanyu
 */
public class FolderSearchTask extends RecursiveTask<Long> {

    private static final long serialVersionUID = 1L;

    private Folder folder;
    private String searchedWord;

    /**
     * Creates a task that searches one folder.
     *
     * @param folder       the folder to search
     * @param searchedWord the word to count
     */
    public FolderSearchTask(Folder folder, String searchedWord) {
        super();
        this.folder = folder;
        this.searchedWord = searchedWord;
    }

    /**
     * Creates one subtask per sub-folder and per document, runs them all, and sums their
     * results.
     *
     * @return the number of occurrences found in this folder and everything below it
     */
    @Override
    protected Long compute() {
        List<RecursiveTask<Long>> subtasks = new ArrayList<>();
        // One recursive folder task for each sub-folder.
        for (Folder subFolder : folder.getSubFolders()) {
            subtasks.add(new FolderSearchTask(subFolder, searchedWord));
        }
        // One document task for each document directly in this folder.
        for (Document document : folder.getDocuments()) {
            subtasks.add(new DocumentSearchTask(document, searchedWord));
        }
        // invokeAll forks the subtasks and waits until all of them are done, letting the
        // current worker take part in the work instead of forking everything and then
        // joining in FIFO order.
        long count = 0L;
        for (RecursiveTask<Long> finished : invokeAll(subtasks)) {
            // Every subtask is already complete here, so join() just returns its result.
            count = count + finished.join();
        }
        return count;
    }
}
package com.oxygen.forkjoin.test;
import java.io.IOException;
import com.oxygen.forkjoin.service.FolderService;
/**
* 测试程序
* @author renguanyu
*
*/
public class MainTest {
public static void main(String[] args) throws IOException {
long startTime = System.currentTimeMillis();
long counts = FolderService.getKeywordTotal("C:\test\logs\", "null");
long stopTime = System.currentTimeMillis();
long completeTime = stopTime - startTime;
System.out.println(counts + " , fork / join search took " + completeTime + "ms");
}
}
以上是关于ForkJoin统计文件夹中包含关键词的数量的主要内容,如果未能解决你的问题,请参考以下文章