Java---统计单词个数
Posted Shall潇
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Java---统计单词个数相关的知识,希望对你有一定的参考价值。
一、单线程统计
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
/**
 * Single-threaded word counter: reads {@code word.txt} line by line and reports how many
 * times each whitespace-separated word occurs.
 *
 * @author shall潇
 */
public class wc1 {

    /**
     * Counts the words in {@code word.txt} (resolved against the working directory) and
     * prints one line per distinct word, in the sorted key order of the backing
     * {@link TreeMap}.
     *
     * <p>If reading fails, the stack trace is printed and whatever was counted before the
     * failure is still reported.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Map<String, Integer> counts = new TreeMap<>(); // final result: word -> occurrences

        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader("word.txt"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Split on runs of whitespace so repeated spaces or tabs never produce an
                // empty "word"; a blank line trims to "" and is filtered out below.
                for (String word : line.trim().split("\\s+")) {
                    if (!word.isEmpty()) {
                        counts.merge(word, 1, Integer::sum);
                    }
                }
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a missing file lands here too.
            e.printStackTrace();
        }

        // Print every word with its count.
        counts.forEach((word, count) -> System.out.println("单词:" + word + ":" + count));
    }
}
二、多线程统计
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Multi-threaded word counter: the input file is cut into chunks of lines, each chunk is
 * counted by its own worker task, and the per-task results are summed at the end
 * (count within each group first, then combine across groups).
 *
 * @author shall潇
 */
public class wc2 {
    /** Number of non-blank lines collected before a chunk is handed to a worker. */
    private static final int LINES_PER_TASK = 30000;

    // Key: task label ("thread-<n>"); value: that task's word -> count map.
    static Map<String, Map<String, Integer>> threadMap = new ConcurrentHashMap<>();
    // Pool that runs the worker tasks.
    static ExecutorService executorService = Executors.newCachedThreadPool();

    /**
     * Submits one chunk of text to the pool and registers the worker's result map under
     * {@code "thread-" + count}.
     *
     * @param content the text the worker should count
     * @param count   label suffix for this chunk; reusing a value replaces the earlier entry
     */
    public static void threadConf(String content, Integer count) {
        wcThread task = new wcThread(content); // wcThread is declared alongside this class
        // The task is already a Runnable, so it goes to the pool directly instead of being
        // wrapped in a Thread object that would never be started.
        executorService.execute(task);
        threadMap.put("thread-" + count, task.wordMap);
    }

    /**
     * Reads {@code word.txt}, fans the lines out to worker tasks in chunks of
     * {@link #LINES_PER_TASK}, waits for all workers, then prints the combined counts in
     * sorted word order. Nothing is printed if reading the file fails.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try (BufferedReader reader = new BufferedReader(new FileReader("word.txt"))) {
            StringBuilder chunk = new StringBuilder();
            int lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue; // blank or whitespace-only lines carry no words
                }
                lineCount++;
                chunk.append(trimmed).append(' ');
                if (lineCount % LINES_PER_TASK == 0) {
                    threadConf(chunk.toString(), lineCount / LINES_PER_TASK);
                    chunk.setLength(0); // start the next chunk
                }
            }
            // Remainder that did not fill a whole chunk; label 0 never collides because
            // full chunks are numbered from 1.
            if (chunk.length() > 0) {
                threadConf(chunk.toString(), 0);
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a missing file lands here too.
            e.printStackTrace();
            return; // the finally block below still shuts the pool down
        } finally {
            // Always stop accepting work so idle pool threads cannot keep the JVM alive.
            executorService.shutdown();
        }

        try {
            // Block until every worker has finished instead of spinning on isTerminated().
            while (!executorService.awaitTermination(1, TimeUnit.SECONDS)) {
                // still running; keep waiting
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve the interrupt for the caller
            executorService.shutdownNow();
            return;
        }

        mergeCounts().forEach((word, count) -> System.out.println("单词:" + word + ":" + count));
    }

    /** Sums the per-task maps in {@link #threadMap} into one sorted word -> count map. */
    private static Map<String, Integer> mergeCounts() {
        Map<String, Integer> totals = new TreeMap<>();
        for (Map<String, Integer> perTask : threadMap.values()) {
            perTask.forEach((word, count) -> totals.merge(word, count, Integer::sum));
        }
        return totals;
    }
}
/**
 * Worker task that counts the whitespace-separated words of one text chunk.
 *
 * <p>The result is accumulated in {@link #wordMap}, a plain {@link TreeMap} with no
 * synchronization of its own, so it should only be read once {@link #run()} has finished.
 */
class wcThread implements Runnable {
    String content = ""; // the text chunk this task counts
    Map<String, Integer> wordMap = new TreeMap<>(); // word -> occurrences within the chunk

    /**
     * @param content the text to count; {@code null} is treated as containing no words
     */
    public wcThread(String content) {
        this.content = content;
    }

    /** Tokenizes {@link #content} and adds one to {@link #wordMap} for every word found. */
    @Override
    public void run() {
        if (content == null) {
            return;
        }
        // Split on runs of whitespace so consecutive spaces (or tabs) never yield an empty
        // token; blank content trims to "" and is filtered out by the isEmpty() check.
        for (String word : content.trim().split("\\s+")) {
            if (!word.isEmpty()) {
                wordMap.merge(word, 1, Integer::sum);
            }
        }
    }
}
以上是关于Java---统计单词个数的主要内容,如果未能解决你的问题,请参考以下文章