单词统计
Posted nyar
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了单词统计相关的知识,希望对你有一定的参考价值。
package dao;import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.text.NumberFormat;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.Scanner;
/**
 * Console utility that reports letter and word frequencies for a text file.
 *
 * <p>{@link #main(String[])} loads the text once with {@link #readFile()} and then
 * serves a small menu on standard input: option 1 prints the share of every ASCII
 * letter, option 2 prints the most frequent words, option 3 exits.
 *
 * <p>All state is static and the class is not thread-safe.
 */
public class Dao {

    /**
     * File analysed by {@link #readFile()}, resolved against the working directory.
     * The apostrophe is a plain ASCII {@code '}.
     */
    private static final String DEFAULT_PATHNAME = "Harry Potter and the Sorcerer's Stone.txt";

    /** The 52 letters that are counted, in the order used to break frequency ties. */
    private static final String LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    /** Text under analysis. Starts empty so that nothing is ever appended to {@code null}. */
    static String str = "";

    /** Keeps the menu loop in {@link #main(String[])} running; cleared by option 3 or end of input. */
    static boolean flag = true;

    // Former loop counters. The methods below use locals instead; the fields are kept
    // only because they are package-visible and other code may reference them.
    static int i;
    static int j;
    static int m;

    /** Single shared reader for standard input; every prompt must go through it. */
    static Scanner scan = new Scanner(System.in);

    /**
     * Loads the default text file and runs the interactive menu until the user picks
     * option 3 or standard input ends.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("运行成功,请稍等!");
        readFile();
        while (flag) {
            System.out.print("输入1查询字母所占的比例,输入2查询占前几个的单词,输入3退出:");
            Integer choice = nextIntOrNull();
            if (choice == null) {
                // End of input: leave the loop instead of throwing NoSuchElementException.
                flag = false;
                break;
            }
            switch (choice.intValue()) {
                case 1:
                    findletter(); // share of each letter
                    break;
                case 2:
                    findword(); // most frequent words
                    break;
                case 3:
                    System.out.println("退出成功!");
                    flag = false;
                    break;
                default:
                    // Unknown option: show the menu again.
                    break;
            }
        }
    }

    /** Loads the default text file into {@link #str}. */
    public static void readFile() {
        readFile(DEFAULT_PATHNAME);
    }

    /**
     * Replaces {@link #str} with the contents of the given file.
     *
     * <p>Each line is terminated with {@code '\n'} so that the last word of one line and
     * the first word of the next are not glued together. If the file cannot be read, a
     * message is written to standard error and {@code str} holds whatever was read
     * before the failure (the empty string if nothing was read).
     *
     * @param pathname path of the text file to load
     */
    public static void readFile(String pathname) {
        StringBuilder contents = new StringBuilder();
        try (FileReader reader = new FileReader(pathname);
             BufferedReader br = new BufferedReader(reader)) {
            String line;
            while ((line = br.readLine()) != null) {
                contents.append(line).append('\n');
            }
        } catch (IOException e) {
            System.err.println("读取文件失败: " + pathname + " (" + e.getMessage() + ")");
        }
        str = contents.toString();
    }

    /**
     * Prints one line per letter of {@code a-z} and {@code A-Z} with its share of all
     * ASCII letters in {@link #str}, most frequent first.
     *
     * <p>Lower- and upper-case letters are counted separately. Letters with equal counts
     * keep alphabet order (lower case before upper case). When the text contains no
     * letters every share is reported as zero.
     */
    public static void findletter() {
        final String text = (str == null) ? "" : str;
        final int[] counts = new int[LETTERS.length()];
        long total = 0;
        for (int pos = 0; pos < text.length(); pos++) {
            char c = text.charAt(pos);
            if (c >= 'a' && c <= 'z') {
                counts[c - 'a']++;
                total++;
            } else if (c >= 'A' && c <= 'Z') {
                counts[c - 'A' + 26]++;
                total++;
            }
        }

        // Sort letter indices rather than the counts so each letter stays paired with
        // its count. Collections.sort is stable, so ties keep alphabet order.
        List<Integer> order = new ArrayList<>(LETTERS.length());
        for (int index = 0; index < LETTERS.length(); index++) {
            order.add(index);
        }
        Collections.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Integer.compare(counts[right], counts[left]);
            }
        });

        NumberFormat percent = NumberFormat.getPercentInstance();
        percent.setMinimumFractionDigits(2);
        for (int index : order) {
            double share = (total == 0) ? 0.0 : (double) counts[index] / total;
            printLine(LETTERS.charAt(index) + " " + percent.format(share));
        }
    }

    /**
     * Counts the words in {@link #str} case-insensitively, prints how many distinct words
     * there are, asks on standard input how many to show, and prints that many as
     * {@code word:count}, most frequent first.
     *
     * <p>A word is a maximal run of ASCII letters. Words with equal counts are listed
     * alphabetically. A request larger than the number of distinct words prints all of
     * them; a request of zero or less prints none. {@code str} itself is not modified.
     */
    public static void findword() {
        final String text = (str == null) ? "" : str.toLowerCase(java.util.Locale.ROOT);

        Map<String, Integer> frequencies = new HashMap<>();
        for (String word : text.split("[^a-zA-Z]+")) {
            if (word.isEmpty()) {
                // split() yields an empty first token when the text starts with a separator.
                continue;
            }
            Integer seen = frequencies.get(word);
            frequencies.put(word, seen == null ? 1 : seen + 1);
        }

        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(frequencies.entrySet());
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = Integer.compare(right.getValue(), left.getValue());
                return (byCount != 0) ? byCount : left.getKey().compareTo(right.getKey());
            }
        });

        printLine("一共有" + ranked.size() + "种单词");
        printLine("请输入你要排序前几个单词:");
        // Reuse the shared scanner: a second Scanner on System.in can lose buffered input.
        Integer requested = nextIntOrNull();
        if (requested == null) {
            return;
        }
        int limit = Math.max(0, Math.min(requested.intValue(), ranked.size()));
        for (int rank = 0; rank < limit; rank++) {
            Map.Entry<String, Integer> entry = ranked.get(rank);
            printLine(entry.getKey() + ":" + entry.getValue());
        }
    }

    /** Returns the next integer from the shared scanner, skipping other tokens; {@code null} at end of input. */
    private static Integer nextIntOrNull() {
        while (scan.hasNext()) {
            if (scan.hasNextInt()) {
                return scan.nextInt();
            }
            scan.next(); // discard a token that is not an integer
        }
        return null;
    }

    /** Writes one line to standard output. */
    private static void printLine(String line) {
        System.out.println(line);
    }
}
以上是关于单词统计的主要内容,如果未能解决你的问题,请参考以下文章
C语言 统计歌词中的单词个数,并找出最短和最长的单词(必须用指针)求帮忙,感谢!!!