linux实现针对文本统计字母出现的次数(所有的可打印的字符)

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了linux实现针对文本统计字母出现的次数(所有的可打印的字符)相关的知识,希望对你有一定的参考价值。

最近在看一些有意思的编程题,发现算法真是一个好东西,呵呵,自己也写了一个简单的 demo。

代码具体如下:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Number of slots in the counter table; also the loop bound used when printing. */
#define bSize 255
/* Size in bytes of the buffer used to read the input file chunk by chunk. */
#define bufSize 4096

/*
 * Occurrence counters: arr[i] is how many times the character with code
 * (i + 32) has been seen, so slot 0 is the space character.
 */
static uint64_t arr[bSize] = {0};

/*
 * whash - add the printable characters of a NUL-terminated string to arr.
 *
 * s: string to scan; a NULL pointer is ignored.
 *
 * Only printable ASCII (' ' .. '~') is counted.  Control characters such as
 * '\n' and bytes >= 0x7f are skipped: subtracting 32 from them would give a
 * negative or meaningless index and write outside arr.
 *
 * The former "seen" bitmask was dropped: it shifted an int by up to 223 bits
 * (undefined behaviour) and never influenced the counters.
 */
void whash(const char *s)
{
    if (s == NULL) {
        return;
    }

    const size_t len = strlen(s);
    for (size_t i = 0; i < len; i++) {
        /* Go through unsigned char so high bytes never become negative. */
        const unsigned char c = (unsigned char)s[i];

        if (c < ' ' || c > '~') {
            continue;
        }
        arr[c - ' ']++;
    }
}
/*
 * bread - read a file and pass its contents to whash() one chunk at a time.
 *
 * file: path of the file to read.
 *
 * Returns 0 when the whole file was read, -1 if it could not be opened or a
 * read failed.
 *
 * Each chunk is at most bufSize - 1 bytes so that a terminating NUL always
 * fits; whash() relies on that terminator to find the end of the data.
 * Reading continues until read() reports end of file, so the loop does not
 * depend on the size reported by fstat().
 *
 * NOTE: whash() works on C strings, so a NUL byte inside the file hides the
 * rest of that chunk from the counters.
 */
int bread(const char *file)
{
    char buf[bufSize];
    int rc = 0;
    int fd = open(file, O_RDONLY);

    if (fd == -1) {
        return -1;
    }

    for (;;) {
        ssize_t got = read(fd, buf, bufSize - 1);

        if (got < 0) {
            rc = -1;
            break;
        }
        if (got == 0) {
            break;              /* end of file */
        }
        buf[got] = '\0';
        whash(buf);
    }

    close(fd);
    return rc;
}
/*
 * Count the characters of ./sry.c, print the status returned by bread(),
 * then print one line for every character whose counter is non-zero.
 */
int main(void) {
    printf("bread = %d\n", bread("./sry.c"));
    for (int i = 0; i < bSize; i++) {
        if (arr[i] > 0) {
            /* arr holds uint64_t values: convert explicitly so the argument
             * matches the conversion specifier (%d expected an int). */
            printf("count(%c) =%llu\n", i + 32, (unsigned long long)arr[i]);
        }
    }
    return 0;
}

运行结果  如下:

 

[email protected]:~/code_c_20160101/algorithm/str:./sry
bread = 0
count( ) =335
count(") =6
count(#) =9
count(%) =3
count(&) =2
count(') =4
count(() =23
count()) =23
count(*) =2
count(+) =9
count(,) =11
count(-) =4
count(.) =10
count(/) =3
count(0) =15
count(1) =4
count(2) =4
count(3) =2
count(4) =3
count(5) =4
count(6) =6
count(9) =1
count(;) =27
count(<) =14
count(=) =19
count(>) =9
count(D) =1
count(L) =1
count(N) =1
count(O) =2
count(R) =1
count(S) =6
count(Y) =1
count([) =7
count(\) =4
count(]) =7
count(_) =5
count(a) =28
count(b) =14
count(c) =17
count(d) =29
count(e) =48
count(f) =28
count(g) =1
count(h) =22
count(i) =61
count(k) =1
count(l) =23
count(m) =3
count(n) =41
count(o) =10
count(p) =4
count(r) =31
count(s) =33
count(t) =55
count(u) =23
count(v) =2
count(w) =4
count(x) =5
count(y) =4
count(z) =8
count({) =13
count(|) =1
count(}) =13

 

以上是关于linux实现针对文本统计字母出现的次数(所有的可打印的字符)的主要内容,如果未能解决你的问题,请参考以下文章

统计文本中英文字母及英文单词的次数并排序

LQ0070 字符统计文本处理

用Hash Table(哈希散列表)实现统计文本每个单词重复次数(频率)

C语言 统计文本文件中出现的次数最多和最少的字符串

导入文本查询字母单词个数

编程: 输入一个字符串,统计该字符串中每个字母出现的次数,并按出现次数降序的输出每个字母。