实现了音节划分算法，但运行起来非常慢

Question

我参照改进的 Lansky 算法实现了一个简单的音节划分算法，但当我需要在一个超过 200 万词的语料库上运行它时，它真的非常慢。有人能指出是什么导致它如此缓慢吗？算法如下：

最后一个元音（元音组）之后的所有内容都属于最后一个音节
第一个元音（元音组）之前的所有内容都属于第一个音节
如果元音之间的辅音数是偶数（2n），则将它们平分：前半部分属于左元音，后半部分属于右元音（n / n）。
如果元音之间的辅音数是奇数（2n + 1），则按 n / (n + 1) 划分：前 n 个属于左元音，后 n + 1 个属于右元音。
/*
 * Syllabification by a simplified variant of Lansky's algorithm.
 *
 * Rules:
 *   1. Everything after the last vowel (vowel group) belongs to the last
 *      syllable.
 *   2. Everything before the first vowel (vowel group) belongs to the first
 *      syllable.
 *   3. An even run of 2n consonants between vowels is split n / n.
 *   4. An odd run of 2n + 1 consonants between vowels is split n / n + 1.
 *   5. If there is only one consonant between vowels, it belongs to the
 *      left vowel.
 */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L /* exposes getline() under strict -std=c11 */
#endif

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define VOWELS "aeiou"

/* Number of input lines repl() processes before it stops. */
enum { REPL_MAX_LINES = 15 };

/*
 * Return non-zero when c is one of VOWELS, ignoring case.
 *
 * The NUL check is required: strchr() treats the terminator as part of the
 * searched string, so strchr(VOWELS, '\0') would otherwise report a match.
 */
static int is_vowel(char c)
{
    if (c == '\0')
        return 0;
    return strchr(VOWELS, tolower((unsigned char)c)) != NULL;
}

/*
 * Count the leading non-vowel characters of word.
 *
 * Scanning stops at the first vowel, at the string terminator, or after
 * `length` characters, whichever comes first. Returns 0 for a NULL word or a
 * non-positive length.
 */
int get_n_consonant_between(const char *word, int length)
{
    int count = 0;

    if (word == NULL)
        return 0;
    while (count < length && word[count] != '\0' && !is_vowel(word[count]))
        count++;
    return count;
}

/*
 * Print the syllables of word to stdout.
 *
 * word            NUL-terminated text; every character that is not a vowel
 *                 (including a trailing newline) is handled as a consonant.
 * n_vowel_groups  number of syllables to produce, normally the result of
 *                 count_vowel_groups(word). Values below 2 print the
 *                 "can't be split" message instead.
 *
 * Each syllable except the last is printed as "syllable K: ...". The last
 * one is printed as "syllable done K: ..." and contains the whole remainder
 * of word. A syllable is "last" when it is number n_vowel_groups or when no
 * vowel follows it.
 */
void syllabification(const char *word, int n_vowel_groups)
{
    if (word == NULL || n_vowel_groups < 2) {
        printf("CAN'T BE SPLIT INTO SYLLABLES ");
        return;
    }

    const int length = (int)strlen(word);
    int start = 0;   /* index of the first character of the current syllable */
    int emitted = 0; /* syllables printed so far */
    int i = 0;

    while (i < length) {
        /* Onset: consonants in front of the vowel group. */
        while (i < length && !is_vowel(word[i]))
            i++;
        /* Nucleus: the vowel group itself. */
        while (i < length && is_vowel(word[i]))
            i++;

        const int run = get_n_consonant_between(word + i, length - i);

        /* No vowel after this run (or the requested count is reached):
         * everything that is left forms the last syllable. */
        if (i + run >= length || emitted + 1 == n_vowel_groups) {
            printf("syllable done %d: %s ", emitted + 1, word + start);
            return;
        }

        /* A single consonant stays with the left vowel; otherwise the left
         * side receives floor(run / 2) consonants. */
        i += (run == 1) ? 1 : run / 2;

        emitted++;
        /* %.*s prints the slice in place, so no scratch copy is needed. */
        printf("syllable %d: %.*s ", emitted, i - start, word + start);
        start = i;
    }
}

/*
 * Return the number of maximal runs of consecutive vowels in word,
 * including a run that ends the string. Returns 0 for a NULL word.
 */
int count_vowel_groups(const char *word)
{
    int groups = 0;
    int in_group = 0;

    if (word == NULL)
        return 0;
    for (const char *p = word; *p != '\0'; p++) {
        if (!is_vowel(*p)) {
            in_group = 0;
        } else if (!in_group) {
            in_group = 1;
            groups++;
        }
    }
    return groups;
}

/*
 * Read up to REPL_MAX_LINES lines from ../syllables.txt, echo each one and
 * print its syllables. Stops early at end of file or on a read error.
 * Reports a failure to open the file on stderr and returns.
 */
void repl(void)
{
    FILE *file = fopen("../syllables.txt", "r");
    if (file == NULL) {
        perror("../syllables.txt");
        return;
    }

    char *line = NULL; /* buffer owned by getline(); released below */
    size_t cap = 0;

    for (int i = 0; i < REPL_MAX_LINES && getline(&line, &cap, file) != -1; i++) {
        printf(" %s ", line);
        syllabification(line, count_vowel_groups(line));
    }

    free(line);
    fclose(file);
}

int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    printf("Syllabification test: ");
    repl();
    return 0;
}

Answer 1

另一答案