利用 libiconv 实现汉字编码 utf-8 格式 和 gbk格式的相互转换
Posted 阿汤的博客
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了利用 libiconv 实现汉字编码 utf-8 格式 和 gbk格式的相互转换相关的知识,希望对你有一定的参考价值。
参考文章:http://jimmee.iteye.com/blog/2174693
关于在 Windows 上编译 libiconv 库的方法,请参见:http://www.cnblogs.com/tangxin-blog/p/5608751.html
/*
 * Demo: convert Chinese text between UTF-8 and GBK with libiconv, then walk a
 * GBK string character by character and report which characters are hanzi.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include "iconv.h"

#define MAX_BUF_SIZE 1024

/*
 * Convert `inlen` bytes at `inbuf` from `from_charset` to `to_charset`,
 * writing the result to `outbuf` (capacity `outlen` bytes).
 *
 * One byte of `outbuf` is always kept back for the terminating NUL, so at
 * most `outlen - 1` converted bytes are stored and the result is always a
 * valid C string once the conversion descriptor has been opened.
 *
 * Returns 0 on success, -1 if the descriptor cannot be opened, if `outbuf`
 * is unusable, or if iconv() reports an error (invalid input, output buffer
 * too small, ...). On an iconv() error `outbuf` holds whatever was converted
 * before the failure.
 */
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen,
                 char *outbuf, size_t outlen)
{
    iconv_t cd;
    size_t out_left;
    size_t rc;

    if (outbuf == NULL || outlen == 0)
        return -1;

    /* iconv_open() signals failure with (iconv_t)-1, not with 0. */
    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
        return -1;

    memset(outbuf, 0, outlen);

    /* Keep the last byte free so the terminator below always fits. */
    out_left = outlen - 1;
    rc = iconv(cd, &inbuf, &inlen, &outbuf, &out_left);

    /* Release the descriptor on every path, including conversion errors. */
    iconv_close(cd);

    if (rc == (size_t)-1)
        return -1;

    /* iconv() advanced `outbuf` past the last byte it wrote. */
    *outbuf = '\0';
    return 0;
}

/* Convert UTF-8 text to GBK; see code_convert() for the contract. */
int utf8_to_gbk(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    return code_convert("utf-8", "gbk", inbuf, inlen, outbuf, outlen);
}

/* Convert GBK text to UTF-8; see code_convert() for the contract. */
int gbk_to_utf8(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    return code_convert("gbk", "utf-8", inbuf, inlen, outbuf, outlen);
}

/*
 * Read the whole file `file_name` into `buf` and NUL-terminate it.
 *
 * The file must be strictly smaller than `max_buf_size` bytes so that the
 * terminator fits. Any failure prints a message to stderr and terminates the
 * process: exit code 1 for open/size errors or an oversized file, 3 for a
 * short read.
 */
void read_file(char buf[], const int32_t max_buf_size, const char *file_name)
{
    FILE *fp = NULL;
    long file_size;
    size_t got;

#if defined(_MSC_VER)
    /* MSVC flags plain fopen() as deprecated; use its checked variant there. */
    if (fopen_s(&fp, file_name, "rb") != 0)
        fp = NULL;
#else
    fp = fopen(file_name, "rb");
#endif
    if (fp == NULL) {
        fputs("File error\n", stderr);
        exit(1);
    }

    /* Obtain the file size by seeking to the end. */
    if (fseek(fp, 0, SEEK_END) != 0) {
        fputs("File error\n", stderr);
        fclose(fp);
        exit(1);
    }
    file_size = ftell(fp);
    if (file_size < 0) {
        fputs("File error\n", stderr);
        fclose(fp);
        exit(1);
    }
    rewind(fp);

    if (file_size >= max_buf_size) {
        fputs("file too large\n", stderr);
        fclose(fp);
        exit(1);
    }

    got = fread(buf, 1, (size_t)file_size, fp);
    if (got != (size_t)file_size) {
        fputs("Reading error\n", stderr);
        fclose(fp);
        exit(3);
    }

    /*
     * Terminate explicitly: the caller may reuse a buffer that still holds a
     * longer, previously read file.
     */
    buf[file_size] = '\0';
    fclose(fp);
}

/*
 * Return non-zero when the GBK double-byte sequence (lead, trail) lies in one
 * of the hanzi regions of the code table:
 *   GBK/2: lead 0xB0-0xF7, trail 0xA1-0xFE (the GB2312 hanzi block)
 *   GBK/3: lead 0x81-0xA0, trail 0x40-0xFE except 0x7F
 *   GBK/4: lead 0xAA-0xFE, trail 0x40-0xA0 except 0x7F
 */
static int is_gbk_hanzi(unsigned char lead, unsigned char trail)
{
    if (lead >= 0xB0 && lead <= 0xF7 && trail >= 0xA1 && trail <= 0xFE)
        return 1;
    if (trail < 0x40 || trail == 0x7F)
        return 0;
    if (lead >= 0x81 && lead <= 0xA0 && trail <= 0xFE)
        return 1;
    if (lead >= 0xAA && lead <= 0xFE && trail <= 0xA0)
        return 1;
    return 0;
}

/*
 * Split the GBK-encoded string `str` into individual characters and print one
 * line per character: the character, its two-byte code (for double-byte
 * characters) and whether it is a hanzi (simplified or traditional).
 */
void GetToken(const char *str)
{
    /* Work on unsigned bytes so the result does not depend on whether plain
     * `char` is signed on the target platform. */
    const unsigned char *bytes = (const unsigned char *)str;
    size_t len = strlen(str);
    size_t i;

    for (i = 0; i < len; ++i) {
        unsigned char lead = bytes[i];
        unsigned char trail;
        unsigned int code;
        char cstr[3];

        /* ASCII byte, or a dangling lead byte at the very end of the string. */
        if (lead < 0x80 || i + 1 == len) {
            printf("%c >> no\n", str[i]);
            continue;
        }

        /* Compute the two-byte code. */
        trail = bytes[i + 1];
        code = ((unsigned int)lead << 8) | trail;

        /* Extract the character as a standalone string. */
        cstr[0] = str[i];
        cstr[1] = str[i + 1];
        cstr[2] = '\0';
        ++i; /* the trail byte has been consumed */

        printf("%s >> 0x%x", cstr, code);
        fputs(is_gbk_hanzi(lead, trail) ? " yes\n" : " no\n", stdout);
    }
}

/*
 * Read the GBK and UTF-8 sample files, dump each through GetToken(), then
 * convert the UTF-8 text to GBK and dump the converted result as well.
 */
int main(int argc, char *argv[])
{
    char in_buf[MAX_BUF_SIZE] = { 0 }, out_buf[MAX_BUF_SIZE] = { 0 };
    int status = 0;

    (void)argc;
    (void)argv;

    read_file(in_buf, MAX_BUF_SIZE, "chinese_gbk.txt");
    printf("%s\n", in_buf);
    GetToken(in_buf);

    read_file(in_buf, MAX_BUF_SIZE, "chinese_utf8.txt");
    printf("%s\n", in_buf);
    GetToken(in_buf);

    if (utf8_to_gbk(in_buf, strlen(in_buf), out_buf, MAX_BUF_SIZE) != 0) {
        fputs("utf-8 to gbk conversion failed\n", stderr);
        status = 1;
    } else {
        printf("%s\n", out_buf);
        GetToken(out_buf);
    }

    getchar(); /* keep the console window open until a key is pressed */
    return status;
}
完整工程demo:http://download.csdn.net/detail/tangxin19930330/9557218
以上是关于利用 libiconv 实现汉字编码 utf-8 格式 和 gbk格式的相互转换的主要内容,如果未能解决你的问题,请参考以下文章