utf8 to unicode
Posted littletiger
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了utf8 to unicode相关的知识,希望对你有一定的参考价值。
JSON 字符串的 UTF-8 转 Unicode(输出为 UTF-16 小端字节序)函数,由 STM32 发烧友群的群友提供,仅留作参考,不保证其准确性及可用性。
/**
 * Convert a NUL-terminated UTF-8 string into UTF-16 little-endian bytes.
 *
 * Each decoded code point is written to dst as one 16-bit code unit (low
 * byte first), or as a surrogate pair (two code units, 4 bytes) when the
 * code point is above U+FFFF.  Space characters (U+0020) that appear before
 * any output has been produced are skipped.  Decoding stops at the first
 * byte sequence that is not a valid 1- to 4-byte UTF-8 form or that encodes
 * a value above U+10FFFF; everything converted up to that point is kept.
 *
 * @param dst  Output buffer.  At most 2 bytes are produced per source byte,
 *             plus one terminating 0 byte, so 2 * strlen(src) + 1 bytes are
 *             always sufficient.  Note that only a single 0 byte is written
 *             after the data; a caller that needs a full 16-bit terminator
 *             must append the second 0 byte itself.
 * @param src  NUL-terminated UTF-8 input.  It is not modified.
 * @return     Number of bytes written to dst, not counting the terminator.
 */
u32 UTF8_to_Unicode(char *dst, char *src) /* json utf8 to unicode */
{
    u8 *out = (u8 *)dst;
    u32 written = 0;

    while (*src) {
        u32 unicode = 0;
        int codeLen = 0;

        /* 1. UTF-8 ---> Unicode code point.
         * The && chains short-circuit on a NUL continuation byte, so the
         * decoder never reads past the end of the string. */
        if (0 == (src[0] & 0x80)) {
            /* single byte */
            codeLen = 1;
            unicode = (u32)(src[0] & 0x7F);
        } else if (0xC0 == (src[0] & 0xE0) &&
                   0x80 == (src[1] & 0xC0)) {
            /* two bytes */
            codeLen = 2;
            unicode = (((u32)src[0] & 0x001F) << 6) |
                      ((u32)src[1] & 0x003F);
        } else if (0xE0 == (src[0] & 0xF0) &&
                   0x80 == (src[1] & 0xC0) &&
                   0x80 == (src[2] & 0xC0)) {
            /* three bytes */
            codeLen = 3;
            unicode = (((u32)src[0] & 0x000F) << 12) |
                      (((u32)src[1] & 0x003F) << 6) |
                      ((u32)src[2] & 0x003F);
        } else if (0xF0 == (src[0] & 0xF8) && /* 11110xxx: 0xF8..0xFF are not lead bytes */
                   0x80 == (src[1] & 0xC0) &&
                   0x80 == (src[2] & 0xC0) &&
                   0x80 == (src[3] & 0xC0)) {
            /* four bytes */
            codeLen = 4;
            unicode = (((u32)src[0] & 0x0007) << 18) |
                      (((u32)src[1] & 0x003F) << 12) |
                      (((u32)src[2] & 0x003F) << 6) |
                      ((u32)src[3] & 0x003F);
            if (unicode > 0x10FFFF) {
                /* Beyond the Unicode range; not representable in UTF-16. */
                break;
            }
        } else {
            /* Invalid or truncated sequence: stop converting. */
            break;
        }

        src += codeLen;

        /* Drop spaces that precede the first emitted character. */
        if (written == 0 && unicode == 0x20) {
            continue;
        }

        /* 2. Unicode code point ---> UTF-16LE */
        if (unicode >= 0x10000) {
            /* Supplementary plane: split into a high/low surrogate pair. */
            u32 offset = unicode - 0x10000;
            u32 high = 0xD800 | (offset >> 10);
            u32 low = 0xDC00 | (offset & 0x03FF);

            *out++ = (u8)(high & 0xFF);
            *out++ = (u8)((high >> 8) & 0xFF);
            *out++ = (u8)(low & 0xFF);
            *out++ = (u8)((low >> 8) & 0xFF);
            written += 4;
        } else {
            *out++ = (u8)(unicode & 0xFF);
            *out++ = (u8)((unicode >> 8) & 0xFF);
            written += 2;
        }
    } /* end while */

    *out = 0;
    return written;
}
以上是关于utf8 to unicode的主要内容,如果未能解决你的问题,请参考以下文章
python2.7运行出现的Warning: UnicodeWarning: Unicode equal comparison failed to convert both arguments to