TXT文件编码为ANSI,怎么实现用JAVA程序转换为Unicode

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了TXT文件编码为ANSI,怎么实现用JAVA程序转换为Unicode相关的知识,希望对你有一定的参考价值。

参考技术A 我只说思路:比如一个文本,它原来的编码为GBK,读的时候就以GBK读取;然后在同目录下创建同名文件并指定编码为UTF-8(这默认会把原来的文件替换),再以UTF-8的编码把刚刚读到的内容写进去,完事。

参考技术B
package test;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class CodeUtil public static void main(String[] args) throws UnsupportedEncodingException
String chinaString = "测试";
String unicode = convertToUnicode(chinaString);
System.out.println(unicode);
System.out.println(unicodeToChinese(unicode));

String _x16 = convertTo16Code(chinaString, "UTF-8").toLowerCase();
System.out.println(_x16);
String[] sby = convertToBitCode(_x16);

StringBuffer subf = new StringBuffer();
subf.append("byte[] bytes = ");
byte[] bytes = new byte[sby.length];
for (int i = 0; i < sby.length; i++)
int icode = Integer.decode(sby[i]).intValue();
bytes[i] = (byte) icode;
subf.append("(byte)").append(sby[i]).append(",");
// System.out.println(sby[i]);

subf.deleteCharAt(subf.length() - 1);
subf.append(";");
System.out.println(subf.toString());
System.out.println(new String(bytes, "UTF-8"));


private final static String hexString = "0123456789ABCDEF";
/**
* 转16进制,如果是中文,encoding-->>UTF-8
*/
public static String convertTo16Code(String str, String encoding)
throws UnsupportedEncodingException
byte[] bytes = str.getBytes(encoding);

StringBuffer sb = new StringBuffer(bytes.length * 2);
for (int i = 0; i < bytes.length; i++)
sb.append(hexString.charAt((bytes[i] & 0xf0) >> 4));
sb.append(hexString.charAt((bytes[i] & 0x0f) >> 0));

return sb.toString();


/**
* 将汉字转Unicode
*
* @param String
* s
* @return String sb.toString();
*/
public static String convertToUnicode(String s)
if (s == null)
return s;

char[] chars = s.toCharArray();
char c;
StringBuffer sb = new StringBuffer();
for (int i = 0; i < chars.length; i++)
c = chars[i];
if (c > 0xff)
sb.append("\\u").append(Integer.toHexString(c));
else
sb.append(c);


return sb.toString();


/**
* 将16进制转为区位码
*
* @param
*/
public static String[] convertToBitCode(String str_16)
String[] result = null;
if (str_16 == null || str_16 == "" || str_16.length() % 2 != 0)
return result;

result = new String[str_16.length() / 2];
for (int i = 0; i < str_16.length() / 2; i++)
result[i] = "0x" + str_16.charAt(i * 2) + str_16.charAt(i * 2 + 1);

return result;


/**
*
*/

public static String unicodeToChinese(String unicodeStr)
int start = 0;
int end = 0;
final StringBuffer buffer = new StringBuffer();
while (start > -1)
end = unicodeStr.indexOf("\\u", start + 2);
String charStr = "";
if (end == -1)
charStr = unicodeStr.substring(start + 2, unicodeStr.length());
else
charStr = unicodeStr.substring(start + 2, end);

char letter = (char) Integer.parseInt(charStr, 16);
buffer.append(new Character(letter).toString());
start = end;

return buffer.toString();


public static boolean isChinese(char c)
Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
|| ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
|| ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
|| ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS)
return true;

return false;


public static boolean isMessyCode(String str)
Pattern p = Pattern.compile("\\s*|\t*|\r*|\n*");
Matcher m = p.matcher(str);
String after = m.replaceAll("");
String temp = after.replaceAll("\\pP", "");
char[] ch = temp.trim().toCharArray();
float chLength = ch.length;
float count = 0;
for (int i = 0; i < ch.length; i++)
char c = ch[i];
if (!Character.isLetterOrDigit(c))

if (!isChinese(c))
count = count + 1;
System.out.print(c);



float result = count / chLength;
if (result > 0.4)
return true;
else
return false;




这个你懂了,解决你的问题就是小意思了。(本回答被提问者和网友采纳)
参考技术C 用编辑器就可以改的吧

如何实现UTF-8 Unicode Ansi 汉字编码转换

参考技术A 请参照下面方法,把 CFile类 改成 FILE*。 写字符串改成写文件流。
注意,汉字在utf8中占3个字节。
// unicode to ansi
void CConvertDlg::OnBnClickedButtonUnicodeToAnsi()

// unicode to ansi
wchar_t* wszString = L"abcd1234你我他";
//预转换,得到所需空间的大小,这次用的函数和上面名字相反
int ansiLen = ::WideCharToMultiByte(CP_ACP, NULL, wszString, wcslen(wszString), NULL, 0, NULL, NULL);
//同上,分配空间要给'\0'留个空间
char* szAnsi = new char[ansiLen + 1];
//转换
//unicode版对应的strlen是wcslen
::WideCharToMultiByte(CP_ACP, NULL, wszString, wcslen(wszString), szAnsi, ansiLen, NULL, NULL);
//最后加上'\0'
szAnsi[ansiLen] = '\0';
//Ansi版的MessageBox API
::MessageBoxA(GetSafeHwnd(), szAnsi, szAnsi, MB_OK);
//接下来写入文本
//写文本文件,ANSI文件没有BOM
CFile cFile;
cFile.Open(_T("1.txt"), CFile::modeWrite | CFile::modeCreate);
//文件开头
cFile.SeekToBegin();
//写入内容
cFile.Write(szAnsi, ansiLen * sizeof(char));
cFile.Flush();
cFile.Close();
delete[] szAnsi;
szAnsi =NULL;

//方法2
//和上面一样有另一种方法
setlocale(LC_CTYPE, "chs");
char szStr[100];
//注意下面是大写,在ansi中,代表后面是unicode字符串
//sprintf
sprintf(szStr, "%S", wszString);
::MessageBoxA(GetSafeHwnd(), szStr, szStr, MB_OK);

// UTF8转UNICODE
void CConvertDlg::OnBnClickedButtonU8ToUnicode()

//UTF8 to Unicode
//由于中文直接复制过来会成乱码,编译器有时会报错,故采用16进制形式
char* szU8 = "abcd1234\xe4\xbd\xa0\xe6\x88\x91\xe4\xbb\x96\x00";
//预转换,得到所需空间的大小
int wcsLen = ::MultiByteToWideChar(CP_UTF8, NULL, szU8, strlen(szU8), NULL, 0);
//分配空间要给'\0'留个空间,MultiByteToWideChar不会给'\0'空间
wchar_t* wszString = new wchar_t[wcsLen + 1];
//转换
::MultiByteToWideChar(CP_UTF8, NULL, szU8, strlen(szU8), wszString, wcsLen);
//最后加上'\0'
wszString[wcsLen] = '\0';
//unicode版的MessageBox API
::MessageBoxW(GetSafeHwnd(), wszString, wszString, MB_OK);
//写文本同ansi to unicode
本回答被提问者采纳

以上是关于TXT文件编码为ANSI,怎么实现用JAVA程序转换为Unicode的主要内容,如果未能解决你的问题,请参考以下文章

高分求-VB 把ANSI文本转换成UTF-8,多谢!!

VB中MultiByteToWideChar如何调用使得文本格式由ANSI转换成UTF-8

VB怎么把ANSI编码的txt文件转为UTF-8编码?

vb中怎么把ansi转换成UTF-8

如何将csv文件转换成Ansi编码的文件?

Java中如何将gbk装换为ansi