2018-03-21 11:34:44 Java 脚本：批量将带 BOM 的 UTF-8 Java 源码文件转换为无 BOM 的 UTF-8 编码文件

Posted 代码让自己变强

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了2018-03-21 11:34:44 java脚本批量转换java utf-8 bom源码文件为utf-8编码文件相关的知识,希望对你有一定的参考价值。

package com.springbootdubbo;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * A {@link Reader} that detects and skips a leading Unicode byte order mark (BOM),
 * plus a small command-line tool that rewrites {@code .java} files as BOM-less text.
 *
 * <p>The reader looks at the first bytes of the wrapped stream. When a UTF-8,
 * UTF-16 or UTF-32 BOM is present, the BOM is consumed and the matching charset is
 * used for decoding; otherwise all bytes are pushed back and the caller-supplied
 * default encoding is used.
 *
 * @author zyh
 * @createDate 2018/11/19 18:30
 */
public class UTF8BOMConverter extends Reader {

    /** Raw byte stream; the push-back buffer lets non-BOM look-ahead bytes be returned. */
    PushbackInputStream internalIn;

    /** Decoding reader, created lazily by {@link #init()}; {@code null} until then. */
    InputStreamReader internalIn2 = null;

    /** Encoding used when the stream carries no BOM; {@code null} means platform default. */
    String defaultEnc;

    /** Longest BOM that is recognised (UTF-32), and therefore the look-ahead size. */
    private static final int BOM_SIZE = 4;

    /**
     * Creates a reader over {@code in}.
     *
     * @param in         input stream to be read
     * @param defaultEnc default encoding if the stream does not have a BOM marker;
     *                   give {@code null} to use the system-level default
     */
    UTF8BOMConverter(InputStream in, String defaultEnc) {
        internalIn = new PushbackInputStream(in, BOM_SIZE);
        this.defaultEnc = defaultEnc;
    }

    /**
     * Returns the encoding that is used when no BOM is found.
     *
     * @return the default encoding passed to the constructor, possibly {@code null}
     */
    public String getDefaultEncoding() {
        return defaultEnc;
    }

    /**
     * Returns the encoding of the underlying decoder, or {@code null} if the stream is
     * uninitialized. Call {@link #init()} or a read method to initialize it.
     *
     * @return the name reported by {@link InputStreamReader#getEncoding()}, or {@code null}
     */
    public String getEncoding() {
        if (internalIn2 == null) {
            return null;
        }
        return internalIn2.getEncoding();
    }

    /**
     * Reads ahead up to four bytes and checks them for a BOM. Only the BOM bytes are
     * skipped; every other look-ahead byte is pushed back onto the stream.
     *
     * @throws IOException if reading fails or the selected encoding is not supported
     */
    protected void init() throws IOException {
        if (internalIn2 != null) {
            return;
        }

        // A single read() may legally deliver fewer bytes than requested, so keep
        // reading until the look-ahead buffer is full or the stream ends.
        byte[] bom = new byte[BOM_SIZE];
        int n = 0;
        while (n < bom.length) {
            int r = internalIn.read(bom, n, bom.length - n);
            if (r <= 0) {
                break;
            }
            n += r;
        }

        // Each test is guarded by the number of bytes really read, so that the
        // zero-filled tail of the buffer is never mistaken for BOM bytes. The UTF-32
        // checks come first because the UTF-32LE BOM starts with the UTF-16LE BOM.
        String encoding;
        int bomLength;
        if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00
                && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE
                && bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB
                && bom[2] == (byte) 0xBF) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
            encoding = "UTF-16LE";
            bomLength = 2;
        } else {
            // Unicode BOM mark not found, unread all bytes.
            encoding = defaultEnc;
            bomLength = 0;
        }

        int unread = n - bomLength;
        if (unread > 0) {
            internalIn.unread(bom, bomLength, unread);
        }

        if (encoding == null) {
            internalIn2 = new InputStreamReader(internalIn);
        } else {
            internalIn2 = new InputStreamReader(internalIn, encoding);
        }
    }

    /**
     * Closes the reader and the wrapped stream. If nothing has been read yet, the raw
     * stream is closed directly instead of being probed for a BOM first.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        if (internalIn2 != null) {
            internalIn2.close();
        } else {
            internalIn.close();
        }
    }

    /**
     * Reads decoded characters, initializing BOM detection on first use.
     *
     * @param cbuf destination buffer
     * @param off  offset at which to start storing characters
     * @param len  maximum number of characters to read
     * @return the number of characters read, or -1 at end of stream
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        init();
        return internalIn2.read(cbuf, off, len);
    }

    /**
     * Reads a file (skipping any BOM) and writes its text back to the same path.
     *
     * @param filePath     file to convert in place
     * @param readEncoding encoding assumed when the file has no BOM
     * @param saveEncoding encoding used for writing
     * @throws Exception if reading or writing fails
     */
    private static void readContentAndSaveWithEncoding(String filePath, String readEncoding,
            String saveEncoding) throws Exception {
        saveContent(filePath, readContent(filePath, readEncoding), saveEncoding);
    }

    /**
     * Overwrites {@code filePath} with {@code content} encoded as {@code encoding}.
     *
     * @param filePath file to write
     * @param content  text to store
     * @param encoding charset name used for encoding
     * @throws Exception if the encoding is unsupported or the file cannot be written
     */
    private static void saveContent(String filePath, String content, String encoding)
            throws Exception {
        // Encode before opening the file: an unsupported encoding then fails here,
        // before the existing file has been truncated.
        byte[] bytes = content.getBytes(encoding);
        try (FileOutputStream fos = new FileOutputStream(filePath)) {
            fos.write(bytes);
        }
    }

    /**
     * Returns the complete text of a file with any leading BOM removed. Line
     * terminators are kept exactly as they appear in the file.
     *
     * @param filePath file to read
     * @param encoding encoding assumed when the file has no BOM
     * @return the decoded file content
     * @throws Exception if the file cannot be read
     */
    private static String readContent(String filePath, String encoding) throws Exception {
        try (FileInputStream file = new FileInputStream(new File(filePath));
                BufferedReader br = new BufferedReader(new UTF8BOMConverter(file, encoding))) {
            // Copy characters verbatim rather than line by line, so the original
            // line breaks survive the round trip.
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[8192];
            int count;
            while ((count = br.read(buffer, 0, buffer.length)) != -1) {
                content.append(buffer, 0, count);
            }
            return content.toString();
        }
    }

    /**
     * Reads a UTF-8 text file and returns its lines in order.
     *
     * @param filePath file to read
     * @return one list entry per line, without line terminators
     * @throws Exception if the file cannot be read
     */
    private static List<String> getPerlineFileName(String filePath) throws Exception {
        try (FileInputStream file = new FileInputStream(new File(filePath));
                BufferedReader br = new BufferedReader(new InputStreamReader(file, "UTF-8"))) {
            List<String> list = new ArrayList<String>();
            String line;
            while ((line = br.readLine()) != null) {
                list.add(line);
            }
            return list;
        }
    }

    /**
     * Recursively collects the paths of all regular files below {@code filePath}.
     * Directories are descended into but are not added to the result themselves.
     *
     * @param filePath  directory to scan
     * @param filePaths list that receives the paths
     * @return {@code filePaths}, for convenience
     */
    private static List<String> getAllFilePaths(File filePath, List<String> filePaths) {
        File[] files = filePath.listFiles();
        if (files == null) {
            // Not a directory, or it could not be listed.
            return filePaths;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                getAllFilePaths(f, filePaths);
            } else {
                filePaths.add(f.getPath());
            }
        }
        return filePaths;
    }

    /**
     * Rewrites every {@code .java} file below a root directory as UTF-8 without BOM.
     *
     * @param args optional; {@code args[0]} is the root directory, which defaults to
     *             {@code E:\mgtt\DING} when absent
     * @throws Exception if any file cannot be converted
     */
    public static void main(String[] args) throws Exception {
        String suffix = ".java";
        String root = (args != null && args.length > 0) ? args[0] : "E:\\mgtt\\DING";

        List<String> paths = getAllFilePaths(new File(root), new ArrayList<String>());

        List<String> pathList = new ArrayList<String>();
        for (String path : paths) {
            if (path.endsWith(suffix)) {
                pathList.add(path);
            }
        }

        for (String path : pathList) {
            readContentAndSaveWithEncoding(path, "UTF-8", "UTF-8");
            System.out.println(path + "转换成功");
        }
    }

}









































































































































































































































































































以上是关于2018-03-21 11:34:44 java脚本批量转换java utf-8 bom源码文件为utf-8编码文件的主要内容,如果未能解决你的问题,请参考以下文章

2018-03-21

自己喜欢的一些句子摘录-2018-03-21

如何获取Java 9打印有关GC的Java 8风格信息

Java 处理PDF文档:页眉页脚水印背景附件

java实现插入word页眉页脚以及生成目录及页码

如何制作和连接具有上传、设置页眉和页脚报告的Java swing程序