1. 程式人生 > 實用技巧 > 判斷檔案的編碼格式

判斷檔案的編碼格式

package com.qing.qing.test;

import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;

/**
 * Small command-line demo that guesses the text encoding of a file and prints
 * the file's content decoded with that encoding.
 *
 * <p>Detection is heuristic: a byte-order mark is honoured first; otherwise the
 * file is reported as UTF-8 only when it is well-formed UTF-8 that contains at
 * least one multi-byte sequence; everything else falls back to GBK.
 */
public class TestBM {

    /** File inspected by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_PATH = "D:\\ytzz\\ddd - 副本.txt";

    /** Charset name reported when nothing more specific is detected (GBK/GB2312, i.e. "ANSI"). */
    private static final String DEFAULT_CHARSET = "GBK";

    /** Chunk size used while scanning the file body. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Prints the detected charset name of a file followed by its decoded content.
     *
     * @param args optional; {@code args[0]} is the path of the file to inspect,
     *             otherwise {@link #DEFAULT_PATH} is used
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        File file = new File(path);
        if (!file.exists()) {
            System.out.println("not found");
            return;
        }
        try {
            String type = getFileCharsetName(path);
            System.out.println(type);
            byte[] bytes = Files.readAllBytes(file.toPath());
            System.out.println(new String(bytes, type));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Guesses the charset of the given file.
     *
     * <ul>
     *   <li>bytes {@code FF FE} at the start: returns {@code "UTF-16"}</li>
     *   <li>bytes {@code FE FF} at the start: returns {@code "Unicode"}</li>
     *   <li>bytes {@code EF BB BF} at the start: returns {@code "UTF-8"}</li>
     *   <li>no BOM, but the whole file is well-formed UTF-8 with at least one
     *       multi-byte sequence: returns {@code "UTF-8"}</li>
     *   <li>anything else (including empty and pure-ASCII files): returns {@code "GBK"}</li>
     * </ul>
     *
     * @param fileName path of the file to inspect
     * @return one of {@code "UTF-16"}, {@code "Unicode"}, {@code "UTF-8"} or {@code "GBK"}
     * @throws IOException if the file cannot be opened or read
     */
    public static String getFileCharsetName(String fileName) throws IOException {
        // try-with-resources guarantees the stream is closed even when a read fails.
        try (InputStream inputStream = new FileInputStream(fileName)) {
            byte[] head = new byte[3];
            int headLength = readUpTo(inputStream, head);

            String bomCharset = charsetFromBom(head, headLength);
            if (bomCharset != null) {
                return bomCharset;
            }

            // No BOM: validate the complete content as UTF-8, starting with the
            // bytes that were already consumed while looking for a BOM.
            Utf8Validator validator = new Utf8Validator();
            if (!validator.accept(head, headLength)) {
                return DEFAULT_CHARSET;
            }
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = inputStream.read(buffer)) != -1) {
                if (!validator.accept(buffer, count)) {
                    return DEFAULT_CHARSET;
                }
            }
            return validator.isCompleteMultiByteText() ? "UTF-8" : DEFAULT_CHARSET;
        }
    }

    /** Fills {@code target} from the stream until it is full or EOF; returns the number of bytes stored. */
    private static int readUpTo(InputStream in, byte[] target) throws IOException {
        int total = 0;
        while (total < target.length) {
            int count = in.read(target, total, target.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /** Maps a leading byte-order mark to a charset name, or returns {@code null} when there is none. */
    private static String charsetFromBom(byte[] head, int length) {
        if (length >= 2) {
            int first = head[0] & 0xFF;
            int second = head[1] & 0xFF;
            if (first == 0xFF && second == 0xFE) {
                return "UTF-16";
            }
            if (first == 0xFE && second == 0xFF) {
                // Name kept for backward compatibility with existing callers.
                // NOTE(review): "Unicode" is not a canonical charset name; it is
                // presumably resolved as an alias of UTF-16 by the JDK in use - confirm.
                return "Unicode";
            }
        }
        if (length >= 3
                && (head[0] & 0xFF) == 0xEF
                && (head[1] & 0xFF) == 0xBB
                && (head[2] & 0xFF) == 0xBF) {
            return "UTF-8";
        }
        return null;
    }

    /**
     * Incremental checker for well-formed UTF-8. State is carried between
     * {@link #accept(byte[], int)} calls so a multi-byte sequence may straddle
     * two chunks.
     */
    private static final class Utf8Validator {
        /** Continuation bytes still expected for the sequence in progress. */
        private int pending;
        /** Inclusive lower bound allowed for the next continuation byte. */
        private int nextLow = 0x80;
        /** Inclusive upper bound allowed for the next continuation byte. */
        private int nextHigh = 0xBF;
        /** Whether at least one multi-byte sequence has been started. */
        private boolean sawMultiByte;

        /** Consumes {@code length} bytes; returns {@code false} as soon as the data cannot be UTF-8. */
        boolean accept(byte[] data, int length) {
            for (int i = 0; i < length; i++) {
                int value = data[i] & 0xFF;
                if (pending > 0) {
                    if (value < nextLow || value > nextHigh) {
                        return false;
                    }
                    pending--;
                    nextLow = 0x80;
                    nextHigh = 0xBF;
                } else if (value < 0x80) {
                    // Plain ASCII byte: valid, but no evidence for UTF-8 either.
                    continue;
                } else if (value >= 0xC2 && value <= 0xDF) {
                    startSequence(1, 0x80, 0xBF);
                } else if (value >= 0xE0 && value <= 0xEF) {
                    // E0 would allow overlong forms and ED would allow surrogates,
                    // so their first continuation byte has a narrower range.
                    startSequence(2, value == 0xE0 ? 0xA0 : 0x80, value == 0xED ? 0x9F : 0xBF);
                } else if (value >= 0xF0 && value <= 0xF4) {
                    // F0 would allow overlong forms and F4 would exceed U+10FFFF.
                    startSequence(3, value == 0xF0 ? 0x90 : 0x80, value == 0xF4 ? 0x8F : 0xBF);
                } else {
                    // Stray continuation byte, or a lead byte that is never valid (C0, C1, F5..FF).
                    return false;
                }
            }
            return true;
        }

        /** Returns {@code true} when no sequence is left unfinished and a multi-byte sequence was seen. */
        boolean isCompleteMultiByteText() {
            return pending == 0 && sawMultiByte;
        }

        private void startSequence(int continuationBytes, int low, int high) {
            pending = continuationBytes;
            nextLow = low;
            nextHigh = high;
            sawMultiByte = true;
        }
    }
}