JAVA 無BOM utf8檔案編碼判斷
阿新 • • 發佈:2019-02-05
/** * @Comments :獲取檔案編碼格式 * @param fileName * @return */ private static String getCharset(File fileName) { BufferedInputStream bin; int bom = 0; String str = " "; String str2 = ""; try { bin = new BufferedInputStream(new FileInputStream(fileName)); bom = (bin.read() << 8) + bin.read(); // 獲取兩個位元組內容,如果檔案無BOM資訊,則通過判斷字的位元組長度區分編碼格式 byte bs[] = new byte[10]; while(str.matches("\\s+\\w*")){ bin.read(bs); str = new String(bs, "UTF-8"); } str2 = new String(bs, "GBK"); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } String code = null; // 有BOM switch (bom) { case 0xefbb: code = "UTF-8"; break; case 0xfffe: code = "Unicode"; break; case 0xfeff: code = "UTF-16BE"; break; default: // 無BOM if (str.length() <=str2.length()) { code = "UTF-8"; } else { code = "GBK"; } } return code; }