1. 程式人生 > 其它 >c# 判斷Txt檔案的編碼格式

C# 判斷 Txt 檔案的編碼格式

/// <summary>
/// Detects the text encoding of a file from its byte-order mark (BOM) or,
/// when no BOM is present, by checking whether the content is well-formed UTF-8.
/// </summary>
public class EncodingType
{
    /// <summary>Number of bytes requested from the stream per read.</summary>
    private const int ChunkSize = 64 * 1024;

    /// <summary>
    /// Longest UTF-8 sequence accepted by the scan. Kept at 6 so the legacy
    /// 5- and 6-byte lead forms (1111110X) are still accepted.
    /// </summary>
    private const int MaxSequenceLength = 6;

    private static readonly byte[] Utf8Bom = { 0xEF, 0xBB, 0xBF };
    private static readonly byte[] Utf16BigEndianBom = { 0xFE, 0xFF };
    private static readonly byte[] Utf16LittleEndianBom = { 0xFF, 0xFE };

    /// <summary>
    /// Opens the file at the given path for reading and determines its encoding.
    /// </summary>
    /// <param name="FILE_NAME">Path of the file to inspect.</param>
    /// <returns>The detected encoding of the file.</returns>
    public static System.Text.Encoding GetType(string FILE_NAME)
    {
        // The using block closes the file even when detection throws.
        using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
        {
            return GetType(fs);
        }
    }

    /// <summary>
    /// Determines the encoding of the data in the given file stream, reading
    /// from the stream's current position to its end. The stream is closed
    /// before this method returns.
    /// </summary>
    /// <param name="fs">File stream to inspect.</param>
    /// <returns>
    /// <see cref="Encoding.UTF8"/> for a UTF-8 BOM or BOM-less content that is
    /// well-formed UTF-8 (including empty and pure ASCII content);
    /// <see cref="Encoding.BigEndianUnicode"/> or <see cref="Encoding.Unicode"/>
    /// for the matching UTF-16 BOM; otherwise <see cref="Encoding.Default"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
    public static System.Text.Encoding GetType(FileStream fs)
    {
        if (fs == null)
        {
            throw new ArgumentNullException("fs");
        }

        // Disposing the reader also closes the underlying stream.
        using (BinaryReader reader = new BinaryReader(fs, System.Text.Encoding.Default))
        {
            byte[] chunk = reader.ReadBytes(ChunkSize);

            // A BOM is decisive. Only the BOM bytes themselves are compared,
            // and StartsWith is length-safe for files shorter than the BOM.
            if (StartsWith(chunk, Utf8Bom))
            {
                return Encoding.UTF8;
            }

            if (StartsWith(chunk, Utf16BigEndianBom))
            {
                return Encoding.BigEndianUnicode;
            }

            if (StartsWith(chunk, Utf16LittleEndianBom))
            {
                return Encoding.Unicode;
            }

            // No BOM: validate the content as UTF-8 chunk by chunk so the
            // whole file never has to be held in memory at once.
            int pending = 0;
            while (chunk.Length > 0)
            {
                if (!ScanUtf8(chunk, ref pending))
                {
                    return Encoding.Default;
                }

                chunk = reader.ReadBytes(ChunkSize);
            }

            // A multi-byte sequence cut off at end of file is not valid UTF-8.
            return pending == 0 ? Encoding.UTF8 : Encoding.Default;
        }
    }

    /// <summary>
    /// Checks whether <paramref name="data"/> begins with <paramref name="prefix"/>.
    /// </summary>
    /// <param name="data">Bytes to examine.</param>
    /// <param name="prefix">Expected leading bytes.</param>
    /// <returns>True when data is at least as long as prefix and starts with it.</returns>
    private static bool StartsWith(byte[] data, byte[] prefix)
    {
        if (data.Length < prefix.Length)
        {
            return false;
        }

        for (int i = 0; i < prefix.Length; i++)
        {
            if (data[i] != prefix[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Validates one chunk of bytes as UTF-8, carrying the state of an
    /// unfinished multi-byte sequence across calls.
    /// </summary>
    /// <param name="data">Chunk of bytes to validate.</param>
    /// <param name="pending">
    /// Number of continuation bytes still expected for the current character;
    /// 0 means the next byte must start a new character.
    /// </param>
    /// <returns>False as soon as a byte violates the UTF-8 structure; otherwise true.</returns>
    private static bool ScanUtf8(byte[] data, ref int pending)
    {
        foreach (byte current in data)
        {
            if (pending > 0)
            {
                // Continuation bytes must look like 10XXXXXX.
                if ((current & 0xC0) != 0x80)
                {
                    return false;
                }

                pending--;
                continue;
            }

            if (current < 0x80)
            {
                // Single-byte (ASCII) character.
                continue;
            }

            // The count of leading 1 bits in a lead byte is the total length
            // of the sequence: 110XXXXX -> 2 bytes ... 1111110X -> 6 bytes.
            int sequenceLength = 0;
            for (int mask = 0x80; mask != 0 && (current & mask) != 0; mask >>= 1)
            {
                sequenceLength++;
            }

            // One leading 1 bit is a stray continuation byte; more than the
            // maximum (0xFE, 0xFF) can never start a sequence.
            if (sequenceLength < 2 || sequenceLength > MaxSequenceLength)
            {
                return false;
            }

            pending = sequenceLength - 1;
        }

        return true;
    }
}
// Reposted from: https://www.cnblogs.com/cyberarmy/p/5652835.html