C# 判斷 Txt 檔案的編碼格式
阿新 • • 發佈:2021-06-29
/// <summary>
/// Detects the text encoding of a file. A leading byte-order mark (BOM) decides
/// the result when present; otherwise the content is checked for well-formed
/// UTF-8 byte sequences, and anything else is reported as <see cref="Encoding.Default"/>.
/// </summary>
public class EncodingType
{
    // Size of the read buffer used while scanning the file. It must be at least
    // 4 so that the longest recognised BOM (UTF-32) fits in the first read.
    private const int BufferSize = 4096;

    /// <summary>
    /// Opens the file at the given path for reading and determines its text encoding.
    /// </summary>
    /// <param name="FILE_NAME">Path of the file to inspect.</param>
    /// <returns>
    /// The encoding indicated by the file's BOM; <see cref="Encoding.UTF8"/> when there is
    /// no BOM but the content is structurally valid UTF-8 (this includes pure ASCII and
    /// empty files); otherwise <see cref="Encoding.Default"/>.
    /// </returns>
    public static System.Text.Encoding GetType(string FILE_NAME)
    {
        // The using block guarantees the handle is released even if reading fails.
        using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
        {
            return GetType(fs);
        }
    }

    /// <summary>
    /// Determines the text encoding of the data in the given file stream, starting at
    /// the stream's current position. The stream is disposed before this method returns.
    /// </summary>
    /// <param name="fs">File stream to inspect; it is closed by this call.</param>
    /// <returns>
    /// The encoding indicated by the BOM; <see cref="Encoding.UTF8"/> when there is no BOM
    /// but the content is structurally valid UTF-8; otherwise <see cref="Encoding.Default"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
    public static System.Text.Encoding GetType(FileStream fs)
    {
        if (fs == null)
        {
            throw new ArgumentNullException("fs");
        }

        // Callers of the original implementation got the stream back closed;
        // keep that contract, and also close it when an I/O error occurs.
        using (fs)
        {
            byte[] buffer = new byte[BufferSize];
            int filled = FillBuffer(fs, buffer);

            Encoding fromBom = DetectByteOrderMark(buffer, filled);
            if (fromBom != null)
            {
                return fromBom;
            }

            // No BOM: stream through the whole file, carrying the number of
            // outstanding continuation bytes across buffer boundaries.
            int pending = 0;
            while (filled > 0)
            {
                if (!ScanUtf8Bytes(buffer, filled, ref pending))
                {
                    return Encoding.Default;
                }

                filled = FillBuffer(fs, buffer);
            }

            // A file that ends in the middle of a multi-byte sequence is not UTF-8.
            return pending == 0 ? Encoding.UTF8 : Encoding.Default;
        }
    }

    /// <summary>
    /// Reads from the stream until the buffer is full or the end of the stream is reached.
    /// </summary>
    /// <param name="stream">Stream to read from.</param>
    /// <param name="buffer">Destination buffer.</param>
    /// <returns>Number of bytes placed in the buffer; 0 at end of stream.</returns>
    private static int FillBuffer(Stream stream, byte[] buffer)
    {
        int total = 0;
        while (total < buffer.Length)
        {
            // Read may legitimately return fewer bytes than requested, so loop.
            int read = stream.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    /// <summary>
    /// Maps a leading byte-order mark to its encoding.
    /// </summary>
    /// <param name="data">Buffer holding the first bytes of the file.</param>
    /// <param name="length">Number of valid bytes in <paramref name="data"/>.</param>
    /// <returns>The matching encoding, or null when no known BOM is present.</returns>
    private static Encoding DetectByteOrderMark(byte[] data, int length)
    {
        if (length >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF)
        {
            return Encoding.UTF8;
        }

        // The UTF-32 LE BOM starts with the UTF-16 LE BOM, so test it first.
        if (length >= 4 && data[0] == 0xFF && data[1] == 0xFE && data[2] == 0x00 && data[3] == 0x00)
        {
            return Encoding.UTF32;
        }

        if (length >= 4 && data[0] == 0x00 && data[1] == 0x00 && data[2] == 0xFE && data[3] == 0xFF)
        {
            return new UTF32Encoding(true, true);
        }

        if (length >= 2 && data[0] == 0xFE && data[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }

        if (length >= 2 && data[0] == 0xFF && data[1] == 0xFE)
        {
            return Encoding.Unicode;
        }

        return null;
    }

    /// <summary>
    /// Checks that a chunk of bytes follows the UTF-8 lead/continuation byte structure.
    /// Lead bytes announcing sequences of up to six bytes are accepted; overlong forms
    /// and surrogate code points are not rejected.
    /// </summary>
    /// <param name="data">Buffer containing the chunk.</param>
    /// <param name="count">Number of valid bytes in <paramref name="data"/>.</param>
    /// <param name="pending">
    /// Continuation bytes still expected for the character in progress. Pass 0 for the
    /// first chunk; the value is updated so a sequence may span two chunks.
    /// </param>
    /// <returns>False as soon as a byte violates the structure; otherwise true.</returns>
    private static bool ScanUtf8Bytes(byte[] data, int count, ref int pending)
    {
        for (int i = 0; i < count; i++)
        {
            byte current = data[i];

            if (pending > 0)
            {
                // Inside a multi-byte character: the byte must look like 10xxxxxx.
                if ((current & 0xC0) != 0x80)
                {
                    return false;
                }

                pending--;
                continue;
            }

            if (current < 0x80)
            {
                // Plain ASCII byte.
                continue;
            }

            // Count the leading 1 bits of the lead byte: 110xxxxx -> 2 ... 1111110x -> 6.
            int leadingOnes = 0;
            for (int mask = 0x80; (current & mask) != 0; mask >>= 1)
            {
                leadingOnes++;
            }

            // One leading 1 is a stray continuation byte; more than six is 0xFE/0xFF.
            if (leadingOnes < 2 || leadingOnes > 6)
            {
                return false;
            }

            pending = leadingOnes - 1;
        }

        return true;
    }
}
轉自:https://www.cnblogs.com/cyberarmy/p/5652835.html