使用C#.NET呼叫ICU進行編碼檢測和編碼轉換
阿新 • • 發佈:2020-10-09
ICU的C/C++版本:ICU4C
相關API的用法可查閱官方文件,本例只演示使用P/Invoke呼叫。
DLL檔案需要注意區分32位和64位。
官方API文件:ICU-docs
P/Invoke相關文件:Native interoperability、Interop Marshaling
非常有用的P/Invoke函式簽名查詢工具:PINVOKE.NET
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;

namespace EncodingConverter.Console
{
    /// <summary>
    /// Demo console program that calls ICU4C through P/Invoke to convert text between
    /// UTF-16 ("UCS2") and GBK, and to detect the encoding of a byte stream.
    /// </summary>
    class Program
    {
        // Numeric value of ICU's U_BUFFER_OVERFLOW_ERROR. A preflight call (null buffer,
        // capacity 0) reports it by design while still returning the required length.
        private const int BufferOverflowError = 15;

        /// <summary>
        /// Runs the three demos in order. The GBK file written by the first conversion
        /// is the input of the other two, so the order matters.
        /// </summary>
        static void Main(string[] args)
        {
            Convert_UCS2_To_GBK();
            Convert_GBK_To_UCS2();
            Detect_Encoding();
            System.Console.ReadKey();
        }

        /// <summary>
        /// Throws when <paramref name="status"/> is an ICU failure code.
        /// </summary>
        /// <param name="status">Status written by the ICU call.</param>
        /// <param name="operation">Name of the ICU function, used in the exception message.</param>
        /// <param name="allowBufferOverflow">
        /// True for preflight calls, where a buffer-overflow status is the expected outcome.
        /// </param>
        /// <exception cref="InvalidOperationException">The ICU call reported an error.</exception>
        private static void ThrowIfFailed(ICU4C.UErrorCode status, string operation, bool allowBufferOverflow = false)
        {
            int code = (int)status;

            // Same rule as ICU's U_FAILURE(): only strictly positive codes are errors;
            // zero is success and negative values are informational warnings.
            if (code <= 0)
            {
                return;
            }

            if (allowBufferOverflow && code == BufferOverflowError)
            {
                return;
            }

            throw new InvalidOperationException($"ICU call '{operation}' failed with error code {code}.");
        }

        /// <summary>
        /// Reads TEST.GBK.TXT, asks the ICU charset detector for the best match and
        /// prints the detected charset name (empty when nothing matches).
        /// </summary>
        /// <exception cref="InvalidOperationException">An ICU call reported an error.</exception>
        static void Detect_Encoding()
        {
            byte[] input = File.ReadAllBytes(@"TEST.GBK.TXT");
            string str = null;

            // ucsdet_setText does not copy the text; the detector keeps the pointer and
            // reads it later in ucsdet_detect. The marshaller pins a byte[] only for the
            // duration of a single call, so the array is pinned here for the whole
            // detection to keep its address stable.
            GCHandle pinnedInput = GCHandle.Alloc(input, GCHandleType.Pinned);
            try
            {
                // Open the detector.
                ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
                IntPtr ucsd = ICU4C.NativeMethods.ucsdet_open(ref status);
                ThrowIfFailed(status, "ucsdet_open");

                try
                {
                    // Hand the text to the detector.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    ICU4C.NativeMethods.ucsdet_setText(ucsd, input, input.Length, ref status);
                    ThrowIfFailed(status, "ucsdet_setText");

                    // Run the detection.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    IntPtr ucsm = ICU4C.NativeMethods.ucsdet_detect(ucsd, ref status);
                    ThrowIfFailed(status, "ucsdet_detect");

                    // A null match means no charset fits the input; querying it would
                    // dereference a null pointer inside ICU.
                    if (ucsm != IntPtr.Zero)
                    {
                        status = ICU4C.UErrorCode.U_ZERO_ERROR;
                        IntPtr lpstr = ICU4C.NativeMethods.ucsdet_getName(ucsm, ref status);
                        ThrowIfFailed(status, "ucsdet_getName");

                        // The name is owned by the detector, so copy it before closing.
                        str = Marshal.PtrToStringAnsi(lpstr);
                    }
                }
                finally
                {
                    // Close the detector even when one of the calls above failed.
                    ICU4C.NativeMethods.ucsdet_close(ucsd);
                }
            }
            finally
            {
                pinnedInput.Free();
            }

            System.Console.WriteLine("Detected Encoding");
            System.Console.WriteLine($" Result = {str}");
        }

        /// <summary>
        /// Reads TEST.TXT as UTF-16 LE, converts it to GBK with an ICU converter and
        /// writes the bytes to TEST.GBK.TXT.
        /// </summary>
        /// <exception cref="InvalidOperationException">An ICU call reported an error.</exception>
        static void Convert_UCS2_To_GBK()
        {
            string input = File.ReadAllText(@"TEST.TXT", Encoding.Unicode);
            byte[] output;

            // Open the converter.
            ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
            IntPtr cnv = ICU4C.NativeMethods.ucnv_open("GBK", ref status);
            ThrowIfFailed(status, "ucnv_open");

            try
            {
                // Preflight: a null buffer with capacity 0 only computes the output length.
                status = ICU4C.UErrorCode.U_ZERO_ERROR;
                int outputLength = ICU4C.NativeMethods.ucnv_fromUChars(cnv, null, 0, input, input.Length, ref status);
                ThrowIfFailed(status, "ucnv_fromUChars (preflight)", allowBufferOverflow: true);

                // Output buffer sized exactly to the converted data.
                output = new byte[outputLength];

                // Real conversion into the buffer.
                status = ICU4C.UErrorCode.U_ZERO_ERROR;
                ICU4C.NativeMethods.ucnv_fromUChars(cnv, output, output.Length, input, input.Length, ref status);
                ThrowIfFailed(status, "ucnv_fromUChars");
            }
            finally
            {
                // Close the converter even when the conversion failed.
                ICU4C.NativeMethods.ucnv_close(cnv);
            }

            // Write the result file.
            File.WriteAllBytes(@"TEST.GBK.TXT", output);

            System.Console.WriteLine("Convert UCS2 to GBK");
            System.Console.WriteLine($" Input Length = {input.Length} characters");
            System.Console.WriteLine($" Output Length = {output.Length} bytes");
        }

        /// <summary>
        /// Reads TEST.GBK.TXT, converts the GBK bytes to UTF-16 with an ICU converter and
        /// writes the text to TEST.UCS2.TXT as UTF-16 LE.
        /// </summary>
        /// <exception cref="InvalidOperationException">An ICU call reported an error.</exception>
        static void Convert_GBK_To_UCS2()
        {
            byte[] input = File.ReadAllBytes(@"TEST.GBK.TXT");
            StringBuilder output;

            // Open the converter.
            ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
            IntPtr cnv = ICU4C.NativeMethods.ucnv_open("GBK", ref status);
            ThrowIfFailed(status, "ucnv_open");

            try
            {
                // Preflight: a null buffer with capacity 0 only computes the output length.
                status = ICU4C.UErrorCode.U_ZERO_ERROR;
                int outputLength = ICU4C.NativeMethods.ucnv_toUChars(cnv, null, 0, input, input.Length, ref status);
                ThrowIfFailed(status, "ucnv_toUChars (preflight)", allowBufferOverflow: true);

                // One extra UChar of capacity lets ICU NUL-terminate the result itself,
                // so the copy back into the StringBuilder stops at a terminator ICU wrote.
                output = new StringBuilder(outputLength + 1);

                // Real conversion into the buffer.
                status = ICU4C.UErrorCode.U_ZERO_ERROR;
                ICU4C.NativeMethods.ucnv_toUChars(cnv, output, output.Capacity, input, input.Length, ref status);
                ThrowIfFailed(status, "ucnv_toUChars");
            }
            finally
            {
                // Close the converter even when the conversion failed.
                ICU4C.NativeMethods.ucnv_close(cnv);
            }

            // Write the result file.
            File.WriteAllText(@"TEST.UCS2.TXT", output.ToString(), Encoding.Unicode);

            System.Console.WriteLine("Convert GBK to UCS2");
            System.Console.WriteLine($" Input Length = {input.Length} bytes");
            System.Console.WriteLine($" Output Length = {output.Length} characters");
        }
    }
}
P/Invoke API 定義
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

#pragma warning disable IDE1006 // Naming Styles

namespace EncodingConverter.ICU4C
{
    /// <summary>
    /// Subset of ICU's <c>UErrorCode</c>. Zero is success, negative values are
    /// warnings and positive values are errors (ICU's <c>U_FAILURE</c> is <c>code &gt; 0</c>).
    /// Codes that are not listed here can still be received; compare them numerically.
    /// </summary>
    enum UErrorCode
    {
        /// <summary>Warning: the output filled the buffer exactly, so no NUL terminator was written.</summary>
        U_STRING_NOT_TERMINATED_WARNING = -124,

        /// <summary>No error, no warning.</summary>
        U_ZERO_ERROR = 0,

        /// <summary>An argument was invalid.</summary>
        U_ILLEGAL_ARGUMENT_ERROR = 1,

        /// <summary>A required data file could not be opened.</summary>
        U_FILE_ACCESS_ERROR = 4,

        /// <summary>Memory allocation failed.</summary>
        U_MEMORY_ALLOCATION_ERROR = 7,

        /// <summary>A character has no mapping in the target encoding.</summary>
        U_INVALID_CHAR_FOUND = 10,

        /// <summary>The input ended in the middle of a multi-byte character.</summary>
        U_TRUNCATED_CHAR_FOUND = 11,

        /// <summary>The input contained an illegal byte sequence.</summary>
        U_ILLEGAL_CHAR_FOUND = 12,

        /// <summary>
        /// The destination buffer was too small. This is the expected status of a
        /// preflight call that passes a null buffer with capacity 0.
        /// </summary>
        U_BUFFER_OVERFLOW_ERROR = 15
    }

    /// <summary>
    /// P/Invoke declarations for the ICU4C 67 charset detector and converter.
    /// The DLLs must match the bitness (32/64-bit) of the process.
    /// </summary>
    static class NativeMethods
    {
        // ICU 67 i18n library: hosts the charset detector (ucsdet_*).
        private const string I18nLibrary = "icuin67.dll";

        // ICU 67 common library: hosts the converters (ucnv_*).
        private const string CommonLibrary = "icuuc67.dll";

        /// <summary>Opens a charset detector; release it with <see cref="ucsdet_close"/>.</summary>
        /// <param name="status">In/out ICU status; must be a non-failure value on entry.</param>
        /// <returns>Opaque detector handle.</returns>
        [DllImport(I18nLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucsdet_open_67")]
        public static extern IntPtr ucsdet_open(ref UErrorCode status);

        /// <summary>Closes a detector opened with <see cref="ucsdet_open"/>.</summary>
        /// <param name="ucsd">Detector handle.</param>
        [DllImport(I18nLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucsdet_close_67")]
        public static extern void ucsdet_close(IntPtr ucsd);

        /// <summary>
        /// Sets the bytes to analyse. ICU does not copy the data: the detector keeps
        /// the pointer, so the caller must keep <paramref name="textIn"/> pinned (for
        /// example with <c>GCHandle.Alloc(..., GCHandleType.Pinned)</c>) until detection
        /// has finished or the detector is closed.
        /// </summary>
        /// <param name="ucsd">Detector handle.</param>
        /// <param name="textIn">Input bytes of unknown encoding.</param>
        /// <param name="len">Number of bytes in <paramref name="textIn"/>.</param>
        /// <param name="status">In/out ICU status.</param>
        [DllImport(I18nLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucsdet_setText_67")]
        public static extern void ucsdet_setText(IntPtr ucsd, byte[] textIn, int len, ref UErrorCode status);

        /// <summary>Returns the best charset match for the current text.</summary>
        /// <param name="ucsd">Detector handle.</param>
        /// <param name="status">In/out ICU status.</param>
        /// <returns>
        /// Opaque match handle owned by the detector, or <see cref="IntPtr.Zero"/> when
        /// no charset matches.
        /// </returns>
        [DllImport(I18nLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucsdet_detect_67")]
        public static extern IntPtr ucsdet_detect(IntPtr ucsd, ref UErrorCode status);

        /// <summary>Gets the charset name of a match.</summary>
        /// <param name="ucsm">Match handle from <see cref="ucsdet_detect"/>; must not be null.</param>
        /// <param name="status">In/out ICU status.</param>
        /// <returns>
        /// Pointer to a NUL-terminated name owned by ICU; read it with
        /// <c>Marshal.PtrToStringAnsi</c> before the detector is closed.
        /// </returns>
        [DllImport(I18nLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucsdet_getName_67")]
        public static extern IntPtr ucsdet_getName(IntPtr ucsm, ref UErrorCode status);

        /// <summary>Opens a converter by name; release it with <see cref="ucnv_close"/>.</summary>
        /// <param name="converterName">Charset name such as "GBK".</param>
        /// <param name="err">In/out ICU status.</param>
        /// <returns>Opaque converter handle.</returns>
        [DllImport(CommonLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucnv_open_67")]
        public static extern IntPtr ucnv_open([MarshalAs(UnmanagedType.LPStr)] string converterName, ref UErrorCode err);

        /// <summary>Closes a converter opened with <see cref="ucnv_open"/>.</summary>
        /// <param name="converter">Converter handle.</param>
        [DllImport(CommonLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucnv_close_67")]
        public static extern void ucnv_close(IntPtr converter);

        /// <summary>
        /// Converts UTF-16 text to the converter's charset. Passing a null
        /// <paramref name="dest"/> with capacity 0 preflights: the required length is
        /// returned and the status is set to <see cref="UErrorCode.U_BUFFER_OVERFLOW_ERROR"/>.
        /// </summary>
        /// <param name="cnv">Converter handle.</param>
        /// <param name="dest">Destination byte buffer, or null to preflight.</param>
        /// <param name="destCapacity">Capacity of <paramref name="dest"/> in bytes.</param>
        /// <param name="src">UTF-16 source text.</param>
        /// <param name="srcLength">Number of UTF-16 code units in <paramref name="src"/>.</param>
        /// <param name="pErrorCode">In/out ICU status.</param>
        /// <returns>Length of the complete output in bytes, excluding any terminator.</returns>
        [DllImport(CommonLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucnv_fromUChars_67")]
        public static extern int ucnv_fromUChars(IntPtr cnv, [Out] byte[] dest, int destCapacity, [MarshalAs(UnmanagedType.LPWStr)] string src, int srcLength, ref UErrorCode pErrorCode);

        /// <summary>
        /// Converts bytes in the converter's charset to UTF-16. Passing a null
        /// <paramref name="dest"/> with capacity 0 preflights: the required length is
        /// returned and the status is set to <see cref="UErrorCode.U_BUFFER_OVERFLOW_ERROR"/>.
        /// </summary>
        /// <param name="cnv">Converter handle.</param>
        /// <param name="dest">Destination buffer, or null to preflight.</param>
        /// <param name="destCapacity">Capacity of <paramref name="dest"/> in UTF-16 code units.</param>
        /// <param name="src">Source bytes.</param>
        /// <param name="srcLength">Number of bytes in <paramref name="src"/>.</param>
        /// <param name="pErrorCode">In/out ICU status.</param>
        /// <returns>Length of the complete output in UTF-16 code units, excluding any terminator.</returns>
        [DllImport(CommonLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "ucnv_toUChars_67")]
        public static extern int ucnv_toUChars(IntPtr cnv, [MarshalAs(UnmanagedType.LPWStr)] StringBuilder dest, int destCapacity, byte[] src, int srcLength, ref UErrorCode pErrorCode);
    }
}