1. 程式人生 > 實用技巧 >使用C#.NET呼叫ICU進行編碼檢測和編碼轉換

使用C#.NET呼叫ICU進行編碼檢測和編碼轉換

ICU的C/C++版本:ICU4C

相關API的用法可查閱官方文件,本例只演示使用P/Invoke呼叫。

DLL檔案需要注意區分32位和64位,必須與執行程式(行程)的位元數一致,否則載入時會失敗。

官方API文件:ICU-docs

P/Invoke相關文件:Native interoperability、Interop Marshaling

非常有用的P/Invoke函式簽名查詢工具:PINVOKE.NET

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;

namespace EncodingConverter.Console
{
    /// <summary>
    /// Console sample that calls ICU4C through P/Invoke to convert a text file
    /// between UTF-16 ("UCS2") and GBK and to detect the encoding of the result.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Runs the three demos in order (UTF-16 to GBK, GBK to UTF-16, detection)
        /// and then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Convert_UCS2_To_GBK();
            Convert_GBK_To_UCS2();
            Detect_Encoding();

            System.Console.ReadKey();
        }

        /// <summary>
        /// Throws when <paramref name="status"/> is an ICU failure code.
        /// </summary>
        /// <param name="status">Status value written by the ICU call.</param>
        /// <param name="operation">Name of the ICU function, used in the message.</param>
        /// <exception cref="InvalidOperationException">The status is a failure code.</exception>
        static void ThrowIfFailed(ICU4C.UErrorCode status, string operation)
        {
            // Same rule as ICU's U_FAILURE macro: values above U_ZERO_ERROR are errors,
            // values below it are warnings and must not abort the operation.
            if (status > ICU4C.UErrorCode.U_ZERO_ERROR)
            {
                throw new InvalidOperationException($"{operation} failed with ICU error code {(int)status}.");
            }
        }

        /// <summary>
        /// Opens an ICU converter and verifies that the call succeeded.
        /// </summary>
        /// <param name="converterName">ICU converter name, e.g. "GBK".</param>
        /// <returns>A non-null converter handle that the caller must close with ucnv_close.</returns>
        /// <exception cref="InvalidOperationException">The converter could not be opened.</exception>
        static IntPtr OpenConverter(string converterName)
        {
            ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
            IntPtr cnv = ICU4C.NativeMethods.ucnv_open(converterName, ref status);
            ThrowIfFailed(status, "ucnv_open");

            if (cnv == IntPtr.Zero)
            {
                throw new InvalidOperationException("ucnv_open returned a null converter handle.");
            }

            return cnv;
        }

        /// <summary>
        /// Reads TEST.GBK.TXT, asks the ICU charset detector for the best match and
        /// prints the detected charset name.
        /// </summary>
        static void Detect_Encoding()
        {
            byte[] input = File.ReadAllBytes(@"TEST.GBK.TXT");
            string str = null;

            // ucsdet_setText keeps the pointer it is given instead of copying the bytes,
            // while the P/Invoke marshaller pins the array only for the duration of that
            // single call. Pin it explicitly so the address stays valid until detection
            // has finished.
            GCHandle pinnedInput = GCHandle.Alloc(input, GCHandleType.Pinned);
            try
            {
                // Open the detector.
                ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
                IntPtr ucsd = ICU4C.NativeMethods.ucsdet_open(ref status);
                ThrowIfFailed(status, "ucsdet_open");

                try
                {
                    // Hand the text to the detector.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    ICU4C.NativeMethods.ucsdet_setText(ucsd, input, input.Length, ref status);
                    ThrowIfFailed(status, "ucsdet_setText");

                    // Run the detection.
                    status = ICU4C.UErrorCode.U_ZERO_ERROR;
                    IntPtr ucsm = ICU4C.NativeMethods.ucsdet_detect(ucsd, ref status);
                    ThrowIfFailed(status, "ucsdet_detect");

                    // A null match means no charset fit the input; the name is then left null.
                    if (ucsm != IntPtr.Zero)
                    {
                        // Read the name before closing the detector, which owns the string.
                        status = ICU4C.UErrorCode.U_ZERO_ERROR;
                        IntPtr lpstr = ICU4C.NativeMethods.ucsdet_getName(ucsm, ref status);
                        ThrowIfFailed(status, "ucsdet_getName");
                        str = Marshal.PtrToStringAnsi(lpstr);
                    }
                }
                finally
                {
                    // Close the detector even when one of the calls above failed.
                    ICU4C.NativeMethods.ucsdet_close(ucsd);
                }
            }
            finally
            {
                pinnedInput.Free();
            }

            System.Console.WriteLine("Detected Encoding");
            System.Console.WriteLine($" Result = {str}");
        }

        /// <summary>
        /// Reads TEST.TXT as UTF-16, converts it to GBK with ICU and writes the bytes
        /// to TEST.GBK.TXT.
        /// </summary>
        static void Convert_UCS2_To_GBK()
        {
            string input = File.ReadAllText(@"TEST.TXT", Encoding.Unicode);
            byte[] output;

            // Open the converter.
            IntPtr cnv = OpenConverter("GBK");
            try
            {
                // Preflight: a null destination with capacity 0 only computes the required
                // length. ICU reports U_BUFFER_OVERFLOW_ERROR for this on purpose, so the
                // status of this call is deliberately not checked.
                ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
                int outputLength = ICU4C.NativeMethods.ucnv_fromUChars(cnv, null, 0, input, input.Length, ref status);

                // Output buffer.
                output = new byte[outputLength];

                // Convert for real.
                status = ICU4C.UErrorCode.U_ZERO_ERROR;
                ICU4C.NativeMethods.ucnv_fromUChars(cnv, output, output.Length, input, input.Length, ref status);
                ThrowIfFailed(status, "ucnv_fromUChars");
            }
            finally
            {
                // Close the converter even when the conversion failed.
                ICU4C.NativeMethods.ucnv_close(cnv);
            }

            // Write the result.
            File.WriteAllBytes(@"TEST.GBK.TXT", output);

            System.Console.WriteLine("Convert UCS2 to GBK");
            System.Console.WriteLine($" Input Length = {input.Length} characters");
            System.Console.WriteLine($" Output Length = {output.Length} bytes");
        }

        /// <summary>
        /// Reads TEST.GBK.TXT, converts the GBK bytes to UTF-16 with ICU and writes the
        /// text to TEST.UCS2.TXT.
        /// </summary>
        static void Convert_GBK_To_UCS2()
        {
            byte[] input = File.ReadAllBytes(@"TEST.GBK.TXT");
            StringBuilder output;

            // Open the converter.
            IntPtr cnv = OpenConverter("GBK");
            try
            {
                // Preflight to learn the required length (see Convert_UCS2_To_GBK for why
                // the status of this call is not checked).
                ICU4C.UErrorCode status = ICU4C.UErrorCode.U_ZERO_ERROR;
                int outputLength = ICU4C.NativeMethods.ucnv_toUChars(cnv, null, 0, input, input.Length, ref status);

                // One extra char of capacity lets ICU write the terminating NUL itself;
                // the marshalled StringBuilder length is derived from that terminator.
                output = new StringBuilder(outputLength + 1);

                // Convert for real.
                status = ICU4C.UErrorCode.U_ZERO_ERROR;
                ICU4C.NativeMethods.ucnv_toUChars(cnv, output, output.Capacity, input, input.Length, ref status);
                ThrowIfFailed(status, "ucnv_toUChars");
            }
            finally
            {
                // Close the converter even when the conversion failed.
                ICU4C.NativeMethods.ucnv_close(cnv);
            }

            // Write the result.
            File.WriteAllText(@"TEST.UCS2.TXT", output.ToString(), Encoding.Unicode);

            System.Console.WriteLine("Convert GBK to UCS2");
            System.Console.WriteLine($" Input Length = {input.Length} bytes");
            System.Console.WriteLine($" Output Length = {output.Length} characters");
        }
    }
}

P/Invoke API 定義

using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;

#pragma warning disable IDE1006 // Naming Styles

namespace EncodingConverter.ICU4C
{
    /// <summary>
    /// Subset of ICU's UErrorCode values (utypes.h). Negative values are warnings,
    /// zero is success and positive values are failures. Values not listed here
    /// can still be returned by ICU and will show up as their raw integer.
    /// </summary>
    enum UErrorCode
    {
        /// <summary>Output filled the buffer exactly, so no terminating NUL was written.</summary>
        U_STRING_NOT_TERMINATED_WARNING = -124,

        /// <summary>No error, no warning.</summary>
        U_ZERO_ERROR = 0,

        /// <summary>An argument was invalid (for example a null handle).</summary>
        U_ILLEGAL_ARGUMENT_ERROR = 1,

        /// <summary>A requested resource could not be found.</summary>
        U_MISSING_RESOURCE_ERROR = 2,

        /// <summary>A data file could not be opened, e.g. an unknown converter name.</summary>
        U_FILE_ACCESS_ERROR = 4,

        /// <summary>Memory allocation failed.</summary>
        U_MEMORY_ALLOCATION_ERROR = 7,

        /// <summary>The input contained a character with no mapping in the target charset.</summary>
        U_INVALID_CHAR_FOUND = 10,

        /// <summary>The input ended in the middle of a multi-byte character.</summary>
        U_TRUNCATED_CHAR_FOUND = 11,

        /// <summary>The input contained an illegal byte sequence.</summary>
        U_ILLEGAL_CHAR_FOUND = 12,

        /// <summary>The destination buffer was too small; also the expected result of a preflight call.</summary>
        U_BUFFER_OVERFLOW_ERROR = 15,

        /// <summary>The requested operation is not supported.</summary>
        U_UNSUPPORTED_ERROR = 16
    }

    /// <summary>
    /// P/Invoke declarations for the ICU4C 67 functions used by this sample:
    /// the ucsdet_* functions are imported from icuin67.dll and the ucnv_*
    /// functions from icuuc67.dll. All entry points carry the "_67" version
    /// suffix and use the cdecl calling convention.
    /// </summary>
    static class NativeMethods
    {
        // Library that exports the ucsdet_* entry points.
        private const string I18nLibrary = "icuin67.dll";

        // Library that exports the ucnv_* entry points.
        private const string CommonLibrary = "icuuc67.dll";

        /// <summary>Binds to ucsdet_open_67; returns the native detector handle.</summary>
        [DllImport(I18nLibrary, EntryPoint = "ucsdet_open_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr ucsdet_open(
            ref UErrorCode status);

        /// <summary>Binds to ucsdet_close_67.</summary>
        [DllImport(I18nLibrary, EntryPoint = "ucsdet_close_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern void ucsdet_close(
            IntPtr ucsd);

        /// <summary>
        /// Binds to ucsdet_setText_67. NOTE(review): per the ICU documentation the
        /// detector does not copy the bytes, so callers should keep the array at a
        /// fixed address until detection is done - confirm against the ICU docs.
        /// </summary>
        [DllImport(I18nLibrary, EntryPoint = "ucsdet_setText_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern void ucsdet_setText(
            IntPtr ucsd,
            byte[] textIn,
            int len,
            ref UErrorCode status);

        /// <summary>Binds to ucsdet_detect_67; returns a native match handle.</summary>
        [DllImport(I18nLibrary, EntryPoint = "ucsdet_detect_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr ucsdet_detect(
            IntPtr ucsd,
            ref UErrorCode status);

        /// <summary>
        /// Binds to ucsdet_getName_67; returns a native pointer that callers in this
        /// sample read as an ANSI string.
        /// </summary>
        [DllImport(I18nLibrary, EntryPoint = "ucsdet_getName_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr ucsdet_getName(
            IntPtr ucsm,
            ref UErrorCode status);

        /// <summary>
        /// Binds to ucnv_open_67; the converter name is marshalled as an ANSI string
        /// and the native converter handle is returned.
        /// </summary>
        [DllImport(CommonLibrary, EntryPoint = "ucnv_open_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr ucnv_open(
            [MarshalAs(UnmanagedType.LPStr)] string converterName,
            ref UErrorCode err);

        /// <summary>Binds to ucnv_close_67.</summary>
        [DllImport(CommonLibrary, EntryPoint = "ucnv_close_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern void ucnv_close(
            IntPtr converter);

        /// <summary>
        /// Binds to ucnv_fromUChars_67: UTF-16 text in, converter-charset bytes out.
        /// The sample passes a null <paramref name="dest"/> with capacity 0 to obtain
        /// the required length from the return value.
        /// </summary>
        [DllImport(CommonLibrary, EntryPoint = "ucnv_fromUChars_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern int ucnv_fromUChars(
            IntPtr cnv,
            [Out] byte[] dest,
            int destCapacity,
            [MarshalAs(UnmanagedType.LPWStr)] string src,
            int srcLength,
            ref UErrorCode pErrorCode);

        /// <summary>
        /// Binds to ucnv_toUChars_67: converter-charset bytes in, UTF-16 text out.
        /// The sample passes a null <paramref name="dest"/> with capacity 0 to obtain
        /// the required length from the return value.
        /// </summary>
        [DllImport(CommonLibrary, EntryPoint = "ucnv_toUChars_67", CallingConvention = CallingConvention.Cdecl)]
        public static extern int ucnv_toUChars(
            IntPtr cnv,
            [MarshalAs(UnmanagedType.LPWStr)] StringBuilder dest,
            int destCapacity,
            byte[] src,
            int srcLength,
            ref UErrorCode pErrorCode);
    }
}