1. 程式人生 > >(轉)UTF8轉換為UNICODE(UTF8ToUnicode)

(轉)UTF8轉換為UNICODE(UTF8ToUnicode)

//注:Linux下使用,未知Windows下的情況

1. 

/**********************************************************************************/
/*                                                                                */
/* Function:    UTF8ToUnicode                                                     */
/* Description: Convert a UTF-8 byte sequence to a 0-terminated Unicode string.   */
/*              Only 1-, 2- and 3-byte sequences are decoded; lead bytes are      */
/*              classified by comparing against SIGMASK_3_1 and SIGMASK_2_1,      */
/*              which are defined elsewhere (presumably 0xE0 and 0xC0 - confirm). */
/*              Continuation bytes are not validated.                             */
/*                                                                                */
/* Parameters:  szSrc    - UTF-8 byte sequence to be converted                    */
/*              nSrcLen  - length of szSrc in bytes                               */
/*              strDest  - buffer that receives the converted characters          */
/*              nDestLen - size (in characters) of strDest, including room for    */
/*                         the terminating 0                                      */
/*                                                                                */
/* Returns:     On success, the number of characters stored in strDest, not       */
/*              counting the terminating 0 (so 0 for an empty source).            */
/*              0 on failure: strDest is NULL, nDestLen <= 0, the buffer is too   */
/*              small, or the source ends in the middle of a multi-byte sequence. */
/*              Whenever strDest is usable it is left 0-terminated.               */
/*                                                                                */
/* History:     Created  {DATE} {BY} {NAME} {PRODUCT REV}                         */
/*              Modified {DATE} {BY} {NAME} {PRODUCT REV}                         */
/*                                                                                */
/**********************************************************************************/

int32 UTF8ToUnicode(const char *szSrc, int32 nSrcLen,wchar *strDest, int32 nDestLen)
{
    /* Read the input as unsigned bytes so values >= 0x80 are never sign-extended. */
    const unsigned char *pszSrc = (const unsigned char *)szSrc;
    int32 i = 0;
    int32 i_cur_output = 0;

    if (!strDest || nDestLen <= 0)
    {
        /* There is no room even for the terminating 0. */
        return 0; /* ERROR_INSUFFICIENT_BUFFER */
    }

    if (!pszSrc)
    {
        /* Treat a missing source as empty input. */
        nSrcLen = 0;
    }

    while (i < nSrcLen)
    {
        unsigned int nCodeUnit;
        int32 nSeqLen;

        /* Classify the lead byte to learn how many source bytes it spans. */
        if (SIGMASK_3_1 <= pszSrc[i])
        {
            nSeqLen = 3;
        }
        else if (SIGMASK_2_1 <= pszSrc[i])
        {
            nSeqLen = 2;
        }
        else
        {
            nSeqLen = 1;
        }

        /*
         * The whole sequence must be present in the source, and the
         * destination must hold this character plus the terminating 0.
         */
        if (nSrcLen - i < nSeqLen || i_cur_output + 1 >= nDestLen)
        {
            strDest[i_cur_output] = 0; /* Terminate string */
            return 0; /* truncated input or ERROR_INSUFFICIENT_BUFFER */
        }

        if (nSeqLen == 3)
        {
            /*
             * Keep only the 4 payload bits of the lead byte. Without the mask
             * the marker bits are shifted into bits 16+ and corrupt the result
             * wherever the character type is wider than 16 bits.
             */
            nCodeUnit = ((unsigned int)(pszSrc[i] & 0x0f) << 12) |
                        ((unsigned int)(pszSrc[i + 1] & 0x3f) << 6) |
                        (unsigned int)(pszSrc[i + 2] & 0x3f);
        }
        else if (nSeqLen == 2)
        {
            /* A continuation byte carries 6 payload bits. */
            nCodeUnit = ((unsigned int)(pszSrc[i] & 0x3f) << 6) |
                        (unsigned int)(pszSrc[i + 1] & 0x3f);
        }
        else
        {
            nCodeUnit = pszSrc[i];
        }

        strDest[i_cur_output++] = (wchar)nCodeUnit;
        i += nSeqLen;
    }

    strDest[i_cur_output] = 0; /* Terminate string */
    return i_cur_output;
}

2. UTF-8 <-> Unicode conversion library

/projects/libbsdua

This library is a collection of source files rather than a complete binary library. All files are BSD licensed, so you can use them freely in your projects. If you don't know what the BSD license permits, read about it on Wikipedia.

Consists of:

UTF-8 <-> Unicode conversion

SASL Digest-MD5

VPool: auto-resizeable buffer

UTF-8 <-> Unicode conversion library

It's an implementation of RFC 3629. It can convert UCS-4 (wchar_t) strings into UTF-8 strings and vice versa. I plan to add support for streaming when converting from UTF-8, and for specifying an endianness different from that of the local machine.

Files:

unicode.tar.gz

Previous version (rfc2279): unicode-old.tar.gz

SASL Digest-MD5

Digestmd5 is an implementation of the SASL Digest-MD5 authentication method (RFC 2831). This implementation lacks auth-int, auth-conf and the server part, but it is functional for most clients' needs.

Files:

digestmd5.tar.gz

Example:

#include <sys/types.h>

#include <stdio.h>

#include <string.h>

#include "digest-md5.h"

/*
 * Run one interactive DIGEST-MD5 client exchange on `ctx'.
 *
 * Reads the first server challenge from stdin, prints the realms and
 * charset reported for it, sets fixed example credentials, prints the
 * generated response, then reads the second challenge and reports
 * whether authentication completed. Diagnostics go to stderr; the
 * function returns early on the first error.
 */
void
client_auth_session(struct digestmd5_client *ctx)
{
    char *realm;
    u_int n, i;
    int error, flags;
    struct digestmd5_client_authinfo auth;
    char buf[1000];

    /*
     * Challenge #1
     */
    printf("Enter challenge: ");
    fflush(stdout);
    /* %999s leaves room for the terminator in buf[1000]. */
    if (scanf("%999s", buf) != 1) {
        /* EOF or read error: buf holds nothing usable. */
        fprintf(stderr, "cannot read challenge\n");
        return;
    }

    error = digestmd5_client_challenge(ctx, buf, strlen(buf));
    if (error != DIGESTMD5_OK) {
        fprintf(stderr, "broken challenge\n");
        return;
    }

    printf("Realms:\n");
    n = digestmd5_client_num_realms(ctx);
    if (n > 0) {
        for (i = 0; i < n; i++)
            printf("\t%s\n", digestmd5_client_get_realm(ctx, i));
        /* Use the first listed realm. */
        realm = digestmd5_client_get_realm(ctx, 0);
    } else {
        /* No realm was listed: fall back to a fixed one. */
        realm = "bsdua.org";
    }

    flags = digestmd5_client_flags(ctx);
    printf("Charset: %s\n",
        ((flags & DIGESTMD5_UTF8) != 0) ? "utf-8" : "iso8859-1");

    /* Zero the structure so any field not set below is 0/NULL. */
    memset(&auth, 0, sizeof(auth));
    auth.username = "your_name";
    auth.passwd = "your_password";
    auth.realm = realm;
    auth.servtype = "smtp";
    auth.host = "bsdua.org";

    error = digestmd5_client_set_authinfo(ctx, &auth);
    if (error != DIGESTMD5_OK) {
        fprintf(stderr, "cannot set authinfo\n");
        return;
    }

    error = digestmd5_client_response(ctx);
    if (error != DIGESTMD5_OK)
        return;

    printf("%s\n", digestmd5_client_buf(ctx, NULL));

    /*
     * Challenge #2
     */
    printf("Enter challenge: ");
    fflush(stdout);
    if (scanf("%999s", buf) != 1) {
        fprintf(stderr, "cannot read challenge\n");
        return;
    }

    error = digestmd5_client_challenge(ctx, buf, strlen(buf));
    if (error == DIGESTMD5_DONE) {
        printf("authentication is successful\n");
    } else if (error == DIGESTMD5_OK) {
        fprintf(stderr, "authentication is not complete\n");
        return;
    } else {
        fprintf(stderr, "error!\n");
        return;
    }
}

/*
 * Example driver: allocate a client context, run one authentication
 * session on it, then release the context.
 */
int
main(void)
{
    struct digestmd5_client *session_ctx = digestmd5_client_alloc();

    client_auth_session(session_ctx);
    digestmd5_client_free(session_ctx);

    return 0;
}

VPOOL

Vpool is an auto-resizable buffer (dynamic array). With it, you don't need to care about memory allocation, boundary checking, pointer manipulation, etc.

Files:

vpool.c

vpool.h

Example:

int

main(void)

{

struct vpool vp;

char *p;

int error;

size_t len;

/*

* Initialize vpool object.

* Use 1024 bytes allocation block.

* `vp' buffer will have no limits (actually, SIZE_MAX).

*/

vpool_init(&vp, 1024, 0);

/*

* Allocate 100 bytes at the end of pool.

*/

p = (char *) vpool_expand(&vp, VPOOL_TAIL, 100);

if (p != NULL)

memset(p, 'A', 100);

/*

* Insert 3 bytes ("BBB") starting from position 2

* into the pool.

*/

p = vpool_insert(&vp, 2, "BBB", 3);

/*

* Truncate pool. Leave 5 bytes starting from position 3.

*/

error = vpool_truncate(&vp, 3, 5, VPOOL_INCLUDE);

/*

* Access data

*/

if (!vpool_is_empty(&vp)) {

p = (char *) vpool_get_buf(&vp);

len = vpool_get_length(&vp);

/*

* XXX - do whatever you want with the buffer.

* But remember! The pointer is valid until

* you make vpool_expand(), vpool_insert(), vpool_truncate()

*/

}

/*

* Free resources allocated by pool.

*/

vpool_final(&vp);

exit(0);

}