c語言判斷是否是utf8字串，計算字元個數

阿新 • • 發佈：2018-12-27

#include <stdio.h>

#include <string.h>

#include <stdlib.h>

/****************************************************************************

Unicode符號範圍 | UTF-8編碼方式

(十六進位制) | （二進位制)

0000 0000-0000 007F:0xxxxxxx

0000 0080-0000 07FF:110xxxxx 10xxxxxx

0000 0800-0000 FFFF:1110xxxx 10xxxxxx 10xxxxxx

0001 0000-001F FFFF:11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

0020 0000-03FF FFFF:111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

0400 0000-7FFF FFFF:1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

**************************************************************************/

/*
 * Lookup table: byte value (0x00-0xFF) -> number of bytes in the UTF-8
 * sequence that starts with that byte, following the 1- to 6-byte scheme.
 *
 * Bytes that cannot start a sequence (continuation bytes 0x80-0xBF and the
 * never-valid 0xFE/0xFF) map to 1 so that a scanner advancing by UTFLEN()
 * always makes progress and steps over them one byte at a time.
 *
 * The table must hold all 256 entries because UTFLEN() indexes it directly
 * with the byte value.
 */
unsigned char utf8_look_for_table[] =
{
    /* 0x00-0x7F: single-byte (ASCII) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80-0xBF: continuation bytes, skipped one at a time */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0-0xDF: 110xxxxx, two-byte lead */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0-0xEF: 1110xxxx, three-byte lead */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0-0xF7: four, 0xF8-0xFB: five, 0xFC-0xFD: six, 0xFE-0xFF: invalid */
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/*
 * Sequence length for the byte x. The cast keeps the index in 0..255 even
 * when x is a plain (possibly signed) char holding a value above 0x7F.
 */
#define UTFLEN(x) (utf8_look_for_table[(unsigned char)(x)])

/*
 * Return the number of bytes occupied by the UTF-8 character whose first
 * byte is ch, using the 1- to 6-byte encoding scheme.
 *
 *   0xxxxxxx -> 1      110xxxxx -> 2      1110xxxx -> 3
 *   11110xxx -> 4      111110xx -> 5      1111110x -> 6
 *
 * Returns 0 when ch cannot start a character: continuation bytes
 * (10xxxxxx, 0x80-0xBF) and the bytes 0xFE / 0xFF, which match none of the
 * lead-byte patterns.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so a call that the compiler chooses not to inline
 * would fail to link.
 */
static inline int GetUtf8charByteNum(unsigned char ch)
{
    if ((ch & 0x80) == 0x00) {
        return 1;
    }
    if ((ch & 0xE0) == 0xC0) {
        return 2;
    }
    if ((ch & 0xF0) == 0xE0) {
        return 3;
    }
    if ((ch & 0xF8) == 0xF0) {
        return 4;
    }
    if ((ch & 0xFC) == 0xF8) {
        return 5;
    }
    if ((ch & 0xFE) == 0xFC) {
        return 6;
    }
    /* 10xxxxxx continuation byte, or 0xFE / 0xFF */
    return 0;
}

/*
 * Check whether a NUL-terminated string is structurally valid UTF-8.
 *
 * The string is scanned one character at a time: the lead byte gives the
 * character's length via GetUtf8charByteNum(), and every following byte of
 * that character must have the continuation form 10xxxxxx.
 *
 * Only the bit patterns are checked; the decoded code point values are not
 * inspected.
 *
 * Returns 1 if the whole string passes (an empty string passes), 0 if str
 * is NULL, a lead byte is rejected, a continuation byte is malformed, or
 * the string ends in the middle of a character.
 */
int IsUtf8Format(const char *str)
{
    const unsigned char *cursor = (const unsigned char *)str;

    if (str == NULL) {
        return 0;
    }

    while (*cursor != '\0') {
        int pending = GetUtf8charByteNum(*cursor);

        if (pending == 0) {
            return 0;
        }
        cursor++;

        /* Consume the continuation bytes that belong to this character. */
        for (pending--; pending > 0; pending--) {
            /* A terminating NUL fails this test too, so a truncated
             * character is rejected without reading past the end. */
            if ((*cursor & 0xC0) != 0x80) {
                return 0;
            }
            cursor++;
        }
    }

    return 1;
}

/*
 * Count the characters (not bytes) in a NUL-terminated UTF-8 string.
 *
 * Each character's width is taken from its lead byte via
 * GetUtf8charByteNum(); continuation bytes are skipped, not inspected.
 *
 * Returns the character count, or 0 if str is NULL, the string is empty,
 * a lead byte is rejected, or the final character claims more bytes than
 * remain in the string.
 */
int GetUtf8Length(char *str)
{
    size_t total;
    size_t offset = 0;
    int count = 0;

    if (str == NULL) {
        return 0;
    }

    total = strlen(str);

    while (offset < total) {
        int width = GetUtf8charByteNum((unsigned char)str[offset]);

        if (width == 0) {
            return 0;
        }
        /* Never step past the terminator: a truncated trailing character
         * would otherwise move the cursor outside the string. */
        if ((size_t)width > total - offset) {
            return 0;
        }

        offset += (size_t)width;
        count++;
    }

    return count;
}

/*
 * Compute the number of billable SMS messages for a text of len characters.
 *
 *   len <= 0          -> 0
 *   1   .. 70         -> 1
 *   71  .. 500        -> ceil(len / 67)
 *   len > 500         -> 1
 *
 * NOTE(review): 70 and 67 are presumably the single-message and
 * per-segment character limits for UCS-2 SMS -- confirm.
 * NOTE(review): texts longer than 500 characters are charged as a single
 * message, as in the original code; confirm whether that is intended or
 * whether such texts should be rejected.
 */
int GetChargeNum(int len)
{
    enum {
        SINGLE_SMS_MAX = 70,   /* longest text billed as one message */
        SEGMENT_CHARS = 67,    /* characters carried per segment */
        MULTI_SMS_MAX = 500    /* longest text billed per segment */
    };

    if (len > SINGLE_SMS_MAX && len <= MULTI_SMS_MAX) {
        /* Ceiling division: an exact multiple of SEGMENT_CHARS must not be
         * charged an extra segment. */
        return (len + SEGMENT_CHARS - 1) / SEGMENT_CHARS;
    }

    return (len > 0) ? 1 : 0;
}

/*
 * Entry point: takes the text to examine as the first command-line
 * argument, echoes it, checks that it is UTF-8, then prints its length in
 * characters and the number of SMS messages it would be charged as.
 *
 * Exit status follows the usual convention: EXIT_SUCCESS when both figures
 * were printed, EXIT_FAILURE when the argument is missing, is not UTF-8,
 * or yields a zero length or charge count.
 */
int main(int argc, char **argv)
{
    char *text;
    int len;
    int num;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <text>\n", (argc > 0) ? argv[0] : "utf8len");
        return EXIT_FAILURE;
    }

    text = argv[1];
    printf("%s\n", text);

    if (!IsUtf8Format(text)) {
        printf("the text is not the Format of utf8\n");
        return EXIT_FAILURE;
    }

    len = GetUtf8Length(text);
    if (len == 0) {
        /* Empty text: nothing to report. */
        return EXIT_FAILURE;
    }
    printf("the length of text: %d\n", len);

    num = GetChargeNum(len);
    if (num == 0) {
        return EXIT_FAILURE;
    }
    printf("the chargeNumber of sms: %d\n", num);

    return EXIT_SUCCESS;
}

c語言判斷是否是utf8字串，計算字元個數

#include <stdio.h> #include <string.h> #include <stdlib.h> /******************************************************************

C語言判斷迴文字串程式碼及解析

輸入一個字串，判斷其是否為迴文。所謂迴文字串，是指從左到右讀和從右到左讀完全相同的字串。主要涉及知識點：字元陣列的輸入輸出，及相應的處理。這個問題，可以從字串的兩頭開始比較，即第1個字元和倒數第1個字元比較，第2個字元和倒數第2個字元比較，以此類推...如果出現字元不相等的情況，說明不是迴文，如果

C++如何判斷一個string字串，是否是數字

方法一：判斷字元的ASCII範圍（數字的範圍為48~57） #include <iostream> using namespace std; bool AllisNum(string str); int main( void ) {

C語言輸入兩個時間，計算兩個時間的時間差！

#include<stdio.h> int main() { int hour1, minute1; int hour2, minute2; scanf("%d %d",&h

c語言判斷兩個字串是否相等

#include <stdio.h> #include <string.h> int fun(char *a,char *b) { int i,flag=0; f

【C語言】編寫一個函式，實現兩個數的交換詳細解答

今天在用函式寫兩數交換的時候發現有點小挫折，然後經過多次查詢驗證才發現是因為地址不對的緣故，我在此放出兩種交換的方法，先發錯誤的如下：#include <stdio.h> void swap(int a,int b) { int temp=a; a

c語言判斷是否是utf8字符串，計算字符個數

++ != get line [1] code 二進制 pri 範圍 #include <stdio.h> #include <string.h> #include <stdlib.h> /********************

不依賴任何系統API，用c語言實現gbk/utf8/unicode編碼轉換

轉載地址:https://blog.csdn.net/bladeandmaster88/article/details/54837338 漢字'我' Unicode編碼是0x6211 01100010 00010001 UTF8編碼是&

C語言：呼叫子函式，實現選擇排序和字串連線

2018年11月14日 19:57:26 return 微明閱讀數：1 個人分類： C語言

C語言：模擬實現字串函式strlen，strcpy，strcat，strcmp，strchr，strstr

什麼是C語言中的字串字串或串(String)是由數字、字母、下劃線組成的一串字元。通常放在常量字串中或者字元陣列中。C語言中以 ‘\0’ 來作為字串的結束標記。字元的ASCII編碼表 1.strlen 功能：字串求長計算給定字串的（unsigned in

C語言判斷一個字串是否是另一個字串的子串

int f(char*s1,char*s2) { char *p,*q; for(;*s1!='\0';s1++) { if (*s2==*s1) {/*判斷字串中是否有和要判斷的字串首字元相同的字元*/ flag=1; p=s1; /*s1 p為第一

c語言判斷是否是迴文字串

迴文字串例子 abccba 、hellolleh 這些具有對稱規則的字串被稱為迴文字串。下面是迴文字串的一個判定函式。 /* 指標法判斷是不是迴文字串 */ int plalindrome(char

c語言：實現一個函式，判斷一個數是不是素數。

實現一個函式，判斷一個數是不是素數。程式：#include<stdio.h>#include<math.h>int prime(intnum) //prime表示素數{int i

C語言,判斷一個5位數是不是迴文數。即12321是迴文數，個位與萬位相同，十位與千位相同。

<span style="font-size:18px;color:#330033;">int a = 0; printf("輸入一個五位數:"); scanf("%d",

C語言：輸入一行字串統計出英文字母，空格，數字和其他字元的個數

題目要求輸入一行字串統計出英文字母，空格，數字和其他字元的個數。程式分析要統計英文字母，空格，數字和其他字元的個數，則要遇到他們加一。核心程式碼如下： while ((c=getchar())!='\n') { if ((c >= 'a' &

c語言迴圈位移（數字，字串）

C語言中沒有提供迴圈移位的操作符，但可以通過簡潔的方式實現迴圈移位設一個運算元x有s位則迴圈左移n位的操作為： (x << n) | (x >> (s - n)); 同理右移n位位: (x >> n) | (x << (s

【C語言】實現一個函式，可以左旋字串中的k個字元。

.實現一個函式，可以左旋字串中的k個字元。 AABCD左旋一個字元得到ABCDA AABCD左旋兩個字元得到BCDAA 思路： 1.如果直接進行左移，肯定會存在陣列前面的元素丟失。解決辦法：

C語言程式設計學習筆記字串（II）（字串輸入輸出，字串陣列，程式引數）

字串輸入輸出： char str[8]; scanf("%s",&str); printf("%s",str); scanf表示讀入一個單詞（到空格、tab、回車為止） scanf是不安全的，因為這樣不知道要讀入的內容的長度，在一些情況中會出現問題：

C語言之定義一個函式，實現對字串做如下操作：當字元為字母時，大小寫互換，當字元為數字時，原樣輸出，當有其它字元出現時，結束操作，返回已處理的字串

Action(){ char str[]={"tEst234%^road"};int len = sizeof(str)/sizeof(char);//strTest2呼叫函式lr_output_message("%s",strTest2(str,len)); r

c語言除法如何保留小數，如何printf小數 [程式設計題] 字串碎片

#include<stdio.h> #include<string.h> char str[51]; int n,count,i; int main() { scanf("%s",&str); n=strlen(str);

c語言判斷是否是utf8字串，計算字元個數

相關推薦