1. 程式人生 > >Huffman樹與編碼

Huffman樹與編碼

reat 指針數組 get n-1 include 開始 靜態數組 當前 編碼

帶權路徑長度最小的二叉樹稱為最優二叉樹,又稱Huffman樹(哈夫曼樹)。

Huffman樹的構造

將節點的權值存入數組中,由數組開始構造Huffman樹。初始化指針數組,指針指向含有權值的孤立節點。

b = malloc(n*sizeof(BTreeNode));
for (i = 0; i < n; i++)     {
    b[i] = malloc(sizeof(BTreeNode));
    b[i]->data  = a[i];
    b[i]->left  = NULL;
    b[i]->right = NULL;
}

數組b中的指針可以理解為二叉樹的根指針。

進行n - 1次循環建立Huffman樹

選擇b中根節點權值最小的兩棵二叉樹作為左右子樹組成新的二叉樹,新二叉樹的根節點權值為兩棵二叉樹根節點權值的和。

將新二叉樹添加到b中,並從b中刪除原來的兩棵二叉樹。當b中只有一棵樹時終止循環。

/* Pick the two lowest-weight trees of the forest: k1 = minimum, k2 = runner-up. */
int k1 = -1, k2;

/* Seed k1 and k2 with the first two occupied slots of b. */
for (j = 0; j < n; j++)
{
    if (b[j] == NULL)
        continue;
    if (k1 != -1)
    {
        k2 = j;
        break;
    }
    k1 = j;
}

/* Scan the rest of the forest, keeping k1 on the smallest weight seen so far
 * and k2 on the second smallest. */
for (j = k2; j < n; j++)
{
    if (b[j] == NULL)
        continue;
    if (b[j]->data < b[k1]->data)
    {
        k2 = k1;
        k1 = j;
    }
    else if (b[j]->data < b[k2]->data)
    {
        k2 = j;
    }
}

/* Join the two trees under a new root q whose weight is the sum of theirs. */
q = malloc(sizeof(BTreeNode));
q->left = b[k1];
q->right = b[k2];
q->data = b[k1]->data + b[k2]->data;

/* The merged tree takes over slot k1; slot k2 becomes empty. */
b[k2] = NULL;
b[k1] = q;

Huffman編碼與解碼

首先給出求帶權路徑的遞歸實現:

/*
 * Weighted path length of the tree rooted at FBT.
 * len is the depth of FBT below the root; the outermost call passes 0.
 * An empty tree contributes 0.
 */
double WeightPathLength(BTreeNode* FBT, int len) {
    if (FBT == NULL)
        return 0;

    /* Interior node: add up both subtrees, one level deeper. */
    if (FBT->left != NULL || FBT->right != NULL)
        return WeightPathLength(FBT->left, len + 1)
             + WeightPathLength(FBT->right, len + 1);

    /* Leaf: its weight multiplied by its depth. */
    return FBT->data * len;
}

上述算法實際上通過雙遞歸遍歷了Huffman樹。

改進上述算法得到求哈夫曼編碼的實現:

/*
 * Position in c of the next symbol to print as a leaf label.
 * Named leaf_index rather than index: some C libraries declare a legacy
 * function index() in their string headers, and a file-scope variable of
 * that name would clash with it.
 */
static int leaf_index = 0;
char *c;    /* symbols printed as leaf labels, consumed front to back */

/*
 * Write one line "<symbol> <weight>:<code>" to fp for every leaf of FBT.
 *
 * fp  - output stream
 * FBT - root of the (sub)tree to encode; NULL is ignored
 * len - number of branches already taken from the root; pass 0 initially
 *
 * A left branch contributes 0 and a right branch contributes 1 to the code.
 * Leaves are labelled with c[leaf_index++], i.e. in the order this traversal
 * reaches them.
 * NOTE(review): nodes carry no symbol, so nothing here ties a leaf's weight
 * back to the symbol it was created from - confirm the intended pairing.
 */
void HuffManCoding(FILE *fp, BTreeNode* FBT, int len)
{
    enum { MAX_CODE_LEN = 64 };     /* longest code (= deepest leaf) supported */
    static int a[MAX_CODE_LEN];     /* branch digits on the path from the root */
    int i;

    if (FBT == NULL)
        return;

    if (FBT->left == NULL && FBT->right == NULL) {
        /* Leaf: a[0..len-1] is exactly the path that led here. */
        fprintf(fp,"%c %d:",c[leaf_index++],FBT->data);
        for (i = 0; i < len; i++)
            fprintf(fp,"%d", a[i]);
        fprintf(fp,"\n");
        return;
    }

    /* The path buffer is fixed-size: refuse to descend past it instead of
     * writing out of bounds. */
    if (len < 0 || len >= MAX_CODE_LEN) {
        fprintf(stderr, "HuffManCoding: tree deeper than %d levels, subtree skipped\n",
                (int)MAX_CODE_LEN);
        return;
    }

    a[len] = 0;
    HuffManCoding(fp, FBT->left, len + 1);
    a[len] = 1;
    HuffManCoding(fp, FBT->right, len + 1);
}

節點的Huffman編碼由它在Huffman樹中的位置決定。從根節點到任意節點有且僅有一條路徑,且路徑可以唯一確定節點。因此規定:路徑上每走向左子結點一次記為0,每走向右子結點一次記為1,從根到該節點的0、1序列即為它的編碼。

由Huffman樹和Huffman編碼的性質可知,Huffman編碼是一種不等長編碼。在構造過程中,兩個權值較小的節點生成一棵新的二叉樹,新根節點的權值為左右子節點權值的和,並不實際代表字符;字符只出現在葉子節點上。由於從根到某個葉子的路徑不會經過其他葉子,任何一個字符的編碼都不可能是另一個字符編碼的前綴。也就是說,較短的編碼不可能是較長編碼的前綴。

Huffman樹從葉子到根構造:權值小的節點先被合併,因而位於較深的層;權值大的字符節點較晚被合併,因而靠近根。靠近根的字符節點,其權值與若干個靠近葉子的節點的權值之和相近,故而權值較高的字符靠近根,編碼也較短。

解碼過程可以由字符串匹配來完成:

//Decoding
for(i = 0; code[i]; i++) {
    for (j = 0; j < n; j++) {
        t = 1;
        for (k = 0; coding[j][k]; k++) {
            if (code[i + k] != coding[j][k]) {
                t = 0;
                break;
            }
        }
        if (t == 1) {
            append(out,c[j]);
            i = i + k - 1;
            break;
        }
    }
}
printf("%s\n",out);

//Huffman.c
#include<stdio.h>
#include<string.h>
#include<stdlib.h>

/*
 * Node of the binary tree used for Huffman construction.
 * The struct carries the tag BTreeNode so that the child pointers refer to
 * this very type; with an untagged struct, "struct BTreeNode" would be a
 * separate incomplete type that cannot be dereferenced.
 */
typedef struct BTreeNode
{
    int data;                   /* weight stored in the node */
    struct BTreeNode* left;     /* left child, NULL if none */
    struct BTreeNode* right;    /* right child, NULL if none */
}BTreeNode;

/* Buffer dimension used throughout this file: coding is M x M, and main sizes
 * its input, output and bit-string buffers from it. */
#define M 32
/* coding[i] holds the i-th code word as a string, as read back from code.txt
 * by main. */
char coding[M][M];

/* Release every node of the tree rooted at t; NULL is allowed. */
static void FreeHuffmanTree(BTreeNode *t)
{
    if (t == NULL)
        return;
    FreeHuffmanTree(t->left);
    FreeHuffmanTree(t->right);
    free(t);
}

/*
 * Build a Huffman tree from the n weights in a[].
 *
 * a - weights, one per leaf
 * n - number of weights
 *
 * Returns the root of the tree, or NULL when a is NULL, n <= 0, or an
 * allocation fails (nothing is leaked in that case). For n == 1 the root is
 * the single leaf. The caller owns the returned tree.
 *
 * The forest is kept in an array of root pointers; each of the n-1 rounds
 * merges the two lowest-weight trees. Comparisons are strict, so among equal
 * weights the tree found first is kept.
 */
BTreeNode* CreateHuffman(int a[], int n)
{
    int i, j;
    BTreeNode **b, *q;

    if (a == NULL || n <= 0)
        return NULL;

    /* Array of n root pointers: sized per pointer, not per node. */
    b = calloc((size_t)n, sizeof *b);
    if (b == NULL)
        return NULL;

    for (i = 0; i < n; i++) {
        b[i] = malloc(sizeof *b[i]);
        if (b[i] == NULL) {
            n = i;              /* only slots 0..i-1 hold nodes to release */
            goto fail;
        }
        b[i]->data  = a[i];
        b[i]->left  = NULL;
        b[i]->right = NULL;
    }

    /* With a single weight no merge happens and that leaf is the root. */
    q = b[0];

    for (i = 1; i < n; i++) /* n-1 merges leave exactly one tree */
    {
        int k1 = -1, k2 = -1;

        /* Seed k1/k2 with the first two trees still in the forest. */
        for (j = 0; j < n; j++) {
            if (b[j] == NULL)
                continue;
            if (k1 == -1) {
                k1 = j;
            } else {
                k2 = j;
                break;
            }
        }

        /* Find the smallest (k1) and second smallest (k2) root weights. */
        for (j = k2; j < n; j++) {
            if (b[j] == NULL)
                continue;
            if (b[j]->data < b[k1]->data) {
                k2 = k1;
                k1 = j;
            } else if (b[j]->data < b[k2]->data) {
                k2 = j;
            }
        }

        q = malloc(sizeof *q);
        if (q == NULL)
            goto fail;
        q->data = b[k1]->data + b[k2]->data;
        q->left = b[k1];
        q->right = b[k2];

        b[k1] = q;      /* the merged tree replaces its lighter child's slot */
        b[k2] = NULL;   /* the other slot leaves the forest */
    }

    free(b);
    return q;

fail:
    for (i = 0; i < n; i++)
        FreeHuffmanTree(b[i]);
    free(b);
    return NULL;
}

/*
 * Weighted path length of the tree rooted at FBT: the sum over all leaves of
 * weight * depth. len is the depth of FBT itself; pass 0 for the root.
 * Returns 0 for an empty tree.
 */
double WeightPathLength(BTreeNode* FBT, int len)
{
    double total;

    if (FBT == NULL)
        return 0;

    /* A leaf contributes its weight times its depth. */
    if (FBT->left == NULL && FBT->right == NULL)
        return FBT->data * len;

    /* An interior node contributes whatever its two subtrees contribute. */
    total  = WeightPathLength(FBT->left, len + 1);
    total += WeightPathLength(FBT->right, len + 1);
    return total;
}

/*
 * Position in c of the next symbol to print as a leaf label.
 * Named leaf_index rather than index: some C libraries declare a legacy
 * function index() in their string headers, and a file-scope variable of
 * that name would clash with it.
 */
static int leaf_index = 0;
char *c;    /* symbol table; allocated and filled by main */

/*
 * Write one line "<symbol> <weight>:<code>" to fp for every leaf of FBT.
 *
 * fp  - output stream
 * FBT - root of the (sub)tree to encode; NULL is ignored
 * len - number of branches already taken from the root; pass 0 initially
 *
 * A left branch contributes 0 and a right branch contributes 1 to the code.
 * Leaves are labelled with c[leaf_index++], i.e. in the order this traversal
 * reaches them, which is not necessarily the order the weights were supplied.
 * NOTE(review): nodes carry no symbol, so a leaf's weight is not tied back to
 * the symbol it was created from - confirm the intended pairing.
 */
void HuffManCoding(FILE *fp, BTreeNode* FBT, int len)
{
    enum { MAX_CODE_LEN = 64 };     /* longest code (= deepest leaf) supported */
    static int a[MAX_CODE_LEN];     /* branch digits on the path from the root */
    int i;

    if (FBT == NULL)
        return;

    if (FBT->left == NULL && FBT->right == NULL) {
        /* Leaf: a[0..len-1] is exactly the path that led here. */
        fprintf(fp,"%c %d:",c[leaf_index++],FBT->data);
        for (i = 0; i < len; i++)
            fprintf(fp,"%d", a[i]);
        fprintf(fp,"\n");
        return;
    }

    /* The path buffer is fixed-size: refuse to descend past it instead of
     * writing out of bounds. */
    if (len < 0 || len >= MAX_CODE_LEN) {
        fprintf(stderr, "HuffManCoding: tree deeper than %d levels, subtree skipped\n",
                (int)MAX_CODE_LEN);
        return;
    }

    a[len] = 0;
    HuffManCoding(fp, FBT->left, len + 1);
    a[len] = 1;
    HuffManCoding(fp, FBT->right, len + 1);
}

/*
 * Append the character ch to the NUL-terminated string str, in place.
 * The caller must guarantee room for one more character plus the terminator;
 * no bound is checked here.
 */
void append(char *str, char ch) {
    size_t len = strlen(str);

    str[len] = ch;
    str[len + 1] = '\0';    /* plain ASCII quotes: typographic quotes do not compile */
}

int main()
{
    int i, j, k, n, t;
    int* arr;
    char ch, in[M] = {‘\0‘}, code[M*M] = {‘\0‘}, out[M] = {‘\0‘};
    BTreeNode* fbt;
    FILE *fp;

    //Input
    freopen("test.in","r",stdin);
    scanf("%d", &n);
    arr = (int *)malloc(n * sizeof(int));
    c   = (char *)malloc(n * sizeof(char));
    arr[0] = 186;
    c[0] = ‘ ‘;
    //原諒樓主這裏偷懶,空格字符的輸入有點麻煩所以直接寫入了
    for (i = 1; i < n; i++) {
        getchar();
        scanf("%c %d",&c[i],&arr[i]);
    }

    //huffman coding
    fbt = CreateHuffman(arr, n);
    fp = fopen("code.txt","w");
    HuffManCoding(fp, fbt, 0);
    fflush(fp);

    //Encoding
    fp = fopen("code.txt","r");
    for (i = 0; i < n; i++) {
       fgetc(fp);
       fscanf(fp,"%c %d:%s", &t, &ch, &coding[i]);
    }
    fp = fopen("src.in","r");
    fscanf(fp, "%s", in);
    for (i = 0; in[i]; i++) {
        for (j = 0; j < n; j++) {
            if (c[j] == in[i]) {
                strcat(code,coding[j]);
            }
        }
    }
    printf("%s\n",code);

    //Decoding
    for(i = 0; code[i]; i++) {
        for (j = 0; j < n; j++) {
            t = 1;
            for (k = 0; coding[j][k]; k++) {
                if (code[i + k] != coding[j][k]) {
                    t = 0;
                    break;
                }
            }
            if (t == 1) {
                append(out,c[j]);
                i = i + k - 1;
                break;
            }
        }
    }
    printf("%s\n",out);
    return 0;
}

測試數據:

test.in:

27
a 4
b 13 
c 22 
d 32 
e 103 
f 21 
g 15 
h 47 
i 57 
j 1 
k 5 
l 32 
m 20 
n 57
o 63 
p 15 
q 1 
r 48 
s 51 
t 80 
u 23 
v 8 
w 18 
x 1 
y 16 
z 1

Huffman樹與編碼