Huffman樹與編碼
阿新 • • 發佈:2017-06-06
reat 指針數組 get n-1 include 開始 靜態數組 當前 編碼
帶權路徑長度最小的二叉樹稱為最優二叉樹,也稱Huffman樹(哈夫曼樹)。
Huffman樹的構造
將節點的權值存入數組中,由數組開始構造Huffman樹。初始化指針數組,指針指向含有權值的孤立節點。
b = malloc(n*sizeof(BTreeNode));
for (i = 0; i < n; i++) {
b[i] = malloc(sizeof(BTreeNode));
b[i]->data = a[i];
b[i]->left = NULL;
b[i]->right = NULL;
}
數組b中的指針可以理解為二叉樹的根指針。
進行n - 1次循環建立Huffman樹
選擇b中根節點權值最小的兩棵二叉樹作為左右子樹組成新的二叉樹,新二叉樹的根節點權值為這兩棵二叉樹根節點權值的和。
將新二叉樹添加到b中,並從b中刪除原來的兩棵二叉樹。當b中只有一棵樹時終止循環。
int k1 = -1, k2; for (j = 0; j < n; j++) //讓k1初始指向森林中第一棵樹,k2指向第二棵 { if (b[j] != NULL && k1 == -1) { k1 = j; continue; } if (b[j] != NULL) { k2 = j; break; } } for (j = k2; j < n; j++) //從當前森林中求出最小權值樹和次最小權值樹 { if (b[j] != NULL) { if (b[j]->data < b[k1]->data) { k2 = k1; k1 = j; } else if (b[j]->data < b[k2]->data) k2 = j; } } //由最小權值樹和次最小權值樹建立一棵新樹,q指向樹根結點 q = malloc(sizeof(BTreeNode)); q->data = b[k1]->data + b[k2]->data; q->left = b[k1]; q->right = b[k2]; b[k1] = q;//將指向新樹的指針賦給b指針數組中k1位置 b[k2] = NULL;//k2位置為空
Huffman編碼與解碼
首先給出求帶權路徑長度的遞歸實現:
/*
 * Weighted path length of the tree rooted at FBT: the sum, over every leaf,
 * of the leaf's weight multiplied by its depth.
 *
 * FBT  root of the (sub)tree; NULL is treated as an empty tree.
 * len  depth of FBT; pass 0 for the initial call.
 *
 * Returns 0 for an empty tree.
 */
double WeightPathLength(BTreeNode* FBT, int len)
{
    if (FBT == NULL) {
        /* Empty tree contributes nothing. */
        return 0;
    }
    else {
        if (FBT->left == NULL && FBT->right == NULL) {
            /* Leaf: weight times its depth. */
            return FBT->data * len;
        }
        else {
            /* Internal node: add up both subtrees, one level deeper. */
            return WeightPathLength(FBT->left,len+1)+WeightPathLength(FBT->right,len+1);
        }
    }
}
上述算法實際上通過雙遞歸遍歷了Huffman樹。
改進上述算法得到求哈夫曼編碼的實現:
/* Position in c[] of the symbol printed for the next leaf visited. It is
   called leaf_index rather than index because some C libraries' <string.h>
   also declare the legacy index() function, which a file-scope object named
   index would collide with. */
static int leaf_index = 0;
/* Symbol table; entries are consumed in the order leaves are written out. */
char *c;
/*
 * Write one line "<symbol> <weight>:<bits>" to fp for every leaf of the tree
 * rooted at FBT, visiting the left subtree before the right one. A left
 * branch contributes the digit 0 and a right branch the digit 1.
 *
 * fp   stream the code table is written to.
 * FBT  root of the (sub)tree; NULL is ignored.
 * len  depth of FBT, i.e. how many digits of the path are valid; 0 initially.
 *
 * NOTE(review): the node stores no symbol, so each leaf is paired with the
 * next entry of c[] purely by visit order; the printed weight belongs to the
 * leaf, the printed symbol is whatever c[] holds at that position.
 */
void HuffManCoding(FILE *fp, BTreeNode* FBT, int len)
{
    enum { MAX_CODE_BITS = 64 };
    /* Branch digits on the path from the root to the current node. Static so
       the recursion shares one buffer; a[len] is written at depth len. */
    static int a[MAX_CODE_BITS];
    int i;

    if (FBT == NULL)
        return;
    if (FBT->left == NULL && FBT->right == NULL) {
        /* Leaf: emit the digits collected on the way down. */
        fprintf(fp,"%c %d:",c[leaf_index++],FBT->data);
        for (i = 0; i < len; i++)
            fprintf(fp,"%d", a[i]);
        fprintf(fp,"\n");
        return;
    }
    if (len >= MAX_CODE_BITS) {
        /* Refuse to write past the end of the path buffer. */
        fprintf(stderr, "HuffManCoding: tree deeper than %d levels, subtree skipped\n",
                MAX_CODE_BITS);
        return;
    }
    /* Internal node: record the branch taken, then descend one level. */
    a[len] = 0;
    HuffManCoding(fp, FBT->left, len + 1);
    a[len] = 1;
    HuffManCoding(fp, FBT->right, len + 1);
}
節點的Huffman編碼由它在Huffman樹中的位置決定。從根節點到任意節點有且僅有一條路徑,且路徑可以唯一確定節點。因此規定從左子結點經過編碼為0,從右子結點經過編碼為1,路徑序列作為編碼。
由Huffman樹和Huffman編碼的性質可知,Huffman編碼是一種不等長編碼。在構造過程中,兩個權值較小的節點生成一棵新的二叉樹,新根節點的權值為左右子節點權值的和,並不實際代表字符;字符只出現在葉子節點上,而從根到某個葉子的路徑不會經過其他葉子。也就是說,任何一個字符的編碼都不可能是另一個字符編碼的前綴。
Huffman樹從葉子到根構造,靠近根的字符節點權值與幾個靠近葉子的節點權值和相近,故而靠近根的字符節點權值較高即編碼較短。
解碼過程可以由字符串匹配來完成:
// Decoding: walk the bit string `code`; at each position i try the entries
// of the code table in order until one matches the bits starting at i.
for(i = 0; code[i]; i++) {
for (j = 0; j < n; j++) {
// t stays 1 only if every character of coding[j] matches code from i on.
t = 1;
for (k = 0; coding[j][k]; k++) {
if (code[i + k] != coding[j][k]) {
t = 0;
break;
}
}
if (t == 1) {
// On a match k equals the length of coding[j]: emit its symbol and skip
// the matched bits (the -1 compensates for the outer loop's i++).
append(out,c[j]);
i = i + k - 1;
break;
}
}
}
printf("%s\n",out);
//Huffman.c
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*
 * Binary tree node used for the Huffman tree.
 *
 * The struct carries the tag BTreeNode so that the child pointers refer to
 * this very type; with an untagged struct, "struct BTreeNode*" would name a
 * separate, incomplete type that cannot be dereferenced.
 */
typedef struct BTreeNode
{
    int data;                   /* weight stored in the node */
    struct BTreeNode* left;     /* left child, NULL if none */
    struct BTreeNode* right;    /* right child, NULL if none */
}BTreeNode;
/* Size used for the fixed buffers in this file: the number of rows in the
   code table, the length of each row, and the text buffers in main. */
#define M 32
/* Code table: coding[i] holds the string of '0'/'1' characters read back
   from the i-th line of code.txt. */
char coding[M][M];
/* Allocate an array of `count` pointers, each pointing to a freshly
   allocated, zeroed node. Returns NULL (with nothing leaked) on failure. */
static BTreeNode **alloc_node_array(int count)
{
    BTreeNode **nodes = malloc((size_t)count * sizeof *nodes);
    int i;

    if (nodes == NULL)
        return NULL;
    for (i = 0; i < count; i++) {
        nodes[i] = malloc(sizeof *nodes[i]);
        if (nodes[i] == NULL) {
            while (i-- > 0)
                free(nodes[i]);
            free(nodes);
            return NULL;
        }
        nodes[i]->data = 0;
        nodes[i]->left = NULL;
        nodes[i]->right = NULL;
    }
    return nodes;
}

/*
 * Build a Huffman tree from the n weights in a[].
 *
 * Every weight becomes a leaf; the two lightest trees of the forest are then
 * merged n - 1 times under a new root whose weight is the sum of theirs.
 *
 * a  array of n weights.
 * n  number of weights.
 *
 * Returns the root of the tree (the single leaf when n == 1), or NULL when
 * a is NULL, n <= 0, or memory runs out. Each node is a separate malloc
 * allocation that the caller owns.
 */
BTreeNode* CreateHuffman(int a[], int n)
{
    int i, j;
    BTreeNode **b, **inner, *q;

    if (a == NULL || n <= 0)
        return NULL;

    /* b[] is the forest: one root pointer per tree, NULL once merged away. */
    b = alloc_node_array(n);
    if (b == NULL)
        return NULL;
    for (i = 0; i < n; i++)
        b[i]->data = a[i];

    if (n == 1) {
        /* Nothing to merge: the lone leaf is the whole tree. */
        q = b[0];
        free(b);
        return q;
    }

    /* All n - 1 internal nodes are allocated up front so that an allocation
       failure can be unwound while every node is still a flat array entry. */
    inner = alloc_node_array(n - 1);
    if (inner == NULL) {
        for (i = 0; i < n; i++)
            free(b[i]);
        free(b);
        return NULL;
    }

    q = NULL;
    for (i = 1; i < n; i++)     /* n - 1 merges */
    {
        /* At least two trees remain here, so both indices get assigned. */
        int k1 = -1, k2 = -1;

        /* Put k1 on the first remaining tree and k2 on the second. */
        for (j = 0; j < n; j++) {
            if (b[j] == NULL)
                continue;
            if (k1 == -1) {
                k1 = j;
                continue;
            }
            k2 = j;
            break;
        }
        /* Scan from k2 onward for the smallest (k1) and second-smallest (k2). */
        for (j = k2; j < n; j++)
        {
            if (b[j] != NULL)
            {
                if (b[j]->data < b[k1]->data)
                {
                    k2 = k1;
                    k1 = j;
                }
                else if (b[j]->data < b[k2]->data)
                    k2 = j;
            }
        }
        /* Merge: the smallest becomes the left child, the runner-up the right. */
        q = inner[i - 1];
        q->data = b[k1]->data + b[k2]->data;
        q->left = b[k1];
        q->right = b[k2];
        b[k1] = q;
        b[k2] = NULL;
    }
    /* Only the pointer arrays are released; the nodes now form the tree. */
    free(inner);
    free(b);
    return q;
}
/*
 * Weighted path length of the tree rooted at FBT: each leaf contributes its
 * weight multiplied by its depth.
 *
 * FBT  root of the (sub)tree; an empty (NULL) tree yields 0.
 * len  depth of FBT; callers start the recursion with 0.
 */
double WeightPathLength(BTreeNode* FBT, int len)
{
    double total;

    if (FBT == NULL)
        return 0;
    if (FBT->left == NULL && FBT->right == NULL)
        return FBT->data * len;     /* leaf: weight times depth */

    /* Internal node: both subtrees sit one level further down. */
    total = WeightPathLength(FBT->left, len + 1);
    total += WeightPathLength(FBT->right, len + 1);
    return total;
}
/* Position in c[] of the symbol printed for the next leaf visited. It is
   called leaf_index rather than index because some C libraries' <string.h>
   also declare the legacy index() function, which a file-scope object named
   index would collide with. */
static int leaf_index = 0;
/* Symbol table; entries are consumed in the order leaves are written out. */
char *c;
/*
 * Write one line "<symbol> <weight>:<bits>" to fp for every leaf of the tree
 * rooted at FBT, visiting the left subtree before the right one. A left
 * branch contributes the digit 0 and a right branch the digit 1.
 *
 * fp   stream the code table is written to.
 * FBT  root of the (sub)tree; NULL is ignored.
 * len  depth of FBT, i.e. how many digits of the path are valid; 0 initially.
 *
 * NOTE(review): the node stores no symbol, so each leaf is paired with the
 * next entry of c[] purely by visit order; the printed weight belongs to the
 * leaf, the printed symbol is whatever c[] holds at that position.
 */
void HuffManCoding(FILE *fp, BTreeNode* FBT, int len)
{
    enum { MAX_CODE_BITS = 64 };
    /* Branch digits on the path from the root to the current node. Static so
       the recursion shares one buffer; a[len] is written at depth len. */
    static int a[MAX_CODE_BITS];
    int i;

    if (FBT == NULL)
        return;
    if (FBT->left == NULL && FBT->right == NULL) {
        /* Leaf: emit the digits collected on the way down. */
        fprintf(fp,"%c %d:",c[leaf_index++],FBT->data);
        for (i = 0; i < len; i++)
            fprintf(fp,"%d", a[i]);
        fprintf(fp,"\n");
        return;
    }
    if (len >= MAX_CODE_BITS) {
        /* Refuse to write past the end of the path buffer. */
        fprintf(stderr, "HuffManCoding: tree deeper than %d levels, subtree skipped\n",
                MAX_CODE_BITS);
        return;
    }
    /* Internal node: record the branch taken, then descend one level. */
    a[len] = 0;
    HuffManCoding(fp, FBT->left, len + 1);
    a[len] = 1;
    HuffManCoding(fp, FBT->right, len + 1);
}
/*
 * Append the single character ch to the NUL-terminated string str, in place.
 *
 * The caller must guarantee that str has room for one more character plus
 * the terminator; no bounds are checked here.
 */
void append(char *str, char ch) {
    size_t end = strlen(str);

    str[end] = ch;
    str[end + 1] = '\0';
}
int main()
{
int i, j, k, n, t;
int* arr;
char ch, in[M] = {‘\0‘}, code[M*M] = {‘\0‘}, out[M] = {‘\0‘};
BTreeNode* fbt;
FILE *fp;
//Input
freopen("test.in","r",stdin);
scanf("%d", &n);
arr = (int *)malloc(n * sizeof(int));
c = (char *)malloc(n * sizeof(char));
arr[0] = 186;
c[0] = ‘ ‘;
//原諒樓主這裏偷懶,空格字符的輸入有點麻煩所以直接寫入了
for (i = 1; i < n; i++) {
getchar();
scanf("%c %d",&c[i],&arr[i]);
}
//huffman coding
fbt = CreateHuffman(arr, n);
fp = fopen("code.txt","w");
HuffManCoding(fp, fbt, 0);
fflush(fp);
//Encoding
fp = fopen("code.txt","r");
for (i = 0; i < n; i++) {
fgetc(fp);
fscanf(fp,"%c %d:%s", &t, &ch, &coding[i]);
}
fp = fopen("src.in","r");
fscanf(fp, "%s", in);
for (i = 0; in[i]; i++) {
for (j = 0; j < n; j++) {
if (c[j] == in[i]) {
strcat(code,coding[j]);
}
}
}
printf("%s\n",code);
//Decoding
for(i = 0; code[i]; i++) {
for (j = 0; j < n; j++) {
t = 1;
for (k = 0; coding[j][k]; k++) {
if (code[i + k] != coding[j][k]) {
t = 0;
break;
}
}
if (t == 1) {
append(out,c[j]);
i = i + k - 1;
break;
}
}
}
printf("%s\n",out);
return 0;
}
測試數據:
test.in:
27
a 4
b 13
c 22
d 32
e 103
f 21
g 15
h 47
i 57
j 1
k 5
l 32
m 20
n 57
o 63
p 15
q 1
r 48
s 51
t 80
u 23
v 8
w 18
x 1
y 16
z 1
Huffman樹與編碼