Huffman編碼壓縮解壓縮檔案
阿新 • • 發佈:2019-01-28
採用哈夫曼(Huffman)編碼和優先佇列(最小堆)實現。
標頭檔案
/*
 * huffman.h - shared types and prototypes for the Huffman compressor.
 *
 * Declares the tree node, the tree handle, the pointer-based min-heap used
 * as a priority queue, and every function that the listings call across
 * translation units, so no caller relies on an implicit declaration.
 */
#ifndef HUFFMAN_H_INCLUDED
#define HUFFMAN_H_INCLUDED

#include "stdio.h"
#include "stdlib.h"
#include "string.h"

#define INIT_SIZE 1000    /* number of node slots a heap is created with */
#define FALSE 0
#define TRUE 1
#define WEIGHT_SIZE 256   /* one weight / code-table slot per byte value */
#define CODE_SIZE 40      /* nominal size of a code-string buffer */

typedef int BOOL;

/* One node of the Huffman tree; leaves carry a byte value and its code. */
typedef struct huff_node
{
    unsigned char value;            /* byte value represented by a leaf */
    char *code;                     /* code as a string of '0'/'1' characters */
    struct huff_node *left_child;
    struct huff_node *right_child;
    struct huff_node *parent;
    int weight;                     /* occurrence count (sum of children for merged nodes) */
} huff_node,*phuff_node;

/* Tree handle: the root plus a per-byte-value table of leaf pointers. */
typedef struct
{
    phuff_node root;
    phuff_node *code_table;
} huff_tree,*phuff_tree;

/* The heap stores pointers to tree nodes directly. */
typedef huff_node heap_node,*pheap_node;

/* Binary min-heap ordered by node weight; slots are indexed from 1. */
typedef struct heap
{
    pheap_node *value;
    int capacity;
    int length;
} heap,*pheap;

/* Heap (priority queue) interface. */
void error(char *msg);
BOOL is_emp(pheap heap);
BOOL is_full(pheap heap);
pheap init_heap(void);
void insert_heap(pheap heap,pheap_node heap_node);
pheap_node delete_heap(pheap heap);
void reset_heap(pheap heap);
void print_node(pheap_node node);
void print_heap(pheap heap);

/* Huffman tree interface. */
phuff_tree build_huff_tree(int *weight);
void build_code_table(phuff_tree tree);
void build_code(phuff_node cur_pos,char *code_buff,int len);
void free_huff_node(phuff_node node);
void free_huff_tree(phuff_tree tree);

/* Compression / decompression interface. */
int *get_weight(char *file_name);
void compress(char *ori_file_name,char *com_file_name);
int *read_weight(FILE *com_file);
void decompress(char *com_file_name,char *ori_file_name);

#endif // HUFFMAN_H_INCLUDED

/* Article section heading fused onto this line in the original: heap definition. */
#include "huffman.h"

/*
 * Binary min-heap of Huffman node pointers, ordered by node weight.
 * Slots are used from index 1, so the parent of slot i is i / 2 and its
 * children are 2 * i and 2 * i + 1.
 */

/* Print msg through perror() and terminate the process with a failure status. */
void error(char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
}

/* Return TRUE when the heap holds no nodes. */
BOOL is_emp(pheap heap)
{
    return heap->length == 0 ? TRUE : FALSE;
}

/* Return TRUE when the heap has reached its capacity. */
BOOL is_full(pheap heap)
{
    return heap->length == heap->capacity ? TRUE : FALSE;
}

/*
 * Allocate an empty heap with room for INIT_SIZE nodes.
 * Terminates through error() when memory cannot be obtained.
 */
pheap init_heap()
{
    /* sizeof *h: the size of the struct, not of a pointer to it. */
    pheap h = malloc(sizeof *h);
    if(h == NULL)
        error("heap malloc error\n");

    /* One extra slot because index 0 is never used. */
    h->value = malloc((INIT_SIZE + 1) * sizeof *h->value);
    if(h->value == NULL)
        error("heap->value malloc error\n");

    h->capacity = INIT_SIZE;
    h->length = 0;
    return h;
}

/*
 * Insert node into the heap, keeping the smallest weight at slot 1.
 * Terminates through error() when the heap is full.
 */
void insert_heap(pheap heap,pheap_node node)
{
    if(is_full(heap))
        error("heap is full\n");

    /*
     * Nodes with a non-zero byte value have their child links cleared here;
     * the tree builder stores 0 in merged nodes so their links survive.
     * NOTE(review): a leaf for byte value 0 is not cleared by this test, so
     * whoever creates the node must initialise its links.
     */
    if(node->value != 0)
    {
        node->left_child = NULL;
        node->right_child = NULL;
    }

    /* Open a hole at the end and move it up while the parent is heavier. */
    int hole = ++heap->length;
    while((hole > 1) && (heap->value[hole / 2]->weight > node->weight))
    {
        heap->value[hole] = heap->value[hole / 2];
        hole /= 2;
    }
    heap->value[hole] = node;
}

/* Print the byte value (as a character) and the weight of one node. */
void print_node(pheap_node node)
{
    printf("value:%c weight:%d\n",node->value,node->weight);
}

/* Print every node in slot order, followed by an empty line. */
void print_heap(pheap heap)
{
    int i;
    for(i = 1; i <= heap->length; i++)
        print_node(heap->value[i]);
    printf("\n");
}

/*
 * Remove and return the node with the smallest weight.
 * Terminates through error() when the heap is empty.
 */
pheap_node delete_heap(pheap heap)
{
    if(is_emp(heap))
        error("heap is empty\n");

    int i,child;
    pheap_node result = heap->value[1];
    pheap_node current = heap->value[heap->length];

    heap->length--;
    /* Sift the former last node down from the root. */
    for(i = 1; 2 * i <= heap->length; i = child)
    {
        child = 2 * i;
        /* Pick the lighter of the two children when a right child exists. */
        if((child != heap->length) &&
           (heap->value[child]->weight > heap->value[child + 1]->weight))
            child++;
        if(current->weight < heap->value[child]->weight)
            break;
        heap->value[i] = heap->value[child];
    }
    heap->value[i] = current;
    return result;
}

/*
 * Restore the heap property over all slots, working from the last parent
 * back to the root.
 */
void reset_heap(pheap heap)
{
    int i,j,child;
    pheap_node moved;

    for(i = heap->length / 2; i > 0; i--)
    {
        child = 2 * i;
        if((child != heap->length) &&
           (heap->value[child]->weight > heap->value[child + 1]->weight))
            child++;

        /* Parent already no heavier than its lighter child: nothing to do. */
        if(heap->value[i]->weight <= heap->value[child]->weight)
            continue;

        moved = heap->value[i];
        heap->value[i] = heap->value[child];
        heap->value[child] = moved;

        /* The subtree changed, so keep sifting the moved node down. */
        for(j = child; 2 * j <= heap->length; j = child)
        {
            child = 2 * j;
            if((child != heap->length) &&
               (heap->value[child]->weight > heap->value[child + 1]->weight))
                child++;
            if(moved->weight < heap->value[child]->weight)
                break;
            heap->value[j] = heap->value[child];
        }
        heap->value[j] = moved;
    }
}
Huffman樹定義及操作
壓縮#include "huffman.h" void free_huff_node(phuff_node node) { if(node != NULL) { free_huff_node(node->left_child); free_huff_node(node->right_child); if(node->code !=NULL) free(node->code); free(node); } } void free_huff_tree(phuff_tree huff_tree) { if(huff_tree != NULL) free_huff_node(huff_tree->root); int i =0; for(i; i < WEIGHT_SIZE; i++) { if(huff_tree->code_table[i] != NULL) { free(huff_tree->code_table[i]); } } free(huff_tree); } phuff_tree build_huff_tree(int *weight) { pheap_node node; pheap heap = init_heap(); pheap_node *code_table = (pheap_node*)malloc(WEIGHT_SIZE * sizeof(pheap_node)); int i,j; for(i = 0; i < WEIGHT_SIZE; i++) { if(weight[i] != 0) { node =(pheap_node)malloc(sizeof(heap_node)); node->weight = weight[i]; node->value = (unsigned char)i; code_table[i] = node;//儲存所有的葉子節點 insert_heap(heap,node); } else code_table[i] = NULL; } phuff_tree huff_tree = (phuff_tree)malloc(sizeof(huff_tree)); if(huff_tree == NULL) error("huff_tree malloc error\n"); huff_tree->code_table = code_table; pheap_node heap_node_1,heap_node_2,merge_node; while(heap->length > 1) { heap_node_1 = delete_heap(heap); heap_node_2 = delete_heap(heap); merge_node = (pheap_node)malloc(sizeof(heap_node)); if(merge_node == NULL) error("merge_node malloc error\n"); merge_node->value = NULL; merge_node->weight = heap_node_1->weight + heap_node_2->weight; merge_node->left_child = heap_node_1; merge_node->right_child = heap_node_2; heap_node_1->parent = merge_node; heap_node_2->parent = merge_node; insert_heap(heap,merge_node); } merge_node->parent = NULL; huff_tree->root = merge_node; build_code_table(huff_tree); return huff_tree; } void build_code_table(phuff_tree huff_tree) { char *code_buff = (char*)malloc(CODE_SIZE * sizeof(char)); memset(code_buff,'\0',CODE_SIZE); build_code(huff_tree->root,code_buff,0); } void build_code(phuff_node cur_pos,char* code_buff,int len) { // printf("current code_buff is %s\n",code_buff); if((cur_pos->left_child == NULL) && (cur_pos->right_child == 
NULL)) { cur_pos->code = (char*)malloc((len + 1)* sizeof(char)); memset(cur_pos->code,'\0',len + 1); memcpy(cur_pos->code,code_buff,len * sizeof(char)); } if(cur_pos->left_child != NULL) { code_buff[len] = '0'; build_code(cur_pos->left_child,code_buff,len + 1); } if(cur_pos->right_child != NULL) { code_buff[len] = '1'; build_code(cur_pos->right_child,code_buff,len + 1); } }
#include "huffman.h"
/*統計各個字元的權重*/
int* get_weight(char *file_name)
{
FILE *file = fopen(file_name,"r");
if(file == NULL)
error("file open error\n");
int *weight = (int*)malloc(WEIGHT_SIZE * sizeof(int));
int i;
for(i = 0; i < WEIGHT_SIZE; i++)
weight[i] = 0;
unsigned char temp_char;
while(!feof(file))
{
fread(&temp_char,1,1,file);
weight[temp_char]++;
}
fclose(file);
return weight;
}
/*壓縮*/
void compress(char* ori_file_name,char* com_file_name)
{
int *weight = get_weight(ori_file_name);
phuff_tree huff_tree = build_huff_tree(weight);
long cur_pos = 0,file_length = 0;
unsigned char temp_read = 0,temp_write = 0;
int code_len = 0,i = 0,j = 0,left_size = 8;
char *code;
FILE *ori_file = fopen(ori_file_name,"r");
FILE *com_file = fopen(com_file_name,"wb");
//寫權重資訊
for(i; i < WEIGHT_SIZE; i++)
{
if(weight[i] != 0)
{
fprintf(com_file,"%d",i);
fputc(' ',com_file);
fprintf(com_file,"%d",weight[i]);
fputc(' ',com_file);
file_length += weight[i];
}
}
fputc('\n',com_file);
fprintf(com_file,"%ld",file_length - 1);//多讀一個EOF
fputc('\n',com_file);
while(!feof(ori_file))
{
if(j >= code_len)
{
fread(&temp_read,sizeof(char),1,ori_file);
//printf(" %d ",temp_read);
code = huff_tree->code_table[temp_read]->code;
code_len = strlen(code);
j = 0;
}
for(i = 0; (i < left_size) && (j < code_len); i++,j++)
{
if(code[j]=='0')
temp_write = temp_write << 1;
else temp_write = (temp_write << 1)| 1;
}
if(i < left_size)//本位元組未填滿
{
left_size -= i;
}
else //填滿
{
left_size = 8;//表示當前位元組剩餘空間
fwrite(&temp_write, sizeof(char),1,com_file);
//printf(" %d ",temp_write);
temp_write = 0;
}
}
/*將後面的補為0*/
temp_write = temp_write << left_size;
fwrite(&temp_write,sizeof(char),1,com_file);
fclose(ori_file);
fclose(com_file);
free(huff_tree);
}
解壓縮
#include "huffman.h"
int* read_weight(FILE *com_file)
{
int* weight = (int*)malloc(WEIGHT_SIZE * sizeof(int));
int i = 0;
for(; i < WEIGHT_SIZE; i++)
weight[i] = 0;
char *temp_read = (char*)malloc(WEIGHT_SIZE * 10 * sizeof(char));
if(fgets(temp_read,WEIGHT_SIZE * 10,com_file) == NULL)
error("read com_file for weight erroe\n");
char *pre_pos = temp_read,*cur_pos = strstr(temp_read," ");
int num;
char str[10];
while(cur_pos !=NULL)
{
memset(str,'\0',10);
strncpy(str,pre_pos,cur_pos - pre_pos + 1);
num = atoi(str);
pre_pos = cur_pos;
cur_pos = strstr(cur_pos + 1," ");
memset(str,'\0',10);
strncpy(str,pre_pos,cur_pos - pre_pos + 1);
weight[num] = atoi(str);
pre_pos = cur_pos;
cur_pos = strstr(cur_pos + 1," ");
}
return weight;
}
void decompress(char* com_file_name,char* ori_file_name)
{
FILE *com_file = fopen(com_file_name,"rb+");
FILE *ori_file = fopen(ori_file_name,"w+");
int *weight = read_weight(com_file);
long file_length = 0;
//fread(&file_length,sizeof(long),1,com_file);
char *buf = (char*)malloc(sizeof(long) * 8 * sizeof(char));
fgets(buf,sizeof(long) * 8,com_file);
file_length = atol(buf);
free(buf);
if(file_length == 0) error("file length is 0!\n");
phuff_tree huff_tree = build_huff_tree(weight);
unsigned char temp_read = 0;
pheap_node huff_pos = huff_tree->root;
int i = 0,j = 0;
while(!feof(com_file))
{
fread(&temp_read,sizeof(char),1,com_file);
for(i = 0; i < 8; i++)
{
if((temp_read & 0x80) >> 1 == 64)
huff_pos = huff_pos->right_child;
else
huff_pos = huff_pos->left_child;
if(huff_pos->left_child == NULL && huff_pos->right_child == NULL)
{
fwrite(&(huff_pos->value),sizeof(unsigned char),1,ori_file);
huff_pos = huff_tree->root;
j++;
if(j >= file_length) break;
}
temp_read=temp_read << 1;
}
}
fclose(ori_file);
fclose(com_file);
free(huff_tree);
}
測試
/*
 * Test driver: asks for a source file name and a compressed file name,
 * compresses the source, then decompresses the result into a file named
 * "decompress". Returns 0 on success and 1 when a file name cannot be read.
 */
int main()
{
    char ori_file_name[20],com_file_name[20];

    printf("請輸入要壓縮前的檔名(長度小於20):");
    /* %19s limits the input to the buffer size, leaving room for the NUL. */
    if(scanf("%19s",ori_file_name) != 1)
    {
        fprintf(stderr,"failed to read the input file name\n");
        return 1;
    }

    printf("請輸入要壓縮後的檔名(長度小於20):");
    if(scanf("%19s",com_file_name) != 1)
    {
        fprintf(stderr,"failed to read the output file name\n");
        return 1;
    }

    printf("正在壓縮...\n");
    compress(ori_file_name,com_file_name);

    printf("正在解壓,解壓後的檔名為decompress...\n");
    decompress(com_file_name,"decompress");
    return 0;
}