通過哈夫曼編碼壓縮檔案
阿新 • • 發佈:2018-11-08
原理就是統計待壓縮檔案中每個字元的出現頻率,根據頻率構建哈夫曼樹,然後求出每個字元的哈夫曼編碼,最後將字元頻率(解壓的時候通過字元頻率重新建樹)和用哈夫曼編碼編碼後的資料寫入檔案,完成壓縮。
壓縮程式碼:
// Count how often each byte value occurs in a file.
//
// filename:  path of the file to scan.
// frequency: output table of 256 counters, indexed by byte value.
//
// The table is cleared before the file is opened, so it is all zeros (rather
// than whatever the caller's array happened to contain) when the file cannot
// be opened.
void get_frequency(string filename, int frequency[256])
{
    memset(frequency, 0, sizeof(int) * 256);

    ifstream fin(filename);
    if (!fin.is_open())
    {
        return;
    }

    // get(char&) fails exactly when no further byte is available, which ends
    // the loop without a separate eof() test.
    char c;
    while (fin.get(c))
    {
        // Go through unsigned char so bytes >= 0x80 index the upper half of
        // the table instead of producing a negative index.
        frequency[(unsigned char)c]++;
    }
    // fin is closed by its destructor.
}
// A node of the Huffman tree. A leaf (both children NULL) stands for the byte
// value in `ch`; an internal node only uses `w` and the child links.
struct node
{
    unsigned char ch;   // byte value (meaningful for leaves)
    int w;              // weight: occurrence count of the subtree
    node *rch, *lch;    // right / left child
};

// Allocate a node with the given byte value, weight and children.
// The node is obtained from malloc(), so it has to be released with free().
node* new_node(unsigned char ch, int w, node* lch = NULL, node* rch = NULL)
{
    node* temp = (node*)malloc(sizeof(node));
    temp->ch = ch;
    temp->w = w;
    temp->rch = rch;
    temp->lch = lch;
    return temp;
}

// Ordering for the priority queue: "greater weight" compares as lower
// priority, so the node with the smallest weight is the one on top.
struct cmp
{
    bool operator () (node* x, node* y) const
    {
        return x->w > y->w;
    }
};

// Build the Huffman tree for a frequency table and return its root.
//
// frequency: 256 counters indexed by byte value; entries equal to 0 get no leaf.
//
// Returns NULL when every counter is 0 (there is nothing to build a tree from).
// When exactly one byte value occurs, the single leaf is hung below an extra
// root as its left child, so that the byte receives the one-bit code "0"
// instead of an empty code that could not be written or decoded.
node* build_haffman(int frequency[256])
{
    priority_queue<node*, vector<node*>, cmp> q;
    for (int i = 0; i < 256; i++)
    {
        if (frequency[i] != 0)
        {
            q.push(new_node((unsigned char)i, frequency[i]));
        }
    }

    if (q.empty())
    {
        return NULL;
    }
    if (q.size() == 1)
    {
        node* only = q.top();
        return new_node(0, only->w, only, NULL);
    }

    // Repeatedly merge the two lightest subtrees until one tree remains.
    while (q.size() > 1)
    {
        node* x = q.top();
        q.pop();
        node* y = q.top();
        q.pop();
        q.push(new_node(0, x->w + y->w, x, y));
    }
    return q.top();
}
// Destroy a Huffman tree by post-order traversal: both subtrees are released
// before the node itself.
//
// root: address of the pointer to the (sub)tree to destroy. After the call the
//       pointed-to pointer is NULL, so the caller is not left holding a
//       dangling pointer and a second call on the same pointer is harmless.
//       A NULL `root` or an empty tree is a no-op.
void destory_haffman(node **root)
{
    if (root == NULL || *root == NULL)
    {
        return;
    }
    destory_haffman(&(*root)->lch);
    destory_haffman(&(*root)->rch);
    free(*root);
    *root = NULL;
}
//獲取字元的哈夫曼編碼 void get_haffman_code(node* root, vector<char>& v, string code[256]) { if (root) { if (root->lch == NULL && root->rch == NULL) { string temp = ""; for (int i = 0; i < v.size(); i++) { temp += v[i]; } code[root->ch] = temp; } v.push_back('0'); get_haffman_code(root->lch, v, code); v.pop_back(); v.push_back('1'); get_haffman_code(root->rch, v, code); v.pop_back(); } }
// Pack eight '0'/'1' characters into one unsigned char.
//
// haff_code: string of '0' and '1' characters. Taken by const reference so
//            that calling this once per output byte does not copy the whole
//            string each time.
// index:     position of the character that becomes the most significant bit.
//
// Returns the byte whose bits, from most to least significant, are the
// characters haff_code[index] .. haff_code[index + 7]. A character equal to
// '1' sets the bit; any other character, and any position that lies outside
// the string, counts as a 0 bit (so a short tail is zero-padded instead of
// being read out of bounds).
unsigned char create_uchar(const string& haff_code, int index)
{
    const long long length = (long long)haff_code.length();
    unsigned char ch = 0;
    for (int bit = 0; bit < 8; bit++)
    {
        const long long pos = (long long)index + bit;
        ch <<= 1;
        if (pos >= 0 && pos < length && haff_code[(size_t)pos] == '1')
        {
            ch |= 1;
        }
    }
    return ch;
}
// Compress src_file into dst_file with Huffman coding.
//
// Layout of the output, as written below (ints are dumped from memory, so
// their size and byte order are those of the machine running this code):
//   1. 256 ints - occurrence count of every byte value of the source
//   2. 1 int    - number of meaningful code bits in the payload
//   3. payload  - the code bits, most significant bit first, padded with 0
//                 bits up to a whole number of bytes
//
// The number of code bits is also printed to standard output.
// Nothing is compressed if either file cannot be opened.
void compress_to_file(string src_file, string dst_file)
{
    ifstream fin(src_file);
    ofstream fout(dst_file, ios::binary);
    if (!fin.is_open() || !fout.is_open())
    {
        return;
    }

    // Zero-initialised so the table is well defined whatever get_frequency does.
    int frequency[256] = {0};
    // The statistics must come from the file being compressed, not from a
    // fixed path.
    get_frequency(src_file, frequency);

    bool has_data = false;
    for (int i = 0; i < 256 && !has_data; i++)
    {
        has_data = (frequency[i] != 0);
    }

    // An empty source has no symbols to build a tree from; skip the tree and
    // emit a header that describes zero payload bits.
    string code[256];
    node* root = NULL;
    if (has_data)
    {
        vector<char> v;
        root = build_haffman(frequency);
        get_haffman_code(root, v, code);
    }

    // Translate the source byte by byte into one long string of '0'/'1'.
    string haff_code = "";
    char ch;
    while (fin.get(ch))
    {
        haff_code += code[(unsigned char)ch];
    }

    int len = (int)haff_code.length();
    cout << len << endl;
    fout.write((const char*)frequency, sizeof(int) * 256);
    fout.write((const char*)&len, sizeof(int));

    // Pad to a multiple of 8 so the string splits into whole bytes; `len`
    // recorded above tells the reader where the real bits stop.
    haff_code.append((8 - haff_code.length() % 8) % 8, '0');
    for (size_t i = 0; i < haff_code.length(); i += 8)
    {
        unsigned char temp = create_uchar(haff_code, (int)i);
        fout.write((const char*)&temp, sizeof(char));
    }

    fout.close();
    fin.close();
    destory_haffman(&root);
}
解壓部分比較簡單:先從壓縮檔案讀取字元頻率並重新建樹,再逐個讀取 unsigned char,按位遍歷哈夫曼樹,每遇到葉子節點就把對應的字元輸出到解壓檔案。
// Decode the bits of one payload byte by walking the Huffman tree.
//
// root: root of the tree; the walk restarts here after every decoded symbol.
// pos:  node at which the walk for the previous byte stopped.
// temp: the payload byte; its bits are consumed from most to least significant
//       (0 = go to the left child, 1 = go to the right child).
// s:    receives the decoded bytes; one byte can complete at most 8 symbols.
// slen: set to the number of bytes stored in s by this call.
// cnt:  running count of bits consumed so far; advanced by this call.
// len:  total number of meaningful bits; bits beyond it are padding and are
//       not consumed.
//
// Returns the node at which the walk stopped, to be passed as `pos` for the
// next byte.
//
// If the walk has nowhere to go (`pos` is NULL, or the requested child does not
// exist, which a well-formed bit stream never asks for), decoding is abandoned:
// cnt is raised to len so that later calls consume nothing, and root is
// returned.
node* get_res(node* root, node* pos, unsigned char temp, char* s, int &slen, int &cnt, int len)
{
    slen = 0;
    if (pos == NULL)
    {
        if (cnt < len)
        {
            cnt = len;
        }
        return root;
    }

    for (int mask = 128; mask > 0 && cnt < len; mask >>= 1)
    {
        node* next = (mask & temp) ? pos->rch : pos->lch;
        if (next == NULL)
        {
            cnt = len;
            return root;
        }
        pos = next;
        cnt++;

        // Reaching a leaf completes one symbol.
        if (pos->lch == NULL && pos->rch == NULL)
        {
            s[slen++] = pos->ch;
            pos = root;
        }
    }
    return pos;
}
// Restore into dst_file a file that compress_to_file wrote to src_file.
//
// Reads the 256-int frequency table, rebuilds the Huffman tree from it, prints
// every non-empty code to standard output, then reads the bit count and decodes
// the payload byte by byte into dst_file.
//
// Nothing is decoded if a file cannot be opened or the frequency table cannot
// be read in full. A frequency table of all zeros (an empty original file)
// produces an empty output.
void decompress_to_file(string src_file, string dst_file)
{
    // The compressed file holds raw binary data, so it is read in binary mode
    // just as it was written.
    ifstream fin(src_file, ios::binary);
    ofstream fout(dst_file, ios::binary);
    if (!fin.is_open() || !fout.is_open())
    {
        return;
    }

    int frequency[256];
    if (!fin.read((char*)frequency, sizeof(int) * 256))
    {
        return;
    }

    bool has_data = false;
    for (int i = 0; i < 256 && !has_data; i++)
    {
        has_data = (frequency[i] != 0);
    }
    if (!has_data)
    {
        return;
    }

    node* root = build_haffman(frequency);
    vector<char> v;
    string code[256];
    get_haffman_code(root, v, code);
    for (int i = 0; i < 256; i++)
    {
        if (code[i].length() > 0)
        {
            cout << code[i] << endl;
        }
    }

    if (root->lch == NULL && root->rch == NULL)
    {
        // The tree is a single leaf: the original held one distinct byte value,
        // which has an empty code, so there are no bits to decode. The leaf's
        // weight is that byte's occurrence count, which is all that is needed
        // to reproduce the file.
        for (int i = 0; i < root->w; i++)
        {
            fout.put((char)root->ch);
        }
    }
    else
    {
        int len = 0;
        if (fin.read((char*)&len, sizeof(int)))
        {
            node *pos = root;
            char s[8];
            int slen = 0, cnt = 0;
            char byte;
            // Stop as soon as all meaningful bits are decoded or the payload
            // runs out; a byte is only decoded after it was actually read.
            while (cnt < len && fin.get(byte))
            {
                pos = get_res(root, pos, (unsigned char)byte, s, slen, cnt, len);
                fout.write(s, slen);
            }
        }
    }

    destory_haffman(&root);
    fin.close();
    fout.close();
}
// Demo driver: compresses a sample file, then decompresses the result into a
// third file so that it can be compared with the original.
int main()
{
    const char* const original_path = "/Users/Rubik/Desktop/123.txt";
    const char* const packed_path = "/Users/Rubik/Desktop/out.txt";
    const char* const restored_path = "/Users/Rubik/Desktop/456.txt";

    compress_to_file(original_path, packed_path);
    decompress_to_file(packed_path, restored_path);
    return 0;
}
效果如下