1. 程式人生 > >通過哈夫曼編碼壓縮檔案

通過哈夫曼編碼壓縮檔案

原理就是統計待壓縮檔案中各字元的頻率,構建哈夫曼樹,然後求出每個字元的哈夫曼編碼,將字元頻率(解壓的時候通過字元頻率重新建樹)和按哈夫曼編碼壓縮後的資料寫入檔案,完成壓縮。

壓縮程式碼:

// Counts how often each byte value occurs in a file.
//
// filename:  path of the file to scan.
// frequency: output table of 256 counters indexed by byte value. It is
//            reset to zero before the file is opened, so it is left
//            all-zero when the file cannot be opened.
void get_frequency(string filename, int frequency[256])
{
    // Clear the table first so a failed open never leaves the caller with
    // uninitialized counters.
    memset(frequency, 0, sizeof(int) * 256);

    // Binary mode: count the bytes exactly as stored, without newline
    // translation.
    ifstream fin(filename, ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    // get(char&) fails at end of file, which ends the loop without
    // counting a bogus extra value.
    char byte;
    while (fin.get(byte))
    {
        frequency[static_cast<unsigned char>(byte)]++;
    }
    // fin is closed by its destructor.
}
// A node of the Huffman tree.
struct node
{
    unsigned char ch; // byte value carried by the node (internal nodes are created with 0)
    int w;            // weight: a byte's frequency, or the sum of both children's weights
    node *rch;        // right child
    node *lch;        // left child
};
// Allocates a tree node with malloc and fills in every field.
//
// ch:  byte value to store in the node
// w:   weight to store in the node
// lch: left child (NULL by default)
// rch: right child (NULL by default)
//
// Returns the newly allocated node. The memory comes from malloc, so it
// has to be released with free().
node* new_node(unsigned char ch, int w, node* lch = NULL, node* rch = NULL)
{
    node* created = static_cast<node*>(malloc(sizeof(node)));
    created->lch = lch;
    created->rch = rch;
    created->w = w;
    created->ch = ch;
    return created;
}
//優先順序佇列比較大小的方法
struct cmp
{
    bool operator () (node* x, node* y)
    {
        return x->w > y->w;
    }
};
// Builds the Huffman tree for a table of byte frequencies.
//
// frequency: 256 counters indexed by byte value; entries equal to 0 are
//            left out of the tree.
//
// Returns the root of the tree, or NULL when every counter is 0. The nodes
// are allocated by new_node; the caller owns the returned tree.
node* build_haffman(int frequency[256])
{
    priority_queue<node*, vector<node*>, cmp> q;
    for (int i = 0; i < 256; i++)
    {
        if (frequency[i] != 0)
        {
            q.push(new_node((unsigned char)i, frequency[i]));
        }
    }

    // No symbols at all: there is no tree to return, and calling top() on
    // an empty queue would be undefined behavior.
    if (q.empty())
    {
        return NULL;
    }

    // A single distinct symbol: hang the leaf under a parent as its left
    // child so that the symbol is reached by one '0' step instead of
    // sitting at the root with an empty (zero-bit) code.
    if (q.size() == 1)
    {
        node* only = q.top();
        return new_node(0, only->w, only, NULL);
    }

    // Repeatedly merge the two lightest nodes under a new parent until a
    // single root remains.
    while (q.size() > 1)
    {
        node* x = q.top();
        q.pop();
        node* y = q.top();
        q.pop();

        q.push(new_node(0, x->w + y->w, x, y));
    }
    return q.top();
}
// Destroys a tree with a post-order traversal: both subtrees are released
// before the node itself.
//
// root: address of the pointer to the tree. A NULL address or an empty
//       tree is a no-op. After the call *root is NULL, so the caller is
//       not left holding a dangling pointer and a second call is harmless.
void destory_haffman(node **root)
{
    if (root == NULL || *root == NULL)
    {
        return;
    }

    destory_haffman(&(*root)->lch);
    destory_haffman(&(*root)->rch);
    free(*root);
    *root = NULL;
}
// Walks the tree and records the code of every leaf.
//
// root: subtree to visit (NULL is ignored)
// v:    path from the top of the tree to `root`, one '0' (left step) or
//       '1' (right step) per edge; restored to its incoming contents
//       before the function returns
// code: output table indexed by byte value; the entry of each leaf found
//       receives the path leading to it as a string of '0'/'1' characters
void get_haffman_code(node* root, vector<char>& v, string code[256])
{
    if (root == NULL)
    {
        return;
    }

    const bool is_leaf = (root->lch == NULL && root->rch == NULL);
    if (is_leaf)
    {
        code[root->ch].assign(v.begin(), v.end());
    }

    // Left branch first, labelled '0'; then right branch, labelled '1'.
    node* const branches[2] = { root->lch, root->rch };
    const char labels[2] = { '0', '1' };
    for (int side = 0; side < 2; ++side)
    {
        v.push_back(labels[side]);
        get_haffman_code(branches[side], v, code);
        v.pop_back();
    }
}
// Packs 8 characters of a '0'/'1' string into one byte, most significant
// bit first.
//
// haff_code: string of '0' and '1' characters. Taken by const reference so
//            that packing a long bit string byte by byte does not copy the
//            whole string on every call.
// index:     position of the first of the 8 characters.
//
// Returns the packed byte. Positions outside the string, and any character
// other than '1', contribute a 0 bit, so a short tail is zero-padded
// instead of being read out of range.
unsigned char create_uchar(const string& haff_code, int index)
{
    const long long length = static_cast<long long>(haff_code.length());
    unsigned char packed = 0;
    for (int bit = 0; bit < 8; bit++)
    {
        packed = static_cast<unsigned char>(packed << 1);

        // Computed in a wider type so a negative index cannot wrap around
        // to a valid position.
        const long long pos = static_cast<long long>(index) + bit;
        if (pos >= 0 && pos < length && haff_code[static_cast<size_t>(pos)] == '1')
        {
            packed |= 1;
        }
    }
    return packed;
}
// Compresses src_file into dst_file.
//
// Layout of the output file, in order:
//   1. the 256-entry int frequency table (raw bytes, in this machine's int
//      size and byte order), from which the decoder rebuilds the tree;
//   2. an int holding the number of meaningful code bits;
//   3. the code bits packed 8 per byte, most significant bit first, the
//      last byte padded with 0 bits.
//
// Returns without doing anything else when either file cannot be opened.
// The bit count is stored as an int, so the encoded length has to fit in
// one. The whole bit string is built in memory before it is written.
void compress_to_file(string src_file, string dst_file)
{
    // Binary mode on both sides: the input is treated as raw bytes and the
    // output is a binary container.
    ifstream fin(src_file, ios::binary);
    ofstream fout(dst_file, ios::binary);

    if (!fin.is_open() || !fout.is_open())
    {
        return;
    }

    // Count the bytes of the file that is actually being compressed.
    int frequency[256];
    get_frequency(src_file, frequency);

    bool has_data = false;
    for (int i = 0; i < 256 && !has_data; i++)
    {
        has_data = (frequency[i] != 0);
    }

    // An empty source has no symbols, so no tree is built for it; the
    // output then consists of the header only, with a bit count of 0.
    string code[256];
    node* root = NULL;
    if (has_data)
    {
        vector<char> v;
        root = build_haffman(frequency);
        get_haffman_code(root, v, code);
    }

    // Translate every input byte into its code.
    string haff_code;
    char byte;
    while (fin.get(byte))
    {
        haff_code += code[static_cast<unsigned char>(byte)];
    }

    // The bit count is recorded before padding so the decoder knows where
    // the real data ends.
    int len = (int)haff_code.length();
    fout.write((const char*)frequency, sizeof(int) * 256);
    fout.write((const char*)&len, sizeof(int));

    // Pad to a whole number of bytes.
    while (haff_code.length() % 8 != 0)
    {
        haff_code += '0';
    }

    for (size_t i = 0; i < haff_code.length(); i += 8)
    {
        unsigned char temp = create_uchar(haff_code, (int)i);
        fout.write((const char*)&temp, sizeof(char));
    }

    fout.close();
    fin.close();
    destory_haffman(&root);
}

解壓部分比較簡單:先讀取字元頻率並重新建樹,然後逐個讀取unsigned char,按位遍歷哈夫曼樹,遇到葉子節點就把對應的字元輸出到解壓檔案。

// Decodes the bits of one byte by walking the Huffman tree.
//
// root: root of the tree
// pos:  node at which the walk resumes (where the previous byte stopped)
// temp: the byte to decode; bits are consumed from most to least significant
// s:    output buffer for decoded bytes; one byte can complete at most 8
//       symbols, so it needs room for 8 chars
// slen: set to the number of bytes written to s by this call
// cnt:  running count of bits consumed so far; advanced by this call
// len:  total number of meaningful bits in the stream; the walk stops once
//       cnt reaches it, so padding bits are never decoded
//
// Returns the node at which the walk stopped, to be passed back in as
// `pos` for the next byte.
node* get_res(node* root, node* pos, unsigned char temp, char* s, int &slen, int &cnt, int len)
{
    slen = 0;

    // Without a tree there is nothing to walk.
    if (root == NULL)
    {
        return pos;
    }
    if (pos == NULL)
    {
        pos = root;
    }

    for (int mask = 128; mask > 0 && cnt < len; mask >>= 1)
    {
        node* next = (mask & temp) ? pos->rch : pos->lch;
        cnt++;

        // No branch for this bit (corrupt stream or a degenerate tree):
        // drop the bit and restart from the root instead of dereferencing
        // a null pointer.
        if (next == NULL)
        {
            pos = root;
            continue;
        }

        pos = next;
        if (pos->lch == NULL && pos->rch == NULL)
        {
            // Reached a leaf: emit its byte and start the next symbol.
            s[slen++] = pos->ch;
            pos = root;
        }
    }
    return pos;
}

// Decompresses src_file into dst_file.
//
// Expected layout of src_file, in order: a 256-entry int frequency table
// (raw bytes), an int holding the number of meaningful code bits, then the
// code bits packed 8 per byte, most significant bit first.
//
// Returns early, leaving dst_file empty, when the header is truncated or
// describes no data. Returns without decoding when either file cannot be
// opened.
void decompress_to_file(string src_file, string dst_file)
{
    // Binary mode: the input is a binary container and the output must
    // reproduce the original bytes exactly.
    ifstream fin(src_file, ios::binary);
    ofstream fout(dst_file, ios::binary);

    if (!fin.is_open() || !fout.is_open())
    {
        return;
    }

    // Read the header; stop if the file is too short to contain it.
    int frequency[256];
    int len = 0;
    if (!fin.read((char*)frequency, sizeof(int) * 256) ||
        !fin.read((char*)&len, sizeof(int)))
    {
        return;
    }

    bool has_symbols = false;
    for (int i = 0; i < 256 && !has_symbols; i++)
    {
        has_symbols = (frequency[i] != 0);
    }

    // Nothing was encoded: there is no tree to build and nothing to decode.
    if (!has_symbols || len <= 0)
    {
        return;
    }

    // Rebuild the tree from the same frequency table the encoder used.
    node* root = build_haffman(frequency);

    node* pos = root;
    char s[8];
    int slen = 0, cnt = 0;
    char raw;
    // Stop as soon as every meaningful bit has been consumed or the input
    // runs out; testing the read itself avoids processing a stale byte
    // after the last successful read.
    while (cnt < len && fin.get(raw))
    {
        pos = get_res(root, pos, static_cast<unsigned char>(raw), s, slen, cnt, len);
        fout.write(s, slen);
    }

    destory_haffman(&root);

    fin.close();
    fout.close();
}
// Demo entry point: compresses a sample file and then decompresses the
// result into a third file.
int main()
{
    const string original_path = "/Users/Rubik/Desktop/123.txt";
    const string packed_path = "/Users/Rubik/Desktop/out.txt";
    const string restored_path = "/Users/Rubik/Desktop/456.txt";

    compress_to_file(original_path, packed_path);
    decompress_to_file(packed_path, restored_path);
    return 0;
}

效果如下