利用哈夫曼樹進行檔案壓縮
阿新 • • 發佈:2019-01-05
專案描述:
專案簡介:利用哈夫曼編碼的方式對檔案進行壓縮,並且對壓縮檔案可以解壓
開發環境:windows vs2013
專案概述:
1.壓縮
a.讀取檔案,統計每個字元出現的次數,並以出現次數作為權值構建哈夫曼樹
b.哈夫曼樹是利用小堆構成,字元出現次數少的節點指標存在堆頂,出現次數多的在堆底
c.每次取出堆頂的兩個節點,以兩者權值之和建立父節點後再放回堆中,直到堆中只剩一個節點,該節點即為哈夫曼樹的根,這時哈夫曼樹也建成
d.從哈夫曼樹中獲取哈夫曼編碼,然後再根據整個字元陣列來獲取出現了的字元的編碼
e.獲取編碼後每次湊滿8位就將編碼串寫入到壓縮檔案(編碼位為1時先將value左移一位再與1做按位或,為0時只左移一位)
f.寫好配置檔案,統計每個字元及其出現次數,並以“字元+','+次數”的形式儲存到配置檔案中
2.解壓
a.讀取配置檔案,統計所有字元的個數
b.構建哈夫曼樹,讀取壓縮檔案,按讀到的編碼位在樹中走到葉子節點,將該節點所含的字元寫入到解壓縮檔案中,直到將壓縮檔案讀完
c.壓縮解壓縮完全完成,進行小檔案大檔案的測試
程式碼如下:
#pragma once #include"HuffManTree.h" #include<string> struct CharInfo { CharInfo(int count=0) :_count(count) { } bool operator<(const CharInfo info) { return _count < info._count; } bool operator>(const CharInfo info) { return _count>info._count; } bool operator!=(const CharInfo info) { return _count != info._count; } CharInfo operator+(const CharInfo Info) { return CharInfo(_count + Info._count); } char _ch;//字元 int _count;//字元出現的次數 string _code;//字元對應的編碼 }; class FileCompress { public: FileCompress() { for (int i = 0; i < 256; i++) { _info[i]._ch = i; _info[i]._count = 0; } } public: void Compress(const char* FileName)//壓縮 { FILE* fout = fopen(FileName, "rb"); assert(fout); //統計字元出現的次數 int ch = fgetc(fout); printf("%c\n", ch); int count = 0; while (ch!= EOF) { _info[unsigned char(ch)]._count++; ch = fgetc(fout); count++; } //構建哈夫曼樹 CharInfo invalid; HuffManTree<CharInfo> h(_info, 256, invalid); //生成哈夫曼編碼 string code; _GetHuffManCode(h._GetRoot(), code); string CompressFileName = FileName; CompressFileName += ".compress"; FILE* fin = fopen(CompressFileName.c_str(), "wb"); assert(fin); fseek(fout, 0, SEEK_SET);//從檔案開頭 ch =(unsigned char)fgetc(fout); char value = 0; int size = 0; while (ch != EOF) { string _ccode = _info[(unsigned char)ch]._code; for (int i = 0; i < _ccode.size(); ++i) { value <<= 1; if (_ccode[i] =='1') { value |=1; } size++; if (size == 8) { fputc(value, fin); value = 0; size = 0; } } ch = fgetc(fout); } //補位 if (size!=0) { value <<= ( 8- size); fputc(value, fin); } //寫配置檔案 string configFileName = FileName; configFileName += ".config.txt"; FILE* finConfig = fopen(configFileName.c_str(), "wb"); assert(finConfig); string str; char buf[128]; for (int i = 0; i < 256; i++) { if (_info[i]._count>0) { str += _info[i]._ch; str += ','; _itoa(_info[i]._count, buf, 10); str += buf; str += '\n'; fputs(str.c_str(), finConfig); str.clear(); } } fclose(fin); fclose(fout); fclose(finConfig); } void unCompress(const char* FileName)//解壓縮 { //讀配置檔案 string 
configFileNane = FileName; configFileNane += ".config.txt"; FILE* foutConfig = fopen(configFileNane.c_str(), "rb"); assert(foutConfig); int count = 0; string str; while (Read_a_Line(foutConfig,str)) { if (str.empty()) { str += '\n'; count += 1; str.clear(); } //else //{ // //_info[(unsigned char)str[0]] = atoi(str.substr(2).c_str()); // count += _info[(unsigned char)str[0]]._count; // str.clear(); //} // _info[((unsigned char)str[0])]._count = atoi(str.substr(2).c_str()); //count += _info[(unsigned char)str[0]]._count; else { unsigned char ch = str[0]; _info[ch]._count = atoi(str.substr(2).c_str()); count += _info[ch]._count; str.clear(); } } CharInfo invaild; HuffManTree<CharInfo> tree(_info, 256,invaild); string unCompressFileName = FileName; unCompressFileName += ".unCompress";//解壓縮檔案 string CompressFileName = FileName; CompressFileName += ".compress"; FILE* fout = fopen(CompressFileName.c_str(), "rb"); assert(fout); FILE* fin = fopen(unCompressFileName.c_str(), "wb"); assert(fin); HuffManTreeNode<CharInfo>* root = tree._GetRoot(); HuffManTreeNode<CharInfo>* cur = root; int ch = fgetc(fout); int size =7; while (ch != EOF) { if (ch & (1 << size)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left==NULL&&cur->_right==NULL) { fputc(cur->_weight._ch, fin); cur = root; //count--; //if (count == 0) // break; } size--; if (size<0) { ch=fgetc(fout); size = 7; } } fclose(fin); fclose(fout); fclose(foutConfig); } protected: bool Read_a_Line(FILE*& fout,string& str) { int ch = fgetc(fout); if (ch == EOF) return false; while (ch != EOF&&ch!='\n') { str += ch; ch = fgetc(fout); } return true; } void _GetHuffManCode(const HuffManTreeNode<CharInfo>* root,string code)//生成哈夫曼編碼 { if (root == NULL) { return; } if (root->_left == NULL&&root->_right == NULL) { _info[unsigned char((root->_weight)._ch)]._code = code; return; } if (root->_left) _GetHuffManCode(root->_left, code + '0');//左路為0 if (root->_right) _GetHuffManCode(root->_right, code + '1');//右路為1 } private: 
CharInfo _info[256]; };
#pragma once #include<iostream> #include"Heap.h" using namespace std; template<class T> struct HuffManTreeNode { HuffManTreeNode(const T& weight) :_left(NULL) ,_right(NULL) ,_weight(weight) { } HuffManTreeNode<T>* _left; HuffManTreeNode<T>* _right; T _weight; }; template<class T> class HuffManTree { public: typedef HuffManTreeNode<T> Node; public: HuffManTree(T* arr, int size, T& invalid)//建立一個小堆 { struct CompareNode { bool operator()(Node*& L,Node*& R) { return L->_weight < R->_weight; } }; Heap<Node*, CompareNode> MinHeap; for (int i = 0; i < size; i++) { if (arr[i]!=invalid) MinHeap.Push(new Node(arr[i])); } while (MinHeap.Size()>1) { Node* left = MinHeap.Top(); MinHeap.Pop(); Node* right = MinHeap.Top(); MinHeap.Pop(); Node* parent = new Node(left->_weight + right->_weight); parent->_left = left; parent->_right = right; MinHeap.Push(parent); } _root = MinHeap.Top(); MinHeap.Pop(); } Node* _GetRoot() { return _root; } private: Node* _root; }; #pragma once #include<iostream> #include<vector> using namespace std; #include<assert.h> template<class T> struct Small { public: bool operator()(const T& l, const T& r) { return l < r; } }; // //template<class T>//可用來建大堆 //struct Big //{ // bool operator()(const T& l, const T& r) // { // return l > r; // } //}; template<class T,class CompareNode=Small<T>>//建立小堆 class Heap { public: Heap() { } Heap(const T* arr,int size) { for (int i = 0; i < size; i++) { _v.push_back(arr[i]); } for (int i = _v.size() / 2-1; i>=0; i--) { _AdjustDown(i); } } ~Heap() {} void Push(const T& d) { _v.push_back(d); _AdjustUp(_v.size()-1); } int Size() { return _v.size(); } T& Top() { return *(_v.begin()); } void Pop()//用交換法 { swap(_v[0], _v[_v.size()-1]); _v.pop_back(); _AdjustDown(0); } protected: void _AdjustDown(int parent)//向下調整 { CompareNode compareNode; int child = 2 * parent + 1; while (child < _v.size()) { if (child + 1 < _v.size() && compareNode(_v[child + 1], _v[child]))//找較小的child { child++; } if (compareNode(_v[child], _v[parent])) { 
swap(_v[parent], _v[child]); parent = child; child = 2 * parent + 1; } else break; } } void _AdjustUp(int child)//向上調整 { CompareNode compareNode; int parent = (child-1)/2; while (child>0) { /*if (child + 1 < _v.size() && compareNode(_v[child + 1], _v[child])) { child++; } */ if (compareNode(_v[child], _v[parent])) { swap(_v[parent], _v[child]); child = parent; parent = (child - 1) / 2; } else break; } } private: vector<T> _v; }; #include"FileCompress.h" void test() { FileCompress f; f.Compress("input.txt"); f.unCompress("input.txt"); } int main() { test(); system("pause"); return 0; }