Java實現哈夫曼編碼和解碼
阿新 • • 發佈:2019-02-13
題目:將一個字串進行哈夫曼編碼;編碼過程中,會得到每個字元的編碼,通過已知的每個字元的編碼對之前的編碼進行解碼。
分析:
首先是哈夫曼編碼演算法,引用李澤年寫的《多媒體技術教程》中對哈夫曼編碼演算法的描述:
• Initialization: Put all symbols on a list sorted according to their frequency counts.
• Repeat until the list has only one symbol left:
  – From the list pick two symbols with the lowest frequency counts. Form a Huffman subtree that has these two symbols as child nodes and create a parent node.
  – Assign the sum of the children's frequency counts to the parent and insert it into the list such that the order is maintained.
  – Delete the children from the list.
• Assign a codeword for each leaf based on the path from the root.

接下來是解碼。雖然解碼過程很簡單,但卻是本文存在的理由。我在網上看了一些文章,它們都忽略了一個問題:編碼端和解碼端共同擁有的東西是什麼?也就是說,解碼要依靠什麼?本文的答案是“每個字元的編碼”:它在編碼過程中生成,並和編碼後的字串一起傳到解碼端用於解碼。你也可以說是“每個字元出現的次數”或者“哈夫曼樹”,但不管是哪一個,都需要先通過它們得到“每個字元的編碼”,之後才能進行解碼。
下面是Java程式碼:
package com.liyuncong.algorithms.algorithms_huffman;

/**
 * A node of the Huffman tree.
 *
 * <p>Each node carries a {@link Data} payload and optional left/right
 * children. Nodes are ordered by comparing their payloads.
 *
 * @author yuncong
 */
public class Node implements Comparable<Node> {

    private Node leftChild;
    private Data data;
    private Node rightChild;

    /** @return the left child, or {@code null} if none has been set */
    public Node getLeftChild() {
        return this.leftChild;
    }

    /** @param leftChild the node to attach as the left child */
    public void setLeftChild(Node leftChild) {
        this.leftChild = leftChild;
    }

    /** @return the payload of this node, or {@code null} if none has been set */
    public Data getData() {
        return this.data;
    }

    /** @param data the payload to store in this node */
    public void setData(Data data) {
        this.data = data;
    }

    /** @return the right child, or {@code null} if none has been set */
    public Node getRightChild() {
        return this.rightChild;
    }

    /** @param rightChild the node to attach as the right child */
    public void setRightChild(Node rightChild) {
        this.rightChild = rightChild;
    }

    /**
     * Renders this node together with both children, so the text covers the
     * whole subtree rooted here.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Node [leftChild=");
        text.append(this.leftChild);
        text.append(", data=").append(this.data);
        text.append(", rightChild=").append(this.rightChild);
        text.append("]");
        return text.toString();
    }

    /**
     * Orders nodes by delegating to the comparison of their payloads.
     *
     * @param o the node to compare against
     * @return the result of comparing this node's data with {@code o}'s data
     */
    @Override
    public int compareTo(Node o) {
        Data otherData = o.getData();
        return this.data.compareTo(otherData);
    }
}
package com.liyuncong.algorithms.algorithms_huffman;
/**
* Data用於儲存一個字元及其出現的次數
* @author yuncong
*
*/
public class Data implements Comparable<Data>{
// 字元
private char c = 0;
// 字元出現的次數
private int frequency = 0;
public char getC() {
return c;
}
public void setC(char c) {
this.c = c;
}
public int getFrequency() {
return frequency;
}
public void setFrequency(int frequency) {
this.frequency = frequency;
}
@Override
public String toString() {
return "Data [c=" + c + ", frequency=" + frequency + "]";
}
@Override
public int compareTo(Data o) {
if (this.frequency < o.getFrequency()) {
return -1;
} else if (this.frequency > o.getFrequency()) {
return 1;
} else {
return 0;
}
}
}
package com.liyuncong.algorithms.algorithms_huffman;
import java.util.Map;
/**
 * The outcome of encoding a string: the encoded text plus the
 * character-to-code table that was used to produce it.
 *
 * @author yuncong
 */
public class EncodeResult {

    /** The encoded form of the input string. */
    private final String encode;

    /** Maps each character to its code. */
    private final Map<Character, String> letterCode;

    /**
     * Creates a result from its two parts. The map is stored as given, not
     * copied.
     *
     * @param encode the encoded string
     * @param letterCode the character/code pairs
     */
    public EncodeResult(String encode, Map<Character, String> letterCode) {
        this.letterCode = letterCode;
        this.encode = encode;
    }

    /** @return the encoded string */
    public String getEncode() {
        return this.encode;
    }

    /** @return the character/code pairs (the same map instance passed to the constructor) */
    public Map<Character, String> getLetterCode() {
        return this.letterCode;
    }
}
package com.liyuncong.algorithms.algorithms_huffman;
/**
 * Contract for a Huffman coder: turn a string into an {@link EncodeResult}
 * and turn an {@link EncodeResult} back into the original string.
 */
public interface HuffmanAlgorithm {

    /**
     * Encodes a string.
     *
     * @param str the string to encode
     * @return the encoding result
     */
    EncodeResult encode(String str);

    /**
     * Recovers the original string from an encoding result.
     *
     * @param encodeResult the encoding result of the original string
     * @return the decoded string
     */
    String decode(EncodeResult encodeResult);
}
package com.liyuncong.algorithms.algorithms_huffman;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import com.liyuncong.application.commontools.FileTools;
/**
 * Shared skeleton of a Huffman coder.
 *
 * <p>This class counts character frequencies, derives the code of every
 * character from a Huffman tree, encodes the input and decodes it again.
 * Only the construction of the tree itself is left to subclasses through
 * {@link #createTree(ArrayList)}.
 */
public abstract class HuffmanAlgorithmAbstract implements HuffmanAlgorithm {

    /**
     * Encodes a string.
     *
     * @param str the string to encode
     * @return the encoded string together with the character/code pairs; for
     *         an empty input both the encoded string and the code table are
     *         empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override
    public EncodeResult encode(String str) {
        // An empty input has no symbols, so there is no tree to build;
        // createTree is never asked to handle an empty list.
        if (str.isEmpty()) {
            return new EncodeResult("", new HashMap<Character, String>());
        }
        ArrayList<Node> letterList = toList(str);
        Node rootNode = createTree(letterList);
        Map<Character, String> letterCode = getLetterCode(rootNode);
        return encode(letterCode, str);
    }

    /**
     * Converts a string into a list of leaf nodes, one per distinct
     * character, each carrying the character and its occurrence count.
     *
     * @param letters the string whose characters are counted
     * @return one node per distinct character
     */
    private ArrayList<Node> toList(String letters) {
        Map<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int i = 0, length = letters.length(); i < length; i++) {
            Character character = letters.charAt(i);
            Integer oldValue = counts.get(character);
            counts.put(character, oldValue == null ? 1 : oldValue + 1);
        }

        ArrayList<Node> letterList = new ArrayList<Node>(counts.size());
        for (Map.Entry<Character, Integer> entry : counts.entrySet()) {
            Data data = new Data();
            data.setC(entry.getKey());
            data.setFrequency(entry.getValue());
            Node node = new Node();
            node.setData(data);
            letterList.add(node);
        }
        return letterList;
    }

    /**
     * Builds the Huffman tree from the leaf nodes.
     *
     * @param letterList one node per distinct character, with its frequency
     * @return the root node of the tree
     */
    protected abstract Node createTree(ArrayList<Node> letterList);

    /**
     * Encodes a string by concatenating the code of each of its characters.
     *
     * @param letterCode the character/code pairs
     * @param letters the string to encode
     * @return the encoding result
     */
    private EncodeResult encode(Map<Character, String> letterCode, String letters) {
        StringBuilder encode = new StringBuilder();
        for (int i = 0, length = letters.length(); i < length; i++) {
            Character character = letters.charAt(i);
            encode.append(letterCode.get(character));
        }
        return new EncodeResult(encode.toString(), letterCode);
    }

    /**
     * Collects the code of every character in the tree.
     *
     * @param rootNode the root node of the Huffman tree
     * @return all character/code pairs
     */
    private Map<Character, String> getLetterCode(Node rootNode) {
        Map<Character, String> letterCode = new HashMap<Character, String>();
        // A tree that consists of a single node has no edges to derive a
        // code from, so its only character gets the fixed code "1".
        if (rootNode.getLeftChild() == null && rootNode.getRightChild() == null) {
            letterCode.put(rootNode.getData().getC(), "1");
            return letterCode;
        }
        getLetterCode(rootNode, "", letterCode);
        return letterCode;
    }

    /**
     * Walks the tree in pre-order and records the code of every leaf.
     * Going to a left child appends "0", going to a right child appends "1".
     *
     * @param node the root of the subtree to walk; may be {@code null}
     * @param prefix the code accumulated on the path from the root to {@code node}
     * @param letterCode receives the character/code pairs
     */
    private void getLetterCode(Node node, String prefix,
            Map<Character, String> letterCode) {
        if (node == null) {
            return;
        }
        if (node.getLeftChild() == null && node.getRightChild() == null) {
            letterCode.put(node.getData().getC(), prefix);
            return;
        }
        getLetterCode(node.getLeftChild(), prefix + "0", letterCode);
        getLetterCode(node.getRightChild(), prefix + "1", letterCode);
    }

    /**
     * Recovers the original string from an encoding result.
     *
     * <p>The encoded string is scanned once from left to right. Characters
     * are accumulated until they form a known code; the matching character
     * is then emitted and accumulation restarts. Taking the shortest match
     * is correct because no Huffman code is a prefix of another.
     *
     * @param decodeResult the encoding result to decode
     * @return the decoded string
     * @throws IllegalArgumentException if the encoded string ends with
     *         characters that do not form any known code
     */
    @Override
    public String decode(EncodeResult decodeResult) {
        Map<String, Character> decodeMap = getDecoder(decodeResult.getLetterCode());
        String encode = decodeResult.getEncode();

        StringBuilder decodeStr = new StringBuilder();
        // Code bits read since the last emitted character.
        StringBuilder candidate = new StringBuilder();
        for (int i = 0, length = encode.length(); i < length; i++) {
            candidate.append(encode.charAt(i));
            String key = candidate.toString();
            if (decodeMap.containsKey(key)) {
                decodeStr.append(decodeMap.get(key));
                candidate.setLength(0);
            }
        }
        if (candidate.length() > 0) {
            throw new IllegalArgumentException(
                    "Encoded string ends with bits that match no code: " + candidate);
        }
        return decodeStr.toString();
    }

    /**
     * Builds the decoder, i.e. inverts the character/code pairs into
     * code/character pairs.
     *
     * @param letterCode the character/code pairs
     * @return the code/character pairs
     */
    private Map<String, Character> getDecoder(Map<Character, String> letterCode) {
        Map<String, Character> decodeMap = new HashMap<String, Character>();
        for (Map.Entry<Character, String> entry : letterCode.entrySet()) {
            decodeMap.put(entry.getValue(), entry.getKey());
        }
        return decodeMap;
    }
}
package com.liyuncong.algorithms.algorithms_huffman;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Huffman tree construction; the implementation follows the algorithm
 * described in the textbook 《多媒體技術教程》.
 *
 * @author yuncong
 */
public class HuffmanAlgorithmImpl1 extends HuffmanAlgorithmAbstract {

    /**
     * Builds the Huffman tree.
     *
     * <p>The list is kept sorted by descending frequency, so the two least
     * frequent nodes are always the last two elements. They are repeatedly
     * replaced by a parent node whose frequency is their sum, until a single
     * node, the root, remains.
     *
     * <p>Note: {@code letterList} is modified in place and ends up holding
     * only the root node; the caller's original list contents are lost.
     *
     * @param letterList the nodes to combine; must not be empty
     * @return the root node of the tree
     * @throws IllegalArgumentException if {@code letterList} is empty
     */
    @Override
    protected Node createTree(ArrayList<Node> letterList) {
        if (letterList.isEmpty()) {
            throw new IllegalArgumentException(
                    "letterList must contain at least one node");
        }
        init(letterList);
        while (letterList.size() > 1) {
            int size = letterList.size();
            // The least frequent node becomes the left child, the second
            // least frequent one the right child.
            Node nodeLeft = letterList.get(size - 1);
            Node nodeRight = letterList.get(size - 2);

            Data data = new Data();
            data.setFrequency(nodeRight.getData().getFrequency()
                    + nodeLeft.getData().getFrequency());
            Node nodeParent = new Node();
            nodeParent.setLeftChild(nodeLeft);
            nodeParent.setRightChild(nodeRight);
            nodeParent.setData(data);

            // Replace the two children by their parent, then restore order.
            letterList.set(size - 2, nodeParent);
            letterList.remove(size - 1);
            sort(letterList);
        }
        return letterList.get(0);
    }

    /**
     * Initialization: brings the node list into sorted order.
     */
    private void init(ArrayList<Node> letterList) {
        sort(letterList);
    }

    /**
     * Sorts the nodes by descending frequency, so the least frequent nodes
     * end up last. The sort is stable: nodes with equal frequency keep their
     * relative order.
     */
    private void sort(ArrayList<Node> letterList) {
        Collections.sort(letterList, Collections.<Node>reverseOrder());
    }
}
package com.liyuncong.algorithms.algorithms_huffman;
import static org.junit.Assert.*;
import org.junit.Test;
/**
 * Tests for {@link HuffmanAlgorithmImpl1}.
 */
public class HuffmanAlgorithmImpl1Test {

    /** Input with four distinct characters: a=2, b=1, c=1, d=2. */
    private static final String SAMPLE = "abcdda";

    /**
     * Encoding must yield one code per distinct character and a bit string
     * of the optimal length.
     */
    @Test
    public void testEncodeString() {
        HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
        EncodeResult result = huffmanImpl1.encode(SAMPLE);
        System.out.println(result.getEncode());

        assertEquals(4, result.getLetterCode().size());
        assertTrue(result.getEncode().matches("[01]+"));
        // For frequencies {2, 1, 1, 2} every Huffman tree costs 12 bits in
        // total, regardless of how ties are broken.
        assertEquals(12, result.getEncode().length());
    }

    /**
     * Decoding an encoding result must give back the original string.
     */
    @Test
    public void testDecode() {
        HuffmanAlgorithmImpl1 huffmanImpl1 = new HuffmanAlgorithmImpl1();
        EncodeResult result = huffmanImpl1.encode(SAMPLE);
        String decode = huffmanImpl1.decode(result);
        System.out.println(decode);

        assertEquals(SAMPLE, decode);
    }
}
原始碼放在github上: