赫夫曼編碼

阿新 • • 發佈：2020-08-05

赫夫曼編碼與解碼

●赫夫曼編碼也翻譯為哈夫曼編碼(HuffmanCoding)，又稱霍夫曼編碼，是一種編碼方式,屬於一種程式演算法
●赫夫曼編碼是赫夫曼樹在電訊通訊中的經典應用之一。
●赫夫曼編碼廣泛地用於資料檔案壓縮。其壓縮率通常在20%~90%之間
●赫夫曼碼是可變字長編碼(VLC)的一種。Huffman於1952年提出一種編碼方法，稱之為最佳編碼

赫夫曼編碼原理剖析

赫夫曼樹根據排序方法不同（例如權值相同的節點先後順序不同），可能不太一樣，對應的赫夫曼編碼也不完全一樣，但編碼後的總長度（即帶權路徑長度 WPL）總是相同的，而且都是最小的。

用赫夫曼編碼實現檔案壓縮和解壓

赫夫曼編碼壓縮檔案注意事項

➢如果檔案本身就是經過壓縮處理的（比如視訊、ppt 等檔案），那麼使用赫夫曼編碼再壓縮，效率不會有明顯變化
➢赫夫曼編碼是按位元組來處理的，因此可以處理所有的檔案(二進位制檔案、文字檔案)
➢如果一個檔案中的內容，重複的資料不多，壓縮效果也不會很明顯。

package com.xudong.DataStructures;

import java.io.*;
import java.util.*;

public class HuffmanCodeDemo {
    /**
     * Demo entry point.
     *
     * <p>First compresses a sample sentence in memory and decodes it again, then
     * compresses a file on disk and decompresses it to a second file. The file
     * paths are hard-coded.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // ---- in-memory round trip ----
        final String content = "i like like like java do you like a java";
        final byte[] contentBytes = content.getBytes();
        System.out.println("原字串長度：" + contentBytes.length);

        final byte[] huffmanCodesBytes = haffmanZip(contentBytes);
        System.out.println("壓縮後的結果是：" + Arrays.toString(huffmanCodesBytes) + "  長度=" + huffmanCodesBytes.length);

        // The static code table was populated while compressing; reuse it to decode.
        final byte[] sourceBytes = decode(huffmanCodes, huffmanCodesBytes);
        System.out.println("原來的字串=" + new String(sourceBytes));

        System.out.println("------------------------------");

        // ---- file compression test ----
        final String srcFile = "C:\\Users\\Shinelon\\Desktop\\java大資料開發大綱.jpg";
        final String destFile = "C:\\Users\\Shinelon\\Desktop\\java大資料開發大綱.zip";
        zipFile(srcFile, destFile);
        System.out.println("壓縮檔案成功！");

        // ---- file decompression test ----
        final String archive = "C:\\Users\\Shinelon\\Desktop\\java大資料開發大綱.zip";
        final String restored = "C:\\Users\\Shinelon\\Desktop\\java大資料開發大綱2.jpg";
        unZipFile(archive, restored);
        System.out.println("解壓檔案成功！");

        /*
        Step-by-step variant of the in-memory compression, kept for reference:

        List<Node1> nodes = getNodes(contentBytes);
        System.out.println("nodes=" + nodes);

        System.out.println("赫夫曼樹,前序遍歷：");
        Node1 huffmanTreeRoot = createHuffmanTree(nodes);
        huffmanTreeRoot.preOrder();

        Map<Byte, String> huffmanCodes = getCodes(huffmanTreeRoot);
        System.out.println("生成的哈夫曼編碼表：" + huffmanCodes);

        byte[] huffmanCodeBytes = zip(contentBytes, huffmanCodes);
        System.out.println("huffmanCodeBytes=" + Arrays.toString(huffmanCodeBytes));
         */
    }

    /**
     * Decompresses an archive into a plain file.
     *
     * <p>The archive is read as two serialized objects, in this order: the
     * Huffman-encoded {@code byte[]} and the {@code Map<Byte, String>} code table.
     * The decoded bytes are then written to {@code destFile}. The destination is
     * only opened after decoding succeeded, so a failed decode does not truncate
     * an existing file.
     *
     * <p>I/O and class-loading failures are reported with
     * {@code printStackTrace()} and are not rethrown.
     *
     * <p>NOTE(review): this uses Java native deserialization; only open archives
     * that come from a trusted source.
     *
     * @param zipFile  path of the compressed archive to read
     * @param destFile path of the file that receives the decompressed bytes
     */
    public static void unZipFile(String zipFile,String destFile){
        // try-with-resources closes every stream that was actually opened, so a
        // failure while opening no longer triggers a NullPointerException on close.
        try (FileInputStream fis = new FileInputStream(zipFile);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            // First object: the Huffman-encoded payload.
            byte[] huffmanBytes = (byte[]) ois.readObject();
            // Second object: the code table that was used for encoding.
            @SuppressWarnings("unchecked") // the archive stores a Map<Byte, String> in this slot
            Map<Byte,String> huffmanCodes = (Map<Byte, String>) ois.readObject();
            // Decode back to the original bytes.
            byte[] bytes = decode(huffmanCodes, huffmanBytes);
            // Write the restored bytes to the destination file.
            try (FileOutputStream fos = new FileOutputStream(destFile)) {
                fos.write(bytes);
            }
        } catch (IOException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Compresses a file with Huffman coding.
     *
     * <p>The whole source file is read into memory and encoded. The archive then
     * receives two serialized objects, in this order: the encoded {@code byte[]}
     * and the static {@code huffmanCodes} table.
     *
     * <p>I/O failures are reported by printing the exception message to standard
     * output and are not rethrown.
     *
     * @param srcFile  path of the file to compress
     * @param destFile path of the archive file to create
     */
    public static void zipFile(String srcFile,String destFile){
        try {
            byte[] source;
            try (FileInputStream fis = new FileInputStream(srcFile)) {
                // Read until end-of-stream: available() plus a single read() is not
                // guaranteed to return the complete file.
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                byte[] chunk = new byte[8192];
                int read;
                while ((read = fis.read(chunk)) != -1) {
                    buffer.write(chunk, 0, read);
                }
                source = buffer.toByteArray();
            }

            // Encode the file content; this also fills the static code table.
            byte[] huffmanBytes = haffmanZip(source);

            // try-with-resources closes only streams that were actually opened, so a
            // failure while opening no longer triggers a NullPointerException on close.
            try (FileOutputStream fos = new FileOutputStream(destFile);
                 ObjectOutputStream oos = new ObjectOutputStream(fos)) {
                // Payload first, then the code table needed to decode it.
                oos.writeObject(huffmanBytes);
                oos.writeObject(huffmanCodes);
            }
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
    }

    // Decoding (decompression) works in two stages:
    // 1. turn the encoded bytes, e.g. [-88, -65, -56, -65...], back into the
    //    bit string they represent, e.g. "101010001011..."
    // 2. walk the bit string and translate each code back to its byte using the
    //    inverted code table, e.g. back to "i like like like..."

    /**
     * Decodes Huffman-encoded bytes back to the original bytes.
     *
     * <p>NOTE(review): the last byte is converted without left-padding (see the
     * {@code byteToBitString} call below), so leading zero bits of the final
     * chunk are not restored here — confirm whether inputs ending in such a
     * chunk need to be supported.
     *
     * @param huffmanCodes code table mapping each byte to its bit string
     * @param huffmanBytes bytes produced by the encoder
     * @return the decoded bytes
     */
    private static byte[] decode(Map<Byte,String> huffmanCodes,byte[] huffmanBytes){
        // Stage 1: rebuild the bit string. Every byte except the last is padded to 8 bits.
        StringBuilder bits = new StringBuilder();
        int lastIndex = huffmanBytes.length - 1;
        for (int i = 0; i <= lastIndex; i++) {
            boolean padToEightBits = (i != lastIndex);
            bits.append(byteToBitString(padToEightBits, huffmanBytes[i]));
        }

        // Invert the table (code -> byte) so codes can be looked up directly.
        Map<String, Byte> symbolByCode = new HashMap<>();
        for (Map.Entry<Byte,String> entry : huffmanCodes.entrySet()){
            symbolByCode.put(entry.getValue(), entry.getKey());
        }

        // Stage 2: starting at 'start', grow the candidate code one bit at a time
        // until it matches a table entry, then continue right after it.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        while (start < bits.length()) {
            int end = start + 1;
            Byte symbol = symbolByCode.get(bits.substring(start, end));
            while (symbol == null) {
                end++;
                symbol = symbolByCode.get(bits.substring(start, end));
            }
            decoded.add(symbol);
            start = end;
        }

        // Copy the boxed results into a primitive array.
        byte[] result = new byte[decoded.size()];
        int pos = 0;
        for (Byte symbol : decoded) {
            result[pos++] = symbol;
        }
        return result;
    }



    /**
     * Converts one byte to its binary string, treating the byte as an unsigned
     * 8-bit value (i.e. the two's-complement bit pattern of {@code b}).
     *
     * @param flag {@code true} to left-pad the result with zeros to exactly 8
     *             characters; {@code false} (used for the last byte) to return
     *             the value without leading zeros
     * @param b the byte to convert
     * @return the bit string of {@code b}; 8 characters when {@code flag} is
     *         {@code true}, otherwise 1 to 8 characters
     */
    private static String byteToBitString(boolean flag,byte b){
        // Mask to the low 8 bits: without it a negative byte is sign-extended and
        // Integer.toBinaryString would return 32 characters when no padding is requested.
        int unsigned = b & 0xFF;
        if (flag){
            // Setting bit 8 forces a 9-character string; dropping the first
            // character leaves the value zero-padded to 8 bits.
            return Integer.toBinaryString(unsigned | 0x100).substring(1);
        }
        return Integer.toBinaryString(unsigned);
    }


    /**
     * Runs the complete compression pipeline: count bytes, build the Huffman
     * tree, derive the code table, and pack the encoded bits into bytes.
     *
     * @param bytes the raw bytes to compress
     * @return the Huffman-encoded bytes
     */
    private static byte[] haffmanZip(byte[] bytes){
        // Leaf nodes -> tree -> code table.
        Node1 root = createHuffmanTree(getNodes(bytes));
        Map<Byte, String> codeTable = getCodes(root);
        // Encode the input with that table.
        return zip(bytes, codeTable);
    }



    /**
     * Encodes {@code bytes} with the given code table and packs the resulting
     * bit string into bytes, 8 bits per byte.
     *
     * <p>Each 8-bit group is parsed as a binary number and narrowed to
     * {@code byte}, so groups whose first bit is 1 are stored as negative values
     * (two's complement). The final group may hold fewer than 8 bits.
     *
     * @param bytes the raw bytes to encode
     * @param huffmanCodes code table mapping each byte to its bit string
     * @return the packed encoded bytes
     */
    private static byte[] zip(byte[] bytes,Map<Byte,String> huffmanCodes){
        // Concatenate the code of every input byte into one long bit string.
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < bytes.length; i++) {
            bits.append(huffmanCodes.get(bytes[i]));
        }

        // One output byte per started group of 8 bits.
        int bitCount = bits.length();
        int groups = bitCount / 8 + (bitCount % 8 == 0 ? 0 : 1);
        byte[] packed = new byte[groups];

        for (int slot = 0; slot < groups; slot++) {
            int from = slot * 8;
            // The last group may be shorter than 8 bits.
            int to = Math.min(from + 8, bitCount);
            packed[slot] = (byte) Integer.parseInt(bits.substring(from, to), 2);
        }
        return packed;
    }

    /**
     * Builds the Huffman code table for the tree rooted at {@code root}.
     *
     * <p>The table is stored in the static {@code huffmanCodes} map, which is
     * cleared first so that codes from an earlier call cannot leak into (and
     * collide with) the codes of the current tree.
     *
     * @param root root of the Huffman tree, may be {@code null}
     * @return the static code table, or {@code null} when {@code root} is
     *         {@code null}
     */
    private static Map<Byte,String> getCodes(Node1 root){
        // Drop entries left over from a previous tree.
        huffmanCodes.clear();
        if (root == null){
            return null;
        }
        if (root.data != null){
            // The tree is a single leaf (only one distinct byte in the input), so
            // there is no path to derive a code from. Assign a 1-bit code. "1" is
            // used rather than "0" because decode() restores the last byte without
            // leading zeros, so a stream of only zero bits would lose symbols.
            huffmanCodes.put(root.data, "1");
            return huffmanCodes;
        }
        // Left subtree contributes a leading 0, right subtree a leading 1.
        getCodes(root.left,"0",stringBuilder);
        getCodes(root.right,"1",stringBuilder);
        return huffmanCodes;
    }


    // Code table filled by getCodes, e.g. 32 -> 01 , 97 -> 100
    static Map<Byte,String> huffmanCodes = new HashMap<>();
    // Empty path prefix handed to the recursive getCodes for the root's children.
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Records the code of every leaf below {@code node} in {@code huffmanCodes}.
     *
     * @param node current node; {@code null} is ignored
     * @param code the bit for the edge leading to {@code node}: "0" for a left
     *             child, "1" for a right child
     * @param stringBuilder the path from the root to the parent of {@code node};
     *                      it is copied, never modified
     */
    private static void getCodes(Node1 node,String code,StringBuilder stringBuilder){
        if (node == null){
            return;
        }
        // Work on a copy so sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null){
            // Internal node: keep descending.
            getCodes(node.left,"0",path);
            getCodes(node.right,"1",path);
        }else {
            // Leaf: the accumulated path is this byte's code.
            huffmanCodes.put(node.data,path.toString());
        }
    }


    /**
     * Prints the tree in pre-order, or a notice when the tree is empty.
     *
     * @param root root of the tree, may be {@code null}
     */
    public static void preOrder(Node1 root){
        if (root == null){
            System.out.println("赫夫曼樹為空！");
            return;
        }
        root.preOrder();
    }

    /**
     * Counts how often each byte value occurs and wraps every (byte, count)
     * pair in a {@code Node1}.
     *
     * @param bytes the input bytes
     * @return one node per distinct byte value, weighted by its number of
     *         occurrences
     */
    private static List<Node1> getNodes(byte[] bytes){
        // Tally the occurrences of each byte value.
        HashMap<Byte, Integer> frequency = new HashMap<>();
        for (int i = 0; i < bytes.length; i++) {
            Byte value = bytes[i];
            Integer seen = frequency.get(value);
            frequency.put(value, seen == null ? 1 : seen + 1);
        }

        // Turn every tally entry into a leaf node.
        ArrayList<Node1> leaves = new ArrayList<>();
        for (Map.Entry<Byte,Integer> tally : frequency.entrySet()){
            Node1 leaf = new Node1(tally.getKey(), tally.getValue());
            leaves.add(leaf);
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree from the given nodes.
     *
     * <p>The two lightest nodes are repeatedly merged under a new parent whose
     * weight is the sum of both, until a single node remains. The passed list is
     * modified in place: on return it contains only the root.
     *
     * @param nodes the nodes to combine; may be {@code null} or empty
     * @return the root of the Huffman tree, or {@code null} when there are no
     *         nodes (e.g. for empty input)
     */
    public static Node1 createHuffmanTree(List<Node1> nodes){
        // Nothing to build a tree from; getCodes(Node1) accepts a null root.
        if (nodes == null || nodes.isEmpty()){
            return null;
        }

        while (nodes.size() > 1){
            // Sort ascending by weight so the two lightest nodes come first.
            Collections.sort(nodes);

            // Take out the lightest and the second-lightest node.
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // Internal node: carries no data, only the combined weight.
            Node1 parent = new Node1(null,leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;

            // The merged subtree competes again in the next round.
            nodes.add(parent);
        }
        // The single remaining node is the root.
        return nodes.get(0);
    }
}

//建立Node，待處理資料和權值
class Node1 implements Comparable<Node1>{
    Byte data;//存放資料本身，即ASCII碼
    int weight;//權值，即字元出現的個數
    Node1 left;
    Node1 right;

    public Node1(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public String toString() {
        return "Node1{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }

    @Override
    public int compareTo(Node1 o) {
        return this.weight - o.weight;
    }

    //前序遍歷
    public void preOrder(){
        System.out.println(this);
        if (this.left != null){
            this.left.preOrder();
        }
        if (this.right != null){
            this.right.preOrder();
        }
    }
}