貪心算法之哈夫曼編碼


   哈夫曼編碼是廣泛地用於數據文件壓縮的十分有效的編碼方法。其壓縮率通常在20%~90%之間。哈夫曼編碼算法用字符在文件中出現的頻率表來建立一個用0,1串表示各字符的最優表示方式。

給出現頻率高的字符較短的編碼,出現頻率較低的字符以較長的編碼,可以大大縮短總碼長。

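(原圖缺失,下表依據下文的計算數據重建;頻率單位為千次)

| 字符 | a | b | c | d | e | f |
|---|---|---|---|---|---|---|
| 頻率(千次) | 45 | 13 | 12 | 16 | 9 | 5 |
| 定長碼 | 000 | 001 | 010 | 011 | 100 | 101 |
| 變長碼 | 0 | 101 | 100 | 111 | 1101 | 1100 |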

定長碼:

3*(45+13+12+16+9+5) = 300 千位

變長碼:

1*45+3*13+3*12+3*16+4*9+4*5 = 224 千位

1、前綴碼

對每一個字符規定一個0,1串作為其代碼,並要求任一字符的代碼都不是其它字符代碼的前綴。這種編碼稱為前綴碼

編碼的前綴性質可以使譯碼方法非常簡單。

表示最優前綴碼的二叉樹總是一棵完全二叉樹,即樹中任一非葉結點都有2個兒子結點。

$f(c)$ 表示字符 $c$ 出現的概率,$d_T(c)$ 表示 $c$ 的碼長(即 $c$ 在樹 $T$ 中的深度)。

平均碼長定義為:$B(T)=\sum_{c\in C} f(c)\,d_T(c)$

使平均碼長達到最小的前綴碼編碼方案稱為給定編碼字符集C的最優前綴碼

2、構造哈夫曼編碼

哈夫曼提出構造最優前綴碼的貪心算法,由此產生的編碼方案稱為哈夫曼編碼

哈夫曼算法以自底向上的方式構造表示最優前綴碼的二叉樹T。

算法以|C|個葉結點開始,執行|C|-1次的“合併”運算後產生最終所要求的樹T。

以f為鍵值的優先隊列Q用在貪心選擇時有效地確定算法當前要合併的2棵具有最小頻率的樹。一旦2棵具有最小頻率的樹合併後,產生一棵新的樹,其頻率為合併的2棵樹的頻率之和,並將新樹插入優先隊列Q。經過n-1次的合併後,優先隊列中只剩下一棵樹,即所要求的樹T。

算法huffmanTree用最小堆實現優先隊列Q。初始化優先隊列需要O(n)計算時間,由於最小堆的removeMin和put運算均需O(logn)時間,n-1次的合並總共需要O(nlogn)計算時間。因此,關於n個字符的哈夫曼算法的計算時間為O(nlogn) 。

3、哈夫曼算法的正確性

要證明哈夫曼算法的正確性,只要證明最優前綴碼問題具有貪心選擇性質和最優子結構性質。

(1)貪心選擇性質

(2)最優子結構性質

具體代碼實現(注意:下面的實現用數組保存結點,每次合併時線性掃描尋找兩個權值最小的結點,並未使用最小堆,因此其時間複雜度為O(n²),而不是上文所述的O(nlogn)):

   1: import java.util.LinkedHashMap;
   2: import java.util.ArrayList;
   3: import java.util.Set;
   4: import java.util.Iterator;
   5:  
   /**
    * One slot of the array-backed Huffman tree built by {@link HuffmanTree}.
    *
    * <p>Nodes refer to each other by list index rather than by object reference.
    * Index 0 is reserved for a sentinel, so a {@code parent}, {@code lChild} or
    * {@code rChild} value of 0 means "none".
    */
   class HuffmanNode {
       /** Symbol stored in a leaf; {@code (char) 0} for the sentinel and internal nodes. */
       char label;
       /** Frequency of a leaf, or the combined frequency of both children for an internal node. */
       int weight;
       /** Index of the parent node, or 0 while the node has not been merged yet. */
       int parent;
       /** Index of the left child (bit '0'), or 0 for a leaf. */
       int lChild;
       /** Index of the right child (bit '1'), or 0 for a leaf. */
       int rChild;
       /**
        * Intended to count how often the character occurs in the text to encode.
        * Nothing in this file reads or writes it; kept so existing references still compile.
        */
       int frequency;

       /**
        * Creates a node.
        *
        * @param label  symbol for a leaf, {@code (char) 0} otherwise
        * @param weight frequency of the node
        * @param parent index of the parent node, 0 for none
        * @param lChild index of the left child, 0 for none
        * @param rChild index of the right child, 0 for none
        */
       public HuffmanNode(char label, int weight, int parent, int lChild,
               int rChild) {
           this.label = label;
           this.weight = weight;
           // The original constructor accepted this argument but never stored it.
           this.parent = parent;
           this.lChild = lChild;
           this.rChild = rChild;
       }
   }

   /**
    * Builds a Huffman code from a character-frequency table and encodes/decodes
    * strings of '0'/'1' characters with it.
    *
    * <p>Tree layout (for n symbols): index 0 is a sentinel, indices 1..n are the
    * leaves in the iteration order of the frequency map, indices n+1..2n-1 are the
    * internal nodes, and index 2n-1 is the root.
    *
    * <p>Construction scans linearly for the two lightest roots on every merge, so
    * it takes O(n^2) time; no heap is used.
    *
    * <p>Characters that are not in the frequency table are copied through
    * unchanged by both {@link #enCodeString} and {@link #deCodeString}. A
    * pass-through character that is itself '0' or '1' cannot be told apart from a
    * code bit when decoding.
    */
   class HuffmanTree {
       /** Private copy of the character to frequency table. */
       private LinkedHashMap<Character, Integer> charTable;
       /** Key view of {@link #charTable}; its size is the number of leaves. */
       private Set<Character> charset;
       /** Array-backed tree; see the class comment for the index layout. */
       private ArrayList<HuffmanNode> huffmanTree;
       /** Code word for every symbol of the alphabet. */
       private LinkedHashMap<Character, String> huffmanCode;

       /**
        * Builds the tree and the code table. Each code word is printed to standard
        * output, one per line, in the iteration order of {@code map}.
        *
        * @param map character frequencies; copied, so later changes by the caller
        *            do not affect this instance
        * @throws NullPointerException if {@code map} is null
        */
       public HuffmanTree(LinkedHashMap<Character, Integer> map) {
           if (map == null) {
               throw new NullPointerException("map");
           }
           // Defensive copy: every size computation below must stay in step with the tree.
           charTable = new LinkedHashMap<Character, Integer>(map);
           charset = charTable.keySet();
           creatHuffmanTree();
           creatHuffmanCode();
       }

       /** Allocates the sentinel, one leaf per symbol and the empty internal nodes. */
       private void initTree() {
           int leafCount = charset.size();
           huffmanTree = new ArrayList<HuffmanNode>();
           // The sentinel carries the largest weight so any real node compares lighter.
           huffmanTree.add(new HuffmanNode((char) 0, Integer.MAX_VALUE, 0, 0, 0));
           for (Character ch : charset) {
               huffmanTree.add(new HuffmanNode(ch, charTable.get(ch), 0, 0, 0));
           }
           for (int j = leafCount + 1; j < 2 * leafCount; j++) {
               huffmanTree.add(new HuffmanNode((char) 0, 0, 0, 0, 0));
           }
       }

       /** Builds the Huffman tree by repeatedly merging the two lightest parentless nodes. */
       private void creatHuffmanTree() {
           initTree();
           int leafCount = charset.size();
           for (int i = leafCount + 1; i < 2 * leafCount; i++) {
               // Both candidates start at the sentinel (index 0, maximal weight).
               int minChild1 = 0;
               int minChild2 = 0;
               for (int j = 1; j < i; j++) {
                   HuffmanNode candidate = huffmanTree.get(j);
                   if (candidate.parent != 0) {
                       continue; // already merged into a subtree
                   }
                   int w1 = huffmanTree.get(minChild1).weight;
                   int w2 = huffmanTree.get(minChild2).weight;
                   if (candidate.weight < w1 || candidate.weight < w2) {
                       // Replace whichever current candidate is the heavier one.
                       if (w1 < w2) {
                           minChild2 = j;
                       } else {
                           minChild1 = j;
                       }
                   }
               }
               HuffmanNode first = huffmanTree.get(minChild1);
               HuffmanNode second = huffmanTree.get(minChild2);
               HuffmanNode merged = huffmanTree.get(i);
               first.parent = i;
               second.parent = i;

               // The child with the smaller index always goes on the left.
               if (minChild1 < minChild2) {
                   merged.lChild = minChild1;
                   merged.rChild = minChild2;
               } else {
                   merged.rChild = minChild1;
                   merged.lChild = minChild2;
               }

               // Fix: the merged weight is the sum of the two children. The original
               // added the new node's own (zero) weight to itself, so every internal
               // node weighed 0 and later merges picked the wrong pair.
               merged.weight = first.weight + second.weight;
           }
       }

       /** Derives each symbol's code word by walking from its leaf up to the root. */
       private void creatHuffmanCode() {
           int leafCount = charset.size();
           huffmanCode = new LinkedHashMap<Character, String>();
           // Bits are produced leaf-to-root, so the buffer is filled from the back.
           char[] bits = new char[leafCount + 1];
           for (int i = 1; i <= leafCount; i++) {
               int writePos = leafCount;
               int child = i;
               int parent = huffmanTree.get(i).parent;
               while (parent != 0) {
                   bits[writePos] = (huffmanTree.get(parent).lChild == child) ? '0' : '1';
                   writePos--;
                   child = parent;
                   parent = huffmanTree.get(parent).parent;
               }
               String code;
               if (leafCount == 1) {
                   // A lone leaf is also the root and would get an empty code, which
                   // cannot be decoded; give it a one-bit code instead.
                   code = "0";
               } else {
                   code = String.valueOf(bits, writePos + 1, leafCount - writePos);
               }
               System.out.println(code);
               huffmanCode.put(huffmanTree.get(i).label, code);
           }
       }

       /**
        * Encodes a string with the Huffman code.
        *
        * @param inString text to encode
        * @return the concatenated code words; characters outside the alphabet are
        *         copied through as-is
        */
       public String enCodeString(String inString) {
           StringBuilder encoded = new StringBuilder();
           for (int i = 0; i < inString.length(); i++) {
               // Kept as char: appending an int would emit its decimal value instead.
               char ch = inString.charAt(i);
               String code = huffmanCode.get(ch);
               if (code != null) {
                   encoded.append(code);
               } else {
                   encoded.append(ch);
               }
           }
           return encoded.toString();
       }

       /**
        * Decodes a string produced by {@link #enCodeString}.
        *
        * @param inString sequence of '0'/'1' characters; any other character is
        *                 copied through as-is
        * @return the decoded text; a trailing incomplete code word is dropped
        */
       public String deCodeString(String inString) {
           int leafCount = charset.size();
           if (leafCount == 0) {
               // Empty alphabet: encoding copied everything through, so decoding does too.
               return inString;
           }
           StringBuilder decoded = new StringBuilder();
           if (leafCount == 1) {
               // There is no internal node to walk; the single symbol's code is "0".
               char only = huffmanTree.get(1).label;
               for (int i = 0; i < inString.length(); i++) {
                   char ch = inString.charAt(i);
                   decoded.append(ch == '0' ? only : ch);
               }
               return decoded.toString();
           }
           int rootIndex = leafCount * 2 - 1;
           int node = rootIndex;
           for (int i = 0; i < inString.length(); i++) {
               char ch = inString.charAt(i);
               if (ch == '0') {
                   node = huffmanTree.get(node).lChild;
               } else if (ch == '1') {
                   node = huffmanTree.get(node).rChild;
               } else {
                   decoded.append(ch);
                   continue;
               }
               // Indices 1..leafCount are leaves: emit the symbol and restart at the root.
               if (node <= leafCount) {
                   decoded.append(huffmanTree.get(node).label);
                   node = rootIndex;
               }
           }
           return decoded.toString();
       }

   }
 154:  
 155: public class HuffmanTreeTest {
 156:     public static void main(String[] args) {
 157:         LinkedHashMap<Character, Integer> hasmap = new LinkedHashMap<Character, Integer>();
 158:         hasmap.put('a', 4);
 159:         hasmap.put('b', 5);
 160:         hasmap.put('c', 8);
 161:         hasmap.put('d', 10);
 162:  
 163:         HuffmanTree huffman = new HuffmanTree(hasmap);
 164:         String temp = huffman.enCodeString("abcd");
 165:         System.out.println(temp);
 166:         System.out.println(huffman.deCodeString(temp));
 167:  
 168:     }
 169:  
 170: }


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM