哈夫曼編碼是廣泛地用於數據文件壓縮的十分有效的編碼方法。其壓縮率通常在20%~90%之間。哈夫曼編碼算法用字符在文件中出現的頻率表來建立一個用0,1串表示各字符的最優表示方式。
給出現頻率高的字符以較短的編碼,給出現頻率較低的字符以較長的編碼,可以大大縮短總碼長。
定長碼(設文件共10萬個字符,其中6個不同字符的出現次數依次為45、13、12、16、9、5千次,每個字符用3位表示):
3*(45+13+12+16+9+5) = 300 千位
變長碼:
1*45+3*13+3*12+3*16+4*9+4*5 = 224 千位
1、前綴碼
對每一個字符規定一個0,1串作為其代碼,並要求任一字符的代碼都不是其它字符代碼的前綴。這種編碼稱為前綴碼。
編碼的前綴性質可以使譯碼方法非常簡單。
表示最優前綴碼的二叉樹總是一棵完全二叉樹,即樹中任一非葉結點都有2個兒子結點。
f(c)表示字符c出現的概率,d_T(c)表示c在樹T中的深度,即c的碼長。
平均碼長定義為:$B(T)=\sum_{c\in C} f(c)\,d_T(c)$
使平均碼長達到最小的前綴碼編碼方案稱為給定編碼字符集C的最優前綴碼。
2、構造哈夫曼編碼
哈夫曼提出構造最優前綴碼的貪心算法,由此產生的編碼方案稱為哈夫曼編碼。
哈夫曼算法以自底向上的方式構造表示最優前綴碼的二叉樹T。
算法以|C|個葉結點開始,執行|C|-1次的“合併”運算後產生最終所要求的樹T。
以f為鍵值的優先隊列Q用在貪心選擇時有效地確定算法當前要合併的2棵具有最小頻率的樹。一旦2棵具有最小頻率的樹合併後,產生一棵新的樹,其頻率為合併的2棵樹的頻率之和,並將新樹插入優先隊列Q。經過n-1次的合併後(n=|C|),優先隊列中只剩下一棵樹,即所要求的樹T。
算法huffmanTree用最小堆實現優先隊列Q。初始化優先隊列需要O(n)計算時間,由於最小堆的removeMin和put運算均需O(logn)時間,n-1次的合併總共需要O(nlogn)計算時間。因此,關於n個字符的哈夫曼算法的計算時間為O(nlogn)。
3、哈夫曼算法的正確性
要證明哈夫曼算法的正確性,只要證明最優前綴碼問題具有貪心選擇性質和最優子結構性質。
(1)貪心選擇性質
(2)最優子結構性質
具體代碼實現(為簡化起見,下面的實現用線性掃描而非最小堆來選取權值最小的兩棵樹,建樹時間為O(n^2)):
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * One slot of the array-backed Huffman tree built by {@link HuffmanTree}.
 *
 * <p>Nodes refer to each other by their index in the tree list. Index 0 is a
 * sentinel, so a {@code parent}, {@code lChild} or {@code rChild} of 0 means
 * "no such node".
 */
class HuffmanNode {
    /** Character stored at a leaf; {@code (char) 0} for the sentinel and internal nodes. */
    char label;
    /** Occurrence count of a leaf, or the sum of both children for an internal node. */
    int weight;
    /** Index of the parent node; 0 while the node has not been merged (and for the root). */
    int parent;
    /** Index of the left child (edge labelled '0'); 0 for leaves. */
    int lChild;
    /** Index of the right child (edge labelled '1'); 0 for leaves. */
    int rChild;
    /**
     * Number of times the character occurs in the text to be encoded.
     * Not read or written by any code in this listing.
     */
    int frequency;

    /**
     * Creates a node with all links given explicitly.
     *
     * @param label  character for a leaf, {@code (char) 0} otherwise
     * @param weight occurrence count (leaf) or combined weight (internal node)
     * @param parent index of the parent node, 0 if none
     * @param lChild index of the left child, 0 if none
     * @param rChild index of the right child, 0 if none
     */
    public HuffmanNode(char label, int weight, int parent, int lChild,
            int rChild) {
        this.label = label;
        this.weight = weight;
        // The original constructor silently dropped this argument.
        this.parent = parent;
        this.lChild = lChild;
        this.rChild = rChild;
    }
}

/**
 * Huffman code for a fixed alphabet, built from a character-to-frequency table.
 *
 * <p>The tree is stored in a list of {@link HuffmanNode}: index 0 is a
 * sentinel, indices {@code 1..n} are the leaves in the iteration order of the
 * frequency table, and indices {@code n+1..2n-1} are internal nodes, the last
 * one being the root. Encoded text is a {@code String} of the characters
 * {@code '0'} and {@code '1'}.
 *
 * <p>Weights must be smaller than {@link Integer#MAX_VALUE} (the sentinel's
 * weight) and their total must fit in an {@code int}.
 */
class HuffmanTree {
    /** Private snapshot of the caller's frequency table. */
    private final LinkedHashMap<Character, Integer> charTable;
    /** Key view of {@link #charTable}; its size is the number of leaves. */
    private final Set<Character> charset;
    /** Array-backed tree, laid out as described in the class comment. */
    private final List<HuffmanNode> huffmanTree = new ArrayList<HuffmanNode>();
    /** Code word of every character of the alphabet. */
    private final Map<Character, String> huffmanCode =
            new LinkedHashMap<Character, String>();

    /**
     * Builds the tree and the code table. Each code word is also printed to
     * standard output, one per line, in the iteration order of {@code map}.
     *
     * @param map frequency of each character; copied, so later changes to
     *            the caller's map do not affect this instance
     * @throws NullPointerException if {@code map} is null or contains a null
     *                              key or value
     */
    public HuffmanTree(LinkedHashMap<Character, Integer> map) {
        charTable = new LinkedHashMap<Character, Integer>(map);
        charset = charTable.keySet();
        initTree();
        buildTree();
        buildCodes();
    }

    /** Fills the node list with the sentinel, one leaf per character, and empty internal nodes. */
    private void initTree() {
        // Sentinel: its huge weight makes it lose every "is this lighter?" comparison.
        huffmanTree.add(new HuffmanNode((char) 0, Integer.MAX_VALUE, 0, 0, 0));
        for (Character ch : charset) {
            huffmanTree.add(new HuffmanNode(ch, charTable.get(ch), 0, 0, 0));
        }
        for (int j = charset.size() + 1; j < 2 * charset.size(); j++) {
            huffmanTree.add(new HuffmanNode((char) 0, 0, 0, 0, 0));
        }
    }

    /** Repeatedly merges the two lightest parentless nodes until only the root is left. */
    private void buildTree() {
        int leafCount = charset.size();
        for (int i = leafCount + 1; i < 2 * leafCount; i++) {
            int first = 0;
            int second = 0;
            for (int j = 1; j < i; j++) {
                HuffmanNode candidate = huffmanTree.get(j);
                if (candidate.parent != 0) {
                    continue; // already merged into some subtree
                }
                int firstWeight = huffmanTree.get(first).weight;
                int secondWeight = huffmanTree.get(second).weight;
                if (candidate.weight < firstWeight
                        || candidate.weight < secondWeight) {
                    // Replace whichever of the two current picks is heavier.
                    if (firstWeight < secondWeight) {
                        second = j;
                    } else {
                        first = j;
                    }
                }
            }

            HuffmanNode firstNode = huffmanTree.get(first);
            HuffmanNode secondNode = huffmanTree.get(second);
            firstNode.parent = i;
            secondNode.parent = i;

            HuffmanNode merged = huffmanTree.get(i);
            // The child with the smaller index always goes to the left.
            merged.lChild = Math.min(first, second);
            merged.rChild = Math.max(first, second);
            // Fix: the original added the new node's own (zero) weight to
            // itself, so every internal node weighed 0 and was always picked
            // next, producing a skewed, non-optimal tree.
            merged.weight = firstNode.weight + secondNode.weight;
        }
    }

    /** Derives each leaf's code word by walking from the leaf up to the root. */
    private void buildCodes() {
        int leafCount = charset.size();
        for (int leaf = 1; leaf <= leafCount; leaf++) {
            StringBuilder bits = new StringBuilder();
            int child = leaf;
            int parent = huffmanTree.get(leaf).parent;
            while (parent != 0) {
                bits.append(huffmanTree.get(parent).lChild == child ? '0' : '1');
                child = parent;
                parent = huffmanTree.get(parent).parent;
            }
            if (bits.length() == 0) {
                // Only possible for a one-symbol alphabet, where the leaf is
                // the root. Give it a one-bit code so encoding keeps the
                // number of occurrences instead of producing "".
                bits.append('0');
            }
            // Bits were collected leaf-to-root; the code reads root-to-leaf.
            String code = bits.reverse().toString();
            System.out.println(code);
            huffmanCode.put(huffmanTree.get(leaf).label, code);
        }
    }

    /**
     * Encodes a string with this Huffman code.
     *
     * @param inString text to encode
     * @return the concatenated code words; a character that is not in the
     *         alphabet is copied to the output unchanged
     * @throws NullPointerException if {@code inString} is null
     */
    public String enCodeString(String inString) {
        StringBuilder encoded = new StringBuilder();
        for (int i = 0; i < inString.length(); i++) {
            char ch = inString.charAt(i);
            String code = huffmanCode.get(Character.valueOf(ch));
            if (code != null) {
                encoded.append(code);
            } else {
                // Fix: the original held the character in an int and so
                // appended its decimal code point (e.g. "88" for 'X').
                encoded.append(ch);
            }
        }
        return encoded.toString();
    }

    /**
     * Decodes a string produced by {@link #enCodeString(String)}.
     *
     * @param inString text made of '0'/'1' code bits; any other character is
     *                 copied to the output unchanged
     * @return the decoded text; bits of an unfinished trailing code word are dropped
     * @throws NullPointerException if {@code inString} is null
     */
    public String deCodeString(String inString) {
        StringBuilder decoded = new StringBuilder();
        int leafCount = charset.size();
        int rootIndex = leafCount * 2 - 1;
        int current = rootIndex;
        for (int i = 0; i < inString.length(); i++) {
            char ch = inString.charAt(i);
            if (leafCount <= 1) {
                // No branching tree exists: with one symbol its code is "0",
                // with none there is nothing to decode. Everything else is
                // passed through.
                if (leafCount == 1 && ch == '0') {
                    decoded.append(huffmanTree.get(1).label);
                } else {
                    decoded.append(ch);
                }
                continue;
            }
            if (ch == '0') {
                current = huffmanTree.get(current).lChild;
            } else if (ch == '1') {
                current = huffmanTree.get(current).rChild;
            } else {
                decoded.append(ch);
                continue;
            }
            if (current <= leafCount) {
                // Reached a leaf: emit its character and restart at the root.
                decoded.append(huffmanTree.get(current).label);
                current = rootIndex;
            }
        }
        return decoded.toString();
    }

}
154:
155: public class HuffmanTreeTest {
156: public static void main(String[] args) {
157: LinkedHashMap<Character, Integer> hasmap = new LinkedHashMap<Character, Integer>();
158: hasmap.put('a', 4);
159: hasmap.put('b', 5);
160: hasmap.put('c', 8);
161: hasmap.put('d', 10);
162:
163: HuffmanTree huffman = new HuffmanTree(hasmap);
164: String temp = huffman.enCodeString("abcd");
165: System.out.println(temp);
166: System.out.println(huffman.deCodeString(temp));
167:
168: }
169:
170: }