關於哈夫曼樹的講解,已有珠玉在前,我就不贅述了。
基本原理:統計字符串內各字符的出現頻率,由此建立哈夫曼樹:頻率越高的字符離根結點越近,原則上左子樹的頻率小於右子樹。從根結點一路走到葉子結點,把沿途的分支標記(左分支記 0,右分支記 1)依次連起來,即為該葉子字符的編碼;任何一個字符的編碼都不是另一個編碼的前綴,因此獨一無二。解碼過程就是按編碼位從根結點向下遍歷哈夫曼樹、到達葉子即輸出字符的過程。
編程實踐:實現對純英文字符串和文件的哈夫曼編碼和解碼。
代碼如下:
#include <iostream>
#include <stdlib.h>
#include <string>
#include <map>
#include <fstream>
#include <math.h>
#include <cstddef>
#include <vector>

// Maps each character to its Huffman code, written as a string of '0'/'1'.
// Filled by Huff_Code and read by main when encoding the input.
std::map<char, std::string> huffcode;

// One node of the Huffman tree. The tree lives in a flat array of 2*n-1
// nodes (n = number of distinct characters) and nodes refer to each other
// by array index; -1 means "no such node".
struct Node
{
    double weight = 0;  // character frequency, or the sum of both children
    char ch = '\0';     // the character; only set for the first n (leaf) nodes
    std::string code;   // code assigned by Huff_Code; only set for leaves
    int lchild = -1;    // index of the left child
    int rchild = -1;    // index of the right child
    int parent = -1;    // index of the parent; -1 while not yet merged
};

// Finds the two lightest nodes among huffTree[0..n) that have no parent yet.
//
// huffTree  node array
// a         receives the index of the lightest node
// b         receives the index of the second-lightest node
// n         number of leading nodes to inspect
//
// Ties are resolved in favour of the lower index. An output is left untouched
// when no candidate exists for it (fewer than two parentless nodes).
void Select(Node huffTree[], int *a, int *b, int n)
{
    int lightest = -1;
    int runnerUp = -1;

    for (int i = 0; i < n; ++i)
    {
        if (huffTree[i].parent != -1)  // already merged into a subtree
            continue;

        if (lightest == -1 || huffTree[i].weight < huffTree[lightest].weight)
        {
            // The previous best becomes the second best.
            runnerUp = lightest;
            lightest = i;
        }
        else if (runnerUp == -1 || huffTree[i].weight < huffTree[runnerUp].weight)
        {
            runnerUp = i;
        }
    }

    // *a is never heavier than *b, so the caller's "a left, b right" layout
    // already puts the lighter subtree on the left.
    if (lightest != -1)
        *a = lightest;
    if (runnerUp != -1)
        *b = runnerUp;
}

// Builds the Huffman tree in place.
//
// huffTree  array with room for 2*n-1 nodes; the first n become the leaves
// w         frequency of each of the n characters
// ch        the n characters, parallel to w
// n         number of distinct characters; nothing is done when n <= 0
void Huff_Tree(Node huffTree[], int w[], char ch[], int n)
{
    if (n <= 0)
        return;

    const int total = 2 * n - 1;

    for (int i = 0; i < total; ++i)
    {
        huffTree[i].parent = -1;
        huffTree[i].lchild = -1;
        huffTree[i].rchild = -1;
        huffTree[i].code.clear();
    }

    for (int i = 0; i < n; ++i)
    {
        huffTree[i].weight = w[i];
        huffTree[i].ch = ch[i];
    }

    // Each round merges the two lightest parentless nodes into new node k.
    for (int k = n; k < total; ++k)
    {
        int left = 0;
        int right = 0;
        Select(huffTree, &left, &right, k);

        huffTree[left].parent = k;
        huffTree[right].parent = k;
        huffTree[k].weight = huffTree[left].weight + huffTree[right].weight;
        huffTree[k].lchild = left;
        huffTree[k].rchild = right;
    }
}

// Derives the code of every leaf, prints it, and stores it both in the leaf's
// `code` field and in the global `huffcode` table.
//
// huffTree  tree built by Huff_Tree
// n         number of leaves
void Huff_Code(Node huffTree[], int n)
{
    for (int i = 0; i < n; ++i)
    {
        // Walk leaf -> root; the branch labels come out in reverse order.
        std::string reversed;
        for (int j = i; huffTree[j].parent != -1; j = huffTree[j].parent)
        {
            const int up = huffTree[j].parent;
            reversed += (huffTree[up].lchild == j) ? '0' : '1';
        }

        std::string code(reversed.rbegin(), reversed.rend());

        // A leaf without a parent is the whole tree (one distinct character).
        // Give it a one-bit code so the encoded text is not empty and can be
        // decoded again.
        if (code.empty())
            code = "0";

        std::cout << "字符 " << huffTree[i].ch << " 的編碼:" << code << std::endl;

        huffTree[i].code = code;
        huffcode[huffTree[i].ch] = code;
    }
}

// Decodes a string of '0'/'1' characters using the codes stored in the leaves.
//
// huffTree  tree whose first n nodes carry the codes written by Huff_Code
// n         number of leaves
// s         encoded text
//
// Prints the "decoded:" label and returns the decoded text, or the error
// string when the input does not end exactly on a code boundary (this also
// covers input containing characters that appear in no code).
std::string Huff_Decode(Node huffTree[], int n, std::string s)
{
    std::cout << "解碼后為:";

    // Reverse table code -> character. emplace keeps the first entry for a
    // given code, i.e. the leaf with the lowest index.
    std::map<std::string, char> symbolOf;
    for (int j = 0; j < n; ++j)
        symbolOf.emplace(huffTree[j].code, huffTree[j].ch);

    std::string decoded;
    std::string pending;  // bits read since the last complete code
    for (const char bit : s)
    {
        pending += bit;
        const auto hit = symbolOf.find(pending);
        if (hit != symbolOf.end())
        {
            decoded += hit->second;
            pending.clear();
        }
    }

    if (!pending.empty())
        return "解碼錯誤!";
    return decoded;
}

// Interactive driver: reads a string from stdin (choice 1) or from the file
// Huffman.txt (choice 2), prints the code table, outputs the encoded text
// (to stdout, or back into Huffman.txt for choice 2), reports the size saving
// against a fixed-width code, then decodes one user-supplied bit string.
//
// Known limitation: input is read with operator>>, so it stops at the first
// whitespace character.
int main()
{
    std::cout << "編碼字符串或文件(1/2):";
    int cho = 0;  // stays 0 (invalid) if nothing can be read
    std::cin >> cho;

    std::string s;
    if (cho == 1)
    {
        std::cout << "輸入字符串:" << std::endl;
        std::cin >> s;
    }
    else if (cho == 2)
    {
        std::ifstream infile("Huffman.txt");
        infile >> s;
        infile.close();
        std::cout << s << std::endl;
    }
    else
    {
        std::cout << "輸入錯誤!" << std::endl;
        return 0;
    }

    // Nothing to encode (empty input, unreadable or empty file).
    if (s.empty())
    {
        std::cout << "輸入錯誤!" << std::endl;
        return 0;
    }

    // Count how often each character occurs.
    std::map<char, int> freq;
    for (const char c : s)
        ++freq[c];

    const int n = static_cast<int>(freq.size());  // number of distinct characters

    // Flatten the table into the parallel arrays Huff_Tree expects.
    std::vector<char> ch;
    std::vector<int> w;
    ch.reserve(freq.size());
    w.reserve(freq.size());
    for (const auto &entry : freq)
    {
        ch.push_back(entry.first);
        w.push_back(entry.second);
    }

    std::vector<Node> huffTree(static_cast<std::size_t>(2 * n - 1));
    Huff_Tree(huffTree.data(), w.data(), ch.data(), n);
    Huff_Code(huffTree.data(), n);

    std::string res;
    for (const char c : s)
        res += huffcode[c];

    if (cho == 1)
    {
        std::cout << "字符編碼:" << std::endl << res << std::endl;
    }
    else
    {
        // The encoded text replaces the content of the input file.
        std::ofstream outfile("Huffman.txt");
        outfile << res;
        outfile.close();
        if (!outfile)
        {
            std::cerr << "文件寫入失敗!" << std::endl;
            return 1;
        }
        std::cout << "文件編碼完成!" << std::endl;
    }

    // Bits per character of a fixed-width code for n symbols; at least one
    // bit, so a single distinct character does not lead to a division by zero.
    int bits = 1;
    while ((1 << bits) < n)
        ++bits;

    // A Huffman code is never longer than the fixed-width code for the same
    // alphabet, so the unsigned subtraction cannot wrap around.
    const std::size_t fixedLength = s.length() * static_cast<std::size_t>(bits);
    std::cout << "壓縮率:" << (fixedLength - res.length()) * 100 / fixedLength
              << "%" << std::endl;

    // Decoding
    std::cout << "輸入合法的編碼:" << std::endl;
    std::cin >> s;
    std::cout << Huff_Decode(huffTree.data(), n, s) << std::endl;

    huffcode.clear();
    return 0;
}
用map容器統計字符頻率,之後按map的遍歷順序(字符升序)把字符和頻率放入數組,再建立結構體數組當作哈夫曼樹。
運行截圖:
文件編碼結果:
目前存在的問題:文件讀取不到空格(用 `infile >> s` 讀取時遇到空白字符就會停止,空格及其後的內容都讀不到)。
可完善的地方:結構體數組可以用int數組代替,但變化過程略複雜;如果改用支持自定義排序的map,就不用另開一個數組。
---end---