壓縮軟件:
給定一篇文章,只含有英文大小寫字母和空格,以.txt格式存儲,統計該文件中各種字符的頻率,對各字符進行Huffman編碼,將該文件翻譯成Huffman編碼文件,再將Huffman編碼文件翻譯成源文件。
創建結構體數組,數組的每個元素存有字符、頻率、父節點下標和左右孩子的下標。假設有n個葉結點,先統計每個字符出現的頻率當做權值,每次找出兩個權值最小且尚無父節點的結點,以兩者權值之和作為新結點的權值存入結構體數組中,重復n-1次,得到哈夫曼樹。從葉結點出發,一直找父節點,直至到達根(即par = 0),左邊為0,右邊為1,逆序得到哈夫曼編碼;解碼時按位讀取,有映射關係直接輸出,否則繼續向後讀取。
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iterator>
#include <map>
#include <string>
#include <vector>

// Huffman "compressor" demo.
//   1.txt : source text (input)
//   2.txt : the text rewritten as a string of '0'/'1' characters (output)
//   3.txt : 2.txt decoded back to text (output)
// All three paths are relative, so they are resolved against the current
// working directory.

// Occurrence count of every character in the source text. main() resets an
// entry to 0 once the matching leaf has been created.
std::map<char, int> M;
// Huffman code ('0'/'1' string) of every character; filled by encode() and
// read by decode().
std::map<char, std::string> M1;
// Complete contents of 1.txt. A std::string is used so that inputs of any
// length are accepted.
std::string s;

// One node of the Huffman tree, stored in a flat array.
// Index 0 is reserved as the "no node" marker, so par/l/r == 0 means
// "no parent / no child".
struct node
{
    int weight; // frequency of the leaf, or sum of the children's weights
    int par;    // index of the parent, 0 for the root / not yet merged
    int l;      // index of the left child, 0 for a leaf
    int r;      // index of the right child, 0 for a leaf
    char c;     // character represented by a leaf (unused for inner nodes)
};

// Finds the two lightest nodes in Htree[1..m] that have no parent yet.
//   min1 : index of the lightest such node (0 if there is none)
//   min2 : index of the second lightest such node (0 if there is none)
// On equal weights the lowest index wins. The tree itself is not modified.
void select(node *Htree, int m, int &min1, int &min2)
{
    min1 = 0;
    min2 = 0;
    for (int i = 1; i <= m; i++)
    {
        if (Htree[i].par != 0)
            continue;
        if (min1 == 0 || Htree[i].weight < Htree[min1].weight)
            min1 = i;
    }
    for (int i = 1; i <= m; i++)
    {
        if (Htree[i].par != 0 || i == min1)
            continue;
        if (min2 == 0 || Htree[i].weight < Htree[min2].weight)
            min2 = i;
    }
}

// Builds the Huffman tree over the n leaves stored in Htree[1..n].
// The n-1 inner nodes are written to Htree[n+1..2n-1]; the last one is the
// root. The array must therefore hold at least 2*n elements.
void H_tree(node *Htree, int n)
{
    // `now` is the index of the node being created; every node below it is a
    // candidate for merging.
    for (int now = n + 1; now < 2 * n; now++)
    {
        int min1 = 0, min2 = 0;
        select(Htree, now - 1, min1, min2);
        Htree[now].weight = Htree[min1].weight + Htree[min2].weight;
        Htree[now].par = 0;
        Htree[now].l = min1;
        Htree[now].r = min2;
        Htree[min1].par = now;
        Htree[min2].par = now;
    }
}

// Derives the code of each of the n leaves, prints the code table, stores it
// in M1 and writes the first `len` characters of `s` to 2.txt as a '0'/'1'
// string.
void encode(node *Htree, int n, int len)
{
    std::puts("編碼規則為:");
    for (int i = 1; i <= n; i++)
    {
        // Walk from the leaf up to the root: a left edge contributes '0', a
        // right edge '1'. The bits are collected leaf-to-root and reversed.
        std::string code;
        for (int j = i; Htree[j].par != 0; j = Htree[j].par)
            code += (Htree[Htree[j].par].l == j) ? '0' : '1';
        std::reverse(code.begin(), code.end());

        // With a single distinct character the leaf is the root and the walk
        // yields no bits; give it a one-bit code so the text stays decodable.
        if (code.empty())
            code = "0";

        std::printf("%c : %s\n", Htree[i].c, code.c_str());
        M1[Htree[i].c] = code;
    }

    std::puts("源碼 -> 哈夫曼 見2.txt !");
    std::ofstream savefile("2.txt");
    if (!savefile)
    {
        std::fprintf(stderr, "cannot open 2.txt for writing\n");
        return;
    }
    const std::size_t wanted = len < 0 ? 0 : static_cast<std::size_t>(len);
    const std::size_t count = std::min(wanted, s.size());
    for (std::size_t i = 0; i < count; i++)
    {
        const std::map<char, std::string>::const_iterator it = M1.find(s[i]);
        if (it != M1.end())
            savefile << it->second;
    }
}

// Decodes the first line of 2.txt with the code table in M1 and writes the
// recovered text to 3.txt. Htree is not needed because the codes are
// prefix-free; the parameter is kept for the existing call site.
void decode(node *Htree)
{
    (void)Htree;
    std::puts("哈夫曼 -> 源碼 見3.txt !");

    std::ifstream encoded("2.txt");
    if (!encoded)
    {
        std::fprintf(stderr, "cannot open 2.txt for reading\n");
        return;
    }
    std::string bits;
    std::getline(encoded, bits); // everything up to the first newline

    std::ofstream decoded("3.txt");
    if (!decoded)
    {
        std::fprintf(stderr, "cannot open 3.txt for writing\n");
        return;
    }

    // Invert the table once so that each prefix is a single lookup.
    std::map<std::string, char> symbol_of;
    for (std::map<char, std::string>::const_iterator it = M1.begin(); it != M1.end(); ++it)
        symbol_of[it->second] = it->first;

    // Grow the pending prefix bit by bit; as soon as it equals a code, emit
    // the character and start over.
    std::string pending;
    for (std::size_t i = 0; i < bits.size(); i++)
    {
        pending += bits[i];
        const std::map<std::string, char>::const_iterator hit = symbol_of.find(pending);
        if (hit != symbol_of.end())
        {
            decoded << hit->second;
            pending.clear();
        }
    }
}

// Reads 1.txt, builds the Huffman tree for its characters, then produces
// 2.txt (encoded) and 3.txt (decoded again).
int main()
{
    // 1.txt has to exist before the program is started.
    std::ifstream input("1.txt");
    if (!input)
    {
        std::puts("請先在源目錄創建1.txt!");
        return 0;
    }
    s.assign(std::istreambuf_iterator<char>(input), std::istreambuf_iterator<char>());
    input.close();
    const int len = static_cast<int>(s.size());

    // Count how often each character occurs.
    for (int i = 0; i < len; i++)
        M[s[i]]++;

    // A Huffman tree with k leaves has 2*k-1 nodes; slot 0 is the reserved
    // "no node" marker, giving 2*k slots. The vector zero-initialises every
    // node and releases the storage automatically.
    const std::size_t distinct = M.size();
    std::vector<node> tree(distinct > 0 ? 2 * distinct : 1);

    // Create the leaves in order of first appearance in the text.
    int n = 0;
    for (int i = 0; i < len; i++)
    {
        int &freq = M[s[i]];
        if (freq != 0)
        {
            node &leaf = tree[++n];
            leaf.weight = freq;
            leaf.c = s[i];
            freq = 0; // mark as handled so the character yields only one leaf
        }
    }

    H_tree(&tree[0], n);
    encode(&tree[0], n, len);
    decode(&tree[0]);
    return 0;
}
源目錄1.txt內容如下
as6d54as56d as165f 1asf 0as as0ff a0 fasf as sf a sa f
測試結果
運行產生了2.txt和3.txt
出現的問題是對文件的操作不熟悉,又復習一波c和c++的文件操作。總的來說本次實驗不難,就是一個模擬的過程,非常考驗耐心和毅力。