基於哈夫曼樹的數據壓縮算法
描述
輸入一串字符串,根據給定的字符串中字符出現的頻率建立相應的哈夫曼樹,構造哈夫曼編碼表,在此基礎上可以對待壓縮文件進行壓縮(即編碼),同時可以對壓縮後的二進制編碼文件進行解壓(即譯碼)。
輸入
多組數據,每組數據一行,為一個字符串(只考慮26個小寫字母即可)。當輸入字符串為“0”時,輸入結束。
輸出
每組數據輸出2n+3行(n為輸入串中字符類別的個數)。第一行為統計出來的字符出現頻率(只輸出存在的字符,格式為:字符:頻度),每兩組字符之間用一個空格分隔,字符按照ASCII碼從小到大的順序排列。第二行至第2n行為哈夫曼樹的存儲結構的終態(形如教材139頁表5.2(b),一行當中的數據用空格分隔)。第2n+1行為每個字符的哈夫曼編碼(只輸出存在的字符,格式為:字符:編碼),每兩組字符之間用一個空格分隔,字符按照ASCII碼從小到大的順序排列。第2n+2行為編碼後的字符串,第2n+3行為解碼後的字符串(與輸入的字符串相同)。
輸入樣例 1
aaaaaaabbbbbccdddd
aabccc
0
輸出樣例 1
a:7 b:5 c:2 d:4
1 7 7 0 0
2 5 6 0 0
3 2 5 0 0
4 4 5 0 0
5 6 6 3 4
6 11 7 2 5
7 18 0 1 6
a:0 b:10 c:110 d:111
00000001010101010110110111111111111
aaaaaaabbbbbccdddd
a:2 b:1 c:3
1 2 4 0 0
2 1 4 0 0
3 3 5 0 0
4 3 5 2 1
5 6 0 3 4
a:11 b:10 c:0
111110000
aabccc
問題:一開始不明白「存儲結構的終態」是什麼意思,也不知道怎麼得出a、b、c各自用0、1表示的編碼。
`typedef char **HuffmanCode;` 這條語句表示HuffmanCode是char**類型的別名。
char*可以理解為指向一個字符串第一個字的指針。
char**可以理解為字符串數組,
char **a = new char* [10];
for (int i = 0; i < 10; i++) a[i] = new char [30];
這就創建了一個a,a[n]代表第n+1個字符串,a[n][m]表示第n+1個字符串的第m+1個字符。
#include <climits>
#include <cstring>
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>

// Occurrence count of each distinct letter of the current input; coun[k]
// belongs to saveletter[k].
int coun[26];
// Distinct letters of the current input, in ascending ASCII order.
char saveletter[26];
// Bit string ('0'/'1' characters) written by transtonum and read back by
// transtoletter. A std::string is used so that long encodings cannot overflow
// a fixed-size buffer.
std::string temp;

// One node of the Huffman tree. The tree is stored as an array whose slot 0 is
// unused; an index of 0 in lchild/rchild/parent therefore means "none".
typedef struct htnode
{
    int weight;                 // weight of the subtree rooted at this node
    int lchild, rchild, parent; // array indices, 0 when absent
    char data;                  // letter stored in a leaf, '\0' for internal nodes
    int frequency;              // declared but not used anywhere in this program
} *huftree;

// Code table: hc[i] is the NUL-terminated code of leaf i (slot 0 is unused).
typedef char **hufcode;

// Picks the two lightest parentless nodes among hf[1..x].
//
// s1 receives the first index holding the minimum weight. s2 receives the last
// other index holding that same weight if one exists, otherwise the first index
// holding the next larger weight. This tie-breaking determines the shape of the
// tree and must not be changed, or the printed table and codes would differ.
// Neither output is written when no suitable node exists.
void select(huftree &hf, int x, int &s1, int &s2)
{
    // INT_MAX rather than a small magic number, so large weights still work.
    int min = INT_MAX;
    for (int i = 1; i <= x; i++)
    {
        if (hf[i].parent == 0 && hf[i].weight < min)
        {
            min = hf[i].weight;
            s1 = i;
        }
    }

    const int first = s1;
    int cmin = INT_MAX;
    for (int i = 1; i <= x; i++)
    {
        if (hf[i].parent != 0)
            continue;
        const int w = hf[i].weight;
        if ((w > min && w < cmin) || (w == min && i != first))
        {
            cmin = w;
            s2 = i;
        }
    }
}

// Builds the Huffman tree for the n letters described by saveletter/coun.
//
// A tree with n leaves has 2n-1 nodes: hf[1..n] are the leaves, hf[n+1..2n-1]
// are internal nodes and hf[2n-1] is the root. hf is set to NULL when n < 1;
// otherwise it points to a new[] array that the caller must release with
// delete[]. n must not exceed 26 (the size of saveletter/coun).
void Create(huftree &hf, int n)
{
    hf = NULL;
    if (n < 1)
        return;

    const int m = 2 * n - 1;
    hf = new htnode[m + 1](); // value-initialised: every link starts at 0

    // Only the leaves carry a letter; reading saveletter for internal nodes
    // would run past the end of the 26-element array.
    for (int i = 1; i <= n; i++)
    {
        hf[i].weight = coun[i - 1];
        hf[i].data = saveletter[i - 1];
    }

    for (int i = n + 1; i <= m; i++)
    {
        int s1 = 0, s2 = 0;
        select(hf, i - 1, s1, s2);
        hf[s1].parent = i;
        hf[s2].parent = i;
        hf[i].lchild = s1;
        hf[i].rchild = s2;
        hf[i].weight = hf[s1].weight + hf[s2].weight;
    }
}

// Prints the final state of the tree table, one node per line:
// "index weight parent lchild rchild". Prints nothing for an empty tree.
void Show(huftree &hf, int x)
{
    if (hf == NULL)
        return;
    for (int i = 1; i <= 2 * x - 1; i++)
    {
        std::cout << i << " " << hf[i].weight << " " << hf[i].parent << " "
                  << hf[i].lchild << " " << hf[i].rchild << '\n';
    }
}

// Counts the lowercase letters of str, fills saveletter/coun with the letters
// that occur (ascending) and their counts, stores the number of distinct
// letters in n and prints "letter:count" pairs separated by single spaces.
// Characters outside 'a'..'z' are ignored. hf is not used.
void count(char str[], huftree &hf, int &n)
{
    (void)hf;

    int num[26] = {0};
    for (int i = 0; str[i] != '\0'; i++)
    {
        if (str[i] >= 'a' && str[i] <= 'z')
            num[str[i] - 'a']++;
    }

    n = 0;
    for (int i = 0; i < 26; i++)
    {
        if (num[i] != 0)
        {
            saveletter[n] = static_cast<char>('a' + i);
            coun[n] = num[i];
            n++;
        }
    }

    for (int k = 0; k < n; k++)
    {
        if (k > 0)
            std::cout << " ";
        std::cout << saveletter[k] << ":" << coun[k];
    }
    std::cout << '\n';
}

// Derives the code of every leaf by walking from the leaf up to the root
// ('0' for a left edge, '1' for a right edge) and prints "letter:code" pairs.
//
// hc is set to a new[] array of n+1 pointers; hc[1..n] are new[] strings and
// hc[0] is NULL. The caller releases each string and the array with delete[].
// For n >= 1, hf must be the tree built by Create for the same n. When the
// tree is a single leaf, that leaf gets the one-bit code "0" so the encoded
// text is not empty.
void hfcode(huftree &hf, hufcode &hc, int n)
{
    const int leaves = n > 0 ? n : 0;
    hc = new char *[leaves + 1];
    hc[0] = NULL;

    if (leaves > 0)
    {
        // A code has at most n-1 bits (n >= 2) or exactly 1 bit (n == 1),
        // so n characters plus the terminator always suffice.
        char *cd = new char[leaves + 1];
        cd[leaves] = '\0';

        for (int i = 1; i <= leaves; i++)
        {
            int start = leaves;
            int child = i;
            for (int up = hf[i].parent; up != 0; up = hf[up].parent)
            {
                cd[--start] = (hf[up].lchild == child) ? '0' : '1';
                child = up; // climb one level
            }
            if (start == leaves)
                cd[--start] = '0'; // the leaf is the root itself

            hc[i] = new char[leaves - start + 1];
            std::strcpy(hc[i], &cd[start]);
        }
        delete[] cd;
    }

    for (int j = 1; j <= leaves; j++)
    {
        if (j > 1)
            std::cout << " ";
        std::cout << saveletter[j - 1] << ":" << hc[j];
    }
    std::cout << '\n';
}

// Encodes str with the code table hc: prints the bit string and stores it in
// the global temp (replacing any previous content). Characters that have no
// entry in saveletter produce no output. hf is not used.
void transtonum(huftree &hf, hufcode &hc, int n, char str[])
{
    (void)hf;

    temp.clear();
    for (int i = 0; str[i] != '\0'; i++)
    {
        for (int j = 1; j <= n; j++)
        {
            if (str[i] == saveletter[j - 1])
            {
                std::cout << hc[j];
                temp += hc[j];
            }
        }
    }
    std::cout << '\n';
}

// Decodes the bit string held in temp by walking the tree from the root
// (index 2n-1) and prints the recovered text followed by a newline.
// hc is not used.
void transtoletter(huftree &hf, hufcode &hc, int n)
{
    (void)hc;

    if (n >= 1 && hf != NULL)
    {
        const int root = 2 * n - 1;
        if (n == 1)
        {
            // Single-leaf tree: there are no edges to follow, every bit
            // stands for the only letter.
            for (std::string::size_type j = 0; j < temp.size(); j++)
                std::cout << hf[root].data;
        }
        else
        {
            int cur = root;
            for (std::string::size_type j = 0; j < temp.size(); j++)
            {
                if (temp[j] == '0')
                    cur = hf[cur].lchild;
                else if (temp[j] == '1')
                    cur = hf[cur].rchild;

                if (hf[cur].lchild == 0) // reached a leaf
                {
                    std::cout << hf[cur].data;
                    cur = root;
                }
            }
        }
    }
    std::cout << '\n';
}

// Reads whitespace-separated strings until one starting with '0' (or end of
// input) and, for each, prints the letter frequencies, the tree table, the
// code table, the encoded bit string and the decoded text.
int main()
{
    std::string input;
    while (std::cin >> input)
    {
        if (input[0] == '0')
            break;

        // The helpers take a mutable, NUL-terminated char array.
        std::vector<char> str(input.begin(), input.end());
        str.push_back('\0');

        huftree hf = NULL;
        hufcode hc = NULL;
        int n = 0;

        count(&str[0], hf, n);
        Create(hf, n);
        Show(hf, n);
        hfcode(hf, hc, n);
        transtonum(hf, hc, n, &str[0]);
        transtoletter(hf, hc, n);

        // Reset the shared state for the next data set.
        std::memset(coun, 0, sizeof(coun));
        std::memset(saveletter, '\0', sizeof(saveletter));
        temp.clear();

        // Everything was allocated with new[], so release it with delete[].
        for (int i = 1; i <= n; i++)
            delete[] hc[i];
        delete[] hc;
        delete[] hf;
    }
    return 0;
}