本實驗構建最優二叉樹來實現哈夫曼編碼
使用VS2017完成
關於哈夫曼編碼的頭文件huffman.h
// huffman.h
// Declarations for building a Huffman tree over byte values and deriving the
// per-byte code table from it.
#ifndef HUFFMAN_H
#define HUFFMAN_H

#define OK 1     // status value returned by HuffmanCoding on success
#define SIZE 256 // number of distinct byte values, i.e. leaves of the tree

// One node of the Huffman tree. Nodes are stored in an array and refer to
// each other by index; index 0 is never used as a node, so 0 means "none".
struct HTNode
{
    int weight; // weight
    int parent; // parent node
    int lchild; // left child
    int rchild; // right child
};

typedef HTNode *HuffmanTree; // dynamically allocated array holding the Huffman tree
typedef char **HuffmanCode;  // dynamically allocated Huffman code table

// void PreorderTraverse(int root, HuffmanTree pHT);

// Fills pHC[1..SIZE] with the '0'/'1' code string of every leaf of pHT.
int HuffmanCoding(HuffmanCode &pHC, HuffmanTree &pHT);

// Returns the index in 1..nSize of the lightest node that has no parent yet.
int Select(HuffmanTree pHT, int nSize);

// Debug helper: prints every node of the tree in index order.
void TestHufTree(HuffmanTree pHT);

// Debug helper: prints the code of every leaf below root.
void TestHufCode(int root, HuffmanTree pHT, HuffmanCode pHC);

// Debug helper: prints the nodes below root in pre-order.
void TestHufTreeN(int root, HuffmanTree pHT);

// Allocates pHT and builds the Huffman tree for the n weights in w.
int HfmTree(HuffmanTree &pHT, int *w, int n);

#endif
相關實現huffman.cpp
//Huffman.cpp #include<iostream> #include<cstring> #include"huffman.h" #pragma warning( disable : 4996) using namespace std; /* void PreorderTraverse(int root, HuffmanTree pHT) { cout << pHT[root].weight << " ";//訪問節點 if (pHT[root].lchild)//左孩子 { PreorderTraverse(pHT[root].lchild, pHT); } if (pHT[root].rchild)//右孩子 { PreorderTraverse(pHT[root].rchild, pHT); } } */ int HuffmanCoding(HuffmanCode &pHC, HuffmanTree &pHT) { // pHC = (HuffmanCode)malloc((SIZE + 1) * sizeof(char*)); //無棧非遞歸遍歷 char cd[SIZE] = { '\0' };//記錄訪問路徑 int cdlen = 0;//記錄當前路徑長度 for (int i = 1; i < 512; i++) { pHT[i].weight = 0;//遍歷 Huffman樹時用作節點的狀態標志 } int p = 2*SIZE-1;//根節點 while (p != 0) { if (pHT[p].weight == 0)//向左 { pHT[p].weight = 1; if (pHT[p].lchild != 0) { p = pHT[p].lchild; cd[cdlen++] = '0'; } else if (pHT[p].rchild == 0)//登記葉子節點的字符編碼 { pHC[p] = (char*)malloc((cdlen+1) * sizeof(char)); cd[cdlen] = '\0'; strcpy(pHC[p], cd);//復制編碼 } } else if (pHT[p].weight == 1)//向右 { pHT[p].weight = 2; if (pHT[p].rchild != 0)//右孩子為葉子節點 { p = pHT[p].rchild; cd[cdlen++] = '1'; } } else { //退回父節點,編碼長度減1 pHT[p].weight = 0; p = pHT[p].parent; --cdlen; } // printf("*"); } return OK; } int Select(HuffmanTree pHT, int nSize) { int minValue = 0x7FFFFFFF;//最小值 int min = 0; //找到最小權值的元素序號 for (int i = 1; i <= nSize; i++) { if (pHT[i].parent == 0 && pHT[i].weight < minValue) { minValue = pHT[i].weight; min = i; } } return min; } void TestHufTree(HuffmanTree pHT) { for (int i = 1; i < 2*SIZE; i++) { printf("pHT[%d]\t%d\t%d\t%d\t%d\n", i, pHT[i].weight, pHT[i].parent,pHT[i].lchild,pHT[i].rchild); } } int HfmTree(HuffmanTree &pHT, int *w, int n) { int m = 2 * n - 1; pHT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode)); if (!pHT) { cerr << "內存分配失敗! " << endl; return -1; } //初始化樹 HuffmanTree p = pHT + 1;//0號單元不使用 for (int i = 0; i < m; i++) { p->weight = (i < n) ? 
w[i] : 0; p->parent = 0; p->lchild = 0; p->rchild = 0; p++; } for (int i = n + 1; i <= m; i++) { //第一個最小元素 int s1 = Select(pHT, i - 1);//找出前i-1個中最小元素 pHT[s1].parent = i; //第二個最小元素 int s2 = Select(pHT, i - 1); pHT[s2].parent = i; pHT[i].weight = pHT[s1].weight + pHT[s2].weight; pHT[i].lchild = s1; pHT[i].rchild = s2; } return 0; } void TestHufCode(int root, HuffmanTree pHT, HuffmanCode pHC) { if (pHT[root].lchild == 0 && pHT[root].rchild == 0) { printf("0x%02X %s\n", root - 1, pHC[root]); } if (pHT[root].lchild)//訪問左孩子 { TestHufCode(pHT[root].lchild, pHT, pHC); } if (pHT[root].rchild) { TestHufCode(pHT[root].rchild, pHT, pHC); } } void TestHufTreeN(int root, HuffmanTree pHT) { cout << pHT[root].weight << "\t"<<pHT[root].lchild<<"\t"<<pHT[root].rchild<<"\t"<<pHT[root].parent<<"\n"; if (pHT[root].lchild != 0) { TestHufTreeN(pHT[root].lchild, pHT); } if (pHT[root].rchild != 0) { TestHufTreeN(pHT[root].rchild, pHT); } }
壓縮相關操作的頭文件Compress.h
// Compress.h
// Declarations for compressing a file with Huffman coding.
#ifndef COMPRESS_H
#define COMPRESS_H

// Same definition as in huffman.h. Repeating an identical typedef is legal in
// C++ and lets this header compile regardless of include order; if the two
// definitions ever diverge the compiler reports the conflict.
typedef char **HuffmanCode;

// Compresses the file pFilename.
int Compress(const char *pFilename);

// Packs the first 8 characters of a '0'/'1' string into one byte, first
// character in the most significant bit.
char Str2byte(const char *pBinStr);

// Encodes the file pFilename with the code table pHC into pBuffer, which
// holds nSize bytes.
int Encode(const char *pFilename, const HuffmanCode pHC, char *pBuffer, const int nSize);

// Header stored in front of the compressed data.
struct HEAD
{
    char type[4];    // file type
    int length;      // length of the original file
    int weight[256]; // weight of every byte value
};

// Writes the header followed by nSize bytes of pBuffer to "<pFilename>.huf".
int WriteFile(const char *pFilename, const HEAD sHead, const char *pBuffer, const int nSize);

// Fills sHead with the type tag, length and byte weights of pFilename.
int InitHead(const char *pFilename, HEAD &sHead);

#endif
具體實現Compress.cpp
1 //Compress.cpp 2 3 #include"huffman.h" 4 #include"Compress.h" 5 #include<iostream> 6 #pragma warning( disable : 4996) 7 using namespace std; 8 //Compress 9 //InitHead 10 //Encode 11 //Str2byte 12 //WriteFile 13 char Str2byte(const char *pBinStr) 14 { 15 char b = 0x00; 16 for (int i = 0; i < 8; i++) 17 { 18 b = b << 1; 19 if (pBinStr[i] == '1') 20 { 21 b = b | 0x01; 22 } 23 } 24 return b; 25 } 26 27 int Compress(const char *pFilename) 28 { 29 int weight[256] = { 0 }; 30 //以二進制打開文件 31 FILE* in = fopen(pFilename, "rb"); 32 if (in == NULL) 33 { 34 cout << "Failed to open the file!" << endl; 35 exit(0); 36 } 37 cout << "成功打開文件 " << pFilename << endl; 38 int ch; 39 while ((ch = getc(in)) != EOF) 40 { 41 weight[ch]++; 42 } 43 fclose(in); 44 //cout << "Byte Weight" << endl; 45 //for (int i = 0; i < SIZE; i++) 46 //{ 47 // printf("0x%02X %d\n", i, weight[i]); 48 //} 49 50 HuffmanTree hfmt; 51 HfmTree(hfmt, weight, SIZE); 52 cout << "成功生成哈夫曼樹" << endl; 53 // TestHufTree(hfmt); 54 // TestHufTreeN(511, hfmt); 55 HuffmanCode hfmc=(HuffmanCode)malloc((SIZE+1)*sizeof(char*)); 56 // for (int i = 1; i <= SIZE; i++) 57 // hfmt[i].weight = weight[i - 1] 58 //根據哈夫曼樹進行編碼 59 HuffmanCoding(hfmc, hfmt); 60 cout << "成功完成哈夫曼編碼" << endl; 61 // cout << "先序遍歷哈夫曼樹輸出編碼信息:" << endl; 62 // TestHufCode(2 * SIZE - 1, hfmt, hfmc);//測試哈夫曼編碼 63 // cout << "壓縮后的文件編碼:" << endl; 64 65 //計算編碼緩沖區大小 66 int nSize = 0; 67 for (int i = 0; i < 256; i++) 68 { 69 nSize += weight[i] * strlen(hfmc[i+1]); 70 } 71 nSize = (nSize % 8) ? 
nSize / 8 + 1 : nSize / 8; 72 73 // cout <<"nSize = "<<nSize << endl << endl; 74 75 //對原文件進行壓縮編碼 76 char* pBuffer = NULL; 77 pBuffer = (char *)malloc(nSize*sizeof(char)); 78 memset(pBuffer, 0, (nSize) * sizeof(char)); 79 // cout << "begin: " << strlen(pBuffer) << endl; 80 //// cout << "----"; 81 // int n; 82 // cout << "input n:"; 83 // cin >> n; 84 //將編碼寫入緩沖區 85 Encode(pFilename, hfmc, pBuffer, nSize); 86 // cout << "after: " << strlen(pBuffer) << endl; 87 // cout << "len of puf = " << strlen(pBuffer) << endl; 88 // cout << "!pBuffer = " << !pBuffer << endl; 89 if (!pBuffer) 90 { 91 cout << "!pBuffer = " << !pBuffer << endl; 92 return -1; 93 } 94 cout << "\n壓縮完畢" << endl; 95 //for (int i = 1; i < strlen(pBuffer); i++) 96 //{ 97 // printf("%d", pBuffer[i]); 98 //} 99 100 HEAD sHead; 101 InitHead(pFilename, sHead); 102 cout <<"原文件"<< pFilename<<"大小為:" << sHead.length << "Byte" << endl; 103 int len_after = WriteFile(pFilename, sHead, pBuffer, nSize); 104 cout << "大小為:" << len_after << "Byte \n頭文件sHead大小為:" << sizeof(sHead)<<"Byte"<<endl; 105 cout << "壓縮比率:" << (double)len_after * 100 / sHead.length << "%" << endl; 106 free(hfmt); 107 free(hfmc); 108 free(pBuffer); 109 return OK; 110 } 111 112 113 int Encode(const char*pFilename, const HuffmanCode pHC, char *pBuffer, const int nSize) 114 { 115 //開辟緩沖區 116 // cout << "+++++"; 117 FILE* in = fopen(pFilename, "rb"); 118 if (in == NULL) 119 { 120 cout << "Failed to open the file!" 
<< endl; 121 exit(0); 122 } 123 pBuffer = (char*)malloc(nSize * sizeof(char)); 124 if (!pBuffer) 125 { 126 cerr << "開辟緩沖區失敗" << endl; 127 return -1; 128 } 129 cout << "loading"; 130 int sign = 0;//用於控制小數點輸出 131 char cd[SIZE] = { 0 };//工作區 132 int pos = 0;//緩沖區指針 133 int ch; 134 //掃描文件,根據huffmman編碼表對其進行壓縮,壓縮結果暫存到緩沖區中 135 while ((ch = getc(in)) != EOF) 136 { 137 if (sign % 1000 == 1) 138 printf("."); 139 sign++; 140 strcat(cd, pHC[ch+1]);//從HC復制編碼串到cd 141 142 143 //打印壓縮后的文件編碼 144 // printf("%s", pHC[ch + 1]); 145 146 147 //壓縮編碼 148 while (strlen(cd) >= 8) 149 { 150 //截取字符串左邊的8個字符,編碼成字節 151 pBuffer[pos++] = Str2byte(cd); 152 //字符串整體左移8個字節 153 for (int i = 0; i < SIZE - 8; i++) 154 { 155 cd[i] = cd[i + 8]; 156 } 157 } 158 } 159 if (strlen(cd) > 0) 160 { 161 pBuffer[pos++] = Str2byte(cd); 162 } 163 fclose(in); 164 //for (int i = 1; i < nSize; i++) 165 //{ 166 // printf("%d ", pBuffer[i]); 167 //} 168 // cout << endl<<"before: " << strlen(pBuffer) << endl; 169 return OK; 170 } 171 172 int InitHead(const char *pFilename, HEAD &sHead) 173 { 174 //初始化文件頭 175 strcpy(sHead.type, "HUF");//文件類型 176 sHead.length = 0;//原文件長度 177 for (int i = 0; i < SIZE; i++) 178 { 179 sHead.weight[i] = 0; 180 } 181 FILE *in = fopen(pFilename, "rb"); 182 int ch; 183 while ((ch = fgetc(in)) != EOF) 184 { 185 sHead.weight[ch]++; 186 sHead.length++; 187 } 188 fclose(in); 189 in = NULL; 190 return OK; 191 } 192 193 int WriteFile(const char*pFilename, const HEAD sHead, const char * pBuffer, const int nSize) 194 { 195 //生成文件名 196 char filename[256] = { 0 }; 197 strcpy(filename, pFilename); 198 strcat(filename, ".huf"); 199 //以二進制流形式打開文件 200 FILE *out = fopen(filename, "wb"); 201 //寫文件頭 202 fwrite(&sHead, sizeof(char), 1, out); 203 //寫壓縮后的編碼 204 fwrite(pBuffer, sizeof(char), nSize, out); 205 //關閉文件,釋放文件指針 206 fclose(out); 207 out = NULL; 208 cout << "生成壓縮文件:" << filename << endl; 209 int len = sizeof(HEAD) + strlen(pFilename) + 1 + nSize; 210 return len; 211 }
主函數Main.cpp
//Main.cpp #include"huffman.h" #include"Compress.h" #include<iostream> #include<cstdlib> using namespace std; #pragma warning( disable : 4996) int main() { cout << "= = = = = = = =Huffman 文件壓縮= = = = = = = =" << endl; cout << "請輸入文件名:"; char filename[256]; cin>>filename; Compress(filename); // system("pause"); return 0; }