Huffman編碼和解碼(C++)


關於哈夫曼樹的講解,已有珠玉在前,我就不贅述了。

 

 

 基本原理:統計字符串內各字符的出現頻率,由此建立哈夫曼樹,頻率越高的字符離根結點越近,原則上左子樹頻率小於右子樹。從根結點一路訪問到葉子結點,把路徑上每個分支的標記(左分支記0,右分支記1)依次連接起來,即為該葉子結點字符的編碼;由於任一字符的編碼都不是其他編碼的前綴,所以編碼獨一無二。解碼過程就是按編碼的每一位從根結點向下遍歷哈夫曼樹、到達葉子結點即輸出對應字符的過程。

編程實踐:實現對純英文字符串和文件的哈夫曼編碼和解碼。

代碼如下:

#include <math.h>
#include <stdlib.h>

#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
map<char,string> huffcode;       // symbol -> Huffman code lookup table; filled by Huff_Code, read and cleared by main

// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index; -1 means "no such node".
// Every member has a default so that a freshly created node is an empty,
// detached node rather than holding indeterminate values.
struct Node
{
    double weight = 0;   // symbol frequency for a leaf, summed child weights for an internal node
    char ch = '\0';      // symbol stored in a leaf; only leaves are given a symbol
    std::string code;    // code of a leaf as a string of '0'/'1' characters
    int lchild = -1;     // index of the left child, -1 if none
    int rchild = -1;     // index of the right child, -1 if none
    int parent = -1;     // index of the parent, -1 while the node has not been merged
};

// Finds the two lightest nodes without a parent among huffTree[0 .. n-1].
//
//   huffTree  node array; a node is a candidate while its parent is -1
//   a         receives the index of the lightest candidate
//   b         receives the index of the second lightest candidate
//   n         number of leading array entries to examine
//
// Ties are resolved in favour of the lower index. An output is left untouched
// when no candidate exists for it (fewer than two parentless nodes).
void Select(Node huffTree[], int *a, int *b, int n)
{
    // Candidates are tracked by index (-1 = none found yet) instead of by a
    // sentinel weight, so nodes whose weight is 0 are treated like any other.
    int first = -1;
    for (int i = 0; i < n; i++)
    {
        if (huffTree[i].parent != -1)   // already merged into a subtree
            continue;
        if (first == -1 || huffTree[i].weight < huffTree[first].weight)
            first = i;
    }

    int second = -1;
    for (int i = 0; i < n; i++)
    {
        if (huffTree[i].parent != -1 || i == first)   // merged, or already chosen
            continue;
        if (second == -1 || huffTree[i].weight < huffTree[second].weight)
            second = i;
    }

    // first is never heavier than second, so *a is the lighter of the pair;
    // Huff_Tree uses *a as the left child and *b as the right child.
    if (first != -1)
        *a = first;
    if (second != -1)
        *b = second;
}

// Builds a Huffman tree for n symbols in huffTree.
//
//   huffTree  node array with room for 2 * n - 1 entries; entries 0 .. n-1
//             become the leaves, entries n .. 2n-2 the merged internal nodes
//   w         weight of each symbol (n entries)
//   ch        the symbols themselves (n entries)
//   n         number of distinct symbols
void Huff_Tree(Node huffTree[], int w[], char ch[], int n)
{
    const int total = 2 * n - 1;

    // Detach every node and clear its code; leaves also get weight and symbol.
    for (int idx = 0; idx < total; ++idx)
    {
        Node &node = huffTree[idx];
        node.parent = -1;
        node.lchild = -1;
        node.rchild = -1;
        node.code = "";
        if (idx < n)
        {
            node.weight = w[idx];
            node.ch = ch[idx];
        }
    }

    // Repeatedly merge the two nodes picked by Select into the next free slot.
    int next = n;
    while (next < total)
    {
        int left = 0;
        int right = 0;
        Select(huffTree, &left, &right, next);

        huffTree[left].parent = next;
        huffTree[right].parent = next;

        Node &merged = huffTree[next];
        merged.weight = huffTree[left].weight + huffTree[right].weight;
        merged.lchild = left;
        merged.rchild = right;

        ++next;
    }
}

// Derives the code of each of the n leaves, prints it, stores it in the leaf's
// code member and records it in the global huffcode table.
//
//   huffTree  tree whose first n entries are the leaves
//   n         number of leaves
//
// A left branch contributes '0' and a right branch '1'.
void Huff_Code(Node huffTree[], int n)
{
    for (int i = 0; i < n; i++)
    {
        // Climb from the leaf to the root, noting which side we came from.
        // This yields the code back to front.
        string path;
        for (int j = i; huffTree[j].parent != -1; j = huffTree[j].parent)
        {
            const int up = huffTree[j].parent;
            path += (j == huffTree[up].lchild) ? '0' : '1';
        }

        // Reverse to get the root-to-leaf order.
        string code(path.rbegin(), path.rend());

        // A leaf without a parent is the whole tree (single-symbol alphabet)
        // and has an empty path; give it a one-bit code so the symbol can
        // still be encoded and decoded.
        if (code.empty())
            code = "0";

        // Assign rather than append so that a repeated call stays correct.
        huffTree[i].code = code;
        huffcode[huffTree[i].ch] = code;

        cout << "字符 " << huffTree[i].ch << " 的編碼:" << code << endl;
    }
}

// Decodes the bit string s by matching growing prefixes against the codes of
// the n leaves in huffTree. Prints a heading to standard output and returns
// the decoded text, or an error message when the bits left over at the end of
// s match no code.
string Huff_Decode(Node huffTree[], int n,string s)
{
    cout << "解碼后為:";

    string decoded;   // text recovered so far
    string pending;   // bits read since the last complete code
    const string::size_type len = s.size();

    for (string::size_type pos = 0; pos < len; ++pos)
    {
        pending += s[pos];

        // Look for a leaf whose code equals the pending bits.
        int hit = -1;
        for (int k = 0; k < n; ++k)
        {
            if (pending == huffTree[k].code)
            {
                hit = k;
                break;
            }
        }

        if (hit != -1)
        {
            decoded += huffTree[hit].ch;
            pending = "";
        }
        else if (pos + 1 == len && n > 0)
        {
            // Input exhausted with unmatched bits left over.
            decoded = "解碼錯誤!";
        }
    }
    return decoded;
}

int main(){
    cout << "編碼字符串或文件(1/2):";
    int cho,n;   //n是編碼個數
    cin >> cho;
    string s,res;
    if(cho == 1)
    {
        cout << "輸入字符串:" <<endl;
        cin >> s;
    }
    else if(cho == 2)
    {
        //cout << "輸入文件路徑:" <<endl;
        //cin >> s;
        ifstream infile;
        infile.open("Huffman.txt");
        infile >> s;
        infile.close();
        cout << s << endl;
    }
    else
        cout << "輸入錯誤!" <<endl;

    if(cho == 1 || cho ==2)
    {
        string res = "";
        int i;
        map<char,int> mp;
        for(i=0;i<s.length();i++){
            if(mp.count(s[i]) == 0)
                mp[s[i]] = 1;
            else
                mp[s[i]] += 1;
        }
        map<char,int>::iterator iter;
        iter = mp.begin();   //聲明迭代器

        n=mp.size(),i=0;
        Node huffTree[2*n-1];    //所有節點數
        char ch[n];
        int w[n];
        while(iter != mp.end())
        {
            ch[i] = iter->first;
            w[i] = iter->second;
            iter++;
            i++;
        }
        mp.clear();

        Huff_Tree(huffTree, w, ch, n);
        Huff_Code(huffTree, n);
        for(i=0;i<s.length();i++)
            res += huffcode[s[i]];

        if(cho == 1)
            cout << "字符編碼:" << endl << res << endl;
        else{
            ofstream outfile;
            outfile.open("Huffman.txt");
            outfile << res;
            outfile.close();
            cout << "文件編碼完成!" <<endl;
        }

        i = 0;
        while(pow(2,i) < n)
            i++;
        cout << "壓縮率:" << (s.length()*i-res.length())*100/(s.length()*i) << "%" <<endl;

        //解碼過程
        cout << "輸入合法的編碼:" <<endl;
        cin >> s;
        cout << Huff_Decode(huffTree, n, s)<< endl;
        huffcode.clear();
    }

    return 0;
}

  

用map容器統計字符頻率,之後放入數組排序,建立結構體數組當作哈夫曼樹。

運行截圖:

 

 

 

 

 

文件編碼結果:

 

 目前存在的問題:文件讀取不到空格。

 

 

 可完善的地方:結構體數組可以用int數組代替,但變化過程略複雜;如果讓map使用自定義排序,就不用另開一個數組。

---end---


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM