數據結構（四）樹---哈夫曼樹了解以及代碼實現

本文轉載自查看原文 2018-08-14 09:43 2302 數據結構

哈夫曼樹

（一）定義

帶權路徑長度WPL：設二叉樹有 n 個葉子結點，第 k 個葉子結點的權值為 $w_k$，從根結點到該葉子結點的路徑長度為 $l_k$，則 $WPL=\sum_{k=1}^{n} w_k l_k$。

哈夫曼樹（最優二叉樹）：

WPL最小的二叉樹

（二）構造

將權值從小到大排序，然後將權值最小的兩個結點合併成一棵新的二叉樹，新根結點的權值為兩者之和；再把新結點放回序列中重新排序，重複此過程直到只剩下一棵樹。例如：

A5,E10,B15,D30,C40

（三）哈夫曼樹特點

1.沒有度為1的結點

2.n個葉子節點的哈夫曼樹共有2n-1個結點

二叉樹的性質：度為2的結點數 n2 與葉結點數 n0 滿足關係 n2 = n0 - 1
所以：當葉結點為n時，度為二的結點數為n-1
因為哈夫曼沒有度為一的結點，所以一共在樹中有2n-1個結點

3.哈夫曼樹任意非葉結點的左右子樹交換后還是哈夫曼樹

4.對同一組權值{w1,w2,...,wn},是會存在不同結構的哈夫曼樹

哈夫曼編碼

固定一段字符串，如何對字符串進行編碼，可以使得該字符串的編碼存儲空間最少

例如一串文字BADCADFEED,我們要在網絡中傳遞，顯然是要傳遞二進制(0/1)來表示。

法一：直接傳遞字符的ASCII碼，每個字符占八位，一共傳遞80位

法二：我們發現數據只是從A-F,一共6個字符，我們完全可以使用3位二進制來表示這些數據（網絡對方需要知道我們的編碼才能解碼）

001000011010000011101100100011(30)

變為傳遞30位數據，對法一進行了極大的優化。

法三：我們發現一段文字中各個字母出現的頻率是不一樣的（各個字母的頻率相加為100%），可以使用哈夫曼編碼，對數據再次進行壓縮

假設各個字母頻率為
A 27,B 8,C 15,D 15,E 30,F 5

1.先構造哈夫曼樹

2.獲取前綴碼

（1）左右分支分別用0，1表示（避免了二義性） （2）字符只在葉子節點

哈夫曼壓縮後的數據二進制串:1001010010101001000111100(25)

哈夫曼編碼實現

頭文件

#pragma once
#ifndef _HUFFMAN_H
#define _HUFFMAN_H

/*
 * The next two structs describe the Huffman tree.
 *
 * htNode is one tree node. Leaves carry the encoded character in `symbol`
 * and have both child pointers set to NULL; internal nodes link two subtrees.
 */
typedef struct _htNode {
    char symbol;              /* character stored at a leaf */
    struct _htNode *left;     /* subtree reached by a '0' bit, or NULL */
    struct _htNode *right;    /* subtree reached by a '1' bit, or NULL */
} htNode;

/* Handle for a Huffman tree: holds only a pointer to the tree's root node. */
typedef struct _htTree
{
    htNode* root;    /* top node of the tree */
}htTree;

//The next two structs describe the Huffman code table.
//hlNode is one table entry: a character and its prefix code, chained in a singly linked list.
typedef struct _hlNode
{
    char symbol;    //character this entry encodes
    char* code;        //prefix code as a string of '0'/'1', e.g. "0001" plus the terminating '\0'
    struct _hlNode* next;    //next entry in the table, or NULL at the end
}hlNode;

/* Huffman code table: a singly linked list of hlNode entries with head and tail pointers. */
typedef struct _hlTable
{
    hlNode *first;    /* first entry, or NULL when the table is empty */
    hlNode *last;     /* last entry, kept so new entries can be appended directly */
}hlTable;

//Build a Huffman tree from the character frequencies of the string str
htTree* buildTree(char* str);
//Build the prefix-code table from a Huffman tree
hlTable* buildTable(htTree* HT);
//Encode str (a string of ASCII characters) using the code table and print the result
void encode(hlTable* ht, char *str);
//Decode str (a string of bits such as "00001101") using the Huffman tree and print the result
void decode(htTree* ht,char *str);

#endif // !_HUFFMAN_H

huffman.h

#pragma once
#ifndef _QUEUE_H
#define _QUEUE_H

#include "huffman.h"

/* Element type stored in the priority queue: a pointer to a Huffman tree node. */
#define TYPE htNode *

/* Maximum number of elements the queue accepts; addPQueue rejects insertions beyond it. */
#define MAX_SZ 256

/* One link of the priority queue: a stored value, its priority, and the next link. */
typedef struct _pQueueNode
{
    TYPE val;    /* payload (Huffman tree node pointer) */
    unsigned int priority;    /* sort key; smaller values sit nearer the front */
    struct _pQueueNode* next;    /* next link, or NULL at the tail */
}pQueueNode;

/* Priority queue implemented as a singly linked list kept in ascending priority order. */
typedef struct _pQueue
{
    pQueueNode* first;    /* head of the list (lowest priority value), or NULL when empty */
    unsigned int size;    //number of stored elements; unsigned to extend the usable range (original author's note)
}pQueue;

void initPQueue(pQueue** queue);    //allocate and initialise an empty queue
void addPQueue(pQueue** queue,TYPE val,unsigned int priority);    //insert a value at its priority-ordered position
TYPE getPQueue(pQueue** queue);    //remove and return the value at the front of the queue

#endif

queue.h

源文件

#include "queue.h"
#include <stdio.h>
#include <stdlib.h>

/*
 * Allocate an empty priority queue and store it in *queue.
 *
 * queue: out-parameter that receives the new queue. If the allocation
 *        fails, *queue is set to NULL, so callers must check it before use.
 *        A NULL `queue` argument is ignored.
 */
void initPQueue(pQueue** queue)
{
    pQueue *fresh;

    if (queue == NULL)
    {
        return;
    }

    fresh = malloc(sizeof *fresh);
    if (fresh != NULL)
    {
        fresh->size = 0;
        fresh->first = NULL;    /* empty list: no head node yet */
    }
    *queue = fresh;
}

/*
 * Insert val into the queue at its priority-ordered position.
 *
 * The list is kept in ascending priority order, so the element with the
 * smallest priority value is always at the front. A new element is placed
 * in front of existing elements that have the same priority.
 *
 * queue:    address of the queue pointer; a NULL queue is ignored.
 * val:      value to store.
 * priority: sort key; smaller values are dequeued first.
 *
 * If the queue already holds MAX_SZ elements, a message is printed and the
 * value is not stored. If memory for the new link cannot be allocated, an
 * error is written to stderr and the value is not stored.
 */
void addPQueue(pQueue** queue, TYPE val, unsigned int priority)
{
    pQueue *q;
    pQueueNode *node;
    pQueueNode **link;

    if (queue == NULL || *queue == NULL)
    {
        return;
    }
    q = *queue;

    /* queue is full */
    if (q->size == MAX_SZ)
    {
        printf("\nQueue is full\n");
        return;
    }

    node = malloc(sizeof *node);
    if (node == NULL)
    {
        fprintf(stderr, "addPQueue: out of memory\n");
        return;
    }
    node->priority = priority;
    node->val = val;

    /* Empty queue: the new link becomes the only element. */
    if (q->size == 0 || q->first == NULL)
    {
        node->next = NULL;
        q->first = node;
        q->size++;
        return;
    }

    /*
     * Walk the chain of `next` pointers until we reach the first link whose
     * priority is not smaller than the new one (or the end of the list),
     * then splice the new link in at that position. Using a pointer to the
     * link field handles head, middle and tail insertion uniformly.
     */
    link = &q->first;
    while (*link != NULL && (*link)->priority < priority)
    {
        link = &(*link)->next;
    }
    node->next = *link;
    *link = node;
    q->size++;
}

/*
 * Remove the front element (smallest priority value) and return its value.
 *
 * queue: address of the queue pointer.
 *
 * Returns the stored value, or NULL after printing "queue is empty" when
 * there is nothing to remove. The list link that held the value is freed
 * here; the returned value itself still belongs to the caller.
 */
TYPE getPQueue(pQueue** queue)
{
    pQueueNode *head;
    TYPE returnVal;

    if (queue == NULL || *queue == NULL ||
        (*queue)->size == 0 || (*queue)->first == NULL)
    {
        printf("queue is empty\n");
        return NULL;    /* a value-returning function must return something */
    }

    head = (*queue)->first;
    returnVal = head->val;
    (*queue)->first = head->next;
    (*queue)->size--;
    free(head);         /* the link is no longer reachable once unlinked */

    return returnVal;
}

queue.c

#include "huffman.h"
#include "queue.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Recursively free a (sub)tree of Huffman nodes; NULL is accepted. */
static void freeSubtree(htNode *node)
{
    if (node == NULL)
    {
        return;
    }
    freeSubtree(node->left);
    freeSubtree(node->right);
    free(node);
}

/* Free every tree still held by the queue, then the queue itself. */
static void discardQueue(pQueue *queue)
{
    while (queue->size != 0 && queue->first != NULL)
    {
        freeSubtree(getPQueue(&queue));
    }
    free(queue);
}

/*
 * Build a Huffman tree from the character frequencies of str.
 *
 * str: NUL-terminated input string.
 *
 * Returns a newly allocated tree owned by the caller. For an empty string
 * the returned tree has root == NULL. Returns NULL if str is NULL or if
 * memory cannot be allocated.
 */
htTree* buildTree(char* str)
{
    unsigned int counts[256] = { 0 };    /* occurrences of each byte value */
    pQueue *queue = NULL;
    htTree *ht;

    if (str == NULL)
    {
        return NULL;
    }

    /* Single pass over the string; no repeated length computation. */
    for (const char *p = str; *p != '\0'; p++)
    {
        counts[(unsigned char)*p]++;
    }

    /* The priority queue drives the construction of the tree. */
    initPQueue(&queue);
    if (queue == NULL)
    {
        return NULL;
    }

    /* One leaf per character that occurs, prioritised by its count. */
    for (int k = 0; k < 256; k++)
    {
        if (counts[k] != 0)
        {
            htNode *leaf = malloc(sizeof *leaf);
            if (leaf == NULL)
            {
                discardQueue(queue);
                return NULL;
            }
            leaf->left = NULL;
            leaf->right = NULL;
            leaf->symbol = (char)k;
            addPQueue(&queue, leaf, counts[k]);
        }
    }

    /* Repeatedly merge the two lowest-weight trees until one remains. */
    while (queue->size > 1)
    {
        htNode *left, *right, *parent;
        /*
         * The weights live in the queue links, not in the tree nodes, so
         * they must be read before the two entries are dequeued.
         */
        unsigned int weight = queue->first->priority
                            + queue->first->next->priority;

        left = getPQueue(&queue);
        right = getPQueue(&queue);

        parent = malloc(sizeof *parent);
        if (parent == NULL)
        {
            freeSubtree(left);
            freeSubtree(right);
            discardQueue(queue);
            return NULL;
        }
        parent->symbol = '\0';    /* internal nodes carry no character */
        parent->left = left;
        parent->right = right;

        addPQueue(&queue, parent, weight);
    }

    ht = malloc(sizeof *ht);
    if (ht == NULL)
    {
        discardQueue(queue);
        return NULL;
    }

    /* The last remaining entry is the root; an empty input leaves none. */
    ht->root = (queue->size == 1) ? getPQueue(&queue) : NULL;
    free(queue);
    return ht;
}

/*
 * Walk the tree depth-first and append one table entry per leaf.
 *
 * root:  current node; NULL is ignored.
 * table: address of the table that receives the entries.
 * level: depth of `root`, i.e. the number of code characters already in `code`.
 * code:  scratch buffer holding the path from the tree root to `root`
 *        ('0' = left branch, '1' = right branch). It must have room for the
 *        deepest path plus the terminating '\0'.
 *
 * A tree consisting of a single leaf (only one distinct character) gets the
 * one-bit code "0" so that the character still produces output when encoded.
 * If memory for an entry cannot be allocated, an error is written to stderr
 * and that entry is omitted.
 */
void preOrderGetTb(htNode* root,hlTable **table,int level, char* code)
{
    hlNode *entry;
    size_t length;

    if (root == NULL || table == NULL || *table == NULL || code == NULL)
    {
        return;
    }

    /* Internal node: extend the path and descend into each child. */
    if (root->left || root->right)
    {
        if (root->left)
        {
            code[level] = '0';
            preOrderGetTb(root->left, table, level + 1, code);
        }
        if (root->right)
        {
            code[level] = '1';
            preOrderGetTb(root->right, table, level + 1, code);
        }
        return;
    }

    /* Leaf reached at the root itself: give the lone symbol a real code. */
    if (level == 0)
    {
        code[level++] = '0';
    }
    code[level] = '\0';
    length = (size_t)level + 1;    /* code characters plus terminator */

    entry = malloc(sizeof *entry);
    if (entry == NULL)
    {
        fprintf(stderr, "preOrderGetTb: out of memory\n");
        return;
    }
    entry->code = malloc(length);
    if (entry->code == NULL)
    {
        fprintf(stderr, "preOrderGetTb: out of memory\n");
        free(entry);
        return;
    }
    memcpy(entry->code, code, length);
    entry->symbol = root->symbol;
    entry->next = NULL;

    /* Append to the tail of the table. */
    if ((*table)->first == NULL)
    {
        (*table)->first = entry;
    }
    else
    {
        (*table)->last->next = entry;
    }
    (*table)->last = entry;
}


/*
 * Build the prefix-code table for a Huffman tree.
 *
 * HT: tree produced by buildTree. A NULL tree or a tree with a NULL root
 *     yields an empty table.
 *
 * Returns a newly allocated table owned by the caller, or NULL if the table
 * itself cannot be allocated.
 */
hlTable* buildTable(htTree* HT)
{
    /*
     * buildTree creates at most 256 leaves (one per byte value), so a path
     * can be up to 255 branches long; with the terminating '\0' that needs
     * 256 bytes.
     */
    enum { CODE_BUF_LEN = 256 };
    char code[CODE_BUF_LEN] = { 0 };
    hlTable *hl;

    hl = malloc(sizeof *hl);
    if (hl == NULL)
    {
        return NULL;
    }
    hl->first = NULL;
    hl->last = NULL;

    if (HT != NULL && HT->root != NULL)
    {
        preOrderGetTb(HT->root, &hl, 0, code);
    }
    return hl;
}

/*
 * Encode str with the code table and print the resulting bit string to stdout.
 *
 * table: prefix-code table produced by buildTable.
 * str:   NUL-terminated string of characters to encode.
 *
 * A character that has no entry in the table is reported on stderr and
 * skipped. A NULL table or string produces no output.
 */
void encode(hlTable* table, char *str)
{
    if (table == NULL || str == NULL)
    {
        return;
    }

    for (const char *s = str; *s != '\0'; s++)
    {
        /* Linear search of the table, restarted for every character. */
        const hlNode *cur = table->first;
        while (cur != NULL && cur->symbol != *s)
        {
            cur = cur->next;
        }

        if (cur == NULL)
        {
            fprintf(stderr, "encode: no code for character 0x%02x\n",
                    (unsigned int)(unsigned char)*s);
            continue;
        }
        printf("%s", cur->code);
    }
}

/*
 * Decode a bit string with the Huffman tree and print the characters to stdout.
 *
 * ht:  tree produced by buildTree.
 * str: NUL-terminated string of bits such as "00001101". '0' selects the
 *      left branch; any other character selects the right branch.
 *
 * A NULL tree, NULL root or NULL string produces no output. If the tree is a
 * single leaf, every input character decodes to that leaf's symbol. If a
 * branch leads to a missing child, an error is written to stderr and
 * decoding stops.
 */
void decode(htTree* ht, char *str)
{
    const htNode *root;
    const htNode *tn;
    int rootIsLeaf;

    if (ht == NULL || ht->root == NULL || str == NULL)
    {
        return;
    }

    root = ht->root;
    tn = root;
    rootIsLeaf = (root->left == NULL && root->right == NULL);

    for (const char *s = str; *s != '\0'; s++)
    {
        if (rootIsLeaf)
        {
            /* Only one symbol exists, so there is no branch to follow. */
            printf("%c", root->symbol);
            continue;
        }

        tn = (*s == '0') ? tn->left : tn->right;
        if (tn == NULL)
        {
            fprintf(stderr, "decode: invalid code sequence\n");
            return;
        }

        /* Reaching a leaf completes one character; restart at the root. */
        if (tn->left == NULL && tn->right == NULL)
        {
            printf("%c", tn->symbol);
            tn = root;
        }
    }
}

huffman.c

#include "huffman.h"
#include "queue.h"
#include <stdio.h>
#include <stdlib.h>


/*
 * Demo: build the tree and code table for a sample sentence, print its
 * encoding, then decode a hand-written bit string with the same tree.
 */
int main()
{
    char text[] = "i love www.fishc.com";
    htTree *tree;
    hlTable *codes;

    tree = buildTree(text);        /* Huffman tree from the character counts */
    codes = buildTable(tree);      /* prefix-code table derived from the tree */

    encode(codes, text);           /* prints the bit string for the sentence */
    decode(tree, "000001010010111110");    /* original author's note: decodes to "oimfw" */

    system("pause");
    return 0;
}

/*
 * Alternative demo with a short input: build the tree and code table for
 * "aaaauxxz", print its encoding, then decode a hand-written bit string.
 */
int main()
{
    char text[] = "aaaauxxz";
    htTree *tree;
    hlTable *codes;

    tree = buildTree(text);        /* Huffman tree from the character counts */
    codes = buildTable(tree);      /* prefix-code table derived from the tree */

    encode(codes, text);           /* prints the bit string for the input */
    decode(tree, "1101001000000");    /* original author's note: decodes to "aaxuzz" */

    system("pause");
    return 0;
}

補充：

1.編寫代碼前先實現隊列的操作

2.隊列實現是優先級隊列，優先級低的放在前面

3.隊列中存放的是哈夫曼樹節點，我們每次從隊列中取出兩個優先級數值最小的節點進行合並，新節點的優先級為二者之和，然後再放回隊列中。

4.直到我們隊列中只有一個結點，這就是我們的根節點，結點下面是帶有我們所有數據的哈夫曼樹

5.獲取哈夫曼樹后，我們根據遞歸一路找到葉子結點（字符），將路徑轉前綴碼，根據結點字符和路徑前綴碼，創建前綴碼表

6.根據前綴碼表，我們可以獲取到各個字符的前綴碼，然后進行編碼即可

7.我們同樣可以根據前綴碼串，通過對哈夫曼樹的遍歷，找到前綴碼對應的字符

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 數據結構之哈夫曼樹數據結構之哈夫曼樹數據結構之哈夫曼樹數據結構與算法：哈夫曼樹【數據結構】哈夫曼樹重學數據結構之哈夫曼樹數據結構—哈夫曼樹（Java）數據結構--哈夫曼樹數據結構：哈夫曼樹與哈夫曼編碼數據結構——哈夫曼(Huffman)樹+哈夫曼編碼