逐步實現hash算法（基於BKDRhash函數）

本文為轉載文章（查看原文） 2014-09-11 17:20 5425 經典算法 / C / hash

哈希（Hash）算法，即散列函數。它是一種單向密碼體制，即它是一個從明文到密文的不可逆的映射，只有加密過程，沒有解密過程。同時，哈希函數可以將任意長度的輸入經過變換以後得到固定長度的輸出。hash算法一般用於快速查找和加密。（注意：用於加密場景時需要專門的密碼學哈希函數；本文使用的 BKDRHash 屬於非密碼學哈希，只適合用於查找。）

hash算法可以使用的哈希函數種類很多，處理衝突的方法也有開放定址、再哈希、鏈地址、公共溢出區等。

因此，在編寫代碼之前，首先需要根據所要處理的數據，選擇合適的hash函數和衝突處理辦法。開放定址需要空閒存儲單元，所需要的表比實際容量大，而且容易產生二次聚集，引發新的衝突。鏈地址使用鏈表存儲關鍵字，可以隨時插入新數據，數據量大小不受限制；缺點是要用到指針，給新單元分配地址需要時間，會在一定程度上減慢算法速度，但影響不大，可以忽略。

筆者需要處理的是一個 10 萬行字符串的字典，關鍵字重複率高。因此選擇適用於字符串的哈希函數。常用的字符串哈希函數有 BKDRHash、APHash、DJBHash、JSHash、RSHash、SDBMHash、PJWHash、ELFHash 等，個人傾向於 BKDRHash，記憶和使用都很簡便。

BKDRHash函數代碼如下：

 1 unsigned int BKDRhash(TYPE key)  2 {//BKDRhash函數
 3     unsigned int seed = 131;  4     unsigned int hash = 0;  5 
 6     while(*key != '\n' && *key != 0)      //通常使用時，判別條件為*key != 0即可，此處的*key != '\n'是因筆者程序需要
 7         hash = hash * seed + (*key++);  8 
 9     return hash % DICLEN; 10 }

對於關鍵字重複引起的衝突，筆者這裡使用鏈地址法處理。hash表結構體如下：

 /* Key length used to size node buffers and to bound strncmp() comparisons. */
 #define STRLEN 15
 /* Number of buckets in the table; BKDRhash() reduces modulo this value. */
 #define DICLEN 100000

 /* Key type stored in the table: a C string. */
 typedef char* TYPE;
 /* Result type of the table operations: 1 on success, 0 on failure. */
 typedef int BOOL;

 /* One element of a bucket chain. */
 typedef struct _NODE{
     TYPE data;              /* key string held by this node */
     struct _NODE* next;     /* next node in the same bucket, or NULL */
 }NODE;

 /* The hash table itself. */
 typedef struct _HASH_TABLE{
     NODE* phead;           /* optional: kept only so that the other functions
                               do not each have to declare their own node
                               pointer */
     NODE** chainhash;      /* array of DICLEN bucket head pointers */
 }HASH_TABLE;

準備工作OK，整理好思路，可以開始編寫hash算法了。O(∩_∩)O

首先，創建一個hash表，並對哈希表，鏈表，頭節點進行初始化。

 1 NODE* create_node()  2 {//開辟節點
 3     NODE* pnode = (NODE*)malloc(sizeof(NODE));  4     memset(pnode, 0, sizeof(NODE));  5 
 6     pnode->data = (char*)malloc(STRLEN * sizeof(char));  7     memset(pnode->data, 0, STRLEN * sizeof(char));  8     pnode->next = NULL;  9 
10     return pnode; 11 } 12 
13 HASH_TABLE* create_hash() 14 {//創建hash表
15     HASH_TABLE* new_hash_table = (HASH_TABLE*)malloc(sizeof(HASH_TABLE)); 16     memset(new_hash_table, 0, sizeof(HASH_TABLE)); 17 
18     new_hash_table->phead = create_node(); 19     new_hash_table->chainhash = (NODE**)malloc(DICLEN * sizeof(NODE*)); 20 
21     for(int i = 0; i < DICLEN; i++){ 22         new_hash_table->chainhash[i] = (NODE*)malloc(sizeof(NODE)); 23         memset(new_hash_table->chainhash[i], 0, sizeof(NODE)); 24  } 25 
26     return new_hash_table; 27 }

插入數據

chainhash 的每個分量在初始狀態下都指向一個 data 為空的頭結點。凡是哈希函數值 BKDRhash(data) 相同的記錄，都插入同一個鏈表 chainhash[i]，此時 i = BKDRhash(data)。若該鏈表頭結點的 data 不為空，指針就後移，在表尾插入新記錄（表頭、表尾插入均可，只要保持每次操作相同，即同一鏈表中的關鍵字有序）。

 1 BOOL insert_data(HASH_TABLE* hash, NODE* phead, TYPE data)  2 {//插入新數據
 3     if(hash == NULL)  4         return 0;  5     
 6     if(hash->chainhash[BKDRhash(data)]->data == NULL){  7         NODE* newnode = create_node();  8 
 9         strcpy(newnode->data, data); 10         newnode->next = NULL; 11         hash->chainhash[BKDRhash(data)]->data = newnode->data; 12         hash->chainhash[BKDRhash(data)]->next = newnode->next; 13 
14  free(newnode); 15         return 1; 16  } 17     
18     else{ 19         phead = hash->chainhash[BKDRhash(data)]; 20         
21         while(phead->next != NULL) 22             phead = phead->next; 23 
24         phead->next = create_node(); 25 
26         strcpy(phead->next->data, data); 27         phead->next->next = NULL; 28 
29         return 1; 30  } 31 }

查找數據

查找數據時，首先通過哈希函數值找到對應的鏈表，然後比較字符串內容。

 1 NODE* find_data(HASH_TABLE* hash, NODE* phead, TYPE data)  2 {//查找數據
 3     phead = hash->chainhash[BKDRhash(data)];  4 
 5     if(hash == NULL)  6         return NULL;  7     
 8     while(phead != NULL){  9 
10         if(strncmp(phead->data, data, STRLEN) == 0) 11             return phead; 12         else
13             phead = phead->next; 14  } 15 
16     return NULL; 17 }

刪除數據

刪除數據類似於單鏈表的刪除操作

 1 BOOL del_data(HASH_TABLE* hash, NODE* phead, TYPE data)  2 {//刪除數據
 3     
 4     phead->next = create_node();  5     phead->next = hash->chainhash[BKDRhash(data)];  6 
 7     if(hash == NULL)  8         return 0;  9 
10     while(phead->next != NULL){ 11 
12         if(strncmp(phead->next->data, data, STRLEN) == 0){ 13 
14             if(phead->next->data == hash->chainhash[BKDRhash(data)]->data) 15                 hash->chainhash[BKDRhash(data)] = phead->next->next; 16             else
17                 phead->next = phead->next->next; 18             
19             return 1; 20  } 21         else
22             phead->next = phead->next->next; 23  } 24 
25     free(phead->next); 26 
27     return 0; 28 }

修改數據

修改數據非常簡單，即先刪除後插入

 1 BOOL alter_data(HASH_TABLE* hash, NODE* phead, TYPE data, TYPE new_data)  2 {//修改數據
 3     if(hash == NULL)  4         return 0;  5 
 6     if(data == new_data)  7         return 1;  8 
 9     if(del_data(hash, phead, data) == 1){ 10 
11         if(insert_data(hash, phead, new_data) == 1) 12             return 1; 13         else
14             return 0; 15  } 16 
17     else
18         return 0; 19 }

這樣，一個簡單的hash算法就寫好了！筆者冗長的測試代碼如下。。。。至於為什麼測試要寫這麼長，筆者也不造o(╯□╰)o

 1 int main(int argc, char* argv[])  2 {//測試
 3     int i = 0;  4     char* testdata = "kyxntghcxolgqlw\n";  5     char data[STRLEN + 2] = {0};  6 
 7     HASH_TABLE* dic = create_hash();  8 
 9     FILE* fp = fopen("dic.txt", "r+"); 10     assert(fp != 0); 11 
12     while(i < DICLEN){ 13         fgets(data, STRLEN + 2, fp); 14         insert_data(dic, dic->phead, data); 15         i++; 16  } 17 
18     //查找測試
19     if(find_data(dic, dic->phead, testdata) != NULL) 20         printf("find it: %s\n", (find_data(dic, dic->phead, testdata))->data); 21     else
22         printf("no this data!\n"); 23 
24     //刪除再查找測試
25     if(del_data(dic, dic->phead, testdata) == 1) 26         printf("delete it!\n"); 27     else
28         printf("try again!\n"); 29 
30     if(find_data(dic, dic->phead, testdata) != NULL) 31         printf("find it: %s\n", (find_data(dic, dic->phead, testdata))->data); 32     else
33         printf("no this data!\n"); 34 
35     //修改數據測試
36     testdata = "fpwdwpk"; 37     char* newdata = "bibibibibiu\n"; 38 
39     if(alter_data(dic, dic->phead, testdata, newdata) == 1){ 40 
41         if(find_data(dic, dic->phead, newdata) != NULL) 42             printf("find it: %s\n", (find_data(dic, dic->phead, newdata))->data); 43         else
44             printf("no this data!\n"); 45  } 46         
47  fclose(fp); 48  free(dic); 49 
50     return 0; 51 }

歡迎轉載，請備註原始連結 http://www.cnblogs.com/liuliuliu/p/3966851.html ，並註明轉載。

作者bibibi_liuliu，聯系方式395985239@qq.com

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 BKDRhash hash算法和常見的hash函數 [轉] 經典Hash函數的實現 Java中實現hash算法 Hash算法（含python實現）利用國密SM4算法構造hash函數——C++實現【整理】hash算法原理及常見函數 Hash函數和消息摘要算法常見的Hash函數與加密算法幾種經典的Hash算法的實現(源代碼)