[原創]大數據:布隆過濾器C#版簡單實現。


    /// <summary>
    /// A simple in-memory Bloom filter for strings.
    /// Each item is mapped to <see cref="BitIndexCount"/> bit positions by seeding a
    /// <see cref="Random"/> with the item's hash code and drawing indices from it.
    /// A lookup can report a false positive but never a false negative for an item
    /// added to the same instance.
    /// </summary>
    /// <remarks>
    /// The bit positions depend on <see cref="object.GetHashCode"/>. String hash codes are
    /// not guaranteed to be stable across processes or runtime versions, so the contents
    /// of <see cref="_BloomArray"/> should not be persisted and reloaded elsewhere.
    /// </remarks>
    public class BloomFilter
    {
        /// <summary>Backing bit set; a bit is true once some added item mapped to it.</summary>
        public BitArray _BloomArray;

        /// <summary>Number of bits in the filter (m).</summary>
        public Int64 BloomArryLength { get; }

        /// <summary>Expected number of items to be stored (n); used only for the probability estimate.</summary>
        public Int64 DataArrayLeng { get; }

        /// <summary>Number of bit positions set/tested per item (k).</summary>
        public Int64 BitIndexCount { get; }

        /// <summary>
        /// Creates an empty filter.
        /// </summary>
        /// <param name="BloomArryLength">Size of the bit array; must be positive.</param>
        /// <param name="DataArrayLeng">Expected number of items; must not be negative.</param>
        /// <param name="bitIndexCount">Number of hash positions per item; must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">An argument is outside its valid range.</exception>
        public BloomFilter(int BloomArryLength,int DataArrayLeng,int bitIndexCount)
        {
            // Fail here rather than later inside Add/isExist, where a zero length
            // would make Random.Next throw with a confusing message.
            if (BloomArryLength <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(BloomArryLength), "The bit array length must be positive.");
            }
            if (DataArrayLeng < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(DataArrayLeng), "The expected item count must not be negative.");
            }
            if (bitIndexCount <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(bitIndexCount), "The hash count must be positive.");
            }

            _BloomArray = new BitArray(BloomArryLength);
            this.BloomArryLength = BloomArryLength;
            this.DataArrayLeng = DataArrayLeng;
            this.BitIndexCount = bitIndexCount;
        }

        /// <summary>
        /// Adds an item by setting its <see cref="BitIndexCount"/> bit positions.
        /// </summary>
        /// <param name="str">The item to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public void Add(string str)
        {
            Random random = CreateIndexSource(str);
            for (int i = 0; i < BitIndexCount; i++)
            {
                _BloomArray[NextIndex(random)] = true;
            }
        }

        /// <summary>
        /// Tests whether an item may have been added.
        /// </summary>
        /// <param name="str">The item to look up.</param>
        /// <returns>
        /// false if the item was definitely not added to this instance;
        /// true if it was added or is a false positive.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public bool isExist(string str)
        {
            Random random = CreateIndexSource(str);
            for (int i = 0; i < BitIndexCount; i++)
            {
                if (!_BloomArray[NextIndex(random)])
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns the hash code used to seed the index sequence for a value.
        /// </summary>
        /// <param name="value">The value to hash.</param>
        /// <returns>The result of <c>value.GetHashCode()</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
        public int GetHashCode(object value)
        {
            if (value == null)
            {
                throw new ArgumentNullException(nameof(value));
            }
            return value.GetHashCode();
        }

        /// <summary>
        /// Estimates the false-positive probability once <see cref="DataArrayLeng"/> items
        /// have been added, using the formula (1 - e^(-k * n / m)) ^ k.
        /// </summary>
        /// <returns>The estimated probability, between 0 and 1.</returns>
        public double getFalsePositiveProbability()
        {
            // (1 - e^(-k * n / m)) ^ k
            return Math.Pow((1 - Math.Exp(-BitIndexCount * (double)DataArrayLeng / BloomArryLength)),
                    BitIndexCount);
        }

        /// <summary>Builds the deterministic index generator for an item (same item, same sequence).</summary>
        private Random CreateIndexSource(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }
            return new Random(GetHashCode(str));
        }

        /// <summary>Draws the next bit index in the range [0, BloomArryLength).</summary>
        private int NextIndex(Random random)
        {
            // Random.Next(max) already excludes max, so pass the full length;
            // passing length - 1 would leave the last bit permanently unused.
            return random.Next((int)BloomArryLength);
        }
    }

 

        /// <summary>
        /// Demo entry point: fills a Bloom filter with the decimal strings
        /// "0" .. DataArrayLeng - 1, then answers membership queries read from
        /// standard input until the user enters "e" or input ends.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // 200,000,000 bits, 50,000,000 expected items, 3 bit positions per item.
            Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);

            // Insert fifty million items.
            for (int i = 0; i < bloom.DataArrayLeng; i++)
            {
                bloom.Add(i.ToString());
            }

            while (true)
            {
                var c = Console.ReadLine();

                // ReadLine returns null when standard input is closed; treat that
                // like the explicit "e" exit command instead of querying with null.
                if (c == null || c == "e")
                {
                    break;
                }

                Stopwatch sw = Stopwatch.StartNew();
                var temp = bloom.isExist(c);
                sw.Stop();
                Console.WriteLine($"查找:{c}\n結果:{temp}\n總耗時:{sw.ElapsedTicks}\n錯誤概率:{bloom.getFalsePositiveProbability()}");
            }
        }

結果:使用內存27MB,查找結果一般在100毫秒以內。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM