HashMap和布隆过滤器命中性能测试

Posted kibana

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了HashMap和布隆过滤器命中性能测试相关的知识,希望对你有一定的参考价值。

package datafilter;

import com.google.common.base.Stopwatch;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;

import java.util.*;

import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.NANOSECONDS;

public class WarnIp {
    public static String getRandomIp() {
        // 需要排除监控的ip范围
        int[][] range = { { 607649792, 608174079 }, // 36.56.0.0-36.63.255.255
                { 1038614528, 1039007743 }, // 61.232.0.0-61.237.255.255
                { 1783627776, 1784676351 }, // 106.80.0.0-106.95.255.255
                { 2035023872, 2035154943 }, // 121.76.0.0-121.77.255.255
                { 2078801920, 2079064063 }, // 123.232.0.0-123.235.255.255
                { -1950089216, -1948778497 }, // 139.196.0.0-139.215.255.255
                { -1425539072, -1425014785 }, // 171.8.0.0-171.15.255.255
                { -1236271104, -1235419137 }, // 182.80.0.0-182.92.255.255
                { -770113536, -768606209 }, // 210.25.0.0-210.47.255.255
                { -569376768, -564133889 }, // 222.16.0.0-222.95.255.255
        };
        Random rdint = new Random();
        int index = rdint.nextInt(10);
        String ip = num2ip(range[index][0] + new Random().nextInt(range[index][1] - range[index][0]));
        return ip;
    }
 
    /*
     * 将十进制转换成IP地址
     */
    public static String num2ip(int ip) {
        int[] b = new int[4];
        String x = "";
        b[0] = (int) ((ip >> 24) & 0xff);
        b[1] = (int) ((ip >> 16) & 0xff);
        b[2] = (int) ((ip >> 8) & 0xff);
        b[3] = (int) (ip & 0xff);
        x = Integer.toString(b[0]) + "." + Integer.toString(b[1]) + "." + Integer.toString(b[2]) + "." + Integer.toString(b[3]);
        return x;
    }

    private static Map<String, Integer> map = new HashMap<>();
    private static BloomFilter<String> bloomFilter = BloomFilter.create(Funnels.unencodedCharsFunnel(), 1000000);
    private static List<String> list = new ArrayList<>();
    static {
        int count = 100000000;
        for (int id = 0; id < count; id++) {
            String randomIp = getRandomIp();
            int randomPort = (int)(Math.random() * 65536);
            String key = randomIp + ":" + randomPort;
            if(!map.containsKey(key)) {
                System.out.println("key: " + key);
                map.put(key, id);
                list.add(key);
                bloomFilter.put(key);
            }
            if(map.size() >= 1000000) {
                break;
            }
        }
    }

    static Stopwatch stopwatch = Stopwatch.createUnstarted();
    public static void main(String[] args) {
        System.out.println("map.size: " + map.size());
        Scanner scanner = new Scanner(System.in); //String param = "222.17.204.104:8001";
        while(scanner.hasNextLine()) {
            String param = scanner.nextLine();
            System.out.println("Input: " + param);
            if("exit".equalsIgnoreCase(param)) {
                break;
            }
            stopwatch.start();
//          if (map.containsKey(param)) { //map判断
//          if(list.contains(param)) {
            if (bloomFilter.mightContain(param)) { //布隆过滤器判断(默认3%误差率)
                System.out.println("list集合存在该数据=============数据: " + param);
            }
            //计时结束
            stopwatch.stop();
            System.out.println("list集合测试,判断该元素集合中是否存在用时: " + stopwatch.elapsed(MILLISECONDS));
            System.out.println("list集合测试,判断该元素集合中是否存在用时: " + stopwatch.elapsed(NANOSECONDS));
            stopwatch.reset();
        }
        scanner.close();
    }
}

结论:100w数据,key都是字符串(ip:port),map测试时,key存在大概耗时0.01-0.1ms,不存在时大概0.005ms左右,list耗时20-40ms,布隆过滤器0.01-0.2ms左右。除此之外需要考虑占用内存,按255.255.255.255:65535来考虑,用list存储100w条21字节*1000000约20M,布隆过滤器占用更小。

以上是关于HashMap和布隆过滤器命中性能测试的主要内容,如果未能解决你的问题,请参考以下文章

Caffeine缓存性能算法&布隆过滤器

Caffeine缓存性能算法&布隆过滤器

Caffeine缓存性能算法&布隆过滤器

布隆过滤器

布隆过滤器的基本原理和使用

布隆过滤器的简单实现