IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> 数据结构与算法 -> 红黑树hashmap-RbtHashMap实现 -> 正文阅读

[数据结构与算法]红黑树hashmap-RbtHashMap实现

? ? ? 在之前的文章(自定义固定长度map)中实现过一个固定长度的map,其目的主要是为了实现固定长度在插入和删除过程中避免new和delete的内存调用,重复利用内存,但是它有一个很明显的缺陷,就是因为是固定长度冲突率会比标准库的map高,当冲突率高到一定长度时,其查找和删除效率会直线下降,所以为了进一步优化它的实现,就把发生冲突的元素由链表维护改为红黑树维护,这样最坏的情况下所有的元素的key的hash值都一样也只是退化为std::map而不是一个链表,因此带来此种情况下插入和删除性能的极大的提升。

? ? ??

std::unordered_map和之前的实现方式hash+单链表

新的实现方式hash+RBTree

? ? ?通过对比可以很直观的看到,当最坏的情况下所有的元素都为同一个hash值的时候,两者的区别一个退化为链表而新的实现则为一棵红黑树,查找效率有着天壤之别。

完整源码实现链接:

gitee地址https://gitee.com/DGuco/dsalgorithm/tree/master/rbtarr_mapgithub地址https://github.com/DGuco/DSAlgorithm/tree/master/rbtarr_map

测试用例:

//
// Created by DGuco on 2022/4/13.
// email:1139140929@qq.com
//
#include <iostream>
#include <list>
#include <unordered_map>
#include <pthread.h>
#include <sys/time.h>
#include <unistd.h>
#include <mutex>

#include "rbthash_map.h"
#include "rb_tree.h"


using namespace rbt_hash;
using namespace std;
#define TEST_COUNT 1000000
#define REMOVE_COUNT 50000
#define RB_COUNT 5000
#define HASH_CONFLICT_COUNT 5
#define HASH_CONFLICT_COUNT1 10000
#define COMSUME_KEEP_TIME 60 * 1000 * 1000
void testRBTree()
{
    printf("==========================test rbtree start===================================\n");
    // 设置种子
    srand( (unsigned)time( NULL ) );
    int *a= new int[RB_COUNT];
    RBTNode<int,unsigned int>* node = new RBTNode<int,unsigned int>[RB_COUNT];
    RBTree<int,int,unsigned int,RB_COUNT>tree(node,0);
    cout << "== 原始数据: ";
    int i = 0;
    std::unordered_map<int,int> stdmap;
    for(i=0; i < RB_COUNT; i++)
    {
        int key = rand();
        while (1)
        {
            if(stdmap.find(key) == stdmap.end())
            {
                break;
            }else
            {
                key = rand();
            }
        }
        a[i] = key;
        stdmap.insert(std::make_pair(key,1));
        cout << a[i] <<" ";
        node[i].init_rb();
        node[i].set_key(a[i]);
    }
    cout << endl;

    for(i=0; i<RB_COUNT; i++)
    {
        tree.insert(&node[i]);
        if(!tree.isRBTree())
        {
            tree.print();
            printf("test rbtree insert failed\n");
            exit(0);
        }
    }

    cout << "== 前序遍历: ";
    std::list<RBTNode<int,unsigned int>*> resList;
    resList.clear();
    tree.preOrder(resList);
    std::list<RBTNode<int,unsigned int>*>::iterator it = resList.begin();
    for(;it != resList.end();it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << "\n== 中序遍历: ";
    resList.clear();
    tree.inOrder(resList);
    it = resList.begin();
    for(;it != resList.end();it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << "\n== 后序遍历: ";
    resList.clear();
    tree.postOrder(resList);
    it = resList.begin();
    for(;it != resList.end();it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << endl;
    cout << "== 最小值: " << tree.minimum()->get_key() << endl;
    cout << "== 最大值: " << tree.maximum()->get_key() << endl;
//    cout << "== 树的详细信息: " << endl;
//    tree.print();
    printf("== test rbtree insert done\n");

    for(i=0; i<RB_COUNT; i++)
    {
        tree.remove(a[i]);
//        cout << "== 删除节点: " << a[i] << endl;
//        cout << "== 树的详细信息: " << endl;
//        tree.print();
        if(!tree.isRBTree())
        {
            tree.print();
            printf("test rbtree remove failed\n");
            exit(0);
        }
    }
    printf("== test rbtree remove done\n");
    delete[] node;
    delete [] a;
    printf("==========================test rbtree done===================================\n");
}

class ValueType
{
public:
    ValueType(int value) : a(value)
    {

    }

    ~ValueType()
    {

    }

    int Key()
    {
        return  a;
    }

    bool IsValid()
    {
        return  a >= 0;
    }

    int a;
};

void testInsert()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    printf("==========================test insert start===================================\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
    std::unordered_map<int,ValueType> stdmap;
    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }

    if(stdmap.size() != testMap->size())
    {
        printf("test insert failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    RbtHashMap<int,ValueType,TEST_COUNT>::iterator it = testMap->begin();
    for(;it != testMap->end();it++)
    {
        std::unordered_map<int,ValueType>::iterator  stdit = stdmap.find(it->first);
        if(stdit == stdmap.end())
        {
            printf("test insert failed never insert key = %d\n",it->first);
            exit(0);
        }
        if(it->second->a != stdit->second.a)
        {
            printf("test insert failed stdvalue= %d,rbtvalue = %d\n",stdit->second.a,it->second->a);
            exit(0);
        }
        //printf("it->first = %d,it->second = %d\n",it->first,it->second->a);
        stdmap.erase(it->first);
    }
    if(stdmap.size() > 0)
    {
        printf("test insert failed look map not all,last =  %d\n",stdmap.size());
        exit(0);
    }
    printf("==========================test insert done===================================\n");
    delete testMap;
    testMap = NULL;
}

void testremove()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    printf("==========================test remove begin===================================\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
    std::unordered_map<int,ValueType> stdmap;
    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }

    std::unordered_map<int,ValueType>::iterator stdit = stdmap.begin();
    int count = 0;
    for(;stdit != stdmap.end();)
    {
        if(!testMap->erase_check(stdit->first))
        {
            printf("test remove failed never remove key = %d\n",stdit->first);
            exit(0);
        }
        stdit = stdmap.erase(stdit);
        count++;
        if(count >= REMOVE_COUNT)
        {
            break;
        }
    }

    if(stdmap.size() != testMap->size())
    {
        printf("test remove failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    RbtHashMap<int,ValueType,TEST_COUNT>::iterator it = testMap->begin();
    count = 0;
    for(;it != testMap->end();it++)
    {
        count++;
        std::unordered_map<int,ValueType>::iterator  stdit = stdmap.find(it->first);
        if(stdit == stdmap.end())
        {
            printf("test remove failed never lost key = %d\n",it->first);
            exit(0);
        }
        if(it->second->a != stdit->second.a)
        {
            printf("test remove failed stdvalue= %d,rbtvalue = %d\n",stdit->second.a,it->second->a);
            exit(0);
        }
        stdmap.erase(it->first);
    }
    if(stdmap.size() > 0)
    {
        printf("test remove failed look map not all,last =  %d\n",stdmap.size());
        exit(0);
    }

    RbtHashMap<int,ValueType,TEST_COUNT>::iterator  beginit = testMap->begin();
    int erasecount = 0;
    int size = testMap->size();
    for(;beginit != testMap->end();)
    {
        beginit = testMap->erase(beginit);
        erasecount++;
    }
    if(erasecount != size || testMap->size() != 0)
    {
        printf("test remove failed size = %d,erasecount = %d\n",size,erasecount);
        exit(0);
    }
    printf("==========================test remove done===================================\n");
    delete testMap;
    testMap = NULL;
}

void testmempool()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    printf("==========================test mempool begin===================================\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
    std::unordered_map<int,ValueType> stdmap;
    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }

    std::unordered_map<int,ValueType>::iterator stdit = stdmap.begin();
    int count = 0;
    for(;stdit != stdmap.end();)
    {
        if(!testMap->erase_check(stdit->first))
        {
            printf("test mempool failed never remove key = %d\n",stdit->first);
            exit(0);
        }
        stdit = stdmap.erase(stdit);
        count++;
        if(count >= REMOVE_COUNT)
        {
            break;
        }
    }

    if(stdmap.size() != testMap->size())
    {
        printf("test mempool failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    while(true)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            if(!testMap->insert(key,key))
            {
                printf("test mempool failed some mem not reuse\n",stdmap.size(),testMap->size());
            }
        }

        if(stdmap.size() >= TEST_COUNT)
        {
            break;
        }
    }
    if(stdmap.size() != testMap->size())
    {
        printf("test mempool failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
        exit(0);
    }

    printf("==========================test mempool done===================================\n");
    delete testMap;
    testMap = NULL;
}

// 获取当前微秒
time_t GetUSTime()
{
    struct timeval tmval = {0};
    int nRetCode = gettimeofday(&tmval, NULL);
    if (nRetCode != 0)
    {
        return 0;
    }
    return ((tmval.tv_sec * 1000 * 1000) + tmval.tv_usec);
}

void testformance()
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    time_t start = 0;
    time_t end = 0;
    unsigned long long res = 0;
    printf("---------------------------test performance begin---------------------------------------------\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
    start = GetUSTime();
    for(int i = 0;i < TEST_COUNT;i++)
    {
        testMap->insert(i * HASH_CONFLICT_COUNT, ValueType(i));
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d], insert use  %ld ms\n",TEST_COUNT,HASH_CONFLICT_COUNT,(end - start) / 1000);
    start = GetUSTime();
    res = 0;
    for(int i = 0;i < TEST_COUNT;i++)
    {
        res += testMap->find(i * HASH_CONFLICT_COUNT)->second->a;
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d] find use  %ld ms,res = %llu\n",TEST_COUNT,HASH_CONFLICT_COUNT,(end - start) / 1000,res);
    testMap->clear();
    start = GetUSTime();
    for(int i = 0;i < TEST_COUNT;i++)
    {
        testMap->insert(i * HASH_CONFLICT_COUNT1, ValueType(i));
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d], insert use  %ld ms\n",TEST_COUNT,HASH_CONFLICT_COUNT1,(end - start) / 1000);
    start = GetUSTime();
    res = 0;
    for(int i = 0;i < TEST_COUNT;i++)
    {
        res += testMap->find(i * HASH_CONFLICT_COUNT1)->second->a;
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d] find use  %ld ms,res = %llu\n",TEST_COUNT,HASH_CONFLICT_COUNT1,(end - start) / 1000,res);
    delete testMap;
    testMap = NULL;
    printf("------------------------------------------------------------------------\n");
    {
        std::map<int,ValueType> stdtMap;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            stdtMap.insert(std::make_pair(i * HASH_CONFLICT_COUNT, ValueType (i)));
        }
        end = GetUSTime();
        printf("std::map<int,ValueType> insert use  %ld ms\n",(end - start) / 1000);
        start = GetUSTime();
        res = 0;
        for(int i = 0;i < TEST_COUNT;i++)
        {
            res += stdtMap.find(i * HASH_CONFLICT_COUNT)->second.a;
        }
        end = GetUSTime();
        printf("std::map<int,ValueType> find use  %ld ms,res = %llu\n",(end - start) / 1000,res);
    }
    printf("------------------------------------------------------------------------\n");
    {
        std::unordered_map<int,ValueType> testUnorderMap;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            testUnorderMap.insert(std::make_pair(i * HASH_CONFLICT_COUNT, ValueType(i)));
        }
        end = GetUSTime();
        printf("std::unordered_map<int,ValueType> insert use  %ld ms\n",(end - start) / 1000);
        res = 0;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            res += testUnorderMap.find(i * HASH_CONFLICT_COUNT)->second.a;
        }
        end = GetUSTime();
        printf("std::unordered_map<int,ValueType> find use  %ld ms,res = %llu\n",(end - start) / 1000,res);
    }
    printf("-----------------------------test performance done-------------------------------------------\n");
    return;
}

RbtHashMap<int,ValueType,TEST_COUNT>* g_testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
std::unordered_map<int,ValueType> g_testUnorderMap;
std::mutex g_mtx;
bool g_exit = 0;
bool g_print1 = 0;
bool g_print2 = 0;
void* consume_insert(void* data)
{
    // 设置种子
    srand( (unsigned)time( NULL ) );
    unsigned int count = 0;
    while(true && !g_exit)
    {
        if(g_print1)
        {
            printf("consume inserting .....\n");
            g_print1 = 0;
        }
        {
            std::lock_guard<std::mutex> lck(g_mtx);
            if(g_testUnorderMap.size() >= TEST_COUNT)
            {
                continue;
            }else
            {
                int key = rand();
                if(g_testUnorderMap.find(key) == g_testUnorderMap.end())
                {
                    g_testUnorderMap.insert(std::make_pair(key,ValueType(key)));
                    if(!g_testMap->insert(key,key))
                    {
                        g_exit = 1;
                        printf("consume_insert insert failed testmap key = %d\n", key);
                        return 0;
                    }
                    count++;
                }
            }
        }
    }
    printf("consume_insert insert count = %d\n", count);
}

void* consume_remove(void* data)
{
    unsigned int count = 0;
    while (true && !g_exit)
    {
        if(g_print2)
        {
            printf("consume removing .....\n");
            g_print2 = 0;
        }
        {
            std::lock_guard<std::mutex> lck(g_mtx);
            std::unordered_map<int, ValueType>::iterator it = g_testUnorderMap.begin();
            if (it != g_testUnorderMap.end())
            {
                if (g_testMap->find(it->first) == g_testMap->end()) {
                    g_exit = 1;
                    printf("consume_remove find failed testmap lost key = %d\n", it->first);
                    return 0;
                }
                int key = it->first;
                g_testUnorderMap.erase(key);
                if (!g_testMap->erase_check(key))
                {
                    g_exit = 1;
                    printf("consume_remove erase failed testmap lost key = %d\n", key);
                    return 0;
                }
                count++;
            }
        }
    }
    printf("consume_insert remove count = %d\n", count);
}


void testconsume()
{
    printf("-----------------------------test consume begin-------------------------------------------\n");
    printf("test consume need time %d S,please waiting\n",COMSUME_KEEP_TIME / 1000 / 1000);
    pthread_t t1,t2;
    time_t start = GetUSTime();
    pthread_create(&t1,0,consume_insert,NULL);
    pthread_create(&t2,0,consume_remove,NULL);
    time_t lasttime = COMSUME_KEEP_TIME;
    time_t printstart = start;
    while (true)
    {
        time_t now = GetUSTime();
        if(now - printstart >= 20 * 1000 * 1000)
        {
            lasttime = lasttime - (now - printstart);
            printf("test consume last time %ld S,please waiting\n",lasttime / 1000 / 1000);
            g_print1 = 1;
            g_print2 = 1;
            printstart = now;
        }
        time_t keep = now - start;
        if(keep >= COMSUME_KEEP_TIME)
        {
            g_exit = 1;
            break;
        }
        usleep(10);
    }
    pthread_join(t1,NULL);
    pthread_join(t2,NULL);
    printf("test consume keep time %d S\n",COMSUME_KEEP_TIME / 1000 / 1000);
    printf("-----------------------------test consume done-------------------------------------------\n");
}

int main()
{
    testRBTree();
    testInsert();
    testremove();
    testmempool();
    testformance();
    testconsume();
    std::cout << "Test Done,Hello, World!" << std::endl;
}

测试结果:

/home/dguco/Workspace/github/dsalgorithm/rbtarr_map/cmake-build-debug/rbtarr_map
==========================test rbtree start===================================
== 原始数据: ....100000个数太长了,此处省略
== 前序遍历: ....100000个数太长了,此处省略
== 后序遍历: ....100000个数太长了,此处省略
== 最大值: 2147408880
== test rbtree insert done
== test rbtree remove done
==========================test rbtree done===================================
==========================test insert start===================================
==========================test insert done===================================
==========================test remove begin===================================
==========================test remove done===================================
==========================test mempool begin===================================
==========================test mempool done===================================
---------------------------test performance begin---------------------------------------------
RbtHashMap<int,ValueType,1000000>[conflict count = 5], insert use  219 ms
RbtHashMap<int,ValueType,1000000>[conflict count = 5] find use  51 ms,res = 499999500000
RbtHashMap<int,ValueType,1000000>[conflict count = 10000], insert use  649 ms
RbtHashMap<int,ValueType,1000000>[conflict count = 10000] find use  75 ms,res = 499999500000
------------------------------------------------------------------------
std::map<int,ValueType> insert use  704 ms
std::map<int,ValueType> find use  308 ms,res = 499999500000
------------------------------------------------------------------------
std::unordered_map<int,ValueType> insert use  159 ms
std::unordered_map<int,ValueType> find use  62 ms,res = 499999500000
-----------------------------test performance done-------------------------------------------
-----------------------------test consume begin-------------------------------------------
test consume need time 60 S,please waiting
test consume last time 39 S,please waiting
consume inserting .....
consume removing .....
test consume last time 19 S,please waiting
consume inserting .....
consume removing .....
consume_insert remove count = 88001355
consume_insert insert count = 88008711
test consume keep time 60 S
-----------------------------test consume done-------------------------------------------
Test Done,Hello, World!

Process finished with exit code 0

? ? ? ?测试结果可以看到在插入和删除以及查找等操作上,这里都要比标准库的要好(当然这里的插入和删除没有像标准库那样有new和delete的开销),但是查找大家都是一样的都不会有内存开销,甚至当冲突率达到10000(1000000个元素每10000个元素插入同一个hash中)它的查找效率依然比std::map高,几乎和unordered_map保持持平。

参考文章

基础优化-让哈希表更公平一些

红黑树(一)之 原理和算法详细介绍

红黑树(四)之 C++的实现

红黑树动画演示

漫画:什么是红黑树?

  数据结构与算法 最新文章
【力扣106】 从中序与后续遍历序列构造二叉
leetcode 322 零钱兑换
哈希的应用:海量数据处理
动态规划|最短Hamilton路径
华为机试_HJ41 称砝码【中等】【menset】【
【C与数据结构】——寒假提高每日练习Day1
基础算法——堆排序
2023王道数据结构线性表--单链表课后习题部
LeetCode 之 反转链表的一部分
【题解】lintcode必刷50题<有效的括号序列
上一篇文章      下一篇文章      查看所有文章
加:2022-05-24 18:28:41  更:2022-05-24 18:29:41 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年12日历 -2024/12/30 1:32:45-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码