在之前的文章(自定义固定长度map)中实现过一个固定长度的map,其主要目的是在插入和删除过程中避免new和delete带来的内存分配与释放,重复利用内存。但是它有一个很明显的缺陷:因为长度固定,冲突率会比标准库的map高,当冲突率高到一定程度时,其查找和删除效率会直线下降。所以为了进一步优化它的实现,就把发生冲突的元素由链表维护改为红黑树维护,这样即使在最坏的情况下(所有元素的key的hash值都一样),也只是退化为std::map而不是一个链表,因此在这种情况下插入和删除的性能会有极大的提升。
? ? ??
std::unordered_map和之前的实现方式hash+单链表
新的实现方式hash+RBTree
通过对比可以很直观地看到,在最坏的情况下(所有元素的hash值都相同),两者的区别在于:前者退化为一个链表,而新的实现则是一棵红黑树,查找效率有着天壤之别。
完整源码实现链接:
gitee地址: https://gitee.com/DGuco/dsalgorithm/tree/master/rbtarr_map
github地址: https://github.com/DGuco/DSAlgorithm/tree/master/rbtarr_map
测试用例:
//
// Created by DGuco on 2022/4/13.
// email:1139140929@qq.com
//
#include <pthread.h>
#include <sys/time.h>
#include <unistd.h>
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <list>
#include <map>
#include <mutex>
#include <unordered_map>
#include "rbthash_map.h"
#include "rb_tree.h"
using namespace rbt_hash;
using namespace std;
// Number of elements inserted by the map tests; also the RbtHashMap capacity template argument.
#define TEST_COUNT 1000000
// Number of elements erased before the remaining contents are re-checked.
#define REMOVE_COUNT 50000
// Number of nodes used by the standalone red-black tree test.
#define RB_COUNT 5000
// Key stride used by the first performance run.
#define HASH_CONFLICT_COUNT 5
// Key stride used by the second (heavier) performance run.
#define HASH_CONFLICT_COUNT1 10000
// Duration of the two-thread stress test, in microseconds.
// Parenthesized so the macro expands safely inside larger expressions
// (e.g. as a divisor or as the right-hand side of a subtraction).
#define COMSUME_KEEP_TIME (60 * 1000 * 1000)
/**
 * Standalone red-black tree test.
 *
 * Generates RB_COUNT distinct random keys, inserts the corresponding nodes one
 * by one and then removes them one by one, checking tree.isRBTree() after every
 * single mutation. The three traversals plus minimum/maximum are printed in
 * between. On the first violated invariant the tree is dumped and the process
 * exits with a non-zero status so that scripts can detect the failure.
 */
void testRBTree()
{
    printf("==========================test rbtree start===================================\n");
    // Seed the random generator
    srand( (unsigned)time( NULL ) );
    int *a = new int[RB_COUNT];
    RBTNode<int,unsigned int>* node = new RBTNode<int,unsigned int>[RB_COUNT];
    // NOTE(review): the second constructor argument (0) is presumably the initial root index - confirm in rb_tree.h
    RBTree<int,int,unsigned int,RB_COUNT> tree(node,0);
    cout << "== 原始数据: ";
    int i = 0;
    // Remembers the keys already handed out so that every node gets a unique key
    std::unordered_map<int,int> stdmap;
    for(i = 0; i < RB_COUNT; i++)
    {
        int key = rand();
        while(stdmap.find(key) != stdmap.end())
        {
            key = rand();
        }
        a[i] = key;
        stdmap.insert(std::make_pair(key,1));
        cout << a[i] << " ";
        node[i].init_rb();
        node[i].set_key(a[i]);
    }
    cout << endl;

    for(i = 0; i < RB_COUNT; i++)
    {
        tree.insert(&node[i]);
        if(!tree.isRBTree())
        {
            tree.print();
            printf("test rbtree insert failed\n");
            // Non-zero status: a failed check must not look like a successful run
            exit(1);
        }
    }

    std::list<RBTNode<int,unsigned int>*> resList;
    std::list<RBTNode<int,unsigned int>*>::iterator it;

    cout << "== 前序遍历: ";
    tree.preOrder(resList);
    for(it = resList.begin(); it != resList.end(); it++)
    {
        cout << (*it)->get_key() << " ";
    }

    cout << "\n== 中序遍历: ";
    resList.clear();
    tree.inOrder(resList);
    for(it = resList.begin(); it != resList.end(); it++)
    {
        cout << (*it)->get_key() << " ";
    }

    cout << "\n== 后序遍历: ";
    resList.clear();
    tree.postOrder(resList);
    for(it = resList.begin(); it != resList.end(); it++)
    {
        cout << (*it)->get_key() << " ";
    }
    cout << endl;

    cout << "== 最小值: " << tree.minimum()->get_key() << endl;
    cout << "== 最大值: " << tree.maximum()->get_key() << endl;
    // Call tree.print() here to dump the full tree when debugging.
    printf("== test rbtree insert done\n");

    for(i = 0; i < RB_COUNT; i++)
    {
        tree.remove(a[i]);
        if(!tree.isRBTree())
        {
            tree.print();
            printf("test rbtree remove failed\n");
            exit(1);
        }
    }
    printf("== test rbtree remove done\n");
    delete[] node;
    delete[] a;
    printf("==========================test rbtree done===================================\n");
}
/**
 * Minimal value type stored in the maps under test; it wraps a single int.
 *
 * The constructor is intentionally left non-explicit: the tests pass plain
 * ints where a ValueType is expected and rely on the implicit conversion.
 */
class ValueType
{
public:
    ValueType(int value) : a(value)
    {
    }
    ~ValueType()
    {
    }
    /** Returns the wrapped integer. */
    int Key() const
    {
        return a;
    }
    /** Returns true when the wrapped integer is non-negative. */
    bool IsValid() const
    {
        return a >= 0;
    }
    // Wrapped value; public because the tests read it directly.
    int a;
};
/**
 * Insert test.
 *
 * Fills an RbtHashMap and a std::unordered_map with the same TEST_COUNT
 * distinct random keys, then walks the RbtHashMap and checks that every entry
 * exists in the reference map with the same value. Each visited key is erased
 * from the reference map, so the reference map must be empty at the end.
 * Any mismatch prints a diagnostic and exits with a non-zero status.
 */
void testInsert()
{
    // Seed the random generator
    srand( (unsigned)time( NULL ) );
    printf("==========================test insert start===================================\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
    std::unordered_map<int,ValueType> stdmap;
    while(stdmap.size() < TEST_COUNT)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }
    }

    if(stdmap.size() != testMap->size())
    {
        // Sizes are cast to a known type so the format specifier always matches
        printf("test insert failed stdsize = %lu,rbtsize = %lu\n",
               static_cast<unsigned long>(stdmap.size()),
               static_cast<unsigned long>(testMap->size()));
        exit(1);
    }

    RbtHashMap<int,ValueType,TEST_COUNT>::iterator it = testMap->begin();
    for(;it != testMap->end();it++)
    {
        std::unordered_map<int,ValueType>::iterator stdit = stdmap.find(it->first);
        if(stdit == stdmap.end())
        {
            printf("test insert failed never insert key = %d\n",it->first);
            exit(1);
        }
        if(it->second->a != stdit->second.a)
        {
            printf("test insert failed stdvalue= %d,rbtvalue = %d\n",stdit->second.a,it->second->a);
            exit(1);
        }
        // Tick the key off; whatever remains afterwards was never visited
        stdmap.erase(stdit);
    }

    if(stdmap.size() > 0)
    {
        printf("test insert failed look map not all,last = %lu\n",
               static_cast<unsigned long>(stdmap.size()));
        exit(1);
    }
    printf("==========================test insert done===================================\n");
    delete testMap;
    testMap = NULL;
}
void testremove()
{
// 设置种子
srand( (unsigned)time( NULL ) );
printf("==========================test remove begin===================================\n");
RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
std::unordered_map<int,ValueType> stdmap;
while(true)
{
int key = rand();
if(stdmap.find(key) == stdmap.end())
{
stdmap.insert(std::make_pair(key,ValueType(key)));
testMap->insert(key,key);
}
if(stdmap.size() >= TEST_COUNT)
{
break;
}
}
std::unordered_map<int,ValueType>::iterator stdit = stdmap.begin();
int count = 0;
for(;stdit != stdmap.end();)
{
if(!testMap->erase_check(stdit->first))
{
printf("test remove failed never remove key = %d\n",stdit->first);
exit(0);
}
stdit = stdmap.erase(stdit);
count++;
if(count >= REMOVE_COUNT)
{
break;
}
}
if(stdmap.size() != testMap->size())
{
printf("test remove failed stdsize = %d,rbtsize = %d\n",stdmap.size(),testMap->size());
exit(0);
}
RbtHashMap<int,ValueType,TEST_COUNT>::iterator it = testMap->begin();
count = 0;
for(;it != testMap->end();it++)
{
count++;
std::unordered_map<int,ValueType>::iterator stdit = stdmap.find(it->first);
if(stdit == stdmap.end())
{
printf("test remove failed never lost key = %d\n",it->first);
exit(0);
}
if(it->second->a != stdit->second.a)
{
printf("test remove failed stdvalue= %d,rbtvalue = %d\n",stdit->second.a,it->second->a);
exit(0);
}
stdmap.erase(it->first);
}
if(stdmap.size() > 0)
{
printf("test remove failed look map not all,last = %d\n",stdmap.size());
exit(0);
}
RbtHashMap<int,ValueType,TEST_COUNT>::iterator beginit = testMap->begin();
int erasecount = 0;
int size = testMap->size();
for(;beginit != testMap->end();)
{
beginit = testMap->erase(beginit);
erasecount++;
}
if(erasecount != size || testMap->size() != 0)
{
printf("test remove failed size = %d,erasecount = %d\n",size,erasecount);
exit(0);
}
printf("==========================test remove done===================================\n");
delete testMap;
testMap = NULL;
}
/**
 * Memory-reuse test.
 *
 * Fills the RbtHashMap to TEST_COUNT elements, erases REMOVE_COUNT of them and
 * then inserts new distinct keys until the reference map is back at TEST_COUNT.
 * Every one of those re-insertions must succeed (presumably only possible when
 * the slots released by erase are reused - confirm against rbthash_map.h), and
 * both maps must end up with the same size.
 * Any mismatch prints a diagnostic and exits with a non-zero status.
 */
void testmempool()
{
    // Seed the random generator
    srand( (unsigned)time( NULL ) );
    printf("==========================test mempool begin===================================\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
    std::unordered_map<int,ValueType> stdmap;
    while(stdmap.size() < TEST_COUNT)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            testMap->insert(key,key);
        }
    }

    // Free REMOVE_COUNT entries in both maps
    std::unordered_map<int,ValueType>::iterator stdit = stdmap.begin();
    int count = 0;
    while(stdit != stdmap.end() && count < REMOVE_COUNT)
    {
        if(!testMap->erase_check(stdit->first))
        {
            printf("test mempool failed never remove key = %d\n",stdit->first);
            exit(1);
        }
        stdit = stdmap.erase(stdit);
        count++;
    }
    if(stdmap.size() != testMap->size())
    {
        // Sizes are cast to a known type so the format specifier always matches
        printf("test mempool failed stdsize = %lu,rbtsize = %lu\n",
               static_cast<unsigned long>(stdmap.size()),
               static_cast<unsigned long>(testMap->size()));
        exit(1);
    }

    // Refill up to TEST_COUNT; a rejected insert means the test has failed
    while(stdmap.size() < TEST_COUNT)
    {
        int key = rand();
        if(stdmap.find(key) == stdmap.end())
        {
            stdmap.insert(std::make_pair(key,ValueType(key)));
            if(!testMap->insert(key,key))
            {
                printf("test mempool failed some mem not reuse\n");
                exit(1);
            }
        }
    }
    if(stdmap.size() != testMap->size())
    {
        printf("test mempool failed stdsize = %lu,rbtsize = %lu\n",
               static_cast<unsigned long>(stdmap.size()),
               static_cast<unsigned long>(testMap->size()));
        exit(1);
    }
    printf("==========================test mempool done===================================\n");
    delete testMap;
    testMap = NULL;
}
// Returns the current wall-clock time in microseconds since the epoch,
// or 0 when gettimeofday() reports an error.
time_t GetUSTime()
{
    struct timeval now;
    now.tv_sec = 0;
    now.tv_usec = 0;
    if (gettimeofday(&now, NULL) == 0)
    {
        // seconds -> microseconds, plus the sub-second part
        return now.tv_sec * 1000 * 1000 + now.tv_usec;
    }
    return 0;
}
// Builds the benchmark key for element `index` with the given stride.
// The product is computed in unsigned arithmetic because index * 10000
// exceeds INT_MAX for large indices and signed overflow is undefined
// behaviour; the unsigned product wraps and is converted back to int.
static int perfTestKey(int index, int stride)
{
    return static_cast<int>(static_cast<unsigned int>(index) * static_cast<unsigned int>(stride));
}

/**
 * Performance comparison.
 *
 * Times TEST_COUNT inserts followed by TEST_COUNT finds for
 *   - RbtHashMap with key stride HASH_CONFLICT_COUNT,
 *   - RbtHashMap with key stride HASH_CONFLICT_COUNT1 (after clear()),
 *   - std::map and std::unordered_map with key stride HASH_CONFLICT_COUNT,
 * and prints the elapsed milliseconds. The values found are summed into `res`
 * and printed so that the lookups cannot be optimized away.
 */
void testformance()
{
    // Seed the random generator
    srand( (unsigned)time( NULL ) );
    time_t start = 0;
    time_t end = 0;
    unsigned long long res = 0;
    printf("---------------------------test performance begin---------------------------------------------\n");
    RbtHashMap<int,ValueType,TEST_COUNT>* testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();

    // RbtHashMap, small stride
    start = GetUSTime();
    for(int i = 0;i < TEST_COUNT;i++)
    {
        testMap->insert(perfTestKey(i, HASH_CONFLICT_COUNT), ValueType(i));
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d], insert use %ld ms\n",TEST_COUNT,HASH_CONFLICT_COUNT,static_cast<long>((end - start) / 1000));
    start = GetUSTime();
    res = 0;
    for(int i = 0;i < TEST_COUNT;i++)
    {
        res += testMap->find(perfTestKey(i, HASH_CONFLICT_COUNT))->second->a;
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d] find use %ld ms,res = %llu\n",TEST_COUNT,HASH_CONFLICT_COUNT,static_cast<long>((end - start) / 1000),res);

    // RbtHashMap, large stride
    testMap->clear();
    start = GetUSTime();
    for(int i = 0;i < TEST_COUNT;i++)
    {
        testMap->insert(perfTestKey(i, HASH_CONFLICT_COUNT1), ValueType(i));
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d], insert use %ld ms\n",TEST_COUNT,HASH_CONFLICT_COUNT1,static_cast<long>((end - start) / 1000));
    start = GetUSTime();
    res = 0;
    for(int i = 0;i < TEST_COUNT;i++)
    {
        res += testMap->find(perfTestKey(i, HASH_CONFLICT_COUNT1))->second->a;
    }
    end = GetUSTime();
    printf("RbtHashMap<int,ValueType,%d>[conflict count = %d] find use %ld ms,res = %llu\n",TEST_COUNT,HASH_CONFLICT_COUNT1,static_cast<long>((end - start) / 1000),res);
    delete testMap;
    testMap = NULL;
    printf("------------------------------------------------------------------------\n");

    // std::map baseline
    {
        std::map<int,ValueType> stdtMap;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            stdtMap.insert(std::make_pair(perfTestKey(i, HASH_CONFLICT_COUNT), ValueType(i)));
        }
        end = GetUSTime();
        printf("std::map<int,ValueType> insert use %ld ms\n",static_cast<long>((end - start) / 1000));
        start = GetUSTime();
        res = 0;
        for(int i = 0;i < TEST_COUNT;i++)
        {
            res += stdtMap.find(perfTestKey(i, HASH_CONFLICT_COUNT))->second.a;
        }
        end = GetUSTime();
        printf("std::map<int,ValueType> find use %ld ms,res = %llu\n",static_cast<long>((end - start) / 1000),res);
    }
    printf("------------------------------------------------------------------------\n");

    // std::unordered_map baseline
    {
        std::unordered_map<int,ValueType> testUnorderMap;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            testUnorderMap.insert(std::make_pair(perfTestKey(i, HASH_CONFLICT_COUNT), ValueType(i)));
        }
        end = GetUSTime();
        printf("std::unordered_map<int,ValueType> insert use %ld ms\n",static_cast<long>((end - start) / 1000));
        res = 0;
        start = GetUSTime();
        for(int i = 0;i < TEST_COUNT;i++)
        {
            res += testUnorderMap.find(perfTestKey(i, HASH_CONFLICT_COUNT))->second.a;
        }
        end = GetUSTime();
        printf("std::unordered_map<int,ValueType> find use %ld ms,res = %llu\n",static_cast<long>((end - start) / 1000),res);
    }
    printf("-----------------------------test performance done-------------------------------------------\n");
    return;
}
// State shared by the stress-test threads (consume_insert / consume_remove / testconsume).
// Map under test; heap-allocated once and kept for the lifetime of the process.
RbtHashMap<int,ValueType,TEST_COUNT>* g_testMap = new RbtHashMap<int,ValueType,TEST_COUNT>();
// Reference map that mirrors the expected contents of g_testMap.
std::unordered_map<int,ValueType> g_testUnorderMap;
// Guards g_testMap and g_testUnorderMap.
std::mutex g_mtx;
// The flags below are read and written from several threads outside g_mtx,
// so they are atomics rather than plain bools (a plain bool would be a data race).
// Set to stop both workers (on timeout or on the first detected error).
std::atomic<bool> g_exit{false};
// Set by the main thread to ask the insert worker for a progress line.
std::atomic<bool> g_print1{false};
// Set by the main thread to ask the remove worker for a progress line.
std::atomic<bool> g_print2{false};
/**
 * Stress-test worker: keeps inserting new random keys into both the reference
 * map and the map under test until g_exit is set.
 *
 * While the reference map already holds TEST_COUNT entries the worker just
 * spins, waiting for the remove worker to make room. If the map under test
 * rejects an insert, g_exit is set so that the other worker stops as well.
 *
 * @param data unused thread argument
 * @return always NULL
 */
void* consume_insert(void* data)
{
    (void)data;
    // Seed the random generator
    srand( (unsigned)time( NULL ) );
    unsigned int count = 0;
    while(!g_exit)
    {
        if(g_print1)
        {
            printf("consume inserting .....\n");
            g_print1 = false;
        }

        std::lock_guard<std::mutex> lck(g_mtx);
        if(g_testUnorderMap.size() >= TEST_COUNT)
        {
            // Full: release the lock and retry
            continue;
        }
        int key = rand();
        if(g_testUnorderMap.find(key) != g_testUnorderMap.end())
        {
            // Key already present, draw another one on the next round
            continue;
        }
        g_testUnorderMap.insert(std::make_pair(key,ValueType(key)));
        if(!g_testMap->insert(key,key))
        {
            g_exit = true;
            printf("consume_insert insert failed testmap key = %d\n", key);
            return NULL;
        }
        count++;
    }
    printf("consume_insert insert count = %u\n", count);
    // A void* function must return a value on every path
    return NULL;
}
/**
 * Stress-test worker: repeatedly takes the first entry of the reference map,
 * checks that the map under test contains the same key, and erases it from
 * both maps, until g_exit is set.
 *
 * If the key cannot be found in or erased from the map under test, g_exit is
 * set so that the other worker stops as well.
 *
 * @param data unused thread argument
 * @return always NULL
 */
void* consume_remove(void* data)
{
    (void)data;
    unsigned int count = 0;
    while(!g_exit)
    {
        if(g_print2)
        {
            printf("consume removing .....\n");
            g_print2 = false;
        }

        std::lock_guard<std::mutex> lck(g_mtx);
        std::unordered_map<int, ValueType>::iterator it = g_testUnorderMap.begin();
        if(it == g_testUnorderMap.end())
        {
            // Nothing to remove yet
            continue;
        }
        // Copy the key: the iterator is invalidated by the erase below
        int key = it->first;
        if(g_testMap->find(key) == g_testMap->end())
        {
            g_exit = true;
            printf("consume_remove find failed testmap lost key = %d\n", key);
            return NULL;
        }
        g_testUnorderMap.erase(key);
        if(!g_testMap->erase_check(key))
        {
            g_exit = true;
            printf("consume_remove erase failed testmap lost key = %d\n", key);
            return NULL;
        }
        count++;
    }
    printf("consume_remove remove count = %u\n", count);
    // A void* function must return a value on every path
    return NULL;
}
/**
 * Two-thread stress test.
 *
 * Starts one insert worker and one remove worker, lets them run for
 * COMSUME_KEEP_TIME microseconds and then stops them through g_exit.
 * Every 20 seconds the remaining time is printed and both workers are asked
 * for a progress line. If a thread cannot be created, or a worker stops the
 * run early by setting g_exit itself, the process exits with a non-zero status.
 */
void testconsume()
{
    printf("-----------------------------test consume begin-------------------------------------------\n");
    printf("test consume need time %d S,please waiting\n",COMSUME_KEEP_TIME / 1000 / 1000);
    pthread_t t1,t2;
    time_t start = GetUSTime();
    if(pthread_create(&t1,NULL,consume_insert,NULL) != 0)
    {
        printf("test consume failed create insert thread\n");
        exit(1);
    }
    if(pthread_create(&t2,NULL,consume_remove,NULL) != 0)
    {
        // Stop the worker that is already running before bailing out
        g_exit = true;
        pthread_join(t1,NULL);
        printf("test consume failed create remove thread\n");
        exit(1);
    }

    time_t lasttime = COMSUME_KEEP_TIME;
    time_t printstart = start;
    bool workerFailed = false;
    while (true)
    {
        // Until the timeout below, only a failing worker sets g_exit
        if(g_exit)
        {
            workerFailed = true;
            break;
        }
        time_t now = GetUSTime();
        if(now - printstart >= 20 * 1000 * 1000)
        {
            lasttime = lasttime - (now - printstart);
            printf("test consume last time %ld S,please waiting\n",static_cast<long>(lasttime / 1000 / 1000));
            g_print1 = true;
            g_print2 = true;
            printstart = now;
        }
        time_t keep = now - start;
        if(keep >= COMSUME_KEEP_TIME)
        {
            g_exit = true;
            break;
        }
        usleep(10);
    }
    pthread_join(t1,NULL);
    pthread_join(t2,NULL);
    if(workerFailed)
    {
        printf("test consume failed worker thread stopped early\n");
        exit(1);
    }
    printf("test consume keep time %d S\n",COMSUME_KEEP_TIME / 1000 / 1000);
    printf("-----------------------------test consume done-------------------------------------------\n");
}
// Program entry point: runs every test in order, then prints the final banner.
int main()
{
    typedef void (*TestCase)();
    const TestCase cases[] = {
        testRBTree,
        testInsert,
        testremove,
        testmempool,
        testformance,
        testconsume,
    };
    for (const TestCase run : cases)
    {
        run();
    }
    std::cout << "Test Done,Hello, World!" << std::endl;
    return 0;
}
测试结果:
/home/dguco/Workspace/github/dsalgorithm/rbtarr_map/cmake-build-debug/rbtarr_map
==========================test rbtree start===================================
== 原始数据: ....100000个数太长了,此处省略
== 前序遍历: ....100000个数太长了,此处省略
== 后序遍历: ....100000个数太长了,此处省略
== 最大值: 2147408880
== test rbtree insert done
== test rbtree remove done
==========================test rbtree done===================================
==========================test insert start===================================
==========================test insert done===================================
==========================test remove begin===================================
==========================test remove done===================================
==========================test mempool begin===================================
==========================test mempool done===================================
---------------------------test performance begin---------------------------------------------
RbtHashMap<int,ValueType,1000000>[conflict count = 5], insert use 219 ms
RbtHashMap<int,ValueType,1000000>[conflict count = 5] find use 51 ms,res = 499999500000
RbtHashMap<int,ValueType,1000000>[conflict count = 10000], insert use 649 ms
RbtHashMap<int,ValueType,1000000>[conflict count = 10000] find use 75 ms,res = 499999500000
------------------------------------------------------------------------
std::map<int,ValueType> insert use 704 ms
std::map<int,ValueType> find use 308 ms,res = 499999500000
------------------------------------------------------------------------
std::unordered_map<int,ValueType> insert use 159 ms
std::unordered_map<int,ValueType> find use 62 ms,res = 499999500000
-----------------------------test performance done-------------------------------------------
-----------------------------test consume begin-------------------------------------------
test consume need time 60 S,please waiting
test consume last time 39 S,please waiting
consume inserting .....
consume removing .....
test consume last time 19 S,please waiting
consume inserting .....
consume removing .....
consume_insert remove count = 88001355
consume_insert insert count = 88008711
test consume keep time 60 S
-----------------------------test consume done-------------------------------------------
Test Done,Hello, World!
Process finished with exit code 0
从测试结果可以看到,在插入、删除以及查找等操作上,这里的实现都要比标准库的std::map好(当然这里的插入和删除没有像标准库那样有new和delete的开销)。而查找操作大家都不会有内存分配的开销,即便如此,当冲突率达到10000(1000000个元素中每10000个元素落入同一个hash桶)时,它的查找效率依然比std::map高,几乎与unordered_map持平。
参考文章
基础优化-让哈希表更公平一些
红黑树(一)之 原理和算法详细介绍
红黑树(四)之 C++的实现
红黑树动画演示
漫画:什么是红黑树?
|