[rowset id] A little improvement of rowset id generator (#3203)
The main optimization points:
1. Use std::unordered_set instead of std::set, and use RowsetId.hi as RowsetId's hash value.
2. Minimize the scope of SpinLock in UniqueRowsetIdGenerator.

Profile comparison (UniqueRowsetIdGeneratorTest.GenerateIdBenchmark, run 10 times):

old version | new version
6s962ms | 3s647ms
6s139ms | 3s393ms
6s234ms | 3s686ms
6s060ms | 3s447ms
5s966ms | 4s127ms
5s786ms | 3s994ms
5s778ms | 4s072ms
6s193ms | 4s082ms
6s159ms | 3s560ms
5s591ms | 3s654ms
This commit is contained in:
@ -273,7 +273,7 @@ typedef std::set<uint32_t> UniqueIdSet;
|
||||
typedef std::map<ColumnId, ColumnId> UniqueIdToColumnIdMap;
|
||||
|
||||
// 8 bit rowset id version
|
||||
// 56 bit, inc number from 0
|
||||
// 56 bit, inc number from 1
|
||||
// 128 bit backend uid, it is a uuid bit, id version
|
||||
struct RowsetId {
|
||||
int8_t version = 0;
|
||||
@ -305,7 +305,7 @@ struct RowsetId {
|
||||
|
||||
void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
|
||||
version = id_version;
|
||||
if (high >= MAX_ROWSET_ID) {
|
||||
if (UNLIKELY(high >= MAX_ROWSET_ID)) {
|
||||
LOG(FATAL) << "inc rowsetid is too large:" << high;
|
||||
}
|
||||
hi = (id_version << 56) + (high & LOW_56_BITS);
|
||||
@ -350,6 +350,13 @@ struct RowsetId {
|
||||
}
|
||||
};
|
||||
|
||||
struct RowsetIdHash {
|
||||
size_t operator()(const RowsetId& rowset_id) const {
|
||||
// hi is an increasing number on a BE instance, we can use it as the hash value simply.
|
||||
return rowset_id.hi;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
|
||||
#endif // DORIS_BE_SRC_OLAP_OLAP_COMMON_H
|
||||
|
||||
@ -39,7 +39,7 @@ public:
|
||||
// for example, during gc logic, gc thread finds a file
|
||||
// and it could not find it under rowset list. but it maybe in use
|
||||
// during load procedure. Gc thread will check it using this method.
|
||||
virtual bool id_in_use(const RowsetId& rowset_id) = 0;
|
||||
virtual bool id_in_use(const RowsetId& rowset_id) const = 0;
|
||||
|
||||
// remove the rowsetid from useful rowsetid list.
|
||||
virtual void release_id(const RowsetId& rowset_id) = 0;
|
||||
|
||||
@ -22,26 +22,28 @@
|
||||
namespace doris {
|
||||
|
||||
UniqueRowsetIdGenerator::UniqueRowsetIdGenerator(const UniqueId& backend_uid) :
|
||||
_backend_uid(backend_uid), _inc_id(1) {
|
||||
_backend_uid(backend_uid), _inc_id(0) {
|
||||
}
|
||||
|
||||
// generate a unique rowset id and save it in a set to check whether it is valid in the future
|
||||
RowsetId UniqueRowsetIdGenerator::next_id() {
|
||||
std::lock_guard<SpinLock> l(_lock);
|
||||
RowsetId rowset_id;
|
||||
rowset_id.init(_version, ++_inc_id, _backend_uid.hi, _backend_uid.lo);
|
||||
_valid_rowset_ids.insert(rowset_id);
|
||||
{
|
||||
std::lock_guard<SpinLock> l(_lock);
|
||||
_valid_rowset_ids.insert(rowset_id);
|
||||
}
|
||||
return rowset_id;
|
||||
}
|
||||
|
||||
bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) {
|
||||
std::lock_guard<SpinLock> l(_lock);
|
||||
// if rowset_id == 1, then it is an old version rowsetid, not gc it
|
||||
bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) const {
|
||||
// if rowset_id == 1, then it is an old version rowsetid, not gc it
|
||||
// because old version rowset id is not generated by this code, so that not delete them
|
||||
if (rowset_id.version < _version) {
|
||||
return true;
|
||||
}
|
||||
return _valid_rowset_ids.find(rowset_id) != _valid_rowset_ids.end();
|
||||
std::lock_guard<SpinLock> l(_lock);
|
||||
return _valid_rowset_ids.count(rowset_id) == 1;
|
||||
}
|
||||
|
||||
void UniqueRowsetIdGenerator::release_id(const RowsetId& rowset_id) {
|
||||
|
||||
@ -30,16 +30,18 @@ public:
|
||||
|
||||
RowsetId next_id() override;
|
||||
|
||||
bool id_in_use(const RowsetId& rowset_id) override;
|
||||
bool id_in_use(const RowsetId& rowset_id) const override;
|
||||
|
||||
void release_id(const RowsetId& rowset_id) override;
|
||||
|
||||
private:
|
||||
SpinLock _lock;
|
||||
UniqueId _backend_uid;
|
||||
mutable SpinLock _lock;
|
||||
const UniqueId _backend_uid;
|
||||
const int64_t _version = 2; // modify it when create new version id generator
|
||||
int64_t _inc_id = 0;
|
||||
std::set<RowsetId> _valid_rowset_ids;
|
||||
}; // FeBasedRowsetIdGenerator
|
||||
std::atomic<int64_t> _inc_id;
|
||||
std::unordered_set<RowsetId, RowsetIdHash> _valid_rowset_ids;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(UniqueRowsetIdGenerator);
|
||||
}; // UniqueRowsetIdGenerator
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -20,6 +20,10 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "util/runtime_profile.h"
|
||||
#include "util/threadpool.h"
|
||||
#include "util/pretty_printer.h"
|
||||
|
||||
namespace doris {
|
||||
class UniqueRowsetIdGeneratorTest : public testing::Test {
|
||||
public:
|
||||
@ -29,33 +33,38 @@ public:
|
||||
};
|
||||
|
||||
TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) {
|
||||
int64_t max_id = 1;
|
||||
max_id = max_id << 56;
|
||||
{
|
||||
int64_t hi = 1; // version
|
||||
hi <<= 56;
|
||||
RowsetId rowset_id;
|
||||
rowset_id.init(123);
|
||||
ASSERT_TRUE(rowset_id.version == 1);
|
||||
ASSERT_TRUE(rowset_id.hi == (123 + max_id));
|
||||
ASSERT_TRUE(rowset_id.mi == 0);
|
||||
ASSERT_TRUE(rowset_id.lo == 0);
|
||||
ASSERT_STREQ("123", rowset_id.to_string().c_str());
|
||||
ASSERT_EQ(rowset_id.version, 1);
|
||||
ASSERT_EQ(rowset_id.hi, 123 + hi);
|
||||
ASSERT_EQ(rowset_id.mi, 0);
|
||||
ASSERT_EQ(rowset_id.lo, 0);
|
||||
ASSERT_EQ(std::string("123"), rowset_id.to_string());
|
||||
}
|
||||
{
|
||||
int64_t hi = 1; // version
|
||||
hi <<= 56;
|
||||
RowsetId rowset_id;
|
||||
rowset_id.init("123");
|
||||
ASSERT_TRUE(rowset_id.version == 1);
|
||||
ASSERT_TRUE(rowset_id.hi == (123 + max_id));
|
||||
ASSERT_TRUE(rowset_id.mi == 0);
|
||||
ASSERT_TRUE(rowset_id.lo == 0);
|
||||
ASSERT_STREQ("123", rowset_id.to_string().c_str());
|
||||
ASSERT_EQ(rowset_id.version, 1);
|
||||
ASSERT_EQ(rowset_id.hi, 123 + hi);
|
||||
ASSERT_EQ(rowset_id.mi, 0);
|
||||
ASSERT_EQ(rowset_id.lo, 0);
|
||||
ASSERT_EQ(std::string("123"), rowset_id.to_string());
|
||||
}
|
||||
|
||||
{
|
||||
int64_t hi = 2; // version
|
||||
hi <<= 56;
|
||||
const std::string rowset_id_v2("0200000000000003c04f58d989cab2f2efd45faa20449189");
|
||||
RowsetId rowset_id;
|
||||
rowset_id.init("0200000000000003c04f58d989cab2f2efd45faa20449189");
|
||||
ASSERT_TRUE(rowset_id.version == 2);
|
||||
ASSERT_TRUE(rowset_id.hi == (3 + max_id));
|
||||
ASSERT_STREQ("0200000000000003c04f58d989cab2f2efd45faa20449189", rowset_id.to_string().c_str());
|
||||
rowset_id.init(rowset_id_v2);
|
||||
ASSERT_EQ(rowset_id.version, 2);
|
||||
ASSERT_EQ(rowset_id.hi, 3 + hi);
|
||||
ASSERT_EQ(std::string(rowset_id_v2), rowset_id.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@ -63,41 +72,75 @@ TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) {
|
||||
TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdTest) {
|
||||
UniqueId backend_uid = UniqueId::gen_uid();
|
||||
UniqueId backend_uid2 = UniqueId::gen_uid();
|
||||
ASSERT_TRUE(backend_uid != backend_uid2);
|
||||
ASSERT_NE(backend_uid, backend_uid2);
|
||||
UniqueRowsetIdGenerator id_generator(backend_uid);
|
||||
UniqueRowsetIdGenerator id_generator2(backend_uid2);
|
||||
{
|
||||
RowsetId rowset_id1 = id_generator.next_id();
|
||||
RowsetId rowset_id1 = id_generator.next_id(); // hi == 1
|
||||
RowsetId rowset_id2 = id_generator2.next_id();
|
||||
ASSERT_TRUE(rowset_id1.hi != rowset_id2.hi);
|
||||
ASSERT_EQ(rowset_id1.hi, rowset_id2.hi);
|
||||
}
|
||||
{
|
||||
int64_t max_id = 2;
|
||||
max_id = max_id << 56;
|
||||
RowsetId rowset_id = id_generator.next_id();
|
||||
ASSERT_TRUE(rowset_id.hi == (1 + max_id));
|
||||
ASSERT_TRUE(rowset_id.version == 2);
|
||||
ASSERT_TRUE(backend_uid.lo == rowset_id.lo);
|
||||
ASSERT_TRUE(backend_uid.hi == rowset_id.mi);
|
||||
ASSERT_TRUE(rowset_id.hi != 0);
|
||||
int64_t hi = 2; // version
|
||||
hi <<= 56;
|
||||
RowsetId rowset_id = id_generator.next_id(); // hi == 2
|
||||
ASSERT_EQ(rowset_id.hi, hi + 2);
|
||||
ASSERT_EQ(rowset_id.version, 2);
|
||||
ASSERT_EQ(backend_uid.lo, rowset_id.lo);
|
||||
ASSERT_EQ(backend_uid.hi, rowset_id.mi);
|
||||
ASSERT_NE(rowset_id.hi, 0);
|
||||
bool in_use = id_generator.id_in_use(rowset_id);
|
||||
ASSERT_TRUE(in_use == true);
|
||||
ASSERT_TRUE(in_use);
|
||||
id_generator.release_id(rowset_id);
|
||||
in_use = id_generator.id_in_use(rowset_id);
|
||||
ASSERT_TRUE(in_use == false);
|
||||
ASSERT_FALSE(in_use);
|
||||
|
||||
int64_t high = rowset_id.hi + 1;
|
||||
rowset_id = id_generator.next_id();
|
||||
ASSERT_TRUE(rowset_id.hi == high);
|
||||
rowset_id = id_generator.next_id(); // hi == 3
|
||||
ASSERT_EQ(rowset_id.hi, high);
|
||||
in_use = id_generator.id_in_use(rowset_id);
|
||||
ASSERT_TRUE(in_use == true);
|
||||
ASSERT_TRUE(in_use);
|
||||
|
||||
std::string rowset_mid_str = rowset_id.to_string().substr(16,16);
|
||||
std::string backend_mid_str = backend_uid.to_string().substr(0, 16);
|
||||
ASSERT_STREQ(rowset_mid_str.c_str(), backend_mid_str.c_str());
|
||||
ASSERT_EQ(rowset_mid_str, backend_mid_str);
|
||||
}
|
||||
}
|
||||
|
||||
// Multi-threaded benchmark for UniqueRowsetIdGenerator::next_id().
// kNumThreads pool workers each request kIdPerThread ids from one shared
// generator. The time recorded by SCOPED_RAW_TIMER is printed, and the id
// drawn after all workers finish is checked against the expected counter value.
TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdBenchmark) {
    const int kNumThreads = 8;
    const int kIdPerThread = 1000000;
    const int kTotalIds = kNumThreads * kIdPerThread;

    UniqueId backend_uid = UniqueId::gen_uid();
    UniqueRowsetIdGenerator id_generator(backend_uid);

    std::unique_ptr<ThreadPool> pool;
    Status s = ThreadPoolBuilder("GenerateIdBenchmark")
                       .set_min_threads(kNumThreads)
                       .set_max_threads(kNumThreads)
                       .build(&pool);
    ASSERT_TRUE(s.ok()) << s.to_string();

    // Work item run by each pool thread: draw kIdPerThread ids and discard them.
    auto generate_ids = [&id_generator]() {
        int remaining = kIdPerThread;
        while (remaining-- > 0) {
            id_generator.next_id();
        }
    };

    int64_t cost_ns = 0;
    {
        // The timer covers task submission and the wait for all tasks to finish.
        SCOPED_RAW_TIMER(&cost_ns);
        for (int task = 0; task < kNumThreads; ++task) {
            ASSERT_TRUE(pool->submit_func(generate_ids).ok());
        }
        pool->wait();
    }

    // The generator's version (2) sits above the low 56 bits of `hi`; the low
    // bits carry the counter, so the next id must be kTotalIds + 1.
    const int64_t version_bits = static_cast<int64_t>(2) << 56;
    RowsetId last_id = id_generator.next_id();
    ASSERT_EQ(last_id.hi, version_bits + kTotalIds + 1);

    std::cout << "Generate " << kTotalIds << " rowset ids cost "
              << PrettyPrinter::print(cost_ns, TUnit::TIME_NS) << std::endl;
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
@ -277,6 +277,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_meta_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/alpha_rowset_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/beta_rowset_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_converter_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/unique_rowset_id_generator_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test
|
||||
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test
|
||||
|
||||
Reference in New Issue
Block a user