[rowset id] A little improvement of rowset id generator (#3203)

The main optimization points:
1. Use std::unordered_set instead of std::set, and use RowsetId.hi as RowsetId's hash value.
2. Minimize the scope of SpinLock in UniqueRowsetIdGenerator.

Profile comparison:
* Run UniqueRowsetIdGeneratorTest.GenerateIdBenchmark 10 times
old version |  new version
6s962ms     |  3s647ms
6s139ms     |  3s393ms
6s234ms     |  3s686ms
6s060ms     |  3s447ms
5s966ms     |  4s127ms
5s786ms     |  3s994ms
5s778ms     |  4s072ms
6s193ms     |  4s082ms
6s159ms     |  3s560ms
5s591ms     |  3s654ms
This commit is contained in:
Yingchun Lai
2020-03-26 20:24:26 +08:00
committed by GitHub
parent eda23b57f2
commit cc31bf9cf9
6 changed files with 104 additions and 49 deletions

View File

@ -273,7 +273,7 @@ typedef std::set<uint32_t> UniqueIdSet;
typedef std::map<ColumnId, ColumnId> UniqueIdToColumnIdMap;
// 8 bit rowset id version
// 56 bit, inc number from 0
// 56 bit, inc number from 1
// 128 bit backend uid, it is a uuid bit, id version
struct RowsetId {
int8_t version = 0;
@ -305,7 +305,7 @@ struct RowsetId {
void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
version = id_version;
if (high >= MAX_ROWSET_ID) {
if (UNLIKELY(high >= MAX_ROWSET_ID)) {
LOG(FATAL) << "inc rowsetid is too large:" << high;
}
hi = (id_version << 56) + (high & LOW_56_BITS);
@ -350,6 +350,13 @@ struct RowsetId {
}
};
// Hash functor for RowsetId, so it can be used as the key of unordered
// containers such as std::unordered_set<RowsetId, RowsetIdHash>.
struct RowsetIdHash {
    // The hash is simply the `hi` field: it is an increasing number on a BE
    // instance, so it can be used as the hash value as-is.
    size_t operator()(const RowsetId& rowset_id) const {
        const size_t hash_value = rowset_id.hi;
        return hash_value;
    }
};
} // namespace doris
#endif // DORIS_BE_SRC_OLAP_OLAP_COMMON_H

View File

@ -39,7 +39,7 @@ public:
// for example, during gc logic, gc thread finds a file
// and it could not find it under rowset list. but it maybe in use
// during load procedure. Gc thread will check it using this method.
virtual bool id_in_use(const RowsetId& rowset_id) = 0;
virtual bool id_in_use(const RowsetId& rowset_id) const = 0;
// remove the rowsetid from useful rowsetid list.
virtual void release_id(const RowsetId& rowset_id) = 0;

View File

@ -22,26 +22,28 @@
namespace doris {
UniqueRowsetIdGenerator::UniqueRowsetIdGenerator(const UniqueId& backend_uid) :
_backend_uid(backend_uid), _inc_id(1) {
_backend_uid(backend_uid), _inc_id(0) {
}
// generate a unique rowset id and save it in a set to check whether it is valid in the future
RowsetId UniqueRowsetIdGenerator::next_id() {
std::lock_guard<SpinLock> l(_lock);
RowsetId rowset_id;
rowset_id.init(_version, ++_inc_id, _backend_uid.hi, _backend_uid.lo);
_valid_rowset_ids.insert(rowset_id);
{
std::lock_guard<SpinLock> l(_lock);
_valid_rowset_ids.insert(rowset_id);
}
return rowset_id;
}
bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) {
std::lock_guard<SpinLock> l(_lock);
// if rowset_id == 1, then it is an old version rowsetid, not gc it
bool UniqueRowsetIdGenerator::id_in_use(const RowsetId& rowset_id) const {
// if rowset_id.version < _version, it is an old-version rowset id, so do not gc it:
// old-version rowset ids are not generated by this code, so they must not be deleted
if (rowset_id.version < _version) {
return true;
}
return _valid_rowset_ids.find(rowset_id) != _valid_rowset_ids.end();
std::lock_guard<SpinLock> l(_lock);
return _valid_rowset_ids.count(rowset_id) == 1;
}
void UniqueRowsetIdGenerator::release_id(const RowsetId& rowset_id) {

View File

@ -30,16 +30,18 @@ public:
RowsetId next_id() override;
bool id_in_use(const RowsetId& rowset_id) override;
bool id_in_use(const RowsetId& rowset_id) const override;
void release_id(const RowsetId& rowset_id) override;
private:
SpinLock _lock;
UniqueId _backend_uid;
mutable SpinLock _lock;
const UniqueId _backend_uid;
const int64_t _version = 2; // modify it when create new version id generator
int64_t _inc_id = 0;
std::set<RowsetId> _valid_rowset_ids;
}; // FeBasedRowsetIdGenerator
std::atomic<int64_t> _inc_id;
std::unordered_set<RowsetId, RowsetIdHash> _valid_rowset_ids;
DISALLOW_COPY_AND_ASSIGN(UniqueRowsetIdGenerator);
}; // UniqueRowsetIdGenerator
} // namespace doris

View File

@ -20,6 +20,10 @@
#include <gtest/gtest.h>
#include <iostream>
#include "util/runtime_profile.h"
#include "util/threadpool.h"
#include "util/pretty_printer.h"
namespace doris {
class UniqueRowsetIdGeneratorTest : public testing::Test {
public:
@ -29,33 +33,38 @@ public:
};
TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) {
int64_t max_id = 1;
max_id = max_id << 56;
{
int64_t hi = 1; // version
hi <<= 56;
RowsetId rowset_id;
rowset_id.init(123);
ASSERT_TRUE(rowset_id.version == 1);
ASSERT_TRUE(rowset_id.hi == (123 + max_id));
ASSERT_TRUE(rowset_id.mi == 0);
ASSERT_TRUE(rowset_id.lo == 0);
ASSERT_STREQ("123", rowset_id.to_string().c_str());
ASSERT_EQ(rowset_id.version, 1);
ASSERT_EQ(rowset_id.hi, 123 + hi);
ASSERT_EQ(rowset_id.mi, 0);
ASSERT_EQ(rowset_id.lo, 0);
ASSERT_EQ(std::string("123"), rowset_id.to_string());
}
{
int64_t hi = 1; // version
hi <<= 56;
RowsetId rowset_id;
rowset_id.init("123");
ASSERT_TRUE(rowset_id.version == 1);
ASSERT_TRUE(rowset_id.hi == (123 + max_id));
ASSERT_TRUE(rowset_id.mi == 0);
ASSERT_TRUE(rowset_id.lo == 0);
ASSERT_STREQ("123", rowset_id.to_string().c_str());
ASSERT_EQ(rowset_id.version, 1);
ASSERT_EQ(rowset_id.hi, 123 + hi);
ASSERT_EQ(rowset_id.mi, 0);
ASSERT_EQ(rowset_id.lo, 0);
ASSERT_EQ(std::string("123"), rowset_id.to_string());
}
{
int64_t hi = 2; // version
hi <<= 56;
const std::string rowset_id_v2("0200000000000003c04f58d989cab2f2efd45faa20449189");
RowsetId rowset_id;
rowset_id.init("0200000000000003c04f58d989cab2f2efd45faa20449189");
ASSERT_TRUE(rowset_id.version == 2);
ASSERT_TRUE(rowset_id.hi == (3 + max_id));
ASSERT_STREQ("0200000000000003c04f58d989cab2f2efd45faa20449189", rowset_id.to_string().c_str());
rowset_id.init(rowset_id_v2);
ASSERT_EQ(rowset_id.version, 2);
ASSERT_EQ(rowset_id.hi, 3 + hi);
ASSERT_EQ(std::string(rowset_id_v2), rowset_id.to_string());
}
}
@ -63,41 +72,75 @@ TEST_F(UniqueRowsetIdGeneratorTest, RowsetIdFormatTest) {
TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdTest) {
UniqueId backend_uid = UniqueId::gen_uid();
UniqueId backend_uid2 = UniqueId::gen_uid();
ASSERT_TRUE(backend_uid != backend_uid2);
ASSERT_NE(backend_uid, backend_uid2);
UniqueRowsetIdGenerator id_generator(backend_uid);
UniqueRowsetIdGenerator id_generator2(backend_uid2);
{
RowsetId rowset_id1 = id_generator.next_id();
RowsetId rowset_id1 = id_generator.next_id(); // hi == 1
RowsetId rowset_id2 = id_generator2.next_id();
ASSERT_TRUE(rowset_id1.hi != rowset_id2.hi);
ASSERT_EQ(rowset_id1.hi, rowset_id2.hi);
}
{
int64_t max_id = 2;
max_id = max_id << 56;
RowsetId rowset_id = id_generator.next_id();
ASSERT_TRUE(rowset_id.hi == (1 + max_id));
ASSERT_TRUE(rowset_id.version == 2);
ASSERT_TRUE(backend_uid.lo == rowset_id.lo);
ASSERT_TRUE(backend_uid.hi == rowset_id.mi);
ASSERT_TRUE(rowset_id.hi != 0);
int64_t hi = 2; // version
hi <<= 56;
RowsetId rowset_id = id_generator.next_id(); // hi == 2
ASSERT_EQ(rowset_id.hi, hi + 2);
ASSERT_EQ(rowset_id.version, 2);
ASSERT_EQ(backend_uid.lo, rowset_id.lo);
ASSERT_EQ(backend_uid.hi, rowset_id.mi);
ASSERT_NE(rowset_id.hi, 0);
bool in_use = id_generator.id_in_use(rowset_id);
ASSERT_TRUE(in_use == true);
ASSERT_TRUE(in_use);
id_generator.release_id(rowset_id);
in_use = id_generator.id_in_use(rowset_id);
ASSERT_TRUE(in_use == false);
ASSERT_FALSE(in_use);
int64_t high = rowset_id.hi + 1;
rowset_id = id_generator.next_id();
ASSERT_TRUE(rowset_id.hi == high);
rowset_id = id_generator.next_id(); // hi == 3
ASSERT_EQ(rowset_id.hi, high);
in_use = id_generator.id_in_use(rowset_id);
ASSERT_TRUE(in_use == true);
ASSERT_TRUE(in_use);
std::string rowset_mid_str = rowset_id.to_string().substr(16,16);
std::string backend_mid_str = backend_uid.to_string().substr(0, 16);
ASSERT_STREQ(rowset_mid_str.c_str(), backend_mid_str.c_str());
ASSERT_EQ(rowset_mid_str, backend_mid_str);
}
}
// Benchmark: submits kNumThreads tasks to a thread pool, each of which pulls
// kIdPerThread ids from one shared generator, then checks the id counter and
// prints how long the generation took.
TEST_F(UniqueRowsetIdGeneratorTest, GenerateIdBenchmark) {
    const int kNumThreads = 8;
    const int kIdPerThread = 1000000;
    const int kTotalIds = kNumThreads * kIdPerThread;

    UniqueId backend_uid = UniqueId::gen_uid();
    UniqueRowsetIdGenerator id_generator(backend_uid);

    std::unique_ptr<ThreadPool> pool;
    Status build_status = ThreadPoolBuilder("GenerateIdBenchmark")
            .set_min_threads(kNumThreads)
            .set_max_threads(kNumThreads)
            .build(&pool);
    ASSERT_TRUE(build_status.ok()) << build_status.to_string();

    // One unit of work: generate kIdPerThread ids from the shared generator.
    auto generate_ids = [&id_generator]() {
        for (int n = 0; n < kIdPerThread; ++n) {
            id_generator.next_id();
        }
    };

    int64_t elapsed_ns = 0;
    {
        // Only task submission and the wait for completion are timed.
        SCOPED_RAW_TIMER(&elapsed_ns);
        for (int t = 0; t < kNumThreads; ++t) {
            ASSERT_TRUE(pool->submit_func(generate_ids).ok());
        }
        pool->wait();
    }

    // kTotalIds ids have been handed out, so the next one must carry
    // kTotalIds + 1 below the version value (2) shifted into the top 8 bits.
    const int64_t version_bits = static_cast<int64_t>(2) << 56;
    RowsetId last_id = id_generator.next_id();
    ASSERT_EQ(last_id.hi, version_bits + kTotalIds + 1);

    std::cout << "Generate " << kTotalIds << " rowset ids cost "
              << PrettyPrinter::print(elapsed_ns, TUnit::TIME_NS) << std::endl;
}
}
int main(int argc, char** argv) {

View File

@ -277,6 +277,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_meta_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/alpha_rowset_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/beta_rowset_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/rowset_converter_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/unique_rowset_id_generator_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test