[Fix](partition) Skip rowset partition id eq 0 smaller than config wh… (#29510)
This commit is contained in:
@ -1162,6 +1162,9 @@ DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728");
|
||||
// Default 0 (disabled); if its value <= 0, then log is disabled
|
||||
DEFINE_mInt64(enable_debug_log_timeout_secs, "0");
|
||||
|
||||
// Max number of rowsets with partition id 0 tolerated when loading a data dir (BE exits if exceeded), default 0
|
||||
DEFINE_Int32(ignore_invalid_partition_id_rowset_num, "0");
|
||||
|
||||
// clang-format off
|
||||
#ifdef BE_TEST
|
||||
// test s3
|
||||
|
||||
@ -1237,6 +1237,9 @@ DECLARE_mInt64(enable_debug_log_timeout_secs);
|
||||
|
||||
DECLARE_mBool(enable_column_type_check);
|
||||
|
||||
// Max number of rowsets with partition id 0 tolerated when loading a data dir (BE exits if exceeded), default 0
|
||||
DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
|
||||
|
||||
#ifdef BE_TEST
|
||||
// test s3
|
||||
DECLARE_String(test_s3_resource);
|
||||
|
||||
@ -409,6 +409,12 @@ Status DataDir::load() {
|
||||
RETURN_IF_ERROR(_meta->put(META_COLUMN_FAMILY_INDEX, key, result));
|
||||
}
|
||||
}
|
||||
|
||||
if (rowset_meta->partition_id() == 0) {
|
||||
LOG(WARNING) << "rs tablet=" << rowset_meta->tablet_id() << " rowset_id=" << rowset_id
|
||||
<< " load from meta but partition id eq 0";
|
||||
}
|
||||
|
||||
dir_rowset_metas.push_back(rowset_meta);
|
||||
return true;
|
||||
};
|
||||
@ -497,6 +503,19 @@ Status DataDir::load() {
|
||||
RETURN_IF_ERROR(
|
||||
TabletMetaManager::traverse_pending_publish(_meta, load_pending_publish_info_func));
|
||||
|
||||
int64_t rowset_partition_id_eq_0_num = 0;
|
||||
for (auto rowset_meta : dir_rowset_metas) {
|
||||
if (rowset_meta->partition_id() == 0) {
|
||||
++rowset_partition_id_eq_0_num;
|
||||
}
|
||||
}
|
||||
if (rowset_partition_id_eq_0_num > config::ignore_invalid_partition_id_rowset_num) {
|
||||
LOG(FATAL) << fmt::format(
|
||||
"roswet partition id eq 0 bigger than config {}, be exit, plz check be.INFO",
|
||||
config::ignore_invalid_partition_id_rowset_num);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// traverse rowset
|
||||
// 1. add committed rowset to txn map
|
||||
// 2. add visible rowset to tablet
|
||||
@ -513,6 +532,13 @@ Status DataDir::load() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rowset_meta->partition_id() == 0) {
|
||||
LOG(WARNING) << "skip tablet_id=" << tablet->tablet_id()
|
||||
<< " rowset: " << rowset_meta->rowset_id()
|
||||
<< " txn: " << rowset_meta->txn_id();
|
||||
continue;
|
||||
}
|
||||
|
||||
RowsetSharedPtr rowset;
|
||||
Status create_status = tablet->create_rowset(rowset_meta, &rowset);
|
||||
if (!create_status) {
|
||||
@ -528,7 +554,7 @@ Status DataDir::load() {
|
||||
rowset_meta->set_tablet_schema(tablet->tablet_schema());
|
||||
RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(),
|
||||
rowset_meta->rowset_id(),
|
||||
rowset_meta->get_rowset_pb()));
|
||||
rowset_meta->get_rowset_pb(), false));
|
||||
}
|
||||
Status commit_txn_status = _txn_manager->commit_txn(
|
||||
_meta, rowset_meta->partition_id(), rowset_meta->txn_id(),
|
||||
@ -561,7 +587,7 @@ Status DataDir::load() {
|
||||
rowset_meta->set_tablet_schema(tablet->tablet_schema());
|
||||
RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(),
|
||||
rowset_meta->rowset_id(),
|
||||
rowset_meta->get_rowset_pb()));
|
||||
rowset_meta->get_rowset_pb(), false));
|
||||
}
|
||||
Status publish_status = tablet->add_rowset(rowset);
|
||||
if (!publish_status && !publish_status.is<PUSH_VERSION_ALREADY_EXIST>()) {
|
||||
|
||||
@ -34,6 +34,7 @@
|
||||
#include "olap/olap_define.h"
|
||||
#include "olap/olap_meta.h"
|
||||
#include "olap/utils.h"
|
||||
#include "util/debug_points.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -95,15 +96,22 @@ Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const Rowse
|
||||
// return Status::InternalError("invaid partition id {} tablet {}",
|
||||
// rowset_meta_pb.partition_id(), rowset_meta_pb.tablet_id());
|
||||
}
|
||||
DBUG_EXECUTE_IF("RowsetMetaManager::save::zero_partition_id", {
|
||||
long partition_id = rowset_meta_pb.partition_id();
|
||||
auto& rs_pb = const_cast<std::decay_t<decltype(rowset_meta_pb)>&>(rowset_meta_pb);
|
||||
rs_pb.set_partition_id(0);
|
||||
LOG(WARNING) << "set debug point RowsetMetaManager::save::zero_partition_id old="
|
||||
<< partition_id << " new=" << rowset_meta_pb.DebugString();
|
||||
});
|
||||
if (enable_binlog) {
|
||||
return _save_with_binlog(meta, tablet_uid, rowset_id, rowset_meta_pb);
|
||||
} else {
|
||||
return save(meta, tablet_uid, rowset_id, rowset_meta_pb);
|
||||
return _save(meta, tablet_uid, rowset_id, rowset_meta_pb);
|
||||
}
|
||||
}
|
||||
|
||||
Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id,
|
||||
const RowsetMetaPB& rowset_meta_pb) {
|
||||
Status RowsetMetaManager::_save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id,
|
||||
const RowsetMetaPB& rowset_meta_pb) {
|
||||
std::string key =
|
||||
fmt::format("{}{}_{}", ROWSET_PREFIX, tablet_uid.to_string(), rowset_id.to_string());
|
||||
std::string value;
|
||||
@ -523,7 +531,7 @@ Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta,
|
||||
}
|
||||
RowsetId rowset_id = rowset_meta.rowset_id();
|
||||
TabletUid tablet_uid = rowset_meta.tablet_uid();
|
||||
Status status = save(meta, tablet_uid, rowset_id, rowset_meta.get_rowset_pb());
|
||||
Status status = save(meta, tablet_uid, rowset_id, rowset_meta.get_rowset_pb(), false);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@ -54,8 +54,6 @@ public:
|
||||
// TODO(Drogon): refactor save && _save_with_binlog into one; kept separate temporarily to adapt to UT
|
||||
static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id,
|
||||
const RowsetMetaPB& rowset_meta_pb, bool enable_binlog);
|
||||
static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id,
|
||||
const RowsetMetaPB& rowset_meta_pb);
|
||||
|
||||
static std::vector<std::string> get_binlog_filenames(OlapMeta* meta, TabletUid tablet_uid,
|
||||
std::string_view binlog_version,
|
||||
@ -83,6 +81,8 @@ public:
|
||||
static Status load_json_rowset_meta(OlapMeta* meta, const std::string& rowset_meta_path);
|
||||
|
||||
private:
|
||||
static Status _save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id,
|
||||
const RowsetMetaPB& rowset_meta_pb);
|
||||
static Status _save_with_binlog(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id,
|
||||
const RowsetMetaPB& rowset_meta_pb);
|
||||
static Status _get_rowset_binlog_metas(OlapMeta* meta, const TabletUid tablet_uid,
|
||||
|
||||
@ -832,10 +832,10 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_
|
||||
tablet_meta->set_tablet_state(TABLET_RUNNING);
|
||||
}
|
||||
|
||||
if (tablet_meta->partition_id() <= 0) {
|
||||
LOG(WARNING) << "invalid partition id " << tablet_meta->partition_id() << ", tablet "
|
||||
<< tablet_meta->tablet_id();
|
||||
if (tablet_meta->partition_id() == 0) {
|
||||
LOG(WARNING) << "tablet=" << tablet_id << " load from meta but partition id eq 0";
|
||||
}
|
||||
|
||||
TabletSharedPtr tablet = std::make_shared<Tablet>(std::move(tablet_meta), data_dir);
|
||||
|
||||
// NOTE: method load_tablet_from_meta could be called by two cases as below
|
||||
|
||||
@ -37,6 +37,7 @@
|
||||
#include "olap/olap_define.h"
|
||||
#include "olap/tablet_meta_manager.h"
|
||||
#include "olap/utils.h"
|
||||
#include "util/debug_points.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/time.h"
|
||||
#include "util/uid_util.h"
|
||||
@ -468,6 +469,16 @@ Status TabletMeta::_save_meta(DataDir* data_dir) {
|
||||
Status TabletMeta::serialize(string* meta_binary) {
|
||||
TabletMetaPB tablet_meta_pb;
|
||||
to_meta_pb(&tablet_meta_pb);
|
||||
if (tablet_meta_pb.partition_id() <= 0) {
|
||||
LOG(WARNING) << "invalid partition id " << tablet_meta_pb.partition_id() << " tablet "
|
||||
<< tablet_meta_pb.tablet_id();
|
||||
}
|
||||
DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", {
|
||||
long partition_id = tablet_meta_pb.partition_id();
|
||||
tablet_meta_pb.set_partition_id(0);
|
||||
LOG(WARNING) << "set debug point TabletMeta::serialize::zero_partition_id old="
|
||||
<< partition_id << " new=" << tablet_meta_pb.DebugString();
|
||||
});
|
||||
bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
|
||||
if (!serialize_success) {
|
||||
LOG(FATAL) << "failed to serialize meta " << tablet_id();
|
||||
|
||||
@ -327,6 +327,13 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id,
|
||||
do {
|
||||
// get tx
|
||||
std::shared_lock rdlock(_get_txn_map_lock(transaction_id));
|
||||
auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb();
|
||||
// TODO(dx): remove this log after the partition id eq 0 bug is fixed
|
||||
if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) {
|
||||
rowset_ptr->rowset_meta()->set_partition_id(partition_id);
|
||||
LOG(WARNING) << "cant get partition id from rs pb, get from func arg partition_id="
|
||||
<< partition_id;
|
||||
}
|
||||
txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
|
||||
auto it = txn_tablet_map.find(key);
|
||||
if (it == txn_tablet_map.end()) {
|
||||
@ -374,8 +381,9 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id,
|
||||
// save meta need access disk, it maybe very slow, so that it is not in global txn lock
|
||||
// it is under a single txn lock
|
||||
if (!is_recovery) {
|
||||
Status save_status = RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(),
|
||||
rowset_ptr->rowset_meta()->get_rowset_pb());
|
||||
Status save_status =
|
||||
RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(),
|
||||
rowset_ptr->rowset_meta()->get_rowset_pb(), false);
|
||||
DBUG_EXECUTE_IF("TxnManager.RowsetMetaManager.save_wait", {
|
||||
if (auto wait = dp->param<int>("duration", 0); wait > 0) {
|
||||
LOG_WARNING("TxnManager.RowsetMetaManager.save_wait").tag("wait ms", wait);
|
||||
|
||||
@ -176,7 +176,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
|
||||
st = create_rowset_files(*rs, false);
|
||||
ASSERT_TRUE(st.ok()) << st;
|
||||
st = RowsetMetaManager::save(data_dir.get_meta(), rs->rowset_meta()->tablet_uid(),
|
||||
rs->rowset_id(), rs->rowset_meta()->get_rowset_pb());
|
||||
rs->rowset_id(), rs->rowset_meta()->get_rowset_pb(), false);
|
||||
ASSERT_TRUE(st.ok()) << st;
|
||||
}
|
||||
// Prepare garbage rowset files
|
||||
|
||||
@ -106,7 +106,7 @@ TEST_F(RowsetMetaManagerTest, TestSaveAndGetAndRemove) {
|
||||
EXPECT_EQ(rowset_meta.rowset_id(), rowset_id);
|
||||
RowsetMetaPB rowset_meta_pb;
|
||||
rowset_meta.to_rowset_pb(&rowset_meta_pb);
|
||||
Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, rowset_meta_pb);
|
||||
Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, rowset_meta_pb, false);
|
||||
EXPECT_TRUE(status == Status::OK());
|
||||
EXPECT_TRUE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid, rowset_id));
|
||||
std::string json_rowset_meta_read;
|
||||
|
||||
Reference in New Issue
Block a user