[PALF] add enable_sync and enable_vote columns in __all_virtual_ha_diagnose

This commit is contained in:
BinChenn 2023-05-17 12:41:16 +00:00 committed by ob-robot
parent bf967f7515
commit fd4ddaac3d
12 changed files with 121 additions and 11 deletions

View File

@ -81,7 +81,7 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_degrade_upgrade)
{
oceanbase::common::ObClusterVersion::get_instance().cluster_version_ = CLUSTER_VERSION_4_1_0_0;
SET_CASE_LOG_FILE(TEST_NAME, "arb_2f1a_degrade_upgrade");
OB_LOGGER.set_log_level("DEBUG");
OB_LOGGER.set_log_level("TRACE");
MockLocCB loc_cb;
int ret = OB_SUCCESS;
PALF_LOG(INFO, "begin test_2f1a_degrade_upgrade");

View File

@ -42,6 +42,7 @@ public:
server_idx_(-1),
rebuild_palf_id_(-1),
rebuild_lsn_(),
allow_rebuild_(false),
is_inited_(false) {}
// Destructor: delegates all cleanup to destroy().
virtual ~TestRebuildCbImpl() { destroy(); }
public:
@ -80,6 +81,7 @@ public:
server_idx_ = -1;
rebuild_palf_id_ = -1;
rebuild_lsn_.reset();
allow_rebuild_ = false;
test_base_ = NULL;
}
@ -106,7 +108,7 @@ public:
ObTenantEnv::set_tenant(&tenant_base);
lib::set_thread_name("RebuildCB");
while (!has_set_stop()) {
if (rebuild_palf_id_ != -1 && rebuild_lsn_.is_valid()) {
if (true == allow_rebuild_ && rebuild_palf_id_ != -1 && rebuild_lsn_.is_valid()) {
PalfHandleImplGuard leader;
PalfHandleImplGuard *rebuild_palf;
int64_t leader_idx;
@ -133,6 +135,7 @@ public:
int64_t server_idx_;
int64_t rebuild_palf_id_;
LSN rebuild_lsn_;
bool allow_rebuild_;
bool is_inited_;
};
};
@ -170,10 +173,9 @@ TEST_F(TestObSimpleLogClusterRebuild, test_old_leader_rebuild)
PALF_LOG(INFO, "begin block net", K(id), K(leader_idx), K(follower_idx1), K(follower_idx2));
block_net(leader_idx, follower_idx1);
block_net(leader_idx, follower_idx2);
submit_log(leader, 100, id, 6 * KB);
PALF_LOG(INFO, "begin submit logs", K(id), K(leader_idx), K(follower_idx1), K(follower_idx2));
(void) submit_log(leader, 1000, leader_idx, MB);
// sleep to wait leader switching
sleep(16);
PALF_LOG(INFO, "after sleep 16s, begin get_leader", K(id), K(leader_idx), K(follower_idx1), K(follower_idx2));
int64_t new_leader_idx = 0;
unittest::PalfHandleImplGuard new_leader;
@ -206,10 +208,26 @@ TEST_F(TestObSimpleLogClusterRebuild, test_old_leader_rebuild)
EXPECT_EQ(OB_SUCCESS, rebuild_server->palf_handle_impl_->log_engine_.get_block_id_range(min_block_id, max_block_id));
PALF_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "runlin trace get_block_id_range", K(min_block_id), K(max_block_id));
// submit a cond task before unblocking net to stop truncating task
IOTaskCond cond(id, rebuild_server->palf_env_impl_->last_palf_epoch_);
LogIOWorker *io_worker = &rebuild_server->palf_env_impl_->log_io_worker_wrapper_.user_log_io_worker_;
io_worker->submit_io_task(&cond);
// after unblocking net, old leader will do rebuild
unblock_net(leader_idx, follower_idx1);
unblock_net(leader_idx, follower_idx2);
sleep(10);
sleep(5);
// is truncating, can not rebuild
if (rebuild_server->palf_handle_impl_->sw_.is_truncating_) {
PalfBaseInfo rebuild_base_info;
EXPECT_EQ(OB_SUCCESS, new_leader.palf_handle_impl_->get_base_info(rebuild_cb.rebuild_lsn_, rebuild_base_info));
EXPECT_EQ(OB_SUCCESS, rebuild_server->palf_handle_impl_->disable_sync());
EXPECT_EQ(OB_EAGAIN, rebuild_server->palf_handle_impl_->advance_base_info(rebuild_base_info, true));
}
cond.cond_.signal();
sleep(5);
rebuild_cb.allow_rebuild_ = true;
PalfBaseInfo base_info_in_leader;
PalfBaseInfo base_info_after_rebuild;
@ -255,6 +273,7 @@ TEST_F(TestObSimpleLogClusterRebuild, test_follower_rebuild)
EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list));
follower_idx = (leader_idx + 1) % 3;
TestRebuildCbImpl rebuild_cb;
rebuild_cb.allow_rebuild_ = true;
PalfRebuildCbNode rebuild_node(&rebuild_cb);
EXPECT_EQ(OB_SUCCESS, rebuild_cb.init(this, follower_idx));
EXPECT_EQ(OB_SUCCESS, rebuild_cb.start());

View File

@ -706,6 +706,7 @@ int ObLogService::diagnose_apply(const share::ObLSID &id,
return ret;
}
int ObLogService::get_io_start_time(int64_t &last_working_time)
{
int ret = OB_SUCCESS;

View File

@ -2774,6 +2774,16 @@ int LogSlidingWindow::get_majority_lsn_(const ObMemberList &member_list,
return ret;
}
// Reports whether a rebuild may proceed right now.
// Returns false when the IS_INIT check does not hold, or while is_truncating_ is set;
// returns true otherwise.
bool LogSlidingWindow::is_allow_rebuild() const
{
  // Caller holds palf_handle_impl's rlock.
  if (!(IS_INIT)) {
    return false;
  }
  // A rebuild is permitted only when no truncate is in flight.
  return !is_truncating_;
}
int LogSlidingWindow::truncate_for_rebuild(const PalfBaseInfo &palf_base_info)
{
// Caller holds palf_handle_impl's wrlock.

View File

@ -203,6 +203,7 @@ public:
virtual int ack_log(const common::ObAddr &src_server, const LSN &end_lsn);
virtual int truncate(const TruncateLogInfo &truncate_log_info, const LSN &expected_prev_lsn,
const int64_t expected_prev_log_pid);
virtual bool is_allow_rebuild() const;
virtual int truncate_for_rebuild(const PalfBaseInfo &palf_base_info);
virtual bool is_prev_log_pid_match(const int64_t log_id,
const LSN &lsn,

View File

@ -1467,7 +1467,12 @@ int PalfHandleImpl::advance_base_info(const PalfBaseInfo &palf_base_info, const
TruncatePrefixBlocksCbCtx truncate_prefix_cb_ctx(new_base_lsn);
flush_meta_cb_ctx.type_ = SNAPSHOT_META;
flush_meta_cb_ctx.base_lsn_ = new_base_lsn;
if (OB_FAIL(check_need_advance_base_info_(new_base_lsn, prev_log_info, is_rebuild))) {
// Note: a rebuild cannot proceed while a truncate operation is in progress, because the
// group_buffer may be truncated again by LogCallback after the rebuild has advanced it.
if (false == sw_.is_allow_rebuild()) {
ret = OB_EAGAIN;
PALF_LOG(WARN, "can not advance_base_info for now, try again failed", K(ret), KPC(this), K(palf_base_info), K(is_rebuild));
} else if (OB_FAIL(check_need_advance_base_info_(new_base_lsn, prev_log_info, is_rebuild))) {
PALF_LOG(WARN, "check_need_advance_base_info failed", K(ret), KPC(this), K(palf_base_info), K(is_rebuild));
} else if (OB_FAIL(log_snapshot_meta.generate(new_base_lsn, prev_log_info))) {
PALF_LOG(WARN, "LogSnapshotMeta generate failed", K(ret), KPC(this), K(palf_base_info));
@ -3873,6 +3878,8 @@ int PalfHandleImpl::diagnose(PalfDiagnoseInfo &diagnose_info) const
state_mgr_.get_role_and_state(diagnose_info.palf_role_, diagnose_info.palf_state_);
diagnose_info.palf_proposal_id_ = state_mgr_.get_proposal_id();
state_mgr_.get_election_role(diagnose_info.election_role_, diagnose_info.election_epoch_);
diagnose_info.enable_sync_ = state_mgr_.is_sync_enabled();
diagnose_info.enable_vote_ = state_mgr_.is_allow_vote();
return ret;
}

View File

@ -108,18 +108,24 @@ struct PalfDiagnoseInfo {
common::ObRole palf_role_;
palf::ObReplicaState palf_state_;
int64_t palf_proposal_id_;
bool enable_sync_;
bool enable_vote_;
// Puts every diagnose field back to its default value.
void reset() {
  // Election side: follower role, epoch 0.
  election_epoch_ = 0;
  election_role_ = FOLLOWER;
  // Palf side: follower role, invalid state and proposal id.
  palf_proposal_id_ = INVALID_PROPOSAL_ID;
  palf_state_ = ObReplicaState::INVALID_STATE;
  palf_role_ = FOLLOWER;
  // Sync and vote flags default to off.
  enable_vote_ = false;
  enable_sync_ = false;
}
TO_STRING_KV(K(election_role_),
K(election_epoch_),
K(palf_role_),
K(palf_state_),
K(palf_proposal_id_));
K(palf_proposal_id_),
K(enable_sync_),
K(enable_vote_));
};
struct FetchLogStat {

View File

@ -236,6 +236,17 @@ int ObAllVirtualHADiagnose::insert_stat_(storage::DiagnoseInfo &diagnose_info)
cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(
ObCharset::get_default_charset()));
break;
case ENABLE_SYNC:
cur_row_.cells_[i].set_bool(diagnose_info.palf_diagnose_info_.enable_sync_);
break;
case ENABLE_VOTE:
cur_row_.cells_[i].set_bool(diagnose_info.palf_diagnose_info_.enable_vote_);
break;
case ARB_SRV_INFO:
cur_row_.cells_[i].set_varchar(ObString(""));
cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(
ObCharset::get_default_charset()));
break;
default:
ret = OB_ERR_UNEXPECTED;
SERVER_LOG(WARN, "unkown column");

View File

@ -52,6 +52,9 @@ enum IOStatColumn
RESTORE_HANDLER_PROPOSAL_ID,
RESTORE_CONTEXT_INFO,
RESTORE_ERR_CONTEXT_INFO,
ENABLE_SYNC,
ENABLE_VOTE,
ARB_SRV_INFO,
};
class ObAllVirtualHADiagnose : public common::ObVirtualTableScannerIterator

View File

@ -8449,6 +8449,51 @@ int ObInnerTableSchema::all_virtual_ha_diagnose_schema(ObTableSchema &table_sche
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("enable_sync", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObTinyIntType, //column_type
CS_TYPE_INVALID, //column_collation_type
1, //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("enable_vote", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObTinyIntType, //column_type
CS_TYPE_INVALID, //column_collation_type
1, //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("arb_srv_info", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObVarcharType, //column_type
CS_TYPE_INVALID, //column_collation_type
1024, //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
table_schema.get_part_option().set_part_num(1);
table_schema.set_part_level(PARTITION_LEVEL_ONE);

View File

@ -11344,7 +11344,10 @@ def_table_schema(
('restore_handler_role', 'varchar:32'),
('restore_proposal_id', 'int'),
('restore_context_info', 'varchar:1024'),
('restore_err_context_info', 'varchar:1024')
('restore_err_context_info', 'varchar:1024'),
('enable_sync', 'bool'),
('enable_vote', 'bool'),
('arb_srv_info', 'varchar:1024')
],
partition_columns = ['svr_ip', 'svr_port'],

View File

@ -97,8 +97,11 @@ struct DiagnoseInfo
DiagnoseInfo() { reset(); }
~DiagnoseInfo() { reset(); }
// Reports whether the roles recorded by the different components agree with each other.
// NOTE(review): two return statements appear back to back below. This looks like the
// removed and the added line of a diff rendered without +/- markers -- confirm which one
// is live. As literally written, only the first return can ever execute.
bool is_role_sync() {
// Form 1 (presumably the pre-change version): the election role equals the palf role,
// and the palf role equals the log handler role.
return ((palf_diagnose_info_.election_role_ == palf_diagnose_info_.palf_role_)
&& (palf_diagnose_info_.palf_role_ == log_handler_diagnose_info_.log_handler_role_));
// Form 2 (presumably the post-change version): the election role equals the palf role,
// and palf's role and proposal id both match either the log handler's or the restore
// handler's role and proposal id.
return ((palf_diagnose_info_.election_role_ == palf_diagnose_info_.palf_role_) &&
((palf_diagnose_info_.palf_role_ == log_handler_diagnose_info_.log_handler_role_ &&
palf_diagnose_info_.palf_proposal_id_ == log_handler_diagnose_info_.log_handler_proposal_id_) ||
(palf_diagnose_info_.palf_role_ == restore_diagnose_info_.restore_role_ &&
palf_diagnose_info_.palf_proposal_id_ == restore_diagnose_info_.restore_proposal_id_)));
}
int64_t ls_id_;
logservice::LogHandlerDiagnoseInfo log_handler_diagnose_info_;
@ -117,7 +120,8 @@ struct DiagnoseInfo
K(replay_diagnose_info_),
K(gc_diagnose_info_),
K(checkpoint_diagnose_info_),
K(restore_diagnose_info_));
K(restore_diagnose_info_)
);
void reset() {
ls_id_ = -1;
log_handler_diagnose_info_.reset();