optimize das location refresh

This commit is contained in:
leslieyuchen 2023-07-03 12:42:25 +00:00 committed by ob-robot
parent d3cee584cb
commit 1d691e0d14
10 changed files with 66 additions and 36 deletions

View File

@ -711,6 +711,19 @@ bool has_exist_in_array(const T *array, const int64_t num, const T &var)
return ret;
}
template <typename ContainerT, typename ElementT>
bool element_exist(const ContainerT &container, const ElementT &var)
{
bool bret = false;
FOREACH(var_iter, container) {
if (*var_iter == var) {
bret = true;
break;
}
}
return bret;
}
template <typename T>
int add_var_to_array_no_dup(ObIArray<T> &array, const T &var, int64_t *idx = NULL)
{

View File

@ -92,7 +92,7 @@ public:
K_(property),
K_(restore_status),
K_(proposal_id));
private:
protected:
common::ObAddr server_;
common::ObRole role_;
int64_t sql_port_;

View File

@ -22,6 +22,7 @@
#include "sql/das/ob_das_utils.h"
#include "sql/ob_sql_context.h"
#include "storage/tx/wrs/ob_black_list.h"
#include "lib/rc/context.h"
namespace oceanbase
{
@ -744,6 +745,7 @@ ObDASLocationRouter::ObDASLocationRouter(ObIAllocator &allocator)
cur_errno_(OB_SUCCESS),
retry_cnt_(0),
all_tablet_list_(allocator),
succ_tablet_list_(allocator),
virtual_server_list_(allocator),
allocator_(allocator)
{
@ -1156,20 +1158,35 @@ void ObDASLocationRouter::refresh_location_cache(bool is_nonblock, int err_no)
//
// all_tablet_list_ may contain duplicate tablet_id
int ret = OB_SUCCESS;
if (OB_ISNULL(GCTX.location_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.location_service_ is null", KR(ret));
} else if (OB_FAIL(GCTX.location_service_->batch_renew_tablet_locations(
MTL_ID(),
all_tablet_list_,
err_no,
is_nonblock))) {
LOG_WARN("batch renew tablet locations failed", KR(ret),
"tenant_id", MTL_ID(), K(err_no), K(is_nonblock), K_(all_tablet_list));
} else {
LOG_INFO("LOCATION: refresh tablet location cache succ", K(err_no), K_(all_tablet_list));
lib::ContextParam param;
param.set_mem_attr(MTL_ID(), "DasRefrLoca", ObCtxIds::DEFAULT_CTX_ID)
.set_properties(lib::USE_TL_PAGE_OPTIONAL)
.set_page_size(OB_MALLOC_NORMAL_BLOCK_SIZE)
.set_ablock_size(lib::INTACT_MIDDLE_AOBJECT_SIZE);
CREATE_WITH_TEMP_CONTEXT(param) {
ObList<ObTabletID, ObIAllocator> failed_list(CURRENT_CONTEXT->get_allocator());
FOREACH_X(id_iter, all_tablet_list_, OB_SUCC(ret)) {
if (!element_exist(succ_tablet_list_, *id_iter) && !element_exist(failed_list, *id_iter)) {
if (OB_FAIL(failed_list.push_back(*id_iter))) {
LOG_WARN("store failed tablet id failed", KR(ret), K(id_iter));
}
}
}
if (OB_SUCC(ret)) {
if (OB_ISNULL(GCTX.location_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.location_service_ is null", KR(ret));
} else if (OB_FAIL(GCTX.location_service_->batch_renew_tablet_locations(MTL_ID(),
failed_list,
err_no,
is_nonblock))) {
LOG_WARN("batch renew tablet locations failed", KR(ret),
"tenant_id", MTL_ID(), K(err_no), K(is_nonblock), K(failed_list));
}
}
}
all_tablet_list_.clear();
succ_tablet_list_.clear();
}
NG_TRACE_TIMES(1, get_location_cache_end);
}

View File

@ -320,6 +320,8 @@ public:
cur_errno_ = err_no;
}
}
// Record the tablet id of a task that finished successfully by appending it
// to succ_tablet_list_. Returns whatever status code push_back reports.
int save_success_task(const common::ObTabletID &succ_id)
{
  const int push_ret = succ_tablet_list_.push_back(succ_id);
  return push_ret;
}
private:
int get_vt_svr_pair(uint64_t vt_id, const VirtualSvrPair *&vt_svr_pair);
int get_vt_tablet_loc(uint64_t table_id,
@ -336,6 +338,7 @@ private:
int cur_errno_;
int64_t retry_cnt_;
ObList<common::ObTabletID, common::ObIAllocator> all_tablet_list_;
ObList<common::ObTabletID, common::ObIAllocator> succ_tablet_list_;
VirtualSvrList virtual_server_list_;
common::ObIAllocator &allocator_;
private:

View File

@ -38,7 +38,7 @@ void ObDASRetryCtrl::tablet_location_retry_proc(ObDASRef &das_ref,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet loc is nullptr", K(ret));
} else {
loc_router.refresh_location_cache(tablet_loc->tablet_id_, true, task_op.get_errcode());
loc_router.refresh_location_cache(true, task_op.get_errcode());
need_retry = true;
const ObDASTableLocMeta *loc_meta = tablet_loc->loc_meta_;
LOG_INFO("refresh tablet location cache and retry DAS task",

View File

@ -389,7 +389,7 @@ int ObDASUtils::wait_das_retry(int64_t retry_cnt)
{
int ret = OB_SUCCESS;
uint32_t timeout_factor = static_cast<uint32_t>((retry_cnt > 100) ? 100 : retry_cnt);
int64_t sleep_us = 1000L * timeout_factor > THIS_WORKER.get_timeout_remain()
int64_t sleep_us = 10000L * timeout_factor > THIS_WORKER.get_timeout_remain()
? THIS_WORKER.get_timeout_remain()
: 1000L * timeout_factor;
if (sleep_us > 0) {

View File

@ -210,6 +210,7 @@ int ObDataAccessService::clear_task_exec_env(ObDASRef &das_ref, ObIDASTaskOp &ta
if (OB_FAIL(task_op.end_das_task())) {
LOG_WARN("end das task failed", K(ret));
}
DAS_CTX(das_ref.get_exec_ctx()).get_location_router().save_cur_exec_status(OB_SUCCESS);
return ret;
}
@ -250,7 +251,8 @@ int ObDataAccessService::retry_das_task(ObDASRef &das_ref, ObIDASTaskOp &task_op
retry_func(das_ref, task_op, need_retry);
LOG_INFO("[DAS RETRY] check if need tablet level retry",
KR(task_op.errcode_), K(need_retry),
"retry_cnt", location_router.get_retry_cnt());
"retry_cnt", location_router.get_retry_cnt(),
KPC(task_op.get_tablet_loc()));
if (need_retry) {
task_op.in_part_retry_ = true;
location_router.set_last_errno(task_op.get_errcode());
@ -547,6 +549,7 @@ int ObDataAccessService::process_task_resp(ObDASRef &das_ref, const ObDASTaskRes
ObIDASTaskResult *op_result = nullptr;
ObDASExtraData *extra_result = nullptr;
const common::ObSEArray<ObIDASTaskResult*, 2> &op_results = task_resp.get_op_results();
ObDASLocationRouter &loc_router = DAS_CTX(das_ref.get_exec_ctx()).get_location_router();
ObDASUtils::log_user_error_and_warn(task_resp.get_rcode());
for (int i = 0; i < op_results.count() - 1; i++) {
// even if an error happened during iteration, we should iterate to the end.
@ -626,6 +629,7 @@ int ObDataAccessService::process_task_resp(ObDASRef &das_ref, const ObDASTaskRes
} else {
// if no error happened, all tasks were executed successfully.
task_op->set_task_status(ObDasTaskStatus::FINISHED);
(void)loc_router.save_success_task(task_op->get_tablet_id());
if (OB_FAIL(task_op->state_advance())) {
LOG_WARN("failed to advance das task state.",K(ret));
}
@ -633,7 +637,7 @@ int ObDataAccessService::process_task_resp(ObDASRef &das_ref, const ObDASTaskRes
ret = COVER_SUCC(save_ret);
}
}
DAS_CTX(das_ref.get_exec_ctx()).get_location_router().save_cur_exec_status(ret);
loc_router.save_cur_exec_status(ret);
return ret;
}

View File

@ -27,8 +27,7 @@ namespace sql
ObOptTabletLoc::ObOptTabletLoc()
: partition_id_(OB_INVALID_INDEX),
first_level_part_id_(OB_INVALID_INDEX),
replica_locations_("SqlOptimLocaCac", OB_MALLOC_NORMAL_BLOCK_SIZE),
renew_time_(0)
replica_locations_("SqlOptimLocaCac", OB_MALLOC_NORMAL_BLOCK_SIZE)
{
}
@ -43,7 +42,6 @@ void ObOptTabletLoc::reset()
tablet_id_.reset();
ls_id_.reset();
replica_locations_.reset();
renew_time_ = 0;
}
int ObOptTabletLoc::assign(const ObOptTabletLoc &other)
@ -53,7 +51,6 @@ int ObOptTabletLoc::assign(const ObOptTabletLoc &other)
ls_id_ = other.ls_id_;
partition_id_ = other.partition_id_;
first_level_part_id_ = other.first_level_part_id_;
renew_time_ = other.renew_time_;
if (OB_FAIL(replica_locations_.assign(other.replica_locations_))) {
LOG_WARN("Failed to assign replica locations", K(ret));
}
@ -71,7 +68,6 @@ int ObOptTabletLoc::assign_with_only_readable_replica(const ObObjectID &partitio
first_level_part_id_ = first_level_part_id;
tablet_id_ = tablet_id;
ls_id_ = ls_location.get_ls_id();
renew_time_ = ls_location.get_renew_time();
for (int64_t i = 0; OB_SUCC(ret) && i < ls_location.get_replica_locations().count(); ++i) {
const ObLSReplicaLocation &replica_loc = ls_location.get_replica_locations().at(i);
if (ObReplicaTypeCheck::is_readable_replica(replica_loc.get_replica_type())) {
@ -103,8 +99,7 @@ int ObOptTabletLoc::assign_with_only_readable_replica(const ObObjectID &partitio
bool ObOptTabletLoc::is_valid() const
{
// For compatibility: after upgrading from 1.4.x or 2.1 to 2.2, pg_key may be invalid, so pg_key_ is not checked here.
return OB_INVALID_INDEX != partition_id_
&& renew_time_ >= 0;
return OB_INVALID_INDEX != partition_id_;
}
int ObOptTabletLoc::get_strong_leader(ObLSReplicaLocation &replica_location, int64_t &replica_idx) const
@ -355,7 +350,6 @@ int ObCandiTabletLoc::set_part_loc_with_only_readable_replica(const ObObjectID &
ObCandiTableLoc::ObCandiTableLoc()
: table_location_key_(OB_INVALID_ID),
ref_table_id_(OB_INVALID_ID),
direction_(UNORDERED),
candi_tablet_locs_(),
duplicate_type_(ObDuplicateType::NOT_DUPLICATE)
{
@ -369,7 +363,6 @@ void ObCandiTableLoc::reset()
{
table_location_key_ = OB_INVALID_ID;
ref_table_id_ = OB_INVALID_ID;
direction_ = UNORDERED;
candi_tablet_locs_.reset();
duplicate_type_ = ObDuplicateType::NOT_DUPLICATE;
}
@ -379,7 +372,6 @@ int ObCandiTableLoc::assign(const ObCandiTableLoc &other)
int ret = OB_SUCCESS;
table_location_key_ = other.table_location_key_;
ref_table_id_ = other.ref_table_id_;
direction_ = other.direction_;
duplicate_type_ = other.duplicate_type_;
if (OB_FAIL(candi_tablet_locs_.assign(other.candi_tablet_locs_))) {
LOG_WARN("Failed to assign phy_part_loc_info_list", K(ret));

View File

@ -63,13 +63,14 @@ public:
inline const share::ObLSID &get_ls_id() const { return ls_id_; }
inline int64_t get_renew_time() const { return renew_time_; }
inline const common::ObIArray<ObRoutePolicy::CandidateReplica> &get_replica_locations() const { return replica_locations_; }
inline common::ObIArray<ObRoutePolicy::CandidateReplica> &get_replica_locations() { return replica_locations_; }
TO_STRING_KV(K(partition_id_), K_(replica_locations), K_(renew_time));
TO_STRING_KV(K_(partition_id),
K_(tablet_id),
K_(ls_id),
K_(replica_locations));
private:
int64_t partition_id_;
@ -78,7 +79,6 @@ private:
common::ObTabletID tablet_id_;
share::ObLSID ls_id_;
ObSmartReplicaLocationArray replica_locations_;
int64_t renew_time_;
};
class ObCandiTabletLoc
@ -174,8 +174,6 @@ private:
uint64_t table_location_key_;
/* used to obtain the actual physical table ID */
uint64_t ref_table_id_;
/* location order */
ObOrderDirection direction_;
/* locations */
ObCandiTabletLocSEArray candi_tablet_locs_;
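// duplicate-table type: if this is a duplicate table and it has not been modified, a more suitable replica can be chosen when allocating the exchange operator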

View File

@ -151,10 +151,13 @@ public:
is_filter_ = false;
}
int assign(CandidateReplica &other);
//to_string only outputs the key information that SQL execution is concerned with,
//and other information can be obtained through the corresponding virtual tables.
TO_STRING_KV(K_(server),
K_(role),
K_(sql_port),
K_(is_filter));
public:
INHERIT_TO_STRING_KV("ls_replica_location", ObLSReplicaLocation,
K(attr_), K(is_filter_), K(replica_idx_));
ReplicaAttribute attr_;
bool is_filter_;
int64_t replica_idx_;//invalid