fix runtime filter memory leak

This commit is contained in:
obdev
2023-09-14 03:47:37 +00:00
committed by ob-robot
parent ce7846d119
commit 41b8832a65
7 changed files with 102 additions and 36 deletions

View File

@ -174,13 +174,17 @@ int ObJoinFilterOpInput::init_shared_msgs(
msg_ptr = nullptr; msg_ptr = nullptr;
if (OB_FAIL(PX_P2P_DH.alloc_msg(allocator, spec.rf_infos_.at(i).dh_msg_type_, msg_ptr))) { if (OB_FAIL(PX_P2P_DH.alloc_msg(allocator, spec.rf_infos_.at(i).dh_msg_type_, msg_ptr))) {
LOG_WARN("fail to alloc msg", K(ret)); LOG_WARN("fail to alloc msg", K(ret));
} else if (OB_FAIL(array_ptr->push_back(msg_ptr))) {
// push_back failed, so the msg must be destroyed immediately;
// if init or construct_msg_details fails, the msg is destroyed after the for loop
msg_ptr->destroy();
allocator.free(msg_ptr);
LOG_WARN("fail to push back array ptr", K(ret));
} else if (OB_FAIL(msg_ptr->init(spec.rf_infos_.at(i).p2p_datahub_id_, } else if (OB_FAIL(msg_ptr->init(spec.rf_infos_.at(i).p2p_datahub_id_,
px_sequence_id_, 0/*task_id*/, tenant_id, timeout_ts, register_dm_info_))) { px_sequence_id_, 0/*task_id*/, tenant_id, timeout_ts, register_dm_info_))) {
LOG_WARN("fail to init msg", K(ret)); LOG_WARN("fail to init msg", K(ret));
} else if (OB_FAIL(construct_msg_details(spec, sqc_proxy, config_, *msg_ptr, sqc_count))) { } else if (OB_FAIL(construct_msg_details(spec, sqc_proxy, config_, *msg_ptr, sqc_count))) {
LOG_WARN("fail to construct msg details", K(ret), K(tenant_id)); LOG_WARN("fail to construct msg details", K(ret), K(tenant_id));
} else if (OB_FAIL(array_ptr->push_back(msg_ptr))) {
LOG_WARN("fail to push back array ptr", K(ret));
} }
} }
} }
@ -189,7 +193,16 @@ int ObJoinFilterOpInput::init_shared_msgs(
} }
if (OB_FAIL(ret) && OB_NOT_NULL(array_ptr)) { if (OB_FAIL(ret) && OB_NOT_NULL(array_ptr)) {
// if failed, destroy all msgs
for (int64_t i = 0; i < array_ptr->count(); ++i) {
if (OB_NOT_NULL(array_ptr->at(i))) {
array_ptr->at(i)->destroy();
allocator.free(array_ptr->at(i));
array_ptr->at(i) = nullptr;
}
}
array_ptr->reset(); array_ptr->reset();
allocator.free(array_ptr);
array_ptr = nullptr; array_ptr = nullptr;
} }
@ -814,6 +827,10 @@ int ObJoinFilterOp::open_join_filter_create()
if (OB_FAIL(PX_P2P_DH.alloc_msg(allocator, MY_SPEC.rf_infos_.at(i).dh_msg_type_, msg_ptr))) { if (OB_FAIL(PX_P2P_DH.alloc_msg(allocator, MY_SPEC.rf_infos_.at(i).dh_msg_type_, msg_ptr))) {
LOG_WARN("fail to alloc msg", K(ret)); LOG_WARN("fail to alloc msg", K(ret));
} else if (OB_FAIL(local_rf_msgs_.push_back(msg_ptr))) { } else if (OB_FAIL(local_rf_msgs_.push_back(msg_ptr))) {
// push_back failed, so the msg must be destroyed immediately;
// if init or construct_msg_details fails, the msg is destroyed during close
msg_ptr->destroy();
allocator.free(msg_ptr);
LOG_WARN("fail to push back msg ptr", K(ret)); LOG_WARN("fail to push back msg ptr", K(ret));
} else if (OB_FAIL(msg_ptr->init(MY_SPEC.rf_infos_.at(i).p2p_datahub_id_, } else if (OB_FAIL(msg_ptr->init(MY_SPEC.rf_infos_.at(i).p2p_datahub_id_,
filter_input->px_sequence_id_, filter_input->task_id_, tenant_id, timeout_ts, filter_input->register_dm_info_))) { filter_input->px_sequence_id_, filter_input->task_id_, tenant_id, timeout_ts, filter_input->register_dm_info_))) {
@ -825,6 +842,9 @@ int ObJoinFilterOp::open_join_filter_create()
LOG_WARN("fail to push back flag", K(ret)); LOG_WARN("fail to push back flag", K(ret));
} }
} }
if (OB_FAIL(ret)) {
IGNORE_RETURN release_local_msg();
}
} else if (OB_FAIL(init_shared_msgs_from_input())) { } else if (OB_FAIL(init_shared_msgs_from_input())) {
LOG_WARN("fail to init shared msgs from input", K(ret)); LOG_WARN("fail to init shared msgs from input", K(ret));
} }
@ -902,6 +922,9 @@ int ObJoinFilterOp::init_shared_msgs_from_input()
LOG_WARN("fail to push back flag", K(ret)); LOG_WARN("fail to push back flag", K(ret));
} }
} }
if (OB_FAIL(ret)) {
IGNORE_RETURN release_local_msg();
}
} }
return ret; return ret;
} }
@ -922,6 +945,12 @@ int ObJoinFilterOp::init_local_msg_from_shared_msg(ObP2PDatahubMsgBase &msg)
if (OB_FAIL(PX_P2P_DH.alloc_msg(ctx_.get_allocator(), if (OB_FAIL(PX_P2P_DH.alloc_msg(ctx_.get_allocator(),
ObP2PDatahubMsgBase::RANGE_FILTER_MSG, range_ptr))) { ObP2PDatahubMsgBase::RANGE_FILTER_MSG, range_ptr))) {
LOG_WARN("fail to alloc msg", K(ret)); LOG_WARN("fail to alloc msg", K(ret));
} else if (OB_FAIL(local_rf_msgs_.push_back(range_ptr))) {
// push_back failed, so the msg must be destroyed immediately;
// if init or construct_msg_details fails, the msg is destroyed in release_local_msg
range_ptr->destroy();
ctx_.get_allocator().free(range_ptr);
LOG_WARN("fail to push back local rf msgs", K(ret));
} else if (OB_FAIL(range_ptr->init(msg.get_p2p_datahub_id(), } else if (OB_FAIL(range_ptr->init(msg.get_p2p_datahub_id(),
msg.get_px_seq_id(), 0/*task_id*/, msg.get_tenant_id(), msg.get_px_seq_id(), 0/*task_id*/, msg.get_tenant_id(),
msg.get_timeout_ts(), filter_input->register_dm_info_))) { msg.get_timeout_ts(), filter_input->register_dm_info_))) {
@ -929,8 +958,6 @@ int ObJoinFilterOp::init_local_msg_from_shared_msg(ObP2PDatahubMsgBase &msg)
} else if (OB_FAIL(ObJoinFilterOpInput::construct_msg_details(MY_SPEC, } else if (OB_FAIL(ObJoinFilterOpInput::construct_msg_details(MY_SPEC,
sqc_proxy, filter_input->config_, *range_ptr, msg.get_msg_receive_expect_cnt()))) { sqc_proxy, filter_input->config_, *range_ptr, msg.get_msg_receive_expect_cnt()))) {
LOG_WARN("fail to construct msg details", K(ret)); LOG_WARN("fail to construct msg details", K(ret));
} else if (OB_FAIL(local_rf_msgs_.push_back(range_ptr))) {
LOG_WARN("fail to push back local rf msgs", K(ret));
} }
break; break;
} }
@ -939,18 +966,19 @@ int ObJoinFilterOp::init_local_msg_from_shared_msg(ObP2PDatahubMsgBase &msg)
if (OB_FAIL(PX_P2P_DH.alloc_msg(ctx_.get_allocator(), if (OB_FAIL(PX_P2P_DH.alloc_msg(ctx_.get_allocator(),
ObP2PDatahubMsgBase::IN_FILTER_MSG, in_ptr))) { ObP2PDatahubMsgBase::IN_FILTER_MSG, in_ptr))) {
LOG_WARN("fail to alloc msg", K(ret)); LOG_WARN("fail to alloc msg", K(ret));
} else { } else if (OB_FAIL(local_rf_msgs_.push_back(in_ptr))) {
ObRFInFilterMsg &in_msg = static_cast<ObRFInFilterMsg &>(*in_ptr); // push_back failed, must destory the msg immediately
if (OB_FAIL(in_msg.init(msg.get_p2p_datahub_id(), // if init or construct_msg_details failed, destory msg in release_local_msg
msg.get_px_seq_id(), 0/*task_id*/, msg.get_tenant_id(), in_ptr->destroy();
msg.get_timeout_ts(), filter_input->register_dm_info_))) { ctx_.get_allocator().free(in_ptr);
LOG_WARN("fail to init msg", K(ret)); LOG_WARN("fail to push back local rf msgs", K(ret));
} else if (OB_FAIL(ObJoinFilterOpInput::construct_msg_details(MY_SPEC, } else if (OB_FAIL(in_ptr->init(msg.get_p2p_datahub_id(),
sqc_proxy, filter_input->config_, *in_ptr, msg.get_msg_receive_expect_cnt()))) { msg.get_px_seq_id(), 0/*task_id*/, msg.get_tenant_id(),
LOG_WARN("fail to construct msg details", K(ret)); msg.get_timeout_ts(), filter_input->register_dm_info_))) {
} else if (OB_FAIL(local_rf_msgs_.push_back(in_ptr))) { LOG_WARN("fail to init msg", K(ret));
LOG_WARN("fail to push back local rf msgs", K(ret)); } else if (OB_FAIL(ObJoinFilterOpInput::construct_msg_details(MY_SPEC,
} sqc_proxy, filter_input->config_, *in_ptr, msg.get_msg_receive_expect_cnt()))) {
LOG_WARN("fail to construct msg details", K(ret));
} }
break; break;
} }
@ -1003,16 +1031,25 @@ int ObJoinFilterOp::release_shared_msg()
ObJoinFilterOpInput *filter_input = static_cast<ObJoinFilterOpInput*>(input_); ObJoinFilterOpInput *filter_input = static_cast<ObJoinFilterOpInput*>(input_);
if (MY_SPEC.is_shared_join_filter() && !MY_SPEC.is_shuffle_) { if (MY_SPEC.is_shared_join_filter() && !MY_SPEC.is_shuffle_) {
bool need_release = filter_input->check_release(); bool need_release = filter_input->check_release();
for (int i = 0; need_release && i < shared_rf_msgs_.count(); ++i) { if (need_release) {
if (OB_NOT_NULL(shared_rf_msgs_.at(i))) { // shared_rf_msgs_ may not init succ, so when close,
msg = nullptr; // clear filter_input->share_info_.shared_msgs_ rather than this->shared_rf_msgs_
ObP2PDhKey key; ObArray<ObP2PDatahubMsgBase *> *shared_rf_msgs =
key.p2p_datahub_id_ = reinterpret_cast<ObArray<ObP2PDatahubMsgBase *> *>(
shared_rf_msgs_.at(i)->get_p2p_datahub_id(); filter_input->share_info_.shared_msgs_);
key.task_id_ = shared_rf_msgs_.at(i)->get_task_id(); if (OB_NOT_NULL(shared_rf_msgs)) {
key.px_sequence_id_ = shared_rf_msgs_.at(i)->get_px_seq_id(); for (int i = 0; i < shared_rf_msgs->count(); ++i) {
PX_P2P_DH.erase_msg(key, msg); if (OB_NOT_NULL(shared_rf_msgs->at(i))) {
shared_rf_msgs_.at(i)->destroy(); msg = nullptr;
ObP2PDhKey key;
key.p2p_datahub_id_ =
shared_rf_msgs->at(i)->get_p2p_datahub_id();
key.task_id_ = shared_rf_msgs->at(i)->get_task_id();
key.px_sequence_id_ = shared_rf_msgs->at(i)->get_px_seq_id();
PX_P2P_DH.erase_msg(key, msg);
shared_rf_msgs->at(i)->destroy();
}
}
} }
} }
} }

View File

@ -37,6 +37,10 @@ class ObPxSQCProxy;
struct ObJoinFilterShareInfo struct ObJoinFilterShareInfo
{ {
// Default constructor: every field starts out as 0, so a share-info object
// that was never filled in can be told apart from one carrying real values.
ObJoinFilterShareInfo()
    : unfinished_count_ptr_(0),
      ch_provider_ptr_(0),
      release_ref_ptr_(0),
      filter_ptr_(0),
      shared_msgs_(0)
{
}
uint64_t unfinished_count_ptr_; // send_filter引用计数, 初始值为worker个数 uint64_t unfinished_count_ptr_; // send_filter引用计数, 初始值为worker个数
uint64_t ch_provider_ptr_; // sqc_proxy, 由于序列化需要, 使用指针表示. uint64_t ch_provider_ptr_; // sqc_proxy, 由于序列化需要, 使用指针表示.
uint64_t release_ref_ptr_; // 释放内存引用计数, 初始值为worker个数. uint64_t release_ref_ptr_; // 释放内存引用计数, 初始值为worker个数.

View File

@ -149,12 +149,18 @@ void ObPxSqcHandler::release_handler(ObPxSqcHandler *sqc_handler, int &report_re
LOG_ERROR_RET(OB_INVALID_ARGUMENT, "Get null sqc handler", K(sqc_handler)); LOG_ERROR_RET(OB_INVALID_ARGUMENT, "Get null sqc handler", K(sqc_handler));
} else if (FALSE_IT(sqc_handler->release(all_released))) { } else if (FALSE_IT(sqc_handler->release(all_released))) {
} else if (all_released) { } else if (all_released) {
IGNORE_RETURN sqc_handler->check_rf_leak();
IGNORE_RETURN sqc_handler->destroy_sqc(report_ret); IGNORE_RETURN sqc_handler->destroy_sqc(report_ret);
sqc_handler->reset(); sqc_handler->reset();
op_reclaim_free(sqc_handler); op_reclaim_free(sqc_handler);
} }
} }
void ObPxSqcHandler::check_rf_leak()
{
IGNORE_RETURN sub_coord_->destroy_shared_rf_msgs();
}
int ObPxSqcHandler::init() int ObPxSqcHandler::init()
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;

View File

@ -70,6 +70,7 @@ public:
static constexpr const char *OP_LABEL = ObModIds::ObModIds::OB_SQL_SQC_HANDLER; static constexpr const char *OP_LABEL = ObModIds::ObModIds::OB_SQL_SQC_HANDLER;
static ObPxSqcHandler *get_sqc_handler(); static ObPxSqcHandler *get_sqc_handler();
static void release_handler(ObPxSqcHandler *sqc_handler, int &report_ret); static void release_handler(ObPxSqcHandler *sqc_handler, int &report_ret);
inline void check_rf_leak();
void reset() ; void reset() ;
void release(bool &all_released) { void release(bool &all_released) {
int64_t reference_count = ATOMIC_AAF(&reference_count_, -1); int64_t reference_count = ATOMIC_AAF(&reference_count_, -1);

View File

@ -710,6 +710,20 @@ int ObPxSubCoord::try_prealloc_receive_channel(ObSqcCtx &sqc_ctx, ObPxSqcMeta &s
return ret; return ret;
} }
// Walks every entry of all_shared_rf_msgs_, interprets it as a pointer to an
// array of runtime-filter messages, destroys each message in that array and
// then resets the array if it is non-empty.
// Null array pointers and null message slots are skipped, matching the
// per-element null checks done by the other cleanup paths for these messages.
void ObPxSubCoord::destroy_shared_rf_msgs()
{
  for (int i = 0; i < all_shared_rf_msgs_.count(); ++i) {
    ObArray<ObP2PDatahubMsgBase *> *array_ptr =
        reinterpret_cast<ObArray<ObP2PDatahubMsgBase *> *>(all_shared_rf_msgs_.at(i));
    if (OB_NOT_NULL(array_ptr)) {
      for (int j = 0; j < array_ptr->count(); ++j) {
        // A slot may have been cleared by an earlier cleanup; never
        // dereference it blindly.
        if (OB_NOT_NULL(array_ptr->at(j))) {
          array_ptr->at(j)->destroy();
        }
      }
      if (!array_ptr->empty()) {
        array_ptr->reset();
      }
    }
  }
}
// the last worker will invoke this function // the last worker will invoke this function
int ObPxSubCoord::end_process() int ObPxSubCoord::end_process()
{ {
@ -730,16 +744,7 @@ int ObPxSubCoord::end_process()
LOG_WARN("fail check task finish status", K(ret)); LOG_WARN("fail check task finish status", K(ret));
} }
} }
for (int i = 0; i < all_shared_rf_msgs_.count(); ++i) { void destroy_shared_rf_msgs();
ObArray<ObP2PDatahubMsgBase *> *array_ptr =
reinterpret_cast<ObArray<ObP2PDatahubMsgBase *> *>(all_shared_rf_msgs_.at(i));
for (int j = 0; OB_NOT_NULL(array_ptr) && j < array_ptr->count(); ++j) {
array_ptr->at(j)->destroy();
}
if (OB_NOT_NULL(array_ptr) && !array_ptr->empty()) {
array_ptr->reset();
}
}
NG_TRACE(tag3); NG_TRACE(tag3);
LOG_TRACE("exit ObPxSubCoord process", K(ret)); LOG_TRACE("exit ObPxSubCoord process", K(ret));

View File

@ -87,6 +87,7 @@ public:
const ObIArray<ObSqcTableLocationKey> &tsc_location_keys); const ObIArray<ObSqcTableLocationKey> &tsc_location_keys);
int rebuild_sqc_access_table_locations(); int rebuild_sqc_access_table_locations();
void set_is_single_tsc_leaf_dfo(bool flag) { is_single_tsc_leaf_dfo_ = flag; } void set_is_single_tsc_leaf_dfo(bool flag) { is_single_tsc_leaf_dfo_ = flag; }
void destroy_shared_rf_msgs();
private: private:
int setup_loop_proc(ObSqcCtx &sqc_ctx) const; int setup_loop_proc(ObSqcCtx &sqc_ctx) const;
int setup_op_input(ObExecContext &ctx, int setup_op_input(ObExecContext &ctx,

View File

@ -391,6 +391,10 @@ int ObRFBloomFilterMsg::deep_copy_msg(ObP2PDatahubMsgBase *&new_msg_ptr)
} else { } else {
new_msg_ptr = bf_msg; new_msg_ptr = bf_msg;
} }
if (OB_FAIL(ret) && OB_NOT_NULL(bf_msg)) {
bf_msg->destroy();
ob_free(bf_msg);
}
return ret; return ret;
} }
@ -939,6 +943,10 @@ int ObRFRangeFilterMsg::deep_copy_msg(ObP2PDatahubMsgBase *&new_msg_ptr)
new_msg_ptr = rf_msg; new_msg_ptr = rf_msg;
} }
} }
if (OB_FAIL(ret) && OB_NOT_NULL(rf_msg)) {
rf_msg->destroy();
ob_free(rf_msg);
}
return ret; return ret;
} }
@ -1331,6 +1339,10 @@ int ObRFInFilterMsg::deep_copy_msg(ObP2PDatahubMsgBase *&new_msg_ptr)
new_msg_ptr = in_msg; new_msg_ptr = in_msg;
} }
} }
if (OB_FAIL(ret) && OB_NOT_NULL(in_msg)) {
in_msg->destroy();
ob_free(in_msg);
}
return ret; return ret;
} }