[MDS] limit MDS timeout_us to at most 30s in every interface that may hang

This commit is contained in:
fengdeyiji
2023-11-16 05:10:50 +00:00
committed by ob-robot
parent 58c026ace0
commit 092ba0f744
5 changed files with 210 additions and 59 deletions

View File

@ -18,6 +18,7 @@
#include "storage/tx_storage/ob_ls_service.h" #include "storage/tx_storage/ob_ls_service.h"
#include "storage/compaction/ob_schedule_dag_func.h" #include "storage/compaction/ob_schedule_dag_func.h"
#include "storage/multi_data_source/ob_mds_table_merge_dag_param.h" #include "storage/multi_data_source/ob_mds_table_merge_dag_param.h"
#include "storage/tx/ob_multi_data_source.h"
namespace oceanbase namespace oceanbase
{ {
@ -26,6 +27,8 @@ namespace storage
namespace mds namespace mds
{ {
// Definition of the notify-type marker consulted by the MdsTableHandle
// set/remove/get_snapshot/get_by_writer paths. It starts as
// NotifyType::UNKNOWN, which those paths treat as "no restriction": the
// caller's timeout is passed through unchanged.
// ObMulSourceTxDataNotifier::notify() assigns the current notify type before
// dispatching on the node type and restores UNKNOWN afterwards.
// NOTE(review): TLOCAL presumably expands to a thread-local declaration —
// confirm against the macro definition.
TLOCAL(transaction::NotifyType, TLOCAL_MDS_TRANS_NOTIFY_TYPE) = transaction::NotifyType::UNKNOWN;
int MdsTableBase::advance_state_to(State new_state) const int MdsTableBase::advance_state_to(State new_state) const
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;

View File

@ -24,6 +24,10 @@
namespace oceanbase namespace oceanbase
{ {
// Opaque declaration of transaction::NotifyType so that the type can be named
// here (it is used by the TLOCAL_MDS_TRANS_NOTIFY_TYPE declaration in
// namespace mds) without the full enumerator list being visible.
// The underlying type (int64_t) must stay identical to the one on the enum's
// definition, otherwise the two declarations conflict.
namespace transaction
{
enum class NotifyType : int64_t;
}
namespace share namespace share
{ {
class SCN; class SCN;
@ -33,6 +37,7 @@ namespace storage
class ObTabletPointer; class ObTabletPointer;
namespace mds namespace mds
{ {
// Declaration of the notify-type marker; the definition initializes it to
// transaction::NotifyType::UNKNOWN.
// The MdsTableHandle operations read it to decide how to treat the caller's
// timeout:
//   - UNKNOWN:        timeout is used as given;
//   - REGISTER_SUCC:  timeout is capped at 30s;
//   - any other type: the operation is rejected with OB_OP_NOT_ALLOW.
// NOTE(review): TLOCAL presumably makes this a per-thread variable — confirm
// against the macro definition.
extern TLOCAL(transaction::NotifyType, TLOCAL_MDS_TRANS_NOTIFY_TYPE);
template <typename K, typename V> template <typename K, typename V>
class MdsRow; class MdsRow;
template <typename K, typename V> template <typename K, typename V>

View File

@ -161,13 +161,33 @@ int MdsTableHandle::set(T &&data, MdsCtx &ctx, const int64_t lock_timeout_us)
ret = MdsTableHandleHelper<DummyKey, T>::template get_unit_id<0>(mds_table_id_, unit_id); ret = MdsTableHandleHelper<DummyKey, T>::template get_unit_id<0>(mds_table_id_, unit_id);
DummyKey dummy_key; DummyKey dummy_key;
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->set(unit_id, int64_t converted_timeout = 0;
(void*)&dummy_key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
(void*)&data, converted_timeout = lock_timeout_us;
std::is_rvalue_reference<decltype(data)>::value, } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
ctx, if (lock_timeout_us > 30_s) {// timeout no more than 30s
lock_timeout_us))) { MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version",
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us)); KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = lock_timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort",
KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->set(unit_id,
(void*)&dummy_key,
(void*)&data,
std::is_rvalue_reference<decltype(data)>::value,
ctx,
converted_timeout))) {
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
}
} }
} }
return ret; return ret;
@ -236,15 +256,35 @@ int MdsTableHandle::get_snapshot(OP &&read_op,
return read_op(*reinterpret_cast<const T*>(data)); return read_op(*reinterpret_cast<const T*>(data));
}; };
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_snapshot(unit_id, int64_t converted_timeout = 0;
(void*)&dummy_key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
function, converted_timeout = timeout_us;
snapshot, } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
read_seq, if (timeout_us > 30_s) {// timeout no more than 30s
timeout_us))) { MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(snapshot),
if (OB_SNAPSHOT_DISCARDED != ret) { K(read_seq), K(timeout_us), K(converted_timeout));
MDS_LOG(WARN, "fail to call get_snapshot", KR(ret), K(unit_id), K(snapshot), converted_timeout = 30_s;
K(read_seq), K(timeout_us)); } else {
converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_snapshot(unit_id,
(void*)&dummy_key,
function,
snapshot,
read_seq,
converted_timeout))) {
if (OB_SNAPSHOT_DISCARDED != ret) {
MDS_LOG(WARN, "fail to call get_snapshot", KR(ret), K(unit_id), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
}
} }
} }
} }
@ -268,16 +308,36 @@ int MdsTableHandle::get_by_writer(OP &&read_op,
return read_op(*reinterpret_cast<const T*>(data)); return read_op(*reinterpret_cast<const T*>(data));
}; };
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_by_writer(unit_id, int64_t converted_timeout = 0;
(void*)&dummy_key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
function, converted_timeout = timeout_us;
writer, } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
snapshot, if (timeout_us > 30_s) {// timeout no more than 30s
read_seq, MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(writer),
timeout_us))) { K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) { converted_timeout = 30_s;
MDS_LOG(WARN, "fail to call get_by_writer", KR(ret), K(unit_id), K(writer), } else {
K(snapshot), K(read_seq), K(timeout_us)); converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_by_writer(unit_id,
(void*)&dummy_key,
function,
writer,
snapshot,
read_seq,
converted_timeout))) {
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) {
MDS_LOG(WARN, "fail to call get_by_writer", KR(ret), K(unit_id), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
}
} }
} }
} }
@ -314,14 +374,34 @@ int MdsTableHandle::set(const Key &key, Value &&data, MdsCtx &ctx, const int64_t
uint8_t unit_id = INT8_MAX; uint8_t unit_id = INT8_MAX;
ret = MdsTableHandleHelper<Key, Value>::template get_unit_id<0>(mds_table_id_, unit_id); ret = MdsTableHandleHelper<Key, Value>::template get_unit_id<0>(mds_table_id_, unit_id);
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->set(unit_id, int64_t converted_timeout = 0;
(void*)&key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
(void*)&data, converted_timeout = lock_timeout_us;
std::is_rvalue_reference<Value>::value, } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
ctx, if (lock_timeout_us > 30_s) {// timeout no more than 30s
lock_timeout_us))) { MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(data), K(ctx),
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(key), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
K(lock_timeout_us)); converted_timeout = 30_s;
} else {
converted_timeout = lock_timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(data),
K(ctx), K(lock_timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->set(unit_id,
(void*)&key,
(void*)&data,
std::is_rvalue_reference<Value>::value,
ctx,
converted_timeout))) {
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(key), K(data), K(ctx),
K(lock_timeout_us), K(converted_timeout));
}
} }
} }
return ret; return ret;
@ -355,12 +435,32 @@ int MdsTableHandle::remove(const Key &key, MdsCtx &ctx, const int64_t lock_timeo
uint8_t unit_id = INT8_MAX; uint8_t unit_id = INT8_MAX;
ret = MdsTableHandleHelper<Key, Value>::template get_unit_id<0>(mds_table_id_, unit_id); ret = MdsTableHandleHelper<Key, Value>::template get_unit_id<0>(mds_table_id_, unit_id);
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->remove(unit_id, int64_t converted_timeout = 0;
(void*)&key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
ctx, converted_timeout = lock_timeout_us;
lock_timeout_us))) { } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
MDS_LOG(WARN, "fail to call remove", KR(ret), K(unit_id), K(key), K(ctx), if (lock_timeout_us > 30_s) {// timeout no more than 30s
K(lock_timeout_us)); MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(ctx),
K(lock_timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = lock_timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(ctx),
K(lock_timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->remove(unit_id,
(void*)&key,
ctx,
converted_timeout))) {
MDS_LOG(WARN, "fail to call remove", KR(ret), K(unit_id), K(key), K(ctx),
K(lock_timeout_us), K(converted_timeout));
}
} }
} }
return ret; return ret;
@ -426,15 +526,35 @@ int MdsTableHandle::get_snapshot(const Key &key,
return read_op(*reinterpret_cast<const Value*>(data)); return read_op(*reinterpret_cast<const Value*>(data));
}; };
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_snapshot(unit_id, int64_t converted_timeout = 0;
(void*)&key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
function, converted_timeout = timeout_us;
snapshot, } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
read_seq, if (timeout_us > 30_s) {// timeout no more than 30s
timeout_us))) { MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(snapshot),
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) { K(read_seq), K(timeout_us), K(converted_timeout));
MDS_LOG(WARN, "fail to call get_snapshot", KR(ret), K(unit_id), K(key), K(snapshot), converted_timeout = 30_s;
K(read_seq), K(timeout_us)); } else {
converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_snapshot(unit_id,
(void*)&key,
function,
snapshot,
read_seq,
converted_timeout))) {
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) {
MDS_LOG(WARN, "fail to call get_snapshot", KR(ret), K(unit_id), K(key), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
}
} }
} }
} }
@ -457,16 +577,36 @@ int MdsTableHandle::get_by_writer(const Key &key,
return read_op(*reinterpret_cast<const Value*>(data)); return read_op(*reinterpret_cast<const Value*>(data));
}; };
if (OB_SUCC(ret)) { if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_by_writer(unit_id, int64_t converted_timeout = 0;
(void*)&key, if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
function, converted_timeout = timeout_us;
writer, } else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
snapshot, if (timeout_us > 30_s) {// timeout no more than 30s
read_seq, MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(writer),
timeout_us))) { K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) { converted_timeout = 30_s;
MDS_LOG(WARN, "fail to call get_by_writer", KR(ret), K(unit_id), K(key), K(writer), } else {
K(snapshot), K(read_seq), K(timeout_us)); converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_by_writer(unit_id,
(void*)&key,
function,
writer,
snapshot,
read_seq,
converted_timeout))) {
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) {
MDS_LOG(WARN, "fail to call get_by_writer", KR(ret), K(unit_id), K(key), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
}
} }
} }
} }

View File

@ -220,6 +220,7 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array,
} }
} }
} else { } else {
mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = notify_type;
switch (node.type_) { switch (node.type_) {
#define NEED_GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION #define NEED_GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION
#define _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENUM_NAME) \ #define _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENUM_NAME) \
@ -281,6 +282,7 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array,
default: default:
ob_abort(); ob_abort();
} }
mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = NotifyType::UNKNOWN;
} }
if (OB_FAIL(ret)) { if (OB_FAIL(ret)) {
TRANS_LOG(WARN, "notify data source failed", KR(ret), K(node)); TRANS_LOG(WARN, "notify data source failed", KR(ret), K(node));

View File

@ -69,6 +69,7 @@ enum class ObTxDataSourceType : int64_t
enum class NotifyType : int64_t enum class NotifyType : int64_t
{ {
UNKNOWN = -1,
REGISTER_SUCC = 0, REGISTER_SUCC = 0,
ON_REDO = 1, ON_REDO = 1,
TX_END = 2, TX_END = 2,