[MDS] limit MDS timeout_us to at most 30s in every interface that may hang

This commit is contained in:
fengdeyiji
2023-11-16 05:10:50 +00:00
committed by ob-robot
parent 58c026ace0
commit 092ba0f744
5 changed files with 210 additions and 59 deletions

View File

@ -18,6 +18,7 @@
#include "storage/tx_storage/ob_ls_service.h"
#include "storage/compaction/ob_schedule_dag_func.h"
#include "storage/multi_data_source/ob_mds_table_merge_dag_param.h"
#include "storage/tx/ob_multi_data_source.h"
namespace oceanbase
{
@ -26,6 +27,8 @@ namespace storage
namespace mds
{
TLOCAL(transaction::NotifyType, TLOCAL_MDS_TRANS_NOTIFY_TYPE) = transaction::NotifyType::UNKNOWN;
int MdsTableBase::advance_state_to(State new_state) const
{
int ret = OB_SUCCESS;

View File

@ -24,6 +24,10 @@
namespace oceanbase
{
// Opaque declaration of transaction::NotifyType with a fixed int64_t underlying
// type, so code here can name the enum without needing its enumerator list.
namespace transaction {
enum class NotifyType : int64_t;
}  // namespace transaction
namespace share
{
class SCN;
@ -33,6 +37,7 @@ namespace storage
class ObTabletPointer;
namespace mds
{
extern TLOCAL(transaction::NotifyType, TLOCAL_MDS_TRANS_NOTIFY_TYPE);
template <typename K, typename V>
class MdsRow;
template <typename K, typename V>

View File

@ -160,14 +160,34 @@ int MdsTableHandle::set(T &&data, MdsCtx &ctx, const int64_t lock_timeout_us)
uint8_t unit_id = INT8_MAX;
ret = MdsTableHandleHelper<DummyKey, T>::template get_unit_id<0>(mds_table_id_, unit_id);
DummyKey dummy_key;
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = lock_timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (lock_timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version",
KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = lock_timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort",
KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->set(unit_id,
(void*)&dummy_key,
(void*)&data,
std::is_rvalue_reference<decltype(data)>::value,
ctx,
lock_timeout_us))) {
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us));
converted_timeout))) {
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(data), K(ctx), K(lock_timeout_us), K(converted_timeout));
}
}
}
return ret;
@ -235,16 +255,36 @@ int MdsTableHandle::get_snapshot(OP &&read_op,
ObFunction<int(void *)> function = [&read_op](void *data) -> int {
return read_op(*reinterpret_cast<const T*>(data));
};
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_snapshot(unit_id,
(void*)&dummy_key,
function,
snapshot,
read_seq,
timeout_us))) {
converted_timeout))) {
if (OB_SNAPSHOT_DISCARDED != ret) {
MDS_LOG(WARN, "fail to call get_snapshot", KR(ret), K(unit_id), K(snapshot),
K(read_seq), K(timeout_us));
K(read_seq), K(timeout_us), K(converted_timeout));
}
}
}
}
@ -267,6 +307,25 @@ int MdsTableHandle::get_by_writer(OP &&read_op,
ObFunction<int(void *)> function = [&read_op](void *data) -> int {
return read_op(*reinterpret_cast<const T*>(data));
};
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_by_writer(unit_id,
(void*)&dummy_key,
@ -274,10 +333,11 @@ int MdsTableHandle::get_by_writer(OP &&read_op,
writer,
snapshot,
read_seq,
timeout_us))) {
converted_timeout))) {
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) {
MDS_LOG(WARN, "fail to call get_by_writer", KR(ret), K(unit_id), K(writer),
K(snapshot), K(read_seq), K(timeout_us));
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
}
}
}
}
@ -313,15 +373,35 @@ int MdsTableHandle::set(const Key &key, Value &&data, MdsCtx &ctx, const int64_t
CHECK_MDS_TABLE_INIT();
uint8_t unit_id = INT8_MAX;
ret = MdsTableHandleHelper<Key, Value>::template get_unit_id<0>(mds_table_id_, unit_id);
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = lock_timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (lock_timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(data), K(ctx),
K(lock_timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = lock_timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(data),
K(ctx), K(lock_timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->set(unit_id,
(void*)&key,
(void*)&data,
std::is_rvalue_reference<Value>::value,
ctx,
lock_timeout_us))) {
converted_timeout))) {
MDS_LOG(WARN, "fail to call set", KR(ret), K(unit_id), K(key), K(data), K(ctx),
K(lock_timeout_us));
K(lock_timeout_us), K(converted_timeout));
}
}
}
return ret;
@ -354,13 +434,33 @@ int MdsTableHandle::remove(const Key &key, MdsCtx &ctx, const int64_t lock_timeo
CHECK_MDS_TABLE_INIT();
uint8_t unit_id = INT8_MAX;
ret = MdsTableHandleHelper<Key, Value>::template get_unit_id<0>(mds_table_id_, unit_id);
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = lock_timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (lock_timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(ctx),
K(lock_timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = lock_timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(ctx),
K(lock_timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->remove(unit_id,
(void*)&key,
ctx,
lock_timeout_us))) {
converted_timeout))) {
MDS_LOG(WARN, "fail to call remove", KR(ret), K(unit_id), K(key), K(ctx),
K(lock_timeout_us));
K(lock_timeout_us), K(converted_timeout));
}
}
}
return ret;
@ -425,16 +525,36 @@ int MdsTableHandle::get_snapshot(const Key &key,
ObFunction<int(void *)> function = [&read_op](void *data) -> int {
return read_op(*reinterpret_cast<const Value*>(data));
};
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(snapshot),
K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_snapshot(unit_id,
(void*)&key,
function,
snapshot,
read_seq,
timeout_us))) {
converted_timeout))) {
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) {
MDS_LOG(WARN, "fail to call get_snapshot", KR(ret), K(unit_id), K(key), K(snapshot),
K(read_seq), K(timeout_us));
K(read_seq), K(timeout_us), K(converted_timeout));
}
}
}
}
@ -456,6 +576,25 @@ int MdsTableHandle::get_by_writer(const Key &key,
ObFunction<int(void *)> function = [&read_op](void *data) -> int {
return read_op(*reinterpret_cast<const Value*>(data));
};
if (OB_SUCC(ret)) {
int64_t converted_timeout = 0;
if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::UNKNOWN) { // no restrict
converted_timeout = timeout_us;
} else if (TLOCAL_MDS_TRANS_NOTIFY_TYPE == transaction::NotifyType::REGISTER_SUCC) {
if (timeout_us > 30_s) {// timeout no more than 30s
MDS_LOG(INFO, "timeout ts mustn't more than 30s in current version", KR(ret), K(unit_id), K(key), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
converted_timeout = 30_s;
} else {
converted_timeout = timeout_us;
}
} else {// do mds data maybe hang operation is not allowed in other phase callback
ret = OB_OP_NOT_ALLOW;// this call may deadlock with other threads and can not be avoided
MDS_LOG(ERROR, "you mustn't do maybe hung operation in trans callbacks :"
" on_redo/before_prepare/on_prepare/on_commit/on_abort", KR(ret), K(unit_id), K(key), K(writer),
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
MDS_ASSERT(false);// abort in test environment
}
if (OB_SUCC(ret)) {
if (OB_FAIL(p_mds_table_base_->get_by_writer(unit_id,
(void*)&key,
@ -463,10 +602,11 @@ int MdsTableHandle::get_by_writer(const Key &key,
writer,
snapshot,
read_seq,
timeout_us))) {
converted_timeout))) {
if (OB_UNLIKELY(OB_SNAPSHOT_DISCARDED != ret)) {
MDS_LOG(WARN, "fail to call get_by_writer", KR(ret), K(unit_id), K(key), K(writer),
K(snapshot), K(read_seq), K(timeout_us));
K(snapshot), K(read_seq), K(timeout_us), K(converted_timeout));
}
}
}
}

View File

@ -220,6 +220,7 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array,
}
}
} else {
mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = notify_type;
switch (node.type_) {
#define NEED_GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION
#define _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENUM_NAME) \
@ -281,6 +282,7 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array,
default:
ob_abort();
}
mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = NotifyType::UNKNOWN;
}
if (OB_FAIL(ret)) {
TRANS_LOG(WARN, "notify data source failed", KR(ret), K(node));

View File

@ -69,6 +69,7 @@ enum class ObTxDataSourceType : int64_t
enum class NotifyType : int64_t
{
UNKNOWN = -1,
REGISTER_SUCC = 0,
ON_REDO = 1,
TX_END = 2,