Throttle follower tablet creation speed to a safe level
This commit is contained in:
@ -984,7 +984,7 @@ int ObStartLSRestoreTask::create_tablet_(
|
|||||||
if (!tablet_meta.is_valid() || OB_ISNULL(ls)) {
|
if (!tablet_meta.is_valid() || OB_ISNULL(ls)) {
|
||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
LOG_WARN("create tablet get invalid argument", K(ret), K(tablet_meta), KP(ls));
|
LOG_WARN("create tablet get invalid argument", K(ret), K(tablet_meta), KP(ls));
|
||||||
} else if (OB_FAIL(ObTabletCreateMdsHelper::check_create_new_tablets(1LL, true/*is_soft_limit*/))) {
|
} else if (OB_FAIL(ObTabletCreateMdsHelper::check_create_new_tablets(1LL, ObTabletCreateThrottlingLevel::SOFT))) {
|
||||||
LOG_WARN("failed to check create new tablet", K(ret), K(tablet_meta));
|
LOG_WARN("failed to check create new tablet", K(ret), K(tablet_meta));
|
||||||
} else if (OB_FAIL(ls->rebuild_create_tablet(tablet_meta, false /*keep old*/))) {
|
} else if (OB_FAIL(ls->rebuild_create_tablet(tablet_meta, false /*keep old*/))) {
|
||||||
LOG_WARN("failed to create tablet", K(ret), K(tablet_meta));
|
LOG_WARN("failed to create tablet", K(ret), K(tablet_meta));
|
||||||
|
@ -544,7 +544,7 @@ int ObStorageHATabletsBuilder::create_or_update_tablet_(
|
|||||||
} else if (ObCopyTabletStatus::TABLET_NOT_EXIST == tablet_info.status_ && tablet_info.tablet_id_.is_ls_inner_tablet()) {
|
} else if (ObCopyTabletStatus::TABLET_NOT_EXIST == tablet_info.status_ && tablet_info.tablet_id_.is_ls_inner_tablet()) {
|
||||||
ret = OB_TABLET_NOT_EXIST;
|
ret = OB_TABLET_NOT_EXIST;
|
||||||
LOG_WARN("src ls inner tablet is not exist, src ls is maybe deleted", K(ret), K(tablet_info));
|
LOG_WARN("src ls inner tablet is not exist, src ls is maybe deleted", K(ret), K(tablet_info));
|
||||||
} else if (need_check_tablet_limit && OB_FAIL(ObTabletCreateMdsHelper::check_create_new_tablets(1LL, true/*is_soft_limit*/))) {
|
} else if (need_check_tablet_limit && OB_FAIL(ObTabletCreateMdsHelper::check_create_new_tablets(1LL, ObTabletCreateThrottlingLevel::SOFT))) {
|
||||||
if (OB_TOO_MANY_PARTITIONS_ERROR == ret) {
|
if (OB_TOO_MANY_PARTITIONS_ERROR == ret) {
|
||||||
LOG_ERROR("too many partitions, failed to check create new tablet", K(ret), K(tablet_info));
|
LOG_ERROR("too many partitions, failed to check create new tablet", K(ret), K(tablet_info));
|
||||||
} else {
|
} else {
|
||||||
|
@ -183,31 +183,25 @@ int ObTabletCreateMdsHelper::on_replay(
|
|||||||
LOG_INFO("skip replay create tablet for old mds", K(ret), K(scn), "arg", PRETTY_ARG(arg));
|
LOG_INFO("skip replay create tablet for old mds", K(ret), K(scn), "arg", PRETTY_ARG(arg));
|
||||||
} else if (OB_FAIL(convert_schemas(arg))) {
|
} else if (OB_FAIL(convert_schemas(arg))) {
|
||||||
LOG_WARN("failed to convert_schemas", K(ret), "arg", PRETTY_ARG(arg));
|
LOG_WARN("failed to convert_schemas", K(ret), "arg", PRETTY_ARG(arg));
|
||||||
} else {
|
} else if (CLICK_FAIL(check_create_new_tablets(arg, true/*is_replay*/))) {
|
||||||
// Should not fail the replay process when the tablet count exceeds the recommended value
|
LOG_WARN("failed to check create new tablets", K(ret));
|
||||||
// Only print an ERROR log to notify the user to scale up the unit memory
|
}
|
||||||
int tmp_ret = OB_SUCCESS;
|
|
||||||
if (OB_TMP_FAIL(check_create_new_tablets(arg, true/*is_replay*/))) {
|
|
||||||
if (OB_TOO_MANY_PARTITIONS_ERROR == tmp_ret) {
|
|
||||||
LOG_ERROR("tablet count is too big, consider scale up the unit memory", K(tmp_ret));
|
|
||||||
} else {
|
|
||||||
LOG_WARN("failed to check create new tablets", K(tmp_ret));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (CLICK_FAIL(replay_process(arg, scn, ctx))) {
|
if (OB_FAIL(ret)) {
|
||||||
LOG_WARN("fail to replay_process", K(ret), "arg", PRETTY_ARG(arg));
|
} else if (CLICK_FAIL(replay_process(arg, scn, ctx))) {
|
||||||
}
|
LOG_WARN("fail to replay_process", K(ret), "arg", PRETTY_ARG(arg));
|
||||||
|
}
|
||||||
|
|
||||||
if (OB_FAIL(ret)) {
|
if (OB_FAIL(ret)) {
|
||||||
handle_ret_for_replay(ret);
|
handle_ret_for_replay(ret);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ObTabletCreateMdsHelper::check_create_new_tablets(const int64_t inc_tablet_cnt, const bool is_soft_limit)
|
int ObTabletCreateMdsHelper::check_create_new_tablets(
|
||||||
|
const int64_t inc_tablet_cnt,
|
||||||
|
const ObTabletCreateThrottlingLevel level)
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
const uint64_t tenant_id = MTL_ID();
|
const uint64_t tenant_id = MTL_ID();
|
||||||
@ -222,7 +216,17 @@ int ObTabletCreateMdsHelper::check_create_new_tablets(const int64_t inc_tablet_c
|
|||||||
LOG_ERROR("get invalid tenant config", K(ret));
|
LOG_ERROR("get invalid tenant config", K(ret));
|
||||||
} else {
|
} else {
|
||||||
tablet_cnt_per_gb = tenant_config->_max_tablet_cnt_per_gb;
|
tablet_cnt_per_gb = tenant_config->_max_tablet_cnt_per_gb;
|
||||||
tablet_cnt_per_gb = !is_soft_limit ? tablet_cnt_per_gb : MAX(tablet_cnt_per_gb, 30000);
|
switch (level) {
|
||||||
|
case ObTabletCreateThrottlingLevel::SOFT:
|
||||||
|
tablet_cnt_per_gb = MAX(tablet_cnt_per_gb, 30000);
|
||||||
|
break;
|
||||||
|
case ObTabletCreateThrottlingLevel::FREE:
|
||||||
|
tablet_cnt_per_gb = MAX(tablet_cnt_per_gb, 40000);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// do nothing
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,7 +244,7 @@ int ObTabletCreateMdsHelper::check_create_new_tablets(const int64_t inc_tablet_c
|
|||||||
|
|
||||||
if (OB_UNLIKELY(cur_tablet_cnt + inc_tablet_cnt > max_tablet_cnt)) {
|
if (OB_UNLIKELY(cur_tablet_cnt + inc_tablet_cnt > max_tablet_cnt)) {
|
||||||
ret = OB_TOO_MANY_PARTITIONS_ERROR;
|
ret = OB_TOO_MANY_PARTITIONS_ERROR;
|
||||||
LOG_WARN("too many partitions of tenant", K(ret), K(tenant_id), K(is_soft_limit), K(memory_limit), K(tablet_cnt_per_gb),
|
LOG_WARN("too many partitions of tenant", K(ret), K(tenant_id), K(level), K(memory_limit), K(tablet_cnt_per_gb),
|
||||||
K(max_tablet_cnt), K(cur_tablet_cnt), K(inc_tablet_cnt));
|
K(max_tablet_cnt), K(cur_tablet_cnt), K(inc_tablet_cnt));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -278,15 +282,17 @@ int ObTabletCreateMdsHelper::check_create_new_tablets(const obrpc::ObBatchCreate
|
|||||||
ret = OB_TIMEOUT;
|
ret = OB_TIMEOUT;
|
||||||
LOG_WARN("too many partitions, retry timeout", K(ret));
|
LOG_WARN("too many partitions, retry timeout", K(ret));
|
||||||
break;
|
break;
|
||||||
} else if (OB_FAIL(check_create_new_tablets(arg.get_tablet_count(), true/*is_soft_limit*/))) {
|
} else if (OB_FAIL(check_create_new_tablets(arg.get_tablet_count(),
|
||||||
|
is_replay ? ObTabletCreateThrottlingLevel::FREE : ObTabletCreateThrottlingLevel::SOFT))) {
|
||||||
if (OB_TOO_MANY_PARTITIONS_ERROR != ret) {
|
if (OB_TOO_MANY_PARTITIONS_ERROR != ret) {
|
||||||
LOG_WARN("fail to check create new tablets", K(ret));
|
LOG_WARN("fail to check create new tablets", K(ret));
|
||||||
} else {
|
} else {
|
||||||
need_wait = true;
|
need_wait = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (need_wait && !is_replay); /* only retry for on_register truncate */
|
} while (need_wait);
|
||||||
} else if (OB_FAIL(check_create_new_tablets(arg.get_tablet_count()))) {
|
} else if (OB_FAIL(check_create_new_tablets(arg.get_tablet_count(),
|
||||||
|
is_replay ? ObTabletCreateThrottlingLevel::FREE : ObTabletCreateThrottlingLevel::STRICT))) {
|
||||||
LOG_WARN("fail to create new tablets", K(ret));
|
LOG_WARN("fail to create new tablets", K(ret));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
@ -1045,9 +1051,10 @@ int ObTabletCreateMdsHelper::set_tablet_normal_status(
|
|||||||
|
|
||||||
void ObTabletCreateMdsHelper::handle_ret_for_replay(int &ret)
|
void ObTabletCreateMdsHelper::handle_ret_for_replay(int &ret)
|
||||||
{
|
{
|
||||||
if (OB_TIMEOUT == ret) {
|
if (OB_TIMEOUT == ret || OB_TOO_MANY_PARTITIONS_ERROR == ret) {
|
||||||
|
int origin_ret = ret;
|
||||||
ret = OB_EAGAIN;
|
ret = OB_EAGAIN;
|
||||||
LOG_INFO("rewrite ret from OB_TIMEOUT to OB_EAGAIN to retry clog replay", K(ret));
|
LOG_INFO("rewrite failure to OB_EAGAIN to retry clog replay", K(ret), K(origin_ret));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace storage
|
} // namespace storage
|
||||||
|
@ -55,6 +55,14 @@ class ObLSHandle;
|
|||||||
class ObTabletHandle;
|
class ObTabletHandle;
|
||||||
class ObLSTabletService;
|
class ObLSTabletService;
|
||||||
|
|
||||||
|
// Throttling level passed to ObTabletCreateMdsHelper::check_create_new_tablets to select how
// strictly the per-GB tablet count limit is enforced when new tablets are created.
enum class ObTabletCreateThrottlingLevel : uint8_t
|
||||||

{
|
||||||

STRICT = 0, // throttle by the configured per-GB limit (e.g. 1G2W); used for leader creation
|
||||||

SOFT = 1, // adaptive: may exceed the configured limit, up to 1G3W (per-GB limit raised to at least 30000); used in HA scenarios
|
||||||

FREE = 2, // most permissive: 1G4W (per-GB limit raised to at least 40000) is the max creation speed without influencing stability; used for replay
|
||||||

MAX
|
||||||

};
|
||||||
|
|
||||||
class ObTabletCreateMdsHelper
|
class ObTabletCreateMdsHelper
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -78,7 +86,7 @@ public:
|
|||||||
const obrpc::ObBatchCreateTabletArg &arg,
|
const obrpc::ObBatchCreateTabletArg &arg,
|
||||||
const share::SCN &scn,
|
const share::SCN &scn,
|
||||||
mds::BufferCtx &ctx);
|
mds::BufferCtx &ctx);
|
||||||
static int check_create_new_tablets(const int64_t inc_tablet_cnt, const bool is_soft_limit = false);
|
static int check_create_new_tablets(const int64_t inc_tablet_cnt, const ObTabletCreateThrottlingLevel level);
|
||||||
private:
|
private:
|
||||||
static int check_create_new_tablets(const obrpc::ObBatchCreateTabletArg &arg, const bool is_replay = false);
|
static int check_create_new_tablets(const obrpc::ObBatchCreateTabletArg &arg, const bool is_replay = false);
|
||||||
static int check_create_arg(
|
static int check_create_arg(
|
||||||
|
Reference in New Issue
Block a user