BUGFIX: check data disk at dml interface

This commit is contained in:
obdev 2023-10-23 08:10:05 +00:00 committed by ob-robot
parent c384fdc65c
commit 4399314332
6 changed files with 136 additions and 6 deletions

View File

@ -30,6 +30,7 @@
#include "logservice/ob_log_service.h"
#include "observer/ob_server_event_history_table_operator.h"
#include "storage/slog/ob_storage_logger.h"
#include "storage/tx_storage/ob_tenant_freezer.h"
#include "share/schema/ob_multi_version_schema_service.h"
namespace oceanbase
@ -48,6 +49,7 @@ ObFailureDetector::ObFailureDetector()
has_add_data_disk_hang_event_(false),
has_add_clog_full_event_(false),
has_schema_error_(false),
has_add_disk_full_event_(false),
lock_(common::ObLatchIds::ELECTION_LOCK)
{
COORDINATOR_LOG(INFO, "ObFailureDetector constructed");
@ -126,6 +128,7 @@ void ObFailureDetector::destroy()
has_add_data_disk_hang_event_ = false;
has_add_clog_full_event_ = false;
has_schema_error_ = false;
has_add_disk_full_event_ = false;
COORDINATOR_LOG(INFO, "ObFailureDetector mtl destroy");
}
@ -166,6 +169,8 @@ void ObFailureDetector::detect_failure()
detect_palf_disk_full_();
// schema refreshed check
detect_schema_not_refreshed_();
// data disk full check
detect_data_disk_full_();
}
int ObFailureDetector::add_failure_event(const FailureEvent &event)
@ -461,6 +466,53 @@ void ObFailureDetector::detect_schema_not_refreshed_()
}
}
void ObFailureDetector::detect_data_disk_full_()
{
LC_TIME_GUARD(1_s);
int ret = OB_SUCCESS;
const int64_t now = ObTimeUtility::current_time();
ObTenantFreezer *freezer = MTL(ObTenantFreezer*);
int64_t memstore_used = 0;
const bool force_refresh = true;
bool is_disk_enough = true;
FailureEvent data_disk_full_event(FailureType::RESOURCE_NOT_ENOUGH, FailureModule::STORAGE, FailureLevel::NOTICE);
if (OB_FAIL(data_disk_full_event.set_info("data disk full event"))) {
COORDINATOR_LOG(ERROR, "data_disk_full_event set_info failed", K(ret));
} else if (OB_FAIL(freezer->get_tenant_memstore_used(memstore_used, force_refresh))) {
COORDINATOR_LOG(WARN, "get tenant memstore used failed", K(ret));
} else if (OB_FAIL(THE_IO_DEVICE->check_space_full(memstore_used)) &&
OB_SERVER_OUTOF_DISK_SPACE != ret) {
COORDINATOR_LOG(WARN, "check space full failed", K(ret));
} else if (OB_SERVER_OUTOF_DISK_SPACE == ret) {
is_disk_enough = false;
ret = OB_SUCCESS;
} else {
// do nothing
}
if (OB_FAIL(ret)) {
} else if (false == ATOMIC_LOAD(&has_add_disk_full_event_)) {
if (is_disk_enough) {
// data disk is not full, skip.
} else if (OB_FAIL(add_failure_event(data_disk_full_event))) {
COORDINATOR_LOG(ERROR, "add_failure_event failed", K(ret), K(data_disk_full_event));
} else {
ATOMIC_SET(&has_add_disk_full_event_, true);
LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "data disk is full, add failure event",
K(data_disk_full_event), K(now));
}
} else {
if (!is_disk_enough) {
// data disk is still full, cannot remove failure_event.
} else if (OB_FAIL(remove_failure_event(data_disk_full_event))) {
COORDINATOR_LOG(ERROR, "remove_failure_event failed", K(ret), K(data_disk_full_event));
} else {
ATOMIC_SET(&has_add_disk_full_event_, false);
COORDINATOR_LOG(INFO, "data disk has left space, remove failure event", K(ret), K(data_disk_full_event));
}
}
}
int ObFailureDetector::FailureEventWithRecoverOp::init(const FailureEvent &event,
const ObFunction<bool()> &recover_detect_operation)
{

View File

@ -94,6 +94,10 @@ public:
bool is_clog_disk_has_fatal_error();
bool is_data_disk_has_fatal_error();
bool is_schema_not_refreshed();
bool is_data_disk_full() const
{
return has_add_disk_full_event_;
}
private:
bool check_is_running_() const { return is_running_; }
int insert_event_to_table_(const FailureEvent &event, const ObFunction<bool()> &recover_operation, ObString info);
@ -101,6 +105,7 @@ private:
void detect_data_disk_io_failure_();
void detect_palf_disk_full_();
void detect_schema_not_refreshed_();
void detect_data_disk_full_();
private:
struct FailureEventWithRecoverOp {
int init(const FailureEvent &event, const ObFunction<bool()> &recover_detect_operation);
@ -119,6 +124,7 @@ private:
bool has_add_data_disk_hang_event_;
bool has_add_clog_full_event_;
bool has_schema_error_;
bool has_add_disk_full_event_;
ObSpinLock lock_;
};

View File

@ -14,6 +14,7 @@
#include "lib/ob_errno.h"
#include "lib/objectpool/ob_server_object_pool.h"
#include "logservice/leader_coordinator/ob_failure_detector.h"
#include "share/ob_ls_id.h"
#include "storage/ob_query_iterator_factory.h"
#include "storage/access/ob_table_scan_iterator.h"
@ -29,6 +30,7 @@ namespace oceanbase
{
using namespace common;
using namespace share;
using namespace logservice::coordinator;
namespace storage
{
@ -99,6 +101,19 @@ int ObAccessService::check_tenant_out_of_memstore_limit_(bool &is_out_of_mem)
return ret;
}
// Reports whether the tenant's failure detector currently flags the data disk as full.
//
// @param [out] is_full  set to the detector's is_data_disk_full() result on success;
//                       left unmodified when the detector cannot be obtained.
// @return OB_SUCCESS on success, OB_ERR_UNEXPECTED if the MTL failure detector is null.
int ObAccessService::check_data_disk_full_(bool &is_full)
{
  int ret = OB_SUCCESS;
  ObFailureDetector *detector = MTL(ObFailureDetector*);
  if (!OB_ISNULL(detector)) {
    is_full = detector->is_data_disk_full();
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("mtl module detector is null", K(ret), KP(detector));
  }
  return ret;
}
int ObAccessService::pre_check_lock(
const share::ObLSID &ls_id,
transaction::ObTxDesc &tx_desc,
@ -546,6 +561,7 @@ int ObAccessService::check_write_allowed_(
{
int ret = OB_SUCCESS;
bool is_out_of_mem = false;
bool is_disk_full = false;
ObLS *ls = nullptr;
ObLockID lock_id;
ObLockParam lock_param;
@ -559,6 +575,11 @@ int ObAccessService::check_write_allowed_(
} else if (is_out_of_mem && !tablet_id.is_inner_tablet()) {
ret = OB_TENANT_OUT_OF_MEM;
LOG_WARN("this tenant is already out of memstore limit", K(ret), K_(tenant_id));
} else if (OB_FAIL(check_data_disk_full_(is_disk_full))) {
LOG_WARN("fail to check data disk full", K(ret));
} else if (is_disk_full) {
ret = OB_SERVER_OUTOF_DISK_SPACE;
LOG_WARN("data disk full, you should not do io now", K(ret));
} else if (OB_FAIL(get_write_store_ctx_guard_(ls_id,
dml_param.timeout_,
tx_desc,

View File

@ -198,6 +198,7 @@ public:
int64_t &memtable_row_count) const;
protected:
int check_tenant_out_of_memstore_limit_(bool &is_out_of_mem);
int check_data_disk_full_(bool &is_full);
int get_write_store_ctx_guard_(
const share::ObLSID &ls_id,

View File

@ -917,7 +917,51 @@ bool ObTenantFreezer::is_replay_pending_log_too_large(const int64_t pending_size
return bool_ret;
}
int ObTenantFreezer::get_tenant_memstore_cond(
// Fetches only the tenant's total memstore usage.
//
// Thin wrapper over get_tenant_memstore_cond_(): the other values that helper
// produces are received into throw-away locals and discarded.
//
// @param [out] total_memstore_used  total memstore usage reported by the helper.
// @param [in]  force_refresh        forwarded unchanged to get_tenant_memstore_cond_().
// @return OB_NOT_INIT if the freezer is not initialized, otherwise the helper's result.
int ObTenantFreezer::get_tenant_memstore_used(int64_t &total_memstore_used,
                                              const bool force_refresh)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    // Placeholders for outputs the caller does not need.
    int64_t ignored_active_used = 0;
    int64_t ignored_freeze_trigger = 0;
    int64_t ignored_limit = 0;
    int64_t ignored_freeze_cnt = 0;
    if (OB_FAIL(get_tenant_memstore_cond_(ignored_active_used,
                                          total_memstore_used,
                                          ignored_freeze_trigger,
                                          ignored_limit,
                                          ignored_freeze_cnt,
                                          force_refresh))) {
      LOG_WARN("get tenant memstore used failed", K(ret));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
  }
  return ret;
}
// Public entry point for querying the tenant's memstore condition.
//
// Verifies that the freezer is initialized and then delegates to the private
// get_tenant_memstore_cond_() with all arguments passed through unchanged.
//
// @param [out] active_memstore_used     filled by get_tenant_memstore_cond_().
// @param [out] total_memstore_used      filled by get_tenant_memstore_cond_().
// @param [out] memstore_freeze_trigger  filled by get_tenant_memstore_cond_().
// @param [out] memstore_limit           filled by get_tenant_memstore_cond_().
// @param [out] freeze_cnt               filled by get_tenant_memstore_cond_().
// @param [in]  force_refresh            forwarded unchanged to the helper.
// @return OB_NOT_INIT if the freezer is not initialized (outputs are not touched),
//         otherwise the helper's result.
int ObTenantFreezer::get_tenant_memstore_cond(int64_t &active_memstore_used,
                                              int64_t &total_memstore_used,
                                              int64_t &memstore_freeze_trigger,
                                              int64_t &memstore_limit,
                                              int64_t &freeze_cnt,
                                              const bool force_refresh)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    if (OB_FAIL(get_tenant_memstore_cond_(active_memstore_used,
                                          total_memstore_used,
                                          memstore_freeze_trigger,
                                          memstore_limit,
                                          freeze_cnt,
                                          force_refresh))) {
      LOG_WARN("get tenant memstore used failed", K(ret));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
  }
  return ret;
}
int ObTenantFreezer::get_tenant_memstore_cond_(
int64_t &active_memstore_used,
int64_t &total_memstore_used,
int64_t &memstore_freeze_trigger,
@ -941,11 +985,8 @@ int ObTenantFreezer::get_tenant_memstore_cond(
memstore_freeze_trigger = 0;
memstore_limit = 0;
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
} else if (!force_refresh &&
current_time - last_refresh_timestamp < MEMSTORE_USED_CACHE_REFRESH_INTERVAL) {
if (!force_refresh &&
current_time - last_refresh_timestamp < MEMSTORE_USED_CACHE_REFRESH_INTERVAL) {
active_memstore_used = last_active_memstore_used;
total_memstore_used = last_total_memstore_used;
memstore_freeze_trigger = last_memstore_freeze_trigger;

View File

@ -121,6 +121,9 @@ public:
int64_t &memstore_limit,
int64_t &freeze_cnt,
const bool force_refresh = true);
// get the tenant memstore used
int get_tenant_memstore_used(int64_t &total_memstore_used,
const bool force_refresh = true);
// get the tenant memstore limit.
int get_tenant_memstore_limit(int64_t &mem_limit);
// this is used to check if the tenant's memstore is out at user side.
@ -149,6 +152,12 @@ public:
static int64_t get_freeze_trigger_interval() { return FREEZE_TRIGGER_INTERVAL; }
bool exist_ls_freezing();
private:
int get_tenant_memstore_cond_(int64_t &active_memstore_used,
int64_t &total_memstore_used,
int64_t &memstore_freeze_trigger,
int64_t &memstore_limit,
int64_t &freeze_cnt,
const bool force_refresh = true);
int check_memstore_full_(bool &last_result,
int64_t &last_check_timestamp,
bool &is_out_of_mem,