BUGFIX: check data disk at dml interface
This commit is contained in:
parent
c384fdc65c
commit
4399314332
@ -30,6 +30,7 @@
|
||||
#include "logservice/ob_log_service.h"
|
||||
#include "observer/ob_server_event_history_table_operator.h"
|
||||
#include "storage/slog/ob_storage_logger.h"
|
||||
#include "storage/tx_storage/ob_tenant_freezer.h"
|
||||
#include "share/schema/ob_multi_version_schema_service.h"
|
||||
|
||||
namespace oceanbase
|
||||
@ -48,6 +49,7 @@ ObFailureDetector::ObFailureDetector()
|
||||
has_add_data_disk_hang_event_(false),
|
||||
has_add_clog_full_event_(false),
|
||||
has_schema_error_(false),
|
||||
has_add_disk_full_event_(false),
|
||||
lock_(common::ObLatchIds::ELECTION_LOCK)
|
||||
{
|
||||
COORDINATOR_LOG(INFO, "ObFailureDetector constructed");
|
||||
@ -126,6 +128,7 @@ void ObFailureDetector::destroy()
|
||||
has_add_data_disk_hang_event_ = false;
|
||||
has_add_clog_full_event_ = false;
|
||||
has_schema_error_ = false;
|
||||
has_add_disk_full_event_ = false;
|
||||
COORDINATOR_LOG(INFO, "ObFailureDetector mtl destroy");
|
||||
}
|
||||
|
||||
@ -166,6 +169,8 @@ void ObFailureDetector::detect_failure()
|
||||
detect_palf_disk_full_();
|
||||
// schema refreshed check
|
||||
detect_schema_not_refreshed_();
|
||||
// data disk full check
|
||||
detect_data_disk_full_();
|
||||
}
|
||||
|
||||
int ObFailureDetector::add_failure_event(const FailureEvent &event)
|
||||
@ -461,6 +466,53 @@ void ObFailureDetector::detect_schema_not_refreshed_()
|
||||
}
|
||||
}
|
||||
|
||||
void ObFailureDetector::detect_data_disk_full_()
|
||||
{
|
||||
LC_TIME_GUARD(1_s);
|
||||
int ret = OB_SUCCESS;
|
||||
const int64_t now = ObTimeUtility::current_time();
|
||||
ObTenantFreezer *freezer = MTL(ObTenantFreezer*);
|
||||
int64_t memstore_used = 0;
|
||||
const bool force_refresh = true;
|
||||
bool is_disk_enough = true;
|
||||
FailureEvent data_disk_full_event(FailureType::RESOURCE_NOT_ENOUGH, FailureModule::STORAGE, FailureLevel::NOTICE);
|
||||
if (OB_FAIL(data_disk_full_event.set_info("data disk full event"))) {
|
||||
COORDINATOR_LOG(ERROR, "data_disk_full_event set_info failed", K(ret));
|
||||
} else if (OB_FAIL(freezer->get_tenant_memstore_used(memstore_used, force_refresh))) {
|
||||
COORDINATOR_LOG(WARN, "get tenant memstore used failed", K(ret));
|
||||
} else if (OB_FAIL(THE_IO_DEVICE->check_space_full(memstore_used)) &&
|
||||
OB_SERVER_OUTOF_DISK_SPACE != ret) {
|
||||
COORDINATOR_LOG(WARN, "check space full failed", K(ret));
|
||||
} else if (OB_SERVER_OUTOF_DISK_SPACE == ret) {
|
||||
is_disk_enough = false;
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (false == ATOMIC_LOAD(&has_add_disk_full_event_)) {
|
||||
if (is_disk_enough) {
|
||||
// data disk is not full, skip.
|
||||
} else if (OB_FAIL(add_failure_event(data_disk_full_event))) {
|
||||
COORDINATOR_LOG(ERROR, "add_failure_event failed", K(ret), K(data_disk_full_event));
|
||||
} else {
|
||||
ATOMIC_SET(&has_add_disk_full_event_, true);
|
||||
LOG_DBA_ERROR(OB_LOG_OUTOF_DISK_SPACE, "msg", "data disk is full, add failure event",
|
||||
K(data_disk_full_event), K(now));
|
||||
}
|
||||
} else {
|
||||
if (!is_disk_enough) {
|
||||
// data disk is still full, cannot remove failure_event.
|
||||
} else if (OB_FAIL(remove_failure_event(data_disk_full_event))) {
|
||||
COORDINATOR_LOG(ERROR, "remove_failure_event failed", K(ret), K(data_disk_full_event));
|
||||
} else {
|
||||
ATOMIC_SET(&has_add_disk_full_event_, false);
|
||||
COORDINATOR_LOG(INFO, "data disk has left space, remove failure event", K(ret), K(data_disk_full_event));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int ObFailureDetector::FailureEventWithRecoverOp::init(const FailureEvent &event,
|
||||
const ObFunction<bool()> &recover_detect_operation)
|
||||
{
|
||||
|
@ -94,6 +94,10 @@ public:
|
||||
bool is_clog_disk_has_fatal_error();
|
||||
bool is_data_disk_has_fatal_error();
|
||||
bool is_schema_not_refreshed();
|
||||
bool is_data_disk_full() const
|
||||
{
|
||||
return has_add_disk_full_event_;
|
||||
}
|
||||
private:
|
||||
bool check_is_running_() const { return is_running_; }
|
||||
int insert_event_to_table_(const FailureEvent &event, const ObFunction<bool()> &recover_operation, ObString info);
|
||||
@ -101,6 +105,7 @@ private:
|
||||
void detect_data_disk_io_failure_();
|
||||
void detect_palf_disk_full_();
|
||||
void detect_schema_not_refreshed_();
|
||||
void detect_data_disk_full_();
|
||||
private:
|
||||
struct FailureEventWithRecoverOp {
|
||||
int init(const FailureEvent &event, const ObFunction<bool()> &recover_detect_operation);
|
||||
@ -119,6 +124,7 @@ private:
|
||||
bool has_add_data_disk_hang_event_;
|
||||
bool has_add_clog_full_event_;
|
||||
bool has_schema_error_;
|
||||
bool has_add_disk_full_event_;
|
||||
ObSpinLock lock_;
|
||||
};
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "lib/ob_errno.h"
|
||||
#include "lib/objectpool/ob_server_object_pool.h"
|
||||
#include "logservice/leader_coordinator/ob_failure_detector.h"
|
||||
#include "share/ob_ls_id.h"
|
||||
#include "storage/ob_query_iterator_factory.h"
|
||||
#include "storage/access/ob_table_scan_iterator.h"
|
||||
@ -29,6 +30,7 @@ namespace oceanbase
|
||||
{
|
||||
using namespace common;
|
||||
using namespace share;
|
||||
using namespace logservice::coordinator;
|
||||
namespace storage
|
||||
{
|
||||
|
||||
@ -99,6 +101,19 @@ int ObAccessService::check_tenant_out_of_memstore_limit_(bool &is_out_of_mem)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAccessService::check_data_disk_full_(bool &is_full)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObFailureDetector* detector = MTL(ObFailureDetector*);
|
||||
if (OB_ISNULL(detector)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("mtl module detector is null", K(ret), KP(detector));
|
||||
} else {
|
||||
is_full = detector->is_data_disk_full();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAccessService::pre_check_lock(
|
||||
const share::ObLSID &ls_id,
|
||||
transaction::ObTxDesc &tx_desc,
|
||||
@ -546,6 +561,7 @@ int ObAccessService::check_write_allowed_(
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
bool is_out_of_mem = false;
|
||||
bool is_disk_full = false;
|
||||
ObLS *ls = nullptr;
|
||||
ObLockID lock_id;
|
||||
ObLockParam lock_param;
|
||||
@ -559,6 +575,11 @@ int ObAccessService::check_write_allowed_(
|
||||
} else if (is_out_of_mem && !tablet_id.is_inner_tablet()) {
|
||||
ret = OB_TENANT_OUT_OF_MEM;
|
||||
LOG_WARN("this tenant is already out of memstore limit", K(ret), K_(tenant_id));
|
||||
} else if (OB_FAIL(check_data_disk_full_(is_disk_full))) {
|
||||
LOG_WARN("fail to check data disk full", K(ret));
|
||||
} else if (is_disk_full) {
|
||||
ret = OB_SERVER_OUTOF_DISK_SPACE;
|
||||
LOG_WARN("data disk full, you should not do io now", K(ret));
|
||||
} else if (OB_FAIL(get_write_store_ctx_guard_(ls_id,
|
||||
dml_param.timeout_,
|
||||
tx_desc,
|
||||
|
@ -198,6 +198,7 @@ public:
|
||||
int64_t &memtable_row_count) const;
|
||||
protected:
|
||||
int check_tenant_out_of_memstore_limit_(bool &is_out_of_mem);
|
||||
int check_data_disk_full_(bool &is_full);
|
||||
|
||||
int get_write_store_ctx_guard_(
|
||||
const share::ObLSID &ls_id,
|
||||
|
@ -917,7 +917,51 @@ bool ObTenantFreezer::is_replay_pending_log_too_large(const int64_t pending_size
|
||||
return bool_ret;
|
||||
}
|
||||
|
||||
int ObTenantFreezer::get_tenant_memstore_cond(
|
||||
/**
 * Convenience wrapper around get_tenant_memstore_cond_() for callers that only need
 * the tenant's total memstore usage.
 *
 * @param[out] total_memstore_used  filled in by get_tenant_memstore_cond_().
 * @param[in]  force_refresh        forwarded unchanged to get_tenant_memstore_cond_().
 * @return OB_NOT_INIT if the freezer is not initialized, otherwise the result of
 *         get_tenant_memstore_cond_().
 */
int ObTenantFreezer::get_tenant_memstore_used(int64_t &total_memstore_used,
                                              const bool force_refresh)
{
  int ret = OB_SUCCESS;
  // Sinks for the outputs of get_tenant_memstore_cond_() this caller does not need.
  int64_t ignored_active_used = 0;
  int64_t ignored_freeze_trigger = 0;
  int64_t ignored_limit = 0;
  int64_t ignored_freeze_cnt = 0;
  if (is_inited_) {
    if (OB_FAIL(get_tenant_memstore_cond_(ignored_active_used,
                                          total_memstore_used,
                                          ignored_freeze_trigger,
                                          ignored_limit,
                                          ignored_freeze_cnt,
                                          force_refresh))) {
      LOG_WARN("get tenant memstore used failed", K(ret));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
  }
  return ret;
}
|
||||
|
||||
/**
 * Public entry point for querying the tenant's memstore condition. It verifies that
 * the freezer is initialized and then delegates to get_tenant_memstore_cond_().
 *
 * @param[out] active_memstore_used     forwarded to get_tenant_memstore_cond_().
 * @param[out] total_memstore_used      forwarded to get_tenant_memstore_cond_().
 * @param[out] memstore_freeze_trigger  forwarded to get_tenant_memstore_cond_().
 * @param[out] memstore_limit           forwarded to get_tenant_memstore_cond_().
 * @param[out] freeze_cnt               forwarded to get_tenant_memstore_cond_().
 * @param[in]  force_refresh            forwarded to get_tenant_memstore_cond_().
 * @return OB_NOT_INIT if the freezer is not initialized, otherwise the result of
 *         get_tenant_memstore_cond_().
 */
int ObTenantFreezer::get_tenant_memstore_cond(int64_t &active_memstore_used,
                                              int64_t &total_memstore_used,
                                              int64_t &memstore_freeze_trigger,
                                              int64_t &memstore_limit,
                                              int64_t &freeze_cnt,
                                              const bool force_refresh)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    // All outputs are produced by the private implementation.
    if (OB_FAIL(get_tenant_memstore_cond_(active_memstore_used, total_memstore_used,
                                          memstore_freeze_trigger, memstore_limit,
                                          freeze_cnt, force_refresh))) {
      LOG_WARN("get tenant memstore used failed", K(ret));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
  }
  return ret;
}
|
||||
|
||||
int ObTenantFreezer::get_tenant_memstore_cond_(
|
||||
int64_t &active_memstore_used,
|
||||
int64_t &total_memstore_used,
|
||||
int64_t &memstore_freeze_trigger,
|
||||
@ -941,11 +985,8 @@ int ObTenantFreezer::get_tenant_memstore_cond(
|
||||
memstore_freeze_trigger = 0;
|
||||
memstore_limit = 0;
|
||||
|
||||
if (!is_inited_) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("[TenantFreezer] tenant manager not init", KR(ret));
|
||||
} else if (!force_refresh &&
|
||||
current_time - last_refresh_timestamp < MEMSTORE_USED_CACHE_REFRESH_INTERVAL) {
|
||||
if (!force_refresh &&
|
||||
current_time - last_refresh_timestamp < MEMSTORE_USED_CACHE_REFRESH_INTERVAL) {
|
||||
active_memstore_used = last_active_memstore_used;
|
||||
total_memstore_used = last_total_memstore_used;
|
||||
memstore_freeze_trigger = last_memstore_freeze_trigger;
|
||||
|
@ -121,6 +121,9 @@ public:
|
||||
int64_t &memstore_limit,
|
||||
int64_t &freeze_cnt,
|
||||
const bool force_refresh = true);
|
||||
// get the tenant memstore used
|
||||
int get_tenant_memstore_used(int64_t &total_memstore_used,
|
||||
const bool force_refresh = true);
|
||||
// get the tenant memstore limit.
|
||||
int get_tenant_memstore_limit(int64_t &mem_limit);
|
||||
// this is used to check if the tenant's memstore is out at user side.
|
||||
@ -149,6 +152,12 @@ public:
|
||||
static int64_t get_freeze_trigger_interval() { return FREEZE_TRIGGER_INTERVAL; }
|
||||
bool exist_ls_freezing();
|
||||
private:
|
||||
int get_tenant_memstore_cond_(int64_t &active_memstore_used,
|
||||
int64_t &total_memstore_used,
|
||||
int64_t &memstore_freeze_trigger,
|
||||
int64_t &memstore_limit,
|
||||
int64_t &freeze_cnt,
|
||||
const bool force_refresh = true);
|
||||
int check_memstore_full_(bool &last_result,
|
||||
int64_t &last_check_timestamp,
|
||||
bool &is_out_of_mem,
|
||||
|
Loading…
x
Reference in New Issue
Block a user