add parameters for detecting disk warning and error
This commit is contained in:
11
deps/oblib/src/lib/io/ob_io_common.cpp
vendored
11
deps/oblib/src/lib/io/ob_io_common.cpp
vendored
@ -150,8 +150,8 @@ void ObIOConfig::set_default_value()
|
||||
cpu_high_water_level_ = DEFAULT_CPU_HIGH_WATER_LEVEL;
|
||||
write_failure_detect_interval_ = DEFAULT_WRITE_FAILURE_DETECT_INTERVAL;
|
||||
read_failure_black_list_interval_ = DEFAULT_READ_FAILURE_IN_BLACK_LIST_INTERVAL;
|
||||
retry_warn_limit_ = DEFAULT_RETRY_WARN_LIMIT;
|
||||
retry_error_limit_ = DEFAULT_RETRY_ERROR_LIMIT;
|
||||
data_storage_warning_tolerance_time_ = DEFAULT_WARNING_TOLERANCE_TIME;
|
||||
data_storage_error_tolerance_time_ = DEFAULT_ERROR_TOLERANCE_TIME;
|
||||
disk_io_thread_count_ = DEFAULT_DISK_IO_THREAD_COUNT;
|
||||
callback_thread_count_ = DEFAULT_IO_CALLBACK_THREAD_COUNT;
|
||||
large_query_io_percent_ = DEFAULT_LARGE_QUERY_IO_PERCENT;
|
||||
@ -163,7 +163,8 @@ bool ObIOConfig::is_valid() const
|
||||
return sys_io_low_percent_ >= 0 && sys_io_low_percent_ <= 100 && sys_io_high_percent_ > 0 &&
|
||||
sys_io_high_percent_ <= 100 && sys_io_low_percent_ <= sys_io_high_percent_ && user_iort_up_percent_ >= 0 &&
|
||||
cpu_high_water_level_ > 0 && write_failure_detect_interval_ > 0 && read_failure_black_list_interval_ > 0 &&
|
||||
retry_warn_limit_ > 0 && retry_error_limit_ > retry_warn_limit_ && disk_io_thread_count_ > 0 &&
|
||||
data_storage_warning_tolerance_time_ > 0 &&
|
||||
data_storage_error_tolerance_time_ >= data_storage_warning_tolerance_time_ && disk_io_thread_count_ > 0 &&
|
||||
disk_io_thread_count_ <= ObDisk::MAX_DISK_CHANNEL_CNT * 2 && disk_io_thread_count_ % 2 == 0 &&
|
||||
callback_thread_count_ > 0 && large_query_io_percent_ >= 0 && large_query_io_percent_ <= 100 &&
|
||||
data_storage_io_timeout_ms_ > 0;
|
||||
@ -177,8 +178,8 @@ void ObIOConfig::reset()
|
||||
cpu_high_water_level_ = 0;
|
||||
write_failure_detect_interval_ = 0;
|
||||
read_failure_black_list_interval_ = 0;
|
||||
retry_warn_limit_ = 0;
|
||||
retry_error_limit_ = 0;
|
||||
data_storage_warning_tolerance_time_ = 0;
|
||||
data_storage_error_tolerance_time_ = 0;
|
||||
disk_io_thread_count_ = 0;
|
||||
callback_thread_count_ = 0;
|
||||
large_query_io_percent_ = 0;
|
||||
|
||||
17
deps/oblib/src/lib/io/ob_io_common.h
vendored
17
deps/oblib/src/lib/io/ob_io_common.h
vendored
@ -98,8 +98,8 @@ public:
|
||||
static const int64_t DEFAULT_CPU_HIGH_WATER_LEVEL = 4800;
|
||||
static const int64_t DEFAULT_WRITE_FAILURE_DETECT_INTERVAL = 60 * 1000 * 1000; // 1 min
|
||||
static const int64_t DEFAULT_READ_FAILURE_IN_BLACK_LIST_INTERVAL = 300 * 1000 * 1000; // 5 min
|
||||
static const int32_t DEFAULT_RETRY_WARN_LIMIT = 2;
|
||||
static const int32_t DEFAULT_RETRY_ERROR_LIMIT = 5;
|
||||
// Default tolerance windows used to initialise data_storage_warning_tolerance_time_ and
// data_storage_error_tolerance_time_ in ObIOConfig::set_default_value().
// NOTE(review): both constants are declared int32_t, while the members they are assigned to
// and the neighbouring DEFAULT_* constants are int64_t. The values fit in 32 bits, so nothing
// is truncated today, but widening to int64_t would keep the declarations consistent - confirm.
static const int32_t DEFAULT_WARNING_TOLERANCE_TIME = 30L * 1000L * 1000L; // 30s
|
||||
static const int32_t DEFAULT_ERROR_TOLERANCE_TIME = 300L * 1000L * 1000L; // 300s
|
||||
static const int64_t DEFAULT_DISK_IO_THREAD_COUNT = 8;
|
||||
static const int64_t DEFAULT_IO_CALLBACK_THREAD_COUNT = 8;
|
||||
static const int64_t DEFAULT_LARGE_QUERY_IO_PERCENT = 0; // 0 means unlimited
|
||||
@ -113,19 +113,22 @@ public:
|
||||
bool is_valid() const;
|
||||
void reset();
|
||||
TO_STRING_KV(K_(sys_io_low_percent), K_(sys_io_high_percent), K_(user_iort_up_percent), K_(cpu_high_water_level),
|
||||
K_(write_failure_detect_interval), K_(read_failure_black_list_interval), K_(retry_warn_limit),
|
||||
K_(retry_error_limit), K_(disk_io_thread_count), K_(callback_thread_count), K_(large_query_io_percent),
|
||||
K_(data_storage_io_timeout_ms));
|
||||
K_(write_failure_detect_interval), K_(read_failure_black_list_interval), K_(data_storage_warning_tolerance_time),
|
||||
K_(data_storage_error_tolerance_time), K_(disk_io_thread_count), K_(callback_thread_count),
|
||||
K_(large_query_io_percent), K_(data_storage_io_timeout_ms));
|
||||
|
||||
public:
|
||||
// schedule related
|
||||
int64_t sys_io_low_percent_;
|
||||
int64_t sys_io_high_percent_;
|
||||
int64_t user_iort_up_percent_;
|
||||
int64_t cpu_high_water_level_;
|
||||
// diagnose related
|
||||
int64_t write_failure_detect_interval_;
|
||||
int64_t read_failure_black_list_interval_;
|
||||
int64_t retry_warn_limit_;
|
||||
int64_t retry_error_limit_;
|
||||
int64_t data_storage_warning_tolerance_time_;
|
||||
int64_t data_storage_error_tolerance_time_;
|
||||
// resource related
|
||||
int64_t disk_io_thread_count_;
|
||||
int64_t callback_thread_count_;
|
||||
int64_t large_query_io_percent_;
|
||||
|
||||
87
deps/oblib/src/lib/io/ob_io_disk.cpp
vendored
87
deps/oblib/src/lib/io/ob_io_disk.cpp
vendored
@ -41,28 +41,23 @@ void ObDiskDiagnose::reset()
|
||||
MEMSET(write_failure_event_ts_, 0, sizeof(write_failure_event_ts_));
|
||||
}
|
||||
|
||||
void ObDiskDiagnose::record_read_fail(const int64_t retry_cnt)
|
||||
void ObDiskDiagnose::record_read_fail(const int64_t diagnose_begin_ts)
|
||||
{
|
||||
const ObIOConfig io_config = OB_IO_MANAGER.get_io_config();
|
||||
// in order to reduce misjudgement, here are the rules:
|
||||
// watch the continuous read timeout with the exponential growth of timeout
|
||||
// 1. for more than 3 times, record as disk warning,
|
||||
// after that, this server is not allowed to be the paxos leader for a period,
|
||||
// which is indicated by READ_FAILURE_IN_BLACK_LIST_INTERVAL, usually 300s.
|
||||
//
|
||||
// 2. for more than 6 times, record as disk error
|
||||
// if the disk is confirmed normal, the administrator can reset the disk error by
|
||||
// alter system set disk valid server [=] 'ip:port'
|
||||
//
|
||||
if (retry_cnt < io_config.retry_warn_limit_) {
|
||||
// do nothing
|
||||
} else if (retry_cnt < io_config.retry_error_limit_) {
|
||||
last_read_failure_warn_ts_ = ObTimeUtility::current_time();
|
||||
} else {
|
||||
if (!is_disk_error_) {
|
||||
disk_error_begin_ts_ = ObTimeUtility::current_time();
|
||||
const int64_t current_ts = ObTimeUtility::current_time();
|
||||
if (current_ts >= diagnose_begin_ts + io_config.data_storage_warning_tolerance_time_) {
|
||||
// set disk warning and record warn_ts
|
||||
// until warn_ts + READ_FAILURE_IN_BLACK_LIST_INTERVAL, this server is not allowed to be partition leader
|
||||
last_read_failure_warn_ts_ = current_ts;
|
||||
}
|
||||
disk_error_last_ts_ = ObTimeUtility::current_time();
|
||||
if (current_ts >= diagnose_begin_ts + io_config.data_storage_error_tolerance_time_) {
|
||||
// set disk error and record error_ts
|
||||
// if the disk is confirmed normal, the administrator can reset disk status by:
|
||||
// alter system set disk valid server [=] 'ip:port'
|
||||
if (!is_disk_error_) {
|
||||
disk_error_begin_ts_ = current_ts;
|
||||
}
|
||||
disk_error_last_ts_ = current_ts;
|
||||
is_disk_error_ = true;
|
||||
COMMON_LOG(ERROR, "set_disk_error: attention!!!");
|
||||
}
|
||||
@ -119,18 +114,6 @@ int64_t ObDiskDiagnose::get_last_io_failure_ts() const
|
||||
return MAX(disk_error_last_ts_, last_read_failure_warn_ts_);
|
||||
}
|
||||
|
||||
// Returns the retry_error_limit_ field of the IO configuration obtained from
// OB_IO_MANAGER.get_io_config().
int64_t ObDiskDiagnose::get_max_retry_cnt() const
|
||||
{
|
||||
// take a local snapshot of the current IO configuration
const ObIOConfig io_config = OB_IO_MANAGER.get_io_config();
|
||||
return io_config.retry_error_limit_;
|
||||
}
|
||||
|
||||
// Returns the retry_warn_limit_ field of the IO configuration obtained from
// OB_IO_MANAGER.get_io_config().
int64_t ObDiskDiagnose::get_warn_retry_cnt() const
|
||||
{
|
||||
// take a local snapshot of the current IO configuration
const ObIOConfig io_config = OB_IO_MANAGER.get_io_config();
|
||||
return io_config.retry_warn_limit_;
|
||||
}
|
||||
|
||||
/**
|
||||
* ---------------------------------------------- ObDisk ---------------------------------------------
|
||||
*/
|
||||
@ -648,33 +631,41 @@ void ObIOFaultDetector::handle(void* t)
|
||||
const ObIOInfo& info = task->info_;
|
||||
ObIOHandle handle;
|
||||
uint64_t timeout_ms = task->timeout_ms_;
|
||||
int64_t retry_cnt = 0;
|
||||
const int64_t MIN_IO_WAIT_TIME_MS = 30000; // 30s
|
||||
|
||||
for (retry_cnt = 0; retry_cnt < disk_diagnose.get_max_retry_cnt(); ++retry_cnt) {
|
||||
// remain 1s to avoid race condition for retry_black_list_interval
|
||||
const int64_t retry_black_list_interval_ms =
|
||||
OB_IO_MANAGER.get_io_config().read_failure_black_list_interval_ / 1000L - 1000L;
|
||||
// the retry io timeout must be less than black_list_interval
|
||||
const int64_t MIN_IO_RETRY_TIMEOUT_MS = min(10L * 1000L /* 10s */, retry_black_list_interval_ms);
|
||||
const int64_t MAX_IO_RETRY_TIMEOUT_MS = min(180L * 1000L /* 180s*/, retry_black_list_interval_ms);
|
||||
const int64_t diagnose_begin_ts = ObTimeUtility::current_time();
|
||||
bool is_retry_succ = false;
|
||||
while (OB_SUCC(ret) && !is_retry_succ && !disk_diagnose.is_disk_error()) {
|
||||
handle.reset();
|
||||
// timeout grows exponentially
|
||||
if (retry_cnt >= disk_diagnose.get_warn_retry_cnt() - 1) {
|
||||
timeout_ms = max(timeout_ms * 2, MIN_IO_WAIT_TIME_MS);
|
||||
} else {
|
||||
timeout_ms = timeout_ms * 2;
|
||||
}
|
||||
|
||||
if (retry_cnt == disk_diagnose.get_warn_retry_cnt()) {
|
||||
disk_diagnose.record_read_fail(retry_cnt);
|
||||
}
|
||||
|
||||
const ObIOConfig io_conf = OB_IO_MANAGER.get_io_config();
|
||||
const int64_t current_retry_ts = ObTimeUtility::current_time();
|
||||
const int64_t warn_ts = diagnose_begin_ts + io_conf.data_storage_warning_tolerance_time_;
|
||||
const int64_t error_ts = diagnose_begin_ts + io_conf.data_storage_error_tolerance_time_;
|
||||
const int64_t left_timeout_ms =
|
||||
!disk_diagnose.is_disk_warning() ? (warn_ts - current_retry_ts) / 1000 : (error_ts - current_retry_ts) / 1000;
|
||||
// timeout of retry io increase exponentially
|
||||
timeout_ms = min(left_timeout_ms, min(MAX_IO_RETRY_TIMEOUT_MS, max(timeout_ms * 2, MIN_IO_RETRY_TIMEOUT_MS)));
|
||||
if (timeout_ms > 0) {
|
||||
// do retry io
|
||||
if (disk->get_admin_status() != DISK_USING) {
|
||||
ret = OB_STATE_NOT_MATCH;
|
||||
COMMON_LOG(WARN, "check_admin_status failed, disk is deleting", K(ret), "status", disk->get_admin_status());
|
||||
break;
|
||||
} else if (OB_FAIL(OB_IO_MANAGER.read(info, handle, timeout_ms))) {
|
||||
COMMON_LOG(WARN, "ObIOManager::read failed", K(ret), K(info), K(timeout_ms));
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
break; // stop retry if success
|
||||
is_retry_succ = true;
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret) && !is_retry_succ) {
|
||||
disk_diagnose.record_read_fail(diagnose_begin_ts);
|
||||
}
|
||||
}
|
||||
disk_diagnose.record_read_fail(retry_cnt);
|
||||
|
||||
op_free(task);
|
||||
task = NULL;
|
||||
|
||||
4
deps/oblib/src/lib/io/ob_io_disk.h
vendored
4
deps/oblib/src/lib/io/ob_io_disk.h
vendored
@ -56,13 +56,11 @@ class ObDiskDiagnose {
|
||||
public:
|
||||
ObDiskDiagnose();
|
||||
virtual ~ObDiskDiagnose();
|
||||
void record_read_fail(const int64_t retry_cnt);
|
||||
void record_read_fail(const int64_t diagnose_begin_ts);
|
||||
void record_write_fail();
|
||||
bool is_disk_warning() const;
|
||||
bool is_disk_error() const;
|
||||
void reset_disk_health();
|
||||
int64_t get_max_retry_cnt() const;
|
||||
int64_t get_warn_retry_cnt() const;
|
||||
int64_t get_disk_error_begin_ts() const
|
||||
{
|
||||
return disk_error_begin_ts_;
|
||||
|
||||
@ -111,6 +111,8 @@ int ObServerReloadConfig::operator()()
|
||||
// In the 2.x version, reuse the sys_bkgd_io_timeout configuration item to indicate the data disk io timeout time
|
||||
// After version 3.1, use the data_storage_io_timeout configuration item.
|
||||
io_config.data_storage_io_timeout_ms_ = GCONF._data_storage_io_timeout / 1000L;
|
||||
io_config.data_storage_warning_tolerance_time_ = GCONF.data_storage_warning_tolerance_time;
|
||||
io_config.data_storage_error_tolerance_time_ = GCONF.data_storage_error_tolerance_time;
|
||||
if (OB_FAIL(ObIOManager::get_instance().set_io_config(io_config))) {
|
||||
real_ret = ret;
|
||||
LOG_WARN("reload io manager config fail, ", K(ret));
|
||||
|
||||
@ -302,6 +302,17 @@ bool ObConfigPartitionBalanceStrategyFuncChecker::check(const ObConfigItem& t) c
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
// Validates a candidate value for the error tolerance time parameter.
// The item's string form is parsed with ObConfigTimeParser::get(); the value is accepted
// only when parsing succeeds and the parsed value is not smaller than the
// data_storage_warning_tolerance_time currently held in GCONF.
// @param t  config item whose string value (t.str()) is checked
// @return   true when the value parses and is >= the current warning tolerance time
// NOTE(review): the comparison reads the warning value that is in GCONF at check time, so the
// outcome presumably depends on the order in which the two parameters are updated - confirm.
bool ObDataStorageErrorToleranceTimeChecker::check(const ObConfigItem& t) const
|
||||
{
|
||||
bool is_valid = false;
|
||||
// the parser reports success or failure through is_valid
int64_t value = ObConfigTimeParser::get(t.str(), is_valid);
|
||||
if (is_valid) {
|
||||
const int64_t warning_value = GCONF.data_storage_warning_tolerance_time;
|
||||
// the error tolerance must not be shorter than the warning tolerance
is_valid = value >= warning_value;
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
int64_t ObConfigIntParser::get(const char* str, bool& valid)
|
||||
{
|
||||
char* p_end = NULL;
|
||||
|
||||
@ -394,6 +394,18 @@ private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ObConfigPartitionBalanceStrategyFuncChecker);
|
||||
};
|
||||
|
||||
// Config checker derived from ObConfigChecker. It is attached to the
// data_storage_error_tolerance_time parameter through DEF_TIME_WITH_CHECKER.
class ObDataStorageErrorToleranceTimeChecker : public ObConfigChecker {
|
||||
public:
|
||||
ObDataStorageErrorToleranceTimeChecker()
|
||||
{}
|
||||
virtual ~ObDataStorageErrorToleranceTimeChecker()
|
||||
{}
|
||||
// Returns whether the value carried by config item t is acceptable; defined out of line.
bool check(const ObConfigItem& t) const;
|
||||
|
||||
private:
|
||||
DISABLE_COPY_ASSIGN(ObDataStorageErrorToleranceTimeChecker);
|
||||
};
|
||||
|
||||
// config item container
|
||||
class ObConfigStringKey {
|
||||
public:
|
||||
|
||||
@ -881,6 +881,15 @@ DEF_TIME(_data_storage_io_timeout, OB_CLUSTER_PARAMETER, "120s", "[5s,600s]",
|
||||
"io timeout for data storage, Range [5s,600s]. "
|
||||
"The default value is 120s",
|
||||
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||
// Cluster-level, dynamically effective time parameter: how long disk read failures are
// tolerated before the disk status is set to warning. Default "30s", range "[10s,300s]".
DEF_TIME(data_storage_warning_tolerance_time, OB_CLUSTER_PARAMETER, "30s", "[10s,300s]",
|
||||
"time to tolerate disk read failure, after that, the disk status will be set warning. Range [10s,300s]. The "
|
||||
"default value is 30s",
|
||||
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||
// Cluster-level, dynamically effective time parameter: how long disk read failures are
// tolerated before the disk status is set to error. Default "300s", range "[10s,7200s]".
// ObDataStorageErrorToleranceTimeChecker is attached as the checker; its check() rejects
// values smaller than the current data_storage_warning_tolerance_time.
DEF_TIME_WITH_CHECKER(data_storage_error_tolerance_time, OB_CLUSTER_PARAMETER, "300s",
|
||||
common::ObDataStorageErrorToleranceTimeChecker, "[10s,7200s]",
|
||||
"time to tolerate disk read failure, after that, the disk status will be set error. Range [10s,7200s]. The default "
|
||||
"value is 300s",
|
||||
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||
DEF_INT(data_disk_usage_limit_percentage, OB_CLUSTER_PARAMETER, "90", "[50,100]",
|
||||
"the safe use percentage of data disk"
|
||||
"Range: [50,100] in integer",
|
||||
|
||||
Reference in New Issue
Block a user