Fix the bug where io_error retries were unlimited

This commit is contained in:
renju96
2023-02-16 09:44:53 +00:00
committed by ob-robot
parent 59be44790d
commit a0ae58bf15
3 changed files with 23 additions and 6 deletions

View File

@ -151,11 +151,21 @@ void ObIOFlag::set_unlimited(const bool is_unlimited)
is_unlimited_ = is_unlimited;
}
void ObIOFlag::set_detect(const bool is_detect)
{
is_detect_ = is_detect;
}
bool ObIOFlag::is_unlimited() const
{
return is_unlimited_;
}
bool ObIOFlag::is_detect() const
{
return is_detect_;
}
/****************** IOCallback **********************/
ObIOCallback::ObIOCallback()
: compat_mode_(static_cast<lib::Worker::CompatMode>(lib::get_compat_mode()))

View File

@ -65,19 +65,23 @@ public:
bool is_sync() const;
void set_unlimited(const bool is_unlimited = true);
bool is_unlimited() const;
void set_detect(const bool is_detect = true);
bool is_detect() const;
TO_STRING_KV("mode", common::get_io_mode_string(static_cast<ObIOMode>(mode_)),
K(group_id_), K(wait_event_id_), K(is_sync_), K(is_unlimited_), K(reserved_));
K(group_id_), K(wait_event_id_), K(is_sync_), K(is_unlimited_), K(reserved_), K(is_detect_));
private:
static constexpr int64_t IO_MODE_BIT = 4; // read, write, append
static constexpr int64_t IO_GROUP_ID_BIT = 16; // for consumer group in resource manager
static constexpr int64_t IO_WAIT_EVENT_BIT = 32; // for performance monitor
static constexpr int64_t IO_SYNC_FLAG_BIT = 1; // indicate if the caller is waiting io finished
static constexpr int64_t IO_DETECT_FLAG_BIT = 1; // notify a retry task
static constexpr int64_t IO_UNLIMITED_FLAG_BIT = 1; // indicate if the io is unlimited
static constexpr int64_t IO_RESERVED_BIT = 64 - IO_MODE_BIT
- IO_GROUP_ID_BIT
- IO_WAIT_EVENT_BIT
- IO_SYNC_FLAG_BIT
- IO_UNLIMITED_FLAG_BIT;
- IO_UNLIMITED_FLAG_BIT
- IO_DETECT_FLAG_BIT;
union {
int64_t flag_;
@ -87,6 +91,7 @@ private:
int64_t wait_event_id_ : IO_WAIT_EVENT_BIT;
bool is_sync_ : IO_SYNC_FLAG_BIT;
bool is_unlimited_ : IO_UNLIMITED_FLAG_BIT;
bool is_detect_ : IO_DETECT_FLAG_BIT;
int64_t reserved_ : IO_RESERVED_BIT;
};
};

View File

@ -49,8 +49,8 @@ const ObIOConfig &ObIOConfig::default_config()
void ObIOConfig::set_default_value()
{
write_failure_detect_interval_ = 60 * 1000 * 1000; // 1 min
read_failure_black_list_interval_ = 300 * 1000 * 1000; // 5 min
data_storage_warning_tolerance_time_ = 30L * 1000L * 1000L; // 30s
read_failure_black_list_interval_ = 60 * 1000 * 1000; // Cooperate with the adjustment of tolerance_time to 1min
data_storage_warning_tolerance_time_ = 5L * 1000L * 1000L; // 5s, same as parameter seed
data_storage_error_tolerance_time_ = 300L * 1000L * 1000L; // 300s
disk_io_thread_count_ = 8;
data_storage_io_timeout_ms_ = 120L * 1000L; // 120s
@ -1915,8 +1915,7 @@ void ObAsyncIOChannel::get_events()
}
} else { // io failed
LOG_ERROR("io request failed", K(*req), K(system_errno), K(complete_size));
const bool need_retry = false; // wait io device to support retry policy
if (need_retry) {
if (-EAGAIN == system_errno) { //retry
if (OB_FAIL(on_full_retry(*req))) {
LOG_WARN("retry io request failed", K(ret), K(system_errno), K(*req));
}
@ -2794,6 +2793,7 @@ void ObIOFaultDetector::handle(void *task)
const int64_t LONG_AIO_TIMEOUT_MS = 30000; // 30s
RetryTask *retry_task = reinterpret_cast<RetryTask *>(task);
retry_task->io_info_.flag_.set_unlimited();
retry_task->io_info_.flag_.set_detect();
int64_t timeout_ms = retry_task->timeout_ms_;
// remain 1s to avoid race condition for retry_black_list_interval
const int64_t retry_black_list_interval_ms = io_config_.read_failure_black_list_interval_ / 1000L - 1000L;
@ -2880,6 +2880,8 @@ void ObIOFaultDetector::record_failure(const ObIORequest &req)
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("io fault detector not init", K(ret), KP(is_inited_));
} else if (req.get_flag().is_detect()) {
//ignore, do not retry
} else if (req.is_finished_ && OB_IO_ERROR != req.ret_code_.io_ret_) {
// ignore, do nothing here
} else if (req.get_flag().is_read()) {