Fix I/O hang and I/O amplification
This commit is contained in:
parent
24ef025b73
commit
451044fb83
@ -66,7 +66,17 @@ int QSchedCallback::handle(TCRequest* tc_req)
|
||||
LOG_INFO("submit_request cost too much time", K(ret), K(time_guard), K(req));
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
io_req_finish(req, ObIORetCode(ret));
|
||||
if (ret == OB_EAGAIN) {
|
||||
if (REACH_TIME_INTERVAL(1 * 1000L * 1000L)) {
|
||||
LOG_INFO("device channel eagain", K(ret));
|
||||
}
|
||||
if (OB_FAIL(req.retry_io())) {
|
||||
LOG_WARN("retry io failed", K(ret), K(req));
|
||||
io_req_finish(req, ObIORetCode(ret));
|
||||
}
|
||||
} else {
|
||||
io_req_finish(req, ObIORetCode(ret));
|
||||
}
|
||||
}
|
||||
req.dec_ref("phyqueue_dec"); // ref for io queue
|
||||
return ret;
|
||||
|
@ -1514,6 +1514,17 @@ int ObIORequest::re_prepare()
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObIORequest::retry_io()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if(OB_ISNULL(tenant_io_mgr_.get_ptr())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("tenant io mgr is null", K(ret), K(*this));
|
||||
} else if (OB_FAIL(tenant_io_mgr_.get_ptr()->retry_io(*this))) {
|
||||
LOG_WARN("retry io failed", K(ret), K(*this));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
int ObIORequest::try_alloc_buf_until_timeout(char *&io_buf)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
@ -619,6 +619,7 @@ public:
|
||||
int prepare(char *next_buffer = nullptr, int64_t next_size = 0, int64_t next_offset = 0);
|
||||
int recycle_buffer();
|
||||
int re_prepare();
|
||||
int retry_io();
|
||||
int try_alloc_buf_until_timeout(char *&io_buf);
|
||||
bool can_callback() const;
|
||||
void free_io_buffer();
|
||||
|
@ -1936,6 +1936,10 @@ int ObTenantIOManager::retry_io(ObIORequest &req)
|
||||
} else if (OB_UNLIKELY(!is_working())) {
|
||||
ret = OB_STATE_NOT_MATCH;
|
||||
LOG_WARN("tenant not working", K(ret), K(tenant_id_));
|
||||
} else if (GCONF._enable_tree_based_io_scheduler) {
|
||||
if (OB_FAIL(qsched_.schedule_request(req))) {
|
||||
LOG_WARN("retry io request failed", K(ret), K(req));
|
||||
}
|
||||
} else if (OB_FAIL(io_scheduler_->retry_request(req))) {
|
||||
LOG_WARN("retry io request into sender failed", K(ret), K(req));
|
||||
}
|
||||
|
@ -2134,7 +2134,9 @@ int ObAsyncIOChannel::submit(ObIORequest &req)
|
||||
} else if (OB_UNLIKELY(device_handle_ != req.fd_.device_handle_)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", K(ret), K(req), KP(device_handle_));
|
||||
} else if (submit_count_ >= MAX_AIO_EVENT_CNT) {
|
||||
} else if (OB_ISNULL(req.io_result_)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
} else if ((!req.get_flag().is_detect()) && (submit_count_ >= MAX_AIO_EVENT_CNT - MAX_DETECT_DISK_HUNG_IO_CNT)) {
|
||||
ret = OB_EAGAIN;
|
||||
if (REACH_TIME_INTERVAL(1000000L)) {
|
||||
LOG_WARN("too many io requests", K(ret), K(submit_count_));
|
||||
@ -2142,7 +2144,7 @@ int ObAsyncIOChannel::submit(ObIORequest &req)
|
||||
} else if (OB_UNLIKELY(current_ts > req.timeout_ts())) {
|
||||
ret = OB_TIMEOUT;
|
||||
LOG_WARN("io timeout because current time is larger than timeout timestamp", K(ret), K(current_ts), K(req));
|
||||
} else if (device_channel_->used_io_depth_ > device_channel_->max_io_depth_) {
|
||||
} else if ((!req.get_flag().is_detect()) && (device_channel_->used_io_depth_ > device_channel_->max_io_depth_ - MAX_DETECT_DISK_HUNG_IO_CNT)) {
|
||||
ret = OB_EAGAIN;
|
||||
FLOG_INFO("reach max io depth", K(ret), K(device_channel_->used_io_depth_), K(device_channel_->max_io_depth_));
|
||||
} else {
|
||||
|
@ -457,6 +457,7 @@ private:
|
||||
|
||||
private:
|
||||
static const int32_t MAX_AIO_EVENT_CNT = 512;
|
||||
static const int32_t MAX_DETECT_DISK_HUNG_IO_CNT = 10;
|
||||
static const int64_t AIO_POLLING_TIMEOUT_NS = 1000L * 1000L * 1000L - 1L; // almost 1s, for timespec_valid check
|
||||
private:
|
||||
bool is_inited_;
|
||||
|
Loading…
x
Reference in New Issue
Block a user