Improve epoll error handling.
The main change is to remove sockets from epoll if there are no requested events, which happens when a socket is considered closed (due to an error or otherwise). This prevents a busy loop when a socket is an error condition where it will constantly be signaled, but not deleted by higher level code. Other related changes: * Set DE_CLOSE on errors regardless of whether the socket is readable or writable. * Don't set DE_ACCEPT on errors. * Handle getsockopt(SO_ERROR) errors. * In IsDescriptorClosed: * Retry recv on EINTR. * Treat ECONNABORTED and EPIPE as errors. Original patch contributed by andrey.semashev@gmail.com. Bug: webrtc:11124 Change-Id: I67f33213efc1418b1ffc8f4867f606b7f8dc4ece Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/235863 Commit-Queue: Taylor Brandstetter <deadbeef@webrtc.org> Reviewed-by: Niels Moller <nisse@webrtc.org> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35300}
This commit is contained in:
committed by
WebRTC LUCI CQ
parent
83d667925f
commit
3041eb21e9
@ -719,7 +719,11 @@ bool SocketDispatcher::IsDescriptorClosed() {
|
||||
// from readability. So test on each readable call. Is this
|
||||
// inefficient? Probably.
|
||||
char ch;
|
||||
ssize_t res = ::recv(s_, &ch, 1, MSG_PEEK);
|
||||
ssize_t res;
|
||||
// Retry if the system call was interrupted.
|
||||
do {
|
||||
res = ::recv(s_, &ch, 1, MSG_PEEK);
|
||||
} while (res < 0 && errno == EINTR);
|
||||
if (res > 0) {
|
||||
// Data available, so not closed.
|
||||
return false;
|
||||
@ -730,13 +734,20 @@ bool SocketDispatcher::IsDescriptorClosed() {
|
||||
switch (errno) {
|
||||
// Returned if we've already closed s_.
|
||||
case EBADF:
|
||||
// This is dangerous: if we keep attempting to access a FD after close,
|
||||
// it could be reopened by something else making us think it's still
|
||||
// open. Note that this is only a DCHECK.
|
||||
RTC_NOTREACHED();
|
||||
return true;
|
||||
// Returned during ungraceful peer shutdown.
|
||||
case ECONNRESET:
|
||||
return true;
|
||||
case ECONNABORTED:
|
||||
return true;
|
||||
case EPIPE:
|
||||
return true;
|
||||
// The normal blocking error; don't log anything.
|
||||
case EWOULDBLOCK:
|
||||
// Interrupted system call.
|
||||
case EINTR:
|
||||
return false;
|
||||
default:
|
||||
// Assume that all other errors are just blocking errors, meaning the
|
||||
@ -1172,16 +1183,29 @@ bool PhysicalSocketServer::Wait(int cmsWait, bool process_io) {
|
||||
return WaitSelect(cmsWait, process_io);
|
||||
}
|
||||
|
||||
// `error_event` is true if we are responding to an event where we know an
|
||||
// error has occurred, which is possible with the poll/epoll implementations
|
||||
// but not the select implementation.
|
||||
//
|
||||
// `check_error` is true if there is the possibility of an error.
|
||||
static void ProcessEvents(Dispatcher* dispatcher,
|
||||
bool readable,
|
||||
bool writable,
|
||||
bool error_event,
|
||||
bool check_error) {
|
||||
RTC_DCHECK(!(error_event && !check_error));
|
||||
int errcode = 0;
|
||||
// TODO(pthatcher): Should we set errcode if getsockopt fails?
|
||||
if (check_error) {
|
||||
socklen_t len = sizeof(errcode);
|
||||
::getsockopt(dispatcher->GetDescriptor(), SOL_SOCKET, SO_ERROR, &errcode,
|
||||
&len);
|
||||
int res = ::getsockopt(dispatcher->GetDescriptor(), SOL_SOCKET, SO_ERROR,
|
||||
&errcode, &len);
|
||||
if (res < 0) {
|
||||
// If we are sure an error has occurred, or if getsockopt failed for a
|
||||
// socket descriptor, make sure we set the error code to a nonzero value.
|
||||
if (error_event || errno != ENOTSOCK) {
|
||||
errcode = EBADF;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Most often the socket is writable or readable or both, so make a single
|
||||
@ -1194,10 +1218,10 @@ static void ProcessEvents(Dispatcher* dispatcher,
|
||||
// readable or really closed.
|
||||
// TODO(pthatcher): Only peek at TCP descriptors.
|
||||
if (readable) {
|
||||
if (requested_events & DE_ACCEPT) {
|
||||
ff |= DE_ACCEPT;
|
||||
} else if (errcode || dispatcher->IsDescriptorClosed()) {
|
||||
if (errcode || dispatcher->IsDescriptorClosed()) {
|
||||
ff |= DE_CLOSE;
|
||||
} else if (requested_events & DE_ACCEPT) {
|
||||
ff |= DE_ACCEPT;
|
||||
} else {
|
||||
ff |= DE_READ;
|
||||
}
|
||||
@ -1209,14 +1233,17 @@ static void ProcessEvents(Dispatcher* dispatcher,
|
||||
if (requested_events & DE_CONNECT) {
|
||||
if (!errcode) {
|
||||
ff |= DE_CONNECT;
|
||||
} else {
|
||||
ff |= DE_CLOSE;
|
||||
}
|
||||
} else {
|
||||
ff |= DE_WRITE;
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we report any errors regardless of whether readable or writable.
|
||||
if (errcode) {
|
||||
ff |= DE_CLOSE;
|
||||
}
|
||||
|
||||
// Tell the descriptor about the event.
|
||||
if (ff != 0) {
|
||||
dispatcher->OnEvent(ff, errcode);
|
||||
@ -1327,7 +1354,8 @@ bool PhysicalSocketServer::WaitSelect(int cmsWait, bool process_io) {
|
||||
}
|
||||
|
||||
// The error code can be signaled through reads or writes.
|
||||
ProcessEvents(pdispatcher, readable, writable, readable || writable);
|
||||
ProcessEvents(pdispatcher, readable, writable, /*error_event=*/false,
|
||||
readable || writable);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1359,6 +1387,11 @@ void PhysicalSocketServer::AddEpoll(Dispatcher* pdispatcher, uint64_t key) {
|
||||
|
||||
struct epoll_event event = {0};
|
||||
event.events = GetEpollEvents(pdispatcher->GetRequestedEvents());
|
||||
if (event.events == 0u) {
|
||||
// Don't add at all if we don't have any requested events. Could indicate a
|
||||
// closed socket.
|
||||
return;
|
||||
}
|
||||
event.data.u64 = key;
|
||||
int err = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &event);
|
||||
RTC_DCHECK_EQ(err, 0);
|
||||
@ -1378,13 +1411,10 @@ void PhysicalSocketServer::RemoveEpoll(Dispatcher* pdispatcher) {
|
||||
struct epoll_event event = {0};
|
||||
int err = epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &event);
|
||||
RTC_DCHECK(err == 0 || errno == ENOENT);
|
||||
if (err == -1) {
|
||||
if (errno == ENOENT) {
|
||||
// Socket has already been closed.
|
||||
RTC_LOG_E(LS_VERBOSE, EN, errno) << "epoll_ctl EPOLL_CTL_DEL";
|
||||
} else {
|
||||
RTC_LOG_E(LS_ERROR, EN, errno) << "epoll_ctl EPOLL_CTL_DEL";
|
||||
}
|
||||
// Ignore ENOENT, which could occur if this descriptor wasn't added due to
|
||||
// having no requested events.
|
||||
if (err == -1 && errno != ENOENT) {
|
||||
RTC_LOG_E(LS_ERROR, EN, errno) << "epoll_ctl EPOLL_CTL_DEL";
|
||||
}
|
||||
}
|
||||
|
||||
@ -1399,10 +1429,24 @@ void PhysicalSocketServer::UpdateEpoll(Dispatcher* pdispatcher, uint64_t key) {
|
||||
struct epoll_event event = {0};
|
||||
event.events = GetEpollEvents(pdispatcher->GetRequestedEvents());
|
||||
event.data.u64 = key;
|
||||
int err = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &event);
|
||||
RTC_DCHECK_EQ(err, 0);
|
||||
if (err == -1) {
|
||||
RTC_LOG_E(LS_ERROR, EN, errno) << "epoll_ctl EPOLL_CTL_MOD";
|
||||
// Remove if we don't have any requested events. Could indicate a closed
|
||||
// socket.
|
||||
if (event.events == 0u) {
|
||||
epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &event);
|
||||
} else {
|
||||
int err = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &event);
|
||||
RTC_DCHECK(err == 0 || errno == ENOENT);
|
||||
if (err == -1) {
|
||||
// Could have been removed earlier due to no requested events.
|
||||
if (errno == ENOENT) {
|
||||
err = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &event);
|
||||
if (err == -1) {
|
||||
RTC_LOG_E(LS_ERROR, EN, errno) << "epoll_ctl EPOLL_CTL_ADD";
|
||||
}
|
||||
} else {
|
||||
RTC_LOG_E(LS_ERROR, EN, errno) << "epoll_ctl EPOLL_CTL_MOD";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1449,9 +1493,9 @@ bool PhysicalSocketServer::WaitEpoll(int cmsWait) {
|
||||
|
||||
bool readable = (event.events & (EPOLLIN | EPOLLPRI));
|
||||
bool writable = (event.events & EPOLLOUT);
|
||||
bool check_error = (event.events & (EPOLLRDHUP | EPOLLERR | EPOLLHUP));
|
||||
bool error = (event.events & (EPOLLRDHUP | EPOLLERR | EPOLLHUP));
|
||||
|
||||
ProcessEvents(pdispatcher, readable, writable, check_error);
|
||||
ProcessEvents(pdispatcher, readable, writable, error, error);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1517,9 +1561,9 @@ bool PhysicalSocketServer::WaitPoll(int cmsWait, Dispatcher* dispatcher) {
|
||||
|
||||
bool readable = (fds.revents & (POLLIN | POLLPRI));
|
||||
bool writable = (fds.revents & POLLOUT);
|
||||
bool check_error = (fds.revents & (POLLRDHUP | POLLERR | POLLHUP));
|
||||
bool error = (fds.revents & (POLLRDHUP | POLLERR | POLLHUP));
|
||||
|
||||
ProcessEvents(dispatcher, readable, writable, check_error);
|
||||
ProcessEvents(dispatcher, readable, writable, error, error);
|
||||
}
|
||||
|
||||
if (cmsWait != kForever) {
|
||||
|
||||
Reference in New Issue
Block a user