Add verbose logging for session command failures
If the routing of a session command fails due to problems with the backend connections, a more verbose error message is logged. The added status information in the Backend class makes tracking the original cause of the problem a lot easier due to knowing where, when and why the connection was closed.
This commit is contained in:
@ -330,6 +330,23 @@ public:
|
||||
int64_t num_selects() const;
|
||||
const maxbase::StopWatch& session_timer() const;
|
||||
const maxbase::IntervalTimer& select_timer() const;
|
||||
|
||||
/**
|
||||
* Get verbose status description
|
||||
*
|
||||
* @return A verbose description of the backend's status
|
||||
*/
|
||||
std::string get_verbose_status() const;
|
||||
|
||||
/**
|
||||
* Add explanation message to latest close reason
|
||||
*
|
||||
* The message is printed in get_verbose_status() if the backend is closed.
|
||||
*
|
||||
* @param reason The human-readable message
|
||||
*/
|
||||
void set_close_reason(const std::string& reason);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Internal state of the backend
|
||||
@ -355,8 +372,13 @@ private:
|
||||
*/
|
||||
void set_state(backend_state state);
|
||||
|
||||
// Stringification function
|
||||
static std::string to_string(backend_state state);
|
||||
|
||||
bool m_closed; /**< True if a connection has been opened and closed */
|
||||
time_t m_closed_at; /**< Timestamp when the backend was last closed */
|
||||
std::string m_close_reason; /**< Why the backend was closed */
|
||||
time_t m_opened_at; /**< Timestamp when the backend was last opened */
|
||||
SERVER_REF* m_backend; /**< Backend server */
|
||||
DCB* m_dcb; /**< Backend DCB */
|
||||
mxs::Buffer m_pending_cmd; /**< Pending commands */
|
||||
|
||||
@ -17,12 +17,15 @@
|
||||
|
||||
#include <maxbase/atomic.hh>
|
||||
|
||||
#include <maxscale/alloc.h>
|
||||
#include <maxscale/protocol/mysql.h>
|
||||
|
||||
using namespace maxscale;
|
||||
|
||||
Backend::Backend(SERVER_REF* ref)
|
||||
: m_closed(false)
|
||||
, m_closed_at(0)
|
||||
, m_opened_at(0)
|
||||
, m_backend(ref)
|
||||
, m_dcb(NULL)
|
||||
, m_state(0)
|
||||
@ -49,6 +52,7 @@ void Backend::close(close_type type)
|
||||
if (!m_closed)
|
||||
{
|
||||
m_closed = true;
|
||||
m_closed_at = time(NULL);
|
||||
|
||||
if (in_use())
|
||||
{
|
||||
@ -182,6 +186,8 @@ bool Backend::connect(MXS_SESSION* session, SessionCommandList* sescmd)
|
||||
if ((m_dcb = dcb_connect(m_backend->server, session, m_backend->server->protocol)))
|
||||
{
|
||||
m_closed = false;
|
||||
m_closed_at = 0;
|
||||
m_opened_at = time(NULL);
|
||||
m_state = IN_USE;
|
||||
mxb::atomic::add(&m_backend->connections, 1, mxb::atomic::RELAXED);
|
||||
rval = true;
|
||||
@ -285,3 +291,75 @@ int64_t Backend::num_selects() const
|
||||
{
|
||||
return m_num_selects;
|
||||
}
|
||||
|
||||
void Backend::set_close_reason(const std::string& reason)
|
||||
{
|
||||
m_close_reason = reason;
|
||||
}
|
||||
|
||||
std::string Backend::get_verbose_status() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
char* status = server_status(m_backend->server);
|
||||
char closed_at[30] = "not closed";
|
||||
char opened_at[30] = "not opened";
|
||||
|
||||
if (m_closed_at)
|
||||
{
|
||||
mxb_assert(m_closed);
|
||||
ctime_r(&m_closed_at, closed_at);
|
||||
char* nl = strrchr(closed_at, '\n');
|
||||
mxb_assert(nl);
|
||||
*nl = '\0';
|
||||
}
|
||||
|
||||
if (m_opened_at)
|
||||
{
|
||||
ctime_r(&m_opened_at, opened_at);
|
||||
char* nl = strrchr(opened_at, '\n');
|
||||
mxb_assert(nl);
|
||||
*nl = '\0';
|
||||
}
|
||||
|
||||
ss << "name: [" << name() << "] "
|
||||
<< "status: [" << (status ? status : "no status") << "] "
|
||||
<< "state: [" << to_string((backend_state)m_state) << "] "
|
||||
<< "last opened at: [" << opened_at << "] "
|
||||
<< "last closed at: [" << closed_at << "] "
|
||||
<< "last close reason: [" << m_close_reason << "] "
|
||||
<< "num sescmd: [" << m_session_commands.size() << "]";
|
||||
|
||||
MXS_FREE(status);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string Backend::to_string(backend_state state)
|
||||
{
|
||||
std::string rval;
|
||||
|
||||
if (state == 0)
|
||||
{
|
||||
rval = "NOT_IN_USE";
|
||||
}
|
||||
else
|
||||
{
|
||||
if (state & IN_USE)
|
||||
{
|
||||
rval += "IN_USE";
|
||||
}
|
||||
|
||||
if (state & WAITING_RESULT)
|
||||
{
|
||||
rval += rval.empty() ? "" : "|";
|
||||
rval += "WAITING_RESULT";
|
||||
}
|
||||
|
||||
if (state & FATAL_FAILURE)
|
||||
{
|
||||
rval += rval.empty() ? "" : "|";
|
||||
rval += "FATAL_FAILURE";
|
||||
}
|
||||
}
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
@ -556,8 +556,16 @@ bool RWSplitSession::route_session_write(GWBUF* querybuf, uint8_t command, uint3
|
||||
}
|
||||
else
|
||||
{
|
||||
MXS_ERROR("Could not route session command: %s", attempted_write ? "Write to all backends failed" :
|
||||
"All connections have failed");
|
||||
std::string status;
|
||||
for (const auto& a : m_backends)
|
||||
{
|
||||
status += "\n";
|
||||
status += a->get_verbose_status();
|
||||
}
|
||||
|
||||
MXS_ERROR("Could not route session command: %s. Connection information: %s",
|
||||
attempted_write ? "Write to all backends failed" : "All connections have failed",
|
||||
status.c_str());
|
||||
}
|
||||
|
||||
return nsucc;
|
||||
@ -1057,6 +1065,7 @@ bool RWSplitSession::handle_master_is_target(SRWBackend* dest)
|
||||
if (m_current_master && m_current_master->in_use())
|
||||
{
|
||||
m_current_master->close();
|
||||
m_current_master->set_close_reason("The original master is not available");
|
||||
}
|
||||
}
|
||||
else if (!m_config.delayed_retry
|
||||
|
||||
@ -78,6 +78,7 @@ static void discard_if_response_differs(SRWBackend backend,
|
||||
STRPACKETTYPE(cmd),
|
||||
query.empty() ? "<no query>" : query.c_str());
|
||||
backend->close(mxs::Backend::CLOSE_FATAL);
|
||||
backend->set_close_reason("Invalid response to: " + query);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -951,6 +951,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
|
||||
}
|
||||
|
||||
backend->close();
|
||||
backend->set_close_reason("Master connection failed: " + extract_error(errmsgbuf));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -964,6 +965,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
|
||||
// Try to replay the transaction on another node
|
||||
can_continue = start_trx_replay();
|
||||
backend->close();
|
||||
backend->set_close_reason("Read-only trx failed: " + extract_error(errmsgbuf));
|
||||
|
||||
if (!can_continue)
|
||||
{
|
||||
@ -984,6 +986,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
|
||||
m_otrx_state = OTRX_INACTIVE;
|
||||
can_continue = start_trx_replay();
|
||||
backend->close();
|
||||
backend->set_close_reason("Optimistic trx failed: " + extract_error(errmsgbuf));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1073,6 +1076,7 @@ bool RWSplitSession::handle_error_new_connection(DCB* backend_dcb, GWBUF* errmsg
|
||||
* is closed, it's possible that the routing logic will pick the failed
|
||||
* server as the target. */
|
||||
backend->close();
|
||||
backend->set_close_reason("Slave connection failed: " + extract_error(errmsg));
|
||||
|
||||
if (route_stored)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user