Add verbose logging for session command failures

If the routing of a session command fails due to problems with the backend
connections, a more verbose error message is logged. The added status
information in the Backend class makes tracking the original cause of the
problem a lot easier due to knowing where, when and why the connection was
closed.
This commit is contained in:
Markus Mäkelä
2019-01-28 10:56:01 +02:00
parent df9335382d
commit 24c9b62a2f
5 changed files with 116 additions and 2 deletions

View File

@ -330,6 +330,23 @@ public:
int64_t num_selects() const;
const maxbase::StopWatch& session_timer() const;
const maxbase::IntervalTimer& select_timer() const;
/**
* Get verbose status description
*
* @return A verbose description of the backend's status
*/
std::string get_verbose_status() const;
/**
* Add explanation message to latest close reason
*
* The message is printed in get_verbose_status() if the backend is closed.
*
* @param reason The human-readable message
*/
void set_close_reason(const std::string& reason);
private:
/**
* Internal state of the backend
@ -355,8 +372,13 @@ private:
*/
void set_state(backend_state state);
// Stringification function
static std::string to_string(backend_state state);
bool m_closed; /**< True if a connection has been opened and closed */
time_t m_closed_at; /**< Timestamp when the backend was last closed */
std::string m_close_reason; /**< Why the backend was closed */
time_t m_opened_at; /**< Timestamp when the backend was last opened */
SERVER_REF* m_backend; /**< Backend server */
DCB* m_dcb; /**< Backend DCB */
mxs::Buffer m_pending_cmd; /**< Pending commands */

View File

@ -17,12 +17,15 @@
#include <maxbase/atomic.hh>
#include <maxscale/alloc.h>
#include <maxscale/protocol/mysql.h>
using namespace maxscale;
Backend::Backend(SERVER_REF* ref)
: m_closed(false)
, m_closed_at(0)
, m_opened_at(0)
, m_backend(ref)
, m_dcb(NULL)
, m_state(0)
@ -49,6 +52,7 @@ void Backend::close(close_type type)
if (!m_closed)
{
m_closed = true;
m_closed_at = time(NULL);
if (in_use())
{
@ -182,6 +186,8 @@ bool Backend::connect(MXS_SESSION* session, SessionCommandList* sescmd)
if ((m_dcb = dcb_connect(m_backend->server, session, m_backend->server->protocol)))
{
m_closed = false;
m_closed_at = 0;
m_opened_at = time(NULL);
m_state = IN_USE;
mxb::atomic::add(&m_backend->connections, 1, mxb::atomic::RELAXED);
rval = true;
@ -285,3 +291,75 @@ int64_t Backend::num_selects() const
{
return m_num_selects;
}
void Backend::set_close_reason(const std::string& reason)
{
m_close_reason = reason;
}
std::string Backend::get_verbose_status() const
{
std::stringstream ss;
char* status = server_status(m_backend->server);
char closed_at[30] = "not closed";
char opened_at[30] = "not opened";
if (m_closed_at)
{
mxb_assert(m_closed);
ctime_r(&m_closed_at, closed_at);
char* nl = strrchr(closed_at, '\n');
mxb_assert(nl);
*nl = '\0';
}
if (m_opened_at)
{
ctime_r(&m_opened_at, opened_at);
char* nl = strrchr(opened_at, '\n');
mxb_assert(nl);
*nl = '\0';
}
ss << "name: [" << name() << "] "
<< "status: [" << (status ? status : "no status") << "] "
<< "state: [" << to_string((backend_state)m_state) << "] "
<< "last opened at: [" << opened_at << "] "
<< "last closed at: [" << closed_at << "] "
<< "last close reason: [" << m_close_reason << "] "
<< "num sescmd: [" << m_session_commands.size() << "]";
MXS_FREE(status);
return ss.str();
}
std::string Backend::to_string(backend_state state)
{
std::string rval;
if (state == 0)
{
rval = "NOT_IN_USE";
}
else
{
if (state & IN_USE)
{
rval += "IN_USE";
}
if (state & WAITING_RESULT)
{
rval += rval.empty() ? "" : "|";
rval += "WAITING_RESULT";
}
if (state & FATAL_FAILURE)
{
rval += rval.empty() ? "" : "|";
rval += "FATAL_FAILURE";
}
}
return rval;
}

View File

@ -556,8 +556,16 @@ bool RWSplitSession::route_session_write(GWBUF* querybuf, uint8_t command, uint3
}
else
{
MXS_ERROR("Could not route session command: %s", attempted_write ? "Write to all backends failed" :
"All connections have failed");
std::string status;
for (const auto& a : m_backends)
{
status += "\n";
status += a->get_verbose_status();
}
MXS_ERROR("Could not route session command: %s. Connection information: %s",
attempted_write ? "Write to all backends failed" : "All connections have failed",
status.c_str());
}
return nsucc;
@ -1057,6 +1065,7 @@ bool RWSplitSession::handle_master_is_target(SRWBackend* dest)
if (m_current_master && m_current_master->in_use())
{
m_current_master->close();
m_current_master->set_close_reason("The original master is not available");
}
}
else if (!m_config.delayed_retry

View File

@ -78,6 +78,7 @@ static void discard_if_response_differs(SRWBackend backend,
STRPACKETTYPE(cmd),
query.empty() ? "<no query>" : query.c_str());
backend->close(mxs::Backend::CLOSE_FATAL);
backend->set_close_reason("Invalid response to: " + query);
}
}

View File

@ -951,6 +951,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
}
backend->close();
backend->set_close_reason("Master connection failed: " + extract_error(errmsgbuf));
}
else
{
@ -964,6 +965,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
// Try to replay the transaction on another node
can_continue = start_trx_replay();
backend->close();
backend->set_close_reason("Read-only trx failed: " + extract_error(errmsgbuf));
if (!can_continue)
{
@ -984,6 +986,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
m_otrx_state = OTRX_INACTIVE;
can_continue = start_trx_replay();
backend->close();
backend->set_close_reason("Optimistic trx failed: " + extract_error(errmsgbuf));
}
else
{
@ -1073,6 +1076,7 @@ bool RWSplitSession::handle_error_new_connection(DCB* backend_dcb, GWBUF* errmsg
* is closed, it's possible that the routing logic will pick the failed
* server as the target. */
backend->close();
backend->set_close_reason("Slave connection failed: " + extract_error(errmsg));
if (route_stored)
{