Add verbose logging for session command failures
If the routing of a session command fails due to problems with the backend connections, a more verbose error message is logged. The added status information in the Backend class makes tracking the original cause of the problem a lot easier due to knowing where, when and why the connection was closed.
This commit is contained in:
@ -330,6 +330,23 @@ public:
|
|||||||
int64_t num_selects() const;
|
int64_t num_selects() const;
|
||||||
const maxbase::StopWatch& session_timer() const;
|
const maxbase::StopWatch& session_timer() const;
|
||||||
const maxbase::IntervalTimer& select_timer() const;
|
const maxbase::IntervalTimer& select_timer() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get verbose status description
|
||||||
|
*
|
||||||
|
* @return A verbose description of the backend's status
|
||||||
|
*/
|
||||||
|
std::string get_verbose_status() const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add explanation message to latest close reason
|
||||||
|
*
|
||||||
|
* The message is printed in get_verbose_status() if the backend is closed.
|
||||||
|
*
|
||||||
|
* @param reason The human-readable message
|
||||||
|
*/
|
||||||
|
void set_close_reason(const std::string& reason);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* Internal state of the backend
|
* Internal state of the backend
|
||||||
@ -355,8 +372,13 @@ private:
|
|||||||
*/
|
*/
|
||||||
void set_state(backend_state state);
|
void set_state(backend_state state);
|
||||||
|
|
||||||
|
// Stringification function
|
||||||
|
static std::string to_string(backend_state state);
|
||||||
|
|
||||||
bool m_closed; /**< True if a connection has been opened and closed */
|
bool m_closed; /**< True if a connection has been opened and closed */
|
||||||
|
time_t m_closed_at; /**< Timestamp when the backend was last closed */
|
||||||
|
std::string m_close_reason; /**< Why the backend was closed */
|
||||||
|
time_t m_opened_at; /**< Timestamp when the backend was last opened */
|
||||||
SERVER_REF* m_backend; /**< Backend server */
|
SERVER_REF* m_backend; /**< Backend server */
|
||||||
DCB* m_dcb; /**< Backend DCB */
|
DCB* m_dcb; /**< Backend DCB */
|
||||||
mxs::Buffer m_pending_cmd; /**< Pending commands */
|
mxs::Buffer m_pending_cmd; /**< Pending commands */
|
||||||
|
|||||||
@ -17,12 +17,15 @@
|
|||||||
|
|
||||||
#include <maxbase/atomic.hh>
|
#include <maxbase/atomic.hh>
|
||||||
|
|
||||||
|
#include <maxscale/alloc.h>
|
||||||
#include <maxscale/protocol/mysql.h>
|
#include <maxscale/protocol/mysql.h>
|
||||||
|
|
||||||
using namespace maxscale;
|
using namespace maxscale;
|
||||||
|
|
||||||
Backend::Backend(SERVER_REF* ref)
|
Backend::Backend(SERVER_REF* ref)
|
||||||
: m_closed(false)
|
: m_closed(false)
|
||||||
|
, m_closed_at(0)
|
||||||
|
, m_opened_at(0)
|
||||||
, m_backend(ref)
|
, m_backend(ref)
|
||||||
, m_dcb(NULL)
|
, m_dcb(NULL)
|
||||||
, m_state(0)
|
, m_state(0)
|
||||||
@ -49,6 +52,7 @@ void Backend::close(close_type type)
|
|||||||
if (!m_closed)
|
if (!m_closed)
|
||||||
{
|
{
|
||||||
m_closed = true;
|
m_closed = true;
|
||||||
|
m_closed_at = time(NULL);
|
||||||
|
|
||||||
if (in_use())
|
if (in_use())
|
||||||
{
|
{
|
||||||
@ -182,6 +186,8 @@ bool Backend::connect(MXS_SESSION* session, SessionCommandList* sescmd)
|
|||||||
if ((m_dcb = dcb_connect(m_backend->server, session, m_backend->server->protocol)))
|
if ((m_dcb = dcb_connect(m_backend->server, session, m_backend->server->protocol)))
|
||||||
{
|
{
|
||||||
m_closed = false;
|
m_closed = false;
|
||||||
|
m_closed_at = 0;
|
||||||
|
m_opened_at = time(NULL);
|
||||||
m_state = IN_USE;
|
m_state = IN_USE;
|
||||||
mxb::atomic::add(&m_backend->connections, 1, mxb::atomic::RELAXED);
|
mxb::atomic::add(&m_backend->connections, 1, mxb::atomic::RELAXED);
|
||||||
rval = true;
|
rval = true;
|
||||||
@ -285,3 +291,75 @@ int64_t Backend::num_selects() const
|
|||||||
{
|
{
|
||||||
return m_num_selects;
|
return m_num_selects;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Backend::set_close_reason(const std::string& reason)
|
||||||
|
{
|
||||||
|
m_close_reason = reason;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Backend::get_verbose_status() const
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
char* status = server_status(m_backend->server);
|
||||||
|
char closed_at[30] = "not closed";
|
||||||
|
char opened_at[30] = "not opened";
|
||||||
|
|
||||||
|
if (m_closed_at)
|
||||||
|
{
|
||||||
|
mxb_assert(m_closed);
|
||||||
|
ctime_r(&m_closed_at, closed_at);
|
||||||
|
char* nl = strrchr(closed_at, '\n');
|
||||||
|
mxb_assert(nl);
|
||||||
|
*nl = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_opened_at)
|
||||||
|
{
|
||||||
|
ctime_r(&m_opened_at, opened_at);
|
||||||
|
char* nl = strrchr(opened_at, '\n');
|
||||||
|
mxb_assert(nl);
|
||||||
|
*nl = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << "name: [" << name() << "] "
|
||||||
|
<< "status: [" << (status ? status : "no status") << "] "
|
||||||
|
<< "state: [" << to_string((backend_state)m_state) << "] "
|
||||||
|
<< "last opened at: [" << opened_at << "] "
|
||||||
|
<< "last closed at: [" << closed_at << "] "
|
||||||
|
<< "last close reason: [" << m_close_reason << "] "
|
||||||
|
<< "num sescmd: [" << m_session_commands.size() << "]";
|
||||||
|
|
||||||
|
MXS_FREE(status);
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string Backend::to_string(backend_state state)
|
||||||
|
{
|
||||||
|
std::string rval;
|
||||||
|
|
||||||
|
if (state == 0)
|
||||||
|
{
|
||||||
|
rval = "NOT_IN_USE";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (state & IN_USE)
|
||||||
|
{
|
||||||
|
rval += "IN_USE";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state & WAITING_RESULT)
|
||||||
|
{
|
||||||
|
rval += rval.empty() ? "" : "|";
|
||||||
|
rval += "WAITING_RESULT";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state & FATAL_FAILURE)
|
||||||
|
{
|
||||||
|
rval += rval.empty() ? "" : "|";
|
||||||
|
rval += "FATAL_FAILURE";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|||||||
@ -556,8 +556,16 @@ bool RWSplitSession::route_session_write(GWBUF* querybuf, uint8_t command, uint3
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MXS_ERROR("Could not route session command: %s", attempted_write ? "Write to all backends failed" :
|
std::string status;
|
||||||
"All connections have failed");
|
for (const auto& a : m_backends)
|
||||||
|
{
|
||||||
|
status += "\n";
|
||||||
|
status += a->get_verbose_status();
|
||||||
|
}
|
||||||
|
|
||||||
|
MXS_ERROR("Could not route session command: %s. Connection information: %s",
|
||||||
|
attempted_write ? "Write to all backends failed" : "All connections have failed",
|
||||||
|
status.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
return nsucc;
|
return nsucc;
|
||||||
@ -1057,6 +1065,7 @@ bool RWSplitSession::handle_master_is_target(SRWBackend* dest)
|
|||||||
if (m_current_master && m_current_master->in_use())
|
if (m_current_master && m_current_master->in_use())
|
||||||
{
|
{
|
||||||
m_current_master->close();
|
m_current_master->close();
|
||||||
|
m_current_master->set_close_reason("The original master is not available");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!m_config.delayed_retry
|
else if (!m_config.delayed_retry
|
||||||
|
|||||||
@ -78,6 +78,7 @@ static void discard_if_response_differs(SRWBackend backend,
|
|||||||
STRPACKETTYPE(cmd),
|
STRPACKETTYPE(cmd),
|
||||||
query.empty() ? "<no query>" : query.c_str());
|
query.empty() ? "<no query>" : query.c_str());
|
||||||
backend->close(mxs::Backend::CLOSE_FATAL);
|
backend->close(mxs::Backend::CLOSE_FATAL);
|
||||||
|
backend->set_close_reason("Invalid response to: " + query);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -951,6 +951,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
backend->close();
|
backend->close();
|
||||||
|
backend->set_close_reason("Master connection failed: " + extract_error(errmsgbuf));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -964,6 +965,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
|
|||||||
// Try to replay the transaction on another node
|
// Try to replay the transaction on another node
|
||||||
can_continue = start_trx_replay();
|
can_continue = start_trx_replay();
|
||||||
backend->close();
|
backend->close();
|
||||||
|
backend->set_close_reason("Read-only trx failed: " + extract_error(errmsgbuf));
|
||||||
|
|
||||||
if (!can_continue)
|
if (!can_continue)
|
||||||
{
|
{
|
||||||
@ -984,6 +986,7 @@ void RWSplitSession::handleError(GWBUF* errmsgbuf,
|
|||||||
m_otrx_state = OTRX_INACTIVE;
|
m_otrx_state = OTRX_INACTIVE;
|
||||||
can_continue = start_trx_replay();
|
can_continue = start_trx_replay();
|
||||||
backend->close();
|
backend->close();
|
||||||
|
backend->set_close_reason("Optimistic trx failed: " + extract_error(errmsgbuf));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1073,6 +1076,7 @@ bool RWSplitSession::handle_error_new_connection(DCB* backend_dcb, GWBUF* errmsg
|
|||||||
* is closed, it's possible that the routing logic will pick the failed
|
* is closed, it's possible that the routing logic will pick the failed
|
||||||
* server as the target. */
|
* server as the target. */
|
||||||
backend->close();
|
backend->close();
|
||||||
|
backend->set_close_reason("Slave connection failed: " + extract_error(errmsg));
|
||||||
|
|
||||||
if (route_stored)
|
if (route_stored)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user