MXS-2644 Make ci Clustrix_nodes::prepare_server() more resilient

When checking the state of a Clustrix node, we do so in steps:
- Is Clustrix installed
- Is Clustrix running
- Can Clustrix be accessed using root
- Can Clustrix be accessed using the test user

and deal with a failure at each point.
This commit is contained in:
Johan Wikman
2019-08-20 15:45:39 +03:00
parent e2124ec01f
commit 52df969e13

View File

@ -5,30 +5,170 @@
int Clustrix_nodes::prepare_server(int m) int Clustrix_nodes::prepare_server(int m)
{ {
int rv = 1;
int ec; int ec;
char* clustrix_rpm = ssh_node_output(m, "rpm -qa | grep clustrix-clxnode", true, &ec); char* clustrix_rpm = ssh_node_output(m, "rpm -qa | grep clustrix-clxnode", true, &ec);
if (strstr(clustrix_rpm, "clustrix-clxnode") == NULL) if (strstr(clustrix_rpm, "clustrix-clxnode") == NULL)
{ {
printf("%s\n", ssh_node_output(m, CLUSTRIX_DEPS_YUM, true, &ec)); char* str1 = nullptr;
printf("%s\n", ssh_node_output(m, WGET_CLUSTRIX, false, &ec)); char* str2 = nullptr;
printf("%s\n", ssh_node_output(m, UNPACK_CLUSTRIX, false, &ec)); char* str3 = nullptr;
printf("%s\n", ssh_node_output(m, INSTALL_CLUSTRIX, false, &ec)); char* str4 = nullptr;
create_users(m);
str1 = ssh_node_output(m, CLUSTRIX_DEPS_YUM, true, &ec);
if (ec == 0)
{
printf("Installed clustrix dependencies on node %d.\n", m);
str2 = ssh_node_output(m, WGET_CLUSTRIX, false, &ec);
if (ec == 0)
{
printf("Wgot Clustrix installation package on node %d.\n", m);
str3 = ssh_node_output(m, UNPACK_CLUSTRIX, false, &ec);
if (ec == 0)
{
printf("Unpacked Clustrix package on node %d.\n", m);
str4 = ssh_node_output(m, INSTALL_CLUSTRIX, false, &ec);
if (ec == 0)
{
printf("Successfully installed Clustrix on node %d.\n", m);
}
else
{
printf("Error: Could not install Clustrix package on node %d: %s\n", m, str4);
}
}
else
{
printf("Error: Could not unpack Clustrix package on node %d: %s\n", m, str3);
}
}
else
{
printf("Error: Could not wget Clustrix installation package on node %d: %s\n", m, str2);
}
}
else
{
printf("Error: Could not install Clustrix dependencies on node %d: %s\n", m, str1);
}
free(str4);
free(str3);
free(str2);
free(str1);
}
free(clustrix_rpm);
bool running = false;
ec = ssh_node(m, "systemctl status clustrix", true);
if (ec == 0)
{
printf("Clustrix running on node %d.\n", m);
ec = ssh_node(m, "mysql -e 'SELECT @@server_id'", true);
if (ec == 0)
{
running = true;
}
else
{
printf("Could not connect as root to Clustrix on node %d, restarting.\n", m);
ec = ssh_node(m, "systemctl restart clustrix", true);
if (ec == 0)
{
printf("Successfully restarted Clustrix on node %d.\n", m);
running = true;
}
else
{
printf("Could not restart Clustrix on node %d.\n", m);
}
}
} }
else else
{ {
printf("%s\n", ssh_node_output(m, "systemctl restart clustrix", true, &ec)); printf("Clustrix not running on node %d, starting.\n", m);
ec = ssh_node(m, "systemctl start clustrix", true);
if (ec == 0)
{
printf("Successfully started Clustrix on node %d.\n", m);
running = true;
}
else
{
printf("Could not start Clustrix on node %d.\n", m);
}
} }
return 0; bool check_users = false;
if (running)
{
int start = time(NULL);
int now;
do
{
ec = ssh_node(m, "mysql -e 'SELECT @@server_id'", true);
now = time(NULL);
if (ec != 0)
{
printf("Could not connect to Clustrix as root on node %d, "
"sleeping a while (totally at most ~1 minute) and retrying.\n", m);
sleep(10);
}
}
while (ec != 0 && now - start < 60);
if (ec == 0)
{
printf("Could connect as root to Clustrix on node %d.\n", m);
check_users = true;
}
else
{
printf("Could not connect as root to Clustrix on node %d within given timeframe.\n", m);
}
}
if (check_users)
{
std::string command("mysql ");
command += "-u ";
command += this->user_name;
command += " ";
command += "-p";
command += this->password;
ec = ssh_node(m, command.c_str(), false);
if (ec == 0)
{
printf("Can access Clustrix using user '%s.\n", this->user_name);
rv = 0;
}
else
{
printf("Cannot access Clustrix using user '%s', creating users.\n", this->user_name);
// TODO: We need a return code here.
create_users(m);
rv = 0;
}
}
return rv;
} }
int Clustrix_nodes::start_replication() int Clustrix_nodes::start_replication()
{ {
for (int i = 0; i < N; i++)
{
prepare_server(i);
}
std::string lic_filename = std::string(getenv("HOME")) std::string lic_filename = std::string(getenv("HOME"))
+ std::string("/.config/mdbci/clustrix_license"); + std::string("/.config/mdbci/clustrix_license");
std::ifstream lic_file; std::ifstream lic_file;
@ -75,15 +215,22 @@ std::string Clustrix_nodes::cnf_servers()
int Clustrix_nodes::check_replication() int Clustrix_nodes::check_replication()
{ {
int res = 0; int res = 0;
connect(); if (connect() == 0)
for (int i = 0; i < N; i++)
{ {
if (execute_query_count_rows(nodes[i], "select * from system.nodeinfo") != N) for (int i = 0; i < N; i++)
{ {
res = 1; if (execute_query_count_rows(nodes[i], "select * from system.nodeinfo") != N)
{
res = 1;
}
} }
} }
close_connections(); else
{
res = 1;
}
close_connections(); // Some might have been created by connect().
return res; return res;
} }