MXS-1702: Add new canonicalization function

The function uses a hand-written parser that picks out the values and
replaces them with question marks. This function is not yet able to
process negative values; a `-?` token will be generated for all negative
numbers.

The new function will canonicalize the query in such a way that string and
number constants cannot be distinguished which means that it won't pass
the current test cases.
This commit is contained in:
Markus Mäkelä
2018-03-15 20:31:47 +02:00
parent a03e8d46f0
commit 75fadd711a

View File

@ -20,6 +20,7 @@
#include <maxscale/alloc.h>
#include <maxscale/buffer.h>
#include <maxscale/buffer.hh>
#include <maxscale/modutil.h>
#include <maxscale/platform.h>
#include <maxscale/poll.h>
@ -1185,6 +1186,170 @@ mxs_pcre2_result_t modutil_mysql_wildcard_match(const char* pattern, const char*
return rval;
}
static std::pair<bool, mxs::Buffer::iterator> probe_number(mxs::Buffer::iterator it,
mxs::Buffer::iterator end)
{
ss_dassert(it != end);
ss_dassert(isdigit(*it));
std::pair<bool, mxs::Buffer::iterator> rval = std::make_pair(true, it);
bool is_hex = *it == '0';
// Skip the first character, we know it's a number
it++;
while (it != end)
{
if (!isdigit(*it))
{
if (is_hex && (*it == 'x' || *it == 'X'))
{
/** A hexadecimal literal, mark that we've seen the `x` so that
* if another one is seen, it is treated as a normal character */
is_hex = false;
}
else if (*it == 'e')
{
// Possible scientific notation number
auto next_it = std::next(it);
if (next_it == end || (!isdigit(*next_it) && *next_it != '-'))
{
rval.first = false;
break;
}
// Skip over the minus if we have one
if (*next_it == '-')
{
it++;
}
}
else if (*it == '.')
{
// Possible decimal number
auto next_it = std::next(it);
if (next_it != end && !isdigit(*next_it))
{
/** No number after the period, not a decimal number.
* The fractional part of the number is optional in MariaDB. */
rval.first = false;
break;
}
ss_dassert(isdigit(*next_it));
}
else
{
// If we have a non-text character, we treat it as a number
rval.first = !isalpha(*it);
break;
}
}
// Store the previous iterator
rval.second = it;
it++;
}
return rval;
}
char* modutil_get_new_canonical(GWBUF* querybuf)
{
std::string rval;
mxs::Buffer buf(querybuf);
enum state
{
NONE,
SINGLE_QUOTE,
DOUBLE_QUOTE,
BACKTICK
} my_state = NONE;
char prev = ' ';
for (auto it = std::next(buf.begin(), MYSQL_HEADER_LEN + 1); // Skip packet header and command
it != buf.end(); it++)
{
if (*it == '\\')
{
// Jump over any escaped values
rval += *it;
it++;
rval += *it;
continue;
}
switch (my_state)
{
case BACKTICK:
rval += *it;
if (*it == '`')
{
my_state = NONE;
}
break;
case SINGLE_QUOTE:
if (*it == '\'')
{
rval += "'?'";
my_state = NONE;
}
break;
case DOUBLE_QUOTE:
if (*it == '"')
{
rval += "\"?\"";
my_state = NONE;
}
break;
default:
if (isspace(*it))
{
*it = ' ';
}
else if (isdigit(*it) && !isalpha(prev) && !isdigit(prev) && prev != '_')
{
auto num_end = probe_number(it, buf.end());
if (num_end.first)
{
rval += '?';
it = num_end.second;
continue;
}
}
switch (*it)
{
case '\'':
my_state = SINGLE_QUOTE;
break;
case '"':
my_state = DOUBLE_QUOTE;
break;
case '`':
my_state = BACKTICK;
/* falls through */
default:
rval += *it;
break;
}
break;
}
prev = *it;
}
buf.release();
return MXS_STRDUP(rval.c_str());
}
/*
* Replace user-provided literals with question marks.
*
@ -1238,7 +1403,6 @@ char* modutil_get_canonical(GWBUF* querybuf)
return querystr;
}
char* modutil_MySQL_bypass_whitespace(char* sql, size_t len)
{
char *i = sql;