Use direct access into strings while canonicalizing

Appending to a std::string always includes a capacity check in case the
internal storage needs to grow. Since we know that the canonicalized
version of a query string is never larger than the original string, and we
pre-allocate enough memory to cope with the worst-case scenario, the extra
logic in std::string::push_back is unnecessary overhead. Writing directly
into the string avoids this cost and improves performance.
This commit is contained in:
Markus Mäkelä
2018-12-27 11:58:35 +02:00
parent ad5b244313
commit fe4c848079

View File

@ -1383,22 +1383,21 @@ static std::pair<bool, mxs::Buffer::iterator> probe_number(mxs::Buffer::iterator
return rval; return rval;
} }
static inline bool is_negation(const std::string& str) static inline bool is_negation(const std::string& str, int i)
{ {
bool rval = false; bool rval = false;
if (!str.empty() && str.back() == '-') if (i > 0 && str[i - 1] == '-')
{ {
// Possibly a negative number // Possibly a negative number
rval = true; rval = true;
for (int j = i - 1; j >= 0; j--)
for (auto it = std::next(str.rbegin()); it != str.rend(); it++)
{ {
if (!is_space(*it)) if (!is_space(str[j]))
{ {
/** If we find a previously converted value, we know that it /** If we find a previously converted value, we know that it
* is not a negation but a subtraction. */ * is not a negation but a subtraction. */
rval = *it != '?'; rval = str[j] != '?';
break; break;
} }
} }
@ -1433,7 +1432,8 @@ namespace maxscale
std::string get_canonical(GWBUF* querybuf) std::string get_canonical(GWBUF* querybuf)
{ {
std::string rval; std::string rval;
rval.reserve(gwbuf_length(querybuf) - MYSQL_HEADER_LEN + 1); int i = 0;
rval.resize(gwbuf_length(querybuf) - MYSQL_HEADER_LEN + 1);
mxs::Buffer buf(querybuf); mxs::Buffer buf(querybuf);
for (auto it = std::next(buf.begin(), MYSQL_HEADER_LEN + 1); // Skip packet header and command for (auto it = std::next(buf.begin(), MYSQL_HEADER_LEN + 1); // Skip packet header and command
@ -1442,19 +1442,24 @@ std::string get_canonical(GWBUF* querybuf)
if (!is_special(*it)) if (!is_special(*it))
{ {
// Normal character, no special handling required // Normal character, no special handling required
rval += *it; rval[i++] = *it;
} }
else if (*it == '\\') else if (*it == '\\')
{ {
// Jump over any escaped values // Jump over any escaped values
rval += *it++; rval[i++] += *it++;
if (it != buf.end()) if (it != buf.end())
{ {
rval += *it; rval[i++] = *it;
}
else
{
// Query that ends with a backslash
break;
} }
} }
else if (is_space(*it) && (rval.empty() || is_space(rval.back()))) else if (is_space(*it) && (i == 0 || is_space(rval[i - 1])))
{ {
// Repeating space, skip it // Repeating space, skip it
} }
@ -1478,7 +1483,7 @@ std::string get_canonical(GWBUF* querybuf)
else else
{ {
// Executable comment, treat it as normal SQL // Executable comment, treat it as normal SQL
rval += *it; rval[i++] = *it;
} }
} }
else if ((*it == '#' || *it == '-') else if ((*it == '#' || *it == '-')
@ -1507,18 +1512,18 @@ std::string get_canonical(GWBUF* querybuf)
break; break;
} }
} }
else if (is_digit(*it) && (rval.empty() || (!is_alnum(rval.back()) && rval.back() != '_'))) else if (is_digit(*it) && (i == 0 || (!is_alnum(rval[i - 1]) && rval[i - 1] != '_')))
{ {
auto num_end = probe_number(it, buf.end()); auto num_end = probe_number(it, buf.end());
if (num_end.first) if (num_end.first)
{ {
if (is_negation(rval)) if (is_negation(rval, i))
{ {
// Remove the sign // Remove the sign
rval.resize(rval.size() - 1); i--;
} }
rval += '?'; rval[i++] = '?';
it = num_end.second; it = num_end.second;
} }
} }
@ -1526,21 +1531,25 @@ std::string get_canonical(GWBUF* querybuf)
{ {
char c = *it; char c = *it;
it = find_char(std::next(it), buf.end(), c); it = find_char(std::next(it), buf.end(), c);
rval += '?'; rval[i++] = '?';
} }
else if (*it == '`') else if (*it == '`')
{ {
auto start = it; auto start = it;
it = find_char(std::next(it), buf.end(), '`'); it = find_char(std::next(it), buf.end(), '`');
rval.append(start, it); std::copy(start, it, &rval[i]);
rval += '`'; i += std::distance(start, it);
rval[i++] = '`';
} }
else else
{ {
rval += *it; rval[i++] = *it;
} }
} }
// Shrink the buffer so that the internal bookkeeping of std::string remains up to date
rval.resize(i);
buf.release(); buf.release();
return rval; return rval;