From fe4c848079676ae776c1e2051c4e75f1b9f6e160 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= <markus.makela@mariadb.com>
Date: Thu, 27 Dec 2018 11:58:35 +0200
Subject: [PATCH] Use direct access into strings while canonicalizing

The process of appending to a std::string always includes a size check in
case the internal storage needs to expand. Given that we know a
canonicalized version of a query string is never larger than the original
string and that we pre-allocate enough memory to cope with the worst-case
scenario, the extra logic in std::string::push_back is unnecessary and an
extra cost. Writing directly into the string avoids this cost and improves
the performance.
---
 server/core/modutil.cc | 49 +++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/server/core/modutil.cc b/server/core/modutil.cc
index 4a77984b6..ce0d35a20 100644
--- a/server/core/modutil.cc
+++ b/server/core/modutil.cc
@@ -1383,22 +1383,21 @@ static std::pair<bool, mxs::Buffer::iterator> probe_number(mxs::Buffer::iterator
     return rval;
 }
 
-static inline bool is_negation(const std::string& str)
+static inline bool is_negation(const std::string& str, int i)
 {
     bool rval = false;
 
-    if (!str.empty() && str.back() == '-')
+    if (i > 0 && str[i - 1] == '-')
     {
         // Possibly a negative number
         rval = true;
-
-        for (auto it = std::next(str.rbegin()); it != str.rend(); it++)
+        for (int j = i - 1; j >= 0; j--)
         {
-            if (!is_space(*it))
+            if (!is_space(str[j]))
             {
                 /** If we find a previously converted value, we know that it
                  * is not a negation but a subtraction. */
-                rval = *it != '?';
+                rval = str[j] != '?';
                 break;
             }
         }
@@ -1433,7 +1432,8 @@ namespace maxscale
 std::string get_canonical(GWBUF* querybuf)
 {
     std::string rval;
-    rval.reserve(gwbuf_length(querybuf) - MYSQL_HEADER_LEN + 1);
+    int i = 0;
+    rval.resize(gwbuf_length(querybuf) - MYSQL_HEADER_LEN + 1);
     mxs::Buffer buf(querybuf);
 
     for (auto it = std::next(buf.begin(), MYSQL_HEADER_LEN + 1);    // Skip packet header and command
@@ -1442,19 +1442,24 @@ std::string get_canonical(GWBUF* querybuf)
         if (!is_special(*it))
         {
             // Normal character, no special handling required
-            rval += *it;
+            rval[i++] = *it;
         }
         else if (*it == '\\')
         {
             // Jump over any escaped values
-            rval += *it++;
+            rval[i++] += *it++;
 
             if (it != buf.end())
             {
-                rval += *it;
+                rval[i++] = *it;
+            }
+            else
+            {
+                // Query that ends with a backslash
+                break;
             }
         }
-        else if (is_space(*it) && (rval.empty() || is_space(rval.back())))
+        else if (is_space(*it) && (i == 0 || is_space(rval[i - 1])))
         {
             // Repeating space, skip it
         }
@@ -1478,7 +1483,7 @@ std::string get_canonical(GWBUF* querybuf)
             else
             {
                 // Executable comment, treat it as normal SQL
-                rval += *it;
+                rval[i++] = *it;
             }
         }
         else if ((*it == '#' || *it == '-')
@@ -1507,18 +1512,18 @@ std::string get_canonical(GWBUF* querybuf)
                 break;
             }
         }
-        else if (is_digit(*it) && (rval.empty() || (!is_alnum(rval.back()) && rval.back() != '_')))
+        else if (is_digit(*it) && (i == 0 || (!is_alnum(rval[i - 1]) && rval[i - 1] != '_')))
         {
             auto num_end = probe_number(it, buf.end());
 
             if (num_end.first)
             {
-                if (is_negation(rval))
+                if (is_negation(rval, i))
                 {
                     // Remove the sign
-                    rval.resize(rval.size() - 1);
+                    i--;
                 }
-                rval += '?';
+                rval[i++] = '?';
                 it = num_end.second;
             }
         }
@@ -1526,21 +1531,25 @@ std::string get_canonical(GWBUF* querybuf)
         {
             char c = *it;
             it = find_char(std::next(it), buf.end(), c);
-            rval += '?';
+            rval[i++] = '?';
         }
         else if (*it == '`')
         {
             auto start = it;
             it = find_char(std::next(it), buf.end(), '`');
-            rval.append(start, it);
-            rval += '`';
+            std::copy(start, it, &rval[i]);
+            i += std::distance(start, it);
+            rval[i++] = '`';
         }
         else
         {
-            rval += *it;
+            rval[i++] = *it;
         }
     }
 
+    // Shrink the buffer so that the internal bookkeeping of std::string remains up to date
+    rval.resize(i);
+
     buf.release();
 
     return rval;