From 470de51e224f6e3c27918a17d5294d3675eebdd3 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Tue, 14 Mar 2017 13:06:26 +0200 Subject: [PATCH] Handle whitespace in TrxBoundaryParser TheBoundaryMatcher is not updated as it is likely it will be removed altogether. A regex that accepts comments in all relevant places becomes so hairy it is unmaintainable. It seems that the only working solution would be to first remove all comments and then perform the regex. --- server/core/maxscale/trxboundaryparser.hh | 119 ++++++++++++- server/core/test/testtrxtracking.cc | 198 ++++++++++++++++------ 2 files changed, 261 insertions(+), 56 deletions(-) diff --git a/server/core/maxscale/trxboundaryparser.hh b/server/core/maxscale/trxboundaryparser.hh index 440ffdaf3..c83e0ec31 100644 --- a/server/core/maxscale/trxboundaryparser.hh +++ b/server/core/maxscale/trxboundaryparser.hh @@ -97,8 +97,10 @@ public: { uint32_t type_mask = 0; - if (modutil_extract_SQL(pBuf, &m_pSql, &m_len)) + char* pSql; + if (modutil_extract_SQL(pBuf, &pSql, &m_len)) { + m_pSql = pSql; m_pI = m_pSql; m_pEnd = m_pI + m_len; @@ -123,7 +125,7 @@ private: void log_exhausted() { -#ifndef NDEBUG +#ifdef SS_DEBUG MXS_WARNING("More tokens expected in statement '%.*s'.", (int)m_len, m_pSql); #endif } @@ -490,14 +492,115 @@ private: return token; } + void bypass_whitespace() + { + while (m_pI != m_pEnd) + { + if (isspace(*m_pI)) + { + ++m_pI; + } + else if (*m_pI == '/') // Might be a comment + { + if ((m_pI + 1 != m_pEnd) && (*(m_pI + 1) == '*')) // Indeed it was + { + m_pI += 2; + + while (m_pI != m_pEnd) + { + if (*m_pI == '*') // Might be the end of the comment + { + ++m_pI; + + if (m_pI != m_pEnd) + { + if (*m_pI == '/') // Indeed it was + { + ++m_pI; + break; // Out of this inner while. + } + } + } + else + { + // It was not the end of the comment. + ++m_pI; + } + } + } + else + { + // Was not a comment, so we'll bail out. + break; + } + } + else if (*m_pI == '-') // Might be the start of a comment to the end of line + { + bool is_comment = false; + + if (m_pI + 1 != m_pEnd) + { + if (*(m_pI + 1) == '-') // Might be, yes. + { + if (m_pI + 2 != m_pEnd) + { + if (isspace(*(m_pI + 2))) // Yes, it is. + { + is_comment = true; + + m_pI += 3; + + while ((m_pI != m_pEnd) && (*m_pI != '\n')) + { + ++m_pI; + } + + if (m_pI != m_pEnd) + { + ss_dassert(*m_pI == '\n'); + ++m_pI; + } + } + } + } + } + + if (!is_comment) + { + break; + } + } + else if (*m_pI == '#') + { + ++m_pI; + + while ((m_pI != m_pEnd) && (*m_pI != '\n')) + { + ++m_pI; + } + + if (m_pI != m_pEnd) + { + ss_dassert(*m_pI == '\n'); + ++m_pI; + } + + m_pI = m_pEnd; + break; + } + else + { + // Neither whitespace not start of a comment, so we bail out. + break; + } + } + } + token_t next_token(token_required_t required = TOKEN_NOT_REQUIRED) { token_t token = PARSER_UNKNOWN_TOKEN; - while ((m_pI != m_pEnd) && isblank(*m_pI)) - { - ++m_pI; - } + bypass_whitespace(); if (m_pI == m_pEnd) { @@ -507,7 +610,7 @@ private: { ++m_pI; - while ((m_pI != m_pEnd) && isblank(*m_pI)) + while ((m_pI != m_pEnd) && isspace(*m_pI)) { ++m_pI; } @@ -668,7 +771,7 @@ private: TrxBoundaryParser& operator = (const TrxBoundaryParser&); private: - char* m_pSql; + const char* m_pSql; int m_len; const char* m_pI; const char* m_pEnd; diff --git a/server/core/test/testtrxtracking.cc b/server/core/test/testtrxtracking.cc index e54b8de0a..cf1db4e32 100644 --- a/server/core/test/testtrxtracking.cc +++ b/server/core/test/testtrxtracking.cc @@ -23,6 +23,14 @@ using namespace std; namespace { +enum test_target_t +{ + TEST_PARSER = 0x1, + TEST_QC = 0x2, + TEST_REGEX = 0x4, + TEST_ALL = (TEST_PARSER | TEST_QC) +}; + GWBUF* create_gwbuf(const char* zStmt) { size_t len = strlen(zStmt); @@ -121,7 +129,11 @@ bool test(uint32_t (*getter)(GWBUF*), const char* zStmt, uint32_t expected_type_ const char* prefixes[] = { " ", - " " + " ", + "\n", + " \n", + "\n ", + "-- comment\n" }; const int N_PREFIXES = sizeof(prefixes) / sizeof(prefixes[0]); @@ -148,13 +160,19 @@ const char* suffixes[] = { " ", " ", + "\n", + " \n", + "\n ", ";", " ;", " ;", " ;", " ;", " ; ", - " ; " + ";\n", + " ; ", + "-- comment this, comment that", + // "# comment this, comment that" /* qc_sqlite does not handle this */ }; const int N_SUFFIXES = sizeof(suffixes) / sizeof(suffixes[0]); @@ -179,7 +197,13 @@ bool test_with_suffixes(uint32_t (*getter)(GWBUF*), const string& base, uint32_t const char* whitespace[] = { - " " + " ", + "\n", + "/**/", + "/***/", + "/****/", + "/* / * */", + "-- comment\n" }; const int N_WHITESPACE = sizeof(whitespace) / sizeof(whitespace[0]); @@ -264,14 +288,14 @@ bool test_with_commas(uint32_t (*getter)(GWBUF*), const string& base, uint32_t t } -bool test(uint32_t (*getter)(GWBUF*)) +bool test(uint32_t (*getter)(GWBUF*), bool dont_bail_out) { bool rc = true; test_case* pTest = test_cases; test_case* pEnd = pTest + N_TEST_CASES; - while (pTest < pEnd) + while ((pTest < pEnd) && (dont_bail_out || rc)) { string base(pTest->zStmt); cout << base << endl; @@ -284,24 +308,36 @@ bool test(uint32_t (*getter)(GWBUF*)) rc = false; } - if (!test_with_prefixes(getter, base, pTest->type_mask)) + if (dont_bail_out || rc) { - rc = false; + if (!test_with_prefixes(getter, base, pTest->type_mask)) + { + rc = false; + } } - if (!test_with_whitespace(getter, base, pTest->type_mask)) + if (dont_bail_out || rc) { - rc = false; + if (!test_with_whitespace(getter, base, pTest->type_mask)) + { + rc = false; + } } - if (!test_with_commas(getter, base, pTest->type_mask)) + if (dont_bail_out || rc) { - rc = false; + if (!test_with_commas(getter, base, pTest->type_mask)) + { + rc = false; + } } - if (!test_with_suffixes(getter, base, pTest->type_mask)) + if (dont_bail_out || rc) { - rc = false; + if (!test_with_suffixes(getter, base, pTest->type_mask)) + { + rc = false; + } } ++pTest; @@ -312,58 +348,124 @@ bool test(uint32_t (*getter)(GWBUF*)) } +namespace +{ + +char USAGE[] = + "usage: test_trxtracking [-p] [-q] [-r] [-d]\n" + "\n" + "-p : Test using custom parser\n" + "-q : Test using query classifier\n" + "-r : Test using regex matching\n" + "-d : Don't bail out at first error\n" + "\n" + "If neither -p, -q or -r has been specified, then all will be tested.\n"; +} int main(int argc, char* argv[]) { - int rc = EXIT_FAILURE; + int rc = EXIT_SUCCESS; - set_datadir(strdup("/tmp")); - set_langdir(strdup(".")); - set_process_datadir(strdup("/tmp")); + bool test_all = true; + uint32_t test_target = 0; + bool dont_bail_out = false; - if (mxs_log_init(NULL, ".", MXS_LOG_TARGET_DEFAULT)) + int c; + while ((c = getopt(argc, argv, "pqr")) != -1) { - // We have to setup something in order for the regexes to be compiled. - if (qc_setup("qc_sqlite", NULL) && qc_process_init(QC_INIT_BOTH)) + switch (c) { - rc = EXIT_SUCCESS; + case 'p': + test_all = false; + test_target |= TEST_PARSER; + break; - cout << "QC" << endl; - cout << "==" << endl; - if (!test(get_qc_trx_type_mask)) + case 'q': + test_all = false; + test_target = TEST_QC; + break; + + case 'r': + test_all = false; + test_target = TEST_REGEX; + break; + + case 'd': + dont_bail_out = true; + break; + + default: + cout << USAGE << endl; + rc = EXIT_FAILURE; + } + } + + if (rc == EXIT_SUCCESS) + { + rc = EXIT_FAILURE; + + if (test_all) + { + test_target = TEST_ALL; + } + + set_datadir(strdup("/tmp")); + set_langdir(strdup(".")); + set_process_datadir(strdup("/tmp")); + + if (mxs_log_init(NULL, ".", MXS_LOG_TARGET_DEFAULT)) + { + // We have to setup something in order for the regexes to be compiled. + if (qc_setup("qc_sqlite", NULL) && qc_process_init(QC_INIT_BOTH)) { - rc = EXIT_FAILURE; - } - cout << endl; + rc = EXIT_SUCCESS; - cout << "Regex" << endl; - cout << "=====" << endl; - if (!test(get_regex_trx_type_mask)) + if (test_target & TEST_QC) + { + cout << "QC" << endl; + cout << "==" << endl; + if (!test(get_qc_trx_type_mask, dont_bail_out)) + { + rc = EXIT_FAILURE; + } + cout << endl; + } + + if (test_target & TEST_REGEX) + { + cout << "Regex" << endl; + cout << "=====" << endl; + if (!test(get_regex_trx_type_mask, dont_bail_out)) + { + rc = EXIT_FAILURE; + } + cout << endl; + } + + if (test_target & TEST_PARSER) + { + cout << "Parser" << endl; + cout << "======" << endl; + if (!test(get_parser_trx_type_mask, dont_bail_out)) + { + rc = EXIT_FAILURE; + } + cout << endl; + } + + qc_process_end(QC_INIT_BOTH); + } + else { - rc = EXIT_FAILURE; + cerr << "error: Could not initialize qc_sqlite." << endl; } - cout << endl; - cout << "Parser" << endl; - cout << "======" << endl; - if (!test(get_parser_trx_type_mask)) - { - rc = EXIT_FAILURE; - } - cout << endl; - - qc_process_end(QC_INIT_BOTH); + mxs_log_finish(); } else { - cerr << "error: Could not initialize qc_sqlite." << endl; + cerr << "error: Could not initialize log." << endl; } - - mxs_log_finish(); - } - else - { - cerr << "error: Could not initialize log." << endl; } return rc;