MXS-935: Implement column matching

With the advent of qc_get_field_info, columns can now be matched.
However, there is still some nondeterminism caused by the table
information not containing contextual information (exactly where
the table is used).

Further, suppose table X contains the column a and table Z contains
the column b, then given a statement like

    SELECT a, b from X, Z;

we cannot know whether a is in X or Z, or b in X or Z, without being
aware of the schema, which we currently are not.

Consequently, as long as MaxScale is not aware of the schema, some
heuristics must be applied. For instance, if exactly one table is
referred to, then we can assume that columns that are not explicitly
qualified are from that table.

The rule tests are currently rather rudimentary and need to be
expanded.
This commit is contained in:
Johan Wikman
2016-11-14 16:17:31 +02:00
parent 548182afe3
commit c4999232ce
4 changed files with 827 additions and 153 deletions

View File

@ -173,8 +173,11 @@ where,
* the _op_ can be `=`, `!=`, `like` or `unlike`, and
* the _value_ a string.
If _op_ is `=` or `!=` then _value_ is used verbatim; if it is `like`
If _op_ is `=` or `!=` then _value_ is used as a string; if it is `like`
or `unlike`, then _value_ is interpreted as a _pcre2_ regular expression.
Note though that if _attribute_ is `database`, `table` or `column`, then
the string is interpreted as a name, where a dot `.` denotes qualification
or scoping.
The objects in the `store` array are processed in order. If the result
of a comparison is _true_, no further processing will be made and the
@ -206,6 +209,39 @@ select * from tbl where b = 3 and a = 2;
as well. Although they conceptually are identical, there will be two
cache entries.
### Qualified Names
When using `=` or `!=` in the rule object in conjunction with `database`,
`table` and `column`, the provided string is interpreted as a name, that is,
dot (`.`) denotes qualification or scope.
In practice that means that if _attribute_ is `database` then _value_ may
not contain a dot, if _attribute_ is `table` then _value_ may contain one
dot, used for separating the database and table names respectively, and
if _attribute_ is `column` then _value_ may contain one or two dots, used
for separating table and column names, or database, table and column names.
Note that if a qualified name is used as a _value_, then all parts of the
name must be available for a match. Currently MariaDB MaxScale may not
always be capable of deducing in what table a particular column is. If
that is the case, then a value like `tbl.field` may not necessarily
be a match even if the field is `field` and the table actually is `tbl`.
### Implication of the _default_ database.
If the rule concerns the `database`, then the default database will be
considered only if the statement refers to *no* specific database.
### Regexp Matching
The string used for matching the regular expression contains as much
information as there is available. For instance, in a situation like
```
use somedb;
select fld from tbl;
```
the string matched against the regular expression will be `somedb.tbl.fld`.
### Examples
Cache all queries targeting a particular database.

File diff suppressed because it is too large Load Diff

View File

@ -48,8 +48,14 @@ typedef struct cache_rule
char *value; // The value from the rule file.
struct
{
pcre2_code* code;
pcre2_match_data* data;
char *database;
char *table;
char *column;
} simple; // Details, only for CACHE_OP_[EQ|NEQ]
struct
{
pcre2_code *code;
pcre2_match_data *data;
} regexp; // Regexp data, only for CACHE_OP_[LIKE|UNLIKE].
uint32_t debug; // The debug level.
struct cache_rule *next;

View File

@ -133,7 +133,45 @@ struct store_test_case
// Table of test cases iterated over by test_store().
// Judging by the arguments and by the fields test_store() prints on failure
// (query, rule, default_db, matches), each entry appears to provide, in order:
// the rule attribute, the rule operator, the rule value, whether the query is
// expected to match, the default database (NULL for none) and the query itself
// -- TODO confirm against the STORE_TEST_CASE macro, which is not visible here.
const struct store_test_case store_test_cases[] =
{
// "column" attribute, exact operators ("=" / "!="), unqualified and table-qualified values.
STORE_TEST_CASE("column", "=", "a", true, NULL, "SELECT a FROM tbl"),
// NOTE(review): the next entry has no trailing comma and is repeated (with a
// comma) two entries further down; it looks like the pre-change line of a
// diff rather than part of the final table -- confirm.
STORE_TEST_CASE("column", "=", "b", false, NULL, "SELECT a FROM tbl")
STORE_TEST_CASE("column", "!=", "a", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("column", "=", "b", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("column", "!=", "b", true, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("column", "=", "tbl.a", true, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("column", "=", "tbl.a", true, NULL, "SELECT tbl.a FROM tbl"),
// "column" attribute, regular expression operators ("like" / "unlike").
STORE_TEST_CASE("column", "like", ".*a", true, NULL, "SELECT a from tbl"),
STORE_TEST_CASE("column", "like", ".*a", true, NULL, "SELECT tbl.a from tbl"),
STORE_TEST_CASE("column", "like", ".*a", true, NULL, "SELECT db.tbl.a from tbl"),
STORE_TEST_CASE("column", "like", ".*aa", false, NULL, "SELECT a from tbl"),
STORE_TEST_CASE("column", "like", ".*aa", false, NULL, "SELECT tbl.a from tbl"),
STORE_TEST_CASE("column", "like", ".*aa", false, NULL, "SELECT db.tbl.a from tbl"),
STORE_TEST_CASE("column", "unlike", ".*aa", true, NULL, "SELECT a from tbl"),
STORE_TEST_CASE("column", "unlike", ".*aa", true, NULL, "SELECT tbl.a from tbl"),
STORE_TEST_CASE("column", "unlike", ".*aa", true, NULL, "SELECT db.tbl.a from tbl"),
// "table" attribute, unqualified and database-qualified values; some entries
// supply the database via the fifth argument instead of in the query.
STORE_TEST_CASE("table", "=", "tbl", true, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("table", "!=", "tbl", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("table", "=", "tbl2", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("table", "!=", "tbl2", true, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("table", "=", "db.tbl", true, NULL, "SELECT a from db.tbl"),
STORE_TEST_CASE("table", "=", "db.tbl", true, "db", "SELECT a from tbl"),
STORE_TEST_CASE("table", "!=", "db.tbl", false, NULL, "SELECT a from db.tbl"),
STORE_TEST_CASE("table", "!=", "db.tbl", false, "db", "SELECT a from tbl"),
// "database" attribute, with the database given in the query or via the fifth argument.
STORE_TEST_CASE("database", "=", "db", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("database", "!=", "db", true, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("database", "=", "db1", true, NULL, "SELECT a FROM db1.tbl"),
STORE_TEST_CASE("database", "!=", "db1", false, NULL, "SELECT a FROM db1.tbl"),
STORE_TEST_CASE("database", "=", "db1", true, "db1", "SELECT a FROM tbl"),
STORE_TEST_CASE("database", "!=", "db1", false, "db1", "SELECT a FROM tbl"),
// "query" attribute: the rule value is compared with the whole statement text.
STORE_TEST_CASE("query", "=", "SELECT a FROM tbl", true, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("query", "!=", "SELECT a FROM tbl", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("query", "=", "SELECT b FROM tbl", false, NULL, "SELECT a FROM tbl"),
STORE_TEST_CASE("query", "!=", "SELECT b FROM tbl", true, NULL, "SELECT a FROM tbl"),
// "column" attribute where the column also occurs in the WHERE clause: the
// expected results indicate that a column appearing only in WHERE is not a match.
STORE_TEST_CASE("column", "=", "a", false, NULL, "SELECT b FROM tbl WHERE a = 5"),
STORE_TEST_CASE("column", "=", "a", true, NULL, "SELECT a, b FROM tbl WHERE a = 5"),
};
const size_t n_store_test_cases = sizeof(store_test_cases) / sizeof(store_test_cases[0]);
@ -144,6 +182,7 @@ int test_store()
for (int i = 0; i < n_store_test_cases; ++i)
{
printf("TC : %d\n", i + 1);
const struct store_test_case *test_case = &store_test_cases[i];
CACHE_RULES *rules = cache_rules_parse(test_case->rule, 0);
@ -160,10 +199,12 @@ int test_store()
{
printf("Query : %s\n"
"Rule : %s\n"
"Def-db : %s\n"
"Expected: %s\n"
"Result : %s\n\n",
test_case->query,
test_case->rule,
test_case->default_db,
test_case->matches ? "A match" : "Not a match",
matches ? "A match" : "Not a match");
}