57 lines
2.5 KiB
Plaintext
57 lines
2.5 KiB
Plaintext
CREATE TABLE `access_path_selection` (
|
|
`a` int,
|
|
`b` int,
|
|
KEY `IDX_a` (`a`),
|
|
KEY `IDX_b` (`b`),
|
|
KEY `IDX_ab` (`a`, `b`)
|
|
);
|
|
explain select a from access_path_selection where a < 3;
|
|
# In this query, IDX_ab is better than IDX_a.
|
|
# The reason is that we have to do double scan if we use IDX_a since it doesn't contain column b.
|
|
explain select a, b from access_path_selection where a < 3;
|
|
# In this query, IDX_ab can't be used, so IDX_b is the best.
|
|
explain select a, b from access_path_selection where b < 3;
|
|
explain select a, b from access_path_selection where a < 3 and b < 3;
|
|
|
|
CREATE TABLE `outdated_statistics` (
|
|
`a` int,
|
|
`b` int,
|
|
`c` int,
|
|
INDEX idx_a(a),
|
|
INDEX idx_ab(a,b)
|
|
);
|
|
insert into outdated_statistics values (2, 2, 2);
|
|
insert into outdated_statistics values (3, 3, 3);
|
|
insert into outdated_statistics values (4, 4, 4);
|
|
analyze table outdated_statistics;
|
|
insert into outdated_statistics values (1, 1, 1);
|
|
insert into outdated_statistics values (1, 2, 2);
|
|
insert into outdated_statistics values (1, 3, 3);
|
|
# Only update idx_ab and leave idx_a outdated.
|
|
# Then the estimated number of rows on idx_ab is 1 while it's 0 on idx_a.
|
|
# But for this query, idx_ab is always better than idx_a,
|
|
# because idx_a can't take column b into account while idx_ab can.
|
|
# This wrong case can be solved by Skyline Pruning, so we may update its
|
|
# result after Skyline Pruning is introduced.
|
|
analyze table outdated_statistics index idx_ab;
|
|
explain select * from outdated_statistics where a=1 and b=1 and c=1;
|
|
|
|
CREATE TABLE `unknown_correlation` (
|
|
id int,
|
|
a int,
|
|
PRIMARY KEY (`id`),
|
|
INDEX idx_a(a)
|
|
);
|
|
INSERT INTO unknown_correlation values (1, 1),(2, 1),(3, 1),(4, 1),(5, 1),(6, 1),(7, 1),(8, 1),(9, 1),(10, 1),(11, 1),(12, 1),(13, 1),(14, 1),(15, 1),(16, 1),(17, 1),(18, 1),(19, 1),(20, 2),(21, 2),(22, 2),(23, 2),(24, 2),(25, 2);
|
|
ANALYZE TABLE unknown_correlation;
|
|
# Estimated row count on idx_a is 6,
|
|
# while the estimated row count on TableScan is 4.17, which is computed as below:
|
|
# selectivity := ds.stats.RowCount / rowCount ==> selectivity = 6 / 25
|
|
# rowCount := prop.ExpectedCnt / selectivity ==> rowCount = 1 / (6 / 25) = 4.17
|
|
# Then the planner will pick TableScan instead of IndexScan to execute.
|
|
# But actually the cost of TableScan is 25-6+1=20 under this correlation between `id` and `a`.
|
|
# So IndexScan is better than TableScan for this query, but the planner do a wrong choice.
|
|
# This problem can be solved by introducing correlation between columns,
|
|
# so we may update this case later.
|
|
EXPLAIN SELECT * FROM unknown_correlation WHERE a = 2 ORDER BY id limit 1;
|