tidb/cmd/explaintest/t/access_path_selection.test

CREATE TABLE `access_path_selection` (
`a` int,
`b` int,
KEY `IDX_a` (`a`),
KEY `IDX_b` (`b`),
KEY `IDX_ab` (`a`, `b`)
);
explain select a from access_path_selection where a < 3;
# In this query, IDX_ab is better than IDX_a.
# The reason is that we have to do double scan if we use IDX_a since it doesn't contain column b.
explain select a, b from access_path_selection where a < 3;
# In this query, IDX_ab can't be used, so IDX_b is the best.
explain select a, b from access_path_selection where b < 3;
explain select a, b from access_path_selection where a < 3 and b < 3;

CREATE TABLE `outdated_statistics` (
`a` int,
`b` int,
`c` int,
INDEX idx_a(a),
INDEX idx_ab(a,b)
);
insert into outdated_statistics values (2, 2, 2);
insert into outdated_statistics values (3, 3, 3);
insert into outdated_statistics values (4, 4, 4);
analyze table outdated_statistics;
insert into outdated_statistics values (1, 1, 1);
insert into outdated_statistics values (1, 2, 2);
insert into outdated_statistics values (1, 3, 3);
# Only update idx_ab and leave idx_a outdated.
# Then the estimated number of rows on idx_ab is 1 while it's 0 on idx_a.
# But for this query, idx_ab is always better than idx_a,
# because idx_a can't take column b into account while idx_ab can.
# This wrong case can be solved by Skyline Pruning, so we may update its
# result after Skyline Pruning is introduced.
analyze table outdated_statistics index idx_ab;
explain select * from outdated_statistics where a=1 and b=1 and c=1;

CREATE TABLE `unknown_correlation` (
    id int,
    a int,
    PRIMARY KEY (`id`),
    INDEX idx_a(a)
);
INSERT INTO unknown_correlation values (1, 1),(2, 1),(3, 1),(4, 1),(5, 1),(6, 1),(7, 1),(8, 1),(9, 1),(10, 1),(11, 1),(12, 1),(13, 1),(14, 1),(15, 1),(16, 1),(17, 1),(18, 1),(19, 1),(20, 2),(21, 2),(22, 2),(23, 2),(24, 2),(25, 2);
ANALYZE TABLE unknown_correlation;
# Estimated row count on idx_a is 6,
# while the estimated row count on TableScan is 4.17, which is computed as below:
#   selectivity := ds.stats.RowCount / rowCount   ==> selectivity = 6 / 25
#   rowCount := prop.ExpectedCnt / selectivity    ==> rowCount = 1 / (6 / 25) = 4.17
# Then the planner will pick TableScan instead of IndexScan to execute.
# But actually the cost of TableScan is 25-6+1=20 under this correlation between `id` and `a`.
# So IndexScan is better than TableScan for this query, but the planner do a wrong choice.
# This problem can be solved by introducing correlation between columns,
# so we may update this case later.
EXPLAIN SELECT * FROM unknown_correlation WHERE a = 2 ORDER BY id limit 1;