CREATE TABLE `access_path_selection` ( `a` int, `b` int, KEY `IDX_a` (`a`), KEY `IDX_b` (`b`), KEY `IDX_ab` (`a`, `b`) ); explain select a from access_path_selection where a < 3; # In this query, IDX_ab is better than IDX_a. # The reason is that we have to do double scan if we use IDX_a since it doesn't contain column b. explain select a, b from access_path_selection where a < 3; # In this query, IDX_ab can't be used, so IDX_b is the best. explain select a, b from access_path_selection where b < 3; explain select a, b from access_path_selection where a < 3 and b < 3; CREATE TABLE `outdated_statistics` ( `a` int, `b` int, `c` int, INDEX idx_a(a), INDEX idx_ab(a,b) ); insert into outdated_statistics values (2, 2, 2); insert into outdated_statistics values (3, 3, 3); insert into outdated_statistics values (4, 4, 4); analyze table outdated_statistics; insert into outdated_statistics values (1, 1, 1); insert into outdated_statistics values (1, 2, 2); insert into outdated_statistics values (1, 3, 3); # Only update idx_ab and leave idx_a outdated. # Then the estimated number of rows on idx_ab is 1 while it's 0 on idx_a. # But for this query, idx_ab is always better than idx_a, # because idx_a can't take column b into account while idx_ab can. # This wrong case can be solved by Skyline Pruning, so we may update its # result after Skyline Pruning is introduced. analyze table outdated_statistics index idx_ab; explain select * from outdated_statistics where a=1 and b=1 and c=1; CREATE TABLE `unknown_correlation` ( id int, a int, PRIMARY KEY (`id`), INDEX idx_a(a) ); INSERT INTO unknown_correlation values (1, 1),(2, 1),(3, 1),(4, 1),(5, 1),(6, 1),(7, 1),(8, 1),(9, 1),(10, 1),(11, 1),(12, 1),(13, 1),(14, 1),(15, 1),(16, 1),(17, 1),(18, 1),(19, 1),(20, 2),(21, 2),(22, 2),(23, 2),(24, 2),(25, 2); ANALYZE TABLE unknown_correlation; # Estimated row count on idx_a is 6, # while the estimated row count on TableScan is 4.17, which is computed as below: # selectivity := ds.stats.RowCount / rowCount ==> selectivity = 6 / 25 # rowCount := prop.ExpectedCnt / selectivity ==> rowCount = 1 / (6 / 25) = 4.17 # Then the planner will pick TableScan instead of IndexScan to execute. # But actually the cost of TableScan is 25-6+1=20 under this correlation between `id` and `a`. # So IndexScan is better than TableScan for this query, but the planner do a wrong choice. # This problem can be solved by introducing correlation between columns, # so we may update this case later. EXPLAIN SELECT * FROM unknown_correlation WHERE a = 2 ORDER BY id limit 1;