30 lines
1.2 KiB
SQL
30 lines
1.2 KiB
SQL
# ***Case Data***
|
|
# min = 1, max = 20, ndv = 20, null_num = 50
|
|
# num_row = 250, density = 0.0025, bucket count = 200
|
|
# not-null ratio = (num_row - null_num) / num_row = (250 - 50) / 250 = 0.8
|
|
# | val | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
|
|
# | cnt | 5 | 14 | 13 | 16 | 9 | 7 | 10 | 13 | 15 | 1 | 5 | 6 | 10 | 9 | 9 | 12 | 21 | 11 | 11 | 3 |
|
|
# | acc | 5 | 19 | 32 | 48 | 57 | 64 | 74 | 87 | 102 | 103 | 108 | 114 | 124 | 133 | 142 | 154 | 175 | 186 | 197 | 200 |
|
|
|
|
density * not_null_ratio = 0.0025 * 0.8 = 0.002
|
|
select c1 from t1 where c1 = 10;
|
|
density * not_null_ratio = 0.0025 * 0.8 = 0.002
|
|
select c1 from t1 where c1 = 5.5;
|
|
density * not_null_ratio = 0.0025 * 0.8 = 0.002
|
|
select c1 from t1 where c1 > 20;
|
|
density * not_null_ratio = 0.0025 * 0.8 = 0.002
|
|
select c1 from t1 where c1 < 1;
|
|
|
|
5/200 * 0.8 = 0.02
|
|
select c1 from t1 where c1 = 1;
|
|
(5/200 + 0.0025) * 0.8 = 0.022
|
|
select c1 from t1 where c1 <= 1;
|
|
(103 - 5) / 200 * 0.8 = 0.392
|
|
select c1 from t1 where c1 >= 2 and c1 <= 10;
|
|
# TODO: the query range may need to be reworked
|
|
# ((103 - 5) / 200 + 0.0025) * 0.8 = 0.394
|
|
((103 - 5) / 200) * 0.8 = 0.392
|
|
select c1 from t1 where c1 > 1.5 and c1 <= 10;
|
|
((103 - 5) / 200 + 0.0025) * 0.8 = 0.394
|
|
select c1 from t1 where c1 >= 2 and c1 < 10.5;
|