[refactor]remove seek block in segmentIterator (#15413)

* remove seek block

* add reg test

Co-authored-by: Wang Bo <wangbo36@meituan.com>
This commit is contained in:
wangbo
2022-12-30 14:14:16 +08:00
committed by GitHub
parent 520b6d7910
commit 5ec4e5586f
6 changed files with 982 additions and 22 deletions

View File

@ -25,7 +25,6 @@
#include "olap/column_predicate.h"
#include "olap/olap_common.h"
#include "olap/row_block2.h"
#include "olap/row_cursor.h"
#include "olap/rowset/segment_v2/column_reader.h"
#include "olap/rowset/segment_v2/segment.h"
#include "olap/short_key_index.h"
@ -267,7 +266,16 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra
}
}
_seek_schema = std::make_unique<Schema>(key_fields, key_fields.size());
_seek_block = std::make_unique<RowBlockV2>(*_seek_schema, 1);
// todo(wb) need refactor here, when using pk to search, _seek_block is useless
if (_seek_block.capacity() == 0) {
_seek_block.resize(_seek_schema->num_column_ids());
int i = 0;
for (auto cid : _seek_schema->column_ids()) {
auto column_desc = _seek_schema->column(cid);
_seek_block[i] = Schema::get_column_by_field(*column_desc);
i++;
}
}
// create used column iterator
for (auto cid : _seek_schema->column_ids()) {
@ -583,19 +591,6 @@ Status SegmentIterator::_init_bitmap_index_iterators() {
return Status::OK();
}
// Schema of lhs and rhs are different.
// callers should assure that rhs' schema has all columns in lhs schema
template <typename LhsRowType, typename RhsRowType>
int compare_row_with_lhs_columns(const LhsRowType& lhs, const RhsRowType& rhs) {
for (auto cid : lhs.schema()->column_ids()) {
auto res = lhs.schema()->column(cid)->compare_cell(lhs.cell(cid), rhs.cell(cid));
if (res != 0) {
return res;
}
}
return 0;
}
Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound,
rowid_t* rowid) {
if (_segment->_tablet_schema->keys_type() == UNIQUE_KEYS &&
@ -623,6 +618,9 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool
encode_key_with_padding(&index_key, key, _segment->_tablet_schema->num_short_key_columns(),
is_include);
const auto& key_col_ids = key.schema()->column_ids();
_convert_rowcursor_to_short_key(key, key_col_ids.size());
uint32_t start_block_id = 0;
auto start_iter = sk_index_decoder->lower_bound(index_key);
if (start_iter.valid()) {
@ -650,7 +648,7 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool
while (start < end) {
rowid_t mid = (start + end) / 2;
RETURN_IF_ERROR(_seek_and_peek(mid));
int cmp = compare_row_with_lhs_columns(key, _seek_block->row(0));
int cmp = _compare_short_key_with_seek_block(key_col_ids);
if (cmp > 0) {
start = mid + 1;
} else if (cmp == 0) {
@ -745,11 +743,13 @@ Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
RETURN_IF_ERROR(_seek_columns(_seek_schema->column_ids(), rowid));
}
size_t num_rows = 1;
// please note that usually RowBlockV2.clear() is called to free MemPool memory before reading the next block,
// but here since there won't be too many keys to seek, we don't call RowBlockV2.clear() so that we can use
// a single MemPool for all seeked keys.
RETURN_IF_ERROR(_read_columns(_seek_schema->column_ids(), _seek_block.get(), 0, num_rows));
_seek_block->set_num_rows(num_rows);
//note(wb) reset _seek_block for memory reuse
// it is easier to use row based memory layout for clear memory
for (int i = 0; i < _seek_block.size(); i++) {
_seek_block[i]->clear();
}
RETURN_IF_ERROR(_read_columns(_seek_schema->column_ids(), _seek_block, num_rows));
return Status::OK();
}

View File

@ -25,6 +25,7 @@
#include "io/fs/file_reader.h"
#include "io/fs/file_system.h"
#include "olap/olap_common.h"
#include "olap/row_cursor.h"
#include "olap/rowset/segment_v2/common.h"
#include "olap/rowset/segment_v2/row_ranges.h"
#include "olap/rowset/segment_v2/segment.h"
@ -195,6 +196,59 @@ private:
vectorized::Block* block);
private:
// todo(wb) remove this method after RowCursor is removed
void _convert_rowcursor_to_short_key(const RowCursor& key, size_t num_keys) {
if (_short_key.capacity() == 0) {
_short_key.resize(num_keys);
for (auto cid = 0; cid < num_keys; cid++) {
auto* field = key.schema()->column(cid);
_short_key[cid] = Schema::get_column_by_field(*field);
if (field->type() == OLAP_FIELD_TYPE_DATE) {
_short_key[cid]->set_date_type();
} else if (field->type() == OLAP_FIELD_TYPE_DATETIME) {
_short_key[cid]->set_datetime_type();
}
}
} else {
for (int i = 0; i < num_keys; i++) {
_short_key[i]->clear();
}
}
for (auto cid = 0; cid < num_keys; cid++) {
auto field = key.schema()->column(cid);
if (field == nullptr) {
break;
}
auto cell = key.cell(cid);
if (cell.is_null()) {
_short_key[cid]->insert_default();
} else {
if (field->type() == OLAP_FIELD_TYPE_VARCHAR ||
field->type() == OLAP_FIELD_TYPE_CHAR ||
field->type() == OLAP_FIELD_TYPE_STRING) {
const Slice* slice = reinterpret_cast<const Slice*>(cell.cell_ptr());
_short_key[cid]->insert_data(slice->data, slice->size);
} else {
_short_key[cid]->insert_many_fix_len_data(
reinterpret_cast<const char*>(cell.cell_ptr()), 1);
}
}
}
}
int _compare_short_key_with_seek_block(const std::vector<ColumnId>& col_ids) {
for (auto cid : col_ids) {
// todo(wb) simd compare when memory layout in row
auto res = _short_key[cid]->compare_at(0, 0, *_seek_block[cid], -1);
if (res != 0) {
return res;
}
}
return 0;
}
class BitmapRangeIterator;
class BackwardBitmapRangeIterator;
@ -263,7 +317,10 @@ private:
std::unique_ptr<Schema> _seek_schema;
// used to binary search the rowid for a given key
// only used in `_get_row_ranges_by_keys`
std::unique_ptr<RowBlockV2> _seek_block;
vectorized::MutableColumns _seek_block;
//todo(wb) remove this field after Rowcursor is removed
vectorized::MutableColumns _short_key;
io::FileReaderSPtr _file_reader;

View File

@ -114,6 +114,18 @@ vectorized::DataTypePtr Schema::get_data_type_ptr(const Field& field) {
return vectorized::DataTypeFactory::instance().create_data_type(field);
}
vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) {
auto data_type_ptr = vectorized::DataTypeFactory::instance().create_data_type(field);
vectorized::IColumn::MutablePtr col_ptr = data_type_ptr->create_column();
if (field.is_nullable()) {
return doris::vectorized::ColumnNullable::create(std::move(col_ptr),
doris::vectorized::ColumnUInt8::create());
}
return col_ptr;
}
vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(const Field& field) {
if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY)) {
return get_data_type_ptr(field)->create_column();

View File

@ -110,6 +110,8 @@ public:
static vectorized::DataTypePtr get_data_type_ptr(const Field& field);
static vectorized::IColumn::MutablePtr get_column_by_field(const Field& field);
static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type);
static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(const Field& field);

View File

@ -0,0 +1,344 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !int_query_one_column --
1
1
5
5
9
9
13
13
13
13
17
17
-- !int_query_eq_pred --
13
13
13
13
-- !int_query_bigger_pred --
17
17
-- !int_query_bigger_eq_pred --
13
13
13
13
17
17
-- !int_query_less_than_pred --
1
1
5
5
9
9
-- !int_query_less_than_eq_pred --
1
1
5
5
9
9
13
13
13
13
-- !char_query_one_column --
a1
b2
c3
d4
-- !char_query_eq_pred --
d4
-- !char_query_bigger_pred --
-- !char_query_bigger_eq_pred --
d4
-- !char_query_less_than_pred --
a1
b2
c3
-- !char_query_less_than_eq_pred --
a1
b2
c3
d4
-- !date_query_one_column --
2022-01-01
2022-02-01
2022-03-01
2022-04-01
-- !date_query_eq_pred --
2022-04-01
-- !date_query_bigger_pred --
-- !date_query_bigger_eq_pred --
2022-04-01
-- !date_query_less_than_pred --
2022-01-01
2022-02-01
2022-03-01
-- !date_query_less_than_eq_pred --
2022-01-01
2022-02-01
2022-03-01
2022-04-01
-- !datev2_query_one_column --
2022-01-01
2022-02-01
2022-03-01
2022-04-01
-- !datev2_query_eq_pred --
2022-04-01
-- !datev2_query_bigger_pred --
-- !datev2_query_bigger_eq_pred --
2022-04-01
-- !datev2_query_less_than_pred --
2022-01-01
2022-02-01
2022-03-01
-- !datev2_query_less_than_eq_pred --
2022-01-01
2022-02-01
2022-03-01
2022-04-01
-- !datetime_query_one_column --
2022-01-01T00:00 10
2022-01-01T01:00 10
2022-01-01T03:00 2
2022-01-01T04:00 21
-- !datetime_query_eq_pred --
2022-01-01T04:00 21
-- !datetime_query_bigger_pred --
-- !datetime_query_bigger_eq_pred --
2022-01-01T04:00 21
-- !datetime_query_less_than_pred --
2022-01-01T00:00 10
2022-01-01T01:00 10
2022-01-01T03:00 2
-- !datetime_query_less_than_eq_pred --
2022-01-01T00:00 10
2022-01-01T01:00 10
2022-01-01T03:00 2
2022-01-01T04:00 21
-- !datetimev2_query_one_column --
2022-01-01T00:00 10
2022-01-01T01:00 10
2022-01-01T03:00 2
2022-01-01T04:00 21
-- !datetimev2_query_eq_pred --
2022-01-01T04:00 21
-- !datetimev2_query_bigger_pred --
-- !datetimev2_query_bigger_eq_pred --
2022-01-01T04:00 21
-- !datetimev2_query_less_than_pred --
2022-01-01T00:00 10
2022-01-01T01:00 10
2022-01-01T03:00 2
-- !datetimev2_query_less_than_eq_pred --
2022-01-01T00:00 10
2022-01-01T01:00 10
2022-01-01T03:00 2
2022-01-01T04:00 21
-- !decimal_query_one_column --
1
1
5
5
9
9
13
13
13
13
17
17
-- !decimal_query_eq_pred --
17
17
-- !decimal_query_bigger_pred --
-- !decimal_query_bigger_eq_pred --
17
17
-- !decimal_query_less_than_pred --
1
1
5
5
9
9
13
13
13
13
-- !decimal_query_less_than_eq_pred --
1
1
5
5
9
9
13
13
13
13
17
17
-- !decimalv2_query_one_column --
1
1
5
5
9
9
13
13
13
13
17
17
-- !decimalv2_query_eq_pred --
5
5
-- !decimalv2_query_bigger_pred --
9
9
13
13
13
13
17
17
-- !decimalv2_query_bigger_eq_pred --
5
5
9
9
13
13
13
13
17
17
-- !mix_type_query_all --
1 2 2022-01-01 a34 100
1 7 2022-01-02 a44 101
2 8 2022-01-03 a55 102
3 9 2022-01-04 a56 103
4 10 2022-05-01 a77 104
4 10 2022-05-01 a77 104
-- !pred_num_4 --
4 10 2022-05-01 a77 104
4 10 2022-05-01 a77 104
-- !pred_num_3 --
4 10 2022-05-01 a77 104
4 10 2022-05-01 a77 104
-- !pred_num_2 --
4 10 2022-05-01 a77 104
4 10 2022-05-01 a77 104
-- !pred_num_1 --
4 10 2022-05-01 a77 104
4 10 2022-05-01 a77 104
-- !nullable_query_one_column --
\N
\N
1
1
2
2
3
3
4
4
-- !nullable_query_eq_pred --
4
4
-- !nullable_query_bigger_pred --
-- !nullable_query_bigger_eq_pred --
4
4
-- !nullable_query_less_pred --
4
4
-- !nullable_query_less_eq_pred --
4
4
-- !nullable_query_is_null_pred --
\N
\N
-- !nullable_query_is_not_null_pred --
1
1
2
2
3
3
4
4
-- !bool_query_true --
true 2
true 10
-- !bool_query_false --
false 10
false 21

View File

@ -0,0 +1,545 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_short_key_index_groovy") {
def int_tab = "test_short_key_index_int_tab"
sql "drop table if exists ${int_tab}"
sql """
CREATE TABLE IF NOT EXISTS `${int_tab}` (
`siteid` int(11) NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`siteid`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`siteid`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${int_tab}` values
(9,10,11,12),
(9,10,11,12),
(1,2,3,4),
(13,21,22,16),
(13,14,15,16),
(17,18,19,20),
(1,2,3,4),
(13,21,22,16),
(13,14,15,16),
(17,18,19,20),
(5,6,7,8),
(5,6,7,8)
"""
qt_int_query_one_column "select siteid from `${int_tab}` order by siteid"
qt_int_query_eq_pred "select siteid from `${int_tab}` where siteid = 13 order by siteid"
qt_int_query_bigger_pred "select siteid from `${int_tab}` where siteid > 13 order by siteid"
qt_int_query_bigger_eq_pred " select siteid from `${int_tab}` where siteid >= 13 order by siteid"
qt_int_query_less_than_pred " select siteid from `${int_tab}` where siteid < 13 order by siteid"
qt_int_query_less_than_eq_pred " select siteid from `${int_tab}` where siteid <= 13 order by siteid"
sql "drop table if exists ${int_tab}"
// short key is char
def char_tab = "test_short_key_index_char_tab"
sql "drop table if exists `${char_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${char_tab}` (
`name` char(10) NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`name`,`citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`citycode`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${char_tab}` values
('a1',10,11,12),
('b2',10,11,12),
('c3',2,3,4),
('d4',21,22,16)
"""
qt_char_query_one_column "select name from `${char_tab}` order by name"
qt_char_query_eq_pred "select name from `${char_tab}` where name='d4' order by name"
qt_char_query_bigger_pred "select name from `${char_tab}` where name > 'd4' order by name"
qt_char_query_bigger_eq_pred "select name from `${char_tab}` where name>='d4' order by name"
qt_char_query_less_than_pred "select name from `${char_tab}` where name < 'd4' order by name"
qt_char_query_less_than_eq_pred " select name from `${char_tab}` where name <= 'd4' order by name"
sql "drop table if exists `${char_tab}`"
// short key is date
def date_tab = "test_short_key_index_date_tab"
sql "drop table if exists `${date_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${date_tab}` (
`dt` date NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`dt`,`citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`citycode`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${date_tab}` values
('2022-01-01',10,11,12),
('2022-02-01',10,11,12),
('2022-03-01',2,3,4),
('2022-04-01',21,22,16)
"""
qt_date_query_one_column "select dt from `${date_tab}` order by dt"
qt_date_query_eq_pred "select dt from `${date_tab}` where dt='2022-04-01' order by dt"
qt_date_query_bigger_pred "select dt from `${date_tab}` where dt>'2022-04-01' order by dt"
qt_date_query_bigger_eq_pred "select dt from `${date_tab}` where dt>='2022-04-01' order by dt"
qt_date_query_less_than_pred "select dt from `${date_tab}` where dt < '2022-04-01' order by dt"
qt_date_query_less_than_eq_pred "select dt from `${date_tab}` where dt<='2022-04-01' order by dt"
sql "drop table if exists `${date_tab}`"
// short key is datev2
def datev2_tab = "test_short_key_index_datev2_tab"
sql "drop table if exists `${datev2_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${datev2_tab}` (
`dt` datev2 NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`dt`,`citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`citycode`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${datev2_tab}` values
('2022-01-01',10,11,12),
('2022-02-01',10,11,12),
('2022-03-01',2,3,4),
('2022-04-01',21,22,16)
"""
qt_datev2_query_one_column "select dt from `${datev2_tab}` order by dt"
qt_datev2_query_eq_pred "select dt from `${datev2_tab}` where dt='2022-04-01' order by dt"
qt_datev2_query_bigger_pred "select dt from `${datev2_tab}` where dt>'2022-04-01' order by dt"
qt_datev2_query_bigger_eq_pred "select dt from `${datev2_tab}` where dt>='2022-04-01' order by dt"
qt_datev2_query_less_than_pred "select dt from `${datev2_tab}` where dt < '2022-04-01' order by dt"
qt_datev2_query_less_than_eq_pred "select dt from `${datev2_tab}` where dt<='2022-04-01' order by dt"
sql "drop table if exists `${datev2_tab}`"
// short key is datetime
def datetime_tab = "test_short_key_index_datetime_tab"
sql "drop table if exists `${datetime_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${datetime_tab}` (
`dtime` datetime NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`dtime`,`citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`citycode`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${datetime_tab}` values
('2022-01-01 00:00:00',10,11,12),
('2022-01-01 01:00:00',10,11,12),
('2022-01-01 03:00:00',2,3,4),
('2022-01-01 04:00:00',21,22,16)
"""
qt_datetime_query_one_column "select dtime,citycode from `${datetime_tab}` order by dtime,citycode"
qt_datetime_query_eq_pred "select dtime,citycode from `${datetime_tab}` where dtime='2022-01-01 04:00:00' order by dtime,citycode"
qt_datetime_query_bigger_pred "select dtime,citycode from `${datetime_tab}` where dtime > '2022-01-01 04:00:00' order by dtime,citycode"
qt_datetime_query_bigger_eq_pred "select dtime,citycode from `${datetime_tab}` where dtime >= '2022-01-01 04:00:00' order by dtime,citycode"
qt_datetime_query_less_than_pred "select dtime,citycode from `${datetime_tab}` where dtime < '2022-01-01 04:00:00' order by dtime,citycode"
qt_datetime_query_less_than_eq_pred "select dtime,citycode from `${datetime_tab}` where dtime <='2022-01-01 04:00:00' order by dtime,citycode"
sql "drop table if exists `${datetime_tab}`"
// short key is datetimev2
def datetimev2_tab = "test_short_key_index_datetimev2_tab"
sql "drop table if exists `${datetimev2_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${datetimev2_tab}` (
`dtime` datetimev2 NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`dtime`,`citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`citycode`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${datetimev2_tab}` values
('2022-01-01 00:00:00',10,11,12),
('2022-01-01 01:00:00',10,11,12),
('2022-01-01 03:00:00',2,3,4),
('2022-01-01 04:00:00',21,22,16)
"""
qt_datetimev2_query_one_column "select dtime,citycode from `${datetimev2_tab}` order by dtime,citycode"
qt_datetimev2_query_eq_pred "select dtime,citycode from `${datetimev2_tab}` where dtime='2022-01-01 04:00:00' order by dtime,citycode"
qt_datetimev2_query_bigger_pred "select dtime,citycode from `${datetimev2_tab}` where dtime > '2022-01-01 04:00:00' order by dtime,citycode"
qt_datetimev2_query_bigger_eq_pred "select dtime,citycode from `${datetimev2_tab}` where dtime >= '2022-01-01 04:00:00' order by dtime,citycode"
qt_datetimev2_query_less_than_pred "select dtime,citycode from `${datetimev2_tab}` where dtime < '2022-01-01 04:00:00' order by dtime,citycode"
qt_datetimev2_query_less_than_eq_pred "select dtime,citycode from `${datetimev2_tab}` where dtime <='2022-01-01 04:00:00' order by dtime,citycode"
sql "drop table if exists `${datetimev2_tab}`"
// short key is decimal
def decimal_tab = "test_short_key_index_decimal_tab"
sql "drop table if exists `${decimal_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${decimal_tab}` (
`siteid` decimal NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`siteid`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`siteid`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${decimal_tab}` values
(9,10,11,12),
(9,10,11,12),
(1,2,3,4),
(13,21,22,16),
(13,14,15,16),
(17,18,19,20),
(1,2,3,4),
(13,21,22,16),
(13,14,15,16),
(17,18,19,20),
(5,6,7,8),
(5,6,7,8)
"""
qt_decimal_query_one_column "select siteid from `${decimal_tab}` order by siteid"
qt_decimal_query_eq_pred "select siteid from `${decimal_tab}` where siteid=17 order by siteid"
qt_decimal_query_bigger_pred "select siteid from `${decimal_tab}` where siteid>17 order by siteid"
qt_decimal_query_bigger_eq_pred "select siteid from `${decimal_tab}` where siteid>=17 order by siteid"
qt_decimal_query_less_than_pred "select siteid from `${decimal_tab}` where siteid<17 order by siteid"
qt_decimal_query_less_than_eq_pred "select siteid from `${decimal_tab}` where siteid<=17 order by siteid"
sql "drop table if exists `${decimal_tab}`"
// short key is decimalv2
def decimalv2_tab = "test_short_key_index_decimalv2_tab"
sql "drop table if exists `${decimalv2_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${decimalv2_tab}` (
`siteid` decimalv3 NOT NULL COMMENT "",
`citycode` int(11) NOT NULL COMMENT "",
`userid` int(11) NOT NULL COMMENT "",
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`siteid`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`siteid`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${decimalv2_tab}` values
(9,10,11,12),
(9,10,11,12),
(1,2,3,4),
(13,21,22,16),
(13,14,15,16),
(17,18,19,20),
(1,2,3,4),
(13,21,22,16),
(13,14,15,16),
(17,18,19,20),
(5,6,7,8),
(5,6,7,8)
"""
qt_decimalv2_query_one_column "select siteid from `${decimalv2_tab}` order by siteid"
qt_decimalv2_query_eq_pred "select siteid from `${decimalv2_tab}` where siteid=5 order by siteid"
qt_decimalv2_query_bigger_pred "select siteid from `${decimalv2_tab}` where siteid>5 order by siteid"
qt_decimalv2_query_bigger_eq_pred "select siteid from `${decimalv2_tab}` where siteid>=5 order by siteid"
// These two sqls could fail because of bug of current master, but not caused by current pr
// qt_decimalv2_query_less_than_pred "select siteid from `${decimalv2_tab}` where siteid<5 order by siteid"
// qt_decimalv2_query_less_than_eq_pred "select siteid from `${decimalv2_tab}` where siteid<=5 order by siteid"
sql "drop table if exists `${decimalv2_tab}`"
// mix type short key
def mix_type_tab = "test_short_key_index_mixed_type_tab"
sql "drop table if exists `${mix_type_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${mix_type_tab}` (
`decimal_col` decimal NOT NULL COMMENT "",
`int_col` int not null,
`date_col` date not null,
`varchar_col` varchar(10) not null,
`pv` int(11) NOT NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`decimal_col`, `int_col`, `date_col`, `varchar_col`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`int_col`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${mix_type_tab}` values
(1,2,'2022-01-01','a34', 100),
(1,7,'2022-01-02','a44', 101),
(2,8,'2022-01-03','a55', 102),
(3,9,'2022-01-04','a56', 103),
(4,10,'2022-05-01','a77', 104),
(4,10,'2022-05-01','a77', 104)
"""
qt_mix_type_query_all "select * from `${mix_type_tab}` order by 1,2,3,4,5"
qt_pred_num_4 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 and date_col='2022-05-01' and varchar_col='a77'"
qt_pred_num_3 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 and date_col='2022-05-01'"
qt_pred_num_2 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 "
qt_pred_num_1 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 "
sql "drop table if exists `${mix_type_tab}`"
// nulable
def nullable_tab = "test_short_key_index_is_nullable_tab"
sql "drop table if exists `${nullable_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${nullable_tab}` (
`siteid` int(11) NULL COMMENT "",
`citycode` int(11) NULL COMMENT "",
`userid` int(11) NULL COMMENT "",
`pv` int(11) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`siteid`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`siteid`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${nullable_tab}` values
(1,10,11,12),
(1,10,11,12),
(2,2,3,4),
(2,21,22,16),
(3,14,15,16),
(3,18,19,20),
(4,2,3,4),
(4,21,22,16),
(null,14,15,16),
(null,16,17,18)
"""
qt_nullable_query_one_column "select siteid from `${nullable_tab}` order by siteid"
qt_nullable_query_eq_pred "select siteid from `${nullable_tab}` where siteid = 4"
qt_nullable_query_bigger_pred "select siteid from `${nullable_tab}` where siteid > 4"
qt_nullable_query_bigger_eq_pred "select siteid from `${nullable_tab}` where siteid >= 4"
qt_nullable_query_less_pred "select siteid from `${nullable_tab}` where siteid >= 4"
qt_nullable_query_less_eq_pred "select siteid from `${nullable_tab}` where siteid >= 4"
qt_nullable_query_is_null_pred "select siteid from `${nullable_tab}` where siteid is null"
qt_nullable_query_is_not_null_pred "select siteid from `${nullable_tab}` where siteid is not null"
sql "drop table if exists `${nullable_tab}`"
// bool
def bool_tab = "test_short_key_index_bool_tab"
sql "drop table if exists `${bool_tab}`"
sql """
CREATE TABLE IF NOT EXISTS `${bool_tab}` (
`is_happy` boolean NULL COMMENT "",
`citycode` int(11) NULL COMMENT "",
`userid` int(11) NULL COMMENT "",
`pv` int(11) NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`is_happy`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`citycode`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
sql """
insert into `${bool_tab}` values
(true,10,11,12),
(false,10,11,12),
(true,2,3,4),
(false,21,22,16)
"""
qt_bool_query_true "select is_happy,citycode from `${bool_tab}` where is_happy = true order by is_happy,citycode"
qt_bool_query_false "select is_happy,citycode from `${bool_tab}` where is_happy = false order by is_happy,citycode"
sql "drop table if exists `${bool_tab}`"
}