From 5ec4e5586f1a1fc877d4efc5f56cd61924d1d23b Mon Sep 17 00:00:00 2001 From: wangbo Date: Fri, 30 Dec 2022 14:14:16 +0800 Subject: [PATCH] [refactor]remove seek block in segmentIterator (#15413) * remove seek block * add reg test Co-authored-by: Wang Bo --- .../rowset/segment_v2/segment_iterator.cpp | 42 +- .../olap/rowset/segment_v2/segment_iterator.h | 59 +- be/src/olap/schema.cpp | 12 + be/src/olap/schema.h | 2 + .../storage/test_short_key_index.out | 344 +++++++++++ .../storage/test_short_key_index.groovy | 545 ++++++++++++++++++ 6 files changed, 982 insertions(+), 22 deletions(-) create mode 100644 regression-test/data/data_model_p0/duplicate/storage/test_short_key_index.out create mode 100644 regression-test/suites/data_model_p0/duplicate/storage/test_short_key_index.groovy diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index e3f1b13258..e6f13ee956 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -25,7 +25,6 @@ #include "olap/column_predicate.h" #include "olap/olap_common.h" #include "olap/row_block2.h" -#include "olap/row_cursor.h" #include "olap/rowset/segment_v2/column_reader.h" #include "olap/rowset/segment_v2/segment.h" #include "olap/short_key_index.h" @@ -267,7 +266,16 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra } } _seek_schema = std::make_unique(key_fields, key_fields.size()); - _seek_block = std::make_unique(*_seek_schema, 1); + // todo(wb) need refactor here, when using pk to search, _seek_block is useless + if (_seek_block.capacity() == 0) { + _seek_block.resize(_seek_schema->num_column_ids()); + int i = 0; + for (auto cid : _seek_schema->column_ids()) { + auto column_desc = _seek_schema->column(cid); + _seek_block[i] = Schema::get_column_by_field(*column_desc); + i++; + } + } // create used column iterator for (auto cid : _seek_schema->column_ids()) { @@ -583,19 
+591,6 @@ Status SegmentIterator::_init_bitmap_index_iterators() { return Status::OK(); } -// Schema of lhs and rhs are different. -// callers should assure that rhs' schema has all columns in lhs schema -template -int compare_row_with_lhs_columns(const LhsRowType& lhs, const RhsRowType& rhs) { - for (auto cid : lhs.schema()->column_ids()) { - auto res = lhs.schema()->column(cid)->compare_cell(lhs.cell(cid), rhs.cell(cid)); - if (res != 0) { - return res; - } - } - return 0; -} - Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound, rowid_t* rowid) { if (_segment->_tablet_schema->keys_type() == UNIQUE_KEYS && @@ -623,6 +618,9 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool encode_key_with_padding(&index_key, key, _segment->_tablet_schema->num_short_key_columns(), is_include); + const auto& key_col_ids = key.schema()->column_ids(); + _convert_rowcursor_to_short_key(key, key_col_ids.size()); + uint32_t start_block_id = 0; auto start_iter = sk_index_decoder->lower_bound(index_key); if (start_iter.valid()) { @@ -650,7 +648,7 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool while (start < end) { rowid_t mid = (start + end) / 2; RETURN_IF_ERROR(_seek_and_peek(mid)); - int cmp = compare_row_with_lhs_columns(key, _seek_block->row(0)); + int cmp = _compare_short_key_with_seek_block(key_col_ids); if (cmp > 0) { start = mid + 1; } else if (cmp == 0) { @@ -745,11 +743,13 @@ Status SegmentIterator::_seek_and_peek(rowid_t rowid) { RETURN_IF_ERROR(_seek_columns(_seek_schema->column_ids(), rowid)); } size_t num_rows = 1; - // please note that usually RowBlockV2.clear() is called to free MemPool memory before reading the next block, - // but here since there won't be too many keys to seek, we don't call RowBlockV2.clear() so that we can use - // a single MemPool for all seeked keys. 
- RETURN_IF_ERROR(_read_columns(_seek_schema->column_ids(), _seek_block.get(), 0, num_rows)); - _seek_block->set_num_rows(num_rows); + + //note(wb) reset _seek_block for memory reuse + // it is easier to use row based memory layout for clear memory + for (int i = 0; i < _seek_block.size(); i++) { + _seek_block[i]->clear(); + } + RETURN_IF_ERROR(_read_columns(_seek_schema->column_ids(), _seek_block, num_rows)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index d1ac74cffa..ee056a2753 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -25,6 +25,7 @@ #include "io/fs/file_reader.h" #include "io/fs/file_system.h" #include "olap/olap_common.h" +#include "olap/row_cursor.h" #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/row_ranges.h" #include "olap/rowset/segment_v2/segment.h" @@ -195,6 +196,59 @@ private: vectorized::Block* block); private: + // todo(wb) remove this method after RowCursor is removed + void _convert_rowcursor_to_short_key(const RowCursor& key, size_t num_keys) { + if (_short_key.capacity() == 0) { + _short_key.resize(num_keys); + for (auto cid = 0; cid < num_keys; cid++) { + auto* field = key.schema()->column(cid); + _short_key[cid] = Schema::get_column_by_field(*field); + + if (field->type() == OLAP_FIELD_TYPE_DATE) { + _short_key[cid]->set_date_type(); + } else if (field->type() == OLAP_FIELD_TYPE_DATETIME) { + _short_key[cid]->set_datetime_type(); + } + } + } else { + for (int i = 0; i < num_keys; i++) { + _short_key[i]->clear(); + } + } + + for (auto cid = 0; cid < num_keys; cid++) { + auto field = key.schema()->column(cid); + if (field == nullptr) { + break; + } + auto cell = key.cell(cid); + if (cell.is_null()) { + _short_key[cid]->insert_default(); + } else { + if (field->type() == OLAP_FIELD_TYPE_VARCHAR || + field->type() == OLAP_FIELD_TYPE_CHAR || + 
field->type() == OLAP_FIELD_TYPE_STRING) { + const Slice* slice = reinterpret_cast(cell.cell_ptr()); + _short_key[cid]->insert_data(slice->data, slice->size); + } else { + _short_key[cid]->insert_many_fix_len_data( + reinterpret_cast(cell.cell_ptr()), 1); + } + } + } + } + + int _compare_short_key_with_seek_block(const std::vector& col_ids) { + for (auto cid : col_ids) { + // todo(wb) simd compare when memory layout in row + auto res = _short_key[cid]->compare_at(0, 0, *_seek_block[cid], -1); + if (res != 0) { + return res; + } + } + return 0; + } + class BitmapRangeIterator; class BackwardBitmapRangeIterator; @@ -263,7 +317,10 @@ private: std::unique_ptr _seek_schema; // used to binary search the rowid for a given key // only used in `_get_row_ranges_by_keys` - std::unique_ptr _seek_block; + vectorized::MutableColumns _seek_block; + + //todo(wb) remove this field after Rowcursor is removed + vectorized::MutableColumns _short_key; io::FileReaderSPtr _file_reader; diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index c8c27c4269..0d7cdb5d36 100644 --- a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -114,6 +114,18 @@ vectorized::DataTypePtr Schema::get_data_type_ptr(const Field& field) { return vectorized::DataTypeFactory::instance().create_data_type(field); } +vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) { + auto data_type_ptr = vectorized::DataTypeFactory::instance().create_data_type(field); + vectorized::IColumn::MutablePtr col_ptr = data_type_ptr->create_column(); + + if (field.is_nullable()) { + return doris::vectorized::ColumnNullable::create(std::move(col_ptr), + doris::vectorized::ColumnUInt8::create()); + } + + return col_ptr; +} + vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(const Field& field) { if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY)) { return get_data_type_ptr(field)->create_column(); diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index 
7c578f4f27..afbff13b00 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -110,6 +110,8 @@ public: static vectorized::DataTypePtr get_data_type_ptr(const Field& field); + static vectorized::IColumn::MutablePtr get_column_by_field(const Field& field); + static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type); static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(const Field& field); diff --git a/regression-test/data/data_model_p0/duplicate/storage/test_short_key_index.out b/regression-test/data/data_model_p0/duplicate/storage/test_short_key_index.out new file mode 100644 index 0000000000..e0c71caff2 --- /dev/null +++ b/regression-test/data/data_model_p0/duplicate/storage/test_short_key_index.out @@ -0,0 +1,344 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !int_query_one_column -- +1 +1 +5 +5 +9 +9 +13 +13 +13 +13 +17 +17 + +-- !int_query_eq_pred -- +13 +13 +13 +13 + +-- !int_query_bigger_pred -- +17 +17 + +-- !int_query_bigger_eq_pred -- +13 +13 +13 +13 +17 +17 + +-- !int_query_less_than_pred -- +1 +1 +5 +5 +9 +9 + +-- !int_query_less_than_eq_pred -- +1 +1 +5 +5 +9 +9 +13 +13 +13 +13 + +-- !char_query_one_column -- +a1 +b2 +c3 +d4 + +-- !char_query_eq_pred -- +d4 + +-- !char_query_bigger_pred -- + +-- !char_query_bigger_eq_pred -- +d4 + +-- !char_query_less_than_pred -- +a1 +b2 +c3 + +-- !char_query_less_than_eq_pred -- +a1 +b2 +c3 +d4 + +-- !date_query_one_column -- +2022-01-01 +2022-02-01 +2022-03-01 +2022-04-01 + +-- !date_query_eq_pred -- +2022-04-01 + +-- !date_query_bigger_pred -- + +-- !date_query_bigger_eq_pred -- +2022-04-01 + +-- !date_query_less_than_pred -- +2022-01-01 +2022-02-01 +2022-03-01 + +-- !date_query_less_than_eq_pred -- +2022-01-01 +2022-02-01 +2022-03-01 +2022-04-01 + +-- !datev2_query_one_column -- +2022-01-01 +2022-02-01 +2022-03-01 +2022-04-01 + +-- !datev2_query_eq_pred -- +2022-04-01 + +-- !datev2_query_bigger_pred -- + +-- 
!datev2_query_bigger_eq_pred -- +2022-04-01 + +-- !datev2_query_less_than_pred -- +2022-01-01 +2022-02-01 +2022-03-01 + +-- !datev2_query_less_than_eq_pred -- +2022-01-01 +2022-02-01 +2022-03-01 +2022-04-01 + +-- !datetime_query_one_column -- +2022-01-01T00:00 10 +2022-01-01T01:00 10 +2022-01-01T03:00 2 +2022-01-01T04:00 21 + +-- !datetime_query_eq_pred -- +2022-01-01T04:00 21 + +-- !datetime_query_bigger_pred -- + +-- !datetime_query_bigger_eq_pred -- +2022-01-01T04:00 21 + +-- !datetime_query_less_than_pred -- +2022-01-01T00:00 10 +2022-01-01T01:00 10 +2022-01-01T03:00 2 + +-- !datetime_query_less_than_eq_pred -- +2022-01-01T00:00 10 +2022-01-01T01:00 10 +2022-01-01T03:00 2 +2022-01-01T04:00 21 + +-- !datetimev2_query_one_column -- +2022-01-01T00:00 10 +2022-01-01T01:00 10 +2022-01-01T03:00 2 +2022-01-01T04:00 21 + +-- !datetimev2_query_eq_pred -- +2022-01-01T04:00 21 + +-- !datetimev2_query_bigger_pred -- + +-- !datetimev2_query_bigger_eq_pred -- +2022-01-01T04:00 21 + +-- !datetimev2_query_less_than_pred -- +2022-01-01T00:00 10 +2022-01-01T01:00 10 +2022-01-01T03:00 2 + +-- !datetimev2_query_less_than_eq_pred -- +2022-01-01T00:00 10 +2022-01-01T01:00 10 +2022-01-01T03:00 2 +2022-01-01T04:00 21 + +-- !decimal_query_one_column -- +1 +1 +5 +5 +9 +9 +13 +13 +13 +13 +17 +17 + +-- !decimal_query_eq_pred -- +17 +17 + +-- !decimal_query_bigger_pred -- + +-- !decimal_query_bigger_eq_pred -- +17 +17 + +-- !decimal_query_less_than_pred -- +1 +1 +5 +5 +9 +9 +13 +13 +13 +13 + +-- !decimal_query_less_than_eq_pred -- +1 +1 +5 +5 +9 +9 +13 +13 +13 +13 +17 +17 + +-- !decimalv2_query_one_column -- +1 +1 +5 +5 +9 +9 +13 +13 +13 +13 +17 +17 + +-- !decimalv2_query_eq_pred -- +5 +5 + +-- !decimalv2_query_bigger_pred -- +9 +9 +13 +13 +13 +13 +17 +17 + +-- !decimalv2_query_bigger_eq_pred -- +5 +5 +9 +9 +13 +13 +13 +13 +17 +17 + +-- !mix_type_query_all -- +1 2 2022-01-01 a34 100 +1 7 2022-01-02 a44 101 +2 8 2022-01-03 a55 102 +3 9 2022-01-04 a56 103 +4 10 2022-05-01 a77 104 +4 10 
2022-05-01 a77 104 + +-- !pred_num_4 -- +4 10 2022-05-01 a77 104 +4 10 2022-05-01 a77 104 + +-- !pred_num_3 -- +4 10 2022-05-01 a77 104 +4 10 2022-05-01 a77 104 + +-- !pred_num_2 -- +4 10 2022-05-01 a77 104 +4 10 2022-05-01 a77 104 + +-- !pred_num_1 -- +4 10 2022-05-01 a77 104 +4 10 2022-05-01 a77 104 + +-- !nullable_query_one_column -- +\N +\N +1 +1 +2 +2 +3 +3 +4 +4 + +-- !nullable_query_eq_pred -- +4 +4 + +-- !nullable_query_bigger_pred -- + +-- !nullable_query_bigger_eq_pred -- +4 +4 + +-- !nullable_query_less_pred -- +4 +4 + +-- !nullable_query_less_eq_pred -- +4 +4 + +-- !nullable_query_is_null_pred -- +\N +\N + +-- !nullable_query_is_not_null_pred -- +1 +1 +2 +2 +3 +3 +4 +4 + +-- !bool_query_true -- +true 2 +true 10 + +-- !bool_query_false -- +false 10 +false 21 + diff --git a/regression-test/suites/data_model_p0/duplicate/storage/test_short_key_index.groovy b/regression-test/suites/data_model_p0/duplicate/storage/test_short_key_index.groovy new file mode 100644 index 0000000000..858b08f873 --- /dev/null +++ b/regression-test/suites/data_model_p0/duplicate/storage/test_short_key_index.groovy @@ -0,0 +1,545 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_short_key_index_groovy") { + + + def int_tab = "test_short_key_index_int_tab" + + sql "drop table if exists ${int_tab}" + + sql """ + CREATE TABLE IF NOT EXISTS `${int_tab}` ( + `siteid` int(11) NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`siteid`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`siteid`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + + """ + sql """ + insert into `${int_tab}` values + (9,10,11,12), + (9,10,11,12), + (1,2,3,4), + (13,21,22,16), + (13,14,15,16), + (17,18,19,20), + (1,2,3,4), + (13,21,22,16), + (13,14,15,16), + (17,18,19,20), + (5,6,7,8), + (5,6,7,8) + """ + + qt_int_query_one_column "select siteid from `${int_tab}` order by siteid" + + qt_int_query_eq_pred "select siteid from `${int_tab}` where siteid = 13 order by siteid" + + qt_int_query_bigger_pred "select siteid from `${int_tab}` where siteid > 13 order by siteid" + + qt_int_query_bigger_eq_pred " select siteid from `${int_tab}` where siteid >= 13 order by siteid" + + qt_int_query_less_than_pred " select siteid from `${int_tab}` where siteid < 13 order by siteid" + + qt_int_query_less_than_eq_pred " select siteid from `${int_tab}` where siteid <= 13 order by siteid" + + sql "drop table if exists ${int_tab}" + + + // short key is char + + def char_tab = "test_short_key_index_char_tab" + + sql "drop table if exists `${char_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${char_tab}` ( + `name` char(10) NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`name`,`citycode`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`citycode`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" 
= "V2" + ) + + """ + + sql """ + insert into `${char_tab}` values + ('a1',10,11,12), + ('b2',10,11,12), + ('c3',2,3,4), + ('d4',21,22,16) + """ + + qt_char_query_one_column "select name from `${char_tab}` order by name" + + qt_char_query_eq_pred "select name from `${char_tab}` where name='d4' order by name" + + qt_char_query_bigger_pred "select name from `${char_tab}` where name > 'd4' order by name" + + qt_char_query_bigger_eq_pred "select name from `${char_tab}` where name>='d4' order by name" + + qt_char_query_less_than_pred "select name from `${char_tab}` where name < 'd4' order by name" + + qt_char_query_less_than_eq_pred " select name from `${char_tab}` where name <= 'd4' order by name" + + sql "drop table if exists `${char_tab}`" + + + // short key is date + def date_tab = "test_short_key_index_date_tab" + + sql "drop table if exists `${date_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${date_tab}` ( + `dt` date NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`dt`,`citycode`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`citycode`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + + """ + + sql """ + insert into `${date_tab}` values + ('2022-01-01',10,11,12), + ('2022-02-01',10,11,12), + ('2022-03-01',2,3,4), + ('2022-04-01',21,22,16) + """ + + qt_date_query_one_column "select dt from `${date_tab}` order by dt" + + qt_date_query_eq_pred "select dt from `${date_tab}` where dt='2022-04-01' order by dt" + + qt_date_query_bigger_pred "select dt from `${date_tab}` where dt>'2022-04-01' order by dt" + + qt_date_query_bigger_eq_pred "select dt from `${date_tab}` where dt>='2022-04-01' order by dt" + + qt_date_query_less_than_pred "select dt from `${date_tab}` where dt < '2022-04-01' order by dt" + + qt_date_query_less_than_eq_pred "select dt from `${date_tab}` 
where dt<='2022-04-01' order by dt" + + sql "drop table if exists `${date_tab}`" + + + // short key is datev2 + def datev2_tab = "test_short_key_index_datev2_tab" + + sql "drop table if exists `${datev2_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${datev2_tab}` ( + `dt` datev2 NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`dt`,`citycode`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`citycode`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + + """ + + sql """ + insert into `${datev2_tab}` values + ('2022-01-01',10,11,12), + ('2022-02-01',10,11,12), + ('2022-03-01',2,3,4), + ('2022-04-01',21,22,16) + """ + + qt_datev2_query_one_column "select dt from `${datev2_tab}` order by dt" + + qt_datev2_query_eq_pred "select dt from `${datev2_tab}` where dt='2022-04-01' order by dt" + + qt_datev2_query_bigger_pred "select dt from `${datev2_tab}` where dt>'2022-04-01' order by dt" + + qt_datev2_query_bigger_eq_pred "select dt from `${datev2_tab}` where dt>='2022-04-01' order by dt" + + qt_datev2_query_less_than_pred "select dt from `${datev2_tab}` where dt < '2022-04-01' order by dt" + + qt_datev2_query_less_than_eq_pred "select dt from `${datev2_tab}` where dt<='2022-04-01' order by dt" + + sql "drop table if exists `${datev2_tab}`" + + // short key is datetime + def datetime_tab = "test_short_key_index_datetime_tab" + + sql "drop table if exists `${datetime_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${datetime_tab}` ( + `dtime` datetime NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`dtime`,`citycode`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`citycode`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + 
"in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into `${datetime_tab}` values + ('2022-01-01 00:00:00',10,11,12), + ('2022-01-01 01:00:00',10,11,12), + ('2022-01-01 03:00:00',2,3,4), + ('2022-01-01 04:00:00',21,22,16) + """ + + qt_datetime_query_one_column "select dtime,citycode from `${datetime_tab}` order by dtime,citycode" + + qt_datetime_query_eq_pred "select dtime,citycode from `${datetime_tab}` where dtime='2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetime_query_bigger_pred "select dtime,citycode from `${datetime_tab}` where dtime > '2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetime_query_bigger_eq_pred "select dtime,citycode from `${datetime_tab}` where dtime >= '2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetime_query_less_than_pred "select dtime,citycode from `${datetime_tab}` where dtime < '2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetime_query_less_than_eq_pred "select dtime,citycode from `${datetime_tab}` where dtime <='2022-01-01 04:00:00' order by dtime,citycode" + + sql "drop table if exists `${datetime_tab}`" + + + // short key is datetimev2 + def datetimev2_tab = "test_short_key_index_datetimev2_tab" + + sql "drop table if exists `${datetimev2_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${datetimev2_tab}` ( + `dtime` datetimev2 NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`dtime`,`citycode`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`citycode`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into `${datetimev2_tab}` values + ('2022-01-01 00:00:00',10,11,12), + ('2022-01-01 01:00:00',10,11,12), + ('2022-01-01 03:00:00',2,3,4), + ('2022-01-01 04:00:00',21,22,16) + """ + + qt_datetimev2_query_one_column "select dtime,citycode from 
`${datetimev2_tab}` order by dtime,citycode" + + qt_datetimev2_query_eq_pred "select dtime,citycode from `${datetimev2_tab}` where dtime='2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetimev2_query_bigger_pred "select dtime,citycode from `${datetimev2_tab}` where dtime > '2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetimev2_query_bigger_eq_pred "select dtime,citycode from `${datetimev2_tab}` where dtime >= '2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetimev2_query_less_than_pred "select dtime,citycode from `${datetimev2_tab}` where dtime < '2022-01-01 04:00:00' order by dtime,citycode" + + qt_datetimev2_query_less_than_eq_pred "select dtime,citycode from `${datetimev2_tab}` where dtime <='2022-01-01 04:00:00' order by dtime,citycode" + + sql "drop table if exists `${datetimev2_tab}`" + + + // short key is decimal + def decimal_tab = "test_short_key_index_decimal_tab" + + sql "drop table if exists `${decimal_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${decimal_tab}` ( + `siteid` decimal NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`siteid`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`siteid`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into `${decimal_tab}` values + (9,10,11,12), + (9,10,11,12), + (1,2,3,4), + (13,21,22,16), + (13,14,15,16), + (17,18,19,20), + (1,2,3,4), + (13,21,22,16), + (13,14,15,16), + (17,18,19,20), + (5,6,7,8), + (5,6,7,8) + """ + + qt_decimal_query_one_column "select siteid from `${decimal_tab}` order by siteid" + + qt_decimal_query_eq_pred "select siteid from `${decimal_tab}` where siteid=17 order by siteid" + + qt_decimal_query_bigger_pred "select siteid from `${decimal_tab}` where siteid>17 order by siteid" + + qt_decimal_query_bigger_eq_pred "select siteid from 
`${decimal_tab}` where siteid>=17 order by siteid" + + qt_decimal_query_less_than_pred "select siteid from `${decimal_tab}` where siteid<17 order by siteid" + + qt_decimal_query_less_than_eq_pred "select siteid from `${decimal_tab}` where siteid<=17 order by siteid" + + sql "drop table if exists `${decimal_tab}`" + + + // short key is decimalv3 + def decimalv2_tab = "test_short_key_index_decimalv2_tab" + + sql "drop table if exists `${decimalv2_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${decimalv2_tab}` ( + `siteid` decimalv3 NOT NULL COMMENT "", + `citycode` int(11) NOT NULL COMMENT "", + `userid` int(11) NOT NULL COMMENT "", + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`siteid`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`siteid`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into `${decimalv2_tab}` values + (9,10,11,12), + (9,10,11,12), + (1,2,3,4), + (13,21,22,16), + (13,14,15,16), + (17,18,19,20), + (1,2,3,4), + (13,21,22,16), + (13,14,15,16), + (17,18,19,20), + (5,6,7,8), + (5,6,7,8) + """ + + qt_decimalv2_query_one_column "select siteid from `${decimalv2_tab}` order by siteid" + + qt_decimalv2_query_eq_pred "select siteid from `${decimalv2_tab}` where siteid=5 order by siteid" + + qt_decimalv2_query_bigger_pred "select siteid from `${decimalv2_tab}` where siteid>5 order by siteid" + + qt_decimalv2_query_bigger_eq_pred "select siteid from `${decimalv2_tab}` where siteid>=5 order by siteid" + + // These two queries can fail because of a bug on current master that is not caused by this PR +// qt_decimalv2_query_less_than_pred "select siteid from `${decimalv2_tab}` where siteid<5 order by siteid" +// qt_decimalv2_query_less_than_eq_pred "select siteid from `${decimalv2_tab}` where siteid<=5 order by siteid" + + sql "drop table if exists `${decimalv2_tab}`" + + + // mixed-type short key + def mix_type_tab =
"test_short_key_index_mixed_type_tab" + + sql "drop table if exists `${mix_type_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${mix_type_tab}` ( + `decimal_col` decimal NOT NULL COMMENT "", + `int_col` int not null, + `date_col` date not null, + `varchar_col` varchar(10) not null, + `pv` int(11) NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`decimal_col`, `int_col`, `date_col`, `varchar_col`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`int_col`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into `${mix_type_tab}` values + (1,2,'2022-01-01','a34', 100), + (1,7,'2022-01-02','a44', 101), + (2,8,'2022-01-03','a55', 102), + (3,9,'2022-01-04','a56', 103), + (4,10,'2022-05-01','a77', 104), + (4,10,'2022-05-01','a77', 104) + """ + + qt_mix_type_query_all "select * from `${mix_type_tab}` order by 1,2,3,4,5" + + qt_pred_num_4 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 and date_col='2022-05-01' and varchar_col='a77'" + + qt_pred_num_3 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 and date_col='2022-05-01'" + + qt_pred_num_2 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 " + + qt_pred_num_1 "select * from `${mix_type_tab}` where decimal_col=4 and int_col=10 " + + sql "drop table if exists `${mix_type_tab}`" + + + // nullable + def nullable_tab = "test_short_key_index_is_nullable_tab" + + sql "drop table if exists `${nullable_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${nullable_tab}` ( + `siteid` int(11) NULL COMMENT "", + `citycode` int(11) NULL COMMENT "", + `userid` int(11) NULL COMMENT "", + `pv` int(11) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`siteid`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`siteid`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into
`${nullable_tab}` values + (1,10,11,12), + (1,10,11,12), + (2,2,3,4), + (2,21,22,16), + (3,14,15,16), + (3,18,19,20), + (4,2,3,4), + (4,21,22,16), + (null,14,15,16), + (null,16,17,18) + """ + + qt_nullable_query_one_column "select siteid from `${nullable_tab}` order by siteid" + + qt_nullable_query_eq_pred "select siteid from `${nullable_tab}` where siteid = 4" + + qt_nullable_query_bigger_pred "select siteid from `${nullable_tab}` where siteid > 4" + + qt_nullable_query_bigger_eq_pred "select siteid from `${nullable_tab}` where siteid >= 4" + + qt_nullable_query_less_pred "select siteid from `${nullable_tab}` where siteid >= 4" + + qt_nullable_query_less_eq_pred "select siteid from `${nullable_tab}` where siteid >= 4" + + qt_nullable_query_is_null_pred "select siteid from `${nullable_tab}` where siteid is null" + + qt_nullable_query_is_not_null_pred "select siteid from `${nullable_tab}` where siteid is not null" + + sql "drop table if exists `${nullable_tab}`" + + // bool + + def bool_tab = "test_short_key_index_bool_tab" + + sql "drop table if exists `${bool_tab}`" + + sql """ + CREATE TABLE IF NOT EXISTS `${bool_tab}` ( + `is_happy` boolean NULL COMMENT "", + `citycode` int(11) NULL COMMENT "", + `userid` int(11) NULL COMMENT "", + `pv` int(11) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`is_happy`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`citycode`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + insert into `${bool_tab}` values + (true,10,11,12), + (false,10,11,12), + (true,2,3,4), + (false,21,22,16) + """ + + qt_bool_query_true "select is_happy,citycode from `${bool_tab}` where is_happy = true order by is_happy,citycode" + + qt_bool_query_false "select is_happy,citycode from `${bool_tab}` where is_happy = false order by is_happy,citycode" + + + sql "drop table if exists `${bool_tab}`" + +}