// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include "gen_cpp/olap_file.pb.h" #include "gtest/gtest.h" #include "olap/data_dir.h" #include "olap/row_block.h" #include "olap/rowset/beta_rowset_reader.h" #include "olap/rowset/rowset_factory.h" #include "olap/rowset/rowset_reader_context.h" #include "olap/rowset/rowset_writer.h" #include "olap/rowset/rowset_writer_context.h" #include "olap/row_cursor.h" #include "olap/storage_engine.h" #include "olap/tablet_schema.h" #include "olap/utils.h" #include "olap/comparison_predicate.h" #include "runtime/mem_tracker.h" #include "runtime/mem_pool.h" #include "util/slice.h" using std::string; namespace doris { class BetaRowsetTest : public testing::Test { protected: const string kRowsetDir = "./ut_dir/beta_rowset_test"; OlapReaderStatistics _stats; void SetUp() override { OLAPStatus s; if (check_dir_existed(kRowsetDir)) { s = remove_all_dir(kRowsetDir); ASSERT_EQ(OLAP_SUCCESS, s); } s = create_dir(kRowsetDir); ASSERT_EQ(OLAP_SUCCESS, s); } void TearDown() override { if (check_dir_existed(kRowsetDir)) { auto s = remove_all_dir(kRowsetDir); ASSERT_EQ(OLAP_SUCCESS, s); } } // (k1 int, k2 varchar(20), k3 int) duplicated key (k1, k2) void create_tablet_schema(TabletSchema* tablet_schema) { TabletSchemaPB tablet_schema_pb; tablet_schema_pb.set_keys_type(DUP_KEYS); tablet_schema_pb.set_num_short_key_columns(2); tablet_schema_pb.set_num_rows_per_row_block(1024); tablet_schema_pb.set_compress_kind(COMPRESS_NONE); tablet_schema_pb.set_next_column_unique_id(4); ColumnPB* column_1 = tablet_schema_pb.add_column(); column_1->set_unique_id(1); column_1->set_name("k1"); column_1->set_type("INT"); column_1->set_is_key(true); column_1->set_length(4); column_1->set_index_length(4); column_1->set_is_nullable(true); column_1->set_is_bf_column(false); ColumnPB* column_2 = tablet_schema_pb.add_column(); column_2->set_unique_id(2); column_2->set_name("k2"); column_2->set_type("INT"); // TODO change to varchar(20) when dict encoding for string is supported column_2->set_length(4); column_2->set_index_length(4); column_2->set_is_nullable(true); column_2->set_is_key(true); column_2->set_is_nullable(true); column_2->set_is_bf_column(false); ColumnPB* column_3 = tablet_schema_pb.add_column(); column_3->set_unique_id(3); column_3->set_name("v1"); column_3->set_type("INT"); column_3->set_length(4); column_3->set_is_key(false); column_3->set_is_nullable(false); column_3->set_is_bf_column(false); column_3->set_aggregation("SUM"); tablet_schema->init_from_pb(tablet_schema_pb); } void create_rowset_writer_context(TabletSchema* tablet_schema, RowsetWriterContext* rowset_writer_context) { RowsetId rowset_id; rowset_id.init(10000); rowset_writer_context->rowset_id = rowset_id; rowset_writer_context->tablet_id = 12345; rowset_writer_context->tablet_schema_hash = 1111; rowset_writer_context->partition_id = 10; rowset_writer_context->rowset_type = BETA_ROWSET; rowset_writer_context->rowset_path_prefix = kRowsetDir; rowset_writer_context->rowset_state = VISIBLE; rowset_writer_context->tablet_schema = tablet_schema; rowset_writer_context->version.first = 10; rowset_writer_context->version.second = 10; rowset_writer_context->version_hash = 110; } void create_and_init_rowset_reader(Rowset* rowset, RowsetReaderContext& context, RowsetReaderSharedPtr* result) { auto s = rowset->create_reader(result); ASSERT_EQ(OLAP_SUCCESS, s); ASSERT_TRUE(*result != nullptr); s = (*result)->init(&context); ASSERT_EQ(OLAP_SUCCESS, s); } }; TEST_F(BetaRowsetTest, BasicFunctionTest) { OLAPStatus s; TabletSchema tablet_schema; create_tablet_schema(&tablet_schema); RowsetSharedPtr rowset; const int num_segments = 3; const uint32_t rows_per_segment = 4096; { // write `num_segments * rows_per_segment` rows to rowset RowsetWriterContext writer_context; create_rowset_writer_context(&tablet_schema, &writer_context); std::unique_ptr rowset_writer; s = RowsetFactory::create_rowset_writer(writer_context, &rowset_writer); ASSERT_EQ(OLAP_SUCCESS, s); RowCursor input_row; input_row.init(tablet_schema); // for segment "i", row "rid" // k1 := rid*10 + i // k2 := k1 * 10 // k3 := 4096 * i + rid for (int i = 0; i < num_segments; ++i) { MemTracker mem_tracker(-1); MemPool mem_pool(&mem_tracker); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 10 + i; uint32_t k2 = k1 * 10; uint32_t k3 = rows_per_segment * i + rid; input_row.set_field_content(0, reinterpret_cast(&k1), &mem_pool); input_row.set_field_content(1, reinterpret_cast(&k2), &mem_pool); input_row.set_field_content(2, reinterpret_cast(&k3), &mem_pool); s = rowset_writer->add_row(input_row); ASSERT_EQ(OLAP_SUCCESS, s); } s = rowset_writer->flush(); ASSERT_EQ(OLAP_SUCCESS, s); } rowset = rowset_writer->build(); ASSERT_TRUE(rowset != nullptr); ASSERT_EQ(num_segments, rowset->rowset_meta()->num_segments()); ASSERT_EQ(num_segments * rows_per_segment, rowset->rowset_meta()->num_rows()); } { // test return ordered results and return k1 and k2 RowsetReaderContext reader_context; reader_context.tablet_schema = &tablet_schema; reader_context.need_ordered_result = true; std::vector return_columns = {0, 1}; reader_context.return_columns = &return_columns; reader_context.seek_columns = &return_columns; reader_context.stats = &_stats; // without predicates { RowsetReaderSharedPtr rowset_reader; create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); RowBlock* output_block; uint32_t num_rows_read = 0; while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { ASSERT_TRUE(output_block != nullptr); ASSERT_GT(output_block->row_num(), 0); ASSERT_EQ(0, output_block->pos()); ASSERT_EQ(output_block->row_num(), output_block->limit()); ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); // after sort merge segments, k1 will be 0, 1, 2, 10, 11, 12, 20, 21, 22, ..., 40950, 40951, 40952 for (int i = 0; i < output_block->row_num(); ++i) { char* field1 = output_block->field_ptr(i, 0); char* field2 = output_block->field_ptr(i, 1); // test null bit ASSERT_FALSE(*reinterpret_cast(field1)); ASSERT_FALSE(*reinterpret_cast(field2)); uint32_t k1 = *reinterpret_cast(field1 + 1); uint32_t k2 = *reinterpret_cast(field2 + 1); ASSERT_EQ(k1 * 10, k2); int rid = num_rows_read / 3; int seg_id = num_rows_read % 3; ASSERT_EQ(rid * 10 + seg_id, k1); num_rows_read++; } } EXPECT_EQ(OLAP_ERR_DATA_EOF, s); EXPECT_TRUE(output_block == nullptr); EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read); } // merge segments with predicates { std::vector column_predicates; // column predicate: k1 = 10 std::unique_ptr predicate(new EqualPredicate(0, 10)); column_predicates.emplace_back(predicate.get()); reader_context.predicates = &column_predicates; RowsetReaderSharedPtr rowset_reader; create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); RowBlock* output_block; uint32_t num_rows_read = 0; while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { ASSERT_TRUE(output_block != nullptr); ASSERT_EQ(1, output_block->row_num()); ASSERT_EQ(0, output_block->pos()); ASSERT_EQ(output_block->row_num(), output_block->limit()); ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); // after sort merge segments, k1 will be 10 for (int i = 0; i < output_block->row_num(); ++i) { char* field1 = output_block->field_ptr(i, 0); char* field2 = output_block->field_ptr(i, 1); // test null bit ASSERT_FALSE(*reinterpret_cast(field1)); ASSERT_FALSE(*reinterpret_cast(field2)); uint32_t k1 = *reinterpret_cast(field1 + 1); uint32_t k2 = *reinterpret_cast(field2 + 1); ASSERT_EQ(10, k1); ASSERT_EQ(k1 * 10, k2); num_rows_read++; } } EXPECT_EQ(OLAP_ERR_DATA_EOF, s); EXPECT_TRUE(output_block == nullptr); EXPECT_EQ(1, num_rows_read); } } { // test return unordered data and only k3 RowsetReaderContext reader_context; reader_context.tablet_schema = &tablet_schema; reader_context.need_ordered_result = false; std::vector return_columns = {2}; reader_context.return_columns = &return_columns; reader_context.seek_columns = &return_columns; reader_context.stats = &_stats; // without predicate { RowsetReaderSharedPtr rowset_reader; create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); RowBlock* output_block; uint32_t num_rows_read = 0; while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { ASSERT_TRUE(output_block != nullptr); ASSERT_GT(output_block->row_num(), 0); ASSERT_EQ(0, output_block->pos()); ASSERT_EQ(output_block->row_num(), output_block->limit()); ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); // for unordered result, k3 will be 0, 1, 2, ..., 4096*3-1 for (int i = 0; i < output_block->row_num(); ++i) { char* field3 = output_block->field_ptr(i, 2); // test null bit ASSERT_FALSE(*reinterpret_cast(field3)); uint32_t k3 = *reinterpret_cast(field3 + 1); ASSERT_EQ(num_rows_read, k3); num_rows_read++; } } EXPECT_EQ(OLAP_ERR_DATA_EOF, s); EXPECT_TRUE(output_block == nullptr); EXPECT_EQ(rowset->rowset_meta()->num_rows(), num_rows_read); } // with predicate { std::vector column_predicates; // column predicate: k3 < 100 ColumnPredicate* predicate = new LessPredicate(2, 100); column_predicates.emplace_back(predicate); reader_context.predicates = &column_predicates; RowsetReaderSharedPtr rowset_reader; create_and_init_rowset_reader(rowset.get(), reader_context, &rowset_reader); RowBlock* output_block; uint32_t num_rows_read = 0; while ((s = rowset_reader->next_block(&output_block)) == OLAP_SUCCESS) { ASSERT_TRUE(output_block != nullptr); ASSERT_LE(output_block->row_num(), 100); ASSERT_EQ(0, output_block->pos()); ASSERT_EQ(output_block->row_num(), output_block->limit()); ASSERT_EQ(return_columns, output_block->row_block_info().column_ids); // for unordered result, k3 will be 0, 1, 2, ..., 99 for (int i = 0; i < output_block->row_num(); ++i) { char* field3 = output_block->field_ptr(i, 2); // test null bit ASSERT_FALSE(*reinterpret_cast(field3)); uint32_t k3 = *reinterpret_cast(field3 + 1); ASSERT_EQ(num_rows_read, k3); num_rows_read++; } } EXPECT_EQ(OLAP_ERR_DATA_EOF, s); EXPECT_TRUE(output_block == nullptr); EXPECT_EQ(100, num_rows_read); } } } } // namespace doris int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); }