Files
doris/be/test/vec/exec/vgeneric_iterators_test.cpp
Xinyi Zou e17aef9467 [refactor] refactor the implement of MemTracker, and related usage (#8322)
Modify the implementation of MemTracker:
1. Simplify a lot of useless logic;
2. Added MemTrackerTaskPool as the ancestor of all query and import trackers; it is used to track the local memory usage of all executing tasks;
3. Add a consume/release cache: a consume/release is triggered when the accumulated memory exceeds the parameter mem_tracker_consume_min_size_bytes;
4. Add a new memory leak detection mode (experimental feature): throw an exception when the remaining statistical value is greater than the specified range when the MemTracker is destructed, and print the accurate statistical value in HTTP; this is controlled by the parameter memory_leak_detection;
5. Added Virtual MemTracker, whose consume/release will not sync to its parent. It will be used when introducing the TCMalloc Hook to record memory later, to record the specified memory independently;
6. Modify the GC logic, register the buffer cached in DiskIoMgr as a GC function, and add other GC functions later;
7. Change the global root node from Root MemTracker to Process MemTracker, and remove Process MemTracker in exec_env;
8. Modify the macro that detects whether the memory has reached the upper limit, modify the parameters and default behavior of creating MemTracker, modify the error message format in mem_limit_exceeded, extend and apply transfer_to, remove Metric in MemTracker, etc.;

Modify where MemTracker is used:
1. MemPool adds a constructor to create a temporary tracker to avoid a lot of redundant code;
2. Added trackers for global objects such as ChunkAllocator and StorageEngine;
3. Added more fine-grained trackers such as ExprContext;
4. RuntimeState removes FragmentMemTracker, that is, PlanFragmentExecutor mem_tracker, which was previously used for independent statistical scan process memory, and replaces it with _scanner_mem_tracker in OlapScanNode;
5. MemTracker is no longer recorded in ReservationTracker, and ReservationTracker will be removed later;
2022-03-11 22:04:23 +08:00

316 lines
10 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/olap/vgeneric_iterators.h"

#include <gtest/gtest.h>

#include <cstring>
#include <memory>
#include <vector>

#include "olap/olap_common.h"
#include "olap/row_block2.h"
#include "olap/schema.h"
#include "util/slice.h"
namespace doris {
namespace vectorized {
class VGenericIteratorsTest : public testing::Test {
public:
VGenericIteratorsTest() {}
virtual ~VGenericIteratorsTest() {}
};
// Builds the three-column schema shared by the tests in this file:
// a SMALLINT column, an INT column and a BIGINT column with SUM aggregation.
// Every column passes `true` as its third constructor argument (presumably
// "nullable" — confirm against TabletColumn), and the Schema is built with 2 as
// its second argument (presumably the number of key columns — confirm).
Schema create_schema() {
    std::vector<TabletColumn> columns;
    // c1: small int
    columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_SMALLINT, true);
    // c2: int
    columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_INT, true);
    // c3: big int
    columns.emplace_back(OLAP_FIELD_AGGREGATION_SUM, OLAP_FIELD_TYPE_BIGINT, true);
    return Schema(columns, 2);
}
// Appends one empty column to `block` for every column of `schema`.
//
// For each schema column the matching data type is looked up through
// Schema::get_data_type_ptr; when the schema column reports is_nullable() the
// type is wrapped in DataTypeNullable. The new block column carries the schema
// column's name. A null column descriptor or a null data type fails the
// calling test through the ASSERT_* macros.
static void create_block(Schema& schema, vectorized::Block& block) {
    for (const auto& field : schema.columns()) {
        ASSERT_TRUE(field);

        auto type = Schema::get_data_type_ptr(field->type());
        ASSERT_NE(type, nullptr);
        if (field->is_nullable()) {
            type = std::make_shared<vectorized::DataTypeNullable>(std::move(type));
        }

        vectorized::ColumnWithTypeAndName entry(type->create_column(), type, field->name());
        block.insert(entry);
    }
}
// Checks that an auto-increment iterator created for 10 rows fills the block
// with 10 rows in one next_batch() call, where row i holds (i, i + 1, i + 2)
// in columns 0, 1 and 2.
TEST(VGenericIteratorsTest, AutoIncrement) {
    auto schema = create_schema();
    // Owned by a unique_ptr so the iterator is released even when a failing
    // ASSERT_* returns from the test body early (a trailing `delete` would be
    // skipped in that case).
    std::unique_ptr<RowwiseIterator> iter(vectorized::new_auto_increment_iterator(schema, 10));

    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);

    auto ret = iter->next_batch(&block);
    ASSERT_TRUE(ret.ok());
    ASSERT_EQ(block.rows(), 10);

    auto c0 = block.get_by_position(0).column;
    auto c1 = block.get_by_position(1).column;
    auto c2 = block.get_by_position(2).column;

    const size_t rows = block.rows();
    for (size_t i = 0; i < rows; ++i) {
        const int expected = static_cast<int>(i);
        ASSERT_EQ(expected, (*c0)[i].get<int>());
        ASSERT_EQ(expected + 1, (*c1)[i].get<int>());
        ASSERT_EQ(expected + 2, (*c2)[i].get<int>());
    }
}
// Checks that a union iterator over three auto-increment inputs of 100, 200
// and 300 rows yields all 600 rows, with the rows of each input appearing
// contiguously in input order.
TEST(VGenericIteratorsTest, Union) {
    auto schema = create_schema();

    std::vector<RowwiseIterator*> inputs;
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 100));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300));

    // Owned by a unique_ptr so the iterator is released even when a failing
    // ASSERT_* returns from the test body early.
    // NOTE(review): the inputs are never deleted in this test, so the union
    // iterator presumably takes ownership of them — confirm.
    std::unique_ptr<RowwiseIterator> iter(vectorized::new_union_iterator(
            inputs, MemTracker::create_tracker(-1, "VUnionIterator", nullptr)));

    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);

    // Keep pulling batches into the same block until the iterator stops
    // returning OK; the terminating status must be end-of-file.
    do {
        st = iter->next_batch(&block);
    } while (st.ok());
    ASSERT_TRUE(st.is_end_of_file());
    ASSERT_EQ(block.rows(), 600);

    auto c0 = block.get_by_position(0).column;
    auto c1 = block.get_by_position(1).column;
    auto c2 = block.get_by_position(2).column;

    const size_t rows = block.rows();
    for (size_t i = 0; i < rows; ++i) {
        // Rows [0, 100) come from the first input, [100, 300) from the second
        // and [300, 600) from the third; each input counts up from zero.
        int base_value = static_cast<int>(i);
        if (i >= 300) {
            base_value -= 300;
        } else if (i >= 100) {
            base_value -= 100;
        }
        ASSERT_EQ(base_value, (*c0)[i].get<int>());
        ASSERT_EQ(base_value + 1, (*c1)[i].get<int>());
        ASSERT_EQ(base_value + 2, (*c2)[i].get<int>());
    }
}
// Checks that a merge iterator over three auto-increment inputs of 100, 200
// and 300 rows yields all 600 rows in ascending key order, duplicates included.
TEST(VGenericIteratorsTest, Merge) {
    auto schema = create_schema();

    std::vector<RowwiseIterator*> inputs;
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 100));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200));
    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300));

    // Owned by a unique_ptr so the iterator is released even when a failing
    // ASSERT_* returns from the test body early.
    // NOTE(review): the inputs are never deleted in this test, so the merge
    // iterator presumably takes ownership of them; the trailing -1 presumably
    // means "no sequence column" — confirm both against new_merge_iterator.
    std::unique_ptr<RowwiseIterator> iter(vectorized::new_merge_iterator(
            inputs, MemTracker::create_tracker(-1, "VMergeIterator", nullptr), -1));

    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);

    // Keep pulling batches into the same block until the iterator stops
    // returning OK; the terminating status must be end-of-file.
    do {
        st = iter->next_batch(&block);
    } while (st.ok());
    ASSERT_TRUE(st.is_end_of_file());
    ASSERT_EQ(block.rows(), 600);

    auto c0 = block.get_by_position(0).column;
    auto c1 = block.get_by_position(1).column;
    auto c2 = block.get_by_position(2).column;

    const size_t rows = block.rows();
    for (size_t i = 0; i < rows; ++i) {
        // 100 * 3, 200 * 2, 300: values 0..99 occur in all three inputs,
        // 100..199 in two of them and 200..299 only in the largest one.
        size_t expected = 0;
        if (i < 300) {
            expected = i / 3;
        } else if (i < 500) {
            expected = (i - 300) / 2 + 100;
        } else {
            expected = i - 300;
        }
        const int base_value = static_cast<int>(expected);
        ASSERT_EQ(base_value, (*c0)[i].get<int>());
        ASSERT_EQ(base_value + 1, (*c1)[i].get<int>());
        ASSERT_EQ(base_value + 2, (*c2)[i].get<int>());
    }
}
// Test-only RowwiseIterator used by the sequence-column unit test.
//
// next_batch() appends rows to the caller's block until `_rows_returned`
// reaches `_num_rows`. In every generated row the column at `_seq_col_idx`
// receives the current sequence counter (which starts at the constructor's
// `seq_col_rows_returned` and grows by one per row) and every other column
// receives the constant 1. Only SMALLINT, INT, BIGINT, FLOAT and DOUBLE columns
// get a value; for any other column type a zero-length insert is issued.
class SeqColumnUtIterator : public RowwiseIterator {
public:
    // Will generate rows until the row counter reaches num_rows.
    //
    // schema:                stored by reference, so it must outlive this iterator.
    // num_rows:              upper bound for the row counter.
    // rows_returned:         initial value of the row counter.
    // seq_col_idx:           index of the column that receives the sequence value.
    // seq_col_rows_returned: initial value of the sequence counter.
    SeqColumnUtIterator(const Schema& schema, size_t num_rows, size_t rows_returned,
                        size_t seq_col_idx, size_t seq_col_rows_returned)
            : _schema(schema),
              _num_rows(num_rows),
              _rows_returned(rows_returned),
              // The members are ints; make the narrowing from size_t explicit.
              _seq_col_idx(static_cast<int>(seq_col_idx)),
              _seq_col_rows_returned(static_cast<int>(seq_col_rows_returned)) {}

    ~SeqColumnUtIterator() override = default;

    // NOTE: Currently, this function will ignore StorageReadOptions
    Status init(const StorageReadOptions& opts) override { return Status::OK(); }

    // Appends all remaining rows to `block` in one call.
    // Returns OK when at least one row was appended, EndOfFile otherwise.
    Status next_batch(vectorized::Block* block) override {
        size_t rows_appended = 0;
        const int num_columns = static_cast<int>(_schema.num_columns());
        while (_rows_returned < _num_rows) {
            for (int j = 0; j < num_columns; ++j) {
                vectorized::ColumnWithTypeAndName& vc = block->get_by_position(j);
                // The block hands out its column through a const access path;
                // the test appends in place, so constness is dropped explicitly.
                auto& vi = const_cast<vectorized::IColumn&>(*vc.column);

                char data[16] = {};
                size_t data_len = 0;
                const int value = (j == _seq_col_idx) ? _seq_col_rows_returned : 1;
                const auto* col_schema = _schema.column(j);
                switch (col_schema->type()) {
                case OLAP_FIELD_TYPE_SMALLINT:
                    data_len = store_as<int16_t>(data, value);
                    break;
                case OLAP_FIELD_TYPE_INT:
                    data_len = store_as<int32_t>(data, value);
                    break;
                case OLAP_FIELD_TYPE_BIGINT:
                    data_len = store_as<int64_t>(data, value);
                    break;
                case OLAP_FIELD_TYPE_FLOAT:
                    data_len = store_as<float>(data, value);
                    break;
                case OLAP_FIELD_TYPE_DOUBLE:
                    data_len = store_as<double>(data, value);
                    break;
                default:
                    // Unhandled type: the zero-filled buffer is inserted with length 0.
                    break;
                }
                vi.insert_data(data, data_len);
            }
            ++_rows_returned;
            ++_seq_col_rows_returned;
            ++rows_appended;
        }
        if (rows_appended > 0) {
            return Status::OK();
        }
        // NOTE(review): the message names VAutoIncrementIterator although this is
        // SeqColumnUtIterator; kept unchanged because it is a runtime string.
        return Status::EndOfFile("End of VAutoIncrementIterator");
    }

    const Schema& schema() const override { return _schema; }

    const Schema& _schema;
    size_t _num_rows;
    size_t _rows_returned;
    int _seq_col_idx = -1;
    int _seq_col_rows_returned = -1;

private:
    // Converts `value` to T and copies its bytes to `dst`; returns sizeof(T).
    // memcpy is used instead of writing through a casted pointer so the store
    // does not depend on the alignment of `dst` or break strict-aliasing rules.
    template <typename T>
    static size_t store_as(char* dst, int value) {
        const T typed = static_cast<T>(value);
        std::memcpy(dst, &typed, sizeof(T));
        return sizeof(T);
    }
};
// Checks the merge iterator when a sequence column index is supplied.
//
// Ten SeqColumnUtIterator inputs each produce a single row. All non-sequence
// columns hold 1 in every row, so the keys are identical across inputs, while
// the sequence column of input i holds i. The inputs are therefore supplied in
// ascending sequence order, and the merged block is expected to contain all
// ten rows with the sequence column in descending order.
TEST(VGenericIteratorsTest, MergeWithSeqColumn) {
    auto schema = create_schema();

    const int seq_column_id = 2;
    const int seg_iter_num = 10;
    const int num_rows = 1;
    const int rows_begin = 0;

    std::vector<RowwiseIterator*> inputs;
    for (int i = 0; i < seg_iter_num; ++i) {
        const int seq_id_in_every_file = i;
        inputs.push_back(new SeqColumnUtIterator(schema, num_rows, rows_begin, seq_column_id,
                                                 seq_id_in_every_file));
    }

    // Owned by a unique_ptr so the iterator is released even when a failing
    // ASSERT_* returns from the test body early.
    // NOTE(review): the inputs are never deleted in this test, so the merge
    // iterator presumably takes ownership of them — confirm.
    std::unique_ptr<RowwiseIterator> iter(vectorized::new_merge_iterator(
            inputs, MemTracker::create_tracker(-1, "VMergeIterator", nullptr), seq_column_id));

    StorageReadOptions opts;
    auto st = iter->init(opts);
    ASSERT_TRUE(st.ok());

    vectorized::Block block;
    create_block(schema, block);

    // Keep pulling batches into the same block until the iterator stops
    // returning OK; the terminating status must be end-of-file.
    do {
        st = iter->next_batch(&block);
    } while (st.ok());
    ASSERT_TRUE(st.is_end_of_file());
    ASSERT_EQ(block.rows(), static_cast<size_t>(seg_iter_num));

    auto seq_col = block.get_by_position(seq_column_id).column;
    for (size_t row = 0; row < static_cast<size_t>(seg_iter_num); ++row) {
        const int expected_value = seg_iter_num - static_cast<int>(row) - 1; // in Descending
        const int actual_value = (*seq_col)[row].get<int>();
        ASSERT_EQ(expected_value, actual_value);
    }
}
} // namespace vectorized
} // namespace doris
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}