doris/be/test/runtime/buffered_tuple_stream2_test.cpp
HuangWei 10f822eb43 [MemTracker] make all MemTrackers shared (#4135)
We make all MemTrackers shared so that real-time MemTracker consumption can be shown on the web UI.
The changes are as follows:
1. Nearly all raw MemTracker pointers become shared_ptr.
2. New MemTrackers are created via CreateTracker(), so that each tracker registers itself with its parent.
3. RowBatch and MemPool still take raw MemTracker pointers: it is easy to guarantee that the RowBatch and
   MemPool destructors run before the MemTracker's destructor, so that code is left unchanged (see the sketch below).
4. A MemTracker can use a RuntimeProfile counter to compute its consumption, so that counter must be shared as
   well. We add a shared counter pool to hold these shared counters; the other RuntimeProfile counters are unchanged.
Note that this PR does not change the MemTracker tree structure, so some orphan trackers remain, e.g. RowBlockV2's MemTracker. If a shared MemTracker tracks little memory but is too costly to maintain, it can be made an orphan, and then using a raw pointer is fine.
2020-07-31 21:57:21 +08:00
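
A minimal sketch of the ownership pattern from item 3 above, mirroring how the test fixture below uses the tracker
(shared_ptr owner, raw pointers handed to MemPool and RowBatch). The constructor calls are taken from this file;
the header paths and the function name ownership_sketch are illustrative assumptions, not a complete program.

// Sketch only (assumed headers): the owner holds the MemTracker as a shared_ptr
// and hands raw pointers to MemPool / RowBatch, which are destroyed first.
#include <memory>
#include "runtime/mem_pool.h"
#include "runtime/mem_tracker.h"
#include "runtime/row_batch.h"

void ownership_sketch(const doris::RowDescriptor& row_desc) {
    // Item 1: the owner keeps the tracker as a shared_ptr (-1 = no byte limit).
    std::shared_ptr<doris::MemTracker> tracker(new doris::MemTracker(-1));
    // Item 3: RowBatch and MemPool still take a raw MemTracker*. Both are
    // destroyed at the end of this scope, before `tracker` drops its last
    // reference, so the raw pointer never outlives the tracker.
    doris::MemPool pool(tracker.get());
    doris::RowBatch batch(row_desc, /* capacity */ 1024, tracker.get());
    // ... fill and consume the batch ...
}  // batch, then pool are destructed here; tracker is released last.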

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/buffered_tuple_stream2.inline.h"
#include <boost/scoped_ptr.hpp>
#include <boost/bind.hpp>
#include <boost/filesystem.hpp>
#include <gtest/gtest.h>
#include <string>
#include <limits> // for std::numeric_limits<int>::max()
#include "runtime/types.h"
#include "runtime/row_batch.h"
#include "runtime/string_value.hpp"
#include "runtime/test_env.h"
#include "runtime/tmp_file_mgr.h"
#include "testutil/desc_tbl_builder.h"
#include "util/logging.h"
#include "util/disk_info.h"
#include "util/cpu_info.h"
#include "util/debug_util.h"
#include "gen_cpp/Types_types.h"
using std::vector;
using boost::scoped_ptr;
static const int BATCH_SIZE = 250;
static const uint32_t PRIME = 479001599;
namespace doris {
static const StringValue STRINGS[] = {
StringValue("ABC"),
StringValue("HELLO"),
StringValue("123456789"),
StringValue("FOOBAR"),
StringValue("ONE"),
StringValue("THREE"),
StringValue("abcdefghijklmno"),
StringValue("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
StringValue("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
};
static const int NUM_STRINGS = sizeof(STRINGS) / sizeof(StringValue);
class SimpleTupleStreamTest : public testing::Test {
public:
SimpleTupleStreamTest() : _tracker(new MemTracker(-1)) {}
// An empty destructor to pass the code style check
~SimpleTupleStreamTest() {}
protected:
virtual void SetUp() {
_test_env.reset(new TestEnv());
create_descriptors();
_mem_pool.reset(new MemPool(_tracker.get()));
}
virtual void create_descriptors() {
vector<bool> nullable_tuples(1, false);
vector<TTupleId> tuple_ids(1, static_cast<TTupleId>(0));
DescriptorTblBuilder int_builder(&_pool);
int_builder.declare_tuple() << TYPE_INT;
_int_desc = _pool.add(new RowDescriptor(*int_builder.build(), tuple_ids, nullable_tuples));
DescriptorTblBuilder string_builder(&_pool);
// string_builder.declare_tuple() << TYPE_STRING;
string_builder.declare_tuple() << TYPE_VARCHAR;
_string_desc = _pool.add(new RowDescriptor(
*string_builder.build(), tuple_ids, nullable_tuples));
}
virtual void TearDown() {
_runtime_state = NULL;
_client = NULL;
_pool.clear();
_mem_pool->free_all();
_test_env.reset();
}
// Set up a block manager with the provided settings, and a client with no reservation,
// tracked by _tracker.
void InitBlockMgr(int64_t limit, int block_size) {
Status status = _test_env->create_query_state(0, limit, block_size, &_runtime_state);
ASSERT_TRUE(status.ok());
status = _runtime_state->block_mgr2()->register_client(0, _tracker, _runtime_state,
&_client);
ASSERT_TRUE(status.ok());
}
// Generate the ith element of a sequence of int values.
int GenIntValue(int i) {
// Multiply by large prime to get varied bit patterns.
return i * PRIME;
}
// Generate the ith element of a sequence of bool values.
bool GenBoolValue(int i) {
// Use a middle bit of the int value.
return ((GenIntValue(i) >> 8) & 0x1) != 0;
}
virtual RowBatch* CreateIntBatch(int offset, int num_rows, bool gen_null) {
RowBatch* batch = _pool.add(new RowBatch(*_int_desc, num_rows, _tracker.get()));
int tuple_size = _int_desc->tuple_descriptors()[0]->byte_size();
uint8_t* tuple_mem = reinterpret_cast<uint8_t*>(
batch->tuple_data_pool()->allocate(tuple_size * num_rows));
memset(tuple_mem, 0, tuple_size * num_rows);
const int int_tuples = _int_desc->tuple_descriptors().size();
for (int i = 0; i < num_rows; ++i) {
int idx = batch->add_row();
TupleRow* row = batch->get_row(idx);
Tuple* int_tuple = reinterpret_cast<Tuple*>(tuple_mem + i * tuple_size);
// *reinterpret_cast<int*>(int_tuple + 1) = GenIntValue(i + offset);
*reinterpret_cast<int*>(reinterpret_cast<uint8_t*>(int_tuple) + 1) =
GenIntValue(i + offset);
for (int j = 0; j < int_tuples; ++j) {
int idx = (i + offset) * int_tuples + j;
if (!gen_null || GenBoolValue(idx)) {
row->set_tuple(j, int_tuple);
} else {
row->set_tuple(j, NULL);
}
}
batch->commit_last_row();
}
return batch;
}
virtual RowBatch* CreateStringBatch(int offset, int num_rows, bool gen_null) {
int tuple_size = sizeof(StringValue) + 1;
RowBatch* batch = _pool.add(new RowBatch(*_string_desc, num_rows, _tracker.get()));
uint8_t* tuple_mem = batch->tuple_data_pool()->allocate(tuple_size * num_rows);
memset(tuple_mem, 0, tuple_size * num_rows);
const int string_tuples = _string_desc->tuple_descriptors().size();
for (int i = 0; i < num_rows; ++i) {
TupleRow* row = batch->get_row(batch->add_row());
*reinterpret_cast<StringValue*>(tuple_mem + 1) = STRINGS[(i + offset) % NUM_STRINGS];
for (int j = 0; j < string_tuples; ++j) {
int idx = (i + offset) * string_tuples + j;
if (!gen_null || GenBoolValue(idx)) {
row->set_tuple(j, reinterpret_cast<Tuple*>(tuple_mem));
} else {
row->set_tuple(j, NULL);
}
}
batch->commit_last_row();
tuple_mem += tuple_size;
}
return batch;
}
void AppendRowTuples(TupleRow* row, vector<int>* results) {
DCHECK(row != NULL);
const int int_tuples = _int_desc->tuple_descriptors().size();
for (int i = 0; i < int_tuples; ++i) {
AppendValue(row->get_tuple(i), results);
}
}
void AppendRowTuples(TupleRow* row, vector<StringValue>* results) {
DCHECK(row != NULL);
const int string_tuples = _string_desc->tuple_descriptors().size();
for (int i = 0; i < string_tuples; ++i) {
AppendValue(row->get_tuple(i), results);
}
}
void AppendValue(Tuple* t, vector<int>* results) {
if (t == NULL) {
// For these tests, a NULL tuple is indicated by the max int value.
results->push_back(std::numeric_limits<int>::max());
} else {
results->push_back(*reinterpret_cast<int*>(reinterpret_cast<uint8_t*>(t) + 1));
}
}
void AppendValue(Tuple* t, vector<StringValue>* results) {
if (t == NULL) {
results->push_back(StringValue());
} else {
uint8_t* mem = reinterpret_cast<uint8_t*>(t);
StringValue sv = *reinterpret_cast<StringValue*>(mem + 1);
uint8_t* copy = _mem_pool->allocate(sv.len);
memcpy(copy, sv.ptr, sv.len);
sv.ptr = reinterpret_cast<char*>(copy);
results->push_back(sv);
}
}
template <typename T>
void ReadValues(BufferedTupleStream2* stream, RowDescriptor* desc, vector<T>* results,
int num_batches = -1) {
bool eos = false;
RowBatch batch(*desc, BATCH_SIZE, _tracker.get());
int batches_read = 0;
do {
batch.reset();
Status status = stream->get_next(&batch, &eos);
EXPECT_TRUE(status.ok());
++batches_read;
for (int i = 0; i < batch.num_rows(); ++i) {
AppendRowTuples(batch.get_row(i), results);
}
} while (!eos && (num_batches < 0 || batches_read <= num_batches));
}
virtual void VerifyResults(const vector<int>& results, int exp_rows, bool gen_null) {
const int int_tuples = _int_desc->tuple_descriptors().size();
EXPECT_EQ(results.size(), exp_rows * int_tuples);
for (int i = 0; i < exp_rows; ++i) {
for (int j = 0; j < int_tuples; ++j) {
int idx = i * int_tuples + j;
if (!gen_null || GenBoolValue(idx)) {
ASSERT_EQ(results[idx], GenIntValue(i))
<< " results[" << idx << "]: " << results[idx]
<< " != " << GenIntValue(i) << " gen_null=" << gen_null;
} else {
ASSERT_TRUE(results[idx] == std::numeric_limits<int>::max())
<< "i: " << i << " j: " << j << " results[" << idx << "]: "
<< results[idx] << " != " << std::numeric_limits<int>::max();
}
}
}
}
virtual void VerifyResults(const vector<StringValue>& results, int exp_rows,
bool gen_null) {
const int string_tuples = _string_desc->tuple_descriptors().size();
EXPECT_EQ(results.size(), exp_rows * string_tuples);
for (int i = 0; i < exp_rows; ++i) {
for (int j = 0; j < string_tuples; ++j) {
int idx = i * string_tuples + j;
if (!gen_null || GenBoolValue(idx)) {
ASSERT_TRUE(results[idx] == STRINGS[i % NUM_STRINGS])
<< "results[" << idx << "] " << results[idx]
<< " != " << STRINGS[i % NUM_STRINGS] << " i=" << i << " gen_null="
<< gen_null;
} else {
ASSERT_TRUE(results[idx] == StringValue())
<< "results[" << idx << "] " << results[idx] << " not NULL";
}
}
}
}
// Test adding num_batches of values (ints or StringValues, depending on T) to the stream
// and reading them back.
template <typename T>
void TestValues(int num_batches, RowDescriptor* desc, bool gen_null) {
BufferedTupleStream2 stream(_runtime_state, *desc, _runtime_state->block_mgr2(),
_client, true, false);
Status status = stream.init(-1, NULL, true);
ASSERT_TRUE(status.ok()) << status.get_error_msg();
status = stream.unpin_stream();
ASSERT_TRUE(status.ok());
// Add rows to the stream
int offset = 0;
for (int i = 0; i < num_batches; ++i) {
RowBatch* batch = NULL;
if (sizeof(T) == sizeof(int)) {
batch = CreateIntBatch(offset, BATCH_SIZE, gen_null);
} else if (sizeof(T) == sizeof(StringValue)) {
batch = CreateStringBatch(offset, BATCH_SIZE, gen_null);
} else {
DCHECK(false);
}
for (int j = 0; j < batch->num_rows(); ++j) {
bool b = stream.add_row(batch->get_row(j), &status);
ASSERT_TRUE(status.ok());
if (!b) {
ASSERT_TRUE(stream.using_small_buffers());
bool got_buffer;
status = stream.switch_to_io_buffers(&got_buffer);
ASSERT_TRUE(status.ok());
ASSERT_TRUE(got_buffer);
b = stream.add_row(batch->get_row(j), &status);
ASSERT_TRUE(status.ok());
}
ASSERT_TRUE(b);
}
offset += batch->num_rows();
// Reset the batch to make sure the stream handles the memory correctly.
batch->reset();
}
status = stream.prepare_for_read(false);
ASSERT_TRUE(status.ok());
// Read all the rows back
vector<T> results;
ReadValues(&stream, desc, &results);
// Verify result
VerifyResults(results, BATCH_SIZE * num_batches, gen_null);
stream.close();
}
void TestIntValuesInterleaved(int num_batches, int num_batches_before_read) {
for (int small_buffers = 0; small_buffers < 2; ++small_buffers) {
BufferedTupleStream2 stream(_runtime_state, *_int_desc, _runtime_state->block_mgr2(),
_client, small_buffers == 0, // initial small buffers
true); // read_write
Status status = stream.init(-1, NULL, true);
ASSERT_TRUE(status.ok());
status = stream.prepare_for_read(true);
ASSERT_TRUE(status.ok());
status = stream.unpin_stream();
ASSERT_TRUE(status.ok());
vector<int> results;
for (int i = 0; i < num_batches; ++i) {
RowBatch* batch = CreateIntBatch(i * BATCH_SIZE, BATCH_SIZE, false);
for (int j = 0; j < batch->num_rows(); ++j) {
bool b = stream.add_row(batch->get_row(j), &status);
ASSERT_TRUE(b);
ASSERT_TRUE(status.ok());
}
// Reset the batch to make sure the stream handles the memory correctly.
batch->reset();
if (i % num_batches_before_read == 0) {
ReadValues(&stream, _int_desc, &results,
(rand() % num_batches_before_read) + 1);
}
}
ReadValues(&stream, _int_desc, &results);
VerifyResults(results, BATCH_SIZE * num_batches, false);
stream.close();
}
}
scoped_ptr<TestEnv> _test_env;
RuntimeState* _runtime_state;
BufferedBlockMgr2::Client* _client;
std::shared_ptr<MemTracker> _tracker;
ObjectPool _pool;
RowDescriptor* _int_desc;
RowDescriptor* _string_desc;
scoped_ptr<MemPool> _mem_pool;
};
// Tests with a single NULLable tuple per row.
class SimpleNullStreamTest : public SimpleTupleStreamTest {
protected:
virtual void create_descriptors() {
vector<bool> nullable_tuples(1, true);
vector<TTupleId> tuple_ids(1, static_cast<TTupleId>(0));
DescriptorTblBuilder int_builder(&_pool);
int_builder.declare_tuple() << TYPE_INT;
_int_desc = _pool.add(new RowDescriptor(
*int_builder.build(), tuple_ids, nullable_tuples));
DescriptorTblBuilder string_builder(&_pool);
string_builder.declare_tuple() << TYPE_VARCHAR;
_string_desc = _pool.add(new RowDescriptor(
*string_builder.build(), tuple_ids, nullable_tuples));
}
}; // SimpleNullStreamTest
// Tests with multiple non-NULLable tuples per row.
class MultiTupleStreamTest : public SimpleTupleStreamTest {
protected:
virtual void create_descriptors() {
vector<bool> nullable_tuples;
nullable_tuples.push_back(false);
nullable_tuples.push_back(false);
nullable_tuples.push_back(false);
vector<TTupleId> tuple_ids;
tuple_ids.push_back(static_cast<TTupleId>(0));
tuple_ids.push_back(static_cast<TTupleId>(1));
tuple_ids.push_back(static_cast<TTupleId>(2));
DescriptorTblBuilder int_builder(&_pool);
int_builder.declare_tuple() << TYPE_INT;
int_builder.declare_tuple() << TYPE_INT;
int_builder.declare_tuple() << TYPE_INT;
_int_desc = _pool.add(new RowDescriptor(
*int_builder.build(), tuple_ids, nullable_tuples));
DescriptorTblBuilder string_builder(&_pool);
string_builder.declare_tuple() << TYPE_VARCHAR;
string_builder.declare_tuple() << TYPE_VARCHAR;
string_builder.declare_tuple() << TYPE_VARCHAR;
_string_desc = _pool.add(new RowDescriptor(
*string_builder.build(), tuple_ids, nullable_tuples));
}
};
// Tests with multiple NULLable tuples per row.
class MultiNullableTupleStreamTest : public SimpleTupleStreamTest {
protected:
virtual void create_descriptors() {
vector<bool> nullable_tuples;
nullable_tuples.push_back(false);
nullable_tuples.push_back(true);
nullable_tuples.push_back(true);
vector<TTupleId> tuple_ids;
tuple_ids.push_back(static_cast<TTupleId>(0));
tuple_ids.push_back(static_cast<TTupleId>(1));
tuple_ids.push_back(static_cast<TTupleId>(2));
DescriptorTblBuilder int_builder(&_pool);
int_builder.declare_tuple() << TYPE_INT;
int_builder.declare_tuple() << TYPE_INT;
int_builder.declare_tuple() << TYPE_INT;
_int_desc = _pool.add(new RowDescriptor(
*int_builder.build(), tuple_ids, nullable_tuples));
DescriptorTblBuilder string_builder(&_pool);
string_builder.declare_tuple() << TYPE_VARCHAR;
string_builder.declare_tuple() << TYPE_VARCHAR;
string_builder.declare_tuple() << TYPE_VARCHAR;
_string_desc = _pool.add(new RowDescriptor(
*string_builder.build(), tuple_ids, nullable_tuples));
}
};
#if 0
// Tests with collection types.
class ArrayTupleStreamTest : public SimpleTupleStreamTest {
protected:
RowDescriptor* _array_desc;
virtual void create_descriptors() {
// tuples: (array<string>, array<array<int>>) (array<int>)
vector<bool> nullable_tuples(2, true);
vector<TTupleId> tuple_ids;
tuple_ids.push_back(static_cast<TTupleId>(0));
tuple_ids.push_back(static_cast<TTupleId>(1));
TypeDescriptor string_array_type;
string_array_type.type = TYPE_ARRAY;
string_array_type.children.push_back(TYPE_VARCHAR);
TypeDescriptor int_array_type;
int_array_type.type = TYPE_ARRAY;
int_array_type.children.push_back(TYPE_INT);
TypeDescriptor nested_array_type;
nested_array_type.type = TYPE_ARRAY;
nested_array_type.children.push_back(int_array_type);
DescriptorTblBuilder builder(&_pool);
builder.declare_tuple() << string_array_type << nested_array_type;
builder.declare_tuple() << int_array_type;
_array_desc = _pool.add(new RowDescriptor(
*builder.build(), tuple_ids, nullable_tuples));
}
};
#endif
// Basic API test. No data should be going to disk.
TEST_F(SimpleTupleStreamTest, Basic) {
InitBlockMgr(-1, 8 * 1024 * 1024);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<int>(100, _int_desc, false);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
TestValues<StringValue>(100, _string_desc, false);
TestIntValuesInterleaved(1, 1);
TestIntValuesInterleaved(10, 5);
TestIntValuesInterleaved(100, 15);
}
// #if 0
// Test with only 1 buffer.
TEST_F(SimpleTupleStreamTest, OneBufferSpill) {
// Each buffer can only hold 100 ints, so this spills quite often.
int buffer_size = 100 * sizeof(int);
InitBlockMgr(buffer_size, buffer_size);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
}
// Test with a few buffers.
TEST_F(SimpleTupleStreamTest, ManyBufferSpill) {
int buffer_size = 100 * sizeof(int);
InitBlockMgr(10 * buffer_size, buffer_size);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<int>(100, _int_desc, false);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
TestValues<StringValue>(100, _string_desc, false);
TestIntValuesInterleaved(1, 1);
TestIntValuesInterleaved(10, 5);
TestIntValuesInterleaved(100, 15);
}
TEST_F(SimpleTupleStreamTest, UnpinPin) {
int buffer_size = 100 * sizeof(int);
InitBlockMgr(3 * buffer_size, buffer_size);
BufferedTupleStream2 stream(_runtime_state, *_int_desc, _runtime_state->block_mgr2(),
_client, true, false);
Status status = stream.init(-1, NULL, true);
ASSERT_TRUE(status.ok());
int offset = 0;
bool full = false;
while (!full) {
RowBatch* batch = CreateIntBatch(offset, BATCH_SIZE, false);
int j = 0;
for (; j < batch->num_rows(); ++j) {
full = !stream.add_row(batch->get_row(j), &status);
ASSERT_TRUE(status.ok());
if (full) {
break;
}
}
offset += j;
}
status = stream.unpin_stream();
ASSERT_TRUE(status.ok());
bool pinned = false;
status = stream.pin_stream(false, &pinned);
ASSERT_TRUE(status.ok());
ASSERT_TRUE(pinned);
vector<int> results;
// Read and verify result a few times. We should be able to reread the stream if
// we don't use delete on read mode.
int read_iters = 3;
for (int i = 0; i < read_iters; ++i) {
bool delete_on_read = i == read_iters - 1;
status = stream.prepare_for_read(delete_on_read);
ASSERT_TRUE(status.ok());
results.clear();
ReadValues(&stream, _int_desc, &results);
VerifyResults(results, offset, false);
}
// After delete_on_read, all blocks aside from the last should be deleted.
// Note: this should really be 0, but the BufferedTupleStream2 returns eos before
// deleting the last block, rather than after, so the last block isn't deleted
// until the stream is closed.
DCHECK_EQ(stream.bytes_in_mem(false), buffer_size);
stream.close();
DCHECK_EQ(stream.bytes_in_mem(false), 0);
}
TEST_F(SimpleTupleStreamTest, SmallBuffers) {
int buffer_size = 8 * 1024 * 1024;
InitBlockMgr(2 * buffer_size, buffer_size);
BufferedTupleStream2 stream(_runtime_state, *_int_desc, _runtime_state->block_mgr2(),
_client, true, false);
Status status = stream.init(-1, NULL, false);
ASSERT_TRUE(status.ok());
// Initial buffer should be small.
EXPECT_LT(stream.bytes_in_mem(false), buffer_size);
RowBatch* batch = CreateIntBatch(0, 1024, false);
for (int i = 0; i < batch->num_rows(); ++i) {
bool ret = stream.add_row(batch->get_row(i), &status);
EXPECT_TRUE(ret);
ASSERT_TRUE(status.ok());
}
EXPECT_LT(stream.bytes_in_mem(false), buffer_size);
EXPECT_LT(stream.byte_size(), buffer_size);
ASSERT_TRUE(stream.using_small_buffers());
// 40 MB of ints
batch = CreateIntBatch(0, 10 * 1024 * 1024, false);
for (int i = 0; i < batch->num_rows(); ++i) {
bool ret = stream.add_row(batch->get_row(i), &status);
ASSERT_TRUE(status.ok());
if (!ret) {
ASSERT_TRUE(stream.using_small_buffers());
bool got_buffer;
status = stream.switch_to_io_buffers(&got_buffer);
ASSERT_TRUE(status.ok());
ASSERT_TRUE(got_buffer);
ret = stream.add_row(batch->get_row(i), &status);
ASSERT_TRUE(status.ok());
}
ASSERT_TRUE(ret);
}
EXPECT_EQ(stream.bytes_in_mem(false), buffer_size);
// TODO: Test for IMPALA-2330: if switch_to_io_buffers() fails to get a buffer,
// using_small_buffers() should still return true.
stream.close();
}
// Basic API test. No data should be going to disk.
TEST_F(SimpleNullStreamTest, Basic) {
InitBlockMgr(-1, 8 * 1024 * 1024);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<int>(100, _int_desc, false);
TestValues<int>(1, _int_desc, true);
TestValues<int>(10, _int_desc, true);
TestValues<int>(100, _int_desc, true);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
TestValues<StringValue>(100, _string_desc, false);
TestValues<StringValue>(1, _string_desc, true);
TestValues<StringValue>(10, _string_desc, true);
TestValues<StringValue>(100, _string_desc, true);
TestIntValuesInterleaved(1, 1);
TestIntValuesInterleaved(10, 5);
TestIntValuesInterleaved(100, 15);
}
// Test tuple stream with only 1 buffer and rows with multiple tuples.
TEST_F(MultiTupleStreamTest, MultiTupleOneBufferSpill) {
// Each buffer can only hold 100 ints, so this spills quite often.
int buffer_size = 100 * sizeof(int);
InitBlockMgr(buffer_size, buffer_size);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
}
// Test with a few buffers and rows with multiple tuples.
TEST_F(MultiTupleStreamTest, MultiTupleManyBufferSpill) {
int buffer_size = 100 * sizeof(int);
InitBlockMgr(10 * buffer_size, buffer_size);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<int>(100, _int_desc, false);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
TestValues<StringValue>(100, _string_desc, false);
TestIntValuesInterleaved(1, 1);
TestIntValuesInterleaved(10, 5);
TestIntValuesInterleaved(100, 15);
}
// Test with rows with multiple nullable tuples.
TEST_F(MultiNullableTupleStreamTest, MultiNullableTupleOneBufferSpill) {
// Each buffer can only hold 100 ints, so this spills quite often.
int buffer_size = 100 * sizeof(int);
InitBlockMgr(buffer_size, buffer_size);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<int>(1, _int_desc, true);
TestValues<int>(10, _int_desc, true);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
TestValues<StringValue>(1, _string_desc, true);
TestValues<StringValue>(10, _string_desc, true);
}
// Test with a few buffers.
TEST_F(MultiNullableTupleStreamTest, MultiNullableTupleManyBufferSpill) {
int buffer_size = 100 * sizeof(int);
InitBlockMgr(10 * buffer_size, buffer_size);
TestValues<int>(1, _int_desc, false);
TestValues<int>(10, _int_desc, false);
TestValues<int>(100, _int_desc, false);
TestValues<int>(1, _int_desc, true);
TestValues<int>(10, _int_desc, true);
TestValues<int>(100, _int_desc, true);
TestValues<StringValue>(1, _string_desc, false);
TestValues<StringValue>(10, _string_desc, false);
TestValues<StringValue>(100, _string_desc, false);
TestValues<StringValue>(1, _string_desc, true);
TestValues<StringValue>(10, _string_desc, true);
TestValues<StringValue>(100, _string_desc, true);
TestIntValuesInterleaved(1, 1);
TestIntValuesInterleaved(10, 5);
TestIntValuesInterleaved(100, 15);
}
// #endif
#if 0
// Test that deep copy works with arrays by copying into a BufferedTupleStream2, freeing
// the original rows, then reading back the rows and verifying the contents.
TEST_F(ArrayTupleStreamTest, TestArrayDeepCopy) {
Status status;
InitBlockMgr(-1, 8 * 1024 * 1024);
const int NUM_ROWS = 4000;
BufferedTupleStream2 stream(_runtime_state, *_array_desc, _runtime_state->block_mgr2(),
_client, false, false);
const vector<TupleDescriptor*>& tuple_descs = _array_desc->tuple_descriptors();
// Write out a predictable pattern of data by iterating over arrays of constants.
int strings_index = 0; // we take the mod of this as index into STRINGS.
int array_lens[] = { 0, 1, 5, 10, 1000, 2, 49, 20 };
int num_array_lens = sizeof(array_lens) / sizeof(array_lens[0]);
int array_len_index = 0;
for (int i = 0; i < NUM_ROWS; ++i) {
int expected_row_size = tuple_descs[0]->byte_size() + tuple_descs[1]->byte_size();
// gscoped_ptr<TupleRow, FreeDeleter> row(reinterpret_cast<TupleRow*>(
// malloc(tuple_descs.size() * sizeof(Tuple*))));
// gscoped_ptr<Tuple, FreeDeleter> tuple0(reinterpret_cast<Tuple*>(
// malloc(tuple_descs[0]->byte_size())));
// gscoped_ptr<Tuple, FreeDeleter> tuple1(reinterpret_cast<Tuple*>(
// malloc(tuple_descs[1]->byte_size())));
scoped_ptr<TupleRow> row(reinterpret_cast<TupleRow*>(
malloc(tuple_descs.size() * sizeof(Tuple*))));
scoped_ptr<Tuple> tuple0(reinterpret_cast<Tuple*>(
malloc(tuple_descs[0]->byte_size())));
scoped_ptr<Tuple> tuple1(reinterpret_cast<Tuple*>(
malloc(tuple_descs[1]->byte_size())));
memset(tuple0.get(), 0, tuple_descs[0]->byte_size());
memset(tuple1.get(), 0, tuple_descs[1]->byte_size());
row->set_tuple(0, tuple0.get());
row->set_tuple(1, tuple1.get());
// Only array<string> is non-null.
tuple0->set_null(tuple_descs[0]->slots()[1]->null_indicator_offset());
tuple1->set_null(tuple_descs[1]->slots()[0]->null_indicator_offset());
const SlotDescriptor* array_slot_desc = tuple_descs[0]->slots()[0];
const TupleDescriptor* item_desc = array_slot_desc->collection_item_descriptor();
int array_len = array_lens[array_len_index++ % num_array_lens];
CollectionValue* cv = tuple0->GetCollectionSlot(array_slot_desc->tuple_offset());
cv->ptr = NULL;
cv->num_tuples = 0;
CollectionValueBuilder builder(cv, *item_desc, _mem_pool.get(), array_len);
Tuple* array_data;
builder.GetFreeMemory(&array_data);
expected_row_size += item_desc->byte_size() * array_len;
// Fill the array with pointers to our constant strings.
for (int j = 0; j < array_len; ++j) {
const StringValue* string = &STRINGS[strings_index++ % NUM_STRINGS];
array_data->SetNotNull(item_desc->slots()[0]->null_indicator_offset());
RawValue::Write(string, array_data, item_desc->slots()[0], _mem_pool.get());
array_data += item_desc->byte_size();
expected_row_size += string->len;
}
builder.CommitTuples(array_len);
// Check that internal row size computation gives correct result.
EXPECT_EQ(expected_row_size, stream.ComputeRowSize(row.get()));
bool b = stream.add_row(row.get(), &status);
ASSERT_TRUE(b);
ASSERT_TRUE(status.ok());
_mem_pool->FreeAll(); // Free data as soon as possible to smoke out issues.
}
// Read back and verify data.
stream.prepare_for_read(false);
strings_index = 0;
array_len_index = 0;
bool eos = false;
int rows_read = 0;
RowBatch batch(*_array_desc, BATCH_SIZE, _tracker.get());
do {
batch.reset();
ASSERT_TRUE(stream.get_next(&batch, &eos).ok());
for (int i = 0; i < batch.num_rows(); ++i) {
TupleRow* row = batch.GetRow(i);
Tuple* tuple0 = row->get_tuple(0);
Tuple* tuple1 = row->get_tuple(1);
ASSERT_TRUE(tuple0 != NULL);
ASSERT_TRUE(tuple1 != NULL);
const SlotDescriptor* array_slot_desc = tuple_descs[0]->slots()[0];
ASSERT_FALSE(tuple0->IsNull(array_slot_desc->null_indicator_offset()));
ASSERT_TRUE(tuple0->IsNull(tuple_descs[0]->slots()[1]->null_indicator_offset()));
ASSERT_TRUE(tuple1->IsNull(tuple_descs[1]->slots()[0]->null_indicator_offset()));
const TupleDescriptor* item_desc = array_slot_desc->collection_item_descriptor();
int expected_array_len = array_lens[array_len_index++ % num_array_lens];
CollectionValue* cv = tuple0->GetCollectionSlot(array_slot_desc->tuple_offset());
ASSERT_EQ(expected_array_len, cv->num_tuples);
for (int j = 0; j < cv->num_tuples; ++j) {
Tuple* item = reinterpret_cast<Tuple*>(cv->ptr + j * item_desc->byte_size());
const SlotDescriptor* string_desc = item_desc->slots()[0];
ASSERT_FALSE(item->IsNull(string_desc->null_indicator_offset()));
const StringValue* expected = &STRINGS[strings_index++ % NUM_STRINGS];
const StringValue* actual = item->GetStringSlot(string_desc->tuple_offset());
ASSERT_EQ(*expected, *actual);
}
}
rows_read += batch.num_rows();
} while (!eos);
ASSERT_EQ(NUM_ROWS, rows_read);
}
#endif
// TODO: more tests.
// - The stream can operate in many modes
}
int main(int argc, char** argv) {
// std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
// if (!doris::config::init(conffile.c_str(), false)) {
// fprintf(stderr, "error read config file. \n");
// return -1;
// }
doris::config::query_scratch_dirs = "/tmp";
// doris::config::max_free_io_buffers = 128;
doris::config::read_size = 8388608;
doris::config::min_buffer_size = 1024;
doris::config::disable_mem_pools = false;
doris::init_glog("be-test");
::testing::InitGoogleTest(&argc, argv);
doris::CpuInfo::init();
doris::DiskInfo::init();
return RUN_ALL_TESTS();
}