[feature-wip](unique-key-merge-on-write) add the implementation of primary key index update, DSIP-018 (#11057)

This commit is contained in:
Xin Liao
2022-07-27 14:17:56 +08:00
committed by GitHub
parent b74f36e009
commit eab8382b4a
15 changed files with 647 additions and 341 deletions

View File

@ -287,6 +287,7 @@ Status BetaRowsetWriter::_create_segment_writer(
DCHECK(file_writer != nullptr);
segment_v2::SegmentWriterOptions writer_options;
writer_options.enable_unique_key_merge_on_write = _context.enable_unique_key_merge_on_write;
writer->reset(new segment_v2::SegmentWriter(file_writer.get(), _num_segment,
_context.tablet_schema, _context.data_dir,
_context.max_rows_per_segment, writer_options));

View File

@ -96,6 +96,7 @@ struct RowsetWriterContext {
int64_t oldest_write_timestamp;
int64_t newest_write_timestamp;
bool enable_unique_key_merge_on_write = false;
};
} // namespace doris

View File

@ -77,28 +77,14 @@ public:
Status new_bitmap_index_iterator(const TabletColumn& tablet_column, BitmapIndexIterator** iter);
size_t num_short_keys() const { return _tablet_schema.num_short_key_columns(); }
uint32_t num_rows_per_block() const {
const ShortKeyIndexDecoder* get_short_key_index() const {
DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());
return _sk_index_decoder->num_rows_per_block();
}
ShortKeyIndexIterator lower_bound(const Slice& key) const {
DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());
return _sk_index_decoder->lower_bound(key);
}
ShortKeyIndexIterator upper_bound(const Slice& key) const {
DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());
return _sk_index_decoder->upper_bound(key);
return _sk_index_decoder.get();
}
// This will return the last row block in this segment.
// NOTE: Before call this function , client should assure that
// this segment is not empty.
uint32_t last_block() const {
const PrimaryKeyIndexReader* get_primary_key_index() const {
DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());
DCHECK(num_rows() > 0);
return _sk_index_decoder->num_items() - 1;
return _pk_index_reader.get();
}
Status lookup_row_key(const Slice& key, RowLocation* row_location);

View File

@ -31,6 +31,7 @@
#include "olap/rowset/segment_v2/segment.h"
#include "olap/short_key_index.h"
#include "util/doris_metrics.h"
#include "util/key_util.h"
#include "util/simd/bits.h"
namespace doris {
@ -383,7 +384,16 @@ int compare_row_with_lhs_columns(const LhsRowType& lhs, const RhsRowType& rhs) {
return 0;
}
// Resolve 'key' to a row ordinal by dispatching to whichever key index applies.
// A UNIQUE_KEYS segment that exposes a primary key index is searched through that index;
// every other segment goes through the short key index, which is the only path that
// takes 'upper_bound' into account.
Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound,
                                        rowid_t* rowid) {
    const bool use_pk_index = _segment->_tablet_schema.keys_type() == UNIQUE_KEYS &&
                              _segment->get_primary_key_index() != nullptr;
    if (!use_pk_index) {
        return _lookup_ordinal_from_sk_index(key, is_include, upper_bound, rowid);
    }
    return _lookup_ordinal_from_pk_index(key, is_include, rowid);
}
// Look up one key using the short key index to get the ordinal at which its data can be read.
// 'upper_bound' is the maximum ordinal this function will search.
// We use upper_bound to reduce search times.
// If we find a valid ordinal, it will be set in rowid and with Status::OK()
@ -392,13 +402,17 @@ int compare_row_with_lhs_columns(const LhsRowType& lhs, const RhsRowType& rhs) {
// 1. get [start, end) ordinal through short key index
// 2. binary search to find exact ordinal that match the input condition
// Make is_include template to reduce branch
Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound,
rowid_t* rowid) {
Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool is_include,
rowid_t upper_bound, rowid_t* rowid) {
const ShortKeyIndexDecoder* sk_index_decoder = _segment->get_short_key_index();
DCHECK(sk_index_decoder != nullptr);
std::string index_key;
encode_key_with_padding(&index_key, key, _segment->num_short_keys(), is_include);
encode_key_with_padding(&index_key, key, _segment->_tablet_schema.num_short_key_columns(),
is_include);
uint32_t start_block_id = 0;
auto start_iter = _segment->lower_bound(index_key);
auto start_iter = sk_index_decoder->lower_bound(index_key);
if (start_iter.valid()) {
// Because previous block may contain this key, so we should set rowid to
// last block's first row.
@ -410,14 +424,14 @@ Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, r
// When we don't find a valid index item, which means all short key is
// smaller than input key, this means that this key may exist in the last
// row block. so we set the rowid to first row of last row block.
start_block_id = _segment->last_block();
start_block_id = sk_index_decoder->num_items() - 1;
}
rowid_t start = start_block_id * _segment->num_rows_per_block();
rowid_t start = start_block_id * sk_index_decoder->num_rows_per_block();
rowid_t end = upper_bound;
auto end_iter = _segment->upper_bound(index_key);
auto end_iter = sk_index_decoder->upper_bound(index_key);
if (end_iter.valid()) {
end = end_iter.ordinal() * _segment->num_rows_per_block();
end = end_iter.ordinal() * sk_index_decoder->num_rows_per_block();
}
// binary search to find the exact key
@ -444,6 +458,38 @@ Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, r
return Status::OK();
}
// Look up 'key' in the primary key index and store the resulting row ordinal in 'rowid'.
//
// 'key' is encoded over all key columns with full (untruncated) field encoding; missing
// trailing columns are padded with the minimal marker when 'is_include' is true and with
// the maximal marker otherwise.
//
// On success 'rowid' is the ordinal of the first index entry at or after the encoded key.
// If that entry matches exactly and 'is_include' is false, the ordinal is advanced by one
// so the matching row itself is excluded.
// If the seek reports not-found, 'rowid' is set to num_rows() and OK is returned.
// Any other seek failure also sets 'rowid' to num_rows() and returns the failing status.
Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool is_include,
                                                      rowid_t* rowid) {
    DCHECK(_segment->_tablet_schema.keys_type() == UNIQUE_KEYS);
    const PrimaryKeyIndexReader* pk_index_reader = _segment->get_primary_key_index();
    DCHECK(pk_index_reader != nullptr);

    std::string index_key;
    encode_key_with_padding<RowCursor, true, true>(
            &index_key, key, _segment->_tablet_schema.num_key_columns(), is_include);
    // The iterator takes the seek key through an untyped pointer. Hand it a Slice over the
    // encoded bytes instead of a std::string*, so the lookup does not depend on std::string
    // and Slice happening to share the same memory layout.
    Slice index_key_slice(index_key);
    bool exact_match = false;

    std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
    RETURN_IF_ERROR(pk_index_reader->new_iterator(&index_iterator));

    Status status = index_iterator->seek_at_or_after(&index_key_slice, &exact_match);
    if (UNLIKELY(!status.ok())) {
        *rowid = num_rows();
        if (status.is_not_found()) {
            // Every indexed key sorts before the requested key: position past the last row.
            return Status::OK();
        }
        return status;
    }
    *rowid = index_iterator->get_current_ordinal();

    // The key was found in the primary key index but the bound is exclusive,
    // so move to the next row.
    if (exact_match && !is_include) {
        *rowid += 1;
    }
    return Status::OK();
}
// seek to the row and load that row to _key_cursor
Status SegmentIterator::_seek_and_peek(rowid_t rowid) {
{

View File

@ -72,6 +72,11 @@ private:
Status _prepare_seek(const StorageReadOptions::KeyRange& key_range);
Status _lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound,
rowid_t* rowid);
// Look up the ordinal of the given key using the short key index.
// 'upper_bound' is used as the end of the search range unless the index
// yields a tighter one. The resulting ordinal is written to 'rowid'.
Status _lookup_ordinal_from_sk_index(const RowCursor& key, bool is_include, rowid_t upper_bound,
rowid_t* rowid);
// Look up the ordinal of the given key using the primary key index.
// 'rowid' receives the ordinal at or after the key (advanced by one on an exact
// match when 'is_include' is false), or num_rows() when the seek finds nothing.
Status _lookup_ordinal_from_pk_index(const RowCursor& key, bool is_include, rowid_t* rowid);
Status _seek_and_peek(rowid_t rowid);
// calculate row ranges that satisfy requested column conditions using various column index

View File

@ -21,6 +21,7 @@
#include "env/env.h" // Env
#include "io/fs/file_writer.h"
#include "olap/data_dir.h"
#include "olap/primary_key_index.h"
#include "olap/row.h" // ContiguousRow
#include "olap/row_cursor.h" // RowCursor
#include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter
@ -30,6 +31,7 @@
#include "runtime/memory/mem_tracker.h"
#include "util/crc32c.h"
#include "util/faststring.h"
#include "util/key_util.h"
namespace doris {
namespace segment_v2 {
@ -50,17 +52,21 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id,
std::to_string(segment_id))),
_olap_data_convertor(tablet_schema) {
CHECK_NOTNULL(file_writer);
size_t num_short_key_column = _tablet_schema->num_short_key_columns();
for (size_t cid = 0; cid < num_short_key_column; ++cid) {
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
_num_key_columns = _tablet_schema->num_key_columns();
} else {
_num_key_columns = _tablet_schema->num_short_key_columns();
}
for (size_t cid = 0; cid < _num_key_columns; ++cid) {
const auto& column = _tablet_schema->column(cid);
_short_key_coders.push_back(get_key_coder(column.type()));
_short_key_index_size.push_back(column.index_length());
_key_coders.push_back(get_key_coder(column.type()));
_key_index_size.push_back(column.index_length());
}
}
SegmentWriter::~SegmentWriter() {
_mem_tracker->release(_mem_tracker->consumption());
};
}
void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t* column_id,
const TabletColumn& column,
@ -108,7 +114,15 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec __attribute__((unused))
RETURN_IF_ERROR(writer->init());
_column_writers.push_back(std::move(writer));
}
_index_builder.reset(new ShortKeyIndexBuilder(_segment_id, _opts.num_rows_per_block));
// we don't need the short key index for unique key merge on write table.
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
_primary_key_index_builder.reset(new PrimaryKeyIndexBuilder(_file_writer));
RETURN_IF_ERROR(_primary_key_index_builder->init());
} else {
_short_key_index_builder.reset(
new ShortKeyIndexBuilder(_segment_id, _opts.num_rows_per_block));
}
return Status::OK();
}
@ -133,30 +147,30 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po
}
// convert column data from engine format to storage layer format
std::vector<vectorized::IOlapColumnDataAccessor*> short_key_columns;
size_t num_key_columns = _tablet_schema->num_short_key_columns();
std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
for (size_t cid = 0; cid < _column_writers.size(); ++cid) {
auto converted_result = _olap_data_convertor.convert_column_data(cid);
if (converted_result.first != Status::OK()) {
return converted_result.first;
}
if (cid < num_key_columns) {
short_key_columns.push_back(converted_result.second);
if (cid < _num_key_columns) {
key_columns.push_back(converted_result.second);
}
RETURN_IF_ERROR(_column_writers[cid]->append(converted_result.second->get_nullmap(),
converted_result.second->get_data(),
num_rows));
}
// create short key indexes
std::vector<const void*> key_column_fields;
for (const auto pos : short_key_pos) {
for (const auto& column : short_key_columns) {
key_column_fields.push_back(column->get_data_at(pos));
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
// create primary indexes
for (size_t pos = 0; pos < num_rows; pos++) {
RETURN_IF_ERROR(_primary_key_index_builder->add_item(_encode_keys(key_columns, pos)));
}
} else {
// create short key indexes
for (const auto pos : short_key_pos) {
RETURN_IF_ERROR(_short_key_index_builder->add_item(_encode_keys(key_columns, pos)));
}
std::string encoded_key = encode_short_keys(key_column_fields);
RETURN_IF_ERROR(_index_builder->add_item(encoded_key));
key_column_fields.clear();
}
_row_count += num_rows;
@ -175,16 +189,16 @@ int64_t SegmentWriter::max_row_to_add(size_t row_avg_size_in_bytes) {
return std::min(size_rows, count_rows);
}
std::string SegmentWriter::encode_short_keys(const std::vector<const void*> key_column_fields,
bool null_first) {
size_t num_key_columns = _tablet_schema->num_short_key_columns();
assert(key_column_fields.size() == num_key_columns &&
_short_key_coders.size() == num_key_columns &&
_short_key_index_size.size() == num_key_columns);
std::string SegmentWriter::_encode_keys(
const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns, size_t pos,
bool null_first) {
assert(key_columns.size() == _num_key_columns && _key_coders.size() == _num_key_columns &&
_key_index_size.size() == _num_key_columns);
std::string encoded_keys;
for (size_t cid = 0; cid < num_key_columns; ++cid) {
auto field = key_column_fields[cid];
size_t cid = 0;
for (const auto& column : key_columns) {
auto field = column->get_data_at(pos);
if (UNLIKELY(!field)) {
if (null_first) {
encoded_keys.push_back(KEY_NULL_FIRST_MARKER);
@ -194,7 +208,12 @@ std::string SegmentWriter::encode_short_keys(const std::vector<const void*> key_
continue;
}
encoded_keys.push_back(KEY_NORMAL_MARKER);
_short_key_coders[cid]->encode_ascending(field, _short_key_index_size[cid], &encoded_keys);
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
_key_coders[cid]->full_encode_ascending(field, &encoded_keys);
} else {
_key_coders[cid]->encode_ascending(field, _key_index_size[cid], &encoded_keys);
}
++cid;
}
return encoded_keys;
}
@ -206,11 +225,17 @@ Status SegmentWriter::append_row(const RowType& row) {
RETURN_IF_ERROR(_column_writers[cid]->append(cell));
}
// At the begin of one block, so add a short key index entry
if ((_row_count % _opts.num_rows_per_block) == 0) {
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
std::string encoded_key;
encode_key(&encoded_key, row, _tablet_schema->num_short_key_columns());
RETURN_IF_ERROR(_index_builder->add_item(encoded_key));
encode_key<RowType, true, true>(&encoded_key, row, _num_key_columns);
RETURN_IF_ERROR(_primary_key_index_builder->add_item(encoded_key));
} else {
// At the beginning of one block, so add a short key index entry
if ((_row_count % _opts.num_rows_per_block) == 0) {
std::string encoded_key;
encode_key(&encoded_key, row, _num_key_columns);
RETURN_IF_ERROR(_short_key_index_builder->add_item(encoded_key));
}
}
++_row_count;
return Status::OK();
@ -229,7 +254,11 @@ uint64_t SegmentWriter::estimate_segment_size() {
for (auto& column_writer : _column_writers) {
size += column_writer->estimate_buffer_size();
}
size += _index_builder->size();
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
size += _primary_key_index_builder->size();
} else {
size += _short_key_index_builder->size();
}
// update the mem_tracker of segment size
_mem_tracker->consume(size - _mem_tracker->consumption());
@ -250,7 +279,11 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size
RETURN_IF_ERROR(_write_zone_map());
RETURN_IF_ERROR(_write_bitmap_index());
RETURN_IF_ERROR(_write_bloom_filter_index());
RETURN_IF_ERROR(_write_short_key_index());
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) {
RETURN_IF_ERROR(_write_primary_key_index());
} else {
RETURN_IF_ERROR(_write_short_key_index());
}
*index_size = _file_writer->bytes_appended() - index_offset;
RETURN_IF_ERROR(_write_footer());
RETURN_IF_ERROR(_file_writer->finalize());
@ -298,7 +331,7 @@ Status SegmentWriter::_write_bloom_filter_index() {
Status SegmentWriter::_write_short_key_index() {
std::vector<Slice> body;
PageFooterPB footer;
RETURN_IF_ERROR(_index_builder->finalize(_row_count, &body, &footer));
RETURN_IF_ERROR(_short_key_index_builder->finalize(_row_count, &body, &footer));
PagePointer pp;
// short key index page is not compressed right now
RETURN_IF_ERROR(PageIO::write_page(_file_writer, body, footer, &pp));
@ -306,6 +339,11 @@ Status SegmentWriter::_write_short_key_index() {
return Status::OK();
}
// Finalize the primary key index builder, handing it the footer's
// primary_key_index_meta message. Aborts via CHECK if the builder has not seen
// exactly as many rows as this writer has counted.
Status SegmentWriter::_write_primary_key_index() {
    CHECK(_primary_key_index_builder->num_rows() == _row_count);
    auto* pk_index_meta = _footer.mutable_primary_key_index_meta();
    return _primary_key_index_builder->finalize(pk_index_meta);
}
Status SegmentWriter::_write_footer() {
_footer.set_num_rows(_row_count);
@ -334,5 +372,14 @@ Status SegmentWriter::_write_raw_data(const std::vector<Slice>& slices) {
return Status::OK();
}
// Return the primary key index builder's min_key(), or an empty Slice when this
// writer has no primary key index builder.
Slice SegmentWriter::min_encoded_key() {
    if (_primary_key_index_builder == nullptr) {
        return Slice();
    }
    return _primary_key_index_builder->min_key();
}
// Return the primary key index builder's max_key(), or an empty Slice when this
// writer has no primary key index builder.
Slice SegmentWriter::max_encoded_key() {
    if (_primary_key_index_builder == nullptr) {
        return Slice();
    }
    return _primary_key_index_builder->max_key();
}
} // namespace segment_v2
} // namespace doris

View File

@ -40,6 +40,7 @@ class RowCursor;
class TabletSchema;
class TabletColumn;
class ShortKeyIndexBuilder;
class PrimaryKeyIndexBuilder;
class KeyCoder;
namespace io {
@ -55,6 +56,7 @@ extern const uint32_t k_segment_magic_length;
// Options controlling how SegmentWriter builds a segment.
struct SegmentWriterOptions {
// Row interval of the short key index: the writer hands this value to
// ShortKeyIndexBuilder and, on the row-wise path, adds one short key entry
// every num_rows_per_block rows.
uint32_t num_rows_per_block = 1024;
// When true and the tablet schema is UNIQUE_KEYS, the writer builds a primary
// key index over all key columns instead of a short key index.
bool enable_unique_key_merge_on_write = false;
};
class SegmentWriter {
@ -81,6 +83,8 @@ public:
static void init_column_meta(ColumnMetaPB* meta, uint32_t* column_id,
const TabletColumn& column, const TabletSchema* tablet_schema);
// Min key reported by the primary key index builder; an empty Slice when the
// writer has no primary key index builder.
Slice min_encoded_key();
// Max key reported by the primary key index builder; an empty Slice when the
// writer has no primary key index builder.
Slice max_encoded_key();
private:
DISALLOW_COPY_AND_ASSIGN(SegmentWriter);
@ -90,11 +94,11 @@ private:
Status _write_bitmap_index();
Status _write_bloom_filter_index();
Status _write_short_key_index();
Status _write_primary_key_index();
Status _write_footer();
Status _write_raw_data(const std::vector<Slice>& slices);
std::string encode_short_keys(const std::vector<const void*> key_column_fields,
bool null_first = true);
std::string _encode_keys(const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
size_t pos, bool null_first = true);
private:
uint32_t _segment_id;
@ -107,16 +111,19 @@ private:
io::FileWriter* _file_writer;
SegmentFooterPB _footer;
std::unique_ptr<ShortKeyIndexBuilder> _index_builder;
size_t _num_key_columns;
std::unique_ptr<ShortKeyIndexBuilder> _short_key_index_builder;
std::unique_ptr<PrimaryKeyIndexBuilder> _primary_key_index_builder;
std::vector<std::unique_ptr<ColumnWriter>> _column_writers;
std::unique_ptr<MemTracker> _mem_tracker;
uint32_t _row_count = 0;
vectorized::OlapBlockDataConvertor _olap_data_convertor;
std::vector<const KeyCoder*> _short_key_coders;
std::vector<uint16_t> _short_key_index_size;
// used for building short key index or primary key index during vectorized write.
std::vector<const KeyCoder*> _key_coders;
std::vector<uint16_t> _key_index_size;
size_t _short_key_row_pos = 0;
};
} // namespace segment_v2
} // namespace doris
} // namespace doris

View File

@ -30,82 +30,6 @@
namespace doris {
// Key encoding in our system has to cope with two complications.
// First, key columns can be null.
// Second, when only a prefix of the key columns is given we still want to distinguish
// GT from GE. For example, with two key columns a and b and the single condition a > 1,
// we can only encode the prefix key 1, which sorts before 1|2, so a read would fetch more
// data than it actually needs. To avoid that, extra markers are added:
// a > 1: will be encoded into 1|\xFF
// a >= 1: will be encoded into 1|\x00
// a = 1 and b > 1: will be encoded into 1|\x02|1
// a = 1 and b is null: will be encoded into 1|\x01

// Used to represent the minimal value for a field
constexpr uint8_t KEY_MINIMAL_MARKER = 0x00;
// Used to represent a null field that is treated as smaller than all other values
constexpr uint8_t KEY_NULL_FIRST_MARKER = 0x01;
// Used to represent a normal field, whose content is encoded after this marker
constexpr uint8_t KEY_NORMAL_MARKER = 0x02;
// Used to represent a null field when null_first is false (nulls placed last)
constexpr uint8_t KEY_NULL_LAST_MARKER = 0xFE;
// Used to represent the maximal value for a field
constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;

// Encode the first num_keys columns of 'row' into 'buf'.
// Each cell is encoded as a marker followed by its encoded content; a null cell is
// encoded as a single null marker chosen by 'null_first'.
// If a column is not present in the row's schema, a single padding marker is appended
// and encoding stops: KEY_MINIMAL_MARKER when padding_minimal is true,
// KEY_MAXIMAL_MARKER otherwise.
// If all num_keys columns are present in the row, no padding marker is added.
template <typename RowType, bool null_first = true>
void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
                             bool padding_minimal) {
    // size_t index: matches the type of num_keys, so the loop bound is not a
    // signed/unsigned comparison.
    for (size_t cid = 0; cid < num_keys; cid++) {
        auto field = row.schema()->column(cid);
        if (field == nullptr) {
            if (padding_minimal) {
                buf->push_back(KEY_MINIMAL_MARKER);
            } else {
                buf->push_back(KEY_MAXIMAL_MARKER);
            }
            break;
        }

        auto cell = row.cell(cid);
        if (cell.is_null()) {
            if (null_first) {
                buf->push_back(KEY_NULL_FIRST_MARKER);
            } else {
                buf->push_back(KEY_NULL_LAST_MARKER);
            }
            continue;
        }
        buf->push_back(KEY_NORMAL_MARKER);
        field->encode_ascending(cell.cell_ptr(), buf);
    }
}

// Encode the first num_keys columns of 'row' into 'buf'.
// The caller must ensure that the row contains the first num_keys columns;
// unlike encode_key_with_padding, no padding marker is ever emitted.
template <typename RowType, bool null_first = true>
void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
    for (size_t cid = 0; cid < num_keys; cid++) {
        auto cell = row.cell(cid);
        if (cell.is_null()) {
            if (null_first) {
                buf->push_back(KEY_NULL_FIRST_MARKER);
            } else {
                buf->push_back(KEY_NULL_LAST_MARKER);
            }
            continue;
        }
        buf->push_back(KEY_NORMAL_MARKER);
        row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf);
    }
}
// Encode a segment short key indices to one ShortKeyPage. This version
// only accepts binary key, client should assure that input key is sorted,
// otherwise error could happens. This builder would arrange the page body in the

View File

@ -1641,6 +1641,7 @@ Status Tablet::create_rowset_writer(const Version& version, const RowsetStatePB&
context.oldest_write_timestamp = oldest_write_timestamp;
context.newest_write_timestamp = newest_write_timestamp;
context.tablet_schema = tablet_schema;
context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write();
_init_context_common_fields(context);
return RowsetFactory::create_rowset_writer(context, rowset_writer);
}
@ -1658,6 +1659,7 @@ Status Tablet::create_rowset_writer(const int64_t& txn_id, const PUniqueId& load
context.oldest_write_timestamp = -1;
context.newest_write_timestamp = -1;
context.tablet_schema = tablet_schema;
context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write();
_init_context_common_fields(context);
return RowsetFactory::create_rowset_writer(context, rowset_writer);
}

117
be/src/util/key_util.h Normal file
View File

@ -0,0 +1,117 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <iterator>
#include <string>
#include <vector>
#include "common/status.h"
#include "gen_cpp/segment_v2.pb.h"
#include "util/debug_util.h"
#include "util/faststring.h"
#include "util/slice.h"
namespace doris {
// Key encoding in our system has to cope with two complications.
// First, key columns can be null.
// Second, when only a prefix of the key columns is given we still want to distinguish
// GT from GE. For example, with two key columns a and b and the single condition a > 1,
// we can only encode the prefix key 1, which sorts before 1|2, so a read would fetch more
// data than it actually needs. To avoid that, extra markers are added:
// a > 1: will be encoded into 1|\xFF
// a >= 1: will be encoded into 1|\x00
// a = 1 and b > 1: will be encoded into 1|\x02|1
// a = 1 and b is null: will be encoded into 1|\x01

// Used to represent the minimal value for a field
constexpr uint8_t KEY_MINIMAL_MARKER = 0x00;
// Used to represent a null field that is treated as smaller than all other values
constexpr uint8_t KEY_NULL_FIRST_MARKER = 0x01;
// Used to represent a normal field, whose content is encoded after this marker
constexpr uint8_t KEY_NORMAL_MARKER = 0x02;
// Used to represent a null field when null_first is false (nulls placed last)
constexpr uint8_t KEY_NULL_LAST_MARKER = 0xFE;
// Used to represent the maximal value for a field
constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;

// Encode the first num_keys columns of 'row' into 'buf'.
// Each cell is encoded as a marker followed by its encoded content; a null cell is
// encoded as a single null marker chosen by 'null_first'.
// If a column is not present in the row's schema, a single padding marker is appended
// and encoding stops: KEY_MINIMAL_MARKER when padding_minimal is true,
// KEY_MAXIMAL_MARKER otherwise.
// If all num_keys columns are present in the row, no padding marker is added.
// When 'full_encode' is true each field is encoded with full_encode_ascending(),
// otherwise with encode_ascending().
template <typename RowType, bool null_first = true, bool full_encode = false>
void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
                             bool padding_minimal) {
    // size_t index: matches the type of num_keys, so the loop bound is not a
    // signed/unsigned comparison.
    for (size_t cid = 0; cid < num_keys; cid++) {
        auto field = row.schema()->column(cid);
        if (field == nullptr) {
            if (padding_minimal) {
                buf->push_back(KEY_MINIMAL_MARKER);
            } else {
                buf->push_back(KEY_MAXIMAL_MARKER);
            }
            break;
        }

        auto cell = row.cell(cid);
        if (cell.is_null()) {
            if (null_first) {
                buf->push_back(KEY_NULL_FIRST_MARKER);
            } else {
                buf->push_back(KEY_NULL_LAST_MARKER);
            }
            continue;
        }
        buf->push_back(KEY_NORMAL_MARKER);
        if (full_encode) {
            field->full_encode_ascending(cell.cell_ptr(), buf);
        } else {
            field->encode_ascending(cell.cell_ptr(), buf);
        }
    }
}

// Encode the first num_keys columns of 'row' into 'buf'.
// The caller must ensure that the row contains the first num_keys columns;
// unlike encode_key_with_padding, no padding marker is ever emitted.
// When 'full_encode' is true each field is encoded with full_encode_ascending(),
// otherwise with encode_ascending().
template <typename RowType, bool null_first = true, bool full_encode = false>
void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
    for (size_t cid = 0; cid < num_keys; cid++) {
        auto cell = row.cell(cid);
        if (cell.is_null()) {
            if (null_first) {
                buf->push_back(KEY_NULL_FIRST_MARKER);
            } else {
                buf->push_back(KEY_NULL_LAST_MARKER);
            }
            continue;
        }
        buf->push_back(KEY_NORMAL_MARKER);
        if (full_encode) {
            row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf);
        } else {
            row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf);
        }
    }
}
} // namespace doris

View File

@ -184,6 +184,7 @@ set(OLAP_TEST_FILES
olap/generic_iterators_test.cpp
olap/key_coder_test.cpp
olap/short_key_index_test.cpp
olap/primary_key_index_test.cpp
olap/page_cache_test.cpp
olap/hll_test.cpp
olap/selection_vector_test.cpp
@ -312,6 +313,7 @@ set(UTIL_TEST_FILES
util/quantile_state_test.cpp
util/hdfs_storage_backend_test.cpp
util/interval_tree_test.cpp
util/key_util_test.cpp
)
set(VEC_TEST_FILES
vec/aggregate_functions/agg_collect_test.cpp

View File

@ -25,6 +25,7 @@
#include "olap/tablet_schema_helper.h"
#include "util/debug_util.h"
#include "util/file_utils.h"
#include "util/key_util.h"
namespace doris {
@ -71,6 +72,7 @@ TEST_F(PrimaryKeyIndexTest, builder) {
EXPECT_TRUE(file_writer->close().ok());
EXPECT_EQ(num_rows, builder.num_rows());
FilePathDesc path_desc(filename);
PrimaryKeyIndexReader index_reader;
io::FileReaderSPtr file_reader;
EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
@ -126,6 +128,44 @@ TEST_F(PrimaryKeyIndexTest, builder) {
EXPECT_FALSE(exact_match);
EXPECT_TRUE(status.is_not_found());
}
// Read all keys back in batches of 'batch_size' and compare them against 'keys'.
// Every batch after the first re-seeks to the last key of the previous batch, so
// consecutive batches overlap by one key; that is why a full, non-final batch only
// counts batch_size - 1 keys as consumed.
//
// The calls under test are checked with gtest assertions rather than DCHECK: glog's
// DCHECK is compiled out when NDEBUG is defined, which would also drop the wrapped
// calls (new_iterator, create, seek_at_or_after, next_batch) from a release build.
{
    int32_t remaining = num_rows;
    std::string last_key;
    int num_batch = 0;
    int batch_size = 1024;
    MemPool pool;
    while (remaining > 0) {
        std::unique_ptr<segment_v2::IndexedColumnIterator> iter;
        ASSERT_TRUE(index_reader.new_iterator(&iter).ok());

        size_t num_to_read = std::min(batch_size, remaining);
        std::unique_ptr<ColumnVectorBatch> cvb;
        ASSERT_TRUE(ColumnVectorBatch::create(num_to_read, false, index_reader.type_info(),
                                              nullptr, &cvb)
                            .ok());
        ColumnBlock block(cvb.get(), &pool);
        ColumnBlockView column_block_view(&block);
        Slice last_key_slice(last_key);
        ASSERT_TRUE(iter->seek_at_or_after(&last_key_slice, &exact_match).ok());

        size_t num_read = num_to_read;
        ASSERT_TRUE(iter->next_batch(&num_read, &column_block_view).ok());
        // A short read would make the bookkeeping below wrong, so stop the test here.
        ASSERT_EQ(num_to_read, num_read);
        last_key = (reinterpret_cast<const Slice*>(cvb->cell_ptr(num_read - 1)))->to_string();
        // exclude last_key, last_key will be read in next batch.
        if (num_read == batch_size && num_read != remaining) {
            num_read -= 1;
        }
        for (size_t i = 0; i < num_read; i++) {
            const Slice* key = reinterpret_cast<const Slice*>(cvb->cell_ptr(i));
            EXPECT_EQ(keys[i + (batch_size - 1) * num_batch], key->to_string());
        }
        num_batch++;
        remaining -= num_read;
    }
}
}
} // namespace doris

View File

@ -22,6 +22,7 @@
#include <filesystem>
#include <functional>
#include <iostream>
#include <vector>
#include "common/logging.h"
#include "io/fs/file_system.h"
@ -42,7 +43,9 @@
#include "olap/types.h"
#include "runtime/mem_pool.h"
#include "testutil/test_util.h"
#include "util/debug_util.h"
#include "util/file_utils.h"
#include "util/key_util.h"
namespace doris {
namespace segment_v2 {
@ -98,7 +101,7 @@ protected:
}
TabletSchema create_schema(const std::vector<TabletColumn>& columns,
int num_short_key_columns = -1) {
KeysType keys_type = DUP_KEYS, int num_custom_key_columns = -1) {
TabletSchema res;
int num_key_columns = 0;
for (auto& col : columns) {
@ -110,7 +113,8 @@ protected:
res._num_columns = columns.size();
res._num_key_columns = num_key_columns;
res._num_short_key_columns =
num_short_key_columns != -1 ? num_short_key_columns : num_key_columns;
num_custom_key_columns != -1 ? num_custom_key_columns : num_key_columns;
res._keys_type = keys_type;
res.init_field_index_for_test();
return res;
}
@ -151,6 +155,30 @@ protected:
st = writer.finalize(&file_size, &index_size);
EXPECT_TRUE(st.ok());
EXPECT_TRUE(file_writer->close().ok());
// Check min/max key generation
if (build_schema.keys_type() == UNIQUE_KEYS && opts.enable_unique_key_merge_on_write) {
// Create min row
for (int cid = 0; cid < build_schema.num_key_columns(); ++cid) {
RowCursorCell cell = row.cell(cid);
generator(0, cid, 0 / opts.num_rows_per_block, cell);
}
std::string min_encoded_key;
encode_key<RowCursor, true, true>(&min_encoded_key, row,
build_schema.num_key_columns());
EXPECT_EQ(min_encoded_key, writer.min_encoded_key().to_string());
// Create max row
for (int cid = 0; cid < build_schema.num_key_columns(); ++cid) {
RowCursorCell cell = row.cell(cid);
generator(nrows - 1, cid, (nrows - 1) / opts.num_rows_per_block, cell);
}
std::string max_encoded_key;
encode_key<RowCursor, true, true>(&max_encoded_key, row,
build_schema.num_key_columns());
EXPECT_EQ(max_encoded_key, writer.max_encoded_key().to_string());
} else {
EXPECT_EQ("", writer.min_encoded_key().to_string());
EXPECT_EQ("", writer.max_encoded_key().to_string());
}
st = Segment::open(fs, path, 0, &query_schema, res);
EXPECT_TRUE(st.ok());
@ -162,144 +190,215 @@ public:
};
// Writes a 4096-row segment with DefaultIntGenerator and reads it back through
// Segment::new_iterator: one full scan followed by several key-range seeks.
// NOTE(review): this span interleaves the previous and the updated version of the
// test (diff markers were stripped), so many statements appear twice and the braces
// do not balance as written. Comments below describe each fragment on its own.
TEST_F(SegmentReaderWriterTest, normal) {
TabletSchema tablet_schema = create_schema(
{create_int_key(1), create_int_key(2), create_int_value(3), create_int_value(4)});
// Repeat the checks for every keys type, with the merge-on-write writer option
// both disabled and enabled.
std::vector<KeysType> keys_type_vec = {DUP_KEYS, AGG_KEYS, UNIQUE_KEYS};
std::vector<bool> enable_unique_key_merge_on_write_vec = {false, true};
for (auto keys_type : keys_type_vec) {
for (auto enable_unique_key_merge_on_write : enable_unique_key_merge_on_write_vec) {
// Two int key columns and two int value columns, built for the current keys type.
TabletSchema tablet_schema = create_schema({create_int_key(1), create_int_key(2),
create_int_value(3), create_int_value(4)},
keys_type);
SegmentWriterOptions opts;
opts.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write;
opts.num_rows_per_block = 10;
SegmentWriterOptions opts;
opts.num_rows_per_block = 10;
shared_ptr<Segment> segment;
build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
shared_ptr<Segment> segment;
build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
// reader
{
Schema schema(tablet_schema);
OlapReaderStatistics stats;
// scan all rows
{
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
// reader
{
Schema schema(tablet_schema);
OlapReaderStatistics stats;
// scan all rows
{
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 1024);
RowBlockV2 block(schema, 1024);
// `left` counts the rows still expected from the 4096 written above.
int left = 4096;
int left = 4096;
int rowid = 0;
while (left > 0) {
// Each batch is expected to hold min(left, 1024) rows.
int rows_read = left > 1024 ? 1024 : left;
block.clear();
EXPECT_TRUE(iter->next_batch(&block).ok());
EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
EXPECT_EQ(rows_read, block.num_rows());
left -= rows_read;
int rowid = 0;
while (left > 0) {
// Each batch is expected to hold min(left, 1024) rows.
int rows_read = left > 1024 ? 1024 : left;
block.clear();
EXPECT_TRUE(iter->next_batch(&block).ok());
EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
EXPECT_EQ(rows_read, block.num_rows());
left -= rows_read;
// Every cell must be non-null and equal to rid * 10 + cid.
for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
auto cid = block.schema()->column_ids()[j];
auto column_block = block.column_block(j);
for (int i = 0; i < rows_read; ++i) {
int rid = rowid + i;
EXPECT_FALSE(column_block.is_null(i));
EXPECT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
// Every cell must be non-null and equal to rid * 10 + cid.
for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
auto cid = block.schema()->column_ids()[j];
auto column_block = block.column_block(j);
for (int i = 0; i < rows_read; ++i) {
int rid = rowid + i;
EXPECT_FALSE(column_block.is_null(i));
EXPECT_EQ(rid * 10 + cid, *(int*)column_block.cell_ptr(i));
}
}
rowid += rows_read;
}
}
rowid += rows_read;
}
}
// test seek, key
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 2);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 100;
}
{
auto cell = lower_bound->cell(1);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 100;
}
// test seek, key, does not exist
{
// lower bound: (100, 100), passed to key_ranges with `false`
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 2);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 100;
}
{
auto cell = lower_bound->cell(1);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 100;
}
// upper bound
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
upper_bound->init(tablet_schema, 1);
{
auto cell = upper_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 200;
// upper bound: (200) on the first key column only, passed with `true`
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
upper_bound->init(tablet_schema, 1);
{
auto cell = upper_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 200;
}
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(),
true);
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).ok());
EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
// Expect 11 rows whose first column runs 100, 110, ..., 200.
EXPECT_EQ(11, block.num_rows());
auto column_block = block.column_block(0);
for (int i = 0; i < 11; ++i) {
EXPECT_EQ(100 + i * 10, *(int*)column_block.cell_ptr(i));
}
}
// test seek, existing key
{
// lower bound: (100, 101)
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 2);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 100;
}
{
auto cell = lower_bound->cell(1);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 101;
}
// upper bound: (200, 201)
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
upper_bound->init(tablet_schema, 2);
{
auto cell = upper_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 200;
}
{
auto cell = upper_bound->cell(1);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 201;
}
// include upper key
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), true, upper_bound.get(),
true);
std::unique_ptr<RowwiseIterator> iter;
// NOTE(review): the result of new_iterator is not checked here, unlike the
// ASSERT_TRUE(...ok()) used in the other sections of this test.
segment->new_iterator(schema, read_opts, &iter);
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).ok());
EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
// Expect 11 rows whose first column runs 100, 110, ..., 200.
EXPECT_EQ(11, block.num_rows());
auto column_block = block.column_block(0);
for (int i = 0; i < 11; ++i) {
EXPECT_EQ(100 + i * 10, *(int*)column_block.cell_ptr(i));
}
// not include upper key
StorageReadOptions read_opts1;
read_opts1.stats = &stats;
read_opts1.tablet_schema = &tablet_schema;
read_opts1.key_ranges.emplace_back(lower_bound.get(), true, upper_bound.get(),
false);
std::unique_ptr<RowwiseIterator> iter1;
// NOTE(review): the result of new_iterator is not checked here either.
segment->new_iterator(schema, read_opts1, &iter1);
RowBlockV2 block1(schema, 100);
EXPECT_TRUE(iter1->next_batch(&block1).ok());
EXPECT_EQ(DEL_NOT_SATISFIED, block1.delete_state());
// One row fewer than the inclusive case above.
EXPECT_EQ(10, block1.num_rows());
}
// test seek, lower bound 40970 with no upper bound: expects end-of-file and no rows
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 1);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 40970;
}
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).is_end_of_file());
EXPECT_EQ(0, block.num_rows());
}
// test seek, key (-2, -1): expects end-of-file and no rows
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 1);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = -2;
}
// upper bound
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
upper_bound->init(tablet_schema, 1);
{
auto cell = upper_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = -1;
}
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(),
false);
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).is_end_of_file());
EXPECT_EQ(0, block.num_rows());
}
}
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), true);
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).ok());
EXPECT_EQ(DEL_NOT_SATISFIED, block.delete_state());
// Expect 11 rows whose first column runs 100, 110, ..., 200.
EXPECT_EQ(11, block.num_rows());
auto column_block = block.column_block(0);
for (int i = 0; i < 11; ++i) {
EXPECT_EQ(100 + i * 10, *(int*)column_block.cell_ptr(i));
}
}
// test seek, lower bound 40970 with no upper bound: expects end-of-file and no rows
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 1);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = 40970;
}
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).is_end_of_file());
EXPECT_EQ(0, block.num_rows());
}
// test seek, key (-2, -1): expects end-of-file and no rows
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(tablet_schema, 1);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = -2;
}
// upper bound
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
upper_bound->init(tablet_schema, 1);
{
auto cell = upper_bound->cell(0);
cell.set_not_null();
*(int*)cell.mutable_cell_ptr() = -1;
}
StorageReadOptions read_opts;
read_opts.stats = &stats;
read_opts.tablet_schema = &tablet_schema;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
std::unique_ptr<RowwiseIterator> iter;
ASSERT_TRUE(segment->new_iterator(schema, read_opts, &iter).ok());
RowBlockV2 block(schema, 100);
EXPECT_TRUE(iter->next_batch(&block).is_end_of_file());
EXPECT_EQ(0, block.num_rows());
}
}
}
@ -1187,6 +1286,5 @@ TEST_F(SegmentReaderWriterTest, TestBloomFilterIndexUniqueModel) {
build_segment(opts2, schema, schema, 100, DefaultIntGenerator, &seg2);
EXPECT_TRUE(column_contains_index(seg2->footer().columns(3), BLOOM_FILTER_INDEX));
}
} // namespace segment_v2
} // namespace doris

View File

@ -19,10 +19,6 @@
#include <gtest/gtest.h>
#include "olap/row_cursor.h"
#include "olap/tablet_schema_helper.h"
#include "util/debug_util.h"
namespace doris {
class ShortKeyIndexTest : public testing::Test {
@ -93,66 +89,4 @@ TEST_F(ShortKeyIndexTest, builder) {
}
}
// Checks the byte layout produced by encode_key_with_padding and encode_key
// for a two-column row cursor, comparing the hex dump of each encoded buffer.
TEST_F(ShortKeyIndexTest, encode) {
    // Schema: int key columns 0..2 followed by one int value column; all three
    // keys are declared as short keys.
    TabletSchema tablet_schema;
    tablet_schema._cols.push_back(create_int_key(0));
    tablet_schema._cols.push_back(create_int_key(1));
    tablet_schema._cols.push_back(create_int_key(2));
    tablet_schema._cols.push_back(create_int_value(3));
    tablet_schema._num_columns = 4;
    tablet_schema._num_key_columns = 3;
    tablet_schema._num_short_key_columns = 3;

    // The cursor is initialised with only the first two columns.
    RowCursor row;
    row.init(tablet_schema, 2);

    // Sets the null flag of column `cid` and writes `value` into its cell.
    auto fill_int_cell = [&row](uint32_t cid, bool is_null, int value) {
        auto cell = row.cell(cid);
        cell.set_is_null(is_null);
        *(int*)cell.mutable_cell_ptr() = value;
    };

    // Case 1: both columns non-null, encoded for 3 columns with the last
    // argument set to true.
    {
        fill_int_cell(0, false, 12345);
        fill_int_cell(1, false, 54321);

        std::string encoded;
        encode_key_with_padding(&encoded, row, 3, true);
        // should be \x02\x80\x00\x30\x39\x02\x80\x00\xD4\x31\x00
        const auto hex = hexdump(encoded.c_str(), encoded.size());
        EXPECT_STREQ("0280003039028000D43100", hex.c_str());
    }

    // Case 2: second column flagged null (its payload is still written).
    {
        fill_int_cell(0, false, 54321);
        fill_int_cell(1, true, 54321);

        {
            std::string encoded;
            encode_key_with_padding(&encoded, row, 3, false);
            // should be \x02\x80\x00\xD4\x31\x01\xff
            const auto hex = hexdump(encoded.c_str(), encoded.size());
            EXPECT_STREQ("028000D43101FF", hex.c_str());
        }
        // Plain encode_key over the two cursor columns.
        {
            std::string encoded;
            encode_key(&encoded, row, 2);
            // should be \x02\x80\x00\xD4\x31\x01
            const auto hex = hexdump(encoded.c_str(), encoded.size());
            EXPECT_STREQ("028000D43101", hex.c_str());
        }
    }
}
} // namespace doris

View File

@ -0,0 +1,96 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "util/key_util.h"
#include <gtest/gtest.h>
#include "olap/row_cursor.h"
#include "olap/tablet_schema_helper.h"
#include "util/debug_util.h"
namespace doris {
class KeyUtilTest : public testing::Test {
public:
KeyUtilTest() {}
virtual ~KeyUtilTest() {}
};
// Verifies the encoded form of a partially filled row cursor, once through
// encode_key_with_padding and once through encode_key, by comparing hex dumps.
TEST_F(KeyUtilTest, encode) {
    // Three int key columns (ids 0, 1, 2) plus one int value column (id 3).
    TabletSchema tablet_schema;
    tablet_schema._cols.push_back(create_int_key(0));
    tablet_schema._cols.push_back(create_int_key(1));
    tablet_schema._cols.push_back(create_int_key(2));
    tablet_schema._cols.push_back(create_int_value(3));
    tablet_schema._num_columns = 4;
    tablet_schema._num_key_columns = 3;
    tablet_schema._num_short_key_columns = 3;

    // Only the first two columns are materialised in the cursor.
    RowCursor cursor;
    cursor.init(tablet_schema, 2);

    // Writes one int cell of the cursor together with its null flag.
    const auto write_cell = [&cursor](uint32_t column, bool null_flag, int payload) {
        auto target = cursor.cell(column);
        target.set_is_null(null_flag);
        *(int*)target.mutable_cell_ptr() = payload;
    };

    // --- both columns present, 3 columns requested, last argument true ---
    write_cell(0, false, 12345);
    write_cell(1, false, 54321);
    {
        std::string key_bytes;
        encode_key_with_padding(&key_bytes, cursor, 3, true);
        // should be \x02\x80\x00\x30\x39\x02\x80\x00\xD4\x31\x00
        const auto dumped = hexdump(key_bytes.c_str(), key_bytes.size());
        EXPECT_STREQ("0280003039028000D43100", dumped.c_str());
    }

    // --- second column marked null; the value is still stored in the cell ---
    write_cell(0, false, 54321);
    write_cell(1, true, 54321);
    {
        std::string key_bytes;
        encode_key_with_padding(&key_bytes, cursor, 3, false);
        // should be \x02\x80\x00\xD4\x31\x01\xff
        const auto dumped = hexdump(key_bytes.c_str(), key_bytes.size());
        EXPECT_STREQ("028000D43101FF", dumped.c_str());
    }
    // --- encode_key over the two columns held by the cursor ---
    {
        std::string key_bytes;
        encode_key(&key_bytes, cursor, 2);
        // should be \x02\x80\x00\xD4\x31\x01
        const auto dumped = hexdump(key_bytes.c_str(), key_bytes.size());
        EXPECT_STREQ("028000D43101", dumped.c_str());
    }
}
} // namespace doris