v2 segment support string encode(#1766) (#1816)

major change

change the data format of the binary dict page: append (dict page data) and (dict page offset) to the binary dict page;
add new decoding method for new binary dict page format
add ut for segment test
set the elements of the initial array to 0 when calling arena.AllocateNewBlock
hard-code the choice of dict encoding for string columns
0919 commit major change

change the dict file format: when saving a binary dict page, separate the dict page from the data pages, so one dict page may serve multiple data pages; when reading a binary dict page, one ColumnReader keeps one dict page
load the dict when calling column_reader._read_page
3. roll back BinaryDictPage
no longer use memset(0) to initialize column_zonemap.max_value
0926 17 commit major change

initialize the data array of the column_zone_map min-value slice;
set the size of the char/varchar column_zone_map's max value to 0
add ut for char column zone map query hit/miss
0929 10 commit major change

allocate memory for column_zone_map's max and min values
directly copy content into column_zone_map's max and min values
This commit is contained in:
wangbo
2019-09-30 16:25:31 +08:00
committed by ZHAO Chun
parent 69d0a34bfd
commit 8aa8e08f27
19 changed files with 384 additions and 20 deletions

View File

@ -57,6 +57,7 @@ public:
// Forwards to _type_info->set_to_max() for the value stored at `buf`.
inline void set_to_max(char* buf) const {
    _type_info->set_to_max(buf);
}
// Forwards to _type_info->set_to_min() for the value stored at `buf`.
inline void set_to_min(char* buf) const {
    _type_info->set_to_min(buf);
}
// Asks _type_info to allocate storage for one value of this field's type
// from `arena` and returns the pointer it hands back.
inline char* allocate_value_from_arena(Arena* arena) const {
    char* value_buf = _type_info->allocate_value_from_arena(arena);
    return value_buf;
}
inline void agg_update(RowCursorCell* dest, const RowCursorCell& src, MemPool* mem_pool = nullptr) const {
_agg_info->update(dest, src, mem_pool);
@ -199,6 +200,10 @@ public:
_type_info->deep_copy_with_arena(dest, src, arena);
}
// Copies the value at `src` into `dest` by forwarding to
// _type_info->direct_copy(); no arena or memory pool is passed along.
inline void direct_copy_content(char* dest, const char* src) const
{
    _type_info->direct_copy(dest, src);
}
// Copy source content to destination in index format.
template<typename DstCellType, typename SrcCellType>
void to_index(DstCellType* dst, const SrcCellType& src) const;

View File

@ -50,9 +50,12 @@ static const uint64_t OLAP_FIX_HEADER_MAGIC_NUMBER = 0;
// default candidate set size when running be/ce
// NOTE(review): "be/ce" presumably means base/cumulative compaction — confirm
static constexpr uint32_t OLAP_COMPACTION_DEFAULT_CANDIDATE_SIZE = 10;
// the max length supported for varchar (string) type
static const uint16_t OLAP_STRING_MAX_LENGTH = 65535;
// the max length supported for char type
static const uint16_t OLAP_CHAR_MAX_LENGTH = 255;
static const int32_t PREFERRED_SNAPSHOT_VERSION = 3;
// the max bytes for stored string length

View File

@ -102,7 +102,7 @@ Slice BinaryDictPageBuilder::finish() {
Slice data_slice = _data_page_builder->finish();
_buffer.append(data_slice.data, data_slice.size);
encode_fixed32_le(&_buffer[0], _encoding_type);
return Slice(_buffer.data(), _buffer.size());
return Slice(_buffer);
}
void BinaryDictPageBuilder::reset() {
@ -147,7 +147,6 @@ BinaryDictPageDecoder::BinaryDictPageDecoder(Slice data, const PageDecoderOption
_data(data),
_options(options),
_data_page_decoder(nullptr),
_dict_decoder(options.dict_decoder),
_parsed(false),
_encoding_type(UNKNOWN_ENCODING) { }
@ -161,7 +160,6 @@ Status BinaryDictPageDecoder::init() {
_encoding_type = static_cast<EncodingTypePB>(type);
_data.remove_prefix(BINARY_DICT_PAGE_HEADER_SIZE);
if (_encoding_type == DICT_ENCODING) {
DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";
_data_page_decoder.reset(new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
} else if (_encoding_type == PLAIN_ENCODING) {
DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
@ -180,12 +178,21 @@ Status BinaryDictPageDecoder::seek_to_position_in_page(size_t pos) {
return _data_page_decoder->seek_to_position_in_page(pos);
}
// Reports whether the encoding type recorded in _encoding_type is DICT_ENCODING.
bool BinaryDictPageDecoder::is_dict_encoding() const {
    const bool dict_encoded = (_encoding_type == DICT_ENCODING);
    return dict_encoded;
}
// Installs the decoder of the dictionary page that this data page's codes refer to.
// The pointer is stored, not owned: the caller keeps `dict_decoder` alive for as
// long as this decoder may use it.
// `dict_decoder` is expected to actually be a BinaryPlainPageDecoder; a named
// static_cast is used so that an unrelated type fails to compile instead of being
// silently reinterpreted, which a C-style cast would allow.
void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder) {
    _dict_decoder = static_cast<BinaryPlainPageDecoder*>(dict_decoder);
}
Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
if (_encoding_type == PLAIN_ENCODING) {
return _data_page_decoder->next_batch(n, dst);
}
// dictionary encoding
DCHECK(_parsed);
DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";
if (PREDICT_FALSE(*n == 0)) {
*n = 0;
return Status::OK();

View File

@ -116,11 +116,15 @@ public:
return _data_page_decoder->current_index();
}
bool is_dict_encoding() const;
void set_dict_decoder(PageDecoder* dict_decoder);
private:
Slice _data;
PageDecoderOptions _options;
std::unique_ptr<PageDecoder> _data_page_decoder;
BinaryPlainPageDecoder* _dict_decoder;
const BinaryPlainPageDecoder* _dict_decoder = nullptr;
bool _parsed;
EncodingTypePB _encoding_type;
faststring _code_buf;

View File

@ -32,6 +32,7 @@
#include "util/crc32c.h"
#include "util/rle_encoding.h" // for RleDecoder
#include "util/block_compression.h"
#include "olap/rowset/segment_v2/binary_dict_page.h" // for BinaryDictPageDecoder
namespace doris {
namespace segment_v2 {
@ -168,6 +169,10 @@ void ColumnReader::get_row_ranges_by_zone_map(CondColumn* cond_column,
_calculate_row_ranges(page_indexes, row_ranges);
}
// Returns the dictionary page pointer recorded in this column's metadata (_meta).
PagePointer ColumnReader::get_dict_page_pointer() const {
    const auto& dict_page_meta = _meta.dict_page();
    return dict_page_meta;
}
void ColumnReader::_get_filtered_pages(CondColumn* cond_column,
const std::vector<CondColumn*>& delete_conditions, std::vector<uint32_t>* page_indexes) {
FieldType type = _type_info->type();
@ -426,6 +431,24 @@ Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, Pars
RETURN_IF_ERROR(_reader->encoding_info()->create_page_decoder(data, options, &page->data_decoder));
RETURN_IF_ERROR(page->data_decoder->init());
// Lazily load the dictionary of a dict-encoded column, for three reasons:
// 1. A column that uses dictionary encoding may still contain non-dict-encoded data pages, so the dict is loaded only when a page actually needs it.
// 2. ColumnReader, which is owned by Segment and Rowset, can stay alive even when there is no query, so it should retain as little memory as possible.
// 3. Iterators of the same column won't repeatedly load the dict page because of the page cache.
if (_reader->encoding_info()->encoding() == DICT_ENCODING) {
BinaryDictPageDecoder* binary_dict_page_decoder = (BinaryDictPageDecoder*)page->data_decoder;
if (binary_dict_page_decoder->is_dict_encoding()) {
if (_dict_decoder == nullptr) {
PagePointer pp = _reader->get_dict_page_pointer();
RETURN_IF_ERROR(_reader->read_page(pp, &_dict_page_handle));
_dict_decoder.reset(new BinaryPlainPageDecoder(_dict_page_handle.data()));
RETURN_IF_ERROR(_dict_decoder->init());
}
binary_dict_page_decoder->set_dict_decoder(_dict_decoder.get());
}
}
page->offset_in_page = 0;
return Status::OK();

View File

@ -29,6 +29,7 @@
#include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexIterator
#include "olap/rowset/segment_v2/column_zone_map.h" // for ColumnZoneMap
#include "olap/rowset/segment_v2/row_ranges.h" // for RowRanges
#include "olap/rowset/segment_v2/page_handle.h" // for PageHandle
namespace doris {
@ -85,6 +86,8 @@ public:
void get_row_ranges_by_zone_map(CondColumn* cond_column,
const std::vector<CondColumn*>& delete_conditions, RowRanges* row_ranges);
PagePointer get_dict_page_pointer() const;
private:
Status _init_ordinal_index();
@ -189,6 +192,12 @@ private:
// 3. When _page is null, it means that this reader can not be read.
std::unique_ptr<ParsedPage> _page;
// keep dict page decoder
std::unique_ptr<PageDecoder> _dict_decoder;
// keep the dict page handle so that the page data used by _dict_decoder is not released
PageHandle _dict_page_handle;
// page iterator used to get next page when current page is finished.
// This value will be reset when a new seek is issued
OrdinalPageIndexIterator _page_iter;

View File

@ -213,6 +213,14 @@ Status ColumnWriter::write_data() {
RETURN_IF_ERROR(_write_data_page(page));
page = page->next;
}
// write column dict
if (_encoding_info->encoding() == DICT_ENCODING) {
Slice dict_page;
_page_builder->get_dictionary_page(&dict_page);
std::vector<Slice> origin_data;
origin_data.push_back(dict_page);
RETURN_IF_ERROR(_write_physical_page(&origin_data, &_dict_page_pp));
}
return Status::OK();
}
@ -240,6 +248,9 @@ void ColumnWriter::write_meta(ColumnMetaPB* meta) {
if (_opts.need_zone_map) {
_zone_map_pp.to_proto(meta->mutable_zone_map_page());
}
if (_encoding_info->encoding() == DICT_ENCODING) {
_dict_page_pp.to_proto(meta->mutable_dict_page());
}
}
// write a page into file and update ordinal index

View File

@ -157,6 +157,7 @@ private:
PagePointer _ordinal_index_pp;
PagePointer _zone_map_pp;
PagePointer _dict_page_pp;
uint64_t _written_size = 0;
};

View File

@ -28,9 +28,9 @@ ColumnZoneMapBuilder::ColumnZoneMapBuilder(const TypeInfo* type_info) : _type_in
options.data_page_size = 0;
_page_builder.reset(new BinaryPlainPageBuilder(options));
_field.reset(FieldFactory::create_by_type(_type_info->type()));
_max_string_value = _arena.Allocate(OLAP_STRING_MAX_LENGTH);
_zone_map.min_value = _arena.Allocate(_type_info->size());
_zone_map.max_value = _arena.Allocate(_type_info->size());
_zone_map.min_value = _field->allocate_value_from_arena(&_arena);
_zone_map.max_value = _field->allocate_value_from_arena(&_arena);
_reset_zone_map();
}
@ -38,10 +38,10 @@ Status ColumnZoneMapBuilder::add(const uint8_t *vals, size_t count) {
if (vals != nullptr) {
for (int i = 0; i < count; ++i) {
if (_field->compare(_zone_map.min_value, (char *)vals) > 0) {
_field->deep_copy_content(_zone_map.min_value, (const char *)vals, &_arena);
_field->direct_copy_content(_zone_map.min_value, (const char *)vals);
}
if (_field->compare(_zone_map.max_value, (char *)vals) < 0) {
_field->deep_copy_content(_zone_map.max_value, (const char *)vals, &_arena);
_field->direct_copy_content(_zone_map.max_value, (const char *)vals);
}
vals += _type_info->size();
if (!_zone_map.has_not_null) {
@ -78,10 +78,6 @@ Status ColumnZoneMapBuilder::flush() {
}
void ColumnZoneMapBuilder::_reset_zone_map() {
// we should allocate max varchar length and set to max for min value
Slice *min_slice = (Slice *)_zone_map.min_value;
min_slice->data = _max_string_value;
min_slice->size = OLAP_STRING_MAX_LENGTH;
_field->set_to_max(_zone_map.min_value);
_field->set_to_min(_zone_map.max_value);
_zone_map.has_null = false;

View File

@ -73,7 +73,6 @@ private:
std::unique_ptr<Field> _field;
// memory will be managed by arena
ZoneMap _zone_map;
char* _max_string_value;
Arena _arena;
};

View File

@ -20,6 +20,7 @@
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/bitshuffle_page.h"
#include "olap/rowset/segment_v2/rle_page.h"
#include "olap/rowset/segment_v2/binary_dict_page.h"
namespace doris {
namespace segment_v2 {
@ -67,6 +68,18 @@ struct TypeEncodingTraits<type, RLE> {
}
};
// Encoding traits for DICT_ENCODING, for any field type: the factory functions
// below hand out the binary dict page builder / decoder.
template<FieldType type>
struct TypeEncodingTraits<type, DICT_ENCODING> {
    // Creates a BinaryDictPageBuilder configured with `opts` and stores it in
    // `*builder`. The object is created with `new`; the caller receives the raw pointer.
    static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
        auto* dict_builder = new BinaryDictPageBuilder(opts);
        *builder = dict_builder;
        return Status::OK();
    }

    // Creates a BinaryDictPageDecoder over `data` with `opts` and stores it in
    // `*decoder`. The object is created with `new`; the caller receives the raw pointer.
    static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, PageDecoder** decoder) {
        auto* dict_decoder = new BinaryDictPageDecoder(data, opts);
        *decoder = dict_decoder;
        return Status::OK();
    }
};
template<FieldType Type, EncodingTypePB Encoding>
struct EncodingTraits : TypeEncodingTraits<Type, Encoding> {
static const FieldType type = Type;
@ -122,6 +135,10 @@ EncodingInfoResolver::EncodingInfoResolver() {
_add_map<OLAP_FIELD_TYPE_FLOAT, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_DOUBLE, BIT_SHUFFLE>();
_add_map<OLAP_FIELD_TYPE_DOUBLE, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_CHAR, DICT_ENCODING>();
_add_map<OLAP_FIELD_TYPE_CHAR, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_VARCHAR, DICT_ENCODING>();
_add_map<OLAP_FIELD_TYPE_VARCHAR, PLAIN_ENCODING>();
_add_map<OLAP_FIELD_TYPE_BOOL, RLE>();
_add_map<OLAP_FIELD_TYPE_BOOL, BIT_SHUFFLE>();
_add_map<OLAP_FIELD_TYPE_BOOL, PLAIN_ENCODING>();

View File

@ -31,7 +31,6 @@ struct PageBuilderOptions {
};
struct PageDecoderOptions {
BinaryPlainPageDecoder* dict_decoder = nullptr;
};
} // namespace segment_v2

View File

@ -27,6 +27,7 @@ TypeInfo::TypeInfo(TypeTraitsClass t)
_deep_copy(TypeTraitsClass::deep_copy),
_deep_copy_with_arena(TypeTraitsClass::deep_copy_with_arena),
_direct_copy(TypeTraitsClass::direct_copy),
_allocate_value_from_arena(TypeTraitsClass::allocate_value_from_arena),
_from_string(TypeTraitsClass::from_string),
_to_string(TypeTraitsClass::to_string),
_set_to_max(TypeTraitsClass::set_to_max),

View File

@ -64,6 +64,10 @@ public:
_direct_copy(dest, src);
}
// Calls the type-specific allocation function stored in
// _allocate_value_from_arena and returns the pointer it produces.
inline char* allocate_value_from_arena(Arena* arena) const {
    return (*_allocate_value_from_arena)(arena);
}
// Calls the type-specific function stored in _from_string with `buf` and
// `scan_key`, and returns its status.
OLAPStatus from_string(void* buf, const std::string& scan_key) const {
    const OLAPStatus parse_status = (*_from_string)(buf, scan_key);
    return parse_status;
}
@ -85,6 +89,7 @@ private:
void (*_deep_copy)(void* dest, const void* src, MemPool* mem_pool);
void (*_deep_copy_with_arena)(void* dest, const void* src, Arena* arena);
void (*_direct_copy)(void* dest, const void* src);
char* (*_allocate_value_from_arena)(Arena* arena);
OLAPStatus (*_from_string)(void* buf, const std::string& scan_key);
std::string (*_to_string)(const void* src);
@ -213,6 +218,10 @@ struct BaseFieldtypeTraits : public CppTypeTraits<field_type> {
return HashUtil::hash(data, sizeof(CppType), seed);
}
// Default allocation: reserves sizeof(CppType) bytes from `arena` for one value.
static inline char* allocate_value_from_arena(Arena* arena) {
    char* value_mem = arena->Allocate(sizeof(CppType));
    return value_mem;
}
static std::string to_string(const void* src) {
std::stringstream stream;
stream << *reinterpret_cast<const CppType*>(src);
@ -568,6 +577,13 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_CHAR> : public BaseFieldtypeTraits<OLAP_F
auto slice = reinterpret_cast<const Slice*>(data);
return HashUtil::hash(slice->data, slice->size, seed);
}
// Allocates a CHAR value from `arena`: first the Slice itself, then a data buffer
// of OLAP_CHAR_MAX_LENGTH bytes that the Slice points at. The Slice's size is set
// to OLAP_CHAR_MAX_LENGTH. Returns the address of the Slice.
static char* allocate_value_from_arena(Arena* arena) {
    char* slice_mem = arena->Allocate(sizeof(Slice));
    Slice* value = reinterpret_cast<Slice*>(slice_mem);
    value->data = arena->Allocate(OLAP_CHAR_MAX_LENGTH);
    value->size = OLAP_CHAR_MAX_LENGTH;
    return slice_mem;
}
};
template<>
@ -594,6 +610,13 @@ struct FieldTypeTraits<OLAP_FIELD_TYPE_VARCHAR> : public FieldTypeTraits<OLAP_FI
auto slice = reinterpret_cast<Slice*>(buf);
slice->size = 0;
}
// Allocates a VARCHAR value from `arena`: first the Slice itself, then a data buffer
// of OLAP_STRING_MAX_LENGTH bytes that the Slice points at. The Slice's size is set
// to OLAP_STRING_MAX_LENGTH. Returns the address of the Slice.
static char* allocate_value_from_arena(Arena* arena) {
    char* slice_mem = arena->Allocate(sizeof(Slice));
    Slice* value = reinterpret_cast<Slice*>(slice_mem);
    value->data = arena->Allocate(OLAP_STRING_MAX_LENGTH);
    value->size = OLAP_STRING_MAX_LENGTH;
    return slice_mem;
}
};
template<>

View File

@ -62,8 +62,9 @@ public:
// decode
PageDecoderOptions decoder_options;
decoder_options.dict_decoder = dict_page_decoder.get();
BinaryDictPageDecoder page_decoder(s, decoder_options);
page_decoder.set_dict_decoder(dict_page_decoder.get());
status = page_decoder.init();
ASSERT_TRUE(status.ok());
ASSERT_EQ(slices.size(), page_decoder.count());
@ -154,9 +155,9 @@ public:
// decode
PageDecoderOptions decoder_options;
decoder_options.dict_decoder = dict_page_decoder.get();
BinaryDictPageDecoder page_decoder(results[slice_index], decoder_options);
status = page_decoder.init();
page_decoder.set_dict_decoder(dict_page_decoder.get());
ASSERT_TRUE(status.ok());
//check values

View File

@ -56,7 +56,7 @@ public:
PageDecoderType page_decoder(s, decoder_options);
Status status = page_decoder.init();
ASSERT_TRUE(status.ok());
//test1
size_t size = 3;

View File

@ -606,6 +606,247 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
}
}
// Fills `target` with a test value derived from the integer `src`, according to `fieldType`.
//  - OLAP_FIELD_TYPE_CHAR:    `target` is treated as a Slice of exactly `_length` bytes
//                             allocated from `_arena`; the decimal text of `src` is copied
//                             in and the remaining bytes are zero-filled.
//  - OLAP_FIELD_TYPE_VARCHAR: `target` is treated as a Slice that holds exactly the decimal
//                             text of `src`, allocated from `_arena` (`_length` is ignored).
//  - any other type:          `target` is treated as an int and receives `src` directly.
void set_column_value_by_type(FieldType fieldType, int src, char* target, Arena* _arena, size_t _length = 0) {
    if (fieldType == OLAP_FIELD_TYPE_CHAR) {
        // Keep the text in a named local: &std::to_string(src)[0] would point into a
        // temporary that is destroyed at the end of that statement.
        const std::string src_text = std::to_string(src);
        // Never copy more than the `_length` bytes allocated below.
        const size_t copy_len = src_text.size() < _length ? src_text.size() : _length;
        auto* dest_slice = reinterpret_cast<Slice*>(target);
        dest_slice->size = _length;
        dest_slice->data = _arena->Allocate(dest_slice->size);
        memcpy(dest_slice->data, src_text.data(), copy_len);
        // zero-pad the tail so the CHAR value has a fixed width
        memset(dest_slice->data + copy_len, 0, dest_slice->size - copy_len);
    } else if (fieldType == OLAP_FIELD_TYPE_VARCHAR) {
        const std::string src_text = std::to_string(src);
        auto* dest_slice = reinterpret_cast<Slice*>(target);
        dest_slice->size = src_text.size();
        dest_slice->data = _arena->Allocate(src_text.size());
        memcpy(dest_slice->data, src_text.data(), src_text.size());
    } else {
        *(int*)target = src;
    }
}
// End-to-end write/read test for string key columns: two CHAR and two VARCHAR columns.
// It writes 4096 rows into one segment file, then checks
//   1. a full scan returns every value that was written,
//   2. key-range seeks that are expected to match nothing return end-of-file,
//   3. a condition on CHAR column "1" that is expected to hit returns all rows,
//   4. a condition on CHAR column "1" that is expected to miss returns end-of-file.
// NOTE(review): the test name suggests these columns are dictionary-encoded; the
// encoding choice itself is not visible in this test — confirm.
TEST_F(SegmentReaderWriterTest, TestStringDict) {
size_t num_rows_per_block = 10;
// arena backing every Slice payload created by set_column_value_by_type below
Arena _arena;
std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
tablet_schema->_num_columns = 4;
tablet_schema->_num_key_columns = 3;
tablet_schema->_num_short_key_columns = 2;
tablet_schema->_num_rows_per_row_block = num_rows_per_block;
tablet_schema->_cols.push_back(create_char_key(1));
tablet_schema->_cols.push_back(create_char_key(2));
tablet_schema->_cols.push_back(create_varchar_key(3));
tablet_schema->_cols.push_back(create_varchar_key(4));
// write the segment
std::string dname = "./ut_dir/segment_test";
FileUtils::create_dir(dname);
SegmentWriterOptions opts;
opts.num_rows_per_block = num_rows_per_block;
std::string fname = dname + "/string_case";
SegmentWriter writer(fname, 0, tablet_schema.get(), opts);
auto st = writer.init(10);
ASSERT_TRUE(st.ok());
RowCursor row;
auto olap_st = row.init(*tablet_schema);
ASSERT_EQ(OLAP_SUCCESS, olap_st);
// Row i, column j is generated from the integer i * 10 + j:
// 0, 1, 2, 3
// 10, 11, 12, 13
// 20, 21, 22, 23
// and each integer is converted to its decimal string before being written.
for (int i = 0; i < 4096; ++i) {
for (int j = 0; j < 4; ++j) {
auto cell = row.cell(j);
cell.set_not_null();
set_column_value_by_type(tablet_schema->_cols[j]._type, i * 10 + j, (char*)cell.mutable_cell_ptr(), &_arena, tablet_schema->_cols[j]._length);
}
Status status = writer.append_row(row);
ASSERT_TRUE(status.ok());
}
uint64_t file_size = 0;
st = writer.finalize(&file_size);
ASSERT_TRUE(st.ok());
{
// reopen the file that was just written and verify its contents
std::shared_ptr<Segment> segment(new Segment(fname, 0, tablet_schema.get()));
st = segment->open();
ASSERT_TRUE(st.ok());
ASSERT_EQ(4096, segment->num_rows());
Schema schema(*tablet_schema);
// scan all rows in batches of up to 1024 and compare every cell with the
// value regenerated from its row id and column id
{
StorageReadOptions read_opts;
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
RowBlockV2 block(schema, 1024);
int left = 4096;
int rowid = 0;
while (left > 0) {
int rows_read = left > 1024 ? 1024 : left;
block.clear();
st = iter->next_batch(&block);
ASSERT_TRUE(st.ok());
ASSERT_EQ(rows_read, block.num_rows());
left -= rows_read;
for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
auto cid = block.schema()->column_ids()[j];
auto column_block = block.column_block(j);
for (int i = 0; i < rows_read; ++i) {
int rid = rowid + i;
ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
const Slice* actual = reinterpret_cast<const Slice*>(column_block.cell_ptr(i));
Slice expect;
set_column_value_by_type(tablet_schema->_cols[j]._type, rid * 10 + cid, reinterpret_cast<char*>(&expect), &_arena, tablet_schema->_cols[j]._length);
ASSERT_EQ(expect.to_string(), actual->to_string());
}
}
rowid += rows_read;
}
}
// test seek with only a lower-bound key (built from 40970) on the first
// column; the read is expected to hit end-of-file with no rows
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(*tablet_schema, 1);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, 40970, (char*)cell.mutable_cell_ptr(), &_arena, tablet_schema->_cols[0]._length);
}
StorageReadOptions read_opts;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, nullptr, false);
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
RowBlockV2 block(schema, 100);
st = iter->next_batch(&block);
ASSERT_TRUE(st.is_end_of_file());
ASSERT_EQ(0, block.num_rows());
}
// test seek with a key range whose bounds are built from -2 and -1;
// the read is expected to hit end-of-file with no rows
{
// lower bound
std::unique_ptr<RowCursor> lower_bound(new RowCursor());
lower_bound->init(*tablet_schema, 1);
{
auto cell = lower_bound->cell(0);
cell.set_not_null();
set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, -2, (char*)cell.mutable_cell_ptr(), &_arena, tablet_schema->_cols[0]._length);
}
// upper bound
std::unique_ptr<RowCursor> upper_bound(new RowCursor());
upper_bound->init(*tablet_schema, 1);
{
auto cell = upper_bound->cell(0);
cell.set_not_null();
set_column_value_by_type(OLAP_FIELD_TYPE_CHAR, -1, (char*)cell.mutable_cell_ptr(), &_arena, tablet_schema->_cols[0]._length);
}
StorageReadOptions read_opts;
read_opts.key_ranges.emplace_back(lower_bound.get(), false, upper_bound.get(), false);
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
RowBlockV2 block(schema, 100);
st = iter->next_batch(&block);
ASSERT_TRUE(st.is_end_of_file());
ASSERT_EQ(0, block.num_rows());
}
// test CHAR zone map query hit: condition is column "1" > "100";
// every page should be read, so all 4096 rows are expected back
{
TCondition condition;
condition.__set_column_name("1");
condition.__set_condition_op(">");
std::vector<std::string> vals = {"100"};
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(tablet_schema.get());
conditions->append_condition(condition);
StorageReadOptions read_opts;
read_opts.conditions = conditions.get();
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
RowBlockV2 block(schema, 1024);
int left = 4 * 1024;
int rowid = 0;
while (left > 0) {
int rows_read = left > 1024 ? 1024 : left;
block.clear();
st = iter->next_batch(&block);
ASSERT_TRUE(st.ok());
ASSERT_EQ(rows_read, block.num_rows());
left -= rows_read;
for (int j = 0; j < block.schema()->column_ids().size(); ++j) {
auto cid = block.schema()->column_ids()[j];
auto column_block = block.column_block(j);
for (int i = 0; i < rows_read; ++i) {
int rid = rowid + i;
ASSERT_FALSE(BitmapTest(column_block.null_bitmap(), i));
const Slice* actual = reinterpret_cast<const Slice*>(column_block.cell_ptr(i));
Slice expect;
set_column_value_by_type(tablet_schema->_cols[j]._type, rid * 10 + cid, reinterpret_cast<char*>(&expect), &_arena, tablet_schema->_cols[j]._length);
ASSERT_EQ(expect.to_string(), actual->to_string()) << "rid:" << rid << ", i:" << i;;
}
}
rowid += rows_read;
}
ASSERT_EQ(4 * 1024, rowid);
// one more read after the last row must report end-of-file
st = iter->next_batch(&block);
ASSERT_TRUE(st.is_end_of_file());
ASSERT_EQ(0, block.num_rows());
}
// test CHAR zone map query miss: condition is column "1" < "-2";
// the read is expected to hit end-of-file with no rows
{
TCondition condition;
condition.__set_column_name("1");
condition.__set_condition_op("<");
std::vector<std::string> vals = {"-2"};
condition.__set_condition_values(vals);
std::shared_ptr<Conditions> conditions(new Conditions());
conditions->set_tablet_schema(tablet_schema.get());
conditions->append_condition(condition);
StorageReadOptions read_opts;
read_opts.conditions = conditions.get();
std::unique_ptr<SegmentIterator> iter = segment->new_iterator(schema, read_opts);
RowBlockV2 block(schema, 1024);
st = iter->next_batch(&block);
ASSERT_TRUE(st.is_end_of_file());
ASSERT_EQ(0, block.num_rows());
}
}
// remove the directory created for this test
FileUtils::remove_all(dname);
}
}
}

View File

@ -55,4 +55,28 @@ TabletColumn create_int_value(
return column;
}
// Builds a CHAR key column for tests. The unique id is `id` and the column name is
// the decimal text of `id`; the declared length is 8 and the index length is 1.
// The column is nullable unless `is_nullable` is false.
TabletColumn create_char_key(int32_t id, bool is_nullable = true) {
    TabletColumn char_col;
    // identity
    char_col._unique_id = id;
    char_col._col_name = std::to_string(id);
    // type and sizing
    char_col._type = OLAP_FIELD_TYPE_CHAR;
    char_col._length = 8;
    char_col._index_length = 1;
    // key / nullability flags
    char_col._is_key = true;
    char_col._is_nullable = is_nullable;
    return char_col;
}
// Builds a VARCHAR key column for tests. The unique id is `id` and the column name is
// the decimal text of `id`; the declared length is 4 and the index length is 4.
// The column is nullable unless `is_nullable` is false.
TabletColumn create_varchar_key(int32_t id, bool is_nullable = true) {
    TabletColumn varchar_col;
    // identity
    varchar_col._unique_id = id;
    varchar_col._col_name = std::to_string(id);
    // type and sizing
    varchar_col._type = OLAP_FIELD_TYPE_VARCHAR;
    varchar_col._length = 4;
    varchar_col._index_length = 4;
    // key / nullability flags
    varchar_col._is_key = true;
    varchar_col._is_nullable = is_nullable;
    return varchar_col;
}
}

View File

@ -95,7 +95,7 @@ message ColumnMetaPB {
optional PagePointerPB zone_map_page = 8;
// // dictionary page for DICT_ENCODING
// optional PagePointerPB dict_page = 2;
optional PagePointerPB dict_page = 9;
// // bloom filter pages for bloom filter column
// repeated PagePointerPB bloom_filter_pages = 3;