[segment_v2] Switch to Unified and Extensible Page Format (#2953)
Fixes #2892

IMPORTANT NOTICE: this CL makes incompatible changes to the V2 storage format; developers need to create new tables for testing.

This CL refactors the metadata and page format of segment_v2 in order to
* make it easy to extend existing page types
* make it easy to add new page types without sacrificing code reuse
* make it possible to use SIMD to speed up page decoding

The main code changes are:
* Page and index metadata is redesigned, see `segment_v2.proto`.
* The new class `PageIO` is the single place for reading and writing all pages. This removes a lot of duplicated code. `PageCompressor` and `PageDecompressor` are no longer needed and have been removed.
* The type of value ordinals is changed from `rowid_t` to the 64-bit `ordinal_t`; this affects the ordinal index as well.
* A column's ordinal index is now implemented with IndexPage, the same as IndexedColumn.
* The zone map index is now implemented with IndexedColumn.
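To make the unified read path concrete, here is a minimal sketch (not part of the patch) of how a caller goes through `PageIO`. The `PageReadOptions` fields and the `PageIO::read_and_decompress_page()` call mirror what `ColumnReader::read_page()` does in this diff; the wrapper function `read_one_page`, the chosen option values, and the trimmed include list are hypothetical.

```cpp
#include "olap/rowset/segment_v2/page_io.h"  // PageIO, PageReadOptions (path as used in this diff)
// Other Doris headers (PagePointer, PageHandle, PageFooterPB, ...) elided for brevity.

namespace doris {
namespace segment_v2 {

// Hypothetical helper: read a single page through the unified PageIO path.
Status read_one_page(RandomAccessFile* file, const PagePointer& pp,
                     const BlockCompressionCodec* codec, OlapReaderStatistics* stats,
                     PageHandle* handle, Slice* page_body, PageFooterPB* footer) {
    PageReadOptions opts;
    opts.file = file;             // file that contains the page
    opts.page_pointer = pp;       // offset and size of the page within the file
    opts.codec = codec;           // nullptr means the page body is not compressed
    opts.stats = stats;           // I/O and page-cache counters
    opts.verify_checksum = true;  // validate the page checksum on read
    opts.use_page_cache = true;   // look up / populate the storage page cache
    opts.kept_in_memory = false;  // durable cache priority is reserved for in-memory tables
    // One call covers cache lookup, file read, checksum verification, decompression,
    // and footer parsing, for every page type (data, index, dictionary, ...).
    return PageIO::read_and_decompress_page(opts, handle, page_body, footer);
}

} // namespace segment_v2
} // namespace doris
```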
@@ -93,18 +93,18 @@ add_library(Olap STATIC
rowset/segment_v2/indexed_column_reader.cpp
rowset/segment_v2/indexed_column_writer.cpp
rowset/segment_v2/ordinal_page_index.cpp
rowset/segment_v2/page_compression.cpp
rowset/segment_v2/page_io.cpp
rowset/segment_v2/binary_dict_page.cpp
rowset/segment_v2/binary_prefix_page.cpp
rowset/segment_v2/segment.cpp
rowset/segment_v2/segment_iterator.cpp
rowset/segment_v2/empty_segment_iterator.cpp
rowset/segment_v2/segment_writer.cpp
rowset/segment_v2/column_zone_map.cpp
rowset/segment_v2/block_split_bloom_filter.cpp
rowset/segment_v2/bloom_filter_index_reader.cpp
rowset/segment_v2/bloom_filter_index_writer.cpp
rowset/segment_v2/bloom_filter.cpp
rowset/segment_v2/zone_map_index.cpp
task/engine_batch_load_task.cpp
task/engine_checksum_task.cpp
task/engine_clone_task.cpp
@@ -64,6 +64,7 @@ private:
add_mapping<OLAP_FIELD_TYPE_INT>();
add_mapping<OLAP_FIELD_TYPE_UNSIGNED_INT>();
add_mapping<OLAP_FIELD_TYPE_BIGINT>();
add_mapping<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>();
add_mapping<OLAP_FIELD_TYPE_LARGEINT>();
add_mapping<OLAP_FIELD_TYPE_DATETIME>();
@@ -22,15 +22,15 @@
namespace doris {
namespace segment_v2 {

Status BitmapIndexReader::load(bool cache_in_memory) {
const IndexedColumnMetaPB& dict_meta = _bitmap_index_meta.dict_column();
const IndexedColumnMetaPB& bitmap_meta = _bitmap_index_meta.bitmap_column();
_has_null = _bitmap_index_meta.has_null();
Status BitmapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
const IndexedColumnMetaPB& dict_meta = _bitmap_index_meta->dict_column();
const IndexedColumnMetaPB& bitmap_meta = _bitmap_index_meta->bitmap_column();
_has_null = _bitmap_index_meta->has_null();

_dict_column_reader.reset(new IndexedColumnReader(_file_name, dict_meta, cache_in_memory));
_bitmap_column_reader.reset(new IndexedColumnReader(_file_name, bitmap_meta, cache_in_memory));
RETURN_IF_ERROR(_dict_column_reader->load());
RETURN_IF_ERROR(_bitmap_column_reader->load());
_dict_column_reader.reset(new IndexedColumnReader(_file_name, dict_meta));
_bitmap_column_reader.reset(new IndexedColumnReader(_file_name, bitmap_meta));
RETURN_IF_ERROR(_dict_column_reader->load(use_page_cache, kept_in_memory));
RETURN_IF_ERROR(_bitmap_column_reader->load(use_page_cache, kept_in_memory));
return Status::OK();
}
@@ -41,13 +41,13 @@ class IndexedColumnIterator;
class BitmapIndexReader {
public:
explicit BitmapIndexReader(const std::string& file_name,
const BitmapIndexColumnPB& bitmap_index_meta)
const BitmapIndexPB* bitmap_index_meta)
: _file_name(file_name),
_bitmap_index_meta(bitmap_index_meta){
_typeinfo = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
}

Status load(bool cache_in_memory);
Status load(bool use_page_cache, bool kept_in_memory);

// create a new column iterator. Client should delete returned iterator
Status new_iterator(BitmapIndexIterator** iterator);

@@ -65,7 +65,7 @@ private:
std::string _file_name;
const TypeInfo* _typeinfo;
const BitmapIndexColumnPB& _bitmap_index_meta;
const BitmapIndexPB* _bitmap_index_meta;
bool _has_null = false;
std::unique_ptr<IndexedColumnReader> _dict_column_reader;
std::unique_ptr<IndexedColumnReader> _bitmap_column_reader;
@@ -100,8 +100,11 @@ public:
_rid += count;
}

Status finish(WritableFile* file, BitmapIndexColumnPB* meta) override {
meta->set_bitmap_type(BitmapIndexColumnPB::ROARING_BITMAP);
Status finish(WritableFile* file, ColumnIndexMetaPB* index_meta) override {
index_meta->set_type(BITMAP_INDEX);
BitmapIndexPB* meta = index_meta->mutable_bitmap_index();

meta->set_bitmap_type(BitmapIndexPB::ROARING_BITMAP);
meta->set_has_null(!_null_bitmap.isEmpty());

{ // write dictionary

@@ -42,7 +42,7 @@ public:

virtual void add_nulls(uint32_t count) = 0;

virtual Status finish(WritableFile* file, BitmapIndexColumnPB* meta) = 0;
virtual Status finish(WritableFile* file, ColumnIndexMetaPB* index_meta) = 0;

virtual uint64_t size() const = 0;
private:
@@ -23,11 +23,11 @@
namespace doris {
namespace segment_v2 {

Status BloomFilterIndexReader::load(bool cache_in_memory) {
const IndexedColumnMetaPB& bf_index_meta = _bloom_filter_index_meta.bloom_filter();
Status BloomFilterIndexReader::load(bool use_page_cache, bool kept_in_memory) {
const IndexedColumnMetaPB& bf_index_meta = _bloom_filter_index_meta->bloom_filter();

_bloom_filter_reader.reset(new IndexedColumnReader(_file_name, bf_index_meta, cache_in_memory));
RETURN_IF_ERROR(_bloom_filter_reader->load());
_bloom_filter_reader.reset(new IndexedColumnReader(_file_name, bf_index_meta));
RETURN_IF_ERROR(_bloom_filter_reader->load(use_page_cache, kept_in_memory));
return Status::OK();
}

@@ -48,8 +48,8 @@ Status BloomFilterIndexIterator::read_bloom_filter(rowid_t ordinal, std::unique_
RETURN_IF_ERROR(_bloom_filter_iter.next_batch(&num_read, &column_block_view));
DCHECK(num_to_read == num_read);
// construct bloom filter
BloomFilter::create(_reader->_bloom_filter_index_meta.algorithm(), bf);
RETURN_IF_ERROR((*bf)->init(value.data, value.size, _reader->_bloom_filter_index_meta.hash_strategy()));
BloomFilter::create(_reader->_bloom_filter_index_meta->algorithm(), bf);
RETURN_IF_ERROR((*bf)->init(value.data, value.size, _reader->_bloom_filter_index_meta->hash_strategy()));
_pool->clear();
return Status::OK();
}
@@ -44,13 +44,13 @@ class BloomFilter;
class BloomFilterIndexReader {
public:
explicit BloomFilterIndexReader(const std::string& file_name,
const BloomFilterIndexPB& bloom_filter_index_meta)
const BloomFilterIndexPB* bloom_filter_index_meta)
: _file_name(file_name),
_bloom_filter_index_meta(bloom_filter_index_meta) {
_typeinfo = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
}

Status load(bool cache_in_memory);
Status load(bool use_page_cache, bool kept_in_memory);

// create a new column iterator.
Status new_iterator(std::unique_ptr<BloomFilterIndexIterator>* iterator);

@@ -64,7 +64,7 @@ private:
std::string _file_name;
const TypeInfo* _typeinfo;
BloomFilterIndexPB _bloom_filter_index_meta;
const BloomFilterIndexPB* _bloom_filter_index_meta;
std::unique_ptr<IndexedColumnReader> _bloom_filter_reader;
};
@@ -104,10 +104,12 @@ public:
return Status::OK();
}

Status finish(WritableFile* file, BloomFilterIndexPB* meta) override {
Status finish(WritableFile* file, ColumnIndexMetaPB* index_meta) override {
if (_values.size() > 0) {
RETURN_IF_ERROR(flush());
}
index_meta->set_type(BLOOM_FILTER_INDEX);
BloomFilterIndexPB* meta = index_meta->mutable_bloom_filter_index();
meta->set_hash_strategy(_bf_options.strategy);
meta->set_algorithm(BLOCK_BLOOM_FILTER);

@@ -118,7 +120,7 @@ public:
options.write_value_index = false;
options.encoding = PLAIN_ENCODING;
IndexedColumnWriter bf_writer(options, bf_typeinfo, file);
bf_writer.init();
RETURN_IF_ERROR(bf_writer.init());
for (auto& bf : _bfs) {
Slice data(bf->data(), bf->size());
bf_writer.add(&data);

@@ -47,7 +47,7 @@ public:

virtual Status flush() = 0;

virtual Status finish(WritableFile* file, BloomFilterIndexPB* meta) = 0;
virtual Status finish(WritableFile* file, ColumnIndexMetaPB* index_meta) = 0;

virtual uint64_t size() = 0;
private:
@@ -21,19 +21,14 @@
#include "env/env.h" // for RandomAccessFile
#include "gutil/strings/substitute.h" // for Substitute
#include "olap/rowset/segment_v2/encoding_info.h" // for EncodingInfo
#include "olap/rowset/segment_v2/page_decoder.h" // for PagePointer
#include "olap/rowset/segment_v2/page_handle.h" // for PageHandle
#include "olap/rowset/segment_v2/page_io.h"
#include "olap/rowset/segment_v2/page_pointer.h" // for PagePointer
#include "olap/rowset/segment_v2/page_compression.h"
#include "olap/rowset/segment_v2/options.h" // for PageDecoderOptions
#include "olap/types.h" // for TypeInfo
#include "olap/column_block.h" // for ColumnBlockView
#include "olap/page_cache.h"
#include "util/coding.h" // for get_varint32
#include "util/crc32c.h"
#include "util/rle_encoding.h" // for RleDecoder
#include "util/block_compression.h"
#include "util/file_manager.h"
#include "olap/rowset/segment_v2/binary_dict_page.h" // for BinaryDictPageDecoder
#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
@@ -70,6 +65,31 @@ Status ColumnReader::init() {
}
RETURN_IF_ERROR(EncodingInfo::get(_type_info, _meta.encoding(), &_encoding_info));
RETURN_IF_ERROR(get_block_compression_codec(_meta.compression(), &_compress_codec));

for (int i = 0; i < _meta.indexes_size(); i++) {
auto& index_meta = _meta.indexes(i);
switch (index_meta.type()) {
case ORDINAL_INDEX:
_ordinal_index_meta = &index_meta.ordinal_index();
break;
case ZONE_MAP_INDEX:
_zone_map_index_meta = &index_meta.zone_map_index();
break;
case BITMAP_INDEX:
_bitmap_index_meta = &index_meta.bitmap_index();
break;
case BLOOM_FILTER_INDEX:
_bf_index_meta = &index_meta.bloom_filter_index();
break;
default:
return Status::Corruption(Substitute(
"Bad file $0: invalid column index type $1", _file_name, index_meta.type()));
}
}
if (_ordinal_index_meta == nullptr) {
return Status::Corruption(Substitute(
"Bad file $0: missing ordinal index for column $1", _file_name, _meta.column_id()));
}
return Status::OK();
}
@ -80,85 +100,23 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) {
|
||||
|
||||
Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
RETURN_IF_ERROR(_bitmap_index_reader->new_iterator(iterator));
|
||||
RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::read_page(const PagePointer& pp, const ColumnIteratorOptions& opts, PageHandle* handle) {
|
||||
OpenedFileHandle<RandomAccessFile> file_handle;
|
||||
RETURN_IF_ERROR(FileManager::instance()->open_file(_file_name, &file_handle));
|
||||
RandomAccessFile* input_file = file_handle.file();
|
||||
return read_page(input_file, pp, opts, handle);
|
||||
}
|
||||
Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const PagePointer& pp,
|
||||
PageHandle* handle, Slice* page_body, PageFooterPB* footer) {
|
||||
iter_opts.sanity_check();
|
||||
PageReadOptions opts;
|
||||
opts.file = iter_opts.file;
|
||||
opts.page_pointer = pp;
|
||||
opts.codec = _compress_codec;
|
||||
opts.stats = iter_opts.stats;
|
||||
opts.verify_checksum = _opts.verify_checksum;
|
||||
opts.use_page_cache = iter_opts.use_page_cache;
|
||||
opts.kept_in_memory = _opts.kept_in_memory;
|
||||
|
||||
Status ColumnReader::read_page(RandomAccessFile* file, const PagePointer& pp,
|
||||
const ColumnIteratorOptions& iter_opts, PageHandle* handle) {
|
||||
iter_opts.stats->total_pages_num++;
|
||||
auto cache = StoragePageCache::instance();
|
||||
PageCacheHandle cache_handle;
|
||||
StoragePageCache::CacheKey cache_key(file->file_name(), pp.offset);
|
||||
if (iter_opts.use_page_cache && cache->lookup(cache_key, &cache_handle)) {
|
||||
// we find page in cache, use it
|
||||
*handle = PageHandle(std::move(cache_handle));
|
||||
iter_opts.stats->cached_pages_num++;
|
||||
return Status::OK();
|
||||
}
|
||||
// Now we read this from file.
|
||||
size_t page_size = pp.size;
|
||||
if (page_size < sizeof(uint32_t)) {
|
||||
return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size));
|
||||
}
|
||||
|
||||
// Now we use this buffer to store page from storage, if this page is compressed
|
||||
// this buffer will assigned uncompressed page, and origin content will be freed.
|
||||
std::unique_ptr<uint8_t[]> page(new uint8_t[page_size]);
|
||||
Slice page_slice(page.get(), page_size);
|
||||
{
|
||||
SCOPED_RAW_TIMER(&iter_opts.stats->io_ns);
|
||||
RETURN_IF_ERROR(file->read_at(pp.offset, page_slice));
|
||||
iter_opts.stats->compressed_bytes_read += page_size;
|
||||
}
|
||||
|
||||
size_t data_size = page_size - 4;
|
||||
if (_opts.verify_checksum) {
|
||||
uint32_t expect = decode_fixed32_le((uint8_t*)page_slice.data + page_slice.size - 4);
|
||||
uint32_t actual = crc32c::Value(page_slice.data, page_slice.size - 4);
|
||||
if (expect != actual) {
|
||||
return Status::Corruption(
|
||||
Substitute("Page checksum mismatch, actual=$0 vs expect=$1", actual, expect));
|
||||
}
|
||||
}
|
||||
|
||||
// remove page's suffix
|
||||
page_slice.size = data_size;
|
||||
|
||||
if (_compress_codec != nullptr) {
|
||||
PageDecompressor decompressor(page_slice, _compress_codec);
|
||||
|
||||
Slice uncompressed_page;
|
||||
{
|
||||
SCOPED_RAW_TIMER(&iter_opts.stats->decompress_ns);
|
||||
RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));
|
||||
}
|
||||
|
||||
// If decompressor create new heap memory for uncompressed data,
|
||||
// assign this uncompressed page to page and page slice
|
||||
if (uncompressed_page.data != page_slice.data) {
|
||||
page.reset((uint8_t*)uncompressed_page.data);
|
||||
}
|
||||
page_slice = uncompressed_page;
|
||||
iter_opts.stats->uncompressed_bytes_read += page_slice.size;
|
||||
}
|
||||
if (iter_opts.use_page_cache) {
|
||||
// insert this into cache and return the cache handle
|
||||
cache->insert(cache_key, page_slice, &cache_handle, _opts.cache_in_memory);
|
||||
*handle = PageHandle(std::move(cache_handle));
|
||||
} else {
|
||||
*handle = PageHandle(page_slice);
|
||||
}
|
||||
|
||||
page.release();
|
||||
return Status::OK();
|
||||
return PageIO::read_and_decompress_page(opts, handle, page_body, footer);
|
||||
}
|
||||
|
||||
Status ColumnReader::get_row_ranges_by_zone_map(CondColumn* cond_column,
|
||||
@ -173,32 +131,57 @@ Status ColumnReader::get_row_ranges_by_zone_map(CondColumn* cond_column,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
bool ColumnReader::match_condition(CondColumn* cond) const {
|
||||
if (_zone_map_index_meta == nullptr || cond == nullptr) {
|
||||
return true;
|
||||
}
|
||||
FieldType type = _type_info->type();
|
||||
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta.length()));
|
||||
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta.length()));
|
||||
return _zone_map_match_condition(
|
||||
_zone_map_index_meta->segment_zone_map(), min_value.get(), max_value.get(), cond);
|
||||
}
|
||||
|
||||
bool ColumnReader::_zone_map_match_condition(const ZoneMapPB& zone_map,
|
||||
WrapperField* min_value_container,
|
||||
WrapperField* max_value_container,
|
||||
CondColumn* cond) const {
|
||||
if (cond == nullptr) {
|
||||
return true;
|
||||
}
|
||||
if (!zone_map.has_not_null() && !zone_map.has_null()) {
|
||||
return false; // no data in this zone
|
||||
}
|
||||
// min value and max value are valid if has_not_null is true
|
||||
if (zone_map.has_not_null()) {
|
||||
min_value_container->from_string(zone_map.min());
|
||||
max_value_container->from_string(zone_map.max());
|
||||
}
|
||||
// for compatible original Cond eval logic
|
||||
// TODO(hkp): optimize OlapCond
|
||||
if (zone_map.has_null()) {
|
||||
// for compatible, if exist null, original logic treat null as min
|
||||
min_value_container->set_null();
|
||||
if (!zone_map.has_not_null()) {
|
||||
// for compatible OlapCond's 'is not null'
|
||||
max_value_container->set_null();
|
||||
}
|
||||
}
|
||||
|
||||
return cond->eval({min_value_container, max_value_container});
|
||||
}
|
||||
|
||||
Status ColumnReader::_get_filtered_pages(CondColumn* cond_column,
|
||||
const std::vector<CondColumn*>& delete_conditions,
|
||||
std::vector<uint32_t>* delete_partial_filtered_pages,
|
||||
std::vector<uint32_t>* page_indexes) {
|
||||
FieldType type = _type_info->type();
|
||||
const std::vector<ZoneMapPB>& zone_maps = _column_zone_map->get_column_zone_map();
|
||||
int32_t page_size = _column_zone_map->num_pages();
|
||||
const std::vector<ZoneMapPB>& zone_maps = _zone_map_index->page_zone_maps();
|
||||
int32_t page_size = _zone_map_index->num_pages();
|
||||
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta.length()));
|
||||
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta.length()));
|
||||
for (int32_t i = 0; i < page_size; ++i) {
|
||||
// min value and max value are valid if has_not_null is true
|
||||
if (zone_maps[i].has_not_null()) {
|
||||
min_value->from_string(zone_maps[i].min());
|
||||
max_value->from_string(zone_maps[i].max());
|
||||
}
|
||||
// for compatible original Cond eval logic
|
||||
// TODO(hkp): optimize OlapCond
|
||||
if (zone_maps[i].has_null()) {
|
||||
// for compatible, if exist null, original logic treat null as min
|
||||
min_value->set_null();
|
||||
if (!zone_maps[i].has_not_null()) {
|
||||
// for compatible OlapCond's 'is not null'
|
||||
max_value->set_null();
|
||||
}
|
||||
}
|
||||
if (cond_column == nullptr || cond_column->eval({min_value.get(), max_value.get()})) {
|
||||
if (_zone_map_match_condition(zone_maps[i], min_value.get(), max_value.get(), cond_column)) {
|
||||
bool should_read = true;
|
||||
for (auto& col_cond : delete_conditions) {
|
||||
int state = col_cond->del_eval({min_value.get(), max_value.get()});
|
||||
@ -220,8 +203,8 @@ Status ColumnReader::_get_filtered_pages(CondColumn* cond_column,
|
||||
Status ColumnReader::_calculate_row_ranges(const std::vector<uint32_t>& page_indexes, RowRanges* row_ranges) {
|
||||
row_ranges->clear();
|
||||
for (auto i : page_indexes) {
|
||||
rowid_t page_first_id = _ordinal_index->get_first_row_id(i);
|
||||
rowid_t page_last_id = _ordinal_index->get_last_row_id(i);
|
||||
ordinal_t page_first_id = _ordinal_index->get_first_ordinal(i);
|
||||
ordinal_t page_last_id = _ordinal_index->get_last_ordinal(i);
|
||||
RowRanges page_row_ranges(RowRanges::create_single(page_first_id, page_last_id + 1));
|
||||
RowRanges::ranges_union(*row_ranges, page_row_ranges, row_ranges);
|
||||
}
|
||||
@ -232,7 +215,7 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(CondColumn* cond_column, Row
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
RowRanges bf_row_ranges;
|
||||
std::unique_ptr<BloomFilterIndexIterator> bf_iter;
|
||||
RETURN_IF_ERROR(_bloom_filter_index_reader->new_iterator(&bf_iter));
|
||||
RETURN_IF_ERROR(_bloom_filter_index->new_iterator(&bf_iter));
|
||||
size_t range_size = row_ranges->range_size();
|
||||
// get covered page ids
|
||||
std::set<uint32_t> page_ids;
|
||||
@ -242,8 +225,8 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(CondColumn* cond_column, Row
|
||||
int64_t to = row_ranges->get_range_to(i);
|
||||
auto iter = _ordinal_index->seek_at_or_before(from);
|
||||
while (idx < to) {
|
||||
page_ids.insert(iter.cur_idx());
|
||||
idx = iter.cur_page_last_row_id() + 1;
|
||||
page_ids.insert(iter.page_index());
|
||||
idx = iter.last_ordinal() + 1;
|
||||
iter.next();
|
||||
}
|
||||
}
|
||||
@ -251,69 +234,40 @@ Status ColumnReader::get_row_ranges_by_bloom_filter(CondColumn* cond_column, Row
|
||||
std::unique_ptr<BloomFilter> bf;
|
||||
RETURN_IF_ERROR(bf_iter->read_bloom_filter(pid, &bf));
|
||||
if (cond_column->eval(bf.get())) {
|
||||
bf_row_ranges.add(RowRange(_ordinal_index->get_first_row_id(pid),
|
||||
_ordinal_index->get_last_row_id(pid) + 1));
|
||||
bf_row_ranges.add(RowRange(_ordinal_index->get_first_ordinal(pid),
|
||||
_ordinal_index->get_last_ordinal(pid) + 1));
|
||||
}
|
||||
}
|
||||
RowRanges::ranges_intersection(*row_ranges, bf_row_ranges, row_ranges);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_ordinal_index() {
|
||||
PagePointer pp = _meta.ordinal_index_page();
|
||||
PageHandle ph;
|
||||
OlapReaderStatistics stats;
|
||||
ColumnIteratorOptions opts;
|
||||
// column index only load once, so we use global config to decide
|
||||
if (!config::disable_storage_page_cache) {
|
||||
opts.use_page_cache = true;
|
||||
}
|
||||
opts.stats = &stats;
|
||||
RETURN_IF_ERROR(read_page(pp, opts, &ph));
|
||||
|
||||
_ordinal_index.reset(new OrdinalPageIndex(ph.data(), _num_rows));
|
||||
RETURN_IF_ERROR(_ordinal_index->load());
|
||||
return Status::OK();
|
||||
Status ColumnReader::_load_ordinal_index(bool use_page_cache, bool kept_in_memory) {
|
||||
DCHECK(_ordinal_index_meta != nullptr);
|
||||
_ordinal_index.reset(new OrdinalIndexReader(_file_name, _ordinal_index_meta, _num_rows));
|
||||
return _ordinal_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_zone_map_index() {
|
||||
if (_meta.has_zone_map_page()) {
|
||||
PagePointer pp = _meta.zone_map_page();
|
||||
PageHandle ph;
|
||||
OlapReaderStatistics stats;
|
||||
ColumnIteratorOptions opts;
|
||||
// column index only load once, so we use global config to decide
|
||||
if (!config::disable_storage_page_cache) {
|
||||
opts.use_page_cache = true;
|
||||
}
|
||||
opts.stats = &stats;
|
||||
RETURN_IF_ERROR(read_page(pp, opts, &ph));
|
||||
_column_zone_map.reset(new ColumnZoneMap(ph.data()));
|
||||
RETURN_IF_ERROR(_column_zone_map->load());
|
||||
} else {
|
||||
_column_zone_map.reset(nullptr);
|
||||
Status ColumnReader::_load_zone_map_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_zone_map_index_meta != nullptr) {
|
||||
_zone_map_index.reset(new ZoneMapIndexReader(_file_name, _zone_map_index_meta));
|
||||
return _zone_map_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_bitmap_index() {
|
||||
if (_meta.has_bitmap_index()) {
|
||||
const BitmapIndexColumnPB& bitmap_index_meta = _meta.bitmap_index();
|
||||
_bitmap_index_reader.reset(new BitmapIndexReader(_file_name, bitmap_index_meta));
|
||||
RETURN_IF_ERROR(_bitmap_index_reader->load(_opts.cache_in_memory));
|
||||
} else {
|
||||
_bitmap_index_reader.reset(nullptr);
|
||||
Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_bitmap_index_meta != nullptr) {
|
||||
_bitmap_index.reset(new BitmapIndexReader(_file_name, _bitmap_index_meta));
|
||||
return _bitmap_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::_load_bloom_filter_index() {
|
||||
if (_meta.has_bloom_filter_index()) {
|
||||
const BloomFilterIndexPB& bloom_filter_index_meta = _meta.bloom_filter_index();
|
||||
_bloom_filter_index_reader.reset(new BloomFilterIndexReader(_file_name, bloom_filter_index_meta));
|
||||
RETURN_IF_ERROR(_bloom_filter_index_reader->load(_opts.cache_in_memory));
|
||||
} else {
|
||||
_bloom_filter_index_reader.reset(nullptr);
|
||||
Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_bf_index_meta != nullptr) {
|
||||
_bloom_filter_index.reset(new BloomFilterIndexReader(_file_name, _bf_index_meta));
|
||||
return _bloom_filter_index->load(use_page_cache, kept_in_memory);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -327,11 +281,11 @@ Status ColumnReader::seek_to_first(OrdinalPageIndexIterator* iter) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnReader::seek_at_or_before(rowid_t rowid, OrdinalPageIndexIterator* iter) {
|
||||
Status ColumnReader::seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter) {
|
||||
RETURN_IF_ERROR(_ensure_index_loaded());
|
||||
*iter = _ordinal_index->seek_at_or_before(rowid);
|
||||
*iter = _ordinal_index->seek_at_or_before(ordinal);
|
||||
if (!iter->valid()) {
|
||||
return Status::NotFound(Substitute("Failed to seek to rowid $0, ", rowid));
|
||||
return Status::NotFound(Substitute("Failed to seek to ordinal $0, ", ordinal));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -343,38 +297,34 @@ FileColumnIterator::~FileColumnIterator() = default;
|
||||
|
||||
Status FileColumnIterator::seek_to_first() {
|
||||
RETURN_IF_ERROR(_reader->seek_to_first(&_page_iter));
|
||||
|
||||
_page.reset(new ParsedPage());
|
||||
RETURN_IF_ERROR(_read_page(_page_iter, _page.get()));
|
||||
RETURN_IF_ERROR(_read_data_page(_page_iter));
|
||||
|
||||
_seek_to_pos_in_page(_page.get(), 0);
|
||||
_current_rowid = 0;
|
||||
|
||||
_current_ordinal = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status FileColumnIterator::seek_to_ordinal(rowid_t rid) {
|
||||
Status FileColumnIterator::seek_to_ordinal(ordinal_t ord) {
|
||||
// if current page contains this row, we don't need to seek
|
||||
if (_page == nullptr || !_page->contains(rid)) {
|
||||
RETURN_IF_ERROR(_reader->seek_at_or_before(rid, &_page_iter));
|
||||
_page.reset(new ParsedPage());
|
||||
RETURN_IF_ERROR(_read_page(_page_iter, _page.get()));
|
||||
if (_page == nullptr || !_page->contains(ord)) {
|
||||
RETURN_IF_ERROR(_reader->seek_at_or_before(ord, &_page_iter));
|
||||
RETURN_IF_ERROR(_read_data_page(_page_iter));
|
||||
}
|
||||
_seek_to_pos_in_page(_page.get(), rid - _page->first_rowid);
|
||||
_current_rowid = rid;
|
||||
_seek_to_pos_in_page(_page.get(), ord - _page->first_ordinal);
|
||||
_current_ordinal = ord;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void FileColumnIterator::_seek_to_pos_in_page(ParsedPage* page, uint32_t offset_in_page) {
|
||||
void FileColumnIterator::_seek_to_pos_in_page(ParsedPage* page, ordinal_t offset_in_page) {
|
||||
if (page->offset_in_page == offset_in_page) {
|
||||
// fast path, do nothing
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t pos_in_data = offset_in_page;
|
||||
if (_reader->is_nullable()) {
|
||||
rowid_t offset_in_data = 0;
|
||||
rowid_t skips = offset_in_page;
|
||||
ordinal_t pos_in_data = offset_in_page;
|
||||
if (_page->has_null) {
|
||||
ordinal_t offset_in_data = 0;
|
||||
ordinal_t skips = offset_in_page;
|
||||
|
||||
if (offset_in_page > page->offset_in_page) {
|
||||
// forward, reuse null bitmap
|
||||
@ -415,8 +365,8 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst) {
|
||||
// number of rows to be read from this page
|
||||
size_t nrows_in_page = std::min(remaining, _page->remaining());
|
||||
size_t nrows_to_read = nrows_in_page;
|
||||
if (_reader->is_nullable()) {
|
||||
// when this column is nullable we read data in some runs
|
||||
if (_page->has_null) {
|
||||
// when this page contains NULLs we read data in some runs
|
||||
// first we read null bits in the same value, if this is null, we
|
||||
// don't need to read value from page.
|
||||
// If this is not null, we read data from page in batch.
|
||||
@ -438,7 +388,7 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst) {
|
||||
nrows_to_read -= this_run;
|
||||
_page->offset_in_page += this_run;
|
||||
dst->advance(this_run);
|
||||
_current_rowid += this_run;
|
||||
_current_ordinal += this_run;
|
||||
}
|
||||
} else {
|
||||
RETURN_IF_ERROR(_page->data_decoder->next_batch(&nrows_to_read, dst));
|
||||
@ -450,7 +400,7 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst) {
|
||||
|
||||
_page->offset_in_page += nrows_to_read;
|
||||
dst->advance(nrows_to_read);
|
||||
_current_rowid += nrows_to_read;
|
||||
_current_ordinal += nrows_to_read;
|
||||
}
|
||||
remaining -= nrows_in_page;
|
||||
}
|
||||
@ -467,70 +417,46 @@ Status FileColumnIterator::_load_next_page(bool* eos) {
|
||||
*eos = true;
|
||||
return Status::OK();
|
||||
}
|
||||
_page.reset(new ParsedPage());
|
||||
RETURN_IF_ERROR(_read_page(_page_iter, _page.get()));
|
||||
|
||||
RETURN_IF_ERROR(_read_data_page(_page_iter));
|
||||
_seek_to_pos_in_page(_page.get(), 0);
|
||||
*eos = false;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// read one page from file and parse this page to make
|
||||
// it ready to read
|
||||
Status FileColumnIterator::_read_page(const OrdinalPageIndexIterator& iter, ParsedPage* page) {
|
||||
page->page_pointer = iter.page();
|
||||
RETURN_IF_ERROR(_reader->read_page(_file, page->page_pointer, _opts, &page->page_handle));
|
||||
// TODO(zc): read page from file
|
||||
Slice data = page->page_handle.data();
|
||||
Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter) {
|
||||
PageHandle handle;
|
||||
Slice page_body;
|
||||
PageFooterPB footer;
|
||||
RETURN_IF_ERROR(_reader->read_page(_opts, iter.page(), &handle, &page_body, &footer));
|
||||
// parse data page
|
||||
RETURN_IF_ERROR(ParsedPage::create(
|
||||
std::move(handle), page_body, footer.data_page_footer(), _reader->encoding_info(),
|
||||
iter.page(), iter.page_index(), &_page));
|
||||
|
||||
// decode first rowid
|
||||
if (!get_varint32(&data, &page->first_rowid)) {
|
||||
return Status::Corruption("Bad page, failed to decode first rowid");
|
||||
}
|
||||
// decode number rows
|
||||
if (!get_varint32(&data, &page->num_rows)) {
|
||||
return Status::Corruption("Bad page, failed to decode rows count");
|
||||
}
|
||||
if (_reader->is_nullable()) {
|
||||
uint32_t null_bitmap_size = 0;
|
||||
if (!get_varint32(&data, &null_bitmap_size)) {
|
||||
return Status::Corruption("Bad page, failed to decode null bitmap size");
|
||||
}
|
||||
if (null_bitmap_size > data.size) {
|
||||
return Status::Corruption(
|
||||
Substitute("Bad page, null bitmap too large $0 vs $1", null_bitmap_size, data.size));
|
||||
}
|
||||
page->null_decoder = RleDecoder<bool>((uint8_t*)data.data, null_bitmap_size, 1);
|
||||
page->null_bitmap = Slice(data.data, null_bitmap_size);
|
||||
|
||||
// remove null bitmap
|
||||
data.remove_prefix(null_bitmap_size);
|
||||
}
|
||||
|
||||
// create page data decoder
|
||||
PageDecoderOptions options;
|
||||
RETURN_IF_ERROR(_reader->encoding_info()->create_page_decoder(data, options, &page->data_decoder));
|
||||
RETURN_IF_ERROR(page->data_decoder->init());
|
||||
|
||||
// lazy init dict_encoding'dict for three reasons
|
||||
// 1. a column use dictionary encoding still has non-dict-encoded data pages are seeked,load dict when necessary
|
||||
// 2. ColumnReader which is owned by Segment and Rowset can being alive even when there is no query,it should retain memory as small as possible.
|
||||
// 3. Iterators of the same column won't repeat load the dict page because of page cache.
|
||||
// dictionary page is read when the first data page that uses it is read,
|
||||
// this is to optimize the memory usage: when there is no query on one column, we could
|
||||
// release the memory of dictionary page.
|
||||
// note that concurrent iterators for the same column won't repeatedly read dictionary page
|
||||
// because of page cache.
|
||||
if (_reader->encoding_info()->encoding() == DICT_ENCODING) {
|
||||
BinaryDictPageDecoder* binary_dict_page_decoder = (BinaryDictPageDecoder*)page->data_decoder;
|
||||
if (binary_dict_page_decoder->is_dict_encoding()) {
|
||||
auto dict_page_decoder = reinterpret_cast<BinaryDictPageDecoder*>(_page->data_decoder);
|
||||
if (dict_page_decoder->is_dict_encoding()) {
|
||||
if (_dict_decoder == nullptr) {
|
||||
PagePointer pp = _reader->get_dict_page_pointer();
|
||||
RETURN_IF_ERROR(_reader->read_page(_file, pp, _opts, &_dict_page_handle));
|
||||
|
||||
_dict_decoder.reset(new BinaryPlainPageDecoder(_dict_page_handle.data()));
|
||||
// read dictionary page
|
||||
Slice dict_data;
|
||||
PageFooterPB dict_footer;
|
||||
RETURN_IF_ERROR(_reader->read_page(
|
||||
_opts, _reader->get_dict_page_pointer(),
|
||||
&_dict_page_handle, &dict_data, &dict_footer));
|
||||
// ignore dict_footer.dict_page_footer().encoding() due to only
|
||||
// PLAIN_ENCODING is supported for dict page right now
|
||||
_dict_decoder.reset(new BinaryPlainPageDecoder(dict_data));
|
||||
RETURN_IF_ERROR(_dict_decoder->init());
|
||||
}
|
||||
binary_dict_page_decoder->set_dict_decoder(_dict_decoder.get());
|
||||
dict_page_decoder->set_dict_decoder(_dict_decoder.get());
|
||||
}
|
||||
}
|
||||
|
||||
page->offset_in_page = 0;
|
||||
page->page_index = iter.cur_idx();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -21,17 +21,18 @@
|
||||
#include <cstddef> // for size_t
|
||||
#include <memory> // for unique_ptr
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "common/status.h" // for Status
|
||||
#include "gen_cpp/segment_v2.pb.h" // for ColumnMetaPB
|
||||
#include "olap/olap_cond.h" // for CondColumn
|
||||
#include "olap/tablet_schema.h"
|
||||
#include "olap/rowset/segment_v2/bitmap_index_reader.h" // for BitmapIndexReader
|
||||
#include "olap/rowset/segment_v2/common.h" // for rowid_t
|
||||
#include "olap/rowset/segment_v2/common.h"
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexIterator
|
||||
#include "olap/rowset/segment_v2/column_zone_map.h" // for ColumnZoneMap
|
||||
#include "olap/rowset/segment_v2/row_ranges.h" // for RowRanges
|
||||
#include "olap/rowset/segment_v2/page_handle.h" // for PageHandle
|
||||
#include "olap/rowset/segment_v2/parsed_page.h" // for ParsedPage
|
||||
#include "olap/rowset/segment_v2/zone_map_index.h"
|
||||
#include "util/once.h"
|
||||
#include "util/file_cache.h"
|
||||
|
||||
@ -41,12 +42,13 @@ class ColumnBlock;
|
||||
class RandomAccessFile;
|
||||
class TypeInfo;
|
||||
class BlockCompressionCodec;
|
||||
class WrapperField;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
class EncodingInfo;
|
||||
class PageHandle;
|
||||
class PagePointer;
|
||||
struct PagePointer;
|
||||
class ColumnIterator;
|
||||
class BloomFilterIndexReader;
|
||||
|
||||
@ -54,14 +56,19 @@ struct ColumnReaderOptions {
|
||||
// whether verify checksum when read page
|
||||
bool verify_checksum = true;
|
||||
// for in memory olap table, use DURABLE CachePriority in page cache
|
||||
bool cache_in_memory = false;
|
||||
bool kept_in_memory = false;
|
||||
};
|
||||
|
||||
struct ColumnIteratorOptions {
|
||||
RandomAccessFile* file = nullptr;
|
||||
// reader statistics
|
||||
OlapReaderStatistics* stats = nullptr;
|
||||
RandomAccessFile* file = nullptr;
|
||||
bool use_page_cache = false;
|
||||
|
||||
void sanity_check() const {
|
||||
CHECK_NOTNULL(file);
|
||||
CHECK_NOTNULL(stats);
|
||||
}
|
||||
};
|
||||
|
||||
// There will be concurrent users to read the same column. So
|
||||
@ -87,31 +94,25 @@ public:
|
||||
|
||||
// Seek to the first entry in the column.
|
||||
Status seek_to_first(OrdinalPageIndexIterator* iter);
|
||||
Status seek_at_or_before(rowid_t rowid, OrdinalPageIndexIterator* iter);
|
||||
Status seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter);
|
||||
|
||||
// read a page from file into a page handle
|
||||
// use reader owned _file(usually is Descriptor<RandomAccessFile>*) to read page
|
||||
Status read_page(const PagePointer& pp, const ColumnIteratorOptions& opts, PageHandle* handle);
|
||||
|
||||
// read a page from file into a page handle
|
||||
// use file(usually is RandomAccessFile*) to read page
|
||||
Status read_page(RandomAccessFile* file, const PagePointer& pp, const ColumnIteratorOptions& opts, PageHandle* handle);
|
||||
Status read_page(const ColumnIteratorOptions& iter_opts, const PagePointer& pp,
|
||||
PageHandle* handle, Slice* page_body, PageFooterPB* footer);
|
||||
|
||||
bool is_nullable() const { return _meta.is_nullable(); }
|
||||
|
||||
const EncodingInfo* encoding_info() const { return _encoding_info; }
|
||||
|
||||
const TypeInfo* type_info() const { return _type_info; }
|
||||
|
||||
bool has_zone_map() const { return _meta.has_zone_map_page(); }
|
||||
bool has_zone_map() const { return _zone_map_index_meta != nullptr; }
|
||||
bool has_bitmap_index() const { return _bitmap_index_meta != nullptr; }
|
||||
bool has_bloom_filter_index() const { return _bf_index_meta != nullptr; }
|
||||
|
||||
bool has_bitmap_index() {
|
||||
return _meta.has_bitmap_index();
|
||||
}
|
||||
|
||||
bool has_bloom_filter_index() {
|
||||
return _meta.has_bloom_filter_index();
|
||||
}
|
||||
// Check if this column could match `cond' using segment zone map.
|
||||
// Since segment zone map is stored in metadata, this function is fast without I/O.
|
||||
// Return true if segment zone map is absent or `cond' could be satisfied, false otherwise.
|
||||
bool match_condition(CondColumn* cond) const;
|
||||
|
||||
// get row ranges with zone map
|
||||
// - cond_column is user's query predicate
|
||||
@ -137,18 +138,24 @@ private:
|
||||
// May be called multiple times, subsequent calls will no op.
|
||||
Status _ensure_index_loaded() {
|
||||
return _load_index_once.call([this] {
|
||||
RETURN_IF_ERROR(_load_zone_map_index());
|
||||
RETURN_IF_ERROR(_load_ordinal_index());
|
||||
RETURN_IF_ERROR(_load_bitmap_index());
|
||||
RETURN_IF_ERROR(_load_bloom_filter_index());
|
||||
bool use_page_cache = !config::disable_storage_page_cache;
|
||||
RETURN_IF_ERROR(_load_zone_map_index(use_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_ordinal_index(use_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_bitmap_index(use_page_cache, _opts.kept_in_memory));
|
||||
RETURN_IF_ERROR(_load_bloom_filter_index(use_page_cache, _opts.kept_in_memory));
|
||||
return Status::OK();
|
||||
});
|
||||
}
|
||||
|
||||
Status _load_zone_map_index();
|
||||
Status _load_ordinal_index();
|
||||
Status _load_bitmap_index();
|
||||
Status _load_bloom_filter_index();
|
||||
Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory);
|
||||
Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory);
|
||||
Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory);
|
||||
Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
bool _zone_map_match_condition(const ZoneMapPB& zone_map,
|
||||
WrapperField* min_value_container,
|
||||
WrapperField* max_value_container,
|
||||
CondColumn* cond) const;
|
||||
|
||||
Status _get_filtered_pages(CondColumn* cond_column,
|
||||
const std::vector<CondColumn*>& delete_conditions,
|
||||
@ -167,12 +174,17 @@ private:
|
||||
const TypeInfo* _type_info = nullptr;
|
||||
const EncodingInfo* _encoding_info = nullptr;
|
||||
const BlockCompressionCodec* _compress_codec = nullptr;
|
||||
// meta for various column indexes (null if the index is absent)
|
||||
const ZoneMapIndexPB* _zone_map_index_meta = nullptr;
|
||||
const OrdinalIndexPB* _ordinal_index_meta = nullptr;
|
||||
const BitmapIndexPB* _bitmap_index_meta = nullptr;
|
||||
const BloomFilterIndexPB* _bf_index_meta = nullptr;
|
||||
|
||||
DorisCallOnce<Status> _load_index_once;
|
||||
std::unique_ptr<ColumnZoneMap> _column_zone_map;
|
||||
std::unique_ptr<OrdinalPageIndex> _ordinal_index;
|
||||
std::unique_ptr<BitmapIndexReader> _bitmap_index_reader;
|
||||
std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index_reader;
|
||||
std::unique_ptr<ZoneMapIndexReader> _zone_map_index;
|
||||
std::unique_ptr<OrdinalIndexReader> _ordinal_index;
|
||||
std::unique_ptr<BitmapIndexReader> _bitmap_index;
|
||||
std::unique_ptr<BloomFilterIndexReader> _bloom_filter_index;
|
||||
};
|
||||
|
||||
// Base iterator to read one column data
|
||||
@ -193,14 +205,14 @@ public:
|
||||
// Entry 0 is the first entry written to the column.
|
||||
// If provided seek point is past the end of the file,
|
||||
// then returns false.
|
||||
virtual Status seek_to_ordinal(rowid_t ord_idx) = 0;
|
||||
virtual Status seek_to_ordinal(ordinal_t ord) = 0;
|
||||
|
||||
// After one seek, we can call this function many times to read data
|
||||
// into ColumnBlockView. when read string type data, memory will allocated
|
||||
// from MemPool
|
||||
virtual Status next_batch(size_t* n, ColumnBlockView* dst) = 0;
|
||||
|
||||
virtual rowid_t get_current_ordinal() const = 0;
|
||||
virtual ordinal_t get_current_ordinal() const = 0;
|
||||
|
||||
virtual Status get_row_ranges_by_zone_map(CondColumn* cond_column,
|
||||
const std::vector<CondColumn*>& delete_conditions,
|
||||
@ -238,20 +250,13 @@ public:
|
||||
FileColumnIterator(ColumnReader* reader);
|
||||
~FileColumnIterator() override;
|
||||
|
||||
Status init(const ColumnIteratorOptions& opts) override {
|
||||
RETURN_IF_ERROR(ColumnIterator::init(opts));
|
||||
DCHECK(_opts.file != nullptr);
|
||||
_file = _opts.file;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status seek_to_first() override;
|
||||
|
||||
Status seek_to_ordinal(rowid_t ord_idx) override;
|
||||
Status seek_to_ordinal(ordinal_t ord) override;
|
||||
|
||||
Status next_batch(size_t* n, ColumnBlockView* dst) override;
|
||||
|
||||
rowid_t get_current_ordinal() const override { return _current_rowid; }
|
||||
ordinal_t get_current_ordinal() const override { return _current_ordinal; }
|
||||
|
||||
// get row ranges by zone map
|
||||
// - cond_column is user's query predicate
|
||||
@ -263,9 +268,9 @@ public:
|
||||
Status get_row_ranges_by_bloom_filter(CondColumn* cond_column, RowRanges* row_ranges) override;
|
||||
|
||||
private:
|
||||
void _seek_to_pos_in_page(ParsedPage* page, uint32_t offset_in_page);
|
||||
void _seek_to_pos_in_page(ParsedPage* page, ordinal_t offset_in_page);
|
||||
Status _load_next_page(bool* eos);
|
||||
Status _read_page(const OrdinalPageIndexIterator& iter, ParsedPage* page);
|
||||
Status _read_data_page(const OrdinalPageIndexIterator& iter);
|
||||
|
||||
private:
|
||||
ColumnReader* _reader;
|
||||
@ -286,13 +291,11 @@ private:
|
||||
// This value will be reset when a new seek is issued
|
||||
OrdinalPageIndexIterator _page_iter;
|
||||
|
||||
// current rowid
|
||||
rowid_t _current_rowid = 0;
|
||||
// current value ordinal
|
||||
ordinal_t _current_ordinal = 0;
|
||||
|
||||
// page indexes those are DEL_PARTIAL_SATISFIED
|
||||
std::vector<uint32_t> _delete_partial_statisfied_pages;
|
||||
|
||||
RandomAccessFile* _file;
|
||||
};
|
||||
|
||||
// This iterator is used to read default value column
|
||||
@ -315,14 +318,14 @@ public:
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status seek_to_ordinal(rowid_t ord_idx) override {
|
||||
Status seek_to_ordinal(ordinal_t ord_idx) override {
|
||||
_current_rowid = ord_idx;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status next_batch(size_t* n, ColumnBlockView* dst) override;
|
||||
|
||||
rowid_t get_current_ordinal() const override { return _current_rowid; }
|
||||
ordinal_t get_current_ordinal() const override { return _current_rowid; }
|
||||
|
||||
private:
|
||||
bool _has_default_value;
|
||||
@ -337,7 +340,7 @@ private:
|
||||
std::unique_ptr<MemPool> _pool;
|
||||
|
||||
// current rowid
|
||||
rowid_t _current_rowid = 0;
|
||||
ordinal_t _current_rowid = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -19,22 +19,21 @@
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/logging.h" // for LOG
|
||||
#include "env/env.h" // for LOG
|
||||
#include "gutil/strings/substitute.h" // for Substitute
|
||||
#include "common/logging.h"
|
||||
#include "env/env.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "olap/rowset/segment_v2/bitmap_index_writer.h"
|
||||
#include "olap/rowset/segment_v2/encoding_info.h" // for EncodingInfo
|
||||
#include "olap/rowset/segment_v2/options.h" // for PageBuilderOptions
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexBuilder
|
||||
#include "olap/rowset/segment_v2/page_builder.h" // for PageBuilder
|
||||
#include "olap/rowset/segment_v2/page_compression.h"
|
||||
#include "olap/rowset/segment_v2/bloom_filter_index_writer.h"
|
||||
#include "olap/rowset/segment_v2/bloom_filter.h"
|
||||
#include "olap/types.h" // for TypeInfo
|
||||
#include "util/crc32c.h"
|
||||
#include "util/faststring.h" // for fastring
|
||||
#include "util/rle_encoding.h" // for RleEncoder
|
||||
#include "olap/rowset/segment_v2/bloom_filter_index_writer.h"
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
#include "olap/rowset/segment_v2/options.h"
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h"
|
||||
#include "olap/rowset/segment_v2/page_builder.h"
|
||||
#include "olap/rowset/segment_v2/page_io.h"
|
||||
#include "olap/rowset/segment_v2/zone_map_index.h"
|
||||
#include "util/block_compression.h"
|
||||
#include "util/faststring.h"
|
||||
#include "util/rle_encoding.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
@ -43,23 +42,28 @@ using strings::Substitute;
|
||||
|
||||
class NullBitmapBuilder {
|
||||
public:
|
||||
NullBitmapBuilder() : _bitmap_buf(512), _rle_encoder(&_bitmap_buf, 1) {
|
||||
NullBitmapBuilder() : _has_null(false), _bitmap_buf(512), _rle_encoder(&_bitmap_buf, 1) {
|
||||
}
|
||||
|
||||
explicit NullBitmapBuilder(size_t reserve_bits)
|
||||
: _bitmap_buf(BitmapSize(reserve_bits)), _rle_encoder(&_bitmap_buf, 1) {
|
||||
: _has_null(false), _bitmap_buf(BitmapSize(reserve_bits)), _rle_encoder(&_bitmap_buf, 1) {
|
||||
}
|
||||
|
||||
void add_run(bool value, size_t run) {
|
||||
_has_null |= value;
|
||||
_rle_encoder.Put(value, run);
|
||||
}
|
||||
|
||||
// Returns whether the building nullmap contains NULL
|
||||
bool has_null() const { return _has_null; }
|
||||
|
||||
OwnedSlice finish() {
|
||||
_rle_encoder.Flush();
|
||||
return _bitmap_buf.build();
|
||||
}
|
||||
|
||||
void reset() {
|
||||
_has_null = false;
|
||||
_rle_encoder.Clear();
|
||||
}
|
||||
|
||||
@ -67,19 +71,27 @@ public:
|
||||
return _bitmap_buf.size();
|
||||
}
|
||||
private:
|
||||
bool _has_null;
|
||||
faststring _bitmap_buf;
|
||||
RleEncoder<bool> _rle_encoder;
|
||||
};
|
||||
|
||||
ColumnWriter::ColumnWriter(const ColumnWriterOptions& opts,
|
||||
std::unique_ptr<Field> field,
|
||||
bool is_nullable,
|
||||
WritableFile* output_file)
|
||||
: _opts(opts),
|
||||
_is_nullable(is_nullable),
|
||||
_output_file(output_file),
|
||||
WritableFile* output_file) :
|
||||
_opts(opts),
|
||||
_field(std::move(field)),
|
||||
_output_file(output_file),
|
||||
_is_nullable(_opts.meta->is_nullable()),
|
||||
_data_size(0) {
|
||||
// these opts.meta fields should be set by client
|
||||
DCHECK(opts.meta->has_column_id());
|
||||
DCHECK(opts.meta->has_unique_id());
|
||||
DCHECK(opts.meta->has_type());
|
||||
DCHECK(opts.meta->has_length());
|
||||
DCHECK(opts.meta->has_encoding());
|
||||
DCHECK(opts.meta->has_compression());
|
||||
DCHECK(opts.meta->has_is_nullable());
|
||||
}
|
||||
|
||||
ColumnWriter::~ColumnWriter() {
|
||||
@ -93,10 +105,13 @@ ColumnWriter::~ColumnWriter() {
|
||||
}
|
||||
|
||||
Status ColumnWriter::init() {
|
||||
RETURN_IF_ERROR(EncodingInfo::get(_field->type_info(), _opts.encoding_type, &_encoding_info));
|
||||
if (_opts.compression_type != NO_COMPRESSION) {
|
||||
RETURN_IF_ERROR(get_block_compression_codec(_opts.compression_type, &_compress_codec));
|
||||
}
|
||||
RETURN_IF_ERROR(EncodingInfo::get(_field->type_info(), _opts.meta->encoding(), &_encoding_info));
|
||||
_opts.meta->set_encoding(_encoding_info->encoding());
|
||||
// should store more concrete encoding type instead of DEFAULT_ENCODING
|
||||
// because the default encoding of a data type can be changed in the future
|
||||
DCHECK_NE(_opts.meta->encoding(), DEFAULT_ENCODING);
|
||||
|
||||
RETURN_IF_ERROR(get_block_compression_codec(_opts.meta->compression(), &_compress_codec));
|
||||
|
||||
// create page builder
|
||||
PageBuilder* page_builder = nullptr;
|
||||
@ -106,17 +121,17 @@ Status ColumnWriter::init() {
|
||||
if (page_builder == nullptr) {
|
||||
return Status::NotSupported(
|
||||
Substitute("Failed to create page builder for type $0 and encoding $1",
|
||||
_field->type(), _opts.encoding_type));
|
||||
_field->type(), _opts.meta->encoding()));
|
||||
}
|
||||
_page_builder.reset(page_builder);
|
||||
// create ordinal builder
|
||||
_ordinal_index_builder.reset(new OrdinalPageIndexBuilder());
|
||||
_ordinal_index_builder.reset(new OrdinalIndexWriter());
|
||||
// create null bitmap builder
|
||||
if (_is_nullable) {
|
||||
_null_bitmap_builder.reset(new NullBitmapBuilder());
|
||||
}
|
||||
if (_opts.need_zone_map) {
|
||||
_column_zone_map_builder.reset(new ColumnZoneMapBuilder(_field.get()));
|
||||
_zone_map_index_builder.reset(new ZoneMapIndexWriter(_field.get()));
|
||||
}
|
||||
if (_opts.need_bitmap_index) {
|
||||
RETURN_IF_ERROR(BitmapIndexWriter::create(_field->type_info(), &_bitmap_index_builder));
|
||||
@ -132,7 +147,7 @@ Status ColumnWriter::append_nulls(size_t num_rows) {
|
||||
_null_bitmap_builder->add_run(true, num_rows);
|
||||
_next_rowid += num_rows;
|
||||
if (_opts.need_zone_map) {
|
||||
RETURN_IF_ERROR(_column_zone_map_builder->add(nullptr, 1));
|
||||
_zone_map_index_builder->add_nulls(num_rows);
|
||||
}
|
||||
if (_opts.need_bitmap_index) {
|
||||
_bitmap_index_builder->add_nulls(num_rows);
|
||||
@ -156,7 +171,7 @@ Status ColumnWriter::_append_data(const uint8_t** ptr, size_t num_rows) {
|
||||
size_t num_written = remaining;
|
||||
RETURN_IF_ERROR(_page_builder->add(*ptr, &num_written));
|
||||
if (_opts.need_zone_map) {
|
||||
RETURN_IF_ERROR(_column_zone_map_builder->add(*ptr, num_written));
|
||||
_zone_map_index_builder->add_values(*ptr, num_written);
|
||||
}
|
||||
if (_opts.need_bitmap_index) {
|
||||
_bitmap_index_builder->add_values(*ptr, num_written);
|
||||
@ -193,7 +208,7 @@ Status ColumnWriter::append_nullable(
|
||||
_null_bitmap_builder->add_run(true, this_run);
|
||||
_next_rowid += this_run;
|
||||
if (_opts.need_zone_map) {
|
||||
RETURN_IF_ERROR(_column_zone_map_builder->add(nullptr, 1));
|
||||
_zone_map_index_builder->add_nulls(this_run);
|
||||
}
|
||||
if (_opts.need_bitmap_index) {
|
||||
_bitmap_index_builder->add_nulls(this_run);
|
||||
@ -216,7 +231,7 @@ uint64_t ColumnWriter::estimate_buffer_size() {
|
||||
}
|
||||
size += _ordinal_index_builder->size();
|
||||
if (_opts.need_zone_map) {
|
||||
size += _column_zone_map_builder->size();
|
||||
size += _zone_map_index_builder->size();
|
||||
}
|
||||
if (_opts.need_bitmap_index) {
|
||||
size += _bitmap_index_builder->size();
|
||||
@ -239,189 +254,110 @@ Status ColumnWriter::write_data() {
|
||||
}
|
||||
// write column dict
|
||||
if (_encoding_info->encoding() == DICT_ENCODING) {
|
||||
OwnedSlice dict_page;
|
||||
_page_builder->get_dictionary_page(&dict_page);
|
||||
std::vector<Slice> origin_data;
|
||||
origin_data.push_back(dict_page.slice());
|
||||
RETURN_IF_ERROR(_compress_and_write_page(&origin_data, &_dict_page_pp));
|
||||
OwnedSlice dict_body;
|
||||
RETURN_IF_ERROR(_page_builder->get_dictionary_page(&dict_body));
|
||||
|
||||
PageFooterPB footer;
|
||||
footer.set_type(DICTIONARY_PAGE);
|
||||
footer.set_uncompressed_size(dict_body.slice().get_size());
|
||||
footer.mutable_dict_page_footer()->set_encoding(PLAIN_ENCODING);
|
||||
|
||||
PagePointer dict_pp;
|
||||
RETURN_IF_ERROR(PageIO::compress_and_write_page(
|
||||
_compress_codec, _opts.compression_min_space_saving, _output_file,
|
||||
{ dict_body.slice() }, footer, &dict_pp));
|
||||
dict_pp.to_proto(_opts.meta->mutable_dict_page());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ColumnWriter::write_ordinal_index() {
|
||||
Slice data = _ordinal_index_builder->finish();
|
||||
std::vector<Slice> slices{data};
auto st = _compress_and_write_page(&slices, &_ordinal_index_pp);
return st;
return _ordinal_index_builder->finish(_output_file, _opts.meta->add_indexes());
}

Status ColumnWriter::write_zone_map() {
if (_opts.need_zone_map) {
OwnedSlice data = _column_zone_map_builder->finish();
std::vector<Slice> slices{data.slice()};
RETURN_IF_ERROR(_compress_and_write_page(&slices, &_zone_map_pp));
return _zone_map_index_builder->finish(_output_file, _opts.meta->add_indexes());
}
return Status::OK();
}

Status ColumnWriter::write_bitmap_index() {
if (!_opts.need_bitmap_index) {
return Status::OK();
if (_opts.need_bitmap_index) {
return _bitmap_index_builder->finish(_output_file, _opts.meta->add_indexes());
}
return _bitmap_index_builder->finish(_output_file, &_bitmap_index_meta);
return Status::OK();
}

Status ColumnWriter::write_bloom_filter_index() {
if (!_opts.need_bloom_filter) {
return Status::OK();
}
return _bloom_filter_index_builder->finish(_output_file, &_bloom_filter_index_meta);
}

void ColumnWriter::write_meta(ColumnMetaPB* meta) {
meta->set_type(_field->type());
meta->set_encoding(_encoding_info->encoding());
// should store more concrete encoding type instead of DEFAULT_ENCODING
// because the default encoding of a data type can be changed in the future
DCHECK_NE(meta->encoding(), DEFAULT_ENCODING);
meta->set_compression(_opts.compression_type);
meta->set_is_nullable(_is_nullable);
_ordinal_index_pp.to_proto(meta->mutable_ordinal_index_page());
if (_opts.need_zone_map) {
_zone_map_pp.to_proto(meta->mutable_zone_map_page());
_column_zone_map_builder->fill_segment_zone_map(meta->mutable_zone_map());
}
if (_encoding_info->encoding() == DICT_ENCODING) {
_dict_page_pp.to_proto(meta->mutable_dict_page());
}
if (_opts.need_bitmap_index) {
meta->mutable_bitmap_index()->CopyFrom(_bitmap_index_meta);
}
if (_opts.need_bloom_filter) {
meta->mutable_bloom_filter_index()->CopyFrom(_bloom_filter_index_meta);
return _bloom_filter_index_builder->finish(_output_file, _opts.meta->add_indexes());
}
return Status::OK();
}

// write a page into file and update ordinal index
// this function will call _write_physical_page to write data
// write a data page into file and update ordinal index
Status ColumnWriter::_write_data_page(Page* page) {
PagePointer pp;
std::vector<Slice> origin_data;
std::vector<Slice> compressed_body;
for (auto& data : page->data) {
origin_data.push_back(data.slice());
compressed_body.push_back(data.slice());
}
RETURN_IF_ERROR(_write_physical_page(&origin_data, &pp));
_ordinal_index_builder->append_entry(page->first_rowid, pp);
return Status::OK();
}

Status ColumnWriter::_compress_and_write_page(std::vector<Slice>* origin_data, PagePointer* pp) {
std::vector<Slice>* output_data = origin_data;
std::vector<Slice> compressed_data;

// Put compressor out of if block, because we will use compressor's
// content until this function finished.
PageCompressor compressor(_compress_codec);
if (_compress_codec != nullptr) {
RETURN_IF_ERROR(compressor.compress(*origin_data, &compressed_data));
output_data = &compressed_data;
}
return _write_physical_page(output_data, pp);
}

// write a physical page in to files
Status ColumnWriter::_write_physical_page(std::vector<Slice>* origin_data, PagePointer* pp) {
// checksum
uint8_t checksum_buf[sizeof(uint32_t)];
uint32_t checksum = crc32c::Value(*origin_data);
encode_fixed32_le(checksum_buf, checksum);
origin_data->emplace_back(checksum_buf, sizeof(uint32_t));

// remember the offset
pp->offset = _output_file->size();
// write content to file
size_t bytes_written = 0;
RETURN_IF_ERROR(_write_raw_data(*origin_data, &bytes_written));
pp->size = bytes_written;

return Status::OK();
}

// write raw data into file, this is the only place to write data
Status ColumnWriter::_write_raw_data(const std::vector<Slice>& data, size_t* bytes_written) {
auto file_size = _output_file->size();
auto st = _output_file->appendv(&data[0], data.size());
if (!st.ok()) {
LOG(WARNING) << "failed to append data to file, st=" << st.to_string();
return st;
}
*bytes_written = _output_file->size() - file_size;
_written_size += *bytes_written;
RETURN_IF_ERROR(PageIO::write_page(_output_file, compressed_body, page->footer, &pp));
_ordinal_index_builder->append_entry(page->footer.data_page_footer().first_ordinal(), pp);
return Status::OK();
}
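Both the old `_write_physical_page` above and the new page format (see the "page footer + footer size + checksum" note in the header below) end every physical page with a 4-byte checksum that is verified on read. A minimal, self-contained sketch of that convention, assuming zlib's `crc32` as a stand-in for the CRC32C routine the codebase actually uses:

```cpp
// Illustrative sketch only (not Doris code): append a little-endian 4-byte checksum
// to a page buffer, and verify it before the page is used.
#include <zlib.h>
#include <cstdint>
#include <cstring>
#include <string>

static void append_checksum(std::string* page) {
    uint32_t crc = crc32(0L, reinterpret_cast<const Bytef*>(page->data()), page->size());
    char buf[4] = {char(crc & 0xff), char((crc >> 8) & 0xff),
                   char((crc >> 16) & 0xff), char((crc >> 24) & 0xff)};
    page->append(buf, 4);  // checksum always sits in the last 4 bytes of the page
}

static bool verify_checksum(const std::string& page) {
    if (page.size() < 4) return false;          // too small to even hold the checksum
    size_t body_size = page.size() - 4;
    uint32_t expect = 0;
    memcpy(&expect, page.data() + body_size, 4);  // assumes a little-endian host for brevity
    uint32_t actual = crc32(0L, reinterpret_cast<const Bytef*>(page.data()), body_size);
    return expect == actual;
}
```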
Status ColumnWriter::_finish_current_page() {
if (_next_rowid == _last_first_rowid) {
if (_next_rowid == _first_rowid) {
return Status::OK();
}
std::unique_ptr<Page> page(new Page());
page->first_rowid = _last_first_rowid;
page->num_rows = _next_rowid - _last_first_rowid;
faststring header;
// 1. first rowid
put_varint32(&header, page->first_rowid);
// 2. row count
put_varint32(&header, page->num_rows);
OwnedSlice null_bitmap;
if (_is_nullable) {
null_bitmap = _null_bitmap_builder->finish();
_null_bitmap_builder->reset();
put_varint32(&header, null_bitmap.slice().get_size());
}
page->data.emplace_back(std::move(header.build()));

if (_is_nullable) {
page->data.emplace_back(std::move(null_bitmap));
}
OwnedSlice data_slice = _page_builder->finish();
_page_builder->reset();
page->data.emplace_back(std::move(data_slice));

// compressed data
if (_compress_codec != nullptr) {
PageCompressor compressor(_compress_codec);
std::vector<Slice> data_slices;
size_t origin_size = 0;
for (auto& data : page->data) {
data_slices.push_back(data.slice());
origin_size += data.slice().size;
}
OwnedSlice compressed_data;
bool compressed = false;
RETURN_IF_ERROR(compressor.compress(data_slices, &compressed_data, &compressed));
if (compressed) {
page->data.clear();
page->data.emplace_back(std::move(compressed_data));
} else {
size_t uncompressed_bytes = Slice::compute_total_size(data_slices);
faststring buf;
buf.resize(4);
encode_fixed32_le((uint8_t*)buf.data(), uncompressed_bytes);
page->data.emplace_back(std::move(buf.build()));
}
}

// update last first rowid
_last_first_rowid = _next_rowid;

_push_back_page(page.release());
if (_opts.need_zone_map) {
RETURN_IF_ERROR(_column_zone_map_builder->flush());
RETURN_IF_ERROR(_zone_map_index_builder->flush());
}

if (_opts.need_bloom_filter) {
RETURN_IF_ERROR(_bloom_filter_index_builder->flush());
}

// build data page body : encoded values + [nullmap]
vector<Slice> body;
OwnedSlice encoded_values = _page_builder->finish();
_page_builder->reset();
body.push_back(encoded_values.slice());

OwnedSlice nullmap;
if (_is_nullable && _null_bitmap_builder->has_null()) {
nullmap = _null_bitmap_builder->finish();
_null_bitmap_builder->reset();
body.push_back(nullmap.slice());
}

// prepare data page footer
std::unique_ptr<Page> page(new Page());
page->footer.set_type(DATA_PAGE);
page->footer.set_uncompressed_size(Slice::compute_total_size(body));
auto data_page_footer = page->footer.mutable_data_page_footer();
data_page_footer->set_first_ordinal(_first_rowid);
data_page_footer->set_num_values(_next_rowid - _first_rowid);
data_page_footer->set_nullmap_size(nullmap.slice().size);

// trying to compress page body
OwnedSlice compressed_body;
RETURN_IF_ERROR(PageIO::compress_page_body(
_compress_codec, _opts.compression_min_space_saving, body, &compressed_body));
if (compressed_body.slice().empty()) {
// page body is uncompressed
page->data.emplace_back(std::move(encoded_values));
page->data.emplace_back(std::move(nullmap));
} else {
// page body is compressed
page->data.emplace_back(std::move(compressed_body));
}

_push_back_page(page.release());
_first_rowid = _next_rowid;
return Status::OK();
}
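In the new `_finish_current_page` above, `PageIO::compress_page_body` keeps the compressed body only when the space saving (1 - compressed_size / uncompressed_size, per the comment in the header below) reaches `compression_min_space_saving`, and signals "store uncompressed" by returning an empty slice. A hedged sketch of that decision, with a placeholder `compress_fn` standing in for the real codec:

```cpp
// Illustrative "compress only if it pays off" policy; names here are hypothetical.
#include <functional>
#include <string>

// Returns the compressed body, or an empty string when compression does not save at
// least `min_space_saving`; the caller then writes the original body as-is.
std::string maybe_compress(const std::string& body, double min_space_saving,
                           const std::function<std::string(const std::string&)>& compress_fn) {
    std::string compressed = compress_fn(body);
    double space_saving = 1.0 - static_cast<double>(compressed.size()) / body.size();
    if (space_saving < min_space_saving) {
        return std::string();  // not worth storing compressed
    }
    return compressed;
}
```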
@ -21,8 +21,7 @@

#include "common/status.h" // for Status
#include "gen_cpp/segment_v2.pb.h" // for EncodingTypePB
#include "olap/rowset/segment_v2/column_zone_map.h" // for ColumnZoneMapBuilder
#include "olap/rowset/segment_v2/common.h" // for rowid_t
#include "olap/rowset/segment_v2/common.h"
#include "olap/rowset/segment_v2/page_pointer.h" // for PagePointer
#include "util/bitmap.h" // for BitmapChange
#include "util/slice.h" // for OwnedSlice
@ -36,8 +35,10 @@ class BlockCompressionCodec;
namespace segment_v2 {

struct ColumnWriterOptions {
EncodingTypePB encoding_type = DEFAULT_ENCODING;
CompressionTypePB compression_type = segment_v2::CompressionTypePB::LZ4F;
// input and output parameter:
// - input: column_id/unique_id/type/length/encoding/compression/is_nullable members
// - output: encoding/indexes/dict_page members
ColumnMetaPB* meta;
size_t data_page_size = 64 * 1024;
// store compressed page only when space saving is above the threshold.
// space saving = 1 - compressed_size / uncompressed_size
@ -50,9 +51,10 @@ struct ColumnWriterOptions {
class BitmapIndexWriter;
class EncodingInfo;
class NullBitmapBuilder;
class OrdinalPageIndexBuilder;
class OrdinalIndexWriter;
class PageBuilder;
class BloomFilterIndexWriter;
class ZoneMapIndexWriter;

// Encode one column's data into some memory slice.
// Because some columns would be stored in a file, we should wait
@ -62,7 +64,6 @@ class ColumnWriter {
public:
ColumnWriter(const ColumnWriterOptions& opts,
std::unique_ptr<Field> field,
bool is_nullable,
WritableFile* output_file);
~ColumnWriter();

@ -102,19 +103,17 @@ public:
Status write_zone_map();
Status write_bitmap_index();
Status write_bloom_filter_index();
void write_meta(ColumnMetaPB* meta);

private:
// All Pages will be organized into a linked list
struct Page {
int32_t first_rowid;
int32_t num_rows;
// the data vector may contain:
// 1. one OwnedSlice if the data is compressed
// 2. one OwnedSlice if the data is not compressed and is not nullable
// 3. two OwnedSlice if the data is not compressed and is nullable
// 1. one OwnedSlice if the page body is compressed
// 2. one OwnedSlice if the page body is not compressed and doesn't have nullmap
// 3. two OwnedSlice if the page body is not compressed and has nullmap
// use vector for easier management for lifetime of OwnedSlice
std::vector<OwnedSlice> data;
PageFooterPB footer;
Page* next = nullptr;
};

@ -135,45 +134,37 @@ private:
for (auto& data_slice : page->data) {
_data_size += data_slice.slice().size;
}
// estimate (page footer + footer size + checksum) took 20 bytes
_data_size += 20;
}

Status _append_data(const uint8_t** ptr, size_t num_rows);
Status _finish_current_page();
Status _write_raw_data(const std::vector<Slice>& data, size_t* bytes_written);

Status _write_data_page(Page* page);
Status _compress_and_write_page(std::vector<Slice>* origin_data, PagePointer* pp);
Status _write_physical_page(std::vector<Slice>* origin_data, PagePointer* pp);

private:
ColumnWriterOptions _opts;
std::unique_ptr<Field> _field;
WritableFile* _output_file;
bool _is_nullable;
WritableFile* _output_file = nullptr;
// total size of data page list
uint64_t _data_size;

// cached generated pages,
PageHead _pages;
rowid_t _last_first_rowid = 0;
rowid_t _next_rowid = 0;
ordinal_t _first_rowid = 0;
ordinal_t _next_rowid = 0;

const EncodingInfo* _encoding_info = nullptr;
const BlockCompressionCodec* _compress_codec = nullptr;

std::unique_ptr<PageBuilder> _page_builder;
std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder;
std::unique_ptr<OrdinalPageIndexBuilder> _ordinal_index_builder;
std::unique_ptr<ColumnZoneMapBuilder> _column_zone_map_builder;
std::unique_ptr<Field> _field;

std::unique_ptr<OrdinalIndexWriter> _ordinal_index_builder;
std::unique_ptr<ZoneMapIndexWriter> _zone_map_index_builder;
std::unique_ptr<BitmapIndexWriter> _bitmap_index_builder;
std::unique_ptr<BloomFilterIndexWriter> _bloom_filter_index_builder;
BitmapIndexColumnPB _bitmap_index_meta;
BloomFilterIndexPB _bloom_filter_index_meta;

PagePointer _ordinal_index_pp;
PagePointer _zone_map_pp;
PagePointer _dict_page_pp;
// the total data size of page list
uint64_t _data_size;
uint64_t _written_size = 0;
};
@ -1,127 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "olap/rowset/segment_v2/column_zone_map.h"

#include "olap/olap_define.h"

namespace doris {

namespace segment_v2 {

ColumnZoneMapBuilder::ColumnZoneMapBuilder(Field* field) : _field(field), _pool(&_tracker) {
PageBuilderOptions options;
options.data_page_size = 0;
_page_builder.reset(new BinaryPlainPageBuilder(options));
_zone_map.min_value = _field->allocate_value(&_pool);
_zone_map.max_value = _field->allocate_value(&_pool);
_reset_page_zone_map();
_segment_zone_map.min_value = _field->allocate_value(&_pool);
_segment_zone_map.max_value = _field->allocate_value(&_pool);
_reset_segment_zone_map();
}

Status ColumnZoneMapBuilder::add(const uint8_t *vals, size_t count) {
if (vals != nullptr) {
for (int i = 0; i < count; ++i) {
if (_field->compare(_zone_map.min_value, (char *)vals) > 0) {
_field->type_info()->direct_copy(_zone_map.min_value, (const char *)vals);
}
if (_field->compare(_zone_map.max_value, (char *)vals) < 0) {
_field->type_info()->direct_copy(_zone_map.max_value, (const char *)vals);
}
vals += _field->size();
if (!_zone_map.has_not_null) {
_zone_map.has_not_null = true;
}
}
}
else {
if (!_zone_map.has_null) {
_zone_map.has_null = true;
}
}
return Status::OK();
}

void ColumnZoneMapBuilder::fill_segment_zone_map(ZoneMapPB* const to) {
_fill_zone_map_to_pb(_segment_zone_map, to);
}

Status ColumnZoneMapBuilder::flush() {
// Update segment zone map.
if (_field->compare(_segment_zone_map.min_value, _zone_map.min_value) > 0) {
_field->type_info()->direct_copy(_segment_zone_map.min_value, _zone_map.min_value);
}
if (_field->compare(_segment_zone_map.max_value, _zone_map.max_value) < 0) {
_field->type_info()->direct_copy(_segment_zone_map.max_value, _zone_map.max_value);
}
if (!_segment_zone_map.has_null && _zone_map.has_null) {
_segment_zone_map.has_null = true;
}
if (!_segment_zone_map.has_not_null && _zone_map.has_not_null) {
_segment_zone_map.has_not_null = true;
}

ZoneMapPB page_zone_map;
_fill_zone_map_to_pb(_zone_map, &page_zone_map);

std::string serialized_zone_map;
bool ret = page_zone_map.SerializeToString(&serialized_zone_map);
if (!ret) {
return Status::InternalError("serialize zone map failed");
}
Slice data(serialized_zone_map.data(), serialized_zone_map.size());
size_t num = 1;
RETURN_IF_ERROR(_page_builder->add((const uint8_t *)&data, &num));
// reset the variables
// we should allocate max varchar length and set to max for min value
_reset_page_zone_map();
return Status::OK();
}

void ColumnZoneMapBuilder::_reset_zone_map(ZoneMap* zone_map) {
_field->set_to_max(zone_map->min_value);
_field->set_to_min(zone_map->max_value);
zone_map->has_null = false;
zone_map->has_not_null = false;
}

void ColumnZoneMapBuilder::_fill_zone_map_to_pb(const ZoneMap& from, ZoneMapPB* const to) {
to->set_has_not_null(from.has_not_null);
to->set_has_null(from.has_null);
to->set_max(_field->to_string(from.max_value));
to->set_min(_field->to_string(from.min_value));
}

Status ColumnZoneMap::load() {
BinaryPlainPageDecoder page_decoder(_data);
RETURN_IF_ERROR(page_decoder.init());
_num_pages = page_decoder.count();
_page_zone_maps.resize(_num_pages);
for (int i = 0; i < _num_pages; ++i) {
Slice data = page_decoder.string_at_index(i);
bool ret = _page_zone_maps[i].ParseFromString(std::string(data.data, data.size));
if (!ret) {
return Status::Corruption("parse zone map failed");
}
}
return Status::OK();
}

} // namespace segment_v2
} // namespace doris
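The deleted `ColumnZoneMapBuilder` above maintains per-page and per-segment min/max values plus has_null/has_not_null flags; the whole point of the structure is to let a reader skip pages that cannot match a predicate. A hedged sketch of that pruning check, using plain strings as a stand-in for the type-aware `Field::compare` in the real code:

```cpp
// Illustrative only: can a page possibly contain rows matching "col = value"?
#include <string>

struct ZoneMap {             // mirrors the ZoneMapPB fields used in this diff
    std::string min, max;
    bool has_null = false;
    bool has_not_null = false;
};

bool page_may_contain_equal(const ZoneMap& zm, const std::string& value) {
    if (!zm.has_not_null) return false;           // page holds only NULLs
    return zm.min <= value && value <= zm.max;    // outside [min, max] -> prune the page
}
```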
@ -25,7 +25,10 @@
namespace doris {
namespace segment_v2 {

// One segment file could store at most INT32_MAX rows,
// but due to array type, each column could store more than INT32_MAX values.
using rowid_t = uint32_t;
using ordinal_t = uint64_t;

} // namespace segment_v2
} // namespace doris

@ -20,7 +20,6 @@
#include "olap/rowset/segment_v2/page_builder.h" // for PageBuilder
#include "olap/rowset/segment_v2/page_decoder.h" // for PageDecoder
#include "olap/rowset/segment_v2/options.h" // for PageBuilderOptions/PageDecoderOptions
#include "olap/rowset/segment_v2/common.h" // for rowid_t
#include "util/frame_of_reference_coding.h"

namespace doris {

@ -20,7 +20,6 @@
#include <string>

#include "common/logging.h"
#include "olap/key_coder.h"
#include "util/coding.h"

namespace doris {
@ -38,17 +37,15 @@ bool IndexPageBuilder::is_full() const {
return _buffer.size() + 16 > _index_page_size;
}

Slice IndexPageBuilder::finish() {
void IndexPageBuilder::finish(OwnedSlice* body, PageFooterPB* footer) {
DCHECK(!_finished) << "already called finish()";
IndexPageFooterPB footer;
footer.set_num_entries(_count);
footer.set_type(_is_leaf ? IndexPageFooterPB::LEAF : IndexPageFooterPB::INTERNAL);
*body = _buffer.build();

std::string footer_buf;
footer.SerializeToString(&footer_buf);
_buffer.append(footer_buf);
put_fixed32_le(&_buffer, footer_buf.size());
return Slice(_buffer);
footer->set_type(INDEX_PAGE);
footer->set_uncompressed_size(body->slice().get_size());
footer->mutable_index_page_footer()->set_num_entries(_count);
footer->mutable_index_page_footer()->set_type(
_is_leaf ? IndexPageFooterPB::LEAF : IndexPageFooterPB::INTERNAL);
}
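With the new `finish()` above, the footer is no longer serialized into the page body; the body is just the concatenated entries, each `KeyLength(vint), KeyData, PageOffset(vlong), PageSize(vint)` as documented in the index_page.h hunk further down. A minimal sketch of encoding one such entry with LEB128-style varints (the helper names here are made up; the codebase has its own put_varint/decode utilities):

```cpp
// Hypothetical, self-contained encoding of one index entry:
// KeyLength(vint) | KeyData | PageOffset(vlong) | PageSize(vint)
#include <cstdint>
#include <string>

static void put_varint(std::string* out, uint64_t v) {
    // LEB128: 7 payload bits per byte, high bit set on all but the last byte
    while (v >= 0x80) { out->push_back(char((v & 0x7f) | 0x80)); v >>= 7; }
    out->push_back(char(v));
}

static void encode_index_entry(std::string* out, const std::string& key,
                               uint64_t page_offset, uint32_t page_size) {
    put_varint(out, key.size());
    out->append(key);
    put_varint(out, page_offset);
    put_varint(out, page_size);
}
```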
Status IndexPageBuilder::get_first_key(Slice* key) const {
@ -65,15 +62,11 @@ Status IndexPageBuilder::get_first_key(Slice* key) const {

///////////////////////////////////////////////////////////////////////////////

Status IndexPageReader::parse(const Slice& data) {
size_t buffer_len = data.size;
const uint8_t* buffer = (uint8_t*)data.data;
size_t footer_size = decode_fixed32_le(buffer + buffer_len - 4);
std::string footer_buf(data.data + buffer_len - 4 - footer_size, footer_size);
_footer.ParseFromString(footer_buf);
Status IndexPageReader::parse(const Slice& body, const IndexPageFooterPB& footer) {
_footer = footer;
size_t num_entries = _footer.num_entries();

Slice input(data);
Slice input(body);
for (int i = 0; i < num_entries; ++i) {
Slice key;
PagePointer value;

@ -31,22 +31,18 @@
namespace doris {
namespace segment_v2 {

class IndexPageIterator; // forward decl.

// IndexPage is the building block for IndexedColumn's ordinal index and value index.
// It is used to guide searching for a particular key to the data page containing it.
// We use the same general format for all index pages, regardless of the data type and node type (leaf or internal)
// IndexPage := IndexEntry^NumEntry, IndexPageFooterPB, IndexPageFooterPBSize(4)
// IndexEntry := IndexKey, PagePointer
// IndexKey := KeyLength(vint32), KeyData(KeyLength bytes)
// PagePointer := PageOffset(vint64), PageSize(vint32)
// IndexPageBody := IndexEntry^NumEntry
// IndexEntry := KeyLength(vint), Byte^KeyLength, PageOffset(vlong), PageSize(vint)
//
// IndexPageFooterPB records NumEntry and type (leaf/internal) of the index page.
// For leaf, IndexKey records the first/smallest key of the data page PagePointer points to.
// For internal, IndexKey records the first/smallest key of the next-level index page PagePointer points to.
//
// All keys are treated as binary string and compared with memcmp. Keys of other data type are encoded first by
// KeyCoder, e.g., ordinal index's original key type is uint32_t but is encoded to binary string.
// KeyCoder, e.g., ordinal index's original key type is uint64_t but is encoded to binary string.
class IndexPageBuilder {
public:
explicit IndexPageBuilder(size_t index_page_size, bool is_leaf)
@ -59,7 +55,7 @@ public:

size_t count() const { return _count; }

Slice finish();
void finish(OwnedSlice* body, PageFooterPB* footer);

uint64_t size() {
return _buffer.size();
@ -87,9 +83,9 @@ private:
class IndexPageIterator;
class IndexPageReader {
public:
IndexPageReader() : _parsed(false) {};
IndexPageReader() : _parsed(false) {}

Status parse(const Slice& data);
Status parse(const Slice& body, const IndexPageFooterPB& footer);

inline size_t count() const {
DCHECK(_parsed);
@ -21,12 +21,7 @@
#include "gutil/strings/substitute.h" // for Substitute
#include "olap/key_coder.h"
#include "olap/rowset/segment_v2/encoding_info.h" // for EncodingInfo
#include "olap/rowset/segment_v2/index_page.h" // for IndexPageReader
#include "olap/rowset/segment_v2/options.h" // for PageDecoderOptions
#include "olap/rowset/segment_v2/page_compression.h"
#include "olap/rowset/segment_v2/page_decoder.h" // for PagePointer
#include "util/crc32c.h"
#include "util/rle_encoding.h" // for RleDecoder
#include "olap/rowset/segment_v2/page_io.h"
#include "util/file_manager.h"

namespace doris {
@ -34,7 +29,10 @@ namespace segment_v2 {

using strings::Substitute;

Status IndexedColumnReader::load() {
Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) {
_use_page_cache = use_page_cache;
_kept_in_memory = kept_in_memory;

_type_info = get_type_info((FieldType)_meta.data_type());
if (_type_info == nullptr) {
return Status::NotSupported(Substitute("unsupported typeinfo, type=$0", _meta.data_type()));
@ -51,8 +49,10 @@ Status IndexedColumnReader::load() {
if (_meta.ordinal_index_meta().is_root_data_page()) {
_sole_data_page = PagePointer(_meta.ordinal_index_meta().root_page());
} else {
RETURN_IF_ERROR(read_page(input_file, _meta.ordinal_index_meta().root_page(), &_ordinal_index_page_handle));
RETURN_IF_ERROR(_ordinal_index_reader.parse(_ordinal_index_page_handle.data()));
RETURN_IF_ERROR(load_index_page(input_file,
_meta.ordinal_index_meta().root_page(),
&_ordinal_index_page_handle,
&_ordinal_index_reader));
_has_index_page = true;
}
}
@ -62,8 +62,10 @@ Status IndexedColumnReader::load() {
if (_meta.value_index_meta().is_root_data_page()) {
_sole_data_page = PagePointer(_meta.value_index_meta().root_page());
} else {
RETURN_IF_ERROR(read_page(input_file, _meta.value_index_meta().root_page(), &_value_index_page_handle));
RETURN_IF_ERROR(_value_index_reader.parse(_value_index_page_handle.data()));
RETURN_IF_ERROR(load_index_page(input_file,
_meta.value_index_meta().root_page(),
&_value_index_page_handle,
&_value_index_reader));
_has_index_page = true;
}
}
@ -71,91 +73,45 @@ Status IndexedColumnReader::load() {
return Status::OK();
}

Status IndexedColumnReader::read_page(RandomAccessFile* file, const PagePointer& pp, PageHandle* handle) const {
auto cache = StoragePageCache::instance();
PageCacheHandle cache_handle;
StoragePageCache::CacheKey cache_key(file->file_name(), pp.offset);
// column index is only loaded once, so we use global config to decide
if (!config::disable_storage_page_cache && cache->lookup(cache_key, &cache_handle)) {
// we find page in cache, use it
*handle = PageHandle(std::move(cache_handle));
return Status::OK();
}
// Now we read this from file.
size_t page_size = pp.size;
if (page_size < sizeof(uint32_t)) {
return Status::Corruption(Substitute("Bad page, page size is too small, size=$0", page_size));
}

// Now we use this buffer to store page from storage, if this page is compressed
// this buffer will be assigned uncompressed page, and origin content will be freed.
std::unique_ptr<uint8_t[]> page(new uint8_t[page_size]);
Slice page_slice(page.get(), page_size);
RETURN_IF_ERROR(file->read_at(pp.offset, page_slice));

size_t data_size = page_size - 4;
if (_verify_checksum) {
uint32_t expect = decode_fixed32_le((uint8_t*)page_slice.data + page_slice.size - 4);
uint32_t actual = crc32c::Value(page_slice.data, page_slice.size - 4);
if (expect != actual) {
return Status::Corruption(
Substitute("Page checksum mismatch, actual=$0 vs expect=$1", actual, expect));
}
}

// remove page's suffix
page_slice.size = data_size;
if (_compress_codec != nullptr) {
PageDecompressor decompressor(page_slice, _compress_codec);

Slice uncompressed_page;
RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_page));

// If decompressor creates new heap memory for uncompressed data,
// assign this uncompressed page to page and page slice
if (uncompressed_page.data != page_slice.data) {
page.reset((uint8_t*)uncompressed_page.data);
}
page_slice = uncompressed_page;
}
if (!config::disable_storage_page_cache) {
// insert this into cache and return the cache handle
cache->insert(cache_key, page_slice, &cache_handle, _cache_in_memory);
*handle = PageHandle(std::move(cache_handle));
} else {
*handle = PageHandle(page_slice);
}

page.release();
Status IndexedColumnReader::load_index_page(RandomAccessFile* file,
const PagePointerPB& pp,
PageHandle* handle,
IndexPageReader* reader) {
Slice body;
PageFooterPB footer;
RETURN_IF_ERROR(read_page(file, PagePointer(pp), handle, &body, &footer));
RETURN_IF_ERROR(reader->parse(body, footer.index_page_footer()));
return Status::OK();
}

Status IndexedColumnReader::read_page(RandomAccessFile* file, const PagePointer& pp,
PageHandle* handle, Slice* body, PageFooterPB* footer) const {
PageReadOptions opts;
opts.file = file;
opts.page_pointer = pp;
opts.codec = _compress_codec;
OlapReaderStatistics tmp_stats;
opts.stats = &tmp_stats;
opts.use_page_cache = _use_page_cache;
opts.kept_in_memory = _kept_in_memory;

return PageIO::read_and_decompress_page(opts, handle, body, footer);
}

///////////////////////////////////////////////////////////////////////////////

Status IndexedColumnIterator::_read_data_page(const PagePointer& page_pointer, ParsedPage* page) {
RETURN_IF_ERROR(_reader->read_page(_file, page_pointer, &page->page_handle));
Slice data = page->page_handle.data();

// decode first rowid
if (!get_varint32(&data, &page->first_rowid)) {
return Status::Corruption("Bad page, failed to decode first rowid");
}

// decode number of rows
if (!get_varint32(&data, &page->num_rows)) {
return Status::Corruption("Bad page, failed to decode rows count");
}

// create page data decoder
PageDecoderOptions options;
RETURN_IF_ERROR(_reader->encoding_info()->create_page_decoder(data, options, &page->data_decoder));
RETURN_IF_ERROR(page->data_decoder->init());

page->offset_in_page = 0;
return Status::OK();
Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
PageHandle handle;
Slice body;
PageFooterPB footer;
RETURN_IF_ERROR(_reader->read_page(_file, pp, &handle, &body, &footer));
// parse data page
// note that page_index is not used in IndexedColumnIterator, so we pass 0
return ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
_reader->encoding_info(), pp, 0, &_data_page);
}

Status IndexedColumnIterator::seek_to_ordinal(rowid_t idx) {
Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) {
DCHECK(idx >= 0 && idx <= _reader->num_values());

if (!_reader->support_ordinal_seek()) {
@ -164,30 +120,29@@ Status IndexedColumnIterator::seek_to_ordinal(rowid_t idx) {

// it's ok to seek past the last value
if (idx == _reader->num_values()) {
_current_rowid = idx;
_current_ordinal = idx;
_seeked = true;
return Status::OK();
}

if (_data_page == nullptr || !_data_page->contains(idx)) {
// need to read the data page containing row at idx
_data_page.reset(new ParsedPage());
if (_reader->_has_index_page) {
std::string key;
KeyCoderTraits<OLAP_FIELD_TYPE_UNSIGNED_INT>::full_encode_ascending(&idx, &key);
KeyCoderTraits<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::full_encode_ascending(&idx, &key);
RETURN_IF_ERROR(_ordinal_iter.seek_at_or_before(key));
RETURN_IF_ERROR(_read_data_page(_ordinal_iter.current_page_pointer(), _data_page.get()));
RETURN_IF_ERROR(_read_data_page(_ordinal_iter.current_page_pointer()));
_current_iter = &_ordinal_iter;
} else {
RETURN_IF_ERROR(_read_data_page(_reader->_sole_data_page, _data_page.get()));
RETURN_IF_ERROR(_read_data_page(_reader->_sole_data_page));
}
}

rowid_t offset_in_page = idx - _data_page->first_rowid;
ordinal_t offset_in_page = idx - _data_page->first_ordinal;
RETURN_IF_ERROR(_data_page->data_decoder->seek_to_position_in_page(offset_in_page));
DCHECK(offset_in_page == _data_page->data_decoder->current_index());
_data_page->offset_in_page = offset_in_page;
_current_rowid = idx;
_current_ordinal = idx;
_seeked = true;
return Status::OK();
}
@ -221,27 +176,21 @@ Status IndexedColumnIterator::seek_at_or_after(const void* key, bool* exact_matc
}

if (load_data_page) {
_data_page.reset(new ParsedPage());
RETURN_IF_ERROR(_read_data_page(data_page_pp, _data_page.get()));
RETURN_IF_ERROR(_read_data_page(data_page_pp));
}

// seek inside data page
RETURN_IF_ERROR(_data_page->data_decoder->seek_at_or_after_value(key, exact_match));
_data_page->offset_in_page = _data_page->data_decoder->current_index();
_current_rowid = _data_page->first_rowid + _data_page->offset_in_page;
DCHECK(_data_page->contains(_current_rowid));
_current_ordinal = _data_page->first_ordinal + _data_page->offset_in_page;
DCHECK(_data_page->contains(_current_ordinal));
_seeked = true;
return Status::OK();
}

rowid_t IndexedColumnIterator::get_current_ordinal() const {
DCHECK(_seeked);
return _current_rowid;
}

Status IndexedColumnIterator::next_batch(size_t* n, ColumnBlockView* column_view) {
DCHECK(_seeked);
if (_current_rowid == _reader->num_values()) {
if (_current_ordinal == _reader->num_values()) {
*n = 0;
return Status::OK();
}
@ -257,8 +206,7 @@ Status IndexedColumnIterator::next_batch(size_t* n, ColumnBlockView* column_view
if (!has_next) {
break; // no more data page
}
_data_page.reset(new ParsedPage());
RETURN_IF_ERROR(_read_data_page(_current_iter->current_page_pointer(), _data_page.get()));
RETURN_IF_ERROR(_read_data_page(_current_iter->current_page_pointer()));
}

size_t rows_to_read = std::min(_data_page->remaining(), remaining);
@ -267,7 +215,7 @@ Status IndexedColumnIterator::next_batch(size_t* n, ColumnBlockView* column_view
DCHECK(rows_to_read == rows_read);

_data_page->offset_in_page += rows_read;
_current_rowid += rows_read;
_current_ordinal += rows_read;
column_view->advance(rows_read);
remaining -= rows_read;
}
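With the new interface, a caller loads the reader once (choosing page-cache behaviour) and then seeks by ordinal through an iterator. A hedged usage sketch based only on the signatures visible in this diff; the iterator constructor and the surrounding setup are not shown here and are assumed, and `read_column_values` is a hypothetical caller:

```cpp
// Sketch of the intended call sequence for the new API in this diff (not compilable
// on its own): IndexedColumnMetaPB, ColumnBlockView, Status, etc. come from the codebase.
Status read_column_values(const std::string& file_name, const IndexedColumnMetaPB& meta,
                          ordinal_t start, size_t n, ColumnBlockView* dst) {
    IndexedColumnReader reader(file_name, meta);
    RETURN_IF_ERROR(reader.load(/*use_page_cache=*/true, /*kept_in_memory=*/false));

    IndexedColumnIterator iter(&reader);            // constructor assumed, not part of this diff
    RETURN_IF_ERROR(iter.seek_to_ordinal(start));   // lands on the data page containing `start`
    RETURN_IF_ERROR(iter.next_batch(&n, dst));      // reads up to n values from the seeked position
    return Status::OK();
}
```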
@ -47,34 +47,34 @@ class IndexedColumnIterator;
class IndexedColumnReader {
public:
explicit IndexedColumnReader(const std::string& file_name,
const IndexedColumnMetaPB& meta,
const bool cache_in_memory)
: _file_name(file_name), _meta(meta), _cache_in_memory(cache_in_memory) {};
const IndexedColumnMetaPB& meta)
: _file_name(file_name), _meta(meta) {};

Status load();
Status load(bool use_page_cache, bool kept_in_memory);

// read a page from file into a page handle
// use file (usually a RandomAccessFile*) to read the page
Status read_page(RandomAccessFile* file, const PagePointer& pp, PageHandle* handle) const;
// read a page specified by `pp' from `file' into `handle'
Status read_page(RandomAccessFile* file, const PagePointer& pp,
PageHandle* handle, Slice* body, PageFooterPB* footer) const;

int64_t num_values() const { return _num_values; }

const EncodingInfo* encoding_info() const { return _encoding_info; }

const TypeInfo* type_info() const { return _type_info; }

bool support_ordinal_seek() const { return _meta.has_ordinal_index_meta(); }

bool support_value_seek() const { return _meta.has_value_index_meta(); }

private:
Status load_index_page(RandomAccessFile* file,
const PagePointerPB& pp,
PageHandle* handle,
IndexPageReader* reader);

friend class IndexedColumnIterator;

std::string _file_name;
IndexedColumnMetaPB _meta;
// if _cache_in_memory is true, we will use DURABLE CachePriority in page cache,
// otherwise we use NORMAL CachePriority
bool _cache_in_memory;

bool _use_page_cache;
bool _kept_in_memory;
int64_t _num_values = 0;
// whether this column contains any index page.
// could be false when the column contains only one data page.
@ -86,7 +86,6 @@ private:
PageHandle _ordinal_index_page_handle;
PageHandle _value_index_page_handle;

bool _verify_checksum = true;
const TypeInfo* _type_info = nullptr;
const EncodingInfo* _encoding_info = nullptr;
const BlockCompressionCodec* _compress_codec = nullptr;
@ -109,7 +108,7 @@ public:
// Seek to the given ordinal entry. Entry 0 is the first entry.
// Return NotFound if provided seek point is past the end.
// Return NotSupported for column without ordinal index.
Status seek_to_ordinal(rowid_t idx);
Status seek_to_ordinal(ordinal_t idx);

// Seek the index to the given key, or to the index entry immediately
// before it. Then seek the data block to the value matching value or to
@ -123,14 +122,17 @@ public:
Status seek_at_or_after(const void* key, bool* exact_match);

// Get the ordinal index that the iterator is currently pointed to.
rowid_t get_current_ordinal() const;
ordinal_t get_current_ordinal() const {
DCHECK(_seeked);
return _current_ordinal;
}

// After one seek, we can only call this function once to read data
// into ColumnBlock. When reading string type data, memory will be allocated
// from Arena
Status next_batch(size_t* n, ColumnBlockView* column_view);
private:
Status _read_data_page(const PagePointer& page_pointer, ParsedPage* page);
Status _read_data_page(const PagePointer& pp);

const IndexedColumnReader* _reader;
// iterator for ordinal index page
@ -141,10 +143,10 @@ private:
bool _seeked = false;
// current in-use index iterator, could be `&_ordinal_iter` or `&_value_iter` or null
IndexPageIterator* _current_iter = nullptr;
// seeked data page, containing value at `_current_rowid`
// seeked data page, containing value at `_current_ordinal`
std::unique_ptr<ParsedPage> _data_page;
// next_batch() will read from this position
rowid_t _current_rowid = 0;
ordinal_t _current_ordinal = 0;
// open file handle
OpenedFileHandle<RandomAccessFile> _file_handle;
// file to read
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "env/env.h"
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
#include "olap/rowset/segment_v2/index_page.h"
|
||||
#include "olap/rowset/segment_v2/options.h"
|
||||
#include "olap/rowset/segment_v2/page_builder.h"
|
||||
#include "olap/rowset/segment_v2/page_compression.h"
|
||||
#include "olap/rowset/segment_v2/page_io.h"
|
||||
#include "olap/rowset/segment_v2/page_pointer.h"
|
||||
#include "olap/key_coder.h"
|
||||
#include "olap/types.h"
|
||||
#include "util/block_compression.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
@ -55,6 +55,10 @@ IndexedColumnWriter::~IndexedColumnWriter() = default;
|
||||
Status IndexedColumnWriter::init() {
|
||||
const EncodingInfo* encoding_info;
|
||||
RETURN_IF_ERROR(EncodingInfo::get(_typeinfo, _options.encoding, &encoding_info));
|
||||
_options.encoding = encoding_info->encoding();
|
||||
// should store more concrete encoding type instead of DEFAULT_ENCODING
|
||||
// because the default encoding of a data type can be changed in the future
|
||||
DCHECK_NE(_options.encoding, DEFAULT_ENCODING);
|
||||
|
||||
PageBuilder* data_page_builder;
|
||||
RETURN_IF_ERROR(encoding_info->create_page_builder(PageBuilderOptions(), &data_page_builder));
|
||||
@ -89,31 +93,31 @@ Status IndexedColumnWriter::add(const void* value) {
|
||||
}
|
||||
|
||||
Status IndexedColumnWriter::_finish_current_data_page() {
|
||||
const uint32_t page_row_count = _data_page_builder->count();
|
||||
|
||||
if (page_row_count == 0) {
|
||||
auto num_values_in_page = _data_page_builder->count();
|
||||
if (num_values_in_page == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
ordinal_t first_ordinal = _num_values - num_values_in_page;
|
||||
|
||||
uint32_t first_rowid = _num_values - page_row_count;
|
||||
faststring page_header;
|
||||
put_varint32(&page_header, first_rowid);
|
||||
put_varint32(&page_header, page_row_count);
|
||||
|
||||
OwnedSlice page_data = _data_page_builder->finish();
|
||||
// IndexedColumn doesn't have NULLs, thus data page body only contains encoded values
|
||||
OwnedSlice page_body = _data_page_builder->finish();
|
||||
_data_page_builder->reset();
|
||||
|
||||
return _append_data_page({Slice(page_header), page_data.slice()}, first_rowid);
|
||||
}
|
||||
PageFooterPB footer;
|
||||
footer.set_type(DATA_PAGE);
|
||||
footer.set_uncompressed_size(page_body.slice().get_size());
|
||||
footer.mutable_data_page_footer()->set_first_ordinal(first_ordinal);
|
||||
footer.mutable_data_page_footer()->set_num_values(num_values_in_page);
|
||||
footer.mutable_data_page_footer()->set_nullmap_size(0);
|
||||
|
||||
Status IndexedColumnWriter::_append_data_page(const std::vector<Slice>& data_page, rowid_t first_rowid) {
|
||||
RETURN_IF_ERROR(_append_page(data_page, &_last_data_page));
|
||||
RETURN_IF_ERROR(PageIO::compress_and_write_page(
|
||||
_compress_codec, _options.compression_min_space_saving, _file, { page_body.slice() },
|
||||
footer, &_last_data_page));
|
||||
_num_data_pages++;
|
||||
|
||||
if (_options.write_ordinal_index) {
|
||||
std::string key;
|
||||
KeyCoderTraits<OLAP_FIELD_TYPE_UNSIGNED_INT>::full_encode_ascending(
|
||||
&first_rowid, &key);
|
||||
KeyCoderTraits<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::full_encode_ascending(&first_ordinal, &key);
|
||||
_ordinal_index_builder->add(key, _last_data_page);
|
||||
}
|
||||
|
||||
@ -127,31 +131,6 @@ Status IndexedColumnWriter::_append_data_page(const std::vector<Slice>& data_pag
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status IndexedColumnWriter::_append_page(const std::vector<Slice>& page, PagePointer* pp) {
|
||||
std::vector<Slice> output_page;
|
||||
|
||||
// Put compressor out of if block, because we will use compressor's
|
||||
// content until this function finished.
|
||||
PageCompressor compressor(_compress_codec);
|
||||
if (_compress_codec != nullptr) {
|
||||
RETURN_IF_ERROR(compressor.compress(page, &output_page));
|
||||
} else {
|
||||
output_page = page;
|
||||
}
|
||||
|
||||
// checksum
|
||||
uint8_t checksum_buf[sizeof(uint32_t)];
|
||||
uint32_t checksum = crc32c::Value(output_page);
|
||||
encode_fixed32_le(checksum_buf, checksum);
|
||||
output_page.emplace_back(checksum_buf, sizeof(uint32_t));
|
||||
|
||||
// append to file
|
||||
pp->offset = _file->size();
|
||||
RETURN_IF_ERROR(_file->appendv(&output_page[0], output_page.size()));
|
||||
pp->size = _file->size() - pp->offset;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status IndexedColumnWriter::finish(IndexedColumnMetaPB* meta) {
|
||||
RETURN_IF_ERROR(_finish_current_data_page());
|
||||
if (_options.write_ordinal_index) {
|
||||
@ -174,9 +153,14 @@ Status IndexedColumnWriter::_flush_index(IndexPageBuilder* index_builder, BTreeM
|
||||
meta->set_is_root_data_page(true);
|
||||
_last_data_page.to_proto(meta->mutable_root_page());
|
||||
} else {
|
||||
Slice root_page = index_builder->finish();
|
||||
OwnedSlice page_body;
|
||||
PageFooterPB page_footer;
|
||||
index_builder->finish(&page_body, &page_footer);
|
||||
|
||||
PagePointer pp;
|
||||
RETURN_IF_ERROR(_append_page({root_page}, &pp));
|
||||
RETURN_IF_ERROR(PageIO::compress_and_write_page(
|
||||
_compress_codec, _options.compression_min_space_saving, _file,
|
||||
{ page_body.slice() }, page_footer, &pp));
|
||||
|
||||
meta->set_is_root_data_page(false);
|
||||
pp.to_proto(meta->mutable_root_page());
|
||||
|
||||
@ -48,6 +48,7 @@ struct IndexedColumnWriterOptions {
|
||||
bool write_value_index = false;
|
||||
EncodingTypePB encoding = DEFAULT_ENCODING;
|
||||
CompressionTypePB compression = NO_COMPRESSION;
|
||||
double compression_min_space_saving = 0.1;
|
||||
};
|
||||
|
||||
// IndexedColumn is a column with an optional "ordinal index" and an optional "value index".
|
||||
@ -82,15 +83,6 @@ public:
|
||||
private:
|
||||
Status _finish_current_data_page();
|
||||
|
||||
// Append the given data page, update ordinal index or value index if they're used.
|
||||
Status _append_data_page(const std::vector<Slice>& data_page, rowid_t first_rowid);
|
||||
|
||||
// Append the given page into the file. After return, *pp points to the newly
|
||||
// inserted page.
|
||||
// Input data will be compressed when compression is enabled.
|
||||
// We also compute and append checksum for the page.
|
||||
Status _append_page(const std::vector<Slice>& page, PagePointer* pp);
|
||||
|
||||
Status _flush_index(IndexPageBuilder* index_builder, BTreeMetaPB* meta);
|
||||
|
||||
IndexedColumnWriterOptions _options;
|
||||
@ -100,7 +92,7 @@ private:
|
||||
MemTracker _mem_tracker;
|
||||
MemPool _mem_pool;
|
||||
|
||||
rowid_t _num_values;
|
||||
ordinal_t _num_values;
|
||||
uint32_t _num_data_pages;
|
||||
// remember the first value in current page
|
||||
faststring _first_value;
|
||||
|
||||
@ -17,58 +17,111 @@
|
||||
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h"
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "env/env.h"
|
||||
#include "olap/key_coder.h"
|
||||
#include "olap/rowset/segment_v2/page_handle.h"
|
||||
#include "olap/rowset/segment_v2/page_io.h"
|
||||
#include "util/file_manager.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
OrdinalPageIndex::~OrdinalPageIndex() {
|
||||
delete[] _rowids;
|
||||
delete[] _pages;
|
||||
void OrdinalIndexWriter::append_entry(ordinal_t ordinal, const PagePointer& data_pp) {
|
||||
std::string key;
|
||||
KeyCoderTraits<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::full_encode_ascending(&ordinal, &key);
|
||||
_page_builder->add(key, data_pp);
|
||||
_last_pp = data_pp;
|
||||
}
|
||||
|
||||
Status OrdinalPageIndex::load() {
|
||||
if (UNLIKELY(_data.size < _header_size())) {
|
||||
return Status::Corruption("block size must greate than header");
|
||||
}
|
||||
const uint8_t* ptr = (const uint8_t*)_data.data;
|
||||
const uint8_t* limit = (const uint8_t*)_data.data + _data.size;
|
||||
Status OrdinalIndexWriter::finish(WritableFile* file, ColumnIndexMetaPB* meta) {
|
||||
CHECK(_page_builder->count() > 0) << "no entry has been added, file=" << file->filename();
|
||||
meta->set_type(ORDINAL_INDEX);
|
||||
BTreeMetaPB* root_page_meta = meta->mutable_ordinal_index()->mutable_root_page();
|
||||
|
||||
_num_pages = decode_fixed32_le(ptr);
|
||||
ptr += 4;
|
||||
if (_page_builder->count() == 1) {
|
||||
// only one data page, no need to write index page
|
||||
root_page_meta->set_is_root_data_page(true);
|
||||
_last_pp.to_proto(root_page_meta->mutable_root_page());
|
||||
} else {
|
||||
OwnedSlice page_body;
|
||||
PageFooterPB page_footer;
|
||||
_page_builder->finish(&page_body, &page_footer);
|
||||
|
||||
// add a additional rowid for row id compute convenience
|
||||
_rowids = new rowid_t[_num_pages + 1];
|
||||
_pages = new PagePointer[_num_pages];
|
||||
for (int i = 0; i < _num_pages; ++i) {
|
||||
ptr = decode_varint32_ptr(ptr, limit, &_rowids[i]);
|
||||
if (ptr == nullptr) {
|
||||
return Status::InternalError("Data corruption");
|
||||
}
|
||||
ptr = _pages[i].decode_from(ptr, limit);
|
||||
if (ptr == nullptr) {
|
||||
return Status::InternalError("Data corruption");
|
||||
}
|
||||
// write index page (currently it's not compressed)
|
||||
PagePointer pp;
|
||||
RETURN_IF_ERROR(PageIO::write_page(file, { page_body.slice() }, page_footer, &pp));
|
||||
|
||||
root_page_meta->set_is_root_data_page(false);
|
||||
pp.to_proto(root_page_meta->mutable_root_page());
|
||||
}
|
||||
// set the additional last row id as number of rows
|
||||
_rowids[_num_pages] = _num_rows;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
OrdinalPageIndexIterator OrdinalPageIndex::seek_at_or_before(rowid_t rid) {
|
||||
Status OrdinalIndexReader::load(bool use_page_cache, bool kept_in_memory) {
|
||||
if (_index_meta->root_page().is_root_data_page()) {
|
||||
// only one data page, no index page
|
||||
_num_pages = 1;
|
||||
_ordinals.push_back(0);
|
||||
_ordinals.push_back(_num_values);
|
||||
_pages.emplace_back(_index_meta->root_page().root_page());
|
||||
return Status::OK();
|
||||
}
|
||||
// need to read index page
|
||||
OpenedFileHandle<RandomAccessFile> file_handle;
|
||||
RETURN_IF_ERROR(FileManager::instance()->open_file(_filename, &file_handle));
|
||||
|
||||
PageReadOptions opts;
|
||||
opts.file = file_handle.file();
|
||||
opts.page_pointer = PagePointer(_index_meta->root_page().root_page());
|
||||
opts.codec = nullptr; // ordinal index page uses NO_COMPRESSION right now
|
||||
OlapReaderStatistics tmp_stats;
|
||||
opts.stats = &tmp_stats;
|
||||
opts.use_page_cache = use_page_cache;
|
||||
opts.kept_in_memory = kept_in_memory;
|
||||
|
||||
// read index page
|
||||
PageHandle page_handle;
|
||||
Slice body;
|
||||
PageFooterPB footer;
|
||||
RETURN_IF_ERROR(PageIO::read_and_decompress_page(opts, &page_handle, &body, &footer));
|
||||
|
||||
// parse and save all (ordinal, pp) from index page
|
||||
IndexPageReader reader;
|
||||
RETURN_IF_ERROR(reader.parse(body, footer.index_page_footer()));
|
||||
|
||||
_num_pages = reader.count();
|
||||
_ordinals.resize(_num_pages + 1);
|
||||
_pages.resize(_num_pages);
|
||||
for (int i = 0; i < _num_pages; i++) {
|
||||
Slice key = reader.get_key(i);
|
||||
ordinal_t ordinal;
|
||||
RETURN_IF_ERROR(KeyCoderTraits<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>::decode_ascending(
|
||||
&key, sizeof(ordinal_t), (uint8_t*) &ordinal, nullptr));
|
||||
|
||||
_ordinals[i] = ordinal;
|
||||
_pages[i] = reader.get_value(i);
|
||||
}
|
||||
_ordinals[_num_pages] = _num_values;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
OrdinalPageIndexIterator OrdinalIndexReader::seek_at_or_before(ordinal_t ordinal) {
|
||||
int32_t left = 0;
|
||||
int32_t right = _num_pages - 1;
|
||||
while (left < right) {
|
||||
int32_t mid = (left + right + 1) / 2;
|
||||
|
||||
if (_rowids[mid] < rid) {
|
||||
if (_ordinals[mid] < ordinal) {
|
||||
left = mid;
|
||||
} else if (_rowids[mid] > rid) {
|
||||
} else if (_ordinals[mid] > ordinal) {
|
||||
right = mid - 1;
|
||||
} else {
|
||||
left = mid;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (_rowids[left] > rid) {
|
||||
if (_ordinals[left] > ordinal) {
|
||||
return OrdinalPageIndexIterator(this, _num_pages);
|
||||
}
|
||||
return OrdinalPageIndexIterator(this, left);
|
||||
|
||||
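`seek_at_or_before` above is a binary search for the last data page whose first ordinal is less than or equal to the target. A standalone sketch of the same search over a plain sorted vector, to make the upper-biased midpoint explicit (names here are illustrative only):

```cpp
// Sketch: given the first ordinal of each data page (sorted ascending), return the index
// of the last page whose first ordinal is <= target, or -1 if target precedes every page.
#include <cstdint>
#include <vector>

int seek_at_or_before(const std::vector<uint64_t>& first_ordinals, uint64_t target) {
    int left = 0;
    int right = static_cast<int>(first_ordinals.size()) - 1;
    if (right < 0 || first_ordinals[0] > target) return -1;
    while (left < right) {
        int mid = (left + right + 1) / 2;  // bias up so the loop shrinks when right == left + 1
        if (first_ordinals[mid] <= target) {
            left = mid;                    // mid is still a candidate
        } else {
            right = mid - 1;               // everything from mid onward starts past the target
        }
    }
    return left;
}
```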
@ -18,154 +18,112 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "gutil/macros.h"
|
||||
#include "olap/rowset/segment_v2/common.h"
|
||||
#include "olap/rowset/segment_v2/index_page.h"
|
||||
#include "olap/rowset/segment_v2/page_pointer.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/slice.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class WritableFile;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
// this class encode ordinal page index
|
||||
// the binary format is like that
|
||||
// Header | Content
|
||||
// Header:
|
||||
// number of pages (4 Bytes)
|
||||
// Content:
|
||||
// array of index_pair
|
||||
// index_pair:
|
||||
// Ordinal (4 Bytes)
|
||||
// PagePointer (8 Bytes)
|
||||
|
||||
static const uint32_t ORDINAL_PAGE_INDEX_HEADER_SIZE = 4;
|
||||
|
||||
class OrdinalPageIndexBuilder {
|
||||
// Ordinal index is implemented by one IndexPage that stores the first value ordinal
|
||||
// and file pointer for each data page.
|
||||
// But if there is only one data page, there is no need for index page. So we store
|
||||
// the file pointer to that data page directly in index meta (OrdinalIndexPB).
|
||||
class OrdinalIndexWriter {
|
||||
public:
|
||||
OrdinalPageIndexBuilder() : _num_pages(0) {
|
||||
_buffer.reserve(4 * 1024);
|
||||
// reserve space for number of pages
|
||||
_buffer.resize(ORDINAL_PAGE_INDEX_HEADER_SIZE);
|
||||
}
|
||||
OrdinalIndexWriter() : _page_builder(new IndexPageBuilder(0, true)) {}
|
||||
|
||||
void append_entry(rowid_t rid, const PagePointer& page) {
|
||||
// rid
|
||||
put_varint32(&_buffer, rid);
|
||||
// page pointer
|
||||
page.encode_to(&_buffer);
|
||||
_num_pages++;
|
||||
}
|
||||
void append_entry(ordinal_t ordinal, const PagePointer& data_pp);
|
||||
|
||||
uint64_t size() {
|
||||
return _buffer.size();
|
||||
}
|
||||
uint64_t size() { return _page_builder->size(); }
|
||||
|
||||
Slice finish() {
|
||||
// encoded number of pages
|
||||
encode_fixed32_le((uint8_t*)_buffer.data(), _num_pages);
|
||||
return Slice(_buffer);
|
||||
}
|
||||
Status finish(WritableFile* file, ColumnIndexMetaPB* meta);
|
||||
|
||||
private:
|
||||
std::string _buffer;
|
||||
uint32_t _num_pages;
|
||||
DISALLOW_COPY_AND_ASSIGN(OrdinalIndexWriter);
|
||||
std::unique_ptr<IndexPageBuilder> _page_builder;
|
||||
PagePointer _last_pp;
|
||||
};
|
||||
|
||||
class OrdinalPageIndex;
|
||||
class OrdinalPageIndexIterator {
|
||||
class OrdinalPageIndexIterator;
|
||||
|
||||
class OrdinalIndexReader {
|
||||
public:
|
||||
OrdinalPageIndexIterator() : _index(nullptr), _cur_idx(-1) { }
|
||||
OrdinalPageIndexIterator(OrdinalPageIndex* index) : _index(index), _cur_idx(0) { }
|
||||
OrdinalPageIndexIterator(OrdinalPageIndex* index, int cur_idx) : _index(index), _cur_idx(cur_idx) { }
|
||||
inline bool valid() const;
|
||||
inline void next();
|
||||
inline rowid_t rowid() const;
|
||||
inline int32_t cur_idx() const;
|
||||
inline const PagePointer& page() const;
|
||||
inline rowid_t cur_page_first_row_id() const;
|
||||
inline rowid_t cur_page_last_row_id() const;
|
||||
private:
|
||||
OrdinalPageIndex* _index;
|
||||
int32_t _cur_idx;
|
||||
};
|
||||
|
||||
// Page index
|
||||
class OrdinalPageIndex {
|
||||
public:
|
||||
OrdinalPageIndex(const Slice& data, uint64_t num_rows)
|
||||
: _data(data), _num_rows(num_rows), _num_pages(0), _rowids(nullptr), _pages(nullptr) {
|
||||
}
|
||||
~OrdinalPageIndex();
|
||||
|
||||
Status load();
|
||||
|
||||
OrdinalPageIndexIterator seek_at_or_before(rowid_t rid);
|
||||
OrdinalPageIndexIterator begin() {
|
||||
return OrdinalPageIndexIterator(this);
|
||||
}
|
||||
OrdinalPageIndexIterator end() {
|
||||
return OrdinalPageIndexIterator(this, _num_pages);
|
||||
}
|
||||
rowid_t get_first_row_id(int page_index) const {
|
||||
return _rowids[page_index];
|
||||
explicit OrdinalIndexReader(const std::string& filename,
|
||||
const OrdinalIndexPB* index_meta,
|
||||
ordinal_t num_values) :
|
||||
_filename(filename), _index_meta(index_meta), _num_values(num_values) {
|
||||
}
|
||||
|
||||
rowid_t get_last_row_id(int page_index) const {
|
||||
// because add additional number of rows as the last rowid
|
||||
// so just return next_page_first_id - 1
|
||||
int next_page_index = page_index + 1;
|
||||
return get_first_row_id(next_page_index) - 1;
|
||||
// load and parse the index page into memory
|
||||
Status load(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
OrdinalPageIndexIterator seek_at_or_before(ordinal_t ordinal);
|
||||
inline OrdinalPageIndexIterator begin();
|
||||
inline OrdinalPageIndexIterator end();
|
||||
ordinal_t get_first_ordinal(int page_index) const {
|
||||
return _ordinals[page_index];
|
||||
}
|
||||
|
||||
int32_t num_pages() const {
|
||||
return _num_pages;
|
||||
ordinal_t get_last_ordinal(int page_index) const {
|
||||
return get_first_ordinal(page_index + 1) - 1;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t _header_size() const { return ORDINAL_PAGE_INDEX_HEADER_SIZE; }
|
||||
// for test
|
||||
int32_t num_data_pages() const { return _num_pages; }
|
||||
|
||||
private:
|
||||
friend OrdinalPageIndexIterator;
|
||||
|
||||
Slice _data;
|
||||
uint64_t _num_rows;
|
||||
std::string _filename;
|
||||
const OrdinalIndexPB* _index_meta;
|
||||
// total number of values (including NULLs) in the indexed column,
|
||||
// equals to 1 + 'last ordinal of last data pages'
|
||||
ordinal_t _num_values;
|
||||
|
||||
// valid after laod
|
||||
int32_t _num_pages;
|
||||
// the last row id is additional, set to number of rows
|
||||
rowid_t* _rowids;
|
||||
PagePointer* _pages;
|
||||
// valid after load
|
||||
int _num_pages = 0;
|
||||
// _ordinals[i] = first ordinal of the i-th data page,
|
||||
std::vector<ordinal_t> _ordinals;
|
||||
// _pages[i] = page pointer to the i-th data page
|
||||
std::vector<PagePointer> _pages;
|
||||
};
|
||||
|
||||
inline bool OrdinalPageIndexIterator::valid() const {
|
||||
return _cur_idx < _index->_num_pages;
|
||||
class OrdinalPageIndexIterator {
|
||||
public:
|
||||
OrdinalPageIndexIterator() : _index(nullptr), _cur_idx(-1) { }
|
||||
OrdinalPageIndexIterator(OrdinalIndexReader* index) : _index(index), _cur_idx(0) { }
|
||||
OrdinalPageIndexIterator(OrdinalIndexReader* index, int cur_idx) : _index(index), _cur_idx(cur_idx) { }
|
||||
bool valid() const { return _cur_idx < _index->_num_pages; }
|
||||
void next() {
|
||||
DCHECK_LT(_cur_idx, _index->_num_pages);
|
||||
_cur_idx++;
|
||||
}
|
||||
int32_t page_index() const { return _cur_idx; };
|
||||
const PagePointer& page() const { return _index->_pages[_cur_idx]; };
|
||||
ordinal_t first_ordinal() const { return _index->get_first_ordinal(_cur_idx); }
|
||||
ordinal_t last_ordinal() const { return _index->get_last_ordinal(_cur_idx); }
|
||||
private:
|
||||
OrdinalIndexReader* _index;
|
||||
int32_t _cur_idx;
|
||||
};
|
||||
|
||||
OrdinalPageIndexIterator OrdinalIndexReader::begin() {
|
||||
return OrdinalPageIndexIterator(this);
|
||||
}
|
||||
|
||||
inline void OrdinalPageIndexIterator::next() {
|
||||
DCHECK_LT(_cur_idx, _index->_num_pages);
|
||||
_cur_idx++;
|
||||
}
|
||||
|
||||
inline rowid_t OrdinalPageIndexIterator::rowid() const {
|
||||
return _index->_rowids[_cur_idx];
|
||||
}
|
||||
|
||||
int32_t OrdinalPageIndexIterator::cur_idx() const {
|
||||
return _cur_idx;
|
||||
}
|
||||
|
||||
inline const PagePointer& OrdinalPageIndexIterator::page() const {
|
||||
return _index->_pages[_cur_idx];
|
||||
}
|
||||
|
||||
rowid_t OrdinalPageIndexIterator::cur_page_first_row_id() const {
|
||||
return _index->get_first_row_id(_cur_idx);
|
||||
}
|
||||
|
||||
rowid_t OrdinalPageIndexIterator::cur_page_last_row_id() const {
|
||||
return _index->get_last_row_id(_cur_idx);
|
||||
OrdinalPageIndexIterator OrdinalIndexReader::end() {
|
||||
return OrdinalPageIndexIterator(this, _num_pages);
|
||||
}
|
||||
|
||||
}
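// ---------------------------------------------------------------------------
// Not part of the patch: a minimal standalone sketch of the lookup the ordinal
// index performs. seek_at_or_before() amounts to a binary search for the last
// data page whose first ordinal is <= the target; names and data below are
// illustrative only (the real reader keeps one extra sentinel ordinal so that
// last_ordinal(i) == first_ordinal(i + 1) - 1).
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

using ordinal_t = uint64_t;

// Returns the index of the last page whose first ordinal is <= `ordinal`,
// or -1 if `ordinal` precedes the first page.
int seek_at_or_before(const std::vector<ordinal_t>& first_ordinals, ordinal_t ordinal) {
    auto it = std::upper_bound(first_ordinals.begin(), first_ordinals.end(), ordinal);
    return static_cast<int>(it - first_ordinals.begin()) - 1;
}

int main() {
    std::vector<ordinal_t> first_ordinals = {0, 1000, 2048, 4096};
    assert(seek_at_or_before(first_ordinals, 0) == 0);
    assert(seek_at_or_before(first_ordinals, 1500) == 1);
    assert(seek_at_or_before(first_ordinals, 4095) == 2);
    assert(seek_at_or_before(first_ordinals, 9999) == 3);
    return 0;
}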
|
||||
|
||||
@ -1,120 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/page_compression.h"
|
||||
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "util/block_compression.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
using strings::Substitute;
|
||||
|
||||
Status PageDecompressor::decompress_to(Slice* uncompressed_data) {
|
||||
if (_data.size < 4) {
|
||||
return Status::Corruption(
|
||||
Substitute("Compressed page's size is too small, size=$0, needed=$1",
|
||||
_data.size, 4));
|
||||
}
|
||||
// decode uncompressed_bytes from footer
|
||||
uint32_t uncompressed_bytes = decode_fixed32_le((uint8_t*)_data.data + _data.size - 4);
|
||||
|
||||
Slice compressed_slice(_data.data, _data.size - 4);
|
||||
if (compressed_slice.size == uncompressed_bytes) {
|
||||
// If compressed_slice's size is equal to uncompressed_bytes, it means the
// compressor stored the data directly without compression, so we just
// return it directly.
|
||||
*uncompressed_data = compressed_slice;
|
||||
return Status::OK();
|
||||
}
|
||||
std::unique_ptr<char[]> buf(new char[uncompressed_bytes]);
|
||||
|
||||
Slice uncompressed_slice(buf.get(), uncompressed_bytes);
|
||||
RETURN_IF_ERROR(_codec->decompress(compressed_slice, &uncompressed_slice));
|
||||
if (uncompressed_slice.size != uncompressed_bytes) {
|
||||
// If the size after decompression doesn't match the recorded size, we treat
// this page as corrupt.
|
||||
return Status::Corruption(
|
||||
Substitute("Uncompressed size not match, record=$0 vs decompress=$1",
|
||||
uncompressed_bytes, uncompressed_slice.size));
|
||||
}
|
||||
*uncompressed_data = Slice(buf.release(), uncompressed_bytes);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PageCompressor::compress(const std::vector<Slice>& raw_data,
|
||||
std::vector<Slice>* compressed_slices) {
|
||||
size_t uncompressed_bytes = Slice::compute_total_size(raw_data);
|
||||
size_t max_compressed_bytes = _codec->max_compressed_len(uncompressed_bytes);
|
||||
_buf.resize(max_compressed_bytes + 4);
|
||||
Slice compressed_slice(_buf.data(), max_compressed_bytes);
|
||||
RETURN_IF_ERROR(_codec->compress(raw_data, &compressed_slice));
|
||||
|
||||
double space_saving = 1.0 - (double)compressed_slice.size / uncompressed_bytes;
|
||||
if (compressed_slice.size >= uncompressed_bytes || // integer compare makes the decision definite
|
||||
space_saving < _min_space_saving) {
|
||||
// If the space saving is not high enough we just copy the uncompressed
// data to avoid the decompression CPU cost
|
||||
for (auto& slice : raw_data) {
|
||||
compressed_slices->push_back(slice);
|
||||
}
|
||||
|
||||
// encode uncompressed_bytes into footer of compressed value
|
||||
encode_fixed32_le((uint8_t*)_buf.data(), uncompressed_bytes);
|
||||
compressed_slices->emplace_back(_buf.data(), 4);
|
||||
return Status::OK();
|
||||
}
|
||||
// encode uncompressed_bytes into footer of compressed value
|
||||
encode_fixed32_le((uint8_t*)_buf.data() + compressed_slice.size, uncompressed_bytes);
|
||||
// return compressed data to client
|
||||
compressed_slices->emplace_back(_buf.data(), 4 + compressed_slice.size);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PageCompressor::compress(const std::vector<Slice>& raw_data,
|
||||
OwnedSlice* compressed_data, bool* compressed) {
|
||||
size_t uncompressed_bytes = Slice::compute_total_size(raw_data);
|
||||
size_t max_compressed_bytes = _codec->max_compressed_len(uncompressed_bytes);
|
||||
_buf.resize(max_compressed_bytes + 4);
|
||||
Slice compression_buffer(_buf.data(), max_compressed_bytes);
|
||||
RETURN_IF_ERROR(_codec->compress(raw_data, &compression_buffer));
|
||||
|
||||
double space_saving = 1.0 - (double)compression_buffer.size / uncompressed_bytes;
|
||||
if (compression_buffer.size >= uncompressed_bytes || // integer compare makes the decision definite
|
||||
space_saving < _min_space_saving) {
|
||||
// If the space saving is not high enough we just copy the uncompressed
// data to avoid the decompression CPU cost
|
||||
_buf.resize(0);
|
||||
*compressed_data = _buf.build();
|
||||
*compressed = false;
|
||||
return Status::OK();
|
||||
}
|
||||
// encode uncompressed_bytes into footer of compressed value
|
||||
encode_fixed32_le((uint8_t*)_buf.data() + compression_buffer.size, uncompressed_bytes);
|
||||
// return compressed data to client
|
||||
_buf.resize(compression_buffer.size + 4);
|
||||
*compressed_data = _buf.build();
|
||||
*compressed = true;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -1,103 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "util/slice.h"
|
||||
#include "util/faststring.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class BlockCompressionCodec;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
// Utility class for parsing and decompressing compressed page.
|
||||
// Format of compressed page := Data, UncompressedSize(fixed32)
|
||||
// When sizeof(Data) == UncompressedSize, it means Data is stored in uncompressed
|
||||
// form, thus decompression is not needed.
|
||||
// Otherwise Data is in compressed form and should be decompressed.
|
||||
// The type of compression codec for Data is stored elsewhere and should
|
||||
// be passed into the constructor.
|
||||
// Usage example:
|
||||
// // page_slice refers to page read from storage
|
||||
// PageDecompressor decompressor(page_slice, codec);
|
||||
// // points to decompressed Data of the page (without footer)
|
||||
// Slice uncompressed_slice;
|
||||
// RETURN_IF_ERROR(decompressor.decompress_to(&uncompressed_slice));
|
||||
// // use uncompressed_slice
|
||||
// // we have a new buffer for decompressed page
|
||||
// if (uncompressed_slice.data != page_slice.data) {
|
||||
// delete[] page_slice.data;
|
||||
// }
|
||||
class PageDecompressor {
|
||||
public:
|
||||
PageDecompressor(const Slice& compressed_data, const BlockCompressionCodec* codec)
|
||||
: _data(compressed_data), _codec(codec) {
|
||||
}
|
||||
|
||||
// This function sets the uncompressed content to uncompressed_data.
// In the normal case (compressed_data.data != uncompressed_data.data) the client
// should call delete[] on compressed_data.data to free the heap memory. However,
// when the data is not compressed, this function returns compressed_data
// directly. In that case compressed_data.data == uncompressed_data.data and
// the client should not free the content.
|
||||
Status decompress_to(Slice* uncompressed_data);
|
||||
private:
|
||||
Slice _data;
|
||||
const BlockCompressionCodec* _codec;
|
||||
};
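// ---------------------------------------------------------------------------
// Not part of the patch: a small standalone sketch of the legacy compressed
// page layout described above (Data, UncompressedSize(fixed32)). The fixed32
// helpers are stand-ins for util/coding.h and assume a little-endian host.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

static void put_fixed32_le(std::string* buf, uint32_t v) {
    char tmp[4];
    std::memcpy(tmp, &v, 4); // assumes little-endian host
    buf->append(tmp, 4);
}
static uint32_t get_fixed32_le(const char* p) {
    uint32_t v;
    std::memcpy(&v, p, 4);
    return v;
}

int main() {
    // Legacy layout: Data, UncompressedSize(fixed32)
    std::string body = "raw-or-compressed-bytes";
    std::string page = body;
    put_fixed32_le(&page, static_cast<uint32_t>(body.size()));

    uint32_t uncompressed = get_fixed32_le(page.data() + page.size() - 4);
    size_t data_size = page.size() - 4;
    // When the stored size equals the data size, the body was written uncompressed.
    bool needs_decompress = (data_size != uncompressed);
    std::cout << "needs_decompress=" << needs_decompress << "\n"; // prints 0 here
    return 0;
}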
|
||||
|
||||
// Helper to build a compress page.
|
||||
// Usage:
|
||||
// std::vector<Slice> raw_data;
|
||||
// PageCompressor compressor(codec, 0.1);
|
||||
// std::vector<Slice> compressed_data;
|
||||
// compressor.compress(raw_data, &compressed_data)
|
||||
class PageCompressor {
|
||||
public:
|
||||
PageCompressor(const BlockCompressionCodec* codec, double min_space_saving = 0.1)
|
||||
: _codec(codec), _min_space_saving(min_space_saving) {
|
||||
}
|
||||
|
||||
// Try to compress the input raw data into a compressed page
// according to the given BlockCompressionCodec. If the compressed page is not
// sufficiently smaller than the raw data, this method returns the uncompressed data.
|
||||
Status compress(const std::vector<Slice>& raw_data,
|
||||
std::vector<Slice>* compressed_data);
|
||||
|
||||
// Try to compress the input raw data into a compressed page returned as an OwnedSlice
// according to the given BlockCompressionCodec. If the compressed page is not
// sufficiently smaller than the raw data, this method returns the uncompressed data.
|
||||
Status compress(const std::vector<Slice>& raw_data,
|
||||
OwnedSlice* compressed_data, bool* compressed);
|
||||
private:
|
||||
const BlockCompressionCodec* _codec;
|
||||
|
||||
// If space saving is lower than _min_space_saving, compress will return origin data
|
||||
double _min_space_saving;
|
||||
|
||||
// used to store compressed data
|
||||
faststring _buf;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
@ -18,7 +18,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "olap/column_block.h" // for ColumnBlockView
|
||||
#include "olap/rowset/segment_v2/common.h" // for rowid_t
|
||||
#include "common/status.h" // for Status
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -65,7 +65,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// This function is only valid after valid data has been assigned, whether cached or not.
// The returned slice contains the uncompressed page body, page footer, and footer size.
|
||||
Slice data() const {
|
||||
if (_is_data_owner) {
|
||||
return _data;
|
||||
|
||||
be/src/olap/rowset/segment_v2/page_io.cpp (new file, 208 lines)
@ -0,0 +1,208 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/page_io.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "env/env.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "olap/page_cache.h"
|
||||
#include "util/block_compression.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/crc32c.h"
|
||||
#include "util/faststring.h"
|
||||
#include "util/runtime_profile.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
using strings::Substitute;
|
||||
|
||||
Status PageIO::compress_page_body(const BlockCompressionCodec* codec,
|
||||
double min_space_saving,
|
||||
const std::vector<Slice>& body,
|
||||
OwnedSlice* compressed_body) {
|
||||
size_t uncompressed_size = Slice::compute_total_size(body);
|
||||
if (codec != nullptr && uncompressed_size > 0) {
|
||||
size_t max_compressed_size = codec->max_compressed_len(uncompressed_size);
|
||||
faststring buf;
|
||||
buf.resize(max_compressed_size);
|
||||
Slice compressed_slice(buf);
|
||||
RETURN_IF_ERROR(codec->compress(body, &compressed_slice));
|
||||
buf.resize(compressed_slice.get_size());
|
||||
|
||||
double space_saving = 1.0 - static_cast<double>(buf.size()) / uncompressed_size;
|
||||
// return compressed body only when it saves more than min_space_saving
|
||||
if (space_saving > 0 && space_saving >= min_space_saving) {
|
||||
*compressed_body = buf.build();
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
// otherwise, do not compress
|
||||
OwnedSlice empty;
|
||||
*compressed_body = std::move(empty);
|
||||
return Status::OK();
|
||||
}
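// ---------------------------------------------------------------------------
// Not part of the patch: a self-contained sketch of the same min_space_saving
// rule used by compress_page_body() above. The compressor itself is elided;
// only the keep-or-discard decision is shown.
#include <cstddef>
#include <iostream>

// Keep the compressed form only when it actually saves at least
// `min_space_saving` of the uncompressed bytes.
bool keep_compressed(size_t uncompressed_size, size_t compressed_size,
                     double min_space_saving) {
    if (uncompressed_size == 0) return false;
    double space_saving = 1.0 - static_cast<double>(compressed_size) / uncompressed_size;
    return space_saving > 0 && space_saving >= min_space_saving;
}

int main() {
    std::cout << keep_compressed(1000, 980, 0.1) << "\n"; // 0: only 2% saved
    std::cout << keep_compressed(1000, 600, 0.1) << "\n"; // 1: 40% saved
    return 0;
}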
|
||||
|
||||
Status PageIO::write_page(WritableFile* file,
|
||||
const std::vector<Slice>& body,
|
||||
const PageFooterPB& footer,
|
||||
PagePointer* result) {
|
||||
// sanity check of page footer
|
||||
CHECK(footer.has_type()) << "type must be set";
|
||||
CHECK(footer.has_uncompressed_size()) << "uncompressed_size must be set";
|
||||
switch (footer.type()) {
|
||||
case DATA_PAGE:
|
||||
CHECK(footer.has_data_page_footer());
|
||||
break;
|
||||
case INDEX_PAGE:
|
||||
CHECK(footer.has_index_page_footer());
|
||||
break;
|
||||
case DICTIONARY_PAGE:
|
||||
CHECK(footer.has_dict_page_footer());
|
||||
break;
|
||||
case SHORT_KEY_PAGE:
|
||||
CHECK(footer.has_short_key_page_footer());
|
||||
break;
|
||||
default:
|
||||
CHECK(false) << "Invalid page footer type: " << footer.type();
|
||||
break;
|
||||
}
|
||||
|
||||
std::string footer_buf; // serialized footer + footer size
|
||||
footer.SerializeToString(&footer_buf);
|
||||
put_fixed32_le(&footer_buf, static_cast<uint32_t>(footer_buf.size()));
|
||||
|
||||
std::vector<Slice> page = body;
|
||||
page.emplace_back(footer_buf);
|
||||
|
||||
// checksum
|
||||
uint8_t checksum_buf[sizeof(uint32_t)];
|
||||
uint32_t checksum = crc32c::Value(page);
|
||||
encode_fixed32_le(checksum_buf, checksum);
|
||||
page.emplace_back(checksum_buf, sizeof(uint32_t));
|
||||
|
||||
uint64_t offset = file->size();
|
||||
RETURN_IF_ERROR(file->appendv(&page[0], page.size()));
|
||||
|
||||
result->offset = offset;
|
||||
result->size = file->size() - offset;
|
||||
return Status::OK();
|
||||
}
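// ---------------------------------------------------------------------------
// Not part of the patch: a standalone sketch of the byte layout write_page()
// produces. The footer string and the checksum function below are stand-ins;
// the real code serializes PageFooterPB and uses crc32c from util/crc32c.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

static void put_fixed32_le(std::string* buf, uint32_t v) {
    char tmp[4];
    std::memcpy(tmp, &v, 4); // assumes little-endian host
    buf->append(tmp, 4);
}

// stand-in checksum; the real page stores crc32c over everything before it
static uint32_t fake_checksum(const std::string& s) {
    uint32_t h = 2166136261u;
    for (unsigned char c : s) { h = (h ^ c) * 16777619u; }
    return h;
}

int main() {
    std::string body = "page-body-bytes";            // possibly compressed
    std::string footer = "serialized-PageFooterPB";  // stand-in for footer.SerializeToString()

    // Page := PageBody, PageFooter, FooterSize(4), Checksum(4)
    std::string page = body + footer;
    put_fixed32_le(&page, static_cast<uint32_t>(footer.size()));
    put_fixed32_le(&page, fake_checksum(page));

    std::cout << "page size = " << page.size() << "\n";
    return 0;
}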
|
||||
|
||||
Status PageIO::read_and_decompress_page(const PageReadOptions& opts,
|
||||
PageHandle* handle,
|
||||
Slice* body,
|
||||
PageFooterPB* footer) {
|
||||
opts.sanity_check();
|
||||
opts.stats->total_pages_num++;
|
||||
|
||||
auto cache = StoragePageCache::instance();
|
||||
PageCacheHandle cache_handle;
|
||||
StoragePageCache::CacheKey cache_key(opts.file->file_name(), opts.page_pointer.offset);
|
||||
if (opts.use_page_cache && cache->lookup(cache_key, &cache_handle)) {
|
||||
// we find page in cache, use it
|
||||
*handle = PageHandle(std::move(cache_handle));
|
||||
opts.stats->cached_pages_num++;
|
||||
// parse body and footer
|
||||
Slice page_slice = handle->data();
|
||||
uint32_t footer_size = decode_fixed32_le((uint8_t*) page_slice.data + page_slice.size - 4);
|
||||
std::string footer_buf(page_slice.data + page_slice.size - 4 - footer_size, footer_size);
|
||||
if (!footer->ParseFromString(footer_buf)) {
|
||||
return Status::Corruption("Bad page: invalid footer");
|
||||
}
|
||||
*body = Slice(page_slice.data, page_slice.size - 4 - footer_size);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// every page contains 4 bytes footer length and 4 bytes checksum
|
||||
const uint32_t page_size = opts.page_pointer.size;
|
||||
if (page_size < 8) {
|
||||
return Status::Corruption(Substitute("Bad page: too small size ($0)", page_size));
|
||||
}
|
||||
|
||||
// hold compressed page at first, reset to decompressed page later
|
||||
std::unique_ptr<char[]> page(new char[page_size]);
|
||||
Slice page_slice(page.get(), page_size);
|
||||
{
|
||||
SCOPED_RAW_TIMER(&opts.stats->io_ns);
|
||||
RETURN_IF_ERROR(opts.file->read_at(opts.page_pointer.offset, page_slice));
|
||||
opts.stats->compressed_bytes_read += page_size;
|
||||
}
|
||||
|
||||
if (opts.verify_checksum) {
|
||||
uint32_t expect = decode_fixed32_le((uint8_t*) page_slice.data + page_slice.size - 4);
|
||||
uint32_t actual = crc32c::Value(page_slice.data, page_slice.size - 4);
|
||||
if (expect != actual) {
|
||||
return Status::Corruption(Substitute(
|
||||
"Bad page: checksum mismatch (actual=$0 vs expect=$1)", actual, expect));
|
||||
}
|
||||
}
|
||||
|
||||
// remove checksum suffix
|
||||
page_slice.size -= 4;
|
||||
// parse and set footer
|
||||
uint32_t footer_size = decode_fixed32_le((uint8_t*) page_slice.data + page_slice.size - 4);
|
||||
if (!footer->ParseFromArray(page_slice.data + page_slice.size - 4 - footer_size, footer_size)) {
|
||||
return Status::Corruption("Bad page: invalid footer");
|
||||
}
|
||||
|
||||
uint32_t body_size = page_slice.size - 4 - footer_size;
|
||||
if (body_size != footer->uncompressed_size()) { // need decompress body
|
||||
if (opts.codec == nullptr) {
|
||||
return Status::Corruption("Bad page: page is compressed but codec is NO_COMPRESSION");
|
||||
}
|
||||
SCOPED_RAW_TIMER(&opts.stats->decompress_ns);
|
||||
std::unique_ptr<char[]> decompressed_page(
|
||||
new char[footer->uncompressed_size() + footer_size + 4]);
|
||||
|
||||
// decompress page body
|
||||
Slice compressed_body(page_slice.data, body_size);
|
||||
Slice decompressed_body(decompressed_page.get(), footer->uncompressed_size());
|
||||
RETURN_IF_ERROR(opts.codec->decompress(compressed_body, &decompressed_body));
|
||||
if (decompressed_body.size != footer->uncompressed_size()) {
|
||||
return Status::Corruption(Substitute(
|
||||
"Bad page: record uncompressed size=$0 vs real decompressed size=$1",
|
||||
footer->uncompressed_size(), decompressed_body.size));
|
||||
}
|
||||
// append footer and footer size
|
||||
memcpy(decompressed_body.data + decompressed_body.size,
|
||||
page_slice.data + body_size,
|
||||
footer_size + 4);
|
||||
// free memory of compressed page
|
||||
page = std::move(decompressed_page);
|
||||
page_slice = Slice(page.get(), footer->uncompressed_size() + footer_size + 4);
|
||||
opts.stats->uncompressed_bytes_read += page_slice.size;
|
||||
}
|
||||
|
||||
*body = Slice(page_slice.data, page_slice.size - 4 - footer_size);
|
||||
if (opts.use_page_cache) {
|
||||
// insert this page into cache and return the cache handle
|
||||
cache->insert(cache_key, page_slice, &cache_handle, opts.kept_in_memory);
|
||||
*handle = PageHandle(std::move(cache_handle));
|
||||
} else {
|
||||
*handle = PageHandle(page_slice);
|
||||
}
|
||||
page.release(); // memory now managed by handle
|
||||
return Status::OK();
|
||||
}
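// ---------------------------------------------------------------------------
// Not part of the patch: the mirror-image sketch of the read path above. Given
// a page buffer, drop the trailing 4-byte checksum, read the 4-byte footer
// size, then slice out the footer and the body. Strings stand in for the real
// Slice/PageFooterPB types; fixed32 decoding assumes a little-endian host.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

static uint32_t get_fixed32_le(const char* p) {
    uint32_t v;
    std::memcpy(&v, p, 4);
    return v;
}

int main() {
    // Build a page laid out as: body, footer, footer_size(4), checksum(4).
    std::string body = "page-body-bytes";
    std::string footer = "serialized-PageFooterPB";
    std::string page = body + footer;
    char tmp[4];
    uint32_t footer_size = static_cast<uint32_t>(footer.size());
    std::memcpy(tmp, &footer_size, 4); page.append(tmp, 4);
    uint32_t checksum = 0; // checksum verification elided in this sketch
    std::memcpy(tmp, &checksum, 4); page.append(tmp, 4);

    // Parse: drop checksum, read footer size, then slice footer and body.
    size_t without_checksum = page.size() - 4;
    uint32_t parsed_footer_size = get_fixed32_le(page.data() + without_checksum - 4);
    std::string parsed_footer = page.substr(without_checksum - 4 - parsed_footer_size,
                                            parsed_footer_size);
    std::string parsed_body = page.substr(0, without_checksum - 4 - parsed_footer_size);

    std::cout << (parsed_footer == footer && parsed_body == body) << "\n"; // 1
    return 0;
}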
|
||||
|
||||
} // namespace segment_v2
|
||||
} // namespace doris
|
||||
be/src/olap/rowset/segment_v2/page_io.h (new file, 116 lines)
@ -0,0 +1,116 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "common/status.h"
|
||||
#include "gen_cpp/segment_v2.pb.h"
|
||||
#include "olap/rowset/segment_v2/page_handle.h"
|
||||
#include "olap/rowset/segment_v2/page_pointer.h"
|
||||
#include "util/slice.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class BlockCompressionCodec;
|
||||
struct OlapReaderStatistics;
|
||||
class RandomAccessFile;
|
||||
class WritableFile;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
struct PageReadOptions {
|
||||
// file to read page
|
||||
RandomAccessFile* file = nullptr;
|
||||
// location of the page
|
||||
PagePointer page_pointer;
|
||||
// decompressor for page body (null means page body is not compressed)
|
||||
const BlockCompressionCodec* codec = nullptr;
|
||||
// used to collect IO metrics
|
||||
OlapReaderStatistics* stats = nullptr;
|
||||
// whether to verify page checksum
|
||||
bool verify_checksum = true;
|
||||
// whether to use page cache in read path
|
||||
bool use_page_cache = true;
|
||||
// if true, use DURABLE CachePriority in page cache
|
||||
// currently used for in-memory olap tables
|
||||
bool kept_in_memory = false;
|
||||
|
||||
void sanity_check() const {
|
||||
CHECK_NOTNULL(file);
|
||||
CHECK_NOTNULL(stats);
|
||||
}
|
||||
};
|
||||
|
||||
// Utility class for reading and writing pages. All types of page share the same general layout:
|
||||
// Page := PageBody, PageFooter, FooterSize(4), Checksum(4)
|
||||
// - PageBody is defined by page type and may be compressed
|
||||
// - PageFooter is serialized PageFooterPB. It contains page_type, uncompressed_body_size,
|
||||
// and other custom metadata. PageBody is not compressed when its size is equal to
|
||||
// uncompressed_body_size
|
||||
// - FooterSize stores the size of PageFooter
|
||||
// - Checksum is the crc32c checksum of all previous parts
|
||||
class PageIO {
|
||||
public:
|
||||
|
||||
// Compress `body' using `codec' into `compressed_body'.
|
||||
// The size of returned `compressed_body' is 0 when the body is not compressed, this
|
||||
// could happen when `codec' is null or space saving is less than `min_space_saving'.
|
||||
static Status compress_page_body(const BlockCompressionCodec* codec,
|
||||
double min_space_saving,
|
||||
const std::vector<Slice>& body,
|
||||
OwnedSlice* compressed_body);
|
||||
|
||||
// Encode page from `body' and `footer' and write to `file'.
|
||||
// `body' could be either uncompressed or compressed.
|
||||
// On success, the file pointer to the written page is stored in `result'.
|
||||
static Status write_page(WritableFile* file,
|
||||
const std::vector<Slice>& body,
|
||||
const PageFooterPB& footer,
|
||||
PagePointer* result);
|
||||
|
||||
// Convenience function to compress the page body and write the page in one go.
|
||||
static Status compress_and_write_page(const BlockCompressionCodec* codec,
|
||||
double min_space_saving,
|
||||
WritableFile* file,
|
||||
const std::vector<Slice>& body,
|
||||
const PageFooterPB& footer,
|
||||
PagePointer* result) {
|
||||
DCHECK_EQ(footer.uncompressed_size(), Slice::compute_total_size(body));
|
||||
OwnedSlice compressed_body;
|
||||
RETURN_IF_ERROR(compress_page_body(codec, min_space_saving, body, &compressed_body));
|
||||
if (compressed_body.slice().empty()) { // uncompressed
|
||||
return write_page(file, body, footer, result);
|
||||
}
|
||||
return write_page(file, { compressed_body.slice() }, footer, result);
|
||||
}
|
||||
|
||||
// Read and parse a page according to `opts'.
|
||||
// On success
|
||||
// `handle' holds the memory of page data,
|
||||
// `body' points to page body,
|
||||
// `footer' stores the page footer.
|
||||
static Status read_and_decompress_page(const PageReadOptions& opts,
|
||||
PageHandle* handle,
|
||||
Slice* body,
|
||||
PageFooterPB* footer);
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
} // namespace doris
|
||||
@ -17,47 +17,88 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "olap/rowset/segment_v2/page_decoder.h" // for PagePointer
|
||||
#include "util/rle_encoding.h" // for RleDecoder
|
||||
#include <memory>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "gen_cpp/segment_v2.pb.h"
|
||||
#include "olap/rowset/segment_v2/common.h"
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
#include "olap/rowset/segment_v2/options.h"
|
||||
#include "olap/rowset/segment_v2/page_decoder.h"
|
||||
#include "olap/rowset/segment_v2/page_handle.h"
|
||||
#include "util/rle_encoding.h"
|
||||
|
||||
namespace doris {
|
||||
namespace segment_v2 {
|
||||
|
||||
class PageHandle;
|
||||
struct PagePointer;
|
||||
|
||||
// This holds the information of a page that has been loaded and is ready for read.
// This struct can be reused; the client should call reset first before reusing
// this object.
|
||||
struct ParsedPage {
|
||||
ParsedPage() { }
|
||||
|
||||
static Status create(PageHandle handle,
|
||||
const Slice& body,
|
||||
const DataPageFooterPB& footer,
|
||||
const EncodingInfo* encoding,
|
||||
const PagePointer& page_pointer,
|
||||
uint32_t page_index,
|
||||
std::unique_ptr<ParsedPage>* result) {
|
||||
std::unique_ptr<ParsedPage> page(new ParsedPage);
|
||||
page->page_handle = std::move(handle);
|
||||
|
||||
auto null_size = footer.nullmap_size();
|
||||
page->has_null = null_size > 0;
|
||||
page->null_bitmap = Slice(body.data + body.size - null_size, null_size);
|
||||
|
||||
if (page->has_null) {
|
||||
page->null_decoder = RleDecoder<bool>(
|
||||
(const uint8_t*) page->null_bitmap.data, null_size, 1);
|
||||
}
|
||||
|
||||
Slice data_slice(body.data, body.size - null_size);
|
||||
PageDecoderOptions opts;
|
||||
RETURN_IF_ERROR(encoding->create_page_decoder(data_slice, opts, &page->data_decoder));
|
||||
RETURN_IF_ERROR(page->data_decoder->init());
|
||||
|
||||
page->first_ordinal = footer.first_ordinal();
|
||||
page->num_rows = footer.num_values();
|
||||
page->page_pointer = page_pointer;
|
||||
page->page_index = page_index;
|
||||
|
||||
*result = std::move(page);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
~ParsedPage() {
|
||||
delete data_decoder;
|
||||
}
|
||||
|
||||
PagePointer page_pointer;
|
||||
PageHandle page_handle;
|
||||
|
||||
bool has_null;
|
||||
Slice null_bitmap;
|
||||
RleDecoder<bool> null_decoder;
|
||||
PageDecoder* data_decoder = nullptr;
|
||||
|
||||
// first rowid for this page
|
||||
rowid_t first_rowid = 0;
|
||||
|
||||
// ordinal of the first value in this page
|
||||
ordinal_t first_ordinal = 0;
|
||||
// number of rows including nulls and not-nulls
|
||||
uint32_t num_rows = 0;
|
||||
ordinal_t num_rows = 0;
|
||||
|
||||
PagePointer page_pointer;
|
||||
uint32_t page_index = 0;
|
||||
|
||||
// current offset when read this page
|
||||
// this means next row we will read
|
||||
uint32_t offset_in_page = 0;
|
||||
ordinal_t offset_in_page = 0;
|
||||
|
||||
uint32_t page_index = 0;
|
||||
|
||||
bool contains(rowid_t rid) { return rid >= first_rowid && rid < (first_rowid + num_rows); }
|
||||
rowid_t last_rowid() { return first_rowid + num_rows - 1; }
|
||||
bool contains(ordinal_t ord) { return ord >= first_ordinal && ord < (first_ordinal + num_rows); }
|
||||
bool has_remaining() const { return offset_in_page < num_rows; }
|
||||
size_t remaining() const { return num_rows - offset_in_page; }
|
||||
|
||||
private:
|
||||
// client should use create() factory method
|
||||
ParsedPage() = default;
|
||||
};
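// ---------------------------------------------------------------------------
// Not part of the patch: a sketch of the data-page body split that
// ParsedPage::create() performs. The RLE-encoded null bitmap sits at the tail
// of the body, its size given by DataPageFooterPB::nullmap_size(), and the
// remaining bytes are handed to the page decoder. The body bytes here are
// placeholders only.
#include <cstddef>
#include <iostream>
#include <string>

int main() {
    // body := encoded values, null bitmap (nullmap_size bytes)
    std::string body = std::string("encoded-values") + std::string("\x01\x00", 2);
    size_t nullmap_size = 2; // footer.nullmap_size() in the real code

    bool has_null = nullmap_size > 0;
    std::string null_bitmap = body.substr(body.size() - nullmap_size); // fed to RleDecoder<bool>
    std::string data = body.substr(0, body.size() - nullmap_size);     // fed to the PageDecoder

    std::cout << "has_null=" << has_null
              << " data_bytes=" << data.size()
              << " nullmap_bytes=" << null_bitmap.size() << "\n";
    return 0;
}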
|
||||
|
||||
}
|
||||
|
||||
@ -17,7 +17,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "olap/rowset/segment_v2/common.h" // for rowid_t
|
||||
#include "olap/rowset/segment_v2/options.h" // for PageBuilderOptions/PageDecoderOptions
|
||||
#include "olap/rowset/segment_v2/page_builder.h" // for PageBuilder
|
||||
#include "olap/rowset/segment_v2/page_decoder.h" // for PageDecoder
|
||||
|
||||
@ -21,6 +21,7 @@
|
||||
#include "env/env.h" // RandomAccessFile
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "olap/rowset/segment_v2/column_reader.h" // ColumnReader
|
||||
#include "olap/rowset/segment_v2/page_io.h"
|
||||
#include "olap/rowset/segment_v2/segment_writer.h" // k_segment_magic_length
|
||||
#include "olap/rowset/segment_v2/segment_iterator.h"
|
||||
#include "olap/rowset/segment_v2/empty_segment_iterator.h"
|
||||
@ -68,41 +69,10 @@ Status Segment::new_iterator(const Schema& schema,
|
||||
if (read_options.conditions != nullptr) {
|
||||
for (auto& column_condition : read_options.conditions->columns()) {
|
||||
int32_t column_id = column_condition.first;
|
||||
auto entry = _column_id_to_footer_ordinal.find(column_id);
|
||||
if (entry == _column_id_to_footer_ordinal.end()) {
|
||||
if (_column_readers[column_id] == nullptr || !_column_readers[column_id]->has_zone_map()) {
|
||||
continue;
|
||||
}
|
||||
auto& c_meta = _footer.columns(entry->second);
|
||||
if (!c_meta.has_zone_map()) {
|
||||
continue;
|
||||
}
|
||||
auto& c_zone_map = c_meta.zone_map();
|
||||
if (!c_zone_map.has_not_null() && !c_zone_map.has_null()) {
|
||||
// no data
|
||||
iter->reset(new EmptySegmentIterator(schema));
|
||||
return Status::OK();
|
||||
}
|
||||
// TODO Logic here and the similar logic in ColumnReader::_get_filtered_pages should be unified.
|
||||
TypeInfo* type_info = get_type_info((FieldType)c_meta.type());
|
||||
if (type_info == nullptr) {
|
||||
return Status::NotSupported(Substitute("unsupported typeinfo, type=$0", c_meta.type()));
|
||||
}
|
||||
FieldType type = type_info->type();
|
||||
const Field* field = schema.column(column_id);
|
||||
int32_t var_length = field->length();
|
||||
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, var_length));
|
||||
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, var_length));
|
||||
if (c_zone_map.has_not_null()) {
|
||||
min_value->from_string(c_zone_map.min());
|
||||
max_value->from_string(c_zone_map.max());
|
||||
}
|
||||
if (c_zone_map.has_null()) {
|
||||
min_value->set_null();
|
||||
if (!c_zone_map.has_not_null()) {
|
||||
max_value->set_null();
|
||||
}
|
||||
}
|
||||
if (!column_condition.second->eval({min_value.get(), max_value.get()})) {
|
||||
if (!_column_readers[column_id]->match_condition(column_condition.second)) {
|
||||
// any condition not satisfied, return.
|
||||
iter->reset(new EmptySegmentIterator(schema));
|
||||
return Status::OK();
|
||||
@ -164,18 +134,25 @@ Status Segment::_parse_footer() {
|
||||
|
||||
Status Segment::_load_index() {
|
||||
return _load_index_once.call([this] {
|
||||
// read short key index content
|
||||
// read and parse short key index page
|
||||
OpenedFileHandle<RandomAccessFile> file_handle;
|
||||
RETURN_IF_ERROR(FileManager::instance()->open_file(_fname, &file_handle));
|
||||
RandomAccessFile* input_file = file_handle.file();
|
||||
_sk_index_buf.resize(_footer.short_key_index_page().size());
|
||||
Slice slice(_sk_index_buf.data(), _sk_index_buf.size());
|
||||
RETURN_IF_ERROR(input_file->read_at(_footer.short_key_index_page().offset(), slice));
|
||||
|
||||
// Parse short key index
|
||||
_sk_index_decoder.reset(new ShortKeyIndexDecoder(_sk_index_buf));
|
||||
RETURN_IF_ERROR(_sk_index_decoder->parse());
|
||||
return Status::OK();
|
||||
PageReadOptions opts;
|
||||
opts.file = file_handle.file();
|
||||
opts.page_pointer = PagePointer(_footer.short_key_index_page());
|
||||
opts.codec = nullptr; // short key index page uses NO_COMPRESSION for now
|
||||
OlapReaderStatistics tmp_stats;
|
||||
opts.stats = &tmp_stats;
|
||||
|
||||
Slice body;
|
||||
PageFooterPB footer;
|
||||
RETURN_IF_ERROR(PageIO::read_and_decompress_page(opts, &_sk_index_handle, &body, &footer));
|
||||
DCHECK_EQ(footer.type(), SHORT_KEY_PAGE);
|
||||
DCHECK(footer.has_short_key_page_footer());
|
||||
|
||||
_sk_index_decoder.reset(new ShortKeyIndexDecoder);
|
||||
return _sk_index_decoder->parse(body, footer.short_key_page_footer());
|
||||
});
|
||||
}
|
||||
|
||||
@ -194,7 +171,7 @@ Status Segment::_create_column_readers() {
|
||||
}
|
||||
|
||||
ColumnReaderOptions opts;
|
||||
opts.cache_in_memory = _tablet_schema->is_in_memory();
|
||||
opts.kept_in_memory = _tablet_schema->is_in_memory();
|
||||
std::unique_ptr<ColumnReader> reader;
|
||||
// pass Descriptor<RandomAccessFile>* to column reader
|
||||
RETURN_IF_ERROR(ColumnReader::create(
|
||||
|
||||
@ -26,7 +26,7 @@
|
||||
#include "gen_cpp/segment_v2.pb.h"
|
||||
#include "gutil/macros.h"
|
||||
#include "olap/iterators.h"
|
||||
#include "olap/rowset/segment_v2/common.h" // rowid_t
|
||||
#include "olap/rowset/segment_v2/page_handle.h"
|
||||
#include "olap/short_key_index.h"
|
||||
#include "olap/tablet_schema.h"
|
||||
#include "util/faststring.h"
|
||||
@ -141,8 +141,8 @@ private:
|
||||
|
||||
// used to guarantee that short key index will be loaded at most once in a thread-safe way
|
||||
DorisCallOnce<Status> _load_index_once;
|
||||
// used to store short key index
|
||||
faststring _sk_index_buf;
|
||||
// used to hold short key index page in memory
|
||||
PageHandle _sk_index_handle;
|
||||
// short key index decoder
|
||||
std::unique_ptr<ShortKeyIndexDecoder> _sk_index_decoder;
|
||||
};
|
||||
|
||||
@ -27,8 +27,6 @@
|
||||
#include "olap/rowset/segment_v2/segment.h"
|
||||
#include "olap/schema.h"
|
||||
#include "olap/rowset/segment_v2/row_ranges.h"
|
||||
#include "olap/rowset/segment_v2/column_zone_map.h"
|
||||
#include "olap/rowset/segment_v2/ordinal_page_index.h"
|
||||
#include "olap/olap_cond.h"
|
||||
#include "util/file_cache.h"
|
||||
|
||||
|
||||
@ -19,9 +19,9 @@
|
||||
|
||||
#include "env/env.h" // Env
|
||||
#include "olap/row.h" // ContiguousRow
|
||||
#include "olap/row_block.h" // RowBlock
|
||||
#include "olap/row_cursor.h" // RowCursor
|
||||
#include "olap/rowset/segment_v2/column_writer.h" // ColumnWriter
|
||||
#include "olap/rowset/segment_v2/page_io.h"
|
||||
#include "olap/short_key_index.h"
|
||||
#include "util/crc32c.h"
|
||||
|
||||
@ -48,16 +48,20 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) {
|
||||
|
||||
uint32_t column_id = 0;
|
||||
for (auto& column : _tablet_schema->columns()) {
|
||||
ColumnMetaPB* column_meta = _footer.add_columns();
|
||||
// TODO(zc): Do we need this column_id??
|
||||
column_meta->set_column_id(column_id++);
|
||||
column_meta->set_unique_id(column.unique_id());
|
||||
bool is_nullable = column.is_nullable();
|
||||
column_meta->set_is_nullable(is_nullable);
|
||||
column_meta->set_length(column.length());
|
||||
std::unique_ptr<Field> field(FieldFactory::create(column));
|
||||
DCHECK(field.get() != nullptr);
|
||||
|
||||
ColumnWriterOptions opts;
|
||||
opts.compression_type = segment_v2::CompressionTypePB::LZ4F;
|
||||
opts.meta = _footer.add_columns();
|
||||
// TODO(zc): Do we need this column_id??
|
||||
opts.meta->set_column_id(column_id++);
|
||||
opts.meta->set_unique_id(column.unique_id());
|
||||
opts.meta->set_type(field->type());
|
||||
opts.meta->set_length(column.length());
|
||||
opts.meta->set_encoding(DEFAULT_ENCODING);
|
||||
opts.meta->set_compression(LZ4F);
|
||||
opts.meta->set_is_nullable(column.is_nullable());
|
||||
|
||||
// now we create zone map for key columns
|
||||
if (column.is_key()) {
|
||||
opts.need_zone_map = true;
|
||||
@ -85,9 +89,8 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) {
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Field> field(FieldFactory::create(column));
|
||||
DCHECK(field.get() != nullptr);
|
||||
std::unique_ptr<ColumnWriter> writer(new ColumnWriter(opts, std::move(field), is_nullable, _output_file.get()));
|
||||
std::unique_ptr<ColumnWriter> writer(
|
||||
new ColumnWriter(opts, std::move(field), _output_file.get()));
|
||||
RETURN_IF_ERROR(writer->init());
|
||||
_column_writers.push_back(std::move(writer));
|
||||
}
|
||||
@ -179,25 +182,18 @@ Status SegmentWriter::_write_bloom_filter_index() {
|
||||
}
|
||||
|
||||
Status SegmentWriter::_write_short_key_index() {
|
||||
std::vector<Slice> slices;
|
||||
// TODO(zc): we should get segment_size
|
||||
RETURN_IF_ERROR(_index_builder->finalize(_row_count * 100, _row_count, &slices));
|
||||
|
||||
uint64_t offset = _output_file->size();
|
||||
RETURN_IF_ERROR(_write_raw_data(slices));
|
||||
uint32_t written_bytes = _output_file->size() - offset;
|
||||
|
||||
_footer.mutable_short_key_index_page()->set_offset(offset);
|
||||
_footer.mutable_short_key_index_page()->set_size(written_bytes);
|
||||
std::vector<Slice> body;
|
||||
PageFooterPB footer;
|
||||
RETURN_IF_ERROR(_index_builder->finalize(_row_count, &body, &footer));
|
||||
PagePointer pp;
|
||||
// short key index page is not compressed right now
|
||||
RETURN_IF_ERROR(PageIO::write_page(_output_file.get(), body, footer, &pp));
|
||||
pp.to_proto(_footer.mutable_short_key_index_page());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SegmentWriter::_write_footer() {
|
||||
_footer.set_num_rows(_row_count);
|
||||
// collect all
|
||||
for (int i = 0; i < _column_writers.size(); ++i) {
|
||||
_column_writers[i]->write_meta(_footer.mutable_columns(i));
|
||||
}
|
||||
|
||||
// Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4)
|
||||
std::string footer_buf;
|
||||
|
||||
@ -67,12 +67,6 @@ public:
|
||||
|
||||
Status finalize(uint64_t* segment_file_size, uint64_t* index_size);
|
||||
|
||||
// for ut
|
||||
// this function should be called after finalize
|
||||
bool has_bf_index(uint32_t col_id) const {
|
||||
return _footer.columns(col_id).has_bloom_filter_index();
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(SegmentWriter);
|
||||
Status _write_data();
|
||||
|
||||
be/src/olap/rowset/segment_v2/zone_map_index.cpp (new file, 142 lines)
@ -0,0 +1,142 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/rowset/segment_v2/zone_map_index.h"
|
||||
|
||||
#include "olap/column_block.h"
|
||||
#include "olap/olap_define.h"
|
||||
#include "olap/rowset/segment_v2/encoding_info.h"
|
||||
#include "olap/rowset/segment_v2/indexed_column_reader.h"
|
||||
#include "olap/rowset/segment_v2/indexed_column_writer.h"
|
||||
#include "olap/types.h"
|
||||
#include "runtime/mem_pool.h"
|
||||
#include "runtime/mem_tracker.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
ZoneMapIndexWriter::ZoneMapIndexWriter(Field* field) : _field(field), _pool(&_tracker) {
|
||||
_page_zone_map.min_value = _field->allocate_value(&_pool);
|
||||
_page_zone_map.max_value = _field->allocate_value(&_pool);
|
||||
_reset_zone_map(&_page_zone_map);
|
||||
_segment_zone_map.min_value = _field->allocate_value(&_pool);
|
||||
_segment_zone_map.max_value = _field->allocate_value(&_pool);
|
||||
_reset_zone_map(&_segment_zone_map);
|
||||
}
|
||||
|
||||
void ZoneMapIndexWriter::add_values(const void* values, size_t count) {
|
||||
if (count > 0) {
|
||||
_page_zone_map.has_not_null = true;
|
||||
}
|
||||
const char* vals = reinterpret_cast<const char*>(values);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (_field->compare(_page_zone_map.min_value, vals) > 0) {
|
||||
_field->type_info()->direct_copy(_page_zone_map.min_value, vals);
|
||||
}
|
||||
if (_field->compare(_page_zone_map.max_value, vals) < 0) {
|
||||
_field->type_info()->direct_copy(_page_zone_map.max_value, vals);
|
||||
}
|
||||
vals += _field->size();
|
||||
}
|
||||
}
|
||||
|
||||
Status ZoneMapIndexWriter::flush() {
|
||||
// Update segment zone map.
|
||||
if (_field->compare(_segment_zone_map.min_value, _page_zone_map.min_value) > 0) {
|
||||
_field->type_info()->direct_copy(_segment_zone_map.min_value, _page_zone_map.min_value);
|
||||
}
|
||||
if (_field->compare(_segment_zone_map.max_value, _page_zone_map.max_value) < 0) {
|
||||
_field->type_info()->direct_copy(_segment_zone_map.max_value, _page_zone_map.max_value);
|
||||
}
|
||||
if (_page_zone_map.has_null) {
|
||||
_segment_zone_map.has_null = true;
|
||||
}
|
||||
if (_page_zone_map.has_not_null) {
|
||||
_segment_zone_map.has_not_null = true;
|
||||
}
|
||||
|
||||
ZoneMapPB zone_map_pb;
|
||||
_page_zone_map.to_proto(&zone_map_pb, _field);
|
||||
_reset_zone_map(&_page_zone_map);
|
||||
|
||||
std::string serialized_zone_map;
|
||||
bool ret = zone_map_pb.SerializeToString(&serialized_zone_map);
|
||||
if (!ret) {
|
||||
return Status::InternalError("serialize zone map failed");
|
||||
}
|
||||
_estimated_size += serialized_zone_map.size() + sizeof(uint32_t);
|
||||
_values.push_back(std::move(serialized_zone_map));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ZoneMapIndexWriter::finish(WritableFile* file, ColumnIndexMetaPB* index_meta) {
|
||||
index_meta->set_type(ZONE_MAP_INDEX);
|
||||
ZoneMapIndexPB* meta = index_meta->mutable_zone_map_index();
|
||||
// store segment zone map
|
||||
_segment_zone_map.to_proto(meta->mutable_segment_zone_map(), _field);
|
||||
|
||||
// write out zone map for each data pages
|
||||
const TypeInfo* typeinfo = get_type_info(OLAP_FIELD_TYPE_OBJECT);
|
||||
IndexedColumnWriterOptions options;
|
||||
options.write_ordinal_index = true;
|
||||
options.write_value_index = false;
|
||||
options.encoding = EncodingInfo::get_default_encoding(typeinfo, false);
|
||||
options.compression = NO_COMPRESSION; // currently not compressed
|
||||
|
||||
IndexedColumnWriter writer(options, typeinfo, file);
|
||||
RETURN_IF_ERROR(writer.init());
|
||||
|
||||
for (auto& value : _values) {
|
||||
Slice value_slice(value);
|
||||
RETURN_IF_ERROR(writer.add(&value_slice));
|
||||
}
|
||||
return writer.finish(meta->mutable_page_zone_maps());
|
||||
}
|
||||
|
||||
Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
|
||||
IndexedColumnReader reader(_filename, _index_meta->page_zone_maps());
|
||||
RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory));
|
||||
IndexedColumnIterator iter(&reader);
|
||||
|
||||
MemTracker tracker;
|
||||
MemPool pool(&tracker);
|
||||
_page_zone_maps.resize(reader.num_values());
|
||||
|
||||
// read and cache all page zone maps
|
||||
for (int i = 0; i < reader.num_values(); ++i) {
|
||||
Slice value;
|
||||
uint8_t nullmap;
|
||||
size_t num_to_read = 1;
|
||||
ColumnBlock block(reader.type_info(), (uint8_t*) &value, &nullmap, num_to_read, &pool);
|
||||
ColumnBlockView column_block_view(&block);
|
||||
|
||||
RETURN_IF_ERROR(iter.seek_to_ordinal(i));
|
||||
size_t num_read = num_to_read;
|
||||
RETURN_IF_ERROR(iter.next_batch(&num_read, &column_block_view));
|
||||
DCHECK(num_to_read == num_read);
|
||||
|
||||
if (!_page_zone_maps[i].ParseFromArray(value.data, value.size)) {
|
||||
return Status::Corruption("Failed to parse zone map");
|
||||
}
|
||||
pool.clear();
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
} // namespace segment_v2
|
||||
} // namespace doris
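// ---------------------------------------------------------------------------
// Not part of the patch: a self-contained sketch of how per-page zone maps are
// used for pruning. A page can be skipped when the query range does not
// intersect its [min, max]; the real code evaluates a CondColumn against
// WrapperField min/max values parsed from ZoneMapPB.
#include <cstdint>
#include <iostream>
#include <vector>

struct ZoneMapSketch {
    int64_t min;
    int64_t max;
    bool has_null;
    bool has_not_null;
};

// Keep a page if the predicate `lo <= value <= hi` can possibly match it.
bool page_may_match(const ZoneMapSketch& zm, int64_t lo, int64_t hi) {
    if (!zm.has_not_null) return false;   // page contains only NULLs
    return !(zm.max < lo || zm.min > hi); // ranges intersect
}

int main() {
    std::vector<ZoneMapSketch> pages = {
        {0, 99, false, true}, {100, 199, true, true}, {0, 0, true, false}};
    for (size_t i = 0; i < pages.size(); ++i) {
        std::cout << "page " << i
                  << (page_may_match(pages[i], 150, 300) ? " kept" : " pruned") << "\n";
    }
    return 0;
}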
|
||||
@ -17,8 +17,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "util/slice.h"
|
||||
@ -30,6 +31,8 @@
|
||||
|
||||
namespace doris {
|
||||
|
||||
class WritableFile;
|
||||
|
||||
namespace segment_v2 {
|
||||
|
||||
struct ZoneMap {
|
||||
@ -46,66 +49,77 @@ struct ZoneMap {
|
||||
bool has_null = false;
|
||||
// has_not_null means whether zone has none-null value
|
||||
bool has_not_null = false;
|
||||
|
||||
void to_proto(ZoneMapPB* dst, Field* field) {
|
||||
dst->set_min(field->to_string(min_value));
|
||||
dst->set_max(field->to_string(max_value));
|
||||
dst->set_has_null(has_null);
|
||||
dst->set_has_not_null(has_not_null);
|
||||
}
|
||||
};
|
||||
|
||||
// This class encode column pages' zone map.
|
||||
// The binary is encoded by BinaryPlainPageBuilder
|
||||
class ColumnZoneMapBuilder {
|
||||
// Zone map index is represented by an IndexedColumn with ordinal index.
|
||||
// The IndexedColumn stores serialized ZoneMapPB for each data page.
|
||||
// It also creates and stores the segment-level zone map in the index meta so that
// the reader can prune an entire segment without reading pages.
|
||||
class ZoneMapIndexWriter {
|
||||
public:
|
||||
ColumnZoneMapBuilder(Field* field);
|
||||
explicit ZoneMapIndexWriter(Field* field);
|
||||
|
||||
Status add(const uint8_t* vals, size_t count);
|
||||
void add_values(const void* values, size_t count);
|
||||
|
||||
void add_nulls(uint32_t count) {
|
||||
_page_zone_map.has_null = true;
|
||||
}
|
||||
|
||||
// mark the end of one data page so that we can finalize the corresponding zone map
|
||||
Status flush();
|
||||
|
||||
void fill_segment_zone_map(ZoneMapPB* const to);
|
||||
Status finish(WritableFile* file, ColumnIndexMetaPB* index_meta);
|
||||
|
||||
uint64_t size() {
|
||||
return _page_builder->size();
|
||||
}
|
||||
|
||||
OwnedSlice finish() {
|
||||
return _page_builder->finish();
|
||||
}
|
||||
uint64_t size() { return _estimated_size; }
|
||||
|
||||
private:
|
||||
void _reset_zone_map(ZoneMap* zone_map);
|
||||
void _reset_page_zone_map() { _reset_zone_map(&_zone_map); }
|
||||
void _reset_segment_zone_map() { _reset_zone_map(&_segment_zone_map); }
|
||||
void _fill_zone_map_to_pb(const ZoneMap& from, ZoneMapPB* const to);
|
||||
void _reset_zone_map(ZoneMap* zone_map) {
|
||||
// we should allocate max varchar length and set to max for min value
|
||||
_field->set_to_max(zone_map->min_value);
|
||||
_field->set_to_min(zone_map->max_value);
|
||||
zone_map->has_null = false;
|
||||
zone_map->has_not_null = false;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<BinaryPlainPageBuilder> _page_builder;
|
||||
Field* _field;
|
||||
// memory will be managed by MemPool
|
||||
ZoneMap _zone_map;
|
||||
ZoneMap _page_zone_map;
|
||||
ZoneMap _segment_zone_map;
|
||||
// TODO(zc): we should replace this memory pool later, we only allocate min/max
// for the field. But MemPool allocates at least 4KB, which is a waste for most cases.
|
||||
MemTracker _tracker;
|
||||
MemPool _pool;
|
||||
|
||||
// serialized ZoneMapPB for each data page
|
||||
std::vector<std::string> _values;
|
||||
uint64_t _estimated_size = 0;
|
||||
};
|
||||
|
||||
// ColumnZoneMap
|
||||
class ColumnZoneMap {
|
||||
class ZoneMapIndexReader {
|
||||
public:
|
||||
ColumnZoneMap(const Slice& data) : _data(data), _num_pages(0) { }
|
||||
|
||||
Status load();
|
||||
|
||||
const std::vector<ZoneMapPB>& get_column_zone_map() const {
|
||||
return _page_zone_maps;
|
||||
explicit ZoneMapIndexReader(const std::string& filename, const ZoneMapIndexPB* index_meta) :
|
||||
_filename(filename),
|
||||
_index_meta(index_meta) {
|
||||
}
|
||||
|
||||
int32_t num_pages() const {
|
||||
return _num_pages;
|
||||
}
|
||||
// load all page zone maps into memory
|
||||
Status load(bool use_page_cache, bool kept_in_memory);
|
||||
|
||||
const std::vector<ZoneMapPB>& page_zone_maps() const { return _page_zone_maps; }
|
||||
|
||||
int32_t num_pages() const { return _page_zone_maps.size(); }
|
||||
|
||||
private:
|
||||
Slice _data;
|
||||
std::string _filename;
|
||||
const ZoneMapIndexPB* _index_meta;
|
||||
|
||||
// valid after load
|
||||
int32_t _num_pages;
|
||||
std::vector<ZoneMapPB> _page_zone_maps;
|
||||
};
|
||||
|
||||
@ -28,89 +28,63 @@ namespace doris {
|
||||
|
||||
Status ShortKeyIndexBuilder::add_item(const Slice& key) {
|
||||
put_varint32(&_offset_buf, _key_buf.size());
|
||||
_footer.set_num_items(_footer.num_items() + 1);
|
||||
_key_buf.append(key.data, key.size);
|
||||
_num_items++;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ShortKeyIndexBuilder::finalize(uint32_t segment_bytes,
|
||||
uint32_t num_segment_rows,
|
||||
std::vector<Slice>* slices) {
|
||||
_footer.set_num_segment_rows(num_segment_rows);
|
||||
_footer.set_segment_bytes(segment_bytes);
|
||||
_footer.set_key_bytes(_key_buf.size());
|
||||
_footer.set_offset_bytes(_offset_buf.size());
|
||||
Status ShortKeyIndexBuilder::finalize(uint32_t num_segment_rows,
|
||||
std::vector<Slice>* body,
|
||||
segment_v2::PageFooterPB* page_footer) {
|
||||
page_footer->set_type(segment_v2::SHORT_KEY_PAGE);
|
||||
page_footer->set_uncompressed_size(_key_buf.size() + _offset_buf.size());
|
||||
|
||||
// encode header
|
||||
if (!_footer.SerializeToString(&_footer_buf)) {
|
||||
return Status::InternalError("Failed to serialize index footer");
|
||||
}
|
||||
segment_v2::ShortKeyFooterPB* footer = page_footer->mutable_short_key_page_footer();
|
||||
footer->set_num_items(_num_items);
|
||||
footer->set_key_bytes(_key_buf.size());
|
||||
footer->set_offset_bytes(_offset_buf.size());
|
||||
footer->set_segment_id(_segment_id);
|
||||
footer->set_num_rows_per_block(_num_rows_per_block);
|
||||
footer->set_num_segment_rows(num_segment_rows);
|
||||
|
||||
put_fixed32_le(&_footer_buf, _footer_buf.size());
|
||||
// TODO(zc): checksum
|
||||
uint32_t checksum = 0;
|
||||
put_fixed32_le(&_footer_buf, checksum);
|
||||
|
||||
slices->emplace_back(_key_buf);
|
||||
slices->emplace_back(_offset_buf);
|
||||
slices->emplace_back(_footer_buf);
|
||||
body->emplace_back(_key_buf);
|
||||
body->emplace_back(_offset_buf);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ShortKeyIndexDecoder::parse() {
|
||||
Slice data = _data;
|
||||
Status ShortKeyIndexDecoder::parse(const Slice& body, const segment_v2::ShortKeyFooterPB& footer) {
|
||||
_footer = footer;
|
||||
|
||||
// 1. parse footer, get checksum and footer length
|
||||
if (data.size < 2 * sizeof(uint32_t)) {
|
||||
// check if body size match footer's information
|
||||
if (body.size != (_footer.key_bytes() + _footer.offset_bytes())) {
|
||||
return Status::Corruption(
|
||||
Substitute("Short key is too short, need=$0 vs real=$1",
|
||||
2 * sizeof(uint32_t), data.size));
|
||||
}
|
||||
size_t offset = data.size - 2 * sizeof(uint32_t);
|
||||
uint32_t footer_length = decode_fixed32_le((uint8_t*)data.data + offset);
|
||||
uint32_t checksum = decode_fixed32_le((uint8_t*)data.data + offset + 4);
|
||||
// TODO(zc): do checksum
|
||||
if (checksum != 0) {
|
||||
return Status::Corruption(
|
||||
Substitute("Checksum not match, need=$0 vs read=$1", 0, checksum));
|
||||
}
|
||||
// move offset to parse footer
|
||||
offset -= footer_length;
|
||||
std::string footer_buf(data.data + offset, footer_length);
|
||||
if (!_footer.ParseFromString(footer_buf)) {
|
||||
return Status::Corruption("Fail to parse index footer from string");
|
||||
}
|
||||
|
||||
// check if real data size match footer's content
|
||||
if (offset != _footer.key_bytes() + _footer.offset_bytes()) {
|
||||
return Status::Corruption(
|
||||
Substitute("Index size not match, need=$0, real=$1",
|
||||
_footer.key_bytes() + _footer.offset_bytes(), offset));
|
||||
Substitute("Index size not match, need=$0, real=$1",
|
||||
_footer.key_bytes() + _footer.offset_bytes(), body.size));
|
||||
}
|
||||
|
||||
// set index buffer
|
||||
_key_data = Slice(_data.data, _footer.key_bytes());
|
||||
|
||||
_key_data = Slice(body.data, _footer.key_bytes());
|
||||
|
||||
// parse offset information
|
||||
Slice offset_slice(_data.data + _footer.key_bytes(), _footer.offset_bytes());
|
||||
Slice offset_slice(body.data + _footer.key_bytes(), _footer.offset_bytes());
|
||||
// +1 for record total length
|
||||
_offsets.resize(_footer.num_items() + 1);
|
||||
_offsets[_footer.num_items()] = _footer.key_bytes();
|
||||
for (uint32_t i = 0; i < _footer.num_items(); ++i) {
|
||||
uint32_t offset = 0;
|
||||
if (!get_varint32(&offset_slice, &offset)) {
|
||||
return Status::Corruption("Fail to get varint from index offset buffer");
|
||||
}
|
||||
DCHECK(offset <= _footer.key_bytes())
|
||||
<< "Offset is larger than total bytes, offset=" << offset
|
||||
<< ", key_bytes=" << _footer.key_bytes();
|
||||
<< "Offset is larger than total bytes, offset=" << offset
|
||||
<< ", key_bytes=" << _footer.key_bytes();
|
||||
_offsets[i] = offset;
|
||||
}
|
||||
_offsets[_footer.num_items()] = _footer.key_bytes();
|
||||
|
||||
if (offset_slice.size != 0) {
|
||||
return Status::Corruption("Still has data after parse all key offset");
|
||||
}
|
||||
|
||||
_parsed = true;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -107,17 +107,12 @@ void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
|
||||
}
|
||||
}
|
||||
|
||||
// Used to encode a segment short key indices to binary format. This version
|
||||
// Encode a segment short key indices to one ShortKeyPage. This version
|
||||
// only accepts binary key, client should assure that input key is sorted,
|
||||
// otherwise errors could happen. This builder arranges data in the following
|
||||
// format.
|
||||
// index = encoded_keys + encoded_offsets + footer + footer_size + checksum
|
||||
// encoded_keys = binary_key + [, ...]
|
||||
// encoded_offsets = encoded_offset + [, ...]
|
||||
// encoded_offset = variant32
|
||||
// footer = ShortKeyFooterPB
|
||||
// footer_size = fixed32
|
||||
// checksum = fixed32
|
||||
// otherwise errors could happen. This builder arranges the page body in the
|
||||
// following format:
|
||||
// ShortKeyPageBody := KeyContent^NumEntry, KeyOffset(vint)^NumEntry
|
||||
// NumEntry, KeyBytes, OffsetBytes is stored in ShortKeyFooterPB
|
||||
// Usage:
|
||||
// ShortKeyIndexBuilder builder(segment_id, num_rows_per_block);
|
||||
// builder.add_item(key1);
|
||||
@ -132,26 +127,25 @@ void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
|
||||
// more than short key
|
||||
class ShortKeyIndexBuilder {
|
||||
public:
|
||||
ShortKeyIndexBuilder(uint32_t segment_id,
|
||||
uint32_t num_rows_per_block) {
|
||||
_footer.set_segment_id(segment_id);
|
||||
_footer.set_num_rows_per_block(num_rows_per_block);
|
||||
ShortKeyIndexBuilder(uint32_t segment_id, uint32_t num_rows_per_block) :
|
||||
_segment_id(segment_id), _num_rows_per_block(num_rows_per_block), _num_items(0) {
|
||||
}
|
||||
|
||||
Status add_item(const Slice& key);
|
||||
|
||||
uint64_t size() {
|
||||
return _key_buf.size() + _offset_buf.size() + _footer_buf.size();
|
||||
return _key_buf.size() + _offset_buf.size();
|
||||
}
|
||||
|
||||
Status finalize(uint32_t segment_size, uint32_t num_rows, std::vector<Slice>* slices);
|
||||
Status finalize(uint32_t num_rows, std::vector<Slice>* body, segment_v2::PageFooterPB* footer);
|
||||
|
||||
private:
|
||||
segment_v2::ShortKeyFooterPB _footer;
|
||||
uint32_t _segment_id;
|
||||
uint32_t _num_rows_per_block;
|
||||
uint32_t _num_items;
|
||||
|
||||
faststring _key_buf;
|
||||
faststring _offset_buf;
|
||||
std::string _footer_buf;
|
||||
};
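// ---------------------------------------------------------------------------
// Not part of the patch: a standalone sketch of the ShortKeyPage body layout
// built above: all key bytes concatenated, followed by one varint offset per
// key. The varint routines are minimal stand-ins for put_varint32/get_varint32
// in util/coding.h; NumEntry/KeyBytes/OffsetBytes live in ShortKeyFooterPB.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

static void put_varint32(std::string* buf, uint32_t v) {
    while (v >= 0x80) {
        buf->push_back(static_cast<char>((v & 0x7f) | 0x80));
        v >>= 7;
    }
    buf->push_back(static_cast<char>(v));
}
static uint32_t get_varint32(const char** p) {
    uint32_t v = 0;
    int shift = 0;
    while (true) {
        uint8_t b = static_cast<uint8_t>(**p);
        ++*p;
        v |= static_cast<uint32_t>(b & 0x7f) << shift;
        if (!(b & 0x80)) break;
        shift += 7;
    }
    return v;
}

int main() {
    std::vector<std::string> keys = {"aaa", "abc", "b"};

    // ShortKeyPageBody := KeyContent^NumEntry, KeyOffset(vint)^NumEntry
    std::string key_buf, offset_buf;
    for (const auto& key : keys) {
        put_varint32(&offset_buf, static_cast<uint32_t>(key_buf.size()));
        key_buf += key;
    }
    std::string page_body = key_buf + offset_buf;

    // Decode the offsets back from the tail of the body.
    const char* p = page_body.data() + key_buf.size();
    for (size_t i = 0; i < keys.size(); ++i) {
        uint32_t off = get_varint32(&p);
        std::cout << "key " << i << " starts at offset " << off << "\n";
    }
    return 0;
}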
|
||||
|
||||
class ShortKeyIndexDecoder;
|
||||
@ -214,40 +208,54 @@ private:

// Used to decode short key to header and encoded index data.
// Usage:
//      MemIndex index;
//      ShortKeyIndexDecoder decoder(slice)
//      decoder.parse();
//      ShortKeyIndexDecoder decoder;
//      decoder.parse(body, footer);
//      auto iter = decoder.lower_bound(key);
class ShortKeyIndexDecoder {
public:
// Client should assure that data is available when this class
// is used.
ShortKeyIndexDecoder(const Slice& data) : _data(data) { }
ShortKeyIndexDecoder() : _parsed(false) {}

Status parse();
// client should assure that body is available when this class is used
Status parse(const Slice& body, const segment_v2::ShortKeyFooterPB& footer);

ShortKeyIndexIterator begin() const { return {this, 0}; }
ShortKeyIndexIterator end() const { return {this, num_items()}; }
ShortKeyIndexIterator begin() const {
DCHECK(_parsed);
return {this, 0};
}

ShortKeyIndexIterator end() const {
DCHECK(_parsed);
return {this, num_items()};
}

// Return an iterator which locates at the first item who is
// equal with or greater than the given key.
// NOTE: If one key is the prefix of another key, this function thinks
// that the longer key is greater than the shorter key.
ShortKeyIndexIterator lower_bound(const Slice& key) const {
DCHECK(_parsed);
return seek<true>(key);
}

// Return the iterator which locates the first item greater than the
// input key.
ShortKeyIndexIterator upper_bound(const Slice& key) const {
DCHECK(_parsed);
return seek<false>(key);
}

uint32_t num_items() const { return _footer.num_items(); }
uint32_t num_items() const {
DCHECK(_parsed);
return _footer.num_items();
}

uint32_t num_rows_per_block() const { return _footer.num_rows_per_block(); }
uint32_t num_rows_per_block() const {
DCHECK(_parsed);
return _footer.num_rows_per_block();
}

Slice key(ssize_t ordinal) const {
DCHECK(_parsed);
DCHECK(ordinal >= 0 && ordinal < num_items());
return {_key_data.data + _offsets[ordinal], _offsets[ordinal + 1] - _offsets[ordinal]};
}
@ -266,7 +274,7 @@ private:
}

private:
Slice _data;
bool _parsed;

// All following fields are only valid after parse has been executed successfully
segment_v2::ShortKeyFooterPB _footer;

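Editor's note: a minimal usage sketch of the new builder/decoder pair, condensed from the declarations above and the unit test in this change; the key values and row counts are illustrative only.

    ShortKeyIndexBuilder builder(/*segment_id=*/0, /*num_rows_per_block=*/1024);
    builder.add_item("0001");
    builder.add_item("0002");

    // The builder now emits only the page body; the footer travels separately.
    std::vector<Slice> body;
    segment_v2::PageFooterPB footer;
    Status st = builder.finalize(/*num_rows=*/2048, &body, &footer);
    // footer.type() == segment_v2::SHORT_KEY_PAGE
    // footer.short_key_page_footer().num_items() == 2

    // The decoder parses the body against the ShortKeyFooterPB instead of a
    // self-describing blob.
    std::string buf;
    for (auto& slice : body) {
        buf.append(slice.data, slice.size);
    }
    ShortKeyIndexDecoder decoder;
    st = decoder.parse(buf, footer.short_key_page_footer());
    auto iter = decoder.lower_bound("0002");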
@ -150,6 +150,10 @@ template<> struct CppTypeTraits<OLAP_FIELD_TYPE_BIGINT> {
using CppType = int64_t;
using UnsignedCppType = uint64_t;
};
template<> struct CppTypeTraits<OLAP_FIELD_TYPE_UNSIGNED_BIGINT> {
using CppType = uint64_t;
using UnsignedCppType = uint64_t;
};
template<> struct CppTypeTraits<OLAP_FIELD_TYPE_LARGEINT> {
using CppType = int128_t;
using UnsignedCppType = unsigned int128_t;

@ -51,19 +51,18 @@ ADD_BE_TEST(rowset/segment_v2/bitshuffle_page_test)
ADD_BE_TEST(rowset/segment_v2/plain_page_test)
ADD_BE_TEST(rowset/segment_v2/binary_plain_page_test)
ADD_BE_TEST(rowset/segment_v2/binary_prefix_page_test)
ADD_BE_TEST(rowset/segment_v2/index_column_reader_writer_test)
ADD_BE_TEST(rowset/segment_v2/bitmap_index_test)
ADD_BE_TEST(rowset/segment_v2/column_reader_writer_test)
ADD_BE_TEST(rowset/segment_v2/encoding_info_test)
ADD_BE_TEST(rowset/segment_v2/page_compression_test)
ADD_BE_TEST(rowset/segment_v2/ordinal_page_index_test)
ADD_BE_TEST(rowset/segment_v2/rle_page_test)
ADD_BE_TEST(rowset/segment_v2/binary_dict_page_test)
ADD_BE_TEST(rowset/segment_v2/segment_test)
ADD_BE_TEST(rowset/segment_v2/column_zone_map_test)
ADD_BE_TEST(rowset/segment_v2/row_ranges_test)
ADD_BE_TEST(rowset/segment_v2/frame_of_reference_page_test)
ADD_BE_TEST(rowset/segment_v2/block_bloom_filter_test)
ADD_BE_TEST(rowset/segment_v2/bloom_filter_index_reader_writer_test)
ADD_BE_TEST(rowset/segment_v2/zone_map_index_test)
ADD_BE_TEST(tablet_meta_manager_test)
ADD_BE_TEST(tablet_mgr_test)
ADD_BE_TEST(rowset/rowset_meta_manager_test)

@ -108,6 +108,7 @@ TEST_F(KeyCoderTest, test_int) {
test_integer_encode<OLAP_FIELD_TYPE_INT>();
test_integer_encode<OLAP_FIELD_TYPE_UNSIGNED_INT>();
test_integer_encode<OLAP_FIELD_TYPE_BIGINT>();
test_integer_encode<OLAP_FIELD_TYPE_UNSIGNED_BIGINT>();
test_integer_encode<OLAP_FIELD_TYPE_LARGEINT>();

test_integer_encode<OLAP_FIELD_TYPE_DATETIME>();

@ -20,12 +20,12 @@
#include "olap/key_coder.h"

#include <gtest/gtest.h>
#include <string>

#include "common/logging.h"
#include "env/env.h"
#include "olap/olap_common.h"
#include "olap/types.h"
#include "olap/column_block.h"
#include "util/file_utils.h"
#include "runtime/mem_tracker.h"
#include "runtime/mem_pool.h"
@ -33,68 +33,72 @@
namespace doris {
namespace segment_v2 {

class IndexColumnReaderWriterTest : public testing::Test {
public:
IndexColumnReaderWriterTest() : _pool(&_tracker) { }
virtual ~IndexColumnReaderWriterTest() {
class BitmapIndexTest : public testing::Test {
public:
const std::string kTestDir = "./ut_dir/bitmap_index_test";
BitmapIndexTest() : _pool(&_tracker) { }

void SetUp() override {
if (FileUtils::check_exist(kTestDir)) {
ASSERT_TRUE(FileUtils::remove_all(kTestDir).ok());
}
ASSERT_TRUE(FileUtils::create_dir(kTestDir).ok());
}
private:
void TearDown() override {
if (FileUtils::check_exist(kTestDir)) {
ASSERT_TRUE(FileUtils::remove_all(kTestDir).ok());
}
}

private:
MemTracker _tracker;
MemPool _pool;
};

const std::string dname = "./ut_dir/index_column_reader_writer_test";

template<FieldType type>
void wirte_index_file(std::string& file_name, const void* values,
void write_index_file(std::string& filename, const void* values,
size_t value_count, size_t null_count,
BitmapIndexColumnPB* bitmap_index_meta) {
ColumnIndexMetaPB* meta) {
const TypeInfo* type_info = get_type_info(type);
FileUtils::create_dir(dname);
std::string fname = dname + "/" + file_name;
{
std::unique_ptr<WritableFile> wfile;
auto st = Env::Default()->new_writable_file(fname, &wfile);
ASSERT_TRUE(st.ok());
std::unique_ptr<BitmapIndexWriter> _bitmap_index_builder;
BitmapIndexWriter::create(type_info, &_bitmap_index_builder);
_bitmap_index_builder->add_values(values, value_count);
_bitmap_index_builder->add_nulls(null_count);
st = _bitmap_index_builder->finish(wfile.get(), bitmap_index_meta);
ASSERT_TRUE(st.ok()) << "writer finish status:" << st.to_string();
wfile.reset();
ASSERT_TRUE(Env::Default()->new_writable_file(filename, &wfile).ok());
std::unique_ptr<BitmapIndexWriter> writer;
BitmapIndexWriter::create(type_info, &writer);
writer->add_values(values, value_count);
writer->add_nulls(null_count);
ASSERT_TRUE(writer->finish(wfile.get(), meta).ok());
ASSERT_EQ(BITMAP_INDEX, meta->type());
}
}

template<FieldType type>
void get_bitmap_reader_iter(std::string& file_name, BitmapIndexColumnPB& bitmap_index_meta,
void get_bitmap_reader_iter(std::string& file_name, const ColumnIndexMetaPB& meta,
BitmapIndexReader** reader,
BitmapIndexIterator** iter) {
file_name = dname + "/" + file_name;
*reader = new BitmapIndexReader(file_name, bitmap_index_meta);
auto st = (*reader)->load(true);
*reader = new BitmapIndexReader(file_name, &meta.bitmap_index());
auto st = (*reader)->load(true, false);
ASSERT_TRUE(st.ok());

st = (*reader)->new_iterator(iter);
ASSERT_TRUE(st.ok());
}

TEST_F(IndexColumnReaderWriterTest, test_invert) {
TEST_F(BitmapIndexTest, test_invert) {
size_t num_uint8_rows = 1024 * 10;
int* val = new int[num_uint8_rows];
for (int i = 0; i < num_uint8_rows; ++i) {
val[i] = i;
}

std::string file_name = "invert";
BitmapIndexColumnPB bitmap_index_meta;
wirte_index_file<OLAP_FIELD_TYPE_INT>(file_name, val, num_uint8_rows, 0,
&bitmap_index_meta);
std::string file_name = kTestDir + "/invert";
ColumnIndexMetaPB meta;
write_index_file<OLAP_FIELD_TYPE_INT>(file_name, val, num_uint8_rows, 0, &meta);
{
std::unique_ptr<RandomAccessFile> rfile;
BitmapIndexReader* reader = nullptr;
BitmapIndexIterator* iter = nullptr;
get_bitmap_reader_iter<OLAP_FIELD_TYPE_INT>(file_name, bitmap_index_meta, &reader, &iter);
get_bitmap_reader_iter<OLAP_FIELD_TYPE_INT>(file_name, meta, &reader, &iter);

int value = 2;
bool exact_match;
@ -129,7 +133,7 @@ TEST_F(IndexColumnReaderWriterTest, test_invert) {
}
}

TEST_F(IndexColumnReaderWriterTest, test_invert_2) {
TEST_F(BitmapIndexTest, test_invert_2) {
size_t num_uint8_rows = 1024 * 10;
int* val = new int[num_uint8_rows];
for (int i = 0; i < 1024; ++i) {
@ -140,15 +144,14 @@ TEST_F(IndexColumnReaderWriterTest, test_invert_2) {
val[i] = i * 10;
}

std::string file_name = "invert2";
BitmapIndexColumnPB bitmap_index_meta;
wirte_index_file<OLAP_FIELD_TYPE_INT>(file_name, val, num_uint8_rows, 0,
&bitmap_index_meta);
std::string file_name = kTestDir + "/invert2";
ColumnIndexMetaPB meta;
write_index_file<OLAP_FIELD_TYPE_INT>(file_name, val, num_uint8_rows, 0, &meta);

{
BitmapIndexReader* reader = nullptr;
BitmapIndexIterator* iter = nullptr;
get_bitmap_reader_iter<OLAP_FIELD_TYPE_INT>(file_name, bitmap_index_meta, &reader, &iter);
get_bitmap_reader_iter<OLAP_FIELD_TYPE_INT>(file_name, meta, &reader, &iter);

int value = 1026;
bool exact_match;
@ -167,7 +170,7 @@ TEST_F(IndexColumnReaderWriterTest, test_invert_2) {
}
}

TEST_F(IndexColumnReaderWriterTest, test_multi_pages) {
TEST_F(BitmapIndexTest, test_multi_pages) {
size_t num_uint8_rows = 1024 * 1024;
int64_t* val = new int64_t[num_uint8_rows];
for (int i = 0; i < num_uint8_rows; ++i) {
@ -175,14 +178,13 @@ TEST_F(IndexColumnReaderWriterTest, test_multi_pages) {
}
val[1024 * 510] = 2019;

std::string file_name = "mul";
BitmapIndexColumnPB bitmap_index_meta;
wirte_index_file<OLAP_FIELD_TYPE_BIGINT>(file_name, val, num_uint8_rows, 0,
&bitmap_index_meta);
std::string file_name = kTestDir + "/mul";
ColumnIndexMetaPB meta;
write_index_file<OLAP_FIELD_TYPE_BIGINT>(file_name, val, num_uint8_rows, 0, &meta);
{
BitmapIndexReader* reader = nullptr;
BitmapIndexIterator* iter = nullptr;
get_bitmap_reader_iter<OLAP_FIELD_TYPE_BIGINT>(file_name, bitmap_index_meta, &reader, &iter);
get_bitmap_reader_iter<OLAP_FIELD_TYPE_BIGINT>(file_name, meta, &reader, &iter);

int64_t value = 2019;
bool exact_match;
@ -199,21 +201,20 @@ TEST_F(IndexColumnReaderWriterTest, test_multi_pages) {
}
}

TEST_F(IndexColumnReaderWriterTest, test_null) {
TEST_F(BitmapIndexTest, test_null) {
size_t num_uint8_rows = 1024;
int64_t* val = new int64_t[num_uint8_rows];
for (int i = 0; i < num_uint8_rows; ++i) {
val[i] = i;
}

std::string file_name = "null";
BitmapIndexColumnPB bitmap_index_meta;
wirte_index_file<OLAP_FIELD_TYPE_BIGINT>(file_name, val, num_uint8_rows, 30,
&bitmap_index_meta);
std::string file_name = kTestDir + "/null";
ColumnIndexMetaPB meta;
write_index_file<OLAP_FIELD_TYPE_BIGINT>(file_name, val, num_uint8_rows, 30, &meta);
{
BitmapIndexReader* reader = nullptr;
BitmapIndexIterator* iter = nullptr;
get_bitmap_reader_iter<OLAP_FIELD_TYPE_BIGINT>(file_name, bitmap_index_meta, &reader, &iter);
get_bitmap_reader_iter<OLAP_FIELD_TYPE_BIGINT>(file_name, meta, &reader, &iter);

Roaring bitmap;
iter->read_null_bitmap(&bitmap);
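Editor's note: a condensed sketch of the reader path these tests exercise; use_page_cache and kept_in_memory are the two new load flags, and meta is assumed to be the ColumnIndexMetaPB filled in by BitmapIndexWriter::finish().

    BitmapIndexReader reader(file_name, &meta.bitmap_index());
    RETURN_IF_ERROR(reader.load(/*use_page_cache=*/true, /*kept_in_memory=*/false));

    BitmapIndexIterator* iter = nullptr;
    RETURN_IF_ERROR(reader.new_iterator(&iter));

    Roaring null_bitmap;
    iter->read_null_bitmap(&null_bitmap);   // bitmap of rows whose value is NULL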
@ -26,10 +26,7 @@
#include "env/env.h"
#include "olap/olap_common.h"
#include "olap/types.h"
#include "olap/column_block.h"
#include "util/file_utils.h"
#include "runtime/mem_tracker.h"
#include "runtime/mem_pool.h"

namespace doris {
namespace segment_v2 {
@ -46,7 +43,7 @@ const std::string dname = "./ut_dir/bloom_filter_index_reader_writer_test";
template<FieldType type>
void write_bloom_filter_index_file(const std::string& file_name, const void* values,
size_t value_count, size_t null_count,
BloomFilterIndexPB* bloom_filter_index_meta) {
ColumnIndexMetaPB* index_meta) {
const TypeInfo* type_info = get_type_info(type);
using CppType = typename CppTypeTraits<type>::CppType;
FileUtils::create_dir(dname);
@ -70,20 +67,21 @@ void write_bloom_filter_index_file(const std::string& file_name, const void* val
ASSERT_TRUE(st.ok());
i += 1024;
}
st = bloom_filter_index_writer->finish(wfile.get(), bloom_filter_index_meta);
st = bloom_filter_index_writer->finish(wfile.get(), index_meta);
ASSERT_TRUE(st.ok()) << "writer finish status:" << st.to_string();
wfile.reset();
ASSERT_EQ(BLOOM_FILTER_INDEX, index_meta->type());
ASSERT_EQ(bf_options.strategy, index_meta->bloom_filter_index().hash_strategy());
}
}

void get_bloom_filter_reader_iter(const std::string& file_name, const BloomFilterIndexPB& bloom_filter_index_meta,
void get_bloom_filter_reader_iter(const std::string& file_name, const ColumnIndexMetaPB& meta,
std::unique_ptr<RandomAccessFile>* rfile,
BloomFilterIndexReader** reader,
std::unique_ptr<BloomFilterIndexIterator>* iter) {
std::string fname = dname + "/" + file_name;

*reader = new BloomFilterIndexReader(fname, bloom_filter_index_meta);
auto st = (*reader)->load(true);
*reader = new BloomFilterIndexReader(fname, &meta.bloom_filter_index());
auto st = (*reader)->load(true, false);
ASSERT_TRUE(st.ok());

st = (*reader)->new_iterator(iter);
@ -96,15 +94,13 @@ void test_bloom_filter_index_reader_writer_template(const std::string file_name,
typename TypeTraits<Type>::CppType* not_exist_value,
bool is_slice_type = false) {
typedef typename TypeTraits<Type>::CppType CppType;
BloomFilterIndexPB bloom_filter_index_meta;
write_bloom_filter_index_file<Type>(file_name, val, num, null_num,
&bloom_filter_index_meta);
ColumnIndexMetaPB meta;
write_bloom_filter_index_file<Type>(file_name, val, num, null_num, &meta);
{
std::unique_ptr<RandomAccessFile> rfile;
BloomFilterIndexReader* reader = nullptr;
std::unique_ptr<BloomFilterIndexIterator> iter;
get_bloom_filter_reader_iter(file_name, bloom_filter_index_meta,
&rfile, &reader, &iter);
get_bloom_filter_reader_iter(file_name, meta, &rfile, &reader, &iter);

// page 0
std::unique_ptr<BloomFilter> bf;

@ -79,8 +79,18 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
ASSERT_TRUE(st.ok());

ColumnWriterOptions writer_opts;
writer_opts.encoding_type = encoding;
writer_opts.compression_type = segment_v2::CompressionTypePB::LZ4F;
writer_opts.meta = &meta;
writer_opts.meta->set_column_id(0);
writer_opts.meta->set_unique_id(0);
writer_opts.meta->set_type(type);
if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_VARCHAR) {
writer_opts.meta->set_length(10);
} else {
writer_opts.meta->set_length(0);
}
writer_opts.meta->set_encoding(encoding);
writer_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F);
writer_opts.meta->set_is_nullable(true);
writer_opts.need_zone_map = true;

TabletColumn column(OLAP_FIELD_AGGREGATION_NONE, type);
@ -90,7 +100,7 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
column = create_char_key(1);
}
std::unique_ptr<Field> field(FieldFactory::create(column));
ColumnWriter writer(writer_opts, std::move(field), true, wfile.get());
ColumnWriter writer(writer_opts, std::move(field), wfile.get());
st = writer.init();
ASSERT_TRUE(st.ok()) << st.to_string();

@ -109,9 +119,6 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
st = writer.write_zone_map();
ASSERT_TRUE(st.ok());

writer.write_meta(&meta);
ASSERT_TRUE(meta.has_zone_map_page());

// close the file
wfile.reset();
}

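Editor's note: a brief sketch of the new writer setup shown above; per-column settings now live on the ColumnMetaPB that the writer serializes, rather than on standalone writer options. meta, encoding, column and wfile are assumed to be prepared as in the test.

    ColumnWriterOptions writer_opts;
    writer_opts.meta = &meta;
    writer_opts.meta->set_column_id(0);
    writer_opts.meta->set_unique_id(0);
    writer_opts.meta->set_type(OLAP_FIELD_TYPE_INT);
    writer_opts.meta->set_length(0);
    writer_opts.meta->set_encoding(encoding);                 // an EncodingTypePB value
    writer_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F);
    writer_opts.meta->set_is_nullable(true);
    writer_opts.need_zone_map = true;

    std::unique_ptr<Field> field(FieldFactory::create(column));
    ColumnWriter writer(writer_opts, std::move(field), wfile.get());
    RETURN_IF_ERROR(writer.init());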
@ -19,84 +19,134 @@

#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <string>

#include "common/logging.h"
#include "env/env.h"
#include "util/file_utils.h"

namespace doris {
namespace segment_v2 {

class OrdinalPageIndexTest : public testing::Test {
public:
OrdinalPageIndexTest() { }
virtual ~OrdinalPageIndexTest() {
const std::string kTestDir = "./ut_dir/ordinal_page_index_test";

void SetUp() override {
if (FileUtils::check_exist(kTestDir)) {
ASSERT_TRUE(FileUtils::remove_all(kTestDir).ok());
}
ASSERT_TRUE(FileUtils::create_dir(kTestDir).ok());
}
void TearDown() override {
if (FileUtils::check_exist(kTestDir)) {
ASSERT_TRUE(FileUtils::remove_all(kTestDir).ok());
}
}
};

TEST_F(OrdinalPageIndexTest, normal) {
// rowid, page pointer
// 1, (0, 4096)
// 1 + 4096, (1 * 4096, 4096)
// a page have 16KB, and have 4096 rows
OrdinalPageIndexBuilder builder;
std::string filename = kTestDir + "/normal.idx";

// we test a 16KB page
OrdinalIndexWriter builder;
// generate ordinal index for 16K data pages,
// each data page is 16KB in size and contains 4096 values,
// ordinal starts at 1 instead of 0
for (uint64_t i = 0; i < 16 * 1024; ++i) {
builder.append_entry(1 + 4096 * i, {16 * 1024 * i, 16 * 1024});
}
ColumnIndexMetaPB index_meta;
{
std::unique_ptr<WritableFile> out_file;
ASSERT_TRUE(Env::Default()->new_writable_file(filename, &out_file).ok());
ASSERT_TRUE(builder.finish(out_file.get(), &index_meta).ok());
ASSERT_EQ(ORDINAL_INDEX, index_meta.type());
ASSERT_FALSE(index_meta.ordinal_index().root_page().is_root_data_page());
LOG(INFO) << "index page size="
<< index_meta.ordinal_index().root_page().root_page().size();
}

auto slice = builder.finish();
LOG(INFO) << "index block's size=" << slice.size;
OrdinalIndexReader index(filename, &index_meta.ordinal_index(), 16 * 1024 * 4096 + 1);
ASSERT_TRUE(index.load(true, false).ok());
ASSERT_EQ(16 * 1024, index.num_data_pages());
ASSERT_EQ(1, index.get_first_ordinal(0));
ASSERT_EQ(4096, index.get_last_ordinal(0));
ASSERT_EQ((16 * 1024 - 1) * 4096 + 1, index.get_first_ordinal(16 * 1024 - 1));
ASSERT_EQ(16 * 1024 * 4096, index.get_last_ordinal(16 * 1024 - 1));

OrdinalPageIndex index(slice, 16 * 1024 * 4096 + 1);
auto st = index.load();
ASSERT_TRUE(st.ok());
ASSERT_EQ(1, index.get_first_row_id(0));
ASSERT_EQ(4096, index.get_last_row_id(0));
ASSERT_EQ((16 * 1024 - 1) * 4096 + 1, index.get_first_row_id(16 * 1024 - 1));
ASSERT_EQ(16 * 1024 * 4096, index.get_last_row_id(16 * 1024 - 1));

PagePointer page;
{
auto iter = index.seek_at_or_before(1);
ASSERT_TRUE(iter.valid());
ASSERT_EQ(1, iter.rowid());
ASSERT_EQ(1, iter.first_ordinal());
ASSERT_EQ(PagePointer(0, 16 * 1024), iter.page());
}
{
auto iter = index.seek_at_or_before(4095);
ASSERT_TRUE(iter.valid());
ASSERT_EQ(1, iter.rowid());
ASSERT_EQ(1, iter.first_ordinal());
ASSERT_EQ(PagePointer(0, 16 * 1024), iter.page());
}
{
auto iter = index.seek_at_or_before(4098);
ASSERT_TRUE(iter.valid());
ASSERT_EQ(4097, iter.rowid());
ASSERT_EQ(4097, iter.first_ordinal());
ASSERT_EQ(PagePointer(1 * 16 * 1024, 16 * 1024), iter.page());

iter.next();
ASSERT_TRUE(iter.valid());
ASSERT_EQ(4097 + 4096, iter.rowid());
ASSERT_EQ(4097 + 4096, iter.first_ordinal());
ASSERT_EQ(PagePointer(2 * 16 * 1024, 16 * 1024), iter.page());

}

{
auto iter = index.seek_at_or_before(0);
ASSERT_FALSE(iter.valid());
}
}

TEST_F(OrdinalPageIndexTest, corrupt) {
std::string str;
str.resize(4);
TEST_F(OrdinalPageIndexTest, one_data_page) {
// index one data page with 1024 values
int num_values = 1024;
PagePointer data_page_pointer(0, 4096);

encode_fixed32_le((uint8_t*)str.data(), 1);
OrdinalIndexWriter builder;
builder.append_entry(0, data_page_pointer); // add only one entry
ColumnIndexMetaPB index_meta;
{
// in this case, no index page is written, thus file could be null
ASSERT_TRUE(builder.finish(nullptr, &index_meta).ok());
ASSERT_EQ(ORDINAL_INDEX, index_meta.type());
ASSERT_TRUE(index_meta.ordinal_index().root_page().is_root_data_page());
PagePointer root_page_pointer(index_meta.ordinal_index().root_page().root_page());
ASSERT_EQ(data_page_pointer, root_page_pointer);
}

Slice slice(str);
OrdinalPageIndex index(slice, 10);
auto st = index.load();
ASSERT_FALSE(st.ok());
OrdinalIndexReader index("", &index_meta.ordinal_index(), num_values);
ASSERT_TRUE(index.load(true, false).ok());
ASSERT_EQ(1, index.num_data_pages());
ASSERT_EQ(0, index.get_first_ordinal(0));
ASSERT_EQ(num_values - 1, index.get_last_ordinal(0));

{
auto iter = index.seek_at_or_before(0);
ASSERT_TRUE(iter.valid());
ASSERT_EQ(0, iter.first_ordinal());
ASSERT_EQ(num_values - 1, iter.last_ordinal());
ASSERT_EQ(data_page_pointer, iter.page());
}
{
auto iter = index.seek_at_or_before(num_values - 1);
ASSERT_TRUE(iter.valid());
ASSERT_EQ(0, iter.first_ordinal());
ASSERT_EQ(data_page_pointer, iter.page());
}
{
auto iter = index.seek_at_or_before(num_values);
ASSERT_TRUE(iter.valid());
ASSERT_EQ(0, iter.first_ordinal());
ASSERT_EQ(data_page_pointer, iter.page());
}
}

}

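Editor's note: a small sketch of the write/read round trip these tests cover for the new ordinal index; the page geometry is illustrative, and out_file/filename are assumed to be an open WritableFile and its path.

    OrdinalIndexWriter writer;
    // one entry per data page: (first ordinal of the page, page pointer)
    writer.append_entry(0, PagePointer(0, 16 * 1024));
    writer.append_entry(4096, PagePointer(16 * 1024, 16 * 1024));

    ColumnIndexMetaPB index_meta;
    RETURN_IF_ERROR(writer.finish(out_file.get(), &index_meta));

    // num_values tells the reader where the last page ends
    OrdinalIndexReader reader(filename, &index_meta.ordinal_index(), /*num_values=*/8192);
    RETURN_IF_ERROR(reader.load(/*use_page_cache=*/true, /*kept_in_memory=*/false));

    auto iter = reader.seek_at_or_before(5000);   // lands on the page starting at ordinal 4096
    if (iter.valid()) {
        PagePointer page = iter.page();           // where to read that data page
    }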
@ -1,143 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "olap/rowset/segment_v2/page_compression.h"

#include <gtest/gtest.h>
#include <iostream>

#include "common/logging.h"
#include "util/block_compression.h"

namespace doris {
namespace segment_v2 {

class PageCompressionTest : public testing::Test {
public:
PageCompressionTest() { }
virtual ~PageCompressionTest() {
}
};

static std::string generate_rand_str(size_t len) {
static char charset[] = "0123456789"
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
std::string result;
result.resize(len);
for (int i = 0; i < len; ++i) {
result[i] = charset[rand() % sizeof(charset)];
}
return result;
}

static std::string generate_str(size_t len) {
static char charset[] = "0123456789"
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
std::string result;
result.resize(len);
for (int i = 0; i < len; ++i) {
result[i] = charset[i % sizeof(charset)];
}
return result;
}

TEST_F(PageCompressionTest, normal) {
const BlockCompressionCodec* codec = nullptr;
get_block_compression_codec(segment_v2::CompressionTypePB::LZ4F, &codec);

for (int i = 0; i < 2; ++i) {
// compress
PageCompressor compressor(codec);

std::vector<Slice> raw_slices;
std::string raw_data;
if (i == 0) {
raw_data = generate_rand_str(102400);
} else {
raw_data = generate_str(102400);
}

raw_slices.emplace_back(raw_data.data(), 10240);
raw_slices.emplace_back(raw_data.data() + 10240, 10240);
raw_slices.emplace_back(raw_data.data() + 20480, 81920);

std::vector<Slice> compressed_slices;
auto st = compressor.compress(raw_slices, &compressed_slices);
ASSERT_TRUE(st.ok());

std::string compressed_data = Slice::to_string(compressed_slices);

// decompress
PageDecompressor decompressor(compressed_data, codec);

{
Slice check_slice;
st = decompressor.decompress_to(&check_slice);
ASSERT_TRUE(st.ok());
ASSERT_STREQ(raw_data.c_str(), check_slice.data);
if (check_slice.data != compressed_data.data()) {
delete[] check_slice.data;
}
}
}
}

TEST_F(PageCompressionTest, bad_case) {
const BlockCompressionCodec* codec = nullptr;
get_block_compression_codec(segment_v2::CompressionTypePB::LZ4F, &codec);

for (int i = 0; i < 2; ++i) {
// compress
PageCompressor compressor(codec);

std::vector<Slice> raw_slices;
std::string raw_data;
if (i == 0) {
raw_data = generate_rand_str(102400);
} else {
raw_data = generate_str(102400);
}
raw_slices.emplace_back(raw_data.data(), 102400);

std::vector<Slice> compressed_slices;
auto st = compressor.compress(raw_slices, &compressed_slices);
ASSERT_TRUE(st.ok());

std::string compressed_data = Slice::to_string(compressed_slices);

Slice bad_compressed_slice(compressed_data.data(), compressed_data.size() - 1);
// decompress
PageDecompressor decompressor(bad_compressed_slice, codec);

{
Slice check_slice;
st = decompressor.decompress_to(&check_slice);
ASSERT_FALSE(st.ok());
}
}
}

}
}

int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

@ -57,6 +57,15 @@ static void DefaultIntGenerator(size_t rid, int cid, int block_id, RowCursorCell
*(int*)cell.mutable_cell_ptr() = rid * 10 + cid;
}

static bool column_contains_index(ColumnMetaPB column_meta, ColumnIndexTypePB type) {
for (int i = 0; i < column_meta.indexes_size(); ++i) {
if (column_meta.indexes(i).type() == type) {
return true;
}
}
return false;
}

class SegmentReaderWriterTest : public ::testing::Test {
protected:
void SetUp() override {
@ -353,7 +362,7 @@ TEST_F(SegmentReaderWriterTest, LazyMaterialization) {
shared_ptr<Segment> segment;
SegmentWriterOptions write_opts;
build_segment(write_opts, tablet_schema, tablet_schema, 100, data_gen, &segment);
ASSERT_TRUE(segment->footer().columns(0).has_bitmap_index());
ASSERT_TRUE(column_contains_index(segment->footer().columns(0), BITMAP_INDEX));
{
// lazy disabled when all predicates are removed by bitmap index:
// select c1, c2 where c2 = 30;
@ -972,8 +981,8 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) {
SegmentWriterOptions opts;
shared_ptr<Segment> segment;
build_segment(opts, tablet_schema, tablet_schema, 4096, DefaultIntGenerator, &segment);
ASSERT_TRUE(segment->footer().columns(0).has_bitmap_index());
ASSERT_TRUE(segment->footer().columns(1).has_bitmap_index());
ASSERT_TRUE(column_contains_index(segment->footer().columns(0), BITMAP_INDEX));
ASSERT_TRUE(column_contains_index(segment->footer().columns(1), BITMAP_INDEX));

{
Schema schema(tablet_schema);
@ -1104,14 +1113,14 @@ TEST_F(SegmentReaderWriterTest, TestBloomFilterIndexUniqueModel) {
opts1.whether_to_filter_value = false;
shared_ptr<Segment> seg1;
build_segment(opts1, schema, schema, 100, DefaultIntGenerator, &seg1);
ASSERT_FALSE(seg1->footer().columns(3).has_bloom_filter_index());
ASSERT_FALSE(column_contains_index(seg1->footer().columns(3), BLOOM_FILTER_INDEX));

// for base segment
SegmentWriterOptions opts2;
opts2.whether_to_filter_value = true;
shared_ptr<Segment> seg2;
build_segment(opts2, schema, schema, 100, DefaultIntGenerator, &seg2);
ASSERT_TRUE(seg2->footer().columns(3).has_bloom_filter_index());
ASSERT_TRUE(column_contains_index(seg2->footer().columns(3), BLOOM_FILTER_INDEX));
}

}

@ -16,41 +16,70 @@
// under the License.

#include <gtest/gtest.h>
#include <memory>

#include "olap/rowset/segment_v2/column_zone_map.h"
#include <memory>
#include <string>

#include "env/env.h"
#include "olap/rowset/segment_v2/zone_map_index.h"
#include "olap/tablet_schema_helper.h"
#include "util/file_utils.h"

namespace doris {
namespace segment_v2 {

class ColumnZoneMapTest : public testing::Test {
public:
void test_string(Field* field) {
ColumnZoneMapBuilder builder(field);
const std::string kTestDir = "./ut_dir/zone_map_index_test";

void SetUp() override {
if (FileUtils::check_exist(kTestDir)) {
ASSERT_TRUE(FileUtils::remove_all(kTestDir).ok());
}
ASSERT_TRUE(FileUtils::create_dir(kTestDir).ok());
}
void TearDown() override {
if (FileUtils::check_exist(kTestDir)) {
ASSERT_TRUE(FileUtils::remove_all(kTestDir).ok());
}
}

void test_string(std::string testname, Field* field) {
std::string filename = kTestDir + "/" + testname;

ZoneMapIndexWriter builder(field);
std::vector<std::string> values1 = {"aaaa", "bbbb", "cccc", "dddd", "eeee", "ffff"};
for (auto& value : values1) {
Slice slice(value);
builder.add((const uint8_t*)&slice, 1);
builder.add_values((const uint8_t*)&slice, 1);
}
builder.flush();
std::vector<std::string> values2 = {"aaaaa", "bbbbb", "ccccc", "ddddd", "eeeee", "fffff"};
for (auto& value : values2) {
Slice slice(value);
builder.add((const uint8_t*)&slice, 1);
builder.add_values((const uint8_t*)&slice, 1);
}
builder.add(nullptr, 1);
builder.add_nulls(1);
builder.flush();
for (int i = 0; i < 6; ++i) {
builder.add(nullptr, 1);
builder.add_nulls(1);
}
builder.flush();
OwnedSlice zone_map_page = builder.finish();
ColumnZoneMap column_zone_map(zone_map_page.slice());
Status status = column_zone_map.load();
// write out zone map index
ColumnIndexMetaPB index_meta;
{
std::unique_ptr<WritableFile> out_file;
ASSERT_TRUE(Env::Default()->new_writable_file(filename, &out_file).ok());
ASSERT_TRUE(builder.finish(out_file.get(), &index_meta).ok());
ASSERT_EQ(ZONE_MAP_INDEX, index_meta.type());
}

ZoneMapIndexReader column_zone_map(filename, &index_meta.zone_map_index());
Status status = column_zone_map.load(true, false);
ASSERT_TRUE(status.ok());
ASSERT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.get_column_zone_map();
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
ASSERT_EQ(3, zone_maps.size());
ASSERT_EQ("aaaa", zone_maps[0].min());
ASSERT_EQ("ffff", zone_maps[0].max());
@ -69,31 +98,39 @@ public:

// Test for int
TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
std::string filename = kTestDir + "/NormalTestIntPage";

TabletColumn int_column = create_int_key(0);
Field* field = FieldFactory::create(int_column);

ColumnZoneMapBuilder builder(field);
ZoneMapIndexWriter builder(field);
std::vector<int> values1 = {1, 10, 11, 20, 21, 22};
for (auto value : values1) {
builder.add((const uint8_t*)&value, 1);
builder.add_values((const uint8_t*)&value, 1);
}
builder.flush();
std::vector<int> values2 = {2, 12, 31, 23, 21, 22};
for (auto value : values2) {
builder.add((const uint8_t*)&value, 1);
builder.add_values((const uint8_t*)&value, 1);
}
builder.add(nullptr, 1);
builder.add_nulls(1);
builder.flush();
for (int i = 0; i < 6; ++i) {
builder.add(nullptr, 1);
builder.add_nulls(6);
builder.flush();
// write out zone map index
ColumnIndexMetaPB index_meta;
{
std::unique_ptr<WritableFile> out_file;
ASSERT_TRUE(Env::Default()->new_writable_file(filename, &out_file).ok());
ASSERT_TRUE(builder.finish(out_file.get(), &index_meta).ok());
ASSERT_EQ(ZONE_MAP_INDEX, index_meta.type());
}
builder.flush();
OwnedSlice zone_map_page = builder.finish();
ColumnZoneMap column_zone_map(zone_map_page.slice());
Status status = column_zone_map.load();

ZoneMapIndexReader column_zone_map(filename, &index_meta.zone_map_index());
Status status = column_zone_map.load(true, false);
ASSERT_TRUE(status.ok());
ASSERT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.get_column_zone_map();
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
ASSERT_EQ(3, zone_maps.size());

ASSERT_EQ(std::to_string(1), zone_maps[0].min());
@ -114,14 +151,14 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
TEST_F(ColumnZoneMapTest, NormalTestVarcharPage) {
TabletColumn varchar_column = create_varchar_key(0);
Field* field = FieldFactory::create(varchar_column);
test_string(field);
test_string("NormalTestVarcharPage", field);
}

// Test for string
TEST_F(ColumnZoneMapTest, NormalTestCharPage) {
TabletColumn char_column = create_char_key(0);
Field* field = FieldFactory::create(char_column);
test_string(field);
test_string("NormalTestCharPage", field);
}

}
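Editor's note: a condensed sketch of the new zone map index flow these tests cover; one ZoneMapPB is produced per builder.flush(), i.e. per data page. field, filename and out_file are assumed to be set up as in the tests.

    ZoneMapIndexWriter builder(field);
    int v = 10;
    builder.add_values((const uint8_t*)&v, 1);
    builder.flush();                      // closes the zone map for the current page
    builder.add_nulls(1);
    builder.flush();

    ColumnIndexMetaPB index_meta;
    RETURN_IF_ERROR(builder.finish(out_file.get(), &index_meta));   // type() == ZONE_MAP_INDEX

    ZoneMapIndexReader reader(filename, &index_meta.zone_map_index());
    RETURN_IF_ERROR(reader.load(/*use_page_cache=*/true, /*kept_in_memory=*/false));
    const std::vector<ZoneMapPB>& page_zone_maps = reader.page_zone_maps();
    // page_zone_maps[i] holds the min/max/null information for data page i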
@ -35,20 +35,25 @@ public:
TEST_F(ShortKeyIndexTest, buider) {
ShortKeyIndexBuilder builder(0, 1024);

int num_items = 0;
for (int i = 1000; i < 10000; i += 2) {
builder.add_item(std::to_string(i));
num_items++;
}
std::vector<Slice> slices;
auto st = builder.finalize(10000, 9000 * 1024, &slices);
segment_v2::PageFooterPB footer;
auto st = builder.finalize(9000 * 1024, &slices, &footer);
ASSERT_TRUE(st.ok());
ASSERT_EQ(segment_v2::SHORT_KEY_PAGE, footer.type());
ASSERT_EQ(num_items, footer.short_key_page_footer().num_items());

std::string buf;
for (auto& slice : slices) {
buf.append(slice.data, slice.size);
}

ShortKeyIndexDecoder decoder(buf);
st = decoder.parse();
ShortKeyIndexDecoder decoder;
st = decoder.parse(buf, footer.short_key_page_footer());
ASSERT_TRUE(st.ok());

// find 1499

@ -20,20 +20,6 @@ syntax="proto2";

package doris.segment_v2;

message ColumnSchemaPB {
optional uint32 column_id = 1;
optional string type = 2;
optional string aggregation = 3;
optional uint32 length = 4;
optional bool is_key = 5;
optional string default_value = 6;
optional uint32 precision = 9 [default = 27];
optional uint32 frac = 10 [default = 9];
optional bool is_nullable = 11 [default=false];
optional bool is_bf_column = 15 [default=false]; // is bloom filter indexed column
optional bool has_bitmap_index = 16 [default=false];
}

// page position info
message PagePointerPB {
required uint64 offset = 1; // offset in segment file
@ -67,6 +53,75 @@ enum CompressionTypePB {
ZSTD = 7;
}

enum PageTypePB {
UNKNOWN_PAGE_TYPE = 0;
DATA_PAGE = 1;
INDEX_PAGE = 2;
DICTIONARY_PAGE = 3;
SHORT_KEY_PAGE = 4;
}

message DataPageFooterPB {
// required: ordinal of the first value
optional uint64 first_ordinal = 1;
// required: number of values, including NULLs
optional uint64 num_values = 2;
// required: size of nullmap, 0 if the page doesn't contain NULL
optional uint32 nullmap_size = 3;
// only for array column, largest array item ordinal + 1,
// used to calculate the length of last array in this page
optional uint64 next_array_item_ordinal = 4;
}

message IndexPageFooterPB {
// required: number of index entries in this page
optional uint32 num_entries = 1;

enum Type {
UNKNOWN_INDEX_PAGE_TYPE = 0;
LEAF = 1;
INTERNAL = 2;
};
// required: type of the index page
optional Type type = 2;
}

message DictPageFooterPB {
// required: encoding for dictionary
optional EncodingTypePB encoding = 1;
}

message ShortKeyFooterPB {
// How many index items are in this index.
optional uint32 num_items = 1;
// The total bytes occupied by the index keys
optional uint32 key_bytes = 2;
// The total bytes occupied by the key offsets
optional uint32 offset_bytes = 3;
// Segment id which this index belongs to
optional uint32 segment_id = 4;
// number of rows in each block
optional uint32 num_rows_per_block = 5;
// How many rows are in this segment
optional uint32 num_segment_rows = 6;
}

message PageFooterPB {
// required: indicates which of the *_footer fields is set
optional PageTypePB type = 1;
// required: page body size before compression (exclude footer and crc).
// page body is uncompressed when it's equal to page body size
optional uint32 uncompressed_size = 2;
// present only when type == DATA_PAGE
optional DataPageFooterPB data_page_footer = 7;
// present only when type == INDEX_PAGE
optional IndexPageFooterPB index_page_footer = 8;
// present only when type == DICTIONARY_PAGE
optional DictPageFooterPB dict_page_footer = 9;
// present only when type == SHORT_KEY_PAGE
optional ShortKeyFooterPB short_key_page_footer = 10;
}

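Editor's note: a sketch of how a page writer is expected to fill the unified footer before handing the page to PageIO; only the sub-footer matching type is set. body_size, first_ordinal and num_values are placeholder variables, not names from this change.

    segment_v2::PageFooterPB footer;
    footer.set_type(segment_v2::DATA_PAGE);
    footer.set_uncompressed_size(body_size);          // body size before compression

    segment_v2::DataPageFooterPB* data_footer = footer.mutable_data_page_footer();
    data_footer->set_first_ordinal(first_ordinal);    // 64-bit ordinal_t now
    data_footer->set_num_values(num_values);
    data_footer->set_nullmap_size(0);                 // 0: page contains no NULLs

    // On the read side, dispatch on footer.type() and use the matching sub-footer:
    switch (footer.type()) {
    case segment_v2::DATA_PAGE:       /* footer.data_page_footer() */      break;
    case segment_v2::INDEX_PAGE:      /* footer.index_page_footer() */     break;
    case segment_v2::DICTIONARY_PAGE: /* footer.dict_page_footer() */      break;
    case segment_v2::SHORT_KEY_PAGE:  /* footer.short_key_page_footer() */ break;
    default: break;
    }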
message ZoneMapPB {
// minimum not-null value, invalid when all values are null(has_not_null==false)
optional bytes min = 1;
@ -85,72 +140,17 @@ message ColumnMetaPB {
optional uint32 unique_id = 2;
// this field is FieldType's value
optional int32 type = 3;
optional EncodingTypePB encoding = 4;
// compress type for column
optional CompressionTypePB compression = 5;
// if this column can be nullable
optional bool is_nullable = 6;
// ordinal index page
optional PagePointerPB ordinal_index_page = 7;
// page-level zone map index
optional PagePointerPB zone_map_page = 8;
// segment-level zone map
optional ZoneMapPB zone_map = 9;
// // dictionary page for DICT_ENCODING
optional PagePointerPB dict_page = 10;
// bitmap index
optional BitmapIndexColumnPB bitmap_index = 11;
// var length for string type
optional int32 length = 12;
// bloom filter index
optional BloomFilterIndexPB bloom_filter_index = 13;

// // bloom filter pages for bloom filter column
// repeated PagePointerPB bloom_filter_pages = 3;

// optional PagePointerPB page_zonemap_page = 5; // page zonemap info of column

// optional PagePointerPB bitmap_index_page = 6; // bitmap index page

// // data footprint of column after encoding and compress
// optional uint64 data_footprint = 7;
// // index footprint of column after encoding and compress
// optional uint64 index_footprint = 8;
// // raw column data footprint
// optional uint64 raw_data_footprint = 9;

// optional ZoneMapPB column_zonemap = 11; // column zonemap info
// repeated MetadataPairPB column_meta_datas = 12;
}

message FileFooterPB {
optional uint32 version = 1 [default = 1]; // file version
repeated ColumnSchemaPB schema = 2; // tablet schema
optional uint64 num_values = 3; // number of values
optional uint64 index_footprint = 4; // total idnex footprint of all columns
optional uint64 data_footprint = 5; // total data footprint of all columns
optional uint64 raw_data_footprint = 6; // raw data footprint

optional CompressionTypePB compress_type = 7 [default = LZ4F]; // default compression type for file columns
repeated MetadataPairPB file_meta_datas = 8; // meta data of file
optional PagePointerPB key_index_page = 9; // short key index page
}

message ShortKeyFooterPB {
// How many index item in this index.
optional uint32 num_items = 1;
// The total bytes occupied by the index key
optional uint32 key_bytes = 2;
// The total bytes occupied by the key offsets
optional uint32 offset_bytes = 3;
// Segment id which this index is belong to
optional uint32 segment_id = 4;
// number rows in each block
optional uint32 num_rows_per_block = 5;
// How many rows in this segment
optional uint32 num_segment_rows = 6;
// Total bytes for this segment
optional uint32 segment_bytes = 7;
optional int32 length = 4;
optional EncodingTypePB encoding = 5;
// compress type for column
optional CompressionTypePB compression = 6;
// if this column can be nullable
optional bool is_nullable = 7;
// metadata about all the column indexes
repeated ColumnIndexMetaPB indexes = 8;
// pointer to dictionary page when using DICT_ENCODING
optional PagePointerPB dict_page = 9;
}

message SegmentFooterPB {
@ -168,19 +168,6 @@ message SegmentFooterPB {
optional PagePointerPB short_key_index_page = 9;
}

message IndexPageFooterPB {
// required: number of entries in this page
optional int32 num_entries = 1;

enum Type {
UNKNOWN_INDEX_PAGE_TYPE = 0;
LEAF = 1;
INTERNAL = 2;
};
// required: type of the index page
optional Type type = 2;
}

message BTreeMetaPB {
// required: pointer to either root index page or sole data page based on is_root_data_page
optional PagePointerPB root_page = 1;
@ -205,22 +192,53 @@ message IndexedColumnMetaPB {
optional uint64 size = 7;
}

message BitmapIndexColumnPB {
// -------------------------------------------------------------
// Column Index Metadata
// -------------------------------------------------------------

enum ColumnIndexTypePB {
UNKNOWN_INDEX_TYPE = 0;
ORDINAL_INDEX = 1;
ZONE_MAP_INDEX = 2;
BITMAP_INDEX = 3;
BLOOM_FILTER_INDEX = 4;
}

message ColumnIndexMetaPB {
optional ColumnIndexTypePB type = 1;
optional OrdinalIndexPB ordinal_index = 7;
optional ZoneMapIndexPB zone_map_index = 8;
optional BitmapIndexPB bitmap_index = 9;
optional BloomFilterIndexPB bloom_filter_index = 10;
}

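Editor's note: because every per-column index now hangs off the repeated indexes field, a reader locates a given index by scanning for its type. A hypothetical helper sketch (find_index_meta is not part of this change):

    const ColumnIndexMetaPB* find_index_meta(const ColumnMetaPB& column_meta,
                                             ColumnIndexTypePB type) {
        for (int i = 0; i < column_meta.indexes_size(); ++i) {
            if (column_meta.indexes(i).type() == type) {
                return &column_meta.indexes(i);
            }
        }
        return nullptr;   // the column was written without this index
    }

    // e.g. const ColumnIndexMetaPB* m = find_index_meta(footer.columns(0), BITMAP_INDEX);
    //      if (m != nullptr) { BitmapIndexReader reader(file_name, &m->bitmap_index()); ... }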
message OrdinalIndexPB {
// required: the root page can be the data page if there is only one data page,
// or the only index page if there is more than one data page.
optional BTreeMetaPB root_page = 1;
}

message ZoneMapIndexPB {
// required: segment-level zone map
optional ZoneMapPB segment_zone_map = 1;
// required: zone map for each data page is stored in an IndexedColumn with ordinal index
optional IndexedColumnMetaPB page_zone_maps = 2;
}

message BitmapIndexPB {
enum BitmapType {
UNKNOWN_BITMAP_TYPE = 0;
ROARING_BITMAP = 1;
}
optional uint32 column_id = 1;
optional uint32 unique_id = 2;
optional BitmapType bitmap_type = 1 [default=ROARING_BITMAP];
// required: whether the index contains null key.
// if true, the last bitmap (ordinal:dict_column.num_values) in bitmap_column is
// the bitmap for null key. we don't store null key in dict_column.
optional bool has_null = 3;
optional bool has_null = 2;
// required: meta for ordered dictionary part
optional IndexedColumnMetaPB dict_column = 4;
optional IndexedColumnMetaPB dict_column = 3;
// required: meta for bitmaps part
optional IndexedColumnMetaPB bitmap_column = 5;
optional BitmapType bitmap_type = 6 [default=ROARING_BITMAP];
optional IndexedColumnMetaPB bitmap_column = 4;
}

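Editor's note: what the has_null contract above means for a reader, sketched against the bitmap index test; iter is assumed to be a BitmapIndexIterator obtained from BitmapIndexReader::new_iterator(), and bitmap_index_meta a BitmapIndexPB.

    // dict_column stores the ordered distinct values; bitmap_column stores one
    // roaring bitmap per distinct value, in the same order. When has_null is true,
    // one extra bitmap is appended at ordinal dict_column.num_values() marking the
    // rows whose value is NULL.
    if (bitmap_index_meta.has_null()) {
        Roaring null_rows;
        iter->read_null_bitmap(&null_rows);   // reads that trailing bitmap
    }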
enum HashStrategyPB {
@ -238,4 +256,4 @@ message BloomFilterIndexPB {
optional BloomFilterAlgorithmPB algorithm = 2;
// required: meta for bloom filters
optional IndexedColumnMetaPB bloom_filter = 3;
}
}

@ -282,18 +282,17 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/plain_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/binary_plain_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitmap_index_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/column_reader_writer_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/index_column_reader_writer_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/rle_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/binary_dict_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/binary_prefix_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/segment_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/page_compression_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/column_zone_map_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/row_ranges_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/frame_of_reference_page_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/block_bloom_filter_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bloom_filter_index_reader_writer_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/zone_map_index_test
${DORIS_TEST_BINARY_DIR}/olap/txn_manager_test
${DORIS_TEST_BINARY_DIR}/olap/storage_types_test
${DORIS_TEST_BINARY_DIR}/olap/generic_iterators_test
