build column oriented ddl sstable with absolute row offset

This commit is contained in:
simonjoylet
2024-01-11 07:18:01 +00:00
committed by ob-robot
parent eb22bf2855
commit 96e1ca2713
6 changed files with 218 additions and 81 deletions

View File

@ -544,7 +544,7 @@ int ObTabletDDLUtil::prepare_index_data_desc(ObTablet &tablet,
int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet,
const ObTabletDDLParam &ddl_param,
const ObIArray<const ObDataMacroBlockMeta *> &meta_array,
const ObIArray<ObDDLBlockMeta> &meta_array,
const ObSSTable *first_ddl_sstable,
const ObStorageSchema *storage_schema,
common::ObArenaAllocator &allocator,
@ -568,7 +568,7 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet,
storage_schema,
data_desc))) {
LOG_WARN("prepare data store desc failed", K(ret), K(ddl_param));
} else if (FALSE_IT(macro_block_column_count = meta_array.empty() ? 0 : meta_array.at(0)->get_meta_val().column_count_)) {
} else if (FALSE_IT(macro_block_column_count = meta_array.empty() ? 0 : meta_array.at(0).block_meta_->get_meta_val().column_count_)) {
} else if (meta_array.count() > 0 && OB_FAIL(data_desc.get_col_desc().mock_valid_col_default_checksum_array(macro_block_column_count))) {
LOG_WARN("mock valid column default checksum failed", K(ret), "firt_macro_block_meta", to_cstring(meta_array.at(0)), K(ddl_param));
} else if (OB_FAIL(sstable_index_builder.init(data_desc.get_desc(),
@ -578,17 +578,26 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet,
} else if (OB_FAIL(index_block_rebuilder.init(sstable_index_builder,
false/*need_sort*/,
nullptr/*task_idx*/,
true/*use_absolute_offset*/))) {
ddl_param.table_key_.is_ddl_merge_sstable()/*use_absolute_offset*/))) {
LOG_WARN("fail to alloc index builder", K(ret));
} else if (meta_array.empty()) {
// do nothing
} else {
if (ddl_param.table_key_.is_ddl_merge_sstable()) {
for (int64_t i = 0; OB_SUCC(ret) && i < meta_array.count(); ++i) {
if (OB_FAIL(index_block_rebuilder.append_macro_row(*meta_array.at(i)))) {
const ObDDLBlockMeta &ddl_block_meta = meta_array.at(i);
if (OB_FAIL(index_block_rebuilder.append_macro_row(*ddl_block_meta.block_meta_, ddl_block_meta.end_row_offset_))) {
LOG_WARN("append block meta failed", K(ret), K(i), K(ddl_block_meta));
}
}
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < meta_array.count(); ++i) {
if (OB_FAIL(index_block_rebuilder.append_macro_row(*meta_array.at(i).block_meta_))) {
LOG_WARN("append block meta failed", K(ret), K(i));
}
}
}
}
if (OB_SUCC(ret)) {
if (OB_FAIL(index_block_rebuilder.close())) {
LOG_WARN("close index block rebuilder failed", K(ret));
@ -764,13 +773,129 @@ int get_sstables(const ObIArray<ObDDLKVHandle> &frozen_ddl_kvs, const int64_t cg
}
return ret;
}
// Builds an unopened iterator: the init flag is cleared and every pointer
// member starts out null. ddl_iter_ is left to its default construction.
ObDDLMacroBlockIterator::ObDDLMacroBlockIterator()
  : is_inited_(false),
    sstable_(nullptr),
    allocator_(nullptr),
    macro_block_iter_(nullptr),
    sec_meta_iter_(nullptr)
{}
// Destroys and frees the underlying iterator(s) held by this object.
//
// sec_meta_iter_ is created in open() from the allocator passed there, and
// macro_block_iter_ is filled in by ObSSTable::scan_macro_block() with that
// same allocator (presumably allocated from it — TODO confirm). Both are
// returned to allocator_ here.
//
// If an iterator pointer is set but allocator_ is null, nothing can be freed
// safely; the condition is reported with LOG_ERROR and the memory is left
// alone.
//
// The two iterators are released independently, so neither is skipped when
// both happen to be set.
ObDDLMacroBlockIterator::~ObDDLMacroBlockIterator()
{
  if (nullptr != macro_block_iter_ || nullptr != sec_meta_iter_) {
    if (OB_ISNULL(allocator_)) {
      int ret = OB_ERR_SYS;
      LOG_ERROR("the iterator is allocated, but allocator is null", K(ret), KP(macro_block_iter_), KP(sec_meta_iter_), KP(allocator_));
    } else {
      if (nullptr != macro_block_iter_) {
        macro_block_iter_->~ObIMacroBlockIterator();
        allocator_->free(macro_block_iter_);
        macro_block_iter_ = nullptr;
      }
      if (nullptr != sec_meta_iter_) {
        sec_meta_iter_->~ObSSTableSecMetaIterator();
        allocator_->free(sec_meta_iter_);
        sec_meta_iter_ = nullptr;
      }
    }
  }
}
// Binds the iterator to one sstable and prepares the scan that matches the
// sstable's type:
//   - ddl mem sstable   : positions ddl_iter_ on the key btree of the memtable;
//   - ddl merge sstable : opens a macro block iterator via scan_macro_block();
//   - anything else     : allocates and opens a secondary meta iterator.
//
// @param sstable      table to iterate, must not be null
// @param query_range  range to scan, must be valid
// @param read_info    read info, must be valid
// @param allocator    used for the underlying iterator; its address is kept in
//                     allocator_ on success and used again in the destructor
// @return OB_SUCCESS, OB_INIT_TWICE when already opened, OB_INVALID_ARGUMENT on
//         bad input, OB_ALLOCATE_MEMORY_FAILED, or the error of the underlying
//         open call.
//
// sstable_, allocator_ and is_inited_ are only set when everything succeeded.
// NOTE(review): if scan_macro_block() fails after assigning macro_block_iter_,
// allocator_ stays null here — confirm the callee releases/resets the iterator
// on its own failure path.
int ObDDLMacroBlockIterator::open(ObSSTable *sstable, const ObDatumRange &query_range, const ObITableReadInfo &read_info, ObIAllocator &allocator)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(is_inited_)) {
    ret = OB_INIT_TWICE;
    LOG_WARN("init twice", K(ret), K(is_inited_));
  } else if (OB_UNLIKELY(!(nullptr != sstable && query_range.is_valid() && read_info.is_valid()))) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", K(ret), KP(sstable), K(query_range), K(read_info));
  } else if (sstable->is_ddl_mem_sstable()) {
    // ddl mem: the block metas live in a key btree, locate the range there
    ObDDLMemtable *mem_table = static_cast<ObDDLMemtable *>(sstable);
    if (OB_ISNULL(mem_table)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ddl memtable cast failed", K(ret));
    } else if (OB_FAIL(mem_table->get_block_meta_tree()->get_keybtree().set_key_range(
                   ddl_iter_,
                   ObDatumRowkeyWrapper(&query_range.get_start_key(), &read_info.get_datum_utils()),
                   query_range.is_left_open(),
                   ObDatumRowkeyWrapper(&query_range.get_end_key(), &read_info.get_datum_utils()),
                   query_range.is_right_open(),
                   INT64_MAX/*version*/))) {
      LOG_WARN("ddl memtable locate range failed", K(ret));
    }
  } else if (sstable->is_ddl_merge_sstable()) {
    // co ddl partial data: walk the macro blocks themselves
    if (OB_FAIL(sstable->scan_macro_block(query_range,
                                          read_info,
                                          allocator,
                                          macro_block_iter_,
                                          false/*is_reverse_scan*/,
                                          false/*need_record_micro_info*/,
                                          true/*need_scan_sec_meta*/))) {
      LOG_WARN("scan macro block iterator open failed", K(ret));
    }
  } else {
    // every other table type: iterate the secondary (data block) metas
    ObSSTableSecMetaIterator *new_iter = OB_NEWx(ObSSTableSecMetaIterator, &allocator);
    if (OB_ISNULL(new_iter)) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("allocate memory for sec meta iterator failed", K(ret));
    } else if (OB_FAIL(new_iter->open(query_range, ObMacroBlockMetaType::DATA_BLOCK_META, *sstable, read_info, allocator))) {
      LOG_WARN("open sec meta iterator failed", K(ret));
      // the member was never assigned, so release the local object right here
      new_iter->~ObSSTableSecMetaIterator();
      allocator.free(new_iter);
    } else {
      sec_meta_iter_ = new_iter;
    }
  }
  if (OB_SUCC(ret)) {
    allocator_ = &allocator;
    sstable_ = sstable;
    is_inited_ = true;
  }
  return ret;
}
// Fetches the next macro block meta from the sstable bound by open().
//
// @param data_macro_meta  [out] receives the block meta
// @param end_row_offset   [out] row offset associated with the block:
//        - ddl mem sstable   : the co_sstable_row_offset_ stored in the tree value;
//        - ddl merge sstable : start_row_offset_ + row_count_ of the block desc;
//        - other sstables    : always -1.
//        Only written when the call succeeds.
// @return OB_SUCCESS, OB_NOT_INIT when open() has not succeeded, or the code
//         returned by the underlying iterator. OB_ITER_END from the underlying
//         iterator is passed through without a warning in every branch.
int ObDDLMacroBlockIterator::get_next(ObDataMacroBlockMeta &data_macro_meta, int64_t &end_row_offset)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", K(ret), K(is_inited_));
  } else if (sstable_->is_ddl_mem_sstable()) {
    ObDatumRowkeyWrapper tree_key;
    ObBlockMetaTreeValue *tree_value = nullptr;
    if (OB_FAIL(ddl_iter_.get_next(tree_key, tree_value))) {
      if (OB_ITER_END != ret) {
        LOG_WARN("get next tree value failed", K(ret));
      }
    } else if (OB_FAIL(data_macro_meta.assign(*tree_value->block_meta_))) {
      LOG_WARN("assign block meta failed", K(ret));
    } else {
      end_row_offset = tree_value->co_sstable_row_offset_;
    }
  } else if (sstable_->is_ddl_merge_sstable()) {
    ObMacroBlockDesc block_desc;
    // the macro block iterator fills the caller's meta through this pointer
    block_desc.macro_meta_ = &data_macro_meta;
    if (OB_FAIL(macro_block_iter_->get_next_macro_block(block_desc))) {
      // OB_ITER_END is handled like in the other branches: no warning for it
      if (OB_ITER_END != ret) {
        LOG_WARN("get next macro block failed", K(ret));
      }
    } else {
      end_row_offset = block_desc.start_row_offset_ + block_desc.row_count_;
    }
  } else {
    if (OB_FAIL(sec_meta_iter_->get_next(data_macro_meta))) {
      if (OB_ITER_END != ret) {
        LOG_WARN("get data macro meta failed", K(ret));
      }
    } else {
      // no row offset is produced on this path
      end_row_offset = -1;
    }
  }
  return ret;
}
// for cg sstable, endkey is end row id, confirm read_info not used
int get_sorted_meta_array(
const ObIArray<ObSSTable *> &sstables,
const ObITableReadInfo &read_info,
ObBlockMetaTree &meta_tree,
ObIAllocator &allocator,
ObArray<const ObDataMacroBlockMeta *> &sorted_metas)
ObArray<ObDDLBlockMeta> &sorted_metas)
{
int ret = OB_SUCCESS;
sorted_metas.reset();
@ -778,27 +903,24 @@ int get_sorted_meta_array(
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(sstables), K(read_info), K(meta_tree));
} else {
SMART_VAR(ObSSTableSecMetaIterator, meta_iter) {
ObDatumRange query_range;
query_range.set_whole_range();
ObDataMacroBlockMeta data_macro_meta;
for (int64_t i = 0; OB_SUCC(ret) && i < sstables.count(); ++i) {
ObSSTable *cur_sstable = sstables.at(i);
ObDDLMacroBlockIterator block_iter;
if (OB_ISNULL(cur_sstable)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(cur_sstable));
} else if (cur_sstable->is_empty()) {
// do nothing, skip
} else if (OB_FAIL(block_iter.open(cur_sstable, query_range, read_info, allocator))) {
LOG_WARN("open macro block iterator failed", K(ret), K(read_info), KPC(cur_sstable));
} else {
meta_iter.reset();
ObDataMacroBlockMeta *copied_meta = nullptr; // copied meta will destruct in the meta tree
if (OB_FAIL(meta_iter.open(query_range,
ObMacroBlockMetaType::DATA_BLOCK_META,
*cur_sstable,
read_info,
allocator))) {
LOG_WARN("sstable secondary meta iterator open failed", K(ret), KPC(cur_sstable), K(read_info));
} else {
int64_t end_row_offset = 0;
while (OB_SUCC(ret)) {
if (OB_FAIL(meta_iter.get_next(data_macro_meta))) {
if (OB_FAIL(block_iter.get_next(data_macro_meta, end_row_offset))) {
if (OB_ITER_END != ret) {
LOG_WARN("get data macro meta failed", K(ret));
} else {
@ -818,7 +940,7 @@ int get_sorted_meta_array(
LOG_WARN("hold macro block failed", K(ret));
} else if (OB_FAIL(data_macro_meta.deep_copy(copied_meta, allocator))) {
LOG_WARN("deep copy macro block meta failed", K(ret));
} else if (OB_FAIL(meta_tree.insert_macro_block(macro_handle, &copied_meta->end_key_, copied_meta))) { // useless co_sstable_row_offset
} else if (OB_FAIL(meta_tree.insert_macro_block(macro_handle, &copied_meta->end_key_, copied_meta, end_row_offset))) {
LOG_WARN("insert meta tree failed", K(ret), K(macro_handle), KPC(copied_meta));
copied_meta->~ObDataMacroBlockMeta();
} else {
@ -827,20 +949,18 @@ int get_sorted_meta_array(
}
}
}
}
LOG_INFO("append meta tree finished", K(ret), "table_key", cur_sstable->get_key(), "data_macro_block_cnt_in_sstable", cur_sstable->get_data_macro_block_count(),
K(meta_tree.get_macro_block_cnt()), "sstable_end_key", OB_ISNULL(copied_meta) ? "NOT_EXIST": to_cstring(copied_meta->end_key_));
}
}
}
}
if (OB_SUCC(ret)) {
if (OB_FAIL(meta_tree.get_sorted_meta_array(sorted_metas))) {
LOG_WARN("get sorted meta array failed", K(ret));
} else {
int64_t sstable_checksum = 0;
for (int64_t i = 0; OB_SUCC(ret) && i < sorted_metas.count(); ++i) {
const ObDataMacroBlockMeta *cur_macro_meta = sorted_metas.at(i);
const ObDataMacroBlockMeta *cur_macro_meta = sorted_metas.at(i).block_meta_;
sstable_checksum = ob_crc64_sse42(sstable_checksum, &cur_macro_meta->val_.data_checksum_, sizeof(cur_macro_meta->val_.data_checksum_));
FLOG_INFO("sorted meta array", K(i), "macro_block_id", cur_macro_meta->get_macro_id(), "data_checksum", cur_macro_meta->val_.data_checksum_, K(sstable_checksum), "macro_block_end_key", cur_macro_meta->end_key_);
}
@ -862,7 +982,7 @@ int compact_sstables(
int ret = OB_SUCCESS;
ObArenaAllocator arena("compact_sst", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID());
ObBlockMetaTree meta_tree;
ObArray<const ObDataMacroBlockMeta *> sorted_metas;
ObArray<ObDDLBlockMeta> sorted_metas;
if (OB_FAIL(meta_tree.init(tablet, ddl_param.table_key_, ddl_param.start_scn_, ddl_param.data_format_version_, storage_schema))) {
LOG_WARN("init meta tree failed", K(ret), K(ddl_param));
} else if (OB_FAIL(get_sorted_meta_array(sstables, read_info, meta_tree, arena, sorted_metas))) {

View File

@ -84,6 +84,22 @@ private:
DISALLOW_COPY_AND_ASSIGN(ObDDLTableMergeTask);
};
// Iterates the data macro block metas of a single sstable and reports an end
// row offset for each block. open() picks one of three underlying iterators
// depending on the sstable type (ddl mem sstable, ddl merge sstable, or any
// other sstable); get_next() reads from the one that was picked.
class ObDDLMacroBlockIterator final
{
public:
ObDDLMacroBlockIterator();
// Destroys and frees macro_block_iter_ / sec_meta_iter_ through allocator_.
~ObDDLMacroBlockIterator();
// Binds the iterator to `sstable` for `query_range`. Returns OB_INIT_TWICE if
// already opened and OB_INVALID_ARGUMENT for a null sstable or an invalid
// range / read_info. The address of `allocator` is stored on success and is
// used again by the destructor.
int open(blocksstable::ObSSTable *sstable, const blocksstable::ObDatumRange &query_range, const ObITableReadInfo &read_info, ObIAllocator &allocator);
// Outputs the next block meta and its end row offset. The offset is -1 when
// the sstable is neither a ddl mem sstable nor a ddl merge sstable.
int get_next(blocksstable::ObDataMacroBlockMeta &data_macro_meta, int64_t &end_row_offset);
private:
// set to true only after open() succeeded
bool is_inited_;
// sstable passed to open(); consulted by get_next() to choose the branch
blocksstable::ObSSTable *sstable_;
// allocator passed to open(); used by the destructor to free the iterators
ObIAllocator *allocator_;
// used when the sstable is a ddl merge sstable
blocksstable::ObIMacroBlockIterator *macro_block_iter_;
// used when the sstable is neither ddl mem nor ddl merge
blocksstable::ObSSTableSecMetaIterator *sec_meta_iter_;
// used when the sstable is a ddl mem sstable (key btree scan)
blocksstable::DDLBtreeIterator ddl_iter_;
};
class ObTabletDDLUtil
{
@ -101,7 +117,7 @@ public:
static int create_ddl_sstable(
ObTablet &tablet,
const ObTabletDDLParam &ddl_param,
const ObIArray<const blocksstable::ObDataMacroBlockMeta *> &meta_array,
const ObIArray<ObDDLBlockMeta> &meta_array,
const blocksstable::ObSSTable *first_ddl_sstable,
const ObStorageSchema *storage_schema,
common::ObArenaAllocator &allocator,

View File

@ -2751,7 +2751,7 @@ int ObTabletFullDirectLoadMgr::init_ddl_table_store(
ObTableHandleV2 sstable_handle;
ObTabletHandle new_tablet_handle;
ObTablesHandleArray empty_cg_sstable_handles;
ObArray<const ObDataMacroBlockMeta *> empty_meta_array;
ObArray<ObDDLBlockMeta> empty_meta_array;
empty_meta_array.set_attr(ObMemAttr(MTL_ID(), "TblFDL_EMA"));
ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK);

View File

@ -316,7 +316,7 @@ int ObBlockMetaTree::insert_macro_block(const ObDDLMacroHandle &macro_handle,
return ret;
}
int ObBlockMetaTree::get_sorted_meta_array(ObIArray<const ObDataMacroBlockMeta *> &meta_array)
int ObBlockMetaTree::get_sorted_meta_array(ObIArray<ObDDLBlockMeta> &meta_array)
{
int ret = OB_SUCCESS;
meta_array.reset();
@ -352,8 +352,13 @@ int ObBlockMetaTree::get_sorted_meta_array(ObIArray<const ObDataMacroBlockMeta *
} else if (((uint64_t)(tree_value) & 7ULL) != 0) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("invalid btree value", K(ret), K(tree_value));
} else if (OB_FAIL(meta_array.push_back(tree_value->block_meta_))) {
LOG_WARN("push back block meta failed", K(ret), K(*tree_value->block_meta_));
} else {
ObDDLBlockMeta ddl_block_meta;
ddl_block_meta.block_meta_ = tree_value->block_meta_;
ddl_block_meta.end_row_offset_ = tree_value->co_sstable_row_offset_;
if (OB_FAIL(meta_array.push_back(ddl_block_meta))) {
LOG_WARN("push back block meta failed", K(ret), K(ddl_block_meta));
}
}
}
if (OB_SUCC(ret)) {
@ -862,20 +867,6 @@ void ObDDLMemtable::set_scn_range(
key_.scn_range_.end_scn_ = end_scn;
}
// Resets `meta_array`, then fills it by delegating to
// block_meta_tree_.get_sorted_meta_array().
// Returns OB_NOT_INIT when this memtable has not been initialized; otherwise
// returns whatever the block meta tree returns.
int ObDDLMemtable::get_sorted_meta_array(
    ObIArray<const blocksstable::ObDataMacroBlockMeta *> &meta_array)
{
  int ret = OB_SUCCESS;
  // the output is cleared up front, so it is empty even on the error paths
  meta_array.reset();
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", K(ret), KP(this));
  }
  if (OB_SUCCESS == ret) {
    if (OB_FAIL(block_meta_tree_.get_sorted_meta_array(meta_array))) {
      LOG_WARN("get sorted array failed", K(ret));
    }
  }
  return ret;
}
int ObDDLMemtable::init_ddl_index_iterator(const blocksstable::ObStorageDatumUtils *datum_utils,
const bool is_reverse_scan,
blocksstable::ObDDLIndexBlockRowIterator *ddl_kv_index_iter)

View File

@ -63,6 +63,16 @@ public:
blocksstable::ObIndexBlockRowHeader header_;
};
// Pairs a data macro block meta with the end row offset recorded for that
// block. Element type of the sorted meta arrays that are handed to
// create_ddl_sstable().
struct ObDDLBlockMeta
{
public:
// starts with no meta attached and an end row offset of -1
ObDDLBlockMeta() : block_meta_(nullptr), end_row_offset_(-1) {}
TO_STRING_KV(KPC(block_meta_), K(end_row_offset_));
public:
// pointer to the block meta; this struct defines no destructor and never
// frees it
const blocksstable::ObDataMacroBlockMeta *block_meta_;
// end row offset of the block; -1 by default
int64_t end_row_offset_;
};
class ObBlockMetaTree
{
typedef keybtree::ObKeyBtree<blocksstable::ObDatumRowkeyWrapper, ObBlockMetaTreeValue *> KeyBtree;
@ -81,7 +91,7 @@ public:
int insert_macro_block(const ObDDLMacroHandle &macro_handle,
const blocksstable::ObDatumRowkey *rowkey,
const blocksstable::ObDataMacroBlockMeta *meta,
const int64_t co_sstable_row_offset = 0);
const int64_t co_sstable_row_offset);
int locate_key(const blocksstable::ObDatumRange &range,
const blocksstable::ObStorageDatumUtils &datum_utils,
blocksstable::DDLBtreeIterator &iter,
@ -102,11 +112,12 @@ public:
ObBlockMetaTreeValue *&tree_value) const;
int64_t get_macro_block_cnt() const { return macro_blocks_.count(); }
int get_last_rowkey(const blocksstable::ObDatumRowkey *&last_rowkey);
int get_sorted_meta_array(ObIArray<const blocksstable::ObDataMacroBlockMeta *> &meta_array);
int get_sorted_meta_array(ObIArray<ObDDLBlockMeta> &meta_array);
int exist(const blocksstable::ObDatumRowkey *rowkey, bool &is_exist);
const blocksstable::ObDataStoreDesc &get_data_desc() const { return data_desc_.get_desc(); }
bool is_valid() const { return is_inited_; }
int64_t get_memory_used() const;
const KeyBtree &get_keybtree() const { return block_tree_; }
TO_STRING_KV(K(is_inited_), K(macro_blocks_.count()), K(arena_.total()), K(data_desc_));
private:
@ -169,8 +180,6 @@ public:
void set_scn_range(
const share::SCN &start_scn,
const share::SCN &end_scn);
int get_sorted_meta_array(
ObIArray<const blocksstable::ObDataMacroBlockMeta *> &meta_array);
const ObBlockMetaTree *get_block_meta_tree() { return &block_meta_tree_; }
int init_ddl_index_iterator(const blocksstable::ObStorageDatumUtils *datum_utils,
const bool is_reverse_scan,

View File

@ -133,6 +133,7 @@ public:
OB_INLINE bool is_mini_sstable() const { return ObITable::is_mini_sstable(table_type_); }
OB_INLINE bool is_major_sstable() const { return ObITable::is_major_sstable(table_type_) || ObITable::is_meta_major_sstable(table_type_); }
OB_INLINE bool is_major_or_ddl_merge_sstable() const { return is_major_sstable() || ObITable::is_ddl_merge_sstable(table_type_); }
OB_INLINE bool is_ddl_merge_sstable() const { return ObITable::is_ddl_merge_sstable(table_type_); }
OB_INLINE bool is_meta_major_sstable() const { return ObITable::is_meta_major_sstable(table_type_); }
OB_INLINE bool is_multi_version_table() const { return ObITable::is_multi_version_table(table_type_); }
OB_INLINE bool is_ddl_sstable() const { return ObITable::is_ddl_sstable(table_type_); }