/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */

#include "ob_partition_range_spliter.h"
|
|
#include "storage/tablet/ob_table_store_util.h"
|
|
#include "storage/blocksstable/ob_sstable_sec_meta_iterator.h"
|
|
#include "storage/memtable/ob_memtable.h"
|
|
#include "compaction/ob_tablet_merge_ctx.h"
|
|
#include "share/rc/ob_tenant_base.h"
|
|
#include "tx/ob_trans_service.h"
|
|
#include "access/ob_multiple_scan_merge.h"
|
|
|
|
namespace oceanbase
{
using namespace common;
using namespace share::schema;
using namespace share;
using namespace blocksstable;
namespace storage
{

ObMacroEndkeyIterator::ObMacroEndkeyIterator()
  : endkeys_(),
    cur_idx_(0),
    iter_idx_(0),
    is_inited_(false)
{
}

void ObMacroEndkeyIterator::reset()
{
  endkeys_.reset();
  cur_idx_ = 0;
  iter_idx_ = 0;
  is_inited_ = false;
}

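// Samples macro-block endkeys from one sstable within range_info.store_range_.
// The secondary-meta iterator below is opened with skip_cnt, so roughly one endkey is
// collected per skip_cnt data macro blocks; the sstable is skipped entirely when its
// last rowkey sorts before the start key of the range.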
int ObMacroEndkeyIterator::open(
    const int64_t skip_cnt,
    const int64_t iter_idx,
    ObSSTable &sstable,
    ObRangeSplitInfo &range_info)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    STORAGE_LOG(WARN, "ObMacroEndkeyIterator init twice", K(ret), K(*this));
  } else if (OB_UNLIKELY(
      iter_idx < 0
      || skip_cnt <= 0
      || !sstable.is_valid()
      || !range_info.is_valid()
      || nullptr == range_info.key_allocator_)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to open ObMacroEndkeyIterator",
        K(ret), K(skip_cnt), K(iter_idx), K(sstable), K(range_info));
  } else {
    ObIAllocator *key_allocator = range_info.key_allocator_;
    ObSSTableSecMetaIterator *macro_iter = nullptr;
    ObDatumRowkey sstable_endkey;
    ObDataMacroBlockMeta macro_meta;
    ObArenaAllocator temp_allocator;
    ObDatumRange datum_range;
    const ObTableReadInfo *index_read_info = range_info.index_read_info_;
    const ObIArray<share::schema::ObColDesc> &col_descs = index_read_info->get_columns_desc();
    const ObStorageDatumUtils &datum_utils = index_read_info->get_datum_utils();
    int cmp_ret = 0;
    // Sample start from a specified point
    if (OB_FAIL(datum_range.from_range(*range_info.store_range_, temp_allocator))) {
      STORAGE_LOG(WARN, "Failed to transfer store range", K(ret), K(range_info));
    } else if (OB_FAIL(sstable.get_last_rowkey(*index_read_info, temp_allocator, sstable_endkey))) {
      STORAGE_LOG(WARN, "Failed to get last rowkey from sstable");
    } else if (OB_FAIL(sstable_endkey.compare(datum_range.get_start_key(), datum_utils, cmp_ret))) {
      STORAGE_LOG(WARN, "Failed to compare sstable endkey with range start key",
          K(ret), K(datum_range), K(sstable_endkey), K(datum_utils));
    } else if (cmp_ret < 0) {
      // sstable not in range
      STORAGE_LOG(DEBUG, "Skip empty range", K(ret), K(range_info));
    } else if (OB_FAIL(sstable.scan_secondary_meta(*key_allocator, datum_range,
        *index_read_info, DATA_BLOCK_META, macro_iter, false, skip_cnt))) {
      STORAGE_LOG(WARN, "Fail to scan secondary meta", K(ret), K(range_info));
    } else {
      ObStoreRowkey endkey;
      while (OB_SUCC(ret)) {
        if (OB_FAIL(macro_iter->get_next(macro_meta))) {
          if (OB_LIKELY(OB_ITER_END == ret)) {
            ret = OB_SUCCESS;
            break;
          } else {
            STORAGE_LOG(WARN, "Fail to get next macro meta", K(ret));
          }
        } else if (OB_FAIL(build_rowkey(macro_meta, col_descs, *key_allocator, endkey))) {
          STORAGE_LOG(WARN, "Fail to build new rowkey", K(ret), K(macro_meta));
        } else if (OB_UNLIKELY(!endkey.is_valid())) {
          ret = OB_ERR_UNEXPECTED;
          STORAGE_LOG(WARN, "Unexpected invalid endkey", K(ret), K(endkey));
        } else if (OB_FAIL(endkeys_.push_back(endkey))) {
          STORAGE_LOG(WARN, "Fail to push endkey in searray", K(ret), K(endkey));
        } else {
          endkey.reset();
        }
      }
      if (OB_NOT_NULL(macro_iter)) {
        macro_iter->~ObSSTableSecMetaIterator();
      }
    }

    if (OB_SUCC(ret)) {
      cur_idx_ = 0;
      iter_idx_ = iter_idx;
      is_inited_ = true;
    }
  }
  return ret;
}

int ObMacroEndkeyIterator::get_endkey_cnt(int64_t &endkey_cnt) const
{
  int ret = OB_SUCCESS;

  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObMacroEndkeyIterator is not init", K(ret));
  } else {
    endkey_cnt = endkeys_.count();
  }

  return ret;
}

int ObMacroEndkeyIterator::get_next_macro_block_endkey(ObMacroEndkey &endkey)
{
  int ret = OB_SUCCESS;
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObMacroEndkeyIterator is not init", K(ret));
  } else if (cur_idx_ >= endkeys_.count()) {
    ret = OB_ITER_END;
  } else {
    endkey.reset();
    endkey.rowkey_ = &endkeys_[cur_idx_];
    endkey.iter_idx_ = iter_idx_;
    ++cur_idx_;
  }
  return ret;
}

int ObMacroEndkeyIterator::build_rowkey(
    const ObDataMacroBlockMeta &macro_meta,
    const common::ObIArray<share::schema::ObColDesc> &col_descs,
    ObIAllocator &allocator,
    ObStoreRowkey &new_rowkey)
{
  int ret = OB_SUCCESS;
  ObDatumRowkey rowkey;
  ObStoreRowkey tmp_rowkey;
  const int64_t rowkey_col_cnt =
      macro_meta.val_.rowkey_count_ - ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt();
  if (OB_UNLIKELY(!macro_meta.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid data macro meta", K(ret), K(macro_meta));
  } else if (OB_FAIL(rowkey.assign(macro_meta.end_key_.datums_, rowkey_col_cnt))) {
    STORAGE_LOG(WARN, "Fail to construct src rowkey", K(ret), K(macro_meta));
  } else if (OB_FAIL(rowkey.to_store_rowkey(col_descs, allocator, tmp_rowkey))) {
    STORAGE_LOG(WARN, "Fail to transfer store rowkey", K(ret), K(rowkey));
  } else if (OB_FAIL(tmp_rowkey.deep_copy(new_rowkey, allocator))) {
    STORAGE_LOG(WARN, "Fail to deep copy rowkey", K(ret), K(tmp_rowkey));
  }
  return ret;
}

ObPartitionParallelRanger::ObPartitionParallelRanger(ObArenaAllocator &allocator)
  : store_range_(nullptr),
    endkey_iters_(),
    allocator_(allocator),
    comparor_(),
    range_heap_(comparor_),
    last_macro_endkey_(nullptr),
    total_endkey_cnt_(0),
    sample_cnt_(0),
    parallel_target_count_(0),
    is_inited_(false)
{
}

ObPartitionParallelRanger::~ObPartitionParallelRanger()
{
  reset();
}

void ObPartitionParallelRanger::reset()
{
  ObMacroEndkeyIterator *endkey_iter = nullptr;
  for (int64_t i = 0; i < endkey_iters_.count(); i++) {
    if (OB_NOT_NULL(endkey_iter = endkey_iters_.at(i))) {
      endkey_iter->~ObMacroEndkeyIterator();
      allocator_.free(endkey_iter);
    }
  }
  store_range_ = nullptr;
  endkey_iters_.reset();
  range_heap_.reset();
  comparor_.reset();
  last_macro_endkey_ = nullptr;
  total_endkey_cnt_ = 0;
  sample_cnt_ = 0;
  parallel_target_count_ = 0;
  is_inited_ = false;
}

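// Prepares the ranger for an sstable-backed range: computes the sample count, opens one
// ObMacroEndkeyIterator per sstable in range_info.tables_, and seeds the merge heap with
// the first sampled endkey of each iterator. Falls back to a single range when the
// parallel target is 1 or no macro block can be sampled.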
int ObPartitionParallelRanger::init(ObRangeSplitInfo &range_info)
{
  int ret = OB_SUCCESS;

  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    STORAGE_LOG(WARN, "ObPartitionParallelRanger is inited twice", K(ret));
  } else if (OB_UNLIKELY(!range_info.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to init ObPartitionParallelRanger", K(ret), K(range_info));
  } else if (parallel_target_count_ == 1) {
    // construct single range
  } else if (OB_FAIL(calc_sample_count(range_info))) {
    STORAGE_LOG(WARN, "Failed to calculate sample count", K(ret), K(range_info));
  } else if (sample_cnt_ == 0) {
    // not enough macro blocks, construct single range
  } else if (OB_FAIL(init_macro_iters(range_info))) {
    STORAGE_LOG(WARN, "Failed to init macro iters", K(ret), K(range_info));
  } else if (OB_FAIL(build_parallel_range_heap())) {
    STORAGE_LOG(WARN, "Failed to build parallel range heap", K(ret));
  }
  if (OB_SUCC(ret)) {
    store_range_ = range_info.store_range_;
    parallel_target_count_ = range_info.parallel_target_count_;
    is_inited_ = true;
    STORAGE_LOG(DEBUG, "succ to init partition parallel ranger", K(*this));
  }

  return ret;
}

int ObPartitionParallelRanger::calc_sample_count(ObRangeSplitInfo &range_info)
{
  int ret = OB_SUCCESS;
  // decide the sample count according to the largest minor sstable
  if (OB_UNLIKELY(0 == range_info.parallel_target_count_
      || range_info.max_macro_block_count_ <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid range split info", K(ret), K(range_info));
  } else {
    sample_cnt_ = range_info.max_macro_block_count_ / range_info.parallel_target_count_;
    if (sample_cnt_ > 2) {
      sample_cnt_ /= 2;
    }
    STORAGE_LOG(DEBUG, "finish calc sample cnt", K(range_info), K_(sample_cnt));
  }

  return ret;
}

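// Opens one endkey iterator per sstable with the shared sample count and accumulates the
// total number of sampled endkeys; on failure every iterator created so far is destroyed
// and its memory is returned to allocator_.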
int ObPartitionParallelRanger::init_macro_iters(ObRangeSplitInfo &range_info)
{
  int ret = OB_SUCCESS;

  if (OB_UNLIKELY(sample_cnt_ <= 0)) {
    ret = OB_ERR_UNEXPECTED;
    STORAGE_LOG(WARN, "Unexpected sample count to init macro endkey iters", K(ret), K_(sample_cnt));
  } else {
    ObMacroEndkeyIterator *endkey_iter = nullptr;
    total_endkey_cnt_ = 0;
    int64_t iter_idx = 0;
    for (int64_t i = 0; OB_SUCC(ret) && i < range_info.tables_->count(); i++) {
      int64_t sample_count = sample_cnt_;
      int64_t endkey_cnt = 0;
      void *buf = nullptr;
      ObITable *table = nullptr;
      if (OB_FAIL(ret) || sample_count <= 0) {
      } else if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObMacroEndkeyIterator)))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        STORAGE_LOG(WARN, "Failed to alloc memory for endkey iter", K(ret));
      } else if (FALSE_IT(endkey_iter = new (buf) ObMacroEndkeyIterator())) {
      } else if (OB_ISNULL(table = range_info.tables_->at(i))) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected null pointer to table", K(ret), KP(table));
      } else if (OB_UNLIKELY(!table->is_sstable())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected table type", K(ret), KPC(table));
      } else if (OB_FAIL(endkey_iter->open(
          sample_count, iter_idx, *(static_cast<ObSSTable *>(table)), range_info))) {
        STORAGE_LOG(WARN, "Failed to scan macro block in sstable",
            K(ret), KP(table), K(sample_count), K(iter_idx));
      } else if (OB_FAIL(endkey_iters_.push_back(endkey_iter))) {
        STORAGE_LOG(WARN, "Failed to push back macro block iter", K(ret));
      } else if (OB_FAIL(endkey_iter->get_endkey_cnt(endkey_cnt))) {
        STORAGE_LOG(WARN, "Failed to get endkey count from iter", K(ret));
      } else {
        STORAGE_LOG(DEBUG, "succ to init sample macro iter",
            KP(table), K_(sample_cnt), K(endkey_cnt), K(iter_idx));
        // last macro block should be ignored
        total_endkey_cnt_ += endkey_cnt;
        iter_idx++;
      }
    }

    if (OB_FAIL(ret)) {
      for (int64_t i = 0; i < endkey_iters_.count(); i++) {
        if (OB_NOT_NULL(endkey_iter = endkey_iters_.at(i))) {
          endkey_iter->~ObMacroEndkeyIterator();
          allocator_.free(endkey_iter);
        }
      }
      endkey_iters_.reset();
    }
  }

  return ret;
}

int ObPartitionParallelRanger::build_parallel_range_heap()
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(endkey_iters_.count() <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to build parallel range heap", K(ret), K_(endkey_iters));
  } else {
    ObMacroEndkeyIterator *endkey_iter = nullptr;
    for (int64_t i = 0; OB_SUCC(ret) && i < endkey_iters_.count(); i++) {
      ObMacroEndkey endkey;
      if (OB_ISNULL(endkey_iter = endkey_iters_.at(i))) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected null macro block iter", K(ret), K(i));
      } else if (OB_FAIL(endkey_iter->get_next_macro_block_endkey(endkey))) {
        if (OB_ITER_END == ret) {
          ret = OB_SUCCESS;
        } else {
          STORAGE_LOG(WARN, "Failed to get next macro block endkey", K(ret), K(i));
        }
      } else if (OB_UNLIKELY(!endkey.is_valid())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected invalid macro block endkey", K(ret), K(endkey));
      } else if (OB_FAIL(range_heap_.push(endkey))) {
        STORAGE_LOG(WARN, "Failed to push macro endkey to merge heap", K(ret), K(i), K(endkey));
      } else if (OB_FAIL(comparor_.get_error_code())) {
        STORAGE_LOG(WARN, "Failed to compare macro endkeys", K(ret));
      }
    }
  }

  return ret;
}

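// Returns the globally smallest remaining sampled endkey across all sstables. The
// iterator that produced the previous heap top is advanced first (replacing or popping
// the top), then the new heap top is copied into rowkey; OB_ITER_END is returned once
// the merge heap is empty.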
int ObPartitionParallelRanger::get_next_macro_endkey(ObStoreRowkey &rowkey)
{
  int ret = OB_SUCCESS;

  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObPartitionParallelRanger is not inited", K(ret), K(*this));
  } else {
    ObMacroEndkey endkey;
    ObMacroEndkeyIterator *endkey_iter = nullptr;
    // the last row has already been taken from the merge heap
    if (OB_NOT_NULL(last_macro_endkey_) && last_macro_endkey_->is_valid()) {
      if (OB_ISNULL(endkey_iter = endkey_iters_.at(last_macro_endkey_->iter_idx_))) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected null macro block iter", K(ret), K_(last_macro_endkey));
      } else if (OB_UNLIKELY(last_macro_endkey_->iter_idx_ != endkey_iter->iter_idx_)) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected iter idx", K(ret), K(*last_macro_endkey_), K(*endkey_iter));
      } else if (OB_FAIL(endkey_iter->get_next_macro_block_endkey(endkey))) {
        if (OB_ITER_END != ret) {
          STORAGE_LOG(WARN, "Failed to get next macro block endkey", K(ret));
        } else if (OB_FAIL(range_heap_.pop())) {
          STORAGE_LOG(WARN, "Failed to pop the last macro endkey", K(ret));
        } else if (OB_FAIL(comparor_.get_error_code())) {
          STORAGE_LOG(WARN, "Failed to compare macro endkeys", K(ret));
        }
      } else if (OB_UNLIKELY(!endkey.is_valid())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected invalid macro block endkey", K(ret), K(endkey));
      } else if (OB_FAIL(range_heap_.replace_top(endkey))) {
        STORAGE_LOG(WARN, "Failed to replace top of the merge heap", K(ret), K(endkey));
      }
    }
    if (OB_SUCC(ret)) {
      if (range_heap_.empty()) {
        ret = OB_ITER_END;
        last_macro_endkey_ = nullptr;
      } else if (OB_FAIL(range_heap_.top(last_macro_endkey_))) {
        STORAGE_LOG(WARN, "Failed to get top macro endkey from heap", K(ret), KPC(last_macro_endkey_));
      } else if (OB_ISNULL(last_macro_endkey_) || OB_UNLIKELY(!last_macro_endkey_->is_valid())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected invalid macro endkey", K(ret), KPC(last_macro_endkey_));
      } else if (OB_FAIL(rowkey.assign(
          last_macro_endkey_->rowkey_->get_obj_ptr(),
          last_macro_endkey_->rowkey_->get_obj_cnt()))) {
        STORAGE_LOG(WARN, "Failed to assign rowkey", K(ret), KPC(last_macro_endkey_));
      }
    }
  }

  return ret;
}

int ObPartitionParallelRanger::check_rowkey_equal(const ObStoreRowkey &rowkey1, const ObStoreRowkey &rowkey2, bool &equal)
{
  int ret = OB_SUCCESS;
  equal = false;
  if (OB_UNLIKELY(!rowkey1.is_valid() || !rowkey2.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to check rowkey equal", K(ret), K(rowkey1), K(rowkey2));
  } else if (rowkey1.is_min()) {
    equal = rowkey2.is_min();
  } else if (rowkey1.is_max()) {
    equal = rowkey2.is_max();
  } else if (0 == rowkey1.compare(rowkey2)) {
    equal = true;
  }
  return ret;
}

int ObPartitionParallelRanger::check_continuous(ObIArray<ObStoreRange> &range_array)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(range_array.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to check continuous for range spliter", K(ret), K(range_array));
  } else {
    bool equal = false;
    ObStoreRowkey macro_endkey = store_range_->get_start_key();
    for (int64_t i = 0; OB_SUCC(ret) && i < range_array.count(); i++) {
      if (OB_UNLIKELY(!range_array.at(i).is_valid())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected invalid range", K(ret), K(i), K(range_array.at(i)));
      } else if (OB_FAIL(check_rowkey_equal(range_array.at(i).get_start_key(), macro_endkey, equal))) {
        STORAGE_LOG(WARN, "Failed to check rowkey equal", K(ret), K(i));
      } else if (!equal) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected range array which is not continuous", K(ret), K(i),
            K(range_array.at(i)), K(macro_endkey), KPC(store_range_));
      } else {
        macro_endkey = range_array.at(i).get_end_key();
      }
    }
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(check_rowkey_equal(store_range_->get_end_key(), macro_endkey, equal))) {
      STORAGE_LOG(WARN, "Failed to check rowkey equal", K(ret));
    } else if (!equal) {
      ret = OB_ERR_UNEXPECTED;
      STORAGE_LOG(WARN, "Unexpected range array which is not continuous", K(ret), K(macro_endkey), KPC(store_range_));
    }
  }
  return ret;
}

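// Splits store_range_ into at most parallel_target_count_ ranges. Endkeys are consumed in
// ascending order from the merge heap; roughly every (total_endkey_cnt_ /
// parallel_target_count_) endkeys one split point is emitted, with the remainder spread
// over the first ranges via range_skip_revise_cnt. The final range is closed with the
// end key of store_range_, and the result is verified to be continuous.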
int ObPartitionParallelRanger::split_ranges(
    const bool for_compaction,
    ObIAllocator &allocator,
    ObIArray<ObStoreRange> &range_array)
{
  int ret = OB_SUCCESS;
  ObStoreRange split_range;

  range_array.reset();
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObPartitionParallelRanger is not init", K(ret));
  } else if (sample_cnt_ == 0 || parallel_target_count_ == 1
      || parallel_target_count_ > total_endkey_cnt_ + 1) {
    // cannot afford the specified parallel target count, fall back to a single whole range
    if (OB_FAIL(construct_single_range(allocator,
                                       store_range_->get_start_key(),
                                       store_range_->get_end_key(),
                                       store_range_->get_border_flag(),
                                       for_compaction,
                                       split_range))) {
      STORAGE_LOG(WARN, "failed to construct single range", K(ret), KPC(store_range_));
    } else if (OB_FAIL(range_array.push_back(split_range))) {
      STORAGE_LOG(WARN, "failed to push back merge range", K(ret), K(split_range));
    } else {
      STORAGE_LOG(DEBUG, "build single range", K(split_range), K_(sample_cnt), K_(parallel_target_count),
          K_(total_endkey_cnt));
    }
  } else {
    ObStoreRowkey macro_endkey;
    ObStoreRowkey last_macro_endkey = store_range_->get_start_key();
    const int64_t range_skip_cnt = total_endkey_cnt_ / parallel_target_count_;
    const int64_t endkey_left_cnt = total_endkey_cnt_ % parallel_target_count_;
    int64_t iter_cnt = 0;
    int64_t range_skip_revise_cnt = endkey_left_cnt > 0 ? 1 : 0;
    ObBorderFlag border_flag;
    border_flag.set_data(store_range_->get_border_flag().get_data());
    border_flag.set_inclusive_end();

    STORAGE_LOG(DEBUG, "start to iter endkeys to split range", KPC(store_range_), K_(total_endkey_cnt),
        K_(parallel_target_count), K(range_skip_cnt), K(endkey_left_cnt), K(range_skip_revise_cnt));

    while (OB_SUCC(ret) && OB_SUCC(get_next_macro_endkey(macro_endkey))) {
      if (macro_endkey.compare(store_range_->get_end_key()) >= 0) {
        // met an endkey that is not smaller than the end key of the store range, break
        break;
      } else if (++iter_cnt < range_skip_cnt + range_skip_revise_cnt) {
        // too many split ranges, need to skip
      } else if (macro_endkey.compare(last_macro_endkey) <= 0) {
        // duplicate rowkey because the last start key was rewritten with max trans version
        // continue
      } else if (OB_FAIL(construct_single_range(
          allocator, last_macro_endkey, macro_endkey, border_flag, for_compaction, split_range))) {
        STORAGE_LOG(WARN, "failed to construct single range",
            K(ret), K(last_macro_endkey), K(macro_endkey));
      } else if (OB_FAIL(range_array.push_back(split_range))) {
        STORAGE_LOG(WARN, "failed to push back merge range", K(ret), K(split_range));
      } else {
        border_flag.unset_inclusive_start();
        split_range.reset();
        range_skip_revise_cnt = range_array.count() < endkey_left_cnt ? 1 : 0;
        iter_cnt = 0;
        last_macro_endkey = macro_endkey;
        if (range_array.count() == parallel_target_count_ - 1) {
          // enough ranges, break and add the last range with max value
          break;
        }
      }
    }
    if (OB_FAIL(ret) && OB_ITER_END != ret) {
      STORAGE_LOG(WARN, "Failed to get next macro endkey", K(ret), K(macro_endkey));
    } else {
      ret = OB_SUCCESS;
      macro_endkey = store_range_->get_end_key();
      if (!store_range_->get_border_flag().inclusive_end()) {
        border_flag.unset_inclusive_end();
      }
      if (OB_FAIL(construct_single_range(
          allocator, last_macro_endkey, macro_endkey, border_flag, for_compaction, split_range))) {
        STORAGE_LOG(WARN, "Failed to construct single range",
            K(ret), K(last_macro_endkey), K(macro_endkey));
      } else if (OB_FAIL(range_array.push_back(split_range))) {
        STORAGE_LOG(WARN, "Failed to push back merge range", K(ret), K(split_range));
      }
    }

    // check that all ranges are continuous
    if (OB_SUCC(ret) && OB_FAIL(check_continuous(range_array))) {
      STORAGE_LOG(WARN, "Failed to check range array continuous", K(ret), K(range_array));
    }
  }

  return ret;
}

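// Deep copies a rowkey into allocator-owned memory; when for_compaction is true the copy
// is padded with the extra multi-version rowkey columns, each set to the max value.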
int ObPartitionParallelRanger::build_new_rowkey(const ObStoreRowkey &rowkey,
                                                const bool for_compaction,
                                                ObIAllocator &allocator,
                                                ObStoreRowkey &new_rowkey)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!rowkey.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to build new rowkey", K(ret), K(rowkey));
  } else {
    const int64_t extra_rowkey_cnt =
        for_compaction ? ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt() : 0;
    const int64_t rowkey_col_cnt = rowkey.get_obj_cnt() + extra_rowkey_cnt;
    const int64_t total_size = rowkey.get_deep_copy_size() + sizeof(ObObj) * extra_rowkey_cnt;
    char *ptr = nullptr;
    if (OB_ISNULL(ptr = reinterpret_cast<char *>(allocator.alloc(total_size)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      STORAGE_LOG(WARN, "Failed to allocate memory", K(ret), K(total_size));
    } else {
      ObObj *obj_ptr = reinterpret_cast<ObObj *>(ptr);
      int64_t pos = sizeof(ObObj) * rowkey_col_cnt;
      for (int64_t i = 0; OB_SUCC(ret) && i < rowkey.get_obj_cnt(); i++) {
        if (OB_FAIL(obj_ptr[i].deep_copy(rowkey.get_obj_ptr()[i], ptr, total_size, pos))) {
          STORAGE_LOG(WARN, "Failed to deep copy object", K(ret), K(i), K(rowkey), K(total_size), K(pos));
        }
      }
      for (int64_t i = rowkey.get_obj_cnt(); OB_SUCC(ret) && i < rowkey_col_cnt; i++) {
        obj_ptr[i].set_max_value();
      }
      if (OB_SUCC(ret)) {
        if (OB_FAIL(new_rowkey.assign(obj_ptr, rowkey_col_cnt))) {
          STORAGE_LOG(WARN, "Failed to assign new rowkey", K(ret));
        }
      }
    }
  }
  return ret;
}

int ObPartitionParallelRanger::construct_single_range(ObIAllocator &allocator,
                                                      const ObStoreRowkey &start_key,
                                                      const ObStoreRowkey &end_key,
                                                      const ObBorderFlag &border_flag,
                                                      const bool for_compaction,
                                                      ObStoreRange &range)
{
  int ret = OB_SUCCESS;

  if (end_key.compare(start_key) < 0) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid keys to construct range", K(ret), K(start_key), K(end_key));
  } else if (start_key.is_min()) {
    range.get_start_key().set_min();
  } else if (OB_FAIL(build_new_rowkey(start_key, for_compaction, allocator, range.get_start_key()))) {
    STORAGE_LOG(WARN, "Failed to deep copy macro start endkey", K(ret), K(start_key));
  }
  if (OB_FAIL(ret)) {
  } else if (end_key.is_max()) {
    range.get_end_key().set_max();
  } else if (OB_FAIL(build_new_rowkey(end_key, for_compaction, allocator, range.get_end_key()))) {
    STORAGE_LOG(WARN, "Failed to deep copy macro end endkey", K(ret), K(end_key));
  }
  if (OB_SUCC(ret)) {
    range.set_border_flag(border_flag);
    if (start_key.is_min()) {
      range.set_left_open();
    }
    if (end_key.is_max()) {
      range.set_right_open();
    }
    if (OB_NOT_NULL(store_range_)) {
      range.set_table_id(store_range_->get_table_id());
    }
  }

  return ret;
}

ObRangeSplitInfo::ObRangeSplitInfo()
  : store_range_(nullptr),
    index_read_info_(nullptr),
    tables_(nullptr),
    total_size_(0),
    parallel_target_count_(1),
    max_macro_block_count_(0),
    key_allocator_(nullptr),
    is_sstable_(false)
{}

ObRangeSplitInfo::~ObRangeSplitInfo()
{
  reset();
}

void ObRangeSplitInfo::reset()
{
  store_range_ = nullptr;
  index_read_info_ = nullptr;
  tables_ = nullptr;
  total_size_ = 0;
  parallel_target_count_ = 1;
  max_macro_block_count_ = 0;
  key_allocator_ = nullptr;
  is_sstable_ = false;
}

ObPartitionRangeSpliter::ObPartitionRangeSpliter()
  : allocator_(),
    parallel_ranger_(allocator_)
{
}

ObPartitionRangeSpliter::~ObPartitionRangeSpliter()
{
  reset();
}

void ObPartitionRangeSpliter::reset()
{
  parallel_ranger_.reset();
  allocator_.reset();
}

int ObPartitionRangeSpliter::get_range_split_info(ObIArray<ObITable *> &tables,
                                                  const ObTableReadInfo &index_read_info,
                                                  const ObStoreRange &store_range,
                                                  ObRangeSplitInfo &range_info)
{
  int ret = OB_SUCCESS;
  range_info.reset();

  if (OB_UNLIKELY(tables.empty() || !store_range.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to init ObPartitionRangeSpliter", K(ret), K(tables),
        K(store_range));
  } else {
    // build range paras
    range_info.store_range_ = &store_range;
    range_info.tables_ = &tables;
    bool is_sstable = false;
    int64_t size = 0;
    int64_t macro_block_cnt = 0;
    for (int64_t i = 0; OB_SUCC(ret) && i < tables.count(); i++) {
      ObITable *table = tables.at(i);
      macro_block_cnt = 0;
      if (OB_ISNULL(table)) {
        ret = OB_INVALID_ARGUMENT;
        STORAGE_LOG(WARN, "Invalid null table pointer", K(ret), KP(table));
      } else if (i == 0) {
        is_sstable = table->is_sstable();
      }
      if (OB_FAIL(ret)) {
      } else if (OB_UNLIKELY(0 != i && !is_sstable)) {
        // Multi memtable
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected multi memtable range info",
            K(ret), K(range_info), KPC(table), K(is_sstable), K(i));
      } else if (OB_UNLIKELY(!is_sstable && table->is_sstable())) {
        // Memtable/sstable mixed up
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected memtable and sstable mixed up",
            K(ret), K(range_info), KPC(table), K(is_sstable), K(i));
      } else if (OB_FAIL(get_single_range_info(
          *range_info.store_range_, index_read_info, table, size, macro_block_cnt))) {
        STORAGE_LOG(WARN, "Failed to get single range info", K(ret), K(i), KPC(table));
      } else {
        range_info.total_size_ += size;
        range_info.index_read_info_ = &index_read_info;
        range_info.max_macro_block_count_ = MAX(macro_block_cnt, range_info.max_macro_block_count_);
      }
      STORAGE_LOG(DEBUG, "get range para from one table",
          K(ret), K(i), KP(table), K(size), K(macro_block_cnt));
    }
    if (OB_SUCC(ret)) {
      range_info.is_sstable_ = is_sstable;
    }
    STORAGE_LOG(DEBUG, "Get range split info", K(ret), K(range_info));
  }

  return ret;
}

int ObPartitionRangeSpliter::get_single_range_info(const ObStoreRange &store_range,
                                                   const ObTableReadInfo &index_read_info,
                                                   ObITable *table,
                                                   int64_t &total_size,
                                                   int64_t &macro_block_cnt)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(table)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid table pointer", K(ret), KP(table));
  } else if (!table->is_sstable()) {
    memtable::ObMemtable *memtable = static_cast<memtable::ObMemtable *>(table);
    int64_t row_count = 0;
    if (OB_FAIL(memtable->estimate_phy_size(&store_range.get_start_key(),
                                            &store_range.get_end_key(),
                                            total_size,
                                            row_count))) {
      STORAGE_LOG(WARN, "Failed to get single range info from memtable", K(ret), K(store_range));
    }
  } else {
    ObSSTable *sstable = static_cast<ObSSTable *>(table);
    if (store_range.is_whole_range()) {
      total_size = sstable->get_meta().get_basic_meta().occupy_size_;
      macro_block_cnt = sstable->get_meta().get_basic_meta().data_macro_block_count_;
    } else if (0 == sstable->get_meta().get_basic_meta().data_macro_block_count_) {
      total_size = 0;
      macro_block_cnt = 0;
    } else {
      ObArenaAllocator temp_allocator;
      ObDatumRange datum_range;
      ObDatumRowkey sstable_endkey;
      total_size = 0;
      macro_block_cnt = 0;
      ObSSTableSecMetaIterator *macro_meta_iter = nullptr;
      ObDataMacroBlockMeta macro_meta;
      const ObStorageDatumUtils &datum_utils = index_read_info.get_datum_utils();
      int cmp_ret = 0;
      if (OB_FAIL(datum_range.from_range(store_range, temp_allocator))) {
        STORAGE_LOG(WARN, "Failed to transfer store range", K(ret), K(store_range));
      } else if (OB_FAIL(sstable->get_last_rowkey(index_read_info, temp_allocator, sstable_endkey))) {
        STORAGE_LOG(WARN, "Failed to get last rowkey from sstable");
      } else if (OB_FAIL(sstable_endkey.compare(datum_range.get_start_key(), datum_utils, cmp_ret))) {
        STORAGE_LOG(WARN, "Failed to compare sstable endkey with range start key",
            K(ret), K(datum_range), K(sstable_endkey), K(datum_utils));
      } else if (cmp_ret < 0) {
        // sstable not in range
        STORAGE_LOG(DEBUG, "Skip empty range", K(ret), K(datum_range), KPC(sstable));
      } else if (OB_FAIL(sstable->scan_secondary_meta(
          allocator_,
          datum_range,
          index_read_info,
          ObMacroBlockMetaType::DATA_BLOCK_META,
          macro_meta_iter))) {
        STORAGE_LOG(DEBUG, "Skip empty range", K(ret), K(datum_range), KPC(sstable));
      } else {
        while (OB_SUCC(ret)) {
          if (OB_FAIL(macro_meta_iter->get_next(macro_meta))) {
            if (OB_UNLIKELY(OB_ITER_END != ret)) {
              STORAGE_LOG(WARN, "Fail to get next macro block meta", K(ret));
            } else {
              ret = OB_SUCCESS;
              break;
            }
          } else {
            total_size += macro_meta.val_.occupy_size_;
            macro_block_cnt++;
          }
        }
        if (OB_NOT_NULL(macro_meta_iter)) {
          macro_meta_iter->~ObSSTableSecMetaIterator();
        }
      }
    }
  }
  return ret;
}

int ObPartitionRangeSpliter::build_single_range(const bool for_compaction,
                                                ObRangeSplitInfo &range_info,
                                                ObIAllocator &allocator,
                                                ObIArray<ObStoreRange> &range_array)
{
  int ret = OB_SUCCESS;
  ObStoreRange dst_range;

  if (OB_FAIL(parallel_ranger_.construct_single_range(allocator,
                                                      range_info.store_range_->get_start_key(),
                                                      range_info.store_range_->get_end_key(),
                                                      range_info.store_range_->get_border_flag(),
                                                      for_compaction,
                                                      dst_range))) {
    STORAGE_LOG(WARN, "failed to construct single range", K(ret), K(range_info));
  } else if (FALSE_IT(dst_range.set_table_id(range_info.store_range_->get_table_id()))) {
  } else if (OB_FAIL(range_array.push_back(dst_range))) {
    STORAGE_LOG(WARN, "failed to push back merge range", K(ret), K(dst_range));
  }

  return ret;
}

int ObPartitionRangeSpliter::split_ranges(ObRangeSplitInfo &range_info,
                                          ObIAllocator &allocator,
                                          const bool for_compaction,
                                          ObIArray<ObStoreRange> &range_array)
{
  int ret = OB_SUCCESS;
  parallel_ranger_.reset();
  range_array.reset();

  if (OB_UNLIKELY(!range_info.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to split ranges", K(ret), K(range_info));
  } else if (range_info.parallel_target_count_ == 1
      || range_info.store_range_->is_single_rowkey()) {
    if (OB_FAIL(build_single_range(for_compaction, range_info, allocator, range_array))) {
      STORAGE_LOG(WARN, "Failed to build single range", K(ret));
    } else {
      STORAGE_LOG(DEBUG, "try to make single split range", K(range_info), K(range_array));
    }
  } else if (range_info.is_sstable()) {
    range_info.key_allocator_ = &allocator;
    if (OB_FAIL(parallel_ranger_.init(range_info))) {
      STORAGE_LOG(WARN, "Failed to init parallel ranger", K(ret), K(range_info));
    } else if (OB_FAIL(parallel_ranger_.split_ranges(for_compaction, allocator, range_array))) {
      STORAGE_LOG(WARN, "Failed to split ranges", K(ret));
    } else {
      STORAGE_LOG(DEBUG, "split ranges with sstable", K(range_info), K(range_array));
    }
  } else if (OB_UNLIKELY(for_compaction)) {
    ret = OB_ERR_UNEXPECTED;
    STORAGE_LOG(WARN, "Split memtable ranges for compaction should not enter here",
        K(ret), K(for_compaction), K(range_info));
  } else if (OB_FAIL(split_ranges_memtable(range_info, allocator, range_array))) {
    STORAGE_LOG(WARN, "Failed to split ranges for memtable", K(ret), K(range_info));
  }

  return ret;
}

int ObPartitionRangeSpliter::split_ranges_memtable(ObRangeSplitInfo &range_info,
                                                   ObIAllocator &allocator,
                                                   ObIArray<ObStoreRange> &range_array)
{
  int ret = OB_SUCCESS;

  if (OB_UNLIKELY(!range_info.is_valid() || range_info.is_sstable())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid range info to split ranges for memtable", K(ret));
  } else if (OB_UNLIKELY(range_info.tables_->count() != 1)) {
    ret = OB_ERR_UNEXPECTED;
    STORAGE_LOG(WARN, "Unexpected table count for memtable range info", K(ret), K(range_info));
  } else {
    ObSEArray<ObStoreRange, 16> store_ranges;
    memtable::ObMemtable *memtable = static_cast<memtable::ObMemtable *>(range_info.tables_->at(0));
    if (OB_ISNULL(memtable)) {
      ret = OB_ERR_UNEXPECTED;
      STORAGE_LOG(WARN, "Unexpected null memtable", K(ret), KP(memtable), K(range_info));
    } else if (OB_FAIL(memtable->get_split_ranges(
        &range_info.store_range_->get_start_key(),
        &range_info.store_range_->get_end_key(),
        range_info.parallel_target_count_,
        store_ranges))) {
      if (OB_ENTRY_NOT_EXIST == ret) {
        STORAGE_LOG(WARN, "Failed to get split ranges from memtable, build single range instead", K(ret));
        if (OB_FAIL(build_single_range(false/*for compaction*/, range_info, allocator, range_array))) {
          STORAGE_LOG(WARN, "Failed to build single range", K(ret));
        } else {
          STORAGE_LOG(DEBUG, "try to make single split range for memtable", K(range_info), K(range_array));
        }
      } else {
        STORAGE_LOG(WARN, "Failed to get split ranges from memtable", K(ret));
      }
    } else {
      ObStoreRange store_range;
      for (int64_t i = 0; OB_SUCC(ret) && i < store_ranges.count(); i++) {
        if (OB_FAIL(store_ranges.at(i).deep_copy(allocator, store_range))) {
          STORAGE_LOG(WARN, "Failed to deep copy store range", K(ret), K(store_ranges));
        } else if (FALSE_IT(store_range.set_table_id(range_info.store_range_->get_table_id()))) {
        } else {
          if (i == 0 && range_info.store_range_->get_border_flag().inclusive_start()) {
            store_range.set_left_closed();
          }
          if (i == store_ranges.count() - 1 && range_info.store_range_->get_border_flag().inclusive_end()) {
            store_range.set_right_closed();
          }
          if (OB_FAIL(range_array.push_back(store_range))) {
            STORAGE_LOG(WARN, "Failed to push back store range", K(ret), K(store_range));
          }
        }
      }
    }
    STORAGE_LOG(DEBUG, "split ranges with memtable", K(range_info), K(range_array));
  }

  return ret;
}

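// Chooses which tables drive the range split: the largest memtable when its estimated
// size dwarfs the sstables (MEMTABLE_SIZE_AMPLIFICATION_FACTOR), otherwise the minor
// sstables (when large enough relative to the major) plus the last major sstable seen,
// provided it exceeds MIN_SPLIT_TARGET_SSTABLE_SIZE. An empty result means the tables
// are too small to be worth splitting.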
int ObPartitionMultiRangeSpliter::get_split_tables(ObTableStoreIterator &table_iter,
                                                   ObIArray<ObITable *> &tables)
{
  int ret = OB_SUCCESS;

  if (OB_UNLIKELY(0 == table_iter.count())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to get split tables", K(ret), K(table_iter));
  } else {
    int64_t major_size = 0;
    int64_t minor_size = 0;
    int64_t memtable_size = 0;
    ObITable *max_memtable = nullptr;
    ObSEArray<ObITable *, OB_DEFAULT_SE_ARRAY_COUNT> minor_sstables;
    ObITable *last_major_sstable = nullptr;
    ObITable *table = nullptr;
    tables.reset();

    while (OB_SUCC(ret)) {
      if (OB_FAIL(table_iter.get_next(table))) {
        if (OB_UNLIKELY(OB_ITER_END != ret)) {
          STORAGE_LOG(WARN, "Fail to get next table", K(ret), K(table_iter));
        } else {
          ret = OB_SUCCESS;
          break;
        }
      } else if (OB_ISNULL(table)) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected null table", K(ret), K(table_iter));
      } else if (table->is_major_sstable()) {
        major_size = (reinterpret_cast<ObSSTable *>(table))->get_meta().get_basic_meta().occupy_size_;
        last_major_sstable = table;
      } else if (table->is_minor_sstable()) {
        minor_size += (reinterpret_cast<ObSSTable *>(table))->get_meta().get_basic_meta().occupy_size_;
        if (OB_FAIL(minor_sstables.push_back(table))) {
          STORAGE_LOG(WARN, "Fail to cache minor sstables", K(ret), KP(table));
        }
      } else if (table->is_data_memtable()) {
        int64_t mem_rows = 0;
        int64_t mem_size = 0;
        memtable::ObMemtable *memtable = static_cast<memtable::ObMemtable *>(table);
        if (OB_FAIL(memtable->estimate_phy_size(nullptr, nullptr, mem_size, mem_rows))) {
          STORAGE_LOG(WARN, "Failed to get estimate size from memtable", K(ret));
        } else {
          memtable_size = MAX(mem_size, memtable_size);
          max_memtable = table;
        }
      }
    }

    bool split_by_memtable = false;
    if (OB_FAIL(ret)) {
    } else if (memtable_size > (major_size + minor_size) * MEMTABLE_SIZE_AMPLIFICATION_FACTOR) {
      // memtable is too big relative to the sstables, use the memtable to split ranges
      split_by_memtable = true;
      if (OB_ISNULL(max_memtable)) {
        ret = OB_ERR_SYS;
        STORAGE_LOG(WARN, "Unexpected null max memtable", K(ret), KP(max_memtable));
      } else if (OB_FAIL(tables.push_back(max_memtable))) {
        STORAGE_LOG(WARN, "Failed to push back max memtable", K(ret));
      } else {
        STORAGE_LOG(DEBUG, "use big memtable to split range", K(memtable_size), K(major_size), K(minor_size));
      }
    } else if (minor_size > MIN_SPLIT_TARGET_SSTABLE_SIZE && minor_size > major_size / 2) {
      // Add all minor sstables
      if (OB_FAIL(tables.reserve(minor_sstables.count() + 1))) {
        STORAGE_LOG(WARN, "Fail to reserve space for sstables", K(ret));
      }
      for (int64_t i = 0; OB_SUCC(ret) && i < minor_sstables.count(); ++i) {
        if (OB_FAIL(tables.push_back(minor_sstables.at(i)))) {
          STORAGE_LOG(WARN, "Fail to add minor sstable", K(ret), KPC(table));
        }
      }
    }

    if (OB_FAIL(ret) || split_by_memtable) {
    } else if (major_size > MIN_SPLIT_TARGET_SSTABLE_SIZE) {
      // Add last major sstable
      if (OB_ISNULL(last_major_sstable)) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Fail to get last major sstable", K(ret));
      } else if (OB_FAIL(tables.push_back(last_major_sstable))) {
        STORAGE_LOG(WARN, "Fail to add last major sstable", K(ret), KPC(last_major_sstable));
      }
    }
    STORAGE_LOG(DEBUG, "get range split tables", K(ret), K(memtable_size), K(major_size), K(minor_size), K(tables));
  }

  return ret;
}

int ObPartitionMultiRangeSpliter::get_multi_range_size(
    const ObIArray<ObStoreRange> &range_array,
    const ObTableReadInfo &index_read_info,
    ObTableStoreIterator &table_iter,
    int64_t &total_size)
{
  int ret = OB_SUCCESS;
  ObSEArray<ObITable *, DEFAULT_STORE_CNT_IN_STORAGE> tables;
  total_size = 0;

  if (OB_UNLIKELY(0 == table_iter.count() || range_array.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to get multi range size", K(ret), K(table_iter),
        K(range_array));
  } else if (OB_FAIL(get_split_tables(table_iter, tables))) {
    STORAGE_LOG(WARN, "Failed to get all sstables", K(ret), K(table_iter));
  } else if (tables.empty()) {
    // only small tables, arbitrary range split is not supported
    total_size = 0;
  } else {
    RangeSplitInfoArray range_info_array;
    if (OB_FAIL(get_range_split_infos(tables, index_read_info, range_array, range_info_array, total_size))) {
      STORAGE_LOG(WARN, "Failed to get range split info array", K(ret));
    }
  }

  return ret;
}

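// Packs the per-range splits into task buckets of roughly avg_task_size bytes. Each input
// range is first split by range_spliter_, then consecutive sub-ranges are accumulated
// until the low/high watermarks are crossed, at which point the accumulated group is
// merged and pushed as one entry of multi_range_split_array.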
int ObPartitionMultiRangeSpliter::split_multi_ranges(RangeSplitInfoArray &range_info_array,
                                                     const int64_t expected_task_count,
                                                     const int64_t total_size,
                                                     common::ObIAllocator &allocator,
                                                     ObArrayArray<ObStoreRange> &multi_range_split_array)
{
  int ret = OB_SUCCESS;

  if (OB_UNLIKELY(range_info_array.empty() || total_size <= 0 || expected_task_count <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to calc parallel target array", K(ret), K(range_info_array),
        K(total_size), K(expected_task_count));
  } else {
    ObArenaAllocator local_allocator;
    const int64_t avg_task_size = MAX(total_size / expected_task_count, MIN_SPLIT_TASK_SIZE);
    const int64_t split_task_size = MAX(avg_task_size / 2, MIN_SPLIT_TASK_SIZE);
    const int64_t task_size_high_watermark = MAX(avg_task_size *
        SPLIT_TASK_SIZE_HIGH_WATER_MARK_FACTOR / 100, MIN_SPLIT_TASK_SIZE);
    const int64_t task_size_low_watermark = max(avg_task_size * SPLIT_TASK_SIZE_LOW_WATER_MARK_FACTOR /
        100, MIN_SPLIT_TASK_SIZE);
    int64_t sum_size = 0;
    RangeSplitArray range_split_array;
    RangeSplitArray refra_range_split_array;
    for (int64_t i = 0; OB_SUCC(ret) && i < range_info_array.count(); i++) {
      ObRangeSplitInfo &range_info = range_info_array.at(i);
      int64_t cur_avg_task_size = 0;
      if (range_info.empty() || range_info.total_size_ < avg_task_size
          || sum_size + range_info.total_size_ < task_size_high_watermark) {
        range_info.set_parallel_target(1);
        cur_avg_task_size = MAX(range_info.total_size_, MIN_SPLIT_TASK_SIZE);
      } else {
        range_info.set_parallel_target(range_info.total_size_ / split_task_size);
        cur_avg_task_size = range_info.total_size_ / range_info.parallel_target_count_;
      }
      STORAGE_LOG(DEBUG, "start to split range array", K(range_info), K(cur_avg_task_size),
          K(avg_task_size));
      range_spliter_.reset();
      range_split_array.reset();
      if (OB_FAIL(range_spliter_.split_ranges(range_info, local_allocator, false, range_split_array))) {
        STORAGE_LOG(WARN, "Failed to split ranges", K(ret), K(range_info));
      } else {
        STORAGE_LOG(DEBUG, "get split ranges", K(range_split_array));
        for (int64_t i = 0; OB_SUCC(ret) && i < range_split_array.count(); i++) {
          if (sum_size >= avg_task_size
              || (sum_size >= task_size_low_watermark
                  && sum_size + cur_avg_task_size >= task_size_high_watermark)) {
            if (OB_FAIL(merge_and_push_range_array(refra_range_split_array, allocator,
                multi_range_split_array))) {
              STORAGE_LOG(WARN, "Failed to merge and push split range array", K(ret), K(refra_range_split_array));
            } else {
              STORAGE_LOG(DEBUG, "succ to build refra split ranges", K(refra_range_split_array), K(sum_size),
                  K(avg_task_size));
              refra_range_split_array.reset();
              sum_size = 0;
            }
          }
          if (OB_FAIL(ret)) {
          } else if (OB_FAIL(refra_range_split_array.push_back(range_split_array.at(i)))) {
            STORAGE_LOG(WARN, "Failed to push back store range", K(ret), K(range_split_array.at(i)));
          } else {
            sum_size += cur_avg_task_size;
          }
        }
      }
    }
    if (OB_SUCC(ret) && refra_range_split_array.count() > 0) {
      if (OB_FAIL(merge_and_push_range_array(refra_range_split_array, allocator,
          multi_range_split_array))) {
        STORAGE_LOG(WARN, "Failed to merge and push split range array", K(ret), K(refra_range_split_array));
      } else {
        STORAGE_LOG(DEBUG, "succ to build refra split ranges", K(refra_range_split_array), K(sum_size),
            K(avg_task_size));
        refra_range_split_array.reset();
      }
    }
  }

  return ret;
}

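// Collapses consecutive sub-ranges whose boundaries join exactly (the start key of one
// equals the end key of the previous) into a single range before appending the group to
// multi_range_split_array, so each task receives a compact list of disjoint ranges.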
int ObPartitionMultiRangeSpliter::merge_and_push_range_array(
    const RangeSplitArray &src_range_split_array,
    ObIAllocator &allocator,
    ObArrayArray<ObStoreRange> &multi_range_split_array)
{
  int ret = OB_SUCCESS;

  if (OB_UNLIKELY(src_range_split_array.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to merge range array", K(ret), K(src_range_split_array));
  } else {
    const ObStoreRange *last_range = nullptr;
    RangeSplitArray dst_range_array;
    ObStoreRange dst_range;
    for (int64_t i = 0; OB_SUCC(ret) && i < src_range_split_array.count(); i++) {
      const ObStoreRange &cur_range = src_range_split_array.at(i);
      if (OB_NOT_NULL(last_range) && cur_range.get_start_key().compare(last_range->get_end_key()) == 0) {
        // continuous range
        if (!cur_range.get_border_flag().inclusive_end()) {
          // stop looking for the next range
          if (OB_FAIL(cur_range.get_end_key().deep_copy(dst_range.get_end_key(), allocator))) {
            STORAGE_LOG(WARN, "Failed to deep copy store range", K(ret), K(cur_range));
          } else if (FALSE_IT(dst_range.get_border_flag().unset_inclusive_end())) {
          } else if (OB_FAIL(dst_range_array.push_back(dst_range))) {
            STORAGE_LOG(WARN, "Failed to push back dst range array", K(ret), K(dst_range));
          } else {
            dst_range.reset();
            last_range = nullptr;
          }
        } else {
          last_range = &cur_range;
        }
      } else {
        if (OB_NOT_NULL(last_range)) {
          // break range
          if (OB_FAIL(last_range->get_end_key().deep_copy(dst_range.get_end_key(), allocator))) {
            STORAGE_LOG(WARN, "Failed to deep copy store range", K(ret), KPC(last_range));
          } else if (FALSE_IT(dst_range.get_border_flag().set_inclusive_end())) {
          } else if (OB_FAIL(dst_range_array.push_back(dst_range))) {
            STORAGE_LOG(WARN, "Failed to push back dst range array", K(ret), K(dst_range));
          } else {
            dst_range.reset();
            last_range = nullptr;
          }
        }
        if (OB_SUCC(ret)) {
          dst_range.set_table_id(cur_range.get_table_id());
          dst_range.set_border_flag(cur_range.get_border_flag());
          if (!cur_range.get_border_flag().inclusive_end()) {
            // only deal with the right-closed / left-open situation
            if (OB_FAIL(cur_range.deep_copy(allocator, dst_range))) {
              STORAGE_LOG(WARN, "Failed to deep copy store range", K(ret), K(cur_range));
            } else if (OB_FAIL(dst_range_array.push_back(dst_range))) {
              STORAGE_LOG(WARN, "Failed to push back dst range array", K(ret), K(dst_range));
            } else {
              dst_range.reset();
            }
          } else if (OB_FAIL(cur_range.get_start_key().deep_copy(dst_range.get_start_key(), allocator))) {
            STORAGE_LOG(WARN, "Failed to deep copy start key", K(ret), K(cur_range));
          } else {
            last_range = &cur_range;
          }
        }
      }
    }
    if (OB_SUCC(ret) && OB_NOT_NULL(last_range)) {
      if (OB_FAIL(last_range->get_end_key().deep_copy(dst_range.get_end_key(), allocator))) {
        STORAGE_LOG(WARN, "Failed to deep copy store range", K(ret), KPC(last_range));
      } else if (FALSE_IT(dst_range.get_border_flag().set_inclusive_end())) {
      } else if (OB_FAIL(dst_range_array.push_back(dst_range))) {
        STORAGE_LOG(WARN, "Failed to push back dst range array", K(ret), K(dst_range));
      } else {
        dst_range.reset();
        last_range = nullptr;
      }
    }
    if (OB_SUCC(ret)) {
      if (OB_FAIL(multi_range_split_array.push_back(dst_range_array))) {
        STORAGE_LOG(WARN, "Failed to push back range split array", K(ret), K(dst_range_array));
      } else {
        STORAGE_LOG(DEBUG, "succ to merge range split array", K(ret), K(src_range_split_array),
            K(dst_range_array));
      }
    }
  }

  return ret;
}

int ObPartitionMultiRangeSpliter::build_single_range_array(
    const ObIArray<ObStoreRange> &range_array,
    ObIAllocator &allocator,
    ObArrayArray<ObStoreRange> &multi_range_split_array)
{
  int ret = OB_SUCCESS;

  if (OB_UNLIKELY(range_array.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to build single range array", K(ret), K(range_array));
  } else {
    RangeSplitArray range_split_array;
    ObStoreRange store_range;
    for (int64_t i = 0; OB_SUCC(ret) && i < range_array.count(); i++) {
      if (OB_FAIL(range_array.at(i).deep_copy(allocator, store_range))) {
        STORAGE_LOG(WARN, "Failed to deep copy store range", K(ret), K(i), K(range_array.at(i)));
      } else if (OB_FAIL(range_split_array.push_back(store_range))) {
        STORAGE_LOG(WARN, "Failed to push back store range", K(ret), K(store_range));
      } else {
        store_range.reset();
      }
    }
    if (OB_SUCC(ret)) {
      if (OB_FAIL(multi_range_split_array.push_back(range_split_array))) {
        STORAGE_LOG(WARN, "Failed to push range split array", K(ret), K(range_split_array));
      } else {
        STORAGE_LOG(DEBUG, "Fast split for single task", K(range_array));
      }
    }
  }

  return ret;
}

int ObPartitionMultiRangeSpliter::get_split_multi_ranges(
    const common::ObIArray<common::ObStoreRange> &range_array,
    const int64_t expected_task_count,
    const ObTableReadInfo &index_read_info,
    ObTableStoreIterator &table_iter,
    common::ObIAllocator &allocator,
    common::ObArrayArray<common::ObStoreRange> &multi_range_split_array)
{
  int ret = OB_SUCCESS;
  ObSEArray<ObITable *, DEFAULT_STORE_CNT_IN_STORAGE> tables;
  bool single_array = false;
  multi_range_split_array.reset();

  if (OB_UNLIKELY(0 == table_iter.count() || range_array.empty() || expected_task_count <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to get split multi ranges", K(ret), K(table_iter),
        K(range_array), K(expected_task_count));
  } else if (OB_UNLIKELY(expected_task_count == 1)) {
    STORAGE_LOG(DEBUG, "Unexpected only one split task", K(expected_task_count), K(range_array));
    single_array = true;
  } else if (OB_FAIL(get_split_tables(table_iter, tables))) {
    STORAGE_LOG(WARN, "Failed to get split tables", K(ret), K(table_iter));
  } else if (tables.empty()) {
    // only small tables, no need to split
    STORAGE_LOG(DEBUG, "empty split tables", K(table_iter));
    single_array = true;
  } else {
    RangeSplitInfoArray range_info_array;
    int64_t total_size = 0;
    if (OB_FAIL(get_range_split_infos(tables, index_read_info, range_array, range_info_array, total_size))) {
      STORAGE_LOG(WARN, "Failed to get range split info array", K(ret));
    } else if (total_size == 0) {
      STORAGE_LOG(DEBUG, "too small tables to split range", K(total_size), K(range_info_array));
      single_array = true;
    } else if (OB_FAIL(split_multi_ranges(range_info_array, expected_task_count, total_size, allocator,
        multi_range_split_array))) {
      STORAGE_LOG(WARN, "Failed to split multi ranges", K(ret));
    }
  }

  if (OB_SUCC(ret) && single_array) {
    if (OB_FAIL(build_single_range_array(range_array, allocator, multi_range_split_array))) {
      STORAGE_LOG(WARN, "Failed to build single range array", K(ret));
    }
  }
  return ret;
}

int ObPartitionMultiRangeSpliter::get_range_split_infos(ObIArray<ObITable *> &tables,
                                                        const ObTableReadInfo &index_read_info,
                                                        const ObIArray<ObStoreRange> &range_array,
                                                        RangeSplitInfoArray &range_info_array,
                                                        int64_t &total_size)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(tables.empty() || range_array.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to get range split info", K(ret), K(tables), K(range_array));
  } else {
    ObRangeSplitInfo range_info;
    for (int64_t i = 0; OB_SUCC(ret) && i < range_array.count(); i++) {
      if (FALSE_IT(range_spliter_.reset())) {
      } else if (OB_FAIL(range_spliter_.get_range_split_info(
          tables, index_read_info, range_array.at(i), range_info))) {
        STORAGE_LOG(WARN, "Failed to get range split info", K(ret), K(i), K(range_array.at(i)));
      } else if (OB_FAIL(range_info_array.push_back(range_info))) {
        STORAGE_LOG(WARN, "Failed to push back range info", K(ret), K(range_info));
      } else {
        STORAGE_LOG(DEBUG, "get single range split info", K(range_info));
        total_size += range_info.total_size_;
        range_info.reset();
      }
    }
    STORAGE_LOG(DEBUG, "get total range split info", K(total_size), K(tables), K(range_info_array));
  }
  return ret;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

ObPartitionMajorSSTableRangeSpliter::ObPartitionMajorSSTableRangeSpliter()
  : major_sstable_(nullptr),
    index_read_info_(nullptr),
    tablet_size_(-1),
    allocator_(nullptr),
    is_inited_(false)
{
}

ObPartitionMajorSSTableRangeSpliter::~ObPartitionMajorSSTableRangeSpliter()
{
}

int ObPartitionMajorSSTableRangeSpliter::init(const ObTableReadInfo &index_read_info,
                                              ObSSTable *major_sstable, int64_t tablet_size,
                                              ObIAllocator &allocator)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    STORAGE_LOG(WARN, "ObPartitionMajorSSTableRangeSpliter init twice", KR(ret));
  } else if (OB_ISNULL(major_sstable)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument null major sstable", KR(ret), K(major_sstable));
  } else if (OB_UNLIKELY(tablet_size < 0)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument tablet size", KR(ret), K(tablet_size));
  } else {
    major_sstable_ = major_sstable;
    index_read_info_ = &index_read_info;
    tablet_size_ = tablet_size;
    allocator_ = &allocator;
    is_inited_ = true;
  }
  return ret;
}

int ObPartitionMajorSSTableRangeSpliter::scan_major_sstable_secondary_meta(
    const ObDatumRange &scan_range, ObSSTableSecMetaIterator *&meta_iter)
{
  int ret = OB_SUCCESS;
  if (OB_FAIL(major_sstable_->scan_secondary_meta(*allocator_, scan_range, *index_read_info_,
      DATA_BLOCK_META, meta_iter))) {
    STORAGE_LOG(WARN, "Failed to scan secondary meta", KR(ret), K(*major_sstable_));
  }
  return ret;
}

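// Derives the parallel degree from the sstable's occupied size and tablet_size_, capped
// so that no more than MAX_MERGE_THREAD ranges are produced, then either returns the
// whole range (degree <= 1) or delegates to generate_ranges_by_macro_block.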
int ObPartitionMajorSSTableRangeSpliter::split_ranges(ObIArray<ObStoreRange> &result_ranges)
{
  int ret = OB_SUCCESS;
  int64_t parallel_degree = 0;
  result_ranges.reset();
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObPartitionMajorSSTableRangeSpliter not init", KR(ret));
  } else {
    // calculate the parallel degree
    if (major_sstable_->get_meta().is_empty() || tablet_size_ == 0) {
      parallel_degree = 1;
    } else {
      const int64_t macro_block_count = major_sstable_->get_meta().get_basic_meta().data_macro_block_count_;
      const int64_t occupy_size = macro_block_count * OB_SERVER_BLOCK_MGR.get_macro_block_size();
      parallel_degree = (occupy_size + tablet_size_ - 1) / tablet_size_;
      if (parallel_degree > MAX_MERGE_THREAD) {
        int64_t macro_cnts = (macro_block_count + MAX_MERGE_THREAD - 1) / MAX_MERGE_THREAD;
        parallel_degree = (macro_block_count + macro_cnts - 1) / macro_cnts;
      }
    }
    // generate ranges according to the parallel degree
    if (parallel_degree <= 1) {
      ObStoreRange whole_range;
      whole_range.set_whole_range();
      if (OB_FAIL(result_ranges.push_back(whole_range))) {
        STORAGE_LOG(WARN, "failed to push back merge range to array", KR(ret), K(whole_range));
      }
    } else {
      if (OB_FAIL(generate_ranges_by_macro_block(parallel_degree, result_ranges))) {
        STORAGE_LOG(WARN, "failed to generate ranges by macro block", KR(ret), K(parallel_degree));
      }
    }
  }

  return ret;
}

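// Walks the data macro-block metas of the major sstable and emits one left-open,
// right-closed range per macro_block_cnt_per_range blocks, using each group's last block
// endkey as the range boundary; the final range is extended to max and made right-open.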
int ObPartitionMajorSSTableRangeSpliter::generate_ranges_by_macro_block(
    int64_t parallel_degree, ObIArray<ObStoreRange> &result_ranges)
{
  int ret = OB_SUCCESS;
  const ObIArray<share::schema::ObColDesc> &col_descs = index_read_info_->get_columns_desc();
  const int64_t macro_block_count =
      major_sstable_->get_meta().get_basic_meta().data_macro_block_count_;
  const int64_t macro_block_cnt_per_range =
      (macro_block_count + parallel_degree - 1) / parallel_degree;

  ObDataMacroBlockMeta blk_meta;
  ObSSTableSecMetaIterator *meta_iter = nullptr;
  ObDatumRange scan_range;
  scan_range.set_whole_range();
  if (OB_FAIL(scan_major_sstable_secondary_meta(scan_range, meta_iter))) {
    STORAGE_LOG(WARN, "Failed to scan secondary meta", KR(ret), K(*major_sstable_));
  }

  // generate ranges
  ObDatumRowkey endkey;
  ObStoreRange range;
  range.get_end_key().set_min();
  range.set_left_open();
  range.set_right_closed();
  for (int64_t i = 0; OB_SUCC(ret) && i < macro_block_count;) {
    const int64_t last = i + macro_block_cnt_per_range - 1;  // last macro block idx in current range
    range.get_start_key() = range.get_end_key();
    if (last < macro_block_count - 1) {
      // locate to the last macro-block meta in current range
      while (OB_SUCC(meta_iter->get_next(blk_meta)) && i++ < last);
      if (OB_FAIL(ret)) {
        STORAGE_LOG(WARN, "Failed to get macro block meta", KR(ret), K(i - 1));
      } else if (OB_UNLIKELY(!blk_meta.is_valid())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected invalid macro block meta", KR(ret), K(i - 1));
      } else if (OB_FAIL(blk_meta.get_rowkey(endkey))) {
        STORAGE_LOG(WARN, "Failed to get rowkey", KR(ret), K(blk_meta));
      } else if (OB_FAIL(endkey.to_store_rowkey(col_descs, *allocator_, range.get_end_key()))) {
        STORAGE_LOG(WARN, "Failed to transfer store rowkey", K(ret), K(endkey));
      }
    } else {  // last range
      i = last + 1;
      range.get_end_key().set_max();
      range.set_right_open();
    }
    if (OB_SUCC(ret) && OB_FAIL(result_ranges.push_back(range))) {
      STORAGE_LOG(WARN, "Failed to push range", KR(ret), K(result_ranges), K(range));
    }
  }

  if (OB_NOT_NULL(meta_iter)) {
    meta_iter->~ObSSTableSecMetaIterator();
    meta_iter = nullptr;
  }
  return ret;
}

////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
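// ObIncrementalIterator merges all tables of the merge context except the first (major) sstable
// and iterates their rowkeys (including delete rows) over the whole range, i.e. it yields the
// incremental data written on top of the latest major sstable.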
ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::ObIncrementalIterator(
    compaction::ObTabletMergeCtx &merge_ctx, ObIAllocator &allocator)
  : merge_ctx_(merge_ctx),
    allocator_(allocator),
    tbl_read_info_(),
    iter_(nullptr),
    is_inited_(false)
{
}

ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::~ObIncrementalIterator()
{
  reset();
}

void ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::reset()
{
  rowkey_col_ids_.reset();
  out_cols_project_.reset();
  tbl_read_info_.reset();
  tbl_xs_param_.reset();
  store_ctx_.reset();
  tbl_xs_ctx_.reset();
  tbls_iter_.reset();
  get_tbl_param_.reset();
  range_to_scan_.reset();
  if (iter_ != nullptr) {
    iter_->~ObIStoreRowIterator();
    iter_ = nullptr;
  }
  is_inited_ = false;
}

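// Initialization pipeline: build the rowkey-only access param, a read store ctx at the merge
// snapshot, the access context and the get-table param, then init/open an ObMultipleScanMerge
// over the whole range with delete-row iteration enabled.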
int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::init()
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    STORAGE_LOG(WARN, "ObIncrementalIterator init twice", KR(ret));
  } else {
    ObMultipleScanMerge *mpl_scan_mrg = nullptr;
    range_to_scan_.set_whole_range();
    if (OB_ISNULL(mpl_scan_mrg = OB_NEWx(ObMultipleScanMerge, (&allocator_)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      STORAGE_LOG(WARN, "failed to allocate memory", KR(ret));
    } else if (OB_FAIL(prepare_table_access_param())) {
      STORAGE_LOG(WARN, "Failed to prepare table access param", KR(ret));
    } else if (OB_FAIL(prepare_store_ctx())) {
      STORAGE_LOG(WARN, "Failed to prepare store ctx", KR(ret));
    } else if (OB_FAIL(prepare_table_access_context())) {
      STORAGE_LOG(WARN, "Failed to prepare table access context", KR(ret));
    } else if (OB_FAIL(prepare_get_table_param())) {
      STORAGE_LOG(WARN, "Failed to prepare get table param", KR(ret));
    } else if (OB_FAIL(mpl_scan_mrg->init(tbl_xs_param_, tbl_xs_ctx_, get_tbl_param_))) {
      STORAGE_LOG(WARN, "Failed to init multiple scan merge", KR(ret));
    } else if (OB_FAIL(mpl_scan_mrg->open(range_to_scan_))) {
      STORAGE_LOG(WARN, "Failed to open multiple scan merge", KR(ret));
    } else {
      mpl_scan_mrg->set_iter_del_row(true);
      iter_ = mpl_scan_mrg;
      is_inited_ = true;
    }
    if (OB_FAIL(ret)) {
      if (mpl_scan_mrg != nullptr) {
        mpl_scan_mrg->~ObMultipleScanMerge();
      }
    }
  }
  return ret;
}

int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::get_next_row(
    const ObDatumRow *&row)
{
  int ret = OB_SUCCESS;
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObIncrementalIterator not init", KR(ret));
  } else {
    ret = iter_->get_next_row(row);
  }
  return ret;
}

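// The read info covers the schema rowkey columns plus the multi-version extra rowkey columns,
// and only these columns are projected, since range splitting only needs rowkeys.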
int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::prepare_table_access_param()
{
  int ret = OB_SUCCESS;
  const ObStorageSchema *storage_schema = merge_ctx_.get_schema();
  if (OB_FAIL(storage_schema->get_rowkey_column_ids(rowkey_col_ids_))) {
    STORAGE_LOG(WARN, "Failed to get rowkey column ids", KR(ret));
  } else if (OB_FAIL(ObMultiVersionRowkeyHelpper::add_extra_rowkey_cols(rowkey_col_ids_))) {
    STORAGE_LOG(WARN, "failed to add extra rowkey cols", KR(ret));
  } else if (OB_FAIL(tbl_read_info_.init(allocator_, storage_schema->get_column_count(),
                                         storage_schema->get_rowkey_column_num(),
                                         lib::is_oracle_mode(), rowkey_col_ids_, true))) {
    STORAGE_LOG(WARN, "Failed to init columns info", KR(ret));
  } else if (OB_FAIL(tbl_xs_param_.init_merge_param(
                 merge_ctx_.param_.tablet_id_.id(), merge_ctx_.param_.tablet_id_, tbl_read_info_))) {
    STORAGE_LOG(WARN, "Failed to init table access param", KR(ret));
  }
  for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_col_ids_.count(); i++) {
    if (OB_FAIL(out_cols_project_.push_back(static_cast<int32_t>(i)))) {
      STORAGE_LOG(WARN, "Failed to push column project", KR(ret), K(i));
    }
  }
  if (OB_SUCC(ret)) {
    tbl_xs_param_.iter_param_.out_cols_project_ = &out_cols_project_;
  }
  return ret;
}

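// The store ctx is initialized for read on the merge log stream, with the merge snapshot
// version converted to an SCN.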
int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::prepare_store_ctx()
{
  int ret = OB_SUCCESS;
  auto ls_id = merge_ctx_.param_.ls_id_;
  auto &snapshot = merge_ctx_.sstable_version_range_.snapshot_version_;
  SCN scn;
  if (OB_FAIL(scn.convert_for_tx(snapshot))) {
    STORAGE_LOG(WARN, "convert for tx fail", K(ret), K(ls_id), K(snapshot));
  } else if (OB_FAIL(store_ctx_.init_for_read(ls_id,
                                              INT64_MAX,
                                              -1,
                                              scn))) {
    STORAGE_LOG(WARN, "init store ctx fail", K(ret), K(ls_id), K(snapshot));
  }
  return ret;
}

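// Whole-macro, daily-merge style scan; the scan version range starts from the major sstable's
// snapshot version so only rows newer than the base data are returned.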
int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::prepare_table_access_context()
{
  int ret = OB_SUCCESS;
  ObQueryFlag query_flag(ObQueryFlag::Forward,
                         true /*daily_merge*/,
                         true /*optimize*/,
                         true /*whole_macro_scan*/,
                         false /*full_row*/,
                         false /*index_back*/,
                         false /*query_stat*/
                         );
  ObSSTable *major_sstable = static_cast<ObSSTable *>(merge_ctx_.tables_handle_.get_table(0));
  ObVersionRange scan_version_range = merge_ctx_.sstable_version_range_;
  scan_version_range.base_version_ = major_sstable->get_snapshot_version();
  if (OB_FAIL(tbl_xs_ctx_.init(query_flag, store_ctx_, allocator_, allocator_, scan_version_range))) {
    STORAGE_LOG(WARN, "Failed to init table access context", KR(ret));
  } else {
    tbl_xs_ctx_.merge_scn_ = merge_ctx_.merge_scn_;
  }
  return ret;
}

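// Register every table except tables_handle_[0] (the major sstable) with the table iterator, so
// the scan merge only sees incremental tables.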
int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::prepare_get_table_param()
{
  int ret = OB_SUCCESS;
  ObITable *table = nullptr;
  for (int64_t i = 1; OB_SUCC(ret) && i < merge_ctx_.tables_handle_.get_count(); i++) {
    table = merge_ctx_.tables_handle_.get_table(i);
    if (OB_FAIL(tbls_iter_.add_tables(&table, 1))) {
      STORAGE_LOG(WARN, "Failed to add table to inc handle", KR(ret));
    }
  }
  if (OB_SUCC(ret)) {
    get_tbl_param_.tablet_iter_.table_iter_ = tbls_iter_;
  }
  return ret;
}

ObPartitionIncrementalRangeSpliter::ObPartitionIncrementalRangeSpliter()
  : merge_ctx_(nullptr),
    allocator_(nullptr),
    major_sstable_(nullptr),
    tablet_size_(-1),
    iter_(nullptr),
    default_noisy_row_num_skipped_(DEFAULT_NOISY_ROW_NUM_SKIPPED),
    default_row_num_per_range_(DEFAULT_ROW_NUM_PER_RANGE),
    inc_ranges_(nullptr),
    base_ranges_(nullptr),
    combined_ranges_(nullptr),
    is_inited_(false)
{
}

ObPartitionIncrementalRangeSpliter::~ObPartitionIncrementalRangeSpliter()
{
  if (nullptr != iter_) {
    iter_->~ObIncrementalIterator();
    iter_ = nullptr;
  }
  if (nullptr != combined_ranges_) {
    combined_ranges_->~ObIArray<ObDatumRange>();
    combined_ranges_ = nullptr;
  }
  if (nullptr != base_ranges_) {
    base_ranges_->~ObIArray<ObDatumRange>();
    base_ranges_ = nullptr;
  }
  if (nullptr != inc_ranges_) {
    inc_ranges_->~ObIArray<ObDatumRange>();
    inc_ranges_ = nullptr;
  }
}

int ObPartitionIncrementalRangeSpliter::init(compaction::ObTabletMergeCtx &merge_ctx,
                                             ObIAllocator &allocator)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    STORAGE_LOG(WARN, "ObPartitionIncrementalRangeSpliter init twice", KR(ret));
  } else if (OB_UNLIKELY(merge_ctx.tables_handle_.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument tables handle (empty)", KR(ret), K(merge_ctx.tables_handle_));
  } else if (OB_ISNULL(merge_ctx.tables_handle_.get_table(0))) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "unexpected null first table", KR(ret), K(merge_ctx.tables_handle_));
  } else if (OB_UNLIKELY(!merge_ctx.tables_handle_.get_table(0)->is_major_sstable())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "first table must be major sstable", KR(ret), K(merge_ctx.tables_handle_));
  } else {
    merge_ctx_ = &merge_ctx;
    allocator_ = &allocator;
    major_sstable_ = static_cast<ObSSTable *>(merge_ctx.tables_handle_.get_table(0));
    tablet_size_ = merge_ctx.get_schema()->get_tablet_size();
    if (OB_UNLIKELY(tablet_size_ < 0)) {
      ret = OB_INVALID_ARGUMENT;
      STORAGE_LOG(WARN, "invalid argument tablet size", KR(ret), K_(tablet_size));
    } else if (OB_FAIL(alloc_ranges())) {
      STORAGE_LOG(WARN, "failed to alloc ranges", KR(ret));
    } else {
      is_inited_ = true;
    }
  }
  return ret;
}

int ObPartitionIncrementalRangeSpliter::alloc_ranges()
{
  int ret = OB_SUCCESS;
  char *buf = nullptr;
  int64_t ranges_size = sizeof(ObSEArray<ObDatumRange, 64>);
  if (OB_ISNULL(buf = static_cast<char *>(allocator_->alloc(ranges_size * 3)))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    STORAGE_LOG(WARN, "failed to allocate memory", KR(ret), "size", ranges_size * 3);
  } else {
    inc_ranges_ = new (buf) ObSEArray<ObDatumRange, 64>();
    base_ranges_ = new (buf + ranges_size) ObSEArray<ObDatumRange, 64>();
    combined_ranges_ = new (buf + ranges_size * 2) ObSEArray<ObDatumRange, 64>();
  }
  return ret;
}

int ObPartitionIncrementalRangeSpliter::init_incremental_iter()
{
  int ret = OB_SUCCESS;
  if (nullptr != iter_) {
    iter_->reset();
  } else {
    if (OB_ISNULL(iter_ = OB_NEWx(ObIncrementalIterator, allocator_, *merge_ctx_, *allocator_))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      STORAGE_LOG(WARN, "failed to allocate memory", KR(ret));
    }
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(iter_->init())) {
      STORAGE_LOG(WARN, "failed to init incremental iterator", KR(ret));
    }
  }
  return ret;
}

int ObPartitionIncrementalRangeSpliter::get_major_sstable_end_rowkey(ObDatumRowkey &rowkey)
{
  int ret = OB_SUCCESS;
  const ObTableReadInfo &index_read_info =
      merge_ctx_->tablet_handle_.get_obj()->get_index_read_info();
  if (OB_FAIL(major_sstable_->get_last_rowkey(index_read_info, *allocator_, rowkey))) {
    STORAGE_LOG(WARN, "failed to get major sstable last rowkey", KR(ret), K(*major_sstable_));
  }
  return ret;
}

int ObPartitionIncrementalRangeSpliter::scan_major_sstable_secondary_meta(
    const ObDatumRange &scan_range, ObSSTableSecMetaIterator *&meta_iter)
{
  int ret = OB_SUCCESS;
  const ObTableReadInfo &index_read_info =
      merge_ctx_->tablet_handle_.get_obj()->get_index_read_info();
  if (OB_FAIL(major_sstable_->scan_secondary_meta(*allocator_, scan_range, index_read_info,
                                                  DATA_BLOCK_META, meta_iter))) {
    STORAGE_LOG(WARN, "Failed to scan secondary meta", KR(ret), K(*major_sstable_));
  }
  return ret;
}

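// Decide whether incremental splitting applies: no table beyond the major sstable means no
// incremental data; an empty major sstable means everything is incremental; otherwise skip the
// first default_noisy_row_num_skipped_ rows and report incremental only if the next row's rowkey
// sorts after the major sstable's end rowkey.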
int ObPartitionIncrementalRangeSpliter::check_is_incremental(bool &is_incremental)
{
  int ret = OB_SUCCESS;
  const int64_t tables_handle_cnt = merge_ctx_->tables_handle_.get_count();
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObPartitionIncrementalRangeSpliter not init", KR(ret));
  } else if (tables_handle_cnt <= 1) {
    // no incremental data
    is_incremental = false;
  } else if (major_sstable_->get_meta().is_empty()) {
    // no base data
    is_incremental = true;
  } else {
    const ObDatumRow *row = nullptr;
    int64_t row_count = 0;
    is_incremental = false;
    if (OB_FAIL(init_incremental_iter())) {
      STORAGE_LOG(WARN, "failed to init incremental iterator", KR(ret));
    } else {
      // skip a few rows to avoid noise from updated rows
      while (OB_SUCC(ret) && OB_SUCC(iter_->get_next_row(row)) && OB_NOT_NULL(row) &&
             ++row_count <= default_noisy_row_num_skipped_);
      if (OB_ITER_END == ret) {
        STORAGE_LOG(DEBUG, "incremental row num less than skipped num");
        ret = OB_SUCCESS;
      } else if (OB_SUCC(ret) && row_count > default_noisy_row_num_skipped_) {
        // compare with base sstable last rowkey
        int cmp_ret = 0;
        ObDatumRowkey row_rowkey;
        ObDatumRowkey end_rowkey;
        const int64_t rowkey_column_num = merge_ctx_->get_schema()->get_rowkey_column_num();
        const ObStorageDatumUtils &datum_utils =
            merge_ctx_->tablet_handle_.get_obj()->get_index_read_info().get_datum_utils();
        if (OB_FAIL(row_rowkey.assign(row->storage_datums_, rowkey_column_num))) {
          STORAGE_LOG(WARN, "failed to assign datum rowkey", KR(ret), K(*row),
                      K(rowkey_column_num));
        } else if (OB_FAIL(get_major_sstable_end_rowkey(end_rowkey))) {
          STORAGE_LOG(WARN, "failed to get base sstable last rowkey", KR(ret), K(*major_sstable_));
        } else if (OB_FAIL(row_rowkey.compare(end_rowkey, datum_utils, cmp_ret))) {
          STORAGE_LOG(WARN, "failed to compare rowkey", KR(ret), K(row_rowkey), K(end_rowkey));
        } else if (cmp_ret > 0) {
          is_incremental = true;
        }
        STORAGE_LOG(DEBUG, "cmp rowkey", KR(ret), K(cmp_ret), K(row_rowkey), K(end_rowkey));
      }
    }
  }
  return ret;
}

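// Overall flow: tablet_size_ == 0 degenerates to a single whole range; otherwise ranges are cut
// from the incremental data, and for a full merge with a non-empty base sstable they are combined
// with ranges cut from the base sstable. The result is then merged down to at most
// MAX_MERGE_THREAD ranges and checked for continuity.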
int ObPartitionIncrementalRangeSpliter::split_ranges(ObDatumRangeArray &result_ranges)
{
  int ret = OB_SUCCESS;
  result_ranges.reset();
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObPartitionIncrementalRangeSpliter not init", KR(ret));
  } else if (tablet_size_ == 0) {
    ObDatumRange whole_range;
    whole_range.set_whole_range();
    if (OB_FAIL(result_ranges.push_back(whole_range))) {
      STORAGE_LOG(WARN, "failed to push back merge range to array", KR(ret), K(whole_range));
    }
  } else {
    ObDatumRangeArray *ranges = nullptr;
    if (OB_FAIL(get_ranges_by_inc_data(*inc_ranges_))) {
      STORAGE_LOG(WARN, "failed to get ranges by inc data", KR(ret));
    } else if (merge_ctx_->is_full_merge_) {
      if (major_sstable_->get_meta().is_empty()) {
        ranges = inc_ranges_;
      } else if (OB_FAIL(get_ranges_by_base_sstable(*base_ranges_))) {
        STORAGE_LOG(WARN, "failed to get ranges by base sstable", KR(ret));
      } else if (OB_FAIL(combine_ranges(*base_ranges_, *inc_ranges_, *combined_ranges_))) {
        STORAGE_LOG(WARN, "failed to combine base and inc ranges", KR(ret));
      } else {
        ranges = combined_ranges_;
      }
    } else {
      ranges = inc_ranges_;
    }

    if (OB_SUCC(ret)) {
      if (OB_ISNULL(ranges)) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected null ranges", K(ret), K(ranges));
      } else if (OB_FAIL(merge_ranges(*ranges, result_ranges))) {
        STORAGE_LOG(WARN, "failed to merge ranges", KR(ret));
      }
    }

    if (OB_SUCC(ret) && result_ranges.count() > 0) {
      const ObStorageDatumUtils &datum_utils =
          merge_ctx_->tablet_handle_.get_obj()->get_index_read_info().get_datum_utils();
      if (OB_FAIL(check_continuous(datum_utils, result_ranges))) {
        STORAGE_LOG(WARN, "failed to check continuous", KR(ret), K(result_ranges));
      }
    }
  }
  return ret;
}

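// Cut ranges from the incremental rows. num_rows_per_range defaults to
// default_row_num_per_range_, but when the base sstable is larger than tablet_size_ it is derived
// from the base sstable's row density: rows_per_macro_block * tablet_size_ / macro_block_size.
// For example (illustrative numbers only): 10,000 rows per 2MB macro block and tablet_size_ = 64MB
// gives 320,000 rows per range.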
int ObPartitionIncrementalRangeSpliter::get_ranges_by_inc_data(ObDatumRangeArray &ranges)
{
  int ret = OB_SUCCESS;
  const int64_t tables_handle_cnt = merge_ctx_->tables_handle_.get_count();
  ranges.reset();
  if (OB_UNLIKELY(tablet_size_ <= 0 || tables_handle_cnt <= 1)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument", KR(ret), K_(tablet_size), K(tables_handle_cnt));
  } else {
    if (OB_FAIL(init_incremental_iter())) {
      STORAGE_LOG(WARN, "failed to init incremental iterator", KR(ret));
    } else {
      int64_t num_rows_per_range = default_row_num_per_range_;
      // calculate num_rows_per_range by macro block
      const int64_t macro_block_count =
          major_sstable_->get_meta().get_basic_meta().data_macro_block_count_;
      const int64_t macro_block_size = OB_SERVER_BLOCK_MGR.get_macro_block_size();
      if (macro_block_count * macro_block_size > tablet_size_) {
        const int64_t row_count = major_sstable_->get_meta().get_basic_meta().row_count_;
        const int64_t num_rows_per_macro_block = row_count / macro_block_count;
        num_rows_per_range = num_rows_per_macro_block * tablet_size_ / macro_block_size;
      }
      // to avoid overly small ranges, simply make num_rows_per_range >= DEFAULT_NOISY_ROW_NUM_SKIPPED
      if (num_rows_per_range < default_noisy_row_num_skipped_) {
        num_rows_per_range = default_noisy_row_num_skipped_;
      }

      const int64_t rowkey_column_num = merge_ctx_->get_schema()->get_rowkey_column_num();
      int64_t count = 0;
      const ObDatumRow *row = nullptr;
      ObDatumRowkey rowkey;
      ObDatumRange range;
      ObDatumRange multi_version_range;
      range.end_key_.set_min_rowkey();
      range.set_left_open();
      range.set_right_closed();

      // generate ranges
      while (OB_SUCC(ret)) {
        if (OB_FAIL(iter_->get_next_row(row))) {
          if (OB_UNLIKELY(OB_ITER_END != ret)) {
            STORAGE_LOG(WARN, "failed to get next row", KR(ret));
          } else {
            ret = OB_SUCCESS;
            break;
          }
        } else if (OB_ISNULL(row)) {
          ret = OB_ERR_UNEXPECTED;
          STORAGE_LOG(WARN, "unexpected null row", KR(ret));
        } else if (++count >= num_rows_per_range) {
          count = 0;
          range.start_key_ = range.end_key_;
          if (OB_FAIL(rowkey.assign(row->storage_datums_, rowkey_column_num))) {
            STORAGE_LOG(WARN, "failed to assign datum rowkey", KR(ret), K(*row),
                        K(rowkey_column_num));
          } else if (OB_FAIL(rowkey.deep_copy(range.end_key_, *allocator_))) {
            STORAGE_LOG(WARN, "failed to deep copy datum rowkey", KR(ret), K(rowkey));
          } else if (OB_FAIL(range.to_multi_version_range(*allocator_, multi_version_range))) {
            STORAGE_LOG(WARN, "failed to transfer multi version range", KR(ret), K(range));
          } else if (OB_FAIL(ranges.push_back(multi_version_range))) {
            STORAGE_LOG(WARN, "failed to push range", KR(ret), K(range));
          }
        }
      }

      if (OB_SUCC(ret)) {
        // handle the last range
        if (count > 0) {
          range.start_key_ = range.end_key_;
          range.end_key_.set_max_rowkey();
          range.set_right_open();
          if (OB_FAIL(range.to_multi_version_range(*allocator_, multi_version_range))) {
            STORAGE_LOG(WARN, "failed to transfer multi version range", KR(ret), K(range));
          } else if (OB_FAIL(ranges.push_back(multi_version_range))) {
            STORAGE_LOG(WARN, "failed to push range", KR(ret), K(range));
          }
        } else if (ranges.empty()) {
          range.set_whole_range();
          if (OB_FAIL(ranges.push_back(range))) {
            STORAGE_LOG(WARN, "failed to push range", KR(ret), K(range));
          }
        } else {
          ObDatumRange &last_range = ranges.at(ranges.count() - 1);
          last_range.end_key_.set_max_rowkey();
          last_range.set_right_open();
        }
      }
    }
  }
  return ret;
}

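// Cut the base (major) sstable into about total_size / tablet_size_ ranges by assigning an equal
// number of macro blocks to each range and using the endkey of each range's last macro block as
// the range boundary; the last range is opened on the right up to MAX.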
int ObPartitionIncrementalRangeSpliter::get_ranges_by_base_sstable(ObDatumRangeArray &ranges)
{
  int ret = OB_SUCCESS;
  ranges.reset();
  if (OB_UNLIKELY(tablet_size_ <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "Invalid argument to get ranges by base sstable", KR(ret), K_(tablet_size));
  } else {
    ObSSTableSecMetaIterator *meta_iter = nullptr;
    ObDatumRange scan_range;
    scan_range.set_whole_range();
    if (OB_FAIL(scan_major_sstable_secondary_meta(scan_range, meta_iter))) {
      STORAGE_LOG(WARN, "Failed to scan secondary meta", KR(ret), K(*major_sstable_));
    } else {
      const int64_t macro_block_cnt =
          major_sstable_->get_meta().get_basic_meta().data_macro_block_count_;
      const int64_t total_size = macro_block_cnt * OB_SERVER_BLOCK_MGR.get_macro_block_size();
      const int64_t range_cnt = (total_size + tablet_size_ - 1) / tablet_size_;
      const int64_t macro_block_cnt_per_range = (macro_block_cnt + range_cnt - 1) / range_cnt;

      ObDatumRowkey endkey;
      ObDatumRange range;
      range.end_key_.set_min_rowkey();
      range.set_left_open();
      range.set_right_closed();
      ObDataMacroBlockMeta blk_meta;

      // generate ranges
      for (int64_t i = 0; OB_SUCC(ret) && i < macro_block_cnt;) {
        const int64_t last = i + macro_block_cnt_per_range - 1;
        range.start_key_ = range.end_key_;
        if (last < macro_block_cnt - 1) {
          // locate to the last macro-block meta in current range
          while (OB_SUCC(meta_iter->get_next(blk_meta)) && i++ < last);
          if (OB_FAIL(ret)) {
            STORAGE_LOG(WARN, "Failed to get macro block meta", KR(ret), K(i - 1));
          } else if (OB_UNLIKELY(!blk_meta.is_valid())) {
            ret = OB_ERR_UNEXPECTED;
            STORAGE_LOG(WARN, "Unexpected invalid macro block meta", KR(ret), K(i - 1));
          } else if (OB_FAIL(blk_meta.get_rowkey(endkey))) {
            STORAGE_LOG(WARN, "Failed to get rowkey", KR(ret), K(blk_meta));
          } else if (OB_FAIL(endkey.deep_copy(range.end_key_, *allocator_))) {
            STORAGE_LOG(WARN, "Failed to deep copy rowkey", K(ret), K(endkey));
          }
        } else { // last range
          i = last + 1;
          range.end_key_.set_max_rowkey();
          range.set_right_open();
        }
        if (OB_SUCC(ret) && OB_FAIL(ranges.push_back(range))) {
          STORAGE_LOG(WARN, "Failed to push range", KR(ret), K(ranges), K(range));
        }
      }
    }
    if (OB_NOT_NULL(meta_iter)) {
      meta_iter->~ObSSTableSecMetaIterator();
      meta_iter = nullptr;
    }
  }
  return ret;
}

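// Incremental data whose rowkeys matter here lies beyond the base sstable, so the combined result
// is simply base_ranges followed by inc_ranges, stitched together at the base sstable's end
// rowkey instead of at MAX/MIN.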
int ObPartitionIncrementalRangeSpliter::combine_ranges(const ObDatumRangeArray &base_ranges,
                                                       const ObDatumRangeArray &inc_ranges,
                                                       ObDatumRangeArray &result_ranges)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(base_ranges.empty() || inc_ranges.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument to combine ranges", KR(ret),
                K(base_ranges), K(inc_ranges));
  } else {
    // incremental data is appended after base sstable
    // so just append the inc ranges to the base ranges
    result_ranges.reset();

    ObDatumRowkey end_rowkey;
    if (OB_FAIL(get_major_sstable_end_rowkey(end_rowkey))) {
      STORAGE_LOG(WARN, "failed to get base sstable last rowkey", KR(ret), K_(major_sstable));
    } else {
      for (int64_t i = 0; OB_SUCC(ret) && i < base_ranges.count(); i++) {
        const ObDatumRange &range = base_ranges.at(i);
        if (OB_FAIL(result_ranges.push_back(range))) {
          STORAGE_LOG(WARN, "failed to push range", KR(ret), K(range));
        }
      }
      for (int64_t i = 0; OB_SUCC(ret) && i < inc_ranges.count(); i++) {
        const ObDatumRange &range = inc_ranges.at(i);
        if (OB_FAIL(result_ranges.push_back(range))) {
          STORAGE_LOG(WARN, "failed to push range", KR(ret), K(range));
        }
      }
      if (OB_SUCC(ret)) {
        // at the junction of base_ranges and inc_ranges, rewrite (k1, MAX) (MIN, k2]
        // as (k1, endkey] (endkey, k2]
        ObDatumRange &base_last_range = result_ranges.at(base_ranges.count() - 1);
        ObDatumRange &inc_first_range = result_ranges.at(base_ranges.count());
        if (OB_FAIL(end_rowkey.deep_copy(base_last_range.end_key_, *allocator_))) {
          STORAGE_LOG(WARN, "failed to deep copy rowkey", KR(ret), K(end_rowkey),
                      K(base_last_range.get_end_key()));
        } else {
          base_last_range.set_right_closed();
          inc_first_range.start_key_ = base_last_range.end_key_;
        }
      }
    }
  }
  return ret;
}

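// Reduce the range count to at most MAX_MERGE_THREAD by concatenating every
// num_ranges_per_thread consecutive input ranges into one output range.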
int ObPartitionIncrementalRangeSpliter::merge_ranges(const ObDatumRangeArray &ranges,
                                                     ObDatumRangeArray &result_ranges)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(ranges.empty())) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument", KR(ret), K(ranges));
  } else {
    const int64_t num_ranges_per_thread = (ranges.count() + MAX_MERGE_THREAD - 1) / MAX_MERGE_THREAD;
    ObDatumRange merged_range;
    const ObDatumRange *range;
    merged_range.end_key_.set_min_rowkey();
    merged_range.set_left_open();
    merged_range.set_right_closed();
    for (int64_t i = 0; OB_SUCC(ret) && i < ranges.count(); i += num_ranges_per_thread) {
      if (num_ranges_per_thread == 1) {
        range = &ranges.at(i);
      } else {
        int64_t last = i + num_ranges_per_thread - 1;
        last = (last < ranges.count() ? last : ranges.count() - 1);
        const ObDatumRange &first_range = ranges.at(i);
        const ObDatumRange &last_range = ranges.at(last);
        // merged_range.end_key_ should be the same as first_range.start_key_
        merged_range.set_start_key(first_range.get_start_key());
        merged_range.set_end_key(last_range.get_end_key());
        range = &merged_range;
      }

      if (OB_FAIL(result_ranges.push_back(*range))) {
        STORAGE_LOG(WARN, "failed to push range", KR(ret), K(*range));
      }
    }
    if (OB_SUCC(ret)) {
      (result_ranges.at(result_ranges.count() - 1)).set_right_open();
    }
  }
  return ret;
}

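// Sanity check that the ranges cover (MIN, MAX) without gaps: the first range starts from MIN,
// the last ends at MAX, and each range's start key equals the previous range's end key with
// matching open/closed borders.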
int ObPartitionIncrementalRangeSpliter::check_continuous(const ObStorageDatumUtils &datum_utils,
                                                         ObDatumRangeArray &ranges)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(ranges.empty())) {
    ret = OB_ERR_UNEXPECTED;
    STORAGE_LOG(WARN, "Unexpected bad ranges (empty)", K(ret), K(ranges));
  } else if (OB_UNLIKELY(!ranges.at(0).get_start_key().is_min_rowkey() ||
                         !ranges.at(0).is_left_open())) {
    ret = OB_ERR_UNEXPECTED;
    STORAGE_LOG(WARN, "Unexpected bad ranges (first start key not min)", K(ret), K(ranges));
  } else if (OB_UNLIKELY(!ranges.at(ranges.count() - 1).get_end_key().is_max_rowkey() ||
                         !ranges.at(ranges.count() - 1).is_right_open())) {
    ret = OB_ERR_UNEXPECTED;
    STORAGE_LOG(WARN, "Unexpected bad ranges (last end key not max)", K(ret), K(ranges));
  } else {
    bool is_equal = false;
    for (int64_t i = 1; OB_SUCC(ret) && i < ranges.count(); i++) {
      if (OB_UNLIKELY(!ranges.at(i - 1).is_right_closed())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected bad ranges (end key not included)", K(ret), K(i), K(ranges));
      } else if (OB_UNLIKELY(!ranges.at(i).is_left_open())) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected bad ranges (start key not excluded)", K(ret), K(i), K(ranges));
      } else if (OB_FAIL(ranges.at(i).get_start_key().equal(ranges.at(i - 1).get_end_key(),
                                                            datum_utils, is_equal))) {
        STORAGE_LOG(WARN, "Failed to compare rowkeys", K(ret), K(i), K(ranges));
      } else if (OB_UNLIKELY(!is_equal)) {
        ret = OB_ERR_UNEXPECTED;
        STORAGE_LOG(WARN, "Unexpected bad ranges (not contiguous)", K(ret), K(i), K(ranges));
      }
    }
  }
  return ret;
}

}  // namespace storage
}  // namespace oceanbase