diff --git a/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h b/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h index a47a78733..4de6120ed 100644 --- a/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h +++ b/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h @@ -390,6 +390,7 @@ void TestIndexBlockDataPrepare::prepare_data() index_desc.schema_version_ = 10; ASSERT_TRUE(index_desc.is_valid()); ASSERT_EQ(OB_SUCCESS, root_index_builder_->init(index_desc)); + root_index_builder_->index_store_desc_.need_pre_warm_ = false; // close index block pre warm ASSERT_EQ(OB_SUCCESS, writer.open(desc, start_seq)); ASSERT_EQ(OB_SUCCESS, row_generate_.init(table_schema_, &allocator_)); diff --git a/src/objit/include/objit/common/ob_item_type.h b/src/objit/include/objit/common/ob_item_type.h old mode 100644 new mode 100755 index c53823ce7..da5b9ee09 --- a/src/objit/include/objit/common/ob_item_type.h +++ b/src/objit/include/objit/common/ob_item_type.h @@ -1206,6 +1206,9 @@ typedef enum ObItemType T_FULL_HINT, T_USE_DAS_HINT, T_NO_USE_DAS_HINT, + T_INDEX_SS_HINT, + T_INDEX_SS_ASC_HINT, + T_INDEX_SS_DESC_HINT, T_USE_MERGE, T_USE_HASH, T_NO_USE_HASH, diff --git a/src/observer/CMakeLists.txt b/src/observer/CMakeLists.txt index 07f4c8953..c3ce0e978 100644 --- a/src/observer/CMakeLists.txt +++ b/src/observer/CMakeLists.txt @@ -162,6 +162,7 @@ ob_set_subtarget(ob_server virtual_table virtual_table/ob_all_virtual_files_table.cpp virtual_table/ob_all_virtual_id_service.cpp virtual_table/ob_all_virtual_io_stat.cpp + virtual_table/ob_all_virtual_kvcache_store_memblock.cpp virtual_table/ob_all_virtual_load_data_stat.cpp virtual_table/ob_all_virtual_lock_wait_stat.cpp virtual_table/ob_all_virtual_long_ops_status.cpp diff --git a/src/observer/virtual_table/ob_all_virtual_kvcache_store_memblock.cpp b/src/observer/virtual_table/ob_all_virtual_kvcache_store_memblock.cpp new file mode 100644 index 000000000..54cd79098 --- /dev/null +++ 
b/src/observer/virtual_table/ob_all_virtual_kvcache_store_memblock.cpp @@ -0,0 +1,200 @@ +// Copyright (c) 2022 OceanBase +// Authors: +// lvling +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#include "observer/virtual_table/ob_all_virtual_kvcache_store_memblock.h" + +namespace oceanbase +{ +namespace observer +{ + +ObAllVirtualKVCacheStoreMemblock::ObAllVirtualKVCacheStoreMemblock() + : ObVirtualTableScannerIterator(), + memblock_iter_(0), + addr_(nullptr), + ipstr_(), + port_(0), + memblock_infos_(), + str_buf_() +{ +} + +ObAllVirtualKVCacheStoreMemblock::~ObAllVirtualKVCacheStoreMemblock() +{ + reset(); +} + +void ObAllVirtualKVCacheStoreMemblock::reset() +{ + ObVirtualTableScannerIterator::reset(); + memblock_iter_ = 0; + addr_ = nullptr; + port_ = 0; + ipstr_.reset(); + str_buf_.reset(); + memblock_infos_.reset(); +} + +int ObAllVirtualKVCacheStoreMemblock::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + + row = nullptr; + if (OB_UNLIKELY(NULL == allocator_)) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "allocator is NULL", K(ret)); + } else if (memblock_iter_ >= memblock_infos_.count()) { + ret = OB_ITER_END; + } else if (OB_FAIL(process_row(memblock_infos_.at(memblock_iter_++)))) { + SERVER_LOG(WARN, "Fail to process current row", K(ret), K(memblock_iter_)); + } else { + row = &cur_row_; + } + + return ret; +} + +int ObAllVirtualKVCacheStoreMemblock::set_ip() +{ + int ret = OB_SUCCESS; + char ipbuf[common::OB_IP_STR_BUFF]; + if (nullptr == addr_) { + ret = OB_ENTRY_NOT_EXIST; + SERVER_LOG(WARN, "Null 
address", K(ret), KP(addr_)); + } else if (!addr_->ip_to_string(ipbuf, sizeof(ipbuf))) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(ERROR, "Fail to cast ip to string", K(ret)); + } else { + ipstr_ = ObString::make_string(ipbuf); + port_ = addr_->get_port(); + if (OB_FAIL(ob_write_string(*allocator_, ipstr_, ipstr_))) { + SERVER_LOG(WARN, "Failed to write string", K(ret)); + } + } + return ret; +} + +int ObAllVirtualKVCacheStoreMemblock::inner_open() +{ + int ret = OB_SUCCESS; + + memblock_infos_.reset(); + if (OB_FAIL(set_ip())) { + SERVER_LOG(WARN, "Fail to get ip in ObAllVirtualKVCacheStoreMemblock", K(ret)); + } else if (OB_FAIL(ObKVGlobalCache::get_instance().get_memblock_info(effective_tenant_id_, memblock_infos_))) { // get memblock info from kvcache + SERVER_LOG(WARN, "Fail to get memblock information from global cache", K(ret)); + } + + return ret; +} + +int ObAllVirtualKVCacheStoreMemblock::process_row(const ObKVCacheStoreMemblockInfo &info) +{ + int ret = OB_SUCCESS; + + if (!info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + SERVER_LOG(WARN, "Invalid argument", K(ret), K(info)); + } else { + cur_row_.count_ = reserved_column_cnt_; + for (int64_t cell_idx = 0 ; OB_SUCC(ret) && cell_idx < output_column_ids_.count() ; ++cell_idx) { + uint64_t col_id = output_column_ids_.at(cell_idx); + switch (col_id) { + case SVR_IP : { + cur_row_.cells_[cell_idx].set_varchar(ipstr_); + cur_row_.cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case SVR_PORT : { + cur_row_.cells_[cell_idx].set_int(port_); + break; + } + case TENANT_ID : { + cur_row_.cells_[cell_idx].set_int(info.tenant_id_); + break; + } + case CACHE_ID : { + cur_row_.cells_[cell_idx].set_int(info.cache_id_); + break; + } + case CACHE_NAME : { + cur_row_.cells_[cell_idx].set_varchar(info.cache_name_); + cur_row_.cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case 
MEMBLOCK_PTR : { + cur_row_.cells_[cell_idx].set_varchar(info.memblock_ptr_); + cur_row_.cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case REF_COUNT : { + cur_row_.cells_[cell_idx].set_int(info.ref_count_); + break; + } + case STATUS : { + cur_row_.cells_[cell_idx].set_int(info.using_status_); + break; + } + case POLICY : { + cur_row_.cells_[cell_idx].set_int(info.policy_); + break; + } + case KV_CNT : { + cur_row_.cells_[cell_idx].set_int(info.kv_cnt_); + break; + } + case GET_CNT : { + cur_row_.cells_[cell_idx].set_int(info.get_cnt_); + break; + } + case RECENT_GET_CNT : { + cur_row_.cells_[cell_idx].set_int(info.recent_get_cnt_); + break; + } + case PRIORITY : { + cur_row_.cells_[cell_idx].set_int(info.priority_); + break; + } + case SCORE : { + static const int64_t MAX_DOUBLE_PRINT_SIZE = 64; + char buf[MAX_DOUBLE_PRINT_SIZE]; + memset(buf, 0, MAX_DOUBLE_PRINT_SIZE); + str_buf_.reset(); + number::ObNumber num; + double value = info.score_; + if (OB_UNLIKELY(0 > snprintf(buf, MAX_DOUBLE_PRINT_SIZE, "%lf", value))) { + ret = OB_IO_ERROR; + SERVER_LOG(WARN, "snprintf fail", K(ret), K(errno), KERRNOMSG(errno)); + } else if (OB_FAIL(num.from(buf, str_buf_))) { + SERVER_LOG(WARN, "Fail to cast to number", K(ret), K(cell_idx), K(output_column_ids_), K(col_id)); + } else { + cur_row_.cells_[cell_idx].set_number(num); + } + break; + } + case ALIGN_SIZE : { + cur_row_.cells_[cell_idx].set_int(info.align_size_); + break; + } + default : { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "Invalid column id", K(ret), K(cell_idx), K(col_id), K(output_column_ids_)); + break; + } + } + } + } + return ret; +} + +} // observer +} // oceanbase \ No newline at end of file diff --git a/src/observer/virtual_table/ob_all_virtual_kvcache_store_memblock.h b/src/observer/virtual_table/ob_all_virtual_kvcache_store_memblock.h new file mode 100644 index 000000000..0805fe1ad --- /dev/null +++ 
b/src/observer/virtual_table/ob_all_virtual_kvcache_store_memblock.h @@ -0,0 +1,66 @@ +// Copyright (c) 2022 OceanBase +// Authors: +// lvling +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_H_ +#define OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_H_ +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/cache/ob_kv_storecache.h" + +namespace oceanbase +{ +namespace observer +{ + +class ObAllVirtualKVCacheStoreMemblock : public common::ObVirtualTableScannerIterator +{ +public: + ObAllVirtualKVCacheStoreMemblock(); + virtual ~ObAllVirtualKVCacheStoreMemblock(); + virtual void reset(); + OB_INLINE void set_addr(common::ObAddr &addr) {addr_ = &addr;} + virtual int inner_get_next_row(common::ObNewRow *&row); +private: + virtual int set_ip(); + virtual int inner_open() override; + int process_row(const ObKVCacheStoreMemblockInfo &info); +private: + enum CACHE_COLUMN + { + SVR_IP = common::OB_APP_MIN_COLUMN_ID, + SVR_PORT, + TENANT_ID, + CACHE_ID, + CACHE_NAME, + MEMBLOCK_PTR, + REF_COUNT, + STATUS, + POLICY, + KV_CNT, + GET_CNT, + RECENT_GET_CNT, + PRIORITY, + SCORE, + ALIGN_SIZE + }; + int64_t memblock_iter_; + common::ObAddr *addr_; + common::ObString ipstr_; + int32_t port_; + ObSEArray memblock_infos_; + common::ObStringBuf str_buf_; + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualKVCacheStoreMemblock); +}; + +} // observer +} // oceanbase + +#endif // OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_H_ diff --git a/src/observer/virtual_table/ob_all_virtual_sys_stat.cpp 
b/src/observer/virtual_table/ob_all_virtual_sys_stat.cpp index 16011c8a0..f5e890297 100644 --- a/src/observer/virtual_table/ob_all_virtual_sys_stat.cpp +++ b/src/observer/virtual_table/ob_all_virtual_sys_stat.cpp @@ -396,7 +396,7 @@ int ObAllVirtualSysStat::get_cache_size_(const int64_t tenant_id, ObStatEventSet { int ret = OB_SUCCESS; ObArray inst_handles; - if (OB_FAIL(ObKVGlobalCache::get_instance().get_tenant_cache_info(tenant_id, inst_handles))) { + if (OB_FAIL(ObKVGlobalCache::get_instance().get_cache_inst_info(tenant_id, inst_handles))) { SERVER_LOG(WARN, "Fail to get tenant cache infos, ", K(ret)); } else { ObKVCacheInst * inst = NULL; diff --git a/src/observer/virtual_table/ob_information_kvcache_table.cpp b/src/observer/virtual_table/ob_information_kvcache_table.cpp index 43d09cd0e..179e59773 100644 --- a/src/observer/virtual_table/ob_information_kvcache_table.cpp +++ b/src/observer/virtual_table/ob_information_kvcache_table.cpp @@ -27,6 +27,7 @@ ObInfoSchemaKvCacheTable::ObInfoSchemaKvCacheTable() cache_iter_(0), str_buf_(), arenallocator_(), + tenant_di_info_(), tenant_dis_() { } @@ -49,241 +50,263 @@ void ObInfoSchemaKvCacheTable::reset() cells_[i].reset(); } arenallocator_.reset(); + tenant_di_info_.reset(); tenant_dis_.reset(); } -int ObInfoSchemaKvCacheTable::set_ip(common::ObAddr *addr) +int ObInfoSchemaKvCacheTable::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + + row = nullptr; + ObKVCacheInst * inst = NULL; + ObDiagnoseTenantInfo *tenant_info = nullptr; + if (OB_UNLIKELY(NULL == allocator_)) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "Invalid allocator, not init", K(ret), KP(allocator_)); + } else if (OB_FAIL(get_handles(inst, tenant_info))) { + if (OB_ITER_END != ret) { + SERVER_LOG(WARN, "Fail to get cache inst or tenant diagnose info", K(ret)); + } + } else if (OB_FAIL(set_diagnose_info(inst, tenant_info))) { + SERVER_LOG(WARN, "Fail to set diagnose info for cache inst", K(ret)); + } else if (OB_FAIL(process_row(inst))) { + 
SERVER_LOG(WARN, "Fail to process current row", K(ret)); + } else { + row = &cur_row_; + } + + return ret; +} + +int ObInfoSchemaKvCacheTable::set_ip() { int ret = OB_SUCCESS; char ipbuf[common::OB_IP_STR_BUFF]; - if (NULL == addr){ + if (nullptr == addr_) { ret = OB_ENTRY_NOT_EXIST; + SERVER_LOG(WARN, "Null address", K(ret), KP(addr_)); } else if (!addr_->ip_to_string(ipbuf, sizeof(ipbuf))) { - SERVER_LOG(ERROR, "ip to string failed"); ret = OB_ERR_UNEXPECTED; + SERVER_LOG(ERROR, "Fail to cast ip to string", K(ret)); } else { ipstr_ = ObString::make_string(ipbuf); if (OB_FAIL(ob_write_string(*allocator_, ipstr_, ipstr_))) { - SERVER_LOG(WARN, "failed to write string", K(ret)); + SERVER_LOG(WARN, "Failed to write string", K(ret)); } port_ = addr_->get_port(); } return ret; } -int ObInfoSchemaKvCacheTable::inner_get_next_row(ObNewRow *&row) +int ObInfoSchemaKvCacheTable::inner_open() { int ret = OB_SUCCESS; - static const int64_t MAX_DOUBLE_PRINT_SIZE = 64; - if (OB_UNLIKELY(NULL == allocator_)) { - ret = OB_NOT_INIT; - SERVER_LOG(WARN, "allocator is NULL", K(ret)); - } else { - ObKVCacheInst * inst = NULL; - const int64_t col_count = output_column_ids_.count(); - cur_row_.cells_ = cells_; - cur_row_.count_ = reserved_column_cnt_; - if (0 == cache_iter_) { - inst_handles_.reuse(); - if (OB_SUCCESS != (ret = set_ip(addr_))){ - SERVER_LOG(WARN, "can't get ip", K(ret)); - } else if (is_sys_tenant(effective_tenant_id_)) { - // sys tenant show all tenant infos - if (OB_FAIL(ObKVGlobalCache::get_instance().get_all_cache_info(inst_handles_))) { - SERVER_LOG(WARN, "Fail to get all cache info, ", K(ret)); - } - } else { - // user tenant show user tenant infos - if (OB_FAIL(ObKVGlobalCache::get_instance().get_tenant_cache_info(effective_tenant_id_, inst_handles_))) { - SERVER_LOG(WARN, "Fail to get tenant cache info", KR(ret), K(effective_tenant_id_)); - } - } - - if(OB_SUCC(ret) && oceanbase::lib::is_diagnose_info_enabled()) { - arenallocator_.reuse(); - tenant_dis_.reuse(); 
- if (is_sys_tenant(effective_tenant_id_)) { - if (OB_FAIL(ObDIGlobalTenantCache::get_instance().get_all_stat_event(arenallocator_, tenant_dis_))) { - SERVER_LOG(WARN, "Fail to get all_stat_event when diagnose_info_enabled, ", K(ret)); - } - } else { - // TODO: by chaser.ch, add get tenant stat event interface - } - } - } - - if (OB_SUCC(ret)) { - if (cache_iter_ >= inst_handles_.count()) { - ret = OB_ITER_END; - } else { - inst = inst_handles_.at(cache_iter_).get_inst(); - if (OB_ISNULL(inst)) { - ret = OB_ERR_SYS; - SERVER_LOG(WARN, "kvcache inst should not be null", K(ret)); - } - } - } - if(OB_SUCCESS == ret && oceanbase::lib::is_diagnose_info_enabled()) { - //use diagnose_info to set cache miss_cnt and hit_cnt - ObDiagnoseTenantInfo *tenant_info = NULL; - for (int64_t i = 0; i < tenant_dis_.count(); ++i) { - if (tenant_dis_.at(i).first == inst_handles_.at(cache_iter_).get_inst()->tenant_id_) { - tenant_info = tenant_dis_.at(i).second; - break; - } - } - if (OB_LIKELY(NULL != tenant_info && NULL != inst->status_.config_)) { - if( 0 == strcmp(inst->status_.config_->cache_name_,"user_block_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::BLOCK_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::BLOCK_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"user_row_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::ROW_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::ROW_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"bf_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::BLOOM_FILTER_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::BLOOM_FILTER_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"fuse_row_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::FUSE_ROW_CACHE_MISS); - 
inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::FUSE_ROW_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"tablet_ls_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::TABLET_LS_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::TABLET_LS_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"schema_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::SCHEMA_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::SCHEMA_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"opt_table_stat_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::OPT_TABLE_STAT_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::OPT_TABLE_STAT_CACHE_HIT)); - } else if( 0 == strcmp(inst->status_.config_->cache_name_,"opt_column_stat_cache")) { - inst->status_.total_miss_cnt_ = - GLOBAL_EVENT_GET(ObStatEventIds::OPT_COLUMN_STAT_CACHE_MISS); - inst->status_.total_hit_cnt_.set( - GLOBAL_EVENT_GET(ObStatEventIds::OPT_COLUMN_STAT_CACHE_HIT)); - } - } - } - if (OB_SUCCESS == ret) { - str_buf_.reset(); - uint64_t cell_idx = 0; - double value = 0; - char buf[MAX_DOUBLE_PRINT_SIZE]; - memset(buf, 0, MAX_DOUBLE_PRINT_SIZE); - number::ObNumber num; - for (int64_t j = 0; OB_SUCC(ret) && j < col_count; ++j) { - uint64_t col_id = output_column_ids_.at(j); - switch(col_id) { - case TENANT_ID: { - cells_[cell_idx].set_int(inst->tenant_id_); - break; - } - case SVR_IP: { - cells_[cell_idx].set_varchar(ipstr_); - cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); - break; - } - case SVR_PORT: { - cells_[cell_idx].set_int(port_); - break; - } - case CACHE_NAME: { - if (NULL != inst->status_.config_) { - cells_[cell_idx].set_varchar(inst->status_.config_->cache_name_); - 
cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); - } - break; - } - case CACHE_ID: { - cells_[cell_idx].set_int(inst->cache_id_); - break; - } - case CACHE_SIZE: { - cells_[cell_idx].set_int(inst->status_.store_size_ + inst->status_.map_size_); - break; - } - case PRIORITY: { - if (NULL != inst->status_.config_) { - cells_[cell_idx].set_int(inst->status_.config_->priority_); - } else { - cells_[cell_idx].set_int(0); - } - break; - } - case CACHE_STORE_SIZE: { - cells_[cell_idx].set_int(inst->status_.store_size_); - break; - } - case CACHE_MAP_SIZE: { - cells_[cell_idx].set_int(inst->status_.map_size_); - break; - } - case KV_CNT: { - cells_[cell_idx].set_int(inst->status_.kv_cnt_); - break; - } - case HIT_RATIO: { - memset(buf, 0, MAX_DOUBLE_PRINT_SIZE); - value = inst->status_.get_hit_ratio() *100; - if (OB_UNLIKELY(0 > snprintf(buf, MAX_DOUBLE_PRINT_SIZE, "%lf", value))) { - ret = OB_IO_ERROR; - SERVER_LOG(WARN, "snprintf fail", K(ret), K(errno), KERRNOMSG(errno)); - } else if (OB_SUCCESS == (ret = num.from(buf, str_buf_))) { - cells_[cell_idx].set_number(num); - } - break; - } - case TOTAL_PUT_CNT: { - cells_[cell_idx].set_int(inst->status_.total_put_cnt_.value()); - break; - } - case TOTAL_HIT_CNT: { - cells_[cell_idx].set_int(inst->status_.total_hit_cnt_.value()); - break; - } - case TOTAL_MISS_CNT: { - cells_[cell_idx].set_int(inst->status_.total_miss_cnt_); - break; - } - case HOLD_SIZE: { - cells_[cell_idx].set_int(inst->status_.hold_size_); - break; - } - default: { - ret = OB_ERR_UNEXPECTED; - SERVER_LOG(WARN, "invalid column id", K(ret), K(cell_idx), - K(output_column_ids_), K(col_id)); - break; - } - } - if (OB_SUCC(ret)) { - cell_idx++; - } else if (OB_ERR_UNEXPECTED != ret) { - SERVER_LOG(WARN, "failed cast to number", K(ret), K(cell_idx), - K(output_column_ids_), K(col_id)); - } - } - } - - if (OB_SUCC(ret)) { - cache_iter_++; - row = &cur_row_; - } + inst_handles_.reuse(); + if 
(OB_FAIL(set_ip())) { + SERVER_LOG(WARN, "Fail to set ip from addr", K(ret), K(addr_)); + } else if (OB_FAIL(ObKVGlobalCache::get_instance().get_cache_inst_info(effective_tenant_id_, inst_handles_))) { + SERVER_LOG(WARN, "Fail to get cache info", K(ret), K(effective_tenant_id_)); + } else if (OB_FAIL(get_tenant_info())) { + SERVER_LOG(WARN, "Fail to get tenant info", K(ret)); } + return ret; } + +int ObInfoSchemaKvCacheTable::get_tenant_info() +{ + int ret = OB_SUCCESS; + + if (oceanbase::lib::is_diagnose_info_enabled()) { + if (is_sys_tenant(effective_tenant_id_)) { + arenallocator_.reuse(); + tenant_dis_.reuse(); + if (OB_FAIL(ObDIGlobalTenantCache::get_instance().get_all_stat_event(arenallocator_, tenant_dis_))) { + SERVER_LOG(WARN, "Fail to get all stat event", K(ret)); + } + } else { + tenant_di_info_.reset(); + if (OB_FAIL(ObDIGlobalTenantCache::get_instance().get_the_diag_info(effective_tenant_id_, tenant_di_info_))) { + SERVER_LOG(WARN, "Fail to get tenant stat event", K(ret), K(effective_tenant_id_)); + } + } + } + + return ret; +} + +int ObInfoSchemaKvCacheTable::get_handles(ObKVCacheInst *&inst, ObDiagnoseTenantInfo *&tenant_info) +{ + int ret = OB_SUCCESS; + + inst = nullptr; + tenant_info = nullptr; + if (cache_iter_ >= inst_handles_.count()) { + ret = OB_ITER_END; + } else { + inst = inst_handles_.at(cache_iter_++).get_inst(); + } + if (OB_FAIL(ret)) { + } else if (!oceanbase::lib::is_diagnose_info_enabled()) { + } else if (is_sys_tenant(effective_tenant_id_)) { + for (int64_t i = 0 ; i < tenant_dis_.count() ; ++i) { + if (tenant_dis_.at(i).first == inst->tenant_id_) { + tenant_info = tenant_dis_.at(i).second; + break; + } + } + } else { + tenant_info = &tenant_di_info_; + } + + return ret; +} + +int ObInfoSchemaKvCacheTable::set_diagnose_info(ObKVCacheInst *inst, ObDiagnoseTenantInfo *tenant_info) +{ + int ret = OB_SUCCESS; + + if (!oceanbase::lib::is_diagnose_info_enabled()) { + } else if (nullptr == tenant_info || nullptr == inst) { + ret = 
OB_INVALID_ARGUMENT; + SERVER_LOG(WARN, "Invalid argument", K(ret), KP(inst), KP(tenant_info)); + } else if (nullptr == inst->status_.config_) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "Unexpected null cache inst config", KP(inst->status_.config_)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"user_block_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::BLOCK_CACHE_MISS); + inst->status_.total_hit_cnt_.set( GLOBAL_EVENT_GET(ObStatEventIds::BLOCK_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"user_row_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::ROW_CACHE_MISS); + inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::ROW_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"bf_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::BLOOM_FILTER_CACHE_MISS); + inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::BLOOM_FILTER_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"fuse_row_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::FUSE_ROW_CACHE_MISS); + inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::FUSE_ROW_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"tablet_ls_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::LOCATION_CACHE_MISS); + inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::LOCATION_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"schema_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::SCHEMA_CACHE_MISS); + inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::SCHEMA_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"opt_table_stat_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::OPT_TABLE_STAT_CACHE_MISS); + 
inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::OPT_TABLE_STAT_CACHE_HIT)); + } else if (0 == strcmp(inst->status_.config_->cache_name_,"opt_column_stat_cache")) { + inst->status_.total_miss_cnt_ = GLOBAL_EVENT_GET(ObStatEventIds::OPT_COLUMN_STAT_CACHE_MISS); + inst->status_.total_hit_cnt_.set(GLOBAL_EVENT_GET(ObStatEventIds::OPT_COLUMN_STAT_CACHE_HIT)); + } + + return ret; +} + +int ObInfoSchemaKvCacheTable::process_row(const ObKVCacheInst *inst) +{ + int ret = OB_SUCCESS; + + uint64_t cell_idx = 0; + cur_row_.cells_ = cells_; + cur_row_.count_ = reserved_column_cnt_; + for (int64_t i = 0 ; OB_SUCC(ret) && i < output_column_ids_.count() ; ++i) { + uint64_t col_id = output_column_ids_.at(i); + switch(col_id) { + case TENANT_ID: { + cells_[cell_idx].set_int(inst->tenant_id_); + break; + } + case SVR_IP: { + cells_[cell_idx].set_varchar(ipstr_); + cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case SVR_PORT: { + cells_[cell_idx].set_int(port_); + break; + } + case CACHE_NAME: { + if (NULL != inst->status_.config_) { + cells_[cell_idx].set_varchar(inst->status_.config_->cache_name_); + cells_[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + } + break; + } + case CACHE_ID: { + cells_[cell_idx].set_int(inst->cache_id_); + break; + } + case CACHE_SIZE: { + cells_[cell_idx].set_int(inst->status_.store_size_ + inst->status_.map_size_); + break; + } + case PRIORITY: { + if (NULL != inst->status_.config_) { + cells_[cell_idx].set_int(inst->status_.config_->priority_); + } else { + cells_[cell_idx].set_int(0); + } + break; + } + case CACHE_STORE_SIZE: { + cells_[cell_idx].set_int(inst->status_.store_size_); + break; + } + case CACHE_MAP_SIZE: { + cells_[cell_idx].set_int(inst->status_.map_size_); + break; + } + case KV_CNT: { + cells_[cell_idx].set_int(inst->status_.kv_cnt_); + break; + } + case HIT_RATIO: { + str_buf_.reset(); + 
number::ObNumber num; + double value = inst->status_.get_hit_ratio() * 100; + static const int64_t MAX_DOUBLE_PRINT_SIZE = 64; + char buf[MAX_DOUBLE_PRINT_SIZE]; + memset(buf, 0, MAX_DOUBLE_PRINT_SIZE); + if (OB_UNLIKELY(0 > snprintf(buf, MAX_DOUBLE_PRINT_SIZE, "%lf", value))) { + ret = OB_IO_ERROR; + SERVER_LOG(WARN, "Fail to snprintf hit ratio", K(ret), K(errno), KERRNOMSG(errno)); + } else if (OB_FAIL(num.from(buf, str_buf_))) { + SERVER_LOG(WARN, "Fail to cast to number", K(ret)); + } else { + cells_[cell_idx].set_number(num); + } + break; + } + case TOTAL_PUT_CNT: { + cells_[cell_idx].set_int(inst->status_.total_put_cnt_.value()); + break; + } + case TOTAL_HIT_CNT: { + cells_[cell_idx].set_int(inst->status_.total_hit_cnt_.value()); + break; + } + case TOTAL_MISS_CNT: { + cells_[cell_idx].set_int(inst->status_.total_miss_cnt_); + break; + } + case HOLD_SIZE: { + cells_[cell_idx].set_int(inst->status_.hold_size_); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "Invalid column id", K(ret), K(cell_idx), K(output_column_ids_), K(col_id)); + break; + } + } + ++cell_idx; + } + + return ret; +} + + }/* ns observer*/ }/* ns oceanbase */ diff --git a/src/observer/virtual_table/ob_information_kvcache_table.h b/src/observer/virtual_table/ob_information_kvcache_table.h index fca4450d6..a76ad4868 100644 --- a/src/observer/virtual_table/ob_information_kvcache_table.h +++ b/src/observer/virtual_table/ob_information_kvcache_table.h @@ -33,13 +33,20 @@ class ObInfoSchemaKvCacheTable : public common::ObVirtualTableScannerIterator public: ObInfoSchemaKvCacheTable(); virtual ~ObInfoSchemaKvCacheTable(); - virtual int inner_get_next_row(common::ObNewRow *&row); - virtual void reset(); - inline void set_addr(common::ObAddr &addr) {addr_ = &addr;} - virtual int set_ip(common::ObAddr *addr); - -private: - enum CACHE_COLUMN + virtual int inner_get_next_row(common::ObNewRow *&row); + virtual void reset(); + inline void set_addr(common::ObAddr &addr) {addr_ = 
&addr;} + +private: + virtual int set_ip(); + virtual int inner_open() override; + int get_tenant_info(); + int get_handles(ObKVCacheInst *&inst, ObDiagnoseTenantInfo *& tenant_info); + int set_diagnose_info(ObKVCacheInst *inst, ObDiagnoseTenantInfo *tenant_info); + int process_row(const ObKVCacheInst *inst); + +private: + enum CACHE_COLUMN { TENANT_ID = common::OB_APP_MIN_COLUMN_ID, SVR_IP, @@ -62,12 +69,13 @@ private: int32_t port_; common::ObSEArray inst_handles_; int16_t cache_iter_; - common::ObStringBuf str_buf_; - common::ObObj cells_[common::OB_ROW_MAX_COLUMNS_COUNT]; - common::ObArenaAllocator arenallocator_; - common::ObArray > tenant_dis_; - DISALLOW_COPY_AND_ASSIGN(ObInfoSchemaKvCacheTable); -}; + common::ObStringBuf str_buf_; + common::ObObj cells_[common::OB_ROW_MAX_COLUMNS_COUNT]; + common::ObArenaAllocator arenallocator_; + common::ObDiagnoseTenantInfo tenant_di_info_; + common::ObArray > tenant_dis_; + DISALLOW_COPY_AND_ASSIGN(ObInfoSchemaKvCacheTable); +}; } } diff --git a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp index a7552d3ab..465890c58 100644 --- a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp +++ b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp @@ -184,6 +184,7 @@ #include "observer/virtual_table/ob_all_virtual_ls_archive_stat.h" #include "observer/virtual_table/ob_all_virtual_dml_stats.h" #include "observer/virtual_table/ob_tenant_virtual_privilege.h" +#include "observer/virtual_table/ob_all_virtual_kvcache_store_memblock.h" #include "observer/virtual_table/ob_information_query_response_time.h" #include "observer/virtual_table/ob_all_virtual_kvcache_handle_leak_info.h" #include "observer/virtual_table/ob_all_virtual_schema_memory.h" @@ -2220,6 +2221,16 @@ int ObVTIterCreator::create_vt_iter(ObVTableScanParam ¶ms, } break; } + case OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_TID: { + ObAllVirtualKVCacheStoreMemblock 
*kvcache_store_memblock = nullptr; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObAllVirtualKVCacheStoreMemblock, kvcache_store_memblock))) { + SERVER_LOG(ERROR, "Fail to create __all_virtual_kvcache_store_memblock", K(ret)); + } else { + kvcache_store_memblock->set_addr(addr_); + vt_iter = static_cast(kvcache_store_memblock); + } + break; + } case OB_ALL_VIRTUAL_DTL_INTERM_RESULT_MONITOR_TID: { ObAllDtlIntermResultMonitor *dtl_interm_result_monitor = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObAllDtlIntermResultMonitor, dtl_interm_result_monitor))) { diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index 1170fbb19..28829dcd6 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -61,6 +61,7 @@ ob_set_subtarget(ob_share cache cache/ob_working_set_mgr.cpp cache/ob_kvcache_hazard_version.cpp cache/ob_kvcache_handle_ref_checker.cpp + cache/ob_kvcache_pre_warmer.cpp ) ob_set_subtarget(ob_share scheduler diff --git a/src/share/cache/ob_kv_storecache.cpp b/src/share/cache/ob_kv_storecache.cpp index 86701b009..c66b1c9db 100644 --- a/src/share/cache/ob_kv_storecache.cpp +++ b/src/share/cache/ob_kv_storecache.cpp @@ -887,22 +887,6 @@ int ObKVGlobalCache::set_checker_cache_name(const char *cache_name) return ret; } -int ObKVGlobalCache::get_tenant_cache_info(const uint64_t tenant_id, ObIArray &inst_handles) -{ - int ret = OB_SUCCESS; - if (!inited_) { - ret = OB_NOT_INIT; - COMMON_LOG(WARN, "The ObKVGlobalCache has not been inited, ", K(ret)); - } else if (0 == tenant_id) { - ret = OB_INVALID_ARGUMENT; - COMMON_LOG(WARN, "Invalid argument, ", K(tenant_id), K(ret)); - } else if (OB_FAIL(insts_.get_tenant_cache_info(tenant_id, inst_handles))) { - COMMON_LOG(WARN, "Fail to get all cache info, ", K(ret)); - } - return ret; -} - - void ObKVGlobalCache::print_all_cache_info() { if (OB_UNLIKELY(!inited_)) { @@ -913,18 +897,39 @@ void ObKVGlobalCache::print_all_cache_info() } } -int ObKVGlobalCache::get_all_cache_info(ObIArray &inst_handles) +int 
ObKVGlobalCache::get_cache_inst_info(const uint64_t tenant_id, ObIArray &inst_handles) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + COMMON_LOG(WARN, "The ObKVGlobalCache has not been inited", K(ret)); + } else if (OB_INVALID_TENANT_ID == tenant_id) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "Invalid argument", K(ret), K(tenant_id)); + } else if (OB_FAIL(insts_.get_cache_info(tenant_id, inst_handles))) { + COMMON_LOG(WARN, "Fail to get all cache info", K(ret)); + } + + return ret; +} + +int ObKVGlobalCache::get_memblock_info(const uint64_t tenant_id, ObIArray &memblock_infos) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; - COMMON_LOG(WARN, "The ObKVGlobalCache has not been inited, ", K(ret)); - } else if (OB_FAIL(insts_.get_all_cache_info(inst_handles))) { - COMMON_LOG(WARN, "Fail to get all cache info, ", K(ret)); + COMMON_LOG(WARN, "The ObKVGlobalCache has not been inited", K(ret)); + } else if (0 == tenant_id) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "Invalid argument", K(ret), K(tenant_id)); + } else if (OB_FAIL(store_.get_memblock_info(tenant_id, memblock_infos))) { + COMMON_LOG(WARN, "Fail to get all memblock info", K(ret)); } return ret; } + int ObKVGlobalCache::get_cache_id(const char *cache_name, int64_t &cache_id) { int ret = OB_SUCCESS; diff --git a/src/share/cache/ob_kv_storecache.h b/src/share/cache/ob_kv_storecache.h index b103e8e42..9588215e8 100644 --- a/src/share/cache/ob_kv_storecache.h +++ b/src/share/cache/ob_kv_storecache.h @@ -140,8 +140,8 @@ public: void reload_priority(); int reload_wash_interval(); int64_t get_suitable_bucket_num(); - int get_tenant_cache_info(const uint64_t tenant_id, ObIArray &inst_handles); - int get_all_cache_info(ObIArray &inst_handles); + int get_cache_inst_info(const uint64_t tenant_id, ObIArray &inst_handles); + int get_memblock_info(const uint64_t tenant_id, ObIArray &memblock_infos); void print_all_cache_info(); int erase_cache(); 
virtual int erase_cache(const uint64_t tenant_id) override; diff --git a/src/share/cache/ob_kvcache_inst_map.cpp b/src/share/cache/ob_kvcache_inst_map.cpp index eccc2414d..47279e22e 100644 --- a/src/share/cache/ob_kvcache_inst_map.cpp +++ b/src/share/cache/ob_kvcache_inst_map.cpp @@ -116,6 +116,11 @@ void ObKVCacheInstHandle::reset() inst_ = NULL; } +bool ObKVCacheInstHandle::is_valid() const +{ + return (nullptr != map_) && (nullptr != inst_); +} + ObKVCacheInstHandle::ObKVCacheInstHandle(const ObKVCacheInstHandle &other) { map_ = other.map_; @@ -425,7 +430,7 @@ int ObKVCacheInstMap::set_priority(const int64_t cache_id, const int64_t old_pri return ret; } -int ObKVCacheInstMap::get_tenant_cache_info(const uint64_t tenant_id, ObIArray &inst_handles) +int ObKVCacheInstMap::get_cache_info(const uint64_t tenant_id, ObIArray &inst_handles) { int ret = OB_SUCCESS; if (!is_inited_) { @@ -437,15 +442,9 @@ int ObKVCacheInstMap::get_tenant_cache_info(const uint64_t tenant_id, ObIArrayfirst.tenant_id_ == tenant_id) { - ObKVCacheInstHandle handle; - handle.inst_ = iter->second; - handle.map_ = this; - handle.inst_->status_.map_size_ = iter->second->node_allocator_.allocated(); - add_inst_ref(handle.inst_); - if (OB_FAIL(inst_handles.push_back(handle))) { - COMMON_LOG(WARN, "Fail to push info to array, ", K(ret)); - } + if (iter->first.tenant_id_ != tenant_id && OB_SYS_TENANT_ID != tenant_id) { + } else if (OB_FAIL(inner_push_inst_handle(iter, inst_handles))) { + COMMON_LOG(WARN, "Fail to inner push cache inst", K(ret)); } } } @@ -521,29 +520,6 @@ void ObKVCacheInstMap::print_all_cache_info() } } -int ObKVCacheInstMap::get_all_cache_info(ObIArray &inst_handles) -{ - int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - COMMON_LOG(WARN, "The ObKVCacheInstMap has not been inited, ", K(ret)); - } else { - DRWLock::RDLockGuard rd_guard(lock_); - for (KVCacheInstMap::iterator iter = inst_map_.begin(); - OB_SUCC(ret) && iter != inst_map_.end(); ++iter) { - 
ObKVCacheInstHandle handle; - handle.inst_ = iter->second; - handle.map_ = this; - handle.inst_->status_.map_size_ = iter->second->node_allocator_.allocated(); - add_inst_ref(handle.inst_); - if (OB_FAIL(inst_handles.push_back(handle))) { - COMMON_LOG(WARN, "Fail to push info to array, ", K(ret)); - } - } - } - return ret; -} - int ObKVCacheInstMap::set_hold_size(const uint64_t tenant_id, const char *cache_name, const int64_t hold_size) { @@ -718,6 +694,22 @@ void ObKVCacheInstMap::de_inst_ref(ObKVCacheInst *inst) } } +int ObKVCacheInstMap::inner_push_inst_handle(const KVCacheInstMap::iterator &iter, ObIArray &inst_handles) +{ + INIT_SUCC(ret); + + ObKVCacheInstHandle handle; + handle.inst_ = iter->second; + handle.map_ = this; + handle.inst_->status_.map_size_ = iter->second->node_allocator_.allocated(); + add_inst_ref(handle.inst_); + if (OB_FAIL(inst_handles.push_back(handle))) { + COMMON_LOG(WARN, "Fail to push back inst handle to array", K(ret)); + } + + return ret; +} + }//end namespace common }//end namespace oceanbase diff --git a/src/share/cache/ob_kvcache_inst_map.h b/src/share/cache/ob_kvcache_inst_map.h index 9ee4b9da7..63a798d19 100644 --- a/src/share/cache/ob_kvcache_inst_map.h +++ b/src/share/cache/ob_kvcache_inst_map.h @@ -122,6 +122,7 @@ public: ObKVCacheInstHandle(); virtual ~ObKVCacheInstHandle(); void reset(); + bool is_valid() const; inline ObKVCacheInst *get_inst() { return inst_; } ObKVCacheInstHandle(const ObKVCacheInstHandle &other); ObKVCacheInstHandle& operator = (const ObKVCacheInstHandle& other); @@ -146,8 +147,7 @@ public: int clean_garbage_inst(); int refresh_score(); int set_priority(const int64_t cache_id, const int64_t old_priority, const int64_t new_priority); - int get_tenant_cache_info(const uint64_t tenant_id, ObIArray &inst_handles); - int get_all_cache_info(ObIArray &inst_handles); + int get_cache_info(const uint64_t tenant_id, ObIArray &inst_handles); void print_all_cache_info(); void print_tenant_cache_info(const uint64_t 
tenant_id); @@ -162,6 +162,8 @@ private: typedef ObFixedHashMap TenantMBListMap; void add_inst_ref(ObKVCacheInst *inst); void de_inst_ref(ObKVCacheInst *inst); + int inner_push_inst_handle(const KVCacheInstMap::iterator &iter, ObIArray &inst_handles); +private: DRWLock lock_; KVCacheInstMap inst_map_; ObFixedQueue inst_pool_;
diff --git a/src/share/cache/ob_kvcache_pre_warmer.cpp b/src/share/cache/ob_kvcache_pre_warmer.cpp
new file mode 100644
index 000000000..a3a00aba6
--- /dev/null
+++ b/src/share/cache/ob_kvcache_pre_warmer.cpp
@@ -0,0 +1,222 @@
+// Copyright 2022 Alibaba Inc. All Rights Reserved.
+// Author:
+// lvling
+
+
+#include "ob_kvcache_pre_warmer.h"
+
+namespace oceanbase
+{
+namespace common
+{
+/*
+ * -------------------------------------------- ObDataBlockCachePreWarmer --------------------------------------------
+ */
+ObDataBlockCachePreWarmer::ObDataBlockCachePreWarmer()
+  : base_percentage_(0),
+    cache_(nullptr),
+    read_info_(nullptr),
+    rest_size_(0),
+    warm_size_percentage_(100),
+    update_step_(0),
+    kvpair_(nullptr),
+    inst_handle_(),
+    cache_handle_()
+{
+}
+
+ObDataBlockCachePreWarmer::~ObDataBlockCachePreWarmer()
+{
+  reset();
+}
+
+// Unbinds the cache and read info and restores every tuning field to its constructed default.
+void ObDataBlockCachePreWarmer::reset()
+{
+  cache_ = nullptr;
+  read_info_ = nullptr;
+  base_percentage_ = 0;
+  rest_size_ = 0;
+  warm_size_percentage_ = 100;
+  update_step_ = 0;
+  reuse();
+}
+
+// Forgets the reserved kvpair and resets both handles; also run before every new reservation.
+void ObDataBlockCachePreWarmer::reuse()
+{
+  kvpair_ = nullptr;
+  inst_handle_.reset();
+  cache_handle_.reset();
+}
+
+// Binds the warmer to the data block cache of OB_STORE_CACHE.
+// The function returns void, so an inner_init() failure is only logged; cache_ then stays unset.
+void ObDataBlockCachePreWarmer::init(const ObTableReadInfo &read_info)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(inner_init(DATA_BLOCK_CACHE_PERCENTAGE, read_info, OB_STORE_CACHE.get_block_cache()))) {
+    COMMON_LOG(WARN, "Fail to inner init data block cache pre warmer", K(ret));
+  }
+}
+
+// Reserves a cache kvpair for micro_block_desc in the bound block cache.
+// Blocks below TOP_LEVEL are refused with OB_BUF_NOT_ENOUGH when the memory budget (rest_size_) is
+// used up or warm_block() filters them out; blocks at or above TOP_LEVEL are always attempted.
+// Returns OB_NOT_INIT without a bound cache/read info and OB_INVALID_ARGUMENT for a bad desc or level.
+int ObDataBlockCachePreWarmer::reserve_kvpair(const blocksstable::ObMicroBlockDesc &micro_block_desc,
+                                              const int64_t level)
+{
+  int ret = OB_SUCCESS;
+
+  int64_t kvpair_size = 0;
+  if (OB_UNLIKELY(nullptr == cache_ || nullptr == read_info_)) {
+    ret = OB_NOT_INIT;
+    COMMON_LOG(WARN, "The block cache pre warmer is not inited", K(ret), KP(cache_), KP(read_info_));
+  } else if (OB_UNLIKELY(!micro_block_desc.is_valid() || level < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    COMMON_LOG(WARN, "Invalid argument", K(ret), K(micro_block_desc), K(level));
+  } else {
+    if (level < TOP_LEVEL && (rest_size_ <= 0 || !warm_block(level))) {
+      ret = OB_BUF_NOT_ENOUGH;
+    } else if (FALSE_IT(reuse())) {
+    } else if (OB_FAIL(cache_->reserve_kvpair(micro_block_desc, *read_info_, inst_handle_,
+                                              cache_handle_, kvpair_, kvpair_size))) {
+      COMMON_LOG(WARN, "Fail to reserve block cache value", K(ret), K(micro_block_desc), KPC(read_info_));
+    } else {
+      rest_size_ = MAX(0, rest_size_ - kvpair_size);
+    }
+    update_rest();
+  }
+  COMMON_LOG(DEBUG, "pre warmer reserve cache value details", K(ret), K(kvpair_size), K(level),
+                                                              K(micro_block_desc));
+
+  return ret;
+}
+
+// Fills the key of the kvpair reserved by reserve_kvpair() and puts the pair into the kvcache.
+// Returns OB_NOT_INIT without a bound cache and OB_INVALID_ARGUMENT when nothing valid is reserved.
+int ObDataBlockCachePreWarmer::update_and_put_kvpair(const blocksstable::ObMicroBlockDesc &micro_block_desc)
+{
+  int ret = OB_SUCCESS;
+
+  blocksstable::ObIMicroBlockCache::BaseBlockCache *kvcache = nullptr;
+  if (OB_ISNULL(cache_)) {
+    ret = OB_NOT_INIT;
+    COMMON_LOG(WARN, "The block cache pre warmer is not inited", K(ret), KP(cache_));
+  } else if (OB_UNLIKELY(!micro_block_desc.is_valid() || !inst_handle_.is_valid()
+                         || !cache_handle_.is_valid() || nullptr == kvpair_)) {
+    ret = OB_INVALID_ARGUMENT;
+    COMMON_LOG(WARN, "Invalid argument", K(ret), K(micro_block_desc), K(inst_handle_), K(cache_handle_), K(kvpair_));
+  } else if (OB_FAIL(cache_->get_cache(kvcache))) {
+    // ret carries the get_cache() error here, so the failure is returned to the caller
+    COMMON_LOG(WARN, "Fail to get block kvcache", K(ret));
+  } else if (FALSE_IT(static_cast<blocksstable::ObMicroBlockCacheKey *>(kvpair_->key_)->set(
+                                  MTL_ID(),
+                                  micro_block_desc.macro_id_,
+                                  micro_block_desc.block_offset_,
+                                  micro_block_desc.buf_size_ + micro_block_desc.header_->header_size_))) {
+  } else if (OB_FAIL(kvcache->put_kvpair(inst_handle_, kvpair_, cache_handle_))) {
+    COMMON_LOG(WARN, "Fail to put kvpair into kvcache", K(ret));
+  }
+  COMMON_LOG(DEBUG, "pre warmer build cache key and put details", K(ret), K(MTL_ID()), K(rest_size_), K(update_step_),
+                                                                  K(micro_block_desc), KPC(micro_block_desc.header_));
+  // reuse handles outside
+
+  return ret;
+}
+
+// Shared init: validates the arguments, binds cache and read info, and computes the first budget.
+int ObDataBlockCachePreWarmer::inner_init(const int64_t percentage,
+                                          const ObTableReadInfo &read_info,
+                                          blocksstable::ObIMicroBlockCache &block_cache)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_UNLIKELY(percentage < 0 || !read_info.is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    COMMON_LOG(WARN, "Invalid argument", K(ret), K(percentage), K(read_info));
+  } else {
+    cache_ = &block_cache;
+    read_info_ = &read_info;
+    warm_size_percentage_ = percentage;
+    inner_update_rest();
+  }
+
+  return ret;
+}
+
+// Counts one reservation attempt and recomputes the budget once the counter has reached UPDATE_INTERVAL.
+void ObDataBlockCachePreWarmer::update_rest()
+{
+  if (OB_UNLIKELY(update_step_++ >= UPDATE_INTERVAL)) {
+    inner_update_rest();
+  }
+}
+
+// Recomputes rest_size_ as warm_size_percentage_ percent of the tenant's free memory
+// (memory limit minus memory hold), refreshes base_percentage_ and restarts the step counter.
+void ObDataBlockCachePreWarmer::inner_update_rest()
+{
+  int64_t free_memory = lib::get_tenant_memory_limit(MTL_ID()) - lib::get_tenant_memory_hold(MTL_ID());
+  rest_size_ = free_memory / 100 * warm_size_percentage_;
+  calculate_base_percentage(free_memory);
+  update_step_ = 0;
+  COMMON_LOG(DEBUG, "pre warmer update rest", K(rest_size_), K(base_percentage_), K(free_memory));
+}
+
+// base_percentage_ = free_memory * 200 / tenant memory limit, capped at 50.
+void ObDataBlockCachePreWarmer::calculate_base_percentage(const int64_t free_memory)
+{
+  base_percentage_ = MIN(free_memory * 200 / lib::get_tenant_memory_limit(MTL_ID()), 50);
+}
+
+// Time-based admission filter: admits the block when (current time % 100) <= threshold, where
+// threshold = MIN(base_percentage_ + 5 * level, 100); a threshold of 100 always admits.
+bool ObDataBlockCachePreWarmer::warm_block(const int64_t level)
+{
+  bool bret = true;
+
+  int64_t threshold = MIN(base_percentage_ + 5 * level, 100);
+  int64_t random_value = ObTimeUtility::fast_current_time() % 100;
+  if (100 > threshold) {
+    bret = random_value <= threshold;
+  }
+  COMMON_LOG(DEBUG, "block cache pre warmer filter", K(bret), K(base_percentage_), K(level), K(update_step_),
+                                                     K(random_value), K(threshold));
+
+  return bret;
+}
+
+/*
+ * -------------------------------------------- ObIndexBlockCachePreWarmer --------------------------------------------
+ */
+
+ObIndexBlockCachePreWarmer::ObIndexBlockCachePreWarmer()
+  : ObDataBlockCachePreWarmer()
+{
+}
+
+ObIndexBlockCachePreWarmer::~ObIndexBlockCachePreWarmer()
+{
+}
+
+// Binds the warmer to the index block cache of OB_STORE_CACHE; failures are only logged.
+void ObIndexBlockCachePreWarmer::init(const ObTableReadInfo &read_info)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(inner_init(INDEX_BLOCK_CACHE_PERCENTAGE, read_info, OB_STORE_CACHE.get_index_block_cache()))) {
+    COMMON_LOG(WARN, "Fail to inner init index block cache pre warmer", K(ret));
+  }
+}
+
+// Same rule as for data blocks, raised by INDEX_BLOCK_BASE_PERCENTAGE.
+void ObIndexBlockCachePreWarmer::calculate_base_percentage(const int64_t free_memory)
+{
+  ObDataBlockCachePreWarmer::calculate_base_percentage(free_memory);
+  base_percentage_ += INDEX_BLOCK_BASE_PERCENTAGE;
+}
+
+
+}; // common
+}; // oceanbase
\ No newline at end of file
diff --git a/src/share/cache/ob_kvcache_pre_warmer.h b/src/share/cache/ob_kvcache_pre_warmer.h
new file mode 100644
index 000000000..4ff3e71e8
--- /dev/null
+++ b/src/share/cache/ob_kvcache_pre_warmer.h
@@ -0,0 +1,72 @@
+// Copyright 2022 Alibaba Inc. All Rights Reserved.
+// Author:
+// lvling
+
+#ifndef OCEANBASE_COMMON_KVCACHE_PRE_WARMER_H_
+#define OCEANBASE_COMMON_KVCACHE_PRE_WARMER_H_
+
+#include "storage/blocksstable/ob_micro_block_cache.h"
+#include "storage/blocksstable/ob_storage_cache_suite.h"
+
+
+namespace oceanbase
+{
+namespace common
+{
+
+// Reserves and fills block cache kvpairs for micro blocks, limited by a budget derived from tenant free memory.
+class ObDataBlockCachePreWarmer
+{
+public:
+  ObDataBlockCachePreWarmer();
+  virtual ~ObDataBlockCachePreWarmer();
+  void reset();
+  void reuse();
+  // Binds the data block cache; check is_valid() afterwards because failures are only logged.
+  void init(const ObTableReadInfo &read_info);
+  OB_INLINE bool is_valid() const { return nullptr != cache_; }
+  int reserve_kvpair(const blocksstable::ObMicroBlockDesc &micro_block_desc, const int64_t level = 0);
+  int update_and_put_kvpair(const blocksstable::ObMicroBlockDesc &micro_block_desc);
+protected:
+  int inner_init(const int64_t percentage, const ObTableReadInfo &read_info, blocksstable::ObIMicroBlockCache &block_cache);
+  void update_rest();
+  void inner_update_rest();
+  virtual void calculate_base_percentage(const int64_t free_memory);
+private:
+  bool warm_block(const int64_t level);
+protected:
+  int64_t base_percentage_;
+private:
+  static const int64_t DATA_BLOCK_CACHE_PERCENTAGE = 5; // share of free memory used as budget, in percent
+  static const int64_t UPDATE_INTERVAL = 50; // reservation attempts between budget refreshes
+  static const int64_t TOP_LEVEL = 6; // levels >= TOP_LEVEL bypass the budget and the admission filter
+
+  blocksstable::ObIMicroBlockCache *cache_;
+  const ObTableReadInfo *read_info_;
+  int64_t rest_size_;
+  int64_t warm_size_percentage_;
+  int64_t update_step_;
+  ObKVCachePair *kvpair_;
+  ObKVCacheInstHandle inst_handle_;
+  ObKVCacheHandle cache_handle_;
+};
+
+// Same warmer bound to the index block cache, with a smaller budget share and a higher base admission rate.
+class ObIndexBlockCachePreWarmer : public ObDataBlockCachePreWarmer
+{
+public:
+  ObIndexBlockCachePreWarmer();
+  virtual ~ObIndexBlockCachePreWarmer();
+  void init(const ObTableReadInfo &read_info); // hides (does not override) the non-virtual base init
+protected:
+  virtual void calculate_base_percentage(const int64_t free_memory) override;
+private:
+  static const int64_t INDEX_BLOCK_CACHE_PERCENTAGE = 2;
+  static const int64_t INDEX_BLOCK_BASE_PERCENTAGE = 30;
+};
+
+
+}; // common
+}; // oceanbase
+
+#endif // OCEANBASE_COMMON_KVCACHE_PRE_WARMER_H_
\ No newline at 
end of file diff --git a/src/share/cache/ob_kvcache_store.cpp b/src/share/cache/ob_kvcache_store.cpp index e1ba958c3..db968bf5a 100644 --- a/src/share/cache/ob_kvcache_store.cpp +++ b/src/share/cache/ob_kvcache_store.cpp @@ -184,7 +184,7 @@ bool ObKVCacheStore::add_handle_ref(ObKVMemBlockHandle *mb_handle) return bret; } -int64_t ObKVCacheStore::get_handle_ref_cnt(ObKVMemBlockHandle *mb_handle) +int64_t ObKVCacheStore::get_handle_ref_cnt(const ObKVMemBlockHandle *mb_handle) { if (NULL != mb_handle) { return mb_handle->handle_ref_.get_ref_cnt(); @@ -576,6 +576,9 @@ int ObKVCacheStore::try_flush_washable_mb( can_try_wash = true; } } + } else { + COMMON_LOG(DEBUG, "Cannot flush memblock", K(tenant_id), K(cache_id), K(get_handle_ref_cnt(handle)), + KP(handle), KPC(handle)); } de_handle_ref(handle); } @@ -613,6 +616,10 @@ int ObKVCacheStore::try_flush_washable_mb( } } // qclock guard + INIT_SUCC(tmp_ret); + if (OB_TMP_FAIL(print_tenant_memblock_info(head))) { + COMMON_LOG(WARN, "Fail to print tenant memblock info", K(tmp_ret)); + } if (size_need_washed == INT64_MAX) { // flush ObICacheWasher::ObCacheMemBlock *wash_block = wash_blocks; @@ -647,9 +654,110 @@ int ObKVCacheStore::try_flush_washable_mb( COMMON_LOG(INFO, "ObKVCache try flush washable memblock details", K(ret), K(tenant_id), K(cache_id), K(size_washed), K(size_need_washed)); retire_mb_handles(retire_list); + + COMMON_LOG(DEBUG, "Try flush cache result", K(size_washed), K(size_need_washed), K(tenant_id), K(cache_id), K(ret)); } + return ret; +} +int ObKVCacheStore::inner_push_memblock_info(const ObKVMemBlockHandle &handle, ObIArray &memblock_infos) +{ + INIT_SUCC(ret); + + ObKVCacheStoreMemblockInfo mb_info; + STRNCPY(mb_info.cache_name_, handle.inst_->status_.config_->cache_name_, MAX_CACHE_NAME_LENGTH - 1); + mb_info.tenant_id_ = handle.inst_->tenant_id_; + mb_info.cache_id_ = handle.inst_->cache_id_; + mb_info.ref_count_ = get_handle_ref_cnt(&handle); + mb_info.using_status_ = handle.status_; + 
mb_info.policy_ = handle.policy_; + mb_info.kv_cnt_ = handle.kv_cnt_; + mb_info.get_cnt_ = handle.get_cnt_; + mb_info.recent_get_cnt_ = handle.recent_get_cnt_; + mb_info.priority_ = handle.inst_->status_.config_->priority_; + mb_info.score_ = handle.score_; + mb_info.align_size_ = handle.mem_block_->get_align_size(); + if (OB_UNLIKELY(0 > snprintf(mb_info.memblock_ptr_, 32, "%p", handle.mem_block_))) { + ret = OB_IO_ERROR; + COMMON_LOG(WARN, "Fail to snprintf memblock pointer", K(ret), K(errno), KERRNOMSG(errno)); + } else if (OB_FAIL(memblock_infos.push_back(mb_info))) { + COMMON_LOG(WARN, "Fail to push memblock info", K(ret), K(mb_info)); + } + + return ret; +} + +int ObKVCacheStore::get_memblock_info(const uint64_t tenant_id, ObIArray &memblock_infos) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + COMMON_LOG(WARN, "The ObKVCacheStore is not inited", K(ret)); + } else { + for (int i = 0 ; OB_SUCC(ret) && i < cur_mb_num_ ; ++i) { + ObKVMemBlockHandle &handle = mb_handles_[i]; + if (add_handle_ref(&handle)) { + if (tenant_id != handle.inst_->tenant_id_ && OB_SYS_TENANT_ID != tenant_id) { + } else if (OB_FAIL(inner_push_memblock_info(handle, memblock_infos))) { + COMMON_LOG(WARN, "Fail to inner push memblock info", K(ret)); + } + de_handle_ref(&handle); + } + } + } + + return ret; +} + +int ObKVCacheStore::print_tenant_memblock_info(ObDLink* head) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + COMMON_LOG(WARN, "The ObKVCacheStore is not inited", K(ret)); + } else if (nullptr == head) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "Unexpected nullptr", K(ret), KP(head)); + } else { + ContextParam param; + param.set_mem_attr(common::OB_SERVER_TENANT_ID, ObModIds::OB_TEMP_VARIABLES); + CREATE_WITH_TEMP_CONTEXT(param) { + static const int64_t BUFLEN = 1 << 18; + char *buf = (char *)ctxalp(BUFLEN); + if (nullptr == buf) { + ret = OB_ALLOCATE_MEMORY_FAILED; + COMMON_LOG(WARN, "Fail to allocate memory 
for print tenant memblock info", K(ret), KP(buf)); + } else { + int64_t ctx_pos = 0; + QClockGuard guard(get_qclock()); + ObKVMemBlockHandle *handle = static_cast<ObKVMemBlockHandle *>(link_next(head)); + while (OB_SUCC(ret) && head != handle) { + if (add_handle_ref(handle)) { + if (OB_FAIL(databuff_printf(buf, BUFLEN, ctx_pos, + "[CACHE] tenant_id=%8ld | cache_id=%8ld | ref_count=%8ld | status=%8d | policy=%8d | kv_cnt=%8ld | get_cnt=%8ld | score=%8lf |\n", + handle->inst_->tenant_id_, + handle->inst_->cache_id_, + get_handle_ref_cnt(handle), + handle->status_, + handle->policy_, + handle->kv_cnt_, + handle->get_cnt_, + handle->score_))) { + COMMON_LOG(WARN, "Fail to print tenant memblock info", K(ret), K(ctx_pos)); + } + de_handle_ref(handle); + } + handle = static_cast<ObKVMemBlockHandle *>(link_next(handle)); + } + if (OB_SUCC(ret) && ctx_pos > 0) { // buf is written only once a row was appended; skip the log otherwise + _OB_LOG(WARN, "[CACHE] len: %8ld tenant sync wash failed, cache memblock info: \n%s", ctx_pos, buf); + } + } + } + } return ret; } @@ -859,7 +967,7 @@ bool ObKVCacheStore::compute_tenant_wash_size() if (OB_FAIL(mem_limit_getter_->get_all_tenant_id(tenant_ids_))) { COMMON_LOG(WARN, "Fail to get all tenant ids, ", K(ret)); - } else if (OB_FAIL(insts_->get_all_cache_info(inst_handles_))) { + } else if (OB_FAIL(insts_->get_cache_info(OB_SYS_TENANT_ID, inst_handles_))) { COMMON_LOG(WARN, "Fail to get all cache infos, ", K(ret)); } diff --git a/src/share/cache/ob_kvcache_store.h b/src/share/cache/ob_kvcache_store.h index d8e7218ae..8197ccd49 100644 --- a/src/share/cache/ob_kvcache_store.h +++ b/src/share/cache/ob_kvcache_store.h @@ -74,13 +74,13 @@ public: bool wash(); int get_avg_cache_item_size(const uint64_t tenant_id, const int64_t cache_id, int64_t &avg_cache_item_size); - + int get_washable_size(const uint64_t tenant_id, int64_t &washable_size, const int64_t ratio = 0); void flush_washable_mbs(); void flush_washable_mbs(const uint64_t tenant_id); void flush_washable_mbs(const int64_t cache_id); void flush_washable_mbs(const uint64_t tenant_id, const int64_t cache_id); - + 
int sync_wash_mbs(const uint64_t tenant_id, const int64_t wash_size, const bool wash_single_mb, lib::ObICacheWasher::ObCacheMemBlock *&wash_blocks); @@ -95,7 +95,7 @@ public: virtual bool add_handle_ref(ObKVMemBlockHandle *mb_handle, const uint32_t seq_num); virtual bool add_handle_ref(ObKVMemBlockHandle *mb_handle); virtual void de_handle_ref(ObKVMemBlockHandle *mb_handle); - int64_t get_handle_ref_cnt(ObKVMemBlockHandle *mb_handle); + int64_t get_handle_ref_cnt(const ObKVMemBlockHandle *mb_handle); virtual int64_t get_block_size() const { return block_size_; } // implement functions of ObIMBWrapperMgr virtual int alloc(ObKVCacheInst &inst, const enum ObKVCachePolicy policy, @@ -104,14 +104,17 @@ public: virtual ObKVMemBlockHandle *&get_curr_mb(ObKVCacheInst &inst, const enum ObKVCachePolicy policy); virtual bool mb_status_match(ObKVCacheInst &inst, const enum ObKVCachePolicy policy, ObKVMemBlockHandle *mb_handle); + int get_memblock_info(const uint64_t tenant_id, ObIArray &memblock_infos); + int print_tenant_memblock_info(ObDLink *link); static const int64_t MAX_RATIO = 6; - + private: int try_flush_washable_mb( const uint64_t tenant_id, lib::ObICacheWasher::ObCacheMemBlock *&wash_blocks, const int64_t cache_id = -1, const int64_t size_need_washed = INT64_MAX); + int inner_push_memblock_info(const ObKVMemBlockHandle &handle, ObIArray &memblock_infos); private: static const int64_t SYNC_WASH_MB_TIMEOUT_US = 100 * 1000; // 100ms diff --git a/src/share/cache/ob_kvcache_struct.cpp b/src/share/cache/ob_kvcache_struct.cpp index 3f3fde9e4..15a489100 100644 --- a/src/share/cache/ob_kvcache_struct.cpp +++ b/src/share/cache/ob_kvcache_struct.cpp @@ -329,6 +329,17 @@ void ObKVMemBlockHandle::set_full(const double base_mb_score) score_ += base_mb_score; ATOMIC_STORE((uint32_t*)(&status_), FULL); } + + +/* + * -------------------------------------------------ObKVCacheStoreMemblockInfo------------------------------------------------ + */ +bool 
ObKVCacheStoreMemblockInfo::is_valid() const +{ + return tenant_id_ != OB_INVALID_TENANT_ID && cache_id_ >= 0; +} + + }//end namespace common }//end namespace oceanbase diff --git a/src/share/cache/ob_kvcache_struct.h b/src/share/cache/ob_kvcache_struct.h index 7ff931193..ddde1eebe 100644 --- a/src/share/cache/ob_kvcache_struct.h +++ b/src/share/cache/ob_kvcache_struct.h @@ -213,6 +213,47 @@ struct ObKVCacheInfo TO_STRING_KV(K_(inst_key), K_(status)); }; +struct ObKVCacheStoreMemblockInfo +{ +public: + ObKVCacheStoreMemblockInfo() + : tenant_id_(OB_INVALID_TENANT_ID), + cache_id_(-1), + ref_count_(-1), + using_status_(-1), + policy_(-1), + kv_cnt_(-1), + get_cnt_(-1), + recent_get_cnt_(-1), + priority_(0), + score_(0), + align_size_(-1), + cache_name_(), + memblock_ptr_() + { + memset(cache_name_, 0, MAX_CACHE_NAME_LENGTH); + memset(memblock_ptr_, 0, 32); + } + ~ObKVCacheStoreMemblockInfo() = default; + bool is_valid() const; + TO_STRING_KV(K_(tenant_id), K_(cache_id), K_(ref_count), K_(using_status), K_(policy), K_(kv_cnt), K_(get_cnt), + K_(recent_get_cnt), K_(priority), K_(score), K_(align_size), KP_(cache_name), KP_(memblock_ptr)); +public: + uint64_t tenant_id_; + int64_t cache_id_; + int64_t ref_count_; + int64_t using_status_; + int64_t policy_; + int64_t kv_cnt_; + int64_t get_cnt_; + int64_t recent_get_cnt_; + int64_t priority_; + double score_; + int64_t align_size_; + char cache_name_[MAX_CACHE_NAME_LENGTH]; + char memblock_ptr_[32]; // store memblock address by char[] +}; + class ObIMBHandleAllocator { public: diff --git a/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp b/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp index ad054fdf5..280ac20d6 100644 --- a/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp @@ -5019,6 +5019,285 @@ int ObInnerTableSchema::all_virtual_storage_meta_memory_status_schema(ObTableSch return ret; } +int 
ObInnerTableSchema::all_virtual_kvcache_store_memblock_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(0); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + 
-1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("cache_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("cache_name", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_KVCACHE_NAME_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("memblock_ptr", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + 32, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ref_count", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("status", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, 
//index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("policy", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("kv_cnt", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("get_cnt", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("recent_get_cnt", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("priority", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + 
false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("score", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 3, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("align_size", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + int ObInnerTableSchema::all_virtual_mock_fk_parent_table_schema(ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index 75e825413..6b09e4349 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -826,6 +826,7 @@ public: static int 
all_virtual_privilege_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_tablet_pointer_status_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_storage_meta_memory_status_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_kvcache_store_memblock_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_mock_fk_parent_table_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_mock_fk_parent_table_history_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_mock_fk_parent_table_column_schema(share::schema::ObTableSchema &table_schema); @@ -2745,6 +2746,7 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_privilege_schema, ObInnerTableSchema::all_virtual_tablet_pointer_status_schema, ObInnerTableSchema::all_virtual_storage_meta_memory_status_schema, + ObInnerTableSchema::all_virtual_kvcache_store_memblock_schema, ObInnerTableSchema::all_virtual_mock_fk_parent_table_schema, ObInnerTableSchema::all_virtual_mock_fk_parent_table_history_schema, ObInnerTableSchema::all_virtual_mock_fk_parent_table_column_schema, @@ -7105,6 +7107,7 @@ const uint64_t cluster_distributed_vtables [] = { OB_ALL_VIRTUAL_TABLET_DDL_KV_INFO_TID, OB_ALL_VIRTUAL_TABLET_POINTER_STATUS_TID, OB_ALL_VIRTUAL_STORAGE_META_MEMORY_STATUS_TID, + OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_TID, OB_ALL_VIRTUAL_KVCACHE_HANDLE_LEAK_INFO_TID, OB_ALL_VIRTUAL_SCHEMA_MEMORY_TID, OB_ALL_VIRTUAL_SCHEMA_SLOT_TID, diff --git a/src/share/inner_table/ob_inner_table_schema_constants.h b/src/share/inner_table/ob_inner_table_schema_constants.h index ea712d9bb..7c9535d5a 100644 --- a/src/share/inner_table/ob_inner_table_schema_constants.h +++ b/src/share/inner_table/ob_inner_table_schema_constants.h @@ -572,6 +572,7 @@ const uint64_t OB_ALL_VIRTUAL_TABLET_DDL_KV_INFO_TID = 12315; // "__all_virtual_ const uint64_t OB_ALL_VIRTUAL_PRIVILEGE_TID = 12316; // 
"__all_virtual_privilege" const uint64_t OB_ALL_VIRTUAL_TABLET_POINTER_STATUS_TID = 12317; // "__all_virtual_tablet_pointer_status" const uint64_t OB_ALL_VIRTUAL_STORAGE_META_MEMORY_STATUS_TID = 12318; // "__all_virtual_storage_meta_memory_status" +const uint64_t OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_TID = 12319; // "__all_virtual_kvcache_store_memblock" const uint64_t OB_ALL_VIRTUAL_MOCK_FK_PARENT_TABLE_TID = 12320; // "__all_virtual_mock_fk_parent_table" const uint64_t OB_ALL_VIRTUAL_MOCK_FK_PARENT_TABLE_HISTORY_TID = 12321; // "__all_virtual_mock_fk_parent_table_history" const uint64_t OB_ALL_VIRTUAL_MOCK_FK_PARENT_TABLE_COLUMN_TID = 12322; // "__all_virtual_mock_fk_parent_table_column" @@ -2475,6 +2476,7 @@ const char *const OB_ALL_VIRTUAL_TABLET_DDL_KV_INFO_TNAME = "__all_virtual_table const char *const OB_ALL_VIRTUAL_PRIVILEGE_TNAME = "__all_virtual_privilege"; const char *const OB_ALL_VIRTUAL_TABLET_POINTER_STATUS_TNAME = "__all_virtual_tablet_pointer_status"; const char *const OB_ALL_VIRTUAL_STORAGE_META_MEMORY_STATUS_TNAME = "__all_virtual_storage_meta_memory_status"; +const char *const OB_ALL_VIRTUAL_KVCACHE_STORE_MEMBLOCK_TNAME = "__all_virtual_kvcache_store_memblock"; const char *const OB_ALL_VIRTUAL_MOCK_FK_PARENT_TABLE_TNAME = "__all_virtual_mock_fk_parent_table"; const char *const OB_ALL_VIRTUAL_MOCK_FK_PARENT_TABLE_HISTORY_TNAME = "__all_virtual_mock_fk_parent_table_history"; const char *const OB_ALL_VIRTUAL_MOCK_FK_PARENT_TABLE_COLUMN_TNAME = "__all_virtual_mock_fk_parent_table_column"; diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index e4d871581..f588ab99e 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -10697,7 +10697,34 @@ def_table_schema( vtable_route_policy = 'distributed', ) -# 12319: __all_virtual_kvcache_store_memblock +def_table_schema( + owner = 'zhaoruizhe.zrz', + table_name = 
'__all_virtual_kvcache_store_memblock', + table_id = '12319', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + rowkey_columns = [ + ], + normal_columns = [ + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH', 'false'), + ('svr_port', 'int'), + ('tenant_id', 'int'), + ('cache_id', 'int'), + ('cache_name', 'varchar:OB_MAX_KVCACHE_NAME_LENGTH'), + ('memblock_ptr', 'varchar:32'), + ('ref_count', 'int'), + ('status', 'int'), + ('policy', 'int'), + ('kv_cnt', 'int'), + ('get_cnt', 'int'), + ('recent_get_cnt', 'int'), + ('priority', 'int'), + ('score', 'number:38:3'), + ('align_size', 'int'), + ], + vtable_route_policy = 'distributed', + partition_columns = ['svr_ip', 'svr_port'], +) def_table_schema(**gen_iterate_virtual_table_def( table_id = '12320', diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index ded0f4db0..a13049630 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -3426,7 +3426,12 @@ int ObStaticEngineCG::generate_normal_tsc(ObLogTableScan &op, ObTableScanSpec &s CK(OB_NOT_NULL(schema_guard)); if (OB_SUCC(ret) && NULL != op.get_pre_query_range()) { OZ(spec.tsc_ctdef_.pre_query_range_.deep_copy(*op.get_pre_query_range())); - op.get_pre_query_range()->is_get(spec.tsc_ctdef_.scan_ctdef_.is_get_); + if (OB_FAIL(ret)) { + } else if (!op.is_skip_scan() && OB_FAIL(spec.tsc_ctdef_.pre_query_range_.reset_skip_scan_range())) { + LOG_WARN("reset skip scan range failed", K(ret)); + } else if (OB_FAIL(spec.tsc_ctdef_.pre_query_range_.is_get(spec.tsc_ctdef_.scan_ctdef_.is_get_))) { + LOG_WARN("extract the query range whether get failed", K(ret)); + } } bool is_equal_and = true; @@ -3462,8 +3467,11 @@ int ObStaticEngineCG::generate_normal_tsc(ObLogTableScan &op, ObTableScanSpec &s } root = root->and_next_; } - spec.tsc_ctdef_.pre_query_range_.set_is_equal_and(is_equal_and); - spec.tsc_ctdef_.pre_query_range_.get_equal_offs().assign(equal_offs); + // TODO 
@baixian.zr the above optimization is overrided by ObTscCgService::generate_tsc_ctdef before this commit + // but after the deep copy of pre_query_range_ is removed in ObTscCgService::generate_tsc_ctdef, + // error is returned in such sql 'set global x=y', should fix this; + // spec.tsc_ctdef_.pre_query_range_.set_is_equal_and(is_equal_and); + // spec.tsc_ctdef_.pre_query_range_.get_equal_offs().assign(equal_offs); OZ(ob_write_string(phy_plan_->get_allocator(), op.get_table_name(), tbl_name)); OZ(ob_write_string(phy_plan_->get_allocator(), op.get_index_name(), index_name)); diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index dbd951c24..1ef799490 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -28,30 +28,21 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc { int ret = OB_SUCCESS; ObDASScanCtDef &scan_ctdef = tsc_ctdef.scan_ctdef_; - if (op.get_pre_query_range() != nullptr) { - if (OB_FAIL(tsc_ctdef.pre_query_range_.deep_copy(*op.get_pre_query_range()))) { - LOG_WARN("deep copy tsc ctdef pre query range failed", K(ret)); - } else if (OB_FAIL(op.get_pre_query_range()->is_get(scan_ctdef.is_get_))) { - LOG_WARN("extract the query range whether get failed", K(ret)); - } + ObQueryFlag query_flag; + if (op.is_need_feedback() && + (op.get_plan()->get_optimizer_context().get_phy_plan_type() == OB_PHY_PLAN_LOCAL || + op.get_plan()->get_optimizer_context().get_phy_plan_type() == OB_PHY_PLAN_REMOTE)) { + ++(cg_.phy_plan_->get_access_table_num()); + query_flag.is_need_feedback_ = true; } - if (OB_SUCC(ret)) { - ObQueryFlag query_flag; - if (op.is_need_feedback() && - (op.get_plan()->get_optimizer_context().get_phy_plan_type() == OB_PHY_PLAN_LOCAL || - op.get_plan()->get_optimizer_context().get_phy_plan_type() == OB_PHY_PLAN_REMOTE)) { - ++(cg_.phy_plan_->get_access_table_num()); - query_flag.is_need_feedback_ = true; - } - 
ObOrderDirection scan_direction = op.get_scan_direction(); - if (is_descending_direction(scan_direction)) { - query_flag.scan_order_ = ObQueryFlag::Reverse; - } else { - query_flag.scan_order_ = ObQueryFlag::Forward; - } - tsc_ctdef.scan_flags_ = query_flag; + ObOrderDirection scan_direction = op.get_scan_direction(); + if (is_descending_direction(scan_direction)) { + query_flag.scan_order_ = ObQueryFlag::Reverse; + } else { + query_flag.scan_order_ = ObQueryFlag::Forward; } + tsc_ctdef.scan_flags_ = query_flag; if (OB_SUCC(ret) && (OB_NOT_NULL(op.get_flashback_query_expr()))) { if (OB_FAIL(cg_.generate_rt_expr(*op.get_flashback_query_expr(), tsc_ctdef.flashback_item_.flashback_query_expr_))) { diff --git a/src/sql/das/ob_das_scan_op.cpp b/src/sql/das/ob_das_scan_op.cpp index 203432b47..6474a17f2 100644 --- a/src/sql/das/ob_das_scan_op.cpp +++ b/src/sql/das/ob_das_scan_op.cpp @@ -592,6 +592,7 @@ int ObDASScanOp::reuse_iter() LOG_WARN("reuse lookup iterator failed", K(ret)); } else { scan_param_.key_ranges_.reuse(); + scan_param_.ss_key_ranges_.reuse(); scan_param_.mbr_filters_.reuse(); } return ret; @@ -612,7 +613,8 @@ int ObDASScanOp::set_lookup_tablet_id(const ObTabletID &tablet_id) OB_SERIALIZE_MEMBER((ObDASScanOp, ObIDASTaskOp), scan_param_.key_ranges_, scan_ctdef_, - scan_rtdef_); + scan_rtdef_, + scan_param_.ss_key_ranges_); ObDASScanResult::ObDASScanResult() : ObIDASTaskResult(), @@ -1179,6 +1181,7 @@ int ObLocalIndexLookupOp::reset_lookup_state(bool need_switch_param) if (lookup_iter_ != nullptr) { scan_param_.need_switch_param_ = need_switch_param; scan_param_.key_ranges_.reuse(); + scan_param_.ss_key_ranges_.reuse(); } if (OB_SUCC(ret) && lookup_memctx_ != nullptr) { lookup_memctx_->reset_remain_one_page(); diff --git a/src/sql/engine/px/ob_granule_pump.cpp b/src/sql/engine/px/ob_granule_pump.cpp index 55e68b99b..4ffbc2989 100644 --- a/src/sql/engine/px/ob_granule_pump.cpp +++ b/src/sql/engine/px/ob_granule_pump.cpp @@ -39,10 +39,13 @@ int 
ObGITaskSet::get_task_at_pos(ObGranuleTaskInfo &info, const int64_t &pos) co int64_t cur_idx = gi_task_set_.at(pos).idx_; info.tablet_loc_ = const_cast(gi_task_set_.at(pos).tablet_loc_); info.ranges_.reset(); + info.ss_ranges_.reset(); for (int64_t i = pos; OB_SUCC(ret) && i < gi_task_set_.count(); i++) { if (cur_idx == gi_task_set_.at(i).idx_) { if (OB_FAIL(info.ranges_.push_back(gi_task_set_.at(i).range_))) { LOG_WARN("push back ranges failed", K(ret)); + } else if (OB_FAIL(info.ss_ranges_.push_back(gi_task_set_.at(i).ss_range_))) { + LOG_WARN("push back skip scan ranges failed", K(ret)); } } else { break; @@ -89,10 +92,13 @@ int ObGITaskSet::get_next_gi_task(ObGranuleTaskInfo &info) int64_t cur_idx = gi_task_set_.at(cur_pos_).idx_; info.tablet_loc_ = gi_task_set_.at(cur_pos_).tablet_loc_; info.ranges_.reset(); + info.ss_ranges_.reset(); for (int64_t i = cur_pos_; OB_SUCC(ret) && i < gi_task_set_.count(); i++) { if (cur_idx == gi_task_set_.at(i).idx_) { if (OB_FAIL(info.ranges_.push_back(gi_task_set_.at(i).range_))) { LOG_WARN("push back ranges failed", K(ret)); + } else if (OB_FAIL(info.ss_ranges_.push_back(gi_task_set_.at(i).ss_range_))) { + LOG_WARN("push back skip scan ranges failed", K(ret)); } if (i == (gi_task_set_.count() - 1)) { cur_pos_ = gi_task_set_.count(); @@ -199,25 +205,30 @@ int ObGITaskSet::set_block_order(bool desc) int ObGITaskSet::construct_taskset(ObIArray &taskset_tablets, ObIArray &taskset_ranges, + ObIArray &ss_ranges, ObIArray &taskset_idxs, ObGIRandomType random_type) { int ret = OB_SUCCESS; - if (taskset_tablets.count() != taskset_ranges.count() || - taskset_tablets.count() != taskset_idxs.count() || - taskset_tablets.empty()) { + if (OB_UNLIKELY(taskset_tablets.count() != taskset_ranges.count() || + taskset_tablets.count() != taskset_idxs.count() || + taskset_tablets.empty() || ss_ranges.count() > 1)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("taskest count err", K(taskset_tablets.count()), K(taskset_ranges), - K(taskset_idxs)); + 
K(taskset_idxs), + K(ss_ranges.count())); } else if (!(GI_RANDOM_NONE <= random_type && random_type <= GI_RANDOM_RANGE)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("random type err", K(random_type)); } else if (gi_task_set_.empty() && OB_FAIL(gi_task_set_.reserve(taskset_tablets.count()))) { LOG_WARN("failed to prepare allocate", K(ret)); } else { + ObNewRange whole_range; + whole_range.set_whole_range(); + ObNewRange &ss_range = ss_ranges.empty() ? whole_range : ss_ranges.at(0); for (int64_t i = 0; OB_SUCC(ret) && i < taskset_tablets.count(); i++) { - ObGITaskInfo task_info(taskset_tablets.at(i), taskset_ranges.at(i), taskset_idxs.at(i)); + ObGITaskInfo task_info(taskset_tablets.at(i), taskset_ranges.at(i), ss_range, taskset_idxs.at(i)); if (random_type != ObGITaskSet::GI_RANDOM_NONE) { task_info.hash_value_ = common::murmurhash(&task_info.idx_, sizeof(task_info.idx_), 0); } @@ -673,6 +684,7 @@ int ObGranuleSplitter::split_gi_task(ObGranulePumpArgs &args, { int ret = OB_SUCCESS; ObSEArray ranges; + ObSEArray ss_ranges; DASTabletLocSEArray taskset_tablets; ObSEArray taskset_ranges; ObSEArray taskset_idxs; @@ -686,7 +698,8 @@ int ObGranuleSplitter::split_gi_task(ObGranulePumpArgs &args, } else if (tablets.count() <= 0 || OB_ISNULL(args.ctx_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("the task has an empty tablets", K(ret), K(tablets)); - } else if (OB_FAIL(get_query_range(*args.ctx_, tsc->get_query_range(), ranges, table_id, op_id, partition_granule, args.with_param_down()))) { + } else if (OB_FAIL(get_query_range(*args.ctx_, tsc->get_query_range(), ranges, ss_ranges, + table_id, op_id, partition_granule, args.with_param_down()))) { LOG_WARN("get query range failed", K(ret)); } else if (ranges.count() <= 0) { ret = OB_ERR_UNEXPECTED; @@ -703,9 +716,14 @@ int ObGranuleSplitter::split_gi_task(ObGranulePumpArgs &args, taskset_idxs, range_independent))) { LOG_WARN("failed to get graunle task", K(ret), K(ranges), K(tablets)); - } else if 
(OB_FAIL(task_set.construct_taskset(taskset_tablets, taskset_ranges, taskset_idxs, random_type))) { + } else if (OB_FAIL(task_set.construct_taskset(taskset_tablets, + taskset_ranges, + ss_ranges, + taskset_idxs, + random_type))) { LOG_WARN("construct taskset failed", K(ret), K(taskset_tablets), K(taskset_ranges), + K(ss_ranges), K(taskset_idxs), K(random_type)); } @@ -715,6 +733,7 @@ int ObGranuleSplitter::split_gi_task(ObGranulePumpArgs &args, int ObGranuleSplitter::get_query_range(ObExecContext &ctx, const ObQueryRange &tsc_pre_query_range, ObIArray &ranges, + ObIArray &ss_ranges, int64_t table_id, int64_t op_id, bool partition_granule, @@ -722,6 +741,7 @@ int ObGranuleSplitter::get_query_range(ObExecContext &ctx, { int ret = OB_SUCCESS; ObQueryRangeArray scan_ranges; + ObQueryRangeArray skip_scan_ranges; ObGetMethodArray get_method; ObPhysicalPlanCtx *plan_ctx = nullptr; bool has_extract_query_range = false; @@ -757,6 +777,12 @@ int ObGranuleSplitter::get_query_range(ObExecContext &ctx, get_method, ObBasicSessionInfo::create_dtc_params(ctx.get_my_session())))) { LOG_WARN("failed to get scan ranges", K(ret)); + } else if (OB_FAIL(tsc_pre_query_range.get_ss_tablet_ranges( + ctx.get_allocator(), + ctx, + skip_scan_ranges, + ObBasicSessionInfo::create_dtc_params(ctx.get_my_session())))) { + LOG_WARN("failed to final extract index skip query range", K(ret)); } else { has_extract_query_range = true; } @@ -783,12 +809,37 @@ int ObGranuleSplitter::get_query_range(ObExecContext &ctx, get_method, ObBasicSessionInfo::create_dtc_params(ctx.get_my_session())))) { LOG_WARN("failed to get scan ranges", K(ret)); + } else if (OB_FAIL(tsc_pre_query_range.get_ss_tablet_ranges( + ctx.get_allocator(), + ctx, + skip_scan_ranges, + ObBasicSessionInfo::create_dtc_params(ctx.get_my_session())))) { + LOG_WARN("failed to final extract index skip query range", K(ret)); } else { has_extract_query_range = true; } } - LOG_DEBUG("gi get the scan range", K(ret), K(partition_granule), 
K(has_extract_query_range), K(scan_ranges)); + LOG_DEBUG("gi get the scan range", K(ret), K(partition_granule), K(has_extract_query_range), + K(scan_ranges), K(skip_scan_ranges)); + if (OB_SUCC(ret)) { + // index skip scan, ranges from extract_pre_query_range/get_ss_tablet_ranges, + // prefix range and postfix range is single range + ObNewRange *ss_range = NULL; + ObNewRange whole_range; + whole_range.set_whole_range(); + if (!skip_scan_ranges.empty() && + (OB_ISNULL(skip_scan_ranges.at(0)) || + OB_UNLIKELY(1 != skip_scan_ranges.count() || 1 != scan_ranges.count()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index skip scan range", K(ret), K(scan_ranges), K(skip_scan_ranges)); + } else if (OB_FAIL(ss_ranges.push_back(skip_scan_ranges.empty() + ? whole_range : *skip_scan_ranges.at(0)))) { + LOG_WARN("push back ranges failed", K(ret)); + } else { + ss_ranges.at(ss_ranges.count() - 1).table_id_ = table_id; + } + } for (int64_t i = 0; i < scan_ranges.count() && OB_SUCC(ret); ++i) { if (OB_ISNULL(scan_ranges.at(i))) { ret = OB_ERR_UNEXPECTED; @@ -1187,6 +1238,7 @@ int ObPartitionWiseGranuleSplitter::split_insert_gi_task(ObGranulePumpArgs &args // insert的每一个partition对应的区间默认是[min_rowkey,max_rowkey] ObNewRange each_partition_range; ObSEArray ranges; + ObSEArray empty_ss_ranges; DASTabletLocSEArray taskset_tablets; ObSEArray taskset_ranges; ObSEArray taskset_idxs; @@ -1213,7 +1265,8 @@ int ObPartitionWiseGranuleSplitter::split_insert_gi_task(ObGranulePumpArgs &args taskset_idxs, range_independent))) { LOG_WARN("failed to get insert graunle task", K(ret), K(each_partition_range), K(tablets)); - } else if (OB_FAIL(task_set.construct_taskset(taskset_tablets, taskset_ranges, taskset_idxs, random_type))) { + } else if (OB_FAIL(task_set.construct_taskset(taskset_tablets, taskset_ranges, + empty_ss_ranges, taskset_idxs, random_type))) { // INSERT的任务划分一定是 partition wise的,并且INSERT算子每次rescan仅仅需要每一个task对应的partition key, // `ranges`,`idx`等任务参数是不需要 LOG_WARN("construct taskset 
failed", K(ret), K(taskset_tablets), diff --git a/src/sql/engine/px/ob_granule_pump.h b/src/sql/engine/px/ob_granule_pump.h index 7dd88ea7c..bd901f615 100644 --- a/src/sql/engine/px/ob_granule_pump.h +++ b/src/sql/engine/px/ob_granule_pump.h @@ -94,15 +94,20 @@ class ObGITaskSet { public: struct ObGITaskInfo { - ObGITaskInfo() : tablet_loc_(nullptr), range_(), idx_(0), hash_value_(0) {} - ObGITaskInfo(ObDASTabletLoc *tablet_loc, common::ObNewRange range, int64_t idx) : - tablet_loc_(tablet_loc), range_(range), idx_(idx), hash_value_(0) {} + ObGITaskInfo() : tablet_loc_(nullptr), range_(), ss_range_(), idx_(0), hash_value_(0) {} + ObGITaskInfo(ObDASTabletLoc *tablet_loc, + common::ObNewRange range, + common::ObNewRange ss_range, + int64_t idx) : + tablet_loc_(tablet_loc), range_(range), ss_range_(ss_range), idx_(idx), hash_value_(0) {} TO_STRING_KV(KPC(tablet_loc_), K(range_), + K(ss_range_), K(idx_), K(hash_value_)); ObDASTabletLoc *tablet_loc_; common::ObNewRange range_; + common::ObNewRange ss_range_; int64_t idx_; uint64_t hash_value_; }; @@ -124,6 +129,7 @@ public: int set_block_order(bool asc); int construct_taskset(common::ObIArray &taskset_tablets, common::ObIArray &taskset_ranges, + common::ObIArray &ss_ranges, common::ObIArray &taskset_idxs, ObGIRandomType random_type); public: @@ -169,6 +175,7 @@ public : static int get_query_range(ObExecContext &ctx, const ObQueryRange &tsc_pre_query_range, ObIArray &ranges, + ObIArray &ss_ranges, int64_t table_id, int64_t op_id, bool partition_granule, diff --git a/src/sql/engine/table/ob_table_scan_op.cpp b/src/sql/engine/table/ob_table_scan_op.cpp index db804798a..831bc47c2 100644 --- a/src/sql/engine/table/ob_table_scan_op.cpp +++ b/src/sql/engine/table/ob_table_scan_op.cpp @@ -237,6 +237,7 @@ void ObTableScanOpInput::reset() { tablet_loc_ = nullptr; key_ranges_.reset(); + ss_key_ranges_.reset(); mbr_filters_.reset(); range_array_pos_.reset(); not_need_extract_query_range_ = false; @@ -247,7 +248,8 @@ 
OB_DEF_SERIALIZE_SIZE(ObTableScanOpInput) int len = 0; LST_DO_CODE(OB_UNIS_ADD_LEN, key_ranges_, - not_need_extract_query_range_); + not_need_extract_query_range_, + ss_key_ranges_); return len; } @@ -256,7 +258,8 @@ OB_DEF_SERIALIZE(ObTableScanOpInput) int ret = OB_SUCCESS; LST_DO_CODE(OB_UNIS_ENCODE, key_ranges_, - not_need_extract_query_range_); + not_need_extract_query_range_, + ss_key_ranges_); return ret; } @@ -274,6 +277,18 @@ OB_DEF_DESERIALIZE(ObTableScanOpInput) exec_ctx_.get_allocator(), buf, data_len, pos))) { LOG_WARN("range deserialize failed", K(ret)); } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(serialization::decode_vi64(buf, data_len, pos, &cnt))) { + LOG_WARN("decode failed", K(ret)); + } else if (OB_FAIL(ss_key_ranges_.prepare_allocate(cnt))) { + LOG_WARN("array prepare allocate failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < cnt; i++) { + if (OB_FAIL(ss_key_ranges_.at(i).deserialize(exec_ctx_.get_allocator(), + buf, data_len, pos))) { + LOG_WARN("range deserialize failed", K(ret)); + } + } } if (OB_SUCC(ret)) { LST_DO_CODE(OB_UNIS_DECODE, not_need_extract_query_range_); @@ -761,6 +776,7 @@ int ObTableScanOp::prepare_all_das_tasks() LOG_WARN("prepare das task failed", K(ret)); } else { MY_INPUT.key_ranges_.reuse(); + MY_INPUT.ss_key_ranges_.reuse(); } } } @@ -962,7 +978,7 @@ int ObTableScanOp::prepare_batch_scan_range() LOG_WARN("prepare single scan range failed", K(ret)); } } - LOG_DEBUG("after prepare batch scan range", K(MY_INPUT.key_ranges_)); + LOG_DEBUG("after prepare batch scan range", K(MY_INPUT.key_ranges_), K(MY_INPUT.ss_key_ranges_)); return ret; } @@ -996,6 +1012,7 @@ int ObTableScanOp::prepare_single_scan_range(int64_t group_idx) { int ret = OB_SUCCESS; ObQueryRangeArray key_ranges; + ObQueryRangeArray ss_key_ranges; ObGetMethodArray get_method; ObPhysicalPlanCtx *plan_ctx = GET_PHY_PLAN_CTX(ctx_); ObIAllocator &range_allocator = (table_rescan_allocator_ != nullptr ? 
@@ -1039,20 +1056,51 @@ int ObTableScanOp::prepare_single_scan_range(int64_t group_idx) get_method, ObBasicSessionInfo::create_dtc_params(ctx_.get_my_session())))) { LOG_WARN("failed to extract pre query ranges", K(ret)); + } else if (OB_FAIL(MY_CTDEF.pre_query_range_.get_ss_tablet_ranges(range_allocator, + ctx_, + ss_key_ranges, + ObBasicSessionInfo::create_dtc_params(ctx_.get_my_session())))) { + LOG_WARN("failed to final extract index skip query range", K(ret)); } } - for (int64_t i = 0; OB_SUCC(ret) && i < key_ranges.count(); ++i) { - ObNewRange *key_range = key_ranges.at(i); - key_range->table_id_ = MY_CTDEF.scan_ctdef_.ref_table_id_; - key_range->group_idx_ = group_idx; - if (OB_FAIL(MY_INPUT.key_ranges_.push_back(*key_range))) { - LOG_WARN("store key range in TSC input failed", K(ret)); + if (OB_FAIL(ret)) { + } else if (!ss_key_ranges.empty()) { + // index skip scan, ranges from extract_pre_query_range/get_ss_tablet_ranges, + // prefix range and postfix range is single range + if (1 != ss_key_ranges.count() || 1 != key_ranges.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index skip scan range", K(ret), K(key_ranges), K(ss_key_ranges)); + } else { + key_ranges.at(0)->table_id_ = MY_CTDEF.scan_ctdef_.ref_table_id_; + key_ranges.at(0)->group_idx_ = group_idx; + ss_key_ranges.at(0)->table_id_ = MY_CTDEF.scan_ctdef_.ref_table_id_; + ss_key_ranges.at(0)->group_idx_ = group_idx; + if (OB_FAIL(MY_INPUT.key_ranges_.push_back(*key_ranges.at(0))) + || OB_FAIL(MY_INPUT.ss_key_ranges_.push_back(*ss_key_ranges.at(0)))) { + LOG_WARN("store key range in TSC input failed", K(ret)); + } + } + } else { + ObNewRange whole_range; + ObNewRange *key_range = NULL; + whole_range.set_whole_range(); + whole_range.table_id_ = MY_CTDEF.scan_ctdef_.ref_table_id_; + whole_range.group_idx_ = group_idx; + for (int64_t i = 0; OB_SUCC(ret) && i < key_ranges.count(); ++i) { + key_range = key_ranges.at(i); + key_range->table_id_ = MY_CTDEF.scan_ctdef_.ref_table_id_; + 
key_range->group_idx_ = group_idx; + if (OB_FAIL(MY_INPUT.key_ranges_.push_back(*key_range)) + || OB_FAIL(MY_INPUT.ss_key_ranges_.push_back(whole_range))) { + LOG_WARN("store key range in TSC input failed", K(ret)); + } } } if (OB_SUCC(ret) && MY_SPEC.is_vt_mapping_) { OZ(OB_FAIL(vt_result_converter_->convert_key_ranges(MY_INPUT.key_ranges_))); } - LOG_TRACE("prepare single scan range", K(ret), K(key_ranges), K(MY_INPUT.key_ranges_)); + LOG_TRACE("prepare single scan range", K(ret), K(key_ranges), K(MY_INPUT.key_ranges_), + K(MY_INPUT.ss_key_ranges_)); return ret; } @@ -1338,6 +1386,7 @@ int ObTableScanOp::inner_rescan() output_row_cnt_ = 0; iter_end_ = false; MY_INPUT.key_ranges_.reuse(); + MY_INPUT.ss_key_ranges_.reuse(); MY_INPUT.mbr_filters_.reuse(); if (OB_FAIL(ObOperator::inner_rescan())) { LOG_WARN("rescan operator failed", K(ret)); @@ -1381,6 +1430,7 @@ int ObTableScanOp::close_and_reopen() tsc_rtdef_.lookup_rtdef_->stmt_allocator_.set_alloc(&das_ref_.get_das_alloc()); } MY_INPUT.key_ranges_.reuse(); + MY_INPUT.ss_key_ranges_.reuse(); MY_INPUT.mbr_filters_.reuse(); } return ret; @@ -1460,6 +1510,7 @@ int ObTableScanOp::local_iter_reuse() } else { tsc_rtdef_.scan_rtdef_.scan_allocator_.set_alloc(table_rescan_allocator_); MY_INPUT.key_ranges_.reuse(); + MY_INPUT.ss_key_ranges_.reuse(); MY_INPUT.mbr_filters_.reuse(); } return ret; @@ -1833,13 +1884,19 @@ int ObTableScanOp::cherry_pick_range_by_tablet_id(ObDASScanOp *scan_op) { int ret = OB_SUCCESS; ObIArray &scan_ranges = scan_op->get_scan_param().key_ranges_; + ObIArray &ss_ranges = scan_op->get_scan_param().ss_key_ranges_; ObIArray &mbr_filters = scan_op->get_scan_param().mbr_filters_; const ObIArray &input_ranges = MY_INPUT.key_ranges_; + const ObIArray &input_ss_ranges = MY_INPUT.ss_key_ranges_; const ObIArray &input_filters = MY_INPUT.mbr_filters_; ObDASGroupScanOp *batch_op = DAS_GROUP_SCAN_OP(scan_op); bool add_all = false; bool prune_all = true; - if (ObPartitionLevel::PARTITION_LEVEL_MAX == 
MY_SPEC.part_level_ + if (OB_UNLIKELY(input_ranges.count() != input_ss_ranges.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ranges and skip scan postfix ranges mismatch", K(ret), K(input_ranges.count()), + K(input_ss_ranges.count())); + } else if (ObPartitionLevel::PARTITION_LEVEL_MAX == MY_SPEC.part_level_ || ObPartitionLevel::PARTITION_LEVEL_ZERO == MY_SPEC.part_level_ || (input_ranges.count() <= 1)) { add_all = true; @@ -1857,6 +1914,8 @@ int ObTableScanOp::cherry_pick_range_by_tablet_id(ObDASScanOp *scan_op) prune_all = false; if (OB_FAIL(scan_ranges.push_back(input_ranges.at(i)))) { LOG_WARN("store input range to scan param failed", K(ret)); + } else if (OB_FAIL(ss_ranges.push_back(input_ss_ranges.at(i)))) { + LOG_WARN("store input skip scan range to scan param failed", K(ret)); } else if (!input_ranges.at(i).is_physical_rowid_range_) { //do nothing } else if (OB_UNLIKELY(MY_SPEC.get_columns_desc().count() < 1)) { @@ -1885,15 +1944,20 @@ int ObTableScanOp::cherry_pick_range_by_tablet_id(ObDASScanOp *scan_op) } if (OB_SUCC(ret) && prune_all && !input_ranges.empty()) { ObNewRange false_range; + ObNewRange whole_range; false_range.set_false_range(); false_range.group_idx_ = input_ranges.at(0).group_idx_; + whole_range.set_whole_range(); if (OB_FAIL(scan_ranges.push_back(false_range))) { LOG_WARN("store false range to scan ranges failed", K(ret)); + } else if (OB_FAIL(ss_ranges.push_back(whole_range))) { + LOG_WARN("store whole range to skip scan ranges failed", K(ret)); } } if (OB_SUCC(ret)) { LOG_DEBUG("range after pruning", K(input_ranges), K(scan_ranges), K_(group_size), - "tablet_id", scan_op->get_tablet_id()); + "tablet_id", scan_op->get_tablet_id(), + K(input_ss_ranges), K(ss_ranges)); } return ret; } @@ -2057,9 +2121,11 @@ int ObTableScanOp::reassign_task_ranges(ObGranuleTaskInfo &info) if (MY_SPEC.gi_above_ && !iter_end_) { if (OB_UNLIKELY(MY_SPEC.get_query_range().is_contain_geo_filters())) { MY_INPUT.key_ranges_.reuse(); + 
MY_INPUT.ss_key_ranges_.reuse(); MY_INPUT.mbr_filters_.reuse(); } else if (!MY_INPUT.get_need_extract_query_range()) { - if (OB_FAIL(MY_INPUT.key_ranges_.assign(info.ranges_))) { + if (OB_FAIL(MY_INPUT.key_ranges_.assign(info.ranges_)) || + OB_FAIL(MY_INPUT.ss_key_ranges_.assign(info.ss_ranges_))) { LOG_WARN("assign the range info failed", K(ret), K(info)); } else if (MY_SPEC.is_vt_mapping_) { if (OB_FAIL(vt_result_converter_->convert_key_ranges(MY_INPUT.key_ranges_))) { @@ -2069,6 +2135,7 @@ int ObTableScanOp::reassign_task_ranges(ObGranuleTaskInfo &info) } else { // use prepare() to set key ranges if px do not extract query range MY_INPUT.key_ranges_.reuse(); + MY_INPUT.ss_key_ranges_.reuse(); MY_INPUT.mbr_filters_.reuse(); LOG_DEBUG("do prepare!!!"); } diff --git a/src/sql/engine/table/ob_table_scan_op.h b/src/sql/engine/table/ob_table_scan_op.h index 0698c2ba3..5846edae3 100644 --- a/src/sql/engine/table/ob_table_scan_op.h +++ b/src/sql/engine/table/ob_table_scan_op.h @@ -201,6 +201,7 @@ public: protected: ObDASTabletLoc *tablet_loc_; common::ObSEArray key_ranges_; + common::ObSEArray ss_key_ranges_; common::ObSEArray mbr_filters_; common::ObPosArray range_array_pos_; // if the query range was extracted before(include whole range), tsc not need to extract every time diff --git a/src/sql/executor/ob_task_info.h b/src/sql/executor/ob_task_info.h index 5b41deb96..c14f2d5fb 100644 --- a/src/sql/executor/ob_task_info.h +++ b/src/sql/executor/ob_task_info.h @@ -87,13 +87,15 @@ class ObGranuleTaskInfo public: ObGranuleTaskInfo() : ranges_(), + ss_ranges_(), tablet_loc_(nullptr), task_id_(0) { } virtual ~ObGranuleTaskInfo() { } - TO_STRING_KV(K_(ranges), K_(task_id)); + TO_STRING_KV(K_(ranges), K_(ss_ranges), K_(task_id)); public: common::ObSEArray ranges_; + common::ObSEArray ss_ranges_; ObDASTabletLoc *tablet_loc_; //just for print int64_t task_id_; diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp b/src/sql/optimizer/ob_access_path_estimation.cpp index 
b276fede1..56fe2517c 100644 --- a/src/sql/optimizer/ob_access_path_estimation.cpp +++ b/src/sql/optimizer/ob_access_path_estimation.cpp @@ -17,6 +17,7 @@ #include "sql/optimizer/ob_storage_estimator.h" #include "share/stat/ob_opt_stat_manager.h" #include "sql/engine/table/ob_table_scan_op.h" +#include "ob_opt_est_parameter_normal.h" namespace oceanbase { using namespace share::schema; using namespace share; @@ -354,11 +355,16 @@ int ObAccessPathEstimation::estimate_prefix_range_rowcount( logical_row_count *= est_cost_info.pushdown_prefix_filter_sel_; physical_row_count *= est_cost_info.pushdown_prefix_filter_sel_; + // skip scan postfix range conditions + logical_row_count *= est_cost_info.ss_postfix_range_filters_sel_; + physical_row_count *= est_cost_info.ss_postfix_range_filters_sel_; + LOG_TRACE("OPT:[STORAGE EST ROW COUNT]", K(logical_row_count), K(physical_row_count), K(get_range_count), K(scan_range_count), K(range_sample_ratio), K(result), K(est_cost_info.index_meta_info_.index_part_count_), - K(est_cost_info.pushdown_prefix_filter_sel_)); + K(est_cost_info.pushdown_prefix_filter_sel_), + K(est_cost_info.ss_postfix_range_filters_sel_)); return ret; } @@ -404,7 +410,15 @@ int ObAccessPathEstimation::fill_cost_table_scan_info(ObCostTableScanInfo &est_c * est_cost_info.postfix_filter_sel_ * est_cost_info.table_filter_sel_; - if (OB_SUCC(ret)) { + if (OB_FAIL(ret)) { + } else if (!est_cost_info.ss_ranges_.empty()) { + int64_t scan_range_count = get_scan_range_count(est_cost_info.ss_ranges_); + if (scan_range_count == 1) { + est_cost_info.batch_type_ = ObSimpleBatch::T_MULTI_SCAN; + } else { + est_cost_info.batch_type_ = ObSimpleBatch::T_MULTI_GET; + } + } else { int64_t get_range_count = get_get_range_count(est_cost_info.ranges_); int64_t scan_range_count = get_scan_range_count(est_cost_info.ranges_); if (get_range_count + scan_range_count > 1) { @@ -506,6 +520,12 @@ int ObAccessPathEstimation::process_statistics_estimation(const ObTableMetaInfo 
path->est_cost_info_, path->parent_->get_plan()->get_predicate_selectivities()))) { LOG_WARN("failed to calculate filter selectivity", K(ret)); + } else if (OB_FAIL(calc_skip_scan_prefix_ndv(*path, path->est_cost_info_.ss_prefix_ndv_))) { + LOG_WARN("failed to calc skip scan prefix ndv", K(ret)); + } else if (OB_FAIL(update_use_skip_scan(path->est_cost_info_, + path->parent_->get_plan()->get_predicate_selectivities(), + path->use_skip_scan_))) { + LOG_WARN("failed to update use skip scan", K(ret)); } else { ObArenaAllocator allocator; ObCostTableScanInfo &est_cost_info = path->est_cost_info_; @@ -533,9 +553,14 @@ int ObAccessPathEstimation::process_statistics_estimation(const ObTableMetaInfo logical_row_count *= est_cost_info.pushdown_prefix_filter_sel_; physical_row_count *= est_cost_info.pushdown_prefix_filter_sel_; + // skip scan postfix range conditions + logical_row_count *= est_cost_info.ss_postfix_range_filters_sel_; + physical_row_count *= est_cost_info.ss_postfix_range_filters_sel_; + LOG_TRACE("OPT:[STATISTIC EST ROW COUNT", K(logical_row_count), K(physical_row_count), - K(est_cost_info.pushdown_prefix_filter_sel_)); + K(est_cost_info.pushdown_prefix_filter_sel_), + K(est_cost_info.ss_postfix_range_filters_sel_)); RowCountEstMethod est_method = meta.has_opt_stat_ ? RowCountEstMethod::BASIC_STAT : RowCountEstMethod::DEFAULT_STAT; @@ -550,6 +575,137 @@ int ObAccessPathEstimation::process_statistics_estimation(const ObTableMetaInfo return ret; } +// calculate skip scan prefix range columns NDV and postfix range conditions selectivity. +// use the table_metas and origin_rows after extract prefix range. 
+int ObAccessPathEstimation::calc_skip_scan_prefix_ndv(AccessPath &ap, double &prefix_ndv) +{ + int ret = OB_SUCCESS; + prefix_ndv = 1.0; + ObJoinOrder *join_order = NULL; + ObLogPlan *log_plan = NULL; + const ObTableMetaInfo *table_meta_info = NULL; + if (OB_ISNULL(ap.pre_query_range_) || !ap.pre_query_range_->is_ss_range()) { + /* do nothing */ + } else if (OB_ISNULL(join_order = ap.parent_) || OB_ISNULL(log_plan = join_order->get_plan()) + || OB_ISNULL(table_meta_info = ap.est_cost_info_.table_meta_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), K(join_order), K(log_plan), K(table_meta_info)); + } else { + // generate temporary update table metas use prefix range conditions + SMART_VAR(OptTableMetas, tmp_metas) { + ObSEArray prefix_exprs; + const double prefix_range_row_count = table_meta_info->table_row_count_ + * ap.est_cost_info_.prefix_filter_sel_ + * ap.est_cost_info_.pushdown_prefix_filter_sel_; + log_plan->get_selectivity_ctx().init_op_ctx(&join_order->get_output_equal_sets(), prefix_range_row_count); + if (OB_FAIL(get_skip_scan_prefix_exprs(ap.est_cost_info_.range_columns_, + ap.pre_query_range_->get_skip_scan_offset(), + prefix_exprs))) { + LOG_WARN("failed to get skip scan prefix expers", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::update_table_meta_info(log_plan->get_basic_table_metas(), + tmp_metas, + log_plan->get_selectivity_ctx(), + ap.get_table_id(), + prefix_range_row_count, + ap.pre_query_range_->get_range_exprs(), + log_plan->get_predicate_selectivities()))) { + LOG_WARN("failed to update table meta info", K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_distinct(tmp_metas, + log_plan->get_selectivity_ctx(), + prefix_exprs, + prefix_range_row_count, + prefix_ndv))) { + LOG_WARN("failed to calculate distinct", K(ret), K(prefix_exprs)); + } else { + double refine_ndv = 1.0; + prefix_ndv = std::max(refine_ndv, prefix_ndv); + } + } + } + return ret; +} + +int 
ObAccessPathEstimation::get_skip_scan_prefix_exprs(ObIArray &column_items, // in: columns whose leading entries form the skip scan prefix
+                                                        int64_t skip_scan_offset, // in: number of leading columns to take; must lie in [0, column_items.count())
+                                                        ObIArray &prefix_exprs) // out: emptied first, then filled with expr_ of those leading columns
+{
+  int ret = OB_SUCCESS;
+  prefix_exprs.reuse();
+  if (OB_UNLIKELY(skip_scan_offset < 0 || skip_scan_offset >= column_items.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected params", K(ret), K(skip_scan_offset), K(column_items.count()));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < skip_scan_offset; ++i) { // stops at the first failed push_back
+      if (OB_FAIL(prefix_exprs.push_back(column_items.at(i).expr_))) {
+        LOG_WARN("failed to push back", K(ret), K(skip_scan_offset));
+      }
+    }
+  }
+  return ret;
+}
+// Settle use_skip_scan when it is still SS_UNSET; on rejection move the skip scan postfix range filters back into postfix_filters_ and clear all skip scan estimation state.
+int ObAccessPathEstimation::update_use_skip_scan(ObCostTableScanInfo &est_cost_info,
+                                                 ObIArray &all_predicate_sel,
+                                                 OptSkipScanState &use_skip_scan) // in/out: only changed when it comes in as SS_UNSET
+{
+  int ret = OB_SUCCESS;
+  const ObTableMetaInfo *table_meta_info = NULL;
+  if (OB_ISNULL(table_meta_info = est_cost_info.table_meta_info_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected null", K(ret), K(table_meta_info));
+  } else {
+    // const static double NORMAL_CPU_TUPLE_COST = 0.02977945030613315927249275026;
+    // const static double NORMAL_TABLE_SCAN_CPU_TUPLE_COST = 0.3717749711890249146505031527;
+    // const static double NORMAL_MICRO_BLOCK_SEQ_COST = 4.12032943880540981;
+    // const static double NORMAL_MICRO_BLOCK_RND_COST = 5.45276187553;
+    const double row_count = table_meta_info->table_row_count_ // table rows scaled by both prefix selectivities
+                             * est_cost_info.prefix_filter_sel_
+                             * est_cost_info.pushdown_prefix_filter_sel_;
+    const double row_count_per_range = std::max(row_count // rows per distinct prefix value after the postfix range filter, at least 1
+                                                * est_cost_info.ss_postfix_range_filters_sel_
+                                                / est_cost_info.ss_prefix_ndv_,
+                                                1.0);
+    const double ss_row_count = est_cost_info.ss_prefix_ndv_ // presumably one extra row per prefix value to locate it - TODO confirm
+                                + row_count_per_range * est_cost_info.ss_prefix_ndv_;
+    const double index_scan_cost = row_count * (NORMAL_CPU_TUPLE_COST + NORMAL_TABLE_SCAN_CPU_TUPLE_COST); // plain scan: per-row cpu cost over all prefix-range rows
+    const double skip_scan_cost = ss_row_count * NORMAL_MICRO_BLOCK_RND_COST; // skip scan: each estimated row is charged NORMAL_MICRO_BLOCK_RND_COST
+    LOG_TRACE("decide use skip scan by ndv and selectively", K(use_skip_scan), K(row_count), K(row_count_per_range),
+              K(ss_row_count), K(index_scan_cost), K(skip_scan_cost),
+              K(est_cost_info.ss_prefix_ndv_), K(est_cost_info.ss_postfix_range_filters_sel_),
+              K(est_cost_info.ss_postfix_range_filters_));
+    bool reset_skip_scan = false; // true means skip scan is rejected and its state must be undone below
+    if (OptSkipScanState::SS_UNSET != use_skip_scan) {
+      /* do nothing */
+    } else if (!table_meta_info->has_opt_stat_ || // no optimizer statistics / default estimation: reject
+               OB_DEFAULT_STAT_EST == table_meta_info->cost_est_type_) {
+      reset_skip_scan = true;
+    } else if (est_cost_info.ss_prefix_ndv_ > 1000 || est_cost_info.ss_postfix_range_filters_sel_ > 0.01) { // hard limits: reject above 1000 prefix values or above 1% postfix selectivity
+      reset_skip_scan = true;
+    } else if (skip_scan_cost < index_scan_cost) {
+      use_skip_scan = OptSkipScanState::SS_NDV_SEL_ENABLE;
+    } else {
+      reset_skip_scan = true;
+    }
+    if (OB_FAIL(ret) || !reset_skip_scan) { // nothing to undo
+    } else if (OB_FAIL(append(est_cost_info.postfix_filters_, est_cost_info.ss_postfix_range_filters_))) {
+      LOG_WARN("failed to append exprs", K(ret));
+    } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, // NOTE(review): table_metas_ and sel_ctx_ are dereferenced without a null check in this function
+                                                               *est_cost_info.sel_ctx_,
+                                                               est_cost_info.postfix_filters_,
+                                                               est_cost_info.postfix_filter_sel_, // recomputed because postfix_filters_ just grew
+                                                               all_predicate_sel))) {
+      LOG_WARN("failed to calculate selectivity", K(est_cost_info.postfix_filters_), K(ret));
+    } else {
+      est_cost_info.ss_ranges_.reuse();
+      est_cost_info.ss_postfix_range_filters_.reuse();
+      est_cost_info.ss_prefix_ndv_ = 1.0;
+      est_cost_info.ss_postfix_range_filters_sel_ = 1.0;
+      use_skip_scan = OptSkipScanState::SS_DISABLE;
+    }
+  }
+  return ret;
+}
+
 int ObAccessPathEstimation::get_task(ObIArray &tasks,
                                      const ObAddr &addr,
                                      ObBatchEstTasks *&task)
diff --git a/src/sql/optimizer/ob_access_path_estimation.h b/src/sql/optimizer/ob_access_path_estimation.h
index bb66f9d89..ac0e0aea8 100644
--- a/src/sql/optimizer/ob_access_path_estimation.h
+++ b/src/sql/optimizer/ob_access_path_estimation.h
@@ -15,6 +15,7 @@
 #include "sql/optimizer/ob_opt_est_cost.h"
 #include "sql/optimizer/ob_optimizer_context.h"
+#include 
"sql/optimizer/ob_join_order.h" namespace oceanbase { namespace sql { class AccessPath; @@ -69,6 +70,16 @@ private: ObTableMetaInfo &meta, ObIArray& paths); + static int calc_skip_scan_prefix_ndv(AccessPath &ap, double &prefix_ndv); + + static int get_skip_scan_prefix_exprs(ObIArray &column_items, + int64_t skip_scan_offset, + ObIArray &prefix_exprs); + + static int update_use_skip_scan(ObCostTableScanInfo &est_cost_info, + ObIArray &all_predicate_sel, + OptSkipScanState &use_skip_scan); + static int do_storage_estimation(ObOptimizerContext &ctx, ObBatchEstTasks &tasks); diff --git a/src/sql/optimizer/ob_index_info_cache.h b/src/sql/optimizer/ob_index_info_cache.h index 60ade4c20..44efb939b 100644 --- a/src/sql/optimizer/ob_index_info_cache.h +++ b/src/sql/optimizer/ob_index_info_cache.h @@ -28,6 +28,7 @@ public: contain_always_false_(false), query_range_(NULL), ranges_(), + ss_ranges_(), equal_prefix_count_(0), equal_prefix_null_count_(0), range_prefix_count_(0), @@ -48,6 +49,8 @@ public: bool get_contain_always_false() const { return contain_always_false_; } ObQueryRangeArray& get_ranges() { return ranges_; } + ObQueryRangeArray& get_ss_ranges() { return ss_ranges_; } + const ObQueryRangeArray& get_ss_ranges() const { return ss_ranges_; } void set_query_range(ObQueryRange *query_range) { query_range_ = query_range; } common::ObIArray &get_range_columns() { return range_columns_; } common::ObIArray &get_expr_constraints() { return expr_constraints_; } @@ -88,6 +91,7 @@ private: bool contain_always_false_; ObQueryRange *query_range_; ObQueryRangeArray ranges_; + ObQueryRangeArray ss_ranges_; // for index skip scan, postfix range int64_t equal_prefix_count_; int64_t equal_prefix_null_count_; int64_t range_prefix_count_; diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 8518eb642..3d2aa062a 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -63,11 +63,14 @@ 
ObJoinOrder::~ObJoinOrder() } int ObJoinOrder::fill_query_range_info(const QueryRangeInfo &range_info, - ObCostTableScanInfo &est_cost_info) + ObCostTableScanInfo &est_cost_info, + bool use_skip_scan) { int ret = OB_SUCCESS; const ObQueryRangeArray &ranges = range_info.get_ranges(); + const ObQueryRangeArray &ss_ranges = range_info.get_ss_ranges(); est_cost_info.ranges_.reset(); + est_cost_info.ss_ranges_.reset(); // maintain query range info for(int64_t i = 0; OB_SUCC(ret) && i < ranges.count(); ++i) { if (OB_ISNULL(ranges.at(i))) { @@ -77,6 +80,14 @@ int ObJoinOrder::fill_query_range_info(const QueryRangeInfo &range_info, LOG_WARN("failed to add range", K(ret)); } else { /*do nothing*/ } } + for(int64_t i = 0; use_skip_scan && OB_SUCC(ret) && i < ss_ranges.count(); ++i) { + if (OB_ISNULL(ss_ranges.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("range is null", K(ret)); + } else if (OB_FAIL(est_cost_info.ss_ranges_.push_back(*ss_ranges.at(i)))) { + LOG_WARN("failed to add range", K(ret)); + } else { /*do nothing*/ } + } return ret; } @@ -640,6 +651,7 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, ObQueryRange *query_range = NULL; const share::schema::ObTableSchema *index_schema = NULL; ObQueryRangeArray &ranges = range_info.get_ranges(); + ObQueryRangeArray &ss_ranges = range_info.get_ss_ranges(); ObIArray &range_columns = range_info.get_range_columns(); bool is_geo_index = false; ColumnIdInfoMap geo_columnInfo_map; @@ -678,7 +690,7 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, if (!is_geo_index && OB_FAIL(extract_preliminary_query_range(range_columns, helper.filters_, range_info.get_expr_constraints(), - query_range))) { + query_range))) { LOG_WARN("failed to extract query range", K(ret), K(index_id)); } else if (is_geo_index && OB_FAIL(extract_geo_preliminary_query_range(range_columns, helper.filters_, @@ -694,6 +706,11 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, all_single_value_range, dtc_params))) 
{ LOG_WARN("failed to final extract query range", K(ret)); + } else if (OB_FAIL(query_range->get_ss_tablet_ranges(*allocator_, + *exec_ctx, + ss_ranges, + dtc_params))) { + LOG_WARN("failed to final extract index skip query range", K(ret)); } else if (OB_FAIL(ObOptimizerUtil::check_prefix_ranges_count(range_info.get_ranges(), equal_prefix_count, equal_prefix_null_count, @@ -723,7 +740,8 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, query_range = NULL; } } else { - LOG_TRACE("succeed to get query range", K(ranges), K(*query_range), K(table_id), K(index_id)); + LOG_TRACE("succeed to get query range", K(ranges), K(ss_ranges), K(*query_range), + K(table_id), K(index_id)); } } return ret; @@ -865,7 +883,8 @@ int ObJoinOrder::add_table_by_heuristics(const uint64_t table_id, index_info_cache, helper, das_access_path, - true))) { + true, + OptSkipScanState::SS_DISABLE))) { LOG_WARN("failed to create primary index path", K(ret), K(table_id), K(ref_table_id)); } else if (OB_NOT_NULL(das_access_path) && OB_FAIL(access_paths.push_back(das_access_path))) { LOG_WARN("failed to push back access path"); @@ -876,7 +895,8 @@ int ObJoinOrder::add_table_by_heuristics(const uint64_t table_id, index_info_cache, helper, basic_access_path, - false))) { + false, + OptSkipScanState::SS_DISABLE))) { LOG_WARN("failed to make index path", "index_table_id", index_to_use, K(ret)); } else if (OB_NOT_NULL(basic_access_path) && OB_FAIL(access_paths.push_back(basic_access_path))) { LOG_WARN("failed to create primary index path", K(ret), K(table_id), K(ref_table_id)); @@ -1280,14 +1300,15 @@ int ObJoinOrder::will_use_das(const uint64_t table_id, } return ret; } - + int ObJoinOrder::create_one_access_path(const uint64_t table_id, const uint64_t ref_id, const uint64_t index_id, const ObIndexInfoCache &index_info_cache, PathHelper &helper, AccessPath *&access_path, - bool use_das) + bool use_das, + OptSkipScanState use_skip_scan) { int ret = OB_SUCCESS; IndexInfoEntry 
*index_info_entry = NULL; @@ -1350,6 +1371,7 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, ap->range_prefix_count_ = index_info_entry->get_range_info().get_range_prefix_count(); ap->interesting_order_info_ = index_info_entry->get_interesting_order_info(); ap->for_update_ = table_item->for_update_; + ap->use_skip_scan_ = use_skip_scan; if (!get_plan()->get_stmt()->is_select_stmt()) { // do nothing // sample scan doesn't support DML other than SELECT. @@ -1398,7 +1420,8 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, LOG_WARN("failed to get index keys", K(ret)); } else if (OB_FAIL(ap->est_cost_info_.range_columns_.assign(range_info.get_range_columns()))) { LOG_WARN("failed to assign range columns", K(ret)); - } else if (OB_FAIL(fill_query_range_info(range_info, ap->est_cost_info_))) { + } else if (OB_FAIL(fill_query_range_info(range_info, ap->est_cost_info_, + OptSkipScanState::SS_DISABLE != use_skip_scan))) { LOG_WARN("failed to fill query range info", K(ret)); } else { /*do nothing*/ } } else { /*do nothing*/ } @@ -1417,7 +1440,8 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, ap->pre_query_range_, ap->est_cost_info_, is_nl_with_extended_range, - ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id)))) { + ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id), + OptSkipScanState::SS_DISABLE != use_skip_scan))) { LOG_WARN("failed to fill filters for cost table info", K(ret)); } else if (!helper.is_inner_path_ && OB_FAIL(increase_diverse_path_count(ap))) { @@ -2039,6 +2063,7 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id, bool is_create_das_path = false; AccessPath *das_access_path = NULL; AccessPath *basic_access_path = NULL; // the path does not use DAS, maybe optimal sometime. 
+      OptSkipScanState use_skip_scan = OptSkipScanState::SS_UNSET;
       if (OB_FAIL(will_use_das(table_id,
                                ref_table_id,
                                skyline_index_ids.at(i),
@@ -2047,6 +2072,13 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id,
                                is_create_das_path,
                                is_create_basic_path))) {
         LOG_WARN("failed to check will use das", K(ret));
+      } else if (OB_FAIL(will_use_skip_scan(table_id,
+                                            ref_table_id,
+                                            skyline_index_ids.at(i),
+                                            index_info_cache,
+                                            helper,
+                                            use_skip_scan))) {
+        LOG_WARN("failed to check will use skip scan", K(ret));
       } else if (is_create_das_path &&
                  OB_FAIL(create_one_access_path(table_id,
                                                 ref_table_id,
@@ -2054,7 +2086,8 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id,
                                                 index_info_cache,
                                                 helper,
                                                 das_access_path,
-                                                true))) {
+                                                true,
+                                                use_skip_scan))) {
         LOG_WARN("failed to make index path", "index_table_id", skyline_index_ids.at(i), K(ret));
       } else if ( OB_NOT_NULL(das_access_path) && OB_FAIL(access_paths.push_back(das_access_path))) {
         LOG_WARN("failed to push back access path", K(ret));
@@ -2065,7 +2098,8 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id,
                                                 index_info_cache,
                                                 helper,
                                                 basic_access_path,
-                                                false))) {
+                                                false,
+                                                use_skip_scan))) {
         LOG_WARN("failed to make index path", "index_table_id", skyline_index_ids.at(i), K(ret));
       } else if( OB_NOT_NULL(basic_access_path) && OB_FAIL(access_paths.push_back(basic_access_path))) {
         LOG_WARN("failed to push back access path", K(ret));
@@ -2075,6 +2109,55 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id,
   return ret;
 }
 
+int ObJoinOrder::will_use_skip_scan(const uint64_t table_id, // first-pass skip scan decision for one (table_id, index_id) pair
+                                    const uint64_t ref_id, // must not be OB_INVALID_ID; virtual tables always get SS_DISABLE
+                                    const uint64_t index_id, // must not be OB_INVALID_ID
+                                    const ObIndexInfoCache &index_info_cache, // source of the index entry and its query range
+                                    PathHelper &helper, // only is_inner_path_ is read here
+                                    OptSkipScanState &use_skip_scan) // out: SS_DISABLE, SS_HINT_ENABLE, or SS_UNSET (undecided)
+{
+  int ret = OB_SUCCESS;
+  use_skip_scan = OptSkipScanState::SS_UNSET; // also the value left behind on the error paths below
+  IndexInfoEntry *index_info_entry = NULL;
+  const ObQueryRange *query_range = NULL;
+  bool hint_force_skip_scan = false;
+  bool hint_force_no_skip_scan = false;
+  if (OB_UNLIKELY(OB_INVALID_ID == ref_id) || OB_UNLIKELY(OB_INVALID_ID == index_id) ||
+      OB_ISNULL(get_plan())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ref_id), K(index_id), K(get_plan()), K(ret));
+  } else if (is_virtual_table(ref_id)) {
+    use_skip_scan = OptSkipScanState::SS_DISABLE;
+  } else if (OB_FAIL(index_info_cache.get_index_info_entry(table_id, index_id,
+                                                           index_info_entry))) {
+    LOG_WARN("failed to get index info entry", K(table_id), K(index_id), K(ret));
+  } else if (OB_ISNULL(index_info_entry)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("index info entry should not be null", K(ret));
+  } else if (OB_ISNULL(query_range = index_info_entry->get_range_info().get_query_range()) || // no query range, or one that is not a skip scan range: disable
+             !query_range->is_ss_range()) {
+    use_skip_scan = OptSkipScanState::SS_DISABLE;
+  } else if (OB_FAIL(get_plan()->get_log_plan_hint().check_use_skip_scan(table_id,
+                                                                         index_id,
+                                                                         hint_force_skip_scan,
+                                                                         hint_force_no_skip_scan))) {
+    LOG_WARN("failed to check use skip scan", K(ret), K(table_id));
+  } else if (hint_force_skip_scan) { // tested first, so it wins if both hint flags are set
+    use_skip_scan = OptSkipScanState::SS_HINT_ENABLE;
+  } else if (hint_force_no_skip_scan) {
+    use_skip_scan = OptSkipScanState::SS_DISABLE;
+  } else if (helper.is_inner_path_ || get_tables().is_subset(get_plan()->get_subq_pdfilter_tset())) { // without a hint, inner paths and tables inside the subq pdfilter table set never skip scan
+    use_skip_scan = OptSkipScanState::SS_DISABLE;
+  } else {
+    // stay undecided: SS_NDV_SEL_ENABLE may be chosen later, once NDV and selectivity are calculated
+    use_skip_scan = OptSkipScanState::SS_UNSET;
+  }
+
+  LOG_TRACE("check use skip scan", K(helper.is_inner_path_),
+            K(hint_force_skip_scan), K(hint_force_no_skip_scan), K(use_skip_scan));
+  return ret;
+}
+
 int ObJoinOrder::extract_param_for_query_range(const ObIArray &range_conditions,
                                                ObIArray ¶m_pos)
 {
@@ -2199,7 +2282,7 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id,
         LOG_WARN("failed to push back array", K(ret));
       } else { /*do nothing*/ }
     } else if (FALSE_IT(log_table_hint = get_plan()->get_log_plan_hint().get_index_hint(table_id))) {
-    } 
else if (NULL != log_table_hint && !log_table_hint->is_no_index_hint()) { + } else if (NULL != log_table_hint && log_table_hint->is_use_index_hint()) { // for use index hint, get index ids from hint. if (OB_FAIL(valid_index_ids.assign(log_table_hint->index_list_))) { LOG_WARN("failed to assign index ids", K(ret)); @@ -4050,6 +4133,7 @@ int AccessPath::assign(const AccessPath &other, common::ObIAllocator *allocator) range_prefix_count_ = other.range_prefix_count_; table_opt_info_ = other.table_opt_info_; for_update_ = other.for_update_; + use_skip_scan_ = other.use_skip_scan_; if (OB_ISNULL(allocator)) { ret = OB_INVALID_ARGUMENT; @@ -4177,7 +4261,7 @@ int AccessPath::re_estimate_cost(EstimateCostInfo ¶m, double &card, double & return ret; } -const ObRangesArray& AccessPath::get_query_ranges() const +const ObIArray& AccessPath::get_query_ranges() const { return est_cost_info_.ranges_; } @@ -9448,7 +9532,8 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters, const ObQueryRange *query_range, ObCostTableScanInfo &est_cost_info, bool &is_nl_with_extended_range, - bool is_link) + bool is_link, + bool use_skip_scan) { int ret = OB_SUCCESS; is_nl_with_extended_range = false; @@ -9493,6 +9578,8 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters, if (OB_SUCC(ret)) { if (OB_FAIL(est_cost_info.prefix_filters_.assign(query_range->get_range_exprs()))) { LOG_WARN("failed to assign exprs", K(ret)); + } else if (use_skip_scan && OB_FAIL(est_cost_info.ss_postfix_range_filters_.assign(query_range->get_ss_range_exprs()))) { + LOG_WARN("failed to assign exprs", K(ret)); } } @@ -9568,7 +9655,9 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters, } else if (can_extract) { ret = est_cost_info.pushdown_prefix_filters_.push_back(filter); } else if (est_cost_info.ref_table_id_ != est_cost_info.index_id_) { - ret = est_cost_info.postfix_filters_.push_back(filter); + if (!use_skip_scan || !ObOptimizerUtil::find_item(est_cost_info.ss_postfix_range_filters_, filter)) 
{ + ret = est_cost_info.postfix_filters_.push_back(filter); + } // 对于空间索引,空间谓词一定要回表计算 if (OB_SUCC(ret) && est_cost_info.index_meta_info_.is_geo_index_) { ret = est_cost_info.table_filters_.push_back(filter); @@ -9607,7 +9696,8 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters, is_nl_with_extended_range = true; } LOG_TRACE("succeed to classify filters", K(est_cost_info.prefix_filters_), - K(est_cost_info.pushdown_prefix_filters_), K(est_cost_info.postfix_filters_), + K(est_cost_info.pushdown_prefix_filters_), K(est_cost_info.ss_postfix_range_filters_), + K(est_cost_info.postfix_filters_), K(est_cost_info.table_filters_), K(is_nl_with_extended_range)); } } @@ -9968,19 +10058,17 @@ int ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id, int ret = OB_SUCCESS; ObSEArray column_exprs; ObSEArray column_ids; - const ObDMLStmt *stmt = NULL; ObSQLSessionInfo *session_info = NULL; if (OB_UNLIKELY(OB_INVALID_ID == table_id) || OB_UNLIKELY(OB_INVALID_ID == ref_table_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid table id", K(table_id), K(ref_table_id), K(ret)); } else if (OB_ISNULL(get_plan()) || - OB_ISNULL(stmt = get_plan()->get_stmt()) || OB_ISNULL(table_partition_info_) || OB_ISNULL(session_info = get_plan()->get_optimizer_context().get_session_info())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid params", K(get_plan()), K(ret)); - } else if (OB_FAIL(stmt->get_column_exprs(table_id, column_exprs))) { + } else if (OB_FAIL(get_plan()->get_column_exprs(table_id, column_exprs))) { LOG_WARN("failed to get column exprs", K(ret)); } else { ObSEArray all_used_part_id; diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index 09582e491..5cced9f6a 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -497,6 +497,14 @@ struct EstimateCostInfo { DISALLOW_COPY_AND_ASSIGN(Path); }; + + enum OptSkipScanState + { + SS_DISABLE = 0, + SS_UNSET, + SS_HINT_ENABLE, + SS_NDV_SEL_ENABLE + }; 
class AccessPath : public Path { public: @@ -529,7 +537,8 @@ struct EstimateCostInfo { est_records_(), range_prefix_count_(0), table_opt_info_(), - for_update_(false) + for_update_(false), + use_skip_scan_(OptSkipScanState::SS_UNSET) { } virtual ~AccessPath() { @@ -562,7 +571,7 @@ struct EstimateCostInfo { return NULL == table_opt_info_ ? false : OptimizationMethod::RULE_BASED != table_opt_info_->optimization_method_; } - const ObRangesArray &get_query_ranges() const; + const ObIArray &get_query_ranges() const; virtual void get_name_internal(char *buf, const int64_t buf_len, int64_t &pos) const { BUF_PRINTF("@"); @@ -589,7 +598,8 @@ struct EstimateCostInfo { K_(sample_info), K_(range_prefix_count), K_(for_update), - K_(use_das)); + K_(use_das), + K_(use_skip_scan)); public: //member variables uint64_t table_id_; @@ -616,6 +626,7 @@ struct EstimateCostInfo { int64_t range_prefix_count_; // prefix count BaseTableOptInfo *table_opt_info_; bool for_update_; + OptSkipScanState use_skip_scan_; private: DISALLOW_COPY_AND_ASSIGN(AccessPath); }; @@ -1335,7 +1346,8 @@ struct NullAwareAntiJoinInfo { const ObIndexInfoCache &index_info_cache, PathHelper &helper, AccessPath *&ap, - bool use_das); + bool use_das, + OptSkipScanState use_skip_scan); int will_use_das(const uint64_t table_id, const uint64_t ref_id, @@ -1345,6 +1357,13 @@ struct NullAwareAntiJoinInfo { bool &create_das_path, bool &create_basic_path); + int will_use_skip_scan(const uint64_t table_id, + const uint64_t ref_id, + const uint64_t index_id, + const ObIndexInfoCache &index_info_cache, + PathHelper &helper, + OptSkipScanState &use_skip_scan); + int get_access_path_ordering(const uint64_t table_id, const uint64_t ref_table_id, const uint64_t index_id, @@ -1891,7 +1910,8 @@ struct NullAwareAntiJoinInfo { const common::ObIArray &where_conditions); int fill_query_range_info(const QueryRangeInfo &range_info, - ObCostTableScanInfo &est_cost_info); + ObCostTableScanInfo &est_cost_info, + bool use_skip_scan); int 
compute_table_location_for_paths(ObIArray &access_paths, ObIArray &tbl_part_infos); @@ -2067,7 +2087,8 @@ struct NullAwareAntiJoinInfo { const ObQueryRange* query_range, ObCostTableScanInfo &est_scan_cost_info, bool &is_nl_with_extended_range, - bool is_link = false); + bool is_link = false, + bool use_skip_scan = false); int can_extract_unprecise_range(const uint64_t table_id, const ObRawExpr *filter, diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 86481a6c6..b39168d83 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -4220,8 +4220,10 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, scan->set_index_back_row_count(ap->index_back_row_count_); scan->set_estimate_method(ap->est_cost_info_.row_est_method_); scan->set_pre_query_range(ap->pre_query_range_); + scan->set_skip_scan(OptSkipScanState::SS_DISABLE != ap->use_skip_scan_); if (!ap->is_inner_path_ && - OB_FAIL(scan->set_query_ranges(ap->get_cost_table_scan_info().ranges_))) { + OB_FAIL(scan->set_query_ranges(ap->get_cost_table_scan_info().ranges_, + ap->get_cost_table_scan_info().ss_ranges_))) { LOG_WARN("failed to set query ranges", K(ret)); } else if (OB_FAIL(scan->set_range_columns(ap->get_cost_table_scan_info().range_columns_))) { LOG_WARN("failed to set range column", K(ret)); @@ -6715,7 +6717,9 @@ int ObLogPlan::check_scalar_groupby_pushdown(const ObIArray & if (OB_ISNULL(cur_aggr = aggrs.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); - } else if (T_FUN_COUNT != cur_aggr->get_expr_type()) { + } else if (T_FUN_COUNT != cur_aggr->get_expr_type() + && T_FUN_MIN != cur_aggr->get_expr_type() + && T_FUN_MAX != cur_aggr->get_expr_type()) { can_push = false; } else if (cur_aggr->is_param_distinct() || 1 < cur_aggr->get_real_param_count()) { /* mysql mode, support count(distinct c1, c2). 
if this distinct can be eliminated,
@@ -11769,6 +11773,26 @@ const ColumnItem *ObLogPlan::get_column_item_by_id(uint64_t table_id, uint64_t c
   return column_item;
 }
+// Append to column_exprs the exprs the stmt returns for table_id, followed by expr_ of every column_items_ entry held by this plan whose table_id_ matches.
+int ObLogPlan::get_column_exprs(uint64_t table_id, ObIArray &column_exprs) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(get_stmt())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ret), K(get_stmt()));
+  } else if (OB_FAIL(get_stmt()->get_column_exprs(table_id, column_exprs))) {
+    LOG_WARN("failed to get column exprs", K(ret));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < column_items_.count(); ++i) { // NOTE(review): no de-duplication against the stmt exprs is done here - confirm the two sources are disjoint
+      if (table_id == column_items_.at(i).table_id_
+          && OB_FAIL(column_exprs.push_back(column_items_.at(i).expr_))) {
+        LOG_WARN("failed to push back", K(ret));
+      }
+    }
+  }
+  return ret;
+}
+
 int ObLogPlan::generate_column_expr(ObRawExprFactory &expr_factory,
                                     const uint64_t &table_id,
                                     const ObColumnSchemaV2 &column_schema,
diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h
index 850896165..046bfcce8 100644
--- a/src/sql/optimizer/ob_log_plan.h
+++ b/src/sql/optimizer/ob_log_plan.h
@@ -1279,6 +1279,9 @@ public:
                             uint64_t table_id,
                             const share::schema::ObTableSchema &index_table_schema,
                             common::ObIArray &index_columns);
+  int get_column_exprs(uint64_t table_id, ObIArray &column_exprs) const;
+  ObColumnRefRawExpr *get_column_expr_by_id(uint64_t table_id, uint64_t column_id) const;
+  const ColumnItem *get_column_item_by_id(uint64_t table_id, uint64_t column_id) const;
   inline common::ObIArray &get_column_items() { return column_items_; }
   int generate_column_expr(ObRawExprFactory &expr_factory,
                            const uint64_t &table_id,
@@ -1336,8 +1339,6 @@ public:
   int allocate_material_for_recursive_cte_plan(ObIArray &child_ops);
 
 protected:
-  ObColumnRefRawExpr *get_column_expr_by_id(uint64_t table_id, uint64_t column_id) const;
-  const ColumnItem *get_column_item_by_id(uint64_t table_id, uint64_t column_id) const;
   int update_plans_interesting_order_info(ObIArray 
&candidate_plans, const int64_t check_scope); diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp old mode 100644 new mode 100755 index ac8254a54..fdcd44952 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -47,6 +47,8 @@ const char *ObLogTableScan::get_name() const } if (sample_method != SampleInfo::NO_SAMPLE) { name = (sample_method == SampleInfo::ROW_SAMPLE) ? "ROW SAMPLE SCAN" : "BLOCK SAMPLE SCAN"; + } else if (is_skip_scan()) { + name = use_das() ? "DISTRIBUTED INDEX SKIP SCAN" : "INDEX SKIP SCAN"; } else if (use_das()) { name = is_get ? "DISTRIBUTED TABLE GET" : "DISTRIBUTED TABLE SCAN"; } else { @@ -1165,6 +1167,17 @@ int ObLogTableScan::print_range_annotation(char *buf, ret = print_ranges(buf, buf_len, pos, ranges_); } + if (OB_SUCC(ret) && is_skip_scan()) { + int64_t skip_scan_offset = get_pre_query_range()->get_skip_scan_offset(); + if (OB_FAIL(BUF_PRINTF("\n prefix_columns_cnt = %ld , skip_scan_range", skip_scan_offset))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (ss_ranges_.empty() && OB_FAIL(BUF_PRINTF("(MIN ; MAX)"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(print_ranges(buf, buf_len, pos, ss_ranges_))) { + LOG_WARN("failed to print index skip ranges", K(ret)); + } else { /* Do nothing */ } + } + if (OB_SUCC(ret)) { if (!range_conds_.empty()) { //print range condition @@ -1204,11 +1217,14 @@ int ObLogTableScan::print_limit_offset_annotation(char *buf, return ret; } -int ObLogTableScan::set_query_ranges(ObRangesArray ranges) +int ObLogTableScan::set_query_ranges(ObIArray &ranges, + ObIArray &ss_ranges) { int ret = OB_SUCCESS; if (OB_FAIL(append(ranges_, ranges))) { LOG_WARN("Failed to do append to ranges_ in set_query_ranges()"); + } else if (OB_FAIL(append(ss_ranges_, ss_ranges))) { + LOG_WARN("Failed to do append to ranges_ in set_query_ranges()"); } else { /* Do nothing =*/ } return ret; } @@ -1233,6 +1249,7 @@ int 
ObLogTableScan::print_used_hint(planText &plan_text) const ObLogPlanHint &plan_hint = get_plan()->get_log_plan_hint(); const LogTableHint *table_hint = plan_hint.get_log_table_hint(table_id_); const ObHint *hint = plan_hint.get_normal_hint(T_USE_LATE_MATERIALIZATION); + int64_t idx = OB_INVALID_INDEX; if (NULL != hint && ((need_late_materialization() && hint->is_enable_hint()) || (!need_late_materialization() && hint->is_disable_hint())) @@ -1248,34 +1265,30 @@ int ObLogTableScan::print_used_hint(planText &plan_text) LOG_WARN("failed to print table parallel hint", K(ret)); } else if (table_hint->index_list_.empty()) { /*do nothing*/ - } else if (OB_UNLIKELY(table_hint->index_list_.count() != table_hint->index_hints_.count() - || (!table_hint->is_index_hint() && !table_hint->is_no_index_hint()))) { + } else if (OB_UNLIKELY(table_hint->index_list_.count() != table_hint->index_hints_.count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected log index hint", K(ret), K(*table_hint)); - } else { - int64_t idx = OB_INVALID_INDEX; + } else if (table_hint->is_use_index_hint()) {// print used use index hint if (ObOptimizerUtil::find_item(table_hint->index_list_, index_table_id_, &idx)) { if (OB_UNLIKELY(idx < 0 || idx >= table_hint->index_list_.count()) || OB_ISNULL(hint = table_hint->index_hints_.at(idx))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected idx", K(ret), K(idx), K(table_hint->index_list_)); - } else if (table_hint->is_index_hint() && - OB_FAIL(hint->print_hint(plan_text))) { + } else if (!is_skip_scan() && T_INDEX_SS_HINT == hint->get_hint_type()) { + /* is not index skip scan but exist index_ss hint */ + } else if (OB_FAIL(hint->print_hint(plan_text))) { LOG_WARN("failed to print indedx hint", K(ret), K(*hint)); } } - - // print all no index - if (OB_SUCC(ret) && table_hint->is_no_index_hint()) { - for (int64_t i = 0 ; OB_SUCC(ret) && i < table_hint->index_list_.count(); ++i) { - if (idx == i) { - /*do nothing*/ - } else if (OB_ISNULL(hint = 
table_hint->index_hints_.at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected NULL", K(ret), K(hint)); - } else if (OB_FAIL(hint->print_hint(plan_text))) { - LOG_WARN("failed to print indedx hint", K(ret), K(*hint)); - } + } else {// print all no index + for (int64_t i = 0 ; OB_SUCC(ret) && i < table_hint->index_list_.count(); ++i) { + if (idx == i) { + /*do nothing*/ + } else if (OB_ISNULL(hint = table_hint->index_hints_.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected NULL", K(ret), K(hint)); + } else if (OB_FAIL(hint->print_hint(plan_text))) { + LOG_WARN("failed to print indedx hint", K(ret), K(*hint)); } } } @@ -1291,6 +1304,21 @@ int ObLogTableScan::print_outline_data(planText &plan_text) int64_t &pos = plan_text.pos; TableItem *table_item = NULL; ObString qb_name; + const ObString *index_name = NULL; + ObItemType index_type = T_INDEX_HINT; + if (is_skip_scan()) { + index_type = T_INDEX_SS_HINT; + if (ref_table_id_ == index_table_id_) { + index_name = &ObIndexHint::PRIMARY_KEY; + } else { + index_name = &get_index_name(); + } + } else if (ref_table_id_ == index_table_id_) { + index_type = T_FULL_HINT; + } else { + index_type = T_INDEX_HINT; + index_name = &get_index_name(); + } const ObDMLStmt *stmt = NULL; const ObTableParallelHint *parallel_hint = NULL; if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt())) { @@ -1325,11 +1353,11 @@ int ObLogTableScan::print_outline_data(planText &plan_text) && static_cast(get_parent())->is_late_mat()) { // late materialization right table, do not print index hint. } else { - ObIndexHint index_hint(ref_table_id_ == index_table_id_ ? 
T_FULL_HINT: T_INDEX_HINT); + ObIndexHint index_hint(index_type); index_hint.set_qb_name(qb_name); index_hint.get_table().set_table(*table_item); - if (T_INDEX_HINT == index_hint.get_hint_type()) { - index_hint.get_index_name().assign(get_index_name().ptr(), get_index_name().length()); + if (NULL != index_name) { + index_hint.get_index_name().assign_ptr(index_name->ptr(), index_name->length()); } if (OB_FAIL(index_hint.print_hint(plan_text))) { LOG_WARN("failed to print index hint", K(ret)); diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 0d15312b2..78435671b 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -51,6 +51,8 @@ public: filter_before_index_back_(), table_partition_info_(NULL), ranges_(), + ss_ranges_(), + is_skip_scan_(), limit_count_expr_(NULL), limit_offset_expr_(NULL), sample_info_(), @@ -314,6 +316,8 @@ public: bool is_index_scan() const { return ref_table_id_ != index_table_id_; } bool is_table_whole_range_scan() const { return !is_index_scan() && (NULL == pre_query_range_ || (1 == ranges_.count() && ranges_.at(0).is_whole_range())); } + void set_skip_scan(bool is_skip_scan) { is_skip_scan_ = is_skip_scan; } + bool is_skip_scan() const { return is_skip_scan_; } virtual bool is_table_scan() const override { return true; } bool is_whole_range_scan() const {return NULL == pre_query_range_ || (1 == ranges_.count() && ranges_.at(0).is_whole_range()); } @@ -323,7 +327,7 @@ public: void set_is_multi_part_table_scan(bool multi_part_tsc) { is_multi_part_table_scan_ = multi_part_tsc; } bool get_is_multi_part_table_scan() { return is_multi_part_table_scan_; } - int set_query_ranges(ObRangesArray ranges); + int set_query_ranges(ObIArray &ranges, ObIArray &ss_ranges); virtual int inner_replace_generated_agg_expr( const common::ObIArray >&to_replace_exprs); inline common::ObIArray &get_filter_before_index_flags() { return filter_before_index_back_; } @@ -487,6 +491,8 
@@ protected: // memeber variables //because its used in EXCHANGE stage, and //copy_without_child used before this ObRangesArray ranges_;//For explain. Code generator and executor cannot use this. + ObRangesArray ss_ranges_;//For explain. Code generator and executor cannot use this. + bool is_skip_scan_; // limit params from upper limit op ObRawExpr *limit_count_expr_; diff --git a/src/sql/optimizer/ob_opt_est_cost.cpp b/src/sql/optimizer/ob_opt_est_cost.cpp index 0515b9176..d9dc52942 100644 --- a/src/sql/optimizer/ob_opt_est_cost.cpp +++ b/src/sql/optimizer/ob_opt_est_cost.cpp @@ -605,6 +605,12 @@ int ObOptEstCost::calculate_filter_selectivity(ObCostTableScanInfo &est_cost_inf est_cost_info.pushdown_prefix_filter_sel_, all_predicate_sel))) { LOG_WARN("failed to calculate selectivity", K(est_cost_info.pushdown_prefix_filters_), K(ret)); + } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, + *est_cost_info.sel_ctx_, + est_cost_info.ss_postfix_range_filters_, + est_cost_info.ss_postfix_range_filters_sel_, + all_predicate_sel))) { + LOG_WARN("failed to calculate selectivity", K(est_cost_info.ss_postfix_range_filters_), K(ret)); } else if (OB_FAIL(ObOptSelectivity::calculate_selectivity(*est_cost_info.table_metas_, *est_cost_info.sel_ctx_, est_cost_info.postfix_filters_, @@ -622,6 +628,7 @@ int ObOptEstCost::calculate_filter_selectivity(ObCostTableScanInfo &est_cost_inf K(est_cost_info.prefix_filters_), K(est_cost_info.pushdown_prefix_filters_), K(est_cost_info.postfix_filters_), K(est_cost_info.table_filters_), K(est_cost_info.prefix_filter_sel_), K(est_cost_info.pushdown_prefix_filter_sel_), + K(est_cost_info.ss_postfix_range_filters_), K(est_cost_info.ss_postfix_range_filters_sel_), K(est_cost_info.postfix_filter_sel_), K(est_cost_info.table_filter_sel_)); } return ret; diff --git a/src/sql/optimizer/ob_opt_est_cost_model.cpp b/src/sql/optimizer/ob_opt_est_cost_model.cpp index 6adca2083..98c52e7e0 100644 --- 
a/src/sql/optimizer/ob_opt_est_cost_model.cpp +++ b/src/sql/optimizer/ob_opt_est_cost_model.cpp @@ -56,6 +56,8 @@ int ObCostTableScanInfo::assign(const ObCostTableScanInfo &est_cost_info) pushdown_prefix_filter_sel_ = est_cost_info.pushdown_prefix_filter_sel_; postfix_filter_sel_ = est_cost_info.postfix_filter_sel_; table_filter_sel_ = est_cost_info.table_filter_sel_; + ss_prefix_ndv_ = est_cost_info.ss_prefix_ndv_; + ss_postfix_range_filters_sel_ = est_cost_info.ss_postfix_range_filters_sel_; batch_type_ = est_cost_info.batch_type_; sample_info_ = est_cost_info.sample_info_; // no need to copy table scan param @@ -1243,38 +1245,71 @@ int ObOptEstCostModel::cost_table_one_batch(const ObCostTableScanInfo &est_cost_ double &index_back_cost) { int ret = OB_SUCCESS; + int64_t part_cnt = est_cost_info.index_meta_info_.index_part_count_; + double per_part_log_cnt = logical_row_count / part_cnt; + double per_part_phy_cnt = physical_row_count / part_cnt; if (OB_UNLIKELY(logical_row_count < 0.0) || OB_UNLIKELY(parallel < 1)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("get unexpected error", K(logical_row_count), K(parallel), K(ret)); - } else { - int64_t part_cnt = est_cost_info.index_meta_info_.index_part_count_; - double per_part_log_cnt = logical_row_count / part_cnt; - double per_part_phy_cnt = physical_row_count / part_cnt; - if (ObSimpleBatch::T_GET == type || ObSimpleBatch::T_MULTI_GET == type) { - if (OB_FAIL(cost_table_get_one_batch(est_cost_info, - per_part_log_cnt, - cost, - index_back_cost))) { - LOG_WARN("Failed to estimate get cost", K(ret)); - } else { - cost = cost * part_cnt / parallel; - index_back_cost = index_back_cost * part_cnt / parallel; - } - } else if (ObSimpleBatch::T_SCAN == type || ObSimpleBatch::T_MULTI_SCAN == type) { - if (OB_FAIL(cost_table_scan_one_batch(est_cost_info, - per_part_log_cnt, - per_part_phy_cnt, - cost, - index_back_cost))) { - LOG_WARN("Failed to estimate scan cost", K(ret)); - } else { - cost = cost * part_cnt / parallel; - 
index_back_cost = index_back_cost * part_cnt / parallel; - } - } else { + } else if (!est_cost_info.ss_ranges_.empty()) { + double ss_prefix_scan_cost = 0.0; + per_part_log_cnt /= est_cost_info.ss_prefix_ndv_; + per_part_phy_cnt /= est_cost_info.ss_prefix_ndv_; + if (OB_UNLIKELY(ObSimpleBatch::T_MULTI_GET != type && ObSimpleBatch::T_MULTI_SCAN != type)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid batch type", K(ret), K(type)); + LOG_WARN("invalid skip scan batch type", K(ret), K(type)); + } else if (OB_FAIL(cost_skip_scan_prefix_scan_one_row(est_cost_info, ss_prefix_scan_cost))) { + LOG_WARN("failed to calc skip scan prefix scan one row cost", K(ret), K(est_cost_info)); + } else if (ObSimpleBatch::T_MULTI_GET == type + && OB_FAIL(cost_table_get_one_batch(est_cost_info, + per_part_log_cnt, + cost, + index_back_cost))) { + LOG_WARN("Failed to estimate get cost", K(ret)); + } else if (ObSimpleBatch::T_MULTI_SCAN == type + && OB_FAIL(cost_table_scan_one_batch(est_cost_info, + per_part_log_cnt, + per_part_phy_cnt, + cost, + index_back_cost))) { + LOG_WARN("Failed to estimate scan cost", K(ret)); + } else { + // cost table scan/get one batch + cost = cost * part_cnt / parallel; + index_back_cost = index_back_cost * part_cnt / parallel; + ss_prefix_scan_cost = ss_prefix_scan_cost * part_cnt / parallel; + + // cost calculate skip scan prefix ndv + ss_prefix_scan_cost *= est_cost_info.ss_prefix_ndv_; + index_back_cost *= est_cost_info.ss_prefix_ndv_; + cost = cost * est_cost_info.ss_prefix_ndv_ + ss_prefix_scan_cost; + LOG_TRACE("OPT:[COST SKIP SCAN]", K(type), K(est_cost_info.ss_prefix_ndv_), + K(per_part_log_cnt), K(ss_prefix_scan_cost), K(cost)); } + } else if (ObSimpleBatch::T_GET == type || ObSimpleBatch::T_MULTI_GET == type) { + if (OB_FAIL(cost_table_get_one_batch(est_cost_info, + per_part_log_cnt, + cost, + index_back_cost))) { + LOG_WARN("Failed to estimate get cost", K(ret)); + } else { + cost = cost * part_cnt / parallel; + index_back_cost = index_back_cost * 
part_cnt / parallel; + } + } else if (ObSimpleBatch::T_SCAN == type || ObSimpleBatch::T_MULTI_SCAN == type) { + if (OB_FAIL(cost_table_scan_one_batch(est_cost_info, + per_part_log_cnt, + per_part_phy_cnt, + cost, + index_back_cost))) { + LOG_WARN("Failed to estimate scan cost", K(ret)); + } else { + cost = cost * part_cnt / parallel; + index_back_cost = index_back_cost * part_cnt / parallel; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid batch type", K(ret), K(type)); } return ret; } @@ -1457,6 +1492,91 @@ int ObOptEstCostModel::cost_table_get_one_batch_spatial(double row_count, return ret; } +int ObOptEstCostModel::cost_skip_scan_prefix_scan_one_row(const ObCostTableScanInfo &est_cost_info, + double &cost) +{ + int ret = OB_SUCCESS; + const ObTableMetaInfo *table_meta_info = est_cost_info.table_meta_info_; + const ObIndexMetaInfo &index_meta_info = est_cost_info.index_meta_info_; + const double row_count = 1.0; + double project_cost = 0.0; + if (OB_ISNULL(table_meta_info) || + OB_UNLIKELY(table_meta_info->table_row_count_ <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(table_meta_info), K(ret)); + } else if (OB_FAIL(cost_full_table_scan_project(row_count, + est_cost_info, + project_cost))) { + LOG_WARN("failed to cost project", K(ret)); + } else { + //索引总的微块数 = 总大小/微块大小 + double num_micro_blocks = index_meta_info.get_micro_block_numbers(); + //读微块数 = 总微块数 * 读行比例 + double num_micro_blocks_read = 0; + if (OB_LIKELY(table_meta_info->table_row_count_ > 0) && + row_count <= table_meta_info->table_row_count_) { + num_micro_blocks_read = std::ceil(num_micro_blocks + * row_count + / static_cast (table_meta_info->table_row_count_)); + } else { + num_micro_blocks_read = num_micro_blocks; + } + + // revise number of rows if is row sample scan + // 对于行采样,除了微块扫描数外,其他按比例缩小 + // if (est_cost_info.sample_info_.is_row_sample()) { + // row_count *= 0.01 * est_cost_info.sample_info_.percent_; + // } + + // IO代价,主要包括读取微块、反序列化的代价的代价 + double io_cost = 
0.0; + double first_block_cost = cost_params_.MICRO_BLOCK_RND_COST; + double rows_in_one_block = static_cast (table_meta_info->table_row_count_) / num_micro_blocks; + rows_in_one_block = rows_in_one_block <= 1 ? 1.000001 : rows_in_one_block; + if (!est_cost_info.pushdown_prefix_filters_.empty()) { + if (est_cost_info.can_use_batch_nlj_) { + first_block_cost = cost_params_.BATCH_NL_SCAN_COST; + } else { + first_block_cost = cost_params_.NL_SCAN_COST; + } + } + if (num_micro_blocks_read < 1) { + io_cost = first_block_cost; + } else { + io_cost = first_block_cost + cost_params_.MICRO_BLOCK_SEQ_COST * (num_micro_blocks_read-1); + } + + // filter cost, skip scan prefix scan do not calculate filter now + double qual_cost = 0.0; + // ObSEArray filters; + // if (OB_FAIL(append(filters, est_cost_info.ss_prefix_filters_))) { need get skip scan prefix filter can not extract prefix range + // LOG_WARN("failed to append fiilters", K(ret)); + // } else { + // qual_cost += cost_quals(row_count, filters); + // } + + // CPU代价,包括get_next_row调用的代价和谓词代价 + double cpu_cost = row_count * cost_params_.CPU_TUPLE_COST + qual_cost; + // 从memtable读取数据的代价,待提供 + double memtable_cost = 0; + // memtable数据和基线数据合并的代价,待提供 + double memtable_merge_cost = 0; + //因为存储层有预期,所以去存储层的IO、CPU代价的最大值 + double scan_cpu_cost = row_count * cost_params_.TABLE_SCAN_CPU_TUPLE_COST + project_cost; + cpu_cost += scan_cpu_cost; + if (io_cost > cpu_cost) { + cost = io_cost + memtable_cost + memtable_merge_cost; + } else { + cost = cpu_cost + memtable_cost + memtable_merge_cost; + } + + LOG_TRACE("OPT:[COST SKIP SCAN PREFIX SCAN ONE ROW]", K(num_micro_blocks), K(table_meta_info->table_row_count_), + K(cost), K(io_cost), K(cpu_cost), K(memtable_cost), K(memtable_merge_cost), K(qual_cost), + K(project_cost), K(num_micro_blocks_read)); + } + return ret; +} + /** * 估算TableScan的代价 * formula: cost = io_cost + memtable_cost + memtable_merge_cost + cpu_cost diff --git a/src/sql/optimizer/ob_opt_est_cost_model.h 
b/src/sql/optimizer/ob_opt_est_cost_model.h index cb32540f4..0edf219a2 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.h +++ b/src/sql/optimizer/ob_opt_est_cost_model.h @@ -182,9 +182,11 @@ struct ObCostTableScanInfo is_inner_path_(false), can_use_batch_nlj_(false), ranges_(), + ss_ranges_(), range_columns_(), prefix_filters_(), pushdown_prefix_filters_(), + ss_postfix_range_filters_(), postfix_filters_(), table_filters_(), table_metas_(NULL), @@ -195,6 +197,8 @@ struct ObCostTableScanInfo postfix_filter_sel_(1.0), table_filter_sel_(1.0), join_filter_sel_(1.0), + ss_prefix_ndv_(1.0), + ss_postfix_range_filters_sel_(1.0), batch_type_(common::ObSimpleBatch::ObBatchType::T_NONE) { } virtual ~ObCostTableScanInfo() @@ -208,7 +212,8 @@ struct ObCostTableScanInfo K_(is_virtual_table), K_(is_unique), K_(is_inner_path), K_(can_use_batch_nlj), K_(prefix_filter_sel), K_(pushdown_prefix_filter_sel), - K_(postfix_filter_sel), K_(table_filter_sel)); + K_(postfix_filter_sel), K_(table_filter_sel), + K_(ss_prefix_ndv), K_(ss_postfix_range_filters_sel)); // the following information need to be set before estimating cost uint64_t table_id_; // table id uint64_t ref_table_id_; // ref table id @@ -220,6 +225,7 @@ struct ObCostTableScanInfo bool is_inner_path_; bool can_use_batch_nlj_; ObRangesArray ranges_; // all the ranges + ObRangesArray ss_ranges_; // skip scan ranges common::ObSEArray range_columns_; // all the range columns common::ObSEArray access_column_items_; // all the access columns common::ObSEArray index_access_column_items_; // all the access columns @@ -227,6 +233,7 @@ struct ObCostTableScanInfo //这几个filter的分类参考OptimizerUtil::classify_filters() common::ObSEArray prefix_filters_; // filters match index prefix common::ObSEArray pushdown_prefix_filters_; // filters match index prefix along pushed down filter + common::ObSEArray ss_postfix_range_filters_; // range conditions extract postfix range for skip scan common::ObSEArray postfix_filters_; // filters evaluated 
before index back, but not index prefix common::ObSEArray table_filters_; // filters evaluated after index back @@ -241,6 +248,8 @@ struct ObCostTableScanInfo double postfix_filter_sel_; double table_filter_sel_; double join_filter_sel_; + double ss_prefix_ndv_; // skip scan prefix columns NDV + double ss_postfix_range_filters_sel_; common::ObSimpleBatch::ObBatchType batch_type_; SampleInfo sample_info_; private: @@ -1020,6 +1029,9 @@ protected: const ObCostTableScanInfo &est_cost_info, bool is_scan_index, double &res); + + int cost_skip_scan_prefix_scan_one_row(const ObCostTableScanInfo &est_cost_info, + double &cost); protected: const double (&comparison_params_)[common::ObMaxTC + 1]; const double (&hash_params_)[common::ObMaxTC + 1]; diff --git a/src/sql/optimizer/ob_opt_selectivity.cpp b/src/sql/optimizer/ob_opt_selectivity.cpp index 1161ab9dc..5e6ed766f 100644 --- a/src/sql/optimizer/ob_opt_selectivity.cpp +++ b/src/sql/optimizer/ob_opt_selectivity.cpp @@ -735,12 +735,12 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met int ret = OB_SUCCESS; const OptTableMeta *base_table_meta = base_table_metas.get_table_meta_by_table_id(table_id); OptTableMeta *table_meta = NULL; - const ObDMLStmt *stmt = NULL; + const ObLogPlan *log_plan = NULL; ObSEArray column_sel_infos; filtered_rows = filtered_rows < 1.0 ? 
1.0 : filtered_rows; - if (OB_ISNULL(base_table_meta) || OB_ISNULL(stmt = ctx.get_stmt())) { + if (OB_ISNULL(base_table_meta) || OB_ISNULL(log_plan = ctx.get_plan())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(base_table_meta), K(stmt)); + LOG_WARN("get unexpected null", K(ret), K(base_table_meta), K(log_plan)); } else if (OB_FAIL(update_table_metas.copy_table_meta_info(*base_table_meta, table_meta))) { LOG_WARN("failed to copy table meta info", K(ret)); } else { @@ -794,7 +794,7 @@ int ObOptSelectivity::update_table_meta_info(const OptTableMetas &base_table_met // update null number if (null_num > 0) { bool null_reject = false; - const ObColumnRefRawExpr *column_expr = stmt->get_column_expr_by_id( + const ObColumnRefRawExpr *column_expr = log_plan->get_column_expr_by_id( table_meta->get_table_id(), column_meta.get_column_id()); if (OB_ISNULL(column_expr)) { ret = OB_ERR_UNEXPECTED; @@ -3512,7 +3512,7 @@ int ObOptSelectivity::get_column_query_range(const OptSelectivityCtx &ctx, ObQueryRangeArray &ranges) { int ret = OB_SUCCESS; - const ObDMLStmt *stmt = ctx.get_stmt(); + const ObLogPlan *log_plan = ctx.get_plan(); const ParamStore *params = ctx.get_params(); ObExecContext *exec_ctx = ctx.get_opt_ctx().get_exec_ctx(); ObIAllocator &allocator = ctx.get_allocator(); @@ -3520,10 +3520,10 @@ int ObOptSelectivity::get_column_query_range(const OptSelectivityCtx &ctx, const ColumnItem* column_item = NULL; ObGetMethodArray get_methods; - if (OB_ISNULL(stmt) || OB_ISNULL(exec_ctx) || - OB_ISNULL(column_item = stmt->get_column_item_by_id(table_id, column_id))) { + if (OB_ISNULL(log_plan) || OB_ISNULL(exec_ctx) || + OB_ISNULL(column_item = log_plan->get_column_item_by_id(table_id, column_id))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null", K(ret), K(stmt), K(exec_ctx), K(column_item)); + LOG_WARN("get unexpected null", K(ret), K(log_plan), K(exec_ctx), K(column_item)); } else if (OB_FAIL(column_items.push_back(*column_item))) { 
LOG_WARN("failed to push back column item", K(ret)); } else if (OB_FAIL(query_range.preliminary_extract_query_range(column_items, diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l old mode 100644 new mode 100755 index 83b063a3a..41b25c72a --- a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -881,6 +881,9 @@ Timestamp{whitespace}?\"[^\"]*\" { NO_INDEX { return NO_INDEX_HINT; } USE_DAS { return USE_DAS_HINT; } NO_USE_DAS { return NO_USE_DAS_HINT; } +INDEX_SS { return INDEX_SS_HINT; } +INDEX_SS_ASC { return INDEX_SS_ASC_HINT; } +INDEX_SS_DESC { return INDEX_SS_DESC_HINT; } USE_NL { return USE_NL; } NO_USE_NL { return NO_USE_NL; } USE_NL_MATERIALIZATION { return USE_NL_MATERIALIZATION; } diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y old mode 100644 new mode 100755 index 940974855..52ae511d1 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -166,7 +166,9 @@ COALESCE_SQ NO_COALESCE_SQ COUNT_TO_EXISTS NO_COUNT_TO_EXISTS LEFT_TO_ANTI NO_LE ELIMINATE_JOIN NO_ELIMINATE_JOIN PUSH_LIMIT NO_PUSH_LIMIT PULLUP_EXPR NO_PULLUP_EXPR WIN_MAGIC NO_WIN_MAGIC // optimize hint -INDEX_HINT FULL_HINT NO_INDEX_HINT USE_DAS_HINT NO_USE_DAS_HINT LEADING_HINT ORDERED +INDEX_HINT FULL_HINT NO_INDEX_HINT USE_DAS_HINT NO_USE_DAS_HINT +INDEX_SS_HINT INDEX_SS_ASC_HINT INDEX_SS_DESC_HINT +LEADING_HINT ORDERED USE_NL USE_MERGE USE_HASH NO_USE_HASH NO_USE_MERGE NO_USE_NL USE_NL_MATERIALIZATION NO_USE_NL_MATERIALIZATION USE_HASH_AGGREGATION NO_USE_HASH_AGGREGATION @@ -8680,6 +8682,18 @@ INDEX_HINT '(' qb_name_option relation_factor_in_hint NAME_OB ')' { malloc_non_terminal_node($$, result->malloc_pool_, T_NO_USE_DAS_HINT, 2, $3, $4); } +| INDEX_SS_HINT '(' qb_name_option relation_factor_in_hint NAME_OB ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_SS_HINT, 3, $3, $4, $5); +} +| INDEX_SS_ASC_HINT '(' 
qb_name_option relation_factor_in_hint NAME_OB ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_SS_ASC_HINT, 3, $3, $4, $5); +} +| INDEX_SS_DESC_HINT '(' qb_name_option relation_factor_in_hint NAME_OB ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_SS_DESC_HINT, 3, $3, $4, $5); +} | LEADING_HINT '(' qb_name_option relation_factor_in_leading_hint_list ')' { malloc_non_terminal_node($$, result->malloc_pool_, T_LEADING, 2, $3, $4); diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp old mode 100644 new mode 100755 index abb0d1371..c3e60c9bd --- a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -9485,7 +9485,10 @@ int ObDMLResolver::resolve_optimize_hint(const ParseNode &hint_node, case T_NO_INDEX_HINT: case T_FULL_HINT: case T_USE_DAS_HINT: - case T_NO_USE_DAS_HINT: { + case T_NO_USE_DAS_HINT: + case T_INDEX_SS_HINT: + case T_INDEX_SS_ASC_HINT: + case T_INDEX_SS_DESC_HINT: { if (OB_FAIL(resolve_index_hint(hint_node, opt_hint))) { LOG_WARN("failed to resolve index hint", K(ret)); } diff --git a/src/sql/resolver/dml/ob_hint.cpp b/src/sql/resolver/dml/ob_hint.cpp old mode 100644 new mode 100755 index bd3dbebd4..72bd4e0a0 --- a/src/sql/resolver/dml/ob_hint.cpp +++ b/src/sql/resolver/dml/ob_hint.cpp @@ -780,6 +780,9 @@ const char* ObHint::get_hint_name(ObItemType type, bool is_enable_hint /* defaul case T_FULL_HINT: return "FULL"; case T_NO_INDEX_HINT: return "NO_INDEX"; case T_USE_DAS_HINT: return is_enable_hint ? "USE_DAS" : "NO_USE_DAS"; + case T_INDEX_SS_HINT: return "INDEX_SS"; + case T_INDEX_SS_ASC_HINT: return "INDEX_SS_ASC"; + case T_INDEX_SS_DESC_HINT: return "INDEX_SS_DESC"; case T_LEADING: return is_enable_hint ? "LEADING" : "ORDERED"; case T_USE_MERGE: return is_enable_hint ? "USE_MERGE" : "NO_USE_MERGE"; case T_USE_HASH: return is_enable_hint ? 
"USE_HASH" : "NO_USE_HASH"; diff --git a/src/sql/resolver/dml/ob_hint.h b/src/sql/resolver/dml/ob_hint.h index 959549555..adb89c772 100644 --- a/src/sql/resolver/dml/ob_hint.h +++ b/src/sql/resolver/dml/ob_hint.h @@ -700,6 +700,9 @@ public: const ObTableInHint &get_table() const { return table_; } ObString &get_index_name() { return index_name_; } const ObString &get_index_name() const { return index_name_; } + bool is_use_index_hint() const { return T_NO_INDEX_HINT != get_hint_type(); } + bool use_skip_scan() const { return T_INDEX_SS_HINT == get_hint_type(); } + INHERIT_TO_STRING_KV("ObHint", ObHint, K_(table), K_(index_name)); private: diff --git a/src/sql/resolver/dml/ob_sql_hint.cpp b/src/sql/resolver/dml/ob_sql_hint.cpp index 8369a62e2..9fd771e61 100644 --- a/src/sql/resolver/dml/ob_sql_hint.cpp +++ b/src/sql/resolver/dml/ob_sql_hint.cpp @@ -1171,7 +1171,10 @@ int ObStmtHint::merge_other_opt_hint(const ObIArray &hints, switch (hint->get_hint_type()) { case T_INDEX_HINT: case T_NO_INDEX_HINT: - case T_FULL_HINT: { + case T_FULL_HINT: + case T_INDEX_SS_HINT: + case T_INDEX_SS_ASC_HINT: + case T_INDEX_SS_DESC_HINT: { hint_type = T_INDEX_HINT; break; } @@ -1797,6 +1800,34 @@ int ObLogPlanHint::check_use_das(uint64_t table_id, bool &force_das, bool &force return ret; } +int ObLogPlanHint::check_use_skip_scan(uint64_t table_id, + uint64_t index_id, + bool &force_skip_scan, + bool &force_no_skip_scan) const +{ + int ret = OB_SUCCESS; + force_skip_scan = false; + force_no_skip_scan = false; + const LogTableHint *log_table_hint = get_log_table_hint(table_id); + int64_t pos = OB_INVALID_INDEX; + if (NULL != log_table_hint && + ObOptimizerUtil::find_item(log_table_hint->index_list_, index_id, &pos)) { + const ObIndexHint *hint = NULL; + if (OB_UNLIKELY(pos >= log_table_hint->index_hints_.count() || pos < 0) + || OB_ISNULL(hint = log_table_hint->index_hints_.at(pos))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected pos", K(ret), K(pos), 
K(log_table_hint->index_hints_.count()), K(hint)); + } else { + force_skip_scan = hint->use_skip_scan(); + force_no_skip_scan = !force_skip_scan && hint->is_use_index_hint(); + } + } + if (OB_SUCC(ret) && !force_skip_scan && !force_no_skip_scan && is_outline_data_) { + force_no_skip_scan = true; + } + return ret; +} + int ObLogPlanHint::check_use_join_filter(uint64_t filter_table_id, const ObRelIds &left_tables, bool part_join_filter, @@ -2183,7 +2214,6 @@ int LogTableHint::assign(const LogTableHint &other) { int ret = OB_SUCCESS; table_ = other.table_; - index_type_ = other.index_type_; parallel_hint_ = other.parallel_hint_; use_das_hint_ = other.use_das_hint_; if (OB_FAIL(index_list_.assign(other.index_list_))) { @@ -2223,9 +2253,10 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) } else { LOG_TRACE("get readable index", K(table_index_count)); const share::schema::ObTableSchema *index_schema = NULL; - ObItemType index_hint_type = T_INVALID; ObSEArray index_list; - ObSEArray hints; + ObSEArray no_index_list; + ObSEArray index_hints; + ObSEArray no_index_hints; for (int64_t i = -1; OB_SUCC(ret) && i < table_index_count; ++i) { uint64_t index_id = -1 == i ? 
table_->ref_id_ : tids[i]; ObString index_name; @@ -2244,54 +2275,63 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) } if (OB_SUCC(ret) && (!index_name.empty())) { - int64_t hint_pos = OB_INVALID_INDEX; + int64_t no_index_hint_pos = OB_INVALID_INDEX; + int64_t index_hint_pos = OB_INVALID_INDEX; + int64_t index_ss_hint_pos = OB_INVALID_INDEX; const uint64_t N = index_hints_.count(); const ObIndexHint *index_hint = NULL; - bool use_index = false; - bool no_use_index = false; for (int64_t hint_i = 0; OB_SUCC(ret) && hint_i < N; ++hint_i) { if (OB_ISNULL(index_hint = index_hints_.at(hint_i)) || OB_UNLIKELY(!index_hint->is_access_path_hint())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null index hint", K(ret), K(index_hint)); - } else if ((is_primary_key && T_FULL_HINT == index_hint->get_hint_type()) - || 0 == index_hint->get_index_name().case_compare(index_name)) { - hint_pos = hint_i; - use_index |= T_INDEX_HINT == index_hint->get_hint_type() - || T_FULL_HINT == index_hint->get_hint_type(); - no_use_index |= T_NO_INDEX_HINT == index_hint->get_hint_type(); + } else if (is_primary_key && T_FULL_HINT == index_hint->get_hint_type()) { + index_hint_pos = hint_i; + } else if (0 != index_hint->get_index_name().case_compare(index_name)) { + /* do nothing */ + } else if (T_NO_INDEX_HINT == index_hint->get_hint_type()) { + no_index_hint_pos = hint_i; + } else if (T_INDEX_SS_HINT == index_hint->get_hint_type()) { + index_ss_hint_pos = hint_i; + } else { + index_hint_pos = hint_i; } } - if (hint_pos >= 0 && hint_pos < N) { - if (use_index && no_use_index) { - /* conflict full/index and no_index hint*/ - } else if (no_use_index && T_INDEX_HINT == index_hint_type) { - /* get vaild index hint, ignore this no_index hint*/ - } else { - if (use_index && T_NO_INDEX_HINT == index_hint_type) { - hints.reuse(); - index_list.reuse(); - } - index_hint_type = use_index ? 
T_INDEX_HINT : T_NO_INDEX_HINT; - if (OB_FAIL(index_list.push_back(index_id))) { - LOG_WARN("fail to push back", K(ret), K(index_id)); - } else if (OB_FAIL(hints.push_back(index_hints_.at(hint_pos)))) { - LOG_WARN("fail to push back", K(ret), K(hint_pos)); - } + if (OB_FAIL(ret)) { + } else if (OB_INVALID_INDEX != no_index_hint_pos + && (OB_INVALID_INDEX != index_ss_hint_pos + || OB_INVALID_INDEX != index_hint_pos)) { + /* conflict full/index/index_ss and no_index hint*/ + } else if (OB_INVALID_INDEX != no_index_hint_pos) { + if (OB_FAIL(no_index_list.push_back(index_id))) { + LOG_WARN("fail to push back", K(ret), K(index_id)); + } else if (OB_FAIL(no_index_hints.push_back(index_hints_.at(no_index_hint_pos)))) { + LOG_WARN("fail to push back", K(ret), K(no_index_hint_pos)); + } + } else if (OB_INVALID_INDEX != index_ss_hint_pos + || OB_INVALID_INDEX != index_hint_pos) { + int64_t hint_pos = OB_INVALID_INDEX != index_ss_hint_pos + ? index_ss_hint_pos : index_hint_pos; + if (OB_FAIL(index_list.push_back(index_id))) { + LOG_WARN("fail to push back", K(ret), K(index_id)); + } else if (OB_FAIL(index_hints.push_back(index_hints_.at(hint_pos)))) { + LOG_WARN("fail to push back", K(ret), K(hint_pos)); } } } } - if (OB_FAIL(ret)) { - } else if (OB_UNLIKELY(index_list.count() != hints.count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected log index hint", K(ret), K(index_list), K(hints)); - } else if (OB_FAIL(index_list_.assign(index_list))) { - LOG_WARN("failed to assign array", K(ret)); - } else if (OB_FAIL(index_hints_.assign(hints))) { - LOG_WARN("failed to assign array", K(ret)); - } else { - index_type_ = index_hint_type; + if (OB_SUCC(ret)) { + if (!index_list.empty()) { + if (OB_FAIL(index_list_.assign(index_list))) { + LOG_WARN("failed to assign array", K(ret)); + } else if (OB_FAIL(index_hints_.assign(index_hints))) { + LOG_WARN("failed to assign array", K(ret)); + } + } else if (OB_FAIL(index_list_.assign(no_index_list))) { + LOG_WARN("failed to assign 
array", K(ret)); + } else if (OB_FAIL(index_hints_.assign(no_index_hints))) { + LOG_WARN("failed to assign array", K(ret)); + } } } return ret; @@ -2367,5 +2407,31 @@ int LogTableHint::add_join_filter_hint(const ObDMLStmt &stmt, return ret; } +int LogTableHint::allowed_skip_scan(const uint64_t index_id, bool &allowed) const +{ + int ret = OB_SUCCESS; + allowed = false; + if (!is_use_index_hint()) { + /* do nothing */ + } else if (OB_UNLIKELY(index_list_.count() != index_hints_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected count", K(ret), K(index_list_.count()), K(index_hints_.count())); + } else { + bool find = false; + for (int64_t i = 0; OB_SUCC(ret) && !find && i < index_list_.count(); ++i) { + if (index_list_.at(i) != index_id) { + /* do nothing */ + } else if (OB_ISNULL(index_hints_.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(i), K(index_hints_)); + } else { + allowed = T_INDEX_SS_HINT == index_hints_.at(i)->get_hint_type(); + find = true; + } + } + } + return ret; +} + }//end of namespace sql }//end of namespace oceanbase diff --git a/src/sql/resolver/dml/ob_sql_hint.h b/src/sql/resolver/dml/ob_sql_hint.h old mode 100644 new mode 100755 index b967bc31e..f686b80c3 --- a/src/sql/resolver/dml/ob_sql_hint.h +++ b/src/sql/resolver/dml/ob_sql_hint.h @@ -280,17 +280,15 @@ struct LogJoinHint struct LogTableHint { LogTableHint() : table_(NULL), - index_type_(T_INVALID), parallel_hint_(NULL), use_das_hint_(NULL) {} LogTableHint(const TableItem *table) : table_(table), - index_type_(T_INVALID), parallel_hint_(NULL), use_das_hint_(NULL) {} int assign(const LogTableHint &other); int init_index_hints(ObSqlSchemaGuard &schema_guard); - bool is_no_index_hint() const { return T_NO_INDEX_HINT == index_type_; } - bool is_index_hint() const { return T_INDEX_HINT == index_type_; } + bool is_use_index_hint() const { return !index_hints_.empty() && NULL != index_hints_.at(0) + && index_hints_.at(0)->is_use_index_hint(); } bool 
is_valid() const { return !index_list_.empty() || NULL != parallel_hint_ || NULL != use_das_hint_ || !join_filter_hints_.empty(); } int get_join_filter_hint(const ObRelIds &left_tables, @@ -299,14 +297,13 @@ struct LogTableHint int add_join_filter_hint(const ObDMLStmt &stmt, const ObQueryHint &query_hint, const ObJoinFilterHint &hint); + int allowed_skip_scan(const uint64_t index_id, bool &allowed) const; - TO_STRING_KV(K_(table), K_(index_type), - K_(index_list), K_(index_hints), + TO_STRING_KV(K_(table), K_(index_list), K_(index_hints), K_(parallel_hint), K_(use_das_hint), K_(join_filter_hints), K_(left_tables)); const TableItem *table_; - ObItemType index_type_; common::ObSEArray index_list_; common::ObSEArray index_hints_; const ObTableParallelHint *parallel_hint_; @@ -397,6 +394,9 @@ struct ObLogPlanHint bool &can_use, const ObJoinFilterHint *&force_hint) const; int check_use_das(uint64_t table_id, bool &force_das, bool &force_no_das) const; + int check_use_skip_scan(uint64_t table_id, uint64_t index_id, + bool &force_skip_scan, + bool &force_no_skip_scan) const; const LogJoinHint* get_join_hint(const ObRelIds &join_tables) const; const ObIArray &get_join_hints() const { return join_hints_; } SetAlgo get_valid_set_algo() const; diff --git a/src/sql/rewrite/ob_query_range.cpp b/src/sql/rewrite/ob_query_range.cpp index 703121dc3..30de5db96 100644 --- a/src/sql/rewrite/ob_query_range.cpp +++ b/src/sql/rewrite/ob_query_range.cpp @@ -73,6 +73,7 @@ ObQueryRange::ObQueryRange() query_range_ctx_(NULL), key_part_store_(allocator_), range_exprs_(allocator_), + ss_range_exprs_(allocator_), mbr_filters_(allocator_), has_exec_param_(true), is_equal_and_(false), @@ -93,6 +94,7 @@ ObQueryRange::ObQueryRange(ObIAllocator &alloc) query_range_ctx_(NULL), key_part_store_(allocator_), range_exprs_(allocator_), + ss_range_exprs_(allocator_), mbr_filters_(allocator_), has_exec_param_(true), is_equal_and_(false), @@ -131,6 +133,7 @@ void ObQueryRange::reset() contain_geo_filters_ = 
false; table_graph_.reset(); range_exprs_.reset(); + ss_range_exprs_.reset(); inner_allocator_.reset(); has_exec_param_ = true; is_equal_and_ = false; @@ -290,17 +293,10 @@ int ObQueryRange::preliminary_extract_query_range(const ColumnIArray &range_colu if (OB_SUCC(ret) && NULL != root) { if (OB_FAIL(refine_large_range_graph(root))) { LOG_WARN("failed to refine large range graph", K(ret)); - } else if (OB_FAIL(remove_useless_range_graph(root))) { - LOG_WARN("failed to remove useless range", K(ret)); + } else if (OB_FAIL(check_graph_type(*root))) { + LOG_WARN("check graph type failed", K(ret)); } else if (OB_FAIL(generate_expr_final_info())) { - LOG_WARN("failed to generate final exprs"); - } else { - SQL_REWRITE_LOG(DEBUG, "root key part", K(*root)); - int64_t max_pos = -1; - table_graph_.key_part_head_ = root; - table_graph_.is_standard_range_ = is_standard_graph(root); - OZ(is_strict_equal_graph(root, 0, max_pos, table_graph_.is_equal_range_)); - OZ(check_graph_type()); + LOG_WARN("failed to generate final exprs"); } } } @@ -418,24 +414,13 @@ int ObQueryRange::preliminary_extract_query_range(const ColumnIArray &range_colu // no range left } else if (OB_FAIL(refine_large_range_graph(temp_result))) { LOG_WARN("failed to refine large range graph", K(ret)); - } else if (OB_FAIL(remove_useless_range_graph(temp_result))) { - LOG_WARN("failed to remove useless range", K(ret)); + } else if (OB_FAIL(check_graph_type(*temp_result))) { + LOG_WARN("check graph type failed", K(ret)); } else if (OB_FAIL(generate_expr_final_info())) { LOG_WARN("failed to generate final exprs"); - } else { - int64_t max_pos = -1; - table_graph_.key_part_head_ = temp_result; - table_graph_.is_standard_range_ = is_standard_graph(temp_result); - if (OB_FAIL(is_strict_equal_graph(temp_result, - 0, - max_pos, - table_graph_.is_equal_range_))) { - LOG_WARN("is strict equal graph failed", K(ret)); - } else if (OB_FAIL(check_graph_type())) { - LOG_WARN("check graph type failed", K(ret)); - } } } + if 
(OB_SUCC(ret)) { if (query_range_ctx_->need_final_extact_) { state_ = NEED_PREPARE_PARAMS; @@ -715,63 +700,172 @@ int ObQueryRange::check_is_get(ObKeyPart &key_part, return ret; } -int ObQueryRange::check_graph_type() +// 1. check range graph type: is_standard_range_/is_equal_range_/is_precise_get_/is_skip_scan_ +// 3. remove useless key part +int ObQueryRange::check_graph_type(ObKeyPart &key_part_head) { int ret = OB_SUCCESS; - table_graph_.is_precise_get_ = true; - if (OB_ISNULL(query_range_ctx_) || OB_ISNULL(table_graph_.key_part_head_)) { - ret = OB_NOT_INIT; - LOG_WARN("query isn't init", K_(query_range_ctx), K_(table_graph_.key_part_head)); + int64_t max_pos = -1; + int64_t max_precise_pos = -1; + int64_t ss_max_precise_pos = -1; + table_graph_.key_part_head_ = &key_part_head; + table_graph_.is_standard_range_ = is_standard_graph(&key_part_head); + table_graph_.is_precise_get_ = is_precise_get(key_part_head, max_precise_pos); + table_graph_.skip_scan_offset_ = -1; + ObKeyPart *ss_head = NULL; + if (OB_FAIL(check_skip_scan_range(&key_part_head, + table_graph_.is_standard_range_, + max_precise_pos, + ss_head, + table_graph_.skip_scan_offset_, + ss_max_precise_pos))) { + LOG_WARN("failed to check skip scan", K(ret)); + } else if (OB_FAIL(is_strict_equal_graph(&key_part_head, 0, max_pos, table_graph_.is_equal_range_))) { + LOG_WARN("is strict equal graph failed", K(ret)); + } else if (OB_FAIL(remove_useless_range_graph(is_ss_range() ? ss_head : &key_part_head))) { + LOG_WARN("failed to remove useless range", K(ret)); + } else if (OB_FAIL(remove_precise_range_expr(is_ss_range() ? 
ss_max_precise_pos : max_precise_pos))) { + LOG_WARN("remove precise range expr failed", K(ret)); + } else if (OB_FAIL(fill_range_exprs(max_precise_pos, table_graph_.skip_scan_offset_, ss_max_precise_pos))) { + LOG_WARN("failed to fill range exprs", K(ret)); } - if (OB_SUCC(ret)) { - int64_t max_pos = -1; - int64_t depth = -1; - int64_t column_count = column_count_; - bool is_terminated = false; - for (ObKeyPart *cur = table_graph_.key_part_head_; !is_terminated && NULL != cur; cur = cur->and_next_) { - if (cur->pos_.offset_ != (++depth)) { - table_graph_.is_precise_get_ = false; - max_pos = depth; - is_terminated = true; - } else if (NULL != cur->or_next_ || NULL != cur->item_next_) { - table_graph_.is_precise_get_ = false; - } else if (cur->is_like_key() || cur->is_geo_key()) { - table_graph_.is_precise_get_ = false; - } else if (!cur->is_equal_condition()) { - table_graph_.is_precise_get_ = false; - } else { + return ret; +} + +int ObQueryRange::check_skip_scan_range(ObKeyPart *key_part_head, + const bool is_standard_range, + const int64_t max_precise_pos, + ObKeyPart *&ss_head, + int64_t &skip_scan_offset, + int64_t &ss_max_precise_pos) +{ + int ret = OB_SUCCESS; + ss_head = NULL; + skip_scan_offset = -1; + ss_max_precise_pos = -1; + if (!is_standard_range) { + /* only standard range can extract skip scan range */ + } else { + ObKeyPart *cur = key_part_head; + // skip prefix precise range + while (NULL != cur && cur->pos_.offset_ < max_precise_pos) { + cur = cur->and_next_; + } + if (NULL != cur) { + ss_head = cur; + skip_scan_offset = ss_head->pos_.offset_; + is_precise_get(*ss_head, ss_max_precise_pos, true); + } + } + return ret; +} + +int ObQueryRange::reset_skip_scan_range() +{ + int ret = OB_SUCCESS; + if (-1 == table_graph_.skip_scan_offset_) { + /* do nothing */ + } else if (OB_ISNULL(table_graph_.key_part_head_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(table_graph_.key_part_head_)); + } else { + int64_t max_precise_pos = 
-1; + table_graph_.is_precise_get_ = is_precise_get(*table_graph_.key_part_head_, max_precise_pos); + table_graph_.skip_scan_offset_ = -1; + ss_range_exprs_.reset(); + if (OB_FAIL(remove_useless_range_graph(table_graph_.key_part_head_))) { + LOG_WARN("failed to remove useless range", K(ret)); + } else if (OB_FAIL(remove_precise_range_expr(max_precise_pos))) { + LOG_WARN("remove precise range expr failed", K(ret)); + } + } + return ret; +} + +bool ObQueryRange::is_precise_get(const ObKeyPart &key_part_head, + int64_t &max_precise_pos, + bool ignore_head /* = false */) +{ + bool is_precise_get = true; + int64_t max_pos = -1; + int64_t depth = ignore_head ? key_part_head.pos_.offset_ - 1 : -1; + bool is_terminated = false; + for (const ObKeyPart *cur = &key_part_head; !is_terminated && NULL != cur; cur = cur->and_next_) { + if (cur->pos_.offset_ != (++depth)) { + is_precise_get = false; + max_pos = depth; + is_terminated = true; + } else if (NULL != cur->or_next_ || NULL != cur->item_next_) { + is_precise_get = false; + } else if (cur->is_like_key() || cur->is_geo_key()) { + is_precise_get = false; + } else if (!cur->is_equal_condition()) { + is_precise_get = false; + } else { + // do nothing + } + if (!is_terminated) { + if (is_strict_in_graph(cur)) { // do nothing + } else if (!is_general_graph(*cur)) { + max_pos = cur->pos_.offset_ + 1; + is_terminated = true; + } else if (has_scan_key(*cur)) { + max_pos = cur->pos_.offset_ + 1; + is_terminated = true; } - if (OB_SUCC(ret) && !is_terminated) { - if (is_strict_in_graph(cur)) { - // do nothing - } else if (!is_general_graph(*cur)) { - max_pos = cur->pos_.offset_ + 1; - is_terminated = true; - } else if (has_scan_key(*cur)) { - max_pos = cur->pos_.offset_ + 1; - is_terminated = true; + } + } + + max_precise_pos = is_terminated ? 
max_pos : depth + 1; + if (is_precise_get && depth != column_count_ - 1) { + is_precise_get = false; + } + return is_precise_get; +} + +int ObQueryRange::fill_range_exprs(const int64_t max_precise_pos, + const int64_t ss_offset, + const int64_t ss_max_precise_pos) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(query_range_ctx_)) { + ret = OB_NOT_INIT; + LOG_WARN("query isn't init", K_(query_range_ctx)); + } else { + ObSEArray range_exprs; + ObSEArray ss_range_exprs; + bool precise = true; + bool ss_precise = true; + for (int64_t i = 0; OB_SUCC(ret) && i < query_range_ctx_->precise_range_exprs_.count(); ++i) { + precise = true; + ss_precise = is_ss_range(); + ObRangeExprItem &expr_item = query_range_ctx_->precise_range_exprs_.at(i); + for (int64_t j = 0 ; (precise || ss_precise) && j < expr_item.cur_pos_.count() ; ++j) { + if (expr_item.cur_pos_.at(j) >= max_precise_pos) { + precise = false; + } + if (ss_precise && (expr_item.cur_pos_.at(j) < ss_offset || expr_item.cur_pos_.at(j) >= ss_max_precise_pos)) { + ss_precise = false; + } + } + + if (OB_SUCC(ret) && NULL != expr_item.cur_expr_) { + if (precise && OB_FAIL(range_exprs.push_back(const_cast(expr_item.cur_expr_)))) { + LOG_WARN("push back precise range expr failed", K(ret)); + } else if (ss_precise && OB_FAIL(ss_range_exprs.push_back(const_cast(expr_item.cur_expr_)))) { + LOG_WARN("push back precise range expr failed", K(ret)); } } } - if (OB_SUCC(ret)) { - max_pos = is_terminated ? 
max_pos : depth + 1; - if (OB_FAIL(remove_precise_range_expr(max_pos))) { - LOG_WARN("remove precise range expr failed", K(ret)); - } else if (table_graph_.is_precise_get_ && depth != column_count - 1) { - table_graph_.is_precise_get_ = false; - } - } - } - if (OB_SUCC(ret) && OB_FAIL(range_exprs_.init(query_range_ctx_->precise_range_exprs_.count()))) { - LOG_WARN("init range exprs failed", K(ret)); - } - for (int64_t i = 0; OB_SUCC(ret) && i < query_range_ctx_->precise_range_exprs_.count(); ++i) { - const ObRawExpr *cur_expr = query_range_ctx_->precise_range_exprs_.at(i).cur_expr_; - if (NULL != cur_expr) { - if (OB_FAIL(range_exprs_.push_back(const_cast(cur_expr)))) { - LOG_WARN("push back precise range expr failed", K(ret)); - } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(range_exprs_.assign(range_exprs))) { + LOG_WARN("failed to assign range exprs", K(ret)); + } else if (OB_FAIL(ss_range_exprs_.assign(ss_range_exprs))) { + LOG_WARN("failed to assign skip scan range exprs", K(ret)); + } else { + LOG_DEBUG("finish fill range exprs", K(max_precise_pos), K(range_exprs)); + LOG_DEBUG("finish fill skip scan range exprs", K(ss_offset), K(ss_max_precise_pos), K(ss_range_exprs)); } } return ret; @@ -4737,6 +4831,31 @@ int ObQueryRange::get_tablet_ranges(common::ObIAllocator &allocator, return ret; } +int ObQueryRange::get_ss_tablet_ranges(common::ObIAllocator &allocator, + ObExecContext &exec_ctx, + ObQueryRangeArray &ss_ranges, + const ObDataTypeCastParams &dtc_params) const +{ + int ret = OB_SUCCESS; + ss_ranges.reuse(); + const ObKeyPart *ss_head = get_ss_key_part_head(); + if (NULL == ss_head) { + /* is not skip scan range */ + } else if (OB_UNLIKELY(table_graph_.skip_scan_offset_ < 0 + || table_graph_.skip_scan_offset_ >= column_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected skip scan range", K(ret), K(table_graph_.skip_scan_offset_), + K(column_count_)); + } else if (OB_FAIL(gen_skip_scan_range(allocator, exec_ctx, dtc_params, ss_head, + column_count_ 
- table_graph_.skip_scan_offset_, + ss_ranges))) { + LOG_WARN("get skip scan ranges failed", K(ret)); + } else { + LOG_DEBUG("get skip range success", K(ss_ranges)); + } + return ret; +} + int ObQueryRange::ObSearchState::tailor_final_range(int64_t column_count) { int ret = OB_SUCCESS; @@ -4781,6 +4900,54 @@ int ObQueryRange::ObSearchState::tailor_final_range(int64_t column_count) return ret; } +int ObQueryRange::ObSearchState::init_search_state(int64_t column_count, bool init_as_full_range) +{ + int ret = OB_SUCCESS; + void *start_ptr = NULL; + void *end_ptr = NULL; + if (OB_UNLIKELY(column_count <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column count when init search state", K(ret), K(column_count)); + } else if (OB_ISNULL(start_ptr = allocator_.alloc(sizeof(ObObj) * column_count))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("alloc memory for start_ptr failed", K(ret)); + } else if(OB_ISNULL(end_ptr = allocator_.alloc(sizeof(ObObj) * column_count))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("alloc memory for end_ptr failed", K(ret)); + } else if (OB_ISNULL(include_start_ = static_cast(allocator_.alloc(sizeof(bool) * column_count)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("alloc memory for search state start failed", K(ret)); + } else if (OB_ISNULL(include_end_ = static_cast(allocator_.alloc(sizeof(bool) * column_count)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("alloc memory for search state end failed", K(ret)); + } else { + start_ = new(start_ptr) ObObj[column_count]; + end_ = new(end_ptr) ObObj[column_count]; + if (init_as_full_range) { + max_exist_index_ = column_count; + last_include_start_ = true; + last_include_end_ = true; + for (int64_t i = 0; i < column_count; ++i) { + start_[i].set_min_value(); + end_[i].set_max_value(); + include_start_[i] = false; + include_end_[i] = false; + } + } else { + max_exist_index_ = 0; + last_include_start_ = false; + last_include_end_ = false; + for (int64_t i = 0; i < 
column_count; ++i) { + start_[i].set_min_value(); + end_[i].set_max_value(); + include_start_[i] = false; + include_end_[i] = false; + } + } + } + return ret; +} + // @notice 调用这个接口之前必须调用need_deep_copy()来判断是否可以不用拷贝就进行final extract int ObQueryRange::get_tablet_ranges(ObIAllocator &allocator, ObExecContext &exec_ctx, @@ -4814,6 +4981,60 @@ int ObQueryRange::get_tablet_ranges(ObIAllocator &allocator, return ret; } +// for standard range, check is skip scan range +const ObKeyPart *ObQueryRange::get_ss_key_part_head() const +{ + const ObKeyPart *ss_head = NULL; + if (is_ss_range()) { + const ObKeyPart *cur = table_graph_.key_part_head_; + while (NULL != cur && cur->pos_.offset_ < table_graph_.skip_scan_offset_) { + cur = cur->and_next_; + } + if (NULL != cur && cur->pos_.offset_ == table_graph_.skip_scan_offset_) { + ss_head = cur; + } + } + return ss_head; +} + +OB_NOINLINE int ObQueryRange::gen_skip_scan_range(ObIAllocator &allocator, + ObExecContext &exec_ctx, + const ObDataTypeCastParams &dtc_params, + const ObKeyPart *ss_root, + int64_t post_column_count, + ObQueryRangeArray &ss_ranges) const +{ + int ret = OB_SUCCESS; + bool is_get_range = false; + ObSearchState search_state(allocator); + ObNewRange *ss_range = NULL; + if (OB_ISNULL(ss_root) || OB_UNLIKELY(1 > post_column_count)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected skip scan range", K(ret), K(ss_root), K(post_column_count)); + } else if (OB_FAIL(search_state.init_search_state(post_column_count, true))) { + LOG_WARN("failed to init postfix search state", K(ret)); + } + for (const ObKeyPart *cur = ss_root; OB_SUCC(ret) && NULL != cur && !search_state.is_empty_range_; + cur = cur->and_next_) { + if (OB_FAIL(get_single_key_value(cur, exec_ctx, search_state, dtc_params, + table_graph_.skip_scan_offset_))) { + LOG_WARN("get single key value failed", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if(OB_FAIL(search_state.tailor_final_range(post_column_count))) { + LOG_WARN("tailor final range failed", 
K(ret)); + } else if (OB_FAIL(generate_single_range(search_state, post_column_count, + table_graph_.key_part_head_->id_.table_id_, + ss_range, is_get_range))) { + LOG_WARN("generate single range failed", K(ret)); + } else if (OB_FAIL(ss_ranges.push_back(ss_range))) { + LOG_WARN("push back range to array failed", K(ret)); + } + return ret; +} + OB_NOINLINE int ObQueryRange::gen_simple_scan_range(ObIAllocator &allocator, ObExecContext &exec_ctx, ObQueryRangeArray &ranges, @@ -4822,34 +5043,8 @@ OB_NOINLINE int ObQueryRange::gen_simple_scan_range(ObIAllocator &allocator, { int ret = OB_SUCCESS; ObSearchState search_state(allocator); - void *start_ptr = NULL; - void *end_ptr = NULL; - - if (OB_ISNULL(start_ptr = search_state.allocator_.alloc(sizeof(ObObj) * column_count_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc memory for start_ptr failed", K(ret)); - } else if(OB_ISNULL(end_ptr = search_state.allocator_.alloc(sizeof(ObObj) * column_count_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc memory for end_ptr failed", K(ret)); - } else if (OB_ISNULL(search_state.include_start_ = static_cast(search_state.allocator_.alloc(sizeof(bool) * column_count_)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc memory for search state start failed", K(ret)); - } else if (OB_ISNULL(search_state.include_end_ = static_cast(search_state.allocator_.alloc(sizeof(bool) * column_count_)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc memory for search state end failed", K(ret)); - } else { - search_state.start_ = new(start_ptr) ObObj[column_count_]; - search_state.end_ = new(end_ptr) ObObj[column_count_]; - search_state.max_exist_index_ = column_count_; - search_state.last_include_start_ = true; - search_state.last_include_end_ = true; - } - for (int64_t i = 0; OB_SUCC(ret) && i < column_count_; ++i) { - //将所有range都初始化成true - search_state.start_[i].set_min_value(); - search_state.end_[i].set_max_value(); - search_state.include_start_[i] = false; 
- search_state.include_end_[i] = false; + if (OB_FAIL(search_state.init_search_state(column_count_, true))) { + LOG_WARN("failed to init search state", K(ret)); } for (ObKeyPart *cur = table_graph_.key_part_head_; OB_SUCC(ret) && NULL != cur && !search_state.is_empty_range_; @@ -4943,7 +5138,8 @@ if (OB_SUCC(ret) ) { \ inline int ObQueryRange::get_single_key_value(const ObKeyPart *key, ObExecContext &exec_ctx, ObSearchState &search_state, - const ObDataTypeCastParams &dtc_params) const + const ObDataTypeCastParams &dtc_params, + int64_t skip_offset /* default 0 */ ) const { int ret = OB_SUCCESS; for (const ObKeyPart *cur = key; @@ -5028,7 +5224,7 @@ inline int ObQueryRange::get_single_key_value(const ObKeyPart *key, CAST_VALUE_TYPE(expect_type, cur->pos_.column_type_, start, include_start, end, include_end); } if (OB_SUCC(ret)) { - search_state.depth_ = static_cast(cur->pos_.offset_); + search_state.depth_ = static_cast(cur->pos_.offset_ - skip_offset); if (search_state.is_phy_rowid_range_ != cur->is_phy_rowid_key_part()) { if (search_state.is_phy_rowid_range_) { //do nothing @@ -5078,36 +5274,15 @@ OB_NOINLINE int ObQueryRange::get_tablet_ranges(ObQueryRangeArray &ranges, } else if (OB_FAIL(get_methods.push_back(is_get_range))) { LOG_WARN("push back get_method failed", K(ret)); } else {} + } else if (OB_FAIL(search_state.init_search_state(column_count_, false))) { + LOG_WARN("failed to init search state", K(ret)); } else { - ret = OB_SUCCESS; search_state.depth_ = 0; - search_state.max_exist_index_ = 0; - search_state.last_include_start_ = false; - search_state.last_include_end_ = false; search_state.produce_range_ = true; search_state.is_equal_range_ = table_graph_.is_equal_range_; - search_state.start_ = static_cast(search_state.allocator_.alloc(sizeof(ObObj) * column_count_)); - search_state.end_ = static_cast(search_state.allocator_.alloc(sizeof(ObObj) * column_count_)); - search_state.include_start_ = static_cast(search_state.allocator_.alloc(sizeof(bool) * 
column_count_)); - search_state.include_end_ = static_cast(search_state.allocator_.alloc(sizeof(bool) * column_count_)); - if (OB_ISNULL(search_state.start_) || OB_ISNULL(search_state.end_) - || OB_ISNULL(search_state.include_start_) || OB_ISNULL(search_state.include_end_)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc memory failed", K(search_state.start_), K(search_state.end_), - K_(search_state.include_start), K_(search_state.include_end), K_(column_count)); - } else { - for (int i = 0; i < column_count_; ++i) { - new(search_state.start_ + i) ObObj(); - new(search_state.end_ + i) ObObj(); - (search_state.start_ + i)->set_max_value(); - (search_state.end_ + i)->set_min_value(); - search_state.include_start_[i] = false; - search_state.include_end_[i] = false; - } - if (OB_FAIL(and_first_search(search_state, table_graph_.key_part_head_, ranges, - get_methods, dtc_params))) { - LOG_WARN("and_first_search failed", K(ret)); - } + if (OB_FAIL(and_first_search(search_state, table_graph_.key_part_head_, ranges, + get_methods, dtc_params))) { + LOG_WARN("and_first_search failed", K(ret)); } } if (OB_SUCC(ret)) { @@ -5873,6 +6048,7 @@ OB_DEF_SERIALIZE(ObQueryRange) LOG_WARN("serialize srid map failed", K(ret)); } } + OB_UNIS_ENCODE(table_graph_.skip_scan_offset_); return ret; } @@ -5907,6 +6083,7 @@ OB_DEF_SERIALIZE_SIZE(ObQueryRange) if (map_count > 0) { len += get_columnId_map_size(); } + OB_UNIS_ADD_LEN(table_graph_.skip_scan_offset_); return len; } @@ -5949,6 +6126,7 @@ OB_DEF_DESERIALIZE(ObQueryRange) LOG_WARN("deserialize range graph failed", K(ret)); } } + OB_UNIS_DECODE(table_graph_.skip_scan_offset_); return ret; } @@ -6059,9 +6237,10 @@ OB_NOINLINE int ObQueryRange::deep_copy(const ObQueryRange &other, contain_geo_filters_ = other.contain_geo_filters_; has_exec_param_ = other.has_exec_param_; is_equal_and_ = other.is_equal_and_; - if (OB_FAIL(range_exprs_.assign(other.range_exprs_))) { LOG_WARN("assign range exprs failed", K(ret)); + } else if 
(OB_FAIL(ss_range_exprs_.assign(other.ss_range_exprs_))) { + LOG_WARN("assign range exprs failed", K(ret)); } else if (OB_FAIL(table_graph_.assign(other_graph))) { LOG_WARN("Deep copy range columns failed", K(ret)); } else if (OB_FAIL(equal_offs_.assign(other.equal_offs_))) { diff --git a/src/sql/rewrite/ob_query_range.h b/src/sql/rewrite/ob_query_range.h index cc3acdaee..63c8c8aec 100644 --- a/src/sql/rewrite/ob_query_range.h +++ b/src/sql/rewrite/ob_query_range.h @@ -239,6 +239,7 @@ private: { } + int init_search_state(int64_t column_count, bool init_as_full_range); bool has_intersect(const common::ObObj &start, bool include_start, const common::ObObj &end, @@ -293,7 +294,8 @@ private: : key_part_head_(NULL), is_equal_range_(false), is_standard_range_(true), - is_precise_get_(false) + is_precise_get_(false), + skip_scan_offset_(-1) { //将is_standard_range_初始化为true的原因是我们认为当表达式条件为空的时候也是一个简单range } @@ -304,6 +306,7 @@ private: is_equal_range_ = false; is_standard_range_ = true; is_precise_get_ = false; + skip_scan_offset_ = -1; } int assign(const ObRangeGraph &other) @@ -313,6 +316,7 @@ private: is_equal_range_ = other.is_equal_range_; is_standard_range_ = other.is_standard_range_; is_precise_get_ = other.is_precise_get_; + skip_scan_offset_ = other.skip_scan_offset_; return ret; } @@ -320,6 +324,7 @@ private: bool is_equal_range_; bool is_standard_range_; bool is_precise_get_; + int64_t skip_scan_offset_; }; struct ExprFinalInfo { @@ -408,6 +413,10 @@ public: ObQueryRangeArray &ranges, ObGetMethodArray &get_methods, const common::ObDataTypeCastParams &dtc_params) const; + int get_ss_tablet_ranges(common::ObIAllocator &allocator, + ObExecContext &exec_ctx, + ObQueryRangeArray &ss_ranges, + const ObDataTypeCastParams &dtc_params) const; int get_tablet_ranges(common::ObIAllocator &allocator, ObExecContext &exec_ctx, ObQueryRangeArray &ranges, @@ -447,7 +456,23 @@ public: bool is_precise_get() const { return table_graph_.is_precise_get_; } common::ObGeoRelationType 
get_geo_relation(ObItemType type) const; const common::ObIArray &get_range_exprs() const { return range_exprs_; } - int check_graph_type(); + const common::ObIArray &get_ss_range_exprs() const { return ss_range_exprs_; } + int check_graph_type(ObKeyPart &key_part_head); + int check_skip_scan_range(ObKeyPart *key_part_head, + const bool is_standard_range, + const int64_t max_precise_pos, + ObKeyPart *&ss_head, + int64_t &skip_scan_offset, + int64_t &ss_max_precise_pos); + int reset_skip_scan_range(); + bool is_precise_get(const ObKeyPart &key_part_head, + int64_t &max_precise_pos, + bool ignore_head = false); + int fill_range_exprs(const int64_t max_precise_pos, + const int64_t ss_offset, + const int64_t ss_max_precise_pos); + bool is_ss_range() const { return table_graph_.skip_scan_offset_ > -1; } + int64_t get_skip_scan_offset() const { return table_graph_.skip_scan_offset_; } static bool can_be_extract_range(ObItemType cmp_type, const ObExprResType &col_type, const ObExprCalcType &res_type, common::ObObjType data_type, @@ -692,7 +717,8 @@ private: inline int get_single_key_value(const ObKeyPart *key, ObExecContext &exec_ctx, ObSearchState &search_state, - const common::ObDataTypeCastParams &dtc_params) const; + const common::ObDataTypeCastParams &dtc_params, + int64_t skip_offset = 0) const; int gen_simple_get_range(const ObKeyPart &root, common::ObIAllocator &allocator, ObExecContext &exec_ctx, @@ -704,6 +730,16 @@ private: ObQueryRangeArray &ranges, ObGetMethodArray &get_methods, const common::ObDataTypeCastParams &dtc_params) const; + + const ObKeyPart* get_ss_key_part_head() const; + + int gen_skip_scan_range(ObIAllocator &allocator, + ObExecContext &exec_ctx, + const ObDataTypeCastParams &dtc_params, + const ObKeyPart *ss_root, + int64_t post_column_count, + ObQueryRangeArray &ss_ranges) const; + int cold_cast_cur_node(const ObKeyPart *cur, common::ObIAllocator &allocator, const common::ObDataTypeCastParams &dtc_params, @@ -768,6 +804,7 @@ private: 
KeyPartStore key_part_store_; //this flag used by optimizer, so don't need to serialize it common::ObFixedArray range_exprs_; + common::ObFixedArray ss_range_exprs_; MbrFilterArray mbr_filters_; bool has_exec_param_; bool is_equal_and_; diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 642e7b495..8c6afbbb3 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -28,6 +28,7 @@ ob_set_subtarget(ob_storage blocksstable blocksstable/ob_macro_block_writer.cpp blocksstable/ob_data_macro_block_merge_writer.cpp blocksstable/ob_micro_block_cache.cpp + blocksstable/ob_micro_block_hash_index.cpp blocksstable/ob_micro_block_reader.cpp blocksstable/ob_micro_block_row_exister.cpp blocksstable/ob_micro_block_row_getter.cpp @@ -346,6 +347,8 @@ ob_set_subtarget(ob_storage access access/ob_multiple_merge.cpp access/ob_multiple_multi_scan_merge.cpp access/ob_multiple_scan_merge.cpp + access/ob_multiple_skip_scan_merge.cpp + access/ob_multiple_multi_skip_scan_merge.cpp access/ob_table_scan_iterator.cpp access/ob_store_row_iterator.cpp access/ob_i_sample_iterator.cpp diff --git a/src/storage/access/ob_aggregated_store.cpp b/src/storage/access/ob_aggregated_store.cpp index 933f0449f..22cdc676a 100644 --- a/src/storage/access/ob_aggregated_store.cpp +++ b/src/storage/access/ob_aggregated_store.cpp @@ -62,7 +62,7 @@ int ObAggCell::fill_result(sql::ObEvalCtx &ctx,bool need_padding) } else { sql::ObEvalInfo &eval_info = expr_->get_eval_info(ctx); eval_info.evaluated_ = true; - LOG_DEBUG("fill result", K(result)); + LOG_DEBUG("fill result", K(result), KPC(this)); } return ret; } @@ -119,7 +119,9 @@ int ObFirstRowAggCell::process(blocksstable::ObDatumRow &row) { int ret = OB_SUCCESS; if (!aggregated_) { - if (OB_FAIL(datum_.deep_copy(row.storage_datums_[col_idx_], allocator_))) { + if (OB_FAIL(fill_default_if_need(row.storage_datums_[col_idx_]))) { + LOG_WARN("Failed to fill default", K(ret), K(*this)); + } else if 
(OB_FAIL(datum_.deep_copy(row.storage_datums_[col_idx_], allocator_))) { LOG_WARN("Failed to deep copy datum", K(ret), K(row), K(col_idx_)); } else { aggregated_ = true; @@ -265,7 +267,191 @@ int ObCountAggCell::fill_result(sql::ObEvalCtx &ctx, bool need_padding) result.set_int(row_count_); eval_info.evaluated_ = true; } - LOG_DEBUG("fill result", K(result)); + LOG_DEBUG("fill result", K(result), KPC(this)); + return ret; +} + +ObAggDatumBuf::ObAggDatumBuf(common::ObIAllocator &allocator) + : size_(0), datums_(nullptr), buf_(nullptr), allocator_(allocator) +{ +} + +int ObAggDatumBuf::init(const int64_t size) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObDatum) * size))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Failed to alloc datum buf", K(ret), K(size)); + } else if (FALSE_IT(datums_ = new (buf) ObDatum[size])) { + } else if (OB_ISNULL(buf = allocator_.alloc(common::OBJ_DATUM_NUMBER_RES_SIZE * size))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Failed to alloc datum buf", K(ret), K(size)); + } else { + buf_ = static_cast(buf); + size_ = size; + reuse(); + } + return ret; +} + +void ObAggDatumBuf::reset() +{ + if (OB_NOT_NULL(datums_)) { + allocator_.free(datums_); + datums_ = nullptr; + } + if (OB_NOT_NULL(buf_)) { + allocator_.free(buf_); + buf_ = nullptr; + } + size_ = 0; +} + +void ObAggDatumBuf::reuse() +{ + for(int64_t i = 0; i < size_; ++i) { + datums_[i].pack_ = 0; + datums_[i].ptr_ = buf_ + i * common::OBJ_DATUM_NUMBER_RES_SIZE; + } +} + +ObMinMaxAggCell::ObMinMaxAggCell( + bool is_min, + const int32_t col_idx, + const share::schema::ObColumnParam *col_param, + sql::ObExpr *expr, + common::ObIAllocator &allocator) + : ObAggCell(col_idx, col_param, expr, allocator), + is_min_(is_min), + agg_datum_buf_(allocator), + cell_data_ptrs_(nullptr), + datum_allocator_(ObModIds::OB_TABLE_SCAN_ITER) +{ + datum_.set_null(); +} + +void ObMinMaxAggCell::reset() +{ + agg_datum_buf_.reset(); + 
if (nullptr != cell_data_ptrs_) { + allocator_.free(cell_data_ptrs_); + cell_data_ptrs_ = nullptr; + } + ObAggCell::reset(); +} + +void ObMinMaxAggCell::reuse() +{ + datum_.reuse(); + datum_.set_null(); + ObAggCell::reuse(); +} + +int ObMinMaxAggCell::init(sql::ObPushdownOperator *op, sql::ObExpr *col_expr, const int64_t batch_size) +{ + int ret = OB_SUCCESS; + const ObDatumCmpFuncType cmp_fun = expr_->basic_funcs_->null_first_cmp_; + if (OB_ISNULL(cmp_fun)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cmp_func is NULL", K(ret), KPC(expr_)); + } else { + void *buf = nullptr; + cmp_fun_ = cmp_fun; + if (OB_FAIL(agg_datum_buf_.init(batch_size))) { + LOG_WARN("Failed to init agg datum buf", K(ret)); + } else if (OB_ISNULL(buf = allocator_.alloc(sizeof(char*) * batch_size))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Failed to alloc cell data ptrs", K(ret), K(batch_size)); + } else { + cell_data_ptrs_ = static_cast (buf); + } + } + return ret; +} + +int ObMinMaxAggCell::process(blocksstable::ObDatumRow &row) +{ + int ret = OB_SUCCESS; + blocksstable::ObStorageDatum &storage_datum = row.storage_datums_[col_idx_]; + if (OB_FAIL(process(storage_datum))) { + LOG_WARN("Failed to process datum", K(ret), K(storage_datum), KPC(this)); + } + LOG_DEBUG("after process single row", K(storage_datum), KPC(this)); + return ret; +} + +int ObMinMaxAggCell::process( + blocksstable::ObIMicroBlockReader *reader, + int64_t *row_ids, + const int64_t row_count) +{ + int ret = OB_SUCCESS; + blocksstable::ObStorageDatum storage_datum; + storage_datum.set_null(); + if (blocksstable::ObIMicroBlockReader::Reader == reader->get_type()) { + blocksstable::ObMicroBlockReader *block_reader = static_cast(reader); + blocksstable::ObMicroBlockAggInfo agg_info(is_min_, cmp_fun_, storage_datum); + if (OB_FAIL(block_reader->get_min_or_max(col_idx_, col_param_, row_ids, row_count, agg_info))) { + LOG_WARN("Failed to get min or max", K(ret), K(row_count), KPC(this)); + } + } else { + // 
agg_datum_buf_.reuse(); + blocksstable::ObMicroBlockDecoder *block_decoder = static_cast(reader); + blocksstable::ObMicroBlockAggInfo agg_info(is_min_, cmp_fun_, storage_datum); + if (OB_FAIL(block_decoder->get_min_or_max(col_idx_, row_ids, cell_data_ptrs_, row_count, agg_datum_buf_.get_datums(), agg_info))) { + LOG_WARN("Failed to get min or max", K(ret), K(row_count), KPC(this)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(process(storage_datum))) { + LOG_WARN("Failed to process datum", K(ret), K(storage_datum), KPC(this)); + } + } + LOG_DEBUG("after process batch rows", K(storage_datum), KPC(this)); + return ret; +} + +int ObMinMaxAggCell::process(const blocksstable::ObMicroIndexInfo &index_info) +{ + int ret = OB_NOT_SUPPORTED; + return ret; +} + +int ObMinMaxAggCell::process(blocksstable::ObStorageDatum &storage_datum) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(fill_default_if_need(storage_datum))) { + LOG_WARN("Failed to fill default", K(ret), K(storage_datum), K(*this)); + } else if (datum_.is_null() && !storage_datum.is_null()) { + if (OB_FAIL(datum_.deep_copy(storage_datum, datum_allocator_))) { + LOG_WARN("Failed to deep copy datum", K(ret), K(storage_datum), K(col_idx_)); + } + } else if (!storage_datum.is_null()) { + int cmp_ret = cmp_fun_(datum_, storage_datum); + if ((is_min_ && cmp_ret > 0) || (!is_min_ && cmp_ret < 0)) { + if (OB_FAIL(deep_copy_datum(storage_datum))) { + LOG_WARN("Failed to deep copy datum", K(ret), K(storage_datum), K(datum_), K(col_idx_)); + } + } + } + return ret; +} + +int ObMinMaxAggCell::deep_copy_datum(const blocksstable::ObStorageDatum &src) +{ + int ret = OB_SUCCESS; + if (src.is_null() || src.is_nop()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Uexpected datum", K(ret), K(src)); + } else { + if (!datum_.is_local_buf()) { + datum_allocator_.reuse(); + } + if (OB_FAIL(datum_.deep_copy(src, datum_allocator_))) { + LOG_WARN("Failed to deep copy", K(ret), K(src), K(datum_)); + } + } return ret; } @@ -347,9 +533,33 @@ int 
ObAggRow::init(const ObTableAccessParam ¶m) } else if (OB_FAIL(agg_cells_.push_back(cell))) { LOG_WARN("Failed to push back agg cell", K(ret), K(i)); } + } else if (T_FUN_MIN == expr->type_ || T_FUN_MAX == expr->type_) { + need_exclude_null_ = true; + const bool is_min = T_FUN_MIN == expr->type_; + const share::schema::ObColumnParam *col_param = out_cols_param->at(col_idx); + sql::ObExpr *col_expr = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < param.output_exprs_->count(); ++i) { + if (param.iter_param_.out_cols_project_->at(i) == col_idx) { + col_expr = param.output_exprs_->at(i); + break; + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(col_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ref col expr is null", K(ret), K(col_idx), K(i)); + } else if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObMinMaxAggCell))) || + OB_ISNULL(cell = new(buf) ObMinMaxAggCell(is_min, col_idx, col_param, expr, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Failed to alloc memroy for agg cell", K(ret), K(i)); + } else if (OB_FAIL(static_cast(cell)->init(param.op_, col_expr, ObAggregatedStore::BATCH_SIZE))) { + LOG_WARN("Failed to init ObMinMaxAggCell", K(ret), KPC(cell)); + } else if (OB_FAIL(agg_cells_.push_back(cell))) { + LOG_WARN("Failed to push back agg cell", K(ret), K(i)); + } } else { ret = OB_NOT_SUPPORTED; - LOG_WARN("Agg min/max/sum is not supported", K(ret)); + LOG_WARN("Agg is not supported", K(ret), K(expr->type_)); } } } @@ -360,7 +570,9 @@ int ObAggRow::init(const ObTableAccessParam ¶m) ObAggregatedStore::ObAggregatedStore(const int64_t batch_size, sql::ObEvalCtx &eval_ctx, ObTableAccessContext &context) : ObBlockBatchedRowStore(batch_size, eval_ctx, context), is_firstrow_aggregated_(false), - agg_row_(*context_.stmt_allocator_) + agg_row_(*context_.stmt_allocator_), + agg_flat_row_mode_(false), + row_buf_() { } @@ -374,6 +586,8 @@ void ObAggregatedStore::reset() ObBlockBatchedRowStore::reset(); agg_row_.reset(); is_firstrow_aggregated_ = false; 
+ agg_flat_row_mode_ = false; + row_buf_.reset(); } void ObAggregatedStore::reuse() @@ -404,6 +618,11 @@ int ObAggregatedStore::init(const ObTableAccessParam ¶m) LOG_WARN("Failed to init ObBlockBatchedRowStore", K(ret)); } else if (OB_FAIL(agg_row_.init(param))) { LOG_WARN("Failed to init agg cells", K(ret)); + } else if (OB_FAIL(check_agg_in_row_mode(param.iter_param_))) { + LOG_WARN("Failed to check agg in row mode", K(ret)); + } else if (agg_flat_row_mode_ && + OB_FAIL(row_buf_.init(*context_.stmt_allocator_, param.iter_param_.get_full_out_col_cnt()))) { + LOG_WARN("Fail to init datum row buf", K(ret)); } if (OB_FAIL(ret)) { reset(); @@ -411,6 +630,36 @@ int ObAggregatedStore::init(const ObTableAccessParam ¶m) return ret; } +int ObAggregatedStore::check_agg_in_row_mode(const ObTableIterParam &iter_param) +{ + int ret = OB_SUCCESS; + int64_t agg_cnt = 0; + ObAggCell *cell = nullptr; + const ObTableReadInfo *read_info = nullptr; + if (OB_ISNULL(read_info = iter_param.get_read_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null read info", K(ret), K(iter_param)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < agg_row_.get_agg_count(); ++i) { + if (OB_ISNULL(cell = agg_row_.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpecte null agg cell", K(ret), K(i)); + } else if (OB_COUNT_AGG_PD_COLUMN_ID == cell->get_col_idx()) { + } else if (cell->get_col_idx() >= read_info->get_request_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected col idx", K(ret), K(i), KPC(cell), K(read_info->get_request_count())); + } else if (ObAggCell::FIRST_ROW != cell->get_type()) { + agg_cnt++; + } + } + if (OB_SUCC(ret)) { + agg_flat_row_mode_ = + agg_cnt > AGG_ROW_MODE_COUNT_THRESHOLD || + (double) agg_cnt/read_info->get_request_count() > AGG_ROW_MODE_RATIO_THRESHOLD; + } + return ret; +} + int ObAggregatedStore::fill_index_info(const blocksstable::ObMicroIndexInfo &index_info) { int ret = OB_SUCCESS; @@ -468,6 +717,13 @@ int ObAggregatedStore::fill_rows( if 
(OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Failed to get row ids", K(ret), K(begin_index), K(end_index)); } + } else if (0 == row_count) { + } else if (agg_flat_row_mode_ && blocksstable::ObIMicroBlockReader::Reader == reader->get_type()) { + // for flat block, do aggregate in row mode + blocksstable::ObMicroBlockReader *block_reader = static_cast(reader); + if (OB_FAIL(block_reader->get_aggregate_result(row_ids_, row_count, row_buf_, agg_row_.get_agg_cells()))) { + LOG_WARN("Failed to process aggregates", K(ret)); + } } else { for (int64_t i = 0; OB_SUCC(ret) && i < agg_row_.get_agg_count(); ++i) { ObAggCell *cell = agg_row_.at(i); diff --git a/src/storage/access/ob_aggregated_store.h b/src/storage/access/ob_aggregated_store.h index 021ecebb0..c574f5caf 100644 --- a/src/storage/access/ob_aggregated_store.h +++ b/src/storage/access/ob_aggregated_store.h @@ -29,9 +29,18 @@ struct ObMicroIndexInfo; namespace storage { +static const int64_t AGG_ROW_MODE_COUNT_THRESHOLD = 3; +static const double AGG_ROW_MODE_RATIO_THRESHOLD = 0.5; + class ObAggCell { public: + enum ObAggCellType + { + COUNT, + MINMAX, + FIRST_ROW, + }; ObAggCell( const int32_t col_idx, const share::schema::ObColumnParam *col_param, @@ -40,6 +49,7 @@ public: virtual ~ObAggCell(); virtual void reset(); virtual void reuse(); + virtual ObAggCellType get_type() const = 0; virtual int process(blocksstable::ObDatumRow &row) = 0; virtual int process( blocksstable::ObIMicroBlockReader *reader, @@ -47,6 +57,7 @@ public: const int64_t row_count) = 0; virtual int process(const blocksstable::ObMicroIndexInfo &index_info) = 0; virtual int fill_result(sql::ObEvalCtx &ctx, bool need_padding); + OB_INLINE int32_t get_col_idx() const { return col_idx_; } TO_STRING_KV(K_(col_idx), K_(datum), KPC(col_param_), K_(expr)); protected: int fill_default_if_need(blocksstable::ObStorageDatum &datum); @@ -69,6 +80,7 @@ public: common::ObIAllocator &allocator); virtual ~ObFirstRowAggCell() { reset(); }; virtual void reset() 
override; + virtual ObAggCellType get_type() const override { return FIRST_ROW; } virtual int process(blocksstable::ObDatumRow &row) override; virtual int process( blocksstable::ObIMicroBlockReader *reader, @@ -76,7 +88,7 @@ public: const int64_t row_count) override; virtual int process(const blocksstable::ObMicroIndexInfo &index_info) override; virtual int fill_result(sql::ObEvalCtx &ctx, bool need_padding) override; - TO_STRING_KV(K_(col_idx), K_(datum), K_(col_param), K_(expr), K_(aggregated)); + INHERIT_TO_STRING_KV("ObAggCell", ObAggCell, K_(aggregated)); private: bool aggregated_; }; @@ -93,6 +105,7 @@ public: virtual ~ObCountAggCell() { reset(); }; virtual void reset() override; virtual void reuse() override; + virtual ObAggCellType get_type() const override { return COUNT; } virtual int process(blocksstable::ObDatumRow &row) override; virtual int process( blocksstable::ObIMicroBlockReader *reader, @@ -100,12 +113,60 @@ public: const int64_t row_count) override; virtual int process(const blocksstable::ObMicroIndexInfo &index_info) override; virtual int fill_result(sql::ObEvalCtx &ctx, bool need_padding) override; - TO_STRING_KV(K_(col_idx), K_(datum), K_(col_param), K_(expr), K_(exclude_null), K_(row_count)); + INHERIT_TO_STRING_KV("ObAggCell", ObAggCell, K_(exclude_null), K_(row_count)); private: bool exclude_null_; int64_t row_count_; }; -// TODO sum/min/max + +class ObAggDatumBuf { +public: + ObAggDatumBuf(common::ObIAllocator &allocator); + ~ObAggDatumBuf() { reset(); }; + int init(const int64_t size); + void reuse(); + void reset(); + OB_INLINE ObDatum *get_datums() { return datums_; } + TO_STRING_KV(K_(size), K_(datums), K_(buf)); + private: + int64_t size_; + ObDatum *datums_; + char *buf_; + common::ObIAllocator &allocator_; +}; + +class ObMinMaxAggCell : public ObAggCell +{ +public: + ObMinMaxAggCell( + bool is_min, + const int32_t col_idx, + const share::schema::ObColumnParam *col_param, + sql::ObExpr *expr, + common::ObIAllocator &allocator); + 
virtual ~ObMinMaxAggCell() { reset(); }; + virtual void reset() override; + virtual void reuse() override; + virtual ObAggCellType get_type() const override { return MINMAX; } + int init(sql::ObPushdownOperator *op, sql::ObExpr *col_expr, const int64_t batch_size); + virtual int process(blocksstable::ObDatumRow &row) override; + virtual int process( + blocksstable::ObIMicroBlockReader *reader, + int64_t *row_ids, + const int64_t row_count) override; + virtual int process(const blocksstable::ObMicroIndexInfo &index_info) override; + INHERIT_TO_STRING_KV("ObAggCell", ObAggCell, K_(is_min), K_(cmp_fun), K_(agg_datum_buf)); +private: + int deep_copy_datum(const blocksstable::ObStorageDatum &src); + int process(blocksstable::ObStorageDatum &datum); + bool is_min_; + ObDatumCmpFuncType cmp_fun_; + ObAggDatumBuf agg_datum_buf_; + const char **cell_data_ptrs_; + common::ObArenaAllocator datum_allocator_; +}; + +// TODO sum class ObAggRow { @@ -119,7 +180,8 @@ public: bool need_exclude_null() const { return need_exclude_null_; }; // void set_firstrow_aggregated(bool aggregated) { is_firstrow_aggregated_ = aggregated; } // bool is_firstrow_aggregated() const { return is_firstrow_aggregated_; } - ObAggCell* at(int64_t idx) { return agg_cells_.at(idx); } + OB_INLINE ObAggCell* at(int64_t idx) { return agg_cells_.at(idx); } + OB_INLINE common::ObIArray& get_agg_cells() { return agg_cells_; } TO_STRING_KV(K_(agg_cells)); private: common::ObFixedArray agg_cells_; @@ -158,11 +220,14 @@ public: !index_info.is_right_border(); } OB_INLINE void set_end() { iter_end_flag_ = IterEndState::ITER_END; } - TO_STRING_KV(K_(agg_row)); + int check_agg_in_row_mode(const ObTableIterParam &iter_param); + TO_STRING_KV(K_(is_firstrow_aggregated), K_(agg_row), K_(agg_flat_row_mode), K_(row_buf)); private: bool is_firstrow_aggregated_; ObAggRow agg_row_; + bool agg_flat_row_mode_; + blocksstable::ObDatumRow row_buf_; }; } /* namespace storage */ diff --git a/src/storage/access/ob_dml_param.cpp 
b/src/storage/access/ob_dml_param.cpp index 684db2d39..788c24a12 100644 --- a/src/storage/access/ob_dml_param.cpp +++ b/src/storage/access/ob_dml_param.cpp @@ -269,6 +269,7 @@ DEF_TO_STRING(ObTableScanParam) N_COLUMN_IDS, column_ids_, N_INDEX_ID, index_id_, N_KEY_RANGES, key_ranges_, + K_(ss_key_ranges), K_(range_array_pos), N_TIMEOUT, timeout_, N_SCAN_FLAG, scan_flag_, @@ -287,7 +288,8 @@ DEF_TO_STRING(ObTableScanParam) K_(snapshot), KPC_(table_param), K_(sample_info), - K_(need_scn)); + K_(need_scn), + K_(need_switch_param)); J_OBJ_END(); return pos; } diff --git a/src/storage/access/ob_dml_param.h b/src/storage/access/ob_dml_param.h index 26da43aea..505ebf055 100644 --- a/src/storage/access/ob_dml_param.h +++ b/src/storage/access/ob_dml_param.h @@ -153,7 +153,11 @@ public: OB_INLINE virtual bool is_valid() const { return snapshot_.valid_ && ObVTableScanParam::is_valid(); } + OB_INLINE bool use_index_skip_scan() const { + return (1 == ss_key_ranges_.count()) && (!ss_key_ranges_.at(0).is_whole_range()); + } bool is_thread_scope_; + ObRangeArray ss_key_ranges_; // used for index skip scan, use as postfix range for ObVTableScanParam::key_ranges_ DECLARE_VIRTUAL_TO_STRING; private: diff --git a/src/storage/access/ob_index_tree_prefetcher.cpp b/src/storage/access/ob_index_tree_prefetcher.cpp index ff3fcf28a..c2dea3314 100644 --- a/src/storage/access/ob_index_tree_prefetcher.cpp +++ b/src/storage/access/ob_index_tree_prefetcher.cpp @@ -303,7 +303,7 @@ int ObIndexTreePrefetcher::prefetch_block_data( macro_id == micro_handle.macro_block_id_ && offset == micro_handle.micro_info_.offset_ && index_block_info.row_header_->get_block_size() == micro_handle.micro_info_.size_) { - LOG_DEBUG("Cur micro handle is still valid"); + LOG_DEBUG("Cur micro handle is still valid", K(index_block_info), K(micro_handle)); if (is_data) { EVENT_INC(ObStatEventIds::DATA_BLOCK_CACHE_HIT); } else { @@ -366,6 +366,14 @@ int ObIndexTreePrefetcher::prefetch_block_data( micro_handle.macro_block_id_ 
= macro_id; micro_handle.block_state_ = ObSSTableMicroBlockState::IN_BLOCK_IO; micro_handle.io_handle_ = macro_handle; + + if (is_data && OB_FAIL(micro_block_handle_mgr_.put_micro_block_handle( + tenant_id, + macro_id, + *index_block_info.row_header_, + micro_handle))) { + STORAGE_LOG(WARN, "failed to put handle cache", K(ret), K(tenant_id), K(macro_id), K(index_block_info)); + } } } } @@ -379,6 +387,256 @@ int ObIndexTreePrefetcher::prefetch_block_data( return ret; } +////////////////////////////////// ObIndexTreeMultiPrefetcher ///////////////////////////////////////////// + +void ObIndexTreeMultiPrefetcher::reset() +{ + ObIndexTreePrefetcher::reset(); + fetch_rowkey_idx_ = 0; + prefetch_rowkey_idx_ = 0; + prefetched_rowkey_cnt_ = 0; + rowkeys_ = nullptr; + ext_read_handles_.reset(); +} + +void ObIndexTreeMultiPrefetcher::reuse() +{ + ObIndexTreePrefetcher::reuse(); + fetch_rowkey_idx_ = 0; + prefetch_rowkey_idx_ = 0; + prefetched_rowkey_cnt_ = 0; + rowkeys_ = nullptr; +} + +int ObIndexTreeMultiPrefetcher::init( + const int iter_type, + ObSSTable &sstable, + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx, + const void *query_range) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObIndexTreeMultiPrefetcher has been inited", K(ret)); + } else if (OB_UNLIKELY(ObStoreRowIterator::IteratorMultiGet != iter_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), K(iter_type)); + } else { + sstable_ = &sstable; + access_ctx_ = &access_ctx; + iter_param_ = &iter_param; + index_read_info_ = iter_param.get_full_read_info()->get_index_read_info(); + data_version_ = sstable_->is_major_sstable() ? 
sstable_->get_snapshot_version() : sstable_->get_key().get_end_scn().get_val_for_tx(); + data_block_cache_ = &(ObStorageCacheSuite::get_instance().get_block_cache()); + index_block_cache_ = &(ObStorageCacheSuite::get_instance().get_index_block_cache()); + ext_read_handles_.set_allocator(access_ctx.stmt_allocator_); + rowkeys_ = static_cast *> (query_range); + index_tree_height_ = sstable_->get_meta().get_index_tree_height(); + int32_t range_count = rowkeys_->count(); + max_handle_prefetching_cnt_ = min(range_count, MAX_MULTIGET_MICRO_DATA_HANDLE_CNT); + if (0 == range_count) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("range count should be greater than 0", K(ret), K(range_count)); + } else if (OB_FAIL(ext_read_handles_.prepare_reallocate(max_handle_prefetching_cnt_))) { + LOG_WARN("Fail to init read_handles", K(ret), K(max_handle_prefetching_cnt_)); + } else if (OB_FAIL(micro_block_handle_mgr_.init(range_count > 1, false, *access_ctx.stmt_allocator_))) { + LOG_WARN("failed to init block handle mgr", K(ret)); + } else { + is_inited_ = true; + } + } + return ret; +} + +int ObIndexTreeMultiPrefetcher::switch_context( + const int iter_type, + const ObTableReadInfo &index_read_info, + ObSSTable &sstable, + ObTableAccessContext &access_ctx, + const void *query_range) +{ + int ret = OB_SUCCESS; + bool is_multi_range = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_UNLIKELY(ObStoreRowIterator::IteratorMultiGet != iter_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), K(iter_type)); + } else { + sstable_ = &sstable; + access_ctx_ = &access_ctx; + data_version_ = sstable_->is_major_sstable() ? 
sstable_->get_snapshot_version() : sstable_->get_key().get_end_scn().get_val_for_tx(); + rowkeys_ = static_cast *> (query_range); + index_read_info_ = &index_read_info; + max_handle_prefetching_cnt_ = min(rowkeys_->count(), MAX_MULTIGET_MICRO_DATA_HANDLE_CNT); + if (OB_FAIL(ext_read_handles_.prepare_reallocate(max_handle_prefetching_cnt_))) { + LOG_WARN("Fail to init read_handles", K(ret), K(max_handle_prefetching_cnt_)); + } else if (!is_rescan_) { + is_rescan_ = true; + for (int64_t i = 0; i < ext_read_handles_.count(); ++i) { + ext_read_handles_.at(i).reset(); + } + micro_block_handle_mgr_.reset(); + if (OB_FAIL(micro_block_handle_mgr_.init(true, false, *access_ctx.stmt_allocator_))) { + LOG_WARN("failed to init block handle mgr", K(ret)); + } + } + } + return ret; +} + +int ObIndexTreeMultiPrefetcher::multi_prefetch() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObIndexTreeMultiPrefetcher not init", K(ret)); + } else { + const int64_t rowkey_cnt = rowkeys_->count(); + for (int64_t i = fetch_rowkey_idx_; + OB_SUCC(ret) && prefetched_rowkey_cnt_ < rowkey_cnt && i < fetch_rowkey_idx_ + max_handle_prefetching_cnt_; + ++i) { + const bool is_rowkey_to_fetched = i == fetch_rowkey_idx_; + const bool is_empty_handle = i >= prefetch_rowkey_idx_; + ObSSTableReadHandleExt &read_handle = ext_read_handles_[i % max_handle_prefetching_cnt_]; + if (is_empty_handle && prefetch_rowkey_idx_ < rowkey_cnt) { + read_handle.reuse(); + read_handle.rowkey_ = &rowkeys_->at(prefetch_rowkey_idx_); + read_handle.range_idx_ = prefetch_rowkey_idx_; + read_handle.is_get_ = true; + prefetch_rowkey_idx_++; + + if (OB_FAIL(lookup_in_cache(read_handle))) { + LOG_WARN("Failed to lookup_in_cache", K(ret)); + } else if (ObSSTableRowState::IN_BLOCK == read_handle.row_state_) { + if (OB_FAIL(sstable_->get_index_tree_root(*index_read_info_, index_block_))) { + LOG_WARN("Fail to get index block root", K(ret)); + } else if (!index_scanner_.is_valid() && 
OB_FAIL(init_index_scanner(index_scanner_))) { + LOG_WARN("Fail to init index scanner", K(ret)); + } else if (OB_FAIL(drill_down(ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, read_handle, false, is_rowkey_to_fetched))) { + LOG_WARN("Fail to prefetch next level", K(ret), K(index_block_), K(read_handle), KPC(this)); + } else { + EVENT_INC(ObStatEventIds::INDEX_BLOCK_READ_CNT); + } + } else { + mark_cur_rowkey_prefetched(read_handle); + } + } else if (read_handle.cur_prefetch_end_) { + continue; + } else if (read_handle.cur_level_ >= index_tree_height_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Fail to prefetch, unexpected cur level", K(ret), K(read_handle.cur_level_), K(index_tree_height_), K(read_handle), KPC(this)); + } else if (ObSSTableRowState::IN_BLOCK == read_handle.row_state_) { + bool stop_prefetch = false; + int64_t tenant_id = MTL_ID(); + ObMicroIndexInfo &cur_index_info = read_handle.index_block_info_; + ObMicroBlockDataHandle &next_handle = read_handle.get_read_handle(); + if (OB_UNLIKELY(!cur_index_info.is_valid() || + nullptr == read_handle.micro_handle_ || + &next_handle == read_handle.micro_handle_ || + ObSSTableMicroBlockState::IN_BLOCK_IO != read_handle.micro_handle_->block_state_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Fail to prefetch, unexpected read handle", K(ret), K(read_handle), KPC(this)); + } else if (OB_FAIL(micro_block_handle_mgr_.get_micro_block_handle( + tenant_id, + cur_index_info, + cur_index_info.is_data_block(), + next_handle))) { + //not in cache yet, stop this rowkey prefetching if it's not the rowkey to be feteched + ret = OB_SUCCESS; + if (is_rowkey_to_fetched) { + if (OB_FAIL(read_handle.micro_handle_->get_index_block_data(*index_read_info_, index_block_))) { + LOG_WARN("Fail to get index block data", K(ret), KPC(read_handle.micro_handle_)); + } + } else { + stop_prefetch = true; + } + } else if (FALSE_IT(read_handle.set_cur_micro_handle(next_handle))) { + } else if 
(OB_FAIL(read_handle.micro_handle_->get_cached_index_block_data(*index_read_info_, index_block_))) { + LOG_WARN("Fail to get cached index block data", K(ret), KPC(read_handle.micro_handle_)); + } + if (OB_SUCC(ret) && !stop_prefetch) { + if (OB_FAIL(drill_down(cur_index_info.get_macro_id(), read_handle, cur_index_info.is_leaf_block(), is_rowkey_to_fetched))) { + LOG_WARN("Fail to prefetch next level", K(ret), K(index_block_), K(read_handle), KPC(this)); + } + } + } + } + } + return ret; +} + +int ObIndexTreeMultiPrefetcher::drill_down( + const MacroBlockId ¯o_id, + ObSSTableReadHandleExt &read_handle, + const bool cur_level_is_leaf, + const bool force_prefetch) +{ + int ret = OB_SUCCESS; + ObMicroIndexInfo index_block_info; + EVENT_INC(ObStatEventIds::INDEX_BLOCK_READ_CNT); + read_handle.cur_level_++; + if (OB_FAIL(index_scanner_.open(macro_id, index_block_, *read_handle.rowkey_, read_handle.range_idx_))) { + LOG_WARN("Fail to open index block scanner", K(ret), K(index_block_), K(read_handle)); + } else if (cur_level_is_leaf && read_handle.cur_level_ != index_tree_height_ - 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Fail to prefetch, unexpected level", K(ret), K(cur_level_is_leaf), + K(read_handle.cur_level_), K(index_tree_height_)); + } else if (OB_FAIL(index_scanner_.get_next(index_block_info))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("Fail to get index block row", K(ret), K_(index_scanner)); + } else { + mark_cur_rowkey_prefetched(read_handle); + read_handle.row_state_ = ObSSTableRowState::NOT_EXIST; + ret = OB_SUCCESS; + } + } else if (cur_level_is_leaf != index_block_info.is_data_block()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Fail to prefetch, unexpected level", K(ret), K(cur_level_is_leaf), K(index_block_info)); + } else if (index_block_info.is_macro_node() && OB_FAIL(check_bloom_filter(index_block_info, read_handle))) { + LOG_WARN("Fail to check bloom filter", K(ret), K(index_block_info), K(read_handle)); + } else if 
(ObSSTableRowState::NOT_EXIST == read_handle.row_state_) { + mark_cur_rowkey_prefetched(read_handle); + } else { + // hold block cache of the parent temporaliy to avoid freed + ObMicroBlockDataHandle &next_handle = read_handle.get_read_handle(); + if (OB_FAIL(prefetch_block_data(index_block_info, next_handle, cur_level_is_leaf))) { + LOG_WARN("fail to prefetch_block_data", K(ret), K(read_handle), K(index_block_info), K(cur_level_is_leaf)); + } else if (FALSE_IT(read_handle.set_cur_micro_handle(next_handle))) { + } else if (cur_level_is_leaf) { + mark_cur_rowkey_prefetched(read_handle); + read_handle.index_block_info_ = index_block_info; + } else if (force_prefetch || ObSSTableMicroBlockState::IN_BLOCK_CACHE == next_handle.block_state_) { + if (ObSSTableMicroBlockState::IN_BLOCK_CACHE == next_handle.block_state_) { + LOG_DEBUG("cur handle is in cache", K(read_handle), K(index_block_info), K(next_handle)); + if (OB_FAIL(next_handle.get_cached_index_block_data(*index_read_info_, index_block_))) { + LOG_WARN("Fail to get index block data", K(ret), K(next_handle)); + } + } else { + LOG_DEBUG("cur handle is not in cache, force prefetch", K(read_handle), K(index_block_info), K(next_handle)); + if (OB_FAIL(next_handle.get_index_block_data(*index_read_info_, index_block_))) { + LOG_WARN("Fail to get index block data", K(ret), K(next_handle)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(drill_down( + index_block_info.get_macro_id(), + read_handle, + index_block_info.is_leaf_block(), + force_prefetch))) { + LOG_WARN("Faile to prefetch data block", K(ret), K(read_handle)); + } + } + } else { + LOG_DEBUG("cur handle is not in cache, has submit io", K(read_handle), K(index_block_info), K(next_handle)); + read_handle.index_block_info_ = index_block_info; + } + } + return ret; +} + ////////////////////////////////// MultiPassPrefetcher ///////////////////////////////////////////// void ObIndexTreeMultiPassPrefetcher::reset() diff --git 
a/src/storage/access/ob_index_tree_prefetcher.h b/src/storage/access/ob_index_tree_prefetcher.h index 58c073203..86632e602 100644 --- a/src/storage/access/ob_index_tree_prefetcher.h +++ b/src/storage/access/ob_index_tree_prefetcher.h @@ -91,7 +91,7 @@ public: return ret; } TO_STRING_KV(K_(is_get), K_(is_bf_contain), K_(row_state), K_(range_idx), - K_(micro_begin_idx), K_(micro_end_idx), KP_(query_range)); + K_(micro_begin_idx), K_(micro_end_idx), KP_(query_range), KPC_(micro_handle)); public: bool is_get_; @@ -185,13 +185,119 @@ protected: const ObTableReadInfo *index_read_info_; common::ObFixedArray agg_projector_; common::ObFixedArray agg_column_schema_; -private: static const int64_t DEFAULT_GET_MICRO_DATA_HANDLE_CNT = 2; ObIndexBlockRowScanner index_scanner_; + private: ObMicroBlockDataHandle micro_handles_[DEFAULT_GET_MICRO_DATA_HANDLE_CNT]; MacroBlockId macro_id_; }; +class ObIndexTreeMultiPrefetcher : public ObIndexTreePrefetcher +{ +public: + static const int32_t MAX_MULTIGET_MICRO_DATA_HANDLE_CNT = 32; + struct ObSSTableReadHandleExt : public ObSSTableReadHandle { + ObSSTableReadHandleExt() : + ObSSTableReadHandle(), + cur_level_(-1), + cur_prefetch_end_(false), + index_block_info_(), + micro_handle_idx_(0) + {} + ~ObSSTableReadHandleExt() + {} + void reuse() + { + ObSSTableReadHandle::reuse(); + cur_level_ = -1; + cur_prefetch_end_ = false; + index_block_info_.reset(); + micro_handle_idx_ = 0; + } + void reset() + { + ObSSTableReadHandle::reset(); + cur_level_ = -1; + cur_prefetch_end_ = false; + index_block_info_.reset(); + micro_handle_idx_ = 0; + for (int64_t i = 0; i < DEFAULT_GET_MICRO_DATA_HANDLE_CNT; ++i) { + micro_handles_[i].reset(); + } + } + OB_INLINE ObMicroBlockDataHandle& get_read_handle() + { + return micro_handles_[micro_handle_idx_ % DEFAULT_GET_MICRO_DATA_HANDLE_CNT]; + } + OB_INLINE void set_cur_micro_handle(ObMicroBlockDataHandle &handle) + { + micro_handle_ = &handle; + micro_handle_idx_++; + } + 
INHERIT_TO_STRING_KV("ObSSTableReadHandle", ObSSTableReadHandle, KPC_(rowkey), + K_(cur_level), K_(cur_prefetch_end), K_(index_block_info), K_(micro_handle_idx), K_(micro_handles)); + int16_t cur_level_; + bool cur_prefetch_end_; + ObMicroIndexInfo index_block_info_; + int64_t micro_handle_idx_; + ObMicroBlockDataHandle micro_handles_[DEFAULT_GET_MICRO_DATA_HANDLE_CNT]; + }; + typedef ObReallocatedFixedArray ReadHandleExtArray; + ObIndexTreeMultiPrefetcher() : + index_tree_height_(0), + fetch_rowkey_idx_(0), + prefetch_rowkey_idx_(0), + prefetched_rowkey_cnt_(0), + rowkeys_(nullptr), + ext_read_handles_() + {} + virtual ~ObIndexTreeMultiPrefetcher() { reset(); } + virtual void reset() override; + virtual void reuse() override; + virtual int init( + const int iter_type, + ObSSTable &sstable, + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx, + const void *query_range) override; + virtual int switch_context( + const int iter_type, + const ObTableReadInfo &index_read_info, + ObSSTable &sstable, + ObTableAccessContext &access_ctx, + const void *query_range) override; + int multi_prefetch(); + OB_INLINE bool is_prefetch_end() { return prefetched_rowkey_cnt_ >= rowkeys_->count(); } + OB_INLINE void mark_cur_rowkey_prefetched(ObSSTableReadHandleExt &read_handle) + { + read_handle.cur_prefetch_end_ = true; + prefetched_rowkey_cnt_++; + } + OB_INLINE void mark_cur_rowkey_fetched(ObSSTableReadHandleExt &read_handle) + { + fetch_rowkey_idx_++; + } + OB_INLINE ObSSTableReadHandleExt ¤t_read_handle() + { return ext_read_handles_[fetch_rowkey_idx_ % MAX_MULTIGET_MICRO_DATA_HANDLE_CNT]; } + OB_INLINE ObMicroBlockDataHandle ¤t_micro_handle() + { return *ext_read_handles_[fetch_rowkey_idx_ % MAX_MULTIGET_MICRO_DATA_HANDLE_CNT].micro_handle_; } + INHERIT_TO_STRING_KV("ObIndexTreePrefetcher", ObIndexTreePrefetcher, K_(index_tree_height), + K_(fetch_rowkey_idx), K_(prefetch_rowkey_idx), K_(prefetched_rowkey_cnt), K_(max_handle_prefetching_cnt)); + int16_t 
index_tree_height_; + int32_t fetch_rowkey_idx_; + int32_t prefetch_rowkey_idx_; + int64_t prefetched_rowkey_cnt_; + int32_t max_handle_prefetching_cnt_; + const common::ObIArray *rowkeys_; + ReadHandleExtArray ext_read_handles_; +private: + int drill_down( + const MacroBlockId ¯o_id, + ObSSTableReadHandleExt &read_handle, + const bool cur_level_is_leaf, + const bool force_prefetch); +}; + class ObIndexTreeMultiPassPrefetcher : public ObIndexTreePrefetcher { public: diff --git a/src/storage/access/ob_multiple_merge.cpp b/src/storage/access/ob_multiple_merge.cpp index 60390f34e..2293dcca5 100644 --- a/src/storage/access/ob_multiple_merge.cpp +++ b/src/storage/access/ob_multiple_merge.cpp @@ -107,8 +107,6 @@ int ObMultipleMerge::init( STORAGE_LOG(WARN, "Failed to init datum row", K(ret)); } else if (OB_FAIL(unprojected_row_.init(*context.stmt_allocator_, param.get_out_col_cnt()))) { STORAGE_LOG(WARN, "Failed to init datum row", K(ret)); - } else if (OB_FAIL(full_row_.init(*context.stmt_allocator_, param.get_max_out_col_cnt()))) { - STORAGE_LOG(WARN, "Failed to init datum row", K(ret)); } else if (OB_FAIL(nop_pos_.init(*context.stmt_allocator_, param.get_max_out_col_cnt()))) { STORAGE_LOG(WARN, "Fail to init nop pos, ", K(ret)); } else if (NULL != param.op_ && (NULL == param.output_exprs_ || NULL == param.row2exprs_projector_ @@ -140,7 +138,6 @@ int ObMultipleMerge::init( for (int64_t i = cur_row_.get_column_count(); i < param.get_out_col_cnt(); ++i) { cur_row_.storage_datums_[i].set_nop(); } - full_row_.count_ = param.get_max_out_col_cnt(); unprojected_row_.count_ = 0; get_table_param_ = get_table_param; if (OB_SUCC(ret)) { diff --git a/src/storage/access/ob_multiple_merge.h b/src/storage/access/ob_multiple_merge.h index a72c235a4..b364e3feb 100644 --- a/src/storage/access/ob_multiple_merge.h +++ b/src/storage/access/ob_multiple_merge.h @@ -122,7 +122,6 @@ protected: common::ObSEArray tables_; blocksstable::ObDatumRow cur_row_; blocksstable::ObDatumRow 
unprojected_row_; - blocksstable::ObDatumRow full_row_; const ObIArray *out_cols_projector_; int64_t curr_scan_index_; blocksstable::ObDatumRowkey curr_rowkey_; diff --git a/src/storage/access/ob_multiple_multi_skip_scan_merge.cpp b/src/storage/access/ob_multiple_multi_skip_scan_merge.cpp new file mode 100644 index 000000000..870416df5 --- /dev/null +++ b/src/storage/access/ob_multiple_multi_skip_scan_merge.cpp @@ -0,0 +1,124 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#include "ob_multiple_multi_skip_scan_merge.h" + +namespace oceanbase +{ +namespace storage +{ + +ObMultipleMultiSkipScanMerge::ObMultipleMultiSkipScanMerge() + : cur_range_idx_(0), + ranges_(nullptr), + skip_scan_ranges_(nullptr) +{ +} + +ObMultipleMultiSkipScanMerge::~ObMultipleMultiSkipScanMerge() +{ + reset(); +} + +int ObMultipleMultiSkipScanMerge::init( + const ObTableAccessParam ¶m, + ObTableAccessContext &context, + const ObGetTableParam &get_table_param) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObMultipleSkipScanMerge::init(param, context, get_table_param))) { + STORAGE_LOG(WARN, "Fail to init ObMultipleSkipScanMerge", K(ret), K(context), K(get_table_param)); + } + return ret; +} + +void ObMultipleMultiSkipScanMerge::reset() +{ + cur_range_idx_ = 0; + ranges_ = nullptr; + skip_scan_ranges_ = nullptr; + ObMultipleSkipScanMerge::reset(); +} + +void ObMultipleMultiSkipScanMerge::reuse() +{ + cur_range_idx_ = 0; + ranges_ = nullptr; + skip_scan_ranges_ = nullptr; + ObMultipleSkipScanMerge::reuse(); + +} + +int 
ObMultipleMultiSkipScanMerge::open( + const common::ObIArray &ranges, + const common::ObIArray &skip_scan_ranges) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(ranges.count() != skip_scan_ranges.count() || ranges.count() == 0)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "Invalid argument", K(ret), K(ranges.count()), K(skip_scan_ranges.count())); + } else if (OB_FAIL(ObMultipleSkipScanMerge::open(ranges.at(cur_range_idx_), skip_scan_ranges.at(cur_range_idx_)))) { + STORAGE_LOG(WARN, "Fail to open cur range", K(ret), K(cur_range_idx_)); + } else { + ranges_ = &ranges; + skip_scan_ranges_ = &skip_scan_ranges; + } + return ret; +} + +int ObMultipleMultiSkipScanMerge::inner_get_next_row(blocksstable::ObDatumRow &row) +{ + int ret = OB_SUCCESS; + while (OB_SUCC(ret)) { + if (OB_FAIL(ObMultipleSkipScanMerge::inner_get_next_row(row))) { + if (OB_UNLIKELY(OB_ITER_END != ret && OB_PUSHDOWN_STATUS_CHANGED != ret)) { + STORAGE_LOG(WARN, "Fail to inner get next row", K(ret), K(cur_range_idx_)); + } else if (OB_ITER_END == ret) { + if (++cur_range_idx_ < ranges_->count()) { + ret = OB_SUCCESS; + ObMultipleSkipScanMerge::reuse(); + if (OB_FAIL(ObMultipleSkipScanMerge::open(ranges_->at(cur_range_idx_), skip_scan_ranges_->at(cur_range_idx_)))) { + STORAGE_LOG(WARN, "Fail to open cur range", K(ret), K(cur_range_idx_)); + } + } + } + } else { + STORAGE_LOG(DEBUG, "get next row", K(row)); + break; + } + } + return ret; +} + +int ObMultipleMultiSkipScanMerge::inner_get_next_rows() +{ + int ret = OB_SUCCESS; + while (OB_SUCC(ret)) { + if (OB_FAIL(ObMultipleSkipScanMerge::inner_get_next_rows())) { + if (OB_UNLIKELY(OB_ITER_END != ret && OB_PUSHDOWN_STATUS_CHANGED != ret)) { + STORAGE_LOG(WARN, "Fail to inner get next row", K(ret), K(cur_range_idx_)); + } else if (OB_ITER_END == ret) { + if (++cur_range_idx_ < ranges_->count()) { + ret = OB_SUCCESS; + ObMultipleSkipScanMerge::reuse(); + if (OB_FAIL(ObMultipleSkipScanMerge::open(ranges_->at(cur_range_idx_), 
skip_scan_ranges_->at(cur_range_idx_)))) { + STORAGE_LOG(WARN, "Fail to open cur range", K(ret), K(cur_range_idx_)); + } + } + } + } else { + break; + } + } + return ret; +} + +} +} \ No newline at end of file diff --git a/src/storage/access/ob_multiple_multi_skip_scan_merge.h b/src/storage/access/ob_multiple_multi_skip_scan_merge.h new file mode 100644 index 000000000..56978bf95 --- /dev/null +++ b/src/storage/access/ob_multiple_multi_skip_scan_merge.h @@ -0,0 +1,46 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OCEANBASE_STORAGE_MULTIPLE_MULTI_SKIP_SCAN_MERGE_H +#define OCEANBASE_STORAGE_MULTIPLE_MULTI_SKIP_SCAN_MERGE_H + +#include "ob_multiple_skip_scan_merge.h" +namespace oceanbase +{ +namespace storage +{ + +class ObMultipleMultiSkipScanMerge final : public ObMultipleSkipScanMerge +{ +public: + ObMultipleMultiSkipScanMerge(); + virtual ~ObMultipleMultiSkipScanMerge(); + virtual int init( + const ObTableAccessParam ¶m, + ObTableAccessContext &context, + const ObGetTableParam &get_table_param) override; + virtual void reset() override; + virtual void reuse() override; + int open( + const common::ObIArray &ranges, + const common::ObIArray &skip_scan_ranges); +protected: + virtual int inner_get_next_row(blocksstable::ObDatumRow &row) override; + virtual int inner_get_next_rows() override; +private: + int64_t cur_range_idx_; + const ObIArray *ranges_; + const ObIArray *skip_scan_ranges_; +}; + +} +} + +#endif // OCEANBASE_STORAGE_MULTIPLE_MULTI_SKIP_SCAN_MERGE_H \ No newline at end of 
file diff --git a/src/storage/access/ob_multiple_scan_merge.cpp b/src/storage/access/ob_multiple_scan_merge.cpp index ebccee1a1..650f167b3 100644 --- a/src/storage/access/ob_multiple_scan_merge.cpp +++ b/src/storage/access/ob_multiple_scan_merge.cpp @@ -200,7 +200,6 @@ int ObMultipleScanMerge::construct_iters() void ObMultipleScanMerge::reset() { - ObMultipleMerge::reset(); if (nullptr != access_ctx_ && nullptr != access_ctx_->stmt_allocator_) { if (nullptr != simple_merge_) { simple_merge_->~ObScanSimpleMerger(); @@ -219,6 +218,7 @@ void ObMultipleScanMerge::reset() consumer_cnt_ = 0; range_ = NULL; cow_range_.reset(); + ObMultipleMerge::reset(); } void ObMultipleScanMerge::reuse() diff --git a/src/storage/access/ob_multiple_skip_scan_merge.cpp b/src/storage/access/ob_multiple_skip_scan_merge.cpp new file mode 100644 index 000000000..58559cb70 --- /dev/null +++ b/src/storage/access/ob_multiple_skip_scan_merge.cpp @@ -0,0 +1,442 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+ +#include "ob_multiple_skip_scan_merge.h" +#include "lib/oblog/ob_log_module.h" + +namespace oceanbase +{ +namespace storage +{ + +ObMultipleSkipScanMerge::ObMultipleSkipScanMerge() + : ObMultipleScanMerge(), + state_(SCAN_ROWKEY), + schema_rowkey_cnt_(0), + ss_rowkey_prefix_cnt_(0), + scan_rowkey_range_(), + scan_rows_range_(), + datums_cnt_(0), + datums_(nullptr), + range_allocator_("SKIP_SCAN") +{ +} + +ObMultipleSkipScanMerge::~ObMultipleSkipScanMerge() +{ + reset(); +} + +int ObMultipleSkipScanMerge::init( + const ObTableAccessParam ¶m, + ObTableAccessContext &context, + const ObGetTableParam &get_table_param) +{ + int ret = OB_SUCCESS; + context.range_allocator_ = &range_allocator_; + if (OB_FAIL(ObMultipleScanMerge::init(param, context, get_table_param))) { + STORAGE_LOG(WARN, "Fail to init ObMultipleScanMerge", K(ret), K(param), K(context), K(get_table_param)); + } else { + // prepare ranges for finding distinct rowkey prefix and outputing rows + void *buf = nullptr; + schema_rowkey_cnt_ = param.iter_param_.get_schema_rowkey_count(); + ss_rowkey_prefix_cnt_ = param.iter_param_.get_ss_rowkey_prefix_cnt(); + datums_cnt_ = SKIP_SCAN_ROWKEY_DATUMS_ARRAY_CNT * schema_rowkey_cnt_; + if (schema_rowkey_cnt_ <= 0 || ss_rowkey_prefix_cnt_ <= 0 || ss_rowkey_prefix_cnt_ > schema_rowkey_cnt_) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "Invalid argument", K(ret), K(schema_rowkey_cnt_), K(ss_rowkey_prefix_cnt_)); + } else if (OB_ISNULL(buf = context.stmt_allocator_->alloc(sizeof(ObStorageDatum) * datums_cnt_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "Fail to alloc datums", K(ret), K(datums_cnt_)); + } else if (FALSE_IT(datums_ = new (buf) ObStorageDatum[datums_cnt_])) { + } else if (OB_FAIL(prepare_range(start_key_of_scan_rowkey_range(), scan_rowkey_range_))) { + STORAGE_LOG(WARN, "Fail to prepare distinct scan range", K(ret)); + } else if (OB_FAIL(prepare_range(start_key_of_scan_rows_range(), scan_rows_range_))) { + STORAGE_LOG(WARN, "Fail to 
prepare skip scan range", K(ret)); + } else { + STORAGE_LOG(DEBUG, "success to init ObMultipleSkipScanMerge", K(param), K(context), K(get_table_param), + K(schema_rowkey_cnt_), K(ss_rowkey_prefix_cnt_)); + } + } + return ret; +} + +void ObMultipleSkipScanMerge::reset() +{ + state_ = SCAN_ROWKEY; + schema_rowkey_cnt_ = 0; + ss_rowkey_prefix_cnt_ = 0; + scan_rowkey_range_.reset(); + scan_rows_range_.reset(); + datums_cnt_ = 0; + if (OB_NOT_NULL(datums_) && OB_NOT_NULL(access_ctx_->stmt_allocator_)) { + access_ctx_->stmt_allocator_->free(datums_); + } + datums_ = nullptr; + range_allocator_.reset(); + ObMultipleScanMerge::reset(); +} + +void ObMultipleSkipScanMerge::reuse() +{ + state_ = SCAN_ROWKEY; + reuse_datums(); + range_allocator_.reuse(); + ObMultipleScanMerge::reuse(); +} + +// range: the original key range to scan rows +// skip_scan_range: the key range only contains suffix columns in rowkey +int ObMultipleSkipScanMerge::open(const blocksstable::ObDatumRange &range, const blocksstable::ObDatumRange &skip_scan_range) +{ + int ret = OB_SUCCESS; + bool exceeded = false; + const int64_t skip_range_datum_cnt = schema_rowkey_cnt_ - ss_rowkey_prefix_cnt_; + if (skip_scan_range.is_whole_range()) { + ret = OB_NOT_SUPPORTED; + STORAGE_LOG(WARN, "not supported index skip scan plan", K(ret)); + } else if (skip_scan_range.start_key_.get_datum_cnt() > skip_range_datum_cnt || + (!skip_scan_range.start_key_.is_min_rowkey() && skip_scan_range.start_key_.get_datum_cnt() != skip_range_datum_cnt) || + (!skip_scan_range.end_key_.is_max_rowkey() && skip_scan_range.end_key_.get_datum_cnt() != skip_range_datum_cnt)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid skip scan range", K(ret), K(skip_scan_range), K(schema_rowkey_cnt_), K(ss_rowkey_prefix_cnt_)); + } else if (OB_FAIL(ObMultipleScanMerge::open(range))) { + STORAGE_LOG(WARN, "Fail to open ObMultipleScanMerge", K(ret), K(range)); + } else { + prepare_rowkey(start_key_of_scan_rowkey_range(), range.start_key_, 
schema_rowkey_cnt_, true); + prepare_rowkey(end_key_of_scan_rowkey_range(), range.end_key_, schema_rowkey_cnt_, false); + scan_rowkey_range_.set_border_flag(range.get_border_flag()); + // generate key range for outputing rows + for (int64_t i = 0; i < ss_rowkey_prefix_cnt_; ++i) { + start_key_of_scan_rows_range()[i].set_min(); + end_key_of_scan_rows_range()[i].set_max(); + } + prepare_rowkey(start_key_of_scan_rows_range() + ss_rowkey_prefix_cnt_, + skip_scan_range.start_key_, + schema_rowkey_cnt_ - ss_rowkey_prefix_cnt_, + true); + prepare_rowkey(end_key_of_scan_rows_range() + ss_rowkey_prefix_cnt_, + skip_scan_range.end_key_, + schema_rowkey_cnt_ - ss_rowkey_prefix_cnt_, + false); + scan_rows_range_.set_border_flag(skip_scan_range.get_border_flag()); + STORAGE_LOG(TRACE, "open skip scan", K(schema_rowkey_cnt_), K(ss_rowkey_prefix_cnt_), + K(scan_rows_range_), K(range), K(skip_scan_range)); + } + return ret; +} + +int ObMultipleSkipScanMerge::inner_get_next_row(blocksstable::ObDatumRow &row) +{ + int ret = OB_SUCCESS; + bool got_row = false; + while (OB_SUCC(ret) && !got_row) { + switch(state_) { + case SCAN_ROWKEY: { + // get next rowkey + // after get next rowkey, update and open scan rows range + if (OB_FAIL(ObMultipleScanMerge::inner_get_next_row(row))) { + if (OB_UNLIKELY(OB_ITER_END != ret && OB_PUSHDOWN_STATUS_CHANGED != ret)) { + STORAGE_LOG(WARN, "Fail to get next row", K(ret)); + } else if (OB_PUSHDOWN_STATUS_CHANGED == ret) { + } else { + state_ = SCAN_FINISHED; + } + } else { + state_ = UPDATE_SCAN_ROWS_RANGE; + } + break; + } + case UPDATE_SCAN_ROWS_RANGE: { + if (OB_FAIL(update_scan_rows_range(row))) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + state_ = UPDATE_SCAN_ROWKEY_RANGE; + } else { + STORAGE_LOG(WARN, "Fail to update scan rows range", K(ret), K(row)); + } + } else { + STORAGE_LOG(DEBUG, "skip scan update scan rows range", K(row), K(scan_rows_range_)); + state_ = SCAN_ROWS; + } + break; + } + case SCAN_ROWS: { + // get next 
row + // after get next row, update and open scan rowkey range + if (OB_FAIL(ObMultipleScanMerge::inner_get_next_row(row))) { + if (OB_UNLIKELY(OB_ITER_END != ret && OB_PUSHDOWN_STATUS_CHANGED != ret)) { + STORAGE_LOG(WARN, "Fail to get next row", K(ret)); + } else if (OB_PUSHDOWN_STATUS_CHANGED == ret) { + } else { + ret = OB_SUCCESS; + state_ = UPDATE_SCAN_ROWKEY_RANGE; + } + } else { + STORAGE_LOG(DEBUG, "skip scan get next row", K(row)); + got_row = true; + } + break; + } + case UPDATE_SCAN_ROWKEY_RANGE: { + if (OB_FAIL(update_scan_rowkey_range())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + STORAGE_LOG(WARN, "Fail to update scan rowkey range", K(ret), K(row)); + } else { + state_ = SCAN_FINISHED; + } + } else { + STORAGE_LOG(DEBUG, "skip scan update scan rowkey range", K(row), K(scan_rowkey_range_)); + state_ = SCAN_ROWKEY; + } + break; + } + case SCAN_FINISHED: { + ret = OB_ITER_END; + break; + } + default : { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected state", K(state_)); + } + } + } + return ret; +} + +int ObMultipleSkipScanMerge::inner_get_next_rows() +{ + int ret = OB_SUCCESS; + bool end_loop = false; + if (SCAN_ROWS != state_) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected state", K(ret), K(state_)); + } else { + while (OB_SUCC(ret) && !end_loop) { + switch(state_) { + case SCAN_ROWKEY: { + if (OB_FAIL(ObMultipleScanMerge::inner_get_next_row(unprojected_row_))) { + if (OB_UNLIKELY(OB_ITER_END != ret && OB_PUSHDOWN_STATUS_CHANGED != ret)) { + STORAGE_LOG(WARN, "Fail to get next row", K(ret)); + } else if (OB_PUSHDOWN_STATUS_CHANGED == ret) { + } else { + state_ = SCAN_FINISHED; + } + } else { + state_ = UPDATE_SCAN_ROWS_RANGE; + } + break; + } + case UPDATE_SCAN_ROWS_RANGE: { + if (OB_FAIL(update_scan_rows_range(unprojected_row_))) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + state_ = UPDATE_SCAN_ROWKEY_RANGE; + } else { + STORAGE_LOG(WARN, "Fail to update scan rows range", K(ret), K(unprojected_row_)); + } 
+ } else { + STORAGE_LOG(DEBUG, "skip scan update scan rows range", K(unprojected_row_), K(scan_rows_range_)); + state_ = SCAN_ROWS; + } + break; + } + case SCAN_ROWS: { + bool can_batch = false; + if (OB_FAIL(can_batch_scan(can_batch))) { + STORAGE_LOG(WARN, "Fail to check can batch scan", K(ret)); + } else if (!can_batch) { + end_loop = true; + ret = OB_PUSHDOWN_STATUS_CHANGED; + } else if (OB_FAIL(ObMultipleScanMerge::inner_get_next_rows())) { + if (OB_UNLIKELY(OB_ITER_END != ret && OB_PUSHDOWN_STATUS_CHANGED != ret)) { + STORAGE_LOG(WARN, "Fail to get next rows", K(ret)); + } else if (OB_PUSHDOWN_STATUS_CHANGED == ret) { + } else { + ret = OB_SUCCESS; + state_ = UPDATE_SCAN_ROWKEY_RANGE; + } + } else { + end_loop = true; + } + break; + } + case UPDATE_SCAN_ROWKEY_RANGE: { + if (OB_FAIL(update_scan_rowkey_range())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + STORAGE_LOG(WARN, "Fail to update scan rowkey range", K(ret), K(unprojected_row_)); + } else { + state_ = SCAN_FINISHED; + } + } else { + STORAGE_LOG(DEBUG, "skip scan update scan rowkey range", K(unprojected_row_), K(scan_rowkey_range_)); + state_ = SCAN_ROWKEY; + } + break; + } + case SCAN_FINISHED: { + ret = OB_ITER_END; + break; + } + default : { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected state", K(state_)); + } + } + } + } + return ret; +} + +int ObMultipleSkipScanMerge::can_batch_scan(bool &can_batch) +{ + int ret = OB_SUCCESS; + can_batch = (state_ == SCAN_ROWS); + if (can_batch && OB_FAIL(ObMultipleScanMerge::can_batch_scan(can_batch))) { + STORAGE_LOG(WARN, "Fail to check can batch scan", K(ret)); + } + return ret; +} + +int ObMultipleSkipScanMerge::prepare_range(ObStorageDatum *datums, ObDatumRange &range) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(range.start_key_.assign(datums, schema_rowkey_cnt_))) { + STORAGE_LOG(WARN, "Fail to assign start key", K(ret), K(schema_rowkey_cnt_)); + } else if (OB_FAIL(range.end_key_.assign(datums + schema_rowkey_cnt_, schema_rowkey_cnt_))) { + 
STORAGE_LOG(WARN, "Fail to assign end key", K(ret), K(schema_rowkey_cnt_)); + } + return ret; +} + +void ObMultipleSkipScanMerge::prepare_rowkey( + blocksstable::ObStorageDatum *datums, + const blocksstable::ObDatumRowkey &rowkey, + const int64_t datum_cnt, + const bool is_min) +{ + for (int64_t i = 0; i < datum_cnt; ++i) { + if (i < rowkey.get_datum_cnt()) { + datums[i] = rowkey.get_datum(i); + } else if (is_min) { + datums[i].set_min(); + } else { + datums[i].set_max(); + } + } +} + +int ObMultipleSkipScanMerge::update_scan_rows_range(blocksstable::ObDatumRow &row) +{ + int ret = OB_SUCCESS; + range_allocator_.reuse(); + for (int64_t i = 0; OB_SUCC(ret) && i < ss_rowkey_prefix_cnt_; ++i) { + ObStorageDatum &prefix_of_start_key = start_key_of_scan_rows_range()[i]; + ObStorageDatum &prefix_of_end_key = end_key_of_scan_rows_range()[i]; + prefix_of_start_key.reuse(); + prefix_of_end_key.reuse(); + if (OB_FAIL(prefix_of_start_key.deep_copy(row.storage_datums_[i], range_allocator_))) { + STORAGE_LOG(WARN, "Fail to deep copy start key's datum", K(ret), K(i), K(row), K(scan_rows_range_)); + } else if (OB_FAIL(prefix_of_end_key.deep_copy(row.storage_datums_[i], range_allocator_))) { + STORAGE_LOG(WARN, "Fail to deep copy end key's datum", K(ret), K(i), K(row), K(scan_rows_range_)); + } + } + if (OB_SUCC(ret)) { + // check current skip scan range may exceed original range? 
+ // one case is in parallel execution, splitted range + bool exceeded = false; + if (OB_FAIL(check_range_exceeded(exceeded))) { + STORAGE_LOG(WARN, "Fail to check range exceed", K(ret)); + } else if (exceeded) { + ret = OB_ITER_END; + } + } + + if (OB_SUCC(ret)) { + ObMultipleScanMerge::reuse(); + const ObColDescIArray *col_descs = nullptr; + if (OB_ISNULL(col_descs = access_param_->iter_param_.get_out_col_descs())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "Unexpected null out cols", K(ret)); + } else if (OB_FAIL(scan_rows_range_.prepare_memtable_readable(*col_descs, range_allocator_))) { + STORAGE_LOG(WARN, "Fail to transfer store rowkey", K(ret), K(scan_rows_range_)); + } else if (OB_FAIL(ObMultipleScanMerge::open(scan_rows_range_))) { + STORAGE_LOG(WARN, "Fail to open scan rows range", K(ret), K(scan_rows_range_)); + } + } + STORAGE_LOG(TRACE, "Update and open scan rows range", K(ret), K(row), K(scan_rows_range_)); + return ret; +} + +int ObMultipleSkipScanMerge::update_scan_rowkey_range() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!scan_rows_range_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected scan rows range", K(ret), K(scan_rows_range_)); + } else { + ObStorageDatum *rowkey_datums = access_ctx_->query_flag_.is_reverse_scan() ? + end_key_of_scan_rowkey_range() : + start_key_of_scan_rowkey_range(); + for (int64_t i = 0; OB_SUCC(ret) && i < ss_rowkey_prefix_cnt_; ++i) { + if (OB_FAIL(rowkey_datums[i].deep_copy(scan_rows_range_.start_key_.get_datum(i), range_allocator_))) { + STORAGE_LOG(WARN, "Fail to deep copy start key's datum", K(ret), K(i), K(scan_rowkey_range_)); + } + } + if (OB_SUCC(ret)) { + for (int64_t i = ss_rowkey_prefix_cnt_; i < schema_rowkey_cnt_; ++i) { + access_ctx_->query_flag_.is_reverse_scan() ? rowkey_datums[i].set_min() : rowkey_datums[i].set_max(); + } + access_ctx_->query_flag_.is_reverse_scan() ? 
scan_rowkey_range_.set_right_open() : scan_rowkey_range_.set_left_open(); + + int cmp_ret = 0; + const ObStorageDatumUtils &datum_utils = access_param_->iter_param_.get_read_info()->get_datum_utils(); + if (OB_FAIL(scan_rowkey_range_.start_key_.compare(scan_rowkey_range_.end_key_, datum_utils, cmp_ret))) { + STORAGE_LOG(WARN, "Fail to compare", K(ret), K(scan_rowkey_range_)); + } else if (cmp_ret >= 0) { + ret = OB_ITER_END; + } else { + ObMultipleScanMerge::reuse(); + const ObColDescIArray *col_descs = nullptr; + if (OB_ISNULL(col_descs = access_param_->iter_param_.get_out_col_descs())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "Unexpected null out cols", K(ret)); + } else if (OB_FAIL(scan_rowkey_range_.prepare_memtable_readable(*col_descs, range_allocator_))) { + STORAGE_LOG(WARN, "Fail to transfer store rowkey", K(ret), K(scan_rowkey_range_)); + } else if (OB_FAIL(ObMultipleScanMerge::open(scan_rowkey_range_))) { + STORAGE_LOG(WARN, "Fail to open scan rowkey range", K(ret), K(scan_rowkey_range_)); + } + } + } + } + STORAGE_LOG(TRACE, "Update and open scan rowkey range", K(ret), K(scan_rows_range_), K(scan_rowkey_range_)); + return ret; +} + +int ObMultipleSkipScanMerge::check_range_exceeded(bool &exceeded) +{ + int ret = OB_SUCCESS; + int cmp_ret = 0; + exceeded = false; + const ObStorageDatumUtils &datum_utils = access_param_->iter_param_.get_read_info()->get_datum_utils(); + if (OB_FAIL(scan_rows_range_.end_key_.compare(scan_rowkey_range_.start_key_, datum_utils, cmp_ret))) { + STORAGE_LOG(WARN, "Fail to compare", K(ret), K(scan_rows_range_.end_key_), K(scan_rowkey_range_.start_key_)); + } else if (cmp_ret < 0 || (0 == cmp_ret && (scan_rows_range_.is_right_open() || scan_rowkey_range_.is_left_open()))) { + exceeded = true; + } else if (OB_FAIL(scan_rows_range_.start_key_.compare(scan_rowkey_range_.end_key_, datum_utils, cmp_ret))) { + STORAGE_LOG(WARN, "Fail to compare", K(ret), K(scan_rows_range_.end_key_), K(scan_rowkey_range_.start_key_)); + } else if 
(cmp_ret > 0 || (0 == cmp_ret && (scan_rows_range_.is_left_open() || scan_rowkey_range_.is_right_open()))) { + exceeded = true; + } + return ret; +} + +} +} \ No newline at end of file diff --git a/src/storage/access/ob_multiple_skip_scan_merge.h b/src/storage/access/ob_multiple_skip_scan_merge.h new file mode 100644 index 000000000..dd9451755 --- /dev/null +++ b/src/storage/access/ob_multiple_skip_scan_merge.h @@ -0,0 +1,89 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OCEANBASE_STORAGE_MULTIPLE_SKIP_SCAN_MERGE_ +#define OCEANBASE_STORAGE_MULTIPLE_SKIP_SCAN_MERGE_ + +#include "ob_multiple_scan_merge.h" +#include "storage/blocksstable/ob_datum_row.h" + +namespace oceanbase +{ +namespace storage +{ +using namespace oceanbase::blocksstable; + +class ObMultipleSkipScanMerge : public ObMultipleScanMerge +{ +public: + ObMultipleSkipScanMerge(); + virtual ~ObMultipleSkipScanMerge(); + virtual int init( + const ObTableAccessParam ¶m, + ObTableAccessContext &context, + const ObGetTableParam &get_table_param) override; + virtual void reset() override; + virtual void reuse() override; + int open(const blocksstable::ObDatumRange &range) { return OB_NOT_SUPPORTED; } + int open(const blocksstable::ObDatumRange &range, const blocksstable::ObDatumRange &skip_scan_range); +protected: + virtual int inner_get_next_row(blocksstable::ObDatumRow &row) override; + virtual int inner_get_next_rows() override; + virtual int can_batch_scan(bool &can_batch) override; +private: + const static int32_t 
START_KEY_OFFSET_OF_SCAN_ROWKEY_RANGE = 0; + const static int32_t END_KEY_OFFSET_OF_SCAN_ROWKEY_RANGE = 1; + const static int32_t START_KEY_OFFSET_OF_SCAN_ROWS_RANGE = 2; + const static int32_t END_KEY_OFFSET_OF_SCAN_ROWS_RANGE = 3; + const static int32_t SKIP_SCAN_ROWKEY_DATUMS_ARRAY_CNT = 4; + int prepare_range(blocksstable::ObStorageDatum *datums, blocksstable::ObDatumRange &range); + void prepare_rowkey(blocksstable::ObStorageDatum *datums, const blocksstable::ObDatumRowkey &rowkey, + const int64_t datum_cnt, const bool is_min); + int update_scan_rowkey_range(); + int update_scan_rows_range(blocksstable::ObDatumRow &row); + int check_range_exceeded(bool &exceeded); + OB_INLINE blocksstable::ObStorageDatum* start_key_of_scan_rowkey_range() + { return datums_ + START_KEY_OFFSET_OF_SCAN_ROWKEY_RANGE * schema_rowkey_cnt_; } + OB_INLINE blocksstable::ObStorageDatum* end_key_of_scan_rowkey_range() + { return datums_ + END_KEY_OFFSET_OF_SCAN_ROWKEY_RANGE * schema_rowkey_cnt_; } + OB_INLINE blocksstable::ObStorageDatum* start_key_of_scan_rows_range() + { return datums_ + START_KEY_OFFSET_OF_SCAN_ROWS_RANGE * schema_rowkey_cnt_; } + OB_INLINE blocksstable::ObStorageDatum* end_key_of_scan_rows_range() + { return datums_ + END_KEY_OFFSET_OF_SCAN_ROWS_RANGE * schema_rowkey_cnt_; } + OB_INLINE void reuse_datums() + { + if (OB_NOT_NULL(datums_)) { + for (int64_t i = 0; i < datums_cnt_; ++i) { + datums_[i].reuse(); + } + } + } + + enum SkipScanState { + SCAN_ROWKEY, + UPDATE_SCAN_ROWS_RANGE, + SCAN_ROWS, + UPDATE_SCAN_ROWKEY_RANGE, + SCAN_FINISHED, + }; + SkipScanState state_; + int64_t schema_rowkey_cnt_; + int64_t ss_rowkey_prefix_cnt_; + blocksstable::ObDatumRange scan_rowkey_range_; + blocksstable::ObDatumRange scan_rows_range_; + int64_t datums_cnt_; + blocksstable::ObStorageDatum *datums_; + common::ObArenaAllocator range_allocator_; +}; + +} +} + +#endif // OCEANBASE_STORAGE_MULTIPLE_SKIP_SCAN_MERGE_ \ No newline at end of file diff --git 
a/src/storage/access/ob_single_merge.cpp b/src/storage/access/ob_single_merge.cpp index 4abefa7f2..8b7263ca3 100644 --- a/src/storage/access/ob_single_merge.cpp +++ b/src/storage/access/ob_single_merge.cpp @@ -26,7 +26,7 @@ namespace storage { ObSingleMerge::ObSingleMerge() - : rowkey_(NULL), fuse_row_cache_fetcher_() + : rowkey_(NULL), full_row_(), handle_(), fuse_row_cache_fetcher_() { type_ = ObQRIterType::T_SINGLE_GET; } @@ -46,7 +46,15 @@ int ObSingleMerge::open(const ObDatumRowkey &rowkey) LOG_WARN("ObSingleMerge has not been inited", K(ret), K_(get_table_param)); } else { const ObTabletMeta &tablet_meta = get_table_param_.tablet_iter_.tablet_handle_.get_obj()->get_tablet_meta(); - if (OB_FAIL(fuse_row_cache_fetcher_.init(access_param_->iter_param_.tablet_id_, access_param_->iter_param_.get_read_info(), tablet_meta.clog_checkpoint_scn_.get_val_for_tx()))) { + if (!full_row_.is_valid()) { + if (OB_FAIL(full_row_.init(*access_ctx_->stmt_allocator_, access_param_->get_max_out_col_cnt()))) { + STORAGE_LOG(WARN, "Failed to init datum row", K(ret)); + } else { + full_row_.count_ = access_param_->get_max_out_col_cnt(); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(fuse_row_cache_fetcher_.init(access_param_->iter_param_.tablet_id_, access_param_->iter_param_.get_read_info(), tablet_meta.clog_checkpoint_scn_.get_val_for_tx()))) { STORAGE_LOG(WARN, "fail to init fuse row cache fetcher", K(ret)); } else { rowkey_ = &rowkey; @@ -60,12 +68,14 @@ void ObSingleMerge::reset() { ObMultipleMerge::reset(); rowkey_ = nullptr; + full_row_.reset(); handle_.reset(); } void ObSingleMerge::reuse() { ObMultipleMerge::reuse(); + full_row_.row_flag_.reset(); rowkey_ = NULL; handle_.reset(); } diff --git a/src/storage/access/ob_single_merge.h b/src/storage/access/ob_single_merge.h index b1e2d3d38..93cad33ac 100644 --- a/src/storage/access/ob_single_merge.h +++ b/src/storage/access/ob_single_merge.h @@ -46,11 +46,11 @@ private: bool &final_result, bool &have_uncommited_row, bool 
&need_update_fuse_cache); -private: - const blocksstable::ObDatumRowkey *rowkey_; - blocksstable::ObFuseRowValueHandle handle_; private: static const int64_t SINGLE_GET_FUSE_ROW_CACHE_PUT_COUNT_THRESHOLD = 50; + const blocksstable::ObDatumRowkey *rowkey_; + blocksstable::ObDatumRow full_row_; + blocksstable::ObFuseRowValueHandle handle_; ObFuseRowCacheFetcher fuse_row_cache_fetcher_; // disallow copy DISALLOW_COPY_AND_ASSIGN(ObSingleMerge); diff --git a/src/storage/access/ob_sstable_multi_version_row_iterator.cpp b/src/storage/access/ob_sstable_multi_version_row_iterator.cpp index a7f4898d5..feef868d3 100644 --- a/src/storage/access/ob_sstable_multi_version_row_iterator.cpp +++ b/src/storage/access/ob_sstable_multi_version_row_iterator.cpp @@ -73,12 +73,12 @@ int ObSSTableMultiVersionRowGetter::inner_open( LOG_WARN("invalid argument", K(ret), KP(query_range), KP(table)); } else { base_rowkey_ = static_cast(query_range); - if (OB_FAIL(base_rowkey_->to_multi_version_range(*access_ctx.allocator_, multi_version_range_))) { + if (OB_FAIL(base_rowkey_->to_multi_version_range(*access_ctx.get_range_allocator(), multi_version_range_))) { STORAGE_LOG(WARN, "Failed to transfer multi version range", K(ret), KPC_(base_rowkey)); } else if (OB_FAIL(ObSSTableRowScanner::inner_open( iter_param, access_ctx, table, &multi_version_range_))) { LOG_WARN("failed to open scanner", K(ret)); - } else if (OB_FAIL(not_exist_row_.init(*access_ctx.allocator_, iter_param.get_out_col_cnt()))) { + } else if (OB_FAIL(not_exist_row_.init(*access_ctx.get_range_allocator(), iter_param.get_out_col_cnt()))) { LOG_WARN("fail to init datum row", K(ret)); } else { not_exist_row_.row_flag_.set_flag(ObDmlFlag::DF_NOT_EXIST); @@ -143,7 +143,7 @@ int ObSSTableMultiVersionRowScanner::inner_open( ObSSTable *sstable = static_cast(table); base_range_ = static_cast(query_range); trans_version_range_ = access_ctx.trans_version_range_; - if (OB_FAIL(base_range_->to_multi_version_range(*access_ctx.allocator_, 
multi_version_range_))) { + if (OB_FAIL(base_range_->to_multi_version_range(*access_ctx.get_range_allocator(), multi_version_range_))) { STORAGE_LOG(WARN, "Failed to transfer multi version range", K(ret), KPC(base_range_)); } else if (OB_FAIL(ObSSTableRowScanner::inner_open(iter_param, access_ctx, table, &multi_version_range_))) { LOG_WARN("failed to open scanner", K(ret)); @@ -192,7 +192,7 @@ int ObSSTableMultiVersionRowMultiGetter::inner_open( ObDatumRange tmp_multi_version_range; for (int i = 0; OB_SUCC(ret) && i < base_rowkeys_->count(); i++) { tmp_multi_version_range.reset(); - if (OB_FAIL(base_rowkeys_->at(i).to_multi_version_range(*access_ctx.allocator_, tmp_multi_version_range))) { + if (OB_FAIL(base_rowkeys_->at(i).to_multi_version_range(*access_ctx.get_range_allocator(), tmp_multi_version_range))) { STORAGE_LOG(WARN, "Failed to transfer multi version range", K(ret), K(i), K(base_rowkeys_->at(i))); } else if (OB_FAIL(multi_version_ranges_.push_back(tmp_multi_version_range))) { LOG_WARN("push back multi version range failed", K(ret)); @@ -202,7 +202,7 @@ int ObSSTableMultiVersionRowMultiGetter::inner_open( } else if (OB_FAIL(ObSSTableRowMultiScanner::inner_open( iter_param, access_ctx, table, &multi_version_ranges_))) { LOG_WARN("failed to open multi scanner", K(ret)); - } else if (OB_FAIL(not_exist_row_.init(*access_ctx.allocator_, iter_param.get_out_col_cnt()))) { + } else if (OB_FAIL(not_exist_row_.init(*access_ctx.get_range_allocator(), iter_param.get_out_col_cnt()))) { LOG_WARN("fail to init datum row", K(ret)); } else { not_exist_row_.row_flag_.set_flag(ObDmlFlag::DF_NOT_EXIST); @@ -303,7 +303,7 @@ int ObSSTableMultiVersionRowMultiScanner::inner_open( ObDatumRange tmp_multi_version_range; for (int i = 0; OB_SUCC(ret) && i < base_ranges->count(); i++) { tmp_multi_version_range.reset(); - if (OB_FAIL(base_ranges->at(i).to_multi_version_range(*access_ctx.allocator_, tmp_multi_version_range))) { + if 
(OB_FAIL(base_ranges->at(i).to_multi_version_range(*access_ctx.get_range_allocator(), tmp_multi_version_range))) { STORAGE_LOG(WARN, "Failed to transfer multi version range", K(ret), K(i), K(base_ranges->at(i))); } else if (OB_FAIL(multi_version_ranges_.push_back(tmp_multi_version_range))) { LOG_WARN("push back multi version range failed", K(ret)); diff --git a/src/storage/access/ob_sstable_row_multi_exister.cpp b/src/storage/access/ob_sstable_row_multi_exister.cpp index 1163a815d..47b9bade8 100644 --- a/src/storage/access/ob_sstable_row_multi_exister.cpp +++ b/src/storage/access/ob_sstable_row_multi_exister.cpp @@ -84,7 +84,6 @@ int ObSSTableRowMultiExister::exist_row(ObSSTableReadHandle &read_handle, ObDatu } if (OB_SUCC(ret)) { store_row.scan_index_ = read_handle.range_idx_; - ++prefetcher_.cur_range_fetch_idx_; LOG_DEBUG("get exist row", K(read_handle.row_state_), K(*read_handle.rowkey_), KP(this)); } return ret; @@ -93,59 +92,51 @@ int ObSSTableRowMultiExister::exist_row(ObSSTableReadHandle &read_handle, ObDatu int ObSSTableRowMultiExister::exist_block_row(ObSSTableReadHandle &read_handle, ObDatumRow &store_row) { int ret = OB_SUCCESS; - if (-1 == read_handle.micro_begin_idx_) { - store_row.row_flag_.set_flag(ObDmlFlag::DF_NOT_EXIST); - } else if (read_handle.micro_begin_idx_ >= prefetcher_.micro_data_prefetch_idx_) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected reader micro idx", K(ret), K(prefetcher_), K(read_handle)); - } else { - prefetcher_.cur_micro_data_fetch_idx_ = read_handle.micro_begin_idx_; - read_handle.micro_handle_ = &prefetcher_.current_micro_handle(); - if (nullptr == micro_exister_) { - if (nullptr == (micro_exister_ = OB_NEWx(ObMicroBlockRowExister, access_ctx_->allocator_))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Fail to allocate micro exister, ", K(ret)); - } else if (OB_FAIL(micro_exister_->init(*iter_param_, *access_ctx_, sstable_))) { - LOG_WARN("Fail to init micro exister, ", K(ret)); - } + read_handle.micro_handle_ = 
&prefetcher_.current_micro_handle(); + if (nullptr == micro_exister_) { + if (nullptr == (micro_exister_ = OB_NEWx(ObMicroBlockRowExister, access_ctx_->allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Fail to allocate micro exister, ", K(ret)); + } else if (OB_FAIL(micro_exister_->init(*iter_param_, *access_ctx_, sstable_))) { + LOG_WARN("Fail to init micro exister, ", K(ret)); } + } - if (OB_SUCC(ret)) { - bool exist = false; - bool found = false; - ObMicroBlockData block_data; - if (OB_FAIL(read_handle.get_block_data(macro_block_reader_, block_data))) { - LOG_WARN("Fail to get block data", K(ret), K(read_handle)); - } else if (OB_FAIL(micro_exister_->is_exist( - *read_handle.rowkey_, - block_data, - exist, - found))) { - LOG_WARN("Fail to get row", K(ret)); - } else { - if (!found) { - store_row.row_flag_.set_flag(ObDmlFlag::DF_NOT_EXIST); - if (!access_ctx_->query_flag_.is_index_back() && access_ctx_->query_flag_.is_use_bloomfilter_cache() && !sstable_->is_small_sstable()) { - (void) OB_STORE_CACHE.get_bf_cache().inc_empty_read( - MTL_ID(), - iter_param_->table_id_, - read_handle.micro_handle_->macro_block_id_, - read_handle.rowkey_->get_datum_cnt()); - if (read_handle.is_bf_contain_) { - ++access_ctx_->table_store_stat_.bf_empty_read_cnt_; - } + if (OB_SUCC(ret)) { + bool exist = false; + bool found = false; + ObMicroBlockData block_data; + if (OB_FAIL(read_handle.get_block_data(macro_block_reader_, block_data))) { + LOG_WARN("Fail to get block data", K(ret), K(read_handle)); + } else if (OB_FAIL(micro_exister_->is_exist( + *read_handle.rowkey_, + block_data, + exist, + found))) { + LOG_WARN("Fail to get row", K(ret)); + } else { + if (!found) { + store_row.row_flag_.set_flag(ObDmlFlag::DF_NOT_EXIST); + if (!access_ctx_->query_flag_.is_index_back() && access_ctx_->query_flag_.is_use_bloomfilter_cache() && !sstable_->is_small_sstable()) { + (void) OB_STORE_CACHE.get_bf_cache().inc_empty_read( + MTL_ID(), + iter_param_->table_id_, + 
read_handle.micro_handle_->macro_block_id_, + read_handle.rowkey_->get_datum_cnt()); + if (read_handle.is_bf_contain_) { + ++access_ctx_->table_store_stat_.bf_empty_read_cnt_; } - ++access_ctx_->table_store_stat_.exist_row_.empty_read_cnt_; - EVENT_INC(ObStatEventIds::EXIST_ROW_EMPTY_READ); - } else { - if (exist) { - store_row.row_flag_.set_flag(ObDmlFlag::DF_UPDATE); - } else { - store_row.row_flag_.set_flag(ObDmlFlag::DF_DELETE); - } - ++access_ctx_->table_store_stat_.exist_row_.effect_read_cnt_; - EVENT_INC(ObStatEventIds::EXIST_ROW_EFFECT_READ); } + ++access_ctx_->table_store_stat_.exist_row_.empty_read_cnt_; + EVENT_INC(ObStatEventIds::EXIST_ROW_EMPTY_READ); + } else { + if (exist) { + store_row.row_flag_.set_flag(ObDmlFlag::DF_UPDATE); + } else { + store_row.row_flag_.set_flag(ObDmlFlag::DF_DELETE); + } + ++access_ctx_->table_store_stat_.exist_row_.effect_read_cnt_; + EVENT_INC(ObStatEventIds::EXIST_ROW_EFFECT_READ); } } } diff --git a/src/storage/access/ob_sstable_row_multi_getter.cpp b/src/storage/access/ob_sstable_row_multi_getter.cpp index 54e780bbb..8862a3ea4 100644 --- a/src/storage/access/ob_sstable_row_multi_getter.cpp +++ b/src/storage/access/ob_sstable_row_multi_getter.cpp @@ -72,7 +72,7 @@ int ObSSTableRowMultiGetter::inner_open( LOG_WARN("fail to switch context for prefetcher, ", K(ret)); } if (OB_SUCC(ret)) { - if (OB_FAIL(prefetcher_.prefetch())) { + if (OB_FAIL(prefetcher_.multi_prefetch())) { LOG_WARN("Fail to prefetch data", K(ret)); } else { is_opened_ = true; @@ -94,29 +94,26 @@ int ObSSTableRowMultiGetter::inner_get_next_row(const blocksstable::ObDatumRow * LOG_WARN("The ObSSTableRowMultiGetter has not been opened", K(ret), KP(this)); } else { while (OB_SUCC(ret)) { - if (OB_FAIL(prefetcher_.prefetch())) { - LOG_WARN("Fail to prefetch micro block", K(ret)); - } else if (prefetcher_.cur_range_fetch_idx_ >= prefetcher_.cur_range_prefetch_idx_) { - if (OB_LIKELY(prefetcher_.is_prefetch_end_)) { + if (OB_FAIL(prefetcher_.multi_prefetch())) { 
+ LOG_WARN("Fail to prefetch micro block", K(ret), K_(prefetcher)); + } else if (prefetcher_.fetch_rowkey_idx_ >= prefetcher_.prefetch_rowkey_idx_) { + if (OB_LIKELY(prefetcher_.is_prefetch_end())) { ret = OB_ITER_END; } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("Current fetch handle idx exceed prefetching idx", K(ret), K_(prefetcher)); } - } else if (!prefetcher_.is_prefetch_end_ && - prefetcher_.cur_range_fetch_idx_ >= prefetcher_.prefetching_range_idx() && - -1 == prefetcher_.current_read_handle().micro_begin_idx_) { + } else if (!prefetcher_.current_read_handle().cur_prefetch_end_) { continue; } else if (OB_FAIL(fetch_row(prefetcher_.current_read_handle(), store_row))) { if (OB_LIKELY(OB_ITER_END == ret)) { - if (prefetcher_.cur_range_fetch_idx_ < prefetcher_.prefetching_range_idx() || prefetcher_.is_prefetch_end_) { - ++prefetcher_.cur_range_fetch_idx_; - } + prefetcher_.mark_cur_rowkey_fetched(prefetcher_.current_read_handle()); ret = OB_SUCCESS; } else { LOG_WARN("Fail to fetch row", K(ret)); } } else { + prefetcher_.mark_cur_rowkey_fetched(prefetcher_.current_read_handle()); break; } } @@ -145,20 +142,10 @@ int ObSSTableRowMultiGetter::fetch_row(ObSSTableReadHandle &read_handle, const b } else if (OB_FAIL(micro_getter_->init(*iter_param_, *access_ctx_, sstable_))) { LOG_WARN("Fail to init micro block row getter", K(ret)); } + //switch context each row due to the cache will be disabled if too many rows getted } else if (OB_FAIL(micro_getter_->switch_context(*iter_param_, *access_ctx_, sstable_))) { STORAGE_LOG(WARN, "Fail to switch context", K(ret)); } - if (OB_SUCC(ret) && ObSSTableRowState::IN_BLOCK == read_handle.row_state_) { - if (-1 == read_handle.micro_begin_idx_) { - read_handle.row_state_ = ObSSTableRowState::NOT_EXIST; - } else if (read_handle.micro_begin_idx_ >= prefetcher_.micro_data_prefetch_idx_) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected reader micro idx", K(ret), K(prefetcher_), K(read_handle)); - } else { - 
prefetcher_.cur_micro_data_fetch_idx_ = read_handle.micro_begin_idx_; - read_handle.micro_handle_ = &prefetcher_.current_micro_handle(); - } - } if (OB_FAIL(ret)) { } else if (OB_FAIL(micro_getter_->get_row( @@ -166,8 +153,6 @@ int ObSSTableRowMultiGetter::fetch_row(ObSSTableReadHandle &read_handle, const b store_row, macro_block_reader_))) { LOG_WARN("Fail to get row", K(ret)); - } else { - ++prefetcher_.cur_range_fetch_idx_; } return ret; } diff --git a/src/storage/access/ob_sstable_row_multi_getter.h b/src/storage/access/ob_sstable_row_multi_getter.h index ff5dcbe01..3000dc17c 100644 --- a/src/storage/access/ob_sstable_row_multi_getter.h +++ b/src/storage/access/ob_sstable_row_multi_getter.h @@ -51,7 +51,7 @@ protected: ObSSTable *sstable_; const ObTableIterParam *iter_param_; ObTableAccessContext *access_ctx_; - ObIndexTreeMultiPassPrefetcher prefetcher_; + ObIndexTreeMultiPrefetcher prefetcher_; ObMacroBlockReader macro_block_reader_; private: bool is_opened_; diff --git a/src/storage/access/ob_table_access_context.cpp b/src/storage/access/ob_table_access_context.cpp index 96bd38f18..8100f70f8 100644 --- a/src/storage/access/ob_table_access_context.cpp +++ b/src/storage/access/ob_table_access_context.cpp @@ -60,6 +60,7 @@ ObTableAccessContext::ObTableAccessContext() limit_param_(NULL), stmt_allocator_(NULL), allocator_(NULL), + range_allocator_(nullptr), scan_mem_(nullptr), table_scan_stat_(NULL), table_store_stat_(), @@ -144,6 +145,7 @@ int ObTableAccessContext::init(ObTableScanParam &scan_param, } if (OB_SUCC(ret)) { stmt_allocator_ = scan_param.allocator_; + range_allocator_ = nullptr; ls_id_ = scan_param.ls_id_; tablet_id_ = scan_param.tablet_id_; query_flag_ = scan_param.scan_flag_; @@ -190,6 +192,7 @@ int ObTableAccessContext::init(const common::ObQueryFlag &query_flag, timeout_ = ctx.timeout_; allocator_ = &allocator; stmt_allocator_ = &stmt_allocator; + range_allocator_ = nullptr; trans_version_range_ = trans_version_range; ls_id_ = ctx.ls_id_; 
tablet_id_ = ctx.tablet_id_; @@ -217,6 +220,7 @@ int ObTableAccessContext::init(const common::ObQueryFlag &query_flag, timeout_ = ctx.timeout_; allocator_ = &allocator; stmt_allocator_ = &allocator; + range_allocator_ = nullptr; trans_version_range_ = trans_version_range; ls_id_ = ctx.ls_id_; tablet_id_ = ctx.tablet_id_; @@ -252,6 +256,7 @@ void ObTableAccessContext::reset() scan_mem_ = NULL; } allocator_ = NULL; + range_allocator_ = nullptr; table_scan_stat_ = NULL; table_store_stat_.reset(); out_cnt_ = 0; @@ -284,6 +289,7 @@ void ObTableAccessContext::reuse() scan_mem_->reuse_arena(); } allocator_ = NULL; + range_allocator_ = nullptr; table_scan_stat_ = NULL; out_cnt_ = 0; trans_version_range_.reset(); diff --git a/src/storage/access/ob_table_access_context.h b/src/storage/access/ob_table_access_context.h index 2633c9421..f1a852aa5 100644 --- a/src/storage/access/ob_table_access_context.h +++ b/src/storage/access/ob_table_access_context.h @@ -81,6 +81,9 @@ struct ObTableAccessContext inline bool is_limit_end() const { return (nullptr != limit_param_ && limit_param_->limit_ >= 0 && (out_cnt_ - limit_param_->offset_ >= limit_param_->limit_)); } + inline common::ObIAllocator *get_range_allocator() { + return nullptr == range_allocator_ ? 
allocator_ : range_allocator_; + } // used for query int init(ObTableScanParam &scan_param, ObStoreCtx &ctx, @@ -107,6 +110,7 @@ struct ObTableAccessContext KP_(limit_param), KP_(stmt_allocator), KP_(allocator), + KP_(range_allocator), KP_(table_scan_stat), K_(out_cnt), K_(trans_version_range), @@ -135,6 +139,8 @@ public: common::ObIAllocator *stmt_allocator_; // storage scan/rescan interface level allocator, will be reclaimed in every scan/rescan call common::ObIAllocator *allocator_; + // scan/rescan level alloctor in storage, will be reclaimed in every reuse/open call + common::ObIAllocator *range_allocator_; lib::MemoryContext scan_mem_; // scan/rescan level memory entity, only for query common::ObTableScanStatistic *table_scan_stat_; ObTableStoreStat table_store_stat_; diff --git a/src/storage/access/ob_table_access_param.cpp b/src/storage/access/ob_table_access_param.cpp index e2e5462dd..7e98a96e1 100644 --- a/src/storage/access/ob_table_access_param.cpp +++ b/src/storage/access/ob_table_access_param.cpp @@ -39,6 +39,7 @@ ObTableIterParam::ObTableIterParam() is_same_schema_column_(false), vectorized_enabled_(false), has_virtual_columns_(false), + ss_rowkey_prefix_cnt_(0), pd_storage_flag_(0) { } @@ -62,6 +63,7 @@ void ObTableIterParam::reset() is_same_schema_column_ = false; pd_storage_flag_ = 0; pushdown_filter_ = nullptr; + ss_rowkey_prefix_cnt_ = 0; vectorized_enabled_ = false; has_virtual_columns_ = false; } @@ -221,6 +223,9 @@ int ObTableAccessParam::init( if (OB_FAIL(iter_param_.check_read_info_valid())) { STORAGE_LOG(WARN, "Failed to check read info valdie", K(ret), K(iter_param_)); + } else if (scan_param.use_index_skip_scan() && + OB_FAIL(get_prefix_cnt_for_skip_scan(scan_param, iter_param_))) { + STORAGE_LOG(WARN, "Failed to get prefix for skip scan", K(ret)); } else { is_inited_ = true; } @@ -229,6 +234,26 @@ int ObTableAccessParam::init( return ret; } +int ObTableAccessParam::get_prefix_cnt_for_skip_scan(const ObTableScanParam &scan_param, 
ObTableIterParam &iter_param) +{ + int ret = OB_SUCCESS; + const int64_t key_range_count = scan_param.key_ranges_.count(); + const int64_t skip_range_count = scan_param.ss_key_ranges_.count(); + if (OB_UNLIKELY(key_range_count != skip_range_count)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), K(key_range_count), K(skip_range_count)); + } else { + const int64_t prefix = iter_param.get_schema_rowkey_count() - scan_param.ss_key_ranges_.at(0).start_key_.length(); + if (OB_UNLIKELY(prefix <= 0)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), K(prefix), K(scan_param.key_ranges_), K(scan_param.ss_key_ranges_)); + } else { + iter_param.ss_rowkey_prefix_cnt_ = prefix; + } + } + return ret; +} + int ObTableAccessParam::init_merge_param( const uint64_t table_id, const common::ObTabletID &tablet_id, diff --git a/src/storage/access/ob_table_access_param.h b/src/storage/access/ob_table_access_param.h index 273e4193b..9ea2148d8 100644 --- a/src/storage/access/ob_table_access_param.h +++ b/src/storage/access/ob_table_access_param.h @@ -127,6 +127,8 @@ public: } OB_INLINE bool need_fill_group_idx() const { return get_group_idx_col_index() != common::OB_INVALID_INDEX; } + OB_INLINE int64_t get_ss_rowkey_prefix_cnt() const + { return ss_rowkey_prefix_cnt_; } OB_INLINE void disable_blockscan() { pd_blockscan_ = 0; } OB_INLINE bool enable_pd_blockscan() const @@ -156,6 +158,7 @@ public: bool is_same_schema_column_; bool vectorized_enabled_; bool has_virtual_columns_; + int64_t ss_rowkey_prefix_cnt_; union { struct { int32_t pd_blockscan_:1; @@ -187,6 +190,7 @@ public: const ObTableReadInfo &full_read_info, const share::schema::ObTableSchemaParam &schema_param, const common::ObIArray *out_cols_project); + int get_prefix_cnt_for_skip_scan(const ObTableScanParam &scan_param, ObTableIterParam &iter_param); // used for index back when query OB_INLINE int64_t get_out_col_cnt() const { return iter_param_.get_out_col_cnt(); } 
OB_INLINE int64_t get_max_out_col_cnt() const { return iter_param_.get_max_out_col_cnt(); } diff --git a/src/storage/access/ob_table_scan_iterator.cpp b/src/storage/access/ob_table_scan_iterator.cpp index b45f4ab9c..70af32f06 100644 --- a/src/storage/access/ob_table_scan_iterator.cpp +++ b/src/storage/access/ob_table_scan_iterator.cpp @@ -46,6 +46,7 @@ ObTableScanIterator::ObTableScanIterator() get_merge_(NULL), scan_merge_(NULL), multi_scan_merge_(NULL), + skip_scan_merge_(NULL), row_sample_iterator_(NULL), block_sample_iterator_(NULL), main_table_param_(), @@ -66,30 +67,14 @@ ObTableScanIterator::~ObTableScanIterator() void ObTableScanIterator::reset() { - if (NULL != single_merge_) { - single_merge_->~ObSingleMerge(); - single_merge_ = NULL; - } - if (NULL != get_merge_) { - get_merge_->~ObMultipleGetMerge(); - get_merge_ = NULL; - } - if (NULL != scan_merge_) { - scan_merge_->~ObMultipleScanMerge(); - scan_merge_ = NULL; - } - if (NULL != multi_scan_merge_) { - multi_scan_merge_->~ObMultipleMultiScanMerge(); - multi_scan_merge_ = NULL; - } - if (NULL != row_sample_iterator_) { - row_sample_iterator_->~ObRowSampleIterator(); - row_sample_iterator_ = NULL; - } - if (NULL != block_sample_iterator_) { - block_sample_iterator_->~ObBlockSampleIterator(); - block_sample_iterator_ = NULL; - } + reset_scan_iter(single_merge_); + reset_scan_iter(get_merge_); + reset_scan_iter(scan_merge_); + reset_scan_iter(multi_scan_merge_); + reset_scan_iter(skip_scan_merge_); + reset_scan_iter(row_sample_iterator_); + reset_scan_iter(block_sample_iterator_); + main_table_param_.reset(); main_table_ctx_.reset(); get_table_param_.reset(); @@ -101,26 +86,30 @@ void ObTableScanIterator::reset() is_inited_ = false; } +template +void ObTableScanIterator::reset_scan_iter(T *&iter) +{ + if (NULL != iter) { + iter->~T(); + iter = NULL; + } +} + void ObTableScanIterator::reuse_row_iters() { - if (NULL != single_merge_) { - single_merge_->reuse(); - } - if (NULL != get_merge_) { - 
get_merge_->reuse(); - } - if (NULL != scan_merge_) { - scan_merge_->reuse(); - } - if (NULL != multi_scan_merge_) { - multi_scan_merge_->reuse(); - } - if (NULL != row_sample_iterator_) { - row_sample_iterator_->reuse(); - } - if (NULL != block_sample_iterator_) { - block_sample_iterator_->reuse(); - } +#define REUSE_SCAN_ITER(iter) \ + if (NULL != iter) { \ + iter->reuse(); \ + } \ + + REUSE_SCAN_ITER(single_merge_); + REUSE_SCAN_ITER(get_merge_); + REUSE_SCAN_ITER(scan_merge_); + REUSE_SCAN_ITER(multi_scan_merge_); + REUSE_SCAN_ITER(skip_scan_merge_); + REUSE_SCAN_ITER(row_sample_iterator_); + REUSE_SCAN_ITER(block_sample_iterator_); +#undef REUSE_SCAN_ITER } int ObTableScanIterator::prepare_table_param(const ObTabletHandle &tablet_handle) @@ -280,29 +269,22 @@ int ObTableScanIterator::switch_param(ObTableScanParam &scan_param, const ObTabl int ObTableScanIterator::switch_param_for_iter() { +#define SWITCH_PARAM_FOR_ITER(iter, ret) \ + if (OB_SUCC(ret) && NULL != iter) { \ + if (OB_FAIL(switch_scan_param(*iter))) { \ + STORAGE_LOG(WARN, "Fail to switch param, ", K(ret), KP(iter), KPC(iter)); \ + } \ + } \ + int ret = OB_SUCCESS; get_table_param_.frozen_version_ = scan_param_->frozen_version_; get_table_param_.sample_info_ = scan_param_->sample_info_; - if (NULL != single_merge_) { - if (OB_FAIL(switch_scan_param(*single_merge_))) { - STORAGE_LOG(WARN, "Fail to switch param for single merge, ", K(ret), KP_(single_merge), KPC_(single_merge)); - } - } - if (OB_SUCC(ret) && NULL != get_merge_) { - if (OB_FAIL(switch_scan_param(*get_merge_))) { - STORAGE_LOG(WARN, "Fail to switch param for get merge, ", K(ret), KP_(get_merge), KPC_(get_merge)); - } - } - if (OB_SUCC(ret) && NULL != scan_merge_) { - if (OB_FAIL(switch_scan_param(*scan_merge_))) { - STORAGE_LOG(WARN, "Fail to switch param for scan merge, ", K(ret), KP_(scan_merge), KPC_(scan_merge)); - } - } - if (OB_SUCC(ret) && NULL != multi_scan_merge_) { - if (OB_FAIL(switch_scan_param(*multi_scan_merge_))) { - 
STORAGE_LOG(WARN, "Fail to switch param for multi scan merge, ", K(ret), KP_(multi_scan_merge), KPC_(multi_scan_merge)); - } - } + SWITCH_PARAM_FOR_ITER(single_merge_, ret); + SWITCH_PARAM_FOR_ITER(get_merge_, ret); + SWITCH_PARAM_FOR_ITER(scan_merge_, ret); + SWITCH_PARAM_FOR_ITER(multi_scan_merge_, ret); + SWITCH_PARAM_FOR_ITER(skip_scan_merge_, ret); +#undef SWITCH_PARAM_FOR_ITER return ret; } @@ -349,6 +331,21 @@ do { \ } \ } while(0) +#define INIT_AND_OPEN_SKIP_SCAN_ITER(ITER_PTR, RANGE, SUFFIX_RANGE, USE_FUSE_CACHE) \ +do { \ + STORAGE_LOG(TRACE, "skip scan", K(main_table_param_), K(RANGE), K(SUFFIX_RANGE)); \ + if (nullptr == ITER_PTR && OB_FAIL(init_scan_iter(ITER_PTR))) { \ + STORAGE_LOG(WARN, "Failed to init single merge", K(ret)); \ + } else { \ + main_table_ctx_.use_fuse_row_cache_ = USE_FUSE_CACHE; \ + if (OB_FAIL(ITER_PTR->open(RANGE, SUFFIX_RANGE))) { \ + STORAGE_LOG(WARN, "Fail to open multiple merge iterator", K(ret)); \ + } else { \ + main_iter_ = ITER_PTR; \ + } \ + } \ +} while(0) + int ObTableScanIterator::open_iter() { int ret = OB_SUCCESS; @@ -405,9 +402,14 @@ int ObTableScanIterator::open_iter() main_table_ctx_.use_fuse_row_cache_ = false; } } + } else if (scan_param_->use_index_skip_scan()) { + INIT_AND_OPEN_SKIP_SCAN_ITER(skip_scan_merge_, table_scan_range_.get_ranges().at(0), table_scan_range_.get_suffix_ranges().at(0), false); } else { INIT_AND_OPEN_ITER(scan_merge_, table_scan_range_.get_ranges().at(0), false); } + } else if (scan_param_->use_index_skip_scan()) { + ret = OB_NOT_SUPPORTED; + STORAGE_LOG(WARN, "multiple ranges are not supported in index skip scan now"); } else { INIT_AND_OPEN_ITER(multi_scan_merge_, table_scan_range_.get_ranges(), false); } @@ -451,6 +453,7 @@ int ObTableScanIterator::open_iter() } #undef INIT_AND_OPEN_ITER +#undef INIT_AND_OPEN_SKIP_SCAN_ITER int ObTableScanIterator::can_retire_to_row_sample(bool &retire) { @@ -510,7 +513,8 @@ int ObTableScanIterator::get_next_row(ObNewRow *&row) if 
(OB_FAIL(main_iter_->get_next_row(store_row))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "Fail to get next row, ", K(ret), KPC_(scan_param), K_(main_table_param), - KP(single_merge_), KP(get_merge_), KP(scan_merge_), KP(multi_scan_merge_)); + KP(single_merge_), KP(get_merge_), KP(scan_merge_), KP(multi_scan_merge_), + KP(skip_scan_merge_)); } } else { row = &(store_row->get_new_row()); @@ -540,7 +544,8 @@ int ObTableScanIterator::get_next_rows(int64_t &count, int64_t capacity) if (OB_FAIL(main_iter_->get_next_rows(count, capacity))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "Fail to get next row, ", K(ret), K(*scan_param_), K_(main_table_param), - KP(single_merge_), KP(get_merge_), KP(scan_merge_), KP(multi_scan_merge_)); + KP(single_merge_), KP(get_merge_), KP(scan_merge_), KP(multi_scan_merge_), + KP(skip_scan_merge_)); } } } diff --git a/src/storage/access/ob_table_scan_iterator.h b/src/storage/access/ob_table_scan_iterator.h index bdf43e4ee..f0a7beb95 100644 --- a/src/storage/access/ob_table_scan_iterator.h +++ b/src/storage/access/ob_table_scan_iterator.h @@ -26,6 +26,8 @@ #include "ob_multiple_merge.h" #include "ob_multiple_multi_scan_merge.h" #include "ob_multiple_scan_merge.h" +#include "ob_multiple_skip_scan_merge.h" +#include "ob_multiple_multi_skip_scan_merge.h" #include "ob_row_sample_iterator.h" #include "ob_single_merge.h" #include "storage/tx_storage/ob_access_service.h" @@ -63,6 +65,7 @@ private: int prepare_table_param(const ObTabletHandle &tablet_handle); int prepare_table_context(); template int init_scan_iter(T *&iter); + template void reset_scan_iter(T *&iter); int switch_scan_param(ObMultipleMerge &iter); void reuse_row_iters(); int switch_param_for_iter(); @@ -76,6 +79,7 @@ private: ObMultipleGetMerge *get_merge_; ObMultipleScanMerge *scan_merge_; ObMultipleMultiScanMerge *multi_scan_merge_; + ObMultipleSkipScanMerge *skip_scan_merge_; ObRowSampleIterator *row_sample_iterator_; ObBlockSampleIterator *block_sample_iterator_; // TODO: 
refactor // we should consider the constructor cost diff --git a/src/storage/access/ob_table_scan_range.cpp b/src/storage/access/ob_table_scan_range.cpp index 319093061..f0388d49c 100644 --- a/src/storage/access/ob_table_scan_range.cpp +++ b/src/storage/access/ob_table_scan_range.cpp @@ -23,6 +23,7 @@ namespace storage ObTableScanRange::ObTableScanRange() : rowkeys_(), ranges_(), + skip_scan_ranges_(), allocator_(nullptr), status_(EMPTY), is_inited_(false) @@ -30,16 +31,23 @@ ObTableScanRange::ObTableScanRange() void ObTableScanRange::reset() { +#define RESET_SCAN_RANGES(RANGES) \ +do { \ + for (int64_t i = 0; i < RANGES.count(); i++) { \ + ObDatumRange &range = RANGES.at(i); \ + if (!range.get_start_key().is_static_rowkey()) { \ + allocator_->free(const_cast(range.get_start_key().datums_)); \ + } \ + if (!range.get_end_key().is_static_rowkey()) { \ + allocator_->free(const_cast(range.get_end_key().datums_)); \ + } \ + } \ +} while(0) \ + if (nullptr != allocator_) { - for (int64_t i = 0; i < ranges_.count(); i++) { - ObDatumRange &range = ranges_.at(i); - if (!range.get_start_key().is_static_rowkey()) { - allocator_->free(const_cast(range.get_start_key().datums_)); - } - if (!range.get_end_key().is_static_rowkey()) { - allocator_->free(const_cast(range.get_end_key().datums_)); - } - } + RESET_SCAN_RANGES(ranges_); + RESET_SCAN_RANGES(skip_scan_ranges_); + for (int64_t i = 0; i < rowkeys_.count(); i++) { if (!rowkeys_.at(i).is_static_rowkey()) { allocator_->free(const_cast(rowkeys_.at(i).datums_)); @@ -48,6 +56,7 @@ void ObTableScanRange::reset() } rowkeys_.reset(); ranges_.reset(); + skip_scan_ranges_.reset(); allocator_ = nullptr; status_ = EMPTY; is_inited_ = false; @@ -72,9 +81,16 @@ int ObTableScanRange::init(ObTableScanParam &scan_param) ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected error for invalid datum utils", K(ret), KPC(scan_param.table_param_)); } else if (scan_param.is_get_) { - if (OB_FAIL(init_rowkeys(scan_param.key_ranges_, 
scan_param.scan_flag_, datum_utils))) { + if (scan_param.use_index_skip_scan()) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected, index skip scan can only be used in scan", K(ret)); + } else if (OB_FAIL(init_rowkeys(scan_param.key_ranges_, scan_param.scan_flag_, datum_utils))) { STORAGE_LOG(WARN, "Failed to init rowkeys", K(ret)); } + } else if (scan_param.use_index_skip_scan()) { + if (OB_FAIL(init_ranges_in_skip_scan(scan_param.key_ranges_, scan_param.ss_key_ranges_, scan_param.scan_flag_, datum_utils))) { + STORAGE_LOG(WARN, "Failed to init range in skip scan", K(ret), K(scan_param.key_ranges_), K(scan_param.ss_key_ranges_)); + } } else if (OB_FAIL(init_ranges(scan_param.key_ranges_, scan_param.scan_flag_, datum_utils))) { STORAGE_LOG(WARN, "Failed to init ranges", K(ret)); } @@ -239,6 +255,62 @@ int ObTableScanRange::init_ranges(const common::ObIArray &ra return ret; } +int ObTableScanRange::init_ranges_in_skip_scan(const common::ObIArray &ranges, + const common::ObIArray &skip_scan_ranges, + const common::ObQueryFlag &scan_flag, + const blocksstable::ObStorageDatumUtils *datum_utils) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == allocator_ || + ranges.count() != skip_scan_ranges.count()) || + ranges.count() == 0) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "Invalid argument to init ranges", K(ret), K(allocator_), K(ranges.count()), K(skip_scan_ranges.count())); + } else { + common::ObSEArray wrapped_ranges_; + const int64_t range_cnt = ranges.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < range_cnt; i++) { + ObSkipScanWrappedRange wrapped_range; + const ObNewRange &range = ranges.at(i); + const ObNewRange &skip_scan_range = skip_scan_ranges.at(i); + bool is_false = false; + if (OB_FAIL(always_false(range, is_false))) { + STORAGE_LOG(WARN, "Failed to check range always false", K(ret), K(range)); + } else if (is_false) { + } else if (OB_FAIL(wrapped_range.datum_range_.from_range(range, *allocator_))) { + STORAGE_LOG(WARN, "Failed 
to transfer range to datum range", K(ret)); + } else if (OB_FAIL(wrapped_range.datum_skip_range_.from_range(skip_scan_range, *allocator_))) { + STORAGE_LOG(WARN, "Failed to transfer skip range to datum range", K(ret)); + } else if (OB_FAIL(wrapped_ranges_.push_back(wrapped_range))) { + STORAGE_LOG(WARN, "Failed to push back", K(ret), K(wrapped_range)); + } + } + if (OB_SUCC(ret)) { + if (wrapped_ranges_.empty()) { + status_ = EMPTY; + } else if (wrapped_ranges_.count() > 1 && nullptr != datum_utils + && (scan_flag.scan_order_ == ObQueryFlag::Forward || scan_flag.scan_order_ == ObQueryFlag::Reverse)) { + ObDatumComparor comparor(*datum_utils, ret, scan_flag.is_reverse_scan()); + std::sort(wrapped_ranges_.begin(), wrapped_ranges_.end(), comparor); + if (OB_FAIL(ret)) { + STORAGE_LOG(WARN, "Failed to sort datum ranges", K(ret), K(wrapped_ranges_)); + } + } + } + if (OB_SUCC(ret) && EMPTY != status_) { + for (int64_t i = 0; OB_SUCC(ret) && i < wrapped_ranges_.count(); i++) { + const ObSkipScanWrappedRange &wrapped_range = wrapped_ranges_.at(i); + STORAGE_LOG(DEBUG, "skip scan range", K(wrapped_range)); + if (OB_FAIL(ranges_.push_back(wrapped_range.datum_range_))) { + STORAGE_LOG(WARN, "Failed to push back datum range", K(ret)); + } else if (OB_FAIL(skip_scan_ranges_.push_back(wrapped_range.datum_skip_range_))) { + STORAGE_LOG(WARN, "Failed to push back datum range", K(ret)); + } + } + } + } + return ret; +} } // namespace storage diff --git a/src/storage/access/ob_table_scan_range.h b/src/storage/access/ob_table_scan_range.h index cf4b22ff0..a2269c896 100644 --- a/src/storage/access/ob_table_scan_range.h +++ b/src/storage/access/ob_table_scan_range.h @@ -37,6 +37,7 @@ public: OB_INLINE bool is_empty() const { return EMPTY == status_; } OB_INLINE void set_empty() { status_ = EMPTY; } OB_INLINE const ObIArray &get_ranges() const { return ranges_; } + OB_INLINE const ObIArray &get_suffix_ranges() const { return skip_scan_ranges_; } OB_INLINE const ObIArray &get_rowkeys() 
const { return rowkeys_; } TO_STRING_KV(K_(rowkeys), K_(ranges), K_(status), K_(is_inited)); private: @@ -44,10 +45,28 @@ private: const common::ObQueryFlag &scan_flag, const blocksstable::ObStorageDatumUtils *datum_utils); int init_ranges(const common::ObIArray &ranges, - const common::ObQueryFlag &scan_flag, + const common::ObQueryFlag &scan_flag, const blocksstable::ObStorageDatumUtils *datum_utils); +int init_ranges_in_skip_scan(const common::ObIArray &ranges, + const common::ObIArray &skip_scan_ranges, + const common::ObQueryFlag &scan_flag, + const blocksstable::ObStorageDatumUtils *datum_utils); int always_false(const common::ObNewRange &range, bool &is_false); private: + struct ObSkipScanWrappedRange + { + ObSkipScanWrappedRange() = default; + ObSkipScanWrappedRange(const blocksstable::ObDatumRange &datum_range, blocksstable::ObDatumRange &datum_skip_range) + : datum_range_(datum_range), datum_skip_range_(datum_skip_range) + {} + OB_INLINE int compare(const ObSkipScanWrappedRange &rhs, const blocksstable::ObStorageDatumUtils &datum_utils, int &cmp_ret) const + { + return datum_range_.compare(rhs.datum_range_, datum_utils, cmp_ret); + } + TO_STRING_KV(K_(datum_range), K_(datum_skip_range)); + blocksstable::ObDatumRange datum_range_; + blocksstable::ObDatumRange datum_skip_range_; + }; enum RangeStatus { EMPTY, @@ -57,6 +76,7 @@ private: static const int64_t DEFAULT_RANGE_CNT = 8; common::ObSEArray rowkeys_; common::ObSEArray ranges_; + common::ObSEArray skip_scan_ranges_; ObIAllocator *allocator_; RangeStatus status_; bool is_inited_; diff --git a/src/storage/access/ob_vector_store.cpp b/src/storage/access/ob_vector_store.cpp index bc4419d36..1ce948686 100644 --- a/src/storage/access/ob_vector_store.cpp +++ b/src/storage/access/ob_vector_store.cpp @@ -56,12 +56,6 @@ void ObVectorStore::reset() default_row_.reset(); } -void ObVectorStore::reuse() -{ - ObBlockBatchedRowStore::reuse(); - count_ = 0; -} - int ObVectorStore::init(const ObTableAccessParam ¶m) { 
int ret = OB_SUCCESS; diff --git a/src/storage/access/ob_vector_store.h b/src/storage/access/ob_vector_store.h index fcd57077d..b6817e09d 100644 --- a/src/storage/access/ob_vector_store.h +++ b/src/storage/access/ob_vector_store.h @@ -39,7 +39,6 @@ public: virtual ~ObVectorStore(); virtual int init(const ObTableAccessParam ¶m) override; virtual void reset() override; - virtual void reuse() override; // shallow copy virtual int fill_rows( const int64_t group_idx, diff --git a/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp b/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp index 45329a781..2955e66f5 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp @@ -2036,43 +2036,12 @@ int ObMicroBlockDecoder::get_rows( LOG_WARN("invalid argument", K(ret), KP(row_ids), KP(cell_datas), K(cols.count()), K(datums.count())); } else { - common::ObObj cell; for (int64_t i = 0; OB_SUCC(ret) && i < cols.count(); i++) { int32_t col_id = cols.at(i); - if (OB_UNLIKELY(col_id >= header_->column_count_)) { - ret = OB_INDEX_OUT_OF_RANGE; - LOG_WARN("Vector store col id greate than store cnt", K(ret), K(header_->column_count_), K(col_id)); - } else if (!decoders_[col_id].decoder_->can_vectorized()) { - // normal path - int64_t row_len = 0; - const char *row_data = NULL; - const int row_header_size = ObRowHeader::get_serialized_size(); - int64_t row_id = common::OB_INVALID_INDEX; - common::ObDatum *col_datums = datums.at(i); - for (int64_t idx = 0; OB_SUCC(ret) && idx < row_cap; idx++) { - row_id = row_ids[idx]; - if (OB_FAIL(row_index_->get(row_id, row_data, row_len))) { - LOG_WARN("get row data failed", K(ret), K(row_id)); - } else { - ObBitStream bs(reinterpret_cast(const_cast(row_data)), row_len); - if (OB_FAIL(decoders_[col_id].decode(cell, row_id, bs, row_data, row_len))) { - LOG_WARN("Decode cell failed", K(ret)); - } else if (OB_FAIL(col_datums[idx].from_obj(cell))) { - 
LOG_WARN("Failed to convert object from datum", K(ret), K(cell)); - } - } - } - } else if (OB_FAIL(decoders_[col_id].batch_decode( - row_index_, - row_ids, - cell_datas, - row_cap, - datums.at(i)))) { - LOG_WARN("fail to get datums from decoder", K(ret), K(col_id), K(row_cap), - "row_ids", common::ObArrayWrap(row_ids, row_cap)); - } - - if (OB_SUCC(ret) && nullptr != col_params.at(i)) { + common::ObDatum *col_datums = datums.at(i); + if (OB_FAIL(get_col_datums(col_id, row_ids, cell_datas, row_cap, col_datums))) { + LOG_WARN("Failed to get col datums", K(ret), K(i), K(col_id), K(row_cap)); + } else if (nullptr != col_params.at(i)) { // need padding if (OB_FAIL(storage::pad_on_datums( col_params.at(i)->get_accuracy(), @@ -2112,5 +2081,74 @@ int ObMicroBlockDecoder::get_row_count( return ret; } +int ObMicroBlockDecoder::get_min_or_max( + int32_t col_id, + const int64_t *row_ids, + const char **cell_datas, + const int64_t row_cap, + ObDatum *datum_buf, + ObMicroBlockAggInfo &agg_info) +{ + int ret = OB_SUCCESS; + decoder_allocator_.reuse(); + if (OB_FAIL(get_col_datums(col_id, row_ids, cell_datas, row_cap, datum_buf))) { + LOG_WARN("Failed to get col datums", K(ret), K(col_id), K(row_cap)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < row_cap; ++i) { + if (datum_buf[i].is_nop()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected datum, can not process in batch", K(ret), K(i)); + } else { + agg_info.update_min_or_max(datum_buf[i]); + LOG_DEBUG("update min/max", K(i), K(datum_buf[i]), K(agg_info)); + } + } + } + return ret; +} + +int ObMicroBlockDecoder::get_col_datums( + int32_t col_id, + const int64_t *row_ids, + const char **cell_datas, + const int64_t row_cap, + common::ObDatum *col_datums) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(col_id >= header_->column_count_)) { + ret = OB_INDEX_OUT_OF_RANGE; + LOG_WARN("Vector store col id greate than store cnt", K(ret), K(header_->column_count_), K(col_id)); + } else if 
(!decoders_[col_id].decoder_->can_vectorized()) { + // normal path + common::ObObj cell; + int64_t row_len = 0; + const char *row_data = NULL; + const int row_header_size = ObRowHeader::get_serialized_size(); + int64_t row_id = common::OB_INVALID_INDEX; + for (int64_t idx = 0; OB_SUCC(ret) && idx < row_cap; idx++) { + row_id = row_ids[idx]; + if (OB_FAIL(row_index_->get(row_id, row_data, row_len))) { + LOG_WARN("get row data failed", K(ret), K(row_id)); + } else { + ObBitStream bs(reinterpret_cast(const_cast(row_data)), row_len); + if (OB_FAIL(decoders_[col_id].decode(cell, row_id, bs, row_data, row_len))) { + LOG_WARN("Decode cell failed", K(ret)); + } else if (OB_FAIL(col_datums[idx].from_obj(cell))) { + LOG_WARN("Failed to convert object from datum", K(ret), K(cell)); + } + } + } + } else if (OB_FAIL(decoders_[col_id].batch_decode( + row_index_, + row_ids, + cell_datas, + row_cap, + col_datums))) { + LOG_WARN("fail to get datums from decoder", K(ret), K(col_id), K(row_cap), + "row_ids", common::ObArrayWrap(row_ids, row_cap)); + } + return ret; +} + } } diff --git a/src/storage/blocksstable/encoding/ob_micro_block_decoder.h b/src/storage/blocksstable/encoding/ob_micro_block_decoder.h index 3333bdc71..6e766c2b4 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_decoder.h +++ b/src/storage/blocksstable/encoding/ob_micro_block_decoder.h @@ -260,6 +260,13 @@ public: const int64_t row_cap, const bool contains_null, int64_t &count) override final; + int get_min_or_max( + int32_t col_id, + const int64_t *row_ids, + const char **cell_datas, + const int64_t row_cap, + ObDatum *datum_buf, + ObMicroBlockAggInfo &agg_info); virtual int64_t get_column_count() const override { OB_ASSERT(nullptr != header_); @@ -294,6 +301,12 @@ private: const int64_t col_begin, const int64_t col_end, ObStorageDatum *datums); + + int get_col_datums(int32_t col_id, + const int64_t *row_ids, + const char **cell_datas, + const int64_t row_cap, + common::ObDatum *col_datums); //TODO 
@hanhui deleted after change rowkey to datum int decode_cells(const uint64_t row_id, const int64_t row_len, diff --git a/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp b/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp index 237fe1ecd..230e12a95 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp +++ b/src/storage/blocksstable/encoding/ob_micro_block_encoder.cpp @@ -619,7 +619,6 @@ int ObMicroBlockEncoder::build_block(char *&buf, int64_t &size) if (OB_SUCC(ret)) { header_->row_count_ = static_cast(datum_rows_.count()); header_->encoding_has_out_row_column_ = has_out_row_column_; - const int64_t header_size = header_->header_size_; char *data = data_buffer_.data() + header_size; FOREACH(e, encoders_) { @@ -957,7 +956,6 @@ int ObMicroBlockEncoder::copy_and_append_row(const ObDatumRow &src, int64_t &sto LOG_WARN("append row to array failed", K(ret), K(src)); } } - return ret; } diff --git a/src/storage/blocksstable/ob_block_sstable_struct.h b/src/storage/blocksstable/ob_block_sstable_struct.h index 8825de818..6cbd9807c 100644 --- a/src/storage/blocksstable/ob_block_sstable_struct.h +++ b/src/storage/blocksstable/ob_block_sstable_struct.h @@ -50,7 +50,9 @@ const int64_t BF_MICRO_BLOCK_HEADER_MAGIC = 1015; const int64_t SERVER_SUPER_BLOCK_MAGIC = 1018; const int64_t LINKED_MACRO_BLOCK_HEADER_MAGIC = 1019; -const int64_t MICRO_BLOCK_HEADER_VERSION = 1; +const int64_t MICRO_BLOCK_HEADER_VERSION_1 = 1; +const int64_t MICRO_BLOCK_HEADER_VERSION_2 = 2; +const int64_t MICRO_BLOCK_HEADER_VERSION = MICRO_BLOCK_HEADER_VERSION_2; const int64_t LINKED_MACRO_BLOCK_HEADER_VERSION = 1; const int64_t BF_MACRO_BLOCK_HEADER_VERSION = 1; const int64_t BF_MICRO_BLOCK_HEADER_VERSION = 1; diff --git a/src/storage/blocksstable/ob_datum_range.h b/src/storage/blocksstable/ob_datum_range.h index 3002633fe..9c4504175 100644 --- a/src/storage/blocksstable/ob_datum_range.h +++ b/src/storage/blocksstable/ob_datum_range.h @@ -59,6 +59,8 @@ public: 
common::ObIAllocator &allocator, common::ObStoreRange &store_range) const; OB_INLINE int to_multi_version_range(common::ObIAllocator &allocator, ObDatumRange &dest) const; + OB_INLINE int prepare_memtable_readable(const common::ObIArray &col_descs, + common::ObIAllocator &allocator); // !!Attension only compare start key OB_INLINE int compare(const ObDatumRange &rhs, const ObStorageDatumUtils &datum_utils, int &cmp_ret) const; // maybe we will need serialize @@ -272,6 +274,18 @@ OB_INLINE int ObDatumRange::to_multi_version_range(common::ObIAllocator &allocat return ret; } +OB_INLINE int ObDatumRange::prepare_memtable_readable(const common::ObIArray &col_descs, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(start_key_.prepare_memtable_readable(col_descs, allocator))) { + STORAGE_LOG(WARN, "Failed to prepare start key", K(ret), K(start_key_), K(col_descs)); + } else if (OB_FAIL(end_key_.prepare_memtable_readable(col_descs, allocator))) { + STORAGE_LOG(WARN, "Failed to prepare end key", K(ret), K(end_key_), K(col_descs)); + } + return ret; +} + } // namespace blocksstable } // namespace oceanbase #endif diff --git a/src/storage/blocksstable/ob_datum_rowkey.h b/src/storage/blocksstable/ob_datum_rowkey.h index 151433e22..9d637e66d 100644 --- a/src/storage/blocksstable/ob_datum_rowkey.h +++ b/src/storage/blocksstable/ob_datum_rowkey.h @@ -38,6 +38,7 @@ public: OB_INLINE bool is_memtable_valid() const { return store_rowkey_.is_valid() && is_valid(); } OB_INLINE int32_t get_datum_cnt() const { return datum_cnt_; } OB_INLINE const ObStorageDatum *get_datum_ptr() { return datums_; } + OB_INLINE const ObStorageDatum& get_datum(const int64_t idx) const { OB_ASSERT(idx < datum_cnt_); return datums_[idx]; } OB_INLINE int64_t get_deep_copy_size() const; OB_INLINE int deep_copy(ObDatumRowkey &dest, common::ObIAllocator &allocator) const; OB_INLINE int deep_copy(ObDatumRowkey &dest, char *buf, const int64_t buf_len) const; diff --git 
a/src/storage/blocksstable/ob_imicro_block_reader.cpp b/src/storage/blocksstable/ob_imicro_block_reader.cpp index 161734e25..1de610d27 100644 --- a/src/storage/blocksstable/ob_imicro_block_reader.cpp +++ b/src/storage/blocksstable/ob_imicro_block_reader.cpp @@ -19,6 +19,7 @@ namespace oceanbase { namespace blocksstable { + int ObIMicroBlockReader::locate_range( const ObDatumRange &range, const bool is_left_border, diff --git a/src/storage/blocksstable/ob_imicro_block_reader.h b/src/storage/blocksstable/ob_imicro_block_reader.h index 78c621b1e..508abc8dd 100644 --- a/src/storage/blocksstable/ob_imicro_block_reader.h +++ b/src/storage/blocksstable/ob_imicro_block_reader.h @@ -22,6 +22,7 @@ #include "storage/access/ob_table_read_info.h" #include "ob_block_sstable_struct.h" #include "ob_datum_range.h" +#include "ob_micro_block_hash_index.h" #include "ob_micro_block_header.h" namespace oceanbase @@ -50,6 +51,30 @@ struct ObMicroIndexInfo; } \ } while (0) +template +class ObMicroBlockAggInfo { +public: + ObMicroBlockAggInfo(bool is_min, const ObDatumCmpFuncType cmp_fun, T &result_datum) : + is_min_(is_min), cmp_fun_(cmp_fun), result_datum_(result_datum) {} + void update_min_or_max(const T& datum) + { + if (datum.is_null()) { + } else if (result_datum_.is_null()) { + result_datum_ = datum; + } else { + int cmp_ret = cmp_fun_(result_datum_, datum); + if ((is_min_ && cmp_ret > 0) || (!is_min_ && cmp_ret < 0)) { + result_datum_ = datum; + } + } + } + TO_STRING_KV(K_(is_min), K_(cmp_fun), K_(result_datum)); +private: + bool is_min_; + const ObDatumCmpFuncType cmp_fun_; + T &result_datum_; +}; + struct ObRowIndexIterator { public: @@ -208,6 +233,19 @@ public: const ObTableReadInfo &read_info, bool &exist, bool &found) = 0; +protected: + OB_INLINE static int init_hash_index( + const ObMicroBlockData &block_data, + ObMicroBlockHashIndex &hash_index, + const ObMicroBlockHeader *header) + { + int ret = OB_SUCCESS; + hash_index.reset(); + if (header->is_contain_hash_index() && 
OB_FAIL(hash_index.init(block_data))) { + STORAGE_LOG(WARN, "failed to init micro block hash index", K(ret), K(block_data)); + } + return ret; + } }; class ObIMicroBlockReader : public ObIMicroBlockReaderInfo diff --git a/src/storage/blocksstable/ob_imicro_block_writer.cpp b/src/storage/blocksstable/ob_imicro_block_writer.cpp index 7c2d323d9..563c89199 100644 --- a/src/storage/blocksstable/ob_imicro_block_writer.cpp +++ b/src/storage/blocksstable/ob_imicro_block_writer.cpp @@ -17,7 +17,7 @@ namespace oceanbase namespace blocksstable { - /** +/** * -------------------------------------------------------------------ObMicroBlockDesc------------------------------------------------------------------- */ bool ObMicroBlockDesc::is_valid() const diff --git a/src/storage/blocksstable/ob_imicro_block_writer.h b/src/storage/blocksstable/ob_imicro_block_writer.h index 84f9ff427..53f1b5500 100644 --- a/src/storage/blocksstable/ob_imicro_block_writer.h +++ b/src/storage/blocksstable/ob_imicro_block_writer.h @@ -20,6 +20,7 @@ #include "ob_datum_rowkey.h" #include "storage/ob_i_store.h" #include "ob_macro_block_id.h" +#include "ob_micro_block_hash_index.h" #include "ob_micro_block_header.h" namespace oceanbase @@ -93,7 +94,8 @@ public: block_size_upper_bound_(DEFAULT_UPPER_BOUND), contain_uncommitted_row_(false), has_out_row_column_(false), - is_last_row_last_flag_(false) + is_last_row_last_flag_(false), + header_(nullptr) { } virtual ~ObIMicroBlockWriter() {} @@ -106,6 +108,16 @@ public: virtual int64_t get_original_size() const = 0; virtual void reset() = 0; virtual void dump_diagnose_info() const {}; + virtual int append_hash_index(ObMicroBlockHashIndexBuilder& hash_index_builder) + { + int ret = OB_NOT_SUPPORTED; + STORAGE_LOG(WARN, "Unspported micro block format for hash index", K(ret)); + return ret; + } + virtual bool has_enough_space_for_hash_index(const int64_t hash_index_size) const + { + return false; + } virtual void reuse() { row_count_delta_ = 0; @@ -207,6 +219,7 @@ 
protected: bool contain_uncommitted_row_; bool has_out_row_column_; bool is_last_row_last_flag_; + ObMicroBlockHeader *header_; }; } // end namespace blocksstable diff --git a/src/storage/blocksstable/ob_index_block_builder.cpp b/src/storage/blocksstable/ob_index_block_builder.cpp index 65c7c4d34..310328e42 100644 --- a/src/storage/blocksstable/ob_index_block_builder.cpp +++ b/src/storage/blocksstable/ob_index_block_builder.cpp @@ -454,8 +454,11 @@ int ObSSTableIndexBuilder::init(const ObDataStoreDesc &index_desc, STORAGE_LOG(WARN, "Failed to init index row", K(ret), K(index_desc)); } else { index_store_desc_.sstable_index_builder_ = this; + index_store_desc_.need_pre_warm_ = true; callback_ = callback; optimization_mode_ = mode; + index_store_desc_.need_build_hash_index_for_micro_block_ = false; + container_store_desc_.need_build_hash_index_for_micro_block_ = false; is_inited_ = true; } STORAGE_LOG(DEBUG, "init sstable index builder", K(ret), K(index_desc), K_(index_store_desc)); @@ -1089,12 +1092,14 @@ ObBaseIndexBlockBuilder::ObBaseIndexBlockBuilder() :is_inited_(false), is_closed_(false), index_store_desc_(nullptr), + idx_read_info_(), row_builder_(), last_rowkey_(), rowkey_allocator_("BaseBuilder", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), allocator_(nullptr), micro_writer_(nullptr), macro_writer_(nullptr), + index_block_pre_warmer_(), row_count_(0), row_count_delta_(0), max_merged_trans_version_(0), @@ -1119,6 +1124,7 @@ void ObBaseIndexBlockBuilder::reset() { is_closed_ = false; index_store_desc_ = nullptr; + idx_read_info_.reset(); last_rowkey_.reset(); rowkey_allocator_.reset(); if (OB_NOT_NULL(micro_writer_)) { @@ -1133,6 +1139,7 @@ void ObBaseIndexBlockBuilder::reset() next_level_builder_ = nullptr; } macro_writer_ = nullptr; + index_block_pre_warmer_.reset(); allocator_ = nullptr; level_ = 0; reset_accumulative_info(); @@ -1153,11 +1160,21 @@ int ObBaseIndexBlockBuilder::init(ObDataStoreDesc &index_store_desc, allocator_ = &allocator; macro_writer_ = 
macro_writer; level_ = level; - if (OB_FAIL(row_builder_.init(*index_store_desc_))) { + if (!idx_read_info_.is_valid() && OB_FAIL(idx_read_info_.init(allocator, + index_store_desc_->row_column_count_ - ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(), + index_store_desc_->schema_rowkey_col_cnt_, + lib::is_oracle_mode(), + index_store_desc_->col_desc_array_, + true))) { + STORAGE_LOG(WARN, "Fail to init index read info", K(ret)); + } else if (OB_FAIL(row_builder_.init(*index_store_desc_))) { STORAGE_LOG(WARN, "fail to init ObBaseIndexBlockBuilder", K(ret)); } else if (OB_FAIL(ObMacroBlockWriter::build_micro_writer(index_store_desc_, allocator, micro_writer_))) { STORAGE_LOG(WARN, "fail to build micro writer", K(ret)); } else { + if (index_store_desc_->need_pre_warm_) { + index_block_pre_warmer_.init(idx_read_info_); + } is_inited_ = true; } } @@ -1241,6 +1258,7 @@ int ObBaseIndexBlockBuilder::append_row( int ObBaseIndexBlockBuilder::close(ObIAllocator &allocator, ObIndexTreeInfo &tree_info) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; ObBaseIndexBlockBuilder *root_builder = nullptr; ObIndexTreeRootBlockDesc &desc = tree_info.root_desc_; if (OB_UNLIKELY(!is_inited_)) { @@ -1271,6 +1289,12 @@ int ObBaseIndexBlockBuilder::close(ObIAllocator &allocator, ObIndexTreeInfo &tre STORAGE_LOG(WARN, "fail to build root block", K(ret)); } else if (FALSE_IT(micro_block_desc.last_rowkey_ = root_builder->last_rowkey_)) { } else if (OB_UNLIKELY(micro_block_desc.get_block_size() >= ObMetaDiskAddr::ROOT_BLOCK_SIZE_LIMIT)) { + if (index_block_pre_warmer_.is_valid() + && OB_TMP_FAIL(index_block_pre_warmer_.reserve_kvpair(micro_block_desc, root_builder->level_+1))) { + if (OB_BUF_NOT_ENOUGH != tmp_ret) { + STORAGE_LOG(WARN, "Fail to reserve kvpair", K(tmp_ret)); + } + } if (OB_FAIL(macro_writer_->append_index_micro_block(micro_block_desc))) { micro_writer->dump_diagnose_info(); // ignore dump error STORAGE_LOG(WARN, "fail to append root block", K(ret), K(micro_block_desc)); 
@@ -1284,6 +1308,10 @@ int ObBaseIndexBlockBuilder::close(ObIAllocator &allocator, ObIndexTreeInfo &tre STORAGE_LOG(WARN, "fail to set block address", K(ret), K(root_row_desc)); } } + if (OB_FAIL(ret) || OB_TMP_FAIL(tmp_ret) || !index_block_pre_warmer_.is_valid()) { + } else if (OB_TMP_FAIL(index_block_pre_warmer_.update_and_put_kvpair(micro_block_desc))) { + STORAGE_LOG(WARN, "Fail to update and put kvpair", K(tmp_ret)); + } } else { char *&root_buf = desc.buf_; const int64_t buf_size = micro_block_desc.buf_size_ + micro_block_desc.header_->header_size_; @@ -1341,19 +1369,34 @@ void ObBaseIndexBlockBuilder::clean_status() int ObBaseIndexBlockBuilder::append_index_micro_block() { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; ObMicroBlockDesc micro_block_desc; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "invalid base index builder", K(ret), K(is_inited_)); } else if (OB_FAIL(build_index_micro_block(micro_block_desc))) { STORAGE_LOG(WARN, "fail to build index micro block", K(ret)); - } else if (OB_FAIL(macro_writer_->append_index_micro_block(micro_block_desc))) { - micro_writer_->dump_diagnose_info(); // ignore dump error - STORAGE_LOG(WARN, "fail to append index micro block", K(ret), K(micro_block_desc)); - } else if (OB_FAIL(append_next_row(micro_block_desc))) { - STORAGE_LOG(WARN, "fail to append next row", K(ret), K(micro_block_desc)); - } else if (FALSE_IT(clean_status())) { + } else { + if (index_block_pre_warmer_.is_valid() + && OB_TMP_FAIL(index_block_pre_warmer_.reserve_kvpair(micro_block_desc, level_+1))) { + if (OB_BUF_NOT_ENOUGH != tmp_ret) { + STORAGE_LOG(WARN, "Fail to reserve kvpair", K(tmp_ret)); + } + } + if (OB_FAIL(macro_writer_->append_index_micro_block(micro_block_desc))) { + micro_writer_->dump_diagnose_info(); // ignore dump error + STORAGE_LOG(WARN, "fail to append index micro block", K(ret), K(micro_block_desc)); + } else if (OB_FAIL(append_next_row(micro_block_desc))) { + STORAGE_LOG(WARN, "fail to append next 
row", K(ret), K(micro_block_desc)); + } else if (FALSE_IT(clean_status())) { + } + if (OB_FAIL(ret) || OB_TMP_FAIL(tmp_ret) || !index_block_pre_warmer_.is_valid()) { + } else if (OB_TMP_FAIL(index_block_pre_warmer_.update_and_put_kvpair(micro_block_desc))) { + STORAGE_LOG(WARN, "Fail to build index block cache key and put into cache", K(tmp_ret)); + } + index_block_pre_warmer_.reuse(); } + return ret; } @@ -1465,6 +1508,7 @@ void ObBaseIndexBlockBuilder::row_desc_to_meta( macro_meta.val_.is_last_row_last_flag_ = macro_row_desc.is_last_row_last_flag_; } + //===================== ObBaseIndexBlockBuilder(private) ================ void ObBaseIndexBlockBuilder::reset_accumulative_info() { @@ -1550,7 +1594,6 @@ ObDataIndexBlockBuilder::ObDataIndexBlockBuilder() sstable_allocator_(nullptr), leaf_store_desc_(), micro_helper_(), - idx_read_info_(), macro_row_desc_(), root_micro_block_desc_(nullptr), macro_meta_list_(nullptr), @@ -1573,7 +1616,6 @@ void ObDataIndexBlockBuilder::reset() sstable_builder_ = nullptr; leaf_store_desc_.reset(); micro_helper_.reset(); - idx_read_info_.reset(); root_micro_block_desc_ = nullptr; macro_meta_list_ = nullptr; if (OB_NOT_NULL(meta_block_writer_)) { @@ -1836,19 +1878,32 @@ int ObDataIndexBlockBuilder::append_index_micro_block(ObMacroBlock ¯o_block, const MacroBlockId &block_id) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; ObMicroBlockDesc leaf_block_desc; // n-1 level index block int64_t data_offset = 0; int64_t leaf_block_size = 0; if (OB_FAIL(build_index_micro_block(leaf_block_desc))) { STORAGE_LOG(WARN, "fail to build n-1 level micro block", K(ret)); - } else if (OB_FAIL(micro_helper_.compress_encrypt_micro_block(leaf_block_desc))) { - STORAGE_LOG(WARN, "fail to compress and encrypt micro block", K(ret)); - } else if (OB_FAIL(macro_block.write_index_micro_block(leaf_block_desc, true, data_offset))) { - STORAGE_LOG(WARN, "fail to write n-1 level index block", K(ret), K(leaf_block_desc)); } else { - leaf_block_desc.macro_id_ = 
block_id; - leaf_block_desc.block_offset_ = data_offset; - leaf_block_size = leaf_block_desc.get_block_size(); + if (OB_TMP_FAIL(index_block_pre_warmer_.reserve_kvpair(leaf_block_desc, 1))) { + if (OB_BUF_NOT_ENOUGH != tmp_ret) { + STORAGE_LOG(WARN, "Fail to reserve index block value", K(tmp_ret)); + } + } + if (OB_FAIL(micro_helper_.compress_encrypt_micro_block(leaf_block_desc))) { + STORAGE_LOG(WARN, "fail to compress and encrypt micro block", K(ret)); + } else if (OB_FAIL(macro_block.write_index_micro_block(leaf_block_desc, true, data_offset))) { + STORAGE_LOG(WARN, "fail to write n-1 level index block", K(ret), K(leaf_block_desc)); + } else { + leaf_block_desc.macro_id_ = block_id; + leaf_block_desc.block_offset_ = data_offset; + leaf_block_size = leaf_block_desc.get_block_size(); + if (OB_TMP_FAIL(tmp_ret)) { + } else if (OB_TMP_FAIL(index_block_pre_warmer_.update_and_put_kvpair(leaf_block_desc))) { + STORAGE_LOG(WARN, "Fail to build index block cache key and put into cache", K(tmp_ret)); + } + } + index_block_pre_warmer_.reuse(); } if (OB_FAIL(ret)) { diff --git a/src/storage/blocksstable/ob_index_block_builder.h b/src/storage/blocksstable/ob_index_block_builder.h index 6afbe8e5f..6906cfbca 100644 --- a/src/storage/blocksstable/ob_index_block_builder.h +++ b/src/storage/blocksstable/ob_index_block_builder.h @@ -20,6 +20,7 @@ #include "storage/blocksstable/ob_micro_block_reader.h" #include "storage/blocksstable/encoding/ob_micro_block_decoder.h" #include "storage/meta_mem/ob_meta_obj_struct.h" +#include "share/cache/ob_kvcache_pre_warmer.h" namespace oceanbase { @@ -265,10 +266,12 @@ private: int new_next_builder(ObBaseIndexBlockBuilder *&next_builder); virtual int append_next_row(const ObMicroBlockDesc µ_block_desc); int64_t calc_basic_micro_block_data_offset(const uint64_t column_cnt); + protected: bool is_inited_; bool is_closed_; ObDataStoreDesc *index_store_desc_; + ObTableReadInfo idx_read_info_; ObIndexBlockRowBuilder row_builder_; ObDatumRowkey 
last_rowkey_; common::ObArenaAllocator rowkey_allocator_; @@ -276,6 +279,7 @@ protected: ObIMicroBlockWriter *micro_writer_; ObMacroBlockWriter *macro_writer_; // accumulative info + ObIndexBlockCachePreWarmer index_block_pre_warmer_; int64_t row_count_; int64_t row_count_delta_; int64_t max_merged_trans_version_; @@ -299,8 +303,7 @@ public: ObSSTableIndexBuilder &sstable_builder); int append_row(const ObMicroBlockDesc µ_block_desc, const ObMacroBlock ¯o_block); - int generate_macro_row(ObMacroBlock ¯o_block, - const MacroBlockId &id); + int generate_macro_row(ObMacroBlock ¯o_block, const MacroBlockId &id); int append_macro_block(const ObMacroBlockDesc ¯o_desc); int close(const ObDatumRowkey &last_key, ObMacroBlocksWriteCtx *data_write_ctx); @@ -324,7 +327,6 @@ private: ObIAllocator *sstable_allocator_; ObDataStoreDesc leaf_store_desc_; ObMicroBlockBufferHelper micro_helper_; - ObTableReadInfo idx_read_info_; ObIndexBlockRowDesc macro_row_desc_; ObIndexMicroBlockDesc *root_micro_block_desc_; ObMacroMetasArray *macro_meta_list_; diff --git a/src/storage/blocksstable/ob_macro_block.cpp b/src/storage/blocksstable/ob_macro_block.cpp index 3b3a6f0d0..fb48c5cdb 100644 --- a/src/storage/blocksstable/ob_macro_block.cpp +++ b/src/storage/blocksstable/ob_macro_block.cpp @@ -15,6 +15,7 @@ #include "lib/utility/ob_tracepoint.h" #include "ob_block_manager.h" #include "ob_macro_block.h" +#include "ob_micro_block_hash_index.h" #include "observer/ob_server_struct.h" #include "share/ob_encryption_util.h" #include "share/ob_force_print_log.h" @@ -232,6 +233,19 @@ int ObDataStoreDesc::init( STORAGE_LOG(INFO, "success to set major working cluster version", K(tmp_ret), K(merge_type), K(cluster_version), K(major_working_cluster_version_)); } + if (OB_SUCC(ret)) { + bool need_build_hash_index = merge_schema.get_table_type() == USER_TABLE + && !is_major_merge(); + if (need_build_hash_index + && OB_FAIL(ObMicroBlockHashIndexBuilder::need_build_hash_index(merge_schema, 
need_build_hash_index))) { + STORAGE_LOG(WARN, "Failed to judge whether to build hash index", K(ret)); + need_build_hash_index_for_micro_block_ = false; + ret = OB_SUCCESS; + } else { + need_build_hash_index_for_micro_block_ = need_build_hash_index; + } + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(col_desc_array_.init(row_column_count_))) { STORAGE_LOG(WARN, "Failed to reserve column desc array", K(ret)); @@ -275,6 +289,7 @@ void ObDataStoreDesc::reset() rowkey_column_count_ = 0; schema_rowkey_col_cnt_ = 0; row_store_type_ = ENCODING_ROW_STORE; + need_build_hash_index_for_micro_block_ = false; encoder_opt_.reset(); schema_version_ = 0; merge_info_ = NULL; @@ -291,6 +306,7 @@ void ObDataStoreDesc::reset() major_working_cluster_version_ = 0; sstable_index_builder_ = nullptr; is_ddl_ = false; + need_pre_warm_ = false; col_desc_array_.reset(); datum_utils_.reset(); allocator_.reset(); @@ -308,6 +324,7 @@ int ObDataStoreDesc::assign(const ObDataStoreDesc &desc) row_column_count_ = desc.row_column_count_; rowkey_column_count_ = desc.rowkey_column_count_; row_store_type_ = desc.row_store_type_; + need_build_hash_index_for_micro_block_ = desc.need_build_hash_index_for_micro_block_; schema_version_ = desc.schema_version_; schema_rowkey_col_cnt_ = desc.schema_rowkey_col_cnt_; encoder_opt_ = desc.encoder_opt_; @@ -323,6 +340,7 @@ int ObDataStoreDesc::assign(const ObDataStoreDesc &desc) MEMCPY(encrypt_key_, desc.encrypt_key_, sizeof(encrypt_key_)); major_working_cluster_version_ = desc.major_working_cluster_version_; is_ddl_ = desc.is_ddl_; + need_pre_warm_ = desc.need_pre_warm_; col_desc_array_.reset(); datum_utils_.reset(); sstable_index_builder_ = desc.sstable_index_builder_; diff --git a/src/storage/blocksstable/ob_macro_block.h b/src/storage/blocksstable/ob_macro_block.h index 84ffd669c..064265f5d 100644 --- a/src/storage/blocksstable/ob_macro_block.h +++ b/src/storage/blocksstable/ob_macro_block.h @@ -56,6 +56,7 @@ struct ObDataStoreDesc int64_t row_column_count_; 
int64_t rowkey_column_count_; ObRowStoreType row_store_type_; + bool need_build_hash_index_for_micro_block_; int64_t schema_version_; int64_t schema_rowkey_col_cnt_; ObMicroBlockEncoderOpt encoder_opt_; @@ -77,6 +78,7 @@ struct ObDataStoreDesc // which still use freezeinfo without cluster version int64_t major_working_cluster_version_; bool is_ddl_; + bool need_pre_warm_; common::ObArenaAllocator allocator_; common::ObFixedArray col_desc_array_; blocksstable::ObStorageDatumUtils datum_utils_; diff --git a/src/storage/blocksstable/ob_macro_block_writer.cpp b/src/storage/blocksstable/ob_macro_block_writer.cpp old mode 100644 new mode 100755 index ae7365c68..da0a1816d --- a/src/storage/blocksstable/ob_macro_block_writer.cpp +++ b/src/storage/blocksstable/ob_macro_block_writer.cpp @@ -355,6 +355,7 @@ ObMacroBlockWriter::ObMacroBlockWriter() :data_store_desc_(nullptr), micro_writer_(nullptr), reader_helper_(), + hash_index_builder_(), micro_helper_(), read_info_(), current_index_(0), @@ -372,7 +373,8 @@ ObMacroBlockWriter::ObMacroBlockWriter() datum_row_(), check_datum_row_(), callback_(nullptr), - builder_(NULL) + builder_(NULL), + data_block_pre_warmer_() { //macro_blocks_, macro_handles_ } @@ -391,6 +393,7 @@ void ObMacroBlockWriter::reset() micro_writer_ = nullptr; } reader_helper_.reset(); + hash_index_builder_.reset(); micro_helper_.reset(); read_info_.reset(); macro_blocks_[0].reset(); @@ -415,6 +418,7 @@ void ObMacroBlockWriter::reset() micro_block_adaptive_splitter_.reset(); allocator_.reset(); rowkey_allocator_.reset(); + data_block_pre_warmer_.reset(); } @@ -435,59 +439,60 @@ int ObMacroBlockWriter::open( } else { STORAGE_LOG(DEBUG, "open macro block writer: ", K(data_store_desc), K(start_seq)); ObSSTableIndexBuilder *sstable_index_builder = data_store_desc.sstable_index_builder_; - - if (OB_NOT_NULL(sstable_index_builder)) { + callback_ = callback; + data_store_desc_ = &data_store_desc; + current_macro_seq_ = start_seq.get_data_seq(); + if 
(OB_FAIL(init_hash_index_builder())) { + STORAGE_LOG(WARN, "Failed to build hash_index builder", K(ret)); + } else if (OB_FAIL(build_micro_writer(data_store_desc_, + allocator_, + micro_writer_, + GCONF.micro_block_merge_verify_level))) { + STORAGE_LOG(WARN, "fail to build micro writer", K(ret)); + } else if (OB_FAIL(read_info_.init( + allocator_, + data_store_desc.row_column_count_ - ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(), + data_store_desc.schema_rowkey_col_cnt_, + lib::is_oracle_mode(), + data_store_desc.col_desc_array_, + true))) { + STORAGE_LOG(WARN, "failed to init read info", K(data_store_desc), K(ret)); + } else if (OB_FAIL(datum_row_.init(allocator_, read_info_.get_request_count()))) { + STORAGE_LOG(WARN, "Failed to init datum row", K(ret), K_(read_info)); + } else if (OB_FAIL(micro_helper_.open(data_store_desc, read_info_, allocator_))) { + STORAGE_LOG(WARN, "Failed to open micro helper", K(ret), K_(read_info)); + } else if (OB_FAIL(check_datum_row_.init(allocator_, read_info_.get_request_count()))) { + STORAGE_LOG(WARN, "Failed to init datum row", K(ret), K_(read_info)); + } else if (OB_FAIL(reader_helper_.init(allocator_))) { + STORAGE_LOG(WARN, "Failed to init reader helper", K(ret)); + } else { + //TODO huronghui.hrh@oceanbase.com use 4.1.0.0 for version judgment + const bool is_use_adaptive = !data_store_desc_->is_major_merge() + || data_store_desc_->major_working_cluster_version_ >= DATA_VERSION_4_1_0_0; + if (OB_FAIL(micro_block_adaptive_splitter_.init(data_store_desc.macro_store_size_, is_use_adaptive))) { + STORAGE_LOG(WARN, "Failed to init micro block adaptive split", K(ret), K(data_store_desc.macro_store_size_)); + } + } + if (OB_SUCC(ret) && data_store_desc_->is_major_merge()) { + if (OB_ISNULL(curr_micro_column_checksum_ = static_cast( + allocator_.alloc(sizeof(int64_t) * data_store_desc_->row_column_count_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "fail to allocate memory for curr micro block column 
checksum", K(ret)); + } else { + MEMSET(curr_micro_column_checksum_, 0, + sizeof(int64_t) * data_store_desc_->row_column_count_); + } + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(sstable_index_builder)) { if (OB_FAIL(sstable_index_builder->new_index_builder(builder_, data_store_desc, allocator_))) { STORAGE_LOG(WARN, "fail to alloc index builder", K(ret)); + } else if (data_store_desc.need_pre_warm_) { + data_block_pre_warmer_.init(read_info_); } } else { builder_ = nullptr; } - - if (OB_SUCC(ret)) { - callback_ = callback; - data_store_desc_ = &data_store_desc; - current_macro_seq_ = start_seq.get_data_seq(); - if (OB_FAIL(build_micro_writer(data_store_desc_, - allocator_, - micro_writer_, - GCONF.micro_block_merge_verify_level))) { - STORAGE_LOG(WARN, "fail to build micro writer", K(ret)); - } else if (OB_FAIL(read_info_.init( - allocator_, - data_store_desc.row_column_count_ - ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(), - data_store_desc.schema_rowkey_col_cnt_, - lib::is_oracle_mode(), - data_store_desc.col_desc_array_, - true))) { - STORAGE_LOG(WARN, "failed to init read info", K(data_store_desc), K(ret)); - } else if (OB_FAIL(datum_row_.init(allocator_, read_info_.get_request_count()))) { - STORAGE_LOG(WARN, "Failed to init datum row", K(ret), K_(read_info)); - } else if (OB_FAIL(micro_helper_.open(data_store_desc, read_info_, allocator_))) { - STORAGE_LOG(WARN, "Failed to open micro helper", K(ret), K_(read_info)); - } else if (OB_FAIL(check_datum_row_.init(allocator_, read_info_.get_request_count()))) { - STORAGE_LOG(WARN, "Failed to init datum row", K(ret), K_(read_info)); - } else if (OB_FAIL(reader_helper_.init(allocator_))) { - STORAGE_LOG(WARN, "Failed to init reader helper", K(ret)); - } else { - //TODO huronghui.hrh@oceanbase.com use 4.1.0.0 for version judgment - const bool is_use_adaptive = !data_store_desc_->is_major_merge() - || data_store_desc_->major_working_cluster_version_ >= DATA_VERSION_4_1_0_0; - if 
(OB_FAIL(micro_block_adaptive_splitter_.init(data_store_desc.macro_store_size_, is_use_adaptive))) { - STORAGE_LOG(WARN, "Failed to init micro block adaptive split", K(ret), K(data_store_desc.macro_store_size_)); - } - } - if (OB_SUCC(ret) && data_store_desc_->is_major_merge()) { - if (OB_ISNULL(curr_micro_column_checksum_ = static_cast( - allocator_.alloc(sizeof(int64_t) * data_store_desc_->row_column_count_)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - STORAGE_LOG(WARN, "fail to allocate memory for curr micro block column checksum", K(ret)); - } else { - MEMSET(curr_micro_column_checksum_, 0, - sizeof(int64_t) * data_store_desc_->row_column_count_); - } - } - } } return ret; } @@ -522,17 +527,17 @@ int ObMacroBlockWriter::append_row(const ObDatumRow &row, const int64_t split_si } else if (OB_FAIL(check_order(row))) { STORAGE_LOG(WARN, "macro block writer fail to check order.", K(row)); } - if (OB_SUCC(ret) ){ + if (OB_SUCC(ret)) { is_macro_or_micro_block_reused_ = false; const ObStorageDatumUtils &datum_utils = read_info_.get_datum_utils(); - if (OB_FAIL(micro_writer_->append_row(*row_to_append))) { + if (OB_FAIL(append_row_and_hash_index(*row_to_append))) { if (OB_BUF_NOT_ENOUGH == ret) { if (0 == micro_writer_->get_row_count()) { ret = OB_NOT_SUPPORTED; STORAGE_LOG(ERROR, "The single row is too large, ", K(ret), K(row)); } else if (OB_FAIL(build_micro_block())) { STORAGE_LOG(WARN, "Fail to build micro block, ", K(ret)); - } else if (OB_FAIL(micro_writer_->append_row(*row_to_append))) { + } else if (OB_FAIL(OB_FAIL(append_row_and_hash_index(*row_to_append)))) { STORAGE_LOG(ERROR, "Fail to append row to micro block, ", K(ret), K(row)); } else if (OB_FAIL(save_last_key(*row_to_append))) { STORAGE_LOG(WARN, "Fail to save last key, ", K(ret), K(row)); @@ -844,6 +849,44 @@ int ObMacroBlockWriter::check_order(const ObDatumRow &row) return ret; } +int ObMacroBlockWriter::init_hash_index_builder() +{ + int ret = OB_SUCCESS; + if 
(data_store_desc_->need_build_hash_index_for_micro_block_ + && OB_FAIL(hash_index_builder_.init(data_store_desc_))) { + STORAGE_LOG(WARN, "Failed to build hash_index builder", K(ret)); + } + return ret; +} + +int ObMacroBlockWriter::append_row_and_hash_index(const ObDatumRow &row) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(micro_writer_->append_row(row))) { + if (ret != OB_BUF_NOT_ENOUGH) { + STORAGE_LOG(WARN, "Failed to append row in micro writer", K(ret), K(row)); + } + } else if (hash_index_builder_.is_valid()) { + if (OB_UNLIKELY(FLAT_ROW_STORE != data_store_desc_->row_store_type_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected row store type", K(ret), K(data_store_desc_->row_store_type_)); + } else { + int64_t hash_index_size = hash_index_builder_.estimate_size(true); + if (OB_UNLIKELY(!micro_writer_->has_enough_space_for_hash_index(hash_index_size))) { + ret = OB_BUF_NOT_ENOUGH; + } else if (OB_FAIL(hash_index_builder_.add(row))) { + if (ret != OB_NOT_SUPPORTED) { + STORAGE_LOG(WARN, "Failed to append hash index", K(ret), K(row)); + } else { + ret = OB_SUCCESS; + } + hash_index_builder_.reset(); + } + } + } + return ret; +} + int ObMacroBlockWriter::append_index_micro_block(ObMicroBlockDesc µ_block_desc) { // used to append normal index micro block @@ -867,6 +910,7 @@ int ObMacroBlockWriter::append_index_micro_block(ObMicroBlockDesc µ_block_d int ObMacroBlockWriter::build_micro_block() { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; int64_t block_size = 0; ObMicroBlockDesc micro_block_desc; if (micro_writer_->get_row_count() <= 0) { @@ -874,21 +918,40 @@ int ObMacroBlockWriter::build_micro_block() STORAGE_LOG(WARN, "micro_block_writer is empty", K(ret)); } else if (OB_FAIL(micro_writer_->build_micro_block_desc(micro_block_desc))) { STORAGE_LOG(WARN, "failed to build micro block desc", K(ret)); - } else if (FALSE_IT(micro_block_desc.last_rowkey_ = last_key_)) { - } else if (FALSE_IT(block_size = micro_block_desc.buf_size_)) { - } else if 
(OB_FAIL(micro_helper_.compress_encrypt_micro_block(micro_block_desc))) { - micro_writer_->dump_diagnose_info(); // ignore dump error - STORAGE_LOG(WARN, "failed to compress and encrypt micro block", K(ret), K(micro_block_desc)); + } else if (OB_FAIL(build_hash_index_block(micro_block_desc))) { + STORAGE_LOG(WARN, "Failed to build hash index block", K(ret)); } else { - if (OB_FAIL(write_micro_block(micro_block_desc))) { - STORAGE_LOG(WARN, "fail to write micro block ", K(ret), K(micro_block_desc)); - } else if (OB_FAIL(micro_block_adaptive_splitter_.update_compression_info(micro_block_desc.row_count_, - block_size, micro_block_desc.buf_size_))) { - STORAGE_LOG(WARN, "Fail to update_compression_info", K(ret), K(micro_block_desc)); + micro_block_desc.last_rowkey_ = last_key_; + block_size = micro_block_desc.buf_size_; + if (data_block_pre_warmer_.is_valid() + && OB_TMP_FAIL(data_block_pre_warmer_.reserve_kvpair(micro_block_desc))) { + if (OB_BUF_NOT_ENOUGH != tmp_ret) { + STORAGE_LOG(WARN, "Fail to reserve data block cache value", K(tmp_ret)); + } } + + if (OB_FAIL(micro_helper_.compress_encrypt_micro_block(micro_block_desc))) { + micro_writer_->dump_diagnose_info(); // ignore dump error + STORAGE_LOG(WARN, "failed to compress and encrypt micro block", K(ret), K(micro_block_desc)); + } else { + if (OB_FAIL(write_micro_block(micro_block_desc))) { + STORAGE_LOG(WARN, "fail to write micro block ", K(ret), K(micro_block_desc)); + } else if (OB_FAIL(micro_block_adaptive_splitter_.update_compression_info(micro_block_desc.row_count_, + block_size, micro_block_desc.buf_size_))) { + STORAGE_LOG(WARN, "Fail to update_compression_info", K(ret), K(micro_block_desc)); + } + if (OB_FAIL(ret) || !data_block_pre_warmer_.is_valid() || OB_TMP_FAIL(tmp_ret)) { + } else if (OB_TMP_FAIL(data_block_pre_warmer_.update_and_put_kvpair(micro_block_desc))) { + STORAGE_LOG(WARN, "Fail to build data cache key and put into cache", K(tmp_ret)); + } + } + data_block_pre_warmer_.reuse(); } if 
(OB_SUCC(ret)) { micro_writer_->reuse(); + if (data_store_desc_->need_build_hash_index_for_micro_block_) { + hash_index_builder_.reuse(); + } if (data_store_desc_->need_prebuild_bloomfilter_ && micro_rowkey_hashs_.count() > 0) { micro_rowkey_hashs_.reuse(); } @@ -898,7 +961,8 @@ int ObMacroBlockWriter::build_micro_block() data_store_desc_->merge_info_->new_micro_count_in_new_macro_++; } } - STORAGE_LOG(DEBUG, "build micro block desc", K(data_store_desc_->tablet_id_), K(micro_block_desc), "lbt", lbt(), K(ret)); + STORAGE_LOG(DEBUG, "build micro block desc", K(data_store_desc_->tablet_id_), K(micro_block_desc), "lbt", lbt(), + K(ret), K(tmp_ret)); return ret; } @@ -926,6 +990,29 @@ int ObMacroBlockWriter::build_micro_block_desc( return ret; } +int ObMacroBlockWriter::build_hash_index_block(ObMicroBlockDesc µ_block_desc) +{ + int ret = OB_SUCCESS; + if (hash_index_builder_.is_valid()) { + if (OB_UNLIKELY(FLAT_ROW_STORE != data_store_desc_->row_store_type_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected row store type", K(ret), K(data_store_desc_->row_store_type_)); + } else if (OB_FAIL(micro_writer_->append_hash_index(hash_index_builder_))) { + if (ret != OB_NOT_SUPPORTED) { + LOG_WARN("Failed to append hash index to micro block writer", K(ret)); + } else { + ret = OB_SUCCESS; + } + hash_index_builder_.reset(); + } else { + const int64_t hash_index_size = hash_index_builder_.estimate_size(); + micro_block_desc.buf_size_ += hash_index_size; + micro_block_desc.data_size_ += hash_index_size; + } + } + return ret; +} + int ObMacroBlockWriter::build_micro_block_desc_with_reuse( const ObMicroBlock µ_block, ObMicroBlockDesc µ_block_desc) @@ -1045,6 +1132,7 @@ int ObMacroBlockWriter::write_micro_block(ObMicroBlockDesc µ_block_desc) } } if (OB_FAIL(ret)) { + } else if (FALSE_IT(micro_block_desc.macro_id_ = macro_handles_[current_index_].get_macro_id())) { } else if (OB_FAIL(macro_blocks_[current_index_].write_micro_block(micro_block_desc, data_offset))) { 
STORAGE_LOG(WARN, "Fail to write micro block, ", K(ret), K(micro_block_desc)); } else if (OB_UNLIKELY(micro_block_desc.block_offset_ != data_offset)) { diff --git a/src/storage/blocksstable/ob_macro_block_writer.h b/src/storage/blocksstable/ob_macro_block_writer.h index e71c1bca3..84e96babd 100644 --- a/src/storage/blocksstable/ob_macro_block_writer.h +++ b/src/storage/blocksstable/ob_macro_block_writer.h @@ -29,6 +29,7 @@ #include "share/schema/ob_table_schema.h" #include "ob_bloom_filter_cache.h" #include "ob_micro_block_reader_helper.h" +#include "share/cache/ob_kvcache_pre_warmer.h" namespace oceanbase { @@ -159,10 +160,13 @@ protected: private: int append_row(const ObDatumRow &row, const int64_t split_size); int check_order(const ObDatumRow &row); + int init_hash_index_builder(); + int append_row_and_hash_index(const ObDatumRow &row); int build_micro_block_desc( const ObMicroBlock µ_block, ObMicroBlockDesc µ_block_desc, ObMicroBlockHeader &header_for_rewrite); + int build_hash_index_block(ObMicroBlockDesc µ_block_desc); int build_micro_block_desc_with_rewrite( const ObMicroBlock µ_block, ObMicroBlockDesc µ_block_desc, @@ -198,6 +202,7 @@ protected: private: ObIMicroBlockWriter *micro_writer_; ObMicroBlockReaderHelper reader_helper_; + ObMicroBlockHashIndexBuilder hash_index_builder_; ObMicroBlockBufferHelper micro_helper_; ObTableReadInfo read_info_; ObMacroBlock macro_blocks_[2]; @@ -221,6 +226,7 @@ private: ObIMacroBlockFlushCallback *callback_; ObDataIndexBlockBuilder *builder_; ObMicroBlockAdaptiveSplitter micro_block_adaptive_splitter_; + ObDataBlockCachePreWarmer data_block_pre_warmer_; }; }//end namespace blocksstable diff --git a/src/storage/blocksstable/ob_micro_block_cache.cpp b/src/storage/blocksstable/ob_micro_block_cache.cpp index ceea615a1..bdcbed0d8 100644 --- a/src/storage/blocksstable/ob_micro_block_cache.cpp +++ b/src/storage/blocksstable/ob_micro_block_cache.cpp @@ -11,16 +11,13 @@ */ #define USING_LOG_PREFIX STORAGE -#include 
"lib/file/ob_file.h" -#include "share/io/ob_io_manager.h" -#include "share/schema/ob_table_param.h" -#include "lib/stat/ob_diagnose_info.h" -#include "encoding/ob_micro_block_decoder.h" -#include "storage/blocksstable/ob_index_block_row_struct.h" + #include "storage/blocksstable/ob_micro_block_cache.h" #include "storage/blocksstable/ob_block_manager.h" #include "storage/blocksstable/ob_macro_block_handle.h" #include "storage/blocksstable/ob_shared_macro_block_manager.h" +#include "storage/blocksstable/ob_macro_block_handle.h" +#include "storage/blocksstable/ob_shared_macro_block_manager.h" namespace oceanbase { @@ -309,6 +306,487 @@ bool ObMultiBlockIOCtx::is_valid() const return OB_NOT_NULL(micro_index_infos_) && block_count_ > 0; } +/*---------------------------------------ObIMicroBlockIOCallback-------------------------------------*/ +ObIMicroBlockIOCallback::ObIMicroBlockIOCallback() + : cache_(nullptr), + put_size_stat_(nullptr), + allocator_(nullptr), + io_buffer_(nullptr), + data_buffer_(nullptr), + read_info_(nullptr), + tenant_id_(OB_INVALID_TENANT_ID), + block_id_(), + offset_(0), + size_(0), + row_store_type_(MAX_ROW_STORE), + block_des_meta_(), + use_block_cache_(true), + need_write_extra_buf_(true) +{ + static_assert(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); +} + +ObIMicroBlockIOCallback::~ObIMicroBlockIOCallback() +{ + if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { + allocator_->free(io_buffer_); + io_buffer_ = nullptr; + } +} + +int ObIMicroBlockIOCallback::alloc_io_buf( + char *&io_buf, int64_t &align_size, int64_t &align_offset) +{ + int ret = OB_SUCCESS; + align_size = 0; + align_offset = 0; + common::align_offset_size(offset_, size_, align_offset, align_size); + if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected error, the allocator is NULL, ", KP_(allocator), K(ret)); + } else { + io_buffer_ = static_cast(allocator_->alloc(align_size + DIO_READ_ALIGN_SIZE)); + for (int64_t i = 
1; OB_ISNULL(io_buffer_) && i <= ALLOC_BUF_RETRY_TIMES; i++) { + ob_usleep(ALLOC_BUF_RETRY_INTERVAL * i); + io_buffer_ = static_cast(allocator_->alloc(align_size + DIO_READ_ALIGN_SIZE)); + } + if (OB_NOT_NULL(io_buffer_)) { + io_buf = reinterpret_cast(upper_align(reinterpret_cast(io_buffer_), + DIO_READ_ALIGN_SIZE)); + data_buffer_ = io_buf + (offset_ - align_offset); + } else { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Fail to allocate memory", + K(ret), K_(offset), K_(size), K(align_offset), K(align_size)); + } + } + + return ret; +} + +int ObIMicroBlockIOCallback::process_block( + ObMacroBlockReader *reader, + char *buffer, + const int64_t offset, + const int64_t size, + const ObMicroBlockCacheValue *µ_block, + common::ObKVCacheHandle &cache_handle) +{ + int ret = OB_SUCCESS; + ObMicroBlockData block_data; + ObMicroBlockHeader header; + int64_t pos = 0; + int64_t payload_size = 0; + const char *payload_buf = nullptr; + if (OB_UNLIKELY(NULL == reader || NULL == buffer || offset < 0 || size < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(reader), KP(buffer), K(offset), K(size)); + } else if (OB_FAIL(header.deserialize(buffer, size, pos))) { + LOG_ERROR("Fail to deserialize record header", K(ret), K_(block_id), K(offset)); + } else if (OB_FAIL(header.check_and_get_record( + buffer, size, MICRO_BLOCK_HEADER_MAGIC, payload_buf, payload_size))) { + LOG_ERROR("Micro block data is corrupted", K(ret), K_(block_id), K(offset), + K(size), K_(tenant_id), KP(buffer), KP(io_buffer_), KP(data_buffer_), KP(this)); + } else { + if (OB_UNLIKELY(!use_block_cache_)) { + // Won't put in cache + } else { + ObIMicroBlockCache::BaseBlockCache *kvcache = nullptr; + ObKVCachePair *kvpair = nullptr; + ObKVCacheInstHandle inst_handle; + const int64_t block_size = header.header_size_ + header.data_length_; + int64_t extra_size = 0; + bool need_decoder = false; + ObMicroBlockCacheKey key(tenant_id_, block_id_, offset, size); + int64_t value_size = 
cache_->calc_value_size(block_size, row_store_type_, header.row_count_, + read_info_->get_request_count(), extra_size, need_decoder); + if (OB_FAIL(cache_->get_cache(kvcache))) { + LOG_WARN("Fail to get kvcache", K(ret)); + } else if (OB_UNLIKELY(OB_SUCCESS == (ret = kvcache->get(key, micro_block, cache_handle)))) { + // entry exist, no need to put + } else if (OB_FAIL(kvcache->alloc(tenant_id_, sizeof(ObMicroBlockCacheKey), value_size, + kvpair, cache_handle, inst_handle))) { + LOG_WARN("Fail to alloc cache buf", K(ret), K_(tenant_id), K(value_size)); + } else { + char *block_buf = reinterpret_cast(kvpair->value_) + sizeof(ObMicroBlockCacheValue); + kvpair->key_ = new (kvpair->key_) ObMicroBlockCacheKey(tenant_id_, block_id_, offset, size); + ObMicroBlockCacheValue *cache_value = new (kvpair->value_) ObMicroBlockCacheValue(block_buf, block_size); + ObMicroBlockData µ_data = cache_value->get_block_data(); + micro_data.type_ = cache_->get_type(); + int64_t pos = 0; + if (OB_FAIL(header.serialize(block_buf, header.header_size_, pos))) { + LOG_WARN("Fail to serialize header", K(ret), K(header)); + } else if (FALSE_IT(payload_buf = payload_buf + pos)) { + } else if (FALSE_IT(payload_size = payload_size - pos)) { + } else if (OB_FAIL(reader->decompress_data_with_prealloc_buf( + block_des_meta_.compressor_type_, + payload_buf, + payload_size, + block_buf + pos, + block_size - pos))) { + LOG_WARN("Fail to decompress data with preallocated buffer", K(ret)); + } else if (need_write_extra_buf_ && cache_->write_extra_buf(*read_info_, block_buf, block_size, extra_size, + block_buf + block_size, micro_data)) { + LOG_WARN("Fail to writer extra buffer of block data", K(ret), K(header), KPC(cache_value)); + } else if (FALSE_IT(micro_block = cache_value)) { + } else if (OB_FAIL(kvcache->put_kvpair(inst_handle, kvpair, cache_handle, false /* overwrite */))) { + if (OB_ENTRY_EXIST != ret) { + LOG_WARN("Fail to put micro block cache", K(ret)); + } else { + ret = OB_SUCCESS; + } + } 
else { + const int64_t put_size = ObKVStoreMemBlock::get_align_size(key, *cache_value); + if (OB_FAIL(put_size_stat_->add_put_size(put_size))) { + LOG_WARN("add_put_size failed", K(ret), K(put_size)); + } + } + if (OB_FAIL(ret)) { + cache_handle.reset(); + micro_block = nullptr; + } + } + } + + if (OB_FAIL(ret)) { + } else if (use_block_cache_) { + // block already in cache + } else if (OB_FAIL(read_block_and_copy(*reader, buffer, size, block_data, micro_block, cache_handle))) { + LOG_WARN("Fail to read micro block and copy to cache value", K(ret)); + } + } + return ret; +} + +int ObIMicroBlockIOCallback::read_block_and_copy( + ObMacroBlockReader &reader, + char *buffer, + const int64_t size, + ObMicroBlockData &block_data, + const ObMicroBlockCacheValue *µ_block, + ObKVCacheHandle &handle) +{ + int ret = OB_SUCCESS; + bool is_compressed = false; + if (OB_ISNULL(buffer)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null pointer to data buffer", K(ret), KP(buffer)); + } else if (OB_FAIL(reader.decrypt_and_decompress_data( + block_des_meta_, buffer, size, + block_data.get_buf(), block_data.get_buf_size(), is_compressed))) { + LOG_WARN("Fail to decrypt and decompress data", K(ret)); + } else { + block_data.type_ = cache_->get_type(); + ObMicroBlockCacheValue value( + block_data.get_buf(), block_data.get_buf_size(), nullptr, 0, block_data.type_); + char *buf = nullptr; + const int64_t buf_len = value.size(); + handle.reset(); + micro_block = nullptr; + ObIKVCacheValue *value_copy = nullptr; + if (OB_ISNULL(buf = static_cast(allocator_->alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Failed to allocate value", K(ret), K(buf_len)); + } else if (OB_FAIL(value.deep_copy(buf, buf_len, value_copy))) { + LOG_WARN("Failed to deep copy value", K(ret)); + allocator_->free(buf); + } else { + micro_block = static_cast(value_copy); + } + } + return ret; +} + +int ObIMicroBlockIOCallback::assign(const ObIMicroBlockIOCallback &other) +{ + int ret = 
OB_SUCCESS; + cache_ = other.cache_; + put_size_stat_ = other.put_size_stat_; + allocator_ = other.allocator_; + io_buffer_ = other.io_buffer_; + data_buffer_ = other.data_buffer_; + read_info_ = other.read_info_; + tenant_id_ = other.tenant_id_; + block_id_ = other.block_id_; + offset_ = other.offset_; + size_ = other.size_; + row_store_type_ = other.row_store_type_; + block_des_meta_ = other.block_des_meta_; + use_block_cache_ = other.use_block_cache_; + need_write_extra_buf_ = other.need_write_extra_buf_; + return ret; +} + +/*-----------------------------------ObSingleMicroBlockIOCallback-----------------------------------*/ +ObSingleMicroBlockIOCallback::ObSingleMicroBlockIOCallback() + : ObIMicroBlockIOCallback(), + micro_block_(nullptr), + cache_handle_() +{ + STATIC_ASSERT(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); +} + +ObSingleMicroBlockIOCallback::~ObSingleMicroBlockIOCallback() +{ + if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(micro_block_) && !cache_handle_.is_valid()) { + allocator_->free(const_cast(micro_block_)); + micro_block_ = nullptr; + } + tablet_handle_.reset(); +} + +int64_t ObSingleMicroBlockIOCallback::size() const +{ + return sizeof(*this); +} + +int ObSingleMicroBlockIOCallback::inner_process(const bool is_success) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(cache_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Invalid micro block cache callback, ", KP_(cache), K(ret)); + } else if (is_success) { + ObMacroBlockReader *reader = nullptr; + if (OB_ISNULL(reader = GET_TSI_MULT(ObMacroBlockReader, 1))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Fail to allocate ObMacroBlockReader, ", K(ret)); + } else if (OB_FAIL(process_block(reader, data_buffer_, offset_, size_, micro_block_, cache_handle_))) { + LOG_WARN("process_block failed", K(ret)); + } + } + + if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { + allocator_->free(io_buffer_); + io_buffer_ = nullptr; + } + return ret; +} + +int 
ObSingleMicroBlockIOCallback::inner_deep_copy( + char *buf, + const int64_t buf_len, + ObIOCallback *&callback) const +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len < size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument, ", KP(buf), K(buf_len), K(ret)); + } else if (OB_ISNULL(cache_) || OB_ISNULL(allocator_)) { + ret = OB_INVALID_DATA; + LOG_WARN("The micro block io callback is not valid, ", KP_(cache), KP_(allocator), K(ret)); + } else { + ObSingleMicroBlockIOCallback *pcallback = new (buf) ObSingleMicroBlockIOCallback(); + if (OB_FAIL(pcallback->assign(*this))) { + LOG_WARN("fail to assign callback", K(ret)); + } else { + pcallback->micro_block_ = micro_block_; + pcallback->tablet_handle_ = tablet_handle_; + pcallback->cache_handle_ = cache_handle_; + callback = pcallback; + } + } + return ret; +} + +const char *ObSingleMicroBlockIOCallback::get_data() +{ + const char *data = nullptr; + if (OB_NOT_NULL(micro_block_)) { + data = reinterpret_cast (&(micro_block_->get_block_data())); + } + return data; +} + + +/*-----------------------------------ObMultiDataBlockIOCallback-----------------------------------*/ +ObMultiDataBlockIOCallback::ObMultiDataBlockIOCallback() + : ObIMicroBlockIOCallback(), + io_ctx_(), + io_result_() +{ + STATIC_ASSERT(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); +} + +ObMultiDataBlockIOCallback::~ObMultiDataBlockIOCallback() +{ + free_result(); +} + +int64_t ObMultiDataBlockIOCallback::size() const +{ + return sizeof(*this); +} + +int ObMultiDataBlockIOCallback::inner_process(const bool is_success) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(cache_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Invalid micro block cache callback, ", KP_(cache), K(ret)); + } else if (is_success) { + ObMacroBlockReader *reader = nullptr; + if (OB_ISNULL(reader = GET_TSI_MULT(ObMacroBlockReader, 1))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Fail to allocate ObMacroBlockReader, ", K(ret)); + } 
else if (OB_FAIL(alloc_result())) { + LOG_WARN("alloc_result failed", K(ret)); + } + + const int64_t block_count = io_ctx_.block_count_; + for (int64_t i = 0; OB_SUCC(ret) && i < block_count; ++i) { + const int64_t data_size = io_ctx_.micro_index_infos_[i].get_block_size(); + const int64_t data_offset = io_ctx_.micro_index_infos_[i].get_block_offset() - offset_; + if (OB_FAIL(process_block( + reader, + data_buffer_ + data_offset, + offset_ + data_offset, + data_size, + io_result_.micro_blocks_[i], + io_result_.handles_[i]))) { + LOG_WARN("process_block failed", K(ret)); + } + } + } + + if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { + allocator_->free(io_buffer_); + io_buffer_ = nullptr; + } + + if (OB_FAIL(ret)) { + io_result_.ret_code_ = ret; + } + return ret; +} + +int ObMultiDataBlockIOCallback::inner_deep_copy(char *buf, + const int64_t buf_len, ObIOCallback *&callback) const +{ + int ret = OB_SUCCESS; + callback = nullptr; + if (OB_UNLIKELY(nullptr == buf || buf_len < size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument, ", KP(buf), K(buf_len), K(ret)); + } else if (OB_ISNULL(cache_) || OB_ISNULL(allocator_)) { + ret = OB_INVALID_DATA; + LOG_WARN("The micro block io callback is not valid, ", KP_(cache), KP_(allocator), K(ret)); + } else { + ObMultiDataBlockIOCallback *pcallback = new (buf) ObMultiDataBlockIOCallback(); + pcallback->io_ctx_.reset(); + if (OB_FAIL(pcallback->assign(*this))) { + LOG_WARN("fail to assign callback", K(ret)); + } else if (OB_FAIL(pcallback->deep_copy_ctx(io_ctx_))) { + LOG_WARN("deep_copy_ctx failed", K(ret)); + } else { + pcallback->io_ctx_ = io_ctx_; + pcallback->io_result_ = io_result_; + callback = pcallback; + } + } + return ret; +} + +const char *ObMultiDataBlockIOCallback::get_data() +{ + const char *data = nullptr; + data = reinterpret_cast(&io_result_); + return data; +} + +int ObMultiDataBlockIOCallback::set_io_ctx( + const ObMultiBlockIOParam &io_param) +{ + int ret = OB_SUCCESS; + if 
(OB_UNLIKELY(!io_param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid io_param", K(ret), K(io_param)); + } else { + io_ctx_.micro_index_infos_ = &io_param.micro_index_infos_->at(io_param.start_index_); + io_ctx_.block_count_ = io_param.block_count_; + } + return ret; +} + +int ObMultiDataBlockIOCallback::deep_copy_ctx( + const ObMultiBlockIOCtx &io_ctx) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!io_ctx.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid io_ctx", K(ret), K(io_ctx)); + } else if (OB_ISNULL(allocator_)) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("allocator_ is null", K(ret), KP(allocator_)); + } else { + void *ptr = nullptr; + int64_t alloc_size = sizeof(ObMicroIndexInfo) * io_ctx.block_count_; + if (OB_ISNULL(ptr = allocator_->alloc(alloc_size))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret), K(alloc_size)); + } else { + io_ctx_.micro_index_infos_ = reinterpret_cast(ptr); + MEMCPY(io_ctx_.micro_index_infos_, io_ctx.micro_index_infos_, alloc_size); + } + + if (OB_SUCC(ret)) { + io_ctx_.block_count_ = io_ctx.block_count_; + } + } + return ret; +} + +int ObMultiDataBlockIOCallback::alloc_result() +{ + int ret = OB_SUCCESS; + void *ptr = nullptr; + const int64_t block_count = io_ctx_.block_count_; + if (OB_ISNULL(ptr = allocator_->alloc(sizeof(ObMicroBlockCacheValue *) * block_count))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + io_result_.micro_blocks_ = reinterpret_cast(ptr); + MEMSET(io_result_.micro_blocks_, 0, sizeof(ObMicroBlockCacheValue *) * block_count); + } + + if (OB_SUCC(ret)) { + if (OB_ISNULL(ptr = allocator_->alloc(sizeof(ObKVCacheHandle) * block_count))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + io_result_.handles_ = new (ptr) ObKVCacheHandle[block_count]; + io_result_.block_count_ = block_count; + } + } + return ret; +} + +void ObMultiDataBlockIOCallback::free_result() +{ + if 
(OB_NOT_NULL(allocator_)) { + if (OB_NOT_NULL(io_result_.micro_blocks_)) { + if (OB_NOT_NULL(io_result_.handles_)) { + for (int64_t i = 0; i < io_result_.block_count_; ++i) { + if (!io_result_.handles_[i].is_valid() + && OB_NOT_NULL(io_result_.micro_blocks_[i])) { + allocator_->free(const_cast(io_result_.micro_blocks_[i])); + } + } + } + allocator_->free(io_result_.micro_blocks_); + io_result_.micro_blocks_ = nullptr; + } + if (OB_NOT_NULL(io_result_.handles_)) { + for (int64_t i = 0; i < io_result_.block_count_; ++i) { + io_result_.handles_[i].~ObKVCacheHandle(); + } + allocator_->free(io_result_.handles_); + io_result_.handles_ = nullptr; + io_result_.block_count_ = 0; + } + } +} + /*----------------------------------------ObIMicroBlockCache--------------------------------------*/ int ObIMicroBlockCache::get_cache_block( const uint64_t tenant_id, @@ -340,33 +818,83 @@ int ObIMicroBlockCache::get_cache_block( return ret; } +int ObIMicroBlockCache::reserve_kvpair(const ObMicroBlockDesc µ_block_desc, + const ObTableReadInfo &read_info, + ObKVCacheInstHandle &inst_handle, + ObKVCacheHandle &cache_handle, + ObKVCachePair *&kvpair, + int64_t &kvpair_size) +{ + int ret = OB_SUCCESS; + + kvpair_size = 0; + if (OB_UNLIKELY(!micro_block_desc.is_valid() || !read_info.is_valid() || inst_handle.is_valid() + || cache_handle.is_valid() || nullptr != kvpair)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), K(micro_block_desc), K(read_info), K(inst_handle), K(cache_handle), KP(kvpair)); + } else { + int64_t extra_size = 0; + bool need_decoder = false; + const int64_t block_size = micro_block_desc.header_->header_size_ + micro_block_desc.data_size_; + int64_t value_size = calc_value_size(block_size, + static_cast(micro_block_desc.header_->row_store_type_), + micro_block_desc.row_count_, + read_info.get_request_count(), + extra_size, + need_decoder); + if (OB_FAIL(alloc_base_kvpair(micro_block_desc, sizeof(ObMicroBlockCacheKey), value_size, + inst_handle, 
cache_handle, kvpair))) { + LOG_WARN("Fail to alloc kvpair buf", K(ret)); + } else if (!need_decoder) { + } else if (OB_FAIL(write_extra_buf(read_info, + reinterpret_cast(kvpair->value_) + sizeof(ObMicroBlockCacheValue), + block_size, + extra_size, + reinterpret_cast(kvpair->value_) + sizeof(ObMicroBlockCacheValue) + + block_size, + static_cast(kvpair->value_)->get_block_data()))) { + LOG_WARN("Fail to write decoder in extra buf", K(ret)); + } else { + kvpair_size = sizeof(ObMicroBlockCacheKey) + value_size; + } + + } + + return ret; +} + int ObIMicroBlockCache::prefetch( const uint64_t tenant_id, const MacroBlockId ¯o_id, const ObMicroIndexInfo& idx_row, const common::ObQueryFlag &flag, - const ObTableReadInfo &full_read_info, + const ObTableReadInfo &read_info, const ObTabletHandle &tablet_handle, ObMacroBlockHandle ¯o_handle) { - UNUSEDx(tenant_id, macro_id, idx_row, flag, full_read_info, tablet_handle, macro_handle); int ret = OB_SUCCESS; - ret = OB_NOT_IMPLEMENT; + const ObIndexBlockRowHeader *idx_header = idx_row.row_header_; + if (OB_ISNULL(idx_header)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid null index block row header", K(ret), K(idx_row)); + } else if (OB_UNLIKELY(!idx_header->is_valid() || 0 >= idx_header->get_block_size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid data index block row header ", K(ret), K(idx_row)); + } else { + ObSingleMicroBlockIOCallback callback; + callback.read_info_ = &read_info; + callback.tablet_handle_ = tablet_handle; + callback.need_write_extra_buf_ = idx_header->is_data_index() + && (!idx_header->is_data_block() + || (ObStoreFormat::is_row_store_type_with_encoding(idx_header->get_row_store_type()))); + if (OB_FAIL(prefetch( + tenant_id, macro_id, idx_row, flag, macro_handle, callback))) { + LOG_WARN("Fail to prefetch data micro block", K(ret)); + } + } return ret; } -int ObIMicroBlockCache::load_block( - const ObMicroBlockId µ_block_id, - const ObMicroBlockDesMeta &des_meta, - const ObTableReadInfo 
*read_info, - ObMacroBlockReader *macro_reader, - ObMicroBlockData &block_data, - ObIAllocator *allocator) -{ - UNUSEDx(micro_block_id, des_meta, read_info, macro_reader, block_data, allocator); - return OB_NOT_IMPLEMENT; -} - int ObIMicroBlockCache::prefetch( const uint64_t tenant_id, const MacroBlockId ¯o_id, @@ -377,18 +905,15 @@ int ObIMicroBlockCache::prefetch( { int ret = OB_SUCCESS; const ObIndexBlockRowHeader *idx_row_header = idx_row.row_header_; - BaseBlockCache *cache = nullptr; ObIAllocator *allocator = nullptr; if (OB_ISNULL(idx_row_header)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret)); - } else if (OB_FAIL(get_cache(cache))) { - LOG_WARN("Fail to get base cache", K(ret)); } else if (OB_FAIL(get_allocator(allocator))) { LOG_WARN("Fail to get allocator", K(ret)); } else { // fill callback - callback.cache_ = cache; + callback.cache_ = this; callback.allocator_ = allocator; callback.put_size_stat_ = this; callback.tenant_id_ = tenant_id; @@ -435,11 +960,8 @@ int ObIMicroBlockCache::prefetch( int ret = OB_SUCCESS; int64_t offset = 0; int64_t size = 0; - BaseBlockCache *cache = nullptr; ObIAllocator *allocator = nullptr; - if (OB_FAIL(get_cache(cache))) { - LOG_WARN("Fail to get base cache", K(ret)); - } else if (OB_FAIL(get_allocator(allocator))) { + if (OB_FAIL(get_allocator(allocator))) { LOG_WARN("Fail to get allocator", K(ret)); } else if (OB_FAIL(io_param.get_block_des_info( callback.block_des_meta_, callback.row_store_type_))) { @@ -447,7 +969,7 @@ int ObIMicroBlockCache::prefetch( } else { // fill callback io_param.get_io_range(offset, size); - callback.cache_ = cache; + callback.cache_ = this; callback.allocator_ = allocator; callback.put_size_stat_ = this; callback.tenant_id_ = tenant_id; @@ -475,272 +997,40 @@ int ObIMicroBlockCache::prefetch( return ret; } +int ObIMicroBlockCache::alloc_base_kvpair(const ObMicroBlockDesc µ_block_desc, + const int64_t key_size, + const int64_t value_size, + ObKVCacheInstHandle &inst_handle, 
+ ObKVCacheHandle &cache_handle, + ObKVCachePair *&kvpair) +{ + int ret = OB_SUCCESS; + + BaseBlockCache *kvcache = nullptr; + if (OB_FAIL(get_cache(kvcache))) { + LOG_WARN("Fail to get cache", K(ret)); + } else if (OB_FAIL(kvcache->alloc(MTL_ID(), key_size, value_size, kvpair, cache_handle, inst_handle))) { + LOG_WARN("Fail to alloc cache buf", K(ret), K(key_size), K(value_size)); + } else { + char *block_buf = reinterpret_cast(kvpair->value_) + sizeof(ObMicroBlockCacheValue); + kvpair->key_ = new (kvpair->key_) ObMicroBlockCacheKey(); + const int64_t block_size = micro_block_desc.header_->header_size_ + micro_block_desc.data_size_; + ObMicroBlockCacheValue *cache_value = new (kvpair->value_) ObMicroBlockCacheValue(block_buf, block_size); + MEMCPY(block_buf, micro_block_desc.header_, micro_block_desc.header_->header_size_); + MEMCPY(block_buf + micro_block_desc.header_->header_size_, micro_block_desc.buf_, micro_block_desc.buf_size_); + } + + return ret; +} + int ObIMicroBlockCache::add_put_size(const int64_t put_size) { UNUSED(put_size); return OB_SUCCESS; } -/*---------------------------------------MicroBlockIOCallback-------------------------------------*/ -ObIMicroBlockCache::ObIMicroBlockIOCallback::ObIMicroBlockIOCallback() - : cache_(nullptr), - put_size_stat_(nullptr), - allocator_(nullptr), - io_buffer_(nullptr), - data_buffer_(nullptr), - tenant_id_(OB_INVALID_TENANT_ID), - block_id_(), - offset_(0), - size_(0), - row_store_type_(MAX_ROW_STORE), - block_des_meta_(), - use_block_cache_(true) -{ - static_assert(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); -} -ObIMicroBlockCache::ObIMicroBlockIOCallback::~ObIMicroBlockIOCallback() -{ - if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { - allocator_->free(io_buffer_); - io_buffer_ = nullptr; - } -} -int ObIMicroBlockCache::ObIMicroBlockIOCallback::alloc_io_buf( - char *&io_buf, int64_t &align_size, int64_t &align_offset) -{ - int ret = OB_SUCCESS; - align_size = 0; - 
align_offset = 0; - common::align_offset_size(offset_, size_, align_offset, align_size); - if (OB_ISNULL(allocator_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected error, the allocator is NULL, ", KP_(allocator), K(ret)); - } else { - io_buffer_ = static_cast(allocator_->alloc(align_size + DIO_READ_ALIGN_SIZE)); - for (int64_t i = 1; OB_ISNULL(io_buffer_) && i <= ALLOC_BUF_RETRY_TIMES; i++) { - ob_usleep(ALLOC_BUF_RETRY_INTERVAL * i); - io_buffer_ = static_cast(allocator_->alloc(align_size + DIO_READ_ALIGN_SIZE)); - } - if (OB_NOT_NULL(io_buffer_)) { - io_buf = reinterpret_cast(upper_align(reinterpret_cast(io_buffer_), - DIO_READ_ALIGN_SIZE)); - data_buffer_ = io_buf + (offset_ - align_offset); - } else { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Fail to allocate memory", - K(ret), K_(offset), K_(size), K(align_offset), K(align_size)); - } - } - - return ret; -} - -int ObIMicroBlockCache::ObIMicroBlockIOCallback::process_block( - ObMacroBlockReader *reader, - char *buffer, - const int64_t offset, - const int64_t size, - const ObMicroBlockCacheValue *µ_block, - common::ObKVCacheHandle &handle) -{ - int ret = OB_SUCCESS; - ObMicroBlockData block_data; - ObMicroBlockHeader header; - int64_t pos = 0; - int64_t payload_size = 0; - const char *payload_buf = nullptr; - if (OB_UNLIKELY(NULL == reader || NULL == buffer || offset < 0 || size < 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), KP(reader), KP(buffer), K(offset), K(size)); - } else if (OB_FAIL(header.deserialize(buffer, size, pos))) { - LOG_ERROR("Fail to deserialize record header", K(ret), K_(block_id), K(offset)); - } else if (OB_FAIL(header.check_and_get_record( - buffer, size, MICRO_BLOCK_HEADER_MAGIC, payload_buf, payload_size))) { - LOG_ERROR("Micro block data is corrupted", K(ret), K_(block_id), K(offset), - K(size), K_(tenant_id), KP(buffer), KP(io_buffer_), KP(data_buffer_), KP(this)); - } else { - if (OB_UNLIKELY(!use_block_cache_)) { - // Won't put in cache - } 
else { - ObKVCachePair *kvpair = nullptr; - ObKVCacheInstHandle inst_handle; - const bool overwrite = false; - ObMicroBlockCacheKey key(tenant_id_, block_id_, offset, size); - const int64_t buf_size = header.header_size_ + header.data_length_; - int64_t value_size = calc_value_size(buf_size, header.row_count_); - if (OB_UNLIKELY(OB_SUCCESS == (ret = cache_->get(key, micro_block, handle)))) { - // entry exist, no need to put - } else if (OB_FAIL(cache_->alloc( - tenant_id_, - sizeof(ObMicroBlockCacheKey), - value_size, - kvpair, - handle, - inst_handle))) { - LOG_WARN("Fail to alloc cache buf", K(ret), K_(tenant_id), K(value_size)); - } else { - char *block_buf = reinterpret_cast(kvpair->value_) - + sizeof(ObMicroBlockCacheValue); - new (kvpair->key_) ObMicroBlockCacheKey(tenant_id_, block_id_, offset, size); - ObMicroBlockCacheValue *cache_value - = new (kvpair->value_) ObMicroBlockCacheValue(block_buf, buf_size); - ObMicroBlockData µ_data = cache_value->get_block_data(); - micro_data.type_ = get_type(); - int64_t pos = 0; - if (OB_FAIL(header.serialize(block_buf, header.header_size_, pos))) { - LOG_WARN("Fail to serialize header", K(ret), K(header)); - } else if (FALSE_IT(payload_buf = payload_buf + pos)) { - } else if (FALSE_IT(payload_size = payload_size - pos)) { - } else if (OB_FAIL(reader->decompress_data_with_prealloc_buf( - block_des_meta_.compressor_type_, - payload_buf, - payload_size, - block_buf + pos, - buf_size - pos))) { - LOG_WARN("Fail to decompress data with preallocated buffer", K(ret)); - } else if (OB_FAIL(write_extra_buf_on_demand( - buf_size, micro_data, block_buf))) { - LOG_WARN("Fail to writer extra buffer of block data", - K(ret), K(header), KPC(cache_value)); - } else if (FALSE_IT(micro_block = cache_value)) { - } else if (OB_FAIL(cache_->put_kvpair(inst_handle, kvpair, handle, overwrite))) { - if (OB_ENTRY_EXIST != ret) { - LOG_WARN("Fail to put micro block cache", K(ret)); - } else { - ret = OB_SUCCESS; - } - } else { - const int64_t 
put_size = ObKVStoreMemBlock::get_align_size(key, *cache_value); - if (OB_FAIL(put_size_stat_->add_put_size(put_size))) { - LOG_WARN("add_put_size failed", K(ret), K(put_size)); - } - } - if (OB_FAIL(ret)) { - handle.reset(); - micro_block = nullptr; - } - } - } - - if (OB_FAIL(ret)) { - } else if (use_block_cache_) { - // block already in cache - } else if (OB_FAIL(read_block_and_copy(*reader, buffer, size, block_data, micro_block, handle))) { - LOG_WARN("Fail to read micro block and copy to cache value", K(ret)); - } - } - return ret; -} - -int ObIMicroBlockCache::ObIMicroBlockIOCallback::read_block_and_copy( - ObMacroBlockReader &reader, - char *buffer, - const int64_t size, - ObMicroBlockData &block_data, - const ObMicroBlockCacheValue *µ_block, - ObKVCacheHandle &handle) -{ - int ret = OB_SUCCESS; - bool is_compressed = false; - if (OB_ISNULL(buffer)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null pointer to data buffer", K(ret), KP(buffer)); - } else if (OB_FAIL(reader.decrypt_and_decompress_data( - block_des_meta_, buffer, size, - block_data.get_buf(), block_data.get_buf_size(), is_compressed))) { - LOG_WARN("Fail to decrypt and decompress data", K(ret)); - } else { - block_data.type_ = get_type(); - ObMicroBlockCacheValue value( - block_data.get_buf(), block_data.get_buf_size(), nullptr, 0, block_data.type_); - char *buf = nullptr; - const int64_t buf_len = value.size(); - handle.reset(); - micro_block = nullptr; - ObIKVCacheValue *value_copy = nullptr; - if (OB_ISNULL(buf = static_cast(allocator_->alloc(buf_len)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Failed to allocate value", K(ret), K(buf_len)); - } else if (OB_FAIL(value.deep_copy(buf, buf_len, value_copy))) { - LOG_WARN("Failed to deep copy value", K(ret)); - allocator_->free(buf); - } else { - micro_block = static_cast(value_copy); - } - } - return ret; -} - -int ObIMicroBlockCache::ObIMicroBlockIOCallback::cache_decoders( - const ObColDescIArray &full_col_descs, - const int64_t 
data_length, - ObMicroBlockData µ_data, - char *block_buf) -{ - int ret = OB_SUCCESS; - int64_t decoder_buf_size = 0; - char *decoder_buf = block_buf + data_length; - if (OB_FAIL(ObMicroBlockDecoder::get_decoder_cache_size( - block_buf, - data_length, - decoder_buf_size))) { - LOG_WARN("get decoder cache size failed", K(ret)); - } else if (OB_FAIL(ObMicroBlockDecoder::cache_decoders( - decoder_buf, - decoder_buf_size, - block_buf, - data_length, - full_col_descs))) { - LOG_WARN("cache decoder failed", K(ret)); - } else { - micro_data.get_extra_buf() = decoder_buf; - micro_data.get_extra_size() = decoder_buf_size; - } - return ret; -} - -int ObIMicroBlockCache::ObIMicroBlockIOCallback::transform_index_block( - const ObTableReadInfo &index_read_info, - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf, - ObIndexBlockDataTransformer &transformer) -{ - int ret = OB_SUCCESS; - char *extra_buf = block_buf + data_length; - int64_t extra_size = ObIndexBlockDataTransformer::get_transformed_block_mem_size(micro_data); - if (OB_UNLIKELY(!index_read_info.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid index column read info", K(ret), K(index_read_info)); - } else if (OB_FAIL(transformer.transform(index_read_info, micro_data, extra_buf, extra_size))) { - LOG_WARN("Fail to transform index block format", - K(ret), K(index_read_info), K(micro_data), K(extra_size)); - } else { - micro_data.get_extra_buf() = extra_buf; - micro_data.get_extra_size() = extra_size; - } - return ret; -} - -int ObIMicroBlockCache::ObIMicroBlockIOCallback::assign(const ObIMicroBlockIOCallback &other) -{ - int ret = OB_SUCCESS; - cache_ = other.cache_; - put_size_stat_ = other.put_size_stat_; - allocator_ = other.allocator_; - io_buffer_ = other.io_buffer_; - data_buffer_ = other.data_buffer_; - tenant_id_ = other.tenant_id_; - block_id_ = other.block_id_; - offset_ = other.offset_; - size_ = other.size_; - row_store_type_ = other.row_store_type_; - block_des_meta_ = 
other.block_des_meta_; - use_block_cache_ = other.use_block_cache_; - return ret; -} /*-------------------------------------ObDataMicroBlockCache--------------------------------------*/ int ObDataMicroBlockCache::init(const char *cache_name, const int64_t priority) @@ -758,36 +1048,10 @@ int ObDataMicroBlockCache::init(const char *cache_name, const int64_t priority) return ret; } -int ObDataMicroBlockCache::prefetch( - const uint64_t tenant_id, - const MacroBlockId ¯o_id, - const ObMicroIndexInfo& idx_row, - const common::ObQueryFlag &flag, - const ObTableReadInfo &full_read_info, - const ObTabletHandle &tablet_handle, - ObMacroBlockHandle ¯o_handle) +void ObDataMicroBlockCache::destroy() { - int ret = OB_SUCCESS; - const ObIndexBlockRowHeader *idx_header = idx_row.row_header_; - if (OB_UNLIKELY( - nullptr == idx_header - || !idx_header->is_valid() - || 0 >= idx_header->get_block_size() - || !idx_header->is_data_block())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid data index block row header ", K(ret), K(idx_row)); - } else { - ObDataMicroBlockIOCallback callback; - callback.full_cols_ = &full_read_info.get_columns_desc(); - callback.tablet_handle_ = tablet_handle; - callback.need_write_extra_buf_ = idx_header->is_data_index() - && ObStoreFormat::is_row_store_type_with_encoding(idx_header->get_row_store_type()); - if (OB_FAIL(ObIMicroBlockCache::prefetch( - tenant_id, macro_id, idx_row, flag, macro_handle, callback))) { - LOG_WARN("Fail to prefetch data micro block", K(ret)); - } - } - return ret; + common::ObKVCache::destroy(); + allocator_.destroy(); } int ObDataMicroBlockCache::prefetch( @@ -805,7 +1069,7 @@ int ObDataMicroBlockCache::prefetch( LOG_WARN("Invalid input parameters", K(ret), K(tenant_id)); } else if (OB_FAIL(callback.set_io_ctx(io_param))) { LOG_WARN("Set io context failed", K(ret), K(io_param)); - } else if (FALSE_IT(callback.full_cols_ = &full_read_info.get_columns_desc())) { + } else if (FALSE_IT(callback.read_info_ = &full_read_info)) { 
} else if (OB_FAIL(ObIMicroBlockCache::prefetch( tenant_id, macro_id, io_param, flag, macro_handle, callback))) { LOG_WARN("Fail to prefetch multi data blocks", K(ret)); @@ -865,404 +1129,66 @@ int ObDataMicroBlockCache::get_allocator(common::ObIAllocator *&allocator) return ret; } -void ObDataMicroBlockCache::destroy() +int64_t ObDataMicroBlockCache::calc_value_size(const int64_t data_length, + const ObRowStoreType &type, + const int64_t row_count, + const int64_t request_count, + int64_t &extra_size, + bool &need_decoder) { - common::ObKVCache::destroy(); - allocator_.destroy(); -} - -/*-----------------------------------ObDataMicroBlockIOCallback-----------------------------------*/ -ObDataMicroBlockCache::ObDataMicroBlockIOCallback::ObDataMicroBlockIOCallback() - : ObIMicroBlockIOCallback(), - full_cols_(nullptr), - micro_block_(nullptr), - tablet_handle_(), - handle_(), - need_write_extra_buf_(false) -{ - STATIC_ASSERT(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); -} - -ObDataMicroBlockCache::ObDataMicroBlockIOCallback::~ObDataMicroBlockIOCallback() -{ - if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(micro_block_) && !handle_.is_valid()) { - allocator_->free(const_cast(micro_block_)); - micro_block_ = nullptr; - } - tablet_handle_.reset(); -} - -int64_t ObDataMicroBlockCache::ObDataMicroBlockIOCallback::size() const -{ - return sizeof(*this); -} - -int ObDataMicroBlockCache::ObDataMicroBlockIOCallback::inner_process(const bool is_success) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(cache_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Invalid micro block cache callback, ", KP_(cache), K(ret)); - } else if (is_success) { - ObMacroBlockReader *reader = nullptr; - if (OB_ISNULL(reader = GET_TSI_MULT(ObMacroBlockReader, 1))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Fail to allocate ObMacroBlockReader, ", K(ret)); - } else if (OB_FAIL(process_block(reader, data_buffer_, offset_, size_, micro_block_, handle_))) { - LOG_WARN("process_block 
failed", K(ret)); - } - } - - if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { - allocator_->free(io_buffer_); - io_buffer_ = nullptr; - } - return ret; -} - -int ObDataMicroBlockCache::ObDataMicroBlockIOCallback::inner_deep_copy( - char *buf, - const int64_t buf_len, - ObIOCallback *&callback) const -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len < size())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument, ", KP(buf), K(buf_len), K(ret)); - } else if (OB_ISNULL(cache_) || OB_ISNULL(allocator_)) { - ret = OB_INVALID_DATA; - LOG_WARN("The micro block io callback is not valid, ", KP_(cache), KP_(allocator), K(ret)); - } else { - ObDataMicroBlockIOCallback *pcallback = new (buf) ObDataMicroBlockIOCallback(); - if (OB_FAIL(pcallback->assign(*this))) { - LOG_WARN("fail to assign callback", K(ret)); - } else { - pcallback->full_cols_ = full_cols_; - pcallback->micro_block_ = micro_block_; - pcallback->tablet_handle_ = tablet_handle_; - pcallback->handle_ = handle_; - pcallback->need_write_extra_buf_ = need_write_extra_buf_; - callback = pcallback; - } - } - return ret; -} - -const char *ObDataMicroBlockCache::ObDataMicroBlockIOCallback::get_data() -{ - const char *data = nullptr; - if (OB_NOT_NULL(micro_block_)) { - data = reinterpret_cast (&(micro_block_->get_block_data())); - } - return data; -} -int ObDataMicroBlockCache::ObDataMicroBlockIOCallback::write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) -{ - int ret = OB_SUCCESS; - if (!need_write_extra_buf_ - || !ObStoreFormat::is_row_store_type_with_encoding(row_store_type_) - || !tablet_handle_.is_valid()) { - // Skip - } else if (OB_ISNULL(full_cols_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null full column desc", K(ret), KP_(full_cols)); - } else if (OB_FAIL(ObIMicroBlockIOCallback::cache_decoders( - *full_cols_, data_length, micro_data, block_buf))) { - LOG_WARN("Fail to cache decoders", K(ret)); - } - return 
ret; -} - -int64_t ObDataMicroBlockCache::ObDataMicroBlockIOCallback::calc_value_size( - int64_t data_length, - int64_t row_count) -{ - UNUSED(row_count); + UNUSEDx(row_count, request_count, extra_size); + need_decoder = false; int64_t value_size = sizeof(ObMicroBlockCacheValue) + data_length; - if (ObStoreFormat::is_row_store_type_with_encoding(row_store_type_)) { + if (ObStoreFormat::is_row_store_type_with_encoding(type)) { + need_decoder = true; value_size += ObMicroBlockDecoder::MAX_CACHED_DECODER_BUF_SIZE; } return value_size; } -ObMicroBlockData::Type ObDataMicroBlockCache::ObDataMicroBlockIOCallback::get_type() +int ObDataMicroBlockCache::write_extra_buf(const ObTableReadInfo &read_info, + const char *block_buf, + const int64_t block_size, + const int64_t extra_size, + char *extra_buf, + ObMicroBlockData µ_data) +{ + UNUSEDx(extra_size); + int ret = OB_SUCCESS; + + int64_t decoder_size = 0; + if (OB_FAIL(ObMicroBlockDecoder::get_decoder_cache_size(block_buf, block_size, decoder_size))) { + LOG_WARN("Fail to get decoder cache size", K(ret)); + } else if (OB_FAIL(ObMicroBlockDecoder::cache_decoders(extra_buf, decoder_size, block_buf, + block_size, read_info.get_columns_desc()))) { + LOG_WARN("Fail to set cache decoder", K(ret)); + } else { + micro_data.get_extra_buf() = extra_buf; + micro_data.get_extra_size() = decoder_size; + } + + return ret; +} + +ObMicroBlockData::Type ObDataMicroBlockCache::get_type() { return ObMicroBlockData::DATA_BLOCK; } -/*-----------------------------------ObMultiDataBlockIOCallback-----------------------------------*/ -ObDataMicroBlockCache::ObMultiDataBlockIOCallback::ObMultiDataBlockIOCallback() - : ObIMicroBlockIOCallback(), - full_cols_(nullptr), - io_ctx_(), - io_result_() -{ - STATIC_ASSERT(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); -} - -ObDataMicroBlockCache::ObMultiDataBlockIOCallback::~ObMultiDataBlockIOCallback() -{ - free_result(); -} - -int64_t 
ObDataMicroBlockCache::ObMultiDataBlockIOCallback::size() const -{ - return sizeof(*this); -} - -int ObDataMicroBlockCache::ObMultiDataBlockIOCallback::inner_process(const bool is_success) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(cache_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Invalid micro block cache callback, ", KP_(cache), K(ret)); - } else if (is_success) { - ObMacroBlockReader *reader = nullptr; - if (OB_ISNULL(reader = GET_TSI_MULT(ObMacroBlockReader, 1))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Fail to allocate ObMacroBlockReader, ", K(ret)); - } else if (OB_FAIL(alloc_result())) { - LOG_WARN("alloc_result failed", K(ret)); - } - - const int64_t block_count = io_ctx_.block_count_; - for (int64_t i = 0; OB_SUCC(ret) && i < block_count; ++i) { - const int64_t data_size = io_ctx_.micro_index_infos_[i].get_block_size(); - const int64_t data_offset = io_ctx_.micro_index_infos_[i].get_block_offset() - offset_; - if (OB_FAIL(process_block( - reader, - data_buffer_ + data_offset, - offset_ + data_offset, - data_size, - io_result_.micro_blocks_[i], - io_result_.handles_[i]))) { - LOG_WARN("process_block failed", K(ret)); - } - } - } - - if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { - allocator_->free(io_buffer_); - io_buffer_ = nullptr; - } - - if (OB_FAIL(ret)) { - io_result_.ret_code_ = ret; - } - return ret; -} - -int ObDataMicroBlockCache::ObMultiDataBlockIOCallback::inner_deep_copy(char *buf, - const int64_t buf_len, ObIOCallback *&callback) const -{ - int ret = OB_SUCCESS; - callback = nullptr; - if (OB_UNLIKELY(nullptr == buf || buf_len < size())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument, ", KP(buf), K(buf_len), K(ret)); - } else if (OB_ISNULL(cache_) || OB_ISNULL(allocator_)) { - ret = OB_INVALID_DATA; - LOG_WARN("The micro block io callback is not valid, ", KP_(cache), KP_(allocator), K(ret)); - } else { - ObMultiDataBlockIOCallback *pcallback = new (buf) ObMultiDataBlockIOCallback(); - pcallback->io_ctx_.reset(); 
- if (OB_FAIL(pcallback->assign(*this))) { - LOG_WARN("fail to assign callback", K(ret)); - } else if (OB_FAIL(pcallback->deep_copy_ctx(io_ctx_))) { - LOG_WARN("deep_copy_ctx failed", K(ret)); - } else { - pcallback->full_cols_ = full_cols_; - pcallback->io_result_ = io_result_; - callback = pcallback; - } - } - return ret; -} - -const char *ObDataMicroBlockCache::ObMultiDataBlockIOCallback::get_data() -{ - const char *data = nullptr; - data = reinterpret_cast(&io_result_); - return data; -} - -int ObDataMicroBlockCache::ObMultiDataBlockIOCallback::set_io_ctx( - const ObMultiBlockIOParam &io_param) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!io_param.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid io_param", K(ret), K(io_param)); - } else { - io_ctx_.micro_index_infos_ = &io_param.micro_index_infos_->at(io_param.start_index_); - io_ctx_.block_count_ = io_param.block_count_; - } - return ret; -} - -int ObDataMicroBlockCache::ObMultiDataBlockIOCallback::deep_copy_ctx( - const ObMultiBlockIOCtx &io_ctx) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!io_ctx.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid io_ctx", K(ret), K(io_ctx)); - } else if (OB_ISNULL(allocator_)) { - ret = OB_INNER_STAT_ERROR; - LOG_WARN("allocator_ is null", K(ret), KP(allocator_)); - } else { - void *ptr = nullptr; - int64_t alloc_size = sizeof(ObMicroIndexInfo) * io_ctx.block_count_; - if (OB_ISNULL(ptr = allocator_->alloc(alloc_size))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret), K(alloc_size)); - } else { - io_ctx_.micro_index_infos_ = reinterpret_cast(ptr); - MEMCPY(io_ctx_.micro_index_infos_, io_ctx.micro_index_infos_, alloc_size); - } - - if (OB_SUCC(ret)) { - io_ctx_.block_count_ = io_ctx.block_count_; - } - } - return ret; -} - -int ObDataMicroBlockCache::ObMultiDataBlockIOCallback::alloc_result() -{ - int ret = OB_SUCCESS; - void *ptr = nullptr; - const int64_t block_count = io_ctx_.block_count_; - if (OB_ISNULL(ptr = 
allocator_->alloc(sizeof(ObMicroBlockCacheValue *) * block_count))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc failed", K(ret)); - } else { - io_result_.micro_blocks_ = reinterpret_cast(ptr); - MEMSET(io_result_.micro_blocks_, 0, sizeof(ObMicroBlockCacheValue *) * block_count); - } - - if (OB_SUCC(ret)) { - if (OB_ISNULL(ptr = allocator_->alloc(sizeof(ObKVCacheHandle) * block_count))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc failed", K(ret)); - } else { - io_result_.handles_ = new (ptr) ObKVCacheHandle[block_count]; - io_result_.block_count_ = block_count; - } - } - return ret; -} - -void ObDataMicroBlockCache::ObMultiDataBlockIOCallback::free_result() -{ - if (OB_NOT_NULL(allocator_)) { - if (OB_NOT_NULL(io_result_.micro_blocks_)) { - if (OB_NOT_NULL(io_result_.handles_)) { - for (int64_t i = 0; i < io_result_.block_count_; ++i) { - if (!io_result_.handles_[i].is_valid() - && OB_NOT_NULL(io_result_.micro_blocks_[i])) { - allocator_->free(const_cast(io_result_.micro_blocks_[i])); - } - } - } - allocator_->free(io_result_.micro_blocks_); - io_result_.micro_blocks_ = nullptr; - } - if (OB_NOT_NULL(io_result_.handles_)) { - for (int64_t i = 0; i < io_result_.block_count_; ++i) { - io_result_.handles_[i].~ObKVCacheHandle(); - } - allocator_->free(io_result_.handles_); - io_result_.handles_ = nullptr; - io_result_.block_count_ = 0; - } - } -} - -int ObDataMicroBlockCache::ObMultiDataBlockIOCallback::write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) -{ - int ret = OB_SUCCESS; - if (!ObStoreFormat::is_row_store_type_with_encoding(row_store_type_)) { - // Skip - } else if (OB_ISNULL(full_cols_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null full column desc", K(ret), KP_(full_cols)); - } else if (OB_FAIL(ObIMicroBlockIOCallback::cache_decoders( - *full_cols_, data_length, micro_data, block_buf))) { - LOG_WARN("Fail to cache decoders", K(ret)); - } - return ret; -} - -int64_t 
ObDataMicroBlockCache::ObMultiDataBlockIOCallback::calc_value_size( - int64_t data_length, - int64_t row_count) -{ - UNUSED(row_count); - int64_t value_size = sizeof(ObMicroBlockCacheValue) + data_length; - if (ObStoreFormat::is_row_store_type_with_encoding(row_store_type_)) { - value_size += ObMicroBlockDecoder::MAX_CACHED_DECODER_BUF_SIZE; - } - return value_size; -} - -ObMicroBlockData::Type ObDataMicroBlockCache::ObMultiDataBlockIOCallback::get_type() -{ - return ObMicroBlockData::DATA_BLOCK; -} /*-------------------------------------ObIndexMicroBlockCache-------------------------------------*/ -int ObIndexMicroBlockCache::init(const char *cache_name, const int64_t priority) +ObIndexMicroBlockCache::ObIndexMicroBlockCache() + : ObDataMicroBlockCache() { - int ret = OB_SUCCESS; - const int64_t mem_limit = 4 * 1024 * 1024 * 1024LL; - if (OB_SUCCESS != (ret = common::ObKVCache::init( - cache_name, priority))) { - STORAGE_LOG(WARN, "Fail to init kv cache, ", K(ret)); - } else if (OB_FAIL(allocator_.init(mem_limit, OB_MALLOC_BIG_BLOCK_SIZE, OB_MALLOC_BIG_BLOCK_SIZE))) { - STORAGE_LOG(WARN, "Fail to init io allocator, ", K(ret)); - } else { - allocator_.set_label(ObModIds::OB_SSTABLE_MICRO_BLOCK_ALLOCATOR); - } - return ret; } -int ObIndexMicroBlockCache::prefetch( - const uint64_t tenant_id, - const MacroBlockId ¯o_id, - const ObMicroIndexInfo& idx_row, - const common::ObQueryFlag &flag, - const ObTableReadInfo &index_read_info, - const ObTabletHandle &tablet_handle, - ObMacroBlockHandle ¯o_handle) +ObIndexMicroBlockCache::~ObIndexMicroBlockCache() { - int ret = OB_SUCCESS; - const ObIndexBlockRowHeader *idx_header = idx_row.row_header_; - if (OB_ISNULL(idx_header)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid null index block row header", K(ret), K(idx_row)); - } else if (OB_UNLIKELY( - !idx_header->is_valid() - || 0 >= idx_header->get_block_size() - || idx_header->is_data_block())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid data index block row 
header ", K(ret), K(idx_row)); - } else { - ObIndexMicroBlockIOCallback callback; - callback.index_read_info_ = &index_read_info; - callback.tablet_handle_ = tablet_handle; - if (OB_FAIL(ObIMicroBlockCache::prefetch( - tenant_id, macro_id, idx_row, flag, macro_handle, callback))) { - LOG_WARN("Fail to prefetch data micro block", K(ret)); - } - } - return ret; +} + +int ObIndexMicroBlockCache::init(const char *cache_name, const int64_t priority) +{ + return ObDataMicroBlockCache::init(cache_name, priority); } int ObIndexMicroBlockCache::load_block( @@ -1277,9 +1203,8 @@ int ObIndexMicroBlockCache::load_block( int ret = OB_SUCCESS; ObMacroBlockReadInfo macro_read_info; ObMacroBlockHandle macro_handle; - // TODO: make deserialize micro block with allocator static and remove tmp inner_macro_reader + // TODO: @chengji make deserialize micro block with allocator static and remove tmp inner_macro_reader ObMacroBlockReader inner_macro_reader; - ObIndexBlockDataTransformer idx_transformer; bool is_compressed = false; const bool need_deep_copy = true; if (OB_UNLIKELY(!micro_block_id.is_valid()) @@ -1306,13 +1231,9 @@ int ObIndexMicroBlockCache::load_block( if (OB_ISNULL(extra_buf = reinterpret_cast(allocator->alloc(extra_buf_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("Failed to alloc memory for transformed index block", K(ret)); - } else if (OB_FAIL(idx_transformer.transform( - *read_info, block_data, extra_buf, extra_buf_size))) { + } else if (OB_FAIL(write_extra_buf(*read_info, nullptr, 0, extra_buf_size, extra_buf, block_data))) { LOG_WARN("Failed to transform index block", K(ret)); } else { - block_data.extra_buf_ = extra_buf; - block_data.extra_size_ = extra_buf_size; - block_data.type_ = ObMicroBlockData::INDEX_BLOCK; EVENT_INC(ObStatEventIds::IO_READ_PREFETCH_MICRO_COUNT); EVENT_ADD(ObStatEventIds::IO_READ_PREFETCH_MICRO_BYTES, micro_block_id.size_); } @@ -1321,153 +1242,49 @@ int ObIndexMicroBlockCache::load_block( return ret; } -int 
ObIndexMicroBlockCache::get_cache(BaseBlockCache *&cache) +int64_t ObIndexMicroBlockCache::calc_value_size(const int64_t data_length, + const ObRowStoreType &type, + const int64_t row_count, + const int64_t request_count, + int64_t &extra_size, + bool &need_decoder) { + UNUSED(type); + need_decoder = true; + extra_size = ObIndexBlockDataTransformer::get_transformed_block_mem_size(row_count, request_count); + return sizeof(ObMicroBlockCacheValue) + data_length + extra_size; +} + +int ObIndexMicroBlockCache::write_extra_buf(const ObTableReadInfo &read_info, + const char *block_buf, + const int64_t block_size, + const int64_t extra_size, + char *extra_buf, + ObMicroBlockData µ_data) +{ + UNUSEDx(block_buf, block_size); int ret = OB_SUCCESS; - cache = this; - return ret; -} -int ObIndexMicroBlockCache::get_allocator(common::ObIAllocator *&allocator) -{ - int ret = OB_SUCCESS; - allocator = &allocator_; - return ret; -} - -void ObIndexMicroBlockCache::destroy() -{ - common::ObKVCache::destroy(); - allocator_.destroy(); -} - -/*-----------------------------------ObIndexMicroBlockIOCallback----------------------------------*/ -ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::ObIndexMicroBlockIOCallback() - : ObIMicroBlockIOCallback(), - index_read_info_(nullptr), - micro_block_(nullptr), - tablet_handle_(), - handle_() -{ - STATIC_ASSERT(sizeof(*this) <= CALLBACK_BUF_SIZE, "IOCallback buf size not enough"); -} - -ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::~ObIndexMicroBlockIOCallback() -{ - if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(micro_block_) && !handle_.is_valid()) { - allocator_->free(const_cast(micro_block_)); - micro_block_ = nullptr; - } - tablet_handle_.reset(); -} - -int64_t ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::size() const -{ - return sizeof(*this); -} - -int ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::inner_process(const bool is_success) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(cache_)) { - ret = OB_ERR_UNEXPECTED; - 
LOG_WARN("Invalid micro block cache callback, ", KP_(cache), K(ret)); - } else if (is_success) { - ObMacroBlockReader *reader = nullptr; - if (OB_ISNULL(reader = GET_TSI_MULT(ObMacroBlockReader, 1))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Fail to allocate ObMacroBlockReader, ", K(ret)); - } else if (OB_FAIL(process_block(reader, data_buffer_, offset_, size_, micro_block_, handle_))) { - LOG_WARN("process_block failed", K(ret)); - } - } - - if (OB_NOT_NULL(allocator_) && OB_NOT_NULL(io_buffer_)) { - allocator_->free(io_buffer_); - io_buffer_ = nullptr; - } - return ret; -} - -int ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::inner_deep_copy( - char *buf, - const int64_t buf_len, - ObIOCallback *&callback) const -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(buf) || OB_UNLIKELY(buf_len < size())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument, ", KP(buf), K(buf_len), K(ret)); - } else if (OB_ISNULL(cache_) || OB_ISNULL(allocator_)) { - ret = OB_INVALID_DATA; - LOG_WARN("The micro block io callback is not valid, ", KP_(cache), KP_(allocator), K(ret)); - } else { - ObIndexMicroBlockIOCallback *pcallback = new (buf) ObIndexMicroBlockIOCallback(); - if (OB_FAIL(pcallback->assign(*this))) { - LOG_WARN("fail to assign callback", K(ret)); - } else { - pcallback->index_read_info_ = index_read_info_; - pcallback->micro_block_ = micro_block_; - pcallback->tablet_handle_ = tablet_handle_; - pcallback->handle_ = handle_; - callback = pcallback; - } - } - return ret; -} - -const char *ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::get_data() -{ - const char *data = NULL; - if (OB_NOT_NULL(micro_block_)) { - data = reinterpret_cast (&(micro_block_->get_block_data())); - } - return data; -} -int ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) -{ - int ret = OB_SUCCESS; ObIndexBlockDataTransformer *transformer = nullptr; - ObDecoderAllocator 
*allocator = nullptr; - if (OB_ISNULL(index_read_info_) - || OB_UNLIKELY(!index_read_info_->is_valid() || !micro_data.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument", K(ret), KPC(index_read_info_), K(micro_data)); - } else if (OB_ISNULL(allocator = get_decoder_allocator())) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to allocate decoder allocator", K(ret)); - } else if (OB_ISNULL(transformer = GET_TSI_MULT(ObIndexBlockDataTransformer, 1))) { + if (OB_ISNULL(transformer = GET_TSI_MULT(ObIndexBlockDataTransformer, 1))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("Fail to allocate ObIndexBlockDataTransformer", K(ret)); - } else if (OB_FAIL(ObIMicroBlockIOCallback::transform_index_block( - *index_read_info_, - data_length, - micro_data, - block_buf, - *transformer))) { - LOG_WARN("Failed to transform index block", - K(ret), KPC(index_read_info_), K(data_length), K(micro_data)); + } else if (OB_FAIL(transformer->transform(read_info, micro_data, extra_buf, extra_size))) { + LOG_WARN("Fail to transform index block data", K(ret)); + } else { + micro_data.extra_buf_ = extra_buf; + micro_data.extra_size_ = extra_size; + micro_data.type_ = ObMicroBlockData::INDEX_BLOCK; } + return ret; } -int64_t ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::calc_value_size( - int64_t data_length, - int64_t row_count) -{ - int64_t value_size = sizeof(ObMicroBlockCacheValue) + data_length; - value_size += ObIndexBlockDataTransformer::get_transformed_block_mem_size( - row_count, index_read_info_->get_request_count()); - return value_size; -} - -ObMicroBlockData::Type ObIndexMicroBlockCache::ObIndexMicroBlockIOCallback::get_type() +ObMicroBlockData::Type ObIndexMicroBlockCache::get_type() { return ObMicroBlockData::INDEX_BLOCK; } + }//end namespace blocksstable }//end namespace oceanbase diff --git a/src/storage/blocksstable/ob_micro_block_cache.h b/src/storage/blocksstable/ob_micro_block_cache.h index f372c13c0..6f8aa1a83 100644 --- 
a/src/storage/blocksstable/ob_micro_block_cache.h +++ b/src/storage/blocksstable/ob_micro_block_cache.h @@ -20,6 +20,9 @@ #include "storage/ob_i_table.h" #include "storage/blocksstable/ob_micro_block_info.h" #include "storage/meta_mem/ob_tablet_handle.h" +#include "lib/stat/ob_diagnose_info.h" +#include "storage/blocksstable/ob_block_manager.h" + namespace oceanbase { @@ -141,22 +144,121 @@ public: virtual int add_put_size(const int64_t put_size) = 0; }; +// New Block IO Callbacks for version 4.0 +class ObIMicroBlockIOCallback : public common::ObIOCallback +{ +public: + ObIMicroBlockIOCallback(); + virtual ~ObIMicroBlockIOCallback(); + virtual int alloc_io_buf(char *&io_buf, int64_t &io_buf_size, int64_t &aligned_offset); + VIRTUAL_TO_STRING_KV(KP_(io_buffer)); +protected: + friend class ObIMicroBlockCache; + int process_block( + ObMacroBlockReader *reader, + char *buffer, + const int64_t offset, + const int64_t size, + const ObMicroBlockCacheValue *µ_block, + common::ObKVCacheHandle &cache_handle); + int assign(const ObIMicroBlockIOCallback &other); +private: + int read_block_and_copy( + ObMacroBlockReader &reader, + char *buffer, + const int64_t size, + ObMicroBlockData &block_data, + const ObMicroBlockCacheValue *µ_block, + common::ObKVCacheHandle &handle); + + static const int64_t ALLOC_BUF_RETRY_INTERVAL = 100 * 1000; + static const int64_t ALLOC_BUF_RETRY_TIMES = 3; +protected: + ObIMicroBlockCache *cache_; + ObIPutSizeStat *put_size_stat_; + common::ObIAllocator *allocator_; + char *io_buffer_; + char *data_buffer_; + const ObTableReadInfo *read_info_; + uint64_t tenant_id_; + MacroBlockId block_id_; + int64_t offset_; + int64_t size_; + ObRowStoreType row_store_type_; + ObMicroBlockDesMeta block_des_meta_; + bool use_block_cache_; + bool need_write_extra_buf_; +}; + +class ObSingleMicroBlockIOCallback : public ObIMicroBlockIOCallback +{ +public: + ObSingleMicroBlockIOCallback(); + virtual ~ObSingleMicroBlockIOCallback(); + virtual int64_t size() const; + 
virtual int inner_process(const bool is_success) override; + virtual int inner_deep_copy( + char *buf, const int64_t buf_len, + ObIOCallback *&callback) const override; + virtual const char *get_data() override; + INHERIT_TO_STRING_KV("ObIMicroBlockIOCallback", ObIMicroBlockIOCallback, KP_(micro_block), + K_(tablet_handle), K_(cache_handle), K_(need_write_extra_buf)); +private: + friend class ObIMicroBlockCache; + // Notice: lifetime shoule be longer than AIO or deep copy here + const ObMicroBlockCacheValue *micro_block_; + ObTabletHandle tablet_handle_; + common::ObKVCacheHandle cache_handle_; +}; + +class ObMultiDataBlockIOCallback : public ObIMicroBlockIOCallback +{ +public: + ObMultiDataBlockIOCallback(); + virtual ~ObMultiDataBlockIOCallback(); + virtual int64_t size() const; + virtual int inner_process(const bool is_success) override; + virtual int inner_deep_copy( + char *buf, const int64_t buf_len, + ObIOCallback *&callback) const override; + virtual const char *get_data() override; + INHERIT_TO_STRING_KV("ObIMicroBlockIOCallback", ObIMicroBlockIOCallback, K_(io_ctx)); +private: + friend class ObDataMicroBlockCache; + int set_io_ctx(const ObMultiBlockIOParam &io_param); + void reset_io_ctx() { io_ctx_.reset(); } + int deep_copy_ctx(const ObMultiBlockIOCtx &io_ctx); + int alloc_result(); + void free_result(); + // Notice: lifetime shoule be longer than AIO or deep copy here + ObMultiBlockIOCtx io_ctx_; + ObMultiBlockIOResult io_result_; +}; + class ObIMicroBlockCache : public ObIPutSizeStat { public: typedef common::ObIKVCache BaseBlockCache; +public: int get_cache_block( const uint64_t tenant_id, const MacroBlockId block_id, const int64_t offset, const int64_t size, ObMicroBlockBufferHandle &handle); - virtual int prefetch( + virtual int reserve_kvpair( + const ObMicroBlockDesc µ_block_desc, + const ObTableReadInfo &read_info, + ObKVCacheInstHandle &inst_handle, + ObKVCacheHandle &cache_handle, + ObKVCachePair *&kvpair, + int64_t &kvpair_size); + int 
prefetch( const uint64_t tenant_id, const MacroBlockId ¯o_id, const ObMicroIndexInfo& idx_row, const common::ObQueryFlag &flag, - const ObTableReadInfo &full_read_info, + const ObTableReadInfo &read_info, const ObTabletHandle &tablet_handle, ObMacroBlockHandle ¯o_handle); virtual int load_block( @@ -165,87 +267,33 @@ public: const ObTableReadInfo *read_info, ObMacroBlockReader *macro_reader, ObMicroBlockData &block_data, - ObIAllocator *allocator); + ObIAllocator *allocator) = 0; virtual void destroy() = 0; virtual int get_cache(BaseBlockCache *&cache) = 0; virtual int get_allocator(common::ObIAllocator *&allocator) = 0; + virtual int64_t calc_value_size(const int64_t data_length, const ObRowStoreType &type, const int64_t row_count, + const int64_t request_count, int64_t &extra_size, bool &need_decoder) = 0; + virtual int write_extra_buf(const ObTableReadInfo &read_info, const char *block_buf, const int64_t block_size, + const int64_t extra_size, char *extra_buf, ObMicroBlockData µ_data) = 0; + virtual ObMicroBlockData::Type get_type() = 0; virtual int add_put_size(const int64_t put_size) override; -public: - // New Block IO Callbacks for version 4.0 - class ObIMicroBlockIOCallback : public common::ObIOCallback - { - public: - ObIMicroBlockIOCallback(); - virtual ~ObIMicroBlockIOCallback(); - virtual int alloc_io_buf(char *&io_buf, int64_t &io_buf_size, int64_t &aligned_offset); - VIRTUAL_TO_STRING_KV(KP_(io_buffer)); - protected: - friend class ObIMicroBlockCache; - int process_block( - ObMacroBlockReader *reader, - char *buffer, - const int64_t offset, - const int64_t size, - const ObMicroBlockCacheValue *µ_block, - common::ObKVCacheHandle &handle); - int assign(const ObIMicroBlockIOCallback &other); - static int cache_decoders( - const ObColDescIArray &full_col_descs, - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf); - static int transform_index_block( - const ObTableReadInfo &index_read_info, - const int64_t data_length, - 
ObMicroBlockData µ_data, - char *block_buf, - ObIndexBlockDataTransformer &transformer); - private: - int read_block_and_copy( - ObMacroBlockReader &reader, - char *buffer, - const int64_t size, - ObMicroBlockData &block_data, - const ObMicroBlockCacheValue *µ_block, - common::ObKVCacheHandle &handle); - virtual int write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) = 0; - virtual int64_t calc_value_size(int64_t data_length, int64_t row_count) = 0; - virtual ObMicroBlockData::Type get_type() = 0; - - static const int64_t ALLOC_BUF_RETRY_INTERVAL = 100 * 1000; - static const int64_t ALLOC_BUF_RETRY_TIMES = 3; - protected: - BaseBlockCache *cache_; - ObIPutSizeStat *put_size_stat_; - common::ObIAllocator *allocator_; - char *io_buffer_; - char *data_buffer_; - uint64_t tenant_id_; - MacroBlockId block_id_; - int64_t offset_; - int64_t size_; - ObRowStoreType row_store_type_; - ObMicroBlockDesMeta block_des_meta_; - bool use_block_cache_; - }; protected: - virtual int prefetch( + int prefetch( const uint64_t tenant_id, const MacroBlockId ¯o_id, const ObMicroIndexInfo& idx_row, const common::ObQueryFlag &flag, ObMacroBlockHandle ¯o_handle, ObIMicroBlockIOCallback &callback); - virtual int prefetch( + int prefetch( const uint64_t tenant_id, const MacroBlockId ¯o_id, const ObMultiBlockIOParam &io_param, const ObQueryFlag &flag, ObMacroBlockHandle ¯o_handle, ObIMicroBlockIOCallback &callback); + int alloc_base_kvpair(const ObMicroBlockDesc µ_block_desc, const int64_t key_size, const int64_t value_size, + ObKVCacheInstHandle &inst_handle, ObKVCacheHandle &cache_handle, ObKVCachePair *&kvpair); }; class ObDataMicroBlockCache @@ -257,14 +305,7 @@ public: virtual ~ObDataMicroBlockCache() {} int init(const char *cache_name, const int64_t priority = 1); virtual void destroy() override; - int prefetch( - const uint64_t tenant_id, - const MacroBlockId ¯o_id, - const ObMicroIndexInfo& idx_row, - const common::ObQueryFlag &flag, - 
const ObTableReadInfo &full_read_info, - const ObTabletHandle &tablet_handle, - ObMacroBlockHandle ¯o_handle) override; + using ObIMicroBlockCache::prefetch; int prefetch( const uint64_t tenant_id, const MacroBlockId ¯o_id, @@ -281,90 +322,22 @@ public: ObIAllocator *allocator) override; virtual int get_cache(BaseBlockCache *&cache) override; virtual int get_allocator(common::ObIAllocator *&allocator) override; -public: - class ObDataMicroBlockIOCallback : public ObIMicroBlockIOCallback - { - public: - ObDataMicroBlockIOCallback(); - virtual ~ObDataMicroBlockIOCallback(); - virtual int64_t size() const; - virtual int inner_process(const bool is_success) override; - virtual int inner_deep_copy( - char *buf, const int64_t buf_len, - ObIOCallback *&callback) const override; - virtual const char *get_data() override; - INHERIT_TO_STRING_KV("ObIMicroBlockIOCallback", ObIMicroBlockIOCallback, - KPC(full_cols_), KP_(micro_block), K_(handle), K_(need_write_extra_buf)); - private: - virtual int64_t calc_value_size(int64_t data_length, int64_t row_count) override; - virtual int write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) override; - virtual ObMicroBlockData::Type get_type() override; - private: - friend class ObDataMicroBlockCache; - // Notice: lifetime shoule be longer than AIO or deep copy here - const ObColDescIArray *full_cols_; - const ObMicroBlockCacheValue *micro_block_; - ObTabletHandle tablet_handle_; - common::ObKVCacheHandle handle_; - bool need_write_extra_buf_; - }; - class ObMultiDataBlockIOCallback : public ObIMicroBlockIOCallback - { - public: - ObMultiDataBlockIOCallback(); - virtual ~ObMultiDataBlockIOCallback(); - virtual int64_t size() const; - virtual int inner_process(const bool is_success) override; - virtual int inner_deep_copy( - char *buf, const int64_t buf_len, - ObIOCallback *&callback) const override; - virtual const char *get_data() override; - 
INHERIT_TO_STRING_KV("ObIMicroBlockIOCallback", ObIMicroBlockIOCallback, - KPC(full_cols_), K_(io_ctx)); - private: - virtual int write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) override; - virtual int64_t calc_value_size(int64_t data_length, int64_t row_count) override; - virtual ObMicroBlockData::Type get_type() override; - private: - friend class ObDataMicroBlockCache; - int set_io_ctx(const ObMultiBlockIOParam &io_param); - void reset_io_ctx() { io_ctx_.reset(); } - int deep_copy_ctx(const ObMultiBlockIOCtx &io_ctx); - int alloc_result(); - void free_result(); - // Notice: lifetime shoule be longer than AIO or deep copy here - const ObColDescIArray *full_cols_; - ObMultiBlockIOCtx io_ctx_; - ObMultiBlockIOResult io_result_; - }; + virtual int64_t calc_value_size(const int64_t data_length, const ObRowStoreType &type, const int64_t row_count, + const int64_t request_count, int64_t &extra_size, bool &need_decoder); + virtual int write_extra_buf(const ObTableReadInfo &read_info, const char *block_buf, const int64_t block_size, + const int64_t extra_size, char *extra_buf, ObMicroBlockData µ_data); + virtual ObMicroBlockData::Type get_type() override; private: common::ObConcurrentFIFOAllocator allocator_; DISALLOW_COPY_AND_ASSIGN(ObDataMicroBlockCache); }; -class ObIndexMicroBlockCache - : public common::ObKVCache, - public ObIMicroBlockCache +class ObIndexMicroBlockCache : public ObDataMicroBlockCache { public: - ObIndexMicroBlockCache() {} - virtual ~ObIndexMicroBlockCache() {} + ObIndexMicroBlockCache(); + virtual ~ObIndexMicroBlockCache(); int init(const char *cache_name, const int64_t priority = 10); - virtual void destroy() override; - int prefetch( - const uint64_t tenant_id, - const MacroBlockId ¯o_id, - const ObMicroIndexInfo& idx_row, - const common::ObQueryFlag &flag, - const ObTableReadInfo &index_read_info, - const ObTabletHandle &tablet_handle, - ObMacroBlockHandle ¯o_handle) override; int 
load_block( const ObMicroBlockId µ_block_id, const ObMicroBlockDesMeta &des_meta, @@ -372,42 +345,14 @@ public: ObMacroBlockReader *macro_reader, ObMicroBlockData &block_data, ObIAllocator *allocator) override; - virtual int get_cache(BaseBlockCache *&cache) override; - virtual int get_allocator(common::ObIAllocator *&allocator) override; -public: - class ObIndexMicroBlockIOCallback : public ObIMicroBlockIOCallback - { - public: - ObIndexMicroBlockIOCallback(); - virtual ~ObIndexMicroBlockIOCallback(); - virtual int64_t size() const; - virtual int inner_process(const bool is_success) override; - virtual int inner_deep_copy( - char *buf, const int64_t buf_len, - ObIOCallback *&callback) const override; - virtual const char *get_data() override; - INHERIT_TO_STRING_KV("ObIMicroBlockIOCallback", ObIMicroBlockIOCallback, - KPC(index_read_info_), KP_(micro_block), K_(handle)); - private: - virtual int64_t calc_value_size(int64_t data_length, int64_t row_count) override; - virtual int write_extra_buf_on_demand( - const int64_t data_length, - ObMicroBlockData µ_data, - char *block_buf) override; - virtual ObMicroBlockData::Type get_type() override; - private: - friend class ObIndexMicroBlockCache; - // Notice: lifetime shoule be longer than AIO or deep copy here - const ObTableReadInfo *index_read_info_; - const ObMicroBlockCacheValue *micro_block_; - ObTabletHandle tablet_handle_; - common::ObKVCacheHandle handle_; - }; -private: - common::ObConcurrentFIFOAllocator allocator_; - DISALLOW_COPY_AND_ASSIGN(ObIndexMicroBlockCache); + virtual int64_t calc_value_size(const int64_t data_length, const ObRowStoreType &type, const int64_t row_count, + const int64_t request_count, int64_t &extra_size, bool &need_decoder); + virtual int write_extra_buf(const ObTableReadInfo &read_info, const char *block_buf, const int64_t block_size, + const int64_t extra_size, char *extra_buf, ObMicroBlockData µ_data); + virtual ObMicroBlockData::Type get_type() override; }; + }//end namespace 
blocksstable
 }//end namespace oceanbase
 #endif
diff --git a/src/storage/blocksstable/ob_micro_block_hash_index.cpp b/src/storage/blocksstable/ob_micro_block_hash_index.cpp
new file mode 100644
index 000000000..109f4f940
--- /dev/null
+++ b/src/storage/blocksstable/ob_micro_block_hash_index.cpp
@@ -0,0 +1,187 @@
+// Copyright (c) 2022 OceanBase
+// OceanBase is licensed under Mulan PubL v2.
+// You can use this software according to the terms and conditions of the Mulan PubL v2.
+// You may obtain a copy of Mulan PubL v2 at:
+// http://license.coscl.org.cn/MulanPubL-2.0
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PubL v2 for more details.
+
+#include "ob_macro_block.h"
+#include "ob_micro_block_hash_index.h"
+#include "ob_imicro_block_reader.h"
+#include "share/ob_define.h"
+#include "share/schema/ob_table_schema.h"
+
+namespace oceanbase
+{
+namespace blocksstable
+{
+
+ /**
+ * -------------------------------------------------------------------ObMicroBlockHashIndexBuilder----------------------------------------------------------
+ */
+// Feeds one row of the micro block to the builder.
+// When can_be_added_to_hash_index(row) holds, the first schema_rowkey_col_cnt
+// datums are hashed with murmurhash (seed 0) and the pair (hash, row_index_)
+// is recorded through internal_add(). On success row_index_ advances and
+// last_key_with_L_flag_ is refreshed, whether or not the row was indexed.
+// Returns OB_INVALID_ARGUMENT for an invalid row or one with fewer columns
+// than the schema rowkey; failures of assign/murmurhash/internal_add are
+// returned as-is (OB_NOT_SUPPORTED from internal_add without a WARN log).
+int ObMicroBlockHashIndexBuilder::add(const ObDatumRow &row)
+{
+  int ret = OB_SUCCESS;
+  const ObStorageDatumUtils &datum_utils = data_store_desc_->datum_utils_;
+  const int64_t schema_rowkey_col_cnt = data_store_desc_->schema_rowkey_col_cnt_;
+  if (OB_UNLIKELY(!row.is_valid() || row.get_column_count() < schema_rowkey_col_cnt)) {
+    ret = OB_INVALID_ARGUMENT;
+    STORAGE_LOG(WARN, "Invalid input argument", K(ret),
+        K(row), K(schema_rowkey_col_cnt));
+  } else if (can_be_added_to_hash_index(row)) {
+    // Calculate hash value by schema_rowkey_col_cnt.
+    uint64_t hash_value = 0;
+    ObDatumRowkey tmp_rowkey;
+    if (OB_FAIL(tmp_rowkey.assign(row.storage_datums_, schema_rowkey_col_cnt))) {
+      STORAGE_LOG(WARN, "Failed to assign rowkey", K(ret), K(row), K(schema_rowkey_col_cnt));
+    } else if (OB_FAIL(tmp_rowkey.murmurhash(0, datum_utils, hash_value))) {
+      STORAGE_LOG(WARN, "Failed to calc rowkey hash", K(ret), K(tmp_rowkey), K(datum_utils));
+    } else if (OB_FAIL(internal_add(hash_value, row_index_))) {
+      if (ret != OB_NOT_SUPPORTED) {
+        STORAGE_LOG(WARN, "Failed to add row index to hash_index", K(ret), K(row_index_));
+      }
+    }
+  }
+  if (OB_SUCC(ret)) {
+    ++row_index_;
+    last_key_with_L_flag_ = row.mvcc_row_flag_.is_last_multi_version_row();
+  }
+  return ret;
+}
+
+// Serializes the hash index into buffer as: reserved byte, num_buckets, then
+// num_buckets bucket bytes. Each bucket byte is a row index, NO_ENTRY or
+// COLLISION.
+// Returns OB_NOT_SUPPORTED (nothing written) when count_ is not above
+// MIN_ROWS_BUILD_HASH_INDEX or when collision_count * MAX_COLLISION_RATIO
+// exceeds count_; OB_ERR_UNEXPECTED when the bucket number exceeds
+// MAX_BUCKET_NUMBER; otherwise the result of the buffer writes.
+int ObMicroBlockHashIndexBuilder::build_block(ObSelfBufferWriter &buffer)
+{
+  int ret = OB_SUCCESS;
+  // ObMicroBlockHashIndexBuilder must be valid when call build_block.
+  if (OB_UNLIKELY(count_ <= ObMicroBlockHashIndex::MIN_ROWS_BUILD_HASH_INDEX)) {
+    ret = OB_NOT_SUPPORTED;
+  } else {
+    uint16_t num_buckets = caculate_bucket_number(count_);
+    if (OB_UNLIKELY(num_buckets > ObMicroBlockHashIndex::MAX_BUCKET_NUMBER)) {
+      ret = OB_ERR_UNEXPECTED;
+      STORAGE_LOG(WARN, "Too much buckets ", K(ret), K(num_buckets));
+    } else {
+      const uint8_t no_entry = ObMicroBlockHashIndex::NO_ENTRY;
+      MEMSET(buckets_, no_entry, num_buckets);
+      uint32_t collision_count = 0;
+      // Write the row_index array
+      for (int i = 0; i < count_; ++i) {
+        const uint64_t hash_value = hash_values_[i];
+        const uint8_t row_index = row_indexes_[i];
+        const uint16_t buck_idx = static_cast(hash_value % num_buckets);
+        if (buckets_[buck_idx] == ObMicroBlockHashIndex::NO_ENTRY) {
+          buckets_[buck_idx] = row_index;
+        } else if (buckets_[buck_idx] != ObMicroBlockHashIndex::COLLISION) {
+          // Same bucket cannot store two different offset, mark collision.
+          // The first clash on a bucket counts both the earlier occupant and
+          // the current row; later clashes on the same bucket count one each.
+          buckets_[buck_idx] = ObMicroBlockHashIndex::COLLISION;
+          collision_count += 2;
+        } else {
+          ++collision_count;
+        }
+      }
+
+      // Only persist the index when collisions are rare enough.
+      if ((collision_count * ObMicroBlockHashIndex::MAX_COLLISION_RATIO) <= count_) {
+        const uint8_t reserved_byte = ObMicroBlockHashIndex::RESERVED_BYTE;
+        if (OB_FAIL(buffer.write(reserved_byte))) {
+          STORAGE_LOG(WARN, "Data buffer fail to write reserved byte", K(ret), K(num_buckets), K(count_), K(reserved_byte));
+        } else if (OB_FAIL(buffer.write(num_buckets))) {
+          STORAGE_LOG(WARN, "Data buffer fail to write hash index buckets number", K(ret), K(num_buckets), K(count_));
+        } else if (OB_FAIL(buffer.write(reinterpret_cast(buckets_), num_buckets))) {
+          STORAGE_LOG(WARN, "Data buffer fail to write hash index buckets", K(ret), K(num_buckets), K(count_));
+        }
+      } else {
+        ret = OB_NOT_SUPPORTED;
+      }
+    }
+  }
+  STORAGE_LOG(DEBUG, "Build hash index block", K(count_), K(ret));
+  return ret;
+}
+
+// Sets need_build to true when the rowkey is not made up solely of
+// integer-type columns, or when it has at least MIN_INT_COLUMNS_NEEDED
+// integer columns. need_build stays false if fetching the rowkey column ids
+// fails, in which case that error is returned.
+int ObMicroBlockHashIndexBuilder::need_build_hash_index(
+    const ObMergeSchema &merge_schema,
+    bool &need_build)
+{
+  int ret = OB_SUCCESS;
+  need_build = false;
+  common::ObSEArray rowkey_col_desc_array;
+  if (OB_FAIL(merge_schema.get_rowkey_column_ids(rowkey_col_desc_array))) {
+    STORAGE_LOG(WARN, "Failed to get rowkey column ids", K(ret));
+  } else {
+    int64_t int_column_count = 0;
+    for (int64_t i = 0; i < rowkey_col_desc_array.count(); ++i) {
+      if (rowkey_col_desc_array[i].col_type_.is_integer_type()) {
+        int_column_count++;
+      }
+    }
+    if (int_column_count != rowkey_col_desc_array.count()
+        || int_column_count >= ObMicroBlockHashIndex::MIN_INT_COLUMNS_NEEDED) {
+      need_build = true;
+    }
+  }
+  return ret;
+}
+
+// A non-ghost row is indexed when this is a major merge, when the previously
+// added row carried the last-multi-version flag, or when nothing has been
+// indexed yet.
+OB_INLINE bool ObMicroBlockHashIndexBuilder::can_be_added_to_hash_index(const ObDatumRow &row)
+{
+  return (data_store_desc_->is_major_merge()
+          || last_key_with_L_flag_
+          || is_empty())
+         && !row.is_ghost_row();
+}
+
+// Appends (hash_value, row_index) to the pending entries.
+// Row indexes at or above MAX_OFFSET_SUPPORTED give OB_NOT_SUPPORTED; a row
+// index not greater than the last recorded one gives OB_ERR_UNEXPECTED.
+int ObMicroBlockHashIndexBuilder::internal_add(const uint64_t hash_value, const uint32_t row_index)
+{
+  int ret = OB_SUCCESS;
+  if 
(row_index >= ObMicroBlockHashIndex::MAX_OFFSET_SUPPORTED) {
+    ret = OB_NOT_SUPPORTED;
+  } else if (OB_UNLIKELY(!is_empty() && row_index <= row_indexes_[count_ - 1])) {
+    ret = OB_ERR_UNEXPECTED;
+    const uint32_t front_row_index = row_indexes_[count_ - 1];
+    STORAGE_LOG(WARN, "Unexpected row_index ", K(ret), K(row_index), K(front_row_index), K(count_));
+  } else {
+    // hash_values_ holds uint32_t and row_indexes_ holds uint8_t entries.
+    hash_values_[count_] = static_cast<uint32_t>(hash_value);
+    row_indexes_[count_] = static_cast<uint8_t>(row_index);
+    count_++;
+  }
+  return ret;
+}
+
+/**
+ * -------------------------------------------------------------------ObMicroBlockHashIndex-----------------------------------------------------------------
+ */
+// Binds this index to the hash index stored at the tail of micro_block_data.
+// The index starts hash_index_offset_from_end_ bytes before the end of the
+// buffer and is laid out as: reserved byte, num_buckets (uint16_t), bucket
+// bytes.
+// May be called repeatedly; is_inited_ is true afterwards only if this call
+// succeeded.
+// Returns OB_INVALID_DATA when the micro block has no header and
+// OB_ERR_UNEXPECTED when the stored index fails validation.
+int ObMicroBlockHashIndex::init(const ObMicroBlockData &micro_block_data)
+{
+  int ret = OB_SUCCESS;
+  // ObMicroBlockHashIndex can be inited repeatedly.
+  // Drop the state of any earlier init first: bucket_table_ and num_buckets_
+  // are overwritten below before validation, so a failed re-init must not
+  // leave the index flagged as usable.
+  is_inited_ = false;
+  const ObMicroBlockHeader *micro_block_header = micro_block_data.get_micro_header();
+  if (OB_UNLIKELY(nullptr == micro_block_header)) {
+    ret = OB_INVALID_DATA;
+    STORAGE_LOG(WARN, "Invalid micro block header", K(ret), K(micro_block_data));
+  } else {
+    const uint32_t hash_index_offset_from_end = micro_block_header->hash_index_offset_from_end_;
+    const char* start_data = micro_block_data.get_buf() + micro_block_data.get_buf_size()
+                                 - hash_index_offset_from_end;
+    const uint8 reserved_byte = reinterpret_cast<const uint8_t *>(start_data)[0];
+    bucket_table_ = reinterpret_cast<const uint8_t *>(start_data + get_fixed_header_size());
+    num_buckets_ = reinterpret_cast<const uint16_t *>(start_data + 1)[0];
+    STORAGE_LOG(DEBUG, "ObMicroBlockHashIndex init", K(num_buckets_), K(reserved_byte));
+    bool is_valid = num_buckets_ != 0 && num_buckets_ <= MAX_BUCKET_NUMBER
+                      && reserved_byte == RESERVED_BYTE
+                      && get_serialize_size(num_buckets_) == hash_index_offset_from_end;
+    if (OB_UNLIKELY(!is_valid)) {
+      ret = OB_ERR_UNEXPECTED;
+      STORAGE_LOG(WARN, "Unexpected hash index", K(ret), K(num_buckets_), K(reserved_byte),
+          K(hash_index_offset_from_end));
+    } else {
+      is_inited_ = true;
+    }
+  }
+  // Propagate the error set above; returning OB_SUCCESS here would hide a
+  // missing header or a corrupted index from the caller.
+  return ret;
+}
+
+}//end 
namespace blocksstable
+}//end namespace oceanbase
diff --git a/src/storage/blocksstable/ob_micro_block_hash_index.h b/src/storage/blocksstable/ob_micro_block_hash_index.h
new file mode 100644
index 000000000..2b51f791f
--- /dev/null
+++ b/src/storage/blocksstable/ob_micro_block_hash_index.h
@@ -0,0 +1,179 @@
+// Copyright (c) 2022 OceanBase
+// OceanBase is licensed under Mulan PubL v2.
+// You can use this software according to the terms and conditions of the Mulan PubL v2.
+// You may obtain a copy of Mulan PubL v2 at:
+// http://license.coscl.org.cn/MulanPubL-2.0
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PubL v2 for more details.
+
+#ifndef OCEANBASE_STORAGE_BLOCKSSTABLE_OB_MICRO_BLOCK_HASH_INDEX_H_
+#define OCEANBASE_STORAGE_BLOCKSSTABLE_OB_MICRO_BLOCK_HASH_INDEX_H_
+
+#include "lib/oblog/ob_log_module.h"
+#include "ob_data_buffer.h"
+
+namespace oceanbase
+{
+namespace share
+{
+namespace schema
+{
+class ObMergeSchema;
+}
+}
+namespace blocksstable
+{
+struct ObDataStoreDesc;
+struct ObMicroBlockData;
+// Read-side view of the hash index stored in a micro block. A rowkey hash
+// selects one bucket byte, which is either a row index, NO_ENTRY or COLLISION.
+class ObMicroBlockHashIndex
+{
+public:
+  // Bucket byte meaning "no row hashed here".
+  static const uint8_t NO_ENTRY = 255;
+  // Bucket byte meaning "more than one row hashed here".
+  static const uint8_t COLLISION = 254;
+  // Row indexes stored in the index must be below this value.
+  static const uint8_t MAX_OFFSET_SUPPORTED = 253;
+  static const uint8_t RESERVED_BYTE = 0;
+  static constexpr double DEFAULT_UTIL_RATIO = 0.75;
+  static constexpr double BUCKET_PER_KEY = 1 / DEFAULT_UTIL_RATIO;
+  static const uint32_t MAX_BUCKET_NUMBER = static_cast<uint32_t>(BUCKET_PER_KEY * MAX_OFFSET_SUPPORTED) | 1;
+  static constexpr double MAX_COLLISION_RATIO = 1.5;
+  static const uint32_t MIN_ROWS_BUILD_HASH_INDEX = 16;
+  static const uint32_t MIN_INT_COLUMNS_NEEDED = 3;
+public:
+  ObMicroBlockHashIndex()
+    : is_inited_(false),
+      num_buckets_(0),
+      bucket_table_(nullptr)
+  {
+  }
+  // Returns the bucket byte for hash_value; only its low 32 bits take part,
+  // matching the uint32_t hashes kept by the builder. num_buckets_ must be
+  // non-zero, i.e. init() must have succeeded.
+  OB_INLINE uint8_t find(const uint64_t hash_value) const
+  {
+    const uint16_t idx = static_cast<uint16_t>(
+        static_cast<uint32_t>(hash_value) % num_buckets_);
+    return bucket_table_[idx];
+  }
+  OB_INLINE bool is_inited() const
+  {
+    return is_inited_;
+  }
+  OB_INLINE void reset()
+  {
+    is_inited_ = false;
+  }
+  OB_INLINE void reuse()
+  {
+    reset();
+  }
+  // Serialized size of an index with num_bucket buckets: fixed header plus
+  // one byte per bucket.
+  OB_INLINE static uint32_t get_serialize_size(uint32_t num_bucket)
+  {
+    return sizeof(uint8_t) * num_bucket + get_fixed_header_size();
+  }
+  OB_INLINE static uint32_t get_fixed_header_size()
+  {
+    // reserved byte(1 byte) + num_buckets(2 bytes).
+    return sizeof(uint16_t) + sizeof(uint8_t);
+  }
+  // Serialized size of the index that starts at data, read from the
+  // num_buckets field that follows the reserved byte.
+  OB_INLINE static uint32_t hash_index_size(const char *data)
+  {
+    uint32_t num_bucket = reinterpret_cast<const uint16_t *>(data + 1)[0];
+    return get_serialize_size(num_bucket);
+  }
+  int init(const ObMicroBlockData &micro_block_data);
+public:
+  bool is_inited_;
+  uint16_t num_buckets_;
+  const uint8_t *bucket_table_;
+};
+
+// Collects (rowkey hash, row index) pairs while a micro block is written and
+// serializes them as a hash index in build_block().
+class ObMicroBlockHashIndexBuilder
+{
+public:
+  ObMicroBlockHashIndexBuilder()
+    : count_(0),
+      row_index_(0),
+      last_key_with_L_flag_(false),
+      data_store_desc_(nullptr),
+      is_inited_(false)
+  {
+  }
+  ~ObMicroBlockHashIndexBuilder() {}
+  // Binds the builder to data_store_desc, which add() dereferences.
+  // Returns OB_INIT_TWICE when already inited and OB_INVALID_ARGUMENT for a
+  // null descriptor.
+  int init(const ObDataStoreDesc *data_store_desc)
+  {
+    int ret = OB_SUCCESS;
+    if (OB_UNLIKELY(is_inited_)) {
+      ret = OB_INIT_TWICE;
+      STORAGE_LOG(WARN, "Micro_hash_index_builder is inited twice", K(ret));
+    } else if (OB_UNLIKELY(nullptr == data_store_desc)) {
+      // Reject it here rather than crash on the first add().
+      ret = OB_INVALID_ARGUMENT;
+      STORAGE_LOG(WARN, "Invalid data store desc", K(ret), KP(data_store_desc));
+    } else {
+      row_index_ = 0;
+      count_ = 0;
+      last_key_with_L_flag_ = false;
+      data_store_desc_ = data_store_desc;
+      is_inited_ = true;
+    }
+    return ret;
+  }
+  OB_INLINE bool is_valid() const
+  {
+    return is_inited_;
+  }
+  // Clears the collected entries and marks the builder invalid; the bound
+  // data_store_desc_ is kept.
+  OB_INLINE void reset()
+  {
+    row_index_ = 0;
+    count_ = 0;
+    last_key_with_L_flag_ = false;
+    is_inited_ = false;
+  }
+  OB_INLINE bool is_empty() const
+  {
+    return 0 == count_;
+  }
+  // Bucket count for count entries: count * BUCKET_PER_KEY truncated to
+  // uint16_t, with the lowest bit forced on so the result is odd.
+  OB_INLINE uint16_t caculate_bucket_number(uint32_t count) const
+  {
+    uint16_t estimated_num_buckets =
+        static_cast<uint16_t>(count * ObMicroBlockHashIndex::BUCKET_PER_KEY);
+    estimated_num_buckets |= 1;
+    return estimated_num_buckets;
+  }
+  // Serialized size the index would have for the current entry count (or one
+  // more when plus_one is set); 0 when the builder is not valid or the count
+  // is not above MIN_ROWS_BUILD_HASH_INDEX.
+  OB_INLINE int64_t estimate_size(bool plus_one = false) const
+  {
+    int64_t size = 0;
+    if (is_valid()) {
+      const uint32_t count = plus_one ? (count_ + 1) : count_;
+      if (count > ObMicroBlockHashIndex::MIN_ROWS_BUILD_HASH_INDEX) {
+        uint16_t estimated_num_buckets = caculate_bucket_number(count);
+        size = ObMicroBlockHashIndex::get_serialize_size(estimated_num_buckets);
+      }
+    }
+    return size;
+  }
+  // Clears the collected entries and marks the builder valid again.
+  OB_INLINE void reuse()
+  {
+    row_index_ = 0;
+    count_ = 0;
+    last_key_with_L_flag_ = false;
+    is_inited_ = true;
+  }
+  int add(const ObDatumRow &row);
+  int build_block(ObSelfBufferWriter &buffer);
+  static int need_build_hash_index(
+      const ObMergeSchema &merge_schema,
+      bool &need_build);
+private:
+  OB_INLINE bool can_be_added_to_hash_index(const ObDatumRow &row);
+  int internal_add(const uint64_t hash_value, const uint32_t row_index);
+private:
+  uint32_t count_;      // number of entries recorded in row_indexes_/hash_values_
+  uint32_t row_index_;  // index of the next row passed to add()
+  bool last_key_with_L_flag_;
+  const ObDataStoreDesc *data_store_desc_;
+  bool is_inited_;
+  uint8_t buckets_[ObMicroBlockHashIndex::MAX_BUCKET_NUMBER];
+  uint8_t row_indexes_[ObMicroBlockHashIndex::MAX_OFFSET_SUPPORTED];
+  uint32_t hash_values_[ObMicroBlockHashIndex::MAX_OFFSET_SUPPORTED];
+};
+
+} // end namespace blocksstable
+} // end namespace oceanbase
+
+#endif // OCEANBASE_STORAGE_BLOCKSSTABLE_OB_MICRO_BLOCK_HASH_INDEX_H_
diff --git a/src/storage/blocksstable/ob_micro_block_header.cpp b/src/storage/blocksstable/ob_micro_block_header.cpp
index 557ef50c0..d064e896e 100644
--- a/src/storage/blocksstable/ob_micro_block_header.cpp
+++ b/src/storage/blocksstable/ob_micro_block_header.cpp
@@ -49,7 +49,7 @@ bool ObMicroBlockHeader::is_valid() const
 {
   bool valid_data =
       header_size_ == get_serialize_size(column_count_, has_column_checksum_)
-      && version_ >= MICRO_BLOCK_HEADER_VERSION
+      && version_ >= MICRO_BLOCK_HEADER_VERSION_1
       && MICRO_BLOCK_HEADER_MAGIC == magic_
       && column_count_ >= rowkey_column_count_
       && rowkey_column_count_ > 0
@@ -296,7 +296,10 @@ bool ObMicroBlockHeader::has_out_row_column() const
       : encoding_has_out_row_column_;
 }
-
+bool 
ObMicroBlockHeader::is_contain_hash_index() const +{ + return version_ >= MICRO_BLOCK_HEADER_VERSION_2 && contains_hash_index_ == 1; +} }//end namespace blocksstable }//end namespace oceanbase \ No newline at end of file diff --git a/src/storage/blocksstable/ob_micro_block_header.h b/src/storage/blocksstable/ob_micro_block_header.h index ad1641571..2f549950f 100644 --- a/src/storage/blocksstable/ob_micro_block_header.h +++ b/src/storage/blocksstable/ob_micro_block_header.h @@ -30,7 +30,9 @@ public: uint16_t rowkey_column_count_; struct { uint16_t has_column_checksum_ : 1; - uint16_t reserved16_ : 15; + uint8_t contains_hash_index_ : 1; + uint16_t hash_index_offset_from_end_ : 10; + uint16_t reserved16_ : 4; }; uint32_t row_count_; uint8_t row_store_type_; @@ -96,6 +98,7 @@ public: bool contain_uncommitted_rows() const { return contain_uncommitted_rows_; } bool has_out_row_column() const; bool is_last_row_last_flag() const { return is_last_row_last_flag_; } + bool is_contain_hash_index() const; }__attribute__((packed)); }//end namespace blocksstable }//end namespace oceanbase diff --git a/src/storage/blocksstable/ob_micro_block_reader.cpp b/src/storage/blocksstable/ob_micro_block_reader.cpp index 31c135759..e4e4c69c7 100644 --- a/src/storage/blocksstable/ob_micro_block_reader.cpp +++ b/src/storage/blocksstable/ob_micro_block_reader.cpp @@ -15,7 +15,7 @@ #include "storage/tx/ob_trans_ctx_mgr.h" #include "storage/tx_table/ob_tx_table.h" #include "share/ob_force_print_log.h" -#include "storage/access/ob_block_row_store.h" +#include "storage/access/ob_aggregated_store.h" namespace oceanbase { @@ -171,7 +171,8 @@ int ObIMicroBlockFlatReader::init(const ObMicroBlockData &block_data) * */ int ObMicroBlockGetReader::inner_init( const ObMicroBlockData &block_data, - const ObTableReadInfo &read_info) + const ObTableReadInfo &read_info, + const ObDatumRowkey &rowkey) { int ret = OB_SUCCESS; if (OB_FAIL(ObIMicroBlockFlatReader::init(block_data))) { @@ -179,7 +180,11 @@ int 
ObMicroBlockGetReader::inner_init( } else { row_count_ = header_->row_count_; read_info_ = &read_info; - is_inited_ = true; + if (OB_FAIL(ObIMicroBlockGetReader::init_hash_index(block_data, hash_index_, header_))) { + LOG_WARN("failed to init micro block hash index", K(ret), K(rowkey), K(block_data), K(read_info)); + } else { + is_inited_ = true; + } } return ret; } @@ -195,7 +200,7 @@ int ObMicroBlockGetReader::get_row( if (OB_UNLIKELY(!read_info.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid columns info ", K(ret), K(read_info)); - } else if (OB_FAIL(inner_init(block_data, read_info))) { + } else if (OB_FAIL(inner_init(block_data, read_info, rowkey))) { LOG_WARN("fail to inner init ", K(ret), K(block_data)); } else if (OB_FAIL(locate_rowkey(rowkey, row_idx))){ if (OB_BEYOND_THE_RANGE != ret) { @@ -221,7 +226,7 @@ int ObMicroBlockGetReader::exist_row( bool &found) { int ret = OB_SUCCESS; - if (OB_FAIL(inner_init(block_data, read_info))) { + if (OB_FAIL(inner_init(block_data, read_info, rowkey))) { LOG_WARN("failed to inner init", K(ret), K(block_data)); } else { int64_t row_idx; @@ -246,12 +251,15 @@ int ObMicroBlockGetReader::exist_row( int ObMicroBlockGetReader::locate_rowkey(const ObDatumRowkey &rowkey, int64_t &row_idx) { int ret = OB_SUCCESS; - bool is_equal = false; + bool need_binary_search = false; + bool found = false; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); - } else { - bool equal = false; + } else if (OB_FAIL(locate_rowkey_fast_path(rowkey, row_idx, need_binary_search, found))) { + LOG_WARN("faile to locate rowkey by hash index", K(ret)); + } else if (need_binary_search) { + bool is_equal = false; if (OB_FAIL(ObIMicroBlockFlatReader::find_bound_(rowkey, true/*lower_bound*/, 0, row_count_, *read_info_, row_idx, is_equal))) { LOG_WARN("fail to lower_bound rowkey", K(ret)); @@ -266,6 +274,54 @@ int ObMicroBlockGetReader::locate_rowkey(const ObDatumRowkey &rowkey, int64_t &r ret = OB_BEYOND_THE_RANGE; } } + } else if 
(!found) {
+    ret = OB_BEYOND_THE_RANGE;
+  }
+  return ret;
+}
+
+// Hash-index fast path for point lookups.
+// Without an initialized hash index, or when the bucket is marked COLLISION,
+// only need_binary_search is set to true; row_idx and found are left
+// untouched. NO_ENTRY, or a candidate row whose rowkey compares unequal,
+// yields found = false with row_idx = INVALID_ROW_INDEX; an equal candidate
+// yields found = true with its row index.
+// Returns OB_ERR_UNEXPECTED when the bucket points at or past row_count_, or
+// the error from murmurhash/compare_meta_rowkey.
+int ObMicroBlockGetReader::locate_rowkey_fast_path(const ObDatumRowkey &rowkey,
+                                                   int64_t &row_idx,
+                                                   bool &need_binary_search,
+                                                   bool &found)
+{
+  int ret = OB_SUCCESS;
+  need_binary_search = false;
+  if (hash_index_.is_inited()) {
+    uint64_t hash_value = 0;
+    const blocksstable::ObStorageDatumUtils &datum_utils = read_info_->get_datum_utils();
+    if (OB_FAIL(rowkey.murmurhash(0, datum_utils, hash_value))) {
+      LOG_WARN("Failed to calc rowkey hash", K(ret), K(rowkey), K(datum_utils));
+    } else {
+      const uint8_t tmp_row_idx = hash_index_.find(hash_value);
+      if (tmp_row_idx == ObMicroBlockHashIndex::NO_ENTRY) {
+        row_idx = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX;
+        found = false;
+      } else if (tmp_row_idx == ObMicroBlockHashIndex::COLLISION) {
+        need_binary_search = true;
+      } else {
+        // The bucket names one candidate row; compare its rowkey to confirm.
+        int32_t compare_result = 0;
+        if (OB_UNLIKELY(tmp_row_idx >= row_count_)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("Unexpected row_idx", K(ret), K(tmp_row_idx), K(row_count_), K(rowkey), KPC_(read_info));
+        } else if (OB_FAIL(flat_row_reader_.compare_meta_rowkey(
+                    rowkey,
+                    *read_info_,
+                    data_begin_ + index_data_[tmp_row_idx],
+                    index_data_[tmp_row_idx + 1] - index_data_[tmp_row_idx],
+                    compare_result))) {
+          LOG_WARN("fail to compare rowkey", K(ret), K(rowkey), KPC_(read_info));
+        } else if (0 != compare_result) {
+          row_idx = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX;
+          found = false;
+        } else {
+          row_idx = tmp_row_idx;
+          found = true;
+        }
+      }
+    }
+  } else {
+    need_binary_search = true;
   }
   return ret;
 }
@@ -574,7 +630,10 @@ int ObMicroBlockReader::get_rows(
   } else {
     for (int64_t idx = 0; OB_SUCC(ret) && idx < row_cap; ++idx) {
       row_idx = row_ids[idx];
-      if (OB_FAIL(flat_row_reader_.read_row(
+      if (OB_UNLIKELY(row_idx < 0 || row_idx >= header_->row_count_)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("Uexpected row idx", K(ret), K(row_idx), KPC(header_));
+      } else if (OB_FAIL(flat_row_reader_.read_row(
                   data_begin_ + 
index_data_[row_idx], index_data_[row_idx + 1] - index_data_[row_idx], read_info_, @@ -644,7 +703,10 @@ int ObMicroBlockReader::get_row_count( ObStorageDatum datum; for (int64_t i = 0; i < row_cap; ++i) { row_idx = row_ids[i]; - if (OB_FAIL(flat_row_reader_.read_column( + if (OB_UNLIKELY(row_idx < 0 || row_idx >= header_->row_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Uexpected row idx", K(ret), K(row_idx), KPC(header_)); + } else if (OB_FAIL(flat_row_reader_.read_column( data_begin_ + index_data_[row_idx], index_data_[row_idx + 1] - index_data_[row_idx], col_idx, @@ -658,5 +720,94 @@ int ObMicroBlockReader::get_row_count( return ret; } +int ObMicroBlockReader::get_min_or_max( + int32_t col, + const share::schema::ObColumnParam *col_param, + const int64_t *row_ids, + const int64_t row_cap, + ObMicroBlockAggInfo &agg_info) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == header_ || + nullptr == read_info_ || + row_cap > header_->row_count_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), KPC(header_), KPC_(read_info), K(row_cap), K(col)); + } else { + int64_t row_idx = common::OB_INVALID_INDEX; + const common::ObIArray &cols_index = read_info_->get_columns_index(); + int64_t col_idx = cols_index.at(col); + ObStorageDatum datum; + for (int64_t i = 0; OB_SUCC(ret) && i < row_cap; ++i) { + row_idx = row_ids[i]; + if (OB_UNLIKELY(row_idx < 0 || row_idx >= header_->row_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Uexpected row idx", K(ret), K(row_idx), KPC(header_)); + } else if (OB_FAIL(flat_row_reader_.read_column( + data_begin_ + index_data_[row_idx], + index_data_[row_idx + 1] - index_data_[row_idx], + col_idx, + datum))) { + LOG_WARN("fail to read column", K(ret), K(i), K(col_idx), K(row_idx)); + } else if (datum.is_nop()) { + if (OB_UNLIKELY(nullptr == col_param || col_param->get_orig_default_value().is_nop_value())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected datum, can not process in batch", K(ret), K(col), 
KPC(col_param));
+        } else if (OB_FAIL(datum.from_obj_enhance(col_param->get_orig_default_value()))) {
+          STORAGE_LOG(WARN, "Failed to transfer obj to datum", K(ret));
+        }
+      }
+      if (OB_FAIL(ret)) {
+      } else {
+        agg_info.update_min_or_max(datum);
+        LOG_DEBUG("update min/max", K(i), K(row_idx), K(datum), K(agg_info));
+      }
+    }
+  }
+  return ret;
+}
+
+// Reads each of the row_cap rows listed in row_ids into row_buf and passes it
+// to every aggregate cell in agg_cells.
+// Returns OB_INVALID_ARGUMENT when the reader has no header or read info,
+// row_cap exceeds the block's row count, row_buf is invalid or agg_cells is
+// empty; OB_ERR_UNEXPECTED for a row id outside [0, row_count_); otherwise the
+// first error from reserve/read_row/process, which also stops the iteration.
+int ObMicroBlockReader::get_aggregate_result(
+    const int64_t *row_ids,
+    const int64_t row_cap,
+    ObDatumRow &row_buf,
+    common::ObIArray<storage::ObAggCell *> &agg_cells)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(nullptr == header_ ||
+                  nullptr == read_info_ ||
+                  row_cap > header_->row_count_ ||
+                  !row_buf.is_valid() ||
+                  agg_cells.empty())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("Invalid argument", K(ret), KPC(header_), KPC(read_info_), K(row_cap), K(row_buf));
+  } else if (OB_FAIL(row_buf.reserve(read_info_->get_request_count()))) {
+    LOG_WARN("Failed to reserve row buf", K(ret), K(row_buf), KPC(read_info_));
+  } else {
+    int64_t row_idx = common::OB_INVALID_INDEX;
+    for (int64_t idx = 0; OB_SUCC(ret) && idx < row_cap; ++idx) {
+      row_idx = row_ids[idx];
+      if (OB_UNLIKELY(row_idx < 0 || row_idx >= header_->row_count_)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("Uexpected row idx", K(ret), K(row_idx), KPC(header_));
+      } else if (OB_FAIL(flat_row_reader_.read_row(
+                  data_begin_ + index_data_[row_idx],
+                  index_data_[row_idx + 1] - index_data_[row_idx],
+                  read_info_,
+                  row_buf))) {
+        LOG_WARN("Fail to read row", K(ret), K(idx), K(row_idx), K(row_cap), KPC_(header));
+      } else {
+        // Each cell picks what it needs out of the freshly read row.
+        for (int64_t i = 0; OB_SUCC(ret) && i < agg_cells.count(); ++i) {
+          if (OB_FAIL(agg_cells.at(i)->process(row_buf))) {
+            LOG_WARN("Failed to process agg cell", K(ret), K(row_buf));
+          }
+        }
+      }
+    }
+  }
+  return ret;
+}
+
 }
 }
diff --git a/src/storage/blocksstable/ob_micro_block_reader.h b/src/storage/blocksstable/ob_micro_block_reader.h
index 4cecb746c..4ad517daf 100644
--- 
a/src/storage/blocksstable/ob_micro_block_reader.h +++ b/src/storage/blocksstable/ob_micro_block_reader.h @@ -14,6 +14,7 @@ #define OB_STORAGE_BLOCKSSTABLE_OB_MICRO_BLOCK_READER_H_ #include "ob_imicro_block_reader.h" +#include "ob_micro_block_hash_index.h" #include "ob_row_reader.h" #include "sql/engine/basic/ob_pushdown_filter.h" @@ -23,6 +24,7 @@ using namespace common; using namespace storage; namespace storage { struct PushdownFilterInfo; +class ObAggCell; } namespace blocksstable { @@ -104,6 +106,17 @@ public: OB_ASSERT(nullptr != header_); return header_->column_count_; } + int get_min_or_max( + int32_t col, + const share::schema::ObColumnParam *col_param, + const int64_t *row_ids, + const int64_t row_cap, + ObMicroBlockAggInfo &agg_info); + int get_aggregate_result( + const int64_t *row_ids, + const int64_t row_cap, + ObDatumRow &row_buf, + common::ObIArray &agg_cells); OB_INLINE bool single_version_rows() { return nullptr != header_ && header_->single_version_rows_; } protected: @@ -127,7 +140,8 @@ class ObMicroBlockGetReader : public ObIMicroBlockFlatReader, public ObIMicroBlo public: ObMicroBlockGetReader() : ObIMicroBlockFlatReader(), - ObIMicroBlockGetReader() + ObIMicroBlockGetReader(), + hash_index_() {} virtual ~ObMicroBlockGetReader() {} virtual int get_row( @@ -143,8 +157,16 @@ public: bool &found) final; int locate_rowkey(const ObDatumRowkey &rowkey, int64_t &row_idx); protected: - int inner_init(const ObMicroBlockData &block_data, const ObTableReadInfo &read_info); - + int inner_init(const ObMicroBlockData &block_data, + const ObTableReadInfo &read_info, + const ObDatumRowkey &rowkey); +private: + int locate_rowkey_fast_path(const ObDatumRowkey &rowkey, + int64_t &row_idx, + bool &need_binary_search, + bool &found); +private: + ObMicroBlockHashIndex hash_index_; }; } //end namespace blocksstable diff --git a/src/storage/blocksstable/ob_micro_block_row_getter.cpp b/src/storage/blocksstable/ob_micro_block_row_getter.cpp index d6c9cb89a..fad04e705 
100644 --- a/src/storage/blocksstable/ob_micro_block_row_getter.cpp +++ b/src/storage/blocksstable/ob_micro_block_row_getter.cpp @@ -140,10 +140,10 @@ int ObMicroBlockRowGetter::init( ret = OB_ERR_UNEXPECTED; LOG_WARN("null read info", K(ret), K(context.use_fuse_row_cache_), K(context.enable_put_row_cache()), K(param.read_with_same_schema()), K(param)); - } else if (OB_FAIL(row_.init(*context.allocator_, read_info_->get_request_count()))) { + } else if (OB_FAIL(row_.init(*context.stmt_allocator_, read_info_->get_request_count()))) { LOG_WARN("Failed to init datum row", K(ret)); } else if (context.enable_put_row_cache() && param.read_with_same_schema() - && OB_FAIL(cache_project_row_.init(*context.allocator_, read_info_->get_request_count()))) { + && OB_FAIL(cache_project_row_.init(*context.stmt_allocator_, read_info_->get_request_count()))) { STORAGE_LOG(WARN, "Failed to init cache project row", K(ret)); } else { LOG_DEBUG("success to init micro block row getter", K(param)); @@ -158,9 +158,11 @@ int ObMicroBlockRowGetter::switch_context( const blocksstable::ObSSTable *sstable) { int ret = OB_SUCCESS; - row_.reset(); - cache_project_row_.reset(); - if (OB_UNLIKELY(!param.is_valid()) + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "ObMicroBlockRowGetter is not inited", K(ret)); + } else if (OB_UNLIKELY(!param.is_valid()) || OB_UNLIKELY(!context.is_valid()) || OB_ISNULL(sstable)) { ret = OB_INVALID_ARGUMENT; @@ -177,11 +179,13 @@ int ObMicroBlockRowGetter::switch_context( ret = OB_ERR_UNEXPECTED; LOG_WARN("null read info", K(ret), K(context.use_fuse_row_cache_), K(context.enable_put_row_cache()), K(param.read_with_same_schema()), K(param)); - } else if (OB_FAIL(row_.init(*context.allocator_, read_info_->get_request_count()))) { - LOG_WARN("Failed to init datum row", K(ret)); - } else if (context.enable_put_row_cache() && param.read_with_same_schema() - && OB_FAIL(cache_project_row_.init(*context.allocator_, read_info_->get_request_count()))) { - 
STORAGE_LOG(WARN, "Failed to init cache project row", K(ret)); + } else { + if (context.enable_put_row_cache() && param.read_with_same_schema()) { + if (cache_project_row_.is_valid()) { + } else if (OB_FAIL(cache_project_row_.init(*context.stmt_allocator_, read_info_->get_request_count()))) { + STORAGE_LOG(WARN, "Failed to init cache project row", K(ret)); + } + } } return ret; } @@ -391,7 +395,6 @@ int ObMicroBlockRowGetter::inner_get_row( return ret; } -// TODO: remove this later if no store row needed to return in multi-scan if not found int ObMicroBlockRowGetter::get_not_exist_row(const ObDatumRowkey &rowkey, const ObDatumRow *&row) { int ret = OB_SUCCESS; @@ -403,17 +406,15 @@ int ObMicroBlockRowGetter::get_not_exist_row(const ObDatumRowkey &rowkey, const if (OB_ISNULL(read_info = param_->get_read_info())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null read_info", K(ret), K_(param)); - } else if (OB_FAIL(row_.reserve(read_info->get_request_count()))) { + } else if (OB_FAIL(row_.reserve(rowkey.get_datum_cnt()))) { LOG_WARN("fail to reserve datum row", K(ret), KPC(read_info)); } else { - row_.count_ = read_info->get_request_count(); + row_.count_ = rowkey.get_datum_cnt(); row_.row_flag_.set_flag(ObDmlFlag::DF_NOT_EXIST); + //TODO maybe we do not need to copy the rowkey datum for (int64_t i = 0; i < rowkey.get_datum_cnt(); i++) { row_.storage_datums_[i] = rowkey.datums_[i]; } - for (int64_t i = rowkey.get_datum_cnt(); i < read_info->get_request_count(); i++) { - row_.storage_datums_[i].set_nop(); - } row = &row_; } } diff --git a/src/storage/blocksstable/ob_micro_block_writer.cpp b/src/storage/blocksstable/ob_micro_block_writer.cpp index 04f7d0ad2..79c7317d4 100644 --- a/src/storage/blocksstable/ob_micro_block_writer.cpp +++ b/src/storage/blocksstable/ob_micro_block_writer.cpp @@ -24,7 +24,6 @@ ObMicroBlockWriter::ObMicroBlockWriter() :micro_block_size_limit_(0), column_count_(0), rowkey_column_count_(0), - header_(NULL), data_buffer_(0, "MicrBlocWriter", false), 
index_buffer_(0, "MicrBlocWriter", false), need_calc_column_chksum_(false), @@ -40,16 +39,11 @@ int ObMicroBlockWriter::init( const int64_t micro_block_size_limit, const int64_t rowkey_column_count, const int64_t column_count/* = 0*/, - const bool need_calc_column_chksum/* = false*/, - const ObRowStoreType row_store_type/* = FLAT_ROW_STORE*/) + const bool need_calc_column_chksum/* = false*/) { int ret = OB_SUCCESS; reset(); - if (OB_UNLIKELY(FLAT_ROW_STORE != row_store_type)) { - ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "invalid argument", K(ret), K(row_store_type)); - } else if (OB_FAIL(check_input_param(micro_block_size_limit, column_count, rowkey_column_count, - row_store_type))) { + if (OB_FAIL(check_input_param(micro_block_size_limit, column_count, rowkey_column_count))) { STORAGE_LOG(WARN, "micro block writer fail to check input param.", K(ret), K(micro_block_size_limit), K(column_count), K(rowkey_column_count)); } else { @@ -90,7 +84,7 @@ int ObMicroBlockWriter::inner_init() int ObMicroBlockWriter::try_to_append_row(const int64_t &row_length) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(get_block_size() + row_length + INDEX_ENTRY_SIZE > block_size_upper_bound_)) { + if (OB_UNLIKELY(get_future_block_size(row_length) > block_size_upper_bound_)) { ret = OB_BUF_NOT_ENOUGH; } return ret; @@ -176,6 +170,30 @@ int ObMicroBlockWriter::build_block(char *&buf, int64_t &size) return ret; } +int ObMicroBlockWriter::append_hash_index(ObMicroBlockHashIndexBuilder& hash_index_builder) +{ + int ret = OB_SUCCESS; + header_->contains_hash_index_ = 0; + if (hash_index_builder.is_valid()) { + if (is_contain_uncommitted_row()) { + ret = OB_NOT_SUPPORTED; + } else if (OB_FAIL(hash_index_builder.build_block(data_buffer_))) { + if (ret != OB_NOT_SUPPORTED) { + STORAGE_LOG(WARN, "data buffer fail to write hash index.", K(ret)); + } + } else { + header_->contains_hash_index_ = 1; + header_->hash_index_offset_from_end_ = hash_index_builder.estimate_size(); + } + } + return ret; +} 
+ +bool ObMicroBlockWriter::has_enough_space_for_hash_index(const int64_t hash_index_size) const { + const int64_t total_size = get_data_size() + get_index_size() + hash_index_size; + return total_size <= micro_block_size_limit_ && total_size <= block_size_upper_bound_; +} + void ObMicroBlockWriter::reset() { ObIMicroBlockWriter::reuse(); @@ -202,19 +220,17 @@ void ObMicroBlockWriter::reuse() int ObMicroBlockWriter::check_input_param( const int64_t micro_block_size_limit, const int64_t column_count, - const int64_t rowkey_column_count, - const ObRowStoreType row_store_type) + const int64_t rowkey_column_count) { int ret = OB_SUCCESS; if (micro_block_size_limit <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid micro block writer input argument.", K(micro_block_size_limit), K(ret)); } else if (rowkey_column_count <= 0 || - (FLAT_ROW_STORE == row_store_type && - (column_count <= 0 || column_count < rowkey_column_count))) { + (column_count <= 0 || column_count < rowkey_column_count)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid micro block writer input argument.", K(ret), K(column_count), - K(row_store_type), K(rowkey_column_count)); + K(rowkey_column_count)); } return ret; } @@ -280,7 +296,7 @@ int ObMicroBlockWriter::reserve_header( bool ObMicroBlockWriter::is_exceed_limit(const int64_t row_length) { - return header_->row_count_ > 0 && get_block_size() + row_length + INDEX_ENTRY_SIZE > micro_block_size_limit_; + return header_->row_count_ > 0 && get_future_block_size(row_length) > micro_block_size_limit_; } }//end namespace blocksstable diff --git a/src/storage/blocksstable/ob_micro_block_writer.h b/src/storage/blocksstable/ob_micro_block_writer.h old mode 100644 new mode 100755 index 13200865e..3d5a673b6 --- a/src/storage/blocksstable/ob_micro_block_writer.h +++ b/src/storage/blocksstable/ob_micro_block_writer.h @@ -13,7 +13,6 @@ #ifndef OCEANBASE_STORAGE_BLOCKSSTABLE_OB_MICRO_BLOCK_WRITER_H_ #define 
OCEANBASE_STORAGE_BLOCKSSTABLE_OB_MICRO_BLOCK_WRITER_H_ #include "ob_block_sstable_struct.h" -#include "ob_data_buffer.h" #include "ob_row_writer.h" #include "ob_imicro_block_writer.h" @@ -31,12 +30,14 @@ namespace blocksstable // |- row data // |- row index buffer // |- ObRowIndex +// |- row hash index builder(optional) // // build output // |- compressed data // |- ObMicroBlockHeader // |- row data // |- RowIndex +// |- RowHashIndex(optional) class ObMicroBlockWriter : public ObIMicroBlockWriter { static const int64_t INDEX_ENTRY_SIZE = sizeof(int32_t); @@ -50,8 +51,7 @@ public: const int64_t micro_block_size_limit, const int64_t rowkey_column_count, const int64_t column_count = 0, - const bool need_calc_column_chksum = false, - const common::ObRowStoreType row_store_type = common::FLAT_ROW_STORE); + const bool need_calc_column_chksum = false); virtual int append_row(const ObDatumRow &row); virtual int build_block(char *&buf, int64_t &size); @@ -62,16 +62,18 @@ public: virtual int64_t get_data_size() const override; virtual int64_t get_column_count() const override; virtual int64_t get_original_size() const override; + virtual int append_hash_index(ObMicroBlockHashIndexBuilder& hash_index_builder); + virtual bool has_enough_space_for_hash_index(const int64_t hash_index_size) const; void reset(); private: int inner_init(); inline int64_t get_index_size() const; + inline int64_t get_future_block_size(const int64_t row_length) const; int try_to_append_row(const int64_t &row_length); int check_input_param( const int64_t macro_block_size, const int64_t column_count, - const int64_t rowkey_column_count, - const ObRowStoreType row_store_type); + const int64_t rowkey_column_count); int finish_row(const int64_t length); int reserve_header( const int64_t column_count, @@ -85,7 +87,6 @@ private: int64_t column_count_; ObRowWriter row_writer_; int64_t rowkey_column_count_; - ObMicroBlockHeader *header_; ObSelfBufferWriter data_buffer_; ObSelfBufferWriter index_buffer_; bool 
need_calc_column_chksum_; @@ -120,6 +121,10 @@ inline int64_t ObMicroBlockWriter::get_index_size() const } return index_size; } +inline int64_t ObMicroBlockWriter::get_future_block_size(const int64_t row_length) const { + return get_data_size() + row_length + + get_index_size() + INDEX_ENTRY_SIZE; +} inline int64_t ObMicroBlockWriter::get_data_base_offset() const { @@ -143,4 +148,3 @@ inline int64_t ObMicroBlockWriter::get_original_size() const }//end namespace blocksstable }//end namespace oceanbase #endif - diff --git a/src/storage/blocksstable/ob_row_reader.cpp b/src/storage/blocksstable/ob_row_reader.cpp index 15b1007cd..8eb2dfbb9 100644 --- a/src/storage/blocksstable/ob_row_reader.cpp +++ b/src/storage/blocksstable/ob_row_reader.cpp @@ -554,7 +554,8 @@ int ObRowReader::analyze_cluster_info() uint64_t ObRowReader::get_cluster_offset(const int64_t cluster_idx) const { - return get_offset_func[row_header_->get_offset_type()](cluster_offset_, cluster_idx); + return cluster_idx == 0 ? sizeof(ObRowHeader) : + get_offset_func[row_header_->get_offset_type()](cluster_offset_, cluster_idx); } uint64_t ObRowReader::get_cluster_end_pos(const int64_t cluster_idx) const diff --git a/src/storage/compaction/ob_partition_merger.cpp b/src/storage/compaction/ob_partition_merger.cpp index e3a0ff943..e99c48b51 100644 --- a/src/storage/compaction/ob_partition_merger.cpp +++ b/src/storage/compaction/ob_partition_merger.cpp @@ -101,6 +101,7 @@ int ObPartitionMerger::init_data_store_desc(ObTabletMergeCtx &ctx) merge_info_.concurrent_cnt_ = ctx.parallel_merge_ctx_.get_concurrent_cnt(); merge_info_.is_full_merge_ = ctx.is_full_merge_; data_store_desc_.merge_info_ = &merge_info_; + data_store_desc_.need_pre_warm_ = true; } return ret; } diff --git a/src/storage/memtable/ob_memtable_iterator.cpp b/src/storage/memtable/ob_memtable_iterator.cpp index 3f01d0c5c..e4687fb8c 100644 --- a/src/storage/memtable/ob_memtable_iterator.cpp +++ b/src/storage/memtable/ob_memtable_iterator.cpp @@ -243,10 
+243,10 @@ int ObMemtableScanIterator::prepare_scan() ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected invalid datum range", K(ret), K(range)); } else if (OB_FAIL(ObMemtableKey::build( - start_key, *out_cols, &range.get_start_key().get_store_rowkey(), *context_->allocator_))) { + start_key, *out_cols, &range.get_start_key().get_store_rowkey(), *context_->get_range_allocator()))) { TRANS_LOG(WARN, "start key build fail", K(param_->table_id_), K(range)); } else if (OB_FAIL(ObMemtableKey::build( - end_key, *out_cols, &range.get_end_key().get_store_rowkey(), *context_->allocator_))) { + end_key, *out_cols, &range.get_end_key().get_store_rowkey(), *context_->get_range_allocator()))) { TRANS_LOG(WARN, "end key build fail", K(param_->table_id_), K(range)); } else { ObMvccEngine& mvcc_engine = ((ObMemtable*)memtable_)->get_mvcc_engine(); @@ -742,10 +742,10 @@ int ObMemtableMultiVersionScanIterator::init( ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected invalid datum range", K(ret), K(range)); } else if (OB_FAIL(ObMemtableKey::build_without_hash( - start_key_, *columns, &range->get_start_key().get_store_rowkey(), *context.allocator_))) { + start_key_, *columns, &range->get_start_key().get_store_rowkey(), *context.get_range_allocator()))) { TRANS_LOG(WARN, "start key build fail", K(param.table_id_), K(range->get_start_key())); } else if (OB_FAIL(ObMemtableKey::build_without_hash( - end_key_, *columns, &range->get_end_key().get_store_rowkey(), *context.allocator_))) { + end_key_, *columns, &range->get_end_key().get_store_rowkey(), *context.get_range_allocator()))) { TRANS_LOG(WARN, "end key build fail", K(param.table_id_), K(range->get_end_key())); } else { TRANS_LOG(DEBUG, "init multi version scan iterator", K(param), K(*range)); diff --git a/src/storage/ob_micro_block_handle_mgr.cpp b/src/storage/ob_micro_block_handle_mgr.cpp index 31b360de4..2d238772a 100644 --- a/src/storage/ob_micro_block_handle_mgr.cpp +++ b/src/storage/ob_micro_block_handle_mgr.cpp @@ 
-115,6 +115,29 @@ int ObMicroBlockDataHandle::get_index_block_data( return ret; } +int ObMicroBlockDataHandle::get_cached_index_block_data( + const ObTableReadInfo &read_info, + ObMicroBlockData &index_block) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!read_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid columns info", K(ret), K(read_info)); + } else if (ObSSTableMicroBlockState::IN_BLOCK_CACHE != block_state_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Fail to get block data, unexpected block state", K(ret), K(block_state_)); + } else { + const ObMicroBlockData *pblock = NULL; + if (NULL == (pblock = cache_handle_.get_block_data())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Fail to get cache block, ", K(ret)); + } else { + index_block = *pblock; + } + } + return ret; +} + int ObMicroBlockDataHandle::get_loaded_block_data(ObMicroBlockData &block_data) { int ret = OB_SUCCESS; @@ -285,5 +308,30 @@ int ObMicroBlockHandleMgr::get_micro_block_handle( return ret; } +int ObMicroBlockHandleMgr::put_micro_block_handle( + const uint64_t tenant_id, + const blocksstable::MacroBlockId ¯o_id, + const blocksstable::ObIndexBlockRowHeader &idx_header, + ObMicroBlockDataHandle µ_block_handle) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "block handle mgr is not inited", K(ret)); + } else if (is_multi_) { + if (is_ordered_) { + *last_handle_ = micro_block_handle; + } else { + const int64_t offset = idx_header.get_block_offset(); + const int64_t size = idx_header.get_block_size(); + ObMicroBlockCacheKey key(tenant_id, macro_id, offset, size); + if (OB_FAIL(handle_cache_->put_handle(key, micro_block_handle))) { + STORAGE_LOG(WARN, "failed to put handle cache", K(ret), K(key)); + } + } + } + return ret; +} + } } diff --git a/src/storage/ob_micro_block_handle_mgr.h b/src/storage/ob_micro_block_handle_mgr.h index 181d57f84..6fee396a6 100644 --- a/src/storage/ob_micro_block_handle_mgr.h +++ 
b/src/storage/ob_micro_block_handle_mgr.h @@ -44,6 +44,9 @@ struct ObMicroBlockDataHandle { int get_index_block_data( const ObTableReadInfo &read_info, blocksstable::ObMicroBlockData &index_block); + int get_cached_index_block_data( + const ObTableReadInfo &read_info, + blocksstable::ObMicroBlockData &index_block); TO_STRING_KV(K_(tenant_id), K_(macro_block_id), K_(micro_info), K_(block_state), K_(block_index), K_(cache_handle), K_(io_handle)); uint64_t tenant_id_; @@ -79,6 +82,11 @@ public: const blocksstable::ObMicroIndexInfo &index_block_info, const bool is_data_block, ObMicroBlockDataHandle µ_block_handle); + int put_micro_block_handle( + const uint64_t tenant_id, + const blocksstable::MacroBlockId ¯o_id, + const blocksstable::ObIndexBlockRowHeader &idx_header, + ObMicroBlockDataHandle µ_block_handle); private: // allocator for index micro block prefetch failed and async io common::ObFIFOAllocator allocator_; diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result index 9fb0fe7ba..fbf0956c1 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result @@ -544,6 +544,7 @@ select 0xffffffffff & table_id, table_name, table_type, database_id, part_num fr 12316 __all_virtual_privilege 2 201001 1 12317 __all_virtual_tablet_pointer_status 2 201001 1 12318 __all_virtual_storage_meta_memory_status 2 201001 1 +12319 __all_virtual_kvcache_store_memblock 2 201001 1 12320 __all_virtual_mock_fk_parent_table 2 201001 1 12321 __all_virtual_mock_fk_parent_table_history 2 201001 1 12322 __all_virtual_mock_fk_parent_table_column 2 201001 1 @@ -969,6 +970,7 @@ select * from oceanbase.__all_virtual_disk_stat limit 1; select * from oceanbase.__all_virtual_engine limit 1; select * from oceanbase.__all_virtual_io_stat limit 1; select * from 
oceanbase.__all_virtual_kvcache_info limit 1; +select * from oceanbase.__all_virtual_kvcache_store_memblock limit 1; select * from oceanbase.__all_virtual_latch limit 1; select * from oceanbase.__all_virtual_long_ops_status limit 1; select * from oceanbase.__all_virtual_macro_block_marker_status limit 1; diff --git a/tools/deploy/mysql_test/test_suite/inner_table/t/inner_table_overall.test b/tools/deploy/mysql_test/test_suite/inner_table/t/inner_table_overall.test index fcd4fcc61..a921b83d4 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/t/inner_table_overall.test +++ b/tools/deploy/mysql_test/test_suite/inner_table/t/inner_table_overall.test @@ -212,6 +212,7 @@ select * from oceanbase.__all_virtual_disk_stat limit 1; select * from oceanbase.__all_virtual_engine limit 1; select * from oceanbase.__all_virtual_io_stat limit 1; select * from oceanbase.__all_virtual_kvcache_info limit 1; +select * from oceanbase.__all_virtual_kvcache_store_memblock limit 1; select * from oceanbase.__all_virtual_latch limit 1; select * from oceanbase.__all_virtual_long_ops_status limit 1; select * from oceanbase.__all_virtual_macro_block_marker_status limit 1; diff --git a/tools/deploy/mysql_test/test_suite/with_clause_mysql/r/mysql/basic_mysql.result b/tools/deploy/mysql_test/test_suite/with_clause_mysql/r/mysql/basic_mysql.result index 7e143aaac..3b9331f34 100644 --- a/tools/deploy/mysql_test/test_suite/with_clause_mysql/r/mysql/basic_mysql.result +++ b/tools/deploy/mysql_test/test_suite/with_clause_mysql/r/mysql/basic_mysql.result @@ -2537,9 +2537,9 @@ Outputs & filters: conds([t1.c1 = cte.max( c1 )]), nl_params_(nil) 2 - output([cte.max( c1 )]), filter(nil), rowset=256, access([cte.max( c1 )]) - 3 - output([T_FUN_MAX(t1.c1)]), filter(nil), rowset=256, - group(nil), agg_func([T_FUN_MAX(t1.c1)]) - 4 - output([t1.c1]), filter(nil), rowset=256, + 3 - output([T_FUN_MAX(T_FUN_MAX(t1.c1))]), filter(nil), rowset=256, + group(nil), agg_func([T_FUN_MAX(T_FUN_MAX(t1.c1))]) + 4 - 
output([T_FUN_MAX(t1.c1)]), filter(nil), rowset=256, access([t1.c1]), partitions(p0) 5 - output([t1.__pk_increment], [t1.c1], [t1.c2], [t1.c3]), filter(nil), rowset=256, access([t1.__pk_increment], [t1.c1], [t1.c2], [t1.c3]), partitions(p0) @@ -2566,9 +2566,9 @@ Outputs & filters: conds([t1.c1 = cte.a]), nl_params_(nil) 2 - output([cte.a]), filter(nil), rowset=256, access([cte.a]) - 3 - output([T_FUN_MAX(t1.c1)]), filter(nil), rowset=256, - group(nil), agg_func([T_FUN_MAX(t1.c1)]) - 4 - output([t1.c1]), filter(nil), rowset=256, + 3 - output([T_FUN_MAX(T_FUN_MAX(t1.c1))]), filter(nil), rowset=256, + group(nil), agg_func([T_FUN_MAX(T_FUN_MAX(t1.c1))]) + 4 - output([T_FUN_MAX(t1.c1)]), filter(nil), rowset=256, access([t1.c1]), partitions(p0) 5 - output([t1.__pk_increment], [t1.c1], [t1.c2], [t1.c3]), filter(nil), rowset=256, access([t1.__pk_increment], [t1.c1], [t1.c2], [t1.c3]), partitions(p0)