diff --git a/deps/oblib/src/common/object/ob_object.cpp b/deps/oblib/src/common/object/ob_object.cpp index 6e9f64653..939705444 100644 --- a/deps/oblib/src/common/object/ob_object.cpp +++ b/deps/oblib/src/common/object/ob_object.cpp @@ -571,7 +571,7 @@ int ObLobLocatorV2::get_disk_locator(ObString &disc_loc_buff) const int64_t handle_size = reinterpret_cast(disk_loc) - reinterpret_cast(ptr_); if (handle_size > size_) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get invalid handle size", K(ret), K(size_), K(disk_loc), K(ptr_)); + LOG_WARN("get invalid handle size", K(ret), K(size_), K(disk_loc), K(ptr_), K(handle_size)); } else { if (disk_loc->in_row_) { handle_size = size_ - handle_size; diff --git a/deps/oblib/src/common/object/ob_object.h b/deps/oblib/src/common/object/ob_object.h index d6d3ebe71..5baac9688 100644 --- a/deps/oblib/src/common/object/ob_object.h +++ b/deps/oblib/src/common/object/ob_object.h @@ -501,6 +501,7 @@ struct ObLobId bool operator >(const ObLobId &other) const; TO_STRING_KV(K_(tablet_id), K_(lob_id)); void reset(); + inline bool is_valid() const {return tablet_id_ != 0 && lob_id_ != 0;} uint64_t tablet_id_; uint64_t lob_id_; }; diff --git a/deps/oblib/src/lib/allocator/ob_block_alloc_mgr.h b/deps/oblib/src/lib/allocator/ob_block_alloc_mgr.h index 4ffd4ae56..061be89dd 100644 --- a/deps/oblib/src/lib/allocator/ob_block_alloc_mgr.h +++ b/deps/oblib/src/lib/allocator/ob_block_alloc_mgr.h @@ -37,7 +37,7 @@ public: if (used_after_alloc > limit_) { ATOMIC_AAF(&hold_, -size); if (REACH_TIME_INTERVAL(1000 * 1000)) { - _OB_LOG_RET(WARN, common::OB_ERR_UNEXPECTED, "block alloc over limit, limit=%ld alloc_size=%ld", limit_, size); + _OB_LOG_RET(WARN, common::OB_ALLOCATE_MEMORY_FAILED, "block alloc over limit, limit=%ld alloc_size=%ld", limit_, size); } } else if (NULL == (ret = (void*)ob_malloc(size, attr))) { ATOMIC_AAF(&hold_, -size); diff --git a/deps/oblib/src/lib/hash/ob_linear_hash_map.h b/deps/oblib/src/lib/hash/ob_linear_hash_map.h index 
af58a7303..f2f99755a 100644 --- a/deps/oblib/src/lib/hash/ob_linear_hash_map.h +++ b/deps/oblib/src/lib/hash/ob_linear_hash_map.h @@ -394,9 +394,9 @@ public: // m_seg_sz and s_seg_sz are the sizes of micro-segment and standard-segment. // Set m_seg_sz = 0 to disable micro-segment. // dir_init_sz is the initial size of directory, it doubles when overflows. + int init(const ObMemAttr &mem_attr); int init(const lib::ObLabel &label = LABEL, uint64_t tenant_id = TENANT_ID); - int init(uint64_t m_seg_sz, uint64_t s_seg_sz, uint64_t dir_init_sz, - const lib::ObLabel &label = LABEL, uint64_t tenant_id = TENANT_ID); + int init(uint64_t m_seg_sz, uint64_t s_seg_sz, uint64_t dir_init_sz, const ObMemAttr &mem_attr); int destroy(); // Load factor control. int set_load_factor_lmt(double lower_lmt, double upper_lmt); @@ -867,34 +867,38 @@ void ObLinearHashMap::BlurredIterator::rewind() // Public functions. template -int ObLinearHashMap::init(const lib::ObLabel &label /*= LABEL*/, - uint64_t tenant_id /*=TENANT_ID*/) +int ObLinearHashMap::init(const ObMemAttr &mem_attr) { return init(OB_MALLOC_NORMAL_BLOCK_SIZE, /* Small segment. */ OB_MALLOC_BIG_BLOCK_SIZE, /* Large segment. */ DIR_SZ_L_LMT, /* Dir size, small when init, expand * 2. */ - label, - tenant_id); + mem_attr); +} + +template +int ObLinearHashMap::init(const lib::ObLabel &label /*= LABEL*/, + uint64_t tenant_id /*=TENANT_ID*/) +{ + return init(ObMemAttr(tenant_id, label)); } template int ObLinearHashMap::init(uint64_t m_seg_sz, uint64_t s_seg_sz, uint64_t dir_init_sz, - const lib::ObLabel &label /*= LABEL*/, uint64_t tenant_id /*=TENANT_ID*/) + const ObMemAttr &mem_attr) { const double LOAD_FCT_DEF_U_LMT = 1; const double LOAD_FCT_DEF_L_LMT = 0.01; int ret = OB_SUCCESS; /* Memory alloc from MemMgr, and its static, so label and tenant_id no longer used. 
*/ - memattr_.tenant_id_ = tenant_id; - memattr_.label_ = label; + memattr_ = mem_attr; load_factor_u_limit_ = LOAD_FCT_DEF_U_LMT; load_factor_l_limit_ = LOAD_FCT_DEF_L_LMT; load_factor_ = 0.0; set_Lp_(0, 0); init_haz_(); init_foreach_(); - if (OB_SUCCESS != (ret = mem_mgr_.init(tenant_id))) + if (OB_SUCCESS != (ret = mem_mgr_.init(mem_attr.tenant_id_))) { } else if (OB_SUCCESS != (ret = init_d_arr_(m_seg_sz, s_seg_sz, dir_init_sz))) { } diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 9fc9a5e33..6492b709c 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -826,8 +826,10 @@ const int64_t MAX_COLUMN_YES_NO_LENGTH = 3; const int64_t MAX_COLUMN_VARCHAR_LENGTH = 262143; const int64_t MAX_COLUMN_CHAR_LENGTH = 255; //column group +const uint64_t INVALID_COLUMN_GROUP_ID = 0; const uint64_t DEFAULT_TYPE_COLUMN_GROUP_ID = 1; // reserve 2~999 const uint64_t COLUMN_GROUP_START_ID = 1000; +const uint64_t DEFAULT_CUSTOMIZED_CG_NUM = 2; //Oracle const int64_t MAX_ORACLE_COMMENT_LENGTH = 4000; diff --git a/deps/oblib/unittest/lib/hash/test_linear_hash_map.cpp b/deps/oblib/unittest/lib/hash/test_linear_hash_map.cpp index 821f49632..f14f9f9f0 100644 --- a/deps/oblib/unittest/lib/hash/test_linear_hash_map.cpp +++ b/deps/oblib/unittest/lib/hash/test_linear_hash_map.cpp @@ -150,7 +150,7 @@ TEST(ObLinearHashMap, EStest1) // Init map. Map map; - EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz)); + EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz, ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LINEAR_HASH_MAP))); // Validate some settings. 
EXPECT_EQ(m_sz / sizeof(Map::Bucket), map.L0_bkt_n_); { @@ -217,7 +217,7 @@ TEST(ObLinearHashMap, ACCSStest1) uint64_t m_sz = 1 << 12; // 4KB uint64_t s_sz = 1 << 16; // 64KB Map map; - EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz)); + EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz, ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LINEAR_HASH_MAP))); map.load_factor_l_limit_ = 0; map.load_factor_u_limit_ = 100000; const int64_t limit = 10000; @@ -259,7 +259,7 @@ TEST(ObLinearHashMap, ACCSStest2) uint64_t m_sz = 1 << 12; // 4KB uint64_t s_sz = 1 << 16; // 64KB const uint64_t maxL = 6; // will use both m-seg and s-seg - EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz)); + EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz, ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LINEAR_HASH_MAP))); map.load_factor_l_limit_ = 0; map.load_factor_u_limit_ = 100000000; const int64_t key_n = (int64_t)2 << maxL; // more than 2 keys in bucket when it reaches maxL. @@ -335,7 +335,7 @@ TEST(ObLinearHashMap, ACCSStest3) const int64_t thread_n = 4; pthread_t threads[thread_n]; Map map; - EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz)); + EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz, ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LINEAR_HASH_MAP))); map.load_factor_l_limit_ = 0; map.load_factor_u_limit_ = 100000; // Modify the dir size to an extremely short size. 
@@ -392,7 +392,7 @@ TEST(ObLinearHashMap, ACCSStest4) Map map; uint64_t m_sz = 1 << 12; // 4KB uint64_t s_sz = 1 << 16; // 64KB - EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz)); + EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz, ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LINEAR_HASH_MAP))); map.load_factor_l_limit_ = 0; map.load_factor_u_limit_ = 100000; uint64_t L, p; @@ -516,7 +516,7 @@ TEST(ObLinearHashMap, ACCSStest5) const int64_t thread_n = 4; pthread_t threads[thread_n]; Map map; - EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz)); + EXPECT_EQ(OB_SUCCESS, map.init(m_sz, s_sz, m_sz, ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_LINEAR_HASH_MAP))); map.load_factor_l_limit_ = 0; map.load_factor_u_limit_ = 100000; // Insert. diff --git a/mittest/mtlenv/mock_tenant_module_env.h b/mittest/mtlenv/mock_tenant_module_env.h index 6a598f145..301bc6070 100644 --- a/mittest/mtlenv/mock_tenant_module_env.h +++ b/mittest/mtlenv/mock_tenant_module_env.h @@ -707,6 +707,7 @@ int MockTenantModuleEnv::init() MTL_BIND(ObTenantSQLSessionMgr::mtl_init, ObTenantSQLSessionMgr::mtl_destroy); MTL_BIND2(mtl_new_default, ObTenantCGReadInfoMgr::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObDecodeResourcePool::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); + MTL_BIND2(mtl_new_default, ObTenantDirectLoadMgr::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObEmptyReadBucket::mtl_init, nullptr, nullptr, nullptr, ObEmptyReadBucket::mtl_destroy); MTL_BIND2(mtl_new_default, ObRebuildService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, table::ObHTableLockMgr::mtl_init, nullptr, nullptr, nullptr, table::ObHTableLockMgr::mtl_destroy); diff --git a/mittest/mtlenv/storage/blocksstable/CMakeLists.txt b/mittest/mtlenv/storage/blocksstable/CMakeLists.txt index a49781abd..5e606c490 100644 --- 
a/mittest/mtlenv/storage/blocksstable/CMakeLists.txt +++ b/mittest/mtlenv/storage/blocksstable/CMakeLists.txt @@ -20,3 +20,6 @@ storage_dml_unittest(test_co_sstable_row_scanner) storage_dml_unittest(test_pushdown_aggregate) storage_dml_unittest(test_cg_group_by_scanner) storage_dml_unittest(test_cs_cg_group_by_scanner) +storage_dml_unittest(test_ddl_merge_row_scanner) +storage_dml_unittest(test_ddl_merge_row_multi_scanner) +storage_dml_unittest(test_direct_load) diff --git a/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h b/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h index d4ddb7542..9b0e82539 100644 --- a/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h +++ b/mittest/mtlenv/storage/blocksstable/ob_index_block_data_prepare.h @@ -68,17 +68,22 @@ public: virtual void TearDown(); virtual void prepare_schema(); virtual void prepare_data(const int64_t micro_block_size = 0); + virtual void prepare_partial_ddl_data(); virtual void prepare_cg_data(); virtual void insert_data(ObMacroBlockWriter &data_writer); // override to define data in sstable virtual void insert_cg_data(ObMacroBlockWriter &data_writer); // override to define data in sstable + virtual void insert_partial_data(ObMacroBlockWriter &data_writer, const int64_t row_cnt); // override to define data in partial_sstable static void convert_to_multi_version_row(const ObDatumRow &org_row, const ObTableSchema &schema, const int64_t snapshot_version, const ObDmlFlag dml_flag, ObDatumRow &multi_row); static void fake_freeze_info(); virtual ObITable::TableType get_merged_table_type() const; void prepare_query_param(const bool is_reverse_scan, ObArenaAllocator *allocator = nullptr); void destroy_query_param(); + void prepare_contrastive_sstable(); void prepare_ddl_kv(); + void prepare_merge_ddl_kvs(); void close_builder_and_prepare_sstable(const int64_t column_cnt); + void prepare_partial_sstable(const int64_t column_cnt); int gen_create_tablet_arg(const int64_t 
tenant_id, const share::ObLSID &ls_id, const ObTabletID &tablet_id, @@ -96,16 +101,24 @@ protected: static const uint64_t tablet_id_ = 200001; static const uint64_t TEST_TABLE_ID = 200001; static const uint64_t ls_id_ = 1001; + static const int64_t DDL_KVS_CNT = 3; ObMergeType merge_type_; int64_t max_row_cnt_; + int64_t max_partial_row_cnt_; + int64_t partial_kv_start_idx_; ObTableSchema table_schema_; ObTableSchema index_schema_; ObRowGenerate row_generate_; int64_t row_cnt_; + int64_t partial_sstable_row_cnt_; + ObSSTable partial_sstable_; ObSSTable sstable_; - storage::ObDDLKV ddl_kv_; + storage::ObDDLMemtable ddl_kv_; + storage::ObDDLKVHandle ddl_kvs_; ObSSTableIndexBuilder *root_index_builder_; + ObSSTableIndexBuilder *merge_root_index_builder_; ObMicroBlockData root_block_data_buf_; + ObMicroBlockData merge_root_block_data_buf_; ObRowStoreType row_store_type_; int64_t max_row_seed_; int64_t min_row_seed_; @@ -126,6 +139,7 @@ protected: int64_t rows_per_mirco_block_; int64_t mirco_blocks_per_macro_block_; bool is_cg_data_; + bool is_ddl_merge_data_; }; ObArenaAllocator TestIndexBlockDataPrepare::allocator_; @@ -153,6 +167,7 @@ void TestIndexBlockDataPrepare::prepare_query_param(const bool is_reverse_scan, } context_.store_ctx_ = &store_ctx_; context_.ls_id_ = ls_id_; + context_.tablet_id_ = tablet_id_; context_.allocator_ = test_allocator; context_.stmt_allocator_ = test_allocator; context_.limit_param_ = nullptr; @@ -180,7 +195,9 @@ TestIndexBlockDataPrepare::TestIndexBlockDataPrepare( : merge_type_(merge_type), max_row_cnt_(max_row_cnt), row_cnt_(0), + partial_sstable_row_cnt_(0), root_index_builder_(nullptr), + merge_root_index_builder_(nullptr), row_store_type_(row_store_type), max_row_seed_(0), min_row_seed_(0), @@ -191,7 +208,8 @@ TestIndexBlockDataPrepare::TestIndexBlockDataPrepare( need_agg_data_(need_aggregate_data), rows_per_mirco_block_(rows_per_mirco_block), mirco_blocks_per_macro_block_(mirco_blocks_per_macro_block), - is_cg_data_(false) + 
is_cg_data_(false), + is_ddl_merge_data_(false) { } @@ -283,9 +301,12 @@ void TestIndexBlockDataPrepare::SetUp() ASSERT_EQ(OB_SUCCESS, TestTabletHelper::create_tablet(ls_handle, tablet_id, table_schema_, allocator_)); ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle_)); sstable_.key_.table_type_ = ObITable::TableType::COLUMN_ORIENTED_SSTABLE; + partial_sstable_.key_.table_type_ = ObITable::TableType::DDL_MERGE_CO_SSTABLE; if (is_cg_data_) { prepare_cg_data(); + } else if (is_ddl_merge_data_) { + prepare_partial_ddl_data(); } else { prepare_data(); } @@ -293,17 +314,32 @@ void TestIndexBlockDataPrepare::SetUp() void TestIndexBlockDataPrepare::TearDown() { sstable_.reset(); + partial_sstable_.reset(); ddl_kv_.reset(); + ddl_kvs_.reset(); cg_read_info_handle_.reset(); if (nullptr != root_block_data_buf_.buf_) { allocator_.free((void *)root_block_data_buf_.buf_); root_block_data_buf_.buf_ = nullptr; } + if (nullptr != root_block_data_buf_.buf_) { + allocator_.free((void *)root_block_data_buf_.buf_); + root_block_data_buf_.buf_ = nullptr; + } + if (nullptr != merge_root_block_data_buf_.buf_) { + allocator_.free((void *)merge_root_block_data_buf_.buf_); + merge_root_block_data_buf_.buf_ = nullptr; + } if (nullptr != root_index_builder_) { root_index_builder_->~ObSSTableIndexBuilder(); allocator_.free((void *)root_index_builder_); root_index_builder_ = nullptr; } + if (nullptr != merge_root_index_builder_) { + merge_root_index_builder_->~ObSSTableIndexBuilder(); + allocator_.free((void *)merge_root_index_builder_); + merge_root_index_builder_ = nullptr; + } ObLSID ls_id(ls_id_); ObTabletID tablet_id(tablet_id_); ObLSHandle ls_handle; @@ -345,7 +381,7 @@ ObITable::TableType TestIndexBlockDataPrepare::get_merged_table_type() const } else if (META_MAJOR_MERGE == merge_type_) { table_type = ObITable::TableType::META_MAJOR_SSTABLE; } else if (DDL_KV_MERGE == merge_type_) { - table_type = ObITable::TableType::DDL_DUMP_SSTABLE; + table_type = 
ObITable::TableType::DDL_MERGE_CO_SSTABLE; } else { // MINOR_MERGE || HISTORY_MINOR_MERGE table_type = ObITable::TableType::MINOR_SSTABLE; } @@ -354,7 +390,7 @@ ObITable::TableType TestIndexBlockDataPrepare::get_merged_table_type() const void TestIndexBlockDataPrepare::prepare_schema() { - ObColumnSchemaV2 column; + ObColumnSchemaV2 column; //init table schema uint64_t table_id = TEST_TABLE_ID; table_schema_.reset(); @@ -502,10 +538,9 @@ void TestIndexBlockDataPrepare::close_builder_and_prepare_sstable(const int64_t row_store_type_ = root_row_store_type; ObITable::TableKey table_key; - int64_t table_id = 3001; int64_t tenant_id = 1; table_key.table_type_ = get_merged_table_type(); - table_key.tablet_id_ = table_id; + table_key.tablet_id_ = tablet_id_; table_key.version_range_.snapshot_version_ = SNAPSHOT_VERSION; ObTabletCreateSSTableParam param; @@ -685,7 +720,7 @@ void TestIndexBlockDataPrepare::prepare_ddl_kv() share::SCN ddl_start_scn; ddl_start_scn.convert_from_ts(ObTimeUtility::current_time()); - ASSERT_EQ(OB_SUCCESS, ddl_kv_.init(*tablet_handle.get_obj(), ddl_start_scn, sstable_.get_data_version(), ddl_start_scn, 4000)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_.init(*tablet_handle.get_obj(), sstable_.get_key(), ddl_start_scn, DATA_CURRENT_VERSION)); SMART_VAR(ObSSTableSecMetaIterator, meta_iter) { ObDatumRange query_range; @@ -711,7 +746,6 @@ void TestIndexBlockDataPrepare::prepare_ddl_kv() } } ASSERT_EQ(OB_ITER_END, ret); - ASSERT_EQ(OB_SUCCESS, ddl_kv_.block_meta_tree_.build_sorted_rowkeys()); } } @@ -813,6 +847,326 @@ void TestIndexBlockDataPrepare::convert_to_multi_version_row(const ObDatumRow &o multi_row.mvcc_row_flag_.set_last_multi_version_row(true); } +void TestIndexBlockDataPrepare::prepare_partial_ddl_data() +{ + prepare_contrastive_sstable(); + ObMacroBlockWriter writer; + ObMacroDataSeq start_seq(0); + start_seq.set_data_block(); + row_generate_.reset(); + ObWholeDataStoreDesc desc(true/*is ddl*/); + share::SCN end_scn; + 
end_scn.convert_from_ts(ObTimeUtility::current_time()); + ASSERT_EQ(OB_SUCCESS, desc.init(table_schema_, ObLSID(ls_id_), ObTabletID(tablet_id_), merge_type_, SNAPSHOT_VERSION, CLUSTER_CURRENT_VERSION, end_scn)); + void *builder_buf = allocator_.alloc(sizeof(ObSSTableIndexBuilder)); + merge_root_index_builder_ = new (builder_buf) ObSSTableIndexBuilder(); + ASSERT_NE(nullptr, merge_root_index_builder_); + desc.get_desc().sstable_index_builder_ = merge_root_index_builder_; + ASSERT_TRUE(desc.is_valid()); + if (need_agg_data_) { + ASSERT_EQ(OB_SUCCESS, desc.get_desc().col_desc_->agg_meta_array_.assign(agg_col_metas_)); + } + ASSERT_EQ(OB_SUCCESS, merge_root_index_builder_->init(desc.get_desc())); + ASSERT_EQ(OB_SUCCESS, writer.open(desc.get_desc(), start_seq)); + ASSERT_EQ(OB_SUCCESS, row_generate_.init(table_schema_, &allocator_)); + const int64_t partial_row_cnt = max_partial_row_cnt_; + insert_partial_data(writer, partial_row_cnt); + ASSERT_EQ(OB_SUCCESS, writer.close()); + // data write ctx has been moved to merge_root_index_builder_ + ASSERT_EQ(writer.get_macro_block_write_ctx().get_macro_block_count(), 0); + data_macro_block_cnt_ = merge_root_index_builder_->roots_[0]->macro_metas_->count(); + ASSERT_GE(data_macro_block_cnt_, 0); + int64_t column_cnt = 0; + ObTabletID tablet_id(TestIndexBlockDataPrepare::tablet_id_); + ObLSID ls_id(ls_id_); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ObStorageSchema *storage_schema = nullptr; + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle)); + ASSERT_EQ(OB_SUCCESS, tablet_handle.get_obj()->load_storage_schema(allocator_, storage_schema)); + ASSERT_EQ(OB_SUCCESS, storage_schema->get_stored_column_count_in_sstable(column_cnt)); + prepare_partial_sstable(column_cnt); + prepare_merge_ddl_kvs(); + ObTabletObjLoadHelper::free(allocator_, storage_schema); +} + +void 
TestIndexBlockDataPrepare::prepare_partial_sstable(const int64_t column_cnt) +{ + ObSSTableMergeRes res; + OK(merge_root_index_builder_->close(res)); + ObIndexTreeRootBlockDesc root_desc; + root_desc = res.root_desc_; + ASSERT_TRUE(root_desc.is_valid()); + ObRowStoreType root_row_store_type = res.root_row_store_type_; + char *root_buf = nullptr; + int64_t root_size = 0; + if (root_desc.addr_.is_block()) { + // read macro block + ObMacroBlockReadInfo read_info; + ObMacroBlockHandle macro_handle; + const int64_t macro_block_size = 2 * 1024 * 1024; + ASSERT_EQ(OB_SUCCESS, root_desc.addr_.get_block_addr(read_info.macro_block_id_, read_info.offset_, read_info.size_)); + read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); + read_info.offset_ = 0; + read_info.size_ = macro_block_size; + read_info.io_timeout_ms_ = DEFAULT_IO_WAIT_TIME_MS; + ASSERT_NE(nullptr, read_info.buf_ = reinterpret_cast(allocator_.alloc(read_info.size_))); + ASSERT_EQ(OB_SUCCESS, ObBlockManager::read_block(read_info, macro_handle)); + ASSERT_NE(macro_handle.get_buffer(), nullptr); + ASSERT_EQ(macro_handle.get_data_size(), macro_block_size); + // get root block + int64_t block_offset = root_desc.addr_.offset_; + int64_t block_size = root_desc.addr_.size_; + const char *block_buf = macro_handle.get_buffer() + block_offset; + // decompress and decrypt root block + ObMicroBlockDesMeta meta(ObCompressorType::NONE_COMPRESSOR, root_row_store_type, 0, 0, nullptr); + ObMacroBlockReader reader; + const char *decomp_buf = nullptr; + int64_t decomp_size = 0; + bool is_compressed = false; + ASSERT_EQ(OB_SUCCESS, reader.decrypt_and_decompress_data(meta, block_buf, root_desc.addr_.size_, + decomp_buf, decomp_size, is_compressed, true, &allocator_)); + root_buf = const_cast(decomp_buf); + root_size = decomp_size; + } else if (root_desc.is_mem_type()) { + root_buf = root_desc.buf_; + root_size = root_desc.addr_.size_; + } else { + STORAGE_LOG(INFO, "not supported root block", K(root_desc)); + 
ASSERT_TRUE(false); + } + // deserialize micro block header in root block buf + ObMicroBlockHeader root_micro_header; + int64_t des_pos = 0; + ASSERT_EQ(OB_SUCCESS, root_micro_header.deserialize(root_buf, root_size, des_pos)); + merge_root_block_data_buf_.buf_ = static_cast(allocator_.alloc(root_size)); + merge_root_block_data_buf_.size_ = root_size; + int64_t copy_pos = 0; + ObMicroBlockHeader *copied_micro_header = nullptr; + ASSERT_EQ(OB_SUCCESS, root_micro_header.deep_copy( + (char *)merge_root_block_data_buf_.buf_, merge_root_block_data_buf_.size_, copy_pos, copied_micro_header)); + ASSERT_TRUE(copied_micro_header->is_valid()); + MEMCPY((char *)(merge_root_block_data_buf_.buf_ + copy_pos), root_buf + des_pos, root_size - des_pos); + row_store_type_ = root_row_store_type; + ObITable::TableKey table_key; + int64_t table_id = 3001; + int64_t tenant_id = 1; + table_key.table_type_ = ObITable::TableType::DDL_MERGE_CO_SSTABLE; + table_key.tablet_id_ = table_id; + table_key.version_range_.snapshot_version_ = SNAPSHOT_VERSION; + ObTabletCreateSSTableParam param; + param.table_key_ = table_key; + param.schema_version_ = 10; + param.create_snapshot_version_ = 0; + param.progressive_merge_round_ = table_schema_.get_progressive_merge_round(); + param.progressive_merge_step_ = 0; + param.table_mode_ = table_schema_.get_table_mode_struct(); + param.index_type_ = table_schema_.get_index_type(); + if (is_cg_data_) { + param.rowkey_column_cnt_ = 0; + } else { + param.rowkey_column_cnt_ = table_schema_.get_rowkey_column_num() + + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + } + ObSSTableMergeRes::fill_addr_and_data(res.root_desc_, + param.root_block_addr_, param.root_block_data_); + ObSSTableMergeRes::fill_addr_and_data(res.data_root_desc_, + param.data_block_macro_meta_addr_, param.data_block_macro_meta_); + param.is_meta_root_ = res.data_root_desc_.is_meta_root_; + param.max_merged_trans_version_ = res.max_merged_trans_version_; + param.row_count_ = 
res.row_count_; + param.root_row_store_type_ = root_row_store_type; + param.latest_row_store_type_ = table_schema_.get_row_store_type(); + param.data_index_tree_height_ = root_desc.height_; + param.index_blocks_cnt_ = res.index_blocks_cnt_; + param.data_blocks_cnt_ = res.data_blocks_cnt_; + param.micro_block_cnt_ = res.micro_block_cnt_; + param.use_old_macro_block_count_ = res.use_old_macro_block_count_; + param.column_cnt_= column_cnt; + param.data_checksum_ = res.data_checksum_; + param.occupy_size_ = res.occupy_size_; + param.original_size_ = res.original_size_; + param.nested_offset_ = res.nested_offset_; + param.nested_size_ = res.nested_size_; + param.compressor_type_ = ObCompressorType::NONE_COMPRESSOR; + param.data_block_ids_ = res.data_block_ids_; + param.other_block_ids_ = res.other_block_ids_; + param.ddl_scn_.convert_from_ts(ObTimeUtility::current_time()); + param.filled_tx_scn_.set_min(); + param.contain_uncommitted_row_ = false; + param.encrypt_id_ = res.encrypt_id_; + param.master_key_id_ = res.master_key_id_; + if (param.table_key_.is_co_sstable() && param.column_group_cnt_ <= 1) { + param.column_group_cnt_ = column_cnt + 2; /* set column group_cnt to avoid return err, cnt is calculated as each + all + default*/ + } + MEMCPY(param.encrypt_key_, res.encrypt_key_, share::OB_MAX_TABLESPACE_ENCRYPT_KEY_LENGTH); + if (merge_type_ == MAJOR_MERGE) { + OK(ObSSTableMergeRes::fill_column_checksum_for_empty_major(param.column_cnt_, param.column_checksums_)); + } + partial_sstable_.reset(); + ASSERT_EQ(OB_SUCCESS, partial_sstable_.init(param, &allocator_)); + STORAGE_LOG(INFO, "create partial_sstable param", K(param)); +} + +void TestIndexBlockDataPrepare::insert_partial_data(ObMacroBlockWriter &data_writer, const int64_t row_cnt) +{ + partial_sstable_row_cnt_ = 0; + int64_t seed = min_row_seed_; + ObDatumRow row; + ObDatumRow multi_row; + ASSERT_EQ(OB_SUCCESS, row.init(allocator_, MAX_TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, multi_row.init(allocator_, 
MAX_TEST_COLUMN_CNT)); + ObDmlFlag flags[] = {DF_INSERT, DF_UPDATE, DF_DELETE}; + int64_t rows_per_mirco_block = rows_per_mirco_block_; + int64_t rows_per_macro_block = rows_per_mirco_block_ * mirco_blocks_per_macro_block_; + int64_t rows_cnt = row_cnt; + if (INT64_MAX == rows_per_mirco_block_ || INT64_MAX == mirco_blocks_per_macro_block_) { + rows_per_mirco_block = INT64_MAX; + rows_per_macro_block = INT64_MAX; + } + while (true) { + if (partial_sstable_row_cnt_ >= rows_cnt) { + break; + } + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, row)); + ++seed; + ObDmlFlag dml = flags[partial_sstable_row_cnt_ % ARRAYSIZEOF(flags)]; // INSERT / UPDATE / DELETE + convert_to_multi_version_row(row, table_schema_, SNAPSHOT_VERSION, dml, multi_row); + ASSERT_EQ(OB_SUCCESS, data_writer.append_row(multi_row)); + if (partial_sstable_row_cnt_ == 0) { + ObDatumRowkey &start_key = data_writer.last_key_; + ASSERT_EQ(OB_SUCCESS, start_key.deep_copy(start_key_, allocator_)); + } + if (partial_sstable_row_cnt_ == rows_cnt - 1) { + ObDatumRowkey &end_key = data_writer.last_key_; + ASSERT_EQ(OB_SUCCESS, end_key.deep_copy(end_key_, allocator_)); + } + if ((partial_sstable_row_cnt_ + 1) % rows_per_mirco_block == 0) { + OK(data_writer.build_micro_block()); + } + if ((partial_sstable_row_cnt_ + 1) % rows_per_macro_block == 0) { + OK(data_writer.try_switch_macro_block()); + } + ++partial_sstable_row_cnt_; + } + //max_row_seed_ = seed - 1; +} + +void TestIndexBlockDataPrepare::prepare_contrastive_sstable() +{ + ObMacroBlockWriter writer; + ObMacroDataSeq start_seq(0); + start_seq.set_data_block(); + row_generate_.reset(); + + ObWholeDataStoreDesc desc; + share::SCN end_scn; + end_scn.convert_from_ts(ObTimeUtility::current_time()); + ASSERT_EQ(OB_SUCCESS, desc.init(table_schema_, ObLSID(ls_id_), ObTabletID(tablet_id_), merge_type_, SNAPSHOT_VERSION, CLUSTER_CURRENT_VERSION, end_scn)); + void *builder_buf = allocator_.alloc(sizeof(ObSSTableIndexBuilder)); + root_index_builder_ = new 
(builder_buf) ObSSTableIndexBuilder(); + ASSERT_NE(nullptr, root_index_builder_); + desc.get_desc().sstable_index_builder_ = root_index_builder_; + + ASSERT_TRUE(desc.is_valid()); + if (need_agg_data_) { + ASSERT_EQ(OB_SUCCESS, desc.get_desc().col_desc_->agg_meta_array_.assign(agg_col_metas_)); + } + ASSERT_EQ(OB_SUCCESS, root_index_builder_->init(desc.get_desc())); + ASSERT_EQ(OB_SUCCESS, writer.open(desc.get_desc(), start_seq)); + ASSERT_EQ(OB_SUCCESS, row_generate_.init(table_schema_, &allocator_)); + + insert_data(writer); + + ASSERT_EQ(OB_SUCCESS, writer.close()); + // data write ctx has been moved to root_index_builder + ASSERT_EQ(writer.get_macro_block_write_ctx().get_macro_block_count(), 0); + data_macro_block_cnt_ = root_index_builder_->roots_[0]->macro_metas_->count(); + ASSERT_GE(data_macro_block_cnt_, 0); + + int64_t column_cnt = 0; + ObTabletID tablet_id(TestIndexBlockDataPrepare::tablet_id_); + ObLSID ls_id(ls_id_); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ObStorageSchema *storage_schema = nullptr; + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle)); + ASSERT_EQ(OB_SUCCESS, tablet_handle.get_obj()->load_storage_schema(allocator_, storage_schema)); + ASSERT_EQ(OB_SUCCESS, storage_schema->get_stored_column_count_in_sstable(column_cnt)); + close_builder_and_prepare_sstable(column_cnt); + prepare_ddl_kv(); + ObTabletObjLoadHelper::free(allocator_, storage_schema); +} + +void TestIndexBlockDataPrepare::prepare_merge_ddl_kvs() +{ + ddl_kvs_.reset(); + ObTabletHandle tablet_handle; + int ret = OB_SUCCESS; + ObLSID ls_id(ls_id_); + ObTabletID tablet_id(tablet_id_); + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, 
tablet_handle)); + share::SCN ddl_start_scn; + ddl_start_scn.convert_from_ts(ObTimeUtility::current_time()); + ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr *); + ASSERT_NE(t3m, nullptr); + ASSERT_EQ(OB_SUCCESS, t3m->acquire_ddl_kv(ddl_kvs_)); + ASSERT_EQ(OB_SUCCESS, ddl_kvs_.get_obj()->init(ls_id, tablet_id, ddl_start_scn, sstable_.get_data_version(), ddl_start_scn, 4000)); + + ObITable::TableKey ddl_key = sstable_.get_key(); + ddl_key.table_type_ = ObITable::TableType::MAJOR_SSTABLE; + for (int64_t i = 0; i < DDL_KVS_CNT; ++i) { + void *buf = allocator_.alloc(sizeof(ObDDLMemtable)); + ASSERT_NE(nullptr, buf); + ObDDLMemtable *new_ddl_table = new (buf) ObDDLMemtable; + ASSERT_EQ(OB_SUCCESS, new_ddl_table->init(*tablet_handle.get_obj(), ddl_key, ddl_start_scn, 4000)); + ASSERT_EQ(OB_SUCCESS, ddl_kvs_.get_obj()->get_ddl_memtables().push_back(new_ddl_table)); + } + ObDDLKVHandle kv_handle; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ASSERT_EQ(OB_SUCCESS, tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle, true /*CREATE*/)); + ddl_kv_mgr_handle.get_obj()->set_ddl_kv(0, ddl_kvs_); + ddl_kv_mgr_handle.get_obj()->freeze_ddl_kv(ddl_start_scn, sstable_.get_data_version(), 4000, ddl_start_scn); + SMART_VAR(ObSSTableSecMetaIterator, meta_iter) { + ObDatumRange query_range; + query_range.set_whole_range(); + ObDataMacroBlockMeta data_macro_meta; + ASSERT_EQ(OB_SUCCESS, meta_iter.open(query_range, + ObMacroBlockMetaType::DATA_BLOCK_META, + sstable_, + tablet_handle.get_obj()->get_rowkey_read_info(), + allocator_)); + int64_t macro_idx = 0; + int64_t kv_idx = 0; + while (OB_SUCC(ret)) { + if (OB_FAIL(meta_iter.get_next(data_macro_meta))) { + if (OB_ITER_END != ret) { + STORAGE_LOG(WARN, "get data macro meta failed", K(ret)); + } + } else { + STORAGE_LOG(INFO, "data_macro_meta_key", K(data_macro_meta)); + ++macro_idx; + ObDDLMacroHandle macro_handle; + macro_handle.set_block_id(data_macro_meta.get_macro_id()); + if (macro_idx > partial_kv_start_idx_) { + 
ObDataMacroBlockMeta *copied_meta = nullptr; + ASSERT_EQ(OB_SUCCESS, data_macro_meta.deep_copy(copied_meta, allocator_)); + ASSERT_EQ(OB_SUCCESS, ddl_kvs_.get_obj()->get_ddl_memtables().at(kv_idx)->insert_block_meta_tree(macro_handle, copied_meta)); + ++kv_idx; + } + } + } + ASSERT_EQ(OB_ITER_END, ret); + } +} + } // namespace blocksstable } // namespace oceanbase diff --git a/mittest/mtlenv/storage/blocksstable/test_ddl_merge_row_multi_scanner.cpp b/mittest/mtlenv/storage/blocksstable/test_ddl_merge_row_multi_scanner.cpp new file mode 100644 index 000000000..6e145c325 --- /dev/null +++ b/mittest/mtlenv/storage/blocksstable/test_ddl_merge_row_multi_scanner.cpp @@ -0,0 +1,772 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#define private public +#define protected public + +#include "storage/access/ob_index_tree_prefetcher.h" +#include "storage/access/ob_sstable_row_multi_scanner.h" +#include "storage/access/ob_sstable_row_multi_getter.h" +#include "ob_index_block_data_prepare.h" + +namespace oceanbase +{ +using namespace storage; +using namespace common; +namespace blocksstable +{ +class TestDDLMergeRowMultiScanner : public TestIndexBlockDataPrepare +{ +public: + TestDDLMergeRowMultiScanner(); + virtual ~TestDDLMergeRowMultiScanner(); + static void SetUpTestCase(); + static void TearDownTestCase(); + + virtual void SetUp(); + virtual void TearDown(); +protected: + void generate_range(const int64_t start, const int64_t end, ObDatumRange &range); +public: + void test_one_case( + const ObIArray &start_seeds, + const int64_t count_per_range, + const bool is_reverse_scan); + void test_single_get_normal(const bool is_reverse_scan); + void test_single_get_border(const bool is_reverse_scan); + void test_multi_get_normal(const bool is_reverse_scan); + void test_multi_get_border(const bool is_reverse_scan); + void test_single_scan_normal(const bool is_reverse_scan); + void test_single_scan_border(const bool is_reverse_scan); + void test_multi_scan_multi_scan_range( + const bool is_reverse_scan, + const int64_t count_per_range); + void test_multi_scan_multi_get_with_scan( + const bool is_reverse_scan, + const int64_t count_per_range); + +protected: + static const int64_t TEST_MULTI_GET_CNT = 100; + enum CacheHitMode + { + HIT_ALL = 0, + HIT_NONE, + HIT_PART, + HIT_MAX, + }; +private: + ObArenaAllocator allocator_; + ObDatumRow start_row_; + ObDatumRow end_row_; +}; + +TestDDLMergeRowMultiScanner::TestDDLMergeRowMultiScanner() + : TestIndexBlockDataPrepare("Test sstable row multi scanner") +{ + is_ddl_merge_data_ = true; + max_row_cnt_ = 150000; + max_partial_row_cnt_ = 78881; + partial_kv_start_idx_ = 3; +} + +TestDDLMergeRowMultiScanner::~TestDDLMergeRowMultiScanner() +{ + 
+} + +void TestDDLMergeRowMultiScanner::SetUpTestCase() +{ + TestIndexBlockDataPrepare::SetUpTestCase(); +} + +void TestDDLMergeRowMultiScanner::TearDownTestCase() +{ + TestIndexBlockDataPrepare::TearDownTestCase(); +} + +void TestDDLMergeRowMultiScanner::SetUp() +{ + TestIndexBlockDataPrepare::SetUp(); + ObLSID ls_id(ls_id_); + ObTabletID tablet_id(tablet_id_); + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle_)); + + ASSERT_EQ(OB_SUCCESS, start_row_.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, end_row_.init(allocator_, TEST_COLUMN_CNT)); +} + +void TestDDLMergeRowMultiScanner::TearDown() +{ + tablet_handle_.reset(); + TestIndexBlockDataPrepare::TearDown(); +} + +void TestDDLMergeRowMultiScanner::generate_range( + const int64_t start, + const int64_t end, + ObDatumRange &range) +{ + ObDatumRowkey tmp_rowkey; + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(start, start_row_)); + tmp_rowkey.assign(start_row_.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(range.start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(end, end_row_)); + tmp_rowkey.assign(end_row_.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(range.end_key_, allocator_)); + range.border_flag_.set_inclusive_start(); + range.border_flag_.set_inclusive_end(); +} + +void TestDDLMergeRowMultiScanner::test_one_case( + const ObIArray &start_seeds, + const int64_t count_per_range, + const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObDatumRange mscan_ranges[TEST_MULTI_GET_CNT]; + ObSSTableRowMultiScanner<> scanner; + ObSSTableRowMultiScanner<> merge_ddl_scanner; + ObSEArray ranges; + const ObDatumRow *prow = NULL; + const ObDatumRow *kv_prow = NULL; + + ObDatumRow start_row; + ObDatumRow end_row; + ObDatumRow 
check_row; + ASSERT_EQ(OB_SUCCESS, start_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, end_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, check_row.init(allocator_, TEST_COLUMN_CNT)); + for (int64_t i = 0; i < start_seeds.count(); ++i) { + ObDatumRowkey tmp_rowkey; + mscan_ranges[i].border_flag_.set_inclusive_start(); + mscan_ranges[i].border_flag_.set_inclusive_end(); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(start_seeds.at(i), start_row)); + tmp_rowkey.assign(start_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mscan_ranges[i].start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(start_seeds.at(i) + count_per_range - 1, end_row)); + tmp_rowkey.assign(end_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mscan_ranges[i].end_key_, allocator_)); + } + for (int64_t i = 0; i < start_seeds.count(); ++i) { + ASSERT_EQ(OB_SUCCESS, ranges.push_back(mscan_ranges[i])); + } + ASSERT_EQ(OB_SUCCESS, scanner.inner_open(iter_param_, context_, &sstable_, &ranges)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open(iter_param_, context_, &partial_sstable_, &ranges)); + for (int64_t i = 0; i < start_seeds.count(); ++i) { + for (int64_t j = 0; j < count_per_range; ++j) { + const int64_t k = is_reverse_scan ? 
start_seeds.at(i) + count_per_range - j - 1 : start_seeds.at(i) + j; + if (k < row_cnt_ || count_per_range == 1) { + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(k, check_row)); + if (k < row_cnt_) { + ASSERT_EQ(OB_SUCCESS, scanner.inner_get_next_row(prow)); + ASSERT_TRUE(*prow == check_row); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_get_next_row(kv_prow)); + ASSERT_TRUE(*kv_prow == check_row); + } + } + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + scanner.reuse(); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + merge_ddl_scanner.reuse(); +} + +void TestDDLMergeRowMultiScanner::test_single_get_normal(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObArray seeds; + // prepare query param and context + prepare_query_param(is_reverse_scan); + + // row in first macro + ret = seeds.push_back(3); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // row in middle macro + seeds.reset(); + ret = seeds.push_back(row_cnt_ / 2); // capture the result so the assertion below checks this push, not a stale ret + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // row in last macro, in cache + seeds.reset(); + ret = seeds.push_back(row_cnt_ - 3); // same: assert on this push's own result + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + destroy_query_param(); +} + +void TestDDLMergeRowMultiScanner::test_single_get_border(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObArray seeds; + // prepare query param and context + prepare_query_param(is_reverse_scan); + + // left border rowkey + ret = seeds.push_back(0); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // right border rowkey + seeds.reset(); + ret = seeds.push_back(row_cnt_ - 1); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // not exist + seeds.reset(); + ret = seeds.push_back(row_cnt_); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + destroy_query_param(); +} + +void 
TestDDLMergeRowMultiScanner::test_multi_get_normal(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObArray seeds; + + // prepare query param + prepare_query_param(is_reverse_scan); + + // 2 rows exist + seeds.reuse(); + for (int64_t i = 0; i < 2; ++i) { + ret = seeds.push_back(i * 11 + 2); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, 1, is_reverse_scan); + + // TEST_MULTI_GET_CNT rows exist + seeds.reuse(); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ret = seeds.push_back(i); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, 1, is_reverse_scan); + + // 2 row2 not exist + seeds.reuse(); + for (int64_t i = 0; i < 2; ++i) { + ret = seeds.push_back(i * 11 + 2 + row_cnt_); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, 1, is_reverse_scan); + + // TEST_MULTI_GET_CNT rows not exist + seeds.reuse(); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ret = seeds.push_back(i * 11 + 2 + row_cnt_); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, 1, is_reverse_scan); + + // some row exist, while other rows not exist + seeds.reuse(); + for (int64_t i = 0; i < 10; ++i) { + ret = seeds.push_back(i + (i % 2 ? 
row_cnt_ : 0)); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, 1, is_reverse_scan); + + destroy_query_param(); +} + +void TestDDLMergeRowMultiScanner::test_multi_get_border(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObArray seeds; + // prepare query param + prepare_query_param(is_reverse_scan); + + // first row of sstable + ret = seeds.push_back(0); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, false); + + // last row of sstable + seeds.reset(); + ret = seeds.push_back(row_cnt_ - 1); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, false); + + // 100 rows with same rowkey + seeds.reset(); + for (int64_t i = 0; i < 100; ++i) { + ret = seeds.push_back(row_cnt_ / 2); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, 1, is_reverse_scan); + destroy_query_param(); +} + +void TestDDLMergeRowMultiScanner::test_single_scan_normal(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObArray seeds; + ObRandom random; + const int64_t random_start = random.get(0, 10000000) % row_cnt_; + const int64_t random_end = random.get(0, 100000000) % row_cnt_; + const int64_t start = std::min(random_start, random_end); + const int64_t end = std::max(random_start, random_end); + + // prepare query param + prepare_query_param(is_reverse_scan); + + // multiple rows exist + ret = seeds.push_back(start); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, end - start, is_reverse_scan); + + // multiple rows, partial exist + seeds.reset(); + ret = seeds.push_back(start); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, row_cnt_ + 10 - start, is_reverse_scan); + + // single row exist + seeds.reset(); + ret = seeds.push_back(row_cnt_ / 2); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // not exist + seeds.reset(); + ret = seeds.push_back(row_cnt_ + 10); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 10, is_reverse_scan); + destroy_query_param(); +} + +void 
TestDDLMergeRowMultiScanner::test_single_scan_border(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObArray seeds; + // prepare query param + prepare_query_param(is_reverse_scan); + + // full table scan + ret = seeds.push_back(0); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, row_cnt_, is_reverse_scan); + + // first row of sstable + seeds.reset(); + ret = seeds.push_back(0); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // last row of sstable + seeds.reset(); + ret = seeds.push_back(row_cnt_ - 1); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + // not exist + seeds.reset(); + ret = seeds.push_back(row_cnt_); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + destroy_query_param(); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_single_get_normal) +{ + bool is_reverse_scan = false; + test_single_get_normal(is_reverse_scan); + is_reverse_scan = true; + test_single_get_normal(is_reverse_scan); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_single_get_border) +{ + bool is_reverse_scan = false; + test_single_get_border(is_reverse_scan); + is_reverse_scan = true; + test_single_get_border(is_reverse_scan); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_multi_get_normal) +{ + bool is_reverse_scan = false; + test_multi_get_normal(is_reverse_scan); + is_reverse_scan = true; + test_multi_get_normal(is_reverse_scan); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_multi_get_border) +{ + bool is_reverse_scan = false; + test_multi_get_border(is_reverse_scan); + is_reverse_scan = true; + test_multi_get_border(is_reverse_scan); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_single_scan_normal) +{ + bool is_reverse_scan = false; + test_single_scan_normal(is_reverse_scan); + is_reverse_scan = true; + test_single_scan_normal(is_reverse_scan); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_single_scan_border) +{ + bool is_reverse_scan = false; + 
test_single_scan_border(is_reverse_scan); + is_reverse_scan = true; + test_single_scan_border(is_reverse_scan); +} + +void TestDDLMergeRowMultiScanner::test_multi_scan_multi_scan_range( + const bool is_reverse_scan, + const int64_t count_per_range) +{ + int ret = OB_SUCCESS; + ObStoreRange range; + ObArray ranges; + ObStoreRow row; + ObStoreRowkey rowkey; + ObArray seeds; + ObSSTableRowMultiScanner<> scanner; + + // prepare query param + prepare_query_param(is_reverse_scan); + + //left border rowkey + seeds.reset(); + ret = seeds.push_back(0); + ASSERT_EQ(OB_SUCCESS, ret); // assert on the stored result; calling push_back again here would add a second seed + test_one_case(seeds, 1, is_reverse_scan); + + //right border rowkey + seeds.reset(); + ret = seeds.push_back(row_cnt_ - 1); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + //not exist rowkey + seeds.reset(); + ret = seeds.push_back(row_cnt_); + ASSERT_EQ(OB_SUCCESS, ret); + test_one_case(seeds, 1, is_reverse_scan); + + //50 exist + seeds.reset(); + for (int64_t i = 0; i < 50; ++i) { + ret = seeds.push_back(i); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, count_per_range, is_reverse_scan); + + //20 dup exist + seeds.reset(); + for (int64_t i = 0; i < 20; ++i) { + ret = seeds.push_back(0); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, count_per_range, is_reverse_scan); + + // 20 not exist + seeds.reset(); + for (int64_t i = 0; i < 20; ++i) { + ret = seeds.push_back(row_cnt_ + i); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, count_per_range, is_reverse_scan); + + // partial not exist + seeds.reset(); + for (int64_t i = 0; i < 20; ++i) { + ret = seeds.push_back(i % 2 ? 
i : i + row_cnt_); + ASSERT_EQ(OB_SUCCESS, ret); + } + test_one_case(seeds, count_per_range, is_reverse_scan); + destroy_query_param(); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_multi_scan) +{ + bool is_reverse_scan = false; + for (int64_t i = 2; i < 20; i += 10) { + test_multi_scan_multi_scan_range(is_reverse_scan, i); + } + is_reverse_scan = true; + for (int64_t i = 2; i < 20; i += 10) { + test_multi_scan_multi_scan_range(is_reverse_scan, i); + } +} + +void TestDDLMergeRowMultiScanner::test_multi_scan_multi_get_with_scan( + const bool is_reverse_scan, + const int64_t count_per_range) +{ + int ret = OB_SUCCESS; + ObDatumRange range; + ObArray ranges; + ObDatumRow row; + ObDatumRowkey rowkey; + const ObDatumRow *prow = NULL; + const ObDatumRow *kv_prow = NULL; + int64_t row_cnt = 0; + ObSSTableRowMultiScanner<> scanner; + ObSSTableRowMultiScanner<> merge_ddl_scanner; + + // prepare query param + prepare_query_param(is_reverse_scan); + + // multi scan interact with multi get + ObDatumRange mget_ranges[TEST_MULTI_GET_CNT]; + ObDatumRow start_row; + ObDatumRow end_row; + ObDatumRow check_row; + ASSERT_EQ(OB_SUCCESS, start_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, end_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, check_row.init(allocator_, TEST_COLUMN_CNT)); + ranges.reuse(); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ObDatumRowkey tmp_rowkey; + mget_ranges[i].border_flag_.set_inclusive_start(); + mget_ranges[i].border_flag_.set_inclusive_end(); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i, start_row)); + tmp_rowkey.assign(start_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i + (i % 2 ? 
count_per_range - 1 : 0), end_row)); + tmp_rowkey.assign(end_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].end_key_, allocator_)); + } + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ret = ranges.push_back(mget_ranges[i]); + ASSERT_EQ(OB_SUCCESS, ret); + } + STORAGE_LOG(INFO, "multi scan begin"); + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &ranges)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &ranges)); + row_cnt = 0; + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + const int64_t p = i; + if (p % 2) { + for (int64_t j = 0; j < count_per_range; ++j) { + const int64_t k = is_reverse_scan ? i + count_per_range - j - 1 : i + j; + ASSERT_EQ(OB_SUCCESS, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_get_next_row(kv_prow)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(k, check_row)); + ++row_cnt; + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } else { + ASSERT_EQ(OB_SUCCESS, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_get_next_row(kv_prow)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(p, check_row)); + ++row_cnt; + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); + + // first half multi scan, second half multi get + ranges.reuse(); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ObDatumRowkey tmp_rowkey; + mget_ranges[i].border_flag_.set_inclusive_start(); + mget_ranges[i].border_flag_.set_inclusive_end(); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i, start_row)); + tmp_rowkey.assign(start_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, 
tmp_rowkey.deep_copy(mget_ranges[i].start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i + (i < TEST_MULTI_GET_CNT / 2 ? count_per_range - 1 : 0), end_row)); + tmp_rowkey.assign(end_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].end_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, ranges.push_back(mget_ranges[i])); + } + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &ranges)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &ranges)); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + const int64_t p = i; + if (p < TEST_MULTI_GET_CNT / 2) { + for (int64_t j = 0; j < count_per_range; ++j) { + const int64_t k = is_reverse_scan ? i + count_per_range - j - 1 : i + j; + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = row_generate_.get_next_row(k, check_row); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } else { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = row_generate_.get_next_row(p, check_row); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); + + // first one multi get, others multi scan + ranges.reuse(); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ObDatumRowkey tmp_rowkey; + mget_ranges[i].border_flag_.set_inclusive_start(); + mget_ranges[i].border_flag_.set_inclusive_end(); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i, start_row)); + 
tmp_rowkey.assign(start_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i + (i != 0 ? count_per_range - 1 : 0), end_row)); + tmp_rowkey.assign(end_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].end_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, ranges.push_back(mget_ranges[i])); + } + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &ranges)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &ranges)); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + const int64_t p = i; + if (p != 0) { + for (int64_t j = 0; j < count_per_range; ++j) { + const int64_t k = is_reverse_scan ? i + count_per_range - j - 1 : i + j; + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = row_generate_.get_next_row(k, check_row); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } else { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = row_generate_.get_next_row(p, check_row); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); + + // multi scan not exist row + STORAGE_LOG(DEBUG, "multi_scan_not_exist_row"); + ranges.reuse(); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + ObDatumRowkey tmp_rowkey; + mget_ranges[i].border_flag_.set_inclusive_start(); + 
mget_ranges[i].border_flag_.set_inclusive_end(); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i + (i % 2 ? row_cnt_ : 0), start_row)); + tmp_rowkey.assign(start_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(i + (i % 2 ? row_cnt_ + count_per_range - 1 : 0), end_row)); + tmp_rowkey.assign(end_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(mget_ranges[i].end_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, ranges.push_back(mget_ranges[i])); + } + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &ranges)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &ranges)); + for (int64_t i = 0; i < TEST_MULTI_GET_CNT; ++i) { + const int64_t p = i; + if (p % 2) { + continue; + } else { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret); + ret = row_generate_.get_next_row(p, check_row); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(*prow == check_row); + ASSERT_TRUE(*kv_prow == check_row); + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reset(); + destroy_query_param(); +} + +TEST_F(TestDDLMergeRowMultiScanner, test_multi_get_with_scan) +{ + bool is_reverse_scan = false; + for (int64_t i = 2; i < 10; i += 10) { + test_multi_scan_multi_get_with_scan(is_reverse_scan, i); + } + is_reverse_scan = true; + for (int64_t i = 2; i < 10; i += 10) { + test_multi_scan_multi_get_with_scan(is_reverse_scan, i); + } +} + +} +} + +int main(int argc, char **argv) +{ + system("rm -f test_ddl_merge_row_multi_scanner.log*"); + OB_LOGGER.set_file_name("test_ddl_merge_row_multi_scanner.log", true, true); + 
oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/mittest/mtlenv/storage/blocksstable/test_ddl_merge_row_scanner.cpp b/mittest/mtlenv/storage/blocksstable/test_ddl_merge_row_scanner.cpp new file mode 100644 index 000000000..9177b394f --- /dev/null +++ b/mittest/mtlenv/storage/blocksstable/test_ddl_merge_row_scanner.cpp @@ -0,0 +1,537 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#define private public +#define protected public + +#include "lib/random/ob_random.h" +#include "storage/access/ob_index_tree_prefetcher.h" +#include "storage/access/ob_sstable_row_getter.h" +#include "storage/access/ob_sstable_row_scanner.h" +#include "ob_index_block_data_prepare.h" + +namespace oceanbase +{ +using namespace storage; +using namespace common; +namespace blocksstable +{ +class TestDDLMergeRowScanner : public TestIndexBlockDataPrepare +{ +public: + TestDDLMergeRowScanner(); + virtual ~TestDDLMergeRowScanner(); + static void SetUpTestCase(); + static void TearDownTestCase(); + + virtual void SetUp(); + virtual void TearDown(); + + void generate_range(const int64_t start, const int64_t end, ObDatumRange &range); + void test_one_rowkey(const int64_t seed); //get + void test_single_case( + const ObDatumRange &range, + const int64_t start, + const int64_t end, + const bool is_reverse_scan); + void test_full_case( + const ObDatumRange &range, + const int64_t start, + const int64_t end, + 
const bool is_reverse_scan, + const int64_t hit_mode); + void test_border(const bool is_reverse_scan); + void test_basic(const bool is_reverse_scan); + +protected: + enum CacheHitMode + { + HIT_ALL = 0, + HIT_NONE, + HIT_PART, + HIT_MAX, + }; +private: + ObArenaAllocator allocator_; + ObDatumRow start_row_; + ObDatumRow end_row_; +}; + +TestDDLMergeRowScanner::TestDDLMergeRowScanner() + : TestIndexBlockDataPrepare("Test DDL multi row scanner") +{ + is_ddl_merge_data_ = true; + max_row_cnt_ = 150000; + max_partial_row_cnt_ = 78881; + partial_kv_start_idx_ = 3; +} + +TestDDLMergeRowScanner::~TestDDLMergeRowScanner() +{ + +} + +void TestDDLMergeRowScanner::SetUpTestCase() +{ + TestIndexBlockDataPrepare::SetUpTestCase(); +} + +void TestDDLMergeRowScanner::TearDownTestCase() +{ + TestIndexBlockDataPrepare::TearDownTestCase(); +} + +void TestDDLMergeRowScanner::SetUp() +{ + TestIndexBlockDataPrepare::SetUp(); + ASSERT_EQ(OB_SUCCESS, start_row_.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, end_row_.init(allocator_, TEST_COLUMN_CNT)); + ObLSID ls_id(ls_id_); + ObTabletID tablet_id(tablet_id_); + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle_)); +} + +void TestDDLMergeRowScanner::TearDown() +{ + tablet_handle_.reset(); + TestIndexBlockDataPrepare::TearDown(); +} + +void TestDDLMergeRowScanner::generate_range( + const int64_t start, + const int64_t end, + ObDatumRange &range) +{ + ObDatumRowkey tmp_rowkey; + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(start, start_row_)); + tmp_rowkey.assign(start_row_.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(range.start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(end, end_row_)); + tmp_rowkey.assign(end_row_.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + 
ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(range.end_key_, allocator_)); + range.border_flag_.set_inclusive_start(); + range.border_flag_.set_inclusive_end(); +} + +void TestDDLMergeRowScanner::test_one_rowkey(const int64_t seed) +{ + ObSSTableRowGetter getter; + ObSSTableRowGetter merge_ddl_getter; + ObDatumRow query_row; + ASSERT_EQ(OB_SUCCESS, query_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(seed, query_row)); // fail fast if the expected row cannot be generated, instead of comparing against an unfilled row + ObDatumRowkey query_rowkey; + query_rowkey.assign(query_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + STORAGE_LOG(INFO, "Query rowkey", K(query_row)); + ASSERT_EQ(OB_SUCCESS, getter.inner_open(iter_param_, context_, &sstable_, &query_rowkey)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_getter.inner_open(iter_param_, context_, &partial_sstable_, &query_rowkey)); + + const ObDatumRow *prow = nullptr; + const ObDatumRow *kv_prow = nullptr; + ASSERT_EQ(OB_SUCCESS, getter.inner_get_next_row(prow)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_getter.inner_get_next_row(kv_prow)); + STORAGE_LOG(INFO, "debug datum row1", KPC(prow), KPC(kv_prow)); + if (seed >= row_cnt_) { + ASSERT_TRUE(prow->row_flag_.is_not_exist()); + ASSERT_TRUE(kv_prow->row_flag_.is_not_exist()); + } else { + ASSERT_TRUE(*prow == query_row); + if (!(*kv_prow == query_row)) { + STORAGE_LOG(INFO, "CHECK UNEUQAL", K(query_row), K(*kv_prow)); + } + ASSERT_TRUE(*kv_prow == query_row); + } + ASSERT_EQ(OB_ITER_END, getter.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_getter.inner_get_next_row(kv_prow)); + getter.reuse(); + merge_ddl_getter.reuse(); +} + +void TestDDLMergeRowScanner::test_single_case( + const ObDatumRange &range, + const int64_t start, + const int64_t end, + const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObDatumRow row; + ASSERT_EQ(OB_SUCCESS, row.init(allocator_, TEST_COLUMN_CNT)); + const ObDatumRow *prow = nullptr; + const ObDatumRow *kv_prow = nullptr; + ObSSTableRowScanner<> scanner; + ObSSTableRowScanner<> merge_ddl_scanner; + + 
ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &range)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &range)); + for (int64_t i = start; i <= end; ++i) { + int64_t index = 0; + if (is_reverse_scan) { + ret = row_generate_.get_next_row(end - i + start, row); + index = end - i + start; + } else { + ret = row_generate_.get_next_row(i, row); + index = i; + } + + ASSERT_EQ(OB_SUCCESS, ret); + if (index < row_cnt_) { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret) << i << "index: " << index << " start: " << start + << " end: " << end << " prow: " << prow; + ASSERT_TRUE(row == *prow) << i << "index: " << index << " start: " << start + << " end: " << end << " prow: " << prow; + + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret) << i << "index: " << index << " start: " << start + << " end: " << end << " kv_prow: " << kv_prow; + ASSERT_TRUE(row == *kv_prow) << i << "index: " << index << " start: " << start + << " end: " << end << " kv_prow: " << kv_prow; + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); +} + +void TestDDLMergeRowScanner::test_full_case( + const ObDatumRange &range, + const int64_t start, + const int64_t end, + const bool is_reverse_scan, + const int64_t hit_mode) +{ + int ret = OB_SUCCESS; + ObDatumRow row; + ASSERT_EQ(OB_SUCCESS, row.init(allocator_, TEST_COLUMN_CNT)); + const ObDatumRow *prow = nullptr; + const ObDatumRow *kv_prow = nullptr; + ObSSTableRowScanner<> scanner; + ObSSTableRowScanner<> merge_ddl_scanner; + + if (HIT_PART == hit_mode) { + const int64_t part_start = start + (end - start) / 3; + const int64_t part_end = end - (end - start) / 3; + ObDatumRange part_range; + ObDatumRow start_row; + ObDatumRow end_row; + ObDatumRowkey tmp_rowkey; + 
ASSERT_EQ(OB_SUCCESS, start_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, end_row.init(allocator_, TEST_COLUMN_CNT)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(part_start, start_row)); + tmp_rowkey.assign(start_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(part_range.start_key_, allocator_)); + ASSERT_EQ(OB_SUCCESS, row_generate_.get_next_row(part_end, end_row)); + tmp_rowkey.assign(end_row.storage_datums_, TEST_ROWKEY_COLUMN_CNT); + ASSERT_EQ(OB_SUCCESS, tmp_rowkey.deep_copy(part_range.end_key_, allocator_)); + part_range.border_flag_.set_inclusive_start(); + part_range.border_flag_.set_inclusive_end(); + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &part_range)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &part_range)); + for (int64_t i = part_start; i <= part_end; ++i) { + if (i < row_cnt_) { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret) << "i: " << i << " part_start: " << part_start + << " part_end: " << part_end << " prow: " << prow; + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret) << "i: " << i << " part_start: " << part_start + << " part_end: " << part_end << " kv_prow: " << kv_prow; + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); + } + + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &range)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &range)); + for (int64_t i = start; i <= end; ++i) { + int64_t index = 0; + if (is_reverse_scan) { + ret = row_generate_.get_next_row(end - i + start, row); + index = end - i + start; + } else { + ret = row_generate_.get_next_row(i, row); + index = i; + } + 
ASSERT_EQ(OB_SUCCESS, ret); + if (index < row_cnt_) { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret) << i << "index: " << index << " start: " << start + << " end: " << end << " prow: " << prow; + ASSERT_TRUE(row == *prow) << i << "index: " << index << " start: " << start + << " end: " << end << " prow: " << prow; + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret) << i << "index: " << index << " start: " << start + << " end: " << end << " kv_prow: " << kv_prow; + ASSERT_TRUE(row == *kv_prow) << i << "index: " << index << " start: " << start + << " end: " << end << " kv_prow: " << kv_prow; + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); + + if (HIT_ALL == hit_mode) { + int64_t index = 0; + ASSERT_EQ(OB_SUCCESS, scanner.inner_open( + iter_param_, + context_, + &sstable_, + &range)); + ASSERT_EQ(OB_SUCCESS, merge_ddl_scanner.inner_open( + iter_param_, + context_, + &partial_sstable_, + &range)); + for (int64_t i = start; i <= end; ++i) { + if (is_reverse_scan) { + ret = row_generate_.get_next_row(end - i + start, row); + index = end - i + start; + } else { + ret = row_generate_.get_next_row(i, row); + index = i; + } + ASSERT_EQ(OB_SUCCESS, ret); + if (index < row_cnt_) { + ret = scanner.inner_get_next_row(prow); + ASSERT_EQ(OB_SUCCESS, ret) << i << "index: " << index << " start: " << start + << " end: " << end << " prow: " << prow; + ASSERT_TRUE(row == *prow) << i << "index: " << index << " start: " << start + << " end: " << end << " prow: " << prow; + ret = merge_ddl_scanner.inner_get_next_row(kv_prow); + ASSERT_EQ(OB_SUCCESS, ret) << i << "index: " << index << " start: " << start + << " end: " << end << " kv_prow: " << kv_prow; + 
ASSERT_TRUE(row == *kv_prow) << i << "index: " << index << " start: " << start + << " end: " << end << " kv_prow: " << kv_prow; + } + } + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, scanner.inner_get_next_row(prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + ASSERT_EQ(OB_ITER_END, merge_ddl_scanner.inner_get_next_row(kv_prow)); + scanner.reuse(); + merge_ddl_scanner.reuse(); + } +} + +void TestDDLMergeRowScanner::test_border(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObDatumRange range; + + // prepare query param + prepare_query_param(is_reverse_scan); + + // full table scan + range.set_whole_range(); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, 0, row_cnt_ - 1, is_reverse_scan, i); + } + + // the first row of sstable + generate_range(0, 0, range); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, 0, 0, is_reverse_scan, i); + } + + // the first 100 row of sstable + generate_range(0, 100, range); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, 0, 100, is_reverse_scan, i); + } + + // the last row of sstable + generate_range(row_cnt_ - 1, row_cnt_ - 1, range); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, row_cnt_ - 1, row_cnt_ - 1, is_reverse_scan, i); + } + + // the last 100 row of sstable + generate_range(row_cnt_ - 100, row_cnt_ - 1, range); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, row_cnt_ - 100, row_cnt_ - 1, is_reverse_scan, i); + } + + // not exist + generate_range(row_cnt_, row_cnt_, range); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, row_cnt_, row_cnt_, is_reverse_scan, i); + } + + destroy_query_param(); +} + +void TestDDLMergeRowScanner::test_basic(const bool is_reverse_scan) +{ + int ret = OB_SUCCESS; + ObDatumRange range; + + prepare_query_param(is_reverse_scan); + // full table scan + range.set_whole_range(); + 
test_single_case(range, 0, row_cnt_ - 1, is_reverse_scan); + + // the first row of sstable + generate_range(0, 0, range); + test_single_case(range, 0, 0, is_reverse_scan); + + // the first 100 row of sstable + generate_range(0, 100, range); + test_single_case(range, 0, 100, is_reverse_scan); + + // the last row of sstable + generate_range(row_cnt_ - 1, row_cnt_ - 1, range); + test_single_case(range, row_cnt_ - 1, row_cnt_ - 1, is_reverse_scan); + + // the last 100 row of sstable + generate_range(row_cnt_ - 100, row_cnt_ - 1, range); + test_single_case(range, row_cnt_ - 100, row_cnt_ - 1, is_reverse_scan); + + // not exist + generate_range(row_cnt_, row_cnt_, range); + test_single_case(range, row_cnt_, row_cnt_, is_reverse_scan); + destroy_query_param(); +} + +TEST_F(TestDDLMergeRowScanner, test_get) +{ + // forward get + prepare_query_param(false); + // left border rowkey + test_one_rowkey(0); + // end_key of left border + test_one_rowkey(15933); + // first_key of right border + test_one_rowkey(132361); + // end_key of right border + test_one_rowkey(row_cnt_ - 1); + // mid border rowkey + test_one_rowkey(row_cnt_ / 2); + // non-exist rowkey + test_one_rowkey(row_cnt_); + destroy_query_param(); + + // reverse get + prepare_query_param(true); + // left border rowkey + test_one_rowkey(0); + // end_key of left border + test_one_rowkey(15933); + // first_key of right border + test_one_rowkey(132361); + // end_key of right border + test_one_rowkey(row_cnt_ - 1); + // mid border rowkey + test_one_rowkey(row_cnt_ / 2); + // non-exist rowkey + test_one_rowkey(row_cnt_); + destroy_query_param(); +} + +TEST_F(TestDDLMergeRowScanner, test_basic_scan) +{ + bool is_reverse_scan = false; + test_basic(is_reverse_scan); + is_reverse_scan = true; + test_basic(is_reverse_scan); + STORAGE_LOG(INFO, "memory usage", K(lib::get_memory_hold()), K(lib::get_memory_limit())); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(500); + 
ObMallocAllocator::get_instance()->print_tenant_memory_usage(500); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(1); +} + +TEST_F(TestDDLMergeRowScanner, test_border_scan) +{ + bool is_reverse_scan = false; + test_border(is_reverse_scan); + is_reverse_scan = true; + test_border(is_reverse_scan); + STORAGE_LOG(INFO, "memory usage", K(lib::get_memory_hold()), K(lib::get_memory_limit())); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(500); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(500); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(1); +} + +TEST_F(TestDDLMergeRowScanner, test_random_scan) +{ + ObDatumRange range; + int64_t start = ObRandom::rand(0, row_cnt_ - 1); + int64_t end = ObRandom::rand(0, row_cnt_ - 1); + if (start > end) { + int64_t temp = start; + start = end; + end = temp; + } + generate_range(start, end, range); + + bool is_reverse_scan = false; + // prepare query param + prepare_query_param(is_reverse_scan); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, start, end, is_reverse_scan, i); + } + destroy_query_param(); + + is_reverse_scan = true; + // prepare query param + prepare_query_param(is_reverse_scan); + for (int64_t i = HIT_ALL; i < HIT_MAX; ++i) { + test_full_case(range, start, end, is_reverse_scan, i); + } + destroy_query_param(); +} + +} +} + +int main(int argc, char **argv) +{ + system("rm -f test_ddl_merge_row_scanner.log*"); + OB_LOGGER.set_file_name("test_ddl_merge_row_scanner.log", true, true); + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/mittest/mtlenv/storage/blocksstable/test_direct_load.cpp b/mittest/mtlenv/storage/blocksstable/test_direct_load.cpp new file mode 100644 index 000000000..69ddccead --- /dev/null +++ b/mittest/mtlenv/storage/blocksstable/test_direct_load.cpp @@ -0,0 +1,92 @@ +/** + * Copyright 
(c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#define private public +#define protected public + +#include "ob_index_block_data_prepare.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" + +namespace oceanbase +{ +using namespace lib; +using namespace common; +using namespace share; +using namespace storage; +using namespace blocksstable; + +class TestDirectLoad : public TestIndexBlockDataPrepare +{ +public: + TestDirectLoad() : TestIndexBlockDataPrepare("Test direct load") {} + virtual ~TestDirectLoad() {} + static void SetUpTestCase() { TestIndexBlockDataPrepare::SetUpTestCase(); } + static void TearDownTestCase() { TestIndexBlockDataPrepare::TearDownTestCase(); } + + virtual void SetUp(); + virtual void TearDown(); +}; + +void TestDirectLoad::SetUp() +{ + TestIndexBlockDataPrepare::SetUp(); + ObLSID ls_id(ls_id_); + ObTabletID tablet_id(tablet_id_); + ObLSHandle ls_handle; + ObLSService *ls_svr = MTL(ObLSService*); + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); + ASSERT_EQ(OB_SUCCESS, ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle_)); + + prepare_query_param(true); +} + +void TestDirectLoad::TearDown() +{ + destroy_query_param(); + tablet_handle_.reset(); + TestIndexBlockDataPrepare::TearDown(); +} + +TEST_F(TestDirectLoad, init_ddl_table_store) +{ + ObTabletFullDirectLoadMgr tablet_dl_mgr; + ObTabletDirectLoadInsertParam build_param; + build_param.common_param_.ls_id_ = ls_id_; + build_param.common_param_.tablet_id_ = tablet_id_; + 
build_param.common_param_.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_DDL; + build_param.common_param_.read_snapshot_ = SNAPSHOT_VERSION; + build_param.runtime_only_param_.task_cnt_ = 1; + build_param.runtime_only_param_.task_id_ = 1; + build_param.runtime_only_param_.table_id_ = TEST_TABLE_ID; + build_param.runtime_only_param_.schema_version_ = 1; + SCN ddl_start_scn; + ASSERT_EQ(OB_SUCCESS, ddl_start_scn.convert_from_ts(ObTimeUtility::current_time())); + ASSERT_EQ(OB_SUCCESS, tablet_dl_mgr.update(nullptr, build_param)); + tablet_dl_mgr.start_scn_ = ddl_start_scn; + tablet_dl_mgr.data_format_version_ = DATA_VERSION_4_0_0_0; + ASSERT_EQ(OB_SUCCESS, tablet_dl_mgr.init_ddl_table_store(ddl_start_scn, SNAPSHOT_VERSION, ddl_start_scn)); + +} + + +} // namespace oceanbase + + +int main(int argc, char **argv) +{ + system("rm -f test_direct_load.log*"); + OB_LOGGER.set_file_name("test_direct_load.log", true, true); + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/mittest/mtlenv/storage/blocksstable/test_index_block_row_scanner.cpp b/mittest/mtlenv/storage/blocksstable/test_index_block_row_scanner.cpp index d11e672cf..cb85c4953 100755 --- a/mittest/mtlenv/storage/blocksstable/test_index_block_row_scanner.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_index_block_row_scanner.cpp @@ -204,7 +204,6 @@ TEST_F(TestIndexBlockRowScanner, prefetch_and_scan) ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, *raw_block, root_blk_header->rowkey_array_[root_row_id])); - ASSERT_EQ(idx_scanner.current_, raw_idx_scanner.current_); idx_scanner.reuse(); raw_idx_scanner.reuse(); @@ -224,7 +223,6 @@ TEST_F(TestIndexBlockRowScanner, prefetch_and_scan) int tmp_ret = OB_SUCCESS; while (OB_SUCCESS == tmp_ret) { - ASSERT_EQ(idx_scanner.current_, raw_idx_scanner.current_); tmp_ret = idx_scanner.get_next(read_idx_info); if (OB_SUCCESS == tmp_ret) { ASSERT_EQ(tmp_ret, 
raw_idx_scanner.get_next(raw_read_idx_info)); diff --git a/mittest/mtlenv/storage/blocksstable/test_sstable_row_getter.cpp b/mittest/mtlenv/storage/blocksstable/test_sstable_row_getter.cpp index e0553bd8d..26939b88c 100644 --- a/mittest/mtlenv/storage/blocksstable/test_sstable_row_getter.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_sstable_row_getter.cpp @@ -113,7 +113,13 @@ TEST_F(TestSSTableRowGetter, get) //left border rowkey test_one_rowkey(0); - // right border rowkey + //end_key of left border + test_one_rowkey(15933); + + //first_key of right border + test_one_rowkey(64760); + + // end_key of right border test_one_rowkey(row_cnt_ - 1); // mid border rowkey diff --git a/mittest/mtlenv/storage/test_co_merge.cpp b/mittest/mtlenv/storage/test_co_merge.cpp index d172bafd3..d10e5684b 100644 --- a/mittest/mtlenv/storage/test_co_merge.cpp +++ b/mittest/mtlenv/storage/test_co_merge.cpp @@ -299,6 +299,7 @@ void TestCOMerge::prepare_scan_param( iter_param.vectorized_enabled_ = false; ASSERT_EQ(OB_SUCCESS, store_ctx.init_for_read(ls_id, + iter_param.tablet_id_, INT64_MAX, // query_expire_ts -1, // lock_timeout_us share::SCN::max_scn())); @@ -436,6 +437,7 @@ void TestCOMerge::prepare_query_param(const ObVersionRange &version_range) iter_param_.vectorized_enabled_ = false; ASSERT_EQ(OB_SUCCESS, store_ctx_.init_for_read(ls_id, + iter_param_.tablet_id_, INT64_MAX, // query_expire_ts -1, // lock_timeout_us share::SCN::max_scn())); diff --git a/mittest/mtlenv/storage/test_index_sstable_estimator.cpp b/mittest/mtlenv/storage/test_index_sstable_estimator.cpp index 32a8ce44c..f0005bbd4 100644 --- a/mittest/mtlenv/storage/test_index_sstable_estimator.cpp +++ b/mittest/mtlenv/storage/test_index_sstable_estimator.cpp @@ -31,7 +31,7 @@ namespace unittest class TestIndexSSTableEstimator : public TestIndexBlockDataPrepare { public: - TestIndexSSTableEstimator() : TestIndexBlockDataPrepare("Test index sstable estimator") {} + TestIndexSSTableEstimator(); virtual 
~TestIndexSSTableEstimator() {} virtual void SetUp(); virtual void TearDown(); @@ -46,6 +46,14 @@ private: ObDatumRow end_row_; }; +TestIndexSSTableEstimator::TestIndexSSTableEstimator() + : TestIndexBlockDataPrepare("Test index sstable estimator") +{ + is_ddl_merge_data_ = true; + max_row_cnt_ = 150000; + max_partial_row_cnt_ = 78881; + partial_kv_start_idx_ = 3; +} void TestIndexSSTableEstimator::SetUpTestCase() { TestIndexBlockDataPrepare::SetUpTestCase(); @@ -113,12 +121,21 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_whole_range) range.set_whole_range(); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + ASSERT_EQ(part_est, ddl_merge_part_est); } TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_range) @@ -126,12 +143,21 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_range) ObDatumRange range; generate_range(100, -1, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + 
ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + ASSERT_EQ(part_est, ddl_merge_part_est); } TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_left_range) @@ -139,12 +165,21 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_left_range) ObDatumRange range; generate_range(-1, 100, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + ASSERT_EQ(part_est, ddl_merge_part_est); 
} TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_right_range) @@ -152,12 +187,21 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_right_range) ObDatumRange range; generate_range(row_cnt_ - 100, -1, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + ASSERT_EQ(part_est, ddl_merge_part_est); } TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_middle_range) @@ -165,12 +209,21 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_middle_range) ObDatumRange range; generate_range(100, row_cnt_ - 100, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator 
ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + ASSERT_EQ(part_est, ddl_merge_part_est); } TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_noexist_range) @@ -178,12 +231,21 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_noexist_range) ObDatumRange range; generate_range(row_cnt_, row_cnt_, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + ASSERT_EQ(part_est, ddl_merge_part_est); } } // end namespace unittest diff --git a/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp b/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp index 52baba692..86242c71f 100644 --- a/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp +++ 
b/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp @@ -31,7 +31,7 @@ namespace unittest class TestMultiVersionIndexSSTableEstimator : public TestIndexBlockDataPrepare { public: - TestMultiVersionIndexSSTableEstimator() : TestIndexBlockDataPrepare("Test multi version index sstable estimator", compaction::MINI_MERGE) {} + TestMultiVersionIndexSSTableEstimator(); virtual ~TestMultiVersionIndexSSTableEstimator() {} virtual void SetUp(); virtual void TearDown(); @@ -48,6 +48,15 @@ private: ObDatumRow end_row_; }; +TestMultiVersionIndexSSTableEstimator::TestMultiVersionIndexSSTableEstimator() + : TestIndexBlockDataPrepare("Test multi version index sstable estimator", MINI_MERGE) +{ + is_ddl_merge_data_ = true; + max_row_cnt_ = 150000; + max_partial_row_cnt_ = 137312; + partial_kv_start_idx_ = 29; +} + void TestMultiVersionIndexSSTableEstimator::SetUpTestCase() { TestIndexBlockDataPrepare::SetUpTestCase(); @@ -114,12 +123,20 @@ TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_minor_sstable_whole_range ObDatumRange range; range.set_whole_range(); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), 
K(ddl_merge_part_est)); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_minor_sstable_range) @@ -127,12 +144,20 @@ TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_minor_sstable_range) ObDatumRange range; generate_range(100, -1, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_left_range) @@ -140,12 +165,20 @@ TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_left_range) ObDatumRange range; generate_range(-1, 100, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator 
ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_right_range) @@ -153,12 +186,20 @@ TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_right_range ObDatumRange range; generate_range(row_cnt_ - 100, -1, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_middle_range) @@ -166,12 +207,20 @@ TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_middle_rang ObDatumRange range; generate_range(100, row_cnt_ - 100, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext 
ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_noexist_range) @@ -179,12 +228,20 @@ TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_noexist_ran ObDatumRange range; generate_range(row_cnt_, row_cnt_, range); ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); + ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); ObPartitionEst part_est; ObIndexBlockScanEstimator estimator(esti_ctx); + ObPartitionEst ddl_kv_part_est; + ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); + ObPartitionEst ddl_merge_part_est; + ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); + ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est), 
K(ddl_kv_part_est), K(ddl_merge_part_est)); } diff --git a/mittest/mtlenv/storage/test_lob_meta_iterator.cpp b/mittest/mtlenv/storage/test_lob_meta_iterator.cpp new file mode 100644 index 000000000..de2c2f1a5 --- /dev/null +++ b/mittest/mtlenv/storage/test_lob_meta_iterator.cpp @@ -0,0 +1,204 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + + +#include +#include +#include +#include +#define private public +#define protected public + +#include "common/object/ob_obj_type.h" +#include "common/object/ob_object.h" +#include "share/schema/ob_table_param.h" +#include "storage/blocksstable/ob_datum_row.h" +#include "storage/lob/ob_lob_util.h" +#include "storage/lob/ob_lob_meta.h" +#include "lib/number/ob_number_v2.h" +#include "share/schema/ob_column_schema.h" +#include "share/ob_ls_id.h" +#include "mtlenv/mock_tenant_module_env.h" + +namespace oceanbase +{ +namespace storage +{ + +class TestLobMetaIterator : public ::testing::Test +{ +public: + TestLobMetaIterator() = default; + virtual ~TestLobMetaIterator() = default; + static void SetUpTestCase(); + static void TearDownTestCase(); + + int fill_lob_sstable_slice_mock( + const ObLobId &lob_id, + const transaction::ObTransID &trans_id, + const int64_t trans_version, + const int64_t sql_no, + const bool has_lob_header, + const int64_t read_snapshot, + const ObCollationType collation_type, + blocksstable::ObStorageDatum &datum); + int build_lob_data(ObObj &obj, std::string &st, common::ObArenaAllocator &allocator); + int 
build_lob_data_not_common(ObObj &obj, std::string &str); +}; + +void TestLobMetaIterator::SetUpTestCase() +{ + EXPECT_EQ(OB_SUCCESS, MockTenantModuleEnv::get_instance().init()); +} + +void TestLobMetaIterator::TearDownTestCase() +{ + MockTenantModuleEnv::get_instance().destroy(); +} + +int TestLobMetaIterator::fill_lob_sstable_slice_mock( + const ObLobId &lob_id, + const transaction::ObTransID &trans_id, + const int64_t trans_version, + const int64_t sql_no, + const bool has_lob_header, + const int64_t read_snapshot, + const ObCollationType collation_type, + blocksstable::ObStorageDatum &datum) +{ + int ret = OB_SUCCESS; + int64_t ils_id = 1001; + int64_t itablet_id = 1001; + common::ObArenaAllocator allocator; + share::ObLSID ls_id(ils_id); + common::ObTabletID tablet_id(itablet_id); + const blocksstable::ObDatumRow *new_row = nullptr; + ObLobMetaInfo lob_meta_info; + const int64_t timeout_ts = ObTimeUtility::current_time(); + ObString data = datum.get_string(); + ObLobMetaWriteIter iter(data, &allocator, ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE); + ObLobMetaRowIterator row_iterator; + if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( + allocator, nullptr, ls_id, tablet_id, lob_id, collation_type, datum, timeout_ts, has_lob_header, iter))) { + STORAGE_LOG(WARN, "fail to insert lob column", K(ret), K(ls_id), K(tablet_id), K(lob_id)); + } else if (OB_FAIL(row_iterator.init(&iter, trans_id, trans_version, sql_no))) { + STORAGE_LOG(WARN, "fail to init lob meta row iterator", K(ret), K(trans_id), K(trans_version)); + } else if (OB_FAIL(row_iterator.get_next_row(new_row))) { + STORAGE_LOG(WARN, "get_next_row failed", K(ret)); + } else if (OB_ISNULL(new_row)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "new_row is nullptr", K(ret)); + } else if (OB_FAIL(ObLobMetaUtil::transform_from_row_to_info(new_row, lob_meta_info, true))) { + STORAGE_LOG(WARN, "transform failed", K(ret)); + } else if (lob_meta_info.lob_id_.lob_id_ != lob_id.lob_id_) { + 
ret = OB_ERR_UNEXPECTED; // a lob_id mismatch must fail the caller's ASSERT_EQ instead of only being logged + STORAGE_LOG(WARN, "error info", K(ret), K(lob_meta_info.lob_id_.lob_id_), K(lob_id.lob_id_)); + } + return ret; +} +int TestLobMetaIterator::build_lob_data(ObObj &obj, std::string &st, common::ObArenaAllocator &allocator) +{ + int ret = OB_SUCCESS; + ObLobCommon *value = NULL; + void *buf = NULL; + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObLobCommon) + 1000000))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "fail to allocate memory for ObLobData", K(ret)); + } else { + // ObLobIndex index; + value = new (buf) ObLobCommon(); + // value->version_ = 1; + // value->reserve_ = 0; + // value->is_init_ = 1; + int64_t byte_size = 1000000; + value->in_row_ = 1; + MEMCPY(value->buffer_, st.c_str(), st.length()); + + obj.meta_.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + obj.meta_.set_collation_level(CS_LEVEL_IMPLICIT); + obj.set_type(ObMediumTextType); + obj.set_lob_value(ObMediumTextType, value, value->get_handle_size(byte_size)); + obj.set_has_lob_header(); + } + return ret; +} + +int TestLobMetaIterator::build_lob_data_not_common(ObObj &obj, std::string &str) +{ + int ret = OB_SUCCESS; + obj.meta_.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + obj.meta_.set_collation_level(CS_LEVEL_IMPLICIT); + obj.set_type(ObMediumTextType); + obj.set_lob_value(ObMediumTextType, str.c_str(), str.length()); + return ret; +} + +TEST_F(TestLobMetaIterator, test_not_lob_common) +{ + for (int x = 2; x <= 10; x++) { + std::string st = ""; + for (int i = 0; i < 1000000; i++) { + st += static_cast<char>(i % 26 + 'a'); + } + ObDatumRow row; + ObObj obj; + ASSERT_EQ(OB_SUCCESS, build_lob_data_not_common(obj, st)); + ObStorageDatum datum; + ASSERT_EQ(OB_SUCCESS, datum.from_obj_enhance(obj)); + transaction::ObTransID trans_id(10); + ObLobId lob_id; + lob_id.lob_id_ = x; + lob_id.tablet_id_ = x; + int64_t sql_no = 2; + int64_t read_snapshot = 3; + ObCollationType collation_type = CS_TYPE_UTF8MB4_GENERAL_CI; + ASSERT_EQ(OB_SUCCESS, 
fill_lob_sstable_slice_mock(lob_id, trans_id, 100, sql_no, + false, read_snapshot, collation_type, datum)); + } +} + +TEST_F(TestLobMetaIterator, test_lob_common) +{ + for (int x = 2; x <= 10; x++) { + std::string st = ""; + for (int i = 0; i < 1000000; i++) { + st += static_cast(i % 26 + 'a'); + } + ObDatumRow row; + ObObj obj; + common::ObArenaAllocator allocator; + ASSERT_EQ(OB_SUCCESS, build_lob_data(obj, st, allocator)); + ObStorageDatum datum; + ASSERT_EQ(OB_SUCCESS, datum.from_obj_enhance(obj)); + transaction::ObTransID trans_id(10); + ObLobId lob_id; + lob_id.lob_id_ = x; + lob_id.tablet_id_ = x; + int64_t sql_no = 2; + int64_t read_snapshot = 3; + //ObLobMetaRowIterator iter; + ObCollationType collation_type = CS_TYPE_UTF8MB4_GENERAL_CI; + ASSERT_EQ(OB_SUCCESS, fill_lob_sstable_slice_mock(lob_id, trans_id, 100, sql_no, + true, read_snapshot, collation_type, datum)); + } +} +} +} + +int main(int argc, char **argv) +{ + system("rm -f test_lob_meta_iterator.log*"); + OB_LOGGER.set_file_name("test_lob_meta_iterator.log"); + oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/mittest/mtlenv/storage/test_ls_migration_param.cpp b/mittest/mtlenv/storage/test_ls_migration_param.cpp index 97c2da032..b9778f2b3 100644 --- a/mittest/mtlenv/storage/test_ls_migration_param.cpp +++ b/mittest/mtlenv/storage/test_ls_migration_param.cpp @@ -239,15 +239,17 @@ TEST_F(TestLSMigrationParam, test_migrate_tablet_param) TestSchemaUtils::prepare_data_schema(table_schema); ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; + ret = create_tablet_schema.init(schema_allocator, table_schema, lib::Worker::CompatMode::MYSQL, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3); + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, true/*need_create_empty_major*/); 
ASSERT_EQ(OB_SUCCESS, ret); ObTabletID empty_tablet_id; SCN scn; scn.convert_from_ts(ObTimeUtility::current_time()); ret = src_handle.get_obj()->init_for_first_time_creation(allocator_, src_key.ls_id_, src_key.tablet_id_, src_key.tablet_id_, - scn, 2022, create_tablet_schema, false, ls_handle.get_ls()->get_freezer()); + scn, 2022, create_tablet_schema, ls_handle.get_ls()->get_freezer()); ASSERT_EQ(common::OB_SUCCESS, ret); ObMigrationTabletParam tablet_param; @@ -303,15 +305,17 @@ TEST_F(TestLSMigrationParam, test_migration_param_compat) TestSchemaUtils::prepare_data_schema(table_schema); ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; + ret = create_tablet_schema.init(schema_allocator, table_schema, lib::Worker::CompatMode::MYSQL, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3); + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, true/*need_create_empty_major*/); ASSERT_EQ(OB_SUCCESS, ret); ObTabletID empty_tablet_id; SCN scn; scn.convert_from_ts(ObTimeUtility::current_time()); ret = src_handle.get_obj()->init_for_first_time_creation(allocator_, src_key.ls_id_, src_key.tablet_id_, src_key.tablet_id_, - scn, 2022, create_tablet_schema, false, ls_handle.get_ls()->get_freezer()); + scn, 2022, create_tablet_schema, ls_handle.get_ls()->get_freezer()); ASSERT_EQ(common::OB_SUCCESS, ret); ObMigrationTabletParam tablet_param; diff --git a/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp b/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp index 94e2448e1..c6075c29e 100644 --- a/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp +++ b/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp @@ -156,15 +156,17 @@ void TestLSTabletInfoWR::fill_tablet_meta() TestSchemaUtils::prepare_data_schema(table_schema); ObArenaAllocator schema_allocator; ObCreateTabletSchema create_tablet_schema; + ret = 
create_tablet_schema.init(schema_allocator, table_schema, lib::Worker::CompatMode::MYSQL, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3); + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, true/*need_create_empty_major*/); ASSERT_EQ(OB_SUCCESS, ret); ObTabletID empty_tablet_id; SCN scn; scn.convert_from_ts(ObTimeUtility::current_time()); ret = src_handle.get_obj()->init_for_first_time_creation(arena_allocator_, src_key.ls_id_, src_key.tablet_id_, src_key.tablet_id_, - scn, 2022, create_tablet_schema, false, ls_handle.get_ls()->get_freezer()); + scn, 2022, create_tablet_schema, ls_handle.get_ls()->get_freezer()); ASSERT_EQ(common::OB_SUCCESS, ret); ObMigrationTabletParam tablet_param; diff --git a/mittest/mtlenv/storage/test_multi_version_merge_recycle.cpp b/mittest/mtlenv/storage/test_multi_version_merge_recycle.cpp index 4f10ff69a..9959fd84d 100644 --- a/mittest/mtlenv/storage/test_multi_version_merge_recycle.cpp +++ b/mittest/mtlenv/storage/test_multi_version_merge_recycle.cpp @@ -180,6 +180,7 @@ void TestMultiVersionMergeRecycle::prepare_query_param(const ObVersionRange &ver iter_param_.vectorized_enabled_ = false; ASSERT_EQ(OB_SUCCESS, store_ctx_.init_for_read(ls_id, + iter_param_.tablet_id_, INT64_MAX, // query_expire_ts -1, // lock_timeout_us INT64_MAX - 2)); diff --git a/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp b/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp index 69c11855a..081d365e8 100644 --- a/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp +++ b/mittest/mtlenv/storage/test_multi_version_sstable_merge.cpp @@ -198,6 +198,7 @@ void TestMultiVersionMerge::prepare_query_param(const ObVersionRange &version_ra iter_param_.vectorized_enabled_ = false; ASSERT_EQ(OB_SUCCESS, store_ctx_.init_for_read(ls_id, + iter_param_.tablet_id_, INT64_MAX, // query_expire_ts -1, // lock_timeout_us share::SCN::max_scn())); diff --git 
a/mittest/mtlenv/storage/test_multi_version_sstable_single_get.cpp b/mittest/mtlenv/storage/test_multi_version_sstable_single_get.cpp index a6de529ee..416f576d8 100644 --- a/mittest/mtlenv/storage/test_multi_version_sstable_single_get.cpp +++ b/mittest/mtlenv/storage/test_multi_version_sstable_single_get.cpp @@ -95,6 +95,7 @@ void TestMultiVersionSSTableSingleGet::prepare_query_param( iter_param_.vectorized_enabled_ = false; ASSERT_EQ(OB_SUCCESS, store_ctx_.init_for_read(ls_id, + iter_param_.tablet_id_, INT64_MAX, // query_expire_ts -1, // lock_timeout_us share::SCN::max_scn())); diff --git a/mittest/mtlenv/storage/test_tablet_create_delete_helper.cpp b/mittest/mtlenv/storage/test_tablet_create_delete_helper.cpp index 12fc8e373..7932f61e7 100644 --- a/mittest/mtlenv/storage/test_tablet_create_delete_helper.cpp +++ b/mittest/mtlenv/storage/test_tablet_create_delete_helper.cpp @@ -2880,8 +2880,8 @@ TEST_F(TestTabletCreateDeleteHelper, migrate_lob_tablets) const ObSArray index_tablet_array; const ObTabletID tablet_id(2); const ObTabletID lob_meta_tablet_id(101); - ret = helper.do_create_tablet(lob_meta_tablet_id, tablet_id, lob_meta_tablet_id, ObTabletID(), - index_tablet_array, arg, trans_flags, table_schema1, lib::Worker::CompatMode::MYSQL, tablet_handle); + ret = helper.create_tablet(lob_meta_tablet_id, tablet_id, lob_meta_tablet_id, ObTabletID(), + index_tablet_array, arg, trans_flags, table_schema1, true /*need_create_empty_major_sstable*/, lib::Worker::CompatMode::MYSQL); ASSERT_EQ(OB_SUCCESS, ret); // reset tx data to normal state tablet_handle.get_obj()->tablet_meta_.tx_data_.tx_id_ = 0; @@ -2889,8 +2889,8 @@ TEST_F(TestTabletCreateDeleteHelper, migrate_lob_tablets) tablet_handle.get_obj()->tablet_meta_.tx_data_.tx_scn_ = share::SCN::minus(share::SCN::max_scn(), 98); const ObTabletID lob_piece_tablet_id(102); - ret = helper.do_create_tablet(lob_piece_tablet_id, tablet_id, ObTabletID(), lob_piece_tablet_id, - index_tablet_array, arg, trans_flags, table_schema2, 
lib::Worker::CompatMode::MYSQL, tablet_handle); + ret = helper.create_tablet(lob_piece_tablet_id, tablet_id, ObTabletID(), lob_piece_tablet_id, + index_tablet_array, arg, trans_flags, table_schema2, true /*need_create_empty_major_sstable*/, lib::Worker::CompatMode::MYSQL); ASSERT_EQ(OB_SUCCESS, ret); // reset tx data to normal state tablet_handle.get_obj()->tablet_meta_.tx_data_.tx_id_ = 0; diff --git a/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp b/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp index 5941d2d39..cef35a0b6 100644 --- a/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp +++ b/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp @@ -226,7 +226,8 @@ void TestTenantMetaMemMgr::prepare_data_schema( LOG_INFO("dump data table schema", LITERAL_K(TEST_ROWKEY_COLUMN_CNT), K(table_schema)); ret = create_tablet_schema.init(allocator, table_schema, lib::Worker::CompatMode::MYSQL, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3); + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, true/*need_create_empty_major*/); ASSERT_EQ(OB_SUCCESS, ret); } @@ -704,8 +705,9 @@ TEST_F(TestTenantMetaMemMgr, test_wash_tablet) create_scn.convert_from_ts(ObTimeUtility::fast_current_time()); ObTabletID empty_tablet_id; + create_tablet_schema.set_need_create_empty_major(true); ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, - create_scn, create_scn.get_val_for_tx(), create_tablet_schema, true, &freezer); + create_scn, create_scn.get_val_for_tx(), create_tablet_schema, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); ObTabletPersister persister; @@ -799,8 +801,9 @@ TEST_F(TestTenantMetaMemMgr, test_wash_inner_tablet) ObTabletID empty_tablet_id; bool make_empty_co_sstable = true; + create_tablet_schema.set_need_create_empty_major(make_empty_co_sstable); ret = tablet->init_for_first_time_creation(allocator_, 
ls_id_, tablet_id, tablet_id, - create_scn, create_scn.get_val_for_tx(), create_tablet_schema, make_empty_co_sstable, &freezer); + create_scn, create_scn.get_val_for_tx(), create_tablet_schema, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); @@ -905,9 +908,9 @@ TEST_F(TestTenantMetaMemMgr, test_wash_no_sstable_tablet) ObTabletID empty_tablet_id; bool make_empty_co_sstable = false; + create_tablet_schema.set_need_create_empty_major(make_empty_co_sstable); ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, - create_scn, create_scn.get_val_for_tx(), create_tablet_schema, - make_empty_co_sstable, &freezer); + create_scn, create_scn.get_val_for_tx(), create_tablet_schema, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); @@ -999,8 +1002,9 @@ TEST_F(TestTenantMetaMemMgr, test_get_tablet_with_allocator) ObTabletID empty_tablet_id; bool make_empty_co_sstable = true; + create_tablet_schema.set_need_create_empty_major(make_empty_co_sstable); ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, - create_scn, create_scn.get_val_for_tx(), create_tablet_schema, make_empty_co_sstable, &freezer); + create_scn, create_scn.get_val_for_tx(), create_tablet_schema, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); @@ -1123,8 +1127,9 @@ TEST_F(TestTenantMetaMemMgr, test_wash_mem_tablet) ObTabletID empty_tablet_id; bool make_empty_co_sstable = false; + create_tablet_schema.set_need_create_empty_major(make_empty_co_sstable); ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, - create_scn, create_scn.get_val_for_tx(), create_tablet_schema, make_empty_co_sstable, &freezer); + create_scn, create_scn.get_val_for_tx(), create_tablet_schema, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); diff --git a/src/objit/include/objit/common/ob_item_type.h 
b/src/objit/include/objit/common/ob_item_type.h index 26413776a..eb81229f6 100755 --- a/src/objit/include/objit/common/ob_item_type.h +++ b/src/objit/include/objit/common/ob_item_type.h @@ -1128,6 +1128,7 @@ typedef enum ObItemType T_ALTER_PARTITION_DROP, T_ALTER_PARTITION_ADD, T_ALTER_COLUMN_OPTION, + T_ALTER_COLUMN_GROUP_OPTION, T_ALTER_TABLEGROUP_OPTION, T_ALTER_TABLE_ACTION_LIST, @@ -1141,6 +1142,8 @@ typedef enum ObItemType T_COLUMN_RENAME, T_COLUMN_MODIFY, T_COLUMN_CHANGE, + T_COLUMN_GROUP_ADD, + T_COLUMN_GROUP_DROP, T_INDEX_ADD, T_INDEX_DROP, T_INDEX_ALTER, diff --git a/src/observer/ob_rpc_processor_simple.cpp b/src/observer/ob_rpc_processor_simple.cpp index 27242bb4b..cd491662b 100644 --- a/src/observer/ob_rpc_processor_simple.cpp +++ b/src/observer/ob_rpc_processor_simple.cpp @@ -45,6 +45,7 @@ // for 4.0 #include "share/ob_ls_id.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ls/ob_ls.h" #include "storage/tablet/ob_tablet.h" #include "storage/tx/ob_trans_service.h" @@ -2319,14 +2320,12 @@ int ObRpcRemoteWriteDDLRedoLogP::process() } else { MTL_SWITCH(tenant_id) { ObRole role = INVALID_ROLE; - ObDDLSSTableRedoWriter sstable_redo_writer; + ObDDLRedoLogWriter sstable_redo_writer; MacroBlockId macro_block_id; ObMacroBlockHandle macro_handle; ObMacroBlockWriteInfo write_info; ObLSService *ls_service = MTL(ObLSService*); ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - ObDDLKvMgrHandle ddl_kv_mgr_handle; ObLS *ls = nullptr; // restruct write_info @@ -2344,19 +2343,15 @@ int ObRpcRemoteWriteDDLRedoLogP::process() } else if (ObRole::LEADER != role) { ret = OB_NOT_MASTER; LOG_INFO("leader may not have finished replaying clog, caller retry", K(ret), K(MTL_ID()), K(arg_.ls_id_)); - } else if (OB_FAIL(ls->get_tablet(arg_.redo_info_.table_key_.tablet_id_, tablet_handle))) { - LOG_WARN("get tablet failed", K(ret)); - } else if 
(OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - LOG_WARN("get ddl kv manager failed", K(ret)); } else if (OB_FAIL(ObBlockManager::async_write_block(write_info, macro_handle))) { LOG_WARN("fail to async write block", K(ret), K(write_info), K(macro_handle)); } else if (OB_FAIL(macro_handle.wait())) { LOG_WARN("fail to wait macro block io finish", K(ret)); } else if (OB_FAIL(sstable_redo_writer.init(arg_.ls_id_, arg_.redo_info_.table_key_.tablet_id_))) { LOG_WARN("init sstable redo writer", K(ret), K_(arg)); - } else if (OB_FAIL(sstable_redo_writer.write_redo_log(arg_.redo_info_, macro_handle.get_macro_id(), false, arg_.task_id_, tablet_handle, ddl_kv_mgr_handle))) { + } else if (OB_FAIL(sstable_redo_writer.write_macro_block_log(arg_.redo_info_, macro_handle.get_macro_id(), false, arg_.task_id_))) { LOG_WARN("fail to write macro redo", K(ret), K_(arg)); - } else if (OB_FAIL(sstable_redo_writer.wait_redo_log_finish(arg_.redo_info_, + } else if (OB_FAIL(sstable_redo_writer.wait_macro_block_log_finish(arg_.redo_info_, macro_handle.get_macro_id()))) { LOG_WARN("fail to wait macro redo finish", K(ret), K_(arg)); } @@ -2373,12 +2368,15 @@ int ObRpcRemoteWriteDDLCommitLogP::process() MTL_SWITCH(tenant_id) { ObRole role = INVALID_ROLE; const ObITable::TableKey &table_key = arg_.table_key_; - ObDDLSSTableRedoWriter sstable_redo_writer; + ObDDLRedoLogWriter sstable_redo_writer; ObLSService *ls_service = MTL(ObLSService*); ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - ObDDLKvMgrHandle ddl_kv_mgr_handle; ObLS *ls = nullptr; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletFullDirectLoadMgr *data_tablet_mgr = nullptr; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + direct_load_mgr_handle.reset(); + bool is_major_sstable_exist = false; if (OB_UNLIKELY(!arg_.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K_(arg)); @@ -2392,32 +2390,55 @@ int 
ObRpcRemoteWriteDDLCommitLogP::process() } else if (ObRole::LEADER != role) { ret = OB_NOT_MASTER; LOG_INFO("leader may not have finished replaying clog, caller retry", K(ret), K(MTL_ID()), K(arg_.ls_id_)); - } else if (OB_FAIL(ls->get_tablet(table_key.tablet_id_, tablet_handle))) { - LOG_WARN("get tablet failed", K(ret)); - } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - if (OB_ENTRY_NOT_EXIST == ret) { - ret = OB_EAGAIN; + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + arg_.ls_id_, + table_key.tablet_id_, + true /*is_full_direct_load*/, + direct_load_mgr_handle, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + LOG_INFO("major sstable already exist", K(ret), K(arg_)); } else { - LOG_WARN("get ddl kv manager failed", K(ret)); + LOG_WARN("get tablet direct load manager failed", K(ret), K(table_key)); } + } else if (OB_ISNULL(data_tablet_mgr = direct_load_mgr_handle.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(table_key)); } else if (OB_FAIL(sstable_redo_writer.init(arg_.ls_id_, table_key.tablet_id_))) { LOG_WARN("init sstable redo writer", K(ret), K(table_key)); - } else if (FALSE_IT(sstable_redo_writer.set_start_scn(arg_.start_scn_))) { } else { + uint32_t lock_tid = 0; SCN commit_scn; bool is_remote_write = false; - if (OB_FAIL(sstable_redo_writer.write_commit_log(tablet_handle, - ddl_kv_mgr_handle, - false, - table_key, - commit_scn, - is_remote_write))) { + ObTabletHandle tablet_handle; + if (OB_FAIL(data_tablet_mgr->wrlock(ObTabletDirectLoadMgr::TRY_LOCK_TIMEOUT, lock_tid))) { + LOG_WARN("failed to wrlock", K(ret), K(arg_)); + } else if (OB_FAIL(sstable_redo_writer.write_commit_log(false, + table_key, + arg_.start_scn_, + direct_load_mgr_handle, + commit_scn, 
+ is_remote_write, + lock_tid))) { LOG_WARN("fail to remote write commit log", K(ret), K(table_key), K_(arg)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->ddl_commit(*tablet_handle.get_obj(), arg_.start_scn_, commit_scn))) { + } else if (OB_FAIL(ls->get_tablet(table_key.tablet_id_, tablet_handle, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet failed", K(ret), K(table_key)); + } else if (OB_FAIL(data_tablet_mgr->commit(*tablet_handle.get_obj(), + arg_.start_scn_, + commit_scn, + arg_.table_id_, + arg_.ddl_task_id_))) { LOG_WARN("failed to do ddl kv commit", K(ret), K(arg_)); } else { result_ = commit_scn.get_val_for_tx(); } + if (lock_tid != 0) { + data_tablet_mgr->unlock(lock_tid); + } } } return ret; diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index df0363d6d..3d1deb228 100644 --- a/src/observer/ob_server.cpp +++ b/src/observer/ob_server.cpp @@ -88,7 +88,6 @@ #include "storage/slog_ckpt/ob_server_checkpoint_slog_handler.h" #include "storage/tx_storage/ob_tenant_freezer.h" #include "storage/tx_storage/ob_tenant_memory_printer.h" -#include "storage/ddl/ob_direct_insert_sstable_ctx.h" #include "storage/compaction/ob_compaction_diagnose.h" #include "storage/ob_file_system_router.h" #include "storage/blocksstable/ob_storage_cache_suite.h" @@ -482,12 +481,12 @@ int ObServer::init(const ObServerOptions &opts, const ObPLogWriterCfg &log_cfg) LOG_ERROR("init server blacklist failed", KR(ret)); } else if (OB_FAIL(ObLongopsMgr::get_instance().init())) { LOG_WARN("init longops mgr fail", KR(ret)); + } else if (OB_FAIL(ObDDLRedoLock::get_instance().init())) { + LOG_WARN("init ddl redo lock failed", K(ret)); #ifdef ERRSIM } else if (OB_FAIL(ObDDLSimPointMgr::get_instance().init())) { LOG_WARN("init ddl sim point mgr fail", KR(ret)); #endif - } else if (OB_FAIL(ObDDLRedoLogWriter::get_instance().init())) { - LOG_WARN("init DDL redo log writer failed", KR(ret)); } #ifdef 
OB_BUILD_ARBITRATION else if (OB_FAIL(arb_gcs_.init(GCTX.self_addr(), @@ -2708,12 +2707,6 @@ int ObServer::init_storage() } } - if (OB_SUCC(ret)) { - if (OB_FAIL(ObSSTableInsertManager::get_instance().init())) { - LOG_WARN("init direct insert sstable manager failed", KR(ret)); - } - } - if (OB_SUCC(ret)) { if (OB_FAIL(ObDDLCtrlSpeedHandle::get_instance().init())) { LOG_WARN("fail to init ObDDLCtrlSpeedHandle", KR(ret)); diff --git a/src/observer/omt/ob_multi_tenant.cpp b/src/observer/omt/ob_multi_tenant.cpp index b16d089c0..01c0b027c 100644 --- a/src/observer/omt/ob_multi_tenant.cpp +++ b/src/observer/omt/ob_multi_tenant.cpp @@ -53,6 +53,7 @@ #include "logservice/data_dictionary/ob_data_dict_service.h" // ObDataDictService #include "ob_tenant_mtl_helper.h" #include "storage/blocksstable/ob_decode_resource_pool.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/multi_data_source/runtime_utility/mds_tenant_service.h" #include "storage/tx_storage/ob_ls_service.h" #include "storage/tx_storage/ob_access_service.h" @@ -540,6 +541,7 @@ int ObMultiTenant::init(ObAddr myaddr, MTL_BIND2(mtl_new_default, ObPsCache::mtl_init, nullptr, ObPsCache::mtl_stop, nullptr, mtl_destroy_default); MTL_BIND2(server_obj_pool_mtl_new, nullptr, nullptr, nullptr, nullptr, server_obj_pool_mtl_destroy); MTL_BIND2(server_obj_pool_mtl_new, nullptr, nullptr, nullptr, nullptr, server_obj_pool_mtl_destroy); + MTL_BIND2(mtl_new_default, ObTenantDirectLoadMgr::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND(ObDetectManager::mtl_init, ObDetectManager::mtl_destroy); MTL_BIND(ObTenantSQLSessionMgr::mtl_init, ObTenantSQLSessionMgr::mtl_destroy); MTL_BIND2(mtl_new_default, ObDTLIntermResultManager::mtl_init, ObDTLIntermResultManager::mtl_start, diff --git a/src/observer/table_load/client/ob_table_direct_load_rpc_executor.cpp b/src/observer/table_load/client/ob_table_direct_load_rpc_executor.cpp index bc6de6eaf..548d5333a 100644 --- 
a/src/observer/table_load/client/ob_table_direct_load_rpc_executor.cpp +++ b/src/observer/table_load/client/ob_table_direct_load_rpc_executor.cpp @@ -218,13 +218,10 @@ int ObTableDirectLoadBeginExecutor::create_table_ctx() if (OB_SUCC(ret)) { ObTableLoadRedefTableStartArg start_arg; ObTableLoadRedefTableStartRes start_res; - uint64_t data_version = 0; start_arg.tenant_id_ = tenant_id; start_arg.table_id_ = table_id; start_arg.parallelism_ = arg_.parallel_; - if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, data_version))) { - LOG_WARN("fail to get tenant data version", KR(ret)); - } else if (OB_FAIL(ObTableLoadRedefTable::start(start_arg, start_res, + if (OB_FAIL(ObTableLoadRedefTable::start(start_arg, start_res, *client_task_->get_session_info()))) { LOG_WARN("fail to start redef table", KR(ret), K(start_arg)); } else { @@ -232,7 +229,7 @@ int ObTableDirectLoadBeginExecutor::create_table_ctx() ddl_param.task_id_ = start_res.task_id_; ddl_param.schema_version_ = start_res.schema_version_; ddl_param.snapshot_version_ = start_res.snapshot_version_; - ddl_param.data_version_ = data_version; + ddl_param.data_version_ = start_res.data_format_version_; } } // init param diff --git a/src/observer/table_load/ob_table_load_instance.cpp b/src/observer/table_load/ob_table_load_instance.cpp index f365dca52..31cb17e01 100644 --- a/src/observer/table_load/ob_table_load_instance.cpp +++ b/src/observer/table_load/ob_table_load_instance.cpp @@ -114,21 +114,18 @@ int ObTableLoadInstance::create_table_ctx(ObTableLoadParam ¶m, // start redef table ObTableLoadRedefTableStartArg start_arg; ObTableLoadRedefTableStartRes start_res; - uint64_t data_version = 0; start_arg.tenant_id_ = param.tenant_id_; start_arg.table_id_ = param.table_id_; start_arg.parallelism_ = param.parallel_; start_arg.is_load_data_ = !param.px_mode_; - if (OB_FAIL(GET_MIN_DATA_VERSION(param.tenant_id_, data_version))) { - LOG_WARN("fail to get tenant data version", KR(ret)); - } else if 
(OB_FAIL(ObTableLoadRedefTable::start(start_arg, start_res, *session_info))) { + if (OB_FAIL(ObTableLoadRedefTable::start(start_arg, start_res, *session_info))) { LOG_WARN("fail to start redef table", KR(ret), K(start_arg)); } else { ddl_param.dest_table_id_ = start_res.dest_table_id_; ddl_param.task_id_ = start_res.task_id_; ddl_param.schema_version_ = start_res.schema_version_; ddl_param.snapshot_version_ = start_res.snapshot_version_; - ddl_param.data_version_ = data_version; + ddl_param.data_version_ = start_res.data_format_version_; } if (OB_SUCC(ret)) { if (OB_ISNULL(table_ctx = ObTableLoadService::alloc_ctx())) { diff --git a/src/observer/table_load/ob_table_load_merger.cpp b/src/observer/table_load/ob_table_load_merger.cpp index fec1500b9..94bbc1200 100644 --- a/src/observer/table_load/ob_table_load_merger.cpp +++ b/src/observer/table_load/ob_table_load_merger.cpp @@ -106,6 +106,74 @@ private: ObTableLoadMerger *const merger_; }; +class ObTableLoadMerger::RescanTaskProcessor : public ObITableLoadTaskProcessor +{ +public: + RescanTaskProcessor(ObTableLoadTask &task, ObTableLoadTableCtx *ctx, ObTableLoadMerger *merger) + : ObITableLoadTaskProcessor(task), ctx_(ctx), merger_(merger) + { + ctx_->inc_ref_count(); + } + virtual ~RescanTaskProcessor() + { + ObTableLoadService::put_ctx(ctx_); + } + int process() override + { + int ret = OB_SUCCESS; + ObDirectLoadPartitionRescanTask *rescan_task = nullptr; + while (OB_SUCC(ret)) { + rescan_task = nullptr; + if (OB_FAIL(merger_->get_next_rescan_task(rescan_task))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to get next rescan task", KR(ret)); + } else { + ret = OB_SUCCESS; + break; + } + } else if (OB_FAIL(rescan_task->process())) { + LOG_WARN("fail to process rescan task", KR(ret)); + } + if (nullptr != rescan_task) { + merger_->handle_rescan_task_finish(rescan_task); + } + } + return ret; + } +private: + ObTableLoadTableCtx *const ctx_; + ObTableLoadMerger *const merger_; +}; + +class 
ObTableLoadMerger::RescanTaskCallback : public ObITableLoadTaskCallback +{ +public: + RescanTaskCallback(ObTableLoadTableCtx *ctx, ObTableLoadMerger *merger) + : ctx_(ctx), merger_(merger) + { + ctx_->inc_ref_count(); + } + virtual ~RescanTaskCallback() + { + ObTableLoadService::put_ctx(ctx_); + } + void callback(int ret_code, ObTableLoadTask *task) override + { + int ret = OB_SUCCESS; + if (OB_FAIL(merger_->handle_rescan_thread_finish(ret_code))) { + LOG_WARN("fail to handle rescan thread finish", KR(ret)); + } + if (OB_FAIL(ret)) { + ctx_->store_ctx_->set_status_error(ret); + } + ctx_->free_task(task); + OB_TABLE_LOAD_STATISTICS_PRINT_AND_RESET(); + } +private: + ObTableLoadTableCtx *const ctx_; + ObTableLoadMerger *const merger_; +}; + /** * ObTableLoadMerger */ @@ -123,6 +191,7 @@ ObTableLoadMerger::ObTableLoadMerger(ObTableLoadStoreCtx *store_ctx) ObTableLoadMerger::~ObTableLoadMerger() { abort_unless(merging_list_.is_empty()); + abort_unless(rescan_list_.is_empty()); } int ObTableLoadMerger::init() @@ -172,9 +241,13 @@ void ObTableLoadMerger::stop() // 遍历合并中的任务队列, 调用stop ObMutexGuard guard(mutex_); ObDirectLoadPartitionMergeTask *merge_task = nullptr; + ObDirectLoadPartitionRescanTask *rescan_task = nullptr; DLIST_FOREACH_NORET(merge_task, merging_list_) { merge_task->stop(); } + DLIST_FOREACH_NORET(rescan_task, rescan_list_) { + rescan_task->stop(); + } } int ObTableLoadMerger::handle_table_compact_success() @@ -203,10 +276,13 @@ int ObTableLoadMerger::build_merge_ctx() merge_param.table_data_desc_ = store_ctx_->table_data_desc_; merge_param.datum_utils_ = &(store_ctx_->ctx_->schema_.datum_utils_); merge_param.col_descs_ = &(store_ctx_->ctx_->schema_.column_descs_); + merge_param.lob_column_cnt_ = store_ctx_->ctx_->schema_.lob_column_cnt_; merge_param.cmp_funcs_ = &(store_ctx_->ctx_->schema_.cmp_funcs_); merge_param.is_heap_table_ = store_ctx_->ctx_->schema_.is_heap_table_; merge_param.is_fast_heap_table_ = store_ctx_->is_fast_heap_table_; 
merge_param.online_opt_stat_gather_ = param_.online_opt_stat_gather_; + merge_param.is_column_store_ = store_ctx_->ctx_->schema_.is_column_store_; + merge_param.px_mode_ = param_.px_mode_; merge_param.insert_table_ctx_ = store_ctx_->insert_table_ctx_; merge_param.dml_row_handler_ = store_ctx_->error_row_handler_; if (OB_FAIL(merge_ctx_.init(merge_param, store_ctx_->ls_partition_ids_, @@ -417,11 +493,37 @@ int ObTableLoadMerger::collect_sql_statistics(ObTableLoadSqlStatistics &sql_stat return ret; } +int ObTableLoadMerger::build_rescan_ctx() +{ + int ret = OB_SUCCESS; + const ObIArray<ObDirectLoadTabletMergeCtx *> &tablet_merge_ctxs = + merge_ctx_.get_tablet_merge_ctxs(); + for (int64_t i = 0; OB_SUCC(ret) && i < tablet_merge_ctxs.count(); ++i) { + ObDirectLoadTabletMergeCtx *tablet_merge_ctx = tablet_merge_ctxs.at(i); + if (OB_FAIL(tablet_merge_ctx->build_rescan_task(param_.session_count_))) { + LOG_WARN("fail to build rescan task", KR(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(rescan_task_iter_.init(&merge_ctx_))) { + LOG_WARN("fail to init rescan task iter", KR(ret)); + } + } + return ret; + +} + int ObTableLoadMerger::start_merge() { int ret = OB_SUCCESS; const int64_t thread_count = store_ctx_->task_scheduler_->get_thread_count(); ObTableLoadTableCtx *ctx = store_ctx_->ctx_; + if (OB_UNLIKELY(0 != running_thread_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected running thread count not zero", KR(ret), K(running_thread_count_)); + } else { + running_thread_count_ = thread_count; + } for (int32_t thread_idx = 0; OB_SUCC(ret) && thread_idx < thread_count; ++thread_idx) { ObTableLoadTask *task = nullptr; // 1. 分配task @@ -440,9 +542,46 @@ int ObTableLoadMerger::start_merge() else if (OB_FAIL(store_ctx_->task_scheduler_->add_task(thread_idx, task))) { LOG_WARN("fail to add task", KR(ret), K(thread_idx), KPC(task)); } - // 5. 
inc running_thread_count_ - else { - ATOMIC_INC(&running_thread_count_); + if (OB_FAIL(ret)) { + if (nullptr != task) { + ctx->free_task(task); + } + } + } + if (OB_FAIL(ret)) { + has_error_ = true; + } + return ret; +} + +int ObTableLoadMerger::start_rescan() +{ + int ret = OB_SUCCESS; + const int64_t thread_count = store_ctx_->task_scheduler_->get_thread_count(); + ObTableLoadTableCtx *ctx = store_ctx_->ctx_; + if (OB_UNLIKELY(0 != running_thread_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected running thread count not zero", KR(ret), K(running_thread_count_)); + } else { + running_thread_count_ = thread_count; + } + for (int32_t thread_idx = 0; OB_SUCC(ret) && thread_idx < thread_count; ++thread_idx) { + ObTableLoadTask *task = nullptr; + // 1. allocate the task + if (OB_FAIL(ctx->alloc_task(task))) { + LOG_WARN("fail to alloc task", KR(ret)); + } + // 2. set the processor + else if (OB_FAIL(task->set_processor<RescanTaskProcessor>(ctx, this))) { + LOG_WARN("fail to set rescan task processor", KR(ret)); + } + // 3. set the callback + else if (OB_FAIL(task->set_callback<RescanTaskCallback>(ctx, this))) { + LOG_WARN("fail to set rescan task callback", KR(ret)); + } + // 4. 
hand the task to the scheduler + else if (OB_FAIL(store_ctx_->task_scheduler_->add_task(thread_idx, task))) { + LOG_WARN("fail to add task", KR(ret), K(thread_idx), KPC(task)); } if (OB_FAIL(ret)) { if (nullptr != task) { @@ -456,6 +595,25 @@ int ObTableLoadMerger::start_merge() return ret; } +int ObTableLoadMerger::get_next_rescan_task(ObDirectLoadPartitionRescanTask *&rescan_task) +{ + int ret = OB_SUCCESS; + rescan_task = nullptr; + if (OB_UNLIKELY(is_stop_ || has_error_)) { + ret = OB_ITER_END; + } else { + ObMutexGuard guard(mutex_); + if (OB_FAIL(rescan_task_iter_.get_next_task(rescan_task))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to get next task", KR(ret)); + } + } else { + OB_ASSERT(rescan_list_.add_last(rescan_task)); + } + } + return ret; +} + int ObTableLoadMerger::get_next_merge_task(ObDirectLoadPartitionMergeTask *&merge_task) { int ret = OB_SUCCESS; @@ -481,6 +639,12 @@ void ObTableLoadMerger::handle_merge_task_finish(ObDirectLoadPartitionMergeTask OB_ASSERT(OB_NOT_NULL(merging_list_.remove(merge_task))); } +void ObTableLoadMerger::handle_rescan_task_finish(ObDirectLoadPartitionRescanTask *&rescan_task) +{ + ObMutexGuard guard(mutex_); + OB_ASSERT(OB_NOT_NULL(rescan_list_.remove(rescan_task))); +} + int ObTableLoadMerger::handle_merge_thread_finish(int ret_code) { int ret = OB_SUCCESS; @@ -492,6 +656,33 @@ int ObTableLoadMerger::handle_merge_thread_finish(int ret_code) if (OB_UNLIKELY(is_stop_ || has_error_)) { } else { LOG_INFO("LOAD MERGE COMPLETED"); + if (store_ctx_->ctx_->schema_.is_column_store_) { + if (OB_FAIL(build_rescan_ctx())) { + LOG_WARN("fail to build rescan ctx", KR(ret)); + } else if (OB_FAIL(start_rescan())) { + LOG_WARN("fail to start rescan", KR(ret)); + } + } else { + if (OB_FAIL(store_ctx_->set_status_merged())) { + LOG_WARN("fail to set store status merged", KR(ret)); + } + } + } + } + return ret; +} + +int ObTableLoadMerger::handle_rescan_thread_finish(const int ret_code) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ret_code)) { 
+ has_error_ = true; + } + const int64_t running_thread_count = ATOMIC_SAF(&running_thread_count_, 1); + if (0 == running_thread_count) { + if (OB_UNLIKELY(is_stop_ || has_error_)) { + } else { + LOG_INFO("LOAD RESCAN COMPLETED"); if (OB_FAIL(store_ctx_->set_status_merged())) { LOG_WARN("fail to set store status merged", KR(ret)); } diff --git a/src/observer/table_load/ob_table_load_merger.h b/src/observer/table_load/ob_table_load_merger.h index 8d25d1c3f..ed9d630b0 100644 --- a/src/observer/table_load/ob_table_load_merger.h +++ b/src/observer/table_load/ob_table_load_merger.h @@ -18,6 +18,7 @@ #include "share/table/ob_table_load_define.h" #include "storage/direct_load/ob_direct_load_merge_ctx.h" #include "storage/direct_load/ob_direct_load_merge_task_iterator.h" +#include "storage/direct_load/ob_direct_load_partition_rescan_task.h" #include "storage/direct_load/ob_direct_load_partition_merge_task.h" namespace oceanbase @@ -30,7 +31,9 @@ class ObTableLoadStoreCtx; class ObTableLoadMerger { class MergeTaskProcessor; + class RescanTaskProcessor; class MergeTaskCallback; + class RescanTaskCallback; public: ObTableLoadMerger(ObTableLoadStoreCtx *store_ctx); ~ObTableLoadMerger(); @@ -42,10 +45,15 @@ public: int collect_dml_stat(table::ObTableLoadDmlStat &dml_stats); private: int build_merge_ctx(); + int build_rescan_ctx(); int start_merge(); + int start_rescan(); int get_next_merge_task(storage::ObDirectLoadPartitionMergeTask *&merge_task); + int get_next_rescan_task(ObDirectLoadPartitionRescanTask *&rescan_task); void handle_merge_task_finish(storage::ObDirectLoadPartitionMergeTask *&merge_task); int handle_merge_thread_finish(int ret_code); + void handle_rescan_task_finish(ObDirectLoadPartitionRescanTask *&rescan_task); + int handle_rescan_thread_finish(const int ret_code); private: ObTableLoadStoreCtx * const store_ctx_; const ObTableLoadParam ¶m_; @@ -53,7 +61,9 @@ private: storage::ObDirectLoadMergeCtx merge_ctx_; mutable lib::ObMutex mutex_; 
ObDirectLoadMergeTaskIterator merge_task_iter_; + ObDirectLoadRescanTaskIterator rescan_task_iter_; common::ObDList merging_list_; + common::ObDList rescan_list_; int64_t running_thread_count_ CACHE_ALIGNED; volatile bool has_error_; volatile bool is_stop_; diff --git a/src/observer/table_load/ob_table_load_redef_table.cpp b/src/observer/table_load/ob_table_load_redef_table.cpp index 3c2ffe180..21bb1e147 100644 --- a/src/observer/table_load/ob_table_load_redef_table.cpp +++ b/src/observer/table_load/ob_table_load_redef_table.cpp @@ -38,7 +38,6 @@ int ObTableLoadRedefTable::start(const ObTableLoadRedefTableStartArg &arg, const int64_t origin_timeout_ts = THIS_WORKER.get_timeout_ts(); ObCreateHiddenTableArg create_table_arg; ObCreateHiddenTableRes create_table_res; - int64_t snapshot_version = OB_INVALID_VERSION; create_table_arg.reset(); create_table_arg.exec_tenant_id_ = arg.tenant_id_; create_table_arg.tenant_id_ = arg.tenant_id_; @@ -55,16 +54,16 @@ int ObTableLoadRedefTable::start(const ObTableLoadRedefTableStartArg &arg, create_table_arg.consumer_group_id_ = THIS_WORKER.get_group_id(); if (OB_FAIL(create_table_arg.tz_info_wrap_.deep_copy(session_info.get_tz_info_wrap()))) { LOG_WARN("failed to deep copy tz_info_wrap", KR(ret)); - } else if (OB_FAIL(ObDDLServerClient::create_hidden_table(create_table_arg, create_table_res, snapshot_version, session_info))) { + } else if (OB_FAIL(ObDDLServerClient::create_hidden_table(create_table_arg, create_table_res, + res.snapshot_version_, res.data_format_version_, session_info))) { LOG_WARN("failed to create hidden table", KR(ret), K(create_table_arg)); - } else if (OB_UNLIKELY(snapshot_version <= 0)) { + } else if (OB_UNLIKELY(res.snapshot_version_ <= 0 || res.data_format_version_ <= 0)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid snapshot version", K(ret)); + LOG_WARN("invalid snapshot version", K(ret), K(res)); } else { res.dest_table_id_ = create_table_res.dest_table_id_; res.task_id_ = create_table_res.task_id_; 
res.schema_version_ = create_table_res.schema_version_; - res.snapshot_version_ = snapshot_version; LOG_INFO("succeed to create hidden table", K(arg), K(res)); } THIS_WORKER.set_timeout_ts(origin_timeout_ts); diff --git a/src/observer/table_load/ob_table_load_redef_table.h b/src/observer/table_load/ob_table_load_redef_table.h index a55b8d15e..b400dbf4d 100644 --- a/src/observer/table_load/ob_table_load_redef_table.h +++ b/src/observer/table_load/ob_table_load_redef_table.h @@ -57,7 +57,7 @@ struct ObTableLoadRedefTableStartRes { public: ObTableLoadRedefTableStartRes() - : dest_table_id_(common::OB_INVALID_ID), task_id_(0), schema_version_(0), snapshot_version_(0) + : dest_table_id_(common::OB_INVALID_ID), task_id_(0), schema_version_(0), snapshot_version_(0), data_format_version_(0) { } ~ObTableLoadRedefTableStartRes() = default; @@ -67,13 +67,15 @@ public: task_id_ = 0; schema_version_ = 0; snapshot_version_ = 0; + data_format_version_ = 0; } - TO_STRING_KV(K_(dest_table_id), K_(task_id), K_(schema_version), K_(snapshot_version)); + TO_STRING_KV(K_(dest_table_id), K_(task_id), K_(schema_version), K_(snapshot_version), K_(data_format_version)); public: uint64_t dest_table_id_; int64_t task_id_; int64_t schema_version_; int64_t snapshot_version_; + uint64_t data_format_version_; }; struct ObTableLoadRedefTableFinishArg diff --git a/src/observer/table_load/ob_table_load_schema.cpp b/src/observer/table_load/ob_table_load_schema.cpp index 471156374..fadcfd960 100644 --- a/src/observer/table_load/ob_table_load_schema.cpp +++ b/src/observer/table_load/ob_table_load_schema.cpp @@ -195,10 +195,12 @@ ObTableLoadSchema::ObTableLoadSchema() : allocator_("TLD_Schema"), is_partitioned_table_(false), is_heap_table_(false), + is_column_store_(false), has_autoinc_column_(false), has_identity_column_(false), rowkey_column_count_(0), store_column_count_(0), + lob_column_cnt_(0), collation_type_(CS_TYPE_INVALID), schema_version_(0), is_inited_(false) @@ -217,10 +219,12 @@ void 
ObTableLoadSchema::reset() table_name_.reset(); is_partitioned_table_ = false; is_heap_table_ = false; + is_column_store_ = false; has_autoinc_column_ = false; has_identity_column_ = false; rowkey_column_count_ = 0; store_column_count_ = 0; + lob_column_cnt_ = 0; collation_type_ = CS_TYPE_INVALID; schema_version_ = 0; column_descs_.reset(); @@ -262,6 +266,7 @@ int ObTableLoadSchema::init_table_schema(const ObTableSchema *table_schema) } else { is_partitioned_table_ = table_schema->is_partitioned_table(); is_heap_table_ = table_schema->is_heap_table(); + is_column_store_ = (table_schema->get_column_group_count() > 1) ? true :false; has_autoinc_column_ = (table_schema->get_autoinc_column_id() != 0); rowkey_column_count_ = table_schema->get_rowkey_column_num(); collation_type_ = table_schema->get_collation_type(); @@ -282,6 +287,8 @@ int ObTableLoadSchema::init_table_schema(const ObTableSchema *table_schema) } else if (OB_FAIL(datum_utils_.init(multi_version_column_descs_, rowkey_column_count_, lib::is_oracle_mode(), allocator_))) { LOG_WARN("fail to init datum utils", KR(ret)); + } else if (OB_FAIL(init_lob_storage(column_descs_))) { + LOG_WARN("fail to check lob storage", KR(ret)); } else if (OB_FAIL(init_cmp_funcs(column_descs_, lib::is_oracle_mode()))) { LOG_WARN("fail to init cmp funcs", KR(ret)); } @@ -317,6 +324,20 @@ int ObTableLoadSchema::init_table_schema(const ObTableSchema *table_schema) return ret; } +int ObTableLoadSchema::init_lob_storage(common::ObIArray &column_descs) +{ + int ret = OB_SUCCESS; + lob_column_cnt_ = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < column_descs.count(); ++i) { + const ObColDesc &col_desc = column_descs.at(i); + if (col_desc.col_type_.is_lob_storage()) { + column_descs.at(i).col_type_.set_has_lob_header(); + ++lob_column_cnt_; + } + } + return ret; +} + int ObTableLoadSchema::init_cmp_funcs(const ObIArray &col_descs, const bool is_oracle_mode) { diff --git a/src/observer/table_load/ob_table_load_schema.h 
b/src/observer/table_load/ob_table_load_schema.h index 12adbd046..3ff849aac 100644 --- a/src/observer/table_load/ob_table_load_schema.h +++ b/src/observer/table_load/ob_table_load_schema.h @@ -55,22 +55,25 @@ public: K_(collation_type), K_(column_descs), K_(is_inited)); private: int init_table_schema(const share::schema::ObTableSchema *table_schema); + int init_cmp_funcs(const common::ObIArray &column_descs, + const bool is_oracle_mode); + int init_lob_storage(common::ObIArray &column_descs); int update_decimal_int_precision(const share::schema::ObTableSchema *table_schema, common::ObIArray &cols_desc); int prepare_col_desc(const ObTableSchema *table_schema, common::ObIArray &col_descs); - int init_cmp_funcs(const common::ObIArray &column_descs, - const bool is_oracle_mode); public: common::ObArenaAllocator allocator_; common::ObString table_name_; bool is_partitioned_table_; bool is_heap_table_; + bool is_column_store_; bool has_autoinc_column_; bool has_identity_column_; int64_t rowkey_column_count_; // column count in store, does not contain virtual generated columns int64_t store_column_count_; + int64_t lob_column_cnt_; common::ObCollationType collation_type_; int64_t schema_version_; // if it is a heap table, it contains hidden primary key column diff --git a/src/observer/table_load/ob_table_load_store.cpp b/src/observer/table_load/ob_table_load_store.cpp index b4661fe45..871e111d0 100644 --- a/src/observer/table_load/ob_table_load_store.cpp +++ b/src/observer/table_load/ob_table_load_store.cpp @@ -311,8 +311,6 @@ int ObTableLoadStore::commit(ObTableLoadResultInfo &result_info) ObTableLoadSqlStatistics sql_statistics; if (OB_FAIL(store_ctx_->check_status(ObTableLoadStatusType::MERGED))) { LOG_WARN("fail to check store status", KR(ret)); - } else if (OB_FAIL(store_ctx_->insert_table_ctx_->commit())) { - LOG_WARN("fail to commit insert table", KR(ret)); } else if (ctx_->schema_.has_autoinc_column_ && OB_FAIL(store_ctx_->commit_autoinc_value())) { LOG_WARN("fail 
to commit sync auto increment value", KR(ret)); } else if (param_.online_opt_stat_gather_ && diff --git a/src/observer/table_load/ob_table_load_store_ctx.cpp b/src/observer/table_load/ob_table_load_store_ctx.cpp index c3daa59b7..427b932e5 100644 --- a/src/observer/table_load/ob_table_load_store_ctx.cpp +++ b/src/observer/table_load/ob_table_load_store_ctx.cpp @@ -24,7 +24,6 @@ #include "share/sequence/ob_sequence_cache.h" #include "sql/engine/cmd/ob_load_data_utils.h" #include "storage/direct_load/ob_direct_load_data_block.h" -#include "storage/direct_load/ob_direct_load_fast_heap_table_ctx.h" #include "storage/direct_load/ob_direct_load_insert_table_ctx.h" #include "storage/direct_load/ob_direct_load_mem_context.h" #include "storage/direct_load/ob_direct_load_sstable_data_block.h" @@ -51,7 +50,6 @@ ObTableLoadStoreCtx::ObTableLoadStoreCtx(ObTableLoadTableCtx *ctx) insert_table_ctx_(nullptr), is_multiple_mode_(false), is_fast_heap_table_(false), - fast_heap_table_ctx_(nullptr), tmp_file_mgr_(nullptr), error_row_handler_(nullptr), sequence_schema_(&allocator_), @@ -88,6 +86,7 @@ int ObTableLoadStoreCtx::init( insert_table_param.ddl_task_id_ = ctx_->ddl_param_.task_id_; insert_table_param.execution_id_ = 1; //仓氐说暂时设置为1,不然后面检测过不了 insert_table_param.data_version_ = ctx_->ddl_param_.data_version_; + insert_table_param.reserved_parallel_ = ctx_->param_.session_count_; for (int64_t i = 0; OB_SUCC(ret) && i < partition_id_array.count(); ++i) { const ObLSID &ls_id = partition_id_array[i].ls_id_; const ObTableLoadPartitionId &part_tablet_id = partition_id_array[i].part_tablet_id_; @@ -138,7 +137,12 @@ int ObTableLoadStoreCtx::init( } if (OB_SUCC(ret)) { if (table_data_desc_.is_heap_table_) { - int64_t bucket_cnt = wa_mem_limit / (ctx_->param_.session_count_ * MACRO_BLOCK_WRITER_MEM_SIZE); + int64_t bucket_cnt = 0; + if (ctx_->schema_.lob_column_cnt_ > 0) { + bucket_cnt = wa_mem_limit / (ctx_->param_.session_count_ * MACRO_BLOCK_WRITER_MEM_SIZE * 2); + } else { + bucket_cnt = 
wa_mem_limit / (ctx_->param_.session_count_ * MACRO_BLOCK_WRITER_MEM_SIZE); + } if (ls_partition_ids_.count() <= bucket_cnt) { is_fast_heap_table_ = true; } else { @@ -154,7 +158,9 @@ int ObTableLoadStoreCtx::init( } } if (OB_FAIL(ret)) { - } else if (OB_FAIL(insert_table_param.ls_partition_ids_.assign(target_ls_partition_ids_))) { + } else if (OB_FAIL(insert_table_param.ls_partition_ids_.assign(ls_partition_ids_))) { + LOG_WARN("fail to assign ls tablet ids", KR(ret)); + } else if (OB_FAIL(insert_table_param.target_ls_partition_ids_.assign(target_ls_partition_ids_))) { LOG_WARN("fail to assign ls tablet ids", KR(ret)); } // init trans_allocator_ @@ -223,17 +229,6 @@ int ObTableLoadStoreCtx::init( else if (ctx_->schema_.has_identity_column_ && OB_FAIL(init_sequence())) { LOG_WARN("fail to init sequence", KR(ret)); } - if (OB_SUCC(ret) && is_fast_heap_table_) { - if (OB_ISNULL(fast_heap_table_ctx_ = - OB_NEWx(ObDirectLoadFastHeapTableContext, (&allocator_)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to new ObDirectLoadFastHeapTableContext", KR(ret)); - } else if (OB_FAIL(fast_heap_table_ctx_->init(ctx_->param_.tenant_id_, ls_partition_ids_, - target_ls_partition_ids_, - ctx_->param_.session_count_))) { - LOG_WARN("fail to init fast heap table ctx", KR(ret)); - } - } if (OB_SUCC(ret)) { is_inited_ = true; } else { @@ -293,11 +288,6 @@ void ObTableLoadStoreCtx::destroy() allocator_.free(insert_table_ctx_); insert_table_ctx_ = nullptr; } - if (nullptr != fast_heap_table_ctx_) { - fast_heap_table_ctx_->~ObDirectLoadFastHeapTableContext(); - allocator_.free(fast_heap_table_ctx_); - fast_heap_table_ctx_ = nullptr; - } if (nullptr != tmp_file_mgr_) { tmp_file_mgr_->~ObDirectLoadTmpFileManager(); allocator_.free(tmp_file_mgr_); diff --git a/src/observer/table_load/ob_table_load_store_ctx.h b/src/observer/table_load/ob_table_load_store_ctx.h index f46e143b6..1df46bda5 100644 --- a/src/observer/table_load/ob_table_load_store_ctx.h +++ 
b/src/observer/table_load/ob_table_load_store_ctx.h @@ -25,7 +25,6 @@ namespace oceanbase namespace storage { class ObDirectLoadInsertTableContext; -class ObDirectLoadFastHeapTableContext; class ObDirectLoadTmpFileManager; } // namespace storage namespace share @@ -146,7 +145,6 @@ public: storage::ObDirectLoadInsertTableContext *insert_table_ctx_; bool is_multiple_mode_; bool is_fast_heap_table_; - storage::ObDirectLoadFastHeapTableContext *fast_heap_table_ctx_; storage::ObDirectLoadTmpFileManager *tmp_file_mgr_; ObTableLoadErrorRowHandler *error_row_handler_; share::schema::ObSequenceSchema sequence_schema_; diff --git a/src/observer/table_load/ob_table_load_trans_store.cpp b/src/observer/table_load/ob_table_load_trans_store.cpp index 4c48976c8..8b4e664df 100644 --- a/src/observer/table_load/ob_table_load_trans_store.cpp +++ b/src/observer/table_load/ob_table_load_trans_store.cpp @@ -210,13 +210,14 @@ int ObTableLoadTransStoreWriter::init_session_ctx_array() param.table_data_desc_ = *table_data_desc_; param.datum_utils_ = &(trans_ctx_->ctx_->schema_.datum_utils_); param.col_descs_ = &(trans_ctx_->ctx_->schema_.column_descs_); + param.lob_column_cnt_ = trans_ctx_->ctx_->schema_.lob_column_cnt_; param.cmp_funcs_ = &(trans_ctx_->ctx_->schema_.cmp_funcs_); param.file_mgr_ = trans_ctx_->ctx_->store_ctx_->tmp_file_mgr_; param.is_multiple_mode_ = trans_ctx_->ctx_->store_ctx_->is_multiple_mode_; param.is_fast_heap_table_ = trans_ctx_->ctx_->store_ctx_->is_fast_heap_table_; param.online_opt_stat_gather_ = trans_ctx_->ctx_->param_.online_opt_stat_gather_; + param.px_mode_ = trans_ctx_->ctx_->param_.px_mode_; param.insert_table_ctx_ = trans_ctx_->ctx_->store_ctx_->insert_table_ctx_; - param.fast_heap_table_ctx_ = trans_ctx_->ctx_->store_ctx_->fast_heap_table_ctx_; param.dml_row_handler_ = trans_ctx_->ctx_->store_ctx_->error_row_handler_; for (int64_t i = 0; OB_SUCC(ret) && i < session_count; ++i) { SessionContext *session_ctx = session_ctx_array_ + i; diff --git 
a/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.cpp b/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.cpp index 5b6d9c8c1..abd07f208 100644 --- a/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.cpp +++ b/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.cpp @@ -122,7 +122,7 @@ int ObAllVirtualTabletDDLKVInfo::get_next_ddl_kv(ObDDLKV *&ddl_kv) int ret = OB_SUCCESS; ObTabletHandle tablet_handle; while (OB_SUCC(ret)) { - if (ddl_kv_idx_ < 0 || ddl_kv_idx_ >= ddl_kvs_handle_.get_count()) { + if (ddl_kv_idx_ < 0 || ddl_kv_idx_ >= ddl_kvs_handle_.count()) { ObDDLKvMgrHandle ddl_kv_mgr_handle; if (OB_FAIL(get_next_ddl_kv_mgr(ddl_kv_mgr_handle))) { if (OB_ITER_END != ret) { @@ -130,13 +130,13 @@ int ObAllVirtualTabletDDLKVInfo::get_next_ddl_kv(ObDDLKV *&ddl_kv) } } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_ddl_kvs(false/*frozen_only*/, ddl_kvs_handle_))) { SERVER_LOG(WARN, "fail to get ddl kvs", K(ret)); - } else if (ddl_kvs_handle_.get_count() > 0) { + } else if (ddl_kvs_handle_.count() > 0) { ddl_kv_idx_ = 0; } } - if (OB_SUCC(ret) && ddl_kv_idx_ >= 0 && ddl_kv_idx_ < ddl_kvs_handle_.get_count()) { - ddl_kv = static_cast(ddl_kvs_handle_.get_table(ddl_kv_idx_)); + if (OB_SUCC(ret) && ddl_kv_idx_ >= 0 && ddl_kv_idx_ < ddl_kvs_handle_.count()) { + ddl_kv = ddl_kvs_handle_.at(ddl_kv_idx_).get_obj(); if (OB_ISNULL(ddl_kv)) { ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "fail to get ddl kv", K(ret), K(ddl_kv_idx_)); diff --git a/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.h b/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.h index 6328a2ce6..d65d50a45 100644 --- a/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.h +++ b/src/observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.h @@ -48,7 +48,7 @@ private: int64_t ls_id_; ObSharedGuard ls_iter_guard_; storage::ObLSTabletIterator ls_tablet_iter_; - ObTablesHandleArray ddl_kvs_handle_; + ObArray ddl_kvs_handle_; 
common::ObTabletID curr_tablet_id_; int64_t ddl_kv_idx_; char ip_buf_[common::OB_IP_STR_BUFF]; diff --git a/src/rootserver/ddl_task/ob_column_redefinition_task.cpp b/src/rootserver/ddl_task/ob_column_redefinition_task.cpp index 099a03b06..241b83750 100644 --- a/src/rootserver/ddl_task/ob_column_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_column_redefinition_task.cpp @@ -41,24 +41,24 @@ ObColumnRedefinitionTask::~ObColumnRedefinitionTask() int ObColumnRedefinitionTask::init(const uint64_t tenant_id, const int64_t task_id, const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t schema_version, const int64_t parallelism, const int64_t consumer_group_id, - const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, + const uint64_t tenant_data_version, const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; - uint64_t tenant_data_format_version = 0; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("ObColumnRedefinitionTask has already been inited", K(ret)); - } else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == data_table_id || OB_INVALID_ID == dest_table_id || schema_version <= 0 || task_status < ObDDLTaskStatus::PREPARE + } else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == data_table_id || OB_INVALID_ID == dest_table_id || schema_version <= 0 + || tenant_data_version <= 0 || task_status < ObDDLTaskStatus::PREPARE || task_status > ObDDLTaskStatus::SUCCESS || snapshot_version < 0 || (snapshot_version > 0 && task_status < ObDDLTaskStatus::WAIT_TRANS_END))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(data_table_id), K(dest_table_id), K(schema_version), K(task_status), K(snapshot_version)); + LOG_WARN("invalid arguments", K(ret), 
K(tenant_id), K(task_id), K(data_table_id), K(dest_table_id), K(schema_version), + K(tenant_data_version), K(task_status), K(snapshot_version)); LOG_WARN("fail to init task table operator", K(ret)); } else if (OB_FAIL(deep_copy_table_arg(allocator_, alter_table_arg, alter_table_arg_))) { LOG_WARN("deep copy alter table arg failed", K(ret)); } else if (OB_FAIL(set_ddl_stmt_str(alter_table_arg_.ddl_stmt_str_))) { LOG_WARN("set ddl stmt str failed", K(ret)); - } else if (OB_FAIL(ObShareUtil::fetch_current_data_version(*GCTX.sql_proxy_, tenant_id, tenant_data_format_version))) { - LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { set_gmt_create(ObTimeUtility::current_time()); task_type_ = ddl_type; @@ -83,7 +83,7 @@ int ObColumnRedefinitionTask::init(const uint64_t tenant_id, const int64_t task_ alter_table_arg_.alter_table_schema_.set_tenant_id(tenant_id_); alter_table_arg_.alter_table_schema_.set_schema_version(schema_version_); alter_table_arg_.exec_tenant_id_ = dst_tenant_id_; - data_format_version_ = tenant_data_format_version; + data_format_version_ = tenant_data_version; is_inited_ = true; ddl_tracing_.open(); } @@ -284,6 +284,7 @@ int ObColumnRedefinitionTask::copy_table_indexes() &create_index_arg, task_id_); param.sub_task_trace_id_ = sub_task_trace_id_; + param.tenant_data_version_ = data_format_version_; if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { diff --git a/src/rootserver/ddl_task/ob_column_redefinition_task.h b/src/rootserver/ddl_task/ob_column_redefinition_task.h index 6593fd872..fbcbd2aaf 100644 --- a/src/rootserver/ddl_task/ob_column_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_column_redefinition_task.h @@ -39,6 +39,7 @@ public: const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, + const uint64_t tenant_data_version, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t 
snapshot_version = 0); int init(const ObDDLTaskRecord &task_record); diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp index 3eaa01fc1..6945d0621 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp @@ -51,13 +51,14 @@ ObDDLRedefinitionSSTableBuildTask::ObDDLRedefinitionSSTableBuildTask( const bool is_mview_complete_refresh, const int64_t mview_table_id, ObRootService *root_service, - const common::ObAddr &inner_sql_exec_addr) + const common::ObAddr &inner_sql_exec_addr, + const int64_t data_format_version) : is_inited_(false), tenant_id_(tenant_id), task_id_(task_id), data_table_id_(data_table_id), dest_table_id_(dest_table_id), schema_version_(schema_version), snapshot_version_(snapshot_version), execution_id_(execution_id), consumer_group_id_(consumer_group_id), sql_mode_(sql_mode), trace_id_(trace_id), parallelism_(parallelism), use_heap_table_ddl_plan_(use_heap_table_ddl_plan), is_mview_complete_refresh_(is_mview_complete_refresh), mview_table_id_(mview_table_id), - root_service_(root_service), inner_sql_exec_addr_(inner_sql_exec_addr) + root_service_(root_service), inner_sql_exec_addr_(inner_sql_exec_addr), data_format_version_(0) { set_retry_times(0); // do not retry } @@ -196,10 +197,41 @@ int ObDDLRedefinitionSSTableBuildTask::process() LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, oracle_mode ? 
ObCompatibilityMode::ORACLE_MODE : ObCompatibilityMode::MYSQL_MODE, &session_param, sql_exec_addr))) { - LOG_WARN("fail to execute build replica sql", K(ret), K(tenant_id_)); - } else if (OB_FAIL(ObCheckTabletDataComplementOp::check_finish_report_checksum(tenant_id_, dest_table_id_, execution_id_, task_id_))) { - LOG_WARN("fail to check sstable checksum_report_finish", - K(ret), K(tenant_id_), K(dest_table_id_), K(execution_id_), K(task_id_)); + if (ret == OB_SERVER_OUTOF_DISK_SPACE && + data_format_version_ >= DATA_VERSION_4_3_0_0) { + // if version >= 4.3.0, would retry with compression. + int tmp_ret = OB_SUCCESS; + sql_string.reuse(); + SortCompactLevel compress_level = SORT_COMPRESSION_LEVEL; + if (OB_SUCCESS != (tmp_ret = ObDDLUtil::generate_build_replica_sql(tenant_id_, data_table_id_, + dest_table_id_, + data_table_schema->get_schema_version(), + snapshot_version_, + execution_id_, + task_id_, + parallelism_, + use_heap_table_ddl_plan_, + true, + &col_name_map_, + sql_string, + compress_level))) { + LOG_WARN("fail to generate build replica sql", K(tmp_ret)); + } else if (OB_SUCCESS != (tmp_ret = user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, + oracle_mode ? 
ObCompatibilityMode::ORACLE_MODE : ObCompatibilityMode::MYSQL_MODE, + &session_param, sql_exec_addr))) { + LOG_WARN("fail to execute build replica sql", K(tmp_ret), K(tenant_id_)); + } else { + ret = OB_SUCCESS; + } + } else { + LOG_WARN("fail to execute build replica sql", K(ret), K(tenant_id_)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObCheckTabletDataComplementOp::check_finish_report_checksum(tenant_id_, dest_table_id_, execution_id_, task_id_))) { + LOG_WARN("fail to check sstable checksum_report_finish", + K(ret), K(tenant_id_), K(dest_table_id_), K(execution_id_), K(task_id_)); + } } } } @@ -251,7 +283,8 @@ ObAsyncTask *ObDDLRedefinitionSSTableBuildTask::deep_copy(char *buf, const int64 is_mview_complete_refresh_, mview_table_id_, root_service_, - inner_sql_exec_addr_); + inner_sql_exec_addr_, + data_format_version_); if (OB_FAIL(new_task->tz_info_wrap_.deep_copy(tz_info_wrap_))) { LOG_WARN("failed to copy tz info wrap", K(ret)); } else if (OB_FAIL(new_task->col_name_map_.assign(col_name_map_))) { diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h index 6800d56e8..9613fa3c6 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h @@ -41,7 +41,8 @@ public: const bool is_mview_complete_refresh, const int64_t mview_table_id, ObRootService *root_service, - const common::ObAddr &inner_sql_exec_addr); + const common::ObAddr &inner_sql_exec_addr, + const int64_t data_format_version = 0); int init( const ObTableSchema &orig_table_schema, const AlterTableSchema &alter_table_schema, @@ -74,6 +75,7 @@ private: common::ObArray based_schema_object_infos_; ObRootService *root_service_; common::ObAddr inner_sql_exec_addr_; + int64_t data_format_version_; }; class ObSyncTabletAutoincSeqCtx final diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp index 0ea7c9c5c..09c5f2df5 100755 --- 
a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp @@ -936,6 +936,23 @@ void ObDDLScheduler::run1() } } +int ObDDLScheduler::check_conflict_with_upgrade( + const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLScheduler has not been inited", K(ret)); + } else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(tenant_id)); + } else if (GCONF.in_upgrade_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("Ddl task is disallowed to create when upgrading", K(ret)); + } + return ret; +} + int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, ObISQLClient &proxy, ObDDLTaskRecord &task_record) @@ -947,17 +964,12 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, const obrpc::ObDropIndexArg *drop_index_arg = nullptr; const obrpc::ObMViewCompleteRefreshArg *mview_complete_refresh_arg = nullptr; ObRootService *root_service = GCTX.root_service_; - uint64_t tenant_id = param.tenant_id_; - uint64_t compat_version = 0; LOG_INFO("create ddl task", K(param)); - if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { - LOG_WARN("fail to get data version", K(ret), K(tenant_id)); - } else if (compat_version < DATA_VERSION_4_1_0_0 && GCONF.in_upgrade_mode()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("4.0 is being upgrade to 4.1, create_ddl_task not supported", K(ret)); - } else if (OB_UNLIKELY(!is_inited_)) { + if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLScheduler has not been inited", K(ret)); + } else if (OB_FAIL(check_conflict_with_upgrade(param.tenant_id_))) { + LOG_WARN("conflict with upgrade", K(ret), K(param)); } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); @@ -978,6 +990,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, param.consumer_group_id_, 
param.sub_task_trace_id_, create_index_arg, + param.tenant_data_version_, *param.allocator_, task_record))) { LOG_WARN("fail to create build index task", K(ret)); @@ -1007,6 +1020,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, case DDL_TABLE_REDEFINITION: case DDL_DIRECT_LOAD: case DDL_DIRECT_LOAD_INSERT: + case DDL_ALTER_COLUMN_GROUP: case DDL_MVIEW_COMPLETE_REFRESH: if (OB_FAIL(create_table_redefinition_task(proxy, param.type_, @@ -1017,6 +1031,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, param.task_id_, param.sub_task_trace_id_, static_cast(param.ddl_arg_), + param.tenant_data_version_, *param.allocator_, task_record))) { LOG_WARN("fail to create table redefinition task", K(ret)); @@ -1045,6 +1060,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, param.task_id_, param.sub_task_trace_id_, static_cast(param.ddl_arg_), + param.tenant_data_version_, *param.allocator_, task_record))) { LOG_WARN("fail to create recover restore table task", K(ret)); @@ -1061,6 +1077,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, param.task_id_, param.sub_task_trace_id_, alter_table_arg, + param.tenant_data_version_, *param.allocator_, task_record))) { LOG_WARN("fail to create table redefinition task", K(ret)); @@ -1095,6 +1112,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, param.task_id_, param.sub_task_trace_id_, static_cast(param.ddl_arg_), + param.tenant_data_version_, *param.allocator_, task_record))) { LOG_WARN("fail to create column redefinition task", K(ret)); @@ -1486,6 +1504,7 @@ int ObDDLScheduler::create_build_index_task( const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg *create_index_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record) { @@ -1495,9 +1514,10 @@ int ObDDLScheduler::create_build_index_task( if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; 
LOG_WARN("not init", K(ret)); - } else if (OB_ISNULL(create_index_arg) || OB_ISNULL(data_table_schema) || OB_ISNULL(index_schema)) { + } else if (OB_ISNULL(create_index_arg) || OB_ISNULL(data_table_schema) || OB_ISNULL(index_schema) + || OB_UNLIKELY(tenant_data_version <= 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(create_index_arg), K(data_table_schema), K(index_schema)); + LOG_WARN("invalid argument", K(ret), K(create_index_arg), K(data_table_schema), K(index_schema), K(tenant_data_version)); } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), data_table_schema->get_tenant_id(), task_id))) { LOG_WARN("fetch new task id failed", K(ret)); } else if (OB_FAIL(index_task.init(data_table_schema->get_tenant_id(), @@ -1510,7 +1530,8 @@ int ObDDLScheduler::create_build_index_task( consumer_group_id, sub_task_trace_id, *create_index_arg, - parent_task_id))) { + parent_task_id, + tenant_data_version))) { LOG_WARN("init global index task failed", K(ret), K(data_table_schema), K(index_schema)); } else if (OB_FAIL(index_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); @@ -1618,29 +1639,30 @@ int ObDDLScheduler::create_table_redefinition_task( const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; + int64_t target_cg_cnt = 0; SMART_VAR(ObTableRedefinitionTask, redefinition_task) { if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLScheduler has not been inited", K(ret)); - } else if (OB_UNLIKELY(0 == task_id) || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { + } else if (OB_UNLIKELY(0 == task_id || tenant_data_version <= 0) || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", 
K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema)); - } else if (OB_FAIL(redefinition_task.init(src_schema->get_tenant_id(), - dest_schema->get_tenant_id(), + LOG_WARN("invalid arguments", K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema), K(tenant_data_version)); + } else if (OB_FAIL(dest_schema->get_store_column_group_count(target_cg_cnt))) { + LOG_WARN("fail to get target_cg_cnt", K(ret), K(dest_schema)); + } else if (OB_FAIL(redefinition_task.init(src_schema, + dest_schema, task_id, type, - src_schema->get_table_id(), - dest_schema->get_table_id(), - dest_schema->get_schema_version(), - dest_schema->get_schema_version(), parallelism, consumer_group_id, sub_task_trace_id, - *alter_table_arg))) { + *alter_table_arg, + tenant_data_version))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(redefinition_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); @@ -1662,27 +1684,30 @@ int ObDDLScheduler::create_drop_primary_key_task( const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; + int64_t target_cg_cnt = 0; SMART_VAR(ObDropPrimaryKeyTask, drop_pk_task) { if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLScheduler has not been inited", K(ret)); - } else if (OB_UNLIKELY(0 == task_id) || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { + } else if (OB_UNLIKELY(0 == task_id || tenant_data_version <= 0) || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema)); - } else if (OB_FAIL(drop_pk_task.init(src_schema->get_tenant_id(), + LOG_WARN("invalid arguments", K(ret), K(task_id), 
KP(alter_table_arg), KP(src_schema), KP(dest_schema), K(tenant_data_version)); + } else if (OB_FAIL(dest_schema->get_store_column_group_count(target_cg_cnt))) { + LOG_WARN("fail to get target_store_cg_cnt", K(ret), KPC(dest_schema)); + } else if (OB_FAIL(drop_pk_task.init(src_schema, + dest_schema, task_id, type, - src_schema->get_table_id(), - dest_schema->get_table_id(), - dest_schema->get_schema_version(), parallelism, consumer_group_id, sub_task_trace_id, - *alter_table_arg))) { + *alter_table_arg, + tenant_data_version))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(drop_pk_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); @@ -1704,17 +1729,20 @@ int ObDDLScheduler::create_column_redefinition_task( const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; + int64_t target_cg_cnt = 0; SMART_VAR(ObColumnRedefinitionTask, redefinition_task) { if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLScheduler has not been inited", K(ret)); - } else if (OB_UNLIKELY(0 == task_id) || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { + } else if (OB_UNLIKELY(0 == task_id || tenant_data_version <= 0) + || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema)); + LOG_WARN("invalid arguments", K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema), K(tenant_data_version)); } else if (OB_FAIL(redefinition_task.init(src_schema->get_tenant_id(), task_id, type, @@ -1724,7 +1752,8 @@ int ObDDLScheduler::create_column_redefinition_task( parallelism, consumer_group_id, sub_task_trace_id, - *alter_table_arg))) { + *alter_table_arg, + 
tenant_data_version))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(redefinition_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); @@ -1816,29 +1845,31 @@ int ObDDLScheduler::create_recover_restore_table_task( const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; + int64_t target_cg_cnt = 0; SMART_VAR(ObRecoverRestoreTableTask, redefinition_task) { if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLScheduler has not been inited", K(ret)); - } else if (OB_UNLIKELY(0 == task_id) || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { + } else if (OB_UNLIKELY(0 == task_id || tenant_data_version <= 0) + || OB_ISNULL(alter_table_arg) || OB_ISNULL(src_schema) || OB_ISNULL(dest_schema)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema)); - } else if (OB_FAIL(redefinition_task.init(src_schema->get_tenant_id(), - dest_schema->get_tenant_id(), + LOG_WARN("invalid arguments", K(ret), K(task_id), KP(alter_table_arg), KP(src_schema), KP(dest_schema), K(tenant_data_version)); + } else if (OB_FAIL(dest_schema->get_store_column_group_count(target_cg_cnt))) { + LOG_WARN("fail to get store cg cnt", K(ret), KPC(dest_schema)); + } else if (OB_FAIL(redefinition_task.init(src_schema, + dest_schema, task_id, type, - src_schema->get_table_id(), - dest_schema->get_table_id(), - src_schema->get_schema_version(), - dest_schema->get_schema_version(), parallelism, consumer_group_id, sub_task_trace_id, - *alter_table_arg))) { + *alter_table_arg, + tenant_data_version))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(redefinition_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id 
failed", K(ret)); @@ -2033,6 +2064,7 @@ int ObDDLScheduler::schedule_ddl_task(const ObDDLTaskRecord &record) case DDL_TABLE_REDEFINITION: case DDL_DIRECT_LOAD: case DDL_DIRECT_LOAD_INSERT: + case DDL_ALTER_COLUMN_GROUP: case DDL_MVIEW_COMPLETE_REFRESH: ret = schedule_table_redefinition_task(record); break; @@ -2602,6 +2634,7 @@ int ObDDLScheduler::on_sstable_complement_job_reply( case ObDDLType::DDL_MODIFY_COLUMN: case ObDDLType::DDL_CONVERT_TO_CHARACTER: case ObDDLType::DDL_TABLE_REDEFINITION: + case ObDDLType::DDL_ALTER_COLUMN_GROUP: case ObDDLType::DDL_MVIEW_COMPLETE_REFRESH: if (OB_FAIL(static_cast(&task)->update_complete_sstable_job_status(tablet_id, snapshot_version, execution_id, ret_code, addition_info))) { LOG_WARN("update complete sstable job status", K(ret)); @@ -2734,6 +2767,7 @@ int ObDDLScheduler::notify_update_autoinc_end(const ObDDLTaskKey &task_key, case ObDDLType::DDL_TABLE_REDEFINITION: case ObDDLType::DDL_DIRECT_LOAD: case ObDDLType::DDL_DIRECT_LOAD_INSERT: + case ObDDLType::DDL_ALTER_COLUMN_GROUP: case ObDDLType::DDL_MVIEW_COMPLETE_REFRESH: if (OB_FAIL(static_cast(&task)->notify_update_autoinc_finish(autoinc_val, ret_code))) { LOG_WARN("update complete sstable job status", K(ret)); diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.h b/src/rootserver/ddl_task/ob_ddl_scheduler.h index e9ca73563..f392b5673 100755 --- a/src/rootserver/ddl_task/ob_ddl_scheduler.h +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.h @@ -362,6 +362,7 @@ private: const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg *create_index_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record); int create_constraint_task( @@ -395,6 +396,7 @@ private: const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -408,6 +410,7 @@ private: const int64_t task_id, 
const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -421,6 +424,7 @@ private: const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -470,6 +474,7 @@ private: const int64_t task_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, + const uint64_t tenant_data_version, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -490,6 +495,8 @@ private: int remove_task_from_longops_mgr(ObDDLTask *ddl_task); int remove_ddl_task(ObDDLTask *ddl_task); void add_event_info(const ObDDLTaskRecord &ddl_record, const ObString &ddl_event_stmt); + int check_conflict_with_upgrade( + const uint64_t tenant_id); private: static const int64_t TOTAL_LIMIT = 1024L * 1024L * 1024L; diff --git a/src/rootserver/ddl_task/ob_ddl_task.cpp b/src/rootserver/ddl_task/ob_ddl_task.cpp index 00c678987..1d976f841 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_task.cpp @@ -130,7 +130,7 @@ int ObDDLTaskID::assign(const ObDDLTaskID &other) ObDDLTaskSerializeField::ObDDLTaskSerializeField(const int64_t task_version, const int64_t parallelism, - const int64_t data_format_version, + const uint64_t data_format_version, const int64_t consumer_group_id, const bool is_abort, const int32_t sub_task_trace_id) @@ -162,8 +162,9 @@ OB_SERIALIZE_MEMBER(ObDDLTaskSerializeField, sub_task_trace_id_); ObCreateDDLTaskParam::ObCreateDDLTaskParam() - : sub_task_trace_id_(0), tenant_id_(OB_INVALID_ID), object_id_(OB_INVALID_ID), schema_version_(0), parallelism_(0), consumer_group_id_(0), parent_task_id_(0), task_id_(0), - type_(DDL_INVALID), src_table_schema_(nullptr), dest_table_schema_(nullptr), ddl_arg_(nullptr), allocator_(nullptr) + : sub_task_trace_id_(0), 
tenant_id_(OB_INVALID_ID), object_id_(OB_INVALID_ID), schema_version_(0), parallelism_(0), + consumer_group_id_(0), parent_task_id_(0), task_id_(0), type_(DDL_INVALID), src_table_schema_(nullptr), + dest_table_schema_(nullptr), ddl_arg_(nullptr), allocator_(nullptr), tenant_data_version_(0) { } @@ -277,6 +278,7 @@ trace::ObSpanCtx* ObDDLTracing::begin_task_span() case DDL_ALTER_PARTITION_BY: case DDL_CONVERT_TO_CHARACTER: case DDL_TABLE_REDEFINITION: + case DDL_ALTER_COLUMN_GROUP: span = FLT_BEGIN_SPAN(ddl_table_redefinition); break; case DDL_DROP_PRIMARY_KEY: @@ -359,6 +361,7 @@ trace::ObSpanCtx* ObDDLTracing::restore_task_span() case DDL_ALTER_PARTITION_BY: case DDL_CONVERT_TO_CHARACTER: case DDL_TABLE_REDEFINITION: + case DDL_ALTER_COLUMN_GROUP: span = FLT_RESTORE_DDL_SPAN(ddl_table_redefinition, task_span_id_, task_start_ts_); break; case DDL_DROP_PRIMARY_KEY: @@ -784,6 +787,9 @@ int ObDDLTask::get_ddl_type_str(const int64_t ddl_type, const char *&ddl_type_st case DDL_DROP_INDEX: ddl_type_str = "drop index"; break; + case DDL_ALTER_COLUMN_GROUP: + ddl_type_str = "alter column group"; + break; case DDL_MVIEW_COMPLETE_REFRESH: ddl_type_str = "mview complete refresh"; break; @@ -1627,10 +1633,12 @@ int ObDDLTask::gather_inserted_rows( const uint64_t tenant_id, const int64_t task_id, ObMySQLProxy &sql_proxy, - int64_t &row_inserted) + int64_t &row_inserted_cg, + int64_t &row_inserted_file) { int ret = OB_SUCCESS; - row_inserted = 0; + row_inserted_cg = 0; + row_inserted_file = 0; char trace_id_str[OB_MAX_TRACE_ID_BUFFER_SIZE] = ""; trace_id_.to_string(trace_id_str, OB_MAX_TRACE_ID_BUFFER_SIZE); if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0)) { @@ -1641,7 +1649,7 @@ int ObDDLTask::gather_inserted_rows( sqlclient::ObMySQLResult *insert_result = NULL; SMART_VAR(ObMySQLProxy::MySQLResult, insert_res) { if (OB_FAIL(insert_sql.assign_fmt( - "SELECT OTHERSTAT_1_VALUE AS ROW_INSERTED FROM %s WHERE TENANT_ID=%lu " + "SELECT OTHERSTAT_1_VALUE AS CG_ROW_INSERTED, 
OTHERSTAT_2_VALUE AS SSTABLE_ROW_INSERTED FROM %s WHERE TENANT_ID=%lu " "AND TRACE_ID='%s' AND PLAN_OPERATION='PHY_PX_MULTI_PART_SSTABLE_INSERT' AND OTHERSTAT_5_VALUE='%ld'", OB_ALL_VIRTUAL_SQL_PLAN_MONITOR_TNAME, tenant_id, trace_id_str, task_id))) { LOG_WARN("failed to assign sql", K(ret)); @@ -1662,9 +1670,13 @@ int ObDDLTask::gather_inserted_rows( LOG_WARN("failed to get next row", K(ret)); } } else { - int64_t row_inserted_tmp = 0; - EXTRACT_INT_FIELD_MYSQL(*insert_result, "ROW_INSERTED", row_inserted_tmp, int64_t); - row_inserted += row_inserted_tmp; + int64_t row_inserted_cg_tmp = 0; + EXTRACT_INT_FIELD_MYSQL(*insert_result, "CG_ROW_INSERTED", row_inserted_cg_tmp, int64_t); + row_inserted_cg += row_inserted_cg_tmp; + + int64_t row_inserted_file_tmp = 0; + EXTRACT_INT_FIELD_MYSQL(*insert_result, "SSTABLE_ROW_INSERTED", row_inserted_file_tmp, int64_t); + row_inserted_file += row_inserted_file_tmp; } } } @@ -1677,12 +1689,14 @@ int ObDDLTask::gather_redefinition_stats(const uint64_t tenant_id, ObMySQLProxy &sql_proxy, int64_t &row_scanned, int64_t &row_sorted, - int64_t &row_inserted) + int64_t &row_inserted_cg, + int64_t &row_inserted_file) { int ret = OB_SUCCESS; row_scanned = 0; row_sorted = 0; - row_inserted = 0; + row_inserted_cg = 0; + row_inserted_file = 0; if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id), K(task_id)); @@ -1690,7 +1704,7 @@ int ObDDLTask::gather_redefinition_stats(const uint64_t tenant_id, LOG_WARN("gather scanned rows failed", K(ret)); } else if (OB_FAIL(gather_sorted_rows(tenant_id, task_id, sql_proxy, row_sorted))) { LOG_WARN("gather sorted rows failed", K(ret)); - } else if (OB_FAIL(gather_inserted_rows(tenant_id, task_id, sql_proxy, row_inserted))) { + } else if (OB_FAIL(gather_inserted_rows(tenant_id, task_id, sql_proxy, row_inserted_cg, row_inserted_file))) { LOG_WARN("gather inserted rows failed", K(ret)); } return ret; @@ -2957,33 +2971,33 
@@ int ObDDLTaskRecordOperator::check_has_conflict_ddl( LOG_WARN("failed to fill task record", K(ret)); } else if (task_record.task_id_ != task_id) { switch (ddl_type) { - case ObDDLType::DDL_DROP_TABLE: { - if (task_record.ddl_type_ == ObDDLType::DDL_DROP_INDEX && task_record.target_object_id_ != task_record.object_id_) { - LOG_WARN("conflict with ddl", K(task_record)); - has_conflict_ddl = true; + case ObDDLType::DDL_DROP_TABLE: { + if (task_record.ddl_type_ == ObDDLType::DDL_DROP_INDEX && task_record.target_object_id_ != task_record.object_id_) { + LOG_WARN("conflict with ddl", K(task_record)); + has_conflict_ddl = true; + } + break; } - break; - } - case ObDDLType::DDL_DOUBLE_TABLE_OFFLINE: - case ObDDLType::DDL_MODIFY_COLUMN: - case ObDDLType::DDL_ADD_PRIMARY_KEY: - case ObDDLType::DDL_DROP_PRIMARY_KEY: - case ObDDLType::DDL_ALTER_PRIMARY_KEY: - case ObDDLType::DDL_ALTER_PARTITION_BY: - case ObDDLType::DDL_DROP_COLUMN: - case ObDDLType::DDL_CONVERT_TO_CHARACTER: - case ObDDLType::DDL_ADD_COLUMN_OFFLINE: - case ObDDLType::DDL_COLUMN_REDEFINITION: - case ObDDLType::DDL_TABLE_REDEFINITION: - case ObDDLType::DDL_DIRECT_LOAD: - case ObDDLType::DDL_DIRECT_LOAD_INSERT: - case ObDDLType::DDL_MVIEW_COMPLETE_REFRESH: { - has_conflict_ddl = true; - break; - } - default: { - // do nothing - } + case ObDDLType::DDL_DOUBLE_TABLE_OFFLINE: + case ObDDLType::DDL_MODIFY_COLUMN: + case ObDDLType::DDL_ADD_PRIMARY_KEY: + case ObDDLType::DDL_DROP_PRIMARY_KEY: + case ObDDLType::DDL_ALTER_PRIMARY_KEY: + case ObDDLType::DDL_ALTER_PARTITION_BY: + case ObDDLType::DDL_DROP_COLUMN: + case ObDDLType::DDL_CONVERT_TO_CHARACTER: + case ObDDLType::DDL_ADD_COLUMN_OFFLINE: + case ObDDLType::DDL_COLUMN_REDEFINITION: + case ObDDLType::DDL_TABLE_REDEFINITION: + case ObDDLType::DDL_DIRECT_LOAD: + case ObDDLType::DDL_DIRECT_LOAD_INSERT: + case ObDDLType::DDL_ALTER_COLUMN_GROUP: + case ObDDLType::DDL_MVIEW_COMPLETE_REFRESH: + has_conflict_ddl = true; + break; + default: + // do nothing + break; } } } 
diff --git a/src/rootserver/ddl_task/ob_ddl_task.h b/src/rootserver/ddl_task/ob_ddl_task.h index ab145347b..40f42f4f8 100755 --- a/src/rootserver/ddl_task/ob_ddl_task.h +++ b/src/rootserver/ddl_task/ob_ddl_task.h @@ -117,7 +117,7 @@ public: ObDDLTaskSerializeField() : task_version_(0), parallelism_(0), data_format_version_(0), consumer_group_id_(0), is_abort_(false), sub_task_trace_id_(0) {} ObDDLTaskSerializeField(const int64_t task_version, const int64_t parallelism, - const int64_t data_format_version, + const uint64_t data_format_version, const int64_t consumer_group_id, const bool is_abort, const int32_t sub_task_trace_id); @@ -126,7 +126,7 @@ public: public: int64_t task_version_; int64_t parallelism_; - int64_t data_format_version_; + uint64_t data_format_version_; int64_t consumer_group_id_; bool is_abort_; int32_t sub_task_trace_id_; @@ -151,8 +151,8 @@ public: ~ObCreateDDLTaskParam() = default; bool is_valid() const { return OB_INVALID_ID != tenant_id_ && type_ > share::DDL_INVALID && type_ < share::DDL_MAX && nullptr != allocator_; } - TO_STRING_KV(K_(sub_task_trace_id), K_(tenant_id), K_(object_id), K_(schema_version), K_(parallelism), K_(consumer_group_id), K_(parent_task_id), K_(task_id), - K_(type), KPC_(src_table_schema), KPC_(dest_table_schema), KPC_(ddl_arg)); + TO_STRING_KV(K_(tenant_id), K_(object_id), K_(schema_version), K_(parallelism), K_(consumer_group_id), K_(parent_task_id), K_(task_id), + K_(type), KPC_(src_table_schema), KPC_(dest_table_schema), KPC_(ddl_arg), K_(tenant_data_version), K_(sub_task_trace_id)); public: int32_t sub_task_trace_id_; uint64_t tenant_id_; @@ -167,6 +167,7 @@ public: const ObTableSchema *dest_table_schema_; const obrpc::ObDDLArg *ddl_arg_; common::ObIAllocator *allocator_; + uint64_t tenant_data_version_; }; class ObDDLTaskRecordOperator final @@ -516,7 +517,7 @@ public: obrpc::ObDDLArg &dest_arg); void set_longops_stat(share::ObDDLLongopsStat *longops_stat) { longops_stat_ = longops_stat; } 
share::ObDDLLongopsStat *get_longops_stat() const { return longops_stat_; } - int64_t get_data_format_version() const { return data_format_version_; } + uint64_t get_data_format_version() const { return data_format_version_; } static int fetch_new_task_id(ObMySQLProxy &sql_proxy, const uint64_t tenant_id, int64_t &new_task_id); virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const; virtual int deserlize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos); @@ -575,7 +576,8 @@ protected: ObMySQLProxy &sql_proxy, int64_t &row_scanned, int64_t &row_sorted, - int64_t &row_inserted); + int64_t &row_inserted_cg, + int64_t &row_inserted_file); int gather_scanned_rows( const uint64_t tenant_id, const int64_t task_id, @@ -590,7 +592,8 @@ protected: const uint64_t tenant_id, const int64_t task_id, ObMySQLProxy &sql_proxy, - int64_t &row_inserted); + int64_t &row_inserted_cg, + int64_t &row_inserted_file); int copy_longops_stat(share::ObLongopsValue &value); virtual bool is_error_need_retry(const int ret_code) { @@ -636,7 +639,7 @@ protected: int64_t execution_id_; // guarded by lock_ common::ObAddr sql_exec_addr_; int64_t start_time_; - int64_t data_format_version_; + uint64_t data_format_version_; int64_t consumer_group_id_; }; diff --git a/src/rootserver/ddl_task/ob_drop_index_task.cpp b/src/rootserver/ddl_task/ob_drop_index_task.cpp index a715ee2eb..31489ccde 100644 --- a/src/rootserver/ddl_task/ob_drop_index_task.cpp +++ b/src/rootserver/ddl_task/ob_drop_index_task.cpp @@ -402,6 +402,13 @@ int ObDropIndexTask::check_switch_succ() LOG_WARN("error sys", K(ret)); } else if (OB_FAIL(refresh_schema_version())) { LOG_WARN("refresh schema version failed", K(ret)); + } else if (OB_FAIL(ObDDLUtil::check_tenant_status_normal(&root_service_->get_sql_proxy(), tenant_id_))) { + if (OB_TENANT_HAS_BEEN_DROPPED == ret || OB_STANDBY_READ_ONLY == ret) { + need_retry_ = false; + LOG_INFO("tenant status 
is abnormal, exit anyway", K(ret), K(tenant_id_)); + } else { + LOG_WARN("check tenant status failed", K(ret), K(tenant_id_)); + } } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { LOG_WARN("get tenant schema failed", K(ret), K(tenant_id_)); } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, target_object_id_, is_index_exist))) { diff --git a/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp b/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp index 514fb41b3..25c858b5a 100644 --- a/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp +++ b/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp @@ -38,15 +38,15 @@ ObDropPrimaryKeyTask::~ObDropPrimaryKeyTask() { } -int ObDropPrimaryKeyTask::init(const uint64_t tenant_id, const int64_t task_id, const share::ObDDLType &ddl_type, - const int64_t data_table_id, const int64_t dest_table_id, const int64_t schema_version, const int64_t parallelism, - const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, - const int64_t task_status, const int64_t snapshot_version) +int ObDropPrimaryKeyTask::init(const ObTableSchema* src_table_schema, const ObTableSchema* dst_table_schema, + const int64_t task_id, const share::ObDDLType &ddl_type, const int64_t parallelism, + const int64_t consumer_group_id, const int32_t sub_task_trace_id, + const obrpc::ObAlterTableArg &alter_table_arg, const uint64_t tenant_data_version, + const int64_t task_status,const int64_t snapshot_version ) { int ret = OB_SUCCESS; - if (OB_FAIL(ObTableRedefinitionTask::init(tenant_id, tenant_id, task_id, ddl_type, data_table_id, - dest_table_id, schema_version, schema_version, parallelism, consumer_group_id, - sub_task_trace_id, alter_table_arg, task_status, snapshot_version))) { + if (OB_FAIL(ObTableRedefinitionTask::init(src_table_schema, dst_table_schema, task_id, ddl_type, parallelism, consumer_group_id, + sub_task_trace_id, 
alter_table_arg, tenant_data_version, task_status, snapshot_version))) { LOG_WARN("fail to init ObDropPrimaryKeyTask", K(ret)); } else { set_gmt_create(ObTimeUtility::current_time()); diff --git a/src/rootserver/ddl_task/ob_drop_primary_key_task.h b/src/rootserver/ddl_task/ob_drop_primary_key_task.h index e87352aa4..8862fac7f 100644 --- a/src/rootserver/ddl_task/ob_drop_primary_key_task.h +++ b/src/rootserver/ddl_task/ob_drop_primary_key_task.h @@ -28,16 +28,15 @@ public: ObDropPrimaryKeyTask(); virtual ~ObDropPrimaryKeyTask(); int init( - const uint64_t tenant_id, + const ObTableSchema* src_table_schema, + const ObTableSchema* dst_table_schema, const int64_t task_id, const share::ObDDLType &ddl_type, - const int64_t data_table_id, - const int64_t dest_table_id, - const int64_t schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, + const uint64_t tenant_data_version, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); virtual int process() override; diff --git a/src/rootserver/ddl_task/ob_index_build_task.cpp b/src/rootserver/ddl_task/ob_index_build_task.cpp index eeae552f7..dfe684c34 100755 --- a/src/rootserver/ddl_task/ob_index_build_task.cpp +++ b/src/rootserver/ddl_task/ob_index_build_task.cpp @@ -102,7 +102,8 @@ int ObIndexSSTableBuildTask::process() false/*use_heap_table_ddl*/, !data_schema->is_user_hidden_table()/*use_schema_version_hint_for_src_table*/, nullptr, - sql_string))) { + sql_string, + compact_level_))) { LOG_WARN("fail to generate build replica sql", K(ret)); } else if (OB_FAIL(data_schema->is_need_padding_for_generated_column(need_padding))) { LOG_WARN("fail to check need padding", K(ret)); @@ -146,10 +147,60 @@ int ObIndexSSTableBuildTask::process() LOG_WARN("ddl sim failure: create index build sstable failed", K(ret), K(tenant_id_), K(task_id_)); } else if 
(OB_FAIL(user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, oracle_mode ? ObCompatibilityMode::ORACLE_MODE : ObCompatibilityMode::MYSQL_MODE, &session_param, sql_exec_addr))) { - LOG_WARN("fail to execute build replica sql", K(ret), K(tenant_id_)); - } else if (OB_FAIL(ObCheckTabletDataComplementOp::check_finish_report_checksum(tenant_id_, dest_table_id_, execution_id_, task_id_))) { - LOG_WARN("fail to check sstable checksum_report_finish", - K(ret), K(tenant_id_), K(dest_table_id_), K(execution_id_), K(task_id_)); + if (ret == OB_SERVER_OUTOF_DISK_SPACE && + data_format_version_ >= DATA_VERSION_4_3_0_0) { + // if version >= 4.3.0, would retry with compression. + // use tmp_ret to avoid ret being reset. + int tmp_ret = OB_SUCCESS; + sql_string.reuse(); + SortCompactLevel compress_level = SORT_DEFAULT_LEVEL; + switch (compact_level_) { + case share::SORT_DEFAULT_LEVEL: { + compress_level = share::SORT_COMPRESSION_LEVEL; + break; + } + case share::SORT_COMPACT_LEVEL: { + compress_level = share::SORT_COMPRESSION_COMPACT_LEVEL; + break; + } + case share::SORT_ENCODE_LEVEL: { + compress_level = share::SORT_COMPRESSION_ENCODE_LEVEL; + break; + } + default: { + tmp_ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected compact level", K(tmp_ret), K(compact_level_)); + } + } + if (tmp_ret != OB_SUCCESS) { + } else if (OB_SUCCESS != (tmp_ret = ObDDLUtil::generate_build_replica_sql(tenant_id_, data_table_id_, + dest_table_id_, + data_schema->get_schema_version(), + snapshot_version_, + execution_id_, + task_id_, + parallelism_, + false/*use_heap_table_ddl*/, + !data_schema->is_user_hidden_table()/*use_schema_version_hint_for_src_table*/, + nullptr, + sql_string, + compress_level))) { + LOG_WARN("fail to generate build replica sql", K(tmp_ret)); + } else if (OB_SUCCESS != (tmp_ret = user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, + oracle_mode ? 
ObCompatibilityMode::ORACLE_MODE : ObCompatibilityMode::MYSQL_MODE, &session_param, sql_exec_addr))) { + LOG_WARN("fail to execute build replica sql", K(tmp_ret), K(tenant_id_)); + } else { + ret = OB_SUCCESS; + } + } else { + LOG_WARN("fail to execute build replica sql", K(ret), K(tenant_id_)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObCheckTabletDataComplementOp::check_finish_report_checksum(tenant_id_, dest_table_id_, execution_id_, task_id_))) { + LOG_WARN("fail to check sstable checksum_report_finish", + K(ret), K(tenant_id_), K(dest_table_id_), K(execution_id_), K(task_id_)); + } } } } @@ -199,7 +250,9 @@ ObAsyncTask *ObIndexSSTableBuildTask::deep_copy(char *buf, const int64_t buf_siz trace_id_, parallelism_, root_service_, - inner_sql_exec_addr_); + inner_sql_exec_addr_, + compact_level_, + data_format_version_); if (OB_SUCCESS != (task->set_nls_format(nls_date_format_, nls_timestamp_format_, nls_timestamp_tz_format_))) { task->~ObIndexSSTableBuildTask(); task = nullptr; @@ -214,7 +267,7 @@ ObIndexBuildTask::ObIndexBuildTask() : ObDDLTask(ObDDLType::DDL_CREATE_INDEX), index_table_id_(target_object_id_), is_unique_index_(false), is_global_index_(false), root_service_(nullptr), snapshot_held_(false), is_sstable_complete_task_submitted_(false), sstable_complete_request_time_(0), sstable_complete_ts_(0), - check_unique_snapshot_(0), complete_sstable_job_ret_code_(INT64_MAX), create_index_arg_() + check_unique_snapshot_(0), complete_sstable_job_ret_code_(INT64_MAX), create_index_arg_(), target_cg_cnt_(0) { } @@ -354,11 +407,11 @@ int ObIndexBuildTask::init( const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg &create_index_arg, const int64_t parent_task_id /* = 0 */, + const uint64_t tenant_data_version, const int64_t task_status /* = TaskStatus::PREPARE */, const int64_t snapshot_version /* = 0 */) { int ret = OB_SUCCESS; - uint64_t tenant_data_format_version = 0; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); 
@@ -373,6 +426,7 @@ int ObIndexBuildTask::init( && OB_INVALID_ID != tenant_id && index_schema != nullptr && schema_version > 0 + && tenant_data_version > 0 && (task_status >= ObDDLTaskStatus::PREPARE && task_status <= ObDDLTaskStatus::SUCCESS) && task_id > 0))) { ret = OB_INVALID_ARGUMENT; @@ -383,8 +437,6 @@ int ObIndexBuildTask::init( } else if (OB_ISNULL(index_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("fail to get table schema", K(ret)); - } else if (OB_FAIL(ObShareUtil::fetch_current_data_version(*GCTX.sql_proxy_, tenant_id, tenant_data_format_version))) { - LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else if (OB_UNLIKELY((ObIndexArg::ADD_MLOG == create_index_arg_.index_action_type_) && (!index_schema->is_mlog_table()))) { ret = OB_ERR_UNEXPECTED; @@ -413,7 +465,7 @@ int ObIndexBuildTask::init( parent_task_id_ = parent_task_id; task_version_ = OB_INDEX_BUILD_TASK_VERSION; start_time_ = ObTimeUtility::current_time(); - data_format_version_ = tenant_data_format_version; + data_format_version_ = tenant_data_version; if (OB_SUCC(ret)) { task_status_ = static_cast(task_status); } @@ -425,8 +477,14 @@ int ObIndexBuildTask::init( dst_schema_version_ = schema_version_; is_inited_ = true; } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_schema->get_store_column_group_count(target_cg_cnt_))) { + LOG_WARN("fail to get column group cnt", K(ret), K(index_schema)); + } ddl_tracing_.open(); } + return ret; } @@ -864,7 +922,9 @@ int ObIndexBuildTask::send_build_single_replica_request() trace_id_, parallelism_, root_service_, - create_index_arg_.inner_sql_exec_addr_); + create_index_arg_.inner_sql_exec_addr_, + create_index_arg_.compact_level_, + data_format_version_); if (OB_FAIL(task.set_nls_format(create_index_arg_.nls_date_format_, create_index_arg_.nls_timestamp_format_, create_index_arg_.nls_timestamp_tz_format_))) { @@ -1507,17 +1567,36 @@ int ObIndexBuildTask::collect_longops_stat(ObLongopsValue &value) case ObDDLTaskStatus::REDEFINITION: { 
int64_t row_scanned = 0; int64_t row_sorted = 0; - int64_t row_inserted = 0; - if (OB_FAIL(gather_redefinition_stats(tenant_id_, task_id_, *GCTX.sql_proxy_, row_scanned, row_sorted, row_inserted))) { + int64_t row_inserted_cg = 0; + int64_t row_inserted_file = 0; + + if (OB_FAIL(gather_redefinition_stats(tenant_id_, task_id_, *GCTX.sql_proxy_, row_scanned, row_sorted, row_inserted_cg, row_inserted_file))) { LOG_WARN("failed to gather redefinition stats", K(ret)); - } else if (OB_FAIL(databuff_printf(stat_info_.message_, - MAX_LONG_OPS_MESSAGE_LENGTH, - pos, - "STATUS: REPLICA BUILD, ROW_SCANNED: %ld, ROW_SORTED: %ld, ROW_INSERTED: %ld", - row_scanned, - row_sorted, - row_inserted))) { - LOG_WARN("failed to print", K(ret)); + } + + if (OB_FAIL(ret)){ + } else if (target_cg_cnt_ > 1) { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: REPLICA BUILD, ROW_SCANNED: %ld, ROW_SORTED: %ld, ROW_INSERTED_INTO_TMP_FILE: %ld, ROW_INSERTED: %ld out of %ld column group rows", + row_scanned, + row_sorted, + row_inserted_file, + row_inserted_cg, + row_scanned * target_cg_cnt_))) { + LOG_WARN("failed to print", K(ret)); + } + } else { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: REPLICA BUILD, ROW_SCANNED: %ld, ROW_SORTED: %ld, ROW_INSERTED: %ld", + row_scanned, + row_sorted, + row_inserted_file))) { + LOG_WARN("failed to print", K(ret)); + } } break; } @@ -1583,7 +1662,7 @@ int ObIndexBuildTask::serialize_params_to_message(char *buf, const int64_t buf_l } else if (OB_FAIL(create_index_arg_.serialize(buf, buf_len, pos))) { LOG_WARN("serialize create index arg failed", K(ret)); } else { - LST_DO_CODE(OB_UNIS_ENCODE, check_unique_snapshot_); + LST_DO_CODE(OB_UNIS_ENCODE, check_unique_snapshot_, target_cg_cnt_); } return ret; } @@ -1604,7 +1683,7 @@ int ObIndexBuildTask::deserlize_params_from_message(const uint64_t tenant_id, co } else if (OB_FAIL(deep_copy_table_arg(allocator_, 
tmp_arg, create_index_arg_))) { LOG_WARN("deep copy create index arg failed", K(ret)); } else { - LST_DO_CODE(OB_UNIS_DECODE, check_unique_snapshot_); + LST_DO_CODE(OB_UNIS_DECODE, check_unique_snapshot_, target_cg_cnt_); } return ret; } @@ -1613,5 +1692,6 @@ int64_t ObIndexBuildTask::get_serialize_param_size() const { return create_index_arg_.get_serialize_size() + serialization::encoded_length_i64(check_unique_snapshot_) - + ObDDLTask::get_serialize_param_size(); + + ObDDLTask::get_serialize_param_size() + + serialization::encoded_length_i64(target_cg_cnt_); } diff --git a/src/rootserver/ddl_task/ob_index_build_task.h b/src/rootserver/ddl_task/ob_index_build_task.h index ecdc6c365..a49882bd0 100644 --- a/src/rootserver/ddl_task/ob_index_build_task.h +++ b/src/rootserver/ddl_task/ob_index_build_task.h @@ -35,11 +35,14 @@ public: const common::ObCurTraceId::TraceId &trace_id, const int64_t parallelism, ObRootService *root_service, - const common::ObAddr &inner_sql_exec_addr) + const common::ObAddr &inner_sql_exec_addr, + const share::SortCompactLevel compact_level = share::SORT_DEFAULT_LEVEL, + const int64_t data_format_version = 0) : task_id_(task_id), tenant_id_(tenant_id), data_table_id_(data_table_id), dest_table_id_(dest_table_id), schema_version_(schema_version), snapshot_version_(snapshot_version), execution_id_(execution_id), consumer_group_id_(consumer_group_id), trace_id_(trace_id), parallelism_(parallelism), allocator_("IdxSSTBuildTask"), - root_service_(root_service), inner_sql_exec_addr_(inner_sql_exec_addr) + root_service_(root_service), inner_sql_exec_addr_(inner_sql_exec_addr), compact_level_(compact_level), + data_format_version_(data_format_version) { set_retry_times(0); } @@ -55,7 +58,7 @@ public: void add_event_info(const int ret, const ObString &ddl_event_stmt); TO_STRING_KV(K_(data_table_id), K_(dest_table_id), K_(schema_version), K_(snapshot_version), K_(execution_id), K_(consumer_group_id), K_(trace_id), K_(parallelism), K_(nls_date_format), 
- K_(nls_timestamp_format), K_(nls_timestamp_tz_format)); + K_(nls_timestamp_format), K_(nls_timestamp_tz_format), K_(compact_level), K_(data_format_version)); private: int64_t task_id_; @@ -74,6 +77,8 @@ private: ObString nls_timestamp_tz_format_; ObRootService *root_service_; common::ObAddr inner_sql_exec_addr_; + share::SortCompactLevel compact_level_; + int64_t data_format_version_; DISALLOW_COPY_AND_ASSIGN(ObIndexSSTableBuildTask); }; @@ -95,6 +100,7 @@ public: const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg &create_index_arg, const int64_t parent_task_id /* = 0 */, + const uint64_t tenant_data_version, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); int init(const ObDDLTaskRecord &task_record); @@ -119,7 +125,7 @@ public: virtual bool support_longops_monitoring() const override { return true; } static int deep_copy_index_arg(common::ObIAllocator &allocator, const obrpc::ObCreateIndexArg &source_arg, obrpc::ObCreateIndexArg &dest_arg); INHERIT_TO_STRING_KV("ObDDLTask", ObDDLTask, K(index_table_id_),K(snapshot_held_), K(is_sstable_complete_task_submitted_), - K(sstable_complete_ts_), K(check_unique_snapshot_), K_(redefinition_execution_id), K(create_index_arg_)); + K(sstable_complete_ts_), K(check_unique_snapshot_), K_(redefinition_execution_id), K(create_index_arg_), K(target_cg_cnt_)); private: int prepare(); int wait_trans_end(); @@ -142,6 +148,7 @@ private: const ObTableSchema *index_table_schema, bool &need_acquire); bool is_sstable_complete_task_submitted(); + int check_target_cg_cnt(); private: static const int64_t OB_INDEX_BUILD_TASK_VERSION = 1; using ObDDLTask::is_inited_; @@ -164,6 +171,7 @@ private: int64_t complete_sstable_job_ret_code_; int64_t redefinition_execution_id_; obrpc::ObCreateIndexArg create_index_arg_; // this is not a valid arg, only has nls formats for now + int64_t target_cg_cnt_; }; } // end namespace rootserver diff --git 
a/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp b/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp index 2968afee1..f29bcfffe 100755 --- a/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp +++ b/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp @@ -37,20 +37,30 @@ ObRecoverRestoreTableTask::~ObRecoverRestoreTableTask() { } -int ObRecoverRestoreTableTask::init(const uint64_t src_tenant_id, const uint64_t dst_tenant_id, const int64_t task_id, - const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t src_schema_version, - const int64_t dst_schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, - const ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) +int ObRecoverRestoreTableTask::init( + const ObTableSchema* src_table_schema, const ObTableSchema* dst_table_schema, + const int64_t task_id, const share::ObDDLType &ddl_type, const int64_t parallelism, + const int64_t consumer_group_id, const int32_t sub_task_trace_id, + const obrpc::ObAlterTableArg &alter_table_arg, const uint64_t tenant_data_version, const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); + } else if (OB_ISNULL(src_table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("src schema should not be null", K(ret)); + } else if (OB_ISNULL(dst_table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("dst_table schema should not be null", K(ret)); + } else if ((!src_table_schema->is_valid()) || (!dst_table_schema->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("src_talbe or dst_table is invalid", K(ret), KPC(src_table_schema), KPC(dst_table_schema)); } else if (OB_UNLIKELY(ObDDLType::DDL_TABLE_RESTORE != ddl_type)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arg", K(ret), K(ddl_type), K(src_tenant_id), 
K(data_table_id)); - } else if (OB_FAIL(ObTableRedefinitionTask::init(src_tenant_id, dst_tenant_id, task_id, ddl_type, data_table_id, - dest_table_id, src_schema_version, dst_schema_version, parallelism, consumer_group_id, sub_task_trace_id, alter_table_arg, task_status, 0/*snapshot*/))) { + LOG_WARN("invalid arg", K(ret), K(ddl_type), KPC(src_table_schema), KPC(dst_table_schema)); + } else if (OB_FAIL(ObTableRedefinitionTask::init(src_table_schema, dst_table_schema, task_id, ddl_type, parallelism, consumer_group_id, + sub_task_trace_id, alter_table_arg, tenant_data_version, task_status, 0/*snapshot*/))) { LOG_WARN("fail to init ObDropPrimaryKeyTask", K(ret)); } else { execution_id_ = 1L; diff --git a/src/rootserver/ddl_task/ob_recover_restore_table_task.h b/src/rootserver/ddl_task/ob_recover_restore_table_task.h index 4e364e923..a3118cd85 100644 --- a/src/rootserver/ddl_task/ob_recover_restore_table_task.h +++ b/src/rootserver/ddl_task/ob_recover_restore_table_task.h @@ -32,18 +32,15 @@ public: ObRecoverRestoreTableTask(); virtual ~ObRecoverRestoreTableTask(); int init( - const uint64_t src_tenant_id, - const uint64_t dst_tenant_id, + const ObTableSchema* src_table_schema, + const ObTableSchema* dst_table_schema, const int64_t task_id, const share::ObDDLType &ddl_type, - const int64_t data_table_id, - const int64_t dest_table_id, - const int64_t src_schema_version, - const int64_t dest_schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, + const uint64_t tenant_data_version, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); int init(const ObDDLTaskRecord &task_record); diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp index e2ca734aa..c48e2f9ae 100755 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp +++ 
b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp @@ -39,7 +39,7 @@ ObTableRedefinitionTask::ObTableRedefinitionTask() has_rebuild_index_(false), has_rebuild_constraint_(false), has_rebuild_foreign_key_(false), allocator_(lib::ObLabel("RedefTask")), is_copy_indexes_(true), is_copy_triggers_(true), is_copy_constraints_(true), is_copy_foreign_keys_(true), - is_ignore_errors_(false), is_do_finish_(false) + is_ignore_errors_(false), is_do_finish_(false), target_cg_cnt_(0) { } @@ -47,61 +47,69 @@ ObTableRedefinitionTask::~ObTableRedefinitionTask() { } -int ObTableRedefinitionTask::init(const uint64_t src_tenant_id, const uint64_t dst_tenant_id, const int64_t task_id, - const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t src_schema_version, - const int64_t dst_schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, - const ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) +int ObTableRedefinitionTask::init(const ObTableSchema* src_table_schema, const ObTableSchema* dst_table_schema, const int64_t task_id, + const share::ObDDLType &ddl_type, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, + const ObAlterTableArg &alter_table_arg, const uint64_t tenant_data_version, const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; - uint64_t tenant_data_format_version = 0; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("ObTableRedefinitionTask has already been inited", K(ret)); - } else if (OB_UNLIKELY(OB_INVALID_ID == src_tenant_id || OB_INVALID_ID == dst_tenant_id - || task_id <= 0 || OB_INVALID_ID == data_table_id || OB_INVALID_ID == dest_table_id - || src_schema_version <= 0 || dst_schema_version <= 0 - || task_status < ObDDLTaskStatus::PREPARE || task_status > ObDDLTaskStatus::SUCCESS || snapshot_version < 0 - || (snapshot_version > 0 && 
task_status < ObDDLTaskStatus::WAIT_TRANS_END))) { + } else if (OB_ISNULL(src_table_schema) || OB_ISNULL(dst_table_schema)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(src_tenant_id), K(dst_tenant_id), K(task_id), - K(data_table_id), K(dest_table_id), K(src_schema_version), K(dst_schema_version), + LOG_WARN("invalid argument", K(ret), KP(src_table_schema), KP(dst_table_schema)); + } else if (OB_UNLIKELY( !src_table_schema->is_valid() + || !dst_table_schema->is_valid() + || task_id <= 0 || snapshot_version < 0 || tenant_data_version <= 0 + || task_status < ObDDLTaskStatus::PREPARE || task_status > ObDDLTaskStatus::SUCCESS + || (snapshot_version > 0 && task_status < ObDDLTaskStatus::WAIT_TRANS_END))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(src_table_schema), KPC(dst_table_schema), K(task_id), K(task_status), K(snapshot_version)); } else if (OB_FAIL(deep_copy_table_arg(allocator_, alter_table_arg, alter_table_arg_))) { LOG_WARN("deep copy alter table arg failed", K(ret)); } else if (OB_FAIL(set_ddl_stmt_str(alter_table_arg_.ddl_stmt_str_))) { LOG_WARN("set ddl stmt str failed", K(ret)); - } else if (OB_FAIL(ObShareUtil::fetch_current_data_version(*GCTX.sql_proxy_, src_tenant_id, tenant_data_format_version))) { - LOG_WARN("get min data version failed", K(ret), K(src_tenant_id)); } else { set_gmt_create(ObTimeUtility::current_time()); consumer_group_id_ = consumer_group_id; sub_task_trace_id_ = sub_task_trace_id; task_type_ = ddl_type; - object_id_ = data_table_id; - target_object_id_ = dest_table_id; - schema_version_ = src_schema_version; + object_id_ = src_table_schema->get_table_id(); + target_object_id_ = dst_table_schema->get_table_id(); + + /* only table restore set schema_serson = src, other use dst*/ + if (ObDDLType::DDL_TABLE_RESTORE == ddl_type) { + schema_version_ = src_table_schema->get_schema_version(); + } else { + schema_version_ = dst_table_schema->get_schema_version(); + } + task_status_ = 
static_cast(task_status); snapshot_version_ = snapshot_version; - tenant_id_ = src_tenant_id; + tenant_id_ = src_table_schema->get_tenant_id(); task_version_ = OB_TABLE_REDEFINITION_TASK_VERSION; task_id_ = task_id; parallelism_ = parallelism; - data_format_version_ = tenant_data_format_version; + data_format_version_ = tenant_data_version; start_time_ = ObTimeUtility::current_time(); // For common offline ddl, dest_tenant_id is also the tenant_id_, i.e., tenant id of the data table. // But for DDL_RESTORE_TABLE, dst_tenant_id_ is different to the tenant_id_. - dst_tenant_id_ = dst_tenant_id; - dst_schema_version_ = dst_schema_version; - alter_table_arg_.alter_table_schema_.set_tenant_id(src_tenant_id); - alter_table_arg_.alter_table_schema_.set_schema_version(src_schema_version); + dst_tenant_id_ = dst_table_schema->get_tenant_id(); + dst_schema_version_ = dst_table_schema->get_schema_version(); + alter_table_arg_.alter_table_schema_.set_tenant_id(tenant_id_); + alter_table_arg_.alter_table_schema_.set_schema_version(schema_version_); alter_table_arg_.exec_tenant_id_ = dst_tenant_id_; - if (OB_FAIL(init_ddl_task_monitor_info(target_object_id_))) { + if (OB_FAIL(dst_table_schema->get_store_column_group_count(target_cg_cnt_))) { + LOG_WARN("fail to get target cg cnt", K(ret), KPC(dst_table_schema)); + } else if (OB_FAIL(init_ddl_task_monitor_info(target_object_id_))) { LOG_WARN("init ddl task monitor info failed", K(ret)); } else { is_inited_ = true; ddl_tracing_.open(); } } + LOG_INFO("init table redefinition task finished", K(ret), KPC(this)); return ret; } @@ -167,6 +175,7 @@ int ObTableRedefinitionTask::init(const ObDDLTaskRecord &task_record) ddl_tracing_.open_for_recovery(); } } + LOG_INFO("init table redefinition task finished", K(ret), KPC(this)); return ret; } @@ -283,7 +292,8 @@ int ObTableRedefinitionTask::send_build_replica_request_by_sql() alter_table_arg_.mview_refresh_info_.is_mview_complete_refresh_, 
alter_table_arg_.mview_refresh_info_.mview_table_id_, GCTX.root_service_, - alter_table_arg_.inner_sql_exec_addr_); + alter_table_arg_.inner_sql_exec_addr_, + data_format_version_); if (OB_FAIL(root_service->get_ddl_service().get_tenant_schema_guard_with_version_in_inner_table(tenant_id_, schema_guard))) { LOG_WARN("get schema guard failed", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, orig_table_schema))) { @@ -531,6 +541,7 @@ int ObTableRedefinitionTask::copy_table_indexes() &create_index_arg, task_id_); param.sub_task_trace_id_ = sub_task_trace_id_; + param.tenant_data_version_ = data_format_version_; if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { if (OB_ENTRY_EXIST == ret) { ret = OB_SUCCESS; @@ -1037,7 +1048,8 @@ int64_t ObTableRedefinitionTask::get_serialize_param_size() const return alter_table_arg_.get_serialize_size() + ObDDLTask::get_serialize_param_size() + serialization::encoded_length_i8(copy_indexes) + serialization::encoded_length_i8(copy_triggers) + serialization::encoded_length_i8(copy_constraints) + serialization::encoded_length_i8(copy_foreign_keys) - + serialization::encoded_length_i8(ignore_errors) + serialization::encoded_length_i8(do_finish); + + serialization::encoded_length_i8(ignore_errors) + serialization::encoded_length_i8(do_finish) + + serialization::encoded_length_i64(target_cg_cnt_); } int ObTableRedefinitionTask::serialize_params_to_message(char *buf, const int64_t buf_len, int64_t &pos) const @@ -1068,6 +1080,8 @@ int ObTableRedefinitionTask::serialize_params_to_message(char *buf, const int64_ LOG_WARN("fail to serialize is_ignore_errors", K(ret)); } else if (OB_FAIL(serialization::encode_i8(buf, buf_len, pos, do_finish))) { LOG_WARN("fail to serialize is_do_finish", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, buf_len, pos, target_cg_cnt_))) { + LOG_WARN("fail to serialize target_cg_cnt", K(ret)); } 
FLOG_INFO("serialize message for table redefinition", K(ret), K(copy_indexes), K(copy_triggers), K(copy_constraints), K(copy_foreign_keys), K(ignore_errors), K(do_finish), K(*this)); @@ -1108,7 +1122,10 @@ int ObTableRedefinitionTask::deserlize_params_from_message(const uint64_t tenant LOG_WARN("fail to deserialize is_ignore_errors_", K(ret)); } else if (OB_FAIL(serialization::decode_i8(buf, data_len, pos, &do_finish))) { LOG_WARN("fail to deserialize is_do_finish_", K(ret)); - } else { + } else if (OB_FAIL(serialization::decode_i64(buf, data_len, pos, &target_cg_cnt_))) { + LOG_WARN("fail to deserialize target_cg_ctn_", K(ret)); + } + else { is_copy_indexes_ = static_cast(copy_indexes); is_copy_triggers_ = static_cast(copy_triggers); is_copy_constraints_ = static_cast(copy_constraints); @@ -1197,17 +1214,37 @@ int ObTableRedefinitionTask::collect_longops_stat(ObLongopsValue &value) case ObDDLTaskStatus::REDEFINITION: { int64_t row_scanned = 0; int64_t row_sorted = 0; - int64_t row_inserted = 0; - if (OB_FAIL(gather_redefinition_stats(dst_tenant_id_, task_id_, *GCTX.sql_proxy_, row_scanned, row_sorted, row_inserted))) { + int64_t row_inserted_cg = 0; + int64_t row_inserted_file = 0; + + if (OB_FAIL(gather_redefinition_stats(dst_tenant_id_, task_id_, *GCTX.sql_proxy_, row_scanned, row_sorted, row_inserted_cg, row_inserted_file))) { LOG_WARN("failed to gather redefinition stats", K(ret)); - } else if (OB_FAIL(databuff_printf(stat_info_.message_, - MAX_LONG_OPS_MESSAGE_LENGTH, - pos, - "STATUS: REPLICA BUILD, ROW_SCANNED: %ld, ROW_SORTED: %ld, ROW_INSERTED: %ld", - row_scanned, - row_sorted, - row_inserted))) { - LOG_WARN("failed to print", K(ret)); + } + + + if (OB_FAIL(ret)){ + } else if (target_cg_cnt_> 1) { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: REPLICA BUILD, ROW_SCANNED: %ld, ROW_SORTED: %ld, ROW_INSERTED_TMP_FILE: %ld, ROW_INSERTED: %ld out of %ld column group rows", + row_scanned, + row_sorted, + 
row_inserted_file, + row_inserted_cg, + row_scanned * target_cg_cnt_))) { + LOG_WARN("failed to print", K(ret)); + } + } else { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: REPLICA BUILD, ROW_SCANNED: %ld, ROW_SORTED: %ld, ROW_INSERTED: %ld", + row_scanned, + row_sorted, + row_inserted_file))) { + LOG_WARN("failed to print", K(ret)); + } } break; } diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.h b/src/rootserver/ddl_task/ob_table_redefinition_task.h index bb0f7a8c2..93a4037ef 100644 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_table_redefinition_task.h @@ -32,18 +32,15 @@ public: ObTableRedefinitionTask(); virtual ~ObTableRedefinitionTask(); int init( - const uint64_t tenant_id, - const uint64_t dest_tenant_id, + const ObTableSchema* src_table_schema, + const ObTableSchema* dst_table_schema, const int64_t task_id, const share::ObDDLType &ddl_type, - const int64_t data_table_id, - const int64_t dest_table_id, - const int64_t schema_version, - const int64_t dest_schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, + const uint64_t tenant_data_version, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); int init(const ObDDLTaskRecord &task_record); @@ -72,7 +69,7 @@ public: INHERIT_TO_STRING_KV("ObDDLRedefinitionTask", ObDDLRedefinitionTask, K(has_rebuild_index_), K(has_rebuild_constraint_), K(has_rebuild_foreign_key_), K(is_copy_indexes_), K(is_copy_triggers_), K(is_copy_constraints_), - K(is_copy_foreign_keys_), K(is_ignore_errors_), K(is_do_finish_)); + K(is_copy_foreign_keys_), K(is_ignore_errors_), K(is_do_finish_), K(target_cg_cnt_)); protected: int table_redefinition(const share::ObDDLTaskStatus next_task_status); int copy_table_dependent_objects(const share::ObDDLTaskStatus next_task_status); @@ 
-99,6 +96,7 @@ private: int check_modify_autoinc(bool &modify_autoinc); int check_use_heap_table_ddl_plan(bool &use_heap_table_ddl_plan); int get_direct_load_job_stat(common::ObArenaAllocator &allocator, sql::ObLoadDataStat &job_stat); + int check_target_cg_cnt(); private: static const int64_t OB_TABLE_REDEFINITION_TASK_VERSION = 1L; bool has_rebuild_index_; @@ -111,6 +109,7 @@ private: bool is_copy_foreign_keys_; bool is_ignore_errors_; bool is_do_finish_; + int64_t target_cg_cnt_; }; } // end namespace rootserver diff --git a/src/rootserver/ob_bootstrap.cpp b/src/rootserver/ob_bootstrap.cpp index 8ecb9fa19..0328d758e 100644 --- a/src/rootserver/ob_bootstrap.cpp +++ b/src/rootserver/ob_bootstrap.cpp @@ -679,16 +679,19 @@ int ObBootstrap::prepare_create_partition( common::ObArray table_schema_array; common::ObArray table_schema_ptrs; common::ObArray ls_id_array; + common::ObArray need_create_empty_majors; if (OB_FAIL(generate_table_schema_array_for_create_partition(tschema, table_schema_array))) { LOG_WARN("fail to generate table schema array", KR(ret)); } else if (OB_UNLIKELY(table_schema_array.count() < 1)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("generate table schema count is unexpected", KR(ret)); - } else if (OB_FAIL(table_schema_ptrs.reserve(table_schema_array.count()))) { + } else if (OB_FAIL(table_schema_ptrs.reserve(table_schema_array.count())) + || OB_FAIL(need_create_empty_majors.reserve(table_schema_array.count()))) { LOG_WARN("Fail to reserve rowkey column array", KR(ret)); } else { for (int i = 0; i < table_schema_array.count() && OB_SUCC(ret); ++i) { - if (OB_FAIL(table_schema_ptrs.push_back(&table_schema_array.at(i)))) { + if (OB_FAIL(table_schema_ptrs.push_back(&table_schema_array.at(i))) + || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("fail to push back", KR(ret), K(table_schema_array)); } } @@ -702,7 +705,9 @@ int ObBootstrap::prepare_create_partition( if (OB_FAIL(ret)) { } else if 
(OB_FAIL(creator.add_create_tablets_of_tables_arg( table_schema_ptrs, - ls_id_array))) { + ls_id_array, + DATA_CURRENT_VERSION, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("fail to add create tablet arg", KR(ret)); } } diff --git a/src/rootserver/ob_ddl_operator.cpp b/src/rootserver/ob_ddl_operator.cpp index c282867be..4798cb25f 100644 --- a/src/rootserver/ob_ddl_operator.cpp +++ b/src/rootserver/ob_ddl_operator.cpp @@ -3998,10 +3998,69 @@ int ObDDLOperator::update_single_column(common::ObMySQLTransaction &trans, LOG_WARN("fail to gen new schema_version", K(ret), K(tenant_id)); } else { column_schema.set_schema_version(new_schema_version); + const ObColumnSchemaV2 *orig_column_schema = origin_table_schema.get_column_schema(column_schema.get_column_id()); if (OB_FAIL(schema_service_impl->get_table_sql_service().update_single_column( trans, origin_table_schema, new_table_schema, column_schema, true /* record_ddl_operation */))) { RS_LOG(WARN, "failed to update single column", K(ret)); + } else if (OB_ISNULL(orig_column_schema)) { + ret = OB_ERR_UNEXPECTED; + RS_LOG(WARN, "failed to get orig column schema", K(ret), K(origin_table_schema), K(column_schema)); + } else if (OB_FAIL(update_single_column_group(trans, origin_table_schema, *orig_column_schema, column_schema))) { + RS_LOG(WARN, "fail to update single column group", K(ret)); + } + } + return ret; +} + +int ObDDLOperator::update_single_column_group(common::ObMySQLTransaction &trans, + const ObTableSchema &origin_table_schema, + const ObColumnSchemaV2 &origin_column_schema, + const ObColumnSchemaV2 &column_schema) +{ + int ret = OB_SUCCESS; + bool is_each_cg_exist = false; + char cg_name[OB_MAX_COLUMN_GROUP_NAME_LENGTH] = {'\0'}; + ObString cg_name_str(OB_MAX_COLUMN_GROUP_NAME_LENGTH, 0, cg_name); + const uint64_t tenant_id = origin_table_schema.get_tenant_id(); + ObColumnGroupSchema *ori_cg = nullptr; + ObSchemaService *schema_service_impl = schema_service_.get_schema_service(); + 
if (!origin_table_schema.is_valid() || !origin_column_schema.is_valid() || !column_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + RS_LOG(WARN, "Invalid arguemnt", K(ret), K(origin_table_schema), K(origin_column_schema), K(column_schema)); + } else if (origin_column_schema.get_column_name_str() == column_schema.get_column_name_str()) { + /* now only rename column will use this func, other skip*/ + } else if (!origin_table_schema.is_column_store_supported()) { + /* only support table need column group*/ + } else if (OB_FAIL(origin_table_schema.is_column_group_exist(OB_EACH_COLUMN_GROUP_NAME, is_each_cg_exist))) { + RS_LOG(WARN, "fail check whether each cg exist", K(ret)); + } else if (!is_each_cg_exist) { + /* if each cg not exist skip*/ + } else if (OB_FAIL(origin_column_schema.get_each_column_group_name(cg_name_str))) { + RS_LOG(WARN, "fail to get each column group name", K(ret)); + } else if (OB_FAIL(origin_table_schema.get_column_group_by_name(cg_name_str, ori_cg))) { + RS_LOG(WARN, "column group cannot get", K(cg_name_str), K(origin_table_schema)); + } else if (OB_ISNULL(ori_cg)) { + ret = OB_ERR_UNEXPECTED; + RS_LOG(WARN, "column group should not be null", K(ret), K(cg_name_str), + K(origin_column_schema), K(origin_table_schema)); + } else { + ObColumnGroupSchema new_cg; + if (OB_FAIL(new_cg.assign(*ori_cg))) { + RS_LOG(WARN, "fail to assign column group", K(ret), K(ori_cg)); + } else { + new_cg.set_schema_version(column_schema.get_schema_version()); + cg_name_str.set_length(0); + if (OB_FAIL(column_schema.get_each_column_group_name(cg_name_str))) { + RS_LOG(WARN, "fail to gen column group related column group name", K(ret), K(column_schema)); + } else if (OB_FAIL(new_cg.set_column_group_name(cg_name_str))) { + RS_LOG(WARN, "fail to set column group name", K(ret), K(new_cg), K(cg_name_str)); + } else if (OB_FAIL(schema_service_impl->get_table_sql_service().update_single_column_group(trans, + origin_table_schema, + *ori_cg, + new_cg))) { + RS_LOG(WARN,"fail 
to update single column_group", K(ret)); + } } } return ret; diff --git a/src/rootserver/ob_ddl_operator.h b/src/rootserver/ob_ddl_operator.h index 4c59b677f..3a53da4e6 100644 --- a/src/rootserver/ob_ddl_operator.h +++ b/src/rootserver/ob_ddl_operator.h @@ -292,10 +292,10 @@ public: const share::schema::ObTableSchema &inc_table_schema, common::ObIArray &part_array); int insert_column_groups(ObMySQLTransaction &trans, const ObTableSchema &new_table_schema); - int insert_column_ids_into_column_group(ObMySQLTransaction &trans, - const ObTableSchema &new_table_schema, - const ObIArray &column_ids, - const ObColumnGroupSchema &column_group); + int insert_column_ids_into_column_group(ObMySQLTransaction &trans, + const ObTableSchema &new_table_schema, + const ObIArray &column_ids, + const ObColumnGroupSchema &column_group); int insert_single_column(common::ObMySQLTransaction &trans, const share::schema::ObTableSchema &new_table_schema, share::schema::ObColumnSchemaV2 &new_column); @@ -977,6 +977,10 @@ public: const share::schema::ObTableSchema &origin_table_schema, const share::schema::ObTableSchema &new_table_schema, share::schema::ObColumnSchemaV2 &column_schema); + int update_single_column_group(common::ObMySQLTransaction &trans, + const ObTableSchema &origin_table_schema, + const ObColumnSchemaV2 &origin_column_schema, + const ObColumnSchemaV2 &new_column_schema); int update_partition_option(common::ObMySQLTransaction &trans, share::schema::ObTableSchema &table_schema); int update_check_constraint_state(common::ObMySQLTransaction &trans, diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index 7c87d939c..8c756070d 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -469,6 +469,7 @@ int ObDDLService::create_user_tables( int ObDDLService::create_inner_expr_index(ObMySQLTransaction &trans, const ObTableSchema &orig_table_schema, + const uint64_t tenant_data_version, ObTableSchema &new_table_schema, 
ObIArray &new_columns, ObTableSchema &index_schema) @@ -536,7 +537,7 @@ int ObDDLService::create_inner_expr_index(ObMySQLTransaction &trans, } if (OB_SUCC(ret) && index_schema.has_tablet() && OB_FAIL(create_index_tablet(index_schema, trans, schema_guard, - true/*need_check_tablet_cnt*/))) { + true/*need_check_tablet_cnt*/, tenant_data_version))) { LOG_WARN("fail to create_index_tablet", KR(ret), K(index_schema)); } } @@ -548,6 +549,7 @@ int ObDDLService::create_global_index( ObMySQLTransaction &trans, const obrpc::ObCreateIndexArg &arg, const share::schema::ObTableSchema &table_schema, + const uint64_t tenant_data_version, share::schema::ObTableSchema &index_schema) { int ret = OB_SUCCESS; @@ -555,7 +557,7 @@ int ObDDLService::create_global_index( LOG_WARN("variable is not init", K(ret)); } else if (OB_FAIL(table_schema.check_create_index_on_hidden_primary_key(index_schema))) { LOG_WARN("fail to check create global index on table", K(ret), K(index_schema)); - } else if (OB_FAIL(create_index_table(arg, index_schema, trans))) { + } else if (OB_FAIL(create_index_table(arg, tenant_data_version, index_schema, trans))) { LOG_WARN("fail to create global index", K(ret)); } return ret; @@ -564,6 +566,7 @@ int ObDDLService::create_global_index( int ObDDLService::create_global_inner_expr_index( ObMySQLTransaction &trans, const share::schema::ObTableSchema &orig_table_schema, + const uint64_t tenant_data_version, share::schema::ObTableSchema &new_table_schema, common::ObIArray &new_columns, share::schema::ObTableSchema &index_schema) @@ -572,7 +575,7 @@ int ObDDLService::create_global_inner_expr_index( if (OB_FAIL(check_inner_stat())) { LOG_WARN("variable is not init", K(ret)); } else if (OB_FAIL(create_inner_expr_index(trans, orig_table_schema, - new_table_schema, new_columns, index_schema))) { + tenant_data_version, new_table_schema, new_columns, index_schema))) { LOG_WARN("fail to create inner expr index", K(ret)); } return ret; @@ -581,6 +584,7 @@ int 
ObDDLService::create_global_inner_expr_index( // create_index_table is used by create index int ObDDLService::create_index_table( const obrpc::ObCreateIndexArg &arg, + const uint64_t tenant_data_version, ObTableSchema &table_schema, ObMySQLTransaction &sql_trans) { @@ -625,7 +629,7 @@ int ObDDLService::create_index_table( // For create index operation, generate ddl_stmt_str when index enables, but // for alter table add index operation, keep generating ddl_stmt_str same as 3.x while generating index schema. if (OB_FAIL(create_index_or_mlog_table_in_trans(table_schema, - nullptr/* ddl_stmt_str */, &sql_trans, schema_guard, true/*need_check_tablet_cnt*/))) { + nullptr/* ddl_stmt_str */, &sql_trans, schema_guard, true/*need_check_tablet_cnt*/, tenant_data_version))) { LOG_WARN("create_table_in_trans failed", KR(ret), K(arg), K(table_schema)); } } @@ -637,6 +641,7 @@ int ObDDLService::create_index_table( int ObDDLService::create_mlog_table( ObMySQLTransaction &sql_trans, const obrpc::ObCreateMLogArg &arg, + const uint64_t tenant_data_version, ObSchemaGetterGuard &schema_guard, ObTableSchema &table_schema) { @@ -692,7 +697,8 @@ int ObDDLService::create_mlog_table( &arg.ddl_stmt_str_, &sql_trans, schema_guard, - true /*need_check_tablet_cnt*/))) { + true /*need_check_tablet_cnt*/, + tenant_data_version))) { LOG_WARN("failed to create index or mlog table in trans", KR(ret), K(arg.ddl_stmt_str_), K(table_schema)); } else if (OB_FAIL(add_mlog(sql_trans, arg, schema_guard, table_schema))) { LOG_WARN("failed to add mlog", KR(ret)); @@ -704,16 +710,17 @@ int ObDDLService::create_mlog_tablet( ObMySQLTransaction &trans, ObSchemaGetterGuard &schema_guard, const ObTableSchema &mlog_schema, - const bool need_check_tablet_cnt) + const bool need_check_tablet_cnt, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; int64_t tenant_id = mlog_schema.get_tenant_id(); SCN frozen_scn; if (OB_FAIL(check_inner_stat())) { LOG_WARN("check_inner_stat error", K(is_inited()), KR(ret)); - 
} else if (!mlog_schema.is_mlog_table()) { + } else if (!mlog_schema.is_mlog_table() || tenant_data_version <= 0) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("arg must be materialized view log table", KR(ret), K(tenant_id)); + LOG_WARN("arg must be materialized view log table", KR(ret), K(tenant_id), K(tenant_data_version), K(mlog_schema)); } else if (OB_ISNULL(GCTX.root_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("root service is null", KR(ret)); @@ -731,19 +738,23 @@ int ObDDLService::create_mlog_tablet( const ObTableSchema *data_table_schema = NULL; const uint64_t data_table_id = mlog_schema.get_data_table_id(); ObSEArray schemas; + ObSEArray need_create_empty_majors; if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_table_schema))) { LOG_WARN("failed to get table schema", KR(ret), K(tenant_id), K(data_table_id)); } else if (OB_ISNULL(data_table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("data table schema not exists", KR(ret), K(data_table_id)); - } else if (OB_FAIL(schemas.push_back(&mlog_schema))) { + } else if (OB_FAIL(schemas.push_back(&mlog_schema)) + || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("failed to push back mlog schema", KR(ret), K(mlog_schema)); } else if (OB_FAIL(new_table_tablet_allocator.prepare_like(*data_table_schema))) { LOG_WARN("failed to prepare like data table schema", KR(ret), KPC(data_table_schema)); } else if (OB_FAIL(new_table_tablet_allocator.get_ls_id_array(ls_id_array))) { LOG_WARN("failed to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_local_aux_tables_arg( - schemas, data_table_schema, ls_id_array))) { + schemas, data_table_schema, ls_id_array, + tenant_data_version, + need_create_empty_majors))) { LOG_WARN("failed to add create tablets of local aux tables arg", KR(ret)); } else if (OB_FAIL(table_creator.execute())) { LOG_WARN("failed to execute create tablet", KR(ret)); @@ -2181,7 +2192,8 @@ int ObDDLService::get_obj_privs_ora(const uint64_t 
tenant_id, int ObDDLService::create_tablets_in_trans_for_mv_(ObIArray &table_schemas, ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, - ObSchemaGetterGuard &schema_guard) + ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; SCN frozen_scn; @@ -2228,7 +2240,9 @@ int ObDDLService::create_tablets_in_trans_for_mv_(ObIArray &table LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_table_arg( this_table, - ls_id_array))) { + ls_id_array, + tenant_data_version, + true /*need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(this_table)); } else if (OB_FAIL(get_last_schema_version(last_schema_version))) { LOG_WARN("get last schema version failed", KR(ret)); @@ -2258,7 +2272,8 @@ int ObDDLService::create_tablets_in_trans_for_mv_(ObIArray &table int ObDDLService::create_tablets_in_trans_(ObIArray &table_schemas, ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, - ObSchemaGetterGuard &schema_guard) + ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; SCN frozen_scn; @@ -2295,13 +2310,14 @@ int ObDDLService::create_tablets_in_trans_(ObIArray &table_schema } ObArray schemas; + ObArray need_create_empty_majors; int64_t last_schema_version = OB_INVALID_VERSION; for (int64_t i = 0; OB_SUCC(ret) && i < table_schemas.count(); i++) { const share::schema::ObTableSchema &this_table = table_schemas.at(i); const int64_t table_id = this_table.get_table_id(); if (!this_table.has_tablet()) { } else if (!this_table.is_global_index_table()) { - if (OB_FAIL(schemas.push_back(&this_table))) { + if (OB_FAIL(schemas.push_back(&this_table)) || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("failed to push_back", KR(ret), K(this_table)); } } else { @@ -2312,7 +2328,9 @@ int ObDDLService::create_tablets_in_trans_(ObIArray &table_schema LOG_WARN("fail to get ls id array", KR(ret)); } else if 
(OB_FAIL(table_creator.add_create_tablets_of_table_arg( this_table, - ls_id_array))) { + ls_id_array, + tenant_data_version, + true /*need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(this_table)); } } @@ -2338,7 +2356,9 @@ int ObDDLService::create_tablets_in_trans_(ObIArray &table_schema LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg( schemas, - ls_id_array))) { + ls_id_array, + tenant_data_version, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), KPC(first_table), K(last_schema_version)); } else if (OB_FAIL(table_creator.execute())) { @@ -2365,6 +2385,7 @@ int ObDDLService::create_tables_in_trans(const bool if_not_exist, int64_t &ddl_task_id) { int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; ObArenaAllocator allocator(ObModIds::OB_RS_PARTITION_TABLE_TEMP); RS_TRACE(create_tables_in_trans_begin); bool is_standby = false; @@ -2400,7 +2421,8 @@ int ObDDLService::create_tables_in_trans(const bool if_not_exist, K(tenant_id), K(refreshed_schema_version)); } else if (OB_FAIL(first_table->check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("fail to check is oracle mode", KR(ret), KPC(first_table)); - } else { + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } if (OB_SUCC(ret)) { ObString tmp_ddl_stmt_str = ddl_stmt_str; @@ -2576,11 +2598,11 @@ int ObDDLService::create_tables_in_trans(const bool if_not_exist, if (OB_SUCC(ret)) { if (first_table->is_materialized_view()) { - if (OB_FAIL(create_tablets_in_trans_for_mv_(table_schemas, ddl_operator, trans, schema_guard))) { + if (OB_FAIL(create_tablets_in_trans_for_mv_(table_schemas, ddl_operator, trans, schema_guard, tenant_data_version))) { LOG_WARN("fail to create tablets in trans for mv", KR(ret)); } } else { - if 
(OB_FAIL(create_tablets_in_trans_(table_schemas, ddl_operator, trans, schema_guard))) { + if (OB_FAIL(create_tablets_in_trans_(table_schemas, ddl_operator, trans, schema_guard, tenant_data_version))) { LOG_WARN("fail to create tablets in trans", KR(ret)); } } @@ -2595,6 +2617,7 @@ int ObDDLService::create_tables_in_trans(const bool if_not_exist, container_table_schema, dep_infos, allocator, + tenant_data_version, task_record))) { LOG_WARN("failed to start mview complete refresh task", KR(ret)); } else { @@ -2649,6 +2672,7 @@ int ObDDLService::start_mview_complete_refresh_task( const ObTableSchema &container_table_schema, const ObIArray *dep_infos, common::ObIAllocator &allocator, + const uint64_t tenant_data_version, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; @@ -2735,6 +2759,7 @@ int ObDDLService::start_mview_complete_refresh_task( arg.consumer_group_id_, &allocator, &arg); + param.tenant_data_version_ = tenant_data_version; if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("submit create index ddl task failed", K(ret)); } @@ -2751,7 +2776,8 @@ int ObDDLService::create_index_or_mlog_table_in_trans( const ObString *ddl_stmt_str, ObMySQLTransaction *sql_trans, share::schema::ObSchemaGetterGuard &schema_guard, - const bool need_check_tablet_cnt) + const bool need_check_tablet_cnt, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; if (OB_FAIL(check_inner_stat())) { @@ -2790,11 +2816,11 @@ int ObDDLService::create_index_or_mlog_table_in_trans( } if (OB_SUCC(ret) && table_schema.has_tablet()) { if (table_schema.is_mlog_table()) { - if (OB_FAIL(create_mlog_tablet(trans, schema_guard, table_schema, need_check_tablet_cnt))) { + if (OB_FAIL(create_mlog_tablet(trans, schema_guard, table_schema, need_check_tablet_cnt, tenant_data_version))) { LOG_WARN("failed to create_mlog_tablet", KR(ret), K(table_schema)); } } else { - if (OB_FAIL(create_index_tablet(table_schema, trans, schema_guard, 
need_check_tablet_cnt))) { + if (OB_FAIL(create_index_tablet(table_schema, trans, schema_guard, need_check_tablet_cnt, tenant_data_version))) { LOG_WARN("fail to create_index_tablet", KR(ret), K(table_schema)); } } @@ -3613,7 +3639,8 @@ int ObDDLService::create_hidden_table_with_pk_changed( ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, ObArenaAllocator &allocator, - const ObIndexArg::IndexActionType &index_action_type) + const ObIndexArg::IndexActionType &index_action_type, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; const bool bind_tablets = false; @@ -3632,6 +3659,8 @@ int ObDDLService::create_hidden_table_with_pk_changed( } else if (is_drop_pk && OB_FAIL(drop_primary_key(new_table_schema))) { LOG_WARN("failed to add hidden pk column for heap table", K(ret)); } else if (!create_user_hidden_table_now) { + } else if (OB_FAIL(adjust_cg_for_offline(new_table_schema))) { + LOG_WARN("failed to adjust for create hiddent table with pk changed", K(ret)); } else if (OB_FAIL(get_add_pk_index_name(origin_table_schema, new_table_schema, index_action_type, @@ -3648,6 +3677,7 @@ int ObDDLService::create_hidden_table_with_pk_changed( ddl_operator, trans, allocator, + tenant_data_version, index_name))) { LOG_WARN("failed to alter table offline", K(ret)); } @@ -4119,7 +4149,12 @@ int ObDDLService::check_alter_table_column(obrpc::ObAlterTableArg &alter_table_a || ObDDLType::DDL_ADD_COLUMN_OFFLINE == ddl_type || ObDDLType::DDL_COLUMN_REDEFINITION == ddl_type) { bool is_exist_stored_gen_col = false; // whether the target table contain stored generated column. 
- if (OB_FAIL(check_exist_stored_gen_col(orig_table_schema, + bool is_column_group_store = false; + if (OB_FAIL(ObCODDLUtil::need_column_group_store(orig_table_schema, is_column_group_store))) { + LOG_WARN("fail to check schema is column group store", K(ret)); + } else if (is_column_group_store) { + ddl_type = ObDDLType::DDL_TABLE_REDEFINITION; + } else if (OB_FAIL(check_exist_stored_gen_col(orig_table_schema, alter_table_schema, is_exist_stored_gen_col))) { LOG_WARN("fail to check exist stored generated column", K(ret)); @@ -4423,6 +4458,379 @@ int ObDDLService::check_alter_table_partition(const obrpc::ObAlterTableArg &alte return ret; } +/*use default column group to save column not exist in other column group*/ +int ObDDLService::alter_default_column_group(share::schema::ObTableSchema &new_table_schema) +{ + int ret = OB_SUCCESS; + ObColumnGroupSchema* default_cg = nullptr; + hash::ObHashSet cg_column_ids; + ObArray column_groups; + bool is_all_cg_exist = false; + ObColumnGroupSchema* all_column_group = nullptr; + column_groups.reset(); + if (new_table_schema.get_column_group_count() < 1) { + // TODO, wait to support table update from 4.1 or lesss to support alter_column_group + ret = OB_INVALID_ARGUMENT; + LOG_WARN("there is no column group in the table schema", K(ret), K(new_table_schema)); + } else if (OB_FAIL(cg_column_ids.create(new_table_schema.get_column_count()))) { + LOG_WARN("fail to create hashmap", K(ret)); + } + + ObTableSchema::const_column_group_iterator iter_begin = new_table_schema.column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = new_table_schema.column_group_end(); + + + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + const ObColumnGroupSchema *column_group = *iter_begin; + if (OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), K(new_table_schema)); + } else if (column_group->get_column_group_type() != ObColumnGroupType::DEFAULT_COLUMN_GROUP) 
{ + for (int64_t col_index = 0; + OB_SUCC(ret) && col_index < column_group->get_column_id_count(); + col_index++) { + if (OB_FAIL(cg_column_ids.set_refactored((column_group->get_column_ids())[col_index], + 1 /*overwrite*/))) { + LOG_WARN("fail to add column id", K(ret)); + } + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(new_table_schema.get_column_group_by_name(OB_DEFAULT_COLUMN_GROUP_NAME, default_cg))) { + LOG_WARN("fail to get all columns in column groups", K(ret), K(new_table_schema)); + } else { + default_cg->remove_all_cols(); + ObArray col_ids; + + if (OB_FAIL(new_table_schema.get_column_ids(col_ids, true /* no virtual col*/))) { + LOG_WARN("fail to get not virtual col ids", K(ret)); + } + ObArray::iterator iter_begin = col_ids.begin(); + ObArray::iterator iter_end = col_ids.end(); + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + int hash_ret = cg_column_ids.exist_refactored(iter_begin->col_id_); + if (hash_ret != OB_HASH_EXIST && hash_ret != OB_HASH_NOT_EXIST) { + ret = hash_ret; + LOG_WARN("fail to check key exist", K(ret)); + } else if (OB_HASH_EXIST == hash_ret) { + /*skip, column exist in other column group don't need to be added*/ + } else if (OB_HASH_NOT_EXIST == hash_ret) { + if (OB_FAIL(default_cg->add_column_id(iter_begin->col_id_))) { + LOG_WARN("fail to add column to default cg", KPC(default_cg), KPC(iter_begin)); + } + } + } + + /*default cg check, used when only support all/each column group*/ + if (OB_SUCC(ret) && default_cg->get_column_id_count() != 0 && default_cg->get_column_id_count() != col_ids.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("default column group have invalid column id count", K(ret), KPC(default_cg)); + } + + } + } + return ret; +} + +int ObDDLService::alter_rowkey_column_group(share::schema::ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + bool is_each_cg_exist = false; + bool is_all_cg_exist = false; + ObColumnGroupSchema *rowkey_cg = nullptr; + /* scan all column column group*/ + + /*get 
rowkey_cg*/ + if (OB_FAIL(table_schema.get_column_group_by_name(OB_ROWKEY_COLUMN_GROUP_NAME, rowkey_cg))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + rowkey_cg = nullptr; + } else { + LOG_WARN("Fail to get rowkey column group", K(ret), K(table_schema)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.is_column_group_exist(OB_ALL_COLUMN_GROUP_NAME, is_all_cg_exist))) { + LOG_WARN("Fail to check whetehre all column group exist", K(ret)); + } else if (OB_FAIL(table_schema.is_column_group_exist(OB_EACH_COLUMN_GROUP_NAME, is_each_cg_exist))) { + LOG_WARN("Fail to check whether each column group exist", K(ret)); + } + } + + + if (OB_SUCC(ret)) { + /* only when only each exist, rowkey_cg is needed*/ + if (is_each_cg_exist && (!is_all_cg_exist)) { + if (OB_ISNULL(rowkey_cg)) { + ObColumnGroupSchema new_rowkey_cg; + ObArray rowkey_ids; + uint64_t rowkey_cg_id = table_schema.get_max_used_column_group_id() + 1; + if (OB_FAIL(table_schema.get_rowkey_column_ids(rowkey_ids))) { + LOG_WARN("fail to get rowkey column ids", K(ret)); + } else if (OB_FAIL(ObSchemaUtils::build_column_group( + table_schema, table_schema.get_tenant_id(),ObColumnGroupType::ROWKEY_COLUMN_GROUP, + OB_ROWKEY_COLUMN_GROUP_NAME, rowkey_ids, rowkey_cg_id, new_rowkey_cg))) { + LOG_WARN("fail to build rowkey column group", K(ret)); + } else if (OB_FAIL(table_schema.add_column_group(new_rowkey_cg))) { + LOG_WARN("fail to add rowkey column group to table_schema", K(ret)); + } + } else { + /*rowkey cg exist skip*/ + } + } else { + /*other situation, rowkey column group should not exist*/ + if (OB_NOT_NULL(rowkey_cg)) { + if (OB_FAIL(table_schema.remove_column_group(rowkey_cg->get_column_group_id()))){ + LOG_WARN("fail to remove rowkey cg", K(ret)); + } + } + } + } + return ret; +} + +int ObDDLService::add_column_group(const obrpc::ObAlterTableArg &alter_table_arg, + const share::schema::ObTableSchema &ori_table_schema, + share::schema::ObTableSchema &new_table_schema) +{ + int ret = 
OB_SUCCESS; + if (alter_table_arg.alter_table_schema_.get_column_group_count() <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("alter table arg has no column groups", K(ret), K(alter_table_arg)); + } else if (alter_table_arg.based_schema_object_infos_.count() <= 0) { + /* based schema object infos is checked in the alter column group, here: only check count*/ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("based info object count <=0 cannot promise consist", K(ret)); + } else { + ObTableSchema::const_column_group_iterator iter_begin = + alter_table_arg.alter_table_schema_.column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = + alter_table_arg.alter_table_schema_.column_group_end(); + + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + bool cg_exist = false; + ObColumnGroupSchema *column_group = *iter_begin; + + if (OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), K(alter_table_arg)); + } else if (column_group->get_column_group_id() <= new_table_schema.get_max_used_column_group_id()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("added column group should have greater id than used column id", + K(ret), K(new_table_schema.get_max_used_column_group_id()), + K(column_group->get_column_group_id())); + } else if (OB_FAIL(new_table_schema.add_column_group(*column_group))) { + if (OB_HASH_EXIST == ret) { + ret = OB_ERR_COLUMN_GROUP_DUPLICATE; + LOG_WARN("fail to add column group, column group duplicate", K(ret), K(new_table_schema)); + char err_msg[OB_MAX_COLUMN_GROUP_NAME_LENGTH] = {'\0'}; + ObString err_msg_str(OB_MAX_COLUMN_GROUP_NAME_LENGTH, 0 /*length*/, err_msg); + int tmp_ret = column_group->get_column_group_type_name(err_msg_str); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("fail to get readable column group name", K(tmp_ret), KPC(column_group)); + } else { + LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, err_msg_str.length(), err_msg_str.ptr()); + } + } else { + LOG_WARN("fail to add 
column group to table schema", K(ret), K(new_table_schema), KPC(column_group)); + } + } + } + if (OB_SUCC(ret)) { + /* note must alter rowkey cg first, else will affect default cg*/ + if (OB_FAIL(alter_rowkey_column_group(new_table_schema))) { + LOG_WARN("fail to adjust rowkey column group when add column group", K(ret)); + } else if (OB_FAIL(alter_default_column_group(new_table_schema))) { + LOG_WARN("fail to alter default column group", K(ret)); + } + } + } + return ret; +} + +int ObDDLService::drop_column_group(const obrpc::ObAlterTableArg &alter_table_arg, + const share::schema::ObTableSchema &ori_table_schema, + share::schema::ObTableSchema &new_table_schema) +{ + int ret = OB_SUCCESS; + if (alter_table_arg.alter_table_schema_.get_column_group_count() <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("alter table arg has no column group", K(ret), K(alter_table_arg.alter_table_schema_)); + } else if (alter_table_arg.based_schema_object_infos_.count() <= 0) { + /* based schema object infos is checked in the alter column group, here only check count*/ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("based schema object info count <= 0, cannot promise column consist", K(ret)); + } else { + ObTableSchema::const_column_group_iterator iter_begin = + alter_table_arg.alter_table_schema_.column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = + alter_table_arg.alter_table_schema_.column_group_end(); + + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + const ObColumnGroupSchema *column_group = *iter_begin; + ObColumnGroupSchema *ori_column_group = nullptr; + /* drop column group use column group name to get real column*/ + if (OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group in origin table should not be null", K(ret)); + } else if (OB_FAIL(ori_table_schema.get_column_group_by_name(column_group->get_column_group_name(), + ori_column_group))) { + /* if not exist in origin*/ + if (OB_HASH_NOT_EXIST == ret) { + ret = 
OB_COLUMN_GROUP_NOT_FOUND; + LOG_WARN("cannot found column group", KPC(column_group)); + char err_msg[OB_MAX_COLUMN_GROUP_NAME_LENGTH] = {'\0'}; + ObString err_msg_str(OB_MAX_COLUMN_GROUP_NAME_LENGTH, 0, err_msg); + int tmp_ret = column_group->get_column_group_type_name(err_msg_str); + if (tmp_ret != OB_SUCCESS){ + LOG_WARN("fail to get readable column group name"); + } else { + LOG_USER_ERROR(OB_COLUMN_GROUP_NOT_FOUND, err_msg_str.length(), err_msg_str.ptr()); + } + } else { + LOG_WARN("fail to get column group by name", K(ret), K(ori_table_schema), KPC(column_group)); + } + } else if (OB_ISNULL(ori_column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), KPC(column_group)); + } else if (OB_FAIL(new_table_schema.remove_column_group(ori_column_group->get_column_group_id()))) { + LOG_WARN("fail to remove column group from new table schema", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(alter_rowkey_column_group(new_table_schema))) { + LOG_WARN("fail to alter rowkey column group", K(ret)); + } else if (OB_FAIL(alter_default_column_group(new_table_schema))) { + LOG_WARN("fail to alter default column group", K(ret)); + } + } + } + return ret; +} + + +int ObDDLService::alter_column_group(obrpc::ObAlterTableArg &alter_table_arg, + const share::schema::ObTableSchema &orig_table_schema, + share::schema::ObTableSchema &new_table_schema, + share::schema::ObSchemaGetterGuard &schema_guard, + ObDDLOperator &ddl_operator, + common::ObMySQLTransaction &trans) +{ + int ret = OB_SUCCESS; + bool bind_tablets = false; + uint64_t compat_version = 0; + if (alter_table_arg.alter_table_schema_.get_column_group_count() == 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("alter table arg has no column group", K(ret), K(alter_table_arg.alter_table_schema_)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(orig_table_schema.get_tenant_id(), compat_version))) { + LOG_WARN("fail to get compat version", K(ret), K(orig_table_schema), K(compat_version)); + 
} else if (compat_version < DATA_VERSION_4_3_0_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("compat version not support", K(ret), K(compat_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3, alter column group"); + } else if (alter_table_arg.based_schema_object_infos_.count() <= 0) { + /* alter_table() has use check_parallel_ddl_conflict() before + so here only need to check count + */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("there is no schema object infos to promise consit", K(ret)); + } else { + new_table_schema.set_column_store(true); + switch (alter_table_arg.alter_table_schema_.alter_type_) { + case share::schema::OB_DDL_ADD_COLUMN_GROUP: { + if (OB_FAIL(add_column_group(alter_table_arg, orig_table_schema, new_table_schema))) { + LOG_WARN("fail to add column group to new table schema", K(ret)); + } + break; + } + case share::schema::OB_DDL_DROP_COLUMN_GROUP: { + if (OB_FAIL(drop_column_group(alter_table_arg, orig_table_schema, new_table_schema))) { + LOG_WARN("fail to dorp column in new table schema", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("recevive unexpected alter table actions", K(ret), + K(alter_table_arg.alter_table_schema_.alter_type_)); + } + } + } + return ret; +} + +int ObDDLService::adjust_cg_for_offline(ObTableSchema &new_table_schema) +{ + /* do adjustment on column group when add or drop column/primary key*/ + int ret = OB_SUCCESS; + bool is_each_cg_exist = false; + bool is_all_cg_exist = false; + if (!new_table_schema.is_column_store_supported()) { + /*skip*/ + } else if (OB_FAIL(new_table_schema.is_column_group_exist(OB_ALL_COLUMN_GROUP_NAME, is_all_cg_exist))) { + LOG_WARN("fail to check is all column group exist", K(ret)); + } else if (OB_FAIL(new_table_schema.is_column_group_exist(OB_EACH_COLUMN_GROUP_NAME, is_each_cg_exist))) { + LOG_WARN("fail to check is each column group exist", K(ret)); + } else { + /* for double_table_ddl reset all column groups*/ + 
new_table_schema.reset_column_group_info(); + /* add each column group*/ + ObTableSchema::const_column_iterator col_iter = new_table_schema.column_begin(); + for (; OB_SUCC(ret) && is_each_cg_exist && col_iter != new_table_schema.column_end(); col_iter++) { + ObColumnSchemaV2 *col = *col_iter; + ObColumnGroupSchema new_single_cg; + new_single_cg.reset(); + if (OB_ISNULL(col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group pointer should not be null", K(ret)); + } else if (col->is_virtual_generated_column()) { + /* skip virtual column group*/ + } else if (OB_FAIL(ObSchemaUtils::build_single_column_group(new_table_schema, col, new_table_schema.get_tenant_id(), + new_table_schema.get_max_used_column_group_id() +1, + new_single_cg))) { + LOG_WARN("fail to build single column group", K(ret)); + } else if (OB_FAIL(new_table_schema.add_column_group(new_single_cg))) { + LOG_WARN("fail to add new column group to table schema", K(ret)); + } + } + + /* add all column group*/ + if (OB_SUCC(ret) &&is_all_cg_exist) { + ObColumnGroupSchema new_cg; + new_cg.reset(); + if (OB_FAIL(ObSchemaUtils::build_all_column_group( + new_table_schema, new_table_schema.get_tenant_id(), + new_table_schema.get_max_used_column_group_id() +1, new_cg))) { + LOG_WARN("fail to build new all column group schema", K(ret)); + } else if (OB_FAIL(new_table_schema.add_column_group(new_cg))) { + LOG_WARN("fail to add new column group to table schema", K(ret)); + } + } + /* adjust rowkey & default column group*/ + if (OB_SUCC(ret)) { + ObArray column_ids; + ObColumnGroupSchema default_cg; + default_cg.reset(); + if (OB_FAIL(ObSchemaUtils::build_column_group(new_table_schema, new_table_schema.get_tenant_id(), + ObColumnGroupType::DEFAULT_COLUMN_GROUP, + OB_DEFAULT_COLUMN_GROUP_NAME, column_ids, + DEFAULT_TYPE_COLUMN_GROUP_ID, default_cg))) { + LOG_WARN("fail to build column group", K(ret)); + } else if (OB_FAIL(new_table_schema.add_column_group(default_cg))) { + LOG_WARN("failt to add default column 
group", K(ret)); + } else if (OB_FAIL(alter_rowkey_column_group(new_table_schema))) { + LOG_WARN("fail to alter rowkey column group", K(ret)); + } else if (OB_FAIL(alter_default_column_group(new_table_schema))) { + LOG_WARN("fail to alter default column grouop schema", K(ret)); + } + } + } + return ret; +} + int ObDDLService::gen_alter_partition_new_table_schema_offline( const AlterTableSchema & alter_table_schema, const ObTableSchema &orig_table_schema, @@ -4476,7 +4884,8 @@ int ObDDLService::alter_table_partition_by( ObTableSchema &new_table_schema, ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, - ObMySQLTransaction &trans) + ObMySQLTransaction &trans, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; const bool bind_tablets = false; @@ -4491,7 +4900,8 @@ int ObDDLService::alter_table_partition_by( schema_guard, ddl_operator, trans, - alter_table_arg.allocator_)); + alter_table_arg.allocator_, + tenant_data_version)); return ret; } @@ -4588,7 +4998,8 @@ int ObDDLService::convert_to_character( ObTableSchema &new_table_schema, ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, - ObMySQLTransaction &trans) + ObMySQLTransaction &trans, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; const bool bind_tablets = false; @@ -4645,7 +5056,8 @@ int ObDDLService::convert_to_character( schema_guard, ddl_operator, trans, - alter_table_arg.allocator_)); + alter_table_arg.allocator_, + tenant_data_version)); } return ret; } @@ -4711,7 +5123,8 @@ int ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, - common::ObArenaAllocator &allocator) + common::ObArenaAllocator &allocator, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; int64_t index_count = new_table_schema.get_index_tid_count(); @@ -4741,7 +5154,8 @@ int ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar 
ddl_operator, trans, allocator, - type))) { + type, + tenant_data_version))) { LOG_WARN("failed to add hidden primary key for heap table", K(ret)); } break; @@ -4782,7 +5196,8 @@ int ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar ddl_operator, trans, allocator, - type))) { + type, + tenant_data_version))) { LOG_WARN("failed to add primary key", K(ret)); } } @@ -5783,16 +6198,17 @@ int ObDDLService::lock_tables_in_recyclebin(const ObDatabaseSchema &database_sch int ObDDLService::create_index_tablet(const ObTableSchema &index_schema, ObMySQLTransaction &trans, share::schema::ObSchemaGetterGuard &schema_guard, - const bool need_check_tablet_cnt) + const bool need_check_tablet_cnt, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; int64_t tenant_id = index_schema.get_tenant_id(); SCN frozen_scn; if (OB_FAIL(check_inner_stat())) { LOG_WARN("check_inner_stat error", K(is_inited()), KR(ret)); - } else if (!index_schema.is_index_table()) { + } else if (!index_schema.is_index_table() || tenant_data_version <= 0) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("arg must be index table", KR(ret), K(tenant_id)); + LOG_WARN("arg must be index table", KR(ret), K(tenant_id), K(tenant_data_version), K(index_schema)); } else if (OB_ISNULL(GCTX.root_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("root service is null", KR(ret)); @@ -5817,12 +6233,14 @@ int ObDDLService::create_index_tablet(const ObTableSchema &index_schema, const ObTableSchema *data_table_schema = NULL; const uint64_t data_table_id = index_schema.get_data_table_id(); ObSEArray schemas; + ObSEArray need_create_empty_majors; if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_table_schema))) { LOG_WARN("failed to get table schema", KR(ret), K(tenant_id), K(data_table_id)); } else if (OB_ISNULL(data_table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("data table schema not exists", KR(ret), K(data_table_id)); - } else if (OB_FAIL(schemas.push_back(&index_schema))) 
{ + } else if (OB_FAIL(schemas.push_back(&index_schema)) + || OB_FAIL(need_create_empty_majors.push_back(false))) { LOG_WARN("failed to push_back", KR(ret), K(index_schema)); } else if (OB_FAIL(new_table_tablet_allocator.prepare(trans, index_schema))) { LOG_WARN("fail to prepare ls for index schema tablets", KR(ret)); @@ -5832,7 +6250,9 @@ int ObDDLService::create_index_tablet(const ObTableSchema &index_schema, } else if (OB_FAIL(table_creator.add_create_tablets_of_local_aux_tables_arg( schemas, data_table_schema, - ls_id_array))) { + ls_id_array, + tenant_data_version, + need_create_empty_majors /* add_index, need_create_empty_major_sstable*/))) { LOG_WARN("create table tablet failed", KR(ret), K(index_schema)); } } else { @@ -5843,7 +6263,9 @@ int ObDDLService::create_index_tablet(const ObTableSchema &index_schema, LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_table_arg( index_schema, - ls_id_array))) { + ls_id_array, + tenant_data_version, + false /*need_create_empty_major_sstable*/))) { LOG_WARN("create table tablet failed", KR(ret), K(index_schema)); } } @@ -5895,6 +6317,7 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, ObArenaAllocator &allocator, + const uint64_t tenant_data_version, obrpc::ObAlterTableRes &res, ObIArray &ddl_tasks) { @@ -6036,7 +6459,7 @@ int ObDDLService::alter_table_index(const obrpc::ObAlterTableArg &alter_table_ar // The index data is stored separately from the main table, // the partition needs to be built, and insert ori_schema_version in the outer insert if (index_schema.has_tablet() - && OB_FAIL(create_index_tablet(index_schema, trans, schema_guard, true/*need_check_tablet_cnt*/))) { + && OB_FAIL(create_index_tablet(index_schema, trans, schema_guard, true/*need_check_tablet_cnt*/, tenant_data_version))) { LOG_WARN("fail to create_index_tablet", KR(ret), K(index_schema)); } if 
(OB_SUCC(ret)) { @@ -8932,7 +9355,7 @@ int ObDDLService::add_new_column_to_table_schema( return ret; } -int ObDDLService::add_column_group_to_table_schema( +int ObDDLService::add_column_to_column_group( const share::schema::ObTableSchema &origin_table_schema, const share::schema::AlterTableSchema &alter_table_schema, share::schema::ObTableSchema &new_table_schema, @@ -8940,68 +9363,19 @@ int ObDDLService::add_column_group_to_table_schema( common::ObMySQLTransaction &trans) { int ret = OB_SUCCESS; - bool is_oracle_mode = false; - if (alter_table_schema.get_column_group_count() == 0) { - } else if (!origin_table_schema.is_valid()) { + uint64_t cur_column_group_id = origin_table_schema.get_max_used_column_group_id(); + ObArray column_ids; + ObTableSchema::const_column_iterator it_begin = alter_table_schema.column_begin(); + ObTableSchema::const_column_iterator it_end = alter_table_schema.column_end(); + AlterColumnSchema *alter_column_schema = nullptr; + + if (!origin_table_schema.is_valid() ) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid table schema", K(ret), K(origin_table_schema)); - } else if (OB_FAIL(origin_table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { - LOG_WARN("fail to check if oracle mode", K(ret), K(origin_table_schema)); + LOG_WARN("invalid argument", K(ret), K(origin_table_schema), K(alter_table_schema)); + } else if (!new_table_schema.is_column_store_supported()) { + /* skip*/ } else { - uint64_t cur_column_group_id = origin_table_schema.get_max_used_column_group_id(); - new_table_schema.reset_column_group_info(); - share::schema::ObTableSchema::const_column_group_iterator cg_begin = alter_table_schema.column_group_begin(); - share::schema::ObTableSchema::const_column_group_iterator cg_end = alter_table_schema.column_group_end(); - for (; OB_SUCC(ret) && (cg_begin != cg_end); cg_begin++) { - ObColumnGroupSchema column_group; - const ObColumnGroupSchema *cg = *cg_begin; - if (OB_ISNULL(cg)) { - ret = OB_ERR_UNEXPECTED; - 
LOG_WARN("unexpected null column group", K(ret)); - } else if (OB_FAIL(column_group.assign(*cg))) { - LOG_WARN("failed to assign column group", K(ret), KPC(cg)); - } else if (FALSE_IT(column_group.set_column_group_id(++cur_column_group_id))) { - } else { - const ObStoreFormatType store_format = alter_table_schema.get_store_format(); - const uint64_t tenant_id = origin_table_schema.get_tenant_id(); - int64_t storage_encoding_mode = 0; - omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id)); - if (OB_LIKELY(tenant_config.is_valid())) { - storage_encoding_mode = tenant_config->storage_encoding_mode; - } - bool is_flat = is_oracle_mode ? ((OB_STORE_FORMAT_NOCOMPRESS_ORACLE == store_format) - || (OB_STORE_FORMAT_BASIC_ORACLE == store_format) - || (OB_STORE_FORMAT_OLTP_ORACLE == store_format)) - : ((OB_STORE_FORMAT_REDUNDANT_MYSQL == store_format) - || (OB_STORE_FORMAT_COMPACT_MYSQL == store_format)); - if (is_flat || (1 == storage_encoding_mode)) { - // all use encoding - column_group.set_row_store_type(alter_table_schema.get_row_store_type()); - } else if (2 == storage_encoding_mode) { - // all use cs_encoding - column_group.set_row_store_type(ObRowStoreType::CS_ENCODING_ROW_STORE); - } else { - // row_store uses encoding; column_store uses cs_encoding - if ((column_group.get_column_group_type() == ObColumnGroupType::DEFAULT_COLUMN_GROUP) - || (column_group.get_column_group_type() == ObColumnGroupType::ALL_COLUMN_GROUP)) { - column_group.set_row_store_type(alter_table_schema.get_row_store_type()); - } else { - column_group.set_row_store_type(ObRowStoreType::CS_ENCODING_ROW_STORE); - } - } - - if (OB_FAIL(new_table_schema.add_column_group(column_group))) { - LOG_WARN("fail to add column group into new table schema", K(ret), K(column_group), K(new_table_schema)); - } - } - } - - ObArray column_ids; - ObTableSchema::const_column_iterator it_begin = alter_table_schema.column_begin(); - ObTableSchema::const_column_iterator it_end = alter_table_schema.column_end(); 
- AlterColumnSchema *alter_column_schema = nullptr; for(; OB_SUCC(ret) && it_begin != it_end; it_begin++) { - ObColumnGroupSchema *column_group; if (OB_ISNULL(alter_column_schema = static_cast(*it_begin))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("*it_begin is NULL", K(ret)); @@ -9014,29 +9388,101 @@ int ObDDLService::add_column_group_to_table_schema( // skip virtual column } else if (OB_FAIL(column_ids.push_back(column_schema->get_column_id()))) { LOG_WARN("fali to push back column id", K(ret)); - } else if (OB_FAIL(new_table_schema.get_column_group_by_name(alter_column_schema->get_column_group_name(), column_group))) { - LOG_WARN("fail to get column group by name", K(ret), K(new_table_schema), K(alter_column_schema)); - } else if (OB_ISNULL(column_group)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null column group", K(ret), KPC(alter_column_schema), K(new_table_schema)); - } else if (OB_FAIL(column_group->add_column_id(column_schema->get_column_id()))) { - LOG_WARN("fail to add column id", K(ret), KPC(column_group)); } } } - if (OB_SUCC(ret)) { - // Cuz we checked data_version in resolver, thus we can skip checking here. 
- new_table_schema.set_column_store(true); - const ObColumnGroupSchema *all_cg = nullptr; - if (OB_FAIL(ddl_operator.insert_column_groups(trans, new_table_schema))) { - LOG_WARN("fail to insert column groups", K(ret), K(new_table_schema)); - } else if (OB_FAIL(origin_table_schema.get_all_cg_type_column_group(all_cg))) { - LOG_WARN("fail to get_all_cg_type_column_group", K(ret), K(origin_table_schema)); - } else if (OB_ISNULL(all_cg) || column_ids.empty() ) { - //skip insert column id to all cg - } else if (OB_FAIL(ddl_operator.insert_column_ids_into_column_group(trans, new_table_schema, column_ids, *all_cg))) { - LOG_WARN("fail to insert_column_id_into_column_group", K(ret), K(new_table_schema), K(column_ids), KPC(all_cg)); + if (OB_FAIL(ret)) { + /* skip do nothing*/ + } else if (column_ids.count() == 0){ + /* do not add column, skip */ + } else { + bool is_all_cg_exist = false; + bool is_each_cg_exist = false; + if (OB_FAIL(new_table_schema.is_column_group_exist(OB_ALL_COLUMN_GROUP_NAME, is_all_cg_exist))) { + LOG_WARN("fail to check whether all cg exist", K(ret), K(new_table_schema)); + } else if (OB_FAIL(new_table_schema.is_column_group_exist(OB_EACH_COLUMN_GROUP_NAME, is_each_cg_exist))) { + LOG_WARN("fail to check whether each cg exist", K(ret), K(new_table_schema)); + } + + /* update info about each column group*/ + if (OB_FAIL(ret)) { + } else if (is_each_cg_exist) { + HEAP_VAR(ObTableSchema, tmp_table) { + if (OB_FAIL(tmp_table.assign(new_table_schema))) { + LOG_WARN("fail to assign", K(ret), K(new_table_schema), K(tmp_table)); + } + tmp_table.reset_column_group_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); i++) { + ObColumnGroupSchema cg_schema; + if (OB_FAIL(ObSchemaUtils::build_single_column_group(new_table_schema, + new_table_schema.get_column_schema(column_ids.at(i)), + new_table_schema.get_tenant_id(), + ++cur_column_group_id, + cg_schema))) { + LOG_WARN("fail to build single column group", K(ret), K(new_table_schema), 
K(column_ids.at(i))); + } else if (OB_FAIL(new_table_schema.add_column_group(cg_schema))) { + LOG_WARN("fail to add new column group schema to table", K(ret), K(cg_schema)); + } else if (OB_FAIL(tmp_table.add_column_group(cg_schema))) { + LOG_WARN("fail to add new column group schema to tmp_cg", K(ret), K(tmp_table), K(cg_schema)); + } + } + if (OB_FAIL(ret)) { + } else if (tmp_table.get_column_group_count() == 0){ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column_group array should not be empty", K(ret), K(tmp_table)); + } else if (OB_FAIL(ddl_operator.insert_column_groups(trans, tmp_table))) { + LOG_WARN("fail to insert new table_schema to each column gorup", K(ret), K(tmp_table)); + } + } + } + /* update info about all column group*/ + if (OB_FAIL(ret)) { + } else if (is_all_cg_exist) { + ObColumnGroupSchema* all_cg = nullptr; + if (OB_FAIL(new_table_schema.get_column_group_by_name(OB_ALL_COLUMN_GROUP_NAME, all_cg))) { + LOG_WARN("fail to get all column group", K(ret), K(new_table_schema)); + } else if (OB_ISNULL(all_cg)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); i++) { + if (OB_FAIL(all_cg->add_column_id(column_ids.at(i)))) { + LOG_WARN("fail to add column id", K(ret), K(new_table_schema), K(column_ids.at(i))); + } + } + if (OB_FAIL(ret)){ + } else if (column_ids.count() == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column_ids should not be empty", K(ret), K(column_ids)); + } else if (OB_FAIL(ddl_operator.insert_column_ids_into_column_group(trans, new_table_schema, column_ids, *all_cg))) { + LOG_WARN("fail to insert column ids into inner table", K(ret), K(new_table_schema),K(column_ids)); + } + } + + /* update info about default column group*/ + if (OB_FAIL(ret)) { + } else if (!is_all_cg_exist && !is_each_cg_exist) { + ObColumnGroupSchema *default_cg = nullptr; + if (OB_FAIL(new_table_schema.get_column_group_by_name(OB_DEFAULT_COLUMN_GROUP_NAME, 
default_cg))) { + LOG_WARN("fail get default column group", K(ret), K(new_table_schema)); + } else if (OB_ISNULL(default_cg)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), K(new_table_schema)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); i++) { + if (OB_FAIL(default_cg->add_column_id(column_ids.at(i)))) { + LOG_WARN("fail to add column id", K(ret), K(new_table_schema), K(column_ids.at(i))); + } + } + + if (OB_FAIL(ret)){ + } else if (column_ids.count() == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column_ids should not be empty", K(ret), K(column_ids)); + } else if (OB_FAIL(ddl_operator.insert_column_ids_into_column_group(trans, new_table_schema, column_ids, *default_cg))) { + LOG_WARN("fail to insert column ids into inner table", K(ret), K(new_table_schema)); + } } } } @@ -9880,9 +10326,9 @@ int ObDDLService::alter_table_column(const ObTableSchema &origin_table_schema, bool is_add_lob = false; if(OB_FAIL(ret)) { - } else if (OB_FAIL(add_column_group_to_table_schema(origin_table_schema, + } else if (OB_FAIL(add_column_to_column_group(origin_table_schema, alter_table_schema, new_table_schema, ddl_operator, trans))) { - LOG_WARN("fail to add_column_group_to_table_schema", K(ret), K(alter_table_schema), K(new_table_schema)); + LOG_WARN("fail to add_column_to_column_group", K(ret), K(alter_table_schema), K(new_table_schema)); } else if (OB_FAIL(new_table_schema.check_skip_index_valid())) { LOG_WARN("failed to check new table schema skip index", K(ret)); } else if (!is_origin_table_has_lob_column) { @@ -9912,6 +10358,8 @@ int ObDDLService::create_aux_lob_table_if_need(ObTableSchema &data_table_schema, const uint64_t tenant_id = data_table_schema.get_tenant_id(); bool need_sync_schema_version = false; SCN frozen_scn; + uint64_t tenant_data_version = 0; + ObArray need_create_empty_majors; if (OB_FAIL(ObMajorFreezeHelper::get_frozen_scn(tenant_id, frozen_scn))) { LOG_WARN("failed to get frozen status for create 
tablet", KR(ret), K(tenant_id)); @@ -9925,6 +10373,8 @@ int ObDDLService::create_aux_lob_table_if_need(ObTableSchema &data_table_schema, ret = OB_OP_NOT_ALLOW; (void)snprintf(err_msg, sizeof(err_msg),"%s", "system table add or modify column"); LOG_USER_ERROR(OB_OP_NOT_ALLOW, err_msg); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { ObTableCreator table_creator( tenant_id, @@ -9951,7 +10401,8 @@ int ObDDLService::create_aux_lob_table_if_need(ObTableSchema &data_table_schema, share::schema::ObTableSchema &table_schema = aux_table_schemas.at(i); if (OB_FAIL(ddl_operator.create_table(table_schema, trans, NULL, need_sync_schema_version))) { LOG_WARN("failed to create table schema", K(ret)); - } else if (OB_FAIL(schemas.push_back(&table_schema))) { + } else if (OB_FAIL(schemas.push_back(&table_schema)) + || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("failed to push_back table schema", K(ret), K(table_schema)); } @@ -9969,7 +10420,8 @@ int ObDDLService::create_aux_lob_table_if_need(ObTableSchema &data_table_schema, } else if (OB_FAIL(new_table_tablet_allocator.get_ls_id_array(ls_id_array))) { LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_local_aux_tables_arg( - schemas, &data_table_schema, ls_id_array))) { + schemas, &data_table_schema, ls_id_array, tenant_data_version, + need_create_empty_majors /*online_ddl, need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(last_schema_version)); } else if (OB_FAIL(table_creator.execute())) { LOG_WARN("fail to execute crate tablet", KR(ret)); @@ -10761,7 +11213,8 @@ int ObDDLService::update_global_index(ObAlterTableArg &arg, const uint64_t tenant_id, const ObTableSchema &orig_table_schema, ObDDLOperator &ddl_operator, - ObMySQLTransaction &trans) + ObMySQLTransaction &trans, + const uint64_t tenant_data_version) { int 
ret = OB_SUCCESS; ObSEArray simple_index_infos; @@ -10810,7 +11263,7 @@ int ObDDLService::update_global_index(ObAlterTableArg &arg, if (OB_FAIL(new_table_schema.assign(*index_table_schema))) { LOG_WARN("fail to assign schema", K(ret)); } else if (OB_FAIL(rebuild_index_in_trans(schema_guard, orig_table_schema, new_table_schema, - NULL, &trans))) { + NULL, &trans, tenant_data_version))) { LOG_WARN("ddl_service_ rebuild_index failed", KR(ret)); } else { ObSArray &index_arg_list = arg.index_arg_list_; @@ -11466,6 +11919,7 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, { int ret = OB_SUCCESS; const ObDDLType ddl_type = res.ddl_type_; + uint64_t tenant_data_version = 0; if (OB_FAIL(check_inner_stat())) { LOG_WARN("variable is not init"); @@ -11567,6 +12021,8 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, if (OB_FAIL(ret)) { } else if (OB_FAIL(new_table_schema.assign(*orig_table_schema))) { LOG_WARN("fail to assign schema", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { bool need_update_index_table = false; AlterLocalityOp alter_locality_op = ALTER_LOCALITY_OP_INVALID; @@ -11658,7 +12114,7 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, trans, &global_idx_schema_array))) { ObString origin_table_name = alter_table_schema.get_origin_table_name(); - LOG_WARN("failed to alter table options,", K(origin_table_name), K(ret)); + LOG_WARN("failed to alter table options,", K(origin_table_name), K(new_table_schema), K(ret)); } if (OB_SUCC(ret) && !alter_table_schema.alter_option_bitset_.is_empty()) { if (OB_FAIL(ObDDLLock::lock_for_common_ddl_in_trans(*orig_table_schema, trans))) { @@ -11693,6 +12149,7 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, ddl_operator, trans, alter_table_arg.allocator_, + tenant_data_version, res, 
ddl_tasks))) { LOG_WARN("failed to alter table index!", K(ret)); @@ -11731,7 +12188,8 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, tenant_id, *orig_table_schema, ddl_operator, - trans))) { + trans, + tenant_data_version))) { LOG_WARN("update_global_index failed", K(ret)); } else if (OB_FAIL(generate_tables_array(alter_table_arg.alter_part_type_, orig_table_schemas, @@ -11881,6 +12339,7 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, } if (OB_SUCC(ret)) { + common::ObArray need_create_empty_majors; common::ObArray inc_table_schema_ptrs; common::ObArray del_table_schema_ptrs; for (int i = 0; i < inc_table_schemas.count() && OB_SUCC(ret); i++) { @@ -11889,7 +12348,8 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, ret = OB_ERR_UNEXPECTED; LOG_WARN("table_schemas is NULL", KR(ret), K(i), K(tmp_table_schema)); } else if (FALSE_IT(inc_table_schemas.at(i)->set_schema_version(new_table_schemas.at(i)->get_schema_version()))) { - } else if (OB_FAIL(inc_table_schema_ptrs.push_back(tmp_table_schema))) { + } else if (OB_FAIL(inc_table_schema_ptrs.push_back(tmp_table_schema)) + || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("fail to push back", KR(ret), KPC(tmp_table_schema)); } } @@ -11956,7 +12416,9 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg( inc_table_schema_ptrs, - ls_id_array))) { + ls_id_array, + tenant_data_version, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(alter_table_schema), K(inc_table_schema_ptrs)); } else if (OB_FAIL(table_creator.execute())) { @@ -12040,9 +12502,10 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, &del_tablet_ids, &index_schema, alter_table_arg.parallelism_, + 
const_alter_table_arg.consumer_group_id_, + tenant_data_version, alter_table_arg.allocator_, - task_record, - const_alter_table_arg.consumer_group_id_))) { + task_record))) { LOG_WARN("fail to submit build index task", K(ret), "type", create_index_arg->index_type_); } else if (OB_FAIL(ddl_tasks.push_back(task_record))) { LOG_WARN("fail to push ddl task", K(ret), K(task_record)); @@ -12218,6 +12681,22 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, return ret; } +int ObDDLService::check_alter_column_group(const obrpc::ObAlterTableArg &alter_table_arg, ObDDLType &ddl_type) const +{ + int ret = OB_SUCCESS; + if (OB_DDL_ADD_COLUMN_GROUP == alter_table_arg.alter_table_schema_.alter_type_ || + OB_DDL_DROP_COLUMN_GROUP == alter_table_arg.alter_table_schema_.alter_type_) { + ddl_type = ObDDLType::DDL_ALTER_COLUMN_GROUP; + if (alter_table_arg.alter_table_schema_.get_column_group_count() <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument, alter table arg don't have any column group when alter column group", + K(ret), K(alter_table_arg.alter_table_schema_)); + } + } + return ret; +} + + int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, ObDDLType &ddl_type) { @@ -12261,6 +12740,12 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, ddl_type))) { LOG_WARN("fail to check alter table partition", K(ret)); } + if (OB_SUCC(ret)) { + if (OB_FAIL(check_alter_column_group(alter_table_arg, ddl_type))) { + LOG_WARN("fail to check alter column gorup", K(ret), K(alter_table_arg.alter_table_schema_), K(ddl_type)); + } + } + if (OB_SUCC(ret) && alter_table_arg.alter_constraint_type_!= obrpc::ObAlterTableArg::CONSTRAINT_NO_OPERATION && OB_FAIL(check_alter_table_constraint(alter_table_arg, *orig_table_schema, ddl_type))) { LOG_WARN("fail to check alter table constraint", K(ret), K(alter_table_arg), K(ddl_type)); @@ -12307,6 +12792,7 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg 
&alter_table_arg, if (OB_SUCC(ret) && is_double_table_long_running_ddl(ddl_type)) { bool has_index_operation = false; bool is_adding_constraint = false; + bool is_column_store = false; uint64_t table_id = alter_table_arg.alter_table_schema_.get_table_id(); if (orig_table_schema->has_mlog_table()) { ret = OB_NOT_SUPPORTED; @@ -12316,11 +12802,6 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, ret = OB_NOT_SUPPORTED; LOG_WARN("double table long running ddl on materialized view log is not supported", KR(ret)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "double table long running ddl on materialized view log is"); - } else if (orig_table_schema->is_normal_column_store_table()) { - ret = OB_NOT_SUPPORTED; - (void)snprintf(err_msg, sizeof(err_msg), "%s with column store table", - ddl_type_str(ddl_type)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, err_msg); } else if (OB_FAIL(check_has_index_operation(schema_guard, tenant_id, table_id, @@ -12497,6 +12978,7 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar const ObDDLType ddl_type = res.ddl_type_; ObRootService *root_service = GCTX.root_service_; bool need_redistribute_column_id = false; + uint64_t tenant_data_version = 0; if (OB_UNLIKELY(DDL_INVALID == ddl_type)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("unexpected ddl type", K(ret), K(ddl_type), K(alter_table_arg)); @@ -12509,6 +12991,8 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar LOG_WARN("fail to get schema guard with version in inner table", K(ret), K(tenant_id)); } else if (OB_FAIL(check_can_bind_tablets(ddl_type, bind_tablets))) { LOG_WARN("failed to check can bind tablets", K(ret), K(ddl_type)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { ObDDLOperator ddl_operator(*schema_service_, *sql_proxy_); ObTableSchema new_table_schema; @@ -12520,6 +13004,8 @@ int 
ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar LOG_WARN("fail to get and check table schema", K(ret)); } else if (OB_FAIL(new_table_schema.assign(*orig_table_schema))) { LOG_WARN("fail to assign schema", K(ret)); + } else if (OB_FAIL(ObSchemaUtils::mock_default_cg(orig_table_schema->get_tenant_id(), new_table_schema))) { + LOG_WARN("fail to mock default cg", K(ret), K(orig_table_schema), K(new_table_schema)); } else { ObDDLSQLTransaction trans(schema_service_); ObDDLTaskRecord task_record; @@ -12556,7 +13042,8 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar schema_guard, ddl_operator, trans, - alter_table_arg.allocator_))) { + alter_table_arg.allocator_, + tenant_data_version))) { LOG_WARN("failed to alter table primary key", K(ret)); } } @@ -12599,6 +13086,8 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar schema_guard, need_redistribute_column_id))) { LOG_WARN("failed to alter table column!", K(*orig_table_schema), K(new_table_schema), K(ret)); + } else if (OB_FAIL(adjust_cg_for_offline(new_table_schema))) { + LOG_WARN("fail to adjust cg after alter column", K(ret)); } else if (OB_FAIL(create_user_hidden_table(*orig_table_schema, new_table_schema, &alter_table_arg.sequence_ddl_arg_, @@ -12607,7 +13096,8 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar schema_guard, ddl_operator, trans, - alter_table_arg.allocator_))) { + alter_table_arg.allocator_, + tenant_data_version))) { LOG_WARN("fail to create user hidden table", K(ret)); } } @@ -12620,7 +13110,8 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar new_table_schema, schema_guard, ddl_operator, - trans))) { + trans, + tenant_data_version))) { LOG_WARN("failed to alter table partition by", K(ret)); } } @@ -12647,10 +13138,33 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar new_table_schema, schema_guard, 
ddl_operator, - trans))) { + trans, + tenant_data_version))) { LOG_WARN("failed to convert to character", K(ret)); } } + if (OB_SUCC(ret) && ddl_type == ObDDLType::DDL_ALTER_COLUMN_GROUP) { + if (OB_FAIL(alter_column_group(alter_table_arg, + *orig_table_schema, + new_table_schema, + schema_guard, + ddl_operator, + trans))) { + LOG_WARN("failed to alter table column group", K(ret)); + } else if (OB_FAIL(create_user_hidden_table(*orig_table_schema, + new_table_schema, + &alter_table_arg.sequence_ddl_arg_, + bind_tablets, + schema_guard, + schema_guard, + ddl_operator, + trans, + alter_table_arg.allocator_, + tenant_data_version))) { + LOG_WARN("fail to create user_hidden table", K(ret)); + } + } + if (OB_SUCC(ret) && need_redistribute_column_id) { if (OB_FAIL(redistribute_column_ids(new_table_schema))) { LOG_WARN("failed to redistribute column ids", K(ret)); @@ -12692,6 +13206,7 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar &alter_table_arg, 0/*parent_task_id*/, task_id); + param.tenant_data_version_ = tenant_data_version; if (orig_table_schema->is_external_table()) { ret = OB_OP_NOT_ALLOW; char err_msg[OB_MAX_ERROR_MSG_LEN] = {0}; @@ -12721,6 +13236,7 @@ int ObDDLService::do_offline_ddl_in_trans(obrpc::ObAlterTableArg &alter_table_ar &alter_table_arg, 0/*parent_task_id*/, task_id); + param.tenant_data_version_ = tenant_data_version; if (OB_FAIL(root_service->get_ddl_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("submit ddl task failed", K(ret)); } else { @@ -12756,6 +13272,7 @@ int ObDDLService::create_hidden_table( obrpc::ObCreateHiddenTableRes &res) { int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; const uint64_t tenant_id = create_hidden_table_arg.tenant_id_; const int64_t table_id = create_hidden_table_arg.table_id_; const uint64_t dest_tenant_id = tenant_id; @@ -12785,6 +13302,8 @@ int ObDDLService::create_hidden_table( } else if (OB_ISNULL(orig_database_schema)) { ret = OB_ERR_UNEXPECTED; 
LOG_WARN("orig_database_schema is nullptr", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { HEAP_VAR(ObTableSchema, new_table_schema) { ObDDLOperator ddl_operator(*schema_service_, *sql_proxy_); @@ -12818,8 +13337,11 @@ int ObDDLService::create_hidden_table( schema_guard, ddl_operator, trans, - allocator))) { + allocator, + tenant_data_version))) { LOG_WARN("fail to create hidden table", K(ret)); + } else if (OB_FAIL(ddl_operator.update_table_attribute(new_table_schema, trans, OB_DDL_ALTER_TABLE))) { + LOG_WARN("failed to update data table schema attribute", K(ret)); } else { LOG_INFO("create hidden table success!", K(table_id), K(new_table_schema)); } @@ -12853,6 +13375,7 @@ int ObDDLService::create_hidden_table( &alter_table_arg, 0, task_id); + param.tenant_data_version_ = tenant_data_version; if (OB_FAIL(root_service->get_ddl_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("submit ddl task failed", K(ret)); } else if (orig_table_schema->get_table_state_flag() == ObTableStateFlag::TABLE_STATE_OFFLINE_DDL) { @@ -12904,6 +13427,7 @@ int ObDDLService::mview_complete_refresh( int64_t refreshed_schema_version = 0; common::ObArenaAllocator allocator("MVRef"); ObDDLTaskRecord task_record; + uint64_t tenant_data_version = 0; if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", KR(ret), K(arg)); @@ -12914,11 +13438,13 @@ int ObDDLService::mview_complete_refresh( LOG_WARN("variable is not init", KR(ret)); } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { ObDDLSQLTransaction trans(schema_service_); if (OB_FAIL(trans.start(sql_proxy_, tenant_id, 
refreshed_schema_version))) { LOG_WARN("start transaction failed", KR(ret), K(tenant_id), K(refreshed_schema_version)); - } else if (OB_FAIL(mview_complete_refresh_in_trans(arg, res, trans, allocator, schema_guard, task_record))) { + } else if (OB_FAIL(mview_complete_refresh_in_trans(arg, res, trans, allocator, schema_guard, tenant_data_version, task_record))) { LOG_WARN("failed to do mview complete refresh in trans", KR(ret), K(arg)); } if (trans.is_started()) { @@ -12948,6 +13474,7 @@ int ObDDLService::mview_complete_refresh_in_trans( ObDDLSQLTransaction &trans, common::ObIAllocator &allocator, share::schema::ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_data_version, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; @@ -13010,7 +13537,8 @@ int ObDDLService::mview_complete_refresh_in_trans( schema_guard, ddl_operator, trans, - allocator))) { + allocator, + tenant_data_version))) { LOG_WARN("fail to create hidden table", KR(ret)); } else { LOG_INFO("create hidden table success!", K(mview_table_id), "container_table_id", container_table_schema->get_table_id(), @@ -13055,6 +13583,7 @@ int ObDDLService::mview_complete_refresh_in_trans( &alter_table_arg, arg.parent_task_id_, task_id); + param.tenant_data_version_ = tenant_data_version; if (OB_FAIL(root_service->get_ddl_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("submit ddl task failed", KR(ret)); } else { @@ -13089,6 +13618,7 @@ int ObDDLService::recover_restore_table_ddl_task( // thus we need no lock on it. // Same as the offline ddl, we will create a restore dest table, a hidden one with table mode `hidden_offline_ddl`. // Different from the offline ddl, we will not change any attribute of the source table. 
+ uint64_t tenant_data_version = 0; int64_t refreshed_dst_tenant_version = 0; const uint64_t session_id = arg.target_schema_.get_session_id(); ObSchemaGetterGuard hold_buf_src_tenant_schema_guard; @@ -13127,15 +13657,26 @@ int ObDDLService::recover_restore_table_ddl_task( } else if (OB_ISNULL(dst_db_schema)) { ret = OB_ERR_BAD_DATABASE; LOG_WARN("unknown database", K(ret), K(dst_tenant_id), K(session_id), "db_id", arg.target_schema_.get_database_id()); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(dst_tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(dst_tenant_id)); } else { ObDDLOperator ddl_operator(*schema_service_, *sql_proxy_); + bool is_dest_table_column_store = false; if (OB_FAIL(dst_tenant_trans.start(sql_proxy_, dst_tenant_id, refreshed_dst_tenant_version))) { LOG_WARN("start transaction failed", K(ret), K(dst_tenant_id), K(refreshed_dst_tenant_version)); } else if (OB_FAIL(dst_table_schema.assign(arg.target_schema_))) { LOG_WARN("assign failed", K(ret), K(session_id), K(arg)); + } else if (OB_FAIL(dst_table_schema.get_is_column_store(is_dest_table_column_store))) { + LOG_WARN("judge if dest table is column store failed", K(ret), K(arg)); + } else if (is_dest_table_column_store) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported to retore table with column store", K(ret), K(arg)); } else if (OB_FAIL(create_user_hidden_table(*src_table_schema, dst_table_schema, nullptr/*sequence_ddl_arg*/, - false/*bind_tablets*/, *src_tenant_schema_guard, *dst_tenant_schema_guard, ddl_operator, dst_tenant_trans, allocator))) { - LOG_WARN("create user hidden table failed", K(ret), K(arg)); + false/*bind_tablets*/, *src_tenant_schema_guard, *dst_tenant_schema_guard, ddl_operator, + dst_tenant_trans, allocator, tenant_data_version))) { + LOG_WARN("create user hidden table failed", K(ret), K(arg), K(tenant_data_version)); + } else if (OB_FAIL(ddl_operator.update_table_attribute(dst_table_schema, dst_tenant_trans, OB_DDL_ALTER_TABLE))) { + 
LOG_WARN("failed to update data table schema attribute", K(ret), K(arg)); } else { ObPrepareAlterTableArgParam param; if (OB_FAIL(param.init(arg.consumer_group_id_, session_id, 0/*sql_mode, unused*/, arg.ddl_stmt_str_, @@ -13145,6 +13686,7 @@ int ObDDLService::recover_restore_table_ddl_task( } else if (OB_FAIL(root_service->get_ddl_scheduler().prepare_alter_table_arg(param, &dst_table_schema, alter_table_arg))) { LOG_WARN("prepare alter table arg failed", K(ret), K(param)); } else { + alter_table_arg.alter_table_schema_.set_schema_version(dst_table_schema.get_schema_version()); alter_table_arg.alter_table_schema_.set_table_name(arg.target_schema_.get_table_name_str()); ObCreateDDLTaskParam param(dst_table_schema.get_tenant_id(), ObDDLType::DDL_TABLE_RESTORE, @@ -13158,6 +13700,7 @@ int ObDDLService::recover_restore_table_ddl_task( &alter_table_arg, 0, arg.ddl_task_id_); + param.tenant_data_version_ = tenant_data_version; if (OB_FAIL(root_service->get_ddl_scheduler().create_ddl_task(param, dst_tenant_trans, task_record))) { LOG_WARN("submit ddl task failed", K(ret)); } @@ -13443,7 +13986,7 @@ int ObDDLService::check_alter_partitions(const ObTableSchema &orig_table_schema, bool is_split = false; bool is_oracle_mode = false; bool has_local_index = false; - uint64_t compat_version = OB_INVALID_VERSION; + uint64_t compat_version = 0; if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { LOG_WARN("get min data_version failed", K(ret), K(tenant_id)); } else if (compat_version < DATA_VERSION_4_2_0_0 && OB_INVALID_ID != tablegroup_id) { @@ -14931,13 +15474,16 @@ int ObDDLService::truncate_table_in_trans(const obrpc::ObTruncateTableArg &arg, const ObString &database_name) { int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; + const uint64_t tenant_id = orig_table_schema.get_tenant_id(); if (OB_FAIL(check_inner_stat())) { LOG_WARN("variable is not init"); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data 
version failed", K(ret), K(tenant_id)); } else { ObDDLOperator ddl_operator(*schema_service_, *sql_proxy_); ObSchemaService *schema_service = schema_service_->get_schema_service(); ObArenaAllocator allocator(ObModIds::OB_RS_PARTITION_TABLE_TEMP); - const uint64_t tenant_id = orig_table_schema.get_tenant_id(); ObSArray audit_schemas; ObArray orig_obj_privs_ora; const bool to_recyclebin = false; @@ -15227,13 +15773,15 @@ int ObDDLService::truncate_table_in_trans(const obrpc::ObTruncateTableArg &arg, } ObArray schemas; + ObArray need_create_empty_majors; int64_t last_schema_version = OB_INVALID_VERSION; for (int64_t i = 0; OB_SUCC(ret) && i < table_schemas.count(); i++) { const share::schema::ObTableSchema &this_table = table_schemas.at(i); const int64_t table_id = this_table.get_table_id(); if (!this_table.has_tablet()) { } else if (!this_table.is_global_index_table()) { - if (OB_FAIL(schemas.push_back(&this_table))) { + if (OB_FAIL(schemas.push_back(&this_table)) + || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("failed to push_back", KR(ret), K(this_table)); } } else { @@ -15244,7 +15792,9 @@ int ObDDLService::truncate_table_in_trans(const obrpc::ObTruncateTableArg &arg, LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_table_arg( this_table, - ls_id_array))) { + ls_id_array, + tenant_data_version, + true /*need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(this_table)); } } @@ -15270,7 +15820,9 @@ int ObDDLService::truncate_table_in_trans(const obrpc::ObTruncateTableArg &arg, LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg( schemas, - ls_id_array))) { + ls_id_array, + tenant_data_version, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(orig_table_schema), K(last_schema_version)); } else if 
(OB_FAIL(table_creator.execute())) { @@ -15803,12 +16355,14 @@ int ObDDLService::create_user_hidden_table(const ObTableSchema &orig_table_schem ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, ObIAllocator &allocator, + const uint64_t tenant_data_version, const ObString &index_name/*default ""*/) { int ret = OB_SUCCESS; const uint64_t tenant_id = hidden_table_schema.get_tenant_id(); ObArray aux_table_schemas; ObSEArray schemas; // 1(hidden_table) + 2(aux_lob_table) + ObSEArray need_create_empty_majors; bool need_sync_schema_version = false; SCN frozen_scn = SCN::min_scn(); bool is_add_identity_column = false; @@ -15818,6 +16372,9 @@ int ObDDLService::create_user_hidden_table(const ObTableSchema &orig_table_schem if (OB_ISNULL(GCTX.root_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("root service is null", KR(ret)); + } else if (OB_UNLIKELY(tenant_data_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(tenant_data_version)); } else if (OB_FAIL(ObMajorFreezeHelper::get_frozen_scn(tenant_id, frozen_scn))) { LOG_WARN("failed to get frozen status for create tablet", KR(ret), K(tenant_id)); } else if (OB_FAIL(check_is_add_identity_column(orig_table_schema, hidden_table_schema, is_add_identity_column))) { @@ -15853,16 +16410,28 @@ int ObDDLService::create_user_hidden_table(const ObTableSchema &orig_table_schem // to prevent other action to effect table partition info in tablegroup } else if (OB_FAIL(check_alter_partition_with_tablegroup(&orig_table_schema, hidden_table_schema, dst_tenant_schema_guard))) { LOG_WARN("fail to check alter partition with tablegroup", KR(ret)); + } else if (OB_FAIL(schemas.push_back(&hidden_table_schema))) { + LOG_WARN("push back schema failed", K(ret)); + } else if (OB_FAIL(need_create_empty_majors.push_back(false))) { + LOG_WARN("push back flag failed" , K(ret)); } else { - if (OB_FAIL(schemas.push_back(&hidden_table_schema))) { - LOG_WARN("fail to push back hidden table schema" , K(ret)); - } - for 
(int64_t i = 0; OB_SUCC(ret) && i < aux_table_schemas.count(); i++) { + for (int64_t i = 0; OB_SUCC(ret) && i < aux_table_schemas.count(); i++) { ObTableSchema &table_schema = aux_table_schemas.at(i); // allow offline ddl execute if there's no offline ddl doing table_schema.set_in_offline_ddl_white_list(orig_table_schema.check_can_do_ddl()); if (OB_FAIL(schemas.push_back(&table_schema))) { LOG_WARN("fail to push back aux table schema" , K(ret)); + } else if (table_schema.is_aux_lob_piece_table()) { + if (OB_FAIL(need_create_empty_majors.push_back(true))) { + LOG_WARN("push back failed", K(ret)); + } + } else if (table_schema.is_aux_lob_meta_table()) { + if (OB_FAIL(need_create_empty_majors.push_back(DATA_VERSION_4_3_0_0 > tenant_data_version))) { + LOG_WARN("push back failed", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table", K(ret), K(table_schema)); } } } @@ -15906,7 +16475,7 @@ int ObDDLService::create_user_hidden_table(const ObTableSchema &orig_table_schem } // when need bind tablets, schemas array only store aux tables, need remove data schema if (OB_SUCC(ret) && bind_tablets) { - if (OB_FAIL(schemas.remove(0))) { + if (OB_FAIL(schemas.remove(0)) || OB_FAIL(need_create_empty_majors.remove(0))) { LOG_WARN("failed to remove data schema.", K(ret)); } } @@ -15920,18 +16489,23 @@ int ObDDLService::create_user_hidden_table(const ObTableSchema &orig_table_schem LOG_WARN("fail to get ls id array", KR(ret)); } else if (!bind_tablets && OB_FAIL(table_creator.add_create_tablets_of_tables_arg( schemas, - ls_id_array))) { + ls_id_array, + tenant_data_version, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("create table tablets failed", K(ret), K(hidden_table_schema)); } else if (bind_tablets && OB_FAIL(table_creator.add_create_bind_tablets_of_hidden_table_arg( orig_table_schema, hidden_table_schema, - ls_id_array))) { + ls_id_array, + tenant_data_version))) { LOG_WARN("failed to add arg", K(ret), 
K(hidden_table_schema)); } else if (bind_tablets && schemas.count() > 0 && OB_FAIL(table_creator.add_create_tablets_of_local_aux_tables_arg( schemas, &hidden_table_schema, - ls_id_array))) { + ls_id_array, + tenant_data_version, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("failed to add arg", K(ret), K(aux_table_schemas), K(hidden_table_schema)); } else if (OB_FAIL(table_creator.execute())) { LOG_WARN("fail to execute create tablet", KR(ret)); @@ -16545,14 +17119,17 @@ int ObDDLService::reconstruct_index_schema(obrpc::ObAlterTableArg &alter_table_a } else { bool is_exist = false; new_index_schema.set_max_used_column_id(max( - new_index_schema.get_max_used_column_id(), hidden_table_schema.get_max_used_column_id())); + new_index_schema.get_max_used_column_id(), hidden_table_schema.get_max_used_column_id())); new_index_schema.set_table_id(new_idx_tid); new_index_schema.set_data_table_id(hidden_table_schema.get_table_id()); new_index_schema.set_index_status(INDEX_STATUS_UNAVAILABLE); new_index_schema.set_tenant_id(hidden_table_schema.get_tenant_id()); new_index_schema.set_database_id(hidden_table_schema.get_database_id()); new_index_schema.set_table_state_flag(target_flag); - if (is_recover_restore_table) { + + if (OB_FAIL(ObSchemaUtils::mock_default_cg(new_index_schema.get_tenant_id(), new_index_schema))) { + LOG_WARN("fail to mock default cg", K(ret), K(new_index_schema)); + } else if (is_recover_restore_table) { if (OB_FAIL(new_index_schema.set_encryption_str(hidden_table_schema.get_encryption_str()))) { LOG_WARN("set encryption str failed", K(ret), K(hidden_table_schema.get_encryption_str())); } else if (OB_FAIL(new_index_schema.set_encrypt_key(hidden_table_schema.get_encrypt_key()))) { @@ -16563,6 +17140,8 @@ int ObDDLService::reconstruct_index_schema(obrpc::ObAlterTableArg &alter_table_a } } if (OB_FAIL(ret)) { + } else if (OB_FAIL(adjust_cg_for_offline(new_index_schema))) { + LOG_WARN("fail to adjust column group for index", 
K(ret)); } else if (OB_FAIL(dest_schema_guard.check_table_exist(new_index_schema.get_tenant_id(), new_index_schema.get_database_id(), new_index_schema.get_table_name_str(), @@ -16594,8 +17173,11 @@ int ObDDLService::rebuild_hidden_table_index( ObSArray &new_table_schemas) { int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; if (OB_FAIL(check_inner_stat())) { LOG_WARN("variable is not init", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret), K(tenant_data_version)); } else { for (int64_t i = 0; i < new_table_schemas.count() && OB_SUCC(ret); i++) { ObTableSchema &tmp_schema = new_table_schemas.at(i); @@ -16616,7 +17198,7 @@ int ObDDLService::rebuild_hidden_table_index( has_tablet = is_system_table(table_id); } if (!has_tablet) { - } else if (OB_FAIL(create_index_tablet(this_table, trans, schema_guard, false/*need_check_tablet_cnt*/))) { + } else if (OB_FAIL(create_index_tablet(this_table, trans, schema_guard, false/*need_check_tablet_cnt*/, tenant_data_version))) { LOG_WARN("create table tablets failed", K(ret), K(this_table)); } else {} if (OB_SUCC(ret)) { @@ -18196,7 +18778,23 @@ int ObDDLService::check_and_replace_dup_constraint_name_on_demand( tmp_schema, new_constraint.get_constraint_name(), false/*is_foreign_key*/, is_constraint_name_exist))) { LOG_WARN("check constraint name is exist failed", K(ret)); } else if (is_constraint_name_exist) { - LOG_INFO("duplicated constraint, can ignore", K(ret), K(new_constraint)); + if (CONSTRAINT_TYPE_PRIMARY_KEY == new_constraint.get_constraint_type()) { + // duplicated primary key name, the recover restore table task should fail finally. 
+ ret = OB_ERR_CONSTRAINT_NAME_DUPLICATE; + LOG_WARN("duplicated pk name, should fail the recover restore table task", K(ret), K(new_constraint)); + } else if (CONSTRAINT_TYPE_NOT_NULL == new_constraint.get_constraint_type()) { + // duplicated not null cst name, should delete the not null flag, and ignore to rebuild the cst. + const uint64_t column_id = *(new_constraint.cst_col_begin()); + ObColumnSchemaV2 *not_null_column = hidden_data_schema.get_column_schema(column_id); + if (OB_ISNULL(not_null_column) || OB_UNLIKELY(!not_null_column->has_not_null_constraint())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status", K(ret), KPC(not_null_column), K(new_constraint), K(hidden_data_schema)); + } else if (OB_FALSE_IT(not_null_column->drop_not_null_cst())) { + } else if (OB_FAIL(ddl_operator.update_single_column(trans, hidden_data_schema, hidden_data_schema, *not_null_column))) { + LOG_WARN("update single column failed", K(ret), KPC(not_null_column), K(hidden_data_schema)); + } + LOG_INFO("duplicated not null name, remove the cst for the restore table task", K(ret), K(new_constraint)); + } } else if (OB_FAIL(hidden_data_schema.add_constraint(new_constraint))) { LOG_WARN("failed to add constraint", K(ret)); } else {/* do nothing. 
*/} @@ -19043,6 +19641,7 @@ int ObDDLService::inner_drop_and_create_tablet_(const int64_t &schema_version, const ObTableSchema *tmp_table_schema = NULL; int64_t del_tablet_count = orig_table_schemas.count(); int64_t create_table_count = new_table_schemas.count(); + common::ObArray need_create_empty_majors; common::ObArray create_table_schema_ptrs; int64_t start_time = ObTimeUtility::current_time(); @@ -19083,7 +19682,8 @@ int ObDDLService::inner_drop_and_create_tablet_(const int64_t &schema_version, } if (OB_SUCC(ret)) { tenant_id = new_table_schemas.at(0)->get_tenant_id(); - if (OB_FAIL(create_table_schema_ptrs.reserve(create_table_count))) { + if (OB_FAIL(create_table_schema_ptrs.reserve(create_table_count)) + || OB_FAIL(need_create_empty_majors.reserve(create_table_count))) { LOG_WARN("fail to reserve create_table_schema_ptrs", KR(ret), K(tenant_id), K(schema_version)); } else { for (int64_t i = 0; i < create_table_count && OB_SUCC(ret); ++i) { @@ -19091,23 +19691,29 @@ int ObDDLService::inner_drop_and_create_tablet_(const int64_t &schema_version, if (OB_ISNULL(tmp_table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tmp table schema is NULL", KR(ret), K(tenant_id)); - } else if (OB_FAIL(create_table_schema_ptrs.push_back(tmp_table_schema))) { + } else if (OB_FAIL(create_table_schema_ptrs.push_back(tmp_table_schema)) + || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("fail to push back to new_table_schema_ptrs", KR(ret), K(tenant_id), KPC(tmp_table_schema)); } } // create tablet SCN frozen_scn; + uint64_t tenant_data_version = 0; if (FAILEDx(ObMajorFreezeHelper::get_frozen_scn(tenant_id, frozen_scn))) { LOG_WARN("fail to get frozen status for create tablet", KR(ret), K(tenant_id)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { ObTableCreator table_creator(tenant_id, frozen_scn, trans); if 
(OB_FAIL(table_creator.init(true/*need_check_tablet_cnt*/))) { LOG_WARN("table_creator init failed", KR(ret), K(tenant_id)); } else if (1 == create_table_count && create_table_schema_ptrs.at(0)->is_global_index_table()) { - if (OB_FAIL(table_creator.add_create_tablets_of_table_arg(*create_table_schema_ptrs.at(0), orig_ls_id_array))) { - LOG_WARN("fail to add_create_tablets_of_tables_arg", KR(ret), K(tenant_id)); + if (OB_FAIL(table_creator.add_create_tablets_of_table_arg(*create_table_schema_ptrs.at(0), orig_ls_id_array, + tenant_data_version, true/*need_create_empty_major_sstable*/))) { + LOG_WARN("fail to add_create_tablets_of_tables_arg", KR(ret), K(tenant_id)); } - } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg(create_table_schema_ptrs, orig_ls_id_array))) { + } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg(create_table_schema_ptrs, orig_ls_id_array, + tenant_data_version, need_create_empty_majors/*need_create_empty_major_sstable*/))) { LOG_WARN("fail to add_create_tablets_of_tables_arg", KR(ret), K(tenant_id)); } if (FAILEDx(table_creator.execute())) { @@ -22666,6 +23272,7 @@ int ObDDLService::rebuild_index(const ObRebuildIndexArg &arg, obrpc::ObAlterTabl const ObTableSchema *index_table_schema = NULL; ObString index_table_name; ObIndexBuilder index_builder(*this); + uint64_t tenant_data_version = 0; if (OB_FAIL(ObTableSchema::build_index_table_name( // index name allocator, table_id, arg.index_name_, index_table_name))) { @@ -22680,6 +23287,8 @@ int ObDDLService::rebuild_index(const ObRebuildIndexArg &arg, obrpc::ObAlterTabl ret = OB_ERR_CANT_DROP_FIELD_OR_KEY; LOG_WARN("index table schema should not be null", K(arg.index_name_), KR(ret)); LOG_USER_ERROR(OB_ERR_CANT_DROP_FIELD_OR_KEY, arg.index_name_.length(), arg.index_name_.ptr()); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { ObString ddl_stmt_str = 
arg.ddl_stmt_str_; ObTableSchema new_table_schema; @@ -22694,7 +23303,8 @@ int ObDDLService::rebuild_index(const ObRebuildIndexArg &arg, obrpc::ObAlterTabl *table_schema, new_table_schema, &ddl_stmt_str, - &trans))) { + &trans, + tenant_data_version))) { LOG_WARN("ddl_service_ rebuild_index failed", K(tenant_id), KR(ret)); } else if (OB_FAIL(index_builder.submit_build_index_task(trans, create_index_arg, @@ -22703,9 +23313,10 @@ int ObDDLService::rebuild_index(const ObRebuildIndexArg &arg, obrpc::ObAlterTabl nullptr/*del_data_tablet_ids*/, &new_table_schema, arg.parallelism_, + arg.consumer_group_id_, + tenant_data_version, allocator, - task_record, - arg.consumer_group_id_))) { + task_record))) { LOG_WARN("fail to submit build global index task", KR(ret)); } else { res.index_table_id_ = new_table_schema.get_table_id(); @@ -22752,7 +23363,8 @@ int ObDDLService::rebuild_index_in_trans( const ObTableSchema &data_table_schema, ObTableSchema &index_schema, const ObString *ddl_stmt_str, - ObMySQLTransaction *sql_trans) + ObMySQLTransaction *sql_trans, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; uint64_t new_table_id = index_schema.get_table_id(); @@ -22789,7 +23401,7 @@ int ObDDLService::rebuild_index_in_trans( LOG_WARN("failed to generate tablet id", K(ret)); } else if (OB_FAIL(create_index_or_mlog_table_in_trans(index_schema, ddl_stmt_str, &trans, schema_guard, - false/*need_check_tablet_cnt*/))) { + false/*need_check_tablet_cnt*/, tenant_data_version))) { LOG_WARN("create_table_in_trans failed", K(index_schema), KR(ret), K(ddl_stmt_str)); } @@ -22854,6 +23466,7 @@ int ObDDLService::update_index_status(const obrpc::ObUpdateIndexStatusArg &arg) } else if (OB_FAIL(ddl_operator.update_index_status( tenant_id, table->get_data_table_id(), table_id, new_status, arg.in_offline_ddl_white_list_, trans, ddl_stmt_str))) { + LOG_WARN("update index status failed", K(ret)); } if (OB_SUCC(ret) && arg.task_id_ != 0) { @@ -22873,6 +23486,12 @@ int 
ObDDLService::update_index_status(const obrpc::ObUpdateIndexStatusArg &arg) } } + if (OB_SUCC(ret) && arg.task_id_ > 0) { + if (OB_FAIL(ObDDLTaskRecordOperator::update_ret_code(trans, tenant_id, arg.task_id_, arg.error_code_))) { + LOG_WARN("update ret code failed", K(ret)); + } + } + if (trans.is_started()) { int commit_ret = trans.end(OB_SUCC(ret)); if (OB_SUCCESS != commit_ret) { @@ -23160,11 +23779,14 @@ int ObDDLService::add_table_schema( share::schema::ObSchemaGetterGuard &schema_guard) { int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; int64_t start_time = ObTimeUtility::current_time(); if (OB_FAIL(check_inner_stat())) { LOG_WARN("variable is not init", KR(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(table_schema.get_tenant_id(), tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(table_schema)); } else if (OB_FAIL(create_index_or_mlog_table_in_trans(table_schema, NULL, NULL, schema_guard, - false/*need_check_tablet_cnt*/))) { + false/*need_check_tablet_cnt*/, tenant_data_version))) { LOG_WARN("create_table_in_trans failed", KR(ret), K(table_schema)); } LOG_INFO("[UPGRADE] add inner table", KR(ret), @@ -24532,6 +25154,7 @@ int ObDDLService::create_tenant_sys_tablets( common::ObArray ls_id_array; ObArray table_schemas; ObArray index_tids; + ObArray need_create_empty_majors; if (OB_FAIL(trans.start(sql_proxy_, tenant_id))) { LOG_WARN("fail to start trans", KR(ret), K(tenant_id)); } else if (OB_FAIL(table_creator.init(false/*need_tablet_cnt_check*/))) { @@ -24544,7 +25167,8 @@ int ObDDLService::create_tenant_sys_tablets( const uint64_t data_table_id = data_table.get_table_id(); if (data_table.has_partition()) { table_schemas.reset(); - if (OB_FAIL(table_schemas.push_back(&data_table))) { + need_create_empty_majors.reset(); + if (OB_FAIL(table_schemas.push_back(&data_table)) || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("fail to push back data table ptr", KR(ret), K(data_table_id)); } else if 
(ObSysTableChecker::is_sys_table_has_index(data_table_id)) { if (OB_FAIL(ObSysTableChecker::get_sys_table_index_tids(data_table_id, index_tids))) { @@ -24564,6 +25188,8 @@ int ObDDLService::create_tenant_sys_tablets( LOG_WARN("sys index schema order is not match", KR(ret), K(data_table_id), K(j), K(index_schema)); } else if (OB_FAIL(table_schemas.push_back(&index_schema))) { LOG_WARN("fail to push back index schema", KR(ret), K(index_id), K(data_table_id)); + } else if (OB_FAIL(need_create_empty_majors.push_back(true))) { + LOG_WARN("fail to push back need create empty major", KR(ret), K(index_id), K(data_table_id)); } } // end for } @@ -24596,9 +25222,9 @@ int ObDDLService::create_tenant_sys_tablets( LOG_WARN("sys table's lob table not matched", KR(ret), K(meta_idx), K(piece_idx), K(lob_piece_table_id), K(lob_meta_table_id), K(data_table_id)); } else { - if (OB_FAIL(table_schemas.push_back(&tables.at(meta_idx)))) { + if (OB_FAIL(table_schemas.push_back(&tables.at(meta_idx))) || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("fail to push back lob meta aux table ptr", KR(ret), K(meta_idx), K(data_table_id)); - } else if (OB_FAIL(table_schemas.push_back(&tables.at(piece_idx)))) { + } else if (OB_FAIL(table_schemas.push_back(&tables.at(piece_idx))) || OB_FAIL(need_create_empty_majors.push_back(true))) { LOG_WARN("fail to push back lob piece aux table ptr", KR(ret), K(piece_idx), K(data_table_id)); } } @@ -24613,8 +25239,10 @@ int ObDDLService::create_tenant_sys_tablets( LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg( table_schemas, - ls_id_array))) { - LOG_WARN("fail to add create tablets of table", KR(ret), K(data_table), K(table_schemas)); + ls_id_array, + DATA_CURRENT_VERSION, + need_create_empty_majors/*need_create_empty_major_sstable*/))) { + LOG_WARN("fail to add create tablets of table", KR(ret), K(data_table), K(table_schemas), K(need_create_empty_majors)); } } } // end for diff 
--git a/src/rootserver/ob_ddl_service.h b/src/rootserver/ob_ddl_service.h index 6f2f83b9c..98d5d6598 100644 --- a/src/rootserver/ob_ddl_service.h +++ b/src/rootserver/ob_ddl_service.h @@ -134,18 +134,21 @@ public: // create_index_table will fill table_id and frozen_version to table_schema virtual int create_index_table(const obrpc::ObCreateIndexArg &arg, + const uint64_t tenant_data_version, share::schema::ObTableSchema &table_schema, ObMySQLTransaction &sql_trans); virtual int create_mlog_table(ObMySQLTransaction &sql_trans, const obrpc::ObCreateMLogArg &arg, + const uint64_t tenant_data_version, share::schema::ObSchemaGetterGuard &schema_guard, share::schema::ObTableSchema &table_schema); virtual int create_mlog_tablet(ObMySQLTransaction &trans, share::schema::ObSchemaGetterGuard &schema_guard, const share::schema::ObTableSchema &mlog_schema, - const bool need_check_tablet_cnt); + const bool need_check_tablet_cnt, + const uint64_t tenant_data_version); virtual int add_mlog(ObMySQLTransaction &trans, const obrpc::ObCreateMLogArg &arg, @@ -159,10 +162,12 @@ public: const share::schema::ObTableSchema &data_table_schema, share::schema::ObTableSchema &table_schema, const ObString *ddl_stmt_str, - ObMySQLTransaction *sql_trans); + ObMySQLTransaction *sql_trans, + const uint64_t tenant_data_version); int create_inner_expr_index(ObMySQLTransaction &trans, const share::schema::ObTableSchema &orig_table_schema, + const uint64_t tenant_data_version, share::schema::ObTableSchema &new_table_schema, common::ObIArray &new_columns, share::schema::ObTableSchema &index_schema); @@ -175,10 +180,12 @@ public: ObMySQLTransaction &trans, const obrpc::ObCreateIndexArg &arg, const share::schema::ObTableSchema &table_schema, + const uint64_t tenant_data_version, share::schema::ObTableSchema &index_schema); int create_global_inner_expr_index( ObMySQLTransaction &trans, const share::schema::ObTableSchema &orig_table_schema, + const uint64_t tenant_data_version, share::schema::ObTableSchema 
&new_table_schema, common::ObIArray &new_columns, share::schema::ObTableSchema &index_schema); @@ -248,6 +255,7 @@ public: ObDDLSQLTransaction &trans, common::ObIAllocator &allocator, share::schema::ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_data_version, ObDDLTaskRecord &task_record); /** * For recover restore table ddl task, it is a cross-tenant task, including, @@ -300,7 +308,8 @@ public: int create_index_tablet(const ObTableSchema &index_schema, ObMySQLTransaction &trans, share::schema::ObSchemaGetterGuard &schema_guard, - const bool need_check_tablet_cnt); + const bool need_check_tablet_cnt, + const uint64_t tenant_data_version); virtual int alter_table_index(const obrpc::ObAlterTableArg &alter_table_arg, const share::schema::ObTableSchema &orgin_table_schema, share::schema::ObTableSchema &new_table_schema, @@ -308,6 +317,7 @@ public: ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, common::ObArenaAllocator &allocator, + const uint64_t tenant_data_version, obrpc::ObAlterTableRes &res, ObIArray &ddl_tasks); int generate_object_id_for_partition_schemas( @@ -344,7 +354,7 @@ public: share::schema::ObSchemaGetterGuard &schema_guard, ObDDLOperator *ddl_operator, common::ObMySQLTransaction *trans); - int add_column_group_to_table_schema( + int add_column_to_column_group( const share::schema::ObTableSchema &origin_table_schema, const share::schema::AlterTableSchema &alter_table_schema, share::schema::ObTableSchema &new_table_schema, @@ -697,7 +707,8 @@ int check_table_udt_id_is_exist(share::schema::ObSchemaGetterGuard &schema_guard ObDDLOperator &ddl_operator, common::ObMySQLTransaction &trans, common::ObArenaAllocator &allocator, - const obrpc::ObIndexArg::IndexActionType &index_action_type); + const obrpc::ObIndexArg::IndexActionType &index_action_type, + const uint64_t tenant_data_version); int get_add_pk_index_name(const share::schema::ObTableSchema &origin_table_schema, share::schema::ObTableSchema &new_table_schema, const 
obrpc::ObIndexArg::IndexActionType &index_action_type, @@ -1246,7 +1257,8 @@ private: const uint64_t tenant_id, const share::schema::ObTableSchema &orig_table_schema, ObDDLOperator &ddl_operator, - ObMySQLTransaction &trans); + ObMySQLTransaction &trans, + const uint64_t tenant_data_version); int fill_interval_info_for_set_interval(const ObTableSchema &orig_table_schema, ObTableSchema &new_table_schema, AlterTableSchema &inc_table_schema); @@ -1287,15 +1299,18 @@ private: const common::ObString *ddl_stmt_str, ObMySQLTransaction *sql_trans, share::schema::ObSchemaGetterGuard &schema_guard, - const bool need_check_tablet_cnt); + const bool need_check_tablet_cnt, + const uint64_t tenant_data_version); int create_tablets_in_trans_(common::ObIArray &table_schemas, ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, - share::schema::ObSchemaGetterGuard &schema_guard); + share::schema::ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_data_version); int create_tablets_in_trans_for_mv_(common::ObIArray &table_schemas, ObDDLOperator &ddl_operator, ObMySQLTransaction &trans, - share::schema::ObSchemaGetterGuard &schema_guard); + share::schema::ObSchemaGetterGuard &schema_guard, + const uint64_t tenant_data_version); /* * Check and set various options of modify tenant, among which the modifications of zone_list, @@ -1452,6 +1467,7 @@ private: ObDDLOperator &ddl_operator, common::ObMySQLTransaction &trans, common::ObIAllocator &allocator, + const uint64_t tenant_data_version, const ObString &index_name = ObString("")); int rebuild_triggers_on_hidden_table( const share::schema::ObTableSchema &orig_table_schema, @@ -1557,7 +1573,8 @@ private: share::schema::ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, common::ObMySQLTransaction &trans, - common::ObArenaAllocator &allocator); + common::ObArenaAllocator &allocator, + const uint64_t tenant_data_version); int check_alter_partition_with_tablegroup(const ObTableSchema *orig_table_schema, ObTableSchema 
&new_table_schema, ObSchemaGetterGuard &schema_guard); @@ -1566,7 +1583,8 @@ private: share::schema::ObTableSchema &new_table_schema, share::schema::ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, - common::ObMySQLTransaction &trans); + common::ObMySQLTransaction &trans, + const uint64_t tenant_data_version); int convert_to_character_for_partition(const ObCollationType &to_collation, share::schema::ObTableSchema &new_table_schema); int convert_to_character(obrpc::ObAlterTableArg &alter_table_arg, @@ -1574,7 +1592,17 @@ private: share::schema::ObTableSchema &new_table_schema, share::schema::ObSchemaGetterGuard &schema_guard, ObDDLOperator &ddl_operator, - common::ObMySQLTransaction &trans); + common::ObMySQLTransaction &trans, + const uint64_t tenant_data_version); + int check_alter_column_group(const obrpc::ObAlterTableArg &alter_table_arg, share::ObDDLType &ddl_type) const; + int alter_column_group(obrpc::ObAlterTableArg &alter_table_arg, + const share::schema::ObTableSchema &origin_table_schema, + share::schema::ObTableSchema &new_table_schema, + share::schema::ObSchemaGetterGuard &schema_guard, + ObDDLOperator &ddl_operator, + common::ObMySQLTransaction &trans); + + int check_alter_table_constraint( const obrpc::ObAlterTableArg &alter_table_arg, const ObTableSchema &orig_table_schema, @@ -1888,6 +1916,11 @@ private: ObDDLOperator *ddl_operator, common::ObMySQLTransaction *trans); + int alter_table_update_cg_column(common::ObMySQLTransaction &trans, + ObDDLOperator &ddl_operator, + share::schema::ObColumnSchemaV2 &new_column_schema, + share::schema::ObTableSchema &new_table_schema); + bool is_zone_exist(const common::ObArray &zones, const common::ObZone &zone); int try_drop_sys_ls_(const uint64_t meta_tenant_id, common::ObMySQLTransaction &trans); @@ -2035,6 +2068,16 @@ public: common::ObIAllocator *allocator = NULL); #endif private: + int adjust_cg_for_offline(ObTableSchema &new_table_schema); + int 
alter_default_column_group(share::schema::ObTableSchema &new_table_schema); + int add_column_group(const obrpc::ObAlterTableArg &alter_table_arg, + const share::schema::ObTableSchema &ori_table_schema, + share::schema::ObTableSchema &new_table_schema); + + int drop_column_group(const obrpc::ObAlterTableArg &alter_table_arg, + const share::schema::ObTableSchema &ori_table_schema, + share::schema::ObTableSchema &new_table_schema); + int alter_rowkey_column_group(share::schema::ObTableSchema &table_schema); int handle_security_audit_for_stmt(const obrpc::ObSecurityAuditArg &arg, share::schema::ObSAuditSchema &audit_schema); int handle_security_audit_for_object(const obrpc::ObSecurityAuditArg &arg, @@ -2656,6 +2699,7 @@ private: const ObTableSchema &container_table_schema, const ObIArray *dep_infos, common::ObIAllocator &allocator, + const uint64_t tenant_data_version, ObDDLTaskRecord &task_record); bool need_modify_dep_obj_status(const obrpc::ObAlterTableArg &alter_table_arg) const; diff --git a/src/rootserver/ob_index_builder.cpp b/src/rootserver/ob_index_builder.cpp index 4e3c04d3a..1f786a588 100644 --- a/src/rootserver/ob_index_builder.cpp +++ b/src/rootserver/ob_index_builder.cpp @@ -294,6 +294,7 @@ int ObIndexBuilder::do_create_global_index( obrpc::ObAlterTableRes &res) { int ret = OB_SUCCESS; + uint64_t tenant_data_version = 0; ObArray gen_columns; const bool global_index_without_column_info = false; ObDDLTaskRecord task_record; @@ -325,15 +326,17 @@ int ObIndexBuilder::do_create_global_index( new_arg, new_table_schema, global_index_without_column_info, true/*generate_id*/, index_schema))) { LOG_WARN("fail to generate schema", K(ret), K(new_arg)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); } else { if (gen_columns.empty()) { if (OB_FAIL(ddl_service_.create_global_index( - trans, new_arg, new_table_schema, index_schema))) { + trans, new_arg, new_table_schema, 
tenant_data_version, index_schema))) { LOG_WARN("fail to create global index", K(ret)); } } else { if (OB_FAIL(ddl_service_.create_global_inner_expr_index( - trans, table_schema, new_table_schema, gen_columns, index_schema))) { + trans, table_schema, tenant_data_version, new_table_schema, gen_columns, index_schema))) { LOG_WARN("fail to create global inner expr index", K(ret)); } } @@ -345,9 +348,10 @@ int ObIndexBuilder::do_create_global_index( nullptr/*del_data_tablet_ids*/, &index_schema, arg.parallelism_, + arg.consumer_group_id_, + tenant_data_version, allocator, - task_record, - arg.consumer_group_id_))) { + task_record))) { LOG_WARN("fail to submit build global index task", K(ret)); } } @@ -383,9 +387,10 @@ int ObIndexBuilder::submit_build_index_task( const ObIArray *del_data_tablet_ids, const ObTableSchema *index_schema, const int64_t parallelism, + const int64_t group_id, + const uint64_t tenant_data_version, common::ObIAllocator &allocator, - ObDDLTaskRecord &task_record, - const int64_t group_id) + ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; ObCreateDDLTaskParam param(index_schema->get_tenant_id(), @@ -398,9 +403,10 @@ int ObIndexBuilder::submit_build_index_task( group_id, &allocator, &create_index_arg); - if (OB_ISNULL(data_schema) || OB_ISNULL(index_schema)) { + param.tenant_data_version_ = tenant_data_version; + if (OB_UNLIKELY(nullptr == data_schema || nullptr == index_schema || tenant_data_version <= 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("schema is invalid", K(ret), K(data_schema), K(index_schema)); + LOG_WARN("schema is invalid", K(ret), KP(data_schema), KP(index_schema), K(tenant_data_version)); } else if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("submit create index ddl task failed", K(ret)); } else if (OB_FAIL(ObDDLLock::lock_for_add_drop_index( @@ -509,12 +515,13 @@ int ObIndexBuilder::do_create_local_index( } else if 
(OB_FAIL(new_table_schema.check_create_index_on_hidden_primary_key(index_schema))) { LOG_WARN("failed to check create index on table", K(ret), K(index_schema)); } else if (gen_columns.empty()) { - if (OB_FAIL(ddl_service_.create_index_table(my_arg, index_schema, trans))) { + if (OB_FAIL(ddl_service_.create_index_table(my_arg, tenant_data_version, index_schema, trans))) { LOG_WARN("fail to create index", K(ret), K(index_schema)); } } else { if (OB_FAIL(ddl_service_.create_inner_expr_index(trans, table_schema, + tenant_data_version, new_table_schema, gen_columns, index_schema))) { @@ -529,9 +536,10 @@ int ObIndexBuilder::do_create_local_index( nullptr/*del_data_tablet_ids*/, &index_schema, create_index_arg.parallelism_, + create_index_arg.consumer_group_id_, + tenant_data_version, allocator, - task_record, - create_index_arg.consumer_group_id_))) { + task_record))) { LOG_WARN("failt to submit build local index task", K(ret)); } else { res.index_table_id_ = index_schema.get_table_id(); @@ -825,8 +833,6 @@ int ObIndexBuilder::generate_schema( LOG_WARN("set_index_table_columns failed", K(arg), K(data_schema), K(ret)); } else if (OB_FAIL(set_index_table_options(arg, data_schema, schema))) { LOG_WARN("set_index_table_options failed", K(arg), K(data_schema), K(ret)); - } else if (OB_FAIL(set_index_table_column_store_if_need(schema))) { - LOG_WARN("fail to set index table column store if need", KR(ret)); } else { schema.set_name_generated_type(arg.index_schema_.get_name_generated_type()); LOG_INFO("finish generate index schema", K(schema)); @@ -847,6 +853,121 @@ int ObIndexBuilder::generate_schema( } } } + + if (OB_SUCC(ret)) { + // create index column_group after schema generate + if (OB_FAIL(create_index_column_group(arg, schema))) { + LOG_WARN("fail to create cg for index", K(ret)); + } + } + return ret; +} + +int ObIndexBuilder::create_index_column_group(const obrpc::ObCreateIndexArg &arg, ObTableSchema &index_table_schema) +{ + int ret = OB_SUCCESS; + uint64_t 
compat_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(index_table_schema.get_tenant_id(), compat_version))) { + LOG_WARN("fail to get min data version", K(ret)); + } else if (compat_version >= DATA_VERSION_4_3_0_0) { + bool enable_table_with_cg = false; + ObArray column_ids; // not include virtual column + index_table_schema.set_column_store(true); + if (arg.index_cgs_.count() > 0) { + index_table_schema.set_max_used_column_group_id(index_table_schema.get_max_used_column_group_id()); + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_cgs_.count(); ++i) { + const obrpc::ObCreateIndexArg::ObIndexColumnGroupItem &cur_item = arg.index_cgs_.at(i); + if (!cur_item.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid cg item", K(ret), K(cur_item)); + } else if (cur_item.is_each_cg_) { + // handle all_type column_group & single_type column_group + ObColumnGroupSchema column_group_schema; + const int64_t column_cnt = index_table_schema.get_column_count(); + if (OB_FAIL(column_ids.reserve(column_cnt))) { + LOG_WARN("fail to reserve", KR(ret), K(column_cnt)); + } else { + ObTableSchema::const_column_iterator tmp_begin = index_table_schema.column_begin(); + ObTableSchema::const_column_iterator tmp_end = index_table_schema.column_end(); + for (; OB_SUCC(ret) && (tmp_begin != tmp_end); tmp_begin++) { + column_group_schema.reset(); + ObColumnSchemaV2 *column = (*tmp_begin); + if (OB_FAIL(ObSchemaUtils::build_single_column_group( + index_table_schema, column, index_table_schema.get_tenant_id(), + index_table_schema.get_max_used_column_group_id() + 1, column_group_schema))) { + LOG_WARN("fail to build single column group"); + } else if (column_group_schema.is_valid()) { + if (OB_FAIL(index_table_schema.add_column_group(column_group_schema))) { + LOG_WARN("fail to add single type column group", KR(ret), K(column_group_schema)); + } else if (column->is_rowkey_column() || arg.exist_all_column_group_) {//if not exist all cg, build rowkey cg + if 
(OB_FAIL(column_ids.push_back(column->get_column_id()))) { + LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); + } + } + } + } + } + + if (OB_SUCC(ret)) { + column_group_schema.reset(); + const ObColumnGroupType cg_type = arg.exist_all_column_group_ ? ObColumnGroupType::ALL_COLUMN_GROUP + : ObColumnGroupType::ROWKEY_COLUMN_GROUP; + const ObString cg_name = arg.exist_all_column_group_ ? OB_ALL_COLUMN_GROUP_NAME : OB_ROWKEY_COLUMN_GROUP_NAME; + + if (OB_FAIL(ObSchemaUtils::build_column_group(index_table_schema, index_table_schema.get_tenant_id(), cg_type, cg_name, + column_ids, index_table_schema.get_max_used_column_group_id() + 1, column_group_schema))) { + LOG_WARN("fail to build all type column_group", KR(ret), K(column_ids)); + } else if (OB_FAIL(index_table_schema.add_column_group(column_group_schema))) { + LOG_WARN("fail to add all type column group", KR(ret), K(column_group_schema)); + } + } + } + } + } else { + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(index_table_schema.get_tenant_id())); + if (OB_SUCC(ret) && OB_LIKELY(tenant_config.is_valid())) { + if (tenant_config->enable_table_with_cg) { + enable_table_with_cg = true; // which means create each_cg and all_cg default + } + } + } + + // add default column_group + if (OB_SUCC(ret)) { + ObColumnGroupSchema tmp_cg; + if (arg.index_cgs_.count() > 0 || enable_table_with_cg) { + column_ids.reuse(); // if exists cg node, column_ids in default_type will be empty + } else { + ObTableSchema::const_column_iterator tmp_begin = index_table_schema.column_begin(); + ObTableSchema::const_column_iterator tmp_end = index_table_schema.column_end(); + for (; OB_SUCC(ret) && (tmp_begin != tmp_end); tmp_begin++) { + ObColumnSchemaV2 *column = (*tmp_begin); + if (OB_ISNULL(column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column should not be null", KR(ret)); + } else if (column->is_virtual_generated_column()) { + // skip virtual column + } else if 
(OB_FAIL(column_ids.push_back(column->get_column_id()))) { + LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); + } + } + } + + if (FAILEDx(ObSchemaUtils::build_column_group(index_table_schema, index_table_schema.get_tenant_id(), + ObColumnGroupType::DEFAULT_COLUMN_GROUP, OB_DEFAULT_COLUMN_GROUP_NAME, column_ids, + DEFAULT_TYPE_COLUMN_GROUP_ID, tmp_cg))) { + LOG_WARN("fail to build default type column_group", KR(ret), "table_id", index_table_schema.get_table_id(), K(column_ids)); + } else if (OB_FAIL(index_table_schema.add_column_group(tmp_cg))) { + LOG_WARN("fail to add default column group", KR(ret), "table_id", index_table_schema.get_table_id(), K(arg.index_cgs_.count()), + K(enable_table_with_cg), K(column_ids)); + } + } + } else if (arg.index_cgs_.count() > 0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("data_version not support for create index with column group", K(ret), K(compat_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3, create index with column group"); + } + return ret; } @@ -1000,28 +1121,5 @@ bool ObIndexBuilder::is_final_index_status(const ObIndexStatus index_status) con || is_error_index_status(index_status)); } -int ObIndexBuilder::set_index_table_column_store_if_need( - share::schema::ObTableSchema &table_schema) -{ - int ret = OB_SUCCESS; - uint64_t compat_version = 0; - const uint64_t tenant_id = table_schema.get_tenant_id(); - const uint64_t table_id = table_schema.get_table_id(); - if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(table_schema)); - } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { - LOG_WARN("fail to get min data version", KR(ret), K(tenant_id), K(table_id)); - } else if (compat_version >= DATA_VERSION_4_2_0_0) { - table_schema.set_column_store(true); - if (table_schema.get_column_group_count() == 0) { - if (OB_FAIL(table_schema.add_default_column_group())) { - 
LOG_WARN("fail to add default column group", KR(ret), K(tenant_id), K(table_id)); - } - } - } - return ret; -} - }//end namespace rootserver }//end namespace oceanbase diff --git a/src/rootserver/ob_index_builder.h b/src/rootserver/ob_index_builder.h index c8a8da435..08bc96ab4 100644 --- a/src/rootserver/ob_index_builder.h +++ b/src/rootserver/ob_index_builder.h @@ -93,9 +93,10 @@ public: const common::ObIArray *del_data_tablet_ids, const share::schema::ObTableSchema *index_schema, const int64_t parallelism, + const int64_t group_id, + const uint64_t tenant_data_version, common::ObIAllocator &allocator, - ObDDLTaskRecord &task_record, - const int64_t group_id); + ObDDLTaskRecord &task_record); private: typedef common::ObArray > OrderFTColumns; class FulltextColumnOrder @@ -123,7 +124,8 @@ private: bool is_final_index_status(const share::schema::ObIndexStatus index_status) const; - int set_index_table_column_store_if_need(share::schema::ObTableSchema &table_schema); + int create_index_column_group(const obrpc::ObCreateIndexArg &arg, + share::schema::ObTableSchema &index_table_schema); private: ObDDLService &ddl_service_; diff --git a/src/rootserver/ob_lob_meta_builder.cpp b/src/rootserver/ob_lob_meta_builder.cpp index c4d129b6d..ebb0523ab 100644 --- a/src/rootserver/ob_lob_meta_builder.cpp +++ b/src/rootserver/ob_lob_meta_builder.cpp @@ -183,7 +183,7 @@ int ObLobMetaBuilder::set_lob_table_column_store_if_need(ObTableSchema &table_sc const uint64_t tenant_id = table_schema.get_tenant_id(); if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { LOG_WARN("fail to get min data version", KR(ret), K(tenant_id), "table_id", table_schema.get_table_id()); - } else if (compat_version >= DATA_VERSION_4_2_0_0) { + } else if (compat_version >= DATA_VERSION_4_3_0_0) { table_schema.set_column_store(true); if (table_schema.get_column_group_count() == 0) { if (OB_FAIL(table_schema.add_default_column_group())) { diff --git a/src/rootserver/ob_lob_piece_builder.cpp 
b/src/rootserver/ob_lob_piece_builder.cpp index 7e29c9584..be95ee2e7 100644 --- a/src/rootserver/ob_lob_piece_builder.cpp +++ b/src/rootserver/ob_lob_piece_builder.cpp @@ -183,7 +183,7 @@ int ObLobPieceBuilder::set_lob_table_column_store_if_need(ObTableSchema &table_s const uint64_t tenant_id = table_schema.get_tenant_id(); if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { LOG_WARN("fail to get min data version", KR(ret), K(tenant_id), "table_id", table_schema.get_table_id()); - } else if (compat_version >= DATA_VERSION_4_2_0_0) { + } else if (compat_version >= DATA_VERSION_4_3_0_0) { table_schema.set_column_store(true); if (table_schema.get_column_group_count() == 0) { if (OB_FAIL(table_schema.add_default_column_group())) { diff --git a/src/rootserver/ob_mlog_builder.cpp b/src/rootserver/ob_mlog_builder.cpp index 6f90e6c4b..67751d389 100644 --- a/src/rootserver/ob_mlog_builder.cpp +++ b/src/rootserver/ob_mlog_builder.cpp @@ -396,6 +396,7 @@ int ObMLogBuilder::create_mlog( } else if (OB_FAIL(do_create_mlog(schema_guard, create_mlog_arg, *base_table_schema, + compat_version, create_mlog_res))) { LOG_WARN("failed to do create mlog", KR(ret), K(create_mlog_arg)); } @@ -407,6 +408,7 @@ int ObMLogBuilder::do_create_mlog( ObSchemaGetterGuard &schema_guard, const ObCreateMLogArg &create_mlog_arg, const ObTableSchema &base_table_schema, + const uint64_t tenant_data_version, ObCreateMLogRes &create_mlog_res) { int ret = OB_SUCCESS; @@ -431,7 +433,7 @@ int ObMLogBuilder::do_create_mlog( LOG_WARN("failed to copy table schema", KR(ret)); } else if (OB_FAIL(generate_mlog_schema(schema_guard, create_mlog_arg, src_table_schema, mlog_schema))) { LOG_WARN("failed to generate schema", KR(ret), K(create_mlog_arg), K(src_table_schema)); - } else if (OB_FAIL(ddl_service_.create_mlog_table(trans, create_mlog_arg, schema_guard, mlog_schema))) { + } else if (OB_FAIL(ddl_service_.create_mlog_table(trans, create_mlog_arg, tenant_data_version, schema_guard, mlog_schema))) { 
LOG_WARN("failed to create mlog table", KR(ret), K(create_mlog_arg), K(mlog_schema)); } else { // submit build mlog task @@ -451,6 +453,7 @@ int ObMLogBuilder::do_create_mlog( create_mlog_arg.consumer_group_id_, &allocator, &create_index_arg); + param.tenant_data_version_ = tenant_data_version; if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("failed to submit create mlog task", KR(ret)); } else if (OB_FAIL(ObDDLLock::lock_for_add_drop_index( diff --git a/src/rootserver/ob_mlog_builder.h b/src/rootserver/ob_mlog_builder.h index d6314e5de..43021161c 100644 --- a/src/rootserver/ob_mlog_builder.h +++ b/src/rootserver/ob_mlog_builder.h @@ -33,6 +33,7 @@ public: int do_create_mlog(share::schema::ObSchemaGetterGuard &schema_guard, const obrpc::ObCreateMLogArg &create_mlog_arg, const share::schema::ObTableSchema &table_schema, + const uint64_t tenant_data_version, obrpc::ObCreateMLogRes &create_mlog_res); int generate_mlog_schema(share::schema::ObSchemaGetterGuard &schema_guard, const obrpc::ObCreateMLogArg &create_mlog_arg, @@ -86,4 +87,4 @@ private: }; } // namespace rootserver } // namespace oceanbase -#endif // OCEANBASE_ROOTSERVER_OB_MLOG_BUILDER_H_ \ No newline at end of file +#endif // OCEANBASE_ROOTSERVER_OB_MLOG_BUILDER_H_ diff --git a/src/rootserver/ob_schema_history_recycler.cpp b/src/rootserver/ob_schema_history_recycler.cpp index 12ae880be..0955d5ba8 100644 --- a/src/rootserver/ob_schema_history_recycler.cpp +++ b/src/rootserver/ob_schema_history_recycler.cpp @@ -796,6 +796,9 @@ int ObSchemaHistoryRecycler::try_recycle_schema_history( RECYCLE_FIRST_SCHEMA(RECYCLE_ONLY, tablet, OB_ALL_TABLET_TO_TABLE_HISTORY_TNAME, tablet_id); ret = OB_SUCCESS; // overwrite ret + RECYCLE_SECOND_SCHEMA(column_group, OB_ALL_COLUMN_GROUP_HISTORY_TNAME, table_id, column_group_id); + RECYCLE_SECOND_SCHEMA(column_group_mapping, OB_ALL_COLUMN_GROUP_MAPPING_HISTORY_TNAME, table_id, column_group_id); + ret = OB_SUCCESS; // 
overwrite ret // ----------------------------- database ---------------------------------------- RECYCLE_FIRST_SCHEMA(RECYCLE_AND_COMPRESS, database, OB_ALL_DATABASE_HISTORY_TNAME, database_id); diff --git a/src/rootserver/ob_table_creator.cpp b/src/rootserver/ob_table_creator.cpp index 1f212cbee..0f8237359 100644 --- a/src/rootserver/ob_table_creator.cpp +++ b/src/rootserver/ob_table_creator.cpp @@ -59,7 +59,9 @@ int ObTableCreator::execute() int ObTableCreator::add_create_tablets_of_local_aux_tables_arg( const common::ObIArray &schemas, const share::schema::ObTableSchema *data_table_schema, - const common::ObIArray &ls_id_array) + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors) { int ret = OB_SUCCESS; if (OB_ISNULL(data_table_schema)) { @@ -71,6 +73,10 @@ int ObTableCreator::add_create_tablets_of_local_aux_tables_arg( data_table_schema->is_mlog_table()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("data_table_schema must be data table", KR(ret), KPC(data_table_schema)); + } else if (OB_UNLIKELY(tenant_data_version <= 0 || need_create_empty_majors.count() != schemas.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(tenant_data_version), "count_need_create_empty_majors", need_create_empty_majors.count(), + "count_schemas", schemas.count()); } for (int64_t i = 0; OB_SUCC(ret) && i < schemas.count(); ++i) { const share::schema::ObTableSchema *aux_schema = schemas.at(i); @@ -86,7 +92,7 @@ int ObTableCreator::add_create_tablets_of_local_aux_tables_arg( } if (OB_FAIL(ret)) { } else if (OB_FAIL(add_create_tablets_of_tables_arg_( - schemas, data_table_schema, ls_id_array))) { + schemas, data_table_schema, ls_id_array, tenant_data_version, need_create_empty_majors))) { LOG_WARN("fail to add_create_tablets_of_tables_arg_", KR(ret), K(schemas)); } return ret; @@ -95,10 +101,12 @@ int ObTableCreator::add_create_tablets_of_local_aux_tables_arg( int 
ObTableCreator::add_create_bind_tablets_of_hidden_table_arg( const share::schema::ObTableSchema &orig_table_schema, const share::schema::ObTableSchema &hidden_table_schema, - const common::ObIArray &ls_id_array) + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; ObSEArray schemas; + ObSEArray need_create_empty_majors; if (OB_UNLIKELY(!orig_table_schema.has_tablet() || orig_table_schema.is_index_table() || hidden_table_schema.is_index_table() @@ -107,10 +115,10 @@ int ObTableCreator::add_create_bind_tablets_of_hidden_table_arg( || !hidden_table_schema.is_user_hidden_table())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("both orig and hidden table must be data table", K(ret), K(orig_table_schema), K(hidden_table_schema)); - } else if (OB_FAIL(schemas.push_back(&hidden_table_schema))) { + } else if (OB_FAIL(schemas.push_back(&hidden_table_schema)) || OB_FAIL(need_create_empty_majors.push_back(false))) { LOG_WARN("failed to push back hidden table schema", K(ret)); } else if (OB_FAIL(add_create_tablets_of_tables_arg_( - schemas, &orig_table_schema, ls_id_array))) { + schemas, &orig_table_schema, ls_id_array, tenant_data_version, need_create_empty_majors))) { LOG_WARN("failed to add arg", K(ret), K(schemas)); } return ret; @@ -118,20 +126,21 @@ int ObTableCreator::add_create_bind_tablets_of_hidden_table_arg( int ObTableCreator::add_create_tablets_of_table_arg( const share::schema::ObTableSchema &table_schema, - const common::ObIArray &ls_id_array) + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const bool need_create_empty_major_sstable) { int ret = OB_SUCCESS; ObSEArray schemas; - if (!table_schema.has_tablet() - || table_schema.is_index_local_storage() - || table_schema.is_aux_lob_table() - || table_schema.is_mlog_table()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("table_schema must be data table or global indexes", KR(ret), K(table_schema)); - } else if 
(OB_FAIL(schemas.push_back(&table_schema))) { - LOG_WARN("failed to push_back", KR(ret), K(table_schema)); + ObSEArray need_create_empty_majors; + if (!table_schema.has_tablet() || table_schema.is_index_local_storage() || table_schema.is_aux_lob_table() + || table_schema.is_mlog_table() || tenant_data_version <= 0) { + LOG_WARN("table_schema must be data table or global indexes", KR(ret), K(table_schema), K(tenant_data_version)); + } else if (OB_FAIL(schemas.push_back(&table_schema)) + || OB_FAIL(need_create_empty_majors.push_back(need_create_empty_major_sstable))) { + LOG_WARN("failed to push_back", KR(ret), K(table_schema), K(need_create_empty_major_sstable)); } else if (OB_FAIL(add_create_tablets_of_tables_arg_( - schemas, NULL, ls_id_array))) { + schemas, NULL, ls_id_array, tenant_data_version, need_create_empty_majors))) { LOG_WARN("failed to add create tablet arg", KR(ret), K(table_schema)); } return ret; @@ -139,9 +148,17 @@ int ObTableCreator::add_create_tablets_of_table_arg( int ObTableCreator::add_create_tablets_of_tables_arg( const common::ObIArray &schemas, - const common::ObIArray &ls_id_array) + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors) { int ret = OB_SUCCESS; + if (OB_UNLIKELY(tenant_data_version <= 0 + || schemas.count() != need_create_empty_majors.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(tenant_data_version), "count_schemas", schemas.count(), + "count_need_create_empty_majors", need_create_empty_majors.count()); + } for (int64_t i = 0; OB_SUCC(ret) && i < schemas.count(); ++i) { const share::schema::ObTableSchema *table_schema = schemas.at(i); if (OB_ISNULL(table_schema)) { @@ -166,7 +183,7 @@ int ObTableCreator::add_create_tablets_of_tables_arg( } if (OB_FAIL(ret)) { } else if (OB_FAIL(add_create_tablets_of_tables_arg_( - schemas, NULL, ls_id_array))) { + schemas, NULL, ls_id_array, tenant_data_version, 
need_create_empty_majors))) { LOG_WARN("fail to add_create_tablets_of_tables_arg_", KR(ret), K(schemas)); } return ret; @@ -178,13 +195,17 @@ int ObTableCreator::add_create_tablets_of_tables_arg( int ObTableCreator::add_create_tablets_of_tables_arg_( const common::ObIArray &schemas, const share::schema::ObTableSchema *data_table_schema, - const common::ObIArray &ls_id_array) + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors) { int ret = OB_SUCCESS; const int64_t schema_cnt = schemas.count(); - if (OB_UNLIKELY(schema_cnt < 1)) { + if (OB_UNLIKELY(schema_cnt < 1 || tenant_data_version <= 0 + || schema_cnt != need_create_empty_majors.count())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("schemas count is less 1", KR(ret), K(schema_cnt)); + LOG_WARN("schemas count is less 1", KR(ret), K(schema_cnt), K(tenant_data_version), + "create_major_flag_cnt", need_create_empty_majors.count()); } else if (OB_ISNULL(schemas.at(0))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("NULL ptr", KR(ret), K(schemas)); @@ -268,7 +289,9 @@ int ObTableCreator::add_create_tablets_of_tables_arg_( pairs, OB_INVALID_INDEX, OB_INVALID_INDEX, - is_create_bind_hidden_tablets))) { + is_create_bind_hidden_tablets, + tenant_data_version, + need_create_empty_majors))) { LOG_WARN("fail to generate_create_tablet_arg", K(table_schema), K(schemas), KR(ret), K(is_create_bind_hidden_tablets)); } @@ -292,7 +315,9 @@ int ObTableCreator::add_create_tablets_of_tables_arg_( pairs, i, OB_INVALID_INDEX, - is_create_bind_hidden_tablets))) { + is_create_bind_hidden_tablets, + tenant_data_version, + need_create_empty_majors))) { LOG_WARN("fail to generate_create_tablet_arg", K(table_schema), K(schemas), KR(ret), K(i), K(is_create_bind_hidden_tablets)); } @@ -316,7 +341,9 @@ int ObTableCreator::add_create_tablets_of_tables_arg_( pairs, i, j, - is_create_bind_hidden_tablets))) { + is_create_bind_hidden_tablets, + tenant_data_version, + 
need_create_empty_majors))) { LOG_WARN("fail to generate_create_tablet_arg", K(table_schema), K(schemas), KR(ret), K(i), K(j), K(is_create_bind_hidden_tablets)); } @@ -355,7 +382,9 @@ int ObTableCreator::generate_create_tablet_arg_( common::ObIArray &pairs, const int64_t part_idx, const int64_t subpart_idx, - const bool is_create_bind_hidden_tablets) + const bool is_create_bind_hidden_tablets, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors) { int ret = OB_SUCCESS; ObTabletID data_tablet_id; @@ -416,7 +445,9 @@ int ObTableCreator::generate_create_tablet_arg_( data_tablet_id, schemas, mode, - is_create_bind_hidden_tablets))) { + is_create_bind_hidden_tablets, + tenant_data_version, + need_create_empty_majors))) { LOG_WARN("fail to init create tablet arg", KR(ret), K(schemas), K(is_create_bind_hidden_tablets)); } else if (OB_FAIL(tablet_creator_.add_create_tablet_arg(create_tablet_arg))) { LOG_WARN("fail to add create tablet arg", KR(ret), K(create_tablet_arg)); diff --git a/src/rootserver/ob_table_creator.h b/src/rootserver/ob_table_creator.h index 9ecc859c0..a9b7635c3 100644 --- a/src/rootserver/ob_table_creator.h +++ b/src/rootserver/ob_table_creator.h @@ -48,7 +48,9 @@ public: // @param [in] schemas, tables schema for creating tablets, the first is data table, others are its local indexes int add_create_tablets_of_tables_arg( const common::ObIArray &schemas, - const common::ObIArray &ls_id_array); + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors); // create tablets for local aux tables(include local_index/aux_lob_table), which are belong to a data table. 
// @@ -57,25 +59,32 @@ public: int add_create_tablets_of_local_aux_tables_arg( const common::ObIArray &schemas, const share::schema::ObTableSchema *data_table_schema, - const common::ObIArray &ls_id_array); + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors); // create tablets of hidden table from original table, used by ddl table redefinition int add_create_bind_tablets_of_hidden_table_arg( const share::schema::ObTableSchema &orig_table_schema, const share::schema::ObTableSchema &hidden_table_schema, - const common::ObIArray &ls_id_array); + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version); // create tablets in a table // // @param [in] table_schema, table schema for creating tablets int add_create_tablets_of_table_arg( const share::schema::ObTableSchema &table_schema, - const common::ObIArray &ls_id_array); + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const bool need_create_empty_major_sstable); private: int add_create_tablets_of_tables_arg_( const common::ObIArray &schemas, const share::schema::ObTableSchema *data_table_schema, - const common::ObIArray &ls_id_array); + const common::ObIArray &ls_id_array, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors); int generate_create_tablet_arg_( const common::ObIArray &schemas, const ObTableSchema &data_table_schema, @@ -84,7 +93,9 @@ private: common::ObIArray &pairs, const int64_t part_idx, const int64_t subpart_idx, - const bool is_create_bind_hidden_tablets); + const bool is_create_bind_hidden_tablets, + const uint64_t tenant_data_version, + const common::ObIArray &need_create_empty_majors); int get_tablet_list_str_( const share::schema::ObTableSchema &table_schema, ObSqlString &tablet_list); diff --git a/src/rootserver/ob_tablet_creator.cpp b/src/rootserver/ob_tablet_creator.cpp index ddfd257ab..41767573f 100644 --- 
a/src/rootserver/ob_tablet_creator.cpp +++ b/src/rootserver/ob_tablet_creator.cpp @@ -32,7 +32,9 @@ bool ObTabletCreatorArg::is_valid() const bool is_valid = ls_key_.is_valid() && table_schemas_.count() > 0 && table_schemas_.count() == tablet_ids_.count() - && lib::Worker::CompatMode::INVALID != compat_mode_; + && lib::Worker::CompatMode::INVALID != compat_mode_ + && tenant_data_version_ > 0 + && need_create_empty_majors_.count() == table_schemas_.count(); for (int64_t i = 0; i < tablet_ids_.count() && is_valid; i++) { is_valid = tablet_ids_.at(i).is_valid(); } @@ -47,6 +49,8 @@ void ObTabletCreatorArg::reset() data_tablet_id_.reset(); compat_mode_ = lib::Worker::CompatMode::INVALID; is_create_bind_hidden_tablets_ = false; + tenant_data_version_ = 0; + need_create_empty_majors_.reset(); } int ObTabletCreatorArg::assign(const ObTabletCreatorArg &arg) @@ -59,11 +63,14 @@ int ObTabletCreatorArg::assign(const ObTabletCreatorArg &arg) LOG_WARN("failed to assign table schemas", KR(ret), K(arg)); } else if (OB_FAIL(tablet_ids_.assign(arg.tablet_ids_))) { LOG_WARN("failed to assign table schemas", KR(ret), K(arg)); + } else if (OB_FAIL(need_create_empty_majors_.assign(arg.need_create_empty_majors_))) { + LOG_WARN("failed to assign need create empty majors", KR(ret), K(arg)); } else { data_tablet_id_ = arg.data_tablet_id_; ls_key_ = arg.ls_key_; compat_mode_ = arg.compat_mode_; is_create_bind_hidden_tablets_ = arg.is_create_bind_hidden_tablets_; + tenant_data_version_ = arg.tenant_data_version_; } return ret; } @@ -74,27 +81,35 @@ int ObTabletCreatorArg::init( const ObTabletID data_tablet_id, const ObIArray &table_schemas, const lib::Worker::CompatMode &mode, - const bool is_create_bind_hidden_tablets) + const bool is_create_bind_hidden_tablets, + const uint64_t tenant_data_version, + const ObIArray &need_create_empty_majors) { int ret = OB_SUCCESS; bool is_valid = ls_key.is_valid() && table_schemas.count() > 0 - && table_schemas.count() == tablet_ids.count(); + && 
table_schemas.count() == tablet_ids.count() + && tenant_data_version > 0 + && need_create_empty_majors.count() == table_schemas.count(); for (int64_t i = 0; i < tablet_ids.count() && is_valid; i++) { is_valid = tablet_ids.at(i).is_valid(); } if (OB_UNLIKELY(!is_valid)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tablet_ids), - "count", table_schemas.count(), K(tablet_ids), K(ls_key)); + "count", table_schemas.count(), K(tablet_ids), K(ls_key), + K(tenant_data_version), "count_to_create_empty_major", need_create_empty_majors.count()); } else if (OB_FAIL(tablet_ids_.assign(tablet_ids))) { - LOG_WARN("failed to assign table schemas", KR(ret), K(table_schemas)); + LOG_WARN("failed to assign table schemas", KR(ret), K(tablet_ids)); } else if (OB_FAIL(table_schemas_.assign(table_schemas))) { LOG_WARN("failed to assign table schemas", KR(ret), K(table_schemas)); + } else if (OB_FAIL(need_create_empty_majors_.assign(need_create_empty_majors))) { + LOG_WARN("failed to assign need create empty majors", K(ret), K(need_create_empty_majors)); } else { data_tablet_id_ = data_tablet_id; ls_key_ = ls_key; compat_mode_ = mode; is_create_bind_hidden_tablets_ = is_create_bind_hidden_tablets; + tenant_data_version_ = tenant_data_version; } return ret; } @@ -102,7 +117,8 @@ int ObTabletCreatorArg::init( DEF_TO_STRING(ObTabletCreatorArg) { int64_t pos = 0; - J_KV(K_(compat_mode), K_(tablet_ids), K_(data_tablet_id), K_(ls_key), K_(table_schemas), K_(is_create_bind_hidden_tablets)); + J_KV(K_(compat_mode), K_(tablet_ids), K_(data_tablet_id), K_(ls_key), K_(table_schemas), K_(is_create_bind_hidden_tablets), + K_(tenant_data_version), K_(need_create_empty_majors)); return pos; } @@ -139,12 +155,15 @@ int ObBatchCreateTabletHelper::add_arg_to_batch_arg( ObArray index_array; for (int64_t i = 0; OB_SUCC(ret) && i < tablet_arg.table_schemas_.count(); ++i) { const share::schema::ObTableSchema *table_schema = tablet_arg.table_schemas_.at(i); + const uint64_t 
tenant_data_version = tablet_arg.tenant_data_version_; + const bool need_create_empty_major = tablet_arg.need_create_empty_majors_.at(i); int64_t index = OB_INVALID_INDEX; if (OB_ISNULL(table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("table schema is null", KR(ret), K(i), K(tablet_arg)); - } else if (OB_FAIL(try_add_table_schema(table_schema, index, tablet_arg.compat_mode_))) { - LOG_WARN("failed to add table schema to batch", KR(ret), K(table_schema), K(index), K(batch_arg_)); + } else if (OB_FAIL(try_add_table_schema(table_schema, tenant_data_version, + need_create_empty_major, index, tablet_arg.compat_mode_))) { + LOG_WARN("failed to add table schema to batch", KR(ret), K(table_schema), K(need_create_empty_major), K(index), K(batch_arg_)); } else if (OB_UNLIKELY(OB_INVALID_INDEX == index)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("index can not be invalid", KR(ret), K(index), K(tablet_arg), K(batch_arg_)); @@ -168,15 +187,15 @@ int ObBatchCreateTabletHelper::add_arg_to_batch_arg( return ret; } -int ObBatchCreateTabletHelper::add_table_schema_(const share::schema::ObTableSchema &table_schema, +int ObBatchCreateTabletHelper::add_table_schema_( + const share::schema::ObTableSchema &table_schema, const lib::Worker::CompatMode compat_mode, + const uint64_t tenant_data_version, + const bool need_create_empty_major, int64_t &index) { int ret = OB_SUCCESS; - uint64_t data_version = 0; - if (OB_FAIL(GET_MIN_DATA_VERSION(table_schema.get_tenant_id(), data_version))) { - LOG_WARN("failed to get data version", KR(ret), K(table_schema)); - } else if (data_version < DATA_VERSION_4_2_2_0) { + if (tenant_data_version < DATA_VERSION_4_2_2_0) { // compatibility with DATA_VERSION_4_2_1. 
index = batch_arg_.table_schemas_.count(); if (OB_FAIL(batch_arg_.table_schemas_.push_back(table_schema))) { @@ -191,7 +210,8 @@ int ObBatchCreateTabletHelper::add_table_schema_(const share::schema::ObTableSch LOG_WARN("failed to allocate storage schema", KR(ret), K(table_schema)); } else if (FALSE_IT(create_tablet_schema = new (create_tablet_schema_ptr)ObCreateTabletSchema())) { } else if (OB_FAIL(create_tablet_schema->init(batch_arg_.allocator_, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + tenant_data_version, need_create_empty_major))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(batch_arg_.create_tablet_schemas_.push_back(create_tablet_schema))) { LOG_WARN("failed to push back table schema", KR(ret), K(table_schema)); @@ -202,6 +222,8 @@ int ObBatchCreateTabletHelper::add_table_schema_(const share::schema::ObTableSch int ObBatchCreateTabletHelper::try_add_table_schema( const share::schema::ObTableSchema *table_schema, + const uint64_t tenant_data_version, + const bool need_create_empty_major, int64_t &index, const lib::Worker::CompatMode compat_mode) { @@ -219,7 +241,8 @@ int ObBatchCreateTabletHelper::try_add_table_schema( if (OB_FAIL(temp_table_schema.assign(*table_schema))) { LOG_WARN("failed to assign temp_table_schema", KR(ret), KPC(table_schema)); } else if (FALSE_IT(temp_table_schema.reset_partition_schema())) { - } else if (OB_FAIL(add_table_schema_(temp_table_schema, compat_mode, index))) { + } else if (OB_FAIL(add_table_schema_(temp_table_schema, compat_mode, + tenant_data_version, need_create_empty_major, index))) { LOG_WARN("failed to push back table schema", KR(ret), K(temp_table_schema)); } else if (OB_FAIL(table_schemas_map_.set_refactored(temp_table_schema.get_table_id(), index))) { LOG_WARN("failed to set table schema map", KR(ret), K(index), 
K(temp_table_schema)); diff --git a/src/rootserver/ob_tablet_creator.h b/src/rootserver/ob_tablet_creator.h index 56dc00605..5106960be 100644 --- a/src/rootserver/ob_tablet_creator.h +++ b/src/rootserver/ob_tablet_creator.h @@ -37,7 +37,9 @@ public: ls_key_(), table_schemas_(), compat_mode_(lib::Worker::CompatMode::INVALID), - is_create_bind_hidden_tablets_(false) {} + is_create_bind_hidden_tablets_(false), + tenant_data_version_(0), + need_create_empty_majors_() {} virtual ~ObTabletCreatorArg() {} bool is_valid() const; void reset(); @@ -47,7 +49,9 @@ public: const common::ObTabletID data_tablet_id, const ObIArray &table_schemas, const lib::Worker::CompatMode &mode, - const bool is_create_bind_hidden_tablets); + const bool is_create_bind_hidden_tablets, + const uint64_t tenant_data_version, + const ObIArray &need_create_empty_majors); DECLARE_TO_STRING; common::ObArray tablet_ids_; @@ -56,6 +60,8 @@ public: common::ObArray table_schemas_; lib::Worker::CompatMode compat_mode_; bool is_create_bind_hidden_tablets_; + uint64_t tenant_data_version_; + common::ObArray need_create_empty_majors_; private: DISALLOW_COPY_AND_ASSIGN(ObTabletCreatorArg); }; @@ -74,6 +80,8 @@ public: const share::SCN &major_frozen_scn, const bool need_check_tablet_cnt); int try_add_table_schema(const share::schema::ObTableSchema *table_schema, + const uint64_t tenant_data_version, + const bool need_create_empty_major_sstable, int64_t &index, const lib::Worker::CompatMode compat_mode); int add_arg_to_batch_arg(const ObTabletCreatorArg &arg); @@ -93,6 +101,8 @@ public: private: int add_table_schema_(const share::schema::ObTableSchema &table_schema, const lib::Worker::CompatMode compat_mode, + const uint64_t tenant_data_version, + const bool need_create_empty_major, int64_t &index); DISALLOW_COPY_AND_ASSIGN(ObBatchCreateTabletHelper); }; diff --git a/src/rootserver/parallel_ddl/ob_create_table_helper.cpp b/src/rootserver/parallel_ddl/ob_create_table_helper.cpp index 2189de4e8..d9fa135c7 100644 
--- a/src/rootserver/parallel_ddl/ob_create_table_helper.cpp +++ b/src/rootserver/parallel_ddl/ob_create_table_helper.cpp @@ -2318,6 +2318,8 @@ int ObCreateTableHelper::create_tablets_() SCN frozen_scn; ObSchemaGetterGuard schema_guard; ObSchemaService *schema_service_impl = NULL; + uint64_t tenant_data_version = 0; + if (OB_FAIL(check_inner_stat_())) { LOG_WARN("fail to check inner stat", KR(ret)); } else if (OB_ISNULL(schema_service_impl = schema_service_->get_schema_service())) { @@ -2330,6 +2332,8 @@ int ObCreateTableHelper::create_tablets_() } else if (OB_UNLIKELY(new_tables_.count() <= 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected table cnt", KR(ret), K(new_tables_.count())); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id_, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K_(tenant_id)); } else { ObTableCreator table_creator( tenant_id_, @@ -2360,6 +2364,7 @@ int ObCreateTableHelper::create_tablets_() } else { ObArray schemas; common::ObArray ls_id_array; + ObArray need_create_empty_majors; for (int64_t i = 0; OB_SUCC(ret) && i < new_tables_.count(); i++) { const ObTableSchema &new_table = new_tables_.at(i); const uint64_t table_id = new_table.get_table_id(); @@ -2368,6 +2373,8 @@ int ObCreateTableHelper::create_tablets_() } else if (!new_table.is_global_index_table()) { if (OB_FAIL(schemas.push_back(&new_table))) { LOG_WARN("fail to push back new table", KR(ret)); + } else if (OB_FAIL(need_create_empty_majors.push_back(true))) { + LOG_WARN("fail to push back need create empty major", KR(ret)); } } else { if (OB_FAIL(new_table_tablet_allocator.prepare(trans_, new_table))) { @@ -2375,7 +2382,7 @@ int ObCreateTableHelper::create_tablets_() } else if (OB_FAIL(new_table_tablet_allocator.get_ls_id_array(ls_id_array))) { LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_table_arg( - new_table, ls_id_array))) { + new_table, ls_id_array, tenant_data_version, true/*need create 
major sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(new_table)); } } @@ -2395,7 +2402,7 @@ int ObCreateTableHelper::create_tablets_() } else if (OB_FAIL(new_table_tablet_allocator.get_ls_id_array(ls_id_array))) { LOG_WARN("fail to get ls id array", KR(ret)); } else if (OB_FAIL(table_creator.add_create_tablets_of_tables_arg( - schemas, ls_id_array))) { + schemas, ls_id_array, tenant_data_version, need_create_empty_majors /*need create major sstable*/))) { LOG_WARN("create table partitions failed", KR(ret), K(data_table)); } else if (OB_FAIL(table_creator.execute())) { LOG_WARN("execute create partition failed", KR(ret)); diff --git a/src/share/datum/ob_datum.h b/src/share/datum/ob_datum.h index f0a171a13..f5576fbe8 100644 --- a/src/share/datum/ob_datum.h +++ b/src/share/datum/ob_datum.h @@ -162,6 +162,11 @@ struct ObDatumDesc { bool is_ext() const { return flag_ == FlagType::EXT; } void set_outrow() { null_ = 0; flag_ = FlagType::OUTROW; } bool is_outrow() const { return flag_ == FlagType::OUTROW; } + + void set_flag(const FlagType &flag_type) { flag_ = flag_type; } + void set_has_lob_header() { flag_ = FlagType::HAS_LOB_HEADER; } + bool has_lob_header() const { return flag_ == FlagType::HAS_LOB_HEADER; } + void set_flag_none() { flag_ = FlagType::NONE; } } __attribute__ ((packed)) ; // Datum structure, multiple inheritance from ObDatumPtr and ObDatumDesc makes diff --git a/src/share/diagnosis/ob_sql_monitor_statname.h b/src/share/diagnosis/ob_sql_monitor_statname.h index f1e70eea0..ca58652b2 100644 --- a/src/share/diagnosis/ob_sql_monitor_statname.h +++ b/src/share/diagnosis/ob_sql_monitor_statname.h @@ -56,6 +56,7 @@ SQL_MONITOR_STATNAME_DEF(SORT_DUMP_DATA_TIME, sql_monitor_statname::INT, "sort d // SSTABLE INSERT SQL_MONITOR_STATNAME_DEF(DDL_TASK_ID, sql_monitor_statname::INT, "ddl task id", "sort ddl task id") SQL_MONITOR_STATNAME_DEF(SSTABLE_INSERT_ROW_COUNT, sql_monitor_statname::INT, "sstable insert row count", "sstable insert row 
count") +SQL_MONITOR_STATNAME_DEF(SSTABLE_INSERT_CG_ROW_COUNT, sql_monitor_statname::INT, "sstable insert cg_row count", "sstable insert cg row count") // Table Scan stat SQL_MONITOR_STATNAME_DEF(IO_READ_BYTES, sql_monitor_statname::CAPACITY, "total io bytes read from disk", "total io bytes read from storage") SQL_MONITOR_STATNAME_DEF(TOTAL_READ_BYTES, sql_monitor_statname::CAPACITY, "total bytes processed by storage", "total bytes processed by storage, including memtable") diff --git a/src/share/inner_table/ob_inner_table_schema.12401_12450.cpp b/src/share/inner_table/ob_inner_table_schema.12401_12450.cpp index da3aeccf1..e2e3df408 100644 --- a/src/share/inner_table/ob_inner_table_schema.12401_12450.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12401_12450.cpp @@ -8692,6 +8692,538 @@ int ObInnerTableSchema::all_virtual_import_table_task_history_schema(ObTableSche return ret; } +int ObInnerTableSchema::all_virtual_column_group_mapping_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(4); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set 
compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("table_id", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("column_group_id", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("column_id", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("gmt_create", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision 
+ -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("gmt_modified", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + table_schema.set_index_using_type(USING_BTREE); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_column_group_history_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(4); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set 
compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("table_id", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("column_group_id", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("schema_version", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("gmt_create", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, 
//column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("gmt_modified", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("is_deleted", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ObObj column_group_name_default; + column_group_name_default.set_varchar(ObString::make_string("")); + ADD_COLUMN_SCHEMA_T("column_group_name", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_COLUMN_GROUP_NAME_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false, //is_autoincrement + column_group_name_default, + column_group_name_default); //default_value + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("column_group_type", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("block_size", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + 
ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("compressor_type", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("row_store_type", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + table_schema.set_index_using_type(USING_BTREE); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_column_group_mapping_history_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(5); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + 
table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("table_id", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("column_group_id", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("column_id", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), 
//column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("schema_version", //column_name + ++column_id, //column_id + 5, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("gmt_create", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("gmt_modified", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("is_deleted", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + table_schema.set_index_using_type(USING_BTREE); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + 
table_schema.set_max_used_column_id(column_id); + return ret; +} + int ObInnerTableSchema::all_virtual_clone_job_schema(ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/share/inner_table/ob_inner_table_schema.15401_15450.cpp b/src/share/inner_table/ob_inner_table_schema.15401_15450.cpp index 5dcf73599..f5cd9c8f3 100644 --- a/src/share/inner_table/ob_inner_table_schema.15401_15450.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15401_15450.cpp @@ -4659,6 +4659,528 @@ int ObInnerTableSchema::all_virtual_ls_info_ora_schema(ObTableSchema &table_sche return ret; } +int ObInnerTableSchema::all_virtual_column_group_mapping_real_agent_ora_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_ORA_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(4); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCollationType::CS_TYPE_UTF8MB4_BIN); + + if (OB_SUCC(ret)) { + 
ADD_COLUMN_SCHEMA("TENANT_ID", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TABLE_ID", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_GROUP_ID", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_ID", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("GMT_CREATE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampLTZType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("GMT_MODIFIED", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampLTZType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision 
+ -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + table_schema.set_index_using_type(USING_BTREE); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_column_group_history_ora_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_ORA_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(4); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCollationType::CS_TYPE_UTF8MB4_BIN); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TENANT_ID", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, 
//column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TABLE_ID", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_GROUP_ID", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SCHEMA_VERSION", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("GMT_CREATE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampLTZType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("GMT_MODIFIED", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampLTZType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("IS_DELETED", //column_name + ++column_id, 
//column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_GROUP_NAME", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_UTF8MB4_BIN, //column_collation_type + OB_MAX_COLUMN_GROUP_NAME_LENGTH, //column_length + 2, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_GROUP_TYPE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("BLOCK_SIZE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COMPRESSOR_TYPE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ROW_STORE_TYPE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + 
true, //is_nullable + false); //is_autoincrement + } + table_schema.set_index_using_type(USING_BTREE); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_column_group_mapping_history_ora_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_ORA_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(5); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCollationType::CS_TYPE_UTF8MB4_BIN); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TENANT_ID", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, 
//column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TABLE_ID", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_GROUP_ID", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COLUMN_ID", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SCHEMA_VERSION", //column_name + ++column_id, //column_id + 5, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("GMT_CREATE", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampLTZType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("GMT_MODIFIED", //column_name + ++column_id, //column_id 
+ 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampLTZType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("IS_DELETED", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + table_schema.set_index_using_type(USING_BTREE); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + int ObInnerTableSchema::all_virtual_aux_stat_real_agent_ora_schema(ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/share/inner_table/ob_inner_table_schema.21101_21150.cpp b/src/share/inner_table/ob_inner_table_schema.21101_21150.cpp index d4070ce27..97afb67c5 100644 --- a/src/share/inner_table/ob_inner_table_schema.21101_21150.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21101_21150.cpp @@ -60,7 +60,7 @@ int ObInnerTableSchema::gv_ob_sstables_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT M.SVR_IP, M.SVR_PORT, (case M.TABLE_TYPE when 0 then 'MEMTABLE' when 1 then 'TX_DATA_MEMTABLE' when 2 then 'TX_CTX_MEMTABLE' when 3 then 'LOCK_MEMTABLE' when 10 then 'MAJOR' when 11 then 'MINOR' when 12 then 'MINI' when 13 then 'META' when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 'DDL_MEM' when 17 then 
'CO_MAJOR' when 18 then 'NORMAL_CG' when 19 then 'ROWKEY_CG' else 'INVALID' end) as TABLE_TYPE, M.TENANT_ID, M.LS_ID, M.TABLET_ID, M.CG_IDX, M.START_LOG_SCN, M.END_LOG_SCN, M.DATA_CHECKSUM, M.SIZE, M.REF, M.UPPER_TRANS_VERSION, M.IS_ACTIVE, M.CONTAIN_UNCOMMITTED_ROW FROM oceanbase.__all_virtual_table_mgr M )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT M.SVR_IP, M.SVR_PORT, (case M.TABLE_TYPE when 0 then 'MEMTABLE' when 1 then 'TX_DATA_MEMTABLE' when 2 then 'TX_CTX_MEMTABLE' when 3 then 'LOCK_MEMTABLE' when 10 then 'MAJOR' when 11 then 'MINOR' when 12 then 'MINI' when 13 then 'META' when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 'DDL_MEM' when 17 then 'CO_MAJOR' when 18 then 'NORMAL_CG' when 19 then 'ROWKEY_CG' when 20 then 'DDL_MERGE' else 'INVALID' end) as TABLE_TYPE, M.TENANT_ID, M.LS_ID, M.TABLET_ID, M.CG_IDX, M.START_LOG_SCN, M.END_LOG_SCN, M.DATA_CHECKSUM, M.SIZE, M.REF, M.UPPER_TRANS_VERSION, M.IS_ACTIVE, M.CONTAIN_UNCOMMITTED_ROW FROM oceanbase.__all_virtual_table_mgr M )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.28051_28100.cpp b/src/share/inner_table/ob_inner_table_schema.28051_28100.cpp index 620bc4685..a0f0b5a1c 100644 --- a/src/share/inner_table/ob_inner_table_schema.28051_28100.cpp +++ b/src/share/inner_table/ob_inner_table_schema.28051_28100.cpp @@ -310,7 +310,7 @@ int ObInnerTableSchema::gv_ob_sstables_ora_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT M.SVR_IP, M.SVR_PORT, (case M.TABLE_TYPE when 0 then 'MEMTABLE' when 1 then 'TX_DATA_MEMTABLE' when 2 then 'TX_CTX_MEMTABLE' when 3 then 'LOCK_MEMTABLE' when 10 then 'MAJOR' when 11 then 'MINOR' when 12 then 'MINI' when 13 then 'META' when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 
'IMC_SEGMENT' else 'INVALID' end) as TABLE_TYPE, M.LS_ID, M.TABLET_ID, M.START_LOG_SCN, M.END_LOG_SCN, M."SIZE", M.REF, M.UPPER_TRANS_VERSION, M.IS_ACTIVE, M.CONTAIN_UNCOMMITTED_ROW FROM SYS.ALL_VIRTUAL_TABLE_MGR M )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT M.SVR_IP, M.SVR_PORT, (case M.TABLE_TYPE when 0 then 'MEMTABLE' when 1 then 'TX_DATA_MEMTABLE' when 2 then 'TX_CTX_MEMTABLE' when 3 then 'LOCK_MEMTABLE' when 10 then 'MAJOR' when 11 then 'MINOR' when 12 then 'MINI' when 13 then 'META' when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 'IMC_SEGMENT' when 20 then 'DDL_MERGE' else 'INVALID' end) as TABLE_TYPE, M.LS_ID, M.TABLET_ID, M.START_LOG_SCN, M.END_LOG_SCN, M."SIZE", M.REF, M.UPPER_TRANS_VERSION, M.IS_ACTIVE, M.CONTAIN_UNCOMMITTED_ROW FROM SYS.ALL_VIRTUAL_TABLE_MGR M )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index a4ab3293c..084e94b8f 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -988,6 +988,9 @@ public: static int all_virtual_import_table_job_history_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_import_table_task_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_import_table_task_history_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_column_group_mapping_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_column_group_history_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_column_group_mapping_history_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_clone_job_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_clone_job_history_schema(share::schema::ObTableSchema &table_schema); static int 
all_virtual_aux_stat_schema(share::schema::ObTableSchema &table_schema); @@ -1251,6 +1254,9 @@ public: static int all_virtual_import_table_task_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_import_table_task_history_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_ls_info_ora_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_column_group_mapping_real_agent_ora_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_column_group_history_ora_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_column_group_mapping_history_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_aux_stat_real_agent_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_ls_snapshot_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_index_usage_info_real_agent_ora_schema(share::schema::ObTableSchema &table_schema); @@ -3551,6 +3557,9 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_import_table_job_history_schema, ObInnerTableSchema::all_virtual_import_table_task_schema, ObInnerTableSchema::all_virtual_import_table_task_history_schema, + ObInnerTableSchema::all_virtual_column_group_mapping_schema, + ObInnerTableSchema::all_virtual_column_group_history_schema, + ObInnerTableSchema::all_virtual_column_group_mapping_history_schema, ObInnerTableSchema::all_virtual_clone_job_schema, ObInnerTableSchema::all_virtual_clone_job_history_schema, ObInnerTableSchema::all_virtual_aux_stat_schema, @@ -3824,6 +3833,9 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_import_table_task_ora_schema, ObInnerTableSchema::all_virtual_import_table_task_history_ora_schema, ObInnerTableSchema::all_virtual_ls_info_ora_schema, + ObInnerTableSchema::all_virtual_column_group_mapping_real_agent_ora_schema, + 
ObInnerTableSchema::all_virtual_column_group_history_ora_schema, + ObInnerTableSchema::all_virtual_column_group_mapping_history_ora_schema, ObInnerTableSchema::all_virtual_aux_stat_real_agent_ora_schema, ObInnerTableSchema::all_virtual_ls_snapshot_ora_schema, ObInnerTableSchema::all_virtual_index_usage_info_real_agent_ora_schema, @@ -5324,6 +5336,9 @@ const uint64_t tenant_space_tables [] = { OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_HISTORY_TID, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_TID, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TID, OB_ALL_VIRTUAL_CLONE_JOB_TID, OB_ALL_VIRTUAL_CLONE_JOB_HISTORY_TID, OB_ALL_VIRTUAL_TENANT_SNAPSHOT_JOB_TID, @@ -5594,6 +5609,9 @@ const uint64_t tenant_space_tables [] = { OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_ORA_TID, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_ORA_TID, OB_ALL_VIRTUAL_LS_INFO_ORA_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TID, OB_ALL_VIRTUAL_AUX_STAT_REAL_AGENT_ORA_TID, OB_ALL_VIRTUAL_LS_SNAPSHOT_ORA_TID, OB_ALL_VIRTUAL_INDEX_USAGE_INFO_REAL_AGENT_ORA_TID, @@ -7133,6 +7151,8 @@ const uint64_t all_ora_mapping_virtual_table_org_tables [] = { OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_TID, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_TID, OB_ALL_VIRTUAL_LS_INFO_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TID, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TID, OB_ALL_VIRTUAL_LS_SNAPSHOT_TID, }; const uint64_t all_ora_mapping_virtual_tables [] = { OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID @@ -7272,6 +7292,8 @@ const uint64_t all_ora_mapping_virtual_tables [] = { OB_ALL_VIRTUAL_SQL_AUDIT_O , OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_ORA_TID , OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_ORA_TID , OB_ALL_VIRTUAL_LS_INFO_ORA_TID +, OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TID +, OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TID , 
OB_ALL_VIRTUAL_LS_SNAPSHOT_ORA_TID , }; @@ -7757,6 +7779,9 @@ const char* const tenant_space_table_names [] = { OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_HISTORY_TNAME, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_TNAME, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_TNAME, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TNAME, + OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TNAME, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TNAME, OB_ALL_VIRTUAL_CLONE_JOB_TNAME, OB_ALL_VIRTUAL_CLONE_JOB_HISTORY_TNAME, OB_ALL_VIRTUAL_TENANT_SNAPSHOT_JOB_TNAME, @@ -8027,6 +8052,9 @@ const char* const tenant_space_table_names [] = { OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_ORA_TNAME, OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_ORA_TNAME, OB_ALL_VIRTUAL_LS_INFO_ORA_TNAME, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TNAME, + OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TNAME, + OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TNAME, OB_ALL_VIRTUAL_AUX_STAT_REAL_AGENT_ORA_TNAME, OB_ALL_VIRTUAL_LS_SNAPSHOT_ORA_TNAME, OB_ALL_VIRTUAL_INDEX_USAGE_INFO_REAL_AGENT_ORA_TNAME, @@ -12201,11 +12229,11 @@ static inline int get_sys_table_lob_aux_schema(const uint64_t tid, const int64_t OB_CORE_TABLE_COUNT = 4; const int64_t OB_SYS_TABLE_COUNT = 279; -const int64_t OB_VIRTUAL_TABLE_COUNT = 771; +const int64_t OB_VIRTUAL_TABLE_COUNT = 777; const int64_t OB_SYS_VIEW_COUNT = 826; -const int64_t OB_SYS_TENANT_TABLE_COUNT = 1881; +const int64_t OB_SYS_TENANT_TABLE_COUNT = 1887; const int64_t OB_CORE_SCHEMA_VERSION = 1; -const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1884; +const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1890; } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema.vt.cpp b/src/share/inner_table/ob_inner_table_schema.vt.cpp index 579a81999..6919f6b42 100644 --- a/src/share/inner_table/ob_inner_table_schema.vt.cpp +++ b/src/share/inner_table/ob_inner_table_schema.vt.cpp @@ -77,6 +77,13 @@ bool vt_mapping_init() tmp_vt_mapping.is_real_vt_ = true; } + { + int64_t idx = 
OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TID - start_idx; + VTMapping &tmp_vt_mapping = vt_mappings[idx]; + tmp_vt_mapping.mapping_tid_ = OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TID; + tmp_vt_mapping.is_real_vt_ = true; + } + { int64_t idx = OB_ALL_VIRTUAL_COLUMN_REAL_AGENT_ORA_TID - start_idx; VTMapping &tmp_vt_mapping = vt_mappings[idx]; diff --git a/src/share/inner_table/ob_inner_table_schema_constants.h b/src/share/inner_table/ob_inner_table_schema_constants.h index 04999b5c7..f475b658a 100644 --- a/src/share/inner_table/ob_inner_table_schema_constants.h +++ b/src/share/inner_table/ob_inner_table_schema_constants.h @@ -724,6 +724,9 @@ const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_TID = 12424; // "__all_virtual_im const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_HISTORY_TID = 12425; // "__all_virtual_import_table_job_history" const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_TID = 12426; // "__all_virtual_import_table_task" const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_TID = 12427; // "__all_virtual_import_table_task_history" +const uint64_t OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TID = 12430; // "__all_virtual_column_group_mapping" +const uint64_t OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TID = 12431; // "__all_virtual_column_group_history" +const uint64_t OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TID = 12432; // "__all_virtual_column_group_mapping_history" const uint64_t OB_ALL_VIRTUAL_CLONE_JOB_TID = 12435; // "__all_virtual_clone_job" const uint64_t OB_ALL_VIRTUAL_CLONE_JOB_HISTORY_TID = 12436; // "__all_virtual_clone_job_history" const uint64_t OB_ALL_VIRTUAL_AUX_STAT_TID = 12447; // "__all_virtual_aux_stat" @@ -987,6 +990,9 @@ const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_HISTORY_ORA_TID = 15410; // "ALL_ const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_ORA_TID = 15411; // "ALL_VIRTUAL_IMPORT_TABLE_TASK_ORA" const uint64_t OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_ORA_TID = 15412; // "ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_ORA" const uint64_t 
OB_ALL_VIRTUAL_LS_INFO_ORA_TID = 15414; // "ALL_VIRTUAL_LS_INFO_ORA" +const uint64_t OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TID = 15417; // "ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA" +const uint64_t OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TID = 15419; // "ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA" +const uint64_t OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TID = 15420; // "ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA" const uint64_t OB_ALL_VIRTUAL_AUX_STAT_REAL_AGENT_ORA_TID = 15427; // "ALL_VIRTUAL_AUX_STAT_REAL_AGENT_ORA" const uint64_t OB_ALL_VIRTUAL_LS_SNAPSHOT_ORA_TID = 15439; // "ALL_VIRTUAL_LS_SNAPSHOT_ORA" const uint64_t OB_ALL_VIRTUAL_INDEX_USAGE_INFO_REAL_AGENT_ORA_TID = 15440; // "ALL_VIRTUAL_INDEX_USAGE_INFO_REAL_AGENT_ORA" @@ -3271,6 +3277,9 @@ const char *const OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_TNAME = "__all_virtual_import_ const char *const OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_HISTORY_TNAME = "__all_virtual_import_table_job_history"; const char *const OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_TNAME = "__all_virtual_import_table_task"; const char *const OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_TNAME = "__all_virtual_import_table_task_history"; +const char *const OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TNAME = "__all_virtual_column_group_mapping"; +const char *const OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TNAME = "__all_virtual_column_group_history"; +const char *const OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TNAME = "__all_virtual_column_group_mapping_history"; const char *const OB_ALL_VIRTUAL_CLONE_JOB_TNAME = "__all_virtual_clone_job"; const char *const OB_ALL_VIRTUAL_CLONE_JOB_HISTORY_TNAME = "__all_virtual_clone_job_history"; const char *const OB_ALL_VIRTUAL_AUX_STAT_TNAME = "__all_virtual_aux_stat"; @@ -3534,6 +3543,9 @@ const char *const OB_ALL_VIRTUAL_IMPORT_TABLE_JOB_HISTORY_ORA_TNAME = "ALL_VIRTU const char *const OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_ORA_TNAME = "ALL_VIRTUAL_IMPORT_TABLE_TASK"; const char *const 
OB_ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY_ORA_TNAME = "ALL_VIRTUAL_IMPORT_TABLE_TASK_HISTORY"; const char *const OB_ALL_VIRTUAL_LS_INFO_ORA_TNAME = "ALL_VIRTUAL_LS_INFO"; +const char *const OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT_ORA_TNAME = "ALL_VIRTUAL_COLUMN_GROUP_MAPPING_REAL_AGENT"; +const char *const OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_ORA_TNAME = "ALL_VIRTUAL_COLUMN_GROUP_HISTORY"; +const char *const OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_ORA_TNAME = "ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY"; const char *const OB_ALL_VIRTUAL_AUX_STAT_REAL_AGENT_ORA_TNAME = "ALL_VIRTUAL_AUX_STAT_REAL_AGENT"; const char *const OB_ALL_VIRTUAL_LS_SNAPSHOT_ORA_TNAME = "ALL_VIRTUAL_LS_SNAPSHOT"; const char *const OB_ALL_VIRTUAL_INDEX_USAGE_INFO_REAL_AGENT_ORA_TNAME = "ALL_VIRTUAL_INDEX_USAGE_INFO_REAL_AGENT"; diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index 1b345ccd3..0fb391600 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -13656,13 +13656,27 @@ def_table_schema(**gen_iterate_private_virtual_table_def( table_name = '__all_virtual_import_table_task_history', keywords = all_def_keywords['__all_import_table_task_history'], in_tenant_space = True)) + # 12428: __all_virtual_import_stmt_exec_history # 12429: __all_virtual_data_activity_metrics -# 12430: __all_virtual_column_group_mapping -# 12431: __all_virtual_column_group_history -# 12432: __all_virtual_column_gorup_mapping_history +def_table_schema(**gen_iterate_virtual_table_def( + table_id = '12430', + table_name = '__all_virtual_column_group_mapping', + keywords = all_def_keywords['__all_column_group_mapping'], + in_tenant_space = True)) +def_table_schema(**gen_iterate_virtual_table_def( + table_id = '12431', + table_name = '__all_virtual_column_group_history', + keywords = all_def_keywords['__all_column_group_history'], + in_tenant_space = True)) 
+def_table_schema(**gen_iterate_virtual_table_def( + table_id = '12432', + table_name = '__all_virtual_column_group_mapping_history', + keywords = all_def_keywords['__all_column_group_mapping_history'], + in_tenant_space = True)) + # 12433: __all_virtual_storage_ha_error_diagnose # 12434: __all_virtual_storage_ha_perf_diagnose @@ -14154,11 +14168,11 @@ def_table_schema(**gen_oracle_mapping_virtual_table_def('15414', all_def_keyword # 15415: idx_dbms_lock_allocated_lockhandle_real_agent # 15416: idx_dbms_lock_allocated_expiration_real_agent -# 15417: __all_virtual_column_group_mapping +def_table_schema(**gen_oracle_mapping_real_virtual_table_def('15417', all_def_keywords['__all_virtual_column_group_mapping'])) # 15418: __all_virtual_cgroup_config -# 15419: __all_virutal_column_group_history -# 15420: __all_virutal_column_group_maping_history +def_table_schema(**gen_oracle_mapping_virtual_table_def('15419', all_def_keywords['__all_virtual_column_group_history'])) +def_table_schema(**gen_oracle_mapping_virtual_table_def('15420', all_def_keywords['__all_virtual_column_group_mapping_history'])) # 15421: __all_virtual_wr_system_event # 15422: __all_virtual_wr_event_name # 15423: __all_tenant_scheduler_running_job @@ -14186,6 +14200,7 @@ def_table_schema(**no_direct_access(gen_oracle_mapping_real_virtual_table_def('1 # 15442: __all_virtual_column_group # 余留位置 + ################################################################################ # System View (20000,30000] # MySQL System View (20000, 25000] @@ -16813,7 +16828,7 @@ SELECT when 3 then 'LOCK_MEMTABLE' when 10 then 'MAJOR' when 11 then 'MINOR' when 12 then 'MINI' when 13 then 'META' when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 'DDL_MEM' - when 17 then 'CO_MAJOR' when 18 then 'NORMAL_CG' when 19 then 'ROWKEY_CG' + when 17 then 'CO_MAJOR' when 18 then 'NORMAL_CG' when 19 then 'ROWKEY_CG' when 20 then 'DDL_MERGE' else 'INVALID' end) as TABLE_TYPE, M.TENANT_ID, @@ -51818,7 +51833,7 @@ 
SELECT when 0 then 'MEMTABLE' when 1 then 'TX_DATA_MEMTABLE' when 2 then 'TX_CTX_MEMTABLE' when 3 then 'LOCK_MEMTABLE' when 10 then 'MAJOR' when 11 then 'MINOR' when 12 then 'MINI' when 13 then 'META' - when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 'IMC_SEGMENT' + when 14 then 'DDL_DUMP' when 15 then 'REMOTE_LOGICAL_MINOR' when 16 then 'IMC_SEGMENT' when 20 then 'DDL_MERGE' else 'INVALID' end) as TABLE_TYPE, M.LS_ID, diff --git a/src/share/inner_table/ob_inner_table_schema_misc.ipp b/src/share/inner_table/ob_inner_table_schema_misc.ipp index 4817fa04d..0832babfa 100644 --- a/src/share/inner_table/ob_inner_table_schema_misc.ipp +++ b/src/share/inner_table/ob_inner_table_schema_misc.ipp @@ -1765,6 +1765,9 @@ case OB_ALL_VIRTUAL_COLL_TYPE_TID: case OB_ALL_VIRTUAL_COLL_TYPE_HISTORY_TID: case OB_ALL_VIRTUAL_COLUMN_TID: case OB_ALL_VIRTUAL_COLUMN_GROUP_TID: +case OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TID: +case OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TID: +case OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TID: case OB_ALL_VIRTUAL_COLUMN_HISTORY_TID: case OB_ALL_VIRTUAL_COLUMN_STAT_TID: case OB_ALL_VIRTUAL_COLUMN_STAT_HISTORY_TID: @@ -2073,6 +2076,51 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: break; } + case OB_ALL_VIRTUAL_COLUMN_GROUP_HISTORY_TID: { + ObIterateVirtualTable *iter = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { + SERVER_LOG(WARN, "create virtual table iterator failed", K(ret)); + } else if (OB_FAIL(iter->init(OB_ALL_COLUMN_GROUP_HISTORY_TID, index_schema, params))) { + SERVER_LOG(WARN, "virtual table iter init failed", K(ret)); + iter->~ObIterateVirtualTable(); + allocator.free(iter); + iter = NULL; + } else { + vt_iter = iter; + } + break; + } + + case OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_TID: { + ObIterateVirtualTable *iter = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { + SERVER_LOG(WARN, "create virtual table iterator failed", K(ret)); + } else if 
(OB_FAIL(iter->init(OB_ALL_COLUMN_GROUP_MAPPING_TID, index_schema, params))) { + SERVER_LOG(WARN, "virtual table iter init failed", K(ret)); + iter->~ObIterateVirtualTable(); + allocator.free(iter); + iter = NULL; + } else { + vt_iter = iter; + } + break; + } + + case OB_ALL_VIRTUAL_COLUMN_GROUP_MAPPING_HISTORY_TID: { + ObIterateVirtualTable *iter = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { + SERVER_LOG(WARN, "create virtual table iterator failed", K(ret)); + } else if (OB_FAIL(iter->init(OB_ALL_COLUMN_GROUP_MAPPING_HISTORY_TID, index_schema, params))) { + SERVER_LOG(WARN, "virtual table iter init failed", K(ret)); + iter->~ObIterateVirtualTable(); + allocator.free(iter); + iter = NULL; + } else { + vt_iter = iter; + } + break; + } + case OB_ALL_VIRTUAL_COLUMN_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -2177,7 +2225,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_CONSTRAINT_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -2222,9 +2272,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_DAM_LAST_ARCH_TS_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -2479,7 +2527,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_FOREIGN_KEY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -2524,9 +2574,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_FOREIGN_KEY_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if 
(OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -2781,7 +2829,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_MVIEW_REFRESH_CHANGE_STATS_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -2826,9 +2876,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_MVIEW_REFRESH_STATS_PARAMS_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3083,7 +3131,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_PENDING_TRANSACTION_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3128,9 +3178,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_RECYCLEBIN_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3385,7 +3433,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_SECURITY_AUDIT_RECORD_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3430,9 +3480,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_SEQUENCE_VALUE_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3687,7 +3735,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_TABLEGROUP_TID: { ObIterateVirtualTable 
*iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3732,9 +3782,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_TABLET_TO_TABLE_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -3989,7 +4037,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_TENANT_PROFILE_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -4034,9 +4084,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_TENANT_SCHEDULER_JOB_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -4291,7 +4339,9 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } + END_CREATE_VT_ITER_SWITCH_LAMBDA + BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_TYPE_ATTR_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { @@ -4336,9 +4386,7 @@ case OB_ALL_VIRTUAL_USER_HISTORY_TID: } break; } - END_CREATE_VT_ITER_SWITCH_LAMBDA - BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA case OB_ALL_VIRTUAL_USER_HISTORY_TID: { ObIterateVirtualTable *iter = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObIterateVirtualTable, iter))) { diff --git a/src/share/ob_ddl_common.cpp b/src/share/ob_ddl_common.cpp index bedf7a7b4..b0598cc18 100644 --- a/src/share/ob_ddl_common.cpp +++ b/src/share/ob_ddl_common.cpp @@ -30,6 +30,7 @@ #include "storage/tx_storage/ob_ls_map.h" #include "rootserver/ob_root_service.h" #include "rootserver/ddl_task/ob_ddl_task.h" +#include "storage/column_store/ob_column_oriented_sstable.h" using namespace oceanbase::share; using namespace oceanbase::common; @@ -711,7 +712,8 @@ int 
ObDDLUtil::generate_build_replica_sql( const bool use_heap_table_ddl_plan, const bool use_schema_version_hint_for_src_table, const ObColumnNameMap *col_name_map, - ObSqlString &sql_string) + ObSqlString &sql_string, + const SortCompactLevel compact_level) { int ret = OB_SUCCESS; ObSchemaGetterGuard schema_guard; @@ -918,8 +920,6 @@ int ObDDLUtil::generate_build_replica_sql( } } - - if (OB_SUCC(ret)) { ObArenaAllocator allocator("ObDDLTmp"); ObString new_dest_database_name; @@ -962,8 +962,8 @@ int ObDDLUtil::generate_build_replica_sql( } if (OB_FAIL(ret)) { } else if (oracle_mode) { - if (OB_FAIL(sql_string.assign_fmt("INSERT /*+ monitor enable_parallel_dml parallel(%ld) opt_param('ddl_execution_id', %ld) opt_param('ddl_task_id', %ld) opt_param('enable_newsort', 'false') use_px */INTO \"%.*s\".\"%.*s\"(%.*s) SELECT /*+ index(\"%.*s\" primary) %.*s */ %.*s from \"%.*s\".\"%.*s\" as of scn %ld %.*s", - real_parallelism, execution_id, task_id, + if (OB_FAIL(sql_string.assign_fmt("INSERT /*+ monitor enable_parallel_dml parallel(%ld) opt_param('ddl_execution_id', %ld) opt_param('ddl_task_id', %ld) opt_param('compact_sort_level', %ld) opt_param('enable_newsort', 'false') use_px */INTO \"%.*s\".\"%.*s\"(%.*s) SELECT /*+ index(\"%.*s\" primary) %.*s */ %.*s from \"%.*s\".\"%.*s\" as of scn %ld %.*s", + real_parallelism, execution_id, task_id, static_cast(compact_level), static_cast(new_dest_database_name.length()), new_dest_database_name.ptr(), static_cast(new_dest_table_name.length()), new_dest_table_name.ptr(), static_cast(insert_column_sql_string.length()), insert_column_sql_string.ptr(), static_cast(new_source_table_name.length()), new_source_table_name.ptr(), @@ -974,8 +974,8 @@ int ObDDLUtil::generate_build_replica_sql( LOG_WARN("fail to assign sql string", K(ret)); } } else { - if (OB_FAIL(sql_string.assign_fmt("INSERT /*+ monitor enable_parallel_dml parallel(%ld) opt_param('ddl_execution_id', %ld) opt_param('ddl_task_id', %ld) opt_param('enable_newsort', 'false') 
use_px */INTO `%.*s`.`%.*s`(%.*s) SELECT /*+ index(`%.*s` primary) %.*s */ %.*s from `%.*s`.`%.*s` as of snapshot %ld %.*s", - real_parallelism, execution_id, task_id, + if (OB_FAIL(sql_string.assign_fmt("INSERT /*+ monitor enable_parallel_dml parallel(%ld) opt_param('ddl_execution_id', %ld) opt_param('ddl_task_id', %ld) opt_param('compact_sort_level', %ld) opt_param('enable_newsort', 'false') use_px */INTO `%.*s`.`%.*s`(%.*s) SELECT /*+ index(`%.*s` primary) %.*s */ %.*s from `%.*s`.`%.*s` as of snapshot %ld %.*s", + real_parallelism, execution_id, task_id, static_cast(compact_level), static_cast(new_dest_database_name.length()), new_dest_database_name.ptr(), static_cast(new_dest_table_name.length()), new_dest_table_name.ptr(), static_cast(insert_column_sql_string.length()), insert_column_sql_string.ptr(), static_cast(new_source_table_name.length()), new_source_table_name.ptr(), @@ -1424,12 +1424,17 @@ int64_t ObDDLUtil::get_default_ddl_tx_timeout() } -int ObDDLUtil::get_data_format_version( +int ObDDLUtil::get_data_information( const uint64_t tenant_id, const uint64_t task_id, - int64_t &data_format_version) + uint64_t &data_format_version, + int64_t &snapshot_version, + share::ObDDLTaskStatus &task_status) { int ret = OB_SUCCESS; + data_format_version = 0; + snapshot_version = 0; + task_status = share::ObDDLTaskStatus::PREPARE; if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0 || nullptr == GCTX.sql_proxy_)) { ret = OB_INVALID_ARGUMENT; @@ -1440,7 +1445,7 @@ int ObDDLUtil::get_data_format_version( SMART_VAR(ObMySQLProxy::MySQLResult, res) { ObSqlString query_string; sqlclient::ObMySQLResult *result = NULL; - if (OB_FAIL(query_string.assign_fmt(" SELECT ddl_type, UNHEX(message) as message_unhex FROM %s WHERE task_id = %lu", + if (OB_FAIL(query_string.assign_fmt(" SELECT snapshot_version, ddl_type, UNHEX(message) as message_unhex, status FROM %s WHERE task_id = %lu", OB_ALL_DDL_TASK_STATUS_TNAME, task_id))) { LOG_WARN("assign sql string failed", K(ret)); 
} else if (OB_FAIL(GCTX.sql_proxy_->read(res, tenant_id, query_string.ptr()))) { @@ -1452,10 +1457,14 @@ int ObDDLUtil::get_data_format_version( LOG_WARN("get next row failed", K(ret)); } else { int64_t pos = 0; + int cur_task_status = 0; ObDDLType ddl_type = ObDDLType::DDL_INVALID; ObString task_message; + EXTRACT_UINT_FIELD_MYSQL(*result, "snapshot_version", snapshot_version, uint64_t); EXTRACT_INT_FIELD_MYSQL(*result, "ddl_type", ddl_type, ObDDLType); EXTRACT_VARCHAR_FIELD_MYSQL(*result, "message_unhex", task_message); + EXTRACT_INT_FIELD_MYSQL(*result, "status", cur_task_status, int); + task_status = static_cast(cur_task_status); if (ObDDLType::DDL_CREATE_INDEX == ddl_type) { SMART_VAR(rootserver::ObIndexBuildTask, task) { if (OB_FAIL(task.deserlize_params_from_message(tenant_id, task_message.ptr(), task_message.length(), pos))) { @@ -2343,6 +2352,145 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task( return ret; } +int ObCODDLUtil::get_base_cg_idx(const storage::ObStorageSchema *storage_schema, int64_t &base_cg_idx) +{ + int ret = OB_SUCCESS; + base_cg_idx = -1; + if (OB_UNLIKELY(nullptr == storage_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(storage_schema)); + } else { + bool found_base_cg_idx = false; + const ObIArray &cg_schemas = storage_schema->get_column_groups(); + for (int64_t i = 0; OB_SUCC(ret) && !found_base_cg_idx && i < cg_schemas.count(); ++i) { + const ObStorageColumnGroupSchema &cur_cg_schmea = cg_schemas.at(i); + if (cur_cg_schmea.is_all_column_group() || cur_cg_schmea.is_rowkey_column_group()) { + base_cg_idx = i; + found_base_cg_idx = true; + } + } + if (OB_SUCC(ret) && !found_base_cg_idx) { + ret = OB_ENTRY_NOT_EXIST; + LOG_WARN("base columng group schema not found", K(ret)); + } + } + LOG_DEBUG("get base cg idx", K(ret), K(base_cg_idx)); + return ret; +} + +int ObCODDLUtil::get_column_checksums( + const storage::ObCOSSTableV2 *co_sstable, + const storage::ObStorageSchema 
*storage_schema, + ObIArray &column_checksums) +{ + int ret = OB_SUCCESS; + column_checksums.reset(); + int64_t column_count = 0; + if (OB_UNLIKELY(nullptr == co_sstable || nullptr == storage_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(co_sstable), KP(storage_schema)); + } else if (OB_FAIL(storage_schema->get_stored_column_count_in_sstable(column_count))) { + LOG_WARN("fail to get_stored_column_count_in_sstable", K(ret), KPC(storage_schema)); + } else { + const common::ObIArray &column_groups = storage_schema->get_column_groups(); + ObArray checksum_ready_array; + if (OB_FAIL(checksum_ready_array.reserve(column_count))) { + LOG_WARN("reserve checksum ready array failed", K(ret), K(column_count)); + } else if (OB_FAIL(column_checksums.reserve(column_count))) { + LOG_WARN("reserve checksum array failed", K(ret), K(column_count)); + } + for (int64_t i = 0; i < column_count && OB_SUCC(ret); i ++) { + if (OB_FAIL(checksum_ready_array.push_back(false))) { + LOG_WARN("push back ready flag failed", K(ret), K(i)); + } else if (OB_FAIL(column_checksums.push_back(0))) { + LOG_WARN("fail to push back column checksum", K(ret), K(i)); + } + } + ObSSTableWrapper cg_sstable_wrapper; + ObSSTable *cg_sstable = nullptr; + for (int64_t i = 0; !co_sstable->is_empty_co_table() && i < column_groups.count() && OB_SUCC(ret); i++) { + const ObStorageColumnGroupSchema &column_group = column_groups.at(i); + ObSSTableMetaHandle cg_table_meta_hdl; + if (column_group.is_all_column_group()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column_group", K(ret), K(i)); + } else if (OB_FAIL(co_sstable->fetch_cg_sstable(i, cg_sstable_wrapper))) { + LOG_WARN("fail to get cg sstable", K(ret), K(i)); + } else if (OB_FAIL(cg_sstable_wrapper.get_sstable(cg_sstable))) { + LOG_WARN("get sstable failed", K(ret)); + } else if (OB_UNLIKELY(cg_sstable == nullptr || !cg_sstable->is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpec cg sstable", K(ret), 
KPC(cg_sstable)); + } else if (OB_FAIL(cg_sstable->get_meta(cg_table_meta_hdl))) { + LOG_WARN("fail to get meta", K(ret), KPC(cg_sstable)); + } else if (OB_UNLIKELY(cg_table_meta_hdl.get_sstable_meta().get_col_checksum_cnt() != column_group.get_column_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected col_checksum_cnt", K(ret), + K(cg_table_meta_hdl.get_sstable_meta().get_col_checksum_cnt()), K(column_group.get_column_count())); + } else { + for (int64_t j = 0; j < column_group.get_column_count() && OB_SUCC(ret); j++) { + const uint16_t column_idx = column_group.column_idxs_[j]; + if (column_idx < 0 || column_idx >= column_checksums.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid column index", K(ret), K(i), K(j), K(column_idx), K(column_checksums.count())); + } else { + int64_t &column_checksum = column_checksums.at(column_idx); + bool &is_checksum_ready = checksum_ready_array.at(column_idx); + if (!is_checksum_ready) { + column_checksum = cg_table_meta_hdl.get_sstable_meta().get_col_checksum()[j]; + is_checksum_ready = true; + } else if (OB_UNLIKELY(column_checksum != cg_table_meta_hdl.get_sstable_meta().get_col_checksum()[j])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected col_checksum_cnt", K(ret), K(column_checksum), K(cg_table_meta_hdl.get_sstable_meta().get_col_checksum()[j])); + } + } + } + } + } + } + return ret; +} + +int ObCODDLUtil::is_rowkey_based_co_sstable( + const storage::ObCOSSTableV2 *co_sstable, + const storage::ObStorageSchema *storage_schema, + bool &is_rowkey_based) +{ + int ret = OB_SUCCESS; + is_rowkey_based = false; + if (OB_UNLIKELY(nullptr == co_sstable || nullptr == storage_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(co_sstable), KP(storage_schema)); + } else { + const int64_t base_cg_idx = co_sstable->get_key().get_column_group_id(); + if (base_cg_idx < 0 || base_cg_idx >= storage_schema->get_column_groups().count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid base 
column group index", K(ret), K(base_cg_idx)); + } else { + is_rowkey_based = storage_schema->get_column_groups().at(base_cg_idx).is_rowkey_column_group(); + } + } + return ret; +} + + + +int ObCODDLUtil::need_column_group_store(const storage::ObStorageSchema &table_schema, bool &need_column_group) +{ + int ret = OB_SUCCESS; + need_column_group = table_schema.get_column_group_count() > 1; + return ret; +} + +int ObCODDLUtil::need_column_group_store(const schema::ObTableSchema &table_schema, bool &need_column_group) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(table_schema.get_is_column_store(need_column_group))) { + SHARE_LOG(WARN, "fail to check whether table is column store", K(ret)); + } + return ret; +} + //record trace_id ObDDLEventInfo::ObDDLEventInfo() : addr_(GCTX.self_addr()), diff --git a/src/share/ob_ddl_common.h b/src/share/ob_ddl_common.h index cd2d0441c..342e592dc 100644 --- a/src/share/ob_ddl_common.h +++ b/src/share/ob_ddl_common.h @@ -31,6 +31,7 @@ struct ObDropTableArg; struct ObDropIndexArg; struct ObTruncateTableArg; struct ObCreateIndexArg; +struct ObIndexArg; } namespace sql { @@ -41,6 +42,8 @@ namespace storage { class ObTabletHandle; class ObLSHandle; +struct ObStorageColumnGroupSchema; +class ObCOSSTableV2; } namespace share { @@ -89,6 +92,7 @@ enum ObDDLType DDL_TABLE_RESTORE = 1013, // table restore DDL_MVIEW_COMPLETE_REFRESH = 1014, DDL_CREATE_MVIEW = 1015, + DDL_ALTER_COLUMN_GROUP = 1016, // alter table add/drop column group // @note new normal ddl type to be defined here !!! 
DDL_NORMAL_TYPE = 10001, @@ -143,6 +147,16 @@ enum ObDDLTaskStatus { SUCCESS = 100 }; +enum SortCompactLevel +{ + SORT_DEFAULT_LEVEL = 0, + SORT_COMPACT_LEVEL = 1, + SORT_ENCODE_LEVEL = 2, + SORT_COMPRESSION_LEVEL = 3, + SORT_COMPRESSION_COMPACT_LEVEL = 4, + SORT_COMPRESSION_ENCODE_LEVEL = 5 +}; + static const char* ddl_task_status_to_str(const ObDDLTaskStatus &task_status) { const char *str = nullptr; switch(task_status) { @@ -375,7 +389,8 @@ public: const bool use_heap_table_ddl_plan, const bool use_schema_version_hint_for_src_table, const ObColumnNameMap *col_name_map, - ObSqlString &sql_string); + ObSqlString &sql_string, + const share::SortCompactLevel compact_level = share::SORT_DEFAULT_LEVEL); static int generate_build_mview_replica_sql( const uint64_t tenant_id, @@ -422,7 +437,7 @@ public: storage::ObLSHandle &ls_handle, const ObTabletID &tablet_id, storage::ObTabletHandle &tablet_handle, - const storage::ObMDSGetTabletMode mode = storage::ObMDSGetTabletMode::READ_READABLE_COMMITED); + const storage::ObMDSGetTabletMode mode = storage::ObMDSGetTabletMode::READ_WITHOUT_CHECK); static int clear_ddl_checksum(sql::ObPhysicalPlan *phy_plan); @@ -467,10 +482,12 @@ public: static int64_t get_default_ddl_rpc_timeout(); static int64_t get_default_ddl_tx_timeout(); - static int get_data_format_version( + static int get_data_information( const uint64_t tenant_id, const uint64_t task_id, - int64_t &data_format_version); + uint64_t &data_format_version, + int64_t &snapshot_version, + share::ObDDLTaskStatus &task_status); static int replace_user_tenant_id( const ObDDLType &ddl_type, @@ -549,6 +566,28 @@ private: uint64_t &table_id); }; +class ObCODDLUtil +{ +public: + static int need_column_group_store(const storage::ObStorageSchema &table_schema, bool &need_column_group); + static int need_column_group_store(const schema::ObTableSchema &table_schema, bool &need_column_group); + + static int get_base_cg_idx( + const storage::ObStorageSchema *storage_schema, + int64_t 
&base_cg_idx); + + static int get_column_checksums( + const storage::ObCOSSTableV2 *co_sstable, + const storage::ObStorageSchema *storage_schema, + ObIArray &column_checksums); + + static int is_rowkey_based_co_sstable( + const storage::ObCOSSTableV2 *co_sstable, + const storage::ObStorageSchema *storage_schema, + bool &is_rowkey_based); +}; + + class ObCheckTabletDataComplementOp { diff --git a/src/share/ob_ddl_error_message_table_operator.cpp b/src/share/ob_ddl_error_message_table_operator.cpp index 677a9637d..eedee9690 100644 --- a/src/share/ob_ddl_error_message_table_operator.cpp +++ b/src/share/ob_ddl_error_message_table_operator.cpp @@ -16,6 +16,7 @@ #include "share/inner_table/ob_inner_table_schema.h" #include "share/ob_get_compat_mode.h" #include "share/schema/ob_schema_utils.h" +#include "share/schema/ob_table_param.h" #include "share/ob_ddl_sim_point.h" using namespace oceanbase::share; @@ -103,7 +104,7 @@ int ObDDLErrorMessageTableOperator::get_index_task_info( } int ObDDLErrorMessageTableOperator::extract_index_key(const ObTableSchema &index_schema, - const ObStoreRowkey &index_key, char *buffer, const int64_t buffer_len) + const blocksstable::ObDatumRowkey &index_key, char *buffer, const int64_t buffer_len) { int ret = OB_SUCCESS; if (!index_schema.is_valid() || !index_key.is_valid() || OB_ISNULL(buffer) || buffer_len <= 0) { @@ -112,33 +113,33 @@ int ObDDLErrorMessageTableOperator::extract_index_key(const ObTableSchema &index } else { const int64_t index_size = index_schema.get_index_column_num(); int64_t pos = 0; - int64_t valid_index_size = 0; - uint64_t column_id = OB_INVALID_ID; MEMSET(buffer, 0, buffer_len); - for (int64_t i = 0; OB_SUCC(ret) && i < index_size; i++) { - if (OB_FAIL(index_schema.get_index_info().get_column_id(i, column_id))) { + const ObRowkeyColumn *column = index_schema.get_index_info().get_column(i); + if (OB_ISNULL(column)) { + ret = OB_ERR_UNEXPECTED; LOG_WARN("Failed to get index column description", K(i), K(ret)); - } else 
if (column_id <= OB_MIN_SHADOW_COLUMN_ID) { - valid_index_size ++; - } - } - for (int64_t i = 0; OB_SUCC(ret) && i < valid_index_size; ++i) { - const ObObj &obj = index_key.get_obj_ptr()[i]; - if (OB_FAIL(obj.print_plain_str_literal(buffer, buffer_len, pos))) { - LOG_WARN("fail to print_plain_str_literal", K(ret), KP(buffer)); - } else if (i < valid_index_size - 1) { - if (OB_FAIL(databuff_printf(buffer, buffer_len, pos, "-"))) { + } else if (IS_SHADOW_COLUMN(column->column_id_)) { + break; + } else { + const blocksstable::ObStorageDatum &datum = index_key.get_datum(i); + ObObj obj; + if (OB_FAIL(datum.to_obj(obj, column->get_meta_type()))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(obj.print_plain_str_literal(buffer, buffer_len, pos))) { + LOG_WARN("fail to print_plain_str_literal", K(ret), KP(buffer)); + } else if (OB_FAIL(databuff_printf(buffer, buffer_len, pos, "-"))) { LOG_WARN("databuff print failed", K(ret)); } } } - if (buffer != nullptr) { - buffer[pos++] = '\0'; - if (OB_SIZE_OVERFLOW == ret) { - LOG_WARN("the index key length is larger than OB_TMP_BUF_SIZE_256", K(index_key), KP(buffer)); - ret = OB_SUCCESS; - } + if (OB_SUCC(ret) && pos > 0) { + buffer[pos - 1] = '\0'; // overwrite the tail '-' + } + if (OB_SIZE_OVERFLOW == ret) { + buffer[buffer_len - 1] = '\0'; + LOG_WARN("the index key length is larger than OB_TMP_BUF_SIZE_256", K(index_key), KP(buffer)); + ret = OB_SUCCESS; } } diff --git a/src/share/ob_ddl_error_message_table_operator.h b/src/share/ob_ddl_error_message_table_operator.h index be3b91f3e..2cc7c65c5 100644 --- a/src/share/ob_ddl_error_message_table_operator.h +++ b/src/share/ob_ddl_error_message_table_operator.h @@ -21,6 +21,7 @@ #include "share/schema/ob_schema_struct.h" #include "share/ob_ddl_common.h" #include "share/schema/ob_table_schema.h" +#include "storage/blocksstable/ob_datum_rowkey.h" namespace oceanbase { @@ -87,7 +88,7 @@ public: ObDDLErrorMessageTableOperator(); virtual 
~ObDDLErrorMessageTableOperator(); static int get_index_task_info(ObMySQLProxy &sql_proxy, const share::schema::ObTableSchema &index_schema, ObDDLErrorInfo &info); - static int extract_index_key(const share::schema::ObTableSchema &index_schema, const common::ObStoreRowkey &index_key, + static int extract_index_key(const share::schema::ObTableSchema &index_schema, const blocksstable::ObDatumRowkey &index_key, char *buffer, const int64_t buffer_len); static int load_ddl_user_error(const uint64_t tenant_id, const int64_t task_id, const uint64_t table_id, common::ObMySQLProxy &sql_proxy, ObBuildDDLErrorMessage &error_message); diff --git a/src/share/ob_ddl_task_executor.h b/src/share/ob_ddl_task_executor.h index fba81e987..96ff99470 100644 --- a/src/share/ob_ddl_task_executor.h +++ b/src/share/ob_ddl_task_executor.h @@ -77,7 +77,7 @@ private: || common::OB_TRANS_NEED_ROLLBACK == ret_code || common::OB_RPC_SEND_ERROR == ret_code || common::OB_RPC_CONNECT_ERROR == ret_code || common::OB_RPC_POST_ERROR == ret_code || common::OB_TRANS_ROLLBACKED == ret_code || common::OB_TRANS_KILLED == ret_code || common::OB_GET_LOCATION_TIME_OUT == ret_code || common::OB_TRANS_RPC_TIMEOUT == ret_code || common::OB_LIBEASY_ERROR == ret_code - || common::OB_TRANS_CTX_NOT_EXIST == ret_code; + || common::OB_TRANS_CTX_NOT_EXIST == ret_code || OB_ERR_SESSION_INTERRUPTED == ret_code; } static bool is_retry(const int ret_code) { return common::OB_EAGAIN == ret_code || common::OB_DDL_SCHEMA_VERSION_NOT_MATCH == ret_code || common::OB_TASK_EXPIRED == ret_code || common::OB_NEED_RETRY == ret_code diff --git a/src/share/ob_errno.cpp b/src/share/ob_errno.cpp index 6d7b6a229..e5868f593 100644 --- a/src/share/ob_errno.cpp +++ b/src/share/ob_errno.cpp @@ -1924,10 +1924,10 @@ static const _error _error_OB_COLUMN_GROUP_NOT_FOUND = { .mysql_errno = -1, .sqlstate = "HY000", .str_error = "Column group not found", - .str_user_error = "Column group not found", + .str_user_error = "Column group \'%.*s\' not 
found", .oracle_errno = 600, .oracle_str_error = "ORA-00600: internal error code, arguments: -4185, Column group not found", - .oracle_str_user_error = "ORA-00600: internal error code, arguments: -4185, Column group not found" + .oracle_str_user_error = "ORA-00600: internal error code, arguments: -4185, Column grou \'%.*s\' not found" }; static const _error _error_OB_CS_COMPRESS_LIB_ERROR = { .error_name = "OB_CS_COMPRESS_LIB_ERROR", diff --git a/src/share/ob_errno.def b/src/share/ob_errno.def index 338b4613a..589b14916 100755 --- a/src/share/ob_errno.def +++ b/src/share/ob_errno.def @@ -253,7 +253,7 @@ DEFINE_ERROR_DEP(OB_ERR_ALREADY_EXISTS, -4181, -1, "42S01", "Already exist"); DEFINE_ERROR_DEP(OB_SEARCH_NOT_FOUND, -4182, -1, "HY000", "Value not found"); DEFINE_ERROR(OB_BEYOND_THE_RANGE, -4183, -1, "HY000", "Key out of range"); DEFINE_ERROR(OB_SERVER_OUTOF_DISK_SPACE, -4184, -1, "53100", "Server out of disk space"); -DEFINE_ERROR(OB_COLUMN_GROUP_NOT_FOUND, -4185, -1, "HY000", "Column group not found"); +DEFINE_ERROR(OB_COLUMN_GROUP_NOT_FOUND, -4185, -1, "HY000", "Column group not found", "Column group \'%.*s\' not found"); DEFINE_ERROR(OB_CS_COMPRESS_LIB_ERROR, -4186, -1, "HY000", "Server failed to get compress library"); DEFINE_ERROR_DEP(OB_ITEM_NOT_MATCH, -4187, -1, "HY000", "Item not match"); DEFINE_ERROR(OB_SCHEDULER_TASK_CNT_MISMATCH, -4188, -1, "HY000", "Running task cnt and unfinished task cnt not consistent"); diff --git a/src/share/ob_errno.h b/src/share/ob_errno.h index cfe241488..b93bf00e3 100644 --- a/src/share/ob_errno.h +++ b/src/share/ob_errno.h @@ -2005,7 +2005,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_SEARCH_NOT_FOUND__USER_ERROR_MSG "Value not found" #define OB_BEYOND_THE_RANGE__USER_ERROR_MSG "Key out of range" #define OB_SERVER_OUTOF_DISK_SPACE__USER_ERROR_MSG "Server out of disk space" -#define OB_COLUMN_GROUP_NOT_FOUND__USER_ERROR_MSG "Column group not found" +#define OB_COLUMN_GROUP_NOT_FOUND__USER_ERROR_MSG 
"Column group \'%.*s\' not found" #define OB_CS_COMPRESS_LIB_ERROR__USER_ERROR_MSG "Server failed to get compress library" #define OB_ITEM_NOT_MATCH__USER_ERROR_MSG "Item not match" #define OB_SCHEDULER_TASK_CNT_MISMATCH__USER_ERROR_MSG "Running task cnt and unfinished task cnt not consistent" @@ -4215,7 +4215,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_SEARCH_NOT_FOUND__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4182, Value not found" #define OB_BEYOND_THE_RANGE__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4183, Key out of range" #define OB_SERVER_OUTOF_DISK_SPACE__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4184, Server out of disk space" -#define OB_COLUMN_GROUP_NOT_FOUND__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4185, Column group not found" +#define OB_COLUMN_GROUP_NOT_FOUND__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4185, Column group %.*s not found" #define OB_CS_COMPRESS_LIB_ERROR__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4186, Server failed to get compress library" #define OB_ITEM_NOT_MATCH__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4187, Item not match" #define OB_SCHEDULER_TASK_CNT_MISMATCH__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -4188, Running task cnt and unfinished task cnt not consistent" diff --git a/src/share/ob_rpc_struct.cpp b/src/share/ob_rpc_struct.cpp index e1f3d07df..bffb15168 100755 --- a/src/share/ob_rpc_struct.cpp +++ b/src/share/ob_rpc_struct.cpp @@ -2960,7 +2960,8 @@ DEF_TO_STRING(ObIndexArg) K_(index_name), K_(table_name), K_(database_name), - K_(index_action_type)); + K_(index_action_type), + K_(compact_level)); J_OBJ_END(); return pos; } @@ -2971,7 +2972,8 @@ OB_SERIALIZE_MEMBER((ObIndexArg, ObDDLArg), table_name_, database_name_, index_action_type_, - session_id_); + session_id_, + compact_level_); bool ObCreateIndexArg::is_valid() 
const { @@ -2983,6 +2985,17 @@ bool ObCreateIndexArg::is_valid() const && index_using_type_ >= USING_BTREE && index_using_type_ < USING_TYPE_MAX; } +OB_SERIALIZE_MEMBER(ObCreateIndexArg::ObIndexColumnGroupItem, is_each_cg_, column_list_); + +int ObCreateIndexArg::ObIndexColumnGroupItem::assign(const ObCreateIndexArg::ObIndexColumnGroupItem &other) +{ + int ret = OB_SUCCESS; + is_each_cg_ = other.is_each_cg_; + if (OB_FAIL(column_list_.assign(other.column_list_))) { + LOG_WARN("fail to assign array", K(ret)); + } + return ret; +} DEF_TO_STRING(ObCreateIndexArg) { @@ -3008,7 +3021,9 @@ DEF_TO_STRING(ObCreateIndexArg) K_(nls_timestamp_tz_format), K_(sql_mode), K_(inner_sql_exec_addr), - K_(local_session_var)); + K_(local_session_var), + K_(exist_all_column_group), + K_(index_cgs)); J_OBJ_END(); return pos; } @@ -3032,7 +3047,9 @@ OB_SERIALIZE_MEMBER((ObCreateIndexArg, ObIndexArg), nls_timestamp_tz_format_, sql_mode_, inner_sql_exec_addr_, - local_session_var_); + local_session_var_, + exist_all_column_group_, + index_cgs_); bool ObAlterIndexArg::is_valid() const { @@ -5298,6 +5315,24 @@ bool ObUpdateIndexStatusArg::is_valid() const && status_ < INDEX_STATUS_MAX; } +int ObUpdateIndexStatusArg::assign(const ObUpdateIndexStatusArg &other_arg) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObDDLArg::assign(other_arg))) { + LOG_WARN("assign other arg failed", K(ret)); + } else { + index_table_id_ = other_arg.index_table_id_; + status_ = other_arg.status_; + convert_status_ = other_arg.convert_status_; + in_offline_ddl_white_list_ = other_arg.in_offline_ddl_white_list_; + data_table_id_ = other_arg.data_table_id_; + database_name_ = other_arg.database_name_; + task_id_ = other_arg.task_id_; + error_code_ = other_arg.error_code_; + } + return ret; +} + int ObUpdateMViewStatusArg::assign(const ObUpdateMViewStatusArg &other) { int ret = OB_SUCCESS; @@ -5326,8 +5361,10 @@ OB_SERIALIZE_MEMBER((ObUpdateIndexStatusArg, ObDDLArg), status_, convert_status_, in_offline_ddl_white_list_, - 
data_table_id_, - database_name_); + data_table_id_, + database_name_, + task_id_, + error_code_); OB_SERIALIZE_MEMBER((ObUpdateMViewStatusArg, ObDDLArg), mview_table_id_, diff --git a/src/share/ob_rpc_struct.h b/src/share/ob_rpc_struct.h index 10d452603..2322221dc 100755 --- a/src/share/ob_rpc_struct.h +++ b/src/share/ob_rpc_struct.h @@ -1254,6 +1254,7 @@ public: common::ObString table_name_; common::ObString database_name_; IndexActionType index_action_type_; + share::SortCompactLevel compact_level_; ObIndexArg(): ObDDLArg(), @@ -1262,7 +1263,8 @@ public: index_name_(), table_name_(), database_name_(), - index_action_type_(INVALID_ACTION) + index_action_type_(INVALID_ACTION), + compact_level_(share::SORT_COMPACT_LEVEL) {} virtual ~ObIndexArg() {} void reset() @@ -1273,6 +1275,7 @@ public: table_name_.reset(); database_name_.reset(); index_action_type_ = INVALID_ACTION; + compact_level_ = share::SORT_COMPACT_LEVEL; ObDDLArg::reset(); } bool is_valid() const; @@ -1288,6 +1291,7 @@ public: table_name_ = other.table_name_; database_name_ = other.database_name_; index_action_type_ = other.index_action_type_; + compact_level_ = other.compact_level_; } return ret; } @@ -2376,7 +2380,7 @@ public: DECLARE_TO_STRING; }; -struct ObColumnSortItem +struct ObColumnSortItem final { OB_UNIS_VERSION(1); public: @@ -2502,7 +2506,9 @@ public: sql_mode_(0), inner_sql_exec_addr_(), allocator_(), - local_session_var_(&allocator_) + local_session_var_(&allocator_), + exist_all_column_group_(false), + index_cgs_() { index_action_type_ = ADD_INDEX; index_using_type_ = share::schema::USING_BTREE; @@ -2532,6 +2538,8 @@ public: inner_sql_exec_addr_.reset(); local_session_var_.reset(); allocator_.reset(); + exist_all_column_group_ = false; + index_cgs_.reset(); } void set_index_action_type(const IndexActionType type) { index_action_type_ = type; } bool is_valid() const; @@ -2547,6 +2555,8 @@ public: SHARE_LOG(WARN, "fail to assign hidden store columns", K(ret)); } else if 
(OB_FAIL(fulltext_columns_.assign(other.fulltext_columns_))) { SHARE_LOG(WARN, "fail to assign fulltext columns", K(ret)); + } else if (OB_FAIL(index_cgs_.assign(other.index_cgs_))) { + SHARE_LOG(WARN, "fail to assign index cgs", K(ret)); } else if (OB_FAIL(index_schema_.assign(other.index_schema_))) { SHARE_LOG(WARN, "fail to assign index schema", K(ret)); } else if (OB_FAIL(local_session_var_.deep_copy(other.local_session_var_))){ @@ -2566,6 +2576,7 @@ public: sql_mode_ = other.sql_mode_; inner_sql_exec_addr_ = other.inner_sql_exec_addr_; consumer_group_id_ = other.consumer_group_id_; + exist_all_column_group_ = other.exist_all_column_group_; } return ret; } @@ -2581,6 +2592,38 @@ public: || share::schema::INDEX_TYPE_SPATIAL_GLOBAL == index_type_ || share::schema::INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type_; } +//todo @qilu:only for each_cg now, when support customized cg ,refine this + typedef common::ObSEArray ObCGColumnList; + struct ObIndexColumnGroupItem final + { + OB_UNIS_VERSION(1); + public: + ObIndexColumnGroupItem() : is_each_cg_(false), column_list_() + {} + ObIndexColumnGroupItem(const bool is_each_cg) : is_each_cg_(is_each_cg), column_list_() + {} + ~ObIndexColumnGroupItem() + { + reset(); + } + bool is_valid() const + { + return is_each_cg_; + } + void reset() + { + is_each_cg_ = false; + column_list_.reset(); + } + int assign(const ObIndexColumnGroupItem &other); + TO_STRING_KV(K(is_each_cg_), K(column_list_)); + + public: + bool is_each_cg_; + ObCGColumnList column_list_; + }; + +public: share::schema::ObIndexType index_type_; common::ObSEArray index_columns_; common::ObSEArray store_columns_; @@ -2602,7 +2645,8 @@ public: common::ObAddr inner_sql_exec_addr_; common::ObArenaAllocator allocator_; ObLocalSessionVar local_session_var_; - + bool exist_all_column_group_; + common::ObSEArray index_cgs_; }; typedef ObCreateIndexArg ObAlterPrimaryArg; @@ -6051,13 +6095,16 @@ public: convert_status_(true), in_offline_ddl_white_list_(false), 
data_table_id_(common::OB_INVALID_ID), - database_name_() + database_name_(), + task_id_(0), + error_code_(OB_SUCCESS) {} bool is_valid() const; virtual bool is_allow_when_disable_ddl() const; virtual bool is_allow_when_upgrade() const { return true; } virtual bool is_in_offline_ddl_white_list() const { return in_offline_ddl_white_list_; } - TO_STRING_KV(K_(index_table_id), K_(status), K_(convert_status), K_(in_offline_ddl_white_list), K_(data_table_id), K_(database_name)); + int assign(const ObUpdateIndexStatusArg &other_arg); + TO_STRING_KV(K_(index_table_id), K_(status), K_(convert_status), K_(in_offline_ddl_white_list), K_(task_id), K_(error_code), K_(data_table_id), K_(database_name)); uint64_t index_table_id_; share::schema::ObIndexStatus status_; @@ -6065,6 +6112,8 @@ public: bool in_offline_ddl_white_list_; uint64_t data_table_id_; ObString database_name_; + int64_t task_id_; + int error_code_; }; struct ObUpdateMViewStatusArg : public ObDDLArg @@ -9306,7 +9355,7 @@ public: int64_t parallelism_; int64_t execution_id_; int64_t tablet_task_id_; - int64_t data_format_version_; + uint64_t data_format_version_; int64_t consumer_group_id_; uint64_t dest_tenant_id_; share::ObLSID dest_ls_id_; diff --git a/src/share/ob_tablet_autoincrement_param.cpp b/src/share/ob_tablet_autoincrement_param.cpp index a0a4f0bbf..9abc8eb7f 100644 --- a/src/share/ob_tablet_autoincrement_param.cpp +++ b/src/share/ob_tablet_autoincrement_param.cpp @@ -41,6 +41,16 @@ int ObTabletCacheInterval::next_value(uint64_t &next_value) return ret; } +int ObTabletCacheInterval::get_value(uint64_t &value) +{ + int ret = OB_SUCCESS; + value = next_value_; + if (value + 1 > end_) { + ret = OB_EAGAIN; + } + return ret; +} + int ObTabletCacheInterval::fetch(uint64_t count, ObTabletCacheInterval &dest) { int ret = OB_SUCCESS; diff --git a/src/share/ob_tablet_autoincrement_param.h b/src/share/ob_tablet_autoincrement_param.h index fd60bf1c6..b10e16274 100644 --- a/src/share/ob_tablet_autoincrement_param.h 
+++ b/src/share/ob_tablet_autoincrement_param.h @@ -106,8 +106,10 @@ public: TO_STRING_KV(K_(tablet_id), K_(start), K_(end), K_(cache_size), K_(next_value), K_(task_id)); void set(uint64_t start, uint64_t end); int next_value(uint64_t &next_value); + int get_value(uint64_t &value); int fetch(uint64_t count, ObTabletCacheInterval &dest); uint64_t count() const { return end_ - start_ + 1; } + uint64_t remain_count() const { return end_ - next_value_ + 1; } bool operator <(const ObTabletCacheInterval &other) { return tablet_id_ < other.tablet_id_; } public: common::ObTabletID tablet_id_; diff --git a/src/share/parameter/ob_parameter_seed.ipp b/src/share/parameter/ob_parameter_seed.ipp index 2b71b8a0e..977f8ba7d 100644 --- a/src/share/parameter/ob_parameter_seed.ipp +++ b/src/share/parameter/ob_parameter_seed.ipp @@ -1728,6 +1728,9 @@ TEMP_DEF_INT(v4.3, encoding_test_seed, OB_CLUSTER_PARAMETER, "0", "[0,)" TEMP_DEF_BOOL(v4.3, enable_table_without_all_cg, OB_TENANT_PARAMETER, "True", "enables creating table without all column_group. 
The default value is False.", ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +TEMP_DEF_BOOL(v4.3, enable_store_compression, OB_TENANT_PARAMETER, "False", + "enable compression in ObTempBlockStore", + ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); DEF_BOOL(_enable_prefetch_limiting, OB_TENANT_PARAMETER, "False", "enable limiting memory in prefetch for single query", ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); @@ -1785,6 +1788,8 @@ DEF_STR_WITH_CHECKER(sql_protocol_min_tls_version, OB_CLUSTER_PARAMETER, "none", DEF_MODE_WITH_PARSER(_obkv_feature_mode, OB_CLUSTER_PARAMETER, "", common::ObKvFeatureModeParser, "_obkv_feature_mode is a option list to control specified OBKV features on/off.", ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); + + DEF_BOOL(_enable_optimizer_qualify_filter, OB_TENANT_PARAMETER, "True", "Enable extracting qualify filters for window function", ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); diff --git a/src/share/rc/ob_tenant_base.h b/src/share/rc/ob_tenant_base.h index 98d544fdf..261bee816 100755 --- a/src/share/rc/ob_tenant_base.h +++ b/src/share/rc/ob_tenant_base.h @@ -91,6 +91,7 @@ class ObTenantMdsService; class ObTableScanIterator; class ObTenantSnapshotService; class ObTenantCGReadInfoMgr; + class ObTenantDirectLoadMgr; class ObEmptyReadBucket; class ObTabletMemtableMgrPool; } // namespace storage @@ -339,6 +340,7 @@ using ObTableScanIteratorObjPool = common::ObServerObjectPool(sizeof(OB_COLUMN_GROUP_NAME_PREFIX)), + OB_COLUMN_GROUP_NAME_PREFIX, column_name_.length(), column_name_.ptr()); + if (write_len < 0 || write_len >= OB_MAX_COLUMN_GROUP_NAME_LENGTH) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to format column group_name", K(ret), K(write_len)); + } + + if (OB_SUCC(ret)) { + if (cg_name.write(tmp_cg_name, write_len) != write_len) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("fail to write column group name to str", K(ret), K(cg_name), K(write_len)); + } + } + return ret; +} } //end of namespace schema } //end of namespace share diff --git a/src/share/schema/ob_column_schema.h b/src/share/schema/ob_column_schema.h index f494f9430..ede2fb630 100644 --- a/src/share/schema/ob_column_schema.h +++ b/src/share/schema/ob_column_schema.h @@ -326,6 +326,7 @@ int assign(const ObColumnSchemaV2 &other); return ret; } + int get_each_column_group_name(ObString &cg_name) const; inline ObLocalSessionVar &get_local_session_var() { return local_session_vars_; } inline const ObLocalSessionVar &get_local_session_var() const { return local_session_vars_; } diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index aec04ce99..b28ac7317 100644 --- a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -153,7 +153,7 @@ int ObSchemaPrinter::print_table_definition(const uint64_t tenant_id, SHARE_SCHEMA_LOG(WARN, "fail to print table options", K(ret), K(*table_schema)); } else if (OB_FAIL(print_table_definition_partition_options(*table_schema, buf, buf_len, pos, agent_mode, tz_info))) { SHARE_SCHEMA_LOG(WARN, "fail to print partition options", K(ret), K(*table_schema)); - } else if (OB_FAIL(print_table_definition_column_group(*table_schema, buf, buf_len, pos))) { + } else if ((!strict_compat_) && OB_FAIL(print_table_definition_column_group(*table_schema, buf, buf_len, pos))) { SHARE_SCHEMA_LOG(WARN, "fail to print column_group", K(ret), K(*table_schema)); } else if (OB_FAIL(print_table_definition_on_commit_options(*table_schema, buf, buf_len, pos))) { SHARE_SCHEMA_LOG(WARN, "fail to print on commit options", K(ret), K(*table_schema)); @@ -709,6 +709,14 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc SHARE_SCHEMA_LOG(WARN, "fail to print partition info for index", K(ret), KPC(index_schema)); } } + + // print column group info 
+ if (OB_FAIL(ret)) { + } else if (strict_compat_) { + /* strict mode skip*/ + } else if (OB_FAIL(print_table_definition_column_group(*index_schema, buf, buf_len, pos))) { + LOG_WARN("fail to print column group info", K(ret)); + } } } } @@ -5243,17 +5251,61 @@ int ObSchemaPrinter::print_table_definition_column_group(const ObTableSchema &ta int64_t &pos) const { int ret = OB_SUCCESS; - bool has_all_column_group = false; - if (table_schema.get_column_group_count() <= 1) { - } else if (OB_FAIL(table_schema.has_all_column_group(has_all_column_group))) { - SHARE_SCHEMA_LOG(WARN, "fail to check row store", K(ret)); - } else if (has_all_column_group) { - if (OB_FAIL(databuff_printf(buf, buf_len, pos, " WITH COLUMN GROUP FOR all columns, each column "))) { - SHARE_SCHEMA_LOG(WARN, "fail to print column group", K(ret)); - } + ObTableSchema::const_column_group_iterator iter_begin = table_schema.column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = table_schema.column_group_end(); + int64_t print_cg_cnt = 0; + bool is_each_cg_exist = false; + bool is_all_cg_exist = false; + + if (table_schema.get_column_group_count() <= 1){ + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, " WITH COLUMN GROUP("))) { + SHARE_SCHEMA_LOG(WARN, "fail to print column group", K(ret)); + } else if (OB_FAIL(table_schema.is_column_group_exist(OB_EACH_COLUMN_GROUP_NAME, is_each_cg_exist))) { + LOG_WARN("fail to check is each column group exist", K(ret)); + } else if (OB_FAIL(table_schema.is_column_group_exist(OB_ALL_COLUMN_GROUP_NAME, is_all_cg_exist))) { + LOG_WARN("fail to check is all column group exist", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (!is_all_cg_exist) { + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "all columns"))) { + SHARE_SCHEMA_LOG(WARN, "fail to print column group", K(ret)); } else { - if (OB_FAIL(databuff_printf(buf, buf_len, pos, " WITH COLUMN GROUP FOR each column "))) { - SHARE_SCHEMA_LOG(WARN, "fail to print column group", 
K(ret)); + print_cg_cnt += 1; + } + + if (OB_FAIL(ret)) { + } else if (!(is_all_cg_exist && is_each_cg_exist)) { + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, ", "))) { + SHARE_SCHEMA_LOG(WARN, "fail to print column group", K(ret)); + } + + if (OB_FAIL(ret)){ + } else if (!is_each_cg_exist) { + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "each column"))) { + SHARE_SCHEMA_LOG(WARN, "fail to print column group", K(ret)); + } else { + print_cg_cnt += 1; + } + + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + const ObColumnGroupSchema *column_group = *iter_begin; + if (OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + SHARE_SCHEMA_LOG(WARN, "column group should not be null", K(ret), K(table_schema)); + } else { + ObColumnGroupType cg_type = column_group->get_column_group_type(); + if (cg_type >= ObColumnGroupType::NORMAL_COLUMN_GROUP ) { + ret = OB_NOT_SUPPORTED; + SHARE_SCHEMA_LOG(WARN, "column group type not supported", K(ret), K(table_schema), KPC(column_group)); + } else { + /* skip, all/each cg check already, default and rowkey not need to be print*/ + } + } + } + if (OB_SUCC(ret) && print_cg_cnt > 0) { + if (OB_FAIL(databuff_printf(buf, buf_len, pos, ")"))) { + SHARE_SCHEMA_LOG(WARN,"fail to print column group", K(ret)); } } return ret; diff --git a/src/share/schema/ob_schema_retrieve_utils.ipp b/src/share/schema/ob_schema_retrieve_utils.ipp index 01c0b70ee..ba103b241 100644 --- a/src/share/schema/ob_schema_retrieve_utils.ipp +++ b/src/share/schema/ob_schema_retrieve_utils.ipp @@ -1501,7 +1501,7 @@ int ObSchemaRetrieveUtils::fill_table_schema( int64_t, true/*skip null error*/, ignore_column_error, OB_DEFAULT_LOB_INROW_THRESHOLD); // field for column_group EXTRACT_INT_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE(result, max_used_column_group_id, table_schema, - uint64_t, true, true/*ignore_column_error*/, DEFAULT_TYPE_COLUMN_GROUP_ID); + uint64_t, true, true/*ignore_column_error*/, COLUMN_GROUP_START_ID); 
EXTRACT_INT_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE(result, column_store, table_schema, bool, true, true/*ignore_column_error*/, false); } diff --git a/src/share/schema/ob_schema_service.h b/src/share/schema/ob_schema_service.h old mode 100644 new mode 100755 index 41f27eef9..3d9817cc4 --- a/src/share/schema/ob_schema_service.h +++ b/src/share/schema/ob_schema_service.h @@ -116,7 +116,9 @@ enum ObSchemaOperationCategory ACT(OB_DDL_TRUNCATE_TABLE, = 59) \ ACT(OB_DDL_RENAME_PARTITION, = 60) \ ACT(OB_DDL_RENAME_SUB_PARTITION, = 61) \ - ACT(OB_DDL_MODIFY_MATERIALIZED_VIEW_STATUS, = 62) \ + ACT(OB_DDL_MODIFY_MATERIALIZED_VIEW_STATUS, = 62) \ + ACT(OB_DDL_ADD_COLUMN_GROUP, = 63) \ + ACT(OB_DDL_DROP_COLUMN_GROUP, = 64) \ ACT(OB_DDL_TABLE_OPERATION_END, = 100) \ ACT(OB_DDL_TENANT_OPERATION_BEGIN, = 101) \ ACT(OB_DDL_ADD_TENANT,) \ diff --git a/src/share/schema/ob_schema_service_sql_impl.cpp b/src/share/schema/ob_schema_service_sql_impl.cpp index 5a8aa8e59..26e05629e 100644 --- a/src/share/schema/ob_schema_service_sql_impl.cpp +++ b/src/share/schema/ob_schema_service_sql_impl.cpp @@ -1664,10 +1664,9 @@ int ObSchemaServiceSQLImpl::fetch_all_column_group_schema( LOG_WARN("fail to append sql", KR(ret), K(schema_version)); } else if (OB_FAIL(sql.append_fmt(" ORDER BY TENANT_ID DESC, TABLE_ID DESC, COLUMN_GROUP_ID ASC"))) { LOG_WARN("fail to append sql", KR(ret)); - } else if (is_history && OB_FAIL(sql.append_fmt(", SCHEMA_VERSION DESC"))) { + } else if (is_history && OB_FAIL(sql.append_fmt(", SCHEMA_VERSION ASC"))) { LOG_WARN("fail to append sql", KR(ret)); } - if (OB_SUCC(ret)) { SMART_VAR(ObMySQLProxy::MySQLResult, res) { ObMySQLResult *result = NULL; diff --git a/src/share/schema/ob_schema_struct.cpp b/src/share/schema/ob_schema_struct.cpp index 7ac03f11b..e5eb1f9c7 100644 --- a/src/share/schema/ob_schema_struct.cpp +++ b/src/share/schema/ob_schema_struct.cpp @@ -13911,6 +13911,30 @@ int ObColumnGroupSchema::remove_column_id(const uint64_t column_id) return ret; } +void 
ObColumnGroupSchema::remove_all_cols() { + column_id_cnt_ = 0; + MEMSET(column_id_arr_, 0, sizeof(uint64_t) * column_id_arr_capacity_); +} +/* Write the readable name of column_group_type_ into readable_cg_name; returns OB_NOT_SUPPORTED when the type has no row in OB_COLUMN_GROUP_TYPE_NAME. */ +int ObColumnGroupSchema::get_column_group_type_name(ObString &readable_cg_name) const +{ + int ret = OB_SUCCESS; + if (column_group_type_ < ObColumnGroupType::DEFAULT_COLUMN_GROUP || + static_cast<uint64_t>(column_group_type_) >= sizeof(OB_COLUMN_GROUP_TYPE_NAME) / sizeof(OB_COLUMN_GROUP_TYPE_NAME[0])) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("receive not suppoted column group type", K(ret), K(column_group_type_)); + } else { + /* column group type is the row index; the guard above bounds it by the number of rows in the name table */ + const char* readable_name = OB_COLUMN_GROUP_TYPE_NAME[column_group_type_]; + const int32_t readable_name_len = static_cast<int32_t>(strlen(readable_name)); + if (readable_name_len != readable_cg_name.write(readable_name, readable_name_len)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to wriet column group name, check whether buffer size enough", K(ret), K(readable_cg_name)); + } + } + return ret; +} + OB_DEF_SERIALIZE(ObSkipIndexColumnAttr) { int ret = OB_SUCCESS; diff --git a/src/share/schema/ob_schema_struct.h b/src/share/schema/ob_schema_struct.h index 3fc00de89..3d883112e 100755 --- a/src/share/schema/ob_schema_struct.h +++ b/src/share/schema/ob_schema_struct.h @@ -7583,6 +7583,15 @@ enum ObSAuditOperationType : uint64_t //privilege .... 
AUDIT_OP_MAX }; + +enum ObStorageEncodingMode : uint64_t +{ + ROW_ENCODING_COL_CSENCODIGN = 0, + ALL_ENCODING, + ALL_CSENCODING, + MAX_ENCODING +}; + const char *get_audit_operation_type_str(const ObSAuditOperationType type); int get_operation_type_from_item_type(const bool is_stmt_audit, @@ -8345,12 +8354,19 @@ enum ObColumnGroupType : uint8_t NORMAL_COLUMN_GROUP, MAX_COLUMN_GROUP }; - +const char OB_COLUMN_GROUP_TYPE_NAME[][OB_MAX_COLUMN_NAME_LENGTH] = +{ + "default column group", + "all column group", + "rowkey column group", + "each column group" +}; const char *const OB_COLUMN_GROUP_NAME_PREFIX = "__cg"; const char *const OB_ROWKEY_COLUMN_GROUP_NAME = "__co_rowkey"; const char *const OB_DEFAULT_COLUMN_GROUP_NAME = "__co_default"; const char *const OB_ALL_COLUMN_GROUP_NAME = "__co_all"; +const char *const OB_EACH_COLUMN_GROUP_NAME = "__cg_each"; /* cannot be used on single column group name*/ class ObColumnGroupSchemaHashWrapper { public: @@ -8390,6 +8406,7 @@ public: int64_t get_convert_size() const; void reset(); bool is_valid() const; + void remove_all_cols(); inline void set_column_group_id(const uint64_t id) { column_group_id_ = id; } inline void set_column_group_type(const ObColumnGroupType &type) { column_group_type_ = type; } @@ -8416,6 +8433,7 @@ public: int add_column_id(const uint64_t column_id); int get_column_id(const int64_t idx, uint64_t &column_id) const; int remove_column_id(const uint64_t column_id); + int get_column_group_type_name(ObString &readable_cg_name) const; VIRTUAL_TO_STRING_KV(K_(column_group_id), K_(column_group_name), diff --git a/src/share/schema/ob_schema_utils.cpp b/src/share/schema/ob_schema_utils.cpp index 1270eef00..12f07ba67 100644 --- a/src/share/schema/ob_schema_utils.cpp +++ b/src/share/schema/ob_schema_utils.cpp @@ -550,6 +550,68 @@ int ObSchemaUtils::try_check_parallel_ddl_schema_in_sync( return ret; } +int ObSchemaUtils::build_column_group( + const ObTableSchema &table_schema, + const uint64_t tenant_id, + const 
ObColumnGroupType &cg_type, + const ObString &cg_name, + const ObIArray &column_ids, + const uint64_t cg_id, + ObColumnGroupSchema &column_group) +{ + int ret = OB_SUCCESS; + lib::Worker::CompatMode mode = lib::Worker::CompatMode::INVALID; + column_group.reset(); + if (!is_valid_tenant_id(tenant_id) || /*table_schema may be not valid*/ + cg_name.empty() || (cg_type >= ObColumnGroupType::MAX_COLUMN_GROUP)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument",K(table_schema), KR(ret), K(cg_name), K(cg_type), "column_id_cnt", column_ids.count()); + } else if(OB_FAIL(ObCompatModeGetter::get_tenant_mode(tenant_id, mode))) { + LOG_WARN("fail to check if oralce mode", K(ret), K(table_schema)); + } else { + column_group.set_column_group_id(cg_id); + column_group.set_column_group_type(cg_type); + column_group.set_block_size(table_schema.get_block_size()); + column_group.set_compressor_type(table_schema.get_compressor_type()); + const ObStoreFormatType store_format = table_schema.get_store_format(); + int64_t storage_encoding_mode = 0; + omt::ObTenantConfigGuard tcg(TENANT_CONF(tenant_id)); + if (OB_LIKELY(tcg.is_valid())) { + storage_encoding_mode = tcg->storage_encoding_mode; + } + bool is_flat = lib::Worker::CompatMode::ORACLE == mode ? 
((OB_STORE_FORMAT_NOCOMPRESS_ORACLE == store_format) + || (OB_STORE_FORMAT_BASIC_ORACLE == store_format) + || (OB_STORE_FORMAT_OLTP_ORACLE == store_format)) + : ((OB_STORE_FORMAT_REDUNDANT_MYSQL == store_format) + || (OB_STORE_FORMAT_COMPACT_MYSQL == store_format)); + if (is_flat || ( ObStorageEncodingMode::ALL_ENCODING == storage_encoding_mode)) { + // all use encoding + column_group.set_row_store_type(table_schema.get_row_store_type()); + } else if (ObStorageEncodingMode::ALL_CSENCODING == storage_encoding_mode) { + // all use cs_encoding (must test ALL_CSENCODING: ALL_ENCODING is already consumed by the branch above) + column_group.set_row_store_type(ObRowStoreType::CS_ENCODING_ROW_STORE); + } else { + // row_store uses encoding; column_store uses cs_encoding + if ((cg_type == ObColumnGroupType::DEFAULT_COLUMN_GROUP) || (cg_type == ObColumnGroupType::ALL_COLUMN_GROUP)) { + column_group.set_row_store_type(table_schema.get_row_store_type()); + } else { + column_group.set_row_store_type(ObRowStoreType::CS_ENCODING_ROW_STORE); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(column_group.set_column_group_name(cg_name))) { + LOG_WARN("fail to set column group name", KR(ret), K(cg_name)); + } + } + for (int64_t i = 0; OB_SUCC(ret) && (i < column_ids.count()); ++i) { + if (OB_FAIL(column_group.add_column_id(column_ids.at(i)))) { + LOG_WARN("fail to add column_id into column_group", KR(ret), K(i), "column_id", column_ids.at(i)); + } + } + } + return ret; +} + int ObSchemaUtils::batch_get_latest_table_schemas( common::ObISQLClient &sql_client, common::ObIAllocator &allocator, @@ -609,6 +671,124 @@ int ObSchemaUtils::batch_get_latest_table_schemas( return ret; } +int ObSchemaUtils::build_single_column_group( + const share::schema::ObTableSchema &table_schema, + share::schema::ObColumnSchemaV2 *column_schema, + const uint64_t tenant_id, + const uint64_t column_group_id, + share::schema::ObColumnGroupSchema &column_group_schema) +{ + /* check and build single_column_group for the column*/ + /* table schema may be not fully constructed and do not check here*/ + 
int ret = OB_SUCCESS; + column_group_schema.reset(); + if (OB_ISNULL(column_schema) || column_group_id < COLUMN_GROUP_START_ID + || column_schema->is_virtual_generated_column()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument, column is null or virtual column or invalid column group id", + K(ret), KPC(column_schema), K(column_group_id)); + } else { + ObArray column_ids; + char cg_name_ptr[OB_MAX_COLUMN_GROUP_NAME_LENGTH] = {'\0'}; + ObString cg_name(sizeof(cg_name_ptr), 0 /*length*/, cg_name_ptr); + column_ids.reset(); + if (OB_FAIL(column_ids.push_back(column_schema->get_column_id()))) { + LOG_WARN("fail to add column group", K(ret)); + } else if (OB_FAIL(column_schema->get_each_column_group_name(cg_name))){ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to format column group_name", K(ret)); + } else if (OB_FAIL(build_column_group(table_schema, tenant_id, + ObColumnGroupType::SINGLE_COLUMN_GROUP, + cg_name, column_ids, column_group_id, column_group_schema))) { + LOG_WARN("fail to build column group for single column", K(ret)); + } + } + return ret; +} + +int ObSchemaUtils::build_all_column_group( + const share::schema::ObTableSchema &table_schema, + const uint64_t tenant_id, + const uint64_t column_group_id, + share::schema::ObColumnGroupSchema &column_group_schema) +{ + int ret = OB_SUCCESS; + column_group_schema.reset(); + /* table_schema_ is constructed by resolver and is not valid*/ + if (column_group_id < COLUMN_GROUP_START_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument, table schema or column group_id is invalid", + K(ret), K(table_schema), K(column_group_id)); + } else{ + ObArray column_ids; + if (OB_FAIL(table_schema.get_all_column_ids(column_ids))) { + LOG_WARN("fail to get all column id in table schema", K(ret)); + } else { + const ObString cg_name = OB_ALL_COLUMN_GROUP_NAME; + if (column_ids.count() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("number of available columns should not be zeror", K(ret)); + + } else if 
(OB_FAIL(build_column_group(table_schema, tenant_id, + ObColumnGroupType::ALL_COLUMN_GROUP, cg_name, + column_ids, column_group_id, column_group_schema))) { + LOG_WARN("fail to build column group", K(ret)); + } + } + } + return ret; +} + +int ObSchemaUtils::mock_default_cg( + const uint64_t tenant_id, + share::schema::ObTableSchema &new_table_schema) +{ + int ret = OB_SUCCESS; + ObColumnGroupSchema* default_cg = nullptr; + /* for table update from less than 4.2, default cg may not exist + * mock a default cg + */ + uint64_t compat_version = 0; + + if (OB_INVALID == tenant_id || new_table_schema.get_table_id() == OB_INVALID_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tenant_id", K(ret), K(new_table_schema), K(tenant_id)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("fail to get tenant, data_version", K(ret)); + } else if (compat_version < DATA_VERSION_4_3_0_0) { + /* skip, do nothing*/ + } else if (!(new_table_schema.is_user_table() || new_table_schema.is_tmp_table() || new_table_schema.is_index_table())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not mock inner table", K(ret), K(new_table_schema)); + } else { + new_table_schema.set_column_store(true); + if (OB_FAIL(new_table_schema.get_column_group_by_name(OB_DEFAULT_COLUMN_GROUP_NAME, default_cg))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + default_cg = nullptr; + } else { + LOG_WARN("fail to get default column group", K(ret), K(new_table_schema)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(default_cg)) { + ObColumnGroupSchema tmp_cg; + ObArray column_ids; + if (OB_FAIL(new_table_schema.get_all_column_ids(column_ids))) { + LOG_WARN("fail to get column ids", K(ret), K(new_table_schema)); + } else if (OB_FAIL(ObSchemaUtils::build_column_group(new_table_schema, new_table_schema.get_tenant_id(), + ObColumnGroupType::DEFAULT_COLUMN_GROUP, + OB_DEFAULT_COLUMN_GROUP_NAME, column_ids, + DEFAULT_TYPE_COLUMN_GROUP_ID, tmp_cg))) { + 
LOG_WARN("fail to build column group", K(ret)); + } else if (OB_FAIL(new_table_schema.add_column_group(tmp_cg))) { + LOG_WARN("failt to add default column group", K(ret)); + } + } + } + return ret; +} + int ObSchemaUtils::get_latest_table_schema( common::ObISQLClient &sql_client, common::ObIAllocator &allocator, diff --git a/src/share/schema/ob_schema_utils.h b/src/share/schema/ob_schema_utils.h index 4b17bfaa9..ab27b55d2 100644 --- a/src/share/schema/ob_schema_utils.h +++ b/src/share/schema/ob_schema_utils.h @@ -121,6 +121,28 @@ public: uint64_t tenant_id, uint64_t data_table_id, ObIArray &table_schemas); + static int build_column_group( + const share::schema::ObTableSchema &table_schema, + const uint64_t tenant_id, + const share::schema::ObColumnGroupType &cg_type, + const common::ObString &cg_name, + const common::ObIArray &column_ids, + const uint64_t cg_id, + share::schema::ObColumnGroupSchema &column_group); + static int build_all_column_group( + const share::schema::ObTableSchema &table_schema, + const uint64_t tenant_id, + const uint64_t column_group_id, + share::schema::ObColumnGroupSchema &column_group_schema); + static int build_single_column_group( + const share::schema::ObTableSchema &table_schema, + share::schema::ObColumnSchemaV2 *column_schema, + const uint64_t tenant_id, + const uint64_t column_group_id, + share::schema::ObColumnGroupSchema &column_group_schema); + static int mock_default_cg( + const uint64_t tenant_id, + share::schema::ObTableSchema &new_table_schema); // Optimized method to batch get latest table schemas from cache or inner_table automatically. 
// diff --git a/src/share/schema/ob_table_dml_param.cpp b/src/share/schema/ob_table_dml_param.cpp index 75ea5eecc..c981d215e 100644 --- a/src/share/schema/ob_table_dml_param.cpp +++ b/src/share/schema/ob_table_dml_param.cpp @@ -88,8 +88,12 @@ int ObTableSchemaParam::convert(const ObTableSchema *schema) table_id_ = schema->get_table_id(); schema_version_ = schema->get_schema_version(); table_type_ = schema->get_table_type(); - use_cs = !schema->is_row_store(); lob_inrow_threshold_ = schema->get_lob_inrow_threshold(); + if (OB_FAIL(schema->get_is_row_store(use_cs))) { + LOG_WARN("fail to get is row store", K(ret)); + } else { + use_cs = !use_cs; + } } if (OB_SUCC(ret) && schema->is_user_table() && !schema->is_heap_table()) { diff --git a/src/share/schema/ob_table_param.cpp b/src/share/schema/ob_table_param.cpp index 052a0d2b9..4aafc1eef 100644 --- a/src/share/schema/ob_table_param.cpp +++ b/src/share/schema/ob_table_param.cpp @@ -819,10 +819,21 @@ int ObTableParam::construct_columns_and_projector( share::schema::ObColDesc tmp_col_desc; share::schema::ObColExtend tmp_col_extend; int32_t cg_idx = 0; + bool is_cs = false; bool has_all_column_group = false; - bool is_cs = !table_schema.is_row_store(); int64_t rowkey_count = 0; - if (OB_FAIL(table_schema.has_all_column_group(has_all_column_group))) { + + if (OB_SUCC(ret)) { + bool is_table_row_store = false; + if (OB_FAIL(table_schema.get_is_row_store(is_table_row_store))) { + LOG_WARN("fail to get is talbe row store", K(ret)); + } else { + is_cs = !is_table_row_store; + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(table_schema.has_all_column_group(has_all_column_group))) { LOG_WARN("Failed to check if has all column group", K(ret)); } diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index b5a6d38e3..aeccf842a 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -2241,9 +2241,14 @@ int ObTableSchema::alter_column(ObColumnSchemaV2 
&column_schema, ObColumnCheckMo if (src_schema->is_autoincrement() && !column_schema.is_autoincrement()) { autoinc_column_id_ = 0; } - if (src_schema->get_column_name_str() != dst_name) { + if (src_schema->get_column_name_str() != dst_name && is_column_store_supported()) { bool is_oracle_mode = false; - if (OB_FAIL(check_if_oracle_compat_mode(is_oracle_mode))) { + char cg_name[OB_MAX_COLUMN_GROUP_NAME_LENGTH] = {'\0'}; + ObString cg_name_str(OB_MAX_COLUMN_GROUP_NAME_LENGTH, 0, cg_name); + + if (OB_FAIL(src_schema->get_each_column_group_name(cg_name_str))) { + LOG_WARN("fail to get each column group name", K(ret)); + } else if (OB_FAIL(check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("fail to check oracle mode", KR(ret)); } else if (OB_FAIL(remove_col_from_name_hash_array(is_oracle_mode, src_schema))) { LOG_WARN("Failed to remove old column name from name_hash_array", K(ret)); @@ -2252,6 +2257,33 @@ int ObTableSchema::alter_column(ObColumnSchemaV2 &column_schema, ObColumnCheckMo } else if (OB_FAIL(add_col_to_name_hash_array(is_oracle_mode, src_schema))) { LOG_WARN("Failed to add new column name to name_hash_array", K(ret)); } + + /*alter relavent column group name*/ + ObColumnGroupSchema *column_group = nullptr; + if (OB_FAIL(ret)){ + } else if (OB_FAIL(get_column_group_by_name(cg_name_str, column_group))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to check column group exist", K(ret), K(cg_name_str)); + } + } else if(OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should no be null", K(ret)); + } else if (OB_FAIL(cg_name_hash_arr_->erase_refactored(ObColumnGroupSchemaHashWrapper( + column_group->get_column_group_name())))) { + LOG_WARN("fail to remove from cg name arr", K(ret)); + } else { + cg_name_str.set_length(0); + if (OB_FAIL(src_schema->get_each_column_group_name(cg_name_str))) { /* src_schema column name has been changed*/ + LOG_WARN("fail to get column group name", K(ret)); + } 
else if (OB_FAIL(column_group->set_column_group_name(cg_name_str))) { + LOG_WARN("fail to set column group name", K(ret)); + } else if (OB_FAIL((add_column_group_to_hash_array( + column_group, ObColumnGroupSchemaHashWrapper(column_group->get_column_group_name()), cg_name_hash_arr_)))) { + LOG_WARN("fail to set cg_name _hash_arr", K(ret)); + } + } } if (OB_FAIL(ret)) { } else if (OB_FAIL(src_schema->assign(column_schema))) { @@ -8338,7 +8370,7 @@ int ObTableSchema::has_all_column_group(bool &has_all_column_group) const int ObTableSchema::get_column_group_by_id( const uint64_t column_group_id, - ObColumnGroupSchema *&column_group) + ObColumnGroupSchema *&column_group) const { int ret = OB_SUCCESS; column_group = NULL; @@ -8355,9 +8387,7 @@ int ObTableSchema::get_column_group_by_id( return ret; } -int ObTableSchema::get_column_group_by_name( - const ObString &cg_name, - ObColumnGroupSchema *&column_group) +int ObTableSchema::get_column_group_by_name(const ObString &cg_name, ObColumnGroupSchema *&column_group) const { int ret = OB_SUCCESS; column_group = nullptr; @@ -8366,10 +8396,12 @@ int ObTableSchema::get_column_group_by_name( LOG_WARN("invalid argument", K(ret), K(cg_name)); } else if (OB_NOT_NULL(cg_name_hash_arr_)) { if (OB_FAIL(cg_name_hash_arr_->get_refactored(ObColumnGroupSchemaHashWrapper(cg_name), column_group))) { - LOG_WARN("fail to get column_group from hash array", K(ret), K(cg_name)); - } else if (OB_ISNULL(column_group)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("column_group should not be null", K(ret), K(cg_name)); + column_group = nullptr; + if (OB_HASH_NOT_EXIST == ret) { + /* skip, hash not exist normal situation no warn*/ + } else { + LOG_WARN("fail to get column_group from hash array", K(ret), K(cg_name)); + } } } return ret; @@ -8520,7 +8552,43 @@ int ObTableSchema::add_column_group_to_array(ObColumnGroupSchema *column_group) return ret; } -int ObTableSchema::is_column_group_exist(const ObString &cg_name, bool &exist) +int 
ObTableSchema::remove_column_group(const uint64_t column_group_id) +{ + int ret = OB_SUCCESS; + /* Drop the column group with the given id: erase it from the id and name hash arrays, then compact column_group_arr_. */ + ObColumnGroupSchema *column_group = nullptr; + if (OB_FAIL(get_column_group_by_id(column_group_id, column_group))) { + LOG_WARN("fail to get column group by id", K(ret), K(column_group_id)); + } else if (OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret)); + } else if (OB_ISNULL(cg_id_hash_arr_) || OB_ISNULL(cg_name_hash_arr_) || + OB_ISNULL(column_group_arr_) || column_group_cnt_ <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column array and cnt should not be none or zero", K(ret), KP(column_group_arr_), + KP(cg_id_hash_arr_), KP(cg_name_hash_arr_), K(column_group_cnt_)); + } else if (OB_FAIL(cg_id_hash_arr_->erase_refactored(ObColumnGroupIdKey(column_group->get_column_group_id())))) { + LOG_WARN("faile to erase column group id from table schema", K(ret)); + } else if (OB_FAIL(cg_name_hash_arr_->erase_refactored(ObColumnGroupSchemaHashWrapper( + column_group->get_column_group_name())))){ + LOG_WARN("faile to erase column group name from table scheam", K(ret)); + } else { + int64_t new_loc = 0; + for (int64_t i = 0; i < column_group_arr_capacity_ && i < column_group_cnt_; i++) { + if (column_group_arr_[i] != column_group) { + column_group_arr_[new_loc] = column_group_arr_[i]; + new_loc += 1; + } else { + /* skip column group need to be delete*/ + } + } + column_group_cnt_ = new_loc; /* count only the entries actually kept, so a pointer missing from the array cannot drop an unrelated group */ + } + return ret; +} + + +int ObTableSchema::is_column_group_exist(const ObString &cg_name, bool &exist) const { int ret = OB_SUCCESS; exist = false; @@ -8531,10 +8599,27 @@ int ObTableSchema::is_column_group_exist(const ObString &cg_name, bool &exist) if (OB_ISNULL(cg_name_hash_arr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("cg_name_hash_array should not be null", KR(ret)); + } else if (cg_name == OB_EACH_COLUMN_GROUP_NAME) { + ObTableSchema::const_column_group_iterator iter_begin = column_group_begin(); + 
ObTableSchema::const_column_group_iterator iter_end = column_group_end(); + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + const ObColumnGroupSchema *cg = *iter_begin; + if (ObColumnGroupType::SINGLE_COLUMN_GROUP == cg->get_column_group_type()) { + exist = true; + break; + } + } } else { ObColumnGroupSchema *column_group = NULL; - if (OB_SUCC(cg_name_hash_arr_->get_refactored( - ObColumnGroupSchemaHashWrapper(cg_name), column_group))) { + if (OB_FAIL(cg_name_hash_arr_->get_refactored(ObColumnGroupSchemaHashWrapper(cg_name), column_group))) { + exist = false; + if (OB_HASH_NOT_EXIST == ret) { + /* hash no exist is sucess situation */ + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get refactored from cg_name_hash_arr ", K(ret)); + } + } else{ exist = true; } } @@ -8639,6 +8724,53 @@ int ObTableSchema::get_base_rowkey_column_group_index(int32_t &cg_idx) const return ret; } +int ObTableSchema::get_each_column_group(ObIArray &each_cgs) const +{ + int ret = OB_SUCCESS; + each_cgs.reset(); + ObTableSchema::const_column_group_iterator iter_begin = column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = column_group_end(); + + for (;OB_SUCC(ret) && iter_begin != iter_end; iter_begin++ ) { + ObColumnGroupSchema *cg = *iter_begin; + if (OB_ISNULL(cg)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), KP(cg)); + } else if (cg->get_column_group_type() == ObColumnGroupType::SINGLE_COLUMN_GROUP) { + if (OB_FAIL(each_cgs.push_back(cg))) { + LOG_WARN("fail to add column group pointer to the array", K(ret)); + } + } + } + return ret; +} + +int ObTableSchema::get_all_column_ids(ObIArray &column_ids) const +{ + int ret = OB_SUCCESS; + column_ids.reset(); + if (get_column_count() == 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("table has no columns", K(ret)); + } else { + ObArray col_desc; + col_desc.reset(); + + if (OB_FAIL(get_column_ids(col_desc, true /*no virtual columns*/))) { + LOG_WARN("fail to get not 
virtual columns", K(ret)); + } else { + ObArray::iterator iter_begin = col_desc.begin(); + ObArray::iterator iter_end = col_desc.end(); + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + if (OB_FAIL(column_ids.push_back(iter_begin->col_id_))) { + LOG_WARN("fail to push column id to array", K(ret)); + } + } + } + } + return ret; +} + // convert column_udt_set_id int ObTableSchema::convert_column_udt_set_ids(const ObHashMap &column_id_map) { @@ -8706,6 +8838,30 @@ int ObTableSchema::convert_column_udt_set_ids(const ObHashMap 1; } + int get_is_column_store(bool &is_column_store) const; uint64_t get_max_used_column_group_id() const { return max_used_column_group_id_; } void set_max_used_column_group_id(const uint64_t id) { max_used_column_group_id_ = id; } int add_column_group(const ObColumnGroupSchema &other); @@ -1383,6 +1383,7 @@ public: const bool filter_empty_cg = true) const; int get_store_column_groups(ObIArray &column_groups, const bool filter_empty_cg = true) const; + int remove_column_group(const uint64_t column_group_id); int has_all_column_group(bool &has_all_column_group) const; // materialized view log related template @@ -1497,12 +1498,16 @@ public: int set_column_encodings(const common::ObIArray &col_encodings); virtual int get_column_encodings(common::ObIArray &col_encodings) const override; - int get_column_group_by_id(const uint64_t column_group_id, ObColumnGroupSchema *&column_group); - int get_column_group_by_name(const ObString &cg_name, ObColumnGroupSchema *&column_group); + int get_column_group_by_id(const uint64_t column_group_id, ObColumnGroupSchema *&column_group) const; + int get_column_group_by_name(const ObString &cg_name, ObColumnGroupSchema *&column_group) const; int get_all_cg_type_column_group(const ObColumnGroupSchema *&column_group) const; + int get_each_column_group(ObIArray &each_cgs) const; int is_partition_key_match_rowkey_prefix(bool &is_prefix) const; int get_column_group_index(const share::schema::ObColumnParam 
¶m, int32_t &cg_idx) const; + int is_column_group_exist(const common::ObString &cg_name, bool &exist) const; + + int get_all_column_ids(ObIArray &column_ids) const; int generate_partition_key_from_rowkey(const common::ObRowkey &rowkey, common::ObRowkey &hign_bound_value) const; virtual int init_column_meta_array( @@ -1630,7 +1635,6 @@ protected: int add_column_group_to_hash_array(ObColumnGroupSchema *column_group, const KeyType &key, ArrayType *&array); - int is_column_group_exist(const common::ObString &cg_name, bool &exist); protected: // constraint related diff --git a/src/share/schema/ob_table_sql_service.cpp b/src/share/schema/ob_table_sql_service.cpp index e5c9e05de..9fa6c562c 100644 --- a/src/share/schema/ob_table_sql_service.cpp +++ b/src/share/schema/ob_table_sql_service.cpp @@ -798,6 +798,14 @@ int ObTableSqlService::drop_table(const ObTableSchema &table_schema, } } + // delete column group + if (OB_SUCC(ret)) { + if (!table_schema.is_column_store_supported()) { + } else if (OB_FAIL(delete_column_group(sql_client, table_schema, new_schema_version))) { + LOG_WARN("fail to delete column group", K(ret)); + } + } + // log operations if (OB_SUCC(ret)) { ObSchemaOperation opt; @@ -5269,7 +5277,10 @@ int ObTableSqlService::insert_column_ids_into_column_group( { int ret = OB_SUCCESS; uint64_t data_version = 0; - if (OB_FAIL(GET_MIN_DATA_VERSION(table.get_tenant_id(), data_version))) { + if (!table.is_column_store_supported()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table not support column store", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(table.get_tenant_id(), data_version))) { LOG_WARN("fail to get min data version", KR(ret), K(table)); } else if (OB_FAIL(check_column_store_valid(table, data_version))) { LOG_WARN("fail to check column store valid", KR(ret)); @@ -5318,7 +5329,6 @@ int ObTableSqlService::exec_insert_column_group( const char* tname = is_history ? 
OB_ALL_COLUMN_GROUP_HISTORY_TNAME : OB_ALL_COLUMN_GROUP_TNAME; const uint64_t tenant_id = table.get_tenant_id(); - const uint64_t table_id = table.get_table_id(); const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id); ObDMLExecHelper exec(sql_client, exec_tenant_id); ObTableSchema::const_column_group_iterator it_begin = table.column_group_begin(); @@ -5338,25 +5348,9 @@ int ObTableSqlService::exec_insert_column_group( && column_group->get_compressor_type() == ObCompressorType::ZLIB_LITE_COMPRESSOR) { ret = OB_NOT_SUPPORTED; LOG_WARN("zlib_lite_1.0 not support before 4.3", K(ret), K(table)); - } else { - const int64_t IS_DELETED = 0; - const uint64_t tmp_tenant_id = ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id); - const uint64_t tmp_table_id = ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id); - const uint64_t tmp_cg_id = ObSchemaUtils::get_extract_schema_id(exec_tenant_id, column_group->get_column_group_id()); - if (OB_FAIL(dml.add_pk_column("tenant_id", tmp_tenant_id)) - || OB_FAIL(dml.add_pk_column("table_id", tmp_table_id)) - || OB_FAIL(dml.add_pk_column("column_group_id", tmp_cg_id)) - || OB_FAIL(dml.add_column("column_group_name", column_group->get_column_group_name().ptr())) - || OB_FAIL(dml.add_column("column_group_type", column_group->get_column_group_type())) - || OB_FAIL(dml.add_column("block_size", column_group->get_block_size())) - || OB_FAIL(dml.add_column("compressor_type", column_group->get_compressor_type())) - || OB_FAIL(dml.add_column("row_store_type", column_group->get_row_store_type())) - || (is_history && OB_FAIL(dml.add_column("is_deleted", IS_DELETED))) - || (is_history && OB_FAIL(dml.add_column("schema_version", schema_version)))) { - LOG_WARN("fail to add column group mapping", KR(ret), KPC(column_group)); - } else if (OB_FAIL(dml.finish_row())) { - LOG_WARN("fail to finish row", KR(ret), KPC(column_group)); - } + } else if (OB_FAIL(gen_column_group_dml(table, *column_group, is_history, 
+ false /*not deleted*/, schema_version, dml))){ + LOG_WARN("fail to gen column_group_dml", K(ret)); } } @@ -5396,14 +5390,8 @@ int ObTableSqlService::exec_insert_column_group_mapping( for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); i++) { const int64_t IS_DELETED = 0; uint64_t tmp_column_id = column_ids.at(i); - if (OB_FAIL(dml.add_pk_column("tenant_id", tmp_tenant_id)) - || OB_FAIL(dml.add_pk_column("table_id", tmp_table_id)) - || OB_FAIL(dml.add_pk_column("column_group_id", tmp_cg_id)) - || OB_FAIL(dml.add_pk_column("column_id", tmp_column_id)) - || (is_history && OB_FAIL(dml.add_column("is_deleted", IS_DELETED))) - || (is_history && OB_FAIL(dml.add_column("schema_version", schema_version)))) { - LOG_WARN("fail to add column group mapping", KR(ret), K(column_group)); - } else if (OB_FAIL(dml.finish_row())) { + if (OB_FAIL(gen_column_group_mapping_dml(table, column_group, tmp_column_id, is_history, + false /*not delete*/, schema_version, dml))) { LOG_WARN("fail to finish row", K(ret), K(i), K(column_group)); } } @@ -5465,6 +5453,271 @@ int ObTableSqlService::exec_insert_column_group_mapping( return ret; } +int ObTableSqlService::gen_column_group_dml(const ObTableSchema &table_schema, + const ObColumnGroupSchema &column_group_schema, + const bool is_history, + const bool is_deleted, + const int64_t schema_version, + ObDMLSqlSplicer &dml) +{ + int ret = OB_SUCCESS; + if (!table_schema.is_valid() || !column_group_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(table_schema), K(column_group_schema)); + } else { + const uint64_t tenant_id = table_schema.get_tenant_id(); + const uint64_t table_id = table_schema.get_table_id(); + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id); + const uint64_t tmp_tenant_id = ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id); + const uint64_t tmp_table_id = ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id); + const uint64_t 
tmp_cg_id = ObSchemaUtils::get_extract_schema_id(exec_tenant_id, column_group_schema.get_column_group_id()); + + if (OB_FAIL(dml.add_pk_column("tenant_id", tmp_tenant_id)) + || OB_FAIL(dml.add_pk_column("table_id", tmp_table_id)) + || OB_FAIL(dml.add_pk_column("column_group_id", tmp_cg_id)) + || (!(is_history && is_deleted) && OB_FAIL(dml.add_column("column_group_name", column_group_schema.get_column_group_name().ptr()))) + || OB_FAIL(dml.add_column("column_group_type", column_group_schema.get_column_group_type())) + || OB_FAIL(dml.add_column("block_size", column_group_schema.get_block_size())) + || OB_FAIL(dml.add_column("compressor_type", column_group_schema.get_compressor_type())) + || OB_FAIL(dml.add_column("row_store_type", column_group_schema.get_row_store_type())) + || (is_history && OB_FAIL(dml.add_column("is_deleted", is_deleted))) + || (is_history && OB_FAIL(dml.add_column("schema_version", schema_version)))) { + LOG_WARN("fail to build column column dml", K(ret)); + } else if (OB_FAIL(dml.finish_row())) { + LOG_WARN("fail to finish column group dml row", K(ret)); + } + } + return ret; +} + +int ObTableSqlService::gen_column_group_mapping_dml(const ObTableSchema &table_schema, + const ObColumnGroupSchema &column_group_schema, + const int64_t column_id, + const bool is_history, + const bool is_deleted, + const int64_t schema_version, + ObDMLSqlSplicer &dml) +{ + int ret = OB_SUCCESS; + if (!table_schema.is_valid() || !column_group_schema.is_valid() || schema_version == OB_INVALID_SCHEMA_VERSION) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(table_schema), K(column_group_schema), K(schema_version)); + } else { + const uint64_t tenant_id = table_schema.get_tenant_id(); + const uint64_t table_id = table_schema.get_table_id(); + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id); + + const uint64_t tmp_tenant_id = ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id); + const uint64_t tmp_table_id = 
ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id); + const uint64_t tmp_cg_id = ObSchemaUtils::get_extract_schema_id(exec_tenant_id, column_group_schema.get_column_group_id()); + if (OB_FAIL(dml.add_pk_column("tenant_id", tmp_tenant_id)) + || OB_FAIL(dml.add_pk_column("table_id", tmp_table_id)) + || OB_FAIL(dml.add_pk_column("column_group_id", tmp_cg_id)) + || OB_FAIL(dml.add_pk_column("column_id", column_id)) + || (is_history && OB_FAIL(dml.add_column("is_deleted", is_deleted ? 1: 0))) + || (is_history && OB_FAIL(dml.add_column("schema_version", schema_version)))) { + LOG_WARN("fail to add info to column group mapping dml", K(ret), K(table_schema), K(column_group_schema)); + } else if (OB_FAIL(dml.finish_row())) { + LOG_WARN("dml splicer fail to finish row", K(ret)); + } + } + return ret; +} + + +int ObTableSqlService::delete_from_column_group(ObISQLClient &sql_client, + const ObTableSchema &table_schema, + const int64_t new_schema_version, + const bool is_history) +{ + int ret = OB_SUCCESS; + if (!table_schema.is_valid() || new_schema_version == OB_INVALID_SCHEMA_VERSION) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid table schema", K(ret), K(table_schema), K(new_schema_version)); + } else { + ObDMLSqlSplicer dml; + int64_t affect_rows = 0; + ObSqlString sql; + const uint64_t tenant_id = table_schema.get_tenant_id(); + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(table_schema.get_tenant_id()); + ObDMLExecHelper exec(sql_client, exec_tenant_id); + ObTableSchema::const_column_group_iterator iter_begin = table_schema.column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = table_schema.column_group_end(); + if (table_schema.get_column_group_count() == 0) { + /* skip table has not column*/ + } else if (is_history) { /* remove from __all_column_group_history*/ + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + const ObColumnGroupSchema *column_group = *iter_begin; + if (OB_ISNULL(column_group)) 
{ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), K(table_schema)); + } else if (OB_FAIL(gen_column_group_dml(table_schema, *column_group, is_history, + true /* is_delete */, new_schema_version, dml))) { + LOG_WARN("fail to write dml for __all_column_group", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(dml.splice_batch_insert_sql(OB_ALL_COLUMN_GROUP_HISTORY_TNAME, sql))) { + LOG_WARN("fail to splice batch insert sql", K(ret), K(sql), K(table_schema)); + } else if (OB_FAIL(sql_client.write(exec_tenant_id, sql.ptr(), affect_rows))) { + LOG_WARN("fail to insert deleted record to all column group history", K(ret)); + } else if (table_schema.get_column_group_count() != affect_rows){ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to drop all column group columns", K(ret), K(affect_rows), K(table_schema)); + } + } else { + if (OB_FAIL(dml.add_pk_column("tenant_id", ObSchemaUtils::get_extract_tenant_id( + exec_tenant_id, table_schema.get_tenant_id()))) + || OB_FAIL(dml.add_pk_column("table_id", table_schema.get_table_id()))) { + LOG_WARN("fail to gen dml to delete from __all_column_group", K(ret)); + } else if (OB_FAIL(exec.exec_delete(OB_ALL_COLUMN_GROUP_TNAME, dml, affect_rows))) { + LOG_WARN("fail to delete from __all_column_group", K(ret)); + } else if (table_schema.get_column_group_count() != affect_rows && affect_rows != 0) { + /*the table upgrade from 4.2 or less has not record in inner table, allow 0 row affected*/ + ret = OB_ERR_UNEXPECTED; /* same code as the sibling row-count checks */ + LOG_WARN("fail to drop all column group columns", K(ret), K(affect_rows), K(table_schema)); + } + } + } + return ret; +} + +int ObTableSqlService::delete_from_column_group_mapping(ObISQLClient &sql_client, + const ObTableSchema &table_schema, + const int64_t schema_version, + const bool is_history) +{ + int ret = OB_SUCCESS; + if (!table_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid table schema", K(ret), K(table_schema)); + } else { + 
ObDMLSqlSplicer dml; + int64_t cg_mapping_cnt = 0; + int64_t affect_rows = 0; + ObSqlString sql; + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(table_schema.get_tenant_id()); + ObDMLExecHelper exec(sql_client, exec_tenant_id); + ObTableSchema::const_column_group_iterator iter_begin = table_schema.column_group_begin(); + ObTableSchema::const_column_group_iterator iter_end = table_schema.column_group_end(); + /* count affect rows & form dml for history table*/ + for (; OB_SUCC(ret) && iter_begin != iter_end; iter_begin++) { + const ObColumnGroupSchema *column_group = *iter_begin; + if (OB_ISNULL(column_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column group should not be null", K(ret), K(table_schema)); + } else { + cg_mapping_cnt += column_group->get_column_id_count(); /* counted only after the null check above, so a null entry is never dereferenced */ + for (int64_t i = 0; OB_SUCC(ret) && is_history && i < column_group->get_column_id_count(); i++) { + if (OB_FAIL(gen_column_group_mapping_dml(table_schema, *column_group, column_group->get_column_ids()[i], + is_history, true /* is_deleted*/, schema_version, dml))) { + LOG_WARN("fail to write column group mapping dml", K(ret)); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (cg_mapping_cnt == 0) { + /*skip table don't have column group like view*/ + } else if (is_history) { + if (OB_FAIL(dml.splice_batch_insert_sql(OB_ALL_COLUMN_GROUP_MAPPING_HISTORY_TNAME, sql))) { + LOG_WARN("fail to splice batch insert_sql", K(ret), K(sql), K(table_schema)); + } else if (OB_FAIL(sql_client.write(exec_tenant_id, sql.ptr(), affect_rows))) { + LOG_WARN("fail to wirte rows into __all_column_group_mapping_history", K(ret), K(sql)); + } else if (cg_mapping_cnt != affect_rows){ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to drop in __all_column_group_mapping_history", K(ret), K(affect_rows), K(table_schema)); + } + } else { + if (OB_FAIL(dml.add_pk_column("tenant_id", ObSchemaUtils::get_extract_tenant_id( + exec_tenant_id, table_schema.get_tenant_id()))) + || 
OB_FAIL(dml.add_pk_column("table_id", table_schema.get_table_id()))) { + LOG_WARN("fail to gen dml to delete from __all_column_group", K(ret)); + } else if (OB_FAIL(exec.exec_delete(OB_ALL_COLUMN_GROUP_MAPPING_TNAME, dml, affect_rows))) { + LOG_WARN("fail to insert deleted record to all column group", K(ret)); + } else if (cg_mapping_cnt != affect_rows && 0 != affect_rows) { + /*the table upgrade from 4.2 or less has not record in inner table, allow 0 row affected*/ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to drop in __all_column_group_mapping", K(ret), K(affect_rows), K(table_schema)); + } + } + } + return ret; +} + + +int ObTableSqlService::delete_column_group(ObISQLClient &sql_client, + const ObTableSchema &table_schema, + const int64_t schema_version) +{ + + int ret = OB_SUCCESS; + if (!table_schema.is_valid() || !table_schema.is_column_store_supported()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid table", K(ret), K(table_schema)); + } else if (OB_FAIL(delete_from_column_group(sql_client, table_schema, schema_version))) { + LOG_WARN("fail to delete from table __all_column_group", K(ret)); + } else if (OB_FAIL(delete_from_column_group(sql_client, table_schema, schema_version, true /*history table*/))) { + LOG_WARN("fail to delete from table __all_column_group_history", K(ret)); + } else if (OB_FAIL(delete_from_column_group_mapping(sql_client, table_schema, schema_version))) { + LOG_WARN("fail to delete from table __all_column_group_mapping", K(ret)); + } else if (OB_FAIL(delete_from_column_group_mapping(sql_client, table_schema, schema_version, true /*history*/))) { + LOG_WARN("fail to delete from talbe __all_column_group_mapping_history", K(ret)); + } + return ret; +} + +int ObTableSqlService::update_single_column_group(ObISQLClient &sql_client, + const ObTableSchema &new_table_schema, + const ObColumnGroupSchema &ori_cg_schema, + const ObColumnGroupSchema &new_cg_schema) +{ + int ret = OB_SUCCESS; + ObDMLSqlSplicer dml; + int64_t affect_rows = 0; + 
uint64_t compat_version = 0; + if (!sql_client.is_active() || !new_table_schema.is_valid() || + !ori_cg_schema.is_valid() || !new_cg_schema.is_valid() || !new_table_schema.is_column_store_supported()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(new_table_schema), K(ori_cg_schema), K(new_cg_schema)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(new_table_schema.get_tenant_id(), compat_version))) { + LOG_WARN("fail to check min data_version", K(ret), K(new_table_schema)); + } else if (OB_FAIL(check_column_store_valid(new_table_schema, compat_version))) { + LOG_WARN("fail to check column store valid", KR(ret), K(new_table_schema), K(compat_version)); + } else { + const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(new_table_schema.get_tenant_id()); + ObDMLExecHelper exec(sql_client, exec_tenant_id); + /* write into __all_column_group*/ + if (OB_FAIL(gen_column_group_dml(new_table_schema, new_cg_schema, false, /* not history*/ + false /* not deleted*/, new_cg_schema.get_schema_version(), dml))) { + LOG_WARN("fail to gen column group dml", K(ret)); + } else if (OB_FAIL(exec.exec_update(OB_ALL_COLUMN_GROUP_TNAME, dml, affect_rows))) { + LOG_WARN("fail to update all column group", K(ret)); + } else if (affect_rows != (ori_cg_schema.get_column_group_name() != new_cg_schema.get_column_group_name())) { + /* for some ddl don't change propertype, affect rows should be 0, since all_column_group has no schema version*/ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to update single row in all column group ", K(ret), K(affect_rows), K(ori_cg_schema), K(new_cg_schema)); + } + + /* write into __all_column_group_history*/ + dml.reset(); + affect_rows = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(gen_column_group_dml(new_table_schema, new_cg_schema, true, /* history table*/ + false/*not delete*/, new_cg_schema.get_schema_version(), dml))) { + LOG_WARN("fail to gen column group dml", K(ret)); + } else if 
(OB_FAIL(exec.exec_insert(OB_ALL_COLUMN_GROUP_HISTORY_TNAME, dml, affect_rows))) { + LOG_WARN("fail to exec dml on history table", K(ret)); + } else if (1 != affect_rows) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("affect row not equal to 1", K(ret), K(affect_rows), K(ori_cg_schema), K(new_cg_schema)); + } + } + return ret; +} + // Three scenes : // 1. drop fk parent table // 2. create child table with a fk references a mock fk parent table not exist diff --git a/src/share/schema/ob_table_sql_service.h b/src/share/schema/ob_table_sql_service.h index a01931107..0cbd6cc61 100644 --- a/src/share/schema/ob_table_sql_service.h +++ b/src/share/schema/ob_table_sql_service.h @@ -395,6 +395,10 @@ public: int delete_from_all_temp_table(common::ObISQLClient &sql_client, const uint64_t tenant_id, const uint64_t table_id); + int update_single_column_group(ObISQLClient &sql_client, + const ObTableSchema &new_table_schema, + const ObColumnGroupSchema &ori_cg_schema, + const ObColumnGroupSchema &new_cg_schema); private: int log_operation_wrapper( ObSchemaOperation &opt, @@ -434,12 +438,37 @@ private: const ObTableSchema &table, const int64_t schema_version, bool is_history); - int exec_insert_column_group_mapping(ObISQLClient &sql_client, - const ObTableSchema &table, - const int64_t schema_version, - const ObColumnGroupSchema &column_group, - const ObIArray &column_ids, - const bool is_history); + int exec_insert_column_group_mapping(ObISQLClient &sql_client, + const ObTableSchema &table, + const int64_t schema_version, + const ObColumnGroupSchema &column_group, + const ObIArray &column_ids, + const bool is_history); + + int delete_column_group(ObISQLClient &sql_clinet, + const ObTableSchema &table, + const int64_t schema_version); + int gen_column_group_dml(const ObTableSchema &table_schema, + const ObColumnGroupSchema &column_group_schema, + const bool is_history, + const bool is_deleted, + const int64_t schema_verison, + ObDMLSqlSplicer &dml); + int 
gen_column_group_mapping_dml(const ObTableSchema &table_schema, + const ObColumnGroupSchema &column_group_schema, + const int64_t column_id_index, + const bool is_history, + const bool is_deleted, + const int64_t schema_version, + ObDMLSqlSplicer &dml); + int delete_from_column_group(ObISQLClient &sql_client, + const ObTableSchema &table_schema, + const int64_t schema_version, + const bool is_history = false); + int delete_from_column_group_mapping(ObISQLClient &sql_client, + const ObTableSchema &table_schema, + const int64_t schema_version, + const bool is_history = false); // MockFKParentTable begin public: int add_mock_fk_parent_table( diff --git a/src/share/table/ob_table_load_row_array.h b/src/share/table/ob_table_load_row_array.h index f62cd216b..c4b13b4f6 100644 --- a/src/share/table/ob_table_load_row_array.h +++ b/src/share/table/ob_table_load_row_array.h @@ -27,7 +27,10 @@ class ObTableLoadRowArray { OB_UNIS_VERSION(1); public: - ObTableLoadRowArray() : allocator_handle_() {} + ObTableLoadRowArray() : allocator_handle_() + { + array_.set_attr(ObMemAttr(MTL_ID(), "TDL_RowArray")); + } ~ObTableLoadRowArray() {} int push_back(const T &obj_row); diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index 8db2780bf..0a71551d0 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -161,6 +161,7 @@ ob_set_subtarget(ob_sql engine_basic engine/basic/ob_pushdown_filter.cpp engine/basic/ob_ra_datum_store.cpp engine/basic/ob_ra_row_store.cpp + engine/basic/ob_temp_block_store.cpp engine/basic/ob_select_into_op.cpp engine/basic/ob_temp_table_access_op.cpp engine/basic/ob_temp_table_insert_op.cpp @@ -168,6 +169,15 @@ ob_set_subtarget(ob_sql engine_basic engine/basic/ob_topk_op.cpp engine/basic/ob_values_op.cpp engine/basic/ob_stat_collector_op.cpp + engine/basic/chunk_store/ob_chunk_block.cpp + engine/basic/chunk_store/ob_compact_store.cpp + engine/basic/chunk_store/ob_default_block_reader.cpp + engine/basic/chunk_store/ob_default_block_writer.cpp + 
engine/basic/chunk_store/ob_compact_block_reader.cpp + engine/basic/chunk_store/ob_compact_block_writer.cpp + engine/basic/chunk_store/ob_chunk_block_compressor.cpp + #engine/basic/chunk_store/ob_encoded_block_writer.cpp + #engine/basic/chunk_store/ob_encoded_block_reader.cpp ) ob_set_subtarget(ob_sql engine_cmd diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 7ead81c4e..84ee2687b 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -1827,7 +1827,46 @@ int ObStaticEngineCG::generate_spec(ObLogSort &op, ObSortSpec &spec, const bool } spec.enable_encode_sortkey_opt_ = op.enable_encode_sortkey_opt(); spec.part_cnt_ = op.get_part_cnt(); - LOG_TRACE("trace order by", K(spec.all_exprs_.count()), K(spec.all_exprs_)); + int64_t compact_level = 0; + OZ(op.get_plan()->get_optimizer_context().get_global_hint().opt_params_.get_integer_opt_param(ObOptParamHint::COMPACT_SORT_LEVEL, compact_level)); + if (OB_SUCC(ret)) { + int64_t tenant_id = op.get_plan()->get_optimizer_context().get_session_info()->get_effective_tenant_id(); + spec.sort_compact_level_ = static_cast(compact_level); + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id)); + if (OB_UNLIKELY(!tenant_config.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail get tenant_config", K(ret), K(tenant_id)); + } else if (tenant_config->enable_store_compression || compact_level == SORT_COMPRESSION_LEVEL || + compact_level == SORT_COMPRESSION_ENCODE_LEVEL || + compact_level == SORT_COMPRESSION_COMPACT_LEVEL) { + if (opt_ctx_->is_online_ddl()) { + // for normal sort we use default compress type. 
for online ddl, we use the compress type in source table + ObLogicalOperator *child_op = op.get_child(0); + while(OB_NOT_NULL(child_op) && child_op->get_type() != log_op_def::LOG_TABLE_SCAN ) { + child_op = child_op->get_child(0); + } /* child_op is now the first table scan on the child(0) chain (possibly the direct child) or NULL */ + if (OB_NOT_NULL(child_op) && child_op->get_type() == log_op_def::LOG_TABLE_SCAN ) { + share::schema::ObSchemaGetterGuard *schema_guard = nullptr; + const share::schema::ObTableSchema *table_schema = nullptr; + uint64_t table_id = static_cast(child_op)->get_ref_table_id(); + if (OB_ISNULL(schema_guard = opt_ctx_->get_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get schema guard", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(tenant_id, table_id, table_schema))) { + LOG_WARN("fail to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("can't find table schema", K(ret), K(table_id)); + } else { + spec.compress_type_ = table_schema->get_compressor_type(); + } + } + LOG_TRACE("compact type is", K(spec.compress_type_)); + } + } + } + LOG_TRACE("trace order by", K(spec.all_exprs_.count()), K(spec.all_exprs_), + K(compact_level)); } if (OB_SUCC(ret)) { if (spec.sort_collations_.count() != spec.sort_cmp_funs_.count() diff --git a/src/sql/das/ob_das_utils.cpp b/src/sql/das/ob_das_utils.cpp index 750cd3b07..a69271c13 100644 --- a/src/sql/das/ob_das_utils.cpp +++ b/src/sql/das/ob_das_utils.cpp @@ -295,6 +295,47 @@ int ObDASUtils::reshape_storage_value(const ObObjMeta &col_type, return ret; } +int ObDASUtils::reshape_datum_value(const ObObjMeta &col_type, + const ObAccuracy &col_accuracy, + const bool enable_oracle_empty_char_reshape_to_null, + ObIAllocator &allocator, + blocksstable::ObStorageDatum &datum_value) +{ + int ret = OB_SUCCESS; + if (col_type.is_binary()) { + int32_t binary_len = col_accuracy.get_length(); + int32_t len = datum_value.len_; + if (binary_len > len) { + char *dest_str = NULL; + const char *str = datum_value.ptr_; + if 
(OB_ISNULL(dest_str = (char *)(allocator.alloc(binary_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc mem to binary", K(ret), K(binary_len)); + } else { + char pad_char = '\0'; + MEMCPY(dest_str, str, len); + MEMSET(dest_str + len, pad_char, binary_len - len); + datum_value.set_string(ObString(binary_len, dest_str)); + } + } + } else if (lib::is_oracle_mode() && !enable_oracle_empty_char_reshape_to_null && col_type.is_character_type() && datum_value.len_ == 0) { + // Oracle compatibility mode: '' as null + LOG_DEBUG("reshape empty string to null", K(datum_value)); + datum_value.set_null(); + } else if (col_type.is_fixed_len_char_type()) { + const char *str = datum_value.ptr_; + int32_t len = datum_value.len_; + ObString space_pattern = ObCharsetUtils::get_const_str(col_type.get_collation_type(), ' '); + for (; len >= space_pattern.length(); len -= space_pattern.length()) { + if (0 != MEMCMP(str + len - space_pattern.length(), space_pattern.ptr(), space_pattern.length())) { + break; + } + } + datum_value.set_string(ObString(len, str)); + } + return ret; +} + int ObDASUtils::generate_spatial_index_rows( ObIAllocator &allocator, const ObDASDMLBaseCtDef &das_ctdef, diff --git a/src/sql/das/ob_das_utils.h b/src/sql/das/ob_das_utils.h index f2fa61884..540fe4034 100644 --- a/src/sql/das/ob_das_utils.h +++ b/src/sql/das/ob_das_utils.h @@ -57,6 +57,11 @@ public: const common::ObAccuracy &col_accuracy, common::ObIAllocator &allocator, common::ObObj &value); + static int reshape_datum_value(const ObObjMeta &col_type, + const ObAccuracy &col_accuracy, + const bool enable_oracle_empty_char_reshape_to_null, + ObIAllocator &allocator, + blocksstable::ObStorageDatum &datum_value); static int generate_spatial_index_rows(ObIAllocator &allocator, const ObDASDMLBaseCtDef &das_ctdef, const ObString &wkb_str, diff --git a/src/sql/engine/basic/chunk_store/ob_block_ireader.h b/src/sql/engine/basic/chunk_store/ob_block_ireader.h new file mode 100644 index 
000000000..d993cf491 --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_block_ireader.h @@ -0,0 +1,60 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_BASIC_OB_BLOCK_IREADER_H_ +#define OCEANBASE_BASIC_OB_BLOCK_IREADER_H_ + +#include "share/ob_define.h" +#include "lib/container/ob_se_array.h" +#include "lib/allocator/page_arena.h" +#include "lib/utility/ob_print_utils.h" +#include "lib/list/ob_dlist.h" +#include "src/share/datum/ob_datum.h" +#include "sql/engine/expr/ob_expr.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" +#include "sql/engine/basic/ob_temp_block_store.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/engine/basic/ob_temp_block_store.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObBlockIReader +{ +public: + explicit ObBlockIReader(ObTempBlockStore *store) : store_(store), cur_blk_(nullptr) {}; + virtual ~ObBlockIReader() { reset(); }; + + void reset() + { + cur_blk_ = nullptr; + store_ = nullptr; + } + virtual void reuse() = 0; + virtual int get_row(const ObChunkDatumStore::StoredRow *&sr) = 0; + virtual void set_meta(const RowMeta* row_meta) = 0; + void set_block(const ObTempBlockStore::Block *blk) { cur_blk_ = blk; } + const ObTempBlockStore::Block *get_block() {return cur_blk_; } + virtual int prepare_blk_for_read(ObTempBlockStore::Block *blk) = 0; + +protected: + ObTempBlockStore *store_; + const ObTempBlockStore::Block* cur_blk_; +}; + +} 
// end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_BASIC_OB_BLOCK_IREADER_H_ diff --git a/src/sql/engine/basic/chunk_store/ob_block_iwriter.h b/src/sql/engine/basic/chunk_store/ob_block_iwriter.h new file mode 100644 index 000000000..80fff61d3 --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_block_iwriter.h @@ -0,0 +1,117 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_BASIC_OB_BLOCK_IWRITER_H_ +#define OCEANBASE_BASIC_OB_BLOCK_IWRITER_H_ + +#include "share/ob_define.h" +#include "lib/container/ob_se_array.h" +#include "lib/allocator/page_arena.h" +#include "lib/utility/ob_print_utils.h" +#include "lib/list/ob_dlist.h" +#include "src/share/datum/ob_datum.h" +#include "sql/engine/expr/ob_expr.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObCompactStore; + +class ObBlockIWriter +{ +public: + const static int64_t DEFAULT_BUF_SIZE = 64L * 1024L; + ObBlockIWriter(ObTempBlockStore *store = nullptr) : store_(store), inited_(false), cur_blk_(nullptr), last_row_pos_(nullptr) {}; + virtual ~ObBlockIWriter() { reset(); }; /* virtual: the class is an abstract base, so destruction through a base pointer must reach the derived destructor */ + + void reset() + { + inited_ = false; + store_ = nullptr; + cur_blk_ = nullptr; + last_row_pos_ = nullptr; + }; + + virtual int add_row(const common::ObIArray &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row = nullptr) = 0; + virtual int add_row(const ObChunkDatumStore::StoredRow &src_sr, 
ObChunkDatumStore::StoredRow **dst_sr = nullptr) = 0; + virtual int add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row) = 0; + + virtual int add_batch(const common::ObDatum **datums, const common::ObIArray &exprs, + const uint16_t selector[], const int64_t size, + ObChunkDatumStore::StoredRow **stored_rows, BatchCtx *batch_ctx) = 0; + virtual int close() = 0; + + virtual void set_meta(const RowMeta* row_meta) = 0; + virtual int prepare_blk_for_write(ObTempBlockStore::Block *blk) = 0; + virtual int get_last_stored_row(const ObChunkDatumStore::StoredRow *&sr) = 0; + +protected: + inline int64_t get_size() + { + int64_t size = 0; + if (OB_NOT_NULL(cur_blk_)) { + size = cur_blk_->get_buffer()->head_size(); + } + return size; + } + inline int64_t get_remain() + { + int64_t remain_size = 0; + if (OB_NOT_NULL(cur_blk_)) { + remain_size = cur_blk_->get_buffer()->remain(); + } + return remain_size; + } + + inline char *get_cur_buf() + { + char *res = nullptr; + if (OB_NOT_NULL(cur_blk_)) { + res = cur_blk_->get_buffer()->head(); + } + return res; + } + inline int advance(int64_t size) + { + int ret = OB_SUCCESS; + if (OB_NOT_NULL(cur_blk_)) { + last_row_pos_ = cur_blk_->get_buffer()->head(); + cur_blk_->cnt_ += 1; + store_->inc_block_id_cnt(1); + ret = cur_blk_->get_buffer()->advance(size); + } + return ret; + } + + inline bool is_overflow(uint64_t size) + { + return size > get_remain(); + } + + inline const char *get_last_row() { return last_row_pos_; } + +protected: + ObTempBlockStore *store_; + bool inited_; + ObTempBlockStore::Block* cur_blk_; + const char *last_row_pos_; +}; + +} // end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_BASIC_OB_BLOCK_IWRITER_H_ diff --git a/src/sql/engine/basic/chunk_store/ob_chunk_block.cpp b/src/sql/engine/basic/chunk_store/ob_chunk_block.cpp new file mode 100644 index 000000000..90d865536 --- /dev/null +++ 
b/src/sql/engine/basic/chunk_store/ob_chunk_block.cpp
@@ -0,0 +1,237 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+
+#include "sql/engine/basic/chunk_store/ob_chunk_block.h"
+#include "lib/oblog/ob_log_module.h"
+#include "lib/utility/ob_macro_utils.h"
+#include "sql/engine/basic/ob_temp_block_store.h"
+#include "src/storage/ddl/ob_direct_load_struct.h"
+
+namespace oceanbase
+{
+using namespace common;
+
+namespace sql
+{
+
+// Build the row layout from the expression list: fixed-length columns get a
+// length and an offset inside the fixed-data area, all other columns get 0/0.
+int RowMeta::init(const ObExprPtrIArray &exprs, const int32_t extra_size)
+{
+  int ret = OB_SUCCESS;
+  if (extra_size < 0 || exprs.count() <= 0) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("argument is INVALID", K(ret), K(exprs), K(extra_size));
+  } else {
+    col_cnt_ = exprs.count();
+    extra_size_ = extra_size;
+    fixed_cnt_ = 0;
+    fixed_offsets_ = NULL;
+    projector_ = NULL;
+    var_data_off_ = 0;
+
+    if (OB_FAIL(column_length_.prepare_allocate(exprs.count()))) {
+      LOG_WARN("fail to prepare allocate column_length", K(ret), K(exprs.count()));
+    } else if (OB_FAIL(column_offset_.prepare_allocate(exprs.count()))) {
+      LOG_WARN("fail to prepare allocate column_offset", K(ret), K(exprs.count()));
+    } else {
+      for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i++) {
+        ObExpr *e = exprs.at(i);
+        if (OB_ISNULL(e)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("get unexpected null pointer", K(ret));
+        } else if (is_fixed_length(e->datum_meta_.type_)) {
+          int16_t len = get_type_fixed_length(e->datum_meta_.type_);
+          column_length_.at(i) = len;
+          column_offset_.at(i) = var_data_off_;
+          var_data_off_ += len;
+          fixed_cnt_++;
+        } else {
+          column_length_.at(i) = 0;
+          column_offset_.at(i) = 0;
+        }
+      }
+    }
+    if (OB_SUCC(ret)) {
+      nulls_off_ = 0;
+      var_offsets_off_ = nulls_off_ + ObBitVector::memory_size(col_cnt_);
+      extra_off_ = var_offsets_off_ + get_var_col_cnt() * sizeof(int32_t);
+      fix_data_off_ = extra_off_ + extra_size_;
+      LOG_INFO("successfully init row meta", K(fixed_cnt_), K(col_cnt_), K(var_data_off_), K(column_offset_), K(fix_data_off_));
+    }
+  }
+
+  return ret;
+}
+
+// Same layout computation driven by column schema items; an item whose
+// is_valid_ flag is false is laid out as an 8-byte fixed column.
+int RowMeta::init(const ObIArray<storage::ObColumnSchemaItem> &col_array, const int32_t extra_size)
+{
+  int ret = OB_SUCCESS;
+  if (extra_size < 0 || col_array.count() <= 0) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("argument is INVALID", K(ret), K(col_array), K(extra_size));
+  } else {
+    col_cnt_ = col_array.count();
+    extra_size_ = extra_size;
+    fixed_cnt_ = 0;
+    fixed_offsets_ = NULL;
+    projector_ = NULL;
+    var_data_off_ = 0;
+
+    if (OB_FAIL(column_length_.prepare_allocate(col_array.count()))) {
+      LOG_WARN("fail to prepare allocate column_length", K(ret), K(col_array.count()));
+    } else if (OB_FAIL(column_offset_.prepare_allocate(col_array.count()))) {
+      LOG_WARN("fail to prepare allocate column_offset", K(ret), K(col_array.count()));
+    } else {
+      for (int64_t i = 0; OB_SUCC(ret) && i < col_array.count(); i++) {
+        if (!col_array.at(i).is_valid_) {
+          // the multiversion column and snapshot column;
+          column_length_.at(i) = 8;
+          column_offset_.at(i) = var_data_off_;
+          var_data_off_ += 8;
+          fixed_cnt_++;
+        } else if (is_fixed_length(col_array.at(i).col_type_.get_type())) {
+          int16_t len = get_type_fixed_length(col_array.at(i).col_type_.get_type());
+          column_length_.at(i) = len;
+          column_offset_.at(i) = var_data_off_;
+          var_data_off_ += len;
+          fixed_cnt_++;
+        } else {
+          column_length_.at(i) = 0;
+          column_offset_.at(i) = 0;
+        }
+      }
+    }
+    if (OB_SUCC(ret)) {
+      nulls_off_ = 0;
+      var_offsets_off_ = nulls_off_ + ObBitVector::memory_size(col_cnt_);
+      extra_off_ = var_offsets_off_ + get_var_col_cnt() * sizeof(int32_t);
+      fix_data_off_ = extra_off_ + extra_size_;
+      LOG_INFO("successfully init row meta", K(fixed_cnt_), K(col_cnt_), K(var_data_off_), K(column_offset_), K(fix_data_off_));
+    }
+  }
+
+  return ret;
+}
+
+OB_DEF_SERIALIZE(RowMeta)
+{
+  int ret = OB_SUCCESS;
+  LST_DO_CODE(OB_UNIS_ENCODE,
+              col_cnt_,
+              extra_size_,
+              fixed_cnt_,
+              nulls_off_,
+              var_offsets_off_,
+              extra_off_,
+              fix_data_off_,
+              var_data_off_,
+              column_length_,
+              column_offset_);
+  return ret;
+}
+
+
+OB_DEF_DESERIALIZE(RowMeta)
+{
+  int ret = OB_SUCCESS;
+  LST_DO_CODE(OB_UNIS_DECODE,
+              col_cnt_,
+              extra_size_,
+              fixed_cnt_,
+              nulls_off_,
+              var_offsets_off_,
+              extra_off_,
+              fix_data_off_,
+              var_data_off_,
+              column_length_,
+              column_offset_);
+  return ret;
+}
+
+OB_DEF_SERIALIZE_SIZE(RowMeta)
+{
+  int64_t len = 0;
+  LST_DO_CODE(OB_UNIS_ADD_LEN,
+              col_cnt_,
+              extra_size_,
+              fixed_cnt_,
+              nulls_off_,
+              var_offsets_off_,
+              extra_off_,
+              fix_data_off_,
+              var_data_off_,
+              column_length_,
+              column_offset_);
+  return len;
+}
+
+
+// Copy data_size bytes behind the bytes already written and move cur_pos_ forward.
+int WriterBufferHandler::write_data(char *data, const int64_t data_size)
+{
+  int ret = OB_SUCCESS;
+  if (data_size > buf_size_ - cur_pos_) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("data size shouldn't be large than reamain size", K(ret));
+  } else {
+    // append at the write position; copying to buf_ itself would clobber earlier data
+    MEMCPY(buf_ + cur_pos_, data, data_size);
+    cur_pos_ += data_size;
+  }
+
+  return ret;
+}
+
+int WriterBufferHandler::init(const int64_t buf_size)
+{
+  return resize(buf_size);
+}
+
+// Drop the current buffer and allocate a fresh one of `size` bytes from store_.
+int WriterBufferHandler::resize(const int64_t size)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(store_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("the allocator sholdn't be null", K(ret));
+  } else {
+    free_buffer();
+    // the old buffer is gone: clear its bookkeeping so a failed alloc leaves no stale size
+    buf_size_ = 0;
+    cur_pos_ = 0;
+    row_cnt_ = 0;
+    buf_ = static_cast<char *>(store_->alloc(size));
+    if (OB_ISNULL(buf_)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("fail to allocate memory", K(ret), K(size));
+    } else {
+      buf_size_ = size;
+    }
+  }
+  return ret;
+}
+
+void WriterBufferHandler::free_buffer()
+{
+  if (OB_NOT_NULL(buf_) && OB_NOT_NULL(store_)) {
+    store_->free(buf_, buf_size_);
+    buf_ = nullptr;
+  }
+}
+
+}
+}
\ No newline at end of file
diff --git a/src/sql/engine/basic/chunk_store/ob_chunk_block.h b/src/sql/engine/basic/chunk_store/ob_chunk_block.h
new file mode 100644
index 000000000..ca65cd413
--- /dev/null
+++ b/src/sql/engine/basic/chunk_store/ob_chunk_block.h
@@ -0,0 +1,178 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_BASIC_OB_CHUNK_BLOCK_H_
+#define OCEANBASE_BASIC_OB_CHUNK_BLOCK_H_
+
+#include "share/ob_define.h"
+#include "lib/container/ob_se_array.h"
+#include "lib/allocator/page_arena.h"
+#include "lib/utility/ob_print_utils.h"
+#include "lib/list/ob_dlist.h"
+#include "src/share/datum/ob_datum.h"
+#include "sql/engine/expr/ob_expr.h"
+#include "sql/engine/basic/ob_chunk_datum_store.h"
+#include "sql/engine/basic/ob_temp_block_store.h"
+#include "share/schema/ob_column_schema.h"
+
+namespace oceanbase
+{
+namespace storage {
+  class ObColumnSchemaItem;
+}
+namespace sql
+{
+
+// Per-row header: the total row size followed by a packed 32-bit flag word.
+struct RowHeader final
+{
+public:
+  RowHeader() : row_size_(0), offset_len_(4), has_null_(false), reserved_(0) {}
+public:
+  uint32_t row_size_;
+  union {
+    struct {
+      uint32_t offset_len_    : 3;
+      uint32_t has_null_      : 1;
+      uint32_t reserved_      : 28;
+    };
+    uint32_t flag_;
+  };
+};
+
+// Column layout of a row: per-column fixed length/offset plus the start offsets of each row section.
+struct RowMeta final
+{
+  OB_UNIS_VERSION_V(1);
+public:
+  RowMeta(common::ObIAllocator &alloc) : allocator_(alloc), col_cnt_(0), extra_size_(0),
+                                         fixed_cnt_(0), fixed_offsets_(NULL), projector_(NULL),
+                                         nulls_off_(0), var_offsets_off_(0), extra_off_(0),
+                                         fix_data_off_(0), var_data_off_(0),
+                                         column_length_(alloc), column_offset_(alloc)
+  {
+  }
+  int init(const ObExprPtrIArray &exprs, const int32_t extra_size);
+  int init(const ObIArray<storage::ObColumnSchemaItem> &col_array, const int32_t extra_size);
+  int32_t get_row_fixed_size() const { return sizeof(RowHeader) + var_data_off_; }
+  int32_t get_var_col_cnt() const { return col_cnt_ - fixed_cnt_; }
+  int32_t get_fixed_length(const int64_t idx) const
+  {
+    // NOTE(review): both init() overloads only ever set fixed_offsets_ to NULL - confirm it is filled before use
+    return fixed_offsets_[idx + 1] - fixed_offsets_[idx];
+  }
+
+  // todo: make allocator as pointer and free memory when row_meta destroyed
+
+public:
+  common::ObIAllocator &allocator_;
+  int32_t col_cnt_;
+  int32_t extra_size_;
+
+  int32_t fixed_cnt_;
+  int32_t *fixed_offsets_;
+
+  int32_t *projector_;
+
+  // start pos of those offset is payload
+  int32_t nulls_off_;
+  int32_t var_offsets_off_;
+  int32_t extra_off_;
+  int32_t fix_data_off_;
+  int32_t var_data_off_;
+
+  ObFixedArray<int32_t, common::ObIAllocator> column_length_;
+  ObFixedArray<int32_t, common::ObIAllocator> column_offset_;
+};
+
+class WriterBufferHandler final
+{
+  // used for DefaultWriter and CompactWriter.
+  // provide API to add data to buffer, and manage memory;
+public:
+  WriterBufferHandler(ObTempBlockStore *store = nullptr) : store_(store), buf_(nullptr), buf_size_(0), cur_pos_(0), row_cnt_(0), inner_alloc_() {};
+  virtual ~WriterBufferHandler() { reset(); };
+
+  int init(const int64_t buf_size);
+  int resize(const int64_t size);
+
+  int write_data(char *data, const int64_t data_size);
+
+  void reuse()
+  {
+    if (OB_NOT_NULL(buf_)) {
+      MEMSET(buf_, 0, buf_size_);
+    }
+    cur_pos_ = 0;
+    row_cnt_ = 0;
+  }
+
+  void reset()
+  {
+    free_buffer();
+    buf_size_ = 0;
+    cur_pos_ = 0;
+    row_cnt_ = 0;
+    store_ = nullptr;
+  }
+
+  void free_buffer();
+  int64_t get_remain() { return buf_size_ - cur_pos_; }
+  char* get_buf() { return buf_; }
+  char* get_cur_data() { return buf_ + cur_pos_; }
+  int64_t& get_cur_pos() { return cur_pos_; }
+  int64_t get_size() { return cur_pos_; }
+  int64_t get_max_size() { return buf_size_; }
+  int64_t get_row_cnt() { return row_cnt_; }
+  bool is_empty() { return cur_pos_ == 0; }
+  inline int advance(const int64_t size)
+  {
+    int ret = common::OB_SUCCESS;
+    if (size < -cur_pos_) {
+      //overflow
+      ret = common::OB_INVALID_ARGUMENT;
+      SQL_ENG_LOG(WARN, "invalid argument", K(size), K_(cur_pos));
+    } else if (size > get_remain()) {
+      ret = common::OB_BUF_NOT_ENOUGH;
+      SQL_ENG_LOG(WARN, "buffer not enough", K(size), "remain", get_remain());
+    } else {
+      cur_pos_ += size;
+      row_cnt_++;
+    }
+    return ret;
+  };
+
+private:
+  ObTempBlockStore *store_;
+  char * buf_;
+  int64_t buf_size_;
+  int64_t cur_pos_;
+  int64_t row_cnt_;
+  ObArenaAllocator inner_alloc_;
+};
+
+struct BatchCtx final
+{
+public:
+  BatchCtx() : datums_(nullptr), stored_rows_(nullptr), row_size_array_(nullptr), selector_(nullptr) {}
+  ~BatchCtx() {}
+public:
+  const ObDatum **datums_;
+  ObChunkDatumStore::StoredRow **stored_rows_;
+  uint32_t *row_size_array_;
+  uint16_t *selector_;
+};
+
+} // end namespace sql
+} // end namespace oceanbase
+
+#endif // OCEANBASE_BASIC_OB_CHUNK_BLOCK_H_
diff --git
a/src/sql/engine/basic/chunk_store/ob_chunk_block_compressor.cpp b/src/sql/engine/basic/chunk_store/ob_chunk_block_compressor.cpp
new file mode 100644
index 000000000..6bb3aaa8d
--- /dev/null
+++ b/src/sql/engine/basic/chunk_store/ob_chunk_block_compressor.cpp
@@ -0,0 +1,134 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+
+#include "sql/engine/basic/chunk_store/ob_chunk_block_compressor.h"
+#include "lib/oblog/ob_log_module.h"
+#include "lib/utility/ob_macro_utils.h"
+
+namespace oceanbase
+{
+using namespace common;
+
+namespace sql
+{
+
+ObChunkBlockCompressor::ObChunkBlockCompressor()
+  : compressor_type_(NONE_COMPRESSOR),
+    compressor_(NULL)
+{
+}
+
+ObChunkBlockCompressor::~ObChunkBlockCompressor()
+{
+  reset();
+}
+
+void ObChunkBlockCompressor::reset()
+{
+  compressor_type_ = NONE_COMPRESSOR;
+  if (compressor_ != nullptr) {
+    compressor_->reset_mem();
+    compressor_ = nullptr;
+  }
+}
+
+int ObChunkBlockCompressor::init(const ObCompressorType comptype)
+{
+  int ret = OB_SUCCESS;
+  reset();
+
+  if (comptype == NONE_COMPRESSOR) {
+  } else if (OB_FAIL(ObCompressorPool::get_instance().get_compressor(comptype, compressor_))) {
+    STORAGE_LOG(WARN, "Fail to get compressor, ", K(ret), K(comptype));
+  } else {
+    compressor_type_ = comptype;
+  }
+  return ret;
+}
+
+// need_size = in_size when no compressor is set, otherwise in_size plus the compressor's max overflow.
+int ObChunkBlockCompressor::calc_need_size(int64_t in_size, int64_t &need_size)
+{
+  int ret = OB_SUCCESS;
+  int64_t max_overflow_size = 0;
+  need_size = 0;
+  if (compressor_type_ == NONE_COMPRESSOR) {
+    need_size = in_size;
+  } else if (OB_ISNULL(compressor_)) {
+    ret = OB_ERR_UNEXPECTED;
+    STORAGE_LOG(WARN, "compressor is unexpected null", K(ret), K_(compressor));
+  } else if (OB_FAIL(compressor_->get_max_overflow_size(in_size, max_overflow_size))) {
+    STORAGE_LOG(WARN, "fail to get max_overflow_size, ", K(ret), K(in_size));
+  } else {
+    need_size = max_overflow_size + in_size;
+  }
+
+  return ret;
+}
+
+// Compress `in` into `out`. When the compressed form is not smaller than the input the raw
+// bytes are copied instead, so out_size == in_size marks a payload that was stored as is.
+int ObChunkBlockCompressor::compress(const char *in, const int64_t in_size, const int64_t max_comp_size,
+                                     char *out, int64_t &out_size)
+{
+  int ret = OB_SUCCESS;
+  out_size = 0;
+  if (compressor_type_ == NONE_COMPRESSOR) {
+    MEMCPY(out, in, in_size);
+    out_size = in_size;
+  } else if (OB_ISNULL(compressor_)) {
+    ret = OB_ERR_UNEXPECTED;
+    STORAGE_LOG(WARN, "compressor is unexpected null", K(ret), K_(compressor));
+  } else {
+    int64_t comp_size = 0;
+    if (OB_FAIL(compressor_->compress(in, in_size, out, max_comp_size, comp_size))) {
+      STORAGE_LOG(WARN, "compressor fail to compress.", K(ret), K(in), K(in_size),
+                  "comp_ptr", out, K(max_comp_size), K(comp_size));
+    } else if (comp_size >= in_size) {
+      STORAGE_LOG(TRACE, "compressed_size is larger than origin_size",
+                  K(comp_size), K(in_size));
+      MEMCPY(out, in, in_size);
+      out_size = in_size;
+    } else {
+      out_size = comp_size;
+    }
+  }
+  return ret;
+}
+
+// Inverse of compress(): in_size == uncomp_size means the payload is copied through unchanged.
+int ObChunkBlockCompressor::decompress(const char *in, const int64_t in_size,
+                                       const int64_t uncomp_size,
+                                       char *out, int64_t &out_size)
+{
+  int ret = OB_SUCCESS;
+  int64_t decomp_size = 0;
+  out_size = 0;
+  if ((compressor_type_ == NONE_COMPRESSOR) || in_size == uncomp_size) {
+    MEMCPY(out, in, in_size);
+    out_size = in_size;
+  } else if (OB_ISNULL(compressor_)) {
+    ret = OB_ERR_UNEXPECTED;
+    STORAGE_LOG(WARN, "compressor is unexpected null", K(ret), K_(compressor));
+  } else if (OB_FAIL(compressor_->decompress(in, in_size, out, uncomp_size,
+                                             decomp_size))) {
+    STORAGE_LOG(WARN, "failed to decompress data", K(ret), K(in_size), K(uncomp_size));
+  } else {
+    out_size = decomp_size;
+  }
+  return ret;
+}
+
+}
+}
\ No newline at end of file
diff --git a/src/sql/engine/basic/chunk_store/ob_chunk_block_compressor.h b/src/sql/engine/basic/chunk_store/ob_chunk_block_compressor.h
new file mode 100644
index 000000000..15113c931
--- /dev/null
+++ b/src/sql/engine/basic/chunk_store/ob_chunk_block_compressor.h
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_BASIC_OB_CHUNK_BLOCK_COMPRESSOR_H_
+#define OCEANBASE_BASIC_OB_CHUNK_BLOCK_COMPRESSOR_H_
+
+#include "lib/compress/ob_compressor_pool.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+
+// Wraps a compressor obtained from ObCompressorPool; NONE_COMPRESSOR turns every call into a plain copy.
+class ObChunkBlockCompressor final
+{
+public:
+  ObChunkBlockCompressor();
+  virtual ~ObChunkBlockCompressor();
+  void reset();
+  int init(const ObCompressorType type);
+  int compress(const char *in, const int64_t in_size, const int64_t max_comp_size,
+               char *out, int64_t &out_size);
+  int decompress(const char *in, const int64_t in_size, const int64_t uncomp_size,
+                 char *out, int64_t &out_size);
+  ObCompressorType get_compressor_type() const { return compressor_type_; }
+
+  int calc_need_size(int64_t in_size, int64_t &need_size);
+private:
+  common::ObCompressorType compressor_type_;
+  common::ObCompressor *compressor_;
+};
+
+} // end namespace sql
+} // end namespace oceanbase
+
+#endif
diff --git a/src/sql/engine/basic/chunk_store/ob_compact_block_reader.cpp b/src/sql/engine/basic/chunk_store/ob_compact_block_reader.cpp
new file mode 100644
index 000000000..5793e924b
--- /dev/null
+++
b/src/sql/engine/basic/chunk_store/ob_compact_block_reader.cpp
@@ -0,0 +1,226 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+
+#include "sql/engine/basic/chunk_store/ob_compact_block_reader.h"
+#include "lib/oblog/ob_log_module.h"
+#include "lib/utility/ob_macro_utils.h"
+#include "lib/container/ob_bitmap.h"
+#include "sql/engine/ob_bit_vector.h"
+
+namespace oceanbase
+{
+using namespace common;
+
+namespace sql
+{
+
+// Point the cursor at the compact row starting at `buf` and derive its section offsets.
+int ObCompactBlockReader::CurRowInfo::init(const RowMeta *row_meta, const uint8_t offset_width, const char *buf)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(row_meta) || OB_ISNULL(buf)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null pointer", K(ret), KP(row_meta), KP(buf));
+  } else if (offset_width != BASE_OFFSET_SIZE && offset_width != EXTENDED_OFFSET_SIZE) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("offset_width is invalid", K(ret), K(offset_width));
+  } else {
+    buf_ = buf;
+    row_size_ = reinterpret_cast<const uint32_t *>(buf);
+    // the n-th var_column in offset array.
+    cur_var_offset_pos_ = 0;
+    cur_data_ptr_ = sizeof(ObDatum) * row_meta->col_cnt_ + sizeof(ObChunkDatumStore::StoredRow);
+    var_column_cnt_ = row_meta->col_cnt_ - row_meta->fixed_cnt_;
+    int64_t var_offset_size = offset_width * (var_column_cnt_ + 1);
+    bitmap_size_ = sql::ObBitVector::memory_size(row_meta->col_cnt_);
+    bit_vec_ = sql::to_bit_vector(buf + HEAD_SIZE);
+    data_offset_ = HEAD_SIZE + bitmap_size_ + var_offset_size; // the start of fixed data buffer
+    fix_offset_ = data_offset_;
+    var_col_end_offset_ = *row_size_ - data_offset_;
+  }
+  return ret;
+}
+
+// Return the next row of the current block as a StoredRow held in the reader's own buffer.
+int ObCompactBlockReader::get_row(const ObChunkDatumStore::StoredRow *&sr)
+{
+  int ret = OB_SUCCESS;
+  sr = nullptr;
+  if (OB_ISNULL(cur_blk_) || OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("cur block or row_meta is null", K(ret), KP(cur_blk_), KP(row_meta_));
+  } else if (!blk_has_next_row()) {
+    ret = OB_ITER_END;
+  } else if (cur_pos_in_blk_ > cur_blk_->raw_size_ - sizeof(ObTempBlockStore::Block)) {
+    ret = OB_INDEX_OUT_OF_RANGE;
+    LOG_WARN("invalid index", K(ret), K(cur_pos_in_blk_), KP(cur_blk_));
+  } else {
+    int64_t size = 0;
+    sr = nullptr;
+    ObChunkDatumStore::StoredRow *tmp_sr = nullptr;
+    if (OB_FAIL(get_stored_row_size(size))) {
+      LOG_WARN("fail to get stored row size", K(ret));
+    } else if (OB_FAIL(alloc_stored_row(tmp_sr, size))) {
+      LOG_WARN("fail to alloc space for stored row", K(ret));
+    } else if (OB_ISNULL(tmp_sr)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("the sr is null", K(ret));
+    } else if (cur_row_offset_width_ == BASE_OFFSET_SIZE && OB_FAIL(get_stored_row<int16_t>(tmp_sr))){
+      LOG_WARN("fail to get stored row", K(ret));
+    } else if (cur_row_offset_width_ == EXTENDED_OFFSET_SIZE && OB_FAIL(get_stored_row<int32_t>(tmp_sr))) {
+      LOG_WARN("fail to get stored row", K(ret));
+    } else {
+      sr = tmp_sr;
+    }
+  }
+  return ret;
+}
+
+
+
+// Expand the compact row at cur_pos_in_blk_ into `sr`: one ObDatum per column followed by
+// the copied column bytes. T is the element type of the row's var-column offset array.
+template <typename T>
+int ObCompactBlockReader::get_stored_row(ObChunkDatumStore::StoredRow *&sr)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("row meta shouldn't be null", K(ret), KP(row_meta_));
+  } else if (OB_FAIL(row_info_.init(row_meta_, cur_row_offset_width_, &cur_blk_->payload_[cur_pos_in_blk_]))) {
+    LOG_WARN("fail to init row info", K(ret));
+  } else if (OB_ISNULL(row_info_.buf_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("buf is null", K(ret));
+  } else {
+    const T *offset_array = reinterpret_cast<const T *>(row_info_.buf_ + HEAD_SIZE + row_info_.bitmap_size_);
+    for (int64_t i = 0; OB_SUCC(ret) && i < row_meta_->col_cnt_; i++) {
+      ObDatum *cur_datum = reinterpret_cast<ObDatum *>(sr->payload_ + i * sizeof(ObDatum));
+      uint32_t len = 0;
+      char *tmp_ptr = reinterpret_cast<char *>(sr) + row_info_.cur_data_ptr_;
+      if (row_info_.bit_vec_->at(i)) {
+        cur_datum->set_null();
+        if (row_meta_->column_length_[i] == 0) {
+          row_info_.cur_var_offset_pos_ ++;
+        }
+      } else if (row_meta_->column_length_[i] == 0) {
+        T offset = offset_array[row_info_.cur_var_offset_pos_];
+        if (row_info_.cur_var_offset_pos_ < row_info_.var_column_cnt_) {
+          len = offset_array[row_info_.cur_var_offset_pos_ + 1] - offset;
+        } else {
+          ret = OB_INDEX_OUT_OF_RANGE;
+          LOG_WARN("the var column idx in out of range", K(ret));
+        }
+        if (OB_SUCC(ret)) {
+          // set datum->len_, use the pack_ to conver the NULL_FLAG
+          cur_datum->pack_ = len;
+          // set data
+          MEMCPY(tmp_ptr, row_info_.buf_ + row_info_.fix_offset_ + offset, len);
+          // set datum->ptr_
+          cur_datum->ptr_ = tmp_ptr;
+          row_info_.cur_var_offset_pos_ ++;
+        }
+      } else {
+        int32_t offset = row_meta_->column_offset_[i];
+        len = row_meta_->column_length_[i];
+        cur_datum->pack_ = len;
+        MEMCPY(tmp_ptr, row_info_.buf_ + row_info_.fix_offset_ + offset, len);
+        cur_datum->ptr_ = tmp_ptr;
+      }
+      row_info_.cur_data_ptr_ += len;
+    }
+    if (OB_SUCC(ret)) {
+      sr->cnt_ = row_meta_->col_cnt_;
+      sr->row_size_ = row_info_.cur_data_ptr_;
+      cur_pos_in_blk_ += *row_info_.row_size_;
+      cur_row_in_blk_++;
+    }
+  }
+  return ret;
+}
+
+// Hand out the reader-owned row buffer zero-filled, growing it through store_ when `size`
+// exceeds what is currently held.
+int ObCompactBlockReader::alloc_stored_row(ObChunkDatumStore::StoredRow *&sr, const int64_t size)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(store_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("store is null", K(ret));
+  } else if (size > sr_size_) {
+    if (OB_NOT_NULL(sr_buffer_)) {
+      store_->free(sr_buffer_, sr_size_);
+      sr_buffer_ = nullptr;
+      sr_size_ = 0;
+    }
+    sr_buffer_ = static_cast<char *>(store_->alloc(size));
+    if (OB_ISNULL(sr_buffer_)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("fail to allocate memory", K(ret));
+    } else {
+      sr_size_ = size;
+    }
+  }
+  if (OB_SUCC(ret)) {
+    MEMSET(sr_buffer_, 0, sr_size_);
+    sr = reinterpret_cast<ObChunkDatumStore::StoredRow *>(sr_buffer_);
+  }
+  return ret;
+}
+
+// Compute the StoredRow size of the row at cur_pos_in_blk_ and cache that row's offset width.
+int ObCompactBlockReader::get_stored_row_size(int64_t &size)
+{
+  int ret = OB_SUCCESS;
+  size = 0;
+  if (OB_ISNULL(cur_blk_) || OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("cur block or row_meta is null", K(ret), KP(cur_blk_), KP(row_meta_));
+  } else if (OB_FAIL(ObCompactBlockReader::calc_stored_row_size(&cur_blk_->payload_[cur_pos_in_blk_], row_meta_, size))) {
+    LOG_WARN("fail to get stored row size", K(ret));
+  } else {
+    cur_row_offset_width_ = *reinterpret_cast<const uint8_t *>(cur_blk_->payload_ + cur_pos_in_blk_ + sizeof(uint32_t));
+  }
+  return ret;
+}
+
+// size = compact row size minus its head, bitmap and offset array, plus one ObDatum per column and the StoredRow struct.
+int ObCompactBlockReader::calc_stored_row_size(const char *compact_row, const RowMeta *row_meta, int64_t &size)
+{
+  int ret = OB_SUCCESS;
+  size = 0;
+  if (OB_ISNULL(compact_row) || OB_ISNULL(row_meta)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("cur block or row_meta is null", K(ret), KP(compact_row), KP(row_meta));
+  } else {
+    int64_t bit_map_size = sql::ObBitVector::memory_size(row_meta->col_cnt_);
+    uint32_t row_size = *reinterpret_cast<const uint32_t *>(compact_row);
+    int64_t offset_width = *reinterpret_cast<const uint8_t *>(compact_row + sizeof(uint32_t));
+    int64_t var_column_cnt = row_meta->col_cnt_ - row_meta->fixed_cnt_;
+    if (offset_width != BASE_OFFSET_SIZE && offset_width != EXTENDED_OFFSET_SIZE) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("the offset width is unexpected", K(ret));
+    }
+    if (OB_SUCC(ret)) {
+      size = row_size - HEAD_SIZE - bit_map_size - offset_width * (var_column_cnt + 1)\
+             + row_meta->col_cnt_ * sizeof(ObDatum) + sizeof(ObChunkDatumStore::StoredRow);
+
+    }
+  }
+  return ret;
+}
+
+}
+}
diff --git a/src/sql/engine/basic/chunk_store/ob_compact_block_reader.h b/src/sql/engine/basic/chunk_store/ob_compact_block_reader.h
new file mode 100644
index 000000000..2680f25a2
--- /dev/null
+++ b/src/sql/engine/basic/chunk_store/ob_compact_block_reader.h
@@ -0,0 +1,161 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_BASIC_OB_COMPACT_BLOCK_READER_H_
+#define OCEANBASE_BASIC_OB_COMPACT_BLOCK_READER_H_
+
+#include "share/ob_define.h"
+#include "lib/container/ob_se_array.h"
+#include "lib/allocator/page_arena.h"
+#include "lib/utility/ob_print_utils.h"
+#include "lib/list/ob_dlist.h"
+#include "src/share/datum/ob_datum.h"
+#include "sql/engine/expr/ob_expr.h"
+#include "sql/engine/basic/chunk_store/ob_block_ireader.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+
+class ObTempBlockStore;
+struct RowMeta;
+/*
+ * compact row format
+ * +----------+--------------+-------------+-------------------+------------+---------+
+ * | row_size | offset_width | null_bitmap | var_column_offset | fixed_data | var_data|
+ * +----------+--------------+-------------+-------------------+------------+---------+
+ * offset_width: the width of offset in var_column_offset (e.g., 2 bytes /4 bytes),
+ *               4 bytes is for long row.
+ * null_bitmap: mark whether the i-th datum is null. equal to datum->desc.null_
+ *
+ * to get i-th datum(fixed data): 1. get offset by row_meta.column_offset[i].
+ *                                2. use the offset to get fixed data ( ObDatum *datum = fixed_data + offset)
+ * to get i-th datum(var_data):   1. find the var_data is the j-th var_data.
+ *                                2. get offset in var_column_offset. (T *offset = var_column_offset + j * offset_width)
+ *                                   T is int16_t or int32_t.
+ *                                3. use the offset to get datum from var_data. (datum = var_data + offset)
+ */
+class ObCompactBlockReader final : public ObBlockIReader
+{
+  static const int HEAD_SIZE = 5;
+  static const int BASE_OFFSET_SIZE = 2;
+  static const int EXTENDED_OFFSET_SIZE = 4;
+  struct CurRowInfo final
+  {
+  public:
+    CurRowInfo() : buf_(nullptr), row_size_(nullptr), cur_data_ptr_(0), var_column_cnt_(0), cur_var_offset_pos_(0), bitmap_size_(0),
+                   bit_vec_(nullptr), data_offset_(0), fix_offset_(0), var_col_end_offset_(0) {}
+
+    ~CurRowInfo() {reset();}
+    int init(const RowMeta *row_meta, const uint8_t offset_width, const char *buf);
+
+    void reset()
+    {
+      buf_ = nullptr;
+      row_size_ = nullptr;
+      cur_var_offset_pos_ = 0;
+      cur_data_ptr_ = 0;
+      var_column_cnt_ = 0;
+      bitmap_size_ = 0;
+      bit_vec_ = nullptr;
+      data_offset_ = 0;
+      fix_offset_ = 0;
+      var_col_end_offset_ = 0;
+    }
+    TO_STRING_KV(K_(cur_var_offset_pos), K_(var_column_cnt), K_(bitmap_size),
+                 K_(cur_data_ptr), K_(data_offset), K_(fix_offset));
+
+  public:
+    const char *buf_;
+    const uint32_t *row_size_;
+    int64_t cur_data_ptr_;
+    int64_t var_column_cnt_;
+    int64_t cur_var_offset_pos_; // the i-th in the var_array
+    int64_t bitmap_size_;
+
+    // Use BitVector to set the result of filter here, because the memory of ObBitMap is not continuous
+    // null_bitmap
+    const sql::ObBitVector *bit_vec_ = nullptr;
+    int64_t data_offset_; // the start of fixed data buffer.
+    int64_t fix_offset_;
+    int64_t var_col_end_offset_;
+  };
+
+
+public:
+  ObCompactBlockReader(ObTempBlockStore *store = nullptr) : ObBlockIReader(store), row_meta_(nullptr),
+                                                            sr_buffer_(nullptr), sr_size_(0), cur_row_offset_width_(0),
+                                                            cur_pos_in_blk_(0), cur_row_in_blk_(0) {};
+  ObCompactBlockReader(ObTempBlockStore *store, const RowMeta *row_meta) : ObBlockIReader(store), row_meta_(row_meta),
+                                                            sr_buffer_(nullptr), sr_size_(0), cur_row_offset_width_(0),
+                                                            cur_pos_in_blk_(0), cur_row_in_blk_(0) {};
+  virtual ~ObCompactBlockReader() { reset(); };
+
+  // Release the row buffer through store_ and clear all cursor state, including row_meta_.
+  void reset()
+  {
+    cur_pos_in_blk_ = 0;
+    if (OB_NOT_NULL(sr_buffer_) && OB_NOT_NULL(store_)) {
+      store_->free(sr_buffer_, sr_size_);
+      sr_buffer_ = nullptr;
+    }
+    sr_size_ = 0;
+    cur_row_in_blk_ = 0;
+    cur_row_offset_width_ = 0;
+    row_meta_ = nullptr;
+    row_info_.reset();
+  }
+
+  // Rewind the cursor but keep the row buffer (zeroed) and row_meta_.
+  void reuse()
+  {
+    cur_pos_in_blk_ = 0;
+    if (OB_NOT_NULL(sr_buffer_)) {
+      MEMSET(sr_buffer_, 0, sr_size_);
+    }
+    cur_row_in_blk_ = 0;
+    cur_row_offset_width_ = 0;
+    row_info_.reset();
+  }
+
+  int get_row(const ObChunkDatumStore::StoredRow *&sr) override;
+
+  void set_meta(const RowMeta *row_meta) { row_meta_ = row_meta; };
+  const RowMeta *get_meta() { return row_meta_; }
+  virtual int prepare_blk_for_read(ObTempBlockStore::Block *blk) final override { return OB_SUCCESS; }
+
+public:
+  static int calc_stored_row_size(const char *compact_row, const RowMeta *row_meta, int64_t &size);
+
+private:
+  inline bool blk_has_next_row() { return cur_blk_ != NULL && cur_blk_->cnt_ > cur_row_in_blk_; }
+  int get_stored_row_size(int64_t &size);
+  template <typename T>
+  int get_stored_row(ObChunkDatumStore::StoredRow *&sr);
+
+  int alloc_stored_row(ObChunkDatumStore::StoredRow *&sr, const int64_t size);
+
+private:
+  const RowMeta *row_meta_;
+  char *sr_buffer_;
+  int64_t sr_size_;
+  int64_t cur_row_offset_width_;
+  int64_t cur_pos_in_blk_;
+  int64_t cur_row_in_blk_;
+  CurRowInfo row_info_;
+};
+
+} // end namespace sql
+} // end namespace oceanbase
+
+#endif // OCEANBASE_BASIC_OB_COMPACT_BLOCK_READER_H_
diff --git a/src/sql/engine/basic/chunk_store/ob_compact_block_writer.cpp b/src/sql/engine/basic/chunk_store/ob_compact_block_writer.cpp new file mode 100644 index 000000000..b820e4ffd --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_compact_block_writer.cpp @@ -0,0 +1,584 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "sql/engine/basic/chunk_store/ob_compact_block_writer.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/container/ob_bitmap.h" +#include "sql/engine/ob_bit_vector.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" + +namespace oceanbase +{ +using namespace common; + +namespace sql +{ + + +int ObCompactBlockWriter::CurRowInfo::init(const RowMeta *row_meta, const uint8_t offset_width, char* buf) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row_meta) || OB_ISNULL(buf)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null pointer", K(ret), KP(row_meta), KP(buf)); + } else if (offset_width != BASE_OFFSET_SIZE && offset_width != EXTENDED_OFFSET_SIZE) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("offset_width is invalid", K(ret), K(offset_width)); + } else { + buf_ = buf; + cur_var_offset_pos_ = 0; + var_column_cnt_ = row_meta->col_cnt_ - row_meta->fixed_cnt_; + bitmap_size_ = sql::ObBitVector::memory_size(row_meta->col_cnt_); + bit_vec_ = sql::to_bit_vector(buf_ + HEAD_SIZE); + bit_vec_->reset(row_meta->col_cnt_); + data_offset_ = HEAD_SIZE + bitmap_size_ + 
(var_column_cnt_ + 1) * offset_width; // the start of fixed data buffer
+    var_offset_ = data_offset_ + row_meta->var_data_off_;
+  }
+  return ret;
+}
+
+// Clears the per-row cursor: drops the buffer/bitmap pointers and zeroes every counter and offset.
+void ObCompactBlockWriter::CurRowInfo::reset()
+{
+  buf_ = nullptr;
+  bit_vec_ = nullptr;
+  var_column_cnt_ = 0;
+  cur_var_offset_pos_ = 0;
+  bitmap_size_ = 0;
+  data_offset_ = 0;
+  var_offset_ = 0;
+}
+
+// Appends the row obtained by evaluating `exprs` under `ctx`.
+// ensure_write() sizes the row and selects cur_row_offset_width_; the matching inner_add_row
+// instantiation then serializes it. `stored_row` is never assigned by this function.
+int ObCompactBlockWriter::add_row(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_FAIL(ensure_init())) {
+    LOG_WARN("fail to ensure init", K(ret));
+  } else if (OB_FAIL(ensure_write(exprs, ctx))) {
+    LOG_WARN("fail to ensure write", K(ret));
+  } else if (BASE_OFFSET_SIZE == cur_row_offset_width_) {
+    // 2-byte offsets
+    if (OB_FAIL(inner_add_row<uint16_t>(exprs, ctx))) {
+      LOG_WARN("fail to add row", K(ret));
+    }
+  } else if (EXTENDED_OFFSET_SIZE == cur_row_offset_width_) {
+    // 4-byte offsets
+    if (OB_FAIL(inner_add_row<uint32_t>(exprs, ctx))) {
+      LOG_WARN("fail to add row", K(ret));
+    }
+  } else {
+    // An unknown width must not be reported as success: nothing would have been written.
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected offset width", K(cur_row_offset_width_), K(ret));
+  }
+  return ret;
+}
+
+// Appends a copy of an already materialized StoredRow in compact format.
+// `dst_sr` is never assigned by this function.
+int ObCompactBlockWriter::add_row(const ObChunkDatumStore::StoredRow &src_sr, ObChunkDatumStore::StoredRow **dst_sr)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(ensure_init())) {
+    LOG_WARN("fail to ensure init", K(ret));
+  } else if (OB_FAIL(ensure_write(src_sr))) {
+    LOG_WARN("fail to ensure write", K(ret));
+  } else if (BASE_OFFSET_SIZE == cur_row_offset_width_) {
+    if (OB_FAIL(inner_build_from_stored_row<uint16_t>(src_sr))) {
+      LOG_WARN("fail to build from stored row", K(ret));
+    }
+  } else if (EXTENDED_OFFSET_SIZE == cur_row_offset_width_) {
+    if (OB_FAIL(inner_build_from_stored_row<uint32_t>(src_sr))) {
+      LOG_WARN("fail to build from stored row", K(ret));
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected offset width", K(cur_row_offset_width_), K(ret));
+  }
+
+  return ret;
+}
+
+
+// Appends `cnt` storage datums as one compact row; `extra_size` is added to the reserved size.
+int ObCompactBlockWriter::add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                                  const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(ensure_init())) {
+    LOG_WARN("fail to ensure init", K(ret));
+  } else if (OB_FAIL(ensure_write(storage_datums, cnt, extra_size))) {
+    LOG_WARN("fail to ensure write", K(ret));
+  } else if (BASE_OFFSET_SIZE == cur_row_offset_width_) {
+    if (OB_FAIL(inner_add_row<uint16_t>(storage_datums, cnt, extra_size, stored_row))) {
+      LOG_WARN("add row to block failed", K(ret), K(storage_datums), K(cnt), K(extra_size));
+    }
+  } else if (EXTENDED_OFFSET_SIZE == cur_row_offset_width_) {
+    if (OB_FAIL(inner_add_row<uint32_t>(storage_datums, cnt, extra_size, stored_row))) {
+      LOG_WARN("add row to block failed", K(ret), K(storage_datums), K(cnt), K(extra_size));
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected offset width", K(cur_row_offset_width_), K(ret));
+  }
+  return ret;
+}
+
+// Serializes `cnt` storage datums at the current write position using T as the offset type
+// (sizeof(T) is handed to CurRowInfo::init as the offset width).
+// `extra_size` and `stored_row` are not used by this function.
+template <typename T>
+int ObCompactBlockWriter::inner_add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                                        const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("row meta is null", K(ret), KP(row_meta_));
+  } else if (OB_FAIL(row_info_.init(row_meta_, sizeof(T), get_cur_buf()))) {
+    LOG_WARN("fail to init row info", K(ret));
+  } else if (OB_ISNULL(row_info_.buf_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("buf is null", K(ret));
+  } else {
+    // The offset array sits right after the head and the null bitmap.
+    T *var_offset_array = reinterpret_cast<T *>(row_info_.buf_ + HEAD_SIZE + row_info_.bitmap_size_);
+    for (int64_t i = 0; OB_SUCC(ret) && i < cnt; i++) {
+      if (OB_FAIL(inner_process_datum<T>(storage_datums[i], i, *row_meta_, row_info_))) {
+        LOG_WARN("fail to process datum", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      // head
+      uint32_t row_size = row_info_.var_offset_;
+      MEMCPY(row_info_.buf_, &row_size, sizeof(row_size)); // row_size_
+      MEMCPY(row_info_.buf_ + sizeof(row_size), &cur_row_offset_width_, sizeof(cur_row_offset_width_)); // the offset width
+      // last offset: closes the final variable-length column
+      T tmp_offset = row_info_.var_offset_ - row_info_.data_offset_;
+      MEMCPY(var_offset_array + row_info_.cur_var_offset_pos_, &tmp_offset, sizeof(T));
+      // payload
+      if (OB_FAIL(advance(row_size))) {
+        LOG_WARN("fail to advance buf", K(ret));
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Serializes the cells of `sr` as one compact row at the current write position,
+// using T as the offset type.
+template <typename T>
+int ObCompactBlockWriter::inner_build_from_stored_row(const ObChunkDatumStore::StoredRow &sr)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("row meta is null", K(ret), KP(row_meta_));
+  } else if (OB_FAIL(row_info_.init(row_meta_, sizeof(T), get_cur_buf()))) {
+    LOG_WARN("fail to init row info", K(ret));
+  } else if (OB_ISNULL(row_info_.buf_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("buf is null", K(ret));
+  } else {
+    T *var_offset_array = reinterpret_cast<T *>(row_info_.buf_ + HEAD_SIZE + row_info_.bitmap_size_);
+    for (int64_t i = 0 ; OB_SUCC(ret) && i < sr.cnt_; i++) {
+      if (OB_FAIL(inner_process_datum<T>(sr.cells()[i], i, *row_meta_, row_info_))) {
+        LOG_WARN("fail to process datum", K(ret));
+      }
+    }
+
+    if (OB_SUCC(ret)) {
+      // head
+      uint32_t row_size = row_info_.var_offset_;
+      MEMCPY(row_info_.buf_, &row_size, sizeof(row_size)); // row_size_
+      MEMCPY(row_info_.buf_ + sizeof(row_size), &cur_row_offset_width_, sizeof(cur_row_offset_width_)); // the offset width
+      // last offset
+      T tmp_offset = row_info_.var_offset_ - row_info_.data_offset_;
+      MEMCPY(var_offset_array + row_info_.cur_var_offset_pos_, &tmp_offset, sizeof(T));
+      // payload
+      if (OB_FAIL(advance(row_size))) {
+        LOG_WARN("fail to advance buf", K(ret));
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Before calling this function the caller must ensure the buffer is large enough.
+template <typename T>
+int ObCompactBlockWriter::inner_add_row(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx)
+{
+  int ret = OB_SUCCESS;
+
+  if (OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("row meta is null", K(ret), KP(row_meta_));
+  } else if (OB_FAIL(row_info_.init(row_meta_, sizeof(T), get_cur_buf()))) {
+    LOG_WARN("fail to init row info", K(ret));
+  } else if (OB_ISNULL(row_info_.buf_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("buf is null", K(ret));
+  } else {
+    T *var_offset_array = reinterpret_cast<T *>(row_info_.buf_ + HEAD_SIZE + row_info_.bitmap_size_);
+    for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i++) {
+      ObExpr *expr = exprs.at(i);
+      ObDatum *in_datum = NULL;
+      if (OB_UNLIKELY(NULL == expr)) {
+        // Set datum to NULL for NULL expr
+        row_info_.bit_vec_->set(i);
+        if (row_meta_->column_length_[i] == 0) {
+          T tmp_offset = row_info_.var_offset_ - row_info_.data_offset_;
+          MEMCPY(var_offset_array + row_info_.cur_var_offset_pos_, &tmp_offset, sizeof(T));
+          row_info_.cur_var_offset_pos_ ++;
+        }
+      } else if (OB_FAIL(expr->eval(ctx, in_datum))) {
+        LOG_WARN("expression evaluate failed", K(ret));
+      } else if (OB_ISNULL(in_datum)) {
+        // A successful eval without a datum would leave this column neither written nor flagged
+        // null; get_row_stored_size() treats the same situation as an error.
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("the datum is null", K(ret), K(i));
+      } else if (OB_FAIL(inner_process_datum<T>(*in_datum, i, *row_meta_, row_info_))) {
+        LOG_WARN("fail to process datum", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      // head
+      uint32_t row_size = row_info_.var_offset_;
+      MEMCPY(row_info_.buf_, &row_size, sizeof(row_size)); // row_size_
+      MEMCPY(row_info_.buf_ + sizeof(row_size), &cur_row_offset_width_, sizeof(cur_row_offset_width_)); // the offset width
+      // last offset
+      T tmp_offset = row_info_.var_offset_ - row_info_.data_offset_;
+      MEMCPY(var_offset_array + row_info_.cur_var_offset_pos_, &tmp_offset, sizeof(T));
+
+      // payload
+      if (OB_FAIL(advance(row_size))) {
+        LOG_WARN("fail to advance buf", K(ret));
+      }
+    }
+  }
+
+  return ret;
+}
+
+// Writes one datum of the row being built.
+//   null datum       : sets the null bit; a variable-length column also records its start offset.
+//   variable-length  : (column_length_ == 0) records the start offset and appends the bytes.
+//   fixed-length     : copies the bytes to data_offset_ + column_offset_[cur_pos].
+template <typename T>
+int ObCompactBlockWriter::inner_process_datum(const ObDatum &src_datum, const int64_t cur_pos,
+                                              const RowMeta &row_meta, CurRowInfo &row_info)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(row_info.buf_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null pointer", K(ret));
+  } else {
+    T *var_offset_array = reinterpret_cast<T *>(row_info.buf_ + HEAD_SIZE + row_info.bitmap_size_);
+    if (OB_ISNULL(row_info.bit_vec_) || cur_pos < 0 || cur_pos >= row_meta.col_cnt_) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("get unexpected null bitmap", K(ret));
+    } else if (src_datum.is_null()) {
+      row_info.bit_vec_->set(cur_pos);
+      if (row_meta.column_length_[cur_pos] == 0) {
+        T tmp_offset = row_info.var_offset_ - row_info.data_offset_;
+        MEMCPY(var_offset_array + row_info.cur_var_offset_pos_, &tmp_offset, sizeof(T));
+        row_info.cur_var_offset_pos_ ++;
+      }
+    } else if (row_meta.column_length_[cur_pos] == 0) { // the column is variable size;
+      if (row_info.cur_var_offset_pos_ >= row_info.var_column_cnt_) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("the offset is out of range", K(ret), K(row_info));
+      } else {
+        T tmp_offset = row_info.var_offset_ - row_info.data_offset_;
+        MEMCPY(var_offset_array + row_info.cur_var_offset_pos_, &tmp_offset, sizeof(T));
+        MEMCPY(row_info.buf_ + row_info.var_offset_, src_datum.ptr_, src_datum.len_);
+        row_info.var_offset_ += src_datum.len_;
+        row_info.cur_var_offset_pos_ ++;
+      }
+    } else { // the column is fixed size;
+      if (src_datum.len_ != row_meta.column_length_[cur_pos]) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("fixe data length do not match", K(ret), K(src_datum.len_),
+                 K(row_meta.column_length_[cur_pos]));
+      } else {
+        MEMCPY(row_info.buf_ + row_info.data_offset_ + row_meta.column_offset_[cur_pos], src_datum.ptr_,
+               row_meta.column_length_[cur_pos]);
+      }
+    }
+  }
+  return ret;
+}
+
+// Computes the compact size of the row produced by `exprs` and records both
+// cur_row_offset_width_ and cur_row_size_ for the following add_row step.
+int ObCompactBlockWriter::get_row_stored_size(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx, uint64_t &size)
+{
+  int ret = OB_SUCCESS;
+  const RowMeta &row_meta = *get_meta();
+  int64_t bit_map_size = sql::ObBitVector::memory_size(row_meta.col_cnt_);
+  int64_t data_size = 0;
+  int64_t offset_size = 0;
+  size = 0;
+
+  ObExpr *expr = nullptr;
+  common::ObDatum *datum = nullptr;
+  for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) {
+    if (row_meta.column_length_[i] != 0) {
+      // fixed-length columns always occupy their full slot
+      data_size += row_meta.column_length_[i];
+    } else {
+      expr = exprs.at(i);
+      if (OB_ISNULL(expr)) {
+        // NULL expr is stored as a null datum and contributes no data bytes
+      } else if (OB_FAIL(expr->eval(ctx, datum))) {
+        SQL_ENG_LOG(WARN, "failed to eval expr datum", KPC(expr), K(ret));
+      } else if (OB_ISNULL(datum)) {
+        ret = OB_ERR_UNEXPECTED;
+        SQL_ENG_LOG(WARN, "the datum is null", K(ret), KP(datum));
+      } else if (!datum->is_null()) {
+        data_size += datum->len_;
+      }
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    cur_row_offset_width_ = get_offset_width(data_size);
+    offset_size = (row_meta.col_cnt_ - row_meta.fixed_cnt_ + 1) * cur_row_offset_width_;
+    size = data_size + offset_size + bit_map_size + HEAD_SIZE;
+
+    // offset_width_ is used in add_row
+    cur_row_size_ = size;
+  }
+
+  return ret;
+}
+
+// Same computation as above for an already materialized StoredRow.
+int ObCompactBlockWriter::get_row_stored_size(const ObChunkDatumStore::StoredRow &sr, uint64_t &size)
+{
+  int ret = OB_SUCCESS;
+  const RowMeta &row_meta = *get_meta();
+  int64_t bit_map_size = sql::ObBitVector::memory_size(row_meta.col_cnt_);
+  int64_t data_size = 0;
+  int64_t offset_size = 0;
+  size = 0;
+
+  for (int64_t i = 0; OB_SUCC(ret) && i < sr.cnt_; ++i) {
+    if (row_meta.column_length_[i] != 0) {
+      data_size += row_meta.column_length_[i];
+    } else if (!sr.cells()[i].is_null()) {
+      data_size += sr.cells()[i].len_;
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    cur_row_offset_width_ = get_offset_width(data_size);
+    offset_size = (row_meta.col_cnt_ - row_meta.fixed_cnt_ + 1) * cur_row_offset_width_;
+    size = data_size + offset_size + bit_map_size + HEAD_SIZE;
+
+    // offset_width_ is used in add_row
+    cur_row_size_ = size;
+  }
+  return ret;
+}
+
+
+// Same computation for raw storage datums; `extra_size` is added to the reserved size.
+int ObCompactBlockWriter::get_row_stored_size(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                                              const int64_t extra_size, uint64_t &size)
+{
+  int ret = OB_SUCCESS;
+  const RowMeta &row_meta = *get_meta();
+  int64_t bit_map_size = sql::ObBitVector::memory_size(row_meta.col_cnt_);
+  int64_t data_size = 0;
+  int64_t offset_size = 0;
+  for (int64_t i = 0; i < cnt; ++i) {
+    if (row_meta.column_length_[i] != 0) {
+      data_size += row_meta.column_length_[i];
+    } else {
+      if (!storage_datums[i].is_null()) {
+        data_size += storage_datums[i].len_;
+      }
+    }
+  }
+  cur_row_offset_width_ = get_offset_width(data_size);
+  offset_size = (row_meta.col_cnt_ - row_meta.fixed_cnt_ + 1) * cur_row_offset_width_;
+  size = HEAD_SIZE + bit_map_size + offset_size + extra_size + data_size;
+  // keep cur_row_size_ in step with the other two overloads
+  cur_row_size_ = size;
+  return ret;
+}
+
+
+// Marks the writer as not initialized; the next add_row re-runs ensure_init().
+int ObCompactBlockWriter::close()
+{
+  int ret = OB_SUCCESS;
+  inited_ = false;
+  return ret;
+}
+
+// Sizes the row described by `exprs` and makes sure the current block can hold it.
+int ObCompactBlockWriter::ensure_write(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx)
+{
+  int ret = OB_SUCCESS;
+  uint64_t row_size = 0;
+  if (OB_FAIL(get_row_stored_size(exprs, ctx, row_size))) {
+    LOG_WARN("fail to get row_size", K(exprs), K(ret));
+  } else if (OB_FAIL(ensure_write(row_size))) {
+    LOG_WARN("fail ensure write", K(ret));
+  }
+  return ret;
+}
+
+int ObCompactBlockWriter::ensure_write(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                                       const int64_t extra_size)
+{
+  int ret = OB_SUCCESS;
+  uint64_t row_size = 0;
+  if (OB_FAIL(get_row_stored_size(storage_datums, cnt, extra_size, row_size))) {
+    LOG_WARN("fail to get row_size", K(cnt), K(extra_size), K(row_size), K(ret));
+  } else if (OB_FAIL(ensure_write(row_size))) {
+    LOG_WARN("fail to call inner ensure write", K(ret));
+  }
+
+  return ret;
+}
+
+int ObCompactBlockWriter::ensure_write(const ObChunkDatumStore::StoredRow &sr)
+{
+  int ret = OB_SUCCESS;
+  uint64_t row_size = 0;
+  if (OB_FAIL(get_row_stored_size(sr, row_size))) {
+    LOG_WARN("fail to get row_size", K(sr), K(ret));
+  } else if (OB_FAIL(ensure_write(row_size))) {
+    LOG_WARN("fail ensure write", K(ret));
+  }
+  return ret;
+}
+
+// Switches to a fresh block when `size` bytes would overflow the current one.
+// The new block is at least DEFAULT_BUF_SIZE bytes, or `size` for an oversized row.
+int ObCompactBlockWriter::ensure_write(const int64_t size)
+{
+  int ret = OB_SUCCESS;
+  if (is_overflow(size)) {
+    int64_t new_blk_size = size < DEFAULT_BUF_SIZE ? DEFAULT_BUF_SIZE : size;
+    ObTempBlockStore::Block *tmp_blk = nullptr;
+    if (OB_FAIL(store_->new_block(new_blk_size, tmp_blk, true))) {
+      LOG_WARN("fail to alloc block", K(ret));
+    } else if (OB_ISNULL(tmp_blk)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("fail to alloc block", K(ret));
+    } else {
+      cur_blk_ = tmp_blk;
+    }
+  } else {
+    // do nothing, directly add row to buffer.
+  }
+
+  return ret;
+}
+
+// Materializes the most recently written compact row as a StoredRow allocated from the store.
+// The writer owns the copy: it is released by the next successful call and by reset(), so the
+// returned pointer must not be kept across either of them.
+int ObCompactBlockWriter::get_last_stored_row(const ObChunkDatumStore::StoredRow *&sr)
+{
+  int ret = OB_SUCCESS;
+  const char *compact_row = get_last_row();
+  int64_t size = 0;
+  ObChunkDatumStore::StoredRow *tmp_sr = nullptr;
+  sr = nullptr;
+  // convert from compact format to storedrow;
+  if (OB_ISNULL(compact_row) || OB_ISNULL(row_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null pointer", K(ret));
+  } else if (OB_FAIL(inner_get_stored_row_size(compact_row, size))) {
+    LOG_WARN("fail to calc size", K(ret));
+  } else if (size <= 0) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get invalid stored row size", K(ret), K(size));
+  } else if (OB_ISNULL(tmp_sr = reinterpret_cast<ObChunkDatumStore::StoredRow *>(store_->alloc(size)))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("fail to allocal memory", K(ret));
+  } else {
+    // the offset width byte follows the 4-byte row size in the row head
+    const int8_t offset_width = *reinterpret_cast<const int8_t *>(compact_row + sizeof(int32_t));
+    if (offset_width == BASE_OFFSET_SIZE) {
+      if (OB_FAIL(convert_to_stored_row<uint16_t>(compact_row, tmp_sr))) {
+        LOG_WARN("fail to convert", K(ret), K(size));
+      }
+    } else if (offset_width == EXTENDED_OFFSET_SIZE) {
+      if (OB_FAIL(convert_to_stored_row<uint32_t>(compact_row, tmp_sr))) {
+        LOG_WARN("fail to convert", K(ret), K(size));
+      }
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected offset width", K(offset_width), K(ret));
+    }
+    if (OB_FAIL(ret)) {
+      store_->free(tmp_sr, size);
+    }
+  }
+  if (OB_SUCC(ret)) {
+    // Only one copy is tracked, so release the previous one before replacing it;
+    // otherwise every call but the last would leak its allocation.
+    if (OB_NOT_NULL(last_stored_row_)) {
+      store_->free(last_stored_row_, last_sr_size_);
+    }
+    sr = tmp_sr;
+    last_stored_row_ = tmp_sr;
+    last_sr_size_ = size;
+  }
+  return ret;
+}
+
+// Asks ObCompactBlockReader how many bytes `compact_row` needs once expanded to a StoredRow.
+int ObCompactBlockWriter::inner_get_stored_row_size(const char *compact_row, int64_t &size)
+{
+  int ret = OB_SUCCESS;
+  size = 0;
+  if (OB_ISNULL(row_meta_) || OB_ISNULL(compact_row)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null pointer", K(ret));
+  } else if (OB_FAIL(ObCompactBlockReader::calc_stored_row_size(compact_row, row_meta_, size))){
+    LOG_WARN("fail to get stored row size", K(ret));
+  }
+
+  return ret;
+}
+
+// Expands `compact_row` into `sr`: the ObDatum array is filled first, and the column bytes are
+// copied behind it. T is the offset type the row was written with.
+template <typename T>
+int ObCompactBlockWriter::convert_to_stored_row(const char *compact_row, ObChunkDatumStore::StoredRow *sr)
+{
+  //TODO DAISI: this function is similar to ObCompactBlockReader::get_stored_row
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(row_meta_) || OB_ISNULL(compact_row) || OB_ISNULL(sr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null pointer", K(ret), KP(row_meta_), KP(compact_row), KP(sr));
+  } else {
+    const int64_t offset_width = sizeof(T);
+    int64_t bitmap_size = sql::ObBitVector::memory_size(row_meta_->col_cnt_);
+    const sql::ObBitVector *bit_vec = sql::to_bit_vector(compact_row + HEAD_SIZE);
+    int64_t cur_var_offset_pos = 0;
+    const int64_t var_column_cnt = row_meta_->col_cnt_ - row_meta_->fixed_cnt_;
+    const int64_t data_offset = HEAD_SIZE + bitmap_size + (var_column_cnt + 1) * offset_width; // the start of fixed data buffer
+    int64_t cur_data_ptr = sizeof(ObDatum) * row_meta_->col_cnt_ + sizeof(ObChunkDatumStore::StoredRow);
+    const T *offset_array = reinterpret_cast<const T *>(compact_row + HEAD_SIZE + bitmap_size);
+    for (int64_t i = 0; OB_SUCC(ret) && i < row_meta_->col_cnt_; i++) {
+      ObDatum *cur_datum = reinterpret_cast<ObDatum *>(sr->payload_ + i * sizeof(ObDatum));
+      uint32_t len = 0;
+      char *tmp_ptr = reinterpret_cast<char *>(sr) + cur_data_ptr;
+      if (bit_vec->at(i)) {
+        cur_datum->set_null();
+        if (row_meta_->column_length_[i] == 0) {
+          cur_var_offset_pos ++;
+        }
+      } else if (row_meta_->column_length_[i] == 0) {
+        // validate the index before touching the offset array
+        if (cur_var_offset_pos >= var_column_cnt) {
+          ret = OB_INDEX_OUT_OF_RANGE;
+          LOG_WARN("the var column idx in out of range", K(ret));
+        } else {
+          const T offset = offset_array[cur_var_offset_pos];
+          len = offset_array[cur_var_offset_pos + 1] - offset;
+          // set datum->len_, use the pack_ to cover the NULL_FLAG
+          cur_datum->pack_ = len;
+          // set data
+          MEMCPY(tmp_ptr, compact_row + data_offset + offset, len);
+          // set datum->ptr_
+          cur_datum->ptr_ = tmp_ptr;
+          cur_var_offset_pos ++;
+        }
+      } else {
+        const int32_t offset = row_meta_->column_offset_[i];
+        len = row_meta_->column_length_[i];
+        cur_datum->pack_ = len;
+        MEMCPY(tmp_ptr, compact_row + data_offset + offset, len);
+        cur_datum->ptr_ = tmp_ptr;
+      }
+      cur_data_ptr += len;
+    }
+    if (OB_SUCC(ret)) {
+      sr->cnt_ = row_meta_->col_cnt_;
+      sr->row_size_ = cur_data_ptr;
+    }
+  }
+
+  return ret;
+}
+
+}
+}
diff --git a/src/sql/engine/basic/chunk_store/ob_compact_block_writer.h b/src/sql/engine/basic/chunk_store/ob_compact_block_writer.h
new file mode 100644
index 000000000..e60f3c7b6
--- /dev/null
+++ b/src/sql/engine/basic/chunk_store/ob_compact_block_writer.h
@@ -0,0 +1,182 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_BASIC_OB_COMPACT_BLOCK_WRITER_H_
+#define OCEANBASE_BASIC_OB_COMPACT_BLOCK_WRITER_H_
+
+#include "share/ob_define.h"
+#include "lib/container/ob_se_array.h"
+#include "lib/allocator/page_arena.h"
+#include "lib/utility/ob_print_utils.h"
+#include "lib/list/ob_dlist.h"
+#include "src/share/datum/ob_datum.h"
+#include "sql/engine/expr/ob_expr.h"
+#include "sql/engine/basic/chunk_store/ob_chunk_block.h"
+#include "sql/engine/basic/chunk_store/ob_block_iwriter.h"
+#include "sql/ob_sql_define.h"
+#include "sql/engine/basic/chunk_store/ob_compact_block_reader.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+
+
+/*
+ * compact row format
+ * +----------+--------------+-------------+-------------------+------------+---------+
+ * | row_size | offset_width | null_bitmap | var_column_offset | fixed_data | var_data|
+ * +----------+--------------+-------------+-------------------+------------+---------+
+ * offset_width: the width of each offset in var_column_offset (2 bytes or 4 bytes);
+ *               4 bytes is for long rows.
+ * null_bitmap: marks whether the i-th datum is null. equal to datum->desc.null_
+ *
+ * to get i-th datum(fixed data): 1. get offset by row_meta.column_offset[i].
+ *                                2. use the offset to get fixed data ( ObDatum *datum = fixed_data + offset)
+ * to get i-th datum(var_data): 1. find that the var_data is the j-th var_data.
+ *                              2. get offset in var_column_offset. (T *offset = var_column_offset + j * offset_width)
+ *                                  T is the 2-byte or 4-byte offset type (sizeof(T) == offset_width).
+ *                              3. use the offset to get datum from var_data. (datum = var_data + offset)
+ */
+class ObTempBlockStore;
+class RowMeta;
+
+// Block writer that serializes rows in the compact row format described above.
+class ObCompactBlockWriter final : public ObBlockIWriter
+{
+  static const int HEAD_SIZE = 5;            // 4-byte row_size + 1-byte offset_width
+  static const int BASE_OFFSET_SIZE = 2;     // offset width chosen when data size < (1 << 16)
+  static const int EXTENDED_OFFSET_SIZE = 4; // offset width for longer rows
+
+  // Cursor over the row currently being written into the block buffer.
+  struct CurRowInfo final
+  {
+  public:
+    CurRowInfo() : buf_(nullptr), var_column_cnt_(0), cur_var_offset_pos_(0), bitmap_size_(0),
+                   bit_vec_(nullptr), data_offset_(0), var_offset_(0) {}
+
+    ~CurRowInfo() { reset(); }
+    int init(const RowMeta *row_meta, const uint8_t offset_width, char *buf);
+    void reset();
+    TO_STRING_KV(K_(cur_var_offset_pos), K_(var_column_cnt), K_(bitmap_size),
+                 K_(data_offset), K_(var_offset));
+  public:
+    char *buf_;
+    int64_t var_column_cnt_;
+    int64_t cur_var_offset_pos_; // the i-th in the var_array
+    int64_t bitmap_size_;
+
+    // Use BitVector to set the result of filter here, because the memory of ObBitMap is not continuous
+    // null_bitmap
+    sql::ObBitVector *bit_vec_ = nullptr;
+    int64_t data_offset_;  // the start of fixed data buffer.
+    int64_t var_offset_;
+  };
+
+public:
+  ObCompactBlockWriter(ObTempBlockStore *store = nullptr) : ObBlockIWriter(store), row_meta_(nullptr), cur_row_offset_width_(0),
+                                                            cur_row_size_(0), row_info_(), last_stored_row_(nullptr), last_sr_size_(0) {}
+
+  ObCompactBlockWriter(ObTempBlockStore *store, const RowMeta *row_meta) : ObBlockIWriter(store), row_meta_(row_meta),
+                                                                          cur_row_offset_width_(0), cur_row_size_(0), row_info_(), last_stored_row_(nullptr),
+                                                                          last_sr_size_(0) {}
+  virtual ~ObCompactBlockWriter() { reset(); }
+
+  // Returns the writer to its just-constructed state and releases the row handed out by
+  // get_last_stored_row(). Safe to call repeatedly (the destructor calls it as well).
+  void reset()
+  {
+    cur_row_offset_width_ = 0;
+    cur_row_size_ = 0;
+    row_meta_ = nullptr;
+    row_info_.reset();
+    if (OB_NOT_NULL(last_stored_row_)) {
+      if (OB_NOT_NULL(store_)) {
+        store_->free(last_stored_row_, last_sr_size_);
+      }
+      // forget the pointer so a later reset() cannot free it a second time
+      last_stored_row_ = nullptr;
+    }
+    last_sr_size_ = 0;
+  }
+  virtual int add_row(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row = nullptr) override;
+  // if full, construct the block and use the block's block_mgr. return block
+  virtual int add_row(const ObChunkDatumStore::StoredRow &src_sr, ObChunkDatumStore::StoredRow **dst_sr = nullptr) override;
+  //virtual int try_add_row(const common::ObIArray<ObExpr*> &exprs, ObEvalCtx &ctx);
+  virtual int add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                      const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row) override;
+
+  // Batch append is not implemented for the compact format.
+  virtual int add_batch(const common::ObDatum **datums, const common::ObIArray<ObExpr *> &exprs,
+                        const uint16_t selector[], const int64_t size,
+                        ObChunkDatumStore::StoredRow **stored_rows, BatchCtx *batch_ctx) override { return OB_NOT_IMPLEMENT; }
+
+  void set_meta(const RowMeta *row_meta) override { row_meta_ = row_meta; }
+  const RowMeta *get_meta() { return row_meta_; }
+  int close() override;
+  virtual int prepare_blk_for_write(ObTempBlockStore::Block *blk) final override { return OB_SUCCESS; }
+  int get_last_stored_row(const ObChunkDatumStore::StoredRow *&sr);
+
+protected:
+  /*
+   * before add_row we should call ensure_write
+   * 1. if the write buffer could hold the next row:
+   * 2. if the write buffer couldn't hold next row:
+   *  2.1 if the write buffer isn't empty, construct_block use the write buffer. and reset writer buffer.
+   *  2.2 if the write buffer is empty (large row), construct block use the large row.
+   */
+  int ensure_write(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx);
+  int ensure_write(const ObChunkDatumStore::StoredRow &stored_row);
+  int ensure_write(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                   const int64_t extra_size);
+  int ensure_write(const int64_t size);
+
+  // get the stored size in writer buffer for a row.
+  int get_row_stored_size(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx, uint64_t &size);
+  int get_row_stored_size(const ObChunkDatumStore::StoredRow &sr, uint64_t &size);
+  int get_row_stored_size(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                          const int64_t extra_size, uint64_t &size);
+
+private:
+  template <typename T>
+  int inner_process_datum(const ObDatum &src_datum, const int64_t cur_pos, const RowMeta &row_meta,
+                          CurRowInfo &row_info);
+  template <typename T>
+  int inner_build_from_stored_row(const ObChunkDatumStore::StoredRow &sr);
+  // 2-byte offsets while the row data fits below 64 KiB, 4-byte offsets otherwise.
+  inline uint8_t get_offset_width(const int64_t data_size) {
+    return (data_size < (1 << 16)) ? BASE_OFFSET_SIZE : EXTENDED_OFFSET_SIZE;
+  }
+  template <typename T>
+  int inner_add_row(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx);
+  template <typename T>
+  int inner_add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                    const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row);
+
+  // Fails when no row meta has been set; otherwise marks the writer as initialized.
+  inline int ensure_init()
+  {
+    int ret = OB_SUCCESS;
+    if (OB_ISNULL(row_meta_)) {
+      ret = OB_ERR_UNEXPECTED;
+      SQL_ENG_LOG(WARN, "the row meta is null", K(ret));
+    } else if (!inited_) {
+      inited_ = true;
+    }
+    return ret;
+  }
+
+  int inner_get_stored_row_size(const char *compact_row, int64_t &size);
+  template <typename T>
+  int convert_to_stored_row(const char *compact_row, ObChunkDatumStore::StoredRow *sr);
+
+private:
+  const RowMeta *row_meta_;
+  uint8_t cur_row_offset_width_;
+  int32_t cur_row_size_;
+  CurRowInfo row_info_;
+  ObChunkDatumStore::StoredRow *last_stored_row_; // copy handed out by get_last_stored_row()
+  int64_t last_sr_size_;
+};
+
+} // end namespace sql
+} // end namespace oceanbase
+#endif // OCEANBASE_BASIC_OB_COMPACT_BLOCK_WRITER_H_
diff --git a/src/sql/engine/basic/chunk_store/ob_compact_store.cpp b/src/sql/engine/basic/chunk_store/ob_compact_store.cpp
new file mode 100644
index 000000000..10be339d3
--- /dev/null
+++ b/src/sql/engine/basic/chunk_store/ob_compact_store.cpp
@@ -0,0 +1,538 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under
Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+#include "sql/engine/basic/chunk_store/ob_compact_store.h"
+#include "sql/engine/basic/chunk_store/ob_block_ireader.h"
+#include "sql/engine/basic/chunk_store/ob_default_block_writer.h"
+#include "sql/engine/basic/chunk_store/ob_default_block_reader.h"
+#include "sql/engine/basic/chunk_store/ob_compact_block_reader.h"
+#include "sql/engine/basic/chunk_store/ob_compact_block_writer.h"
+#include "storage/ddl/ob_direct_load_struct.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+
+// Forwards block preparation to the configured writer.
+int ObCompactStore::prepare_blk_for_write(Block *blk)
+{
+  int ret = OB_SUCCESS;
+  if (!inited_) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("the store is not inited", K(ret));
+  } else if (OB_FAIL(writer_->prepare_blk_for_write(blk))) {
+    LOG_WARN("fail to prepare blk for write", K(ret));
+  }
+
+  return ret;
+}
+
+// Forwards block preparation to the configured reader.
+int ObCompactStore::prepare_blk_for_read(Block *blk)
+{
+  int ret = OB_SUCCESS;
+  if (!inited_) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("the store is not inited", K(ret));
+  } else if (OB_FAIL(reader_->prepare_blk_for_read(blk))) {
+    LOG_WARN("fail to prepare blk for read", K(ret));
+  }
+
+  return ret;
+}
+
+// Rewinds iteration to the first row without discarding stored data.
+void ObCompactStore::rescan()
+{
+  cur_blk_id_ = 0;
+  start_iter_ = false;
+  // shouldn't truncate for ObChunkSliceStore.
+  if (enable_truncate_) {
+    last_truncate_offset_ = 0;
+  }
+  if (OB_NOT_NULL(reader_)) {
+    reader_->reuse();
+  }
+  block_reader_.reuse();
+}
+
+// Returns the next row, moving to the following block when the current one is exhausted.
+// cur_blk_id_ is advanced once per returned row and is the id handed to get_block().
+// When truncation is enabled, the file is truncated each time the read offset has advanced
+// more than TRUNCATE_SIZE past the last truncation point.
+int ObCompactStore::inner_get_next_row(const ObChunkDatumStore::StoredRow *&sr)
+{
+  int ret = OB_SUCCESS;
+  const ObTempBlockStore::Block* tmp_blk = nullptr;
+
+  if (inited_) {
+    if (!start_iter_) {
+      cur_blk_id_ = 0;
+      if (cur_blk_id_ >= get_block_id_cnt()) {
+        ret = OB_ITER_END;
+      } else if (OB_FAIL(block_reader_.get_block(cur_blk_id_, tmp_blk))) {
+        if (ret != OB_ITER_END) {
+          LOG_WARN("fail to get block", K(ret));
+        }
+      } else {
+        start_iter_ = true;
+        reader_->reuse();
+        reader_->set_block(tmp_blk);
+        int64_t file_offset = block_reader_.get_cur_file_offset();
+        if (enable_truncate_ && (file_offset > last_truncate_offset_ + TRUNCATE_SIZE)) {
+          if (OB_FAIL(truncate_file(file_offset))) {
+            LOG_WARN("fail to truncate file", K(ret));
+          } else {
+            last_truncate_offset_ = file_offset;
+          }
+        }
+      }
+    }
+    if (OB_FAIL(ret)) {
+    } else if (OB_FAIL(reader_->get_row(sr))) {
+      if (ret != OB_ITER_END) {
+        LOG_WARN("fail to get row", K(ret));
+      } else if (cur_blk_id_ >= get_block_id_cnt()) {
+        ret = OB_ITER_END;
+      } else if (OB_FAIL(block_reader_.get_block(cur_blk_id_, tmp_blk))) {
+        if (ret != OB_ITER_END) {
+          LOG_WARN("fail to get block", K(ret));
+        }
+      } else {
+        // current block exhausted: switch to the next one and retry the read once
+        reader_->reuse();
+        reader_->set_block(tmp_blk);
+        int64_t file_offset = block_reader_.get_cur_file_offset();
+        if (enable_truncate_ && (file_offset > last_truncate_offset_ + TRUNCATE_SIZE)) {
+          if (OB_FAIL(truncate_file(file_offset))) {
+            LOG_WARN("fail to truncate file", K(ret));
+          } else {
+            last_truncate_offset_ = file_offset;
+          }
+        }
+        if (OB_SUCC(ret)) {
+          if (OB_FAIL(reader_->get_row(sr))) {
+            if (ret != OB_ITER_END) {
+              LOG_WARN("fail to get row", K(ret));
+            }
+          } else {
+            cur_blk_id_++;
+          }
+        }
+      }
+    } else {
+      cur_blk_id_++;
+    }
+    if (OB_SUCC(ret)) {
+      LOG_TRACE("block reader read row", KPC(sr));
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("store is not init", K(ret));
+  }
+  return ret;
+}
+
+// Hands a fully batch-evaluated set of datums to the writer and counts the rows.
+int ObCompactStore::inner_add_batch(const common::ObDatum **datums, const common::ObIArray<ObExpr *> &exprs,
+                                    const uint16_t selector[], const int64_t size,
+                                    ObChunkDatumStore::StoredRow **stored_rows)
+{
+  int ret = OB_SUCCESS;
+  if (inited_) {
+    if (OB_FAIL(writer_->add_batch(datums, exprs, selector, size, stored_rows, batch_ctx_))) {
+      LOG_WARN("fail to add row", K(ret));
+    } else {
+      row_cnt_ += size;
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should init writer first", K(ret));
+  }
+
+  return ret;
+}
+
+// Row-at-a-time path: adds each selected row of the batch through add_row().
+int ObCompactStore::add_batch_fallback(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx,
+                                       const ObBitVector &skip, const int64_t batch_size,
+                                       const uint16_t selector[], const int64_t size,
+                                       ObChunkDatumStore::StoredRow **stored_rows)
+{
+  int ret = OB_SUCCESS;
+  ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx);
+  batch_info_guard.set_batch_size(batch_size);
+  for (int64_t i = 0; i < size && OB_SUCC(ret); i++) {
+    int64_t idx = selector[i];
+    batch_info_guard.set_batch_idx(idx);
+    ObChunkDatumStore::StoredRow *srow = NULL;
+    if (OB_FAIL(add_row(exprs, ctx, &srow))) {
+      LOG_WARN("add row failed", K(ret), K(i), K(idx));
+    } else {
+      if (NULL != stored_rows) {
+        stored_rows[i] = srow;
+      }
+    }
+  }
+  return ret;
+}
+
+// Adds every row of the batch from `start_pos` on that is not marked in `skip`;
+// `stored_rows_count` receives the number of selected rows.
+int ObCompactStore::add_batch(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx,
+                              const ObBitVector &skip, const int64_t batch_size,
+                              int64_t &stored_rows_count,
+                              ObChunkDatumStore::StoredRow **stored_rows,
+                              const int64_t start_pos /* 0 */)
+{
+  int ret = OB_SUCCESS;
+  CK(is_inited());
+  OZ(init_batch_ctx(exprs.count(), ctx.max_batch_size_));
+  int64_t size = 0;
+  if (OB_SUCC(ret)) {
+    for (int64_t i = start_pos; i < batch_size; i++) {
+      if (skip.at(i)) {
+        continue;
+      } else {
+        batch_ctx_->selector_[size++] = i;
+      }
+    }
+  }
+  if (OB_SUCC(ret)) {
+    stored_rows_count = size;
+    if (OB_FAIL(add_batch(exprs, ctx, skip, batch_size,
+                          batch_ctx_->selector_, size, stored_rows))) {
+      LOG_WARN("add batch failed");
+    }
+  }
+  return ret;
+}
+
+// Adds the rows listed in `selector`. The writer's batch path is used only for the default
+// (non-compact) levels and only when every expression yields a batch result; otherwise the
+// rows are added one by one.
+int ObCompactStore::add_batch(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx,
+                              const ObBitVector &skip, const int64_t batch_size,
+                              const uint16_t selector[], const int64_t size,
+                              ObChunkDatumStore::StoredRow **stored_rows)
+{
+  int ret = OB_SUCCESS;
+  CK(is_inited());
+  OZ(init_batch_ctx(exprs.count(), ctx.max_batch_size_));
+  bool all_batch_res = (compact_level_ == share::SORT_DEFAULT_LEVEL || compact_level_ == share::SORT_COMPRESSION_LEVEL);
+  for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); i++) {
+    ObExpr *e = exprs.at(i);
+    if (OB_ISNULL(e)) {
+      batch_ctx_->datums_[i] = nullptr;
+    } else if (OB_FAIL(e->eval_batch(ctx, skip, batch_size))) {
+      LOG_WARN("evaluate batch failed", K(ret));
+    } else {
+      if (!e->is_batch_result()) {
+        all_batch_res = false;
+        break;
+      } else {
+        batch_ctx_->datums_[i] = e->locate_batch_datums(ctx);
+      }
+    }
+  }
+  if (OB_SUCC(ret) && !all_batch_res) {
+    if (OB_FAIL(add_batch_fallback(exprs, ctx, skip, batch_size, selector, size, stored_rows))) {
+      LOG_WARN("add batch fallback failed", K(batch_size), K(size));
+    }
+  }
+
+  if (OB_SUCC(ret) && all_batch_res) {
+    if (OB_FAIL(inner_add_batch(batch_ctx_->datums_, exprs, selector, size,
+                                NULL == stored_rows ? batch_ctx_->stored_rows_ : stored_rows))) {
+      LOG_WARN("inner add batch failed", K(ret), K(batch_size), K(size));
+    }
+  }
+
+  return ret;
+}
+
+// Reports whether the iteration position is still below block_id_cnt_.
+int ObCompactStore::has_next(bool &has_next)
+{
+  int ret = OB_SUCCESS;
+  has_next = false;
+  if (inited_) {
+    if (cur_blk_id_ < block_id_cnt_) {
+      has_next = true;
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should init writer first", K(ret));
+  }
+
+  return ret;
+}
+
+// Adds one row through the writer and bumps row_cnt_ on success.
+int ObCompactStore::add_row(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row)
+{
+  int ret = OB_SUCCESS;
+  if (inited_) {
+    if (OB_FAIL(writer_->add_row(exprs, ctx, stored_row))) {
+      LOG_WARN("fail to add row", K(ret));
+    } else {
+      row_cnt_++;
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should init writer first", K(ret));
+  }
+  return ret;
+}
+
+int ObCompactStore::add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt,
+                            const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row)
+{
+  int ret = OB_SUCCESS;
+  if (inited_) {
+    if (OB_FAIL(writer_->add_row(storage_datums, cnt, extra_size, stored_row))) {
+      LOG_WARN("fail to add row", K(ret));
+    } else {
+      row_cnt_++;
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should init writer first", K(ret));
+  }
+  return ret;
+}
+
+int ObCompactStore::add_row(const ObChunkDatumStore::StoredRow &src_sr, ObChunkDatumStore::StoredRow **dst_sr)
+{
+  int ret = OB_SUCCESS;
+  if (inited_) {
+    if (OB_FAIL(writer_->add_row(src_sr, dst_sr))) {
+      LOG_WARN("fail to add row", K(ret));
+    } else {
+      row_cnt_++;
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should init writer first", K(ret));
+  }
+
+  return ret;
+}
+
+// Public iteration entry point; OB_ITER_END is passed through without a warning.
+int ObCompactStore::get_next_row(const ObChunkDatumStore::StoredRow *&sr)
+{
+  int ret = OB_SUCCESS;
+  if (inited_) {
+    if (OB_FAIL(inner_get_next_row(sr))) {
+      if (ret != OB_ITER_END) {
+        LOG_WARN("fail to get row", K(ret));
+      }
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("should init reader first", K(ret));
+  }
+  return ret;
+}
+
+// Lazily allocates batch_ctx_ as one chunk: the BatchCtx header followed by the datums_,
+// stored_rows_, row_size_array_ and selector_ arrays. Does nothing once batch_ctx_ exists.
+int ObCompactStore::init_batch_ctx(const int64_t col_cnt, const int64_t max_batch_size)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(NULL == batch_ctx_)) {
+    // validate before allocating so the error path has nothing to release
+    if (OB_UNLIKELY(max_batch_size <= 0)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("max batch size is not positive when init batch ctx", K(ret), K(max_batch_size));
+    } else {
+      const int64_t size = sizeof(*batch_ctx_)
+          + sizeof(ObDatum *) * col_cnt
+          + sizeof(*batch_ctx_->row_size_array_) * max_batch_size
+          + sizeof(*batch_ctx_->selector_) * max_batch_size
+          + sizeof(*batch_ctx_->stored_rows_) * max_batch_size;
+      char *mem = static_cast<char *>(alloc(size));
+      if (NULL == mem) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("allocate memory failed", K(ret), K(size), K(col_cnt), K(max_batch_size));
+      } else {
+        char* begin = mem;
+        batch_ctx_ = reinterpret_cast<BatchCtx *>(mem);
+        mem += sizeof(*batch_ctx_);
+#define SET_BATCH_CTX_FIELD(X, N) \
+        batch_ctx_->X = reinterpret_cast<typeof(batch_ctx_->X)>(mem); \
+        mem += sizeof(*batch_ctx_->X) * N;
+
+        SET_BATCH_CTX_FIELD(datums_, col_cnt);
+        SET_BATCH_CTX_FIELD(stored_rows_, max_batch_size);
+        SET_BATCH_CTX_FIELD(row_size_array_, max_batch_size);
+        SET_BATCH_CTX_FIELD(selector_, max_batch_size);
+#undef SET_BATCH_CTX_FIELD
+
+        // sanity check: the carved-out fields must consume exactly the allocated bytes
+        if (mem - begin != size) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("size mismatch", K(ret), K(mem - begin), K(size), K(col_cnt), K(max_batch_size));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Initializes the store. The row meta is built from `exprs` only for the compact levels
+// and only when `exprs` is given.
+// NOTE(review): inited_ is set before the sub-initialisation steps and is not cleared when
+// one of them fails -- confirm whether callers rely on that.
+int ObCompactStore::init(const int64_t mem_limit,
+                         const uint64_t tenant_id,
+                         const int64_t mem_ctx_id,
+                         const char *label,
+                         const bool enable_dump,
+                         const uint32_t row_extra_size,
+                         const bool enable_trunc,
+                         const share::SortCompactLevel compact_level,
+                         const ObCompressorType compress_type,
+                         const ExprFixedArray *exprs)
+{
+  int ret = OB_SUCCESS;
+  compact_level_ = compact_level;
+  enable_truncate_ = enable_trunc;
+  inited_ = true;
+  if (OB_ISNULL(exprs) || (compact_level != share::SORT_COMPACT_LEVEL && compact_level != share::SORT_COMPRESSION_COMPACT_LEVEL)) {
+    // no row meta needed
+  } else {
+    OZ(row_meta_.init(*exprs, row_extra_size));
+  }
+  OZ(ObTempBlockStore::init(mem_limit, enable_dump, tenant_id, mem_ctx_id, label, compress_type));
+  OZ(block_reader_.init(this));
+  OZ(init_writer_reader());
+  if (OB_FAIL(ret)) {
+    LOG_WARN("fail to init compact store", K(enable_dump), K(enable_trunc), K(compact_level), K(compress_type),
+             K(exprs), K(ret));
+  } else {
+    LOG_INFO("success to init compact store", K(enable_dump), K(enable_trunc), K(compact_level), K(compress_type),
+             K(exprs), K(ret));
+  }
+  return ret;
+}
+
+// Same as above, but the row meta is built from a column array.
+int ObCompactStore::init(const int64_t mem_limit,
+                         const ObIArray<storage::ObColumnSchemaItem> &col_array,
+                         const uint64_t tenant_id,
+                         const int64_t mem_ctx_id,
+                         const char *label,
+                         const bool enable_dump,
+                         const uint32_t row_extra_size,
+                         const bool enable_trunc,
+                         const share::SortCompactLevel compact_level,
+                         const ObCompressorType compress_type)
+{
+  int ret = OB_SUCCESS;
+  compact_level_ = compact_level;
+  enable_truncate_ = enable_trunc;
+  inited_ = true;
+  if (compact_level != share::SORT_COMPACT_LEVEL && compact_level != share::SORT_COMPRESSION_COMPACT_LEVEL) {
+    // no row meta needed
+  } else {
+    OZ(row_meta_.init(col_array, row_extra_size));
+  }
+  OZ(ObTempBlockStore::init(mem_limit, enable_dump, tenant_id, mem_ctx_id, label, compress_type));
+  OZ(block_reader_.init(this));
+  OZ(init_writer_reader());
+  return ret;
+}
+
+// Releases the reader and writer and clears the iteration/truncation state.
+// NOTE(review): inited_ is left untouched and batch_ctx_ is only forgotten, not freed here --
+// presumably the base store reclaims it; confirm.
+void ObCompactStore::reset()
+{
+  if (OB_NOT_NULL(reader_)) {
+    reader_->reset();
+    allocator_->free(reader_);
+  }
+  if (OB_NOT_NULL(writer_)) {
+    writer_->reset();
+    allocator_->free(writer_);
+  }
+  compact_level_ = share::SORT_DEFAULT_LEVEL;
+  writer_ = nullptr;
+  reader_ = nullptr;
+  batch_ctx_ = nullptr;
+  row_cnt_ = 0;
+  start_iter_ = false;
+  block_reader_.reset();
+  last_truncate_offset_ = 0;
+  enable_truncate_ = false;
+  cur_blk_id_ = 0;
+}
+
+// Closes the writer once all rows have been added.
+int ObCompactStore::finish_write()
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(writer_) || !inited_) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("the store in not proper status", K(ret));
+  } else if (OB_FAIL(writer_->close())) {
+    LOG_WARN("fail to flush buffer", K(ret));
+  }
+  return ret;
+}
+
+
+// Creates the writer/reader pair that matches compact_level_.
+int ObCompactStore::init_writer_reader()
+{
+  int ret = OB_SUCCESS;
+  void *writer_buf = nullptr;
+  void *reader_buf = nullptr;
+  switch (compact_level_) {
+    case share::SORT_COMPRESSION_LEVEL:
+    case share::SORT_DEFAULT_LEVEL: {
+      writer_buf = allocator_->alloc(sizeof(ObDefaultBlockWriter));
+      reader_buf = allocator_->alloc(sizeof(ObDefaultBlockReader));
+      if (OB_ISNULL(writer_buf) || OB_ISNULL(reader_buf)) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("fail to allocate memory for writer", K(ret), KP(writer_buf), KP(reader_buf));
+        // release whichever half was obtained so a partial failure does not leak it
+        if (OB_NOT_NULL(writer_buf)) {
+          allocator_->free(writer_buf);
+        }
+        if (OB_NOT_NULL(reader_buf)) {
+          allocator_->free(reader_buf);
+        }
+      } else {
+        writer_ = new (writer_buf)ObDefaultBlockWriter(this);
+        reader_ = new (reader_buf)ObDefaultBlockReader(this);
+      }
+      break;
+    }
+    case share::SORT_COMPRESSION_COMPACT_LEVEL:
+    case share::SORT_COMPACT_LEVEL: {
+      writer_buf = allocator_->alloc(sizeof(ObCompactBlockWriter));
+      reader_buf = allocator_->alloc(sizeof(ObCompactBlockReader));
+      if (OB_ISNULL(writer_buf) || OB_ISNULL(reader_buf)) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("fail to allocate memory for writer", K(ret));
+        if (OB_NOT_NULL(writer_buf)) {
+          allocator_->free(writer_buf);
+        }
+        if (OB_NOT_NULL(reader_buf)) {
+          allocator_->free(reader_buf);
+        }
+      } else {
+        writer_ = new (writer_buf)ObCompactBlockWriter(this, &row_meta_);
+        reader_ = new (reader_buf)ObCompactBlockReader(this, &row_meta_);
+      }
+      break;
+    }
+    case share::SORT_COMPRESSION_ENCODE_LEVEL:
+    case share::SORT_ENCODE_LEVEL: {
+      // TODO
+      ret = OB_NOT_SUPPORTED;
+      LOG_WARN("encoding is not supported", K(ret));
+      LOG_USER_ERROR(OB_NOT_SUPPORTED, "encoding in chunk store");
+      break;
+    }
+    default: {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("fail to init reader/writer", K(ret), K(compact_level_));
+    }
+  }
+
+  return ret;
+}
+
+// Asks the writer for a StoredRow copy of the most recently written row.
+int ObCompactStore::get_last_stored_row(const ObChunkDatumStore::StoredRow *&sr)
+{
+  int ret = OB_SUCCESS;
+  if (!inited_) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("store is not inited", K(ret));
+  } else if (OB_FAIL(writer_->get_last_stored_row(sr))) {
+    LOG_WARN("fail to get last stored row", K(ret));
+  }
+  return ret;
+}
+
+// Serialization is not supported for this store: always reports OB_ERR_UNEXPECTED.
+OB_DEF_SERIALIZE(ObCompactStore)
+{
+  int ret = OB_ERR_UNEXPECTED;
+  return ret;
+}
+
+
+// Deserialization is not supported for this store: always reports OB_ERR_UNEXPECTED.
+OB_DEF_DESERIALIZE(ObCompactStore)
+{
+  int ret = OB_ERR_UNEXPECTED;
+  return ret;
+}
+
+OB_DEF_SERIALIZE_SIZE(ObCompactStore) +{ + int64_t len = 0; + return len; +} + +} +} diff --git a/src/sql/engine/basic/chunk_store/ob_compact_store.h b/src/sql/engine/basic/chunk_store/ob_compact_store.h new file mode 100644 index 000000000..2433593cc --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_compact_store.h @@ -0,0 +1,134 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#ifndef OCEANBASE_BASIC_OB_COMPACT_STORE_H_ +#define OCEANBASE_BASIC_OB_COMPACT_STORE_H_ +#include "share/ob_define.h" +#include "sql/engine/basic/ob_temp_block_store.h" +#include "lib/alloc/alloc_struct.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" +#include "sql/engine/basic/chunk_store/ob_block_ireader.h" +#include "sql/engine/basic/chunk_store/ob_block_iwriter.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" +#include "src/share/ob_ddl_common.h" +namespace oceanbase +{ +namespace storage { + class ObColumnSchemaItem; +} +namespace sql +{ + +class ObCompactStore final : public ObTempBlockStore +{ + OB_UNIS_VERSION_V(1); + static const int64_t TRUNCATE_SIZE = 2L * 1024 * 1024; +public: + explicit ObCompactStore(common::ObIAllocator *alloc = NULL) : ObTempBlockStore(alloc), + compact_level_(share::SORT_DEFAULT_LEVEL), + writer_(nullptr), reader_(nullptr), batch_ctx_(nullptr), + row_meta_(*allocator_), row_cnt_(0), block_reader_(), start_iter_(false), + cur_blk_id_(0), last_truncate_offset_(0), enable_truncate_(true) + { + }; 
+ virtual ~ObCompactStore() {reset();}; + void reset(); + void rescan(); + int init(const int64_t mem_limit, + const uint64_t tenant_id = common::OB_SERVER_TENANT_ID, + const int64_t mem_ctx_id = common::ObCtxIds::DEFAULT_CTX_ID, + const char *label = common::ObModIds::OB_SQL_ROW_STORE, + const bool enable_dump = true, + const uint32_t row_extra_size = 0, + const bool enable_trunc = true, + const share::SortCompactLevel compact_level = share::SORT_DEFAULT_LEVEL, + const ObCompressorType compress_type = NONE_COMPRESSOR, + const ExprFixedArray *exprs = nullptr); + + int init(const int64_t mem_limit, + const ObIArray &col_array, + const uint64_t tenant_id = common::OB_SERVER_TENANT_ID, + const int64_t mem_ctx_id = common::ObCtxIds::DEFAULT_CTX_ID, + const char *label = common::ObModIds::OB_SQL_ROW_STORE, + const bool enable_dump = true, + const uint32_t row_extra_size = 0, + const bool enable_trunc = true, + const share::SortCompactLevel compact_level = share::SORT_DEFAULT_LEVEL, + const ObCompressorType compress_type = NONE_COMPRESSOR); + int add_batch(const common::ObIArray &exprs, ObEvalCtx &ctx, + const ObBitVector &skip, const int64_t batch_size, + const uint16_t selector[], const int64_t size, + ObChunkDatumStore::StoredRow **stored_rows = nullptr); + // called in sort_op + int add_batch(const common::ObIArray &exprs, ObEvalCtx &ctx, + const ObBitVector &skip, const int64_t batch_size, + int64_t &stored_rows_count, + ObChunkDatumStore::StoredRow **stored_rows = nullptr, + const int64_t start_pos = 0); + int add_row(const common::ObIArray &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row = nullptr); + int add_row(const ObChunkDatumStore::StoredRow &src_sr, ObChunkDatumStore::StoredRow **dst_sr = nullptr); + // for chunkslicestore. 
+ int add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row = nullptr); + int get_next_row(const ObChunkDatumStore::StoredRow *&sr); + + int finish_write(); + // for ChunkSliceStore, get the last row to split range. + int get_last_stored_row(const ObChunkDatumStore::StoredRow *&sr); + ObBlockIReader* get_reader() { return reader_; } + ObTempBlockStore::BlockReader* get_block_reader() { return &block_reader_; } + ObBlockIWriter* get_writer() { return writer_; } + int64_t get_row_cnt() const { return row_cnt_; } + void set_enable_truncate(bool enable_trunc) { enable_truncate_ = enable_trunc; } + bool enable_truncate() { return enable_truncate_; } + int has_next(bool &has_next); + RowMeta *get_row_meta() { return &row_meta_; } + void set_meta(RowMeta *row_meta) { writer_->set_meta(row_meta); reader_->set_meta(row_meta); } + share::SortCompactLevel get_compact_level() { return compact_level_; } + void set_blk_holder(ObTempBlockStore::BlockHolder *blk_holder) { block_reader_.set_blk_holder(blk_holder); } +protected: + int prepare_blk_for_write(Block *) final override; + int prepare_blk_for_read(Block *) final override; + +private: + int init_writer_reader(); + int init_batch_ctx(const int64_t col_cnt, const int64_t max_batch_size); + + int inner_add_batch(const common::ObDatum **datums, const common::ObIArray &exprs, + const uint16_t selector[], const int64_t size, + ObChunkDatumStore::StoredRow **stored_rows); + + int add_batch_fallback(const common::ObIArray &exprs, ObEvalCtx &ctx, + const ObBitVector &skip, const int64_t batch_size, + const uint16_t selector[], const int64_t size, + ObChunkDatumStore::StoredRow **stored_rows); + int inner_get_next_row(const ObChunkDatumStore::StoredRow *&sr); + +private: + share::SortCompactLevel compact_level_; + ObBlockIWriter *writer_; + ObBlockIReader *reader_; + BatchCtx *batch_ctx_; + + RowMeta row_meta_; + int64_t row_cnt_; + 
ObTempBlockStore::BlockReader block_reader_; + bool start_iter_; + int64_t cur_blk_id_; + int64_t last_truncate_offset_; + bool enable_truncate_; +} +; + +} // end namespace sql +} // end namespace oceanbase +#endif // OCEANBASE_BASIC_OB_COMPACT_STORE_H_ \ No newline at end of file diff --git a/src/sql/engine/basic/chunk_store/ob_default_block_reader.cpp b/src/sql/engine/basic/chunk_store/ob_default_block_reader.cpp new file mode 100644 index 000000000..188ce4f0a --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_default_block_reader.cpp @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "sql/engine/basic/chunk_store/ob_default_block_reader.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/container/ob_bitmap.h" +#include "sql/engine/ob_bit_vector.h" + +namespace oceanbase +{ +using namespace common; + +namespace sql +{ + +int ObDefaultBlockReader::get_row(const ObChunkDatumStore::StoredRow *&sr) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(cur_blk_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur block is null", K(ret)); + } else if (!blk_has_next_row()) { + ret = OB_ITER_END; + } else if (cur_pos_in_blk_ > cur_blk_->raw_size_ - sizeof(ObTempBlockStore::Block)) { + ret = OB_INDEX_OUT_OF_RANGE; + LOG_WARN("get unexpected index", K(ret)); + } else { + const ObChunkDatumStore::StoredRow *row = reinterpret_cast(&cur_blk_->payload_[cur_pos_in_blk_]); + sr = row; + cur_pos_in_blk_ += row->row_size_; + cur_row_in_blk_ += 1; + } + + return ret; +} + +int ObDefaultBlockReader::prepare_blk_for_read(ObTempBlockStore::Block *blk) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(blk)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to block is null", K(ret)); + } else { + int64_t cur_row = 0; + int64_t cur_pos = 0; + const int64_t buf_size = blk->raw_size_ - sizeof(ObTempBlockStore::Block); + while (cur_row < blk->cnt_ && cur_pos < buf_size) { + ObChunkDatumStore::StoredRow *sr = reinterpret_cast(blk->payload_ + cur_pos); + sr->swizzling(); + cur_pos += sr->row_size_; + cur_row++; + } + } + + return ret; +} + +} +} \ No newline at end of file diff --git a/src/sql/engine/basic/chunk_store/ob_default_block_reader.h b/src/sql/engine/basic/chunk_store/ob_default_block_reader.h new file mode 100644 index 000000000..d5c748286 --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_default_block_reader.h @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_BASIC_OB_DEFAULT_BLOCK_READER_H_ +#define OCEANBASE_BASIC_OB_DEFAULT_BLOCK_READER_H_ + +#include "share/ob_define.h" +#include "lib/container/ob_se_array.h" +#include "lib/allocator/page_arena.h" +#include "lib/utility/ob_print_utils.h" +#include "lib/list/ob_dlist.h" +#include "src/share/datum/ob_datum.h" +#include "sql/engine/expr/ob_expr.h" +#include "sql/engine/basic/chunk_store/ob_block_ireader.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObCompactStore; +class StoredRow; +class ObDefaultBlockReader final : public ObBlockIReader +{ +public: + ObDefaultBlockReader(ObTempBlockStore *store) : ObBlockIReader(store), cur_pos_in_blk_(0), cur_row_in_blk_(0) {}; + virtual ~ObDefaultBlockReader() { reset(); }; + void reuse() + { + cur_pos_in_blk_ = 0; + cur_row_in_blk_ = 0; + cur_blk_ = nullptr; + } + void reset() + { + cur_pos_in_blk_ = 0; + cur_row_in_blk_ = 0; + cur_blk_ = nullptr; + } + virtual int get_row(const ObChunkDatumStore::StoredRow *&sr) override; + inline bool blk_has_next_row() { return cur_blk_ != NULL && cur_blk_->cnt_ > cur_row_in_blk_; } + void set_meta(const RowMeta *row_meta) override {}; + int prepare_blk_for_read(ObTempBlockStore::Block *blk) final override; + +private: + int64_t cur_pos_in_blk_; + int64_t cur_row_in_blk_; +}; + +} // end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_BASIC_OB_DEFAULT_BLOCK_READER_H_ diff --git a/src/sql/engine/basic/chunk_store/ob_default_block_writer.cpp b/src/sql/engine/basic/chunk_store/ob_default_block_writer.cpp new 
file mode 100644 index 000000000..5f6e49e16 --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_default_block_writer.cpp @@ -0,0 +1,416 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "sql/engine/basic/chunk_store/ob_default_block_writer.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/container/ob_bitmap.h" +#include "sql/engine/ob_bit_vector.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" + +namespace oceanbase +{ +using namespace common; + +namespace sql +{ + +int ObDefaultBlockWriter::add_row(const common::ObIArray &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(ensure_init())) { + LOG_WARN("fail to ensure init", K(ret)); + } else if (OB_FAIL(ensure_write(exprs, ctx))) { + LOG_WARN("fail to ensure write", K(ret)); + } else { + if (OB_FAIL(inner_add_row(exprs, ctx, stored_row))) { + LOG_WARN("fail to add row", K(ret)); + } + } + + return ret; +} + +int ObDefaultBlockWriter::add_row(const ObChunkDatumStore::StoredRow &src_sr, ObChunkDatumStore::StoredRow **dst_sr) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ensure_init())) { + LOG_WARN("fail to ensure init", K(ret)); + } else if (OB_FAIL(ensure_write(src_sr))) { + LOG_WARN("fail to ensure write", K(ret)); + } else { + ObChunkDatumStore::StoredRow *sr = new (get_cur_buf())ObChunkDatumStore::StoredRow; + sr->assign(&src_sr); + if (OB_FAIL(advance(sr->row_size_))) { + 
LOG_WARN("fill buffer head failed", K(ret)); + } else { + if (nullptr != dst_sr) { + *dst_sr = sr; + } + } + } + + return ret; +} + +int ObDefaultBlockWriter::add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ensure_init())) { + LOG_WARN("fail to ensure init", K(ret)); + } else if (OB_FAIL(ensure_write(storage_datums, cnt, extra_size))) { + LOG_WARN("fail to ensure write", K(ret)); + } else if (OB_FAIL(inner_add_row(storage_datums, cnt, extra_size, stored_row))) { + LOG_WARN("add row to block failed", K(ret), K(storage_datums), K(cnt), K(extra_size)); + } + return ret; +} + +template +static void assign_datums(const ObDatum **datums, const uint16_t selector[], const int64_t start_pos, + const int64_t end_pos, ObChunkDatumStore::StoredRow **stored_rows, int64_t col_idx) +{ + const ObDatum *cur_datums = datums[col_idx]; + for (int64_t i = start_pos; i < end_pos; i++) { + ObChunkDatumStore::StoredRow *srow = stored_rows[i]; + const ObDatum &src = cur_datums[selector[i]]; + ObDatum &dst = srow->cells()[col_idx]; + dst.pack_ = src.pack_; + dst.ptr_ = reinterpret_cast(srow) + srow->row_size_; + if (!src.is_null()) { + T::assign_datum_value((void *)dst.ptr_, src.ptr_, src.len_); + } + srow->row_size_ += src.len_; + } +} + +int ObDefaultBlockWriter::add_batch(const common::ObDatum **datums, const common::ObIArray &exprs, + const uint16_t selector[], const int64_t size, + ObChunkDatumStore::StoredRow **stored_rows, BatchCtx *batch_ctx) +{ + int ret = OB_SUCCESS; + uint32_t *size_array = batch_ctx->row_size_array_; + int64_t col_cnt = exprs.count(); + const int64_t base_row_size = sizeof(ObChunkDatumStore::StoredRow) + + sizeof(ObDatum) * col_cnt; + for (int64_t i = 0; i < size; i++) { + size_array[i] = base_row_size; + } + for (int64_t col_idx = 0; col_idx < col_cnt; col_idx++) { + if (OB_ISNULL(datums[col_idx])) { + continue; + } + 
const ObDatum *cur_datums = datums[col_idx]; + for (int64_t i = 0; i < size; i++) { + size_array[i] += cur_datums[selector[i]].len_; + } + } + + // allocate block and assign stored rows + int64_t idx = 0; + OZ(ensure_init()); + while (idx < size && OB_SUCC(ret)) { + if (OB_FAIL(ensure_write(size_array[idx]))) { + LOG_WARN("ensure write block failed", K(ret), K(size_array[idx]), K(col_cnt), K(size)); + for (int64_t col_idx = 0; col_idx < col_cnt; col_idx++) { + if (OB_ISNULL(datums[col_idx])) { + continue; + } + const ObDatum *cur_datums = datums[col_idx]; + } + } else { + int64_t rows = 0; + for (int64_t i = idx; i < size; i++) { + const int64_t remain = get_remain(); + if (size_array[i] <= remain) { + char *buf = get_cur_buf(); + if (OB_ISNULL(buf)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get cur buf", K(ret)); + } + ObChunkDatumStore::StoredRow *srow = reinterpret_cast(buf); + stored_rows[i] = srow; + srow->cnt_ = col_cnt; + srow->row_size_ = base_row_size; + rows += 1; + advance(size_array[i]); + } else { + break; + } + } + if (OB_SUCC(ret)) { + for (int64_t col_idx = 0; col_idx < col_cnt; col_idx++) { + if (OB_ISNULL(exprs.at(col_idx))) { + for (int64_t i = idx; i < idx + rows; i++) { + stored_rows[i]->cells()[col_idx].set_null(); + } + continue; + } + ObObjType meta_type = exprs.at(col_idx)->datum_meta_.type_; + const ObObjDatumMapType datum_map_type = ObDatum::get_obj_datum_map_type(meta_type); + switch (datum_map_type) { + case OBJ_DATUM_NUMBER: + assign_datums(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + case OBJ_DATUM_DECIMALINT: + switch (get_decimalint_type(exprs.at(col_idx)->datum_meta_.precision_)) { + case DECIMAL_INT_32: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + case DECIMAL_INT_64: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + case DECIMAL_INT_128: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + 
case DECIMAL_INT_256: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + case DECIMAL_INT_512: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + default: + assign_datums(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + } + break; + case OBJ_DATUM_8BYTE_DATA: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + case OBJ_DATUM_4BYTE_DATA: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + case OBJ_DATUM_1BYTE_DATA: + assign_datums>(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + default: + assign_datums(datums, selector, idx, idx + rows, stored_rows, col_idx); + break; + } + } + } + + idx += rows; + } + } + + return ret; +} + +int ObDefaultBlockWriter::inner_add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, ObChunkDatumStore::StoredRow **dst_sr) +{ + int ret = OB_SUCCESS; + int64_t head_size = sizeof(ObChunkDatumStore::StoredRow); + int64_t datum_size = sizeof(ObDatum) * cnt; + int64_t row_size = head_size + datum_size + extra_size; + ObChunkDatumStore::StoredRow *sr = static_cast((void*)get_cur_buf()); + if (OB_ISNULL(sr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get buffer", K(ret)); + } else { + sr->cnt_ = cnt; + for (int64_t i = 0; i < cnt; ++i) { + const ObDatum *tmp_datum = static_cast(&storage_datums[i]); + MEMCPY(sr->payload_ + i * sizeof(ObDatum), tmp_datum, sizeof(ObDatum)); + } + char* data_start = sr->payload_ + datum_size + extra_size; + int64_t pos = 0; + for (int64_t i = 0; i < cnt; ++i) { + MEMCPY(data_start + pos, storage_datums[i].ptr_, storage_datums[i].len_); + sr->cells()[i].ptr_ = data_start + pos; + pos += storage_datums[i].len_; + row_size += storage_datums[i].len_; + } + sr->row_size_ = row_size; + if (OB_FAIL(advance(row_size))) { + LOG_WARN("fill buffer head failed", K(ret), K(row_size)); + } else if 
(OB_NOT_NULL(dst_sr)) { + *dst_sr = sr; + } + } + return ret; +} + +// before call this function -- we need to ensure the size if enough. +int ObDefaultBlockWriter::inner_add_row(const common::ObIArray &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row) +{ + int ret = OB_SUCCESS; + ObChunkDatumStore::StoredRow *sr = static_cast((void*)get_cur_buf()); + if (OB_ISNULL(sr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get buffer", K(ret)); + } else { + int64_t pos = sizeof(*sr) + sizeof(ObDatum) * exprs.count(); + if (pos > get_remain()) { + ret = OB_BUF_NOT_ENOUGH; + LOG_WARN("wirte buffer is not enough", K(ret)); + } else { + sr->cnt_ = exprs.count(); + ObDatum *datums = sr->cells(); + for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); i++) { + ObExpr *expr = exprs.at(i); + ObDatum *in_datum = NULL; + if (OB_UNLIKELY(NULL == expr)) { + // Set datum to NULL for NULL expr + datums[i].set_null(); + } else if (OB_FAIL(expr->eval(ctx, in_datum))) { + LOG_WARN("expression evaluate failed", K(ret)); + } else { + datums[i].deep_copy(*in_datum, get_cur_buf(), get_remain(), pos); + } + } + if (OB_SUCC(ret)) { + sr->row_size_ = static_cast(pos); + if (OB_FAIL(advance(sr->row_size_))) { + LOG_WARN("fail to advance buf", K(ret)); + } else if (OB_NOT_NULL(stored_row)) { + *stored_row = sr; + } + } + } + } + return ret; +} + +int ObDefaultBlockWriter::get_row_stored_size(const common::ObIArray &exprs, ObEvalCtx &ctx, uint64_t &size) +{ + int ret = OB_SUCCESS; + size = 0; + ObExpr *expr = nullptr; + common::ObDatum *datum = nullptr; + size = sizeof(ObDatum) * exprs.count() + sizeof(ObChunkDatumStore::StoredRow); + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) { + expr = exprs.at(i); + if (OB_ISNULL(expr)) { + } else if (OB_FAIL(expr->eval(ctx, datum))) { + SQL_ENG_LOG(WARN, "failed to eval expr datum", KPC(expr), K(ret)); + } else { + size += datum->len_; + } + } + return ret; +} + +int ObDefaultBlockWriter::close() +{ + int ret = OB_SUCCESS; + 
inited_ = false; + return ret; +} + +int ObDefaultBlockWriter::ensure_write(const common::ObIArray &exprs, ObEvalCtx &ctx) +{ + int ret = OB_SUCCESS; + uint64_t row_size; + if (OB_FAIL(get_row_stored_size(exprs, ctx, row_size))) { + LOG_WARN("fail to get row_size", K(exprs), K(ret)); + } else if (OB_FAIL(ensure_write(row_size))) { + LOG_WARN("fail to call inner ensure write", K(ret)); + } + return ret; +} + +int ObDefaultBlockWriter::ensure_write(const ObChunkDatumStore::StoredRow &sr) +{ + int ret = OB_SUCCESS; + uint64_t row_size; + if (OB_FAIL(ensure_write(sr.row_size_))) { + LOG_WARN("fail to call inner ensure write", K(ret)); + } + return ret; +} + +int ObDefaultBlockWriter::block_unswizzling(ObTempBlockStore::Block *blk) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(blk)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to unswizzling block", K(ret)); + } else { + int64_t cur_row = 0; + int64_t cur_pos = 0; + const int64_t buf_size = blk->raw_size_ - sizeof(ObTempBlockStore::Block); + while (cur_row < blk->cnt_ && cur_pos < buf_size) { + ObChunkDatumStore::StoredRow *sr = reinterpret_cast(blk->payload_ + cur_pos); + sr->unswizzling(); + cur_pos += sr->row_size_; + cur_row++; + } + } + + return ret; +} + +int ObDefaultBlockWriter::prepare_blk_for_write(ObTempBlockStore::Block *blk) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(block_unswizzling(blk))) { + LOG_WARN("fail to unswizzling block", K(ret)); + } + return ret; +} + +int ObDefaultBlockWriter::ensure_write(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size) +{ + int ret = OB_SUCCESS; + uint64_t row_size; + if (OB_FAIL(get_row_stored_size(storage_datums, cnt, extra_size, row_size))) { + LOG_WARN("fail to get row_size", K(cnt), K(extra_size), K(row_size), K(ret)); + } else if (OB_FAIL(ensure_write(row_size))) { + LOG_WARN("fail to call inner ensure write", K(ret)); + } + + return ret; +} + +int ObDefaultBlockWriter::get_row_stored_size(const blocksstable::ObStorageDatum 
*storage_datums, const int64_t cnt, + const int64_t extra_size, uint64_t &size) +{ + int ret = OB_SUCCESS; + int64_t head_size = sizeof(ObChunkDatumStore::StoredRow); + int64_t datum_size = sizeof(ObDatum) * cnt; + int64_t data_size = 0; + for (int64_t i = 0; i < cnt; ++i) { + data_size += storage_datums[i].len_; + } + size = head_size + datum_size + extra_size + data_size; + return ret; +} + +int ObDefaultBlockWriter::ensure_write(const int64_t size) +{ + int ret = OB_SUCCESS; + if (is_overflow(size)) { + // need to alloc a new block to write. + int64_t new_blk_size = size < DEFAULT_BUF_SIZE ? DEFAULT_BUF_SIZE : size; + ObTempBlockStore::Block *tmp_blk = nullptr; + if (OB_FAIL(store_->new_block(new_blk_size, tmp_blk, true))) { + LOG_WARN("fail to alloc block", K(ret)); + } else if (OB_ISNULL(tmp_blk)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc block", K(ret)); + } else { + cur_blk_ = tmp_blk; + } + } + + return ret; +} + +} +} \ No newline at end of file diff --git a/src/sql/engine/basic/chunk_store/ob_default_block_writer.h b/src/sql/engine/basic/chunk_store/ob_default_block_writer.h new file mode 100644 index 000000000..2a09f15b4 --- /dev/null +++ b/src/sql/engine/basic/chunk_store/ob_default_block_writer.h @@ -0,0 +1,94 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_BASIC_OB_DEFAULT_BLOCK_WRITER_H_ +#define OCEANBASE_BASIC_OB_DEFAULT_BLOCK_WRITER_H_ + +#include "share/ob_define.h" +#include "lib/container/ob_se_array.h" +#include "lib/allocator/page_arena.h" +#include "lib/utility/ob_print_utils.h" +#include "lib/list/ob_dlist.h" +#include "src/share/datum/ob_datum.h" +#include "sql/engine/expr/ob_expr.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block.h" +#include "sql/engine/basic/chunk_store/ob_block_iwriter.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/engine/basic/ob_temp_block_store.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObDefaultBlockWriter final : public ObBlockIWriter +{ +public: + ObDefaultBlockWriter(ObTempBlockStore *store) : ObBlockIWriter(store) {}; + ~ObDefaultBlockWriter() { reset(); }; + + void reset() {} + int add_row(const common::ObIArray &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row = nullptr) override; + int add_row(const ObChunkDatumStore::StoredRow &src_sr, ObChunkDatumStore::StoredRow **dst_sr = nullptr) override; + int add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, ObChunkDatumStore::StoredRow **stored_row) override; + int close() override; + int add_batch(const common::ObDatum **datums, const common::ObIArray &exprs, + const uint16_t selector[], const int64_t size, + ObChunkDatumStore::StoredRow **stored_rows, BatchCtx *batch_ctx) override; + void set_meta(const RowMeta *row_meta) override {}; + int prepare_blk_for_write(ObTempBlockStore::Block *blk) final override; + + inline int get_last_stored_row(const ObChunkDatumStore::StoredRow *&sr) + { + int ret = OB_SUCCESS; + sr = reinterpret_cast(get_last_row()); + return ret; + } +private: + /* + * before add_row we should call ensure_write + * 1. if the cur_blk could hold the next row: + * 2. 
if the write buffer couldn't hold next row: + * 2.1 if the current row's size <= DEFAULT_BUF_SIZE (64KB), then alloc a block which size if 64KB. + * 2.2 if current row's size > 64KB, then the size of alloced block is current row's size. + */ + int ensure_write(const common::ObIArray &exprs, ObEvalCtx &ctx); + int ensure_write(const ObChunkDatumStore::StoredRow &stored_row); + int ensure_write(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size); + + // before dump the block we need to unswizzling each row; + int block_unswizzling(ObTempBlockStore::Block *blk); + + int ensure_write(const int64_t size); + // get the stored size in writer buffer for a row. + int get_row_stored_size(const common::ObIArray &exprs, ObEvalCtx &ctx, uint64_t &size); + int get_row_stored_size(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, uint64_t &size); + inline int ensure_init() + { + int ret = OB_SUCCESS; + if (!inited_) { + inited_ = true; + } + return ret; + } + int inner_add_row(const common::ObIArray &exprs, ObEvalCtx &ctx, ObChunkDatumStore::StoredRow **stored_row = nullptr); + int inner_add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, ObChunkDatumStore::StoredRow **dst_sr); +}; + +} // end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_BASIC_OB_DEFAULT_BLOCK_WRITER_H_ diff --git a/src/sql/engine/basic/ob_chunk_datum_store.cpp b/src/sql/engine/basic/ob_chunk_datum_store.cpp index 2fee5d823..8f5a48b1d 100644 --- a/src/sql/engine/basic/ob_chunk_datum_store.cpp +++ b/src/sql/engine/basic/ob_chunk_datum_store.cpp @@ -377,6 +377,45 @@ int ObChunkDatumStore::Block::copy_datums(const ObDatum *datums, const int64_t c return ret; } +int ObChunkDatumStore::Block::copy_storage_datums(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, StoredRow **dst_sr) +{ + int ret = 
OB_SUCCESS; + BlockBuffer *buf = get_buffer(); + int64_t head_size = sizeof(StoredRow); + int64_t datum_size = sizeof(ObDatum) * cnt; + int64_t row_size = head_size + sizeof(ObDatum) * cnt + extra_size; + if (!buf->is_inited()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(buf), K(row_size)); + } else { + StoredRow *sr = new (buf->head())StoredRow; + sr->cnt_ = cnt; + for (int64_t i = 0; i < cnt; ++i) { + const ObDatum *tmp_datum = static_cast(&storage_datums[i]); + MEMCPY(sr->payload_ + i * sizeof(ObDatum), tmp_datum, sizeof(ObDatum)); + } + char* data_start = sr->payload_ + datum_size + extra_size; + int64_t pos = 0; + for (int64_t i = 0; i < cnt; ++i) { + MEMCPY(data_start + pos, storage_datums[i].ptr_, storage_datums[i].len_); + sr->cells()[i].ptr_ = data_start + pos; + pos += storage_datums[i].len_; + row_size += storage_datums[i].len_; + } + sr->row_size_ = row_size; + if (OB_FAIL(buf->advance(row_size))) { + LOG_WARN("fill buffer head failed", K(ret), K(buf), K(row_size)); + } else { + rows_++; + if (nullptr != dst_sr) { + *dst_sr = sr; + } + } + } + return ret; +} + //the memory of shadow stored row is not continuous, //so you cannot directly copy the memory of the entire stored row, //and you should make a deep copy of each datum in turn @@ -1220,6 +1259,35 @@ int ObChunkDatumStore::add_row(const ObDatum *datums, const int64_t cnt, return ret; } +int ObChunkDatumStore::add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, StoredRow **stored_row) +{ + int ret = OB_SUCCESS; + if (!is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + int64_t head_size = sizeof(StoredRow); + int64_t datum_size = sizeof(ObDatum) * cnt; + int64_t data_size = 0; + for (int64_t i = 0; i < cnt; ++i) { + data_size += storage_datums[i].len_; + } + const int64_t row_size = head_size + datum_size + extra_size + data_size; + if (OB_FAIL(ensure_write_blk(row_size))) { + 
LOG_WARN("ensure write block failed", K(ret)); + } else if (OB_FAIL(cur_blk_->copy_storage_datums(storage_datums, cnt, extra_size, stored_row))) { + LOG_WARN("add row to block failed", K(ret), K(storage_datums), K(cnt), K(extra_size), K(row_size)); + } else { + row_cnt_++; + if (col_count_ < 0) { + col_count_ = cnt; + } + } + } + return ret; +} + int ObChunkDatumStore::add_row(const ShadowStoredRow &sr, StoredRow **stored_row) { int ret = OB_SUCCESS; @@ -1321,40 +1389,6 @@ int ObChunkDatumStore::add_batch(const common::ObIArray &exprs, ObEval return ret; } -template -struct AssignFixedLenDatumValue -{ - static void assign_datum_value(void *dst, const char *src, uint32_t len) - { - UNUSED(len); - MEMCPY(dst, src, LEN); - } -}; - -struct AssignNumberDatumValue -{ - static void assign_datum_value(void *dst, const char *src, uint32_t len) - { - if (4 == len) { - MEMCPY(dst, src, 4); - } else if (8 == len) { - MEMCPY(dst, src, 8); - } else if (12 == len){ - MEMCPY(dst, src, 12); - } else { - MEMCPY(dst, src, len); - } - } -}; - -struct AssignDefaultDatumValue -{ - static void assign_datum_value(void *dst, const char *src, uint32_t len) - { - MEMCPY(dst, src, len); - } -}; - template static void assign_datums(const ObDatum **datums, const uint16_t selector[], const int64_t size, ObChunkDatumStore::StoredRow **stored_rows, int64_t col_idx) diff --git a/src/sql/engine/basic/ob_chunk_datum_store.h b/src/sql/engine/basic/ob_chunk_datum_store.h index 5dfa4949a..f3347f414 100644 --- a/src/sql/engine/basic/ob_chunk_datum_store.h +++ b/src/sql/engine/basic/ob_chunk_datum_store.h @@ -32,6 +32,41 @@ namespace sql { class ObIOEventObserver; + +template +struct AssignFixedLenDatumValue +{ + static void assign_datum_value(void *dst, const char *src, uint32_t len) + { + UNUSED(len); + MEMCPY(dst, src, LEN); + } +}; + +struct AssignNumberDatumValue +{ + static void assign_datum_value(void *dst, const char *src, uint32_t len) + { + if (4 == len) { + MEMCPY(dst, src, 4); + } else if (8 
== len) { + MEMCPY(dst, src, 8); + } else if (12 == len){ + MEMCPY(dst, src, 12); + } else { + MEMCPY(dst, src, len); + } + } +}; + +struct AssignDefaultDatumValue +{ + static void assign_datum_value(void *dst, const char *src, uint32_t len) + { + MEMCPY(dst, src, len); + } +}; + // Random access row store, support disk store. // All row must have same cell count and projector. class ObChunkDatumStore @@ -447,6 +482,10 @@ public: const int64_t cnt, const int64_t extra_size, StoredRow **dst_sr); + int copy_storage_datums(const blocksstable::ObStorageDatum *storage_datums, + const int64_t cnt, + const int64_t extra_size, + StoredRow **dst_sr); //the memory of shadow stored row is not continuous, //so you cannot directly copy the memory of the entire stored row, //and you should make a deep copy of each datum in turn @@ -946,6 +985,8 @@ public: int add_row(const StoredRow &sr, StoredRow **stored_row = nullptr); int add_row(const ObDatum *datums, const int64_t cnt, const int64_t extra_size, StoredRow **stored_row); + int add_row(const blocksstable::ObStorageDatum *storage_datums, const int64_t cnt, + const int64_t extra_size, StoredRow **stored_row); int add_row(const StoredRow &sr, ObEvalCtx *ctx, StoredRow **stored_row = nullptr); int add_row(const ShadowStoredRow &sr, StoredRow **stored_row = nullptr); diff --git a/src/sql/engine/basic/ob_temp_block_store.cpp b/src/sql/engine/basic/ob_temp_block_store.cpp new file mode 100644 index 000000000..731931e83 --- /dev/null +++ b/src/sql/engine/basic/ob_temp_block_store.cpp @@ -0,0 +1,1554 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "ob_temp_block_store.h" +#include "lib/container/ob_se_array_iterator.h" +#include "storage/blocksstable/ob_tmp_file.h" +#include "lib/utility/ob_tracepoint.h" +#include "share/config/ob_server_config.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/engine/ob_io_event_observer.h" + + +namespace oceanbase +{ +using namespace common; + +namespace sql +{ + +const int64_t ObTempBlockStore::IndexBlock::INDEX_BLOCK_SIZE; +const int64_t ObTempBlockStore::BIG_BLOCK_SIZE; + +int ObTempBlockStore::ShrinkBuffer::init(char *buf, const int64_t buf_size) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(NULL == buf) || OB_UNLIKELY(buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else { + data_ = buf; + head_ = 0; + tail_ = buf_size; + cap_ = buf_size; + } + return ret; +} + +ObTempBlockStore::ObTempBlockStore(common::ObIAllocator *alloc /* = NULL */) + : inited_(false), allocator_(NULL == alloc ? 
&inner_allocator_ : alloc), blk_(NULL), + block_id_cnt_(0), saved_block_id_cnt_(0), dumped_block_id_cnt_(0), enable_dump_(true), + tenant_id_(0), label_(), ctx_id_(0), mem_limit_(0), mem_hold_(0), mem_used_(0), + fd_(-1), dir_id_(-1), file_size_(0), block_cnt_(0), index_block_cnt_(0), block_cnt_on_disk_(0), + alloced_mem_size_(0), max_block_size_(0), idx_blk_(NULL), mem_stat_(NULL), io_observer_(NULL), + last_block_on_disk_(false) +{ + label_[0] = '\0'; +} + +int ObTempBlockStore::init(int64_t mem_limit, + bool enable_dump, + uint64_t tenant_id, + int64_t mem_ctx_id, + const char *label, + common::ObCompressorType compress_type) +{ + int ret = OB_SUCCESS; + mem_limit_ = mem_limit; + enable_dump_ = enable_dump; + tenant_id_ = tenant_id; + ctx_id_ = mem_ctx_id; + const int label_len = MIN(lib::AOBJECT_LABEL_SIZE, strlen(label)); + MEMCPY(label_, label, label_len); + label_[label_len] = '\0'; + fd_ = -1; + inner_reader_.init(this); + inited_ = true; + compressor_.init(compress_type); + return ret; +} + +void ObTempBlockStore::reset() +{ + int ret = OB_SUCCESS; + + blk_ = NULL; + // the last index block may not be linked to `blk_mem_list_` and needs to be released manually + if (NULL != idx_blk_) { + free_blk_mem(idx_blk_); + idx_blk_ = NULL; + } + + reset_block_cnt(); + inner_reader_.reset(); + + if (is_file_open()) { + write_io_handle_.reset(); + if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.remove(fd_))) { + LOG_WARN("remove file failed", K(ret), K_(fd)); + } else { + LOG_INFO("close file success", K(ret), K_(fd)); + } + fd_ = -1; + dir_id_ = -1; + file_size_ = 0; + } + + free_mem_list(blk_mem_list_); + free_mem_list(alloced_mem_list_); + blocks_.reset(); + set_mem_hold(0); + set_mem_used(0); +} + +void ObTempBlockStore::reuse() +{ + int ret = OB_SUCCESS; + reset_block_cnt(); + inner_reader_.reset(); + if (is_file_open()) { + write_io_handle_.reset(); + if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.remove(fd_))) { + LOG_WARN("remove file failed", K(ret), K_(fd)); + } else { + 
LOG_INFO("close file success", K(ret), K_(fd)); + } + fd_ = -1; + dir_id_ = -1; + file_size_ = 0; + } + if (NULL != idx_blk_) { + free_blk_mem(idx_blk_); + idx_blk_ = NULL; + } + free_mem_list(alloced_mem_list_); + DLIST_FOREACH_REMOVESAFE_NORET(node, blk_mem_list_) { + if (&(*node) + 1 != static_cast(static_cast(blk_->get_buffer()->data()))) { + node->unlink(); + node->~LinkNode(); + allocator_->free(node); + } + } + if (NULL != blk_) { + if (OB_FAIL(setup_block(blk_->get_buffer(), blk_))) { + LOG_WARN("setup block failed", K(ret)); + } + block_cnt_ = 1; + const ShrinkBuffer *buf = blk_->get_buffer(); + set_mem_hold(buf->capacity() + sizeof(LinkNode)); + max_block_size_ = buf->capacity(); + } + set_mem_used(0); + blocks_.reset(); +} + +void ObTempBlockStore::reset_block_cnt() +{ + block_cnt_ = 0; + index_block_cnt_ = 0; + block_cnt_on_disk_ = 0; + block_id_cnt_ = 0; + saved_block_id_cnt_ = 0; + dumped_block_id_cnt_ = 0; + alloced_mem_size_ = 0; + max_block_size_ = 0; +} + +int ObTempBlockStore::alloc_dir_id() +{ + int ret = OB_SUCCESS; + if (-1 == dir_id_) { + dir_id_ = 0; + if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.alloc_dir(dir_id_))) { + LOG_WARN("allocate file directory failed", K(ret)); + } + } + return ret; +} + +int ObTempBlockStore::finish_add_row(bool need_dump /*true*/) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited())) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(finish_write())) { + LOG_WARN("fail to flush write buffer", K(ret)); + } else if (OB_ISNULL(blk_)) { + // do nothing if store is empty or has called finish_add_row already + } else if (is_file_open()) { + if (need_dump && OB_FAIL(dump(true)) && OB_EXCEED_MEM_LIMIT != ret) { + LOG_WARN("fail to dump all when finish add row", K(ret)); + } else { + int64_t timeout_ms = 0; + const uint64_t begin_io_dump_time = rdtsc(); + if (OB_FAIL(get_timeout(timeout_ms))) { + LOG_WARN("get timeout failed", K(ret)); + } else if (OB_FAIL(write_io_handle_.wait())) { + 
LOG_WARN("fail to wait write", K(ret), K(write_io_handle_)); + } else if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.sync(fd_, timeout_ms))) { + LOG_WARN("sync file failed", K(ret), K(fd_), K(timeout_ms)); + } + if (OB_LIKELY(nullptr != io_observer_)) { + io_observer_->on_write_io(rdtsc() - begin_io_dump_time); + } + } + } + return ret; +} + +int ObTempBlockStore::init_block_buffer(void* mem, const int64_t size, Block *&block) +{ + int ret = OB_SUCCESS; + ShrinkBuffer *buf = NULL; + if (OB_ISNULL(mem)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("mem is null", KP(mem)); + } else if (OB_UNLIKELY(size <= Block::min_blk_size(0))) { + ret = OB_BUF_NOT_ENOUGH; + LOG_WARN("buffer is not enough", K(size)); + } else if (OB_ISNULL(buf = new (Block::buffer_position(mem, size))ShrinkBuffer)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc buffer failed", K(ret)); + } else if (OB_FAIL(buf->init(static_cast(mem), size))) { + LOG_WARN("init shrink buffer failed", K(ret)); + } else { + block = new (buf->head()) Block; + if (OB_FAIL(buf->fill_head(sizeof(Block)))) { + LOG_WARN("fill buffer head failed", K(ret), K(buf), K(sizeof(Block))); + } else if (OB_FAIL(buf->fill_tail(sizeof(ShrinkBuffer)))) { + LOG_WARN("fill buffer tail failed", K(ret), K(buf), K(sizeof(ShrinkBuffer))); + } else { + block->block_id_ = 0; // unused + block->cnt_ = 0; + block->buf_off_ = buf->remain(); + } + } + return ret; +} + +/* + * Append block to store. 
+ * The `buf` is the pointer to block, and the `size` is the size of the block header and + * payload size + */ +int ObTempBlockStore::append_block(const char *buf, const int64_t size) +{ + int ret = OB_SUCCESS; + const Block *src_block = reinterpret_cast(buf); + const int64_t payload_size = size - sizeof(Block); + if (OB_ISNULL(buf) || OB_UNLIKELY(payload_size < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("buf is null", K(ret), KP(buf), K(size), K(payload_size)); + } else if (OB_UNLIKELY(!is_block(buf))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block magic is mismatch", K(ret), K(block_magic(buf))); + } else if (OB_FAIL(append_block_payload(src_block->payload_, payload_size, src_block->cnt_))) { + LOG_WARN("fail to append block payload", K(ret)); + } + return ret; +} + +/* + * Append block payload to store. + * The `buf` is the pointer to block payload, and the `size` is payload size. + */ +int ObTempBlockStore::append_block_payload(const char *buf, const int64_t size, const int64_t cnt) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || OB_UNLIKELY(size < 0) || cnt < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("buf is null", K(ret), KP(buf), K(size), K(cnt)); + } else if (OB_FAIL(new_block(size, blk_, true))) { + LOG_WARN("fail to new block", K(ret)); + } else if (OB_UNLIKELY(size > blk_->remain())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("blk size is not enough", K(ret), K(size), K(blk_->remain())); + } else { + blk_->cnt_ = static_cast(cnt); + MEMCPY(blk_->payload_, buf, size); + block_id_cnt_ = blk_->end(); + blk_->get_buffer()->fast_advance(size); + LOG_DEBUG("append block payload", K(*this), K(*blk_)); + } + return ret; +} + +int ObTempBlockStore::new_block(const int64_t mem_size, + Block *&blk, + const bool strict_mem_size /* false*/) +{ + int ret = OB_SUCCESS; + const int64_t min_blk_size = Block::min_blk_size(mem_size); + if (OB_UNLIKELY(!is_inited())) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(NULL == blk_)) { + if 
(OB_FAIL(alloc_block(blk_, min_blk_size, strict_mem_size))) { + LOG_WARN("alloc block failed", K(ret), KPC(this)); + } + } else if (OB_FAIL(dump_block_if_need(min_blk_size))) { + LOG_WARN("fail to dump block if need", K(ret), K(min_blk_size)); + } else if (OB_FAIL(switch_block(min_blk_size, strict_mem_size))) { + LOG_WARN("switch block failed", K(ret), K(mem_size), K(min_blk_size)); + } + if (OB_SUCC(ret)) { + if (OB_ISNULL(blk_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to new block", K(ret), KP(blk_)); + } else { + blk = blk_; + } + } + return ret; +} + +int ObTempBlockStore::get_block(BlockReader &reader, const int64_t block_id, const Block *&blk) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited())) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(block_id < 0) || OB_UNLIKELY(block_id >= block_id_cnt_)) { + ret = OB_INDEX_OUT_OF_RANGE; + LOG_WARN("invalid of row_id", K(ret), K(block_id), K_(block_id_cnt)); + } else { + if (reader.file_size_ != file_size_) { + reader.reset_cursor(file_size_); + blk = NULL; + } + if (NULL != blk && blk->contain(block_id)) { + // found in previous visited block + } else if (block_id >= saved_block_id_cnt_) { + // found in write block + blk = blk_; + } else { + blk = NULL; + bool blk_on_disk = true; + if (OB_FAIL(inner_get_block(reader, block_id, blk, blk_on_disk))) { + LOG_WARN("fail to get next block", K(ret)); + } else if (blk_on_disk) { + if (need_compress() && OB_FAIL(decompr_block(reader, blk))) { + LOG_WARN("fail to decompress block", K(ret), K(last_block_on_disk_)); + } else { + Block *tmp_blk = const_cast(blk); + if (OB_FAIL(prepare_blk_for_read(tmp_blk))) { + LOG_WARN("fail to prepare blk", K(ret)); + } + } + } + if (OB_SUCC(ret) && reader.is_async() && OB_NOT_NULL(blk)) { + // 1. prefetch next block, if do not need prefetch, the aio_blk is null; + // 2. 
should prefetch after decompress, since we need the info in read_io_handler_ + int64_t next_block_id = block_id + blk->cnt_; + if (OB_LIKELY(next_block_id >= 0) && OB_LIKELY(next_block_id < saved_block_id_cnt_)) { + // if still have next block, prefetch next block + reader.aio_buf_idx_ = (reader.aio_buf_idx_ + 1) % BlockReader::AIO_BUF_CNT; + last_block_on_disk_ = true; + if (OB_FAIL(load_block(reader, next_block_id, reader.aio_blk_, last_block_on_disk_))) { + LOG_WARN("fail to prefetch next block", K(ret)); + } + } + } + } + if (OB_SUCC(ret) && OB_ISNULL(blk)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Null block returned", K(ret)); + } + } + return ret; +} + +/* the compressed block is in reader.buf_.data(); we need to decompr it. + * blk->raw_size_ is the decompressed size. reader.read_io_handle.size is the compressd_size + * 1. alloc a buf (decompressd size) to decompr_buf_. + * 2. decompress( from reader.buf_.data(), to decompre_buf_.data()) + * 3. release the compressed_buf's space. + * 4. 
set buf.data_, point to decompressed_buf_ + */ + +int ObTempBlockStore::decompr_block(BlockReader &reader, const Block *&blk) +{ + int ret = OB_SUCCESS; + // need decompress here, the compressed data is in reader.buf_ + if (OB_ISNULL(reader.buf_.data()) || OB_ISNULL(blk)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpeteced null pointer", K(ret), KP(blk), KP(reader.buf_.data())); + } else { + int64_t comp_size = reader.read_io_handle_.get_data_size() - sizeof(Block); + int64_t decomp_size = blk->raw_size_ - sizeof(Block); + int64_t actual_uncomp_size = 0; + if (OB_FAIL(ensure_reader_buffer(reader, reader.decompr_buf_, blk->raw_size_))) { + LOG_WARN("fail to alloc decomp_buf", K(ret)); + } else if (FALSE_IT(MEMCPY(reader.decompr_buf_.data(), blk, sizeof(Block)))) { + } else { + if (OB_FAIL(compressor_.decompress(blk->payload_, comp_size, + decomp_size, reader.decompr_buf_.data() + sizeof(Block), + actual_uncomp_size))) { + LOG_WARN("fail to decompress block", K(ret), KPC(this), K(blk->block_id_), K(blk->cnt_)); + } else if (reader.is_async()) { + if (OB_FAIL(reader.buf_.init(reader.decompr_buf_.data(), reader.decompr_buf_.capacity()))) { + LOG_WARN("fail to init reader buf ", K(ret), K(reader.buf_), K(reader.decompr_buf_)); + } else { + blk = reinterpret_cast(reader.buf_.data()); + } + } else { + free_blk_mem(reader.buf_.data(), reader.buf_.capacity()); + if (OB_FAIL(reader.buf_.init(reader.decompr_buf_.data(), reader.decompr_buf_.capacity()))) { + LOG_WARN("fail to init reader buf ", K(ret), K(reader.buf_), K(reader.decompr_buf_)); + } else { + blk = reinterpret_cast(reader.buf_.data()); + reader.decompr_buf_.reset(); + } + } + if (OB_FAIL(ret)) { + free_blk_mem(reader.decompr_buf_.data(), reader.decompr_buf_.capacity()); + } + } + } + return ret; +} + +// get block async or sync +int ObTempBlockStore::inner_get_block(BlockReader &reader, const int64_t block_id, + const Block *&blk, bool &blk_on_disk) +{ + int ret = OB_SUCCESS; + blk = nullptr; + blk_on_disk = 
true; + if (reader.is_async()) { + int aio_buf_idx = reader.aio_buf_idx_ % BlockReader::AIO_BUF_CNT; + bool need_sync_read = false; + if (OB_NOT_NULL(reader.aio_blk_)) { + // the blk is in memory, do not need wait. + if (reader.aio_blk_->magic_ == Block::MAGIC && reader.aio_blk_->contain(block_id)) { + blk_on_disk = false; + blk = reader.aio_blk_; + reader.aio_blk_ = nullptr; + } else { + need_sync_read = true; + } + } else { + if (OB_ISNULL(reader.aio_buf_[aio_buf_idx].data())) { + // no prefetch + need_sync_read = true; + } else if (OB_FAIL(reader.aio_wait())) { + LOG_WARN("fail to wait read", K(ret), K(reader)); + } else { + const Block *tmp_blk = reinterpret_cast(reader.aio_buf_[aio_buf_idx].data()); + if (tmp_blk->magic_ == Block::MAGIC && tmp_blk->contain(block_id)) { + // using the prefetch blk + if (OB_FAIL(reader.buf_.init(reader.aio_buf_[aio_buf_idx].data(), + reader.aio_buf_[aio_buf_idx].capacity()))) { + LOG_WARN("fail to init buf with aio_buf", K(ret)); + } else { + blk = reinterpret_cast(reader.buf_.data()); + } + } else { + // check fail, shouldn't use the prefetch block, need to load block + need_sync_read = true; + } + } + } + + if (OB_SUCC(ret) && need_sync_read) { + // fail to prefetch, read using block_id + if (OB_FAIL(load_block(reader, block_id, reader.aio_blk_, blk_on_disk))) { + LOG_WARN("fail to load block", K(ret)); + } else if (OB_NOT_NULL(reader.aio_blk_)) { + // the blk is in memory, do not need wait. 
+ blk_on_disk = false; + blk = reader.aio_blk_; + reader.aio_blk_ = nullptr; + } else if (OB_FAIL(reader.aio_wait())) { + LOG_WARN("fail to wait read", K(ret), K(reader)); + } else { + if (OB_FAIL(reader.buf_.init(reader.aio_buf_[aio_buf_idx].data(), + reader.aio_buf_[aio_buf_idx].capacity()))) { + LOG_WARN("fail to init buf with aio_buf", K(ret)); + } else { + blk = reinterpret_cast(reader.buf_.data()); + } + } + } + if (OB_SUCC(ret)) { + blk_on_disk = (need_sync_read && blk_on_disk) || (!need_sync_read && last_block_on_disk_); + } + } else { + if(OB_FAIL(load_block(reader, block_id, blk, blk_on_disk))) { + LOG_WARN("fail to load block", K(ret)); + } + } + return ret; +} + +int ObTempBlockStore::BlockReader::get_block(const int64_t block_id, const Block *&blk) { + // wait and process block + int ret = OB_SUCCESS; + if (OB_FAIL(store_->get_block(*this, block_id, blk))) { + LOG_WARN("fail to get block", K(ret), K(block_id)); + } + return ret; +} + +int ObTempBlockStore::get_timeout(int64_t &timeout_ms) +{ + int ret = OB_SUCCESS; + const int64_t timeout_us = THIS_WORKER.get_timeout_remain(); + if (OB_UNLIKELY(timeout_us / 1000 <= 0)) { + ret = OB_TIMEOUT; + LOG_WARN("query is timeout", K(ret), K(timeout_us)); + } else { + timeout_ms = timeout_us / 1000; + } + return ret; +} + +int ObTempBlockStore::alloc_block(Block *&blk, const int64_t min_size, const bool strict_mem_size) +{ + int ret = OB_SUCCESS; + int64_t size = min_size; + if (!strict_mem_size) { + size = std::max(static_cast(BLOCK_SIZE), min_size); + if (block_cnt_ > 0 || need_dump(size)) { + size = std::max(size, static_cast(BIG_BLOCK_SIZE)); + } + size += sizeof(LinkNode); + size = next_pow2(size); + size -= sizeof(LinkNode); + } + void *mem = alloc_blk_mem(size, &blk_mem_list_); + ShrinkBuffer *buf = NULL; + if (OB_ISNULL(mem)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret), K(size)); + } else if (OB_ISNULL(buf = new (Block::buffer_position(mem, size))ShrinkBuffer)) { + ret = 
OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc buffer failed", K(ret)); + } else if (OB_FAIL(buf->init(static_cast(mem), size))) { + LOG_WARN("init shrink buffer failed", K(ret)); + } else if (OB_FAIL(setup_block(buf, blk))) { + LOG_WARN("setup block buffer fail", K(ret)); + } else { + ++block_cnt_; + } + if (OB_FAIL(ret) && !OB_ISNULL(mem)) { + free_blk_mem(mem, size); + } + return ret; +} + +void *ObTempBlockStore::alloc_blk_mem(const int64_t size, ObDList *list) +{ + void *blk = NULL; + int ret = OB_SUCCESS; + if (OB_UNLIKELY(size < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(size)); + } else { + ObMemAttr attr(tenant_id_, label_, ctx_id_); + void *mem = allocator_->alloc(size + sizeof(LinkNode), attr); + if (OB_UNLIKELY(NULL == mem)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret), KP(mem), K(size + sizeof(LinkNode)), + K(label_), K(ctx_id_), K(mem_limit_), K(enable_dump_), K(mem_hold_), K(mem_used_)); + } else { + LinkNode *node = new (mem) LinkNode; + if (NULL != list && OB_UNLIKELY(!list->add_last(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("add node to list failed", K(ret)); + node->~LinkNode(); + allocator_->free(mem); + } else { + blk = static_cast(mem) + sizeof(LinkNode); + inc_mem_hold(size + sizeof(LinkNode)); + max_block_size_ = MAX(max_block_size_, size); + } + } + } + return blk; +} + +int ObTempBlockStore::setup_block(ShrinkBuffer *buf, Block *&blk) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!buf->is_inited())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("block buffer not inited", K(ret)); + } else { + buf->reuse(); + blk = new (buf->head()) Block; + blk->block_id_ = block_id_cnt_; + if (OB_FAIL(buf->fill_head(sizeof(Block)))) { + LOG_WARN("fill buffer head failed", K(ret), K(buf), K(sizeof(Block))); + } else if (OB_FAIL(buf->fill_tail(sizeof(ShrinkBuffer)))) { + LOG_WARN("fill buffer tail failed", K(ret), K(buf), K(sizeof(ShrinkBuffer))); + } else { + blk->buf_off_ = buf->remain(); + 
inc_mem_used(sizeof(Block) + sizeof(ShrinkBuffer)); + } + } + return ret; +} + +int ObTempBlockStore::switch_block(const int64_t min_size, const bool strict_mem_size) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(min_size < 0) || OB_ISNULL(blk_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(min_size)); + } else { + const bool finish_add = (0 == min_size); + Block *new_blk = NULL; + const ShrinkBuffer *buf = blk_->get_buffer(); + BlockIndex bi; + bi.is_idx_block_ = false; + bi.on_disk_ = false; + bi.block_id_ = ~(0b11UL << 62) & saved_block_id_cnt_; + bi.blk_ = blk_; + bi.length_ = static_cast(buf->head_size()); + bi.capacity_ = static_cast(buf->capacity()); + blk_->raw_size_ = bi.length_; + if (OB_FAIL(add_block_idx(bi))) { + LOG_WARN("add block index failed", K(ret)); + } else if (!finish_add && OB_FAIL(alloc_block(new_blk, min_size, strict_mem_size))) { + LOG_WARN("alloc block failed", K(ret), K(min_size)); + } else { + LOG_DEBUG("switch block", KP(blk_), K(*blk_), K(blk_->checksum())); + saved_block_id_cnt_ = block_id_cnt_; + blk_ = new_blk; + } + if (OB_FAIL(ret) && NULL != new_blk) { + free_blk_mem(new_blk, new_blk->get_buffer()->capacity()); + } + } + return ret; +} + +int ObTempBlockStore::add_block_idx(const BlockIndex &bi) +{ + int ret = OB_SUCCESS; + if (NULL == idx_blk_) { + if (OB_FAIL(blocks_.push_back(bi))) { + LOG_WARN("add block index to array failed", K(ret)); + } else { + if (blocks_.count() >= DEFAULT_BLOCK_CNT) { + if (OB_FAIL(build_idx_block())) { + LOG_WARN("build index block failed", K(ret)); + } + } + } + } else { + if (idx_blk_->is_full()) { + if (OB_FAIL(switch_idx_block())) { + LOG_WARN("switch index block failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + idx_blk_->block_indexes_[idx_blk_->cnt_++] = bi; + inc_mem_used(sizeof(BlockIndex)); + } + } + return ret; +} + +int ObTempBlockStore::alloc_idx_block(IndexBlock *&ib) +{ + int ret = OB_SUCCESS; + void *mem = alloc_blk_mem(IndexBlock::INDEX_BLOCK_SIZE, NULL); + 
if (OB_ISNULL(mem)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret)); + } else { + ib = new (mem) IndexBlock; + ++index_block_cnt_; + inc_mem_used(sizeof(IndexBlock)); + } + return ret; +} + +int ObTempBlockStore::build_idx_block() +{ + STATIC_ASSERT(IndexBlock::capacity() > DEFAULT_BLOCK_CNT, + "DEFAULT_BLOCK_CNT block indexes must fit in one index block"); + int ret = OB_SUCCESS; + if (OB_FAIL(alloc_idx_block(idx_blk_))) { + LOG_WARN("alloc idx block failed", K(ret)); + } else if (OB_UNLIKELY(NULL == idx_blk_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc null index block", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < blocks_.count(); ++i) { + if (OB_FAIL(add_block_idx(blocks_.at(i)))) { + LOG_WARN("add block idx failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + blocks_.reset(); + } + } + return ret; +} + +int ObTempBlockStore::switch_idx_block(bool finish_add) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(NULL == idx_blk_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("index block should not be null"); + } else if (OB_FAIL(link_idx_block(idx_blk_))) { + LOG_WARN("fail to link index block", K(ret)); + } else { + IndexBlock *ib = NULL; + BlockIndex bi; + bi.is_idx_block_ = true; + bi.on_disk_ = false; + bi.block_id_ = idx_blk_->block_id(); + bi.idx_blk_ = idx_blk_; + bi.length_ = static_cast(idx_blk_->buffer_size()); + bi.capacity_ = static_cast(IndexBlock::INDEX_BLOCK_SIZE); + if (!finish_add) { + if (OB_FAIL(alloc_idx_block(ib))) { + LOG_WARN("alloc index block failed", K(ret)); + } else if (OB_ISNULL(ib)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc null block", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(blocks_.push_back(bi))) { + LOG_WARN("add block index to array failed", K(ret)); + } else { + idx_blk_ = NULL; + if (NULL != ib) { + idx_blk_ = ib; + ib = NULL; + } + } + if (OB_FAIL(ret) && NULL != ib) { + ib->~IndexBlock(); + free_blk_mem(ib, IndexBlock::INDEX_BLOCK_SIZE); + ib = NULL; + } + } + 
return ret; +} + +int ObTempBlockStore::link_idx_block(IndexBlock *idx_blk) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(idx_blk)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("idx_blk_ is null", K(ret)); + } else { + void *mem = idx_blk; + LinkNode *node = static_cast(mem) - 1; + if (OB_UNLIKELY(!blk_mem_list_.add_last(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("add node to list failed", K(ret)); + } + } + return ret; +} + +void ObTempBlockStore::set_mem_hold(int64_t hold) +{ + inc_mem_hold(hold - mem_hold_); +} + +void ObTempBlockStore::inc_mem_hold(int64_t hold) +{ + if (NULL != mem_stat_) { + if (hold > 0) { + mem_stat_->alloc(hold); + } else if (hold < 0) { + mem_stat_->free(-hold); + } + } + mem_hold_ += hold; +} + +void ObTempBlockStore::free_blk_mem(void *mem, const int64_t size /* = 0 */) +{ + if (NULL != mem) { + LinkNode *node = static_cast(mem) - 1; + if (NULL != node->get_next()) { + node->unlink(); + } + node->~LinkNode(); + allocator_->free(node); + inc_mem_hold(-(size + sizeof(LinkNode))); + } +} + +int ObTempBlockStore::load_block(BlockReader &reader, const int64_t block_id, + const Block *&blk, bool &on_disk) +{ + int ret = OB_SUCCESS; + BlockIndex *bi; + blk = nullptr; + on_disk = true; + if (OB_UNLIKELY(block_id < 0) || OB_UNLIKELY(block_id >= saved_block_id_cnt_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("row should be saved", K(ret), K(block_id), K_(saved_block_id_cnt)); + } else if (OB_FAIL(find_block_idx(reader, block_id, bi))) { + LOG_WARN("find block index failed", K(ret), K(block_id)); + } else { + if (!bi->on_disk_) { + blk = bi->blk_; + on_disk = false; + } else { + if (reader.is_async()) { + int aio_buf_idx = reader.aio_buf_idx_ % BlockReader::AIO_BUF_CNT; + if (OB_FAIL(ensure_reader_buffer(reader, reader.aio_buf_[aio_buf_idx], + bi->length_))) { + LOG_WARN("ensure reader buffer failed", K(ret)); + } else if (OB_FAIL(read_file(reader.aio_buf_[aio_buf_idx].data(), bi->length_, bi->offset_, + reader.get_read_io_handler(), 
reader.is_async()))) { + LOG_WARN("read block from file failed", K(ret), K(bi)); + } + } else { + if (OB_FAIL(ensure_reader_buffer(reader, reader.buf_, bi->length_))) { + LOG_WARN("ensure reader buffer failed", K(ret)); + } else if (OB_FAIL(read_file(reader.buf_.data(), bi->length_, bi->offset_, + reader.get_read_io_handler(), reader.is_async()))) { + LOG_WARN("read block from file failed", K(ret), K(bi)); + } + } + } + if (OB_SUCC(ret) && bi->on_disk_) { + if (reader.is_async()) { + reader.set_cur_file_offset(bi->offset_ > 0 ? bi->offset_ - 1 : 0); + } else { + blk = reinterpret_cast(reader.buf_.data()); + reader.set_cur_file_offset(bi->offset_ + bi->length_); + } + } + } + return ret; +} + +int ObTempBlockStore::find_block_idx(BlockReader &reader, const int64_t block_id, BlockIndex *&bi) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(block_id < 0) || OB_UNLIKELY(block_id >= saved_block_id_cnt_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("row should be saved", K(ret), K(block_id), K_(saved_block_id_cnt)); + } else { + bool found = false; + if (NULL != reader.idx_blk_) { + if (OB_UNLIKELY(reader.ib_pos_ < 0) + || OB_UNLIKELY(reader.ib_pos_ >= reader.idx_blk_->cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ib_pos out of range", K(ret), K(reader.ib_pos_), K(*reader.idx_blk_)); + } else { + int64_t pos = reader.ib_pos_; + if (block_id > reader.idx_blk_->block_indexes_[pos].block_id_) { + pos += 1; + if (reader.idx_blk_->blk_in_pos(block_id, pos)) { + found = true; + reader.ib_pos_ = pos; + } + } else { + pos -= 1; + if (reader.idx_blk_->blk_in_pos(block_id, pos)) { + found = true; + reader.ib_pos_ = pos; + } + } + } + if (!found) { + reader.reset_cursor(file_size_, false); + } else { + bi = &(reader.idx_blk_->block_indexes_[reader.ib_pos_]); + } + } + if (OB_FAIL(ret) || found) { + } else { + IndexBlock *ib = NULL; + if (NULL != idx_blk_ && !idx_blk_->is_empty() && block_id >= idx_blk_->block_id()) { + ib = idx_blk_; + } + + if (NULL == ib && blocks_.count() > 0) { + 
ObSEArray::iterator it = std::lower_bound(blocks_.begin(), blocks_.end(), block_id, &BlockIndex::compare); + if (it == blocks_.end() || it->block_id_ != block_id) { + it--; + } + bi = &(*it); + if (!bi->is_idx_block_) { + found = true; + } else { + if (OB_FAIL(load_idx_block(reader, ib, *bi))) { + LOG_WARN("load index block failed", K(ret), K(bi)); + } + } + } + + if (OB_FAIL(ret) || found) { + } else if (OB_UNLIKELY(NULL == ib) || OB_UNLIKELY(ib->cnt_ <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block index not found and index block is NULL or empty", K(ret)); + } else { + BlockIndex *it = std::lower_bound(&ib->block_indexes_[0], &ib->block_indexes_[ib->cnt_], + block_id, &BlockIndex::compare); + if (it == ib->block_indexes_ + ib->cnt_ || it->block_id_ != block_id) { + it--; + } + bi = &(*it); + reader.idx_blk_ = ib; + reader.ib_pos_ = it - ib->block_indexes_; + } + } + } + return ret; +} + +int ObTempBlockStore::load_idx_block(BlockReader &reader, IndexBlock *&ib, const BlockIndex &bi) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!bi.is_idx_block_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid block index", K(ret), K(bi)); + } else { + if (!bi.on_disk_) { + ib = bi.idx_blk_; + } else { + if (OB_UNLIKELY(bi.length_ > IndexBlock::INDEX_BLOCK_SIZE)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument", K(ret), K(bi)); + } else if (OB_FAIL(ensure_reader_buffer( + reader, reader.idx_buf_, IndexBlock::INDEX_BLOCK_SIZE))) { + LOG_WARN("ensure reader buffer failed", K(ret)); + } else if (OB_FAIL(read_file( + reader.idx_buf_.data(), bi.length_, bi.offset_, reader.get_read_io_handler()))) { + LOG_WARN("read block index from file failed", K(ret), K(bi)); + } else { + ib = reinterpret_cast(reader.idx_buf_.data()); + } + } + } + return ret; +} + +int ObTempBlockStore::ensure_reader_buffer(BlockReader &reader, ShrinkBuffer &buf, const int64_t size) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid 
argument", K(ret)); + } else { + TryFreeMemBlk *try_reuse_blk = NULL; + // try free expired blocks + if (NULL != reader.try_free_list_) { + TryFreeMemBlk *cur = reader.try_free_list_; + TryFreeMemBlk **p_cur = &reader.try_free_list_; + while (NULL != cur && (NULL != reader.age_) && cur->age_ >= reader.age_->get()) { + p_cur = &cur->next_; + cur = cur->next_; + } + if (NULL != cur) { + *p_cur = NULL; + while (NULL != cur) { + TryFreeMemBlk *p = cur->next_; + if (NULL == try_reuse_blk && cur->size_ >= size) { + try_reuse_blk = cur; + } else { + free_blk_mem(cur, cur->size_); + } + cur = p; + } + } + } + // add used block to try free list if in iteration age control. + if ((NULL != reader.blk_holder_ptr_ || NULL != reader.age_) && buf.is_inited()) { + TryFreeMemBlk *p = reinterpret_cast(buf.data()); + p->size_ = buf.capacity(); + if (NULL != reader.blk_holder_ptr_) { + p->reader_ = &reader; + p->next_ = reader.blk_holder_ptr_->blocks_; + reader.blk_holder_ptr_->blocks_ = p; + } else if (NULL != reader.age_) { + p->age_ = reader.age_->get(); + p->next_ = reader.try_free_list_; + reader.try_free_list_ = p; + } + buf.reset(); + } + + if (buf.is_inited() && buf.capacity() < size) { + free_blk_mem(buf.data(), buf.capacity()); + buf.reset(); + } + if (!buf.is_inited()) { + if (NULL == try_reuse_blk) { + // alloc new memory block for reader + const int64_t alloc_size = next_pow2(size); + char *mem = static_cast(alloc_blk_mem(alloc_size, &blk_mem_list_)); + if (OB_UNLIKELY(NULL == mem)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret), K(alloc_size)); + } else if (OB_FAIL(buf.init(mem, alloc_size))) { + LOG_WARN("init buffer failed", K(ret)); + free_blk_mem(mem); + mem = NULL; + } + } else if (OB_FAIL(buf.init(reinterpret_cast(try_reuse_blk), try_reuse_blk->size_))) { + LOG_WARN("fail to init reused block buf", K(ret), K_(try_reuse_blk->size)); + free_blk_mem(try_reuse_blk); + try_reuse_blk = NULL; + } + } + } + return ret; +} + +int 
ObTempBlockStore::BlockReader::aio_wait() +{ + int ret = OB_SUCCESS; + int64_t timeout_ms = 0; + OZ(get_timeout(timeout_ms)); + if (OB_SUCC(ret)) { + if (OB_FAIL(read_io_handle_.wait())) { + LOG_WARN("aio wait failed", K(ret), K(timeout_ms)); + } + } + return ret; +} + +int ObTempBlockStore::write_file(BlockIndex &bi, void *buf, int64_t size) +{ + int ret = OB_SUCCESS; + int64_t timeout_ms = 0; + if (OB_UNLIKELY(size < 0) || OB_UNLIKELY(size > 0 && NULL == buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(size), KP(buf)); + } else if (OB_FAIL(get_timeout(timeout_ms))) { + LOG_WARN("get timeout failed", K(ret)); + } else { + if (!is_file_open()) { + if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.alloc_dir(dir_id_))) { + LOG_WARN("alloc file directory failed", K(ret)); + } else if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.open(fd_, dir_id_))) { + LOG_WARN("open file failed", K(ret)); + } else { + file_size_ = 0; + LOG_INFO("open file success", K_(fd), K_(dir_id)); + } + } + ret = OB_E(EventTable::EN_8) ret; + } + if (OB_SUCC(ret) && size > 0) { + if (NULL != mem_stat_) { + mem_stat_->dumped(size); + } + blocksstable::ObTmpFileIOInfo io; + io.fd_ = fd_; + io.buf_ = static_cast(buf); + io.size_ = size; + io.tenant_id_ = tenant_id_; + io.io_desc_.set_wait_event(ObWaitEventIds::ROW_STORE_DISK_WRITE); + io.io_timeout_ms_ = timeout_ms; + const uint64_t start = rdtsc(); + if (write_io_handle_.is_valid() && OB_FAIL(write_io_handle_.wait())) { + LOG_WARN("fail to wait write", K(ret), K(write_io_handle_)); + } else if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.aio_write(io, write_io_handle_))) { + LOG_WARN("write to file failed", K(ret), K(io), K(timeout_ms)); + } + if (NULL != io_observer_) { + io_observer_->on_write_io(rdtsc() - start); + } + } + if (OB_SUCC(ret)) { + bi.on_disk_ = true; + bi.offset_ = file_size_; + file_size_ += size; + } + return ret; +} + +int ObTempBlockStore::read_file(void *buf, const int64_t size, const int64_t offset, + blocksstable::ObTmpFileIOHandle 
&handle, const bool is_async) +{ + int ret = OB_SUCCESS; + int64_t timeout_ms = 0; + if (OB_UNLIKELY(offset < 0) || OB_UNLIKELY(size < 0) || OB_UNLIKELY(size > 0 && NULL == buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(size), K(offset), KP(buf)); + } else if (OB_FAIL(get_timeout(timeout_ms))) { + LOG_WARN("get timeout failed", K(ret)); + } else if (!handle.is_valid()) { + if (OB_FAIL(write_io_handle_.wait())) { + LOG_WARN("fail to wait write", K(ret)); + } + } + + if (OB_SUCC(ret) && size > 0) { + blocksstable::ObTmpFileIOInfo io; + io.fd_ = fd_; + io.dir_id_ = dir_id_; + io.buf_ = static_cast(buf); + io.size_ = size; + io.tenant_id_ = tenant_id_; + io.io_desc_.set_wait_event(ObWaitEventIds::ROW_STORE_DISK_READ); + io.io_timeout_ms_ = timeout_ms; + const uint64_t start = rdtsc(); + if (is_async) { + if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.aio_pread(io, offset, handle))) { + LOG_WARN("read form file failed", K(ret), K(io), K(offset), K(timeout_ms)); + } + } else { + if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.pread(io, offset, handle))) { + LOG_WARN("read form file failed", K(ret), K(io), K(offset), K(timeout_ms)); + } else if (OB_UNLIKELY(handle.get_data_size() != size)) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("read data less than expected", + K(ret), K(io), "read_size", handle.get_data_size()); + } + } + if (NULL != io_observer_) { + io_observer_->on_read_io(rdtsc() - start); + } + } + return ret; +} + + +int ObTempBlockStore::dump_block_if_need(const int64_t extra_size) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(need_dump(extra_size))) { + int64_t target_dump_size = extra_size + mem_hold_ - mem_limit_; + // Check whether an IndexBlock will be added, and pre-allocate the corresponding mem size + if ((NULL == idx_blk_ && blocks_.count() >= DEFAULT_BLOCK_CNT - 1) || + (NULL != idx_blk_ && idx_blk_->is_full())) { + target_dump_size += IndexBlock::INDEX_BLOCK_SIZE; + } + if (OB_FAIL(dump(false, std::max(target_dump_size, BIG_BLOCK_SIZE)))) { + 
LOG_WARN("fail to dump block", K(ret), K(mem_hold_), K(mem_limit_)); + } + } + return ret; +} + +bool ObTempBlockStore::need_dump(const int64_t extra_size) +{ + bool need_to_dump = false; + if (!GCONF.is_sql_operator_dump_enabled() || !enable_dump_) { // no dump + } else if (mem_limit_ > 0) { + if (mem_hold_ + extra_size > mem_limit_) { + need_to_dump = true; + LOG_TRACE("need dump", K(mem_hold_), K(mem_limit_)); + } + } + return need_to_dump; +} + +int ObTempBlockStore::dump(const bool all_dump, const int64_t target_dump_size /*INT64_MAX*/) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited())) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (!enable_dump_) { + ret = OB_EXCEED_MEM_LIMIT; + LOG_INFO("BlockStore exceed mem limit and dump is disabled", K(ret)); + } else { + if (all_dump) { + // If need to dump all, first switch block and index block to ensure + // that all block indexes are established. + if (OB_FAIL(switch_block(0 /*finish_add */, false))) { + LOG_WARN("fail to dump last block", K(ret)); + } else if (NULL != idx_blk_ && OB_FAIL(switch_idx_block(true /* finish_add */))) { + LOG_WARN("fail to dump last index block", K(ret)); + } + LOG_TRACE("dump all blocks", K(blk_mem_list_.get_size())); + } + int64_t total_dumped_size = 0; + LinkNode *node = blk_mem_list_.get_first(); + LinkNode *next_node = NULL; + void *mem = nullptr; + while (OB_SUCC(ret) && node != blk_mem_list_.get_header() && + (all_dump || total_dumped_size < target_dump_size)) { + next_node = node->get_next(); + if (OB_ISNULL(mem = static_cast(node + 1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur block is null", K(ret)); + } else { + BlockIndex *bi = NULL; + int64_t dumped_size = 0; + if (is_last_block(mem)) { + // skip the last block or index block + } else if (is_block(mem)) { + if (OB_FAIL(dump_block(static_cast(mem), dumped_size))) { + LOG_WARN("fail to dump block", K(ret)); + } + } else if (is_index_block(mem)) { + if 
(OB_FAIL(dump_index_block(static_cast(mem), dumped_size))) { + LOG_WARN("fail to dump index block", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("magic of cur block is unexpected", K(ret), K(block_magic(mem)), + K(blk_mem_list_.get_size())); + } + if (OB_SUCC(ret) && dumped_size > 0) { + if (OB_ISNULL(blk_mem_list_.remove(node))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("remove node failed", K(ret), K(blk_mem_list_.get_size())); + } else { + node->~LinkNode(); + allocator_->free(node); + total_dumped_size += dumped_size; + } + } + } + if (OB_SUCC(ret)) { + node = next_node; + } + } + if (OB_SUCC(ret) && OB_UNLIKELY(all_dump && !blk_mem_list_.is_empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("all_dump mode blk_mem_list_ is non-empty", K(ret), K(blk_mem_list_.get_size())); + } + } + return ret; +} + +int ObTempBlockStore::write_compressed_block(Block *blk, BlockIndex *bi) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(blk) || OB_ISNULL(bi)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null pointer", K(ret)); + } else { + // only compress payload_ in block. 
+ int64_t need_size = 0; + int64_t data_size = blk->raw_size_ - sizeof(Block); + char *comp_buf = nullptr; + int64_t comp_size = 0; + if (OB_FAIL(compressor_.calc_need_size(data_size, need_size))) { + LOG_WARN("fail to calc need size", K(ret)); + } else if (OB_ISNULL(comp_buf = (char *)allocator_->alloc(need_size + sizeof(Block)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret), KP(comp_buf)); + } else if (FALSE_IT(MEMCPY(comp_buf, blk, sizeof(Block)))) { // copy the head + } else if (OB_FAIL(compressor_.compress(blk->payload_, data_size, need_size, comp_buf + sizeof(Block), comp_size))) { + LOG_WARN("fail to compress block", K(ret)); + } else if (OB_FAIL(write_file(*bi, static_cast(comp_buf), comp_size + sizeof(Block)))) { + LOG_WARN("fail to write compressed block to file", K(ret)); + } else { + bi->length_ = comp_size + sizeof(Block); + } + if (OB_NOT_NULL(comp_buf)) { + allocator_->free(comp_buf); + } + } + + return ret; +} + +int ObTempBlockStore::dump_block(Block *blk, int64_t &dumped_size) +{ + int ret = OB_SUCCESS; + BlockIndex *bi; + if (OB_FAIL(find_block_idx(inner_reader_, blk->block_id_, bi))) { + LOG_WARN("fail to find_block_index", K(ret), K(blk)); + } else if (OB_UNLIKELY(bi->on_disk_ || bi->is_idx_block_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block on disk is unexpected", K(ret), K(bi)); + } else if (FALSE_IT(dumped_size = bi->length_)) { + } else if (OB_FAIL(prepare_blk_for_write(blk))) { + LOG_WARN("fail to prepare blk for write", K(ret)); + } else if (need_compress()) { + if (OB_FAIL(write_compressed_block(blk, bi))) { + LOG_WARN("fail to write compressed block", K(ret), K(bi)); + } + } else { + if (OB_FAIL(write_file(*bi, static_cast(blk), bi->length_))) { + LOG_WARN("write block to file failed", K(ret), K(bi)); + } + } + + if (OB_SUCC(ret)) { + LOG_TRACE("Dump block", K(*blk), K(*bi)); + ++block_cnt_on_disk_; + dumped_block_id_cnt_ += blk->cnt_; + inc_mem_hold(-(bi->capacity_ + sizeof(LinkNode))); + 
inc_mem_used(-(dumped_size + sizeof(ShrinkBuffer))); + } + return ret; +} + +// Writes an in-memory index block to the temp file through write_file() and reports its length via dumped_size (left at 0 on failure). +int ObTempBlockStore::dump_index_block(IndexBlock *idx_blk, int64_t &dumped_size) +{ + int ret = OB_SUCCESS; + dumped_size = 0; + if (OB_ISNULL(idx_blk)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Index block is null", K(ret), KP(idx_blk), KP(idx_blk_)); + } else if (OB_UNLIKELY(idx_blk->is_empty() || blocks_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("idx_blk or blocks is empty", K(ret), K(idx_blk), K(blocks_)); + } else { + // trace only after the null check above, because K(*idx_blk) dereferences the pointer + LOG_TRACE("dump idx_bk", K(*idx_blk)); + BlockIndex *bi = NULL; + const int64_t block_id = idx_blk->block_id(); + ObSEArray::iterator it = std::lower_bound(blocks_.begin(), blocks_.end(), block_id, &BlockIndex::compare); + if (it == blocks_.end() || it->block_id_ != block_id) { + it--; + } + bi = &(*it); + if (OB_ISNULL(bi)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("bi is null", K(ret), K(blocks_.count())); + } else if (OB_UNLIKELY(!bi->is_idx_block_ || bi->on_disk_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block index is not idx block or memory index", K(ret), KP(bi)); + } else if (OB_FAIL(write_file(*bi, static_cast(idx_blk), bi->length_))) { // write file and update bi + LOG_WARN("write index block to file failed", K(ret), K(bi)); + } else { + dumped_size = bi->length_; + inc_mem_hold(-(bi->capacity_ + sizeof(LinkNode))); + inc_mem_used(-(dumped_size)); + } + } + return ret; +} + +void ObTempBlockStore::free_mem_list(ObDList &list) +{ + while (!list.is_empty()) { + LinkNode *node = list.remove_first(); + if (NULL != node) { + node->~LinkNode(); + allocator_->free(node); + } + } +} + +int ObTempBlockStore::BlockReader::init(ObTempBlockStore *store) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(store)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(store)); + } else { + store_ = store; + } + return ret; +} + +void ObTempBlockStore::BlockReader::reset() +{ + reset_cursor(0); + if (NULL != store_) { + free_all_blks(); + 
store_->free_blk_mem(idx_buf_.data(), idx_buf_.capacity()); + idx_buf_.reset(); + for (int64_t i = 0; i < AIO_BUF_CNT; i++) { + if (aio_buf_[i].data() != buf_.data()) { + store_->free_blk_mem(aio_buf_[i].data(), aio_buf_[i].capacity()); + } + aio_buf_[i].reset(); + } + store_->free_blk_mem(buf_.data(), buf_.capacity()); + buf_.reset(); + decompr_buf_.reset(); + /* + * 1. No need to free decompr_buf_, since its data_ is the same as buf. + * 2. aio_buf_[N].data() may be the same pointer as buf_.data(); it must not be freed twice. + */ + } + read_io_handle_.reset(); +} + +// Releases the same buffers as reset(); the only difference is that it calls read_io_handle_.set_last_extent_id(0) instead of read_io_handle_.reset(). +void ObTempBlockStore::BlockReader::reuse() +{ + reset_cursor(0); + if (NULL != store_) { + free_all_blks(); + store_->free_blk_mem(idx_buf_.data(), idx_buf_.capacity()); + idx_buf_.reset(); + for (int64_t i = 0; i < AIO_BUF_CNT; i++) { + if (aio_buf_[i].data() != buf_.data()) { + store_->free_blk_mem(aio_buf_[i].data(), aio_buf_[i].capacity()); + } + aio_buf_[i].reset(); + } + store_->free_blk_mem(buf_.data(), buf_.capacity()); + buf_.reset(); + decompr_buf_.reset(); + } + read_io_handle_.set_last_extent_id(0); +} + +// Records file_size, clears the index/aio block pointers and positions, and releases the block holder when need_release is set. +void ObTempBlockStore::BlockReader::reset_cursor(const int64_t file_size, const bool need_release) +{ + file_size_ = file_size; + idx_blk_ = NULL; + aio_blk_ = NULL; + ib_pos_ = 0; + cur_file_offset_ = 0; + if (need_release && nullptr != blk_holder_ptr_) { + blk_holder_ptr_->release(); + blk_holder_ptr_ = nullptr; + } +} + +// Frees every block queued on try_free_list_ through the owning store. +void ObTempBlockStore::BlockReader::free_all_blks() +{ + while (NULL != try_free_list_ && NULL != store_) { + TryFreeMemBlk *next = try_free_list_->next_; + store_->free_blk_mem(try_free_list_, try_free_list_->size_); + try_free_list_ = next; + } +} + +// Walks blocks_ and frees each block through the reader recorded in it; a block without a reader is only logged. +void ObTempBlockStore::BlockHolder::release() +{ + while (NULL != blocks_) { + TryFreeMemBlk *next = blocks_->next_; + if (OB_NOT_NULL(blocks_) && OB_NOT_NULL(blocks_->reader_)) { + blocks_->reader_->free_blk_mem(blocks_, blocks_->size_); + } else { + LOG_ERROR_RET(OB_ERR_UNEXPECTED, "get unexpected block pair", KP(blocks_), 
KP(blocks_->reader_)); + } + blocks_ = next; + } +} + +OB_DEF_SERIALIZE(ObTempBlockStore) +{ + int ret = OB_SUCCESS; + if (enable_dump_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block store not support serialize if enable dump", K(ret)); + } + LST_DO_CODE(OB_UNIS_ENCODE, + tenant_id_, + ctx_id_, + mem_limit_, + label_); + const int64_t count = get_block_cnt(); + OB_UNIS_ENCODE(count); + if (OB_SUCC(ret)) { + const LinkNode *node = blk_mem_list_.get_first(); + const LinkNode *next_node = NULL; + const void *mem = nullptr; + while (OB_SUCC(ret) && node != blk_mem_list_.get_header()) { + next_node = node->get_next(); + if (OB_ISNULL(mem = static_cast(node + 1))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur block is null", K(ret)); + } else if (is_index_block(mem)) { + // skip serialize index block + } else if (OB_UNLIKELY(!is_block(mem))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block magic mismatch", K(ret), K(block_magic(mem))); + } else { + // serialize data_buf_size + const Block* blk = static_cast(mem); + LOG_DEBUG("serialize block", K(*blk)); + const int64_t raw_size = get_block_raw_size(blk); + OB_UNIS_ENCODE(raw_size); + if (OB_SUCC(ret)) { + // serialize block data + if (buf_len - pos < raw_size) { + ret = OB_SIZE_OVERFLOW; + } else { + MEMCPY(buf + pos, mem, raw_size); + pos += raw_size; + } + } + } + if (OB_SUCC(ret)) { + node = next_node; + } + } //end while + } + return ret; +} + + +// Decodes the header fields, initializes the store if it is not inited yet, then appends blk_cnt length-prefixed blocks read from buf. +OB_DEF_DESERIALIZE(ObTempBlockStore) +{ + int ret = OB_SUCCESS; + char label[lib::AOBJECT_LABEL_SIZE + 1]; + LST_DO_CODE(OB_UNIS_DECODE, + tenant_id_, + ctx_id_, + mem_limit_, + label); + if (!is_inited()) { + if (OB_FAIL(init(mem_limit_, false/*enable_dump*/, tenant_id_, ctx_id_, label))) { + LOG_WARN("fail to init Block row store", K(ret)); + } + } + if (OB_SUCC(ret)) { + int64_t raw_size = 0; + int64_t blk_cnt = 0; + OB_UNIS_DECODE(blk_cnt); + for (int64_t i = 0; i < blk_cnt && OB_SUCC(ret); ++i) { + OB_UNIS_DECODE(raw_size); + OZ(append_block(buf + 
pos, raw_size)); + // append_block() receives the pointer by value and cannot move pos, so skip the block bytes here; + // serialize advances pos by raw_size after each MEMCPY and the two sides must stay symmetric. + if (OB_SUCC(ret)) { + pos += raw_size; + } + } + } + return ret; +} + +OB_DEF_SERIALIZE_SIZE(ObTempBlockStore) +{ + int64_t len = 0; + LST_DO_CODE(OB_UNIS_ADD_LEN, + tenant_id_, + ctx_id_, + mem_limit_, + label_); + const int64_t count = get_block_cnt(); + OB_UNIS_ADD_LEN(count); + const LinkNode *node = blk_mem_list_.get_first(); + const LinkNode *next_node = NULL; + const void *mem = nullptr; + while (node != blk_mem_list_.get_header()) { + next_node = node->get_next(); + if (OB_ISNULL(mem = static_cast(node + 1))) { + break; + } else if (is_block(mem)) { + const Block* blk = static_cast(mem); + const int64_t payload_size = get_block_raw_size(blk); + OB_UNIS_ADD_LEN(payload_size); + len += payload_size; + } + node = next_node; + } //end while + + return len; +} + +int ObTempBlockStore::truncate_file(int64_t offset) +{ + int ret = OB_SUCCESS; + if (!is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(FILE_MANAGER_INSTANCE_V2.truncate(get_file_fd(), offset))) { + LOG_WARN("truncate failed", K(ret), K(get_file_fd()), K(offset)); + } + return ret; +} + +} // end namespace sql +} // end namespace oceanbase diff --git a/src/sql/engine/basic/ob_temp_block_store.h b/src/sql/engine/basic/ob_temp_block_store.h new file mode 100644 index 000000000..4070c0274 --- /dev/null +++ b/src/sql/engine/basic/ob_temp_block_store.h @@ -0,0 +1,606 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_BASIC_OB_TEMP_BLOCK_STORE_H_ +#define OCEANBASE_BASIC_OB_TEMP_BLOCK_STORE_H_ + +#include "share/ob_define.h" +#include "lib/container/ob_se_array.h" +#include "lib/allocator/page_arena.h" +#include "lib/list/ob_dlist.h" +#include "sql/engine/basic/ob_sql_mem_callback.h" +#include "lib/checksum/ob_crc64.h" +#include "sql/engine/basic/chunk_store/ob_chunk_block_compressor.h" +#include "storage/blocksstable/ob_tmp_file.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObIOEventObserver; + +/* + * Implementation of block store supporting random access, structure simplified as follows: + * +------------+ + * | BlockIndex | (in-memory list) + * +------------+ + * | + * +------------+ + * | IndexBlock | + * +------------+ + * | + * +------------+ + * | BlockIndex | + * +------------+ + * | + * +-------+ + * | Block | ... + * +-------+ + * Block: data block used to store data + * BlockIndex: index of block, which can be either data block or index block, + * storing memory pointer (for in-memory block) or file offset (for disk block) + * IndexBlock: index block made up of multiple indexes. + * + * Supports random access between blocks, and the BlockReader can randomly access the block + * by specifying block_id externally. + */ +class ObTempBlockStore +{ + OB_UNIS_VERSION_V(1); +public: + /* + * ShrinkBuffer, a buffer wrapper class supporting bidirectional writing, + * which writes from the front using the head and from the back using the tail. 
+ * data(block payload) cap + * | | + * +-----------------------------------+ + * | (ShrinkBuffer) | + * +-----------------------------------+ + * | -> <- | + * head tail + */ + class ShrinkBuffer + { + public: + ShrinkBuffer() : data_(NULL), head_(0), tail_(0), cap_(0) {} + + int init(char *buf, const int64_t buf_size); + inline int64_t remain() const { return tail_ - head_; } + inline char *data() { return data_; } + inline const char *data() const { return data_; } + inline char *head() const { return data_ + head_; } + inline int64_t head_size() const { return head_; } + inline char *tail() const { return data_ + tail_; } + inline int64_t tail_size() const { return cap_ - tail_; } + inline int64_t capacity() const { return cap_; } + inline bool is_inited() const { return NULL != data_; } + inline void reset() { *this = ShrinkBuffer(); } + inline void reuse() { head_ = 0; tail_ = cap_; } + inline void fast_advance(int64_t size) { head_ += size; } + inline int advance(int64_t size) { return fill_head(size); } + inline int fill_head(int64_t size); + inline int fill_tail(int64_t size); + inline int compact(); + TO_STRING_KV(KP_(data), K_(head), K_(tail), K_(cap)); + private: + char *data_; + int64_t head_; + int64_t tail_; + int64_t cap_; + }; + + /* + * Block, a stucture storing the data uses block id for indexing, + * the real data starts from the payload. + * If the block is in the process of data appending, the tail will occupy one `ShrinkBuffer` size + * to record the current writing position information. 
+ * The memory layout is as follows: + * +------------------------------------------------------------------+ + * | Block Header | Payload | ShrinkBuffer(optional)| + * +------------------------------------------------------------------+ + */ + struct Block + { + static const int64_t MAGIC = 0x35f4451b9b56eb12; + + Block() : magic_(MAGIC), block_id_(0), cnt_(0), raw_size_(0) {} + + inline static int64_t min_blk_size(const int64_t size) + { + return sizeof(Block) + sizeof(ShrinkBuffer) + size; + } + inline bool contain(const int64_t block_id) const + { + return begin() <= block_id && block_id < end(); + } + inline int64_t begin() const { return block_id_; } + inline int64_t end() const { return block_id_ + cnt_; } + inline int64_t remain() const { return get_buffer()->remain(); } + inline ShrinkBuffer* get_buffer() + { + return static_cast(static_cast(payload_ + buf_off_)); + } + inline const ShrinkBuffer* get_buffer() const + { + return static_cast(static_cast(payload_ + buf_off_)); + } + inline static char *buffer_position(void *mem, const int64_t size) + { + return static_cast(mem) + size - sizeof(ShrinkBuffer); + } + inline uint64_t checksum() const { + ObBatchChecksum bc; + bc.fill(payload_, raw_size_ - sizeof(Block)); + return bc.calc(); + } + TO_STRING_KV(K_(magic), K_(block_id), K_(cnt), K_(raw_size)); + + int64_t magic_; + // increment identity of the block, it also can be row_id or any identity that + // satisfies the incremental unique identity. + int64_t block_id_; + // count of items in the block, ensure that the next block's `begin()` should + // be equal to the `end()` of this block. + uint32_t cnt_; + union { + // raw size of the block before compression. + uint32_t raw_size_; + // buffer offset before block switching. + uint32_t buf_off_; + }; + char payload_[0]; + } __attribute__((packed)); + + struct IndexBlock; + // The index of the block (index block) records the block's id, size, capacity, memory pointer + // or file offset. 
+ struct BlockIndex + { + static bool compare(const BlockIndex &bi, const int64_t block_id) + { + return bi.block_id_ < block_id; + } + + TO_STRING_KV(K_(is_idx_block), K_(on_disk), K_(block_id), K_(offset), K_(length), K_(capacity)); + + uint64_t is_idx_block_:1; + uint64_t on_disk_:1; + uint64_t block_id_ : 62; + union { + IndexBlock *idx_blk_; + Block *blk_; + int64_t offset_; + }; + int32_t length_; + int32_t capacity_; + } __attribute__((packed)); + + // Used for block linking in memory. + class LinkNode : public common::ObDLinkBase + { + }; + // An index block composed of indexes. + struct IndexBlock + { + const static int64_t MAGIC = 0x4847bcb053c3703f; + const static int64_t INDEX_BLOCK_SIZE = (64 << 10) - sizeof(LinkNode); + constexpr static inline int64_t capacity() + { + return (INDEX_BLOCK_SIZE - sizeof(IndexBlock)) / sizeof(BlockIndex); + } + + IndexBlock() : magic_(MAGIC), cnt_(0) {} + + inline int64_t buffer_size() const { return sizeof(*this) + sizeof(BlockIndex) * cnt_; } + inline bool is_full() const { return cnt_ == capacity(); } + inline bool is_empty() const { return 0 == cnt_; } + + // may return false when row in position (false negative), + // since block index only contain start id, we can not detect right boundary. + inline bool blk_in_pos(const int64_t block_id, const int64_t pos); + + void reset() { cnt_ = 0; } + + inline uint64_t block_id() const { return block_indexes_[0].block_id_; } + + TO_STRING_KV(K_(magic), K_(cnt)); + + int64_t magic_; + int32_t cnt_; + BlockIndex block_indexes_[0]; + } __attribute__((packed)); + + // Iteration age used for iterated rows life cycle control, iterated rows' memory are available + // until age increased. E.g.: + // + // IterationAge iter_age; + // Reader it(ra_row_store); + // it.set_iteration_age(iter_age); + // + // while (...) 
{ + // iter_age.inc(); + // + // it.get_row(idx1, row1); + // it.get_row(iex2, row2); + // + // // row1 and row2's memory are still available here, until the get_row() is called + // // after iteration age increased. + // } + class IterationAge + { + public: + IterationAge() : age_(0) {} + int64_t get(void) const { return age_; } + void inc(void) { age_ += 1; } + private: + int64_t age_; + }; + + class BlockReader; + struct TryFreeMemBlk + { + TryFreeMemBlk *next_; + union { + int64_t age_; + BlockReader *reader_; + }; + int64_t size_; + + TryFreeMemBlk() = delete; + }; + + static_assert(std::is_pod::value == true, "TryFreeMemBlk should be pod"); + + struct BlockHolder + { + BlockHolder() : blocks_(NULL) {} + ~BlockHolder() + { + release(); + } + void release(); + TryFreeMemBlk *blocks_; + }; + + // A reader that supports random access between blocks. must be deleted before TempBlockStore + class BlockReader + { + friend class ObTempBlockStore; + friend class BlockHolder; + static const int AIO_BUF_CNT = 2; + public: + BlockReader() : store_(NULL), idx_blk_(NULL), ib_pos_(0), file_size_(0), cur_file_offset_(0), + age_(NULL), try_free_list_(NULL), blk_holder_ptr_(NULL), read_io_handle_(), + is_async_(true), aio_buf_idx_(0), aio_blk_(nullptr) {} + virtual ~BlockReader() { reset(); } + + int init(ObTempBlockStore *store); + + inline int64_t get_cur_file_offset() const { return cur_file_offset_; } + inline void set_cur_file_offset(int64_t file_offset) { cur_file_offset_ = file_offset; } + int get_block(const int64_t block_id, const Block *&blk); + inline int64_t get_block_cnt() const { return store_->get_block_cnt(); } + void set_iteration_age(IterationAge *age) { age_ = age; } + void set_blk_holder(BlockHolder *holder) { blk_holder_ptr_ = holder; } + blocksstable::ObTmpFileIOHandle& get_read_io_handler() { return read_io_handle_; } + inline bool is_async() { return is_async_; } + inline void set_async(bool async) { is_async_ = async; } + void reset(); + void reuse(); 
+ TO_STRING_KV(KPC_(store), K_(buf), K_(idx_buf), KP_(idx_blk), K_(ib_pos), K_(file_size), + KP_(age), KP_(try_free_list), KP_(blk_holder_ptr), K_(cur_file_offset), K_(is_async), + K(read_io_handle_), K(aio_buf_), K(decompr_buf_)); + + private: + void reset_cursor(const int64_t file_size, const bool need_release = true); + void free_all_blks(); + void free_blk_mem(void *mem, const int64_t size) { store_->free_blk_mem(mem, size); } + int aio_wait(); + + private: + ObTempBlockStore *store_; + ShrinkBuffer buf_; + ShrinkBuffer aio_buf_[AIO_BUF_CNT]; + ShrinkBuffer decompr_buf_; + ShrinkBuffer idx_buf_; + IndexBlock *idx_blk_; + // current block index position in index block + int64_t ib_pos_; + // idx_blk_, blk_ may point to the writing block, + // we need to invalid the pointers if file_size_ change. + int64_t file_size_; + int64_t cur_file_offset_; + IterationAge *age_; + TryFreeMemBlk *try_free_list_; + BlockHolder *blk_holder_ptr_; + // to optimize performance, record the last_extent_id to avoid do binary search every time calling read. 
+ blocksstable::ObTmpFileIOHandle read_io_handle_; + bool is_async_; + int aio_buf_idx_; + const Block *aio_blk_; + DISALLOW_COPY_AND_ASSIGN(BlockReader); + }; + +public: + const static int64_t BLOCK_SIZE = (64L << 10) - sizeof(LinkNode); + const static int64_t BIG_BLOCK_SIZE = (256L << 10) - sizeof(LinkNode); + const static int64_t DEFAULT_BLOCK_CNT = (1L << 20) / BLOCK_SIZE; + + explicit ObTempBlockStore(common::ObIAllocator *alloc = NULL); + virtual ~ObTempBlockStore() { reset(); } + int init(int64_t mem_limit, + bool enable_dump, + uint64_t tenant_id, + int64_t mem_ctx_id, + const char *label, + common::ObCompressorType compressor_type = NONE_COMPRESSOR); + void reset(); + void reuse(); + void reset_block_cnt(); + bool is_inited() const { return inited_; } + bool is_file_open() const { return fd_ >= 0; } + void set_tenant_id(const uint64_t tenant_id) { tenant_id_ = tenant_id; } + void set_mem_ctx_id(const int64_t ctx_id) { ctx_id_ = ctx_id; } + void set_mem_limit(const int64_t limit) { mem_limit_ = limit; } + void set_mem_stat(ObSqlMemoryCallback *mem_stat) { mem_stat_ = mem_stat; } + void set_callback(ObSqlMemoryCallback *callback) { mem_stat_ = callback; } + void reset_callback() + { + mem_stat_ = nullptr; + io_observer_ = nullptr; + } + void set_io_event_observer(ObIOEventObserver *io_observer) { io_observer_ = io_observer; } + // set iteration age for inner reader. 
+ void set_allocator(common::ObIAllocator &alloc) { allocator_ = &alloc; } + void set_dir_id(int64_t dir_id) { dir_id_ = dir_id; } + void set_iteration_age(IterationAge *age) { inner_reader_.set_iteration_age(age); } + inline void set_mem_used(const int64_t mem_used) { mem_used_ = mem_used; } + inline void inc_mem_used(const int64_t mem_used) { mem_used_ += mem_used; } + inline uint64_t get_tenant_id() const { return tenant_id_; } + inline int64_t get_mem_ctx_id() const { return ctx_id_; } + inline int64_t get_block_id_cnt() const { return block_id_cnt_; } + inline void inc_block_id_cnt(int64_t cnt) { block_id_cnt_ += cnt; } + inline int64_t get_dumped_block_id_cnt() const { return dumped_block_id_cnt_; } + inline int64_t get_block_cnt() const { return block_cnt_; } + inline int64_t get_index_block_cnt() const { return index_block_cnt_; } + inline int64_t get_block_cnt_on_disk() const { return block_cnt_on_disk_; } + inline int64_t get_block_cnt_in_mem() const { return block_cnt_ - block_cnt_on_disk_; } + inline int64_t get_blk_mem_list_cnt() const { return blk_mem_list_.get_size(); } + inline int64_t get_block_list_cnt() { return blk_mem_list_.get_size(); } + inline int64_t get_mem_hold() const { return mem_hold_; } + inline int64_t get_mem_used() const { return mem_used_; } + inline int64_t get_alloced_mem_size() const { return alloced_mem_size_; } + inline int64_t get_alloced_mem_cnt() const { return alloced_mem_list_.get_size(); } + inline int64_t get_file_fd() const { return fd_; } + inline int64_t get_file_dir_id() const { return dir_id_; } + inline int64_t get_file_size() const { return file_size_; } + inline int64_t get_max_blk_size() const { return max_block_size_; } + inline int64_t has_dumped() const { return block_cnt_on_disk_ > 0; } + inline int64_t get_last_buffer_mem_size() const + { + return nullptr == blk_ ? 
0 : blk_->get_buffer()->capacity(); + } + static int init_block_buffer(void* mem, const int64_t size, Block *&block); + int append_block(const char *buf, const int64_t size); + int append_block_payload(const char *buf, const int64_t size, const int64_t cnt); + int alloc_dir_id(); + int dump(const bool all_dump, const int64_t target_dump_size=INT64_MAX); + virtual int finish_write() { + return OB_SUCCESS; + } + int finish_add_row(bool need_dump = true); + + + TO_STRING_KV(K_(inited), K_(enable_dump), K_(tenant_id), K_(label), K_(ctx_id), K_(mem_limit), + K_(mem_hold), K_(mem_used), K_(fd), K_(dir_id), K_(file_size), K_(block_cnt), + K_(index_block_cnt), K_(block_cnt_on_disk), K_(block_id_cnt), K_(dumped_block_id_cnt), + K_(alloced_mem_size)); + + void *alloc(const int64_t size) + { + alloced_mem_size_ += size; + return alloc_blk_mem(size, &alloced_mem_list_); + } + void free(void *mem, const int64_t size) + { + alloced_mem_size_ -= size; + free_blk_mem(mem, size); + } + + + int new_block(const int64_t mem_size, Block *&blk, const bool strict_mem_size); + int truncate_file(int64_t offset); + +protected: + /* + * Allocate a new block as the currently written block, which can get block pointer through + * `blk_` and plz ensure that the `head_size` of ` blk_->get_buffer()` is the actual size + * after used. During the allocation process, the block will be indexed and memory managed. + * + * @param `mem_size`: memory size required by external callers + * @param `strict_mem_size`: If true, allocate memory strictly according to the size passed in. + The actual memory size used is the sum of mem_size, + block header(sizeof(Block)) and link size (sizeof(LinkNode)). + If false, The size of the actual allocation may limit the minimum + block size and do memory alignment. 
+ */ + inline int ensure_write_blk(const int64_t mem_size, const bool strict_mem_size = false) + { + return new_block(mem_size, blk_, strict_mem_size); + } + int get_block(BlockReader &reader, const int64_t block_id, const Block *&blk); + +private: + int inner_get_block(BlockReader &reader, const int64_t block_id, + const Block *&blk, bool &blk_on_disk); + int decompr_block(BlockReader &reader, const Block *&blk); + inline static int64_t block_magic(const void *mem) + { + return *(static_cast(mem)); + } + inline static bool is_block(const void *mem) { return Block::MAGIC == block_magic(mem); }; + inline static bool is_index_block(const void *mem) + { + return IndexBlock::MAGIC == block_magic(mem); + } + inline bool is_last_block(const void *mem) const + { + return mem == blk_ || mem == idx_blk_; + } + static int get_timeout(int64_t &timeout_ms); + int alloc_block(Block *&blk, const int64_t min_size, const bool strict_mem_size); + void *alloc_blk_mem(const int64_t size, common::ObDList *list); + int setup_block(ShrinkBuffer *buf, Block *&blk); + // new block is not needed if %min_size is zero. 
(finish add row) + int switch_block(const int64_t min_size, const bool strict_mem_size); + int add_block_idx(const BlockIndex &bi); + int alloc_idx_block(IndexBlock *&ib); + int build_idx_block(); + int switch_idx_block(bool finish_add = false); + int link_idx_block(IndexBlock *idx_blk); + void set_mem_hold(int64_t hold); + void inc_mem_hold(int64_t hold); + void free_blk_mem(void *mem, const int64_t size = 0); + + int load_block(BlockReader &reader, const int64_t block_id, const Block *&blk, bool &on_disk); + int find_block_idx(BlockReader &reader, const int64_t block_id, BlockIndex *&bi); + int load_idx_block(BlockReader &reader, IndexBlock *&ib, const BlockIndex &bi); + int ensure_reader_buffer(BlockReader &reader, ShrinkBuffer &buf, const int64_t size); + int write_file(BlockIndex &bi, void *buf, int64_t size); + int read_file(void *buf, const int64_t size, const int64_t offset, + blocksstable::ObTmpFileIOHandle &handle, const bool is_async = false); + int dump_block_if_need(const int64_t extra_size); + bool need_dump(const int64_t extra_size); + int write_compressed_block(Block *blk, BlockIndex *bi); + int dump_block(Block *blk, int64_t &dumped_size); + int dump_index_block(IndexBlock *idx_blk, int64_t &dumped_size); + void free_mem_list(common::ObDList &list); + inline bool has_index_block() const { return index_block_cnt_ > 0; } + inline int64_t get_block_raw_size(const Block *blk) const + { return is_last_block(blk) ? 
blk->get_buffer()->head_size() : blk->raw_size_; } + inline bool need_compress() { return compressor_.get_compressor_type() != NONE_COMPRESSOR; } + virtual int prepare_blk_for_write(Block *blk) { return OB_NOT_IMPLEMENT; } + virtual int prepare_blk_for_read(Block *blk) { return OB_NOT_IMPLEMENT; } + +protected: + bool inited_; + common::ObIAllocator *allocator_; + Block *blk_; // currently operating block + // variables related to `block_id`, the total number of `block_id` is the sum of + // all block's `cnt_`, and it can also be used to count rows. + int64_t block_id_cnt_; + int64_t saved_block_id_cnt_; + int64_t dumped_block_id_cnt_; + bool enable_dump_; + +private: + uint64_t tenant_id_; + char label_[lib::AOBJECT_LABEL_SIZE + 1]; + int64_t ctx_id_; + + // variables used to record memory usage + int64_t mem_limit_; + int64_t mem_hold_; + int64_t mem_used_; + + int64_t fd_; + int64_t dir_id_; + int64_t file_size_; + + // block related variables used to count various blocks + int64_t block_cnt_; + int64_t index_block_cnt_; + int64_t block_cnt_on_disk_; + int64_t alloced_mem_size_; + int64_t max_block_size_; + int64_t default_block_size_; + + IndexBlock *idx_blk_; + BlockReader inner_reader_; + + common::ObDList blk_mem_list_; + common::ObDList alloced_mem_list_; + common::ObSEArray blocks_; + common::DefaultPageAllocator inner_allocator_; + ObSqlMemoryCallback *mem_stat_; + ObChunkBlockCompressor compressor_; + ObIOEventObserver *io_observer_; + blocksstable::ObTmpFileIOHandle write_io_handle_; + bool last_block_on_disk_; + + DISALLOW_COPY_AND_ASSIGN(ObTempBlockStore); +}; + +inline int ObTempBlockStore::ShrinkBuffer::fill_head(int64_t size) +{ + int ret = common::OB_SUCCESS; + if (size < -head_) { + ret = common::OB_INVALID_ARGUMENT; + SQL_ENG_LOG(WARN, "invalid argument", K(size), K_(head)); + } else if (size > remain()) { + ret = common::OB_BUF_NOT_ENOUGH; + SQL_ENG_LOG(WARN, "buffer not enough", K(size), "remain", remain()); + } else { + head_ += size; + } + 
return ret; +} + +inline int ObTempBlockStore::ShrinkBuffer::fill_tail(int64_t size) +{ + int ret = common::OB_SUCCESS; + if (size < -tail_size()) { + ret = common::OB_INVALID_ARGUMENT; + SQL_ENG_LOG(WARN, "invalid argument", K(size), "tail_size", tail_size()); + } else if (size > remain()) { + ret = common::OB_BUF_NOT_ENOUGH; + SQL_ENG_LOG(WARN, "buffer not enough", K(size), "remain", remain()); + } else { + tail_ -= size; + } + return ret; +} + +inline int ObTempBlockStore::ShrinkBuffer::compact() +{ + int ret = common::OB_SUCCESS; + if (!is_inited()) { + ret = common::OB_NOT_INIT; + SQL_ENG_LOG(WARN, "not inited", K(ret)); + } else { + const int64_t tail_data_size = tail_size() - sizeof(ShrinkBuffer); + MEMMOVE(head(), tail(), tail_data_size); + head_ += tail_data_size; + tail_ += tail_data_size; + } + return ret; +} + +inline bool ObTempBlockStore::IndexBlock::blk_in_pos(const int64_t block_id, const int64_t pos) +{ + bool in_pos = false; + if (cnt_ > 0 && pos >= 0 && pos < cnt_) { + if (pos + 1 == cnt_) { + in_pos = block_indexes_[pos].block_id_ == block_id; + } else { + in_pos = block_indexes_[pos].block_id_ <= block_id && + block_id < block_indexes_[pos + 1].block_id_; + } + } + return in_pos; +} + +} // end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_BASIC_OB_TEMP_BLOCK_STORE_H_ diff --git a/src/sql/engine/cmd/ob_index_executor.cpp b/src/sql/engine/cmd/ob_index_executor.cpp index 2aed455a7..32a2cd856 100644 --- a/src/sql/engine/cmd/ob_index_executor.cpp +++ b/src/sql/engine/cmd/ob_index_executor.cpp @@ -82,6 +82,7 @@ int ObCreateIndexExecutor::execute(ObExecContext &ctx, ObCreateIndexStmt &stmt) //impossible } else if (FALSE_IT(create_index_arg.is_inner_ = my_session->is_inner())) { } else if (FALSE_IT(create_index_arg.parallelism_ = stmt.get_parallelism())) { + } else if (FALSE_IT(create_index_arg.compact_level_ = stmt.get_compact_level())) { } else if (FALSE_IT(create_index_arg.consumer_group_id_ = THIS_WORKER.get_group_id())) { } 
else if (OB_FAIL(common_rpc_proxy->create_index(create_index_arg, res))) { //send the signal of creating index to rs LOG_WARN("rpc proxy create index failed", K(create_index_arg), diff --git a/src/sql/engine/expr/ob_expr_ascii.cpp b/src/sql/engine/expr/ob_expr_ascii.cpp index 473bd4386..703385cdd 100644 --- a/src/sql/engine/expr/ob_expr_ascii.cpp +++ b/src/sql/engine/expr/ob_expr_ascii.cpp @@ -104,7 +104,7 @@ int ObExprAscii::calc(common::ObObj &obj, calc_ascii_inner(obj, expr_ctx, str_val); } else { ObString str_val = obj1.get_string(); - if (OB_FAIL(ObTextStringHelper::read_prefix_string_data(expr_ctx.calc_buf_, obj1, str_val))) { + if (OB_FAIL(sql::ObTextStringHelper::read_prefix_string_data(expr_ctx.calc_buf_, obj1, str_val))) { LOG_WARN("failed to get string data", K(ret), K(obj1.get_meta())); } else { calc_ascii_inner(obj, expr_ctx, str_val); @@ -142,7 +142,7 @@ int ObExprAscii::calc_ascii_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &re ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObString str_val = s_datum->get_string(); - if (OB_FAIL(ObTextStringHelper::read_prefix_string_data(ctx, + if (OB_FAIL(sql::ObTextStringHelper::read_prefix_string_data(ctx, *s_datum, expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), @@ -242,7 +242,7 @@ int ObExprOrd::calc(common::ObObj &obj, ObString str_val = obj1.get_string(); if (!ob_is_text_tc(obj1.get_type())) { ret = calc_ord_inner(type, str_val, cs_type, obj); - } else if (OB_FAIL(ObTextStringHelper::read_prefix_string_data(expr_ctx.calc_buf_, obj1, str_val))) { + } else if (OB_FAIL(sql::ObTextStringHelper::read_prefix_string_data(expr_ctx.calc_buf_, obj1, str_val))) { LOG_WARN("failed to get lob data", K(ret), K(obj1.get_meta())); } else { ret = calc_ord_inner(type, str_val, cs_type, obj); @@ -319,7 +319,7 @@ int ObExprOrd::calc_ord_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_da ObString str_val = s_datum->get_string(); 
ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - if (OB_FAIL(ObTextStringHelper::read_prefix_string_data(ctx, + if (OB_FAIL(sql::ObTextStringHelper::read_prefix_string_data(ctx, *s_datum, expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), diff --git a/src/sql/engine/expr/ob_expr_nlssort.cpp b/src/sql/engine/expr/ob_expr_nlssort.cpp index 7af7e30c9..c1fb6fddf 100644 --- a/src/sql/engine/expr/ob_expr_nlssort.cpp +++ b/src/sql/engine/expr/ob_expr_nlssort.cpp @@ -223,7 +223,7 @@ int ObExprNLSSort::eval_nlssort(const ObExpr &expr, // so just use prefix for calc ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); - if (OB_FAIL(ObTextStringHelper::read_prefix_string_data(ctx, + if (OB_FAIL(sql::ObTextStringHelper::read_prefix_string_data(ctx, *input, expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), diff --git a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp index cb87e8980..1304aa1d4 100644 --- a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp +++ b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp @@ -15,7 +15,7 @@ #include "sql/engine/pdml/static/ob_px_sstable_insert_op.h" #include "common/ob_tablet_id.h" #include "sql/engine/px/ob_px_sqc_handler.h" -#include "storage/ddl/ob_direct_insert_sstable_ctx.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -79,24 +79,33 @@ const ObPxMultiPartSSTableInsertSpec &ObPxMultiPartSSTableInsertOp::get_spec() c int ObPxMultiPartSSTableInsertOp::inner_open() { int ret = OB_SUCCESS; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); if (OB_FAIL(ObPxMultiPartInsertOp::inner_open())) { LOG_WARN("inner open failed", K(ret)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tablet_store_map_.create(MAP_HASH_BUCKET_NUM, "SSTABLE_INS"))) { + LOG_WARN("fail to create row cnt map", K(ret)); } else { - int64_t snapshot_version = 0; - const int64_t context_id = ctx_.get_sqc_handler()->get_ddl_context_id(); - if (OB_FAIL(MY_SPEC.get_snapshot_version(eval_ctx_, snapshot_version))) { + const int64_t ddl_table_id = MY_SPEC.plan_->get_ddl_table_id(); + if (OB_FAIL(ctx_.get_sqc_handler()->get_sub_coord().get_participants( + ctx_.get_sqc_handler()->get_sqc_init_arg().sqc_, + ddl_table_id, + participants_))) { + LOG_WARN("get participants failed", K(ret)); + } else if (OB_FAIL(MY_SPEC.get_snapshot_version(eval_ctx_, snapshot_version_))) { LOG_WARN("get snapshot version failed", K(ret)); - } else if (OB_FAIL(ObSSTableInsertManager::get_instance().update_table_context( - context_id, snapshot_version))) { - LOG_WARN("update table context failed", K(ret)); - } else if (OB_FAIL(tablet_store_map_.create(MAP_HASH_BUCKET_NUM, "SSTABLE_INS"))) { - LOG_WARN("fail to create row cnt map", K(ret)); } else { - op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SSTABLE_INSERT_ROW_COUNT; + // sort in ASC order by tablet id. 
+ std::sort(participants_.begin(), participants_.end(), ObLSTabletIDPairCmp()); + op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SSTABLE_INSERT_CG_ROW_COUNT; op_monitor_info_.otherstat_1_value_ = 0; + op_monitor_info_.otherstat_2_id_ = ObSqlMonitorStatIds::SSTABLE_INSERT_ROW_COUNT; + op_monitor_info_.otherstat_2_value_ = 0; op_monitor_info_.otherstat_5_id_ = ObSqlMonitorStatIds::DDL_TASK_ID; op_monitor_info_.otherstat_5_value_ = MY_SPEC.plan_->get_ddl_task_id(); - LOG_INFO("update table context", K(context_id), K(snapshot_version), + LOG_INFO("update table context", K(snapshot_version_), K(MY_SPEC.ins_ctdef_.das_ctdef_.table_id_), K(MY_SPEC.ins_ctdef_.das_ctdef_.index_tid_)); } } @@ -105,6 +114,7 @@ int ObPxMultiPartSSTableInsertOp::inner_open() void ObPxMultiPartSSTableInsertOp::destroy() { + participants_.reset(); curr_tablet_store_iter_.reset(); tablet_seq_caches_.reset(); if (tablet_store_map_.created()) { @@ -117,52 +127,24 @@ void ObPxMultiPartSSTableInsertOp::destroy() allocator_.reset(); } -static int notify_tablet_end(const int64_t context_id, const ObTabletID &tablet_id, const int64_t tablets_count, int64_t ¬ify_idx, bool emergent_finish = false) -{ - int ret = OB_SUCCESS; - ObSSTableInsertManager &sstable_context_mgr = ObSSTableInsertManager::get_instance(); - if (OB_UNLIKELY(context_id < 0 || !tablet_id.is_valid() || tablets_count <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(context_id), K(tablet_id), K(tablets_count)); - } else if (OB_FAIL(sstable_context_mgr.notify_tablet_end(context_id, tablet_id))) { - LOG_WARN("notify partition end failed", K(ret), K(context_id), K(tablet_id)); - } - ++notify_idx; // ignore ret - if (0 == notify_idx % 1000 || tablets_count == notify_idx || emergent_finish) { // batch 1000 or reach the end - int tmp_ret = OB_SUCCESS; - if (OB_SUCCESS != (tmp_ret = sstable_context_mgr.finish_ready_tablets(context_id, notify_idx))) { - LOG_WARN("finsh ready partitions failed", K(tmp_ret), 
K(context_id), K(notify_idx)); - ret = OB_SUCC(ret) ? tmp_ret : ret; - } - } - return ret; -} - int ObPxMultiPartSSTableInsertOp::inner_get_next_row() { int ret = OB_SUCCESS; ObSQLSessionInfo *my_session = nullptr; const ObTableSchema *table_schema = nullptr; // TODO(shuangcan): remove this ObSqlCtx *sql_ctx = NULL; - ObArray tablet_ids; int64_t notify_idx = 0; - int64_t context_id = -1; - ObSSTableInsertManager &sstable_context_mgr = ObSSTableInsertManager::get_instance(); - if (OB_ISNULL(child_)) { + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObInsertMonitor insert_monitor(op_monitor_info_.otherstat_2_value_, op_monitor_info_.otherstat_1_value_); + if (OB_UNLIKELY(nullptr == child_ || nullptr == tenant_direct_load_mgr)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("the child op is null", K(ret)); + LOG_WARN("the child op is null", K(ret), K(MTL_ID()), KP(child_), KP(tenant_direct_load_mgr)); } else if (get_spec().is_returning_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sstable insert op should not return rows", K(ret)); } else if (OB_ISNULL(my_session = GET_MY_SESSION(ctx_))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, session must not be nullptr", K(ret)); - } else if (OB_ISNULL(ctx_.get_sqc_handler())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("sqc handler is null", K(ret)); - } else if (FALSE_IT(context_id = ctx_.get_sqc_handler()->get_ddl_context_id())) { - } else if (OB_FAIL(sstable_context_mgr.get_tablet_ids(context_id, tablet_ids))) { - LOG_WARN("get tablet ids failed", K(ret), K(context_id)); } else if (OB_ISNULL(sql_ctx = ctx_.get_sql_ctx()) || OB_ISNULL(sql_ctx->schema_guard_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, schema guard not be nullptr", K(ret)); @@ -173,81 +155,86 @@ int ObPxMultiPartSSTableInsertOp::inner_get_next_row() } else if (OB_ISNULL(table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("Table not exist", K(MY_SPEC.plan_->get_ddl_table_id()), K(ret)); - } else { - 
std::sort(tablet_ids.begin(), tablet_ids.end()); // sort in ASC order - } - - if (OB_FAIL(ret)) { - } else if (need_count_rows()) { - if (OB_FAIL(get_all_rows_and_count())) { - LOG_WARN("fail to get all rows and count", K(ret)); - } - } - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(get_next_row_with_cache())) {// get one row first for calc part_id - if (OB_UNLIKELY(OB_ITER_END != ret)) { - LOG_WARN("fail get next row from child", K(ret)); - } + } else if (need_count_rows() && OB_FAIL(get_all_rows_and_count())) { + LOG_WARN("fail to get all rows and count", K(ret)); } else { const ObPhysicalPlan *phy_plan = NULL; - ObSSTableInsertTabletParam write_sstable_param; ObMacroDataSeq block_start_seq; int64_t schema_version = 0; + bool all_slices_empty = false; // all slices empty. const uint64_t index_tid = MY_SPEC.plan_->get_ddl_table_id(); if (OB_ISNULL(ctx_.get_physical_plan_ctx()) || OB_ISNULL(phy_plan = ctx_.get_physical_plan_ctx()->get_phy_plan())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get phy_plan failed", K(ret), KP(ctx_.get_physical_plan_ctx()), KP(phy_plan)); - } else { - write_sstable_param.context_id_ = context_id; - write_sstable_param.table_id_ = index_tid; - write_sstable_param.write_major_ = true; - write_sstable_param.task_cnt_ = ctx_.get_sqc_handler()->get_sqc_ctx().get_task_count(); - write_sstable_param.schema_version_ = MY_SPEC.plan_->get_ddl_schema_version(); - write_sstable_param.execution_id_ = MY_SPEC.plan_->get_ddl_execution_id(); - } - while (OB_SUCC(ret) && notify_idx < tablet_ids.count()) { - ObTabletID ¬ify_tablet_id = tablet_ids.at(notify_idx); - clear_evaluated_flag(); - const ObExprPtrIArray *row = &child_->get_spec().output_; - ObTabletID row_tablet_id; - if (OB_FAIL(get_tablet_id_from_row(*row, get_spec().row_desc_.get_part_id_index(), row_tablet_id))) { - LOG_WARN("get part id failed", K(ret)); - } else if (row_tablet_id != notify_tablet_id) { - notify_tablet_end(context_id, notify_tablet_id, tablet_ids.count(), notify_idx); + } else if 
(OB_FAIL(get_next_row_with_cache())) {// get one row first for calc part_id + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail get next row from child", K(ret)); } else { - write_sstable_param.tablet_id_ = row_tablet_id; - int64_t affected_rows = 0; - ObSSTableInsertRowIterator row_iter(ctx_, this); - const ObTabletCacheInterval *curr_tablet_seq_cache = - count_rows_finish_ && curr_tablet_idx_ < tablet_seq_caches_.count() ? &tablet_seq_caches_.at(curr_tablet_idx_) : nullptr; - int64_t parallel_idx = curr_tablet_seq_cache ? curr_tablet_seq_cache->task_id_ : ctx_.get_px_task_id(); - FLOG_INFO("update ddl parallel id", K(ret), K(parallel_idx), K(ctx_.get_px_task_id()), - K(count_rows_finish_), K(curr_tablet_idx_), K(tablet_seq_caches_.count()), KPC(curr_tablet_seq_cache)); - if (OB_FAIL(block_start_seq.set_parallel_degree(parallel_idx))) { - LOG_WARN("set parallel index failed", K(ret), K(parallel_idx)); - } else if (OB_FAIL(ObSSTableInsertManager::get_instance().add_sstable_slice( - write_sstable_param, block_start_seq, row_iter, affected_rows))) { - if (OB_ITER_END != ret) { - LOG_WARN("failed to write sstable rows to storage layer", K(ret), - K(row_tablet_id), K(block_start_seq), K(write_sstable_param)); - } + all_slices_empty = true; + ret = OB_SUCCESS; + } + } + for (notify_idx = 0; OB_SUCC(ret) && notify_idx < participants_.count(); notify_idx++) { + clear_evaluated_flag(); + bool is_current_slice_empty = false; + const share::ObLSID ¬ify_ls_id = participants_.at(notify_idx).first; + const ObTabletID ¬ify_tablet_id = participants_.at(notify_idx).second; + ObDirectLoadSliceInfo slice_info; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = false; + slice_info.ls_id_ = notify_ls_id; + slice_info.data_tablet_id_ = notify_tablet_id; + slice_info.context_id_ = ctx_.get_sqc_handler()->get_ddl_context_id(); + int64_t affected_rows = 0; + ObTabletID row_tablet_id; + const ObTabletCacheInterval *curr_tablet_seq_cache = + count_rows_finish_ && 
curr_tablet_idx_ < tablet_seq_caches_.count() && curr_tablet_idx_ >= 0 ? + &tablet_seq_caches_.at(curr_tablet_idx_) : nullptr; + int64_t parallel_idx = curr_tablet_seq_cache ? curr_tablet_seq_cache->task_id_ : ctx_.get_px_task_id(); + if (all_slices_empty || is_all_partition_finished_) { + is_current_slice_empty = true; + } else { + const ObExprPtrIArray *row = &child_->get_spec().output_; + if (OB_FAIL(get_tablet_id_from_row(*row, get_spec().row_desc_.get_part_id_index(), row_tablet_id))) { + LOG_WARN("get part id failed", K(ret)); + } else if (notify_tablet_id != row_tablet_id) { + is_current_slice_empty = true; } - if (OB_SUCC(ret) || OB_ITER_END == ret) { - ctx_.get_physical_plan_ctx()->add_affected_rows(affected_rows); - notify_tablet_end(context_id, row_tablet_id, tablet_ids.count(), notify_idx, affected_rows > 0); - if (row_iter.get_current_tablet_id() == row_tablet_id) { - ret = OB_ITER_END; // continue to next partition - } + } + FLOG_INFO("update ddl parallel id", K(ret), K(notify_tablet_id), K(slice_info), K(parallel_idx), K(ctx_.get_px_task_id()), K(is_current_slice_empty), + K(row_tablet_id), K(is_all_partition_finished_), K(count_rows_finish_), K(curr_tablet_idx_), K(tablet_seq_caches_.count()), KPC(curr_tablet_seq_cache)); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(block_start_seq.set_parallel_degree(parallel_idx))) { + LOG_WARN("set parallel index failed", K(ret), K(parallel_idx)); + } else if (OB_FAIL(tenant_direct_load_mgr->open_sstable_slice(block_start_seq, + slice_info))) { + LOG_WARN("create sstable slice writer failed", K(ret), K(block_start_seq), K(slice_info)); + } else { + ObDDLInsertRowIterator row_iter(this, is_current_slice_empty /*is_slice_empty*/, + notify_ls_id, notify_tablet_id, table_schema->get_rowkey_column_num(), snapshot_version_, slice_info.context_id_); + if (OB_FAIL(tenant_direct_load_mgr->fill_sstable_slice(slice_info, + &row_iter, + affected_rows, + &insert_monitor))) { + LOG_WARN("fill data into sstable slice failed", 
K(ret), K(slice_info)); } } + if (OB_SUCC(ret)) { + if (OB_FAIL(tenant_direct_load_mgr->close_sstable_slice(slice_info, &insert_monitor))) { + LOG_WARN("close sstable slice failed", K(ret), K(slice_info)); + } + ctx_.get_physical_plan_ctx()->add_affected_rows(affected_rows); + } } } - if (OB_ITER_END == ret) { - // try flush sstable, ignore ret - while (OB_ITER_END == ret && notify_idx < tablet_ids.count()) { - ObTabletID ¬ify_tablet_id = tablet_ids.at(notify_idx); - notify_tablet_end(context_id, notify_tablet_id, tablet_ids.count(), notify_idx); + if (OB_SUCC(ret)) { + if (notify_idx < participants_.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(notify_idx), K(participants_)); + } else { + ret = OB_ITER_END; + LOG_INFO("all partitions is end", K(notify_idx), K(participants_)); } } return ret; @@ -317,7 +304,10 @@ int ObPxMultiPartSSTableInsertOp::get_next_row_with_cache() } if (OB_SUCC(ret)) { op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::SSTABLE_INSERT_ROW_COUNT; - op_monitor_info_.otherstat_1_value_++; + } + if (OB_ITER_END == ret) { + is_all_partition_finished_ = true; + LOG_INFO("scan all partition finished"); } return ret; } @@ -325,11 +315,16 @@ int ObPxMultiPartSSTableInsertOp::get_next_row_with_cache() int ObPxMultiPartSSTableInsertOp::get_all_rows_and_count() { int ret = OB_SUCCESS; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); if (OB_UNLIKELY(!tablet_store_map_.created())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("cache row store or row_cnt_map is not inited", K(ret)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); } else { int64_t curr_tablet_row_cnt = 0; + const int64_t context_id = ctx_.get_sqc_handler()->get_ddl_context_id(); while (OB_SUCC(ret)) { const ObExprPtrIArray *row = &child_->get_spec().output_; ObTabletID row_tablet_id; @@ -367,11 +362,8 @@ int 
ObPxMultiPartSSTableInsertOp::get_all_rows_and_count() TabletStoreMap::const_iterator iter; for (iter = tablet_store_map_.begin(); OB_SUCC(ret) && iter != tablet_store_map_.end(); ++iter) { ObTabletCacheInterval interval(iter->first, iter->second->get_row_cnt()); - if (OB_FAIL(ObSSTableInsertManager::get_instance().get_tablet_cache_interval( - ctx_.get_sqc_handler()->get_ddl_context_id(), - iter->first, - interval))) { - LOG_WARN("failed to get tablet cache intervals", K(ret)); + if (OB_FAIL(tenant_direct_load_mgr->get_tablet_cache_interval(context_id, iter->first, interval))) { + LOG_WARN("failed to get tablet cache intervals", K(ret), "tablet_id", iter->first); } else if (OB_FAIL(tablet_seq_caches_.push_back(interval))) { LOG_WARN("failed to add tablet cache interval", K(ret), K(interval)); } diff --git a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h index b2bb175e6..0c430f840 100644 --- a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h +++ b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h @@ -21,6 +21,8 @@ namespace oceanbase namespace storage { class ObSSTableInsertRowIterator; +typedef std::pair LSTabletIDPair; +struct ObInsertMonitor; } namespace sql @@ -63,12 +65,15 @@ public: ObOpInput *input) : ObPxMultiPartInsertOp(exec_ctx, spec, input), allocator_("SSTABLE_INS"), + participants_(), tablet_store_map_(), tablet_seq_caches_(), curr_tablet_store_iter_(), curr_tablet_idx_(-1), count_rows_finish_(false), - curr_part_idx_(0) + is_all_partition_finished_(false), + curr_part_idx_(0), + snapshot_version_(0) {} virtual ~ObPxMultiPartSSTableInsertOp() { destroy(); } const ObPxMultiPartSSTableInsertSpec &get_spec() const; @@ -79,6 +84,20 @@ public: int get_tablet_id_from_row(const ObExprPtrIArray &row, const int64_t part_id_idx, common::ObTabletID &tablet_id); +private: + struct ObLSTabletIDPairCmp final + { + public: + ObLSTabletIDPairCmp() { } + OB_INLINE bool operator() (const LSTabletIDPair 
&left, const LSTabletIDPair &right) + { + if (left.second == right.second) { + return left.first < right.first; + } else { + return left.second < right.second; + } + } + }; private: int get_all_rows_and_count(); int create_tablet_store(common::ObTabletID &tablet_id, ObChunkDatumStore *&tablet_store); @@ -90,12 +109,15 @@ private: static const uint64_t TABLET_STORE_MEM_LIMIT = 2 * 1024 * 1024; // 2M typedef common::hash::ObHashMap TabletStoreMap; common::ObArenaAllocator allocator_; + common::ObArray participants_; TabletStoreMap tablet_store_map_; ObArray tablet_seq_caches_; ObChunkDatumStore::Iterator curr_tablet_store_iter_; int64_t curr_tablet_idx_; bool count_rows_finish_; + bool is_all_partition_finished_; int64_t curr_part_idx_; + int64_t snapshot_version_; // ddl snapshot version. DISALLOW_COPY_AND_ASSIGN(ObPxMultiPartSSTableInsertOp); }; diff --git a/src/sql/engine/px/ob_px_sub_coord.cpp b/src/sql/engine/px/ob_px_sub_coord.cpp index 98ff0d1aa..21812ec48 100644 --- a/src/sql/engine/px/ob_px_sub_coord.cpp +++ b/src/sql/engine/px/ob_px_sub_coord.cpp @@ -39,7 +39,7 @@ #include "observer/ob_server_struct.h" #include "observer/ob_server.h" #include "sql/ob_sql_trans_control.h" -#include "storage/ddl/ob_direct_insert_sstable_ctx.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "sql/engine/px/ob_granule_pump.h" #include "sql/das/ob_das_utils.h" #include "sql/engine/px/p2p_datahub/ob_p2p_dh_mgr.h" @@ -820,16 +820,18 @@ int ObPxSubCoord::check_need_start_ddl(bool &need_start_ddl) return ret; } +typedef std::pair LSTabletIDPair; + int ObPxSubCoord::start_ddl() { int ret = OB_SUCCESS; ObExecContext *exec_ctx = sqc_arg_.exec_ctx_; - int64_t schema_version = 0; ObSQLSessionInfo *my_session = nullptr; - ObPhysicalPlanCtx *plan_ctx = NULL; + ObPhysicalPlanCtx *plan_ctx = nullptr; const ObPhysicalPlan *phy_plan = nullptr; ObIArray &location_keys = sqc_arg_.sqc_.get_access_table_location_keys(); - if (OB_UNLIKELY(ddl_ctrl_.is_valid())) { + 
ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_UNLIKELY(ddl_ctrl_.is_in_progress())) { ret = OB_INIT_TWICE; LOG_WARN("ddl ctrl has already been inited", K(ret), K(ddl_ctrl_)); } else if (OB_ISNULL(exec_ctx)) { @@ -847,43 +849,111 @@ int ObPxSubCoord::start_ddl() } else if (OB_UNLIKELY(location_keys.count() == 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("there is no location key", K(ret)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); } else { - ObSSTableInsertTableParam param; + common::ObArray ls_tablet_ids; + uint64_t data_format_version = 0; + int64_t snapshot_version = 0; + share::ObDDLTaskStatus unused_task_status = share::ObDDLTaskStatus::PREPARE; + const int64_t tenant_id = my_session->get_effective_tenant_id(); const int64_t ref_table_id = location_keys.at(0).ref_table_id_; const int64_t ddl_table_id = phy_plan->get_ddl_table_id(); - const int64_t tenant_id = my_session->get_effective_tenant_id(); - if (OB_FAIL(get_participants(sqc_arg_.sqc_, ddl_table_id, param.ls_tablet_ids_))) { + const int64_t ddl_task_id = phy_plan->get_ddl_task_id(); + const int64_t schema_version = phy_plan->get_ddl_schema_version(); + const int64_t ddl_execution_id = phy_plan->get_ddl_execution_id(); + if (OB_FAIL(ObDDLUtil::get_data_information(tenant_id, ddl_task_id, data_format_version, snapshot_version, unused_task_status))) { + LOG_WARN("get ddl cluster version failed", K(ret)); + } else if (OB_UNLIKELY(snapshot_version <= 0)) { + ret = OB_NEED_RETRY; + LOG_WARN("invalid snapshot version", K(ret),K(tenant_id), K(ddl_task_id), K(ddl_execution_id), + K(ddl_table_id), K(schema_version), K(snapshot_version)); + } else if (OB_FAIL(get_participants(sqc_arg_.sqc_, ddl_table_id, ls_tablet_ids))) { LOG_WARN("fail to get tablet ids", K(ret)); } else { - param.dest_table_id_ = phy_plan->get_ddl_table_id(); - param.snapshot_version_ = 0L; - param.schema_version_ = 
phy_plan->get_ddl_schema_version(); - param.task_cnt_ = sqc_arg_.sqc_.get_task_count(); - param.write_major_ = true; - param.exec_ctx_ = exec_ctx; - param.execution_id_ = phy_plan->get_ddl_execution_id(); - param.ddl_task_id_ = phy_plan->get_ddl_task_id(); - if (OB_FAIL(ObDDLUtil::get_data_format_version(tenant_id, param.ddl_task_id_, param.data_format_version_))) { - LOG_WARN("get ddl cluster version failed", K(ret)); - } else if (OB_FAIL(ObSSTableInsertManager::get_instance().create_table_context(param, ddl_ctrl_.context_id_))) { - LOG_WARN("create table context failed", K(ret)); - } else { - FLOG_INFO("start ddl", "context_id", ddl_ctrl_.context_id_, K(param)); + ObTabletDirectLoadInsertParam direct_load_param; + direct_load_param.is_replay_ = false; + direct_load_param.common_param_.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_DDL; + direct_load_param.common_param_.data_format_version_ = data_format_version; + direct_load_param.common_param_.read_snapshot_ = snapshot_version; + direct_load_param.runtime_only_param_.exec_ctx_ = exec_ctx; + direct_load_param.runtime_only_param_.task_id_ = ddl_task_id; + direct_load_param.runtime_only_param_.table_id_ = ddl_table_id; + direct_load_param.runtime_only_param_.schema_version_ = schema_version; + direct_load_param.runtime_only_param_.task_cnt_ = sqc_arg_.sqc_.get_task_count(); + SCN unused_scn; + ObTabletDirectLoadMgrHandle unsued_handle; + if (OB_FAIL(tenant_direct_load_mgr->alloc_execution_context_id(ddl_ctrl_.context_id_))) { + LOG_WARN("alloc execution context id failed", K(ret)); } + for (int64_t i = 0; OB_SUCC(ret) && i < ls_tablet_ids.count(); ++i) { + direct_load_param.common_param_.ls_id_ = ls_tablet_ids.at(i).first; + direct_load_param.common_param_.tablet_id_ = ls_tablet_ids.at(i).second; + if (OB_FAIL(tenant_direct_load_mgr->create_tablet_direct_load(ddl_ctrl_.context_id_, + ddl_execution_id, direct_load_param))) { + LOG_WARN("create tablet manager failed", K(ret)); + } else if 
(OB_FAIL(tenant_direct_load_mgr->open_tablet_direct_load(true, + direct_load_param.common_param_.ls_id_, direct_load_param.common_param_.tablet_id_, ddl_ctrl_.context_id_, unused_scn, unsued_handle))) { + LOG_WARN("write ddl start log failed", K(ret), K(direct_load_param)); + } + } + if (OB_SUCC(ret)) { + ddl_ctrl_.in_progress_ = true; + } + FLOG_INFO("start ddl", K(ret), K(direct_load_param), K(ls_tablet_ids)); } } return ret; } +// TODO yiren, end ddl in table level, and create sstable in parallel. int ObPxSubCoord::end_ddl(const bool need_commit) { int ret = OB_SUCCESS; - if (ddl_ctrl_.is_valid()) { - ObSSTableInsertManager &ddl_ctx_mgr = ObSSTableInsertManager::get_instance(); - if (OB_FAIL(ddl_ctx_mgr.finish_table_context(ddl_ctrl_.context_id_, need_commit))) { - LOG_WARN("ddl manager finish contex failed", K(ret), K(ddl_ctrl_)); + if (ddl_ctrl_.is_in_progress()) { + ObExecContext *exec_ctx = sqc_arg_.exec_ctx_; + ObSQLSessionInfo *my_session = nullptr; + ObPhysicalPlanCtx *plan_ctx = nullptr; + const ObPhysicalPlan *phy_plan = nullptr; + common::ObArray ls_tablet_ids; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, exec ctx must not be nullptr", K(ret)); + } else if (OB_ISNULL(my_session = GET_MY_SESSION(*exec_ctx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, session must not be nullptr", K(ret)); + } else if (OB_ISNULL(plan_ctx = GET_PHY_PLAN_CTX(*exec_ctx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("deserialized exec ctx without phy plan ctx set. 
Unexpected", K(ret)); + } else if (OB_ISNULL(phy_plan = plan_ctx->get_phy_plan())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, phy plan must not be nullptr", K(ret)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else { + const int64_t ddl_table_id = phy_plan->get_ddl_table_id(); + const int64_t ddl_task_id = phy_plan->get_ddl_task_id(); + const int64_t ddl_execution_id = phy_plan->get_ddl_execution_id(); + if (OB_FAIL(get_participants(sqc_arg_.sqc_, ddl_table_id, ls_tablet_ids))) { + LOG_WARN("fail to get tablet ids", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < ls_tablet_ids.count(); ++i) { + if (OB_FAIL(tenant_direct_load_mgr->close_tablet_direct_load(ddl_ctrl_.context_id_, true, /*is_full_direct_load*/ + ls_tablet_ids.at(i).first, ls_tablet_ids.at(i).second, need_commit, true /*emergent_finish*/, + ddl_task_id, ddl_table_id, ddl_execution_id))) { + LOG_WARN("close tablet direct load failed", K(ret), "tablet_id", ls_tablet_ids.at(i).second); + } + } + if (OB_SUCC(ret)) { + // finish this execution. 
+ ddl_ctrl_.in_progress_ = false; + } + } } - LOG_INFO("end ddl sstable", K(ret), K(need_commit)); + FLOG_INFO("end ddl sstable", K(ret), K(need_commit), K(ls_tablet_ids)); DEBUG_SYNC(END_DDL_IN_PX_SUBCOORD); } if (OB_EAGAIN == ret) { diff --git a/src/sql/engine/px/ob_px_sub_coord.h b/src/sql/engine/px/ob_px_sub_coord.h index 0208b8f06..601658159 100644 --- a/src/sql/engine/px/ob_px_sub_coord.h +++ b/src/sql/engine/px/ob_px_sub_coord.h @@ -64,6 +64,7 @@ public: int init_exec_env(ObExecContext &exec_ctx); ObPxSQCProxy &get_sqc_proxy() { return sqc_ctx_.sqc_proxy_; } ObSqcCtx &get_sqc_ctx() { return sqc_ctx_; } + int64_t get_ddl_context_id() const { return ddl_ctrl_.context_id_; } int set_partitions_info(ObIArray &partitions_info) { return sqc_ctx_.partitions_info_.assign(partitions_info); } @@ -78,7 +79,6 @@ public: int check_need_start_ddl(bool &need_start_ddl); int start_ddl(); int end_ddl(const bool need_commit); - int64_t get_ddl_context_id() const { return ddl_ctrl_.context_id_; } int pre_setup_op_input(ObExecContext &ctx, ObOpSpec &root, @@ -87,6 +87,9 @@ public: const ObIArray &tsc_location_keys); int rebuild_sqc_access_table_locations(); void set_is_single_tsc_leaf_dfo(bool flag) { is_single_tsc_leaf_dfo_ = flag; } + int get_participants(ObPxSqcMeta &sqc, + const int64_t table_id, + ObIArray> &ls_tablet_ids) const; void destroy_shared_rf_msgs(); private: int setup_loop_proc(ObSqcCtx &sqc_ctx) const; @@ -129,9 +132,6 @@ private: int try_prealloc_receive_channel(ObSqcCtx &sqc_ctx, ObPxSqcMeta &sqc); dtl::ObDtlLocalFirstBufferCache *get_first_buffer_cache() { return &first_buffer_cache_; } - int get_participants(ObPxSqcMeta &sqc, - const int64_t table_id, - ObIArray> &ls_tablet_ids) const; void try_get_dml_op(ObOpSpec &root, ObTableModifySpec *&dml_op); int construct_p2p_dh_map() { return sqc_ctx_.sqc_proxy_.construct_p2p_dh_map( diff --git a/src/sql/engine/px/ob_sub_trans_ctrl.h b/src/sql/engine/px/ob_sub_trans_ctrl.h index 0cb1cf73e..9b15ef091 100644 --- 
a/src/sql/engine/px/ob_sub_trans_ctrl.h +++ b/src/sql/engine/px/ob_sub_trans_ctrl.h @@ -35,12 +35,15 @@ private: class ObDDLCtrl final { public: - ObDDLCtrl() : context_id_(0) {} + ObDDLCtrl() : context_id_(0), in_progress_(false) {} ~ObDDLCtrl() = default; - bool is_valid() const { return context_id_ > 0; } - TO_STRING_KV(K_(context_id)); + + bool is_in_progress() const { return in_progress_; } + TO_STRING_KV(K_(in_progress)); public: int64_t context_id_; + // to tag whether the ddl is in progress (between start_ddl and end_ddl). + bool in_progress_; }; } } diff --git a/src/sql/engine/sort/ob_sort_op.cpp b/src/sql/engine/sort/ob_sort_op.cpp index e3bf11131..ad33657a2 100644 --- a/src/sql/engine/sort/ob_sort_op.cpp +++ b/src/sql/engine/sort/ob_sort_op.cpp @@ -16,6 +16,7 @@ #include "sql/engine/px/ob_px_util.h" #include "sql/engine/aggregate/ob_hash_groupby_op.h" #include "sql/engine/window_function/ob_window_function_op.h" +#include "share/ob_rpc_struct.h" namespace oceanbase { @@ -37,7 +38,9 @@ ObSortSpec::ObSortSpec(common::ObIAllocator &alloc, const ObPhyOperatorType type is_fetch_with_ties_(false), prescan_enabled_(false), enable_encode_sortkey_opt_(false), - part_cnt_(0) + part_cnt_(0), + sort_compact_level_(share::SORT_DEFAULT_LEVEL), + compress_type_(NONE_COMPRESSOR) {} OB_SERIALIZE_MEMBER((ObSortSpec, ObOpSpec), @@ -54,7 +57,9 @@ OB_SERIALIZE_MEMBER((ObSortSpec, ObOpSpec), is_fetch_with_ties_, prescan_enabled_, enable_encode_sortkey_opt_, - part_cnt_); + part_cnt_, + sort_compact_level_, + compress_type_); ObSortOp::ObSortOp(ObExecContext &ctx_, const ObOpSpec &spec, ObOpInput *input) : ObOperator(ctx_, spec, input), @@ -281,10 +286,11 @@ int ObSortOp::process_sort_batch() int ObSortOp::scan_all_then_sort() { int ret = OB_SUCCESS; - SMART_VAR(ObChunkDatumStore, cache_store, "SORT_CACHE_CTX") { + SMART_VAR(ObCompactStore, cache_store) { if (OB_FAIL(cache_store.init(2 * 1024 * 1024, ctx_.get_my_session()->get_effective_tenant_id(), - ObCtxIds::DEFAULT_CTX_ID, 
"SORT_CACHE_CTX", true/*enable dump*/))) { + ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true/*enable dump*/, 0, true, + MY_SPEC.sort_compact_level_, MY_SPEC.compress_type_, &MY_SPEC.all_exprs_))) { LOG_WARN("init sample chunk store failed", K(ret)); } else if (OB_FAIL(cache_store.alloc_dir_id())) { LOG_WARN("failed to alloc dir id", K(ret)); @@ -299,7 +305,7 @@ int ObSortOp::scan_all_then_sort() } } else { sort_row_count_++; - if (OB_FAIL(cache_store.add_row(MY_SPEC.all_exprs_, &eval_ctx_))) { + if (OB_FAIL(cache_store.add_row(MY_SPEC.all_exprs_, eval_ctx_))) { LOG_WARN("failed to add row to cache store", K(ret)); } } @@ -310,21 +316,19 @@ int ObSortOp::scan_all_then_sort() } if (OB_SUCC(ret)) { - ObChunkDatumStore::Iterator iterator; if (OB_FAIL(cache_store.finish_add_row(false))) { LOG_WARN("fail to finish add row", K(ret)); - } else if (OB_FAIL(cache_store.begin(iterator))) { - LOG_WARN("fail to get cache_store iter", K(ret)); } else { const ObChunkDatumStore::StoredRow *store_row = NULL; - while (OB_SUCC(ret) && iterator.has_next()) { - if (OB_FAIL(iterator.get_next_row(store_row))) { + bool has_next = false; + while (OB_SUCC(ret) && OB_SUCC(cache_store.has_next(has_next)) && has_next) { + if (OB_FAIL(cache_store.get_next_row(store_row))) { if (OB_ITER_END != ret) { - LOG_WARN("failed to get next row"); + LOG_WARN("failed to get next row", K(ret)); } } else if (OB_ISNULL(store_row)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get next row"); + LOG_WARN("failed to get next row", K(ret)); } else { OZ(sort_impl_.add_stored_row(*store_row)); } @@ -338,10 +342,11 @@ int ObSortOp::scan_all_then_sort() int ObSortOp::scan_all_then_sort_batch() { int ret = OB_SUCCESS; - SMART_VAR(ObChunkDatumStore, cache_store, "SORT_CACHE_CTX") { + SMART_VAR(ObCompactStore, cache_store) { if (OB_FAIL(cache_store.init(2 * 1024 * 1024, ctx_.get_my_session()->get_effective_tenant_id(), - ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true/*enable dump*/))) { + ObCtxIds::DEFAULT_CTX_ID, 
"SORT_CACHE_CTX", true/*enable dump*/, 0, true, + MY_SPEC.sort_compact_level_, MY_SPEC.compress_type_, &MY_SPEC.all_exprs_))) { LOG_WARN("init sample chunk store failed", K(ret)); } else if (OB_FAIL(cache_store.alloc_dir_id())) { LOG_WARN("failed to alloc dir id", K(ret)); @@ -373,15 +378,13 @@ int ObSortOp::scan_all_then_sort_batch() ret = OB_SUCCESS; } if (OB_SUCC(ret)) { - ObChunkDatumStore::Iterator iterator; if (OB_FAIL(cache_store.finish_add_row(false))) { LOG_WARN("fail to finish add row", K(ret)); - } else if (OB_FAIL(cache_store.begin(iterator))) { - LOG_WARN("fail to get cache_store iter", K(ret)); } else { const ObChunkDatumStore::StoredRow *store_row = NULL; - while (OB_SUCC(ret) && iterator.has_next()) { - if (OB_FAIL(iterator.get_next_row(store_row))) { + bool has_next = false; + while (OB_SUCC(ret) && OB_SUCC(cache_store.has_next(has_next)) && has_next) { + if (OB_FAIL(cache_store.get_next_row(store_row))) { if (OB_ITER_END != ret) { LOG_WARN("failed to get next row"); } @@ -430,7 +433,8 @@ int ObSortOp::init_sort(int64_t tenant_id, int ret = OB_SUCCESS; OZ(sort_impl_.init(tenant_id, &MY_SPEC.sort_collations_, &MY_SPEC.sort_cmp_funs_, &eval_ctx_, &ctx_, MY_SPEC.enable_encode_sortkey_opt_, MY_SPEC.is_local_merge_sort_, - false /* need_rewind */, MY_SPEC.part_cnt_, topn_cnt, MY_SPEC.is_fetch_with_ties_)); + false /* need_rewind */, MY_SPEC.part_cnt_, topn_cnt, MY_SPEC.is_fetch_with_ties_, + ObChunkDatumStore::BLOCK_SIZE, MY_SPEC.sort_compact_level_, MY_SPEC.compress_type_, &MY_SPEC.all_exprs_)); if (is_batch) { read_batch_func_ = &ObSortOp::sort_impl_next_batch; } else { diff --git a/src/sql/engine/sort/ob_sort_op.h b/src/sql/engine/sort/ob_sort_op.h index 8a18cea17..24eb3a174 100644 --- a/src/sql/engine/sort/ob_sort_op.h +++ b/src/sql/engine/sort/ob_sort_op.h @@ -18,6 +18,8 @@ #include "common/object/ob_object.h" #include "share/datum/ob_datum_funcs.h" #include "sql/engine/sort/ob_sort_basic_info.h" +#include 
"sql/engine/basic/chunk_store/ob_compact_store.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" namespace oceanbase { @@ -33,7 +35,8 @@ public: INHERIT_TO_STRING_KV("op_spec", ObOpSpec, K_(topn_expr), K_(topk_limit_expr), K_(topk_offset_expr), K_(prefix_pos), K_(minimum_row_count), K_(topk_precision), K_(prefix_pos), K_(is_local_merge_sort), - K_(prescan_enabled), K_(enable_encode_sortkey_opt), K_(part_cnt)); + K_(prescan_enabled), K_(enable_encode_sortkey_opt), K_(part_cnt), K_(sort_compact_level), + K_(compress_type)); public: ObExpr *topn_expr_; ObExpr *topk_limit_expr_; @@ -56,6 +59,8 @@ public: bool enable_encode_sortkey_opt_; // if use, all_exprs_ is : hash(part_by) + part_by + order_by. int64_t part_cnt_; + share::SortCompactLevel sort_compact_level_; + ObCompressorType compress_type_; }; class ObSortOp : public ObOperator diff --git a/src/sql/engine/sort/ob_sort_op_impl.cpp b/src/sql/engine/sort/ob_sort_op_impl.cpp index 74d58fab9..1df98411c 100644 --- a/src/sql/engine/sort/ob_sort_op_impl.cpp +++ b/src/sql/engine/sort/ob_sort_op_impl.cpp @@ -575,9 +575,10 @@ ObSortOpImpl::ObSortOpImpl(ObMonitorNode &op_monitor_info) op_type_(PHY_INVALID), op_id_(UINT64_MAX), exec_ctx_(nullptr), stored_rows_(nullptr), io_event_observer_(nullptr), buckets_(NULL), max_bucket_cnt_(0), part_hash_nodes_(NULL), max_node_cnt_(0), part_cnt_(0), topn_cnt_(INT64_MAX), outputted_rows_cnt_(0), - is_fetch_with_ties_(false), topn_heap_(), ties_array_pos_(0), last_ties_row_(NULL), - pt_buckets_(NULL), use_partition_topn_sort_(false), heap_nodes_(), cur_heap_idx_(0), - rows_(NULL) + is_fetch_with_ties_(false), topn_heap_(NULL), ties_array_pos_(0), + last_ties_row_(NULL), pt_buckets_(NULL), use_partition_topn_sort_(false), heap_nodes_(), cur_heap_idx_(0), + rows_(NULL), sort_compact_level_(share::SORT_DEFAULT_LEVEL), sort_exprs_(nullptr), + compress_type_(NONE_COMPRESSOR) { } @@ -637,7 +638,10 @@ int ObSortOpImpl::init( const int64_t part_cnt /* = 0 */, const int64_t topn_cnt /* = 
INT64_MAX */, const bool is_fetch_with_ties /* = false */, - const int64_t default_block_size /* = 64KB */) + const int64_t default_block_size /* = 64KB */, + const SortCompactLevel compact_level /* = false */, + const ObCompressorType compress_type /* = NONE_COMPRESS */, + const ExprFixedArray *exprs /* =nullptr */) { int ret = OB_SUCCESS; if (is_inited()) { @@ -665,6 +669,9 @@ int ObSortOpImpl::init( exec_ctx_ = exec_ctx; part_cnt_ = part_cnt; topn_cnt_ = topn_cnt; + sort_compact_level_ = compact_level; + compress_type_ = compress_type; + sort_exprs_ = exprs; use_heap_sort_ = is_topn_sort() && part_cnt_ == 0; use_partition_topn_sort_ = is_topn_sort() && part_cnt_ > 0; is_fetch_with_ties_ = is_fetch_with_ties; @@ -737,6 +744,8 @@ void ObSortOpImpl::reuse() { sorted_ = false; iter_.reset(); + default_blk_holder_.release(); + compact_blk_holder_.release(); quick_sort_array_.reuse(); datum_store_.reset(); inmem_row_size_ = 0; @@ -798,6 +807,9 @@ void ObSortOpImpl::reset() is_fetch_with_ties_ = false; rows_ = NULL; ties_array_pos_ = 0; + sort_compact_level_ = share::SORT_DEFAULT_LEVEL; + compress_type_ = NONE_COMPRESSOR; + sort_exprs_ = nullptr; // for partition topn sort cur_heap_idx_ = 0; heap_nodes_.reset(); @@ -850,7 +862,6 @@ int ObSortOpImpl::build_chunk(const int64_t level, Input &input, int64_t extra_s int ret = OB_SUCCESS; const int64_t curr_time = ObTimeUtility::fast_current_time(); int64_t stored_row_cnt = 0; - ObChunkDatumStore *datum_store = NULL; const ObChunkDatumStore::StoredRow *src_store_row = NULL; ObChunkDatumStore::StoredRow *dst_store_row = NULL; ObSortOpChunk *chunk = NULL; @@ -858,12 +869,13 @@ int ObSortOpImpl::build_chunk(const int64_t level, Input &input, int64_t extra_s ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else if (OB_ISNULL(chunk = OB_NEWx(ObSortOpChunk, - (&mem_context_->get_malloc_allocator()), level))) { + (&mem_context_->get_malloc_allocator()), level, use_compact_store()))) { ret = OB_ALLOCATE_MEMORY_FAILED; 
LOG_WARN("allocate memory failed", K(ret)); } else if (OB_FAIL(chunk->datum_store_.init(1/*+ mem limit, small limit for dump immediately */, tenant_id_, ObCtxIds::WORK_AREA, ObModIds::OB_SQL_SORT_ROW, - true/*+ enable dump */, extra_size/* for InMemoryTopnSort */))) { + true/*+ enable dump */, extra_size/* for InMemoryTopnSort */, true, + sort_compact_level_, compress_type_, sort_exprs_))) { LOG_WARN("init row store failed", K(ret)); } else { chunk->datum_store_.set_dir_id(sql_mem_processor_.get_dir_id()); @@ -874,7 +886,7 @@ int ObSortOpImpl::build_chunk(const int64_t level, Input &input, int64_t extra_s while (OB_SUCC(ret)) { if (use_heap_sort_ && !is_fetch_with_ties_ && stored_row_cnt >= topn_cnt_) { break; - } else if (OB_FAIL(input(datum_store, src_store_row))) { + } else if (OB_FAIL(input(src_store_row))) { if (OB_ITER_END != ret) { LOG_WARN("get input row failed", K(ret)); } else { @@ -1532,13 +1544,11 @@ int ObSortOpImpl::do_dump() int64_t cur_heap_idx = 0; int64_t row_idx = 0; const int64_t level = 0; - auto input = [&](ObChunkDatumStore *&rs, const ObChunkDatumStore::StoredRow *&row) { + auto input = [&](const ObChunkDatumStore::StoredRow *&row) { if (OB_FAIL(part_topn_heap_next(cur_heap_idx, row_idx, row))) { if (OB_ITER_END != ret) { LOG_WARN("get row from part topn heap failed", K(ret)); } - } else { - rs = &datum_store_; } return ret; }; @@ -1548,18 +1558,16 @@ int ObSortOpImpl::do_dump() } else if (!need_imms()) { int64_t row_pos = 0; int64_t ties_array_pos = 0; - auto input = [&](ObChunkDatumStore *&rs, const ObChunkDatumStore::StoredRow *&row) { + auto input = [&](const ObChunkDatumStore::StoredRow *&row) { int ret = OB_SUCCESS; if (row_pos >= rows_->count() && !(NULL != topn_heap_ && ties_array_pos < topn_heap_->ties_array_.count())) { ret = OB_ITER_END; } else if (row_pos < rows_->count()) { row = rows_->at(row_pos); - rs = &datum_store_; row_pos += 1; } else if (NULL != topn_heap_) { row = topn_heap_->ties_array_.at(ties_array_pos); - rs = 
&datum_store_; ties_array_pos += 1; } return ret; @@ -1568,14 +1576,12 @@ int ObSortOpImpl::do_dump() LOG_WARN("build chunk failed", K(ret)); } } else { - auto input = [&](ObChunkDatumStore *&rs, const ObChunkDatumStore::StoredRow *&row) { + auto input = [&](const ObChunkDatumStore::StoredRow *&row) { int ret = OB_SUCCESS; if (OB_FAIL(imms_heap_next(row))) { if (OB_ITER_END != ret) { LOG_WARN("get row from memory heap failed", K(ret)); } - } else { - rs = &datum_store_; } return ret; }; @@ -1714,10 +1720,8 @@ int ObSortOpImpl::build_ems_heap(int64_t &merge_ways) if (OB_SUCC(ret)) { ObSortOpChunk *chunk = sort_chunks_.get_first(); for (int64_t i = 0; i < merge_ways && OB_SUCC(ret); i++) { - chunk->iter_.reset(); - if (OB_FAIL(chunk->iter_.init(&chunk->datum_store_))) { - LOG_WARN("init iterator failed", K(ret)); - } else if (OB_FAIL(chunk->iter_.get_next_row(chunk->row_)) + chunk->datum_store_.reset(); + if (OB_FAIL(chunk->datum_store_.get_next_row(chunk->row_)) || NULL == chunk->row_) { if (OB_ITER_END == ret || OB_SUCCESS == ret) { ret = OB_ERR_UNEXPECTED; @@ -1783,13 +1787,16 @@ int ObSortOpImpl::ems_heap_next(ObSortOpChunk *&chunk) { const auto f = [](ObSortOpChunk *&c, bool &is_end) { int ret = OB_SUCCESS; - if (OB_FAIL(c->iter_.get_next_row(c->row_))) { + if (OB_FAIL(c->datum_store_.get_next_row(c->row_))) { if (OB_ITER_END == ret) { is_end = true; ret = OB_SUCCESS; } else { LOG_WARN("get next row failed", K(ret)); } + } else if (OB_ISNULL(c->row_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); } return ret; }; @@ -1968,8 +1975,13 @@ int ObSortOpImpl::sort() if (OB_FAIL(ret)) { // do nothing } else if (sort_chunks_.get_size() >= 2) { - blk_holder_.release(); - set_blk_holder(nullptr); + if (use_compact_store()) { + compact_blk_holder_.release(); + } else { + default_blk_holder_.release(); + } + + set_blk_holder(nullptr, nullptr); // do merge sort int64_t ways = 0; while (OB_SUCC(ret)) { @@ -1980,7 +1992,7 @@ int ObSortOpImpl::sort() 
if (ways == sort_chunks_.get_size()) { break; } - auto input = [&](ObChunkDatumStore *&rs, const ObChunkDatumStore::StoredRow *&row) { + auto input = [&](const ObChunkDatumStore::StoredRow *&row) { int ret = OB_SUCCESS; ObSortOpChunk *chunk = NULL; if (OB_FAIL(ems_heap_next(chunk))) { @@ -1991,7 +2003,6 @@ int ObSortOpImpl::sort() ret = OB_ERR_UNEXPECTED; LOG_WARN("get chunk from heap is NULL", K(ret)); } else { - rs = &chunk->datum_store_; row = chunk->row_; } return ret; @@ -2013,8 +2024,7 @@ int ObSortOpImpl::sort() } if (OB_SUCC(ret)) { - // set iteration age for batch iteration. - set_blk_holder(&blk_holder_); + set_blk_holder(&compact_blk_holder_, &default_blk_holder_); next_stored_row_func_ = &ObSortOpImpl::ems_heap_next_stored_row; } } @@ -2206,7 +2216,11 @@ int ObSortOpImpl::get_next_batch_stored_rows(int64_t max_cnt, int64_t &read_rows LOG_WARN("get next batch failed", K(ret)); } else { read_rows = 0; - blk_holder_.release(); + if (use_compact_store()) { + compact_blk_holder_.release(); + } else { + default_blk_holder_.release(); + } for (int64_t i = 0; OB_SUCC(ret) && i < max_cnt; i++) { const ObChunkDatumStore::StoredRow *sr = NULL; if (OB_FAIL((this->*next_stored_row_func_)(sr))) { @@ -2219,8 +2233,15 @@ int ObSortOpImpl::get_next_batch_stored_rows(int64_t max_cnt, int64_t &read_rows } else { LOG_WARN("get stored rows failed", K(ret)); } + } else if (OB_ISNULL(sr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get next row", K(ret)); } else { stored_rows_[read_rows++] = const_cast(sr); + if (sort_compact_level_ != share::SORT_DEFAULT_LEVEL && sort_compact_level_ != share::SORT_COMPRESSION_LEVEL) { + // can't hold multi rows for get_batch, if we use compact/encoding + break; + } } } if (OB_ITER_END == ret && !need_rewind_) { @@ -2710,10 +2731,11 @@ int ObSortOpImpl::adjust_topn_read_rows(ObChunkDatumStore::StoredRow **stored_ro return ret; } -void ObSortOpImpl::set_blk_holder(ObChunkDatumStore::IteratedBlockHolder *blk_holder) +void 
ObSortOpImpl::set_blk_holder(ObTempBlockStore::BlockHolder *compact_blk_holder, + ObChunkDatumStore::IteratedBlockHolder *default_blk_holder) { DLIST_FOREACH_NORET(chunk, sort_chunks_) { - chunk->iter_.set_blk_holder_ptr(blk_holder); + chunk->datum_store_.set_blk_holder(compact_blk_holder, default_blk_holder); } } diff --git a/src/sql/engine/sort/ob_sort_op_impl.h b/src/sql/engine/sort/ob_sort_op_impl.h index 48157dd14..a5e54f335 100644 --- a/src/sql/engine/sort/ob_sort_op_impl.h +++ b/src/sql/engine/sort/ob_sort_op_impl.h @@ -18,19 +18,192 @@ #include "sql/engine/basic/ob_chunk_datum_store.h" #include "sql/engine/ob_sql_mem_mgr_processor.h" #include "sql/engine/sort/ob_sort_basic_info.h" +#include "share/ob_rpc_struct.h" +#include "sql/engine/basic/chunk_store/ob_compact_store.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" namespace oceanbase { namespace sql { -struct ObSortOpChunk : public common::ObDLinkBase +struct ObChunkStoreWrapper { - explicit ObSortOpChunk(const int64_t level): level_(level), datum_store_(ObModIds::OB_SQL_SORT_ROW), row_(NULL) {} +public: + explicit ObChunkStoreWrapper() : datum_store_(ObModIds::OB_SQL_SORT_ROW), is_compact_(false) {} + explicit ObChunkStoreWrapper(const bool is_compact): datum_store_(ObModIds::OB_SQL_SORT_ROW), is_compact_(is_compact) {} - int64_t level_; + void set_dir_id(int64_t dir_id) + { + if (is_compact_) { + compact_store_.set_dir_id(dir_id); + } else { + datum_store_.set_dir_id(dir_id); + } + } + + void set_allocator(common::ObIAllocator &alloc) + { + if (is_compact_) { + compact_store_.set_allocator(alloc); + } else { + datum_store_.set_allocator(alloc); + } + } + + void set_callback(ObSqlMemoryCallback *callback) + { + if (is_compact_) { + compact_store_.set_callback(callback); + } else { + datum_store_.set_callback(callback); + } + } + + void set_io_event_observer(ObIOEventObserver *observer) + { + if (is_compact_) { + compact_store_.set_io_event_observer(observer); + } else { + 
datum_store_.set_io_event_observer(observer); + } + } + + int64_t get_row_cnt() const + { + return is_compact_ ? compact_store_.get_row_cnt() : datum_store_.get_row_cnt(); + } + + int64_t get_file_size() const + { + return is_compact_ ? compact_store_.get_file_size() : datum_store_.get_file_size(); + } + + int64_t get_mem_hold() const + { + return is_compact_ ? compact_store_.get_mem_hold() : datum_store_.get_mem_hold(); + } + + int reset() + { + int ret = OB_SUCCESS; + if (is_compact_) { + if (OB_ISNULL(compact_store_.get_block_reader())) { + ret = OB_ERR_UNEXPECTED; + SQL_ENG_LOG(WARN, "the reader id null", K(ret)); + } else { + compact_store_.get_block_reader()->reset(); + } + } else { + iter_.reset(); + if (OB_FAIL(iter_.init(&datum_store_))) { + SQL_ENG_LOG(WARN, "fail to init iter", K(ret)); + } + } + return ret; + } + + int init(const int64_t mem_limit, + const uint64_t tenant_id = common::OB_SERVER_TENANT_ID, + const int64_t mem_ctx_id = common::ObCtxIds::DEFAULT_CTX_ID, + const char *label = common::ObModIds::OB_SQL_ROW_STORE, + const bool enable_dump = true, + const uint32_t row_extra_size = 0, + const bool enable_truncate = true, + const share::SortCompactLevel compact_level = share::SORT_DEFAULT_LEVEL, + const ObCompressorType compress_type = NONE_COMPRESSOR, + const ExprFixedArray *exprs = nullptr) + { + int ret = OB_SUCCESS; + if (is_compact_) { + ret = compact_store_.init(mem_limit, tenant_id, mem_ctx_id, label, enable_dump, row_extra_size, + enable_truncate, compact_level, compress_type, exprs); + } else { + ret = datum_store_.init(mem_limit, tenant_id, mem_ctx_id, label, enable_dump, row_extra_size); + } + return ret; + } + + int finish_add_row(bool need_dump) + { + int ret = OB_SUCCESS; + if (is_compact_) { + ret = compact_store_.finish_add_row(need_dump); + } else { + ret = datum_store_.finish_add_row(need_dump); + } + return ret; + } + + int dump(const bool reuse, const bool all_dump) + { + int ret = OB_SUCCESS; + if (is_compact_) { + } else { + 
ret = datum_store_.dump(reuse, all_dump); + } + return ret; + } + + int add_row(const ObChunkDatumStore::StoredRow &sr, ObChunkDatumStore::StoredRow **stored_row = nullptr) + { + int ret = OB_SUCCESS; + if (is_compact_) { + ret = compact_store_.add_row(sr, stored_row); + } else { + ret = datum_store_.add_row(sr, stored_row); + } + return ret; + } + + int get_next_row(const ObChunkDatumStore::StoredRow *&sr) + { + int ret = OB_SUCCESS; + if (is_compact_) { + if (OB_FAIL(compact_store_.get_next_row(sr))) { + if (ret != OB_ITER_END) { + SQL_ENG_LOG(WARN, "fail to get next row", K(ret)); + } + } + } else { + if (OB_FAIL(iter_.get_next_row(sr))) { + if (ret != OB_ITER_END) { + SQL_ENG_LOG(WARN, "fail to get next row", K(ret)); + } + } + } + return ret; + } + void set_blk_holder(ObTempBlockStore::BlockHolder *compact_blk_holder, + ObChunkDatumStore::IteratedBlockHolder *default_blk_holder) + { + if (is_compact_) { + compact_store_.set_blk_holder(compact_blk_holder); + } else { + iter_.set_blk_holder_ptr(default_blk_holder); + } + } + + + +public: ObChunkDatumStore datum_store_; ObChunkDatumStore::Iterator iter_; + ObCompactStore compact_store_; +private: + const bool is_compact_; +}; + +struct ObSortOpChunk : public common::ObDLinkBase +{ +public: + explicit ObSortOpChunk(const int64_t level): level_(level), row_(NULL) {} + explicit ObSortOpChunk(const int64_t level, const bool is_compact): + level_(level), datum_store_(is_compact), row_(NULL) {} + +public: + int64_t level_; + ObChunkStoreWrapper datum_store_; const ObChunkDatumStore::StoredRow *row_; private: DISALLOW_COPY_AND_ASSIGN(ObSortOpChunk); @@ -88,7 +261,10 @@ public: const int64_t part_cnt = 0, const int64_t topn_cnt = INT64_MAX, const bool is_fetch_with_ties = false, - const int64_t default_block_size = ObChunkDatumStore::BLOCK_SIZE); + const int64_t default_block_size = ObChunkDatumStore::BLOCK_SIZE, + const share::SortCompactLevel compact_level = share::SORT_DEFAULT_LEVEL, + const common::ObCompressorType 
compressor_type = common::NONE_COMPRESSOR, + const ExprFixedArray *exprs = nullptr); virtual int64_t get_prefix_pos() const { return 0; } // keep initialized, can sort same rows (same cell type, cell count, projector) after reuse. @@ -182,7 +358,11 @@ public: } else if (outputted_rows_cnt_ >= topn_cnt_ && !is_fetch_with_ties_ && !use_partition_topn_sort_) { ret = OB_ITER_END; } else { - blk_holder_.release(); + if (use_compact_store()) { + compact_blk_holder_.release(); + } else { + default_blk_holder_.release(); + } ret = (this->*next_stored_row_func_)(sr); if (OB_UNLIKELY(common::OB_ITER_END == ret) && !need_rewind_) { reuse(); @@ -483,7 +663,7 @@ protected: ret = OB_ERR_UNEXPECTED; SQL_ENG_LOG(WARN, "unexpected status: store row is null", K(ret)); } else if (OB_FAIL(sr->to_expr(exprs, *eval_ctx_))) { - SQL_ENG_LOG(WARN, "convert store row to expr value failed", K(ret)); + SQL_ENG_LOG(WARN, "convert store row to expr value failed", K(ret), KPC(sr)); } return ret; } @@ -553,8 +733,9 @@ protected: int is_equal_part(const ObChunkDatumStore::StoredRow *l, const ObChunkDatumStore::StoredRow *r, bool &is_equal); int do_partition_sort(common::ObIArray &rows, const int64_t rows_begin, const int64_t rows_end); + void set_blk_holder(ObTempBlockStore::BlockHolder *compact_blk_holder, + ObChunkDatumStore::IteratedBlockHolder *default_blk_holder); int do_partition_topn_sort(); - void set_blk_holder(ObChunkDatumStore::IteratedBlockHolder *blk_holder); bool is_in_same_heap(const SortStoredRow *l, const SortStoredRow*r); // for topn sort @@ -591,6 +772,7 @@ protected: SortStoredRow *&new_row); int generate_last_ties_row(const ObChunkDatumStore::StoredRow *orign_row); int adjust_topn_read_rows(ObChunkDatumStore::StoredRow **stored_rows, int64_t &read_cnt); + bool use_compact_store() { return sort_compact_level_ != SORT_DEFAULT_LEVEL; } // for partition topn int init_partition_topn(); void reuse_part_topn_heap(); @@ -677,7 +859,11 @@ protected: ObSEArray heap_nodes_; int64_t 
cur_heap_idx_; common::ObIArray *rows_; - ObChunkDatumStore::IteratedBlockHolder blk_holder_; + ObTempBlockStore::BlockHolder compact_blk_holder_; + ObChunkDatumStore::IteratedBlockHolder default_blk_holder_; + share::SortCompactLevel sort_compact_level_; + const ExprFixedArray *sort_exprs_; + common::ObCompressorType compress_type_; }; class ObInMemoryTopnSortImpl; diff --git a/src/sql/engine/table/ob_table_scan_op.cpp b/src/sql/engine/table/ob_table_scan_op.cpp index 233d6b4f5..2b3408175 100644 --- a/src/sql/engine/table/ob_table_scan_op.cpp +++ b/src/sql/engine/table/ob_table_scan_op.cpp @@ -2718,6 +2718,7 @@ int ObTableScanOp::report_ddl_column_checksum() } if (OB_SUCC(ret)) { + LOG_INFO("report ddl checksum table scan", K(tablet_id), K(checksum_items)); if (OB_FAIL(ObDDLChecksumOperator::update_checksum(checksum_items, *GCTX.sql_proxy_))) { LOG_WARN("fail to update checksum", K(ret)); } else { diff --git a/src/sql/ob_sql_define.h b/src/sql/ob_sql_define.h index c99d41c2f..1ec44df16 100644 --- a/src/sql/ob_sql_define.h +++ b/src/sql/ob_sql_define.h @@ -649,6 +649,70 @@ ObTMSegmentArrayset_tenant_id(MTL_ID()); } +static bool is_fixed_length(ObObjType type) { + bool is_fixed = true; + ObObjTypeClass tc = ob_obj_type_class(type); + OB_ASSERT(tc >= ObNullTC && tc < ObMaxTC); + if (ObNumberTC == tc + || ObExtendTC == tc + || ObTextTC == tc + || ObStringTC == tc + || ObEnumSetInnerTC == tc + || ObRawTC == tc + || ObRowIDTC == tc + || ObLobTC == tc + || ObJsonTC == tc + || ObGeometryTC == tc + || ObUserDefinedSQLTC == tc + || ObDecimalIntTC == tc) { + is_fixed = false; + } + return is_fixed; +} + +static int16_t get_type_fixed_length(ObObjType type) { + int16_t len = 0; + ObObjTypeClass tc = ob_obj_type_class(type); + OB_ASSERT(tc >= ObNullTC && tc < ObMaxTC); + switch (tc) + { + case ObUIntTC: + case ObIntTC: + case ObDoubleTC: + case ObDateTimeTC: + case ObTimeTC: + case ObBitTC: + case ObEnumSetTC: + { + len = 8; + break; + } + case ObDateTC: + case ObFloatTC: + { 
+ len = 4; + break; + } + case ObYearTC: + { + len = 1; + break; + } + case ObOTimestampTC: { + len = (type == ObTimestampTZType) ? 12 : 10; + break; + } + case ObIntervalTC: + { + len = (type == ObIntervalYMType) ? 8 : 12; + break; + } + default: + break; + } + return len; +} + } // namespace sql } // namespace oceanbase #endif /* OCEANBASE_SQL_OB_SQL_DEFINE_H_ */ diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 46396d69c..e7051d342 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -2658,8 +2658,8 @@ int ObJoinOrder::will_use_skip_scan(const uint64_t table_id, for (int64_t i = 0; OB_SUCC(ret) && i < ss_offset; ++i) { if (OB_FAIL(table_meta->add_column_meta_no_dup(column_items.at(i).column_id_ , ctx))) { LOG_WARN("failed to add column meta no duplicate", K(ret)); - } - } + } + } } LOG_TRACE("check use skip scan", K(helper.is_inner_path_), K(hint_force_skip_scan), K(hint_force_no_skip_scan), K(use_skip_scan)); diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 9e4c2d7c9..0f69ec323 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -14057,7 +14057,8 @@ int ObLogPlan::will_use_column_store(const uint64_t table_id, LOG_WARN("unexpect null table schema", K(ret)); } else if (OB_FAIL(schema->has_all_column_group(has_all_column_group))) { LOG_WARN("failed to check has row store", K(ret)); - } else if (OB_FALSE_IT(has_normal_column_group = schema->is_normal_column_store_table())) { + } else if (OB_FAIL(schema->get_is_column_store(has_normal_column_group))) { + LOG_WARN("failed to get is column store", K(ret)); } else if (OB_FAIL(get_log_plan_hint().check_use_column_store(table_id, hint_force_use_column_store, hint_force_no_use_column_store))) { diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l index 5c8adeea9..291f9ee03 100644 --- 
a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -95,9 +95,12 @@ mysql_compatible_comment_with_version \/\*\![0-9]{5} mysql_compatible_comment_without_version \/\*\! mysql_compatible_comment_end \*\/ +ColumnStorePattern (WITH{whitespace}COLUMN{whitespace}GROUP) %% -INTERVAL { +{ColumnStorePattern} { REPUT_TOKEN_NEG_SIGN(WITH_COLUMN_GROUP); } +INTERVAL { + ParseResult *p = (ParseResult *)yyextra; REPUT_NEG_SIGN(p); return INTERVAL; diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index 244d13f16..cc68a3991 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -358,7 +358,7 @@ END_P SET_VAR DELIMITER VALID VALUE VARIANCE VARIABLES VERBOSE VERIFY VIEW VISIBLE VIRTUAL_COLUMN_ID VALIDATE VAR_POP VAR_SAMP - WAIT WARNINGS WASH WEEK WEIGHT_STRING WHENEVER WORK WRAPPER WINDOW WEAK + WAIT WARNINGS WASH WEEK WEIGHT_STRING WHENEVER WORK WRAPPER WINDOW WEAK WITH_COLUMN_GROUP X509 XA XML @@ -445,7 +445,7 @@ END_P SET_VAR DELIMITER %type parameterized_trim %type opt_with_consistent_snapshot opt_config_scope opt_index_keyname opt_full %type opt_work begin_stmt commit_stmt rollback_stmt opt_ignore xa_begin_stmt xa_end_stmt xa_prepare_stmt xa_commit_stmt xa_rollback_stmt -%type alter_table_stmt alter_table_actions alter_table_action_list alter_table_action alter_column_option alter_index_option alter_constraint_option standalone_alter_action alter_partition_option opt_to alter_tablegroup_option opt_table opt_tablegroup_option_list alter_tg_partition_option +%type alter_table_stmt alter_table_actions alter_table_action_list alter_table_action alter_column_option alter_index_option alter_constraint_option standalone_alter_action alter_partition_option opt_to alter_tablegroup_option opt_table opt_tablegroup_option_list alter_tg_partition_option alter_column_group_option %type tablegroup_option_list tablegroup_option alter_tablegroup_actions 
alter_tablegroup_action tablegroup_option_list_space_seperated %type opt_tg_partition_option tg_hash_partition_option tg_key_partition_option tg_range_partition_option tg_subpartition_option tg_list_partition_option %type alter_column_behavior opt_set opt_position_column @@ -4941,7 +4941,17 @@ column_definition ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $5); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $7); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $2, col_list, index_option, $3, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $2, col_list, index_option, $3, $8, NULL); + $$->value_ = 0; +} +| key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option with_column_group +{ + (void)($1); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $5); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $7); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $2, col_list, index_option, $3, $8, $9); $$->value_ = 0; } | UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option @@ -4951,7 +4961,17 @@ column_definition ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $3, col_list, index_option, $4, $9); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $3, col_list, index_option, $4, $9, NULL); + $$->value_ = 1; +} +| UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option with_column_group +{ + (void)($2); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, 
T_INDEX_COLUMN_LIST, $6); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $3, col_list, index_option, $4, $9, $10); $$->value_ = 1; } | CONSTRAINT opt_constraint_name UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list @@ -4961,7 +4981,17 @@ column_definition ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $8); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $10); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $5 ? $5 : $2, col_list, index_option, $6, NULL); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $5 ? $5 : $2, col_list, index_option, $6, NULL, NULL); + $$->value_ = 1; +} +| CONSTRAINT opt_constraint_name UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list with_column_group +{ + (void)($4); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $8); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $10); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $5 ? 
$5 : $2, col_list, index_option, $6, NULL, $11); $$->value_ = 1; } | CONSTRAINT opt_constraint_name FOREIGN KEY opt_index_name '(' column_name_list ')' REFERENCES relation_factor '(' column_name_list ')' opt_match_option opt_reference_option_list @@ -4983,7 +5013,17 @@ column_definition ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $3, col_list, index_option, $4, NULL); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $3, col_list, index_option, $4, NULL, NULL); + $$->value_ = 2; +} +| SPATIAL opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list with_column_group +{ + (void)($2); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 6, $3, col_list, index_option, $4, NULL, $9); $$->value_ = 2; } | FOREIGN KEY opt_index_name '(' column_name_list ')' REFERENCES relation_factor '(' column_name_list ')' opt_match_option opt_reference_option_list @@ -6866,9 +6906,9 @@ column_group_list ',' column_group_element ; with_column_group: -WITH COLUMN GROUP FOR column_group_list +WITH_COLUMN_GROUP '(' column_group_list ')' { - merge_nodes($$, result, T_COLUMN_GROUP ,$5); + merge_nodes($$, result, T_COLUMN_GROUP ,$3); } ; @@ -8111,13 +8151,33 @@ opt_index_option_list opt_partition_option merge_nodes(idx_columns, result, T_INDEX_COLUMN_LIST, $10); merge_nodes(index_options, result, T_TABLE_OPTION_LIST, $12); $5->value_ = $2[0]; /* index prefix keyname */ - malloc_non_terminal_node($$, result->malloc_pool_, T_CREATE_INDEX, 8, + malloc_non_terminal_node($$, result->malloc_pool_, T_CREATE_INDEX, 9, $5, /* index name */ $8, /* table name */ idx_columns, /* index 
columns */ index_options, /* index option(s) */ $6, /* index method */ $13, /* partition method*/ + NULL, /* column group */ + $4, /* if not exists*/ + $1); /* index hint*/ +} +| create_with_opt_hint opt_index_keyname INDEX opt_if_not_exists normal_relation_factor opt_index_using_algorithm ON relation_factor '(' sort_column_list ')' +opt_index_option_list opt_partition_option with_column_group +{ + ParseNode *idx_columns = NULL; + ParseNode *index_options = NULL; + merge_nodes(idx_columns, result, T_INDEX_COLUMN_LIST, $10); + merge_nodes(index_options, result, T_TABLE_OPTION_LIST, $12); + $5->value_ = $2[0]; /* index prefix keyname */ + malloc_non_terminal_node($$, result->malloc_pool_, T_CREATE_INDEX, 9, + $5, /* index name */ + $8, /* table name */ + idx_columns, /* index columns */ + index_options, /* index option(s) */ + $6, /* index method */ + $13, /* partition method*/ + $14, /* column group */ $4, /* if not exists*/ $1); /* index hint*/ }; @@ -15005,6 +15065,14 @@ ALTER EXTERNAL TABLE relation_factor alter_table_actions malloc_non_terminal_node($$, result->malloc_pool_, T_ALTER_TABLE, 3, $4, table_actions, external_node); $$->value_ = 0; } +| +ALTER TABLE relation_factor alter_column_group_option +{ + ParseNode *table_actions = NULL; + merge_nodes(table_actions, result, T_ALTER_TABLE_ACTION_LIST, $4); + malloc_non_terminal_node($$, result->malloc_pool_, T_ALTER_TABLE, 3, $3, table_actions, NULL); + $$->value_ = 0; +} ; alter_table_actions: @@ -15340,10 +15408,19 @@ key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' o ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $5); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $7); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 5, $2, col_list, index_option, $3, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $2, col_list, index_option, $3, $8, NULL); $$->value_ = 0; } -; +| key_or_index opt_index_name 
opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option with_column_group +{ + (void)($1); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $5); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $7); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $2, col_list, index_option, $3, $8, $9); + $$->value_ = 0; +}; add_unique_key_opt: add_unique_key @@ -15364,7 +15441,17 @@ UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 5, $3, col_list, index_option, $4, $9); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $3, col_list, index_option, $4, $9, NULL); + $$->value_ = 1; +} +| UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option with_column_group +{ + (void)($2); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $3, col_list, index_option, $4, $9, $10); $$->value_ = 1; } ; @@ -15388,7 +15475,18 @@ CONSTRAINT opt_constraint_name UNIQUE opt_key_or_index opt_index_name opt_index_ ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $8); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $10); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 5, $5 ? $5 : $2, col_list, index_option, $6, $11); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $5 ? 
$5 : $2, col_list, index_option, $6, $11, NULL); + $$->value_ = 1; +} +| +CONSTRAINT opt_constraint_name UNIQUE opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option with_column_group +{ + (void)($4); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $8); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $10); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $5 ? $5 : $2, col_list, index_option, $6, $11, $12); $$->value_ = 1; } ; @@ -15436,7 +15534,18 @@ SPATIAL opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_colum ParseNode *index_option = NULL; merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); - malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 5, $3, col_list, index_option, $4, NULL); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $3, col_list, index_option, $4, NULL, NULL); + $$->value_ = 2; +} +| SPATIAL opt_key_or_index opt_index_name opt_index_using_algorithm '(' sort_column_list ')' opt_index_option_list opt_partition_option with_column_group +{ + (void)($2); + (void)($9); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $6); + merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $8); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX_ADD, 6, $3, col_list, index_option, $4, NULL, $10); $$->value_ = 2; } ; @@ -15504,6 +15613,21 @@ VISIBLE } ; +alter_column_group_option: +ADD COLUMN GROUP '(' column_group_list ')' +{ + ParseNode *column_group_list = NULL; + merge_nodes(column_group_list, result, T_COLUMN_GROUP_ADD, $5); + malloc_non_terminal_node($$, result->malloc_pool_, T_ALTER_COLUMN_GROUP_OPTION, 1, column_group_list); +} +| DROP COLUMN GROUP '(' column_group_list ')' +{ + ParseNode 
*column_group_list = NULL; + merge_nodes(column_group_list, result, T_COLUMN_GROUP_DROP,$5); + malloc_non_terminal_node($$, result->malloc_pool_, T_ALTER_COLUMN_GROUP_OPTION, 1, column_group_list); +} +; + alter_column_option: ADD COLUMN column_definition { diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index 1606e30f7..388da5f49 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -698,6 +698,12 @@ int ObAlterTableResolver::resolve_action_list(const ParseNode &node) } break; } + case T_ALTER_COLUMN_GROUP_OPTION: { + if (OB_FAIL(resolve_alter_column_groups(*action_node))) { + SQL_RESV_LOG(WARN, "Resolve column group option failed!", K(ret)); + } + break; + } case T_ALTER_INDEX_OPTION_ORACLE: { alter_table_stmt->set_alter_table_index(); if (OB_FAIL(resolve_index_options_oracle(*action_node))) { @@ -926,7 +932,7 @@ int ObAlterTableResolver::resolve_action_list(const ParseNode &node) } } if (OB_FAIL(ret)) { - } else if (OB_FAIL(resolve_column_group())) { + } else if (OB_FAIL(resolve_column_group_for_column())) { LOG_WARN("failed to resolve column group", K(ret)); } else if (OB_FAIL(check_skip_index(alter_table_stmt->get_alter_table_arg().alter_table_schema_))) { LOG_WARN("failed to resolve skip index", K(ret)); @@ -1380,6 +1386,7 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) ParseNode *column_list_node = nullptr; ParseNode *table_option_node = nullptr; ParseNode *index_partition_option = nullptr; + ParseNode *colulmn_group_node = nullptr; bool is_index_part_specified = false; CHECK_COMPATIBILITY_MODE(session_info_); if (is_unique_key && lib::is_oracle_mode()) { @@ -1393,10 +1400,16 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) } } else { // mysql mode - index_name_node = node.children_[0]; - column_list_node = node.children_[1]; - table_option_node = node.children_[2]; - 
index_partition_option = node.children_[4]; + if (OB_UNLIKELY(ALTER_INDEX_CHILD_NUM != node.num_child_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid parse tree", K(ret), K(node.num_child_)); + } else { + index_name_node = node.children_[0]; + column_list_node = node.children_[1]; + table_option_node = node.children_[2]; + index_partition_option = node.children_[4]; + colulmn_group_node = node.children_[5]; + } } ObAlterTableStmt *alter_table_stmt = get_alter_table_stmt(); if (OB_ISNULL(alter_table_stmt)) { @@ -1554,6 +1567,21 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) is_index_part_specified = true; } } + + if (OB_SUCC(ret) && is_mysql_mode()) { + if (OB_ISNULL(colulmn_group_node)) { + // no cg, ignore + } else if (T_COLUMN_GROUP != colulmn_group_node->type_ || colulmn_group_node->num_child_ <= 0) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "invalid argument", KR(ret), K(colulmn_group_node->type_), K(colulmn_group_node->num_child_)); + } else if (OB_ISNULL(colulmn_group_node->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node is null", K(ret)); + } else if (OB_FAIL(resolve_index_column_group(colulmn_group_node, *create_index_arg))) { + SQL_RESV_LOG(WARN, "resolve index column group failed", K(ret)); + } + } + if (OB_SUCC(ret) && lib::is_mysql_mode()) { if (OB_FAIL(set_index_tablespace(*table_schema_, *create_index_arg))) { LOG_WARN("fail to set index tablespace", K(ret)); @@ -5933,7 +5961,7 @@ int ObAlterTableResolver::resolve_modify_all_trigger(const ParseNode &node) return ret; } -int ObAlterTableResolver::resolve_column_group() +int ObAlterTableResolver::resolve_column_group_for_column() { int ret = OB_SUCCESS; bool is_normal_column_store_table = false; @@ -5946,7 +5974,8 @@ int ObAlterTableResolver::resolve_column_group() } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { LOG_WARN("fail to get min data version", KR(ret), K(tenant_id)); } else if (compat_version < DATA_VERSION_4_3_0_0) { 
//skip resolve cg - } else if (table_schema_->is_normal_column_store_table()) { + } else if (table_schema_->get_column_group_count() > 0) { + // TODO, wait to support table update from 4.1 or less ObColumnGroupSchema column_group; char cg_name[OB_MAX_COLUMN_GROUP_NAME_LENGTH]; ObArray column_ids; @@ -5989,6 +6018,129 @@ int ObAlterTableResolver::resolve_column_group() return ret; } +int ObAlterTableResolver::resolve_alter_column_groups(const ParseNode &node) +{ + int ret = OB_SUCCESS; + ObAlterTableStmt *alter_table_stmt = get_alter_table_stmt(); + if (OB_ISNULL(alter_table_stmt) || OB_UNLIKELY(T_ALTER_COLUMN_GROUP_OPTION != node.type_ || + OB_ISNULL(node.children_))) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "get alter table stmt failed", K(ret), K(node.type_), KP(node.children_)); + } else { + const ParseNode *column_group_node = node.children_[0]; + uint64_t compat_version = 0; + + ObAlterTableArg &alter_table_arg = alter_table_stmt->get_alter_table_arg(); + share::schema::AlterTableSchema &alter_table_schema = alter_table_arg.alter_table_schema_; + const uint64_t column_cnt = table_schema_->get_column_count(); + const uint64_t tenant_id = table_schema_->get_tenant_id(); + + if (OB_ISNULL(column_group_node) || column_group_node->num_child_ <= 0) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "invalid parse tree, column group node is null or have no children!", + K(ret), KP(column_group_node)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + SQL_RESV_LOG(WARN, "fail to get min data version", K(ret)); + } else if (compat_version < DATA_VERSION_4_3_0_0) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "data_version not support for altering column group", K(ret), K(compat_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3, alter column group"); + } else if (!need_column_group(*table_schema_)) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "table don't support alter column group", K(ret)); + } 
else { + alter_table_schema.set_column_store(true); + if (column_group_node->type_ == T_COLUMN_GROUP_ADD) { + alter_table_stmt->get_alter_table_arg().alter_table_schema_.alter_type_ = OB_DDL_ADD_COLUMN_GROUP; + } else if (column_group_node->type_ == T_COLUMN_GROUP_DROP) { + alter_table_stmt->get_alter_table_arg().alter_table_schema_.alter_type_ = OB_DDL_DROP_COLUMN_GROUP; + } else { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "invalid parse tree ", K(ret), K(column_group_node->type_)); + } + if (OB_SUCC(ret)) { + ObColumnGroupSchema column_group_schema; + bool sql_exist_all_column_group = false; + bool sql_exist_single_column_group = false; + alter_table_schema.set_max_used_column_group_id(table_schema_->get_max_used_column_group_id()); + + /* check exist type and whether exist repeation of column group type*/ + for (int64_t i = 0; OB_SUCC(ret) && i < column_group_node->num_child_; ++i) { + if (OB_ISNULL(column_group_node->children_[i])) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "column group node children is null", K(ret), K(i)); + } else if (column_group_node->children_[i]->type_ == T_ALL_COLUMN_GROUP) { + if (sql_exist_all_column_group) { + ret = OB_ERR_COLUMN_GROUP_DUPLICATE; + SQL_RESV_LOG(WARN, "all column group already exist in sql", + K(ret), K(column_group_node->children_[i]->type_)); + const ObString error_msg = "all column group"; + LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, error_msg.length(), error_msg.ptr()); + } else { + sql_exist_all_column_group = true; + } + } else if (column_group_node->children_[i]-> type_ == T_SINGLE_COLUMN_GROUP) { + if (sql_exist_single_column_group) { + ret = OB_ERR_COLUMN_GROUP_DUPLICATE; + SQL_RESV_LOG(WARN, "single column group already exist in sql", + K(ret), K(column_group_node->children_[i]->type_)); + const ObString error_msg = "single column group"; + LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, error_msg.length(), error_msg.ptr()); + } else { + sql_exist_single_column_group = true; + } + } else { + 
ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "Resovle unsupported column group type", + K(ret), K(column_group_node->children_[i]->type_)); + } + } + + /* all column group */ + /* column group in resolver do not use real column group id*/ + /* ddl service use column group name to distingush them*/ + if (OB_SUCC(ret) && sql_exist_all_column_group) { + column_group_schema.reset(); + if (OB_FAIL(ObSchemaUtils::build_all_column_group(*table_schema_, session_info_->get_effective_tenant_id(), + alter_table_schema.get_max_used_column_group_id() + 1, + column_group_schema))) { + SQL_RESV_LOG(WARN, "build all column group failed", K(ret)); + } else if (OB_FAIL(alter_table_schema.add_column_group(column_group_schema))) { + SQL_RESV_LOG(WARN, "fail to add column group schema", K(ret)); + } + } + + /* single column group*/ + if (OB_SUCC(ret) && sql_exist_single_column_group) { + column_group_schema.reset(); + ObTableSchema::const_column_iterator iter_begin = table_schema_->column_begin(); + ObTableSchema::const_column_iterator iter_end = table_schema_->column_end(); + for (;OB_SUCC(ret) && iter_begin != iter_end; ++iter_begin) { + ObColumnSchemaV2 *column = (*iter_begin); + uint64_t cg_id = alter_table_schema.get_max_used_column_group_id() + 1; + if (OB_ISNULL(column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column schema should not be null", K(ret)); + } else if (column->is_virtual_generated_column()) { + /* skip virtual column*/ + } else if (OB_FAIL(ObSchemaUtils::build_single_column_group( + *table_schema_, column, session_info_->get_effective_tenant_id(), + cg_id, column_group_schema))) { + LOG_WARN("fail to build single column group", K(ret)); + } else if (column_group_schema.is_valid()) { + if (OB_FAIL(alter_table_schema.add_column_group(column_group_schema))) { + LOG_WARN("fail to add single column group to table schema", K(ret), K(column_group_schema)); + } + } + + } + } + } + } + } + return ret; +} + bool ObAlterTableResolver::is_ttl_column(const ObString 
&orig_column_name, const ObIArray &ttl_columns) { bool bret = false; diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.h b/src/sql/resolver/ddl/ob_alter_table_resolver.h index cd9d0ffcd..d4f2d85d7 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.h +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.h @@ -40,6 +40,7 @@ class ObAlterTableResolver : public ObDDLResolver static const int64_t TABLE = 0; // 0. table_node static const int64_t ACTION_LIST = 1; // 1. alter table action list static const int64_t SPECIAL_TABLE_TYPE = 2; // 2. special table type + static const int64_t ALTER_INDEX_CHILD_NUM = 6; public: explicit ObAlterTableResolver(ObResolverParams ¶ms); virtual ~ObAlterTableResolver(); @@ -179,8 +180,9 @@ private: int check_alter_part_key_allowed(const share::schema::ObTableSchema &table_schema, const share::schema::ObColumnSchemaV2 &src_col_schema, const share::schema::ObColumnSchemaV2 &dst_col_schema); - int resolve_column_group(); + int resolve_column_group_for_column(); int generate_index_arg_cascade(); + int resolve_alter_column_groups(const ParseNode &node); bool is_ttl_column(const common::ObString &orig_column_name, const ObIArray &ttl_columns); int check_alter_column_schemas_valid(ObAlterTableStmt &stmt); diff --git a/src/sql/resolver/ddl/ob_create_index_resolver.cpp b/src/sql/resolver/ddl/ob_create_index_resolver.cpp index 86adde065..6b17b5fa2 100644 --- a/src/sql/resolver/ddl/ob_create_index_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_index_resolver.cpp @@ -472,7 +472,7 @@ int ObCreateIndexResolver::resolve(const ParseNode &parse_tree) LOG_WARN("session_info_ is null", K(ret)); } else { stmt_ = crt_idx_stmt; - if_not_exist_node = parse_tree.children_[6]; + if_not_exist_node = parse_tree.children_[7]; } // 将session中的信息添写到 stmt 的 arg 中 @@ -591,20 +591,34 @@ int ObCreateIndexResolver::resolve(const ParseNode &parse_tree) } } + // index column_group + if (OB_FAIL(ret)) { + } else if (NULL != parse_node.children_[6]) { + if 
(T_COLUMN_GROUP != parse_node.children_[6]->type_ || parse_node.children_[6]->num_child_ <= 0) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "invalid argument", K(ret), K(parse_node.children_[6]->type_), K(parse_node.children_[6]->num_child_)); + } else if (OB_ISNULL(parse_node.children_[6]->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node is null", K(ret)); + } else if (OB_FAIL(resolve_index_column_group(parse_node.children_[6], crt_idx_stmt->get_create_index_arg()))) { + SQL_RESV_LOG(WARN, "resolve index column group failed", K(ret)); + } + } + if (OB_SUCC(ret)) { crt_idx_stmt->set_if_not_exists(NULL != if_not_exist_node); // 设置block size, 如果未指定block size,则使用主表block size // 否则使用默认block_size if (!is_spec_block_size) { - ObCreateIndexArg &index_arg =crt_idx_stmt->get_create_index_arg(); + ObCreateIndexArg &index_arg = crt_idx_stmt->get_create_index_arg(); index_arg.index_option_.block_size_ = tbl_schema->get_block_size(); } } } if (OB_SUCC(ret)) { - const ParseNode *parallel_node = parse_tree.children_[7]; - if (OB_FAIL(resolve_hints(parse_tree.children_[7], *crt_idx_stmt, *tbl_schema))) { + const ParseNode *parallel_node = parse_tree.children_[8]; + if (OB_FAIL(resolve_hints(parse_tree.children_[8], *crt_idx_stmt, *tbl_schema))) { LOG_WARN("resolve hints failed", K(ret)); } } diff --git a/src/sql/resolver/ddl/ob_create_index_resolver.h b/src/sql/resolver/ddl/ob_create_index_resolver.h index 174aad20d..44a1e6b26 100644 --- a/src/sql/resolver/ddl/ob_create_index_resolver.h +++ b/src/sql/resolver/ddl/ob_create_index_resolver.h @@ -26,7 +26,7 @@ class ObCreateIndexStmt; class ObCreateIndexResolver : public ObDDLResolver { public: - static const int64_t CREATE_INDEX_CHILD_NUM = 8; + static const int64_t CREATE_INDEX_CHILD_NUM = 9; explicit ObCreateIndexResolver(ObResolverParams ¶ms); virtual ~ObCreateIndexResolver(); diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.cpp b/src/sql/resolver/ddl/ob_create_table_resolver.cpp index 
e140de09b..c1d28e3ba 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_table_resolver.cpp @@ -2632,6 +2632,24 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) is_index_part_specified = true; } } + + // index column_group + if (OB_SUCC(ret) && lib::is_mysql_mode()) { //only mysql support create table with index + if (node->num_child_ < 6) { + // no cg, ignore + } else if (ObItemType::T_INDEX == node->type_ && NULL != node->children_[5]) { + if (T_COLUMN_GROUP != node->children_[5]->type_ || node->children_[5]->num_child_ <= 0) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "invalid argument", KR(ret), K(node->type_), K(node->num_child_)); + } else if (OB_ISNULL(node->children_[5]->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("node is null", K(ret)); + } else if (OB_FAIL(resolve_index_column_group(node->children_[5], index_arg_))) { + SQL_RESV_LOG(WARN, "resolve index column group failed", K(ret)); + } + } + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(generate_index_arg())) { SQL_RESV_LOG(WARN, "generate index arg failed", K(ret)); @@ -2953,65 +2971,6 @@ uint64_t ObCreateTableResolver::gen_column_group_id() return ++cur_column_group_id_; } -// TEMP: if use sql 'create table xxx with column group for yyy', single_type must exist. 
-int ObCreateTableResolver::parse_cg_node(const ParseNode &cg_node, bool &exist_all_column_group) const -{ - int ret = OB_SUCCESS; - exist_all_column_group = false; - - if (OB_UNLIKELY(T_COLUMN_GROUP != cg_node.type_ || cg_node.num_child_ <= 0)) { - ret = OB_INVALID_ARGUMENT; - SQL_RESV_LOG(WARN, "invalid argument", KR(ret), K(cg_node.type_), K(cg_node.num_child_)); - } else { - const int64_t num_child = cg_node.num_child_; - bool exist_single_type = false; - bool already_exist_all_column_group = false; - bool already_exist_single_column_group = false; - // handle all_type column_group & single_type column_group - for (int64_t i = 0; OB_SUCC(ret) && (i < num_child); ++i) { - ParseNode *node = cg_node.children_[i]; - if (OB_ISNULL(node)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("children of column_group_list should not be null", KR(ret)); - } else if (T_ALL_COLUMN_GROUP == node->type_) { - if (already_exist_all_column_group) { - ret = OB_ERR_COLUMN_GROUP_DUPLICATE; - SQL_RESV_LOG(WARN, "all column group already exist in sql", - K(ret), K(node->children_[i]->type_)); - const ObString error_msg = "all columns"; - LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, error_msg.length(), error_msg.ptr()); - } else { - already_exist_all_column_group = true; - exist_all_column_group = true; - } - } else if (T_SINGLE_COLUMN_GROUP == node->type_) { - if (already_exist_single_column_group) { - ret = OB_ERR_COLUMN_GROUP_DUPLICATE; - SQL_RESV_LOG(WARN, "single column group already exist in sql", - K(ret), K(node->type_)); - const ObString error_msg = "each column"; - LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, error_msg.length(), error_msg.ptr()); - } else { - already_exist_single_column_group = true; - exist_single_type = true; - } - } else if (T_NORMAL_COLUMN_GROUP == node->type_) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("column store table with customized column group are not supported", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "column store tables with customized column group 
are"); - } - } - - if (OB_SUCC(ret) && exist_single_type == false) { - ret = OB_NOT_SUPPORTED; - SQL_RESV_LOG(WARN, "each column not exist", KR(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "column store tables without each column group are"); - } - } - - return ret; -} - int ObCreateTableResolver::resolve_column_group(const ParseNode *cg_node) { int ret = OB_SUCCESS; @@ -3040,7 +2999,7 @@ int ObCreateTableResolver::resolve_column_group(const ParseNode *cg_node) // handle all_type column_group & single_type column_group if (OB_NOT_NULL(cg_node)) { bool exist_all_column_group = false; - + bool exist_each_column_group = false; if (OB_FAIL(parse_cg_node(*cg_node, exist_all_column_group))) { LOG_WARN("fail to parse cg node", KR(ret)); } else { @@ -3049,31 +3008,20 @@ int ObCreateTableResolver::resolve_column_group(const ParseNode *cg_node) ObTableSchema::const_column_iterator tmp_begin = table_schema.column_begin(); ObTableSchema::const_column_iterator tmp_end = table_schema.column_end(); for (; OB_SUCC(ret) && (tmp_begin != tmp_end); tmp_begin++) { - tmp_cg.reset(); - tmp_column_ids.reset(); - char tmp_cg_name[OB_MAX_COLUMN_GROUP_NAME_LENGTH]; - MEMSET(tmp_cg_name, '\0', OB_MAX_COLUMN_GROUP_NAME_LENGTH); - ObColumnSchemaV2 *column = (*tmp_begin); - if (OB_ISNULL(column)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("column should not be null", KR(ret)); - } else if (column->is_virtual_generated_column()) { - // skip virtual column - } else if (OB_FAIL(tmp_column_ids.push_back(column->get_column_id()))) { - LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); - } else if (0 >= snprintf(tmp_cg_name, OB_MAX_COLUMN_GROUP_NAME_LENGTH, "%s_%s", - OB_COLUMN_GROUP_NAME_PREFIX, column->get_column_name_str().ptr())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to snprintf", KR(ret), KPC(column)); - } else if (OB_FAIL(build_column_group(table_schema, ObColumnGroupType::SINGLE_COLUMN_GROUP, - tmp_cg_name, tmp_column_ids, gen_column_group_id(), tmp_cg))) { - 
LOG_WARN("fail to build single type column_group", KR(ret)); - } else if (OB_FAIL(table_schema.add_column_group(tmp_cg))) { - LOG_WARN("fail to add single type column group", KR(ret), K(tmp_cg)); - } else if (column->is_rowkey_column() || exist_all_column_group) {//if not exist all cg, build rowkey cg - if (OB_FAIL(column_ids.push_back(column->get_column_id()))) { - LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); + if (column->is_virtual_generated_column()){ + /*skip virtual generated column group*/ + } else if (OB_FAIL(ObSchemaUtils::build_single_column_group( + table_schema, column, session_info_->get_effective_tenant_id(), + gen_column_group_id(), tmp_cg))) { + LOG_WARN("fail to build single column group"); + } else if (tmp_cg.is_valid()) { + if (OB_FAIL(table_schema.add_column_group(tmp_cg))) { + LOG_WARN("fail to add single type column group", KR(ret), K(tmp_cg)); + } else if (column->is_rowkey_column() || exist_all_column_group) {//if not exist all cg, build rowkey cg + if (OB_FAIL(column_ids.push_back(column->get_column_id()))) { + LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); + } } } } @@ -3112,32 +3060,19 @@ int ObCreateTableResolver::resolve_column_group(const ParseNode *cg_node) ObTableSchema::const_column_iterator tmp_begin = table_schema.column_begin(); ObTableSchema::const_column_iterator tmp_end = table_schema.column_end(); for (; OB_SUCC(ret) && (tmp_begin != tmp_end); tmp_begin++) { - tmp_cg.reset(); - tmp_column_ids.reset(); - char tmp_cg_name[OB_MAX_COLUMN_GROUP_NAME_LENGTH]; - MEMSET(tmp_cg_name, '\0', OB_MAX_COLUMN_GROUP_NAME_LENGTH); - ObColumnSchemaV2 *column = (*tmp_begin); - if (OB_ISNULL(column)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("column should not be null", KR(ret)); - } else if (column->is_virtual_generated_column()) { - // skip virtual column - LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); - } else if 
(OB_FAIL(tmp_column_ids.push_back(column->get_column_id()))) { - LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); - } else if (0 >= snprintf(tmp_cg_name, OB_MAX_COLUMN_GROUP_NAME_LENGTH, "%s_%s", - OB_COLUMN_GROUP_NAME_PREFIX, column->get_column_name_str().ptr())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to snprintf", KR(ret), KPC(column)); - } else if (OB_FAIL(build_column_group(table_schema, ObColumnGroupType::SINGLE_COLUMN_GROUP, - tmp_cg_name, tmp_column_ids, gen_column_group_id(), tmp_cg))) { - LOG_WARN("fail to build single type column_group", KR(ret)); - } else if (OB_FAIL(table_schema.add_column_group(tmp_cg))) { - LOG_WARN("fail to add single type column group", KR(ret), K(tmp_cg)); - } else if (column->is_rowkey_column() || exist_all_type) {//if not exist all cg, build rowkey cg - if (OB_FAIL(column_ids.push_back(column->get_column_id()))) { - LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); + if (column->is_virtual_generated_column()){ + /*skip virtual generated column group*/ + } else if(OB_FAIL(ObSchemaUtils::build_single_column_group(table_schema, column, session_info_->get_effective_tenant_id(), + gen_column_group_id(), tmp_cg))) { + LOG_WARN("fail to build single column group", K(ret)); + } else if (tmp_cg.is_valid()) { + if (OB_FAIL(table_schema.add_column_group(tmp_cg))) { + LOG_WARN("fail to add single type column group", KR(ret), K(tmp_cg)); + } else if (column->is_rowkey_column() || exist_all_type) {//if not exist all cg, build rowkey cg + if (OB_FAIL(column_ids.push_back(column->get_column_id()))) { + LOG_WARN("fail to push back", KR(ret), "column_id", column->get_column_id()); + } } } } diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.h b/src/sql/resolver/ddl/ob_create_table_resolver.h index cbaa25418..b97c4b31c 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.h +++ b/src/sql/resolver/ddl/ob_create_table_resolver.h @@ -131,12 +131,10 @@ private: typedef 
common::hash::ObPlacementHashSet VPColumnIdHashSet; // check this type of table_schema should build column_group or not - bool need_column_group(const share::schema::ObTableSchema &table_schema); uint64_t gen_column_group_id(); int resolve_column_group(const ParseNode *cg_node); - int parse_cg_node(const ParseNode &cg_node, bool &exist_all_column_group) const; - int check_column_store_config(); + bool need_column_group(const ObTableSchema &table_schema); int add_inner_index_for_heap_gtt(); private: diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index bef2a3d58..c0bf7024e 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -11176,7 +11176,7 @@ int ObDDLResolver::resolve_hints(const ParseNode *node, ObDDLStmt &stmt, const O int ret = OB_SUCCESS; uint64_t hint_parallel = 1; uint64_t parallelism = 1; - if (OB_UNLIKELY(nullptr == node)) { + if (OB_UNLIKELY(nullptr == node) || OB_UNLIKELY(OB_ISNULL(session_info_))) { } else { for (int32_t i = 0; OB_SUCC(ret) && i < node->num_child_; i++) { ParseNode *hint_node = node->children_[i]; @@ -11194,6 +11194,37 @@ int ObDDLResolver::resolve_hints(const ParseNode *node, ObDDLStmt &stmt, const O } else { hint_parallel = parallel_node->value_; } + } else if (T_OPT_PARAM_HINT == hint_node->type_) { + ParseNode *child0 = nullptr; + ParseNode *child1 = nullptr; + if (2 != hint_node->num_child_) { + ret = OB_ERR_UNEXPECTED; + // do not raise error, but print a message + LOG_WARN("the opt param hint is illegal", K(ret)); + } else if (OB_ISNULL(child0 = hint_node->children_[0]) || OB_ISNULL(child1 = hint_node->children_[1])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null pointer"); + } else if (child0->type_ == T_VARCHAR){ + ObString param_name(child0->str_len_, child0->str_value_); + ObOptParamHint::OptParamType param_type = ObOptParamHint::get_opt_param_value(param_name); + uint64_t data_version = 0; + if 
(OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), data_version))) { + SQL_RESV_LOG(WARN, "fail to get min data version", K(ret)); + } else if (param_type == ObOptParamHint::OptParamType::COMPACT_SORT_LEVEL + && data_version >= DATA_VERSION_4_3_0_0) { + if (T_INT != child1->type_) { + // do not raise error, but print a message + LOG_WARN("the second param is not int"); + } else { + ObObj val; + val.set_int(child1->value_); + if (ObOptParamHint::is_param_val_valid(param_type, val)) { + stmt.set_compact_level(child1->value_); + } + } + } else { + } + } } } } @@ -11276,6 +11307,65 @@ int ObDDLResolver::deep_copy_column_expr_name(common::ObIAllocator &allocator, return ret; } +// TEMP: if use sql 'create table xxx with column group for yyy', single_type must exist. +int ObDDLResolver::parse_cg_node(const ParseNode &cg_node, bool &exist_all_column_group) const +{ + int ret = OB_SUCCESS; + exist_all_column_group = false; + + if (OB_UNLIKELY(T_COLUMN_GROUP != cg_node.type_ || cg_node.num_child_ <= 0)) { + ret = OB_INVALID_ARGUMENT; + SQL_RESV_LOG(WARN, "invalid argument", KR(ret), K(cg_node.type_), K(cg_node.num_child_)); + } else { + const int64_t num_child = cg_node.num_child_; + bool exist_single_type = false; + bool already_exist_all_column_group = false; + bool already_exist_single_column_group = false; + // handle all_type column_group & single_type column_group + for (int64_t i = 0; OB_SUCC(ret) && (i < num_child); ++i) { + ParseNode *node = cg_node.children_[i]; + if (OB_ISNULL(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("children of column_group_list should not be null", KR(ret)); + } else if (T_ALL_COLUMN_GROUP == node->type_) { + if (already_exist_all_column_group) { + ret = OB_ERR_COLUMN_GROUP_DUPLICATE; + SQL_RESV_LOG(WARN, "all column group already exist in sql", + K(ret), K(node->children_[i]->type_)); + const ObString error_msg = "all columns"; + LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, error_msg.length(), error_msg.ptr()); + } 
else { + already_exist_all_column_group = true; + exist_all_column_group = true; + } + } else if (T_SINGLE_COLUMN_GROUP == node->type_) { + if (already_exist_single_column_group) { + ret = OB_ERR_COLUMN_GROUP_DUPLICATE; + SQL_RESV_LOG(WARN, "single column group already exist in sql", + K(ret), K(node->type_)); + const ObString error_msg = "each column"; + LOG_USER_ERROR(OB_ERR_COLUMN_GROUP_DUPLICATE, error_msg.length(), error_msg.ptr()); + } else { + already_exist_single_column_group = true; + exist_single_type = true; + } + } else if (T_NORMAL_COLUMN_GROUP == node->type_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("column store table with customized column group are not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "column store tables with customized column group are"); + } + } + + if (OB_SUCC(ret) && exist_single_type == false) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "each column not exist", KR(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "column store tables without each column group are"); + } + } + + return ret; +} + int ObDDLResolver::check_ttl_definition(const ParseNode *node) { int ret = OB_SUCCESS; @@ -11325,6 +11415,33 @@ int ObDDLResolver::check_ttl_definition(const ParseNode *node) return ret; } +int ObDDLResolver::resolve_index_column_group(const ParseNode *cg_node, obrpc::ObCreateIndexArg &create_index_arg) +{ + int ret = OB_SUCCESS; + uint64_t compat_version = 0; + if (OB_ISNULL(cg_node) || cg_node->num_child_ <= 0) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(WARN, "invalid parse tree, column group node is null or have no children!", + K(ret), KP(cg_node)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), compat_version))) { + SQL_RESV_LOG(WARN, "fail to get min data version", K(ret)); + } else if (compat_version < DATA_VERSION_4_3_0_0) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "data_version not support for index column_group", K(ret), K(compat_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, 
"tenant data version is less than 4.3, create index with column group"); + } else { + bool exist_all_column_group = false; + if (OB_FAIL(parse_cg_node(*cg_node, exist_all_column_group))) { + LOG_WARN("fail to parse cg node", KR(ret)); + } else if (OB_FAIL(create_index_arg.index_cgs_.push_back(obrpc::ObCreateIndexArg::ObIndexColumnGroupItem(true/*each cg*/)))) { + LOG_WARN("fail to push each cg", K(ret)); + } else { + create_index_arg.exist_all_column_group_ = exist_all_column_group; + } + } + return ret; +} + int ObDDLResolver::build_column_group( const ObTableSchema &table_schema, const ObColumnGroupType &cg_type, @@ -11337,49 +11454,20 @@ int ObDDLResolver::build_column_group( if (cg_name.empty() || (cg_type >= ObColumnGroupType::MAX_COLUMN_GROUP)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(cg_name), K(cg_type), "column_id_cnt", column_ids.count()); - } else { - column_group.set_column_group_id(cg_id); - column_group.set_column_group_type(cg_type); - column_group.set_block_size(table_schema.get_block_size()); - column_group.set_compressor_type(table_schema.get_compressor_type()); - const ObStoreFormatType store_format = table_schema.get_store_format(); - int64_t storage_encoding_mode = 0; - omt::ObTenantConfigGuard tcg(TENANT_CONF(session_info_->get_effective_tenant_id())); - if (OB_LIKELY(tcg.is_valid())) { - storage_encoding_mode = tcg->storage_encoding_mode; - } - bool is_flat = lib::is_oracle_mode() ? 
((OB_STORE_FORMAT_NOCOMPRESS_ORACLE == store_format) - || (OB_STORE_FORMAT_BASIC_ORACLE == store_format) - || (OB_STORE_FORMAT_OLTP_ORACLE == store_format)) - : ((OB_STORE_FORMAT_REDUNDANT_MYSQL == store_format) - || (OB_STORE_FORMAT_COMPACT_MYSQL == store_format)); - if (is_flat || (1 == storage_encoding_mode)) { - // all use encoding - column_group.set_row_store_type(table_schema.get_row_store_type()); - } else if (2 == storage_encoding_mode) { - // all use cs_encoding - column_group.set_row_store_type(ObRowStoreType::CS_ENCODING_ROW_STORE); - } else { - // row_store uses encoding; column_store uses cs_encoding - if ((cg_type == ObColumnGroupType::DEFAULT_COLUMN_GROUP) || (cg_type == ObColumnGroupType::ALL_COLUMN_GROUP)) { - column_group.set_row_store_type(table_schema.get_row_store_type()); - } else { - column_group.set_row_store_type(ObRowStoreType::CS_ENCODING_ROW_STORE); - } - } - - if (OB_FAIL(column_group.set_column_group_name(cg_name))) { - LOG_WARN("fail to set column group name", KR(ret), K(cg_name)); - } - for (int64_t i = 0; OB_SUCC(ret) && (i < column_ids.count()); ++i) { - if (OB_FAIL(column_group.add_column_id(column_ids.at(i)))) { - LOG_WARN("fail to add column_id into column_group", KR(ret), K(i), "column_id", column_ids.at(i)); - } - } + } else if (OB_FAIL(ObSchemaUtils::build_column_group(table_schema, session_info_->get_effective_tenant_id(), cg_type, cg_name, + column_ids, cg_id, column_group))) { + LOG_WARN("fail to build column group", K(ret)); } return ret; } + + +bool ObDDLResolver::need_column_group(const ObTableSchema &table_schema) +{ + return table_schema.is_user_table() || table_schema.is_tmp_table() || table_schema.is_index_table(); +} + int ObDDLResolver::resolve_column_skip_index( const ParseNode &skip_index_node, ObColumnSchemaV2 &column_schema) diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index 2c02e94f3..3642eea8e 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ 
b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -467,6 +467,9 @@ protected: const common::ObIArray &column_ids, const uint64_t cg_id, share::schema::ObColumnGroupSchema &column_group); + int parse_cg_node(const ParseNode &cg_node, bool &exist_all_column_group) const; + int resolve_index_column_group(const ParseNode *node, obrpc::ObCreateIndexArg &create_index_arg); + bool need_column_group(const ObTableSchema &table_schema); int resolve_hints(const ParseNode *parse_node, ObDDLStmt &stmt, const ObTableSchema &table_schema); int calc_ddl_parallelism(const uint64_t hint_parallelism, const uint64_t table_dop, uint64_t ¶llelism); int deep_copy_str(const common::ObString &src, common::ObString &dest); diff --git a/src/sql/resolver/ddl/ob_ddl_stmt.h b/src/sql/resolver/ddl/ob_ddl_stmt.h index 96aa12c71..09c111ae6 100644 --- a/src/sql/resolver/ddl/ob_ddl_stmt.h +++ b/src/sql/resolver/ddl/ob_ddl_stmt.h @@ -15,6 +15,7 @@ #include "share/ob_rpc_struct.h" #include "sql/resolver/ob_stmt.h" #include "sql/resolver/ob_cmd.h" +#include "share/ob_rpc_struct.h" namespace oceanbase { namespace sql @@ -24,7 +25,7 @@ class ObDDLStmt : public ObStmt, public ObICmd const static int OB_DEFAULT_ARRAY_SIZE = 16; public: ObDDLStmt(common::ObIAllocator *name_pool, stmt::StmtType type) - : ObStmt(name_pool, type), parallelism_(1L) + : ObStmt(name_pool, type), parallelism_(1L), compact_level_(share::SORT_COMPACT_LEVEL) { } explicit ObDDLStmt(stmt::StmtType type): ObStmt(type) @@ -45,10 +46,16 @@ public: virtual int get_first_stmt(common::ObString &first_stmt); void set_parallelism(const int64_t parallelism) { parallelism_ = parallelism; } int64_t &get_parallelism() { return parallelism_; } + void set_compact_level(int64_t compact_level) { + compact_level_ = static_cast(compact_level); + }; + share::SortCompactLevel get_compact_level() { return compact_level_; } protected: ObArenaAllocator allocator_; private: int64_t parallelism_; + share::SortCompactLevel compact_level_; + 
DISALLOW_COPY_AND_ASSIGN(ObDDLStmt); }; } // namespace sql diff --git a/src/sql/resolver/dml/ob_hint.cpp b/src/sql/resolver/dml/ob_hint.cpp index fbe173fec..d1bca8a8d 100644 --- a/src/sql/resolver/dml/ob_hint.cpp +++ b/src/sql/resolver/dml/ob_hint.cpp @@ -737,6 +737,10 @@ bool ObOptParamHint::is_param_val_valid(const OptParamType param_type, const ObO || 0 == val.get_varchar().case_compare("false")); break; } + case COMPACT_SORT_LEVEL: { + is_valid = val.is_int() && (val.get_int() >= 0 && val.get_int() <=5); + break; + } default: LOG_TRACE("invalid opt param val", K(param_type), K(val)); break; diff --git a/src/sql/resolver/dml/ob_hint.h b/src/sql/resolver/dml/ob_hint.h index 55f541377..902491064 100644 --- a/src/sql/resolver/dml/ob_hint.h +++ b/src/sql/resolver/dml/ob_hint.h @@ -92,6 +92,7 @@ struct ObOptParamHint DEF(USE_DEFAULT_OPT_STAT,) \ DEF(ENABLE_IN_RANGE_OPTIMIZATION,) \ DEF(XSOLAPI_GENERATE_WITH_CLAUSE,) \ + DEF(COMPACT_SORT_LEVEL,) \ DECLARE_ENUM(OptParamType, opt_param, OPT_PARAM_TYPE_DEF, static); diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 155e5d895..bb8b37ab4 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -123,7 +123,9 @@ ob_set_subtarget(ob_storage blocksstable_index_block blocksstable/index_block/ob_index_block_aggregator.cpp blocksstable/index_block/ob_index_block_builder.cpp blocksstable/index_block/ob_index_block_macro_iterator.cpp + blocksstable/index_block/ob_ddl_sstable_scan_merge.cpp blocksstable/index_block/ob_index_block_row_scanner.cpp + blocksstable/index_block/ob_ddl_index_block_row_iterator.cpp blocksstable/index_block/ob_index_block_row_struct.cpp blocksstable/index_block/ob_index_block_tree_cursor.cpp blocksstable/index_block/ob_index_block_util.cpp @@ -510,7 +512,8 @@ ob_set_subtarget(ob_storage ddl ddl/ob_ddl_redo_log_replayer.cpp ddl/ob_ddl_redo_log_writer.cpp ddl/ob_ddl_struct.cpp - ddl/ob_direct_insert_sstable_ctx.cpp + ddl/ob_direct_load_struct.cpp + 
ddl/ob_direct_insert_sstable_ctx_new.cpp ddl/ob_tablet_barrier_log.cpp ddl/ob_tablet_ddl_kv.cpp ddl/ob_tablet_ddl_kv_mgr.cpp @@ -700,9 +703,10 @@ ob_set_subtarget(ob_storage direct_load direct_load/ob_direct_load_external_table_compactor.cpp direct_load/ob_direct_load_fast_heap_table.cpp direct_load/ob_direct_load_fast_heap_table_builder.cpp - direct_load/ob_direct_load_fast_heap_table_ctx.cpp direct_load/ob_direct_load_insert_table_ctx.cpp + direct_load/ob_direct_load_insert_table_row_iterator.cpp direct_load/ob_direct_load_merge_ctx.cpp + direct_load/ob_direct_load_lob_builder.cpp direct_load/ob_direct_load_merge_task_iterator.cpp direct_load/ob_direct_load_multiple_datum_range.cpp direct_load/ob_direct_load_multiple_datum_row.cpp @@ -731,6 +735,7 @@ ob_set_subtarget(ob_storage direct_load direct_load/ob_direct_load_multiple_sstable_scanner.cpp direct_load/ob_direct_load_origin_table.cpp direct_load/ob_direct_load_partition_merge_task.cpp + direct_load/ob_direct_load_partition_rescan_task.cpp direct_load/ob_direct_load_range_splitter.cpp direct_load/ob_direct_load_rowkey_iterator.cpp direct_load/ob_direct_load_sstable_builder.cpp diff --git a/src/storage/access/ob_index_sstable_estimator.cpp b/src/storage/access/ob_index_sstable_estimator.cpp index 2ef2ffa22..8ee102aa9 100644 --- a/src/storage/access/ob_index_sstable_estimator.cpp +++ b/src/storage/access/ob_index_sstable_estimator.cpp @@ -92,6 +92,13 @@ int ObIndexBlockScanEstimator::estimate_row_count(ObPartitionEst &part_est) STORAGE_LOG(WARN, "Failed to init index block row scanner", K(ret), K(agg_projector), K(agg_column_schema)); } else if (OB_FAIL(context_.sstable_.get_index_tree_root(root_index_block_))) { STORAGE_LOG(WARN, "Failed to get index tree root", K(ret)); + } else if (context_.sstable_.is_ddl_merge_sstable()) { + index_block_row_scanner_.set_iter_param(&context_.sstable_, + context_.tablet_handle_.get_obj()->get_ls_id(), + context_.tablet_handle_.get_obj()->get_tablet_id(), + 
context_.tablet_handle_.get_obj()); + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(cal_total_row_count(result))) { STORAGE_LOG(WARN, "Failed to get total_row_count_delta", K(ret), K(root_index_block_)); } else if (result.total_row_count_ > 0) { diff --git a/src/storage/access/ob_index_sstable_estimator.h b/src/storage/access/ob_index_sstable_estimator.h index ec323ea55..0ac8bc85d 100644 --- a/src/storage/access/ob_index_sstable_estimator.h +++ b/src/storage/access/ob_index_sstable_estimator.h @@ -34,6 +34,8 @@ struct ObPartitionEst int add(const ObPartitionEst &pe); int deep_copy(const ObPartitionEst &src); void reset() { logical_row_count_ = physical_row_count_ = 0; } + bool operator ==(const ObPartitionEst &other) const { return logical_row_count_ == other.logical_row_count_ && + physical_row_count_ == other.physical_row_count_; } }; struct ObIndexSSTableEstimateContext diff --git a/src/storage/access/ob_index_tree_prefetcher.cpp b/src/storage/access/ob_index_tree_prefetcher.cpp index c13dd4dd4..5e57c6de2 100644 --- a/src/storage/access/ob_index_tree_prefetcher.cpp +++ b/src/storage/access/ob_index_tree_prefetcher.cpp @@ -115,7 +115,7 @@ int ObIndexTreePrefetcher::init_basic_info( iter_param_ = &iter_param; datum_utils_ = &index_read_info->get_datum_utils(); data_version_ = sstable_->get_data_version(); - index_tree_height_ = sstable_meta_handle_.get_sstable_meta().get_index_tree_height(); + index_tree_height_ = sstable_meta_handle_.get_sstable_meta().get_index_tree_height(sstable.is_ddl_merge_empty_sstable()); } return ret; } @@ -130,11 +130,11 @@ int ObIndexTreePrefetcher::single_prefetch(ObSSTableReadHandle &read_handle) read_handle.index_block_info_.is_root_ = true; read_handle.index_block_info_.cs_row_range_.start_row_id_ = 0; read_handle.index_block_info_.cs_row_range_.end_row_id_ = - sstable_meta_handle_.get_sstable_meta().get_row_count() - 1; + sstable_meta_handle_.get_sstable_meta().get_end_row_id(sstable_->is_ddl_merge_empty_sstable()); if (IS_NOT_INIT) { 
ret = OB_NOT_INIT; LOG_WARN("ObIndexTreePrefetcher not init", K(ret)); - } else if (sstable_->is_empty()) { + } else if (sstable_->no_data_to_read()) { //empty sstable read_handle.row_state_ = ObSSTableRowState::NOT_EXIST; } else if (ObStoreRowIterator::IteratorSingleGet == iter_type_ && @@ -205,7 +205,7 @@ int ObIndexTreePrefetcher::lookup_in_index_tree(ObSSTableReadHandle &read_handle while (OB_SUCC(ret) && !found && cur_level_ < index_tree_height_) { if (0 == cur_level_) { if (OB_FAIL(sstable_->get_index_tree_root(index_block_))) { - LOG_WARN("Fail to get index block root", K(ret)); + LOG_WARN("Fail to get index block root", K(ret), KPC(sstable_)); } } else { ObMicroBlockDataHandle &curr_handle = get_read_handle(cur_level_); @@ -222,7 +222,7 @@ int ObIndexTreePrefetcher::lookup_in_index_tree(ObSSTableReadHandle &read_handle *read_handle.rowkey_, read_handle.range_idx_, &index_block_info))) { - LOG_WARN("Fail to open index block scanner", K(ret)); + LOG_WARN("Fail to open index block scanner", K(ret), K(cur_level_), K(index_block_), K(index_tree_height_), KPC(sstable_)); } else if (OB_FAIL(index_scanner_.get_next(index_block_info))) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Fail to get index block row", K(ret), K_(index_scanner)); @@ -266,6 +266,8 @@ int ObIndexTreePrefetcher::init_index_scanner(ObIndexBlockRowScanner &index_scan sstable_->get_macro_offset(), sstable_->is_normal_cg_sstable()))) { LOG_WARN("init index scanner fail", K(ret), KPC(sstable_)); + } else { + index_scanner.set_iter_param(sstable_, access_ctx_->ls_id_, access_ctx_->tablet_id_); } return ret; } @@ -472,7 +474,7 @@ int ObIndexTreeMultiPrefetcher::multi_prefetch() read_handle.index_block_info_.is_root_ = true; read_handle.index_block_info_.cs_row_range_.start_row_id_ = 0; read_handle.index_block_info_.cs_row_range_.end_row_id_ = - sstable_meta_handle_.get_sstable_meta().get_row_count() - 1; + 
sstable_meta_handle_.get_sstable_meta().get_end_row_id(sstable_->is_ddl_merge_empty_sstable()); prefetch_rowkey_idx_++; if (OB_FAIL(ObStoreRowIterator::IteratorMultiGet == iter_type_ && @@ -480,7 +482,9 @@ int ObIndexTreeMultiPrefetcher::multi_prefetch() LOG_WARN("Failed to lookup_in_cache", K(ret)); } else if (ObSSTableRowState::IN_BLOCK == read_handle.row_state_) { if (OB_FAIL(sstable_->get_index_tree_root(index_block_))) { - LOG_WARN("Fail to get index block root", K(ret)); + LOG_WARN("Fail to get index block root", K(ret), KPC(sstable_)); + } + if (OB_FAIL(ret)) { } else if (!index_scanner_.is_valid() && OB_FAIL(init_index_scanner(index_scanner_))) { LOG_WARN("Fail to init index scanner", K(ret)); } else if (OB_FAIL(drill_down(ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, read_handle, false, is_rowkey_to_fetched))) { @@ -725,7 +729,7 @@ int ObIndexTreeMultiPassPrefetcher::i if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("ObIndexTreeMultiPassPrefetcher has been inited", K(ret)); - } else if (sstable.is_empty()) { + } else if (sstable.no_data_to_read()) { is_prefetch_end_ = true; is_inited_ = true; } else { @@ -759,7 +763,7 @@ int ObIndexTreeMultiPassPrefetcher::s if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not inited", K(ret)); - } else if (sstable.is_empty()) { + } else if (sstable.no_data_to_read()) { is_prefetch_end_ = true; } else if (OB_FAIL(init_basic_info(iter_type, sstable, iter_param, access_ctx, query_range))) { LOG_WARN("Fail to init basic info", K(ret), K(access_ctx)); @@ -860,7 +864,7 @@ int ObIndexTreeMultiPassPrefetcher::i } else if (FALSE_IT(datum_utils_ = &index_read_info->get_datum_utils())) { } else if (OB_FAIL(sstable.get_meta(sstable_meta_handle_))) { LOG_WARN("failed to get sstable meta handle", K(ret)); - } else if (FALSE_IT(index_tree_height_ = sstable_meta_handle_.get_sstable_meta().get_index_tree_height())) { + } else if (FALSE_IT(index_tree_height_ = 
sstable_meta_handle_.get_sstable_meta().get_index_tree_height(sstable.is_ddl_merge_empty_sstable()))) { } else if (1 >= index_tree_height_ || MAX_INDEX_TREE_HEIGHT < index_tree_height_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected index tree height", K(ret), K(index_tree_height_), K(MAX_INDEX_TREE_HEIGHT)); @@ -961,7 +965,9 @@ int ObIndexTreeMultiPassPrefetcher::t LOG_WARN("Failed to lookup_in_cache", K(ret)); } else if (ObSSTableRowState::IN_BLOCK == read_handle.row_state_) { if (OB_FAIL(sstable_->get_index_tree_root(index_block_))) { - LOG_WARN("Fail to get index block root", K(ret)); + LOG_WARN("Fail to get index block root", K(ret), KPC(sstable_)); + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(tree_handle.index_scanner_.open( ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, index_block_, @@ -974,7 +980,9 @@ int ObIndexTreeMultiPassPrefetcher::t // scan read_handle.row_state_ = ObSSTableRowState::IN_BLOCK; if (OB_FAIL(sstable_->get_index_tree_root(index_block_))) { - LOG_WARN("Fail to get index tree root", K(ret)); + LOG_WARN("Fail to get index tree root", K(ret), KPC(sstable_)); + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(tree_handle.index_scanner_.open( ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, index_block_, @@ -991,7 +999,9 @@ int ObIndexTreeMultiPassPrefetcher::t } else { read_handle.row_state_ = ObSSTableRowState::IN_BLOCK; if (OB_FAIL(sstable_->get_index_tree_root(index_block_))) { - LOG_WARN("Fail to get index tree root", K(ret)); + LOG_WARN("Fail to get index tree root", K(ret), KPC(sstable_)); + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(tree_handle.index_scanner_.open(ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, index_block_, read_handle.rows_info_, diff --git a/src/storage/access/ob_multiple_merge.cpp b/src/storage/access/ob_multiple_merge.cpp index 657332173..5f0a7c6b4 100644 --- a/src/storage/access/ob_multiple_merge.cpp +++ b/src/storage/access/ob_multiple_merge.cpp @@ -1152,7 +1152,7 @@ int 
ObMultipleMerge::prepare_tables_from_iterator(ObTableStoreIterator &table_it } } if (OB_SUCC(ret) && need_table) { - if (table_ptr->is_empty()) { + if (table_ptr->no_data_to_read()) { LOG_DEBUG("cur table is empty", K(ret), KPC(table_ptr)); continue; } else if (table_ptr->is_memtable()) { diff --git a/src/storage/access/ob_store_row_iterator.cpp b/src/storage/access/ob_store_row_iterator.cpp index 4d63a6a7f..52401dbf4 100644 --- a/src/storage/access/ob_store_row_iterator.cpp +++ b/src/storage/access/ob_store_row_iterator.cpp @@ -48,7 +48,7 @@ int ObStoreRowIterator::init( block_row_store_ = access_ctx.block_row_store_; } if (OB_FAIL(inner_open(iter_param, access_ctx, table, query_range))) { - STORAGE_LOG(WARN, "Failed to inner open ObStoreRowIterator", K(ret)); + STORAGE_LOG(WARN, "Failed to inner open ObStoreRowIterator", K(ret), K(iter_param), K(access_ctx)); } return ret; } diff --git a/src/storage/backup/ob_backup_utils.cpp b/src/storage/backup/ob_backup_utils.cpp index 1facd05ee..ebc31486a 100644 --- a/src/storage/backup/ob_backup_utils.cpp +++ b/src/storage/backup/ob_backup_utils.cpp @@ -112,7 +112,7 @@ int ObBackupUtils::get_sstables_by_data_type(const storage::ObTabletHandle &tabl ObArray ddl_sstable_array; if (OB_FAIL(minor_sstable_array_ptr->get_all_table_wrappers(minor_sstable_array))) { LOG_WARN("failed to get all tables", K(ret), KPC(minor_sstable_array_ptr)); - } else if (OB_FAIL(ddl_sstable_array_ptr->get_all_table_wrappers(ddl_sstable_array))) { + } else if (OB_FAIL(ddl_sstable_array_ptr->get_all_table_wrappers(ddl_sstable_array, true/*unpack_table*/))) { LOG_WARN("failed to get all tables", K(ret), KPC(ddl_sstable_array_ptr)); } else if (OB_FAIL(check_tablet_minor_sstable_validity_(tablet_handle, minor_sstable_array))) { LOG_WARN("failed to check tablet minor sstable validity", K(ret), K(tablet_handle), K(minor_sstable_array)); @@ -324,8 +324,8 @@ int ObBackupUtils::check_tablet_ddl_sstable_validity_(const storage::ObTabletHan int ret = OB_SUCCESS; 
ObTablet *tablet = NULL; ObITable *last_table_ptr = NULL; - SCN ddl_start_scn = SCN::min_scn(); - SCN ddl_checkpoint_scn = SCN::min_scn(); + SCN compact_start_scn = SCN::min_scn(); + SCN compact_end_scn = SCN::min_scn(); ObTableStoreIterator ddl_table_iter; bool is_data_complete = false; if (ddl_sstable_array.empty()) { @@ -333,8 +333,6 @@ int ObBackupUtils::check_tablet_ddl_sstable_validity_(const storage::ObTabletHan } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle)); - } else if (FALSE_IT(ddl_start_scn = tablet->get_tablet_meta().ddl_start_scn_)) { - } else if (FALSE_IT(ddl_checkpoint_scn = tablet->get_tablet_meta().ddl_checkpoint_scn_)) { } else if (OB_ISNULL(last_table_ptr = ddl_sstable_array.at(ddl_sstable_array.count() - 1).get_sstable())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get invalid table ptr", K(ret), K(ddl_sstable_array)); @@ -343,14 +341,14 @@ int ObBackupUtils::check_tablet_ddl_sstable_validity_(const storage::ObTabletHan LOG_WARN("table ptr not correct", K(ret), KPC(last_table_ptr)); } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_sstables(ddl_table_iter))) { LOG_WARN("failed to get ddl sstables", K(ret), K(tablet_handle)); - } else if (OB_FAIL(ObTabletDDLUtil::check_data_integrity(ddl_table_iter, ddl_start_scn, ddl_checkpoint_scn, is_data_complete))) { - LOG_WARN("failed to check data integrity", K(ret), K(ddl_start_scn), K(ddl_checkpoint_scn)); + } else if (OB_FAIL(ObTabletDDLUtil::check_data_continue(ddl_table_iter, is_data_complete, compact_start_scn, compact_end_scn))) { + LOG_WARN("failed to check data integrity", K(ret), K(ddl_table_iter)); } else if (!is_data_complete) { ret = OB_INVALID_TABLE_STORE; LOG_WARN("get invalid ddl table store", K(ret), K(tablet_handle), K(ddl_sstable_array), K(ddl_table_iter)); } else { - LOG_INFO("check data intergirty", K(tablet_handle), K(ddl_start_scn), - K(ddl_checkpoint_scn), K(ddl_table_iter), 
K(is_data_complete)); + LOG_INFO("check data intergirty", K(tablet_handle), K(compact_start_scn), + K(compact_end_scn), K(ddl_table_iter), K(is_data_complete)); } return ret; } diff --git a/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h b/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h index 7569adc25..ebfde1f0d 100644 --- a/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h +++ b/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h @@ -30,7 +30,7 @@ public: ObIMicroBlockDecoder() : ObIMicroBlockReader() {} virtual ~ObIMicroBlockDecoder() {} virtual int compare_rowkey( - const ObDatumRowkey &rowkey, const int64_t index, int32_t &compare_result) override = 0; + const ObDatumRowkey &rowkey, const int64_t index, int32_t &compare_result) = 0; virtual int compare_rowkey(const ObDatumRange &range, const int64_t index, int32_t &start_key_compare_result, int32_t &end_key_compare_result) = 0; diff --git a/src/storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.cpp b/src/storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.cpp new file mode 100644 index 000000000..bedd53869 --- /dev/null +++ b/src/storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.cpp @@ -0,0 +1,1761 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE +#include "storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h" +#include "storage/ddl/ob_tablet_ddl_kv.h" +#include "storage/ls/ob_ls.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/ddl/ob_tablet_ddl_kv_mgr.h" + +namespace oceanbase +{ +namespace blocksstable +{ + +/****************** ObDDLIndexBlockRowIterator **********************/ +ObDDLIndexBlockRowIterator::ObDDLIndexBlockRowIterator() + : is_iter_start_(false), + is_iter_finish_(true), + btree_iter_(), + block_meta_tree_(nullptr), + cur_tree_value_(nullptr) +{ + +} + +ObDDLIndexBlockRowIterator::~ObDDLIndexBlockRowIterator() +{ + reset(); +} + +void ObDDLIndexBlockRowIterator::reset() +{ + ObIndexBlockRowIterator::reset(); + is_iter_finish_ = true; + is_iter_start_ = false; + btree_iter_.reset(); + block_meta_tree_ = nullptr; + cur_tree_value_ = nullptr; +} + +void ObDDLIndexBlockRowIterator::reuse() +{ + is_iter_finish_ = true; + is_iter_start_ = false; + btree_iter_.reset(); + block_meta_tree_ = nullptr; + cur_tree_value_ = nullptr; +} + +int ObDDLIndexBlockRowIterator::init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(datum_utils) || !datum_utils->is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(allocator), KPC(datum_utils)); + } else { + block_meta_tree_ = reinterpret_cast(idx_block_data.buf_); + is_reverse_scan_ = is_reverse_scan; + iter_step_ = is_reverse_scan_ ? 
-1 : 1; + datum_utils_ = datum_utils; + is_inited_ = true; + } + return ret; +} + +int ObDDLIndexBlockRowIterator::set_iter_param(const ObStorageDatumUtils *datum_utils, + bool is_reverse_scan, + const storage::ObBlockMetaTree *block_meta_tree, + const int64_t iter_step) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(datum_utils) || OB_UNLIKELY(!datum_utils->is_valid()) || OB_ISNULL(block_meta_tree)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(block_meta_tree), KPC(datum_utils)); + } else { + block_meta_tree_ = block_meta_tree; + is_reverse_scan_ = is_reverse_scan; + iter_step_ = iter_step == INT64_MAX ? (is_reverse_scan_ ? -1 : 1) : iter_step; + datum_utils_ = datum_utils; + is_inited_ = true; + } + return ret; +} + +int ObDDLIndexBlockRowIterator::locate_key(const ObDatumRowkey &rowkey) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!rowkey.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey)); + } else { + ObDatumRange range; + range.set_start_key(rowkey); + range.set_end_key(rowkey); + range.set_left_closed(); + range.set_right_closed(); + if (OB_ISNULL(block_meta_tree_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block meta tree is null", K(ret)); + } else if (OB_FAIL(block_meta_tree_->locate_key(range, + *datum_utils_, + btree_iter_, + cur_tree_value_))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("locate rowkey failed", K(ret), K(range), K(*this)); + } else { + is_iter_finish_ = true; + } + } else if (OB_ISNULL(cur_tree_value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur tree value is null", K(ret), KP(cur_tree_value_)); + } else { + is_iter_start_ = true; + is_iter_finish_ = false; + } + LOG_TRACE("Binary search rowkey in ddl block", K(ret), K(rowkey), KPC(this)); + } + return ret; +} + +int ObDDLIndexBlockRowIterator::locate_range(const ObDatumRange &range, + const bool 
is_left_border, + const bool is_right_border, + const bool is_normal_cg) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!range.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid range", K(ret), K(range)); + } else if (OB_ISNULL(block_meta_tree_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block meta tree is null", K(ret)); + } else if (OB_FAIL(block_meta_tree_->locate_range(range, + *datum_utils_, + is_left_border, + is_right_border, + is_reverse_scan_, + btree_iter_, + cur_tree_value_))) { + is_iter_finish_ = true; + LOG_WARN("block meta tree locate range failed", K(ret), K(range)); + } else if (OB_ISNULL(cur_tree_value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur tree value is null", K(ret), KP(cur_tree_value_)); + } else { + is_iter_start_ = true; + is_iter_finish_ = false; + } + LOG_TRACE("Locate range in ddl block by range", K(ret), K(range), KPC(this)); + return ret; +} + +int ObDDLIndexBlockRowIterator::check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) +{ + int ret = OB_SUCCESS; + can_blockscan = false; + return ret; +} + +int ObDDLIndexBlockRowIterator::get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) +{ + int ret = OB_SUCCESS; + bool is_start_key = false; + bool is_end_key = false; + idx_row_header = nullptr; + endkey = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_ISNULL(cur_tree_value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur tree value is null", K(ret)); + } else { + idx_row_header = &(cur_tree_value_->header_); + endkey = &(cur_tree_value_->block_meta_->end_key_); + } + return ret; +} + +int ObDDLIndexBlockRowIterator::get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo 
*&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + is_scan_left_border = false; + is_scan_right_border = false; + idx_minor_info = nullptr; + agg_row_buf = nullptr; + agg_buf_size = 0; + row_offset = 0; + bool is_start_key = false; + bool is_end_key = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_FAIL(get_current(idx_row_header, endkey))) { + LOG_WARN("read cur idx row failed", K(ret), KPC(idx_row_header), KPC(endkey)); + } else if (OB_UNLIKELY(nullptr == idx_row_header || nullptr == endkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KP(idx_row_header), KP(endkey)); + } else if (OB_UNLIKELY((idx_row_header->is_data_index() && !idx_row_header->is_major_node()) || + idx_row_header->is_pre_aggregated() || + !idx_row_header->is_major_node())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid index row header", K(ret), KPC(idx_row_header)); + } + + if (OB_SUCC(ret)) { + if (is_iter_start_) { + is_start_key = true; + is_iter_start_ = false; + } + storage::ObBlockMetaTreeValue *tmp_tree_value = nullptr; + if (OB_ISNULL(block_meta_tree_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block meta iterator is null", K(ret)); + } else if (OB_FAIL(block_meta_tree_->get_next_tree_value(btree_iter_, std::abs(iter_step_), tmp_tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get index block row header failed", K(ret), K(*this)); + } else { + is_iter_finish_ = true; + is_end_key = true; + ret = OB_SUCCESS; + } + } else { + cur_tree_value_ = tmp_tree_value; + } + if (OB_SUCC(ret)) { + row_offset = idx_row_parser_.get_row_offset(); + is_scan_left_border = is_reverse_scan_ ? is_end_key : is_start_key; + is_scan_right_border = is_reverse_scan_ ? 
is_start_key : is_end_key; + } + } + return ret; +} + +int ObDDLIndexBlockRowIterator::get_next_meta(const ObDataMacroBlockMeta *&meta) +{ + int ret = OB_SUCCESS; + meta = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_ISNULL(cur_tree_value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur tree value is null", K(ret)); + } else { + meta = cur_tree_value_->block_meta_; + if (is_iter_start_) { + is_iter_start_ = false; + } + storage::ObBlockMetaTreeValue *tmp_tree_value = nullptr; + if (OB_ISNULL(block_meta_tree_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block meta iterator is null", K(ret)); + } else if (OB_FAIL(block_meta_tree_->get_next_tree_value(btree_iter_, std::abs(iter_step_), tmp_tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get index block row header failed", K(ret), K(*this)); + } else { + is_iter_finish_ = true; + ret = OB_SUCCESS; + } + } else { + cur_tree_value_ = tmp_tree_value; + } + } + return ret; +} + +bool ObDDLIndexBlockRowIterator::end_of_block() const +{ + return is_iter_finish_; +} + +int ObDDLIndexBlockRowIterator::get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) +{ + int ret = OB_SUCCESS; + index_row_count = 0; + DDLBtreeIterator tmp_iter; + ObBlockMetaTreeValue *cur_tree_value = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!range.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(range)); + } else if (OB_ISNULL(block_meta_tree_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block meta tree is null", K(ret)); + } else if (OB_FAIL(block_meta_tree_->locate_range(range, + *datum_utils_, + is_left_border, + is_right_border, + is_reverse_scan_, + tmp_iter, + cur_tree_value))) { + LOG_WARN("locate rowkey failed", K(ret), K(range), KPC(datum_utils_), 
KPC(cur_tree_value)); + } else { + if (OB_NOT_NULL(cur_tree_value)) { + ++index_row_count; //first + } + while (OB_SUCC(ret)) { + ObDatumRowkeyWrapper rowkey_wrapper; + if (OB_FAIL(tmp_iter.get_next(rowkey_wrapper, cur_tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else { + ret = OB_SUCCESS; + break; + } + } else { + ++index_row_count; + } + } + if (OB_FAIL(ret)) { + index_row_count = 0; + } + } + return ret; +} + +/****************** ObDDLMergeEmptyIterator **********************/ +ObDDLMergeEmptyIterator::ObDDLMergeEmptyIterator() +{ +} + +ObDDLMergeEmptyIterator::~ObDDLMergeEmptyIterator() +{ +} + + +void ObDDLMergeEmptyIterator::reuse() +{ +} + +int ObDDLMergeEmptyIterator::init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) +{ + is_inited_ = true; + return OB_SUCCESS; +} + +int ObDDLMergeEmptyIterator::locate_key(const ObDatumRowkey &rowkey) +{ + return OB_BEYOND_THE_RANGE; +} + +int ObDDLMergeEmptyIterator::locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) +{ + return OB_BEYOND_THE_RANGE; +} + +int ObDDLMergeEmptyIterator::check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) +{ + can_blockscan = false; + return OB_SUCCESS; +} + +int ObDDLMergeEmptyIterator::get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) +{ + idx_row_header = nullptr; + endkey = nullptr; + return OB_SUCCESS; +} + +int ObDDLMergeEmptyIterator::get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) +{ + int ret = OB_SUCCESS; + idx_row_header = 
nullptr; + endkey = nullptr; + is_scan_left_border = false; + is_scan_right_border = false; + idx_minor_info = nullptr; + agg_row_buf = nullptr; + agg_buf_size = 0; + row_offset = 0; + bool is_start_key = false; + bool is_end_key = false; + + return OB_SUCCESS; +} + +bool ObDDLMergeEmptyIterator::end_of_block() const +{ + return true; +} + +int ObDDLMergeEmptyIterator::get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) +{ + index_row_count = 0; + return OB_SUCCESS; +} + +/****************** ObDDLMergeBlockRowIterator **********************/ +ObDDLMergeBlockRowIterator::ObDDLMergeBlockRowIterator() + : is_single_sstable_(true), + is_iter_start_(false), + is_iter_finish_(true), + allocator_(nullptr), + idx_block_data_(nullptr), + raw_iter_(nullptr), + transformed_iter_(nullptr), + empty_merge_iter_(nullptr), + iters_(), + consumers_(nullptr), + consumer_cnt_(0), + compare_(), + simple_merge_(nullptr), + loser_tree_(nullptr), + endkey_merger_(nullptr), + query_range_(), + first_index_item_(), + iter_param_() +{ + +} + +ObDDLMergeBlockRowIterator::~ObDDLMergeBlockRowIterator() +{ + reset(); +} + +void ObDDLMergeBlockRowIterator::reset() +{ + is_single_sstable_ = true; + is_iter_start_ = false; + is_iter_finish_ = true; + ObIndexBlockRowIterator::reset(); + if (OB_NOT_NULL(transformed_iter_)) { + transformed_iter_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(transformed_iter_); + transformed_iter_ = nullptr; + } + } + if (OB_NOT_NULL(raw_iter_)) { + raw_iter_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(raw_iter_); + raw_iter_ = nullptr; + } + } + if (OB_NOT_NULL(empty_merge_iter_)) { + empty_merge_iter_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(empty_merge_iter_); + empty_merge_iter_ = nullptr; + } + } + + for (int64_t i = 0; i < iters_.count() - 1; ++i) { // skip sstable_iter + if (OB_NOT_NULL(iters_.at(i))) { + 
iters_.at(i)->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(iters_.at(i)); + iters_.at(i) = nullptr; + } + } + } + iters_.reset(); + // merger + if (OB_NOT_NULL(simple_merge_)) { + simple_merge_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(simple_merge_); + simple_merge_ = nullptr; + } + } + if (OB_NOT_NULL(loser_tree_)) { + loser_tree_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(loser_tree_); + loser_tree_ = nullptr; + } + } + if (OB_NOT_NULL(consumers_)) { + if (OB_NOT_NULL(allocator_)) { + allocator_->free(consumers_); + consumers_ = nullptr; + } + } + compare_.reset(); + consumer_cnt_ = 0; + endkey_merger_ = nullptr; + query_range_.reset(); + first_index_item_.reset(); + idx_block_data_ = nullptr; + iter_param_.reset(); + + allocator_ = nullptr; +} + +void ObDDLMergeBlockRowIterator::reuse() +{ + is_iter_start_ = false; + is_iter_finish_ = true; + is_single_sstable_ = true; + + if (OB_NOT_NULL(transformed_iter_)) { + transformed_iter_->reuse(); + } + if (OB_NOT_NULL(raw_iter_)) { + raw_iter_->reuse(); + } + if (OB_NOT_NULL(empty_merge_iter_)) { + empty_merge_iter_->reuse(); + } + + for (int64_t i = 0; i < iters_.count() - 1; ++i) { // skip sstable_iter + if (OB_NOT_NULL(iters_.at(i))) { + iters_.at(i)->reuse(); + } + } + if (OB_NOT_NULL(simple_merge_)) { + simple_merge_->reuse(); + } + if (OB_NOT_NULL(loser_tree_)) { + loser_tree_->reuse(); + } + if (OB_NOT_NULL(consumers_)) { + if (OB_NOT_NULL(allocator_)) { + allocator_->free(consumers_); + consumers_ = nullptr; + } + } + consumer_cnt_ = 0; + idx_block_data_ = nullptr; + endkey_merger_ = nullptr; +} + +int ObDDLMergeBlockRowIterator::init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) +{ + int ret = OB_SUCCESS; + iters_.set_attr(ObMemAttr(MTL_ID(), "index_iters")); + if (OB_ISNULL(allocator) || 
OB_ISNULL(datum_utils) || !datum_utils->is_valid() || !iter_param.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(allocator), KPC(datum_utils), K(iter_param)); + } else { + ObIndexBlockRowIterator *sst_index_iter = nullptr; + if (ObMicroBlockData::DDL_MERGE_INDEX_BLOCK != idx_block_data.type_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid index block data type", K(ret), K(idx_block_data.type_)); + //STEP 1: reuse or alloc sstable iter + } else if (OB_FAIL(init_sstable_index_iter(idx_block_data, + datum_utils, + allocator, + is_reverse_scan, + set_iter_end, + iter_param, + sst_index_iter))) { + LOG_WARN("fail to init sstable index iter", K(ret), K(iters_), KPC(sst_index_iter)); + //STEP 2: reuse or alloc ddl_kv iters + // todo qilu :get DDLKV from ls or from tablet_handle now, opt this get DDLKV from MTL() after refactor ddl_kv_mgr + } else if (OB_FAIL(init_ddl_kv_index_iters(idx_block_data, + datum_utils, + allocator, + is_reverse_scan, + iter_param))) { + LOG_WARN("fail to init ddl kv index iters", K(ret), K(iters_), KPC(sst_index_iter)); + } else if (OB_FAIL(iters_.push_back(sst_index_iter))) { + LOG_WARN("push back sstable iter failed", K(ret)); + } else if (iters_.count() > 1) { + is_single_sstable_ = false; + } else if (iter_param.sstable_->is_ddl_merge_empty_sstable()) { + LOG_INFO("empty sstable without ddl_kv", K(idx_block_data), K(iter_param)); + } + } + + if (OB_SUCC(ret)) { + is_reverse_scan_ = is_reverse_scan; + iter_step_ = is_reverse_scan_ ? 
-1 : 1; + datum_utils_ = datum_utils; + allocator_ = allocator; + idx_block_data_ = &idx_block_data; + iter_param_ = iter_param; + compare_.reverse_scan_ = is_reverse_scan_; + compare_.datum_utils_ = datum_utils_; + if (!is_single_sstable_ && OB_FAIL(init_merger())) { + LOG_WARN("fail to init merger", K(ret)); + } + if (OB_SUCC(ret)) { + is_inited_ = true; + } + } else { + if (OB_NOT_NULL(transformed_iter_)) { + transformed_iter_->reset(); + if (OB_NOT_NULL(allocator)) { + allocator->free(transformed_iter_); + transformed_iter_ = nullptr; + } + } + if (OB_NOT_NULL(raw_iter_)) { + raw_iter_->reset(); + if (OB_NOT_NULL(allocator)) { + allocator->free(raw_iter_); + raw_iter_ = nullptr; + } + } + if (OB_NOT_NULL(empty_merge_iter_)) { + empty_merge_iter_->reset(); + if (OB_NOT_NULL(empty_merge_iter_)) { + allocator->free(empty_merge_iter_); + empty_merge_iter_ = nullptr; + } + } + for (int64_t i = 0; i < iters_.count() - 1; ++i) { // skip sstable_iter + if (OB_NOT_NULL(iters_.at(i))) { + iters_.at(i)->reset(); + if (OB_NOT_NULL(allocator)) { + allocator->free(iters_.at(i)); + iters_.at(i) = nullptr; + } + } + } + } + return ret; +} + +int ObDDLMergeBlockRowIterator::init_sstable_index_iter(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param, + ObIndexBlockRowIterator *&sst_index_iter) +{ + int ret = OB_SUCCESS; + sst_index_iter = nullptr; + ObIndexBlockRowIterator *tmp_index_iter = nullptr; + if (iters_.count() > 0) { + // reuse, last one must be sstable index iter + if (OB_ISNULL(iters_.at(iters_.count() - 1))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("last iter is null", K(ret), K(iters_)); + } else { + iters_.at(iters_.count() - 1)->reuse(); + iters_.pop_back(tmp_index_iter); + } + } + + if (OB_SUCC(ret)) { + void *iter_buf = nullptr; + if (OB_ISNULL(allocator) || OB_ISNULL(datum_utils) || !datum_utils->is_valid() 
|| !iter_param.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(allocator), KPC(datum_utils), K(iter_param)); + } else if (iter_param.sstable_->is_ddl_merge_empty_sstable()) { + // EMPTY DDL_MERGE_SSTABLE + if (OB_NOT_NULL(empty_merge_iter_)) { + empty_merge_iter_->reuse(); + } else if (OB_ISNULL(iter_buf = allocator->alloc(sizeof(ObDDLMergeEmptyIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDDLMergeEmptyIterator))); + } else { + empty_merge_iter_ = new (iter_buf) ObDDLMergeEmptyIterator; + } + if (OB_SUCC(ret)) { + sst_index_iter = empty_merge_iter_; + FLOG_INFO("empty ddl merge sstable", K(iter_param), K(idx_block_data)); + } + } else if (nullptr == idx_block_data.get_extra_buf()) { + // RAW + if (OB_NOT_NULL(raw_iter_)) { + raw_iter_->reuse(); + } else if (OB_ISNULL(iter_buf = allocator->alloc(sizeof(ObRAWIndexBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObRAWIndexBlockRowIterator))); + } else { + raw_iter_ = new (iter_buf) ObRAWIndexBlockRowIterator; + } + if (OB_SUCC(ret)) { + sst_index_iter = raw_iter_; + } + } else { + // TRANSFORMED + if (OB_NOT_NULL(transformed_iter_)) { + transformed_iter_->reuse(); + } else if (OB_ISNULL(iter_buf = allocator->alloc(sizeof(ObTFMIndexBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObTFMIndexBlockRowIterator))); + } else { + transformed_iter_ = new (iter_buf) ObTFMIndexBlockRowIterator; + } + if (OB_SUCC(ret)) { + sst_index_iter = transformed_iter_; + } + } + } + + + if (OB_SUCC(ret)) { + if (OB_ISNULL(sst_index_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(idx_block_data.type_), K(ret)); + } else if (OB_FAIL(sst_index_iter->init(idx_block_data, datum_utils, allocator, is_reverse_scan, set_iter_end, iter_param))) { + LOG_WARN("fail to init iter", K(ret), K(idx_block_data), 
KPC(sst_index_iter)); + } + } + LOG_INFO("init ddl merge iter", K(ret), KPC(sst_index_iter), K(iter_param), K(idx_block_data), KPC(iter_param.sstable_)); + return ret; +} + +int ObDDLMergeBlockRowIterator::get_readable_ddl_kvs(const ObIndexBlockIterParam &iter_param, + ObArray &ddl_memtables) +{ + int ret = OB_SUCCESS; + // todo qilu :get DDLKV from ls or from tablet_handle now, opt this get DDLKV from MTL() after refactor ddl_kv_mgr + ObTablet *cur_tablet = nullptr; + ddl_memtables.reset(); + ObTabletHandle tmp_tablet_handle; + if (OB_UNLIKELY(!iter_param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid iter param", K(ret), K(iter_param)); + } else { + if (OB_ISNULL(iter_param.tablet_)) { + //get tablet handle from ls + ObLSService *ls_service = MTL(ObLSService *); + ObLSHandle ls_handle; + if (OB_FAIL(ls_service->get_ls(iter_param.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(iter_param.ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + iter_param.tablet_id_, + tmp_tablet_handle, + ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("get tablet failed", K(ret), K(iter_param)); + } else { + cur_tablet = tmp_tablet_handle.get_obj(); + } + } else { + cur_tablet = const_cast(iter_param.tablet_); + } + } + + if (OB_SUCC(ret)) { + const uint16_t sstable_cg_idx = iter_param.sstable_->get_key().get_column_group_id(); + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObArray ddl_kvs_handle; + if (OB_ISNULL(cur_tablet)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet is null", K(ret), KP(cur_tablet)); + } else if (OB_FAIL(cur_tablet->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + LOG_INFO("ddl kv mgr not exist", K(ret), K(iter_param), KPC(cur_tablet)); + } else { + LOG_WARN("get ddl kv mgr failed", K(ret), K(iter_param), KPC(cur_tablet)); + } + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_ddl_kvs(false/*not frozen_only*/, ddl_kvs_handle))) { + LOG_WARN("get 
freezed ddl kv failed", K(ret), K(ddl_kv_mgr_handle)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kvs_handle.count(); ++i) { + ObDDLKV *ddl_kv = ddl_kvs_handle.at(i).get_obj(); + bool skip = false; + for (int64_t j = 0; OB_SUCC(ret) && j < ddl_kv->get_ddl_memtables().count(); ++j) { + ObDDLMemtable *cur_ddl_memtable = ddl_kv->get_ddl_memtables().at(j); + if (OB_NOT_NULL(cur_ddl_memtable)) { + if (cur_ddl_memtable->is_table_with_scn_range() && OB_NOT_NULL(iter_param.sstable_)) { + if (cur_ddl_memtable->get_scn_range().is_valid() && iter_param.sstable_->get_end_scn() >= cur_ddl_memtable->get_scn_range().end_scn_) { + LOG_INFO("smaller scn, skip ddl memtable", K(iter_param.sstable_->get_end_scn()), K(cur_ddl_memtable->get_scn_range()), K(sstable_cg_idx)); + skip = true; + } + } + if (cur_ddl_memtable->get_key().get_column_group_id() != sstable_cg_idx) { + LOG_INFO("unmatch cg_idx, skip ddl memtable", K(sstable_cg_idx), K(cur_ddl_memtable->get_key().get_column_group_id())); + skip = true; + } + if (!skip) { + if (OB_FAIL(ddl_memtables.push_back(cur_ddl_memtable))) { + LOG_WARN("fail to push back ddl_memtable", K(ret)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null ddl_memtable", K(ret)); + } + } + } + } + } + FLOG_INFO("get ddl readable memtables", K(ret), K(iters_.count()), K(ddl_memtables.count()), K(ddl_memtables)); + return ret; +} + +int ObDDLMergeBlockRowIterator::init_ddl_kv_index_iters(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const ObIndexBlockIterParam &iter_param) +{ + int ret = OB_SUCCESS; + ObArray ddl_memtables; + if (!idx_block_data.is_valid() || OB_ISNULL(datum_utils) || OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid iter param", K(ret), K(idx_block_data), KP(datum_utils), KP(allocator)); + } else if (OB_FAIL(get_readable_ddl_kvs(iter_param, ddl_memtables))) { + LOG_WARN("fail to get readable ddl 
kvs", K(ret)); + } else { + // reset iters count + if (ddl_memtables.count() > (iters_.count())) { + while (OB_SUCC(ret) && ddl_memtables.count() > (iters_.count())) { + ObDDLIndexBlockRowIterator *cur_ddl_kv_index_iter = nullptr; + void *iter_buf = nullptr; + if (OB_ISNULL(iter_buf = allocator->alloc(sizeof(ObDDLIndexBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDDLIndexBlockRowIterator))); + } else if (FALSE_IT(cur_ddl_kv_index_iter = new (iter_buf) ObDDLIndexBlockRowIterator)) { + } else if (OB_FAIL(iters_.push_back(cur_ddl_kv_index_iter))) { + LOG_WARN("push back ddl iter failed", K(ret)); + if (OB_NOT_NULL(cur_ddl_kv_index_iter)) { + cur_ddl_kv_index_iter->~ObDDLIndexBlockRowIterator(); + allocator->free(cur_ddl_kv_index_iter); + } + } + } + } else if (ddl_memtables.count() < (iters_.count())) { + while (OB_SUCC(ret) && ddl_memtables.count() < (iters_.count())) { + ObIndexBlockRowIterator *tmp_iter = iters_.at(iters_.count() - 1); + if (OB_NOT_NULL(tmp_iter)) { + tmp_iter->~ObIndexBlockRowIterator(); + if (OB_NOT_NULL(allocator)) { + allocator->free(tmp_iter); + tmp_iter = nullptr; + } + } + iters_.pop_back(); + } + } + } + + if (OB_SUCC(ret)) { + if (iters_.count() != ddl_memtables.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iter count", K(iters_), K(ddl_memtables)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < iters_.count(); ++i) { + if (OB_ISNULL(iters_.at(i)) || OB_ISNULL(ddl_memtables.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur iter is null", K(ret), KPC(iters_.at(i))); + } else { + ObDDLIndexBlockRowIterator *cur_ddl_kv_index_iter = static_cast(iters_.at(i)); + cur_ddl_kv_index_iter->reuse(); + if (OB_FAIL(ddl_memtables.at(i)->init_ddl_index_iterator(datum_utils, is_reverse_scan, cur_ddl_kv_index_iter))) { + LOG_WARN("fail to init ddl iter", K(ret), K(datum_utils), KPC(cur_ddl_kv_index_iter)); + } + } + } + } + } + return ret; +} + +int 
ObDDLMergeBlockRowIterator::init_merger() +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + // step 1:alloc merger + if (iters_.count() <= ObScanSimpleMerger::USE_SIMPLE_MERGER_MAX_TABLE_CNT) { + if (OB_NOT_NULL(simple_merge_)) { + endkey_merger_ = simple_merge_; + } else { + if (OB_ISNULL(buf = allocator_->alloc(sizeof(SimpleMerger)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(SimpleMerger))); + } else if (FALSE_IT(simple_merge_ = new (buf) SimpleMerger(compare_))) { + } else { + endkey_merger_ = simple_merge_; + } + } + } else { + if (OB_NOT_NULL(loser_tree_)) { + endkey_merger_ = loser_tree_; + } else { + if (OB_ISNULL(buf = allocator_->alloc(sizeof(MergeLoserTree)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(MergeLoserTree))); + } else if (FALSE_IT(loser_tree_ = new (buf) MergeLoserTree(compare_))) { + } else { + endkey_merger_ = loser_tree_; + } + } + } + + if (OB_SUCC(ret)) { + // step 2:init consumers + if (OB_ISNULL(consumers_ = static_cast( + allocator_->alloc(sizeof(int64_t) * iters_.count())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret), K(iters_.count())); + } else { + for (int64_t i = 0; i < iters_.count(); ++i) { + consumers_[i] = 0; + } + consumer_cnt_ = 0; + } + } + + // step 3:init merger + if (OB_SUCC(ret)) { + if (OB_ISNULL(endkey_merger_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("merger is null", K(ret)); + } else if (FALSE_IT(endkey_merger_->reset())) { + } else if (OB_FAIL(endkey_merger_->init(iters_.count(), *allocator_))) { + LOG_WARN("fail to init rows merger", K(ret), K(iters_.count())); + } else if (OB_FAIL(endkey_merger_->open(iters_.count()))) { + LOG_WARN("fail to open rows merger", K(ret), K(iters_.count())); + } + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(simple_merge_)) { + simple_merge_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(simple_merge_); + simple_merge_ = nullptr; + } + 
} + if (OB_NOT_NULL(loser_tree_)) { + loser_tree_->reset(); + if (OB_NOT_NULL(allocator_)) { + allocator_->free(loser_tree_); + loser_tree_ = nullptr; + } + } + if (OB_NOT_NULL(consumers_)) { + if (OB_NOT_NULL(allocator_)) { + allocator_->free(consumers_); + consumers_ = nullptr; + } + } + consumer_cnt_ = 0; + endkey_merger_ = nullptr; + } + return ret; +} + +int ObDDLMergeBlockRowIterator::locate_key(const ObDatumRowkey &rowkey) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!rowkey.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey)); + } else if (is_single_sstable_) { + if (OB_UNLIKELY(iters_.count() != 1) || OB_ISNULL(iters_.at(0)) || OB_UNLIKELY(!iters_.at(0)->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iter", K(ret), K(is_single_sstable_), K(iters_)); + } else if (OB_FAIL(iters_.at(0)->locate_key(rowkey))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("locate rowkey failed", K(ret), K(rowkey), K(iters_)); + } + } + } else { + consumer_cnt_ = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < iters_.count(); ++i) { + ObIndexBlockRowIterator *cur_iter = iters_.at(i); + if (OB_ISNULL(cur_iter) || OB_UNLIKELY(!cur_iter->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(ret), K(i)); + } else if (OB_FAIL(cur_iter->locate_key(rowkey))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("locate rowkey failed", K(ret), K(rowkey), KPC(cur_iter)); + } else { + ret = OB_SUCCESS; // get next iter + } + } else { + consumers_[consumer_cnt_] = i; + ++consumer_cnt_; + } + } + + if (OB_SUCC(ret) && consumer_cnt_ > 0) { + query_range_.reset(); + query_range_.set_start_key(rowkey); + query_range_.set_end_key(rowkey); + query_range_.set_left_closed(); + query_range_.set_right_closed(); + is_iter_start_ = true; + is_iter_finish_ = false; + if (is_reverse_scan_ && 
OB_FAIL(locate_first_endkey())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to locate first endkey", K(ret)); + } else { + is_iter_finish_ = true; + ret = OB_SUCCESS; // return OB_ITER_END when get_next + } + } + } else { + is_iter_finish_ = true; + if (OB_SUCC(ret)) { + ret = OB_BEYOND_THE_RANGE; + } + } + } + return ret; +} + +int ObDDLMergeBlockRowIterator::locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!range.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid range", K(ret), K(range)); + } else if (is_single_sstable_) { + if (OB_UNLIKELY(iters_.count() != 1) || OB_ISNULL(iters_.at(0)) || OB_UNLIKELY(!iters_.at(0)->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iter", K(ret), K(is_single_sstable_), K(iters_)); + } else if (OB_FAIL(iters_.at(0)->locate_range(range, is_left_border, is_right_border, is_normal_cg))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("locate rowkey failed", K(ret), K(range), K(iters_)); + } + } + } else { + consumer_cnt_ = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < iters_.count(); ++i) { + ObIndexBlockRowIterator *cur_iter = iters_.at(i); + if (OB_ISNULL(cur_iter) || OB_UNLIKELY(!cur_iter->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null or not inited", K(ret), K(i), KPC(cur_iter)); + } else if (OB_FAIL(cur_iter->locate_range(range, is_left_border, is_right_border, is_normal_cg))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate range", K(ret), K(range), K(is_left_border), K(is_right_border), KPC(cur_iter)); + } else { + ret = OB_SUCCESS; // next iter + } + } else { + consumers_[consumer_cnt_] = i; + ++consumer_cnt_; + } + } + + if (OB_SUCC(ret)) { + query_range_.reset(); + query_range_ = range; + if (consumer_cnt_ == 
0) { + ret = OB_BEYOND_THE_RANGE; + is_iter_finish_ = true; + } else if (consumer_cnt_ > 0) { + is_iter_start_ = true; + is_iter_finish_ = false; + if (is_reverse_scan_ && OB_FAIL(locate_first_endkey())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to locate first endkey", K(ret)); + } else { + is_iter_finish_ = true; + ret = OB_BEYOND_THE_RANGE; + } + } + } + } + } + return ret; +} + +int ObDDLMergeBlockRowIterator::get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (is_single_sstable_) { + // direct get next row from sstable iter + if (OB_UNLIKELY(iters_.count() != 1) || OB_ISNULL(iters_.at(0)) || OB_UNLIKELY(!iters_.at(0)->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iters count or iter is nll", K(ret), K(iters_)); + } else if (OB_FAIL(iters_.at(0)->get_current(idx_row_header, endkey))) { + LOG_WARN("read cur idx row failed", K(ret), KPC(idx_row_header), KPC(endkey), KPC(iters_.at(0))); + } + } else { + // get next row from loser tree + bool tmp_border = false; + int64_t size = 0; + int64_t offset = 0; + const char *agg_row_buf = nullptr; + const ObIndexBlockRowMinorMetaInfo *idx_minor_info = nullptr; + if (consumer_cnt_ > 0 && OB_FAIL(supply_consume())) { + LOG_WARN("supply consume failed", K(ret)); + } else if (OB_FAIL(inner_get_next(idx_row_header, + endkey, + tmp_border, + tmp_border, + idx_minor_info, + agg_row_buf, + size, + offset))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to do inner get next row", K(ret)); + } + } + } + return ret; +} + +int ObDDLMergeBlockRowIterator::get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + 
int64_t &agg_buf_size, + int64_t &row_offset) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + is_scan_left_border = false; + is_scan_right_border = false; + idx_minor_info = nullptr; + agg_row_buf = nullptr; + agg_buf_size = 0; + row_offset = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (is_single_sstable_) { + // direct get next row from sstable iter + if (OB_UNLIKELY(iters_.count() != 1) || OB_ISNULL(iters_.at(0)) || OB_UNLIKELY(!iters_.at(0)->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iters count or iter is nll", K(ret), K(iters_)); + } else if (OB_FAIL(iters_.at(0)->get_next(idx_row_header, + endkey, + is_scan_left_border, + is_scan_right_border, + idx_minor_info, + agg_row_buf, + agg_buf_size, + row_offset))) { + LOG_WARN("read cur idx row failed", K(ret), KPC(idx_row_header), KPC(endkey), KPC(iters_.at(0))); + } + } else { + // get next row from loser tree + if (is_iter_finish_) { + ret = OB_ITER_END; + } else if (OB_FAIL(inner_get_next(idx_row_header, + endkey, + is_scan_left_border, + is_scan_right_border, + idx_minor_info, + agg_row_buf, + agg_buf_size, + row_offset))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to do inner get next row", K(ret)); + } + } + } + return ret; +} + +int ObDDLMergeBlockRowIterator::supply_consume() +{ + int ret = OB_SUCCESS; + ObDDLSSTableMergeLoserTreeItem item; + for (int64_t i = 0; OB_SUCC(ret) && i < consumer_cnt_; ++i) { + const int64_t iter_idx = consumers_[i]; + const ObIndexBlockRowHeader *idx_row_header = nullptr; + const ObDatumRowkey *endkey = nullptr; + ObIndexBlockRowIterator *cur_iter = iters_.at(iter_idx); + if (OB_ISNULL(cur_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(ret), KPC(cur_iter)); + } else if (cur_iter->end_of_block()) { + //ignore + } else if (OB_FAIL(cur_iter->get_next(item.header_, + item.end_key_, + item.is_scan_left_border_, + 
item.is_scan_right_border_, + item.idx_minor_info_, + item.agg_row_buf_, + item.agg_buf_size_, + item.row_offset_))) { + LOG_WARN("fail to get next row from scanner", K(ret)); + } else { + item.iter_idx_ = iter_idx; + if (OB_FAIL(endkey_merger_->push(item))) { + LOG_WARN("fail to push to loser tree", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + // if no new items pushed, the rebuild will quickly exit + if (OB_FAIL(endkey_merger_->rebuild())) { + LOG_WARN("fail to rebuild loser tree", K(ret), K(consumer_cnt_)); + } else { + consumer_cnt_ = 0; + } + } + return ret; +} + +int ObDDLMergeBlockRowIterator::inner_get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + is_scan_left_border = false; + is_scan_right_border = false; + idx_minor_info = nullptr; + agg_row_buf = nullptr; + agg_buf_size = 0; + row_offset = 0; + const ObDDLSSTableMergeLoserTreeItem *top_item = nullptr; + int64_t cur_iter_idx = INT64_MAX; + if (is_reverse_scan_ && is_iter_start_) { + // reverse scan will save info when locate_first_endkey + if (!first_index_item_.is_valid()) { + ret = OB_ITER_END; + } else { + idx_row_header = first_index_item_.idx_row_header_; + endkey = first_index_item_.rowkey_; + is_scan_left_border = first_index_item_.is_scan_left_border_; + is_scan_right_border = first_index_item_.is_scan_right_border_; + idx_minor_info = first_index_item_.idx_minor_info_; + agg_row_buf = first_index_item_.agg_row_buf_; + agg_buf_size = first_index_item_.agg_buf_size_; + row_offset = first_index_item_.row_offset_; + } + is_iter_start_ = false; + if (OB_SUCC(ret)) { + if (consumer_cnt_ == 0 && endkey_merger_->empty()) { + is_iter_finish_ = true; + } + } + } else { + if (OB_FAIL(supply_consume())) { + 
LOG_WARN("supply consume failed", K(ret)); + } else if (endkey_merger_->empty()) { + ret = OB_ITER_END; + } + + if (OB_SUCC(ret)) { + while (OB_SUCC(ret) && !endkey_merger_->empty() && nullptr == endkey) { + bool skip_iter = false; + if (OB_FAIL(endkey_merger_->top(top_item))) { + LOG_WARN("fail to get top item", K(ret)); + } else if (OB_LIKELY(endkey_merger_->is_unique_champion())) { + endkey = top_item->end_key_; + idx_row_header = top_item->header_; + cur_iter_idx = top_item->iter_idx_; + is_scan_left_border = top_item->is_scan_left_border_; + is_scan_right_border = top_item->is_scan_right_border_; + idx_minor_info = top_item->idx_minor_info_; + agg_row_buf = top_item->agg_row_buf_; + agg_buf_size = top_item->agg_buf_size_; + row_offset = top_item->row_offset_; + if (OB_UNLIKELY(nullptr == idx_row_header || nullptr == endkey || cur_iter_idx >= iters_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KP(idx_row_header), KP(endkey)); + } else { + ObIndexBlockRowIterator *cur_iter = iters_.at(cur_iter_idx); + if (OB_ISNULL(cur_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur iter is null", K(ret), KPC(cur_iter)); + } else if (cur_iter->end_of_block()) { + skip_iter = true; + } + } + + if (OB_SUCC(ret) && !is_reverse_scan_) { // not_reverse_scan can quit early + int tmp_cmp_ret = 0; + if (OB_FAIL(endkey->compare(query_range_.get_end_key(), *datum_utils_, tmp_cmp_ret))) { + LOG_WARN("fail to cmp rowkey", K(ret), K(query_range_.get_end_key()), KPC(endkey), KPC(datum_utils_)); + } else if (tmp_cmp_ret >= 0) { + // reach endkey, stop get_next + is_iter_finish_ = true; + while (OB_SUCC(ret) && !endkey_merger_->empty()) { + if (OB_FAIL(endkey_merger_->pop())) { + LOG_WARN("fail to pop top item", K(ret)); + } else { + consumer_cnt_ = 0; + } + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("exist same endkey!!!", K(ret), KPC(top_item)); + } + + if (OB_SUCC(ret) && !endkey_merger_->empty()) { + if 
(OB_FAIL(endkey_merger_->pop())) {
+            LOG_WARN("fail to pop top item", K(ret));
+          } else if (!skip_iter) {
+            consumers_[consumer_cnt_] = cur_iter_idx;
+            ++consumer_cnt_;
+          }
+        }
+      }
+    }
+    if (OB_SUCC(ret)) {
+      if (consumer_cnt_ == 0 && endkey_merger_->empty()) {
+        is_iter_finish_ = true;
+      }
+    }
+  }
+  return ret;
+}
+
+// Releases every buffer this item took from item_allocator_ and returns all
+// fields to their default values. Nothing is freed when item_allocator_ is null.
+void ObDDLMergeBlockRowIterator::MergeIndexItem::reset()
+{
+  if (OB_NOT_NULL(item_allocator_)) {
+    if (OB_NOT_NULL(rowkey_)){
+      rowkey_->~ObDatumRowkey();
+      item_allocator_->free(rowkey_);
+      rowkey_ = nullptr;
+    }
+    if (OB_NOT_NULL(idx_row_header_)){
+      idx_row_header_->~ObIndexBlockRowHeader();
+      item_allocator_->free(idx_row_header_);
+      idx_row_header_ = nullptr;
+    }
+    if (OB_NOT_NULL(idx_minor_info_)){
+      idx_minor_info_->~ObIndexBlockRowMinorMetaInfo();
+      item_allocator_->free(idx_minor_info_);
+      idx_minor_info_ = nullptr;
+    }
+    if (OB_NOT_NULL(agg_row_buf_)){
+      item_allocator_->free(agg_row_buf_);
+      agg_row_buf_ = nullptr;
+    }
+  }
+  item_allocator_ = nullptr;
+
+  is_scan_left_border_ = false;
+  is_scan_right_border_ = false;
+  agg_buf_size_ = 0;
+  row_offset_ = 0;
+  iter_index_ = INT64_MAX;
+}
+
+// Builds a private copy of one index row in memory taken from `allocator`.
+//
+// @param allocator       source of all copies; remembered so reset() can free them
+// @param idx_row_header  row header, copied by value (must not be null)
+// @param endkey          row endkey, copied with ObDatumRowkey::deep_copy (must not be null)
+// @param idx_minor_info  optional minor meta info, copied by value when non-null
+// @param agg_row_buf     optional aggregate-row buffer; exactly agg_buf_size bytes are copied
+// @param agg_buf_size    byte length of agg_row_buf
+// @return OB_INVALID_ARGUMENT when allocator/idx_row_header/endkey is null,
+//         OB_ALLOCATE_MEMORY_FAILED when an allocation fails, or the error
+//         returned by deep_copy. Once copying has started, any failure
+//         releases the partially built state through reset().
+int ObDDLMergeBlockRowIterator::MergeIndexItem::init(ObIAllocator *allocator,
+                                                     const ObIndexBlockRowHeader *idx_row_header,
+                                                     const ObDatumRowkey *endkey,
+                                                     const bool is_scan_left_border,
+                                                     const bool is_scan_right_border,
+                                                     const ObIndexBlockRowMinorMetaInfo *idx_minor_info,
+                                                     const char *agg_row_buf,
+                                                     const int64_t agg_buf_size,
+                                                     const int64_t row_offset,
+                                                     const int64_t iter_idx)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(allocator) || OB_ISNULL(idx_row_header) || OB_ISNULL(endkey)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguemen", K(ret), KP(allocator), KP(idx_row_header), KP(endkey), KP(idx_minor_info), KP(agg_row_buf));
+  } else {
+    item_allocator_ = allocator;
+    void *key_buf = nullptr;
+    void *header_buf = nullptr;
+    void *minor_info_buf = nullptr;
+    void *agg_buf = nullptr;
+    if (OB_ISNULL(key_buf = item_allocator_->alloc(sizeof(ObDatumRowkey)))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDatumRowkey)));
+    } else if (FALSE_IT(rowkey_ = new (key_buf) ObDatumRowkey())) {
+    } else if (OB_FAIL(endkey->deep_copy(*rowkey_, *allocator))) {
+      LOG_WARN("fail to deep copy rowkey", K(ret), KPC(rowkey_), KPC(endkey));
+    }
+
+    if (OB_FAIL(ret)) {
+    } else if (OB_ISNULL(header_buf = item_allocator_->alloc(sizeof(ObIndexBlockRowHeader)))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObIndexBlockRowHeader)));
+    } else if (FALSE_IT(idx_row_header_ = new (header_buf) ObIndexBlockRowHeader())) {
+    } else {
+      *idx_row_header_ = *idx_row_header;
+    }
+
+    // The buffer must be sized for the meta-info struct (not for a pointer) and
+    // the new object must be stored in the member, not in the input parameter;
+    // otherwise the assignment below writes through a null idx_minor_info_.
+    if (OB_FAIL(ret) || OB_ISNULL(idx_minor_info)) {
+    } else if (OB_ISNULL(minor_info_buf = item_allocator_->alloc(sizeof(ObIndexBlockRowMinorMetaInfo)))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObIndexBlockRowMinorMetaInfo)));
+    } else if (FALSE_IT(idx_minor_info_ = new (minor_info_buf) ObIndexBlockRowMinorMetaInfo())) {
+    } else {
+      *idx_minor_info_ = *idx_minor_info;
+    }
+
+    // The aggregate-row buffer comes with an explicit length, so copy exactly
+    // agg_buf_size bytes instead of relying on a terminating '\0'.
+    if (OB_FAIL(ret) || OB_ISNULL(agg_row_buf) || agg_buf_size <= 0) {
+    } else if (OB_ISNULL(agg_buf = item_allocator_->alloc(agg_buf_size))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("allocate memory failed", K(ret), K(agg_buf_size));
+    } else {
+      MEMCPY(agg_buf, agg_row_buf, agg_buf_size);
+      agg_row_buf_ = reinterpret_cast<char *>(agg_buf);
+    }
+
+    if (OB_FAIL(ret)) {
+      // do not leave a half-built item that could still report is_valid()
+      reset();
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    is_scan_left_border_ = is_scan_left_border;
+    is_scan_right_border_ = is_scan_right_border;
+    agg_buf_size_ = agg_buf_size;
+    row_offset_ = row_offset;
+    iter_index_ = iter_idx;
+  }
+  return ret;
+}
+
+// An item is usable once both the row header and the endkey have been copied.
+bool ObDDLMergeBlockRowIterator::MergeIndexItem::is_valid()
+{
+  return OB_NOT_NULL(idx_row_header_)
+      && OB_NOT_NULL(rowkey_);
+}
+
+int ObDDLMergeBlockRowIterator::locate_first_endkey()
+{
+  // for reverse scan, find first useful endkey
+  int ret
= OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_FAIL(supply_consume())) { + LOG_WARN("supply consume failed", K(ret)); + } else { + if (endkey_merger_->empty()) { + ret = OB_ITER_END; + } else { + first_index_item_.reset(); + bool find = false; + while (OB_SUCC(ret) && !endkey_merger_->empty() && !find) { + if (!first_index_item_.is_valid()) { + // first round + const ObDDLSSTableMergeLoserTreeItem *top_item = nullptr; + bool skip_iter = false; + + if (OB_FAIL(endkey_merger_->top(top_item))) { + LOG_WARN("fail to get top item", K(ret)); + } else if (OB_LIKELY(endkey_merger_->is_unique_champion())) { + if (OB_UNLIKELY(nullptr == top_item->header_ || nullptr == top_item->end_key_ || top_item->iter_idx_ >= iters_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KP(top_item->header_), KP(top_item->end_key_)); + } else { + ObIndexBlockRowIterator *tmp_iter = iters_.at(top_item->iter_idx_); + if (OB_ISNULL(tmp_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur iter is null", K(ret), KPC(tmp_iter)); + } else if (tmp_iter->end_of_block()) { + skip_iter = true; + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("exist same endkey!!!", K(ret), KPC(top_item)); + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(first_index_item_.init(allocator_, + top_item->header_, + top_item->end_key_, + top_item->is_scan_left_border_, + top_item->is_scan_right_border_, + top_item->idx_minor_info_, + top_item->agg_row_buf_, + top_item->agg_buf_size_, + top_item->row_offset_, + top_item->iter_idx_))) { + LOG_WARN("fail to init first_index_item_", K(ret)); + } else if (!endkey_merger_->empty() && OB_FAIL(endkey_merger_->pop())) { + LOG_WARN("fail to pop top item", K(ret), K(endkey_merger_)); + } else if (!skip_iter) { + consumers_[consumer_cnt_] = first_index_item_.iter_index_; + ++consumer_cnt_; + } + } + } else { + // regular round + if 
(OB_FAIL(supply_consume())) { + LOG_WARN("supply consume failed", K(ret)); + } else { + const ObDDLSSTableMergeLoserTreeItem *top_item = nullptr; + bool skip_iter = false; + if (OB_FAIL(endkey_merger_->top(top_item))) { + LOG_WARN("fail to get top item", K(ret)); + } else if (OB_LIKELY(endkey_merger_->is_unique_champion())) { + if (OB_UNLIKELY(nullptr == top_item->header_ || nullptr == top_item->end_key_ || top_item->iter_idx_ >= iters_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KP(top_item->end_key_), KP(top_item->header_)); + } else if (top_item->iter_idx_ == first_index_item_.iter_index_) { + // continuous item from same iter, find + find = true; //first_index_item_ + } else { + int tmp_cmp_ret = 0; + // top_item->end_key_ means first_index_item_.start_key + if (OB_FAIL(top_item->end_key_->compare(query_range_.get_end_key(), *datum_utils_, tmp_cmp_ret))) { + LOG_WARN("fail to cmp rowkey", K(ret), K(query_range_.get_end_key()), KPC(top_item->end_key_), KPC(datum_utils_)); + } else if (tmp_cmp_ret < 0) { + find = true; //first_index_item_ + } else { + if (tmp_cmp_ret == 0) { + find = true; + } + + ObIndexBlockRowIterator *cur_iter = iters_.at(top_item->iter_idx_); + bool tmp_is_iter_end = false; + if (OB_ISNULL(cur_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur iter is null", K(ret), KPC(cur_iter)); + } else if (cur_iter->end_of_block()) { + skip_iter = true; + } + + if (OB_SUCC(ret)) { + first_index_item_.reset(); + if (OB_FAIL(first_index_item_.init(allocator_, + top_item->header_, + top_item->end_key_, + top_item->is_scan_left_border_, + top_item->is_scan_right_border_, + top_item->idx_minor_info_, + top_item->agg_row_buf_, + top_item->agg_buf_size_, + top_item->row_offset_, + top_item->iter_idx_))) { + LOG_WARN("fail to init first_index_item_", K(ret)); + } else if (!endkey_merger_->empty() && OB_FAIL(endkey_merger_->pop())) { + LOG_WARN("fail to pop top item", K(ret), K(endkey_merger_)); + } 
else if (!skip_iter) {
+                      consumers_[consumer_cnt_] = first_index_item_.iter_index_;
+                      ++consumer_cnt_;
+                    }
+                  }
+                }
+              }
+            } else {
+              ret = OB_ERR_UNEXPECTED;
+              LOG_ERROR("exist same endkey!!!", K(ret), KPC(top_item));
+            }
+          }
+        }
+      }
+      if (OB_SUCC(ret) && !find && !first_index_item_.is_valid()) {
+        ret = OB_ITER_END;
+        is_iter_finish_ = true;
+      }
+    }
+  }
+  return ret;
+}
+
+// Reports whether block scan may be used for `rowkey`.
+// With a single sstable the decision is delegated to the only iterator in
+// iters_; in merge mode can_blockscan is always set to false.
+int ObDDLMergeBlockRowIterator::check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan)
+{
+  int ret = OB_SUCCESS;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("Iter not opened yet", K(ret), KPC(this));
+  } else if (OB_UNLIKELY(!rowkey.is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid rowkey", K(ret), K(rowkey));
+  } else {
+    if (is_single_sstable_) {
+      if (iters_.count() != 1 || OB_ISNULL(iters_.at(0))) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("invalid iters count or iter is nll", K(ret), K(iters_));
+      } else if (OB_FAIL(iters_.at(0)->check_blockscan(rowkey, can_blockscan))) {
+        LOG_WARN("fail to check blockscan", K(ret), KPC(iters_.at(0)), K(rowkey));
+      }
+    } else {
+      // with ddl kvs, cannot blockscan
+      // todo @qilu :reopen later
+      can_blockscan = false;
+    }
+  }
+  return ret;
+}
+
+// Forwards the new datum utils to every iterator in iters_ and, only when all
+// of them succeed, stores it in datum_utils_ and in the merge comparator.
+// @return OB_INVALID_ARGUMENT when datum_utils is null, OB_ERR_UNEXPECTED when
+//         an entry of iters_ is null, OB_NOT_INIT when an iterator is not
+//         inited, or the error returned by that iterator's switch_context().
+int ObDDLMergeBlockRowIterator::switch_context(ObStorageDatumUtils *datum_utils)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(datum_utils)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("datum utils is null", K(ret), KP(datum_utils));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < iters_.count(); ++i) {
+      ObIndexBlockRowIterator *cur_iter = iters_.at(i);
+      if (OB_ISNULL(cur_iter)) {
+        // same guard as the other loops over iters_ in this file; avoids a null dereference below
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("iter is null", K(ret), K(i));
+      } else if (OB_UNLIKELY(!cur_iter->is_inited())) {
+        ret = OB_NOT_INIT;
+        LOG_WARN("not init yet", K(ret), KPC(cur_iter));
+      } else if (OB_FAIL(cur_iter->switch_context(datum_utils))) {
+        LOG_WARN("fail to switch context", K(ret), KPC(datum_utils));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      datum_utils_ = datum_utils;
+      compare_.datum_utils_ = datum_utils_;
+    }
+  }
+  return ret;
+}
+
+bool ObDDLMergeBlockRowIterator::end_of_block() const
+{
+  bool
bret = true; + int ret = OB_SUCCESS; + if (is_single_sstable_) { + // direct get next row from sstable iter + if (OB_UNLIKELY(iters_.count() != 1) || OB_ISNULL(iters_.at(0)) || OB_UNLIKELY(!iters_.at(0)->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iters count or iter is nll", K(ret), K(iters_)); + } else { + bret = iters_.at(0)->end_of_block(); + } + } else { + bret = is_iter_finish_; + } + return bret; +} + +int ObDDLMergeBlockRowIterator::get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) +{ + int ret = OB_SUCCESS; + index_row_count = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!range.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(range)); + } else if (is_single_sstable_) { + if (OB_UNLIKELY(iters_.count() != 1) || OB_ISNULL(iters_.at(0)) || OB_UNLIKELY(!iters_.at(0)->is_inited())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid iters count or iter is nll", K(ret), K(iters_)); + } else if (OB_FAIL(iters_.at(0)->get_index_row_count(range, is_left_border, is_right_border, index_row_count))) { + LOG_WARN("fail to check blockscan", K(ret), KPC(iters_.at(0)), K(range)); + } + } else { + ObDDLMergeBlockRowIterator *tmp_merge_iter = nullptr; + void *buf = nullptr; + if (OB_ISNULL(buf = allocator_->alloc(sizeof(ObDDLMergeBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDDLMergeBlockRowIterator))); + } else if (FALSE_IT(tmp_merge_iter = new (buf) ObDDLMergeBlockRowIterator())) { + } else if (OB_ISNULL(idx_block_data_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("idx block data is null", K(ret)); + } else if (OB_FAIL(tmp_merge_iter->init(*idx_block_data_, + datum_utils_, + allocator_, + is_reverse_scan_, + false/*set iter end*/, + iter_param_))) { + LOG_WARN("fail to init iter", K(ret), 
KPC(idx_block_data_), KPC(tmp_merge_iter)); + } else if (OB_FAIL(tmp_merge_iter->locate_range(range, is_left_border, is_right_border, true/*is_normal_cg*/))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate range", K(ret), K(range), K(is_left_border), K(is_right_border), KPC(tmp_merge_iter)); + } + } else { + int ret = OB_SUCCESS; + const ObDatumRowkey *endkey = nullptr; + const ObIndexBlockRowHeader *idx_row_header = nullptr; + const ObIndexBlockRowMinorMetaInfo *idx_minor_info = nullptr; + const char *idx_data_buf = nullptr; + const char *agg_row_buf = nullptr; + int64_t agg_buf_size = 0; + int64_t row_offset = 0; + bool is_scan_left_border = false; + bool is_scan_right_border = false; + while (OB_SUCC(ret)) { + if (OB_FAIL(tmp_merge_iter->get_next(idx_row_header, endkey, is_scan_left_border, is_scan_right_border, idx_minor_info, agg_row_buf, agg_buf_size, row_offset))) { + LOG_WARN("get next idx block row failed", K(ret), KP(idx_row_header), KPC(endkey), K(is_reverse_scan_)); + } else if (OB_UNLIKELY(nullptr == idx_row_header || nullptr == endkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KPC(tmp_merge_iter), KP(idx_row_header), KP(endkey)); + } else { + ++index_row_count; + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + LOG_INFO("get merge idx row cnt success", K(index_row_count)); + } + } + + //free iter buf + if (OB_NOT_NULL(allocator_)) { + if (OB_NOT_NULL(tmp_merge_iter)) { + tmp_merge_iter->~ObDDLMergeBlockRowIterator(); + allocator_->free(tmp_merge_iter); + } + } + } + return ret; +} + + +} // end namespace blocksstable +} // end namespace oceanbase diff --git a/src/storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h b/src/storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h new file mode 100644 index 000000000..e3658d478 --- /dev/null +++ b/src/storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h @@ -0,0 +1,245 @@ +/** 
+ * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_BLOCKSSTABLE_OB_DDL_INDEX_BLOCK_ROW_ITERATOR_H +#define OCEANBASE_STORAGE_BLOCKSSTABLE_OB_DDL_INDEX_BLOCK_ROW_ITERATOR_H + +#include "storage/blocksstable/index_block/ob_index_block_row_scanner.h" + +namespace oceanbase +{ + +namespace storage +{ +class ObDDLMemtable; +} +namespace blocksstable +{ +typedef keybtree::BtreeIterator DDLBtreeIterator; +class ObDDLIndexBlockRowIterator : public ObIndexBlockRowIterator +{ +public: + ObDDLIndexBlockRowIterator(); + virtual ~ObDDLIndexBlockRowIterator(); + virtual int init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) override; + virtual int get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) override; + virtual int get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) override; + virtual int locate_key(const ObDatumRowkey &rowkey) override; + virtual int locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) override; + virtual int check_blockscan(const ObDatumRowkey &rowkey, bool 
&can_blockscan) override; + virtual bool end_of_block() const override; + virtual int get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) override; + virtual void reset() override; + virtual void reuse() override; + INHERIT_TO_STRING_KV("base iterator:", ObIndexBlockRowIterator, "format:", "ObDDLIndexBlockRowIterator", + K_(is_iter_start), K_(is_iter_finish), KP(cur_tree_value_), KP(block_meta_tree_)); +public: + int set_iter_param(const ObStorageDatumUtils *datum_utils, + bool is_reverse_scan, + const storage::ObBlockMetaTree *block_meta_tree, + const int64_t iter_step = INT64_MAX); + bool is_valid() { return OB_NOT_NULL(block_meta_tree_); } + void set_iter_end() { is_iter_finish_ = true; } + int get_next_meta(const ObDataMacroBlockMeta *&meta); +private: + bool is_iter_start_; + bool is_iter_finish_; + DDLBtreeIterator btree_iter_; + const storage::ObBlockMetaTree *block_meta_tree_; + storage::ObBlockMetaTreeValue *cur_tree_value_; +}; + +class ObDDLMergeEmptyIterator : public ObIndexBlockRowIterator +{ +public: + ObDDLMergeEmptyIterator(); + virtual ~ObDDLMergeEmptyIterator(); + virtual int init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) override; + virtual int get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) override; + virtual int get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) override; + virtual int locate_key(const ObDatumRowkey &rowkey) override; + virtual int locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool 
is_right_border, + const bool is_normal_cg) override; + virtual int check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) override; + virtual bool end_of_block() const override; + virtual int get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) override; + virtual void reuse() override; + INHERIT_TO_STRING_KV("base iterator:", ObIndexBlockRowIterator, "format:", "ObDDLMergeEmptyIterator"); +}; + +class ObDDLMergeBlockRowIterator : public ObIndexBlockRowIterator +{ +public: + static const int64_t MAX_SSTABLE_COUNT = 4096; + typedef ObSimpleRowsMerger SimpleMerger; + typedef common::ObLoserTree MergeLoserTree; + ObDDLMergeBlockRowIterator(); + virtual ~ObDDLMergeBlockRowIterator(); + virtual int init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) override; + virtual int get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) override; + virtual int get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) override; + virtual int locate_key(const ObDatumRowkey &rowkey) override; + virtual int locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) override; + virtual int check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) override; + virtual bool end_of_block() const override; + virtual int get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) override; + virtual void reset() override; + virtual void reuse() 
override; + virtual int switch_context(ObStorageDatumUtils *datum_utils) override; + INHERIT_TO_STRING_KV("base iterator:", ObIndexBlockRowIterator, "format:", "ObDDLMergeBlockRowIterator", + KP(raw_iter_), KP(transformed_iter_), KP(empty_merge_iter_), K(iters_), KP(allocator_), KP(consumers_), K(consumer_cnt_), + K(compare_), KPC(simple_merge_), KPC(loser_tree_), KPC(endkey_merger_), K(is_single_sstable_), + K(is_iter_start_), K(is_iter_finish_), K(query_range_), KP(idx_block_data_), K(first_index_item_), K(iter_param_)); + struct MergeIndexItem final + { + public: + MergeIndexItem() : is_scan_left_border_(false), is_scan_right_border_(false), + idx_row_header_(nullptr), rowkey_(nullptr), idx_minor_info_(nullptr), agg_row_buf_(nullptr), + item_allocator_(nullptr), agg_buf_size_(0), row_offset_(0), iter_index_(INT64_MAX) {} + ~MergeIndexItem() + { + reset(); + } + int init(ObIAllocator *allocator, + const ObIndexBlockRowHeader *idx_row_header, + const ObDatumRowkey *endkey, + const bool is_scan_left_border, + const bool is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *idx_minor_info, + const char *agg_row_buf, + const int64_t agg_buf_size, + const int64_t row_offset, + const int64_t iter_idx); + void reset(); + bool is_valid(); + TO_STRING_KV(K(is_scan_left_border_), K(is_scan_right_border_), K(agg_buf_size_), K(row_offset_), K(iter_index_), + KP(idx_minor_info_), KP(agg_row_buf_), KPC_(idx_row_header), KPC_(rowkey), KP(item_allocator_)); + + public: + bool is_scan_left_border_; + bool is_scan_right_border_; + ObIndexBlockRowHeader *idx_row_header_; + blocksstable::ObDatumRowkey *rowkey_; + ObIndexBlockRowMinorMetaInfo *idx_minor_info_; + char *agg_row_buf_; + ObIAllocator *item_allocator_; + int64_t agg_buf_size_; + int64_t row_offset_; + int64_t iter_index_; + }; + +private: + int locate_first_endkey(); //for reverse scan + int get_readable_ddl_kvs(const ObIndexBlockIterParam &iter_param, + ObArray &ddl_memtables); + int init_sstable_index_iter(const 
ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param, + ObIndexBlockRowIterator *&sst_index_iter); + int init_ddl_kv_index_iters(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const ObIndexBlockIterParam &iter_param); + int init_merger(); + int inner_get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset); + int supply_consume(); +private: + bool is_single_sstable_; + bool is_iter_start_; + bool is_iter_finish_; + ObIAllocator *allocator_; + const ObMicroBlockData *idx_block_data_; + ObRAWIndexBlockRowIterator *raw_iter_; + ObTFMIndexBlockRowIterator *transformed_iter_; + ObDDLMergeEmptyIterator *empty_merge_iter_; + ObArray iters_; + int64_t *consumers_; + int64_t consumer_cnt_; + ObDDLSSTableMergeLoserTreeCompare compare_; + SimpleMerger *simple_merge_; + MergeLoserTree *loser_tree_; + common::ObRowsMerger *endkey_merger_; //point to one of above two iters + ObDatumRange query_range_; + MergeIndexItem first_index_item_; + ObIndexBlockIterParam iter_param_; +}; + +} // end namespace blocksstable +} // end namespace oceanbase +#endif diff --git a/src/storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.cpp b/src/storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.cpp new file mode 100644 index 000000000..917efa1f7 --- /dev/null +++ b/src/storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.cpp @@ -0,0 +1,59 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE +#include "storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.h" +namespace oceanbase +{ +namespace blocksstable +{ + +/****************** ObDDLSSTableMergeLoserTreeCompare **********************/ +ObDDLSSTableMergeLoserTreeCompare::ObDDLSSTableMergeLoserTreeCompare() + : reverse_scan_(false), + datum_utils_(nullptr) +{ +} + +ObDDLSSTableMergeLoserTreeCompare::~ObDDLSSTableMergeLoserTreeCompare() +{ + reset(); +} + +void ObDDLSSTableMergeLoserTreeCompare::reset() +{ + reverse_scan_ = false; + datum_utils_ = nullptr; +} + +int ObDDLSSTableMergeLoserTreeCompare::cmp(const ObDDLSSTableMergeLoserTreeItem &lhs, + const ObDDLSSTableMergeLoserTreeItem &rhs, + int64_t &cmp_ret) +{ + int ret = OB_SUCCESS; + int tmp_cmp_ret = 0; + if (OB_UNLIKELY(nullptr == datum_utils_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadSSTableScanMergeLoserTreeCompare not init", K(ret), KP(this)); + } else if (OB_UNLIKELY(nullptr == lhs.end_key_ || nullptr == rhs.end_key_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(lhs), K(rhs)); + } else if (OB_FAIL(lhs.end_key_->compare(*rhs.end_key_, *datum_utils_, tmp_cmp_ret))) { + LOG_WARN("fail to compare rowkey", K(ret), K(lhs), K(rhs), KPC(datum_utils_)); + } else { + cmp_ret = tmp_cmp_ret * (reverse_scan_ ? 
-1 : 1); + } + return ret; +} + +} // end namespace blocksstable +} // end namespace oceanbase diff --git a/src/storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.h b/src/storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.h new file mode 100644 index 000000000..fcfd6e10f --- /dev/null +++ b/src/storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.h @@ -0,0 +1,87 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_BLOCKSSTABLE_OB_DDL_SSTABLE_SCAN_MERGE_H +#define OCEANBASE_STORAGE_BLOCKSSTABLE_OB_DDL_SSTABLE_SCAN_MERGE_H + +#include "storage/blocksstable/ob_block_sstable_struct.h" +#include "storage/column_store/ob_column_store_util.h" +#include "ob_index_block_row_struct.h" +#include "storage/access/ob_simple_rows_merger.h" + +namespace oceanbase +{ +namespace blocksstable +{ +struct ObDDLSSTableMergeLoserTreeItem final +{ +public: + ObDDLSSTableMergeLoserTreeItem() + : equal_with_next_(false), + is_scan_left_border_(false), + is_scan_right_border_(false), + end_key_(nullptr), + header_(nullptr), + iter_idx_(0), + agg_buf_size_(0), + row_offset_(0), + idx_minor_info_(nullptr), + agg_row_buf_(nullptr) + { + } + ~ObDDLSSTableMergeLoserTreeItem() = default; + void reset() + { + end_key_ = nullptr; + header_ = nullptr; + idx_minor_info_ = nullptr; + agg_row_buf_ = nullptr; + iter_idx_ = 0; + agg_buf_size_ = 0; + row_offset_ = 0; + equal_with_next_ = false; + is_scan_left_border_ = false; + is_scan_right_border_ = false; + } + 
TO_STRING_KV(K_(equal_with_next), KPC_(end_key), KPC(header_), K_(iter_idx), K_(is_scan_left_border), K_(is_scan_right_border), + K_(agg_buf_size), K_(row_offset), KP_(idx_minor_info), KP_(agg_row_buf)); +public: + bool equal_with_next_; // for simple row merger + bool is_scan_left_border_; + bool is_scan_right_border_; + const blocksstable::ObDatumRowkey *end_key_; + const blocksstable::ObIndexBlockRowHeader *header_; + int64_t iter_idx_; + int64_t agg_buf_size_; + int64_t row_offset_; + const ObIndexBlockRowMinorMetaInfo *idx_minor_info_; + const char *agg_row_buf_; +}; + +class ObDDLSSTableMergeLoserTreeCompare final +{ +public: + ObDDLSSTableMergeLoserTreeCompare(); + ~ObDDLSSTableMergeLoserTreeCompare(); + void reset(); + int cmp(const ObDDLSSTableMergeLoserTreeItem &lhs, + const ObDDLSSTableMergeLoserTreeItem &rhs, + int64_t &cmp_ret); + TO_STRING_KV(K(reverse_scan_), KPC(datum_utils_)); +public: + bool reverse_scan_; + const blocksstable::ObStorageDatumUtils *datum_utils_; +}; + +} // end namespace blocksstable +} // end namespace oceanbase +#endif diff --git a/src/storage/blocksstable/index_block/ob_index_block_builder.cpp b/src/storage/blocksstable/index_block/ob_index_block_builder.cpp index 73517cda1..0f3af9ae1 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_builder.cpp +++ b/src/storage/blocksstable/index_block/ob_index_block_builder.cpp @@ -67,6 +67,7 @@ void ObIndexTreeRootCtx::reset() meta_block_offset_ = 0; meta_block_size_ = 0; last_macro_size_ = 0; + use_absolute_offset_ = false; } int ObIndexTreeRootCtx::init(common::ObIAllocator &allocator) @@ -608,6 +609,7 @@ int ObSSTableIndexBuilder::trim_empty_roots() } else if (OB_FAIL(tmp_roots.reserve(root_count))) { STORAGE_LOG(WARN, "fail to reserve tmp roots", K(ret), K(root_count)); } else { + bool use_absolute_offset = false; for (int64_t i = 0; i < root_count && OB_SUCC(ret); ++i) { if (nullptr == roots_[i]) { // skip @@ -616,6 +618,11 @@ int 
ObSSTableIndexBuilder::trim_empty_roots() } else { if (OB_FAIL(tmp_roots.push_back(roots_[i]))) { STORAGE_LOG(WARN, "fail to push back root", K(ret), KPC(roots_[i])); + } else if (tmp_roots.count() == 1) { + use_absolute_offset = tmp_roots.at(0)->use_absolute_offset_; + } else if (OB_UNLIKELY(roots_[i]->use_absolute_offset_ != use_absolute_offset)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "unexpected row offset", K(ret), KPC(roots_[i]), K(use_absolute_offset)); } } } @@ -730,6 +737,7 @@ int ObSSTableIndexBuilder::merge_index_tree(ObSSTableMergeRes &res) ObIndexBlockRowDesc row_desc(data_desc.get_desc()); int64_t row_idx = -1; ObLogicMacroBlockId prev_logic_id; + const bool need_rewrite = index_store_desc_.get_desc().is_cg() && !roots_[0]->use_absolute_offset_; for (int64_t i = 0; OB_SUCC(ret) && i < roots_.count(); ++i) { ObMacroMetasArray *macro_metas = roots_[i]->macro_metas_; for (int64_t j = 0; OB_SUCC(ret) && j < macro_metas->count(); ++j) { @@ -742,8 +750,8 @@ int ObSSTableIndexBuilder::merge_index_tree(ObSSTableMergeRes &res) // and we don't want more additional memory/time consumption, we only check continuous ids here ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "unexpected duplicate logic macro id", K(ret), KPC(macro_meta), K(prev_logic_id)); - } else if (index_store_desc_.get_desc().is_cg()) { - // use row_idx to rewrite endkey + } else if (need_rewrite) { + // use row_idx to rewrite endkey unless absolute offset has been used macro_meta->end_key_.datums_[0].set_int( macro_meta->val_.row_count_ + row_idx); } @@ -1285,7 +1293,13 @@ int ObBaseIndexBlockBuilder::check_order(const ObIndexBlockRowDesc &row_desc) if (OB_UNLIKELY(!row_desc.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid row desc", K(ret), K(row_desc)); - } else if (last_rowkey_.is_valid() && !index_store_desc_->is_cg()) { + } else if (!last_rowkey_.is_valid()) { // skip + } else if (index_store_desc_->is_cg()) { // datum_utils is lacked for cg + if 
(row_desc.row_key_.get_datum(0).get_int() <= last_rowkey_.get_datum(0).get_int()) { + ret = OB_ROWKEY_ORDER_ERROR; + STORAGE_LOG(ERROR, "input rowkey is less then last rowkey.", K(row_desc.row_key_), K(last_rowkey_), K(ret)); + } + } else { const ObDatumRowkey &cur_rowkey = row_desc.row_key_; int32_t compare_result = 0; const ObStorageDatumUtils &datum_utils = index_store_desc_->get_datum_utils(); @@ -2491,7 +2505,7 @@ void ObIndexBlockRebuilder::reset() sstable_builder_ = nullptr; } -int ObIndexBlockRebuilder::init(ObSSTableIndexBuilder &sstable_builder, bool need_sort, const int64_t *task_idx) +int ObIndexBlockRebuilder::init(ObSSTableIndexBuilder &sstable_builder, bool need_sort, const int64_t *task_idx, const bool use_absolute_offset) { int ret = OB_SUCCESS; const int64_t bucket_num = 109; @@ -2513,6 +2527,7 @@ int ObIndexBlockRebuilder::init(ObSSTableIndexBuilder &sstable_builder, bool nee } if (OB_SUCC(ret)) { need_sort_ = need_sort; + index_tree_root_ctx_->use_absolute_offset_ = use_absolute_offset; is_inited_ = true; } } diff --git a/src/storage/blocksstable/index_block/ob_index_block_builder.h b/src/storage/blocksstable/index_block/ob_index_block_builder.h index 99b188b15..30cbc6c3d 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_builder.h +++ b/src/storage/blocksstable/index_block/ob_index_block_builder.h @@ -57,6 +57,7 @@ public: meta_block_offset_(0), meta_block_size_(0), last_macro_size_(0), + use_absolute_offset_(false), is_inited_(false) {} ~ObIndexTreeRootCtx(); int init(common::ObIAllocator &allocator); @@ -80,6 +81,7 @@ public: int64_t meta_block_offset_; int64_t meta_block_size_; int64_t last_macro_size_; + bool use_absolute_offset_; bool is_inited_; DISALLOW_COPY_AND_ASSIGN(ObIndexTreeRootCtx); }; @@ -405,7 +407,8 @@ class ObIndexBlockRebuilder final public: ObIndexBlockRebuilder(); ~ObIndexBlockRebuilder(); - int init(ObSSTableIndexBuilder &sstable_builder, bool need_sort = true, const int64_t *task_idx = nullptr); + // 
TODO(yunsong.lhp) rm use_absolute_offset from rebuilder + int init(ObSSTableIndexBuilder &sstable_builder, bool need_sort = true, const int64_t *task_idx = nullptr, const bool use_absolute_offset = false); int append_macro_row( const char *buf, const int64_t size, diff --git a/src/storage/blocksstable/index_block/ob_index_block_macro_iterator.cpp b/src/storage/blocksstable/index_block/ob_index_block_macro_iterator.cpp index 0340da949..333c77c6d 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_macro_iterator.cpp +++ b/src/storage/blocksstable/index_block/ob_index_block_macro_iterator.cpp @@ -102,7 +102,7 @@ int ObIndexBlockMacroIterator::open( } else if (OB_UNLIKELY(!sstable.is_valid() || !range.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("SSTable is not valid", K(ret), K(sstable), K(range)); - } else if (sstable.is_empty()) { + } else if (sstable.no_data_to_read()) { is_iter_end_ = true; } else if (OB_FAIL(sstable.get_last_rowkey(allocator, sstable_endkey))) { LOG_WARN("Fail to get last rowkey of sstable", K(ret)); diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp index 63f87a05f..1eb96f69c 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp +++ b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp @@ -17,6 +17,10 @@ #include "ob_index_block_row_struct.h" #include "storage/access/ob_rows_info.h" #include "storage/ddl/ob_tablet_ddl_kv.h" +#include "storage/ls/ob_ls.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h" namespace oceanbase { @@ -287,46 +291,103 @@ int ObIndexBlockDataTransformer::get_reader( return ret; } -ObIndexBlockRowScanner::ObIndexBlockRowScanner() - : query_range_(nullptr), agg_projector_(nullptr), agg_column_schema_(nullptr), - idx_data_header_(nullptr), 
macro_id_(), allocator_(nullptr), - micro_reader_helper_(), micro_reader_(nullptr), - block_meta_tree_(nullptr), datum_row_(nullptr), endkey_(), - idx_row_parser_(), datum_utils_(nullptr), - current_(ObIMicroBlockReaderInfo::INVALID_ROW_INDEX), +/****************** ObIndexBlockIterParam **********************/ +ObIndexBlockIterParam::ObIndexBlockIterParam() + : sstable_(nullptr), + tablet_(nullptr), + ls_id_(), + tablet_id_() +{ +} + +ObIndexBlockIterParam::~ObIndexBlockIterParam() +{ + reset(); +} + +ObIndexBlockIterParam &ObIndexBlockIterParam::operator=(const ObIndexBlockIterParam &other) +{ + sstable_ = other.sstable_; + tablet_ = other.tablet_; + ls_id_ = other.ls_id_; + tablet_id_ = other.tablet_id_; + return *this; +} + +int ObIndexBlockIterParam::assign(const ObIndexBlockIterParam &other) +{ + int ret = OB_SUCCESS; + sstable_ = other.sstable_; + tablet_ = other.tablet_; + ls_id_ = other.ls_id_; + tablet_id_ = other.tablet_id_; + return ret; +} + +void ObIndexBlockIterParam::reset() +{ + sstable_ = nullptr; + tablet_ = nullptr; + ls_id_.reset(); + tablet_id_.reset(); +} + +bool ObIndexBlockIterParam::is_valid() const +{ + return OB_NOT_NULL(sstable_) && ((ls_id_.is_valid() && tablet_id_.is_valid()) || OB_NOT_NULL(tablet_)); +} + +/****************** ObIndexBlockRowIterator **********************/ +ObIndexBlockRowIterator::ObIndexBlockRowIterator() + : is_inited_(false), + is_reverse_scan_(false), + iter_step_(1), + idx_row_parser_(), + datum_utils_(nullptr) +{ + +} + +ObIndexBlockRowIterator::~ObIndexBlockRowIterator() +{ + reset(); +} + +void ObIndexBlockRowIterator::reset() +{ + iter_step_ = 1; + datum_utils_ = nullptr; + is_reverse_scan_ = false; + idx_row_parser_.reset(); + is_inited_ = false; +} + +/****************** ObRAWIndexBlockRowIterator **********************/ +ObRAWIndexBlockRowIterator::ObRAWIndexBlockRowIterator() + : current_(ObIMicroBlockReaderInfo::INVALID_ROW_INDEX), start_(ObIMicroBlockReaderInfo::INVALID_ROW_INDEX), 
end_(ObIMicroBlockReaderInfo::INVALID_ROW_INDEX), - step_(1), range_idx_(0), nested_offset_(0), rowkey_begin_idx_(0), rowkey_end_idx_(0), - index_format_(IndexFormat::INVALID), parent_row_range_(), is_get_(false), is_reverse_scan_(false), - is_left_border_(false), is_right_border_(false), is_inited_(false), - is_normal_cg_(false), filter_constant_type_(sql::ObBoolMaskType::PROBABILISTIC) -{} - -ObIndexBlockRowScanner::~ObIndexBlockRowScanner() {} - -void ObIndexBlockRowScanner::reuse() + micro_reader_(nullptr), + allocator_(nullptr), + datum_row_(nullptr), + micro_reader_helper_(), + endkey_() { - query_range_ = nullptr; - idx_data_header_ = nullptr; + +} + +ObRAWIndexBlockRowIterator::~ObRAWIndexBlockRowIterator() +{ + reset(); +} + +void ObRAWIndexBlockRowIterator::reset() +{ + ObIndexBlockRowIterator::reset(); current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; start_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; end_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - block_meta_tree_ = nullptr; - index_format_ = IndexFormat::INVALID; - is_left_border_ = false; - is_right_border_ = false; - parent_row_range_.reset(); - filter_constant_type_ = sql::ObBoolMaskType::PROBABILISTIC; -} - -void ObIndexBlockRowScanner::reset() -{ - query_range_ = nullptr; - idx_data_header_ = nullptr; - micro_reader_helper_.reset(); - parent_row_range_.reset(); micro_reader_ = nullptr; - block_meta_tree_ = nullptr; if (nullptr != datum_row_) { datum_row_->~ObDatumRow(); if (nullptr != allocator_) { @@ -334,345 +395,60 @@ void ObIndexBlockRowScanner::reset() } datum_row_ = nullptr; } - datum_utils_ = nullptr; + micro_reader_helper_.reset(); + allocator_ = nullptr; +} + +void ObRAWIndexBlockRowIterator::reuse() +{ current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; start_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; end_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - step_ = 1; - range_idx_ = 0; - nested_offset_ = 0; - rowkey_begin_idx_ = 0; - rowkey_end_idx_ = 0; - index_format_ = 
IndexFormat::INVALID; - is_get_ = false; - is_reverse_scan_ = false; - is_left_border_ = false; - is_right_border_ = false; - is_inited_ = false; - is_normal_cg_ = false; - filter_constant_type_ = sql::ObBoolMaskType::PROBABILISTIC; } -int ObIndexBlockRowScanner::init( - const ObIArray &agg_projector, - const ObIArray &agg_column_schema, - const ObStorageDatumUtils &datum_utils, - ObIAllocator &allocator, - const common::ObQueryFlag &query_flag, - const int64_t nested_offset, - const bool is_normal_cg) +int ObRAWIndexBlockRowIterator::init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) { int ret = OB_SUCCESS; - if (IS_INIT) { - ret = OB_INIT_TWICE; - LOG_WARN("Already inited", K(ret)); - } else if (OB_UNLIKELY(agg_projector.count() != agg_column_schema.count())) { + if (OB_ISNULL(allocator) || OB_ISNULL(datum_utils) || !datum_utils->is_valid()) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("Agg meta count not same", K(ret), K(agg_projector), K(agg_column_schema)); - } else if (OB_FAIL(micro_reader_helper_.init(allocator))) { - LOG_WARN("Fail to init micro reader helper", K(ret)); + LOG_WARN("invalid arguement", K(ret), KP(allocator), KPC(datum_utils)); + } else if (!micro_reader_helper_.is_inited() && OB_FAIL(micro_reader_helper_.init(*allocator))) { + LOG_WARN("Fail to init micro reader helper", K(ret), KP(allocator)); + } else if (OB_FAIL(micro_reader_helper_.get_reader(idx_block_data.get_store_type(), micro_reader_))) { + LOG_WARN("Fail to get micro block reader", K(ret), K(idx_block_data), K(idx_block_data.get_store_type())); + } else if (OB_FAIL(micro_reader_->init(idx_block_data, datum_utils))) { + LOG_WARN("Fail to init micro reader", K(ret), K(idx_block_data)); + } else if (OB_FAIL(init_datum_row(*datum_utils, allocator))) { + LOG_WARN("Fail to init datum row", K(ret)); } else { - agg_projector_ = 
&agg_projector; - agg_column_schema_ = &agg_column_schema; - allocator_ = &allocator; - is_reverse_scan_ = query_flag.is_reverse_scan(); - step_ = is_reverse_scan_ ? -1 : 1; - datum_utils_ = &datum_utils; - nested_offset_ = nested_offset; - is_normal_cg_ = is_normal_cg; + is_reverse_scan_ = is_reverse_scan; + iter_step_ = is_reverse_scan_ ? -1 : 1; + datum_utils_ = datum_utils; + allocator_ = allocator; is_inited_ = true; } return ret; } -int ObIndexBlockRowScanner::open( - const MacroBlockId ¯o_id, - const ObMicroBlockData &idx_block_data, - const ObDatumRowkey &rowkey, - const int64_t range_idx, - const ObMicroIndexInfo *idx_info) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K(ret)); - } else if (OB_UNLIKELY(!macro_id.is_valid() || !idx_block_data.is_valid() || !rowkey.is_valid() - || !idx_block_data.is_index_block() || (is_normal_cg_ && nullptr == idx_info))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument to open an index micro block", K(ret), - K(macro_id), K(idx_block_data), K(rowkey), K_(is_normal_cg), KP(idx_info)); - } else if (OB_FAIL(init_by_micro_data(idx_block_data))) { - LOG_WARN("Fail to init scanner by micro data", K(ret), K(idx_block_data)); - } else if (is_normal_cg_ && !idx_info->is_root() && idx_info->is_macro_node()) { - // Rowkey offset in macro node is local - ObStorageDatum offset; - ObDatumRowkey offset_rowkey; - offset.set_int(rowkey.datums_[0].get_int() - idx_info->get_row_range().start_row_id_); - offset_rowkey.assign(&offset, 1); - if (OB_FAIL(locate_key(offset_rowkey))) { - LOG_WARN("Fail to locate rowkey", K(ret), K(idx_block_data), K(offset_rowkey)); - } - } else if (OB_FAIL(locate_key(rowkey))) { - LOG_WARN("Fail to locate rowkey", K(ret), K(idx_block_data), K(rowkey)); - } - if (OB_SUCC(ret)) { - macro_id_ = macro_id; - range_idx_ = range_idx; - rowkey_ = &rowkey; - is_get_ = true; - if (nullptr != idx_info) { - parent_row_range_ = idx_info->get_row_range(); - } else { - 
parent_row_range_.reset(); - } - } - return ret; -} -int ObIndexBlockRowScanner::open( - const MacroBlockId ¯o_id, - const ObMicroBlockData &idx_block_data, - const ObRowsInfo *rows_info, - const int64_t rowkey_begin_idx, - const int64_t rowkey_end_idx) -{ - int ret = OB_SUCCESS; - int64_t row_count = 0; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K(ret)); - } else if (OB_UNLIKELY(!macro_id.is_valid() || !idx_block_data.is_valid() || nullptr == rows_info)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument to open an index micro block", K(ret), K(macro_id), K(idx_block_data), - KP(rows_info)); - } else if (OB_FAIL(init_by_micro_data(idx_block_data))) { - LOG_WARN("Fail to init scanner by micro data", K(ret), K(idx_block_data)); - } else if (IndexFormat::TRANSFORMED == index_format_) { - row_count = idx_data_header_->row_cnt_; - } else if (IndexFormat::BLOCK_TREE == index_format_) { - row_count = block_meta_tree_->get_rowkey_count(); - } else if (IndexFormat::RAW_DATA == index_format_) { - if (OB_FAIL(micro_reader_->get_row_count(row_count))) { - LOG_WARN("Failed to get row count", K(ret), K(idx_block_data)); - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected index format", K_(index_format)); - } - - if (OB_SUCC(ret)) { - start_ = 0; - end_ = row_count - 1; - macro_id_ = macro_id; - rows_info_ = rows_info; - rowkey_begin_idx_ = rowkey_begin_idx; - rowkey_end_idx_ = rowkey_end_idx; - current_ = 0; - is_get_ = false; - } - return ret; -} - -int ObIndexBlockRowScanner::open( - const MacroBlockId ¯o_id, - const ObMicroBlockData &idx_block_data, - const ObDatumRange &range, - const int64_t range_idx, - const bool is_left_border, - const bool is_right_border, - const ObMicroIndexInfo *idx_info) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K(ret)); - } else if (OB_UNLIKELY(!macro_id.is_valid() || !idx_block_data.is_valid() || !range.is_valid() - || 
!idx_block_data.is_index_block() || (is_normal_cg_ && nullptr == idx_info))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument to open an index micro block", K(ret), K(idx_block_data), K(range), K_(is_normal_cg), KP(idx_info)); - } else if (OB_FAIL(init_by_micro_data(idx_block_data))) { - LOG_WARN("Fail to init scanner by micro data", K(ret), K(idx_block_data)); - } else if (OB_FAIL(locate_range(range, is_left_border, is_right_border))) { - if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { - LOG_WARN("Fail to locate range", K(ret), K(range), K(is_left_border), K(is_right_border)); - } - } else { - macro_id_ = macro_id; - is_left_border_ = is_left_border; - is_right_border_ = is_right_border; - range_idx_ = range_idx; - is_get_ = false; - if (nullptr != idx_info) { - parent_row_range_ = idx_info->get_row_range(); - filter_constant_type_ = idx_info->get_filter_constant_type(); - } else { - parent_row_range_.reset(); - } - } - return ret; -} - -int ObIndexBlockRowScanner::get_next( - ObMicroIndexInfo &idx_block_row, - const bool is_multi_check) -{ - int ret = OB_SUCCESS; - idx_block_row.reset(); - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K(ret)); - } else if (end_of_block()) { - ret = OB_ITER_END; - } else if (is_multi_check && OB_FAIL(skip_to_next_valid_position(idx_block_row))) { - if (OB_UNLIKELY(OB_ITER_END != ret)) { - LOG_WARN("Failed to skip to next valid position", K(ret)); - } else { - current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - } - } else if (OB_FAIL(get_next_idx_row(idx_block_row))) { - LOG_WARN("Failed to get next idx row", K(ret), K(is_multi_check)); - } - return ret; -} - -int ObIndexBlockRowScanner::get_cur_row_id_range(ObCSRange &cs_range) -{ - int ret = OB_SUCCESS; - const ObIndexBlockRowHeader *idx_row_header = nullptr; - const ObDatumRowkey *endkey = nullptr; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K_(is_inited)); - } else if (end_of_block()) { - ret = OB_ERR_UNEXPECTED; - 
LOG_WARN("Unexpected end of index block scanner", KPC(this)); - } else if (OB_FAIL(read_curr_idx_row(idx_row_header, endkey))) { - LOG_WARN("Fail to read currend index row", K(ret), K(index_format_), K_(current)); - } else if (OB_ISNULL(idx_row_header)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null index block row header", K(ret)); - } else { - cs_range.start_row_id_ = idx_row_parser_.get_row_offset() - idx_row_header->get_row_count() + 1; - cs_range.end_row_id_ = idx_row_parser_.get_row_offset(); - if (idx_row_header->is_data_block()) { - cs_range.start_row_id_ += parent_row_range_.start_row_id_; - cs_range.end_row_id_ += parent_row_range_.start_row_id_; - } - LOG_DEBUG("ObIndexBlockRowScanner::get_cur_row_id_range", - K(cs_range), K_(parent_row_range), K_(current), K_(start), K_(end)); - } - return ret; -} - -bool ObIndexBlockRowScanner::end_of_block() const -{ - return current_ < start_ - || current_ > end_ - || current_ == ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; -} - -int ObIndexBlockRowScanner::get_index_row_count(int64_t &index_row_count) const -{ - int ret = OB_SUCCESS; - index_row_count = 0; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K(ret)); - } else if (start_ < 0 || end_ < 0) { - index_row_count = 0; - } else { - index_row_count = end_ - start_ + 1; - } - return ret; -} - -int ObIndexBlockRowScanner::check_blockscan( - const ObDatumRowkey &rowkey, - bool &can_blockscan) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not init", K(ret)); - } else if (IndexFormat::BLOCK_TREE == index_format_) { - can_blockscan = false; - } else if (is_reverse_scan_) { - if (rowkey.is_min_rowkey()) { - can_blockscan = true; - } else { - // TODO(yuanzhe) opt this - can_blockscan = false; - } - } else if (rowkey.is_max_rowkey()) { - can_blockscan = true; - } else { - int cmp_ret = 0; - if (IndexFormat::RAW_DATA == index_format_) { - ObDatumRowkey last_endkey; - ObDatumRow tmp_datum_row; // Normally will 
use local datum buf, won't allocate memory - const int64_t request_cnt = datum_utils_->get_rowkey_count() + 1; - if (OB_FAIL(tmp_datum_row.init(request_cnt))) { - LOG_WARN("Fail to init tmp_datum_row", K(ret)); - } else if (OB_FAIL(micro_reader_->get_row(end_, tmp_datum_row))) { - LOG_WARN("Fail to get last row of micro block", K(ret), K_(end)); - } else if (OB_FAIL(last_endkey.assign(tmp_datum_row.storage_datums_, datum_utils_->get_rowkey_count()))) { - LOG_WARN("Fail to assign storage datum to endkey", K(ret), K(tmp_datum_row)); - } else if (OB_FAIL(last_endkey.compare(rowkey, *datum_utils_, cmp_ret, false))) { - LOG_WARN("Fail to compare rowkey", K(ret), K(last_endkey), K(rowkey)); - } - } else if (OB_FAIL((idx_data_header_->rowkey_array_ + end_)->compare(rowkey, *datum_utils_, cmp_ret, false))) { - LOG_WARN("Fail to compare rowkey", K(ret), K(rowkey)); - } - - if (OB_FAIL(ret)) { - } else if (cmp_ret < 0) { - can_blockscan = true; - } else { - can_blockscan = false; - } - } - return ret; -} - -int ObIndexBlockRowScanner::init_by_micro_data(const ObMicroBlockData &idx_block_data) -{ - int ret = OB_SUCCESS; - if (ObMicroBlockData::INDEX_BLOCK == idx_block_data.type_) { - if (nullptr == idx_block_data.get_extra_buf()) { - if (OB_FAIL(micro_reader_helper_.get_reader(idx_block_data.get_store_type(), micro_reader_))) { - LOG_WARN("Fail to get micro block reader", K(ret), - K(idx_block_data), K(idx_block_data.get_store_type())); - } else if (OB_FAIL(micro_reader_->init(idx_block_data, datum_utils_))) { - LOG_WARN("Fail to init micro reader", K(ret), K(idx_block_data)); - } else if (OB_FAIL(init_datum_row())) { - LOG_WARN("Fail to init datum row", K(ret)); - } else { - index_format_ = IndexFormat::RAW_DATA; - idx_data_header_ = nullptr; - } - } else { - idx_data_header_ = reinterpret_cast(idx_block_data.get_extra_buf()); - if (OB_UNLIKELY(!idx_data_header_->is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Invalid index block data header", K(ret), 
KPC(idx_data_header_)); - } else { - index_format_ = IndexFormat::TRANSFORMED; - } - } - } else if (ObMicroBlockData::DDL_BLOCK_TREE == idx_block_data.type_) { - block_meta_tree_ = reinterpret_cast(const_cast(idx_block_data.buf_)); - index_format_ = IndexFormat::BLOCK_TREE; - } - return ret; -} - -int ObIndexBlockRowScanner::locate_key(const ObDatumRowkey &rowkey) +int ObRAWIndexBlockRowIterator::locate_key(const ObDatumRowkey &rowkey) { int ret = OB_SUCCESS; int64_t begin_idx = -1; int64_t end_idx = -1; - if (IndexFormat::RAW_DATA == index_format_) { - ObDatumRange range; + ObDatumRange range; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!rowkey.is_valid() || OB_ISNULL(micro_reader_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey), KP(micro_reader_)); + } else { range.set_start_key(rowkey); range.end_key_.set_max_rowkey(); range.set_left_closed(); @@ -682,11 +458,296 @@ int ObIndexBlockRowScanner::locate_key(const ObDatumRowkey &rowkey) LOG_WARN("Fail to locate range in micro data", K(ret)); } else { current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - ret = OB_SUCCESS; // return OB_ITER_END on get_next() for get } } LOG_TRACE("Binary search rowkey with micro reader", K(ret), K(range), K(begin_idx), K(rowkey)); - } else if (IndexFormat::TRANSFORMED == index_format_) { + } + if (OB_SUCC(ret)) { + current_ = begin_idx; + start_ = begin_idx; + end_ = begin_idx; + } + return ret; +} + +int ObRAWIndexBlockRowIterator::locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) +{ + int ret = OB_SUCCESS; + int64_t begin_idx = -1; + int64_t end_idx = -1; + current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!range.is_valid() || OB_ISNULL(micro_reader_))) { + ret = 
OB_INVALID_ARGUMENT; + LOG_WARN("invalid range", K(ret), K(range), KP(micro_reader_)); + } else if (OB_FAIL(micro_reader_->locate_range( + range, is_left_border, is_right_border, begin_idx, end_idx, true))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate range with micro reader", K(ret)); + } + } else { + LOG_TRACE("Binary search range with micro reader", K(ret), K(range), K(begin_idx), K(end_idx)); + } + + if (OB_SUCC(ret)) { + start_ = begin_idx; + end_ = end_idx; + current_ = is_reverse_scan_ ? end_idx : begin_idx; + } + return ret; +} + +int ObRAWIndexBlockRowIterator::check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) +{ + int ret = OB_SUCCESS; + int cmp_ret = 0; + ObDatumRowkey last_endkey; + ObDatumRow tmp_datum_row; // Normally will use local datum buf, won't allocate memory + const int64_t request_cnt = datum_utils_->get_rowkey_count() + 1; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!rowkey.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey)); + } else if (OB_FAIL(tmp_datum_row.init(request_cnt))) { + LOG_WARN("Fail to init tmp_datum_row", K(ret)); + } else if (OB_FAIL(micro_reader_->get_row(end_, tmp_datum_row))) { + LOG_WARN("Fail to get last row of micro block", K(ret), K_(end)); + } else if (OB_FAIL(last_endkey.assign(tmp_datum_row.storage_datums_, datum_utils_->get_rowkey_count()))) { + LOG_WARN("Fail to assign storage datum to endkey", K(ret), K(tmp_datum_row)); + } else if (OB_FAIL(last_endkey.compare(rowkey, *datum_utils_, cmp_ret, false))) { + LOG_WARN("Fail to compare rowkey", K(ret), K(last_endkey), K(rowkey)); + } else { + can_blockscan = cmp_ret < 0; + } + return ret; +} + +bool ObRAWIndexBlockRowIterator::end_of_block() const +{ + return current_ < start_ + || current_ > end_ + || current_ == ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; +} + +int 
ObRAWIndexBlockRowIterator::get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + const int64_t rowkey_column_count = datum_utils_->get_rowkey_count(); + idx_row_parser_.reset(); + endkey_.reset(); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_ISNULL(datum_row_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null pointer to index row", K(ret)); + } else if (OB_FAIL(micro_reader_->get_row(current_, *datum_row_))) { + LOG_WARN("Fail to read index row from block", K(ret), K(current_)); + } else if (OB_FAIL(idx_row_parser_.init(rowkey_column_count, *datum_row_))) { + LOG_WARN("Fail to parser index block row", K(ret), KPC(datum_row_), K(rowkey_column_count)); + } else if (OB_FAIL(idx_row_parser_.get_header(idx_row_header))) { + LOG_WARN("Fail to get index block row header", K(ret)); + } else if (OB_FAIL(endkey_.assign(datum_row_->storage_datums_, rowkey_column_count))) { + LOG_WARN("Fail to assign storage datum to endkey", K(ret), KPC(datum_row_), K(rowkey_column_count)); + } else { + endkey = &endkey_; + } + return ret; +} + +int ObRAWIndexBlockRowIterator::get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + idx_minor_info = nullptr; + agg_row_buf = nullptr; + agg_buf_size = 0; + row_offset = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_FAIL(get_current(idx_row_header, endkey))) { + LOG_WARN("read cur idx row failed", K(ret), KPC(idx_row_header), KPC(endkey)); + } else if (OB_UNLIKELY(nullptr == idx_row_header || nullptr 
== endkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KP(idx_row_header), KP(endkey)); + } else if (idx_row_header->is_data_index() && !idx_row_header->is_major_node()) { + if (OB_FAIL(idx_row_parser_.get_minor_meta(idx_minor_info))) { + LOG_WARN("Fail to get minor meta info", K(ret)); + } + } else if (!idx_row_header->is_major_node() || !idx_row_header->is_pre_aggregated()) { + // Do not have aggregate data + } else if (OB_FAIL(idx_row_parser_.get_agg_row(agg_row_buf, agg_buf_size))) { + LOG_WARN("Fail to get aggregate", K(ret)); + } + if (OB_SUCC(ret)) { + row_offset = idx_row_parser_.get_row_offset(); + is_scan_left_border = current_ == start_; + is_scan_right_border = current_ == end_; + current_ += iter_step_; + } + return ret; +} + +int ObRAWIndexBlockRowIterator::init_datum_row(const ObStorageDatumUtils &datum_utils, ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + if (nullptr != datum_row_ && datum_row_->is_valid()) { + // row allocated + } else if (nullptr != datum_row_) { + if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator is null", K(ret), KP(allocator)); + } else { + datum_row_->~ObDatumRow(); + allocator->free(datum_row_); + datum_row_ = nullptr; + } + } + + if (OB_SUCC(ret)) { + if (nullptr == datum_row_) { + const int64_t request_cnt = datum_utils.get_rowkey_count() + 1; + void *buf = nullptr; + if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator is null", K(ret), KP(allocator)); + } else if (OB_ISNULL(buf = allocator->alloc(sizeof(ObDatumRow)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Fail to allocate memory for datum row", K(ret)); + } else if (FALSE_IT(datum_row_ = new (buf) ObDatumRow())) { + } else if (OB_FAIL(datum_row_->init(*allocator, request_cnt))) { + LOG_WARN("Fail to init datum row", K(ret), K(request_cnt)); + } + + if (OB_FAIL(ret) && nullptr != buf) { + if (OB_NOT_NULL(datum_row_)) { + datum_row_->~ObDatumRow(); 
+ } + allocator->free(buf); + datum_row_ = nullptr; + } + } + } + return ret; +} + +bool ObRAWIndexBlockRowIterator::is_in_border(bool is_reverse_scan, bool is_left_border, bool is_right_border) +{ + bool in_border = false; + if (!is_reverse_scan) { + in_border = is_right_border && current_ == end_; + } else { + in_border = is_left_border && current_ == start_; + } + return in_border; +} + +int ObRAWIndexBlockRowIterator::get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) +{ + int ret = OB_SUCCESS; + index_row_count = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else { + if (start_ < 0 || end_ < 0) { + index_row_count = 0; + } else { + index_row_count = end_ - start_ + 1; + } + } + return ret; +} + +/****************** ObTFMIndexBlockRowIterator **********************/ +ObTFMIndexBlockRowIterator::ObTFMIndexBlockRowIterator() + : idx_data_header_(nullptr) +{ + +} + +ObTFMIndexBlockRowIterator::~ObTFMIndexBlockRowIterator() +{ + reset(); +} + +void ObTFMIndexBlockRowIterator::reset() +{ + ObRAWIndexBlockRowIterator::reset(); + idx_data_header_ = nullptr; +} + +void ObTFMIndexBlockRowIterator::reuse() +{ + ObRAWIndexBlockRowIterator::reuse(); + idx_data_header_ = nullptr; +} + +int ObTFMIndexBlockRowIterator::init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) +{ + int ret = OB_SUCCESS; + idx_data_header_ = reinterpret_cast(idx_block_data.get_extra_buf()); + if (OB_ISNULL(allocator) || OB_ISNULL(datum_utils) || !datum_utils->is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(allocator), KPC(datum_utils)); + } else if (!micro_reader_helper_.is_inited() && OB_FAIL(micro_reader_helper_.init(*allocator_))) { + LOG_WARN("Fail to init micro 
reader helper", K(ret)); + } else if (OB_UNLIKELY(!idx_data_header_->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid index block data header", K(ret), KPC(idx_data_header_)); + } else { + is_reverse_scan_ = is_reverse_scan; + iter_step_ = is_reverse_scan_ ? -1 : 1; + datum_utils_ = datum_utils; + if (set_iter_end) { + current_ = 0; + start_ = 0; + end_ = idx_data_header_->row_cnt_ - 1; + } + is_inited_ = true; + } + return ret; +} + +int ObTFMIndexBlockRowIterator::locate_key(const ObDatumRowkey &rowkey) +{ + int ret = OB_SUCCESS; + int64_t begin_idx = -1; + int64_t end_idx = -1; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!rowkey.is_valid() || OB_ISNULL(idx_data_header_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey), KP(idx_data_header_)); + } else { ObDatumComparor cmp(*datum_utils_, ret); const ObDatumRowkey *first = idx_data_header_->rowkey_array_; const ObDatumRowkey *last = idx_data_header_->rowkey_array_ + idx_data_header_->row_cnt_; @@ -700,31 +761,6 @@ int ObIndexBlockRowScanner::locate_key(const ObDatumRowkey &rowkey) } LOG_TRACE("Binary search rowkey in transformed block", K(ret), KP(found), KPC(first), KP(last), K(current_), K(rowkey), KPC(idx_data_header_)); - } else if (IndexFormat::BLOCK_TREE == index_format_) { - ObDatumRange range; - range.set_start_key(rowkey); - range.set_end_key(rowkey); - range.set_left_closed(); - range.set_right_closed(); - if (OB_ISNULL(block_meta_tree_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("block meta tree is null", K(ret)); - } else if (OB_FAIL(block_meta_tree_->locate_range(range, - *datum_utils_, - true,// is_left_border - true,// is_right_border - begin_idx, - end_idx))) { - if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { - LOG_WARN("locate rowkey failed", K(ret), K(range)); - } else { - current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - ret = OB_SUCCESS; // return OB_ITER_END on 
get_next() for get - } - } - } else { - ret = OB_NOT_SUPPORTED; - LOG_WARN("not supported index format", K(ret), K(index_format_)); } if (OB_SUCC(ret)) { @@ -735,21 +771,27 @@ int ObIndexBlockRowScanner::locate_key(const ObDatumRowkey &rowkey) return ret; } -int ObIndexBlockRowScanner::locate_range( - const ObDatumRange &range, - const bool is_left_border, - const bool is_right_border) +int ObTFMIndexBlockRowIterator::locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) { int ret = OB_SUCCESS; int64_t begin_idx = -1; int64_t end_idx = -1; current_ = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - if (IndexFormat::TRANSFORMED == index_format_) { - bool is_begin_equal = false; - ObDatumComparor lower_bound_cmp(*datum_utils_, ret); - ObDatumComparor upper_bound_cmp(*datum_utils_, ret, false, false); - const ObDatumRowkey *first = idx_data_header_->rowkey_array_; - const ObDatumRowkey *last = idx_data_header_->rowkey_array_ + idx_data_header_->row_cnt_; + bool is_begin_equal = false; + ObDatumComparor lower_bound_cmp(*datum_utils_, ret); + ObDatumComparor upper_bound_cmp(*datum_utils_, ret, false, false); + const ObDatumRowkey *first = idx_data_header_->rowkey_array_; + const ObDatumRowkey *last = idx_data_header_->rowkey_array_ + idx_data_header_->row_cnt_; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!range.is_valid() || OB_ISNULL(idx_data_header_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid range", K(ret), K(range), KP(idx_data_header_)); + } else { if (!is_left_border || range.get_start_key().is_min_rowkey()) { begin_idx = 0; } else { @@ -783,7 +825,7 @@ int ObIndexBlockRowScanner::locate_range( const ObDatumRowkey *end_found = nullptr; // TODO remove is_normal_cg_, use flag in header // no need to use upper_bound for column store - if (!is_normal_cg_ && range.get_border_flag().inclusive_end()) { + if 
(!is_normal_cg && range.get_border_flag().inclusive_end()) { end_found = std::upper_bound(first, last, range.get_end_key(), upper_bound_cmp); } else { end_found = std::lower_bound(first, last, range.get_end_key(), lower_bound_cmp); @@ -805,41 +847,811 @@ int ObIndexBlockRowScanner::locate_range( } LOG_TRACE("Locate range in index block by range", K(ret), K(range), K(begin_idx), K(end_idx), K(is_left_border), K(is_right_border), K_(current), KPC(idx_data_header_)); - } else if (IndexFormat::RAW_DATA == index_format_) { - if (OB_FAIL(micro_reader_->locate_range( - range, is_left_border, is_right_border, begin_idx, end_idx, true))) { - if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { - LOG_WARN("Fail to locate range with micro reader", K(ret)); - } - } else { - LOG_TRACE("Binary search range with micro reader", K(ret), K(range), K(begin_idx), K(end_idx)); - } - } else if (IndexFormat::BLOCK_TREE == index_format_) { - if (OB_ISNULL(block_meta_tree_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("block meta tree is null", K(ret)); - } else if (OB_FAIL(block_meta_tree_->locate_range(range, - *datum_utils_, - is_left_border, - is_right_border, - begin_idx, - end_idx))) { - if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { - LOG_WARN("locate rowkey failed", K(ret), K(range)); - } - } - } else { - ret = OB_NOT_SUPPORTED; - LOG_WARN("not supported index format", K(ret), K(index_format_)); } if (OB_SUCC(ret)) { start_ = begin_idx; end_ = end_idx; - current_ = is_reverse_scan_ ? end_ : start_; + current_ = is_reverse_scan_ ? 
end_idx : begin_idx; + } + return ret; +} + + +int ObTFMIndexBlockRowIterator::check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) +{ + int ret = OB_SUCCESS; + int cmp_ret = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!rowkey.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey)); + } else if (OB_FAIL((idx_data_header_->rowkey_array_ + end_)->compare(rowkey, *datum_utils_, cmp_ret, false))) { + LOG_WARN("Fail to compare rowkey", K(ret), K(rowkey)); + } else { + can_blockscan = cmp_ret < 0; + } + return ret; +} + +int ObTFMIndexBlockRowIterator::get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + const int64_t rowkey_column_count = datum_utils_->get_rowkey_count(); + idx_row_parser_.reset(); + const char *idx_data_buf = nullptr; + int64_t idx_data_len = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_FAIL(idx_data_header_->get_index_data(current_, idx_data_buf, idx_data_len))) { + LOG_WARN("Fail to get index data", K(ret), K_(current), KPC_(idx_data_header)); + } else if (OB_FAIL(idx_row_parser_.init(idx_data_buf, idx_data_len))) { + LOG_WARN("Fail to parse index block row", K(ret), K_(current), KPC(idx_data_header_)); + } else if (OB_FAIL(idx_row_parser_.get_header(idx_row_header))) { + LOG_WARN("Fail to get index block row header", K(ret)); + } else { + endkey = &idx_data_header_->rowkey_array_[current_]; + } + return ret; +} + +int ObTFMIndexBlockRowIterator::get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) +{ + int ret = 
OB_SUCCESS; + idx_row_header = nullptr; + endkey = nullptr; + idx_minor_info = nullptr; + agg_row_buf = nullptr; + agg_buf_size = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_FAIL(get_current(idx_row_header, endkey))) { + LOG_WARN("read cur idx row failed", K(ret), KPC(idx_row_header), KPC(endkey)); + } else if (OB_UNLIKELY(nullptr == idx_row_header || nullptr == endkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KP(idx_row_header), KP(endkey)); + } else if (idx_row_header->is_data_index() && !idx_row_header->is_major_node()) { + if (OB_FAIL(idx_row_parser_.get_minor_meta(idx_minor_info))) { + LOG_WARN("Fail to get minor meta info", K(ret)); + } + } else if (!idx_row_header->is_major_node() || !idx_row_header->is_pre_aggregated()) { + // Do not have aggregate data + } else if (OB_FAIL(idx_row_parser_.get_agg_row(agg_row_buf, agg_buf_size))) { + LOG_WARN("Fail to get aggregate", K(ret)); + } + if (OB_SUCC(ret)) { + row_offset = idx_row_parser_.get_row_offset(); + is_scan_left_border = current_ == start_; + is_scan_right_border = current_ == end_; + current_ += iter_step_; + } + return ret; +} + +int ObTFMIndexBlockRowIterator::get_idx_row_header_in_target_idx(const int64_t idx, + const ObIndexBlockRowHeader *&idx_row_header) +{ + int ret = OB_SUCCESS; + idx_row_header = nullptr; + idx_row_parser_.reset(); + const char *idx_data_buf = nullptr; + int64_t idx_data_len = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_FAIL(idx_data_header_->get_index_data(idx, idx_data_buf, idx_data_len))) { + LOG_WARN("Fail to get index data", K(idx), K_(start), K_(end), K_(current), KPC_(idx_data_header)); + } else if (OB_FAIL(idx_row_parser_.init(idx_data_buf, idx_data_len))) { + LOG_WARN("Fail to parse index block row", K(idx), KPC(idx_data_header_)); + } else if 
(OB_FAIL(idx_row_parser_.get_header(idx_row_header))) { + LOG_WARN("Fail to get index block row header", KPC(idx_row_header)); + } + return ret; +} + +int ObTFMIndexBlockRowIterator::find_out_rows(const int32_t range_idx, + const int64_t scanner_range_idx, + int64_t &found_idx) +{ + int ret = OB_SUCCESS; + found_idx = -1; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (range_idx == scanner_range_idx && !end_of_block()) { + const int64_t start_idx = current_; + const int64_t end_idx = is_reverse_scan_ ? start_ : end_; + for (int64_t i = start_idx; OB_SUCC(ret) && (i * iter_step_) <= (end_idx * iter_step_); i += iter_step_) { + const ObIndexBlockRowHeader *idx_row_header = nullptr; + if (OB_FAIL(get_idx_row_header_in_target_idx(i, idx_row_header))) { + LOG_WARN("Failed to get idx row header", K(i)); + } else if (idx_row_header->has_lob_out_row()) { + found_idx = i; + break; + } + } + } + LOG_DEBUG("ObTFMIndexBlockRowIterator::find_out_rows", K(range_idx), KPC(this)); + return ret; +} + +int ObTFMIndexBlockRowIterator::find_out_rows_from_start_to_end(const int32_t range_idx, + const int64_t scanner_range_idx, + const ObCSRowId start_row_id, + const ObCSRange &parent_row_range, + bool &is_certain, + int64_t &found_idx) +{ + int ret = OB_SUCCESS; + found_idx = -1; + is_certain = true; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (range_idx == scanner_range_idx) { + const int64_t start_idx = is_reverse_scan_ ? end_ : start_; + const int64_t end_idx = is_reverse_scan_ ? 
start_ : end_; + bool meet_start_row_id = false; + for (int64_t i = start_idx; OB_SUCC(ret) && (i * iter_step_) <= (end_idx * iter_step_); i += iter_step_) { + const ObIndexBlockRowHeader *idx_row_header = nullptr; + if (OB_FAIL(get_idx_row_header_in_target_idx(i, idx_row_header))) { + LOG_WARN("Failed to get idx row header", K(i)); + } + if (OB_SUCC(ret)) { + if (!meet_start_row_id) { + ObCSRowId cur_start_row_id = idx_row_parser_.get_row_offset() - idx_row_header->get_row_count() + 1; + ObCSRowId cur_end_row_id = idx_row_parser_.get_row_offset(); + if (idx_row_header->is_data_block()) { + cur_start_row_id += parent_row_range.start_row_id_; + cur_end_row_id += parent_row_range.start_row_id_; + } + meet_start_row_id = (start_row_id >= cur_start_row_id && start_row_id <= cur_end_row_id); + } + if (meet_start_row_id && idx_row_header->has_lob_out_row()) { + if ((i * iter_step_) >= (current_ * iter_step_)) { + found_idx = i; + } else { + is_certain = false; + } + break; + } + } + } + } else { + is_certain = false; + } + return ret; +} + +int ObTFMIndexBlockRowIterator::advance_to_border(const ObDatumRowkey &rowkey, + const bool is_left_border, + const bool is_right_border, + const ObCSRange &parent_row_range, + ObCSRange &cs_range) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (OB_UNLIKELY(end_of_block())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected error", K(ret), K(end_of_block())); + } else if (OB_UNLIKELY(!rowkey.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey", K(ret), K(rowkey)); + } else { + const int64_t limit_idx = is_reverse_scan_ ? start_ : end_; + const bool is_range_end = is_reverse_scan_ ? 
is_left_border : is_right_border; + ObDatumComparor lower_bound_cmp(*datum_utils_, ret); + const ObDatumRowkey *first = nullptr; + const ObDatumRowkey *last = nullptr; + if (!is_reverse_scan_) { + first = idx_data_header_->rowkey_array_ + current_; + last = idx_data_header_->rowkey_array_ + limit_idx + 1; + } else { + first =idx_data_header_->rowkey_array_ + limit_idx; + last = idx_data_header_->rowkey_array_ + current_ + 1; + } + const ObDatumRowkey *start_found = std::lower_bound(first, last, rowkey, lower_bound_cmp); + if (OB_FAIL(ret)) { + LOG_WARN("Failed to get rowkey lower bound", K(ret), K(rowkey), KPC(this)); + } else if (!is_reverse_scan_) { + // found_pos is safe to skip(end_key < border_rowkey). + int64_t found_pos = start_found - idx_data_header_->rowkey_array_ - 1; + if (is_range_end && found_pos == limit_idx) { + // if is_range_end is true, we cannot skip all rowids because only subset of rowids statisy query range. + found_pos--; + } + LOG_DEBUG("ObTFMIndexBlockRowIterator::advance_to_border", K(found_pos), K(is_range_end), + KPC(this), K(limit_idx), K(is_range_end)); + if (found_pos >= current_) { + current_ = found_pos + 1; + if (OB_FAIL(get_cur_row_id_range(parent_row_range, cs_range))) { + LOG_WARN("Failed to get cur row id range", K(ret), K(rowkey), KPC(this), + K(limit_idx), K(is_range_end)); + } + } + } else { + // found_pos is safe to skip. + int64_t found_pos = start_found - idx_data_header_->rowkey_array_ + 1; + // found_pos != start_, there is no need to check is_range_end. 
+ if (found_pos <= current_ + 1) { + current_ = found_pos - 1; + if (OB_FAIL(get_cur_row_id_range(parent_row_range, cs_range))) { + LOG_WARN("Failed to get cur row id range", K(ret), K(rowkey), KPC(this), + K(limit_idx), K(is_range_end)); + } + } + } + LOG_DEBUG("ObTFMIndexBlockRowIterator::advance_to_border", K(limit_idx), K(is_range_end), KPC(this)); + } + return ret; +} + +int ObTFMIndexBlockRowIterator::get_cur_row_id_range(const ObCSRange &parent_row_range, + ObCSRange &cs_range) +{ + int ret = OB_SUCCESS; + const ObIndexBlockRowHeader *idx_row_header = nullptr; + const ObDatumRowkey *endkey = nullptr; + bool is_scan_left_border = false; + bool is_scan_right_border = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else if (end_of_block()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected end of index block scanner", KPC(this)); + } else if (OB_FAIL(get_current(idx_row_header, endkey))) { + LOG_WARN("get next idx block row failed", K(ret), KP(idx_row_header), KPC(endkey), K(is_reverse_scan_)); + } else if (OB_ISNULL(idx_row_header)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header", K(ret)); + } else { + cs_range.start_row_id_ = idx_row_parser_.get_row_offset() - idx_row_header->get_row_count() + 1; + cs_range.end_row_id_ = idx_row_parser_.get_row_offset(); + if (idx_row_header->is_data_block()) { + cs_range.start_row_id_ += parent_row_range.start_row_id_; + cs_range.end_row_id_ += parent_row_range.start_row_id_; + } + LOG_DEBUG("ObTFMIndexBlockRowIterator::get_cur_row_id_range", + K(cs_range), K(parent_row_range), KPC(this)); + } + return ret; +} + +int ObTFMIndexBlockRowIterator::skip_to_next_valid_position(ObMicroIndexInfo &idx_block_row, + int64_t &rowkey_begin_idx, + int64_t &rowkey_end_idx, + const ObRowsInfo *&rows_info) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else { + for (; 
rowkey_begin_idx < rowkey_end_idx; ++rowkey_begin_idx) { + if (!rows_info->is_row_skipped(rowkey_begin_idx)) { + break; + } + } + + if (rowkey_begin_idx == rowkey_end_idx) { + ret = OB_ITER_END; + } else { + const ObDatumRowkey &rowkey = rows_info->get_rowkey(rowkey_begin_idx); + ObDatumComparor cmp(*datum_utils_, ret); + const ObDatumRowkey *first = idx_data_header_->rowkey_array_ + current_; + const ObDatumRowkey *last = idx_data_header_->rowkey_array_ + end_ + 1; + const ObDatumRowkey *found = std::lower_bound(first, last, rowkey, cmp); + if (OB_FAIL(ret)) { + LOG_WARN("Failed to get lower bound of rowkey", K(ret), K(rowkey), KPC(this)); + } else if (found == last) { + ret = OB_ITER_END; + } else { + current_= found - idx_data_header_->rowkey_array_; + idx_block_row.rows_info_ = rows_info; + idx_block_row.rowkey_begin_idx_ = rowkey_begin_idx++; + if (OB_FAIL(find_rowkeys_belong_to_same_idx_row(idx_block_row.rowkey_end_idx_, rowkey_begin_idx, rowkey_end_idx, rows_info))) { + LOG_WARN("Failed to find rowkeys belong to same index row", K(ret), K(rowkey_begin_idx), K(rowkey_end_idx), KPC(rows_info)); + } + } + } + } + return ret; +} + +int ObTFMIndexBlockRowIterator::find_rowkeys_belong_to_same_idx_row(int64_t &rowkey_idx, + int64_t &rowkey_begin_idx, + int64_t &rowkey_end_idx, + const ObRowsInfo *&rows_info) +{ + int ret = OB_SUCCESS; + const ObDatumRowkey *cur_rowkey = idx_data_header_->rowkey_array_ + current_; + bool is_decided = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Iter not opened yet", K(ret), KPC(this)); + } else { + for (; OB_SUCC(ret) && rowkey_begin_idx < rowkey_end_idx; ++rowkey_begin_idx) { + if (rows_info->is_row_skipped(rowkey_begin_idx)) { + continue; + } + const ObDatumRowkey &rowkey = rows_info->get_rowkey(rowkey_begin_idx); + int cmp_ret = 0; + if (OB_FAIL(rowkey.compare(*cur_rowkey, *datum_utils_, cmp_ret))) { + LOG_WARN("Failed to compare rowkey", K(ret), K(rowkey), KPC(cur_rowkey)); + } else if (cmp_ret > 0) { + 
rowkey_idx = rowkey_begin_idx; + is_decided = true; + break; + } else if (cmp_ret == 0) { + rowkey_idx = rowkey_begin_idx + 1; + is_decided = true; + break; + } + } + if (!is_decided) { + rowkey_idx = rowkey_begin_idx; + } + } + return ret; +} + +/****************** ObIndexBlockRowScanner **********************/ +ObIndexBlockRowScanner::ObIndexBlockRowScanner() + : query_range_(nullptr), agg_projector_(nullptr), agg_column_schema_(nullptr), + macro_id_(), allocator_(nullptr), raw_iter_(nullptr), transformed_iter_(nullptr), + ddl_iter_(nullptr), ddl_merge_iter_(nullptr), iter_(nullptr), datum_utils_(nullptr), + range_idx_(0), nested_offset_(0), rowkey_begin_idx_(0), rowkey_end_idx_(0), + index_format_(ObIndexFormat::INVALID), parent_row_range_(), is_get_(false), is_reverse_scan_(false), + is_left_border_(false), is_right_border_(false), is_inited_(false), + is_normal_cg_(false), is_normal_query_(true), filter_constant_type_(sql::ObBoolMaskType::PROBABILISTIC), + iter_param_() +{} + +ObIndexBlockRowScanner::~ObIndexBlockRowScanner() +{ + reset(); +} + +void ObIndexBlockRowScanner::reuse() +{ + query_range_ = nullptr; + if (OB_NOT_NULL(raw_iter_)) { + raw_iter_->reuse(); + } + if (OB_NOT_NULL(transformed_iter_)) { + transformed_iter_->reuse(); + } + if (OB_NOT_NULL(ddl_iter_)) { + ddl_iter_->reuse(); + } + if (OB_NOT_NULL(ddl_merge_iter_)) { + ddl_merge_iter_->reuse(); + } + is_left_border_ = false; + is_right_border_ = false; + parent_row_range_.reset(); + filter_constant_type_ = sql::ObBoolMaskType::PROBABILISTIC; +} + +void ObIndexBlockRowScanner::reset() +{ + query_range_ = nullptr; + parent_row_range_.reset(); + if (nullptr != raw_iter_) { + raw_iter_->reset(); + if (nullptr != allocator_) { + allocator_->free(raw_iter_); + raw_iter_ = nullptr; + } + } + if (nullptr != transformed_iter_) { + transformed_iter_->reset(); + if (nullptr != allocator_) { + allocator_->free(transformed_iter_); + transformed_iter_ = nullptr; + } + } + if (nullptr != ddl_iter_) { + 
ddl_iter_->reset(); + if (nullptr != allocator_) { + allocator_->free(ddl_iter_); + ddl_iter_ = nullptr; + } + } + if (nullptr != ddl_merge_iter_) { + ddl_merge_iter_->reset(); + if (nullptr != allocator_) { + allocator_->free(ddl_merge_iter_); + ddl_merge_iter_ = nullptr; + } + } + iter_ = nullptr; + datum_utils_ = nullptr; + range_idx_ = 0; + nested_offset_ = 0; + rowkey_begin_idx_ = 0; + rowkey_end_idx_ = 0; + index_format_ = ObIndexFormat::INVALID; + is_get_ = false; + is_reverse_scan_ = false; + is_left_border_ = false; + is_right_border_ = false; + is_inited_ = false; + is_normal_cg_ = false; + is_normal_query_ = true; + iter_param_.reset(); + allocator_ = nullptr; + filter_constant_type_ = sql::ObBoolMaskType::PROBABILISTIC; +} + +int ObIndexBlockRowScanner::init( + const ObIArray &agg_projector, + const ObIArray &agg_column_schema, + const ObStorageDatumUtils &datum_utils, + ObIAllocator &allocator, + const common::ObQueryFlag &query_flag, + const int64_t nested_offset, + const bool is_normal_cg) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("Already inited", K(ret)); + } else if (OB_UNLIKELY(!datum_utils.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid datum utils", K(ret), K(datum_utils)); + } else if (OB_UNLIKELY(agg_projector.count() != agg_column_schema.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Agg meta count not same", K(ret), K(agg_projector), K(agg_column_schema)); + } else { + agg_projector_ = &agg_projector; + agg_column_schema_ = &agg_column_schema; + allocator_ = &allocator; + is_reverse_scan_ = query_flag.is_reverse_scan(); + datum_utils_ = &datum_utils; + nested_offset_ = nested_offset; + is_normal_cg_ = is_normal_cg; + is_normal_query_ = !query_flag.is_daily_merge() && !query_flag.is_multi_version_minor_merge(); + is_inited_ = true; + } + return ret; +} + +int ObIndexBlockRowScanner::open( + const MacroBlockId ¯o_id, + const ObMicroBlockData &idx_block_data, + const ObDatumRowkey 
&rowkey, + const int64_t range_idx, + const ObMicroIndexInfo *idx_info) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not inited", K(ret)); + } else if (OB_UNLIKELY(!macro_id.is_valid() || !idx_block_data.is_valid() || !rowkey.is_valid() + || !idx_block_data.is_index_block() || (is_normal_cg_ && nullptr == idx_info))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument to open an index micro block", K(ret), + K(macro_id), K(idx_block_data), K(rowkey), K_(is_normal_cg), KP(idx_info)); + } else if (OB_FAIL(init_by_micro_data(idx_block_data, false/*set iter finish*/))) { + LOG_WARN("Fail to init scanner by micro data", K(ret), K(idx_block_data), K(index_format_)); + } else if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret)); + } else if (is_normal_cg_ && !idx_info->is_root() && idx_info->is_macro_node()) { + // Rowkey offset in macro node is local + ObStorageDatum offset; + ObDatumRowkey offset_rowkey; + offset.set_int(rowkey.datums_[0].get_int() - idx_info->get_row_range().start_row_id_); + offset_rowkey.assign(&offset, 1); + if (OB_FAIL(iter_->locate_key(offset_rowkey))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate rowkey", K(ret), K(idx_block_data), K(offset_rowkey), KPC(iter_)); + } else { + ret = OB_SUCCESS; // return OB_ITER_END on get_next() for get + } + } + } else if (OB_FAIL(iter_->locate_key(rowkey))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate rowkey", K(ret), K(idx_block_data), K(rowkey), KPC(iter_)); + } else { + ret = OB_SUCCESS; // return OB_ITER_END on get_next() for get + } + } + if (OB_SUCC(ret)) { + macro_id_ = macro_id; + range_idx_ = range_idx; + rowkey_ = &rowkey; + is_get_ = true; + if (nullptr != idx_info) { + parent_row_range_ = idx_info->get_row_range(); + } else { + parent_row_range_.reset(); + } + } + return ret; +} + +int ObIndexBlockRowScanner::open( + const MacroBlockId ¯o_id, 
+ const ObMicroBlockData &idx_block_data, + const ObRowsInfo *rows_info, + const int64_t rowkey_begin_idx, + const int64_t rowkey_end_idx) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not inited", K(ret)); + } else if (OB_UNLIKELY(!macro_id.is_valid() || !idx_block_data.is_valid() || nullptr == rows_info)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument to open an index micro block", K(ret), K(macro_id), K(idx_block_data), + KP(rows_info)); + } else if (OB_FAIL(init_by_micro_data(idx_block_data, true/*set iter finish*/))) { + LOG_WARN("Fail to init scanner by micro data", K(ret), K(idx_block_data)); + } else if (OB_ISNULL(iter_) || ObIndexFormat::TRANSFORMED != index_format_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected index format or iter is null", K(index_format_), K(ret), KPC(iter_)); + } else { + macro_id_ = macro_id; + rows_info_ = rows_info; + rowkey_begin_idx_ = rowkey_begin_idx; + rowkey_end_idx_ = rowkey_end_idx; + is_get_ = false; + } + return ret; +} + +int ObIndexBlockRowScanner::open( + const MacroBlockId ¯o_id, + const ObMicroBlockData &idx_block_data, + const ObDatumRange &range, + const int64_t range_idx, + const bool is_left_border, + const bool is_right_border, + const ObMicroIndexInfo *idx_info) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not inited", K(ret)); + } else if (OB_UNLIKELY(!macro_id.is_valid() || !idx_block_data.is_valid() || !range.is_valid() + || !idx_block_data.is_index_block() || (is_normal_cg_ && nullptr == idx_info))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument to open an index micro block", K(ret), K(idx_block_data), K(range), K_(is_normal_cg), KP(idx_info)); + } else if (OB_FAIL(init_by_micro_data(idx_block_data, false/*set iter finish*/))) { + LOG_WARN("Fail to init scanner by micro data", K(ret), K(idx_block_data)); + } else if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", 
K(index_format_), K(ret)); + } else if (OB_FAIL(locate_range(range, is_left_border, is_right_border))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate range", K(ret), K(range), K(is_left_border), K(is_right_border)); + } + } else { + macro_id_ = macro_id; + is_left_border_ = is_left_border; + is_right_border_ = is_right_border; + range_idx_ = range_idx; + is_get_ = false; + if (nullptr != idx_info) { + parent_row_range_ = idx_info->get_row_range(); + filter_constant_type_ = idx_info->get_filter_constant_type(); + } else { + parent_row_range_.reset(); + } + } + return ret; +} + +int ObIndexBlockRowScanner::get_next( + ObMicroIndexInfo &idx_block_row, + const bool is_multi_check) +{ + int ret = OB_SUCCESS; + idx_block_row.reset(); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not inited", K(ret)); + } else if (end_of_block()) { + ret = OB_ITER_END; + } else if (is_multi_check && OB_FAIL(iter_->skip_to_next_valid_position(idx_block_row, rowkey_begin_idx_, rowkey_end_idx_, rows_info_))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("Failed to skip to next valid position", K(ret), K(rowkey_begin_idx_), K(rowkey_end_idx_), KPC(rows_info_)); + } else if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret)); + } else { + iter_->reuse(); + } + } else if (OB_FAIL(get_next_idx_row(idx_block_row))) { + LOG_WARN("Failed to get next idx row", K(ret), K(is_multi_check)); + } + return ret; +} + +void ObIndexBlockRowScanner::set_iter_param(const blocksstable::ObSSTable *sstable, + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const ObTablet *tablet) +{ + iter_param_.sstable_ = sstable; + iter_param_.ls_id_ = ls_id; + iter_param_.tablet_id_ = tablet_id; + iter_param_.tablet_ = tablet; +} + +bool ObIndexBlockRowScanner::end_of_block() const +{ + int ret = OB_SUCCESS; + bool bret = true; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not inited", K(ret)); + } else 
if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret)); + } else { + bret = iter_->end_of_block(); + } + return bret; +} + +bool ObIndexBlockRowScanner::is_ddl_merge_type() const +{ + return OB_NOT_NULL(iter_param_.sstable_) && iter_param_.sstable_->is_ddl_merge_sstable(); +} + +int ObIndexBlockRowScanner::get_index_row_count(int64_t &index_row_count) const +{ + int ret = OB_SUCCESS; + index_row_count = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not inited", K(ret)); + } else if (OB_ISNULL(iter_) || OB_ISNULL(range_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret), KP(iter_), KP(range_)); + } else if (OB_FAIL(iter_->get_index_row_count(*range_, is_left_border_, is_right_border_, index_row_count))) { + LOG_WARN("get index row count failed", K(ret), KP(range_)); + } + return ret; +} + +int ObIndexBlockRowScanner::check_blockscan( + const ObDatumRowkey &rowkey, + bool &can_blockscan) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("Not init", K(ret)); + } else if (is_reverse_scan_) { + if (rowkey.is_min_rowkey()) { + can_blockscan = true; + } else { + // TODO(yuanzhe) opt this + can_blockscan = false; + } + } else if (rowkey.is_max_rowkey()) { + can_blockscan = true; + } else { + if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret)); + } else if (OB_FAIL(iter_->check_blockscan(rowkey, can_blockscan))) { + LOG_WARN("fail to check iter can block scan", K(ret), KPC(iter_), K(can_blockscan), K(rowkey)); + } + } + return ret; +} + +int ObIndexBlockRowScanner::init_by_micro_data(const ObMicroBlockData &idx_block_data, bool set_iter_end) +{ + int ret = OB_SUCCESS; + void *iter_buf = nullptr; + if (ObMicroBlockData::INDEX_BLOCK == idx_block_data.type_ || ObMicroBlockData::DDL_MERGE_INDEX_BLOCK == idx_block_data.type_) { + if (ObMicroBlockData::DDL_MERGE_INDEX_BLOCK == idx_block_data.type_ && 
is_ddl_merge_type() && is_normal_query_) { + if (OB_NOT_NULL(ddl_merge_iter_)) { + iter_ = ddl_merge_iter_; + } else { + if (OB_ISNULL(iter_buf = allocator_->alloc(sizeof(ObDDLMergeBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDDLMergeBlockRowIterator))); + } else if (FALSE_IT(ddl_merge_iter_ = new (iter_buf) ObDDLMergeBlockRowIterator)) { + } else { + iter_ = ddl_merge_iter_; + index_format_ = ObIndexFormat::DDL_MERGE; + } + } + } else { + if (nullptr == idx_block_data.get_extra_buf()) { + if (OB_NOT_NULL(raw_iter_)) { + iter_ = raw_iter_; + } else { + if (OB_ISNULL(iter_buf = allocator_->alloc(sizeof(ObRAWIndexBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObRAWIndexBlockRowIterator))); + } else if (FALSE_IT(raw_iter_ = new (iter_buf) ObRAWIndexBlockRowIterator)) { + } else { + iter_ = raw_iter_; + index_format_ = ObIndexFormat::RAW_DATA; + } + } + } else { + if (OB_NOT_NULL(transformed_iter_)) { + iter_ = transformed_iter_; + } else { + if (OB_ISNULL(iter_buf = allocator_->alloc(sizeof(ObTFMIndexBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObTFMIndexBlockRowIterator))); + } else if (FALSE_IT(transformed_iter_ = new (iter_buf) ObTFMIndexBlockRowIterator)) { + } else { + iter_ = transformed_iter_; + index_format_ = ObIndexFormat::TRANSFORMED; + } + } + } + } + } else if (ObMicroBlockData::DDL_BLOCK_TREE == idx_block_data.type_) { + if (OB_NOT_NULL(ddl_iter_)) { + iter_ = ddl_iter_; + } else { + if (OB_ISNULL(iter_buf = allocator_->alloc(sizeof(ObDDLIndexBlockRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDDLIndexBlockRowIterator))); + } else if (FALSE_IT(ddl_iter_ = new (iter_buf) ObDDLIndexBlockRowIterator)) { + } else { + iter_ = ddl_iter_; + index_format_ = ObIndexFormat::BLOCK_TREE; + } + } + } + if 
(OB_SUCC(ret)) { + if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret)); + } else if (OB_FAIL(iter_->init(idx_block_data, datum_utils_, allocator_, is_reverse_scan_, set_iter_end, iter_param_))) { + LOG_WARN("fail to init iter", K(ret), K(idx_block_data), KPC(iter_)); + } + } + return ret; +} + +int ObIndexBlockRowScanner::locate_range( + const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null", K(index_format_), K(ret), KPC(iter_)); + } else if (OB_FAIL(iter_->locate_range(range, is_left_border, is_right_border, is_normal_cg_))) { + if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { + LOG_WARN("Fail to locate range", K(ret), K(range), K(is_left_border), K(is_right_border), KPC(iter_)); + } + } else { range_ = ⦥ - LOG_TRACE("Locate range in index block by range", K(ret), K(range), K(begin_idx), K(end_idx), - K(is_left_border), K(is_right_border), K_(current), KPC(idx_data_header_), KP(this)); + LOG_TRACE("Locate range in index block by range", K(ret), K(range), KPC(iter_), + K(is_left_border), K(is_right_border), KP(this)); } return ret; } @@ -850,96 +1662,20 @@ int ObIndexBlockRowScanner::advance_to_border( ObCSRange &cs_range) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(index_format_ != IndexFormat::TRANSFORMED)) { + if (OB_UNLIKELY(index_format_ != ObIndexFormat::TRANSFORMED) || OB_ISNULL(iter_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected error", K(ret), K(index_format_)); + LOG_WARN("Unexpected error", K(ret), K(index_format_), KP(iter_)); } else if (OB_UNLIKELY(end_of_block())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected error", K(ret), K(end_of_block())); } else if (range_idx == range_idx_) { - const int64_t limit_idx = is_reverse_scan_ ? 
start_ : end_; - if(OB_FAIL(advance_to_border(rowkey, limit_idx, cs_range))) { + if(OB_FAIL(iter_->advance_to_border(rowkey, is_left_border_, is_right_border_, parent_row_range_, cs_range))) { LOG_WARN("Failed to advance to border", K(range_idx)); } } return ret; } -int ObIndexBlockRowScanner::get_idx_row_header_in_target_idx( - const ObIndexBlockRowHeader *&idx_row_header, - const int64_t idx) -{ - int ret = OB_SUCCESS; - idx_row_parser_.reset(); - const char *idx_data_buf = nullptr; - int64_t idx_data_len = 0; - if (OB_FAIL(idx_data_header_->get_index_data(idx, idx_data_buf, idx_data_len))) { - LOG_WARN("Fail to get index data", K(idx), K_(start), K_(end), K_(current), KPC_(idx_data_header)); - } else if (OB_FAIL(idx_row_parser_.init(idx_data_buf, idx_data_len))) { - LOG_WARN("Fail to parse index block row", K(idx), KPC(idx_data_header_)); - } else if (OB_FAIL(idx_row_parser_.get_header(idx_row_header))) { - LOG_WARN("Fail to get index block row header", KPC(idx_row_header)); - } - return ret; -} - -int ObIndexBlockRowScanner::advance_to_border( - const ObDatumRowkey &rowkey, - const int64_t limit_idx, - ObCSRange &cs_range) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(end_of_block())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected error", K(ret), K(end_of_block())); - } else { - const bool is_range_end = is_reverse_scan_ ? 
is_left_border_ : is_right_border_; - ObDatumComparor lower_bound_cmp(*datum_utils_, ret, false, true, false); - const ObDatumRowkey *first = nullptr; - const ObDatumRowkey *last = nullptr; - if (!is_reverse_scan_) { - first = idx_data_header_->rowkey_array_ + current_; - last = idx_data_header_->rowkey_array_ + limit_idx + 1; - } else { - first = idx_data_header_->rowkey_array_ + limit_idx; - last = idx_data_header_->rowkey_array_ + current_ + 1; - } - const ObDatumRowkey *start_found = std::lower_bound(first, last, rowkey, lower_bound_cmp); - if (OB_FAIL(ret)) { - LOG_WARN("Failed to get rowkey lower bound", K(ret), K(rowkey), KPC(idx_data_header_)); - } else if (!is_reverse_scan_) { - // found_pos is safe to skip(end_key < border_rowkey). - int64_t found_pos = start_found - idx_data_header_->rowkey_array_ - 1; - if (is_range_end && found_pos == limit_idx) { - // if is_range_end is true, we cannot skip all rowids because only subset of rowids statisy query range. - found_pos--; - } - LOG_DEBUG("ObIndexBlockRowScanner::advance_to_border", K(found_pos), K(is_range_end), - K_(current), K_(start), K_(end), K(limit_idx), K(is_range_end)); - if (found_pos >= current_) { - current_ = found_pos + 1; - if (OB_FAIL(get_cur_row_id_range(cs_range))) { - LOG_WARN("Failed to get cur row id range", K(ret), K(rowkey), K(current_), K(start_), K(end_), - K(limit_idx), K(is_range_end), KPC(idx_data_header_)); - } - } - } else { - // found_pos is safe to skip. - int64_t found_pos = start_found - idx_data_header_->rowkey_array_ + 1; - // found_pos != start_, there is no need to check is_range_end. 
- if (found_pos <= current_ + 1) { - current_ = found_pos - 1; - if (OB_FAIL(get_cur_row_id_range(cs_range))) { - LOG_WARN("Failed to get cur row id range", K(ret), K(rowkey), K(current_), K(start_), - K(end_), K(limit_idx), K(is_range_end)); - } - } - } - } - LOG_DEBUG("ObIndexBlockRowScanner::advance_to_border", K(limit_idx), K(start_), K(end_), K(current_)); - return ret; -} - int ObIndexBlockRowScanner::find_out_rows( const int32_t range_idx, int64_t &found_idx) @@ -949,21 +1685,13 @@ int ObIndexBlockRowScanner::find_out_rows( if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("Not inited", K_(is_inited)); - } else if (range_idx == range_idx_ && !end_of_block()) { - const int64_t start_idx = current_; - const int64_t end_idx = is_reverse_scan_ ? start_ : end_; - for (int64_t i = start_idx; OB_SUCC(ret) && (i * step_) <= (end_idx * step_); i += step_) { - const ObIndexBlockRowHeader *idx_row_header = nullptr; - if (OB_FAIL(get_idx_row_header_in_target_idx(idx_row_header, i))) { - LOG_WARN("Failed to get idx row header", K(i)); - } else if (idx_row_header->has_lob_out_row()) { - found_idx = i; - break; - } - } + } else if (OB_ISNULL(iter_) || OB_UNLIKELY(index_format_ != ObIndexFormat::TRANSFORMED)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null or wrong format", K(index_format_), K(ret)); + } else if (OB_FAIL(iter_->find_out_rows(range_idx, range_idx_, found_idx))) { + LOG_WARN("fail to find out rows", K(ret), K(range_idx), K(range_idx_), K(found_idx)); } - LOG_DEBUG("ObIndexBlockRowScanner::find_out_rows", K(range_idx), K_(range_idx), K_(current), K(start_), - K(end_), K(step_)); + LOG_DEBUG("ObIndexBlockRowScanner::find_out_rows", K(range_idx), KPC(iter_), K(found_idx)); return ret; } @@ -979,50 +1707,22 @@ int ObIndexBlockRowScanner::find_out_rows_from_start_to_end( if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("Not inited", K_(is_inited)); - } else if (range_idx == range_idx_) { - const int64_t start_idx = is_reverse_scan_ ? 
end_ : start_; - const int64_t end_idx = is_reverse_scan_ ? start_ : end_; - bool meet_start_row_id = false; - for (int64_t i = start_idx; OB_SUCC(ret) && (i * step_) <= (end_idx * step_); i += step_) { - const ObIndexBlockRowHeader *idx_row_header = nullptr; - if (OB_FAIL(get_idx_row_header_in_target_idx(idx_row_header, i))) { - LOG_WARN("Failed to get idx row header", K(i)); - } - if (OB_SUCC(ret)) { - if (!meet_start_row_id) { - ObCSRowId cur_start_row_id = idx_row_parser_.get_row_offset() - idx_row_header->get_row_count() + 1; - ObCSRowId cur_end_row_id = idx_row_parser_.get_row_offset(); - if (idx_row_header->is_data_block()) { - cur_start_row_id += parent_row_range_.start_row_id_; - cur_end_row_id += parent_row_range_.start_row_id_; - } - meet_start_row_id = (start_row_id >= cur_start_row_id && start_row_id <= cur_end_row_id); - } - if (meet_start_row_id && idx_row_header->has_lob_out_row()) { - if ((i * step_) >= (current_ * step_)) { - found_idx = i; - } else { - is_certain = false; - } - break; - } - } - } - } else { - is_certain = false; + } else if (OB_ISNULL(iter_) || OB_UNLIKELY(index_format_ != ObIndexFormat::TRANSFORMED)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter is null or wrong format", KP(iter_), K(index_format_), K(ret)); + } else if (OB_FAIL(iter_->find_out_rows_from_start_to_end(range_idx, range_idx_, start_row_id, parent_row_range_, is_certain, found_idx))) { + LOG_WARN("fail to find out rows from start to end", K(ret), K(range_idx), K(start_row_id), K(parent_row_range_), K(is_certain), K(found_idx)); } return ret; } -bool ObIndexBlockRowScanner::is_in_border() +const ObDatumRowkey &ObIndexBlockRowScanner::get_end_key() const { - bool in_border = false; - if (!is_reverse_scan_) { - in_border = is_right_border_ && current_ == end_; - } else { - in_border = is_left_border_ && current_ == start_; + const ObDatumRowkey *tmp_key = nullptr; + if (OB_NOT_NULL(iter_)) { + iter_->get_end_key(tmp_key); } - return in_border; + return *tmp_key; } void 
ObIndexBlockRowScanner::switch_context(const ObSSTable &sstable, const ObStorageDatumUtils &datum_utils) @@ -1030,83 +1730,11 @@ void ObIndexBlockRowScanner::switch_context(const ObSSTable &sstable, const ObSt nested_offset_ = sstable.get_macro_offset(); datum_utils_ = &datum_utils; is_normal_cg_ = sstable.is_normal_cg_sstable(); -} - -int ObIndexBlockRowScanner::init_datum_row() -{ int ret = OB_SUCCESS; - if (nullptr != datum_row_ && datum_row_->is_valid()) { - // row allocated - } else if (nullptr != datum_row_) { - datum_row_->~ObDatumRow(); - allocator_->free(datum_row_); - datum_row_ = nullptr; + if (OB_NOT_NULL(iter_)) { + ObStorageDatumUtils *switch_datum_utils = const_cast(datum_utils_); + iter_->switch_context(switch_datum_utils); } - if (nullptr == datum_row_) { - int64_t request_cnt = datum_utils_->get_rowkey_count() + 1; - void *buf = nullptr; - if (OB_ISNULL(buf = allocator_->alloc(sizeof(ObDatumRow)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("Fail to allocate memory for datum row", K(ret)); - } else if (FALSE_IT(datum_row_ = new (buf) ObDatumRow())) { - } else if (OB_FAIL(datum_row_->init(*allocator_, request_cnt))) { - LOG_WARN("Fail to init datum row", K(ret), K(request_cnt)); - } - - if (OB_FAIL(ret) && nullptr != buf) { - allocator_->free(buf); - datum_row_ = nullptr; - } - } - return ret; -} - -int ObIndexBlockRowScanner::read_curr_idx_row(const ObIndexBlockRowHeader *&idx_row_header, const ObDatumRowkey *&endkey) -{ - int ret = OB_SUCCESS; - idx_row_header = nullptr; - const int64_t rowkey_column_count = datum_utils_->get_rowkey_count(); - idx_row_parser_.reset(); - if (IndexFormat::TRANSFORMED == index_format_) { - const char *idx_data_buf = nullptr; - int64_t idx_data_len = 0; - if (OB_FAIL(idx_data_header_->get_index_data(current_, idx_data_buf, idx_data_len))) { - LOG_WARN("Fail to get index data", K(ret), K_(current), KPC_(idx_data_header)); - } else if (OB_FAIL(idx_row_parser_.init(idx_data_buf, idx_data_len))) { - LOG_WARN("Fail 
to parse index block row", K(ret), K_(current), KPC(idx_data_header_)); - } else if (OB_FAIL(idx_row_parser_.get_header(idx_row_header))) { - LOG_WARN("Fail to get index block row header", K(ret)); - } else { - endkey = &idx_data_header_->rowkey_array_[current_]; - } - } else if (IndexFormat::RAW_DATA == index_format_) { - endkey_.reset(); - if (OB_ISNULL(datum_row_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null pointer to index row", K(ret)); - } else if (OB_FAIL(micro_reader_->get_row(current_, *datum_row_))) { - LOG_WARN("Fail to read index row from block", K(ret), K(current_)); - } else if (OB_FAIL(idx_row_parser_.init(rowkey_column_count, *datum_row_))) { - LOG_WARN("Fail to parser index block row", K(ret), K_(datum_row), K(rowkey_column_count)); - } else if (OB_FAIL(idx_row_parser_.get_header(idx_row_header))) { - LOG_WARN("Fail to get index block row header", K(ret)); - } else if (OB_FAIL(endkey_.assign(datum_row_->storage_datums_, rowkey_column_count))) { - LOG_WARN("Fail to assign storage datum to endkey", K(ret), KPC(datum_row_), K(rowkey_column_count)); - } else { - endkey = &endkey_; - } - } else if (IndexFormat::BLOCK_TREE == index_format_) { - if (OB_ISNULL(block_meta_tree_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("block meta iterator is null", K(ret)); - } else if (OB_FAIL(block_meta_tree_->get_index_block_row_header(current_, idx_row_header, endkey))) { - LOG_WARN("get index block row header failed", K(ret), K(current_)); - } - } else { - ret = OB_NOT_SUPPORTED; - LOG_WARN("not supported index format", K(ret), K(index_format_)); - } - return ret; } int ObIndexBlockRowScanner::get_next_idx_row(ObMicroIndexInfo &idx_block_row) @@ -1118,20 +1746,20 @@ int ObIndexBlockRowScanner::get_next_idx_row(ObMicroIndexInfo &idx_block_row) const char *idx_data_buf = nullptr; const char *agg_row_buf = nullptr; int64_t agg_buf_size = 0; - if (OB_FAIL(read_curr_idx_row(idx_row_header, endkey))) { - LOG_WARN("Fail to read currend index row", K(ret), 
K_(index_format), K_(current)); - } else if (OB_UNLIKELY(nullptr == idx_row_header || nullptr == endkey)) { + int64_t row_offset = 0; + bool is_scan_left_border = false; + bool is_scan_right_border = false; + if (OB_ISNULL(iter_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null index block row header/endkey", K(ret), - K(index_format_), KP(idx_row_header), KP(endkey)); - } else if (idx_row_header->is_data_index() && !idx_row_header->is_major_node()) { - if (OB_FAIL(idx_row_parser_.get_minor_meta(idx_minor_info))) { - LOG_WARN("Fail to get minor meta info", K(ret)); + LOG_WARN("iter is null", K(ret), K(index_format_), KP(iter_)); + } else { + if (OB_FAIL(iter_->get_next(idx_row_header, endkey, is_scan_left_border, is_scan_right_border, idx_minor_info, agg_row_buf, agg_buf_size, row_offset))) { + LOG_WARN("get next idx block row failed", K(ret), KP(idx_row_header), KPC(endkey), K(is_reverse_scan_)); + } else if (OB_UNLIKELY(nullptr == idx_row_header || nullptr == endkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null index block row header/endkey", K(ret), KPC(iter_), + K(index_format_), KP(idx_row_header), KP(endkey)); } - } else if (!idx_row_header->is_major_node() || !idx_row_header->is_pre_aggregated()) { - // Do not have aggregate data - } else if (OB_FAIL(idx_row_parser_.get_agg_row(agg_row_buf, agg_buf_size))) { - LOG_WARN("Fail to get aggregate", K(ret)); } if (OB_SUCC(ret)) { @@ -1140,10 +1768,9 @@ int ObIndexBlockRowScanner::get_next_idx_row(ObMicroIndexInfo &idx_block_row) idx_block_row.row_header_ = idx_row_header; idx_block_row.minor_meta_info_ = idx_minor_info; idx_block_row.is_get_ = is_get_; - idx_block_row.is_left_border_ = is_left_border_ && current_ == start_; - idx_block_row.is_right_border_ = is_right_border_ && current_ == end_; + idx_block_row.is_left_border_ = is_left_border_ && is_scan_left_border; + idx_block_row.is_right_border_ = is_right_border_ && is_scan_right_border; idx_block_row.copy_lob_out_row_flag(); - current_ 
+= step_; idx_block_row.range_idx_ = range_idx_; idx_block_row.query_range_ = query_range_; idx_block_row.parent_macro_id_ = macro_id_; @@ -1155,15 +1782,15 @@ int ObIndexBlockRowScanner::get_next_idx_row(ObMicroIndexInfo &idx_block_row) idx_block_row.cs_row_range_.end_row_id_ = idx_block_row.endkey_->datums_[0].get_int(); idx_block_row.set_filter_constant_type(filter_constant_type_); } else { - idx_block_row.cs_row_range_.start_row_id_ = idx_row_parser_.get_row_offset() - idx_block_row.get_row_count() + 1; - idx_block_row.cs_row_range_.end_row_id_ = idx_row_parser_.get_row_offset(); + idx_block_row.cs_row_range_.start_row_id_ = row_offset - idx_block_row.get_row_count() + 1; + idx_block_row.cs_row_range_.end_row_id_ = row_offset; } if (idx_block_row.is_data_block()) { idx_block_row.cs_row_range_.start_row_id_ += parent_row_range_.start_row_id_; idx_block_row.cs_row_range_.end_row_id_ += parent_row_range_.start_row_id_; } } - LOG_DEBUG("Get next index block row", K(ret), K_(current), K_(start), K_(end), K(idx_block_row), KP(this), K(endkey_)); + LOG_DEBUG("Get next index block row", K(ret), KPC(iter_), K(idx_block_row), KP(this), K(endkey)); return ret; } @@ -1176,98 +1803,5 @@ void ObIndexBlockRowScanner::skip_index_rows() } } -int ObIndexBlockRowScanner::find_rowkeys_belong_to_same_idx_row(int64_t &rowkey_idx) -{ - int ret = OB_SUCCESS; - bool is_decided = false; - const ObDatumRowkey *cur_rowkey = nullptr; - if (IndexFormat::TRANSFORMED == index_format_) { - cur_rowkey = idx_data_header_->rowkey_array_ + current_; - } else if (IndexFormat::BLOCK_TREE == index_format_) { - cur_rowkey = block_meta_tree_->get_rowkey(current_); - } - for (; OB_SUCC(ret) && rowkey_begin_idx_ < rowkey_end_idx_; ++rowkey_begin_idx_) { - if (rows_info_->is_row_skipped(rowkey_begin_idx_)) { - continue; - } - const ObDatumRowkey &rowkey = rows_info_->get_rowkey(rowkey_begin_idx_); - int32_t cmp_ret = 0; - if (nullptr != cur_rowkey) { - if (OB_FAIL(rowkey.compare(*cur_rowkey, 
*datum_utils_, cmp_ret, false))) { - LOG_WARN("Failed to compare rowkey", K(ret), K(rowkey), KPC(cur_rowkey)); - } - } else if (OB_FAIL(micro_reader_->compare_rowkey(rowkey, current_, cmp_ret))) { - LOG_WARN("Failed to compare rowkey", K(ret), K(rowkey)); - } else { - cmp_ret = -cmp_ret; - } - - if (OB_FAIL(ret)) { - } else if (cmp_ret > 0) { - rowkey_idx = rowkey_begin_idx_; - is_decided = true; - break; - } else if (cmp_ret == 0) { - rowkey_idx = rowkey_begin_idx_ + 1; - is_decided = true; - break; - } - } - if (!is_decided) { - rowkey_idx = rowkey_begin_idx_; - } - return ret; -} - -int ObIndexBlockRowScanner::skip_to_next_valid_position(ObMicroIndexInfo &idx_block_row) -{ - int ret = OB_SUCCESS; - skip_index_rows(); - if (rowkey_begin_idx_ == rowkey_end_idx_) { - ret = OB_ITER_END; - } else if (IndexFormat::TRANSFORMED == index_format_) { - const ObDatumRowkey &rowkey = rows_info_->get_rowkey(rowkey_begin_idx_); - ObDatumComparor cmp(*datum_utils_, ret, false, true, false); - const ObDatumRowkey *first = idx_data_header_->rowkey_array_ + current_; - const ObDatumRowkey *last = idx_data_header_->rowkey_array_ + end_ + 1; - const ObDatumRowkey *found = std::lower_bound(first, last, rowkey, cmp); - if (OB_FAIL(ret)) { - LOG_WARN("Failed to get lower bound of rowkey", K(ret), K(rowkey), KPC_(idx_data_header)); - } else if (found == last) { - ret = OB_ITER_END; - } else { - current_ = found - idx_data_header_->rowkey_array_; - } - } else if (IndexFormat::BLOCK_TREE == index_format_) { - if (OB_FAIL(block_meta_tree_->skip_to_next_valid_position(rows_info_->get_rowkey(rowkey_begin_idx_), - *datum_utils_, - current_))) { - if (OB_UNLIKELY(OB_ITER_END != ret)) { - LOG_WARN("Failed to skip to next valid position in block meta tree", K(ret), K_(current), K_(rowkey_begin_idx), - KPC_(idx_data_header)); - } - } - } else if (IndexFormat::RAW_DATA == index_format_) { - bool equal = false; - if (OB_FAIL(micro_reader_->find_bound(rows_info_->get_rowkey(rowkey_begin_idx_), true, 
current_, current_, equal))) { - LOG_WARN("Failed to skip to next valid position in micro block reader", K(ret), K_(current), K_(rowkey_begin_idx), - KPC_(idx_data_header)); - } else if (current_ == (end_ + 1)) { - ret = OB_ITER_END; - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected index format", K_(index_format)); - } - if (OB_SUCC(ret)) { - idx_block_row.rows_info_ = rows_info_; - idx_block_row.rowkey_begin_idx_ = rowkey_begin_idx_; - if (OB_FAIL(find_rowkeys_belong_to_same_idx_row(idx_block_row.rowkey_end_idx_))) { - LOG_WARN("Failed to find rowkeys belong to same index row", K(ret), K_(current), KPC_(idx_data_header)); - } - } - return ret; -} - } // namespace blocksstable } // namespace oceanbase diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h index 29634f204..704b54dc9 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h +++ b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h @@ -17,10 +17,12 @@ #include "storage/blocksstable/ob_block_sstable_struct.h" #include "storage/blocksstable/ob_micro_block_reader_helper.h" #include "storage/blocksstable/ob_datum_range.h" +#include "storage/blocksstable/index_block/ob_ddl_sstable_scan_merge.h" #include "storage/column_store/ob_column_store_util.h" #include "ob_index_block_row_struct.h" - - +#include "storage/memtable/mvcc/ob_keybtree.h" +#include "storage/meta_mem/ob_tablet_handle.h" +#include "storage/access/ob_simple_rows_merger.h" namespace oceanbase { namespace storage @@ -28,11 +30,14 @@ namespace storage struct ObTableIterParam; struct ObTableAccessContext; class ObBlockMetaTree; +class ObBlockMetaTreeValue; class ObRowsInfo; } namespace blocksstable { class ObSSTable; +class ObDDLIndexBlockRowIterator; +class ObDDLMergeBlockRowIterator; // Memory structure of Index micro block. 
// This struct won't hold extra memory, lifetime security need to be ensured by caller struct ObIndexBlockDataHeader @@ -95,6 +100,221 @@ private: ObMicroBlockReaderHelper micro_reader_helper_; }; +enum class ObIndexFormat { + INVALID = 0, + RAW_DATA, + TRANSFORMED, + BLOCK_TREE, + DDL_MERGE +}; + +class ObIndexBlockIterParam final +{ +public: + ObIndexBlockIterParam(); + ~ObIndexBlockIterParam(); + ObIndexBlockIterParam &operator=(const ObIndexBlockIterParam &other); + int assign(const ObIndexBlockIterParam &other); + void reset(); + bool is_valid() const; + TO_STRING_KV(KP(sstable_), KP(tablet_), K(ls_id_), K(tablet_id_)); + +public: + const ObSSTable *sstable_; + // pass ls_id + table_id or directly pass tablet + const ObTablet *tablet_; + share::ObLSID ls_id_; + common::ObTabletID tablet_id_; +}; + +class ObIndexBlockRowIterator +{ +public: + ObIndexBlockRowIterator(); + virtual ~ObIndexBlockRowIterator(); + virtual void reset(); + virtual void reuse() = 0; + virtual int init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) = 0; + virtual int get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) = 0; + virtual int get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) = 0; + virtual int locate_key(const ObDatumRowkey &rowkey) = 0; + virtual int locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) = 0; + virtual int check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) = 0; + virtual bool end_of_block() const = 0; + virtual int get_index_row_count(const ObDatumRange 
&range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) = 0; + //todo @hanling :refactor these OB_NOT_SUPPORTED interface + virtual int get_idx_row_header_in_target_idx(const int64_t idx, + const ObIndexBlockRowHeader *&idx_row_header) { return OB_NOT_SUPPORTED; } + virtual int find_out_rows(const int32_t range_idx, + const int64_t scanner_range_idx, + int64_t &found_idx) { return OB_NOT_SUPPORTED; } + virtual int find_out_rows_from_start_to_end(const int32_t range_idx, + const int64_t scanner_range_idx, + const ObCSRowId start_row_id, + const ObCSRange &parent_row_range, + bool &is_certain, + int64_t &found_idx) { return OB_NOT_SUPPORTED; } + virtual int skip_to_next_valid_position(ObMicroIndexInfo &idx_block_row, + int64_t &rowkey_begin_idx, + int64_t &rowkey_end_idx, + const ObRowsInfo *&rows_info) { return OB_NOT_SUPPORTED; } + virtual int advance_to_border(const ObDatumRowkey &rowkey, + const bool is_left_border, + const bool is_right_border, + const ObCSRange &parent_row_range, + ObCSRange &cs_range) { return OB_NOT_SUPPORTED; } + virtual void get_end_key(const ObDatumRowkey *&rowkey) {} +public: + virtual int switch_context(ObStorageDatumUtils *datum_utils) + { + datum_utils_ = datum_utils; + return OB_SUCCESS; + } + bool is_inited() { return is_inited_; } + VIRTUAL_TO_STRING_KV(K(is_inited_), K(is_reverse_scan_), K(iter_step_), K(idx_row_parser_), KPC(datum_utils_)); + +protected: + bool is_inited_; + bool is_reverse_scan_; + int64_t iter_step_; + ObIndexBlockRowParser idx_row_parser_; + const ObStorageDatumUtils *datum_utils_; +}; + + +class ObRAWIndexBlockRowIterator : public ObIndexBlockRowIterator +{ +public: + ObRAWIndexBlockRowIterator(); + virtual ~ObRAWIndexBlockRowIterator(); + virtual int init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const ObIndexBlockIterParam &iter_param) override; 
+ virtual int get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) override; + virtual int get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) override; + virtual int locate_key(const ObDatumRowkey &rowkey) override; + virtual int locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) override; + virtual int check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) override; + virtual bool end_of_block() const override; + virtual int get_index_row_count(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + int64_t &index_row_count) override; + virtual void reset() override; + virtual void reuse() override; + INHERIT_TO_STRING_KV("base iterator:", ObIndexBlockRowIterator, "format:", "ObRAWIndexBlockRowIterator", + K(current_), K(start_), K(end_), KP(micro_reader_), K(endkey_), KPC(datum_row_), KP(allocator_)); +private: + int init_datum_row(const ObStorageDatumUtils &datum_utils, ObIAllocator *allocator); + bool is_in_border(bool is_reverse_scan, bool is_left_border, bool is_right_border); +protected: + int64_t current_; + int64_t start_; // inclusive + int64_t end_; // inclusive + ObIMicroBlockReader *micro_reader_; + ObIAllocator *allocator_; + ObDatumRow *datum_row_; + ObMicroBlockReaderHelper micro_reader_helper_; + ObDatumRowkey endkey_; +}; + +class ObTFMIndexBlockRowIterator : public ObRAWIndexBlockRowIterator +{ +public: + ObTFMIndexBlockRowIterator(); + virtual ~ObTFMIndexBlockRowIterator(); + virtual int init(const ObMicroBlockData &idx_block_data, + const ObStorageDatumUtils *datum_utils, + ObIAllocator *allocator, + const bool is_reverse_scan, + const bool set_iter_end, + const 
ObIndexBlockIterParam &iter_param) override; + virtual int get_current(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey) override; + virtual int get_next(const ObIndexBlockRowHeader *&idx_row_header, + const ObDatumRowkey *&endkey, + bool &is_scan_left_border, + bool &is_scan_right_border, + const ObIndexBlockRowMinorMetaInfo *&idx_minor_info, + const char *&agg_row_buf, + int64_t &agg_buf_size, + int64_t &row_offset) override; + virtual int locate_key(const ObDatumRowkey &rowkey) override; + virtual int locate_range(const ObDatumRange &range, + const bool is_left_border, + const bool is_right_border, + const bool is_normal_cg) override; + virtual int check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan) override; + virtual void reset() override; + virtual void reuse() override; + virtual int find_out_rows(const int32_t range_idx, + const int64_t scanner_range_idx, + int64_t &found_idx) override; + virtual int find_out_rows_from_start_to_end(const int32_t range_idx, + const int64_t scanner_range_idx, + const ObCSRowId start_row_id, + const ObCSRange &parent_row_range, + bool &is_certain, + int64_t &found_idx) override; + virtual int skip_to_next_valid_position(ObMicroIndexInfo &idx_block_row, + int64_t &rowkey_begin_idx, + int64_t &rowkey_end_idx, + const ObRowsInfo *&rows_info) override; + virtual int get_idx_row_header_in_target_idx(const int64_t idx, + const ObIndexBlockRowHeader *&idx_row_header) override; + virtual int advance_to_border(const ObDatumRowkey &rowkey, + const bool is_left_border, + const bool is_right_border, + const ObCSRange &parent_row_range, + ObCSRange &cs_range) override; + virtual void get_end_key(const ObDatumRowkey *&rowkey) { rowkey = &(idx_data_header_->rowkey_array_[idx_data_header_->row_cnt_ - 1]); } + INHERIT_TO_STRING_KV("base iterator:", ObRAWIndexBlockRowIterator, "format:", "ObTFMIndexBlockRowIterator", KPC(idx_data_header_)); + +private: + int get_cur_row_id_range(const ObCSRange 
&parent_row_range, + ObCSRange &cs_range); + int find_rowkeys_belong_to_same_idx_row(int64_t &rowkey_idx, + int64_t &rowkey_begin_idx, + int64_t &rowkey_end_idx, + const ObRowsInfo *&rows_info); + +private: + const ObIndexBlockDataHeader *idx_data_header_; +}; + class ObIndexBlockRowScanner { public: @@ -111,6 +331,7 @@ public: const common::ObQueryFlag &query_flag, const int64_t nested_offset, const bool is_normal_cg = false); + // todo :qilu get ls_id from MTL() after ddl_kv_mgr split to tenant int open( const MacroBlockId &macro_id, const ObMicroBlockData &idx_block_data, @@ -134,7 +355,12 @@ public: int get_next( ObMicroIndexInfo &idx_block_row, const bool is_multi_check = false); + void set_iter_param(const ObSSTable *sstable, + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const ObTablet *tablet = nullptr); bool end_of_block() const; + bool is_ddl_merge_type() const; int get_index_row_count(int64_t &index_row_count) const; int check_blockscan(const ObDatumRowkey &rowkey, bool &can_blockscan); int locate_range(const ObDatumRange &range, const bool is_left_border, const bool is_right_border); @@ -151,20 +377,17 @@ public: bool &is_certain, int64_t &found_idx); bool is_in_border(); - inline const ObDatumRowkey &get_end_key() const - { - return idx_data_header_->rowkey_array_[idx_data_header_->row_cnt_ - 1]; - } + const ObDatumRowkey &get_end_key() const; OB_INLINE bool is_valid() const { return is_inited_; } + OB_INLINE bool is_ddl_merge_scan() const { return index_format_ == ObIndexFormat::DDL_MERGE; } void switch_context(const ObSSTable &sstable, const ObStorageDatumUtils &datum_utils); - TO_STRING_KV(K_(current), K_(start), K_(end), K_(step), - K_(range_idx), K_(is_get), K_(is_reverse_scan), - K_(is_left_border), K_(is_right_border), - K_(rowkey_begin_idx), K_(rowkey_end_idx), - K_(is_inited), K_(index_format), K_(macro_id), KPC_(idx_data_header), KPC_(datum_utils), - K_(is_normal_cg), K_(parent_row_range), K_(filter_constant_type)); +
TO_STRING_KV(K_(index_format), KP_(raw_iter), KP_(transformed_iter), KP_(ddl_iter), KP_(ddl_merge_iter), + KPC_(iter), K_(range_idx), K_(is_get), K_(is_reverse_scan), K_(is_left_border), K_(is_right_border), + K_(rowkey_begin_idx), K_(rowkey_end_idx), K_(is_inited), K_(macro_id), KPC_(datum_utils), + K_(is_normal_cg), K_(parent_row_range), K_(filter_constant_type), K_(is_normal_query), + K_(iter_param)); private: - int init_by_micro_data(const ObMicroBlockData &idx_block_data); + int init_by_micro_data(const ObMicroBlockData &idx_block_data, bool set_iter_end); int locate_key(const ObDatumRowkey &rowkey); int init_datum_row(); int read_curr_idx_row(const ObIndexBlockRowHeader *&idx_row_header, const ObDatumRowkey *&endkey); @@ -178,8 +401,6 @@ private: ObCSRange &cs_range); int get_next_idx_row(ObMicroIndexInfo &idx_block_row); void skip_index_rows(); - int find_rowkeys_belong_to_same_idx_row(int64_t &rowkey_idx); - int skip_to_next_valid_position(ObMicroIndexInfo &idx_block_row); private: union { const ObDatumRowkey *rowkey_; @@ -187,33 +408,21 @@ private: const ObRowsInfo *rows_info_; const void *query_range_; }; - enum IndexFormat { - INVALID = 0, - RAW_DATA, - TRANSFORMED, - BLOCK_TREE - }; const ObIArray *agg_projector_; const ObIArray *agg_column_schema_; - const ObIndexBlockDataHeader *idx_data_header_; MacroBlockId macro_id_; ObIAllocator *allocator_; - ObMicroBlockReaderHelper micro_reader_helper_; - ObIMicroBlockReader *micro_reader_; - storage::ObBlockMetaTree *block_meta_tree_; - ObDatumRow *datum_row_; - ObDatumRowkey endkey_; - ObIndexBlockRowParser idx_row_parser_; + ObRAWIndexBlockRowIterator *raw_iter_; + ObTFMIndexBlockRowIterator *transformed_iter_; + ObDDLIndexBlockRowIterator *ddl_iter_; + ObDDLMergeBlockRowIterator *ddl_merge_iter_; + ObIndexBlockRowIterator *iter_; //point to one of above four iter const ObStorageDatumUtils *datum_utils_; - int64_t current_; - int64_t start_; // inclusive - int64_t end_; // inclusive - int64_t step_; int64_t 
range_idx_; int64_t nested_offset_; int64_t rowkey_begin_idx_; int64_t rowkey_end_idx_; - IndexFormat index_format_; + ObIndexFormat index_format_; ObCSRange parent_row_range_; bool is_get_; bool is_reverse_scan_; @@ -221,7 +430,9 @@ private: bool is_right_border_; bool is_inited_; bool is_normal_cg_; + bool is_normal_query_; sql::ObBoolMaskType filter_constant_type_; + ObIndexBlockIterParam iter_param_; // todo qilu: refactor this after refactor ddl_kv_mgr }; } // namespace blocksstable diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_struct.cpp b/src/storage/blocksstable/index_block/ob_index_block_row_struct.cpp index 2d0e3092a..157e5baf3 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_struct.cpp +++ b/src/storage/blocksstable/index_block/ob_index_block_row_struct.cpp @@ -14,6 +14,7 @@ #include "common/row/ob_row.h" #include "storage/blocksstable/ob_block_sstable_struct.h" +#include "storage/blocksstable/ob_data_store_desc.h" #include "ob_index_block_row_struct.h" diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_struct.h b/src/storage/blocksstable/index_block/ob_index_block_row_struct.h index abb033916..4075abcaf 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_struct.h +++ b/src/storage/blocksstable/index_block/ob_index_block_row_struct.h @@ -624,6 +624,7 @@ public: int64_t get_max_merged_trans_version() const; int64_t get_row_count_delta() const; int64_t get_row_offset() const { return row_offset_; }; + bool is_inited() const { return is_inited_; } TO_STRING_KV(K_(is_inited), KPC(header_), K(row_offset_)); private: diff --git a/src/storage/blocksstable/index_block/ob_sstable_meta_info.cpp b/src/storage/blocksstable/index_block/ob_sstable_meta_info.cpp index e9de2d52b..01ba52b10 100644 --- a/src/storage/blocksstable/index_block/ob_sstable_meta_info.cpp +++ b/src/storage/blocksstable/index_block/ob_sstable_meta_info.cpp @@ -288,6 +288,7 @@ int ObRootBlockInfo::read_block_data( 
read_info.io_desc_.set_group_id(ObIOModule::ROOT_BLOCK_IO); read_info.io_timeout_ms_ = GCONF._data_storage_io_timeout / 1000L; read_info.buf_ = buf; + read_info.io_desc_.set_group_id(ObIOModule::ROOT_BLOCK_IO); if (OB_FAIL(addr.get_block_addr(read_info.macro_block_id_, read_info.offset_, read_info.size_))) { LOG_WARN("fail to get block address", K(ret), K(addr)); } else if (OB_FAIL(ObBlockManager::read_block(read_info, handle))) { diff --git a/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.cpp b/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.cpp index f4ec0bddf..d8e20cedb 100644 --- a/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.cpp +++ b/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.cpp @@ -27,7 +27,7 @@ namespace blocksstable ObSSTableSecMetaIterator::ObSSTableSecMetaIterator() : tenant_id_(OB_INVALID_TENANT_ID), rowkey_read_info_(nullptr), sstable_meta_hdl_(), prefetch_flag_(), idx_cursor_(), macro_reader_(), block_cache_(nullptr), - micro_reader_(nullptr), micro_reader_helper_(), block_meta_tree_(nullptr), + micro_reader_(nullptr), micro_reader_helper_(), block_meta_tree_(nullptr), ddl_iter_(), query_range_(nullptr), start_bound_micro_block_(), end_bound_micro_block_(), micro_handles_(), row_(), io_allocator_(), curr_handle_idx_(0), prefetch_handle_idx_(0), prev_block_row_cnt_(0), curr_block_start_idx_(0), curr_block_end_idx_(0), curr_block_idx_(0), step_cnt_(0), @@ -44,6 +44,7 @@ void ObSSTableSecMetaIterator::reset() block_cache_ = nullptr; micro_reader_ = nullptr; micro_reader_helper_.reset(); + ddl_iter_.reset(); block_meta_tree_ = nullptr; row_.reset(); query_range_ = nullptr; @@ -75,7 +76,7 @@ int ObSSTableSecMetaIterator::open( { int ret = OB_SUCCESS; bool is_meta_root = false; - bool is_ddl_mem_sstable = false; + const bool is_ddl_mem_sstable = sstable.is_ddl_mem_sstable(); if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("Fail to open sstable secondary meta iterator", K(ret)); 
@@ -100,30 +101,29 @@ int ObSSTableSecMetaIterator::open( block_cache_ = &ObStorageCacheSuite::get_instance().get_block_cache(); is_meta_root = sstable_meta_hdl_.get_sstable_meta().get_macro_info().is_meta_root(); } - if (OB_FAIL(ret) || is_prefetch_end_) { - } else if (sstable.is_ddl_mem_sstable()) { - is_ddl_mem_sstable = true; + } else if (is_ddl_mem_sstable) { const ObMicroBlockData &root_block = sstable_meta_hdl_.get_sstable_meta().get_root_info().get_block_data(); if (ObMicroBlockData::DDL_BLOCK_TREE != root_block.type_ || nullptr == root_block.buf_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("block type is not ddl block tree", K(ret), K(root_block)); } else { block_meta_tree_ = reinterpret_cast(const_cast(root_block.buf_)); - if (OB_FAIL(block_meta_tree_->locate_range(query_range, - rowkey_read_info.get_datum_utils(), - true, //is_left_border - true, //is_right_border, - curr_block_start_idx_, - curr_block_end_idx_))) { + const int64_t step = max(1, sample_step); + if (OB_FAIL(ddl_iter_.set_iter_param(const_cast(&rowkey_read_info.get_datum_utils()), is_reverse_scan, block_meta_tree_, step))) { + LOG_WARN("fail to set ddl iter param", K(ret)); + } else if (OB_FAIL(ddl_iter_.locate_range(query_range, + true, /*is_left_border*/ + true, /*is_right_border*/ + true /*is_normal_cg*/))) { if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { - LOG_WARN("locate range failed", K(ret), K(query_range)); + LOG_WARN("locate range failed", K(ret), K(query_range), K(ddl_iter_)); } else { - curr_block_idx_ = curr_block_end_idx_ + 1; + ddl_iter_.set_iter_end(); ret = OB_SUCCESS; // return OB_ITER_END on get_next() for get } - } else { - const int64_t step = max(1, sample_step); + } + if (OB_SUCC(ret)) { step_cnt_ = !is_reverse_scan ? step : -step; curr_block_idx_ = !is_reverse_scan ?
curr_block_start_idx_ : curr_block_end_idx_; is_inited_ = true; @@ -143,7 +143,7 @@ int ObSSTableSecMetaIterator::open( const int64_t request_col_cnt = rowkey_read_info.get_schema_rowkey_count() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt() + 1; - if (OB_SUCC(ret) && !is_prefetch_end_ && !is_meta_root && !is_ddl_mem_sstable) { + if (OB_SUCC(ret) && !is_prefetch_end_ && !is_meta_root && !is_ddl_mem_sstable /* ddl kv use ddl_iter directly*/) { bool start_key_beyond_range = false; bool end_key_beyond_range = false; if (is_reverse_scan) { @@ -218,6 +218,7 @@ int ObSSTableSecMetaIterator::get_next(ObDataMacroBlockMeta &macro_meta) { int ret = OB_SUCCESS; MacroBlockId macro_id; + const ObDataMacroBlockMeta *tmp_meta = nullptr; row_.reuse(); if (IS_NOT_INIT) { ret = OB_NOT_INIT; @@ -225,10 +226,13 @@ int ObSSTableSecMetaIterator::get_next(ObDataMacroBlockMeta &macro_meta) } else if (nullptr != block_meta_tree_) { if (!is_target_row_in_curr_block()) { ret = OB_ITER_END; - } else if (OB_FAIL(block_meta_tree_->get_macro_block_meta(curr_block_idx_, macro_meta))) { - LOG_WARN("get next macro block meta failed", K(ret), K(curr_block_idx_)); - } else { - curr_block_idx_ += step_cnt_; + } else if (OB_UNLIKELY(!ddl_iter_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cur tree value is null", K(ret), K(ddl_iter_)); + } else if (OB_FAIL(ddl_iter_.get_next_meta(tmp_meta))) { + LOG_WARN("get next meta failed", K(ret)); + } else if (OB_FAIL(macro_meta.assign(*tmp_meta))) { + LOG_WARN("assign macro meta failed", K(ret), KPC(tmp_meta)); } } else { while (OB_SUCC(ret) && !is_target_row_in_curr_block()) { diff --git a/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.h b/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.h index a2f8cc259..47c5ed720 100644 --- a/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.h +++ b/src/storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.h @@ -15,6 +15,7 @@ #include
"storage/access/ob_micro_block_handle_mgr.h" #include "ob_index_block_tree_cursor.h" +#include "storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h" namespace oceanbase { @@ -40,7 +41,7 @@ public: const bool is_reverse_scan = false, const int64_t sample_step = 0); virtual int get_next(ObDataMacroBlockMeta &macro_meta); - TO_STRING_KV(K_(is_reverse_scan), K_(is_inited), K_(start_bound_micro_block), + TO_STRING_KV(K_(is_reverse_scan), K_(is_inited), K_(start_bound_micro_block), K_(ddl_iter), K_(end_bound_micro_block), K_(idx_cursor), K_(curr_handle_idx), K_(prefetch_handle_idx), K_(prev_block_row_cnt), K_(curr_block_start_idx), K_(curr_block_end_idx), K_(curr_block_idx), K_(step_cnt), K_(is_prefetch_end), KPC_(query_range), KPC_(rowkey_read_info)); @@ -56,7 +57,7 @@ private: OB_INLINE bool is_target_row_in_curr_block() const { - return curr_block_idx_ >= curr_block_start_idx_ && curr_block_idx_ <= curr_block_end_idx_; + return nullptr != block_meta_tree_ ? !ddl_iter_.end_of_block() : curr_block_idx_ >= curr_block_start_idx_ && curr_block_idx_ <= curr_block_end_idx_; } int locate_bound_micro_block( @@ -86,6 +87,7 @@ private: ObIMicroBlockReader *micro_reader_; ObMicroBlockReaderHelper micro_reader_helper_; storage::ObBlockMetaTree *block_meta_tree_; + ObDDLIndexBlockRowIterator ddl_iter_; const ObDatumRange *query_range_; ObMicroBlockId start_bound_micro_block_; ObMicroBlockId end_bound_micro_block_; diff --git a/src/storage/blocksstable/ob_block_manager.cpp b/src/storage/blocksstable/ob_block_manager.cpp index afb149526..586a36843 100644 --- a/src/storage/blocksstable/ob_block_manager.cpp +++ b/src/storage/blocksstable/ob_block_manager.cpp @@ -168,7 +168,7 @@ int ObBlockManager::init( LOG_WARN("fail to init timer", K(ret)); } else if (OB_FAIL(bucket_lock_.init(DEFAULT_LOCK_BUCKET_COUNT, ObLatchIds::BLOCK_MANAGER_LOCK))) { LOG_WARN("fail to init bucket lock", K(ret)); - } else if (OB_FAIL(block_map_.init("BlockMap", OB_SYS_TENANT_ID))) { + } else if
(OB_FAIL(block_map_.init(SET_USE_UNEXPECTED_500(ObMemAttr(OB_SERVER_TENANT_ID, "BlockMap"))))) { LOG_WARN("fail to init block map", K(ret)); } else if (OB_FAIL(super_block_buf_holder_.init(ObServerSuperBlockHeader::OB_MAX_SUPER_BLOCK_SIZE))) { LOG_WARN("fail to init super block buffer holder, ", K(ret)); diff --git a/src/storage/blocksstable/ob_block_sstable_struct.cpp b/src/storage/blocksstable/ob_block_sstable_struct.cpp index c587c0262..b1963bf08 100644 --- a/src/storage/blocksstable/ob_block_sstable_struct.cpp +++ b/src/storage/blocksstable/ob_block_sstable_struct.cpp @@ -798,17 +798,35 @@ int ObRecordHeaderV3::deserialize(const char *buf, int64_t buf_len, int64_t &pos } ObDDLMacroBlockRedoInfo::ObDDLMacroBlockRedoInfo() - : table_key_(), data_buffer_(), block_type_(ObDDLMacroBlockType::DDL_MB_INVALID_TYPE), start_scn_(SCN::min_scn()) + : table_key_(), data_buffer_(), block_type_(ObDDLMacroBlockType::DDL_MB_INVALID_TYPE), start_scn_(SCN::min_scn()), + data_format_version_(0/*for compatibility*/), end_row_id_(-1) { } +void ObDDLMacroBlockRedoInfo::reset() +{ + table_key_.reset(); + data_buffer_.reset(); + block_type_ = ObDDLMacroBlockType::DDL_MB_INVALID_TYPE; + logic_id_.reset(); + start_scn_ = SCN::min_scn(); + data_format_version_ = 0; + end_row_id_ = -1; +} + bool ObDDLMacroBlockRedoInfo::is_valid() const { return table_key_.is_valid() && data_buffer_.ptr() != nullptr && block_type_ != ObDDLMacroBlockType::DDL_MB_INVALID_TYPE - && logic_id_.is_valid() && start_scn_.is_valid_and_not_min(); + && logic_id_.is_valid() && start_scn_.is_valid_and_not_min() && data_format_version_ >= 0; } -OB_SERIALIZE_MEMBER(ObDDLMacroBlockRedoInfo, table_key_, data_buffer_, block_type_, logic_id_, start_scn_); +bool ObDDLMacroBlockRedoInfo::is_column_group_info_valid() const +{ + return table_key_.is_column_store_sstable() && end_row_id_ >= 0; +} + +OB_SERIALIZE_MEMBER(ObDDLMacroBlockRedoInfo, table_key_, data_buffer_, block_type_, logic_id_, + start_scn_, data_format_version_, 
end_row_id_); constexpr uint8_t ObColClusterInfoMask::BYTES_TYPE_TO_LEN[]; diff --git a/src/storage/blocksstable/ob_block_sstable_struct.h b/src/storage/blocksstable/ob_block_sstable_struct.h index 716f95060..b1fb070aa 100644 --- a/src/storage/blocksstable/ob_block_sstable_struct.h +++ b/src/storage/blocksstable/ob_block_sstable_struct.h @@ -1108,13 +1108,18 @@ public: ObDDLMacroBlockRedoInfo(); ~ObDDLMacroBlockRedoInfo() = default; bool is_valid() const; - TO_STRING_KV(K_(table_key), K_(data_buffer), K_(block_type), K_(logic_id), K_(start_scn)); + bool is_column_group_info_valid() const; + void reset(); + TO_STRING_KV(K_(table_key), K_(data_buffer), K_(block_type), K_(logic_id), + K_(start_scn), K_(data_format_version), K_(end_row_id)); public: storage::ObITable::TableKey table_key_; ObString data_buffer_; ObDDLMacroBlockType block_type_; ObLogicMacroBlockId logic_id_; share::SCN start_scn_; + uint64_t data_format_version_; + int64_t end_row_id_; }; }//end namespace blocksstable diff --git a/src/storage/blocksstable/ob_imacro_block_flush_callback.h b/src/storage/blocksstable/ob_imacro_block_flush_callback.h index b02675457..8c7f55de9 100644 --- a/src/storage/blocksstable/ob_imacro_block_flush_callback.h +++ b/src/storage/blocksstable/ob_imacro_block_flush_callback.h @@ -31,7 +31,7 @@ public: const ObLogicMacroBlockId &logic_id, char *buf, const int64_t buf_len, - const int64_t data_seq) = 0; + const int64_t row_count) = 0; virtual int wait() = 0; }; diff --git a/src/storage/blocksstable/ob_imicro_block_reader.h b/src/storage/blocksstable/ob_imicro_block_reader.h index 1e86848b5..7f9a6415c 100644 --- a/src/storage/blocksstable/ob_imicro_block_reader.h +++ b/src/storage/blocksstable/ob_imicro_block_reader.h @@ -103,6 +103,7 @@ struct ObMicroBlockData DATA_BLOCK, INDEX_BLOCK, DDL_BLOCK_TREE, + DDL_MERGE_INDEX_BLOCK, MAX_TYPE }; public: @@ -131,7 +132,7 @@ public: int64_t &get_extra_size() { return extra_size_; } int64_t total_size() const { return size_ + 
extra_size_; } - bool is_index_block() const { return INDEX_BLOCK == type_ || DDL_BLOCK_TREE == type_;} + bool is_index_block() const { return INDEX_BLOCK == type_ || DDL_BLOCK_TREE == type_ || DDL_MERGE_INDEX_BLOCK == type_;} void reset() { *this = ObMicroBlockData(); } OB_INLINE const ObMicroBlockHeader *get_micro_header() const @@ -375,10 +376,6 @@ public: const int64_t begin_idx, int64_t &row_idx, bool &equal) = 0; - virtual int compare_rowkey( - const ObDatumRowkey &rowkey, - const int64_t index, - int32_t &compare_result) = 0; static int filter_white_filter( const sql::ObWhiteFilterExecutor &filter, const common::ObObjMeta &obj_meta, diff --git a/src/storage/blocksstable/ob_macro_block_handle.cpp b/src/storage/blocksstable/ob_macro_block_handle.cpp index 4248568ff..6f0743405 100644 --- a/src/storage/blocksstable/ob_macro_block_handle.cpp +++ b/src/storage/blocksstable/ob_macro_block_handle.cpp @@ -141,6 +141,8 @@ int ObMacroBlockHandle::async_read(const ObMacroBlockReadInfo &read_info) const int64_t real_timeout_ms = min(read_info.io_timeout_ms_, GCONF._data_storage_io_timeout / 1000L); io_info.timeout_us_ = real_timeout_ms * 1000L; io_info.user_data_buf_ = read_info.buf_; + // resource manager level is higher than default + io_info.flag_.set_group_id(read_info.io_desc_.get_io_module()); io_info.flag_.set_read(); if (OB_FAIL(ObIOManager::get_instance().aio_read(io_info, io_handle_))) { @@ -170,6 +172,7 @@ int ObMacroBlockHandle::async_write(const ObMacroBlockWriteInfo &write_info) io_info.flag_.set_group_id(write_info.io_desc_.get_io_module()); const int64_t real_timeout_ms = min(write_info.io_timeout_ms_, GCONF._data_storage_io_timeout / 1000L); io_info.timeout_us_ = real_timeout_ms * 1000L; + io_info.flag_.set_group_id(write_info.io_desc_.get_io_module()); io_info.flag_.set_write(); if (OB_FAIL(ObIOManager::get_instance().aio_write(io_info, io_handle_))) { diff --git a/src/storage/blocksstable/ob_macro_block_writer.cpp 
b/src/storage/blocksstable/ob_macro_block_writer.cpp index 5a6923f19..5c3c248e0 100644 --- a/src/storage/blocksstable/ob_macro_block_writer.cpp +++ b/src/storage/blocksstable/ob_macro_block_writer.cpp @@ -1368,7 +1368,7 @@ int ObMacroBlockWriter::flush_macro_block(ObMacroBlock &macro_block) cur_logic_id, macro_block.get_data_buf(), upper_align(macro_block.get_data_size(), DIO_ALIGN_SIZE), - current_macro_seq_))) { + macro_block.get_row_count()))) { STORAGE_LOG(WARN, "fail to do callback flush", K(ret)); } if (OB_SUCC(ret)) { diff --git a/src/storage/blocksstable/ob_micro_block_reader.cpp b/src/storage/blocksstable/ob_micro_block_reader.cpp index 4aa9f9e44..d2a85003f 100644 --- a/src/storage/blocksstable/ob_micro_block_reader.cpp +++ b/src/storage/blocksstable/ob_micro_block_reader.cpp @@ -471,28 +471,6 @@ int ObMicroBlockReader::find_bound( return ret; } -int ObMicroBlockReader::compare_rowkey( - const ObDatumRowkey &rowkey, - const int64_t idx, - int32_t &compare_result) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("Not inited", K(ret)); - } else if (OB_UNLIKELY(!rowkey.is_valid() || idx < 0 || idx >= row_count_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument", K(ret), K(rowkey), K(idx), K_(row_count)); - } else if (OB_FAIL(flat_row_reader_.compare_meta_rowkey(rowkey, - *datum_utils_, - data_begin_ + index_data_[idx], - index_data_[idx + 1] - index_data_[idx], - compare_result))) { - LOG_WARN("Failed to compare rowkey", K(ret), K(rowkey), K_(row_count), K(idx)); - } - return ret; -} - int ObMicroBlockReader::find_bound( const ObDatumRowkey &key, const bool lower_bound, diff --git a/src/storage/blocksstable/ob_micro_block_reader.h b/src/storage/blocksstable/ob_micro_block_reader.h index 66ba72da8..9f825a46f 100644 --- a/src/storage/blocksstable/ob_micro_block_reader.h +++ b/src/storage/blocksstable/ob_micro_block_reader.h @@ -127,10 +127,6 @@ public: const int32_t col_offset, const int64_t row_index, ObStorageDatum
&datum) override; - virtual int compare_rowkey( - const ObDatumRowkey &rowkey, - const int64_t index, - int32_t &compare_result) override; virtual int find_bound( const ObDatumRowkey &key, const bool lower_bound, diff --git a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp index 5ede0f0e6..ce6339e8e 100644 --- a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp +++ b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp @@ -875,18 +875,18 @@ int ObSharedMacroBlockMgr::read_sstable_block( read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_READ); read_info.io_timeout_ms_ = GCONF._data_storage_io_timeout / 1000L; read_info.io_desc_.set_group_id(ObIOModule::SHARED_MACRO_BLOCK_MGR_IO); - } - if (OB_ISNULL(read_info.buf_ = reinterpret_cast(allocator.alloc(read_info.size_)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - STORAGE_LOG(WARN, "failed to alloc macro read info buffer", K(ret), K(read_info.size_)); - } else { - if (OB_FAIL(ObBlockManager::read_block(read_info, block_handle))) { - LOG_WARN("fail to read block", K(ret), K(read_info)); - } else if (OB_UNLIKELY(!block_handle.is_valid() - || sstable.get_macro_read_size() != block_handle.get_data_size())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("block handle is invalid", K(ret), K(block_handle)); + if (OB_ISNULL(read_info.buf_ = reinterpret_cast(allocator.alloc(read_info.size_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "failed to alloc macro read info buffer", K(ret), K(read_info.size_)); + } else { + if (OB_FAIL(ObBlockManager::read_block(read_info, block_handle))) { + LOG_WARN("fail to read block", K(ret), K(read_info)); + } else if (OB_UNLIKELY(!block_handle.is_valid() + || sstable.get_macro_read_size() != block_handle.get_data_size())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("block handle is invalid", K(ret), K(block_handle)); + } } } return ret; diff --git a/src/storage/blocksstable/ob_sstable.cpp 
b/src/storage/blocksstable/ob_sstable.cpp index e32d89e83..76eea461f 100644 --- a/src/storage/blocksstable/ob_sstable.cpp +++ b/src/storage/blocksstable/ob_sstable.cpp @@ -41,6 +41,8 @@ using namespace share; namespace blocksstable { +const char *DDL_EMPTY_SSTABLE_DUMMY_INDEX_DATA_BUF = "DO_NOT_VISIT"; +const int64_t DDL_EMPTY_SSTABLE_DUMMY_INDEX_DATA_SIZE = 13; void ObSSTableMetaHandle::reset() { handle_.reset(); @@ -59,7 +61,6 @@ int ObSSTableMetaHandle::get_sstable_meta(const ObSSTableMeta *&sstable_meta) co return ret; } - ObSSTableMetaCache::ObSSTableMetaCache() : header_(0), data_macro_block_count_(0), @@ -571,7 +572,7 @@ int ObSSTable::exist( || !context.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid arguments", K(ret), K(rowkey), K(param), K(context)); - } else if (meta_->is_empty()) { + } else if (no_data_to_read()) { is_exist = false; has_found = false; } else { @@ -634,7 +635,7 @@ int ObSSTable::exist(ObRowsInfo &rows_info, bool &is_exist, bool &all_rows_found } else if (OB_UNLIKELY(rows_info.tablet_id_ != key_.tablet_id_)) { ret = OB_ERR_SYS; LOG_ERROR("Tablet id not match", K(ret), K_(key), K(rows_info)); - } else if (is_empty()) { + } else if (no_data_to_read()) { // Skip } else if (rows_info.all_rows_found()) { all_rows_found = true; @@ -862,7 +863,7 @@ int ObSSTable::check_rows_locked( } else if (OB_UNLIKELY(!is_valid())) { ret = OB_NOT_INIT; LOG_WARN("The SSTable has not been inited", K(ret), K_(valid_for_reading), KP_(meta)); - } else if (meta_->is_empty() || (is_major_sstable() && !check_exist)) { + } else if (no_data_to_read() || (is_major_sstable() && !check_exist)) { } else if (!check_exist && get_upper_trans_version() <= snapshot_version.get_val_for_tx()) { if (max_trans_version.get_val_for_tx() < get_upper_trans_version()) { if (OB_FAIL(max_trans_version.convert_for_tx(get_upper_trans_version()))) { @@ -915,7 +916,7 @@ int ObSSTable::check_row_locked( if (OB_UNLIKELY(!is_valid())) { ret = OB_NOT_INIT; LOG_WARN("The SSTable has 
not been inited", K(ret), K_(key), K_(valid_for_reading), KPC_(meta)); - } else if (is_empty()) { + } else if (no_data_to_read()) { } else if (OB_FAIL(get_last_rowkey(sstable_endkey))) { LOG_WARN("Fail to get SSTable endkey", K(ret), KP_(meta)); } else if (OB_ISNULL(sstable_endkey)) { @@ -1659,13 +1660,18 @@ int ObSSTable::get_index_tree_root( if (OB_UNLIKELY(!is_valid())) { ret = OB_NOT_INIT; LOG_WARN("The SSTable has not been inited", K(ret), K_(valid_for_reading), K_(meta)); - } else if (OB_UNLIKELY(is_empty())) { + } else if (OB_UNLIKELY(no_data_to_read())) { index_data.reset(); ret = OB_ENTRY_NOT_EXIST; LOG_WARN("SSTable is empty", K(ret)); } else if (OB_UNLIKELY(!is_loaded())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("can not get index tree rot from an unloaded sstable", K(ret)); + } else if (is_ddl_merge_empty_sstable()) { + // mock here, skip valid_check + index_data.type_ = ObMicroBlockData::DDL_MERGE_INDEX_BLOCK; + index_data.buf_ = DDL_EMPTY_SSTABLE_DUMMY_INDEX_DATA_BUF; + index_data.size_ = DDL_EMPTY_SSTABLE_DUMMY_INDEX_DATA_SIZE; } else if (OB_UNLIKELY(!meta_->get_root_info().get_addr().is_valid() || !meta_->get_root_info().get_block_data().is_valid())) { ret = OB_STATE_NOT_MATCH; @@ -1680,6 +1686,9 @@ int ObSSTable::get_index_tree_root( ret = OB_ERR_UNEXPECTED; LOG_WARN("Shouldn't happen, transform has already been done in initialize,", K(ret), KPC(this)); } + if (OB_SUCC(ret) && is_ddl_merge_sstable()) { + index_data.type_ = ObMicroBlockData::DDL_MERGE_INDEX_BLOCK; + } return ret; } @@ -1796,6 +1805,9 @@ int ObSSTable::get_last_rowkey(const ObDatumRowkey *&sstable_endkey) } else if (OB_FAIL(block_meta_tree->get_last_rowkey(sstable_endkey))) { LOG_WARN("get last rowkey failed", K(ret)); } + } else if (is_ddl_merge_sstable()) { + //todo qilu: get endkey from sstable + ddl kv after ddl_kv_mgr refactor + sstable_endkey = &ObDatumRowkey::MAX_ROWKEY; } else { if (OB_ISNULL(idx_data_header = reinterpret_cast( root_block.get_extra_buf()))) { diff --git 
a/src/storage/blocksstable/ob_sstable.h b/src/storage/blocksstable/ob_sstable.h index 478b9cb14..dfc673a9d 100644 --- a/src/storage/blocksstable/ob_sstable.h +++ b/src/storage/blocksstable/ob_sstable.h @@ -36,6 +36,8 @@ class ObRowState; } namespace blocksstable { +extern const char *DDL_EMPTY_SSTABLE_DUMMY_INDEX_DATA_BUF; +extern const int64_t DDL_EMPTY_SSTABLE_DUMMY_INDEX_DATA_SIZE; class ObSSTableSecMetaIterator; class ObIMacroBlockIterator; struct ObMacroBlocksWriteCtx; @@ -226,9 +228,18 @@ public: { return 0 == meta_cache_.data_macro_block_count_; } + virtual bool no_data_to_read() const override + { + return is_empty() && !is_ddl_merge_sstable(); + } + virtual bool is_ddl_merge_empty_sstable() const override + { + return is_empty() && is_ddl_merge_sstable(); + } int set_addr(const ObMetaDiskAddr &addr); OB_INLINE const ObMetaDiskAddr &get_addr() const { return addr_; } OB_INLINE int64_t get_data_macro_block_count() const { return meta_cache_.data_macro_block_count_; } + OB_INLINE int64_t get_merged_row_count() const { return is_ddl_merge_empty_sstable() ? INT64_MAX : meta_cache_.row_count_; } // empty ddl_merge_sstable cannot speed up queries OB_INLINE int64_t get_macro_offset() const { return meta_cache_.nested_offset_; } OB_INLINE int64_t get_macro_read_size() const { return meta_cache_.nested_size_; } diff --git a/src/storage/blocksstable/ob_sstable_meta.h b/src/storage/blocksstable/ob_sstable_meta.h index 7363b382d..3f1aa6cd9 100644 --- a/src/storage/blocksstable/ob_sstable_meta.h +++ b/src/storage/blocksstable/ob_sstable_meta.h @@ -166,13 +166,14 @@ public: return basic_meta_.column_cnt_ - ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); } - OB_INLINE int16_t get_index_tree_height() const { return basic_meta_.data_index_tree_height_; } + OB_INLINE int16_t get_index_tree_height(const bool is_ddl_merge_empty_sstable) const { return is_ddl_merge_empty_sstable ? 
2 : basic_meta_.data_index_tree_height_; } OB_INLINE ObSSTableStatus get_status() const { return static_cast(basic_meta_.status_); } OB_INLINE int64_t get_occupy_size() const { return basic_meta_.occupy_size_; } OB_INLINE int64_t get_row_count() const { return basic_meta_.row_count_; } + OB_INLINE int64_t get_end_row_id(const bool is_ddl_merge_empty_sstable) const { return is_ddl_merge_empty_sstable ? INT64_MAX : basic_meta_.row_count_ - 1; } OB_INLINE int64_t get_data_micro_block_count() const { return basic_meta_.get_data_micro_block_count(); diff --git a/src/storage/blocksstable/ob_tmp_file.cpp b/src/storage/blocksstable/ob_tmp_file.cpp index 6ab4b26e8..d0355ea83 100644 --- a/src/storage/blocksstable/ob_tmp_file.cpp +++ b/src/storage/blocksstable/ob_tmp_file.cpp @@ -463,7 +463,8 @@ ObTmpFileExtent::ObTmpFileExtent(ObTmpFile *file) g_offset_end_(0), owner_(file), block_id_(-1), - lock_(common::ObLatchIds::TMP_FILE_EXTENT_LOCK) + lock_(common::ObLatchIds::TMP_FILE_EXTENT_LOCK), + is_truncated_(false) { } @@ -561,6 +562,7 @@ void ObTmpFileExtent::reset() page_nums_ = 0; block_id_ = -1; ATOMIC_STORE(&is_closed_, false); + ATOMIC_STORE(&is_truncated_, false); } bool ObTmpFileExtent::is_valid() @@ -692,7 +694,7 @@ int ObTmpFileMeta::clear() for (int64_t i = extents_.count() - 1; OB_SUCC(ret) && i >= 0; --i) { tmp = extents_.at(i); if (NULL != tmp) { - if (!tmp->is_alloced()) { + if (!tmp->is_alloced() || tmp->is_truncated()) { // nothing to do. 
} else if (OB_FAIL(OB_TMP_FILE_STORE.free(tmp->get_owner().get_tenant_id(), tmp))) { STORAGE_LOG(WARN, "fail to free extents", K(ret)); @@ -720,7 +722,9 @@ ObTmpFile::ObTmpFile() tenant_id_(-1), lock_(common::ObLatchIds::TMP_FILE_LOCK), allocator_(NULL), - file_meta_() + file_meta_(), + read_guard_(0), + next_truncated_extent_id_(0) { } @@ -741,6 +745,8 @@ int ObTmpFile::clear() offset_ = 0; allocator_ = NULL; is_inited_ = false; + read_guard_ = 0; + next_truncated_extent_id_ = 0; } } return ret; @@ -853,6 +859,18 @@ int ObTmpFile::once_aio_read_batch( return ret; } +int ObTmpFile::fill_zero(char *buf, const int64_t size) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf) || size < 0) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "buf is null or size is negative", K(ret), K(size), KP(buf)); + } else { + MEMSET(buf, 0, size); + } + return ret; +} + int ObTmpFile::once_aio_read_batch_without_lock( const ObTmpFileIOInfo &io_info, int64_t &offset, @@ -879,9 +897,35 @@ int ObTmpFile::once_aio_read_batch_without_lock( read_size = remain_size; } // read from the extent. 
- if (OB_FAIL(tmp->read(io_info, offset - tmp->get_global_start(), read_size, buf, handle))) { - STORAGE_LOG(WARN, "fail to read the extent", K(ret), K(io_info), K(buf), KP_(io_info.buf)); + if (tmp->is_truncated()) { + if (read_guard_ < tmp->get_global_end()) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "extent is truncated but read_guard not set correctlly", K(ret), K(tmp), K(read_guard_)); + } else if (OB_FAIL(fill_zero(buf, read_size))) { + STORAGE_LOG(WARN, "fail to fill zero data to buf", K(ret)); + } + } else if (offset >= read_guard_) { + if (OB_FAIL(tmp->read(io_info, offset - tmp->get_global_start(), read_size, buf, handle))) { + STORAGE_LOG(WARN, "fail to read the extent", K(ret), K(io_info), K(buf), KP_(io_info.buf)); + } } else { + if (read_guard_ < offset + read_size) { + const int64_t zero_size = read_guard_ - offset; + const int64_t file_read_size = read_size - zero_size; + if (OB_FAIL(fill_zero(buf, zero_size))) { + STORAGE_LOG(WARN, "fail to read zero from truncated pos", K(ret)); + } else if (OB_FAIL(tmp->read(io_info, read_guard_ - tmp->get_global_start(), file_read_size, buf + zero_size, handle))) { + STORAGE_LOG(WARN, "fail to read the extent", K(ret), K(io_info), K(buf + zero_size), KP_(io_info.buf)); + } + } else { + // read 0; + if (OB_FAIL(fill_zero(buf, read_size))) { + STORAGE_LOG(WARN, "fail to read zero from truncated pos", K(ret), KP(buf), K(read_size)); + } + } + + } + if (OB_SUCC(ret)) { offset += read_size; remain_size -= read_size; buf += read_size; @@ -1258,6 +1302,66 @@ void ObTmpFile::get_file_size(int64_t &file_size) file_size = (nullptr == tmp) ? 0 : tmp->get_global_end(); } +/* + * to avoid truncating blocks that is using now (e.g., the io request is in io manager but not finish). + * we need to ensure there is no other file operation while calling truncate. 
+ */ +int ObTmpFile::truncate(const int64_t offset) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(lock_); + // release extents + ObTmpFileExtent *tmp = nullptr; + //the extents before read_guard_ is truncated; + int64_t ith_extent = next_truncated_extent_id_; + common::ObIArray &extents = file_meta_.get_extents(); + STORAGE_LOG(INFO, "truncate ", K(offset), K(read_guard_), K(ith_extent)); + + if (OB_ISNULL(tmp = file_meta_.get_last_extent())) { + ret = OB_BAD_NULL_ERROR; + STORAGE_LOG(WARN, "fail to truncate, because the tmp file is empty", K(ret), KP(tmp)); + } else if (offset < 0 || offset > tmp->get_global_end()) { + ret = OB_INDEX_OUT_OF_RANGE; + STORAGE_LOG(WARN, "offset out of range", K(ret), K(tmp), K(offset)); + } + + while (OB_SUCC(ret) && ith_extent >= 0 + && ith_extent < extents.count()) { + tmp = extents.at(ith_extent); + if (tmp->get_global_start() >= offset) { + break; + } else if (!tmp->is_closed()) { + // for extent that is not closed, shouldn't truncate. + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "the truncate extent is not closed", K(ret)); + } else if (tmp->get_global_end() > offset) { + break; + } else { + // release space + if (!tmp->is_truncated()) { + tmp->set_truncated(); + if (OB_FAIL(OB_TMP_FILE_STORE.free(get_tenant_id(), tmp->get_block_id(), + tmp->get_start_page_id(), + tmp->get_page_nums()))) { + STORAGE_LOG(WARN, "fail to release space", K(ret), K(read_guard_), K(tmp)); + } + STORAGE_LOG(TRACE, "release extents", K(ith_extent), K(tmp->get_start_page_id()), K(tmp->get_page_nums())); + } + if (OB_SUCC(ret)) { + // if only part of extent is truncated, we only need to set the read_guard + ith_extent++; + } + } + } + + if (OB_SUCC(ret) && offset > read_guard_) { + read_guard_ = offset; + next_truncated_extent_id_ = ith_extent; + } + return ret; +} + int ObTmpFile::write_file_extent(const ObTmpFileIOInfo &io_info, ObTmpFileExtent *file_extent, int64_t &size, char *&buf) { @@ -1382,12 +1486,15 @@ int ObTmpFileManager::open(int64_t 
&fd, int64_t &dir) { int ret = OB_SUCCESS; ObTmpFile file; + common::ObIAllocator *allocator = nullptr; if (IS_NOT_INIT) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObTmpFileManager has not been inited", K(ret)); + } else if (OB_FAIL(OB_TMP_FILE_STORE.get_tenant_extent_allocator(MTL_ID(), allocator))) { + STORAGE_LOG(WARN, "fail to get extent allocator", K(ret)); } else if (OB_FAIL(get_next_fd(fd))) { STORAGE_LOG(WARN, "fail to get next fd", K(ret)); - } else if (OB_FAIL(file.init(fd, dir, files_.get_allocator()))) { + } else if (OB_FAIL(file.init(fd, dir, *allocator))) { STORAGE_LOG(WARN, "fail to open file", K(ret)); } else if (OB_FAIL(files_.set(fd, file))) { STORAGE_LOG(WARN, "fail to set tmp file", K(ret)); @@ -1553,6 +1660,21 @@ int ObTmpFileManager::seek(const int64_t fd, const int64_t offset, const int whe return ret; } +int ObTmpFileManager::truncate(const int64_t fd, const int64_t offset) +{ + int ret = OB_SUCCESS; + ObTmpFileHandle file_handle; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "ObTmpFileManager has not been inited", K(ret)); + } else if (OB_FAIL(files_.get(fd, file_handle))) { + STORAGE_LOG(WARN, "fail to get tmp file handle", K(ret), K(fd)); + } else if (OB_FAIL(file_handle.get_resource_ptr()->truncate(offset))) { + STORAGE_LOG(WARN, "fail to seek file", K(ret)); + } + return ret; +} + int ObTmpFileManager::get_tmp_file_handle(const int64_t fd, ObTmpFileHandle &handle) { int ret = OB_SUCCESS; diff --git a/src/storage/blocksstable/ob_tmp_file.h b/src/storage/blocksstable/ob_tmp_file.h index bd8b6fa8f..50ee560f6 100644 --- a/src/storage/blocksstable/ob_tmp_file.h +++ b/src/storage/blocksstable/ob_tmp_file.h @@ -180,6 +180,8 @@ public: int write(const ObTmpFileIOInfo &io_info, int64_t &size, char *&buf); void reset(); OB_INLINE bool is_closed() const { return ATOMIC_LOAD(&is_closed_); } + OB_INLINE bool is_truncated() const { return ATOMIC_LOAD(&is_truncated_); } + void set_truncated() { ATOMIC_STORE(&is_truncated_, true); } bool 
is_valid(); bool close(bool force = false); bool close(uint8_t &free_page_start_id, uint8_t &free_page_nums, bool force = false); @@ -216,6 +218,7 @@ private: ObTmpFile *owner_; int64_t block_id_; common::SpinRWLock lock_; + bool is_truncated_; DISALLOW_COPY_AND_ASSIGN(ObTmpFileExtent); }; @@ -265,6 +268,9 @@ public: int aio_write(const ObTmpFileIOInfo &io_info, ObTmpFileIOHandle &handle); int write(const ObTmpFileIOInfo &io_info); int seek(const int64_t offset, const int whence); + + // the data before the offset is released + int truncate(const int64_t offset); int clear(); int64_t get_dir_id() const; uint64_t get_tenant_id() const; @@ -283,6 +289,7 @@ public: TO_STRING_KV(K_(file_meta), K_(is_big), K_(tenant_id), K_(is_inited)); private: + static int fill_zero(char *buf, const int64_t size); int write_file_extent(const ObTmpFileIOInfo &io_info, ObTmpFileExtent *file_extent, int64_t &size, char *&buf); int aio_read_without_lock( @@ -314,6 +321,12 @@ private: common::ObIAllocator *allocator_; ObTmpFileMeta file_meta_; + // content before read_guard_ is truncated, which means the space is released. read before read_guard_ will only return 0; + int64_t read_guard_; + + // to optimize truncated speed, record the last_truncated_extent_id, so that we do not need to binary search the extent id every time we truncated. + int64_t next_truncated_extent_id_; + DISALLOW_COPY_AND_ASSIGN(ObTmpFile); }; @@ -357,6 +370,7 @@ public: // NOTE: // remove file and all of block in this file, after not used file, should be called in case // of block leak. 
+ int truncate(const int64_t fd, const int64_t offset); int remove(const int64_t fd); int remove_tenant_file(const uint64_t tenant_id); diff --git a/src/storage/blocksstable/ob_tmp_file_store.cpp b/src/storage/blocksstable/ob_tmp_file_store.cpp index 9aaf50d92..1bd293b4a 100644 --- a/src/storage/blocksstable/ob_tmp_file_store.cpp +++ b/src/storage/blocksstable/ob_tmp_file_store.cpp @@ -802,7 +802,7 @@ int ObTmpTenantMacroBlockManager::get_macro_block(const int64_t block_id, ObTmpM ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), K(block_id)); } else if (OB_FAIL(blocks_.get_refactored(block_id, t_mblk))) { - STORAGE_LOG(WARN, "fail to get tmp macro block", K(ret)); + STORAGE_LOG(WARN, "fail to get tmp macro block", K(ret), K(block_id)); } return ret; } @@ -939,9 +939,10 @@ int ObTmpTenantFileStore::init(const uint64_t tenant_id) STORAGE_LOG(WARN, "ObTmpTenantFileStore has not been inited", K(ret)); } else if (OB_FAIL(allocator_.init(BLOCK_SIZE, ObModIds::OB_TMP_BLOCK_MANAGER, tenant_id, get_memory_limit(tenant_id)))) { STORAGE_LOG(WARN, "fail to init allocator", K(ret)); - } else if (OB_FAIL(io_allocator_.init(lib::ObMallocAllocator::get_instance(), - OB_MALLOC_MIDDLE_BLOCK_SIZE, - ObMemAttr(OB_SERVER_TENANT_ID, ObModIds::OB_TMP_PAGE_CACHE, ObCtxIds::DEFAULT_CTX_ID)))) { + } else if (OB_FAIL(io_allocator_.init( + lib::ObMallocAllocator::get_instance(), + OB_MALLOC_MIDDLE_BLOCK_SIZE, + ObMemAttr(tenant_id, ObModIds::OB_TMP_PAGE_CACHE, ObCtxIds::DEFAULT_CTX_ID)))) { STORAGE_LOG(WARN, "Fail to init io allocator, ", K(ret)); } else if (OB_ISNULL(page_cache_ = &ObTmpPageCache::get_instance())) { ret = OB_ERR_UNEXPECTED; @@ -1939,6 +1940,21 @@ int64_t ObTmpFileStore::get_next_blk_id() return next_blk_id; } +int ObTmpFileStore::get_tenant_extent_allocator(const int64_t tenant_id, common::ObIAllocator *&allocator) +{ + int ret = OB_SUCCESS; + ObTmpTenantFileStoreHandle store_handle; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, 
"ObTmpFileStore has not been inited", K(ret), K(tenant_id)); + } else if (OB_FAIL(get_store(tenant_id, store_handle))) { + STORAGE_LOG(WARN, "fail to get tmp tenant file store", K(ret), K(tenant_id)); + } else { + allocator = &(store_handle.get_tenant_store()->get_extent_allocator()); + } + return ret; +} + void ObTmpFileStore::destroy() { ObTmpPageCache::get_instance().destroy(); diff --git a/src/storage/blocksstable/ob_tmp_file_store.h b/src/storage/blocksstable/ob_tmp_file_store.h index 5a4ac94e5..81f1fc4f3 100644 --- a/src/storage/blocksstable/ob_tmp_file_store.h +++ b/src/storage/blocksstable/ob_tmp_file_store.h @@ -255,6 +255,8 @@ public: int wait_write_finish(const int64_t block_id, const int64_t timeout_ms); int get_disk_macro_block_list(common::ObIArray ¯o_id_list); int get_macro_block(const int64_t block_id, ObTmpMacroBlock *&t_mblk); + // use io_allocator_ to allocate tenant extent memory. + common::ObIAllocator &get_extent_allocator() { return allocator_; } void print_block_usage() { tmp_block_manager_.print_block_usage(); } OB_INLINE void inc_page_cache_num(const int64_t num) { ATOMIC_FAA(&page_cache_num_, num); @@ -351,6 +353,7 @@ public: int get_macro_block_list(common::ObIArray &tmp_block_cnt_pairs); int get_all_tenant_id(common::ObIArray &tenant_ids); int64_t get_next_blk_id(); + int get_tenant_extent_allocator(const int64_t tenant_id, common::ObIAllocator *&allocator); static int64_t get_block_size() { diff --git a/src/storage/blockstore/ob_shared_block_reader_writer.cpp b/src/storage/blockstore/ob_shared_block_reader_writer.cpp index 79cf0c992..32483644f 100644 --- a/src/storage/blockstore/ob_shared_block_reader_writer.cpp +++ b/src/storage/blockstore/ob_shared_block_reader_writer.cpp @@ -1013,6 +1013,7 @@ int ObSharedBlockReaderWriter::async_read( ObMacroBlockHandle macro_handle; macro_read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_READ); macro_read_info.io_timeout_ms_ = read_info.io_timeout_ms_; + 
macro_read_info.io_desc_.set_group_id(ObIOModule::SHARED_BLOCK_RW_IO); macro_read_info.io_desc_.set_group_id(ObIOModule::SHARED_BLOCK_RW_IO); macro_read_info.io_callback_ = read_info.io_callback_; diff --git a/src/storage/column_store/ob_cg_aggregated_scanner.cpp b/src/storage/column_store/ob_cg_aggregated_scanner.cpp index a4e2372c2..f8b09ed0f 100644 --- a/src/storage/column_store/ob_cg_aggregated_scanner.cpp +++ b/src/storage/column_store/ob_cg_aggregated_scanner.cpp @@ -52,7 +52,7 @@ int ObCGAggregatedScanner::init( if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("The ObCGAggregatedScanner has been inited", K(ret)); - } else if (OB_UNLIKELY(!wrapper.is_valid() || !wrapper.get_sstable()->is_major_sstable() || + } else if (OB_UNLIKELY(!wrapper.is_valid() || !wrapper.get_sstable()->is_major_or_ddl_merge_sstable() || !iter_param.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), K(wrapper), K(iter_param)); diff --git a/src/storage/column_store/ob_cg_prefetcher.cpp b/src/storage/column_store/ob_cg_prefetcher.cpp index 7422affe5..0ce900a36 100644 --- a/src/storage/column_store/ob_cg_prefetcher.cpp +++ b/src/storage/column_store/ob_cg_prefetcher.cpp @@ -108,7 +108,7 @@ int ObCGPrefetcher::open_index_root() ObMicroIndexInfo index_info; index_info.is_root_ = true; index_info.cs_row_range_.start_row_id_ = 0; - index_info.cs_row_range_.end_row_id_ = sstable_meta_handle_.get_sstable_meta().get_row_count() - 1; + index_info.cs_row_range_.end_row_id_ = sstable_meta_handle_.get_sstable_meta().get_end_row_id(sstable_->is_ddl_merge_empty_sstable()); ObIndexTreeLevelHandle &tree_handle = tree_handles_[0]; if (OB_FAIL(sstable_->get_index_tree_root(index_block_))) { LOG_WARN("Fail to get index block root", K(ret)); @@ -120,7 +120,13 @@ int ObCGPrefetcher::open_index_root() true, true, &index_info))) { - LOG_WARN("Fail to open index scanner", K(ret), K(query_range_)); + if (OB_BEYOND_THE_RANGE != ret) { + LOG_WARN("Fail to open index scanner", K(ret), 
K(query_range_)); + } else { + // empty ddl_merge_sstable with empty ddl_kvs may return OB_BEYOND_THE_RANGE + ret = OB_SUCCESS; + is_prefetch_end_ = true; + } } else { tree_handle.fetch_idx_ = tree_handle.prefetch_idx_ = 0; tree_handle.is_prefetch_end_ = true; diff --git a/src/storage/column_store/ob_cg_scanner.cpp b/src/storage/column_store/ob_cg_scanner.cpp index 76b7ba29e..1c275ef5a 100644 --- a/src/storage/column_store/ob_cg_scanner.cpp +++ b/src/storage/column_store/ob_cg_scanner.cpp @@ -33,7 +33,7 @@ int ObCGScanner::init( if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("The ObCGScanner has been inited", K(ret)); - } else if (OB_UNLIKELY(!wrapper.is_valid() || !wrapper.get_sstable()->is_major_sstable() || + } else if (OB_UNLIKELY(!wrapper.is_valid() || !wrapper.get_sstable()->is_major_or_ddl_merge_sstable() || !iter_param.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument to init ObCGScanner", K(ret), K(wrapper), K(iter_param)); @@ -48,7 +48,7 @@ int ObCGScanner::init( } else { iter_param_ = &iter_param; access_ctx_ = &access_ctx; - sstable_row_cnt_ = sstable_->get_row_count(); + sstable_row_cnt_ = sstable_->get_merged_row_count(); is_reverse_scan_ = access_ctx.query_flag_.is_reverse_scan(); } @@ -71,7 +71,7 @@ int ObCGScanner::switch_context( if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("The ObCGScanner is not inited"); - } else if (OB_UNLIKELY(!wrapper.is_valid() || !wrapper.get_sstable()->is_major_sstable() || + } else if (OB_UNLIKELY(!wrapper.is_valid() || !wrapper.get_sstable()->is_major_or_ddl_merge_sstable() || !iter_param.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), K(wrapper), K(iter_param)); @@ -92,7 +92,7 @@ int ObCGScanner::switch_context( if (OB_SUCC(ret)) { iter_param_ = &iter_param; access_ctx_ = &access_ctx; - sstable_row_cnt_ = sstable_->get_row_count(); + sstable_row_cnt_ = sstable_->get_merged_row_count(); is_reverse_scan_ = access_ctx.query_flag_.is_reverse_scan(); } } @@ -146,7 +146,7 @@ 
int ObCGScanner::init_micro_scanner() { int ret = OB_SUCCESS; if (nullptr != micro_scanner_) { - } else if (OB_UNLIKELY(!sstable_->is_major_sstable())) { + } else if (OB_UNLIKELY(!sstable_->is_major_or_ddl_merge_sstable())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected not major sstable", K(ret), KPC_(sstable)); } else if (nullptr == (micro_scanner_ = OB_NEWx(ObMicroBlockRowScanner, diff --git a/src/storage/column_store/ob_co_sstable_row_scanner.cpp b/src/storage/column_store/ob_co_sstable_row_scanner.cpp index adbada443..c3d5a0b7e 100644 --- a/src/storage/column_store/ob_co_sstable_row_scanner.cpp +++ b/src/storage/column_store/ob_co_sstable_row_scanner.cpp @@ -333,7 +333,7 @@ int ObCOSSTableRowScanner::init_project_iter( int ret = OB_SUCCESS; ObCOSSTableV2* co_sstable = static_cast(table); common::ObSEArray iter_params; - if (OB_FAIL(construct_cg_iter_params(false, row_param, context, iter_params))) { + if (OB_FAIL(construct_cg_iter_params(row_param, context, iter_params))) { LOG_WARN("Failed to construct cg scan params", K(ret)); } else if (nullptr == project_iter_) { if (1 == iter_params.count()) { @@ -370,7 +370,7 @@ int ObCOSSTableRowScanner::init_project_iter_for_single_row( // use all cg if exists for getter } else if (OB_FAIL(init_fixed_array_param(getter_projector_, row_param.get_out_col_cnt()))) { LOG_WARN("Failed to reserve getter projector", K(ret)); - } else if (OB_FAIL(construct_cg_iter_params(true, row_param, context, iter_params))) { + } else if (OB_FAIL(construct_cg_iter_params_for_single_row(row_param, context, iter_params))) { LOG_WARN("Failed to construct cg scan params", K(ret)); } else if (iter_params.empty()) { if (OB_NOT_NULL(getter_project_iter_)) { @@ -399,8 +399,44 @@ int ObCOSSTableRowScanner::init_project_iter_for_single_row( return ret; } +int ObCOSSTableRowScanner::construct_cg_iter_params_for_single_row( + const ObTableIterParam &row_param, + ObTableAccessContext &context, + common::ObIArray &iter_params) +{ + int ret = 
OB_SUCCESS; + if (OB_UNLIKELY(!row_param.is_valid() || nullptr == row_param.out_cols_project_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), K(row_param)); + } else { + ObTableIterParam* cg_param = nullptr; + const common::ObIArray *access_cgs = nullptr; + const int64_t schema_rowkey_cnt = row_param.get_read_info()->get_schema_rowkey_count(); + if (OB_ISNULL(access_cgs = row_param.get_read_info()->get_cg_idxs())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected null access cg index", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < row_param.out_cols_project_->count(); ++i) { + const int32_t col_offset = row_param.out_cols_project_->at(i); + sql::ObExpr* expr = row_param.output_exprs_ == nullptr ? nullptr : row_param.output_exprs_->at(i); + if ((nullptr == expr || !is_group_idx_expr(expr)) && col_offset >= schema_rowkey_cnt) { + int32_t cg_idx = access_cgs->at(col_offset); + if (OB_FAIL(cg_param_pool_->get_iter_param(cg_idx, row_param, expr, cg_param))) { + LOG_WARN("Fail to get cg iter param", K(ret), K(i), K(cg_idx), K(row_param), KPC(access_cgs)); + } else if (OB_FAIL(iter_params.push_back(cg_param))) { + LOG_WARN("Fail to push back cg iter param", K(ret), K(cg_param)); + } else if (OB_FAIL(getter_projector_.push_back(col_offset))) { + LOG_WARN("Fail to push back projector idx", K(ret)); + } + LOG_DEBUG("[COLUMNSTORE] cons one cg param", K(ret), K(cg_idx), KPC(cg_param)); + } + } + } + } + return ret; +} + int ObCOSSTableRowScanner::construct_cg_iter_params( - const bool project_single_row, const ObTableIterParam &row_param, ObTableAccessContext &context, common::ObIArray &iter_params) @@ -416,15 +452,14 @@ int ObCOSSTableRowScanner::construct_cg_iter_params( const common::ObIArray *access_cgs = nullptr; const int64_t schema_rowkey_cnt = row_param.get_read_info()->get_schema_rowkey_count(); // Assert only one column in one column group - if (!project_single_row && row_param.enable_pd_aggregate()) { + if 
(row_param.enable_pd_aggregate()) { if (OB_FAIL(construct_cg_agg_iter_params(row_param, context, iter_params))) { LOG_WARN("Fail to cons agg iter_params", K(ret)); } } else if (0 == row_param.output_exprs_->count()) { const uint32_t cg_idx = OB_CS_VIRTUAL_CG_IDX; - if (project_single_row) { - } else if (OB_FAIL(cg_param_pool_->get_iter_param(cg_idx, row_param, *row_param.output_exprs_, - cg_param, row_param.enable_pd_aggregate()))) { + if (OB_FAIL(cg_param_pool_->get_iter_param(cg_idx, row_param, *row_param.output_exprs_, + cg_param, row_param.enable_pd_aggregate()))) { LOG_WARN("Fail to get cg iter param", K(ret), K(cg_idx), K(row_param)); } else if (OB_FAIL(iter_params.push_back(cg_param))) { LOG_WARN("Fail to push back cg iter param", K(ret), K(cg_param)); @@ -436,18 +471,14 @@ int ObCOSSTableRowScanner::construct_cg_iter_params( int32_t idx = 0; for (int64_t i = 0; OB_SUCC(ret) && i < row_param.output_exprs_->count(); ++i) { const int32_t col_offset = row_param.out_cols_project_->at(i); - const bool need_iter_param = project_single_row ? 
(col_offset >= schema_rowkey_cnt) - : (nullptr == row_param.output_sel_mask_ || row_param.output_sel_mask_->at(i)); + const bool need_iter_param = (nullptr == row_param.output_sel_mask_ || row_param.output_sel_mask_->at(i)); if (!is_group_idx_expr(row_param.output_exprs_->at(i)) && need_iter_param) { int32_t cg_idx = access_cgs->at(row_param.out_cols_project_->at(i)); if (OB_FAIL(cg_param_pool_->get_iter_param(cg_idx, row_param, row_param.output_exprs_->at(i), cg_param))) { LOG_WARN("Fail to get cg iter param", K(ret), K(i), K(cg_idx), K(row_param), KPC(access_cgs)); } else if (OB_FAIL(iter_params.push_back(cg_param))) { LOG_WARN("Fail to push back cg iter param", K(ret), K(cg_param)); - } else if (project_single_row && OB_FAIL(getter_projector_.push_back(col_offset))) { - LOG_WARN("Fail to push back projector idx", K(ret)); - } else if (!project_single_row && - row_param.enable_pd_group_by() && + } else if (row_param.enable_pd_group_by() && row_param.out_cols_project_->at(i) == row_param.group_by_cols_project_->at(0)) { group_by_project_idx_ = idx; } diff --git a/src/storage/column_store/ob_co_sstable_row_scanner.h b/src/storage/column_store/ob_co_sstable_row_scanner.h index 413de801b..f944ddc13 100644 --- a/src/storage/column_store/ob_co_sstable_row_scanner.h +++ b/src/storage/column_store/ob_co_sstable_row_scanner.h @@ -106,8 +106,11 @@ private: ObTableAccessContext &context, ObITable *table); int extract_group_by_iters(); + int construct_cg_iter_params_for_single_row( + const ObTableIterParam &row_param, + ObTableAccessContext &context, + common::ObIArray &iter_params); int construct_cg_iter_params( - const bool project_single_row, const ObTableIterParam &row_param, ObTableAccessContext &context, common::ObIArray &iter_params); diff --git a/src/storage/column_store/ob_column_oriented_sstable.cpp b/src/storage/column_store/ob_column_oriented_sstable.cpp index d1ae7c781..9ed16823a 100644 --- a/src/storage/column_store/ob_column_oriented_sstable.cpp +++ 
b/src/storage/column_store/ob_column_oriented_sstable.cpp @@ -264,7 +264,7 @@ int ObCOSSTableV2::build_cs_meta() && ObCOSSTableBaseType::ROWKEY_CG_TYPE == base_type_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected rowkey cg table", K(ret), K(base_type_), KPC(cg_sstable)); - } else if (OB_UNLIKELY(cg_sstable->get_snapshot_version() != get_snapshot_version())) { + } else if (OB_UNLIKELY(cg_sstable->get_end_scn() != get_end_scn())) { // ddl sstable may only contain partial data ret = OB_ERR_UNEXPECTED; LOG_WARN("the snapshot version of cg sstables must be equal", K(ret)); } else if (OB_FAIL(cg_sstable->get_meta(cg_meta_handle))) { @@ -272,7 +272,7 @@ int ObCOSSTableV2::build_cs_meta() } else if (OB_UNLIKELY(cg_meta_handle.get_sstable_meta().get_schema_version() != meta_->get_schema_version())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("the schema version of cg sstables must be equal", K(ret), KPC(meta_), K(cg_meta_handle)); - } else if (OB_UNLIKELY(cg_meta_handle.get_sstable_meta().get_row_count() != meta_->get_row_count())) { + } else if (OB_UNLIKELY(cg_sstable->is_major_sstable() && cg_meta_handle.get_sstable_meta().get_row_count() != meta_->get_row_count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("the row count of cg sstables must be equal", K(ret), KPC(cg_sstable), KPC(meta_), K(cg_meta_handle)); } else { @@ -473,7 +473,7 @@ int ObCOSSTableV2::deep_copy( int ObCOSSTableV2::fetch_cg_sstable( const uint32_t cg_idx, - ObSSTableWrapper &cg_wrapper) + ObSSTableWrapper &cg_wrapper) const { int ret = OB_SUCCESS; cg_wrapper.reset(); @@ -522,7 +522,7 @@ int ObCOSSTableV2::get_cg_sstable( const ObSSTableArray &cg_sstables = co_meta_handle.get_sstable_meta().get_cg_sstables(); cg_wrapper.sstable_ = cg_idx < key_.column_group_idx_ ? 
cg_sstables[cg_idx] - : cg_sstables[cg_idx - 1]; + : cg_sstables[cg_idx - 1]; // deal with that the rowkey/all cg idx is at the middle when add column online } if (OB_FAIL(ret)) { diff --git a/src/storage/column_store/ob_column_oriented_sstable.h b/src/storage/column_store/ob_column_oriented_sstable.h index 128497ffd..0c93fef62 100644 --- a/src/storage/column_store/ob_column_oriented_sstable.h +++ b/src/storage/column_store/ob_column_oriented_sstable.h @@ -126,7 +126,7 @@ public: } int fetch_cg_sstable( const uint32_t cg_idx, - ObSSTableWrapper &cg_wrapper); + ObSSTableWrapper &cg_wrapper) const; int get_cg_sstable(const uint32_t cg_idx, ObSSTableWrapper &cg_wrapper) const; int get_all_tables(common::ObIArray &table_wrappers) const; diff --git a/src/storage/compaction/ob_compaction_util.h b/src/storage/compaction/ob_compaction_util.h index f6f22910e..9ef1cd507 100644 --- a/src/storage/compaction/ob_compaction_util.h +++ b/src/storage/compaction/ob_compaction_util.h @@ -26,7 +26,7 @@ enum ObMergeType MINI_MERGE = 3, // mini merge, only flush memtable MAJOR_MERGE = 4, MEDIUM_MERGE = 5, - DDL_KV_MERGE = 6, + DDL_KV_MERGE = 6, // only use for ddl dag BACKFILL_TX_MERGE = 7, MDS_TABLE_MERGE = 8, MERGE_TYPE_MAX diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index 9ae8a66a1..ba4ddad05 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -1157,37 +1157,43 @@ int ObTenantTabletScheduler::schedule_tablet_minor_merge( return ret; } -int ObTenantTabletScheduler::schedule_tablet_ddl_major_merge(ObTabletHandle &tablet_handle) +int ObTenantTabletScheduler::schedule_tablet_ddl_major_merge( + const share::ObLSID &ls_id, + ObTabletHandle &tablet_handle) { int ret = OB_SUCCESS; - ObDDLKvMgrHandle kv_mgr_handle; - if (!tablet_handle.is_valid()) { + ObDDLTableMergeDagParam param; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + 
ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + bool is_major_sstable_exist = false; + if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_handle.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(tablet_handle)); + LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_handle)); } else if (tablet_handle.get_obj()->get_tablet_meta().has_transfer_table()) { if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { LOG_INFO("The tablet in the transfer process does not do ddl major_merge", K(tablet_handle)); } - } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(kv_mgr_handle))) { - if (OB_ENTRY_NOT_EXIST != ret) { - LOG_WARN("get ddl kv mgr failed", K(ret), K(tablet_handle)); - } else { + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + ls_id, + tablet_handle.get_obj()->get_tablet_meta().tablet_id_, + true, /* is_full_direct_load */ + direct_load_mgr_handle, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { ret = OB_SUCCESS; - } - } else if (kv_mgr_handle.is_valid()) { - ObDDLTableMergeDagParam param; - if (OB_FAIL(kv_mgr_handle.get_obj()->get_ddl_major_merge_param(*tablet_handle.get_obj(), param))) { - if (OB_EAGAIN != ret) { - LOG_WARN("failed to get ddl major merge param", K(ret)); - } - } else if (OB_FAIL(kv_mgr_handle.get_obj()->freeze_ddl_kv(*tablet_handle.get_obj()))) { - LOG_WARN("failed to freeze ddl kv", K(ret)); - } else if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { - if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { - LOG_WARN("schedule ddl merge dag failed", K(ret), K(param)); - } } else { - LOG_INFO("schedule ddl merge task for major sstable success", K(param)); + LOG_WARN("get tablet direct load mgr failed", K(ret), "tablet_id", 
tablet_handle.get_obj()->get_tablet_meta().tablet_id_); + } + } else if (OB_FAIL(direct_load_mgr_handle.get_full_obj()->prepare_ddl_merge_param(*tablet_handle.get_obj(), param))) { + if (OB_EAGAIN != ret) { + LOG_WARN("prepare major merge param failed", K(ret), "tablet_id", tablet_handle.get_obj()->get_tablet_meta().tablet_id_); + } + } else if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { + if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { + LOG_WARN("schedule ddl merge dag failed", K(ret), K(param)); } } return ret; @@ -1320,7 +1326,7 @@ int ObTenantTabletScheduler::schedule_tablet_minor( } } if (!tablet_id.is_ls_inner_tablet()) { // data tablet - if (OB_TMP_FAIL(schedule_tablet_ddl_major_merge(tablet_handle))) { + if (OB_TMP_FAIL(schedule_tablet_ddl_major_merge(ls_id, tablet_handle))) { if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { LOG_WARN("failed to schedule tablet ddl merge", K(tmp_ret), K(ls_id), K(tablet_handle)); } diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.h b/src/storage/compaction/ob_tenant_tablet_scheduler.h index bdbbfa87f..656957da4 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.h +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.h @@ -20,6 +20,7 @@ #include "storage/compaction/ob_tablet_merge_task.h" #include "storage/compaction/ob_partition_merge_policy.h" #include "storage/compaction/ob_tenant_medium_checker.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "lib/hash/ob_hashset.h" #include "storage/compaction/ob_tenant_tablet_scheduler_task_mgr.h" #include "storage/compaction/ob_compaction_schedule_iterator.h" @@ -220,6 +221,7 @@ public: const ObMergeType merge_type, const int64_t &merge_snapshot_version); static int schedule_tablet_ddl_major_merge( + const share::ObLSID &ls_id, ObTabletHandle &tablet_handle); int get_min_dependent_schema_version(int64_t &min_schema_version); diff --git a/src/storage/ddl/ob_build_index_task.cpp 
b/src/storage/ddl/ob_build_index_task.cpp index fe34ff379..916ab6b92 100644 --- a/src/storage/ddl/ob_build_index_task.cpp +++ b/src/storage/ddl/ob_build_index_task.cpp @@ -155,18 +155,20 @@ int ObUniqueIndexChecker::scan_table_with_column_checksum( transaction::ObTransService *trans_service = nullptr; ObTabletTableIterator iterator; ObQueryFlag query_flag(ObQueryFlag::Forward, - true, /*is daily merge scan*/ - true, /*is read multiple macro block*/ - false, /*sys task scan, read one macro block in single io*/ - false, /*is full row scan?*/ - false, - false); + false, /* daily merge*/ + true, /* use *optimize */ + false, /* use whole macro scan*/ + false, /* not full row*/ + false, /* not index_back*/ + false);/* query stat */ + query_flag.disable_cache(); query_flag.skip_read_lob_ = 1; ObDatumRange range; bool allow_not_ready = false; ObArray need_reshape; ObLSHandle ls_handle; range.set_whole_range(); + if (OB_ISNULL(trans_service = MTL(transaction::ObTransService*))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("trans_service is null", K(ret)); @@ -352,6 +354,7 @@ int ObUniqueIndexChecker::scan_main_table_with_column_checksum( param.org_col_ids_ = &org_col_ids; param.output_projector_ = &output_projector; param.is_scan_index_ = false; + STORAGE_LOG(INFO, "scan main table column checksum", K(col_ids), K(org_col_ids)); if (OB_FAIL(scan_table_with_column_checksum(param, column_checksum, row_count))) { STORAGE_LOG(WARN, "fail to scan table with column checksum", K(ret)); diff --git a/src/storage/ddl/ob_complement_data_task.cpp b/src/storage/ddl/ob_complement_data_task.cpp index f6efa6f6c..76dcad66d 100644 --- a/src/storage/ddl/ob_complement_data_task.cpp +++ b/src/storage/ddl/ob_complement_data_task.cpp @@ -40,6 +40,7 @@ #include "storage/tx/ob_trans_service.h" #include "storage/lob/ob_lob_util.h" #include "logservice/ob_log_service.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" #include 
"observer/ob_server_event_history_table_operator.h" @@ -290,10 +291,12 @@ int ObComplementDataParam::get_hidden_table_key(ObITable::TableKey &table_key) c return ret; } -int ObComplementDataContext::init(const ObComplementDataParam ¶m, const ObDataStoreDesc &desc) +int ObComplementDataContext::init(const ObComplementDataParam ¶m, const blocksstable::ObDataStoreDesc &desc) { int ret = OB_SUCCESS; void *builder_buf = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; const ObSSTable *first_major_sstable = nullptr; ObTabletMemberWrapper table_store_wrapper; if (OB_UNLIKELY(is_inited_)) { @@ -302,6 +305,16 @@ int ObComplementDataContext::init(const ObComplementDataParam ¶m, const ObDa } else if (OB_UNLIKELY(!param.is_valid() || !desc.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(param), K(desc)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(param.dest_ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(param)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + param.dest_tablet_id_, + tablet_handle, + ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("get tablet handle failed", K(ret), K(param)); + } else if (OB_UNLIKELY(nullptr == tablet_handle.get_obj())) { + ret = OB_ERR_SYS; + LOG_WARN("tablet handle is null", K(ret), K(param)); } else if (OB_FAIL(ObTabletDDLUtil::check_and_get_major_sstable(param.dest_ls_id_, param.dest_tablet_id_, first_major_sstable, table_store_wrapper))) { LOG_WARN("check if major sstable exist failed", K(ret), K(param)); } else if (OB_FAIL(data_sstable_redo_writer_.init(param.dest_ls_id_, @@ -320,6 +333,30 @@ int ObComplementDataContext::init(const ObComplementDataParam ¶m, const ObDa ObSSTableIndexBuilder::DISABLE))) { LOG_WARN("failed to init index builder", K(ret), K(desc)); } else { + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletFullDirectLoadMgr *tablet_direct_load_mgr = nullptr; + 
ObTabletDirectLoadInsertParam direct_load_param; + direct_load_param.is_replay_ = false; + direct_load_param.common_param_.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_DDL; + direct_load_param.common_param_.data_format_version_ = param.data_format_version_; + direct_load_param.common_param_.read_snapshot_ = param.snapshot_version_; + direct_load_param.common_param_.ls_id_ = param.dest_ls_id_; + direct_load_param.common_param_.tablet_id_ = param.dest_tablet_id_; + direct_load_param.runtime_only_param_.exec_ctx_ = nullptr; + direct_load_param.runtime_only_param_.task_id_ = param.task_id_; + direct_load_param.runtime_only_param_.table_id_ = param.dest_table_id_; + direct_load_param.runtime_only_param_.schema_version_ = param.dest_schema_version_; + direct_load_param.runtime_only_param_.task_cnt_ = 1; // default value. + if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->alloc_execution_context_id(context_id_))) { + LOG_WARN("alloc execution context id failed", K(ret)); + } else if (OB_FAIL(tenant_direct_load_mgr->create_tablet_direct_load(context_id_, param.execution_id_, direct_load_param))) { + LOG_WARN("create tablet manager failed", K(ret)); + } + } + if (OB_SUCC(ret)) { is_major_sstable_exist_ = nullptr != first_major_sstable ? 
true : false; concurrent_cnt_ = param.concurrent_cnt_; is_inited_ = true; @@ -341,6 +378,8 @@ int ObComplementDataContext::write_start_log(const ObComplementDataParam ¶m) { int ret = OB_SUCCESS; ObITable::TableKey hidden_table_key; + SCN start_scn; + ObTabletDirectLoadMgrHandle handle; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObComplementDataContext not init", K(ret)); @@ -352,11 +391,16 @@ int ObComplementDataContext::write_start_log(const ObComplementDataParam ¶m) } else if (OB_UNLIKELY(!hidden_table_key.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid table key", K(ret), K(hidden_table_key)); - } else if (OB_FAIL(data_sstable_redo_writer_.start_ddl_redo(hidden_table_key, param.task_id_, - param.execution_id_, param.data_format_version_, ddl_kv_mgr_handle_))) { - LOG_WARN("fail write start log", K(ret), K(hidden_table_key), K(param)); } else { - LOG_INFO("complement task start ddl redo success", K(hidden_table_key)); + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->open_tablet_direct_load(true, /*is_full_direct_load*/ + param.dest_ls_id_, param.dest_tablet_id_, context_id_, start_scn, handle))) { + LOG_WARN("write ddl start log failed", K(ret)); + } + LOG_INFO("complement task start ddl redo success", K(ret), K(param)); } return ret; } @@ -419,8 +463,9 @@ void ObComplementDataContext::destroy() allocator_.free(index_builder_); index_builder_ = nullptr; } - ddl_kv_mgr_handle_.reset(); + tablet_direct_load_mgr_handle_.reset(); allocator_.reset(); + context_id_ = 0; } ObComplementDataDag::ObComplementDataDag() @@ -747,6 +792,7 @@ int ObComplementWriteTask::init(const int64_t task_id, ObComplementDataParam &pa ObComplementDataContext &context) { int ret = OB_SUCCESS; + int64_t schema_stored_column_cnt = 0; ObSchemaGetterGuard schema_guard; 
const ObTableSchema *hidden_table_schema = nullptr; if (OB_UNLIKELY(is_inited_)) { @@ -764,8 +810,10 @@ int ObComplementWriteTask::init(const int64_t task_id, ObComplementDataParam &pa } else if (OB_ISNULL(hidden_table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("hidden table schema not exist", K(ret), K(param)); + } else if (OB_FAIL(hidden_table_schema->get_store_column_count(schema_stored_column_cnt))) { + LOG_WARN("get stored column cnt failed", K(ret)); } else if (OB_FAIL(write_row_.init( - param.allocator_, hidden_table_schema->get_column_count() + storage::ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()))) { + param.allocator_, schema_stored_column_cnt + storage::ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()))) { LOG_WARN("Fail to init write row", K(ret)); } else { write_row_.row_flag_.set_flag(ObDmlFlag::DF_INSERT); @@ -943,9 +991,9 @@ int ObComplementWriteTask::local_scan_by_range() int ret = OB_SUCCESS; int64_t start_time = ObTimeUtility::current_time(); int64_t concurrent_cnt = 0; - if (OB_ISNULL(param_) || OB_UNLIKELY(!param_->is_valid())) { + if (OB_ISNULL(param_) || OB_ISNULL(context_) || OB_UNLIKELY(!param_->is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(idx), KP(param_)); + LOG_WARN("invalid arguments", K(ret), KPC(param_), KPC(context_)); } else { concurrent_cnt = param_->concurrent_cnt_; LOG_INFO("start to do local scan by range", K(task_id_), K(concurrent_cnt), KPC(param_)); @@ -1074,9 +1122,9 @@ int ObComplementWriteTask::remote_scan() { int ret = OB_SUCCESS; const int64_t start_time = ObTimeUtility::current_time(); - if (OB_ISNULL(param_) || OB_UNLIKELY(!param_->is_valid())) { + if (OB_ISNULL(param_) || OB_ISNULL(context_) || OB_UNLIKELY(!param_->is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(idx), KP(param_)); + LOG_WARN("invalid arguments", K(ret), KPC(param_), KPC(context_)); } else if (OB_FAIL(generate_col_param())) { LOG_WARN("fail to get column 
ids", K(ret)); } else if (OB_FAIL(do_remote_scan())) { @@ -1133,16 +1181,76 @@ int ObComplementWriteTask::add_extra_rowkey(const int64_t rowkey_cnt, return ret; } +int ObComplementWriteTask::append_lob( + const int64_t schema_rowkey_cnt, + const int64_t extra_rowkey_cnt, + ObDDLInsertRowIterator &iterator, + ObArenaAllocator &lob_allocator) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(extra_rowkey_cnt + org_col_ids_.count() != write_row_.count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(schema_rowkey_cnt), K(extra_rowkey_cnt), K(org_col_ids_), K(write_row_)); + } else { + ObArray lob_column_idxs; + ObArray col_types; + lob_column_idxs.set_attr(ObMemAttr(param_->dest_tenant_id_, "DL_lob_idxs")); + col_types.set_attr(ObMemAttr(param_->dest_tenant_id_, "DL_col_types")); + const int64_t storage_rowkey_cnt = schema_rowkey_cnt + extra_rowkey_cnt; + for (int64_t i = 0; OB_SUCC(ret) && i < write_row_.count_; i++) { + int64_t index = 0; + ObStorageDatum &datum = write_row_.storage_datums_[i]; + if (i < storage_rowkey_cnt || datum.is_nop() || datum.is_null()) { + // do nothing + } else if (OB_UNLIKELY((index = i - extra_rowkey_cnt) >= org_col_ids_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(index), K(extra_rowkey_cnt), K(org_col_ids_)); + } else if (!org_col_ids_.at(index).col_type_.is_lob_storage()) { + // not lob. + } else if (OB_FAIL(lob_column_idxs.push_back(i))) { + LOG_WARN("fail to push back storage_index", K(ret), K(i)); + } else if (OB_FAIL(col_types.push_back(org_col_ids_.at(index).col_type_))) { + LOG_WARN("fail to push back col_type", K(ret), K(index), K(org_col_ids_.at(index))); + } + } + if (OB_FAIL(ret)) { + } else if (lob_column_idxs.empty()) { + // no lob. 
+ } else if (iterator.get_lob_id_cache().remain_count() < lob_column_idxs.count() + && OB_FAIL(iterator.switch_to_new_lob_slice())) { + LOG_WARN("switch to new lob slice failed", K(ret), K(iterator)); + } else { + lob_allocator.reuse(); + ObDirectLoadSliceInfo slice_info; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = param_->dest_ls_id_; + slice_info.data_tablet_id_ = param_->dest_tablet_id_; + slice_info.slice_id_ = iterator.get_lob_slice_id(); + slice_info.context_id_ = context_->context_id_; + if (OB_FAIL(MTL(ObTenantDirectLoadMgr *)->fill_lob_sstable_slice(lob_allocator, slice_info, + iterator.get_lob_id_cache(), lob_column_idxs, col_types, write_row_))) { + LOG_WARN("fill batch lob sstable slice failed", K(ret), K(slice_info), K(write_row_)); + } + } + } + return ret; +} + int ObComplementWriteTask::append_row(ObScan *scan) { int ret = OB_SUCCESS; - ObWholeDataStoreDesc data_desc(true/*is_ddl*/); - HEAP_VARS_3((ObMacroBlockWriter, writer), + HEAP_VARS_4((ObMacroBlockWriter, writer), (ObSchemaGetterGuard, schema_guard), - (ObRelativeTable, relative_table)) { + (ObRelativeTable, relative_table), + (blocksstable::ObNewRowBuilder, new_row_builder)) { + HEAP_VAR(ObWholeDataStoreDesc, data_desc, true) { ObArray report_col_checksums; ObArray report_col_ids; - ObDDLSSTableRedoWriter sstable_redo_writer; + ObDDLRedoLogWriter sstable_redo_writer; ObDDLRedoLogWriterCallback callback; ObITable::TableKey hidden_table_key; ObMacroDataSeq macro_start_seq(0); @@ -1162,14 +1270,22 @@ int ObComplementWriteTask::append_row(ObScan *scan) int64_t rowkey_column_cnt = 0; const int64_t extra_rowkey_cnt = storage::ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); bool ddl_committed = false; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle direct_load_hdl; + bool is_major_sstable_exist = false; + ObDDLInsertRowIterator 
row_iter(nullptr/*ObPxMultiPartSSTableInsertOp*/, false/*is_slice_empty*/, + param_->dest_ls_id_, param_->dest_tablet_id_, 0/*unused_rowkey_num*/, param_->snapshot_version_, context_->context_id_); blocksstable::ObNewRowBuilder new_row_builder; int64_t lob_inrow_threshold = OB_DEFAULT_LOB_INROW_THRESHOLD; - if (OB_UNLIKELY(!is_inited_)) { + if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObComplementWriteTask is not inited", K(ret)); } else if (OB_ISNULL(param_) || OB_ISNULL(scan) || OB_UNLIKELY(!param_->is_valid()) || OB_ISNULL(context_)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), KPC(param_), KPC(context_)); + LOG_WARN("invalid arguments", K(ret)); } else if (OB_FAIL(macro_start_seq.set_parallel_degree(task_id_))) { LOG_WARN("set parallel degree failed", K(ret), K(task_id_)); } else { @@ -1201,9 +1317,27 @@ int ObComplementWriteTask::append_row(ObScan *scan) } else if (OB_UNLIKELY(nullptr == static_cast(get_dag()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("the dag of this task is null", K(ret)); - } else if (FALSE_IT(sstable_redo_writer.set_start_scn( - static_cast(get_dag())->get_context().data_sstable_redo_writer_.get_start_scn()))) { - } else if (OB_FAIL(callback.init(DDL_MB_DATA_TYPE, hidden_table_key, param_->task_id_, &sstable_redo_writer, context_->ddl_kv_mgr_handle_))) { + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + param_->dest_ls_id_, + param_->dest_tablet_id_, + true, /* is_full_direct_load */ + direct_load_hdl, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + LOG_INFO("major sstable already exist", K(ret), KPC(param_)); + } else { + LOG_WARN("get tablet mgr failed", K(ret), KPC(param_)); + } + } else if (OB_UNLIKELY(!direct_load_hdl.get_full_obj()->get_start_scn().is_valid_and_not_min())) { 
+ ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(direct_load_hdl.get_full_obj()->get_start_scn())); + } else if (OB_FAIL(callback.init(DDL_MB_DATA_TYPE, + hidden_table_key, + param_->task_id_, + direct_load_hdl.get_full_obj()->get_start_scn(), + param_->data_format_version_, + &sstable_redo_writer))) { LOG_WARN("fail to init data callback", K(ret), K(hidden_table_key)); } else if (OB_FAIL(writer.open(data_desc.get_desc(), macro_start_seq, &callback))) { LOG_WARN("fail to open macro block writer", K(ret), K(data_desc)); @@ -1255,27 +1389,10 @@ int ObComplementWriteTask::append_row(ObScan *scan) } else if (OB_ISNULL(tmp_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tmp_row is nullptr", K(ret)); - } - for (int64_t i = 0; OB_SUCC(ret) && i < org_col_ids_.count(); i++) { - ObStorageDatum &datum = tmp_row->storage_datums_[i]; - if (datum.is_nop() || datum.is_null()) { - // do nothing - } else if (org_col_ids_.at(i).col_type_.is_lob_storage()) { - lob_cnt++; - const int64_t timeout_ts = ObTimeUtility::current_time() + 60000000; // 60s - ObLobStorageParam lob_storage_param; - lob_storage_param.inrow_threshold_ = lob_inrow_threshold; - if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( - lob_allocator, param_->dest_ls_id_, param_->dest_tablet_id_, - org_col_ids_.at(i), lob_storage_param, datum, timeout_ts, true, param_->orig_tenant_id_))) { - LOG_WARN("fail to insert_lob_col", K(ret), K(datum)); - } - } - } - if (OB_FAIL(ret)) { - // do nothing } else if (OB_FAIL(add_extra_rowkey(rowkey_column_cnt, extra_rowkey_cnt, *tmp_row))) { LOG_WARN("fail to add extra rowkey", K(ret)); + } else if (OB_FAIL(append_lob(rowkey_column_cnt, extra_rowkey_cnt, row_iter, lob_allocator))) { + LOG_WARN("append lob into macro block failed", K(ret)); } else if (OB_FAIL(new_row_builder.build(write_row_, tmp_store_row))) { } else if (OB_FAIL(ObRowReshapeUtil::reshape_table_rows( tmp_store_row, reshape_ptr, cols_desc.count(), &reshaped_row, 1, sql_mode_for_ddl_reshape))) { @@ 
-1322,6 +1439,9 @@ int ObComplementWriteTask::append_row(ObScan *scan) } if (OB_ITER_END == ret) { ret = OB_SUCCESS; + if (row_iter.get_lob_slice_id() > 0 && OB_FAIL(row_iter.close_lob_sstable_slice())) { + LOG_WARN("close lob sstable slice failed", K(ret)); + } } LOG_INFO("print append row to macro block cost time", K(ret), K(task_id_), K(context_->row_inserted_), K(get_next_row_time), K(append_row_time)); @@ -1368,6 +1488,7 @@ int ObComplementWriteTask::append_row(ObScan *scan) } } } + } return ret; } @@ -1404,8 +1525,6 @@ int ObComplementMergeTask::process() int tmp_ret = OB_SUCCESS; ObIDag *tmp_dag = get_dag(); ObComplementDataDag *dag = nullptr; - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; ObTablet *tablet = nullptr; ObArray report_col_checksums; ObArray report_col_ids; @@ -1475,6 +1594,7 @@ int ObComplementMergeTask::add_build_hidden_table_sstable() ObLSHandle ls_handle; ObITable::TableKey hidden_table_key; SCN commit_scn; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObComplementMergetask has not been inited", K(ret)); @@ -1487,10 +1607,14 @@ int ObComplementMergeTask::add_build_hidden_table_sstable() LOG_WARN("failed to get log stream", K(ret), K(param_->dest_ls_id_)); } else if (OB_FAIL(param_->get_hidden_table_key(hidden_table_key))) { LOG_WARN("fail to get hidden table key", K(ret), K(hidden_table_key)); - } else if (OB_FAIL(context_->data_sstable_redo_writer_.end_ddl_redo_and_create_ddl_sstable( - param_->dest_ls_id_, hidden_table_key, param_->dest_table_id_, param_->execution_id_, param_->task_id_))) { - LOG_WARN("failed to end ddl redo", K(ret)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->close_tablet_direct_load(context_->context_id_, true, /*is_full_direct_load*/ + param_->dest_ls_id_, param_->dest_tablet_id_, true 
/*need_commit*/, true /*emergent_finish*/))) { + LOG_WARN("close tablet direct load failed", K(ret), KPC(param_)); } + return ret; } @@ -1731,7 +1855,9 @@ int ObLocalScan::construct_access_param( read_info_.reset(); ObArray cols_index; ObArray tmp_col_ids; + ObArray cg_idxs; bool is_oracle_mode = false; + bool has_all_cg = true; /* default is row store*/ // to construct column index, i.e., cols_index. if (OB_FAIL(data_table_schema.get_store_column_ids(tmp_col_ids, false))) { LOG_WARN("fail to get store columns id", K(ret), K(tmp_col_ids)); @@ -1754,6 +1880,22 @@ int ObLocalScan::construct_access_param( } } } + + /*construct cg_idx*/ + if (OB_FAIL(ret)) { + } else if(OB_FAIL(data_table_schema.has_all_column_group(has_all_cg))) { + LOG_WARN("fail to check whether table has all cg", K(ret), K(data_table_schema)); + } else if (!has_all_cg) { + for (int64_t i = 0; i < col_params_.count(); i++) { + int32_t tmp_cg_idx = -1; + if (OB_FAIL(data_table_schema.get_column_group_index(*col_params_.at(i), tmp_cg_idx))) { + LOG_WARN("fail to get column group idx", K(ret), K(data_table_schema)); + } else if (OB_FAIL(cg_idxs.push_back(tmp_cg_idx))) { + LOG_WARN("fail to push back cg idx", K(ret)); + } + } + } + if (OB_FAIL(ret)) { } else if (cols_index.count() != extended_gc_.extended_col_ids_.count()) { ret = OB_ERR_UNEXPECTED; @@ -1766,7 +1908,10 @@ int ObLocalScan::construct_access_param( is_oracle_mode, extended_gc_.extended_col_ids_, // TODO @yiren, remove column id. &cols_index, - &col_params_))) { + &col_params_, + has_all_cg ? 
nullptr : &cg_idxs, + nullptr, /* don't use skip scan*/ + has_all_cg))) { LOG_WARN("fail to init read info", K(ret)); } else { ObArray &extended_col_ids = extended_gc_.extended_col_ids_; @@ -1800,6 +1945,7 @@ int ObLocalScan::construct_range_ctx(ObQueryFlag &query_flag, if (OB_FAIL(tmp_scn.convert_for_tx(snapshot_version_))) { LOG_WARN("convert fail", K(ret), K(ls_id), K_(snapshot_version)); } else if (OB_FAIL(ctx_.init_for_read(ls_id, + access_param_.iter_param_.tablet_id_, INT64_MAX, -1, tmp_scn))) { diff --git a/src/storage/ddl/ob_complement_data_task.h b/src/storage/ddl/ob_complement_data_task.h index e479f7ed9..8db9c4b0e 100644 --- a/src/storage/ddl/ob_complement_data_task.h +++ b/src/storage/ddl/ob_complement_data_task.h @@ -19,9 +19,9 @@ #include "storage/blocksstable/index_block/ob_index_block_builder.h" #include "storage/compaction/ob_column_checksum_calculator.h" #include "storage/ddl/ob_ddl_redo_log_writer.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ob_store_row_comparer.h" #include "sql/engine/expr/ob_expr_frame_info.h" -#include "storage/ddl/ob_tablet_ddl_kv_mgr.h" namespace oceanbase { @@ -47,7 +47,7 @@ public: snapshot_version_(0), concurrent_cnt_(0), task_id_(0), execution_id_(-1), tablet_task_id_(0), compat_mode_(lib::Worker::CompatMode::INVALID), data_format_version_(0) {} ~ObComplementDataParam() { destroy(); } - int init(const ObDDLBuildSingleReplicaRequestArg &arg); + int init(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg); int split_task_ranges(const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, const int64_t tablet_size, const int64_t hint_parallelism); bool is_valid() const @@ -109,7 +109,7 @@ public: int64_t execution_id_; int64_t tablet_task_id_; lib::Worker::CompatMode compat_mode_; - int64_t data_format_version_; + uint64_t data_format_version_; ObSEArray ranges_; }; @@ -121,15 +121,15 @@ public: ObComplementDataContext(): is_inited_(false), is_major_sstable_exist_(false), 
complement_data_ret_(common::OB_SUCCESS), allocator_("CompleteDataCtx", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), lock_(ObLatchIds::COMPLEMENT_DATA_CONTEXT_LOCK), concurrent_cnt_(0), - data_sstable_redo_writer_(), index_builder_(nullptr), ddl_kv_mgr_handle_(), row_scanned_(0), row_inserted_(0) + data_sstable_redo_writer_(), index_builder_(nullptr), tablet_direct_load_mgr_handle_(), row_scanned_(0), row_inserted_(0), context_id_(0) {} ~ObComplementDataContext() { destroy(); } - int init(const ObComplementDataParam ¶m, const ObDataStoreDesc &desc); + int init(const ObComplementDataParam ¶m, const blocksstable::ObDataStoreDesc &desc); void destroy(); int write_start_log(const ObComplementDataParam ¶m); int add_column_checksum(const ObIArray &report_col_checksums, const ObIArray &report_col_ids); int get_column_checksum(ObIArray &report_col_checksums, ObIArray &report_col_ids); - TO_STRING_KV(K_(is_inited), K_(complement_data_ret), K_(concurrent_cnt), KP_(index_builder), K_(ddl_kv_mgr_handle), K_(row_scanned), K_(row_inserted)); + TO_STRING_KV(K_(is_inited), K_(complement_data_ret), K_(concurrent_cnt), KP_(index_builder), K_(tablet_direct_load_mgr_handle), K_(row_scanned), K_(row_inserted)); public: bool is_inited_; bool is_major_sstable_exist_; @@ -137,11 +137,12 @@ public: common::ObArenaAllocator allocator_; ObSpinLock lock_; int64_t concurrent_cnt_; - ObDDLSSTableRedoWriter data_sstable_redo_writer_; + ObDDLRedoLogWriter data_sstable_redo_writer_; blocksstable::ObSSTableIndexBuilder *index_builder_; - ObDDLKvMgrHandle ddl_kv_mgr_handle_; // for keeping ddl kv mgr alive + ObTabletDirectLoadMgrHandle tablet_direct_load_mgr_handle_; int64_t row_scanned_; int64_t row_inserted_; + int64_t context_id_; ObArray report_col_checksums_; ObArray report_col_ids_; }; @@ -154,7 +155,7 @@ class ObComplementDataDag final: public share::ObIDag public: ObComplementDataDag(); ~ObComplementDataDag(); - int init(const ObDDLBuildSingleReplicaRequestArg &arg); + int init(const 
obrpc::ObDDLBuildSingleReplicaRequestArg &arg); int prepare_context(); int64_t hash() const; bool operator ==(const share::ObIDag &other) const; @@ -210,6 +211,11 @@ private: int do_local_scan(); int do_remote_scan(); int append_row(ObScan *scan); + int append_lob( + const int64_t schema_rowkey_cnt, + const int64_t extra_rowkey_cnt, + ObDDLInsertRowIterator &iterator, + ObArenaAllocator &lob_allocator); int add_extra_rowkey(const int64_t rowkey_cnt, const int64_t extra_rowkey_cnt, const blocksstable::ObDatumRow &row, @@ -397,7 +403,7 @@ private: sqlclient::ObMySQLResult *result_; common::ObArenaAllocator allocator_; ObArray org_col_ids_; - common::ObArray column_names_; + common::ObArray column_names_; compaction::ObColumnChecksumCalculator checksum_calculator_; }; diff --git a/src/storage/ddl/ob_ddl_clog.cpp b/src/storage/ddl/ob_ddl_clog.cpp index e27203648..f901a7f3f 100644 --- a/src/storage/ddl/ob_ddl_clog.cpp +++ b/src/storage/ddl/ob_ddl_clog.cpp @@ -19,6 +19,7 @@ #include "storage/tx_storage/ob_ls_service.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" #include "storage/tablet/ob_tablet.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" namespace oceanbase { @@ -67,30 +68,35 @@ void ObDDLClogCb::try_release() } ObDDLStartClogCb::ObDDLStartClogCb() - : is_inited_(false), status_(), lock_tid_(0), ddl_kv_mgr_handle_() + : is_inited_(false), status_(), lock_tid_(0), direct_load_mgr_handle_() { } int ObDDLStartClogCb::init(const ObITable::TableKey &table_key, - const int64_t data_format_version, + const uint64_t data_format_version, const int64_t execution_id, - const uint32_t lock_tid, - ObDDLKvMgrHandle &ddl_kv_mgr_handle) + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + const uint32_t lock_tid) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); - } else if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 
|| data_format_version < 0 - || 0 == lock_tid || !ddl_kv_mgr_handle.is_valid())) { + } else if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 || data_format_version <= 0 + || 0 == lock_tid)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret)); + LOG_WARN("invalid argument", K(ret), K(table_key), K(execution_id), K(data_format_version), K(lock_tid)); + } else if (OB_FAIL(direct_load_mgr_handle_.assign(direct_load_mgr_handle))) { + LOG_WARN("assign direct load mgr handle failed", K(ret)); } else { table_key_ = table_key; data_format_version_ = data_format_version; execution_id_ = execution_id; lock_tid_ = lock_tid; ddl_kv_mgr_handle_ = ddl_kv_mgr_handle; + lob_kv_mgr_handle_ = lob_kv_mgr_handle; is_inited_ = true; } return ret; @@ -100,14 +106,22 @@ int ObDDLStartClogCb::on_success() { int ret = OB_SUCCESS; const SCN &start_scn = __get_scn(); + bool unused_brand_new = false; + ObTabletFullDirectLoadMgr *data_direct_load_mgr = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); - } else if (OB_FAIL(ddl_kv_mgr_handle_.get_obj()->ddl_start_nolock(table_key_, start_scn, data_format_version_, - execution_id_, SCN::min_scn()/*checkpoint_scn*/))) { + } else if (OB_ISNULL(data_direct_load_mgr + = (direct_load_mgr_handle_.get_full_obj()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(table_key_)); + } else if (OB_FAIL(data_direct_load_mgr->start_nolock(table_key_, start_scn, data_format_version_, + execution_id_, SCN::min_scn()/*checkpoint_scn*/, ddl_kv_mgr_handle_, lob_kv_mgr_handle_))) { LOG_WARN("failed to start ddl in cb", K(ret), K(table_key_), K(start_scn), K(execution_id_)); } - ddl_kv_mgr_handle_.get_obj()->unlock(lock_tid_); + if (OB_NOT_NULL(data_direct_load_mgr)) { + data_direct_load_mgr->unlock(lock_tid_); + } status_.set_ret_code(ret); status_.set_state(STATE_SUCCESS); try_release(); @@ -120,8 +134,11 @@ int ObDDLStartClogCb::on_failure() if (OB_UNLIKELY(!is_inited_)) { ret = 
OB_NOT_INIT; LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(direct_load_mgr_handle_.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(table_key_), K(execution_id_)); } else { - ddl_kv_mgr_handle_.get_obj()->unlock(lock_tid_); + direct_load_mgr_handle_.get_full_obj()->unlock(lock_tid_); } status_.set_state(STATE_FAILED); try_release(); @@ -138,7 +155,7 @@ void ObDDLStartClogCb::try_release() ObDDLMacroBlockClogCb::ObDDLMacroBlockClogCb() : is_inited_(false), status_(), ls_id_(), redo_info_(), macro_block_id_(), - data_buffer_lock_(), is_data_buffer_freed_(false), ddl_kv_mgr_handle_() + data_buffer_lock_(), is_data_buffer_freed_(false), direct_load_mgr_handle_() { } @@ -155,28 +172,40 @@ ObDDLMacroBlockClogCb::~ObDDLMacroBlockClogCb() int ObDDLMacroBlockClogCb::init(const share::ObLSID &ls_id, const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, const blocksstable::MacroBlockId ¯o_block_id, - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle) + ObTabletHandle &tablet_handle) { int ret = OB_SUCCESS; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + direct_load_mgr_handle_.reset(); + bool is_major_sstable_exist = false; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); - } else if (OB_UNLIKELY(!ls_id.is_valid() || !redo_info.is_valid() || !macro_block_id.is_valid() || !tablet_handle.is_valid() || !ddl_kv_mgr_handle.is_valid())) { + } else if (OB_UNLIKELY(!ls_id.is_valid() || !redo_info.is_valid() || !macro_block_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ls_id), K(redo_info), K(macro_block_id)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + ls_id, + redo_info.table_key_.tablet_id_, + true/* is_full_direct_load */, + direct_load_mgr_handle_, + 
is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + LOG_INFO("major sstable already exist", K(ret), K(redo_info_)); + } else { + LOG_WARN("get tablet mgr failed", K(ret), K(redo_info_)); + } } else if (OB_FAIL(OB_SERVER_BLOCK_MGR.inc_ref(macro_block_id))) { LOG_WARN("inc reference count failed", K(ret), K(macro_block_id)); } else { - redo_info_.data_buffer_.assign(const_cast(redo_info.data_buffer_.ptr()), redo_info.data_buffer_.length()); - redo_info_.block_type_ = redo_info.block_type_; - redo_info_.logic_id_ = redo_info.logic_id_; - redo_info_.table_key_ = redo_info.table_key_; - redo_info_.start_scn_ = redo_info.start_scn_; + redo_info_ = redo_info; ls_id_ = ls_id; macro_block_id_ = macro_block_id; tablet_handle_ = tablet_handle; - ddl_kv_mgr_handle_ = ddl_kv_mgr_handle; } return ret; } @@ -210,8 +239,14 @@ int ObDDLMacroBlockClogCb::on_success() macro_block.buf_ = redo_info_.data_buffer_.ptr(); macro_block.size_ = redo_info_.data_buffer_.length(); macro_block.ddl_start_scn_ = redo_info_.start_scn_; - if (OB_FAIL(ObDDLKVPendingGuard::set_macro_block(tablet_handle_.get_obj(), macro_block))) { - LOG_WARN("set macro block into ddl kv failed", K(ret), K(tablet_handle_), K(macro_block)); + macro_block.table_key_ = redo_info_.table_key_; + macro_block.end_row_id_ = redo_info_.end_row_id_; + const int64_t snapshot_version = redo_info_.table_key_.get_snapshot_version(); + const uint64_t data_format_version = redo_info_.data_format_version_; + if (OB_FAIL(ObDDLKVPendingGuard::set_macro_block(tablet_handle_.get_obj(), macro_block, + snapshot_version, data_format_version))) { + LOG_WARN("set macro block into ddl kv failed", K(ret), K(tablet_handle_), K(macro_block), + K(snapshot_version), K(data_format_version)); } } } @@ -229,7 +264,7 @@ int ObDDLMacroBlockClogCb::on_failure() } ObDDLCommitClogCb::ObDDLCommitClogCb() - : is_inited_(false), status_(), ls_id_(), tablet_id_(), start_scn_(SCN::min_scn()), 
lock_tid_(0), ddl_kv_mgr_handle_() + : is_inited_(false), status_(), ls_id_(), tablet_id_(), start_scn_(SCN::min_scn()), lock_tid_(0), direct_load_mgr_handle_() { } @@ -238,14 +273,16 @@ int ObDDLCommitClogCb::init(const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, const share::SCN &start_scn, const uint32_t lock_tid, - ObDDLKvMgrHandle &ddl_kv_mgr_handle) + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); + } else if (OB_FAIL(direct_load_mgr_handle_.assign(direct_load_mgr_handle))) { + LOG_WARN("assign handle failed", K(ret)); } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || !start_scn.is_valid_and_not_min() - || 0 == lock_tid || !ddl_kv_mgr_handle.is_valid())) { + || 0 == lock_tid)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_id), K(start_scn), K(lock_tid)); } else { @@ -253,7 +290,6 @@ int ObDDLCommitClogCb::init(const share::ObLSID &ls_id, tablet_id_ = tablet_id; start_scn_ = start_scn; lock_tid_ = lock_tid; - ddl_kv_mgr_handle_ = ddl_kv_mgr_handle; is_inited_ = true; } return ret; @@ -262,8 +298,18 @@ int ObDDLCommitClogCb::init(const share::ObLSID &ls_id, int ObDDLCommitClogCb::on_success() { int ret = OB_SUCCESS; - ddl_kv_mgr_handle_.get_obj()->set_commit_scn_nolock(__get_scn()); - ddl_kv_mgr_handle_.get_obj()->unlock(lock_tid_); + ObTabletFullDirectLoadMgr *data_direct_load_mgr = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(data_direct_load_mgr + = direct_load_mgr_handle_.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(tablet_id_)); + } else { + data_direct_load_mgr->set_commit_scn_nolock(__get_scn()); + data_direct_load_mgr->unlock(lock_tid_); + } status_.set_ret_code(ret); status_.set_state(STATE_SUCCESS); try_release(); @@ -273,7 +319,15 @@ int 
ObDDLCommitClogCb::on_success() int ObDDLCommitClogCb::on_failure() { int ret = OB_SUCCESS; - ddl_kv_mgr_handle_.get_obj()->unlock(lock_tid_); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(direct_load_mgr_handle_.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(tablet_id_)); + } else { + direct_load_mgr_handle_.get_full_obj()->unlock(lock_tid_); + } status_.set_state(STATE_FAILED); try_release(); return OB_SUCCESS; @@ -331,25 +385,30 @@ DEFINE_GET_SERIALIZE_SIZE(ObDDLClogHeader) } ObDDLStartLog::ObDDLStartLog() - : table_key_(), data_format_version_(0), execution_id_(-1) + : table_key_(), data_format_version_(0), execution_id_(-1), direct_load_type_(ObDirectLoadType::DIRECT_LOAD_DDL) /*for compatibility*/ { } -int ObDDLStartLog::init(const ObITable::TableKey &table_key, const int64_t data_format_version, const int64_t execution_id) +int ObDDLStartLog::init( + const ObITable::TableKey &table_key, + const uint64_t data_format_version, + const int64_t execution_id, + const ObDirectLoadType direct_load_type) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 || data_format_version_ < 0)) { + if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 || data_format_version <= 0 || !is_valid_direct_load(direct_load_type))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(table_key), K(execution_id), K(data_format_version)); + LOG_WARN("invalid argument", K(ret), K(table_key), K(execution_id), K(data_format_version), K(direct_load_type)); } else { table_key_ = table_key; data_format_version_ = data_format_version; execution_id_ = execution_id; + direct_load_type_ = direct_load_type; } return ret; } -OB_SERIALIZE_MEMBER(ObDDLStartLog, table_key_, data_format_version_, execution_id_); +OB_SERIALIZE_MEMBER(ObDDLStartLog, table_key_, data_format_version_, execution_id_, direct_load_type_); ObDDLRedoLog::ObDDLRedoLog() : 
redo_info_() diff --git a/src/storage/ddl/ob_ddl_clog.h b/src/storage/ddl/ob_ddl_clog.h index fb4c6d987..45ca2d1e0 100644 --- a/src/storage/ddl/ob_ddl_clog.h +++ b/src/storage/ddl/ob_ddl_clog.h @@ -16,6 +16,7 @@ #include "storage/ob_i_table.h" #include "storage/blocksstable/ob_block_sstable_struct.h" #include "storage/blocksstable/index_block/ob_index_block_builder.h" +#include "storage/ddl/ob_ddl_struct.h" #include "storage/meta_mem/ob_tablet_pointer.h" #include "logservice/ob_append_callback.h" @@ -80,7 +81,13 @@ class ObDDLStartClogCb : public logservice::AppendCb public: ObDDLStartClogCb(); virtual ~ObDDLStartClogCb() = default; - int init(const ObITable::TableKey &table_key, const int64_t data_format_version, const int64_t execution_id, const uint32_t lock_tid, ObDDLKvMgrHandle &ddl_kv_mgr_handle); + int init(const ObITable::TableKey &table_key, + const uint64_t data_format_version, + const int64_t execution_id, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + const uint32_t lock_tid); virtual int on_success() override; virtual int on_failure() override; inline bool is_success() const { return status_.is_success(); } @@ -93,10 +100,12 @@ private: bool is_inited_; ObDDLClogCbStatus status_; ObITable::TableKey table_key_; - int64_t data_format_version_; + uint64_t data_format_version_; int64_t execution_id_; uint32_t lock_tid_; ObDDLKvMgrHandle ddl_kv_mgr_handle_; + ObDDLKvMgrHandle lob_kv_mgr_handle_; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle_; }; class ObDDLMacroBlockClogCb : public logservice::AppendCb @@ -107,8 +116,7 @@ public: int init(const share::ObLSID &ls_id, const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, const blocksstable::MacroBlockId ¯o_block_id, - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle); + ObTabletHandle &tablet_handle); virtual int on_success() override; virtual int on_failure() override; inline bool is_success() 
const { return status_.is_success(); } @@ -124,8 +132,8 @@ private: blocksstable::MacroBlockId macro_block_id_; ObSpinLock data_buffer_lock_; bool is_data_buffer_freed_; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle_; ObTabletHandle tablet_handle_; - ObDDLKvMgrHandle ddl_kv_mgr_handle_; }; class ObDDLCommitClogCb : public logservice::AppendCb @@ -137,7 +145,7 @@ public: const common::ObTabletID &tablet_id, const share::SCN &start_scn, const uint32_t lock_tid, - ObDDLKvMgrHandle &ddl_kv_mgr_handle); + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle); virtual int on_success() override; virtual int on_failure() override; inline bool is_success() const { return status_.is_success(); } @@ -153,7 +161,7 @@ private: common::ObTabletID tablet_id_; share::SCN start_scn_; uint32_t lock_tid_; - ObDDLKvMgrHandle ddl_kv_mgr_handle_; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle_; }; class ObDDLClogHeader final @@ -177,16 +185,18 @@ class ObDDLStartLog final public: ObDDLStartLog(); ~ObDDLStartLog() = default; - int init(const ObITable::TableKey &table_key, const int64_t data_format_version, const int64_t execution_id); - bool is_valid() const { return table_key_.is_valid() && data_format_version_ >= 0 && execution_id_ >= 0; } + int init(const ObITable::TableKey &table_key, const uint64_t data_format_version, const int64_t execution_id, const ObDirectLoadType direct_load_type); + bool is_valid() const { return table_key_.is_valid() && data_format_version_ >= 0 && execution_id_ >= 0 && is_valid_direct_load(direct_load_type_); } ObITable::TableKey get_table_key() const { return table_key_; } - int64_t get_data_format_version() const { return data_format_version_; } + uint64_t get_data_format_version() const { return data_format_version_; } int64_t get_execution_id() const { return execution_id_; } - TO_STRING_KV(K_(table_key), K_(data_format_version), K_(execution_id)); + ObDirectLoadType get_direct_load_type() const { return direct_load_type_; } + 
TO_STRING_KV(K_(table_key), K_(data_format_version), K_(execution_id), K_(direct_load_type)); private: - ObITable::TableKey table_key_; - int64_t data_format_version_; // used for compatibility + ObITable::TableKey table_key_; // use table type to distinguish column store, column group id is valid + uint64_t data_format_version_; // used for compatibility int64_t execution_id_; + ObDirectLoadType direct_load_type_; }; class ObDDLRedoLog final diff --git a/src/storage/ddl/ob_ddl_merge_task.cpp b/src/storage/ddl/ob_ddl_merge_task.cpp index 4a087e801..692ee2016 100644 --- a/src/storage/ddl/ob_ddl_merge_task.cpp +++ b/src/storage/ddl/ob_ddl_merge_task.cpp @@ -22,6 +22,7 @@ #include "storage/blocksstable/index_block/ob_index_block_builder.h" #include "storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "storage/ddl/ob_direct_load_struct.h" #include "storage/ls/ob_ls.h" #include "storage/meta_mem/ob_tablet_handle.h" #include "storage/tablet/ob_tablet_create_delete_helper.h" @@ -33,6 +34,8 @@ #include "share/ob_ddl_sim_point.h" #include "observer/ob_server_event_history_table_operator.h" #include "storage/column_store/ob_column_oriented_sstable.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" +#include "storage/column_store/ob_column_oriented_sstable.h" using namespace oceanbase::observer; using namespace oceanbase::share::schema; @@ -79,45 +82,44 @@ int ObDDLTableMergeDag::init_by_param(const share::ObIDagInitParam *param) int ObDDLTableMergeDag::create_first_task() { int ret = OB_SUCCESS; - ObTablesHandleArray ddl_kvs_handle; + ObLSService *ls_service = MTL(ObLSService *); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObArray ddl_kvs_handle; ObDDLTableMergeTask *merge_task = nullptr; - if (OB_FAIL(ddl_param_.ddl_kv_mgr_handle_.get_obj()->get_ddl_kvs(true/*frozen_only*/, ddl_kvs_handle))) { + if (OB_FAIL(ls_service->get_ls(ddl_param_.ls_id_, 
ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ddl_param_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + ddl_param_.tablet_id_, + tablet_handle, + ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("get tablet failed", K(ret), K(ddl_param_)); + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(ddl_param_)); + } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_TASK_EXPIRED; + LOG_INFO("ddl kv mgr not exist", K(ret), K(ddl_param_)); + } else { + LOG_WARN("get ddl kv mgr failed", K(ret), K(ddl_param_)); + } + } else if (is_full_direct_load(ddl_param_.direct_load_type_) + && ddl_param_.start_scn_ < tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_) { + ret = OB_TASK_EXPIRED; + LOG_WARN("ddl task expired, skip it", K(ret), K(ddl_param_), "new_start_scn", tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_); + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->freeze_ddl_kv( + ddl_param_.start_scn_, ddl_param_.snapshot_version_, ddl_param_.data_format_version_))) { + LOG_WARN("ddl kv manager try freeze failed", K(ret), K(ddl_param_)); + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_ddl_kvs(true/*frozen_only*/, ddl_kvs_handle))) { LOG_WARN("get freezed ddl kv failed", K(ret), K(ddl_param_)); } else if (OB_FAIL(alloc_task(merge_task))) { LOG_WARN("Fail to alloc task", K(ret), K(ddl_param_)); - } else if (OB_FAIL(merge_task->init(ddl_param_))) { + } else if (OB_FAIL(merge_task->init(ddl_param_, ddl_kvs_handle))) { LOG_WARN("failed to init ddl table merge task", K(ret), K(*this)); } else if (OB_FAIL(add_task(*merge_task))) { LOG_WARN("Fail to add task", K(ret), K(ddl_param_)); - } else { - // use chain task to ensure log ts continuious in table store - ObDDLTableDumpTask *last_task = nullptr; - for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kvs_handle.get_count(); ++i) { - 
ObDDLKV *ddl_kv = static_cast(ddl_kvs_handle.get_table(i)); - ObDDLTableDumpTask *task = nullptr; - if (OB_ISNULL(ddl_kv)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get ddl kv failed", K(ret), K(i)); - } else if (OB_FAIL(alloc_task(task))) { - LOG_WARN("Fail to alloc task", K(ret)); - } else if (OB_FAIL(task->init(ddl_param_.ls_id_, - ddl_param_.tablet_id_, - ddl_kv->get_freeze_scn()))) { - LOG_WARN("failed to init ddl dump task", K(ret), K(ddl_param_), K(ddl_kv->get_freeze_scn())); - } else if (OB_FAIL(add_task(*task))) { - LOG_WARN("Fail to add task", K(ret), K(ddl_param_)); - } else { - if (nullptr != last_task && OB_FAIL(last_task->add_child(*task))) { - LOG_WARN("add child task failed", K(ret), K(ddl_param_)); - } - last_task = task; - } - } - if (OB_SUCC(ret)) { - if (nullptr != last_task && OB_FAIL(last_task->add_child(*merge_task))) { - LOG_WARN("add child merge task failed", K(ret), K(ddl_param_)); - } - } } return ret; } @@ -151,7 +153,8 @@ int ObDDLTableMergeDag::fill_info_param(compaction::ObIBasicInfoParam *&out_para } else if (OB_FAIL(ADD_DAG_WARN_INFO_PARAM(out_param, allocator, get_type(), ddl_param_.ls_id_.id(), static_cast(ddl_param_.tablet_id_.id()), - static_cast(ddl_param_.rec_scn_.get_val_for_inner_table_field())))) { + static_cast(ddl_param_.rec_scn_.get_val_for_inner_table_field()), + "is_commit", to_cstring(ddl_param_.is_commit_)))) { LOG_WARN("failed to fill info param", K(ret)); } return ret; @@ -176,95 +179,6 @@ bool ObDDLTableMergeDag::ignore_warning() || OB_NEED_RETRY == dag_ret_; } -/****************** ObDDLTableDumpTask *****************/ -ObDDLTableDumpTask::ObDDLTableDumpTask() - : ObITask(ObITaskType::TASK_TYPE_DDL_KV_DUMP), - is_inited_(false), ls_id_(), tablet_id_(), freeze_scn_(SCN::min_scn()) -{ - -} - -ObDDLTableDumpTask::~ObDDLTableDumpTask() -{ -} - -int ObDDLTableDumpTask::init(const share::ObLSID &ls_id, - const ObTabletID &tablet_id, - const SCN &freeze_scn) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(is_inited_)) { - 
ret = OB_INIT_TWICE; - LOG_WARN("init twice", K(ret), K(tablet_id_)); - } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || !freeze_scn.is_valid_and_not_min())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_id), K(freeze_scn)); - } else { - ls_id_ = ls_id; - tablet_id_ = tablet_id; - freeze_scn_ = freeze_scn; - is_inited_ = true; - } - return ret; -} - -int ObDDLTableDumpTask::process() -{ - int ret = OB_SUCCESS; - LOG_INFO("ddl dump task start process", K(*this), "ddl_event_info", ObDDLEventInfo()); - ObTabletHandle tablet_handle; - ObDDLKvMgrHandle ddl_kv_mgr_handle; - ObLSHandle ls_handle; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, - tablet_id_, - tablet_handle, - ObMDSGetTabletMode::READ_ALL_COMMITED))) { - LOG_WARN("failed to get tablet", K(ret), K(tablet_id_)); - } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - if (OB_ENTRY_NOT_EXIST == ret) { - ret = OB_TASK_EXPIRED; - LOG_INFO("ddl kv mgr not exist", K(ret), K(ls_id_), K(tablet_id_)); - } else { - LOG_WARN("get ddl kv mgr failed", K(ret), K(ls_id_), K(tablet_id_)); - } - } else { - ObTableHandleV2 ddl_kv_handle; - ObDDLKV *ddl_kv = nullptr; - ObTablesHandleArray ddl_sstable_handles; - bool need_compact = false; - ObArray candidate_sstables; - if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_freezed_ddl_kv(freeze_scn_, ddl_kv_handle))) { - LOG_WARN("get ddl kv handle failed", K(ret), K(freeze_scn_)); - if (OB_ENTRY_NOT_EXIST == ret) { - ret = OB_NEED_RETRY; // dag is async, the ddl kv may be dumped, no need record in dag warning history - } - } else if (OB_ISNULL(ddl_kv = static_cast(ddl_kv_handle.get_table()))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get ddl kv 
failed", K(ret)); - } else if (OB_FAIL(ddl_kv->close(*tablet_handle.get_obj()))) { - if (OB_EAGAIN != ret) { - LOG_WARN("close ddl kv failed", K(ret)); - } - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->release_ddl_kvs(freeze_scn_))) { - LOG_WARN("release ddl kv failed", K(ret), K(freeze_scn_)); - } - } - SERVER_EVENT_ADD("ddl", "ddl table dump task", - "tenant_id", MTL_ID(), - "ret", ret, - "trace_id", *ObCurTraceId::get_trace_id(), - "tablet_id", tablet_id_, - "freeze_scn", freeze_scn_, - "ls_id", ls_id_); - LOG_INFO("ddl dump task start process", K(ret), K(*this), "ddl_event_info", ObDDLEventInfo()); - return ret; -} - ObDDLTableMergeTask::ObDDLTableMergeTask() : ObITask(ObITaskType::TASK_TYPE_DDL_KV_MERGE), is_inited_(false), merge_param_() @@ -276,7 +190,7 @@ ObDDLTableMergeTask::~ObDDLTableMergeTask() { } -int ObDDLTableMergeTask::init(const ObDDLTableMergeDagParam &ddl_dag_param) +int ObDDLTableMergeTask::init(const ObDDLTableMergeDagParam &ddl_dag_param, const ObIArray &frozen_ddl_kvs) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { @@ -285,6 +199,8 @@ int ObDDLTableMergeTask::init(const ObDDLTableMergeDagParam &ddl_dag_param) } else if (OB_UNLIKELY(!ddl_dag_param.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ddl_dag_param)); + } else if (OB_FAIL(frozen_ddl_kvs_.assign(frozen_ddl_kvs))) { + LOG_WARN("assign ddl kv handle array failed", K(ret), K(frozen_ddl_kvs.count())); } else { merge_param_ = ddl_dag_param; is_inited_ = true; @@ -312,12 +228,19 @@ int ObDDLTableMergeTask::process() ObTabletMemberWrapper table_store_wrapper; const uint64_t tenant_id = MTL_ID(); common::ObArenaAllocator allocator("DDLMergeTask", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); - ObSSTable compact_sstable; + ObTableHandleV2 old_sstable_handle; + ObTableHandleV2 compacted_sstable_handle; ObSSTable *sstable = nullptr; - bool skip_major_process = false; + bool is_major_exist = false; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = 
MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle tablet_mgr_hdl; + ObTabletFullDirectLoadMgr *tablet_direct_load_mgr = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); } else if (OB_FAIL(MTL(ObLSService *)->get_ls(merge_param_.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("failed to get log stream", K(ret), K(merge_param_)); } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, @@ -334,7 +257,7 @@ int ObDDLTableMergeTask::process() } } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_sstables(ddl_table_iter))) { LOG_WARN("get ddl sstable handles failed", K(ret)); - } else if (ddl_table_iter.count() >= MAX_DDL_SSTABLE || merge_param_.is_commit_) { + } else { DEBUG_SYNC(BEFORE_DDL_TABLE_MERGE_TASK); #ifdef ERRSIM static int64_t counter = 0; @@ -347,96 +270,101 @@ int ObDDLTableMergeTask::process() ObTabletDDLParam ddl_param; bool is_data_complete = false; const ObSSTable *first_major_sstable = nullptr; + SCN compact_start_scn, compact_end_scn; if (OB_FAIL(ObTabletDDLUtil::check_and_get_major_sstable( merge_param_.ls_id_, merge_param_.tablet_id_, first_major_sstable, table_store_wrapper))) { LOG_WARN("check if major sstable exist failed", K(ret)); } else if (nullptr != first_major_sstable) { - LOG_INFO("major sstable has been created before", K(merge_param_), K(ddl_param.table_key_)); - if (OB_FAIL(tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) { - LOG_WARN("failed to fetch table store", K(ret)); - } else { - sstable = static_cast( - table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)); - skip_major_process = true; - } + is_major_exist = true; + LOG_INFO("major sstable has been created before", K(merge_param_)); } else if (tablet_handle.get_obj()->get_tablet_meta().table_store_flag_.with_major_sstable()) { - 
skip_major_process = true; + is_major_exist = true; LOG_INFO("tablet me says with major but no major, meaning its a migrated deleted tablet, skip"); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_ddl_param(ddl_param))) { - LOG_WARN("get tablet ddl param failed", K(ret)); - if (OB_STATE_NOT_MATCH == ret) { - ret = OB_NEED_RETRY; - } + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr(merge_param_.tablet_id_, + true /* is_full_direct_load */, + tablet_mgr_hdl))) { + LOG_WARN("get tablet direct load mgr failed", K(ret), K(merge_param_)); + } else if (OB_FAIL(tablet_mgr_hdl.get_full_obj()->prepare_major_merge_param(ddl_param))) { + LOG_WARN("preare full direct load sstable param failed", K(ret)); } else if (merge_param_.start_scn_ > SCN::min_scn() && merge_param_.start_scn_ < ddl_param.start_scn_) { ret = OB_TASK_EXPIRED; LOG_INFO("ddl merge task expired, do nothing", K(merge_param_), "new_start_scn", ddl_param.start_scn_); + } else if (OB_FAIL(ObTabletDDLUtil::get_compact_scn(ddl_table_iter, frozen_ddl_kvs_, compact_start_scn, compact_end_scn))) { + LOG_WARN("get compact scn failed", K(ret), K(merge_param_)); + } else if (ddl_param.commit_scn_.is_valid_and_not_min() && compact_end_scn > ddl_param.commit_scn_) { + ret = OB_ERR_SYS; + LOG_WARN("compact end scn is larger than commit scn", K(ret), K(ddl_param), K(compact_end_scn), K(frozen_ddl_kvs_), K(ddl_table_iter)); + } else { + bool is_data_complete = merge_param_.is_commit_ + && compact_start_scn == SCN::scn_dec(merge_param_.start_scn_) + && compact_end_scn == merge_param_.rec_scn_ #ifdef ERRSIM - } else { - const SCN commit_scn = ddl_kv_mgr_handle.get_obj()->get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()); - skip_major_process = commit_scn.is_valid_and_not_min() - && ObTimeUtility::current_time() - commit_scn.convert_to_ts() <= GCONF.errsim_ddl_major_delay_time; + // skip build major until current time reach the delayed time + && ObTimeUtility::current_time() > 
merge_param_.rec_scn_.convert_to_ts() + GCONF.errsim_ddl_major_delay_time #endif - } - if (OB_FAIL(ret)) { - } else if (skip_major_process) { - // do nothing - } else if (OB_FAIL(ObTabletDDLUtil::compact_ddl_sstable(*tablet_handle.get_obj(), - ddl_table_iter, - tablet_handle.get_obj()->get_rowkey_read_info(), - merge_param_.is_commit_, - merge_param_.rec_scn_, - ddl_param, - allocator, - compact_sstable))) { - LOG_WARN("compact sstables failed", K(ret)); - } else { - sstable = &compact_sstable; - } - - if (OB_SUCC(ret) && merge_param_.rec_scn_.is_valid_and_not_min()) { - // when the ddl dag is self scheduled when ddl kv is full, the rec_scn is invalid - // but no worry, the formmer ddl dump task will also release ddl kvs - if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->release_ddl_kvs(merge_param_.rec_scn_))) { - LOG_WARN("release ddl kv failed", K(ret)); - } - } - - if (OB_SUCC(ret) && merge_param_.is_commit_) { - if (skip_major_process) { - } else if (OB_ISNULL(sstable)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ddl major sstable is null", K(ret), K(ddl_param)); - } else if (OB_FAIL(MTL(ObTabletTableUpdater*)->submit_tablet_update_task(merge_param_.ls_id_, merge_param_.tablet_id_))) { - LOG_WARN("fail to submit tablet update task", K(ret), K(tenant_id), K(merge_param_)); - } - if (OB_FAIL(ret)) { - } else if (OB_ISNULL(sstable)) { - // not set success - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->set_commit_success(merge_param_.start_scn_))) { - if (OB_EAGAIN != ret) { - LOG_WARN("set is commit success failed", K(ret)); - } + ; + if (!is_data_complete) { + ddl_param.table_key_.table_type_ = ddl_param.table_key_.is_co_sstable() ? 
ObITable::DDL_MERGE_CO_SSTABLE : ObITable::DDL_DUMP_SSTABLE; + ddl_param.table_key_.scn_range_.start_scn_ = compact_start_scn; + ddl_param.table_key_.scn_range_.end_scn_ = compact_end_scn; } else { - LOG_INFO("commit ddl sstable succ", K(ddl_param), K(merge_param_), "ddl_event_info", ObDDLEventInfo()); + // use the final table key of major, do nothing } + if (OB_FAIL(ObTabletDDLUtil::compact_ddl_kv(*ls_handle.get_ls(), + *tablet_handle.get_obj(), + ddl_table_iter, + frozen_ddl_kvs_, + ddl_param, + allocator, + compacted_sstable_handle))) { + LOG_WARN("compact sstables failed", K(ret), K(ddl_param), K(is_data_complete)); + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->release_ddl_kvs(compact_end_scn))) { + LOG_WARN("release ddl kv failed", K(ret), K(ddl_param), K(compact_end_scn)); + } + if (OB_SUCC(ret) && is_data_complete) { + is_major_exist = true; + LOG_INFO("create major sstable success", K(ret), K(ddl_param), KPC(compacted_sstable_handle.get_table())); + } + } + + if (OB_SUCC(ret) && merge_param_.is_commit_ && is_major_exist) { + if (OB_FAIL(MTL(ObTabletTableUpdater*)->submit_tablet_update_task(merge_param_.ls_id_, merge_param_.tablet_id_))) { + LOG_WARN("fail to submit tablet update task", K(ret), K(tenant_id), K(merge_param_)); + } else if (OB_FAIL(tenant_direct_load_mgr->remove_tablet_direct_load(ObTabletDirectLoadMgrKey(merge_param_.tablet_id_, true)))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("remove tablet mgr failed", K(ret), K(MTL_ID()), K(merge_param_)); + } + } + LOG_INFO("commit ddl sstable finished", K(ret), K(ddl_param), K(merge_param_), KPC(tablet_mgr_hdl.get_full_obj()), "ddl_event_info", ObDDLEventInfo()); } } return ret; } // the input ddl sstable is sorted with start_scn -int ObTabletDDLUtil::check_data_integrity(ObTableStoreIterator &ddl_sstable_iter, - const SCN &start_scn, - const SCN &prepare_scn, - bool &is_data_complete) +int ObTabletDDLUtil::check_data_continue( + ObTableStoreIterator 
&ddl_sstable_iter, + bool &is_data_continue, + share::SCN &compact_start_scn, + share::SCN &compact_end_scn) { int ret = OB_SUCCESS; - is_data_complete = false; - if (OB_UNLIKELY(!ddl_sstable_iter.is_valid() || !start_scn.is_valid_and_not_min() || !prepare_scn.is_valid_and_not_min() || prepare_scn < start_scn)) { + is_data_continue = false; + ddl_sstable_iter.resume(); + if (OB_UNLIKELY(!ddl_sstable_iter.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ddl_sstable_iter.count()), K(start_scn), K(prepare_scn)); - } else if (0 == ddl_sstable_iter.count()) { - is_data_complete = false; + LOG_WARN("invalid argument", K(ret), K(ddl_sstable_iter.count())); + } else if (1 == ddl_sstable_iter.count()) { + ObITable *single_table = nullptr; + if (OB_FAIL(ddl_sstable_iter.get_boundary_table(true/*is_last*/, single_table))) { + LOG_WARN("get single table failed", K(ret)); + } else { + is_data_continue = true; + compact_start_scn = SCN::min(compact_start_scn, single_table->get_start_scn()); + compact_end_scn = SCN::max(compact_end_scn, single_table->get_end_scn()); + } } else { ObITable *first_ddl_sstable = nullptr; ObITable *last_ddl_sstable = nullptr; @@ -444,128 +372,139 @@ int ObTabletDDLUtil::check_data_integrity(ObTableStoreIterator &ddl_sstable_iter LOG_WARN("fail to get first ddl sstable", K(ret)); } else if (OB_FAIL(ddl_sstable_iter.get_boundary_table(true, last_ddl_sstable))) { LOG_WARN("fail to get last ddl sstable", K(ret)); - } else if (first_ddl_sstable->get_start_scn() != SCN::scn_dec(start_scn)) { - LOG_INFO("start log ts not match", K(first_ddl_sstable->get_key()), K(start_scn)); - } else if (last_ddl_sstable->get_end_scn() != prepare_scn) { - LOG_INFO("prepare log ts not match", K(last_ddl_sstable->get_key()), K(prepare_scn)); - } else if (1 == ddl_sstable_iter.count()) { - // Only one ddl table, skip - is_data_complete = true; } else { - is_data_complete = true; + is_data_continue = true; SCN last_end_scn = 
first_ddl_sstable->get_end_scn(); - ObITable *cur_ddl_sstable = nullptr; - while (OB_SUCC(ddl_sstable_iter.get_next(cur_ddl_sstable))) { - if (OB_ISNULL(cur_ddl_sstable) || OB_UNLIKELY(!cur_ddl_sstable->is_sstable())) { + ObITable *table = nullptr; + while (OB_SUCC(ddl_sstable_iter.get_next(table))) { + if (OB_ISNULL(table) || OB_UNLIKELY(!table->is_sstable())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(cur_ddl_sstable)); - } else if (cur_ddl_sstable->get_start_scn() <= last_end_scn) { - last_end_scn = SCN::max(last_end_scn, cur_ddl_sstable->get_end_scn()); + LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(table)); } else { - is_data_complete = false; - LOG_INFO("ddl sstable not continue", K(cur_ddl_sstable->get_key()), K(last_end_scn)); - break; + ObSSTable *cur_ddl_sstable = static_cast(table); + if (cur_ddl_sstable->get_start_scn() <= last_end_scn) { + last_end_scn = SCN::max(last_end_scn, cur_ddl_sstable->get_end_scn()); + } else { + is_data_continue = false; + LOG_INFO("ddl sstable not continue", K(cur_ddl_sstable->get_key()), K(last_end_scn)); + break; + } } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } + if (OB_SUCC(ret) && is_data_continue) { + compact_start_scn = SCN::min(compact_start_scn, first_ddl_sstable->get_start_scn()); + compact_end_scn = SCN::max(compact_end_scn, last_ddl_sstable->get_end_scn()); + } } } return ret; } -ObTabletDDLParam::ObTabletDDLParam() - : tenant_id_(0), ls_id_(), table_key_(), start_scn_(SCN::min_scn()), commit_scn_(SCN::min_scn()), snapshot_version_(0), data_format_version_(0) + +int ObTabletDDLUtil::check_data_continue( + const ObIArray &ddl_kvs, + bool &is_data_continue, + share::SCN &compact_start_scn, + share::SCN &compact_end_scn) { - -} - -ObTabletDDLParam::~ObTabletDDLParam() -{ - -} - -bool ObTabletDDLParam::is_valid() const -{ - return tenant_id_ > 0 && tenant_id_ != OB_INVALID_ID - && ls_id_.is_valid() - && table_key_.is_valid() - && 
start_scn_.is_valid_and_not_min() - && commit_scn_.is_valid() && commit_scn_ != SCN::max_scn() - && snapshot_version_ > 0 - && data_format_version_ >= 0; + int ret = OB_SUCCESS; + is_data_continue = false; + if (OB_UNLIKELY(ddl_kvs.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ddl_kvs.count())); + } else if (1 == ddl_kvs.count()) { + is_data_continue = true; + ObDDLKV *single_kv = ddl_kvs.at(0).get_obj(); + compact_start_scn = SCN::min(compact_start_scn, single_kv->get_start_scn()); + compact_end_scn = SCN::max(compact_end_scn, single_kv->get_end_scn()); + } else { + ObDDLKVHandle first_kv_handle = ddl_kvs.at(0); + ObDDLKVHandle last_kv_handle = ddl_kvs.at(ddl_kvs.count() - 1); + is_data_continue = true; + SCN last_end_scn = first_kv_handle.get_obj()->get_end_scn(); + for (int64_t i = 1; OB_SUCC(ret) && i < ddl_kvs.count(); ++i) { + ObDDLKVHandle cur_kv = ddl_kvs.at(i); + if (OB_ISNULL(cur_kv.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl kv is null", K(ret), K(i)); + } else if (cur_kv.get_obj()->get_start_scn() <= last_end_scn) { + last_end_scn = SCN::max(last_end_scn, cur_kv.get_obj()->get_end_scn()); + } else { + is_data_continue = false; + LOG_INFO("ddl kv not continue", K(i), K(last_end_scn), KPC(cur_kv.get_obj())); + break; + } + } + if (OB_SUCC(ret) && is_data_continue) { + compact_start_scn = SCN::min(compact_start_scn, first_kv_handle.get_obj()->get_start_scn()); + compact_end_scn = SCN::max(compact_end_scn, last_kv_handle.get_obj()->get_end_scn()); + } + } + return ret; } int ObTabletDDLUtil::prepare_index_data_desc(ObTablet &tablet, + const int64_t cg_idx, const int64_t snapshot_version, - const int64_t data_format_version, + const uint64_t data_format_version, const ObSSTable *first_ddl_sstable, + const SCN &end_scn, ObWholeDataStoreDesc &data_desc) { int ret = OB_SUCCESS; data_desc.reset(); ObLSService *ls_service = MTL(ObLSService *); ObArenaAllocator tmp_arena("DDLIdxDescTmp", OB_MALLOC_NORMAL_BLOCK_SIZE, 
MTL_ID()); - ObStorageSchema *storage_schema = nullptr; const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || snapshot_version <= 0 || data_format_version < 0)) { + ObStorageSchema *storage_schema = nullptr; + if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || snapshot_version <= 0 || data_format_version <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_id), K(snapshot_version), K(data_format_version)); } else if (OB_FAIL(tablet.load_storage_schema(tmp_arena, storage_schema))) { LOG_WARN("fail to get storage schema", K(ret)); + } else if (cg_idx >= 0) { + const ObIArray &cg_schemas = storage_schema->get_column_groups(); + if (cg_idx >= cg_schemas.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid cg idx", K(ret), K(cg_idx), K(cg_schemas.count())); + } else { + const ObStorageColumnGroupSchema &cur_cg_schema = cg_schemas.at(cg_idx); + if (OB_FAIL(data_desc.init(*storage_schema, ls_id, tablet_id, + compaction::ObMergeType::MAJOR_MERGE, snapshot_version, data_format_version, end_scn, &cur_cg_schema, cg_idx))) { + LOG_WARN("init data desc for cg failed", K(ret)); + } else { + LOG_DEBUG("get data desc from column group schema", K(ret), K(tablet_id), K(cg_idx), K(data_desc), K(cur_cg_schema)); + } + } } else if (OB_FAIL(data_desc.init(*storage_schema, ls_id, tablet_id, - MAJOR_MERGE, + compaction::MAJOR_MERGE, snapshot_version, data_format_version))) { // use storage schema to init ObDataStoreDesc // all cols' default checksum will assigned to 0 // means all macro should contain all columns in schema LOG_WARN("init data store desc failed", K(ret), K(tablet_id)); - } else { - if (nullptr != first_ddl_sstable) { - // use the param in first ddl sstable, which persist the param when ddl start - ObSSTableMetaHandle meta_handle; - if (OB_FAIL(first_ddl_sstable->get_meta(meta_handle))) { - 
LOG_WARN("get sstable meta handle fail", K(ret), KPC(first_ddl_sstable)); - } else { - const ObSSTableBasicMeta &basic_meta = meta_handle.get_sstable_meta().get_basic_meta(); - if (OB_FAIL(data_desc.get_desc().update_basic_info_from_macro_meta( - meta_handle.get_sstable_meta().get_basic_meta()))) { - LOG_WARN("failed to update basic info from macro_meta", KR(ret), K(basic_meta)); - } + } + if (OB_SUCC(ret) && nullptr != first_ddl_sstable) { + // use the param in first ddl sstable, which persist the param when ddl start + ObSSTableMetaHandle meta_handle; + if (OB_FAIL(first_ddl_sstable->get_meta(meta_handle))) { + LOG_WARN("get sstable meta handle fail", K(ret), KPC(first_ddl_sstable)); + } else { + const ObSSTableBasicMeta &basic_meta = meta_handle.get_sstable_meta().get_basic_meta(); + if (OB_FAIL(data_desc.get_desc().update_basic_info_from_macro_meta(basic_meta))) { + LOG_WARN("failed to update basic info from macro_meta", KR(ret), K(basic_meta)); } } } ObTabletObjLoadHelper::free(tmp_arena, storage_schema); - return ret; -} - -int ObTabletDDLUtil::try_get_first_ddl_sstable(ObTablet &tablet, - ObTableStoreIterator &ddl_table_iter, - blocksstable::ObSSTable *&first_sstable) -{ - int ret = OB_SUCCESS; - first_sstable = nullptr; - - ObITable *first_ddl_sstable = nullptr; - if (OB_FAIL(tablet.get_ddl_sstables(ddl_table_iter))) { - LOG_WARN("get ddl sstable handles failed", K(ret)); - } else if (ddl_table_iter.count() > 0) { - if (OB_FAIL(ddl_table_iter.get_boundary_table(false/*is_last*/, first_ddl_sstable))) { - LOG_WARN("failed to get boundary table", K(ret)); - } else if (OB_ISNULL(first_ddl_sstable)) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("first_ddl_sstable must not null", K(ret)); - } - } - - if (OB_NOT_NULL(first_ddl_sstable)) { - first_sstable = static_cast(first_ddl_sstable); - } + LOG_DEBUG("prepare_index_data_desc", K(ret), K(data_desc)); return ret; } @@ -574,72 +513,67 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, const ObIArray 
&meta_array, const ObSSTable *first_ddl_sstable, common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable) + ObTableHandleV2 &sstable_handle) { int ret = OB_SUCCESS; - ObArenaAllocator arena("DdlCreateSST", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); - void *buf = nullptr; - ObSSTableIndexBuilder *sstable_index_builder = nullptr; - ObIndexBlockRebuilder *index_block_rebuilder = nullptr; - ObWholeDataStoreDesc data_desc(true/*is_ddl*/); - if (OB_UNLIKELY(!ddl_param.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ddl_param)); - } else if (OB_FAIL(ObTabletDDLUtil::prepare_index_data_desc(tablet, - ddl_param.table_key_.version_range_.snapshot_version_, - ddl_param.data_format_version_, - first_ddl_sstable, - data_desc))) { + HEAP_VAR(ObSSTableIndexBuilder, sstable_index_builder) { + ObIndexBlockRebuilder index_block_rebuilder; + ObWholeDataStoreDesc data_desc(true/*is_ddl*/); + int64_t macro_block_column_count = 0; + if (OB_UNLIKELY(!ddl_param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ddl_param)); + } else if (OB_FAIL(ObTabletDDLUtil::prepare_index_data_desc( + tablet, + ddl_param.table_key_.is_column_store_sstable() ? ddl_param.table_key_.get_column_group_id() : -1/*negative value means row store*/, + ddl_param.snapshot_version_, + ddl_param.data_format_version_, + first_ddl_sstable, + ddl_param.table_key_.get_end_scn(), + data_desc))) { LOG_WARN("prepare data store desc failed", K(ret), K(ddl_param)); - } else if (OB_ISNULL(buf = arena.alloc(sizeof(ObSSTableIndexBuilder)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory for sstable index builder failed", K(ret)); - } else if (FALSE_IT(sstable_index_builder = new (buf) ObSSTableIndexBuilder)) { - } else if (OB_FAIL(sstable_index_builder->init(data_desc.get_desc(), - nullptr, // macro block flush callback - ddl_param.table_key_.is_major_sstable() ? 
ObSSTableIndexBuilder::ENABLE : ObSSTableIndexBuilder::DISABLE))) { - LOG_WARN("init sstable index builder failed", K(ret), K(data_desc)); - } else if (OB_ISNULL(buf = arena.alloc(sizeof(ObIndexBlockRebuilder)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret)); - } else if (FALSE_IT(index_block_rebuilder = new (buf) ObIndexBlockRebuilder)) { - } else if (OB_FAIL(index_block_rebuilder->init(*sstable_index_builder))) { - LOG_WARN("fail to alloc index builder", K(ret)); - } else if (meta_array.empty()) { - // do nothing - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < meta_array.count(); ++i) { - if (OB_FAIL(index_block_rebuilder->append_macro_row(*meta_array.at(i)))) { - LOG_WARN("append block meta failed", K(ret), K(i)); + } else if (FALSE_IT(macro_block_column_count = meta_array.empty() ? 0 : meta_array.at(0)->get_meta_val().column_count_)) { + } else if (meta_array.count() > 0 && OB_FAIL(data_desc.get_col_desc().mock_valid_col_default_checksum_array(macro_block_column_count))) { + LOG_WARN("mock valid column default checksum failed", K(ret), "firt_macro_block_meta", to_cstring(meta_array.at(0)), K(ddl_param)); + } else if (OB_FAIL(sstable_index_builder.init(data_desc.get_desc(), + nullptr, // macro block flush callback + ddl_param.table_key_.is_major_sstable() ? 
ObSSTableIndexBuilder::ENABLE : ObSSTableIndexBuilder::DISABLE))) { + LOG_WARN("init sstable index builder failed", K(ret), K(data_desc)); + } else if (OB_FAIL(index_block_rebuilder.init(sstable_index_builder, + false/*need_sort*/, + nullptr/*task_idx*/, + true/*use_absolute_offset*/))) { + LOG_WARN("fail to alloc index builder", K(ret)); + } else if (meta_array.empty()) { + // do nothing + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < meta_array.count(); ++i) { + if (OB_FAIL(index_block_rebuilder.append_macro_row(*meta_array.at(i)))) { + LOG_WARN("append block meta failed", K(ret), K(i)); + } } } - } - if (OB_SUCC(ret)) { - if (OB_FAIL(index_block_rebuilder->close())) { - LOG_WARN("close index block rebuilder failed", K(ret)); - } else if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(tablet, sstable_index_builder, ddl_param, first_ddl_sstable, allocator, sstable))) { - LOG_WARN("create ddl sstable failed", K(ret), K(ddl_param)); + if (OB_SUCC(ret)) { + if (OB_FAIL(index_block_rebuilder.close())) { + LOG_WARN("close index block rebuilder failed", K(ret)); + } else if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(tablet, &sstable_index_builder, ddl_param, first_ddl_sstable, + macro_block_column_count, allocator, sstable_handle))) { + LOG_WARN("create ddl sstable failed", K(ret), K(ddl_param)); + } } } - if (nullptr != index_block_rebuilder) { - index_block_rebuilder->~ObIndexBlockRebuilder(); - arena.free(index_block_rebuilder); - index_block_rebuilder = nullptr; - } - if (nullptr != sstable_index_builder) { - sstable_index_builder->~ObSSTableIndexBuilder(); - arena.free(sstable_index_builder); - sstable_index_builder = nullptr; - } return ret; } -int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, - ObSSTableIndexBuilder *sstable_index_builder, - const ObTabletDDLParam &ddl_param, - const ObSSTable *first_ddl_sstable, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable) + +int ObTabletDDLUtil::create_ddl_sstable( + ObTablet &tablet, + 
ObSSTableIndexBuilder *sstable_index_builder, + const ObTabletDDLParam &ddl_param, + const ObSSTable *first_ddl_sstable, + const int64_t macro_block_column_count, + common::ObArenaAllocator &allocator, + ObTableHandleV2 &sstable_handle) { int ret = OB_SUCCESS; ObArenaAllocator tmp_arena("CreateDDLSstTmp", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); @@ -649,9 +583,10 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(sstable_index_builder), K(ddl_param)); } else if (OB_FAIL(tablet.load_storage_schema(tmp_arena, storage_schema))) { - LOG_WARN("failed to load storage schema", K(ret), K(tablet)); + LOG_WARN("failed to load storage schema", K(ret), K(tablet.get_tablet_meta())); } else { int64_t column_count = 0; + int64_t full_column_cnt = 0; // only used for co sstable share::schema::ObTableMode table_mode = storage_schema->get_table_mode_struct(); share::schema::ObIndexType index_type = storage_schema->get_index_type(); int64_t rowkey_column_cnt = storage_schema->get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); @@ -666,12 +601,52 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, index_type = static_cast(meta_handle.get_sstable_meta().get_basic_meta().index_type_); rowkey_column_cnt = meta_handle.get_sstable_meta().get_basic_meta().rowkey_column_count_; row_store_type = meta_handle.get_sstable_meta().get_basic_meta().latest_row_store_type_; + if (first_ddl_sstable->is_co_sstable()) { + const ObCOSSTableV2 *first_co_sstable = static_cast(first_ddl_sstable); + if (OB_ISNULL((first_co_sstable))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("first co sstable is null", K(ret), KP(first_co_sstable), KPC(first_ddl_sstable)); + } else { + full_column_cnt = first_co_sstable->get_cs_meta().full_column_cnt_; + } + } + } + } else if (ddl_param.table_key_.is_column_store_sstable()) { + if (ddl_param.table_key_.is_normal_cg_sstable()) { + rowkey_column_cnt = 0; + 
column_count = 1; + } else { // co sstable with all cg or rowkey cg + const ObIArray &cg_schemas = storage_schema->get_column_groups(); + const int64_t cg_idx = ddl_param.table_key_.get_column_group_id(); + if (cg_idx < 0 || cg_idx >= cg_schemas.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column group index", K(ret), K(cg_idx)); + } else if (OB_FAIL(storage_schema->get_stored_column_count_in_sstable(full_column_cnt))) { // set full_column_cnt in first ddl sstable + LOG_WARN("fail to get stored column count in sstable", K(ret)); + } else if (cg_schemas.at(cg_idx).is_rowkey_column_group()) { + column_count = rowkey_column_cnt; + } else { + column_count = full_column_cnt; + if (macro_block_column_count > 0 && macro_block_column_count < column_count) { + LOG_INFO("use macro block column count", K(ddl_param), K(macro_block_column_count), K(column_count)); + column_count = macro_block_column_count; + full_column_cnt = macro_block_column_count; + } + } + } + } else { // row store sstable + if (OB_FAIL(storage_schema->get_stored_column_count_in_sstable(column_count))) { + LOG_WARN("fail to get stored column count in sstable", K(ret)); + } else if (macro_block_column_count > 0 && macro_block_column_count < column_count) { + LOG_INFO("use macro block column count", K(ddl_param), K(macro_block_column_count), K(column_count)); + column_count = macro_block_column_count; } - } else if (OB_FAIL(storage_schema->get_stored_column_count_in_sstable(column_count))) { - LOG_WARN("fail to get stored column count in sstable", K(ret)); } - if (FAILEDx(sstable_index_builder->close(res))) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sstable_index_builder->close(res))) { LOG_WARN("close sstable index builder close failed", K(ret)); + } else if (ddl_param.table_key_.is_normal_cg_sstable() // index builder of cg sstable cannot get trans_version from row, manually set it + && FALSE_IT(res.max_merged_trans_version_ = ddl_param.snapshot_version_)) { } else if 
(OB_UNLIKELY((ddl_param.table_key_.is_major_sstable() || ddl_param.table_key_.is_ddl_sstable()) && res.row_count_ > 0 && @@ -703,6 +678,7 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, param.use_old_macro_block_count_ = res.use_old_macro_block_count_; param.row_count_ = res.row_count_; param.column_cnt_ = column_count; + param.full_column_cnt_ = full_column_cnt; param.data_checksum_ = res.data_checksum_; param.occupy_size_ = res.occupy_size_; param.original_size_ = res.original_size_; @@ -716,18 +692,47 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, param.data_block_ids_ = res.data_block_ids_; param.other_block_ids_ = res.other_block_ids_; MEMCPY(param.encrypt_key_, res.encrypt_key_, share::OB_MAX_TABLESPACE_ENCRYPT_KEY_LENGTH); - - if (OB_FAIL(param.column_checksums_.assign(res.data_column_checksums_))) { + if (ddl_param.table_key_.is_co_sstable()) { + param.column_group_cnt_ = storage_schema->get_column_group_count(); + // only set true when build empty major sstable. 
ddl co sstable must set false and fill cg sstables + param.is_empty_co_table_ = ddl_param.table_key_.is_major_sstable() && 0 == param.data_blocks_cnt_; + const int64_t base_cg_idx = ddl_param.table_key_.get_column_group_id(); + if (base_cg_idx < 0 || base_cg_idx >= storage_schema->get_column_group_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid column group index", K(ret), K(ddl_param.table_key_)); + } else { + const ObStorageColumnGroupSchema &base_cg_schema = storage_schema->get_column_groups().at(base_cg_idx); + if (base_cg_schema.is_all_column_group()) { + param.co_base_type_ = ObCOSSTableBaseType::ALL_CG_TYPE; + } else if (base_cg_schema.is_rowkey_column_group()) { + param.co_base_type_ = ObCOSSTableBaseType::ROWKEY_CG_TYPE; + } else { + ret = OB_ERR_SYS; + LOG_WARN("unknown type of base cg schema", K(ret), K(base_cg_idx)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(param.column_checksums_.assign(res.data_column_checksums_))) { LOG_WARN("fail to fill column checksum for empty major", K(ret), K(param)); } else if (OB_UNLIKELY(param.column_checksums_.count() != column_count)) { // we have corrected the col_default_checksum_array_ in prepare_index_data_desc ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected column checksums", K(ret), K(column_count), K(param)); - } else if (OB_FAIL(ObTabletCreateDeleteHelper::create_sstable(param, allocator, sstable))) { - LOG_WARN("create sstable failed", K(ret), K(param)); } else { - LOG_INFO("create ddl sstable success", K(ddl_param), K(sstable), - "create_schema_version", create_schema_version_on_tablet); + if (ddl_param.table_key_.is_co_sstable()) { + if (OB_FAIL(ObTabletCreateDeleteHelper::create_sstable(param, allocator, sstable_handle))) { + LOG_WARN("create sstable failed", K(ret), K(param)); + } + } else { + if (OB_FAIL(ObTabletCreateDeleteHelper::create_sstable(param, allocator, sstable_handle))) { + LOG_WARN("create sstable failed", K(ret), K(param)); + } + } + if (OB_SUCC(ret)) { + LOG_INFO("create 
ddl sstable success", K(ddl_param), K(sstable_handle), + "create_schema_version", create_schema_version_on_tablet); + } } } } @@ -736,34 +741,31 @@ int ObTabletDDLUtil::create_ddl_sstable(ObTablet &tablet, return ret; } -int ObTabletDDLUtil::update_ddl_table_store(ObTablet &tablet, - const ObTabletDDLParam &ddl_param, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable) +int ObTabletDDLUtil::update_ddl_table_store( + ObLS &ls, + ObTablet &tablet, + const ObTabletDDLParam &ddl_param, + common::ObArenaAllocator &allocator, + blocksstable::ObSSTable *sstable) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!ddl_param.is_valid())) { + if (OB_UNLIKELY(!ddl_param.is_valid() || nullptr == sstable)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ddl_param)); + LOG_WARN("invalid argument", K(ret), K(ddl_param), KP(sstable)); } else { - ObLSService *ls_service = MTL(ObLSService *); - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; ObArenaAllocator allocator("DDLUtil_update", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); ObStorageSchema *tablet_storage_schema = nullptr; - if (OB_FAIL(ls_service->get_ls(ddl_param.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("get ls failed", K(ret), K(ddl_param)); - } else if (OB_FAIL(tablet.load_storage_schema(allocator, tablet_storage_schema))) { + if (OB_FAIL(tablet.load_storage_schema(allocator, tablet_storage_schema))) { LOG_WARN("fail to load storage schema failed", K(ret)); } else { const bool is_major_sstable = ddl_param.table_key_.is_major_sstable(); - const int64_t rebuild_seq = ls_handle.get_ls()->get_rebuild_seq(); + const int64_t rebuild_seq = ls.get_rebuild_seq(); const int64_t snapshot_version = is_major_sstable ? max(ddl_param.snapshot_version_, tablet.get_snapshot_version()) : tablet.get_snapshot_version(); const int64_t multi_version_start = is_major_sstable ? 
max(ddl_param.snapshot_version_, tablet.get_multi_version_start()) : 0; ObTabletHandle new_tablet_handle; - ObUpdateTableStoreParam table_store_param(&sstable, + ObUpdateTableStoreParam table_store_param(sstable, snapshot_version, multi_version_start, rebuild_seq, @@ -771,16 +773,17 @@ int ObTabletDDLUtil::update_ddl_table_store(ObTablet &tablet, is_major_sstable, // update_with_major_flag /*DDL does not have verification between replicas, So using medium merge to force verification between replicas*/ - MEDIUM_MERGE, + compaction::MEDIUM_MERGE, is_major_sstable// need report checksum ); table_store_param.ddl_info_.keep_old_ddl_sstable_ = !is_major_sstable; table_store_param.ddl_info_.data_format_version_ = ddl_param.data_format_version_; table_store_param.ddl_info_.ddl_commit_scn_ = ddl_param.commit_scn_; - if (OB_FAIL(ls_handle.get_ls()->update_tablet_table_store(ddl_param.table_key_.get_tablet_id(), table_store_param, new_tablet_handle))) { + table_store_param.ddl_info_.ddl_checkpoint_scn_ = sstable->is_ddl_dump_sstable() ? 
sstable->get_end_scn() : ddl_param.commit_scn_; + if (OB_FAIL(ls.update_tablet_table_store(ddl_param.table_key_.get_tablet_id(), table_store_param, new_tablet_handle))) { LOG_WARN("failed to update tablet table store", K(ret), K(ddl_param.table_key_), K(table_store_param)); } else { - LOG_INFO("ddl update table store success", K(ddl_param), K(table_store_param), K(sstable)); + LOG_INFO("ddl update table store success", K(ddl_param), K(table_store_param), KPC(sstable)); } } ObTabletObjLoadHelper::free(allocator, tablet_storage_schema); @@ -788,169 +791,413 @@ int ObTabletDDLUtil::update_ddl_table_store(ObTablet &tablet, return ret; } -int ObTabletDDLUtil::compact_ddl_sstable(ObTablet &tablet, - ObTableStoreIterator &ddl_sstable_iter, - const ObITableReadInfo &read_info, - const bool is_commit, - const share::SCN &rec_scn, - ObTabletDDLParam &ddl_param, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable) +int get_sstables(ObTableStoreIterator &ddl_sstable_iter, const int64_t cg_idx, ObIArray &target_sstables) { int ret = OB_SUCCESS; - ObArenaAllocator arena("compact_sst", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); - ObBlockMetaTree meta_tree; - ObArray sorted_metas; - bool is_data_complete = false; - ObITable *first_ddl_sstable = nullptr; - ObITable *last_ddl_sstable = nullptr; - SCN sstable_end_scn = SCN::max_scn(); - - if (OB_UNLIKELY(!ddl_param.is_valid() || (is_commit && !rec_scn.is_valid_and_not_min()))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ddl_param), K(is_commit), K(rec_scn)); - } else if (ddl_sstable_iter.count() > 0) { - if (OB_FAIL(ddl_sstable_iter.get_boundary_table(false/*is_last*/, first_ddl_sstable))) { - LOG_WARN("failed to get boundary table", K(ret)); - } else if (OB_ISNULL(first_ddl_sstable)) { - ret = OB_ERR_SYS; - LOG_ERROR("first_ddl_sstable must not null", K(ret)); - } else if (OB_FAIL(ddl_sstable_iter.get_boundary_table(true/*is_last*/, last_ddl_sstable))) { - LOG_WARN("failed to get 
boundary table", K(ret)); - } else if (OB_ISNULL(last_ddl_sstable)) { - ret = OB_ERR_SYS; - LOG_ERROR("last_ddl_sstable must not null", K(ret)); - } else { - sstable_end_scn = last_ddl_sstable->get_end_scn(); - } - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObTabletDDLUtil::check_data_integrity(ddl_sstable_iter, - ddl_param.start_scn_, - is_commit ? rec_scn : sstable_end_scn, - is_data_complete))) { - LOG_WARN("check ddl sstable integrity failed", K(ret), K(ddl_sstable_iter), K(ddl_param)); - } else if (!is_data_complete) { - ret = OB_EAGAIN; - if (TC_REACH_TIME_INTERVAL(10L * 1000L * 1000L)) { - LOG_WARN("current ddl sstables not contain all data", K(ddl_sstable_iter), K(ddl_param)); - } - } else if (OB_FAIL(meta_tree.init(tablet, ddl_param.table_key_, ddl_param.start_scn_, ddl_param.data_format_version_))) { - LOG_WARN("init meta tree failed", K(ret), K(ddl_param)); - } else if (FALSE_IT(ddl_sstable_iter.resume())) { - } else { - ddl_sstable_iter.resume(); - ObDatumRowkey last_rowkey; - SMART_VAR(ObSSTableSecMetaIterator, meta_iter) { - ObDatumRange query_range; - query_range.set_whole_range(); - ObDataMacroBlockMeta data_macro_meta; - while (OB_SUCC(ret)) { - ObITable *table = nullptr; - const ObSSTable *cur_sstable = nullptr; - meta_iter.reset(); - if (OB_FAIL(ddl_sstable_iter.get_next(table))) { - if (OB_ITER_END != ret) { - LOG_WARN("get next ddl sstable failed", K(ret)); - } else { - ret = OB_SUCCESS; - break; - } - } else if (OB_ISNULL(table) || OB_UNLIKELY(!table->is_sstable())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(table)); - } else { - cur_sstable = static_cast(table); - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(meta_iter.open(query_range, - ObMacroBlockMetaType::DATA_BLOCK_META, - *cur_sstable, - read_info, - arena))) { - LOG_WARN("sstable secondary meta iterator open failed", K(ret)); - } else { - while (OB_SUCC(ret)) { - if (OB_FAIL(meta_iter.get_next(data_macro_meta))) { - if (OB_ITER_END != 
ret) { - LOG_WARN("get data macro meta failed", K(ret)); - } else { - ret = OB_SUCCESS; - break; - } - } else { - ObDataMacroBlockMeta *copied_meta = nullptr; // copied meta will destruct in the meta tree - ObDDLMacroHandle macro_handle; - bool is_exist = false; - if (OB_FAIL(meta_tree.exist(&data_macro_meta.end_key_, is_exist))) { - LOG_WARN("check block meta exist failed", K(ret), K(data_macro_meta)); - } else if (is_exist) { - // skip - } else if (OB_FAIL(macro_handle.set_block_id(data_macro_meta.get_macro_id()))) { - LOG_WARN("hold macro block failed", K(ret)); - } else if (OB_FAIL(data_macro_meta.deep_copy(copied_meta, arena))) { - LOG_WARN("deep copy macro block meta failed", K(ret)); - } else if (OB_FAIL(meta_tree.insert_macro_block(macro_handle, &copied_meta->end_key_, copied_meta))) { - LOG_WARN("insert meta tree failed", K(ret), K(macro_handle), KPC(copied_meta)); - copied_meta->~ObDataMacroBlockMeta(); - } - } - } - LOG_INFO("append meta tree finished", K(ret), - "data_macro_block_cnt_in_sstable", cur_sstable->get_data_macro_block_count(), K(meta_tree.get_macro_block_cnt())); -#ifdef ERRSIM - if (OB_SUCC(ret) && ddl_param.table_key_.is_major_sstable()) { - ret = OB_E(EventTable::EN_DDL_COMPACT_FAIL) OB_SUCCESS; - if (OB_FAIL(ret)) { - LOG_WARN("errsim compact ddl sstable failed", KR(ret)); - } - } -#endif - } + ddl_sstable_iter.resume(); + while (OB_SUCC(ret)) { + ObITable *table = nullptr; + if (OB_FAIL(ddl_sstable_iter.get_next(table))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next table failed", K(ret)); + } else { + ret = OB_SUCCESS; + break; } - } - if (OB_ITER_END == ret) { - ret = OB_SUCCESS; - } - } - // close - if (OB_SUCC(ret) && is_data_complete) { - if (is_commit) { - ddl_param.table_key_.table_type_ = ObITable::TableType::MAJOR_SSTABLE; - ddl_param.table_key_.version_range_.base_version_ = 0; - ddl_param.table_key_.version_range_.snapshot_version_ = ddl_param.snapshot_version_; + } else if (OB_ISNULL(table) || 
OB_UNLIKELY(!table->is_sstable())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(table)); + } else if (cg_idx < 0) { // row store + if (OB_FAIL(target_sstables.push_back(static_cast(table)))) { + LOG_WARN("push back target sstable failed", K(ret)); + } + } else if (!table->is_co_sstable()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current table not co sstable", K(ret), KPC(table)); } else { - ddl_param.table_key_.table_type_ = ObITable::TableType::DDL_DUMP_SSTABLE; - ddl_param.table_key_.scn_range_.start_scn_ = first_ddl_sstable->get_start_scn(); - ddl_param.table_key_.scn_range_.end_scn_ = last_ddl_sstable->get_end_scn(); - } - if (OB_FAIL(meta_tree.build_sorted_rowkeys())) { - LOG_WARN("build sorted rowkey failed", K(ret)); - } else if (OB_FAIL(meta_tree.get_sorted_meta_array(sorted_metas))) { - LOG_WARN("get sorted metas failed", K(ret)); - } else if (OB_FAIL(create_ddl_sstable(tablet, - ddl_param, - sorted_metas, - static_cast(first_ddl_sstable), - allocator, - sstable))) { - LOG_WARN("create ddl sstable failed", K(ret)); - } else if (OB_FAIL(update_ddl_table_store(tablet, ddl_param, allocator, sstable))) { - LOG_WARN("update ddl table store failed", K(ret)); - } else { - LOG_INFO("compact ddl sstable success", K(ddl_param)); + ObCOSSTableV2 *cur_co_sstable = static_cast(table); + ObSSTableWrapper cg_sstable_wrapper; + ObSSTable *cg_sstable = nullptr; + if (OB_ISNULL(cur_co_sstable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current co sstable is null", K(ret), KP(cur_co_sstable)); + } else if (cur_co_sstable->is_empty_co_table()) { + // skip + } else if (OB_FAIL(cur_co_sstable->fetch_cg_sstable(cg_idx, cg_sstable_wrapper))) { + LOG_WARN("get all tables failed", K(ret)); + } else if (OB_FAIL(cg_sstable_wrapper.get_sstable(cg_sstable))) { + LOG_WARN("get sstable failed", K(ret)); + } else if (OB_ISNULL(cg_sstable)) { + // skip + } else if (OB_FAIL(target_sstables.push_back(cg_sstable))) { + LOG_WARN("push back cg sstable 
failed", K(ret)); + } } } return ret; } -int ObTabletDDLUtil::report_ddl_checksum(const share::ObLSID &ls_id, - const ObTabletID &tablet_id, - const uint64_t table_id, - const int64_t execution_id, - const int64_t ddl_task_id, - const int64_t *column_checksums, - const int64_t column_count) +int get_sstables(const ObIArray &frozen_ddl_kvs, const int64_t cg_idx, ObIArray &target_sstables) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < frozen_ddl_kvs.count(); ++i) { + ObDDLKV *cur_kv = frozen_ddl_kvs.at(i).get_obj(); + ObDDLMemtable *target_sstable = nullptr; + if (OB_ISNULL(cur_kv)) { + ret = OB_ERR_UNEXPECTED; + } else if (cg_idx < 0) { // row store + if (cur_kv->get_ddl_memtables().empty()) { + // do nothing + } else if (OB_ISNULL(target_sstable = cur_kv->get_ddl_memtables().at(0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current sstable is null", K(ret), KPC(cur_kv), K(target_sstable)); + } else if (OB_FAIL(target_sstables.push_back(target_sstable))) { + LOG_WARN("push back target sstable failed", K(ret)); + } + } else if (OB_FAIL(cur_kv->get_ddl_memtable(cg_idx, target_sstable))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("get ddl memtable failed", K(ret), K(i), K(cg_idx)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(target_sstable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("target sstable from ddl kv is null", K(ret), K(i), K(cg_idx), KPC(cur_kv), KP(target_sstable)); + } else if (OB_FAIL(target_sstables.push_back(target_sstable))) { + LOG_WARN("push back target sstable failed", K(ret)); + } + } + return ret; +} + // for cg sstable, endkey is end row id, confirm read_info not used +int get_sorted_meta_array( + const ObIArray &sstables, + const ObITableReadInfo &read_info, + ObBlockMetaTree &meta_tree, + ObIAllocator &allocator, + ObArray &sorted_metas) +{ + int ret = OB_SUCCESS; + sorted_metas.reset(); + if (OB_UNLIKELY(!read_info.is_valid() || !meta_tree.is_valid())) { // allow empty sstable array + ret = 
OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(sstables), K(read_info), K(meta_tree)); + } else { + SMART_VAR(ObSSTableSecMetaIterator, meta_iter) { + ObDatumRange query_range; + query_range.set_whole_range(); + ObDataMacroBlockMeta data_macro_meta; + for (int64_t i = 0; OB_SUCC(ret) && i < sstables.count(); ++i) { + ObSSTable *cur_sstable = sstables.at(i); + if (OB_ISNULL(cur_sstable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(cur_sstable)); + } else { + meta_iter.reset(); + ObDataMacroBlockMeta *copied_meta = nullptr; // copied meta will destruct in the meta tree + if (OB_FAIL(meta_iter.open(query_range, + ObMacroBlockMetaType::DATA_BLOCK_META, + *cur_sstable, + read_info, + allocator))) { + LOG_WARN("sstable secondary meta iterator open failed", K(ret), KPC(cur_sstable), K(read_info)); + } else { + while (OB_SUCC(ret)) { + if (OB_FAIL(meta_iter.get_next(data_macro_meta))) { + if (OB_ITER_END != ret) { + LOG_WARN("get data macro meta failed", K(ret)); + } else { + ret = OB_SUCCESS; + break; + } + } else { + ObDDLMacroHandle macro_handle; + bool is_exist = false; + if (OB_FAIL(meta_tree.exist(&data_macro_meta.end_key_, is_exist))) { + LOG_WARN("check block meta exist failed", K(ret), K(data_macro_meta)); + } else if (is_exist) { + // skip + FLOG_INFO("append meta tree skip", K(ret), "table_key", cur_sstable->get_key(), "macro_block_id", data_macro_meta.get_macro_id(), + "data_checksum", data_macro_meta.val_.data_checksum_, K(meta_tree.get_macro_block_cnt()), "macro_block_end_key", to_cstring(data_macro_meta.end_key_)); + } else if (OB_FAIL(macro_handle.set_block_id(data_macro_meta.get_macro_id()))) { + LOG_WARN("hold macro block failed", K(ret)); + } else if (OB_FAIL(data_macro_meta.deep_copy(copied_meta, allocator))) { + LOG_WARN("deep copy macro block meta failed", K(ret)); + } else if (OB_FAIL(meta_tree.insert_macro_block(macro_handle, &copied_meta->end_key_, copied_meta))) { + LOG_WARN("insert meta 
tree failed", K(ret), K(macro_handle), KPC(copied_meta)); + copied_meta->~ObDataMacroBlockMeta(); + } else { + FLOG_INFO("append meta tree success", K(ret), "table_key", cur_sstable->get_key(), "macro_block_id", data_macro_meta.get_macro_id(), + "data_checksum", copied_meta->val_.data_checksum_, K(meta_tree.get_macro_block_cnt()), "macro_block_end_key", to_cstring(copied_meta->end_key_)); + } + } + } + } + LOG_INFO("append meta tree finished", K(ret), "table_key", cur_sstable->get_key(), "data_macro_block_cnt_in_sstable", cur_sstable->get_data_macro_block_count(), + K(meta_tree.get_macro_block_cnt()), "sstable_end_key", OB_ISNULL(copied_meta) ? "NOT_EXIST": to_cstring(copied_meta->end_key_)); + } + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(meta_tree.get_sorted_meta_array(sorted_metas))) { + LOG_WARN("get sorted meta array failed", K(ret)); + } else { + int64_t sstable_checksum = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < sorted_metas.count(); ++i) { + const ObDataMacroBlockMeta *cur_macro_meta = sorted_metas.at(i); + sstable_checksum = ob_crc64_sse42(sstable_checksum, &cur_macro_meta->val_.data_checksum_, sizeof(cur_macro_meta->val_.data_checksum_)); + FLOG_INFO("sorted meta array", K(i), "macro_block_id", cur_macro_meta->get_macro_id(), "data_checksum", cur_macro_meta->val_.data_checksum_, K(sstable_checksum), "macro_block_end_key", cur_macro_meta->end_key_); + } + } + } + return ret; +} + + +int compact_sstables( + ObTablet &tablet, + ObIArray &sstables, + const ObTabletDDLParam &ddl_param, + const ObITableReadInfo &read_info, + ObArenaAllocator &allocator, + ObTableHandleV2 &sstable_handle) +{ + int ret = OB_SUCCESS; + ObArenaAllocator arena("compact_sst", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObBlockMetaTree meta_tree; + ObArray sorted_metas; + if (OB_FAIL(meta_tree.init(tablet, ddl_param.table_key_, ddl_param.start_scn_, ddl_param.data_format_version_))) { + LOG_WARN("init meta tree failed", K(ret), K(ddl_param)); + } else if 
(OB_FAIL(get_sorted_meta_array(sstables, read_info, meta_tree, arena, sorted_metas))) { + LOG_WARN("get sorted meta array failed", K(ret), K(read_info), K(sstables)); + } else if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable( + tablet, + ddl_param, + sorted_metas, + sstables.empty() ? nullptr : sstables.at(0)/*first ddl sstable*/, + allocator, + sstable_handle))) { + LOG_WARN("create sstable failed", K(ret), K(ddl_param), K(sstables)); + } + LOG_DEBUG("compact_sstables", K(ret), K(sstables), K(ddl_param), K(read_info), KPC(sstable_handle.get_table())); + return ret; +} + +int compact_co_ddl_sstable( + ObTablet &tablet, + ObTableStoreIterator &ddl_sstable_iter, + const ObIArray &frozen_ddl_kvs, + const ObTabletDDLParam &ddl_param, + common::ObArenaAllocator &allocator, + ObTablesHandleArray &compacted_cg_sstable_handles, + ObTableHandleV2 &co_sstable_handle) +{ + int ret = OB_SUCCESS; + compacted_cg_sstable_handles.reset(); + co_sstable_handle.reset(); + const ObITableReadInfo *cg_index_read_info = nullptr; + ObArenaAllocator arena("compact_co_ddl", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStorageSchema *storage_schema = nullptr; + if (OB_UNLIKELY(ddl_sstable_iter.count() == 0 && frozen_ddl_kvs.count() == 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ddl_sstable_iter.count()), K(frozen_ddl_kvs.count())); + } else if (OB_FAIL(tablet.load_storage_schema(arena, storage_schema))) { + LOG_WARN("load storage schema failed", K(ret), K(ddl_param)); + } else { + const int64_t base_cg_idx = ddl_param.table_key_.get_column_group_id(); + ObArray base_sstables; + ObTabletDDLParam cg_ddl_param = ddl_param; + bool need_fill_cg_sstables = true; + if (OB_FAIL(get_sstables(ddl_sstable_iter, base_cg_idx, base_sstables))) { + LOG_WARN("get base sstable from ddl sstables failed", K(ret), K(ddl_sstable_iter), K(base_cg_idx)); + } else if (OB_FAIL(get_sstables(frozen_ddl_kvs, base_cg_idx, base_sstables))) { + LOG_WARN("get base sstable from ddl kv array 
failed", K(ret), K(frozen_ddl_kvs), K(base_cg_idx)); + } else if (OB_FAIL(compact_sstables(tablet, base_sstables, ddl_param, tablet.get_rowkey_read_info(), allocator, co_sstable_handle))) { + LOG_WARN("compact base sstable failed", K(ret)); + } else { + // empty major co sstable, no need fill cg sstables + need_fill_cg_sstables = !static_cast(co_sstable_handle.get_table())->is_empty_co_table(); + } + if (OB_SUCC(ret) && need_fill_cg_sstables) { + if (OB_FAIL(MTL(ObTenantCGReadInfoMgr *)->get_index_read_info(cg_index_read_info))) { + LOG_WARN("failed to get index read info from ObTenantCGReadInfoMgr", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < storage_schema->get_column_group_count(); ++i) { + const int64_t cur_cg_idx = i; + ObArray cur_cg_sstables; + ObTableHandleV2 target_table_handle; + cg_ddl_param.table_key_.table_type_ = ObITable::TableType::DDL_MERGE_CO_SSTABLE == ddl_param.table_key_.table_type_ + ? ObITable::TableType::DDL_MERGE_CG_SSTABLE : ObITable::TableType::NORMAL_COLUMN_GROUP_SSTABLE; + cg_ddl_param.table_key_.column_group_idx_ = cur_cg_idx; + if (cur_cg_idx == base_cg_idx) { + // do nothing + } else if (OB_FAIL(get_sstables(ddl_sstable_iter, cur_cg_idx, cur_cg_sstables))) { + LOG_WARN("get current cg sstables failed", K(ret)); + } else if (OB_FAIL(get_sstables(frozen_ddl_kvs, cur_cg_idx, cur_cg_sstables))) { + LOG_WARN("get current cg sstables failed", K(ret)); + } else if (OB_FAIL(compact_sstables(tablet, cur_cg_sstables, cg_ddl_param, *cg_index_read_info, allocator, target_table_handle))) { + LOG_WARN("compact cg sstable failed", K(ret), K(cur_cg_idx), K(cur_cg_sstables.count()), K(cg_ddl_param), KPC(cg_index_read_info)); + } else if (OB_FAIL(compacted_cg_sstable_handles.add_table(target_table_handle))) { + LOG_WARN("push back compacted cg sstable failed", K(ret), K(i), KP(target_table_handle.get_table())); + } + } + if (OB_SUCC(ret)) { // assemble the cg sstables into co sstable + ObArray cg_sstables; + if 
(OB_FAIL(compacted_cg_sstable_handles.get_tables(cg_sstables))) { + LOG_WARN("get cg sstables failed", K(ret)); + } else if (OB_FAIL(static_cast(co_sstable_handle.get_table())->fill_cg_sstables(cg_sstables))) { + LOG_WARN("fill cg sstables failed", K(ret)); + } + } + } + } + ObTabletObjLoadHelper::free(arena, storage_schema); + LOG_INFO("compact_co_ddl_sstable", K(ret), K(ddl_sstable_iter), K(ddl_param), KP(&tablet), KPC(co_sstable_handle.get_table())); + return ret; +} + +int compact_ro_ddl_sstable( + ObTablet &tablet, + ObTableStoreIterator &ddl_sstable_iter, + const ObIArray &frozen_ddl_kvs, + const ObTabletDDLParam &ddl_param, + common::ObArenaAllocator &allocator, + ObTableHandleV2 &ro_sstable_handle) +{ + int ret = OB_SUCCESS; + ro_sstable_handle.reset(); + if (OB_UNLIKELY(ddl_sstable_iter.count() == 0 && frozen_ddl_kvs.count() == 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ddl_sstable_iter.count()), K(frozen_ddl_kvs.count())); + } else { + const int64_t base_cg_idx = -1; // negative value means row store + ObArray base_sstables; + if (OB_FAIL(get_sstables(ddl_sstable_iter, base_cg_idx, base_sstables))) { + LOG_WARN("get base sstable from ddl sstables failed", K(ret), K(ddl_sstable_iter), K(base_cg_idx)); + } else if (OB_FAIL(get_sstables(frozen_ddl_kvs, base_cg_idx, base_sstables))) { + LOG_WARN("get base sstable from ddl kv array failed", K(ret), K(frozen_ddl_kvs), K(base_cg_idx)); + } else if (OB_FAIL(compact_sstables(tablet, base_sstables, ddl_param, tablet.get_rowkey_read_info(), allocator, ro_sstable_handle))) { + LOG_WARN("compact base sstable failed", K(ret)); + } + } + LOG_INFO("compact_ro_ddl_sstable", K(ret), K(ddl_sstable_iter), K(ddl_param), KP(&tablet), KPC(ro_sstable_handle.get_table())); + return ret; +} + +int ObTabletDDLUtil::compact_ddl_kv( + ObLS &ls, + ObTablet &tablet, + ObTableStoreIterator &ddl_sstable_iter, + const ObIArray &frozen_ddl_kvs, + const ObTabletDDLParam &ddl_param, + common::ObArenaAllocator 
&allocator, + ObTableHandleV2 &compacted_sstable_handle) +{ + int ret = OB_SUCCESS; + compacted_sstable_handle.reset(); + ObArenaAllocator arena("compact_ddl_kv", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObArray sorted_metas; + bool is_data_continue = true; + ObTablesHandleArray compacted_cg_sstable_handles; // for tmp hold handle of macro block until the tablet updated + if (OB_UNLIKELY(!ddl_param.is_valid() || (0 == ddl_sstable_iter.count() && frozen_ddl_kvs.empty()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ddl_param), K(ddl_sstable_iter.count()), K(frozen_ddl_kvs.count())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < frozen_ddl_kvs.count(); ++i) { + if (OB_FAIL(frozen_ddl_kvs.at(i).get_obj()->close())) { + LOG_WARN("close ddl kv failed", K(ret), K(i)); + } + } + +#ifdef ERRSIM + if (OB_SUCC(ret) && ddl_param.table_key_.is_major_sstable()) { + ret = OB_E(EventTable::EN_DDL_COMPACT_FAIL) OB_SUCCESS; + if (OB_FAIL(ret)) { + LOG_WARN("errsim compact ddl sstable failed", KR(ret)); + } + } +#endif + + if (OB_FAIL(ret)) { + } else if (ddl_param.table_key_.is_co_sstable()) { + if (OB_FAIL(compact_co_ddl_sstable(tablet, ddl_sstable_iter, frozen_ddl_kvs, ddl_param, allocator, compacted_cg_sstable_handles, compacted_sstable_handle))) { + LOG_WARN("compact co ddl sstable failed", K(ret), K(ddl_param)); + } + } else { + if (OB_FAIL(compact_ro_ddl_sstable(tablet, ddl_sstable_iter, frozen_ddl_kvs, ddl_param, allocator, compacted_sstable_handle))) { + LOG_WARN("compact co ddl sstable failed", K(ret), K(ddl_param)); + } + } + if (OB_SUCC(ret)) { // update table store + if (OB_FAIL(update_ddl_table_store(ls, tablet, ddl_param, allocator, static_cast(compacted_sstable_handle.get_table())))) { + LOG_WARN("update ddl table store failed", K(ret)); + } else { + LOG_INFO("compact ddl sstable success", K(ddl_param)); + } + } + } + return ret; +} + +int ObTabletDDLUtil::get_compact_scn( + ObTableStoreIterator &ddl_sstable_iter, + const ObIArray 
&frozen_ddl_kvs, + SCN &compact_start_scn, + SCN &compact_end_scn) +{ + int ret = OB_SUCCESS; + bool is_data_continue = true; + compact_start_scn = SCN::max_scn(); + compact_end_scn = SCN::min_scn(); + ddl_sstable_iter.resume(); + if (OB_UNLIKELY((0 == ddl_sstable_iter.count() && frozen_ddl_kvs.empty()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ddl_sstable_iter.count()), K(frozen_ddl_kvs.count())); + } else if (ddl_sstable_iter.count() > 0 && OB_FAIL(check_data_continue(ddl_sstable_iter, is_data_continue, compact_start_scn, compact_end_scn))) { + LOG_WARN("check ddl sstable continue failed", K(ret)); + } else if (!is_data_continue) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl sstable not continuous", K(ret), K(ddl_sstable_iter)); + } else if (frozen_ddl_kvs.count() > 0 && OB_FAIL(check_data_continue(frozen_ddl_kvs, is_data_continue, compact_start_scn, compact_end_scn))) { + LOG_WARN("check ddl sstable continue failed", K(ret)); + } else if (!is_data_continue) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl kv not continuous", K(ret), K(frozen_ddl_kvs)); + } else if (ddl_sstable_iter.count() > 0 && frozen_ddl_kvs.count() > 0) { + ObITable *first_ddl_sstable = nullptr; + ObITable *last_ddl_sstable = nullptr; + ObDDLKVHandle first_ddl_kv_handle = frozen_ddl_kvs.at(0); + ObDDLKVHandle last_ddl_kv_handle = frozen_ddl_kvs.at(frozen_ddl_kvs.count() - 1); + if (OB_FAIL(ddl_sstable_iter.get_boundary_table(false/*is_last*/, first_ddl_sstable))) { + LOG_WARN("get last ddl sstable failed", K(ret)); + } else if (OB_FAIL(ddl_sstable_iter.get_boundary_table(true/*is_last*/, last_ddl_sstable))) { + LOG_WARN("get last ddl sstable failed", K(ret)); + } else { + // |___________________________________________________| + // fisrt_ddl_sstable.start_scn last_ddl_sstable.end_scn + // |____________________________________________________________| + // first_ddl_kv.start_scn last_ddl_kv.end_scn + is_data_continue = first_ddl_kv_handle.get_obj()->get_start_scn() 
>= first_ddl_sstable->get_start_scn() + && first_ddl_kv_handle.get_obj()->get_start_scn() <= last_ddl_sstable->get_end_scn() + && last_ddl_kv_handle.get_obj()->get_end_scn() >= last_ddl_sstable->get_end_scn(); + if (!is_data_continue) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("scn range not continue between ddl sstable iter and ddl kv array", K(ret), K(ddl_sstable_iter), K(frozen_ddl_kvs)); + } + } + } + return ret; +} + +int ObTabletDDLUtil::report_ddl_checksum( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const uint64_t table_id, + const int64_t execution_id, + const int64_t ddl_task_id, + const int64_t *column_checksums, + const int64_t column_count) { int ret = OB_SUCCESS; ObMySQLProxy *sql_proxy = GCTX.sql_proxy_; @@ -958,10 +1205,10 @@ int ObTabletDDLUtil::report_ddl_checksum(const share::ObLSID &ls_id, ObSchemaGetterGuard schema_guard; const ObTableSchema *table_schema = nullptr; const uint64_t tenant_id = MTL_ID(); - if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || OB_INVALID_ID == ddl_task_id - || !is_valid_id(table_id) || 0 == table_id || execution_id < 0)) { + if (OB_UNLIKELY(!tablet_id.is_valid() || OB_INVALID_ID == ddl_task_id + || !is_valid_id(table_id) || 0 == table_id || execution_id < 0 || nullptr == column_checksums || column_count <= 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_id), K(table_id), K(execution_id)); + LOG_WARN("invalid argument", K(ret), K(tablet_id), K(table_id), K(execution_id), KP(column_checksums), K(column_count)); } else if (!is_valid_tenant_id(tenant_id) || OB_ISNULL(sql_proxy) || OB_ISNULL(schema_service)) { ret = OB_ERR_SYS; LOG_WARN("ls service or sql proxy is null", K(ret), K(tenant_id), KP(sql_proxy), KP(schema_service)); @@ -979,11 +1226,11 @@ int ObTabletDDLUtil::report_ddl_checksum(const share::ObLSID &ls_id, ObArray column_ids; ObArray ddl_checksum_items; if (OB_FAIL(table_schema->get_multi_version_column_descs(column_ids))) { - LOG_WARN("fail 
to get column ids", K(ret), K(ls_id), K(tablet_id)); + LOG_WARN("fail to get column ids", K(ret), K(tablet_id)); } else if (OB_UNLIKELY(column_count > column_ids.count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpect error, column checksums count larger than column ids count", K(ret), - K(ls_id), K(tablet_id), K(column_count), K(column_ids.count())); + K(tablet_id), K(column_count), K(column_ids.count())); } for (int64_t i = 0; OB_SUCC(ret) && i < column_count; ++i) { share::ObDDLChecksumItem item; @@ -1021,9 +1268,9 @@ int ObTabletDDLUtil::report_ddl_checksum(const share::ObLSID &ls_id, #endif if (OB_FAIL(ret)) { } else if (OB_FAIL(ObDDLChecksumOperator::update_checksum(ddl_checksum_items, *sql_proxy))) { - LOG_WARN("fail to update checksum", K(ret), K(ls_id), K(tablet_id), K(table_id), K(ddl_checksum_items)); + LOG_WARN("fail to update checksum", K(ret), K(tablet_id), K(table_id), K(ddl_checksum_items)); } else { - LOG_INFO("report ddl checkum success", K(ls_id), K(tablet_id), K(table_id), K(execution_id)); + LOG_INFO("report ddl checkum success", K(tablet_id), K(table_id), K(execution_id), K(ddl_checksum_items)); } } return ret; @@ -1060,5 +1307,44 @@ int ObTabletDDLUtil::check_and_get_major_sstable(const share::ObLSID &ls_id, return ret; } +int ObTabletDDLUtil::freeze_ddl_kv(const ObDDLTableMergeDagParam ¶m) +{ + int ret = OB_SUCCESS; + ObLSService *ls_service = MTL(ObLSService *); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObArray ddl_kvs_handle; + ObDDLTableMergeTask *merge_task = nullptr; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle tablet_mgr_hdl; + if (OB_FAIL(ls_service->get_ls(param.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(param)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + param.tablet_id_, + tablet_handle, + ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("get tablet 
failed", K(ret), K(param)); + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(param)); + } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_TASK_EXPIRED; + LOG_INFO("ddl kv mgr not exist", K(ret), K(param)); + } else { + LOG_WARN("get ddl kv mgr failed", K(ret), K(param)); + } + } else if (is_full_direct_load(param.direct_load_type_) + && param.start_scn_ < tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_) { + ret = OB_TASK_EXPIRED; + LOG_WARN("ddl task expired, skip it", K(ret), K(param), "new_start_scn", tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_); + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->freeze_ddl_kv( + param.start_scn_, param.snapshot_version_, param.data_format_version_))) { + LOG_WARN("ddl kv manager try freeze failed", K(ret), K(param)); + } + return ret; +} + } // namespace storage } // namespace oceanbase diff --git a/src/storage/ddl/ob_ddl_merge_task.h b/src/storage/ddl/ob_ddl_merge_task.h index e100191cf..36eb8d955 100644 --- a/src/storage/ddl/ob_ddl_merge_task.h +++ b/src/storage/ddl/ob_ddl_merge_task.h @@ -23,6 +23,7 @@ #include "storage/tablet/ob_tablet.h" #include "storage/blocksstable/ob_macro_block_struct.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "storage/ddl/ob_direct_load_struct.h" namespace oceanbase { @@ -34,35 +35,9 @@ struct ObDDLChecksumItem; namespace storage { -class ObLS; -struct ObDDLTableMergeDagParam : public share::ObIDagInitParam -{ -public: - ObDDLTableMergeDagParam() - : ls_id_(), - tablet_id_(), - rec_scn_(share::SCN::min_scn()), - is_commit_(false), - start_scn_(share::SCN::min_scn()), - compat_mode_(lib::Worker::CompatMode::INVALID), - ddl_kv_mgr_handle_() - { } - bool is_valid() const - { - return ls_id_.is_valid() && tablet_id_.is_valid() && start_scn_.is_valid_and_not_min() && ddl_kv_mgr_handle_.is_valid(); - } - virtual 
~ObDDLTableMergeDagParam() = default; - TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(rec_scn), K_(is_commit), K_(start_scn), K_(compat_mode), K_(ddl_kv_mgr_handle)); -public: - share::ObLSID ls_id_; - ObTabletID tablet_id_; - share::SCN rec_scn_; - bool is_commit_; - share::SCN start_scn_; // start log ts at schedule, for skipping expired task - lib::Worker::CompatMode compat_mode_; - ObDDLKvMgrHandle ddl_kv_mgr_handle_; -}; +class ObLS; +class ObCOSSTableV2; class ObDDLTableMergeDag : public share::ObIDag { @@ -80,7 +55,7 @@ public: virtual int fill_dag_key(char *buf, const int64_t buf_len) const override; virtual bool ignore_warning() override; virtual lib::Worker::CompatMode get_compat_mode() const override - { return ddl_param_.compat_mode_; } + { return lib::Worker::CompatMode::MYSQL; } // TODO@wenqu: confirm it virtual uint64_t get_consumer_group_id() const override { return consumer_group_id_; } virtual bool is_ha_dag() const override { return false; } @@ -93,92 +68,58 @@ private: class ObDDLMacroBlock; class ObDDLKV; -// each task process only one ddl kv -class ObDDLTableDumpTask : public share::ObITask -{ -public: - ObDDLTableDumpTask(); - virtual ~ObDDLTableDumpTask(); - int init(const share::ObLSID &ls_id, const ObTabletID &tablet_id, const share::SCN &freeze_scn); - virtual int process() override; - TO_STRING_KV(K_(is_inited), K_(ls_id), K_(tablet_id), K_(freeze_scn)); -private: - bool is_inited_; - share::ObLSID ls_id_; - ObTabletID tablet_id_; - share::SCN freeze_scn_; - DISALLOW_COPY_AND_ASSIGN(ObDDLTableDumpTask); -}; class ObDDLTableMergeTask : public share::ObITask { public: ObDDLTableMergeTask(); virtual ~ObDDLTableMergeTask(); - int init(const ObDDLTableMergeDagParam &ddl_dag_param); + int init(const ObDDLTableMergeDagParam &ddl_dag_param, const ObIArray &frozen_ddl_kvs); virtual int process() override; TO_STRING_KV(K_(is_inited), K_(merge_param)); private: bool is_inited_; ObDDLTableMergeDagParam merge_param_; + ObArray frozen_ddl_kvs_; 
DISALLOW_COPY_AND_ASSIGN(ObDDLTableMergeTask); }; -struct ObTabletDDLParam final -{ -public: - ObTabletDDLParam(); - ~ObTabletDDLParam(); - bool is_valid() const; - TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(table_key), K_(start_scn), K_(commit_scn), K_(snapshot_version), K_(data_format_version)); -public: - uint64_t tenant_id_; - share::ObLSID ls_id_; - ObITable::TableKey table_key_; - share::SCN start_scn_; - share::SCN commit_scn_; - int64_t snapshot_version_; - int64_t data_format_version_; -}; class ObTabletDDLUtil { public: - static int prepare_index_data_desc(ObTablet &tablet, - const int64_t snapshot_version, - const int64_t ddl_format_version, - const blocksstable::ObSSTable *first_ddl_sstable, - blocksstable::ObWholeDataStoreDesc &data_desc); - static int try_get_first_ddl_sstable(ObTablet &tablet, - ObTableStoreIterator &ddl_table_iter, - blocksstable::ObSSTable *&first_sstable); - static int create_ddl_sstable(ObTablet &tablet, - const ObTabletDDLParam &ddl_param, - const ObIArray &meta_array, - const blocksstable::ObSSTable *first_ddl_sstable, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable); + static int prepare_index_data_desc( + ObTablet &tablet, + const int64_t cg_idx, // negative means row store + const int64_t snapshot_version, + const uint64_t data_format_version, + const blocksstable::ObSSTable *first_ddl_sstable, + const share::SCN &end_scn, + blocksstable::ObWholeDataStoreDesc &data_desc); - static int create_ddl_sstable(ObTablet &tablet, - blocksstable::ObSSTableIndexBuilder *sstable_index_builder, - const ObTabletDDLParam &ddl_param, - const blocksstable::ObSSTable *first_ddl_sstable, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable); + static int create_ddl_sstable( + ObTablet &tablet, + const ObTabletDDLParam &ddl_param, + const ObIArray &meta_array, + const blocksstable::ObSSTable *first_ddl_sstable, + common::ObArenaAllocator &allocator, + ObTableHandleV2 &sstable_handle); - static int 
update_ddl_table_store(ObTablet &tablet, - const ObTabletDDLParam &ddl_param, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable); + static int update_ddl_table_store( + ObLS &ls, + ObTablet &tablet, + const ObTabletDDLParam &ddl_param, + common::ObArenaAllocator &allocator, + blocksstable::ObSSTable *sstable); - static int compact_ddl_sstable(ObTablet &tablet, - ObTableStoreIterator &ddl_sstable_iter, - const ObITableReadInfo &read_info, - const bool is_commit, - const share::SCN &rec_scn, - ObTabletDDLParam &ddl_param, - common::ObArenaAllocator &allocator, - blocksstable::ObSSTable &sstable); + static int compact_ddl_kv( + ObLS &ls, + ObTablet &tablet, + ObTableStoreIterator &ddl_sstable_iter, + const ObIArray &frozen_ddl_kvs, + const ObTabletDDLParam &ddl_param, + common::ObArenaAllocator &allocator, + ObTableHandleV2 &compacted_sstable_handle); static int report_ddl_checksum(const share::ObLSID &ls_id, const ObTabletID &tablet_id, @@ -187,14 +128,44 @@ public: const int64_t ddl_task_id, const int64_t *column_checksums, const int64_t column_count); - static int check_and_get_major_sstable(const share::ObLSID &ls_id, - const ObTabletID &tablet_id, - const blocksstable::ObSSTable *&first_major_sstable, - ObTabletMemberWrapper &table_store_wrapper); - static int check_data_integrity(ObTableStoreIterator &ddl_sstable_iter, - const share::SCN &start_scn, - const share::SCN &prepare_scn, - bool &is_data_complete); + + static int check_and_get_major_sstable( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const blocksstable::ObSSTable *&first_major_sstable, + ObTabletMemberWrapper &table_store_wrapper); + + static int get_compact_scn( + ObTableStoreIterator &ddl_sstable_iter, + const ObIArray &frozen_ddl_kvs, + share::SCN &compact_start_scn, + share::SCN &compact_end_scn); + + static int freeze_ddl_kv(const ObDDLTableMergeDagParam ¶m); + + static int check_data_continue( + ObTableStoreIterator &ddl_sstable_iter, + bool 
&is_data_continue, + share::SCN &compact_start_scn, + share::SCN &compact_end_scn); + +private: + + static int create_ddl_sstable( + ObTablet &tablet, + blocksstable::ObSSTableIndexBuilder *sstable_index_builder, + const ObTabletDDLParam &ddl_param, + const blocksstable::ObSSTable *first_ddl_sstable, + const int64_t macro_block_column_count, + common::ObArenaAllocator &allocator, + ObTableHandleV2 &sstable_handle); + + static int check_data_continue( + const ObIArray &ddl_kvs, + bool &is_data_continue, + share::SCN &compact_start_scn, + share::SCN &compact_end_scn); + }; } // namespace storage diff --git a/src/storage/ddl/ob_ddl_redo_log_replayer.cpp b/src/storage/ddl/ob_ddl_redo_log_replayer.cpp index 66c9502ef..30da1b2f4 100644 --- a/src/storage/ddl/ob_ddl_redo_log_replayer.cpp +++ b/src/storage/ddl/ob_ddl_redo_log_replayer.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX STORAGE #include "ob_ddl_redo_log_replayer.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ddl/ob_ddl_clog.h" #include "storage/ddl/ob_ddl_merge_task.h" #include "storage/ddl/ob_ddl_replay_executor.h" @@ -103,18 +104,13 @@ int ObDDLRedoLogReplayer::replay_commit(const ObDDLCommitLog &log, const SCN &sc ObDDLCommitReplayExecutor replay_executor; DEBUG_SYNC(BEFORE_REPLAY_DDL_PREPRARE); - if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedoLogReplayer has not been inited", K(ret)); } else if (OB_FAIL(replay_executor.init(ls_, log, scn))) { - LOG_WARN("failed to init ddl commit log replay executor", K(ret)); + LOG_WARN("init replay executor failed", K(ret)); } else if (OB_FAIL(replay_executor.execute(scn, ls_->get_ls_id(), log.get_table_key().tablet_id_))) { - if (OB_NO_NEED_UPDATE == ret || OB_TASK_EXPIRED == ret) { - ret = OB_SUCCESS; - } else if (OB_EAGAIN != ret) { - LOG_WARN("failed to replay ddl commit log", K(ret), K(scn), K(log), K(ls_->get_ls_id())); - } + LOG_WARN("execute replay execute failed", K(ret)); } return ret; } diff --git 
a/src/storage/ddl/ob_ddl_redo_log_writer.cpp b/src/storage/ddl/ob_ddl_redo_log_writer.cpp index ec63c01cf..b69f81370 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.cpp +++ b/src/storage/ddl/ob_ddl_redo_log_writer.cpp @@ -24,6 +24,7 @@ #include "storage/tx_storage/ob_ls_service.h" #include "storage/tx/ob_ts_mgr.h" #include "storage/ddl/ob_ddl_merge_task.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" #include "storage/blocksstable/ob_logic_macro_id.h" #include "observer/ob_server_event_history_table_operator.h" @@ -635,21 +636,21 @@ void ObDDLCtrlSpeedHandle::RefreshSpeedHandleTask::runTimerTask() } } -ObDDLRedoLogWriter::ObDDLRedoLogWriter() : is_inited_(false), bucket_lock_() +ObDDLRedoLock::ObDDLRedoLock() : is_inited_(false), bucket_lock_() { } -ObDDLRedoLogWriter::~ObDDLRedoLogWriter() +ObDDLRedoLock::~ObDDLRedoLock() { } -ObDDLRedoLogWriter &ObDDLRedoLogWriter::get_instance() +ObDDLRedoLock &ObDDLRedoLock::get_instance() { - static ObDDLRedoLogWriter instance; + static ObDDLRedoLock instance; return instance; } -int ObDDLRedoLogWriter::init() +int ObDDLRedoLock::init() { int ret = OB_SUCCESS; const int64_t bucket_num = 10243L; @@ -662,26 +663,23 @@ int ObDDLRedoLogWriter::init() return ret; } -int ObDDLRedoLogWriter::write( - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const ObDDLRedoLog &log, - const uint64_t tenant_id, - const int64_t task_id, - const share::ObLSID &ls_id, - ObLogHandler *log_handler, - const blocksstable::MacroBlockId ¯o_block_id, - char *buffer, - ObDDLRedoLogHandle &handle) +int ObDDLRedoLogWriter::local_write_ddl_macro_redo( + const ObDDLMacroBlockRedoInfo &redo_info, + const share::ObLSID &ls_id, + const int64_t task_id, + logservice::ObLogHandler *log_handler, + const blocksstable::MacroBlockId ¯o_block_id, + char *buffer, + ObDDLRedoLogHandle &handle) { int ret = OB_SUCCESS; + const uint64_t tenant_id = MTL_ID(); + ObDDLRedoLog log; const enum 
ObReplayBarrierType replay_barrier_type = ObReplayBarrierType::NO_NEED_BARRIER; logservice::ObLogBaseHeader base_header(logservice::ObLogBaseType::DDL_LOG_BASE_TYPE, replay_barrier_type); ObDDLClogHeader ddl_header(ObDDLClogType::DDL_REDO_LOG); - const int64_t buffer_size = base_header.get_serialize_size() - + ddl_header.get_serialize_size() - + log.get_serialize_size(); + int64_t buffer_size = 0; int64_t pos = 0; ObDDLMacroBlockClogCb *cb = nullptr; ObDDLRedoLog tmp_log; @@ -691,26 +689,40 @@ int ObDDLRedoLogWriter::write( const bool need_nonblock= false; SCN base_scn = SCN::min_scn(); SCN scn; - uint32_t lock_tid = 0; int64_t real_sleep_us = 0; int tmp_ret = OB_SUCCESS; - if (!log.is_valid() || nullptr == log_handler || !ls_id.is_valid() - || OB_INVALID_TENANT_ID == tenant_id - || nullptr == buffer || 0 == task_id) { + + ObLSHandle ls_handle; + ObLS *ls = nullptr; + ObTabletHandle tablet_handle; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ddl_kv_mgr_handle.reset(); + if (OB_UNLIKELY(!redo_info.is_valid() + || nullptr == log_handler + || OB_INVALID_TENANT_ID == tenant_id + || nullptr == buffer + || 0 == task_id + || !ls_id.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(log), K(ls_id), K(tenant_id), KP(buffer)); + LOG_WARN("invalid arguments", K(ret), K(redo_info), KP(log_handler), KP(buffer), K(task_id), K(ls_id), K(tenant_id)); + } else if (OB_FAIL(log.init(redo_info))) { + LOG_WARN("fail to init DDLRedoLog", K(ret), K(redo_info)); + } else if (FALSE_IT(buffer_size = base_header.get_serialize_size() + + ddl_header.get_serialize_size() + + log.get_serialize_size())) { + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls should not be null", K(ret)); + } else if (OB_FAIL(ls->get_tablet(log.get_redo_info().table_key_.tablet_id_, tablet_handle, 
ObTabletCommon::DEFAULT_GET_TABLET_NO_WAIT, ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id), K(log.get_redo_info())); + } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { + LOG_WARN("create ddl kv mgr failed", K(ret)); } else if (OB_TMP_FAIL(ObDDLCtrlSpeedHandle::get_instance().limit_and_sleep(tenant_id, ls_id, buffer_size, task_id, ddl_kv_mgr_handle, real_sleep_us))) { LOG_WARN("fail to limit and sleep", K(tmp_ret), K(tenant_id), K(task_id), K(ls_id), K(buffer_size), K(real_sleep_us)); } if (OB_FAIL(ret)) { - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->rdlock(ObDDLRedoLogHandle::DDL_REDO_LOG_TIMEOUT, lock_tid))) { - LOG_WARN("failed to rdlock", K(ret)); - } else if (ddl_kv_mgr_handle.get_obj()->get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()).is_valid_and_not_min()) { - ret = OB_TRANS_COMMITED; - LOG_WARN("already commit", K(ret)); - } else if (ddl_kv_mgr_handle.get_obj()->get_start_scn() != log.get_redo_info().start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_WARN("restarted", K(ret)); } else if (OB_ISNULL(cb = op_alloc(ObDDLMacroBlockClogCb))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret)); @@ -724,7 +736,7 @@ int ObDDLRedoLogWriter::write( } else if (OB_FAIL(tmp_log.deserialize(buffer, buffer_size, log_start_pos))) { LOG_WARN("fail to deserialize ddl redo log", K(ret)); /* use the ObString data_buffer_ in tmp_log.redo_info_, do not rely on the macro_block_buf in original log*/ - } else if (OB_FAIL(cb->init(ls_id, tmp_log.get_redo_info(), macro_block_id, tablet_handle, ddl_kv_mgr_handle))) { + } else if (OB_FAIL(cb->init(ls_id, tmp_log.get_redo_info(), macro_block_id, tablet_handle))) { LOG_WARN("init ddl clog callback failed", K(ret)); } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_REDO_WRITER_WRITE_MACRO_LOG_FAILED))) { LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); @@ -740,10 +752,6 @@ int ObDDLRedoLogWriter::write( 
handle.cb_ = cb; cb = nullptr; handle.scn_ = scn; - LOG_INFO("submit ddl redo log succeed", K(lsn), K(base_scn), K(scn)); - } - if (0 != lock_tid) { - ddl_kv_mgr_handle.get_obj()->unlock(lock_tid); } if (OB_FAIL(ret)) { if (nullptr != cb) { @@ -754,12 +762,15 @@ int ObDDLRedoLogWriter::write( return ret; } -int ObDDLRedoLogWriter::write_ddl_start_log(ObLSHandle &ls_handle, - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const ObDDLStartLog &log, - ObLogHandler *log_handler, - SCN &start_scn) +int ObDDLRedoLogWriter::local_write_ddl_start_log( + const ObDDLStartLog &log, + ObLSHandle &ls_handle, + ObLogHandler *log_handler, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + uint32_t &lock_tid, + SCN &start_scn) { int ret = OB_SUCCESS; start_scn.set_min(); @@ -778,28 +789,12 @@ int ObDDLRedoLogWriter::write_ddl_start_log(ObLSHandle &ls_handle, const bool need_nonblock= false; SCN scn = SCN::min_scn(); bool is_external_consistent = false; - ObBucketHashWLockGuard guard(bucket_lock_, log.get_table_key().get_tablet_id().hash()); - uint32_t lock_tid = 0; - if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->wrlock(ObDDLRedoLogHandle::DDL_REDO_LOG_TIMEOUT, lock_tid))) { - LOG_WARN("failed to wrlock", K(ret)); - } else if (ddl_kv_mgr_handle.get_obj()->is_execution_id_older(log.get_execution_id())) { - ret = OB_TASK_EXPIRED; - LOG_INFO("receive a old execution id, don't do ddl start", K(ret), K(log)); - } else if (ddl_kv_mgr_handle.get_obj()->get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()).is_valid_and_not_min()) { - start_scn = ddl_kv_mgr_handle.get_obj()->get_start_scn(); - if (!start_scn.is_valid_and_not_min()) { - start_scn = tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_; - } - if (!start_scn.is_valid_and_not_min()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("start scn must be valid after commit", K(ret), K(start_scn)); - } else { - LOG_INFO("already 
committed, use previous start scn", K(ret), K(tablet_handle.get_obj()->get_tablet_meta())); - } - } else if (OB_ISNULL(cb = op_alloc(ObDDLStartClogCb))) { + ObDDLRedoLockGuard guard(log.get_table_key().get_tablet_id().hash()); + if (OB_ISNULL(cb = op_alloc(ObDDLStartClogCb))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret)); - } else if (OB_FAIL(cb->init(log.get_table_key(), log.get_data_format_version(), log.get_execution_id(), lock_tid, ddl_kv_mgr_handle))) { + } else if (OB_FAIL(cb->init(log.get_table_key(), log.get_data_format_version(), log.get_execution_id(), + ddl_kv_mgr_handle, lob_kv_mgr_handle, direct_load_mgr_handle, lock_tid))) { LOG_WARN("failed to init cb", K(ret)); } else if (OB_FAIL(base_header.serialize(buffer, buffer_size, pos))) { LOG_WARN("failed to serialize log base header", K(ret)); @@ -808,7 +803,9 @@ int ObDDLRedoLogWriter::write_ddl_start_log(ObLSHandle &ls_handle, } else if (OB_FAIL(log.serialize(buffer, buffer_size, pos))) { LOG_WARN("fail to seriaize ddl start log", K(ret)); } else if (OB_FAIL(ls_handle.get_ls()->get_ddl_log_handler()->add_tablet(log.get_table_key().get_tablet_id()))) { - LOG_WARN("add tablet failed", K(ret)); + LOG_WARN("add tablet failed", K(ret), "tablet_id", log.get_table_key().get_tablet_id()); + } else if (lob_kv_mgr_handle.is_valid() && OB_FAIL(ls_handle.get_ls()->get_ddl_log_handler()->add_tablet(lob_kv_mgr_handle.get_obj()->get_tablet_id()))) { + LOG_WARN("add lob tablet failed", K(ret), "lob_tablet_id", lob_kv_mgr_handle.get_obj()->get_tablet_id()); } else if (OB_FAIL(log_handler->append(buffer, buffer_size, SCN::min_scn(), @@ -827,6 +824,7 @@ int ObDDLRedoLogWriter::write_ddl_start_log(ObLSHandle &ls_handle, lock_tid = 0; bool finish = false; const int64_t start_time = ObTimeUtility::current_time(); + start_scn = scn; while (OB_SUCC(ret) && !finish) { if (OB_FAIL(THIS_WORKER.check_status())) { LOG_WARN("check status failed", K(ret)); @@ -848,15 +846,6 @@ int 
ObDDLRedoLogWriter::write_ddl_start_log(ObLSHandle &ls_handle, } } } - if (OB_SUCC(ret)) { - const int64_t saved_snapshot_version = log.get_table_key().get_snapshot_version(); - start_scn = scn; - // remove ddl sstable if exists and flush ddl start log ts and snapshot version into tablet meta - if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->update_tablet(*tablet_handle.get_obj(), start_scn, saved_snapshot_version, log.get_data_format_version(), log.get_execution_id(), start_scn))) { - LOG_WARN("clean up ddl sstable failed", K(ret), K(log)); - } - FLOG_INFO("start ddl kv mgr finished", K(ret), K(start_scn), K(log)); - } tmp_cb->try_release(); // release the memory no matter succ or not } if (OB_FAIL(ret)) { @@ -865,20 +854,17 @@ int ObDDLRedoLogWriter::write_ddl_start_log(ObLSHandle &ls_handle, cb = nullptr; } } - if (0 != lock_tid) { - ddl_kv_mgr_handle.get_obj()->unlock(lock_tid); - } return ret; } -template -int ObDDLRedoLogWriter::write_ddl_commit_log(ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const T &log, - const ObDDLClogType clog_type, - const share::ObLSID &ls_id, - ObLogHandler *log_handler, - ObDDLCommitLogHandle &handle) +int ObDDLRedoLogWriter::local_write_ddl_commit_log( + const ObDDLCommitLog &log, + const ObDDLClogType clog_type, + const share::ObLSID &ls_id, + ObLogHandler *log_handler, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + ObDDLCommitLogHandle &handle, + uint32_t &lock_tid) { int ret = OB_SUCCESS; const enum ObReplayBarrierType replay_barrier_type = ObReplayBarrierType::PRE_BARRIER; @@ -898,22 +884,13 @@ int ObDDLRedoLogWriter::write_ddl_commit_log(ObTabletHandle &tablet_handle, SCN base_scn = SCN::min_scn(); SCN scn = SCN::min_scn(); bool is_external_consistent = false; - uint32_t lock_tid = 0; - if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->wrlock(ObDDLRedoLogHandle::DDL_REDO_LOG_TIMEOUT, lock_tid))) { - LOG_WARN("failed to wrlock", K(ret)); - } else if (ddl_kv_mgr_handle.get_obj()->get_start_scn() != 
log.get_start_scn()) { - ret = OB_TASK_EXPIRED; - LOG_WARN("restarted", K(ret)); - } else if (ddl_kv_mgr_handle.get_obj()->get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()).is_valid_and_not_min()) { - ret = OB_TRANS_COMMITED; - LOG_WARN("already committed", K(ret), K(log)); - } else if (OB_ISNULL(buffer = static_cast(ob_malloc(buffer_size, ObMemAttr(MTL_ID(), "DDL_COMMIT_LOG"))))) { +if (OB_ISNULL(buffer = static_cast(ob_malloc(buffer_size, ObMemAttr(MTL_ID(), "DDL_COMMIT_LOG"))))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret)); } else if (OB_ISNULL(cb = op_alloc(ObDDLCommitClogCb))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret)); - } else if (OB_FAIL(cb->init(ls_id, log.get_table_key().tablet_id_, log.get_start_scn(), lock_tid, ddl_kv_mgr_handle))) { + } else if (OB_FAIL(cb->init(ls_id, log.get_table_key().tablet_id_, log.get_start_scn(), lock_tid, direct_load_mgr_handle))) { LOG_WARN("init ddl commit log callback failed", K(ret), K(ls_id), K(log)); } else if (OB_FAIL(base_header.serialize(buffer, buffer_size, pos))) { LOG_WARN("failed to serialize log base header", K(ret)); @@ -950,7 +927,6 @@ int ObDDLRedoLogWriter::write_ddl_commit_log(ObTabletHandle &tablet_handle, if (OB_SUCC(ret)) { handle.cb_ = tmp_cb; handle.commit_scn_ = scn; - LOG_INFO("submit ddl commit log succeed", K(lsn), K(base_scn), K(scn)); } else { tmp_cb->try_release(); // release the memory } @@ -959,9 +935,6 @@ int ObDDLRedoLogWriter::write_ddl_commit_log(ObTabletHandle &tablet_handle, ob_free(buffer); buffer = nullptr; } - if (0 != lock_tid) { - ddl_kv_mgr_handle.get_obj()->unlock(lock_tid); - } if (OB_FAIL(ret)) { if (nullptr != cb) { op_free(cb); @@ -1076,48 +1049,9 @@ void ObDDLCommitLogHandle::reset() } } - -int ObDDLMacroBlockRedoWriter::write_macro_redo(ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const ObDDLMacroBlockRedoInfo &redo_info, - const share::ObLSID &ls_id, - const int64_t task_id, - 
logservice::ObLogHandler *log_handler, - const blocksstable::MacroBlockId ¯o_block_id, - char *buffer, - ObDDLRedoLogHandle &handle) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!redo_info.is_valid() - || nullptr == log_handler - || nullptr == buffer - || 0 == task_id - || !ls_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(redo_info), KP(log_handler), KP(buffer), K(task_id), K(ls_id)); - } else { - ObDDLRedoLog log; - int64_t tmp_ret = OB_SUCCESS; - const uint64_t tenant_id = MTL_ID(); - if (OB_FAIL(log.init(redo_info))) { - LOG_WARN("fail to init DDLRedoLog", K(ret), K(redo_info)); - } else if (OB_FAIL(ObDDLRedoLogWriter::get_instance().write(tablet_handle, - ddl_kv_mgr_handle, - log, tenant_id, - task_id, ls_id, - log_handler, - macro_block_id, buffer, - handle))) { - LOG_WARN("fail to write ddl redo log item", K(ret)); - } - } - return ret; -} - -int ObDDLMacroBlockRedoWriter::remote_write_macro_redo(const int64_t task_id, - const ObAddr &leader_addr, - const ObLSID &leader_ls_id, - const ObDDLMacroBlockRedoInfo &redo_info) +int ObDDLRedoLogWriter::remote_write_ddl_macro_redo( + const int64_t task_id, + const ObDDLMacroBlockRedoInfo &redo_info) { int ret = OB_SUCCESS; obrpc::ObSrvRpcProxy *srv_rpc_proxy = nullptr; @@ -1129,27 +1063,27 @@ int ObDDLMacroBlockRedoWriter::remote_write_macro_redo(const int64_t task_id, LOG_WARN("srv rpc proxy is null", K(ret), KP(srv_rpc_proxy)); } else { obrpc::ObRpcRemoteWriteDDLRedoLogArg arg; - if (OB_FAIL(arg.init(MTL_ID(), leader_ls_id, redo_info, task_id))) { + if (OB_FAIL(arg.init(MTL_ID(), leader_ls_id_, redo_info, task_id))) { LOG_WARN("fail to init ObRpcRemoteWriteDDLRedoLogArg", K(ret)); - } else if (OB_FAIL(srv_rpc_proxy->to(leader_addr).by(MTL_ID()).remote_write_ddl_redo_log(arg))) { - LOG_WARN("fail to remote write ddl redo log", K(ret), K(leader_addr), K(arg)); + } else if (OB_FAIL(srv_rpc_proxy->to(leader_addr_).by(MTL_ID()).remote_write_ddl_redo_log(arg))) { + LOG_WARN("fail 
to remote write ddl redo log", K(ret), K_(leader_addr), K(arg)); } } return ret; } -ObDDLSSTableRedoWriter::ObDDLSSTableRedoWriter() - : is_inited_(false), remote_write_(false), start_scn_(SCN::min_scn()), +ObDDLRedoLogWriter::ObDDLRedoLogWriter() + : is_inited_(false), remote_write_(false), ls_id_(), tablet_id_(), ddl_redo_handle_(), leader_addr_(), leader_ls_id_(), buffer_(nullptr) { } -int ObDDLSSTableRedoWriter::init(const ObLSID &ls_id, const ObTabletID &tablet_id) +int ObDDLRedoLogWriter::init(const ObLSID &ls_id, const ObTabletID &tablet_id) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; - LOG_WARN("ObDDLSSTableRedoWriter has been inited twice", K(ret)); + LOG_WARN("ddl redo log writer has been inited twice", K(ret)); } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(ls_id), K(tablet_id)); @@ -1161,183 +1095,70 @@ int ObDDLSSTableRedoWriter::init(const ObLSID &ls_id, const ObTabletID &tablet_i return ret; } -int ObDDLSSTableRedoWriter::start_ddl_redo(const ObITable::TableKey &table_key, - const int64_t ddl_task_id, - const int64_t execution_id, - const int64_t data_format_version, - ObDDLKvMgrHandle &ddl_kv_mgr_handle) +void ObDDLRedoLogWriter::reset() +{ + is_inited_ = false; + remote_write_ = false; + ls_id_.reset(); + tablet_id_.reset(); + ddl_redo_handle_.reset(); + leader_addr_.reset(); + leader_ls_id_.reset(); +} + +int ObDDLRedoLogWriter::write_start_log( + const ObITable::TableKey &table_key, + const int64_t execution_id, + const uint64_t data_format_version, + const ObDirectLoadType direct_load_type, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + uint32_t &lock_tid, + SCN &start_scn) { int ret = OB_SUCCESS; - ObLSHandle ls_handle; - ObLS *ls = nullptr; - ObTabletHandle tablet_handle; ObDDLStartLog log; - ddl_kv_mgr_handle.reset(); - SCN tmp_scn; + 
ObLS *ls = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + start_scn.set_min(); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; - LOG_WARN("ObDDLSSTableRedoWriter has not been inited", K(ret)); - } else if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 || data_format_version <= 0)) { + LOG_WARN("ddl redo log writer has not been inited", K(ret)); + } else if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 || data_format_version <= 0 || !is_valid_direct_load(direct_load_type))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(table_key), K(execution_id), K(data_format_version)); - } else if (OB_FAIL(log.init(table_key, data_format_version, execution_id))) { + LOG_WARN("invalid arguments", K(ret), K(table_key), K(execution_id), K(data_format_version), K(direct_load_type)); + } else if (OB_FAIL(log.init(table_key, data_format_version, execution_id, direct_load_type))) { LOG_WARN("fail to init DDLStartLog", K(ret), K(table_key), K(execution_id), K(data_format_version)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("get ls failed", K(ret), K(ls_id_)); } else if (OB_ISNULL(ls = ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("ls should not be null", K(ret), K(table_key)); - } else if (OB_FAIL(ls->get_tablet(tablet_id_, tablet_handle, 0, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { - LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); - } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle, true/*try_create*/))) { - LOG_WARN("create ddl kv mgr failed", K(ret)); - } else if (OB_FAIL(DDL_SIM(MTL_ID(), ddl_task_id, DDL_REDO_WRITER_WRITE_START_LOG_FAILED))) { - LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(ddl_task_id)); - } else if (OB_FAIL(ObDDLRedoLogWriter::get_instance().write_ddl_start_log(ls_handle, 
tablet_handle, ddl_kv_mgr_handle, log, ls->get_log_handler(), tmp_scn))) { + /*} else if (OB_FAIL(DDL_SIM(MTL_ID(), ddl_task_id, DDL_REDO_WRITER_WRITE_START_LOG_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(ddl_task_id));*/ + } else if (OB_FAIL(local_write_ddl_start_log(log, ls_handle, ls->get_log_handler(), + ddl_kv_mgr_handle, lob_kv_mgr_handle, direct_load_mgr_handle, lock_tid, start_scn))) { LOG_WARN("fail to write ddl start log", K(ret), K(table_key)); - } else if (FALSE_IT(set_start_scn(tmp_scn))) { - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->register_to_tablet(get_start_scn(), ddl_kv_mgr_handle))) { - LOG_WARN("register ddl kv mgr to tablet failed", K(ret), K(ls_id_), K(tablet_id_)); } else { - ddl_kv_mgr_handle.get_obj()->reset_commit_success(); // releated issue: - } - SERVER_EVENT_ADD("ddl", "ddl write start log", + /*SERVER_EVENT_ADD("ddl", "ddl write start log", "tenant_id", MTL_ID(), "ret", ret, "trace_id", *ObCurTraceId::get_trace_id(), "task_id", ddl_task_id, "tablet_id", tablet_id_, - "start_scn", get_start_scn()); - LOG_INFO("ddl write start log", K(ret), "ddl_event_info", ObDDLEventInfo(), K(ddl_task_id)); - return ret; -} - -int ObDDLSSTableRedoWriter::end_ddl_redo_and_create_ddl_sstable( - const share::ObLSID &ls_id, - const ObITable::TableKey &table_key, - const uint64_t table_id, - const int64_t execution_id, - const int64_t ddl_task_id) -{ - int ret = OB_SUCCESS; - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - ObDDLKvMgrHandle ddl_kv_mgr_handle; - const ObTabletID &tablet_id = table_key.tablet_id_; - ObLS *ls = nullptr; - SCN ddl_start_scn = get_start_scn(); - SCN commit_scn = SCN::min_scn(); - bool is_remote_write = false; - bool commit_by_this_execution = false; - if (OB_UNLIKELY(!ls_id.is_valid() || !table_key.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid ls", K(ret), K(ls_id), K(table_key)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::DDL_MOD))) { - 
LOG_WARN("get ls failed", K(ret), K(ls_id)); - } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id, tablet_handle))) { - LOG_WARN("get tablet failed", K(ret)); - } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - LOG_WARN("get ddl kv manager failed", K(ret), K(ls_id), K(tablet_id)); - } else if (OB_FAIL(write_commit_log(tablet_handle, ddl_kv_mgr_handle, true, table_key, commit_scn, is_remote_write))) { - if (OB_TASK_EXPIRED == ret) { - LOG_INFO("ddl task expired", K(ret), K(table_key), K(table_id), K(execution_id), K(ddl_task_id)); - } else { - LOG_WARN("fail write ddl commit log", K(ret), K(table_key)); - } - } else { - commit_by_this_execution = true; - } - - if (OB_TRANS_COMMITED == ret) { - commit_scn = ddl_kv_mgr_handle.get_obj()->get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()); - if (!commit_scn.is_valid_and_not_min()) { - ret = OB_EAGAIN; - LOG_WARN("committed on leader but not committed on me, retry", K(ret), K(ddl_start_scn), K(commit_scn), K(table_id), K(execution_id), K(ddl_task_id)); - } else { - ret = OB_SUCCESS; - ddl_start_scn = ddl_kv_mgr_handle.get_obj()->get_start_scn(); - set_start_scn(ddl_start_scn); - } - } - - if (OB_FAIL(ret)) { - } else if (is_remote_write) { - LOG_INFO("ddl commit log is written in remote, need wait replay", K(ddl_task_id), K(tablet_id), K(ddl_start_scn), K(commit_scn)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->ddl_commit(*tablet_handle.get_obj(), ddl_start_scn, commit_scn))) { - if (OB_TASK_EXPIRED == ret) { - LOG_INFO("ddl task expired", K(ret), K(ls_id), K(tablet_id), - K(ddl_start_scn), "new_ddl_start_scn", ddl_kv_mgr_handle.get_obj()->get_start_scn()); - } else { - LOG_WARN("failed to do ddl kv commit", K(ret), K(ddl_start_scn), K(commit_scn)); - } - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->wait_ddl_merge_success(*tablet_handle.get_obj(), ddl_start_scn, commit_scn))) { - if (OB_TASK_EXPIRED == ret) { - LOG_INFO("ddl task 
expired, but return success", K(ret), K(ls_id), K(tablet_id), - K(ddl_start_scn), "new_ddl_start_scn", - ddl_kv_mgr_handle.get_obj()->get_start_scn()); - } else { - LOG_WARN("failed to wait ddl merge", K(ret), K(ddl_start_scn)); - } - } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, - tablet_id, - tablet_handle, - ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { - LOG_WARN("get tablet handle failed", K(ret), K(ls_id), K(tablet_id)); - } else if (OB_ISNULL(tablet_handle.get_obj())) { - ret = OB_ERR_SYS; - LOG_WARN("tablet handle is null", K(ret), K(ls_id), K(tablet_id)); - } else { - bool need_report_ddl_checksum = true; -#ifdef ERRSIM - need_report_ddl_checksum = 0 == GCONF.errsim_ddl_major_delay_time; -#endif - ObTabletMemberWrapper table_store_wrapper; - ObSSTableMetaHandle sst_meta_hdl; - const ObSSTable *first_major_sstable = nullptr; - if (OB_FAIL(tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) { - LOG_WARN("fail to fetch table store", K(ret)); - } else if (OB_FALSE_IT(first_major_sstable = static_cast( - table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)))) { - } else if (!need_report_ddl_checksum) { - // skip - } else if (OB_ISNULL(first_major_sstable)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("no major after wait merge success", K(ret), K(ls_id), K(tablet_id)); - } else if (OB_FAIL(first_major_sstable->get_meta(sst_meta_hdl))) { - LOG_WARN("fail to get sstable meta handle", K(ret)); - } else if (commit_by_this_execution && OB_UNLIKELY(first_major_sstable->get_key() != table_key)) { - ret = OB_SNAPSHOT_DISCARDED; - LOG_WARN("ddl major sstable dropped, snapshot holding may have bug", K(ret), KPC(first_major_sstable), K(table_key), K(tablet_id), K(execution_id), K(ddl_task_id)); - } else if (OB_FAIL(first_major_sstable->get_meta(sst_meta_hdl))) { - LOG_WARN("fail to get sstable meta handle", K(ret)); - } else { - for (int64_t retry_cnt = 10; retry_cnt > 0; retry_cnt--) { // overwrite ret - if 
(OB_FAIL(ObTabletDDLUtil::report_ddl_checksum(ls_id, - tablet_id, - table_id, - execution_id, - ddl_task_id, - sst_meta_hdl.get_sstable_meta().get_col_checksum(), - sst_meta_hdl.get_sstable_meta().get_col_checksum_cnt()))) { - LOG_WARN("report ddl column checksum failed", K(ret), K(ls_id), K(tablet_id), K(execution_id), K(ddl_task_id)); - } else { - break; - } - ob_usleep(100L * 1000L); - } - } + "start_scn", start_scn); + LOG_INFO("write ddl start log", K(ret), K(table_key), K(start_scn));*/ } return ret; } -int ObDDLSSTableRedoWriter::write_redo_log(const ObDDLMacroBlockRedoInfo &redo_info, - const blocksstable::MacroBlockId ¯o_block_id, - const bool allow_remote_write, - const int64_t task_id, - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle) +int ObDDLRedoLogWriter::write_macro_block_log( + const ObDDLMacroBlockRedoInfo &redo_info, + const blocksstable::MacroBlockId ¯o_block_id, + const bool allow_remote_write, + const int64_t task_id) { int ret = OB_SUCCESS; ObLSHandle ls_handle; @@ -1345,10 +1166,10 @@ int ObDDLSSTableRedoWriter::write_redo_log(const ObDDLMacroBlockRedoInfo &redo_i const int64_t BUF_SIZE = 2 * 1024 * 1024 + 16 * 1024; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; - LOG_WARN("ObDDLSSTableRedoWriter has not been inited", K(ret)); + LOG_WARN("ddl redo log writer has not been inited", K(ret)); } else if (OB_UNLIKELY(!redo_info.is_valid() || 0 == task_id)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(task_id)); + LOG_WARN("invalid arguments", K(ret), K(redo_info), K(task_id)); } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("get ls failed", K(ret), K(ls_id_)); } else if (OB_ISNULL(ls = ls_handle.get_ls())) { @@ -1358,7 +1179,7 @@ int ObDDLSSTableRedoWriter::write_redo_log(const ObDDLMacroBlockRedoInfo &redo_i ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate memory failed", K(ret), K(BUF_SIZE)); } else if (!remote_write_) { - if 
(OB_FAIL(ObDDLMacroBlockRedoWriter::write_macro_redo(tablet_handle, ddl_kv_mgr_handle, redo_info, ls->get_ls_id(), task_id, ls->get_log_handler(), macro_block_id, buffer_, ddl_redo_handle_))) { + if (OB_FAIL(local_write_ddl_macro_redo(redo_info, ls->get_ls_id(), task_id, ls->get_log_handler(), macro_block_id, buffer_, ddl_redo_handle_))) { if (ObDDLUtil::need_remote_write(ret) && allow_remote_write) { if (OB_FAIL(switch_to_remote_write())) { LOG_WARN("fail to switch to remote write", K(ret)); @@ -1366,24 +1187,29 @@ int ObDDLSSTableRedoWriter::write_redo_log(const ObDDLMacroBlockRedoInfo &redo_i } else { LOG_WARN("fail to write ddl redo clog", K(ret), K(MTL_GET_TENANT_ROLE_CACHE())); } + } else { + LOG_INFO("local write redo log of macro block", K(redo_info), K(macro_block_id)); } } if (OB_SUCC(ret) && remote_write_) { - if (OB_FAIL(retry_remote_write_ddl_clog( [&]() { return remote_write_macro_redo(task_id, redo_info); }))) { + if (OB_FAIL(retry_remote_write_macro_redo(task_id, redo_info))) { LOG_WARN("remote write redo failed", K(ret), K(task_id)); + } else { + LOG_INFO("remote write redo log of macro block", K(redo_info), K(macro_block_id)); } } return ret; } -int ObDDLSSTableRedoWriter::wait_redo_log_finish(const ObDDLMacroBlockRedoInfo &redo_info, - const blocksstable::MacroBlockId ¯o_block_id) +int ObDDLRedoLogWriter::wait_macro_block_log_finish( + const ObDDLMacroBlockRedoInfo &redo_info, + const blocksstable::MacroBlockId ¯o_block_id) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; - LOG_WARN("ObDDLSSTableRedoWriter has not been inited", K(ret)); + LOG_WARN("ddl redo log writer has not been inited", K(ret)); } else if (remote_write_) { // remote write no need to wait local handle } else if (OB_UNLIKELY(!ddl_redo_handle_.is_valid())) { @@ -1397,13 +1223,14 @@ int ObDDLSSTableRedoWriter::wait_redo_log_finish(const ObDDLMacroBlockRedoInfo & return ret; } -int ObDDLSSTableRedoWriter::write_commit_log(ObTabletHandle &tablet_handle, - 
ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const bool allow_remote_write, - const ObITable::TableKey &table_key, - SCN &commit_scn, - bool &is_remote_write) - +int ObDDLRedoLogWriter::write_commit_log( + const bool allow_remote_write, + const ObITable::TableKey &table_key, + const share::SCN &start_scn, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + SCN &commit_scn, + bool &is_remote_write, + uint32_t &lock_tid) { int ret = OB_SUCCESS; #ifdef ERRSIM @@ -1419,19 +1246,23 @@ int ObDDLSSTableRedoWriter::write_commit_log(ObTabletHandle &tablet_handle, ObDDLCommitLogHandle handle; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; - LOG_WARN("ObDDLSSTableRedoWriter has not been inited", K(ret)); - } else if (OB_UNLIKELY(!table_key.is_valid() || !start_scn_.is_valid_and_not_min())) { + LOG_WARN("ddl redo log writer has not been inited", K(ret)); + } else if (OB_UNLIKELY(!table_key.is_valid() || !start_scn.is_valid_and_not_min())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(table_key), K(start_scn_)); - } else if (OB_FAIL(log.init(table_key, get_start_scn()))) { - LOG_WARN("fail to init DDLCommitLog", K(ret), K(table_key), K(start_scn_)); + LOG_WARN("invalid arguments", K(ret), K(table_key), K(start_scn)); + } else if (OB_FAIL(log.init(table_key, start_scn))) { + LOG_WARN("fail to init DDLCommitLog", K(ret), K(table_key), K(start_scn)); } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("get ls failed", K(ret), K(ls_id_)); } else if (OB_ISNULL(ls = ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("ls should not be null", K(ret), K(table_key)); + } else if (start_scn != direct_load_mgr_handle.get_obj()->get_start_scn()) { + ret = OB_TASK_EXPIRED; + LOG_WARN("current task is restarted", K(ret), K(start_scn), "current_start_scn", direct_load_mgr_handle.get_obj()->get_start_scn()); } else if (!remote_write_) { - if 
(OB_FAIL(ObDDLRedoLogWriter::get_instance().write_ddl_commit_log(tablet_handle, ddl_kv_mgr_handle, log, ObDDLClogType::DDL_COMMIT_LOG, ls_id_, ls->get_log_handler(), handle))) { + if (OB_FAIL(local_write_ddl_commit_log( + log, ObDDLClogType::DDL_COMMIT_LOG, ls_id_, ls->get_log_handler(), direct_load_mgr_handle, handle, lock_tid))) { if (ObDDLUtil::need_remote_write(ret) && allow_remote_write) { if (OB_FAIL(switch_to_remote_write())) { LOG_WARN("fail to switch to remote write", K(ret), K(table_key)); @@ -1443,23 +1274,25 @@ int ObDDLSSTableRedoWriter::write_commit_log(ObTabletHandle &tablet_handle, LOG_WARN("wait ddl commit log finish failed", K(ret), K(table_key)); } else { commit_scn = handle.get_commit_scn(); + LOG_INFO("local write ddl commit log", K(ret), K(table_key), K(commit_scn)); } } if (OB_SUCC(ret) && remote_write_) { obrpc::ObRpcRemoteWriteDDLCommitLogArg arg; - if (OB_FAIL(arg.init(MTL_ID(), leader_ls_id_, table_key, get_start_scn()))) { + if (OB_FAIL(arg.init(MTL_ID(), leader_ls_id_, table_key, start_scn))) { LOG_WARN("fail to init ObRpcRemoteWriteDDLCommitLogArg", K(ret)); - } else if (OB_FAIL(retry_remote_write_ddl_clog( [&]() { return remote_write_commit_log(arg, commit_scn); }))) { + } else if (OB_FAIL(retry_remote_write_commit_clog(arg, commit_scn))) { LOG_WARN("remote write ddl commit log failed", K(ret), K(arg)); } else { is_remote_write = !(leader_addr_ == GCTX.self_addr()); + LOG_INFO("remote write ddl commit log", K(ret), K(table_key), K(commit_scn), K(is_remote_write)); } } SERVER_EVENT_ADD("ddl", "ddl write commit log", "tenant_id", MTL_ID(), "ret", ret, "trace_id", *ObCurTraceId::get_trace_id(), - "start_scn", start_scn_, + "start_scn", direct_load_mgr_handle.get_obj()->get_start_scn(), "tablet_id", tablet_id_, "commit_scn", commit_scn, is_remote_write); @@ -1467,7 +1300,8 @@ int ObDDLSSTableRedoWriter::write_commit_log(ObTabletHandle &tablet_handle, return ret; } -int ObDDLSSTableRedoWriter::switch_to_remote_write() + +int 
ObDDLRedoLogWriter::switch_to_remote_write() { int ret = OB_SUCCESS; const uint64_t tenant_id = MTL_ID(); @@ -1490,47 +1324,71 @@ int ObDDLSSTableRedoWriter::switch_to_remote_write() LOG_WARN("get leader failed", K(ret), K(leader_ls_id_)); } else { remote_write_ = true; - LOG_INFO("switch to remote write", K(ret), K_(tablet_id)); + LOG_INFO("switch to remote write", K(ret), K_(tablet_id), K_(leader_ls_id), K_(leader_addr)); } return ret; } -template -int ObDDLSSTableRedoWriter::retry_remote_write_ddl_clog(T function) +int ObDDLRedoLogWriter::retry_remote_write_macro_redo( + const int64_t task_id, + const blocksstable::ObDDLMacroBlockRedoInfo &redo_info) { int ret = OB_SUCCESS; int retry_cnt = 0; const int64_t MAX_REMOTE_WRITE_RETRY_CNT = 800; - while (OB_SUCC(ret)) { - if (OB_FAIL(switch_to_remote_write())) { - LOG_WARN("flush ls leader location failed", K(ret)); - } else if (OB_FAIL(function())) { - if (OB_NOT_MASTER == ret && retry_cnt++ < MAX_REMOTE_WRITE_RETRY_CNT) { - ob_usleep(10 * 1000); // 10 ms. - ret = OB_SUCCESS; + if (OB_UNLIKELY(!redo_info.is_valid() || 0 == task_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(redo_info)); + } else { + while (OB_SUCC(ret)) { + if (OB_FAIL(switch_to_remote_write())) { + LOG_WARN("flush ls leader location failed", K(ret)); + } else if (OB_FAIL(remote_write_ddl_macro_redo(task_id, redo_info))) { + if (OB_NOT_MASTER == ret && retry_cnt++ < MAX_REMOTE_WRITE_RETRY_CNT) { + ob_usleep(10 * 1000); // 10 ms. + ret = OB_SUCCESS; + } else { + LOG_WARN("remote write macro redo failed", K(ret), K_(leader_ls_id), K_(leader_addr)); + } } else { - LOG_WARN("remote write macro redo failed", K(ret), K_(leader_ls_id), K_(leader_addr)); + break; // remote write ddl clog successfully. } - } else { - break; // remote write ddl clog successfully. 
} } return ret; } -int ObDDLSSTableRedoWriter::remote_write_macro_redo(const int64_t task_id, const ObDDLMacroBlockRedoInfo &redo_info) +int ObDDLRedoLogWriter::retry_remote_write_commit_clog( + const obrpc::ObRpcRemoteWriteDDLCommitLogArg &arg, + share::SCN &commit_scn) { int ret = OB_SUCCESS; - if (OB_FAIL(ObDDLMacroBlockRedoWriter::remote_write_macro_redo(task_id, - leader_addr_, - leader_ls_id_, - redo_info))) { - LOG_WARN("remote write macro redo failed", K(ret)); + int retry_cnt = 0; + const int64_t MAX_REMOTE_WRITE_RETRY_CNT = 800; + if (OB_UNLIKELY(!arg.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(arg)); + } else { + while (OB_SUCC(ret)) { + if (OB_FAIL(switch_to_remote_write())) { + LOG_WARN("flush ls leader location failed", K(ret)); + } else if (OB_FAIL(remote_write_ddl_commit_redo(arg, commit_scn))) { + if (OB_NOT_MASTER == ret && retry_cnt++ < MAX_REMOTE_WRITE_RETRY_CNT) { + ob_usleep(10 * 1000); // 10 ms. + ret = OB_SUCCESS; + } else { + LOG_WARN("remote write macro redo failed", K(ret), K_(leader_ls_id), K_(leader_addr)); + } + } else { + break; // remote write ddl clog successfully. 
+ } + } } return ret; } -int ObDDLSSTableRedoWriter::remote_write_commit_log(const obrpc::ObRpcRemoteWriteDDLCommitLogArg &arg, SCN &commit_scn) + +int ObDDLRedoLogWriter::remote_write_ddl_commit_redo(const obrpc::ObRpcRemoteWriteDDLCommitLogArg &arg, SCN &commit_scn) { int ret = OB_SUCCESS; ObSrvRpcProxy *srv_rpc_proxy = GCTX.srv_rpc_proxy_; @@ -1550,7 +1408,7 @@ int ObDDLSSTableRedoWriter::remote_write_commit_log(const obrpc::ObRpcRemoteWrit return ret; } -ObDDLSSTableRedoWriter::~ObDDLSSTableRedoWriter() +ObDDLRedoLogWriter::~ObDDLRedoLogWriter() { if (nullptr != buffer_) { ob_free(buffer_); @@ -1559,76 +1417,98 @@ ObDDLSSTableRedoWriter::~ObDDLSSTableRedoWriter() } ObDDLRedoLogWriterCallback::ObDDLRedoLogWriterCallback() - : is_inited_(false), redo_info_(), table_key_(), macro_block_id_(), ddl_writer_(nullptr), block_buffer_(nullptr), task_id_(0) + : is_inited_(false), redo_info_(), block_type_(ObDDLMacroBlockType::DDL_MB_INVALID_TYPE), + table_key_(), macro_block_id_(), ddl_writer_(nullptr), task_id_(0), + data_format_version_(0), row_id_offset_(-1) { } ObDDLRedoLogWriterCallback::~ObDDLRedoLogWriterCallback() { (void)wait(); - if (nullptr != block_buffer_) { - ob_free(block_buffer_); - block_buffer_ = nullptr; - } } int ObDDLRedoLogWriterCallback::init(const ObDDLMacroBlockType block_type, const ObITable::TableKey &table_key, const int64_t task_id, - ObDDLSSTableRedoWriter *ddl_writer, - ObDDLKvMgrHandle &ddl_kv_mgr_handle) + const share::SCN &start_scn, + const uint64_t data_format_version, + ObDDLRedoLogWriter *ddl_writer, + const int64_t row_id_offset/*=-1*/) { int ret = OB_SUCCESS; ObLS *ls = nullptr; ObLSService *ls_service = nullptr; bool is_cache_hit = false; ObLSHandle ls_handle; - ObTabletHandle tablet_handle; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; - LOG_WARN("ObDDLSSTableRedoWriter has been inited twice", K(ret)); - } else if (OB_UNLIKELY(!table_key.is_valid() || nullptr == ddl_writer || DDL_MB_INVALID_TYPE == block_type || 0 == task_id 
|| !ddl_kv_mgr_handle.is_valid())) { + LOG_WARN("ddl redo log writer has been inited twice", K(ret)); + } else if (OB_UNLIKELY(!table_key.is_valid() || nullptr == ddl_writer || DDL_MB_INVALID_TYPE == block_type || 0 == task_id + || data_format_version < 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(table_key), K(block_type), K(task_id)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ddl_kv_mgr_handle.get_obj()->get_ls_id(), ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), KPC(ddl_kv_mgr_handle.get_obj())); - } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, - ddl_kv_mgr_handle.get_obj()->get_tablet_id(), - tablet_handle, - ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { - LOG_WARN("get tablet handle failed", K(ret), KPC(ddl_kv_mgr_handle.get_obj())); + LOG_WARN("invalid arguments", K(ret), K(table_key), K(block_type), K(data_format_version), K(task_id), KP(ddl_writer)); + } else if (OB_UNLIKELY(table_key.is_column_store_sstable() && row_id_offset < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument of column group data", K(ret), K(table_key), K(row_id_offset)); } else { block_type_ = block_type; table_key_ = table_key; ddl_writer_ = ddl_writer; task_id_ = task_id; - tablet_handle_ = tablet_handle; - ddl_kv_mgr_handle_ = ddl_kv_mgr_handle; + start_scn_ = start_scn; + data_format_version_ = data_format_version; + row_id_offset_ = row_id_offset; is_inited_ = true; } return ret; } +void ObDDLRedoLogWriterCallback::reset() +{ + is_inited_ = false; + redo_info_.reset(); + block_type_ = ObDDLMacroBlockType::DDL_MB_INVALID_TYPE; + table_key_.reset(); + macro_block_id_.reset(); + ddl_writer_ = nullptr; + task_id_ = 0; + start_scn_.reset(); + data_format_version_ = 0; + row_id_offset_ = -1; +} + +bool ObDDLRedoLogWriterCallback::is_column_group_info_valid() const +{ + return table_key_.is_column_store_sstable() && row_id_offset_ >= 0; +} + int ObDDLRedoLogWriterCallback::write(const 
ObMacroBlockHandle ¯o_handle, const ObLogicMacroBlockId &logic_id, char *buf, const int64_t buf_len, - const int64_t data_seq) + const int64_t row_count) { int ret = OB_SUCCESS; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedoLogWriterCallback is not inited", K(ret)); - } else if (OB_FAIL(prepare_block_buffer_if_need())) { - LOG_WARN("prepare block buffer failed", K(ret)); + } else if (OB_UNLIKELY(!macro_handle.is_valid() || !logic_id.is_valid() || nullptr == buf || row_count <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(macro_handle), K(logic_id), KP(buf), K(row_count)); } else { macro_block_id_ = macro_handle.get_macro_id(); redo_info_.table_key_ = table_key_; redo_info_.data_buffer_.assign(buf, buf_len); redo_info_.block_type_ = block_type_; redo_info_.logic_id_ = logic_id; - redo_info_.start_scn_ = ddl_writer_->get_start_scn(); - if (OB_FAIL(ddl_writer_->write_redo_log(redo_info_, macro_block_id_, true/*allow remote write*/, task_id_, tablet_handle_, ddl_kv_mgr_handle_))) { + redo_info_.start_scn_ = start_scn_; + redo_info_.data_format_version_ = data_format_version_; + if (is_column_group_info_valid()) { + redo_info_.end_row_id_ = row_id_offset_ + row_count - 1; + row_id_offset_ += row_count; + } + if (OB_FAIL(ddl_writer_->write_macro_block_log(redo_info_, macro_block_id_, true/*allow remote write*/, task_id_))) { LOG_WARN("fail to write ddl redo log", K(ret)); } } @@ -1641,21 +1521,9 @@ int ObDDLRedoLogWriterCallback::wait() if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedoLogWriterCallback is not inited", K(ret)); - } else if (OB_FAIL(ddl_writer_->wait_redo_log_finish(redo_info_, macro_block_id_))) { + } else if (OB_FAIL(ddl_writer_->wait_macro_block_log_finish(redo_info_, macro_block_id_))) { LOG_WARN("fail to wait redo log finish", K(ret)); } return ret; } -int ObDDLRedoLogWriterCallback::prepare_block_buffer_if_need() -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(block_buffer_)) { - block_buffer_ = 
static_cast(ob_malloc(OB_SERVER_BLOCK_MGR.get_macro_block_size(), ObMemAttr(MTL_ID(), "DDL_REDO_CB"))); - if (nullptr == block_buffer_) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory for block bufffer failed", K(ret)); - } - } - return ret; -} diff --git a/src/storage/ddl/ob_ddl_redo_log_writer.h b/src/storage/ddl/ob_ddl_redo_log_writer.h index 538857833..e7ed9453c 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.h +++ b/src/storage/ddl/ob_ddl_redo_log_writer.h @@ -18,27 +18,26 @@ #include "storage/ddl/ob_ddl_clog.h" #include "storage/ddl/ob_ddl_struct.h" #include "storage/blocksstable/ob_imacro_block_flush_callback.h" -#include "storage/tx_storage/ob_ls_map.h" #include "storage/tx_storage/ob_ls_handle.h" #include "storage/blocksstable/ob_logic_macro_id.h" -#include "storage/meta_mem/ob_tablet_pointer.h" namespace oceanbase { + namespace blocksstable { struct ObDDLMacroBlockRedoInfo; struct ObSSTableMergeRes; } + namespace logservice { class ObLogHandler; } + namespace storage { -class ObDDLKV; -class ObDDLKVPendingGuard; class ObLSHandle; // control the write speed of ddl clog for 4.0 . 
More detailly, @@ -213,118 +212,109 @@ public: share::SCN commit_scn_; }; -class ObDDLRedoLogWriter final +class ObDDLRedoLock final { + friend class ObDDLRedoLockGuard; public: - static ObDDLRedoLogWriter &get_instance(); + static ObDDLRedoLock &get_instance(); int init(); - int write(ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const ObDDLRedoLog &log, - const uint64_t tenant_id, - const int64_t task_id, - const share::ObLSID &ls_id, - logservice::ObLogHandler *log_handler, - const blocksstable::MacroBlockId ¯o_block_id, - char *buffer, - ObDDLRedoLogHandle &handle); - int write_ddl_start_log(ObLSHandle &ls_handle, - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const ObDDLStartLog &log, - logservice::ObLogHandler *log_handler, - share::SCN &start_scn); - template - int write_ddl_commit_log(ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const T &log, - const ObDDLClogType clog_type, - const share::ObLSID &ls_id, - logservice::ObLogHandler *log_handler, - ObDDLCommitLogHandle &handle); private: - ObDDLRedoLogWriter(); - ~ObDDLRedoLogWriter(); - struct ObDDLRedoLogStat final - { - public: - ObDDLRedoLogStat(); - ~ObDDLRedoLogStat(); - public: - }; - // TODO: traffic control + ObDDLRedoLock(); + ~ObDDLRedoLock(); private: bool is_inited_; common::ObBucketLock bucket_lock_; }; - -class ObDDLMacroBlockRedoWriter final +class ObDDLRedoLockGuard { public: - static int write_macro_redo(ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const ObDDLMacroBlockRedoInfo &redo_info, - const share::ObLSID &ls_id, - const int64_t task_id, - logservice::ObLogHandler *log_handler, - const blocksstable::MacroBlockId ¯o_block_id, - char *buffer, - ObDDLRedoLogHandle &handle); - static int remote_write_macro_redo(const int64_t task_id, - const ObAddr &leader_addr, - const share::ObLSID &leader_ls_id, - const blocksstable::ObDDLMacroBlockRedoInfo &redo_info); + explicit 
ObDDLRedoLockGuard(const uint64_t hash_val) + : guard_(ObDDLRedoLock::get_instance().bucket_lock_, hash_val) {} + ~ObDDLRedoLockGuard() {} private: - ObDDLMacroBlockRedoWriter() = default; - ~ObDDLMacroBlockRedoWriter() = default; -private: - static const int64_t SLEEP_INTERVAL = 1 * 1000; // 1ms + common::ObBucketHashWLockGuard guard_; }; + // This class should be the entrance to write redo log and commit log -class ObDDLSSTableRedoWriter final +class ObDDLRedoLogWriter final { public: - ObDDLSSTableRedoWriter(); - ~ObDDLSSTableRedoWriter(); - int init(const share::ObLSID &ls_id, const ObTabletID &tablet_id); - int start_ddl_redo(const ObITable::TableKey &table_key, - const int64_t ddl_task_id, - const int64_t execution_id, - const int64_t data_format_version, - ObDDLKvMgrHandle &ddl_kv_mgr_handle); - int end_ddl_redo_and_create_ddl_sstable(const share::ObLSID &ls_id, - const ObITable::TableKey &table_key, - const uint64_t table_id, - const int64_t execution_id, - const int64_t ddl_task_id); - int write_redo_log(const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, - const blocksstable::MacroBlockId ¯o_block_id, - const bool allow_remote_write, - const int64_t task_id, - ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle); - int wait_redo_log_finish(const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, - const blocksstable::MacroBlockId ¯o_block_id); - int write_commit_log(ObTabletHandle &tablet_handle, - ObDDLKvMgrHandle &ddl_kv_mgr_handle, - const bool allow_remote_write, - const ObITable::TableKey &table_key, - share::SCN &commit_scn, - bool &is_remote_write); - OB_INLINE void set_start_scn(const share::SCN &start_scn) { start_scn_.atomic_set(start_scn); } - OB_INLINE share::SCN get_start_scn() const { return start_scn_.atomic_get(); } + ObDDLRedoLogWriter(); + ~ObDDLRedoLogWriter(); + int init( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id); + void reset(); + int write_start_log( + const ObITable::TableKey &table_key, + 
const int64_t execution_id, + const uint64_t data_format_version, + const ObDirectLoadType direct_load_type, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle, + ObTabletDirectLoadMgrHandle &mgr_handle, + uint32_t &lock_tid, + share::SCN &start_scn); + int write_macro_block_log( + const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, + const blocksstable::MacroBlockId ¯o_block_id, + const bool allow_remote_write, + const int64_t task_id); + int wait_macro_block_log_finish( + const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, + const blocksstable::MacroBlockId ¯o_block_id); + int write_commit_log( + const bool allow_remote_write, + const ObITable::TableKey &table_key, + const share::SCN &start_scn, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + share::SCN &commit_scn, + bool &is_remote_write, + uint32_t &lock_tid); private: int switch_to_remote_write(); - int remote_write_macro_redo(const int64_t task_id, const ObDDLMacroBlockRedoInfo &redo_info); - int remote_write_commit_log(const obrpc::ObRpcRemoteWriteDDLCommitLogArg &arg, SCN &commit_scn); - template - int retry_remote_write_ddl_clog(T function); + int local_write_ddl_start_log( + const ObDDLStartLog &log, + ObLSHandle &ls_handle, + logservice::ObLogHandler *log_handler, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + uint32_t &lock_tid, + share::SCN &start_scn); + int local_write_ddl_commit_log( + const ObDDLCommitLog &log, + const ObDDLClogType clog_type, + const share::ObLSID &ls_id, + logservice::ObLogHandler *log_handler, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + ObDDLCommitLogHandle &handle, + uint32_t &lock_tid); + int remote_write_ddl_commit_redo( + const obrpc::ObRpcRemoteWriteDDLCommitLogArg &arg, + share::SCN &commit_scn); + int retry_remote_write_macro_redo( + const int64_t task_id, + const blocksstable::ObDDLMacroBlockRedoInfo &redo_info); + int 
retry_remote_write_commit_clog( + const obrpc::ObRpcRemoteWriteDDLCommitLogArg &arg, + share::SCN &commit_scn); + int local_write_ddl_macro_redo( + const blocksstable::ObDDLMacroBlockRedoInfo &redo_info, + const share::ObLSID &ls_id, + const int64_t task_id, + logservice::ObLogHandler *log_handler, + const blocksstable::MacroBlockId ¯o_block_id, + char *buffer, + ObDDLRedoLogHandle &handle); + int remote_write_ddl_macro_redo( + const int64_t task_id, + const blocksstable::ObDDLMacroBlockRedoInfo &redo_info); private: bool is_inited_; bool remote_write_; - share::SCN start_scn_; share::ObLSID ls_id_; ObTabletID tablet_id_; ObDDLRedoLogHandle ddl_redo_handle_; @@ -333,36 +323,41 @@ private: char *buffer_; }; -// write macro redo for data block, need to set lsn on ObDDLSSTableRedoWriter when commit. +// write macro redo for data block, need to set lsn on ObDDLRedoLogWriter when commit. class ObDDLRedoLogWriterCallback : public blocksstable::ObIMacroBlockFlushCallback { public: ObDDLRedoLogWriterCallback(); virtual ~ObDDLRedoLogWriterCallback(); - int init(const blocksstable::ObDDLMacroBlockType block_type, - const ObITable::TableKey &table_key, - const int64_t task_id, - ObDDLSSTableRedoWriter *ddl_writer, - ObDDLKvMgrHandle &ddl_kv_mgr_handle); + int init( + const blocksstable::ObDDLMacroBlockType block_type, + const ObITable::TableKey &table_key, + const int64_t task_id, + const share::SCN &start_scn, + const uint64_t data_format_version, + ObDDLRedoLogWriter *ddl_writer, + const int64_t row_id_offset = -1); + void reset(); int write( - const ObMacroBlockHandle ¯o_handle, + const blocksstable::ObMacroBlockHandle ¯o_handle, const blocksstable::ObLogicMacroBlockId &logic_id, char *buf, const int64_t buf_len, - const int64_t data_seq); + const int64_t row_count); int wait(); - int prepare_block_buffer_if_need(); +private: + bool is_column_group_info_valid() const; private: bool is_inited_; blocksstable::ObDDLMacroBlockRedoInfo redo_info_; 
blocksstable::ObDDLMacroBlockType block_type_; ObITable::TableKey table_key_; blocksstable::MacroBlockId macro_block_id_; - ObDDLSSTableRedoWriter *ddl_writer_; - char *block_buffer_; + ObDDLRedoLogWriter *ddl_writer_; int64_t task_id_; - ObTabletHandle tablet_handle_; - ObDDLKvMgrHandle ddl_kv_mgr_handle_; + share::SCN start_scn_; + uint64_t data_format_version_; + int64_t row_id_offset_; }; } // end namespace storage diff --git a/src/storage/ddl/ob_ddl_replay_executor.cpp b/src/storage/ddl/ob_ddl_replay_executor.cpp index 712124346..a76121649 100644 --- a/src/storage/ddl/ob_ddl_replay_executor.cpp +++ b/src/storage/ddl/ob_ddl_replay_executor.cpp @@ -18,6 +18,7 @@ #include "storage/ls/ob_ls.h" #include "storage/compaction/ob_schedule_dag_func.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" using namespace oceanbase::common; using namespace oceanbase::lib; @@ -40,6 +41,7 @@ int ObDDLReplayExecutor::check_need_replay_ddl_log_( ObTablet *tablet = nullptr; ObDDLKvMgrHandle ddl_kv_mgr_handle; ObMigrationStatus migration_status; + ObTabletBindingMdsUserData ddl_data; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); @@ -52,6 +54,12 @@ int ObDDLReplayExecutor::check_need_replay_ddl_log_( } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet is null", K(ret), K(tablet_handle)); + } else if (tablet->is_empty_shell()) { + need_replay = false; + if (REACH_COUNT_INTERVAL(1000L)) { + LOG_INFO("no need to replay ddl log, because this tablet is empty shell", + K(tablet_handle), "tablet_meta", tablet->get_tablet_meta()); + } } else if (tablet->get_tablet_meta().ha_status_.is_expected_status_deleted()) { need_replay = false; if (REACH_COUNT_INTERVAL(1000L)) { @@ -64,18 +72,22 @@ int ObDDLReplayExecutor::check_need_replay_ddl_log_( LOG_INFO("no need to replay ddl log, because the log ts is less than the ddl checkpoint ts", K(tablet_handle), 
K(scn), "ddl_checkpoint_ts", tablet->get_tablet_meta().ddl_checkpoint_scn_); } - } else if (OB_FAIL(tablet->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - if (OB_ENTRY_NOT_EXIST != ret) { - LOG_WARN("get ddl kv manager failed", K(ret), K(tablet_handle)); - } else { - need_replay = (ddl_start_scn == scn); // only replay start log if ddl kv mgr is null - ret = OB_SUCCESS; - } - } else if (ddl_start_scn < ddl_kv_mgr_handle.get_obj()->get_start_scn()) { + } else if (ddl_start_scn < tablet->get_tablet_meta().ddl_start_scn_) { need_replay = false; if (REACH_COUNT_INTERVAL(1000L)) { LOG_INFO("no need to replay ddl log, because the ddl start log ts is less than the value in ddl kv manager", - K(tablet_handle), K(ddl_start_scn), "ddl_start_scn_in_ddl_kv_mgr", ddl_kv_mgr_handle.get_obj()->get_start_scn()); + K(tablet_handle), K(ddl_start_scn), "ddl_start_scn_in_tablet", tablet->get_tablet_meta().ddl_start_scn_); + } + } else if (OB_FAIL(tablet_handle.get_obj()->ObITabletMdsInterface::get_ddl_data(share::SCN::max_scn(), ddl_data))) { + LOG_WARN("failed to get ddl data from tablet", K(ret), K(tablet_handle)); + } else if (ddl_data.lob_meta_tablet_id_.is_valid()) { + ObTabletHandle lob_tablet_handle; + const ObTabletID lob_tablet_id = ddl_data.lob_meta_tablet_id_; + if (OB_FAIL(ls_->replay_get_tablet_no_check(lob_tablet_id, scn, lob_tablet_handle))) { + LOG_WARN("get tablet handle failed", K(ret), K(lob_tablet_id), K(scn)); + } else if (lob_tablet_handle.get_obj()->is_empty_shell()) { + need_replay = false; + LOG_INFO("lob tablet is empty, skip replay", K(ret), K(tablet_id_), K(lob_tablet_id)); } } return ret; @@ -113,35 +125,68 @@ int ObDDLStartReplayExecutor::init( return ret; } -int ObDDLStartReplayExecutor::do_replay_(ObTabletHandle &handle) +int ObDDLStartReplayExecutor::do_replay_(ObTabletHandle &tablet_handle) { int ret = OB_SUCCESS; - ObITable::TableKey table_key = log_->get_table_key(); - ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + 
ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + const int64_t unused_context_id = -1; bool need_replay = true; - if (OB_FAIL(check_need_replay_ddl_log_(handle, scn_, scn_, need_replay))) { + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedoLogReplayer has not been inited", K(ret)); + } else if (OB_UNLIKELY(!log_->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K_(log)); + } else if (OB_FAIL(check_need_replay_ddl_log_(tablet_handle, scn_, scn_, need_replay))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need replay ddl log", K(ret), K(handle), K(scn_)); + LOG_WARN("fail to check need replay ddl log", K(ret), K_(tablet_id), K_(scn)); } } else if (!need_replay) { - ret = OB_NO_NEED_UPDATE; - LOG_WARN("skip replay ddl start", K(ret), "ls_id", ls_->get_ls_id(), K(handle)); - } else if (OB_FAIL(handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle, true/*try_create*/))) { - LOG_WARN("create ddl kv mgr failed", K(ret), K(handle), KPC_(log), K_(scn)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->ddl_start(*ls_, - *handle.get_obj(), - table_key, - scn_, - log_->get_data_format_version(), - log_->get_execution_id(), - SCN::min_scn()/*checkpoint_scn*/))) { - if (OB_TASK_EXPIRED != ret) { - LOG_WARN("start ddl log failed", K(ret), KPC_(log), K_(scn)); - } else { - ret = OB_SUCCESS; // ignored expired ddl start log - } + // do nothing + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("need replay but tablet handle is invalid", K(ret), K(need_replay), K(tablet_handle)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(MTL_ID())); } else { - LOG_INFO("succeed to replay ddl start log", K(ret), KPC_(log), K_(scn)); + ObTabletDirectLoadInsertParam direct_load_param; + const ObITable::TableKey &table_key = 
log_->get_table_key(); + direct_load_param.is_replay_ = true; + bool is_major_sstable_exist = false; + direct_load_param.common_param_.ls_id_ = tablet_handle.get_obj()->get_tablet_meta().ls_id_; + direct_load_param.common_param_.tablet_id_ = table_key.tablet_id_; + direct_load_param.common_param_.data_format_version_ = log_->get_data_format_version(); + direct_load_param.common_param_.direct_load_type_ = log_->get_direct_load_type(); + direct_load_param.common_param_.read_snapshot_ = table_key.get_snapshot_version(); + if (OB_FAIL(tenant_direct_load_mgr->create_tablet_direct_load(unused_context_id, log_->get_execution_id(), direct_load_param))) { + LOG_WARN("create tablet manager failed", K(ret)); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + ls_->get_ls_id(), + table_key.tablet_id_, + true/* is_full_direct_load */, + direct_load_mgr_handle, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_SUCCESS; + LOG_INFO("ddl start log is expired, skip", K(ret), KPC(log_), K(scn_)); + } else { + LOG_WARN("get tablet mgr failed", K(ret), K(table_key)); + } + } else if (OB_FAIL(direct_load_mgr_handle.get_full_obj()->start(*tablet_handle.get_obj(), + table_key, scn_, log_->get_data_format_version(), log_->get_execution_id(), SCN::min_scn()/*checkpoint_scn*/))) { + LOG_WARN("direct load start failed", K(ret)); + if (OB_TASK_EXPIRED != ret) { + LOG_WARN("start ddl log failed", K(ret), K_(log), K_(scn)); + } else { + ret = OB_SUCCESS; // ignored expired ddl start log + } + } else { + LOG_INFO("succeed to replay ddl start log", K(ret), KPC_(log), K_(scn)); + } } LOG_INFO("finish replay ddl start log", K(ret), K(need_replay), KPC_(log), K_(scn), "ddl_event_info", ObDDLEventInfo()); return ret; @@ -179,37 +224,48 @@ int ObDDLRedoReplayExecutor::init( return ret; } -int ObDDLRedoReplayExecutor::do_replay_(ObTabletHandle &handle) +int ObDDLRedoReplayExecutor::do_replay_(ObTabletHandle &tablet_handle) { 
int ret = OB_SUCCESS; - const ObDDLMacroBlockRedoInfo &redo_info = log_->get_redo_info(); bool need_replay = true; ObTabletMemberWrapper table_store_wrapper; - if (OB_FAIL(check_need_replay_ddl_log_(handle, redo_info.start_scn_, scn_, need_replay))) { + ObDDLMacroBlock macro_block; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedoLogExecutor has not been inited", K(ret)); + } else if (OB_UNLIKELY(!log_->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K_(log)); + } else if (OB_FAIL(check_need_replay_ddl_log_(tablet_handle, log_->get_redo_info().start_scn_, scn_, need_replay))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need replay ddl log", K(ret), K(handle), K_(scn)); + LOG_WARN("fail to check need replay ddl log", K(ret), K_(tablet_id), K_(scn)); } } else if (!need_replay) { - ret = OB_NO_NEED_UPDATE; - LOG_WARN("skip replay ddl redo", K(ret), "ls_id", ls_->get_ls_id(), K(handle)); - } else if (OB_UNLIKELY(!handle.is_valid())) { + // do nothing + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("need replay but tablet handle is invalid", K(ret), K(need_replay), K(handle)); - } else if (OB_FAIL(handle.get_obj()->fetch_table_store(table_store_wrapper))) { + LOG_WARN("need replay but tablet handle is invalid", K(ret), K(need_replay), K(tablet_handle)); + } else if (OB_FAIL(tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) { LOG_WARN("fail to fetch table store", K(ret)); + } else if (!table_store_wrapper.get_member()->get_major_sstables().empty()) { + // major sstable already exist, means ddl commit success + need_replay = false; + if (REACH_TIME_INTERVAL(1000L * 1000L)) { + LOG_INFO("no need to replay ddl log, because the major sstable already exist", K_(tablet_id)); + } } else { ObMacroBlockWriteInfo write_info; ObMacroBlockHandle macro_handle; + const ObDDLMacroBlockRedoInfo &redo_info = log_->get_redo_info(); write_info.buffer_ = 
redo_info.data_buffer_.ptr(); write_info.size_= redo_info.data_buffer_.length(); write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_WRITE); write_info.io_timeout_ms_ = max(DDL_FLUSH_MACRO_BLOCK_TIMEOUT / 1000L, GCONF._data_storage_io_timeout / 1000L); - ObDDLMacroBlock macro_block; if (OB_FAIL(ObBlockManager::async_write_block(write_info, macro_handle))) { LOG_WARN("fail to async write block", K(ret), K(write_info), K(macro_handle)); } else if (OB_FAIL(macro_handle.wait())) { - LOG_WARN("fail to wait macro block io finish", K(ret)); + LOG_WARN("fail to wait macro block io finish", K(ret), K(write_info)); } else if (OB_FAIL(macro_block.block_handle_.set_block_id(macro_handle.get_macro_id()))) { LOG_WARN("set macro block id failed", K(ret), K(macro_handle.get_macro_id())); } else { @@ -219,12 +275,45 @@ int ObDDLRedoReplayExecutor::do_replay_(ObTabletHandle &handle) macro_block.buf_ = redo_info.data_buffer_.ptr(); macro_block.size_ = redo_info.data_buffer_.length(); macro_block.ddl_start_scn_ = redo_info.start_scn_; - if (OB_FAIL(ObDDLKVPendingGuard::set_macro_block(handle.get_obj(), macro_block))) { - LOG_WARN("set macro block into ddl kv failed", K(ret), K(handle), K(macro_block)); + macro_block.table_key_ = redo_info.table_key_; + macro_block.end_row_id_ = redo_info.end_row_id_; + const int64_t snapshot_version = redo_info.table_key_.get_snapshot_version(); + const ObITable::TableKey &table_key = redo_info.table_key_; + bool is_major_sstable_exist = false; + uint64_t data_format_version = redo_info.data_format_version_; + if (data_format_version <= 0) { + // to upgrade from lower version without `data_format_version` in redo log, + // use data_format_version in start log instead. 
+ ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret)); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + ls_->get_ls_id(), + redo_info.table_key_.tablet_id_, + true/* is_full_direct_load */, + direct_load_mgr_handle, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_SUCCESS; + LOG_INFO("major sstable already exist", K(ret), K(scn_), K(table_key)); + } else { + LOG_WARN("get tablet mgr failed", K(ret), K(table_key)); + } + } else { + data_format_version = direct_load_mgr_handle.get_obj()->get_data_format_version(); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObDDLKVPendingGuard::set_macro_block(tablet_handle.get_obj(), macro_block, + snapshot_version, data_format_version))) { + LOG_WARN("set macro block into ddl kv failed", K(ret), K(tablet_handle), K(macro_block), + K(snapshot_version), K(data_format_version)); } } } - LOG_INFO("finish replay ddl redo log", K(ret), K(need_replay), KPC_(log), "ddl_event_info", ObDDLEventInfo()); + FLOG_INFO("finish replay ddl redo log", K(ret), K(need_replay), KPC_(log), K(macro_block), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -259,33 +348,60 @@ int ObDDLCommitReplayExecutor::init( return ret; } -int ObDDLCommitReplayExecutor::do_replay_(ObTabletHandle &handle) //TODO(jianyun.sjy): check it +int ObDDLCommitReplayExecutor::do_replay_(ObTabletHandle &tablet_handle) //TODO(jianyun.sjy): check it { int ret = OB_SUCCESS; - ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObITable::TableKey table_key; + ObTabletFullDirectLoadMgr *data_direct_load_mgr = nullptr; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; bool need_replay = true; + bool is_major_sstable_exist = false; - if (OB_FAIL(check_need_replay_ddl_log_(handle, log_->get_start_scn(), scn_, 
need_replay))) { + DEBUG_SYNC(BEFORE_REPLAY_DDL_PREPRARE); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedoLogReplayer has not been inited", K(ret)); + } else if (OB_UNLIKELY(!log_->is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K_(log)); + } else if (OB_FAIL(check_need_replay_ddl_log_(tablet_handle, log_->get_start_scn(), scn_, need_replay))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need replay ddl log", K(ret), K(handle), K_(scn), KPC_(log)); + LOG_WARN("fail to check need replay ddl log", K(ret), K(table_key), K_(scn), K_(log)); } } else if (!need_replay) { - ret = OB_NO_NEED_UPDATE; - LOG_WARN("skip replay ddl commit", K(ret), "ls_id", ls_->get_ls_id(), K(handle)); - } else if (OB_FAIL(handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - LOG_WARN("get ddl kv mgr failed", K(ret), K_(scn), KPC_(log)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->set_commit_scn(handle.get_obj()->get_tablet_meta(), scn_))) { - LOG_WARN("failed to start prepare", K(ret), KPC_(log), K_(scn)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->ddl_commit(*handle.get_obj(), log_->get_start_scn(), scn_))) { + // do nothing + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("need replay but tablet handle is invalid", K(ret), K(need_replay), K(tablet_handle), K_(log), K_(scn)); + } else if (OB_FALSE_IT(table_key = log_->get_table_key())) { + } else if (OB_ISNULL(MTL(ObTenantDirectLoadMgr *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(MTL_ID())); + } else if (OB_FAIL(MTL(ObTenantDirectLoadMgr *)->get_tablet_mgr_and_check_major( + ls_->get_ls_id(), + table_key.tablet_id_, + true/* is_full_direct_load */, + direct_load_mgr_handle, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_SUCCESS; + LOG_INFO("ddl commit log is expired, skip", K(ret), KPC(log_), K(scn_)); + } else { + LOG_WARN("get 
tablet mgr failed", K(ret), K(table_key)); + } + } else if (OB_ISNULL(data_direct_load_mgr = direct_load_mgr_handle.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(table_key)); + } else if (OB_FAIL(data_direct_load_mgr->commit(*tablet_handle.get_obj(), log_->get_start_scn(), scn_, false/*wait_major_generate*/))) { if (OB_TABLET_NOT_EXIST == ret || OB_TASK_EXPIRED == ret) { ret = OB_SUCCESS; // exit when tablet not exist or task expired } else { - LOG_WARN("replay ddl commit log failed", K(ret), KPC_(log), K_(scn)); + LOG_WARN("replay ddl commit log failed", K(ret), K_(log), K_(scn)); } } else { - LOG_INFO("replay ddl commit log success", K(ret), KPC_(log), K_(scn)); + LOG_INFO("replay ddl commit log success", K(ret), K_(log), K_(scn)); } - LOG_INFO("finish replay ddl commit log", K(ret), K(need_replay), K_(scn), KPC_(log), "ddl_event_info", ObDDLEventInfo()); + LOG_INFO("finish replay ddl commit log", K(ret), K(need_replay), K_(log), K_(scn), "ddl_event_info", ObDDLEventInfo()); return ret; } diff --git a/src/storage/ddl/ob_ddl_server_client.cpp b/src/storage/ddl/ob_ddl_server_client.cpp index 712a79427..a42e4c894 100644 --- a/src/storage/ddl/ob_ddl_server_client.cpp +++ b/src/storage/ddl/ob_ddl_server_client.cpp @@ -55,6 +55,7 @@ int ObDDLServerClient::create_hidden_table( const obrpc::ObCreateHiddenTableArg &arg, obrpc::ObCreateHiddenTableRes &res, int64_t &snapshot_version, + uint64_t &data_format_version, sql::ObSQLSessionInfo &session) { int ret = OB_SUCCESS; @@ -90,7 +91,7 @@ int ObDDLServerClient::create_hidden_table( LOG_WARN("failed to set register task id", K(ret), K(res)); } if (OB_SUCC(ret)) { - if (OB_FAIL(wait_task_reach_pending(arg.tenant_id_, res.task_id_, snapshot_version, *GCTX.sql_proxy_))) { + if (OB_FAIL(wait_task_reach_pending(arg.tenant_id_, res.task_id_, snapshot_version, data_format_version, *GCTX.sql_proxy_))) { LOG_WARN("failed to wait table lock. 
remove register task id and abort redef table task.", K(ret), K(arg), K(res)); } #ifdef ERRSIM @@ -130,6 +131,7 @@ int ObDDLServerClient::start_redef_table(const obrpc::ObStartRedefTableArg &arg, ObAddr rs_leader_addr; obrpc::ObCommonRpcProxy *common_rpc_proxy = GCTX.rs_rpc_proxy_; int64_t unused_snapshot_version = OB_INVALID_VERSION; + uint64_t unused_data_format_version = 0; if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(arg)); @@ -142,7 +144,7 @@ int ObDDLServerClient::start_redef_table(const obrpc::ObStartRedefTableArg &arg, LOG_WARN("failed to start redef table", KR(ret), K(arg)); } else if (OB_FAIL(OB_DDL_HEART_BEAT_TASK_CONTAINER.set_register_task_id(res.task_id_, res.tenant_id_))) { LOG_WARN("failed to set register task id", K(ret), K(res)); - } else if (OB_FAIL(wait_task_reach_pending(arg.orig_tenant_id_, res.task_id_, unused_snapshot_version, *GCTX.sql_proxy_))) { + } else if (OB_FAIL(wait_task_reach_pending(arg.orig_tenant_id_, res.task_id_, unused_snapshot_version, unused_data_format_version, *GCTX.sql_proxy_))) { LOG_WARN("failed to wait table lock. 
remove register task id and abort redef table task.", K(ret), K(arg), K(res)); int tmp_ret = OB_SUCCESS; obrpc::ObAbortRedefTableArg abort_redef_table_arg; @@ -383,11 +385,18 @@ int ObDDLServerClient::build_ddl_single_replica_response(const obrpc::ObDDLBuild return ret; } -int ObDDLServerClient::wait_task_reach_pending(const uint64_t tenant_id, const int64_t task_id, int64_t &snapshot_version, ObMySQLProxy &sql_proxy) +int ObDDLServerClient::wait_task_reach_pending( + const uint64_t tenant_id, + const int64_t task_id, + int64_t &snapshot_version, + uint64_t &data_format_version, + ObMySQLProxy &sql_proxy) { int ret = OB_SUCCESS; - const int64_t retry_interval = 100 * 1000; ObSqlString sql_string; + snapshot_version = 0; + data_format_version = 0; + const int64_t retry_interval = 100 * 1000; THIS_WORKER.set_timeout_ts(ObTimeUtility::current_time() + OB_MAX_USER_SPECIFIED_TIMEOUT); SMART_VAR(ObMySQLProxy::MySQLResult, res) { sqlclient::ObMySQLResult *result = NULL; @@ -398,16 +407,9 @@ int ObDDLServerClient::wait_task_reach_pending(const uint64_t tenant_id, const i LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else { while (OB_SUCC(ret)) { - if (OB_FAIL(sql_string.assign_fmt("SELECT status, snapshot_version FROM %s WHERE task_id = %lu", share::OB_ALL_DDL_TASK_STATUS_TNAME, task_id))) { - LOG_WARN("assign sql string failed", K(ret), K(task_id)); - } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, WAIT_REDEF_TASK_REACH_PENDING_SLOW))) { - LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); - } else if (OB_FAIL(sql_proxy.read(res, tenant_id, sql_string.ptr()))) { - LOG_WARN("fail to execute sql", K(ret), K(sql_string)); - } else if (OB_ISNULL(result = res.get_result())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("error unexpected, query result must not be NULL", K(ret)); - } else if (OB_FAIL(result->next())) { + share::ObDDLTaskStatus task_status = share::ObDDLTaskStatus::PREPARE; + if (OB_FAIL(ObDDLUtil::get_data_information(tenant_id, task_id, 
data_format_version, + snapshot_version, task_status))) { if (OB_LIKELY(OB_ITER_END == ret)) { ret = OB_ENTRY_NOT_EXIST; ObAddr unused_addr; @@ -423,16 +425,10 @@ int ObDDLServerClient::wait_task_reach_pending(const uint64_t tenant_id, const i } LOG_WARN("ddl task execute end", K(ret)); } else { - LOG_WARN("fail to get next row", K(ret)); - } - } else { - int task_status = 0; - EXTRACT_INT_FIELD_MYSQL(*result, "status", task_status, int); - EXTRACT_UINT_FIELD_MYSQL(*result, "snapshot_version", snapshot_version, uint64_t); - share::ObDDLTaskStatus task_cur_status = static_cast(task_status); - if (rootserver::ObTableRedefinitionTask::check_task_status_is_pending(task_cur_status)) { - break; + LOG_WARN("get information failed", K(ret), K(tenant_id), K(task_id)); } + } else if (rootserver::ObTableRedefinitionTask::check_task_status_is_pending(task_status)) { + break; } } } diff --git a/src/storage/ddl/ob_ddl_server_client.h b/src/storage/ddl/ob_ddl_server_client.h index a9f19db80..95d62589b 100644 --- a/src/storage/ddl/ob_ddl_server_client.h +++ b/src/storage/ddl/ob_ddl_server_client.h @@ -32,7 +32,12 @@ public: /** * for load data. 
*/ - static int create_hidden_table(const obrpc::ObCreateHiddenTableArg &arg, obrpc::ObCreateHiddenTableRes &res, int64_t &snapshot_version, sql::ObSQLSessionInfo &session); + static int create_hidden_table( + const obrpc::ObCreateHiddenTableArg &arg, + obrpc::ObCreateHiddenTableRes &res, + int64_t &snapshot_version, + uint64_t &data_format_version, + sql::ObSQLSessionInfo &session); static int start_redef_table(const obrpc::ObStartRedefTableArg &arg, obrpc::ObStartRedefTableRes &res, sql::ObSQLSessionInfo &session); static int copy_table_dependents(const obrpc::ObCopyTableDependentsArg &arg, sql::ObSQLSessionInfo &session); static int finish_redef_table(const obrpc::ObFinishRedefTableArg &finish_redef_arg, @@ -46,6 +51,7 @@ private: const uint64_t tenant_id, const int64_t task_id, int64_t &snapshot_version, + uint64_t &data_format_version, ObMySQLProxy &sql_proxy); static int heart_beat_clear(const int64_t task_id); static int check_need_stop(const uint64_t tenant_id); diff --git a/src/storage/ddl/ob_ddl_struct.cpp b/src/storage/ddl/ob_ddl_struct.cpp index e09292aac..d9ae461ac 100644 --- a/src/storage/ddl/ob_ddl_struct.cpp +++ b/src/storage/ddl/ob_ddl_struct.cpp @@ -18,6 +18,7 @@ #include "storage/tablet/ob_tablet.h" #include "storage/blocksstable/ob_block_manager.h" #include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" using namespace oceanbase::storage; using namespace oceanbase::blocksstable; @@ -77,7 +78,8 @@ int ObDDLMacroHandle::reset_macro_block_ref() } ObDDLMacroBlock::ObDDLMacroBlock() - : block_handle_(), logic_id_(), block_type_(DDL_MB_INVALID_TYPE), ddl_start_scn_(SCN::min_scn()), scn_(SCN::min_scn()), buf_(nullptr), size_(0) + : block_handle_(), logic_id_(), block_type_(DDL_MB_INVALID_TYPE), ddl_start_scn_(SCN::min_scn()), + scn_(SCN::min_scn()), buf_(nullptr), size_(0), table_key_(), end_row_id_(-1) { } @@ -104,6 +106,8 @@ int ObDDLMacroBlock::deep_copy(ObDDLMacroBlock &dst_block, 
common::ObIAllocator dst_block.logic_id_ = logic_id_; dst_block.ddl_start_scn_ = ddl_start_scn_; dst_block.scn_ = scn_; + dst_block.table_key_ = table_key_; + dst_block.end_row_id_ = end_row_id_; } return ret; } @@ -119,8 +123,47 @@ bool ObDDLMacroBlock::is_valid() const && size_ > 0; } +bool ObDDLMacroBlock::is_column_group_info_valid() const +{ + return table_key_.is_column_store_sstable() && end_row_id_ >= 0; +} -ObDDLKVPendingGuard::ObDDLKVPendingGuard(ObTablet *tablet, const SCN &start_scn, const SCN &scn) +ObDDLKVHandle &ObDDLKVHandle::operator =(const ObDDLKVHandle &other) +{ + if (this != &other) { + reset(); + if (OB_NOT_NULL(other.ddl_kv_)) { + ddl_kv_ = other.ddl_kv_; + ddl_kv_->inc_ref(); + } + } + return *this; +} + +int ObDDLKVHandle::set_obj(ObDDLKV *ddl_kv) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(ddl_kv)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(ddl_kv)); + } else { + reset(); + ddl_kv_ = ddl_kv; + ddl_kv_->inc_ref(); + } + return ret; +} + +void ObDDLKVHandle::reset() +{ + if (nullptr != ddl_kv_) { + ddl_kv_->dec_ref(); + ddl_kv_ = nullptr; + } +} + +ObDDLKVPendingGuard::ObDDLKVPendingGuard(ObTablet *tablet, const SCN &start_scn, const SCN &scn, + const int64_t snapshot_version, const uint64_t data_format_version) : tablet_(tablet), scn_(scn), kv_handle_(), ret_(OB_SUCCESS) { int ret = OB_SUCCESS; @@ -131,13 +174,15 @@ ObDDLKVPendingGuard::ObDDLKVPendingGuard(ObTablet *tablet, const SCN &start_scn, LOG_WARN("invalid arguments", K(ret), KP(tablet), K(scn)); } else if (OB_FAIL(tablet->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { LOG_WARN("get ddl kv mgr failed", K(ret)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_or_create_ddl_kv(*tablet, start_scn, scn, kv_handle_))) { + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_or_create_ddl_kv(start_scn, scn, + snapshot_version, data_format_version, kv_handle_))) { LOG_WARN("acquire ddl kv failed", K(ret)); - } else if (OB_ISNULL(curr_kv = 
static_cast(kv_handle_.get_table()))) { + } else if (OB_ISNULL(curr_kv = kv_handle_.get_obj())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, active ddl kv must not be nullptr", K(ret)); } else { curr_kv->inc_pending_cnt(); + can_freeze_ = ddl_kv_mgr_handle.get_obj()->can_freeze(); } if (OB_FAIL(ret)) { kv_handle_.reset(); @@ -152,7 +197,7 @@ int ObDDLKVPendingGuard::get_ddl_kv(ObDDLKV *&kv) if (OB_FAIL(ret_)) { // do nothing } else { - kv = static_cast(kv_handle_.get_table()); + kv = kv_handle_.get_obj(); } return ret; } @@ -161,15 +206,20 @@ ObDDLKVPendingGuard::~ObDDLKVPendingGuard() { int ret = OB_SUCCESS; if (OB_SUCCESS == ret_) { - ObDDLKV *curr_kv = static_cast(kv_handle_.get_table()); + ObDDLKV *curr_kv = kv_handle_.get_obj(); if (nullptr != curr_kv) { curr_kv->dec_pending_cnt(); } } kv_handle_.reset(); + can_freeze_ = false; } -int ObDDLKVPendingGuard::set_macro_block(ObTablet *tablet, const ObDDLMacroBlock ¯o_block) +int ObDDLKVPendingGuard::set_macro_block( + ObTablet *tablet, + const ObDDLMacroBlock ¯o_block, + const int64_t snapshot_version, + const uint64_t data_format_version) { int ret = OB_SUCCESS; static const int64_t MAX_RETRY_COUNT = 10; @@ -180,14 +230,15 @@ int ObDDLKVPendingGuard::set_macro_block(ObTablet *tablet, const ObDDLMacroBlock int64_t try_count = 0; while ((OB_SUCCESS == ret || OB_EAGAIN == ret) && try_count < MAX_RETRY_COUNT) { ObDDLKV *ddl_kv = nullptr; - ObDDLKVPendingGuard guard(tablet, macro_block.ddl_start_scn_, macro_block.scn_); + ObDDLKVPendingGuard guard(tablet, macro_block.ddl_start_scn_, macro_block.scn_, + snapshot_version, data_format_version); if (OB_FAIL(guard.get_ddl_kv(ddl_kv))) { LOG_WARN("get ddl kv failed", K(ret)); } else if (OB_ISNULL(ddl_kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret), KP(ddl_kv), K(guard)); - } else if (OB_FAIL(ddl_kv->set_macro_block(*tablet, macro_block))) { - LOG_WARN("fail to set macro block info", K(ret)); + } else if (OB_FAIL(ddl_kv->set_macro_block(*tablet, 
macro_block, snapshot_version, data_format_version, guard.can_freeze()))) { + LOG_WARN("fail to set macro block info", K(ret), K(macro_block), K(snapshot_version), K(data_format_version)); } else { break; } @@ -199,3 +250,76 @@ int ObDDLKVPendingGuard::set_macro_block(ObTablet *tablet, const ObDDLMacroBlock } return ret; } + + +ObTabletDirectLoadMgrHandle::ObTabletDirectLoadMgrHandle() + : tablet_mgr_(nullptr) +{ } + +ObTabletDirectLoadMgrHandle::~ObTabletDirectLoadMgrHandle() +{ + reset(); +} + +int ObTabletDirectLoadMgrHandle::set_obj(ObTabletDirectLoadMgr *mgr) +{ + int ret = OB_SUCCESS; + reset(); + if (OB_ISNULL(mgr)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret)); + } else { + mgr->inc_ref(); + tablet_mgr_ = mgr; + } + return ret; +} + +ObTabletDirectLoadMgr* ObTabletDirectLoadMgrHandle::get_obj() +{ + return tablet_mgr_; +} + +const ObTabletDirectLoadMgr *ObTabletDirectLoadMgrHandle::get_obj() const +{ + return tablet_mgr_; +} + +ObTabletFullDirectLoadMgr* ObTabletDirectLoadMgrHandle::get_full_obj() +{ + return static_cast(tablet_mgr_); +} + +ObTabletIncDirectLoadMgr* ObTabletDirectLoadMgrHandle::get_inc_obj() +{ + return static_cast(tablet_mgr_); +} + +bool ObTabletDirectLoadMgrHandle::is_valid() const +{ + return nullptr != tablet_mgr_; +} + +void ObTabletDirectLoadMgrHandle::reset() +{ + if (nullptr != tablet_mgr_) { + if (0 == tablet_mgr_->dec_ref()) { + tablet_mgr_->~ObTabletDirectLoadMgr(); + MTL(ObTenantDirectLoadMgr *)->get_allocator().free(tablet_mgr_); + } + tablet_mgr_ = nullptr; + } +} + +int ObTabletDirectLoadMgrHandle::assign(const ObTabletDirectLoadMgrHandle &other) +{ + int ret = OB_SUCCESS; + reset(); + if (OB_UNLIKELY(!other.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(other)); + } else if (OB_FAIL(set_obj(other.tablet_mgr_))) { + LOG_WARN("set obj failed", K(ret)); + } + return ret; +} diff --git a/src/storage/ddl/ob_ddl_struct.h b/src/storage/ddl/ob_ddl_struct.h index 
70f05e601..c16c0ef11 100644 --- a/src/storage/ddl/ob_ddl_struct.h +++ b/src/storage/ddl/ob_ddl_struct.h @@ -48,7 +48,9 @@ public: const blocksstable::MacroBlockId &get_block_id() const { return block_handle_.get_block_id(); } int deep_copy(ObDDLMacroBlock &dst_block, common::ObIAllocator &allocator) const; bool is_valid() const; - TO_STRING_KV(K_(block_handle), K_(logic_id), K_(block_type), K_(ddl_start_scn), K_(scn), KP_(buf), K_(size)); + bool is_column_group_info_valid() const; + TO_STRING_KV(K_(block_handle), K_(logic_id), K_(block_type), K_(ddl_start_scn), + K_(scn), KP_(buf), K_(size), K_(table_key), K_(end_row_id)); public: ObDDLMacroHandle block_handle_; blocksstable::ObLogicMacroBlockId logic_id_; @@ -57,10 +59,26 @@ public: share::SCN scn_; const char *buf_; int64_t size_; + ObITable::TableKey table_key_; + int64_t end_row_id_; }; - class ObDDLKV; +class ObDDLKVHandle final +{ +public: + ObDDLKVHandle() : ddl_kv_(nullptr) {} + ObDDLKVHandle(const ObDDLKVHandle &other) : ddl_kv_(nullptr) { *this = other; } + ObDDLKVHandle &operator =(const ObDDLKVHandle &other); + ~ObDDLKVHandle() { reset(); } + ObDDLKV* get_obj() const { return ddl_kv_; } + bool is_valid() const { return nullptr != ddl_kv_; } + int set_obj(ObDDLKV *ddl_kv); + void reset(); + TO_STRING_KV(KP_(ddl_kv)); +private: + ObDDLKV *ddl_kv_; +}; class ObTablet; @@ -68,19 +86,83 @@ class ObTablet; class ObDDLKVPendingGuard final { public: - static int set_macro_block(ObTablet *tablet, const ObDDLMacroBlock ¯o_block); + static int set_macro_block( + ObTablet *tablet, + const ObDDLMacroBlock ¯o_block, + const int64_t snapshot_version, + const uint64_t data_format_version); public: - ObDDLKVPendingGuard(ObTablet *tablet, const share::SCN &start_scn, const share::SCN &scn); + ObDDLKVPendingGuard(ObTablet *tablet, const share::SCN &start_scn, const share::SCN &scn, + const int64_t snapshot_version, const uint64_t data_format_version); ~ObDDLKVPendingGuard(); int get_ret() const { return ret_; } int 
get_ddl_kv(ObDDLKV *&kv); + bool can_freeze() { return can_freeze_; } TO_STRING_KV(KP(tablet_), K(scn_), K(kv_handle_), K(ret_)); private: ObTablet *tablet_; share::SCN start_scn_; share::SCN scn_; - ObTableHandleV2 kv_handle_; + ObDDLKVHandle kv_handle_; int ret_; + bool can_freeze_; +}; + + +enum ObDirectLoadType { + DIRECT_LOAD_INVALID = 0, + DIRECT_LOAD_DDL = 1, + DIRECT_LOAD_LOAD_DATA = 2, + DIRECT_LOAD_INCREMENTAL = 3, + DIRECT_LOAD_MAX +}; + +static inline bool is_valid_direct_load(const ObDirectLoadType &type) +{ + return ObDirectLoadType::DIRECT_LOAD_INVALID < type && ObDirectLoadType::DIRECT_LOAD_MAX > type; +} + +static inline bool is_ddl_direct_load(const ObDirectLoadType &type) +{ + return ObDirectLoadType::DIRECT_LOAD_DDL == type; +} + +static inline bool is_full_direct_load(const ObDirectLoadType &type) +{ + return ObDirectLoadType::DIRECT_LOAD_DDL <= type + && ObDirectLoadType::DIRECT_LOAD_LOAD_DATA >= type; +} + +static inline bool is_data_direct_load(const ObDirectLoadType &type) +{ + return ObDirectLoadType::DIRECT_LOAD_LOAD_DATA <= type + && ObDirectLoadType::DIRECT_LOAD_INCREMENTAL >= type; +} + +static inline bool is_incremental_direct_load(const ObDirectLoadType &type) +{ + return ObDirectLoadType::DIRECT_LOAD_INCREMENTAL == type; +} + +class ObTabletDirectLoadMgr; +class ObTabletFullDirectLoadMgr; +class ObTabletIncDirectLoadMgr; +class ObTabletDirectLoadMgrHandle final +{ +public: + ObTabletDirectLoadMgrHandle(); + ~ObTabletDirectLoadMgrHandle(); + int set_obj(ObTabletDirectLoadMgr *mgr); + int assign(const ObTabletDirectLoadMgrHandle &handle); + ObTabletDirectLoadMgr *get_obj(); + const ObTabletDirectLoadMgr *get_obj() const; + ObTabletFullDirectLoadMgr *get_full_obj(); + ObTabletIncDirectLoadMgr *get_inc_obj(); + void reset(); + bool is_valid() const; + TO_STRING_KV(KP_(tablet_mgr)); +private: + ObTabletDirectLoadMgr *tablet_mgr_; }; } // end namespace storage diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp 
b/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp deleted file mode 100644 index d1115d65e..000000000 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp +++ /dev/null @@ -1,1608 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. - */ - -#define USING_LOG_PREFIX STORAGE - -#include "ob_direct_insert_sstable_ctx.h" -#include "share/ob_ddl_checksum.h" -#include "share/ob_ddl_error_message_table_operator.h" -#include "share/ob_ddl_common.h" -#include "share/ob_tablet_autoincrement_service.h" -#include "share/ob_ddl_sim_point.h" -#include "storage/ddl/ob_ddl_merge_task.h" -#include "storage/blocksstable/index_block/ob_index_block_builder.h" -#include "storage/compaction/ob_column_checksum_calculator.h" -#include "storage/tx_storage/ob_ls_service.h" -#include "storage/compaction/ob_tenant_freeze_info_mgr.h" -#include "sql/engine/pdml/static/ob_px_sstable_insert_op.h" -#include "storage/lob/ob_lob_util.h" -#include "storage/ddl/ob_tablet_ddl_kv_mgr.h" -#include "sql/engine/expr/ob_expr_lob_utils.h" - -using namespace oceanbase; -using namespace oceanbase::common; -using namespace oceanbase::storage; -using namespace oceanbase::blocksstable; -using namespace oceanbase::share; -using namespace oceanbase::share::schema; -using namespace oceanbase::sql; - - -/*************** ObSSTableInsertTabletParam *****************/ -ObSSTableInsertTabletParam::ObSSTableInsertTabletParam() - : context_id_(0), ls_id_(), tablet_id_(), table_id_(0), write_major_(false), - task_cnt_(0), schema_version_(0), 
snapshot_version_(0), execution_id_(1), ddl_task_id_(0), - data_format_version_(0) -{ - -} - -ObSSTableInsertTabletParam::~ObSSTableInsertTabletParam() -{ - -} - -bool ObSSTableInsertTabletParam::is_valid() const -{ - bool bret = context_id_ > 0 - && ls_id_.is_valid() - && tablet_id_.is_valid() - && table_id_ > 0 - && task_cnt_ >= 0 - && schema_version_ > 0 - && execution_id_ >= 0 - && ddl_task_id_ > 0 - && data_format_version_ > 0; - return bret; -} - -ObSSTableInsertRowIterator::ObSSTableInsertRowIterator(sql::ObExecContext &exec_ctx, sql::ObPxMultiPartSSTableInsertOp *op) - : exec_ctx_(exec_ctx), op_(op), current_row_(), current_tablet_id_(), is_next_row_cached_(true) -{ - -} - -ObSSTableInsertRowIterator::~ObSSTableInsertRowIterator() -{ - -} - -void ObSSTableInsertRowIterator::reset() -{ - -} - -int ObSSTableInsertRowIterator::get_next_row(common::ObNewRow *&row) -{ - UNUSEDx(row); - return OB_NOT_SUPPORTED; -} - -int ObSSTableInsertRowIterator::get_next_row_with_tablet_id( - const uint64_t table_id, - const int64_t rowkey_count, - const int64_t snapshot_version, - common::ObNewRow *&row, - ObTabletID &tablet_id) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(nullptr == op_ || 0 >= snapshot_version) ) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("operator is null", K(ret), KP(op_), K(snapshot_version)); - } else { - if (OB_UNLIKELY(is_next_row_cached_)) { - is_next_row_cached_ = false; - } else if (OB_FAIL(op_->get_next_row_with_cache())) { - if (OB_ITER_END != ret) { - LOG_WARN("get next row from child failed", K(ret)); - } - } - if (OB_SUCC(ret)) { - op_->clear_evaluated_flag(); - if (OB_FAIL(op_->get_tablet_id_from_row(op_->get_child()->get_spec().output_, - op_->get_spec().row_desc_.get_part_id_index(), - current_tablet_id_))) { - LOG_WARN("get part id failed", K(ret)); - } else { - const ObExprPtrIArray &exprs = op_->get_spec().ins_ctdef_.new_row_; - ObEvalCtx &eval_ctx = op_->get_eval_ctx(); - int64_t extra_rowkey_cnt = 
storage::ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); - int64_t request_cnt = exprs.count() + extra_rowkey_cnt; - if (OB_UNLIKELY((rowkey_count > exprs.count()))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected rowkey count", K(ret), K(rowkey_count), K(exprs.count())); - } else if (current_row_.get_count() <= 0) { - ObObj *cells = static_cast(op_->get_exec_ctx().get_allocator().alloc(sizeof(ObObj) * request_cnt)); - if (OB_ISNULL(cells)) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret)); - } else { - new (cells) ObObj[request_cnt]; - current_row_.cells_ = cells; - current_row_.count_ = request_cnt; - } - } else if (OB_UNLIKELY(current_row_.get_count() < request_cnt)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected row col count", K(ret), K(current_row_.get_count()), K(request_cnt)); - } - - if (OB_SUCC(ret)) { - for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i++) { - ObDatum *datum = NULL; - const ObExpr *e = exprs.at(i); - if (OB_ISNULL(e)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("expr is NULL", K(ret), K(i)); - } else if (OB_FAIL(e->eval(eval_ctx, datum))) { - LOG_WARN("evaluate expression failed", K(ret), K(i), KPC(e)); - } else if (i < rowkey_count) { - if (OB_FAIL(datum->to_obj(current_row_.cells_[i], e->obj_meta_, e->obj_datum_map_))) { - LOG_WARN("convert datum to obj failed", K(ret), K(i), KPC(e)); - } - } else if (OB_FAIL(datum->to_obj(current_row_.cells_[i + extra_rowkey_cnt], e->obj_meta_, e->obj_datum_map_))) { - LOG_WARN("convert datum to obj failed", K(ret), K(i), KPC(e)); - } - } - // add extra rowkey - current_row_.cells_[rowkey_count].set_int(-snapshot_version); - current_row_.cells_[rowkey_count + 1].set_int(0); - } - } - } - if (OB_SUCC(ret)) { - row = ¤t_row_; - tablet_id = current_tablet_id_; - } - } - return ret; -} - -ObTabletID ObSSTableInsertRowIterator::get_current_tablet_id() const -{ - return current_tablet_id_; -} - -int ObSSTableInsertRowIterator::get_sql_mode(ObSQLMode 
&sql_mode) const -{ - int ret = OB_SUCCESS; - ObOperator *base_op = nullptr; - const ObSQLSessionInfo *session_info = nullptr; - if (OB_ISNULL(op_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid op", K(ret)); - } else if (OB_ISNULL(base_op = static_cast(op_))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid base operator", K(ret)); - } else if (OB_ISNULL(session_info = base_op->get_exec_ctx().get_my_session())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid session info pointer", K(ret)); - } else { - sql_mode = session_info->get_sql_mode(); - } - return ret; -} - -/*************** ObSSTableInsertSliceWriter *****************/ - -ObSSTableInsertSliceParam::ObSSTableInsertSliceParam() - : snapshot_version_(0), - write_major_(false), - sstable_index_builder_(nullptr), - task_id_(0) -{ -} - -ObSSTableInsertSliceParam::~ObSSTableInsertSliceParam() -{ -} - -bool ObSSTableInsertSliceParam::is_valid() const -{ - return tablet_id_.is_valid() && ls_id_.is_valid() && table_key_.is_valid() && - start_seq_.is_valid() && start_scn_.is_valid() && frozen_scn_.is_valid() && - nullptr != sstable_index_builder_ && 0 != task_id_; -} - -ObSSTableInsertSliceWriter::ObSSTableInsertSliceWriter() - : rowkey_column_num_(0), - is_index_table_(false), - col_descs_(nullptr), - snapshot_version_(0), - data_desc_(true/*is_ddl*/), - allocator_(lib::ObLabel("PartInsSst"), OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), - lob_allocator_(lib::ObLabel("PartInsSstLob"), OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), - lob_cnt_(0), - sql_mode_for_ddl_reshape_(0), - reshape_ptr_(nullptr), - is_inited_(false), - new_row_builder_(), - lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD) -{ -} - -ObSSTableInsertSliceWriter::~ObSSTableInsertSliceWriter() -{ - if (nullptr != reshape_ptr_) { - ObRowReshapeUtil::free_row_reshape(allocator_, reshape_ptr_, 1); - reshape_ptr_ = nullptr; - } -} - -int ObSSTableInsertSliceWriter::init(const ObSSTableInsertSliceParam &slice_param, - const ObTableSchema *table_schema, - 
ObDDLKvMgrHandle &ddl_kv_mgr_handle) -{ - int ret = OB_SUCCESS; - if (IS_INIT) { - ret = OB_INIT_TWICE; - LOG_WARN("ObSSTableInsertSliceWriter init twice", KR(ret), KP(this)); - } else if (OB_UNLIKELY(!slice_param.is_valid() || nullptr == table_schema)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(slice_param), KP(table_schema)); - } else { - const ObSQLMode sql_mode_for_ddl_reshape = SMO_TRADITIONAL; - if (OB_FAIL(sstable_redo_writer_.init(slice_param.ls_id_, slice_param.tablet_id_))) { - LOG_WARN("fail to init sstable redo writer", KR(ret), K(slice_param.ls_id_), - K(slice_param.tablet_id_)); - } else if (FALSE_IT(sstable_redo_writer_.set_start_scn(slice_param.start_scn_))) { - } else if (OB_FAIL(redo_log_writer_callback_.init(DDL_MB_DATA_TYPE, slice_param.table_key_, - slice_param.task_id_, &sstable_redo_writer_, ddl_kv_mgr_handle))) { - LOG_WARN("fail to init redo log writer callback", KR(ret)); - } else if (OB_FAIL(data_desc_.init(*table_schema, - slice_param.ls_id_, - slice_param.tablet_id_, // TODO(shuangcan): confirm this - slice_param.write_major_ ? MAJOR_MERGE : MINOR_MERGE, - slice_param.frozen_scn_.get_val_for_tx(), - 0/*cluster_version*/))) { - LOG_WARN("fail to init data desc", KR(ret), K_(data_desc)); - } else { - lob_inrow_threshold_ = table_schema->get_lob_inrow_threshold(); - data_desc_.get_desc().sstable_index_builder_ = slice_param.sstable_index_builder_; - if (OB_FAIL(macro_block_writer_.open(data_desc_.get_desc(), slice_param.start_seq_, - &redo_log_writer_callback_))) { - LOG_WARN("fail to open macro block writer", KR(ret), K_(data_desc), - K(slice_param.start_seq_)); - } - } - if (OB_SUCC(ret)) { - const ObColDescIArray &col_descs = data_desc_.get_desc().get_full_stored_col_descs(); - ObTableSchemaParam schema_param(allocator_); - ObRelativeTable relative_table; - // Hack to prevent row reshaping from converting empty string to null. 
- // - // Supposing we have a row of type varchar with some spaces and an index on this column, - // and then we convert this column to char. In this case, the DDL routine will first rebuild - // the data table and then rebuilding the index table. The row may be reshaped as follows. - // - // - without hack: ' '(varchar) => ''(char) => null(char) - // - with hack: ' '(varchar) => ''(char) => ''(char) - if (OB_FAIL(prepare_reshape(slice_param.tablet_id_, table_schema, schema_param, relative_table))) { - LOG_WARN("failed to prepare params for reshape", K(ret)); - } else if (OB_FAIL(ObRowReshapeUtil::malloc_rows_reshape_if_need( - allocator_, col_descs, 1, relative_table, sql_mode_for_ddl_reshape, - reshape_ptr_))) { - LOG_WARN("failed to malloc row reshape", KR(ret)); - } else if (OB_FAIL(datum_row_.init(allocator_, col_descs.count()))) { - LOG_WARN("fail to init datum row", KR(ret), K(col_descs)); - } else if (OB_FAIL(new_row_builder_.init(col_descs, allocator_))) { - LOG_WARN("Failed to init ObNewRowBuilder", K(ret), K(col_descs)); - } - } - if (OB_SUCC(ret)) { - tablet_id_ = slice_param.tablet_id_; - ls_id_ = slice_param.ls_id_; - rowkey_column_num_ = table_schema->get_rowkey_column_num(); - is_index_table_ = table_schema->is_index_table(); - col_descs_ = &data_desc_.get_desc().get_full_stored_col_descs(); - snapshot_version_ = slice_param.snapshot_version_; - sql_mode_for_ddl_reshape_ = sql_mode_for_ddl_reshape; - store_row_.flag_.set_flag(ObDmlFlag::DF_INSERT); - is_inited_ = true; - } - } - return ret; -} - -int ObSSTableInsertSliceWriter::append_row(ObDatumRow &datum_row) -{ - int ret = OB_SUCCESS; - common::ObNewRow *new_row = nullptr; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObSSTableInsertSliceWriter not init", KR(ret), KP(this)); - } else if (OB_UNLIKELY(!datum_row.is_valid() || - datum_row.get_column_count() != col_descs_->count())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(datum_row), K(col_descs_->count())); - } 
else { - if (OB_FAIL(new_row_builder_.build(datum_row, new_row))) { - LOG_WARN("Failed to build new row", KR(ret), K(datum_row)); - } else if (OB_FAIL(append_row(*new_row))) { - LOG_WARN("fail to append row", KR(ret), K(datum_row)); - } - } - return ret; -} - -int ObSSTableInsertSliceWriter::append_row(const ObNewRow &row_val) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObSSTableInsertSliceWriter not init", KR(ret), KP(this)); - } else if (OB_UNLIKELY(!row_val.is_valid() || row_val.get_count() != col_descs_->count())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(row_val), K(col_descs_->count())); - } else { - if (OB_FAIL(ObRowReshapeUtil::reshape_table_rows(&row_val, reshape_ptr_, col_descs_->count(), - &store_row_, 1, sql_mode_for_ddl_reshape_))) { - LOG_WARN("fail to reshape table rows", KR(ret)); - } else if (OB_FAIL(check_null(store_row_.row_val_))) { - LOG_WARN("fail to check null value in row", KR(ret), K(store_row_)); - } else if (OB_FAIL(datum_row_.from_store_row(store_row_))) { - LOG_WARN("fail to transfer store row ", KR(ret), K(store_row_)); - } - for (int64_t i = 0; OB_SUCC(ret) && i < col_descs_->count(); i++) { - ObStorageDatum &datum = datum_row_.storage_datums_[i]; - if (col_descs_->at(i).col_type_.is_lob_storage() && !datum.is_nop() && !datum.is_null()) { - lob_cnt_++; - const int64_t timeout_ts = - ObTimeUtility::current_time() + ObInsertLobColumnHelper::LOB_ACCESS_TX_TIMEOUT; - bool has_lob_header = store_row_.row_val_.cells_[i].has_lob_header(); - ObLobStorageParam lob_storage_param; - lob_storage_param.inrow_threshold_ = lob_inrow_threshold_; - if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( - lob_allocator_, ls_id_, tablet_id_, col_descs_->at(i), lob_storage_param, datum, timeout_ts, has_lob_header, - MTL_ID()))) { - LOG_WARN("fail to insert_lob_col", KR(ret), K(datum)); - } - } - } - if (OB_SUCC(ret)) { - if (OB_FAIL(macro_block_writer_.append_row(datum_row_))) { - 
LOG_WARN("fail to appen row", KR(ret)); - } - } - if (lob_cnt_ % ObInsertLobColumnHelper::LOB_ALLOCATOR_RESET_CYCLE == 0) { - lob_allocator_.reuse(); // reuse after append_row to macro block to save memory - } - } - return ret; -} - -int ObSSTableInsertSliceWriter::prepare_reshape(const ObTabletID &tablet_id, - const ObTableSchema *table_schema, - ObTableSchemaParam &schema_param, - ObRelativeTable &relative_table) const -{ - int ret = OB_SUCCESS; - if (OB_FAIL(schema_param.convert(table_schema))) { - LOG_WARN("failed to convert schema param", K(ret)); - if (OB_SCHEMA_ERROR == ret) { - ret = OB_CANCELED; - } - } else if (OB_FAIL(relative_table.init(&schema_param, tablet_id))) { - LOG_WARN("fail to init relative_table", K(ret), K(schema_param), K(tablet_id)); - } - return ret; -} - -int ObSSTableInsertSliceWriter::check_null(const ObNewRow &row_val) const -{ - int ret = OB_SUCCESS; - if (is_index_table_) { - // index table is index-organized but can have null values in index column - } else if (OB_UNLIKELY(rowkey_column_num_ > row_val.count_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid rowkey column number", KR(ret), K_(rowkey_column_num), K(row_val)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_column_num_; i++) { - const ObObj &cell = row_val.cells_[i]; - if (cell.is_null()) { - ret = OB_ER_INVALID_USE_OF_NULL; - LOG_WARN("invalid null cell for row key column", KR(ret), K(cell)); - } - } - } - return ret; -} - -int ObSSTableInsertSliceWriter::close() -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObSSTableInsertSliceWriter not init", KR(ret), KP(this)); - } else { - if (OB_FAIL(macro_block_writer_.close())) { - LOG_WARN("fail to close macro block writer", K(ret)); - } - } - return ret; -} - -/*************** ObSSTableInsertTabletContext *****************/ - -ObSSTableInsertTabletContext::ObSSTableInsertTabletContext() - : mutex_(ObLatchIds::SSTABLE_INSERT_TABLET_CONTEXT_LOCK), allocator_(), 
data_sstable_redo_writer_(), - sstable_created_(false), task_finish_count_(0), index_builder_(nullptr), - task_id_(0) -{ - -} - -ObSSTableInsertTabletContext::~ObSSTableInsertTabletContext() -{ - if (OB_NOT_NULL(index_builder_)) { - index_builder_->~ObSSTableIndexBuilder(); - allocator_.free(index_builder_); - index_builder_ = nullptr; - } - ddl_kv_mgr_handle_.reset(); - allocator_.reset(); -} - -int ObSSTableInsertTabletContext::init(const ObSSTableInsertTabletParam &build_param) -{ - int ret = OB_SUCCESS; - const int64_t memory_limit = 1024L * 1024L * 1024L * 10L; // 10GB - share::ObLocationService *location_service = GCTX.location_service_; - lib::ObMutexGuard guard(mutex_); - if (OB_UNLIKELY(build_param_.is_valid())) { - ret = OB_INIT_TWICE; - LOG_WARN("build param has been inited", K(ret), K(build_param_)); - } else if (OB_UNLIKELY(!build_param.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(build_param)); - } else if (OB_FAIL(data_sstable_redo_writer_.init(build_param.ls_id_, build_param.tablet_id_))) { - LOG_WARN("fail to init sstable redo writer", K(ret), K(build_param)); - } else if (OB_FAIL(allocator_.init(OB_MALLOC_MIDDLE_BLOCK_SIZE, - lib::ObLabel("TabletInsCtx"), - OB_SERVER_TENANT_ID, - memory_limit))) { - LOG_WARN("init alloctor failed", K(ret)); - } else { - build_param_ = build_param; - } - return ret; -} - -int ObSSTableInsertTabletContext::update(const int64_t snapshot_version) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(snapshot_version <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(snapshot_version)); - } else { - ObITable::TableKey table_key; - lib::ObMutexGuard guard(mutex_); - build_param_.snapshot_version_ = snapshot_version; - if (OB_FAIL(get_table_key(table_key))) { - LOG_WARN("get table key failed", K(ret), K(build_param_)); - } else if (OB_UNLIKELY(!table_key.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(table_key)); - } else if 
(data_sstable_redo_writer_.get_start_scn().is_valid_and_not_min()) { - // ddl start log is already written, do nothing - } else if (OB_FAIL(data_sstable_redo_writer_.start_ddl_redo(table_key, build_param_.ddl_task_id_, - build_param_.execution_id_, build_param_.data_format_version_, ddl_kv_mgr_handle_))) { - LOG_WARN("fail write start log", K(ret), K(table_key), K(build_param_)); - } - } - return ret; -} - -int ObSSTableInsertTabletContext::build_sstable_slice( - const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows) -{ - int ret = OB_SUCCESS; - affected_rows = 0; - const int64_t tenant_id = MTL_ID(); - const ObTabletID &tablet_id = build_param.tablet_id_; - ObSchemaGetterGuard schema_guard; - const ObTableSchema *table_schema = nullptr; - ObArenaAllocator allocator(lib::ObLabel("PartInsSstTmp"), OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id); - ObSSTableInsertSliceWriter *sstable_slice_writer = nullptr; - bool ddl_committed = false; - if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( - tenant_id, schema_guard, build_param.schema_version_))) { - LOG_WARN("get tenant schema failed", K(ret), K(build_param)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, - build_param.table_id_, table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(build_param)); - } else if (OB_ISNULL(table_schema)) { - ret = OB_TABLE_NOT_EXIST; - LOG_WARN("table not exist", K(ret), K(build_param)); - } else if (OB_FAIL(construct_sstable_slice_writer(build_param, start_seq, sstable_slice_writer, allocator))) { - LOG_WARN("fail to construct sstable slice writer", KR(ret), K(build_param), K(start_seq)); - } else if (OB_ISNULL(sstable_slice_writer)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null sstable slce writer", KR(ret)); - } else { - const int64_t rowkey_column_num = table_schema->get_rowkey_column_num(); - const int64_t snapshot_version 
= sstable_slice_writer->get_snapshot_version(); - ObISSTableInsertRowIterator *tablet_row_iter = reinterpret_cast(&iter); - ObNewRow *row_val = nullptr; - ObTabletID row_tablet_id; - while (OB_SUCC(ret)) { - if (OB_FAIL(THIS_WORKER.check_status())) { - LOG_WARN("check status failed", K(ret)); - } else if (OB_FAIL(tablet_row_iter->get_next_row_with_tablet_id( - build_param.table_id_, rowkey_column_num, snapshot_version, row_val, - row_tablet_id))) { - if (OB_ITER_END != ret) { - LOG_WARN("get next row failed", K(ret)); - } else { - ret = OB_SUCCESS; - break; - } - } else if (OB_FAIL(DDL_SIM(MTL_ID(), build_param_.ddl_task_id_, DDL_INSERT_SSTABLE_GET_NEXT_ROW_FAILED))) { - LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(build_param_)); - } else if (tablet_id != row_tablet_id) { - ret = OB_SUCCESS; - break; - } else if (!ddl_committed && OB_FAIL(sstable_slice_writer->append_row(*row_val))) { - int tmp_ret = OB_SUCCESS; - int report_ret_code = OB_SUCCESS; - if (OB_ERR_PRIMARY_KEY_DUPLICATE == ret && table_schema->is_unique_index()) { - LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, - "", static_cast(sizeof("UNIQUE IDX") - 1), "UNIQUE IDX"); - char index_key_buffer[OB_TMP_BUF_SIZE_256]; - ObStoreRowkey index_key; - ObDDLErrorMessageTableOperator::ObDDLErrorInfo info; - index_key.assign(row_val->cells_, rowkey_column_num); - if (OB_TMP_FAIL(ObDDLErrorMessageTableOperator::extract_index_key(*table_schema, index_key, index_key_buffer, OB_TMP_BUF_SIZE_256))) { // read the unique key that violates the unique constraint - LOG_WARN("extract unique index key failed", K(tmp_ret), K(index_key), K(index_key_buffer)); - // TODO(shuangcan): check if we need to change part_id to tablet_id - } else if (OB_TMP_FAIL(ObDDLErrorMessageTableOperator::get_index_task_info(*GCTX.sql_proxy_, *table_schema, info))) { - LOG_WARN("get task id of index table failed", K(tmp_ret), K(info), KPC(table_schema)); - } else if 
(OB_TMP_FAIL(ObDDLErrorMessageTableOperator::generate_index_ddl_error_message(ret, *table_schema, info.trace_id_str_, - info.task_id_, info.parent_task_id_, row_tablet_id.id(), GCTX.self_addr(), *GCTX.sql_proxy_, index_key_buffer, report_ret_code))) { - LOG_WARN("generate index ddl error message", K(tmp_ret), K(ret), K(report_ret_code)); - } - if (OB_ERR_DUPLICATED_UNIQUE_KEY == report_ret_code) { - //error message of OB_ERR_PRIMARY_KEY_DUPLICATE is not compatiable with oracle, so use a new error code - ret = OB_ERR_DUPLICATED_UNIQUE_KEY; - } - } else if (OB_TRANS_COMMITED == ret) { - ret = OB_SUCCESS; - ddl_committed = true; - } else { - LOG_WARN("macro block writer append row failed", K(ret)); - } - } - if (OB_SUCC(ret)) { - LOG_DEBUG("sstable insert op append row", KPC(row_val)); - ++affected_rows; - } - } - if (OB_SUCC(ret)) { - if (!ddl_committed && OB_FAIL(sstable_slice_writer->close())) { - if (OB_TRANS_COMMITED == ret) { - ret = OB_SUCCESS; - ddl_committed = true; - } else { - LOG_WARN("close writer failed", K(ret)); - } - } - } - } - if (OB_NOT_NULL(sstable_slice_writer)) { - sstable_slice_writer->~ObSSTableInsertSliceWriter(); - allocator.free(sstable_slice_writer); - sstable_slice_writer = nullptr; - } - return ret; -} - -int ObSSTableInsertTabletContext::construct_sstable_slice_writer( - const ObSSTableInsertTabletParam &build_param, - const ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&sstable_slice_writer, - ObIAllocator &allocator) -{ - int ret = OB_SUCCESS; - sstable_slice_writer = nullptr; - const int64_t tenant_id = MTL_ID(); - ObMySQLProxy *sql_proxy = GCTX.sql_proxy_; - ObFreezeInfoProxy freeze_info_proxy(tenant_id); - ObFreezeInfo frozen_status; - ObSchemaGetterGuard schema_guard; - const ObTableSchema *table_schema = nullptr; - ObITable::TableKey table_key; - int64_t snapshot_version = 0; - SCN snapshot_scn; - { - lib::ObMutexGuard guard(mutex_); - snapshot_version = build_param_.snapshot_version_; - } - if 
(OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( - tenant_id, schema_guard, build_param.schema_version_))) { - LOG_WARN("get tenant schema failed", K(ret), K(build_param)); - } else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), - build_param.table_id_, table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(build_param)); - } else if (OB_ISNULL(table_schema)) { - ret = OB_TABLE_NOT_EXIST; - LOG_WARN("table not exist", K(ret), K(build_param)); - } else if (OB_FAIL(prepare_index_builder_if_need(*table_schema))) { - LOG_WARN("prepare sstable index builder failed", K(ret), K(build_param)); - } else if (OB_FAIL(get_table_key(table_key))) { - LOG_WARN("get table key failed", K(ret), K(build_param_)); - } else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { - LOG_WARN("fail to convert val to SCN", KR(ret), K(snapshot_version)); - } else if (OB_UNLIKELY(!table_key.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(table_key)); - } else if (OB_FAIL(freeze_info_proxy.get_frozen_info_less_than( - *GCTX.sql_proxy_, snapshot_scn, frozen_status))) { - if (OB_ENTRY_NOT_EXIST != ret) { - LOG_WARN("get freeze info failed", K(ret), K(build_param_)); - } else { - frozen_status.frozen_scn_ = SCN::base_scn(); - ret = OB_SUCCESS; - } - } - if (OB_SUCC(ret)) { - ObSSTableInsertSliceParam slice_param; - slice_param.tablet_id_ = build_param_.tablet_id_; - slice_param.ls_id_ = build_param_.ls_id_; - slice_param.table_key_ = table_key; - slice_param.start_seq_ = start_seq; - slice_param.start_scn_ = data_sstable_redo_writer_.get_start_scn(); - slice_param.snapshot_version_ = snapshot_version; - slice_param.frozen_scn_ = frozen_status.frozen_scn_; - slice_param.write_major_ = build_param.write_major_; - slice_param.sstable_index_builder_ = index_builder_; - slice_param.task_id_ = build_param_.ddl_task_id_; - if (OB_ISNULL(sstable_slice_writer = OB_NEWx(ObSSTableInsertSliceWriter, (&allocator)))) { - 
ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to new ObSSTableInsertSliceWriter", KR(ret)); - } else if (OB_FAIL(sstable_slice_writer->init(slice_param, table_schema, ddl_kv_mgr_handle_))) { - LOG_WARN("fail to init sstable slice writer", KR(ret), K(slice_param)); - } else { - FLOG_INFO("init sstable slice writer finished", K(ret), K(slice_param)); - } - if (OB_FAIL(ret)) { - if (nullptr != sstable_slice_writer) { - sstable_slice_writer->~ObSSTableInsertSliceWriter(); - allocator.free(sstable_slice_writer); - sstable_slice_writer = nullptr; - } - } - } - return ret; -} - -int ObSSTableInsertTabletContext::prepare_index_builder_if_need(const ObTableSchema &table_schema) -{ - int ret = OB_SUCCESS; - ObWholeDataStoreDesc data_desc(true/*is_ddl*/); - lib::ObMutexGuard guard(mutex_); - if (index_builder_ != nullptr) { - LOG_INFO("index builder is already prepared"); - } else if (OB_FAIL(data_desc.init(table_schema, - build_param_.ls_id_, - build_param_.tablet_id_, // TODO(shuangcan): confirm this - build_param_.write_major_ ? 
compaction::MAJOR_MERGE : compaction::MINOR_MERGE, - 1L /*snapshot_version*/, - build_param_.data_format_version_))) { - LOG_WARN("failed to init data desc", K(ret)); - } else { - void *builder_buf = nullptr; - - if (OB_ISNULL(builder_buf = allocator_.alloc(sizeof(ObSSTableIndexBuilder)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory", K(ret)); - } else if (OB_ISNULL(index_builder_ = new (builder_buf) ObSSTableIndexBuilder())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to new ObSSTableIndexBuilder", K(ret)); - } else if (OB_FAIL(index_builder_->init(data_desc.get_desc(), - nullptr, // macro block flush callback - ObSSTableIndexBuilder::DISABLE))) { - LOG_WARN("failed to init index builder", K(ret), K(data_desc)); - } - - if (OB_FAIL(ret)) { - if (nullptr != index_builder_) { - index_builder_->~ObSSTableIndexBuilder(); - index_builder_ = nullptr; - } - if (nullptr != builder_buf) { - allocator_.free(builder_buf); - builder_buf = nullptr; - } - } - } - return ret; -} - -int ObSSTableInsertTabletContext::get_tablet_cache_interval(ObTabletCacheInterval &interval) -{ - int ret = OB_SUCCESS; - lib::ObMutexGuard guard(mutex_); - ObTabletAutoincrementService &autoinc_service = ObTabletAutoincrementService::get_instance(); - if (OB_FAIL(autoinc_service.get_tablet_cache_interval(MTL_ID(), - interval))) { - LOG_WARN("failed to get tablet cache intervals", K(ret)); - } else { - interval.task_id_ = task_id_; - ++task_id_; - } - return ret; -} - -int ObSSTableInsertTabletContext::inc_finish_count(bool &is_ready) -{ - int ret = OB_SUCCESS; - is_ready = false; - ATOMIC_INC(&task_finish_count_); - if (task_finish_count_ >= build_param_.task_cnt_) { - is_ready = true; - } - return ret; -} - -int ObSSTableInsertTabletContext::create_sstable() -{ - int ret = OB_SUCCESS; - lib::ObMutexGuard guard(mutex_); - ObITable::TableKey table_key; - if (!build_param_.write_major_) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("create minor sstable is not support", 
K(ret)); - } else if (sstable_created_) { - ret = OB_SUCCESS; - LOG_INFO("sstable has been created", K(ret), K(build_param_), K(sstable_created_)); - } else if (OB_FAIL(get_table_key(table_key))) { - LOG_WARN("get table key failed", K(ret), K(build_param_)); - } else if (OB_FAIL(create_sstable_with_clog(table_key, build_param_.table_id_))) { - LOG_WARN("create sstable with clog failed", K(ret), K(build_param_), K(table_key)); - } else { - sstable_created_ = true; - if (OB_NOT_NULL(index_builder_)) { - index_builder_->~ObSSTableIndexBuilder(); - allocator_.free(index_builder_); - index_builder_ = nullptr; - } - } - return ret; -} - -struct SliceKey final -{ -public: - SliceKey() : idx_(-1), end_key_() {} - ~SliceKey() = default; - TO_STRING_KV(K(idx_), K(end_key_)); -public: - int64_t idx_; - ObRowkey end_key_; -}; - -struct GetManageTabletIDs final -{ -public: - explicit GetManageTabletIDs() : ret_code_(OB_SUCCESS) {} - ~GetManageTabletIDs() = default; - int operator()(common::hash::HashMapPair &entry) - { - int ret = ret_code_; // for LOG_WARN - if (OB_LIKELY(OB_SUCCESS == ret_code_) && OB_SUCCESS != (ret_code_ = tablet_ids_.push_back(entry.first))) { - ret = ret_code_; - LOG_WARN("push back tablet id failed", K(ret_code_), K(entry.first)); - } - return ret_code_; - } - TO_STRING_KV(K(tablet_ids_), K(ret_code_)); -public: - ObArray tablet_ids_; - int ret_code_; -}; - -int ObSSTableInsertTabletContext::create_sstable_with_clog( - const ObITable::TableKey &table_key, - const int64_t table_id) -{ - int ret = OB_SUCCESS; - // write clog and create sstable - const int64_t max_kept_major_version_number = 1; - share::schema::ObMultiVersionSchemaService *schema_service = nullptr; - const share::schema::ObTableSchema *table_schema = nullptr; - const uint64_t tenant_id = MTL_ID(); - ObSchemaGetterGuard schema_guard; - if (OB_ISNULL(schema_service = GCTX.schema_service_)) { - ret = OB_ERR_SYS; - LOG_WARN("schema service is null", K(ret), KP(schema_service)); - } else if 
(OB_FAIL(schema_service->get_tenant_schema_guard(tenant_id, schema_guard))) { - LOG_WARN("get schema_guard failed", K(ret), K(table_key)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(table_key)); - } else if (OB_ISNULL(table_schema)) { - ret = OB_TABLE_NOT_EXIST; - LOG_WARN("table schema is null", K(ret), K(table_key), KP(table_schema)); - } else { - DEBUG_SYNC(AFTER_REMOTE_WRITE_DDL_PREPARE_LOG); - if (OB_FAIL(data_sstable_redo_writer_.end_ddl_redo_and_create_ddl_sstable( - build_param_.ls_id_, table_key, table_id, build_param_.execution_id_, build_param_.ddl_task_id_))) { - LOG_WARN("fail create ddl sstable", K(ret), K(table_key)); - } - } - return ret; -} - -int ObSSTableInsertTabletContext::get_table_key(ObITable::TableKey &table_key) -{ - int ret = OB_SUCCESS; - table_key.reset(); - table_key.table_type_ = ObITable::TableType::MAJOR_SSTABLE; - table_key.tablet_id_ = build_param_.tablet_id_; - table_key.version_range_.snapshot_version_ = build_param_.snapshot_version_; - return ret; -} - -ObSSTableInsertTableParam::ObSSTableInsertTableParam() - : exec_ctx_(nullptr), context_id_(0), dest_table_id_(OB_INVALID_ID), write_major_(false), schema_version_(0), - snapshot_version_(0), task_cnt_(0), execution_id_(1), ddl_task_id_(1), data_format_version_(0), ls_tablet_ids_() -{ -} - -int ObSSTableInsertTableParam::assign(const ObSSTableInsertTableParam &other) -{ - int ret = OB_SUCCESS; - if (OB_FAIL(ls_tablet_ids_.assign(other.ls_tablet_ids_))) { - LOG_WARN("assign tablet_ids failed", K(ret)); - } else { - context_id_ = other.context_id_; - dest_table_id_ = other.dest_table_id_; - write_major_ = other.write_major_; - schema_version_ = other.schema_version_; - snapshot_version_ = other.snapshot_version_; - task_cnt_ = other.task_cnt_; - execution_id_ = other.execution_id_; - ddl_task_id_ = other.ddl_task_id_; - data_format_version_ = other.data_format_version_; - exec_ctx_ = 
other.exec_ctx_; - } - return ret; -} - -int ObSSTableInsertTableParam::fast_check_status() -{ - int ret = common::OB_SUCCESS; - if (exec_ctx_ != nullptr) { - ret = exec_ctx_->fast_check_status(); - } - return ret; -} - -ObSSTableInsertTableContext::ObSSTableInsertTableContext() - : is_inited_(false), lock_(ObLatchIds::SSTABLE_INSERT_TABLE_CONTEXT_LOCK), param_(), allocator_(), tablet_ctx_map_(), finishing_idx_(0) -{ -} - -ObSSTableInsertTableContext::~ObSSTableInsertTableContext() -{ - remove_all_tablets_context(); // ignore error code. - tablet_ctx_map_.destroy(); -} - -int ObSSTableInsertTableContext::init( - const ObSSTableInsertTableParam ¶m) -{ - int ret = OB_SUCCESS; - const int64_t memory_limit = 1024L * 1024L * 1024L * 10L; // 10GB - if (OB_UNLIKELY(is_inited_)) { - ret = OB_INIT_TWICE; - LOG_WARN("ObTableInsertSSTableContext has been inited twice", K(ret)); - } else if (OB_UNLIKELY(!param.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(param)); - } else if (OB_FAIL(allocator_.init(OB_MALLOC_MIDDLE_BLOCK_SIZE, - lib::ObLabel("TablInsCtx"), - OB_SERVER_TENANT_ID, - memory_limit))) { - LOG_WARN("init alloctor failed", K(ret)); - } else if (OB_FAIL(param_.assign(param))) { - LOG_WARN("assign table insert param failed", K(ret)); - } else if (OB_FAIL(create_all_tablet_contexts(param.ls_tablet_ids_))) { - LOG_WARN("create all tablet contexts failed", K(ret)); - } else { - is_inited_ = true; - } - return ret; -} - -int ObSSTableInsertTableContext::create_all_tablet_contexts( - const common::ObIArray &ls_tablet_ids) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(ls_tablet_ids.count() <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(ls_tablet_ids.count())); - } else if (OB_FAIL(tablet_ctx_map_.create(ls_tablet_ids.count(), lib::ObLabel("TabInsCtx")))) { - LOG_WARN("create tablet ctx map failed", K(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < ls_tablet_ids.count(); ++i) { - const 
ObTabletID &tablet_id = ls_tablet_ids.at(i).second; - void *buf = nullptr; - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObSSTableInsertTabletContext)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory failed", K(ret)); - } else { - tablet_ctx = new (buf) ObSSTableInsertTabletContext(); - ObSSTableInsertTabletParam param; - param.context_id_ = param_.context_id_; - param.ls_id_ = ls_tablet_ids.at(i).first; - param.tablet_id_ = tablet_id; - param.schema_version_ = param_.schema_version_; - param.snapshot_version_ = param_.snapshot_version_; - param.table_id_ = param_.dest_table_id_; - param.write_major_ = param_.write_major_; - param.task_cnt_ = param_.task_cnt_; - param.execution_id_ = param_.execution_id_; - param.ddl_task_id_ = param_.ddl_task_id_; - param.data_format_version_ = param_.data_format_version_; - if (OB_FAIL(tablet_ctx->init(param))) { - LOG_WARN("init tablet insert sstable context", K(ret)); - } else if (OB_FAIL(tablet_ctx_map_.set_refactored(tablet_id, tablet_ctx))) { - LOG_WARN("set tablet ctx map failed", K(ret)); - } - } - if (OB_FAIL(ret)) { - if (nullptr != tablet_ctx) { - tablet_ctx->~ObSSTableInsertTabletContext(); - tablet_ctx = nullptr; - } - if (nullptr != buf) { - allocator_.free(buf); - buf = nullptr; - } - } - } - } - return ret; -} - -int ObSSTableInsertTableContext::update_context(const int64_t snapshot_version) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_UNLIKELY(snapshot_version <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(snapshot_version)); - } else { - for (TABLET_CTX_MAP::iterator iter = tablet_ctx_map_.begin(); OB_SUCC(ret) && iter != tablet_ctx_map_.end(); ++iter) { - ObSSTableInsertTabletContext *tablet_ctx = iter->second; - if (OB_ISNULL(tablet_ctx)) { - ret = OB_ERR_SYS; - LOG_WARN("error sys, part 
ctx must not be nullptr", K(ret)); - } else if (OB_FAIL(tablet_ctx->update(snapshot_version))) { - LOG_WARN("update tablet context failed", K(ret)); - } - } - } - return ret; -} - -int ObSSTableInsertTableContext::update_tablet_context( - const ObTabletID &tablet_id, - const int64_t snapshot_version) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_UNLIKELY(!tablet_id.is_valid() || snapshot_version <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(tablet_id), K(snapshot_version)); - } else { - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (OB_FAIL(get_tablet_context(tablet_id, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret), K(tablet_id)); - } else if (OB_ISNULL(tablet_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("error unexpected, tablet ctx must not be nullptr", K(ret)); - } else if (OB_FAIL(tablet_ctx->update(snapshot_version))) { - LOG_WARN("update tablet context failed", K(ret)); - } - } - return ret; -} - -int ObSSTableInsertTableContext::add_sstable_slice( - const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_FAIL(get_tablet_context(build_param.tablet_id_, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret), "tablet_id", build_param.tablet_id_); - } else if (OB_ISNULL(tablet_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("error unexpected, tablet ctx must not be nullptr", K(ret)); - } else if (OB_FAIL(tablet_ctx->build_sstable_slice(build_param, start_seq, iter, affected_rows))) { - LOG_WARN("build sstable slice failed", K(ret)); - } - return ret; -} - 
-int ObSSTableInsertTableContext::construct_sstable_slice_writer( - const ObSSTableInsertTabletParam &build_param, - const ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&sstable_slice_writer, - ObIAllocator &allocator) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_FAIL(get_tablet_context(build_param.tablet_id_, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret), "tablet_id", build_param.tablet_id_); - } else if (OB_ISNULL(tablet_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("error unexpected, tablet ctx must not be nullptr", K(ret)); - } else if (OB_FAIL(tablet_ctx->construct_sstable_slice_writer(build_param, start_seq, sstable_slice_writer, allocator))) { - LOG_WARN("construct sstable slice writer failed", K(ret)); - } - return ret; -} - -int ObSSTableInsertTableContext::get_tablet_context( - const ObTabletID &tablet_id, - ObSSTableInsertTabletContext *&context) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_UNLIKELY(!tablet_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(tablet_id)); - } else if (OB_FAIL(tablet_ctx_map_.get_refactored(tablet_id, context))) { - if (OB_HASH_NOT_EXIST == ret) { - ret = OB_ENTRY_NOT_EXIST; - } - } - return ret; -} - -int ObSSTableInsertTableContext::remove_all_tablets_context() -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else { - GetManageTabletIDs get_tablet_ids_fn; - if (OB_FAIL(tablet_ctx_map_.foreach_refactored(get_tablet_ids_fn))) { - LOG_WARN("get tablet ids failed", K(ret)); - } else if (OB_FAIL(get_tablet_ids_fn.ret_code_)) { - LOG_WARN("get tablet ids failed", K(ret)); - 
} - for (int64_t i = 0; i < get_tablet_ids_fn.tablet_ids_.count(); ++i) { // ignore error code. - ObSSTableInsertTabletContext *tablet_context = nullptr; - const ObTabletID &tablet_id = get_tablet_ids_fn.tablet_ids_.at(i); - if (OB_FAIL(tablet_ctx_map_.erase_refactored(tablet_id, &tablet_context))) { - LOG_WARN("erase failed", K(ret), K(tablet_id)); - } else { - tablet_context->~ObSSTableInsertTabletContext(); - allocator_.free(tablet_context); - tablet_context = nullptr; - } - } - } - return ret; -} - -int ObSSTableInsertTableContext::finish(const bool need_commit) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else { - GetManageTabletIDs get_tablet_ids_fn; - if (OB_FAIL(tablet_ctx_map_.foreach_refactored(get_tablet_ids_fn))) { - LOG_WARN("get tablet ids failed", K(ret)); - } else if (OB_FAIL(get_tablet_ids_fn.ret_code_)) { - LOG_WARN("get tablet ids failed", K(ret)); - } - for (int64_t i = 0; OB_SUCC(ret) && i < get_tablet_ids_fn.tablet_ids_.count(); ++i) { - const ObTabletID &tablet_id = get_tablet_ids_fn.tablet_ids_.at(i); - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (OB_FAIL(get_tablet_context(tablet_id, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret)); - } else if (need_commit && OB_FAIL(tablet_ctx->create_sstable())) { - LOG_WARN("create sstable failed", K(ret)); - } - } - remove_all_tablets_context(); // ignore error code. 
- } - return ret; -} - -int ObSSTableInsertTableContext::get_tablet_ids(common::ObIArray &tablet_ids) -{ - int ret = OB_SUCCESS; - tablet_ids.reset(); - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else { - GetManageTabletIDs get_tablet_ids_fn; - if (OB_FAIL(tablet_ctx_map_.foreach_refactored(get_tablet_ids_fn))) { - LOG_WARN("get tablet ids failed", K(ret)); - } else if (OB_FAIL(get_tablet_ids_fn.ret_code_)) { - LOG_WARN("get tablet ids failed", K(ret)); - } - for (int64_t i = 0; OB_SUCC(ret) && i < get_tablet_ids_fn.tablet_ids_.count(); ++i) { - const ObTabletID &tablet_id = get_tablet_ids_fn.tablet_ids_.at(i); - if (OB_FAIL(tablet_ids.push_back(tablet_id))) { - LOG_WARN("push back tablet id failed", K(ret), K(tablet_id)); - } - } - } - return ret; -} - -int ObSSTableInsertTableContext::notify_tablet_end(const ObTabletID &tablet_id) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - bool is_ready = false; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_UNLIKELY(!tablet_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(tablet_id)); - } else if (OB_FAIL(get_tablet_context(tablet_id, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret), K(tablet_id)); - } else if (OB_ISNULL(tablet_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet context is null", K(ret), K(tablet_id),KP(tablet_ctx)); - } else if (OB_FAIL(tablet_ctx->inc_finish_count(is_ready))) { - LOG_WARN("increase finish count failed", K(ret), K(tablet_id)); - } else if (is_ready) { - ObSpinLockGuard guard(lock_); - if (OB_FAIL(ready_tablets_.push_back(tablet_id))) { - LOG_WARN("push back tablet id failed", K(ret), K(tablet_id)); - } - } - return ret; -} - -int ObSSTableInsertTableContext::finish_ready_tablets(const int64_t target_count) -{ - int ret = 
OB_SUCCESS; - int64_t cur_finishing_idx = 0; - int64_t next_finishing_idx = 0; - int64_t old_finishing_idx = 0; - while (OB_SUCC(ret) && OB_SUCC(param_.fast_check_status()) && ready_tablets_.count() < target_count) { - ob_usleep(1000); - if (TC_REACH_TIME_INTERVAL(1000L * 1000L * 1L)) { - LOG_INFO("wait ready tablets reach target count", K(ready_tablets_.count()), K(target_count)); - } - } - while (OB_SUCC(ret) && OB_SUCC(param_.fast_check_status())) { - old_finishing_idx = cur_finishing_idx = ATOMIC_LOAD(&finishing_idx_); - while ((next_finishing_idx = cur_finishing_idx + 1) <= target_count && - old_finishing_idx != (cur_finishing_idx = ATOMIC_CAS(&finishing_idx_, old_finishing_idx, - next_finishing_idx))) { - old_finishing_idx = cur_finishing_idx; - PAUSE(); - } - if (next_finishing_idx > target_count) { - break; - } - ObTabletID tablet_id = ready_tablets_.at(cur_finishing_idx); - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (OB_FAIL(get_tablet_context(tablet_id, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret), K(tablet_id)); - } else if (OB_ISNULL(tablet_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet context is null", K(ret), K(tablet_id), KP(tablet_ctx)); - } else if (OB_FAIL(tablet_ctx->create_sstable())) { - LOG_WARN("create sstable failed", K(ret), K(tablet_id)); - } else { - LOG_INFO("finish ready tablet", K(ret), K(cur_finishing_idx), K(tablet_id), K(ready_tablets_.count())); - } - } - return ret; -} - -int ObSSTableInsertTableContext::get_tablet_cache_interval(const ObTabletID &tablet_id, - ObTabletCacheInterval &interval) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTabletContext *tablet_ctx = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableInsertSSTableContext has not been inited", K(ret)); - } else if (OB_FAIL(get_tablet_context(tablet_id, tablet_ctx))) { - LOG_WARN("get tablet context failed", K(ret), "tablet_id", tablet_id); - } else if (OB_ISNULL(tablet_ctx)) { - ret = 
OB_ERR_UNEXPECTED; - LOG_WARN("error unexpected, tablet ctx must not be nullptr", K(ret)); - } else if (OB_FAIL(tablet_ctx->get_tablet_cache_interval(interval))) { - LOG_WARN("add sstable slice failed", K(ret)); - } - return ret; -} - - -/*************** ObSSTableInsertManager *****************/ -ObSSTableInsertManager::ObSSTableInsertManager() - : is_inited_(false), mutex_(ObLatchIds::SSTABLE_INSERT_TABLE_MANAGER_LOCK), context_id_generator_(0) -{ - -} - -ObSSTableInsertManager::~ObSSTableInsertManager() -{ - destroy(); -} - -ObSSTableInsertManager &ObSSTableInsertManager::get_instance() -{ - static ObSSTableInsertManager instance; - return instance; -} - -int ObSSTableInsertManager::init() -{ - int ret = OB_SUCCESS; - const int64_t bucket_num = 1000L * 100L; // 10w - const int64_t memory_limit = 1024L * 1024L * 1024L * 10L; // 10GB - lib::ObMemAttr attr(OB_SERVER_TENANT_ID, "DInsSstMgr"); - SET_USE_500(attr); - if (OB_UNLIKELY(is_inited_)) { - ret = OB_INIT_TWICE; - LOG_WARN("init twice", K(ret)); - } else if (OB_FAIL(allocator_.init(OB_MALLOC_MIDDLE_BLOCK_SIZE, - attr.label_, - OB_SERVER_TENANT_ID, - memory_limit))) { - LOG_WARN("init alloctor failed", K(ret)); - } else if (OB_FAIL(bucket_lock_.init(bucket_num))) { - LOG_WARN("init bucket lock failed", K(ret), K(bucket_num)); - } else if (OB_FAIL(table_ctx_map_.create(bucket_num, attr, attr))) { - LOG_WARN("create context map failed", K(ret)); - } else { - allocator_.set_attr(attr); - context_id_generator_ = ObTimeUtility::current_time(); - is_inited_ = true; - } - return ret; -} - -int64_t ObSSTableInsertManager::alloc_context_id() -{ - return ATOMIC_AAF(&context_id_generator_, 1); -} - -int ObSSTableInsertManager::create_table_context( - const ObSSTableInsertTableParam ¶m, - int64_t &context_id) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_context = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(!param.is_valid())) { - 
ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(param)); - } else { - void *buf = nullptr; - if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObSSTableInsertTableContext)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("alloc memory for insert sstable context failed", K(ret), K(param)); - } else { - context_id = alloc_context_id(); - table_context = new (buf) ObSSTableInsertTableContext(); - const_cast(param).context_id_ = context_id; - ObBucketHashWLockGuard guard(bucket_lock_, get_context_id_hash(context_id)); - if (OB_FAIL(table_context->init(param))) { - LOG_WARN("set build param faild", K(ret), K(param)); - } else if (OB_FAIL(table_ctx_map_.set_refactored(context_id, table_context))) { - LOG_WARN("set into hash map failed", K(ret), K(param), KP(table_context)); - } - } - if (OB_FAIL(ret) && nullptr != table_context) { - table_context->~ObSSTableInsertTableContext(); - allocator_.free(table_context); - table_context = nullptr; - } - } - return ret; -} - -int ObSSTableInsertManager::update_table_context( - const int64_t context_id, - const int64_t snapshot_version) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_UNLIKELY(context_id <= 0 || snapshot_version <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(context_id), K(snapshot_version)); - } else { - ObSSTableInsertTableContext *table_context = nullptr; - ObBucketHashRLockGuard guard(bucket_lock_, get_context_id_hash(context_id)); - if (OB_FAIL(get_context_no_lock(context_id, table_context))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_FAIL(table_context->update_context(snapshot_version))) { - LOG_WARN("update context failed", K(ret)); - } - } - return ret; -} - -int ObSSTableInsertManager::update_table_tablet_context( - const int64_t context_id, - const ObTabletID &tablet_id, - const int64_t snapshot_version) -{ - int ret = 
OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_UNLIKELY(context_id <= 0 || !tablet_id.is_valid() || snapshot_version <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(context_id), K(tablet_id), K(snapshot_version)); - } else { - ObSSTableInsertTableContext *table_context = nullptr; - if (OB_FAIL(get_context(context_id, table_context))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_FAIL(table_context->update_tablet_context(tablet_id, snapshot_version))) { - LOG_WARN("update tablet context failed", K(ret)); - } - } - return ret; -} - -uint64_t ObSSTableInsertManager::get_context_id_hash(const int64_t context_id) -{ - return common::murmurhash(&context_id, sizeof(context_id), 0L); -} - -int ObSSTableInsertManager::finish_table_context(const int64_t context_id, const bool need_commit) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_context = nullptr; - ObBucketHashWLockGuard guard(bucket_lock_, get_context_id_hash(context_id)); - if (OB_FAIL(get_context_no_lock(context_id, table_context))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_FAIL(table_context->finish(need_commit))) { - LOG_WARN("finish table context failed", K(ret)); - } - if (nullptr != table_context) { // ignore ret - int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(remove_context_no_lock(context_id))) { - LOG_ERROR("erase factored failed", K(ret), K(tmp_ret), K(context_id)); - } - } - return ret; -} - -int ObSSTableInsertManager::add_sstable_slice( - const ObSSTableInsertTabletParam ¶m, - const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_ctx = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if 
(OB_FAIL(get_context(param.context_id_, table_ctx))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_FAIL(table_ctx->add_sstable_slice(param, start_seq, iter, affected_rows))) { - LOG_WARN("add sstable slice failed", K(ret)); - } - return ret; -} - -int ObSSTableInsertManager::construct_sstable_slice_writer( - const ObSSTableInsertTabletParam ¶m, - const ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&sstable_slice_writer, - ObIAllocator &allocator) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_ctx = nullptr; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_FAIL(get_context(param.context_id_, table_ctx))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_FAIL(table_ctx->construct_sstable_slice_writer(param, start_seq, sstable_slice_writer, allocator))) { - LOG_WARN("construct sstable slice writer failed", K(ret)); - } - return ret; -} - -int ObSSTableInsertManager::notify_tablet_end(const int64_t context_id, const ObTabletID &tablet_id) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_ctx = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_UNLIKELY(context_id < 0 || !tablet_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(context_id), K(tablet_id)); - } else if (OB_FAIL(get_context(context_id, table_ctx))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_ISNULL(table_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("table context is null", K(ret), KP(table_ctx)); - } else if (OB_FAIL(table_ctx->notify_tablet_end(tablet_id))) { - LOG_WARN("notify tablet failed", K(ret), K(tablet_id)); - } - return ret; -} - -int ObSSTableInsertManager::finish_ready_tablets(const int64_t context_id, const int64_t target_count) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_ctx 
= nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_UNLIKELY(context_id < 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(context_id)); - } else if (OB_FAIL(get_context(context_id, table_ctx))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_ISNULL(table_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("table context is null", K(ret), KP(table_ctx)); - } else if (OB_FAIL(table_ctx->finish_ready_tablets(target_count))) { - LOG_WARN("finsh ready tablets failed failed", K(ret), K(target_count)); - } - return ret; -} - -int ObSSTableInsertManager::get_tablet_ids(const int64_t context_id, common::ObIArray &tablet_ids) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_ctx = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_FAIL(get_context(context_id, table_ctx))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_ISNULL(table_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("table context is null", K(ret), KP(table_ctx)); - } else if (OB_FAIL(table_ctx->get_tablet_ids(tablet_ids))) { - LOG_WARN("get tablet ids failed", K(ret)); - } - return ret; -} - -int ObSSTableInsertManager::get_tablet_cache_interval(const int64_t context_id, - const ObTabletID &tablet_id, - ObTabletCacheInterval &interval) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_ctx = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectInsertSSTableManager has not been inited", K(ret)); - } else if (OB_FAIL(get_context(context_id, table_ctx))) { - LOG_WARN("get context failed", K(ret)); - } else if (OB_ISNULL(table_ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("table context is null", K(ret), KP(table_ctx)); - } else if (OB_FAIL(table_ctx->get_tablet_cache_interval(tablet_id, interval))) { - 
LOG_WARN("get tablet cache interval failed", K(ret)); - } - return ret; -} - -int ObSSTableInsertManager::get_context( - const int64_t context_id, - ObSSTableInsertTableContext *&ctx) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(context_id <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(context_id)); - } else { - ObBucketHashRLockGuard guard(bucket_lock_, get_context_id_hash(context_id)); - if (OB_FAIL(get_context_no_lock(context_id, ctx))) { - LOG_WARN("get context without lock failed", K(ret), K(context_id)); - } - } - return ret; -} - -int ObSSTableInsertManager::get_context_no_lock( - const int64_t context_id, - ObSSTableInsertTableContext *&ctx) -{ - int ret = OB_SUCCESS; - ctx = nullptr; - if (OB_FAIL(table_ctx_map_.get_refactored(context_id, ctx))) { - if (OB_HASH_NOT_EXIST != ret) { - LOG_WARN("get sstable insert context failed", K(ret), K(context_id)); - } else { - ret = OB_ENTRY_NOT_EXIST; - } - } else if (OB_ISNULL(ctx)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ctx is null", K(ret), K(context_id), KP(ctx)); - } - return ret; -} - -int ObSSTableInsertManager::remove_context_no_lock(const int64_t context_id) -{ - int ret = OB_SUCCESS; - ObSSTableInsertTableContext *table_context = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(context_id <= 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(context_id)); - } else if (OB_FAIL(table_ctx_map_.erase_refactored(context_id, &table_context))) { - LOG_WARN("erase table context failed", K(ret), K(context_id)); - } else { - table_context->~ObSSTableInsertTableContext(); - allocator_.free(table_context); - table_context = nullptr; - } - return ret; -} - -void ObSSTableInsertManager::destroy() -{ - int ret = OB_SUCCESS; - ObArray context_id_arr; - common::ObBucketWLockAllGuard 
lock_guard(bucket_lock_); - for(TABLE_CTX_MAP::iterator iter = table_ctx_map_.begin(); iter != table_ctx_map_.end(); ++iter) { // ignore error code. - if (OB_FAIL(context_id_arr.push_back(iter->first))) { - LOG_ERROR("push back failed", K(ret)); - } - } - for (int64_t i = 0; i < context_id_arr.count(); i++) { // ignore error code. - const int64_t context_id = context_id_arr.at(i); - if (OB_FAIL(remove_context_no_lock(context_id))) { - LOG_ERROR("remove context failed", K(ret), K(context_id)); - } - } - table_ctx_map_.destroy(); - allocator_.destroy(); -} diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx.h b/src/storage/ddl/ob_direct_insert_sstable_ctx.h deleted file mode 100644 index d0c4cb630..000000000 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx.h +++ /dev/null @@ -1,352 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. 
- */ - -#ifndef OCEANBASE_STORAGE_OB_DIRECT_INSERT_SSTABLE_CTX_H -#define OCEANBASE_STORAGE_OB_DIRECT_INSERT_SSTABLE_CTX_H - -#include "storage/meta_mem/ob_tablet_handle.h" -#include "lib/lock/ob_mutex.h" -#include "lib/hash/ob_hashmap.h" -#include "lib/lock/ob_bucket_lock.h" -#include "common/ob_tablet_id.h" -#include "common/row/ob_row_iterator.h" -#include "storage/ob_i_table.h" -#include "storage/ob_row_reshape.h" -#include "storage/blocksstable/ob_macro_block_struct.h" -#include "storage/ddl/ob_ddl_redo_log_writer.h" -#include "storage/tx_storage/ob_ls_map.h" - -namespace oceanbase -{ -namespace sql -{ -class ObPxMultiPartSSTableInsertOp; -class ObExecContext; -} - -namespace blocksstable -{ -class ObSSTableMergeRes; -} - -namespace share -{ -struct ObTabletCacheInterval; -} - -namespace storage -{ -class ObDDLRedoLogWriterCallback; -class ObTablet; - -struct ObSSTableInsertTabletParam final -{ -public: - ObSSTableInsertTabletParam(); - ~ObSSTableInsertTabletParam(); - bool is_valid() const; - TO_STRING_KV(K(context_id_), K(ls_id_), K(tablet_id_), K(table_id_), K(write_major_), - K(task_cnt_), K(schema_version_), K(snapshot_version_), K_(execution_id), K_(ddl_task_id), - K_(data_format_version)); -public: - int64_t context_id_; - share::ObLSID ls_id_; - common::ObTabletID tablet_id_; - uint64_t table_id_; - bool write_major_; - int64_t task_cnt_; - int64_t schema_version_; - int64_t snapshot_version_; - int64_t execution_id_; - int64_t ddl_task_id_; - int64_t data_format_version_; -}; - -typedef std::pair LSTabletIDPair; - -class ObISSTableInsertRowIterator : public common::ObNewRowIterator -{ -public: - ObISSTableInsertRowIterator() {} - virtual ~ObISSTableInsertRowIterator() {} - virtual int get_next_row_with_tablet_id( - const uint64_t table_id, - const int64_t rowkey_count, - const int64_t snapshot_version, - common::ObNewRow *&row, - common::ObTabletID &tablet_id) = 0; -}; - -class ObSSTableInsertRowIterator : public ObISSTableInsertRowIterator -{ 
-public: - ObSSTableInsertRowIterator(sql::ObExecContext &exec_ctx, sql::ObPxMultiPartSSTableInsertOp *op); - virtual ~ObSSTableInsertRowIterator(); - virtual void reset() override; - virtual int get_next_row(common::ObNewRow *&row) override; - int get_sql_mode(ObSQLMode &sql_mode) const; - int get_next_row_with_tablet_id( - const uint64_t table_id, - const int64_t rowkey_count, - const int64_t snapshot_version, - common::ObNewRow *&row, - common::ObTabletID &tablet_id) override; - common::ObTabletID get_current_tablet_id() const; -private: - sql::ObExecContext &exec_ctx_; - sql::ObPxMultiPartSSTableInsertOp *op_; - common::ObNewRow current_row_; - common::ObTabletID current_tablet_id_; - bool is_next_row_cached_; -}; - -struct ObSSTableInsertSliceParam final -{ -public: - ObSSTableInsertSliceParam(); - ~ObSSTableInsertSliceParam(); - bool is_valid() const; - TO_STRING_KV(K_(tablet_id), K_(ls_id), K_(table_key), K_(start_seq), K_(start_scn), - K_(snapshot_version), K_(task_id), K_(frozen_scn), K_(write_major), KP_(sstable_index_builder), K_(task_id)); -public: - common::ObTabletID tablet_id_; - share::ObLSID ls_id_; - ObITable::TableKey table_key_; - blocksstable::ObMacroDataSeq start_seq_; - share::SCN start_scn_; - int64_t snapshot_version_; - share::SCN frozen_scn_; - bool write_major_; - blocksstable::ObSSTableIndexBuilder *sstable_index_builder_; - int64_t task_id_; -}; - -class ObSSTableInsertSliceWriter final -{ -public: - ObSSTableInsertSliceWriter(); - ~ObSSTableInsertSliceWriter(); - int init(const ObSSTableInsertSliceParam &slice_param, - const share::schema::ObTableSchema *table_schema, - ObDDLKvMgrHandle &ddl_kv_mgr_handle); - int append_row(blocksstable::ObDatumRow &datum_row); - int append_row(const common::ObNewRow &row_val); - int close(); - OB_INLINE int64_t get_snapshot_version() const { return snapshot_version_; } - TO_STRING_KV(K_(tablet_id), K_(ls_id), K_(rowkey_column_num), K_(is_index_table), KP_(col_descs), - K_(snapshot_version), 
K_(data_desc), K_(lob_cnt), K_(sql_mode_for_ddl_reshape), - KP_(reshape_ptr), K_(lob_inrow_threshold)); -private: - int prepare_reshape( - const common::ObTabletID &tablet_id, - const share::schema::ObTableSchema *table_schema, - share::schema::ObTableSchemaParam &schema_param, - ObRelativeTable &relative_table) const; - int check_null(const common::ObNewRow &row_val) const; -private: - common::ObTabletID tablet_id_; - share::ObLSID ls_id_; - int64_t rowkey_column_num_; - bool is_index_table_; - const blocksstable::ObColDescIArray *col_descs_; - int64_t snapshot_version_; - ObDDLSSTableRedoWriter sstable_redo_writer_; - blocksstable::ObWholeDataStoreDesc data_desc_; - /** - * ATTENTION! - * The deconstruction order of the `redo_log_writer_callback_` should be in front of the `macro_block_writer_` - * to ensure the safety-used of the ddl macro block. - */ - blocksstable::ObMacroBlockWriter macro_block_writer_; - ObDDLRedoLogWriterCallback redo_log_writer_callback_; - common::ObArenaAllocator allocator_; - common::ObArenaAllocator lob_allocator_; - int64_t lob_cnt_; - ObSQLMode sql_mode_for_ddl_reshape_; - ObRowReshape *reshape_ptr_; - ObStoreRow store_row_; - blocksstable::ObDatumRow datum_row_; - bool is_inited_; - blocksstable::ObNewRowBuilder new_row_builder_; - int64_t lob_inrow_threshold_; -}; - -class ObSSTableInsertTabletContext final -{ -public: - ObSSTableInsertTabletContext(); - ~ObSSTableInsertTabletContext(); - int init(const ObSSTableInsertTabletParam &build_param); - int update(const int64_t snapshot_version); - int build_sstable_slice( - const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows); - int construct_sstable_slice_writer(const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&sstable_slice_writer, - common::ObIAllocator &allocator); - int create_sstable(); - int 
inc_finish_count(bool &is_ready); - int get_tablet_cache_interval(share::ObTabletCacheInterval &interval); - TO_STRING_KV(K(build_param_), K(sstable_created_)); -private: - int create_sstable_with_clog( - const ObITable::TableKey &table_key, - const int64_t table_id); - int get_table_key(ObITable::TableKey &table_key); - int prepare_index_builder_if_need(const share::schema::ObTableSchema &table_schema); - -private: - lib::ObMutex mutex_; - common::ObConcurrentFIFOAllocator allocator_; - ObSSTableInsertTabletParam build_param_; - ObDDLSSTableRedoWriter data_sstable_redo_writer_; - bool sstable_created_; - int64_t task_finish_count_; - blocksstable::ObSSTableIndexBuilder *index_builder_; - int64_t task_id_; - ObDDLKvMgrHandle ddl_kv_mgr_handle_; // for keeping ddl kv mgr alive -}; - -struct ObSSTableInsertTableParam final -{ -public: - ObSSTableInsertTableParam(); - ~ObSSTableInsertTableParam() = default; - int assign(const ObSSTableInsertTableParam &other); - int fast_check_status(); - bool is_valid() const { return OB_INVALID_ID != dest_table_id_ - && schema_version_ >= 0 && snapshot_version_ >= 0 && task_cnt_ >= 0 - && execution_id_ >= 0 && ddl_task_id_ > 0 && data_format_version_ > 0 && ls_tablet_ids_.count() > 0; } - TO_STRING_KV(K_(context_id), K_(dest_table_id), K_(write_major), K_(schema_version), K_(snapshot_version), - K_(task_cnt), K_(execution_id), K_(ddl_task_id), K_(data_format_version), K_(ls_tablet_ids)); -public: - sql::ObExecContext *exec_ctx_; - int64_t context_id_; - int64_t dest_table_id_; - bool write_major_; - int64_t schema_version_; - int64_t snapshot_version_; - int64_t task_cnt_; - int64_t execution_id_; - int64_t ddl_task_id_; - int64_t data_format_version_; - common::ObArray ls_tablet_ids_; -}; - -class ObSSTableInsertTableContext final -{ -public: - ObSSTableInsertTableContext(); - ~ObSSTableInsertTableContext(); - int init(const ObSSTableInsertTableParam ¶m); - int update_context(const int64_t snapshot_version); - int 
update_tablet_context(const ObTabletID &tablet_id, const int64_t snapshot_version); - int add_sstable_slice( - const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows); - int construct_sstable_slice_writer(const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&sstable_slice_writer, - common::ObIAllocator &allocator); - int finish(const bool need_commit); - int get_tablet_ids(common::ObIArray &tablet_ids); - int notify_tablet_end(const ObTabletID &tablet_id); - int finish_ready_tablets(const int64_t target_count); - int get_tablet_cache_interval(const ObTabletID &tablet_id, - share::ObTabletCacheInterval &interval); -private: - void destroy(); - int create_all_tablet_contexts(const common::ObIArray &ls_tablet_ids); - int get_tablet_context(const common::ObTabletID &tablet_id, ObSSTableInsertTabletContext *&tablet_ctx); - int remove_all_tablets_context(); -private: - typedef - common::hash::ObHashMap< - common::ObTabletID, - ObSSTableInsertTabletContext *, - common::hash::NoPthreadDefendMode> TABLET_CTX_MAP; - bool is_inited_; - common::ObSpinLock lock_; - ObSSTableInsertTableParam param_; - common::ObConcurrentFIFOAllocator allocator_; - TABLET_CTX_MAP tablet_ctx_map_; - ObArray ready_tablets_; - int64_t finishing_idx_; -}; - -class ObSSTableInsertManager final -{ -public: - static ObSSTableInsertManager &get_instance(); - int init(); - int create_table_context( - const ObSSTableInsertTableParam &build_param, - int64_t &context_id); - int finish_table_context(const int64_t context_id, const bool need_commit); - int update_table_context( - const int64_t context_id, - const int64_t snapshot_version); - int update_table_tablet_context( - const int64_t context_id, - const ObTabletID &tablet_id, - const int64_t snapshot_version); - int add_sstable_slice( - const ObSSTableInsertTabletParam &build_param, 
- const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows); - int construct_sstable_slice_writer(const ObSSTableInsertTabletParam &build_param, - const blocksstable::ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&sstable_slice_writer, - common::ObIAllocator &allocator); - void destroy(); - int get_tablet_ids(const int64_t context_id, common::ObIArray &tablet_ids); - int notify_tablet_end(const int64_t context_id, const ObTabletID &tablet_id); - int finish_ready_tablets(const int64_t context_id, const int64_t target_count); - int get_tablet_cache_interval(const int64_t context_id, - const ObTabletID &tablet_id, - share::ObTabletCacheInterval &interval); -private: - ObSSTableInsertManager(); - ~ObSSTableInsertManager(); - int get_context( - const int64_t context_id, - ObSSTableInsertTableContext *&ctx); - int get_context_no_lock( - const int64_t context_id, - ObSSTableInsertTableContext *&ctx); - int remove_context_no_lock(const int64_t context_id); - int64_t alloc_context_id(); - uint64_t get_context_id_hash(const int64_t context_id); -private: - typedef common::hash::ObHashMap< - int64_t, // context id - ObSSTableInsertTableContext *, - common::hash::NoPthreadDefendMode> TABLE_CTX_MAP; - bool is_inited_; - lib::ObMutex mutex_; - common::ObBucketLock bucket_lock_; - common::ObConcurrentFIFOAllocator allocator_; - int64_t context_id_generator_; - TABLE_CTX_MAP table_ctx_map_; - DISALLOW_COPY_AND_ASSIGN(ObSSTableInsertManager); -}; - -}// namespace storage -}// namespace oceanbase - -#endif//OCEANBASE_STORAGE_OB_DIRECT_INSERT_SSTABLE_CTX_H diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp new file mode 100644 index 000000000..db93a8475 --- /dev/null +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp @@ -0,0 +1,2783 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE + +#include "ob_direct_insert_sstable_ctx_new.h" +#include "share/ob_ddl_checksum.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "share/ob_ddl_common.h" +#include "share/ob_tablet_autoincrement_service.h" +#include "sql/engine/pdml/static/ob_px_sstable_insert_op.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "sql/engine/px/ob_sub_trans_ctrl.h" +#include "storage/blocksstable/index_block/ob_index_block_builder.h" +#include "storage/compaction/ob_schedule_dag_func.h" +#include "storage/compaction/ob_column_checksum_calculator.h" +#include "storage/compaction/ob_tenant_freeze_info_mgr.h" +#include "storage/ddl/ob_direct_load_struct.h" +#include "storage/ddl/ob_ddl_merge_task.h" +#include "storage/ddl/ob_ddl_redo_log_writer.h" +#include "storage/lob/ob_lob_util.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/column_store/ob_column_oriented_sstable.h" + +using namespace oceanbase; +using namespace oceanbase::common; +using namespace oceanbase::storage; +using namespace oceanbase::blocksstable; +using namespace oceanbase::share; +using namespace oceanbase::share::schema; +using namespace oceanbase::sql; + +int64_t ObTenantDirectLoadMgr::generate_context_id() +{ + return ATOMIC_AAF(&context_id_generator_, 1); +} + +ObTenantDirectLoadMgr::ObTenantDirectLoadMgr() + : is_inited_(false), slice_id_generator_(0), context_id_generator_(0) +{ +} + +ObTenantDirectLoadMgr::~ObTenantDirectLoadMgr() +{ + destroy(); +} + +void 
ObTenantDirectLoadMgr::destroy() +{ + is_inited_ = false; + int ret = OB_SUCCESS; + bucket_lock_.destroy(); + common::ObArray tablet_mgr_keys; + for (TABLET_MGR_MAP::const_iterator iter = tablet_mgr_map_.begin(); + iter != tablet_mgr_map_.end(); ++iter) { + if (OB_FAIL(tablet_mgr_keys.push_back(iter->first))) { + LOG_WARN("push back failed", K(ret)); + } + } + for (int64_t i = 0; i < tablet_mgr_keys.count(); i++) { + if (OB_FAIL(remove_tablet_direct_load(tablet_mgr_keys.at(i)))) { + LOG_WARN("remove tablet mgr failed", K(ret), K(tablet_mgr_keys.at(i))); + } + } + allocator_.reset(); +} + +int64_t ObTenantDirectLoadMgr::generate_slice_id() +{ + return ATOMIC_AAF(&slice_id_generator_, 1); +} + +int ObTenantDirectLoadMgr::mtl_init(ObTenantDirectLoadMgr *&tenant_direct_load_mgr) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret)); + } else if (OB_FAIL(tenant_direct_load_mgr->init())) { + LOG_WARN("init failed", K(ret)); + } + return ret; +} + +int ObTenantDirectLoadMgr::init() +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = MTL_ID(); + const int64_t bucket_num = 1000L * 100L; // 10w + const int64_t memory_limit = 1024L * 1024L * 1024L * 10L; // 10GB + lib::ObMemAttr attr(tenant_id, "TenantDLMgr"); + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(tenant_id)); + } else if (OB_FAIL(allocator_.init(OB_MALLOC_MIDDLE_BLOCK_SIZE, + attr.label_, tenant_id, memory_limit))) { + LOG_WARN("init alloctor failed", K(ret)); + } else if (OB_FAIL(bucket_lock_.init(bucket_num, ObLatchIds::TENANT_DIRECT_LOAD_MGR_LOCK, + ObLabel("TenDLBucket"), tenant_id))) { + LOG_WARN("init bucket lock failed", K(ret), K(bucket_num)); + } else if (OB_FAIL(tablet_mgr_map_.create(bucket_num, attr, attr))) { + LOG_WARN("create context map failed", 
K(ret)); + } else if (OB_FAIL(tablet_exec_context_map_.create(bucket_num, attr, attr))) { + LOG_WARN("create context map failed", K(ret)); + } else { + allocator_.set_attr(attr); + slice_id_generator_ = ObTimeUtility::current_time(); + is_inited_ = true; + } + return ret; +} + +// 1. Leader create it when start tablet direct load task; +// 2. Follower create it before replaying start log; +// 3. Migrate/Rebuild create tablet/ LS online create it. +int ObTenantDirectLoadMgr::create_tablet_direct_load( + const int64_t context_id, + const int64_t execution_id, + const ObTabletDirectLoadInsertParam &build_param, + const share::SCN checkpoint_scn) +{ + int ret = OB_SUCCESS; + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObTabletBindingMdsUserData ddl_data; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!build_param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(context_id), K(build_param)); + } else if (OB_ISNULL(ls_service = MTL(ObLSService *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(ls_service->get_ls(build_param.common_param_.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(build_param)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, build_param.common_param_.tablet_id_, + tablet_handle, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(build_param)); + } else if (OB_FAIL(tablet_handle.get_obj()->ObITabletMdsInterface::get_ddl_data(share::SCN::max_scn(), ddl_data))) { + LOG_WARN("failed to get ddl data from tablet", K(ret), K(tablet_handle)); + } else { + ObTabletHandle lob_tablet_handle; + ObTabletMemberWrapper table_store_wrapper; + ObTabletMemberWrapper lob_store_wrapper; + ObTabletDirectLoadMgrHandle data_tablet_direct_load_mgr_handle; + 
ObTabletDirectLoadMgrHandle lob_tablet_direct_load_mgr_handle; + data_tablet_direct_load_mgr_handle.reset(); + lob_tablet_direct_load_mgr_handle.reset(); + const bool is_full_direct_load_task = is_full_direct_load(build_param.common_param_.direct_load_type_); + const ObTabletID &lob_meta_tablet_id = ddl_data.lob_meta_tablet_id_; + if (!lob_meta_tablet_id.is_valid() || checkpoint_scn.is_valid_and_not_min()) { + // has no lob, or recover from checkpoint. + LOG_DEBUG("do not create lob mgr handle when create data tablet mgr", K(ret), K(lob_meta_tablet_id), K(checkpoint_scn), + K(build_param)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, lob_meta_tablet_id, + lob_tablet_handle, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(lob_meta_tablet_id)); + } else if (OB_FAIL(lob_tablet_handle.get_obj()->fetch_table_store(lob_store_wrapper))) { + LOG_WARN("fail to fetch table store", K(ret)); + } else if (OB_FAIL(try_create_tablet_direct_load_mgr(context_id, execution_id, + nullptr != lob_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/), + allocator_, ObTabletDirectLoadMgrKey(lob_meta_tablet_id, is_full_direct_load_task), true /*is lob tablet*/, + lob_tablet_direct_load_mgr_handle))) { + LOG_WARN("try create data tablet direct load mgr failed", K(ret), K(build_param)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) { + LOG_WARN("fetch table store failed", K(ret)); + } else if (OB_FAIL(try_create_tablet_direct_load_mgr(context_id, execution_id, + nullptr != table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/), + allocator_, ObTabletDirectLoadMgrKey(build_param.common_param_.tablet_id_, is_full_direct_load_task), false /*is lob tablet*/, + data_tablet_direct_load_mgr_handle))) { + // Newly-allocated Lob meta tablet direct load mgr will be cleanuped when tablet gc task works. 
+ LOG_WARN("try create data tablet direct load mgr failed", K(ret), K(build_param)); + } + + if (OB_FAIL(ret)) { + } else if (data_tablet_direct_load_mgr_handle.is_valid()) { + if (OB_FAIL(data_tablet_direct_load_mgr_handle.get_obj()->update( + lob_tablet_direct_load_mgr_handle.get_obj(), build_param))) { + LOG_WARN("init tablet mgr failed", K(ret), K(build_param)); + } + } + } + return ret; +} + +int ObTenantDirectLoadMgr::try_create_tablet_direct_load_mgr( + const int64_t context_id, + const int64_t execution_id, + const bool major_sstable_exist, + ObIAllocator &allocator, + const ObTabletDirectLoadMgrKey &mgr_key, + const bool is_lob_tablet_mgr, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle) +{ + int ret = OB_SUCCESS; + direct_load_mgr_handle.reset(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!mgr_key.is_valid()) || execution_id < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(mgr_key), K(execution_id)); + } else { + ObTabletDirectLoadMgr *direct_load_mgr = nullptr; + ObBucketHashWLockGuard guard(bucket_lock_, mgr_key.hash()); + if (OB_FAIL(get_tablet_mgr_no_lock(mgr_key, direct_load_mgr_handle))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("get refactored failed", K(ret), K(is_full_direct_load), K(mgr_key)); + } + } else if (OB_ISNULL(direct_load_mgr = direct_load_mgr_handle.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(mgr_key)); + } + if (OB_SUCC(ret) && !major_sstable_exist) { + if (nullptr == direct_load_mgr) { + void *buf = nullptr; + const int64_t buf_size = mgr_key.is_full_direct_load_ ? 
+ sizeof(ObTabletFullDirectLoadMgr) : sizeof(ObTabletIncDirectLoadMgr); + if (OB_ISNULL(buf = allocator.alloc(buf_size))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret), K(mgr_key)); + } else if (mgr_key.is_full_direct_load_) { + direct_load_mgr = new (buf) ObTabletFullDirectLoadMgr(); + } else { + direct_load_mgr = new (buf) ObTabletIncDirectLoadMgr(); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(direct_load_mgr_handle.set_obj(direct_load_mgr))) { + LOG_WARN("set direct load mgr failed", K(ret)); + } else if (OB_FAIL(tablet_mgr_map_.set_refactored(mgr_key, direct_load_mgr))) { + LOG_WARN("set tablet mgr failed", K(ret)); + } else { + direct_load_mgr->inc_ref(); + LOG_INFO("create tablet direct load mgr", K(mgr_key), K(execution_id), K(major_sstable_exist)); + } + // cleanup if failed. + if (OB_FAIL(ret)) { + if (nullptr != direct_load_mgr) { + direct_load_mgr->~ObTabletDirectLoadMgr(); + direct_load_mgr = nullptr; + } + if (buf != nullptr) { + allocator.free(buf); + buf = nullptr; + } + } + } + } + if (OB_SUCC(ret) && context_id >= 0 && !is_lob_tablet_mgr) { // only build execution context map for data tablet + ObTabletDirectLoadExecContextId exec_id; + ObTabletDirectLoadExecContext exec_context; + exec_id.tablet_id_ = mgr_key.tablet_id_; + exec_id.context_id_ = context_id; + exec_context.execution_id_ = execution_id; + exec_context.start_scn_.reset(); + if (OB_FAIL(tablet_exec_context_map_.set_refactored(exec_id, exec_context, true /*overwrite*/))) { + LOG_WARN("get table execution context failed", K(ret), K(exec_id)); + } + } + } + return ret; +} + +int ObTenantDirectLoadMgr::alloc_execution_context_id( + int64_t &context_id) +{ + int ret = OB_SUCCESS; + context_id = generate_context_id(); + return ret; +} + +int ObTenantDirectLoadMgr::open_tablet_direct_load( + const bool is_full_direct_load, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const int64_t context_id, + SCN &start_scn, + ObTabletDirectLoadMgrHandle 
&handle) +{ + int ret = OB_SUCCESS; + ObTabletDirectLoadExecContextId exec_id; + ObTabletDirectLoadExecContext exec_context; + exec_id.tablet_id_ = tablet_id; + exec_id.context_id_ = context_id; + ObTabletDirectLoadMgrKey mgr_key(tablet_id, is_full_direct_load); + bool is_mgr_exist = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!tablet_id.is_valid() || context_id < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(context_id)); + } else if (OB_FAIL(get_tablet_mgr(tablet_id, is_full_direct_load, handle))) { + if (OB_ENTRY_NOT_EXIST == ret && is_full_direct_load) { + if (OB_FAIL(check_and_process_finished_tablet(ls_id, tablet_id))) { + LOG_WARN("check and report checksum if need failed", K(ret), K(ls_id), K(tablet_id)); + } + } else { + LOG_WARN("get table mgr failed", K(ret), K(tablet_id), K(is_full_direct_load)); + } + } else { + is_mgr_exist = true; + } + + if (OB_SUCC(ret)) { + ObBucketHashRLockGuard guard(bucket_lock_, mgr_key.hash()); + if (OB_FAIL(tablet_exec_context_map_.get_refactored(exec_id, exec_context))) { + LOG_WARN("get table execution context failed", K(ret), K(exec_id)); + } + } + + if (OB_SUCC(ret) && is_mgr_exist) { + if (OB_FAIL(handle.get_obj()->open(exec_context.execution_id_, start_scn))) { + LOG_WARN("update tablet direct load failed", K(ret), K(is_full_direct_load), K(tablet_id), K(exec_context)); + } + } + + if (OB_SUCC(ret)) { + ObBucketHashWLockGuard guard(bucket_lock_, mgr_key.hash()); + exec_context.start_scn_ = start_scn; + if (OB_FAIL(tablet_exec_context_map_.set_refactored(exec_id, exec_context, true/*overwrite*/))) { + LOG_WARN("get table execution context failed", K(ret), K(exec_id)); + } + } + return ret; +} + +int ObTenantDirectLoadMgr::open_sstable_slice( + const blocksstable::ObMacroDataSeq &start_seq, + ObDirectLoadSliceInfo &slice_info) +{ + int ret = OB_SUCCESS; + slice_info.slice_id_ = 0; + ObTabletDirectLoadMgrHandle 
handle; + const int64_t new_slice_id = generate_slice_id(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid() || !start_seq.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(slice_info), K(start_seq)); + } else if (OB_FAIL(get_tablet_mgr(slice_info.data_tablet_id_, slice_info.is_full_direct_load_, handle))) { + if (OB_ENTRY_NOT_EXIST == ret && slice_info.is_full_direct_load_) { + if (OB_FAIL(check_and_process_finished_tablet(slice_info.ls_id_, slice_info.data_tablet_id_))) { + LOG_WARN("check and report checksum if need failed", K(ret), K(slice_info)); + } + } else { + LOG_WARN("get table mgr failed", K(ret), K(slice_info)); + } + } else if (OB_FAIL(handle.get_obj()->open_sstable_slice( + slice_info.is_lob_slice_/*is_data_tablet_process_for_lob*/, start_seq, new_slice_id))) { + LOG_WARN("open sstable slice failed", K(ret), K(slice_info)); + } + if (OB_SUCC(ret)) { + // To simplify the logic of TabletDirectLoadMgr, + // unique slice id is generated here. 
+ slice_info.slice_id_ = new_slice_id; + } + return ret; +} + +int ObTenantDirectLoadMgr::fill_sstable_slice( + const ObDirectLoadSliceInfo &slice_info, + ObIStoreRowIterator *iter, + int64_t &affected_rows, + ObInsertMonitor *insert_monitor) +{ + int ret = OB_SUCCESS; + bool need_iter_part_row = false; + ObTabletDirectLoadMgrHandle handle; + ObTabletDirectLoadExecContext exec_context; + ObTabletDirectLoadExecContextId exec_id; + exec_id.tablet_id_ = slice_info.data_tablet_id_; + exec_id.context_id_ = slice_info.context_id_; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(slice_info)); + } else if (OB_FAIL(get_tablet_mgr(slice_info.data_tablet_id_, slice_info.is_full_direct_load_, handle))) { + if (OB_ENTRY_NOT_EXIST == ret && slice_info.is_full_direct_load_) { + need_iter_part_row = true; + } else { + LOG_WARN("get table mgr failed", K(ret), K(slice_info)); + } + } else if (OB_FAIL(tablet_exec_context_map_.get_refactored(exec_id, exec_context))) { + LOG_WARN("get tablet execution context failed", K(ret)); + } else if (OB_FAIL(handle.get_obj()->fill_sstable_slice(slice_info, exec_context.start_scn_, iter, affected_rows, insert_monitor))) { + if (OB_TRANS_COMMITED == ret && slice_info.is_full_direct_load_) { + need_iter_part_row = true; + } else { + LOG_WARN("fill sstable slice failed", K(ret), K(slice_info)); + } + } + + if (need_iter_part_row && + OB_FAIL(check_and_process_finished_tablet(slice_info.ls_id_, slice_info.data_tablet_id_, iter))) { + LOG_WARN("check and report checksum if need failed", K(ret), K(slice_info)); + } + return ret; +} + +int ObTenantDirectLoadMgr::fill_lob_sstable_slice( + ObIAllocator &allocator, + const ObDirectLoadSliceInfo &slice_info, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + blocksstable::ObDatumRow &datum_row) +{ 
+ int ret = OB_SUCCESS; + ObTabletDirectLoadMgrHandle handle; + ObTabletDirectLoadExecContext exec_context; + ObTabletDirectLoadExecContextId exec_id; + exec_id.tablet_id_ = slice_info.data_tablet_id_; + exec_id.context_id_ = slice_info.context_id_; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(slice_info)); + } else if (OB_FAIL(get_tablet_mgr(slice_info.data_tablet_id_, slice_info.is_full_direct_load_, handle))) { + if (OB_ENTRY_NOT_EXIST == ret && slice_info.is_full_direct_load_) { + if (OB_FAIL(check_and_process_finished_tablet(slice_info.ls_id_, slice_info.data_tablet_id_))) { + LOG_WARN("check and report checksum if need failed", K(ret), K(slice_info)); + } + } else { + LOG_WARN("get table mgr failed", K(ret), K(slice_info)); + } + } else if (OB_FAIL(tablet_exec_context_map_.get_refactored(exec_id, exec_context))) { + LOG_WARN("get tablet execution context failed", K(ret)); + } else if (OB_FAIL(handle.get_obj()->fill_lob_sstable_slice(allocator, slice_info, exec_context.start_scn_, pk_interval, lob_column_idxs, col_types, datum_row))) { + LOG_WARN("fail to fill batch sstable slice", KR(ret), K(slice_info), K(datum_row)); + } + return ret; +} + +int ObTenantDirectLoadMgr::calc_range( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const bool is_full_direct_load) +{ + int ret = OB_SUCCESS; + ObTabletDirectLoadMgrHandle handle; + bool is_major_sstable_exist = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(get_tablet_mgr_and_check_major(ls_id, tablet_id, is_full_direct_load, handle, is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && 
is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + LOG_INFO("direct load mgr not exist, but major sstable exist", K(ret), K(tablet_id)); + } else { + LOG_WARN("get table mgr failed", K(ret), K(tablet_id)); + } + } else { + ObStorageSchema *storage_schema = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObArenaAllocator arena_allocator("DIRECT_RESCAN", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + bool is_column_store = false; + if (OB_FAIL(MTL(ObLSService *)->get_ls(handle.get_obj()->get_ls_id(), ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(handle), "ls_id", handle.get_obj()->get_ls_id()); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + tablet_id, + tablet_handle, + ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("failed to get tablet", K(ret), "ls_id", handle.get_obj()->get_ls_id(), K(tablet_id)); + } else if (OB_FAIL(tablet_handle.get_obj()->load_storage_schema(arena_allocator, storage_schema))) { + LOG_WARN("load storage schema failed", K(ret), K(tablet_id)); + } else if (OB_FAIL(ObCODDLUtil::need_column_group_store(*storage_schema, is_column_store))) { + LOG_WARN("fail to check need column group", K(ret)); + } else if (OB_UNLIKELY(!is_column_store)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table withou cg", K(ret)); + } else if (OB_FAIL(handle.get_obj()->calc_range(storage_schema, tablet_handle.get_obj()->get_rowkey_read_info().get_datum_utils()))) { + LOG_WARN("calc range failed", K(ret)); + } + ObTabletObjLoadHelper::free(arena_allocator, storage_schema); + arena_allocator.reset(); + } + return ret; +} + +int ObTenantDirectLoadMgr::fill_column_group( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const bool is_full_direct_load, + const int64_t thread_cnt, + const int64_t thread_id) +{ + int ret = OB_SUCCESS; + ObTabletDirectLoadMgrHandle handle; + bool is_major_sstable_exist = false; + + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not 
init", K(ret)); + } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || thread_cnt <= 0 || thread_id < 0 || thread_id > thread_cnt - 1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(ls_id), K(tablet_id), K(thread_cnt), K(thread_id)); + } else if (OB_FAIL(get_tablet_mgr_and_check_major(ls_id, tablet_id, is_full_direct_load, handle, is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + LOG_INFO("direct load mgr not exist, but major sstable exist", K(ret), K(tablet_id)); + } else { + LOG_WARN("get table mgr failed", K(ret), K(tablet_id)); + } + } else if (OB_FAIL(handle.get_obj()->fill_column_group(thread_cnt, thread_id))) { + LOG_WARN("fill sstable slice failed", K(ret), K(thread_cnt), K(thread_id)); + } + return ret; +} + +int ObTenantDirectLoadMgr::cancel( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const bool is_full_direct_load) +{ + int ret = OB_SUCCESS; + ObTabletDirectLoadMgrHandle handle; + bool is_major_sstable_exist = false; + + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(get_tablet_mgr_and_check_major(ls_id, tablet_id, is_full_direct_load, handle, is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + LOG_INFO("direct load mgr not exist, but major sstable exist", K(ret), K(tablet_id)); + } else { + LOG_WARN("get table mgr failed", K(ret), K(tablet_id)); + } + } else if (OB_FAIL(handle.get_obj()->cancel())) { + LOG_WARN("cancel fill sstable slice failed", K(ret)); + } + return ret; +} + +int ObTenantDirectLoadMgr::close_sstable_slice(const ObDirectLoadSliceInfo &slice_info, ObInsertMonitor* insert_monitor) +{ + int ret = OB_SUCCESS; + 
ObTabletDirectLoadMgrHandle handle; + ObTabletDirectLoadExecContext exec_context; + ObTabletDirectLoadExecContextId exec_id; + exec_id.tablet_id_ = slice_info.data_tablet_id_; + exec_id.context_id_ = slice_info.context_id_; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(slice_info)); + } else if (OB_FAIL(get_tablet_mgr(slice_info.data_tablet_id_, slice_info.is_full_direct_load_, handle))) { + if (OB_ENTRY_NOT_EXIST == ret && slice_info.is_full_direct_load_) { + if (OB_FAIL(check_and_process_finished_tablet(slice_info.ls_id_, slice_info.data_tablet_id_))) { + LOG_WARN("check and report checksum if need failed", K(ret), K(slice_info)); + } + } else { + LOG_WARN("get table mgr failed", K(ret), K(slice_info)); + } + } else if (OB_FAIL(tablet_exec_context_map_.get_refactored(exec_id, exec_context))) { + LOG_WARN("get tablet execution context failed", K(ret)); + } else if (OB_FAIL(handle.get_obj()->close_sstable_slice( + slice_info.is_lob_slice_/*is_data_tablet_process_for_lob*/, slice_info, exec_context.start_scn_, exec_context.execution_id_, insert_monitor))) { + LOG_WARN("close sstable slice failed", K(ret), K(slice_info), "execution_start_scn", exec_context.start_scn_, "execution_id", exec_context.execution_id_); + } + return ret; +} + +int ObTenantDirectLoadMgr::close_tablet_direct_load( + const int64_t context_id, + const bool is_full_direct_load, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const bool need_commit, + const bool emergent_finish, + const int64_t task_id, + const int64_t table_id, + const int64_t execution_id) +{ + int ret = OB_SUCCESS; + UNUSED(emergent_finish); + ObTabletDirectLoadMgrHandle handle; + ObTabletDirectLoadMgrKey mgr_key(tablet_id, is_full_direct_load); + ObTabletDirectLoadExecContextId exec_id; + exec_id.tablet_id_ = tablet_id; + exec_id.context_id_ = context_id; + 
if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!tablet_id.is_valid() || context_id < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tablet_id), K(context_id)); + } else if (OB_FAIL(get_tablet_mgr(tablet_id, is_full_direct_load, handle))) { + if (OB_ENTRY_NOT_EXIST == ret && is_full_direct_load) { + if (OB_FAIL(check_and_process_finished_tablet(ls_id, tablet_id, nullptr/*row_iterator*/, task_id, table_id, execution_id))) { + LOG_WARN("check and report checksum if need failed", K(ret), K(ls_id), K(tablet_id), K(task_id), K(execution_id)); + } + } else { + LOG_WARN("get table mgr failed", K(ret), K(ls_id), K(tablet_id)); + } + } else { + if (need_commit) { + ObTabletDirectLoadExecContext exec_context; + { + ObBucketHashRLockGuard guard(bucket_lock_, mgr_key.hash()); + if (OB_FAIL(tablet_exec_context_map_.get_refactored(exec_id, exec_context))) { + LOG_WARN("get tablet execution context failed", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(handle.get_obj()->close(exec_context.execution_id_, exec_context.start_scn_))) { + LOG_WARN("close failed", K(ret)); + } + } else { + // For full/incremental direct load, the ObTabletDirectLoadMgr will be removed from MTL when, + // 1. the direct load task abort indicated by `need_commit = false`, and we do not care about + // the error code triggered by the not found ObTabletDirectLoadMgr after. + // 2. the direct load task commit and all ddl kvs persist successfully. + + // But how to notify the follower to remove it, with write commit failed log or tablet gc task ?? + } + if (OB_SUCC(ret)) { + ObBucketHashWLockGuard guard(bucket_lock_, mgr_key.hash()); + if (OB_FAIL(tablet_exec_context_map_.erase_refactored(exec_id))) { + LOG_WARN("erase tablet execution context failed", K(ret), K(exec_id)); + } else { + LOG_INFO("erase execution context", K(exec_id), K(tablet_id)); + } + } + } + return ret; +} + +// Other utils function. 
+int ObTenantDirectLoadMgr::get_online_stat_collect_result( + const bool is_full_direct_load, + const ObTabletID &tablet_id, + const ObArray *&column_stat_array) +{ + int ret = OB_NOT_IMPLEMENT; + // ObTableDirectLoadMgr *table_mgr = nullptr; + // if (OB_FAIL(get_table_mgr(task_id, table_mgr))) { + // LOG_WARN("get context failed", K(ret)); + // } else if (OB_FAIL(table_mgr->get_online_stat_collect_result(tablet_id, column_stat_array))) { + // LOG_WARN("finish table context failed", K(ret)); + // } + return ret; +} + +int ObTenantDirectLoadMgr::get_tablet_cache_interval( + const int64_t context_id, + const ObTabletID &tablet_id, + ObTabletCacheInterval &interval) +{ + int ret = OB_SUCCESS; + ObTabletDirectLoadMgrKey mgr_key(tablet_id, true/*full direct load*/); // only support in ddl, which is full direct load + ObBucketHashWLockGuard guard(bucket_lock_, mgr_key.hash()); + ObTabletAutoincrementService &autoinc_service = ObTabletAutoincrementService::get_instance(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(context_id < 0 || !tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(context_id), K(tablet_id)); + } else if (OB_FAIL(autoinc_service.get_tablet_cache_interval(MTL_ID(), interval))) { + LOG_WARN("failed to get tablet cache intervals", K(ret)); + } else { + ObTabletDirectLoadExecContext exec_context; + ObTabletDirectLoadExecContextId exec_id; + exec_id.tablet_id_ = tablet_id; + exec_id.context_id_ = context_id; + if (OB_FAIL(tablet_exec_context_map_.get_refactored(exec_id, exec_context))) { + LOG_WARN("get tablet execution context failed", K(ret)); + } else { + interval.task_id_ = exec_context.seq_interval_task_id_++; + if (OB_FAIL(tablet_exec_context_map_.set_refactored(exec_id, exec_context, true/*overwrite*/))) { + LOG_WARN("set tablet execution context map", K(ret)); + } + } + } + + return ret; +} + +int get_co_column_checksums_if_need( + 
ObTabletHandle &tablet_handle, + const ObSSTable *sstable, + ObIArray &column_checksum_array) +{ + int ret = OB_SUCCESS; + column_checksum_array.reset(); + if (OB_UNLIKELY(!tablet_handle.is_valid() || nullptr == sstable)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tablet_handle), KP(sstable)); + } else if (!sstable->is_co_sstable()) { + // do nothing + } else { + bool is_rowkey_based_co_sstable = false; + ObStorageSchema *storage_schema = nullptr; + ObArenaAllocator arena("co_ddl_cksm", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + if (OB_FAIL(tablet_handle.get_obj()->load_storage_schema(arena, storage_schema))) { + LOG_WARN("load storage schema failed", K(ret)); + } else if (OB_FAIL(ObCODDLUtil::is_rowkey_based_co_sstable( + static_cast(sstable), storage_schema, is_rowkey_based_co_sstable))) { + LOG_WARN("check is rowkey based co sstable failed", K(ret)); + } else if (is_rowkey_based_co_sstable) { + if (OB_FAIL(ObCODDLUtil::get_column_checksums( + static_cast(sstable), + storage_schema, + column_checksum_array))) { + LOG_WARN("get column checksum from co sstable failed", K(ret)); + } + } + ObTabletObjLoadHelper::free(arena, storage_schema); + } + return ret; +} + +int ObTenantDirectLoadMgr::check_and_process_finished_tablet( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + ObIStoreRowIterator *row_iter, + const int64_t task_id, + const int64_t table_id, + const int64_t execution_id) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObSSTableMetaHandle sst_meta_hdl; + const ObSSTable *first_major_sstable = nullptr; + ObTabletMemberWrapper table_store_wrapper; + const int64_t max_wait_timeout_us = 30L * 1000L * 1000L; // 30s + ObTimeGuard tg("ddl_retry_tablet", max_wait_timeout_us); + if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, 
ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id)); + } + while (OB_SUCC(ret)) { + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", K(ret), K(ls_id), K(tablet_id)); + } else if (tg.get_diff() > max_wait_timeout_us) { + ret = OB_NEED_RETRY; + LOG_WARN("process finished tablet timeout, need retry", K(ret), K(ls_id), K(tablet_id), K(tg)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + tablet_id, tablet_handle, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_UNLIKELY(nullptr == tablet_handle.get_obj())) { + ret = OB_ERR_SYS; + LOG_WARN("tablet handle is null", K(ret), K(ls_id), K(tablet_id)); + } else if (task_id <= 0 || common::OB_INVALID_ID == table_id || execution_id < 0 + || tablet_handle.get_obj()->get_tablet_meta().ddl_execution_id_ > execution_id) { + // no need to report checkksum. + LOG_INFO("no need to report checksum", K(ret), K(task_id), K(table_id), K(execution_id), + "tablet_meta", tablet_handle.get_obj()->get_tablet_meta()); + break; + } else if (OB_FAIL(tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) { + LOG_WARN("fail to fetch table store", K(ret)); + } else if (FALSE_IT(first_major_sstable = static_cast( + table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)))) { + } else if (nullptr == first_major_sstable) { + LOG_INFO("major not exist, retry later", K(ret), K(ls_id), K(tablet_id), K(tg)); + usleep(100L * 1000L); // 100ms + } else if (OB_FAIL(ObTabletDDLUtil::check_and_get_major_sstable( + ls_id, tablet_id, first_major_sstable, table_store_wrapper))) { + LOG_WARN("check if major sstable exist failed", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(first_major_sstable->get_meta(sst_meta_hdl))) { + LOG_WARN("fail to get sstable meta handle", K(ret)); + } else { + const int64_t *column_checksums = 
sst_meta_hdl.get_sstable_meta().get_col_checksum(); + int64_t column_count = sst_meta_hdl.get_sstable_meta().get_col_checksum_cnt(); + ObArray co_column_checksums; + co_column_checksums.set_attr(ObMemAttr(MTL_ID(), "TblDL_Ccc")); + if (OB_FAIL(get_co_column_checksums_if_need(tablet_handle, first_major_sstable, co_column_checksums))) { + LOG_WARN("get column checksum from co sstable failed", K(ret)); + } else if (OB_FAIL(ObTabletDDLUtil::report_ddl_checksum( + ls_id, + tablet_id, + table_id, + execution_id, + task_id, + co_column_checksums.empty() ? column_checksums : co_column_checksums.get_data(), + co_column_checksums.empty() ? column_count : co_column_checksums.count()))) { + LOG_WARN("report ddl column checksum failed", K(ret), K(ls_id), K(tablet_id), K(execution_id)); + } else { + break; + } + } + } + if (OB_SUCC(ret) && nullptr != row_iter) { + const ObDatumRow *row = nullptr; + while (OB_SUCC(ret)) { + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", K(ret)); + } else if (OB_FAIL(row_iter->get_next_row(row))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + // ignore error, iter part row completely. 
+ ret = OB_SUCCESS; + } + } + } + } + return ret; +} + +int ObTenantDirectLoadMgr::get_tablet_mgr_and_check_major( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const bool is_full_direct_load, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + bool &is_major_sstable_exist) +{ + int ret = get_tablet_mgr(tablet_id, is_full_direct_load, direct_load_mgr_handle); + is_major_sstable_exist = false; + if (OB_ENTRY_NOT_EXIST == ret) { + int tmp_ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + if (OB_TMP_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(tmp_ret), K(ls_id)); + } else if (OB_TMP_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id, tablet_handle))) { + LOG_WARN("get tablet handle failed", K(tmp_ret), K(ls_id), K(tablet_id)); + } else { + is_major_sstable_exist = tablet_handle.get_obj()->get_major_table_count() > 0 + || tablet_handle.get_obj()->get_tablet_meta().table_store_flag_.with_major_sstable(); + } + if (!is_major_sstable_exist) { + ret = OB_TASK_EXPIRED; + } + } + return ret; +} + +int ObTenantDirectLoadMgr::get_tablet_mgr( + const ObTabletID &tablet_id, + const bool is_full_direct_load, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tablet_id)); + } else { + ObTabletDirectLoadMgrKey mgr_key(tablet_id, is_full_direct_load); + ObBucketHashRLockGuard guard(bucket_lock_, mgr_key.hash()); + if (OB_FAIL(get_tablet_mgr_no_lock(mgr_key, direct_load_mgr_handle))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("get table mgr without lock failed", K(ret), K(mgr_key)); + } + } + } + return ret; +} + +int ObTenantDirectLoadMgr::get_tablet_mgr_no_lock( + const ObTabletDirectLoadMgrKey &mgr_key, + 
ObTabletDirectLoadMgrHandle &direct_load_mgr_handle) +{ + int ret = OB_SUCCESS; + ObTabletDirectLoadMgr *tablet_mgr = nullptr; + if (OB_UNLIKELY(!mgr_key.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(mgr_key)); + } else if (OB_FAIL(tablet_mgr_map_.get_refactored(mgr_key, tablet_mgr))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("get refactored failed", K(ret), K(mgr_key)); + } else { + ret = OB_HASH_NOT_EXIST == ret ? OB_ENTRY_NOT_EXIST : ret; + } + } else if (OB_FAIL(direct_load_mgr_handle.set_obj(tablet_mgr))) { + LOG_WARN("set handle failed", K(ret), K(mgr_key)); + } else if (!direct_load_mgr_handle.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(mgr_key)); + } + return ret; +} + +int ObTenantDirectLoadMgr::remove_tablet_direct_load(const ObTabletDirectLoadMgrKey &mgr_key) +{ + ObBucketHashWLockGuard guard(bucket_lock_, mgr_key.hash()); + return remove_tablet_direct_load_nolock(mgr_key); +} + +int ObTenantDirectLoadMgr::remove_tablet_direct_load_nolock(const ObTabletDirectLoadMgrKey &mgr_key) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!mgr_key.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(mgr_key)); + } else { + ObTabletDirectLoadMgr *tablet_direct_load_mgr = nullptr; + if (OB_FAIL(tablet_mgr_map_.get_refactored(mgr_key, tablet_direct_load_mgr))) { + ret = OB_HASH_NOT_EXIST == ret ? 
OB_ENTRY_NOT_EXIST : ret; + LOG_TRACE("get table mgr failed", K(ret), K(mgr_key), K(common::lbt())); + } else if (OB_ISNULL(tablet_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(mgr_key)); + } else if (OB_FAIL(tablet_mgr_map_.erase_refactored(mgr_key))) { + LOG_WARN("erase from map failed", K(ret)); + } else { + LOG_INFO("remove tablet direct load mgr from MTL", K(ret), K(mgr_key), K(common::lbt()), K(tablet_direct_load_mgr->get_ref())); + if (0 == tablet_direct_load_mgr->dec_ref()) { + tablet_direct_load_mgr->~ObTabletDirectLoadMgr(); + allocator_.free(tablet_direct_load_mgr); + } + } + } + return ret; +} + +struct DestroySliceWriterMapFn +{ +public: + DestroySliceWriterMapFn(ObIAllocator *allocator) :allocator_(allocator) {} + int operator () (hash::HashMapPair &entry) { + int ret = OB_SUCCESS; + if (nullptr != allocator_) { + if (nullptr != entry.second) { + LOG_INFO("erase a slice writer", K(&entry.second), "slice_id", entry.first); + entry.second->~ObDirectLoadSliceWriter(); + allocator_->free(entry.second); + entry.second = nullptr; + } + } + return ret; + } + +private: + ObIAllocator *allocator_; +}; + +ObTabletDirectLoadBuildCtx::ObTabletDirectLoadBuildCtx() + : allocator_(), slice_writer_allocator_(), build_param_(), slice_mgr_map_(), data_block_desc_(true/*is ddl*/), index_builder_(nullptr), + column_stat_array_(), sorted_slice_writers_(), is_task_end_(false), task_finish_count_(0), fill_column_group_finish_count_(0) +{ + column_stat_array_.set_attr(ObMemAttr(MTL_ID(), "TblDL_CSA")); + sorted_slice_writers_.set_attr(ObMemAttr(MTL_ID(), "TblDL_SSR")); +} + +ObTabletDirectLoadBuildCtx::~ObTabletDirectLoadBuildCtx() +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(index_builder_)) { + index_builder_->~ObSSTableIndexBuilder(); + allocator_.free(index_builder_); + index_builder_ = nullptr; + } + for (int64_t i = 0; i < column_stat_array_.count(); i++) { + ObOptColumnStat *col_stat = column_stat_array_.at(i); + 
col_stat->~ObOptColumnStat(); + allocator_.free(col_stat); + col_stat = nullptr; + } + column_stat_array_.reset(); + sorted_slice_writers_.reset(); + + if (!slice_mgr_map_.empty()) { + DestroySliceWriterMapFn destroy_map_fn(&slice_writer_allocator_); + slice_mgr_map_.foreach_refactored(destroy_map_fn); + slice_mgr_map_.destroy(); + } + allocator_.reset(); + slice_writer_allocator_.reset(); +} + +bool ObTabletDirectLoadBuildCtx::is_valid() const +{ + return build_param_.is_valid(); +} + +void ObTabletDirectLoadBuildCtx::reset_slice_ctx_on_demand() +{ + ATOMIC_STORE(&task_finish_count_, 0); + ATOMIC_STORE(&fill_column_group_finish_count_, 0); + ATOMIC_STORE(&task_total_cnt_, build_param_.runtime_only_param_.task_cnt_); +} + +ObTabletDirectLoadMgr::ObTabletDirectLoadMgr() + : is_inited_(false), is_schema_item_ready_(false), ls_id_(), tablet_id_(), table_key_(), data_format_version_(0), + lock_(), ref_cnt_(0), direct_load_type_(ObDirectLoadType::DIRECT_LOAD_INVALID), sqc_build_ctx_(), + column_items_(), lob_column_idxs_(), lob_col_types_(), tablet_handle_(), schema_item_() +{ + column_items_.set_attr(ObMemAttr(MTL_ID(), "DL_schema")); + lob_column_idxs_.set_attr(ObMemAttr(MTL_ID(), "DL_schema")); + lob_col_types_.set_attr(ObMemAttr(MTL_ID(), "DL_schema")); +} + +ObTabletDirectLoadMgr::~ObTabletDirectLoadMgr() +{ + FLOG_INFO("deconstruct tablet direct load mgr", KP(this), KPC(this)); + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + is_inited_ = false; + ls_id_.reset(); + tablet_id_.reset(); + table_key_.reset(); + data_format_version_ = 0; + ATOMIC_STORE(&ref_cnt_, 0); + direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_INVALID; + column_items_.reset(); + lob_column_idxs_.reset(); + lob_col_types_.reset(); + tablet_handle_.reset(); + schema_item_.reset(); + is_schema_item_ready_ = false; +} + +bool ObTabletDirectLoadMgr::is_valid() +{ + return is_inited_ == true && ls_id_.is_valid() && tablet_id_.is_valid() + && 
is_valid_direct_load(direct_load_type_); +} + +int ObTabletDirectLoadMgr::update( + ObTabletDirectLoadMgr *lob_tablet_mgr, + const ObTabletDirectLoadInsertParam &build_param) +{ + UNUSED(lob_tablet_mgr); + int ret = OB_SUCCESS; + const int64_t bucket_num = 97L; // 97 + const int64_t memory_limit = 1024L * 1024L * 1024L * 10L; // 10GB + if (OB_UNLIKELY(!build_param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(build_param)); + } else if (!build_param.is_replay_ && !sqc_build_ctx_.slice_mgr_map_.created()) { + // 1. Create slice_mgr_map if the tablet_direct_load_mgr is created firstly. + // 2. Create slice_mgr_map if the node is switched from follower to leader. + const uint64_t tenant_id = MTL_ID(); + lib::ObMemAttr attr(tenant_id, "TabletDLMgr"); + lib::ObMemAttr slice_writer_attr(tenant_id, "SliceWriter"); + lib::ObMemAttr slice_writer_map_attr(tenant_id, "SliceWriterMap"); + if (OB_FAIL(sqc_build_ctx_.allocator_.init(OB_MALLOC_MIDDLE_BLOCK_SIZE, + attr.label_, tenant_id, memory_limit))) { + LOG_WARN("init alloctor failed", K(ret)); + } else if (OB_FAIL(sqc_build_ctx_.slice_writer_allocator_.init(OB_MALLOC_MIDDLE_BLOCK_SIZE, + slice_writer_attr.label_, tenant_id, memory_limit))) { + LOG_WARN("init allocator failed", K(ret)); + } else if (OB_FAIL(sqc_build_ctx_.slice_mgr_map_.create(bucket_num, + slice_writer_map_attr, slice_writer_map_attr))) { + LOG_WARN("create slice writer map failed", K(ret)); + } else if (OB_FAIL(cond_.init(ObWaitEventIds::COLUMN_STORE_DDL_RESCAN_LOCK_WAIT))) { + LOG_WARN("init condition failed", K(ret)); + } else { + sqc_build_ctx_.allocator_.set_attr(attr); + sqc_build_ctx_.slice_writer_allocator_.set_attr(slice_writer_attr); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(sqc_build_ctx_.build_param_.assign(build_param))) { + LOG_WARN("assign build param failed", K(ret)); + } else { + ls_id_ = build_param.common_param_.ls_id_; + tablet_id_ = build_param.common_param_.tablet_id_; + direct_load_type_ = 
build_param.common_param_.direct_load_type_; + is_inited_ = true; + } + } + return ret; +} + +int ObTabletDirectLoadMgr::open_sstable_slice( + const bool is_data_tablet_process_for_lob, + const blocksstable::ObMacroDataSeq &start_seq, + const int64_t slice_id) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), KPC(this)); + } else if (OB_UNLIKELY(!start_seq.is_valid() || slice_id <= 0 || !sqc_build_ctx_.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(tablet_id_), K(start_seq), K(slice_id), K(sqc_build_ctx_)); + } else if (is_data_tablet_process_for_lob) { + if (OB_UNLIKELY(!lob_mgr_handle_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (OB_FAIL(lob_mgr_handle_.get_obj()->open_sstable_slice( + false, start_seq, slice_id))) { + LOG_WARN("open sstable slice for lob failed", K(ret), KPC(this)); + } + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (OB_FAIL(prepare_schema_item_on_demand(sqc_build_ctx_.build_param_.runtime_only_param_.table_id_))) { + LOG_WARN("prepare table schema item on demand", K(ret), K(sqc_build_ctx_.build_param_)); + } else { + ObDirectLoadSliceWriter *slice_writer = nullptr; + if (OB_ISNULL(slice_writer = OB_NEWx(ObDirectLoadSliceWriter, (&sqc_build_ctx_.slice_writer_allocator_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to new ObDirectLoadSliceWriter", KR(ret)); + } else if (OB_FAIL(slice_writer->init(this, start_seq))) { + LOG_WARN("init sstable slice writer failed", K(ret), KPC(this)); + } else if (OB_FAIL(sqc_build_ctx_.slice_mgr_map_.set_refactored(slice_id, slice_writer))) { + LOG_WARN("set refactored failed", K(ret), K(slice_id), KPC(this)); + } else { + LOG_INFO("add a slice writer", KP(slice_writer), K(slice_id), K(sqc_build_ctx_.slice_mgr_map_.size())); + } + if (OB_FAIL(ret)) { + if 
(OB_NOT_NULL(slice_writer)) { + slice_writer->~ObDirectLoadSliceWriter(); + sqc_build_ctx_.slice_writer_allocator_.free(slice_writer); + slice_writer = nullptr; + } + } + } + return ret; +} + +int ObTabletDirectLoadMgr::prepare_schema_item_on_demand(const uint64_t table_id) +{ + int ret = OB_SUCCESS; + bool is_schema_item_ready = ATOMIC_LOAD(&is_schema_item_ready_); + if (!is_schema_item_ready) { + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + is_schema_item_ready = is_schema_item_ready_; + } + if (!is_schema_item_ready) { + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + if (is_schema_item_ready_) { + // do nothing + } else if (OB_UNLIKELY(OB_INVALID_ID == table_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(table_id)); + } else { + const uint64_t tenant_id = MTL_ID(); + ObSchemaGetterGuard schema_guard; + const ObDataStoreDesc &data_desc = sqc_build_ctx_.data_block_desc_.get_desc(); + const ObTableSchema *table_schema = nullptr; + if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("get tenant schema failed", K(ret), K(tenant_id), K(table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(tenant_id), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(tenant_id), K(table_id)); + } else if (OB_FAIL(prepare_index_builder_if_need(*table_schema))) { + LOG_WARN("prepare sstable index builder failed", K(ret), K(sqc_build_ctx_)); + } else if (OB_FAIL(table_schema->get_is_column_store(schema_item_.is_column_store_))) { + LOG_WARN("fail to get is column store", K(ret)); + } else { + schema_item_.is_index_table_ = table_schema->is_index_table(); + schema_item_.rowkey_column_num_ = table_schema->get_rowkey_column_num(); + schema_item_.is_unique_index_ = 
table_schema->is_unique_index(); + + if (OB_FAIL(column_items_.reserve(data_desc.get_col_desc_array().count()))) { + LOG_WARN("reserve column schema array failed", K(ret), K(data_desc.get_col_desc_array().count()), K(column_items_)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < data_desc.get_col_desc_array().count(); ++i) { + const ObColDesc &col_desc = data_desc.get_col_desc_array().at(i); + const schema::ObColumnSchemaV2 *column_schema = nullptr; + ObColumnSchemaItem column_item; + if (i >= table_schema->get_rowkey_column_num() && i < table_schema->get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()) { + // skip multi version column, keep item invalid + } else if (OB_ISNULL(column_schema = table_schema->get_column_schema(col_desc.col_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column schema is null", K(ret), K(i), K(data_desc.get_col_desc_array()), K(col_desc.col_id_)); + } else { + column_item.is_valid_ = true; + column_item.col_type_ = column_schema->get_meta_type(); + column_item.col_accuracy_ = column_schema->get_accuracy(); + } + if (OB_SUCC(ret)) { + if (OB_FAIL(column_items_.push_back(column_item))) { + LOG_WARN("push back null column schema failed", K(ret)); + } else if (OB_NOT_NULL(column_schema) && column_schema->get_meta_type().is_lob_storage()) { // not multi version column + if (OB_FAIL(lob_column_idxs_.push_back(i))) { + LOG_WARN("push back lob column idx failed", K(ret), K(i)); + } else if (OB_FAIL(lob_col_types_.push_back(column_schema->get_meta_type()))) { + LOG_WARN("push back lob col_type failed", K(ret), K(i)); + } + } + } + } + } + if (OB_SUCC(ret)) { + // get compress type + uint64_t tenant_id = table_schema->get_tenant_id(); + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id)); + if (OB_UNLIKELY(!tenant_config.is_valid())) { + //tenant config获取失败时,租户不存在;返回默认值 + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail get tenant_config", K(ret), K(tenant_id)); + } else if 
(tenant_config->enable_store_compression) { + schema_item_.compress_type_ = table_schema->get_compressor_type(); + } + LOG_INFO("load compress type is:", K(schema_item_.compress_type_), K(tenant_config->enable_store_compression)); + } + } + if (OB_SUCC(ret)) { + is_schema_item_ready_ = true; + } + } + } + return ret; +} + +int ObTabletDirectLoadMgr::fill_sstable_slice( + const ObDirectLoadSliceInfo &slice_info, + const SCN &start_scn, + ObIStoreRowIterator *iter, + int64_t &affected_rows, + ObInsertMonitor *insert_monitor) +{ + int ret = OB_SUCCESS; + affected_rows = 0; + share::SCN commit_scn; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid() || !start_scn.is_valid_and_not_min()) || !sqc_build_ctx_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(slice_info), K(start_scn), K(sqc_build_ctx_)); + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (is_full_direct_load(direct_load_type_)) { + if (OB_UNLIKELY(!tablet_handle_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle_)); + } else { + commit_scn = get_commit_scn(tablet_handle_.get_obj()->get_tablet_meta()); + if (commit_scn.is_valid_and_not_min()) { + ret = OB_TRANS_COMMITED; + FLOG_INFO("already committed", K(commit_scn), KPC(this)); + } else if (start_scn != get_start_scn()) { + ret = OB_TASK_EXPIRED; + LOG_WARN("task expired", K(ret), "start_scn of current execution", start_scn, "start_scn latest", get_start_scn()); + } + } + } + if (OB_SUCC(ret)) { + ObDirectLoadSliceWriter *slice_writer = nullptr; + if (OB_FAIL(sqc_build_ctx_.slice_mgr_map_.get_refactored(slice_info.slice_id_, slice_writer))) { + LOG_WARN("get refactored failed", K(ret), K(slice_info)); + } else if (OB_ISNULL(slice_writer) || OB_UNLIKELY(!ATOMIC_LOAD(&is_schema_item_ready_))) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(slice_info), K(is_schema_item_ready_)); + } else if (OB_FAIL(slice_writer->fill_sstable_slice(start_scn, sqc_build_ctx_.build_param_.runtime_only_param_.table_id_, tablet_id_, + iter, schema_item_, direct_load_type_, column_items_, affected_rows, insert_monitor))) { + LOG_WARN("fill sstable slice failed", K(ret), KPC(this)); + } + } + if (OB_FAIL(ret) && (OB_TRANS_COMMITED != ret)) { + // cleanup when failed. + int tmp_ret = OB_SUCCESS; + ObDirectLoadSliceWriter *slice_writer = nullptr; + if (OB_TMP_FAIL(sqc_build_ctx_.slice_mgr_map_.erase_refactored(slice_info.slice_id_, &slice_writer))) { + LOG_ERROR("erase failed", K(ret), K(tmp_ret), K(slice_info)); + } else { + LOG_INFO("erase a slice writer", KP(slice_writer), "slice_id", slice_info.slice_id_, K(sqc_build_ctx_.slice_mgr_map_.size())); + slice_writer->~ObDirectLoadSliceWriter(); + sqc_build_ctx_.slice_writer_allocator_.free(slice_writer); + slice_writer = nullptr; + } + } + return ret; +} + +int ObTabletDirectLoadMgr::fill_lob_sstable_slice( + ObIAllocator &allocator, + const ObDirectLoadSliceInfo &slice_info, + const SCN &start_scn, + share::ObTabletCacheInterval &pk_interval, + blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + share::SCN commit_scn; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid() || !sqc_build_ctx_.is_valid() || !start_scn.is_valid_and_not_min() || + !lob_mgr_handle_.is_valid() || !lob_mgr_handle_.get_obj()->get_sqc_build_ctx().is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(slice_info), "lob_direct_load_mgr is valid", lob_mgr_handle_.is_valid(), KPC(this), K(start_scn)); + } else if (is_full_direct_load(direct_load_type_)) { + commit_scn = get_commit_scn(tablet_handle_.get_obj()->get_tablet_meta()); + if (commit_scn.is_valid_and_not_min()) { + ret = OB_TRANS_COMMITED; + FLOG_INFO("already 
committed", K(commit_scn), KPC(this)); + } else if (start_scn != get_start_scn()) { + ret = OB_TASK_EXPIRED; + LOG_WARN("task expired", K(ret), "start_scn of current execution", start_scn, "start_scn latest", get_start_scn()); + } + } + + if (OB_SUCC(ret)) { + ObDirectLoadSliceWriter *slice_writer = nullptr; + const int64_t trans_version = is_full_direct_load(direct_load_type_) ? table_key_.get_snapshot_version() : INT64_MAX; + ObBatchSliceWriteInfo info(tablet_id_, ls_id_, trans_version, direct_load_type_, sqc_build_ctx_.build_param_.runtime_only_param_.trans_id_, + sqc_build_ctx_.build_param_.runtime_only_param_.seq_no_); + + if (OB_FAIL(lob_mgr_handle_.get_obj()->get_sqc_build_ctx().slice_mgr_map_.get_refactored(slice_info.slice_id_, slice_writer))) { + LOG_WARN("get refactored failed", K(ret), K(slice_info), K(sqc_build_ctx_.slice_mgr_map_.size())); + } else if (OB_ISNULL(slice_writer) || OB_UNLIKELY(!ATOMIC_LOAD(&(lob_mgr_handle_.get_obj()->is_schema_item_ready_)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(slice_info), K(lob_mgr_handle_.get_obj()->is_schema_item_ready_)); + } else if (OB_FAIL(slice_writer->fill_lob_sstable_slice(lob_mgr_handle_.get_obj()->sqc_build_ctx_.build_param_.runtime_only_param_.table_id_, allocator, sqc_build_ctx_.allocator_, + start_scn, info, pk_interval, lob_column_idxs_, lob_col_types_, schema_item_.lob_inrow_threshold_, datum_row))) { + LOG_WARN("fail to fill batch sstable slice", K(ret), K(start_scn), K(tablet_id_), K(pk_interval)); + } + } + if (OB_FAIL(ret) && lob_mgr_handle_.is_valid()) { + // cleanup when failed. 
+ int tmp_ret = OB_SUCCESS; + ObDirectLoadSliceWriter *slice_writer = nullptr; + if (OB_TMP_FAIL(lob_mgr_handle_.get_obj()->get_sqc_build_ctx().slice_mgr_map_.erase_refactored(slice_info.slice_id_, &slice_writer))) { + LOG_ERROR("erase failed", K(ret), K(tmp_ret), K(slice_info)); + } else { + LOG_INFO("erase a slice writer", KP(slice_writer), "slice_id", slice_info.slice_id_, K(sqc_build_ctx_.slice_mgr_map_.size())); + slice_writer->~ObDirectLoadSliceWriter(); + lob_mgr_handle_.get_obj()->get_sqc_build_ctx().slice_writer_allocator_.free(slice_writer); + slice_writer = nullptr; + } + } + return ret; +} + +int ObTabletDirectLoadMgr::fill_lob_sstable_slice( + ObIAllocator &allocator, + const ObDirectLoadSliceInfo &slice_info, + const SCN &start_scn, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + share::SCN commit_scn; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid() || !sqc_build_ctx_.is_valid() || !start_scn.is_valid_and_not_min() || + !lob_mgr_handle_.is_valid() || !lob_mgr_handle_.get_obj()->get_sqc_build_ctx().is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(slice_info), "lob_direct_load_mgr is valid", lob_mgr_handle_.is_valid(), KPC(this), K(start_scn)); + } else if (is_full_direct_load(direct_load_type_)) { + commit_scn = get_commit_scn(tablet_handle_.get_obj()->get_tablet_meta()); + if (commit_scn.is_valid_and_not_min()) { + ret = OB_TRANS_COMMITED; + FLOG_INFO("already committed", K(commit_scn), KPC(this)); + } else if (start_scn != get_start_scn()) { + ret = OB_TASK_EXPIRED; + LOG_WARN("task expired", K(ret), "start_scn of current execution", start_scn, "start_scn latest", get_start_scn()); + } + } + + if (OB_SUCC(ret)) { + ObDirectLoadSliceWriter *slice_writer = nullptr; + const int64_t trans_version = 
is_full_direct_load(direct_load_type_) ? table_key_.get_snapshot_version() : INT64_MAX; + ObBatchSliceWriteInfo info(tablet_id_, ls_id_, trans_version, direct_load_type_, sqc_build_ctx_.build_param_.runtime_only_param_.trans_id_, + sqc_build_ctx_.build_param_.runtime_only_param_.seq_no_); + + if (OB_FAIL(lob_mgr_handle_.get_obj()->get_sqc_build_ctx().slice_mgr_map_.get_refactored(slice_info.slice_id_, slice_writer))) { + LOG_WARN("get refactored failed", K(ret), K(slice_info), K(sqc_build_ctx_.slice_mgr_map_.size())); + } else if (OB_ISNULL(slice_writer) || OB_UNLIKELY(!ATOMIC_LOAD(&(lob_mgr_handle_.get_obj()->is_schema_item_ready_)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(slice_info), K(lob_mgr_handle_.get_obj()->is_schema_item_ready_)); + } else if (OB_FAIL(slice_writer->fill_lob_sstable_slice(lob_mgr_handle_.get_obj()->sqc_build_ctx_.build_param_.runtime_only_param_.table_id_, + allocator, sqc_build_ctx_.allocator_, start_scn, info, + pk_interval, lob_column_idxs, col_types, schema_item_.lob_inrow_threshold_, datum_row))) { + LOG_WARN("fail to fill batch sstable slice", K(ret), K(start_scn), K(tablet_id_), K(pk_interval)); + } + } + if (OB_FAIL(ret) && lob_mgr_handle_.is_valid()) { + // cleanup when failed. 
+ int tmp_ret = OB_SUCCESS; + ObDirectLoadSliceWriter *slice_writer = nullptr; + if (OB_TMP_FAIL(lob_mgr_handle_.get_obj()->get_sqc_build_ctx().slice_mgr_map_.erase_refactored(slice_info.slice_id_, &slice_writer))) { + LOG_ERROR("erase failed", K(ret), K(tmp_ret), K(slice_info)); + } else { + LOG_INFO("erase a slice writer", KP(slice_writer), "slice_id", slice_info.slice_id_, K(sqc_build_ctx_.slice_mgr_map_.size())); + slice_writer->~ObDirectLoadSliceWriter(); + lob_mgr_handle_.get_obj()->get_sqc_build_ctx().slice_writer_allocator_.free(slice_writer); + slice_writer = nullptr; + } + } + return ret; +} + +int ObTabletDirectLoadMgr::wait_notify(const ObDirectLoadSliceWriter *slice_writer, const share::SCN &start_scn) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(slice_writer) || !start_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(slice_writer), K(start_scn)); + } else { + while (OB_SUCC(ret)) { + const SCN tablet_start_scn = get_start_scn(); + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", K(ret)); + } else if (start_scn != tablet_start_scn) { + ret = OB_TASK_EXPIRED; + LOG_WARN("task expired", K(ret), K(start_scn), K(tablet_start_scn)); + } else if (slice_writer->get_row_offset() >= 0) { + // row offset already set + break; + } else { + const int64_t wait_interval_ms = 100L; + ObThreadCondGuard guard(cond_); + if (OB_FAIL(cond_.wait(wait_interval_ms))) { + if (OB_TIMEOUT != ret) { + LOG_WARN("wait thread condition failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + } + } + } + return ret; +} + +int ObTabletDirectLoadMgr::notify_all() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + ObThreadCondGuard guard(cond_); + if (OB_FAIL(cond_.broadcast())) { + LOG_WARN("broadcast thread condition failed", K(ret)); + } + } + return 
ret; +} + +struct SliceEndkeyCompareFunctor +{ +public: + SliceEndkeyCompareFunctor(const ObStorageDatumUtils &datum_utils) : datum_utils_(datum_utils), ret_code_(OB_SUCCESS) {} + bool operator ()(const ObDirectLoadSliceWriter *left, const ObDirectLoadSliceWriter *right) + { + bool bret = false; + int ret = ret_code_; + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(left) || OB_ISNULL(right)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else if (!left->is_empty() && !right->is_empty()) { + const ObChunkSliceStore *left_slice_store = static_cast(left->get_slice_store()); + const ObChunkSliceStore *right_slice_store = static_cast(right->get_slice_store()); + int cmp_ret = 0; + if (OB_FAIL(left_slice_store->endkey_.compare(right_slice_store->endkey_, datum_utils_, cmp_ret))) { + LOG_WARN("endkey compare failed", K(ret)); + } else { + bret = cmp_ret < 0; + } + } else if (left->is_empty() && right->is_empty()) { + // both empty, compare pointer + bret = left < right; + } else { + // valid former, empty latter + bret = !left->is_empty(); + } + ret_code_ = OB_SUCCESS == ret_code_ ?
ret : ret_code_; + return bret; + } +public: + const ObStorageDatumUtils &datum_utils_; + int ret_code_; +}; + +int ObTabletDirectLoadMgr::calc_range(const ObStorageSchema *storage_schema, const ObStorageDatumUtils &datum_utils) +{ + int ret = OB_SUCCESS; + ObArray sorted_slices; + sorted_slices.set_attr(ObMemAttr(MTL_ID(), "DL_SortS_tmp")); + if (OB_UNLIKELY(nullptr == storage_schema || !datum_utils.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(storage_schema), K(datum_utils)); + } else if (OB_FAIL(sorted_slices.reserve(sqc_build_ctx_.slice_mgr_map_.size()))) { + LOG_WARN("reserve slice array failed", K(ret), K(sqc_build_ctx_.slice_mgr_map_.size())); + } else { + for (ObTabletDirectLoadBuildCtx::SLICE_MGR_MAP::const_iterator iter = sqc_build_ctx_.slice_mgr_map_.begin(); + OB_SUCC(ret) && iter != sqc_build_ctx_.slice_mgr_map_.end(); ++iter) { + ObDirectLoadSliceWriter *cur_slice = iter->second; + if (OB_ISNULL(cur_slice)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(cur_slice)); + } else if (OB_FAIL(sorted_slices.push_back(cur_slice))) { + LOG_WARN("push back slice failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + SliceEndkeyCompareFunctor cmp(datum_utils); + std::sort(sorted_slices.begin(), sorted_slices.end(), cmp); + ret = cmp.ret_code_; + if (OB_FAIL(ret)) { + LOG_WARN("sort slice failed", K(ret), K(sorted_slices)); + } + } + int64_t offset = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < sorted_slices.count(); ++i) { + sorted_slices.at(i)->set_row_offset(offset); + offset += sorted_slices.at(i)->get_row_count(); + } + } + if (OB_SUCC(ret) && is_data_direct_load(direct_load_type_)) { + bool is_column_store = false; + if (OB_FAIL(ObCODDLUtil::need_column_group_store(*storage_schema, is_column_store))) { + LOG_WARN("fail to check need column group", K(ret)); + } else if (is_column_store) { + if (OB_FAIL(sqc_build_ctx_.sorted_slice_writers_.assign(sorted_slices))) { + LOG_WARN("copy slice array 
failed", K(ret), K(sorted_slices.count())); + } + } + } + return ret; +} + +int ObTabletDirectLoadMgr::cancel() +{ + int ret = OB_SUCCESS; + for (ObTabletDirectLoadBuildCtx::SLICE_MGR_MAP::const_iterator iter = sqc_build_ctx_.slice_mgr_map_.begin(); + OB_SUCC(ret) && iter != sqc_build_ctx_.slice_mgr_map_.end(); ++iter) { + ObDirectLoadSliceWriter *cur_slice = iter->second; + if (OB_ISNULL(cur_slice)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(cur_slice)); + } else { + cur_slice->cancel(); + } + } + return ret; +} + +int ObTabletDirectLoadMgr::close_sstable_slice( + const bool is_data_tablet_process_for_lob, + const ObDirectLoadSliceInfo &slice_info, + const share::SCN &start_scn, + const int64_t execution_id, + ObInsertMonitor *insert_monitor) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!slice_info.is_valid() || !start_scn.is_valid_and_not_min() || !sqc_build_ctx_.is_valid() || execution_id < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(slice_info), K(start_scn), K(execution_id), K(sqc_build_ctx_)); + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (is_data_tablet_process_for_lob) { + if (OB_UNLIKELY(!lob_mgr_handle_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(slice_info)); + } else if (OB_FAIL(lob_mgr_handle_.get_obj()->close_sstable_slice( + false, slice_info, start_scn, execution_id))) { + LOG_WARN("close lob sstable slice failed", K(ret), K(slice_info)); + } + } else { + ObDirectLoadSliceWriter *slice_writer = nullptr; + if (OB_FAIL(sqc_build_ctx_.slice_mgr_map_.get_refactored(slice_info.slice_id_, slice_writer))) { + ret = OB_HASH_NOT_EXIST == ret ? 
OB_ENTRY_NOT_EXIST : ret; + LOG_WARN("get refactored failed", K(ret), K(slice_info)); + } else if (OB_ISNULL(slice_writer)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(slice_info)); + } else if (OB_FAIL(slice_writer->close())) { + LOG_WARN("close failed", K(ret), K(slice_info)); + } else if (!slice_info.is_lob_slice_ && is_ddl_direct_load(direct_load_type_)) { + int64_t task_finish_count = -1; + { + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + if (start_scn == get_start_scn()) { + task_finish_count = ATOMIC_AAF(&sqc_build_ctx_.task_finish_count_, 1); + } + } + LOG_INFO("inc task finish count", K(tablet_id_), K(execution_id), K(task_finish_count), K(sqc_build_ctx_.task_total_cnt_)); + ObTablet *tablet = nullptr; + ObStorageSchema *storage_schema = nullptr; + ObArenaAllocator arena_allocator("DDL_RESCAN", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + bool is_column_group_store = false; + if (OB_UNLIKELY(!tablet_handle_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle_)); + } else if (OB_ISNULL(tablet = tablet_handle_.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet is null", K(ret), K(ls_id_), K(tablet_id_)); + } else if (OB_FAIL(tablet->load_storage_schema(arena_allocator, storage_schema))) { + LOG_WARN("load storage schema failed", K(ret), K(tablet_id_)); + } else if (OB_FAIL(ObCODDLUtil::need_column_group_store(*storage_schema, is_column_group_store))) { + LOG_WARN("fail to check is column group store", K(ret)); + } else if (!is_column_group_store) { + if (task_finish_count >= sqc_build_ctx_.task_total_cnt_) { + // for ddl, write commit log when all slices ready. 
+ if (OB_FAIL(close(execution_id, start_scn))) { + LOG_WARN("close sstable slice failed", K(ret), K(sqc_build_ctx_.build_param_)); + } + } + } else { + if (task_finish_count < sqc_build_ctx_.task_total_cnt_) { + if (OB_FAIL(wait_notify(slice_writer, start_scn))) { + LOG_WARN("wait notify failed", K(ret)); + } else if (OB_FAIL(slice_writer->fill_column_group(storage_schema, start_scn, insert_monitor))) { + LOG_WARN("slice writer fill column group failed", K(ret)); + } + } else { + if (OB_FAIL(calc_range(storage_schema, tablet->get_rowkey_read_info().get_datum_utils()))) { + LOG_WARN("calc range failed", K(ret)); + } else if (OB_FAIL(notify_all())) { + LOG_WARN("notify all failed", K(ret)); + } else if (OB_FAIL(slice_writer->fill_column_group(storage_schema, start_scn, insert_monitor))) { + LOG_WARN("slice fill column group failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + int64_t fill_cg_finish_count = -1; + { + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + if (start_scn == get_start_scn()) { + fill_cg_finish_count = ATOMIC_AAF(&sqc_build_ctx_.fill_column_group_finish_count_, 1); + } + } + LOG_INFO("inc fill cg finish count", K(tablet_id_), K(execution_id), K(fill_cg_finish_count), K(sqc_build_ctx_.task_total_cnt_)); + if (fill_cg_finish_count >= sqc_build_ctx_.task_total_cnt_) { + // for ddl, write commit log when all slices ready. 
+ if (OB_FAIL(close(execution_id, start_scn))) { + LOG_WARN("close sstable slice failed", K(ret)); + } + } + } + } + ObTabletObjLoadHelper::free(arena_allocator, storage_schema); + } + if (OB_NOT_NULL(slice_writer)) { + if (is_data_direct_load(direct_load_type_) && slice_writer->need_column_store()) { + //ignore, free after rescan + } else { + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(sqc_build_ctx_.slice_mgr_map_.erase_refactored(slice_info.slice_id_))) { + LOG_ERROR("erase failed", K(ret), K(tmp_ret), K(slice_info)); + } else { + LOG_INFO("erase a slice writer", KP(slice_writer), K(sqc_build_ctx_.slice_mgr_map_.size())); + slice_writer->~ObDirectLoadSliceWriter(); + sqc_build_ctx_.slice_writer_allocator_.free(slice_writer); + slice_writer = nullptr; + } + ret = OB_SUCC(ret) ? tmp_ret : ret; + } + } + } + return ret; +} + +void ObTabletDirectLoadMgr::calc_cg_idx(const int64_t thread_cnt, const int64_t thread_id, int64_t &strat_idx, int64_t &end_idx) +{ + int ret = OB_SUCCESS; + const int64_t each_thread_task_cnt = sqc_build_ctx_.sorted_slice_writers_.count() / thread_cnt; + const int64_t need_plus_thread_cnt = sqc_build_ctx_.sorted_slice_writers_.count() % thread_cnt; // handle +1 task + const int64_t pre_handle_cnt = need_plus_thread_cnt * (each_thread_task_cnt + 1); + if (need_plus_thread_cnt != 0) { + if (thread_id < need_plus_thread_cnt) { + strat_idx = (each_thread_task_cnt + 1) * thread_id; + end_idx = strat_idx + (each_thread_task_cnt + 1); + } else { + strat_idx = pre_handle_cnt + (thread_id - need_plus_thread_cnt) * each_thread_task_cnt; + end_idx = strat_idx + each_thread_task_cnt; + } + } else { + strat_idx = each_thread_task_cnt * thread_id; + end_idx = strat_idx + each_thread_task_cnt; + } +} + +int ObTabletDirectLoadMgr::fill_column_group(const int64_t thread_cnt, const int64_t thread_id) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(thread_cnt <= 0 || 
thread_id < 0 || thread_id > thread_cnt - 1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(thread_cnt), K(thread_id)); + } else if (sqc_build_ctx_.sorted_slice_writers_.count() == 0) { + //ignore + } else if (sqc_build_ctx_.sorted_slice_writers_.count() != sqc_build_ctx_.slice_mgr_map_.size()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong slice writer num", K(ret), K(sqc_build_ctx_.sorted_slice_writers_.count()), K(sqc_build_ctx_.slice_mgr_map_.size()), K(common::lbt())); + } else { + int64_t strat_idx = 0; + int64_t last_idx = 0; + calc_cg_idx(thread_cnt, thread_id, strat_idx, last_idx); + LOG_INFO("direct load start fill column group", K(tablet_id_), K(sqc_build_ctx_.sorted_slice_writers_.count()), K(thread_cnt), K(thread_id), K(strat_idx), K(last_idx)); + if (strat_idx < 0 || strat_idx >= sqc_build_ctx_.sorted_slice_writers_.count() || last_idx > sqc_build_ctx_.sorted_slice_writers_.count()) { + //skip + } else { + ObArenaAllocator arena_allocator("DIRECT_RESCAN", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObTablet *tablet = nullptr; + ObStorageSchema *storage_schema = nullptr; + int64_t fill_cg_finish_count = -1; + if (OB_UNLIKELY(!tablet_handle_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle_)); + } else if (OB_ISNULL(tablet = tablet_handle_.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet is null", K(ret), K(ls_id_), K(tablet_id_)); + } else if (OB_FAIL(tablet->load_storage_schema(arena_allocator, storage_schema))) { + LOG_WARN("load storage schema failed", K(ret), K(tablet_id_)); + } else { + for (int64_t i = strat_idx; OB_SUCC(ret) && i < last_idx; ++i) { + ObDirectLoadSliceWriter *slice_writer = sqc_build_ctx_.sorted_slice_writers_.at(i); + if (OB_ISNULL(slice_writer) || !slice_writer->need_column_store()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("wrong slice writer", KPC(slice_writer)); + } else if (OB_FAIL(slice_writer->fill_column_group(storage_schema, 
get_start_scn()))) { + LOG_WARN("slice writer rescan failed", K(ret), KP(storage_schema), K(get_start_scn())); + } else { + fill_cg_finish_count = ATOMIC_AAF(&sqc_build_ctx_.fill_column_group_finish_count_, 1); + } + } + } + ObTabletObjLoadHelper::free(arena_allocator, storage_schema); //arena cannot free + arena_allocator.reset(); + if (OB_SUCC(ret)) { + if (fill_cg_finish_count == sqc_build_ctx_.sorted_slice_writers_.count()) { + sqc_build_ctx_.sorted_slice_writers_.reset(); + FLOG_INFO("tablet_direct_mgr finish fill column group", K(sqc_build_ctx_.slice_mgr_map_.size()), K(this), K(fill_cg_finish_count)); + if (!sqc_build_ctx_.slice_mgr_map_.empty()) { + DestroySliceWriterMapFn destroy_map_fn(&sqc_build_ctx_.slice_writer_allocator_); + int tmp_ret = sqc_build_ctx_.slice_mgr_map_.foreach_refactored(destroy_map_fn); + if (tmp_ret == OB_SUCCESS) { + sqc_build_ctx_.slice_mgr_map_.destroy(); + } else { + ret = tmp_ret; + } + } + } + } + } + if (OB_SUCC(ret)) { + LOG_INFO("direct load finish fill column group", K(tablet_id_), K(sqc_build_ctx_.sorted_slice_writers_.count()), K(thread_cnt), K(thread_id), K(strat_idx), K(last_idx), + K(sqc_build_ctx_.slice_mgr_map_.size())); + } + } + return ret; +} + +int ObTabletDirectLoadMgr::prepare_index_builder_if_need(const ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + ObWholeDataStoreDesc index_block_desc(true/*is ddl*/); + if (sqc_build_ctx_.index_builder_ != nullptr) { + LOG_INFO("index builder is already prepared"); + } else if (OB_FAIL(index_block_desc.init(table_schema, ls_id_, tablet_id_, + is_full_direct_load(direct_load_type_) ? compaction::ObMergeType::MAJOR_MERGE : compaction::ObMergeType::MINOR_MERGE, + is_full_direct_load(direct_load_type_) ? 
table_key_.get_snapshot_version() : 1L, + data_format_version_))) { + LOG_WARN("fail to init data desc", K(ret)); + } else { + void *builder_buf = nullptr; + + if (OB_ISNULL(builder_buf = sqc_build_ctx_.allocator_.alloc(sizeof(ObSSTableIndexBuilder)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret)); + } else if (OB_ISNULL(sqc_build_ctx_.index_builder_ = new (builder_buf) ObSSTableIndexBuilder())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to new ObSSTableIndexBuilder", K(ret)); + } else if (OB_FAIL(sqc_build_ctx_.index_builder_->init( + index_block_desc.get_desc(), // index_block_desc is copied in index_builder + nullptr, // macro block flush callback + ObSSTableIndexBuilder::DISABLE))) { + LOG_WARN("failed to init index builder", K(ret), K(index_block_desc)); + } else if (OB_FAIL(sqc_build_ctx_.data_block_desc_.init(table_schema, ls_id_, tablet_id_, + is_full_direct_load(direct_load_type_) ? compaction::ObMergeType::MAJOR_MERGE : compaction::ObMergeType::MINOR_MERGE, + is_full_direct_load(direct_load_type_) ? 
table_key_.get_snapshot_version() : 1L, + data_format_version_))) { + LOG_WARN("fail to init data block desc", K(ret)); + } else { + sqc_build_ctx_.data_block_desc_.get_desc().sstable_index_builder_ = sqc_build_ctx_.index_builder_; // for build the tail index block in macro block + } + + + if (OB_FAIL(ret)) { + if (nullptr != sqc_build_ctx_.index_builder_) { + sqc_build_ctx_.index_builder_->~ObSSTableIndexBuilder(); + sqc_build_ctx_.index_builder_ = nullptr; + } + if (nullptr != builder_buf) { + sqc_build_ctx_.allocator_.free(builder_buf); + builder_buf = nullptr; + } + sqc_build_ctx_.data_block_desc_.reset(); + } + } + return ret; +} + +int ObTabletDirectLoadMgr::wrlock(const int64_t timeout_us, uint32_t &tid) +{ + int ret = OB_SUCCESS; + const int64_t abs_timeout_us = timeout_us + ObTimeUtility::current_time(); + if (OB_SUCC(lock_.wrlock(ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK, abs_timeout_us))) { + tid = static_cast(GETTID()); + } + if (OB_TIMEOUT == ret) { + ret = OB_EAGAIN; + } + return ret; +} + +void ObTabletDirectLoadMgr::unlock(const uint32_t tid) +{ + if (OB_SUCCESS != lock_.unlock(&tid)) { + ob_abort(); + } +} + + +ObTabletFullDirectLoadMgr::ObTabletFullDirectLoadMgr() + : ObTabletDirectLoadMgr(), start_scn_(share::SCN::min_scn()), + commit_scn_(share::SCN::min_scn()), execution_id_(-1) +{ +} + +ObTabletFullDirectLoadMgr::~ObTabletFullDirectLoadMgr() +{ +} + +int ObTabletFullDirectLoadMgr::update( + ObTabletDirectLoadMgr *lob_tablet_mgr, + const ObTabletDirectLoadInsertParam &build_param) +{ + int ret = OB_SUCCESS; + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObStorageSchema *storage_schema = nullptr; + ObArenaAllocator arena_allocator("dl_mgr_update", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + if (OB_UNLIKELY(!build_param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(build_param)); + } else if (OB_ISNULL(ls_service = MTL(ObLSService 
*))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(ls_service->get_ls(build_param.common_param_.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(build_param)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + build_param.common_param_.tablet_id_, + tablet_handle_, + ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(build_param)); + } else if (OB_FAIL(tablet_handle_.get_obj()->load_storage_schema(arena_allocator, storage_schema))) { + LOG_WARN("load storage schema failed", K(ret)); + } else if (nullptr != lob_tablet_mgr) { + // has lob + ObTabletDirectLoadInsertParam lob_param; + ObSchemaGetterGuard schema_guard; + ObTabletBindingMdsUserData ddl_data; + const ObTableSchema *table_schema = nullptr; + if (OB_FAIL(lob_param.assign(build_param))) { + LOG_WARN("assign lob parameter failed", K(ret)); + } else if (OB_FAIL(tablet_handle_.get_obj()->ObITabletMdsInterface::get_ddl_data(share::SCN::max_scn(), ddl_data))) { + LOG_WARN("get ddl data failed", K(ret)); + } else if (OB_FALSE_IT(lob_param.common_param_.tablet_id_ = ddl_data.lob_meta_tablet_id_)) { + } else if (build_param.is_replay_) { + // no need to update table id. 
+ } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( + MTL_ID(), schema_guard, lob_param.runtime_only_param_.schema_version_))) { + LOG_WARN("get tenant schema failed", K(ret), K(MTL_ID()), K(lob_param)); + } else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), + lob_param.runtime_only_param_.table_id_, table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(lob_param)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(lob_param)); + } else { + lob_param.runtime_only_param_.table_id_ = table_schema->get_aux_lob_meta_tid(); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(lob_mgr_handle_.set_obj(lob_tablet_mgr))) { + LOG_WARN("set lob direct load mgr failed", K(ret), K(lob_param)); + } else if (OB_FAIL(lob_mgr_handle_.get_obj()->update(nullptr, lob_param))) { + LOG_WARN("init lob failed", K(ret), K(lob_param)); + } else { + LOG_INFO("set lob mgr handle", K(lob_param)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ObTabletDirectLoadMgr::update(nullptr, build_param))) { + LOG_WARN("init failed", K(ret), K(build_param)); + } else { + table_key_.reset(); + table_key_.tablet_id_ = build_param.common_param_.tablet_id_; + bool is_column_group_store = false; + if (OB_FAIL(ObCODDLUtil::need_column_group_store(*storage_schema, is_column_group_store))) { + LOG_WARN("fail to get schema is column group store", K(ret)); + } else if (is_column_group_store) { + table_key_.table_type_ = ObITable::COLUMN_ORIENTED_SSTABLE; + int64_t base_cg_idx = -1; + if (OB_FAIL(ObCODDLUtil::get_base_cg_idx(storage_schema, base_cg_idx))) { + LOG_WARN("get base cg idx failed", K(ret)); + } else { + table_key_.column_group_idx_ = static_cast(base_cg_idx); + } + } else { + table_key_.table_type_ = ObITable::MAJOR_SSTABLE; + } + table_key_.version_range_.snapshot_version_ = build_param.common_param_.read_snapshot_; + } + } + ObTabletObjLoadHelper::free(arena_allocator, storage_schema); + 
LOG_INFO("init tablet direct load mgr finished", K(ret), K(build_param), KPC(this)); + return ret; +} + +int ObTabletFullDirectLoadMgr::open(const int64_t current_execution_id, share::SCN &start_scn) +{ + int ret = OB_SUCCESS; + uint32_t lock_tid = 0; + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObTabletFullDirectLoadMgr *lob_tablet_mgr = nullptr; + start_scn.reset(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!is_valid() || !sqc_build_ctx_.is_valid() || current_execution_id < 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this), K(current_execution_id)); + } else if (OB_FAIL(wrlock(TRY_LOCK_TIMEOUT, lock_tid))) { + LOG_WARN("failed to wrlock", K(ret), KPC(this)); + } else if (lob_mgr_handle_.is_valid() + && OB_ISNULL(lob_tablet_mgr = lob_mgr_handle_.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (OB_ISNULL(ls_service = MTL(ObLSService*))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls service should not be null", K(ret)); + } else if (OB_FAIL(ls_service->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id_, tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(tablet_id_)); + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet handle is invalid", K(ret), K(tablet_handle)); + } else if (current_execution_id < execution_id_ + || current_execution_id < tablet_handle.get_obj()->get_tablet_meta().ddl_execution_id_) { + ret = OB_TASK_EXPIRED; + LOG_INFO("receive a old execution id, don't do start", K(ret), K(current_execution_id), K(sqc_build_ctx_), + "tablet_meta", tablet_handle.get_obj()->get_tablet_meta()); + } else if 
(get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()).is_valid_and_not_min()) { + // has already committed. + start_scn = start_scn_; + if (!start_scn.is_valid_and_not_min()) { + start_scn = tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_; + } + if (!start_scn.is_valid_and_not_min()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("start scn must be valid after commit", K(ret), K(start_scn)); + } + } else { + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObDDLKvMgrHandle lob_kv_mgr_handle; + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + if (OB_FAIL(direct_load_mgr_handle.set_obj(this))) { + LOG_WARN("set handle failed", K(ret)); + } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle, true/*try_create*/))) { + LOG_WARN("create ddl kv mgr failed", K(ret)); + } else if (nullptr != lob_tablet_mgr) { + ObTabletHandle lob_tablet_handle; + if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, lob_tablet_mgr->get_tablet_id(), lob_tablet_handle))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), KPC(lob_tablet_mgr)); + } else if (OB_FAIL(lob_tablet_handle.get_obj()->get_ddl_kv_mgr(lob_kv_mgr_handle, true/*try_create*/))) { + LOG_WARN("create ddl kv mgr failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + ObDDLRedoLogWriter redo_writer; + if (OB_FAIL(redo_writer.init(ls_id_, tablet_id_))) { + LOG_WARN("init redo writer failed", K(ret), K(ls_id_), K(tablet_id_)); + } else if (OB_FAIL(redo_writer.write_start_log(table_key_, + current_execution_id, sqc_build_ctx_.build_param_.common_param_.data_format_version_, direct_load_type_, + ddl_kv_mgr_handle, lob_kv_mgr_handle, direct_load_mgr_handle, lock_tid, start_scn))) { + LOG_WARN("fail write start log", K(ret), K(table_key_), K(data_format_version_), K(sqc_build_ctx_)); + } else if (OB_UNLIKELY(!start_scn.is_valid_and_not_min())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(start_scn)); + } else if (OB_FAIL(init_ddl_table_store(start_scn, table_key_.get_snapshot_version(), 
start_scn))) { + LOG_WARN("clean up ddl sstable failed", K(ret), K(start_scn), K(table_key_)); + } else if (nullptr != lob_tablet_mgr + && OB_FAIL(lob_tablet_mgr->init_ddl_table_store(start_scn, table_key_.get_snapshot_version(), start_scn))) { + LOG_WARN("clean up ddl sstable failed", K(ret), K(start_scn), K(table_key_)); + } + } + } + if (lock_tid != 0) { + unlock(lock_tid); + } + return ret; +} + +int ObTabletFullDirectLoadMgr::close(const int64_t execution_id, const SCN &start_scn) +{ + int ret = OB_SUCCESS; + SCN commit_scn; + bool is_remote_write = false; + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObTabletHandle new_tablet_handle; + ObTabletFullDirectLoadMgr *lob_tablet_mgr = nullptr; + bool sstable_already_created = false; + const uint64_t tenant_id = MTL_ID(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(execution_id < 0 || !start_scn.is_valid_and_not_min())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(execution_id), K(start_scn)); + } else if (lob_mgr_handle_.is_valid() + && OB_ISNULL(lob_tablet_mgr = lob_mgr_handle_.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (OB_ISNULL(ls_service = MTL(ObLSService*))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls service should not be null", K(ret)); + } else if (OB_FAIL(ls_service->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id_, tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(tablet_id_)); + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet handle is invalid", K(ret), K(tablet_handle)); + } else { + uint32_t lock_tid = 0; + ObDDLRedoLogWriter redo_writer; + if (OB_FAIL(wrlock(TRY_LOCK_TIMEOUT, lock_tid))) { + LOG_WARN("failed 
to wrlock", K(ret), KPC(this)); + } else if (FALSE_IT(sstable_already_created = sqc_build_ctx_.is_task_end_)) { + } else if (sstable_already_created) { + // Why use is_task_end_ rather than commit_scn_. + // sqc may switch to follower, and the commit_scn will not be set. + LOG_INFO("had already closed", K(ret)); + } else if (get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()).is_valid_and_not_min()) { + commit_scn = get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()); + FLOG_INFO("already committed", K(ret), K(commit_scn), "tablet_meta", tablet_handle.get_obj()->get_tablet_meta()); + } else if (OB_FAIL(redo_writer.init(ls_id_, tablet_id_))) { + LOG_WARN("init redo writer failed", K(ret), K(ls_id_), K(tablet_id_)); + } else { + DEBUG_SYNC(AFTER_REMOTE_WRITE_DDL_PREPARE_LOG); + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + if (OB_FAIL(direct_load_mgr_handle.set_obj(this))) { + LOG_WARN("set direct load mgr handle failed", K(ret)); + } else if (OB_FAIL(redo_writer.write_commit_log(true, table_key_, + start_scn, direct_load_mgr_handle, commit_scn, is_remote_write, lock_tid))) { + LOG_WARN("fail write ddl commit log", K(ret), K(table_key_), K(sqc_build_ctx_)); + } + } + if (0 != lock_tid) { + unlock(lock_tid); + } + } + + bool is_delay_build_major = false; +#ifdef ERRSIM + is_delay_build_major = 0 != GCONF.errsim_ddl_major_delay_time; + sqc_build_ctx_.is_task_end_ = is_delay_build_major ? 
true : sqc_build_ctx_.is_task_end_; // skip report checksum +#endif + if (OB_FAIL(ret) || sstable_already_created) { + } else if (is_remote_write) { + LOG_INFO("ddl commit log is written in remote, need wait replay", K(sqc_build_ctx_), K(start_scn), K(commit_scn)); + } else if (OB_UNLIKELY(!start_scn.is_valid_and_not_min()) || !commit_scn.is_valid_and_not_min()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), KPC(this)); + } else if (OB_FAIL(commit(*tablet_handle.get_obj(), start_scn, commit_scn, + sqc_build_ctx_.build_param_.runtime_only_param_.table_id_, sqc_build_ctx_.build_param_.runtime_only_param_.task_id_))) { + LOG_WARN("failed to do ddl kv commit", K(ret), KPC(this)); + } + + if (OB_FAIL(ret)) { + } else if (sstable_already_created || is_delay_build_major) { + LOG_INFO("sstable had already created, skip waiting for major generated and reporting chksum", K(start_scn), K(commit_scn), + K(sstable_already_created), K(is_delay_build_major)); + } else if (OB_FAIL(schedule_merge_task(start_scn, commit_scn, true/*wait_major_generate*/))) { + LOG_WARN("schedule merge task and wait real major generate", K(ret), + K(is_remote_write), K(sstable_already_created), K(start_scn), K(commit_scn)); + } else if (lob_mgr_handle_.is_valid() && + OB_FAIL(lob_mgr_handle_.get_full_obj()->schedule_merge_task(start_scn, commit_scn, true/*wait_major_generate*/))) { + LOG_WARN("schedule merge task and wait real major generate for lob failed", K(ret), + K(is_remote_write), K(sstable_already_created), K(start_scn), K(commit_scn)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id_, new_tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(tablet_id_)); + } else { + ObSSTableMetaHandle sst_meta_hdl; + ObSSTable *first_major_sstable = nullptr; + ObTabletMemberWrapper table_store_wrapper; + if (OB_FAIL(new_tablet_handle.get_obj()->fetch_table_store(table_store_wrapper))) { + LOG_WARN("fetch table store failed", K(ret)); + } else if 
(OB_ISNULL(first_major_sstable = static_cast + (table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("no major after wait merge success", K(ret), K(ls_id_), K(tablet_id_)); + } else if (OB_UNLIKELY(first_major_sstable->get_key() != table_key_)) { + ret = OB_SNAPSHOT_DISCARDED; + LOG_WARN("ddl major sstable dropped, snapshot holding may have bug", + K(ret), KPC(first_major_sstable), K(table_key_), K(tablet_id_), K(sqc_build_ctx_.build_param_), K(sqc_build_ctx_.build_param_.runtime_only_param_.task_id_)); + } else if (OB_FAIL(first_major_sstable->get_meta(sst_meta_hdl))) { + LOG_WARN("fail to get sstable meta handle", K(ret)); + } else { + const int64_t *column_checksums = sst_meta_hdl.get_sstable_meta().get_col_checksum(); + int64_t column_count = sst_meta_hdl.get_sstable_meta().get_col_checksum_cnt(); + ObArray co_column_checksums; + co_column_checksums.set_attr(ObMemAttr(MTL_ID(), "TblDL_Ccc")); + if (OB_FAIL(get_co_column_checksums_if_need(tablet_handle, first_major_sstable, co_column_checksums))) { + LOG_WARN("get column checksum from co sstable failed", K(ret)); + } else { + for (int64_t retry_cnt = 10; retry_cnt > 0; retry_cnt--) { // overwrite ret + if (OB_FAIL(ObTabletDDLUtil::report_ddl_checksum( + ls_id_, + tablet_id_, + sqc_build_ctx_.build_param_.runtime_only_param_.table_id_, + execution_id, + sqc_build_ctx_.build_param_.runtime_only_param_.task_id_, + co_column_checksums.empty() ? column_checksums : co_column_checksums.get_data(), + co_column_checksums.empty() ? 
column_count : co_column_checksums.count()))) { + LOG_WARN("report ddl column checksum failed", K(ret), K(ls_id_), K(tablet_id_), K(execution_id), K(sqc_build_ctx_)); + } else { + break; + } + ob_usleep(100L * 1000L); + } + } + } + + if (OB_SUCC(ret)) { + sqc_build_ctx_.is_task_end_ = true; + } + } + return ret; +} +/* Start from a checkpoint: rejects an invalid checkpoint_scn (OB_INVALID_ARGUMENT) and an invalid table_key_ (OB_ERR_SYS), otherwise forwards to start() with a copy of table_key_. */ +int ObTabletFullDirectLoadMgr::start_with_checkpoint( + ObTablet &tablet, + const share::SCN &start_scn, + const uint64_t data_format_version, + const int64_t execution_id, + const share::SCN &checkpoint_scn) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!checkpoint_scn.is_valid_and_not_min())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(checkpoint_scn)); + } else if (OB_UNLIKELY(!table_key_.is_valid())) { + ret = OB_ERR_SYS; + LOG_WARN("the table key not updated", K(ret), KPC(this)); + } else { + ObITable::TableKey table_key = table_key_; + ret = start(tablet, table_key, start_scn, data_format_version, execution_id, checkpoint_scn); + } + return ret; +} + +// For both leader and follower. +// Only for replaying the start log; migration_create_tablet and online will not call this interface.
+int ObTabletFullDirectLoadMgr::start( + ObTablet &tablet, + const ObITable::TableKey &table_key, + const share::SCN &start_scn, + const uint64_t data_format_version, + const int64_t execution_id, + const share::SCN &checkpoint_scn) +{ + int ret = OB_SUCCESS; + share::SCN saved_start_scn; + int64_t saved_snapshot_version = 0; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObDDLKvMgrHandle lob_kv_mgr_handle; + ddl_kv_mgr_handle.reset(); + lob_kv_mgr_handle.reset(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(table_key != table_key_) + || !start_scn.is_valid_and_not_min() + || execution_id < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(table_key), K(table_key_), K(start_scn), K(execution_id)); + } else if (OB_FAIL(tablet.get_ddl_kv_mgr(ddl_kv_mgr_handle, true/*try_create*/))) { + LOG_WARN("create tablet ddl kv mgr handle failed", K(ret)); + } else if (lob_mgr_handle_.is_valid()) { + ObLSHandle ls_handle; + ObTabletHandle lob_tablet_handle; + if (OB_ISNULL(MTL(ObLSService *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, lob_mgr_handle_.get_obj()->get_tablet_id(), lob_tablet_handle))) { + LOG_WARN("get tablet failed", K(ret)); + } else if (OB_FAIL(lob_tablet_handle.get_obj()->get_ddl_kv_mgr(lob_kv_mgr_handle, true/*try_create*/))) { + LOG_WARN("create tablet ddl kv mgr handle failed", K(ret)); + } + } + + if (OB_SUCC(ret)) { + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls handle failed", K(ret), K(ls_id_)); + } else if (OB_ISNULL(ls_handle.get_ls()) || OB_ISNULL(ls_handle.get_ls()->get_ddl_log_handler())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls or ddl log handler is null", K(ret), 
KPC(ls_handle.get_ls()), K(ls_id_)); + } else if (OB_FAIL(ls_handle.get_ls()->get_ddl_log_handler()->add_tablet(tablet_id_))) { + LOG_WARN("add tablet id failed", K(ret), K(ls_id_), K(tablet_id_)); + } else if (lob_kv_mgr_handle.is_valid() && OB_FAIL(ls_handle.get_ls()->get_ddl_log_handler()->add_tablet(lob_mgr_handle_.get_obj()->get_tablet_id()))) { + LOG_WARN("add lob tablet id failed", K(ret), "lob_tablet_id", lob_mgr_handle_.get_obj()->get_tablet_id()); + } + } + if (OB_SUCC(ret)) { + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + if (OB_FAIL(start_nolock(table_key, start_scn, data_format_version, execution_id, checkpoint_scn, + ddl_kv_mgr_handle, lob_kv_mgr_handle))) { + LOG_WARN("failed to ddl start", K(ret)); + } else { + // save variables under lock + saved_start_scn = start_scn_; + saved_snapshot_version = table_key_.get_snapshot_version(); + const SCN ddl_commit_scn = get_commit_scn(tablet.get_tablet_meta()); + commit_scn_.atomic_store(ddl_commit_scn); + if (lob_mgr_handle_.is_valid()) { + lob_mgr_handle_.get_full_obj()->set_commit_scn_nolock(ddl_commit_scn); + } + } + } + if (OB_SUCC(ret) && !checkpoint_scn.is_valid_and_not_min()) { + // remove ddl sstable if exists and flush ddl start log ts and snapshot version into tablet meta. + // persist lob meta tablet before data tablet is necessary, to avoid start-loss for lob meta tablet when recovered from checkpoint. 
+ if (lob_mgr_handle_.is_valid() && + OB_FAIL(lob_mgr_handle_.get_full_obj()->init_ddl_table_store(saved_start_scn, saved_snapshot_version, saved_start_scn))) { + LOG_WARN("clean up ddl sstable failed", K(ret)); + } else if (OB_FAIL(init_ddl_table_store(saved_start_scn, saved_snapshot_version, saved_start_scn))) { + LOG_WARN("clean up ddl sstable failed", K(ret), K(tablet_id_)); + } + } + FLOG_INFO("start full direct load mgr finished", K(ret), K(start_scn), K(execution_id), KPC(this)); + return ret; +} + +int ObTabletFullDirectLoadMgr::start_nolock( + const ObITable::TableKey &table_key, + const share::SCN &start_scn, + const uint64_t data_format_version, + const int64_t execution_id, + const SCN &checkpoint_scn, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle) +{ + int ret = OB_SUCCESS; + bool is_brand_new = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(!table_key.is_valid() || !start_scn.is_valid_and_not_min() || data_format_version < 0 || execution_id < 0 + || (checkpoint_scn.is_valid_and_not_min() && checkpoint_scn < start_scn)) || !ddl_kv_mgr_handle.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(table_key), K(start_scn), K(data_format_version), K(execution_id), K(checkpoint_scn), + "kv_mgr_handle is valid", ddl_kv_mgr_handle.is_valid()); + } else if (table_key.get_tablet_id() != tablet_id_ || table_key_ != table_key) { + ret = OB_ERR_SYS; + LOG_WARN("tablet id not same", K(ret), K(table_key), K(table_key_), K(tablet_id_)); + } else { + if (start_scn_.is_valid_and_not_min()) { + if (execution_id >= execution_id_ && start_scn >= start_scn_) { + is_brand_new = true; + LOG_INFO("execution id changed, need cleanup", K(ls_id_), K(tablet_id_), K(execution_id_), K(execution_id), K(start_scn_), K(start_scn)); + } else { + if (!checkpoint_scn.is_valid_and_not_min()) { + // only return error code when not start from 
checkpoint. + ret = OB_TASK_EXPIRED; + } + LOG_INFO("ddl start ignored", K(ls_id_), K(tablet_id_), K(execution_id_), K(execution_id), K(start_scn_), K(start_scn), K(checkpoint_scn)); + } + } else { + is_brand_new = true; + FLOG_INFO("ddl start brand new", K(table_key), K(start_scn), K(execution_id), KPC(this)); + } + if (OB_SUCC(ret) && is_brand_new) { + if (OB_FAIL(cleanup_unlock())) { + LOG_WARN("cleanup unlock failed", K(ret)); + } else { + table_key_ = table_key; + data_format_version_ = data_format_version; + execution_id_ = execution_id; + start_scn_.atomic_store(start_scn); + ddl_kv_mgr_handle.get_obj()->set_max_freeze_scn(SCN::max(start_scn, checkpoint_scn)); + sqc_build_ctx_.reset_slice_ctx_on_demand(); + } + } + } + if (OB_SUCC(ret) && lob_mgr_handle_.is_valid()) { + // For lob meta tablet recover from checkpoint, execute start itself to avoid the data loss when, + // 1. lob meta tablet recover from checkpoint; + // 2. replay some data redo log on lob meta tablet. + // 3. data tablet recover from checkpoint, and cleanup will be triggered if lob meta tablet + // execute start again. 
+ ObDDLKvMgrHandle unused_kv_mgr_handle; + ObITable::TableKey lob_table_key; + lob_table_key.tablet_id_ = lob_mgr_handle_.get_full_obj()->get_tablet_id(); + lob_table_key.table_type_ = ObITable::TableType::MAJOR_SSTABLE; // lob tablet does not support column group store + lob_table_key.version_range_ = table_key.version_range_; + if (OB_FAIL(lob_mgr_handle_.get_full_obj()->start_nolock(lob_table_key, start_scn, data_format_version, execution_id, checkpoint_scn, + lob_kv_mgr_handle, unused_kv_mgr_handle))) { + LOG_WARN("start nolock for lob meta tablet failed", K(ret)); + } + } + FLOG_INFO("start_nolock full direct load mgr finished", K(ret), K(start_scn), K(execution_id), KPC(this)); + return ret; +} +// Commit the direct load: records commit_scn, freezes the ddl kv, updates the major sstable state (retrying while OB_EAGAIN), schedules the merge without waiting, then commits the lob meta tablet mgr when one is attached. +int ObTabletFullDirectLoadMgr::commit( + ObTablet &tablet, + const share::SCN &start_scn, + const share::SCN &commit_scn, + const uint64_t table_id, + const int64_t ddl_task_id) +{ + int ret = OB_SUCCESS; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (!is_started()) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("ddl not started", K(ret), KPC(this)); + } else if (start_scn < get_start_scn()) { + ret = OB_TASK_EXPIRED; + LOG_INFO("skip ddl commit log", K(start_scn), K(*this)); + } else if (OB_FAIL(set_commit_scn(commit_scn))) { + LOG_WARN("failed to set commit scn", K(ret)); + } else if (OB_FAIL(tablet.get_ddl_kv_mgr(ddl_kv_mgr_handle))) { + LOG_WARN("create ddl kv mgr failed", K(ret)); + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->freeze_ddl_kv( + start_scn, table_key_.get_snapshot_version(), data_format_version_, commit_scn))) { + LOG_WARN("failed to start prepare", K(ret), K(tablet_id_), K(commit_scn)); + } else { + ret = OB_EAGAIN; + while (OB_EAGAIN == ret) { + if (OB_FAIL(update_major_sstable())) { + LOG_WARN("update ddl major sstable failed", K(ret), K(tablet_id_), K(start_scn), K(commit_scn)); + } + if (OB_EAGAIN == ret) { + ob_usleep(1000L); // back off briefly before retrying; ob_usleep matches the sleep helper used by close() + } + } + + if 
(OB_SUCC(ret)) { + if (OB_FAIL(schedule_merge_task(start_scn, commit_scn, false/*wait_major_generate*/))) { + LOG_WARN("schedule major merge task failed", K(ret)); + } + } + } + if (OB_SUCC(ret) && lob_mgr_handle_.is_valid()) { + const ObLSID &ls_id = lob_mgr_handle_.get_full_obj()->get_ls_id(); + const ObTabletID &lob_tablet_id = lob_mgr_handle_.get_full_obj()->get_tablet_id(); + ObLSHandle ls_handle; + ObLS *ls = nullptr; + ObTabletHandle lob_tablet_handle; + if (OB_ISNULL(MTL(ObLSService *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls should not be null", K(ret)); + } else if (OB_FAIL(ls->get_tablet(lob_tablet_id, lob_tablet_handle, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US, ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id), K(lob_tablet_id)); + } else if (OB_FAIL(lob_mgr_handle_.get_full_obj()->commit(*lob_tablet_handle.get_obj(), start_scn, commit_scn))) { + LOG_WARN("commit for lob failed", K(ret), K(start_scn), K(commit_scn)); + } + } + return ret; +} + +int ObTabletFullDirectLoadMgr::schedule_merge_task(const share::SCN &start_scn, const share::SCN &commit_scn, const bool wait_major_generated) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(!start_scn.is_valid_and_not_min() || !commit_scn.is_valid_and_not_min())) { + ret = OB_ERR_SYS; + LOG_WARN("unknown start scn or commit snc", K(ret), K(start_scn), K(commit_scn)); + } else { + const int64_t wait_start_ts = ObTimeUtility::fast_current_time(); + while (OB_SUCC(ret)) { + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", K(ret)); + } else { + 
ObDDLTableMergeDagParam param; + param.direct_load_type_ = direct_load_type_; + param.ls_id_ = ls_id_; + param.tablet_id_ = tablet_id_; + param.rec_scn_ = commit_scn; + param.is_commit_ = true; + param.start_scn_ = start_scn; + param.data_format_version_ = data_format_version_; + param.snapshot_version_ = table_key_.get_snapshot_version(); + if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { + if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { + LOG_WARN("schedule ddl merge dag failed", K(ret), K(param)); + } else { + ret = OB_SUCCESS; + } + } else if (!wait_major_generated) { + // schedule successfully and no need to wait physical major generates. + break; + } + } + if (OB_SUCC(ret)) { + const ObSSTable *first_major_sstable = nullptr; + ObTabletMemberWrapper table_store_wrapper; + if (OB_FAIL(ObTabletDDLUtil::check_and_get_major_sstable(ls_id_, tablet_id_, first_major_sstable, table_store_wrapper))) { + LOG_WARN("check if major sstable exist failed", K(ret)); + } else if (nullptr != first_major_sstable) { + FLOG_INFO("major has already existed", KPC(this)); + break; + } + } + if (REACH_TIME_INTERVAL(10L * 1000L * 1000L)) { + LOG_INFO("wait build ddl sstable", K(ret), K(ls_id_), K(tablet_id_), K(start_scn), K(commit_scn), + "wait_elpased_s", (ObTimeUtility::fast_current_time() - wait_start_ts) / 1000000L); + } + } + } + return ret; +} + +void ObTabletFullDirectLoadMgr::set_commit_scn_nolock(const share::SCN &scn) +{ + commit_scn_.atomic_store(scn); + if (lob_mgr_handle_.is_valid()) { + lob_mgr_handle_.get_full_obj()->set_commit_scn_nolock(scn); + } +} + +int ObTabletFullDirectLoadMgr::set_commit_scn(const share::SCN &commit_scn) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(!commit_scn.is_valid_and_not_min())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", 
K(ret), K(commit_scn)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, + tablet_handle, + ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US, + ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); + } else { + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + const share::SCN old_commit_scn = get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()); + if (old_commit_scn.is_valid_and_not_min() && old_commit_scn != commit_scn) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("already committed by others", K(ret), K(commit_scn), KPC(this)); + } else { + commit_scn_.atomic_store(commit_scn); + } + } + return ret; +} + +share::SCN ObTabletFullDirectLoadMgr::get_commit_scn(const ObTabletMeta &tablet_meta) +{ + share::SCN mgr_commit_scn = commit_scn_.atomic_load(); + share::SCN commit_scn = share::SCN::min_scn(); + if (tablet_meta.ddl_commit_scn_.is_valid_and_not_min() || mgr_commit_scn.is_valid_and_not_min()) { + if (tablet_meta.ddl_commit_scn_.is_valid_and_not_min()) { + commit_scn = tablet_meta.ddl_commit_scn_; + } else { + commit_scn = mgr_commit_scn; + } + } else { + commit_scn = share::SCN::min_scn(); + } + return commit_scn; +} + +share::SCN ObTabletFullDirectLoadMgr::get_start_scn() +{ + return start_scn_.atomic_load(); +} + +int ObTabletFullDirectLoadMgr::can_schedule_major_compaction_nolock( + const ObTablet &tablet, + bool &can_schedule) +{ + int ret = OB_SUCCESS; + can_schedule = false; + share::SCN commit_scn; + const ObTabletMeta &tablet_meta = tablet.get_tablet_meta(); + ObTabletMemberWrapper table_store_wrapper; + + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(tablet.fetch_table_store(table_store_wrapper))) { + LOG_WARN("fetch table store failed", K(ret)); + 
} else if (nullptr != table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)) { + // major sstable has already existed. + } else { + can_schedule = get_commit_scn(tablet_meta).is_valid_and_not_min() ? true : false; + } + return ret; +} + +int ObTabletFullDirectLoadMgr::prepare_ddl_merge_param( + const ObTablet &tablet, + ObDDLTableMergeDagParam &merge_param) +{ + int ret = OB_SUCCESS; + bool can_schedule = false; + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + if (OB_FAIL(can_schedule_major_compaction_nolock(tablet, can_schedule))) { + LOG_WARN("check can schedule major compaction failed", K(ret)); + } else if (can_schedule) { + merge_param.direct_load_type_ = direct_load_type_; + merge_param.ls_id_ = ls_id_; + merge_param.tablet_id_ = tablet_id_; + merge_param.rec_scn_ = get_commit_scn(tablet.get_tablet_meta()); + merge_param.is_commit_ = true; + merge_param.start_scn_ = start_scn_; + merge_param.data_format_version_ = data_format_version_; + merge_param.snapshot_version_ = table_key_.get_snapshot_version(); + } else { + merge_param.direct_load_type_ = direct_load_type_; + merge_param.ls_id_ = ls_id_; + merge_param.tablet_id_ = tablet_id_; + merge_param.start_scn_ = start_scn_; + merge_param.data_format_version_ = data_format_version_; + merge_param.snapshot_version_ = table_key_.get_snapshot_version(); + } + return ret; +} + +int ObTabletFullDirectLoadMgr::prepare_major_merge_param( + ObTabletDDLParam ¶m) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (!is_started()) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("ddl not started", K(ret)); + } else { + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + param.direct_load_type_ = direct_load_type_; + param.ls_id_ = ls_id_; + param.table_key_ = table_key_; + param.start_scn_ = start_scn_; + param.commit_scn_ = commit_scn_; + param.snapshot_version_ = 
table_key_.get_snapshot_version(); + param.data_format_version_ = data_format_version_; + } + return ret; +} + +int ObTabletFullDirectLoadMgr::cleanup_unlock() +{ + int ret = OB_SUCCESS; + LOG_INFO("cleanup expired sstables", K(*this)); + ObLS *ls = nullptr; + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObDDLKvMgrHandle ddl_kv_mgr_handle; + if (OB_ISNULL(ls_service = MTL(ObLSService*))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls service should not be null", K(ret)); + } else if (OB_FAIL(ls_service->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("get ls failed", K(ret), K(ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id_, tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(tablet_id_)); + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("need replay but tablet handle is invalid", K(ret), K(tablet_handle)); + } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle))) { + LOG_WARN("create ddl kv mgr failed", K(ret)); + } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->cleanup())) { + LOG_WARN("cleanup failed", K(ret)); + } else { + table_key_.reset(); + data_format_version_ = 0; + start_scn_.atomic_store(share::SCN::min_scn()); + commit_scn_.atomic_store(share::SCN::min_scn()); + execution_id_ = -1; + } + return ret; +} + +int ObTabletFullDirectLoadMgr::init_ddl_table_store( + const share::SCN &start_scn, + const int64_t snapshot_version, + const share::SCN &ddl_checkpoint_scn) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + ObArenaAllocator tmp_arena("DDLUpdateTblTmp", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStorageSchema *storage_schema = nullptr; + bool is_column_group_store = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!start_scn.is_valid_and_not_min() || snapshot_version <= 0 || 
!ddl_checkpoint_scn.is_valid_and_not_min())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(start_scn), K(snapshot_version), K(ddl_checkpoint_scn)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + tablet_id_, + tablet_handle, + ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); + } else if (OB_FAIL(tablet_handle.get_obj()->load_storage_schema(tmp_arena, storage_schema))) { + LOG_WARN("failed to load storage schema", K(ret), K(tablet_handle)); + } else if (OB_FAIL(ObCODDLUtil::need_column_group_store(*storage_schema, is_column_group_store))) { + LOG_WARN("fail to check schema is column group store", K(ret)); + } + else { + ObTableHandleV2 table_handle; // empty + const int64_t rebuild_seq = ls_handle.get_ls()->get_rebuild_seq(); + ObTableHandleV2 sstable_handle; + ObTabletHandle new_tablet_handle; + ObTablesHandleArray empty_cg_sstable_handles; + ObArray empty_meta_array; + empty_meta_array.set_attr(ObMemAttr(MTL_ID(), "TblFDL_EMA")); + + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); + ObTabletDDLParam ddl_param; + ddl_param.direct_load_type_ = direct_load_type_; + ddl_param.ls_id_ = ls_id_; + ddl_param.table_key_ = table_key_; + ddl_param.start_scn_ = start_scn; + ddl_param.commit_scn_ = commit_scn_; + ddl_param.snapshot_version_ = table_key_.get_snapshot_version(); + ddl_param.data_format_version_ = data_format_version_; + ddl_param.table_key_.table_type_ = is_column_group_store ? 
ObITable::DDL_MERGE_CO_SSTABLE : ObITable::DDL_DUMP_SSTABLE; + ddl_param.table_key_.scn_range_.start_scn_ = SCN::scn_dec(start_scn); + ddl_param.table_key_.scn_range_.end_scn_ = start_scn; + + ObUpdateTableStoreParam param(tablet_handle.get_obj()->get_snapshot_version(), + ObVersionRange::MIN_VERSION, // multi_version_start + storage_schema, + rebuild_seq); + param.ddl_info_.keep_old_ddl_sstable_ = false; + param.ddl_info_.ddl_start_scn_ = start_scn; + param.ddl_info_.ddl_snapshot_version_ = snapshot_version; + param.ddl_info_.ddl_checkpoint_scn_ = ddl_checkpoint_scn; + param.ddl_info_.ddl_execution_id_ = execution_id_; + param.ddl_info_.data_format_version_ = data_format_version_; + if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(*tablet_handle.get_obj(), ddl_param, empty_meta_array, nullptr/*first_ddl_sstable*/, + tmp_arena, sstable_handle))) { + LOG_WARN("create empty ddl sstable failed", K(ret)); + } else if (ddl_param.table_key_.is_co_sstable()) { + // add empty cg sstables + ObCOSSTableV2 *co_sstable = static_cast(sstable_handle.get_table()); + const ObIArray &cg_schemas = storage_schema->get_column_groups(); + ObTabletDDLParam cg_ddl_param = ddl_param; + cg_ddl_param.table_key_.table_type_ = ObITable::TableType::DDL_MERGE_CG_SSTABLE; + for (int64_t i = 0; OB_SUCC(ret) && i < cg_schemas.count(); ++i) { + ObTableHandleV2 cur_handle; + cg_ddl_param.table_key_.column_group_idx_ = static_cast(i); + if (table_key_.get_column_group_id() == i) { + // skip base cg idx + } else if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(*tablet_handle.get_obj(), cg_ddl_param, empty_meta_array, nullptr/*first_ddl_sstable*/, tmp_arena, cur_handle))) { + LOG_WARN("create empty cg sstable failed", K(ret), K(i), K(cg_ddl_param)); + } else if (OB_FAIL(empty_cg_sstable_handles.add_table(cur_handle))) { + LOG_WARN("add table handle failed", K(ret), K(i), K(cur_handle)); + } + } + if (OB_SUCC(ret)) { + ObArray cg_sstables; + cg_sstables.set_attr(ObMemAttr(MTL_ID(), "TblFDL_CGS")); + if 
(OB_FAIL(empty_cg_sstable_handles.get_tables(cg_sstables))) { + LOG_WARN("get cg sstables failed", K(ret)); + } else if (OB_FAIL(co_sstable->fill_cg_sstables(cg_sstables))) { + LOG_WARN("fill empty cg sstables failed", K(ret)); + } else { + LOG_DEBUG("fill co sstable with empty cg sstables success", K(ret), K(ddl_param), KPC(co_sstable)); + } + } + } + bool is_column_group_store = false; + if (OB_FAIL(ret)) { + } else if (FALSE_IT(param.sstable_ = static_cast(sstable_handle.get_table()))) { + } else if (OB_FAIL(ls_handle.get_ls()->update_tablet_table_store(tablet_id_, param, new_tablet_handle))) { + LOG_WARN("failed to update tablet table store", K(ret), K(ls_id_), K(tablet_id_), K(param)); + } else if (OB_FAIL(ObCODDLUtil::need_column_group_store(*storage_schema, is_column_group_store))) { + LOG_WARN("failed to check storage schema is column group store", K(ret)); + } else { + LOG_INFO("update tablet success", K(ls_id_), K(tablet_id_), + "is_column_store", is_column_group_store, K(ddl_param), + "column_group_schemas", storage_schema->get_column_groups(), + "update_table_store_param", param, K(start_scn), K(snapshot_version), K(ddl_checkpoint_scn)); + } + } + ObTabletObjLoadHelper::free(tmp_arena, storage_schema); + return ret; +} + +int ObTabletFullDirectLoadMgr::update_major_sstable() +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, + tablet_id_, + tablet_handle, + ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); + } else { + SCN ddl_commit_scn = get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()); + if (OB_ISNULL(ls_handle.get_ls()->get_tablet_svr())) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("ls tablet service is null", K(ret), K(ls_id_)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet_svr()->update_tablet_ddl_commit_scn(tablet_id_, ddl_commit_scn))) { + LOG_WARN("update ddl commit scn failed", K(ret), K(ls_id_), K(tablet_id_), K(ddl_commit_scn)); + } + } + return ret; +} diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h new file mode 100644 index 000000000..1211f2d41 --- /dev/null +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h @@ -0,0 +1,491 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_DDL_OB_DIRECT_INSERT_SSTABLE_CTX_NEW_H +#define OCEANBASE_STORAGE_DDL_OB_DIRECT_INSERT_SSTABLE_CTX_NEW_H + +#include "storage/meta_mem/ob_tablet_handle.h" +#include "lib/lock/ob_mutex.h" +#include "lib/hash/ob_hashmap.h" +#include "lib/lock/ob_bucket_lock.h" +#include "common/ob_tablet_id.h" +#include "common/row/ob_row_iterator.h" +#include "share/stat/ob_opt_column_stat.h" +#include "share/scn.h" +#include "storage/ob_i_table.h" +#include "storage/ob_row_reshape.h" +#include "storage/blocksstable/ob_imacro_block_flush_callback.h" +#include "storage/blocksstable/ob_macro_block_struct.h" +#include "storage/blocksstable/ob_macro_block_writer.h" +#include "storage/ddl/ob_direct_load_struct.h" +#include "storage/meta_mem/ob_tablet_pointer.h" +#include "storage/tablet/ob_tablet_meta.h" + +namespace oceanbase +{ +namespace sql +{ +class ObPxMultiPartSSTableInsertOp; +class ObExecContext; +class ObDDLCtrl; +} + +namespace blocksstable +{ +class ObIMacroBlockFlushCallback; +class ObMacroBlockWriter; +} + +namespace share +{ +struct ObTabletCacheInterval; +} + +namespace storage +{ +class ObTablet; +class ObLobMetaRowIterator; +class ObTabletDirectLoadMgrHandle; +class ObTabletDirectLoadMgr; +class ObTabletFullDirectLoadMgr; +class ObTabletIncDirectLoadMgr; +struct ObInsertMonitor; + +class ObTenantDirectLoadMgr final +{ +public: + ObTenantDirectLoadMgr(); + ~ObTenantDirectLoadMgr(); + void destroy(); + static int mtl_init( + ObTenantDirectLoadMgr *&tenant_direct_load_mgr); + int init(); + + int alloc_execution_context_id(int64_t &context_id); + + // create the tablet direct load manager for the data tablet, and + // create the lob meta tablet manager internally when needed. + // Actually, + // 1. lob meta direct load mgr will be created when creating data tablet direct load mgr. + // 2. lob meta direct load mgr will be created by itself when it is recovered from checkpoint. + // @param [in] param, to init or update tablet direct load mgr.
+ // @param [in] checkpoint_scn, to decide when to create the lob meta tablet direct load mgr. + int create_tablet_direct_load( + const int64_t context_id, + const int64_t execution_id, + const ObTabletDirectLoadInsertParam ¶m, + const share::SCN checkpoint_scn = share::SCN::min_scn()); + + // start the direct load, which actually writes the start log. + // @param [in] is_full_direct_load. + // @param [in] ls_id. + // @param [in] tablet_id. NOTE(review): the old text here read "the commit version for the full direct load", which does not describe a tablet id - confirm what was meant. + int open_tablet_direct_load( + const bool is_full_direct_load, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const int64_t context_id, + share::SCN &start_scn, + ObTabletDirectLoadMgrHandle &handle); + + // create sstable slice writer for direct load. + // @param [in] slice_info.is_full_direct_load_. + // @param [in] slice_info.is_lob_tablet_slice_, decides whether the slice writer is created for the data tablet or the lob meta tablet. + // @param [in] slice_info.tablet_id_, is always the data tablet id rather than lob meta tablet id. + // @param [in] start_seq, start sequence of macro block, which decides the logical id. + // @param [out] slice_info.slice_id, to identify the created slice writer. + int open_sstable_slice( + const blocksstable::ObMacroDataSeq &start_seq, + ObDirectLoadSliceInfo &slice_info); + + // fill data row into macro block directly for data tablet. + int fill_sstable_slice( + const ObDirectLoadSliceInfo &slice_info, + ObIStoreRowIterator *iter, + int64_t &affected_rows, + ObInsertMonitor *insert_monitor = NULL); + + // fill lob meta data into macro block directly. + // @param [in] slice_info, contains is_full_direct_load, data_tablet_id, lob slice id. + // @param [in] pk_interval, tablet cache interval; presumably the source of the generated lob ids - TODO confirm. + // @param [in] lob_column_idxs, col_types, presumably the positions and types of the lob columns in datum_row - TODO confirm. + // @param [in,out] datum_row, the data row whose lob columns are to be filled.
+ int fill_lob_sstable_slice( + ObIAllocator &allocator, + const ObDirectLoadSliceInfo &slice_info /*contains data_tablet_id, lob_slice_id, start_seq*/, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + blocksstable::ObDatumRow &datum_row); + // flush macro block, close and destroy slice writer. + int close_sstable_slice(const ObDirectLoadSliceInfo &slice_info, ObInsertMonitor *insert_monitor = NULL); + + // end direct load due to commit or abort. + // @param [in] is_full_direct_load. + // @param [in] tablet_id. + // @param [in] need_commit, to decide whether to create sstable. + // need_commit = true when commit, and need_commit = false when abort. + // @param [in] emergent_finish, to decide whether to create sstable immediately or later(batch create). + // @param [in] task_id, table_id, execution_id, for ddl report checksum. + int close_tablet_direct_load( + const int64_t context_id, + const bool is_full_direct_load, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const bool need_commit, + const bool emergent_finish = true, + const int64_t task_id = 0, + const int64_t table_id = common::OB_INVALID_ID, + const int64_t execution_id = -1); + + // some utils functions below. + // to get online stats result, + // and to avoid empty result, the caller should set need_online_opt_stat_gather_ when create tablet manager. + int get_online_stat_collect_result( + const bool is_full_direct_load, + const ObTabletID &tablet_id, + const ObArray *&column_stat_array); + // fetch hidden pk value, for ddl only. 
+ int get_tablet_cache_interval( + const int64_t context_id, + const ObTabletID &tablet_id, + share::ObTabletCacheInterval &interval); + int get_tablet_mgr( + const ObTabletID &tablet_id, + const bool is_full_direct_load, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle); + int get_tablet_mgr_and_check_major( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const bool is_full_direct_load, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle, + bool &is_major_sstable_exist); + // for direct load rescan + int calc_range( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const bool is_full_direct_load); + int fill_column_group( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const bool is_full_direct_load, + const int64_t thread_cnt, + const int64_t thread_id); + int cancel( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const bool is_full_direct_load); + // remove tablet direct load mgr from hashmap, + // for full direct load, it will be called when physical major generates, + // for incremental direct load, it will be called when all KVs dump. 
+ int remove_tablet_direct_load(const ObTabletDirectLoadMgrKey &mgr_key); + ObIAllocator &get_allocator() { return allocator_; } +private: + int try_create_tablet_direct_load_mgr( + const int64_t context_id, + const int64_t execution_id, + const bool major_sstable_exist, + ObIAllocator &allocator, + const ObTabletDirectLoadMgrKey &mgr_key, + const bool is_lob_tablet, + ObTabletDirectLoadMgrHandle &handle); + int get_tablet_mgr_no_lock( + const ObTabletDirectLoadMgrKey &mgr_key, + ObTabletDirectLoadMgrHandle &direct_load_mgr_handle); + int check_and_process_finished_tablet( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + ObIStoreRowIterator *row_iter = nullptr, + const int64_t task_id = 0, + const int64_t table_id = common::OB_INVALID_ID, + const int64_t execution_id = -1); + int remove_tablet_direct_load_nolock(const ObTabletDirectLoadMgrKey &mgr_key); + // to generate unique slice id for slice writer, putting here is just to + // simplify the logic of the tablet_direct_load_mgr. + int64_t generate_slice_id(); + int64_t generate_context_id(); + +private: + typedef common::hash::ObHashMap< + ObTabletDirectLoadMgrKey, + ObTabletDirectLoadMgr *, + common::hash::NoPthreadDefendMode> TABLET_MGR_MAP; + typedef common::hash::ObHashMap< + ObTabletDirectLoadExecContextId, // context_id + ObTabletDirectLoadExecContext, + common::hash::NoPthreadDefendMode> TABLET_EXEC_CONTEXT_MAP; + bool is_inited_; + common::ObBucketLock bucket_lock_; // to avoid concurrent execution on the TabletDirectLoadMgr. 
+ common::ObConcurrentFIFOAllocator allocator_; + TABLET_MGR_MAP tablet_mgr_map_; + TABLET_EXEC_CONTEXT_MAP tablet_exec_context_map_; + int64_t slice_id_generator_; + int64_t context_id_generator_; +DISALLOW_COPY_AND_ASSIGN(ObTenantDirectLoadMgr); +}; + + +struct ObTabletDirectLoadBuildCtx final +{ +public: + ObTabletDirectLoadBuildCtx(); + ~ObTabletDirectLoadBuildCtx(); + bool is_valid () const; + static uint64_t get_slice_id_hash(const int64_t slice_id) + { + return common::murmurhash(&slice_id, sizeof(slice_id), 0L); + } + void reset_slice_ctx_on_demand(); + TO_STRING_KV(K_(build_param), K_(is_task_end), K_(task_finish_count), K_(task_total_cnt)); +public: + typedef common::hash::ObHashMap< + int64_t, + ObDirectLoadSliceWriter *> SLICE_MGR_MAP; + common::ObConcurrentFIFOAllocator allocator_; + common::ObConcurrentFIFOAllocator slice_writer_allocator_; + ObTabletDirectLoadInsertParam build_param_; + SLICE_MGR_MAP slice_mgr_map_; // key is slice_id, decided by upper caller. + blocksstable::ObWholeDataStoreDesc data_block_desc_; + blocksstable::ObSSTableIndexBuilder *index_builder_; + common::ObArray column_stat_array_; // online column stat result. + common::ObArray sorted_slice_writers_; + bool is_task_end_; // to avoid write commit log/freeze in memory index sstable again. + int64_t task_finish_count_; // reach the parallel slice cnt, means the tablet data finished. + int64_t task_total_cnt_; // parallelism of the PX. + int64_t fill_column_group_finish_count_; +}; + +class ObTabletDirectLoadMgr +{ +public: + ObTabletDirectLoadMgr(); + virtual ~ObTabletDirectLoadMgr(); + virtual bool is_valid(); + virtual int update( + ObTabletDirectLoadMgr *lob_tablet_mgr, + const ObTabletDirectLoadInsertParam &build_param); + virtual int open(const int64_t current_execution_id, share::SCN &start_scn) = 0; // write start log. + virtual int close(const int64_t current_execution_id, const share::SCN &start_scn) = 0; // end tablet. 
+ + virtual int open_sstable_slice( + const bool is_data_tablet_process_for_lob, + const blocksstable::ObMacroDataSeq &start_seq, + const int64_t slice_id); + virtual int fill_sstable_slice( + const ObDirectLoadSliceInfo &slice_info, + const share::SCN &start_scn, + ObIStoreRowIterator *iter, + int64_t &affected_rows, + ObInsertMonitor *insert_monitor = NULL); + virtual int fill_lob_sstable_slice( + ObIAllocator &allocator, + const ObDirectLoadSliceInfo &slice_info /*contains data_tablet_id, lob_slice_id, start_seq*/, + const share::SCN &start_scn, + share::ObTabletCacheInterval &pk_interval, + blocksstable::ObDatumRow &datum_row); + virtual int fill_lob_sstable_slice( + ObIAllocator &allocator, + const ObDirectLoadSliceInfo &slice_info /*contains data_tablet_id, lob_slice_id, start_seq*/, + const share::SCN &start_scn, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + blocksstable::ObDatumRow &datum_row); + virtual int close_sstable_slice( + const bool is_data_tablet_process_for_lob, + const ObDirectLoadSliceInfo &slice_info, + const share::SCN &start_scn, + const int64_t execution_id, + ObInsertMonitor *insert_monitor=NULL); + + // for ref_cnt + void inc_ref() { ATOMIC_INC(&ref_cnt_); } + int cancel(); + int64_t dec_ref() { return ATOMIC_SAF(&ref_cnt_, 1); } + int64_t get_ref() { return ATOMIC_LOAD(&ref_cnt_); } + + // some utils. 
+ virtual share::SCN get_start_scn() = 0; + virtual share::SCN get_commit_scn(const ObTabletMeta &tablet_meta) = 0; + inline const ObITable::TableKey &get_table_key() const { return table_key_; } + inline uint64_t get_data_format_version() const { return data_format_version_; } + inline ObDirectLoadType get_direct_load_type() const { return direct_load_type_; } + inline ObTabletDirectLoadBuildCtx &get_sqc_build_ctx() { return sqc_build_ctx_; } + inline const share::ObLSID &get_ls_id() const { return ls_id_; } + inline const ObTabletID &get_tablet_id() const { return tablet_id_; } + inline ObTabletID get_lob_meta_tablet_id() { + return lob_mgr_handle_.is_valid() ? lob_mgr_handle_.get_obj()->get_tablet_id() : ObTabletID(); + } + inline int64_t get_ddl_task_id() const { return sqc_build_ctx_.build_param_.runtime_only_param_.task_id_; } + // virtual int get_online_stat_collect_result(); + + virtual int wrlock(const int64_t timeout_us, uint32_t &lock_tid); + virtual void unlock(const uint32_t lock_tid); + int prepare_index_builder_if_need(const ObTableSchema &table_schema); + virtual int wait_notify(const ObDirectLoadSliceWriter *slice_writer, const share::SCN &start_scn); + int fill_column_group(const int64_t thread_cnt, const int64_t thread_id); + virtual int notify_all(); + virtual int calc_range(const ObStorageSchema *storage_schema, const blocksstable::ObStorageDatumUtils &datum_utils); + const ObIArray &get_column_info() const { return column_items_; }; + + VIRTUAL_TO_STRING_KV(K_(is_inited), K_(is_schema_item_ready), K_(ls_id), K_(tablet_id), K_(table_key), K_(data_format_version), K_(ref_cnt), + K_(direct_load_type), K_(sqc_build_ctx), KPC(lob_mgr_handle_.get_obj()), K_(schema_item), K_(column_items), K_(lob_column_idxs)); + +private: + int prepare_schema_item_on_demand(const uint64_t table_id); + void calc_cg_idx(const int64_t thread_cnt, const int64_t thread_id, int64_t &strat_idx, int64_t &end_idx); + +// private: + /* +++++ online column stat collect +++++ 
*/ + // virtual int init_sql_statistics_if_needed(); + // int collect_obj(const blocksstable::ObDatumRow &datum_row); + /* +++++ -------------------------- +++++ */ +public: + static const int64_t TRY_LOCK_TIMEOUT = 1 * 1000000; // 1s +protected: + bool is_inited_; + bool is_schema_item_ready_; + share::ObLSID ls_id_; + ObTabletID tablet_id_; + ObITable::TableKey table_key_; + uint64_t data_format_version_; + common::ObLatch lock_; + int64_t ref_cnt_; + ObDirectLoadType direct_load_type_; + // sqc_build_ctx_ is just used for the observer node who receives the requests from the SQL Layer + // to write the start log and the data redo log. And other observer nodes can not use it. + ObTabletDirectLoadBuildCtx sqc_build_ctx_; + // to handle the lob meta tablet, use it before the is_valid judgement. + ObTabletDirectLoadMgrHandle lob_mgr_handle_; + common::ObThreadCond cond_; // for fill column group + // cache ObTableSchema for lob direct load performance + ObArray column_items_; + ObArray lob_column_idxs_; + ObArray lob_col_types_; + ObTabletHandle tablet_handle_; + ObTableSchemaItem schema_item_; +}; + +class ObTabletFullDirectLoadMgr final : public ObTabletDirectLoadMgr +{ +public: + ObTabletFullDirectLoadMgr(); + ~ObTabletFullDirectLoadMgr(); + virtual int update( + ObTabletDirectLoadMgr *lob_tablet_mgr, + const ObTabletDirectLoadInsertParam &build_param); + int open(const int64_t current_execution_id, share::SCN &start_scn) override; // start + int close(const int64_t execution_id, const share::SCN &start_scn) override; // end, including write commit log, wait major sstable generates. 
+ + int start_nolock( + const ObITable::TableKey &table_key, + const share::SCN &start_scn, + const uint64_t data_format_version, + const int64_t execution_id, + const share::SCN &checkpoint_scn, + ObDDLKvMgrHandle &ddl_kv_mgr_handle, + ObDDLKvMgrHandle &lob_kv_mgr_handle); + int start( + ObTablet &tablet, + const ObITable::TableKey &table_key, + const share::SCN &start_scn, + const uint64_t data_format_version, + const int64_t execution_id, + const share::SCN &checkpoint_scn); + int start_with_checkpoint( + ObTablet &tablet, + const share::SCN &start_scn, + const uint64_t data_format_version, + const int64_t execution_id, + const share::SCN &checkpoint_scn); + int commit( + ObTablet &tablet, + const share::SCN &start_scn, + const share::SCN &commit_scn, + const uint64_t table_id = 0, + const int64_t ddl_task_id = 0); // schedule build a major sstable + + void set_commit_scn_nolock(const share::SCN &scn); + int set_commit_scn(const share::SCN &scn); + share::SCN get_start_scn() override; + share::SCN get_commit_scn(const ObTabletMeta &tablet_meta) override; + + // check need schedule major compaction. 
+ int can_schedule_major_compaction_nolock( + const ObTablet &tablet, + bool &can_schedule); + int prepare_ddl_merge_param( + const ObTablet &tablet, + ObDDLTableMergeDagParam &merge_param); + int prepare_major_merge_param(ObTabletDDLParam &param); + INHERIT_TO_STRING_KV("ObTabletDirectLoadMgr", ObTabletDirectLoadMgr, K_(start_scn), K_(commit_scn), K_(execution_id)); +private: + bool is_started() { return start_scn_.is_valid_and_not_min(); } + int schedule_merge_task(const share::SCN &start_scn, const share::SCN &commit_scn, const bool wait_major_generated); // try wait build major sstable + int cleanup_unlock(); + int init_ddl_table_store(const share::SCN &start_scn, const int64_t snapshot_version, const share::SCN &ddl_checkpoint_scn); + int update_major_sstable(); + +private: + share::SCN start_scn_; + share::SCN commit_scn_; + int64_t execution_id_; +DISALLOW_COPY_AND_ASSIGN(ObTabletFullDirectLoadMgr); +}; + +class ObTabletIncDirectLoadMgr final : public ObTabletDirectLoadMgr +{ +public: + ObTabletIncDirectLoadMgr() + : ObTabletDirectLoadMgr() + { } + ~ObTabletIncDirectLoadMgr() {} + virtual int update( + ObTabletDirectLoadMgr *lob_tablet_mgr, + const ObTabletDirectLoadInsertParam &build_param) + { + return OB_NOT_IMPLEMENT; + } + int open(const int64_t current_execution_id, share::SCN &start_scn) override + { + // write start log to freeze memtable and ddl kv. + return OB_NOT_IMPLEMENT; + } + int close(const int64_t current_execution_id, const share::SCN &start_scn) override + { + return OB_NOT_IMPLEMENT; + } + + // to freeze in_memory_indexed_sstables and memtables.
+ int start(ObTablet &tablet, const ObITable::TableKey &table_key, const share::SCN &start_scn, const uint64_t data_format_version, const int64_t execution_id, const share::SCN &checkpoint_scn); + int start_nolock(const ObITable::TableKey &table_key, const share::SCN &start_scn, const uint64_t data_format_version, const int64_t execution_id, const share::SCN &checkpoint_scn); + // return ddl param with end_scn. + // int get_direct_load_merge_param(ObTabletDDLParam &ddl_param) override; + // mini merge, used for compaction. + // int get_direct_load_merge_param(const ObTabletMeta &tablet_meta, ObDDLTableMergeDagParam &merge_param) override; + share::SCN get_start_scn() override { return share::SCN::min_scn(); } + share::SCN get_commit_scn(const ObTabletMeta &tablet_meta) override { return share::SCN::invalid_scn(); } + int freeze_in_memory_indexed_sstable( + const share::SCN &freeze_scn = share::SCN::min_scn()); + +// private: + int wait_memory_index_sstable_freeze(); // check and wait in memory index sstable freeze. +private: +DISALLOW_COPY_AND_ASSIGN(ObTabletIncDirectLoadMgr); +}; + + + +}// namespace storage +}// namespace oceanbase + +#endif//OCEANBASE_STORAGE_DDL_OB_DIRECT_INSERT_SSTABLE_CTX_NEW_H diff --git a/src/storage/ddl/ob_direct_load_struct.cpp b/src/storage/ddl/ob_direct_load_struct.cpp new file mode 100644 index 000000000..f4ee15a58 --- /dev/null +++ b/src/storage/ddl/ob_direct_load_struct.cpp @@ -0,0 +1,1217 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "ob_direct_load_struct.h" +#include "share/ob_ddl_checksum.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "share/ob_ddl_common.h" +#include "share/ob_tablet_autoincrement_service.h" +#include "storage/blocksstable/index_block/ob_index_block_builder.h" +#include "storage/compaction/ob_column_checksum_calculator.h" +#include "storage/compaction/ob_tenant_freeze_info_mgr.h" +#include "sql/engine/pdml/static/ob_px_sstable_insert_op.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" +#include "storage/lob/ob_lob_util.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "sql/das/ob_das_utils.h" +#include "sql/engine/basic/chunk_store/ob_compact_store.h" + +using namespace oceanbase; +using namespace oceanbase::common; +using namespace oceanbase::storage; +using namespace oceanbase::blocksstable; +using namespace oceanbase::share; +using namespace oceanbase::share::schema; +using namespace oceanbase::sql; + +int ObTabletDirectLoadInsertParam::assign(const ObTabletDirectLoadInsertParam &other_param) +{ + int ret = OB_SUCCESS; + if (other_param.common_param_.is_valid()) { + common_param_ = other_param.common_param_; + } + if (other_param.runtime_only_param_.is_valid()) { + runtime_only_param_ = other_param.runtime_only_param_; + } + is_replay_ = other_param.is_replay_; + return ret; +} + +ObDDLInsertRowIterator::ObDDLInsertRowIterator( + sql::ObPxMultiPartSSTableInsertOp *op, + const bool is_slice_empty, const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, + const int64_t rowkey_cnt, const int64_t snapshot_version, const int64_t context_id) + : lob_allocator_(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), op_(op), ls_id_(ls_id), current_tablet_id_(tablet_id), current_row_(), is_next_row_cached_(true), + is_slice_empty_(is_slice_empty), rowkey_count_(rowkey_cnt), snapshot_version_(snapshot_version), lob_slice_id_(0), context_id_(context_id) +{ + 
lob_id_cache_.set(1/*start*/, 0/*end*/); +} + +ObDDLInsertRowIterator::~ObDDLInsertRowIterator() +{ + +} + +int ObDDLInsertRowIterator::close_lob_sstable_slice() +{ + int ret = OB_SUCCESS; + if (lob_slice_id_ > 0) { + ObDirectLoadSliceInfo slice_info; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = ls_id_; + slice_info.data_tablet_id_ = current_tablet_id_; + slice_info.slice_id_ = lob_slice_id_; + slice_info.context_id_ = context_id_; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_FAIL(tenant_direct_load_mgr->close_sstable_slice(slice_info))) { + LOG_WARN("close sstable slice failed", K(ret), K(slice_info)); + } else { + lob_slice_id_ = 0; + } + } + return ret; +} + +int ObDDLInsertRowIterator::get_next_row( + const blocksstable::ObDatumRow *&row) +{ + int ret = OB_SUCCESS; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_UNLIKELY(nullptr == op_ || snapshot_version_ <= 0 || nullptr == tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("operator is null", K(ret), KP(op_), K(snapshot_version_), KP(tenant_direct_load_mgr), K(MTL_ID())); + } else { + if (is_slice_empty_) { + // without any data in the current slice. + ret = OB_ITER_END; + } else if (OB_UNLIKELY(is_next_row_cached_)) { + is_next_row_cached_ = false; + } else if (OB_FAIL(op_->get_next_row_with_cache())) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row from child failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + op_->clear_evaluated_flag(); + common::ObTabletID row_tablet_id; + lob_allocator_.reuse(); + if (OB_FAIL(op_->get_tablet_id_from_row(op_->get_child()->get_spec().output_, + op_->get_spec().row_desc_.get_part_id_index(), + row_tablet_id))) { + LOG_WARN("get part id failed", K(ret)); + } else if (row_tablet_id != current_tablet_id_) { + // iter the partition end, and switch to next part. 
+ ret = OB_ITER_END; + } else { + const ObExprPtrIArray &exprs = op_->get_spec().ins_ctdef_.new_row_; + const int64_t extra_rowkey_cnt = storage::ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + ObEvalCtx &eval_ctx = op_->get_eval_ctx(); + const int64_t request_cnt = exprs.count() + extra_rowkey_cnt; + if (OB_UNLIKELY((rowkey_count_ > exprs.count()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected rowkey count", K(ret), K(rowkey_count_), K(exprs.count())); + } else if (current_row_.get_column_count() <= 0 + && OB_FAIL(current_row_.init(op_->get_exec_ctx().get_allocator(), request_cnt))) { + LOG_WARN("init datum row failed", K(ret), K(request_cnt)); + } else if (OB_UNLIKELY(current_row_.get_column_count() != request_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(request_cnt), "datum_row_cnt", current_row_.get_column_count()); + } else { + ObArray lob_column_idxs; + ObArray col_types; + lob_column_idxs.set_attr(ObMemAttr(MTL_ID(), "DL_lob_idxs")); + col_types.set_attr(ObMemAttr(MTL_ID(), "DL_col_types")); + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); i++) { + int64_t storage_index = 0; // real storage index of the column. 
+ ObDatum *datum = NULL; + const ObExpr *e = exprs.at(i); + if (OB_ISNULL(e)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is NULL", K(ret), K(i)); + } else if (OB_FAIL(e->eval(eval_ctx, datum))) { + LOG_WARN("evaluate expression failed", K(ret), K(i), KPC(e)); + } else if (i < rowkey_count_) { + storage_index = i; + current_row_.storage_datums_[storage_index].shallow_copy_from_datum(*datum); + } else { + storage_index = i + extra_rowkey_cnt; + current_row_.storage_datums_[storage_index].shallow_copy_from_datum(*datum); + } + + if (OB_FAIL(ret)) { + } else if (exprs.at(i)->obj_meta_.is_lob_storage() + && !current_row_.storage_datums_[storage_index].is_nop() + && !current_row_.storage_datums_[storage_index].is_null()) { + if (OB_FAIL(lob_column_idxs.push_back(storage_index))) { + LOG_WARN("fail to push back storage_index", K(ret), K(storage_index)); + } else if (OB_FAIL(col_types.push_back(exprs.at(i)->obj_meta_))) { + LOG_WARN("fail to push back col_type", K(ret), K(exprs.at(i)->obj_meta_)); + } + } + } + + if (OB_SUCC(ret) && lob_column_idxs.count() > 0) { + //handle lob + if (lob_id_cache_.remain_count() < lob_column_idxs.count()) { + if (OB_FAIL(switch_to_new_lob_slice())) { + LOG_WARN("switch to new lob slice failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + ObDirectLoadSliceInfo slice_info; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = ls_id_; + slice_info.data_tablet_id_ = current_tablet_id_; + slice_info.slice_id_ = lob_slice_id_; + slice_info.context_id_ = context_id_; + if (OB_FAIL(tenant_direct_load_mgr->fill_lob_sstable_slice(lob_allocator_, slice_info, + lob_id_cache_, lob_column_idxs, col_types, current_row_))) { + LOG_WARN("fill batch lob sstable slice failed", K(ret), K(slice_info), K(current_row_)); + } + } + } + + if (OB_SUCC(ret)) { + // add extra rowkey + current_row_.storage_datums_[rowkey_count_].set_int(-snapshot_version_); + current_row_.storage_datums_[rowkey_count_ + 1].set_int(0); + } + 
} + } + } + if (OB_ITER_END == ret) { + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(close_lob_sstable_slice())) { + LOG_WARN("fail to close lob sstable slice", K(tmp_ret)); + } else { + ret = tmp_ret == OB_SUCCESS ? OB_ITER_END : tmp_ret; + } + } + } + if (OB_SUCC(ret)) { + current_row_.row_flag_.set_flag(ObDmlFlag::DF_INSERT); + row = &current_row_; + } + return ret; +} + + +// close old lob sstable slice, request new lob id cache interval, +// and construct new sstable slice writer. +int ObDDLInsertRowIterator::switch_to_new_lob_slice() +{ + int ret = OB_SUCCESS; + // slice info to close. + ObDirectLoadSliceInfo slice_info; + uint64_t lob_id = 0; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = ls_id_; + slice_info.data_tablet_id_ = current_tablet_id_; + slice_info.slice_id_ = lob_slice_id_; + slice_info.context_id_ = context_id_; + ObMacroDataSeq block_start_seq; + ObTabletAutoincrementService &auto_inc = ObTabletAutoincrementService::get_instance(); + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + direct_load_mgr_handle.reset(); + if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr(current_tablet_id_, + true /*is_full_direct_load*/, direct_load_mgr_handle))) { + LOG_WARN("get tablet direct load mgr failed", K(ret), K(current_tablet_id_)); + } else if (OB_FALSE_IT(lob_id_cache_.tablet_id_ = + direct_load_mgr_handle.get_obj()->get_lob_meta_tablet_id())) { + // fetch cache via lob meta tablet id.
+ } else if (OB_FALSE_IT(lob_id_cache_.cache_size_ = AUTO_INC_CACHE_SIZE)) { + } else if (lob_slice_id_ > 0 && + OB_FAIL(tenant_direct_load_mgr->close_sstable_slice(slice_info))) { + LOG_WARN("close old lob slice failed", K(ret), K(slice_info)); + } else if (OB_FAIL(auto_inc.get_tablet_cache_interval(MTL_ID(), lob_id_cache_))) { + LOG_WARN("get_autoinc_seq fail", K(ret), K(MTL_ID()), K(slice_info)); + } else if (OB_UNLIKELY(AUTO_INC_CACHE_SIZE > lob_id_cache_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected autoincrement value count", K(ret), K(lob_id_cache_)); + } else if (OB_FAIL(lob_id_cache_.get_value(lob_id))) { + LOG_WARN("get value failed", K(ret), K(lob_id)); + } else if (OB_FAIL(block_start_seq.set_parallel_degree(lob_id / AUTO_INC_CACHE_SIZE))) { + LOG_WARN("set parall degree failed", K(ret), K(lob_id)); + } else { + // new slice info to open. + slice_info.slice_id_ = 0; + if (OB_FAIL(tenant_direct_load_mgr->open_sstable_slice(block_start_seq, slice_info))) { + LOG_WARN("open lob sstable slice failed", KR(ret), K(block_start_seq), K(slice_info)); + } else { + lob_slice_id_ = slice_info.slice_id_; + } + } + return ret; +} + +ObLobMetaRowIterator::ObLobMetaRowIterator() + : is_inited_(false), iter_(nullptr), trans_id_(0), trans_version_(0), sql_no_(0), + tmp_row_(), lob_meta_write_result_() +{ +} + +ObLobMetaRowIterator::~ObLobMetaRowIterator() +{ + reset(); +} + +int ObLobMetaRowIterator::init(ObLobMetaWriteIter *iter, + const transaction::ObTransID &trans_id, + const int64_t trans_version, + const int64_t sql_no) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_ISNULL(iter) || OB_UNLIKELY(trans_id < 0 || sql_no < 0 || trans_version < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("iter is nullptr", K(ret), K(trans_id), K(sql_no), K(trans_version)); + } else if (!tmp_row_.is_valid() && OB_FAIL(tmp_row_.init(ObLobMetaUtil::LOB_META_COLUMN_CNT + 
ObLobMetaUtil::SKIP_INVALID_COLUMN))) { + LOG_WARN("Failed to init datum row", K(ret)); + } else { + iter_ = iter; + trans_id_ = trans_id; + trans_version_ = trans_version; + sql_no_ = sql_no; + is_inited_ = true; + } + return ret; +} + +void ObLobMetaRowIterator::reset() +{ + is_inited_ = false; + iter_ = nullptr; + trans_id_.reset(); + trans_version_ = 0; + sql_no_ = 0; + tmp_row_.reset(); +} + +void ObLobMetaRowIterator::reuse() +{ + is_inited_ = false; + iter_ = nullptr; + trans_id_.reset(); + trans_version_ = 0; + sql_no_ = 0; + tmp_row_.reuse(); +} + +int ObLobMetaRowIterator::get_next_row(const blocksstable::ObDatumRow *&row) +{ + int ret = OB_SUCCESS; + row = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ObLobMetaWriteIter is nullptr", K(ret)); + } else if (OB_FAIL(iter_->get_next_row(lob_meta_write_result_))) { + if (OB_UNLIKELY(ret != OB_ITER_END)) { + LOG_WARN("failed to get next row", K(ret)); + } + } else { + if (OB_FAIL(ObLobMetaUtil::transform_from_info_to_row(lob_meta_write_result_.info_, &tmp_row_, true))) { + LOG_WARN("transform failed", K(ret), K(lob_meta_write_result_.info_)); + } else { + tmp_row_.storage_datums_[ObLobMetaUtil::SEQ_ID_COL_ID + 1].set_int(-trans_version_); + tmp_row_.storage_datums_[ObLobMetaUtil::SEQ_ID_COL_ID + 2].set_int(sql_no_); + tmp_row_.set_trans_id(trans_id_); + tmp_row_.row_flag_.set_flag(ObDmlFlag::DF_INSERT); + row = &tmp_row_; + } + } + return ret; +} + +ObTabletDDLParam::ObTabletDDLParam() + : direct_load_type_(ObDirectLoadType::DIRECT_LOAD_INVALID), ls_id_(), start_scn_(SCN::min_scn()), commit_scn_(SCN::min_scn()), data_format_version_(0), table_key_(), snapshot_version_(0) +{ + +} + +ObTabletDDLParam::~ObTabletDDLParam() +{ + +} + +int ObChunkSliceStore::init(const int64_t rowkey_column_count, ObArenaAllocator &allocator, + const ObIArray &col_array, + common::ObCompressorType compress_type) 
+{ + int ret = OB_SUCCESS; + const int64_t chunk_mem_limit = 2 * 1024L * 1024L; // 2M + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(rowkey_column_count <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalida argument", K(ret), K(rowkey_column_count)); + } else if (OB_FAIL(datum_store_.init(chunk_mem_limit, col_array, MTL_ID(), ObCtxIds::DEFAULT_CTX_ID, + "DL_SLICE_STORE", true/*enable_dump*/, 0, false/*disable truncate*/, + compress_type == NONE_COMPRESSOR ? SORT_COMPACT_LEVEL : SORT_COMPRESSION_COMPACT_LEVEL, + compress_type))) { + LOG_WARN("failed to init chunk datum store", K(ret)); + } else if (OB_FAIL(datum_store_.alloc_dir_id())) { + LOG_WARN("failed to alloc dir id", K(ret)); + } else { + arena_allocator_ = &allocator; + rowkey_column_count_ = rowkey_column_count; + is_inited_ = true; + LOG_DEBUG("init chunk slice store", K(ret), KPC(this)); + } + return ret; +} + +int ObChunkSliceStore::append_row(const blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!datum_row.is_valid() || datum_row.get_column_count() < rowkey_column_count_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(datum_row), K(rowkey_column_count_)); + } else if (OB_FAIL(datum_store_.add_row(datum_row.storage_datums_, datum_row.get_column_count(), 0/*extra_size*/))) { + LOG_WARN("chunk datum store add row failed", K(ret)); + } + return ret; +} + +int ObChunkSliceStore::close() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (datum_store_.get_row_cnt() > 0) { // save endkey + const ObChunkDatumStore::StoredRow *stored_row = nullptr; + if (OB_FAIL(datum_store_.get_last_stored_row(stored_row))) { + LOG_WARN("fail to get last stored row", K(ret)); + } else if (OB_UNLIKELY(nullptr == stored_row || 
stored_row->cnt_ < rowkey_column_count_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("last stored row is null", K(ret), KPC(stored_row)); + } else { + void *buf = arena_allocator_->alloc(sizeof(ObStorageDatum) * rowkey_column_count_); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for endkey datums failed", K(ret), KPC(stored_row)); + } else { + endkey_.datums_ = new (buf) ObStorageDatum[rowkey_column_count_]; + endkey_.datum_cnt_ = rowkey_column_count_; + ObStorageDatum tmp_datum; + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_column_count_; ++i) { + tmp_datum.shallow_copy_from_datum(stored_row->cells()[i]); + if (OB_FAIL(endkey_.datums_[i].deep_copy(tmp_datum, *arena_allocator_))) { + LOG_WARN("deep copy storage datum failed", K(ret)); + } + } + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(datum_store_.finish_add_row(true/*need_dump*/))) { + LOG_WARN("finish add row failed", K(ret)); + } + } + LOG_DEBUG("chunk slice store closed", K(ret), K(endkey_)); + return ret; +} + + +int ObMacroBlockSliceStore::init( + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &data_seq, + const SCN &start_scn) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(nullptr == tablet_direct_load_mgr || !data_seq.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(tablet_direct_load_mgr), K(data_seq)); + } else { + const ObLSID &ls_id = tablet_direct_load_mgr->get_ls_id(); + const ObITable::TableKey &table_key = tablet_direct_load_mgr->get_table_key(); // TODO(cangdi): fix it with right table key + const int64_t ddl_task_id = tablet_direct_load_mgr->get_ddl_task_id(); + const uint64_t data_format_version = tablet_direct_load_mgr->get_data_format_version(); + const ObDirectLoadType direct_load_type = tablet_direct_load_mgr->get_direct_load_type(); + const ObWholeDataStoreDesc 
&data_desc = tablet_direct_load_mgr->get_sqc_build_ctx().data_block_desc_; + if (OB_FAIL(ddl_redo_writer_.init(ls_id, table_key.tablet_id_))) { + LOG_WARN("init sstable redo writer failed", K(ret), K(ls_id), K(table_key)); + } else if (OB_FAIL(write_ddl_redo_callback_.init(DDL_MB_DATA_TYPE, table_key, ddl_task_id, + start_scn, data_format_version, &ddl_redo_writer_))) { + LOG_WARN("init write ddl redo callback failed", K(ret)); + } else if (OB_FAIL(macro_block_writer_.open(data_desc.get_desc(), data_seq, &write_ddl_redo_callback_))) { + LOG_WARN("open macro bock writer failed", K(ret)); + } else { + is_inited_ = true; + } + } + return ret; +} + +int ObMacroBlockSliceStore::append_row(const blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(macro_block_writer_.append_row(datum_row))) { + LOG_WARN("macro block writer append row failed", K(ret), K(datum_row)); + } + return ret; +} + +int ObMacroBlockSliceStore::close() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(macro_block_writer_.close())) { + LOG_WARN("close macro block writer failed", K(ret)); + } + return ret; +} + +bool ObTabletDDLParam::is_valid() const +{ + return is_valid_direct_load(direct_load_type_) + && ls_id_.is_valid() + && table_key_.is_valid() + && start_scn_.is_valid_and_not_min() + && commit_scn_.is_valid() && commit_scn_ != SCN::max_scn() + && snapshot_version_ > 0 + && data_format_version_ > 0; +} + +ObDirectLoadSliceWriter::ObDirectLoadSliceWriter() + : is_inited_(false), need_column_store_(false), is_canceled_(false), start_seq_(), tablet_direct_load_mgr_(nullptr), + slice_store_(nullptr), meta_write_iter_(nullptr), row_iterator_(nullptr), + allocator_(lib::ObLabel("SliceWriter"), OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), row_offset_(-1) +{ +} + 
+ObDirectLoadSliceWriter::~ObDirectLoadSliceWriter() +{ + if (nullptr != slice_store_) { + slice_store_->~ObTabletSliceStore(); + allocator_.free(slice_store_); + slice_store_ = nullptr; + } + if (nullptr != meta_write_iter_) { + meta_write_iter_->~ObLobMetaWriteIter(); + allocator_.free(meta_write_iter_); + meta_write_iter_ = nullptr; + } + if (nullptr != row_iterator_) { + row_iterator_->~ObLobMetaRowIterator(); + allocator_.free(row_iterator_); + row_iterator_ = nullptr; + } + allocator_.reset(); + row_offset_ = -1; + need_column_store_ = false; +} + +int ObDirectLoadSliceWriter::prepare_slice_store_if_need( + const int64_t schema_rowkey_column_num, + const bool is_column_store, + const ObCompressorType compress_type, + const SCN &start_scn) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (nullptr != slice_store_) { + // do nothing + } else if (is_full_direct_load(tablet_direct_load_mgr_->get_direct_load_type())) { + if (is_column_store) { + need_column_store_ = true; + ObChunkSliceStore *chunk_slice_store = nullptr; + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(chunk_slice_store = OB_NEWx(ObChunkSliceStore, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for chunk slice store failed", K(ret)); + } else if (OB_FAIL(chunk_slice_store->init(schema_rowkey_column_num + + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(), allocator_, + tablet_direct_load_mgr_->get_column_info(), + compress_type))) { + LOG_WARN("init chunk slice store failed", K(ret)); + } else { + slice_store_ = chunk_slice_store; + } + if (OB_FAIL(ret) && nullptr != chunk_slice_store) { + chunk_slice_store->~ObChunkSliceStore(); + allocator_.free(chunk_slice_store); + } + } else { + ObMacroBlockSliceStore *macro_block_slice_store = nullptr; + if (OB_ISNULL(macro_block_slice_store = OB_NEWx(ObMacroBlockSliceStore, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate 
memory for macro block slice store failed", K(ret)); + } else if (OB_FAIL(macro_block_slice_store->init(tablet_direct_load_mgr_, start_seq_, start_scn))) { + LOG_WARN("init macro block slice store failed", K(ret), KPC(tablet_direct_load_mgr_), K(start_seq_)); + } else { + slice_store_ = macro_block_slice_store; + } + if (OB_FAIL(ret) && nullptr != macro_block_slice_store) { + macro_block_slice_store->~ObMacroBlockSliceStore(); + allocator_.free(macro_block_slice_store); + } + } + } + return ret; +} + +int ObDirectLoadSliceWriter::init( + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &start_seq) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", KR(ret), KPC(this)); + } else if (OB_UNLIKELY(nullptr == tablet_direct_load_mgr || !start_seq.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), KPC(tablet_direct_load_mgr), K(start_seq)); + } else { + tablet_direct_load_mgr_ = tablet_direct_load_mgr; + start_seq_ = start_seq; + is_inited_ = true; + } + return ret; +} + +int ObDirectLoadSliceWriter::prepare_iters( + ObIAllocator &allocator, + ObIAllocator &iter_allocator, + blocksstable::ObStorageDatum &datum, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const int64_t trans_version, + const ObCollationType &cs_type, + const ObLobId &lob_id, + const transaction::ObTransID trans_id, + const int64_t seq_no, + const int64_t timeout_ts, + const int64_t lob_inrow_threshold, + ObLobMetaRowIterator *&row_iter) +{ + int ret = OB_SUCCESS; + row_iter = nullptr; + + if (OB_ISNULL(meta_write_iter_)) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObLobMetaWriteIter)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob meta write iter failed", K(ret)); + } else { + meta_write_iter_ = new (buf) ObLobMetaWriteIter(datum.get_string(), &iter_allocator, ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE); + } + } else { + 
meta_write_iter_->set_data(datum.get_string()); + } + if (OB_SUCC(ret)) { + if (OB_ISNULL(row_iterator_)) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObLobMetaRowIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc lob meta row iter failed", K(ret)); + } else { + row_iterator_ = new (buf) ObLobMetaRowIterator(); + } + } + } + + if (OB_SUCC(ret)) { + ObLobStorageParam lob_storage_param; + lob_storage_param.inrow_threshold_ = lob_inrow_threshold; + int64_t unused_affected_rows = 0; + if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( + allocator, nullptr, ls_id, tablet_id, lob_id, cs_type, lob_storage_param, datum, timeout_ts, true/*has_lob_header*/, *meta_write_iter_))) { + LOG_WARN("fail to insert_lob_col", K(ret), K(ls_id), K(tablet_id), K(lob_id)); + } else if (OB_FAIL(row_iterator_->init(meta_write_iter_, trans_id, + trans_version, seq_no))) { + LOG_WARN("fail to lob meta row iterator", K(ret), K(trans_id), K(trans_version), K(seq_no)); + } else { + row_iter = row_iterator_; + } + } + return ret; +} + +// Write the lob columns of datum_row, choosing the target by data format version: +// versions below DATA_VERSION_4_3_0_0 go through fill_lob_into_memtable, others through fill_lob_into_macro_block. +// Returns OB_NOT_INIT when the writer has not been initialized. +int ObDirectLoadSliceWriter::fill_lob_sstable_slice( + const uint64_t table_id, + ObIAllocator &allocator, + ObIAllocator &iter_allocator, + const SCN &start_scn, + const ObBatchSliceWriteInfo &info, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + const int64_t lob_inrow_threshold, + blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + // tablet_direct_load_mgr_ is only set by a successful init(), so read the version after the init check + uint64_t data_format_version = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadSliceWriter not init", KR(ret), KP(this)); + } else if (FALSE_IT(data_format_version = tablet_direct_load_mgr_->get_data_format_version())) { + } else if (DATA_VERSION_4_3_0_0 > data_format_version) { + if (OB_FAIL(fill_lob_into_memtable(allocator, info, lob_column_idxs, col_types, lob_inrow_threshold, datum_row))) { + LOG_WARN("fill lob into memtable failed", K(ret), K(data_format_version)); + } + } else if (OB_FAIL(fill_lob_into_macro_block(allocator, 
iter_allocator, start_scn, info, + pk_interval, lob_column_idxs, col_types, lob_inrow_threshold, datum_row))) { + LOG_WARN("fill lob into macro block failed", K(ret), K(data_format_version)); + } + return ret; +} + +int ObDirectLoadSliceWriter::fill_lob_into_memtable( + ObIAllocator &allocator, + const ObBatchSliceWriteInfo &info, + const ObArray &lob_column_idxs, + const ObArray &col_types, + const int64_t lob_inrow_threshold, + blocksstable::ObDatumRow &datum_row) +{ + // to insert lob data into memtable. + int ret = OB_SUCCESS; + const int64_t timeout_ts = + ObTimeUtility::fast_current_time() + (ObInsertLobColumnHelper::LOB_ACCESS_TX_TIMEOUT * lob_column_idxs.count()); + for (int64_t i = 0; OB_SUCC(ret) && i < lob_column_idxs.count(); i++) { + const int64_t idx = lob_column_idxs.at(i); + ObStorageDatum &datum = datum_row.storage_datums_[idx]; + ObLobStorageParam lob_storage_param; + lob_storage_param.inrow_threshold_ = lob_inrow_threshold; + if (OB_FAIL(ObInsertLobColumnHelper::insert_lob_column( + allocator, info.ls_id_, info.data_tablet_id_, col_types.at(i).get_collation_type(), + lob_storage_param, datum, timeout_ts, true/*has_lob_header*/, MTL_ID()))) { + LOG_WARN("fail to insert_lob_col", K(ret), K(datum)); + } + } + return ret; +} + +int ObDirectLoadSliceWriter::fill_lob_into_macro_block( + ObIAllocator &allocator, + ObIAllocator &iter_allocator, + const SCN &start_scn, + const ObBatchSliceWriteInfo &info, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + const int64_t lob_inrow_threshold, + blocksstable::ObDatumRow &datum_row) +{ + // to insert lob data into macro block. 
+ int ret = OB_SUCCESS; + int64_t unused_affected_rows = 0; + const int64_t timeout_ts = + ObTimeUtility::fast_current_time() + (ObInsertLobColumnHelper::LOB_ACCESS_TX_TIMEOUT * lob_column_idxs.count()); + for (int64_t i = 0; OB_SUCC(ret) && i < lob_column_idxs.count(); i++) { + int64_t idx = lob_column_idxs.at(i); + ObStorageDatum &datum = datum_row.storage_datums_[idx]; + if (!datum.is_nop() && !datum.is_null()) { + uint64_t pk_seq = OB_INVALID_ID; + if (OB_FAIL(pk_interval.next_value(pk_seq))) { + LOG_WARN("fail to get next lob_id", K(ret), K(pk_seq)); + } else { + ObLobId lob_id; + lob_id.lob_id_ = pk_seq; + lob_id.tablet_id_ = tablet_direct_load_mgr_->get_tablet_id().id(); // lob meta tablet id. + ObLobMetaRowIterator *row_iter = nullptr; + if (OB_FAIL(prepare_iters(allocator, iter_allocator, datum, info.ls_id_, + info.data_tablet_id_, info.trans_version_, col_types.at(i).get_collation_type(), lob_id, + info.trans_id_, info.seq_no_, timeout_ts, lob_inrow_threshold, row_iter))) { + LOG_WARN("fail to prepare iters", K(ret), KP(row_iter), K(datum)); + } else { + while (OB_SUCC(ret)) { + const blocksstable::ObDatumRow *cur_row = nullptr; + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", K(ret)); + } else if (ATOMIC_LOAD(&is_canceled_)) { + ret = OB_CANCELED; + LOG_WARN("fil lob task canceled", K(ret), K(is_canceled_)); + } else if (OB_FAIL(row_iter->get_next_row(cur_row))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("get next row failed", K(ret)); + } + } else if (OB_ISNULL(cur_row) || !cur_row->is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), KPC(cur_row)); + } else if (OB_FAIL(check_null(false/*is_index_table*/, ObLobMetaUtil::LOB_META_SCHEMA_ROWKEY_COL_CNT, *cur_row))) { + LOG_WARN("fail to check null value in row", KR(ret), KPC(cur_row)); + } else if (OB_FAIL(prepare_slice_store_if_need(ObLobMetaUtil::LOB_META_SCHEMA_ROWKEY_COL_CNT, + false/*is_column_store*/, 
NONE_COMPRESSOR/*do not use compressort*/, start_scn))) { + LOG_WARN("prepare macro block writer failed", K(ret)); + } else if (OB_FAIL(slice_store_->append_row(*cur_row))) { + LOG_WARN("macro block writer append row failed", K(ret), KPC(cur_row)); + } + if (OB_SUCC(ret)) { + ++unused_affected_rows; + LOG_DEBUG("sstable insert op append row", K(unused_affected_rows), KPC(cur_row)); + } + } + if (OB_SUCC(ret)) { + if (OB_NOT_NULL(meta_write_iter_)) { + meta_write_iter_->reuse(); + } + if (OB_NOT_NULL(row_iterator_)) { + row_iterator_->reuse(); + } + } + } + } + } + } + return ret; +} + +int ObDirectLoadSliceWriter::fill_sstable_slice( + const SCN &start_scn, + const uint64_t table_id, + const ObTabletID &tablet_id, + ObIStoreRowIterator *row_iter, + const ObTableSchemaItem &schema_item, + const ObDirectLoadType &direct_load_type, + const ObArray &column_items, + int64_t &affected_rows, + ObInsertMonitor *insert_monitor) +{ + int ret = OB_SUCCESS; + affected_rows = 0; + const bool is_full_direct_load_task = is_full_direct_load(direct_load_type); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadSliceWriter not init", KR(ret), KP(this)); + } else { + ObArenaAllocator arena("SliceW_sst", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + const ObDataStoreDesc &data_desc = tablet_direct_load_mgr_->get_sqc_build_ctx().data_block_desc_.get_desc(); + + while (OB_SUCC(ret)) { + arena.reuse(); + const blocksstable::ObDatumRow *cur_row = nullptr; + if (OB_FAIL(THIS_WORKER.check_status())) { + LOG_WARN("check status failed", K(ret)); + } else if (ATOMIC_LOAD(&is_canceled_)) { + ret = OB_CANCELED; + LOG_WARN("fil sstable task canceled", K(ret), K(is_canceled_)); + } else if (OB_FAIL(row_iter->get_next_row(cur_row))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("get next row failed", K(ret)); + } + } else if (OB_ISNULL(cur_row) || !cur_row->is_valid() || cur_row->get_column_count() != data_desc.get_col_desc_array().count()) { + ret = 
OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), KPC(cur_row), K(data_desc.get_col_desc_array())); + } else { // row reshape + for (int64_t i = 0; OB_SUCC(ret) && i < cur_row->get_column_count(); ++i) { + const ObColDesc &col_desc = data_desc.get_col_desc_array().at(i); + ObStorageDatum &datum_cell = cur_row->storage_datums_[i]; + if (i >= schema_item.rowkey_column_num_ && i < schema_item.rowkey_column_num_ + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()) { + // skip multi version column + } else if (datum_cell.is_null()) { + //ignore null + } else if (OB_UNLIKELY(i >= column_items.count()) || OB_UNLIKELY(!column_items.at(i).is_valid_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column schema is wrong", K(ret), K(i), K(column_items)); + } else if (OB_FAIL(ObDASUtils::reshape_datum_value(column_items.at(i).col_type_, column_items.at(i).col_accuracy_, true/*enable_oracle_empty_char_reshape_to_null*/, arena, datum_cell))) { + LOG_WARN("reshape storage datum failed", K(ret)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(check_null(schema_item.is_index_table_, schema_item.rowkey_column_num_, *cur_row))) { + LOG_WARN("fail to check null value in row", KR(ret), KPC(cur_row)); + } else if (OB_FAIL(prepare_slice_store_if_need(schema_item.rowkey_column_num_, schema_item.is_column_store_, + schema_item.compress_type_, start_scn))) { + LOG_WARN("prepare macro block writer failed", K(ret)); + } else if (OB_FAIL(slice_store_->append_row(*cur_row))) { + if (is_full_direct_load_task && OB_ERR_PRIMARY_KEY_DUPLICATE == ret && schema_item.is_unique_index_) { + int report_ret_code = OB_SUCCESS; + LOG_USER_ERROR(OB_ERR_PRIMARY_KEY_DUPLICATE, "", static_cast(sizeof("UNIQUE IDX") - 1), "UNIQUE IDX"); + (void) report_unique_key_dumplicated(ret, table_id, *cur_row, tablet_direct_load_mgr_->get_tablet_id(), report_ret_code); // ignore ret + if (OB_ERR_DUPLICATED_UNIQUE_KEY == report_ret_code) { + //error message of OB_ERR_PRIMARY_KEY_DUPLICATE is not compatiable 
with oracle, so use a new error code + ret = OB_ERR_DUPLICATED_UNIQUE_KEY; + } + } else { + LOG_WARN("macro block writer append row failed", K(ret), KPC(cur_row), KPC(cur_row)); + } + } + if (OB_SUCC(ret)) { + LOG_DEBUG("sstable insert op append row", KPC(cur_row), KPC(cur_row)); + ++affected_rows; + if (OB_NOT_NULL(insert_monitor)) { + insert_monitor->inserted_row_cnt_ = insert_monitor->inserted_row_cnt_ + 1; + } + } + } + } + return ret; +} + +int ObDirectLoadSliceWriter::report_unique_key_dumplicated( + const int ret_code, const uint64_t table_id, const ObDatumRow &datum_row, + const ObTabletID &tablet_id, int &report_ret_code) +{ + int ret = OB_SUCCESS; + report_ret_code = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = nullptr; + if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( + MTL_ID(), schema_guard))) { + LOG_WARN("get tenant schema failed", K(ret), K(table_id), K(MTL_ID()), K(table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), + table_id, table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(MTL_ID()), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(MTL_ID()), K(table_id)); + } else { + const int64_t rowkey_column_num = table_schema->get_rowkey_column_num(); + char index_key_buffer[OB_TMP_BUF_SIZE_256] = { 0 }; + int64_t task_id = 0; + ObDatumRowkey index_key; + ObDDLErrorMessageTableOperator::ObDDLErrorInfo error_info; + index_key.assign(datum_row.storage_datums_, rowkey_column_num); + if (OB_FAIL(ObDDLErrorMessageTableOperator::extract_index_key(*table_schema, index_key, index_key_buffer, OB_TMP_BUF_SIZE_256))) { // read the unique key that violates the unique constraint + LOG_WARN("extract unique index key failed", K(ret), K(index_key), K(index_key_buffer)); + } else if (OB_FAIL(ObDDLErrorMessageTableOperator::get_index_task_info(*GCTX.sql_proxy_, *table_schema, error_info))) { 
+ LOG_WARN("get task id of index table failed", K(ret), K(task_id), K(table_schema)); + } else if (OB_FAIL(ObDDLErrorMessageTableOperator::generate_index_ddl_error_message(ret_code, *table_schema, ObCurTraceId::get_trace_id_str(), + error_info.task_id_, error_info.parent_task_id_, tablet_id.id(), GCTX.self_addr(), *GCTX.sql_proxy_, index_key_buffer, report_ret_code))) { + LOG_WARN("generate index ddl error message", K(ret), K(ret), K(report_ret_code)); + } + } + return ret; +} + +int ObDirectLoadSliceWriter::check_null( + const bool is_index_table, + const int64_t rowkey_column_num, + const ObDatumRow &row_val) const +{ + int ret = OB_SUCCESS; + if (is_index_table) { + // index table is index-organized but can have null values in index column + } else if (OB_UNLIKELY(rowkey_column_num > row_val.get_column_count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid rowkey column number", KR(ret), K(rowkey_column_num), K(row_val)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_column_num; i++) { + const ObStorageDatum &cell = row_val.storage_datums_[i]; + if (cell.is_null()) { + ret = OB_ER_INVALID_USE_OF_NULL; + LOG_WARN("invalid null cell for row key column", KR(ret), K(cell)); + } + } + } + return ret; +} + +int ObDirectLoadSliceWriter::close() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadSliceWriter not init", KR(ret), KP(this)); + } else if (nullptr != slice_store_ && OB_FAIL(slice_store_->close())) { + LOG_WARN("close slice store failed", K(ret)); + } + return ret; +} + +int ObDirectLoadSliceWriter::fill_column_group(const ObStorageSchema *storage_schema, const SCN &start_scn, ObInsertMonitor* insert_monitor) +{ + int ret = OB_SUCCESS; + ObChunkSliceStore *chunk_slice_store = static_cast(slice_store_); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(nullptr == storage_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid 
argument", K(ret), KP(storage_schema)); + } else if (OB_UNLIKELY(row_offset_ < 0)) { + ret = OB_ERR_SYS; + LOG_WARN("row offset not set", K(ret), K(row_offset_)); + } else if (nullptr == chunk_slice_store || is_empty()) { + // do nothing + LOG_INFO("chunk slice store is null or empty", K(ret), + KPC(chunk_slice_store), KPC(tablet_direct_load_mgr_)); + } else if (ATOMIC_LOAD(&is_canceled_)) { + ret = OB_CANCELED; + LOG_WARN("fil cg task canceled", K(ret), K(is_canceled_)); + } else { + const ObIArray &cg_schemas = storage_schema->get_column_groups(); + const int64_t MAX_CO_BATCH_SIZE = 10; // todo @qilu: add opt hint for batch_cnt + ObArray co_ddl_writers; + co_ddl_writers.set_attr(ObMemAttr(MTL_ID(), "DL_co_writers")); + ObTimeGuard tg("fill_column_group", 1000L * 1000L * 600L); // 10 mins + FLOG_INFO("[DDL_FILL_CG] fill column group start", + "tablet_id", tablet_direct_load_mgr_->get_tablet_id(), + "row_count", chunk_slice_store->get_row_count(), + "column_group_count", cg_schemas.count()); + + // 1. 
reserve writers + const int64_t batch_count = MIN(MAX_CO_BATCH_SIZE, cg_schemas.count()); + // reserve once before allocating, so a reserve failure cannot strand an already constructed writer + if (OB_FAIL(co_ddl_writers.reserve(batch_count))) { + LOG_WARN("fail to reserve writers array", K(ret), K(batch_count)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < batch_count; ++i) { + ObCOSliceWriter *tmp_writer = nullptr; + if (OB_ISNULL(tmp_writer = OB_NEWx(ObCOSliceWriter, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for co writer failed", K(ret)); + } else if (OB_FAIL(co_ddl_writers.push_back(tmp_writer))) { + LOG_WARN("push back co writer failed", K(ret)); + tmp_writer->~ObCOSliceWriter(); + allocator_.free(tmp_writer); + } + } + int64_t cg_idx = 0; + while (OB_SUCC(ret) && cg_idx < cg_schemas.count()) { + tg.click("batch_fill"); + int64_t current_batch_count = batch_count; + for (int64_t i = 0; OB_SUCC(ret) && i < batch_count; ++i) { + if (cg_idx >= cg_schemas.count()) { + current_batch_count = i; + break; + } else { + const ObStorageColumnGroupSchema &cg_schema = cg_schemas.at(cg_idx); + ObCOSliceWriter *cur_writer = co_ddl_writers.at(i); + cur_writer->reset(); + if (OB_FAIL(cur_writer->init(storage_schema, cg_idx, tablet_direct_load_mgr_, start_seq_, row_offset_, start_scn))) { + LOG_WARN("init co ddl writer failed", K(ret), K(i), K(cg_idx), KPC(this)); + } else { + ++cg_idx; + } + } + } + if (OB_SUCC(ret)) { + // 2. 
rescan and write + const ObChunkDatumStore::StoredRow *stored_row = nullptr; + bool has_next = false; + chunk_slice_store->datum_store_.rescan(); + int64_t begin_ts = ObTimeUtility::fast_current_time(); + // rows written in this batch; must live outside the loop so the periodic progress log below can fire + int64_t row_count = 0; + while (OB_SUCC(ret) && OB_SUCC(chunk_slice_store->datum_store_.has_next(has_next)) && has_next) { + if (row_count > 0 && row_count % (10L * 10000L) == 0) { // print log per 10w records + int64_t curr_ts = ObTimeUtility::fast_current_time(); + FLOG_INFO("[DDL_FILL_CG] rescan and fill", "tablet_id", tablet_direct_load_mgr_->get_tablet_id(), + "start_cg_idx", cg_idx - current_batch_count, + K(current_batch_count), K(row_count), "cost_time_us", curr_ts - begin_ts); + begin_ts = curr_ts; + } + if (OB_FAIL(chunk_slice_store->datum_store_.get_next_row(stored_row))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("get next row failed", K(ret)); + } + } else { + ++row_count; + if (OB_NOT_NULL(insert_monitor)) { + insert_monitor->inserted_cg_row_cnt_ = insert_monitor->inserted_cg_row_cnt_ + current_batch_count; + } + for (int64_t i = 0; OB_SUCC(ret) && i < current_batch_count; ++i) { + ObCOSliceWriter *cur_writer = co_ddl_writers.at(i); + if (OB_FAIL(cur_writer->append_row(stored_row))) { + LOG_WARN("append row failed", K(ret), KPC(stored_row), K(row_count)); + } + } + } + } + } + + if (OB_SUCC(ret)) { + // 3. close writers + for (int64_t i = 0; OB_SUCC(ret) && i < current_batch_count; ++i) { + ObCOSliceWriter *cur_writer = co_ddl_writers.at(i); + if (OB_FAIL(cur_writer->close())) { + LOG_WARN("close co ddl writer failed", K(ret)); + } + } + } + FLOG_INFO("[DDL_FILL_CG] finish cg batch", "tablet_id", tablet_direct_load_mgr_->get_tablet_id(), + "next_cg_idx", cg_idx, "total_cg_count", cg_schemas.count(), K(current_batch_count)); + } + + tg.click("fill_end"); + // 4. 
free writers, ignore ret + for (int64_t i = 0; i < co_ddl_writers.count(); ++i) { + ObCOSliceWriter *cur_writer = co_ddl_writers.at(i); + if (OB_NOT_NULL(cur_writer)) { + cur_writer->~ObCOSliceWriter(); + allocator_.free(cur_writer); + } + } + co_ddl_writers.reset(); + FLOG_INFO("[DDL_FILL_CG] fill column group finished", + "tablet_id", tablet_direct_load_mgr_->get_tablet_id(), + "row_count", chunk_slice_store->get_row_count(), + "column_group_count", cg_schemas.count(), + "time_cost_us", tg.get_diff()); + } + return ret; +} + + +void ObCOSliceWriter::reset() +{ + is_inited_ = false; + cg_row_.reset(); + macro_block_writer_.reset(); + flush_callback_.reset(); + ddl_clog_writer_.reset(); + index_builder_.reset(); + data_desc_.reset(); + cg_schema_ = nullptr; + cg_idx_ = -1; +} + +int ObCOSliceWriter::init(const ObStorageSchema *storage_schema, const int64_t cg_idx, + ObTabletDirectLoadMgr *tablet_direct_load_mgr, const ObMacroDataSeq &start_seq, const int64_t row_id_offset, + const SCN &start_scn) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(nullptr == storage_schema || cg_idx < 0 || cg_idx >= storage_schema->get_column_group_count() + || nullptr == tablet_direct_load_mgr || !start_seq.is_valid() || row_id_offset < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(cg_idx), K(row_id_offset), K(start_seq), KPC(tablet_direct_load_mgr), KPC(storage_schema)); + } else { + const ObStorageColumnGroupSchema &cg_schema = storage_schema->get_column_groups().at(cg_idx); + ObITable::TableKey table_key = tablet_direct_load_mgr->get_table_key(); // TODO(cangdi): fix it + table_key.column_group_idx_ = cg_idx; + table_key.table_type_ = (cg_schema.is_all_column_group() || cg_schema.is_rowkey_column_group()) ? 
+ ObITable::TableType::COLUMN_ORIENTED_SSTABLE : ObITable::TableType::NORMAL_COLUMN_GROUP_SSTABLE; + const int64_t ddl_task_id = tablet_direct_load_mgr->get_ddl_task_id(); + const uint64_t data_format_version = tablet_direct_load_mgr->get_data_format_version(); + ObLSID ls_id = tablet_direct_load_mgr->get_ls_id(); + + if (OB_FAIL(data_desc_.init(*storage_schema, + ls_id, + table_key.get_tablet_id(), + compaction::ObMergeType::MAJOR_MERGE, + table_key.get_snapshot_version(), + data_format_version, + SCN::min_scn(), + &cg_schema, + cg_idx))) { + LOG_WARN("init data store desc failed", K(ret)); + } else if (OB_FAIL(index_builder_.init(data_desc_.get_desc(), nullptr/*macro block flush callback*/, ObSSTableIndexBuilder::ENABLE))) { // data_desc is deep copied + LOG_WARN("init sstable index builder failed", K(ret), K(ls_id), K(table_key), K(data_desc_)); + } else if (FALSE_IT(data_desc_.get_desc().sstable_index_builder_ = &index_builder_)) { // for build the tail index block in macro block + } else if (OB_FAIL(ddl_clog_writer_.init(ls_id, table_key.tablet_id_))) { + LOG_WARN("ddl clog writer init failed", K(ret), K(ls_id), K(table_key)); + } else if (OB_FAIL(flush_callback_.init(DDL_MB_DATA_TYPE, table_key, ddl_task_id, + start_scn, data_format_version, &ddl_clog_writer_, row_id_offset))) { + LOG_WARN("fail to init redo log writer callback", KR(ret)); + } else if (OB_FAIL(macro_block_writer_.open(data_desc_.get_desc(), start_seq, &flush_callback_))) { + LOG_WARN("fail to open macro block writer", K(ret), K(ls_id), K(table_key), K(data_desc_), K(start_seq)); + } else if (OB_FAIL(cg_row_.init(cg_schema.column_cnt_))) { + LOG_WARN("init column group row failed", K(ret)); + } else { + cg_idx_ = cg_idx; + cg_schema_ = &cg_schema; + is_inited_ = true; + } + } + LOG_DEBUG("co ddl writer init", K(ret), K(cg_idx), K(row_id_offset), KPC(this)); + return ret; +} + +int ObCOSliceWriter::append_row(const sql::ObChunkDatumStore::StoredRow *stored_row) +{ + int ret = OB_SUCCESS; + if 
(OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(project_cg_row(*cg_schema_, stored_row, cg_row_))) { + LOG_WARN("project column group row failed", K(ret)); + } else if (OB_FAIL(macro_block_writer_.append_row(cg_row_))) { + LOG_WARN("write column group row failed", K(ret)); + } + return ret; +} + +int ObCOSliceWriter::project_cg_row(const ObStorageColumnGroupSchema &cg_schema, + const ObChunkDatumStore::StoredRow *stored_row, + ObDatumRow &cg_row) +{ + int ret = OB_SUCCESS; + cg_row.reuse(); + cg_row.row_flag_.set_flag(ObDmlFlag::DF_INSERT); + if (OB_UNLIKELY(!cg_schema.is_valid() || nullptr == stored_row)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(cg_schema), KP(stored_row)); + } else if (cg_schema.column_cnt_ > stored_row->cnt_ || cg_row.get_column_count() != cg_schema.column_cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column count not match", K(ret), K(stored_row->cnt_), K(cg_row), K(cg_schema)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < cg_schema.column_cnt_; ++i) { + int64_t column_idx = cg_schema.column_idxs_ ? 
cg_schema.column_idxs_[i] : i; + if (column_idx >= stored_row->cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid column idex", K(ret)); + } else { + const ObDatum &cur_datum = stored_row->cells()[column_idx]; + cg_row.storage_datums_[i].set_datum(cur_datum); + } + } + } + return ret; +} + +int ObCOSliceWriter::close() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(macro_block_writer_.close())) { + LOG_WARN("close macro block writer failed", K(ret)); + } + LOG_DEBUG("co ddl writer close", K(ret), KPC(this)); + return ret; +} + +ObInsertMonitor::~ObInsertMonitor() +{ +} diff --git a/src/storage/ddl/ob_direct_load_struct.h b/src/storage/ddl/ob_direct_load_struct.h new file mode 100644 index 000000000..cba5d25c5 --- /dev/null +++ b/src/storage/ddl/ob_direct_load_struct.h @@ -0,0 +1,612 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_DDL_OB_DIRECT_LOAD_COMMON_H +#define OCEANBASE_STORAGE_DDL_OB_DIRECT_LOAD_COMMON_H + +#include "lib/lock/ob_mutex.h" +#include "lib/lock/ob_bucket_lock.h" +#include "common/ob_tablet_id.h" +#include "common/row/ob_row_iterator.h" +#include "share/scn.h" +#include "share/ob_tablet_autoincrement_param.h" +#include "share/scheduler/ob_tenant_dag_scheduler.h" +#include "share/ob_ddl_common.h" +#include "sql/engine/basic/ob_chunk_datum_store.h" +#include "sql/engine/basic/chunk_store/ob_compact_store.h" +#include "storage/ob_i_table.h" +#include "storage/access/ob_store_row_iterator.h" +#include "storage/blocksstable/index_block/ob_index_block_builder.h" +#include "storage/blocksstable/ob_macro_block_struct.h" +#include "storage/blocksstable/ob_imacro_block_flush_callback.h" +#include "storage/ddl/ob_ddl_redo_log_writer.h" +#include "storage/lob/ob_lob_meta.h" + +namespace oceanbase +{ +namespace sql +{ +class ObPxMultiPartSSTableInsertOp; +class ObExecContext; +} + +namespace storage +{ +class ObTablet; + +struct ObBatchSliceWriteInfo final +{ +public: + ObBatchSliceWriteInfo() + : data_tablet_id_(), // tablet id of the data table. 
+ ls_id_(), + trans_version_(0), + direct_load_type_(), + trans_id_(), + seq_no_(0) + { } + ObBatchSliceWriteInfo(const common::ObTabletID &tablet_id, const share::ObLSID &ls_id, const int64_t &trans_version, + const ObDirectLoadType &direct_load_type, const transaction::ObTransID &trans_id, const int64_t &seq_no) + : data_tablet_id_(tablet_id), + ls_id_(ls_id), + trans_version_(trans_version), + direct_load_type_(direct_load_type), + trans_id_(trans_id), + seq_no_(seq_no) + + { } + ~ObBatchSliceWriteInfo() = default; + TO_STRING_KV(K(ls_id_), K(data_tablet_id_), K(trans_version_), K(direct_load_type_)); +public: + common::ObTabletID data_tablet_id_; + share::ObLSID ls_id_; + int64_t trans_version_; + ObDirectLoadType direct_load_type_; + transaction::ObTransID trans_id_; + int64_t seq_no_; // +}; + +struct ObTabletDirectLoadMgrKey final +{ +public: + ObTabletDirectLoadMgrKey() + : tablet_id_(), is_full_direct_load_(false) + { } + ObTabletDirectLoadMgrKey(const common::ObTabletID &tablet_id, const bool is_full_direct_load) + : tablet_id_(tablet_id), is_full_direct_load_(is_full_direct_load) + { } + ~ObTabletDirectLoadMgrKey() = default; + uint64_t hash() const { + return tablet_id_.hash() + murmurhash(&is_full_direct_load_, sizeof(is_full_direct_load_), 0); + } + int hash(uint64_t &hash_val) const {hash_val = hash(); return OB_SUCCESS;} + bool is_valid() const { return tablet_id_.is_valid(); } + bool operator == (const ObTabletDirectLoadMgrKey &other) const { + return tablet_id_ == other.tablet_id_ && is_full_direct_load_ == other.is_full_direct_load_; } + TO_STRING_KV(K_(tablet_id), K_(is_full_direct_load)); +public: + common::ObTabletID tablet_id_; + bool is_full_direct_load_; +}; + +struct ObDirectLoadSliceInfo final +{ +public: + ObDirectLoadSliceInfo() + : is_full_direct_load_(false), is_lob_slice_(false), ls_id_(), data_tablet_id_(), slice_id_(-1), + context_id_(0) + { } + ~ObDirectLoadSliceInfo() = default; + bool is_valid() const { return ls_id_.is_valid() 
&& data_tablet_id_.is_valid() && slice_id_ >= 0 && context_id_ >= 0; } + TO_STRING_KV(K_(is_full_direct_load), K_(is_lob_slice), K_(ls_id), K_(data_tablet_id), K_(slice_id), K_(context_id)); +public: + bool is_full_direct_load_; + bool is_lob_slice_; + share::ObLSID ls_id_; + common::ObTabletID data_tablet_id_; + int64_t slice_id_; + int64_t context_id_; +DISALLOW_COPY_AND_ASSIGN(ObDirectLoadSliceInfo); +}; + +struct ObTableSchemaItem final +{ +public: + ObTableSchemaItem() + : is_column_store_(false), is_index_table_(false), is_unique_index_(false), rowkey_column_num_(0), + compress_type_(NONE_COMPRESSOR), lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD) + {} + ~ObTableSchemaItem() { reset(); } + void reset() + { + is_column_store_ = false; + is_index_table_ = false; + is_unique_index_ = false; + rowkey_column_num_ = 0; + compress_type_ = NONE_COMPRESSOR; + lob_inrow_threshold_ = OB_DEFAULT_LOB_INROW_THRESHOLD; + } + TO_STRING_KV(K(is_column_store_), K(is_index_table_), K(is_unique_index_), K(rowkey_column_num_), + K(compress_type_), K_(lob_inrow_threshold)); + +public: + bool is_column_store_; + bool is_index_table_; + bool is_unique_index_; + int64_t rowkey_column_num_; + common::ObCompressorType compress_type_; + int64_t lob_inrow_threshold_; +}; + +struct ObColumnSchemaItem final +{ +public: + ObColumnSchemaItem() + : is_valid_(false), col_type_(), col_accuracy_() + {} + ObColumnSchemaItem(const ObColumnSchemaItem &other) + { + *this = other; + } + ~ObColumnSchemaItem() { reset(); } + void reset() + { + is_valid_ = false; + col_type_.reset(); + col_accuracy_.reset(); + } + ObColumnSchemaItem &operator=(const ObColumnSchemaItem &other) + { + is_valid_ = other.is_valid_; + col_type_ = other.col_type_; + col_accuracy_ = other.col_accuracy_; + return *this; + } + int assign(const ObColumnSchemaItem &other) + { + is_valid_ = other.is_valid_; + col_type_ = other.col_type_; + col_accuracy_ = other.col_accuracy_; + return OB_SUCCESS; + } + 
TO_STRING_KV(K(is_valid_), K(col_type_), K(col_accuracy_)); +public: + bool is_valid_; + common::ObObjMeta col_type_; + ObAccuracy col_accuracy_; +}; + +// usued in replay replay and runtime execution +struct ObDirectInsertCommonParam final +{ +public: + ObDirectInsertCommonParam() + : ls_id_(), tablet_id_(), direct_load_type_(DIRECT_LOAD_INVALID), data_format_version_(0), read_snapshot_(0) + {} + ~ObDirectInsertCommonParam() = default; + bool is_valid() const { return ls_id_.is_valid() && tablet_id_.is_valid() + && data_format_version_ >= 0 && read_snapshot_ >= 0 && DIRECT_LOAD_INVALID <= direct_load_type_ && direct_load_type_ <= DIRECT_LOAD_MAX; + } + TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(direct_load_type), K_(data_format_version), K_(read_snapshot)); +public: + share::ObLSID ls_id_; + common::ObTabletID tablet_id_; + ObDirectLoadType direct_load_type_; + uint64_t data_format_version_; + // read_snapshot_ is used to scan the source data. + // For full direct load task, it is also the commit version of the target macro block. 
+ int64_t read_snapshot_; +}; + +// only used in runtime execution +struct ObDirectInsertRuntimeOnlyParam final +{ +public: + ObDirectInsertRuntimeOnlyParam() + : exec_ctx_(nullptr), task_id_(0), table_id_(OB_INVALID_ID), schema_version_(0), task_cnt_(0), need_online_opt_stat_gather_(false), trans_id_(), seq_no_(0) + {} + ~ObDirectInsertRuntimeOnlyParam() = default; + bool is_valid() const { return OB_INVALID_ID != task_id_ && OB_INVALID_ID != table_id_ && schema_version_ > 0 && task_cnt_ >= 0; } + TO_STRING_KV(KP_(exec_ctx), K_(task_id), K_(table_id), K_(schema_version), K_(task_cnt), K_(need_online_opt_stat_gather), K_(trans_id), K_(seq_no)); +public: + sql::ObExecContext *exec_ctx_; + int64_t task_id_; + int64_t table_id_; + int64_t schema_version_; + int64_t task_cnt_; + bool need_online_opt_stat_gather_; + // default value is invalid tx_id, + // participant tx_id for the incremental direct load, + // and invalid tx_id for the full_direct_load. + transaction::ObTransID trans_id_; + // default value is 0, + // sequence number for the incremental direct load, + // fixed 0 for the full direct load. + int64_t seq_no_; // +}; + +// full parameters used by runtime execution +struct ObTabletDirectLoadInsertParam final +{ +public: + ObTabletDirectLoadInsertParam() + : common_param_(), runtime_only_param_(), is_replay_(false) + {} + ~ObTabletDirectLoadInsertParam() = default; + bool is_valid() const { + return (!is_replay_ && (common_param_.is_valid() && runtime_only_param_.is_valid())) + || (is_replay_ && common_param_.is_valid()); + } + int assign(const ObTabletDirectLoadInsertParam &other_param); + TO_STRING_KV(K_(common_param), K_(runtime_only_param), K_(is_replay)); +public: + ObDirectInsertCommonParam common_param_; + ObDirectInsertRuntimeOnlyParam runtime_only_param_; + bool is_replay_; +}; + +// for ddl insert row. 
+class ObDDLInsertRowIterator : public ObIStoreRowIterator +{ +public: + ObDDLInsertRowIterator( + sql::ObPxMultiPartSSTableInsertOp *op, + const bool is_slice_empty, + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const int64_t rowkey_cnt, + const int64_t snapshot_version, + const int64_t context_id); + virtual ~ObDDLInsertRowIterator(); + virtual int get_next_row(const blocksstable::ObDatumRow *&row) override; + TO_STRING_KV(K_(ls_id), K_(current_tablet_id), K_(current_row), K_(is_slice_empty), K_(is_next_row_cached), K_(rowkey_count), K_(snapshot_version), + K_(lob_slice_id), K_(lob_id_cache), K_(context_id)); +public: + int switch_to_new_lob_slice(); + int close_lob_sstable_slice(); + inline int64_t get_lob_slice_id() { return lob_slice_id_; } + inline share::ObTabletCacheInterval &get_lob_id_cache() { return lob_id_cache_; } +private: + static const int64_t AUTO_INC_CACHE_SIZE = 100000; // 10w. + ObArenaAllocator lob_allocator_; + sql::ObPxMultiPartSSTableInsertOp *op_; + share::ObLSID ls_id_; + common::ObTabletID current_tablet_id_; // data_tablet_id rather than lob_meta_tablet_id. + blocksstable::ObDatumRow current_row_; + bool is_next_row_cached_; + bool is_slice_empty_; // without data. 
+ int64_t rowkey_count_; + int64_t snapshot_version_; + int64_t lob_slice_id_; + share::ObTabletCacheInterval lob_id_cache_; + int64_t context_id_; +}; + +class ObLobMetaRowIterator : public ObIStoreRowIterator +{ +public: + ObLobMetaRowIterator(); + virtual ~ObLobMetaRowIterator(); + int init(ObLobMetaWriteIter *iter, + const transaction::ObTransID &trans_id, + const int64_t trans_version, + const int64_t sql_no); + void reset(); + void reuse(); + virtual int get_next_row(const blocksstable::ObDatumRow *&row) override; +// private: +public: + bool is_inited_; + ObLobMetaWriteIter *iter_; + transaction::ObTransID trans_id_; + int64_t trans_version_; + int64_t sql_no_; + blocksstable::ObDatumRow tmp_row_; + ObLobMetaWriteResult lob_meta_write_result_; +}; + +struct ObTabletDDLParam final +{ +public: + ObTabletDDLParam(); + ~ObTabletDDLParam(); + bool is_valid() const; + TO_STRING_KV(K_(direct_load_type), K_(ls_id), K_(start_scn), K_(commit_scn), K_(data_format_version), K_(table_key), K_(snapshot_version)); +public: + ObDirectLoadType direct_load_type_; + share::ObLSID ls_id_; + share::SCN start_scn_; + share::SCN commit_scn_; + uint64_t data_format_version_; + ObITable::TableKey table_key_; + int64_t snapshot_version_; // used for full direct load only. 
+}; + +struct ObDDLTableMergeDagParam : public share::ObIDagInitParam +{ +public: + ObDDLTableMergeDagParam() + : direct_load_type_(ObDirectLoadType::DIRECT_LOAD_INVALID), + ls_id_(), + tablet_id_(), + rec_scn_(share::SCN::min_scn()), + is_commit_(false), + start_scn_(share::SCN::min_scn()), + data_format_version_(0), + snapshot_version_(0) + { } + bool is_valid() const + { + bool is_valid = data_format_version_ > 0 && snapshot_version_ > 0; + if (is_full_direct_load(direct_load_type_)) { + is_valid = ls_id_.is_valid() && tablet_id_.is_valid() && start_scn_.is_valid_and_not_min(); + } else if (is_incremental_direct_load(direct_load_type_)) { + is_valid = ls_id_.is_valid() && tablet_id_.is_valid(); + } + return is_valid; + } + virtual ~ObDDLTableMergeDagParam() = default; + TO_STRING_KV(K_(direct_load_type), K_(ls_id), K_(tablet_id), K_(rec_scn), K_(is_commit), K_(start_scn), + K_(data_format_version), K_(snapshot_version)); +public: + ObDirectLoadType direct_load_type_; + share::ObLSID ls_id_; + ObTabletID tablet_id_; + share::SCN rec_scn_; + bool is_commit_; + share::SCN start_scn_; // start log ts at schedule, for skipping expired task + uint64_t data_format_version_; + int64_t snapshot_version_; +}; + +// column organization of row in ObChunkDatumStore::SotredRow +// +// this organization is same as row of all_cg in macro block +// and match the column index in ObStorageColumnGroupSchema +class ObTabletSliceStore +{ +public: + ObTabletSliceStore() {} + virtual ~ObTabletSliceStore() {} + virtual int append_row(const blocksstable::ObDatumRow &datum_row) = 0; + virtual int close() = 0; + virtual int64_t get_row_count() const { return 0; } // dummy one + DECLARE_PURE_VIRTUAL_TO_STRING; +}; + +class ObChunkSliceStore : public ObTabletSliceStore +{ +public: + ObChunkSliceStore() : is_inited_(false), arena_allocator_(nullptr), rowkey_column_count_(0) {} + virtual ~ObChunkSliceStore() {} + int init(const int64_t rowkey_column_count, ObArenaAllocator &allocator, + const 
ObIArray &col_schema, + common::ObCompressorType compress_type = NONE_COMPRESSOR); + virtual int append_row(const blocksstable::ObDatumRow &datum_row) override; + virtual int close() override; + virtual int64_t get_row_count() const { return datum_store_.get_row_cnt(); } + TO_STRING_KV(K(is_inited_), KP(arena_allocator_), K(datum_store_), K(endkey_), K(rowkey_column_count_)); +public: + bool is_inited_; + ObArenaAllocator *arena_allocator_; + sql::ObCompactStore datum_store_; + blocksstable::ObDatumRowkey endkey_; + int64_t rowkey_column_count_; +}; + +class ObMacroBlockSliceStore: public ObTabletSliceStore +{ +public: + ObMacroBlockSliceStore() : is_inited_(false) {} + virtual ~ObMacroBlockSliceStore() {} + int init( + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &data_seq, + const share::SCN &start_scn); + virtual int append_row(const blocksstable::ObDatumRow &datum_row) override; + virtual int close() override; + TO_STRING_KV(K(is_inited_), K(macro_block_writer_)); +private: + bool is_inited_; + ObDDLRedoLogWriter ddl_redo_writer_; + ObDDLRedoLogWriterCallback write_ddl_redo_callback_; + blocksstable::ObMacroBlockWriter macro_block_writer_; +}; + +class ObTabletDirectLoadMgr; + +struct ObInsertMonitor final{ +public: + ObInsertMonitor(int64_t &tmp_insert_row, int64_t &cg_insert_row):inserted_row_cnt_(tmp_insert_row), inserted_cg_row_cnt_(cg_insert_row) + {}; + ~ObInsertMonitor(); + void set(sql::ObMonitorNode &op_monitor_info); + +public: + int64_t &inserted_row_cnt_; + int64_t &inserted_cg_row_cnt_; +}; + +class ObDirectLoadSliceWriter final +{ +public: + ObDirectLoadSliceWriter(); + ~ObDirectLoadSliceWriter(); + int init( + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &start_seq); + int fill_sstable_slice( + const share::SCN &start_scn, + const uint64_t table_id, + const ObTabletID &curr_tablet_id, + ObIStoreRowIterator *row_iter, + const ObTableSchemaItem &schema_item, + const 
ObDirectLoadType &direct_load_type, + const ObArray &column_items, + int64_t &affected_rows, + ObInsertMonitor *insert_monitor = NULL); + int fill_lob_sstable_slice( + const uint64_t table_id, + ObIAllocator &allocator, + ObIAllocator &iter_allocator, + const share::SCN &start_scn, + const ObBatchSliceWriteInfo &info, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + const int64_t lob_inrow_threshold, + blocksstable::ObDatumRow &datum_row); + int close(); + int fill_column_group( + const ObStorageSchema *storage_schema, + const share::SCN &start_scn, + ObInsertMonitor *monitor_node = NULL); + void set_row_offset(const int64_t row_offset) { row_offset_ = row_offset; } + int64_t get_row_count() const { return nullptr == slice_store_ ? 0 : slice_store_->get_row_count(); } + int64_t get_row_offset() const { return row_offset_; } + bool is_empty() const { return 0 == get_row_count(); } + bool need_column_store() const { return need_column_store_; } + ObTabletSliceStore *get_slice_store() const { return slice_store_; } + void cancel() { ATOMIC_SET(&is_canceled_, true); } + TO_STRING_KV(K(is_inited_), K(need_column_store_), K(is_canceled_), K(start_seq_), KPC(slice_store_), K(row_offset_)); +private: + int fill_lob_into_memtable( // for version < 4.3.0.0 + ObIAllocator &allocator, + const ObBatchSliceWriteInfo &info, + const ObArray &lob_column_idxs, + const ObArray &col_types, + const int64_t lob_inrow_threshold, + blocksstable::ObDatumRow &datum_row); + int fill_lob_into_macro_block( // for version >= 4.3.0.0 + ObIAllocator &allocator, + ObIAllocator &iter_allocator, + const share::SCN &start_scn, + const ObBatchSliceWriteInfo &info, + share::ObTabletCacheInterval &pk_interval, + const ObArray &lob_column_idxs, + const ObArray &col_types, + const int64_t lob_inrow_threshold, + blocksstable::ObDatumRow &datum_row); + + int check_null( + const bool is_index_table, + const int64_t rowkey_column_cnt, + const 
blocksstable::ObDatumRow &row_val) const; + int prepare_slice_store_if_need( + const int64_t schema_rowkey_column_num, + const bool is_slice_store, + const ObCompressorType compress_type, + const share::SCN &start_scn); + int report_unique_key_dumplicated( + const int ret_code, + const uint64_t table_id, + const blocksstable::ObDatumRow &datum_row, + const common::ObTabletID &tablet_id, + int &report_ret_code); + int prepare_iters( + ObIAllocator &allocator, + ObIAllocator &iter_allocator, + blocksstable::ObStorageDatum &datum, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const int64_t trans_version, + const ObCollationType &cs_type, + const ObLobId &lob_id, + const transaction::ObTransID trans_id, + const int64_t seq_no, + const int64_t timeout_ts, + const int64_t lob_inrow_threshold, + ObLobMetaRowIterator *&row_iter); +private: + bool is_inited_; + bool need_column_store_; + bool is_canceled_; + blocksstable::ObMacroDataSeq start_seq_; + ObTabletDirectLoadMgr *tablet_direct_load_mgr_; + ObTabletSliceStore *slice_store_; + ObLobMetaWriteIter *meta_write_iter_; + ObLobMetaRowIterator *row_iterator_; + common::ObArenaAllocator allocator_; + int64_t row_offset_; +}; + +class ObCOSliceWriter +{ +public: + ObCOSliceWriter() : is_inited_(false), cg_idx_(-1), cg_schema_(nullptr), data_desc_(true /*is ddl*/) {} + ~ObCOSliceWriter() {} + int init( + const ObStorageSchema *storage_schema, + const int64_t cg_idx, + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &start_seq, + const int64_t row_id_offset, + const share::SCN &start_scn); + void reset(); + int append_row( + const sql::ObChunkDatumStore::StoredRow *stored_row); + int project_cg_row( + const ObStorageColumnGroupSchema &cg_schema, + const sql::ObChunkDatumStore::StoredRow *stored_row, + blocksstable::ObDatumRow &cg_row); + int close(); + TO_STRING_KV(K(is_inited_), K(cg_idx_), KPC(cg_schema_), K(macro_block_writer_), K(data_desc_), K(cg_row_)); +private: + 
bool is_inited_; + int64_t cg_idx_; + const ObStorageColumnGroupSchema *cg_schema_; + blocksstable::ObWholeDataStoreDesc data_desc_; + blocksstable::ObSSTableIndexBuilder index_builder_; + blocksstable::ObMacroBlockWriter macro_block_writer_; + storage::ObDDLRedoLogWriter ddl_clog_writer_; + storage::ObDDLRedoLogWriterCallback flush_callback_; + blocksstable::ObDatumRow cg_row_; +}; + +struct ObTabletDirectLoadExecContextId final +{ +public: + ObTabletDirectLoadExecContextId() + : tablet_id_(), context_id_(OB_INVALID_ID) + {} + ~ObTabletDirectLoadExecContextId() = default; + uint64_t hash() const { + return tablet_id_.hash() + murmurhash(&context_id_, sizeof(context_id_), 0); + } + int hash(uint64_t &hash_val) const {hash_val = hash(); return OB_SUCCESS;} + bool is_valid() const { return context_id_ >= 0; } + bool operator == (const ObTabletDirectLoadExecContextId &other) const { + return tablet_id_ == other.tablet_id_ && context_id_ == other.context_id_; } + TO_STRING_KV(K_(tablet_id), K_(context_id)); +public: + common::ObTabletID tablet_id_; + int64_t context_id_; +}; + +struct ObTabletDirectLoadExecContext final +{ +public: + ObTabletDirectLoadExecContext() + : start_scn_(), execution_id_(0), seq_interval_task_id_(0) + {} + ~ObTabletDirectLoadExecContext() = default; + TO_STRING_KV(K_(start_scn), K_(execution_id), K_(seq_interval_task_id)); +public: + share::SCN start_scn_; + int64_t execution_id_; + int64_t seq_interval_task_id_; +}; + +}// namespace storage +}// namespace oceanbase + +#endif//OCEANBASE_STORAGE_OB_DIRECT_LOAD_COMMON_H diff --git a/src/storage/ddl/ob_tablet_ddl_kv.cpp b/src/storage/ddl/ob_tablet_ddl_kv.cpp index b13483af6..fdf3413c0 100644 --- a/src/storage/ddl/ob_tablet_ddl_kv.cpp +++ b/src/storage/ddl/ob_tablet_ddl_kv.cpp @@ -28,6 +28,8 @@ #include "storage/compaction/ob_schedule_dag_func.h" #include "storage/blocksstable/ob_datum_rowkey.h" #include "storage/tablet/ob_tablet_create_delete_helper.h" +#include 
"storage/ddl/ob_direct_insert_sstable_ctx_new.h" +#include "storage/column_store/ob_column_oriented_sstable.h" using namespace oceanbase::storage; using namespace oceanbase::blocksstable; @@ -37,8 +39,9 @@ using namespace oceanbase::share::schema; +/****************** ObBlockMetaTree **********************/ ObBlockMetaTree::ObBlockMetaTree() - : is_inited_(false), macro_blocks_(), arena_("DDL_Btree", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), tree_allocator_(arena_), block_tree_(tree_allocator_) + : is_inited_(false), macro_blocks_(), arena_("DDL_Btree", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), tree_allocator_(arena_), block_tree_(tree_allocator_), datum_utils_(nullptr) { macro_blocks_.set_attr(ObMemAttr(MTL_ID(), "DDL_Btree")); } @@ -51,12 +54,13 @@ ObBlockMetaTree::~ObBlockMetaTree() int ObBlockMetaTree::init(ObTablet &tablet, const ObITable::TableKey &table_key, const share::SCN &ddl_start_scn, - const int64_t data_format_version) + const uint64_t data_format_version) { int ret = OB_SUCCESS; const ObMemAttr mem_attr(MTL_ID(), "BlockMetaTree"); ObTableStoreIterator ddl_table_iter; ObITable *first_ddl_sstable = nullptr; // get compressor_type of macro block for query + ObTabletHandle tablet_handle; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); @@ -71,30 +75,58 @@ int ObBlockMetaTree::init(ObTablet &tablet, } else if (ddl_table_iter.count() > 0 && OB_FAIL(ddl_table_iter.get_boundary_table(false/*is_last*/, first_ddl_sstable))) { LOG_WARN("failed to get boundary table", K(ret)); } else if (OB_FAIL(ObTabletDDLUtil::prepare_index_data_desc(tablet, + table_key.is_column_store_sstable() ? 
table_key.get_column_group_id() : -1/*negative value means row_store*/, table_key.get_snapshot_version(), data_format_version, static_cast(first_ddl_sstable), + table_key.get_end_scn(), data_desc_))) { LOG_WARN("prepare data store desc failed", K(ret), K(table_key), K(data_format_version)); } else { + if (data_desc_.get_desc().is_cg()) { + schema::ObColDesc int_col_desc; + int_col_desc.col_id_ = 0; + int_col_desc.col_order_ = ObOrderType::ASC; + int_col_desc.col_type_.set_int(); + ObSEArray col_descs; + col_descs.set_attr(ObMemAttr(MTL_ID(), "DDL_Btree_descs")); + const bool is_column_store = true; + if (OB_FAIL(col_descs.push_back(int_col_desc))) { + LOG_WARN("push back col desc failed", K(ret)); + } else if (OB_FAIL(row_id_datum_utils_.init(col_descs, col_descs.count(), lib::is_oracle_mode(), arena_, is_column_store))) { + LOG_WARN("init row id datum utils failed", K(ret), K(col_descs)); + } else { + datum_utils_ = &row_id_datum_utils_; + LOG_INFO("block meta tree sort with row id", K(table_key)); + } + } else { + datum_utils_ = const_cast(&data_desc_.get_desc().get_datum_utils()); + LOG_INFO("block meta tree sort with row key", K(table_key)); + } is_inited_ = true; } return ret; } -int ObDDLKV::init_sstable_param(ObTablet &tablet, - const ObITable::TableKey &table_key, - const share::SCN &ddl_start_scn, - ObTabletCreateSSTableParam &sstable_param) +int ObDDLMemtable::init_sstable_param( + ObTablet &tablet, + const ObITable::TableKey &table_key, + const share::SCN &ddl_start_scn, + ObTabletCreateSSTableParam &sstable_param) { int ret = OB_SUCCESS; ObStorageSchema *storage_schema_ptr = nullptr; - ObArenaAllocator allocator("DDLKV", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObLSService *ls_service = MTL(ObLSService *); + ObArenaAllocator allocator("DDL_MMT", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObTabletHandle tablet_handle; if (OB_UNLIKELY(!table_key.is_valid() || !ddl_start_scn.is_valid_and_not_min())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", 
K(ret), K(table_key), K(ddl_start_scn)); + } else if (OB_ISNULL(ls_service)) { + ret = OB_ERR_SYS; + LOG_WARN("ls service is null", K(ret), K(table_key)); } else if (OB_FAIL(tablet.load_storage_schema(allocator, storage_schema_ptr))) { - LOG_WARN("load storage schema fail", K(ret), K(table_key)); + LOG_WARN("fail to get storage schema", K(ret)); } else { int64_t column_count = 0; const ObStorageSchema &storage_schema = *storage_schema_ptr; @@ -104,19 +136,47 @@ int ObDDLKV::init_sstable_param(ObTablet &tablet, LOG_WARN("fail to get stored column count in sstable", K(ret)); } else { sstable_param.table_key_ = table_key; - sstable_param.table_key_.table_type_ = ObITable::DDL_MEM_SSTABLE; + if (table_key.is_column_store_sstable()) { + if (table_key.is_normal_cg_sstable()) { + sstable_param.table_key_.table_type_ = ObITable::TableType::DDL_MEM_CG_SSTABLE; + sstable_param.rowkey_column_cnt_ = 0; + sstable_param.column_cnt_ = 1; + } else { // co sstable with all cg or rowkey cg + sstable_param.table_key_.table_type_ = ObITable::TableType::DDL_MEM_CO_SSTABLE; + sstable_param.rowkey_column_cnt_ = storage_schema.get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + + // calculate column count + const ObIArray &cg_schemas = storage_schema.get_column_groups(); + const int64_t cg_idx = sstable_param.table_key_.get_column_group_id(); + if (cg_idx < 0 || cg_idx >= cg_schemas.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column group index", K(ret), K(cg_idx)); + } else if (cg_schemas.at(cg_idx).is_rowkey_column_group()) { + column_count = storage_schema.get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + } else { + if (OB_FAIL(storage_schema.get_stored_column_count_in_sstable(column_count))) { + LOG_WARN("fail to get stored column count in sstable", K(ret)); + } + } + if (OB_SUCC(ret)) { + sstable_param.column_cnt_ = column_count; + } + } + } else { + sstable_param.table_key_.table_type_ = 
ObITable::TableType::DDL_MEM_SSTABLE; + sstable_param.rowkey_column_cnt_ = storage_schema.get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + sstable_param.column_cnt_ = column_count; + } sstable_param.is_ready_for_read_ = true; sstable_param.table_mode_ = storage_schema.get_table_mode_struct(); sstable_param.index_type_ = storage_schema.get_index_type(); - sstable_param.rowkey_column_cnt_ = storage_schema.get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); sstable_param.schema_version_ = storage_schema.get_schema_version(); sstable_param.latest_row_store_type_ = storage_schema.get_row_store_type(); sstable_param.create_snapshot_version_ = table_key.get_snapshot_version(); sstable_param.max_merged_trans_version_ = table_key.get_snapshot_version(); sstable_param.ddl_scn_ = ddl_start_scn; - sstable_param.root_row_store_type_ = data_desc.get_row_store_type(); + sstable_param.root_row_store_type_ = data_desc.get_row_store_type(); // for root block, not used for ddl memtable sstable_param.data_index_tree_height_ = 2; // fixed tree height, because there is only one root block - sstable_param.column_cnt_ = column_count; sstable_param.contain_uncommitted_row_ = false; // ddl build major sstable with committed rows only sstable_param.compressor_type_ = data_desc.get_compressor_type(); sstable_param.encrypt_id_ = data_desc.get_encrypt_id(); @@ -157,17 +217,47 @@ void ObBlockMetaTree::destroy() { is_inited_ = false; macro_blocks_.reset(); + destroy_tree_value(); block_tree_.destroy(false /*is_batch_destroy*/); + tree_allocator_.reset(); data_desc_.reset(); - for (int64_t i = 0; i < sorted_rowkeys_.count(); ++i) { - const ObDataMacroBlockMeta *cur_meta = sorted_rowkeys_.at(i).block_meta_; - if (OB_NOT_NULL(cur_meta)) { - cur_meta->~ObDataMacroBlockMeta(); + row_id_datum_utils_.reset(); + datum_utils_ = nullptr; + arena_.reset(); +} + +void ObBlockMetaTree::destroy_tree_value() +{ + int ret = OB_SUCCESS; + const 
int64_t version = INT64_MAX; + blocksstable::DDLBtreeIterator tmp_iter; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + } else if (OB_FAIL(block_tree_.set_key_range(tmp_iter, + ObDatumRowkeyWrapper(&ObDatumRowkey::MIN_ROWKEY, datum_utils_), + false, + ObDatumRowkeyWrapper(&ObDatumRowkey::MAX_ROWKEY, datum_utils_), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else { + while (OB_SUCC(ret)) { + ObDatumRowkeyWrapper rowkey_wrapper; + ObBlockMetaTreeValue *tree_value = nullptr; + if (OB_FAIL(tmp_iter.get_next(rowkey_wrapper, tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else { + ret = OB_SUCCESS; + break; + } + } else if (OB_NOT_NULL(tree_value)) { + // destruct meta before free + tree_value->block_meta_->~ObDataMacroBlockMeta(); + tree_value->~ObBlockMetaTreeValue(); + } } } - sorted_rowkeys_.reset(); - tree_allocator_.reset(); - arena_.reset(); } int ObBlockMetaTree::insert_macro_block(const ObDDLMacroHandle ¯o_handle, @@ -176,6 +266,8 @@ int ObBlockMetaTree::insert_macro_block(const ObDDLMacroHandle ¯o_handle, { int ret = OB_SUCCESS; ObDataMacroBlockMeta *insert_meta = const_cast(meta); + void *buf = nullptr; + ObBlockMetaTreeValue *tree_value = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); @@ -184,99 +276,86 @@ int ObBlockMetaTree::insert_macro_block(const ObDDLMacroHandle ¯o_handle, LOG_WARN("invalid argument", K(ret), K(macro_handle), K(rowkey), KP(meta)); } else if (OB_FAIL(macro_blocks_.push_back(macro_handle))) { LOG_WARN("push back macro handle failed", K(ret), K(macro_handle)); - } else if (OB_FAIL(block_tree_.insert(ObDatumRowkeyWrapper(rowkey, &data_desc_.get_desc().get_datum_utils()), insert_meta))) { - LOG_WARN("insert block tree failed", K(ret), K(rowkey), KPC(meta)); + } else if (OB_ISNULL(buf = arena_.alloc(sizeof(ObBlockMetaTreeValue)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), 
K(sizeof(ObBlockMetaTreeValue))); + } else { + tree_value = new (buf) ObBlockMetaTreeValue(insert_meta, rowkey); + + tree_value->header_.version_ = ObIndexBlockRowHeader::INDEX_BLOCK_HEADER_V1; + tree_value->header_.row_store_type_ = static_cast(data_desc_.get_desc().get_row_store_type()); + tree_value->header_.compressor_type_ = static_cast(data_desc_.get_desc().get_compressor_type()); + tree_value->header_.is_data_index_ = true; + tree_value->header_.is_data_block_ = false; + tree_value->header_.is_leaf_block_ = true; + tree_value->header_.is_macro_node_ = true; + tree_value->header_.is_major_node_ = true; + tree_value->header_.is_deleted_ = insert_meta->val_.is_deleted_; + tree_value->header_.contain_uncommitted_row_ = insert_meta->val_.contain_uncommitted_row_; + tree_value->header_.macro_id_ = insert_meta->val_.macro_id_; + tree_value->header_.block_offset_ = insert_meta->val_.block_offset_; + tree_value->header_.block_size_ = insert_meta->val_.block_size_; + tree_value->header_.macro_block_count_ = 1; + tree_value->header_.micro_block_count_ = insert_meta->val_.micro_block_count_; + tree_value->header_.master_key_id_ = data_desc_.get_desc().get_master_key_id(); + tree_value->header_.encrypt_id_ = data_desc_.get_desc().get_encrypt_id(); + MEMCPY(tree_value->header_.encrypt_key_, data_desc_.get_desc().get_encrypt_key(), sizeof(tree_value->header_.encrypt_key_)); + tree_value->header_.schema_version_ = data_desc_.get_desc().get_schema_version(); + tree_value->header_.row_count_ = insert_meta->val_.row_count_; + if (OB_UNLIKELY(!tree_value->header_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Built an invalid index block row", K(ret), K(tree_value->header_), KPC(insert_meta)); + } else if (OB_FAIL(block_tree_.insert(ObDatumRowkeyWrapper(tree_value->rowkey_, datum_utils_), tree_value))) { + LOG_WARN("insert block tree failed", K(ret), K(rowkey), KPC(meta)); + } } return ret; } -// TODO@wenqu: direct use btree iterator -int 
ObBlockMetaTree::build_sorted_rowkeys() +int ObBlockMetaTree::get_sorted_meta_array(ObIArray &meta_array) { int ret = OB_SUCCESS; + meta_array.reset(); const int64_t version = INT64_MAX; - BtreeIterator iter; + blocksstable::DDLBtreeIterator tmp_iter; + if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); - } else if (sorted_rowkeys_.count() > 0) { - // already sorted, do nothing - } else if (OB_FAIL(block_tree_.set_key_range(iter, - ObDatumRowkeyWrapper(&ObDatumRowkey::MIN_ROWKEY, &data_desc_.get_desc().get_datum_utils()), + } else if (OB_FAIL(block_tree_.set_key_range(tmp_iter, + ObDatumRowkeyWrapper(&ObDatumRowkey::MIN_ROWKEY, datum_utils_), false, - ObDatumRowkeyWrapper(&ObDatumRowkey::MAX_ROWKEY, &data_desc_.get_desc().get_datum_utils()), + ObDatumRowkeyWrapper(&ObDatumRowkey::MAX_ROWKEY, datum_utils_), false, version))) { LOG_WARN("locate range failed", K(ret)); - } else if (OB_FAIL(sorted_rowkeys_.reserve(get_macro_block_cnt()))) { - LOG_WARN("reserve sorted rowkeys failed", K(ret), K(get_macro_block_cnt())); + } else if (OB_FAIL(meta_array.reserve(macro_blocks_.count()))) { + LOG_WARN("reserve meta array failed", K(ret), K(macro_blocks_.count())); } else { while (OB_SUCC(ret)) { ObDatumRowkeyWrapper rowkey_wrapper; - ObDataMacroBlockMeta *block_meta = nullptr; - if (OB_FAIL(iter.get_next(rowkey_wrapper, block_meta))) { + ObBlockMetaTreeValue *tree_value = nullptr; + if (OB_FAIL(tmp_iter.get_next(rowkey_wrapper, tree_value))) { if (OB_ITER_END != ret) { LOG_WARN("get next failed", K(ret)); } else { ret = OB_SUCCESS; break; } - } else if (OB_ISNULL(block_meta)) { + } else if (OB_ISNULL(tree_value)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("block_meta is null", K(ret), KP(block_meta)); - } else if (((uint64_t)(block_meta) & 7ULL) != 0) { + LOG_WARN("tree_value is null", K(ret), KP(tree_value)); + } else if (((uint64_t)(tree_value) & 7ULL) != 0) { ret = OB_ERR_UNEXPECTED; - LOG_ERROR("invalid btree value", K(ret), KP(block_meta)); - } 
else { - IndexItem cur_item(rowkey_wrapper.rowkey_, block_meta); - cur_item.header_.version_ = ObIndexBlockRowHeader::INDEX_BLOCK_HEADER_V1; - cur_item.header_.row_store_type_ = static_cast(data_desc_.get_desc().get_row_store_type()); - cur_item.header_.compressor_type_ = static_cast(data_desc_.get_desc().get_compressor_type()); - cur_item.header_.is_data_index_ = true; - cur_item.header_.is_data_block_ = false; - cur_item.header_.is_leaf_block_ = true; - cur_item.header_.is_macro_node_ = true; - cur_item.header_.is_major_node_ = true; - cur_item.header_.is_deleted_ = block_meta->val_.is_deleted_; - cur_item.header_.contain_uncommitted_row_ = block_meta->val_.contain_uncommitted_row_; - cur_item.header_.macro_id_ = block_meta->val_.macro_id_; - cur_item.header_.block_offset_ = block_meta->val_.block_offset_; - cur_item.header_.block_size_ = block_meta->val_.block_size_; - cur_item.header_.macro_block_count_ = 1; - cur_item.header_.micro_block_count_ = block_meta->val_.micro_block_count_; - cur_item.header_.master_key_id_ = data_desc_.get_desc().get_master_key_id(); - cur_item.header_.encrypt_id_ = data_desc_.get_desc().get_encrypt_id(); - MEMCPY(cur_item.header_.encrypt_key_, data_desc_.get_desc().get_encrypt_key(), sizeof(cur_item.header_.encrypt_key_)); - cur_item.header_.schema_version_ = data_desc_.get_desc().get_schema_version(); - cur_item.header_.row_count_ = block_meta->val_.row_count_; - if (OB_UNLIKELY(!cur_item.header_.is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Built an invalid index block row", K(ret), K(cur_item)); - } else if (OB_FAIL(sorted_rowkeys_.push_back(cur_item))) { - LOG_WARN("push back index item failed", K(ret), K(rowkey_wrapper), KPC(block_meta)); - } + LOG_ERROR("invalid btree value", K(ret), K(tree_value)); + } else if (OB_FAIL(meta_array.push_back(tree_value->block_meta_))) { + LOG_WARN("push back block meta failed", K(ret), K(*tree_value->block_meta_)); } } - } - return ret; -} - -int 
ObBlockMetaTree::get_sorted_meta_array(ObIArray &meta_array) const -{ - int ret = OB_SUCCESS; - meta_array.reset(); - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (sorted_rowkeys_.count() != macro_blocks_.count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("sorted array not ready", K(ret), K(sorted_rowkeys_.count()), K(macro_blocks_.count())); - } else if (OB_FAIL(meta_array.reserve(sorted_rowkeys_.count()))) { - LOG_WARN("reserve meta array failed", K(ret), K(sorted_rowkeys_.count())); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < sorted_rowkeys_.count(); ++i) { - if (OB_FAIL(meta_array.push_back(sorted_rowkeys_.at(i).block_meta_))) { - LOG_WARN("push back block meta failed", K(ret), K(i)); + if (OB_SUCC(ret)) { + if (meta_array.count() != macro_blocks_.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("meta array count not euqal with macro_block count", K(ret), K(meta_array.count()), K(macro_blocks_.count())); } } } @@ -287,14 +366,14 @@ int ObBlockMetaTree::exist(const blocksstable::ObDatumRowkey *rowkey, bool &is_e { int ret = OB_SUCCESS; is_exist = false; - ObDataMacroBlockMeta *dummp_meta = nullptr; + ObBlockMetaTreeValue *tree_value = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else if (OB_ISNULL(rowkey)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(rowkey)); - } else if (OB_FAIL(block_tree_.get(ObDatumRowkeyWrapper(rowkey, &data_desc_.get_desc().get_datum_utils()), dummp_meta))) { + } else if (OB_FAIL(block_tree_.get(ObDatumRowkeyWrapper(rowkey, datum_utils_), tree_value))) { if (OB_ENTRY_NOT_EXIST != ret) { LOG_WARN("get value from block meta tree failed", K(ret), KPC(rowkey)); } else { @@ -311,7 +390,7 @@ bool ObBlockMetaTree::CompareFunctor::operator ()(const IndexItem &item, const blocksstable::ObDatumRowkey &rowkey) { int cmp_ret = 0; - item.rowkey_->compare(rowkey, datum_utils_, cmp_ret, need_compare_datum_cnt_); + 
item.rowkey_->compare(rowkey, datum_utils_, cmp_ret); return cmp_ret < 0; } @@ -319,107 +398,314 @@ bool ObBlockMetaTree::CompareFunctor::operator ()(const blocksstable::ObDatumRow const IndexItem &item) { int cmp_ret = 0; - item.rowkey_->compare(rowkey, datum_utils_, cmp_ret, need_compare_datum_cnt_); + item.rowkey_->compare(rowkey, datum_utils_, cmp_ret); return cmp_ret > 0; } +int ObBlockMetaTree::lower_bound(const blocksstable::ObDatumRowkey *target_rowkey, + const blocksstable::ObStorageDatumUtils &datum_utils, + blocksstable::ObDatumRowkey *&rowkey, + ObBlockMetaTreeValue *&tree_value) const +{ + int ret = OB_SUCCESS; + rowkey = nullptr; + tree_value = nullptr; + const int64_t version = INT64_MAX; + bool find = false; + blocksstable::DDLBtreeIterator tmp_iter; + if (OB_ISNULL(target_rowkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("target rowkey is null", K(ret)); + } else if (OB_FAIL(block_tree_.set_key_range(tmp_iter, + ObDatumRowkeyWrapper(target_rowkey, &datum_utils), + false, + ObDatumRowkeyWrapper(&ObDatumRowkey::MAX_ROWKEY, &datum_utils), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else { + while (OB_SUCC(ret)) { + ObDatumRowkeyWrapper rowkey_wrapper; + ObBlockMetaTreeValue *tmp_tree_value = nullptr; + if (find) { + break; + } else if (OB_FAIL(tmp_iter.get_next(rowkey_wrapper, tmp_tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else if (!find) { + ret = OB_BEYOND_THE_RANGE; + } + } else { + int cmp_ret = 0; + if (!find) { + rowkey_wrapper.rowkey_->compare(*target_rowkey, datum_utils, cmp_ret); + if (cmp_ret >= 0) { + rowkey = const_cast(rowkey_wrapper.rowkey_); + tree_value = tmp_tree_value; + find = true; + } + } + } + } + } + return ret; +} + +int ObBlockMetaTree::upper_bound(const blocksstable::ObDatumRowkey *target_rowkey, + const blocksstable::ObStorageDatumUtils &datum_utils, + blocksstable::ObDatumRowkey *&rowkey, + ObBlockMetaTreeValue *&tree_value) const +{ + int ret = 
OB_SUCCESS; + rowkey = nullptr; + tree_value = nullptr; + const int64_t version = INT64_MAX; + bool find = false; + blocksstable::DDLBtreeIterator tmp_iter; + if (OB_ISNULL(target_rowkey)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("target rowkey is null", K(ret)); + } else if (OB_FAIL(block_tree_.set_key_range(tmp_iter, + ObDatumRowkeyWrapper(target_rowkey, &datum_utils), + true, + ObDatumRowkeyWrapper(&ObDatumRowkey::MAX_ROWKEY, &datum_utils), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else { + while (OB_SUCC(ret)) { + ObDatumRowkeyWrapper rowkey_wrapper; + ObBlockMetaTreeValue *tmp_tree_value = nullptr; + if (find) { + break; + } else if (OB_FAIL(tmp_iter.get_next(rowkey_wrapper, tmp_tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else if (!find) { + ret = OB_BEYOND_THE_RANGE; + } + } else { + int cmp_ret = 0; + if (!find) { + rowkey_wrapper.rowkey_->compare(*target_rowkey, datum_utils, cmp_ret); + if (cmp_ret > 0) { + rowkey = const_cast(rowkey_wrapper.rowkey_); + tree_value = tmp_tree_value; + find = true; + } + } + } + } + } + return ret; +} + +int ObBlockMetaTree::locate_key(const blocksstable::ObDatumRange &range, + const blocksstable::ObStorageDatumUtils &datum_utils, + blocksstable::DDLBtreeIterator &iter, + ObBlockMetaTreeValue *&cur_tree_value) const +{ + int ret = OB_SUCCESS; + cur_tree_value = nullptr; + const int64_t version = INT64_MAX; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + ObDatumRowkey *rowkey = nullptr; + ObBlockMetaTreeValue *tree_value = nullptr; + //locate key must be lower_bound + if (OB_FAIL(lower_bound(&range.get_start_key(), datum_utils, rowkey, tree_value))) { + LOG_WARN("lower bound failed", K(ret), K(range.get_start_key())); + } else { + //todo qilu: not set_key_range again + iter.reset(); + if (OB_FAIL(block_tree_.set_key_range(iter, + ObDatumRowkeyWrapper(rowkey, &datum_utils), + true, + 
ObDatumRowkeyWrapper(rowkey, &datum_utils), + true, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else { + cur_tree_value = tree_value; + } + } + } + return ret; +} + int ObBlockMetaTree::locate_range(const blocksstable::ObDatumRange &range, const blocksstable::ObStorageDatumUtils &datum_utils, const bool is_left_border, const bool is_right_border, - int64_t &begin_idx, - int64_t &end_idx) + const bool is_reverse_scan, + blocksstable::DDLBtreeIterator &iter, + ObBlockMetaTreeValue *&cur_tree_value) const { int ret = OB_SUCCESS; - begin_idx = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - end_idx = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; + cur_tree_value = nullptr; + const int64_t version = INT64_MAX; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); - } else if (sorted_rowkeys_.empty()) { - // do nothing } else { - CompareFunctor cmp(datum_utils); + //pre check range + ObDatumRowkey *start_rowkey = nullptr; + ObDatumRowkey *end_rowkey = nullptr; + ObBlockMetaTreeValue *start_tree_value = nullptr; + ObBlockMetaTreeValue *end_tree_value = nullptr; + bool right_border_beyond_range = false; if (!is_left_border || range.get_start_key().is_min_rowkey()) { - begin_idx = 0; + start_rowkey = &ObDatumRowkey::MIN_ROWKEY; } else { if (range.is_left_closed()) { - begin_idx = std::lower_bound(sorted_rowkeys_.begin(), sorted_rowkeys_.end(), range.get_start_key(), cmp) - sorted_rowkeys_.begin(); + if (OB_FAIL(lower_bound(&range.get_start_key(), datum_utils, start_rowkey, start_tree_value))) { + LOG_WARN("lower bound failed", K(ret), K(range.get_start_key())); + } } else { - begin_idx = std::upper_bound(sorted_rowkeys_.begin(), sorted_rowkeys_.end(), range.get_start_key(), cmp) - sorted_rowkeys_.begin(); - } - if (sorted_rowkeys_.count() == begin_idx) { - ret = OB_BEYOND_THE_RANGE; + if (OB_FAIL(upper_bound(&range.get_start_key(), datum_utils, start_rowkey, start_tree_value))) { + LOG_WARN("upper bound failed", K(ret), 
K(range.get_start_key())); + } } + // maybe OB_BEYOND_THE_RANGE } if (OB_SUCC(ret)) { if (!is_right_border || range.get_end_key().is_max_rowkey()) { - end_idx = sorted_rowkeys_.count() - 1; + end_rowkey = &ObDatumRowkey::MAX_ROWKEY; } else { - end_idx = std::lower_bound(sorted_rowkeys_.begin(), sorted_rowkeys_.end(), range.get_end_key(), cmp) - sorted_rowkeys_.begin(); - if (sorted_rowkeys_.count() == end_idx) { - end_idx = sorted_rowkeys_.count() - 1; + if (OB_FAIL(lower_bound(&range.get_end_key(), datum_utils, end_rowkey, end_tree_value))) { + if (OB_BEYOND_THE_RANGE == ret) { + ret = OB_SUCCESS; + right_border_beyond_range = true; + end_rowkey = &ObDatumRowkey::MAX_ROWKEY; + } else { + LOG_WARN("lower bound failed", K(ret), K(range.get_end_key())); + } + } + } + } + + //locate real range + if (OB_SUCC(ret)) { + iter.reset(); + if (!is_reverse_scan) { + if (!is_left_border || range.get_start_key().is_min_rowkey()) { + ObDatumRowkeyWrapper rowkey_wrapper_left; + ObBlockMetaTreeValue *tree_value_left = nullptr; + if (OB_FAIL(block_tree_.set_key_range(iter, + ObDatumRowkeyWrapper(start_rowkey /*ObDatumRowkey::MIN_ROWKEY*/, &datum_utils), + false, + ObDatumRowkeyWrapper(end_rowkey, &datum_utils), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else if (OB_FAIL(iter.get_next(rowkey_wrapper_left, tree_value_left))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else { + ret = OB_BEYOND_THE_RANGE; + LOG_WARN("beyond range", K(ret), K(range)); + } + } else { + cur_tree_value = tree_value_left; + } + } else { + int cmp_ret = 0; + if (OB_FAIL(start_rowkey->compare(*end_rowkey, datum_utils, cmp_ret))) { + LOG_WARN("fail to compare rowkey", K(ret), KPC(end_rowkey), KPC(start_rowkey), K(datum_utils)); + } else if (cmp_ret > 0) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("start row key is bigger than right rowkey", K(ret), KPC(start_rowkey), KPC(end_rowkey)); + } else if (OB_FAIL(block_tree_.set_key_range(iter, + 
ObDatumRowkeyWrapper(start_rowkey, &datum_utils), + true, + ObDatumRowkeyWrapper(end_rowkey, &datum_utils), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else { + cur_tree_value = start_tree_value; + } + } + } else { + if (right_border_beyond_range || !is_right_border || range.get_end_key().is_max_rowkey()) { + ObDatumRowkeyWrapper rowkey_wrapper_right; + ObBlockMetaTreeValue *tree_value_right = nullptr; + if (OB_FAIL(block_tree_.set_key_range(iter, + ObDatumRowkeyWrapper(end_rowkey /*ObDatumRowkey::MAX_ROWKEY*/, &datum_utils), + false, + ObDatumRowkeyWrapper(start_rowkey, &datum_utils), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else if (OB_FAIL(iter.get_next(rowkey_wrapper_right, tree_value_right))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else { + ret = OB_BEYOND_THE_RANGE; + LOG_WARN("beyond range", K(ret), K(range)); + } + } else { + cur_tree_value = tree_value_right; + } + } else { + int cmp_ret = 0; + if (OB_FAIL(start_rowkey->compare(*end_rowkey, datum_utils, cmp_ret))) { + LOG_WARN("fail to compare rowkey", K(ret), KPC(end_rowkey), KPC(start_rowkey), K(datum_utils)); + } else if (cmp_ret > 0) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("start row key is bigger than right rowkey", K(ret), KPC(start_rowkey), KPC(end_rowkey)); + } else if (OB_FAIL(block_tree_.set_key_range(iter, + ObDatumRowkeyWrapper(end_rowkey, &datum_utils), + true, + ObDatumRowkeyWrapper(start_rowkey, &datum_utils), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); + } else { + cur_tree_value = end_tree_value; + } } } } } - if (OB_FAIL(ret)) { - begin_idx = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - end_idx = ObIMicroBlockReaderInfo::INVALID_ROW_INDEX; - } return ret; } -int ObBlockMetaTree::skip_to_next_valid_position(const ObDatumRowkey &rowkey, - const blocksstable::ObStorageDatumUtils &datum_utils, - int64_t ¤t_pos) +int 
ObBlockMetaTree::get_next_tree_value(blocksstable::DDLBtreeIterator &iter, + const int64_t step, + ObBlockMetaTreeValue *&tree_value) const { int ret = OB_SUCCESS; - CompareFunctor cmp(datum_utils, false); - const int64_t found_idx = std::lower_bound(sorted_rowkeys_.begin() + current_pos, sorted_rowkeys_.end(), rowkey, cmp) - sorted_rowkeys_.begin(); - if (found_idx == sorted_rowkeys_.count()) { - ret = OB_ITER_END; - } else { - current_pos = found_idx; - } - return ret; -} - -int ObBlockMetaTree::get_index_block_row_header(const int64_t idx, - const ObIndexBlockRowHeader *&idx_header, - const blocksstable::ObDatumRowkey *&endkey) -{ - int ret = OB_SUCCESS; - idx_header = nullptr; + tree_value = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(idx < 0 || idx >= sorted_rowkeys_.count())) { + } else if (OB_UNLIKELY(step <= 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(idx), K(sorted_rowkeys_.count())); + LOG_WARN("invalid argument", K(ret), K(step)); } else { - IndexItem &cur_item = sorted_rowkeys_.at(idx); - endkey = &cur_item.block_meta_->end_key_; - idx_header = &cur_item.header_; - } - return ret; -} - -int ObBlockMetaTree::get_macro_block_meta(const int64_t idx, - ObDataMacroBlockMeta ¯o_meta) -{ - int ret = OB_SUCCESS; - macro_meta.reset(); - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(idx < 0 || idx >= sorted_rowkeys_.count())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret)); - } else { - const ObDataMacroBlockMeta &found_meta = *sorted_rowkeys_.at(idx).block_meta_; - if (OB_FAIL(macro_meta.assign(found_meta))) { - LOG_WARN("assign macro meta failed", K(ret), K(found_meta)); + ObBlockMetaTreeValue *tmp_tree_value = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < step; ++i) { + ObDatumRowkeyWrapper rowkey_wrapper; + if (OB_FAIL(iter.get_next(rowkey_wrapper, tmp_tree_value))) { + 
if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } + // just return ITER_END + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_ISNULL(tmp_tree_value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tree_value is null", K(ret), KP(tmp_tree_value)); + } else if (((uint64_t)(tmp_tree_value) & 7ULL) != 0) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("invalid btree value", K(ret), KP(tmp_tree_value)); + } else { + tree_value = tmp_tree_value; } } return ret; @@ -428,20 +714,145 @@ int ObBlockMetaTree::get_macro_block_meta(const int64_t idx, int ObBlockMetaTree::get_last_rowkey(const ObDatumRowkey *&last_rowkey) { int ret = OB_SUCCESS; - if (sorted_rowkeys_.count() > 0) { - last_rowkey = sorted_rowkeys_.at(sorted_rowkeys_.count() - 1).rowkey_; + const int64_t version = INT64_MAX; + last_rowkey = nullptr; + blocksstable::DDLBtreeIterator tmp_iter; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + //always forward + } else if (OB_FAIL(block_tree_.set_key_range(tmp_iter, + ObDatumRowkeyWrapper(&ObDatumRowkey::MIN_ROWKEY, datum_utils_), + false, + ObDatumRowkeyWrapper(&ObDatumRowkey::MAX_ROWKEY, datum_utils_), + false, + version))) { + LOG_WARN("locate range failed", K(ret)); } else { - last_rowkey = &ObDatumRowkey::MAX_ROWKEY; + bool find = false; + ObDatumRowkeyWrapper rowkey_wrapper; + ObBlockMetaTreeValue *tree_value = nullptr; + while (OB_SUCC(ret)) { + if (OB_FAIL(tmp_iter.get_next(rowkey_wrapper, tree_value))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next failed", K(ret)); + } else { + ret = OB_SUCCESS; + find = true; + break; + } + } + } + if (find) { + last_rowkey = rowkey_wrapper.rowkey_; + } else { + last_rowkey = &ObDatumRowkey::MAX_ROWKEY; + } } return ret; } +int64_t ObBlockMetaTree::get_memory_used() const +{ + return arena_.total(); +} + +/****************** ObDDLKV **********************/ + +ObDDLMemtable::ObDDLMemtable() + : is_inited_(false), allocator_("ddl_mem_sst", 
OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), block_meta_tree_() +{ + +} + +ObDDLMemtable::~ObDDLMemtable() +{ + +} + +int ObDDLMemtable::init( + ObTablet &tablet, + const ObITable::TableKey &table_key, + const share::SCN &ddl_start_scn, + const uint64_t data_format_version) +{ + + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), KP(this)); + } else if (OB_UNLIKELY(!table_key.is_valid() + || !ddl_start_scn.is_valid_and_not_min() + || data_format_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(table_key), K(ddl_start_scn), K(data_format_version)); + } else { + ObTabletCreateSSTableParam sstable_param; + if (OB_FAIL(block_meta_tree_.init(tablet, table_key, ddl_start_scn, data_format_version))) { + LOG_WARN("init mem index sstable failed", K(ret), K(table_key), K(ddl_start_scn)); + } else if (OB_FAIL(init_sstable_param(tablet, table_key, ddl_start_scn, sstable_param))) { + LOG_WARN("init sstable param failed", K(ret)); + } else if (OB_FAIL(ObSSTable::init(sstable_param, &allocator_))) { + LOG_WARN("init sstable failed", K(ret)); + } else { + is_inited_ = true; + } + } + return ret; +} + +void ObDDLMemtable::reset() +{ + is_inited_ = false; + ObSSTable::reset(); + block_meta_tree_.destroy(); + allocator_.reset(); +} + +void ObDDLMemtable::set_scn_range( + const share::SCN &start_scn, + const share::SCN &end_scn) +{ + key_.scn_range_.start_scn_ = start_scn; + key_.scn_range_.end_scn_ = end_scn; +} + +int ObDDLMemtable::get_sorted_meta_array( + ObIArray &meta_array) +{ + int ret = OB_SUCCESS; + meta_array.reset(); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), KP(this)); + } else if (OB_FAIL(block_meta_tree_.get_sorted_meta_array(meta_array))) { + LOG_WARN("get sorted array failed", K(ret)); + } + return ret; +} + +int ObDDLMemtable::init_ddl_index_iterator(const blocksstable::ObStorageDatumUtils *datum_utils, + const bool is_reverse_scan, 
+ blocksstable::ObDDLIndexBlockRowIterator *ddl_kv_index_iter) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(datum_utils) || OB_UNLIKELY(!datum_utils->is_valid()) || OB_ISNULL(ddl_kv_index_iter)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), KP(ddl_kv_index_iter), KPC(datum_utils)); + } else if (OB_FAIL(ddl_kv_index_iter->set_iter_param(datum_utils, is_reverse_scan, &block_meta_tree_))) { + LOG_WARN("fail to set ddl iter param", K(ret)); + } + return ret; +} ObDDLKV::ObDDLKV() - : is_inited_(false), ls_id_(), tablet_id_(), ddl_start_scn_(SCN::min_scn()), snapshot_version_(0), - lock_(), arena_allocator_("DDL_KV", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), is_freezed_(false), is_closed_(false), last_freezed_scn_(SCN::min_scn()), - min_scn_(SCN::max_scn()), max_scn_(SCN::min_scn()), freeze_scn_(SCN::max_scn()), pending_cnt_(0), data_format_version_(0) + : is_inited_(false), is_closed_(false), ref_cnt_(0), lock_(), arena_allocator_("DDL_CONTAINER", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), + ls_id_(), tablet_id_(), ddl_start_scn_(SCN::min_scn()), snapshot_version_(0), data_format_version_(0), + is_freezed_(false), last_freezed_scn_(SCN::min_scn()), + min_scn_(SCN::max_scn()), max_scn_(SCN::min_scn()), freeze_scn_(SCN::max_scn()), pending_cnt_(0), + macro_block_count_(0) { + } ObDDLKV::~ObDDLKV() @@ -455,61 +866,36 @@ void ObDDLKV::inc_ref() // FLOG_INFO("DDLKV inc_ref", K(ref_cnt_), KP(this), K(tablet_id_)); } -int64_t ObDDLKV::dec_ref() -{ - // FLOG_INFO("DDLKV dec_ref", K(ref_cnt_), KP(this), K(tablet_id_)); - return ATOMIC_SAF(&ref_cnt_, 1 /* just sub 1 */); -} -int ObDDLKV::init(ObTablet &tablet, +int ObDDLKV::init(const ObLSID &ls_id, + const ObTabletID &tablet_id, const SCN &ddl_start_scn, const int64_t snapshot_version, const SCN &last_freezed_scn, - const int64_t data_format_version) + const uint64_t data_format_version) { int ret = OB_SUCCESS; - const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - const ObTabletID &tablet_id = 
tablet.get_tablet_meta().tablet_id_; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; - LOG_WARN("ObDDLKV has been inited twice", K(ret), KP(this)); + LOG_WARN("init twice", K(ret), KP(this)); } else if (OB_UNLIKELY(!ls_id.is_valid() || !tablet_id.is_valid() || !ddl_start_scn.is_valid_and_not_min() || snapshot_version <= 0 || !last_freezed_scn.is_valid_and_not_min() - || data_format_version < 0)) { + || data_format_version <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_id), K(ddl_start_scn), K(snapshot_version), K(last_freezed_scn), K(data_format_version)); } else { - ObTabletDDLParam ddl_param; - ddl_param.tenant_id_ = MTL_ID(); - ddl_param.ls_id_ = ls_id; - ddl_param.table_key_.tablet_id_ = tablet_id; - ddl_param.table_key_.table_type_ = ObITable::TableType::MAJOR_SSTABLE; - ddl_param.table_key_.version_range_.base_version_ = 0; - ddl_param.table_key_.version_range_.snapshot_version_ = snapshot_version; - ddl_param.start_scn_ = ddl_start_scn; - ddl_param.snapshot_version_ = snapshot_version; - ddl_param.data_format_version_ = data_format_version; - ObTabletCreateSSTableParam sstable_param; - if (OB_FAIL(block_meta_tree_.init(tablet, ddl_param.table_key_, ddl_start_scn, data_format_version))) { - LOG_WARN("init mem index sstable failed", K(ret), K(ddl_param)); - } else if (OB_FAIL(init_sstable_param(tablet, ddl_param.table_key_, ddl_start_scn, sstable_param))) { - LOG_WARN("init sstable param failed", K(ret)); - } else if (OB_FAIL(ObSSTable::init(sstable_param, &arena_allocator_))) { - LOG_WARN("init sstable failed", K(ret)); - } else { - ls_id_ = ls_id; - tablet_id_ = tablet_id; - ddl_start_scn_ = ddl_start_scn; - snapshot_version_ = snapshot_version; - last_freezed_scn_ = last_freezed_scn; - data_format_version_ = data_format_version; - is_inited_ = true; - LOG_INFO("ddl kv init success", K(ls_id_), K(tablet_id_), K(ddl_start_scn_), K(snapshot_version_), K(last_freezed_scn_), K(data_format_version_), KP(this)); - } 
+ ls_id_ = ls_id; + tablet_id_ = tablet_id; + ddl_start_scn_ = ddl_start_scn; + snapshot_version_ = snapshot_version; + data_format_version_ = data_format_version; + last_freezed_scn_ = last_freezed_scn; + is_inited_ = true; + LOG_INFO("ddl kv init success", K(ret), KP(this), K(*this)); } return ret; } @@ -518,27 +904,101 @@ void ObDDLKV::reset() { FLOG_INFO("ddl kv reset", KP(this), K(*this)); is_inited_ = false; - ObSSTable::reset(); + is_closed_ = false; ls_id_.reset(); tablet_id_.reset(); ddl_start_scn_ = SCN::min_scn(); snapshot_version_ = 0; + data_format_version_ = 0; + is_freezed_ = false; - is_closed_ = false; last_freezed_scn_ = SCN::min_scn(); min_scn_ = SCN::max_scn(); max_scn_ = SCN::min_scn(); freeze_scn_ = SCN::max_scn(); pending_cnt_ = 0; - data_format_version_ = 0; - block_meta_tree_.destroy(); + + for (int64_t i = 0; i < ddl_memtables_.count(); ++i) { + if (OB_NOT_NULL(ddl_memtables_.at(i))) { + ddl_memtables_.at(i)->reset(); + ddl_memtables_.at(i) = nullptr; + } + } + macro_block_count_ = 0; + ddl_memtables_.reset(); arena_allocator_.reset(); } -int ObDDLKV::set_macro_block(ObTablet &tablet, const ObDDLMacroBlock ¯o_block) +int ObDDLKV::create_ddl_memtable(ObTablet &tablet, const ObITable::TableKey &table_key, ObDDLMemtable *&ddl_memtable) { int ret = OB_SUCCESS; - const int64_t MAX_DDL_BLOCK_COUNT = 10L * 1024L * 1024L * 1024L / OB_SERVER_BLOCK_MGR.get_macro_block_size(); + ddl_memtable = nullptr; + void *buf = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!table_key.is_valid() || table_key.tablet_id_ != tablet_id_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(table_key), K(tablet_id_)); + } else if (OB_ISNULL(buf = arena_allocator_.alloc(sizeof(ObDDLMemtable)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(sizeof(ObDDLMemtable))); + } else { + ddl_memtable = new (buf) ObDDLMemtable; + if 
(OB_FAIL(ddl_memtable->init(tablet, table_key, ddl_start_scn_, data_format_version_))) { + LOG_WARN("init ddl memtable failed", K(ret), K(table_key)); + } else if (OB_FAIL(ddl_memtables_.push_back(ddl_memtable))) { + LOG_WARN("push back ddl memtable failed", K(ret)); + } + if (OB_FAIL(ret) && nullptr != ddl_memtable) { + ddl_memtable->~ObDDLMemtable(); + ddl_memtable = nullptr; + arena_allocator_.free(ddl_memtable); + } + } + return ret; +} + +int ObDDLKV::get_ddl_memtable(const int64_t cg_idx, ObDDLMemtable *&ddl_memtable) +{ + int ret = OB_SUCCESS; + ddl_memtable = nullptr; + bool have_found = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(cg_idx < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(cg_idx)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && !have_found && i < ddl_memtables_.count(); ++i) { + ObDDLMemtable *cur_ddl_memtable = ddl_memtables_.at(i); + if (OB_ISNULL(cur_ddl_memtable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current ddl memtable is null", K(ret), K(i), K(cur_ddl_memtable)); + } else if (cur_ddl_memtable->get_column_group_id() == cg_idx) { + ddl_memtable = cur_ddl_memtable; + have_found = true; + } + } + } + if (OB_SUCC(ret) && !have_found) { + ddl_memtable = nullptr; + ret = OB_ENTRY_NOT_EXIST; + } + return ret; +} + +int ObDDLKV::set_macro_block( + ObTablet &tablet, + const ObDDLMacroBlock ¯o_block, + const int64_t snapshot_version, + const uint64_t data_format_version, + const bool can_freeze) +{ + int ret = OB_SUCCESS; + const int64_t MAX_DDL_BLOCK_COUNT = 10 * 10L * 1024L * 1024L * 1024L / OB_SERVER_BLOCK_MGR.get_macro_block_size(); + const int64_t MEMORY_LIMIT = 50 * 1024 * 1024; // 50M int64_t freeze_block_count = MAX_DDL_BLOCK_COUNT; #ifdef ERRSIM if (0 != GCONF.errsim_max_ddl_block_count) { @@ -549,9 +1009,9 @@ int ObDDLKV::set_macro_block(ObTablet &tablet, const ObDDLMacroBlock ¯o_bloc if (OB_UNLIKELY(!is_inited_)) { ret = 
OB_NOT_INIT; LOG_WARN("ddl kv is not init", K(ret)); - } else if (OB_UNLIKELY(!macro_block.is_valid())) { + } else if (OB_UNLIKELY(!macro_block.is_valid() || data_format_version <= 0 || snapshot_version <= 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(macro_block)); + LOG_WARN("invalid argument", K(ret), K(macro_block), K(data_format_version), K(snapshot_version)); } else { const uint64_t tenant_id = MTL_ID(); ObUnitInfoGetter::ObTenantConfig unit; @@ -568,19 +1028,26 @@ int ObDDLKV::set_macro_block(ObTablet &tablet, const ObDDLMacroBlock ¯o_bloc } } } - if (OB_SUCC(ret) && get_macro_block_cnt() >= freeze_block_count) { - ObDDLKvMgrHandle ddl_kv_mgr_handle; + if (OB_SUCC(ret) && (get_macro_block_cnt() >= freeze_block_count || get_memory_used() >= MEMORY_LIMIT) && can_freeze) { + ObDDLTableMergeDagParam param; + param.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_DDL; + param.ls_id_ = ls_id_; + param.tablet_id_ = tablet_id_; + param.start_scn_ = ddl_start_scn_; + param.data_format_version_ = data_format_version; + param.snapshot_version_ = snapshot_version; int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(tablet.get_ddl_kv_mgr(ddl_kv_mgr_handle))) { - LOG_WARN("failed to get ddl kv mgr", K(ret)); - } else if (OB_TMP_FAIL(ddl_kv_mgr_handle.get_obj()->schedule_ddl_dump_task(tablet, ddl_start_scn_, SCN::min_scn()))) { + if (OB_TMP_FAIL(ObTabletDDLUtil::freeze_ddl_kv(param))) { + LOG_WARN("try to freeze ddl kv failed", K(tmp_ret), K(param)); + } else if (OB_TMP_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { LOG_WARN("try schedule ddl merge dag failed when ddl kv is full ", - K(tmp_ret), K(ls_id_), K(tablet_id_), K(get_macro_block_cnt())); + K(tmp_ret), K(param), K(get_macro_block_cnt())); } } if (OB_SUCC(ret)) { ObDataMacroBlockMeta *data_macro_meta = nullptr; TCWLockGuard guard(lock_); + // For incremental direct load, ddl_start_scn is set to min_scn(). 
if (macro_block.ddl_start_scn_ != ddl_start_scn_) { if (macro_block.ddl_start_scn_ > ddl_start_scn_) { ret = OB_EAGAIN; @@ -594,20 +1061,57 @@ int ObDDLKV::set_macro_block(ObTablet &tablet, const ObDDLMacroBlock ¯o_bloc } else if (macro_block.scn_ > freeze_scn_) { ret = OB_EAGAIN; LOG_INFO("this ddl kv is freezed, retry other ddl kv", K(ret), K(ls_id_), K(tablet_id_), K(macro_block), K(freeze_scn_)); - } else if (OB_FAIL(ObIndexBlockRebuilder::get_macro_meta(macro_block.buf_, macro_block.size_, macro_block.get_block_id(), arena_allocator_, data_macro_meta))) { - LOG_WARN("get macro meta failed", K(ret), K(macro_block)); - } else if (OB_FAIL(insert_block_meta_tree(macro_block.block_handle_, data_macro_meta))) { - LOG_WARN("insert macro block failed", K(ret), K(macro_block), KPC(data_macro_meta)); } else { - min_scn_ = SCN::min(min_scn_, macro_block.scn_); - max_scn_ = SCN::max(max_scn_, macro_block.scn_); - LOG_INFO("succeed to set macro block into ddl kv", K(macro_block), KPC(data_macro_meta)); + ObDDLMemtable *ddl_memtable = nullptr; + // 1. try find the ddl memtable + if (OB_FAIL(get_ddl_memtable(macro_block.table_key_.get_column_group_id(), ddl_memtable))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("get ddl memtable failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + // 2. if not found, create one + if (OB_SUCC(ret) && OB_ISNULL(ddl_memtable)) { + if (OB_FAIL(create_ddl_memtable(tablet, macro_block.table_key_, ddl_memtable))) { + LOG_WARN("create ddl memtable failed", K(ret), K(macro_block.table_key_)); + } + } + + // 3. 
set macro block into meta tree + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(ddl_memtable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl memtable is null", K(ret)); + } else if (OB_FAIL(ObIndexBlockRebuilder::get_macro_meta( + macro_block.buf_, macro_block.size_, macro_block.get_block_id(), arena_allocator_, data_macro_meta))) { + LOG_WARN("get macro meta failed", K(ret), K(macro_block)); + } else if (data_macro_meta->end_key_.get_datum_cnt() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid end key of data macro block meta", K(ret), K(data_macro_meta->end_key_)); + } else if (macro_block.table_key_.is_cg_sstable()) { // for normal cg, use row id as rowkey + if (!macro_block.is_column_group_info_valid() || !data_macro_meta->end_key_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid ddl macro block", K(ret), K(macro_block), K(data_macro_meta->end_key_)); + } else { + data_macro_meta->end_key_.datums_[0].set_int(macro_block.end_row_id_); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ddl_memtable->insert_block_meta_tree(macro_block.block_handle_, data_macro_meta))) { + LOG_WARN("insert block meta tree faield", K(ret)); + } else { + min_scn_ = SCN::min(min_scn_, macro_block.scn_); + max_scn_ = SCN::max(max_scn_, macro_block.scn_); + ++macro_block_count_; + LOG_INFO("succeed to set macro block into ddl kv", K(macro_block), K(macro_block_count_), KPC(data_macro_meta)); + } } } return ret; } -int ObDDLKV::insert_block_meta_tree(const ObDDLMacroHandle ¯o_handle, blocksstable::ObDataMacroBlockMeta *data_macro_meta) +int ObDDLMemtable::insert_block_meta_tree(const ObDDLMacroHandle ¯o_handle, blocksstable::ObDataMacroBlockMeta *data_macro_meta) { int ret = OB_SUCCESS; if (OB_FAIL(block_meta_tree_.insert_macro_block(macro_handle, &data_macro_meta->end_key_, data_macro_meta))) { @@ -636,7 +1140,7 @@ int ObDDLKV::freeze(const SCN &freeze_scn) LOG_WARN("ddl kv is not init", K(ret)); } else { TCWLockGuard guard(lock_); - if (is_freezed_) { + if (is_freezed()) { 
// do nothing } else { if (freeze_scn.is_valid_and_not_min()) { @@ -665,6 +1169,8 @@ int ObDDLKV::prepare_sstable(const bool need_check/*=true*/) } else if (!is_freezed()) { ret = OB_STATE_NOT_MATCH; LOG_WARN("ddl kv not freezed", K(ret), K(*this)); + } else if (ddl_memtables_.empty()) { + // do nothing } else if (need_check && OB_FAIL(wait_pending())) { if (OB_EAGAIN != ret) { LOG_WARN("wait pending failed", K(ret)); @@ -672,20 +1178,22 @@ int ObDDLKV::prepare_sstable(const bool need_check/*=true*/) } if (OB_SUCC(ret)) { TCWLockGuard guard(lock_); - if (OB_FAIL(block_meta_tree_.build_sorted_rowkeys())) { - LOG_WARN("build sorted keys failed", K(ret), K(block_meta_tree_)); - } else { - key_.scn_range_.start_scn_ = last_freezed_scn_; - key_.scn_range_.end_scn_ = freeze_scn_; + for (int64_t i = 0; OB_SUCC(ret) && i < ddl_memtables_.count(); ++i) { + ObDDLMemtable *ddl_memtable = ddl_memtables_.at(i); + if (OB_ISNULL(ddl_memtable)) { + ret = OB_INVALID_ERROR; + LOG_WARN("ddl memtable is null", K(ret)); + } else { + ddl_memtable->set_scn_range(last_freezed_scn_, freeze_scn_); + } } } return ret; } -int ObDDLKV::close(ObTablet &tablet) +int ObDDLKV::close() { int ret = OB_SUCCESS; - ObArray meta_array; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ddl kv is not init", K(ret)); @@ -694,33 +1202,9 @@ int ObDDLKV::close(ObTablet &tablet) LOG_INFO("ddl kv already closed", K(*this)); } else if (OB_FAIL(prepare_sstable())) { LOG_WARN("prepare sstable failed", K(ret)); - } else if (OB_FAIL(block_meta_tree_.get_sorted_meta_array(meta_array))) { - LOG_WARN("get sorted meta array failed", K(ret)); } else { - ObArenaAllocator allocator("DDLUpTabStore", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); - ObSSTable sstable; - ObTabletDDLParam ddl_param; - ddl_param.tenant_id_ = MTL_ID(); - ddl_param.ls_id_ = ls_id_; - ddl_param.table_key_.tablet_id_ = tablet_id_; - ddl_param.table_key_.table_type_ = ObITable::TableType::DDL_DUMP_SSTABLE; - 
ddl_param.table_key_.scn_range_.start_scn_ = last_freezed_scn_; - ddl_param.table_key_.scn_range_.end_scn_ = freeze_scn_; - ddl_param.start_scn_ = ddl_start_scn_; - ddl_param.snapshot_version_ = snapshot_version_; - ddl_param.data_format_version_ = data_format_version_; - ObTableStoreIterator ddl_table_iter; // need to hold first_sstable until finish - ObSSTable *first_sstable = nullptr; - if (OB_FAIL(ObTabletDDLUtil::try_get_first_ddl_sstable(tablet, ddl_table_iter, first_sstable))) { - LOG_WARN("fail to get first sstable", K(ret), K(tablet)); - } else if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(tablet, ddl_param, meta_array, first_sstable, allocator, sstable))) { - LOG_WARN("create ddl sstable failed", K(ret), K(ddl_param), KP(first_sstable)); - } else if (OB_FAIL(ObTabletDDLUtil::update_ddl_table_store(tablet, ddl_param, allocator, sstable))) { - LOG_WARN("update ddl table store failed", K(ret), K(ddl_param), K(sstable)); - } else { - is_closed_ = true; - LOG_INFO("ddl kv closed success", K(*this)); - } + is_closed_ = true; // data not dumped, just means data is complete + LOG_INFO("ddl kv closed success", K(*this)); } return ret; } @@ -761,11 +1245,33 @@ int ObDDLKV::wait_pending() const bool pending_finished = SCN::plus(max_decided_scn, 1) >= freeze_scn_ && !is_pending(); if (!pending_finished) { ret = OB_EAGAIN; - //if (REACH_TIME_INTERVAL(1000L * 1000L)) { - LOG_INFO("wait pending not finish", K(ret), K(*this), K(max_decided_scn)); - //} + LOG_INFO("wait pending not finish", K(ret), K_(ls_id), K_(tablet_id), K_(freeze_scn), K_(last_freezed_scn), K_(min_scn), K_(max_scn), K(max_decided_scn)); } } } return ret; } + +int64_t ObDDLKV::dec_ref() +{ + int64_t tmp_cnt = ATOMIC_SAF(&ref_cnt_, 1 /* just sub 1 */); + if (0 == tmp_cnt) { + MTL(ObTenantMetaMemMgr *)->release_ddl_kv(this); + } else if (tmp_cnt < 0) { + int ret = OB_ERR_SYS; + LOG_ERROR("ref_cnt of ddl kv less than 0", KP(this)); + } + return tmp_cnt; +} + +int64_t ObDDLKV::get_memory_used() const +{ 
+ int64_t total_used_memory = 0; + TCRLockGuard guard(lock_); + for (int64_t i = 0; i < ddl_memtables_.count(); ++i) { + if (nullptr != ddl_memtables_.at(i)) { + total_used_memory += ddl_memtables_.at(i)->get_memory_used(); + } + } + return total_used_memory; +} diff --git a/src/storage/ddl/ob_tablet_ddl_kv.h b/src/storage/ddl/ob_tablet_ddl_kv.h index 6b631ea39..c1e9401f0 100644 --- a/src/storage/ddl/ob_tablet_ddl_kv.h +++ b/src/storage/ddl/ob_tablet_ddl_kv.h @@ -20,6 +20,7 @@ #include "storage/tablet/ob_tablet.h" #include "storage/blocksstable/ob_block_sstable_struct.h" #include "storage/blocksstable/index_block/ob_index_block_builder.h" +#include "storage/blocksstable/index_block/ob_ddl_index_block_row_iterator.h" #include "storage/checkpoint/ob_freeze_checkpoint.h" #include "storage/memtable/mvcc/ob_keybtree.h" #include "storage/blocksstable/ob_logic_macro_id.h" @@ -40,47 +41,76 @@ class ObDataMacroBlockMeta; namespace storage { +class ObBlockMetaTreeValue final +{ +public: + ObBlockMetaTreeValue() : block_meta_(nullptr), rowkey_(nullptr), header_() {} + ObBlockMetaTreeValue(const blocksstable::ObDataMacroBlockMeta *block_meta, + const blocksstable::ObDatumRowkey *rowkey) + : block_meta_(block_meta), rowkey_(rowkey), header_(){} + ~ObBlockMetaTreeValue() + { + block_meta_ = nullptr; + rowkey_ = nullptr; + } + TO_STRING_KV(KPC_(block_meta), KPC_(rowkey), K_(header)); + +public: + const blocksstable::ObDataMacroBlockMeta *block_meta_; + const blocksstable::ObDatumRowkey *rowkey_; + blocksstable::ObIndexBlockRowHeader header_; +}; + class ObBlockMetaTree { - typedef keybtree::ObKeyBtree KeyBtree; - typedef keybtree::BtreeIterator BtreeIterator; - typedef keybtree::BtreeNodeAllocator BtreeNodeAllocator; - typedef keybtree::BtreeRawIterator BtreeRawIterator; + typedef keybtree::ObKeyBtree KeyBtree; + typedef keybtree::BtreeNodeAllocator BtreeNodeAllocator; + typedef keybtree::BtreeRawIterator BtreeRawIterator; public: ObBlockMetaTree(); virtual ~ObBlockMetaTree(); 
int init(ObTablet &tablet, const ObITable::TableKey &table_key, const share::SCN &ddl_start_scn, - const int64_t data_format_version); + const uint64_t data_format_version); void destroy(); + void destroy_tree_value(); int insert_macro_block(const ObDDLMacroHandle ¯o_handle, const blocksstable::ObDatumRowkey *rowkey, const blocksstable::ObDataMacroBlockMeta *meta); + int locate_key(const blocksstable::ObDatumRange &range, + const blocksstable::ObStorageDatumUtils &datum_utils, + blocksstable::DDLBtreeIterator &iter, + ObBlockMetaTreeValue *&cur_tree_value) const; int locate_range(const blocksstable::ObDatumRange &range, const blocksstable::ObStorageDatumUtils &datum_utils, const bool is_left_border, const bool is_right_border, - int64_t &begin_idx, - int64_t &end_idx); - int skip_to_next_valid_position(const blocksstable::ObDatumRowkey &rowkey, - const blocksstable::ObStorageDatumUtils &datum_utils, - int64_t ¤t_pos); - int get_index_block_row_header(const int64_t idx, - const blocksstable::ObIndexBlockRowHeader *&header, - const blocksstable::ObDatumRowkey *&endkey); - int get_macro_block_meta(const int64_t idx, - blocksstable::ObDataMacroBlockMeta ¯o_meta); + const bool is_reverse_scan, + blocksstable::DDLBtreeIterator &iter, + ObBlockMetaTreeValue *&cur_tree_value) const; + int get_next_tree_value(blocksstable::DDLBtreeIterator &iter, + const int64_t step, + ObBlockMetaTreeValue *&tree_value) const; int64_t get_macro_block_cnt() const { return macro_blocks_.count(); } int get_last_rowkey(const blocksstable::ObDatumRowkey *&last_rowkey); - int build_sorted_rowkeys(); - int get_sorted_meta_array(ObIArray &meta_array) const; + int get_sorted_meta_array(ObIArray &meta_array); int exist(const blocksstable::ObDatumRowkey *rowkey, bool &is_exist); const blocksstable::ObDataStoreDesc &get_data_desc() const { return data_desc_.get_desc(); } - const blocksstable::ObDatumRowkey *get_rowkey(const int64_t idx) const { return sorted_rowkeys_[idx].rowkey_; } - int64_t 
get_rowkey_count() const { return sorted_rowkeys_.count(); } + bool is_valid() const { return is_inited_; } + int64_t get_memory_used() const; + TO_STRING_KV(K(is_inited_), K(macro_blocks_.count()), K(arena_.total()), K(data_desc_)); + +private: + int lower_bound(const blocksstable::ObDatumRowkey *target_rowkey, + const blocksstable::ObStorageDatumUtils &datum_utils, + blocksstable::ObDatumRowkey *&rowkey, + ObBlockMetaTreeValue *&tree_value) const; + int upper_bound(const blocksstable::ObDatumRowkey *target_rowkey, + const blocksstable::ObStorageDatumUtils &datum_utils, + blocksstable::ObDatumRowkey *&rowkey, + ObBlockMetaTreeValue *&tree_value) const; - TO_STRING_KV(K(is_inited_), K(macro_blocks_.count()), K(arena_.total()), K(data_desc_), K(sorted_rowkeys_.count())); private: struct IndexItem final { @@ -96,13 +126,10 @@ private: }; struct CompareFunctor { - CompareFunctor(const blocksstable::ObStorageDatumUtils &datum_utils, - const bool need_compare_datum_cnt = true) - : datum_utils_(datum_utils), need_compare_datum_cnt_(need_compare_datum_cnt) {} + CompareFunctor(const blocksstable::ObStorageDatumUtils &datum_utils) : datum_utils_(datum_utils) {} bool operator ()(const IndexItem &item, const blocksstable::ObDatumRowkey &rowkey); bool operator ()(const blocksstable::ObDatumRowkey &rowkey, const IndexItem &item); const blocksstable::ObStorageDatumUtils &datum_utils_; - const bool need_compare_datum_cnt_; }; private: @@ -112,71 +139,119 @@ private: BtreeNodeAllocator tree_allocator_; KeyBtree block_tree_; blocksstable::ObWholeDataStoreDesc data_desc_; - ObArray sorted_rowkeys_; + blocksstable::ObStorageDatumUtils row_id_datum_utils_; + blocksstable::ObStorageDatumUtils *datum_utils_; }; +class ObDDLMemtable : public blocksstable::ObSSTable +{ +public: + ObDDLMemtable(); + virtual ~ObDDLMemtable(); + int init( + ObTablet &tablet, + const ObITable::TableKey &table_key, + const share::SCN &ddl_start_scn, + const uint64_t data_format_version); + void reset(); + int 
insert_block_meta_tree( + const ObDDLMacroHandle ¯o_handle, + blocksstable::ObDataMacroBlockMeta *data_macro_meta); + void set_scn_range( + const share::SCN &start_scn, + const share::SCN &end_scn); + int get_sorted_meta_array( + ObIArray &meta_array); + const ObBlockMetaTree *get_block_meta_tree() { return &block_meta_tree_; } + int init_ddl_index_iterator(const blocksstable::ObStorageDatumUtils *datum_utils, + const bool is_reverse_scan, + blocksstable::ObDDLIndexBlockRowIterator *ddl_kv_index_iter); + int64_t get_memory_used() const { return block_meta_tree_.get_memory_used(); } + INHERIT_TO_STRING_KV("ObSSTable", ObSSTable, K(is_inited_), K(block_meta_tree_)); +private: + int init_sstable_param( + ObTablet &tablet, + const ObITable::TableKey &table_key, + const share::SCN &ddl_start_scn, + ObTabletCreateSSTableParam &sstable_param); +private: + bool is_inited_; + ObArenaAllocator allocator_; + ObBlockMetaTree block_meta_tree_; +}; -class ObDDLKV : public blocksstable::ObSSTable +class ObDDLKV { public: ObDDLKV(); - virtual ~ObDDLKV(); - virtual void inc_ref() override; - virtual int64_t dec_ref() override; - virtual int64_t get_ref() const override { return ObITable::get_ref(); } - int init(ObTablet &tablet, + ~ObDDLKV(); + int init(const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, const share::SCN &ddl_start_scn, const int64_t snapshot_version, const share::SCN &last_freezed_scn, - const int64_t data_format_version); + const uint64_t data_format_version); void reset(); - int set_macro_block(ObTablet &tablet, const ObDDLMacroBlock ¯o_block); + int set_macro_block( + ObTablet &tablet, + const ObDDLMacroBlock ¯o_block, + const int64_t snapshot_version, + const uint64_t data_format_version, + const bool can_freeze); int freeze(const share::SCN &freeze_scn); bool is_freezed() const { return ATOMIC_LOAD(&is_freezed_); } - int close(ObTablet &tablet); + int close(); int prepare_sstable(const bool need_check = true); bool is_closed() const { return 
is_closed_; } share::SCN get_min_scn() const { return min_scn_; } share::SCN get_freeze_scn() const { return freeze_scn_; } share::SCN get_ddl_start_scn() const { return ddl_start_scn_; } share::SCN get_start_scn() const { return last_freezed_scn_; } - int64_t get_macro_block_cnt() const { return block_meta_tree_.get_macro_block_cnt(); } + share::SCN get_end_scn() const { return freeze_scn_; } + int64_t get_macro_block_cnt() const { return macro_block_count_; } + int create_ddl_memtable(ObTablet &tablet, const ObITable::TableKey &table_key, ObDDLMemtable *&ddl_memtable); + int get_ddl_memtable(const int64_t cg_idx, ObDDLMemtable *&ddl_memtable); + ObIArray &get_ddl_memtables() { return ddl_memtables_; } void inc_pending_cnt(); // used by ddl kv pending guard void dec_pending_cnt(); + void inc_ref(); + int64_t dec_ref(); + int64_t get_ref() { return ATOMIC_LOAD(&ref_cnt_); } + const common::ObTabletID &get_tablet_id() const { return tablet_id_; } + int64_t get_snapshot_version() const { return snapshot_version_; } + int64_t get_memory_used() const; + TO_STRING_KV(K_(is_inited), K_(is_closed), K_(ref_cnt), K_(ls_id), K_(tablet_id), + K_(ddl_start_scn), K_(snapshot_version), K_(data_format_version), + K_(is_freezed), K_(last_freezed_scn), K_(min_scn), K_(max_scn), K_(freeze_scn), K_(pending_cnt), + K_(macro_block_count), K_(ddl_memtables)); +private: bool is_pending() const { return ATOMIC_LOAD(&pending_cnt_) > 0; } int wait_pending(); - INHERIT_TO_STRING_KV("ObSSTable", ObSSTable, K_(is_inited), K_(ls_id), K_(tablet_id), K_(ddl_start_scn), K_(snapshot_version), - K_(is_freezed), K_(is_closed), - K_(last_freezed_scn), K_(min_scn), K_(max_scn), K_(freeze_scn), - K_(pending_cnt), K_(data_format_version), K_(ref_cnt), - K_(block_meta_tree)); -private: - int insert_block_meta_tree(const ObDDLMacroHandle ¯o_handle, - blocksstable::ObDataMacroBlockMeta *data_macro_meta); - int init_sstable_param(ObTablet &tablet, - const ObITable::TableKey &table_key, - const share::SCN 
&ddl_start_scn, - ObTabletCreateSSTableParam &sstable_param); private: static const int64_t TOTAL_LIMIT = 10 * 1024 * 1024 * 1024L; static const int64_t HOLD_LIMIT = 10 * 1024 * 1024 * 1024L; bool is_inited_; + bool is_closed_; + int64_t ref_cnt_; + common::TCRWLock lock_; // lock for block_meta_tree_ and freeze_log_ts_ + common::ObArenaAllocator arena_allocator_; share::ObLSID ls_id_; common::ObTabletID tablet_id_; share::SCN ddl_start_scn_; // the log ts of ddl start log int64_t snapshot_version_; // the snapshot version for major sstable which is completed by ddl - common::TCRWLock lock_; // lock for block_meta_tree_ and freeze_log_ts_ - common::ObArenaAllocator arena_allocator_; + uint64_t data_format_version_; + + // freeze related bool is_freezed_; - bool is_closed_; share::SCN last_freezed_scn_; // the freezed log ts of last ddl kv. the log ts range of this ddl kv is (last_freezed_log_ts_, freeze_log_ts_] share::SCN min_scn_; // the min log ts of macro blocks share::SCN max_scn_; // the max log ts of macro blocks share::SCN freeze_scn_; // ddl kv refuse data larger than freeze log ts, freeze_log_ts >= max_log_ts int64_t pending_cnt_; // the amount of kvs that are replaying - int64_t data_format_version_; - ObBlockMetaTree block_meta_tree_; + + int64_t macro_block_count_; + ObArray ddl_memtables_; }; diff --git a/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp b/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp index 924ebf753..e13d17a8c 100644 --- a/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp +++ b/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp @@ -18,6 +18,7 @@ #include "storage/ddl/ob_ddl_struct.h" #include "storage/ddl/ob_tablet_ddl_kv.h" #include "storage/ddl/ob_ddl_merge_task.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/blocksstable/index_block/ob_sstable_sec_meta_iterator.h" #include "storage/compaction/ob_schedule_dag_func.h" #include "storage/tx_storage/ob_ls_service.h" @@ -30,9 +31,10 @@ using namespace oceanbase::share; using namespace 
oceanbase::storage; ObTabletDDLKvMgr::ObTabletDDLKvMgr() - : is_inited_(false), ls_id_(), tablet_id_(), success_start_scn_(SCN::min_scn()), table_key_(), data_format_version_(0), - start_scn_(SCN::min_scn()), commit_scn_(SCN::min_scn()), execution_id_(-1), state_lock_(), - max_freeze_scn_(SCN::min_scn()), head_(0), tail_(0), lock_(), ref_cnt_(0) + : is_inited_(false), + ls_id_(), tablet_id_(), + max_freeze_scn_(SCN::min_scn()), + head_(0), tail_(0), lock_(), ref_cnt_(0) { } @@ -43,9 +45,6 @@ ObTabletDDLKvMgr::~ObTabletDDLKvMgr() void ObTabletDDLKvMgr::destroy() { - if (is_started()) { - LOG_INFO("start destroy ddl kv manager", K(ls_id_), K(tablet_id_), K(start_scn_), K(head_), K(tail_), K(lbt())); - } ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); ATOMIC_STORE(&ref_cnt_, 0); for (int64_t pos = head_; pos < tail_; ++pos) { @@ -59,13 +58,7 @@ void ObTabletDDLKvMgr::destroy() } ls_id_.reset(); tablet_id_.reset(); - table_key_.reset(); - data_format_version_ = 0; - start_scn_.set_min(); - commit_scn_.set_min(); max_freeze_scn_.set_min(); - execution_id_ = -1; - success_start_scn_.set_min(); is_inited_ = false; } @@ -79,6 +72,7 @@ int ObTabletDDLKvMgr::init(const share::ObLSID &ls_id, const common::ObTabletID ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ls_id), K(tablet_id)); } else { + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); ls_id_ = ls_id; tablet_id_ = tablet_id; is_inited_ = true; @@ -86,301 +80,15 @@ int ObTabletDDLKvMgr::init(const share::ObLSID &ls_id, const common::ObTabletID return ret; } -int ObTabletDDLKvMgr::ddl_start_nolock(const ObITable::TableKey &table_key, - const SCN &start_scn, - const int64_t data_format_version, - const int64_t execution_id, - const SCN &checkpoint_scn) +int ObTabletDDLKvMgr::set_max_freeze_scn(const share::SCN &checkpoint_scn) { int ret = OB_SUCCESS; - bool is_brand_new = false; - ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - if 
(OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (OB_UNLIKELY(!table_key.is_valid() || !start_scn.is_valid_and_not_min() || execution_id < 0 || data_format_version < 0 - || (checkpoint_scn.is_valid_and_not_min() && checkpoint_scn < start_scn))) { + if (OB_UNLIKELY(!checkpoint_scn.is_valid_and_not_min())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(table_key), K(start_scn), K(execution_id), K(data_format_version), K(checkpoint_scn)); - } else if (table_key.get_tablet_id() != tablet_id_) { - ret = OB_ERR_SYS; - LOG_WARN("tablet id not same", K(ret), K(table_key), K(tablet_id_)); + LOG_WARN("invalid arg", K(ret), K(checkpoint_scn)); } else { - if (start_scn_.is_valid_and_not_min()) { - if (execution_id >= execution_id_ && start_scn >= start_scn_) { - LOG_INFO("execution id changed, need cleanup", K(ls_id_), K(tablet_id_), K(execution_id_), K(execution_id), K(start_scn_), K(start_scn)); - cleanup_unlock(); - is_brand_new = true; - } else { - if (!checkpoint_scn.is_valid_and_not_min()) { - // only return error code when not start from checkpoint. 
- ret = OB_TASK_EXPIRED; - } - LOG_INFO("ddl start ignored", K(ls_id_), K(tablet_id_), K(execution_id_), K(execution_id), K(start_scn_), K(start_scn)); - } - } else { - is_brand_new = true; - } - if (OB_SUCC(ret) && is_brand_new) { - table_key_ = table_key; - data_format_version_ = data_format_version; - execution_id_ = execution_id; - start_scn_.atomic_store(start_scn); - max_freeze_scn_ = SCN::max(start_scn, checkpoint_scn); - } - } - return ret; -} - -// ddl start from log -// cleanup ddl sstable -// ddl start from checkpoint -// keep ddl sstable table - -int ObTabletDDLKvMgr::ddl_start(ObLS &ls, - ObTablet &tablet, - const ObITable::TableKey &table_key, - const SCN &start_scn, - const int64_t data_format_version, - const int64_t execution_id, - const SCN &checkpoint_scn) -{ - int ret = OB_SUCCESS; - SCN saved_start_scn; - int64_t saved_snapshot_version = 0; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else { - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - if (OB_FAIL(ddl_start_nolock(table_key, start_scn, data_format_version, execution_id, checkpoint_scn))) { - LOG_WARN("failed to ddl start", K(ret)); - } else if (OB_FAIL(ls.get_ddl_log_handler()->add_tablet(tablet_id_))) { - LOG_WARN("add tablet failed", K(ret)); - } else { - // save variables under lock - saved_start_scn = start_scn_; - saved_snapshot_version = table_key_.get_snapshot_version(); - commit_scn_.atomic_store(get_commit_scn(tablet.get_tablet_meta())); - if (checkpoint_scn.is_valid_and_not_min()) { - if (tablet.get_tablet_meta().table_store_flag_.with_major_sstable() && tablet.get_tablet_meta().ddl_commit_scn_.is_valid_and_not_min()) { - success_start_scn_.atomic_store(tablet.get_tablet_meta().ddl_start_scn_); - } - } - } - } - if (OB_SUCC(ret) && !checkpoint_scn.is_valid_and_not_min()) { - // remove ddl sstable if exists and flush ddl start log ts and snapshot version into tablet meta - if 
(OB_FAIL(update_tablet(tablet, saved_start_scn, saved_snapshot_version, data_format_version, execution_id, saved_start_scn))) { - LOG_WARN("clean up ddl sstable failed", K(ret), K(ls_id_), K(tablet_id_)); - } - } - FLOG_INFO("start ddl kv mgr finished", K(ret), K(start_scn), K(execution_id), K(checkpoint_scn), K(*this)); - return ret; -} - -int ObTabletDDLKvMgr::ddl_commit(ObTablet &tablet, const SCN &start_scn, const SCN &commit_scn) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (!is_started()) { - ret = OB_STATE_NOT_MATCH; - LOG_WARN("ddl not started", K(ret)); - } else if (start_scn < get_start_scn()) { - ret = OB_TASK_EXPIRED; - LOG_INFO("skip ddl commit log", K(start_scn), K(*this)); - } else if (OB_FAIL(set_commit_scn(tablet.get_tablet_meta(), commit_scn))) { - LOG_WARN("failed to set commit scn", K(ret)); - } else if (OB_FAIL(freeze_ddl_kv(tablet, commit_scn))) { - LOG_WARN("freeze ddl kv failed", K(ret), K(commit_scn)); - } else { - ret = OB_EAGAIN; - while (OB_EAGAIN == ret) { - if (OB_FAIL(update_ddl_major_sstable(tablet))) { - LOG_WARN("update ddl major sstable failed", K(ret)); - } - if (OB_EAGAIN == ret) { - usleep(1000L); - } - } - - ObDDLTableMergeDagParam param; - param.ls_id_ = ls_id_; - param.tablet_id_ = tablet_id_; - param.rec_scn_ = commit_scn; - param.is_commit_ = true; - param.start_scn_ = start_scn; - param.compat_mode_ = tablet.get_tablet_meta().compat_mode_; - const int64_t start_ts = ObTimeUtility::fast_current_time(); - if (OB_FAIL(ret)) { - } else if (OB_FAIL(tablet.get_ddl_kv_mgr(param.ddl_kv_mgr_handle_))) { - LOG_WARN("failed to get ddl kv mgr", K(ret)); - } else if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { - if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { - LOG_WARN("schedule ddl merge dag failed", K(ret), K(param)); - } else { - ret = OB_SUCCESS; // the backgroud scheduler will reschedule again - 
LOG_INFO("schedule ddl merge task need retry", - K(start_scn), K(commit_scn), K(*this), - "wait_elpased_s", (ObTimeUtility::fast_current_time() - start_ts) / 1000000L); - } - } else { - LOG_INFO("schedule ddl commit task success", K(start_scn), K(commit_scn), K(*this), "ddl_event_info", ObDDLEventInfo()); - } - } - return ret; -} - -int ObTabletDDLKvMgr::schedule_ddl_dump_task(ObTablet &tablet, const SCN &start_scn, const SCN &rec_scn) -{ - int ret = OB_SUCCESS; - ObDDLTableMergeDagParam param; - param.ls_id_ = ls_id_; - param.tablet_id_ = tablet_id_; - param.rec_scn_ = rec_scn; - param.is_commit_ = false; - param.start_scn_ = start_scn; - param.compat_mode_ = tablet.get_tablet_meta().compat_mode_; - LOG_INFO("schedule ddl dump task", K(param), "ddl_event_info", ObDDLEventInfo()); - if (OB_UNLIKELY(tablet.get_tablet_meta().tablet_id_ != tablet_id_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("tablet id mismatched", K(ret), K(tablet), KPC(this)); - } else if (OB_FAIL(tablet.get_ddl_kv_mgr(param.ddl_kv_mgr_handle_))) { - LOG_WARN("failed to get ddl kv mgr", K(ret)); - } else if (OB_FAIL(freeze_ddl_kv(tablet))) { - LOG_WARN("ddl kv manager try freeze failed", K(ret), K(param)); - } else if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { - if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { - LOG_WARN("schedule ddl merge dag failed", K(ret), K(param)); - } - } - return ret; -} - -int ObTabletDDLKvMgr::schedule_ddl_merge_task(ObTablet &tablet, const SCN &start_scn, const SCN &commit_scn) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (is_commit_success()) { - FLOG_INFO("ddl commit already succeed", K(start_scn), K(commit_scn), K(*this)); - } else if (start_scn < get_start_scn()) { - ret = OB_TASK_EXPIRED; - LOG_INFO("skip ddl commit log", K(start_scn), K(commit_scn), K(*this)); - } else { - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - 
ObDDLTableMergeDagParam param; - param.ls_id_ = ls_id_; - param.tablet_id_ = tablet_id_; - param.rec_scn_ = commit_scn; - param.is_commit_ = true; - param.start_scn_ = start_scn; - param.compat_mode_ = tablet.get_tablet_meta().compat_mode_; - // check ls/tablet state by get_ls/ddl_get_tablet, and retry submit dag in case of the previous dag failed - if (OB_FAIL(tablet.get_ddl_kv_mgr(param.ddl_kv_mgr_handle_))) { - LOG_WARN("failed to get ddl kv", K(ret), K(param)); - } else if (OB_FAIL(freeze_ddl_kv(tablet))) { - LOG_WARN("ddl kv manager try freeze failed", K(ret), K(param)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(param.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(param)); - } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, - param.tablet_id_, - tablet_handle, - ObMDSGetTabletMode::READ_ALL_COMMITED))) { - LOG_WARN("failed to get tablet", K(ret), K(param)); - } else if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { - if (OB_SIZE_OVERFLOW == ret || OB_EAGAIN == ret) { - ret = OB_EAGAIN; - } else { - LOG_WARN("schedule ddl merge dag failed", K(ret), K(param)); - } - } else { - ret = OB_EAGAIN; // until major sstable is ready -#ifdef ERRSIM - if (GCONF.errsim_ddl_major_delay_time.get() > 0) { - ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - if (commit_scn_.is_valid_and_not_min()) { - ret = OB_SUCCESS; - FLOG_INFO("assume ddl success for delay schedule ddl merge task", K(ret), KPC(this)); - } - } -#endif - } - } - return ret; -} - -int ObTabletDDLKvMgr::wait_ddl_merge_success(ObTablet &tablet, const SCN &start_scn, const SCN &commit_scn) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (OB_UNLIKELY(!start_scn.is_valid_and_not_min() || !commit_scn.is_valid_and_not_min())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(start_scn), 
K(commit_scn)); - } else if (!is_started()) { - ret = OB_STATE_NOT_MATCH; - LOG_WARN("ddl not started", K(ret)); - } else if (start_scn > get_start_scn()) { - ret = OB_ERR_SYS; - LOG_WARN("start log ts not match", K(ret), K(start_scn), K(start_scn_), K(ls_id_), K(tablet_id_)); - } else { - const int64_t wait_start_ts = ObTimeUtility::fast_current_time(); - while (OB_SUCC(ret)) { - if (OB_FAIL(THIS_WORKER.check_status())) { - LOG_WARN("check status failed", K(ret)); - } else if (OB_FAIL(schedule_ddl_merge_task(tablet, start_scn, commit_scn))) { - if (OB_EAGAIN == ret) { -#ifdef ERRSIM - ob_usleep(1000L * 1000L); // 1s -#else - ob_usleep(100L); // 100us. -#endif - ret = OB_SUCCESS; // retry - } else { - LOG_WARN("commit ddl log failed", K(ret), K(start_scn), K(commit_scn), K(ls_id_), K(tablet_id_)); - } - } else { - break; - } - if (REACH_TIME_INTERVAL(10L * 1000L * 1000L)) { - LOG_INFO("wait build ddl sstable", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_), K(commit_scn), K(max_freeze_scn_), - "wait_elpased_s", (ObTimeUtility::fast_current_time() - wait_start_ts) / 1000000L); - } - } - } - return ret; -} - -int ObTabletDDLKvMgr::get_ddl_major_merge_param(ObTablet &tablet, ObDDLTableMergeDagParam ¶m) -{ - int ret = OB_SUCCESS; - uint32_t lock_tid = 0; - if (OB_FAIL(tablet.get_ddl_kv_mgr(param.ddl_kv_mgr_handle_))) { - LOG_WARN("failed to get ddl kv mgr", K(ret)); - } else if (OB_FAIL(rdlock(TRY_LOCK_TIMEOUT, lock_tid))) { - LOG_WARN("failed to rdlock", K(ret), KPC(this)); - } else if (can_schedule_major_compaction_nolock(tablet.get_tablet_meta()) -#ifdef ERRSIM - && ObTimeUtility::current_time() - get_commit_scn(tablet.get_tablet_meta()).convert_to_ts() > GCONF.errsim_ddl_major_delay_time -#endif - ) { - param.ls_id_ = ls_id_; - param.tablet_id_ = tablet_id_; - param.rec_scn_ = get_commit_scn(tablet.get_tablet_meta()); - param.is_commit_ = true; - param.start_scn_ = start_scn_; - param.compat_mode_ = tablet.get_tablet_meta().compat_mode_; - } else { - ret = 
OB_EAGAIN; - } - if (0 != lock_tid) { - unlock(lock_tid); + ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); + max_freeze_scn_ = checkpoint_scn; } return ret; } @@ -390,12 +98,32 @@ int ObTabletDDLKvMgr::get_rec_scn(SCN &rec_scn) int ret = OB_SUCCESS; ObLSHandle ls_handle; ObTabletHandle tablet_handle; - const bool is_commit_succ = is_commit_success(); + ObTabletFullDirectLoadMgr *tablet_mgr = nullptr; + ObTabletDirectLoadMgrHandle direct_load_mgr_hdl; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + bool is_major_sstable_exist = false; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys", K(ret), K(MTL_ID())); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + ls_id_, + tablet_id_, + true/* is_full_direct_load */, + direct_load_mgr_hdl, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + ret = OB_SUCCESS; + } else { + LOG_WARN("get tablet mgr failed", K(ret), K(tablet_id_)); + } + } else if (OB_ISNULL(tablet_mgr = direct_load_mgr_hdl.get_full_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(ls_id_), K(tablet_id_)); } - if (OB_SUCC(ret) && !is_commit_succ) { + if (OB_SUCC(ret) && nullptr != tablet_mgr) { if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, @@ -407,11 +135,11 @@ int ObTabletDDLKvMgr::get_rec_scn(SCN &rec_scn) // rec scn of ddl start log if (OB_SUCC(ret)) { + const share::SCN start_scn_in_mem = tablet_mgr->get_start_scn(); const ObTabletMeta &tablet_meta = tablet_handle.get_obj()->get_tablet_meta(); - const SCN start_scn = get_start_scn(); - if (start_scn.is_valid_and_not_min() && start_scn > tablet_meta.ddl_start_scn_) { + if 
(start_scn_in_mem.is_valid_and_not_min() && start_scn_in_mem > tablet_meta.ddl_start_scn_) { // has a latest start log and not flushed to tablet meta, keep it - rec_scn = SCN::min(rec_scn, start_scn); + rec_scn = SCN::min(rec_scn, start_scn_in_mem); } } @@ -421,7 +149,7 @@ int ObTabletDDLKvMgr::get_rec_scn(SCN &rec_scn) if (tablet_meta.ddl_commit_scn_.is_valid_and_not_min()) { // has commit log and already dumped to tablet meta, skip } else { - const SCN commit_scn = get_commit_scn(tablet_meta); + const SCN commit_scn = tablet_mgr->get_commit_scn(tablet_meta); if (commit_scn.is_valid_and_not_min()) { // has commit log and not yet dumped to tablet meta rec_scn = SCN::min(rec_scn, commit_scn); @@ -449,93 +177,6 @@ int ObTabletDDLKvMgr::get_rec_scn(SCN &rec_scn) return ret; } -int ObTabletDDLKvMgr::set_commit_scn(const ObTabletMeta &tablet_meta, const SCN &commit_scn) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (OB_UNLIKELY(commit_scn <= SCN::min_scn())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(commit_scn)); - } else { - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - const SCN old_commit_scn = get_commit_scn(tablet_meta); - if (old_commit_scn.is_valid_and_not_min() && old_commit_scn != commit_scn) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("already committed by others", K(ret), K(commit_scn), K(*this)); - } else { - commit_scn_.atomic_store(commit_scn); - } - } - return ret; -} - -SCN ObTabletDDLKvMgr::get_commit_scn(const ObTabletMeta &tablet_meta) -{ - SCN mgr_commit_scn = commit_scn_.atomic_load(); - SCN commit_scn = SCN::min_scn(); - if (tablet_meta.ddl_commit_scn_.is_valid_and_not_min() || mgr_commit_scn.is_valid_and_not_min()) { - if (tablet_meta.ddl_commit_scn_.is_valid_and_not_min()) { - commit_scn = tablet_meta.ddl_commit_scn_; - } else { - commit_scn = mgr_commit_scn; - } - } else { - commit_scn = 
SCN::min_scn(); - } - return commit_scn; -} - -int ObTabletDDLKvMgr::set_commit_success(const SCN &start_scn) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (OB_UNLIKELY(start_scn <= SCN::min_scn())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(start_scn)); - } else { - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - if (start_scn < start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_WARN("ddl task expired", K(ret), K(start_scn), K(*this)); - } else if (OB_UNLIKELY(start_scn > start_scn_)) { - if (start_scn_.is_valid_and_not_min()) { - ret = OB_ERR_SYS; - LOG_WARN("sucess start log ts too large", K(ret), K(start_scn), K(*this)); - } else { - ret = OB_EAGAIN; - if (REACH_TIME_INTERVAL(1000L * 1000L * 60L)) { - LOG_INFO("ddl start scn is invalid, maybe migration has offlined the logstream", K(*this)); - } - } - } else { - success_start_scn_.atomic_store(start_scn); - } - } - return ret; -} - -bool ObTabletDDLKvMgr::is_commit_success() -{ - const SCN success_start_scn = success_start_scn_.atomic_load(); - const SCN start_scn = start_scn_.atomic_load(); - return success_start_scn > SCN::min_scn() && success_start_scn == start_scn; -} - -void ObTabletDDLKvMgr::reset_commit_success() -{ - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - success_start_scn_.atomic_store(SCN::min_scn()); -} - -bool ObTabletDDLKvMgr::can_schedule_major_compaction_nolock(const ObTabletMeta &tablet_meta) -{ - return get_commit_scn(tablet_meta).is_valid_and_not_min() && !is_commit_success(); -} - int ObTabletDDLKvMgr::cleanup() { int ret = OB_SUCCESS; @@ -543,7 +184,6 @@ int ObTabletDDLKvMgr::cleanup() ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else { - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); cleanup_unlock(); } @@ 
-562,18 +202,7 @@ void ObTabletDDLKvMgr::cleanup_unlock() for (int64_t i = 0; i < MAX_DDL_KV_CNT_IN_STORAGE; ++i) { ddl_kv_handles_[i].reset(); } - table_key_.reset(); - data_format_version_ = 0; - start_scn_.atomic_store(SCN::min_scn()); - commit_scn_.atomic_store(SCN::min_scn()); max_freeze_scn_.set_min(); - execution_id_ = -1; - success_start_scn_.atomic_store(SCN::min_scn()); -} - -bool ObTabletDDLKvMgr::is_execution_id_older(const int64_t execution_id) -{ - return execution_id < execution_id_; } int ObTabletDDLKvMgr::online() @@ -588,114 +217,10 @@ int ObTabletDDLKvMgr::online() LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, tablet_handle, - 0, - ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { + ObTabletCommon::DEFAULT_GET_TABLET_NO_WAIT))) { LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); } else if (OB_FAIL(cleanup())) { LOG_WARN("failed to cleanup ddl kv mgr", K(ret), KPC(tablet_handle.get_obj())); - } else if (!tablet_handle.get_obj()->get_tablet_meta().ddl_start_scn_.is_valid_and_not_min()) { - LOG_DEBUG("no need to start ddl kv manager", K(ret), "tablet_meta", tablet_handle.get_obj()->get_tablet_meta()); - } else { - const ObTabletMeta &tablet_meta = tablet_handle.get_obj()->get_tablet_meta(); - ObITable::TableKey table_key; - table_key.table_type_ = ObITable::TableType::MAJOR_SSTABLE; - table_key.tablet_id_ = tablet_meta.tablet_id_; - table_key.version_range_.base_version_ = 0; - table_key.version_range_.snapshot_version_ = tablet_meta.ddl_snapshot_version_; - const SCN &start_scn = tablet_meta.ddl_start_scn_; - if (OB_FAIL(ddl_start(*ls_handle.get_ls(), - *tablet_handle.get_obj(), - table_key, - start_scn, - tablet_meta.ddl_data_format_version_, - tablet_meta.ddl_execution_id_, - tablet_meta.ddl_checkpoint_scn_))) { - if (OB_TASK_EXPIRED == ret) { - ret = OB_SUCCESS; - } else { - LOG_WARN("start ddl kv manager failed", K(ret), K(tablet_meta)); - } - } - } - 
return ret; -} - -int ObTabletDDLKvMgr::register_to_tablet(const SCN &ddl_start_scn, ObDDLKvMgrHandle &kv_mgr_handle) -{ - int ret = OB_SUCCESS; - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(!ddl_start_scn.is_valid_and_not_min() || kv_mgr_handle.get_obj() != this)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ddl_start_scn), KP(kv_mgr_handle.get_obj()), KP(this)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, - tablet_handle, - 0, - ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { - LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); - } else { - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - if (ddl_start_scn < start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_INFO("ddl task expired", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_), K(ddl_start_scn)); - } else if (ddl_start_scn > start_scn_) { - if (SCN::min_scn() == start_scn_) { - // maybe ls offline - ret = OB_EAGAIN; - } else { - ret = OB_ERR_SYS; - } - LOG_WARN("ddl kv mgr register before start", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_), K(ddl_start_scn)); - } else { - if (OB_FAIL(tablet_handle.get_obj()->set_ddl_kv_mgr(kv_mgr_handle))) { - LOG_WARN("set ddl kv mgr into tablet failed", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_)); - } - } - } - return ret; -} - -int ObTabletDDLKvMgr::unregister_from_tablet(const SCN &ddl_start_scn, ObDDLKvMgrHandle &kv_mgr_handle) -{ - int ret = OB_SUCCESS; - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(!ddl_start_scn.is_valid_and_not_min() || kv_mgr_handle.get_obj() != this)) { - ret = 
OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ddl_start_scn), KP(kv_mgr_handle.get_obj()), KP(this)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, - tablet_handle, - 0, - ObMDSGetTabletMode::READ_WITHOUT_CHECK))) { - LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); - } else { - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - if (ddl_start_scn < start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_INFO("ddl task expired", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_), K(ddl_start_scn)); - } else if (ddl_start_scn > start_scn_) { - if (SCN::min_scn() == start_scn_) { - // maybe ls offline - ret = OB_EAGAIN; - } else { - ret = OB_ERR_SYS; - } - LOG_WARN("ddl kv mgr register before start", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_), K(ddl_start_scn)); - } else { - if (OB_FAIL(tablet_handle.get_obj()->remove_ddl_kv_mgr(kv_mgr_handle))) { - LOG_WARN("remove ddl kv mgr from tablet failed", K(ret), K(ls_id_), K(tablet_id_), K(start_scn_)); - } - } } return ret; } @@ -704,7 +229,7 @@ int ObTabletDDLKvMgr::rdlock(const int64_t timeout_us, uint32_t &tid) { int ret = OB_SUCCESS; const int64_t abs_timeout_us = timeout_us + ObTimeUtility::current_time(); - if (OB_SUCC(state_lock_.rdlock(ObLatchIds::TABLET_DDL_KV_MGR_LOCK, abs_timeout_us))) { + if (OB_SUCC(lock_.rdlock(ObLatchIds::TABLET_DDL_KV_MGR_LOCK, abs_timeout_us))) { tid = static_cast(GETTID()); } if (OB_TIMEOUT == ret) { @@ -717,7 +242,7 @@ int ObTabletDDLKvMgr::wrlock(const int64_t timeout_us, uint32_t &tid) { int ret = OB_SUCCESS; const int64_t abs_timeout_us = timeout_us + ObTimeUtility::current_time(); - if (OB_SUCC(state_lock_.wrlock(ObLatchIds::TABLET_DDL_KV_MGR_LOCK, abs_timeout_us))) { + if (OB_SUCC(lock_.wrlock(ObLatchIds::TABLET_DDL_KV_MGR_LOCK, abs_timeout_us))) { tid = 
static_cast(GETTID()); } if (OB_TIMEOUT == ret) { @@ -728,132 +253,12 @@ int ObTabletDDLKvMgr::wrlock(const int64_t timeout_us, uint32_t &tid) void ObTabletDDLKvMgr::unlock(const uint32_t tid) { - if (OB_SUCCESS != state_lock_.unlock(&tid)) { + if (OB_SUCCESS != lock_.unlock(&tid)) { ob_abort(); } } -int ObTabletDDLKvMgr::update_tablet(ObTablet &tablet, - const SCN &start_scn, - const int64_t snapshot_version, - const int64_t data_format_version, - const int64_t execution_id, - const SCN &ddl_checkpoint_scn) -{ - int ret = OB_SUCCESS; - ObLSHandle ls_handle; - ObArenaAllocator tmp_arena("DDLUpdateTblTmp"); - ObStorageSchema *storage_schema = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_UNLIKELY(!start_scn.is_valid_and_not_min() || snapshot_version <= 0 || !ddl_checkpoint_scn.is_valid_and_not_min())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(start_scn), K(snapshot_version), K(ddl_checkpoint_scn)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(tablet.load_storage_schema(tmp_arena, storage_schema))) { - LOG_WARN("failed to load storage schema", K(ret), K(tablet)); - } else { - ObSSTable sstable; - const int64_t rebuild_seq = ls_handle.get_ls()->get_rebuild_seq(); - ObTabletHandle new_tablet_handle; - ObUpdateTableStoreParam param(tablet.get_snapshot_version(), - ObVersionRange::MIN_VERSION, // multi_version_start - storage_schema, - rebuild_seq); - param.ddl_info_.keep_old_ddl_sstable_ = false; - param.ddl_info_.ddl_start_scn_ = start_scn; - param.ddl_info_.ddl_snapshot_version_ = snapshot_version; - param.ddl_info_.ddl_checkpoint_scn_ = ddl_checkpoint_scn; - param.ddl_info_.ddl_execution_id_ = execution_id; - param.ddl_info_.data_format_version_ = data_format_version; - if (OB_FAIL(create_empty_ddl_sstable(tablet, tmp_arena, sstable))) { - 
LOG_WARN("create empty ddl sstable failed", K(ret)); - } else if (FALSE_IT(param.sstable_ = &sstable)) { - } else if (OB_FAIL(ls_handle.get_ls()->update_tablet_table_store(tablet_id_, param, new_tablet_handle))) { - LOG_WARN("failed to update tablet table store", K(ret), K(ls_id_), K(tablet_id_), K(param)); - } else { - LOG_INFO("update tablet success", K(ls_id_), K(tablet_id_), K(param), K(start_scn), K(snapshot_version), K(ddl_checkpoint_scn)); - } - } - ObTabletObjLoadHelper::free(tmp_arena, storage_schema); - return ret; -} - -int ObTabletDDLKvMgr::create_empty_ddl_sstable(ObTablet &tablet, common::ObArenaAllocator &allocator, blocksstable::ObSSTable &sstable) -{ - int ret = OB_SUCCESS; - ObTabletDDLParam ddl_param; - if (OB_FAIL(get_ddl_param(ddl_param))) { - LOG_WARN("get ddl param failed", K(ret)); - } else { - ddl_param.table_key_.table_type_ = ObITable::DDL_DUMP_SSTABLE; - ddl_param.table_key_.scn_range_.start_scn_ = SCN::scn_dec(start_scn_); - ddl_param.table_key_.scn_range_.end_scn_ = start_scn_; - ObArray empty_meta_array; - if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(tablet, ddl_param, empty_meta_array, nullptr/*first_ddl_sstable*/, allocator, sstable))) { - LOG_WARN("create empty ddl sstable failed", K(ret)); - } - } - return ret; -} - -int ObTabletDDLKvMgr::update_ddl_major_sstable(ObTablet &tablet) -{ - int ret = OB_SUCCESS; - ObLSHandle ls_handle; - ObTabletHandle tablet_handle; - ObArenaAllocator allocator; - ObStorageSchema *storage_schema = nullptr; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(tablet.load_storage_schema(allocator, storage_schema))) { - LOG_WARN("load storage schema failed", K(ret), K(ls_id_), K(tablet_id_)); - } else { - ObTabletHandle new_tablet_handle; - ObUpdateTableStoreParam 
param(tablet.get_snapshot_version(), - ObVersionRange::MIN_VERSION, // multi_version_start - storage_schema, - ls_handle.get_ls()->get_rebuild_seq()); - param.ddl_info_.keep_old_ddl_sstable_ = true; - param.ddl_info_.ddl_commit_scn_ = get_commit_scn(tablet.get_tablet_meta()); - if (OB_FAIL(ls_handle.get_ls()->update_tablet_table_store(tablet_id_, param, new_tablet_handle))) { - LOG_WARN("failed to update tablet table store", K(ret), K(ls_id_), K(tablet_id_), K(param)); - } - } - ObTabletObjLoadHelper::free(allocator, storage_schema); - return ret; -} - -int ObTabletDDLKvMgr::get_ddl_param(ObTabletDDLParam &ddl_param) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret), K(is_inited_)); - } else if (!is_started()) { - ret = OB_STATE_NOT_MATCH; - LOG_WARN("ddl not started", K(ret)); - } else { - ObLatchRGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); - ddl_param.tenant_id_ = MTL_ID(); - ddl_param.ls_id_ = ls_id_; - ddl_param.table_key_ = table_key_; - ddl_param.start_scn_ = start_scn_; - ddl_param.commit_scn_ = commit_scn_; - ddl_param.snapshot_version_ = table_key_.get_snapshot_version(); - ddl_param.data_format_version_ = data_format_version_; - } - - return ret; -} - -int ObTabletDDLKvMgr::get_freezed_ddl_kv(const SCN &freeze_scn, ObTableHandleV2 &kv_handle) +int ObTabletDDLKvMgr::get_freezed_ddl_kv(const SCN &freeze_scn, ObDDLKVHandle &kv_handle) { int ret = OB_SUCCESS; kv_handle.reset(); @@ -865,8 +270,8 @@ int ObTabletDDLKvMgr::get_freezed_ddl_kv(const SCN &freeze_scn, ObTableHandleV2 ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); for (int64_t i = head_; OB_SUCC(ret) && !found && i < tail_; ++i) { const int64_t idx = get_idx(i); - ObTableHandleV2 &cur_kv_handle = ddl_kv_handles_[idx]; - ObDDLKV *cur_kv = static_cast(cur_kv_handle.get_table()); + ObDDLKVHandle &cur_kv_handle = ddl_kv_handles_[idx]; + 
ObDDLKV *cur_kv = cur_kv_handle.get_obj(); if (OB_ISNULL(cur_kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret), K(ls_id_), K(tablet_id_), KP(cur_kv), K(i), K(head_), K(tail_)); @@ -892,6 +297,12 @@ int64_t ObTabletDDLKvMgr::get_count() return ddl_kv_count; } +bool ObTabletDDLKvMgr::can_freeze() +{ + int64_t ddl_kv_count = get_count(); + return ddl_kv_count < MAX_DDL_KV_CNT_IN_STORAGE; +} + int64_t ObTabletDDLKvMgr::get_count_nolock() const { return tail_ - head_; @@ -902,15 +313,15 @@ int64_t ObTabletDDLKvMgr::get_idx(const int64_t pos) const return pos & (MAX_DDL_KV_CNT_IN_STORAGE - 1); } -int ObTabletDDLKvMgr::get_active_ddl_kv_impl(ObTableHandleV2 &kv_handle) +int ObTabletDDLKvMgr::get_active_ddl_kv_impl(ObDDLKVHandle &kv_handle) { int ret = OB_SUCCESS; kv_handle.reset(); if (get_count_nolock() == 0) { ret = OB_ENTRY_NOT_EXIST; } else { - ObTableHandleV2 &tail_kv_handle = ddl_kv_handles_[get_idx(tail_ - 1)]; - ObDDLKV *kv = static_cast(tail_kv_handle.get_table()); + ObDDLKVHandle &tail_kv_handle = ddl_kv_handles_[get_idx(tail_ - 1)]; + ObDDLKV *kv = tail_kv_handle.get_obj(); if (nullptr == kv) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, kv must not be nullptr", K(ret)); @@ -924,7 +335,12 @@ int ObTabletDDLKvMgr::get_active_ddl_kv_impl(ObTableHandleV2 &kv_handle) return ret; } -int ObTabletDDLKvMgr::get_or_create_ddl_kv(ObTablet &tablet, const SCN &start_scn, const SCN &scn, ObTableHandleV2 &kv_handle) +int ObTabletDDLKvMgr::get_or_create_ddl_kv( + const share::SCN &start_scn, + const share::SCN &scn, + const int64_t snapshot_version, + const uint64_t data_format_version, + ObDDLKVHandle &kv_handle) { int ret = OB_SUCCESS; kv_handle.reset(); @@ -938,11 +354,7 @@ int ObTabletDDLKvMgr::get_or_create_ddl_kv(ObTablet &tablet, const SCN &start_sc uint32_t lock_tid = 0; // try lock to avoid hang in clog callback if (OB_FAIL(rdlock(TRY_LOCK_TIMEOUT, lock_tid))) { LOG_WARN("failed to rdlock", K(ret), K(start_scn), KPC(this)); - } else if 
(start_scn != start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_WARN("ddl task expired", K(ret), K(start_scn), KPC(this)); } else { - ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); try_get_ddl_kv_unlock(scn, kv_handle); } if (lock_tid != 0) { @@ -953,15 +365,12 @@ int ObTabletDDLKvMgr::get_or_create_ddl_kv(ObTablet &tablet, const SCN &start_sc uint32_t lock_tid = 0; // try lock to avoid hang in clog callback if (OB_FAIL(wrlock(TRY_LOCK_TIMEOUT, lock_tid))) { LOG_WARN("failed to wrlock", K(ret), K(start_scn), KPC(this)); - } else if (start_scn != start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_WARN("ddl task expired", K(ret), K(start_scn), KPC(this)); } else { - ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); try_get_ddl_kv_unlock(scn, kv_handle); if (kv_handle.is_valid()) { // do nothing - } else if (OB_FAIL(alloc_ddl_kv(tablet, kv_handle))) { + } else if (OB_FAIL(alloc_ddl_kv(start_scn, + snapshot_version, data_format_version, kv_handle))) { LOG_WARN("create ddl kv failed", K(ret)); } } @@ -972,14 +381,14 @@ int ObTabletDDLKvMgr::get_or_create_ddl_kv(ObTablet &tablet, const SCN &start_sc return ret; } -void ObTabletDDLKvMgr::try_get_ddl_kv_unlock(const SCN &scn, ObTableHandleV2 &kv_handle) +void ObTabletDDLKvMgr::try_get_ddl_kv_unlock(const SCN &scn, ObDDLKVHandle &kv_handle) { int ret = OB_SUCCESS; kv_handle.reset(); if (get_count_nolock() > 0) { for (int64_t i = tail_ - 1; OB_SUCC(ret) && i >= head_ && !kv_handle.is_valid(); --i) { - ObTableHandleV2 &tmp_kv_handle = ddl_kv_handles_[get_idx(i)]; - ObDDLKV *tmp_kv = static_cast(tmp_kv_handle.get_table()); + ObDDLKVHandle &tmp_kv_handle = ddl_kv_handles_[get_idx(i)]; + ObDDLKV *tmp_kv = tmp_kv_handle.get_obj(); if (OB_ISNULL(tmp_kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret), K(ls_id_), K(tablet_id_), KP(tmp_kv), K(i), K(head_), K(tail_)); @@ -991,11 +400,14 @@ void ObTabletDDLKvMgr::try_get_ddl_kv_unlock(const SCN &scn, ObTableHandleV2 &kv } } -int 
ObTabletDDLKvMgr::freeze_ddl_kv(ObTablet &tablet, const SCN &freeze_scn) +int ObTabletDDLKvMgr::freeze_ddl_kv( + const share::SCN &start_scn, + const int64_t snapshot_version, + const uint64_t data_format_version, + const SCN &freeze_scn) { int ret = OB_SUCCESS; - ObTableHandleV2 kv_handle; - ObLatchWGuard state_guard(state_lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); + ObDDLKVHandle kv_handle; ObLatchWGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); if (IS_NOT_INIT) { ret = OB_NOT_INIT; @@ -1008,12 +420,12 @@ int ObTabletDDLKvMgr::freeze_ddl_kv(ObTablet &tablet, const SCN &freeze_scn) if (OB_SUCC(ret) && !kv_handle.is_valid() && freeze_scn > max_freeze_scn_) { // freeze_scn > 0 only occured when ddl commit // assure there is an alive ddl kv, for waiting pre-logs - if (OB_FAIL(alloc_ddl_kv(tablet, kv_handle))) { + if (OB_FAIL(alloc_ddl_kv(start_scn, snapshot_version, data_format_version, kv_handle))) { LOG_WARN("create ddl kv failed", K(ret)); } } if (OB_SUCC(ret) && kv_handle.is_valid()) { - ObDDLKV *kv = static_cast(kv_handle.get_table()); + ObDDLKV *kv = kv_handle.get_obj(); if (OB_ISNULL(kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret), KP(kv), K(kv_handle)); @@ -1042,7 +454,7 @@ int ObTabletDDLKvMgr::release_ddl_kvs(const SCN &end_scn) } else { for (int64_t i = head_; OB_SUCC(ret) && i < tail_; ++i) { const int64_t idx = get_idx(head_); - ObDDLKV *kv = static_cast(ddl_kv_handles_[idx].get_table()); + ObDDLKV *kv = ddl_kv_handles_[idx].get_obj(); LOG_INFO("try release ddl kv", K(end_scn), KPC(kv)); #ifdef ERRSIM if (OB_SUCC(ret)) { @@ -1078,7 +490,7 @@ int ObTabletDDLKvMgr::get_ddl_kv_min_scn(SCN &min_scn) } else { for (int64_t i = head_; OB_SUCC(ret) && i < tail_; ++i) { const int64_t idx = get_idx(head_); - ObDDLKV *kv = static_cast(ddl_kv_handles_[idx].get_table()); + ObDDLKV *kv = ddl_kv_handles_[idx].get_obj(); if (OB_ISNULL(kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret), K(ls_id_), K(tablet_id_), KP(kv), K(i), 
K(head_), K(tail_)); @@ -1090,7 +502,7 @@ int ObTabletDDLKvMgr::get_ddl_kv_min_scn(SCN &min_scn) return ret; } -int ObTabletDDLKvMgr::get_ddl_kvs_unlock(const bool frozen_only, ObTablesHandleArray &kv_handle_array) +int ObTabletDDLKvMgr::get_ddl_kvs_unlock(const bool frozen_only, ObIArray &kv_handle_array) { int ret = OB_SUCCESS; kv_handle_array.reset(); @@ -1100,13 +512,13 @@ int ObTabletDDLKvMgr::get_ddl_kvs_unlock(const bool frozen_only, ObTablesHandleA } else { for (int64_t pos = head_; OB_SUCC(ret) && pos < tail_; ++pos) { const int64_t idx = get_idx(pos); - ObTableHandleV2 &cur_kv_handle = ddl_kv_handles_[idx]; - ObDDLKV *cur_kv = static_cast(cur_kv_handle.get_table()); + ObDDLKVHandle &cur_kv_handle = ddl_kv_handles_[idx]; + ObDDLKV *cur_kv = cur_kv_handle.get_obj(); if (OB_ISNULL(cur_kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret), K(ls_id_), K(tablet_id_), KP(cur_kv), K(pos), K(head_), K(tail_)); } else if (!frozen_only || cur_kv->is_freezed()) { - if (OB_FAIL(kv_handle_array.add_table(cur_kv_handle))) { + if (OB_FAIL(kv_handle_array.push_back(cur_kv_handle))) { LOG_WARN("fail to push back ddl kv", K(ret)); } } @@ -1115,7 +527,7 @@ int ObTabletDDLKvMgr::get_ddl_kvs_unlock(const bool frozen_only, ObTablesHandleA return ret; } -int ObTabletDDLKvMgr::get_ddl_kvs(const bool frozen_only, ObTablesHandleArray &kv_handle_array) +int ObTabletDDLKvMgr::get_ddl_kvs(const bool frozen_only, ObIArray &kv_handle_array) { int ret = OB_SUCCESS; kv_handle_array.reset(); @@ -1129,7 +541,7 @@ int ObTabletDDLKvMgr::get_ddl_kvs(const bool frozen_only, ObTablesHandleArray &k return ret; } -int ObTabletDDLKvMgr::get_ddl_kvs_for_query(ObTablet &tablet, ObTablesHandleArray &kv_handle_array) +int ObTabletDDLKvMgr::get_ddl_kvs_for_query(ObTablet &tablet, ObIArray &kv_handle_array) { int ret = OB_SUCCESS; kv_handle_array.reset(); @@ -1137,8 +549,6 @@ int ObTabletDDLKvMgr::get_ddl_kvs_for_query(ObTablet &tablet, ObTablesHandleArra if (IS_NOT_INIT) { ret = OB_NOT_INIT; 
LOG_WARN("ObTabletDDLKvMgr is not inited", K(ret)); - } else if (!tablet.get_tablet_meta().ddl_commit_scn_.is_valid_and_not_min()) { - // do nothing } else if (OB_FAIL(get_ddl_kvs_unlock(true/*frozen_only*/, kv_handle_array))) { LOG_WARN("get ddl kv unlock failed", K(ret)); } @@ -1158,34 +568,36 @@ int ObTabletDDLKvMgr::check_has_effective_ddl_kv(bool &has_ddl_kv) return ret; } -int ObTabletDDLKvMgr::alloc_ddl_kv(ObTablet &tablet, ObTableHandleV2 &kv_handle) +int ObTabletDDLKvMgr::alloc_ddl_kv( + const share::SCN &start_scn, + const int64_t snapshot_version, + const uint64_t data_format_version, + ObDDLKVHandle &kv_handle) { int ret = OB_SUCCESS; kv_handle.reset(); ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr *); - ObTableHandleV2 tmp_kv_handle; + ObDDLKVHandle tmp_kv_handle; ObDDLKV *kv = nullptr; + ObDDLMemtable *ddl_memtable = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ddl kv manager not init", K(ret)); - } else if (OB_UNLIKELY(!is_started())) { - ret = OB_ERR_SYS; - LOG_WARN("ddl kv manager not started", K(ret)); } else if (get_count_nolock() == MAX_DDL_KV_CNT_IN_STORAGE) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, too much ddl kv count", K(ret)); } else if (OB_FAIL(t3m->acquire_ddl_kv(tmp_kv_handle))) { LOG_WARN("acquire ddl kv failed", K(ret)); - } else if (OB_ISNULL(kv = static_cast(tmp_kv_handle.get_table()))) { + } else if (OB_ISNULL(kv = tmp_kv_handle.get_obj())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl kv is null", K(ret)); - } else if (OB_FAIL(kv->init(tablet, - start_scn_, - table_key_.get_snapshot_version(), + } else if (OB_FAIL(kv->init(ls_id_, + tablet_id_, + start_scn, + snapshot_version, max_freeze_scn_, - data_format_version_))) { - LOG_WARN("fail to init ddl kv", K(ret), K(ls_id_), K(tablet_id_), - K(start_scn_), K(table_key_), K(max_freeze_scn_), K(data_format_version_)); + data_format_version))) { + LOG_WARN("fail to init ddl kv", K(ret), K(ls_id_), K(tablet_id_)); } else { const int64_t idx = 
get_idx(tail_); tail_++; @@ -1196,6 +608,13 @@ int ObTabletDDLKvMgr::alloc_ddl_kv(ObTablet &tablet, ObTableHandleV2 &kv_handle) return ret; } +void ObTabletDDLKvMgr::set_ddl_kv(const int64_t idx, ObDDLKVHandle &kv_handle) +{ + //only for unittest + ddl_kv_handles_[idx] = kv_handle; + tail_++; +} + void ObTabletDDLKvMgr::free_ddl_kv(const int64_t idx) { int ret = OB_SUCCESS; @@ -1206,7 +625,7 @@ void ObTabletDDLKvMgr::free_ddl_kv(const int64_t idx) ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(idx)); } else { - FLOG_INFO("free ddl kv", K(ls_id_), K(tablet_id_), KPC(ddl_kv_handles_[idx].get_table())); + FLOG_INFO("free ddl kv", K(ls_id_), K(tablet_id_), KPC(ddl_kv_handles_[idx].get_obj())); ddl_kv_handles_[idx].reset(); } } diff --git a/src/storage/ddl/ob_tablet_ddl_kv_mgr.h b/src/storage/ddl/ob_tablet_ddl_kv_mgr.h index 2a8a823bb..c0aaeb083 100644 --- a/src/storage/ddl/ob_tablet_ddl_kv_mgr.h +++ b/src/storage/ddl/ob_tablet_ddl_kv_mgr.h @@ -28,73 +28,67 @@ namespace oceanbase namespace storage { -struct ObTabletDDLParam; -struct ObDDLTableMergeDagParam; - +// ddl kv: create, get, freeze, dump, release +// checkpoint manage +// recycle scn manage class ObTabletDDLKvMgr final { public: ObTabletDDLKvMgr(); ~ObTabletDDLKvMgr(); + int register_to_tablet(ObDDLKvMgrHandle &kv_mgr_handle); int init(const share::ObLSID &ls_id, const common::ObTabletID &tablet_id); // init before memtable mgr - int ddl_start_nolock(const ObITable::TableKey &table_key, const share::SCN &start_scn, const int64_t data_format_version, const int64_t execution_id, const share::SCN &checkpoint_scn); - int ddl_start(ObLS &ls, ObTablet &tablet, const ObITable::TableKey &table_key, const share::SCN &start_scn, const int64_t data_format_version, const int64_t execution_id, const share::SCN &checkpoint_scn); - int ddl_commit(ObTablet &tablet, const share::SCN &start_scn, const share::SCN &commit_scn); // schedule build a major sstable - int schedule_ddl_dump_task(ObTablet &tablet, 
const share::SCN &start_scn, const share::SCN &rec_scn); - int schedule_ddl_merge_task(ObTablet &tablet, const share::SCN &start_scn, const share::SCN &commit_scn); // try wait build major sstable - int wait_ddl_merge_success(ObTablet &tablet, const share::SCN &start_scn, const share::SCN &commit_scn); - int get_ddl_param(ObTabletDDLParam &ddl_param); - int get_or_create_ddl_kv(ObTablet &tablet, const share::SCN &start_scn, const share::SCN &scn, ObTableHandleV2 &kv_handle); // used in active ddl kv guard - int get_freezed_ddl_kv(const share::SCN &freeze_scn, ObTableHandleV2 &kv_handle); // locate ddl kv with exeact freeze log ts - int get_ddl_kvs(const bool frozen_only, ObTablesHandleArray &kv_handle_array); // get all freeze ddl kvs - int get_ddl_kvs_for_query(ObTablet &tablet, ObTablesHandleArray &kv_handle_array); - int freeze_ddl_kv(ObTablet &tablet, const share::SCN &freeze_scn = share::SCN::min_scn()); // freeze the active ddl kv, when memtable freeze or ddl commit + int set_max_freeze_scn(const share::SCN &checkpoint_scn); + int get_or_create_ddl_kv( + const share::SCN &start_scn, + const share::SCN &scn, + const int64_t snapshot_version, + const uint64_t data_format_version, + ObDDLKVHandle &kv_handle); // used in active ddl kv guard + int get_freezed_ddl_kv(const share::SCN &freeze_scn, ObDDLKVHandle &kv_handle); // locate ddl kv with exeact freeze log ts + int get_ddl_kvs(const bool frozen_only, ObIArray &kv_handle_array); // get all freeze ddl kvs + int get_ddl_kvs_for_query(ObTablet &tablet, ObIArray &kv_handle_array); + int freeze_ddl_kv( + const share::SCN &start_scn, + const int64_t snapshot_version, + const uint64_t data_format_version, + const share::SCN &freeze_scn = share::SCN::min_scn()); // freeze the active ddl kv, when memtable freeze or ddl commit int release_ddl_kvs(const share::SCN &rec_scn); // release persistent ddl kv, used in ddl merge task for free ddl kv int check_has_effective_ddl_kv(bool &has_ddl_kv); // used in ddl log handler 
for checkpoint - int get_ddl_kv_min_scn(share::SCN &min_scn); // for calculate rec_scn of ls - share::SCN get_start_scn() const { return start_scn_.atomic_load(); } - bool is_started() const { return share::SCN::min_scn() != start_scn_; } - void set_commit_scn_nolock(const share::SCN &scn) { commit_scn_ = scn; } - int set_commit_scn(const ObTabletMeta &tablet_meta, const share::SCN &scn); - share::SCN get_commit_scn(const ObTabletMeta &tablet_meta); - int set_commit_success(const share::SCN &start_scn); - bool is_commit_success(); - void reset_commit_success(); - common::ObTabletID get_tablet_id() const { return tablet_id_; } - share::ObLSID get_ls_id() const { return ls_id_; } - int cleanup(); - int online(); - bool is_execution_id_older(const int64_t execution_id); - int register_to_tablet(const share::SCN &ddl_start_scn, ObDDLKvMgrHandle &kv_mgr_handle); - int unregister_from_tablet(const share::SCN &ddl_start_scn, ObDDLKvMgrHandle &kv_mgr_handle); - int rdlock(const int64_t timeout_us, uint32_t &lock_tid); - int wrlock(const int64_t timeout_us, uint32_t &lock_tid); - void unlock(const uint32_t lock_tid); - int update_tablet(ObTablet &tablet, const share::SCN &start_scn, const int64_t snapshot_version, const int64_t data_format_version, const int64_t execution_id, const share::SCN &ddl_checkpoint_scn); int64_t get_count(); + void set_ddl_kv(const int64_t idx, ObDDLKVHandle &kv_handle); //for unittest OB_INLINE void inc_ref() { ATOMIC_INC(&ref_cnt_); } OB_INLINE int64_t dec_ref() { return ATOMIC_SAF(&ref_cnt_, 1 /* just sub 1 */); } OB_INLINE int64_t get_ref() const { return ATOMIC_LOAD(&ref_cnt_); } OB_INLINE void reset() { destroy(); } - bool can_schedule_major_compaction_nolock(const ObTabletMeta &tablet_meta); - int get_ddl_major_merge_param(ObTablet &tablet, ObDDLTableMergeDagParam &merge_param); - int get_rec_scn(share::SCN &rec_scn); - TO_STRING_KV(K_(is_inited), K_(success_start_scn), K_(ls_id), K_(tablet_id), K_(table_key), - K_(data_format_version), 
K_(start_scn), K_(commit_scn), K_(max_freeze_scn), - K_(execution_id), K_(head), K_(tail), K_(ref_cnt)); + int get_rec_scn(share::SCN &rec_scn); // when data persisted, should return INT64_MAX + ObTabletID get_tablet_id() { return tablet_id_; } + int online(); + int cleanup(); + bool can_freeze(); + TO_STRING_KV(K_(is_inited), K_(ls_id), K_(tablet_id), + K_(max_freeze_scn), + K_(head), K_(tail), K_(ref_cnt)); private: int64_t get_idx(const int64_t pos) const; - int alloc_ddl_kv(ObTablet &tablet, ObTableHandleV2 &kv_handle); + int alloc_ddl_kv( + const share::SCN &start_scn, + const int64_t snapshot_version, + const uint64_t data_format_version, + ObDDLKVHandle &kv_handle); void free_ddl_kv(const int64_t idx); - int get_active_ddl_kv_impl(ObTableHandleV2 &kv_handle); - void try_get_ddl_kv_unlock(const share::SCN &scn, ObTableHandleV2 &kv_handle); - int get_ddl_kvs_unlock(const bool frozen_only, ObTablesHandleArray &kv_handle_array); + int get_active_ddl_kv_impl(ObDDLKVHandle &kv_handle); + void try_get_ddl_kv_unlock(const share::SCN &scn, ObDDLKVHandle &kv_handle); + int get_ddl_kvs_unlock(const bool frozen_only, ObIArray &kv_handle_array); int64_t get_count_nolock() const; - int update_ddl_major_sstable(ObTablet &tablet); - int create_empty_ddl_sstable(ObTablet &tablet, common::ObArenaAllocator &allocator, blocksstable::ObSSTable &sstable); - void cleanup_unlock(); + int get_ddl_kv_min_scn(share::SCN &min_scn); // for calculate rec_scn of ls + int create_empty_ddl_sstable(common::ObArenaAllocator &allocator, blocksstable::ObSSTable &sstable); void destroy(); + int rdlock(const int64_t timeout_us, uint32_t &lock_tid); + int wrlock(const int64_t timeout_us, uint32_t &lock_tid); + void unlock(const uint32_t lock_tid); + void cleanup_unlock(); public: static const int64_t MAX_DDL_KV_CNT_IN_STORAGE = 16; static const int64_t TRY_LOCK_TIMEOUT = 10 * 1000000; // 10s @@ -102,19 +96,9 @@ private: bool is_inited_; share::ObLSID ls_id_; common::ObTabletID tablet_id_; - - // 
state_lock_ guarded members - share::SCN success_start_scn_; - ObITable::TableKey table_key_; - int64_t data_format_version_; - share::SCN start_scn_; - share::SCN commit_scn_; - int64_t execution_id_; - common::ObLatch state_lock_; - - // lock_ guarded members share::SCN max_freeze_scn_; - ObTableHandleV2 ddl_kv_handles_[MAX_DDL_KV_CNT_IN_STORAGE]; + ObDDLKVHandle ddl_kv_handles_[MAX_DDL_KV_CNT_IN_STORAGE]; + int64_t head_; int64_t tail_; common::ObLatch lock_; diff --git a/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.cpp b/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.cpp index 21712a96b..6042d9e87 100644 --- a/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.cpp +++ b/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.cpp @@ -15,7 +15,6 @@ #include "share/stat/ob_opt_column_stat.h" #include "share/stat/ob_stat_define.h" #include "share/table/ob_table_load_define.h" -#include "storage/ddl/ob_direct_insert_sstable_ctx.h" #include "storage/direct_load/ob_direct_load_dml_row_handler.h" #include "storage/direct_load/ob_direct_load_fast_heap_table.h" #include "storage/direct_load/ob_direct_load_insert_table_ctx.h" @@ -34,13 +33,14 @@ using namespace share; ObDirectLoadFastHeapTableBuildParam::ObDirectLoadFastHeapTableBuildParam() : snapshot_version_(0), + lob_column_cnt_(0), datum_utils_(nullptr), col_descs_(nullptr), cmp_funcs_(nullptr), insert_table_ctx_(nullptr), - fast_heap_table_ctx_(nullptr), dml_row_handler_(nullptr), - online_opt_stat_gather_(false) + online_opt_stat_gather_(false), + px_mode_(false) { } @@ -51,8 +51,78 @@ ObDirectLoadFastHeapTableBuildParam::~ObDirectLoadFastHeapTableBuildParam() bool ObDirectLoadFastHeapTableBuildParam::is_valid() const { return tablet_id_.is_valid() && snapshot_version_ > 0 && table_data_desc_.is_valid() && - nullptr != col_descs_ && nullptr != cmp_funcs_ && nullptr != insert_table_ctx_ && - nullptr != fast_heap_table_ctx_ && nullptr != dml_row_handler_ && nullptr 
!= datum_utils_; + nullptr != col_descs_ && nullptr != cmp_funcs_ && + nullptr != insert_table_ctx_ && nullptr != dml_row_handler_ && nullptr != datum_utils_; +} + +/** + * RowIterator + */ + +ObDirectLoadFastHeapTableBuilder::RowIterator::RowIterator() + :iter_end_(false), is_inited_(false) +{} + +ObDirectLoadFastHeapTableBuilder::RowIterator::~RowIterator() +{ +} + +void ObDirectLoadFastHeapTableBuilder::RowIterator::reset() +{ + datum_row_ = nullptr; + iter_end_ = false; + is_inited_ = false; +} + +void ObDirectLoadFastHeapTableBuilder::RowIterator::reuse() +{ + iter_end_ = false; +} + + +int ObDirectLoadFastHeapTableBuilder::RowIterator::init(const ObDirectLoadFastHeapTableBuildParam ¶m, ObDatumRow &row, + ObIArray &column_stat_array, ObDirectLoadLobBuilder &lob_builder) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDirectLoadFastHeapTableBuilder init twice", KR(ret), KP(this)); + } else { + // init row iterator + ObDirectLoadInsertTableRowIteratorParam row_iterator_param; + row_iterator_param.table_data_desc_ = param.table_data_desc_; + row_iterator_param.datum_utils_ = param.datum_utils_; + row_iterator_param.col_descs_ = param.col_descs_; + row_iterator_param.lob_column_cnt_ = param.lob_column_cnt_; + row_iterator_param.cmp_funcs_ = param.cmp_funcs_; + row_iterator_param.column_stat_array_ = &column_stat_array; + row_iterator_param.lob_builder_ = &lob_builder; + row_iterator_param.is_heap_table_ = true; + row_iterator_param.online_opt_stat_gather_ = param.online_opt_stat_gather_; + row_iterator_param.px_mode_ = param.px_mode_; + if (OB_FAIL(inner_init(row_iterator_param))) { + LOG_WARN("fail to inner init", KR(ret)); + } else { + datum_row_ = &row; + is_inited_ = true; + } + } + return ret; +} + +int ObDirectLoadFastHeapTableBuilder::RowIterator::inner_get_next_row(blocksstable::ObDatumRow *&row) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("RowIterator not init", KR(ret), KP(this)); + } 
else if (iter_end_) { + ret = OB_ITER_END; + } else { + row = datum_row_; + iter_end_ = true; + } + return ret; } /** @@ -61,9 +131,8 @@ bool ObDirectLoadFastHeapTableBuildParam::is_valid() const ObDirectLoadFastHeapTableBuilder::ObDirectLoadFastHeapTableBuilder() : allocator_("TLD_FastHTable"), - slice_writer_allocator_("TLD_SliceWriter"), - fast_heap_table_tablet_ctx_(nullptr), - slice_writer_(nullptr), + insert_tablet_ctx_(nullptr), + current_slice_id_(0), row_count_(0), is_closed_(false), is_inited_(false) @@ -72,11 +141,7 @@ ObDirectLoadFastHeapTableBuilder::ObDirectLoadFastHeapTableBuilder() ObDirectLoadFastHeapTableBuilder::~ObDirectLoadFastHeapTableBuilder() { - if (nullptr != slice_writer_) { - slice_writer_->~ObSSTableInsertSliceWriter(); - slice_writer_allocator_.free(slice_writer_); - slice_writer_ = nullptr; - } + int ret = OB_SUCCESS; for (int64_t i = 0; i < column_stat_array_.count(); ++i) { ObOptOSGColumnStat *col_stat = column_stat_array_.at(i); col_stat->~ObOptOSGColumnStat(); @@ -107,24 +172,15 @@ int ObDirectLoadFastHeapTableBuilder::init_sql_statistics() return ret; } -int ObDirectLoadFastHeapTableBuilder::collect_obj(const ObDatumRow &datum_row) +int ObDirectLoadFastHeapTableBuilder::init_lob_builder() { int ret = OB_SUCCESS; - const int64_t extra_rowkey_cnt = ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); - for (int64_t i = 0; OB_SUCC(ret) && i < param_.table_data_desc_.column_count_; i++) { - const ObStorageDatum &datum = - datum_row.storage_datums_[i + extra_rowkey_cnt + 1]; - const ObCmpFunc &cmp_func = param_.cmp_funcs_->at(i + 1).get_cmp_func(); - const ObColDesc &col_desc = param_.col_descs_->at(i + 1); - ObOptOSGColumnStat *col_stat = column_stat_array_.at(i); - bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); - if (col_stat != nullptr && is_valid) { - if (OB_FAIL(col_stat->update_column_stat_info(&datum, - col_desc.col_type_, - cmp_func.cmp_func_))) { - LOG_WARN("failed to update 
column stat info"); - } - } + ObDirectLoadLobBuildParam param; + param.tablet_id_ = param_.tablet_id_; + param.insert_table_ctx_ = param_.insert_table_ctx_; + param.lob_column_cnt_ = param_.lob_column_cnt_; + if (OB_FAIL(lob_builder_.init(param))) { + LOG_WARN("fail to init lob builder", KR(ret)); } return ret; } @@ -139,20 +195,24 @@ int ObDirectLoadFastHeapTableBuilder::init(const ObDirectLoadFastHeapTableBuildP ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", KR(ret), K(param)); } else { + bool has_lob_storage = param.lob_column_cnt_ > 0? true :false; param_ = param; allocator_.set_tenant_id(MTL_ID()); - slice_writer_allocator_.set_tenant_id(MTL_ID()); - if (param_.online_opt_stat_gather_ && OB_FAIL(init_sql_statistics())) { + if (param.online_opt_stat_gather_ && OB_FAIL(init_sql_statistics())) { LOG_WARN("fail to inner init sql statistics", KR(ret)); - } else if (OB_FAIL(param_.fast_heap_table_ctx_->get_tablet_context( - param_.tablet_id_, fast_heap_table_tablet_ctx_))) { + } else if (OB_FAIL(param_.insert_table_ctx_->get_tablet_context( + param_.tablet_id_, insert_tablet_ctx_))) { LOG_WARN("fail to get tablet context", KR(ret)); + } else if (has_lob_storage && OB_FAIL(init_lob_builder())) { + LOG_WARN("fail to inner init sql statistics", KR(ret)); } else if (OB_FAIL(init_sstable_slice_ctx())) { LOG_WARN("fail to init sstable slice ctx", KR(ret)); } else if (OB_FAIL(datum_row_.init(param.table_data_desc_.column_count_ + HIDDEN_ROWKEY_COLUMN_NUM + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()))) { LOG_WARN("fail to init datum row", KR(ret)); + } else if (OB_FAIL(row_iter_.init(param_, datum_row_, column_stat_array_ ,lob_builder_))) { + LOG_WARN("fail to init row iter", KR(ret)); } else { datum_row_.row_flag_.set_flag(ObDmlFlag::DF_INSERT); datum_row_.mvcc_row_flag_.set_last_multi_version_row(true); @@ -167,14 +227,12 @@ int ObDirectLoadFastHeapTableBuilder::init(const ObDirectLoadFastHeapTableBuildP int 
ObDirectLoadFastHeapTableBuilder::init_sstable_slice_ctx() { int ret = OB_SUCCESS; - if (OB_FAIL(fast_heap_table_tablet_ctx_->get_write_ctx(write_ctx_))) { + if (OB_FAIL(insert_tablet_ctx_->get_write_ctx(write_ctx_))) { LOG_WARN("fail to get write ctx", KR(ret)); - } else if (OB_FAIL(param_.insert_table_ctx_->construct_sstable_slice_writer( - fast_heap_table_tablet_ctx_->get_target_tablet_id(), - write_ctx_.start_seq_, - slice_writer_, - slice_writer_allocator_))) { - LOG_WARN("fail to construct sstable slice writer", KR(ret)); + } else if (OB_FAIL(insert_tablet_ctx_->open_sstable_slice( + write_ctx_.start_seq_, + current_slice_id_))) { + LOG_WARN("fail to open sstable slice", KR(ret)); } return ret; } @@ -182,17 +240,10 @@ int ObDirectLoadFastHeapTableBuilder::init_sstable_slice_ctx() int ObDirectLoadFastHeapTableBuilder::switch_sstable_slice() { int ret = OB_SUCCESS; - if (OB_ISNULL(slice_writer_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null slice builder", KR(ret)); - } else if (OB_FAIL(slice_writer_->close())) { + if (OB_FAIL(insert_tablet_ctx_->close_sstable_slice(current_slice_id_))) { LOG_WARN("fail to close sstable slice builder", KR(ret)); - } else { - slice_writer_->~ObSSTableInsertSliceWriter(); - slice_writer_allocator_.reuse(); - if (OB_FAIL(init_sstable_slice_ctx())) { - LOG_WARN("fail to init sstable slice ctx", KR(ret)); - } + } else if (OB_FAIL(init_sstable_slice_ctx())) { + LOG_WARN("fail to init sstable slice ctx", KR(ret)); } return ret; } @@ -216,6 +267,8 @@ int ObDirectLoadFastHeapTableBuilder::append_row(const ObTabletID &tablet_id, LOG_WARN("invalid args", KR(ret), K(datum_row), K(param_.table_data_desc_.column_count_)); } else { uint64_t pk_seq = OB_INVALID_ID; + int64_t affected_rows = 0; + row_iter_.reuse(); if (OB_FAIL(write_ctx_.pk_interval_.next_value(pk_seq))) { if (OB_UNLIKELY(OB_EAGAIN != ret)) { LOG_WARN("fail to get next pk seq", KR(ret)); @@ -232,10 +285,8 @@ int ObDirectLoadFastHeapTableBuilder::append_row(const 
ObTabletID &tablet_id, i < datum_row.count_; ++i, ++j) { datum_row_.storage_datums_[j] = datum_row.storage_datums_[i]; } - if (OB_FAIL(slice_writer_->append_row(datum_row_))) { - LOG_WARN("fail to append row", KR(ret)); - } else if (param_.online_opt_stat_gather_ && OB_FAIL(collect_obj(datum_row_))) { - LOG_WARN("fail to collect", KR(ret)); + if (OB_FAIL(insert_tablet_ctx_->fill_sstable_slice(current_slice_id_, row_iter_, affected_rows))) { + LOG_WARN("fail to fill sstable slice", KR(ret)); } else { ++row_count_; } @@ -259,9 +310,13 @@ int ObDirectLoadFastHeapTableBuilder::close() ret = OB_ERR_UNEXPECTED; LOG_WARN("fast heap table builder is closed", KR(ret)); } else { - if (OB_FAIL(slice_writer_->close())) { - LOG_WARN("fail to close sstable slice writer", KR(ret)); + const bool has_lob_storage = param_.lob_column_cnt_ > 0; + if (has_lob_storage && OB_FAIL(lob_builder_.close())) { + LOG_WARN("fail to close lob_builder", KR(ret)); + } else if (OB_FAIL(insert_tablet_ctx_->close_sstable_slice(current_slice_id_))) { + LOG_WARN("fail to close sstable slice builder", KR(ret)); } else { + current_slice_id_ = 0; is_closed_ = true; } } diff --git a/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.h b/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.h index c87f5bc01..29fe2c012 100644 --- a/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.h +++ b/src/storage/direct_load/ob_direct_load_fast_heap_table_builder.h @@ -12,17 +12,15 @@ #pragma once #include "common/ob_tablet_id.h" -#include "storage/direct_load/ob_direct_load_fast_heap_table_ctx.h" #include "storage/direct_load/ob_direct_load_i_table.h" #include "storage/direct_load/ob_direct_load_table_data_desc.h" +#include "storage/direct_load/ob_direct_load_insert_table_row_iterator.h" +#include "storage/direct_load/ob_direct_load_lob_builder.h" #include "sql/engine/expr/ob_expr_sys_op_opnsize.h" +#include "storage/direct_load/ob_direct_load_insert_table_ctx.h" namespace oceanbase { 
-namespace table -{ -class ObTableLoadResultInfo; -} // namespace table namespace common { class ObOptOSGColumnStat; @@ -30,9 +28,7 @@ class ObOptOSGColumnStat; namespace storage { class ObDirectLoadInsertTableContext; -class ObSSTableInsertSliceWriter; class ObDirectLoadDMLRowHandler; - struct ObDirectLoadFastHeapTableBuildParam { public: @@ -40,19 +36,20 @@ public: ~ObDirectLoadFastHeapTableBuildParam(); bool is_valid() const; TO_STRING_KV(K_(tablet_id), K_(snapshot_version), K_(table_data_desc), KP_(datum_utils), - KP_(col_descs), KP_(cmp_funcs), KP_(insert_table_ctx), KP_(fast_heap_table_ctx), - KP_(dml_row_handler), K_(online_opt_stat_gather)); + KP_(col_descs), KP_(lob_column_cnt), KP_(cmp_funcs), KP_(dml_row_handler), + K_(online_opt_stat_gather), K_(px_mode)); public: common::ObTabletID tablet_id_; int64_t snapshot_version_; + int64_t lob_column_cnt_; ObDirectLoadTableDataDesc table_data_desc_; const blocksstable::ObStorageDatumUtils *datum_utils_; const common::ObIArray *col_descs_; const blocksstable::ObStoreCmpFuncs *cmp_funcs_; ObDirectLoadInsertTableContext *insert_table_ctx_; - ObDirectLoadFastHeapTableContext *fast_heap_table_ctx_; ObDirectLoadDMLRowHandler *dml_row_handler_; bool online_opt_stat_gather_; + bool px_mode_; }; class ObDirectLoadFastHeapTableBuilder : public ObIDirectLoadPartitionTableBuilder @@ -71,18 +68,36 @@ public: common::ObIAllocator &allocator) override; private: int init_sql_statistics(); - int collect_obj(const blocksstable::ObDatumRow &datum_row); + int init_lob_builder(); int init_sstable_slice_ctx(); int switch_sstable_slice(); +private: + class RowIterator : public ObDirectLoadInsertTableRowIterator + { + public: + RowIterator(); + virtual ~RowIterator(); + void reuse(); + void reset(); + int init(const ObDirectLoadFastHeapTableBuildParam ¶m, blocksstable::ObDatumRow &row, + common::ObIArray &column_stat_array, ObDirectLoadLobBuilder &lob_builder); + protected: + int inner_get_next_row(blocksstable::ObDatumRow *&row) 
override; + private: + blocksstable::ObDatumRow *datum_row_; + bool iter_end_; + bool is_inited_; + }; private: ObDirectLoadFastHeapTableBuildParam param_; common::ObArenaAllocator allocator_; - common::ObArenaAllocator slice_writer_allocator_; - ObDirectLoadFastHeapTableTabletContext *fast_heap_table_tablet_ctx_; - ObSSTableInsertSliceWriter *slice_writer_; - ObDirectLoadFastHeapTableTabletWriteCtx write_ctx_; + ObDirectLoadInsertTabletContext *insert_tablet_ctx_; + ObDirectLoadInsertTabletWriteCtx write_ctx_; blocksstable::ObDatumRow datum_row_; common::ObArray column_stat_array_; + ObDirectLoadLobBuilder lob_builder_; + RowIterator row_iter_; + int64_t current_slice_id_; int64_t row_count_; bool is_closed_; bool is_inited_; diff --git a/src/storage/direct_load/ob_direct_load_fast_heap_table_ctx.cpp b/src/storage/direct_load/ob_direct_load_fast_heap_table_ctx.cpp deleted file mode 100644 index 0cf075839..000000000 --- a/src/storage/direct_load/ob_direct_load_fast_heap_table_ctx.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. 
- */ -#define USING_LOG_PREFIX STORAGE - -#include "storage/direct_load/ob_direct_load_fast_heap_table_ctx.h" -#include "observer/table_load/ob_table_load_stat.h" -#include "share/ob_tablet_autoincrement_service.h" - -namespace oceanbase -{ -namespace storage -{ -using namespace common; -using namespace blocksstable; -using namespace lib; -using namespace share; -using namespace table; - -ObDirectLoadFastHeapTableContext::ObDirectLoadFastHeapTableContext() - : allocator_("TLD_FHTableCtx"), is_inited_(false) -{ -} - -ObDirectLoadFastHeapTableContext::~ObDirectLoadFastHeapTableContext() -{ - for (TABLET_CTX_MAP::iterator iter = tablet_ctx_map_.begin(); iter != tablet_ctx_map_.end(); - ++iter) { - ObDirectLoadFastHeapTableTabletContext *tablet_ctx = iter->second; - tablet_ctx->~ObDirectLoadFastHeapTableTabletContext(); - allocator_.free(tablet_ctx); - } - tablet_ctx_map_.reuse(); -} - -int ObDirectLoadFastHeapTableContext::init(uint64_t tenant_id, - const ObIArray &ls_partition_ids, - const ObIArray &target_ls_partition_ids, - int64_t reserved_parallel) -{ - int ret = OB_SUCCESS; - if (IS_INIT) { - ret = OB_INIT_TWICE; - LOG_WARN("ObDirectLoadFastHeapTableContext init twice", KR(ret), KP(this)); - } else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id - || ls_partition_ids.empty() - || (ls_partition_ids.count() != target_ls_partition_ids.count()))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(tenant_id), K(ls_partition_ids), K(target_ls_partition_ids)); - } else { - allocator_.set_tenant_id(MTL_ID()); - if (OB_FAIL(create_all_tablet_contexts(tenant_id, ls_partition_ids, target_ls_partition_ids, reserved_parallel))) { - LOG_WARN("fail to create all tablet contexts", KR(ret)); - } else { - is_inited_ = true; - } - } - return ret; -} - -int ObDirectLoadFastHeapTableContext::create_all_tablet_contexts( - uint64_t tenant_id, - const ObIArray &ls_partition_ids, - const ObIArray &target_ls_partition_ids, - int64_t reserved_parallel) -{ - int ret = 
OB_SUCCESS; - if (OB_UNLIKELY(ls_partition_ids.empty() - || target_ls_partition_ids.empty() - || (ls_partition_ids.count() != target_ls_partition_ids.count()))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(ls_partition_ids), K(target_ls_partition_ids)); - } else if (OB_FAIL( - tablet_ctx_map_.create(ls_partition_ids.count(), "TLD_TabInsCtx", "TLD_TabInsCtx", MTL_ID()))) { - LOG_WARN("fail to create tablet ctx map", KR(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < ls_partition_ids.count(); ++i) { - const ObTabletID &tablet_id = ls_partition_ids.at(i).part_tablet_id_.tablet_id_; - const ObTabletID &target_tablet_id = target_ls_partition_ids.at(i).part_tablet_id_.tablet_id_; - ObDirectLoadFastHeapTableTabletContext *tablet_ctx = nullptr; - if (OB_ISNULL(tablet_ctx = OB_NEWx(ObDirectLoadFastHeapTableTabletContext, (&allocator_)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to new ObDirectLoadFastHeapTableTabletContext", KR(ret)); - } else if (OB_FAIL(tablet_ctx->init(tenant_id, tablet_id, target_tablet_id, reserved_parallel))) { - LOG_WARN("fail to init fast heap table tablet ctx", KR(ret)); - } else if (OB_FAIL(tablet_ctx_map_.set_refactored(tablet_id, tablet_ctx))) { - LOG_WARN("fail to set tablet ctx map", KR(ret)); - } - if (OB_FAIL(ret)) { - if (nullptr != tablet_ctx) { - tablet_ctx->~ObDirectLoadFastHeapTableTabletContext(); - allocator_.free(tablet_ctx); - tablet_ctx = nullptr; - } - } - } - } - return ret; -} - -int ObDirectLoadFastHeapTableContext::get_tablet_context( - const ObTabletID &tablet_id, ObDirectLoadFastHeapTableTabletContext *&tablet_ctx) const -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectLoadFastHeapTableContext not init", KR(ret), KP(this)); - } else if (OB_UNLIKELY(!tablet_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(tablet_id)); - } else if (OB_FAIL(tablet_ctx_map_.get_refactored(tablet_id, tablet_ctx))) { - if 
(OB_UNLIKELY(OB_HASH_NOT_EXIST != ret)) { - LOG_WARN("fail to get tablet ctx map", KR(ret), K(tablet_id)); - } else { - ret = OB_ENTRY_NOT_EXIST; - } - } - return ret; -} - -/** - * ObDirectLoadFastHeapTableTabletContext - */ - -ObDirectLoadFastHeapTableTabletContext::ObDirectLoadFastHeapTableTabletContext() - : tenant_id_(OB_INVALID_ID), is_inited_(false) -{ -} - -int ObDirectLoadFastHeapTableTabletContext::init(uint64_t tenant_id, - const ObTabletID &tablet_id, const ObTabletID &target_tablet_id, int64_t reserved_parallel) -{ - int ret = OB_SUCCESS; - if (IS_INIT) { - ret = OB_INIT_TWICE; - LOG_WARN("ObDirectLoadFastHeapTableTabletContext init twice", KR(ret), KP(this)); - } else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id - || !tablet_id.is_valid() - || !target_tablet_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", KR(ret), K(tenant_id), K(tablet_id), K(target_tablet_id)); - } else { - tenant_id_ = tenant_id; - tablet_id_ = tablet_id; - target_tablet_id_ = target_tablet_id; - start_seq_.set_parallel_degree(reserved_parallel); - is_inited_ = true; - } - return ret; -} - -int ObDirectLoadFastHeapTableTabletContext::get_write_ctx( - ObDirectLoadFastHeapTableTabletWriteCtx &write_ctx) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectLoadFastHeapTableTabletContext not init", KR(ret), KP(this)); - } else { - ObMutexGuard guard(mutex_); - if (OB_FAIL(pk_cache_.fetch(WRITE_BATCH_SIZE, write_ctx.pk_interval_))) { - if (OB_UNLIKELY(OB_EAGAIN != ret)) { - LOG_WARN("fail to fetch from pk cache", KR(ret)); - } else { - if (OB_FAIL(refresh_pk_cache())) { - LOG_WARN("fail to refresh pk cache", KR(ret)); - } else if (OB_FAIL(pk_cache_.fetch(WRITE_BATCH_SIZE, write_ctx.pk_interval_))) { - LOG_WARN("fail to fetch from pk cache", KR(ret)); - } - } - } - if (OB_SUCC(ret)) { - write_ctx.start_seq_.macro_data_seq_ = start_seq_.macro_data_seq_; - start_seq_.macro_data_seq_ += WRITE_BATCH_SIZE; - } - } - return ret; -} - 
-int ObDirectLoadFastHeapTableTabletContext::refresh_pk_cache() -{ - OB_TABLE_LOAD_STATISTICS_TIME_COST(DEBUG, fast_heap_table_refresh_pk_cache); - int ret = OB_SUCCESS; - ObTabletAutoincrementService &auto_inc = ObTabletAutoincrementService::get_instance(); - pk_cache_.tablet_id_ = tablet_id_; - pk_cache_.cache_size_ = PK_CACHE_SIZE; - if (OB_FAIL(auto_inc.get_tablet_cache_interval(tenant_id_, pk_cache_))) { - LOG_WARN("get_autoinc_seq fail", K(ret), K_(tenant_id), K_(tablet_id)); - } else if (OB_UNLIKELY(PK_CACHE_SIZE > pk_cache_.count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected autoincrement value count", K(ret), K(pk_cache_)); - } - return ret; -} - -} // namespace storage -} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_fast_heap_table_ctx.h b/src/storage/direct_load/ob_direct_load_fast_heap_table_ctx.h deleted file mode 100644 index 9ed6b1cbe..000000000 --- a/src/storage/direct_load/ob_direct_load_fast_heap_table_ctx.h +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. 
- */ -#pragma once - -#include "lib/hash/ob_hashmap.h" -#include "lib/lock/ob_mutex.h" -#include "share/ob_tablet_autoincrement_param.h" -#include "share/table/ob_table_load_define.h" -#include "storage/blocksstable/ob_block_sstable_struct.h" - -namespace oceanbase -{ -namespace storage -{ -class ObDirectLoadFastHeapTableTabletContext; - -class ObDirectLoadFastHeapTableContext -{ -public: - ObDirectLoadFastHeapTableContext(); - ~ObDirectLoadFastHeapTableContext(); - int init(uint64_t tenant_id, - const common::ObIArray &ls_partition_ids, - const common::ObIArray &target_ls_partition_ids, - int64_t reserved_parallel); - int get_tablet_context(const common::ObTabletID &tablet_id, - ObDirectLoadFastHeapTableTabletContext *&tablet_ctx) const; -private: - int create_all_tablet_contexts(uint64_t tenant_id, - const common::ObIArray &ls_partition_ids, - const common::ObIArray &target_ls_partition_ids, - int64_t reserved_parallel); - typedef common::hash::ObHashMap - TABLET_CTX_MAP; - common::ObArenaAllocator allocator_; - TABLET_CTX_MAP tablet_ctx_map_; - bool is_inited_; -}; - -struct ObDirectLoadFastHeapTableTabletWriteCtx -{ - blocksstable::ObMacroDataSeq start_seq_; - share::ObTabletCacheInterval pk_interval_; - TO_STRING_KV(K_(start_seq), K_(pk_interval)); -}; - -class ObDirectLoadFastHeapTableTabletContext -{ - static const int64_t PK_CACHE_SIZE = 1000000; - static const int64_t WRITE_BATCH_SIZE = 100000; -public: - ObDirectLoadFastHeapTableTabletContext(); - int init(uint64_t tenant_id, - const common::ObTabletID &tablet_id, - const common::ObTabletID &target_tablet_id, - int64_t reserved_parallel); - int get_write_ctx(ObDirectLoadFastHeapTableTabletWriteCtx &write_ctx); - const common::ObTabletID &get_target_tablet_id() - { - return target_tablet_id_; - } -private: - int refresh_pk_cache(); -private: - uint64_t tenant_id_; - common::ObTabletID tablet_id_; - common::ObTabletID target_tablet_id_; - lib::ObMutex mutex_; - blocksstable::ObMacroDataSeq start_seq_; - 
share::ObTabletCacheInterval pk_cache_; - bool is_inited_; -}; - -} // namespace storage -} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp b/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp index ee432c5a8..4bf0e05c0 100644 --- a/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp +++ b/src/storage/direct_load/ob_direct_load_insert_table_ctx.cpp @@ -12,7 +12,9 @@ #define USING_LOG_PREFIX STORAGE #include "storage/direct_load/ob_direct_load_insert_table_ctx.h" -#include "storage/ddl/ob_direct_insert_sstable_ctx.h" +#include "share/ob_tablet_autoincrement_service.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/tablet/ob_tablet.h" namespace oceanbase { @@ -20,15 +22,28 @@ namespace storage { using namespace common; using namespace table; +using namespace observer; +using namespace blocksstable; +using namespace lib; +using namespace share; /** * ObDirectLoadInsertTableParam */ ObDirectLoadInsertTableParam::ObDirectLoadInsertTableParam() - : table_id_(OB_INVALID_ID), schema_version_(0), snapshot_version_(0), execution_id_(0), ddl_task_id_(0), ls_partition_ids_() + : table_id_(OB_INVALID_ID), + schema_version_(0), + snapshot_version_(0), + execution_id_(0), + ddl_task_id_(0), + data_version_(0), + reserved_parallel_(0), + ls_partition_ids_(), + target_ls_partition_ids_() { ls_partition_ids_.set_attr(ObMemAttr(MTL_ID(), "DLITP_ids")); + target_ls_partition_ids_.set_attr(ObMemAttr(MTL_ID(), "DLITP_t_ids")); } ObDirectLoadInsertTableParam::~ObDirectLoadInsertTableParam() @@ -38,7 +53,9 @@ ObDirectLoadInsertTableParam::~ObDirectLoadInsertTableParam() bool ObDirectLoadInsertTableParam::is_valid() const { return OB_INVALID_ID != table_id_ && schema_version_ >= 0 && snapshot_version_ >= 0 && - ls_partition_ids_.count() > 0; + execution_id_ >= 0 && ddl_task_id_ > 0 && data_version_ >= 0 && reserved_parallel_ >= 0 && + ls_partition_ids_.count() > 0 && + ls_partition_ids_.count() == 
target_ls_partition_ids_.count(); } int ObDirectLoadInsertTableParam::assign(const ObDirectLoadInsertTableParam &other) @@ -47,8 +64,422 @@ int ObDirectLoadInsertTableParam::assign(const ObDirectLoadInsertTableParam &oth table_id_ = other.table_id_; schema_version_ = other.schema_version_; snapshot_version_ = other.snapshot_version_; + execution_id_ = other.execution_id_; + ddl_task_id_ = other.ddl_task_id_; + data_version_ = other.data_version_; + reserved_parallel_ = other.reserved_parallel_; if (OB_FAIL(ls_partition_ids_.assign(other.ls_partition_ids_))) { LOG_WARN("fail to assign ls tablet ids", KR(ret)); + } else if (OB_FAIL(target_ls_partition_ids_.assign(other.target_ls_partition_ids_))) { + LOG_WARN("fail to assign ls tablet ids", KR(ret)); + } + return ret; +} + +/** + * ObDirectLoadInsertTabletParam + */ + +ObDirectLoadInsertTabletParam::ObDirectLoadInsertTabletParam() + : tenant_id_(OB_INVALID_TENANT_ID), + table_id_(OB_INVALID_ID), + schema_version_(0), + snapshot_version_(0), + execution_id_(0), + ddl_task_id_(0), + data_version_(0), + reserved_parallel_(0), + context_id_(0) +{ +} + +ObDirectLoadInsertTabletParam::~ObDirectLoadInsertTabletParam() {} + +bool ObDirectLoadInsertTabletParam::is_valid() const +{ + return OB_INVALID_TENANT_ID != tenant_id_ && ls_id_.is_valid() && OB_INVALID_ID != table_id_ && + tablet_id_.is_valid() && origin_tablet_id_.is_valid() && schema_version_ >= 0 && + snapshot_version_ >= 0 && execution_id_ >= 0 && ddl_task_id_ > 0 && data_version_ >= 0 && + reserved_parallel_ >= 0 && context_id_ >= 0; +} + +/** + * ObDirectLoadInsertTabletContext + */ + +ObDirectLoadInsertTabletContext::ObDirectLoadInsertTabletContext() + : is_open_(false), is_inited_(false) +{ +} + +ObDirectLoadInsertTabletContext::~ObDirectLoadInsertTabletContext() +{ +} + +int ObDirectLoadInsertTabletContext::init(const ObDirectLoadInsertTabletParam ¶m) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDirectLoadInsertTabletContext 
init twice", KR(ret), KP(this)); + } else if (OB_UNLIKELY(!param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(param)); + } else { + param_ = param; + start_seq_.set_parallel_degree(param.reserved_parallel_); + is_inited_ = true; + } + return ret; +} + +int ObDirectLoadInsertTabletContext::open() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else if (is_open_) { + // do nothing + } else { + lib::ObMutexGuard guard(mutex_); + if (!is_open_) { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadInsertParam direct_load_param; + direct_load_param.is_replay_ = false; + direct_load_param.common_param_.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_LOAD_DATA; + direct_load_param.common_param_.data_format_version_ = param_.data_version_; + direct_load_param.common_param_.read_snapshot_ = param_.snapshot_version_; + direct_load_param.common_param_.ls_id_ = param_.ls_id_; + direct_load_param.common_param_.tablet_id_ = param_.tablet_id_; + direct_load_param.runtime_only_param_.exec_ctx_ = nullptr; + direct_load_param.runtime_only_param_.task_id_ = param_.ddl_task_id_; + direct_load_param.runtime_only_param_.table_id_ = param_.table_id_; + direct_load_param.runtime_only_param_.schema_version_ = param_.schema_version_; + direct_load_param.runtime_only_param_.task_cnt_ = 1; // default value. 
+ if (OB_FAIL(sstable_insert_mgr->create_tablet_direct_load( + param_.context_id_, param_.execution_id_, direct_load_param))) { + LOG_WARN("create tablet manager failed", K(ret)); + } else if (OB_FAIL(sstable_insert_mgr->open_tablet_direct_load( + true, param_.ls_id_, param_.tablet_id_, param_.context_id_, start_scn_, + handle_))) { + LOG_WARN("fail to open tablet direct load", KR(ret), K(param_.tablet_id_)); + } else { + is_open_ = true; + } + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::close() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTabletContext not init", KR(ret), KP(this)); + } else if (OB_UNLIKELY(!is_open_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expected does not open", KR(ret)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_FAIL(sstable_insert_mgr->close_tablet_direct_load( + param_.context_id_, true, param_.ls_id_, param_.tablet_id_, true, true))) { + LOG_WARN("fail to close tablet direct load", KR(ret), K(param_.ls_id_), + K(param_.tablet_id_)); + } else { + is_open_ = false; + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::get_pk_interval(uint64_t count, + share::ObTabletCacheInterval &pk_interval) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(pk_cache_.fetch(count, pk_interval))) { + if (OB_UNLIKELY(OB_EAGAIN != ret)) { + LOG_WARN("fail to fetch from pk cache", KR(ret)); + } else { + if (OB_FAIL(refresh_pk_cache(param_.origin_tablet_id_, pk_cache_))) { + LOG_WARN("fail to refresh pk cache", KR(ret)); + } else if (OB_FAIL(pk_cache_.fetch(count, pk_interval))) { + LOG_WARN("fail to fetch from pk cache", KR(ret)); + } + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::get_lob_pk_interval(uint64_t count, + share::ObTabletCacheInterval &pk_interval) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(lob_pk_cache_.fetch(count, pk_interval))) { + if (OB_UNLIKELY(OB_EAGAIN != ret)) { + LOG_WARN("fail to fetch from pk 
cache", KR(ret)); + } else { + if (OB_FAIL(refresh_pk_cache(param_.lob_tablet_id_, lob_pk_cache_))) { + LOG_WARN("fail to refresh pk cache", KR(ret)); + } else if (OB_FAIL(lob_pk_cache_.fetch(count, pk_interval))) { + LOG_WARN("fail to fetch from pk cache", KR(ret)); + } + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::get_write_ctx(ObDirectLoadInsertTabletWriteCtx &write_ctx) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadLobTabletContext not init", KR(ret), KP(this)); + } else { + ObMutexGuard guard(mutex_); + if (OB_FAIL(get_pk_interval(WRITE_BATCH_SIZE, write_ctx.pk_interval_))) { + LOG_WARN("fail to get pk interval", KR(ret), KP(this)); + } else { + write_ctx.start_seq_.macro_data_seq_ = start_seq_.macro_data_seq_; + start_seq_.macro_data_seq_ += WRITE_BATCH_SIZE; + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::get_lob_write_ctx(ObDirectLoadInsertTabletWriteCtx &write_ctx) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadLobTabletContext not init", KR(ret), KP(this)); + } else { + ObMutexGuard guard(mutex_); + if (OB_FAIL( + get_lob_pk_interval(WRITE_BATCH_SIZE, write_ctx.pk_interval_))) { + LOG_WARN("fail to get pk interval", KR(ret), KP(this)); + } else { + write_ctx.start_seq_.macro_data_seq_ = lob_start_seq_.macro_data_seq_; + lob_start_seq_.macro_data_seq_ += WRITE_BATCH_SIZE; + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::refresh_pk_cache(const common::ObTabletID &tablet_id, share::ObTabletCacheInterval &pk_cache) +{ + int ret = OB_SUCCESS; + ObTabletAutoincrementService &auto_inc = ObTabletAutoincrementService::get_instance(); + pk_cache.tablet_id_ = tablet_id; + pk_cache.cache_size_ = PK_CACHE_SIZE; + if (OB_FAIL(auto_inc.get_tablet_cache_interval(param_.tenant_id_, pk_cache))) { + LOG_WARN("get_autoinc_seq fail", K(ret), K_(param_.tenant_id), K(tablet_id)); + } else if (OB_UNLIKELY(PK_CACHE_SIZE > 
pk_cache.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected autoincrement value count", K(ret), K(pk_cache)); + } + return ret; + +} + +int ObDirectLoadInsertTabletContext::fill_sstable_slice(const int64_t &slice_id, + ObIStoreRowIterator &iter, + int64_t &affected_rows) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObDirectLoadInsertTabletContext *tablet_ctx = nullptr; + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + ObDirectLoadSliceInfo slice_info; + // slice_info.is_full_direct_load_ = !param_.px_mode_; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = false; + slice_info.ls_id_ = param_.ls_id_; + slice_info.data_tablet_id_ = param_.tablet_id_; + slice_info.slice_id_ = slice_id; + slice_info.context_id_ = param_.context_id_; + if (OB_FAIL(sstable_insert_mgr->fill_sstable_slice(slice_info, &iter, affected_rows))) { + LOG_WARN("fail to fill sstable slice", KR(ret)); + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::fill_lob_sstable_slice(ObIAllocator &allocator, + const int64_t &lob_slice_id, + share::ObTabletCacheInterval &pk_interval, + blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObDirectLoadSliceInfo slice_info; + // slice_info.is_full_direct_load_ = !param_.px_mode_; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = param_.ls_id_; + slice_info.data_tablet_id_ = param_.tablet_id_; + slice_info.slice_id_ = lob_slice_id; + slice_info.context_id_ = param_.context_id_; + if (OB_FAIL(handle_.get_obj()->fill_lob_sstable_slice(allocator, slice_info, start_scn_, pk_interval, datum_row))) { + LOG_WARN("fail to fill sstable slice", KR(ret), K(slice_info), K(datum_row)); + } + } + return ret; 
+} + +int ObDirectLoadInsertTabletContext::open_sstable_slice( + const blocksstable::ObMacroDataSeq &start_seq, int64_t &slice_id) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + ObDirectLoadSliceInfo slice_info; + // slice_info.is_full_direct_load_ = !param_.px_mode_; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = false; + slice_info.ls_id_ = param_.ls_id_; + slice_info.data_tablet_id_ = param_.tablet_id_; + slice_info.slice_id_ = slice_id; + slice_info.context_id_ = param_.context_id_; + if (OB_FAIL(open())) { + LOG_WARN("fail to open tablet direct load", KR(ret)); + } else if (OB_FAIL(sstable_insert_mgr->open_sstable_slice(start_seq, slice_info))) { + LOG_WARN("fail to construct sstable slice writer", KR(ret), K(slice_info.data_tablet_id_)); + } else { + slice_id = slice_info.slice_id_; + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::open_lob_sstable_slice( + const blocksstable::ObMacroDataSeq &start_seq, int64_t &slice_id) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + ObDirectLoadSliceInfo slice_info; + // slice_info.is_full_direct_load_ = !param_.px_mode_; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = param_.ls_id_; + slice_info.data_tablet_id_ = param_.tablet_id_; + slice_info.slice_id_ = slice_id; + slice_info.context_id_ = param_.context_id_; + if (OB_FAIL(open())) { + LOG_WARN("fail to open tablet direct load", KR(ret)); + } else if (OB_FAIL(sstable_insert_mgr->open_sstable_slice(start_seq, slice_info))) { + LOG_WARN("fail to construct sstable slice writer", KR(ret), K(slice_info.data_tablet_id_)); + 
} else { + slice_id = slice_info.slice_id_; + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::close_sstable_slice(const int64_t slice_id) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + ObDirectLoadSliceInfo slice_info; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = false; + slice_info.ls_id_ = param_.ls_id_; + slice_info.data_tablet_id_ = param_.tablet_id_; + slice_info.slice_id_ = slice_id; + slice_info.context_id_ = param_.context_id_; + if (OB_FAIL(sstable_insert_mgr->close_sstable_slice(slice_info))) { + LOG_WARN("fail to close tablet direct load", KR(ret), K(slice_id), + K(param_.tablet_id_)); + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::close_lob_sstable_slice(const int64_t slice_id) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + ObDirectLoadSliceInfo slice_info; + slice_info.is_full_direct_load_ = true; + slice_info.is_lob_slice_ = true; + slice_info.ls_id_ = param_.ls_id_; + slice_info.data_tablet_id_ = param_.tablet_id_; + slice_info.slice_id_ = slice_id; + slice_info.context_id_ = param_.context_id_; + if (OB_FAIL(sstable_insert_mgr->close_sstable_slice(slice_info))) { + LOG_WARN("fail to close tablet direct load", KR(ret), K(slice_id), + K(param_.tablet_id_)); + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::calc_range() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_FAIL(sstable_insert_mgr->calc_range(param_.ls_id_, 
param_.tablet_id_, true))) { + LOG_WARN("fail to calc range", KR(ret), K(param_.tablet_id_)); + } else { + LOG_INFO("success to calc range", K(param_.tablet_id_)); + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::fill_column_group(const int64_t thread_cnt, const int64_t thread_id) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else { + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_FAIL(sstable_insert_mgr->fill_column_group(param_.ls_id_, param_.tablet_id_, true/*is direct load*/, thread_cnt, thread_id))) { + LOG_WARN("fail to fill column group", KR(ret), K(param_.tablet_id_), K(thread_cnt), K(thread_id)); + } + } + return ret; +} + +int ObDirectLoadInsertTabletContext::cancel() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", K(ret), KP(this)); + } else { + LOG_INFO("start to remove slice writers", K(param_.tablet_id_)); + ObTenantDirectLoadMgr *sstable_insert_mgr = MTL(ObTenantDirectLoadMgr *); + if (OB_FAIL(sstable_insert_mgr->cancel(param_.ls_id_, param_.tablet_id_, true/*is direct load*/))) { + LOG_WARN("cancel direct load fill task failed", K(ret), K(param_.tablet_id_)); + } } return ret; } @@ -57,26 +488,21 @@ int ObDirectLoadInsertTableParam::assign(const ObDirectLoadInsertTableParam &oth * ObDirectLoadInsertTableContext */ -ObDirectLoadInsertTableContext::ObDirectLoadInsertTableContext() - : tablet_finish_count_(0), is_inited_(false) -{ -} +ObDirectLoadInsertTableContext::ObDirectLoadInsertTableContext() : is_inited_(false) {} -ObDirectLoadInsertTableContext::~ObDirectLoadInsertTableContext() -{ - reset(); -} +ObDirectLoadInsertTableContext::~ObDirectLoadInsertTableContext() { destory(); } -void ObDirectLoadInsertTableContext::reset() +void ObDirectLoadInsertTableContext::destory() { int ret = OB_SUCCESS; - if (0 != 
ddl_ctrl_.context_id_) { - ObSSTableInsertManager &sstable_insert_mgr = ObSSTableInsertManager::get_instance(); - if (OB_FAIL(sstable_insert_mgr.finish_table_context(ddl_ctrl_.context_id_, false))) { - LOG_WARN("fail to finish table context", KR(ret), K_(ddl_ctrl)); - } - ddl_ctrl_.context_id_ = 0; + for (TABLET_CTX_MAP::iterator iter = tablet_ctx_map_.begin(); iter != tablet_ctx_map_.end(); + ++iter) { + ObDirectLoadInsertTabletContext *tablet_ctx = iter->second; + tablet_ctx->~ObDirectLoadInsertTabletContext(); + allocator_.free(tablet_ctx); } + tablet_ctx_map_.destroy(); + allocator_.reset(); is_inited_ = false; } @@ -89,134 +515,95 @@ int ObDirectLoadInsertTableContext::init(const ObDirectLoadInsertTableParam &par } else if (OB_UNLIKELY(!param.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", KR(ret), K(param)); + } else if (OB_FAIL(param_.assign(param))) { + LOG_WARN("fail to assign param", KR(ret)); + } else if (OB_FAIL(tablet_ctx_map_.create(1024, "TLD_InsTabCtx", + "TLD_InsTabCtx", MTL_ID()))) { + LOG_WARN("fail to create tablet ctx map", KR(ret)); + } else if (OB_FAIL(MTL(ObTenantDirectLoadMgr *)->alloc_execution_context_id(ddl_ctrl_.context_id_))) { + LOG_WARN("alloc execution context id failed", K(ret)); + } else if (OB_FAIL(create_all_tablet_contexts())) { + LOG_WARN("fail to create all tablet contexts", KR(ret)); } else { - ObSSTableInsertManager &sstable_insert_mgr = ObSSTableInsertManager::get_instance(); - ObSSTableInsertTableParam table_insert_param; - table_insert_param.dest_table_id_ = param.table_id_; - table_insert_param.snapshot_version_ = 0; - table_insert_param.schema_version_ = param.schema_version_; - table_insert_param.task_cnt_ = 1; - table_insert_param.write_major_ = true; - table_insert_param.execution_id_ = param.execution_id_; - table_insert_param.ddl_task_id_ = param.ddl_task_id_; - table_insert_param.data_format_version_ = param.data_version_; - for (int64_t i = 0; i < param.ls_partition_ids_.count(); ++i) { - 
const ObTableLoadLSIdAndPartitionId &ls_partition_id = param.ls_partition_ids_.at(i); - if (OB_FAIL(table_insert_param.ls_tablet_ids_.push_back( - std::make_pair(ls_partition_id.ls_id_, ls_partition_id.part_tablet_id_.tablet_id_)))) { - LOG_WARN("fail to push back ls tablet id", KR(ret)); + is_inited_ = true; + } + return ret; +} + +int ObDirectLoadInsertTableContext::create_all_tablet_contexts() +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = MTL_ID(); + for (int64_t i = 0; OB_SUCC(ret) && i < param_.ls_partition_ids_.count(); ++i) { + const ObLSID &target_ls_id = param_.target_ls_partition_ids_.at(i).ls_id_; + const ObTabletID &tablet_id = param_.ls_partition_ids_.at(i).part_tablet_id_.tablet_id_; + const ObTabletID &target_tablet_id = + param_.target_ls_partition_ids_.at(i).part_tablet_id_.tablet_id_; + ObDirectLoadInsertTabletContext *tablet_ctx = nullptr; + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + if (OB_ISNULL(ls_service = MTL(ObLSService *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(MTL_ID())); + } else if (OB_FAIL(ls_service->get_ls(target_ls_id, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get log stream", K(ret)); + } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, target_tablet_id, tablet_handle, + ObMDSGetTabletMode::READ_ALL_COMMITED))) { + LOG_WARN("get tablet handle failed", K(ret)); + } else { + ObTabletBindingMdsUserData ddl_data; + ObDirectLoadInsertTabletParam insert_tablet_param; + insert_tablet_param.tenant_id_ = tenant_id; + insert_tablet_param.ls_id_ = target_ls_id; + insert_tablet_param.table_id_ = param_.table_id_; + insert_tablet_param.tablet_id_ = target_tablet_id; + insert_tablet_param.origin_tablet_id_ = tablet_id; + insert_tablet_param.schema_version_ = param_.schema_version_; + insert_tablet_param.snapshot_version_ = param_.snapshot_version_; + insert_tablet_param.execution_id_ = param_.execution_id_; + insert_tablet_param.ddl_task_id_ 
= param_.ddl_task_id_; + insert_tablet_param.data_version_ = param_.data_version_; + insert_tablet_param.reserved_parallel_ = param_.reserved_parallel_; + insert_tablet_param.context_id_ = ddl_ctrl_.context_id_; + if (OB_FAIL(tablet_handle.get_obj()->get_ddl_data(SCN::max_scn(), ddl_data))) { + LOG_WARN("get ddl data failed", K(ret)); + } else if (OB_ISNULL(tablet_ctx = OB_NEWx(ObDirectLoadInsertTabletContext, (&allocator_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to new ObDirectLoadInsertTabletContext", KR(ret)); + } else if (OB_FALSE_IT(insert_tablet_param.lob_tablet_id_ = ddl_data.lob_meta_tablet_id_)) { + } else if (OB_FAIL(tablet_ctx->init(insert_tablet_param))) { + LOG_WARN("fail to init fast heap table tablet ctx", KR(ret)); + } else if (OB_FAIL(tablet_ctx_map_.set_refactored(tablet_id, tablet_ctx))) { + LOG_WARN("fail to set tablet ctx map", KR(ret)); + } + if (OB_FAIL(ret)) { + if (nullptr != tablet_ctx) { + tablet_ctx->~ObDirectLoadInsertTabletContext(); + allocator_.free(tablet_ctx); + tablet_ctx = nullptr; + } } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(param_.assign(param))) { - LOG_WARN("fail to assign param", KR(ret)); - } else if (OB_FAIL(sstable_insert_mgr.create_table_context(table_insert_param, - ddl_ctrl_.context_id_))) { - LOG_WARN("fail to create table context", KR(ret), K(table_insert_param)); + } + return ret; +} + +int ObDirectLoadInsertTableContext::get_tablet_context( + const ObTabletID &tablet_id, ObDirectLoadInsertTabletContext *&tablet_ctx) const +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); + } else if (OB_UNLIKELY(!tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(tablet_id)); + } else if (OB_FAIL(tablet_ctx_map_.get_refactored(tablet_id, tablet_ctx))) { + if (OB_UNLIKELY(OB_HASH_NOT_EXIST != ret)) { + LOG_WARN("fail to get tablet ctx map", KR(ret), K(tablet_id)); } else { - 
is_inited_ = true; - } - if (OB_FAIL(ret)) { - reset(); - } - } - return ret; -} - -int ObDirectLoadInsertTableContext::add_sstable_slice(const ObTabletID &tablet_id, - const ObMacroDataSeq &start_seq, - ObNewRowIterator &iter, - int64_t &affected_rows) -{ - int ret = OB_SUCCESS; - ObSSTableInsertManager &sstable_insert_mgr = ObSSTableInsertManager::get_instance(); - ObSSTableInsertTabletParam tablet_insert_param; - tablet_insert_param.context_id_ = ddl_ctrl_.context_id_; - tablet_insert_param.table_id_ = param_.table_id_; - tablet_insert_param.tablet_id_ = tablet_id; - tablet_insert_param.write_major_ = true; - tablet_insert_param.task_cnt_ = 1; - tablet_insert_param.schema_version_ = param_.schema_version_; - tablet_insert_param.snapshot_version_ = param_.snapshot_version_; - tablet_insert_param.execution_id_ = param_.execution_id_; - tablet_insert_param.ddl_task_id_ = param_.ddl_task_id_; - if (OB_FAIL(sstable_insert_mgr.update_table_tablet_context(ddl_ctrl_.context_id_, tablet_id, - param_.snapshot_version_))) { - LOG_WARN("fail to update table context", KR(ret), K_(ddl_ctrl), K(tablet_id)); - } else if (OB_FAIL(sstable_insert_mgr.add_sstable_slice(tablet_insert_param, start_seq, iter, - affected_rows))) { - LOG_WARN("fail to add sstable slice", KR(ret)); - } - return ret; -} - -int ObDirectLoadInsertTableContext::construct_sstable_slice_writer( - const ObTabletID &tablet_id, const ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&slice_writer, ObIAllocator &allocator) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); - } else { - ObSSTableInsertManager &sstable_insert_mgr = ObSSTableInsertManager::get_instance(); - ObSSTableInsertTabletParam tablet_insert_param; - tablet_insert_param.context_id_ = ddl_ctrl_.context_id_; - tablet_insert_param.table_id_ = param_.table_id_; - tablet_insert_param.tablet_id_ = tablet_id; - tablet_insert_param.write_major_ = true; - 
tablet_insert_param.task_cnt_ = 1; - tablet_insert_param.schema_version_ = param_.schema_version_; - tablet_insert_param.snapshot_version_ = param_.snapshot_version_; - tablet_insert_param.execution_id_ = param_.execution_id_; - tablet_insert_param.ddl_task_id_ = param_.ddl_task_id_; - if (OB_FAIL(sstable_insert_mgr.update_table_tablet_context(ddl_ctrl_.context_id_, tablet_id, - param_.snapshot_version_))) { - LOG_WARN("fail to update table context", KR(ret), K_(ddl_ctrl), K(tablet_id)); - } else if (OB_FAIL(sstable_insert_mgr.construct_sstable_slice_writer( - tablet_insert_param, start_seq, slice_writer, allocator))) { - LOG_WARN("fail to construct sstable slice writer", KR(ret)); - } - } - return ret; -} - -int ObDirectLoadInsertTableContext::notify_tablet_finish(const ObTabletID &tablet_id) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); - } else { - ObSSTableInsertManager &sstable_insert_mgr = ObSSTableInsertManager::get_instance(); - int64_t tablet_finish_count = 0; - if (OB_FAIL(sstable_insert_mgr.notify_tablet_end(ddl_ctrl_.context_id_, tablet_id))) { - LOG_WARN("fail to notify tablet end", KR(ret), K_(ddl_ctrl), K(tablet_id)); - } else if (FALSE_IT(tablet_finish_count = ATOMIC_AAF(&tablet_finish_count_, 1))) { - } else if (OB_FAIL(sstable_insert_mgr.finish_ready_tablets(ddl_ctrl_.context_id_, - tablet_finish_count))) { - LOG_WARN("fail to finish ready tablets", KR(ret), K_(ddl_ctrl), K(tablet_finish_count)); - } - } - return ret; -} - -int ObDirectLoadInsertTableContext::commit() -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObDirectLoadInsertTableContext not init", KR(ret), KP(this)); - } else if (OB_UNLIKELY(tablet_finish_count_ != param_.ls_partition_ids_.count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected finished tablet count", KR(ret), K(tablet_finish_count_), - K(param_.ls_partition_ids_.count())); - } else { - 
ObSSTableInsertManager &sstable_insert_mgr = ObSSTableInsertManager::get_instance(); - if (OB_FAIL(sstable_insert_mgr.finish_table_context(ddl_ctrl_.context_id_, true))) { - LOG_WARN("fail to finish table context", KR(ret), K_(ddl_ctrl)); - } else { - ddl_ctrl_.context_id_ = 0; + ret = OB_ENTRY_NOT_EXIST; } } return ret; diff --git a/src/storage/direct_load/ob_direct_load_insert_table_ctx.h b/src/storage/direct_load/ob_direct_load_insert_table_ctx.h index 0ebef10e8..5409702e4 100644 --- a/src/storage/direct_load/ob_direct_load_insert_table_ctx.h +++ b/src/storage/direct_load/ob_direct_load_insert_table_ctx.h @@ -11,15 +11,26 @@ */ #pragma once +#include "lib/hash/ob_hashmap.h" +#include "lib/lock/ob_mutex.h" +#include "share/ob_ls_id.h" +#include "share/ob_tablet_autoincrement_param.h" #include "share/table/ob_table_load_define.h" +#include "storage/blocksstable/ob_block_sstable_struct.h" +#include "storage/access/ob_store_row_iterator.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "sql/engine/px/ob_sub_trans_ctrl.h" namespace oceanbase { +namespace sql +{ +class ObDDLCtrl; +} + namespace storage { -class ObSSTableInsertSliceWriter; - +struct ObDirectLoadInsertTableRowIteratorParam; struct ObDirectLoadInsertTableParam { public: @@ -27,7 +38,10 @@ public: ~ObDirectLoadInsertTableParam(); int assign(const ObDirectLoadInsertTableParam &other); bool is_valid() const; - TO_STRING_KV(K_(table_id), K_(schema_version), K_(snapshot_version), K_(ls_partition_ids), K_(execution_id), K_(ddl_task_id)); + TO_STRING_KV(K_(table_id), K_(schema_version), K_(snapshot_version), K_(execution_id), + K_(ddl_task_id), K_(data_version), K_(reserved_parallel), K_(ls_partition_ids), + K_(target_ls_partition_ids)); + public: uint64_t table_id_; int64_t schema_version_; @@ -35,31 +49,108 @@ public: int64_t execution_id_; int64_t ddl_task_id_; int64_t data_version_; + int64_t reserved_parallel_; common::ObArray ls_partition_ids_; + common::ObArray 
target_ls_partition_ids_; +}; + +struct ObDirectLoadInsertTabletParam +{ +public: + ObDirectLoadInsertTabletParam(); + ~ObDirectLoadInsertTabletParam(); + bool is_valid() const; + TO_STRING_KV(K_(tenant_id), K_(table_id), K_(ls_id), K_(table_id), K_(origin_tablet_id), K_(tablet_id), + K_(lob_tablet_id), K_(schema_version), K_(snapshot_version), K_(execution_id), K_(ddl_task_id), + K_(data_version), K_(reserved_parallel), K_(context_id)); +public: + uint64_t tenant_id_; + uint64_t table_id_; + share::ObLSID ls_id_; + common::ObTabletID tablet_id_; + common::ObTabletID lob_tablet_id_; + common::ObTabletID origin_tablet_id_; + int64_t schema_version_; + int64_t snapshot_version_; + int64_t execution_id_; + int64_t ddl_task_id_; + int64_t data_version_; + int64_t reserved_parallel_; + int64_t context_id_; +}; + +struct ObDirectLoadInsertTabletWriteCtx +{ + blocksstable::ObMacroDataSeq start_seq_; + share::ObTabletCacheInterval pk_interval_; + TO_STRING_KV(K_(start_seq), K_(pk_interval)); +}; + +class ObDirectLoadInsertTabletContext +{ + static const int64_t PK_CACHE_SIZE = 1000000; + static const int64_t WRITE_BATCH_SIZE = 100000; +public: + ObDirectLoadInsertTabletContext(); + ~ObDirectLoadInsertTabletContext(); + bool is_open() const { return is_open_; } + const common::ObTabletID &get_tablet_id() const { return param_.tablet_id_; } +public: + int init(const ObDirectLoadInsertTabletParam ¶m); + int open(); + int close(); + int open_sstable_slice(const blocksstable::ObMacroDataSeq &start_seq, int64_t &slice_id); + int close_sstable_slice(const int64_t slice_id); + int fill_sstable_slice(const int64_t &slice_id, ObIStoreRowIterator &iter, + int64_t &affected_rows); + int get_write_ctx(ObDirectLoadInsertTabletWriteCtx &write_ctx); + int open_lob_sstable_slice(const blocksstable::ObMacroDataSeq &start_seq, int64_t &slice_id); + int close_lob_sstable_slice(const int64_t slice_id); + int fill_lob_sstable_slice(ObIAllocator &allocator, const int64_t &lob_slice_id, 
share::ObTabletCacheInterval &pk_interval, + blocksstable::ObDatumRow &datum_row); + + int get_lob_write_ctx(ObDirectLoadInsertTabletWriteCtx &write_ctx); + int calc_range(); + int fill_column_group(const int64_t thread_cnt, const int64_t thread_id); + int cancel(); + TO_STRING_KV(K_(param), K_(is_open)); +private: + int get_pk_interval(uint64_t count, share::ObTabletCacheInterval &pk_interval); + int get_lob_pk_interval(uint64_t count, share::ObTabletCacheInterval &pk_interval); + int refresh_pk_cache(const common::ObTabletID &tablet_id, share::ObTabletCacheInterval &pk_cache); +private: + ObDirectLoadInsertTabletParam param_; + volatile bool is_open_; + lib::ObMutex mutex_; + blocksstable::ObMacroDataSeq start_seq_; + blocksstable::ObMacroDataSeq lob_start_seq_; + share::ObTabletCacheInterval pk_cache_; + share::ObTabletCacheInterval lob_pk_cache_; + share::SCN start_scn_; + ObTabletDirectLoadMgrHandle handle_; + bool is_inited_; }; class ObDirectLoadInsertTableContext { +private: + typedef common::hash::ObHashMap + TABLET_CTX_MAP; public: ObDirectLoadInsertTableContext(); ~ObDirectLoadInsertTableContext(); - void reset(); + void destory(); int init(const ObDirectLoadInsertTableParam ¶m); - int add_sstable_slice(const common::ObTabletID &tablet_id, - const blocksstable::ObMacroDataSeq &start_seq, - common::ObNewRowIterator &iter, - int64_t &affected_rows); - int construct_sstable_slice_writer(const common::ObTabletID &tablet_id, - const blocksstable::ObMacroDataSeq &start_seq, - ObSSTableInsertSliceWriter *&slice_writer, - common::ObIAllocator &allocator); - int notify_tablet_finish(const common::ObTabletID &tablet_id); - int commit(); - TO_STRING_KV(K_(param), K_(ddl_ctrl)); + int get_tablet_context(const common::ObTabletID &tablet_id, + ObDirectLoadInsertTabletContext *&tablet_ctx) const; + TO_STRING_KV(K_(param)); +private: + int create_all_tablet_contexts(); private: ObDirectLoadInsertTableParam param_; + TABLET_CTX_MAP tablet_ctx_map_; + 
common::ObArenaAllocator allocator_; sql::ObDDLCtrl ddl_ctrl_; - int64_t tablet_finish_count_ CACHE_ALIGNED; bool is_inited_; }; diff --git a/src/storage/direct_load/ob_direct_load_insert_table_row_iterator.cpp b/src/storage/direct_load/ob_direct_load_insert_table_row_iterator.cpp new file mode 100644 index 000000000..7983443b2 --- /dev/null +++ b/src/storage/direct_load/ob_direct_load_insert_table_row_iterator.cpp @@ -0,0 +1,152 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/direct_load/ob_direct_load_insert_table_row_iterator.h" + +namespace oceanbase +{ +namespace storage +{ +using namespace common; +using namespace blocksstable; + +/** + * ObDirectLoadInsertTableRowIteratorParam + */ + +ObDirectLoadInsertTableRowIteratorParam::ObDirectLoadInsertTableRowIteratorParam() + : lob_column_cnt_(0), + datum_utils_(nullptr), + col_descs_(nullptr), + cmp_funcs_(nullptr), + column_stat_array_(nullptr), + lob_builder_(nullptr), + is_heap_table_(false), + online_opt_stat_gather_(false), + px_mode_(false) +{ +} + +ObDirectLoadInsertTableRowIteratorParam::~ObDirectLoadInsertTableRowIteratorParam() +{ +} + +/** + * ObDirectLoadInsertTableRowIterator + */ + +ObDirectLoadInsertTableRowIterator::ObDirectLoadInsertTableRowIterator() +: lob_allocator_(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()) +{ +} + +ObDirectLoadInsertTableRowIterator::~ObDirectLoadInsertTableRowIterator() +{ +} + +int 
ObDirectLoadInsertTableRowIterator::inner_init( + const ObDirectLoadInsertTableRowIteratorParam ¶m) +{ + int ret = OB_SUCCESS; + param_ = param; + return ret; +} + +int ObDirectLoadInsertTableRowIterator::get_next_row(const blocksstable::ObDatumRow *&result_row) +{ + int ret = OB_SUCCESS; + ObDatumRow *datum_row = nullptr; + if (OB_FAIL(inner_get_next_row(datum_row))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to do inner get next row", KR(ret)); + } + } + if (OB_SUCC(ret)) { + if (param_.online_opt_stat_gather_ && OB_FAIL(collect_obj(*datum_row))) { + LOG_WARN("fail to collect obj", KR(ret)); + } + } + if (OB_SUCC(ret) && param_.lob_column_cnt_ > 0) { + lob_allocator_.reuse(); + if (OB_FAIL(handle_lob(*datum_row))) { + LOG_WARN("fail to handle lob", KR(ret)); + } + } + if (OB_SUCC(ret)) { + result_row = datum_row; + } + return ret; +} + +int ObDirectLoadInsertTableRowIterator::collect_obj(const blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + const int64_t extra_rowkey_cnt = ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + if (param_.is_heap_table_) { + for (int64_t i = 0; OB_SUCC(ret) && i < param_.table_data_desc_.column_count_; i++) { + const ObStorageDatum &datum = datum_row.storage_datums_[i + extra_rowkey_cnt + 1]; + const common::ObCmpFunc &cmp_func = param_.cmp_funcs_->at(i + 1).get_cmp_func(); + const ObColDesc &col_desc = param_.col_descs_->at(i + 1); + ObOptOSGColumnStat *col_stat = param_.column_stat_array_->at(i); + bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); + if (col_stat != nullptr && is_valid) { + if (OB_FAIL( + col_stat->update_column_stat_info(&datum, col_desc.col_type_, cmp_func.cmp_func_))) { + LOG_WARN("Failed to merge obj", K(ret), KP(col_stat)); + } + } + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < param_.table_data_desc_.rowkey_column_num_; i++) { + const ObStorageDatum &datum = datum_row.storage_datums_[i]; + const common::ObCmpFunc 
&cmp_func = param_.cmp_funcs_->at(i).get_cmp_func(); + const ObColDesc &col_desc = param_.col_descs_->at(i); + ObOptOSGColumnStat *col_stat = param_.column_stat_array_->at(i); + bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); + if (col_stat != nullptr && is_valid) { + if (OB_FAIL( + col_stat->update_column_stat_info(&datum, col_desc.col_type_, cmp_func.cmp_func_))) { + LOG_WARN("Failed to merge obj", K(ret), KP(col_stat)); + } + } + } + for (int64_t i = param_.table_data_desc_.rowkey_column_num_; + OB_SUCC(ret) && i < param_.table_data_desc_.column_count_; i++) { + const ObStorageDatum &datum = datum_row.storage_datums_[i + extra_rowkey_cnt]; + const common::ObCmpFunc &cmp_func = param_.cmp_funcs_->at(i).get_cmp_func(); + const ObColDesc &col_desc = param_.col_descs_->at(i); + ObOptOSGColumnStat *col_stat = param_.column_stat_array_->at(i); + bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); + if (col_stat != nullptr && is_valid) { + if (OB_FAIL( + col_stat->update_column_stat_info(&datum, col_desc.col_type_, cmp_func.cmp_func_))) { + LOG_WARN("Failed to merge obj", K(ret), KP(col_stat)); + } + } + } + } + return ret; +} + +int ObDirectLoadInsertTableRowIterator::handle_lob(blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(param_.lob_builder_->append_lob(lob_allocator_, datum_row))) { + LOG_WARN("fail to append lob", KR(ret), K(param_.tablet_id_), K(datum_row)); + } + return ret; +} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_insert_table_row_iterator.h b/src/storage/direct_load/ob_direct_load_insert_table_row_iterator.h new file mode 100644 index 000000000..31def1d3c --- /dev/null +++ b/src/storage/direct_load/ob_direct_load_insert_table_row_iterator.h @@ -0,0 +1,66 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once + +#include "share/stat/ob_opt_osg_column_stat.h" +#include "storage/direct_load/ob_direct_load_lob_builder.h" +#include "storage/direct_load/ob_direct_load_merge_ctx.h" +#include "storage/direct_load/ob_direct_load_table_data_desc.h" + +namespace oceanbase +{ +namespace blocksstable +{ +class ObIStoreRowIterator; +} // namespace blocksstable +namespace storage +{ +struct ObDirectLoadInsertTableRowIteratorParam +{ +public: + ObDirectLoadInsertTableRowIteratorParam(); + ~ObDirectLoadInsertTableRowIteratorParam(); + TO_STRING_KV(K_(tablet_id), K_(table_data_desc), K_(is_heap_table), K_(online_opt_stat_gather), K_(px_mode)); +public: + ObTabletID tablet_id_; + ObDirectLoadTableDataDesc table_data_desc_; + int64_t lob_column_cnt_; + const blocksstable::ObStorageDatumUtils *datum_utils_; + const common::ObIArray *col_descs_; + const blocksstable::ObStoreCmpFuncs *cmp_funcs_; + common::ObIArray *column_stat_array_; + ObDirectLoadLobBuilder *lob_builder_; + bool is_heap_table_; + bool online_opt_stat_gather_; + bool px_mode_; +}; + +class ObDirectLoadInsertTableRowIterator : public ObIStoreRowIterator +{ +public: + ObDirectLoadInsertTableRowIterator(); + virtual ~ObDirectLoadInsertTableRowIterator(); + int get_next_row(const blocksstable::ObDatumRow *&datum_row) override; +private: + int collect_obj(const blocksstable::ObDatumRow &datum_row); + int handle_lob(blocksstable::ObDatumRow &datum_row); +protected: + int inner_init(const ObDirectLoadInsertTableRowIteratorParam ¶m); + virtual int 
inner_get_next_row(blocksstable::ObDatumRow *&datum_row) = 0; +private: + ObDirectLoadInsertTableRowIteratorParam param_; + common::ObArenaAllocator lob_allocator_; +}; + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_lob_builder.cpp b/src/storage/direct_load/ob_direct_load_lob_builder.cpp new file mode 100644 index 000000000..2c32427d7 --- /dev/null +++ b/src/storage/direct_load/ob_direct_load_lob_builder.cpp @@ -0,0 +1,156 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/direct_load/ob_direct_load_lob_builder.h" +#include "share/stat/ob_stat_define.h" +#include "share/table/ob_table_load_define.h" +#include "storage/direct_load/ob_direct_load_insert_table_ctx.h" + +namespace oceanbase +{ +namespace storage +{ +using namespace common; +using namespace blocksstable; +using namespace share; + +/** + * ObDirectLoadLobBuildParam + */ + +ObDirectLoadLobBuildParam::ObDirectLoadLobBuildParam() + : insert_table_ctx_(nullptr), lob_column_cnt_(0) +{ +} + +ObDirectLoadLobBuildParam::~ObDirectLoadLobBuildParam() +{ +} + +bool ObDirectLoadLobBuildParam::is_valid() const +{ + return tablet_id_.is_valid() && nullptr != insert_table_ctx_; +} + +/** + * ObDirectLoadLobBuilder + */ + +ObDirectLoadLobBuilder::ObDirectLoadLobBuilder() + : insert_tablet_ctx_(nullptr), + current_lob_slice_id_(0), + is_closed_(false), + is_inited_(false) +{ +} + +ObDirectLoadLobBuilder::~ObDirectLoadLobBuilder() +{ +} + +int ObDirectLoadLobBuilder::init(const ObDirectLoadLobBuildParam ¶m) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDirectLoadLobBuilder init twice", KR(ret), KP(this)); + } else if (OB_UNLIKELY(!param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(param)); + } else { + param_ = param; + if (OB_FAIL(param_.insert_table_ctx_->get_tablet_context( + param_.tablet_id_, insert_tablet_ctx_))) { + LOG_WARN("fail to get tablet context", KR(ret)); + } else if (OB_FAIL(init_sstable_slice_ctx())) { + LOG_WARN("fail to init sstable slice ctx", KR(ret)); + } else { + lob_tablet_id_ = insert_tablet_ctx_->get_tablet_id(); + is_inited_ = true; + } + } + return ret; +} + +int ObDirectLoadLobBuilder::init_sstable_slice_ctx() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(insert_tablet_ctx_->get_lob_write_ctx(write_ctx_))) { + LOG_WARN("fail to get write ctx", KR(ret)); + } else if (OB_FAIL(insert_tablet_ctx_->open_lob_sstable_slice( + 
write_ctx_.start_seq_, + current_lob_slice_id_))) { + LOG_WARN("fail to construct sstable slice", KR(ret)); + } + return ret; +} + +int ObDirectLoadLobBuilder::switch_sstable_slice() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(insert_tablet_ctx_->close_lob_sstable_slice(current_lob_slice_id_))) { + LOG_WARN("fail to close sstable slice builder", KR(ret)); + } else if (OB_FAIL(init_sstable_slice_ctx())) { + LOG_WARN("fail to init sstable slice ctx", KR(ret)); + } + return ret; +} + +int ObDirectLoadLobBuilder::append_lob(ObIAllocator &allocator, blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadLobBuilder not init", KR(ret), KP(this)); + } else if (OB_UNLIKELY(is_closed_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lob builder is closed", KR(ret)); + } else { + if (write_ctx_.pk_interval_.remain_count() < param_.lob_column_cnt_) { + if (OB_FAIL(switch_sstable_slice())) { + LOG_WARN("fail to switch sstable slice", KR(ret)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(insert_tablet_ctx_->fill_lob_sstable_slice(allocator, current_lob_slice_id_, + write_ctx_.pk_interval_, datum_row))) { + LOG_WARN("fill lob sstable slice failed", K(ret), KP(insert_tablet_ctx_), K(current_lob_slice_id_), K(write_ctx_.pk_interval_), K(datum_row)); + } + } + } + return ret; +} + +int ObDirectLoadLobBuilder::close() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadLobBuilder not init", KR(ret), KP(this)); + } else if (OB_UNLIKELY(is_closed_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet lob builder is closed", KR(ret)); + } else { + if (OB_FAIL(insert_tablet_ctx_->close_lob_sstable_slice(current_lob_slice_id_))) { + LOG_WARN("fail to close sstable slice ", KR(ret)); + } else { + current_lob_slice_id_ = 0; + is_closed_ = true; + } + } + return ret; +} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_lob_builder.h 
b/src/storage/direct_load/ob_direct_load_lob_builder.h new file mode 100644 index 000000000..012b1fc31 --- /dev/null +++ b/src/storage/direct_load/ob_direct_load_lob_builder.h @@ -0,0 +1,67 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once + +#include "common/ob_tablet_id.h" +#include "storage/direct_load/ob_direct_load_i_table.h" +#include "storage/direct_load/ob_direct_load_table_data_desc.h" +#include "storage/direct_load/ob_direct_load_insert_table_ctx.h" + +namespace oceanbase +{ +namespace table +{ +} // namespace table +namespace common +{ +} // namespace common +namespace storage +{ + +struct ObDirectLoadLobBuildParam +{ +public: + ObDirectLoadLobBuildParam(); + ~ObDirectLoadLobBuildParam(); + bool is_valid() const; + TO_STRING_KV(K_(tablet_id)); +public: + common::ObTabletID tablet_id_; + ObDirectLoadInsertTableContext *insert_table_ctx_; + int64_t lob_column_cnt_; +}; + +class ObDirectLoadLobBuilder +{ +public: + ObDirectLoadLobBuilder(); + ~ObDirectLoadLobBuilder(); + int init(const ObDirectLoadLobBuildParam ¶m); + int append_lob(common::ObIAllocator &allocator, blocksstable::ObDatumRow &datum_row); + int close(); +private: + int init_sstable_slice_ctx(); + int switch_sstable_slice(); +private: + ObDirectLoadLobBuildParam param_; + common::ObTabletID lob_tablet_id_; + ObDirectLoadInsertTabletContext *insert_tablet_ctx_; + ObDirectLoadInsertTabletWriteCtx write_ctx_; + int64_t current_lob_slice_id_; + bool is_closed_; + bool is_inited_; + 
DISALLOW_COPY_AND_ASSIGN(ObDirectLoadLobBuilder); +}; + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_merge_ctx.cpp b/src/storage/direct_load/ob_direct_load_merge_ctx.cpp index 6f16724f2..75a8b2d4a 100644 --- a/src/storage/direct_load/ob_direct_load_merge_ctx.cpp +++ b/src/storage/direct_load/ob_direct_load_merge_ctx.cpp @@ -20,6 +20,7 @@ #include "storage/direct_load/ob_direct_load_multiple_sstable.h" #include "storage/direct_load/ob_direct_load_partition_merge_task.h" #include "storage/direct_load/ob_direct_load_range_splitter.h" +#include "storage/direct_load/ob_direct_load_partition_rescan_task.h" #include "observer/table_load/ob_table_load_schema.h" #include "share/stat/ob_opt_table_stat.h" #include "share/stat/ob_opt_stat_monitor_manager.h" @@ -48,12 +49,15 @@ ObDirectLoadMergeParam::ObDirectLoadMergeParam() rowkey_column_num_(0), store_column_count_(0), snapshot_version_(0), + lob_column_cnt_(0), datum_utils_(nullptr), col_descs_(nullptr), cmp_funcs_(nullptr), is_heap_table_(false), is_fast_heap_table_(false), + is_column_store_(false), online_opt_stat_gather_(false), + px_mode_(false), insert_table_ctx_(nullptr), dml_row_handler_(nullptr) { @@ -67,8 +71,8 @@ bool ObDirectLoadMergeParam::is_valid() const { return OB_INVALID_ID != table_id_ && 0 < rowkey_column_num_ && 0 < store_column_count_ && snapshot_version_ > 0 && table_data_desc_.is_valid() && nullptr != datum_utils_ && - nullptr != col_descs_ && nullptr != cmp_funcs_ && nullptr != insert_table_ctx_ && - nullptr != dml_row_handler_; + nullptr != col_descs_ && nullptr != cmp_funcs_ && + nullptr != insert_table_ctx_ && nullptr != dml_row_handler_; } /** @@ -153,7 +157,7 @@ int ObDirectLoadMergeCtx::create_all_tablet_ctxs( */ ObDirectLoadTabletMergeCtx::ObDirectLoadTabletMergeCtx() - : allocator_("TLD_MegTbtCtx"), task_finish_count_(0), is_inited_(false) + : allocator_("TLD_MegTbtCtx"), task_finish_count_(0), rescan_task_finish_count_(0), 
is_inited_(false) { } @@ -164,7 +168,13 @@ ObDirectLoadTabletMergeCtx::~ObDirectLoadTabletMergeCtx() task->~ObDirectLoadPartitionMergeTask(); allocator_.free(task); } + for (int64_t i = 0; i < rescan_task_array_.count(); ++i) { + ObDirectLoadPartitionRescanTask *task = rescan_task_array_.at(i); + task->~ObDirectLoadPartitionRescanTask(); + allocator_.free(task); + } task_array_.reset(); + rescan_task_array_.reset(); } int ObDirectLoadTabletMergeCtx::init(const ObDirectLoadMergeParam ¶m, @@ -777,6 +787,31 @@ int ObDirectLoadTabletMergeCtx::build_aggregate_merge_task_for_multiple_heap_tab return ret; } +int ObDirectLoadTabletMergeCtx::build_rescan_task(int64_t thread_count) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < thread_count; ++i) { + ObDirectLoadPartitionRescanTask *rescan_task = nullptr; + if (OB_ISNULL(rescan_task = OB_NEWx(ObDirectLoadPartitionRescanTask, + (&allocator_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to new ObDirectLoadPartitionRescanTask", KR(ret)); + } else if (OB_FAIL(rescan_task->init(param_, this, thread_count, i))) { + LOG_WARN("fail to init merge task", KR(ret)); + } else if (OB_FAIL(rescan_task_array_.push_back(rescan_task))) { + LOG_WARN("fail to push back merge task", KR(ret)); + } + if (OB_FAIL(ret)) { + if (nullptr != rescan_task) { + rescan_task->~ObDirectLoadPartitionRescanTask(); + allocator_.free(rescan_task); + rescan_task = nullptr; + } + } + } + return ret; +} + int ObDirectLoadTabletMergeCtx::get_autoincrement_value(uint64_t count, ObTabletCacheInterval &interval) { @@ -812,5 +847,18 @@ int ObDirectLoadTabletMergeCtx::inc_finish_count(bool &is_ready) return ret; } +int ObDirectLoadTabletMergeCtx::inc_rescan_finish_count(bool &is_ready) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadTabletMergeCtx not init", KR(ret), KP(this)); + } else { + const int64_t finish_count = ATOMIC_AAF(&rescan_task_finish_count_, 1); + is_ready = (finish_count >= 
rescan_task_array_.count()); + } + return ret; +} + } // namespace storage } // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_merge_ctx.h b/src/storage/direct_load/ob_direct_load_merge_ctx.h index 750a3ecf8..323b549df 100644 --- a/src/storage/direct_load/ob_direct_load_merge_ctx.h +++ b/src/storage/direct_load/ob_direct_load_merge_ctx.h @@ -35,6 +35,7 @@ namespace storage { class ObDirectLoadInsertTableContext; class ObDirectLoadPartitionMergeTask; +class ObDirectLoadPartitionRescanTask; class ObDirectLoadTabletMergeCtx; class ObIDirectLoadPartitionTable; class ObDirectLoadSSTable; @@ -50,22 +51,26 @@ public: ~ObDirectLoadMergeParam(); bool is_valid() const; TO_STRING_KV(K_(table_id), K_(target_table_id), K_(rowkey_column_num), K_(store_column_count), - K_(snapshot_version), K_(table_data_desc), KP_(datum_utils), KP_(col_descs), - KP_(cmp_funcs), K_(is_heap_table), K_(is_fast_heap_table), - K_(online_opt_stat_gather), KP_(insert_table_ctx), KP_(dml_row_handler)); + K_(snapshot_version), K_(table_data_desc), KP_(datum_utils), KP_(col_descs), + KP_(lob_column_cnt), KP_(cmp_funcs), K_(is_heap_table), K_(is_fast_heap_table), + K_(is_column_store), K_(online_opt_stat_gather), KP_(insert_table_ctx), + KP_(dml_row_handler)); public: uint64_t table_id_; uint64_t target_table_id_; int64_t rowkey_column_num_; int64_t store_column_count_; int64_t snapshot_version_; + int64_t lob_column_cnt_; storage::ObDirectLoadTableDataDesc table_data_desc_; const blocksstable::ObStorageDatumUtils *datum_utils_; const common::ObIArray *col_descs_; const blocksstable::ObStoreCmpFuncs *cmp_funcs_; bool is_heap_table_; bool is_fast_heap_table_; + bool is_column_store_; bool online_opt_stat_gather_; + bool px_mode_; ObDirectLoadInsertTableContext *insert_table_ctx_; ObDirectLoadDMLRowHandler *dml_row_handler_; }; @@ -99,6 +104,7 @@ public: ~ObDirectLoadTabletMergeCtx(); int init(const ObDirectLoadMergeParam ¶m, const table::ObTableLoadLSIdAndPartitionId 
&ls_partition_id, const table::ObTableLoadLSIdAndPartitionId &target_ls_partition_id); + int build_rescan_task(int64_t thread_count); int build_merge_task(const common::ObIArray &table_array, const common::ObIArray &col_descs, int64_t max_parallel_degree, bool is_multiple_mode); @@ -109,6 +115,7 @@ public: int build_aggregate_merge_task_for_multiple_heap_table( const common::ObIArray &table_array); int inc_finish_count(bool &is_ready); + int inc_rescan_finish_count(bool &is_ready); int collect_sql_statistics( const common::ObIArray &fast_heap_table_array, table::ObTableLoadSqlStatistics &sql_statistics); int collect_dml_stat(const common::ObIArray &fast_heap_table_array, @@ -120,6 +127,10 @@ public: { return task_array_; } + const common::ObIArray &get_rescan_tasks() const + { + return rescan_task_array_; + } TO_STRING_KV(K_(param), K_(target_partition_id), K_(tablet_id), K_(target_tablet_id)); private: int init_sstable_array(const common::ObIArray &table_array); @@ -157,7 +168,9 @@ private: common::ObSEArray multiple_heap_table_array_; common::ObSEArray range_array_; common::ObSEArray task_array_; + common::ObSEArray rescan_task_array_; int64_t task_finish_count_ CACHE_ALIGNED; + int64_t rescan_task_finish_count_ CACHE_ALIGNED; bool is_inited_; }; diff --git a/src/storage/direct_load/ob_direct_load_origin_table.cpp b/src/storage/direct_load/ob_direct_load_origin_table.cpp index 05d1a998d..b06ca3681 100644 --- a/src/storage/direct_load/ob_direct_load_origin_table.cpp +++ b/src/storage/direct_load/ob_direct_load_origin_table.cpp @@ -213,13 +213,9 @@ int ObDirectLoadOriginTableScanner::init(ObDirectLoadOriginTable *origin_table, ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", KR(ret), KPC(origin_table), K(query_range)); } else { - blocksstable::ObSSTable *major_sstable = origin_table->get_major_sstable(); origin_table_ = origin_table; allocator_.set_tenant_id(MTL_ID()); - if (major_sstable != nullptr && major_sstable->is_co_sstable() && 
!major_sstable->is_empty()) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("not supported scan co sstable", K(ret), KPC(major_sstable)); - } else if (OB_FAIL((init_table_access_param()))) { + if (OB_FAIL((init_table_access_param()))) { LOG_WARN("fail to init query range", KR(ret)); } else if (OB_FAIL(init_table_access_ctx())) { LOG_WARN("fail to init table access param", KR(ret)); @@ -300,7 +296,7 @@ int ObDirectLoadOriginTableScanner::init_table_access_ctx() share::SCN snapshot_scn; if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { LOG_WARN("fail to convert scn", KR(ret)); - } else if (OB_FAIL(store_ctx_.init_for_read(origin_table_->get_meta().ls_id_, INT64_MAX, -1, + } else if (OB_FAIL(store_ctx_.init_for_read(origin_table_->get_meta().ls_id_, tablet_id, INT64_MAX, -1, snapshot_scn))) { LOG_WARN("fail to init for read", KR(ret)); } else if (OB_FAIL(table_access_ctx_.init(query_flag, store_ctx_, allocator_, allocator_, diff --git a/src/storage/direct_load/ob_direct_load_partition_merge_task.cpp b/src/storage/direct_load/ob_direct_load_partition_merge_task.cpp index 5a32c8bc7..019dc1f40 100644 --- a/src/storage/direct_load/ob_direct_load_partition_merge_task.cpp +++ b/src/storage/direct_load/ob_direct_load_partition_merge_task.cpp @@ -14,7 +14,6 @@ #include "storage/direct_load/ob_direct_load_partition_merge_task.h" #include "share/stat/ob_opt_column_stat.h" #include "share/stat/ob_stat_define.h" -#include "storage/ddl/ob_direct_insert_sstable_ctx.h" #include "storage/direct_load/ob_direct_load_external_table.h" #include "storage/direct_load/ob_direct_load_insert_table_ctx.h" #include "storage/direct_load/ob_direct_load_merge_ctx.h" @@ -64,67 +63,58 @@ int ObDirectLoadPartitionMergeTask::process() ret = OB_NOT_INIT; LOG_WARN("ObDirectLoadPartitionMergeTask not init", KR(ret), KP(this)); } else { + int64_t slice_id = 0; const ObTabletID &tablet_id = merge_ctx_->get_tablet_id(); - const ObTabletID &target_tablet_id = merge_ctx_->get_target_tablet_id(); + 
ObDirectLoadInsertTabletContext *tablet_ctx = nullptr; ObIStoreRowIterator *row_iter = nullptr; - ObSSTableInsertSliceWriter *writer = nullptr; + bool has_lob_storage = merge_param_->lob_column_cnt_ > 0 ? true :false; ObMacroDataSeq block_start_seq; block_start_seq.set_parallel_degree(parallel_idx_); if (merge_param_->online_opt_stat_gather_ && OB_FAIL(init_sql_statistics())) { - LOG_WARN("fail to init sql statistics", KR(ret)); + LOG_WARN("fail to inner init sql statistics", KR(ret)); + } else if (has_lob_storage && OB_FAIL(init_lob_builder())) { + LOG_WARN("fail to inner init lob builder", KR(ret)); } else if (OB_FAIL(construct_row_iter(allocator_, row_iter))) { LOG_WARN("fail to construct row iter", KR(ret)); - } else if (OB_FAIL(merge_param_->insert_table_ctx_->construct_sstable_slice_writer( - target_tablet_id, block_start_seq, writer, allocator_))) { - LOG_WARN("fail to construct sstable slice writer", KR(ret), K(target_tablet_id), + } else if (OB_FAIL(merge_param_->insert_table_ctx_->get_tablet_context( + tablet_id, tablet_ctx))) { + LOG_WARN("fail to get tablet context ", KR(ret), K(tablet_id), + K(block_start_seq)); + } else if (OB_FAIL(tablet_ctx->open_sstable_slice( + block_start_seq, slice_id))) { + LOG_WARN("fail to construct sstable slice ", KR(ret), K(slice_id), K(block_start_seq)); } else { - LOG_INFO("add sstable slice begin", K(target_tablet_id), K(parallel_idx_)); - const ObDatumRow *datum_row = nullptr; - while (OB_SUCC(ret)) { - if (OB_UNLIKELY(is_stop_)) { - ret = OB_CANCELED; - LOG_WARN("merge task canceled", KR(ret)); - } else if (OB_FAIL(row_iter->get_next_row(datum_row))) { - if (OB_UNLIKELY(OB_ITER_END != ret)) { - LOG_WARN("fail to get next row", KR(ret)); - } else { - ret = OB_SUCCESS; - break; - } - } else if (OB_FAIL(writer->append_row(*const_cast(datum_row)))) { - LOG_WARN("fail to append row", KR(ret), KPC(datum_row)); - } else if (merge_param_->online_opt_stat_gather_ && OB_FAIL(collect_obj(*datum_row))) { - LOG_WARN("fail to 
collect statistics", KR(ret)); - } else { - ++affected_rows_; - } + LOG_INFO("add sstable slice begin", KP(tablet_ctx), K(slice_id)); + if (OB_UNLIKELY(is_stop_)) { + ret = OB_CANCELED; + LOG_WARN("merge task canceled", KR(ret)); + } else if (OB_FAIL(tablet_ctx->fill_sstable_slice(slice_id, *row_iter, affected_rows_))) { + LOG_WARN("fail to fill sstable slice", KR(ret)); + } else if (OB_FAIL(tablet_ctx->close_sstable_slice(slice_id))) { + LOG_WARN("fail to close writer", KR(ret)); + } else if (has_lob_storage && OB_FAIL(lob_builder_.close())) { + LOG_WARN("fail to close lob_builder", KR(ret)); } - if (OB_SUCC(ret)) { - if (OB_FAIL(writer->close())) { - LOG_WARN("fail to close writer", KR(ret)); - } - } - LOG_INFO("add sstable slice end", KR(ret), K(target_tablet_id), K(tablet_id), - K(parallel_idx_), K(affected_rows_)); + LOG_INFO("add sstable slice end", KR(ret), K(tablet_id), K(parallel_idx_), K(affected_rows_)); } if (OB_NOT_NULL(row_iter)) { row_iter->~ObIStoreRowIterator(); allocator_.free(row_iter); row_iter = nullptr; } - if (OB_NOT_NULL(writer)) { - writer->~ObSSTableInsertSliceWriter(); - allocator_.free(writer); - writer = nullptr; - } if (OB_SUCC(ret)) { bool is_ready = false; if (OB_FAIL(merge_ctx_->inc_finish_count(is_ready))) { LOG_WARN("fail to inc finish count", KR(ret)); - } else if (is_ready && - OB_FAIL(merge_param_->insert_table_ctx_->notify_tablet_finish(target_tablet_id))) { - LOG_WARN("fail to notify tablet finish", KR(ret), K(target_tablet_id)); + } else if (is_ready) { + if (merge_param_->is_column_store_) { + if (OB_FAIL(tablet_ctx->calc_range())) { + LOG_WARN("fail to calc range", KR(ret)); + } + } else if (OB_FAIL(tablet_ctx->close())) { + LOG_WARN("fail to close", KR(ret)); + } } } } @@ -153,55 +143,38 @@ int ObDirectLoadPartitionMergeTask::init_sql_statistics() return ret; } -int ObDirectLoadPartitionMergeTask::collect_obj(const ObDatumRow &datum_row) +int ObDirectLoadPartitionMergeTask::init_lob_builder() { int ret = OB_SUCCESS; - 
const int64_t extra_rowkey_cnt = ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); - if (merge_param_->is_heap_table_ ) { - for (int64_t i = 0; OB_SUCC(ret) && i < merge_param_->table_data_desc_.column_count_; i++) { - const ObStorageDatum &datum = datum_row.storage_datums_[i + extra_rowkey_cnt + 1]; - const ObColDesc &col_desc = merge_param_->col_descs_->at(i + 1); - const ObCmpFunc &cmp_func = merge_param_->cmp_funcs_->at(i + 1).get_cmp_func(); - ObOptOSGColumnStat *col_stat = column_stat_array_.at(i); - bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); - if (col_stat != nullptr && is_valid) { - if (OB_FAIL(col_stat->update_column_stat_info(&datum, col_desc.col_type_, cmp_func.cmp_func_))) { - LOG_WARN("Failed to merge obj", K(ret), KP(col_stat)); - } - } - } - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < merge_param_->rowkey_column_num_; i++) { - const ObStorageDatum &datum = datum_row.storage_datums_[i]; - const ObColDesc &col_desc = merge_param_->col_descs_->at(i); - const ObCmpFunc &cmp_func = merge_param_->cmp_funcs_->at(i).get_cmp_func(); - ObOptOSGColumnStat *col_stat = column_stat_array_.at(i); - bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); - if (col_stat != nullptr && is_valid) { - if (OB_FAIL(col_stat->update_column_stat_info(&datum, col_desc.col_type_, cmp_func.cmp_func_))) { - LOG_WARN("Failed to merge obj", K(ret), KP(col_stat)); - } - } - } - for (int64_t i = merge_param_->rowkey_column_num_; OB_SUCC(ret) && i < merge_param_->table_data_desc_.column_count_; i++) { - const ObStorageDatum &datum = datum_row.storage_datums_[i + extra_rowkey_cnt]; - const ObColDesc &col_desc = merge_param_->col_descs_->at(i); - const ObCmpFunc &cmp_func = merge_param_->cmp_funcs_->at(i).get_cmp_func(); - ObOptOSGColumnStat *col_stat = column_stat_array_.at(i); - bool is_valid = ObColumnStatParam::is_valid_opt_col_type(col_desc.col_type_.get_type()); - if (col_stat != nullptr 
&& is_valid) { - if (OB_FAIL(col_stat->update_column_stat_info(&datum, col_desc.col_type_, cmp_func.cmp_func_))) { - LOG_WARN("Failed to merge obj", K(ret), KP(col_stat)); - } - } - } + ObDirectLoadLobBuildParam param; + param.tablet_id_ = merge_ctx_->get_tablet_id(); + param.insert_table_ctx_ = merge_param_->insert_table_ctx_; + param.lob_column_cnt_ = merge_param_->lob_column_cnt_; + if (OB_FAIL(lob_builder_.init(param))) { + LOG_WARN("fail to init lob builder", KR(ret)); } return ret; } void ObDirectLoadPartitionMergeTask::stop() { - is_stop_ = true; + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadPartitionRescanTask not init", KR(ret), KP(this)); + } else { + ObDirectLoadInsertTabletContext *tablet_ctx = nullptr; + const ObTabletID &tablet_id = merge_ctx_->get_tablet_id(); + if (OB_FAIL(merge_param_->insert_table_ctx_->get_tablet_context( + tablet_id, tablet_ctx))) { + LOG_WARN("fail to get tablet context ", KR(ret), K(tablet_id)); + } else if (OB_FAIL(tablet_ctx->cancel())) { + LOG_WARN("fail to cancel fill task", K(ret)); + } else { + is_stop_ = true; + } + } + //ignore ret } /** @@ -221,6 +194,8 @@ int ObDirectLoadPartitionRangeMergeTask::RowIterator::init( const ObDirectLoadMergeParam &merge_param, const ObTabletID &tablet_id, ObDirectLoadOriginTable *origin_table, + ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const ObIArray &sstable_array, const ObDatumRange &range) { @@ -233,6 +208,19 @@ int ObDirectLoadPartitionRangeMergeTask::RowIterator::init( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", KR(ret), K(merge_param), K(tablet_id), K(sstable_array), K(range)); } else { + // init row iterator + ObDirectLoadInsertTableRowIteratorParam row_iterator_param; + row_iterator_param.tablet_id_ = tablet_id; + row_iterator_param.table_data_desc_ = merge_param.table_data_desc_; + row_iterator_param.datum_utils_ = merge_param.datum_utils_; + row_iterator_param.col_descs_ = merge_param.col_descs_; + 
row_iterator_param.lob_column_cnt_ = merge_param.lob_column_cnt_; + row_iterator_param.cmp_funcs_ = merge_param.cmp_funcs_; + row_iterator_param.column_stat_array_ = &column_stat_array; + row_iterator_param.lob_builder_ = &lob_builder; + row_iterator_param.is_heap_table_ = merge_param.is_heap_table_; + row_iterator_param.online_opt_stat_gather_ = merge_param.online_opt_stat_gather_; + row_iterator_param.px_mode_ = merge_param.px_mode_; // init data_fuse_ ObDirectLoadDataFuseParam data_fuse_param; data_fuse_param.tablet_id_ = tablet_id; @@ -240,7 +228,9 @@ int ObDirectLoadPartitionRangeMergeTask::RowIterator::init( data_fuse_param.table_data_desc_ = merge_param.table_data_desc_; data_fuse_param.datum_utils_ = merge_param.datum_utils_; data_fuse_param.dml_row_handler_ = merge_param.dml_row_handler_; - if (OB_FAIL(data_fuse_.init(data_fuse_param, origin_table, sstable_array, range))) { + if (OB_FAIL(inner_init(row_iterator_param))) { + LOG_WARN("fail to inner init", KR(ret)); + } else if (OB_FAIL(data_fuse_.init(data_fuse_param, origin_table, sstable_array, range))) { LOG_WARN("fail to init data fuse", KR(ret)); } // init datum_row_ @@ -260,7 +250,7 @@ int ObDirectLoadPartitionRangeMergeTask::RowIterator::init( return ret; } -int ObDirectLoadPartitionRangeMergeTask::RowIterator::get_next_row(const ObDatumRow *&result_row) +int ObDirectLoadPartitionRangeMergeTask::RowIterator::inner_get_next_row(ObDatumRow *&result_row) { int ret = OB_SUCCESS; result_row = nullptr; @@ -340,7 +330,7 @@ int ObDirectLoadPartitionRangeMergeTask::construct_row_iter(ObIAllocator &alloca if (OB_ISNULL(row_iter = OB_NEWx(RowIterator, (&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to new RowIterator", KR(ret)); - } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), origin_table_, + } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), origin_table_, column_stat_array_, lob_builder_, *sstable_array_, *range_))) { LOG_WARN("fail 
to init row iter", KR(ret)); } else { @@ -374,6 +364,8 @@ int ObDirectLoadPartitionRangeMultipleMergeTask::RowIterator::init( const ObDirectLoadMergeParam &merge_param, const ObTabletID &tablet_id, ObDirectLoadOriginTable *origin_table, + ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const ObIArray &sstable_array, const ObDatumRange &range) { @@ -387,6 +379,19 @@ int ObDirectLoadPartitionRangeMultipleMergeTask::RowIterator::init( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", KR(ret), K(merge_param), K(tablet_id), K(sstable_array), K(range)); } else { + // init row iterator + ObDirectLoadInsertTableRowIteratorParam row_iterator_param; + row_iterator_param.tablet_id_ = tablet_id; + row_iterator_param.table_data_desc_ = merge_param.table_data_desc_; + row_iterator_param.datum_utils_ = merge_param.datum_utils_; + row_iterator_param.col_descs_ = merge_param.col_descs_; + row_iterator_param.lob_column_cnt_ = merge_param.lob_column_cnt_; + row_iterator_param.cmp_funcs_ = merge_param.cmp_funcs_; + row_iterator_param.column_stat_array_ = &column_stat_array; + row_iterator_param.lob_builder_ = &lob_builder; + row_iterator_param.is_heap_table_ = merge_param.is_heap_table_; + row_iterator_param.online_opt_stat_gather_ = merge_param.online_opt_stat_gather_; + row_iterator_param.px_mode_ = merge_param.px_mode_; // init data_fuse_ ObDirectLoadDataFuseParam data_fuse_param; data_fuse_param.tablet_id_ = tablet_id; @@ -394,7 +399,9 @@ int ObDirectLoadPartitionRangeMultipleMergeTask::RowIterator::init( data_fuse_param.table_data_desc_ = merge_param.table_data_desc_; data_fuse_param.datum_utils_ = merge_param.datum_utils_; data_fuse_param.dml_row_handler_ = merge_param.dml_row_handler_; - if (OB_FAIL(data_fuse_.init(data_fuse_param, origin_table, sstable_array, range))) { + if (OB_FAIL(inner_init(row_iterator_param))) { + LOG_WARN("fail to inner init", KR(ret)); + } else if (OB_FAIL(data_fuse_.init(data_fuse_param, origin_table, sstable_array, range))) { 
LOG_WARN("fail to init data fuse", KR(ret)); } // init datum_row_ @@ -414,8 +421,8 @@ int ObDirectLoadPartitionRangeMultipleMergeTask::RowIterator::init( return ret; } -int ObDirectLoadPartitionRangeMultipleMergeTask::RowIterator::get_next_row( - const ObDatumRow *&result_row) +int ObDirectLoadPartitionRangeMultipleMergeTask::RowIterator::inner_get_next_row( + ObDatumRow *&result_row) { int ret = OB_SUCCESS; result_row = nullptr; @@ -497,7 +504,7 @@ int ObDirectLoadPartitionRangeMultipleMergeTask::construct_row_iter( if (OB_ISNULL(row_iter = OB_NEWx(RowIterator, (&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to new RowIterator", KR(ret)); - } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), origin_table_, + } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), origin_table_, column_stat_array_, lob_builder_, *sstable_array_, *range_))) { LOG_WARN("fail to init row iter", KR(ret)); } else { @@ -534,6 +541,8 @@ int ObDirectLoadPartitionHeapTableMergeTask::RowIterator::init( const ObDirectLoadMergeParam &merge_param, const ObTabletID &tablet_id, ObDirectLoadExternalTable *external_table, + ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const ObTabletCacheInterval &pk_interval) { int ret = OB_SUCCESS; @@ -546,8 +555,24 @@ int ObDirectLoadPartitionHeapTableMergeTask::RowIterator::init( LOG_WARN("invalid args", KR(ret), K(merge_param), K(tablet_id), KP(external_table), K(pk_interval)); } else { + // init row iterator + ObDirectLoadInsertTableRowIteratorParam row_iterator_param; + row_iterator_param.tablet_id_ = tablet_id; + row_iterator_param.table_data_desc_ = merge_param.table_data_desc_; + row_iterator_param.datum_utils_ = merge_param.datum_utils_; + row_iterator_param.col_descs_ = merge_param.col_descs_; + row_iterator_param.lob_column_cnt_ = merge_param.lob_column_cnt_; + row_iterator_param.cmp_funcs_ = merge_param.cmp_funcs_; + row_iterator_param.column_stat_array_ = 
&column_stat_array; + row_iterator_param.lob_builder_ = &lob_builder; + row_iterator_param.is_heap_table_ = merge_param.is_heap_table_; + row_iterator_param.online_opt_stat_gather_ = merge_param.online_opt_stat_gather_; + row_iterator_param.px_mode_ = merge_param.px_mode_; + if (OB_FAIL(inner_init(row_iterator_param))) { + LOG_WARN("fail to inner init", KR(ret)); + } // init scanner_ - if (OB_FAIL(scanner_.init(merge_param.table_data_desc_.external_data_block_size_, + else if (OB_FAIL(scanner_.init(merge_param.table_data_desc_.external_data_block_size_, external_table->get_meta().max_data_block_size_, merge_param.table_data_desc_.compressor_type_, external_table->get_fragments()))) { @@ -574,8 +599,8 @@ int ObDirectLoadPartitionHeapTableMergeTask::RowIterator::init( return ret; } -int ObDirectLoadPartitionHeapTableMergeTask::RowIterator::get_next_row( - const ObDatumRow *&result_row) +int ObDirectLoadPartitionHeapTableMergeTask::RowIterator::inner_get_next_row( + ObDatumRow *&result_row) { int ret = OB_SUCCESS; result_row = nullptr; @@ -607,6 +632,8 @@ int ObDirectLoadPartitionHeapTableMergeTask::RowIterator::get_next_row( return ret; } + + ObDirectLoadPartitionHeapTableMergeTask::ObDirectLoadPartitionHeapTableMergeTask() : external_table_(nullptr) { @@ -656,7 +683,7 @@ int ObDirectLoadPartitionHeapTableMergeTask::construct_row_iter( if (OB_ISNULL(row_iter = OB_NEWx(RowIterator, (&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to new RowIterator", KR(ret)); - } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), external_table_, + } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), external_table_, column_stat_array_, lob_builder_, pk_interval_))) { LOG_WARN("fail to init row iter", KR(ret)); } else { @@ -693,6 +720,8 @@ int ObDirectLoadPartitionHeapTableMultipleMergeTask::RowIterator::init( const ObDirectLoadMergeParam &merge_param, const ObTabletID &tablet_id, ObDirectLoadMultipleHeapTable 
*heap_table, + ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const ObTabletCacheInterval &pk_interval) { int ret = OB_SUCCESS; @@ -706,8 +735,24 @@ int ObDirectLoadPartitionHeapTableMultipleMergeTask::RowIterator::init( LOG_WARN("invalid args", KR(ret), K(merge_param), K(tablet_id), KPC(heap_table), K(pk_interval)); } else { + // init row iterator + ObDirectLoadInsertTableRowIteratorParam row_iterator_param; + row_iterator_param.tablet_id_ = tablet_id; + row_iterator_param.table_data_desc_ = merge_param.table_data_desc_; + row_iterator_param.datum_utils_ = merge_param.datum_utils_; + row_iterator_param.col_descs_ = merge_param.col_descs_; + row_iterator_param.lob_column_cnt_ = merge_param.lob_column_cnt_; + row_iterator_param.cmp_funcs_ = merge_param.cmp_funcs_; + row_iterator_param.column_stat_array_ = &column_stat_array; + row_iterator_param.lob_builder_ = &lob_builder; + row_iterator_param.is_heap_table_ = merge_param.is_heap_table_; + row_iterator_param.online_opt_stat_gather_ = merge_param.online_opt_stat_gather_; + row_iterator_param.px_mode_ = merge_param.px_mode_; + if (OB_FAIL(inner_init(row_iterator_param))) { + LOG_WARN("fail to inner init", KR(ret)); + } // init scanner_ - if (OB_FAIL(scanner_.init(heap_table, tablet_id, merge_param.table_data_desc_))) { + else if (OB_FAIL(scanner_.init(heap_table, tablet_id, merge_param.table_data_desc_))) { LOG_WARN("fail to init tablet whole scanner", KR(ret)); } // init datum_row_ @@ -731,8 +776,9 @@ int ObDirectLoadPartitionHeapTableMultipleMergeTask::RowIterator::init( return ret; } -int ObDirectLoadPartitionHeapTableMultipleMergeTask::RowIterator::get_next_row( - const ObDatumRow *&result_row) + +int ObDirectLoadPartitionHeapTableMultipleMergeTask::RowIterator::inner_get_next_row( + ObDatumRow *&result_row) { int ret = OB_SUCCESS; result_row = nullptr; @@ -813,7 +859,7 @@ int ObDirectLoadPartitionHeapTableMultipleMergeTask::construct_row_iter( if (OB_ISNULL(row_iter = OB_NEWx(RowIterator, 
(&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to new RowIterator", KR(ret)); - } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), heap_table_, + } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), heap_table_, column_stat_array_, lob_builder_, pk_interval_))) { LOG_WARN("fail to init row iter", KR(ret)); } else { @@ -859,6 +905,8 @@ ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::RowIterator::~RowItera int ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::RowIterator::init( const ObDirectLoadMergeParam &merge_param, const ObTabletID &tablet_id, ObDirectLoadOriginTable *origin_table, + ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const ObIArray *heap_table_array, const ObTabletCacheInterval &pk_interval) { @@ -875,7 +923,22 @@ int ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::RowIterator::init( } else { allocator_.set_tenant_id(MTL_ID()); range_.set_whole_range(); - if (OB_FAIL(origin_table->scan(range_, allocator_, origin_iter_))) { + // init row iterator + ObDirectLoadInsertTableRowIteratorParam row_iterator_param; + row_iterator_param.tablet_id_ = tablet_id; + row_iterator_param.table_data_desc_ = merge_param.table_data_desc_; + row_iterator_param.datum_utils_ = merge_param.datum_utils_; + row_iterator_param.col_descs_ = merge_param.col_descs_; + row_iterator_param.lob_column_cnt_ = merge_param.lob_column_cnt_; + row_iterator_param.cmp_funcs_ = merge_param.cmp_funcs_; + row_iterator_param.column_stat_array_ = &column_stat_array; + row_iterator_param.lob_builder_ = &lob_builder; + row_iterator_param.is_heap_table_ = merge_param.is_heap_table_; + row_iterator_param.online_opt_stat_gather_ = merge_param.online_opt_stat_gather_; + row_iterator_param.px_mode_ = merge_param.px_mode_; + if (OB_FAIL(inner_init(row_iterator_param))) { + LOG_WARN("fail to inner init", KR(ret)); + } else if (OB_FAIL(origin_table->scan(range_, allocator_, 
origin_iter_))) { LOG_WARN("fail to scan origin table", KR(ret)); } // init datum_row_ @@ -904,8 +967,8 @@ int ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::RowIterator::init( return ret; } -int ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::RowIterator::get_next_row( - const ObDatumRow *&result_row) +int ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::RowIterator::inner_get_next_row( + ObDatumRow *&result_row) { int ret = OB_SUCCESS; result_row = nullptr; @@ -1053,7 +1116,7 @@ int ObDirectLoadPartitionHeapTableMultipleAggregateMergeTask::construct_row_iter if (OB_ISNULL(row_iter = OB_NEWx(RowIterator, (&allocator)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to new RowIterator", KR(ret)); - } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), origin_table_, + } else if (OB_FAIL(row_iter->init(*merge_param_, merge_ctx_->get_tablet_id(), origin_table_, column_stat_array_, lob_builder_, heap_table_array_, pk_interval_))) { LOG_WARN("fail to init row iter", KR(ret)); } else { diff --git a/src/storage/direct_load/ob_direct_load_partition_merge_task.h b/src/storage/direct_load/ob_direct_load_partition_merge_task.h index e33822847..b3e5118be 100644 --- a/src/storage/direct_load/ob_direct_load_partition_merge_task.h +++ b/src/storage/direct_load/ob_direct_load_partition_merge_task.h @@ -17,6 +17,7 @@ #include "storage/direct_load/ob_direct_load_external_scanner.h" #include "storage/direct_load/ob_direct_load_merge_ctx.h" #include "storage/direct_load/ob_direct_load_multiple_heap_table_scanner.h" +#include "storage/direct_load/ob_direct_load_insert_table_row_iterator.h" #include "sql/engine/expr/ob_expr_sys_op_opnsize.h" namespace oceanbase @@ -54,13 +55,14 @@ protected: ObIStoreRowIterator *&row_iter) = 0; private: int init_sql_statistics(); - int collect_obj(const blocksstable::ObDatumRow &datum_row); + int init_lob_builder(); protected: const ObDirectLoadMergeParam *merge_param_; 
ObDirectLoadTabletMergeCtx *merge_ctx_; int64_t parallel_idx_; int64_t affected_rows_; common::ObArray column_stat_array_; + ObDirectLoadLobBuilder lob_builder_; common::ObArenaAllocator allocator_; bool is_stop_; bool is_inited_; @@ -80,7 +82,7 @@ public: protected: int construct_row_iter(common::ObIAllocator &allocator, ObIStoreRowIterator *&row_iter) override; private: - class RowIterator : public ObIStoreRowIterator + class RowIterator : public ObDirectLoadInsertTableRowIterator { public: RowIterator(); @@ -88,9 +90,11 @@ private: int init(const ObDirectLoadMergeParam &merge_param, const common::ObTabletID &tablet_id, ObDirectLoadOriginTable *origin_table, + common::ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const common::ObIArray &sstable_array, const blocksstable::ObDatumRange &range); - int get_next_row(const blocksstable::ObDatumRow *&datum_row) override; + int inner_get_next_row(blocksstable::ObDatumRow *&datum_row) override; private: ObDirectLoadSSTableDataFuse data_fuse_; blocksstable::ObDatumRow datum_row_; @@ -117,7 +121,7 @@ public: protected: int construct_row_iter(common::ObIAllocator &allocator, ObIStoreRowIterator *&row_iter) override; private: - class RowIterator : public ObIStoreRowIterator + class RowIterator : public ObDirectLoadInsertTableRowIterator { public: RowIterator(); @@ -125,9 +129,11 @@ private: int init(const ObDirectLoadMergeParam &merge_param, const common::ObTabletID &tablet_id, ObDirectLoadOriginTable *origin_table, + common::ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const common::ObIArray &sstable_array, const blocksstable::ObDatumRange &range); - int get_next_row(const blocksstable::ObDatumRow *&datum_row) override; + int inner_get_next_row(blocksstable::ObDatumRow *&datum_row) override; private: ObDirectLoadMultipleSSTableDataFuse data_fuse_; blocksstable::ObDatumRow datum_row_; @@ -153,7 +159,7 @@ public: protected: int construct_row_iter(common::ObIAllocator &allocator, 
ObIStoreRowIterator *&row_iter) override; private: - class RowIterator : public ObIStoreRowIterator + class RowIterator : public ObDirectLoadInsertTableRowIterator { public: RowIterator(); @@ -161,8 +167,10 @@ private: int init(const ObDirectLoadMergeParam &merge_param, const common::ObTabletID &tablet_id, ObDirectLoadExternalTable *external_table, + common::ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const share::ObTabletCacheInterval &pk_interval); - int get_next_row(const blocksstable::ObDatumRow *&datum_row) override; + int inner_get_next_row(blocksstable::ObDatumRow *&datum_row) override; private: ObDirectLoadExternalSequentialScanner scanner_; blocksstable::ObDatumRow datum_row_; @@ -190,7 +198,7 @@ public: protected: int construct_row_iter(common::ObIAllocator &allocator, ObIStoreRowIterator *&row_iter) override; private: - class RowIterator : public ObIStoreRowIterator + class RowIterator : public ObDirectLoadInsertTableRowIterator { public: RowIterator(); @@ -198,8 +206,10 @@ private: int init(const ObDirectLoadMergeParam &merge_param, const common::ObTabletID &tablet_id, ObDirectLoadMultipleHeapTable *heap_table, + common::ObIArray &column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const share::ObTabletCacheInterval &pk_interval); - int get_next_row(const blocksstable::ObDatumRow *&datum_row) override; + int inner_get_next_row(blocksstable::ObDatumRow *&datum_row) override; private: ObDirectLoadMultipleHeapTableTabletWholeScanner scanner_; blocksstable::ObDatumRow datum_row_; @@ -227,16 +237,18 @@ public: protected: int construct_row_iter(common::ObIAllocator &allocator, ObIStoreRowIterator *&row_iter) override; private: - class RowIterator : public ObIStoreRowIterator + class RowIterator : public ObDirectLoadInsertTableRowIterator { public: RowIterator(); virtual ~RowIterator(); int init(const ObDirectLoadMergeParam &merge_param, const common::ObTabletID &tablet_id, ObDirectLoadOriginTable *origin_table, + common::ObIArray 
&column_stat_array, + ObDirectLoadLobBuilder &lob_builder, const common::ObIArray *heap_table_array, const share::ObTabletCacheInterval &pk_interval); - int get_next_row(const blocksstable::ObDatumRow *&datum_row) override; + int inner_get_next_row(blocksstable::ObDatumRow *&datum_row) override; private: int switch_next_heap_table(); private: diff --git a/src/storage/direct_load/ob_direct_load_partition_rescan_task.cpp b/src/storage/direct_load/ob_direct_load_partition_rescan_task.cpp new file mode 100644 index 000000000..b5fe6a1bd --- /dev/null +++ b/src/storage/direct_load/ob_direct_load_partition_rescan_task.cpp @@ -0,0 +1,163 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/direct_load/ob_direct_load_partition_rescan_task.h" +#include "storage/direct_load/ob_direct_load_insert_table_ctx.h" + +namespace oceanbase +{ +namespace storage +{ +using namespace common; +using namespace blocksstable; +using namespace share; + + +int ObDirectLoadPartitionRescanTask::init(const ObDirectLoadMergeParam &merge_param, + ObDirectLoadTabletMergeCtx *merge_ctx, + int64_t thread_cnt, + int64_t thread_idx) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDirectLoadPartitionRescanTask init twice", KR(ret), KP(this)); + } else if (OB_UNLIKELY(!merge_param.is_valid() || thread_cnt <= 0 || thread_idx < 0 || thread_idx > thread_cnt - 1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguement", K(ret), K(merge_param), K(thread_cnt), K(thread_idx)); + } else { + merge_param_ = &merge_param; + merge_ctx_ = merge_ctx; + thread_cnt_ = thread_cnt; + thread_idx_ = thread_idx; + is_inited_ = true; + } + return ret; + +} + +int ObDirectLoadPartitionRescanTask::process() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadPartitionRescanTask not init", KR(ret), KP(this)); + } else { + ObDirectLoadInsertTabletContext *tablet_ctx = nullptr; + const ObTabletID &tablet_id = merge_ctx_->get_tablet_id(); + if (OB_FAIL(merge_param_->insert_table_ctx_->get_tablet_context( + tablet_id, tablet_ctx))) { + LOG_WARN("fail to get tablet context ", KR(ret), K(tablet_id)); + } else if (OB_FAIL(tablet_ctx->fill_column_group(thread_cnt_, thread_idx_))) { + LOG_WARN("fail to close writer", KR(ret)); + } + if (OB_SUCC(ret)) { + bool is_ready = false; + if (OB_FAIL(merge_ctx_->inc_rescan_finish_count(is_ready))) { + LOG_WARN("fail to inc finish count", KR(ret)); + } else if (is_ready) { + if (OB_FAIL(tablet_ctx->close())) { + LOG_WARN("fail to notify tablet finish", KR(ret)); + } + } + } + } + return ret; +} + +void 
ObDirectLoadPartitionRescanTask::stop() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadPartitionRescanTask not init", KR(ret), KP(this)); + } else { + ObDirectLoadInsertTabletContext *tablet_ctx = nullptr; + const ObTabletID &tablet_id = merge_ctx_->get_tablet_id(); + if (OB_FAIL(merge_param_->insert_table_ctx_->get_tablet_context( + tablet_id, tablet_ctx))) { + LOG_WARN("fail to get tablet context ", KR(ret), K(tablet_id)); + } else if (OB_FAIL(tablet_ctx->cancel())) { + LOG_WARN("fail to cancel fill column group", K(ret)); + } + } + //ignore ret +} + + +ObDirectLoadRescanTaskIterator::ObDirectLoadRescanTaskIterator() + : merge_ctx_(nullptr), + tablet_merge_ctx_(nullptr), + tablet_pos_(0), + task_pos_(0), + is_inited_(false) +{ +} + +ObDirectLoadRescanTaskIterator::~ObDirectLoadRescanTaskIterator() +{ +} + +int ObDirectLoadRescanTaskIterator::init(ObDirectLoadMergeCtx *merge_ctx) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDirectLoadRescanTaskIterator init twice", KR(ret), KP(this)); + } else if (OB_UNLIKELY(nullptr == merge_ctx)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), KP(merge_ctx)); + } else { + merge_ctx_ = merge_ctx; + is_inited_ = true; + } + return ret; +} + +int ObDirectLoadRescanTaskIterator::get_next_task(ObDirectLoadPartitionRescanTask *&task) +{ + int ret = OB_SUCCESS; + task = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObDirectLoadRescanTaskIterator not init", KR(ret), KP(this)); + } else { + while (OB_SUCC(ret) && nullptr == task) { + if (nullptr == tablet_merge_ctx_) { + // get next partition merge ctx + const ObIArray &tablet_merge_ctxs = + merge_ctx_->get_tablet_merge_ctxs(); + if (tablet_pos_ >= tablet_merge_ctxs.count()) { + ret = OB_ITER_END; + } else { + tablet_merge_ctx_ = tablet_merge_ctxs.at(tablet_pos_++); + task_pos_ = 0; + } + } + if (OB_SUCC(ret)) { + const ObIArray &tasks = 
tablet_merge_ctx_->get_rescan_tasks(); + if (task_pos_ >= tasks.count()) { + // try next partition + tablet_merge_ctx_ = nullptr; + } else { + task = tasks.at(task_pos_++); + } + } + } + } + return ret; +} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_partition_rescan_task.h b/src/storage/direct_load/ob_direct_load_partition_rescan_task.h new file mode 100644 index 000000000..782b82500 --- /dev/null +++ b/src/storage/direct_load/ob_direct_load_partition_rescan_task.h @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#pragma once + +#include "lib/list/ob_dlist.h" +#include "storage/direct_load/ob_direct_load_merge_ctx.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObDirectLoadPartitionRescanTask : public common::ObDLinkBase +{ +public: + ObDirectLoadPartitionRescanTask() + : thread_cnt_(0), + thread_idx_(-1), + is_inited_(false) + { + } + int init(const ObDirectLoadMergeParam &merge_param, + ObDirectLoadTabletMergeCtx *merge_ctx, + int64_t thread_cnt, + int64_t thread_idx); + int process(); + void stop(); + TO_STRING_KV(K_(thread_cnt), K_(thread_idx)); +private: + const ObDirectLoadMergeParam *merge_param_; + ObDirectLoadTabletMergeCtx *merge_ctx_; + int64_t thread_cnt_; + int64_t thread_idx_; + bool is_inited_; +}; + +class ObDirectLoadRescanTaskIterator +{ +public: + ObDirectLoadRescanTaskIterator(); + ~ObDirectLoadRescanTaskIterator(); + int init(storage::ObDirectLoadMergeCtx *merge_ctx); + int get_next_task(ObDirectLoadPartitionRescanTask *&task); +private: + storage::ObDirectLoadMergeCtx *merge_ctx_; + storage::ObDirectLoadTabletMergeCtx *tablet_merge_ctx_; + int64_t tablet_pos_; + int64_t task_pos_; + bool is_inited_; +}; + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/direct_load/ob_direct_load_range_splitter.cpp b/src/storage/direct_load/ob_direct_load_range_splitter.cpp index 8ee851b5c..ab2ed5a34 100644 --- a/src/storage/direct_load/ob_direct_load_range_splitter.cpp +++ b/src/storage/direct_load/ob_direct_load_range_splitter.cpp @@ -340,13 +340,19 @@ int ObDirectLoadRowkeyMergeRangeSplitter::split_range(ObIArray &ra range_array.reset(); const int64_t range_count = MIN(total_rowkey_count_, max_range_count); if (range_count > 1) { - const int64_t block_count_per_range = (total_rowkey_count_ + range_count - 1) / range_count; + const int64_t block_count_per_range = total_rowkey_count_ / range_count; + int64_t block_count_remainder = total_rowkey_count_ - block_count_per_range * range_count; + int64_t block_count_cur_range = 
block_count_per_range; ObDatumRange range; range.end_key_.set_min_rowkey(); range.set_left_open(); range.set_right_closed(); int64_t count = 0; const ObDatumRowkey *rowkey = nullptr; + if (block_count_remainder > 0) { + block_count_cur_range = block_count_per_range + 1; + --block_count_remainder; + } while (OB_SUCC(ret)) { if (OB_FAIL(rowkey_merger_.get_next_rowkey(rowkey))) { if (OB_UNLIKELY(OB_ITER_END != ret)) { @@ -369,7 +375,7 @@ int ObDirectLoadRowkeyMergeRangeSplitter::split_range(ObIArray &ra } break; } - } else if (++count >= block_count_per_range) { + } else if (++count >= block_count_cur_range) { bool rowkey_equal = false; if (OB_FAIL(rowkey->equal(range.end_key_, *datum_utils_, rowkey_equal))) { LOG_WARN("fail to compare euqal rowkey", KR(ret)); @@ -385,6 +391,13 @@ int ObDirectLoadRowkeyMergeRangeSplitter::split_range(ObIArray &ra LOG_WARN("fail to push back datum ranges", KR(ret)); } else { count = 0; + block_count_cur_range = block_count_per_range; + if (block_count_remainder > 0) { + block_count_cur_range = block_count_per_range + 1; + --block_count_remainder; + } else { + block_count_cur_range = block_count_per_range; + } } } } @@ -946,13 +959,20 @@ int ObDirectLoadMultipleMergeRangeSplitter::combine_final_ranges( LOG_WARN("fail to init rowkey merger", KR(ret)); } else { const int64_t rowkey_count_per_range = - (rowkey_array1.count() + rowkey_array2.count() + max_range_count - 1) / max_range_count; + (rowkey_array1.count() + rowkey_array2.count()) / max_range_count; + int64_t rowkey_count_remainder = + (rowkey_array1.count() + rowkey_array2.count()) - rowkey_count_per_range * max_range_count; + int64_t rowkey_count_cur_range = rowkey_count_per_range; ObDatumRange range; range.end_key_.set_min_rowkey(); range.set_left_open(); range.set_right_closed(); const ObDatumRowkey *datum_rowkey = nullptr; int64_t count = 0; + if (rowkey_count_remainder > 0) { + rowkey_count_cur_range = rowkey_count_per_range + 1; + --rowkey_count_remainder; + } while 
(OB_SUCC(ret)) { if (OB_FAIL(rowkey_merger.get_next_rowkey(datum_rowkey))) { if (OB_UNLIKELY(OB_ITER_END != ret)) { @@ -961,7 +981,7 @@ int ObDirectLoadMultipleMergeRangeSplitter::combine_final_ranges( ret = OB_SUCCESS; break; } - } else if (++count >= rowkey_count_per_range) { + } else if (++count >= rowkey_count_cur_range) { int cmp_ret = 0; if (OB_FAIL(datum_rowkey->compare(range.end_key_, *datum_utils_, cmp_ret))) { LOG_WARN("fail to compare rowkey", KR(ret)); @@ -975,6 +995,12 @@ int ObDirectLoadMultipleMergeRangeSplitter::combine_final_ranges( LOG_WARN("fail to push back range", KR(ret)); } else { count = 0; + if (rowkey_count_remainder > 0) { + rowkey_count_cur_range = rowkey_count_per_range + 1; + --rowkey_count_remainder; + } else { + rowkey_count_cur_range = rowkey_count_per_range; + } } } else { abort_unless(0 == cmp_ret); @@ -1137,13 +1163,19 @@ int ObDirectLoadMultipleSSTableRangeSplitter::split_range( LOG_WARN("fail to push back range", KR(ret)); } } else { - const int64_t block_count_per_range = (total_block_count_ + range_count - 1) / range_count; + const int64_t block_count_per_range = total_block_count_ / range_count; + int64_t block_count_remainder = total_block_count_ - block_count_per_range * range_count; + int64_t block_count_cur_range = block_count_per_range; ObDirectLoadMultipleDatumRange range; range.end_key_.set_min_rowkey(); range.set_left_open(); range.set_right_closed(); int64_t count = 0; const ObDirectLoadMultipleDatumRowkey *rowkey = nullptr; + if (block_count_remainder > 0) { + block_count_cur_range = block_count_per_range + 1; + --block_count_remainder; + } while (OB_SUCC(ret)) { if (OB_FAIL(rowkey_merger_.get_next_rowkey(rowkey))) { if (OB_UNLIKELY(OB_ITER_END != ret)) { @@ -1164,7 +1196,7 @@ int ObDirectLoadMultipleSSTableRangeSplitter::split_range( } break; } - } else if (++count >= block_count_per_range) { + } else if (++count >= block_count_cur_range) { int cmp_ret = 0; if (OB_FAIL(rowkey->compare(range.end_key_, 
*datum_utils_, cmp_ret))) { LOG_WARN("fail to compare euqal rowkey", KR(ret)); @@ -1178,6 +1210,12 @@ int ObDirectLoadMultipleSSTableRangeSplitter::split_range( LOG_WARN("fail to push back datum ranges", KR(ret)); } else { count = 0; + if (block_count_remainder > 0) { + block_count_cur_range = block_count_per_range + 1; + --block_count_remainder; + } else { + block_count_cur_range = block_count_per_range; + } } } } diff --git a/src/storage/direct_load/ob_direct_load_table_store.cpp b/src/storage/direct_load/ob_direct_load_table_store.cpp index 30f40eb4f..84eb92834 100644 --- a/src/storage/direct_load/ob_direct_load_table_store.cpp +++ b/src/storage/direct_load/ob_direct_load_table_store.cpp @@ -33,14 +33,16 @@ using namespace table; ObDirectLoadTableStoreParam::ObDirectLoadTableStoreParam() : snapshot_version_(0), + lob_column_cnt_(0), datum_utils_(nullptr), col_descs_(nullptr), cmp_funcs_(nullptr), file_mgr_(nullptr), is_multiple_mode_(false), is_fast_heap_table_(false), + online_opt_stat_gather_(false), + px_mode_(false), insert_table_ctx_(nullptr), - fast_heap_table_ctx_(nullptr), dml_row_handler_(nullptr), extra_buf_(nullptr), extra_buf_size_(0) @@ -54,9 +56,9 @@ ObDirectLoadTableStoreParam::~ObDirectLoadTableStoreParam() bool ObDirectLoadTableStoreParam::is_valid() const { return snapshot_version_ > 0 && table_data_desc_.is_valid() && nullptr != datum_utils_ && - nullptr != col_descs_ && nullptr != cmp_funcs_ && nullptr != file_mgr_ && - (!is_fast_heap_table_ || - (nullptr != insert_table_ctx_ && nullptr != fast_heap_table_ctx_)) && + nullptr != col_descs_ && nullptr != cmp_funcs_ && + nullptr != file_mgr_ && (!is_fast_heap_table_ || + (nullptr != insert_table_ctx_)) && nullptr != dml_row_handler_; } @@ -112,11 +114,12 @@ int ObDirectLoadTableStoreBucket::init(const ObDirectLoadTableStoreParam ¶m, fast_heap_table_build_param.table_data_desc_ = param.table_data_desc_; fast_heap_table_build_param.datum_utils_ = param.datum_utils_; 
fast_heap_table_build_param.col_descs_ = param.col_descs_; + fast_heap_table_build_param.lob_column_cnt_ = param.lob_column_cnt_; fast_heap_table_build_param.cmp_funcs_ = param.cmp_funcs_; fast_heap_table_build_param.insert_table_ctx_ = param.insert_table_ctx_; - fast_heap_table_build_param.fast_heap_table_ctx_ = param.fast_heap_table_ctx_; fast_heap_table_build_param.dml_row_handler_ = param.dml_row_handler_; fast_heap_table_build_param.online_opt_stat_gather_ = param.online_opt_stat_gather_; + fast_heap_table_build_param.px_mode_ = param.px_mode_; ObDirectLoadFastHeapTableBuilder *fast_heap_table_builder = nullptr; if (OB_ISNULL(fast_heap_table_builder = table_builder_allocator_->alloc())) { diff --git a/src/storage/direct_load/ob_direct_load_table_store.h b/src/storage/direct_load/ob_direct_load_table_store.h index 7f4c236af..b2a6fde2a 100644 --- a/src/storage/direct_load/ob_direct_load_table_store.h +++ b/src/storage/direct_load/ob_direct_load_table_store.h @@ -25,7 +25,6 @@ class ObDirectLoadTableDataDesc; class ObDirectLoadTmpFileManager; class ObDirectLoadTableBuilderAllocator; class ObDirectLoadInsertTableContext; -class ObDirectLoadFastHeapTableContext; class ObDirectLoadDMLRowHandler; struct ObDirectLoadTableStoreParam @@ -35,11 +34,12 @@ public: ~ObDirectLoadTableStoreParam(); bool is_valid() const; TO_STRING_KV(K_(snapshot_version), K_(table_data_desc), KP_(datum_utils), KP_(col_descs), - KP_(cmp_funcs), KP_(file_mgr), K_(is_multiple_mode), K_(is_fast_heap_table), - KP_(insert_table_ctx), KP_(fast_heap_table_ctx), KP_(dml_row_handler), - KP_(extra_buf), K_(extra_buf_size)); + KP_(lob_column_cnt), KP_(cmp_funcs), KP_(file_mgr), K_(is_multiple_mode), + K_(is_fast_heap_table), K_(online_opt_stat_gather), K_(px_mode), + KP_(insert_table_ctx), KP_(dml_row_handler), KP_(extra_buf), K_(extra_buf_size)); public: int64_t snapshot_version_; + int64_t lob_column_cnt_; ObDirectLoadTableDataDesc table_data_desc_; const blocksstable::ObStorageDatumUtils 
*datum_utils_; const common::ObIArray *col_descs_; @@ -48,8 +48,8 @@ public: bool is_multiple_mode_; bool is_fast_heap_table_; bool online_opt_stat_gather_; + bool px_mode_; ObDirectLoadInsertTableContext *insert_table_ctx_; - ObDirectLoadFastHeapTableContext *fast_heap_table_ctx_; ObDirectLoadDMLRowHandler *dml_row_handler_; char *extra_buf_; int64_t extra_buf_size_; diff --git a/src/storage/high_availability/ob_physical_copy_task.cpp b/src/storage/high_availability/ob_physical_copy_task.cpp index 26593334a..68cea17d6 100644 --- a/src/storage/high_availability/ob_physical_copy_task.cpp +++ b/src/storage/high_availability/ob_physical_copy_task.cpp @@ -928,7 +928,7 @@ int ObSSTableCopyFinishTask::get_merge_type_( } else if (sstable_param->table_key_.is_minor_sstable()) { merge_type = ObMergeType::MINOR_MERGE; } else if (sstable_param->table_key_.is_ddl_dump_sstable()) { - merge_type = ObMergeType::DDL_KV_MERGE; + merge_type = ObMergeType::MAJOR_MERGE; } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("sstable type is unexpected", K(ret), KPC(sstable_param)); @@ -979,7 +979,7 @@ int ObSSTableCopyFinishTask::create_empty_sstable_() } else if (OB_FAIL(tablet->load_storage_schema(tmp_allocator, storage_schema_ptr))) { LOG_WARN("failed to load storage_schema", K(ret), KPC(tablet)); } else if (FALSE_IT(param.column_group_cnt_ = sstable_param_->column_group_cnt_)) { - } else if (FALSE_IT(param.is_empty_co_table_ = true)) { + } else if (FALSE_IT(param.is_empty_co_table_ = param.table_key_.is_ddl_sstable() ? 
false : true)) { } else if (FALSE_IT(param.full_column_cnt_ = sstable_param_->full_column_cnt_)) { LOG_WARN("failed to get_stored_column_count_in_sstable", K(ret), KPC(storage_schema_ptr)); } else if (FALSE_IT(param.co_base_type_ = storage_schema_ptr->has_all_column_group() @@ -1438,7 +1438,9 @@ int ObTabletCopyFinishTask::create_new_table_store_with_ddl_() if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("tablet copy finish task do not init", K(ret)); - } else if (OB_FAIL(ObStorageHATabletBuilderUtil::build_table_with_ddl_tables(ls_, tablet_id_, ddl_tables_handle_))) { + } else if (ddl_tables_handle_.empty()) { + // do nothing + } else if (OB_FAIL(ObStorageHATabletBuilderUtil::build_tablet_with_ddl_tables(ls_, tablet_id_, ddl_tables_handle_))) { LOG_WARN("failed to build table with ddl tables", K(ret)); } return ret; diff --git a/src/storage/high_availability/ob_storage_ha_macro_block_writer.cpp b/src/storage/high_availability/ob_storage_ha_macro_block_writer.cpp index 1485a3866..fed431f79 100644 --- a/src/storage/high_availability/ob_storage_ha_macro_block_writer.cpp +++ b/src/storage/high_availability/ob_storage_ha_macro_block_writer.cpp @@ -121,6 +121,7 @@ int ObStorageHAMacroBlockWriter::process(blocksstable::ObMacroBlocksWriteCtx &co write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_MIGRATE_WRITE); write_info.io_desc_.set_group_id(ObIOModule::HA_MACRO_BLOCK_WRITER_IO); write_info.io_timeout_ms_ = GCONF._data_storage_io_timeout / 1000L; + write_info.io_desc_.set_group_id(ObIOModule::HA_MACRO_BLOCK_WRITER_IO); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; diff --git a/src/storage/high_availability/ob_storage_ha_reader.cpp b/src/storage/high_availability/ob_storage_ha_reader.cpp index 9d666250a..fa7d84627 100644 --- a/src/storage/high_availability/ob_storage_ha_reader.cpp +++ b/src/storage/high_availability/ob_storage_ha_reader.cpp @@ -779,6 +779,7 @@ int ObCopyMacroBlockObProducer::prefetch_() 
read_info.io_desc_.set_group_id(ObIOModule::HA_COPY_MACRO_BLOCK_IO); read_info.io_timeout_ms_ = GCONF._data_storage_io_timeout / 1000L; read_info.buf_ = io_buf_[handle_idx_]; + read_info.io_desc_.set_group_id(ObIOModule::HA_COPY_MACRO_BLOCK_IO); if (OB_FAIL(ObBlockManager::async_read_block(read_info, copy_macro_block_handle_[handle_idx_].read_handle_))) { STORAGE_LOG(WARN, "Fail to async read block, ", K(ret), K(read_info)); } diff --git a/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp b/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp index 76c78fa64..04ee34fec 100644 --- a/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp +++ b/src/storage/high_availability/ob_storage_ha_tablet_builder.cpp @@ -2061,17 +2061,7 @@ int ObStorageHATabletBuilderUtil::build_tablet_with_major_tables( ret = OB_INVALID_ARGUMENT; LOG_WARN("get invalid argument", K(ret), KP(ls), K(tablet_id), K(storage_schema)); } else if (!storage_schema.is_row_store()) { - if (NULL == major_tables.get_table(0) || !major_tables.get_table(0)->is_column_store_sstable()) { - /* - * the following code is a temp solution to deal with the column store table that created by ddl kv / load directly - * in this scene, ddl kv will create major sstable rather than co sstable even the table schema is column store - */ - // TODO(@DanLing) tmp code, should removed when column_store_ddl branch merged - if (OB_FAIL(ObStorageHATabletBuilderUtil::build_tablet_for_ddl_kv_(ls, - tablet_id, major_tables, storage_schema, medium_info_list))) { - LOG_WARN("failed to build tablet with ddl major tables", K(ret), K(tablet_id), KPC(ls)); - } - } else if (OB_FAIL(ObStorageHATabletBuilderUtil::build_tablet_for_column_store_(ls, + if (OB_FAIL(ObStorageHATabletBuilderUtil::build_tablet_for_column_store_(ls, tablet_id, major_tables, storage_schema, medium_info_list))) { LOG_WARN("failed to build tablet with co tables", K(ret), K(tablet_id), KPC(ls)); } @@ -2082,92 +2072,6 @@ int 
ObStorageHATabletBuilderUtil::build_tablet_with_major_tables( return ret; } -int ObStorageHATabletBuilderUtil::build_tablet_for_ddl_kv_( - ObLS *ls, - const common::ObTabletID &tablet_id, - const ObTablesHandleArray &major_tables, - const ObStorageSchema &storage_schema, - const compaction::ObMediumCompactionInfoList &medium_info_list) -{ - int ret = OB_SUCCESS; - ObTabletHandle tablet_handle; - ObTablet *tablet = nullptr; - int64_t multi_version_start = 0; - int64_t transfer_seq = 0; - - if (major_tables.empty()) { - // do nothing - } else if (1 == major_tables.get_count()) { - // only exist one major table - if (OB_FAIL(ObStorageHATabletBuilderUtil::build_tablet_for_row_store_(ls, - tablet_id, major_tables, storage_schema, medium_info_list))) { - LOG_WARN("failed to build tablet with major tables", K(ret), K(tablet_id), KPC(ls)); - } - } else if (OB_ISNULL(ls) || !tablet_id.is_valid()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("build tablet with major tables get invalid argument", K(ret), KP(ls), K(tablet_id)); - } else if (OB_FAIL(get_tablet_(tablet_id, ls, tablet_handle))) { - LOG_WARN("failed to get tablet", K(ret), K(tablet_id), KPC(ls)); - } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { - } else if (FALSE_IT(transfer_seq = tablet->get_tablet_meta().transfer_info_.transfer_seq_)) { - } else if (OB_FAIL(calc_multi_version_start_with_major_(major_tables, tablet, multi_version_start))) { - LOG_WARN("failed to calc multi version start with major", K(ret), KPC(tablet)); - } else { - // [MAJOR, CO_1, CG_1_1, CG_1_2, ..., CO_N, CG_N_1, CG_N_2] - ObTablesHandleArray column_store_tables; - - for (int64_t i = 0; OB_SUCC(ret) && i < major_tables.get_count(); ++i) { - ObITable *table_ptr = major_tables.get_table(i); - ObTableHandleV2 table_handle; - if (OB_UNLIKELY(nullptr == table_ptr || !table_ptr->is_major_sstable())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected table", K(ret), K(tablet_id), K(major_tables), K(storage_schema)); - } else if (0 == i) 
{ - if (OB_UNLIKELY(table_ptr->is_column_store_sstable())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("first table must be major sstable in ddl kv situation", K(ret), K(tablet_id), KPC(table_ptr), K(storage_schema)); - } - continue; - } else if (OB_FAIL(major_tables.get_table(i, table_handle))) { - LOG_WARN("failed to get table handle", K(ret), K(i), KPC(table_ptr)); - } else if (OB_UNLIKELY(!table_ptr->is_column_store_sstable())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("remain table must be column store sstable in ddl kv situation", K(ret), K(tablet_id), KPC(table_ptr), K(storage_schema)); - } else if (OB_FAIL(column_store_tables.add_table(table_handle))) { - LOG_WARN("failed to add table handle", K(ret)); - } - } // end for - - if (OB_FAIL(ret)) { - } else if (column_store_tables.get_count() != major_tables.get_count() - 1) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected tables", K(ret), K(tablet_id), K(column_store_tables)); - } - - // deal with major table - if (OB_SUCC(ret)) { - ObTableHandleV2 major_table_handle; - if (OB_FAIL(major_tables.get_table(0, major_table_handle))) { - LOG_WARN("failed to get tables", K(ret)); - } else if (OB_FAIL(inner_update_tablet_table_store_with_major_(multi_version_start, major_table_handle, ls, tablet, storage_schema, transfer_seq))) { - LOG_WARN("failed to update tablet table store", K(ret), K(tablet_id), K(major_table_handle)); - } - } - - if (OB_SUCC(ret)) { - ObSEArray cs_tables; - int64_t co_table_cnt = 0; - if (OB_FAIL(get_column_store_tables_(column_store_tables, cs_tables, co_table_cnt))) { - LOG_WARN("failed to get column store tables", K(ret)); - } else if (OB_FAIL(build_tablet_with_co_tables_( //we should assemble flattened cg sstables when updating tablet due to allocator - ls, tablet, storage_schema, multi_version_start, co_table_cnt, column_store_tables, cs_tables))) { - LOG_WARN("failed to build tablet with column store tables", K(ret)); - } - } - } - return ret; -} - int 
ObStorageHATabletBuilderUtil::build_tablet_for_row_store_( ObLS *ls, const common::ObTabletID &tablet_id, @@ -2227,7 +2131,7 @@ int ObStorageHATabletBuilderUtil::build_tablet_for_column_store_( int ret = OB_SUCCESS; ObTabletHandle tablet_handle; ObTablet *tablet = nullptr; - ObSEArray column_store_tables; + ObTablesHandleArray co_tables; int64_t co_table_cnt = 0; int64_t multi_version_start = 0; UNUSED(medium_info_list); @@ -2243,10 +2147,10 @@ int ObStorageHATabletBuilderUtil::build_tablet_for_column_store_( } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { } else if (OB_FAIL(calc_multi_version_start_with_major_(major_tables, tablet, multi_version_start))) { LOG_WARN("failed to calc multi version start with major", K(ret), KPC(tablet)); - } else if (OB_FAIL(get_column_store_tables_(major_tables, column_store_tables, co_table_cnt))) { - LOG_WARN("failed to get column store tables", K(ret)); + } else if (OB_FAIL(assemble_column_oriented_sstable_(major_tables, co_tables))) { + LOG_WARN("assemble co tables failed", K(ret), K(major_tables)); } else if (OB_FAIL(build_tablet_with_co_tables_( //we should assemble flattened cg sstables when updating tablet due to allocator - ls, tablet, storage_schema, multi_version_start, co_table_cnt, major_tables, column_store_tables))) { + ls, tablet, storage_schema, multi_version_start, co_tables))) { LOG_WARN("failed to build tablet with column store tables", K(ret)); } return ret; @@ -2286,9 +2190,9 @@ int ObStorageHATabletBuilderUtil::get_column_store_tables_( LOG_WARN("get unexpected table count", K(ret), K(full_co_table_cnt), K(column_store_tables.count()), K(cg_tables.count()), K(major_tables)); } else if (FALSE_IT(co_table_cnt = column_store_tables.count())) { - } else if (OB_FAIL(ObTableStoreUtil::sort_major_tables(column_store_tables))) { + } else if (OB_FAIL(ObTableStoreUtil::sort_column_store_tables(column_store_tables))) { LOG_WARN("failed to sort co tables", K(ret)); - } else if 
(OB_FAIL(ObTableStoreUtil::sort_major_tables(cg_tables))) { + } else if (OB_FAIL(ObTableStoreUtil::sort_column_store_tables(cg_tables))) { LOG_WARN("failed to sort cg tables", K(ret)); } else if (OB_FAIL(append(column_store_tables, cg_tables))) { LOG_WARN("failed to append cg tables", K(ret)); @@ -2296,57 +2200,81 @@ int ObStorageHATabletBuilderUtil::get_column_store_tables_( return ret; } +int ObStorageHATabletBuilderUtil::assemble_column_oriented_sstable_( + const ObTablesHandleArray &mixed_tables, + ObTablesHandleArray &co_tables) +{ + int ret = OB_SUCCESS; + co_tables.reset(); + ObSEArray column_store_tables; + int64_t co_table_cnt = 0; + if (OB_FAIL(get_column_store_tables_(mixed_tables, column_store_tables, co_table_cnt))) { + LOG_WARN("failed to get column store tables", K(ret)); + } + + ObSEArray cur_cg_tables; + int64_t start_cg_idx = co_table_cnt; + + // [CO_1, CO_N, CG_1_1, CG_1_2, ..., CG_N_1, CG_N_2] + for (int64_t co_idx = 0; OB_SUCC(ret) && co_idx < co_table_cnt; ++co_idx) { + ObCOSSTableV2 *co_sstable = static_cast(column_store_tables.at(co_idx)); + const int64_t co_snapshot_version = co_sstable->get_snapshot_version(); + cur_cg_tables.reset(); + + if (co_sstable->is_inited()) { + LOG_INFO("co sstable is inited", K(co_idx), K(co_table_cnt), K(start_cg_idx), KPC(co_sstable)); + // co sstable no need to fill cg tables + } else { + for (int64_t cg_idx = start_cg_idx; OB_SUCC(ret) && cg_idx < column_store_tables.count(); ++cg_idx) { + ObITable *cg_table = column_store_tables.at(cg_idx); + if (co_snapshot_version != cg_table->get_snapshot_version()) { + if (cur_cg_tables.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("co table mismatch cg table!", K(ret), K(co_idx), K(co_table_cnt), K(start_cg_idx), K(cg_idx), + K(co_snapshot_version), KPC(cg_table), K(column_store_tables)); + } else { + start_cg_idx += cur_cg_tables.count(); + } + break; + } else if (OB_FAIL(cur_cg_tables.push_back(cg_table))) { + LOG_WARN("failed to add cg table", K(ret), 
KPC(cg_table)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(co_sstable->fill_cg_sstables(cur_cg_tables))) { + LOG_WARN("failed to fill cg tables", K(ret), KPC(co_sstable)); + } + } + } + + ObTableHandleV2 co_table_handle; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(mixed_tables.get_table(co_sstable->get_key(), co_table_handle))) { + LOG_WARN("fail to get table handle from array by table key", K(ret), KPC(co_sstable), K(mixed_tables)); + } else if (OB_FAIL(co_tables.add_table(co_table_handle))) { + LOG_WARN("failed to add table", K(ret), K(co_table_handle)); + } + } + return ret; +} + int ObStorageHATabletBuilderUtil::build_tablet_with_co_tables_( ObLS *ls, ObTablet *tablet, const ObStorageSchema &storage_schema, const int64_t multi_version_start, - const int64_t co_table_cnt, - const ObTablesHandleArray &major_tables, - common::ObIArray &column_store_tables) + const ObTablesHandleArray &co_tables) { int ret = OB_SUCCESS; - ObSEArray cur_cg_tables; - int64_t start_cg_idx = co_table_cnt; int64_t transfer_seq = tablet->get_tablet_meta().transfer_info_.transfer_seq_; - for (int64_t co_idx = 0; OB_SUCC(ret) && co_idx < co_table_cnt; ++co_idx) { - ObCOSSTableV2 *co_sstable = static_cast(column_store_tables.at(co_idx)); - const int64_t co_snapshot_version = co_sstable->get_snapshot_version(); - cur_cg_tables.reset(); - - if (co_sstable->is_inited()) { - // co sstable no need to fill cg tables - } else { - for (int64_t cg_idx = start_cg_idx; OB_SUCC(ret) && cg_idx < column_store_tables.count(); ++cg_idx) { - ObITable *cg_table = column_store_tables.at(cg_idx); - if (co_snapshot_version != cg_table->get_snapshot_version()) { - if (cur_cg_tables.empty()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("co table mismatch cg table!", K(ret), K(co_snapshot_version), KPC(cg_table)); - } else { - start_cg_idx += cur_cg_tables.count(); - } - break; - } else if (OB_FAIL(cur_cg_tables.push_back(cg_table))) { - LOG_WARN("failed to add cg table", K(ret), KPC(cg_table)); - } - } - } - + for 
(int64_t co_idx = 0; OB_SUCC(ret) && co_idx < co_tables.get_count(); ++co_idx) { ObTableHandleV2 major_table_handle; - if (OB_FAIL(ret)) { - } else if (OB_FAIL(major_tables.get_table(co_sstable->get_key(), major_table_handle))) { - LOG_WARN("fail to get table handle from array by table key", K(ret), KPC(co_sstable), K(major_tables)); - } else if (co_sstable->is_inited()) { - // do nothing - } else if (OB_FAIL(co_sstable->fill_cg_sstables(cur_cg_tables))) { - LOG_WARN("failed to fill cg tables", K(ret), KPC(co_sstable)); - } - - if (FAILEDx(inner_update_tablet_table_store_with_major_(multi_version_start, + if (OB_FAIL(co_tables.get_table(co_idx, major_table_handle))) { + LOG_WARN("get co table handle failed", K(ret), K(co_idx)); + } else if (OB_FAIL(inner_update_tablet_table_store_with_major_(multi_version_start, major_table_handle, ls, tablet, storage_schema, transfer_seq))) { - LOG_WARN("failed to update tablet table store", K(ret), KPC(tablet), KPC(co_sstable)); + LOG_WARN("failed to update tablet table store", K(ret), KPC(tablet), "major_sstable", PC(major_table_handle.get_table())); } } return ret; @@ -2492,7 +2420,7 @@ int ObStorageHATabletBuilderUtil::build_table_with_minor_tables( return ret; } -int ObStorageHATabletBuilderUtil::build_table_with_ddl_tables( +int ObStorageHATabletBuilderUtil::build_tablet_with_ddl_tables( ObLS *ls, const common::ObTabletID &tablet_id, const ObTablesHandleArray &ddl_tables) @@ -2503,15 +2431,26 @@ int ObStorageHATabletBuilderUtil::build_table_with_ddl_tables( const bool need_tablet_meta_merge = false; const ObMigrationTabletParam *src_tablet_meta = nullptr; const bool update_ddl_sstable = true; - - if (OB_ISNULL(ls) || !tablet_id.is_valid()) { + ObTablesHandleArray co_tables; + const ObTablesHandleArray *target_ddl_tables = nullptr; + if (OB_ISNULL(ls) || !tablet_id.is_valid() || ddl_tables.empty() || OB_ISNULL(ddl_tables.get_table(0))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("build tablet with major tables get invalid 
argument", K(ret), KP(ls), K(tablet_id)); + LOG_WARN("build tablet with major tables get invalid argument", K(ret), KP(ls), K(tablet_id), K(ddl_tables)); } else if (OB_FAIL(get_tablet_(tablet_id, ls, tablet_handle))) { LOG_WARN("failed to get tablet", K(ret), K(tablet_id), KPC(ls)); } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { + } else if (ddl_tables.get_table(0)->is_column_store_sstable()) { + if (OB_FAIL(assemble_column_oriented_sstable_(ddl_tables, co_tables))) { + LOG_WARN("assemble co tables failed", K(ret), K(ddl_tables)); + } else { + target_ddl_tables = &co_tables; + } + } else { + target_ddl_tables = &ddl_tables; + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(inner_update_tablet_table_store_with_minor_(ls, tablet, need_tablet_meta_merge, - src_tablet_meta, ddl_tables, update_ddl_sstable))) { + src_tablet_meta, *target_ddl_tables, update_ddl_sstable))) { LOG_WARN("failed to update tablet table store with minor", K(ret)); } return ret; diff --git a/src/storage/high_availability/ob_storage_ha_tablet_builder.h b/src/storage/high_availability/ob_storage_ha_tablet_builder.h index 8b17ee421..e67c9f77e 100644 --- a/src/storage/high_availability/ob_storage_ha_tablet_builder.h +++ b/src/storage/high_availability/ob_storage_ha_tablet_builder.h @@ -293,7 +293,7 @@ public: const common::ObTabletID &tablet_id, const ObMigrationTabletParam *src_tablet_meta, const ObTablesHandleArray &minor_tables); - static int build_table_with_ddl_tables( + static int build_tablet_with_ddl_tables( ObLS *ls, const common::ObTabletID &tablet_id, const ObTablesHandleArray &ddl_tables); @@ -301,13 +301,6 @@ public: ObTablet *tablet, bool &is_exist); private: - // TODO(@DanLing) tmp interface, remove after column_store_ddl branch merged. 
- static int build_tablet_for_ddl_kv_( - ObLS *ls, - const common::ObTabletID &tablet_id, - const ObTablesHandleArray &major_tables, - const ObStorageSchema &storage_schema, - const compaction::ObMediumCompactionInfoList &medium_info_list); static int build_tablet_for_row_store_( ObLS *ls, const common::ObTabletID &tablet_id, @@ -348,6 +341,9 @@ private: const ObMigrationTabletParam *src_tablet_meta, ObTablet *tablet, bool &need_merge); + static int assemble_column_oriented_sstable_( + const ObTablesHandleArray &mixed_tables, + ObTablesHandleArray &co_tables); static int get_column_store_tables_( const ObTablesHandleArray &major_tables, common::ObSEArray &column_store_tables, @@ -357,9 +353,7 @@ private: ObTablet *tablet, const ObStorageSchema &storage_schema, const int64_t multi_version_start, - const int64_t co_table_cnt, - const ObTablesHandleArray &major_tables, - common::ObIArray &co_table_array); + const ObTablesHandleArray &co_tables); }; diff --git a/src/storage/high_availability/ob_tablet_group_restore.cpp b/src/storage/high_availability/ob_tablet_group_restore.cpp index 0226e0e5c..99df701cc 100644 --- a/src/storage/high_availability/ob_tablet_group_restore.cpp +++ b/src/storage/high_availability/ob_tablet_group_restore.cpp @@ -2577,6 +2577,10 @@ int ObTabletRestoreTask::check_need_copy_sstable_( LOG_WARN("failed to get table info", K(ret), KPC(tablet_restore_ctx_), K(table_key)); } else if (OB_FAIL(ObStorageHATaskUtils::check_need_copy_sstable(*copy_table_info, tablet_restore_ctx_->tablet_handle_, need_copy))) { LOG_WARN("failed to check need copy sstable", K(ret), KPC(tablet_restore_ctx_), K(table_key)); + if (OB_INVALID_DATA == ret) { + LOG_ERROR("restore invalid data", K(ret), K(table_key), KPC(tablet_restore_ctx_)); + abort(); // TODO@wenqu: remove this line + } } return ret; } diff --git a/src/storage/lob/ob_lob_locator.cpp b/src/storage/lob/ob_lob_locator.cpp index 7f3c14f0e..47b430ae9 100644 --- a/src/storage/lob/ob_lob_locator.cpp +++ 
b/src/storage/lob/ob_lob_locator.cpp @@ -616,17 +616,27 @@ int ObLobLocatorHelper::build_lob_locatorv2(ObLobLocatorV2 &locator, output_data.assign_buffer(buffer + offset, param.len_); if (OB_FAIL(lob_mngr->query(param, output_data))) { COMMON_LOG(WARN,"Lob: falied to query lob tablets.", K(ret), K(param)); - } else if (padding_char_size) { - ObString data_str; - if (OB_FAIL(locator.get_inrow_data(data_str))) { - STORAGE_LOG(WARN, "Lob: read lob data failed", - K(ret), K(column_id), K(data_str), K(data_str.length()), K(full_loc_size), K(payload)); - } else if (OB_ISNULL(char_len_ptr)) { + } else { + if (output_data.length() != param.byte_size_) { ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "Lob: get null char len ptr when need padding char len", - K(ret), K(column_id), K(data_str), K(data_str.length()), K(full_loc_size), K(payload)); - } else { - *char_len_ptr = ObCharset::strlen_char(param.coll_type_, data_str.ptr(), data_str.length()); + ObLobData ld; + if (lob_common->is_init_) { + ld = *(ObLobData*)lob_common->buffer_; + } + STORAGE_LOG(WARN, "Lob: read full data size not expected", K(ret), K(*lob_common), + K(ld), K(output_data.length()), K(param.byte_size_)); + } else if (padding_char_size) { + ObString data_str; + if (OB_FAIL(locator.get_inrow_data(data_str))) { + STORAGE_LOG(WARN, "Lob: read lob data failed", + K(ret), K(column_id), K(data_str), K(data_str.length()), K(full_loc_size), K(payload)); + } else if (OB_ISNULL(char_len_ptr)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Lob: get null char len ptr when need padding char len", + K(ret), K(column_id), K(data_str), K(data_str.length()), K(full_loc_size), K(payload)); + } else { + *char_len_ptr = ObCharset::strlen_char(param.coll_type_, data_str.ptr(), data_str.length()); + } } } } diff --git a/src/storage/lob/ob_lob_manager.cpp b/src/storage/lob/ob_lob_manager.cpp index 83aecad4e..37b771854 100644 --- a/src/storage/lob/ob_lob_manager.cpp +++ b/src/storage/lob/ob_lob_manager.cpp @@ -1047,6 +1047,13 
@@ int ObLobManager::erase_one_piece(ObLobAccessParam& param, return ret; } +void ObLobManager::transform_lob_id(uint64_t src, uint64_t &dst) +{ + dst = htonll(src << 1); + char *bytes = reinterpret_cast(&dst); + bytes[7] |= 0x01; +} + int ObLobManager::check_need_out_row( ObLobAccessParam& param, int64_t add_len, @@ -1123,9 +1130,13 @@ int ObLobManager::check_need_out_row( // init lob data and alloc lob id(when not init) ObLobData *new_lob_data = new(new_lob_common->buffer_)ObLobData(); new_lob_data->id_.tablet_id_ = param.tablet_id_.id(); - if (OB_FAIL(lob_ctx_.lob_meta_mngr_->fetch_lob_id(param, new_lob_data->id_.lob_id_))) { + if (param.spec_lob_id_.is_valid()) { + new_lob_data->id_ = param.spec_lob_id_; + } else if (OB_FAIL(lob_ctx_.lob_meta_mngr_->fetch_lob_id(param, new_lob_data->id_.lob_id_))) { LOG_WARN("get lob id failed.", K(ret), K(param)); - } else { + } + if (OB_SUCC(ret)) { + transform_lob_id(new_lob_data->id_.lob_id_, new_lob_data->id_.lob_id_); new_lob_common->is_init_ = true; } } @@ -1164,7 +1175,12 @@ int ObLobManager::init_out_row_ctx( // for append, most oper len/256K + 1 // for sql update, calc erase+insert int64_t N = ((len + param.update_len_) / (ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE / 2) + 2) * 2; - param.seq_no_st_ = param.tx_desc_->get_and_inc_tx_seq(param.parent_seq_no_.get_branch(), N); + if (nullptr != param.tx_desc_) { + param.seq_no_st_ = param.tx_desc_->get_and_inc_tx_seq(param.parent_seq_no_.get_branch(), N); + } else { + int tmp_seq = ObSequence::get_and_inc_max_seq_no(N); + param.seq_no_st_ = transaction::ObTxSEQ::mk_v0(tmp_seq); + } param.used_seq_cnt_ = 0; param.total_seq_cnt_ = N; } @@ -1582,6 +1598,148 @@ int ObLobManager::append( return ret; } +int ObLobManager::append(ObLobAccessParam& param, ObLobLocatorV2& lob, ObLobMetaWriteIter &iter) +{ + int ret = OB_SUCCESS; + bool is_char = param.coll_type_ != common::ObCollationType::CS_TYPE_BINARY; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObLobManager 
is not initialized", K(ret)); + } else if (OB_FAIL(param.set_lob_locator(param.lob_locator_))) { + LOG_WARN("failed to set lob locator for param", K(ret), K(param)); + } else if (!lob.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid lob locator", K(ret)); + } else if (lob.is_delta_temp_lob()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid lob locator", K(ret)); + } else { + bool alloc_inside = false; + bool need_out_row = false; + if (OB_FAIL(prepare_lob_common(param, alloc_inside))) { + LOG_WARN("fail to prepare lob common", K(ret), K(param)); + } + ObLobCommon *lob_common = param.lob_common_; + ObLobData *lob_data = param.lob_data_; + bool is_remote_lob = false; + common::ObAddr dst_addr; + int64_t append_lob_len = 0; + ObString ori_inrow_data; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(check_handle_size(param))) { + LOG_WARN("check handle size failed.", K(ret)); + } else if (OB_FAIL(is_remote(param, is_remote_lob, dst_addr))) { + LOG_WARN("check is remote failed.", K(ret), K(param)); + } else if (is_remote_lob) { + ret = OB_NOT_IMPLEMENT; + LOG_WARN("Unsupport remote append", K(ret), K(param)); + } else if (OB_FAIL(lob.get_lob_data_byte_len(append_lob_len))) { + LOG_WARN("fail to get append lob byte len", K(ret), K(lob)); + } else if (OB_FAIL(check_need_out_row(param, append_lob_len, ori_inrow_data, false, alloc_inside, need_out_row))) { + LOG_WARN("process out row check failed.", K(ret), K(param), KPC(lob_common), KPC(lob_data), K(lob)); + } else if (!need_out_row) { + // do inrow append + int32_t cur_handle_size = lob_common->get_handle_size(param.byte_size_); + int32_t ptr_offset = 0; + if (OB_NOT_NULL(param.lob_locator_)) { + ptr_offset = reinterpret_cast(param.lob_common_) - reinterpret_cast(param.lob_locator_->ptr_); + cur_handle_size += ptr_offset; + } + uint64_t total_size = cur_handle_size + append_lob_len; + char *buf = static_cast(param.allocator_->alloc(total_size)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + 
LOG_WARN("alloc buf failed.", K(ret), K(total_size)); + } else { + if (OB_NOT_NULL(param.lob_locator_)) { + MEMCPY(buf, param.lob_locator_->ptr_, ptr_offset); + } + ObLobCommon *new_lob_common = reinterpret_cast(buf + ptr_offset); + MEMCPY(new_lob_common, lob_common, cur_handle_size - ptr_offset); + ObString data; + data.assign_buffer(buf + cur_handle_size, append_lob_len); + SMART_VAR(ObLobAccessParam, read_param) { + read_param.tx_desc_ = param.tx_desc_; + if (OB_FAIL(build_lob_param(read_param, *param.allocator_, param.coll_type_, + 0, UINT64_MAX, param.timeout_, lob))) { + LOG_WARN("fail to build read param", K(ret), K(lob)); + } else if (OB_FAIL(query(read_param, data))) { + LOG_WARN("fail to read src lob", K(ret), K(read_param)); + } + } + if (OB_SUCC(ret)) { + // refresh lob info + param.byte_size_ += data.length(); + if (new_lob_common->is_init_) { + ObLobData *new_lob_data = reinterpret_cast(new_lob_common->buffer_); + new_lob_data->byte_size_ += data.length(); + } + if (alloc_inside) { + param.allocator_->free(param.lob_common_); + } + param.lob_common_ = new_lob_common; + param.handle_size_ = total_size; + if (OB_NOT_NULL(param.lob_locator_)) { + param.lob_locator_->ptr_ = buf; + param.lob_locator_->size_ = total_size; + if (OB_FAIL(fill_lob_locator_extern(param))) { + LOG_WARN("fail to fill lob locator extern", K(ret), KPC(param.lob_locator_)); + } + } + } + iter.set_end(); + } + } else if (!lob.has_lob_header()) { + ObString data; + data.assign_ptr(lob.ptr_, lob.size_); + ObLobCtx lob_ctx = lob_ctx_; + if (OB_FAIL(lob_ctx.lob_meta_mngr_->append(param, iter))) { + LOG_WARN("Failed to open lob meta write iter.", K(ret), K(param)); + } + } else { + // prepare out row ctx + ObLobCtx lob_ctx = lob_ctx_; + if (OB_FAIL(init_out_row_ctx(param, append_lob_len, param.op_type_))) { + LOG_WARN("init lob data out row ctx failed", K(ret)); + } + // prepare read buffer + ObString read_buffer; + uint64_t read_buff_size = LOB_READ_BUFFER_LEN; + char *read_buff = 
static_cast(param.allocator_->alloc(read_buff_size)); + if (OB_ISNULL(read_buff)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc read buffer failed.", K(ret), K(read_buff_size)); + } else { + read_buffer.assign_buffer(read_buff, read_buff_size); + } + + // prepare read full lob + if (OB_SUCC(ret)) { + ObLobAccessParam *read_param = reinterpret_cast(param.allocator_->alloc(sizeof(ObLobAccessParam))); + if (OB_ISNULL(read_param)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc read param failed.", K(ret), K(sizeof(ObLobAccessParam))); + } else { + read_param = new(read_param)ObLobAccessParam(); + read_param->tx_desc_ = param.tx_desc_; + if (OB_FAIL(build_lob_param(*read_param, *param.allocator_, param.coll_type_, + 0, UINT64_MAX, param.timeout_, lob))) { + LOG_WARN("fail to build read param", K(ret), K(lob)); + } else { + ObLobQueryIter *qiter = nullptr; + if (OB_FAIL(query(*read_param, qiter))) { + LOG_WARN("do query src by iter failed.", K(ret), K(read_param)); + } else if (OB_FAIL(iter.open(param, qiter, read_param, read_buffer))) { + LOG_WARN("open lob meta write iter failed.", K(ret)); + } + } + } + } + } + } + return ret; +} + int ObLobManager::prepare_lob_common(ObLobAccessParam& param, bool &alloc_inside) { int ret = OB_SUCCESS; @@ -1597,6 +1755,9 @@ int ObLobManager::prepare_lob_common(ObLobAccessParam& param, bool &alloc_inside param.lob_common_ = new(tbuf)ObLobCommon(); param.lob_data_ = new(param.lob_common_->buffer_)ObLobData(); param.lob_data_->id_.tablet_id_ = param.tablet_id_.id(); + if (param.spec_lob_id_.is_valid()) { + param.lob_data_->id_ = param.spec_lob_id_; + } ObLobDataOutRowCtx *outrow_ctx = new(param.lob_data_->buffer_)ObLobDataOutRowCtx(); // init char len uint64_t *char_len = reinterpret_cast(outrow_ctx + 1); @@ -1796,9 +1957,13 @@ int ObLobManager::prepare_for_write( // init lob data and alloc lob id(when not init) ObLobData *new_lob_data = new(new_lob_common->buffer_)ObLobData(); new_lob_data->id_.tablet_id_ = 
param.tablet_id_.id(); - if (OB_FAIL(lob_ctx_.lob_meta_mngr_->fetch_lob_id(param, new_lob_data->id_.lob_id_))) { + if (param.spec_lob_id_.is_valid()) { + new_lob_data->id_ = param.spec_lob_id_; + } else if (OB_FAIL(lob_ctx_.lob_meta_mngr_->fetch_lob_id(param, new_lob_data->id_.lob_id_))) { LOG_WARN("get lob id failed.", K(ret), K(param)); - } else { + } + if (OB_SUCC(ret)) { + transform_lob_id(new_lob_data->id_.lob_id_, new_lob_data->id_.lob_id_); new_lob_common->is_init_ = true; } } diff --git a/src/storage/lob/ob_lob_manager.h b/src/storage/lob/ob_lob_manager.h index 1c95cd41d..f7eeece0f 100644 --- a/src/storage/lob/ob_lob_manager.h +++ b/src/storage/lob/ob_lob_manager.h @@ -139,6 +139,7 @@ public: static const int64_t LOB_WITH_OUTROW_CTX_SIZE = sizeof(ObLobCommon) + sizeof(ObLobData) + sizeof(ObLobDataOutRowCtx); static const int64_t LOB_OUTROW_FULL_SIZE = sizeof(ObLobCommon) + sizeof(ObLobData) + sizeof(ObLobDataOutRowCtx) + sizeof(uint64_t); static const uint64_t LOB_READ_BUFFER_LEN = 1024L*1024L; // 1M + static const int64_t LOB_IN_ROW_MAX_LENGTH = 4096; // 4K static const uint64_t REMOTE_LOB_QUERY_RETRY_MAX = 10L; // 1M private: explicit ObLobManager(const uint64_t tenant_id) @@ -196,6 +197,9 @@ public: // Lob data interface int append(ObLobAccessParam& param, ObString& data); + int append(ObLobAccessParam& param, + ObLobLocatorV2& lob, + ObLobMetaWriteIter &iter); int append(ObLobAccessParam& param, ObLobLocatorV2 &lob); int query(ObLobAccessParam& param, @@ -311,14 +315,12 @@ private: bool lob_handle_has_char_len(ObLobAccessParam& param); int64_t* get_char_len_ptr(ObLobAccessParam& param); int fill_lob_locator_extern(ObLobAccessParam& param); - int compare(ObLobAccessParam& param_left, ObLobAccessParam& param_right, int64_t& result); - + void transform_lob_id(uint64_t src, uint64_t &dst); private: static const int64_t DEFAULT_LOB_META_BUCKET_CNT = 1543; - static const int64_t LOB_IN_ROW_MAX_LENGTH = 4096; // 4K const uint64_t tenant_id_; bool is_inited_; 
common::ObFIFOAllocator allocator_; diff --git a/src/storage/lob/ob_lob_meta.cpp b/src/storage/lob/ob_lob_meta.cpp index 304cbd08b..0a32629c3 100644 --- a/src/storage/lob/ob_lob_meta.cpp +++ b/src/storage/lob/ob_lob_meta.cpp @@ -68,7 +68,7 @@ int ObLobMetaScanIter::get_next_row(ObLobMetaInfo &row) } else if(OB_ISNULL(datum_row)) { ret = OB_ERR_NULL_VALUE; LOG_WARN("row is null.", K(ret)); - } else if (OB_FAIL(ObLobMetaUtil::transform(datum_row, row))) { + } else if (OB_FAIL(ObLobMetaUtil::transform_from_row_to_info(datum_row, row, false))) { LOG_WARN("get meta info from row failed.", K(ret), KPC(datum_row)); } else { cur_info_ = row; @@ -146,7 +146,7 @@ int ObLobMetaScanIter::get_next_row(ObLobMetaScanResult &result) return ret; } -int ObLobMetaUtil::transform_lob_id(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_lob_id(const blocksstable::ObDatumRow* row, ObLobMetaInfo &info) { int ret = OB_SUCCESS; ObString buf = row->storage_datums_[ObLobMetaUtil::LOB_ID_COL_ID].get_string(); @@ -159,60 +159,202 @@ int ObLobMetaUtil::transform_lob_id(blocksstable::ObDatumRow* row, ObLobMetaInfo return ret; } -int ObLobMetaUtil::transform_seq_id(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_seq_id(const blocksstable::ObDatumRow* row, ObLobMetaInfo &info) { info.seq_id_ = row->storage_datums_[ObLobMetaUtil::SEQ_ID_COL_ID].get_string(); return OB_SUCCESS; } -int ObLobMetaUtil::transform_byte_len(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_byte_len(const blocksstable::ObDatumRow* row, ObLobMetaInfo &info, bool with_extra_rowkey) { - info.byte_len_ = row->storage_datums_[ObLobMetaUtil::BYTE_LEN_COL_ID].get_uint32(); + int idx = (with_extra_rowkey) ? 
+ ObLobMetaUtil::BYTE_LEN_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::BYTE_LEN_COL_ID; + info.byte_len_ = row->storage_datums_[idx].get_uint32(); return OB_SUCCESS; } -int ObLobMetaUtil::transform_char_len(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_char_len(const blocksstable::ObDatumRow* row, ObLobMetaInfo &info, bool with_extra_rowkey) { - info.char_len_ = row->storage_datums_[ObLobMetaUtil::CHAR_LEN_COL_ID].get_uint32(); + int idx = (with_extra_rowkey) ? + ObLobMetaUtil::CHAR_LEN_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::CHAR_LEN_COL_ID; + info.char_len_ = row->storage_datums_[idx].get_uint32(); return OB_SUCCESS; } -int ObLobMetaUtil::transform_piece_id(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_piece_id(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey) { - info.piece_id_ = row->storage_datums_[ObLobMetaUtil::PIECE_ID_COL_ID].get_uint64(); - return OB_SUCCESS;; + int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("row is nullptr", K(ret)); + } else { + int idx = (with_extra_rowkey) ? + ObLobMetaUtil::PIECE_ID_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::PIECE_ID_COL_ID; + info.piece_id_ = row->storage_datums_[idx].get_uint64(); + } + return ret; } -int ObLobMetaUtil::transform_lob_data(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_lob_data(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey) { - info.lob_data_ = row->storage_datums_[ObLobMetaUtil::LOB_DATA_COL_ID].get_string(); - return OB_SUCCESS; + int ret = OB_SUCCESS; + ObString buf; + buf.reset(); + if (OB_ISNULL(row)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("row is nullptr", K(ret)); + } else { + info.lob_data_.reset(); + int idx = (with_extra_rowkey) ? 
+ ObLobMetaUtil::LOB_DATA_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::LOB_DATA_COL_ID; + info.lob_data_ = row->storage_datums_[idx].get_string(); + } + return ret; } -int ObLobMetaUtil::transform(blocksstable::ObDatumRow* row, ObLobMetaInfo &info) +int ObLobMetaUtil::transform_from_row_to_info(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey) +{ + int ret = OB_SUCCESS; + int expcect_row_cnt = (with_extra_rowkey) ? + LOB_META_COLUMN_CNT + SKIP_INVALID_COLUMN : + LOB_META_COLUMN_CNT; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is null", K(ret)); + } else if (!row->is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid lob meta row.", K(ret), KPC(row)); + } else if (row->get_column_count() != expcect_row_cnt) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid lob meta row.", K(ret), KPC(row), K(expcect_row_cnt)); + } else if (OB_FAIL(transform_lob_id(row, info))) { + LOG_WARN("transform lob id failed", K(ret)); + } else if (OB_FAIL(transform_seq_id(row, info))) { + LOG_WARN("transform seq id failed", K(ret)); + } else if (OB_FAIL(transform_byte_len(row, info, with_extra_rowkey))) { + LOG_WARN("transform byte len failed", K(ret)); + } else if (OB_FAIL(transform_char_len(row, info, with_extra_rowkey))) { + LOG_WARN("transform char len failed", K(ret)); + } else if (OB_FAIL(transform_piece_id(row, info, with_extra_rowkey))) { + LOG_WARN("transform piece id failed", K(ret)); + } else if (OB_FAIL(transform_lob_data(row, info, with_extra_rowkey))) { + LOG_WARN("transform lob data failed", K(ret)); + } + return ret; +} + +int ObLobMetaUtil::transform_lob_id(ObLobMetaInfo &info, blocksstable::ObDatumRow *row) { int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is NULL", K(ret)); + } else { + row->storage_datums_[ObLobMetaUtil::LOB_ID_COL_ID].set_string(reinterpret_cast(&info.lob_id_), sizeof(ObLobId)); + } + return ret; +} + +int ObLobMetaUtil::transform_seq_id(ObLobMetaInfo 
&info, blocksstable::ObDatumRow *row) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is NULL", K(ret)); + } else { + row->storage_datums_[ObLobMetaUtil::SEQ_ID_COL_ID].set_string(info.seq_id_); + } + return ret; +} + +int ObLobMetaUtil::transform_byte_len(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is NULL", K(ret)); + } else { + int idx = (with_extra_rowkey) ? + ObLobMetaUtil::BYTE_LEN_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::BYTE_LEN_COL_ID; + row->storage_datums_[idx].set_uint32(info.byte_len_); + } + return ret; +} + +int ObLobMetaUtil::transform_char_len(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is NULL", K(ret)); + } else { + int idx = (with_extra_rowkey) ? + ObLobMetaUtil::CHAR_LEN_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::CHAR_LEN_COL_ID; + row->storage_datums_[idx].set_uint32(info.char_len_); + } + return ret; +} + +int ObLobMetaUtil::transform_piece_id(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is NULL", K(ret)); + } else { + int idx = (with_extra_rowkey) ? + ObLobMetaUtil::PIECE_ID_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::PIECE_ID_COL_ID; + row->storage_datums_[idx].set_uint(info.piece_id_); + } + return ret; +} + +int ObLobMetaUtil::transform_lob_data(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("row is NULL", K(ret)); + } else { + int idx = (with_extra_rowkey) ? 
+ ObLobMetaUtil::LOB_DATA_COL_ID + SKIP_INVALID_COLUMN : + ObLobMetaUtil::LOB_DATA_COL_ID; + row->storage_datums_[idx].set_string(info.lob_data_); + } + return ret; +} + +int ObLobMetaUtil::transform_from_info_to_row(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey) +{ + int ret = OB_SUCCESS; + int expcect_row_cnt = (with_extra_rowkey) ? + LOB_META_COLUMN_CNT + SKIP_INVALID_COLUMN : + LOB_META_COLUMN_CNT; if (OB_ISNULL(row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("row is null.", K(ret)); - } else if (!row->is_valid()) { + } else if (row->get_column_count() != expcect_row_cnt) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid lob meta row.", K(ret), KPC(row)); - } else if (row->get_column_count() != LOB_META_COLUMN_CNT) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid lob meta row.", K(ret), KPC(row)); - } else if (OB_FAIL(transform_lob_id(row, info))) { - LOG_WARN("get lob id from row failed.", K(ret), KPC(row)); - } else if (OB_FAIL(transform_seq_id(row, info))) { - LOG_WARN("get seq id from row failed.", K(ret), KPC(row)); - } else if (OB_FAIL(transform_byte_len(row, info))) { - LOG_WARN("get byte len from row failed.", K(ret), KPC(row)); - } else if (OB_FAIL(transform_char_len(row, info))) { - LOG_WARN("get char len from row failed.", K(ret), KPC(row)); - } else if (OB_FAIL(transform_piece_id(row, info))) { - LOG_WARN("get macro id from row failed.", K(ret), KPC(row)); - } else if (OB_FAIL(transform_lob_data(row, info))) { - LOG_WARN("get macro id from row failed.", K(ret), KPC(row)); + LOG_WARN("invalid lob meta row.", K(ret), K(info), K(with_extra_rowkey)); + } else if (OB_FAIL(transform_lob_id(info, row))) { + LOG_WARN("get lob id from row failed.", K(ret), K(info)); + } else if (OB_FAIL(transform_seq_id(info, row))) { + LOG_WARN("get seq id from row failed.", K(ret), K(info)); + } else if (OB_FAIL(transform_byte_len(info, row, with_extra_rowkey))) { + LOG_WARN("get byte len from row failed.", K(ret), K(info)); + } else if 
(OB_FAIL(transform_char_len(info, row, with_extra_rowkey))) { + LOG_WARN("get char len from row failed.", K(ret), K(info)); + } else if (OB_FAIL(transform_piece_id(info, row, with_extra_rowkey))) { + LOG_WARN("get macro id from row failed.", K(ret), K(info)); + } else if (OB_FAIL(transform_lob_data(info, row, with_extra_rowkey))) { + LOG_WARN("get macro id from row failed.", K(ret), K(info)); } return ret; } @@ -265,6 +407,29 @@ bool ObLobMetaScanIter::is_range_over(const ObLobMetaInfo& info) return cur_pos_ >= param_.offset_ + param_.len_; } +ObLobMetaWriteIter::ObLobMetaWriteIter(ObIAllocator* allocator) + : seq_id_(allocator), + offset_(0), + lob_id_(), + piece_id_(0), + data_(), + coll_type_(CS_TYPE_BINARY), + piece_block_size_(ObLobMetaUtil::LOB_OPER_PIECE_DATA_SIZE), + scan_iter_(), + padding_size_(0), + seq_id_end_(allocator), + post_data_(), + remain_buf_(), + inner_buffer_(), + allocator_(allocator), + last_info_(), + iter_(nullptr), + read_param_(nullptr), + lob_common_(nullptr), + is_end_(false) +{ +} + ObLobMetaWriteIter::ObLobMetaWriteIter( const ObString& data, ObIAllocator* allocator, @@ -280,7 +445,10 @@ ObLobMetaWriteIter::ObLobMetaWriteIter( inner_buffer_(), allocator_(allocator), last_info_(), - iter_(nullptr) + iter_(nullptr), + read_param_(nullptr), + lob_common_(nullptr), + is_end_(false) { data_ = data; offset_ = 0; @@ -382,6 +550,35 @@ int ObLobMetaWriteIter::open(ObLobAccessParam ¶m, return ret; } +int ObLobMetaWriteIter::open(ObLobAccessParam ¶m, + void *iter, // ObLobQueryIter + void *read_param, // ObLobAccessParam + ObString &read_buf) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(iter) || OB_ISNULL(read_param)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null query iter", K(ret)); + } else { + coll_type_ = param.coll_type_; + lob_id_ = param.lob_data_->id_; + piece_id_ = ObLobMetaUtil::LOB_META_INLINE_PIECE_ID; + iter_ = iter; + read_param_ = read_param; + allocator_ = param.allocator_; + lob_common_ = param.lob_common_; + 
data_.assign_buffer(read_buf.ptr(), piece_block_size_); + char *buf = reinterpret_cast(allocator_->alloc(piece_block_size_)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc buffer failed.", K(piece_block_size_)); + } else { + inner_buffer_.assign_ptr(buf, piece_block_size_); + } + } + return ret; +} + int ObLobMetaWriteIter::try_fill_data( ObLobMetaWriteResult& row, bool &use_inner_buffer, @@ -598,7 +795,9 @@ int ObLobMetaWriteIter::try_update_last_info(ObLobMetaWriteResult &row) int ObLobMetaWriteIter::get_next_row(ObLobMetaWriteResult &row) { int ret = OB_SUCCESS; - if (OB_FAIL(try_update_last_info(row))) { + if (is_end_) { + ret = OB_ITER_END; // mock for inrow situation + } else if (OB_FAIL(try_update_last_info(row))) { LOG_WARN("fail to do try update last info.", K(ret)); } else if (!row.is_update_) { // 1. init common info for ObLobMetaWriteResult @@ -657,6 +856,18 @@ int ObLobMetaWriteIter::get_next_row(ObLobMetaWriteResult &row) if (ret == OB_ITER_END && row.info_.byte_len_ > 0) { ret = OB_SUCCESS; } + if (OB_SUCC(ret) && OB_NOT_NULL(lob_common_)) { + // refresh byte len + ObLobCommon *lob_common = reinterpret_cast(lob_common_); + ObLobData *lob_data = reinterpret_cast(lob_common->buffer_); + lob_data->byte_size_ += row.info_.byte_len_; + // refresh char len + char *ptr = reinterpret_cast(lob_common_); + int64_t *len = reinterpret_cast(ptr + ObLobManager::LOB_WITH_OUTROW_CTX_SIZE); + *len = *len + row.info_.char_len_; + // set lob data + row.info_.lob_data_.assign_ptr(row.data_.ptr(), row.data_.length()); + } } return ret; } @@ -671,9 +882,42 @@ int ObLobMetaWriteIter::close() allocator_->free(inner_buffer_.ptr()); } inner_buffer_.reset(); + if (OB_NOT_NULL(read_param_)) { + if (OB_NOT_NULL(iter_)) { + ObLobQueryIter *iter = reinterpret_cast(iter_); + iter->reset(); + OB_DELETE(ObLobQueryIter, "unused", iter); + } + if (OB_NOT_NULL(data_.ptr())) { // free read_buf + allocator_->free(data_.ptr()); + data_.reset(); + } + 
allocator_->free(read_param_); + read_param_ = nullptr; + } return ret; } +void ObLobMetaWriteIter::reuse() +{ + close(); + offset_ = 0; + piece_id_ = 0; + lob_id_.reset(); + padding_size_ = 0; + post_data_.reset(); + remain_buf_.reset(); + last_info_.reset(); + iter_ = nullptr; + lob_common_ = nullptr; + is_end_ = false; +} + +void ObLobMetaWriteIter::set_data(const ObString& data) +{ + data_ = data; +} + int ObLobMetaManager::write(ObLobAccessParam& param, ObLobMetaInfo& in_row) { int ret = OB_SUCCESS; @@ -744,7 +988,9 @@ int ObLobMetaManager::update(ObLobAccessParam& param, ObLobMetaInfo& old_row, Ob int ObLobMetaManager::fetch_lob_id(ObLobAccessParam& param, uint64_t &lob_id) { int ret = OB_SUCCESS; - if (OB_FAIL(persistent_lob_adapter_.fetch_lob_id(param, lob_id))) { + if (param.spec_lob_id_.is_valid()) { + lob_id = param.spec_lob_id_.lob_id_; + } else if (OB_FAIL(persistent_lob_adapter_.fetch_lob_id(param, lob_id))) { LOG_WARN("fetch lob id failed.", K(ret), K(param)); } return ret; diff --git a/src/storage/lob/ob_lob_meta.h b/src/storage/lob/ob_lob_meta.h index 2932ac37d..63bd575fc 100644 --- a/src/storage/lob/ob_lob_meta.h +++ b/src/storage/lob/ob_lob_meta.h @@ -28,6 +28,7 @@ namespace storage class ObLobMetaUtil { public: static const uint64_t LOB_META_COLUMN_CNT = 6; + static const uint64_t LOB_META_SCHEMA_ROWKEY_COL_CNT = 2; static const uint64_t LOB_ID_COL_ID = 0; static const uint64_t SEQ_ID_COL_ID = 1; static const uint64_t BYTE_LEN_COL_ID = 2; @@ -36,15 +37,26 @@ public: static const uint64_t LOB_DATA_COL_ID = 5; static const uint64_t LOB_META_INLINE_PIECE_ID = UINT64_MAX - 1; static const uint64_t LOB_OPER_PIECE_DATA_SIZE = 256 * 1024; // 256K + static const uint64_t SKIP_INVALID_COLUMN = 2; public: - static int transform(blocksstable::ObDatumRow *row, ObLobMetaInfo &info); + static int transform_from_info_to_row(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey); + static int transform_from_row_to_info(const 
blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey); private: - static int transform_lob_id(blocksstable::ObDatumRow* row, ObLobMetaInfo &info); - static int transform_seq_id(blocksstable::ObDatumRow* row, ObLobMetaInfo &info); - static int transform_byte_len(blocksstable::ObDatumRow* row, ObLobMetaInfo &info); - static int transform_char_len(blocksstable::ObDatumRow* row, ObLobMetaInfo &info); - static int transform_piece_id(blocksstable::ObDatumRow* row, ObLobMetaInfo &info); - static int transform_lob_data(blocksstable::ObDatumRow* row, ObLobMetaInfo &info); + // from_row_to_info. + static int transform_lob_id(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info); + static int transform_seq_id(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info); + static int transform_byte_len(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey); + static int transform_char_len(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey); + static int transform_piece_id(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey); + static int transform_lob_data(const blocksstable::ObDatumRow *row, ObLobMetaInfo &info, bool with_extra_rowkey); + + // from_info_to_row. 
+ static int transform_lob_id(ObLobMetaInfo &info, blocksstable::ObDatumRow *row); + static int transform_seq_id(ObLobMetaInfo &info, blocksstable::ObDatumRow *row); + static int transform_byte_len(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey); + static int transform_char_len(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey); + static int transform_piece_id(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey); + static int transform_lob_data(ObLobMetaInfo &info, blocksstable::ObDatumRow *row, bool with_extra_rowkey); }; struct ObLobMetaScanResult { @@ -93,7 +105,9 @@ struct ObLobMetaWriteResult { class ObLobMetaWriteIter { public: + ObLobMetaWriteIter(ObIAllocator* allocator); ObLobMetaWriteIter(const ObString& data, ObIAllocator* allocator, uint32_t piece_block_size); + ~ObLobMetaWriteIter() { close(); } int open(ObLobAccessParam ¶m, uint64_t padding_size, ObString &post_data, @@ -109,8 +123,15 @@ public: ObString &remain_buf, ObString &seq_id_st, ObString &seq_id_end); + int open(ObLobAccessParam ¶m, + void *iter, // ObLobQueryIter + void *read_param, // ObLobAccessParam + ObString &read_buf); int get_next_row(ObLobMetaWriteResult &row); int close(); + void set_end() { is_end_ = true; } + void reuse(); + void set_data(const ObString& data); TO_STRING_KV(K_(seq_id), K_(offset), K_(lob_id), K_(piece_id), K_(coll_type), K_(piece_block_size), K_(scan_iter), K_(padding_size), K_(seq_id_end), K_(last_info)); private: @@ -143,6 +164,9 @@ private: ObIAllocator* allocator_; ObLobMetaInfo last_info_; void *iter_; // ObLobQueryIter + void *read_param_; // ObLobAccessParam + void* lob_common_; // ObLobCommon + bool is_end_; }; class ObLobMetaManager { diff --git a/src/storage/lob/ob_lob_util.cpp b/src/storage/lob/ob_lob_util.cpp index 85b0a6668..6776d5137 100644 --- a/src/storage/lob/ob_lob_util.cpp +++ b/src/storage/lob/ob_lob_util.cpp @@ -16,6 +16,7 @@ #include "ob_lob_manager.h" #include 
"storage/tx/ob_trans_service.h" #include "storage/blocksstable/ob_datum_row.h" +#include "ob_lob_meta.h" namespace oceanbase { @@ -116,7 +117,7 @@ int ObInsertLobColumnHelper::end_trans(transaction::ObTxDesc *tx_desc, int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, - const ObColDesc &column, + const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, blocksstable::ObStorageDatum &datum, const int64_t timeout_ts, @@ -133,9 +134,6 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, if (OB_ISNULL(lob_mngr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get lob manager handle.", K(ret)); - } else if (!column.col_type_.is_lob_storage()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(column)); } else { ObString data = datum.get_string(); // datum with null ptr and zero len should treat as no lob header @@ -158,7 +156,7 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, } } else { if (OB_FAIL(start_trans(ls_id, false/*is_for_read*/, timeout_ts, tx_desc))) { - LOG_WARN("fail to get tx_desc", K(ret), K(column)); + LOG_WARN("fail to get tx_desc", K(ret)); } else if (OB_FAIL(txs->get_ls_read_snapshot(*tx_desc, transaction::ObTxIsolationLevel::RC, ls_id, timeout_ts, snapshot))) { LOG_WARN("fail to get snapshot", K(ret)); } else { @@ -170,14 +168,14 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, lob_param.sql_mode_ = SMO_DEFAULT; lob_param.ls_id_ = ls_id; lob_param.tablet_id_ = tablet_id; - lob_param.coll_type_ = column.col_type_.get_collation_type(); + lob_param.coll_type_ = cs_type; lob_param.allocator_ = &allocator; lob_param.lob_common_ = nullptr; lob_param.timeout_ = timeout_ts; lob_param.scan_backward_ = false; lob_param.offset_ = 0; lob_param.inrow_threshold_ = lob_storage_param.inrow_threshold_; - LOG_DEBUG("lob storage param", K(lob_storage_param), K(column)); + 
LOG_DEBUG("lob storage param", K(lob_storage_param), K(cs_type)); if (!src.is_valid()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid src lob locator.", K(ret)); @@ -199,7 +197,7 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, - const ObColDesc &column, + const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, ObObj &obj, const int64_t timeout_ts) @@ -207,11 +205,85 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, int ret = OB_SUCCESS; ObStorageDatum datum; datum.from_obj(obj); - if (OB_SUCC(insert_lob_column(allocator, ls_id, tablet_id, column, lob_storage_param, datum, timeout_ts, obj.has_lob_header(), MTL_ID()))) { + if (OB_SUCC(insert_lob_column(allocator, ls_id, tablet_id, cs_type, lob_storage_param, datum, timeout_ts, obj.has_lob_header(), MTL_ID()))) { obj.set_lob_value(obj.get_type(), datum.get_string().ptr(), datum.get_string().length()); } return ret; } +int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, + transaction::ObTxDesc *tx_desc, + const share::ObLSID ls_id, + const common::ObTabletID tablet_id, + const ObLobId &lob_id, + const ObCollationType collation_type, + const ObLobStorageParam &lob_storage_param, + blocksstable::ObStorageDatum &datum, + const int64_t timeout_ts, + const bool has_lob_header, + ObLobMetaWriteIter &iter) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + ObLobManager *lob_mngr = MTL(ObLobManager*); + if (OB_ISNULL(lob_mngr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get lob manager handle.", K(ret)); + } else { + ObString data = datum.get_string(); + // datum with null ptr and zero len should treat as no lob header + bool set_has_lob_header = has_lob_header && data.length() > 0; + ObLobLocatorV2 src(data, set_has_lob_header); + int64_t byte_len = 0; + if 
(OB_FAIL(src.get_lob_data_byte_len(byte_len))) { + LOG_WARN("fail to get lob data byte len", K(ret), K(src)); + } else if (src.has_inrow_data() && byte_len <= ObLobManager::LOB_IN_ROW_MAX_LENGTH) { + // do fast inrow + if (OB_FAIL(src.get_inrow_data(data))) { + LOG_WARN("fail to get inrow data", K(ret), K(src)); + } else { + void *buf = allocator.alloc(data.length() + sizeof(ObLobCommon)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc buffer failed", K(ret), K(data.length())); + } else { + ObLobCommon *lob_comm = new(buf)ObLobCommon(); + MEMCPY(lob_comm->buffer_, data.ptr(), data.length()); + datum.set_lob_data(*lob_comm, data.length() + sizeof(ObLobCommon)); + iter.set_end(); + } + } + } else { + ObTransService *txs = MTL(transaction::ObTransService*); + ObTxReadSnapshot snapshot; + // 4.0 text tc compatiable + ObLobAccessParam lob_param; + // lob_param.tx_desc_ = tx_desc; + // lob_param.snapshot_ = snapshot; + lob_param.sql_mode_ = SMO_DEFAULT; + lob_param.ls_id_ = ls_id; + lob_param.tablet_id_ = tablet_id; + lob_param.coll_type_ = collation_type; + lob_param.allocator_ = &allocator; + lob_param.lob_common_ = nullptr; + lob_param.timeout_ = timeout_ts; + lob_param.scan_backward_ = false; + lob_param.offset_ = 0; + lob_param.spec_lob_id_ = lob_id; + lob_param.inrow_threshold_ = lob_storage_param.inrow_threshold_; + if (!src.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid src lob locator.", K(ret)); + } else if (OB_FAIL(lob_mngr->append(lob_param, src, iter))) { + LOG_WARN("lob append failed.", K(ret)); + } else { + datum.set_lob_data(*lob_param.lob_common_, lob_param.handle_size_); + } + } + } + return ret; +} + } } diff --git a/src/storage/lob/ob_lob_util.h b/src/storage/lob/ob_lob_util.h index c893dc3eb..85334bbb3 100644 --- a/src/storage/lob/ob_lob_util.h +++ b/src/storage/lob/ob_lob_util.h @@ -24,6 +24,7 @@ #include "common/object/ob_object.h" #include "storage/lob/ob_lob_seq.h" + namespace oceanbase { @@ -54,7 +55,7 
@@ struct ObLobAccessParam { scan_backward_(false), asscess_ptable_(false), offset_(0), len_(0), parent_seq_no_(), seq_no_st_(), used_seq_cnt_(0), total_seq_cnt_(0), checksum_(0), update_len_(0), op_type_(ObLobDataOutRowCtx::OpType::SQL), is_fill_zero_(false), from_rpc_(false), - inrow_read_nocopy_(false), inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD) + inrow_read_nocopy_(false), inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), spec_lob_id_() {} ~ObLobAccessParam() { if (OB_NOT_NULL(dml_base_param_)) { @@ -66,8 +67,9 @@ public: int64_t get_inrow_threshold(); TO_STRING_KV(K_(tenant_id), K_(src_tenant_id), K_(ls_id), K_(tablet_id), KPC_(lob_locator), KPC_(lob_common), KPC_(lob_data), K_(byte_size), K_(handle_size), K_(coll_type), K_(scan_backward), K_(offset), K_(len), - K_(parent_seq_no), K_(seq_no_st), K_(used_seq_cnt), K_(total_seq_cnt), K_(checksum), K_(update_len), K_(op_type), - K_(is_fill_zero), K_(from_rpc), K_(snapshot), K_(tx_id), K_(inrow_read_nocopy), K_(inrow_threshold)); + K_(parent_seq_no), K_(seq_no_st), K_(used_seq_cnt), K_(total_seq_cnt), K_(checksum), + K_(update_len), K_(op_type), K_(is_fill_zero), K_(from_rpc), K_(snapshot), K_(tx_id), K_(inrow_read_nocopy), + K_(inrow_threshold), K_(spec_lob_id)); public: transaction::ObTxDesc *tx_desc_; // for write/update/delete transaction::ObTxReadSnapshot snapshot_; // for read @@ -113,6 +115,7 @@ public: bool from_rpc_; bool inrow_read_nocopy_; int64_t inrow_threshold_; + ObLobId spec_lob_id_; }; struct ObLobMetaInfo { @@ -174,6 +177,16 @@ struct ObLobMetaInfo { return pos; } + void reset() + { + lob_id_.reset(); + seq_id_.reset(); + char_len_ = 0; + byte_len_ = 0; + piece_id_ = 0; + lob_data_.reset(); + } + ObLobId lob_id_; ObString seq_id_; uint32_t char_len_; @@ -193,6 +206,8 @@ struct ObLobPieceInfo { TO_STRING_KV(K_(piece_id), K_(len), K_(macro_id)); }; +class ObLobMetaWriteIter; + class ObInsertLobColumnHelper final { public: @@ -210,7 +225,7 @@ public: static int insert_lob_column(ObIAllocator 
&allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, - const share::schema::ObColDesc &column, + const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, blocksstable::ObStorageDatum &datum, const int64_t timeout_ts, @@ -219,10 +234,22 @@ public: static int insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, - const share::schema::ObColDesc &column, + const ObCollationType &cs_type, const ObLobStorageParam &lob_storage_param, ObObj &obj, const int64_t timeout_ts); + // should call iter.close outter + static int insert_lob_column(ObIAllocator &allocator, + transaction::ObTxDesc *tx_desc, + const share::ObLSID ls_id, + const common::ObTabletID tablet_id, + const ObLobId &lob_id, + const ObCollationType collation_type, + const ObLobStorageParam &lob_storage_param, + blocksstable::ObStorageDatum &datum, + const int64_t timeout_ts, + const bool has_lob_header, + ObLobMetaWriteIter &iter); }; struct ObLobDiffFlags diff --git a/src/storage/ls/ob_ls_ddl_log_handler.cpp b/src/storage/ls/ob_ls_ddl_log_handler.cpp index 3b2e75192..ae40f965c 100644 --- a/src/storage/ls/ob_ls_ddl_log_handler.cpp +++ b/src/storage/ls/ob_ls_ddl_log_handler.cpp @@ -19,6 +19,7 @@ #include "storage/compaction/ob_schedule_dag_func.h" #include "storage/tablet/ob_tablet_iterator.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ddl/ob_ddl_replay_executor.h" #include "logservice/ob_log_base_header.h" #include "share/scn.h" @@ -144,6 +145,11 @@ int ObActiveDDLKVIterator::get_next_ddl_kv_mgr(ObDDLKvMgrHandle &handle) if (OB_FAIL(to_del_tablets_.push_back(tablet_id))) { LOG_WARN("push back to deleted tablet failed", K(ret)); } + } else if (tablet_handle.get_obj()->get_major_table_count() > 0 + || tablet_handle.get_obj()->get_tablet_meta().table_store_flag_.with_major_sstable()) { + if (OB_FAIL(to_del_tablets_.push_back(tablet_id))) { + 
LOG_WARN("push back to deleted tablet failed", K(ret)); + } } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(handle))) { LOG_WARN("get ddl kv mgr failed", K(ret)); } @@ -221,19 +227,16 @@ int ObLSDDLLogHandler::online() LOG_WARN("failed to build ls tablet iter", K(ret), K(ls_)); } else { while (OB_SUCC(ret)) { - ObDDLKvMgrHandle ddl_kv_mgr_handle; - if (OB_FAIL(tablet_iter.get_next_ddl_kv_mgr(ddl_kv_mgr_handle))) { + ObTabletHandle tablet_handle; + if (OB_FAIL(tablet_iter.get_next_tablet(tablet_handle))) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; break; } else { - LOG_WARN("failed to get ddl kv mgr", K(ret), K(ddl_kv_mgr_handle)); + LOG_WARN("get next tablet failed", K(ret)); } - } else if (OB_UNLIKELY(!ddl_kv_mgr_handle.is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid tablet handle", K(ret), K(ddl_kv_mgr_handle)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->online())) { - LOG_WARN("ddl kv mgr cleanup failed", K(ret), "ls_meta", ls_->get_ls_meta(), "tablet_id", ddl_kv_mgr_handle.get_obj()->get_tablet_id()); + } else if (OB_FAIL(tablet_handle.get_obj()->start_direct_load_task_if_need())) { + LOG_WARN("start ddl if need failed", K(ret)); } } } @@ -346,16 +349,22 @@ int ObLSDDLLogHandler::flush(SCN &rec_scn) { int ret = OB_SUCCESS; ObLSTabletIterator tablet_iter(ObMDSGetTabletMode::READ_WITHOUT_CHECK); + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); if (OB_FAIL(ls_->get_tablet_svr()->build_tablet_iter(tablet_iter))) { LOG_WARN("failed to build ls tablet iter", K(ret), K(ls_)); } else { TCRLockGuard guard(online_lock_); if (!is_online_) { LOG_INFO("ddl log handler is offline, no need to flush", K(ret), "ls_meta", ls_->get_ls_meta()); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys", K(ret), K(MTL_ID())); } else { - bool has_ddl_kv = false; while (OB_SUCC(ret)) { + bool has_ddl_kv = false; ObDDLKvMgrHandle ddl_kv_mgr_handle; + ObTabletDirectLoadMgrHandle 
direct_load_mgr_hdl; + bool is_major_sstable_exist = false; if (OB_FAIL(tablet_iter.get_next_ddl_kv_mgr(ddl_kv_mgr_handle))) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; @@ -368,14 +377,31 @@ int ObLSDDLLogHandler::flush(SCN &rec_scn) LOG_WARN("invalid ddl kv mgr handle", K(ret), K(ddl_kv_mgr_handle)); } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->check_has_effective_ddl_kv(has_ddl_kv))) { LOG_WARN("failed to check ddl kv", K(ret)); - } else if (has_ddl_kv) { + } else if (!has_ddl_kv) { + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( + ls_->get_ls_id(), + ddl_kv_mgr_handle.get_obj()->get_tablet_id(), + true/* is_full_direct_load */, + direct_load_mgr_hdl, + is_major_sstable_exist))) { + if (OB_ENTRY_NOT_EXIST == ret && is_major_sstable_exist) { + LOG_WARN("major sstable already exist, ddl kv may leak", K(ret), "tablet_id", ddl_kv_mgr_handle.get_obj()->get_tablet_id()); + } else { + LOG_WARN("get tablet direct load mgr failed", K(ret), "tablet_id", ddl_kv_mgr_handle.get_obj()->get_tablet_id(), K(is_major_sstable_exist)); + } + } else { DEBUG_SYNC(BEFORE_DDL_CHECKPOINT); - const SCN start_scn = ddl_kv_mgr_handle.get_obj()->get_start_scn(); - const ObTabletID &tablet_id = ddl_kv_mgr_handle.get_obj()->get_tablet_id(); - ObTabletHandle tablet_handle; - if (OB_FAIL(ls_->get_tablet(tablet_id, tablet_handle))) { - LOG_WARN("failed to get tablet", K(ret), K(ls_->get_ls_id()), K(tablet_id)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->schedule_ddl_dump_task(*tablet_handle.get_obj(), start_scn, rec_scn))) { + ObDDLTableMergeDagParam param; + param.ls_id_ = ls_->get_ls_id(); + param.tablet_id_ = ddl_kv_mgr_handle.get_obj()->get_tablet_id(); + param.start_scn_ = direct_load_mgr_hdl.get_full_obj()->get_start_scn(); + param.rec_scn_ = rec_scn; + param.direct_load_type_ = direct_load_mgr_hdl.get_full_obj()->get_direct_load_type(); + param.is_commit_ = false; + param.data_format_version_ = 
direct_load_mgr_hdl.get_full_obj()->get_data_format_version(); + param.snapshot_version_ = direct_load_mgr_hdl.get_full_obj()->get_table_key().get_snapshot_version(); + LOG_INFO("schedule ddl merge dag", K(param)); + if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_ddl_table_merge_dag(param))) { if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { LOG_WARN("failed to schedule ddl kv merge dag", K(ret)); } else { @@ -396,11 +422,13 @@ SCN ObLSDDLLogHandler::get_rec_scn() SCN rec_scn = SCN::max_scn(); bool has_ddl_kv = false; ObActiveDDLKVIterator active_ddl_kv_mgr_iter; + ObTabletID barrier_tablet_id; if (OB_FAIL(active_ddl_kv_mgr_iter.init(ls_, active_ddl_kv_mgr_))) { LOG_WARN("initialize active ddl kv mgr iterator failed", K(ret)); } else { ObDDLKvMgrHandle ddl_kv_mgr_handle; while (OB_SUCC(ret)) { + SCN last_scn = rec_scn; if (OB_FAIL(active_ddl_kv_mgr_iter.get_next_ddl_kv_mgr(ddl_kv_mgr_handle))) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; @@ -413,6 +441,8 @@ SCN ObLSDDLLogHandler::get_rec_scn() LOG_WARN("invalid ddl kv mgr handle", K(ret), K(ddl_kv_mgr_handle)); } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->get_rec_scn(rec_scn))) { LOG_WARN("get rec scn failed", K(ret)); + } else if (rec_scn < last_scn) { + barrier_tablet_id = ddl_kv_mgr_handle.get_obj()->get_tablet_id(); } } } @@ -421,6 +451,9 @@ SCN ObLSDDLLogHandler::get_rec_scn() } else if (!rec_scn.is_max()) { last_rec_scn_ = SCN::max(last_rec_scn_, rec_scn); } + LOG_INFO("[CHECKPOINT] ObLSDDLLogHandler::get_rec_scn", K(ret), + "ls_id", OB_ISNULL(ls_) ? 
ObLSID() : ls_->get_ls_id(), + K(barrier_tablet_id), K(rec_scn), K_(last_rec_scn)); return rec_scn; } diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index fd1290160..fe6c5477b 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -48,6 +48,7 @@ #include "storage/access/ob_index_sstable_estimator.h" #include "storage/column_store/ob_column_oriented_sstable.h" #include "storage/blocksstable/ob_sstable.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ls/ob_ls.h" #include "storage/tablet/ob_tablet.h" #include "storage/tablet/ob_tablet_iterator.h" @@ -512,6 +513,7 @@ int ObLSTabletService::inner_remove_tablet( int ret = OB_SUCCESS; const ObTabletMapKey key(ls_id, tablet_id); ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); if (OB_FAIL(tablet_id_set_.erase(tablet_id))) { if (OB_HASH_NOT_EXIST == ret) { @@ -531,6 +533,17 @@ int ObLSTabletService::inner_remove_tablet( } } + if (OB_SUCC(ret)) { + if (OB_FAIL(tenant_direct_load_mgr->remove_tablet_direct_load( + ObTabletDirectLoadMgrKey(tablet_id, true)))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_ERROR("remove tablet direct load failed", K(ret), K(ls_id), K(tablet_id)); + } + } + } + if (OB_SUCC(ret)) { FLOG_INFO("succeeded to remove tablet", K(ret), K(ls_id), K(tablet_id)); } @@ -827,7 +840,7 @@ int ObLSTabletService::rebuild_tablet_with_old( } else if (FALSE_IT(disk_addr = new_tablet_hdl.get_obj()->tablet_addr_)) { } else if (OB_FAIL(safe_update_cas_tablet(key, disk_addr, old_tablet_hdl, new_tablet_hdl, time_guard))) { LOG_WARN("fail to update tablet", K(ret), K(key), K(disk_addr)); - } else if (OB_FAIL(new_tablet_hdl.get_obj()->start_ddl_if_need())) { + } else if (OB_FAIL(new_tablet_hdl.get_obj()->start_direct_load_task_if_need())) { LOG_WARN("start ddl if need failed", K(ret), 
K(key)); } else { LOG_INFO("rebuild tablet with old succeed", K(ret), K(key), K(disk_addr)); @@ -866,7 +879,7 @@ int ObLSTabletService::migrate_update_tablet( } else if (FALSE_IT(disk_addr = new_tablet_hdl.get_obj()->tablet_addr_)) { } else if (OB_FAIL(safe_update_cas_tablet(key, disk_addr, old_tablet_hdl, new_tablet_hdl, time_guard))) { LOG_WARN("fail to update tablet", K(ret), K(key), K(disk_addr)); - } else if (OB_FAIL(new_tablet_hdl.get_obj()->start_ddl_if_need())) { + } else if (OB_FAIL(new_tablet_hdl.get_obj()->start_direct_load_task_if_need())) { LOG_WARN("start ddl if need failed", K(ret)); } else { LOG_INFO("migrate update tablet succeed", K(ret), K(key), K(disk_addr)); @@ -901,7 +914,7 @@ int ObLSTabletService::migrate_create_tablet( } else if (FALSE_IT(disk_addr = tablet_handle.get_obj()->tablet_addr_)) { } else if (OB_FAIL(safe_create_cas_tablet(ls_id, tablet_id, disk_addr, tablet_handle, time_guard))) { LOG_WARN("fail to create tablet and cas", K(ret), K(ls_id), K(tablet_id), K(disk_addr)); - } else if (OB_FAIL(tablet_handle.get_obj()->start_ddl_if_need())) { + } else if (OB_FAIL(tablet_handle.get_obj()->start_direct_load_task_if_need())) { LOG_WARN("start ddl if need failed", K(ret)); } else { LOG_INFO("migrate create tablet succeed", K(ret), K(key), K(disk_addr)); @@ -1838,7 +1851,7 @@ int ObLSTabletService::replay_create_tablet( } else if (FALSE_IT(time_guard.click("CASwap"))) { } else if (OB_FAIL(tablet->check_and_set_initial_state())) { LOG_WARN("fail to check and set initial state", K(ret), K(key)); - } else if (OB_FAIL(tablet->start_ddl_if_need())) { + } else if (OB_FAIL(tablet->start_direct_load_task_if_need())) { LOG_WARN("start ddl if need failed", K(ret)); } else if (OB_FAIL(tablet->inc_macro_ref_cnt())) { LOG_WARN("fail to increase macro blocks' ref cnt for meta and data", K(ret)); @@ -1983,6 +1996,7 @@ int ObLSTabletService::create_tablet( const share::SCN &create_scn, const int64_t snapshot_version, const ObCreateTabletSchema 
&create_tablet_schema, + const lib::Worker::CompatMode &compat_mode, ObTabletHandle &tablet_handle) { int ret = OB_SUCCESS; @@ -1992,7 +2006,6 @@ int ObLSTabletService::create_tablet( const ObTabletMapKey key(ls_id, tablet_id); ObTablet *tablet = nullptr; ObFreezer *freezer = ls_->get_freezer(); - bool need_create_empty_major_sstable = false; tablet_handle.reset(); if (OB_FAIL(ObTabletCreateDeleteHelper::prepare_create_msd_tablet())) { @@ -2005,11 +2018,8 @@ int ObLSTabletService::create_tablet( || OB_ISNULL(allocator = tablet_handle.get_allocator())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("new tablet is null", K(ret), KP(tablet), KP(allocator), K(tablet_handle)); - } else if (OB_FAIL(ObTabletCreateDeleteHelper::check_need_create_empty_major_sstable( - create_tablet_schema, need_create_empty_major_sstable))) { - LOG_WARN("failed to check need create sstable", K(ret)); } else if (OB_FAIL(tablet->init_for_first_time_creation(*allocator, ls_id, tablet_id, data_tablet_id, - create_scn, snapshot_version, create_tablet_schema, need_create_empty_major_sstable, freezer))) { + create_scn, snapshot_version, create_tablet_schema, freezer))) { LOG_WARN("failed to init tablet", K(ret), K(ls_id), K(tablet_id), K(data_tablet_id), K(create_scn), K(snapshot_version), K(create_tablet_schema)); } else if (OB_FAIL(t3m->compare_and_swap_tablet(key, tablet_handle, tablet_handle))) { @@ -2039,7 +2049,6 @@ int ObLSTabletService::create_inner_tablet( const ObTabletMapKey key(ls_id, tablet_id); ObTablet *tmp_tablet = nullptr; ObFreezer *freezer = ls_->get_freezer(); - bool need_create_empty_major_sstable = false; ObTabletHandle tmp_tablet_hdl; ObMetaDiskAddr disk_addr; ObTimeGuard time_guard("ObLSTabletService::create_inner_tablet", 10_ms); @@ -2051,11 +2060,8 @@ int ObLSTabletService::create_inner_tablet( ret = OB_ERR_UNEXPECTED; LOG_ERROR("new tablet is null", K(ret), KPC(tmp_tablet), K(tmp_tablet_hdl)); } else if (FALSE_IT(time_guard.click("CreateTablet"))) { - } else if 
(OB_FAIL(ObTabletCreateDeleteHelper::check_need_create_empty_major_sstable( - create_tablet_schema, need_create_empty_major_sstable))) { - LOG_WARN("failed to check need create sstable", K(ret)); } else if (OB_FAIL(tmp_tablet->init_for_first_time_creation(allocator, ls_id, tablet_id, data_tablet_id, - create_scn, snapshot_version, create_tablet_schema, need_create_empty_major_sstable, freezer))) { + create_scn, snapshot_version, create_tablet_schema, freezer))) { LOG_WARN("failed to init tablet", K(ret), K(ls_id), K(tablet_id), K(data_tablet_id), K(create_scn), K(snapshot_version), K(create_tablet_schema)); int tmp_ret = OB_SUCCESS; diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index 48e2c0547..640557166 100644 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -187,6 +187,7 @@ public: const share::SCN &create_scn, const int64_t snapshot_version, const ObCreateTabletSchema &create_tablet_schema, + const lib::Worker::CompatMode &compat_mode, ObTabletHandle &tablet_handle); int create_transfer_in_tablet( const share::ObLSID &ls_id, diff --git a/src/storage/memtable/mvcc/ob_keybtree.cpp b/src/storage/memtable/mvcc/ob_keybtree.cpp index 18abe69da..c6b8ea07a 100644 --- a/src/storage/memtable/mvcc/ob_keybtree.cpp +++ b/src/storage/memtable/mvcc/ob_keybtree.cpp @@ -1233,7 +1233,7 @@ int BtreeIterator::KVQueue::pop(BtreeKV &data) } template -int BtreeIterator::init(ObKeyBtree &btree) +int BtreeIterator::init(const ObKeyBtree &btree) { int ret = OB_SUCCESS; if (OB_NOT_NULL(iter_)) { @@ -1434,7 +1434,7 @@ int BtreeNodeAllocator::pop(BtreeNode*& p) } template -int BtreeRawIterator::init(ObKeyBtree &btree) +int BtreeRawIterator::init(const ObKeyBtree &btree) { int ret = OB_SUCCESS; if (OB_NOT_NULL(iter_)) { @@ -1777,7 +1777,7 @@ int ObKeyBtree::get(const BtreeKey key, BtreeVal &value) template int ObKeyBtree::set_key_range(BtreeIterator &iter, const BtreeKey min_key, const bool start_exclude, 
- const BtreeKey max_key, const bool end_exclude, int64_t version) + const BtreeKey max_key, const bool end_exclude, int64_t version) const { int ret = OB_SUCCESS; if (OB_FAIL(iter.init(*this))) { @@ -1790,7 +1790,7 @@ int ObKeyBtree::set_key_range(BtreeIterator &iter, const Btr template int ObKeyBtree::set_key_range(BtreeRawIterator &iter, const BtreeKey min_key, const bool start_exclude, - const BtreeKey max_key, const bool end_exclude, int64_t version) + const BtreeKey max_key, const bool end_exclude, int64_t version) const { int ret = OB_SUCCESS; if (OB_FAIL(iter.init(*this))) { diff --git a/src/storage/memtable/mvcc/ob_keybtree.h b/src/storage/memtable/mvcc/ob_keybtree.h index 294432e61..9da00ff4f 100644 --- a/src/storage/memtable/mvcc/ob_keybtree.h +++ b/src/storage/memtable/mvcc/ob_keybtree.h @@ -141,7 +141,7 @@ public: scan_backward_(false), kv_queue_() {} ~BtreeIterator() { reset(); } - int init(ObKeyBtree &btree); + int init(const ObKeyBtree &btree); void reset(); int set_key_range(const BtreeKey min_key, const bool start_exclude, const BtreeKey max_key, const bool end_exclude, int64_t version); @@ -176,7 +176,7 @@ private: public: explicit BtreeRawIterator(): iter_(NULL) {} ~BtreeRawIterator() { reset(); } - int init(ObKeyBtree &btree); + int init(const ObKeyBtree &btree); void reset(); int set_key_range(const BtreeKey min_key, const bool start_exclude, const BtreeKey max_key, const bool end_exclude, int64_t version); @@ -228,9 +228,9 @@ public: int insert(const BtreeKey key, BtreeVal &value); int get(const BtreeKey key, BtreeVal &value); int set_key_range(BtreeIterator &iter, const BtreeKey min_key, const bool start_exclude, - const BtreeKey max_key, const bool end_exclude, int64_t version); + const BtreeKey max_key, const bool end_exclude, int64_t version) const; int set_key_range(BtreeRawIterator &handle, const BtreeKey min_key, const bool start_exclude, - const BtreeKey max_key, bool end_exclude, int64_t version); + const BtreeKey max_key, bool 
end_exclude, int64_t version) const; BtreeNode *alloc_node(const bool is_emergency); static void free_node(BtreeNode *p); void retire(common::HazardList &retire_list); diff --git a/src/storage/memtable/mvcc/ob_keybtree_deps.h b/src/storage/memtable/mvcc/ob_keybtree_deps.h index 9b7afe685..cc72a133f 100644 --- a/src/storage/memtable/mvcc/ob_keybtree_deps.h +++ b/src/storage/memtable/mvcc/ob_keybtree_deps.h @@ -431,7 +431,7 @@ private: Path path_; int64_t version_; public: - explicit ScanHandle(ObKeyBtree &tree): BaseHandle(tree.get_qclock()), version_(INT64_MAX) { UNUSED(tree); } + explicit ScanHandle(const ObKeyBtree &tree): BaseHandle(tree.get_qclock()), version_(INT64_MAX) { UNUSED(tree); } ~ScanHandle() {} void reset() { @@ -557,7 +557,7 @@ private: typedef CompHelper CompHelper; typedef ScanHandle ScanHandle; public: - explicit Iterator(ObKeyBtree &btree): btree_(btree), scan_handle_(btree), jump_key_(nullptr), + explicit Iterator(const ObKeyBtree &btree): btree_(btree), scan_handle_(btree), jump_key_(nullptr), cmp_result_(0), comp_(scan_handle_.get_comp()), start_key_(), end_key_(), start_exclude_(false), end_exclude_(false), scan_backward_(false), is_iter_end_(false), iter_count_(0) {} @@ -585,7 +585,7 @@ private: const double ratio); int comp(BtreeKey& cur_key, BtreeKey* jump_key, int &cmp); private: - ObKeyBtree &btree_; + const ObKeyBtree &btree_; ScanHandle scan_handle_; BtreeKey* jump_key_; int cmp_result_; diff --git a/src/storage/meta_mem/ob_tablet_pointer.cpp b/src/storage/meta_mem/ob_tablet_pointer.cpp index 4b3655e34..7c1189eaf 100644 --- a/src/storage/meta_mem/ob_tablet_pointer.cpp +++ b/src/storage/meta_mem/ob_tablet_pointer.cpp @@ -415,7 +415,10 @@ void ObTabletPointer::set_initial_state(const bool initial_state) ATOMIC_STORE(&initial_state_, initial_state); } -int ObTabletPointer::create_ddl_kv_mgr(const share::ObLSID &ls_id, const ObTabletID &tablet_id, ObDDLKvMgrHandle &ddl_kv_mgr_handle) +int ObTabletPointer::create_ddl_kv_mgr( + const 
share::ObLSID &ls_id, + const ObTabletID &tablet_id, + ObDDLKvMgrHandle &ddl_kv_mgr_handle) { int ret = OB_SUCCESS; ddl_kv_mgr_handle.reset(); diff --git a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp index aa0b34cbd..29a8f5044 100644 --- a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp +++ b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.cpp @@ -226,7 +226,6 @@ void ObTenantMetaMemMgr::init_pool_arr() pool_arr_[static_cast(ObITable::TableType::TX_DATA_MEMTABLE)] = &tx_data_memtable_pool_; pool_arr_[static_cast(ObITable::TableType::TX_CTX_MEMTABLE)] = &tx_ctx_memtable_pool_; pool_arr_[static_cast(ObITable::TableType::LOCK_MEMTABLE)] = &lock_memtable_pool_; - pool_arr_[static_cast(ObITable::TableType::DDL_MEM_SSTABLE)] = &ddl_kv_pool_; } int ObTenantMetaMemMgr::start() @@ -364,7 +363,7 @@ int ObTenantMetaMemMgr::push_table_into_gc_queue(ObITable *table, const ObITable } else if (OB_UNLIKELY(!ObITable::is_table_type_valid(table_type))) { ret = OB_INVALID_ARGUMENT; LOG_ERROR("invalid table key", K(ret), K(table_type), KPC(table)); - } else if (OB_UNLIKELY(ObITable::is_sstable(table_type) && ObITable::DDL_MEM_SSTABLE != table_type)) { + } else if (OB_UNLIKELY(ObITable::is_sstable(table_type))) { ret = OB_INVALID_ARGUMENT; LOG_ERROR("should not recycle sstable", K(ret), K(table_type), KPC(table)); } else if (OB_ISNULL(item = (TableGCItem *)ob_malloc(size, attr))) { @@ -972,7 +971,7 @@ int ObTenantMetaMemMgr::get_min_mds_ckpt_scn(const ObTabletMapKey &key, share::S return ret; } -int ObTenantMetaMemMgr::acquire_ddl_kv(ObTableHandleV2 &handle) +int ObTenantMetaMemMgr::acquire_ddl_kv(ObDDLKVHandle &handle) { int ret = OB_SUCCESS; ObDDLKV *ddl_kv = nullptr; @@ -982,7 +981,7 @@ int ObTenantMetaMemMgr::acquire_ddl_kv(ObTableHandleV2 &handle) LOG_WARN("ObTenantMetaMemMgr hasn't been initialized", K(ret)); } else if (OB_FAIL(ddl_kv_pool_.acquire(ddl_kv))) { LOG_WARN("fail to acquire ddl kv object", K(ret)); - } else if 
(OB_FAIL(handle.set_table(ddl_kv, this, ObITable::TableType::DDL_MEM_SSTABLE))) { + } else if (OB_FAIL(handle.set_obj(ddl_kv))) { LOG_WARN("fail to set table", K(ret), KP(ddl_kv)); } else { ddl_kv = nullptr; diff --git a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.h b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.h index 6a36c0d9f..5d5c9f2f2 100644 --- a/src/storage/meta_mem/ob_tenant_meta_mem_mgr.h +++ b/src/storage/meta_mem/ob_tenant_meta_mem_mgr.h @@ -202,7 +202,8 @@ public: int gc_tablets_in_queue(bool &all_tablet_cleaned); // trigger to gc tablets // ddl kv interface - int acquire_ddl_kv(ObTableHandleV2 &handle); + int acquire_ddl_kv(ObDDLKVHandle &handle); + void release_ddl_kv(ObDDLKV *ddl_kv); // memtable interfaces int acquire_memtable(ObTableHandleV2 &handle); @@ -451,7 +452,6 @@ private: void *recycle_tablet(ObTablet *tablet, TabletBufferList *header = nullptr); void release_memtable(memtable::ObMemtable *memtable); void release_tablet(ObTablet *tablet); - void release_ddl_kv(ObDDLKV *ddl_kv); void release_tablet_ddl_kv_mgr(ObTabletDDLKvMgr *ddl_kv_mgr); void release_tx_data_memtable_(ObTxDataMemtable *memtable); void release_tx_ctx_memtable_(ObTxCtxMemtable *memtable); diff --git a/src/storage/ob_i_store.cpp b/src/storage/ob_i_store.cpp index f27aa7bff..6a4e38309 100644 --- a/src/storage/ob_i_store.cpp +++ b/src/storage/ob_i_store.cpp @@ -80,6 +80,7 @@ void ObStoreCtx::reset() } int ObStoreCtx::init_for_read(const ObLSID &ls_id, + const common::ObTabletID tablet_id, const int64_t timeout, const int64_t tx_lock_timeout, const SCN &snapshot_version) @@ -90,6 +91,7 @@ int ObStoreCtx::init_for_read(const ObLSID &ls_id, if (OB_FAIL(ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { STORAGE_LOG(WARN, "get_ls from ls service fail.", K(ret), K(*ls_svr)); } else { + tablet_id_ = tablet_id; ret = init_for_read(ls_handle, timeout, tx_lock_timeout, snapshot_version); } return ret; diff --git a/src/storage/ob_i_store.h b/src/storage/ob_i_store.h index 
2c692fcc0..8570059ac 100644 --- a/src/storage/ob_i_store.h +++ b/src/storage/ob_i_store.h @@ -416,6 +416,7 @@ struct ObStoreCtx bool is_replay() const { return mvcc_acc_ctx_.is_replay(); } bool is_read_store_ctx() const { return is_read_store_ctx_; } int init_for_read(const share::ObLSID &ls_id, + const common::ObTabletID tablet_id, const int64_t timeout, const int64_t lock_timeout_us, const share::SCN &snapshot_version); diff --git a/src/storage/ob_i_table.cpp b/src/storage/ob_i_table.cpp index 867e4d8d7..950728839 100644 --- a/src/storage/ob_i_table.cpp +++ b/src/storage/ob_i_table.cpp @@ -209,8 +209,6 @@ bool ObTableHandleV2::is_valid() const if (nullptr == table_) { } else if (ObITable::is_memtable(table_type_)) { bret = (nullptr != t3m_) ^ (nullptr != allocator_); - } else if (ObITable::is_ddl_mem_sstable(table_type_)) { - bret = nullptr != t3m_; } else { // all other sstables bret = (meta_handle_.is_valid() ^ (nullptr != allocator_)) || lifetime_guaranteed_by_tablet_; @@ -501,7 +499,7 @@ int ObTableHandleV2::set_table( OB_UNLIKELY(!ObITable::is_table_type_valid(table_type))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument", K(ret), KP(table), KP(t3m), K(table_type)); - } else if (OB_UNLIKELY(ObITable::is_sstable(table_type) && !ObITable::is_ddl_mem_sstable(table_type))) { + } else if (OB_UNLIKELY(ObITable::is_sstable(table_type))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(ERROR, "sstable should not use this interface", K(ret), KP(table), K(table_type)); } else { diff --git a/src/storage/ob_i_table.h b/src/storage/ob_i_table.h index ce07c83ab..e916fe0d4 100644 --- a/src/storage/ob_i_table.h +++ b/src/storage/ob_i_table.h @@ -132,6 +132,7 @@ public: OB_INLINE bool is_minor_sstable() const { return ObITable::is_minor_sstable(table_type_); } OB_INLINE bool is_mini_sstable() const { return ObITable::is_mini_sstable(table_type_); } OB_INLINE bool is_major_sstable() const { return ObITable::is_major_sstable(table_type_) || 
ObITable::is_meta_major_sstable(table_type_); } + OB_INLINE bool is_major_or_ddl_merge_sstable() const { return is_major_sstable() || ObITable::is_ddl_merge_sstable(table_type_); } OB_INLINE bool is_meta_major_sstable() const { return ObITable::is_meta_major_sstable(table_type_); } OB_INLINE bool is_multi_version_table() const { return ObITable::is_multi_version_table(table_type_); } OB_INLINE bool is_ddl_sstable() const { return ObITable::is_ddl_sstable(table_type_); } @@ -150,7 +151,6 @@ public: OB_INLINE share::SCN get_end_scn() const { return scn_range_.end_scn_; } OB_INLINE int64_t get_snapshot_version() const { - OB_ASSERT(is_major_sstable() || is_meta_major_sstable()); return version_range_.snapshot_version_; } OB_INLINE uint16_t get_column_group_id() const { return column_group_idx_; } @@ -252,6 +252,7 @@ public: virtual bool is_column_store_sstable() const { return is_co_sstable() || is_cg_sstable(); } virtual bool is_meta_major_sstable() const { return is_meta_major_sstable(key_.table_type_); } virtual bool is_major_sstable() const { return is_major_sstable(key_.table_type_) || is_meta_major_sstable(key_.table_type_); } + virtual bool is_major_or_ddl_merge_sstable() const { return is_major_sstable() || is_ddl_merge_sstable(key_.table_type_); } virtual bool is_minor_sstable() const { return is_minor_sstable(key_.table_type_); } virtual bool is_mini_sstable() const { return is_mini_sstable(key_.table_type_); } virtual bool is_multi_version_minor_sstable() const { return is_multi_version_minor_sstable(key_.table_type_); } @@ -269,8 +270,11 @@ public: virtual bool is_ddl_sstable() const { return is_ddl_sstable(key_.table_type_); } virtual bool is_ddl_dump_sstable() const { return is_ddl_dump_sstable(key_.table_type_); } virtual bool is_ddl_mem_sstable() const { return is_ddl_mem_sstable(key_.table_type_); } + virtual bool is_ddl_merge_sstable() const { return is_ddl_merge_sstable(key_.table_type_); } virtual bool is_remote_logical_minor_sstable() const { 
return is_remote_logical_minor_sstable(key_.table_type_); } virtual bool is_empty() const = 0; + virtual bool no_data_to_read() const { return is_empty(); } + virtual bool is_ddl_merge_empty_sstable() const { return is_empty() && is_ddl_merge_sstable(); } DECLARE_VIRTUAL_TO_STRING; static bool is_sstable(const TableType table_type) @@ -280,8 +284,9 @@ public: static bool is_major_sstable(const TableType table_type) { return ObITable::TableType::MAJOR_SSTABLE == table_type - || ObITable::TableType::COLUMN_ORIENTED_SSTABLE == table_type - || is_cg_sstable(table_type); + || ObITable::TableType::COLUMN_ORIENTED_SSTABLE == table_type + || ObITable::TableType::NORMAL_COLUMN_GROUP_SSTABLE == table_type + || ObITable::TableType::ROWKEY_COLUMN_GROUP_SSTABLE == table_type; } static bool is_minor_sstable(const TableType table_type) { @@ -317,11 +322,14 @@ public: static bool is_co_sstable(const TableType table_type) { return ObITable::TableType::COLUMN_ORIENTED_SSTABLE == table_type - || ObITable::TableType::COLUMN_ORIENTED_META_SSTABLE == table_type; + || ObITable::TableType::COLUMN_ORIENTED_META_SSTABLE == table_type + || ObITable::TableType::DDL_MERGE_CO_SSTABLE == table_type; } static bool is_normal_cg_sstable(const TableType table_type) { - return ObITable::TableType::NORMAL_COLUMN_GROUP_SSTABLE == table_type; + return ObITable::TableType::NORMAL_COLUMN_GROUP_SSTABLE == table_type + || ObITable::TableType::DDL_MERGE_CG_SSTABLE == table_type + || ObITable::TableType::DDL_MEM_CG_SSTABLE == table_type; } static bool is_rowkey_cg_sstable(const TableType table_type) { @@ -335,7 +343,8 @@ public: static bool is_column_store_sstable(const TableType table_type) { return is_co_sstable(table_type) - || is_cg_sstable(table_type); + || is_cg_sstable(table_type) + || ObITable::TableType::DDL_MEM_CO_SSTABLE == table_type; } static bool is_remote_logical_minor_sstable(const TableType table_type) @@ -384,15 +393,28 @@ public: static bool is_ddl_sstable(const TableType table_type) { 
return ObITable::TableType::DDL_DUMP_SSTABLE == table_type - || ObITable::TableType::DDL_MEM_SSTABLE == table_type; + || ObITable::TableType::DDL_MERGE_CO_SSTABLE == table_type + || ObITable::TableType::DDL_MERGE_CG_SSTABLE == table_type + || ObITable::TableType::DDL_MEM_SSTABLE == table_type + || ObITable::TableType::DDL_MEM_CO_SSTABLE == table_type + || ObITable::TableType::DDL_MEM_CG_SSTABLE == table_type; } static bool is_ddl_dump_sstable(const TableType table_type) { - return ObITable::TableType::DDL_DUMP_SSTABLE == table_type; + return ObITable::TableType::DDL_DUMP_SSTABLE == table_type + || ObITable::TableType::DDL_MERGE_CO_SSTABLE == table_type + || ObITable::TableType::DDL_MERGE_CG_SSTABLE == table_type; } static bool is_ddl_mem_sstable(const TableType table_type) { - return ObITable::TableType::DDL_MEM_SSTABLE == table_type; + return ObITable::TableType::DDL_MEM_SSTABLE == table_type + || ObITable::TableType::DDL_MEM_CO_SSTABLE == table_type + || ObITable::TableType::DDL_MEM_CG_SSTABLE == table_type; + } + static bool is_ddl_merge_sstable(const TableType table_type) + { + return ObITable::TableType::DDL_MERGE_CO_SSTABLE == table_type + || ObITable::TableType::DDL_MERGE_CG_SSTABLE == table_type; } static bool is_table_with_scn_range(const TableType table_type) { diff --git a/src/storage/ob_partition_range_spliter.cpp b/src/storage/ob_partition_range_spliter.cpp index cf1e381aa..c48b44eec 100644 --- a/src/storage/ob_partition_range_spliter.cpp +++ b/src/storage/ob_partition_range_spliter.cpp @@ -1834,6 +1834,7 @@ int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::prepare_store_ctx if (OB_FAIL(scn.convert_for_tx(snapshot))) { STORAGE_LOG(WARN, "convert for tx fail", K(ret), K(ls_id), K(snapshot)); } else if (OB_FAIL(store_ctx_.init_for_read(ls_id, + merge_ctx_.get_tablet_id(), INT64_MAX, -1, scn))) { diff --git a/src/storage/ob_storage_schema.cpp b/src/storage/ob_storage_schema.cpp index dac18dc7e..5c8810685 100644 --- 
a/src/storage/ob_storage_schema.cpp +++ b/src/storage/ob_storage_schema.cpp @@ -1593,7 +1593,9 @@ int ObCreateTabletSchema::serialize(char *buf, const int64_t buf_len, int64_t &p LST_DO_CODE(OB_UNIS_ENCODE, table_id_, index_status_, - truncate_version_); + truncate_version_, + tenant_data_version_, + need_create_empty_major_); return ret; } @@ -1606,7 +1608,9 @@ int ObCreateTabletSchema::deserialize(common::ObIAllocator &allocator, const cha LST_DO_CODE(OB_UNIS_DECODE, table_id_, index_status_, - truncate_version_); + truncate_version_, + tenant_data_version_, + need_create_empty_major_); } return ret; } @@ -1617,7 +1621,9 @@ int64_t ObCreateTabletSchema::get_serialize_size() const LST_DO_CODE(OB_UNIS_ADD_LEN, table_id_, index_status_, - truncate_version_); + truncate_version_, + tenant_data_version_, + need_create_empty_major_); return len; } @@ -1626,7 +1632,9 @@ int ObCreateTabletSchema::init( const share::schema::ObTableSchema &input_schema, const lib::Worker::CompatMode compat_mode, const bool skip_column_info, - const int64_t compat_version) + const int64_t compat_version, + const uint64_t tenant_data_version, + const bool need_create_empty_major) { int ret = OB_SUCCESS; if (OB_FAIL(ObStorageSchema::init(allocator, input_schema, compat_mode, skip_column_info, compat_version))) { @@ -1635,6 +1643,8 @@ int ObCreateTabletSchema::init( table_id_ = input_schema.get_table_id(); index_status_ = input_schema.get_index_status(); truncate_version_ = input_schema.get_truncate_version(); + tenant_data_version_ = tenant_data_version; + need_create_empty_major_ = need_create_empty_major; } return ret; } @@ -1650,6 +1660,8 @@ int ObCreateTabletSchema::init( table_id_ = old_schema.get_table_id(); index_status_ = old_schema.get_index_status(); truncate_version_ = old_schema.get_truncate_version(); + tenant_data_version_ = old_schema.get_tenant_data_version(); + need_create_empty_major_ = old_schema.get_need_create_empty_major(); } return ret; } diff --git 
a/src/storage/ob_storage_schema.h b/src/storage/ob_storage_schema.h index adacad70f..bc8322b77 100644 --- a/src/storage/ob_storage_schema.h +++ b/src/storage/ob_storage_schema.h @@ -363,7 +363,9 @@ public: : ObStorageSchema(), table_id_(common::OB_INVALID_ID), index_status_(share::schema::ObIndexStatus::INDEX_STATUS_UNAVAILABLE), - truncate_version_(OB_INVALID_VERSION) + truncate_version_(OB_INVALID_VERSION), + tenant_data_version_(0), + need_create_empty_major_(true) {} int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; @@ -376,6 +378,12 @@ public: { return table_id_; } int64_t get_truncate_version() const { return truncate_version_; } + uint64_t get_tenant_data_version() const + { return tenant_data_version_; } + bool get_need_create_empty_major () const + { return need_create_empty_major_; } + void set_need_create_empty_major(const bool need_create_empty_major) + { need_create_empty_major_ = need_create_empty_major; } bool is_valid() const { return ObStorageSchema::is_valid() && common::OB_INVALID_ID != table_id_; @@ -384,10 +392,13 @@ public: const share::schema::ObTableSchema &input_schema, const lib::Worker::CompatMode compat_mode, const bool skip_column_info, - const int64_t compat_version); + const int64_t compat_version, + const uint64_t tenant_data_version, + const bool need_create_empty_major); int init(common::ObIAllocator &allocator, const ObCreateTabletSchema &old_schema); - INHERIT_TO_STRING_KV("ObStorageSchema", ObStorageSchema, K_(table_id), K_(index_status), K_(truncate_version)); + INHERIT_TO_STRING_KV("ObStorageSchema", ObStorageSchema, K_(table_id), K_(index_status), K_(truncate_version), + K_(tenant_data_version), K_(need_create_empty_major)); private: // for cdc uint64_t table_id_; @@ -395,6 +406,8 @@ private: share::schema::ObIndexStatus index_status_; // for tablet throttling int64_t truncate_version_; + uint64_t tenant_data_version_; + bool need_create_empty_major_; }; template diff --git 
a/src/storage/tablelock/ob_lock_table.cpp b/src/storage/tablelock/ob_lock_table.cpp index 840305d55..a0ebbc049 100644 --- a/src/storage/tablelock/ob_lock_table.cpp +++ b/src/storage/tablelock/ob_lock_table.cpp @@ -324,13 +324,17 @@ int ObLockTable::create_tablet(const lib::Worker::CompatMode compat_mode, const ObMemtableMgrHandle memtable_mgr_handle; ObArenaAllocator arena_allocator; ObCreateTabletSchema create_tablet_schema; + uint64_t tenant_data_version = 0; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObLockTable not inited", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get data version failed", K(ret)); } else if (OB_FAIL(get_table_schema_(tenant_id, table_schema))) { LOG_WARN("get lock table schema failed", K(ret)); } else if (OB_FAIL(create_tablet_schema.init(arena_allocator, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + tenant_data_version, true/*need_create_empty_major*/))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(parent_->create_ls_inner_tablet(ls_id, LS_LOCK_TABLET, diff --git a/src/storage/tablet/ob_table_store_util.cpp b/src/storage/tablet/ob_table_store_util.cpp index d8d623669..90ec2161d 100644 --- a/src/storage/tablet/ob_table_store_util.cpp +++ b/src/storage/tablet/ob_table_store_util.cpp @@ -24,6 +24,7 @@ #include "storage/tablet/ob_tablet_table_store.h" #include "observer/ob_server_struct.h" #include "storage/column_store/ob_column_oriented_sstable.h" +#include "storage/ddl/ob_tablet_ddl_kv.h" #include "share/scn.h" @@ -197,7 +198,6 @@ int ObSSTableArray::add_tables_for_cg( return ret; } - int ObSSTableArray::inner_init( ObArenaAllocator &allocator, const ObIArray &tables, @@ -1019,7 +1019,7 @@ bool ObMemtableArray::exist_memtable_with_end_scn(const ObITable *table, const S int ObDDLKVArray::init( 
ObArenaAllocator &allocator, - common::ObIArray &ddl_kvs) + common::ObIArray &ddl_kvs) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { @@ -1029,14 +1029,14 @@ int ObDDLKVArray::init( count_ = 0; ddl_kvs_ = nullptr; if (0 != ddl_kvs.count()) { - const int64_t size = sizeof(ObITable *) * ddl_kvs.count(); - if (OB_ISNULL(ddl_kvs_ = static_cast(allocator.alloc(size)))) { + const int64_t size = sizeof(ObDDLKV *) * ddl_kvs.count(); + if (OB_ISNULL(ddl_kvs_ = static_cast(allocator.alloc(size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate ddl kv pointer arrays", K(ret), K(size)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kvs.count(); ++i) { - ObITable *table = ddl_kvs.at(i); - if (OB_UNLIKELY(nullptr == table || !table->is_ddl_sstable())) { + ObDDLKV *table = ddl_kvs.at(i); + if (OB_UNLIKELY(nullptr == table)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("table must be ddl kv", K(ret), K(i), KPC(table)); } else { @@ -1069,8 +1069,8 @@ int ObDDLKVArray::deep_copy( ret = OB_INVALID_ARGUMENT; LOG_WARN("finvalid argument", K(ret), KP(dst_buf), K(buf_size), K(pos), K(deep_copy_size), K(count_)); } else { - dst.ddl_kvs_ = 0 == count_ ? nullptr : reinterpret_cast(dst_buf + pos); - const int64_t array_size = count_ * sizeof(ObITable *); + dst.ddl_kvs_ = 0 == count_ ? 
nullptr : reinterpret_cast(dst_buf + pos); + const int64_t array_size = count_ * sizeof(ObDDLKV *); pos += array_size; for (int64_t i = 0; i < count_; ++i) { dst.ddl_kvs_[i] = ddl_kvs_[i]; @@ -1081,6 +1081,21 @@ int ObDDLKVArray::deep_copy( return ret; } +int64_t ObDDLKVArray::to_string(char *buf, const int64_t buf_len) const +{ + int64_t pos = 0; + if (OB_ISNULL(buf) || buf_len <= 0) { + // do nothing + } else { + J_OBJ_START(); + J_NAME("ObDDLKVArray"); + J_KV(KP(this), + K_(count), + "ddl_kv_ptr_array", ObArrayWrap(ddl_kvs_, count_)); + J_OBJ_END(); + } + return pos; +} /* ObTableStoreUtil Section */ bool ObTableStoreUtil::ObITableLogTsRangeCompare::operator()( @@ -1105,6 +1120,17 @@ bool ObTableStoreUtil::ObITableSnapshotVersionCompare::operator()( return bret; } +bool ObTableStoreUtil::ObITableEndScnCompare::operator()( + const ObITable *ltable, const ObITable *rtable) const +{ + bool bret = false; + if (OB_SUCCESS != result_code_) { + } else if (OB_SUCCESS != (result_code_ = compare_table_by_end_scn(ltable, rtable, bret))) { + LOG_WARN_RET(result_code_, "failed to compare table with end scn", K(result_code_), KPC(ltable), KPC(rtable)); + } + return bret; +} + bool ObTableStoreUtil::ObTableHandleV2LogTsRangeCompare::operator()( const ObTableHandleV2 &lhandle, const ObTableHandleV2 &rhandle) const { @@ -1206,6 +1232,26 @@ int ObTableStoreUtil::compare_table_by_snapshot_version(const ObITable *ltable, return ret; } +int ObTableStoreUtil::compare_table_by_end_scn(const ObITable *ltable, const ObITable *rtable, bool &bret) +{ + int ret = OB_SUCCESS; + bret = false; + if (OB_ISNULL(ltable)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("left store must not null", K(ret)); + } else if (OB_ISNULL(rtable)) { + bret = true; + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("right store must not null", K(ret)); + } else { + bret = ltable->get_end_scn() < rtable->get_end_scn(); + if (ltable->get_end_scn() == rtable->get_end_scn()) { + bret = ltable->get_column_group_id() < 
rtable->get_column_group_id(); + } + } + return ret; +} + int ObTableStoreUtil::sort_major_tables(ObSEArray &tables) { int ret = OB_SUCCESS; @@ -1222,6 +1268,22 @@ int ObTableStoreUtil::sort_major_tables(ObSEArray &tables) +{ + int ret = OB_SUCCESS; + + if (tables.empty()) { + // no need sort + } else { + ObITableEndScnCompare comp(ret); + std::sort(tables.begin(), tables.end(), comp); + if (OB_FAIL(ret)) { + LOG_ERROR("failed to sort tables", K(ret), K(tables)); + } + } + return ret; +} + int ObTableStoreUtil::sort_minor_tables(ObArray &tables) { int ret = OB_SUCCESS; diff --git a/src/storage/tablet/ob_table_store_util.h b/src/storage/tablet/ob_table_store_util.h index b4de3cd55..9dcfdcf70 100644 --- a/src/storage/tablet/ob_table_store_util.h +++ b/src/storage/tablet/ob_table_store_util.h @@ -83,7 +83,7 @@ public: } OB_INLINE int64_t count() const { return cnt_; } OB_INLINE bool empty() const { return 0 == cnt_; } - TO_STRING_KV(K_(cnt), K_(serialize_table_type), K_(is_inited)); + TO_STRING_KV(K_(cnt), KP_(sstable_array), K_(serialize_table_type), K_(is_inited)); private: int inc_meta_ref_cnt(bool &inc_success) const; int inc_data_ref_cnt(bool &inc_success) const; @@ -147,6 +147,7 @@ private: DISALLOW_COPY_AND_ASSIGN(ObMemtableArray); }; +class ObDDLKV; class ObDDLKVArray final { public: @@ -155,9 +156,9 @@ public: ObDDLKVArray() : is_inited_(false), ddl_kvs_(nullptr), count_(0) {} ~ObDDLKVArray() { reset(); } - OB_INLINE ObITable *operator[](const int64_t pos) const + OB_INLINE ObDDLKV *operator[](const int64_t pos) const { - ObITable *ddl_kv = nullptr; + ObDDLKV *ddl_kv = nullptr; if (OB_UNLIKELY(!is_valid() || pos < 0 || pos >= count_)) { ddl_kv = nullptr; } else { @@ -174,13 +175,13 @@ public: OB_INLINE int64_t count() const { return count_; } OB_INLINE bool empty() const { return 0 == count_; } OB_INLINE bool is_valid() const { return 1 == count_ || (is_inited_ && count_ > 1 && nullptr != ddl_kvs_); } - OB_INLINE int64_t get_deep_copy_size() const { return 
count_ * sizeof(ObITable *); } - int init(ObArenaAllocator &allocator, common::ObIArray &ddl_kvs); + OB_INLINE int64_t get_deep_copy_size() const { return count_ * sizeof(ObDDLKV *); } + int init(ObArenaAllocator &allocator, common::ObIArray &ddl_kvs); int deep_copy(char *buf, const int64_t buf_size, int64_t &pos, ObDDLKVArray &dst) const; - TO_STRING_KV(K_(count), K_(is_inited)); + int64_t to_string(char *buf, const int64_t buf_len) const; private: bool is_inited_; - ObITable **ddl_kvs_; + ObDDLKV **ddl_kvs_; int64_t count_; private: DISALLOW_COPY_AND_ASSIGN(ObDDLKVArray); @@ -204,6 +205,14 @@ struct ObTableStoreUtil int &result_code_; }; + struct ObITableEndScnCompare { + explicit ObITableEndScnCompare(int &sort_ret) + : result_code_(sort_ret) {} + bool operator()(const ObITable *ltable, const ObITable *rtable) const; + + int &result_code_; + }; + struct ObTableHandleV2LogTsRangeCompare { explicit ObTableHandleV2LogTsRangeCompare(int &sort_ret) : result_code_(sort_ret) {} @@ -230,10 +239,12 @@ struct ObTableStoreUtil static int compare_table_by_scn_range(const ObITable *ltable, const ObITable *rtable, const bool is_ascend, bool &bret); static int compare_table_by_snapshot_version(const ObITable *ltable, const ObITable *rtable, bool &bret); + static int compare_table_by_end_scn(const ObITable *ltable, const ObITable *rtable, bool &bret); static int sort_minor_tables(ObArray &tables); static int reverse_sort_minor_table_handles(ObArray &table_handles); static int sort_major_tables(ObSEArray &tables); + static int sort_column_store_tables(ObSEArray &tables); static bool check_include_by_scn_range(const ObITable <able, const ObITable &rtable); static bool check_intersect_by_scn_range(const ObITable <able, const ObITable &rtable); diff --git a/src/storage/tablet/ob_tablet.cpp b/src/storage/tablet/ob_tablet.cpp index 1666baaef..68fe02de3 100644 --- a/src/storage/tablet/ob_tablet.cpp +++ b/src/storage/tablet/ob_tablet.cpp @@ -57,6 +57,7 @@ #include 
"storage/ddl/ob_ddl_struct.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" #include "storage/ddl/ob_tablet_ddl_kv.h" +#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/ls/ob_ls_tablet_service.h" #include "storage/tablet/ob_tablet_common.h" #include "storage/tablet/ob_tablet_obj_load_helper.h" @@ -286,21 +287,18 @@ int ObTablet::init_for_first_time_creation( const common::ObTabletID &data_tablet_id, const share::SCN &create_scn, const int64_t snapshot_version, - const ObStorageSchema &storage_schema, - const bool need_empty_major_table, + const ObCreateTabletSchema &storage_schema, ObFreezer *freezer) { int ret = OB_SUCCESS; const lib::Worker::CompatMode compat_mode = storage_schema.get_compat_mode(); + const bool need_create_empty_major_sstable = storage_schema.get_need_create_empty_major(); const int64_t default_max_sync_medium_scn = 0; ObTableHandleV2 table_handle; + bool is_table_row_store = false; ObTabletTableStoreFlag table_store_flag; - if (need_empty_major_table) { - table_store_flag.set_with_major_sstable(); - } else { - table_store_flag.set_without_major_sstable(); - } - ObITable **ddl_kvs_addr = nullptr; + table_store_flag.set_with_major_sstable(); + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObLinkedMacroBlockItemWriter linked_writer; @@ -322,6 +320,7 @@ int ObTablet::init_for_first_time_creation( || OB_ISNULL(log_handler_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(log_handler)); + } else if (!need_create_empty_major_sstable && FALSE_IT(table_store_flag.set_without_major_sstable())) { } else if (OB_FAIL(init_shared_params(ls_id, tablet_id, storage_schema.get_schema_version(), default_max_sync_medium_scn, compat_mode, freezer))) { LOG_WARN("failed to init shared params", K(ret), K(ls_id), K(tablet_id), K(compat_mode), KP(freezer)); @@ -341,7 +340,7 @@ int ObTablet::init_for_first_time_creation( } } if (OB_FAIL(ret)) { - } else if (need_empty_major_table 
+ } else if (need_create_empty_major_sstable && OB_FAIL(ObTabletCreateDeleteHelper::create_empty_sstable( allocator, *storage_schema_addr_.get_ptr(), tablet_id, snapshot_version, table_handle))) { LOG_WARN("failed to make empty co sstable", K(ret), K(snapshot_version)); @@ -394,7 +393,7 @@ int ObTablet::init_for_merge( const ObTabletMdsData &old_mds_data = old_tablet.mds_data_; const bool update_in_major_type_merge = param.need_report_ && param.sstable_->is_major_sstable(); int64_t finish_medium_scn = 0; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObLinkedMacroBlockItemWriter linked_writer; @@ -452,8 +451,6 @@ int ObTablet::init_for_merge( LOG_WARN("failed to init table store cache", K(ret), KPC(this)); } else if (OB_FAIL(try_update_start_scn())) { LOG_WARN("failed to update start scn", K(ret), K(param), K(table_store_addr_)); - } else if (OB_FAIL(try_update_ddl_checkpoint_scn())) { - LOG_WARN("failed to update clog checkpoint ts", K(ret), K(param), K(table_store_addr_)); } else if (OB_FAIL(try_update_table_store_flag(param))) { LOG_WARN("failed to update table store flag", K(ret), K(param), K(table_store_addr_)); } else if (OB_FAIL(get_finish_medium_scn(finish_medium_scn))) { @@ -515,7 +512,7 @@ int ObTablet::init_for_mds_table_dump( const ObTabletTableStore *old_table_store = nullptr; ObStorageSchema *old_storage_schema = nullptr; int64_t finish_medium_scn = 0; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObLinkedMacroBlockItemWriter linked_writer; @@ -587,7 +584,7 @@ int ObTablet::init_with_migrate_param( const share::ObLSID &ls_id = param.ls_id_; const common::ObTabletID &tablet_id = param.tablet_id_; allocator_ = &allocator; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObLinkedMacroBlockItemWriter linked_writer; @@ -687,7 +684,7 @@ int ObTablet::init_for_defragment( ObStorageSchema 
*old_storage_schema = nullptr; const ObTabletMdsData &old_mds_data = old_tablet.mds_data_; allocator_ = &allocator; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObLinkedMacroBlockItemWriter linked_writer; @@ -799,7 +796,7 @@ int ObTablet::init_for_sstable_replace( ObStorageSchema *old_storage_schema = nullptr; const ObStorageSchema *storage_schema = nullptr; int64_t finish_medium_scn = 0; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObLinkedMacroBlockItemWriter linked_writer; @@ -1074,7 +1071,7 @@ int ObTablet::init_with_update_medium_info( LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(pointer_hdl), K_(log_handler)); } else if (OB_FAIL(assign_memtables(old_tablet.memtables_, old_tablet.memtable_count_))) { LOG_WARN("fail to assign memtables", K(ret)); - } else if (OB_ISNULL(ddl_kvs_ = static_cast(allocator.alloc(sizeof(ObITable*) * DDL_KV_ARRAY_SIZE)))) { + } else if (OB_ISNULL(ddl_kvs_ = static_cast(allocator.alloc(sizeof(ObDDLKV *) * DDL_KV_ARRAY_SIZE)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory for ddl_kvs_", K(ret), KP(ddl_kvs_)); } else if (OB_FAIL(assign_ddl_kvs(old_tablet.ddl_kvs_, old_tablet.ddl_kv_count_))) { @@ -1652,7 +1649,7 @@ int ObTablet::load_deserialize_v1( ObTabletTxMultiSourceDataUnit tx_data; ObTabletBindingInfo ddl_data; ObMediumCompactionInfoList info_list; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; int64_t new_pos = pos; macro_info_addr_.addr_.set_none_addr(); @@ -1813,7 +1810,7 @@ int ObTablet::load_deserialize_v2( const bool prepare_memtable) { int ret = OB_SUCCESS; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; int64_t new_pos = pos; macro_info_addr_.addr_.set_none_addr(); @@ -1919,7 +1916,7 @@ int ObTablet::deserialize( int64_t remain = buf_header.buf_len_ - 
sizeof(ObTablet); int64_t start_pos = sizeof(ObTablet); ObArenaAllocator allocator; - ObITable **ddl_kvs_addr = nullptr; + ObDDLKV **ddl_kvs_addr = nullptr; int64_t ddl_kv_count = 0; ObTabletBlockHeader header; int32_t version = 0; @@ -1987,13 +1984,13 @@ int ObTablet::deserialize( // tiny tablet needs to deep copy `ddl_kvs_addr` array to `tablet_buf + start_pos`, and CANNOT additionally // inc ref count. Cause `pull_memtables` already done this. if (OB_NOT_NULL(ddl_kvs_addr)) { - const int64_t ddl_kv_size = sizeof(ObITable*) * DDL_KV_ARRAY_SIZE; + const int64_t ddl_kv_size = sizeof(ObDDLKV *) * DDL_KV_ARRAY_SIZE; if (remain < ddl_kv_size) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to deep copy ddl kv to tablet", K(ret), K(remain), K(ddl_kv_size), K(ddl_kv_count)); } else { ddl_kv_count_ = ddl_kv_count; - ddl_kvs_ = reinterpret_cast(tablet_buf + start_pos); + ddl_kvs_ = reinterpret_cast(tablet_buf + start_pos); if (OB_ISNULL(MEMCPY(ddl_kvs_, ddl_kvs_addr, ddl_kv_size))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to memcpy ddl_kvs", K(ret), KP(ddl_kvs_), KP(ddl_kvs_addr), K(ddl_kv_count_)); @@ -3484,15 +3481,15 @@ int ObTablet::get_read_major_sstable( return ret; } -int ObTablet::get_ddl_memtables(common::ObIArray &ddl_memtables) const +int ObTablet::get_ddl_kvs(common::ObIArray &ddl_kvs) const { int ret = OB_SUCCESS; - ddl_memtables.reset(); + ddl_kvs.reset(); for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kv_count_; ++i) { if (OB_ISNULL(ddl_kvs_[i])) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null ddl mem table", K(ret), K(i), KPC(this)); - } else if (OB_FAIL(ddl_memtables.push_back(ddl_kvs_[i]))) { + } else if (OB_FAIL(ddl_kvs.push_back(ddl_kvs_[i]))) { LOG_WARN("failed to push back ddl memtables", K(ret)); } } @@ -4661,34 +4658,6 @@ int ObTablet::try_update_start_scn() return ret; } -int ObTablet::try_update_ddl_checkpoint_scn() -{ - int ret = OB_SUCCESS; - ObTabletMemberWrapper table_store_wrapper; - ObTableStoreIterator iter; - if 
(OB_FAIL(fetch_table_store(table_store_wrapper))) { - LOG_WARN("fail to fetch table store", K(ret)); - } else { - ObSSTable *last_ddl_sstable = static_cast( - table_store_wrapper.get_member()->get_ddl_sstables().get_boundary_table(true/*last*/)); - if (OB_NOT_NULL(last_ddl_sstable)) { - const SCN &ddl_checkpoint_scn = last_ddl_sstable->get_end_scn(); - if (OB_UNLIKELY(ddl_checkpoint_scn < tablet_meta_.ddl_checkpoint_scn_)) { - if (ddl_checkpoint_scn < tablet_meta_.ddl_start_scn_) { - ret = OB_TASK_EXPIRED; - LOG_INFO("ddl checkpoint scn is less than ddl start log ts, task expired", K(ret), K(ddl_checkpoint_scn), K(tablet_meta_)); - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected clog checkpoint scn", K(ret), K(ddl_checkpoint_scn), K(tablet_meta_)); - } - } else { - tablet_meta_.ddl_checkpoint_scn_ = ddl_checkpoint_scn; - } - } - } - return ret; -} - int ObTablet::try_update_table_store_flag(const ObUpdateTableStoreParam &param) { int ret = OB_SUCCESS; @@ -5077,40 +5046,62 @@ int ObTablet::update_tablet_autoinc_seq(const uint64_t autoinc_seq) return ret; } -int ObTablet::start_ddl_if_need() +int ObTablet::start_direct_load_task_if_need() { int ret = OB_SUCCESS; - ObDDLKvMgrHandle ddl_kv_mgr_handle; - if (!tablet_meta_.ddl_start_scn_.is_valid_and_not_min()) { - LOG_DEBUG("no need to start ddl kv manager", K(ret), K(tablet_meta_)); - } else if (OB_FAIL(get_ddl_kv_mgr(ddl_kv_mgr_handle, true/*try_create*/))) { - LOG_WARN("create ddl kv mgr failed", K(ret)); - } else { - ObLS *ls = nullptr; - ObLSService *ls_service = nullptr; + ObTabletMemberWrapper table_store_wrapper; + ObTenantDirectLoadMgr *tenant_direct_load_mgr = MTL(ObTenantDirectLoadMgr *); + ObTabletDirectLoadMgrHandle direct_load_mgr_handle; + if (is_empty_shell()) { + LOG_DEBUG("this tablet is empty shell, skip", K(ret), K(tablet_meta_)); + } else if (OB_ISNULL(tenant_direct_load_mgr)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys", K(ret), K(MTL_ID())); + } else if
(!tablet_meta_.ddl_start_scn_.is_valid_and_not_min()) { + LOG_DEBUG("no need to reload tablet direct load task", K(ret), K(tablet_meta_)); + int tmp_ret = OB_SUCCESS; + ObTabletDirectLoadMgrKey direct_load_mgr_key(tablet_meta_.tablet_id_, true/* is_full_direct_load */); + ObSEArray tablet_ids; ObLSHandle ls_handle; - ObITable::TableKey table_key; - table_key.table_type_ = ObITable::TableType::MAJOR_SSTABLE; - table_key.tablet_id_ = tablet_meta_.tablet_id_; - table_key.version_range_.base_version_ = 0; - table_key.version_range_.snapshot_version_ = tablet_meta_.ddl_snapshot_version_; - const SCN &start_scn = tablet_meta_.ddl_start_scn_; - if (OB_ISNULL(ls_service = MTL(ObLSService*))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get ObLSService from MTL", K(ret), KP(ls_service)); - } else if (OB_FAIL(ls_service->get_ls(tablet_meta_.ls_id_, ls_handle, ObLSGetMod::TABLET_MOD))) { - LOG_WARN("failed to get ls", K(ret)); - } else if (OB_ISNULL(ls = ls_handle.get_ls())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("ls should not be NULL", K(ret), KP(ls)); - } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->ddl_start(*ls, - *this, - table_key, - start_scn, - tablet_meta_.ddl_data_format_version_, - tablet_meta_.ddl_execution_id_, - tablet_meta_.ddl_checkpoint_scn_))) { - LOG_WARN("start ddl kv manager failed", K(ret), K(table_key), K(tablet_meta_)); + if (OB_TMP_FAIL(tenant_direct_load_mgr->remove_tablet_direct_load(direct_load_mgr_key))) { + if (OB_ENTRY_NOT_EXIST != tmp_ret) { + LOG_WARN("get tablet mgr failed", K(tmp_ret), K(tablet_meta_)); + } + } else if (OB_TMP_FAIL(MTL(ObLSService *)->get_ls(tablet_meta_.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get ls", K(tmp_ret), "ls_id", tablet_meta_.ls_id_); + } else if (OB_ISNULL(ls_handle.get_ls()->get_ddl_log_handler())) { + tmp_ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl log handler is null", K(tmp_ret), K(tablet_meta_)); + } else if (OB_TMP_FAIL(tablet_ids.push_back(tablet_meta_.tablet_id_))) { + 
LOG_WARN("push back tablet id failed", K(tmp_ret), K(tablet_meta_)); + } else if (OB_TMP_FAIL(ls_handle.get_ls()->get_ddl_log_handler()->del_tablets(tablet_ids))) { + LOG_WARN("delete tablet from ddl log handler failed", K(tmp_ret), K(tablet_meta_)); + } + } else if (OB_FAIL(fetch_table_store(table_store_wrapper))) { + LOG_WARN("fail to fetch table store", K(ret)); + } else if (nullptr != table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)) { + // the major sstable has already existed. + LOG_DEBUG("no need to reload tablet direct load task", K(ret), K(tablet_meta_)); + } else { + const int64_t unused_context_id = -1; + ObTabletDirectLoadInsertParam direct_load_param; + direct_load_param.is_replay_ = true; + direct_load_param.common_param_.ls_id_ = tablet_meta_.ls_id_; + direct_load_param.common_param_.tablet_id_ = tablet_meta_.tablet_id_; + direct_load_param.common_param_.data_format_version_ = tablet_meta_.ddl_data_format_version_; + direct_load_param.common_param_.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_DDL; + direct_load_param.common_param_.read_snapshot_ = tablet_meta_.ddl_snapshot_version_; + + if (OB_FAIL(tenant_direct_load_mgr->create_tablet_direct_load( + unused_context_id, tablet_meta_.ddl_execution_id_, direct_load_param, tablet_meta_.ddl_checkpoint_scn_))) { + LOG_WARN("create tablet manager failed", K(ret)); + } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr( + tablet_meta_.tablet_id_, true/* is_full_direct_load */, direct_load_mgr_handle))) { + LOG_WARN("get tablet mgr failed", K(ret), K(tablet_meta_)); + } else if (OB_FAIL(direct_load_mgr_handle.get_full_obj()->start_with_checkpoint( + *this, tablet_meta_.ddl_start_scn_, tablet_meta_.ddl_data_format_version_, + tablet_meta_.ddl_execution_id_, tablet_meta_.ddl_checkpoint_scn_))) { + LOG_WARN("direct load start failed", K(ret)); } } return ret; @@ -6023,7 +6014,7 @@ int ObTablet::refresh_memtable_and_update_seq(const uint64_t seq) return ret; } 
-int ObTablet::pull_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kvs_addr, int64_t &ddl_kv_count) +int ObTablet::pull_memtables(ObArenaAllocator &allocator, ObDDLKV **&ddl_kvs_addr, int64_t &ddl_kv_count) { int ret = OB_SUCCESS; if (OB_FAIL(pull_memtables_without_ddl())) { @@ -6435,7 +6426,7 @@ int ObTablet::assign_memtables(memtable::ObIMemtable * const * memtables, const return ret; } -int ObTablet::assign_ddl_kvs(ObITable * const *ddl_kvs, const int64_t ddl_kv_count) +int ObTablet::assign_ddl_kvs(ObDDLKV * const *ddl_kvs, const int64_t ddl_kv_count) { int ret = OB_SUCCESS; @@ -6447,7 +6438,7 @@ int ObTablet::assign_ddl_kvs(ObITable * const *ddl_kvs, const int64_t ddl_kv_cou } else { // deep copy ddl_kvs to tablet.ddl_kvs_ and inc ref for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kv_count; ++i) { - ObITable *ddl_kv = ddl_kvs[i]; + ObDDLKV *ddl_kv = ddl_kvs[i]; if (OB_ISNULL(ddl_kv)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected ddl_kvs", K(ret), K(i), KP(ddl_kvs)); @@ -6527,14 +6518,14 @@ int ObTablet::get_mds_table_handle_(mds::MdsTableHandle &handle, return ret; } -int ObTablet::pull_ddl_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kvs_addr, int64_t &ddl_kv_count) +int ObTablet::pull_ddl_memtables(ObArenaAllocator &allocator, ObDDLKV **&ddl_kvs_addr, int64_t &ddl_kv_count) { int ret = OB_SUCCESS; ObArray ddl_memtables; ObDDLKvMgrHandle kv_mgr_handle; bool has_ddl_kv = false; - ObTablesHandleArray ddl_kvs_handle; - if (OB_UNLIKELY(0 != ddl_kv_count)) { + ObArray ddl_kvs_handle; + if (OB_UNLIKELY(0 != ddl_kv_count_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected ddl kv count when pull ddl memtables", K(ret), K(ddl_kv_count), KPC(this)); } else if (OB_FAIL(get_ddl_kv_mgr(kv_mgr_handle))) { @@ -6548,19 +6539,19 @@ int ObTablet::pull_ddl_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kv LOG_WARN("failed to get all ddl freeze kvs", K(ret)); } else { ObITable *temp_ddl_kvs; - if (ddl_kvs_handle.get_count() > 0) { - ddl_kvs_addr = 
static_cast(allocator.alloc(sizeof(ObITable*) * DDL_KV_ARRAY_SIZE)); + if (ddl_kvs_handle.count() > 0) { + ddl_kvs_addr = static_cast(allocator.alloc(sizeof(ObDDLKV *) * DDL_KV_ARRAY_SIZE)); if (OB_ISNULL(ddl_kvs_addr)) { ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to allocate memory for ddl_kvs_addr", K(ret), K(ddl_kvs_handle.get_count())); + LOG_WARN("failed to allocate memory for ddl_kvs_addr", K(ret), K(ddl_kvs_handle.count())); } } SCN ddl_checkpoint_scn = get_tablet_meta().ddl_checkpoint_scn_; - for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kvs_handle.get_count(); ++i) { - ObDDLKV *ddl_kv = static_cast(ddl_kvs_handle.get_table(i)); + for (int64_t i = 0; OB_SUCC(ret) && i < ddl_kvs_handle.count(); ++i) { + ObDDLKV *ddl_kv = ddl_kvs_handle.at(i).get_obj(); if (OB_ISNULL(ddl_kv)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("get ddl kv failed", K(ret), K(i)); + LOG_WARN("get ddl kv failed", K(ret), KP(ddl_kv)); } else if (ddl_kv->is_closed()) { // skip, because closed meanns ddl dump sstable created } else if (ddl_kv->get_freeze_scn() > ddl_checkpoint_scn) { @@ -6577,7 +6568,7 @@ int ObTablet::pull_ddl_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kv } } LOG_INFO("pull ddl memtables", K(ret), K(ddl_kvs_handle), K(ddl_checkpoint_scn), - K(ddl_kv_count), "ddl_kv", ObArrayWrap(ddl_kvs_addr, ddl_kv_count)); + K(ddl_kv_count), "ddl_kv", ObArrayWrap(ddl_kvs_addr, ddl_kv_count)); } if (ddl_kv_count == 0) { // In the above for loop, ddl_kvs_addr's assignment can be skipped (e.g. ddl_kv->is_closed()). 
@@ -6592,16 +6583,9 @@ int ObTablet::pull_ddl_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kv void ObTablet::reset_ddl_memtables() { - ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); - for(int i = 0; i < ddl_kv_count_; ++i) { - ObITable *ddl_kv = ddl_kvs_[i]; - if (OB_NOT_NULL(ddl_kv)) { - const ObITable::TableType table_type = ddl_kv->get_key().table_type_; - const int64_t ref_cnt = ddl_kv->dec_ref(); - if (0 == ref_cnt) { - t3m->push_table_into_gc_queue(ddl_kv, table_type); - } - } + for(int64_t i = 0; i < ddl_kv_count_; ++i) { + ObDDLKV *ddl_kv = ddl_kvs_[i]; + ddl_kv->dec_ref(); ddl_kvs_[i] = nullptr; } ddl_kvs_ = nullptr; diff --git a/src/storage/tablet/ob_tablet.h b/src/storage/tablet/ob_tablet.h index a75bd21b8..f15af731d 100644 --- a/src/storage/tablet/ob_tablet.h +++ b/src/storage/tablet/ob_tablet.h @@ -159,7 +159,6 @@ public: bool is_ls_inner_tablet() const; bool is_ls_tx_data_tablet() const; bool is_ls_tx_ctx_tablet() const; - bool is_data_tablet() const; void update_wash_score(const int64_t score); void inc_ref(); int64_t dec_ref(); @@ -175,6 +174,9 @@ public: inline int64_t get_last_major_column_count() const { return table_store_cache_.last_major_column_count_; } inline common::ObCompressorType get_last_major_compressor_type() const { return table_store_cache_.last_major_compressor_type_; } inline common::ObRowStoreType get_last_major_latest_row_store_type() const { return table_store_cache_.last_major_latest_row_store_type_; } + inline share::ObLSID get_ls_id() const { return tablet_meta_.ls_id_; } + inline common::ObTabletID get_tablet_id() const { return tablet_meta_.tablet_id_; } + inline common::ObTabletID get_data_tablet_id() const { return tablet_meta_.data_tablet_id_; } inline bool is_row_store() const { return table_store_cache_.is_row_store_; } int get_mds_table_rec_log_scn(share::SCN &rec_scn); int mds_table_flush(const share::SCN &decided_scn); @@ -188,8 +190,7 @@ public: const common::ObTabletID &data_tablet_id, const 
share::SCN &create_scn, const int64_t snapshot_version, - const ObStorageSchema &storage_schema, - const bool need_empty_major_table, + const ObCreateTabletSchema &storage_schema, ObFreezer *freezer); // dump/merge build new multi version tablet int init_for_merge( @@ -367,7 +368,7 @@ public: int get_all_sstables(ObTableStoreIterator &iter, const bool need_unpack = false) const; int get_tablet_size(const bool ignore_shared_block, int64_t &meta_size, int64_t &data_size); int get_memtables(common::ObIArray &memtables, const bool need_active = false) const; - int get_ddl_memtables(common::ObIArray &ddl_memtables) const; + int get_ddl_kvs(common::ObIArray &ddl_kvs) const; int check_need_remove_old_table(const int64_t multi_version_start, bool &need_remove) const; int update_upper_trans_version(ObLS &ls, bool &is_updated); @@ -450,7 +451,7 @@ public: int get_ddl_kv_mgr(ObDDLKvMgrHandle &ddl_kv_mgr_handle, bool try_create = false); int set_ddl_kv_mgr(const ObDDLKvMgrHandle &ddl_kv_mgr_handle); int remove_ddl_kv_mgr(const ObDDLKvMgrHandle &ddl_kv_mgr_handle); - int start_ddl_if_need(); + int start_direct_load_task_if_need(); int get_ddl_sstables(ObTableStoreIterator &table_store_iter) const; int get_mini_minor_sstables(ObTableStoreIterator &table_store_iter) const; int get_table(const ObITable::TableKey &table_key, ObTableHandleV2 &handle) const; @@ -801,7 +802,7 @@ private: #endif // memtable operation - int pull_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kvs_addr, int64_t &ddl_kv_count); + int pull_memtables(ObArenaAllocator &allocator, ObDDLKV **&ddl_kvs_addr, int64_t &ddl_kv_count); int pull_memtables_without_ddl(); int update_memtables(); int build_memtable(common::ObIArray &handle_array, const int64_t start_pos = 0); @@ -812,9 +813,9 @@ private: int add_memtable(memtable::ObMemtable* const table); bool exist_memtable_with_end_scn(const ObITable *table, const share::SCN &end_scn); int assign_memtables(memtable::ObIMemtable * const *memtables, const 
int64_t memtable_count); - int assign_ddl_kvs(ObITable * const *ddl_kvs, const int64_t ddl_kv_count); + int assign_ddl_kvs(ObDDLKV * const *ddl_kvs, const int64_t ddl_kv_count); void reset_memtable(); - int pull_ddl_memtables(ObArenaAllocator &allocator, ObITable **&ddl_kvs_addr, int64_t &ddl_kv_count); + int pull_ddl_memtables(ObArenaAllocator &allocator, ObDDLKV **&ddl_kvs_addr, int64_t &ddl_kv_count); void reset_ddl_memtables(); int wait_release_memtables_(); int mark_mds_table_switched_to_empty_shell_(); @@ -843,7 +844,7 @@ private: ObTabletComplexAddr storage_schema_addr_; // size: 48B, alignment: 8B ObTabletComplexAddr macro_info_addr_; // size: 48B, alignment: 8B int64_t memtable_count_; - ObITable **ddl_kvs_; + ObDDLKV **ddl_kvs_; int64_t ddl_kv_count_; ObTabletPointerHandle pointer_hdl_; // size: 24B, alignment: 8B ObMetaDiskAddr tablet_addr_; // size: 40B, alignment: 8B @@ -877,7 +878,7 @@ private: inline int64_t ObTablet::get_try_cache_size() const { return sizeof(ObTablet) + (OB_ISNULL(rowkey_read_info_) ? 0 : rowkey_read_info_->get_deep_copy_size()) - + (ddl_kv_count_ > 0 ? sizeof(ObITable *) * DDL_KV_ARRAY_SIZE : 0); + + (ddl_kv_count_ > 0 ? sizeof(ObDDLKV *) * DDL_KV_ARRAY_SIZE : 0); } inline bool ObTablet::is_ls_inner_tablet() const @@ -910,12 +911,6 @@ inline bool ObTablet::is_valid() const && nullptr == rowkey_read_info_); } -inline bool ObTablet::is_data_tablet() const -{ - return is_valid() - && (tablet_meta_.tablet_id_ == tablet_meta_.data_tablet_id_); -} - inline int ObTablet::allow_to_read_() { return tablet_meta_.ha_status_.is_none() ? 
common::OB_SUCCESS : common::OB_REPLICA_NOT_READABLE; diff --git a/src/storage/tablet/ob_tablet_create_delete_helper.cpp b/src/storage/tablet/ob_tablet_create_delete_helper.cpp index 828d316f9..c815b0d35 100644 --- a/src/storage/tablet/ob_tablet_create_delete_helper.cpp +++ b/src/storage/tablet/ob_tablet_create_delete_helper.cpp @@ -628,22 +628,6 @@ bool ObTabletCreateDeleteHelper::is_pure_hidden_tablets(const ObCreateTabletInfo return tablet_ids.count() >= 1 && !is_contain(tablet_ids, data_tablet_id) && info.is_create_bind_hidden_tablets_; } -int ObTabletCreateDeleteHelper::check_need_create_empty_major_sstable( - const ObCreateTabletSchema &create_table_schema, - bool &need_create_sstable) -{ - int ret = OB_SUCCESS; - need_create_sstable = false; - if (OB_UNLIKELY(!create_table_schema.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid args", K(ret), K(create_table_schema)); - } else { - need_create_sstable = !(create_table_schema.is_user_hidden_table() - || (create_table_schema.is_index_table() && !create_table_schema.can_read_index())); - } - return ret; -} - int ObTabletCreateDeleteHelper::build_create_sstable_param( const ObStorageSchema &storage_schema, const ObTabletID &tablet_id, diff --git a/src/storage/tablet/ob_tablet_create_delete_helper.h b/src/storage/tablet/ob_tablet_create_delete_helper.h index 39cca5b7e..ee92b235f 100644 --- a/src/storage/tablet/ob_tablet_create_delete_helper.h +++ b/src/storage/tablet/ob_tablet_create_delete_helper.h @@ -123,9 +123,6 @@ public: const ObTabletPoolType &type, const ObTabletMapKey &key, ObTabletHandle &handle); - static int check_need_create_empty_major_sstable( - const ObCreateTabletSchema &create_tablet_schema, - bool &need_create_sstable); // Attention !!! 
only used when first creating tablet static int create_empty_sstable( common::ObArenaAllocator &allocator, diff --git a/src/storage/tablet/ob_tablet_create_mds_helper.cpp b/src/storage/tablet/ob_tablet_create_mds_helper.cpp index 406de6911..a82b7def5 100644 --- a/src/storage/tablet/ob_tablet_create_mds_helper.cpp +++ b/src/storage/tablet/ob_tablet_create_mds_helper.cpp @@ -45,7 +45,7 @@ int ObTabletCreateMdsHelper::on_commit_for_old_mds( const int64_t len, const transaction::ObMulSourceDataNotifyArg &notify_arg) { - mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = NotifyType::UNKNOWN;// disable runtime check + mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = transaction::NotifyType::UNKNOWN;// disable runtime check return ObTabletCreateDeleteHelper::process_for_old_mds(buf, len, notify_arg); } @@ -570,7 +570,24 @@ int ObTabletCreateMdsHelper::convert_schemas( obrpc::ObBatchCreateTabletArg &arg) { int ret = OB_SUCCESS; + // For tenant data version belongs to (, DATA_VERSION_4_3_0_0), + // use the schema status to decide whether to create major sstable or not when creating tablet. + // For tenant data version belongs to [DATA_VERSION_4_3_0_0, ), + // use the original `need_create_empty_major_` in ObCreateTabletSchema to decide it.
if (arg.create_tablet_schemas_.count() > 0) { + const uint64_t tenant_data_version = arg.create_tablet_schemas_[0]->get_tenant_data_version(); + for (int64_t i = 0; OB_SUCC(ret) && tenant_data_version < DATA_VERSION_4_3_0_0 + && i < arg.table_schemas_.count(); ++i) { + ObCreateTabletSchema *create_tablet_schema = nullptr; + if (OB_ISNULL(create_tablet_schema = arg.create_tablet_schemas_[i])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", K(ret), K(i), K(arg)); + } else { + const bool need_create_empty_major = + !(create_tablet_schema->is_user_hidden_table() || (create_tablet_schema->is_index_table() && !create_tablet_schema->can_read_index())); + create_tablet_schema->set_need_create_empty_major(need_create_empty_major); + } + } } // compatibility with DATA_VERSION_4_1_0_0 else if (arg.tablets_.count() <= 0) { @@ -582,12 +599,15 @@ int ObTabletCreateMdsHelper::convert_schemas( ObTableSchema &table_schema = arg.table_schemas_[i]; ObCreateTabletSchema *create_tablet_schema = NULL; void *create_tablet_schema_ptr = arg.allocator_.alloc(sizeof(ObCreateTabletSchema)); + const bool need_create_empty_major = + !(table_schema.is_user_hidden_table() || (table_schema.is_index_table() && !table_schema.can_read_index())); if (OB_ISNULL(create_tablet_schema_ptr)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate storage schema", KR(ret), K(table_schema)); } else if (FALSE_IT(create_tablet_schema = new (create_tablet_schema_ptr)ObCreateTabletSchema())) { } else if (OB_FAIL(create_tablet_schema->init(arg.allocator_, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, need_create_empty_major))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(arg.create_tablet_schemas_.push_back(create_tablet_schema))) { LOG_WARN("failed to push back table 
schema", KR(ret), K(create_tablet_schema)); @@ -652,7 +672,7 @@ int ObTabletCreateMdsHelper::build_pure_data_tablet( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(info), K(arg)); } else if (CLICK_FAIL(ls->get_tablet_svr()->create_tablet(ls_id, data_tablet_id, data_tablet_id, - scn, snapshot_version, *create_tablet_schemas[info.table_schema_index_[index]], + scn, snapshot_version, *create_tablet_schemas[info.table_schema_index_[index]], compat_mode, tablet_handle))) { LOG_WARN("failed to do create tablet", K(ret), K(ls_id), K(data_tablet_id), "arg", PRETTY_ARG(arg)); } @@ -732,7 +752,7 @@ int ObTabletCreateMdsHelper::build_mixed_tablets( } else if (CLICK_FAIL(tablet_id_array.push_back(tablet_id))) { LOG_WARN("failed to push back tablet id", K(ret), K(ls_id), K(tablet_id)); } else if (CLICK_FAIL(ls->get_tablet_svr()->create_tablet(ls_id, tablet_id, data_tablet_id, - scn, snapshot_version, *create_tablet_schema, tablet_handle))) { + scn, snapshot_version, *create_tablet_schema, compat_mode, tablet_handle))) { LOG_WARN("failed to do create tablet", K(ret), K(ls_id), K(tablet_id), K(data_tablet_id), "arg", PRETTY_ARG(arg)); } @@ -820,7 +840,7 @@ int ObTabletCreateMdsHelper::build_pure_aux_tablets( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(info), K(i)); } else if (CLICK_FAIL(ls->get_tablet_svr()->create_tablet(ls_id, tablet_id, data_tablet_id, - scn, snapshot_version, *create_tablet_schema, tablet_handle))) { + scn, snapshot_version, *create_tablet_schema, compat_mode, tablet_handle))) { LOG_WARN("failed to do create tablet", K(ret), K(ls_id), K(tablet_id), K(data_tablet_id), "arg", PRETTY_ARG(arg)); } @@ -914,8 +934,8 @@ int ObTabletCreateMdsHelper::build_bind_hidden_tablets( K(ls_id), K(orig_tablet_id), K(tablet_id)); } else if (CLICK_FAIL(tablet_id_array.push_back(tablet_id))) { LOG_WARN("failed to push back tablet id", K(ret), K(ls_id), K(tablet_id)); - } else if (CLICK_FAIL(ls->get_tablet_svr()->create_tablet(ls_id, tablet_id, 
tablet_id, - scn, snapshot_version, *create_tablet_schema, tablet_handle))) { + } else if (CLICK_FAIL(ls->get_tablet_svr()->create_tablet(ls_id, tablet_id, orig_tablet_id, + scn, snapshot_version, *create_tablet_schema, compat_mode, tablet_handle))) { LOG_WARN("failed to do create tablet", K(ret), K(ls_id), K(tablet_id), K(orig_tablet_id), "arg", PRETTY_ARG(arg)); } diff --git a/src/storage/tablet/ob_tablet_persister.cpp b/src/storage/tablet/ob_tablet_persister.cpp index 413acaaee..d80a4e76c 100644 --- a/src/storage/tablet/ob_tablet_persister.cpp +++ b/src/storage/tablet/ob_tablet_persister.cpp @@ -916,7 +916,7 @@ int ObTabletPersister::transform(const ObTabletTransformArg &arg, char *buf, con ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet memory buffer not enough for ddl kvs", K(ret), K(remain), K(ddl_kvs_size)); } else { - tiny_tablet->ddl_kvs_ = reinterpret_cast(buf + start_pos); + tiny_tablet->ddl_kvs_ = reinterpret_cast(buf + start_pos); if (OB_FAIL(tiny_tablet->assign_ddl_kvs(arg.ddl_kvs_, arg.ddl_kv_count_))) { LOG_WARN("fail to assign ddl_kvs_", K(ret), KP(arg.ddl_kvs_), K(arg.ddl_kv_count_), KP(buf), K(start_pos)); } else { diff --git a/src/storage/tablet/ob_tablet_persister.h b/src/storage/tablet/ob_tablet_persister.h index 523b78605..5930db63b 100644 --- a/src/storage/tablet/ob_tablet_persister.h +++ b/src/storage/tablet/ob_tablet_persister.h @@ -100,7 +100,7 @@ public: ObMetaDiskAddr tablet_macro_info_addr_; ObTabletCreateDeleteMdsUserData tablet_status_cache_; bool is_row_store_; - ObITable **ddl_kvs_; + ObDDLKV **ddl_kvs_; int64_t ddl_kv_count_; memtable::ObIMemtable *memtables_[MAX_MEMSTORE_CNT]; int64_t memtable_count_; diff --git a/src/storage/tablet/ob_tablet_table_store.cpp b/src/storage/tablet/ob_tablet_table_store.cpp index 79b38e082..7df89edeb 100644 --- a/src/storage/tablet/ob_tablet_table_store.cpp +++ b/src/storage/tablet/ob_tablet_table_store.cpp @@ -737,6 +737,7 @@ int ObTabletTableStore::calculate_read_tables( LOG_DEBUG("the snapshot_version 
of ddl major sstable is not match", "ddl_major_sstable_version", first_ddl_sstable->get_data_version(), K(snapshot_version)); } + LOG_INFO("calc ddl read tables", K(ret), K(snapshot_version), K(ddl_major_sstables.count()), KPC(first_ddl_sstable)); } } } else { // no major table, not ready for reading @@ -1015,7 +1016,7 @@ int ObTabletTableStore::get_all_sstable( LOG_WARN("fail to add all major tables to iterator", K(ret), K_(major_tables)); } else if (!minor_tables_.empty() && OB_FAIL(iter.add_tables(minor_tables_, 0, minor_tables_.count()))) { LOG_WARN("fail to add all minor tables to iterator", K(ret), K_(major_tables)); - } else if (!ddl_sstables_.empty() && OB_FAIL(iter.add_tables(ddl_sstables_, 0, ddl_sstables_.count()))) { + } else if (!ddl_sstables_.empty() && OB_FAIL(iter.add_tables(ddl_sstables_, 0, ddl_sstables_.count(), unpack_co_table))) { LOG_WARN("fail to add all ddl sstables to iterator", K(ret), K_(ddl_sstables)); } return ret; @@ -1108,7 +1109,7 @@ int ObTabletTableStore::get_ha_tables(ObTableStoreIterator &iter, bool &is_ready LOG_WARN("failed to add major table to iterator", K(ret)); } else if (!minor_tables_.empty() && OB_FAIL(iter.add_tables(minor_tables_, 0, minor_tables_.count()))) { LOG_WARN("failed to add minor table to iterator", K(ret)); - } else if (!ddl_sstables_.empty() && OB_FAIL(iter.add_tables(ddl_sstables_, 0, ddl_sstables_.count()))) { + } else if (!ddl_sstables_.empty() && OB_FAIL(iter.add_tables(ddl_sstables_, 0, ddl_sstables_.count(), true/*unpack_co*/))) { LOG_WARN("failed to add ddl table to iterator", K(ret)); } else if (OB_FAIL(iter.set_retire_check())) { LOG_WARN("failed to set retire check to iterator", K(ret)); @@ -1510,8 +1511,15 @@ int ObTabletTableStore::get_ddl_major_sstables(ObIArray &ddl_major_s } } for (int64_t i = 0; OB_SUCC(ret) && i < ddl_mem_sstables_.count(); ++i) { - if (OB_FAIL(ddl_major_sstables.push_back(ddl_mem_sstables_[i]))) { - LOG_WARN("push back old ddl sstable failed", K(ret), K(i)); + ObIArray 
&ddl_memtables_in_kv = ddl_mem_sstables_[i]->get_ddl_memtables(); + if (ddl_memtables_in_kv.empty()) { + // skip + } else { + ObDDLMemtable *ddl_memtable = ddl_memtables_in_kv.at(0); + if (ObITable::DDL_MEM_SSTABLE == ddl_memtable->get_key().table_type_ + && OB_FAIL(ddl_major_sstables.push_back(ddl_memtable))) { + LOG_WARN("push back old ddl sstable failed", K(ret), K(i)); + } } } } @@ -1523,8 +1531,8 @@ int ObTabletTableStore::pull_ddl_memtables( const ObTablet &tablet) { int ret = OB_SUCCESS; - ObArray ddl_memtables; - if (OB_FAIL(tablet.get_ddl_memtables(ddl_memtables))) { + ObArray ddl_memtables; + if (OB_FAIL(tablet.get_ddl_kvs(ddl_memtables))) { LOG_WARN("failed to get ddl memtables array from tablet", K(ret)); } else if (!ddl_memtables.empty() && OB_FAIL(ddl_mem_sstables_.init(allocator, ddl_memtables))) { LOG_WARN("assign ddl memtables failed", K(ret), K(ddl_memtables)); @@ -2506,14 +2514,17 @@ int64_t ObTabletTableStore::to_string(char *buf, const int64_t buf_len) const } if (ddl_mem_sstables_.is_valid()) { for (int64_t i = 0; i < ddl_mem_sstables_.count(); ++i) { - ObITable *table = ddl_mem_sstables_[i]; - if (NULL != table && table->is_sstable()) { + ObDDLKV *table = ddl_mem_sstables_[i]; + if (NULL != table) { J_OBJ_START(); - J_KV(K(i), "type", ObITable::get_table_type_name(table->get_key().table_type_), - "tablet_id", table->get_key().tablet_id_, - "scn_range", table->get_key().scn_range_, + ObScnRange scn_range; + scn_range.start_scn_ = table->get_start_scn(); + scn_range.end_scn_ = table->get_freeze_scn(); + J_KV(K(i), "type", ObITable::get_table_type_name(ObITable::DDL_MEM_SSTABLE), + "tablet_id", table->get_tablet_id(), + "scn_range", scn_range, "ref", table->get_ref(), - "max_merge_version", static_cast(table)->get_max_merged_trans_version()); + "max_merge_version", table->get_snapshot_version()); J_OBJ_END(); J_COMMA(); } @@ -2577,9 +2588,9 @@ int64_t ObPrintTableStore::to_string(char *buf, const int64_t buf_len) const bool is_print = false; 
print_arr(major_tables_, "MAJOR", buf, buf_len, pos, is_print); print_arr(minor_tables_, "MINOR", buf, buf_len, pos, is_print); - print_arr(ddl_sstables_, "DDL_DUMP", buf, buf_len, pos, is_print); print_ddl_mem(ddl_mem_sstables_, "DDL_MEM", buf, buf_len, pos, is_print); print_mem(memtables_, "MEM", buf, buf_len, pos, is_print); + print_arr(ddl_sstables_, "DDL_SSTABLES", buf, buf_len, pos, is_print); print_arr(meta_major_tables_, "META_MAJOR", buf, buf_len, pos, is_print); } else { J_EMPTY_OBJ(); @@ -2623,7 +2634,7 @@ void ObPrintTableStore::print_ddl_mem( if (is_print && 0 == i) { J_NEWLINE(); } - table_to_string(tables[i], i == 0 ? table_arr : " ", buf, buf_len, pos); + ddl_kv_to_string(tables[i], i == 0 ? table_arr : " ", buf, buf_len, pos); if (i < tables.count() - 1) { J_NEWLINE(); } @@ -2686,5 +2697,32 @@ void ObPrintTableStore::table_to_string( } } +void ObPrintTableStore::ddl_kv_to_string( + ObDDLKV *table, + const char* table_arr, + char *buf, + const int64_t buf_len, + int64_t &pos) const +{ + if (nullptr != table) { + ObCurTraceId::TraceId *trace_id = ObCurTraceId::get_trace_id(); + BUF_PRINTF("[%ld] [ ", GETTID()); + BUF_PRINTO(PC(trace_id)); + BUF_PRINTF(" ] "); + const char *table_name = ObITable::get_table_type_name(ObITable::DDL_MEM_SSTABLE); + const char *uncommit_row = "false"; + + BUF_PRINTF(" %-10s %-14s %-19lu %-19lu %-19lu %-19lu %-4ld %-16s ", + table_arr, + table_name, + table->get_snapshot_version(), + table->get_snapshot_version(), + table->get_start_scn().get_val_for_tx(), + table->get_freeze_scn().get_val_for_tx(), + table->get_ref(), + uncommit_row); + } +} + } // namespace storage } // namespace oceanbase diff --git a/src/storage/tablet/ob_tablet_table_store.h b/src/storage/tablet/ob_tablet_table_store.h index 66eb12154..9fdf39be2 100644 --- a/src/storage/tablet/ob_tablet_table_store.h +++ b/src/storage/tablet/ob_tablet_table_store.h @@ -354,6 +354,13 @@ private: const int64_t buf_len, int64_t &pos) const; + void ddl_kv_to_string( + 
ObDDLKV *table, + const char* table_arr, + char *buf, + const int64_t buf_len, + int64_t &pos) const; + void print_arr( const ObSSTableArray &tables, const char* table_arr, diff --git a/src/storage/tx_table/ob_tx_table.cpp b/src/storage/tx_table/ob_tx_table.cpp index 8aac27c9a..1c19404bc 100644 --- a/src/storage/tx_table/ob_tx_table.cpp +++ b/src/storage/tx_table/ob_tx_table.cpp @@ -197,9 +197,12 @@ int ObTxTable::create_tablet(const lib::Worker::CompatMode compat_mode, const SC } else { const uint64_t tenant_id = ls_->get_tenant_id(); const share::ObLSID &ls_id = ls_->get_ls_id(); - if (OB_FAIL(create_data_tablet_(tenant_id, ls_id, compat_mode, create_scn))) { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); + } else if (OB_FAIL(create_data_tablet_(tenant_id, ls_id, compat_mode, create_scn, tenant_data_version))) { LOG_WARN("create data tablet failed", K(ret)); - } else if (OB_FAIL(create_ctx_tablet_(tenant_id, ls_id, compat_mode, create_scn))) { + } else if (OB_FAIL(create_ctx_tablet_(tenant_id, ls_id, compat_mode, create_scn, tenant_data_version))) { LOG_WARN("create ctx tablet failed", K(ret)); } if (OB_FAIL(ret)) { @@ -280,7 +283,8 @@ int ObTxTable::create_ctx_tablet_( const uint64_t tenant_id, const ObLSID ls_id, const lib::Worker::CompatMode compat_mode, - const share::SCN &create_scn) + const share::SCN &create_scn, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; share::schema::ObTableSchema table_schema; @@ -289,7 +293,8 @@ int ObTxTable::create_ctx_tablet_( if (OB_FAIL(get_ctx_table_schema_(tenant_id, table_schema))) { LOG_WARN("get ctx table schema failed", K(ret)); } else if (OB_FAIL(create_tablet_schema.init(arena_allocator, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 
tenant_data_version, true/*need_create_empty_major_sstable*/))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(ls_->create_ls_inner_tablet(ls_id, LS_TX_CTX_TABLET, @@ -405,7 +410,8 @@ int ObTxTable::get_data_table_schema_(const uint64_t tenant_id, share::schema::O int ObTxTable::create_data_tablet_(const uint64_t tenant_id, const ObLSID ls_id, const lib::Worker::CompatMode compat_mode, - const share::SCN &create_scn) + const share::SCN &create_scn, + const uint64_t tenant_data_version) { int ret = OB_SUCCESS; share::schema::ObTableSchema table_schema; @@ -414,7 +420,8 @@ int ObTxTable::create_data_tablet_(const uint64_t tenant_id, if (OB_FAIL(get_data_table_schema_(tenant_id, table_schema))) { LOG_WARN("get data table schema failed", K(ret)); } else if (OB_FAIL(create_tablet_schema.init(arena_allocator, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + tenant_data_version, true/*need_create_empty_major*/))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(ls_->create_ls_inner_tablet(ls_id, LS_TX_DATA_TABLET, diff --git a/src/storage/tx_table/ob_tx_table.h b/src/storage/tx_table/ob_tx_table.h index d004f41ec..eafd7ff35 100644 --- a/src/storage/tx_table/ob_tx_table.h +++ b/src/storage/tx_table/ob_tx_table.h @@ -290,12 +290,14 @@ private: const uint64_t tenant_id, const share::ObLSID ls_id, const lib::Worker::CompatMode compat_mode, - const share::SCN &create_scn); + const share::SCN &create_scn, + const uint64_t tenant_data_version); int create_ctx_tablet_( const uint64_t tenant_id, const share::ObLSID ls_id, const lib::Worker::CompatMode compat_mode, - const share::SCN &create_scn); + const share::SCN &create_scn, + const uint64_t tenant_data_version); int remove_tablet_(const common::ObTabletID &tablet_id); int get_data_table_schema_( const 
uint64_t tenant_id, diff --git a/tools/deploy/mysql_test/r/mysql/information_schema.result b/tools/deploy/mysql_test/r/mysql/information_schema.result index dbee70649..3cf61e07a 100644 --- a/tools/deploy/mysql_test/r/mysql/information_schema.result +++ b/tools/deploy/mysql_test/r/mysql/information_schema.result @@ -674,6 +674,9 @@ select * from information_schema.tables where table_schema in ('oceanbase', 'mys | def | oceanbase | __all_virtual_checkpoint | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_clone_job | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_clone_job_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_column_group_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_column_group_mapping | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_column_group_mapping_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_compaction_diagnose_info | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_compaction_suggestion | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL 
| NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_core_all_table | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | @@ -2024,6 +2027,9 @@ select * from information_schema.tables where table_schema in ('oceanbase', 'mys | def | oceanbase | __all_virtual_checkpoint | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_clone_job | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_clone_job_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_column_group_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_column_group_mapping | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_column_group_mapping_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_compaction_diagnose_info | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_compaction_suggestion | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | 
__all_virtual_core_all_table | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/add_column.result b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/add_column.result index fe9a7ca8f..a336da5fd 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/add_column.result +++ b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/add_column.result @@ -1,6 +1,6 @@ alter system enable_table_without_all_cg = false; -create table t1(a int, b varchar(3048), primary key (a), c int) with column group for all columns, each column; -create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a), c int) with column group (all columns, each column); +create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group (all columns, each column); select count(*) from t1; count(*) 150 @@ -330,8 +330,8 @@ count(*) count(c1) min(c1) max(c1) sum(c1) count(c2) min(c2) max(c2) drop table t1,t2; alter system enable_table_without_all_cg = true; alter system flush plan cache; -create table t1(a int, b varchar(3048), primary key (a), c int) with column group for all columns, each column; -create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a), c int) with column group (all columns, each column); +create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group (all columns, each column); select count(*) from t1; count(*) 150 diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan.result 
b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan.result index 17feaa437..d9ed5e8f3 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan.result +++ b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan.result @@ -1,5 +1,5 @@ drop table if exists ct1; -create table ct1(c1 int, c2 int) with column group for all columns, each column; +create table ct1(c1 int, c2 int) with column group (all columns, each column); alter system major freeze; select * from ct1 where c1 < 1; c1 c2 diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_flat.result b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_flat.result index 33ec4cb07..19fe5810c 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_flat.result +++ b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_flat.result @@ -1,6 +1,6 @@ drop table if exists ct1; set session ob_trx_timeout=100000000000; -create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group for all columns, each column; +create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group (all columns, each column); alter system minor freeze; alter system major freeze; select c1, c2, c3, length(c4) from ct1 where c1 < 1; diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_with_minor.result b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_with_minor.result index de8c39615..45820e15a 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_with_minor.result +++ 
b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_scan_with_minor.result @@ -1,6 +1,6 @@ drop table if exists ct1; set session ob_trx_timeout=100000000000; -create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group for all columns, each column; +create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group (all columns, each column); alter system minor freeze; alter system major freeze; update ct1 set c3 = 0 where c1 = 0; @@ -449,7 +449,7 @@ drop table ct1; alter system enable_table_without_all_cg = false; alter system set _rowsets_enabled = true; alter system flush plan cache; -create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group for all columns, each column; +create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group (all columns, each column); alter system minor freeze; alter system major freeze; update ct1 set c3 = 0 where c1 = 0; diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_syntax.result b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_syntax.result index 5e9611c45..bbb64710a 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_syntax.result +++ b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/basic_column_group_syntax.result @@ -1,9 +1,7 @@ -create table tt1(a int, b int, c int, primary key(a)) with column group for all columns, each column; +create table tt1(a int, b int, c int, primary key(a)) with column group (all columns, each column); create index idx_tt1 on tt1(b); create table tt2(d int, e int); -alter table tt1 drop column c; 
-ERROR 0A000: drop column with column store table not supported alter table tt1 modify column c varchar(20); -ERROR 0A000: alter column with column store table not supported +alter table tt1 drop column c; drop table tt1; drop table tt2; diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_merge.result b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_merge.result index dbfe16b33..a0f20df16 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_merge.result +++ b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_merge.result @@ -1,6 +1,6 @@ -create table t1(a int, b varchar(3048), primary key (a)) with column group for all columns, each column; -create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group for all columns, each column; -create table t3(a int, b varchar(3048), primary key (a)) with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a)) with column group (all columns, each column); +create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group (all columns, each column); +create table t3(a int, b varchar(3048), primary key (a)) with column group (all columns, each column); insert into t1 values(1, 1); insert into t2 values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); insert into t3 values(1, 1); diff --git a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_without_all_cg.result b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_without_all_cg.result index d58860fd7..f46289340 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_without_all_cg.result +++ b/tools/deploy/mysql_test/test_suite/column_store/r/mysql/co_major_without_all_cg.result @@ -1,6 +1,6 @@ -create table t1(a int, b varchar(3048), primary key (a)) with 
column group for each column; -create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group for each column; -create table t3(a int, b varchar(3048), primary key (a)) with column group for each column; +create table t1(a int, b varchar(3048), primary key (a)) with column group (each column); +create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group (each column); +create table t3(a int, b varchar(3048), primary key (a)) with column group (each column); insert into t1 values(1, 1); insert into t2 values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); insert into t3 values(1, 1); diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/add_column.test b/tools/deploy/mysql_test/test_suite/column_store/t/add_column.test index 39ffd402f..f96b3c4bf 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/add_column.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/add_column.test @@ -10,8 +10,8 @@ drop table if exists t1,t2; --enable_query_log alter system enable_table_without_all_cg = false; -create table t1(a int, b varchar(3048), primary key (a), c int) with column group for all columns, each column; -create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a), c int) with column group (all columns, each column); +create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group (all columns, each column); sleep 10; @@ -189,8 +189,8 @@ drop table t1,t2; alter system enable_table_without_all_cg = true; alter system flush plan cache; -create table t1(a int, b varchar(3048), primary key (a), c int) with column group for all columns, each column; -create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with 
column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a), c int) with column group (all columns, each column); +create table t2(pk int, c1 int, c2 int unsigned, c3 varchar(100)) block_size=2048 with column group (all columns, each column); sleep 10; diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan.test b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan.test index c80ac49c7..c15ab895c 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan.test @@ -14,7 +14,7 @@ connection conn1; --disable_warnings drop table if exists ct1; --enable_warnings -create table ct1(c1 int, c2 int) with column group for all columns, each column; +create table ct1(c1 int, c2 int) with column group (all columns, each column); --disable_query_log --let $count = 0 diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_flat.test b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_flat.test index a415e3abe..e10e89afd 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_flat.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_flat.test @@ -15,7 +15,7 @@ set @@recyclebin = off; drop table if exists ct1; --enable_warnings set session ob_trx_timeout=100000000000; -create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group for all columns, each column; +create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group (all columns, each column); --disable_query_log --let $count = 0 diff --git 
a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_with_minor.test b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_with_minor.test index 4723b3db0..956865fd0 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_with_minor.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_scan_with_minor.test @@ -20,8 +20,7 @@ set @@recyclebin = off; drop table if exists ct1; --enable_warnings set session ob_trx_timeout=100000000000; -# create table without all column group -create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group for all columns, each column; +create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group (all columns, each column); --disable_query_log --let $count = 0 @@ -123,8 +122,7 @@ alter system enable_table_without_all_cg = false; alter system set _rowsets_enabled = true; alter system flush plan cache; -# create table with all column group -create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group for all columns, each column; +create table ct1(c1 int primary key, c2 int, c3 number, c4 varchar(60000), c5 varchar(60000)) ROW_FORMAT = redundant COMPRESSION = 'none' with column group (all columns, each column); --disable_query_log --let $count = 0 diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_syntax.test b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_syntax.test index b01c51d68..0f863e16a 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_syntax.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/basic_column_group_syntax.test @@ -13,20 +13,18 @@ drop table if 
exists tt2; --enable_warnings --enable_query_log -create table tt1(a int, b int, c int, primary key(a)) with column group for all columns, each column; +create table tt1(a int, b int, c int, primary key(a)) with column group (all columns, each column); create index idx_tt1 on tt1(b); create table tt2(d int, e int); ---error 1235 -alter table tt1 drop column c; ---error 1235 alter table tt1 modify column c varchar(20); +alter table tt1 drop column c; let $tt1_table_id= query_get_value(select table_id from __all_virtual_table where table_name='tt1', table_id, 1); let $tt2_table_id= query_get_value(select table_id from __all_virtual_table where table_name='tt2', table_id, 1); -## In tt1 table schema, there exists 5 column_group: __co_default, __co_all, __cg_a, __cg_b, __cg_c +## In tt1 table schema, there exists 4 column_group: __cg_default, __cg_all, __cg_a, __cg_b let $tt1_cg_cnt = query_get_value(select count(*) as cg_cnt from __all_column_group where table_id=$tt1_table_id, cg_cnt, 1); -if ($tt1_cg_cnt != 5) +if ($tt1_cg_cnt != 4) { --echo unexpected column_group count of table tt1, real value is $tt1_cg_cnt } diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/co_major_merge.test b/tools/deploy/mysql_test/test_suite/column_store/t/co_major_merge.test index 4872b44f0..4a410d32a 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/co_major_merge.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/co_major_merge.test @@ -12,9 +12,9 @@ drop table if exists t3; --enable_warnings --enable_query_log -create table t1(a int, b varchar(3048), primary key (a)) with column group for all columns, each column; -create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group for all columns, each column; -create table t3(a int, b varchar(3048), primary key (a)) with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a)) with 
column group (all columns, each column); +create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group (all columns, each column); +create table t3(a int, b varchar(3048), primary key (a)) with column group (all columns, each column); sleep 10; insert into t1 values(1, 1); insert into t2 values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/co_major_without_all_cg.test b/tools/deploy/mysql_test/test_suite/column_store/t/co_major_without_all_cg.test index 59a800278..6c327c2bc 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/co_major_without_all_cg.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/co_major_without_all_cg.test @@ -12,9 +12,9 @@ drop table if exists t3; --enable_warnings --enable_query_log -create table t1(a int, b varchar(3048), primary key (a)) with column group for each column; -create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group for each column; -create table t3(a int, b varchar(3048), primary key (a)) with column group for each column; +create table t1(a int, b varchar(3048), primary key (a)) with column group (each column); +create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) with column group (each column); +create table t3(a int, b varchar(3048), primary key (a)) with column group (each column); sleep 10; insert into t1 values(1, 1); insert into t2 values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); diff --git a/tools/deploy/mysql_test/test_suite/column_store/t/tile_project.test b/tools/deploy/mysql_test/test_suite/column_store/t/tile_project.test index 9957504f2..6170f32e3 100644 --- a/tools/deploy/mysql_test/test_suite/column_store/t/tile_project.test +++ b/tools/deploy/mysql_test/test_suite/column_store/t/tile_project.test @@ 
-12,9 +12,9 @@ drop table if exists ct1,ct2,ct3; --enable_warnings --disable_query_log -create table ct1 (c1 int, c2 int) with column group for all columns, each column; -create table ct2(c1 int, c2 varchar(100), c3 varchar(1000)) block_size=2048 with column group for all columns, each column; -create table ct3(c1 int primary key, c2 int, c3 int, c4 int) partition by hash(c1) partitions 2 with column group for all columns, each column; +create table ct1 (c1 int, c2 int) with column group (all columns, each column); +create table ct2(c1 int, c2 varchar(100), c3 varchar(1000)) block_size=2048 with column group (all columns, each column); +create table ct3(c1 int primary key, c2 int, c3 int, c4 int) partition by hash(c1) partitions 2 with column group (all columns, each column); --let $count = 0 while($count < 1000) { diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result index b233ea5a4..11e7aa520 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/basic_cs_encoding.result @@ -35,7 +35,7 @@ alter system set _rowsets_enabled = true tenant = 'mysql'; alter system flush plan cache global; delete from t3 where a = 2; insert into t3 (a, b, c, d) values(31, 1, "a", repeat('a', 1000)); -create table t4(a int) row_format = compressed with column group for all columns, each column; +create table t4(a int) row_format = compressed with column group (all columns, each column); insert into t4 values (-1), (0); explain select /*+ LEADING(t4,t3) USE_NL(t3,t4) NO_USE_NL_MATERIALIZATION(t3) */ t3.a from t4,t3 where (t3.a >= t4.a + 31 and t3.a <= t4.a + 35) or (t3.a >= t4.a + 1 and t3.a <= t4.a + 5) order by t3.a; Query Plan diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/co_cs_major_merge.result 
b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/co_cs_major_merge.result index f515aedbe..7272bc9e6 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/co_cs_major_merge.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/co_cs_major_merge.result @@ -1,6 +1,6 @@ -create table t1(a int, b varchar(3048), primary key (a)) row_format = compressed with column group for all columns, each column; -create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) row_format = compressed with column group for all columns, each column; -create table t3(a int, b varchar(3048), primary key (a)) row_format = compressed with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a)) row_format = compressed with column group (all columns, each column); +create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) row_format = compressed with column group (all columns, each column); +create table t3(a int, b varchar(3048), primary key (a)) row_format = compressed with column group (all columns, each column); insert into t1 values(1, 1); insert into t2 values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); insert into t3 values(1, 1); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result index e526d8fba..7505c1770 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/count_pushdown_cs_encoding.result @@ -222,7 +222,7 @@ Outputs & filters: access([t3.__pk_increment]), partitions(p0) is_index_back=false, is_global_index=false, range_key([t3.__pk_increment]), range(MIN ; 
MAX)always true -create table t4 (a bigint auto_increment primary key, b bigint, c char(100), d varchar(100), e bigint not null) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t4 (a bigint auto_increment primary key, b bigint, c char(100), d varchar(100), e bigint not null) row_format = compressed block_size = 2048 with column group (all columns, each column); insert into t4 (a, b, c, d, e) values(1, 9, "a", repeat('a', 10),1); insert into t4 (a, b, c, d, e) values(2, 2, "ab", repeat("ab", 10),1); insert into t4 (a, b, c, d, e) values(3, 5, "abcd", repeat("ab", 10),1); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/cs_add_column.result b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/cs_add_column.result index d3c1e9bb9..19b0c4e03 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/cs_add_column.result +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/r/mysql/cs_add_column.result @@ -1,4 +1,4 @@ -create table t1(a int, b varchar(3048), primary key (a), c int) row_format = compressed with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a), c int) row_format = compressed with column group (all columns, each column); select count(*) from t1; count(*) 150 diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/add_column_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/add_column_cs_encoding.test index 1e01b8a8f..536bfe1f1 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/add_column_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/add_column_cs_encoding.test @@ -15,7 +15,7 @@ set ob_trx_timeout=1000000000; drop table if exists t1; --enable_warnings -create table t1 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 
with column group for all columns, each column; +create table t1 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group (all columns, each column); insert into t1 (b, c, d) values(1, "a", repeat('a', 1000)); insert into t1 (b, c, d) values(2, "ab", repeat("ab", 500)); insert into t1 (b, c, d) values(3, "ab ", repeat("ab", 500)); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/basic_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/basic_cs_encoding.test index de83c57e4..176780b5d 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/basic_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/basic_cs_encoding.test @@ -19,7 +19,7 @@ drop table if exists t3; drop table if exists t4; --enable_warnings -create table t1 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t1 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group (all columns, each column); insert into t1 (b, c, d) values(1, "a", repeat('a', 1000)); insert into t1 (b, c, d) values(2, "ab", repeat("ab", 500)); insert into t1 (b, c, d) values(3, "ab ", repeat("ab", 500)); @@ -37,11 +37,11 @@ while($loop > 0) select count(1) from t1; # bug -create table t2 (a smallint(94) NOT NULL, b smallint(40) DEFAULT NULL, primary key(a)) row_format = compressed with column group for all columns, each column; +create table t2 (a smallint(94) NOT NULL, b smallint(40) DEFAULT NULL, primary key(a)) row_format = compressed with column group (all columns, each column); insert into t2 values (39,845), (280,748), (520,693), (531,834), (548,17), (575,81), (576,711), (597,756), (680,594); insert into t2 values (751,576), (763,941), (797,906), 
(806,792), (841,320), (888,10), (952,398), (974,321); -create table t3 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t3 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group (all columns, each column); insert into t3 (b, c, d) values(1, "a", repeat('a', 1000)); insert into t3 (b, c, d) values(2, "ab", repeat("ab", 500)); insert into t3 (b, c, d) values(3, "ab ", repeat("ab", 500)); @@ -83,7 +83,7 @@ alter system flush plan cache global; connection default; delete from t3 where a = 2; insert into t3 (a, b, c, d) values(31, 1, "a", repeat('a', 1000)); -create table t4(a int) row_format = compressed with column group for all columns, each column; +create table t4(a int) row_format = compressed with column group (all columns, each column); insert into t4 values (-1), (0); explain select /*+ LEADING(t4,t3) USE_NL(t3,t4) NO_USE_NL_MATERIALIZATION(t3) */ t3.a from t4,t3 where (t3.a >= t4.a + 31 and t3.a <= t4.a + 35) or (t3.a >= t4.a + 1 and t3.a <= t4.a + 5) order by t3.a; select /*+ LEADING(t4,t3) USE_NL(t3,t4) NO_USE_NL_MATERIALIZATION(t3) */ t3.a from t4,t3 where (t3.a >= t4.a + 31 and t3.a <= t4.a + 35) or (t3.a >= t4.a + 1 and t3.a <= t4.a + 5) order by t3.a; diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/black_special_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/black_special_cs_encoding.test index 644f66ac8..47ff08d17 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/black_special_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/black_special_cs_encoding.test @@ -14,7 +14,7 @@ set ob_trx_timeout=1000000000; drop table if exists t_black; --enable_warnings -create table t_black(a int primary key, b int, c date, d text, e text, f text) row_format 
= compressed block_size = 2048 with column group for all columns, each column; +create table t_black(a int primary key, b int, c date, d text, e text, f text) row_format = compressed block_size = 2048 with column group (all columns, each column); --let $count = 1 while($count < 101) { diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/co_cs_major_merge.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/co_cs_major_merge.test index f9b4f049a..96aa6567d 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/co_cs_major_merge.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/co_cs_major_merge.test @@ -10,9 +10,9 @@ drop table if exists t3; --enable_warnings --enable_query_log -create table t1(a int, b varchar(3048), primary key (a)) row_format = compressed with column group for all columns, each column; -create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) row_format = compressed with column group for all columns, each column; -create table t3(a int, b varchar(3048), primary key (a)) row_format = compressed with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a)) row_format = compressed with column group (all columns, each column); +create table t2(c0 int, c1 int, c2 int, c3 int, c4 int, c5 int, c6 int, c7 int, c8 int, c9 int, c10 int, c11 int, c12 int) row_format = compressed with column group (all columns, each column); +create table t3(a int, b varchar(3048), primary key (a)) row_format = compressed with column group (all columns, each column); sleep 10; insert into t1 values(1, 1); insert into t2 values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/count_pushdown_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/count_pushdown_cs_encoding.test index 987b8c2cb..528b43b80 100644 --- 
a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/count_pushdown_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/count_pushdown_cs_encoding.test @@ -57,7 +57,7 @@ explain select a,count(b) from t3 group by a; explain select count(*) from t3 for update; # minor&major freeze -create table t4 (a bigint auto_increment primary key, b bigint, c char(100), d varchar(100), e bigint not null) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t4 (a bigint auto_increment primary key, b bigint, c char(100), d varchar(100), e bigint not null) row_format = compressed block_size = 2048 with column group (all columns, each column); insert into t4 (a, b, c, d, e) values(1, 9, "a", repeat('a', 10),1); insert into t4 (a, b, c, d, e) values(2, 2, "ab", repeat("ab", 10),1); insert into t4 (a, b, c, d, e) values(3, 5, "abcd", repeat("ab", 10),1); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/cs_add_column.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/cs_add_column.test index 9d9240f66..3eff9f84c 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/cs_add_column.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/cs_add_column.test @@ -8,7 +8,7 @@ drop table if exists t1; --enable_warnings --enable_query_log -create table t1(a int, b varchar(3048), primary key (a), c int) row_format = compressed with column group for all columns, each column; +create table t1(a int, b varchar(3048), primary key (a), c int) row_format = compressed with column group (all columns, each column); sleep 10; diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_const_dict_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_const_dict_cs_encoding.test index e8fc74d4a..12f2afe08 100644 --- 
a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_const_dict_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_const_dict_cs_encoding.test @@ -15,7 +15,7 @@ set ob_trx_timeout=1000000000; drop table if exists t1; --enable_warnings -create table t1 (a int auto_increment primary key, b smallint(6), c varchar(10)) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t1 (a int auto_increment primary key, b smallint(6), c varchar(10)) row_format = compressed block_size = 2048 with column group (all columns, each column); --let $loop=1000 while($loop > 0) { diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_null_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_null_cs_encoding.test index 54270221f..2a8601634 100644 --- a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_null_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/filter_null_cs_encoding.test @@ -15,7 +15,7 @@ set ob_trx_timeout=1000000000; drop table if exists t1; --enable_warnings -create table t1 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t1 (a int auto_increment primary key, b bigint, c char(100), d varchar(1000)) row_format = compressed block_size = 2048 with column group (all columns, each column); insert into t1 (b, c, d) values(1, "a", repeat('a', 1000)); insert into t1 (b, c, d) values(2, "ab", repeat("ab", 500)); insert into t1 (b, c, d) values(3, "ab ", repeat("ab", 500)); diff --git a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/pushdown_cs_encoding.test b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/pushdown_cs_encoding.test index c8946776c..331d5dbe3 100644 --- 
a/tools/deploy/mysql_test/test_suite/column_store_encoding/t/pushdown_cs_encoding.test +++ b/tools/deploy/mysql_test/test_suite/column_store_encoding/t/pushdown_cs_encoding.test @@ -14,7 +14,7 @@ connection default; --disable_warnings drop table if exists t1; --enable_warnings -create table t1 (c1 int auto_increment primary key, c2 bigint, c3 varchar(100)) row_format = compressed block_size = 2048 with column group for all columns, each column; +create table t1 (c1 int auto_increment primary key, c2 bigint, c3 varchar(100)) row_format = compressed block_size = 2048 with column group (all columns, each column); --let $loop=10 while($loop > 0) diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result index 0fbbfb3ae..d5131f8ee 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result @@ -94,6 +94,7 @@ enable_rpc_authentication_bypass enable_sql_audit enable_sql_extension enable_sql_operator_dump +enable_store_compression enable_syslog_recycle enable_syslog_wf enable_sys_table_ddl diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result index a9126cea7..2fd57078b 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result @@ -4485,6 +4485,47 @@ comment longtext YES select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_import_table_task_history; IF(count(*) >= 0, 1, 0) 1 +desc oceanbase.__all_virtual_column_group_mapping; +Field Type Null Key Default Extra +tenant_id bigint(20) 
NO PRI NULL +table_id bigint(20) NO PRI NULL +column_group_id bigint(20) NO PRI NULL +column_id bigint(20) NO PRI NULL +gmt_create timestamp(6) NO NULL +gmt_modified timestamp(6) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_column_group_mapping; +IF(count(*) >= 0, 1, 0) +1 +desc oceanbase.__all_virtual_column_group_history; +Field Type Null Key Default Extra +tenant_id bigint(20) NO PRI NULL +table_id bigint(20) NO PRI NULL +column_group_id bigint(20) NO PRI NULL +schema_version bigint(20) NO PRI NULL +gmt_create timestamp(6) NO NULL +gmt_modified timestamp(6) NO NULL +is_deleted bigint(20) NO NULL +column_group_name varchar(256) YES +column_group_type bigint(20) YES NULL +block_size bigint(20) YES NULL +compressor_type bigint(20) YES NULL +row_store_type bigint(20) YES NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_column_group_history; +IF(count(*) >= 0, 1, 0) +1 +desc oceanbase.__all_virtual_column_group_mapping_history; +Field Type Null Key Default Extra +tenant_id bigint(20) NO PRI NULL +table_id bigint(20) NO PRI NULL +column_group_id bigint(20) NO PRI NULL +column_id bigint(20) NO PRI NULL +schema_version bigint(20) NO PRI NULL +gmt_create timestamp(6) NO NULL +gmt_modified timestamp(6) NO NULL +is_deleted bigint(20) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_column_group_mapping_history; +IF(count(*) >= 0, 1, 0) +1 desc oceanbase.__all_virtual_clone_job; Field Type Null Key Default Extra tenant_id bigint(20) NO PRI NULL diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result index af4ff4e43..2b71687c4 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result +++ 
b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result @@ -8846,6 +8846,47 @@ comment longtext YES select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_import_table_task_history; IF(count(*) >= 0, 1, 0) 1 +desc oceanbase.__all_virtual_column_group_mapping; +Field Type Null Key Default Extra +tenant_id bigint(20) NO PRI NULL +table_id bigint(20) NO PRI NULL +column_group_id bigint(20) NO PRI NULL +column_id bigint(20) NO PRI NULL +gmt_create timestamp(6) NO NULL +gmt_modified timestamp(6) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_column_group_mapping; +IF(count(*) >= 0, 1, 0) +1 +desc oceanbase.__all_virtual_column_group_history; +Field Type Null Key Default Extra +tenant_id bigint(20) NO PRI NULL +table_id bigint(20) NO PRI NULL +column_group_id bigint(20) NO PRI NULL +schema_version bigint(20) NO PRI NULL +gmt_create timestamp(6) NO NULL +gmt_modified timestamp(6) NO NULL +is_deleted bigint(20) NO NULL +column_group_name varchar(256) YES +column_group_type bigint(20) YES NULL +block_size bigint(20) YES NULL +compressor_type bigint(20) YES NULL +row_store_type bigint(20) YES NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_column_group_history; +IF(count(*) >= 0, 1, 0) +1 +desc oceanbase.__all_virtual_column_group_mapping_history; +Field Type Null Key Default Extra +tenant_id bigint(20) NO PRI NULL +table_id bigint(20) NO PRI NULL +column_group_id bigint(20) NO PRI NULL +column_id bigint(20) NO PRI NULL +schema_version bigint(20) NO PRI NULL +gmt_create timestamp(6) NO NULL +gmt_modified timestamp(6) NO NULL +is_deleted bigint(20) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_column_group_mapping_history; +IF(count(*) >= 0, 1, 0) +1 desc oceanbase.__all_virtual_clone_job; Field Type Null Key Default Extra tenant_id bigint(20) NO PRI NULL diff 
--git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result index 9055959a7..42f76a45d 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result @@ -696,6 +696,9 @@ select 0xffffffffff & table_id, table_name, table_type, database_id, part_num fr 12425 __all_virtual_import_table_job_history 2 201001 1 12426 __all_virtual_import_table_task 2 201001 1 12427 __all_virtual_import_table_task_history 2 201001 1 +12430 __all_virtual_column_group_mapping 2 201001 1 +12431 __all_virtual_column_group_history 2 201001 1 +12432 __all_virtual_column_group_mapping_history 2 201001 1 12435 __all_virtual_clone_job 2 201001 1 12436 __all_virtual_clone_job_history 2 201001 1 12447 __all_virtual_aux_stat 2 201001 1 diff --git a/unittest/sql/parser/test_parser.result b/unittest/sql/parser/test_parser.result index bcf3edd6c..8e2e8da02 100644 --- a/unittest/sql/parser/test_parser.result +++ b/unittest/sql/parser/test_parser.result @@ -328,6 +328,7 @@ question_mask_size: 0 } ] }, + { }, { }, { }, { } @@ -511,6 +512,7 @@ question_mask_size: 0 } ] }, + { }, { }, { }, { } @@ -694,6 +696,7 @@ question_mask_size: 0 } ] }, + { }, { }, { }, { } diff --git a/unittest/storage/blocksstable/ob_multi_version_sstable_test.h b/unittest/storage/blocksstable/ob_multi_version_sstable_test.h index 64bf759ed..9be389c52 100644 --- a/unittest/storage/blocksstable/ob_multi_version_sstable_test.h +++ b/unittest/storage/blocksstable/ob_multi_version_sstable_test.h @@ -298,7 +298,7 @@ ObITable::TableType ObMultiVersionSSTableTest::get_merged_table_type() const } else if (META_MAJOR_MERGE == merge_type_) { table_type = ObITable::TableType::META_MAJOR_SSTABLE; } else if (DDL_KV_MERGE == merge_type_) { - table_type = ObITable::TableType::DDL_DUMP_SSTABLE; + table_type = 
ObITable::TableType::DDL_MERGE_CO_SSTABLE; } else { // MINOR_MERGE table_type = ObITable::TableType::MINOR_SSTABLE; } diff --git a/unittest/storage/blocksstable/test_tmp_file.cpp b/unittest/storage/blocksstable/test_tmp_file.cpp index 585ede323..2e6d4ef1e 100644 --- a/unittest/storage/blocksstable/test_tmp_file.cpp +++ b/unittest/storage/blocksstable/test_tmp_file.cpp @@ -86,7 +86,7 @@ public: TestTmpFileStress(ObTenantBase *tenant_ctx); virtual ~TestTmpFileStress(); int init(const int fd, const bool is_write, const int64_t thread_cnt, ObTableSchema *table_schema, - const bool is_plain_data, const bool is_big_file); + const bool is_plain_data, const bool is_big_file, bool is_truncate = false); virtual void run1(); private: void prepare_data(char *buf, const int64_t macro_block_size); @@ -97,6 +97,7 @@ private: void write_data(const int64_t macro_block_size); void write_plain_data(char *&buf, const int64_t macro_block_size); void read_data(const int64_t macro_block_size); + void read_and_truncate(const int64_t macro_block_size); void read_plain_data(const char *buf, const int64_t macro_block_size); private: static const int64_t BUF_COUNT = 16; @@ -107,19 +108,21 @@ private: bool is_big_file_; ObTableSchema *table_schema_; bool is_plain_; + bool is_truncate_; ObTenantBase *tenant_ctx_; }; TestTmpFileStress::TestTmpFileStress() : thread_cnt_(0), size_(OB_SERVER_BLOCK_MGR.get_macro_block_size()), fd_(0), - is_write_(false), is_big_file_(false), table_schema_(NULL), is_plain_(false) + is_write_(false), is_big_file_(false), table_schema_(NULL), is_plain_(false), + is_truncate_(false) { } TestTmpFileStress::TestTmpFileStress(ObTenantBase *tenant_ctx) : thread_cnt_(0), size_(OB_SERVER_BLOCK_MGR.get_macro_block_size()), fd_(0), is_write_(false), is_big_file_(false), table_schema_(NULL), is_plain_(false), - tenant_ctx_(tenant_ctx) + is_truncate_(false), tenant_ctx_(tenant_ctx) { } @@ -129,7 +132,7 @@ TestTmpFileStress::~TestTmpFileStress() int TestTmpFileStress::init(const 
int fd, const bool is_write, const int64_t thread_cnt, ObTableSchema *table_schema, - const bool is_plain, const bool is_big_file) + const bool is_plain, const bool is_big_file, const bool is_truncate) { int ret = OB_SUCCESS; if (thread_cnt < 0) { @@ -145,6 +148,7 @@ int TestTmpFileStress::init(const int fd, const bool is_write, if (!is_big_file_) { size_ = 16L * 1024L; } + is_truncate_ = is_truncate; set_thread_count(static_cast(thread_cnt)); } return ret; @@ -327,6 +331,51 @@ void TestTmpFileStress::read_data(const int64_t macro_block_size) handle.reset(); } +void TestTmpFileStress::read_and_truncate(const int64_t macro_block_size) +{ + int ret = OB_SUCCESS; + const int64_t timeout_ms = 100000; + ObTmpFileIOInfo io_info; + ObTmpFileIOHandle handle; + io_info.fd_ = fd_; + io_info.size_ = macro_block_size; + io_info.tenant_id_ = 1; + io_info.io_desc_.set_wait_event(2); + int64_t trunc_offset = 0; + char *buf = new char[macro_block_size]; + char *zero_buf = new char[macro_block_size]; + memset(zero_buf, 0, macro_block_size); + for (int64_t i = 0; i < BUF_COUNT; ++i) { + io_info.buf_ = buf; + ret = ObTmpFileManager::get_instance().read(io_info, handle); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(macro_block_size, handle.get_data_size()); + check_data(handle.get_buffer(), handle.get_data_size()); + ASSERT_EQ(OB_SUCCESS, ret); + // truncate data + // truncate trunc_offset + macro_block_size; + ret = ObTmpFileManager::get_instance().truncate(io_info.fd_, trunc_offset + macro_block_size); + ASSERT_EQ(OB_SUCCESS, ret); + ret = ObTmpFileManager::get_instance().pread(io_info, trunc_offset, handle); + ASSERT_EQ(OB_SUCCESS, ret); + check_plain_data(zero_buf, buf, macro_block_size); + trunc_offset += macro_block_size; + } + // check tuncated(0) won't reset the read_guard; + if (ret == OB_SUCCESS) { + io_info.buf_ = buf; + ret = ObTmpFileManager::get_instance().pread(io_info, 0, handle); + ASSERT_EQ(OB_SUCCESS, ret); + check_plain_data(zero_buf, buf, macro_block_size); + ret = 
ObTmpFileManager::get_instance().truncate(io_info.fd_, 0); + ASSERT_EQ(OB_SUCCESS, ret); + ret = ObTmpFileManager::get_instance().pread(io_info, 0, handle); + ASSERT_EQ(OB_SUCCESS, ret); + check_plain_data(zero_buf, buf, macro_block_size); + } + handle.reset(); +} + void TestTmpFileStress::read_plain_data(const char *read_buf, const int64_t macro_block_size) { int ret = OB_SUCCESS; @@ -375,7 +424,8 @@ public: TestMultiTmpFileStress(ObTenantBase *tenant_ctx); virtual ~TestMultiTmpFileStress(); int init(const int64_t file_cnt, const int64_t dir_id, const int64_t thread_cnt, - ObTableSchema *table_schema, const bool is_plain_data, const bool is_big_file); + ObTableSchema *table_schema, const bool is_plain_data, const bool is_big_file, + const bool is_truncate = false); virtual void run1(); private: void run_plain_case(); @@ -387,6 +437,7 @@ private: ObTableSchema *table_schema_; bool is_big_file_; bool is_plain_data_; + bool is_truncate_; ObTenantBase *tenant_ctx_; }; @@ -396,7 +447,8 @@ TestMultiTmpFileStress::TestMultiTmpFileStress() thread_cnt_perf_file_(0), table_schema_(NULL), is_big_file_(false), - is_plain_data_(false) + is_plain_data_(false), + is_truncate_(false) { } TestMultiTmpFileStress::TestMultiTmpFileStress(ObTenantBase *tenant_ctx) @@ -406,6 +458,7 @@ TestMultiTmpFileStress::TestMultiTmpFileStress(ObTenantBase *tenant_ctx) table_schema_(NULL), is_big_file_(false), is_plain_data_(false), + is_truncate_(false), tenant_ctx_(tenant_ctx) { } @@ -419,7 +472,8 @@ int TestMultiTmpFileStress::init(const int64_t file_cnt, const int64_t thread_cnt, ObTableSchema *table_schema, const bool is_plain_data, - const bool is_big_file) + const bool is_big_file, + const bool is_truncate) { int ret = OB_SUCCESS; if (file_cnt < 0 || thread_cnt < 0 || NULL == table_schema) { @@ -433,6 +487,7 @@ int TestMultiTmpFileStress::init(const int64_t file_cnt, table_schema_ = table_schema; is_big_file_ = is_big_file; is_plain_data_ = is_plain_data; + is_truncate_ = is_truncate; 
set_thread_count(static_cast(file_cnt)); } return ret; @@ -463,9 +518,9 @@ void TestMultiTmpFileStress::run_normal_case() ret = ObTmpFileManager::get_instance().open(fd, dir_id_); ASSERT_EQ(OB_SUCCESS, ret); STORAGE_LOG(INFO, "open file success", K(fd)); - ret = test_write.init(fd, true, thread_cnt_perf_file_, table_schema_, is_plain_data_, is_big_file_); + ret = test_write.init(fd, true, thread_cnt_perf_file_, table_schema_, is_plain_data_, is_big_file_, is_truncate_); ASSERT_EQ(OB_SUCCESS, ret); - ret = test_read.init(fd, false, thread_cnt_perf_file_, table_schema_, is_plain_data_, is_big_file_); + ret = test_read.init(fd, false, thread_cnt_perf_file_, table_schema_, is_plain_data_, is_big_file_, is_truncate_); ASSERT_EQ(OB_SUCCESS, ret); test_write.start(); test_write.wait(); @@ -2016,6 +2071,190 @@ TEST_F(TestTmpFile, test_tmp_file_wash) } } +// test truncate, simple thread and multi thread +TEST_F(TestTmpFile, test_tmp_file_truncate) +{ + int ret = OB_SUCCESS; + int64_t dir = -1; + int64_t fd = -1; + const int64_t timeout_ms = 5000; + ObTmpFileIOHandle handle; + ObTmpFileIOInfo io_info; + const int64_t macro_block_size = OB_SERVER_BLOCK_MGR.get_macro_block_size(); + + ret = ObTmpFileManager::get_instance().alloc_dir(dir); + ASSERT_EQ(OB_SUCCESS, ret); + + ret = ObTmpFileManager::get_instance().open(fd, dir); + ASSERT_EQ(OB_SUCCESS, ret); + char *write_buf = new char [macro_block_size + 256]; + for (int i = 0; i < macro_block_size + 256; ++i) { + write_buf[i] = static_cast(i % 256); + } + char *read_buf = new char [macro_block_size + 256]; + io_info.fd_ = fd; + io_info.tenant_id_ = 1; + io_info.io_desc_.set_wait_event(2); + io_info.buf_ = write_buf; + io_info.size_ = macro_block_size + 256; + io_info.io_desc_.set_group_id(THIS_WORKER.get_group_id()); + + int64_t write_time = ObTimeUtility::current_time(); + ret = ObTmpFileManager::get_instance().write(io_info); + write_time = ObTimeUtility::current_time() - write_time; + ASSERT_EQ(OB_SUCCESS, ret); + 
io_info.buf_ = read_buf; + + int64_t read_time = ObTimeUtility::current_time(); + ret = ObTmpFileManager::get_instance().read(io_info, handle); + read_time = ObTimeUtility::current_time() - read_time; + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(macro_block_size + 256, handle.get_data_size()); + int cmp = memcmp(handle.get_buffer(), write_buf, macro_block_size + 256); + ASSERT_EQ(0, cmp); + + + ret = ObTmpFileManager::get_instance().seek(fd, 0, ObTmpFile::SET_SEEK); + ASSERT_EQ(OB_SUCCESS, ret); + io_info.size_ = 200; + ret = ObTmpFileManager::get_instance().read(io_info, handle); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(200, handle.get_data_size()); + cmp = memcmp(handle.get_buffer(), write_buf, 200); + ASSERT_EQ(0, cmp); + + io_info.size_ = 200; + ret = ObTmpFileManager::get_instance().seek(fd, 0, ObTmpFile::SET_SEEK); + ASSERT_EQ(OB_SUCCESS, ret); + ret = ObTmpFileManager::get_instance().truncate(fd, 100); + ASSERT_EQ(OB_SUCCESS, ret); + ret = ObTmpFileManager::get_instance().read(io_info, handle); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(200, handle.get_data_size()); + MEMSET(write_buf, 0, 100); + cmp = memcmp(handle.get_buffer(), write_buf, 200); + ASSERT_EQ(0, cmp); + + io_info.size_ = 200; + ret = ObTmpFileManager::get_instance().truncate(fd, 300); + ASSERT_EQ(OB_SUCCESS, ret); + ret = ObTmpFileManager::get_instance().read(io_info, handle); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(200, handle.get_data_size()); + MEMSET(write_buf + 100, 0, 200); + cmp = memcmp(handle.get_buffer(), write_buf + 200, 200); + ASSERT_EQ(0, cmp); + + free(write_buf); + free(read_buf); + + STORAGE_LOG(INFO, "test_tmp_file_truncate"); + STORAGE_LOG(INFO, "io time", K(write_time), K(read_time)); + ObTmpTenantFileStoreHandle store_handle; + OB_TMP_FILE_STORE.get_store(1, store_handle); + store_handle.get_tenant_store()->print_block_usage(); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(1); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(1); + 
ObMallocAllocator::get_instance()->print_tenant_memory_usage(500); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(500); + ObTmpFileManager::get_instance().remove(fd); +} + +TEST_F(TestTmpFile, test_multi_thread_truncate) +{ + int ret = OB_SUCCESS; + const int64_t thread_cnt = 4; + const int64_t file_cnt = 1; + const bool is_plain_data = false; + const bool is_big_file = true; + const bool is_truncate = true; + TestMultiTmpFileStress test(MTL_CTX()); + int64_t dir = -1; + ret = ObTmpFileManager::get_instance().alloc_dir(dir); + ASSERT_EQ(OB_SUCCESS, ret); + ret = test.init(file_cnt, dir, thread_cnt, &table_schema_, is_plain_data, is_big_file, is_truncate); + ASSERT_EQ(OB_SUCCESS, ret); + int64_t io_time = ObTimeUtility::current_time(); + test.start(); + test.wait(); + io_time = ObTimeUtility::current_time() - io_time; + + + STORAGE_LOG(INFO, "test_multi_thread_truncate"); + STORAGE_LOG(INFO, "io time", K(io_time)); + ObTmpTenantFileStoreHandle store_handle; + OB_TMP_FILE_STORE.get_store(1, store_handle); + store_handle.get_tenant_store()->print_block_usage(); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(1); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(1); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(500); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(500); +} + +TEST_F(TestTmpFile, test_truncate_free_block) { + int ret = OB_SUCCESS; + int count = 32; + int64_t dir = -1; + int64_t fd = -1; + ObTmpFileIOHandle handle; + ObTmpFileIOInfo io_info; + io_info.tenant_id_ = 1; + io_info.io_desc_.set_group_id(THIS_WORKER.get_group_id()); + io_info.io_desc_.set_wait_event(2); + //int64_t write_size = OB_SERVER_BLOCK_MGR.get_macro_block_size(); + int64_t write_size = 1024 * 1024; + + char *write_buf = (char *)malloc(write_size); + for (int64_t i = 0; i < write_size; ++i) { + write_buf[i] = static_cast(i % 256); + } + ret = ObTmpFileManager::get_instance().alloc_dir(dir); + 
ASSERT_EQ(OB_SUCCESS, ret); + ret = ObTmpFileManager::get_instance().open(fd, dir); + ASSERT_EQ(OB_SUCCESS, ret); + io_info.fd_ = fd; + io_info.buf_ = write_buf; + io_info.size_ = write_size; + + for (int64_t i = 0; i < count; i++) { + ret = ObTmpFileManager::get_instance().write(io_info); + ASSERT_EQ(OB_SUCCESS, ret); + } + + STORAGE_LOG(INFO, "test_truncate_free_block"); + ObTmpTenantFileStoreHandle store_handle; + OB_TMP_FILE_STORE.get_store(1, store_handle); + ASSERT_EQ(count, store_handle.get_tenant_store()->tmp_mem_block_manager_.t_mblk_map_.size()); + + for (int64_t i = 0; i < count; i++) { + ret = ObTmpFileManager::get_instance().truncate(fd, (i + 1) * write_size); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(count - i - 1 , store_handle.get_tenant_store()->tmp_mem_block_manager_.t_mblk_map_.size()); + } + + ret = ObTmpFileManager::get_instance().truncate(fd, 0); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(0 , store_handle.get_tenant_store()->tmp_mem_block_manager_.t_mblk_map_.size()); + int64_t read_size = write_size; + char *read_buf = (char *)malloc(read_size); + memset(write_buf, 0, write_size); + io_info.buf_ = read_buf; + ret = ObTmpFileManager::get_instance().read(io_info, handle); + int cmp = memcmp(read_buf, write_buf, read_size); + ASSERT_EQ(0, cmp); + free(write_buf); + free(read_buf); + + store_handle.get_tenant_store()->print_block_usage(); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(1); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(1); + ObMallocAllocator::get_instance()->print_tenant_memory_usage(500); + ObMallocAllocator::get_instance()->print_tenant_ctx_memory_usage(500); + + ObTmpFileManager::get_instance().remove(fd); + +} + } // end namespace unittest } // end namespace oceanbase diff --git a/unittest/storage/ddl/CMakeLists.txt b/unittest/storage/ddl/CMakeLists.txt index 63fd732ac..759c9574b 100644 --- a/unittest/storage/ddl/CMakeLists.txt +++ b/unittest/storage/ddl/CMakeLists.txt @@ -1 +1,2 @@ 
#storage_unittest(test_ddl_kv)
+storage_unittest(test_chunk_compact_store)
diff --git a/unittest/storage/ddl/test_chunk_compact_store.cpp b/unittest/storage/ddl/test_chunk_compact_store.cpp
new file mode 100644
index 000000000..626e82953
--- /dev/null
+++ b/unittest/storage/ddl/test_chunk_compact_store.cpp
@@ -0,0 +1,802 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX STORAGE
+
+#define ASSERT_OK(x) ASSERT_EQ(OB_SUCCESS, (x))
+#include <gtest/gtest.h>
+
+#define private public
+#include "storage/meta_mem/ob_tenant_meta_mem_mgr.h"
+#include "storage/blocksstable/ob_row_generate.h"
+#include "storage/blocksstable/ob_data_file_prepare.h"
+#include "src/sql/engine/basic/ob_chunk_datum_store.h"
+#include "unittest/storage/blocksstable/ob_data_file_prepare.h"
+#include "src/sql/engine/basic/chunk_store/ob_compact_store.h"
+#include "src/sql/engine/basic/ob_temp_block_store.h"
+
+#undef private
+
+namespace oceanbase
+{
+
+using namespace common;
+using namespace lib;
+using namespace share;
+using namespace sql;
+
+//const int64_t COLUMN_CNT = 64;
+const int64_t COLUMN_CNT = 64;  // cells per generated row
+const int64_t BATCH_SIZE = 10000;  // rows per generated batch
+const int64_t ROUND[6] = {2,8,32,128,512, 1024};
+int64_t RESULT_ADD[6] = {0,0,0,0,0,0};
+int64_t RESULT_BUILD[6] = {0,0,0,0,0,0};
+static ObSimpleMemLimitGetter getter;
+
+typedef ObChunkDatumStore::StoredRow StoredRow;
+//typedef ObChunkDatumStore::Block Block;
+typedef ObTempBlockStore::Block Block;
+
+class ObStoredRowGenerate {  // Builds BATCH_SIZE back-to-back StoredRow images in one arena buffer for the tests below.
+public:
+  int get_stored_row(StoredRow **&sr);  // every cell holds 1, so each row sums to COLUMN_CNT
+  int get_stored_row_irregular(StoredRow **&sr);  // cell (i, j) holds i * 1024 + j
+
+  common::ObArenaAllocator allocator_;  // reuse()d on every call, so a previously returned buffer is invalidated
+};
+
+int ObStoredRowGenerate::get_stored_row(StoredRow **&sr)  // On success `sr` is the raw buffer start (cast to StoredRow**); callers cast it back to char* and walk it by row_size_.
+{
+  int ret = OB_SUCCESS;
+  int64_t data_size = ((sizeof(ObDatum) + 8) * COLUMN_CNT + 8) * BATCH_SIZE;
+  int32_t row_size = (sizeof(ObDatum) + 8) * COLUMN_CNT + 8;  // NOTE(review): the literal 8 presumably stands for sizeof(StoredRow), which is what data_offset below uses - confirm they match
+  allocator_.reuse();
+  void *buf = allocator_.alloc(data_size);
+  if (OB_ISNULL(buf)) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("fail to alloc buff", K(ret));
+  } else {
+    MEMSET(buf, 0, data_size);
+    for (int64_t i = 0; i < BATCH_SIZE; i++)
+    {
+      StoredRow * cur_sr = (StoredRow*) ((char*)buf + i * row_size);
+      if (i == BATCH_SIZE) {  // never true inside this loop (i < BATCH_SIZE)
+        cur_sr->row_size_ = 8 + 1042*COLUMN_CNT;
+      } else {
+        cur_sr->row_size_ = row_size;
+      }
+      cur_sr->cnt_ = COLUMN_CNT;
+      for (int64_t j = 0; j < COLUMN_CNT; j++) {
+        if (i != BATCH_SIZE) {
+          int64_t datum_offset = sizeof(ObDatum) * j;
+          int64_t data_offset = COLUMN_CNT * sizeof(ObDatum) + 8 * j + sizeof(StoredRow);
+          ObDatum *datum_ptr = (ObDatum *)(cur_sr->payload_ + datum_offset);
+          int64_t *data_ptr = (int64_t *)((char*)cur_sr + data_offset);
+          datum_ptr->len_ = 8;
+          //MEMCPY((void*)&datum_ptr->ptr_, &data_offset, 8);
+          MEMCPY((void*)&datum_ptr->ptr_, &data_ptr, 8);  // stores the absolute address of the cell data in the datum
+          *data_ptr = 1;
+        } else {
+          // won't go here
+          // generate var data
+          int64_t datum_offset = sizeof(ObDatum) * j;
+          int64_t data_offset = COLUMN_CNT * sizeof(ObDatum) + 8 * j + sizeof(StoredRow);
+          ObDatum *datum_ptr = (ObDatum *)(cur_sr->payload_ + datum_offset);
+          int64_t *data_ptr = (int64_t *)((char*)cur_sr + data_offset);
+          datum_ptr->len_ = 1030;
+          //MEMCPY((void*)&datum_ptr->ptr_, &data_offset, 8);
+          MEMCPY((void*)&datum_ptr->ptr_, &data_ptr, 8);
+          *data_ptr = 1;
+        }
+      }
+    }
+    sr = (StoredRow**)buf;
+  }
+
+  return ret;
+}
+
+int ObStoredRowGenerate::get_stored_row_irregular(StoredRow **&sr)  // Same layout as get_stored_row(), but cell (i, j) holds i * 1024 + j so every row has a distinct sum.
+{
+  int ret = OB_SUCCESS;
+  int64_t data_size = ((sizeof(ObDatum) + 8) * COLUMN_CNT + 8) * BATCH_SIZE;
+  int32_t row_size = (sizeof(ObDatum) + 8) * COLUMN_CNT + 8;
+  allocator_.reuse();
+  void *buf = allocator_.alloc(data_size);
+  if (OB_ISNULL(buf)) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("fail to alloc buff", K(ret));
+  } else {
+    MEMSET(buf, 0, data_size);
+    for (int64_t i = 0; i < BATCH_SIZE; i++)
+    {
+      StoredRow * cur_sr = (StoredRow*) ((char*)buf + i * row_size);
+      if (i == BATCH_SIZE) {  // never true inside this loop (i < BATCH_SIZE)
+        cur_sr->row_size_ = 8 + 1042*COLUMN_CNT;
+      } else {
+        cur_sr->row_size_ = row_size;
+      }
+      cur_sr->cnt_ = COLUMN_CNT;
+      for (int64_t j = 0; j < COLUMN_CNT; j++) {
+        if (i != BATCH_SIZE) {
+          int64_t datum_offset = sizeof(ObDatum) * j;
+          int64_t data_offset = COLUMN_CNT * sizeof(ObDatum) + 8 * j + sizeof(StoredRow);
+          ObDatum *datum_ptr = (ObDatum *)(cur_sr->payload_ + datum_offset);
+          int64_t *data_ptr = (int64_t *)((char*)cur_sr + data_offset);
+          datum_ptr->len_ = 8;
+          //MEMCPY((void*)&datum_ptr->ptr_, &data_offset, 8);
+          MEMCPY((void*)&datum_ptr->ptr_, &data_ptr, 8);
+          *data_ptr = i * 1024 + j;
+        } else {
+          // won't go here
+          // generate var data
+          int64_t datum_offset = sizeof(ObDatum) * j;
+          int64_t data_offset = COLUMN_CNT * sizeof(ObDatum) + 8 * j + sizeof(StoredRow);
+          ObDatum *datum_ptr = (ObDatum *)(cur_sr->payload_ + datum_offset);
+          int64_t *data_ptr = (int64_t *)((char*)cur_sr + data_offset);
+          datum_ptr->len_ = 1030;
+          //MEMCPY((void*)&datum_ptr->ptr_, &data_offset, 8);
+          MEMCPY((void*)&datum_ptr->ptr_, &data_ptr, 8);
+          *data_ptr = 1;
+        }
+      }
+    }
+    sr = (StoredRow**)buf;
+  }
+  return ret;
+}
+class TestCompactChunk : public TestDataFilePrepare  // Fixture: brings up tenant 1, the KV cache, the tmp-file manager and the tenant IO manager around each test.
+{
+public:
+  TestCompactChunk() :
+    TestDataFilePrepare(&getter, "TestTmpFile", 2 * 1024 * 1024, 2048) {};
+  void SetUp();
+  void TearDown();
+
+protected:
+  ObStoredRowGenerate row_generate_;
+  ObArenaAllocator allocator_;
+};
+void TestCompactChunk::SetUp()
+{
+  int ret = OB_SUCCESS;
+  const int64_t bucket_num = 1024;
+  const int64_t max_cache_size = 1024 * 1024 * 1024;
+  const int64_t block_size = common::OB_MALLOC_BIG_BLOCK_SIZE;
+  TestDataFilePrepare::SetUp();
+  ret = getter.add_tenant(1,
+                          8L * 1024L * 1024L, 2L * 1024L * 1024L * 1024L);
+  ASSERT_EQ(OB_SUCCESS, ret);
+  ret = ObKVGlobalCache::get_instance().init(&getter, bucket_num, max_cache_size, block_size);
+  if (OB_INIT_TWICE == ret) {  // an already-initialized cache is tolerated
+    ret = OB_SUCCESS;
+  } else {
+    ASSERT_EQ(OB_SUCCESS, ret);
+  }
+  // set observer memory limit
+  CHUNK_MGR.set_limit(8L * 1024L * 1024L * 1024L);
+  ret = ObTmpFileManager::get_instance().init();
+  ASSERT_EQ(OB_SUCCESS, ret);
+  static ObTenantBase tenant_ctx(1);
+  ObTenantEnv::set_tenant(&tenant_ctx);
+  ObTenantIOManager *io_service = nullptr;
+  EXPECT_EQ(OB_SUCCESS, ObTenantIOManager::mtl_init(io_service));
+}
+
+void TestCompactChunk::TearDown()
+{
+  ObTmpFileManager::get_instance().destroy();
+  ObKVGlobalCache::get_instance().destroy();
+  ObTmpFileStore::get_instance().destroy();
+  allocator_.reuse();
+  row_generate_.allocator_.reuse();
+  TestDataFilePrepare::TearDown();
+}
+
+TEST_F(TestCompactChunk, test_read_writer)  // Adds BATCH_SIZE uniform rows at SORT_DEFAULT_LEVEL and reads them back; each row must sum to 64.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,  // NOTE(review): the result of init() is ignored here and in every test below - consider asserting it
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, true, share::SORT_DEFAULT_LEVEL);
+
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;  // byte offset of the current row inside buf
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    int64_t result = 0;
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_next_row(cur_sr);
+    if (ret == OB_ITER_END) {  // NOTE(review): OB_ITER_END is mapped to success, yet cur_sr is dereferenced below - it may still be null in that case (same pattern in every read loop of this file)
+      ret = OB_SUCCESS;
+    }
+    ASSERT_EQ(ret, OB_SUCCESS);
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      result += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(result, 64);
+  }
+}
+
+TEST_F(TestCompactChunk, test_batch)  // Same flow as test_read_writer.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, true, share::SORT_DEFAULT_LEVEL);
+
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    int64_t result = 0;
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_next_row(cur_sr);
+    if (ret == OB_ITER_END) {
+      ret = OB_SUCCESS;
+    }
+    ASSERT_EQ(ret, OB_SUCCESS);
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      result += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(result, 64);
+  }
+}
+
+TEST_F(TestCompactChunk, test_read_writer_compact)  // SORT_COMPACT_LEVEL with a RowMeta describing COLUMN_CNT fixed 8-byte columns.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, true, share::SORT_COMPACT_LEVEL);
+  RowMeta row_meta(allocator_);
+  row_meta.col_cnt_ = COLUMN_CNT;
+  row_meta.fixed_cnt_ = COLUMN_CNT;
+  row_meta.var_data_off_ = 8 * row_meta.fixed_cnt_;
+  row_meta.column_length_.prepare_allocate(COLUMN_CNT);
+  row_meta.column_offset_.prepare_allocate(COLUMN_CNT);
+  for (int64_t i = 0; i < COLUMN_CNT; i++) {
+    if (i != COLUMN_CNT) {  // always true inside this loop; the else branch is unreachable
+      row_meta.column_length_[i] = 8;
+      row_meta.column_offset_[i] = 8 * i;
+    } else {
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    }
+  }
+  cs_chunk.set_meta(&row_meta);
+
+
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    int64_t result = 0;
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_next_row(cur_sr);
+    if (ret == OB_ITER_END) {
+      ret = OB_SUCCESS;
+    }
+    ASSERT_EQ(ret, OB_SUCCESS);
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      result += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(result, 64);
+  }
+}
+
+
+TEST_F(TestCompactChunk, test_read_writer_compact_vardata)  // SORT_COMPACT_LEVEL with a RowMeta that declares no fixed columns (fixed_cnt_ = 0, all lengths/offsets 0).
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, true, share::SORT_COMPACT_LEVEL);
+  RowMeta row_meta(allocator_);
+  row_meta.col_cnt_ = COLUMN_CNT;
+  row_meta.fixed_cnt_ = 0;
+  row_meta.var_data_off_ = 0;
+  row_meta.column_length_.prepare_allocate(COLUMN_CNT);
+  row_meta.column_offset_.prepare_allocate(COLUMN_CNT);
+  for (int64_t i = 0; i < COLUMN_CNT; i++) {
+    if (i != COLUMN_CNT) {  // always true inside this loop; both branches assign the same values
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    } else {
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    }
+  }
+  cs_chunk.set_meta(&row_meta);
+
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    int64_t result = 0;
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_next_row(cur_sr);
+    if (ret == OB_ITER_END) {
+      ret = OB_SUCCESS;
+    }
+    ASSERT_EQ(ret, OB_SUCCESS);
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      result += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(result, 64);
+  }
+}
+
+TEST_F(TestCompactChunk, test_read_writer_compression)  // Same round trip with SORT_COMPRESSION_LEVEL and ZSTD_COMPRESSOR passed to init().
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, true, share::SORT_COMPRESSION_LEVEL, ZSTD_COMPRESSOR);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    int64_t result = 0;
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_next_row(cur_sr);
+    if (ret == OB_ITER_END) {
+      ret = OB_SUCCESS;
+    }
+    ASSERT_EQ(ret, OB_SUCCESS);
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      result += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(result, 64);
+  }
+}
+
+TEST_F(TestCompactChunk, test_irregular)  // Rows with distinct cell values (i * 1024 + j); checks every per-row sum and the grand total in a single pass without rescan.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_DEFAULT_LEVEL);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row_irregular(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 1; j++ ) {
+    int64_t total_res = 0;
+    //cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+        total_res += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));  // sum over j of (i * 1024 + j)
+    }
+    ASSERT_EQ(total_res, ((1024 * (BATCH_SIZE-1) * BATCH_SIZE * COLUMN_CNT / 2) + BATCH_SIZE * ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));  // the per-row sums added over all rows
+  }
+}
+
+TEST_F(TestCompactChunk, test_rescan_irregular_1)  // Like test_irregular, but calls rescan() before the single read pass.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_DEFAULT_LEVEL);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row_irregular(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 1; j++ ) {
+    int64_t total_res = 0;
+    cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+        total_res += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+    }
+    ASSERT_EQ(total_res, ((1024 * (BATCH_SIZE-1) * BATCH_SIZE * COLUMN_CNT / 2) + BATCH_SIZE * ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+}
+
+
+TEST_F(TestCompactChunk, test_rescan)  // Uniform rows read back ten times, with rescan() before each pass.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_DEFAULT_LEVEL);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 10; j++ ) {
+    cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, 64);
+    }
+  }
+}
+
+TEST_F(TestCompactChunk, test_rescan_irregular)  // Distinct-valued rows read back twice, with rescan() before each pass.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_DEFAULT_LEVEL);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row_irregular(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 2; j++ ) {
+    int64_t total_res = 0;
+    cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+        total_res += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+    }
+    ASSERT_EQ(total_res, ((1024 * (BATCH_SIZE-1) * BATCH_SIZE * COLUMN_CNT / 2) + BATCH_SIZE * ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+}
+
+TEST_F(TestCompactChunk, test_rescan_get_last_row)  // After every add_row() the row returned by get_last_stored_row() must carry the sum of the row just added; then two rescan passes.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_DEFAULT_LEVEL);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row_irregular(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+    // get last row
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_last_stored_row(cur_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    int64_t res = 0;
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      res += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(res, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 2; j++ ) {
+    int64_t total_res = 0;
+    cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+        total_res += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+    }
+    ASSERT_EQ(total_res, ((1024 * (BATCH_SIZE-1) * BATCH_SIZE * COLUMN_CNT / 2) + BATCH_SIZE * ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+}
+
+TEST_F(TestCompactChunk, test_rescan_get_last_row_compact)  // Same as test_rescan_get_last_row at SORT_COMPACT_LEVEL with a RowMeta that declares no fixed columns.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_COMPACT_LEVEL);
+  RowMeta row_meta(allocator_);
+  row_meta.col_cnt_ = COLUMN_CNT;
+  row_meta.fixed_cnt_ = 0;
+  row_meta.var_data_off_ = 0;
+  row_meta.column_length_.prepare_allocate(COLUMN_CNT);
+  row_meta.column_offset_.prepare_allocate(COLUMN_CNT);
+  for (int64_t i = 0; i < COLUMN_CNT; i++) {
+    if (i != COLUMN_CNT) {
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    } else {
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    }
+  }
+  cs_chunk.set_meta(&row_meta);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row_irregular(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ret = cs_chunk.add_row(*tmp_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+    // get last row
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_last_stored_row(cur_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    int64_t res = 0;
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      res += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(res, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 2; j++ ) {
+    int64_t total_res = 0;
+    cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+        total_res += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+    }
+    ASSERT_EQ(total_res, ((1024 * (BATCH_SIZE-1) * BATCH_SIZE * COLUMN_CNT / 2) + BATCH_SIZE * ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+}
+
+TEST_F(TestCompactChunk, test_rescan_add_storagedatum)  // Feeds rows through the add_row(ObStorageDatum*, cnt, extra) overload instead of add_row(StoredRow), then verifies as above.
+{
+  int ret = OB_SUCCESS;
+  ObCompactStore cs_chunk;
+  cs_chunk.init(1, 1,
+                ObCtxIds::DEFAULT_CTX_ID, "SORT_CACHE_CTX", true, 0, false/*disable trunc*/, share::SORT_COMPACT_LEVEL);
+  RowMeta row_meta(allocator_);
+  row_meta.col_cnt_ = COLUMN_CNT;
+  row_meta.fixed_cnt_ = 0;
+  row_meta.var_data_off_ = 0;
+  row_meta.column_length_.prepare_allocate(COLUMN_CNT);
+  row_meta.column_offset_.prepare_allocate(COLUMN_CNT);
+  for (int64_t i = 0; i < COLUMN_CNT; i++) {
+    if (i != COLUMN_CNT) {
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    } else {
+      row_meta.column_length_[i] = 0;
+      row_meta.column_offset_[i] = 0;
+    }
+  }
+  cs_chunk.set_meta(&row_meta);
+  StoredRow **sr;
+  ret = row_generate_.get_stored_row_irregular(sr);
+  ASSERT_EQ(ret, OB_SUCCESS);
+
+  char *buf = reinterpret_cast<char *>(sr);
+  int64_t pos = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+    StoredRow *tmp_sr = (StoredRow *)(buf + pos);
+    ObStorageDatum ssr[COLUMN_CNT];
+    for (int64_t k = 0; OB_SUCC(ret) && k < COLUMN_CNT; k++) {
+      ssr[k].shallow_copy_from_datum(tmp_sr->cells()[k]);
+    }
+    ret = cs_chunk.add_row(ssr, COLUMN_CNT, 0);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    pos += tmp_sr->row_size_;
+    // get last row
+    const StoredRow *cur_sr = nullptr;
+    ret = cs_chunk.get_last_stored_row(cur_sr);
+    ASSERT_EQ(ret, OB_SUCCESS);
+    int64_t res = 0;
+    for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+      ObDatum cur_cell = cur_sr->cells()[k];
+      res += *(int64_t *)(cur_cell.ptr_);
+    }
+    ASSERT_EQ(res, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+
+  ret = cs_chunk.finish_add_row();
+  ASSERT_EQ(ret, OB_SUCCESS);
+  for (int j = 0; OB_SUCC(ret) && j < 2; j++ ) {
+    int64_t total_res = 0;
+    cs_chunk.rescan();
+    for (int64_t i = 0; OB_SUCC(ret) && i < BATCH_SIZE; i++) {
+      int64_t result = 0;
+      const StoredRow *cur_sr = nullptr;
+      ret = cs_chunk.get_next_row(cur_sr);
+      if (ret == OB_ITER_END) {
+        ret = OB_SUCCESS;
+      }
+      ASSERT_EQ(ret, OB_SUCCESS);
+      for (int64_t k = 0; k < cur_sr->cnt_; k++) {
+        ObDatum cur_cell = cur_sr->cells()[k];
+        result += *(int64_t *)(cur_cell.ptr_);
+        total_res += *(int64_t *)(cur_cell.ptr_);
+      }
+      ASSERT_EQ(result, ((1024 * i * COLUMN_CNT) + ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+    }
+    ASSERT_EQ(total_res, ((1024 * (BATCH_SIZE-1) * BATCH_SIZE * COLUMN_CNT / 2) + BATCH_SIZE * ((COLUMN_CNT - 1) * COLUMN_CNT / 2)));
+  }
+}
+
+}
+
+int main(int argc, char **argv)  // Test entry point: removes old log files, routes OB_LOGGER output to test_ddl_compact_store.log at INFO level, runs all tests.
+{
+  ::testing::InitGoogleTest(&argc, argv);
+  system("rm -rf test_ddl_compact_store.log*");
+  OB_LOGGER.set_log_level("INFO");
+  OB_LOGGER.set_file_name("test_ddl_compact_store.log", true);
+  //testing::FLAGS_gtest_filter = "TestCompactChunk.test_dump_one_block";
+  return RUN_ALL_TESTS();
+}
diff --git a/unittest/storage/ddl/test_ddl_kv.cpp b/unittest/storage/ddl/test_ddl_kv.cpp
index 8a65891ac..cc6f391b1 100644
--- a/unittest/storage/ddl/test_ddl_kv.cpp
+++ b/unittest/storage/ddl/test_ddl_kv.cpp
@@ -22,6 +22,7 @@
 #include "storage/meta_mem/ob_tenant_meta_mem_mgr.h"
 #include "storage/blocksstable/ob_row_generate.h"
 #include "storage/blocksstable/ob_data_file_prepare.h"
+#include "storage/ddl/ob_direct_insert_sstable_ctx_new.h"
 #undef private
 
 namespace oceanbase
@@ -154,7 +155,8 @@ TEST_F(TestBlockMetaTree,
random_keybtree) for (int64_t i = 0; i < 10; ++i) { ASSERT_OK(meta_tree.block_tree_.init()); meta_tree.is_inited_ = true; - ASSERT_OK(meta_tree.data_desc_.assign(data_desc_.get_desc())); + ASSERT_OK(meta_tree.data_desc_.assign(data_desc_)); + meta_tree.datum_utils_ = &meta_tree.data_desc_.get_desc().get_datum_utils(); for (int64_t j = 0; j < 10000; ++j) { void *buf = allocator_.alloc(sizeof(ObDatumRow)); ASSERT_TRUE(nullptr != buf); @@ -195,13 +197,13 @@ TEST_F(TestBlockMetaTree, random_keybtree) //for (int64_t i = 1; i <= 5; ++i) { //void *buf = arena.alloc(sizeof(ObSSTable)); //ObSSTable *tmp_sstable = new (buf) ObSSTable(); - //tmp_sstable->key_.table_type_ = ObITable::DDL_DUMP_SSTABLE; + //tmp_sstable->key_.table_type_ = ObITable::DDL_MERGE_CO_SSTABLE; //tmp_sstable->key_.scn_range_.start_scn_ = SCN::plus(SCN::min_scn(), 10 * i); //tmp_sstable->key_.scn_range_.end_scn_ = SCN::plus(SCN::min_scn(), 10 * (i + 1)); //ASSERT_OK(ddl_sstables.push_back(tmp_sstable)); //} //ObSSTable compact_sstable; - //compact_sstable.key_.table_type_ = ObITable::DDL_DUMP_SSTABLE; + //compact_sstable.key_.table_type_ = ObITable::DDL_MERGE_CO_SSTABLE; //compact_sstable.key_.scn_range_.start_scn_ = SCN::plus(SCN::min_scn(), 10); //compact_sstable.key_.scn_range_.end_scn_ = SCN::plus(SCN::min_scn(), 60); diff --git a/unittest/storage/tablet/test_tablet.cpp b/unittest/storage/tablet/test_tablet.cpp index c9cd25bb5..f1799f9f7 100644 --- a/unittest/storage/tablet/test_tablet.cpp +++ b/unittest/storage/tablet/test_tablet.cpp @@ -390,7 +390,7 @@ public: virtual ~TestTablet(); virtual void SetUp(); virtual void TearDown(); - void pull_ddl_memtables(ObIArray &ddl_kvs) + void pull_ddl_memtables(ObIArray &ddl_kvs) { for (int64_t i = 0; i < ddl_kv_count_; ++i) { ASSERT_EQ(OB_SUCCESS, ddl_kvs.push_back(ddl_kvs_[i])); @@ -400,7 +400,7 @@ public: void reproducing_bug(); private: ObArenaAllocator allocator_; - ObITable **ddl_kvs_; + ObDDLKV **ddl_kvs_; volatile int64_t ddl_kv_count_; }; @@ -544,7 
+544,7 @@ public: int init(ObArenaAllocator &allocator, TestTablet &tablet) { int ret = OB_SUCCESS; - ObArray ddl_kvs; + ObArray ddl_kvs; tablet.pull_ddl_memtables(ddl_kvs); ret = ddl_kvs_.init(allocator, ddl_kvs); const int64_t count = ddl_kvs_.count(); @@ -554,10 +554,9 @@ public: } void reproducing_bug(ObArenaAllocator &allocator) { - ObArray ddl_kvs; + ObArray ddl_kvs; for (int64_t i = 0; i < 3; ++i) { - ObITable *ddl_kv = new ObDDLKV(); - ddl_kv->key_.table_type_ = ObITable::TableType::DDL_MEM_SSTABLE; + ObDDLKV *ddl_kv = new ObDDLKV(); ddl_kvs.push_back(ddl_kv); } ddl_kvs_.init(allocator, ddl_kvs); @@ -573,14 +572,11 @@ void TestTablet::reproducing_bug() { int ret = OB_SUCCESS; ObTabletComplexAddr table_store_addr; - ddl_kvs_ = static_cast(allocator_.alloc(sizeof(ObITable*) * ObTablet::DDL_KV_ARRAY_SIZE)); + ddl_kvs_ = static_cast(allocator_.alloc(sizeof(ObDDLKV*) * ObTablet::DDL_KV_ARRAY_SIZE)); ASSERT_TRUE(nullptr != ddl_kvs_); ddl_kvs_[0] = new ObDDLKV(); - ddl_kvs_[0]->key_.table_type_ = ObITable::TableType::DDL_MEM_SSTABLE; ddl_kvs_[1] = new ObDDLKV(); - ddl_kvs_[1]->key_.table_type_ = ObITable::TableType::DDL_MEM_SSTABLE; ddl_kvs_[2] = new ObDDLKV(); - ddl_kvs_[2]->key_.table_type_ = ObITable::TableType::DDL_MEM_SSTABLE; std::cout<< "reproducing_bug 1:" << ddl_kv_count_ << std::endl; ddl_kv_count_ = 3; std::cout<< "reproducing_bug 2:" << ddl_kv_count_ << std::endl; diff --git a/unittest/storage/test_compaction_policy.cpp b/unittest/storage/test_compaction_policy.cpp index 32b69e7ca..b35857c13 100644 --- a/unittest/storage/test_compaction_policy.cpp +++ b/unittest/storage/test_compaction_policy.cpp @@ -396,12 +396,12 @@ int TestCompactionPolicy::mock_tablet( ObTablet *tablet = nullptr; ObTableHandleV2 table_handle; - bool need_empty_major_table = false; ObLSHandle ls_handle; ObLSService *ls_svr = nullptr; ObArenaAllocator arena_allocator; ObCreateTabletSchema create_tablet_schema; + bool need_empty_major_table = false; if (OB_ISNULL(t3m)) { ret = 
OB_ERR_UNEXPECTED; @@ -416,10 +416,11 @@ int TestCompactionPolicy::mock_tablet( LOG_WARN("failed to acquire tablet", K(ret), K(key)); } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { } else if (OB_FAIL(create_tablet_schema.init(arena_allocator, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, need_empty_major_table/*need_create_empty_major*/))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(tablet->init_for_first_time_creation(allocator, ls_id, tablet_id, tablet_id, - SCN::min_scn(), snapshot_version, create_tablet_schema, need_empty_major_table, ls_handle.get_ls()->get_freezer()))) { + SCN::min_scn(), snapshot_version, create_tablet_schema, ls_handle.get_ls()->get_freezer()))) { LOG_WARN("failed to init tablet", K(ret), K(ls_id), K(tablet_id), K(snapshot_version), K(table_schema), K(compat_mode)); } else { diff --git a/unittest/storage/test_lob_common.h b/unittest/storage/test_lob_common.h index d7f709ce8..12dba97e6 100644 --- a/unittest/storage/test_lob_common.h +++ b/unittest/storage/test_lob_common.h @@ -332,7 +332,8 @@ int TestLobCommon::build_lob_tablet_arg( } else if (OB_FAIL(tablet_schema_index_array.push_back(2))) { STORAGE_LOG(WARN, "failed to push index into array", K(ret)); } else if (OB_FAIL(tablet_info.init(tablet_id_array, data_tablet_id, tablet_schema_index_array, - lib::get_compat_mode(), false/*is_create_bind_hidden_tablets*/))) { + lib::get_compat_mode(), false/*is_create_bind_hidden_tablets*/, + true /*need_create_empty_major_sstable*/))) { STORAGE_LOG(WARN, "failed to init tablet info", K(ret), K(tablet_id_array), K(data_tablet_id), K(tablet_schema_index_array)); } else if (OB_FAIL(arg.init_create_tablet(ls_id, share::SCN::min_scn(), false/*need_check_tablet_cnt*/))) { diff --git a/unittest/storage/test_tablet_helper.h 
b/unittest/storage/test_tablet_helper.h index 22dda0971..97fae43bc 100644 --- a/unittest/storage/test_tablet_helper.h +++ b/unittest/storage/test_tablet_helper.h @@ -118,7 +118,8 @@ inline int TestTabletHelper::create_tablet( prepare_sstable_param(tablet_id, table_schema, param); void *buff = nullptr; if (OB_FAIL(create_tablet_schema.init(schema_allocator, table_schema, compat_mode, - false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { + false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3, + 0/*tenant_data_version, default val*/, true/*need_create_empty_major*/))) { STORAGE_LOG(WARN, "failed to init storage schema", KR(ret), K(table_schema)); } else if (OB_FAIL(ObSSTableMergeRes::fill_column_checksum_for_empty_major(param.column_cnt_, param.column_checksums_))) { STORAGE_LOG(WARN, "fill column checksum failed", K(ret), K(param)); @@ -133,7 +134,7 @@ inline int TestTabletHelper::create_tablet( } else if (OB_FAIL(tablet_handle.get_obj()->init_for_first_time_creation( *tablet_handle.get_allocator(), ls_id, tablet_id, tablet_id, share::SCN::base_scn(), - snapshot_version, create_tablet_schema, true, freezer))){ + snapshot_version, create_tablet_schema, freezer))){ STORAGE_LOG(WARN, "failed to init tablet", K(ret), K(ls_id), K(tablet_id)); } else if (ObTabletStatus::Status::MAX != tablet_status) { ObTabletCreateDeleteMdsUserData data;