diff --git a/deps/oblib/src/common/ob_member.cpp b/deps/oblib/src/common/ob_member.cpp index 39499ce39..3dea591bf 100644 --- a/deps/oblib/src/common/ob_member.cpp +++ b/deps/oblib/src/common/ob_member.cpp @@ -86,6 +86,21 @@ void ObMember::reset_migrating() flag_ &= ~(1UL << MIGRATING_FLAG_BIT); } +bool ObMember::is_columnstore() const +{ + return (flag_ >> COLUMNSTORE_FLAG_BIT) & 1U; +} + +void ObMember::set_columnstore() +{ + flag_ |= (1UL << COLUMNSTORE_FLAG_BIT); +} + +void ObMember::reset_columnstore() +{ + flag_ &= ~(1UL << COLUMNSTORE_FLAG_BIT); +} + OB_SERIALIZE_MEMBER(ObMember, server_, timestamp_, flag_); bool ObReplicaMember::is_readonly_replica() const @@ -93,19 +108,63 @@ bool ObReplicaMember::is_readonly_replica() const return REPLICA_TYPE_READONLY == replica_type_; } +int ObReplicaMember::init( + const common::ObAddr &server, + const int64_t timestamp, + const common::ObReplicaType replica_type) +{ + int ret = OB_SUCCESS; + reset(); + if (OB_UNLIKELY(!server.is_valid() + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type))) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid argument", K(ret), K(server), K(replica_type)); + } else { + server_ = server; + timestamp_ = timestamp; + replica_type_ = replica_type; + if (REPLICA_TYPE_COLUMNSTORE == replica_type) { + ObMember::set_columnstore(); + } + } + return ret; +} + +int ObReplicaMember::init( + const ObMember &member, + const common::ObReplicaType replica_type) +{ + int ret = OB_SUCCESS; + reset(); + if (OB_UNLIKELY(!member.is_valid() + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type))) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid argument", K(ret), K(member), K(replica_type)); + } else if (OB_FAIL(ObMember::assign(member))) { + COMMON_LOG(WARN, "failed to assign member", K(ret), K(member)); + } else if (OB_FALSE_IT(replica_type_ = replica_type)) { + // should never be here + } else if (OB_UNLIKELY(! 
is_valid())) { // check flag_ and replica_type_ correct + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid argument", K(ret), K(member), K(replica_type), KPC(this)); + } + return ret; +} + void ObReplicaMember::reset() { ObMember::reset(); replica_type_ = REPLICA_TYPE_FULL; - region_ = DEFAULT_REGION_NAME; memstore_percent_ = 100; } bool ObReplicaMember::is_valid() const { + // columnstore bit is 1 if and only if replica_type is C + bool is_flag_valid = (is_columnstore() == (REPLICA_TYPE_COLUMNSTORE == replica_type_)); return ObMember::is_valid() && ObReplicaTypeCheck::is_replica_type_valid(replica_type_) - && !region_.is_empty() + && is_flag_valid && memstore_percent_ <= 100 && memstore_percent_ >= 0; } @@ -115,46 +174,6 @@ common::ObReplicaType ObReplicaMember::get_replica_type() const return replica_type_; } -int ObReplicaMember::set_replica_type(const common::ObReplicaType replica_type) -{ - int ret = OB_SUCCESS; - if (!ObReplicaTypeCheck::is_replica_type_valid(replica_type)) { - ret = OB_INVALID_ARGUMENT; - } else { - replica_type_ = replica_type; - } - return ret; -} - -const common::ObRegion &ObReplicaMember::get_region() const -{ - return region_; -} - -int ObReplicaMember::set_member(const ObMember &member) -{ - int ret = OB_SUCCESS; - - if (!member.is_valid()) { - ret = OB_INVALID_ARGUMENT; - COMMON_LOG(WARN, "invalid args", K(ret), K(member)); - } else if (OB_FAIL(ObMember::assign(member))) { - COMMON_LOG(WARN, "failed to assign member", K(ret), K(member)); - } - return ret; -} - -int ObReplicaMember::set_region(const common::ObRegion &region) -{ - int ret = OB_SUCCESS; - if (region.is_empty()) { - ret = OB_INVALID_ARGUMENT; - } else { - region_ = region; - } - return ret; -} - ObReplicaMember &ObReplicaMember::operator=(const ObReplicaMember &rhs) { server_ = rhs.server_; diff --git a/deps/oblib/src/common/ob_member.h b/deps/oblib/src/common/ob_member.h index 25b8d005f..91e5ceec1 100644 --- a/deps/oblib/src/common/ob_member.h +++ 
b/deps/oblib/src/common/ob_member.h @@ -49,11 +49,16 @@ public: void set_migrating(); void reset_migrating(); + bool is_columnstore() const; + void set_columnstore(); + void reset_columnstore(); + TO_STRING_KV(K_(server), K_(timestamp), K_(flag)); TO_YSON_KV(OB_Y_(server), OB_ID(t), timestamp_, OB_Y_(flag)); OB_UNIS_VERSION(1); protected: static const int64_t MIGRATING_FLAG_BIT = 1; + static const int64_t COLUMNSTORE_FLAG_BIT = 0; common::ObAddr server_; int64_t timestamp_; int64_t flag_; @@ -90,61 +95,46 @@ inline int member_to_string(const common::ObMember &member, ObSqlString &member_ class ObReplicaMember : public ObMember { public: + // default constructor ObReplicaMember() : ObMember(), replica_type_(REPLICA_TYPE_FULL), - region_(DEFAULT_REGION_NAME), memstore_percent_(100) {} + // construct with only server and timestamp, when we don't know or care about replica_type + // TODO(cangming.zl): remove this constructor when DRTask do not use it. ObReplicaMember(const common::ObAddr &server, const int64_t timestamp) - : ObMember(server, timestamp), + : ObMember(ObMember(server, timestamp)), replica_type_(REPLICA_TYPE_FULL), - region_(DEFAULT_REGION_NAME), - memstore_percent_(100) - {} - ObReplicaMember(const ObMember &member) - : ObMember(member), - replica_type_(REPLICA_TYPE_FULL), - region_(DEFAULT_REGION_NAME), - memstore_percent_(100) - {} - /* After the subsequent type conversion code is completed, remove the constructor */ - ObReplicaMember(const common::ObAddr &server, - const int64_t timestamp, - const common::ObReplicaType replica_type) - : ObMember(server, timestamp), - replica_type_(replica_type), - region_(DEFAULT_REGION_NAME), memstore_percent_(100) {} + // construct with server, timestamp and replica_type, + // this func will set columnstore flag if replica_type is C. 
ObReplicaMember(const common::ObAddr &server, const int64_t timestamp, const common::ObReplicaType replica_type, - const int64_t memstore_percent) - : ObMember(server, timestamp), + const int64_t memstore_percent = 100) + : ObMember(ObMember(server, timestamp)), replica_type_(replica_type), - region_(DEFAULT_REGION_NAME), memstore_percent_(memstore_percent) - {} - ObReplicaMember(const common::ObAddr &server, - const int64_t timestamp, - const common::ObReplicaType replica_type, - const common::ObRegion &region, - const int64_t memstore_percent) - : ObMember(server, timestamp), - replica_type_(replica_type), - region_(region), - memstore_percent_(memstore_percent) - {} + { + if (REPLICA_TYPE_COLUMNSTORE == replica_type) { + ObMember::set_columnstore(); + } + } public: + // init with server, timestamp, replica_type. + // this func will set columnstore flag if replica_type is C. + int init(const common::ObAddr &server, + const int64_t timestamp, + const common::ObReplicaType replica_type); + // init with existing member whose flag_ may have been set. + // this function will check whether flag_ is consistent with replica_type. 
+ int init(const ObMember &member, + const common::ObReplicaType replica_type); common::ObReplicaType get_replica_type() const; - int set_replica_type(const common::ObReplicaType replica_type); - const common::ObRegion &get_region() const; - int set_region(const common::ObRegion &region); - int set_member(const ObMember &member); int64_t get_memstore_percent() const { return memstore_percent_; } - void set_memstore_percent(const int64_t memstore_percent) { memstore_percent_ = memstore_percent; } virtual void reset(); virtual bool is_valid() const; virtual bool is_readonly_replica() const; @@ -154,8 +144,8 @@ public: OB_UNIS_VERSION(1); private: common::ObReplicaType replica_type_; - common::ObRegion region_; - int64_t memstore_percent_; + int64_t memstore_percent_; // obsolete, only as placeholder + common::ObRegion region_ = DEFAULT_REGION_NAME; // obsolete, only as placeholder }; } // namespace common } // namespace oceanbase diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 5b98c87a3..457f8d571 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -2084,17 +2084,19 @@ enum ObFreezeStatus }; /* - * |---- 2 bits ---|--- 4 bits ---|--- 2 bits ---|--- 2 bits ---| LSB - * |-- encryption--|--- clog ---|-- SSStore ---|--- MemStore--| LSB + * |---- 2 bits ---|---- 2 bits ---|--- 4 bits ---|--- 2 bits ---|--- 2 bits ---| LSB + * |--column-store-|-- encryption--|--- clog ---|-- SSStore ---|--- MemStore--| LSB */ const int64_t MEMSTORE_BITS_SHIFT = 0; const int64_t SSSTORE_BITS_SHIFT = 2; const int64_t CLOG_BITS_SHIFT = 4; const int64_t ENCRYPTION_BITS_SHIFT = 8; +const int64_t COLUMNSTORE_BITS_SHIFT = 10; const int64_t REPLICA_TYPE_MEMSTORE_MASK = (0x3UL << MEMSTORE_BITS_SHIFT); const int64_t REPLICA_TYPE_SSSTORE_MASK = (0x3UL << SSSTORE_BITS_SHIFT); const int64_t REPLICA_TYPE_CLOG_MASK = (0xFUL << CLOG_BITS_SHIFT); const int64_t REPLICA_TYPE_ENCRYPTION_MASK = (0x3UL << ENCRYPTION_BITS_SHIFT); +const int64_t 
REPLICA_TYPE_COLUMNSTORE_MASK = (0x3UL << COLUMNSTORE_BITS_SHIFT); // replica type associated with memstore const int64_t WITH_MEMSTORE = 0; const int64_t WITHOUT_MEMSTORE = 1; @@ -2107,16 +2109,22 @@ const int64_t ASYNC_CLOG = 1 << CLOG_BITS_SHIFT; // replica type associated with encryption const int64_t WITHOUT_ENCRYPTION = 0 << ENCRYPTION_BITS_SHIFT; const int64_t WITH_ENCRYPTION = 1 << ENCRYPTION_BITS_SHIFT; +// replica type associated with columnstore +const int64_t NOT_COLUMNSTORE = 0 << COLUMNSTORE_BITS_SHIFT; +const int64_t COLUMNSTORE = 1 << COLUMNSTORE_BITS_SHIFT; // tracepoint, refer to OB_MAX_CONFIG_xxx const int64_t OB_MAX_TRACEPOINT_NAME_LEN = 128; const int64_t OB_MAX_TRACEPOINT_DESCRIBE_LEN = 4096; -// Need to manually maintain the replica_type_to_str function in utility.cpp, -// Currently there are only three types: REPLICA_TYPE_FULL, REPLICA_TYPE_READONLY, and REPLICA_TYPE_LOGONLY +// Please modify the replica_type_to_string and string_to_replica_type function +// in ob_share_util.cpp when adding new replica_type. enum ObReplicaType { - // Almighty copy: is a member of paxos; has ssstore; has memstore + // Invalid replica_type, value of which is -1. + // Attention: Please DO use REPLICA_TYPE_INVALID as initial value. DO NOT use REPLICA_TYPE_MAX. 
+ REPLICA_TYPE_INVALID = -1, + // Fully functional copy: is a member of paxos; has ssstore; has memstore REPLICA_TYPE_FULL = (SYNC_CLOG | WITH_SSSTORE | WITH_MEMSTORE), // 0 // Backup copy: Paxos member; ssstore; no memstore REPLICA_TYPE_BACKUP = (SYNC_CLOG | WITH_SSSTORE | WITHOUT_MEMSTORE), // 1 @@ -2133,54 +2141,44 @@ enum ObReplicaType REPLICA_TYPE_ARBITRATION = (ASYNC_CLOG | WITHOUT_SSSTORE | WITHOUT_MEMSTORE), // 21 // Encrypted log copy: encrypted; paxos member; no sstore; no memstore REPLICA_TYPE_ENCRYPTION_LOGONLY = (WITH_ENCRYPTION | SYNC_CLOG | WITHOUT_SSSTORE | WITHOUT_MEMSTORE), // 261 - // invalid value + // Column-store copy: column-store, not a member of paxos; ssstore; memstore + REPLICA_TYPE_COLUMNSTORE = (COLUMNSTORE | ASYNC_CLOG | WITH_SSSTORE | WITH_MEMSTORE), // 1040 + // max value REPLICA_TYPE_MAX, }; -static inline int replica_type_to_string(const ObReplicaType replica_type, char *name_str, const int64_t str_len) -{ - int ret = OB_SUCCESS; - switch(replica_type) { - case REPLICA_TYPE_FULL: { - strncpy(name_str ,"FULL", str_len); - break; - } - case REPLICA_TYPE_BACKUP: { - strncpy(name_str ,"BACKUP", str_len); - break; - } - case REPLICA_TYPE_LOGONLY: { - strncpy(name_str ,"LOGONLY", str_len); - break; - } - case REPLICA_TYPE_READONLY: { - strncpy(name_str ,"READONLY", str_len); - break; - } - case REPLICA_TYPE_MEMONLY: { - strncpy(name_str ,"MEMONLY", str_len); - break; - } - case REPLICA_TYPE_ENCRYPTION_LOGONLY: { - strncpy(name_str ,"ENCRYPTION_LOGONLY", str_len); - break; - } - default: { - ret = OB_INVALID_ARGUMENT; - strncpy(name_str ,"INVALID", str_len); - break; - } // default - } // switch - return ret; -} +// full replica +const char *const FULL_REPLICA_STR = "FULL"; +const char *const F_REPLICA_STR = "F"; +// logonly replica +const char *const LOGONLY_REPLICA_STR = "LOGONLY"; +const char *const L_REPLICA_STR = "L"; +// backup replica +const char *const BACKUP_REPLICA_STR = "BACKUP"; +const char *const B_REPLICA_STR = "B"; +// 
readonly replica +const char *const READONLY_REPLICA_STR = "READONLY"; +const char *const R_REPLICA_STR = "R"; +// memonly replica +const char *const MEMONLY_REPLICA_STR = "MEMONLY"; +const char *const M_REPLICA_STR = "M"; +// encryption logonly replica +const char *const ENCRYPTION_LOGONLY_REPLICA_STR = "ENCRYPTION_LOGONLY"; +const char *const E_REPLICA_STR = "E"; +// columnstore replica +const char *const COLUMNSTORE_REPLICA_STR = "COLUMNSTORE"; +const char *const C_REPLICA_STR = "C"; class ObReplicaTypeCheck { public: + // Currently only three types are valid, + // including REPLICA_TYPE_FULL, REPLICA_TYPE_READONLY, and REPLICA_TYPE_COLUMNSTORE static bool is_replica_type_valid(const int32_t replica_type) { return REPLICA_TYPE_FULL == replica_type - || REPLICA_TYPE_READONLY == replica_type; + || REPLICA_TYPE_READONLY == replica_type + || REPLICA_TYPE_COLUMNSTORE == replica_type; } static bool is_can_elected_replica(const int32_t replica_type) { @@ -2194,6 +2192,10 @@ public: { return (REPLICA_TYPE_READONLY == replica_type); } + static bool is_columnstore_replica(const int32_t replica_type) + { + return (REPLICA_TYPE_COLUMNSTORE == replica_type); + } static bool is_log_replica(const int32_t replica_type) { return (REPLICA_TYPE_LOGONLY == replica_type || REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type); @@ -2208,13 +2210,18 @@ public: return (replica_type >= REPLICA_TYPE_FULL && replica_type <= REPLICA_TYPE_LOGONLY) || (REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type); } + static bool is_non_paxos_replica(const int32_t replica_type) + { + return (REPLICA_TYPE_READONLY == replica_type || REPLICA_TYPE_COLUMNSTORE == replica_type); + } static bool is_writable_replica(const int32_t replica_type) { return (REPLICA_TYPE_FULL == replica_type); } static bool is_readable_replica(const int32_t replica_type) { - return (REPLICA_TYPE_FULL == replica_type || REPLICA_TYPE_READONLY == replica_type); + return (REPLICA_TYPE_FULL == replica_type || REPLICA_TYPE_READONLY == 
replica_type + || REPLICA_TYPE_COLUMNSTORE == replica_type); } static bool is_replica_with_memstore(const ObReplicaType replica_type) { @@ -2228,15 +2235,11 @@ public: { return (REPLICA_TYPE_FULL == replica_type || REPLICA_TYPE_READONLY == replica_type); } - static bool can_as_data_source(const int32_t dest_replica_type, const int32_t src_replica_type) - { - return (dest_replica_type == src_replica_type - || REPLICA_TYPE_FULL == src_replica_type); // TODO temporarily only supports the same type or F as the data source - } //Currently only copies of F and R can be used for machine reading, not L static bool can_slave_read_replica(const int32_t replica_type) { - return (REPLICA_TYPE_FULL == replica_type || REPLICA_TYPE_READONLY == replica_type); + return (REPLICA_TYPE_FULL == replica_type || REPLICA_TYPE_READONLY == replica_type + || REPLICA_TYPE_COLUMNSTORE == replica_type); } static bool change_replica_op_allow(const ObReplicaType source, const ObReplicaType target) @@ -2245,6 +2248,8 @@ public: if (REPLICA_TYPE_LOGONLY == source || REPLICA_TYPE_LOGONLY == target) { bool_ret = false; + } else if (REPLICA_TYPE_COLUMNSTORE == source || REPLICA_TYPE_COLUMNSTORE == target) { + bool_ret = false; } else if (REPLICA_TYPE_FULL == source) { bool_ret = true; } else if (REPLICA_TYPE_READONLY == source && REPLICA_TYPE_FULL == target) { diff --git a/deps/oblib/src/lib/utility/utility.cpp b/deps/oblib/src/lib/utility/utility.cpp index 3dbe3d4bb..598592531 100644 --- a/deps/oblib/src/lib/utility/utility.cpp +++ b/deps/oblib/src/lib/utility/utility.cpp @@ -1658,33 +1658,6 @@ int long_to_str10(int64_t val,char *dst, const int64_t buf_len, const bool is_si //////////////////////////////////////////////////////////////////////////////////////////////////// - -const char *replica_type_to_str(const ObReplicaType &type) -{ - const char *str = ""; - - switch (type) { - case REPLICA_TYPE_FULL: - str = "REPLICA_TYPE_FULL"; - break; - case REPLICA_TYPE_BACKUP: - str = "REPLICA_TYPE_BACKUP"; 
- break; - case REPLICA_TYPE_LOGONLY: - str = "REPLICA_TYPE_LOGONLY"; - break; - case REPLICA_TYPE_READONLY: - str = "REPLICA_TYPE_READONLY"; - break; - case REPLICA_TYPE_MEMONLY: - str = "REPLICA_TYPE_MEMONLY"; - break; - default: - str = "REPLICA_TYPE_UNKNOWN"; - } - return str; -} - bool ez2ob_addr(ObAddr &addr, easy_addr_t& ez) { bool ret = false; diff --git a/deps/oblib/src/lib/utility/utility.h b/deps/oblib/src/lib/utility/utility.h index a049aa222..93dc35660 100644 --- a/deps/oblib/src/lib/utility/utility.h +++ b/deps/oblib/src/lib/utility/utility.h @@ -1273,8 +1273,6 @@ private: void get_addr_by_proxy_sessid(const uint64_t session_id, ObAddr &addr); -const char *replica_type_to_str(const ObReplicaType &type); - int ob_atoll(const char *str, int64_t &res); int ob_strtoll(const char *str, char *&endptr, int64_t &res); int ob_strtoull(const char *str, char *&endptr, uint64_t &res); diff --git a/mittest/mtlenv/storage/test_ls_migration_param.cpp b/mittest/mtlenv/storage/test_ls_migration_param.cpp index 6f7e162dc..e03038d4c 100644 --- a/mittest/mtlenv/storage/test_ls_migration_param.cpp +++ b/mittest/mtlenv/storage/test_ls_migration_param.cpp @@ -253,7 +253,7 @@ TEST_F(TestLSMigrationParam, test_migrate_tablet_param) SCN scn; scn.convert_from_ts(ObTimeUtility::current_time()); ret = src_handle.get_obj()->init_for_first_time_creation(allocator_, src_key.ls_id_, src_key.tablet_id_, src_key.tablet_id_, - scn, 2022, create_tablet_schema, true/*need_create_empty_major_sstable*/, ls_handle.get_ls()->get_freezer()); + scn, 2022, create_tablet_schema, true/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, ls_handle.get_ls()->get_freezer()); ASSERT_EQ(common::OB_SUCCESS, ret); share::SCN create_commit_scn; @@ -334,7 +334,7 @@ TEST_F(TestLSMigrationParam, test_migration_param_compat) SCN scn; scn.convert_from_ts(ObTimeUtility::current_time()); ret = src_handle.get_obj()->init_for_first_time_creation(allocator_, src_key.ls_id_, 
src_key.tablet_id_, src_key.tablet_id_, - scn, 2022, create_tablet_schema, true/*need_create_empty_major_sstable*/, ls_handle.get_ls()->get_freezer()); + scn, 2022, create_tablet_schema, true/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, ls_handle.get_ls()->get_freezer()); ASSERT_EQ(common::OB_SUCCESS, ret); share::SCN create_commit_scn; diff --git a/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp b/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp index 67b2a25ee..3461b2930 100644 --- a/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp +++ b/mittest/mtlenv/storage/test_ls_tablet_info_writer_and_reader.cpp @@ -170,7 +170,7 @@ void TestLSTabletInfoWR::fill_tablet_meta() SCN scn; scn.convert_from_ts(ObTimeUtility::current_time()); ret = src_handle.get_obj()->init_for_first_time_creation(arena_allocator_, src_key.ls_id_, src_key.tablet_id_, src_key.tablet_id_, - scn, 2022, create_tablet_schema, true/*need_create_empty_major_sstable*/, ls_handle.get_ls()->get_freezer()); + scn, 2022, create_tablet_schema, true/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, ls_handle.get_ls()->get_freezer()); ASSERT_EQ(common::OB_SUCCESS, ret); share::SCN create_commit_scn; diff --git a/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp b/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp index d5c29649b..91cb6d6b1 100644 --- a/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp +++ b/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp @@ -141,6 +141,7 @@ void TestTableScanPureDataTable::insert_data_to_tablet(MockObAccessService *acce share::schema::ObTableSchema table_schema; TestDmlCommon::build_data_table_schema(tenant_id_, table_schema); + table_schema.set_tablet_id(tablet_id_); ObSEArray index_schema_array; @@ -181,6 +182,7 @@ void TestTableScanPureDataTable::table_scan( // prepare table schema share::schema::ObTableSchema table_schema; 
TestDmlCommon::build_data_table_schema(tenant_id_, table_schema); + table_schema.set_tablet_id(tablet_id_); // 1. get tx desc transaction::ObTxDesc *tx_desc = nullptr; diff --git a/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp b/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp index a02718ad8..c6e8eeacc 100644 --- a/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp +++ b/mittest/mtlenv/storage/test_tenant_meta_mem_mgr.cpp @@ -718,7 +718,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_tablet) ObTabletID empty_tablet_id; ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, - create_scn, create_scn.get_val_for_tx(), create_tablet_schema, true/*need_create_empty_major_sstable*/, &freezer); + create_scn, create_scn.get_val_for_tx(), create_tablet_schema, true/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); ObTabletPersister persister; @@ -817,7 +817,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_inner_tablet) bool make_empty_co_sstable = true; ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, create_scn, create_scn.get_val_for_tx(), create_tablet_schema, - make_empty_co_sstable/*need_create_empty_major_sstable*/, &freezer); + make_empty_co_sstable/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); @@ -927,7 +927,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_no_sstable_tablet) bool make_empty_co_sstable = false; ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, create_scn, create_scn.get_val_for_tx(), create_tablet_schema, - make_empty_co_sstable/*need_create_empty_major_sstable*/, &freezer); + make_empty_co_sstable/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); 
ASSERT_EQ(1, tablet->get_ref()); @@ -1024,7 +1024,7 @@ TEST_F(TestTenantMetaMemMgr, test_get_tablet_with_allocator) bool make_empty_co_sstable = true; ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, create_scn, create_scn.get_val_for_tx(), create_tablet_schema, - make_empty_co_sstable/*need_create_empty_major_sstable*/, &freezer); + make_empty_co_sstable/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); @@ -1152,7 +1152,7 @@ TEST_F(TestTenantMetaMemMgr, test_wash_mem_tablet) bool make_empty_co_sstable = false; ret = tablet->init_for_first_time_creation(allocator_, ls_id_, tablet_id, tablet_id, create_scn, create_scn.get_val_for_tx(), create_tablet_schema, - make_empty_co_sstable/*need_create_empty_major_sstable*/, &freezer); + make_empty_co_sstable/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, &freezer); ASSERT_EQ(common::OB_SUCCESS, ret); ASSERT_EQ(1, tablet->get_ref()); diff --git a/mittest/mtlenv/test_tx_data_table.cpp b/mittest/mtlenv/test_tx_data_table.cpp index 212516f49..0be8d15ca 100644 --- a/mittest/mtlenv/test_tx_data_table.cpp +++ b/mittest/mtlenv/test_tx_data_table.cpp @@ -727,6 +727,7 @@ void TestTxDataTable::fake_ls_(ObLS &ls) ls.ls_meta_.migration_status_ = ObMigrationStatus::OB_MIGRATION_STATUS_NONE; ls.ls_meta_.restore_status_ = ObLSRestoreStatus::NONE; ls.ls_meta_.rebuild_seq_ = 0; + ls.ls_meta_.store_format_ = common::ObLSStoreType::OB_LS_STORE_NORMAL; } void TestTxDataTable::do_print_leak_slice_test() diff --git a/mittest/simple_server/test_ls_recover.cpp b/mittest/simple_server/test_ls_recover.cpp index 544df0d00..9d0639d64 100644 --- a/mittest/simple_server/test_ls_recover.cpp +++ b/mittest/simple_server/test_ls_recover.cpp @@ -267,6 +267,7 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_without_disk) migration_status, 
ObLSRestoreStatus(ObLSRestoreStatus::NONE), arg.get_create_scn(), + ObLSStoreFormat(ObLSStoreType::OB_LS_STORE_NORMAL), ls)); ObLSLockGuard lock_ls(ls); const ObLSMeta &ls_meta = ls->get_ls_meta(); @@ -295,6 +296,7 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_disk) migration_status, ObLSRestoreStatus(ObLSRestoreStatus::NONE), arg.get_create_scn(), + ObLSStoreFormat(ObLSStoreType::OB_LS_STORE_NORMAL), ls)); const bool unused_allow_log_sync = true; prepare_palf_base_info(arg, palf_base_info); @@ -329,6 +331,7 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_inner_tablet) migration_status, ObLSRestoreStatus(ObLSRestoreStatus::NONE), arg.get_create_scn(), + ObLSStoreFormat(ObLSStoreType::OB_LS_STORE_NORMAL), ls)); const bool unused_allow_log_sync = true; prepare_palf_base_info(arg, palf_base_info); @@ -365,6 +368,7 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_commit_slog) migration_status, ObLSRestoreStatus(ObLSRestoreStatus::NONE), arg.get_create_scn(), + ObLSStoreFormat(ObLSStoreType::OB_LS_STORE_NORMAL), ls)); const bool unused_allow_log_sync = true; prepare_palf_base_info(arg, palf_base_info); @@ -404,6 +408,7 @@ TEST_F(ObLSBeforeRestartTest, create_restore_ls) migration_status, ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_START), arg.get_create_scn(), + ObLSStoreFormat(ObLSStoreType::OB_LS_STORE_NORMAL), ls)); const bool unused_allow_log_sync = true; prepare_palf_base_info(arg, palf_base_info); @@ -448,6 +453,7 @@ TEST_F(ObLSBeforeRestartTest, create_rebuild_ls) migration_status, ObLSRestoreStatus(ObLSRestoreStatus::NONE), arg.get_create_scn(), + ObLSStoreFormat(ObLSStoreType::OB_LS_STORE_NORMAL), ls)); const bool unused_allow_log_sync = true; prepare_palf_base_info(arg, palf_base_info); diff --git a/src/logservice/palf/palf_handle_impl.cpp b/src/logservice/palf/palf_handle_impl.cpp index dfa18756f..eb735c266 100755 --- a/src/logservice/palf/palf_handle_impl.cpp +++ b/src/logservice/palf/palf_handle_impl.cpp @@ -5435,10 
+5435,8 @@ void PalfHandleImpl::report_switch_learner_to_acceptor_(const common::ObMember & "member", member_buf, "curr_member_list", member_list_buf, "curr_replica_num", curr_replica_num); - char replica_readonly_name_[common::MAX_REPLICA_TYPE_LENGTH]; - char replica_full_name_[common::MAX_REPLICA_TYPE_LENGTH]; - replica_type_to_string(ObReplicaType::REPLICA_TYPE_READONLY, replica_readonly_name_, sizeof(replica_readonly_name_)); - replica_type_to_string(ObReplicaType::REPLICA_TYPE_FULL, replica_full_name_, sizeof(replica_full_name_)); + const char *replica_readonly_name_ = ObShareUtil::replica_type_to_string(ObReplicaType::REPLICA_TYPE_READONLY); + const char *replica_full_name_ = ObShareUtil::replica_type_to_string(ObReplicaType::REPLICA_TYPE_FULL); plugins_.record_replica_type_change_event(palf_id_, config_version, replica_readonly_name_, replica_full_name_, EXTRA_INFOS); } @@ -5457,10 +5455,8 @@ void PalfHandleImpl::report_switch_acceptor_to_learner_(const common::ObMember & "member", member_buf, "curr_member_list", member_list_buf, "curr_replica_num", curr_replica_num); - char replica_readonly_name_[common::MAX_REPLICA_TYPE_LENGTH]; - char replica_full_name_[common::MAX_REPLICA_TYPE_LENGTH]; - replica_type_to_string(ObReplicaType::REPLICA_TYPE_READONLY, replica_readonly_name_, sizeof(replica_readonly_name_)); - replica_type_to_string(ObReplicaType::REPLICA_TYPE_FULL, replica_full_name_, sizeof(replica_full_name_)); + const char *replica_readonly_name_ = ObShareUtil::replica_type_to_string(ObReplicaType::REPLICA_TYPE_READONLY); + const char *replica_full_name_ = ObShareUtil::replica_type_to_string(ObReplicaType::REPLICA_TYPE_FULL); plugins_.record_replica_type_change_event(palf_id_, config_version, replica_full_name_, replica_readonly_name_, EXTRA_INFOS); } diff --git a/src/observer/ob_service.cpp b/src/observer/ob_service.cpp index 7c4076df5..878ca9281 100644 --- a/src/observer/ob_service.cpp +++ b/src/observer/ob_service.cpp @@ -88,6 +88,8 @@ #include 
"share/ob_heartbeat_handler.h" #include "storage/slog/ob_storage_logger_manager.h" #include "storage/high_availability/ob_transfer_lock_utils.h" +#include "storage/column_store/ob_column_store_replica_util.h" +#include "common/ob_store_format.h" // ObLSStoreFormat namespace oceanbase { @@ -2704,7 +2706,17 @@ int ObService::inner_fill_tablet_info_( int64_t data_size = 0; int64_t required_size = 0; ObArray column_checksums; - if (OB_FAIL(tablet_handle.get_obj()->get_tablet_report_info(snapshot_version, column_checksums, + ObTablet *tablet = tablet_handle.get_obj(); + bool need_wait_major_convert_in_cs_replica = false; + if (OB_ISNULL(tablet)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet is unexpected nullptr", K(ret), K(tenant_id), K(tablet_id), K(tablet_handle)); + } else if (OB_FAIL(ObCSReplicaUtil::check_need_wait_major_convert(*ls, tablet_id, *tablet, need_wait_major_convert_in_cs_replica))) { + LOG_WARN("fail to check need wait major convert in cs replica", K(ret), KPC(ls), K(tablet)); + } else if (need_wait_major_convert_in_cs_replica) { + ret = OB_EAGAIN; + LOG_WARN("need wait major convert for cs replica", K(ret), K(tablet_id)); + } else if (OB_FAIL(tablet->get_tablet_report_info(snapshot_version, column_checksums, data_size, required_size, need_checksum))) { LOG_WARN("fail to get tablet report info from tablet", KR(ret), K(tenant_id), K(tablet_id)); } else if (OB_FAIL(tablet_replica.init( @@ -2788,118 +2800,27 @@ int ObService::fill_tablet_report_info( return ret; } -int ObService::get_role_from_palf_( - logservice::ObLogService &log_service, - const share::ObLSID &ls_id, - common::ObRole &role, - int64_t &proposal_id) -{ - int ret = OB_SUCCESS; - role = FOLLOWER; - proposal_id = 0; - palf::PalfHandleGuard palf_handle_guard; - if (OB_FAIL(log_service.open_palf(ls_id, palf_handle_guard))) { - LOG_WARN("open palf failed", KR(ret), K(ls_id)); - } else if (OB_FAIL(palf_handle_guard.get_role(role, proposal_id))) { - LOG_WARN("get role failed", KR(ret), 
K(ls_id)); - } - return ret; -} - int ObService::fill_ls_replica( const uint64_t tenant_id, const ObLSID &ls_id, share::ObLSReplica &replica) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - uint64_t unit_id = common::OB_INVALID_ID; + replica.reset(); if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("service not inited", KR(ret)); - } else if (!ls_id.is_valid() - || OB_INVALID_TENANT_ID == tenant_id - || OB_ISNULL(gctx_.config_)) { + } else if (!ls_id.is_valid() || OB_INVALID_TENANT_ID == tenant_id) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(ls_id)); - } else if (OB_FAIL(GCTX.omt_->get_unit_id(tenant_id, unit_id))) { - LOG_WARN("get tenant unit id failed", KR(ret), K(tenant_id), K(ls_id)); } else { MTL_SWITCH(tenant_id) { - ObLSHandle ls_handle; - ObLSService *ls_svr = nullptr; - logservice::ObLogService *log_service = nullptr; - common::ObRole role = FOLLOWER; - ObMemberList ob_member_list; - ObLSReplica::MemberList member_list; - GlobalLearnerList learner_list; - int64_t proposal_id = 0; - int64_t paxos_replica_number = 0; - ObLSRestoreStatus restore_status; - ObReplicaStatus replica_status = REPLICA_STATUS_NORMAL; - ObReplicaType replica_type = REPLICA_TYPE_FULL; - bool is_compatible_with_readonly_replica = false; - ObMigrationStatus migration_status = OB_MIGRATION_STATUS_MAX; - if (OB_ISNULL(ls_svr = MTL(ObLSService*))) { + ObLSService *ls_svr = MTL(ObLSService*); + if (OB_ISNULL(ls_svr)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("MTL ObLSService is null", KR(ret), K(tenant_id)); - } else if (OB_FAIL(ls_svr->get_ls( - ObLSID(ls_id), - ls_handle, ObLSGetMod::OBSERVER_MOD))) { - LOG_WARN("get ls handle failed", KR(ret)); - } else if (OB_FAIL(ls_handle.get_ls()->get_paxos_member_list_and_learner_list(ob_member_list, paxos_replica_number, learner_list))) { - LOG_WARN("get member list and learner list from ObLS failed", KR(ret)); - } else if (OB_FAIL(ls_handle.get_ls()->get_restore_status(restore_status))) { - 
LOG_WARN("get restore status failed", KR(ret)); - } else if (OB_FAIL(ls_handle.get_ls()->get_migration_status(migration_status))) { - LOG_WARN("get migration status failed", KR(ret)); - } else if (OB_FAIL(ls_handle.get_ls()->get_replica_status(replica_status))) { - LOG_WARN("get replica status failed", KR(ret)); - } else if (OB_ISNULL(log_service = MTL(logservice::ObLogService*))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("MTL ObLogService is null", KR(ret), K(tenant_id)); - } else if (OB_FAIL(get_role_from_palf_(*log_service, ls_id, role, proposal_id))) { - LOG_WARN("failed to get role from palf", KR(ret), K(tenant_id), K(ls_id)); - } else if (OB_SUCCESS != (tmp_ret = ObShareUtil::check_compat_version_for_readonly_replica( - tenant_id, is_compatible_with_readonly_replica))) { - LOG_WARN("fail to check data version for read-only replica", KR(ret), K(tenant_id)); - } - - if (OB_FAIL(ret)) { - } else if (!is_compatible_with_readonly_replica) { - replica_type = REPLICA_TYPE_FULL; - } else if (learner_list.contains(gctx_.self_addr())) { - // if replica exists in learner_list, report it as R-replica. - // Otherwise, report as F-replica - replica_type = REPLICA_TYPE_READONLY; - } - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObLSReplica::transform_ob_member_list(ob_member_list, member_list))) { - LOG_WARN("fail to transfrom ob_member_list into member_list", KR(ret), K(ob_member_list)); - } else if (OB_FAIL(replica.init( - 0, /*create_time_us*/ - 0, /*modify_time_us*/ - tenant_id, /*tenant_id*/ - ls_id, /*ls_id*/ - gctx_.self_addr(), /*server*/ - gctx_.config_->mysql_port, /*sql_port*/ - role, /*role*/ - replica_type, /*replica_type*/ - proposal_id, /*proposal_id*/ - is_strong_leader(role) ? 
REPLICA_STATUS_NORMAL : replica_status,/*replica_status*/ - restore_status, /*restore_status*/ - 100, /*memstore_percent*/ - unit_id, /*unit_id*/ - gctx_.config_->zone.str(), /*zone*/ - paxos_replica_number, /*paxos_replica_number*/ - 0, /*data_size*/ - 0, /*required_size*/ - member_list, - learner_list, - OB_MIGRATION_STATUS_REBUILD == migration_status /*is_rebuild*/))) { - LOG_WARN("fail to init a ls replica", KR(ret), K(tenant_id), K(ls_id), K(role), - K(proposal_id), K(unit_id), K(paxos_replica_number), K(member_list), K(learner_list)); + LOG_WARN("ObLSService is null", KR(ret)); + } else if (OB_FAIL(ls_svr->get_ls_replica(ls_id, ObLSGetMod::OBSERVER_MOD, replica))) { + LOG_WARN("fail to get_ls_replica", KR(ret), K(ls_id)); } else { LOG_TRACE("finish fill ls replica", KR(ret), K(tenant_id), K(ls_id), K(replica)); } diff --git a/src/observer/ob_service.h b/src/observer/ob_service.h index a4ed78244..c7f3dfea1 100644 --- a/src/observer/ob_service.h +++ b/src/observer/ob_service.h @@ -269,11 +269,6 @@ public: int check_server_empty(bool &server_empty); private: - int get_role_from_palf_( - logservice::ObLogService &log_service, - const share::ObLSID &ls_id, - common::ObRole &role, - int64_t &proposal_id); int inner_fill_tablet_info_( const int64_t tenant_id, const ObTabletID &tablet_id, diff --git a/src/observer/table/ob_table_cg_service.cpp b/src/observer/table/ob_table_cg_service.cpp index 1cc7014c2..e797165aa 100644 --- a/src/observer/table/ob_table_cg_service.cpp +++ b/src/observer/table/ob_table_cg_service.cpp @@ -688,6 +688,7 @@ int ObTableLocCgService::generate_table_loc_meta(const ObTableCtx &ctx, bool is_lookup) { int ret = OB_SUCCESS; + int64_t route_policy = 0; const ObTableSchema *table_schema = ctx.get_table_schema(); const ObTableSchema *index_schema = ctx.get_index_schema(); @@ -697,8 +698,11 @@ int ObTableLocCgService::generate_table_loc_meta(const ObTableCtx &ctx, } else if (ctx.is_index_scan() && OB_ISNULL(index_schema)) { ret = OB_ERR_UNEXPECTED; 
LOG_WARN("index schema is null", K(ret)); + } else if (OB_FAIL(ctx.get_session_info().get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) { + LOG_WARN("fail to get route policy from session", K(ret)); } else { loc_meta.reset(); + loc_meta.route_policy_ = route_policy; // is_lookup 有什么用?好像都是 false loc_meta.ref_table_id_ = is_lookup ? ctx.get_ref_table_id() : ctx.get_index_table_id(); loc_meta.table_loc_id_ = ctx.get_ref_table_id(); @@ -2541,7 +2545,8 @@ int ObTableTscCgService::generate_das_result_output(ObDASScanCtDef &das_tsc_ctde // 主表/索引回表/索引扫描不需要回表: select column ids // 索引表: rowkey column ids int ObTableTscCgService::generate_table_param(const ObTableCtx &ctx, - ObDASScanCtDef &das_tsc_ctdef) + ObDASScanCtDef &das_tsc_ctdef, + const bool query_cs_replica /*=false*/) { int ret = OB_SUCCESS; ObSEArray tsc_out_cols; @@ -2595,7 +2600,9 @@ int ObTableTscCgService::generate_table_param(const ObTableCtx &ctx, } else if (OB_FAIL(das_tsc_ctdef.table_param_.convert(*table_schema, das_tsc_ctdef.access_column_ids_, das_tsc_ctdef.pd_expr_spec_.pd_storage_flag_, - &tsc_out_cols))) { + &tsc_out_cols, + false /*force_mysql_mode*/, + query_cs_replica))) { LOG_WARN("fail to convert schema", K(ret), K(*table_schema)); } else if (OB_FAIL(generate_das_result_output(das_tsc_ctdef, tsc_out_cols))) { LOG_WARN("fail to generate das result outpur", K(ret), K(tsc_out_cols)); @@ -2606,7 +2613,8 @@ int ObTableTscCgService::generate_table_param(const ObTableCtx &ctx, int ObTableTscCgService::generate_das_tsc_ctdef(const ObTableCtx &ctx, ObIAllocator &allocator, - ObDASScanCtDef &das_tsc_ctdef) + ObDASScanCtDef &das_tsc_ctdef, + const bool query_cs_replica /*=false*/) { int ret = OB_SUCCESS; ObSchemaGetterGuard &schema_guard = (const_cast(ctx)).get_schema_guard(); @@ -2616,7 +2624,7 @@ int ObTableTscCgService::generate_das_tsc_ctdef(const ObTableCtx &ctx, if (OB_FAIL(generate_access_ctdef(ctx, allocator, das_tsc_ctdef))) { // init access_column_ids_,pd_expr_spec_.access_exprs_ 
LOG_WARN("fail to generate asccess ctdef", K(ret)); - } else if (OB_FAIL(generate_table_param(ctx, das_tsc_ctdef))) { // init table_param_, result_output_ + } else if (OB_FAIL(generate_table_param(ctx, das_tsc_ctdef, query_cs_replica))) { // init table_param_, result_output_ LOG_WARN("fail to generate table param", K(ret)); } @@ -2663,7 +2671,7 @@ int ObTableTscCgService::generate_tsc_ctdef(const ObTableCtx &ctx, int ret = OB_SUCCESS; ObStaticEngineCG cg(ctx.get_cur_cluster_version()); const int64_t filter_exprs_cnt = ctx.get_filter_exprs().count(); - + bool query_cs_replica = false; // init scan_ctdef_.ref_table_id_ tsc_ctdef.scan_ctdef_.ref_table_id_ = ctx.get_index_table_id(); if (OB_FAIL(tsc_ctdef.output_exprs_.init(ctx.get_select_exprs().count()))) { @@ -2674,7 +2682,9 @@ int ObTableTscCgService::generate_tsc_ctdef(const ObTableCtx &ctx, LOG_WARN("fail to generate output exprs", K(ret)); } else if (OB_FAIL(cg.generate_rt_exprs(ctx.get_filter_exprs(), tsc_ctdef.filter_exprs_))) { LOG_WARN("fail to generate filter rt exprs ", K(ret)); - } else if (OB_FAIL(generate_das_tsc_ctdef(ctx, allocator, tsc_ctdef.scan_ctdef_))) { // init scan_ctdef_ + } else if (OB_FAIL(ctx.check_is_cs_replica_query(query_cs_replica))) { + LOG_WARN("fail to check is cs replica query", K(ret)); + } else if (OB_FAIL(generate_das_tsc_ctdef(ctx, allocator, tsc_ctdef.scan_ctdef_, query_cs_replica))) { // init scan_ctdef_ LOG_WARN("fail to generate das scan ctdef", K(ret)); } else if (ctx.is_index_back()) { // init lookup_ctdef_,lookup_loc_meta_ diff --git a/src/observer/table/ob_table_cg_service.h b/src/observer/table/ob_table_cg_service.h index 5caf02217..ef0cba40a 100644 --- a/src/observer/table/ob_table_cg_service.h +++ b/src/observer/table/ob_table_cg_service.h @@ -362,7 +362,8 @@ public: private: static int generate_das_tsc_ctdef(const ObTableCtx &ctx, ObIAllocator &allocator, - sql::ObDASScanCtDef &das_tsc_ctdef); + sql::ObDASScanCtDef &das_tsc_ctdef, + const bool query_cs_replica = 
false); static int replace_gen_col_exprs(const ObTableCtx &ctx, common::ObIArray &access_exprs); static int generate_output_exprs(const ObTableCtx &ctx, @@ -371,7 +372,8 @@ private: ObIAllocator &allocator, sql::ObDASScanCtDef &das_tsc_ctdef); static int generate_table_param(const ObTableCtx &ctx, - sql::ObDASScanCtDef &das_tsc_ctdef); + sql::ObDASScanCtDef &das_tsc_ctdef, + const bool query_cs_replica = false); static OB_INLINE bool is_in_array(const common::ObIArray &array, const sql::ObRawExpr *expr) { diff --git a/src/observer/table/ob_table_context.cpp b/src/observer/table/ob_table_context.cpp index 8bb85b7b2..2fb5c0ffa 100644 --- a/src/observer/table/ob_table_context.cpp +++ b/src/observer/table/ob_table_context.cpp @@ -784,6 +784,20 @@ int ObTableCtx::adjust_entity() return ret; } +int ObTableCtx::check_is_cs_replica_query(bool &is_cs_replica_query) const +{ + int ret = OB_SUCCESS; + is_cs_replica_query = false; + if (ObRoutePolicyType::INVALID_POLICY == loc_meta_.route_policy_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid route policy", K(ret)); + } else { + is_cs_replica_query = ObRoutePolicyType::COLUMN_STORE_ONLY == loc_meta_.route_policy_; + } + LOG_TRACE("[CS-Replica] check cs replica query", K(ret), K(is_cs_replica_query), K_(loc_meta)); + return ret; +} + bool ObTableCtx::has_exist_in_columns(const ObIArray &columns, const ObString &name, int64_t *idx /* =nullptr */) const diff --git a/src/observer/table/ob_table_context.h b/src/observer/table/ob_table_context.h index af494a51f..7a797aa2c 100644 --- a/src/observer/table/ob_table_context.h +++ b/src/observer/table/ob_table_context.h @@ -413,6 +413,9 @@ public: // read lob的allocator需要保证obj序列化到rpc buffer后才能析构 static int read_real_lob(common::ObIAllocator &allocator, ObObj &obj); int adjust_entity(); +public: + // for column store replica query + int check_is_cs_replica_query(bool &is_cs_replica_query) const; private: // for common int get_tablet_by_rowkey(const common::ObRowkey &rowkey, diff --git 
a/src/observer/virtual_table/ob_all_virtual_ls_info.cpp b/src/observer/virtual_table/ob_all_virtual_ls_info.cpp index f568fccf6..361da89c3 100644 --- a/src/observer/virtual_table/ob_all_virtual_ls_info.cpp +++ b/src/observer/virtual_table/ob_all_virtual_ls_info.cpp @@ -130,15 +130,8 @@ int ObAllVirtualLSInfo::process_curr_tenant(ObNewRow *&row) break; case OB_APP_MIN_COLUMN_ID + 4: { // replica_type - if (OB_FAIL(replica_type_to_string(ls_info.replica_type_, - replica_type_name_, - sizeof(replica_type_name_)))) { - SERVER_LOG(WARN, "get replica type name failed", K(ret), K(ls_info.replica_type_)); - } else { - replica_type_name_[MAX_REPLICA_TYPE_LENGTH - 1] = '\0'; - cur_row_.cells_[i].set_varchar(replica_type_name_); - cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); - } + cur_row_.cells_[i].set_varchar(ObShareUtil::replica_type_to_string(ls_info.replica_type_)); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); break; } case OB_APP_MIN_COLUMN_ID + 5: { diff --git a/src/observer/virtual_table/ob_all_virtual_ls_info.h b/src/observer/virtual_table/ob_all_virtual_ls_info.h index 23e922447..eb005701b 100644 --- a/src/observer/virtual_table/ob_all_virtual_ls_info.h +++ b/src/observer/virtual_table/ob_all_virtual_ls_info.h @@ -48,7 +48,6 @@ private: common::ObAddr addr_; char ip_buf_[common::OB_IP_STR_BUFF]; char state_name_[common::MAX_LS_STATE_LENGTH]; - char replica_type_name_[common::MAX_REPLICA_TYPE_LENGTH]; /* 跨租户访问的资源必须由ObMultiTenantOperator来处理释放*/ int64_t ls_id_; ObSharedGuard ls_iter_guard_; diff --git a/src/observer/virtual_table/ob_all_virtual_proxy_schema.cpp b/src/observer/virtual_table/ob_all_virtual_proxy_schema.cpp index 5a0e22873..91f434005 100644 --- a/src/observer/virtual_table/ob_all_virtual_proxy_schema.cpp +++ b/src/observer/virtual_table/ob_all_virtual_proxy_schema.cpp @@ -931,6 +931,43 @@ int 
ObAllVirtualProxySchema::get_next_tenant_server_( return ret; } +int ObAllVirtualProxySchema::get_replica_type_from_locality_( + const ZoneLocalityIArray &zone_locality_array, + const ObZone &zone, + ObReplicaType &replica_type) +{ + int ret = OB_SUCCESS; + replica_type = REPLICA_TYPE_FULL; + if (OB_UNLIKELY(zone_locality_array.empty() || zone.is_empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(zone_locality_array), K(zone)); + } else { + bool zone_found = false; + FOREACH_CNT_X(zone_locality, zone_locality_array, !zone_found && OB_SUCCESS == ret) { + if (zone_locality->get_zone_set().at(0) == zone) { + zone_found = true; + if (zone_locality->get_readonly_replica_num() > 0) { + replica_type = REPLICA_TYPE_READONLY; + } else if (zone_locality->get_columnstore_replica_num() > 0) { + replica_type = REPLICA_TYPE_COLUMNSTORE; + } else if (zone_locality->get_full_replica_num() > 0) { + replica_type = REPLICA_TYPE_FULL; + } else { + // unrecognized replica_type + ret = OB_ERR_UNEXPECTED; + replica_type = REPLICA_TYPE_INVALID; + LOG_WARN("unrecognized replica type", KR(ret), KPC(zone_locality)); + } + } + } + if (!zone_found) { + // tenant locality does not include this zone, regard as FULL + replica_type = REPLICA_TYPE_FULL; + } + } + return ret; +} + int ObAllVirtualProxySchema::fill_tenant_servers_( const uint64_t tenant_id, ObMySQLResult &result, @@ -952,6 +989,17 @@ int ObAllVirtualProxySchema::fill_tenant_servers_( int64_t svr_idx = 0; first_idx_in_zone.reset(); tenant_servers_.reset(); + const ObTenantSchema *tenant_schema = NULL; + ObArray zone_locality; + + if (OB_FAIL(schema_guard_.get_tenant_info(tenant_id, tenant_schema))) { + LOG_WARN("fail to get tenant info", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(tenant_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tenant not exist", KR(ret), K(tenant_id)); + } else if (OB_FAIL(tenant_schema->get_zone_replica_attr_array(zone_locality))) { + LOG_WARN("fail to get 
zone_locality_array"); + } while (OB_SUCC(ret) && OB_SUCC(result.next())) { tenant_server.reset(); @@ -960,7 +1008,9 @@ int ObAllVirtualProxySchema::fill_tenant_servers_( EXTRACT_VARCHAR_FIELD_MYSQL(result, "svr_ip", svr_ip); EXTRACT_INT_FIELD_MYSQL(result, "inner_port", sql_port, int64_t); EXTRACT_VARCHAR_FIELD_MYSQL(result, "zone", zone); - if (OB_UNLIKELY(!server.set_ip_addr(svr_ip, svr_port))) { + if (FAILEDx(get_replica_type_from_locality_(zone_locality, zone, replica_type))) { + LOG_WARN("failed to get replica_type", KR(ret), K(zone_locality), K(zone)); + } else if (OB_UNLIKELY(!server.set_ip_addr(svr_ip, svr_port))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to set_ip_addr", KR(ret), K(svr_ip), K(svr_port)); } else if (OB_FAIL(replica_location.init( diff --git a/src/observer/virtual_table/ob_all_virtual_proxy_schema.h b/src/observer/virtual_table/ob_all_virtual_proxy_schema.h index 0cbdeca1d..c7e512c12 100644 --- a/src/observer/virtual_table/ob_all_virtual_proxy_schema.h +++ b/src/observer/virtual_table/ob_all_virtual_proxy_schema.h @@ -144,6 +144,10 @@ private: int get_next_tenant_server_(const common::ObString &table_name, const share::schema::ObTableSchema *table_schema); int get_tenant_servers_(const uint64_t tenant_id); + int get_replica_type_from_locality_( + const share::schema::ZoneLocalityIArray &zone_locality_array, + const ObZone &zone, + ObReplicaType &replica_type); int fill_tenant_servers_( const uint64_t tenant_id, common::sqlclient::ObMySQLResult &result, diff --git a/src/rootserver/freeze/ob_checksum_validator.cpp b/src/rootserver/freeze/ob_checksum_validator.cpp index 8694cedc8..bfbbdc8c2 100755 --- a/src/rootserver/freeze/ob_checksum_validator.cpp +++ b/src/rootserver/freeze/ob_checksum_validator.cpp @@ -28,6 +28,7 @@ #include "share/ob_zone_merge_info.h" #include "share/ob_freeze_info_manager.h" #include "rootserver/freeze/ob_fts_checksum_validate_util.h" +#include "storage/compaction/ob_medium_compaction_func.h" namespace oceanbase { @@ 
-422,42 +423,9 @@ int ObChecksumValidator::verify_tablet_replica_checksum() if (OB_UNLIKELY(replica_ckm_items_.empty())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(replica_ckm_items_)); - } else { - const ObTabletReplicaChecksumItem *prev_item = nullptr; - ObSEArray error_pairs; - error_pairs.set_attr(ObMemAttr(tenant_id_, "CkmErrPairs")); - ObLSID prev_error_ls_id; - ObTabletID prev_error_table_id; - int64_t affected_rows = 0; - for (int64_t i = 0; OB_SUCC(ret) && (i < replica_ckm_items_.count()); ++i) { - const ObTabletReplicaChecksumItem &curr_item = replica_ckm_items_.at(i); - if (OB_NOT_NULL(prev_item) - && curr_item.is_same_tablet(*prev_item)) { // same tablet - if (OB_FAIL(curr_item.verify_checksum(*prev_item))) { - if (OB_CHECKSUM_ERROR == ret) { - LOG_DBA_ERROR(OB_CHECKSUM_ERROR, "msg", "checksum error in tablet replica checksum", KR(ret), - K(curr_item), KPC(prev_item)); - if (curr_item.ls_id_ != prev_error_ls_id || curr_item.tablet_id_ != prev_error_table_id) { - prev_error_ls_id = curr_item.ls_id_; - prev_error_table_id = curr_item.tablet_id_; - if (OB_TMP_FAIL(error_pairs.push_back(ObTabletLSPair(curr_item.tablet_id_, curr_item.ls_id_)))) { - LOG_WARN("fail to push back error pair", K(tmp_ret), "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); - } - } - } else { - LOG_WARN("unexpected error in tablet replica checksum", KR(ret), K(curr_item), KPC(prev_item)); - } - } - } - prev_item = &curr_item; - } - if (!error_pairs.empty()) { - if (OB_TMP_FAIL(ObTabletMetaTableCompactionOperator::batch_set_info_status(MTL_ID(), error_pairs, affected_rows))) { - LOG_WARN("fail to batch set info status", KR(tmp_ret)); - } else { - LOG_INFO("succ to batch set info status", K(tmp_ret), K(affected_rows), K(error_pairs)); - } - } + } else if (OB_FAIL(ObMediumCompactionScheduleFunc::check_replica_checksum_items( + replica_ckm_items_.array_, ls_locality_cache_.get_cs_replica_cache(), false /*is_medium_checker*/))) { + LOG_WARN("failed 
to verify tablet replica checksum", K(ret)); } return ret; } diff --git a/src/rootserver/freeze/ob_checksum_validator.h b/src/rootserver/freeze/ob_checksum_validator.h index fc8dcbf54..045d6c2b9 100644 --- a/src/rootserver/freeze/ob_checksum_validator.h +++ b/src/rootserver/freeze/ob_checksum_validator.h @@ -67,7 +67,8 @@ public: ObArray &finish_tablet_ls_pair_array, ObArray &finish_tablet_ckm_array, compaction::ObUncompactInfo &uncompact_info, - ObFTSGroupArray &fts_group_array) + ObFTSGroupArray &fts_group_array, + share::ObCompactionLocalityCache &ls_locality_cache) : is_inited_(false), is_primary_service_(false), need_validate_index_ckm_(false), @@ -93,7 +94,8 @@ public: simple_schema_(nullptr), table_compaction_info_(), replica_ckm_items_(), - last_table_ckm_items_(tenant_id) + last_table_ckm_items_(tenant_id), + ls_locality_cache_(ls_locality_cache) {} ~ObChecksumValidator() {} int init( @@ -198,6 +200,7 @@ private: ObArray cur_tablet_ls_pair_array_; ObReplicaCkmItems replica_ckm_items_; compaction::ObTableCkmItems last_table_ckm_items_; // only cached last data table with index + share::ObCompactionLocalityCache &ls_locality_cache_; }; } // end namespace rootserver diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp index f3ac87708..46912199c 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp @@ -50,11 +50,11 @@ ObMajorMergeProgressChecker::ObMajorMergeProgressChecker( loop_cnt_(0), last_errno_(OB_SUCCESS), tenant_id_(tenant_id), compaction_scn_(), expected_epoch_(OB_INVALID_ID), sql_proxy_(nullptr), schema_service_(nullptr), server_trace_(nullptr), progress_(), - tablet_status_map_(), table_compaction_map_(), fts_group_array_(), + tablet_status_map_(), table_compaction_map_(), fts_group_array_(), ls_locality_cache_(), ckm_validator_(tenant_id, stop_, tablet_ls_pair_cache_, tablet_status_map_, 
table_compaction_map_, idx_ckm_validate_array_, validator_statistics_, - finish_tablet_ls_pair_array_, finish_tablet_ckm_array_, uncompact_info_, fts_group_array_), - uncompact_info_(), ls_locality_cache_(), total_time_guard_(), validator_statistics_(), batch_size_mgr_() {} + finish_tablet_ls_pair_array_, finish_tablet_ckm_array_, uncompact_info_, fts_group_array_, ls_locality_cache_), + uncompact_info_(), total_time_guard_(), validator_statistics_(), batch_size_mgr_() {} int ObMajorMergeProgressChecker::init( const bool is_primary_service, @@ -437,13 +437,11 @@ int ObMajorMergeProgressChecker::prepare_check_progress( bool &exist_uncompacted_table) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; exist_uncompacted_table = true; table_ids_.start_looping(); - if (OB_TMP_FAIL(ls_locality_cache_.refresh_ls_locality(first_loop_in_cur_round_ /*force_refresh*/))) { - LOG_WARN("failed to refresh ls locality", K(tmp_ret)); - } - if (first_loop_in_cur_round_) { + if (OB_FAIL(ls_locality_cache_.refresh_ls_locality(first_loop_in_cur_round_ /*force_refresh*/))) { + LOG_WARN("failed to refresh ls locality", K(ret)); + } else if (first_loop_in_cur_round_) { total_time_guard_.reuse(); if (OB_FAIL(prepare_unfinish_table_ids())) { LOG_WARN("fail to prepare table_id_map", KR(ret), K_(tenant_id)); diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.h b/src/rootserver/freeze/ob_major_merge_progress_checker.h index f8a02da91..e4ad4c7ad 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.h +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.h @@ -149,10 +149,10 @@ private: // record each table compaction/verify status compaction::ObTableCompactionInfoMap table_compaction_map_; // ObFTSGroupArray fts_group_array_; + share::ObCompactionLocalityCache ls_locality_cache_; ObChecksumValidator ckm_validator_; compaction::ObUncompactInfo uncompact_info_; // cache of ls_infos in __all_ls_meta_table - share::ObCompactionLocalityCache ls_locality_cache_; // 
statistics section compaction::ObRSCompactionTimeGuard total_time_guard_; compaction::ObCkmValidatorStatistics validator_statistics_; diff --git a/src/rootserver/ob_admin_drtask_util.cpp b/src/rootserver/ob_admin_drtask_util.cpp index fdea3695b..b34f6040b 100644 --- a/src/rootserver/ob_admin_drtask_util.cpp +++ b/src/rootserver/ob_admin_drtask_util.cpp @@ -13,7 +13,6 @@ #define USING_LOG_PREFIX RS #include "ob_admin_drtask_util.h" #include "logservice/ob_log_service.h" // for ObLogService -#include "share/ob_locality_parser.h" // for ObLocalityParser #include "storage/tx_storage/ob_ls_service.h" // for ObLSService #include "storage/ls/ob_ls.h" // for ObLS #include "observer/ob_server_event_history_table_operator.h" // for SERVER_EVENT_ADD @@ -146,8 +145,8 @@ int ObAdminDRTaskUtil::construct_arg_for_add_command_( ret = OB_INVALID_ARGUMENT; ret_comment = ObAdminDRTaskRetComment::TENANT_ID_OR_LS_ID_NOT_VALID; LOG_WARN("invalid tenant_id or ls_id", KR(ret), K(command_arg), K(tenant_id), K(ls_id)); - } else if (OB_UNLIKELY(!target_server.is_valid()) - || OB_UNLIKELY(REPLICA_TYPE_FULL != replica_type && REPLICA_TYPE_READONLY != replica_type)) { + } else if (OB_UNLIKELY(!target_server.is_valid() + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(replica_type), K(target_server)); // STEP 2: construct orig_paxos_replica_number and leader_server if not specified by ob_admin command @@ -164,8 +163,8 @@ int ObAdminDRTaskUtil::construct_arg_for_add_command_( new_paxos_replica_number = 0 == new_paxos_replica_number ? 
orig_paxos_replica_number : new_paxos_replica_number; - ObReplicaMember data_source_member(leader_server, 0/*timstamp*/); - ObReplicaMember force_data_source_member(force_data_source_server, 0/*timstamp*/); + ObReplicaMember data_source_member(leader_server, 0/*timstamp*/, REPLICA_TYPE_FULL/*dummy_replica_type*/); + ObReplicaMember force_data_source_member(force_data_source_server, 0/*timstamp*/, REPLICA_TYPE_FULL/*dummy_replica_type*/); ObReplicaMember add_member(target_server, ObTimeUtility::current_time(), replica_type); // STEP 3: construct arg if (OB_ISNULL(ObCurTraceId::get_trace_id())) { @@ -292,8 +291,8 @@ int ObAdminDRTaskUtil::handle_remove_command_( ret = OB_INVALID_ARGUMENT; ret_comment = ObAdminDRTaskRetComment::TENANT_ID_OR_LS_ID_NOT_VALID; LOG_WARN("invalid tenant_id or ls_id", KR(ret), K(command_arg), K(tenant_id), K(ls_id)); - } else if (OB_UNLIKELY(!target_server.is_valid()) - || OB_UNLIKELY(REPLICA_TYPE_FULL != replica_type && REPLICA_TYPE_READONLY != replica_type)) { + } else if (OB_UNLIKELY(!target_server.is_valid() + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(replica_type), K(target_server)); } else { @@ -311,10 +310,10 @@ int ObAdminDRTaskUtil::handle_remove_command_( } else { ret_comment = SUCCEED_TO_SEND_COMMAND; } - } else if (REPLICA_TYPE_READONLY == replica_type) { + } else if (ObReplicaTypeCheck::is_non_paxos_replica(replica_type)) { ObLSDropNonPaxosReplicaArg remove_nonpaxos_arg; if (OB_FAIL(construct_remove_nonpaxos_task_arg_( - tenant_id, ls_id, target_server, ret_comment, remove_nonpaxos_arg))) { + tenant_id, ls_id, target_server, replica_type, ret_comment, remove_nonpaxos_arg))) { LOG_WARN("fail to construct remove non-paxos replica task arg", KR(ret), K(tenant_id), K(ls_id), K(target_server), K(ret_comment), K(remove_nonpaxos_arg)); } else if (OB_FAIL(execute_remove_nonpaxos_task_(command_arg, remove_nonpaxos_arg))) { @@ -359,20 +358,17 @@ int 
ObAdminDRTaskUtil::construct_remove_paxos_task_arg_( LOG_WARN("replica not found in member_list", KR(ret), K(target_server), K(palf_stat)); } else if (OB_FAIL(palf_stat.paxos_member_list_.get_member_by_addr(target_server, member))) { LOG_WARN("fail to get member from paxos_member_list", KR(ret), K(palf_stat), K(target_server)); + } else if (OB_FAIL(member_to_remove.init(member, REPLICA_TYPE_FULL))) { + LOG_WARN("fail to init member_to_remove", KR(ret), K(member)); } else { - member_to_remove = ObReplicaMember(member); - if (OB_FAIL(member_to_remove.set_replica_type(REPLICA_TYPE_FULL))) { - LOG_WARN("fail to set replica type for member to remove", KR(ret)); - } else { - // If [orig_paxos_replica_number] not specified in obadmin command, - // use leader replica's info as default - orig_paxos_replica_number = 0 == orig_paxos_replica_number - ? palf_stat.paxos_replica_num_ - : orig_paxos_replica_number; - new_paxos_replica_number = 0 == new_paxos_replica_number - ? orig_paxos_replica_number - : new_paxos_replica_number; - } + // If [orig_paxos_replica_number] not specified in obadmin command, + // use leader replica's info as default + orig_paxos_replica_number = 0 == orig_paxos_replica_number + ? palf_stat.paxos_replica_num_ + : orig_paxos_replica_number; + new_paxos_replica_number = 0 == new_paxos_replica_number + ? 
orig_paxos_replica_number + : new_paxos_replica_number; } if (OB_FAIL(ret)) { } else if (OB_ISNULL(ObCurTraceId::get_trace_id())) { @@ -391,6 +387,7 @@ int ObAdminDRTaskUtil::construct_remove_nonpaxos_task_arg_( const uint64_t &tenant_id, const share::ObLSID &ls_id, const common::ObAddr &target_server, + const ObReplicaType &replica_type, ObAdminDRTaskRetComment &ret_comment, ObLSDropNonPaxosReplicaArg &remove_nonpaxos_arg) { @@ -401,9 +398,11 @@ int ObAdminDRTaskUtil::construct_remove_nonpaxos_task_arg_( palf::PalfStat palf_stat; if (OB_UNLIKELY(!ls_id.is_valid_with_tenant(tenant_id)) - || OB_UNLIKELY(!target_server.is_valid())) { + || OB_UNLIKELY(!target_server.is_valid()) + || OB_UNLIKELY(!ObReplicaTypeCheck::is_non_paxos_replica(replica_type))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid tenant_id or ls_id", KR(ret), K(tenant_id), K(ls_id), K(target_server)); + LOG_WARN("invalid arguments for remove_non_paxos_task", KR(ret), K(tenant_id), K(ls_id), + K(target_server), K(replica_type)); } else if (OB_FAIL(get_local_palf_stat_(tenant_id, ls_id, palf_stat, ret_comment))) { LOG_WARN("fail to get local palf stat", KR(ret), K(tenant_id), K(ls_id)); } else if (OB_UNLIKELY(!palf_stat.is_valid())) { @@ -414,18 +413,15 @@ int ObAdminDRTaskUtil::construct_remove_nonpaxos_task_arg_( LOG_WARN("replica not found in learner_list", KR(ret), K(target_server), K(palf_stat)); } else if (OB_FAIL(palf_stat.learner_list_.get_learner_by_addr(target_server, member))) { LOG_WARN("fail to get member from learner_list", KR(ret), K(palf_stat), K(target_server)); - } else { - member_to_remove = ObReplicaMember(member); - if (OB_FAIL(member_to_remove.set_replica_type(REPLICA_TYPE_READONLY))) { - LOG_WARN("fail to set replica type for member to remove", KR(ret)); - } else if (OB_ISNULL(ObCurTraceId::get_trace_id())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret)); - } else if (OB_FAIL(remove_nonpaxos_arg.init( - *ObCurTraceId::get_trace_id()/*task_id*/, tenant_id, 
- ls_id, member_to_remove))) { - LOG_WARN("fail to init arg", KR(ret), K(tenant_id), K(ls_id), K(member_to_remove)); - } + } else if (OB_FAIL(member_to_remove.init(member, replica_type))) { + LOG_WARN("fail to init member_to_remove", KR(ret), K(member), K(replica_type)); + } else if (OB_ISNULL(ObCurTraceId::get_trace_id())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret)); + } else if (OB_FAIL(remove_nonpaxos_arg.init( + *ObCurTraceId::get_trace_id()/*task_id*/, tenant_id, + ls_id, member_to_remove))) { + LOG_WARN("fail to init arg", KR(ret), K(tenant_id), K(ls_id), K(member_to_remove)); } return ret; } @@ -594,11 +590,10 @@ int ObAdminDRTaskUtil::parse_params_from_obadmin_command_arg( ls_id = share::ObLSID(ls_id_to_set); } } else if (0 == param_name.string().case_compare("replica_type")) { - if (OB_FAIL(share::ObLocalityParser::parse_type( - param_value.ptr(), - param_value.length(), - replica_type))) { - LOG_WARN("fail to parse replica type", KR(ret), K(param_name_with_value), K(replica_type)); + replica_type = share::ObShareUtil::string_to_replica_type(param_value.ptr()); + if (! 
ObReplicaTypeCheck::is_replica_type_valid(replica_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid replica_type", KR(ret), K(param_name_with_value), K(replica_type)); } } else if (0 == param_name.string().case_compare("orig_paxos_replica_number")) { if (OB_FAIL(extract_int(param_value.string(), 0, pos, orig_paxos_replica_number))) { diff --git a/src/rootserver/ob_admin_drtask_util.h b/src/rootserver/ob_admin_drtask_util.h index d42f7e6a6..be3488231 100644 --- a/src/rootserver/ob_admin_drtask_util.h +++ b/src/rootserver/ob_admin_drtask_util.h @@ -89,12 +89,14 @@ private: // params[in] tenant_id, specified tenant_id // params[in] ls_id, specified ls_id // params[in] target_server, the replica to remove on which server + // params[in] replica_type, the replica type to remove, could be R or C // params[out] ret_comment, failed reason // params[out] remove_non_paxos_arg, arg for remove-R task static int construct_remove_nonpaxos_task_arg_( const uint64_t &tenant_id, const share::ObLSID &ls_id, const common::ObAddr &target_server, + const ObReplicaType &replica_type, ObAdminDRTaskRetComment &ret_comment, ObLSDropNonPaxosReplicaArg &remove_nonpaxos_arg); diff --git a/src/rootserver/ob_balance_info.h b/src/rootserver/ob_balance_info.h index 6c531f92a..ee159f9f3 100644 --- a/src/rootserver/ob_balance_info.h +++ b/src/rootserver/ob_balance_info.h @@ -44,19 +44,6 @@ class ObUnitManager; class ObZoneManager; -class ObDataSourceCandidateChecker -{ -public: - ObDataSourceCandidateChecker(common::ObReplicaType type) : this_type_(type) {} - inline bool is_candidate(common::ObReplicaType other_type) const - { - // TODO: Use a more refined way, such as this_type_ = F, you can have other_type = R - return common::ObReplicaTypeCheck::can_as_data_source(this_type_, other_type); - } -private: - common::ObReplicaType this_type_; -}; - class ObStatisticsCalculator { public: diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index 
f5f41395f..4ee875a8c 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -3413,15 +3413,22 @@ int ObDDLService::set_raw_table_options( } int ObDDLService::check_locality_compatible_( - ObTenantSchema &schema) + ObTenantSchema &schema, + const bool for_create_tenant) { int ret = OB_SUCCESS; common::ObArray zone_locality; + const uint64_t tenant_id = for_create_tenant ? OB_SYS_TENANT_ID : schema.get_tenant_id(); bool is_compatible_with_readonly_replica = false; + bool is_compatible_with_columnstore_replica = false; if (OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica( - schema.get_tenant_id(), is_compatible_with_readonly_replica))) { + tenant_id, is_compatible_with_readonly_replica))) { LOG_WARN("fail to check compatible with readonly replica", KR(ret), K(schema)); - } else if (is_compatible_with_readonly_replica) { + } else if (OB_FAIL(ObShareUtil::check_compat_version_for_columnstore_replica( + tenant_id, is_compatible_with_columnstore_replica))) { + LOG_WARN("fail to check compatible with columnstore replica", KR(ret), K(schema)); + } else if (is_compatible_with_readonly_replica && is_compatible_with_columnstore_replica) { + // check pass } else if (OB_FAIL(schema.get_zone_replica_attr_array(zone_locality))) { LOG_WARN("fail to get locality from schema", K(ret), K(schema)); } else { @@ -3430,10 +3437,16 @@ int ObDDLService::check_locality_compatible_( if (this_set.zone_set_.count() <= 0) { ret = OB_ERR_UNEXPECTED; LOG_WARN("zone set count unexpected", K(ret), "zone_set_cnt", this_set.zone_set_.count()); - } else if (0 != this_set.get_readonly_replica_num()) { + } else if (! is_compatible_with_readonly_replica + && 0 != this_set.get_readonly_replica_num()) { ret = OB_NOT_SUPPORTED; LOG_WARN("can not create tenant with read-only replica below data version 4.2", KR(ret)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "Create tenant with R-replica in locality below data version 4.2"); + } else if (! 
is_compatible_with_columnstore_replica + && 0 != this_set.get_columnstore_replica_num()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("can not create tenant with column-store replica below data version 4.3.3", KR(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "Create tenant with C-replica in locality below data version 4.3.3"); } } } @@ -27298,7 +27311,7 @@ int ObDDLService::set_new_tenant_options( } else if (OB_FAIL(parse_and_set_create_tenant_new_locality_options( schema_guard, new_tenant_schema, resource_pool_names, zones_in_pool, zone_region_list))) { LOG_WARN("fail to parse and set new locality option", K(ret)); - } else if (OB_FAIL(check_locality_compatible_(new_tenant_schema))) { + } else if (OB_FAIL(check_locality_compatible_(new_tenant_schema, false /*for_create_tenant*/))) { LOG_WARN("fail to check locality with data version", KR(ret), K(new_tenant_schema)); } else if (OB_FAIL(check_alter_tenant_locality_type( schema_guard, orig_tenant_schema, new_tenant_schema, alter_locality_type))) { @@ -30301,6 +30314,8 @@ int ObDDLService::check_create_tenant_locality( } else if (OB_FAIL(parse_and_set_create_tenant_new_locality_options( schema_guard, tenant_schema, pools, pool_zones, zone_region_list))) { LOG_WARN("fail to parse and set new locality option", K(ret)); + } else if (OB_FAIL(check_locality_compatible_(tenant_schema, true /*for_create_tenant*/))) { + LOG_WARN("fail to check locality with data version", KR(ret), K(tenant_schema)); } else if (OB_FAIL(check_pools_unit_num_enough_for_schema_locality( pools, schema_guard, tenant_schema))) { LOG_WARN("pools unit num is not enough for locality", K(ret)); diff --git a/src/rootserver/ob_ddl_service.h b/src/rootserver/ob_ddl_service.h index 9e4b62b28..f67c70144 100644 --- a/src/rootserver/ob_ddl_service.h +++ b/src/rootserver/ob_ddl_service.h @@ -2753,7 +2753,7 @@ private: common::ObIArray &init_configs); private: - int check_locality_compatible_(ObTenantSchema &schema); + int check_locality_compatible_(ObTenantSchema 
&schema, const bool for_create_tenant); int pre_rename_mysql_columns_online(const ObTableSchema &origin_table_schema, const AlterTableSchema &alter_table_schema, diff --git a/src/rootserver/ob_disaster_recovery_task.cpp b/src/rootserver/ob_disaster_recovery_task.cpp index 1ba8c83b3..834a42bce 100644 --- a/src/rootserver/ob_disaster_recovery_task.cpp +++ b/src/rootserver/ob_disaster_recovery_task.cpp @@ -254,34 +254,6 @@ const char *ob_disaster_recovery_task_type_strs(const rootserver::ObDRTaskType t return str; } -const char *ob_replica_type_strs(const ObReplicaType type) -{ - const char *str = NULL; - switch (type) { - case ObReplicaType::REPLICA_TYPE_FULL: { - str = "FULL"; - break; - } - case ObReplicaType::REPLICA_TYPE_LOGONLY: { - str = "LOGONLY"; - break; - } - case ObReplicaType::REPLICA_TYPE_READONLY: { - str = "READONLY"; - break; - } - case ObReplicaType::REPLICA_TYPE_ENCRYPTION_LOGONLY: { - str = "ENCRYPTION_LOGONLY"; - break; - } - default: { - LOG_WARN_RET(OB_ERR_UNEXPECTED, "invalid replica type", K(type)); - break; - } - } - return str; -} - bool ObDRTaskKey::is_valid() const { return key_type_ > ObDRTaskKeyType::INVALID @@ -558,8 +530,7 @@ int ObMigrateLSReplicaTask::get_execute_transmit_size( int ret = OB_SUCCESS; execute_transmit_size = 0; ObReplicaType dst_replica_type = dst_replica_.get_replica_type(); - if (REPLICA_TYPE_FULL == dst_replica_type - || REPLICA_TYPE_READONLY == dst_replica_type) { + if (ObReplicaTypeCheck::is_replica_type_valid(dst_replica_type)) { execute_transmit_size = transmit_data_size_; } else if (REPLICA_TYPE_LOGONLY == dst_replica_type || REPLICA_TYPE_ENCRYPTION_LOGONLY == dst_replica_type) { @@ -717,11 +688,11 @@ int ObMigrateLSReplicaTask::fill_dml_splicer( || OB_FAIL(dml_splicer.add_column("target_replica_svr_ip", dest_ip)) || OB_FAIL(dml_splicer.add_column("target_replica_svr_port", get_dst_server().get_port())) || OB_FAIL(dml_splicer.add_column("target_paxos_replica_number", get_paxos_replica_number())) - || 
OB_FAIL(dml_splicer.add_column("target_replica_type", ob_replica_type_strs(get_dst_replica().get_member().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("target_replica_type", ObShareUtil::replica_type_to_string(get_dst_replica().get_member().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("source_replica_svr_ip", src_ip)) || OB_FAIL(dml_splicer.add_column("source_replica_svr_port", get_src_member().get_server().get_port())) || OB_FAIL(dml_splicer.add_column("source_paxos_replica_number", get_paxos_replica_number())) - || OB_FAIL(dml_splicer.add_column("source_replica_type", ob_replica_type_strs(get_src_member().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("source_replica_type", ObShareUtil::replica_type_to_string(get_src_member().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("task_exec_svr_ip", dest_ip)) || OB_FAIL(dml_splicer.add_column("task_exec_svr_port", get_dst_server().get_port()))) { LOG_WARN("add column failed", KR(ret)); @@ -991,6 +962,7 @@ int ObMigrateLSReplicaTask::build_task_from_sql_result( (void)GET_COL_IGNORE_NULL(res.get_int, "target_replica_svr_port", dest_port); (void)GET_COL_IGNORE_NULL(res.get_int, "source_paxos_replica_number", src_paxos_replica_number); (void)GET_COL_IGNORE_NULL(res.get_varchar, "comment", comment); + // TODO(cangming.zl): get src_replica_type and dest_replica_type from result EXTRACT_INT_FIELD_MYSQL_WITH_DEFAULT_VALUE(res, "data_source_svr_port", data_source_port, int64_t, true/*skip null error*/, true/*skip column error*/, 0); EXTRACT_VARCHAR_FIELD_MYSQL_WITH_DEFAULT_VALUE(res, "data_source_svr_ip", data_source_ip, @@ -1081,8 +1053,7 @@ int ObAddLSReplicaTask::get_execute_transmit_size( int ret = OB_SUCCESS; execute_transmit_size = 0; ObReplicaType dst_replica_type = dst_replica_.get_replica_type(); - if (REPLICA_TYPE_FULL == dst_replica_type - || REPLICA_TYPE_READONLY == dst_replica_type) { + if (ObReplicaTypeCheck::is_replica_type_valid(dst_replica_type)) { execute_transmit_size 
= transmit_data_size_; } else if (REPLICA_TYPE_LOGONLY == dst_replica_type || REPLICA_TYPE_ENCRYPTION_LOGONLY == dst_replica_type) { @@ -1224,11 +1195,11 @@ int ObAddLSReplicaTask::fill_dml_splicer( || OB_FAIL(dml_splicer.add_column("target_replica_svr_ip", dest_ip)) || OB_FAIL(dml_splicer.add_column("target_replica_svr_port", get_dst_server().get_port())) || OB_FAIL(dml_splicer.add_column("target_paxos_replica_number", get_paxos_replica_number())) - || OB_FAIL(dml_splicer.add_column("target_replica_type", ob_replica_type_strs(get_dst_replica().get_member().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("target_replica_type", ObShareUtil::replica_type_to_string(get_dst_replica().get_member().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("source_replica_svr_ip", src_ip)) || OB_FAIL(dml_splicer.add_column("source_replica_svr_port", get_data_src_member().get_server().get_port())) || OB_FAIL(dml_splicer.add_column("source_paxos_replica_number", get_orig_paxos_replica_number())) - || OB_FAIL(dml_splicer.add_column("source_replica_type", ob_replica_type_strs(get_dst_replica().get_member().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("source_replica_type", ObShareUtil::replica_type_to_string(get_dst_replica().get_member().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("task_exec_svr_ip", dest_ip)) || OB_FAIL(dml_splicer.add_column("task_exec_svr_port", get_dst_server().get_port()))) { LOG_WARN("add column failed", KR(ret)); @@ -1505,6 +1476,7 @@ int ObAddLSReplicaTask::build_task_from_sql_result( (void)GET_COL_IGNORE_NULL(res.get_int, "source_paxos_replica_number", src_paxos_replica_number); (void)GET_COL_IGNORE_NULL(res.get_int, "target_paxos_replica_number", dest_paxos_replica_number); (void)GET_COL_IGNORE_NULL(res.get_varchar, "comment", comment); + // TODO(cangming.zl): get src_replica_type and dest_replica_type from result EXTRACT_INT_FIELD_MYSQL_WITH_DEFAULT_VALUE(res, "data_source_svr_port", data_source_port, int64_t, 
true/*skip null error*/, true/*skip column error*/, 0); EXTRACT_VARCHAR_FIELD_MYSQL_WITH_DEFAULT_VALUE(res, "data_source_svr_ip", data_source_ip, @@ -1751,11 +1723,11 @@ int ObLSTypeTransformTask::fill_dml_splicer( || OB_FAIL(dml_splicer.add_column("target_replica_svr_ip", dest_ip)) || OB_FAIL(dml_splicer.add_column("target_replica_svr_port", get_dst_server().get_port())) || OB_FAIL(dml_splicer.add_column("target_paxos_replica_number", get_paxos_replica_number())) - || OB_FAIL(dml_splicer.add_column("target_replica_type", ob_replica_type_strs(get_dst_replica().get_member().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("target_replica_type", ObShareUtil::replica_type_to_string(get_dst_replica().get_member().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("source_replica_svr_ip", src_ip)) || OB_FAIL(dml_splicer.add_column("source_replica_svr_port", get_src_member().get_server().get_port())) || OB_FAIL(dml_splicer.add_column("source_paxos_replica_number", get_orig_paxos_replica_number())) - || OB_FAIL(dml_splicer.add_column("source_replica_type", ob_replica_type_strs(get_src_member().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("source_replica_type", ObShareUtil::replica_type_to_string(get_src_member().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("task_exec_svr_ip", dest_ip)) || OB_FAIL(dml_splicer.add_column("task_exec_svr_port", get_dst_server().get_port()))) { LOG_WARN("add column failed", KR(ret)); @@ -1985,8 +1957,8 @@ int ObLSTypeTransformTask::build_task_from_sql_result( int64_t transmit_data_size = 0; int64_t src_paxos_replica_number = OB_INVALID_COUNT; int64_t dest_paxos_replica_number = OB_INVALID_COUNT; - common::ObString src_type; - common::ObString dest_type; + common::ObString src_type_str; + common::ObString dest_type_str; int64_t schedule_time_us = 0; int64_t generate_time_us = 0; common::ObString comment; @@ -2010,8 +1982,8 @@ int ObLSTypeTransformTask::build_task_from_sql_result( 
(void)GET_COL_IGNORE_NULL(res.get_int, "target_replica_svr_port", dest_port); (void)GET_COL_IGNORE_NULL(res.get_int, "source_paxos_replica_number", src_paxos_replica_number); (void)GET_COL_IGNORE_NULL(res.get_int, "target_paxos_replica_number", dest_paxos_replica_number); - (void)GET_COL_IGNORE_NULL(res.get_varchar, "source_replica_type", src_type); - (void)GET_COL_IGNORE_NULL(res.get_varchar, "target_replica_type", dest_type); + (void)GET_COL_IGNORE_NULL(res.get_varchar, "source_replica_type", src_type_str); + (void)GET_COL_IGNORE_NULL(res.get_varchar, "target_replica_type", dest_type_str); (void)GET_COL_IGNORE_NULL(res.get_varchar, "comment", comment); EXTRACT_BOOL_FIELD_MYSQL_SKIP_RET(res, "is_manual", is_manual); //STEP2_0: make necessary members to build a task @@ -2020,8 +1992,8 @@ int ObLSTypeTransformTask::build_task_from_sql_result( common::ObAddr dest_server; common::ObString zone; rootserver::ObDRTaskPriority priority_to_set; - ObReplicaType src_type_to_set = REPLICA_TYPE_MAX; - ObReplicaType dest_type_to_set = REPLICA_TYPE_MAX; + ObReplicaType src_type_to_set = ObShareUtil::string_to_replica_type(src_type_str); + ObReplicaType dest_type_to_set = ObShareUtil::string_to_replica_type(dest_type_str); ObDstReplica dst_replica; share::ObTaskId task_id_to_set; ObSqlString comment_to_set; @@ -2048,18 +2020,6 @@ int ObLSTypeTransformTask::build_task_from_sql_result( ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid server address", K(dest_ip), K(dest_port)); } else { - //transform replica_type(string) -> src_type_to_set(ObReplicaType) - if (src_type == common::ObString("FULL")) { - src_type_to_set = REPLICA_TYPE_FULL; - } else if (src_type == common::ObString("READONLY")) { - src_type_to_set = REPLICA_TYPE_READONLY; - } - //transform replica_type(string) -> dest_type_to_set(ObReplicaType) - if (dest_type == common::ObString("FULL")) { - dest_type_to_set = REPLICA_TYPE_FULL; - } else if (dest_type == common::ObString("READONLY")) { - dest_type_to_set = 
REPLICA_TYPE_READONLY; - } //transform priority(int) -> priority_to_set(ObDRTaskPriority) if (priority == 0) { priority_to_set = ObDRTaskPriority::HIGH_PRI; @@ -2068,12 +2028,12 @@ int ObLSTypeTransformTask::build_task_from_sql_result( } else { priority_to_set = ObDRTaskPriority::MAX_PRI; } - ObReplicaMember src_member(src_server, 0); - ObReplicaMember dest_member(dest_server, 0); - if (OB_FAIL(src_member.set_replica_type(src_type_to_set))) { - LOG_WARN("fail to set src replica type", KR(ret), K(src_type_to_set)); - } else if (OB_FAIL(dest_member.set_replica_type(dest_type_to_set))) { - LOG_WARN("fail to set dest replica type", KR(ret), K(dest_type_to_set)); + ObReplicaMember src_member; + ObReplicaMember dest_member; + if (OB_FAIL(src_member.init(src_server, 0, src_type_to_set))) { + LOG_WARN("failed to init src_member", KR(ret), K(src_server), K(src_type_str), K(src_type_to_set)); + } else if (OB_FAIL(dest_member.init(dest_server, 0, dest_type_to_set))) { + LOG_WARN("failed to init dest_member", KR(ret), K(dest_server), K(dest_type_str), K(dest_type_to_set)); } else if (OB_FAIL(dst_replica.assign( 0/*unit id*/, 0/*unit group id*/, @@ -2251,7 +2211,7 @@ int ObRemoveLSReplicaTask::fill_dml_splicer( || OB_FAIL(dml_splicer.add_column("target_replica_svr_ip", target_ip)) || OB_FAIL(dml_splicer.add_column("target_replica_svr_port", get_remove_server().get_server().get_port())) || OB_FAIL(dml_splicer.add_column("target_paxos_replica_number", get_paxos_replica_number())) - || OB_FAIL(dml_splicer.add_column("target_replica_type", ob_replica_type_strs(get_remove_server().get_replica_type()))) + || OB_FAIL(dml_splicer.add_column("target_replica_type", ObShareUtil::replica_type_to_string(get_remove_server().get_replica_type()))) || OB_FAIL(dml_splicer.add_column("source_replica_svr_ip", src_ip)) || OB_FAIL(dml_splicer.add_column("source_replica_svr_port", 0)) || OB_FAIL(dml_splicer.add_column("source_paxos_replica_number", get_orig_paxos_replica_number())) @@ -2374,7 +2334,7 
@@ int ObRemoveLSReplicaTask::simple_build( || !remove_server.is_valid() || orig_paxos_replica_number <= 0 || paxos_replica_number <= 0 - || REPLICA_TYPE_MAX == replica_type)) { + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(ls_id), K(task_id), K(leader), K(remove_server), K(orig_paxos_replica_number), @@ -2422,7 +2382,7 @@ int ObRemoveLSReplicaTask::build_task_from_sql_result( int64_t schedule_time_us = 0; int64_t generate_time_us = 0; common::ObString comment; - ObReplicaType replica_type = REPLICA_TYPE_MAX; + common::ObString replica_type_str; bool is_manual = false; //STEP1_0: read certain members from sql result EXTRACT_INT_FIELD_MYSQL(res, "tenant_id", tenant_id, uint64_t); @@ -2442,6 +2402,7 @@ int ObRemoveLSReplicaTask::build_task_from_sql_result( (void)GET_COL_IGNORE_NULL(res.get_int, "task_exec_svr_port", dest_port); (void)GET_COL_IGNORE_NULL(res.get_varchar, "target_replica_svr_ip", target_ip); (void)GET_COL_IGNORE_NULL(res.get_int, "target_replica_svr_port", target_port); + (void)GET_COL_IGNORE_NULL(res.get_varchar, "target_replica_type", replica_type_str); (void)GET_COL_IGNORE_NULL(res.get_int, "source_paxos_replica_number", src_paxos_replica_number); (void)GET_COL_IGNORE_NULL(res.get_int, "target_paxos_replica_number", dest_paxos_replica_number); (void)GET_COL_IGNORE_NULL(res.get_varchar, "comment", comment); @@ -2453,7 +2414,7 @@ int ObRemoveLSReplicaTask::build_task_from_sql_result( rootserver::ObDRTaskPriority priority_to_set; share::ObTaskId task_id_to_set; ObSqlString comment_to_set; - ObSqlString task_id_sqlstring_format; + ObSqlString task_id_sqlstring_format; // for adding trailing null if (OB_FAIL(ret)) { } else if (OB_FAIL(comment_to_set.assign(comment))) { @@ -2484,13 +2445,21 @@ int ObRemoveLSReplicaTask::build_task_from_sql_result( } else { priority_to_set = ObDRTaskPriority::MAX_PRI; } - //transform task_type(string) -> 
replica_type(ObReplicaType) + // get replica_type_ from replica_type_str and check whether or not match with task_type. + replica_type_ = ObShareUtil::string_to_replica_type(replica_type_str); if (0 == task_type.case_compare(ob_disaster_recovery_task_type_strs(ObDRTaskType::LS_REMOVE_PAXOS_REPLICA))) { - replica_type_ = ObReplicaType::REPLICA_TYPE_FULL; + if (OB_UNLIKELY(REPLICA_TYPE_FULL != replica_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("task_type and replica_type do not match", KR(ret), K(task_type), K(replica_type_)); + } } else if (0 == task_type.case_compare(ob_disaster_recovery_task_type_strs(ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA))) { - replica_type_ = ObReplicaType::REPLICA_TYPE_READONLY; + if (OB_UNLIKELY(! ObReplicaTypeCheck::is_non_paxos_replica(replica_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("task_type and replica_type do not match", KR(ret), K(task_type), K(replica_type_)); + } } else { - replica_type_ = ObReplicaType::REPLICA_TYPE_MAX; + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected task_type", KR(ret), K(task_type)); } } //STEP3_0: to build a task diff --git a/src/rootserver/ob_disaster_recovery_worker.cpp b/src/rootserver/ob_disaster_recovery_worker.cpp index 678796cc0..df03fde97 100755 --- a/src/rootserver/ob_disaster_recovery_worker.cpp +++ b/src/rootserver/ob_disaster_recovery_worker.cpp @@ -128,7 +128,7 @@ bool ObLSReplicaTaskDisplayInfo::is_valid() const && task_type_ != ObDRTaskType::MAX_TYPE && task_priority_ != ObDRTaskPriority::MAX_PRI && target_server_.is_valid() - && target_replica_type_ != REPLICA_TYPE_MAX + && target_replica_type_ != REPLICA_TYPE_INVALID && target_replica_paxos_replica_number_ != OB_INVALID_COUNT && source_replica_paxos_replica_number_ != OB_INVALID_COUNT && execute_server_.is_valid(); @@ -293,6 +293,9 @@ int ObDRWorker::LocalityAlignment::build_locality_stat_map() // readonly locality const ObIArray &readonly_locality = zone_locality.replica_attr_set_.get_readonly_replica_attr_array(); + // 
columnstore locality + const ObIArray &columnstore_locality = + zone_locality.replica_attr_set_.get_columnstore_replica_attr_array(); if (OB_FAIL(locate_zone_locality(zone, zone_replica_desc))) { LOG_WARN("fail to locate zone locality", KR(ret), K(zone)); @@ -336,11 +339,17 @@ int ObDRWorker::LocalityAlignment::build_locality_stat_map() LOG_WARN("fail to push back", KR(ret)); } } - // readonly replica, all_server + // readonly or colmnstore replica, all_server if (dr_ls_info_.is_duplicate_ls()) { - // duplicate ls, should has R-replica all_server - zone_replica_desc->is_readonly_all_server_ = true; - zone_replica_desc->readonly_memstore_percent_ = 100; + // duplicate ls, should has R-replica or C-replica all_server + // if locality is C, then set columnstore_all_server. + // if locality is F/R, then set readonly_all_server, + if (columnstore_locality.count() > 0) { + // dup_ls must not be sys_ls + zone_replica_desc->set_columnstore_all_server(); + } else { + zone_replica_desc->set_readonly_all_server(); + } } else { // readonly replica, normal for (int64_t j = 0; OB_SUCC(ret) && j < readonly_locality.count(); ++j) { @@ -354,6 +363,22 @@ int ObDRWorker::LocalityAlignment::build_locality_stat_map() LOG_WARN("fail to push back", KR(ret)); } } + // columnstore replica, normal + if (ls_id.is_sys_ls()) { + // for sys ls, ignore C-replica in locality + } else { + for (int64_t j = 0; OB_SUCC(ret) && j < columnstore_locality.count(); ++j) { + const ReplicaAttr &replica_attr = columnstore_locality.at(j); + if (0 >= replica_attr.num_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("replica num unexpected", KR(ret), K(zone), K(columnstore_locality)); + } else if (OB_FAIL(zone_replica_desc->push_back(ReplicaDesc(REPLICA_TYPE_COLUMNSTORE, + replica_attr.memstore_percent_, + replica_attr.num_)))) { + LOG_WARN("fail to push back", KR(ret)); + } + } + } } } LOG_INFO("ls zone locality map info", K(zone), KPC(zone_replica_desc)); @@ -592,6 +617,16 @@ int 
ObDRWorker::LocalityAlignment::do_generate_locality_task_from_full_replica( } else { found = true; } + } else if (ObReplicaTypeCheck::is_columnstore_replica(replica_desc.replica_type_)) { + // if zone_locality is C-replica, remove this F-replica + // because transform from F to C is not supported + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); + } else if (OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret)); + } else { + found = true; + } } } // process not found @@ -599,11 +634,18 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_full_replica( // failed } else if (found) { // found, bypass - } else if (zone_replica_desc->is_readonly_all_server_) { + } else if (zone_replica_desc->is_columnstore_all_server()) { + // remove this F, because transform from F to C is not supported + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); + } else if (OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret)); + } + } else if (zone_replica_desc->is_readonly_all_server()) { if (OB_FAIL(generate_type_transform_task( replica_stat_desc, REPLICA_TYPE_READONLY, - zone_replica_desc->readonly_memstore_percent_))) { + zone_replica_desc->get_readonly_memstore_percent()))) { LOG_WARN("fail to generate type transform task", KR(ret), K(replica_stat_desc)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret), K(index), K(replica), K(replica_stat_map_)); @@ -715,7 +757,7 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_encryption_log return ret; } -int ObDRWorker::LocalityAlignment::try_generate_type_transform_task_for_readonly_replica_( +int ObDRWorker::LocalityAlignment::try_generate_task_for_readonly_replica_( ReplicaDescArray &zone_replica_desc_in_locality, ReplicaStatDesc &replica_stat_desc, const int64_t 
index, @@ -732,6 +774,15 @@ int ObDRWorker::LocalityAlignment::try_generate_type_transform_task_for_readonly if (REPLICA_TYPE_READONLY == replica_desc.replica_type_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), K(dr_ls_info_)); + } else if (ObReplicaTypeCheck::is_columnstore_replica(replica_desc.replica_type_)) { + // if locality is C-replica, remove this R-replica because transform from R to C is not supported + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret), K(replica_stat_desc)); + } else if (OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret), K(replica_stat_map_), K(index)); + } else { + task_generated = true; + } } else if (REPLICA_TYPE_FULL == replica_desc.replica_type_) { if (OB_ISNULL(replica_stat_desc.unit_stat_info_)) { ret = OB_INVALID_ARGUMENT; @@ -764,7 +815,7 @@ int ObDRWorker::LocalityAlignment::try_generate_type_transform_task_for_readonly return ret; } -int ObDRWorker::LocalityAlignment::try_generate_remove_readonly_task_for_duplicate_log_stream_( +int ObDRWorker::LocalityAlignment::try_generate_remove_redundant_replica_task_for_dup_ls_( ReplicaStatDesc &replica_stat_desc, share::ObLSReplica &replica, const int64_t index) @@ -829,15 +880,23 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_readonly_repli bool task_generated = false; lib::ob_sort(zone_replica_desc->begin(), zone_replica_desc->end()); // try to generate type_transform task if needed - if (OB_FAIL(try_generate_type_transform_task_for_readonly_replica_( + if (OB_FAIL(try_generate_task_for_readonly_replica_( *zone_replica_desc, replica_stat_desc, index, task_generated))) { LOG_WARN("fail to try generate type transform task", KR(ret), KPC(zone_replica_desc), K(replica_stat_desc), K(index), K(task_generated)); } else if (task_generated) { - // a type transform task generated, bypass - } else if (zone_replica_desc->is_readonly_all_server_) 
{ - // for duplicate log stream, try to remove redudant R-replicas - if (OB_FAIL(try_generate_remove_readonly_task_for_duplicate_log_stream_(replica_stat_desc, replica, index))) { + // a type transform task or remove task generated, bypass + } else if (zone_replica_desc->is_columnstore_all_server()) { + // for duplicate log stream, if locality is C-replica, remove this R-replica + // because transform from R to C is not supported + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret), K(replica_stat_desc)); + } else if (OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret), K(replica_stat_map_), K(index)); + } + } else if (zone_replica_desc->is_readonly_all_server()) { + // for duplicate log stream, if locality is R-replica, try to remove redudant R-replicas + if (OB_FAIL(try_generate_remove_redundant_replica_task_for_dup_ls_(replica_stat_desc, replica, index))) { LOG_WARN("fail to generate remove replica task for duplicate log stream", KR(ret), K(replica_stat_desc), K(replica), K(index)); } @@ -890,6 +949,65 @@ int ObDRWorker::LocalityAlignment::try_generate_locality_task_from_paxos_replica return ret; } +int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_columnstore_replica( + ReplicaStatDesc &replica_stat_desc, + share::ObLSReplica &replica, + const int64_t index) +{ + int ret = OB_SUCCESS; + const common::ObZone &zone = replica.get_zone(); + if (REPLICA_TYPE_COLUMNSTORE != replica.get_replica_type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("replica type unexpected", KR(ret), K(replica)); + } else { + ReplicaDescArray *zone_replica_desc = nullptr; + int tmp_ret = locality_map_.get_refactored(zone, zone_replica_desc); + if (OB_HASH_NOT_EXIST == tmp_ret) { + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret), K(replica_stat_desc)); + } else if 
(OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret), K(replica_stat_map_), K(index)); + } + } else if (OB_SUCCESS == tmp_ret && OB_NOT_NULL(zone_replica_desc)) { + bool task_generated = false; + lib::ob_sort(zone_replica_desc->begin(), zone_replica_desc->end()); + // defensive check + for (int64_t i = zone_replica_desc->count() - 1; OB_SUCC(ret) && i >= 0; --i) { + ReplicaDesc &replica_desc = zone_replica_desc->at(i); + if (ObReplicaTypeCheck::is_columnstore_replica(replica_desc.replica_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("replica type unexpected", KR(ret), K(replica_desc), K(dr_ls_info_)); + } + } + // normal routine + if (OB_SUCC(ret)) { + if (zone_replica_desc->is_columnstore_all_server()) { + // for duplicate log stream, if locality is C-replica, try to remove redudant C-replicas + if (OB_FAIL(try_generate_remove_redundant_replica_task_for_dup_ls_(replica_stat_desc, replica, index))) { + LOG_WARN("fail to generate remove replica task for duplicate log stream", + KR(ret), K(replica_stat_desc), K(replica), K(index)); + } + } else { + // handle two abnormal occasions: + // 1. for duplicate ls, locality is R or F-replica; + // 2. for non-duplicate ls, locality is other type than C-replica + // in these both occasions, directly remove this C-replica, + // because transform from C-replica to R/F is not supported. 
+ if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret), K(replica_stat_desc)); + } else if (OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret), K(replica_stat_map_), K(index)); + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get refactored", KR(ret), K(zone)); + } + } + return ret; +} + void ObDRWorker::LocalityAlignment::print_locality_information() { for (LocalityMap::iterator iter = locality_map_.begin(); @@ -945,6 +1063,13 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task() i))) { LOG_WARN("fail to generate locality task from readonly replica", KR(ret)); } + } else if (REPLICA_TYPE_COLUMNSTORE == replica->get_replica_type()) { + if (OB_FAIL(do_generate_locality_task_from_columnstore_replica( + replica_stat_desc, + *replica, + i))) { + LOG_WARN("fail to generate locality task from columnstore replica", KR(ret)); + } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), KPC(replica)); @@ -1429,7 +1554,7 @@ int ObDRWorker::LocalityAlignment::try_get_normal_locality_alignment_task( return ret; } -int ObDRWorker::LocalityAlignment::try_get_readonly_all_server_locality_alignment_task( +int ObDRWorker::LocalityAlignment::try_get_readonly_or_columnstore_all_server_locality_alignment_task( UnitProvider &unit_provider, const LATask *&task) { @@ -1444,7 +1569,8 @@ int ObDRWorker::LocalityAlignment::try_get_readonly_all_server_locality_alignmen if (OB_UNLIKELY(nullptr == replica_desc_array)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica desc array ptr is null", KR(ret)); - } else if (!replica_desc_array->is_readonly_all_server_) { + } else if (!replica_desc_array->is_readonly_all_server() + && !replica_desc_array->is_columnstore_all_server()) { // bypass } else if (OB_FAIL(dr_ls_info_.get_ls_status_info(ls_status_info))) { LOG_WARN("fail to get log stream info", KR(ret)); @@ -1462,8 +1588,9 @@ int 
ObDRWorker::LocalityAlignment::try_get_readonly_all_server_locality_alignmen add_replica_task_.member_time_us_ = ObTimeUtility::current_time(); add_replica_task_.unit_id_ = unit.unit_id_; add_replica_task_.unit_group_id_ = ls_status_info->unit_group_id_; - add_replica_task_.replica_type_ = REPLICA_TYPE_READONLY; - add_replica_task_.memstore_percent_ = replica_desc_array->readonly_memstore_percent_; + add_replica_task_.replica_type_ = replica_desc_array->is_columnstore_all_server() ? + REPLICA_TYPE_COLUMNSTORE : REPLICA_TYPE_READONLY; + add_replica_task_.memstore_percent_ = replica_desc_array->get_readonly_memstore_percent(); add_replica_task_.orig_paxos_replica_number_ = curr_paxos_replica_number_; add_replica_task_.paxos_replica_number_ = curr_paxos_replica_number_; task = &add_replica_task_; @@ -1492,7 +1619,7 @@ int ObDRWorker::LocalityAlignment::get_next_locality_alignment_task( } else if (nullptr != task) { // got one LOG_INFO("success to get a normal task", KPC(task)); - } else if (OB_FAIL(try_get_readonly_all_server_locality_alignment_task( + } else if (OB_FAIL(try_get_readonly_or_columnstore_all_server_locality_alignment_task( unit_provider_, task))) { LOG_WARN("fail to get readonly all server locality alignment task", KR(ret)); @@ -2528,7 +2655,7 @@ int ObDRWorker::get_replica_type_by_leader_( // not leader replica get replica type may not right. 
when remove or modify replica, // replica type wrong may result in fatal error, so get it by leader int ret = OB_SUCCESS; - replica_type = REPLICA_TYPE_MAX; + replica_type = REPLICA_TYPE_INVALID; common::ObAddr leader_addr; // not used GlobalLearnerList learner_list; common::ObMemberList member_list; @@ -2546,7 +2673,16 @@ int ObDRWorker::get_replica_type_by_leader_( } else if (member_list.contains(server_addr)) { replica_type = REPLICA_TYPE_FULL; } else if (learner_list.contains(server_addr)) { - replica_type = REPLICA_TYPE_READONLY; + ObMember learner; + if (OB_FAIL(learner_list.get_learner_by_addr(server_addr, learner))) { + LOG_WARN("failed to get_learner_by_addr", KR(ret), K(server_addr)); + } else { + if (learner.is_columnstore()) { + replica_type = REPLICA_TYPE_COLUMNSTORE; + } else { + replica_type = REPLICA_TYPE_READONLY; + } + } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to find server in leader member list and learner list", @@ -2628,7 +2764,7 @@ int ObDRWorker::build_remove_replica_task_( ObRemoveLSReplicaTask &remove_replica_task) { int ret = OB_SUCCESS; - common::ObReplicaType replica_type = REPLICA_TYPE_MAX; + common::ObReplicaType replica_type = REPLICA_TYPE_INVALID; common::ObAddr leader_addr; ObMember member_to_remove; if (OB_UNLIKELY(!inited_)) { @@ -2652,10 +2788,10 @@ int ObDRWorker::build_remove_replica_task_( share::ObTaskId task_id; int64_t new_paxos_replica_number = 0; bool has_leader = false; - ObReplicaMember remove_member(member_to_remove); + ObReplicaMember remove_member; if (FALSE_IT(task_id.init(self_addr_))) { - } else if (OB_FAIL(remove_member.set_replica_type(replica_type))) { - LOG_WARN("fail to set replica type", KR(ret), K(replica_type), K(remove_member)); + } else if (OB_FAIL(remove_member.init(member_to_remove, replica_type))) { + LOG_WARN("fail to init remove_member", KR(ret), K(member_to_remove), K(replica_type)); } else if (OB_FAIL(check_and_generate_new_paxos_replica_num_( arg, replica_type, dr_ls_info, 
new_paxos_replica_number))) { LOG_WARN("fail to check and generate new paxos replica num", KR(ret), K(arg), K(replica_type), K(dr_ls_info)); @@ -2687,7 +2823,7 @@ int ObDRWorker::build_modify_replica_type_task_( int ret = OB_SUCCESS; share::ObUnit unit; share::ObLSReplica ls_replica; - common::ObReplicaType replica_type = REPLICA_TYPE_MAX; + common::ObReplicaType replica_type = REPLICA_TYPE_INVALID; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("DRWorker not init", KR(ret)); @@ -2713,6 +2849,12 @@ int ObDRWorker::build_modify_replica_type_task_( LOG_USER_ERROR(OB_ENTRY_EXIST, "Current replica type is same as the target type, no need to modify"); LOG_WARN("replica type is the same as the target type, no need to modify type", KR(ret), K(arg), K(replica_type)); + } else if (ObReplicaTypeCheck::is_columnstore_replica(arg.get_replica_type()) + || ObReplicaTypeCheck::is_columnstore_replica(replica_type)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "Current or target replica-type is C-replica, type transform"); + LOG_WARN("Current or target replica_type is C-replica, type transform not supported", + KR(ret), K(arg), K(replica_type)); } else if (REPLICA_TYPE_FULL == arg.get_replica_type() && share::ObLSReplica::DEFAULT_REPLICA_COUNT == dr_ls_info.get_member_list_cnt()) { ret = OB_OP_NOT_ALLOW; @@ -2769,8 +2911,8 @@ int ObDRWorker::build_migrate_replica_task_( { int ret = OB_SUCCESS; share::ObUnit destination_unit; - common::ObReplicaType replica_type = REPLICA_TYPE_MAX; - share::ObLSReplica desti_ls_replica; + common::ObReplicaType replica_type = REPLICA_TYPE_INVALID; + share::ObLSReplica dest_ls_replica; share::ObLSReplica source_ls_replica; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; @@ -2781,15 +2923,15 @@ int ObDRWorker::build_migrate_replica_task_( } else if (OB_FAIL(dr_ls_info.check_replica_exist_and_get_ls_replica( arg.get_server_addr(), source_ls_replica))) { LOG_WARN("fail to check and get replica by server", KR(ret), K(arg)); - } 
else if (!source_ls_replica.is_valid() || (source_ls_replica.is_valid() && !source_ls_replica.is_in_service())) { + } else if (!source_ls_replica.is_valid() || !source_ls_replica.is_in_service()) { ret = OB_ENTRY_NOT_EXIST; LOG_USER_ERROR(OB_ENTRY_NOT_EXIST, "Source server does not have a replica of this LS"); LOG_WARN("source server does not have a replica of this LS", KR(ret), K(arg), K(dr_ls_info), K(source_ls_replica)); } else if (OB_FAIL(dr_ls_info.check_replica_exist_and_get_ls_replica( - arg.get_destination_addr(), desti_ls_replica))) { + arg.get_destination_addr(), dest_ls_replica))) { LOG_WARN("fail to check and get replica by server", KR(ret), K(arg)); - } else if (desti_ls_replica.is_valid()) { + } else if (dest_ls_replica.is_valid()) { ret = OB_ENTRY_EXIST; LOG_USER_ERROR(OB_ENTRY_EXIST, "The destination server already has a replica"); LOG_WARN("target server already has a replica, no need migrate", KR(ret), K(arg), K(dr_ls_info)); @@ -2898,6 +3040,16 @@ int ObDRWorker::build_modify_paxos_replica_num_task_( return ret; } +// this func is only for basic checking. +// 1. if dest_replica_type is F/R replica, data_src can be same replica_type or F; +// 2. if dest_replica_type is C replica, data_src can be F/R/C. 
+bool can_as_data_source(const int32_t dest_replica_type, const int32_t src_replica_type) +{ + return (dest_replica_type == src_replica_type + || REPLICA_TYPE_FULL == src_replica_type + || ObReplicaTypeCheck::is_columnstore_replica(dest_replica_type)); +} + int ObDRWorker::check_data_source_available_and_init_( const obrpc::ObAdminAlterLSReplicaArg &arg, const common::ObReplicaType &replica_type, @@ -2916,7 +3068,7 @@ int ObDRWorker::check_data_source_available_and_init_( data_source.reset(); share::ObLSReplica ls_replica; ObServerInfoInTable server_info; - common::ObReplicaType provide_replica_type = REPLICA_TYPE_MAX; + common::ObReplicaType provide_replica_type = REPLICA_TYPE_INVALID; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("DRWorker not init", KR(ret)); @@ -2924,11 +3076,10 @@ int ObDRWorker::check_data_source_available_and_init_( // passed LOG_INFO("data_source is not valid", KR(ret), K(arg)); } else if (OB_UNLIKELY(!arg.is_valid() - || OB_UNLIKELY(replica_type != REPLICA_TYPE_FULL && replica_type != REPLICA_TYPE_READONLY))) { + || OB_UNLIKELY(!ObReplicaTypeCheck::is_replica_type_valid(replica_type)))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(arg), K(replica_type)); } else { - ObDataSourceCandidateChecker type_checker(replica_type); if (OB_FAIL(dr_ls_info.check_replica_exist_and_get_ls_replica(arg.get_data_source(), ls_replica))) { LOG_WARN("fail to get ls replica", KR(ret), K(dr_ls_info)); } else if (!ls_replica.is_valid() || (ls_replica.is_valid() && !ls_replica.is_in_service())) { @@ -2941,10 +3092,10 @@ int ObDRWorker::check_data_source_available_and_init_( ret = OB_OP_NOT_ALLOW; LOG_USER_ERROR(OB_OP_NOT_ALLOW, "Data source replica restore or clone failed, which is"); LOG_WARN("ls replica restore failed", KR(ret), K(arg), K(ls_replica)); - } else if (!type_checker.is_candidate(provide_replica_type)) { + } else if (!can_as_data_source(replica_type, provide_replica_type)) { ret = OB_OP_NOT_ALLOW; 
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "R replica is not supported as the source of F replica, which is"); - LOG_WARN("type_checker failed", KR(ret), K(arg), K(provide_replica_type)); + LOG_WARN("provided data_source replica_type not supported", KR(ret), K(arg), K(replica_type), K(provide_replica_type)); } else if (OB_FAIL(SVR_TRACER.get_server_info(arg.get_data_source(), server_info))) { LOG_WARN("fail to get server info", KR(ret), K(arg)); } else if (!server_info.is_alive()) { @@ -3013,7 +3164,7 @@ int ObDRWorker::check_and_generate_new_paxos_replica_num_( ret = OB_NOT_INIT; LOG_WARN("DRWorker not init", KR(ret)); } else if (OB_UNLIKELY(!arg.is_valid() - || (REPLICA_TYPE_FULL != replica_type && REPLICA_TYPE_READONLY != replica_type))) { + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(arg), K(replica_type)); } else if (std::abs(new_p - curr_p) > 1) { @@ -3023,13 +3174,13 @@ int ObDRWorker::check_and_generate_new_paxos_replica_num_( } else if (task_type.is_add_task()) { if (REPLICA_TYPE_FULL == replica_type) { member_change_type = MEMBER_CHANGE_ADD; - } else if (REPLICA_TYPE_READONLY == replica_type) { + } else if (ObReplicaTypeCheck::is_non_paxos_replica(replica_type)) { member_change_type = MEMBER_CHANGE_NOP; } } else if (task_type.is_remove_task()) { if (REPLICA_TYPE_FULL == replica_type) { member_change_type = MEMBER_CHANGE_SUB; - } else if (REPLICA_TYPE_READONLY == replica_type) { + } else if (ObReplicaTypeCheck::is_non_paxos_replica(replica_type)) { member_change_type = MEMBER_CHANGE_NOP; } } else if (task_type.is_modify_replica_task()) { @@ -3037,11 +3188,15 @@ int ObDRWorker::check_and_generate_new_paxos_replica_num_( member_change_type = MEMBER_CHANGE_ADD; } else if (REPLICA_TYPE_READONLY == replica_type) { member_change_type = MEMBER_CHANGE_SUB; + } else if (ObReplicaTypeCheck::is_columnstore_replica(replica_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid replica_type, 
do not support transforming to C-replica", KR(ret), K(arg)); } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("task type unexpected", KR(ret), K(arg), K(dr_ls_info), K(replica_type)); } + if (OB_FAIL(ret)) { } else if ((MEMBER_CHANGE_ADD == member_change_type)) { if (OB_FAIL(check_for_alter_full_replica_(member_list_count + 1, new_p))) { @@ -3192,7 +3347,7 @@ int ObDRWorker::check_has_leader_while_remove_replica( int64_t arb_replica_num = 0; uint64_t tenant_id = OB_INVALID_TENANT_ID; ObLSID ls_id; - ObReplicaType replica_type = REPLICA_TYPE_MAX; + ObReplicaType replica_type = REPLICA_TYPE_INVALID; for (int64_t index = 0; OB_SUCC(ret) && index < replica_cnt; ++index) { share::ObLSReplica *ls_replica = nullptr; DRServerStatInfo *server_stat_info = nullptr; @@ -3482,6 +3637,9 @@ int ObDRWorker::try_remove_permanent_offline_replicas( common::ObReplicaType replica_type = REPLICA_TYPE_READONLY; if (OB_FAIL(learner_list.get_member_by_index(index, learner_to_remove))) { LOG_WARN("fail to get learner by index", KR(ret), K(index)); + } else if (FALSE_IT(replica_type = learner_to_remove.is_columnstore() ? 
+ REPLICA_TYPE_COLUMNSTORE : REPLICA_TYPE_READONLY)) { + // shall never be here } else if (OB_FAIL(do_single_replica_permanent_offline_( tenant_id, ls_id, @@ -3490,7 +3648,7 @@ int ObDRWorker::try_remove_permanent_offline_replicas( replica_type, learner_to_remove, acc_dr_task))) { - LOG_WARN("fail to do single replica permanent offline task for readonly replica", KR(ret), K(tenant_id), + LOG_WARN("fail to do single replica permanent offline task for non-paxos replica", KR(ret), K(tenant_id), K(ls_id), K(dr_ls_info), K(only_for_display), K(replica_type), K(learner_to_remove), K(acc_dr_task)); } } @@ -3532,12 +3690,12 @@ int ObDRWorker::do_single_replica_permanent_offline_( const int64_t memstore_percent = 100; ObDRTaskKey task_key; bool can_generate = false; - ObReplicaMember remove_member(member_to_remove); + ObReplicaMember remove_member; ObDRTaskType task_type = ObReplicaTypeCheck::is_paxos_replica_V2(replica_type) ? ObDRTaskType::LS_REMOVE_PAXOS_REPLICA : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; - if (OB_FAIL(remove_member.set_replica_type(replica_type))) { - LOG_WARN("fail to set replica type", KR(ret), K(replica_type), K(remove_member)); + if (OB_FAIL(remove_member.init(member_to_remove, replica_type))) { + LOG_WARN("failed to init remove_member", KR(ret), K(member_to_remove), K(replica_type)); } else if (OB_FAIL(construct_extra_infos_to_build_remove_replica_task( dr_ls_info, task_id, @@ -3560,7 +3718,7 @@ int ObDRWorker::do_single_replica_permanent_offline_( replica_type, new_paxos_replica_number, source_server, - REPLICA_TYPE_MAX/*source_replica_type*/, + REPLICA_TYPE_INVALID/*source_replica_type*/, old_paxos_replica_number, leader_addr, "remove permanent offline replica"))) { @@ -4216,8 +4374,8 @@ int ObDRWorker::record_task_plan_for_locality_alignment( ObDRTaskType task_type = ObDRTaskType::MAX_TYPE; uint64_t tenant_id = OB_INVALID_ID; share::ObLSID ls_id; - ObReplicaType source_replica_type = REPLICA_TYPE_MAX; - ObReplicaType target_replica_type = 
REPLICA_TYPE_MAX; + ObReplicaType source_replica_type = REPLICA_TYPE_INVALID; + ObReplicaType target_replica_type = REPLICA_TYPE_INVALID; ObDRTaskPriority task_priority = ObDRTaskPriority::MAX_PRI; common::ObAddr leader_addr; common::ObAddr source_svr; @@ -4245,7 +4403,7 @@ int ObDRWorker::record_task_plan_for_locality_alignment( case RemoveNonPaxos: { const RemoveReplicaLATask *my_task = reinterpret_cast(task); task_type = RemovePaxos == task->get_task_type() ? ObDRTaskType::LS_REMOVE_PAXOS_REPLICA : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; - source_replica_type = REPLICA_TYPE_MAX; + source_replica_type = REPLICA_TYPE_INVALID; target_replica_type = my_task->replica_type_; task_priority = task_type == ObDRTaskType::LS_REMOVE_PAXOS_REPLICA ? ObDRTaskPriority::HIGH_PRI : ObDRTaskPriority::LOW_PRI; target_svr = my_task->remove_server_; @@ -4476,15 +4634,15 @@ int ObDRWorker::try_shrink_resource_pools( && share::ObUnit::UNIT_STATUS_DELETING == unit_stat_info->get_unit().status_) { // replica is still in member_list, but unit is in DELETING status // If this is a duplicate log stream - // 1.1 for R-replica: execute remove_learner task directly + // 1.1 for non-paxos(R or C) replica: execute remove_learner task directly // 1.2 for F-replica: try to execute migrate-replica first, // if migrate-replica task can not generate then try to type_transform another R to F // If this is a normal log stream - // 2.1 try to execute migrate-replica task for both R-replica and F-replica + // 2.1 try to execute migrate-replica task for replica of any type if (dr_ls_info.is_duplicate_ls()) { - if (REPLICA_TYPE_READONLY == ls_replica->get_replica_type()) { + if (ObReplicaTypeCheck::is_non_paxos_replica(ls_replica->get_replica_type())) { // 1.1 try to generate and execute remove learner task - if (OB_FAIL(try_remove_readonly_replica_for_deleting_unit_( + if (OB_FAIL(try_remove_non_paxos_replica_for_deleting_unit_( *ls_replica, only_for_display, dr_ls_info, @@ -4554,7 +4712,7 @@ int 
ObDRWorker::try_shrink_resource_pools( return ret; } -int ObDRWorker::try_remove_readonly_replica_for_deleting_unit_( +int ObDRWorker::try_remove_non_paxos_replica_for_deleting_unit_( const share::ObLSReplica &ls_replica, const bool &only_for_display, DRLSInfo &dr_ls_info, @@ -4607,7 +4765,7 @@ int ObDRWorker::try_remove_readonly_replica_for_deleting_unit_( ls_replica.get_replica_type(), new_paxos_replica_number, source_server, - REPLICA_TYPE_MAX/*source_replica_type*/, + REPLICA_TYPE_INVALID/*source_replica_type*/, old_paxos_replica_number, leader_addr, "shrink unit task"))) { @@ -5055,7 +5213,7 @@ int ObDRWorker::check_need_generate_cancel_unit_migration_task( return ret; } -int ObDRWorker::construct_extra_info_to_build_cancael_migration_task( +int ObDRWorker::construct_extra_info_to_build_cancel_migration_task( const bool &is_paxos_replica_related, DRLSInfo &dr_ls_info, const share::ObLSReplica &ls_replica, @@ -5103,7 +5261,7 @@ int ObDRWorker::generate_cancel_unit_migration_task( int ret = OB_SUCCESS; ObRemoveLSReplicaTask remove_member_task; ObString comment_to_set = ""; - ObReplicaType replica_type = is_paxos_replica_related ? 
REPLICA_TYPE_FULL : REPLICA_TYPE_READONLY; + ObReplicaType replica_type = remove_member.get_replica_type(); if (is_paxos_replica_related) { comment_to_set.assign_ptr(drtask::CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA, strlen(drtask::CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA)); @@ -5132,7 +5290,7 @@ int ObDRWorker::generate_cancel_unit_migration_task( old_paxos_replica_number, new_paxos_replica_number, replica_type))) { - LOG_WARN("fail to build remove member task", KR(ret)); + LOG_WARN("fail to build remove member task", KR(ret), K(task_key), K(task_id)); } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_member_task))) { LOG_WARN("fail to add task", KR(ret), K(remove_member_task)); } else { @@ -5218,7 +5376,7 @@ int ObDRWorker::try_cancel_unit_migration( strlen(drtask::CANCEL_MIGRATE_UNIT_WITH_NON_PAXOS_REPLICA)); } - if (OB_FAIL(construct_extra_info_to_build_cancael_migration_task( + if (OB_FAIL(construct_extra_info_to_build_cancel_migration_task( is_paxos_replica_related, dr_ls_info, *ls_replica, @@ -5240,7 +5398,7 @@ int ObDRWorker::try_cancel_unit_migration( ls_replica->get_replica_type(), new_paxos_replica_number, source_svr, - REPLICA_TYPE_MAX, + REPLICA_TYPE_INVALID, old_paxos_replica_number, leader_addr, comment_to_set))) { @@ -5773,7 +5931,8 @@ int ObDRWorker::check_ls_only_in_member_list_or_with_flag_( } } else { ret = OB_STATE_NOT_MATCH; - LOG_WARN("read only replica with flag should not appear in inner_ls_info", KR(ret), K(learner_to_check), K(inner_ls_info)); + LOG_WARN("read only replica with migrating-flag should not appear in inner_ls_info", + KR(ret), K(learner_to_check), K(inner_ls_info)); } } else if (OB_FAIL(inner_ls_info.find(learner_to_check.get_server(), replica))) { LOG_WARN("fail to find read only replica", KR(ret), K(inner_ls_info), K(learner_to_check)); diff --git a/src/rootserver/ob_disaster_recovery_worker.h b/src/rootserver/ob_disaster_recovery_worker.h index ec162fd32..97e73ee00 100755 --- 
a/src/rootserver/ob_disaster_recovery_worker.h +++ b/src/rootserver/ob_disaster_recovery_worker.h @@ -310,7 +310,7 @@ private: LA_P_ADD_LOGONLY, LA_P_ADD_ENCRYPTION, LA_P_FULL_TO_LOGONLY, - LA_P_ADD_READONLY, + LA_P_ADD_NON_PAXOS, LA_P_REMOVE_NON_PAXOS, LA_P_FULL_TO_READONLY, LA_P_REMOVE_PAXOS, @@ -348,7 +348,7 @@ private: RemoveReplicaLATask() : LATask(), remove_server_(), - replica_type_(REPLICA_TYPE_MAX), + replica_type_(REPLICA_TYPE_INVALID), memstore_percent_(100), member_time_us_(-1), orig_paxos_replica_number_(0), @@ -389,7 +389,7 @@ private: dst_server_(), unit_id_(OB_INVALID_ID), unit_group_id_(OB_INVALID_ID), - replica_type_(REPLICA_TYPE_MAX), + replica_type_(REPLICA_TYPE_INVALID), memstore_percent_(100), member_time_us_(-1), orig_paxos_replica_number_(0), @@ -405,8 +405,8 @@ private: priority = LATaskPrio::LA_P_ADD_LOGONLY; } else if (common::REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type_) { priority = LATaskPrio::LA_P_ADD_ENCRYPTION; - } else if (common::REPLICA_TYPE_READONLY == replica_type_) { - priority = LATaskPrio::LA_P_ADD_READONLY; + } else if (ObReplicaTypeCheck::is_non_paxos_replica(replica_type_)) { + priority = LATaskPrio::LA_P_ADD_NON_PAXOS; } else {} // default priority value return priority; } @@ -442,10 +442,10 @@ private: dst_server_(), unit_id_(OB_INVALID_ID), unit_group_id_(OB_INVALID_ID), - src_replica_type_(REPLICA_TYPE_MAX), + src_replica_type_(REPLICA_TYPE_INVALID), src_memstore_percent_(100), src_member_time_us_(-1), - dst_replica_type_(REPLICA_TYPE_MAX), + dst_replica_type_(REPLICA_TYPE_INVALID), dst_memstore_percent_(100), dst_member_time_us_(-1), orig_paxos_replica_number_(0), @@ -527,7 +527,7 @@ private: memstore_percent_(memstore_percent), replica_num_(replica_num) {} ReplicaDesc() - : replica_type_(REPLICA_TYPE_MAX), + : replica_type_(REPLICA_TYPE_INVALID), memstore_percent_(100), replica_num_(0) {} TO_STRING_KV(K(replica_type_), @@ -535,14 +535,16 @@ private: K(replica_num_)); int64_t cast(const common::ObReplicaType 
replica_type) { int64_t ret_val = 0; - if (REPLICA_TYPE_READONLY == replica_type) { + if (REPLICA_TYPE_COLUMNSTORE == replica_type) { ret_val = 1; - } else if (REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type) { + } else if (REPLICA_TYPE_READONLY == replica_type) { ret_val = 2; - } else if (REPLICA_TYPE_LOGONLY == replica_type) { + } else if (REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type) { ret_val = 3; - } else if (REPLICA_TYPE_FULL == replica_type) { + } else if (REPLICA_TYPE_LOGONLY == replica_type) { ret_val = 4; + } else if (REPLICA_TYPE_FULL == replica_type) { + ret_val = 5; } else { ret_val = 0; // invalid type, put it at the beginning } @@ -580,35 +582,6 @@ private: && nullptr != unit_stat_info_ && nullptr != unit_in_group_stat_info_; } - int64_t cast(const common::ObReplicaType replica_type) { - int64_t ret_val = 0; - if (REPLICA_TYPE_READONLY == replica_type) { - ret_val = 1; - } else if (REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type) { - ret_val = 2; - } else if (REPLICA_TYPE_LOGONLY == replica_type) { - ret_val = 3; - } else if (REPLICA_TYPE_FULL == replica_type) { - ret_val = 4; - } else { - ret_val = 0; // invalid type, put it at the beginning - } - return ret_val; - } - bool operator<(const ReplicaStatDesc &that) { - bool bool_ret = true; - if (nullptr == this->replica_ && nullptr != that.replica_) { - bool_ret = true; - } else if (nullptr != this->replica_ && nullptr == that.replica_) { - bool_ret = false; - } else if (nullptr == this->replica_ && nullptr == that.replica_) { - bool_ret = true; - } else { - bool_ret = cast(this->replica_->get_replica_type()) - < cast(that.replica_->get_replica_type()); - } - return bool_ret; - } TO_STRING_KV(KPC(replica_), KPC(server_stat_info_), KPC(unit_stat_info_), @@ -625,10 +598,22 @@ private: public: ReplicaDescArray() : common::ObSEArrayImpl(), is_readonly_all_server_(false), - readonly_memstore_percent_(100) {} - public: + is_columnstore_all_server_(false) {} + void set_readonly_all_server() { + 
is_readonly_all_server_ = true; + is_columnstore_all_server_ = false; + } + void set_columnstore_all_server() { + is_columnstore_all_server_ = true; + is_readonly_all_server_ = false; + } + bool is_readonly_all_server() { return is_readonly_all_server_; } + bool is_columnstore_all_server() { return is_columnstore_all_server_; } + int64_t get_readonly_memstore_percent() { return readonly_memstore_percent_; } + private: bool is_readonly_all_server_; - int64_t readonly_memstore_percent_; + bool is_columnstore_all_server_; + const int64_t readonly_memstore_percent_ = 100; // obsolete }; @@ -699,13 +684,17 @@ private: ReplicaStatDesc &replica_stat_desc, share::ObLSReplica &replica, const int64_t index); + int do_generate_locality_task_from_columnstore_replica( + ReplicaStatDesc &replica_stat_desc, + share::ObLSReplica &replica, + const int64_t index); - int try_generate_type_transform_task_for_readonly_replica_( + int try_generate_task_for_readonly_replica_( ReplicaDescArray &zone_replica_desc_in_locality, ReplicaStatDesc &replica_stat_desc, const int64_t index, bool &task_generated); - int try_generate_remove_readonly_task_for_duplicate_log_stream_( + int try_generate_remove_redundant_replica_task_for_dup_ls_( ReplicaStatDesc &replica_stat_desc, share::ObLSReplica &replica, const int64_t index); @@ -733,7 +722,7 @@ private: ReplicaDesc &replica_desc); int generate_modify_paxos_replica_number_task(); // private func for get_next_locality_alignment_task - int try_get_readonly_all_server_locality_alignment_task( + int try_get_readonly_or_columnstore_all_server_locality_alignment_task( UnitProvider &unit_provider, const LATask *&task); int try_get_normal_locality_alignment_task( @@ -933,7 +922,7 @@ private: bool &is_paxos_replica_related, bool &need_generate); - int construct_extra_info_to_build_cancael_migration_task( + int construct_extra_info_to_build_cancel_migration_task( const bool &is_paxos_replica_related, DRLSInfo &dr_ls_info, const share::ObLSReplica &ls_replica, 
@@ -1043,7 +1032,7 @@ private: // @params[in] only_for_display, whether just to display this task // @params[in] dr_ls_info, disaster recovery infos of this log stream // @params[out] acc_dr_task, accumulated disaster recovery task count - int try_remove_readonly_replica_for_deleting_unit_( + int try_remove_non_paxos_replica_for_deleting_unit_( const share::ObLSReplica &ls_replica, const bool &only_for_display, DRLSInfo &dr_ls_info, diff --git a/src/rootserver/ob_locality_util.cpp b/src/rootserver/ob_locality_util.cpp index e3d7e262c..64460d518 100644 --- a/src/rootserver/ob_locality_util.cpp +++ b/src/rootserver/ob_locality_util.cpp @@ -40,19 +40,6 @@ using namespace oceanbase::share::schema; ret = OB_INVALID_ARGUMENT; \ LOG_WARN("invalid locality", K(ret)); \ } while (0) -// full replica -const char *const ObLocalityDistribution::FULL_REPLICA_STR = "FULL"; -const char *const ObLocalityDistribution::F_REPLICA_STR = "F"; -// logonly replica -const char *const ObLocalityDistribution::LOGONLY_REPLICA_STR = "LOGONLY"; -const char *const ObLocalityDistribution::L_REPLICA_STR = "L"; -// readonly replica -const char *const ObLocalityDistribution::READONLY_REPLICA_STR = "READONLY"; -const char *const ObLocalityDistribution::R_REPLICA_STR = "R"; -// encryption logonly replica -const char *const ObLocalityDistribution::ENCRYPTION_LOGONLY_REPLICA_STR = "ENCRYPTION_LOGONLY"; -const char *const ObLocalityDistribution::E_REPLICA_STR = "E"; -// some other terminology const common::ObZone ObLocalityDistribution::EVERY_ZONE("everyzone"); const char *const ObLocalityDistribution::ALL_SERVER_STR = "ALL_SERVER"; const char *const ObLocalityDistribution::MEMSTORE_PERCENT_STR = "MEMSTORE_PERCENT"; @@ -149,6 +136,9 @@ int ObLocalityDistribution::ZoneSetReplicaDist::check_valid_replica_dist( for (int64_t i = 0; is_valid && i < all_replica_attr_array_[READONLY_REPLICA].count(); ++i) { is_valid = all_replica_attr_array_[READONLY_REPLICA].at(i).num_ >= 0; } + for (int64_t i = 0; is_valid && 
i < all_replica_attr_array_[COLUMNSTORE_REPLICA].count(); ++i) { + is_valid = all_replica_attr_array_[COLUMNSTORE_REPLICA].at(i).num_ >= 0; + } } } else { is_valid = false; // do not support mixed-zone locality from 3.2.1 and versions to come @@ -209,6 +199,12 @@ bool ObLocalityDistribution::ZoneSetReplicaDist::has_non_encryption_logonly() co const ReplicaAttr &attr = all_replica_attr_array_[READONLY_REPLICA].at(i); has = attr.num_ > 0; } + for (int64_t i = 0; + !has && i < all_replica_attr_array_[COLUMNSTORE_REPLICA].count(); + ++i) { + const ReplicaAttr &attr = all_replica_attr_array_[COLUMNSTORE_REPLICA].at(i); + has = attr.num_ > 0; + } return has; } @@ -436,6 +432,9 @@ int ObLocalityDistribution::ZoneSetReplicaDist::replica_type_to_str( case ENCRYPTION_LOGONLY_REPLICA: replica_type_str = "ENCRYPTION_LOGONLY"; break; + case COLUMNSTORE_REPLICA: + replica_type_str = "COLUMNSTORE"; + break; default: ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected replica type", K(ret), K(replica_type)); @@ -658,9 +657,11 @@ int ObLocalityDistribution::RawLocalityIter::get_replica_arrangements( && OB_SUCC(get_next_replica_arrangement( cursor, end, replica_type, replica_num, memstore_percent))) { if (OB_UNLIKELY(FULL_REPLICA != replica_type - && READONLY_REPLICA != replica_type)) { + && READONLY_REPLICA != replica_type + && COLUMNSTORE_REPLICA != replica_type)) { // TODO: F-replica is supported since 4.0, // R-replica is supported since 4.2, + // C-replica is supported since 4.3.2 // other types will be supported later INVALID_LOCALITY(); switch (replica_type) { @@ -775,6 +776,20 @@ int ObLocalityDistribution::RawLocalityIter::get_replica_type( cursor += strlen(R_REPLICA_STR); } else {} // not this type } + if (!type_found && remain >= strlen(COLUMNSTORE_REPLICA_STR)) { + if (0 == strncmp(COLUMNSTORE_REPLICA_STR, &locality_str_[cursor], strlen(COLUMNSTORE_REPLICA_STR))) { + replica_type = COLUMNSTORE_REPLICA; + type_found = true; + cursor += strlen(COLUMNSTORE_REPLICA_STR); + } else 
{} // not this type + } + if (!type_found && remain >= strlen(C_REPLICA_STR)) { + if (0 == strncmp(C_REPLICA_STR, &locality_str_[cursor], strlen(C_REPLICA_STR))) { + replica_type = COLUMNSTORE_REPLICA; + type_found = true; + cursor += strlen(C_REPLICA_STR); + } else {} // not this type + } if (!type_found && remain >= strlen(ENCRYPTION_LOGONLY_REPLICA_STR)) { if (0 == strncmp(ENCRYPTION_LOGONLY_REPLICA_STR, &locality_str_[cursor], @@ -1591,72 +1606,6 @@ int ObLocalityDistribution::convert_zone_list( return ret; } -int ObLocalityDistribution::get_zone_replica_num( - const common::ObZone &zone, - share::ObReplicaNumSet &replica_num_set) -{ - int ret = OB_SUCCESS; - if (zone.is_empty()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(zone)); - } else { - bool found = false; - for (int64_t i = 0; !found && OB_SUCC(ret) && i < zone_set_replica_dist_array_.count(); ++i) { - const ZoneSetReplicaDist &this_dist = zone_set_replica_dist_array_.at(i); - if (zone != this_dist.get_zone_set().at(0)) { - // bypass - } else{ - replica_num_set.set_replica_num(this_dist.get_full_replica_num(), - this_dist.get_logonly_replica_num(), - this_dist.get_readonly_replica_num(), - this_dist.get_encryption_logonly_replica_num()); - } - } - if (OB_FAIL(ret)) { - // failed - } else if (!found) { - ret = OB_ENTRY_NOT_EXIST; - } - } - return ret; -} - -int ObLocalityDistribution::get_zone_replica_num( - const common::ObZone &zone, - share::ObZoneReplicaAttrSet &zone_replica_attr_set) -{ - int ret = OB_SUCCESS; - if (zone.is_empty()) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(zone)); - } else { - bool found = false; - for (int64_t i = 0; !found && OB_SUCC(ret) && i < zone_set_replica_dist_array_.count(); ++i) { - const ZoneSetReplicaDist &this_dist = zone_set_replica_dist_array_.at(i); - if (zone != this_dist.get_zone_set().at(0)) { - // bypass - } else if (OB_FAIL(zone_replica_attr_set.zone_set_.assign(this_dist.get_zone_set()))) { - 
LOG_WARN("fail to assign zone set", K(ret)); - } else { - zone_replica_attr_set.zone_ = zone; - if (OB_FAIL(zone_replica_attr_set.replica_attr_set_.set_replica_attr_array( - this_dist.get_full_replica_attr(), - this_dist.get_logonly_replica_attr(), - this_dist.get_readonly_replica_attr(), - this_dist.get_encryption_logonly_replica_attr()))) { - LOG_WARN("fail to set replica attr array", KR(ret)); - } - } - } - if (OB_FAIL(ret)) { - // failed - } else if (!found) { - ret = OB_ENTRY_NOT_EXIST; - } - } - return ret; -} - int ObLocalityDistribution::get_zone_replica_attr_array( common::ObIArray &zone_replica_num_array) { @@ -1675,7 +1624,8 @@ int ObLocalityDistribution::get_zone_replica_attr_array( this_dist.get_full_replica_attr(), this_dist.get_logonly_replica_attr(), this_dist.get_readonly_replica_attr(), - this_dist.get_encryption_logonly_replica_attr()))) { + this_dist.get_encryption_logonly_replica_attr(), + this_dist.get_columnstore_replica_attr()))) { LOG_WARN("fail to set replica attr array", KR(ret)); } } diff --git a/src/rootserver/ob_locality_util.h b/src/rootserver/ob_locality_util.h index 95b13eb79..5d70e37bf 100644 --- a/src/rootserver/ob_locality_util.h +++ b/src/rootserver/ob_locality_util.h @@ -35,7 +35,6 @@ namespace schema { struct ObZoneRegion; class ObSchemaGetterGuard; -class ObLocality; class ObSimpleTableSchemaV2; class ObTablegroupSchema; class ObTenantSchema; @@ -80,12 +79,6 @@ public: int64_t &pos); int get_zone_replica_attr_array( common::ObIArray &zone_replica_num_array); - int get_zone_replica_num( - const common::ObZone &zone, - share::ObReplicaNumSet &replica_num_set); - int get_zone_replica_num( - const common::ObZone &zone, - share::ObZoneReplicaAttrSet &zone_replica_num_set); public: static const int64_t ALL_SERVER_CNT = INT64_MAX; private: @@ -94,24 +87,12 @@ private: static const int32_t LOGONLY_REPLICA = 1; static const int32_t READONLY_REPLICA = 2; static const int32_t ENCRYPTION_LOGONLY_REPLICA = 3; - static const int32_t 
REPLICA_TYPE_MAX = 4; + static const int32_t COLUMNSTORE_REPLICA = 4; + static const int32_t REPLICA_TYPE_MAX = 5; private: static const int64_t MAX_BUCKET_NUM = 2 * common::MAX_ZONE_NUM; static const int64_t INVALID_CURSOR = -1; static const int64_t INVALID_COUNT = -1; - // full replica - static const char *const FULL_REPLICA_STR; - static const char *const F_REPLICA_STR; - // logonly replica - static const char *const LOGONLY_REPLICA_STR; - static const char *const L_REPLICA_STR; - // readonly replica - static const char *const READONLY_REPLICA_STR; - static const char *const R_REPLICA_STR; - // encryption logonly replica - static const char *const ENCRYPTION_LOGONLY_REPLICA_STR; - static const char *const E_REPLICA_STR; - // others static const common::ObZone EVERY_ZONE; static const char *const ALL_SERVER_STR; static const char *const MEMSTORE_PERCENT_STR; @@ -173,6 +154,12 @@ private: : 0); return num; } + inline int64_t get_columnstore_replica_num() const { + int64_t num = (all_replica_attr_array_[COLUMNSTORE_REPLICA].count() > 0 + ? 
all_replica_attr_array_[COLUMNSTORE_REPLICA].at(0).num_ + : 0); + return num; + } inline const ReplicaAttrArray &get_full_replica_attr() const { return all_replica_attr_array_[FULL_REPLICA]; } @@ -185,6 +172,9 @@ private: inline const ReplicaAttrArray &get_encryption_logonly_replica_attr() const { return all_replica_attr_array_[ENCRYPTION_LOGONLY_REPLICA]; } + inline const ReplicaAttrArray &get_columnstore_replica_attr() const { + return all_replica_attr_array_[COLUMNSTORE_REPLICA]; + } inline const common::ObIArray &get_zone_set() const { return zone_set_; } public: int format_to_locality_str(char *buf, int64_t buf_len, int64_t &pos) const; @@ -199,7 +189,8 @@ private: "full_replica_attr", all_replica_attr_array_[FULL_REPLICA], "logonly_replica_attr", all_replica_attr_array_[LOGONLY_REPLICA], "readonly_replica_attr", all_replica_attr_array_[READONLY_REPLICA], - "encryption_logonly_replica_attr", all_replica_attr_array_[ENCRYPTION_LOGONLY_REPLICA]); + "encryption_logonly_replica_attr", all_replica_attr_array_[ENCRYPTION_LOGONLY_REPLICA], + "columnstore_replica_attr", all_replica_attr_array_[COLUMNSTORE_REPLICA]); private: bool specific_replica_need_format( const ReplicaTypeID replica_type) const; diff --git a/src/rootserver/ob_replica_addr.h b/src/rootserver/ob_replica_addr.h index 02c75407e..06c17b70f 100644 --- a/src/rootserver/ob_replica_addr.h +++ b/src/rootserver/ob_replica_addr.h @@ -35,7 +35,7 @@ struct ObReplicaAddr initial_leader_(false), addr_(), zone_(), - replica_type_(common::REPLICA_TYPE_MAX), + replica_type_(common::REPLICA_TYPE_INVALID), replica_property_() {} void reset() { *this = ObReplicaAddr(); } int64_t get_memstore_percent() const {return replica_property_.get_memstore_percent();} diff --git a/src/rootserver/ob_root_service.cpp b/src/rootserver/ob_root_service.cpp index 80b342bb5..c2ee2855e 100755 --- a/src/rootserver/ob_root_service.cpp +++ b/src/rootserver/ob_root_service.cpp @@ -2565,14 +2565,10 @@ int 
ObRootService::create_resource_pool(const obrpc::ObCreateResourcePoolArg &ar LOG_USER_ERROR(OB_MISS_ARGUMENT, "unit_num"); } LOG_WARN("missing arg to create resource pool", K(arg), K(ret)); - } else if (REPLICA_TYPE_LOGONLY != arg.replica_type_ - && REPLICA_TYPE_FULL != arg.replica_type_) { + } else if (REPLICA_TYPE_FULL != arg.replica_type_) { ret = OB_NOT_SUPPORTED; - LOG_WARN("only full/logonly pool are supported", K(ret), K(arg)); - } else if (REPLICA_TYPE_LOGONLY == arg.replica_type_ - && arg.unit_num_> 1) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("logonly resource pool should only have one unit on one zone", K(ret), K(arg)); + LOG_WARN("only full replica pool are supported", K(ret), K(arg)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "replica_type of resource pool other than FULL replica"); } else if (0 == arg.unit_.case_compare(OB_STANDBY_UNIT_CONFIG_TEMPLATE_NAME)) { ret = OB_OP_NOT_ALLOW; LOG_WARN("can not create resource pool use standby unit config template", K(ret), K(arg)); @@ -9374,7 +9370,7 @@ int ObRootService::add_rs_event_for_alter_ls_replica_( ROOTSERVICE_EVENT_ADD(ADD_EVENT_FOR_ALTER_LS_REPLICA, "ls_id", arg.get_ls_id().id(), "target_replica", arg.get_server_addr(), - "replica_type", replica_type_to_str(arg.get_replica_type()), + "replica_type", share::ObShareUtil::replica_type_to_string(arg.get_replica_type()), "", NULL, extra_info); } else if (arg.get_alter_task_type().is_migrate_task()) { @@ -9641,6 +9637,31 @@ int ObRootService::check_restore_tenant_valid(const share::ObPhysicalRestoreJob } } } + // check if loclaity contains any C replica + ObLocalityDistribution locality_dist; + common::ObArray zone_region_list; + common::ObArray zone_replica_num_array; + if (OB_FAIL(ret)) { + // already failed + } else if (OB_FAIL(locality_dist.init())) { + LOG_WARN("fail to init locality dist", K(ret)); + } else if (OB_FAIL(ddl_service_.construct_zone_region_list(zone_region_list, zones))) { + LOG_WARN("fail to construct zone region list", K(ret)); + } else if 
(OB_FAIL(locality_dist.parse_locality( + job_info.get_locality(), zones, &zone_region_list))) { + LOG_WARN("fail to parse locality", K(ret)); + } else if (OB_FAIL(locality_dist.get_zone_replica_attr_array(zone_replica_num_array))) { + LOG_WARN("fail to get zone region replica num array", K(ret)); + } else { + FOREACH_X(zone_replica_attr, zone_replica_num_array, OB_SUCC(ret)) { + if (zone_replica_attr->get_columnstore_replica_num() > 0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("restore tenant with C replica not supported", KR(ret), + "locality_str", job_info.get_locality(), K(zone_replica_num_array)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "restore tenant with COLUMNSTORE replica in locality is"); + } + } + } } } //TODO check if need check R replica diff --git a/src/rootserver/ob_root_utils.cpp b/src/rootserver/ob_root_utils.cpp index c0d269627..bb4d45e6c 100644 --- a/src/rootserver/ob_root_utils.cpp +++ b/src/rootserver/ob_root_utils.cpp @@ -569,188 +569,6 @@ int ObTenantGroupParser::jump_to_next_ttg( return ret; } -int ObLocalityTaskHelp::filter_logonly_task(const common::ObIArray &pools, - ObUnitManager &unit_mgr, - ObIArray &zone_locality) -{ - int ret = OB_SUCCESS; - ObArray logonly_unit_infos; - ObArray unit_infos; - if (pools.count() <= 0) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(pools)); - } else if (OB_FAIL(unit_mgr.get_unit_infos(pools, unit_infos))) { - LOG_WARN("fail to get unit infos", K(ret), K(pools)); - } else { - for (int64_t i = 0; i < unit_infos.count() && OB_SUCC(ret); ++i) { - if (REPLICA_TYPE_LOGONLY != unit_infos.at(i).unit_.replica_type_) { - // only L unit is counted - } else if (OB_FAIL(logonly_unit_infos.push_back(unit_infos.at(i)))) { - LOG_WARN("fail to push back", K(ret), K(i), K(unit_infos)); - } - } - for (int64_t i = 0; i < zone_locality.count() && OB_SUCC(ret); ++i) { - share::ObZoneReplicaAttrSet &zone_replica_attr_set = zone_locality.at(i); - if (zone_replica_attr_set.get_logonly_replica_num() - + 
zone_replica_attr_set.get_encryption_logonly_replica_num() <= 0) { - // no L replica : nothing todo - } else if (zone_replica_attr_set.zone_set_.count() <= 0) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("zone set unexpected", K(ret), K(zone_replica_attr_set)); - } else { - for (int64_t j = 0; j < logonly_unit_infos.count(); j++) { - const ObUnitInfo &unit_info = logonly_unit_infos.at(j); - if (!has_exist_in_array(zone_replica_attr_set.zone_set_, unit_info.unit_.zone_)) { - // bypass - } else if (zone_replica_attr_set.get_logonly_replica_num() - + zone_replica_attr_set.get_encryption_logonly_replica_num() <= 0) { - // bypass - } else if (zone_replica_attr_set.get_logonly_replica_num() > 0) { - ret = zone_replica_attr_set.sub_logonly_replica_num(ReplicaAttr(1, 100)); - } else { - ret = zone_replica_attr_set.sub_encryption_logonly_replica_num(ReplicaAttr(1, 100)); - } - } - } - } - } - return ret; -} - -int ObLocalityTaskHelp::get_logonly_task_with_logonly_unit(const uint64_t tenant_id, - ObUnitManager &unit_mgr, - share::schema::ObSchemaGetterGuard &schema_guard, - ObIArray &zone_locality) -{ - int ret = OB_SUCCESS; - ObArray logonly_unit_infos; - const ObTenantSchema *tenant_schema = NULL; - zone_locality.reset(); - common::ObArray tenant_zone_locality; - if (OB_INVALID_ID == tenant_id) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(tenant_id)); - } else if (OB_FAIL(schema_guard.get_tenant_info(tenant_id, tenant_schema))) { - LOG_WARN("fail to get tenant info", K(ret), K(tenant_id)); - } else if (OB_ISNULL(tenant_schema)) { - ret = OB_TENANT_NOT_EXIST; - LOG_WARN("get invalid tenant schema", K(ret), K(tenant_schema)); - } else if (OB_FAIL(unit_mgr.get_logonly_unit_by_tenant(tenant_id, logonly_unit_infos))) { - LOG_WARN("fail to get logonly unit infos", K(ret), K(tenant_id)); - } else if (OB_FAIL(tenant_schema->get_zone_replica_attr_array(tenant_zone_locality))) { - LOG_WARN("fail to get zone replica attr array", K(ret)); - } else { - 
share::ObZoneReplicaNumSet logonly_set; - for (int64_t i = 0; i < logonly_unit_infos.count() && OB_SUCC(ret); i++) { - const ObUnitInfo &unit = logonly_unit_infos.at(i); - for (int64_t j = 0; j < tenant_zone_locality.count(); j++) { - logonly_set.reset(); - const ObZoneReplicaNumSet &zone_set = tenant_zone_locality.at(j); - if (zone_set.zone_ == unit.unit_.zone_ - && zone_set.get_logonly_replica_num() == 1) { - logonly_set.zone_ = zone_set.zone_; - if (OB_FAIL(logonly_set.replica_attr_set_.add_logonly_replica_num(ReplicaAttr(1, 100)))) { - LOG_WARN("fail to add logonly replica num", K(ret)); - } else if (OB_FAIL(zone_locality.push_back(logonly_set))) { - LOG_WARN("fail to push back", K(ret)); - } - } else if (zone_set.zone_ == unit.unit_.zone_ - && zone_set.get_encryption_logonly_replica_num() == 1) { - logonly_set.zone_ = zone_set.zone_; - if (OB_FAIL(logonly_set.replica_attr_set_.add_encryption_logonly_replica_num(ReplicaAttr(1, 100)))) { - LOG_WARN("fail to add logonly replica num", K(ret)); - } else if (OB_FAIL(zone_locality.push_back(logonly_set))) { - LOG_WARN("fail to push back", K(ret)); - } - } - } - } - } - return ret; -} - -int ObLocalityTaskHelp::filter_logonly_task(const uint64_t tenant_id, - ObUnitManager &unit_mgr, - share::schema::ObSchemaGetterGuard &schema_guard, - ObIArray &zone_locality) -{ - int ret = OB_SUCCESS; - ObArray logonly_unit_infos; - if (OB_FAIL(unit_mgr.get_logonly_unit_by_tenant(schema_guard, tenant_id, logonly_unit_infos))) { - LOG_WARN("fail to get loggonly unit by tenant", K(ret), K(tenant_id)); - } else { - LOG_DEBUG("get all logonly unit", K(tenant_id), K(logonly_unit_infos), K(zone_locality)); - for (int64_t i = 0; i < zone_locality.count() && OB_SUCC(ret); ++i) { - share::ObZoneReplicaAttrSet &zone_replica_attr_set = zone_locality.at(i); - if (zone_replica_attr_set.get_logonly_replica_num() - + zone_replica_attr_set.get_encryption_logonly_replica_num() <= 0) { - // no L replica : nothing todo - } else if 
(zone_replica_attr_set.zone_set_.count() <= 0) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("zone set unexpected", K(ret), K(zone_replica_attr_set)); - } else { - for (int64_t j = 0; j < logonly_unit_infos.count(); j++) { - const ObUnitInfo &unit_info = logonly_unit_infos.at(j); - if (!has_exist_in_array(zone_replica_attr_set.zone_set_, unit_info.unit_.zone_)) { - // bypass - } else if (zone_replica_attr_set.get_logonly_replica_num() - + zone_replica_attr_set.get_encryption_logonly_replica_num() <= 0) { - // bypass - } else if (zone_replica_attr_set.get_logonly_replica_num() > 0) { - ret = zone_replica_attr_set.sub_logonly_replica_num(ReplicaAttr(1, 100)); - } else { - ret = zone_replica_attr_set.sub_encryption_logonly_replica_num(ReplicaAttr(1, 100)); - } - } - } - } - } - return ret; -} - -int ObLocalityTaskHelp::alloc_logonly_replica(ObUnitManager &unit_mgr, - const ObIArray &pools, - const common::ObIArray &zone_locality, - ObPartitionAddr &partition_addr) -{ - int ret = OB_SUCCESS; - ObArray logonly_units; - ObArray unit_infos; - if (OB_FAIL(unit_mgr.get_unit_infos(pools, unit_infos))) { - LOG_WARN("fail to get unit infos", K(ret), K(pools)); - } else { - for (int64_t i = 0; i < unit_infos.count() && OB_SUCC(ret); i++) { - if (REPLICA_TYPE_LOGONLY != unit_infos.at(i).unit_.replica_type_) { - //nothing todo - } else if (OB_FAIL(logonly_units.push_back(unit_infos.at(i)))) { - LOG_WARN("fail to push back", K(ret), K(i), K(unit_infos)); - } - } - } - ObReplicaAddr raddr; - for (int64_t i = 0; i < logonly_units.count() && OB_SUCC(ret); i++) { - for (int64_t j = 0; j < zone_locality.count() && OB_SUCC(ret); j++) { - if (zone_locality.at(j).zone_ == logonly_units.at(i).unit_.zone_ - && (zone_locality.at(j).get_logonly_replica_num() == 1 - || zone_locality.at(j).get_encryption_logonly_replica_num() == 1)) { - raddr.reset(); - raddr.unit_id_ = logonly_units.at(i).unit_.unit_id_; - raddr.addr_ = logonly_units.at(i).unit_.server_; - raddr.zone_ = 
logonly_units.at(i).unit_.zone_; - raddr.replica_type_ = zone_locality.at(j).get_logonly_replica_num() == 1 - ? REPLICA_TYPE_LOGONLY - : REPLICA_TYPE_ENCRYPTION_LOGONLY; - if (OB_FAIL(partition_addr.push_back(raddr))) { - LOG_WARN("fail to push back", K(ret), K(raddr)); - } else { - LOG_INFO("alloc partition for logonly replica", K(raddr)); - } - } - } - } - return ret; -} - int ObLocalityCheckHelp::calc_paxos_replica_num( const common::ObIArray &zone_locality, int64_t &paxos_num) @@ -1562,6 +1380,7 @@ int ObLocalityCheckHelp::check_alter_single_zone_locality_valid( int ret = OB_SUCCESS; bool is_legal = true; // 1. check whether non_paxos member change + // check R-replica if (!non_paxos_locality_modified) { const ObIArray &pre_readonly_replica = orig_locality.replica_attr_set_.get_readonly_replica_attr_array(); const ObIArray &cur_readonly_replica = new_locality.replica_attr_set_.get_readonly_replica_attr_array(); @@ -1577,8 +1396,19 @@ int ObLocalityCheckHelp::check_alter_single_zone_locality_valid( } } } + // check C-replica + if (new_locality.get_columnstore_replica_num() != orig_locality.get_columnstore_replica_num()) { + if (new_locality.get_full_replica_num() != orig_locality.get_full_replica_num() + || new_locality.get_readonly_replica_num() != orig_locality.get_readonly_replica_num()) { + // transform between R/F and C is illegal + is_legal = false; + } else { + non_paxos_locality_modified = true; + } + } // 2. check whether alter locality is legal. - if (new_locality.get_logonly_replica_num() < orig_locality.get_logonly_replica_num()) { + if (!is_legal) { + } else if (new_locality.get_logonly_replica_num() < orig_locality.get_logonly_replica_num()) { // L-replica must not transfrom to other replica type. 
if (new_locality.get_full_replica_num() > orig_locality.get_full_replica_num()) { is_legal = false; // maybe L->F diff --git a/src/rootserver/ob_root_utils.h b/src/rootserver/ob_root_utils.h index c2f9300a2..7c84a209c 100644 --- a/src/rootserver/ob_root_utils.h +++ b/src/rootserver/ob_root_utils.h @@ -36,7 +36,6 @@ namespace schema { class ObMultiVersionSchemaService; class ObTableSchema; -class ObLocality; class ObSchemaGetterGuard; } } @@ -394,35 +393,6 @@ public: } }; -class ObLocalityTaskHelp -{ -public: - ObLocalityTaskHelp() {} - virtual ~ObLocalityTaskHelp() {} - static int filter_logonly_task( - const common::ObIArray &pools, - ObUnitManager &unit_mgr, - common::ObIArray &zone_locality); - - static int filter_logonly_task( - const uint64_t tenant_id, - ObUnitManager &unit_manager, - share::schema::ObSchemaGetterGuard &schema_guard, - common::ObIArray &zone_locality); - - static int alloc_logonly_replica( - ObUnitManager &unit_manager, - const common::ObIArray &pools, - const common::ObIArray &zone_locality, - ObPartitionAddr &partition_addr); - - static int get_logonly_task_with_logonly_unit( - const uint64_t tenant_id, - ObUnitManager &unit_mgr, - share::schema::ObSchemaGetterGuard &schema_guard, - common::ObIArray &zone_locality); -}; - enum PaxosReplicaNumberTaskType { NOP_PAXOS_REPLICA_NUMBER = 0, diff --git a/src/rootserver/ob_unit_manager.cpp b/src/rootserver/ob_unit_manager.cpp index 2f978a99d..47f3bee9a 100644 --- a/src/rootserver/ob_unit_manager.cpp +++ b/src/rootserver/ob_unit_manager.cpp @@ -2744,7 +2744,7 @@ int ObUnitManager::check_old_pool_name_condition( common::ObIArray &old_pool) { int ret = OB_SUCCESS; - common::ObReplicaType replica_type = REPLICA_TYPE_MAX; + common::ObReplicaType replica_type = REPLICA_TYPE_INVALID; uint64_t tenant_id = OB_INVALID_ID; share::ObUnitConfig *unit_config = NULL; int64_t unit_count = 0; diff --git a/src/rootserver/virtual_table/ob_all_virtual_ls_replica_task_plan.cpp 
b/src/rootserver/virtual_table/ob_all_virtual_ls_replica_task_plan.cpp index 589179b9c..46acb34c3 100644 --- a/src/rootserver/virtual_table/ob_all_virtual_ls_replica_task_plan.cpp +++ b/src/rootserver/virtual_table/ob_all_virtual_ls_replica_task_plan.cpp @@ -184,11 +184,11 @@ int ObAllVirtualLSReplicaTaskPlan::get_full_row_( ADD_COLUMN(set_varchar, table, "target_replica_svr_ip", target_ip_str, columns); ADD_COLUMN(set_int, table, "target_replica_svr_port", target_port, columns); ADD_COLUMN(set_int, table, "target_paxos_replica_number", task_stat.get_target_replica_paxos_replica_number(), columns); - ADD_COLUMN(set_varchar, table, "target_replica_type", ob_replica_type_strs(task_stat.get_target_replica_type()), columns); + ADD_COLUMN(set_varchar, table, "target_replica_type", ObShareUtil::replica_type_to_string(task_stat.get_target_replica_type()), columns); ADD_COLUMN(set_varchar, table, "source_replica_svr_ip", task_stat.get_source_server().is_valid() ? source_ip_str : "", columns); ADD_COLUMN(set_int, table, "source_replica_svr_port", source_port, columns); ADD_COLUMN(set_int, table, "source_paxos_replica_number", task_stat.get_source_replica_paxos_replica_number(), columns); - ADD_COLUMN(set_varchar, table, "source_replica_type", task_stat.get_source_server().is_valid() ? ob_replica_type_strs(task_stat.get_source_replica_type()) : "", columns); + ADD_COLUMN(set_varchar, table, "source_replica_type", task_stat.get_source_server().is_valid() ? 
ObShareUtil::replica_type_to_string(task_stat.get_source_replica_type()) : "", columns); ADD_COLUMN(set_varchar, table, "task_exec_svr_ip", execute_ip_str, columns); ADD_COLUMN(set_int, table, "task_exec_svr_port", execute_port, columns); ADD_COLUMN(set_varchar, table, "comment", task_stat.get_comment().string(), columns); diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index 8687880dd..b83f6be79 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -127,7 +127,6 @@ ob_set_subtarget(ob_share common ob_list_parser.cpp ob_local_device.cpp ob_locality_info.cpp - ob_locality_parser.cpp ob_locality_priority.cpp ob_locality_table_operator.cpp ob_ls_id.cpp diff --git a/src/share/client_feedback/ob_feedback_partition_struct.h b/src/share/client_feedback/ob_feedback_partition_struct.h index d3b226f9e..23ba2f526 100644 --- a/src/share/client_feedback/ob_feedback_partition_struct.h +++ b/src/share/client_feedback/ob_feedback_partition_struct.h @@ -61,7 +61,7 @@ inline bool ObFeedbackReplicaLocation::is_valid_obj() const { return server_.is_valid() && (common::INVALID_ROLE != role_) - && (common::REPLICA_TYPE_MAX != replica_type_); + && (common::REPLICA_TYPE_INVALID != replica_type_); } class ObFeedbackPartitionLocation : public ObAbstractFeedbackObject @@ -195,7 +195,7 @@ inline bool ObFeedbackRerouteInfo::is_valid_obj() const return (for_session_reroute_ && server_.is_valid()) || (!for_session_reroute_ && server_.is_valid() && (common::INVALID_ROLE != role_) - && (common::REPLICA_TYPE_MAX != replica_type_) + && (common::REPLICA_TYPE_INVALID != replica_type_) && tbl_name_len_ > 0 && tbl_name_len_ <= common::OB_MAX_TABLE_NAME_LENGTH && OB_INVALID_VERSION != tbl_schema_version_); diff --git a/src/share/compaction/ob_compaction_locality_cache.cpp b/src/share/compaction/ob_compaction_locality_cache.cpp index 5d5f3a257..a98370f7b 100644 --- a/src/share/compaction/ob_compaction_locality_cache.cpp +++ 
b/src/share/compaction/ob_compaction_locality_cache.cpp @@ -11,6 +11,7 @@ #include "share/compaction/ob_compaction_locality_cache.h" #include "src/storage/compaction/ob_medium_compaction_func.h" #include "src/storage/compaction/ob_compaction_util.h" +#include "src/storage/tx_storage/ob_ls_service.h" #include "src/share/ob_zone_merge_info.h" #include "observer/ob_server_struct.h" #include "src/share/ob_zone_merge_table_operator.h" @@ -22,11 +23,233 @@ namespace oceanbase namespace share { +/****************************** ObLSReplicaUniItem ******************************/ +ObLSReplicaUniItem::ObLSReplicaUniItem() + : ls_id_(), + server_() +{} + +ObLSReplicaUniItem::ObLSReplicaUniItem(const ObLSID &ls_id, const common::ObAddr &server) + : ls_id_(ls_id), + server_(server) +{} + +ObLSReplicaUniItem::~ObLSReplicaUniItem() +{ + reset(); +} + +void ObLSReplicaUniItem::reset() { + ls_id_.reset(); + server_.reset(); +} + +uint64_t ObLSReplicaUniItem::hash() const +{ + uint64_t hash_val = 0; + hash_val += ls_id_.hash(); + hash_val += server_.hash(); + return hash_val; +} + +int ObLSReplicaUniItem::hash(uint64_t &hash_val) const +{ + hash_val = hash(); + return OB_SUCCESS; +} + +bool ObLSReplicaUniItem::is_valid() const +{ + return ls_id_.is_valid() && server_.is_valid(); +} + +bool ObLSReplicaUniItem::operator == (const ObLSReplicaUniItem &other) const +{ + bool bret = true; + if (this == &other) { + } else if (ls_id_ != other.ls_id_ || server_ != other.server_) { + bret = false; + } + return bret; +} + +bool ObLSReplicaUniItem::operator != (const ObLSReplicaUniItem &other) const +{ + return !(*this == other); +} + +/****************************** ObLSColumnReplicaCache ******************************/ +ObLSColumnReplicaCache::ObLSColumnReplicaCache() + : is_inited_(false), + ls_id_set_(), + ls_replica_set_() +{} + +ObLSColumnReplicaCache::~ObLSColumnReplicaCache() +{ + destroy(); +} + +// init +int ObLSColumnReplicaCache::init() +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + 
ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_FAIL(ls_id_set_.create(BUCKET_NUM_OF_LS_ID_SET, ObMemAttr(MTL_ID(), "LSIDsForCkm")))) { + LOG_WARN("fail to create ls id set", K(ret)); + } else if (OB_FAIL(ls_replica_set_.create(BUCKET_NUM_OF_LS_REPLICA_SET, ObMemAttr(MTL_ID(), "LSReplTypes")))) { + LOG_WARN("fail to create ls replica type map", K(ret)); + } else { + is_inited_ = true; + } + return ret; +} + +void ObLSColumnReplicaCache::destroy() +{ + int ret = OB_SUCCESS; // only for log + if (ls_replica_set_.created()) { + if (OB_FAIL(ls_replica_set_.destroy())) { + LOG_WARN("fail to destroy ls replica set", K(ret)); + } + } + if (ls_id_set_.created()) { + if (OB_FAIL(ls_id_set_.destroy())) { + LOG_WARN("fail to destroy ls replica set", K(ret)); + } + } + is_inited_ = false; +} + +void ObLSColumnReplicaCache::reuse() +{ + ls_id_set_.reuse(); + ls_replica_set_.reuse(); +} + +int ObLSColumnReplicaCache::check_contains_ls(const ObLSID &ls_id, bool &contained) const +{ + int ret = OB_SUCCESS; + contained = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_FAIL(ls_id_set_.exist_refactored(ls_id))) { + if (OB_HASH_EXIST == ret || OB_HASH_NOT_EXIST == ret) { + contained = (OB_HASH_EXIST == ret); + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to check contains ls", K(ret), K(ls_id), KPC(this)); + } + } + return ret; +} + +int ObLSColumnReplicaCache::mark_ls_finished(const ObLSID &ls_id) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_FAIL(ls_id_set_.set_refactored(ls_id))) { + if (OB_HASH_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to mark ls finish", K(ret), K(ls_id), KPC(this)); + } + } + return ret; +} + +int ObLSColumnReplicaCache::add_cs_replica(const ObLSReplicaUniItem &ls_item) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if 
(OB_FAIL(ls_replica_set_.set_refactored(ls_item))) { + LOG_WARN("fail to add col replica", K(ret), K(ls_item), KPC(this)); + } + return ret; +} + +int ObLSColumnReplicaCache::update(const ObLSID &ls_id, const ObAddr &server) +{ + int ret = OB_SUCCESS; + bool is_contained = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_FAIL(check_contains_ls(ls_id, is_contained))) { + LOG_WARN("fail to check exist for ls", K(ls_id), KPC(this)); + } else if (is_contained) { + } else if (OB_ISNULL(GCTX.lst_operator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lst operator is null", K(ret)); + } else { + const int64_t tenant_id = MTL_ID(); + const int64_t cluster_id = GCONF.cluster_id; + ObLSInfo ls_info; + if (OB_FAIL(GCTX.lst_operator_->get(cluster_id, tenant_id, ls_id, ObLSTable::DEFAULT_MODE, ls_info))) { + LOG_WARN("fail to get ls info", K(ret), K(cluster_id), K(tenant_id), K(ls_id)); + } else { + const ObLSInfo::ReplicaArray &all_replicas = ls_info.get_replicas(); + ObLSReplicaUniItem ls_item(ls_id, server); + for (int64_t i = 0; i < all_replicas.count() && OB_SUCC(ret); ++i) { + const ObLSReplica &replica = all_replicas.at(i); + if (ObRole::LEADER == replica.get_role()) { + const common::GlobalLearnerList &learner_list = replica.get_learner_list(); + for (int64_t i = 0; OB_SUCC(ret) && i < learner_list.get_member_number(); ++i) { + ObMember learner; + if (OB_FAIL(learner_list.get_learner(i, learner))) { + LOG_WARN("fail to get learner", K(ret), K(i), K(learner_list)); + } else if (learner.is_columnstore()) { + ls_item.server_ = learner.get_server(); + if (OB_FAIL(add_cs_replica(ls_item))) { + LOG_WARN("fail to add ls item", K(ret), K(ls_item)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(mark_ls_finished(ls_item.ls_id_))) { + LOG_WARN("fail to make ls finished", K(ret)); + } + LOG_TRACE("[CS-Replica] get learner list", K(ret), K(learner_list)); + } + } + } + } + return ret; +} + +int 
ObLSColumnReplicaCache::check_is_cs_replica(const ObLSReplicaUniItem &ls_item, bool &is_cs_replica) const +{ + int ret = OB_SUCCESS; + is_cs_replica = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_FAIL(ls_replica_set_.exist_refactored(ls_item))) { + if (OB_HASH_EXIST == ret || OB_HASH_NOT_EXIST == ret) { + is_cs_replica = (OB_HASH_EXIST == ret); + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to check contains ls", K(ret), K(ls_item), KPC(this)); + } + } + LOG_TRACE("[CS-Replica] check current ls is cs replica", K(ret), K(ls_item), K(is_cs_replica), KPC(this)); + return ret; +} + +/****************************** ObCompactionLocalityCache ******************************/ ObCompactionLocalityCache::ObCompactionLocalityCache() : is_inited_(false), tenant_id_(OB_INVALID_TENANT_ID), merge_info_mgr_(nullptr), - ls_infos_map_() + ls_infos_map_(), + ls_cs_replica_cache_() {} ObCompactionLocalityCache::~ObCompactionLocalityCache() @@ -45,6 +268,8 @@ int ObCompactionLocalityCache::init(const uint64_t tenant_id, rootserver::ObMajo LOG_WARN("invalid argument", K(ret), K(tenant_id)); } else if (OB_FAIL(ls_infos_map_.create(OB_MAX_LS_NUM_PER_TENANT_PER_SERVER, "CaLsInfoMap", "CaLsInfoNode", tenant_id))) { LOG_WARN("fail to create ls info map", K(ret)); + } else if (OB_FAIL(ls_cs_replica_cache_.init())) { + LOG_WARN("fail to init col replica cache", K(ret)); } if (OB_FAIL(ret)) { destroy(); @@ -100,6 +325,7 @@ int ObCompactionLocalityCache::inner_refresh_ls_locality() if (OB_SUCC(ret)) { // 1. clear ls_infos cached in memory ls_infos_map_.reuse(); + ls_cs_replica_cache_.reuse(); // 2. 
load ls_infos from __all_ls_meta_table ObArray ls_infos; ls_infos.set_attr(ObMemAttr(tenant_id_, "RefLSInfos")); @@ -230,12 +456,16 @@ int ObCompactionLocalityCache::refresh_by_zone( } else if (OB_FAIL(tmp_ls_info.init_by_replica(tmp_replica))) { LOG_WARN("fail to init ls_info by replica", KR(ret), K(tmp_replica)); } + if (OB_FAIL(ret)) { + } else if (tmp_replica.is_column_replica() && OB_FAIL(ls_cs_replica_cache_.add_cs_replica(ObLSReplicaUniItem(ls_id, tmp_replica.get_server())))) { + LOG_WARN("fail to add cs replica", K(ret), K(ls_id), K(tmp_replica), K_(ls_cs_replica_cache)); + } } } if (FAILEDx(ls_infos_map_.set_refactored(ls_id, tmp_ls_info, 1/*overwrite*/))) { LOG_WARN("fail to set refactored", KR(ret), K(ls_id), K(tmp_ls_info)); } else { - FLOG_INFO("success to refresh cached ls_info", K(ret), K(tmp_ls_info), K(zone_list), K(member_list_array)); + FLOG_INFO("success to refresh cached ls_info", K(ret), K(tmp_ls_info), K(zone_list)); } } return ret; diff --git a/src/share/compaction/ob_compaction_locality_cache.h b/src/share/compaction/ob_compaction_locality_cache.h index fac8249fa..e0c290fee 100644 --- a/src/share/compaction/ob_compaction_locality_cache.h +++ b/src/share/compaction/ob_compaction_locality_cache.h @@ -29,6 +29,46 @@ class ObMySQLResult; namespace share { +struct ObLSReplicaUniItem +{ + ObLSReplicaUniItem(); + ObLSReplicaUniItem(const ObLSID &ls_id, const common::ObAddr &server); + ~ObLSReplicaUniItem(); + void reset(); + uint64_t hash() const; + int hash(uint64_t &hash_val) const; + bool is_valid() const; + bool operator == (const ObLSReplicaUniItem &other) const; + bool operator != (const ObLSReplicaUniItem &other) const; + TO_STRING_KV(K_(ls_id), K_(server)); + + share::ObLSID ls_id_; + common::ObAddr server_; +}; + +class ObLSColumnReplicaCache +{ +public: + ObLSColumnReplicaCache(); + ~ObLSColumnReplicaCache(); + int init(); + void destroy(); + void reuse(); + int check_contains_ls(const ObLSID &ls_id, bool &contained) const; + int 
mark_ls_finished(const ObLSID &ls_id); + int add_cs_replica(const ObLSReplicaUniItem &ls_item); + int update(const ObLSID &ls_id, const ObAddr &server); + int check_is_cs_replica(const ObLSReplicaUniItem &ls_item, bool &is_cs_replica) const; + TO_STRING_KV(K_(is_inited), K_(ls_id_set), K_(ls_replica_set)); +private: + const static int64_t BUCKET_NUM_OF_LS_ID_SET = 15; + const static int64_t BUCKET_NUM_OF_LS_REPLICA_SET = 31; +private: + bool is_inited_; + hash::ObHashSet ls_id_set_; // pre-wamred ls id, unused + hash::ObHashSet ls_replica_set_; // cs-prelica ls +}; + class ObCompactionLocalityCache { public: @@ -39,6 +79,7 @@ public: bool empty() const { return ls_infos_map_.empty(); } int refresh_ls_locality(const bool force_refresh); int get_ls_info(const share::ObLSID &ls_id, share::ObLSInfo &ls_info); + const share::ObLSColumnReplicaCache& get_cs_replica_cache() const { return ls_cs_replica_cache_; } TO_STRING_KV(K_(is_inited), K_(tenant_id)); private: @@ -62,6 +103,7 @@ private: uint64_t tenant_id_; rootserver::ObMajorMergeInfoManager *merge_info_mgr_; common::hash::ObHashMap ls_infos_map_; + share::ObLSColumnReplicaCache ls_cs_replica_cache_; }; } // namespace share diff --git a/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp b/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp index 23e8a030e..8e2145813 100644 --- a/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp @@ -560,7 +560,7 @@ int ObInnerTableSchema::dba_ob_resource_pools_schema(ObTableSchema &table_schema table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT RESOURCE_POOL_ID, NAME, CASE TENANT_ID WHEN -1 THEN NULL ELSE TENANT_ID END AS TENANT_ID, gmt_create AS CREATE_TIME, gmt_modified AS MODIFY_TIME, UNIT_COUNT, UNIT_CONFIG_ID, ZONE_LIST, CASE replica_type WHEN 0 THEN "FULL" 
WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END AS REPLICA_TYPE FROM oceanbase.__all_resource_pool )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT RESOURCE_POOL_ID, NAME, CASE TENANT_ID WHEN -1 THEN NULL ELSE TENANT_ID END AS TENANT_ID, gmt_create AS CREATE_TIME, gmt_modified AS MODIFY_TIME, UNIT_COUNT, UNIT_CONFIG_ID, ZONE_LIST, CASE replica_type WHEN 0 THEN "FULL" ELSE NULL END AS REPLICA_TYPE FROM oceanbase.__all_resource_pool )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -910,7 +910,7 @@ int ObInnerTableSchema::dba_ob_ls_locations_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE WHERE EFFECTIVE_TENANT_ID() = 1 ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE 
WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() AND TENANT_ID != 1 ) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE WHERE EFFECTIVE_TENANT_ID() = 1 ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() AND TENANT_ID != 1 ) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -960,7 +960,7 @@ int ObInnerTableSchema::cdb_ob_ls_locations_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if 
(OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID != 1 ) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST 
ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN "FALSE" ELSE "TRUE" END) AS REBUILD FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID != 1 ) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp b/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp index 8f09df6f7..afc4d5913 100644 --- a/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp @@ -817,7 +817,7 @@ int ObInnerTableSchema::cdb_ob_tablet_checksum_error_info_schema(ObTableSchema & table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT TENANT_ID, TABLET_ID FROM ( SELECT TENANT_ID, TABLET_ID, ROW_COUNT, DATA_CHECKSUM, B_COLUMN_CHECKSUMS, COMPACTION_SCN FROM OCEANBASE.__ALL_VIRTUAL_TABLET_REPLICA_CHECKSUM ) J GROUP BY J.TENANT_ID, J.TABLET_ID, J.COMPACTION_SCN HAVING MIN(J.DATA_CHECKSUM) != MAX(J.DATA_CHECKSUM) OR MIN(J.ROW_COUNT) != MAX(J.ROW_COUNT) OR MIN(J.B_COLUMN_CHECKSUMS) != MAX(J.B_COLUMN_CHECKSUMS) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT TENANT_ID, TABLET_ID FROM ( SELECT CKM.TENANT_ID, 
CKM.TABLET_ID, CKM.ROW_COUNT, CKM.DATA_CHECKSUM, CKM.B_COLUMN_CHECKSUMS, CKM.COMPACTION_SCN, M.REPLICA_TYPE FROM OCEANBASE.__ALL_VIRTUAL_TABLET_REPLICA_CHECKSUM CKM JOIN OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE M ON CKM.TENANT_ID = M.TENANT_ID AND CKM.LS_ID = M.LS_ID AND CKM.SVR_IP = M.SVR_IP AND CKM.SVR_PORT = M.SVR_PORT ) J GROUP BY J.TENANT_ID, J.TABLET_ID, J.COMPACTION_SCN, J.REPLICA_TYPE HAVING MIN(J.DATA_CHECKSUM) != MAX(J.DATA_CHECKSUM) OR MIN(J.ROW_COUNT) != MAX(J.ROW_COUNT) OR MIN(J.B_COLUMN_CHECKSUMS) != MAX(J.B_COLUMN_CHECKSUMS) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp b/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp index 7e67bc32c..6df9edb3d 100644 --- a/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp @@ -710,7 +710,7 @@ int ObInnerTableSchema::dba_ob_ls_locations_ora_schema(ObTableSchema &table_sche table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TO_CHAR(GMT_CREATE) AS VARCHAR2(19)) AS CREATE_TIME, CAST(TO_CHAR(GMT_MODIFIED) AS VARCHAR2(19)) AS MODIFY_TIME, CAST(LS_ID AS NUMBER) AS LS_ID, SVR_IP, CAST(SVR_PORT AS NUMBER) AS SVR_PORT, CAST(SQL_PORT AS NUMBER) AS SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN 'LEADER' ELSE 'FOLLOWER' END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, CAST((CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS NUMBER) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN 'FULL' WHEN 5 THEN 'LOGONLY' WHEN 16 THEN 'READONLY' WHEN 261 THEN 'ENCRYPTION LOGONLY' ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE NULL END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN 'FALSE' ELSE 'TRUE' END) AS REBUILD FROM SYS.ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = 
EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TO_CHAR(GMT_CREATE) AS VARCHAR2(19)) AS CREATE_TIME, CAST(TO_CHAR(GMT_MODIFIED) AS VARCHAR2(19)) AS MODIFY_TIME, CAST(LS_ID AS NUMBER) AS LS_ID, SVR_IP, CAST(SVR_PORT AS NUMBER) AS SVR_PORT, CAST(SQL_PORT AS NUMBER) AS SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN 'LEADER' ELSE 'FOLLOWER' END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, CAST((CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS NUMBER) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN 'FULL' WHEN 5 THEN 'LOGONLY' WHEN 16 THEN 'READONLY' WHEN 261 THEN 'ENCRYPTION LOGONLY' WHEN 1040 THEN 'COLUMNSTORE' ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE NULL END) AS LEARNER_LIST, (CASE REBUILD WHEN 0 THEN 'FALSE' ELSE 'TRUE' END) AS REBUILD FROM SYS.ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index e393b6aba..4337c3efb 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -19630,9 +19630,6 @@ SELECT RESOURCE_POOL_ID, ZONE_LIST, CASE replica_type WHEN 0 THEN "FULL" - WHEN 5 THEN "LOGONLY" - WHEN 16 THEN "READONLY" - WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END AS REPLICA_TYPE FROM oceanbase.__all_resource_pool @@ -19889,6 +19886,7 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" + WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD @@ -19915,6 +19913,7 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" + WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN 
LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD @@ -19954,6 +19953,7 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" + WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD @@ -19979,6 +19979,7 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" + WHEN 1040 THEN "COLUMNSTORE" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST, (CASE REBUILD @@ -28107,15 +28108,18 @@ def_table_schema( TABLET_ID FROM ( - SELECT TENANT_ID, - TABLET_ID, - ROW_COUNT, - DATA_CHECKSUM, - B_COLUMN_CHECKSUMS, - COMPACTION_SCN - FROM OCEANBASE.__ALL_VIRTUAL_TABLET_REPLICA_CHECKSUM + SELECT CKM.TENANT_ID, + CKM.TABLET_ID, + CKM.ROW_COUNT, + CKM.DATA_CHECKSUM, + CKM.B_COLUMN_CHECKSUMS, + CKM.COMPACTION_SCN, + M.REPLICA_TYPE + FROM OCEANBASE.__ALL_VIRTUAL_TABLET_REPLICA_CHECKSUM CKM + JOIN OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE M + ON CKM.TENANT_ID = M.TENANT_ID AND CKM.LS_ID = M.LS_ID AND CKM.SVR_IP = M.SVR_IP AND CKM.SVR_PORT = M.SVR_PORT ) J - GROUP BY J.TENANT_ID, J.TABLET_ID, J.COMPACTION_SCN + GROUP BY J.TENANT_ID, J.TABLET_ID, J.COMPACTION_SCN, J.REPLICA_TYPE HAVING MIN(J.DATA_CHECKSUM) != MAX(J.DATA_CHECKSUM) OR MIN(J.ROW_COUNT) != MAX(J.ROW_COUNT) OR MIN(J.B_COLUMN_CHECKSUMS) != MAX(J.B_COLUMN_CHECKSUMS) @@ -50752,6 +50756,7 @@ def_table_schema( WHEN 5 THEN 'LOGONLY' WHEN 16 THEN 'READONLY' WHEN 261 THEN 'ENCRYPTION LOGONLY' + WHEN 1040 THEN 'COLUMNSTORE' ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE NULL END) AS LEARNER_LIST, (CASE REBUILD diff --git a/src/share/location_cache/ob_location_struct.cpp b/src/share/location_cache/ob_location_struct.cpp index 9a53ccaf8..8475c41e8 100644 --- a/src/share/location_cache/ob_location_struct.cpp +++ b/src/share/location_cache/ob_location_struct.cpp @@ -366,17 +366,17 @@ bool 
ObLSLocation::operator!=(const ObLSLocation &other) const return !(*this == other); } -int ObLSLocation::get_replica_count(int64_t &full_replica_cnt, int64_t &readonly_replica_cnt) +int ObLSLocation::get_replica_count(int64_t &full_replica_cnt, int64_t &non_paxos_replica_cnt) { int ret = OB_SUCCESS; full_replica_cnt = 0; - readonly_replica_cnt = 0; + non_paxos_replica_cnt = 0; for (int64_t i = 0; OB_SUCC(ret) && i < replica_locations_.count(); ++i) { const ObLSReplicaLocation &replica = replica_locations_.at(i); if (REPLICA_TYPE_FULL == replica.get_replica_type()) { full_replica_cnt++; - } else if (REPLICA_TYPE_READONLY == replica.get_replica_type()) { - readonly_replica_cnt++; + } else if (ObReplicaTypeCheck::is_non_paxos_replica(replica.get_replica_type())) { + non_paxos_replica_cnt++; } } return ret; diff --git a/src/share/location_cache/ob_location_struct.h b/src/share/location_cache/ob_location_struct.h index de5d448aa..fae0e6538 100644 --- a/src/share/location_cache/ob_location_struct.h +++ b/src/share/location_cache/ob_location_struct.h @@ -190,7 +190,7 @@ public: inline uint64_t get_tenant_id() const { return cache_key_.get_tenant_id(); } inline ObLSID get_ls_id() const { return cache_key_.get_ls_id(); } const ObLSLocationCacheKey &get_cache_key() const { return cache_key_; } - int get_replica_count(int64_t &full_replica_cnt, int64_t &readonly_replica_cnt); + int get_replica_count(int64_t &full_replica_cnt, int64_t &non_paxos_replica_cnt); inline const common::ObIArray &get_replica_locations() const { return replica_locations_; diff --git a/src/share/ls/ob_ls_creator.cpp b/src/share/ls/ob_ls_creator.cpp index 350821d31..b356af6fd 100644 --- a/src/share/ls/ob_ls_creator.cpp +++ b/src/share/ls/ob_ls_creator.cpp @@ -51,7 +51,7 @@ int ObLSReplicaAddr::init(const common::ObAddr &addr, { int ret = OB_SUCCESS; if (OB_UNLIKELY(!addr.is_valid() - || common::REPLICA_TYPE_MAX == replica_type)) { + || common::REPLICA_TYPE_INVALID == replica_type)) { ret = 
OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(addr), K(replica_type)); } else { @@ -662,6 +662,12 @@ int ObLSCreator::check_create_ls_result_( if (OB_FAIL(learner_list.add_learner(ObMember(addr, timestamp)))) { LOG_WARN("failed to add member", KR(ret), K(addr)); } + } else if (result->get_replica_type() == REPLICA_TYPE_COLUMNSTORE) { + ObMember member(addr, timestamp); + member.set_columnstore(); + if (OB_FAIL(learner_list.add_learner(member))) { + LOG_WARN("failed to add member", KR(ret), K(addr), K(member)); + } } LOG_TRACE("create ls result", KR(ret), K(i), K(addr), KPC(result)); } @@ -991,7 +997,9 @@ int ObLSCreator::construct_ls_addrs_according_to_locality_( for (int64_t i = 0; OB_SUCC(ret) && i < zone_locality_array.count(); ++i) { const share::ObZoneReplicaAttrSet &zone_locality = zone_locality_array.at(i); ObLSReplicaAddr replica_addr; - if (OB_FAIL(alloc_zone_ls_addr(is_sys_ls, zone_locality, unit_info_array, replica_addr))) { + if (is_sys_ls && zone_locality.get_columnstore_replica_num() > 0) { + // ignore, C-replica not applicable for sys-ls + } else if (OB_FAIL(alloc_zone_ls_addr(is_sys_ls, zone_locality, unit_info_array, replica_addr))) { LOG_WARN("fail to alloc zone ls addr", KR(ret), K(zone_locality), K(unit_info_array)); } else if (OB_FAIL(ls_addr.push_back(replica_addr))) { LOG_WARN("fail to push back", KR(ret), K(replica_addr)); @@ -1106,11 +1114,9 @@ int ObLSCreator::alloc_duplicate_ls_addr_( } else if (OB_FAIL(construct_ls_addrs_according_to_locality_( zone_locality_array, unit_info_array, - true/*is_sys_ls*/, + false/*is_sys_ls*/, true/*is_duplicate_ls*/, ls_addr))) { - // although duplicate log stream is a user log steam, we use the same logic to alloc addrs as sys log stream - // so set is_sys_ls to true when execute construct_ls_addrs_according_to_locality_ LOG_WARN("fail to construct ls addrs for tenant user ls", KR(ret), K(zone_locality_array), K(unit_info_array), K(ls_addr)); } @@ -1129,6 +1135,14 @@ int 
ObLSCreator::compensate_zone_readonly_replica_( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(unit_info_array)); } else { + ObReplicaType replica_type_to_add = ObReplicaType::REPLICA_TYPE_INVALID; + if (zlocality.get_columnstore_replica_num() > 0) { + // For C zone locality, compensate C-replica. + replica_type_to_add = ObReplicaType::REPLICA_TYPE_COLUMNSTORE; + } else { + // For other zone locality (F/R), compensate R-replica + replica_type_to_add = ObReplicaType::REPLICA_TYPE_READONLY; + } for (int64_t i = 0; OB_SUCC(ret) && i < unit_info_array.count(); ++i) { const share::ObUnit &unit = unit_info_array.at(i); if (locality_zone != unit.zone_) { @@ -1142,7 +1156,7 @@ int ObLSCreator::compensate_zone_readonly_replica_( ObLSReplicaAddr ls_replica_addr; if (OB_FAIL(ls_replica_addr.init( unit.server_, - ObReplicaType::REPLICA_TYPE_READONLY))) { + replica_type_to_add))) { LOG_WARN("fail to init ls replica addr", KR(ret), K(unit), K(locality_zone)); } else if (OB_FAIL(ls_addr.push_back(ls_replica_addr))) { LOG_WARN("fail to push back", KR(ret), K(ls_replica_addr)); @@ -1174,25 +1188,31 @@ int ObLSCreator::alloc_zone_ls_addr( if (OB_FAIL(ls_replica_addr.init( unit.server_, ObReplicaType::REPLICA_TYPE_FULL))) { - LOG_WARN("fail to init ls replica addr", KR(ret)); + LOG_WARN("fail to init ls replica addr", KR(ret), K(unit.server_)); } } else if (zlocality.replica_attr_set_.get_logonly_replica_attr_array().count() > 0) { if (OB_FAIL(ls_replica_addr.init( unit.server_, ObReplicaType::REPLICA_TYPE_LOGONLY))) { - LOG_WARN("fail to init ls replica addr", KR(ret)); + LOG_WARN("fail to init ls replica addr", KR(ret), K(unit.server_)); } } else if (zlocality.replica_attr_set_.get_encryption_logonly_replica_attr_array().count() > 0) { if (OB_FAIL(ls_replica_addr.init( unit.server_, ObReplicaType::REPLICA_TYPE_ENCRYPTION_LOGONLY))) { - LOG_WARN("fail to init ls replica addr", KR(ret)); + LOG_WARN("fail to init ls replica addr", KR(ret), K(unit.server_)); } } 
else if (zlocality.replica_attr_set_.get_readonly_replica_attr_array().count() > 0) { if (OB_FAIL(ls_replica_addr.init( unit.server_, ObReplicaType::REPLICA_TYPE_READONLY))) { - LOG_WARN("fail to init ls replica addr", KR(ret)); + LOG_WARN("fail to init ls replica addr", KR(ret), K(unit.server_)); + } + } else if (zlocality.replica_attr_set_.get_columnstore_replica_attr_array().count() > 0) { + if (OB_FAIL(ls_replica_addr.init( + unit.server_, + ObReplicaType::REPLICA_TYPE_COLUMNSTORE))) { + LOG_WARN("fail to init ls replica addr", KR(ret), K(unit.server_)); } } else { // zone locality shall has a paxos replica in 4.0 by // now(2021.10.25) diff --git a/src/share/ls/ob_ls_creator.h b/src/share/ls/ob_ls_creator.h index 2497d2234..3e227454f 100644 --- a/src/share/ls/ob_ls_creator.h +++ b/src/share/ls/ob_ls_creator.h @@ -52,7 +52,7 @@ struct ObLSReplicaAddr ObLSReplicaAddr() : addr_(), - replica_type_(common::REPLICA_TYPE_MAX) {} + replica_type_(common::REPLICA_TYPE_INVALID) {} void reset() { *this = ObLSReplicaAddr(); } int init(const common::ObAddr &addr, const common::ObReplicaType replica_type); diff --git a/src/share/ls/ob_ls_info.cpp b/src/share/ls/ob_ls_info.cpp index 625e33ffc..f6c1973e5 100644 --- a/src/share/ls/ob_ls_info.cpp +++ b/src/share/ls/ob_ls_info.cpp @@ -993,13 +993,18 @@ int ObLSInfo::update_replica_status() bool in_leader_learner_list = false; ObMember learner; // rectify replica_type_ + const ObReplicaType replica_type_before_rectify = r->get_replica_type(); if (OB_NOT_NULL(learner_list) && learner_list->contains(r->get_server())) { - r->set_replica_type(REPLICA_TYPE_READONLY); in_leader_learner_list = true; if (OB_FAIL(learner_list->get_learner_by_addr(r->get_server(), learner))) { LOG_WARN("fail to get learner by addr", KR(ret)); - } else if (in_leader_learner_list) { + } else { in_member_time_us = learner.get_timestamp(); + if (learner.is_columnstore()) { + r->set_replica_type(REPLICA_TYPE_COLUMNSTORE); + } else { + 
r->set_replica_type(REPLICA_TYPE_READONLY); + } } } else { r->set_replica_type(REPLICA_TYPE_FULL); @@ -1021,8 +1026,17 @@ int ObLSInfo::update_replica_status() // 2 non_paxos replicas (READONLY),NORMAL when in leader's learner_list otherwise offline // 3 if non_paxos replicas are deleted by partition service, status in meta table is set to REPLICA_STATUS_OFFLINE, // then set replica_status to REPLICA_STATUS_OFFLINE + // 4 COLUMNSTORE replica, if not in learner list or columnstore-flag is false, + // then set replica_status to REPLICA_STATUS_OFFLINE if (REPLICA_STATUS_OFFLINE == r->get_replica_status()) { // do nothing + } else if (REPLICA_TYPE_COLUMNSTORE == replica_type_before_rectify + && REPLICA_TYPE_COLUMNSTORE != r->get_replica_type()) { + // we set replica_type according to leader's member/learner_list, but need to log a warning. + LOG_WARN("replica_type before rectify is COLUMNSTORE, " + "but not match with leader's member_list and learner_list", + K(replica_type_before_rectify), K(member_list), K(learner_list), KPC(r)); + r->set_replica_status(REPLICA_STATUS_OFFLINE); } else if (in_leader_member_list || in_leader_learner_list) { r->set_replica_status(REPLICA_STATUS_NORMAL); } else { diff --git a/src/share/ls/ob_ls_info.h b/src/share/ls/ob_ls_info.h index 36bbb0e0d..b9a31fa37 100644 --- a/src/share/ls/ob_ls_info.h +++ b/src/share/ls/ob_ls_info.h @@ -135,6 +135,7 @@ public: inline bool is_paxos_replica() const { return common::REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type_ || common::REPLICA_TYPE_FULL == replica_type_ || common::REPLICA_TYPE_LOGONLY == replica_type_; } + inline bool is_column_replica() const { return common::REPLICA_TYPE_COLUMNSTORE == replica_type_; } int64_t to_string(char *buf, const int64_t buf_len) const; // operator-related functions int assign(const ObLSReplica &other); diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index fb4aa4bc7..0c3a9ca23 100755 --- a/src/share/ob_debug_sync_point.h +++ 
b/src/share/ob_debug_sync_point.h @@ -633,6 +633,8 @@ class ObString; ACT(BEFROE_UPDATE_DATA_VERSION,)\ ACT(BEFORE_DATA_DICT_DUMP_FINISH,)\ ACT(AFTER_PHYSICAL_RESTORE_CREATE_TENANT,)\ + ACT(BEFROE_UPDATE_MIG_TABLET_CONVERT_CO_PROGRESSING,)\ + ACT(AFTER_SET_CO_CONVERT_RETRY_EXHUASTED,)\ ACT(MAX_DEBUG_SYNC_POINT,) DECLARE_ENUM(ObDebugSyncPoint, debug_sync_point, OB_DEBUG_SYNC_POINT_DEF); diff --git a/src/share/ob_locality_parser.cpp b/src/share/ob_locality_parser.cpp deleted file mode 100644 index 9f40422ed..000000000 --- a/src/share/ob_locality_parser.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. 
- */ - -#define USING_LOG_PREFIX SHARE -#include "ob_locality_parser.h" -#include "lib/alloc/alloc_assist.h" - -using namespace oceanbase::common; -using namespace oceanbase::share; - -// full replica -const char *ObLocalityParser::FULL_REPLICA_STR = "FULL"; -const char *ObLocalityParser::F_REPLICA_STR = "F"; -// logonly replica -const char *ObLocalityParser::LOGONLY_REPLICA_STR = "LOGONLY"; -const char *ObLocalityParser::L_REPLICA_STR = "L"; -// backup replica -const char *ObLocalityParser::BACKUP_REPLICA_STR = "BACKUP"; -const char *ObLocalityParser::B_REPLICA_STR = "B"; -// readonly replica -const char *ObLocalityParser::READONLY_REPLICA_STR = "READONLY"; -const char *ObLocalityParser::R_REPLICA_STR = "R"; -// memonly replica -const char *ObLocalityParser::MEMONLY_REPLICA_STR = "MEMONLY"; -const char *ObLocalityParser::M_REPLICA_STR = "M"; -// encryption logonly replica -const char *ObLocalityParser::ENCRYPTION_LOGONLY_REPLICA_STR = "ENCRYPTION_LOGONLY"; -const char *ObLocalityParser::E_REPLICA_STR = "E"; - -int ObLocalityParser::parse_type(const char *str, int64_t len, ObReplicaType &replica_type) -{ - UNUSED(len); - // TODO: only support F-replica in 4.0 and R-replica in 4.2 for now, will support others in the future - int ret = OB_SUCCESS; - if (OB_ISNULL(str)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid replica type string. 
null!", K(ret)); - LOG_USER_ERROR(OB_INVALID_ARGUMENT, "replica_type, replica_type should not be null"); - } else if (0 == STRCASECMP(FULL_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_FULL; - } else if (0 == STRCASECMP(F_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_FULL; - } else if (0 == STRCASECMP(LOGONLY_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_LOGONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "logonly-replica"); - } else if ( 0 == STRCASECMP(L_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_LOGONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "logonly-replica"); - } else if (0 == STRCASECMP(ENCRYPTION_LOGONLY_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_ENCRYPTION_LOGONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "encryption-logonly-replica"); - } else if ( 0 == STRCASECMP(E_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_ENCRYPTION_LOGONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "encryption-logonly-replica"); - } else if ( 0 == STRCASECMP(BACKUP_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_BACKUP; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "backup-replica"); - } else if ( 0 == STRCASECMP(B_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_BACKUP; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "backup-replica"); - } else if ( 0 == STRCASECMP(READONLY_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_READONLY; - } else if ( 0 == STRCASECMP(R_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_READONLY; - } else if ( 0 == STRCASECMP(MEMONLY_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_MEMONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "memonly-replica"); - } else if ( 0 == STRCASECMP(M_REPLICA_STR, str)) { - replica_type = REPLICA_TYPE_MEMONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "memonly-replica"); - } else { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid 
replica type string", K(str), K(ret)); - LOG_USER_ERROR(OB_INVALID_ARGUMENT, "replica_type, unrecognized replica_type"); - } - return ret; -} - diff --git a/src/share/ob_locality_parser.h b/src/share/ob_locality_parser.h deleted file mode 100644 index 689befa43..000000000 --- a/src/share/ob_locality_parser.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright (c) 2021 OceanBase - * OceanBase CE is licensed under Mulan PubL v2. - * You can use this software according to the terms and conditions of the Mulan PubL v2. - * You may obtain a copy of Mulan PubL v2 at: - * http://license.coscl.org.cn/MulanPubL-2.0 - * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, - * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, - * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. - * See the Mulan PubL v2 for more details. - */ - -#ifndef OCEANBASE_SHARE_OB_LOCALITY_PARSER_H_ -#define OCEANBASE_SHARE_OB_LOCALITY_PARSER_H_ - -#include "share/ob_define.h" - -namespace oceanbase -{ -namespace share -{ -class ObLocalityParser -{ -public: - static int parse_type(const char *str, int64_t len, common::ObReplicaType &type); -private: - // full replica - static const char *FULL_REPLICA_STR; - static const char *F_REPLICA_STR; - // logonly replica - static const char *LOGONLY_REPLICA_STR; - static const char *L_REPLICA_STR; - // backup replica - static const char *BACKUP_REPLICA_STR; - static const char *B_REPLICA_STR; - // readonly replica - static const char *READONLY_REPLICA_STR; - static const char *R_REPLICA_STR; - // memonly replica - static const char *MEMONLY_REPLICA_STR; - static const char *M_REPLICA_STR; - // encryption logonly replica - static const char *ENCRYPTION_LOGONLY_REPLICA_STR; - static const char *E_REPLICA_STR; -}; - -} // end namespace share -} // end namespace oceanbase -#endif diff --git a/src/share/ob_replica_info.cpp b/src/share/ob_replica_info.cpp index 0cfc1e2e0..8888e1b2c 100644 --- 
a/src/share/ob_replica_info.cpp +++ b/src/share/ob_replica_info.cpp @@ -59,6 +59,15 @@ int64_t BaseReplicaAttrSet::get_encryption_logonly_replica_num() const return num; } +int64_t BaseReplicaAttrSet::get_columnstore_replica_num() const +{ + int64_t num = 0; + for (int64_t i = 0; i < get_columnstore_replica_attr_array().count(); ++i) { + num += get_columnstore_replica_attr_array().at(i).num_; + } + return num; +} + int64_t BaseReplicaAttrSet::get_paxos_replica_num() const { return get_full_replica_num() @@ -81,6 +90,9 @@ int64_t BaseReplicaAttrSet::get_specific_replica_num() const if (ObLocalityDistribution::ALL_SERVER_CNT != get_encryption_logonly_replica_num()) { specific_replica_num += get_encryption_logonly_replica_num(); } + if (ObLocalityDistribution::ALL_SERVER_CNT != get_columnstore_replica_num()) { + specific_replica_num += get_columnstore_replica_num(); + } return specific_replica_num; } @@ -117,7 +129,8 @@ bool ObReplicaAttrSet::operator==(const ObReplicaAttrSet &that) const if (full_replica_attr_array_.count() != that.full_replica_attr_array_.count() || logonly_replica_attr_array_.count() != that.logonly_replica_attr_array_.count() || readonly_replica_attr_array_.count() != that.readonly_replica_attr_array_.count() - || encryption_logonly_replica_attr_array_.count() != that.encryption_logonly_replica_attr_array_.count()) { + || encryption_logonly_replica_attr_array_.count() != that.encryption_logonly_replica_attr_array_.count() + || columnstore_replica_attr_array_.count() != that.columnstore_replica_attr_array_.count()) { equal = false; } else { for (int64_t i = 0; equal && i < full_replica_attr_array_.count(); ++i) { @@ -140,6 +153,11 @@ bool ObReplicaAttrSet::operator==(const ObReplicaAttrSet &that) const equal = false; } } + for (int64_t i = 0; equal && i < columnstore_replica_attr_array_.count(); ++i) { + if (columnstore_replica_attr_array_.at(i) != that.columnstore_replica_attr_array_.at(i)) { + equal = false; + } + } } return equal; } @@ -162,6 
+180,9 @@ int ObReplicaAttrSet::assign(const BaseReplicaAttrSet &that) } else if (OB_FAIL(encryption_logonly_replica_attr_array_.assign( that.get_encryption_logonly_replica_attr_array()))) { LOG_WARN("fail to assign encryption logonly replica attr array", KR(ret)); + } else if (OB_FAIL(columnstore_replica_attr_array_.assign( + that.get_columnstore_replica_attr_array()))) { + LOG_WARN("fail to assign columnstore replica attr array", KR(ret)); } return ret; } @@ -170,7 +191,8 @@ int ObReplicaAttrSet::set_replica_attr_array( const common::ObIArray &full_replica_attr_array, const common::ObIArray &logonly_replica_attr_array, const common::ObIArray &readonly_replica_attr_array, - const common::ObIArray &encryption_logonly_replica_attr_array) + const common::ObIArray &encryption_logonly_replica_attr_array, + const common::ObIArray &columnstore_replica_attr_array) { int ret = OB_SUCCESS; if (OB_FAIL(full_replica_attr_array_.assign(full_replica_attr_array))) { @@ -181,6 +203,8 @@ int ObReplicaAttrSet::set_replica_attr_array( LOG_WARN("fail to assign readonly replica attr array", KR(ret)); } else if (OB_FAIL(encryption_logonly_replica_attr_array_.assign(encryption_logonly_replica_attr_array))) { LOG_WARN("fail to assign encryption logonly replica attr array", KR(ret)); + } else if (OB_FAIL(columnstore_replica_attr_array_.assign(columnstore_replica_attr_array))) { + LOG_WARN("fail to assign columnstore replica attr array", KR(ret)); } return ret; } @@ -201,12 +225,15 @@ int ObReplicaAttrSet::set_paxos_replica_attr_array( return ret; } -int ObReplicaAttrSet::set_readonly_replica_attr_array( - const common::ObIArray &readonly_replica_attr_array) +int ObReplicaAttrSet::set_non_paxos_replica_attr_array( + const common::ObIArray &readonly_replica_attr_array, + const common::ObIArray &columnstore_replica_attr_array) { int ret = OB_SUCCESS; if (OB_FAIL(readonly_replica_attr_array_.assign(readonly_replica_attr_array))) { LOG_WARN("fail to assign full replica attr array", K(ret)); + 
} else if (OB_FAIL(columnstore_replica_attr_array_.assign(columnstore_replica_attr_array))) { + LOG_WARN("fail to assign columnstore replica attr array", KR(ret)); } return ret; } @@ -263,6 +290,10 @@ bool ObReplicaAttrSet::is_specific_replica_attr() const const ReplicaAttr &replica_attr = encryption_logonly_replica_attr_array_.at(i); bool_ret = 100 != replica_attr.memstore_percent_; } + for (int64_t i = 0; !bool_ret && i < columnstore_replica_attr_array_.count(); i++) { + const ReplicaAttr &replica_attr = columnstore_replica_attr_array_.at(i); + bool_ret = 100 != replica_attr.memstore_percent_; + } return bool_ret; } @@ -373,6 +404,30 @@ int ObReplicaAttrSet::add_encryption_logonly_replica_num(const ReplicaAttr &repl return ret; } +int ObReplicaAttrSet::add_columnstore_replica_num(const ReplicaAttr &replica_attr) +{ + int ret = OB_SUCCESS; + if (replica_attr.num_ > 0) { + if (columnstore_replica_attr_array_.count() <= 0) { + if (OB_FAIL(columnstore_replica_attr_array_.push_back( + ReplicaAttr(0, replica_attr.memstore_percent_)))) { + LOG_WARN("fail to push back", K(ret)); + } + } + if (OB_FAIL(ret)) { + // bypass + } else if (columnstore_replica_attr_array_.count() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("index unexpected", K(ret), + "columnstore_replica_attr_array_count", + columnstore_replica_attr_array_.count()); + } else { + columnstore_replica_attr_array_.at(0).num_ += replica_attr.num_; + } + } + return ret; +} + int ObReplicaAttrSet::sub_full_replica_num(const ReplicaAttr &replica_attr) { int ret = OB_SUCCESS; @@ -485,56 +540,27 @@ int ObReplicaAttrSet::sub_encryption_logonly_replica_num(const ReplicaAttr &repl return ret; } -bool ObReplicaAttrSet::has_this_replica( - const common::ObReplicaType replica_type, - const int64_t memstore_percent) -{ - bool found = false; - if (common::REPLICA_TYPE_FULL == replica_type) { - for (int64_t i = 0; !found && i < full_replica_attr_array_.count(); ++i) { - ReplicaAttr &this_replica_attr = 
full_replica_attr_array_.at(i); - if (this_replica_attr.num_ > 0 && memstore_percent == this_replica_attr.memstore_percent_) { - found = true; - } - } - } else if (common::REPLICA_TYPE_LOGONLY == replica_type) { - for (int64_t i = 0; !found && i < logonly_replica_attr_array_.count(); ++i) { - ReplicaAttr &this_replica_attr = logonly_replica_attr_array_.at(i); - if (this_replica_attr.num_ > 0 && memstore_percent == this_replica_attr.memstore_percent_) { - found = true; - } - } - } else if (common::REPLICA_TYPE_READONLY == replica_type) { - for (int64_t i = 0; !found && i < readonly_replica_attr_array_.count(); ++i) { - ReplicaAttr &this_replica_attr = readonly_replica_attr_array_.at(i); - if (this_replica_attr.num_ > 0 && memstore_percent == this_replica_attr.memstore_percent_) { - found = true; - } - } - } else if (common::REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type) { - for (int64_t i = 0; !found && i < encryption_logonly_replica_attr_array_.count(); ++i) { - ReplicaAttr &this_replica_attr = encryption_logonly_replica_attr_array_.at(i); - if (this_replica_attr.num_ > 0 && memstore_percent == this_replica_attr.memstore_percent_) { - found = true; - } - } - } else { - found = false; - } - return found; -} - - -int ObReplicaAttrSet::get_readonly_memstore_percent(int64_t &memstore_percent) const +int ObReplicaAttrSet::sub_columnstore_replica_num(const ReplicaAttr &replica_attr) { int ret = OB_SUCCESS; - if (readonly_replica_attr_array_.count() <= 0 || readonly_replica_attr_array_.count() > 1) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("readonly replica attr array count unexpected", K(ret), - "array_count", readonly_replica_attr_array_.count()); - } else { - const ReplicaAttr &replica_attr = readonly_replica_attr_array_.at(0); - memstore_percent = replica_attr.memstore_percent_; + if (replica_attr.num_ > 0) { + if (columnstore_replica_attr_array_.count() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("columnstore replica attr array empty", K(ret)); + } else if 
(columnstore_replica_attr_array_.at(0).num_ < replica_attr.num_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("columnstore replica num not enough", K(ret), + "columnstore_replica_num_in_array", + columnstore_replica_attr_array_.at(0).num_, + K(replica_attr)); + } else { + columnstore_replica_attr_array_.at(0).num_ -= replica_attr.num_; + if (columnstore_replica_attr_array_.at(0).num_ <= 0) { + if (OB_FAIL(columnstore_replica_attr_array_.remove(0))) { + LOG_WARN("fail to remove", K(ret)); + } + } + } } return ret; } @@ -552,7 +578,7 @@ int ObZoneReplicaAttrSet::append(const ObZoneReplicaAttrSet &that) OB_SUCC(ret) && i < that.replica_attr_set_.get_full_replica_attr_array().count(); ++i) { const ReplicaAttr &this_replica_attr = that.replica_attr_set_.get_full_replica_attr_array().at(i); - if (OB_FAIL(add_full_replica_num(this_replica_attr))) { + if (OB_FAIL(replica_attr_set_.add_full_replica_num(this_replica_attr))) { LOG_WARN("fail to add full replica num", K(ret)); } } @@ -562,7 +588,7 @@ int ObZoneReplicaAttrSet::append(const ObZoneReplicaAttrSet &that) OB_SUCC(ret) && i < that.replica_attr_set_.get_logonly_replica_attr_array().count(); ++i) { const ReplicaAttr &this_replica_attr = that.replica_attr_set_.get_logonly_replica_attr_array().at(i); - if (OB_FAIL(add_logonly_replica_num(this_replica_attr))) { + if (OB_FAIL(replica_attr_set_.add_logonly_replica_num(this_replica_attr))) { LOG_WARN("fail to add logonly replica num", K(ret)); } } @@ -572,7 +598,7 @@ int ObZoneReplicaAttrSet::append(const ObZoneReplicaAttrSet &that) OB_SUCC(ret) && i < that.replica_attr_set_.get_readonly_replica_attr_array().count(); ++i) { const ReplicaAttr &this_replica_attr = that.replica_attr_set_.get_readonly_replica_attr_array().at(i); - if (OB_FAIL(add_readonly_replica_num(this_replica_attr))) { + if (OB_FAIL(replica_attr_set_.add_readonly_replica_num(this_replica_attr))) { LOG_WARN("fail to add readonly replica num", K(ret)); } } @@ -583,12 +609,22 @@ int 
ObZoneReplicaAttrSet::append(const ObZoneReplicaAttrSet &that) ++i) { const ReplicaAttr &this_replica_attr = that.replica_attr_set_.get_encryption_logonly_replica_attr_array().at(i); - if (OB_FAIL(add_encryption_logonly_replica_num(this_replica_attr))) { + if (OB_FAIL(replica_attr_set_.add_encryption_logonly_replica_num(this_replica_attr))) { LOG_WARN("fail to add logonly replica num", K(ret)); } } lib::ob_sort(replica_attr_set_.get_encryption_logonly_replica_attr_array_for_sort().begin(), replica_attr_set_.get_encryption_logonly_replica_attr_array_for_sort().end()); + for (int64_t i = 0; + OB_SUCC(ret) && i < that.replica_attr_set_.get_columnstore_replica_attr_array().count(); + ++i) { + const ReplicaAttr &this_replica_attr = that.replica_attr_set_.get_columnstore_replica_attr_array().at(i); + if (OB_FAIL(replica_attr_set_.add_columnstore_replica_num(this_replica_attr))) { + LOG_WARN("fail to add columnstore replica num", K(ret)); + } + } + lib::ob_sort(replica_attr_set_.get_columnstore_replica_attr_array_for_sort().begin(), + replica_attr_set_.get_columnstore_replica_attr_array_for_sort().end()); return ret; } @@ -755,36 +791,6 @@ bool ObZoneReplicaAttrSet::check_paxos_num_valid() const } -void ObReplicaNumSet::set_replica_num( - int64_t full_replica_num, - int64_t logonly_replica_num, - int64_t readonly_replica_num, - int64_t encryption_logonly_replica_num) -{ - full_replica_num_ = full_replica_num; - logonly_replica_num_ = logonly_replica_num; - readonly_replica_num_ = readonly_replica_num; - encryption_logonly_replica_num_ = encryption_logonly_replica_num; -} - -int64_t ObReplicaNumSet::get_specific_replica_num() const -{ - int64_t specific_replica_num = 0; - if (ObLocalityDistribution::ALL_SERVER_CNT != full_replica_num_) { - specific_replica_num += full_replica_num_; - } - if (ObLocalityDistribution::ALL_SERVER_CNT != logonly_replica_num_) { - specific_replica_num += logonly_replica_num_; - } - if (ObLocalityDistribution::ALL_SERVER_CNT != 
readonly_replica_num_) { - specific_replica_num += readonly_replica_num_; - } - if (ObLocalityDistribution::ALL_SERVER_CNT != encryption_logonly_replica_num_) { - specific_replica_num += encryption_logonly_replica_num_; - } - return specific_replica_num; -} - int64_t SchemaReplicaAttrSet::get_convert_size() const { int64_t convert_size = sizeof(SchemaReplicaAttrSet); @@ -796,6 +802,8 @@ int64_t SchemaReplicaAttrSet::get_convert_size() const convert_size += readonly_set.count() * static_cast(sizeof(share::ReplicaAttr)); const ObIArray &encryption_logonly_set = get_encryption_logonly_replica_attr_array(); convert_size += encryption_logonly_set.count() * static_cast(sizeof(share::ReplicaAttr)); + const ObIArray &columnstore_set = get_columnstore_replica_attr_array(); + convert_size += columnstore_set.count() * static_cast(sizeof(share::ReplicaAttr)); return convert_size; } diff --git a/src/share/ob_replica_info.h b/src/share/ob_replica_info.h index f2359cbad..36656a6b0 100644 --- a/src/share/ob_replica_info.h +++ b/src/share/ob_replica_info.h @@ -72,22 +72,26 @@ public: virtual const common::ObIArray &get_logonly_replica_attr_array() const = 0; virtual const common::ObIArray &get_readonly_replica_attr_array() const = 0; virtual const common::ObIArray &get_encryption_logonly_replica_attr_array() const = 0; + virtual const common::ObIArray &get_columnstore_replica_attr_array() const = 0; virtual common::ObIArray &get_full_replica_attr_array() = 0; virtual common::ObIArray &get_logonly_replica_attr_array() = 0; virtual common::ObIArray &get_readonly_replica_attr_array() = 0; virtual common::ObIArray &get_encryption_logonly_replica_attr_array() = 0; + virtual common::ObIArray &get_columnstore_replica_attr_array() = 0; int64_t get_full_replica_num() const; int64_t get_logonly_replica_num() const; int64_t get_readonly_replica_num() const; int64_t get_encryption_logonly_replica_num() const; + int64_t get_columnstore_replica_num() const; int64_t get_paxos_replica_num() const; 
int64_t get_specific_replica_num() const; TO_STRING_KV("full_replica_attr_array", get_full_replica_attr_array(), "logonly_replica_attr_array", get_logonly_replica_attr_array(), "readonly_replica_attr_array", get_readonly_replica_attr_array(), - "encryption_logonly_replica_attr_array", get_encryption_logonly_replica_attr_array()); + "encryption_logonly_replica_attr_array", get_encryption_logonly_replica_attr_array(), + "columnstore_replica_attr_array", get_columnstore_replica_attr_array()); }; typedef common::ObArrayHelper SchemaReplicaAttrArray; @@ -100,7 +104,8 @@ public: full_replica_attr_array_(), logonly_replica_attr_array_(), readonly_replica_attr_array_(), - encryption_logonly_replica_attr_array_() {} + encryption_logonly_replica_attr_array_(), + columnstore_replica_attr_array_() {} virtual ~SchemaReplicaAttrSet() {} int64_t get_convert_size() const; public: @@ -116,6 +121,9 @@ public: virtual const common::ObIArray &get_encryption_logonly_replica_attr_array() const override { return encryption_logonly_replica_attr_array_; } + virtual const common::ObIArray &get_columnstore_replica_attr_array() const override { + return columnstore_replica_attr_array_; + } virtual common::ObIArray &get_full_replica_attr_array() override { return full_replica_attr_array_; } @@ -128,18 +136,23 @@ public: virtual common::ObIArray &get_encryption_logonly_replica_attr_array() override { return encryption_logonly_replica_attr_array_; } + virtual common::ObIArray &get_columnstore_replica_attr_array() override { + return columnstore_replica_attr_array_; + } public: void reset() { full_replica_attr_array_.reset(); logonly_replica_attr_array_.reset(); readonly_replica_attr_array_.reset(); encryption_logonly_replica_attr_array_.reset(); + columnstore_replica_attr_array_.reset(); } private: SchemaReplicaAttrArray full_replica_attr_array_; SchemaReplicaAttrArray logonly_replica_attr_array_; SchemaReplicaAttrArray readonly_replica_attr_array_; SchemaReplicaAttrArray 
encryption_logonly_replica_attr_array_; + SchemaReplicaAttrArray columnstore_replica_attr_array_; }; class ObReplicaAttrSet : public BaseReplicaAttrSet @@ -160,21 +173,20 @@ public: logonly_replica_attr_array_.reset(); readonly_replica_attr_array_.reset(); encryption_logonly_replica_attr_array_.reset(); + columnstore_replica_attr_array_.reset(); } int add_full_replica_num(const ReplicaAttr &replica_attr); int add_logonly_replica_num(const ReplicaAttr &replica_attr); int add_readonly_replica_num(const ReplicaAttr &replica_attr); int add_encryption_logonly_replica_num(const ReplicaAttr &replica_attr); + int add_columnstore_replica_num(const ReplicaAttr &replica_attr); int sub_full_replica_num(const ReplicaAttr &replica_attr); int sub_logonly_replica_num(const ReplicaAttr &replica_attr); int sub_readonly_replica_num(const ReplicaAttr &replica_attr); int sub_encryption_logonly_replica_num(const ReplicaAttr &replica_attr); - - bool has_this_replica( - const common::ObReplicaType replica_type, - const int64_t memstore_percent); + int sub_columnstore_replica_num(const ReplicaAttr &replica_attr); virtual const common::ObIArray &get_full_replica_attr_array() const override { return full_replica_attr_array_; @@ -188,6 +200,9 @@ public: virtual const common::ObIArray &get_encryption_logonly_replica_attr_array() const override { return encryption_logonly_replica_attr_array_; } + virtual const common::ObIArray &get_columnstore_replica_attr_array() const override { + return columnstore_replica_attr_array_; + } virtual common::ObIArray &get_full_replica_attr_array() override { return full_replica_attr_array_; } @@ -200,6 +215,9 @@ public: virtual common::ObIArray &get_encryption_logonly_replica_attr_array() override { return encryption_logonly_replica_attr_array_; } + virtual common::ObIArray &get_columnstore_replica_attr_array() override { + return columnstore_replica_attr_array_; + } ReplicaAttrArray &get_full_replica_attr_array_for_sort() { return full_replica_attr_array_; @@ 
-213,22 +231,25 @@ public: ReplicaAttrArray &get_encryption_logonly_replica_attr_array_for_sort() { return encryption_logonly_replica_attr_array_; } + ReplicaAttrArray &get_columnstore_replica_attr_array_for_sort() { + return columnstore_replica_attr_array_; + } int set_replica_attr_array( const common::ObIArray &full_replica_attr_array, const common::ObIArray &logonly_replica_attr_array, const common::ObIArray &readonly_replica_attr_array, - const common::ObIArray &encryption_logonly_replica_attr_array); + const common::ObIArray &encryption_logonly_replica_attr_array, + const common::ObIArray &columnstore_replica_attr_array); int set_paxos_replica_attr_array( const common::ObIArray &full_replica_attr_array, const common::ObIArray &logonly_replica_attr_array, const common::ObIArray &encryption_logonly_replica_attr_array); - int set_readonly_replica_attr_array( - const common::ObIArray &readonly_replica_attr_array); - - int get_readonly_memstore_percent(int64_t &memstore_percent) const; + int set_non_paxos_replica_attr_array( + const common::ObIArray &readonly_replica_attr_array, + const common::ObIArray &columnstore_replica_attr_array); bool has_paxos_replica() const; bool is_specific_readonly_replica() const; @@ -240,6 +261,7 @@ private: ReplicaAttrArray logonly_replica_attr_array_; ReplicaAttrArray readonly_replica_attr_array_; ReplicaAttrArray encryption_logonly_replica_attr_array_; + ReplicaAttrArray columnstore_replica_attr_array_; }; struct SchemaZoneReplicaAttrSet @@ -253,6 +275,7 @@ struct SchemaZoneReplicaAttrSet int64_t get_logonly_replica_num() const {return replica_attr_set_.get_logonly_replica_num();} int64_t get_readonly_replica_num() const {return replica_attr_set_.get_readonly_replica_num();} int64_t get_encryption_logonly_replica_num() const {return replica_attr_set_.get_encryption_logonly_replica_num();} + int64_t get_columnstore_replica_num() const {return replica_attr_set_.get_columnstore_replica_num();} int64_t get_paxos_replica_num() const { 
return get_full_replica_num() + get_logonly_replica_num() + get_encryption_logonly_replica_num(); } @@ -291,34 +314,9 @@ struct ObZoneReplicaAttrSet int64_t get_encryption_logonly_replica_num() const { return replica_attr_set_.get_encryption_logonly_replica_num(); } + int64_t get_columnstore_replica_num() const {return replica_attr_set_.get_columnstore_replica_num();} const common::ObIArray &get_zone_set() const { return zone_set_; } - int add_full_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.add_full_replica_num(replica_attr); - } - int add_logonly_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.add_logonly_replica_num(replica_attr); - } - int add_readonly_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.add_readonly_replica_num(replica_attr); - } - int add_encryption_logonly_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.add_encryption_logonly_replica_num(replica_attr); - } - - int sub_full_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.sub_full_replica_num(replica_attr); - } - int sub_logonly_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.sub_logonly_replica_num(replica_attr); - } - int sub_readonly_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.sub_readonly_replica_num(replica_attr); - } - int sub_encryption_logonly_replica_num(const ReplicaAttr &replica_attr) { - return replica_attr_set_.sub_encryption_logonly_replica_num(replica_attr); - } - int64_t get_paxos_replica_num() const { return get_full_replica_num() + get_logonly_replica_num() + get_encryption_logonly_replica_num(); } @@ -359,57 +357,6 @@ struct ObZoneReplicaAttrSet typedef ObZoneReplicaAttrSet ObZoneReplicaNumSet; -struct ObReplicaNumSet -{ - ObReplicaNumSet() : full_replica_num_(0), - logonly_replica_num_(0), - readonly_replica_num_(0), - encryption_logonly_replica_num_(0) - { reset(); } - virtual 
~ObReplicaNumSet() {} - bool operator==(const ObReplicaNumSet &that) { - return full_replica_num_ == that.full_replica_num_ - && logonly_replica_num_ == that.logonly_replica_num_ - && readonly_replica_num_ == that.readonly_replica_num_ - && encryption_logonly_replica_num_ == that.encryption_logonly_replica_num_; - } - bool operator!=(const ObReplicaNumSet &that) { - return (!(*this == that)); - } - void reset() { - full_replica_num_ = 0; - logonly_replica_num_ = 0; - readonly_replica_num_ = 0; - encryption_logonly_replica_num_ = 0; - } - ObReplicaNumSet &operator=(const ObReplicaNumSet &that) { - if (this != &that) { - full_replica_num_ = that.full_replica_num_; - logonly_replica_num_ = that.logonly_replica_num_; - readonly_replica_num_ = that.readonly_replica_num_; - encryption_logonly_replica_num_ = that.encryption_logonly_replica_num_; - } - return *this; - } - void set_replica_num( - int64_t full_replica_num, - int64_t logonly_replica_num, - int64_t readonly_replica_num, - int64_t encryption_logonly_replica_num); - - int64_t get_specific_replica_num() const; - - TO_STRING_KV(K(full_replica_num_), - K(logonly_replica_num_), - K(readonly_replica_num_), - K(logonly_replica_num_)); - - int64_t full_replica_num_; - int64_t logonly_replica_num_; - int64_t readonly_replica_num_; - int64_t encryption_logonly_replica_num_; -}; - } // end namespace share } // end namespace oceanbase #endif diff --git a/src/share/ob_rpc_struct.cpp b/src/share/ob_rpc_struct.cpp index b53da0777..20a4a510e 100644 --- a/src/share/ob_rpc_struct.cpp +++ b/src/share/ob_rpc_struct.cpp @@ -4078,7 +4078,7 @@ int ObAdminAlterLSReplicaArg::init_add( int ret = OB_SUCCESS; if (OB_UNLIKELY(!ls_id.is_valid()) || OB_UNLIKELY(!server_addr.is_valid()) - || OB_UNLIKELY(replica_type != REPLICA_TYPE_FULL && replica_type != REPLICA_TYPE_READONLY) + || OB_UNLIKELY(!ObReplicaTypeCheck::is_replica_type_valid(replica_type)) || OB_UNLIKELY(paxos_replica_num < 0) || OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { 
//data_source and paxos_replica_num is optional parameter @@ -4157,7 +4157,7 @@ int ObAdminAlterLSReplicaArg::init_modify_replica( int ret = OB_SUCCESS; if (OB_UNLIKELY(!ls_id.is_valid()) || OB_UNLIKELY(!server_addr.is_valid()) - || OB_UNLIKELY(replica_type != REPLICA_TYPE_FULL && replica_type != REPLICA_TYPE_READONLY) + || OB_UNLIKELY(!ObReplicaTypeCheck::is_replica_type_valid(replica_type)) || OB_UNLIKELY(paxos_replica_num < 0) || OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { ret = OB_INVALID_ARGUMENT; @@ -4218,7 +4218,7 @@ void ObAdminAlterLSReplicaArg::reset() ls_id_.reset(); server_addr_.reset(); destination_addr_.reset(); - replica_type_ = common::REPLICA_TYPE_MAX; + replica_type_ = common::REPLICA_TYPE_INVALID; tenant_id_ = OB_INVALID_TENANT_ID; task_id_.reset(); data_source_.reset(); @@ -6235,98 +6235,6 @@ OB_SERIALIZE_MEMBER(ObSyncPGPartitionMTFinishArg, server_, version_); OB_SERIALIZE_MEMBER(ObCheckDanglingReplicaFinishArg, server_, version_, dangling_count_); -OB_SERIALIZE_MEMBER(ObMemberListAndLeaderArg, - member_list_, - leader_, - self_, - lower_list_, - replica_type_, - property_, - role_); - -bool ObMemberListAndLeaderArg::is_valid() const -{ - return member_list_.count() > 0 - && self_.is_valid() - && common::REPLICA_TYPE_MAX != replica_type_ - && property_.is_valid() - && (common::INVALID_ROLE <= role_ && role_ <= common::STANDBY_LEADER); -} - -// If it is a leader, you need to ensure the consistency of role_, leader_/restore_leader_, and self_ -bool ObMemberListAndLeaderArg::check_leader_is_valid() const -{ - bool bret = true; - if (is_leader_by_election(role_)) { - bret = (leader_.is_valid() && self_ == leader_); - } - return bret; -} - -void ObMemberListAndLeaderArg::reset() -{ - member_list_.reset(); - leader_.reset(); - self_.reset(); - lower_list_.reset(); - replica_type_ = common::REPLICA_TYPE_MAX; - role_ = common::INVALID_ROLE; -} - -int ObMemberListAndLeaderArg::assign(const ObMemberListAndLeaderArg &other) -{ - int ret = OB_SUCCESS; - 
if (OB_FAIL(member_list_.assign(other.member_list_))) { - LOG_WARN("fail to assign member_list", KR(ret), K_(member_list)); - } else if (OB_FAIL(lower_list_.assign(other.lower_list_))) { - LOG_WARN("fail to assign lower_list", KR(ret), K_(lower_list)); - } else { - leader_ = other.leader_; - self_ = other.self_; - replica_type_ = other.replica_type_; - property_ = other.property_; - role_ = other.role_; - } - return ret; -} - -OB_SERIALIZE_MEMBER(ObGetMemberListAndLeaderResult, - member_list_, - leader_, - self_, - lower_list_, - replica_type_, - property_); - -void ObGetMemberListAndLeaderResult::reset() -{ - member_list_.reset(); - leader_.reset(); - self_.reset(); - lower_list_.reset(); - replica_type_ = common::REPLICA_TYPE_MAX; -} - -int ObGetMemberListAndLeaderResult::assign(const ObGetMemberListAndLeaderResult &other) -{ - int ret = OB_SUCCESS; - reset(); - if (OB_UNLIKELY(!other.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(other)); - } else if (OB_FAIL(member_list_.assign(other.member_list_))) { - LOG_WARN("failed to assign member list", K(ret)); - } else if (OB_FAIL(lower_list_.assign(other.lower_list_))) { - LOG_WARN("fail to assign member list", K(ret)); - } else { - leader_ = other.leader_; - self_ = other.self_; - replica_type_ = other.replica_type_; - property_ = other.property_; - } - return ret; -} - OB_SERIALIZE_MEMBER(ObBatchGetRoleResult, results_); void ObBatchGetRoleResult::reset() @@ -7580,7 +7488,7 @@ bool TenantServerUnitConfig::is_valid() const return common::OB_INVALID_ID != tenant_id_ && ((lib::Worker::CompatMode::INVALID != compat_mode_ && unit_config_.is_valid() - && replica_type_ != common::ObReplicaType::REPLICA_TYPE_MAX) + && replica_type_ != common::ObReplicaType::REPLICA_TYPE_INVALID) #ifdef OB_BUILD_TDE_SECURITY // root_key can be invalid #endif @@ -7648,7 +7556,7 @@ void TenantServerUnitConfig::reset() unit_id_ = OB_INVALID_ID; compat_mode_ = lib::Worker::CompatMode::INVALID; 
unit_config_.reset(); - replica_type_ = common::ObReplicaType::REPLICA_TYPE_MAX; + replica_type_ = common::ObReplicaType::REPLICA_TYPE_INVALID; if_not_grant_ = false; is_delete_ = false; #ifdef OB_BUILD_TDE_SECURITY @@ -8909,7 +8817,7 @@ bool ObCreateLSArg::is_valid() const { return OB_INVALID_TENANT_ID != tenant_id_ && id_.is_valid() - && REPLICA_TYPE_MAX != replica_type_ + && ObReplicaTypeCheck::is_replica_type_valid(replica_type_) && replica_property_.is_valid() && tenant_info_.is_valid() && create_scn_.is_valid() @@ -8921,7 +8829,7 @@ void ObCreateLSArg::reset() { tenant_id_ = OB_INVALID_TENANT_ID; id_.reset(); - replica_type_ = REPLICA_TYPE_MAX; + replica_type_ = REPLICA_TYPE_INVALID; replica_property_.reset(); tenant_info_.reset(); create_scn_.reset(); @@ -8960,7 +8868,7 @@ int ObCreateLSArg::init(const int64_t tenant_id, int ret = OB_SUCCESS; if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id ||!id.is_valid() - || REPLICA_TYPE_MAX == replica_type + || !ObReplicaTypeCheck::is_replica_type_valid(replica_type) || !replica_property.is_valid() || !tenant_info.is_valid())) { ret = OB_INVALID_ARGUMENT; diff --git a/src/share/ob_rpc_struct.h b/src/share/ob_rpc_struct.h index 94b70c546..a9ce8dee4 100644 --- a/src/share/ob_rpc_struct.h +++ b/src/share/ob_rpc_struct.h @@ -3414,7 +3414,7 @@ public: CREATE_WITH_PALF, }; ObCreateLSArg() : tenant_id_(OB_INVALID_TENANT_ID), id_(), - replica_type_(REPLICA_TYPE_MAX), + replica_type_(REPLICA_TYPE_INVALID), replica_property_(), tenant_info_(), create_scn_(), compat_mode_(lib::Worker::CompatMode::INVALID), @@ -4339,7 +4339,7 @@ public: : ls_id_(), server_addr_(), destination_addr_(), - replica_type_(common::REPLICA_TYPE_MAX), + replica_type_(common::REPLICA_TYPE_INVALID), tenant_id_(OB_INVALID_TENANT_ID), task_id_(), data_source_(), @@ -4411,7 +4411,7 @@ private: bool is_add_valid_() const { return ls_id_.is_valid() && server_addr_.is_valid() - && REPLICA_TYPE_MAX != replica_type_ + && 
ObReplicaTypeCheck::is_replica_type_valid(replica_type_) && is_valid_tenant_id(tenant_id_) && paxos_replica_num_ >= 0; } @@ -4430,7 +4430,7 @@ private: bool is_modify_replica_valid_() const { return ls_id_.is_valid() && server_addr_.is_valid() - && REPLICA_TYPE_MAX != replica_type_ + && ObReplicaTypeCheck::is_replica_type_valid(replica_type_) && is_valid_tenant_id(tenant_id_) && paxos_replica_num_ >= 0; } @@ -6918,65 +6918,6 @@ public: int64_t dangling_count_; }; -struct ObGetMemberListAndLeaderResult final -{ - OB_UNIS_VERSION(1); -public: - ObGetMemberListAndLeaderResult() - : member_list_(), - leader_(), - self_(), - lower_list_(), - replica_type_(common::REPLICA_TYPE_MAX), - property_() {} - void reset(); - inline bool is_valid() const { - return member_list_.count() > 0 - && self_.is_valid() - && common::REPLICA_TYPE_MAX != replica_type_ - && property_.is_valid(); - } - - int assign(const ObGetMemberListAndLeaderResult &other); - TO_STRING_KV(K_(member_list), K_(leader), K_(self), K_(lower_list), K_(replica_type), K_(property)); - - common::ObSEArray member_list_; // copy won't fail - common::ObAddr leader_; - common::ObAddr self_; - common::ObSEArray lower_list_; //Cascaded downstream information - common::ObReplicaType replica_type_; //The type of copy actually stored in the local copy - common::ObReplicaProperty property_; -}; - -struct ObMemberListAndLeaderArg -{ - OB_UNIS_VERSION(1); -public: - ObMemberListAndLeaderArg() - : member_list_(), - leader_(), - self_(), - lower_list_(), - replica_type_(common::REPLICA_TYPE_MAX), - property_(), - role_(common::INVALID_ROLE) {} - void reset(); - bool is_valid() const; - bool check_leader_is_valid() const; - int assign(const ObMemberListAndLeaderArg &other); - TO_STRING_KV(K_(member_list), K_(leader), K_(self), K_(lower_list), - K_(replica_type), K_(property), K_(role)); - - common::ObSArray member_list_; // copy won't fail - common::ObAddr leader_; - common::ObAddr self_; - common::ObSArray lower_list_; 
//Cascaded downstream information - common::ObReplicaType replica_type_; //The type of copy actually stored in the local copy - common::ObReplicaProperty property_; - common::ObRole role_; -}; - struct ObBatchGetRoleResult { OB_UNIS_VERSION(1); @@ -9917,7 +9858,7 @@ public: unit_id_(common::OB_INVALID_ID), compat_mode_(lib::Worker::CompatMode::INVALID), unit_config_(), - replica_type_(common::ObReplicaType::REPLICA_TYPE_MAX), + replica_type_(common::ObReplicaType::REPLICA_TYPE_INVALID), if_not_grant_(false), is_delete_(false) #ifdef OB_BUILD_TDE_SECURITY diff --git a/src/share/ob_share_util.cpp b/src/share/ob_share_util.cpp index 8b18a8650..42e1c387a 100644 --- a/src/share/ob_share_util.cpp +++ b/src/share/ob_share_util.cpp @@ -186,30 +186,8 @@ int ObShareUtil::check_compat_version_for_arbitration_service( const uint64_t tenant_id, bool &is_compatible) { - int ret = OB_SUCCESS; - is_compatible = false; - uint64_t data_version = 0; - if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(tenant_id)); - } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, data_version))) { - LOG_WARN("fail to get sys tenant data version", KR(ret)); - } else if (DATA_VERSION_4_1_0_0 > data_version) { - is_compatible = false; - } else if (!is_sys_tenant(tenant_id) - && OB_FAIL(GET_MIN_DATA_VERSION(gen_user_tenant_id(tenant_id), data_version))) { - LOG_WARN("fail to get user tenant data version", KR(ret), "tenant_id", gen_user_tenant_id(tenant_id)); - } else if (!is_sys_tenant(tenant_id) && DATA_VERSION_4_1_0_0 > data_version) { - is_compatible = false; - } else if (!is_sys_tenant(tenant_id) - && OB_FAIL(GET_MIN_DATA_VERSION(gen_meta_tenant_id(tenant_id), data_version))) { - LOG_WARN("fail to get meta tenant data version", KR(ret), "tenant_id", gen_meta_tenant_id(tenant_id)); - } else if (!is_sys_tenant(tenant_id) && DATA_VERSION_4_1_0_0 > data_version) { - is_compatible = false; - } else { - is_compatible = 
true; - } - return ret; + return check_compat_data_version_(DATA_VERSION_4_1_0_0, true/*check_meta*/, true/*check_user*/, + tenant_id, is_compatible); } int ObShareUtil::generate_arb_replica_num( @@ -239,25 +217,16 @@ int ObShareUtil::check_compat_version_for_readonly_replica( const uint64_t tenant_id, bool &is_compatible) { - int ret = OB_SUCCESS; - uint64_t data_version = 0; - is_compatible = false; - if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(tenant_id)); - } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, data_version))) { - LOG_WARN("fail to get sys tenant data version", KR(ret)); - } else if (DATA_VERSION_4_2_0_0 > data_version) { - is_compatible = false; - } else if (!is_sys_tenant(tenant_id) - && OB_FAIL(GET_MIN_DATA_VERSION(gen_meta_tenant_id(tenant_id), data_version))) { - LOG_WARN("fail to get meta tenant data version", KR(ret), "tenant_id", gen_meta_tenant_id(tenant_id)); - } else if (!is_sys_tenant(tenant_id) && DATA_VERSION_4_2_0_0 > data_version) { - is_compatible = false; - } else { - is_compatible = true; - } - return ret; + return check_compat_data_version_(DATA_VERSION_4_2_0_0, true/*check_meta*/, false/*check_user*/, + tenant_id, is_compatible); +} + +int ObShareUtil::check_compat_version_for_columnstore_replica( + const uint64_t tenant_id, + bool &is_compatible) +{ + return check_compat_data_version_(DATA_VERSION_4_3_3_0, true/*check_meta*/, false/*check_user*/, + tenant_id, is_compatible); } int ObShareUtil::fetch_current_cluster_version( @@ -450,34 +419,42 @@ bool ObShareUtil::is_tenant_enable_transfer(const uint64_t tenant_id) return bret; } -int ObShareUtil::check_compat_version_for_tenant( - const uint64_t tenant_id, - const uint64_t target_data_version, - bool &is_compatible) + +int ObShareUtil::check_compat_data_version_( + const uint64_t required_data_version, + const bool check_meta_tenant, + const bool check_user_tenant, + const uint64_t 
tenant_id, + bool &is_compatible) { int ret = OB_SUCCESS; - is_compatible = false; + is_compatible = true; uint64_t data_version = 0; if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id) - || OB_UNLIKELY(0 == target_data_version)) { + || OB_UNLIKELY(0 == required_data_version)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(target_data_version)); + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(required_data_version)); } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, data_version))) { LOG_WARN("fail to get sys tenant data version", KR(ret)); - } else if (target_data_version > data_version) { + } else if (required_data_version > data_version) { is_compatible = false; - } else if (is_sys_tenant(tenant_id)) { - is_compatible = true; - } else if (OB_FAIL(GET_MIN_DATA_VERSION(gen_user_tenant_id(tenant_id), data_version))) { - LOG_WARN("fail to get user tenant data version", KR(ret), "tenant_id", gen_user_tenant_id(tenant_id)); - } else if (target_data_version > data_version) { - is_compatible = false; - } else if (OB_FAIL(GET_MIN_DATA_VERSION(gen_meta_tenant_id(tenant_id), data_version))) { - LOG_WARN("fail to get meta tenant data version", KR(ret), "tenant_id", gen_meta_tenant_id(tenant_id)); - } else if (target_data_version > data_version) { - is_compatible = false; - } else { - is_compatible = true; + } else if (!is_sys_tenant(tenant_id)) { + if (check_meta_tenant) { + if (OB_FAIL(GET_MIN_DATA_VERSION(gen_meta_tenant_id(tenant_id), data_version))) { + LOG_WARN("fail to get meta tenant data version", KR(ret), "tenant_id", gen_meta_tenant_id(tenant_id)); + } else if (required_data_version > data_version) { + is_compatible = false; + } + } + if (OB_FAIL(ret) || !is_compatible) { + // skip + } else if (check_user_tenant) { + if (OB_FAIL(GET_MIN_DATA_VERSION(gen_user_tenant_id(tenant_id), data_version))) { + LOG_WARN("fail to get user tenant data version", KR(ret), "tenant_id", gen_user_tenant_id(tenant_id)); + } else if 
(required_data_version > data_version) { + is_compatible = false; + } + } } return ret; } @@ -492,8 +469,8 @@ int ObShareUtil::check_compat_version_for_clone_standby_tenant( if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id)); - } else if (OB_FAIL(check_compat_version_for_tenant( - tenant_id, target_data_version, is_compatible))) { + } else if (OB_FAIL(check_compat_data_version_(target_data_version, + true/*check_meta*/, true/*check_user*/, tenant_id, is_compatible))) { LOG_WARN("fail to check data version for clone tenant", KR(ret), K(tenant_id), K(target_data_version)); } @@ -510,8 +487,8 @@ int ObShareUtil::check_compat_version_for_clone_tenant( if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id)); - } else if (OB_FAIL(check_compat_version_for_tenant( - tenant_id, target_data_version, is_compatible))) { + } else if (OB_FAIL(check_compat_data_version_(target_data_version, + true/*check_meta*/, true/*check_user*/, tenant_id, is_compatible))) { LOG_WARN("fail to check data version for clone tenant", KR(ret), K(tenant_id), K(target_data_version)); } @@ -551,5 +528,77 @@ int ObShareUtil::check_compat_version_for_clone_tenant_with_tenant_role( return ret; } +const char *ObShareUtil::replica_type_to_string(const ObReplicaType type) +{ + const char *str = NULL; + switch (type) { + case ObReplicaType::REPLICA_TYPE_FULL: { + str = FULL_REPLICA_STR; + break; + } + case ObReplicaType::REPLICA_TYPE_BACKUP: { + str = BACKUP_REPLICA_STR; + break; + } + case ObReplicaType::REPLICA_TYPE_LOGONLY: { + str = LOGONLY_REPLICA_STR; + break; + } + case ObReplicaType::REPLICA_TYPE_READONLY: { + str = READONLY_REPLICA_STR; + break; + } + case ObReplicaType::REPLICA_TYPE_MEMONLY: { + str = MEMONLY_REPLICA_STR; + break; + } + case ObReplicaType::REPLICA_TYPE_ENCRYPTION_LOGONLY: { + str = ENCRYPTION_LOGONLY_REPLICA_STR; + break; + } + 
case ObReplicaType::REPLICA_TYPE_COLUMNSTORE: { + str = COLUMNSTORE_REPLICA_STR; + break; + } + default: { + str = "INVALID"; + break; + } + } + return str; +} + +// return REPLICA_TYPE_INVALID if str is invalid +ObReplicaType ObShareUtil::string_to_replica_type(const char *str) +{ + return string_to_replica_type(ObString(str)); +} + +// return REPLICA_TYPE_INVALID if str is invalid +ObReplicaType ObShareUtil::string_to_replica_type(const ObString &str) +{ + ObReplicaType replica_type = REPLICA_TYPE_INVALID; + if (OB_UNLIKELY(str.empty())) { + replica_type = REPLICA_TYPE_INVALID; + } else if (0 == str.case_compare(FULL_REPLICA_STR) || 0 == str.case_compare(F_REPLICA_STR)) { + replica_type = REPLICA_TYPE_FULL; + } else if (0 == str.case_compare(READONLY_REPLICA_STR) || 0 == str.case_compare(R_REPLICA_STR)) { + replica_type = REPLICA_TYPE_READONLY; + } else if (0 == str.case_compare(COLUMNSTORE_REPLICA_STR) || 0 == str.case_compare(C_REPLICA_STR)) { + replica_type = REPLICA_TYPE_COLUMNSTORE; + } else if (0 == str.case_compare(LOGONLY_REPLICA_STR) || 0 == str.case_compare(L_REPLICA_STR)) { + replica_type = REPLICA_TYPE_LOGONLY; + } else if (0 == str.case_compare(ENCRYPTION_LOGONLY_REPLICA_STR) || 0 == str.case_compare(E_REPLICA_STR)) { + replica_type = REPLICA_TYPE_ENCRYPTION_LOGONLY; + } else if (0 == str.case_compare(BACKUP_REPLICA_STR) || 0 == str.case_compare(B_REPLICA_STR)) { + replica_type = REPLICA_TYPE_BACKUP; + } else if (0 == str.case_compare(MEMONLY_REPLICA_STR) || 0 == str.case_compare(M_REPLICA_STR)) { + replica_type = REPLICA_TYPE_MEMONLY; + } else { + replica_type = REPLICA_TYPE_INVALID; + } + return replica_type; +} + } //end namespace share } //end namespace oceanbase diff --git a/src/share/ob_share_util.h b/src/share/ob_share_util.h index 2294746ef..9f3ac170c 100644 --- a/src/share/ob_share_util.h +++ b/src/share/ob_share_util.h @@ -78,14 +78,6 @@ public: static int check_compat_version_for_arbitration_service( const uint64_t tenant_id, bool 
&is_compatible); - // check whether sys/meta/user tenant has been promoted to target data version - // params[in] tenant_id, which tenant to check - // params[in] target_data_version, data version to check - // params[out] is_compatible, whether tenants are promoted to target data version - static int check_compat_version_for_tenant( - const uint64_t tenant_id, - const uint64_t target_data_version, - bool &is_compatible); // tenant data version should up to 430 when cloning primary tenant // tenant data version should up to 432 when cloning standby tenant // params[in] tenant_id, which tenant to check @@ -122,6 +114,13 @@ public: const uint64_t tenant_id, bool &is_compatible); + // data version must up to 4.3.2 with column-store replica + // @params[in] tenant_id, which tenant to check + // @params[out] is_compatible, whether it is over 4.3.2 + static int check_compat_version_for_columnstore_replica( + const uint64_t tenant_id, + bool &is_compatible); + static int fetch_current_cluster_version( common::ObISQLClient &client, uint64_t &cluster_version); @@ -148,6 +147,16 @@ public: SCN &ora_rowscn); static bool is_tenant_enable_rebalance(const uint64_t tenant_id); static bool is_tenant_enable_transfer(const uint64_t tenant_id); + static const char *replica_type_to_string(const ObReplicaType type); + static ObReplicaType string_to_replica_type(const char *str); + static ObReplicaType string_to_replica_type(const ObString &str); +private: + static int check_compat_data_version_( + const uint64_t required_data_version, + const bool check_meta_tenant, + const bool check_user_tenant, + const uint64_t tenant_id, + bool &is_compatible); }; }//end namespace share }//end namespace oceanbase diff --git a/src/share/ob_tablet_replica_checksum_operator.cpp b/src/share/ob_tablet_replica_checksum_operator.cpp index e0ccaf6e5..bf845391a 100644 --- a/src/share/ob_tablet_replica_checksum_operator.cpp +++ b/src/share/ob_tablet_replica_checksum_operator.cpp @@ -311,6 +311,20 @@ int 
ObTabletReplicaChecksumItem::verify_checksum(const ObTabletReplicaChecksumIt return ret; } +int ObTabletReplicaChecksumItem::verify_column_checksum(const ObTabletReplicaChecksumItem &other) const +{ + int ret = OB_SUCCESS; + if (compaction_scn_ == other.compaction_scn_) { + bool column_meta_equal = false; + if (OB_FAIL(column_meta_.check_equal(other.column_meta_, column_meta_equal))) { + LOG_WARN("fail to check column meta equal", KR(ret), K(other), K(*this)); + } else if (!column_meta_equal) { + ret = OB_CHECKSUM_ERROR; + } + } + return ret; +} + int ObTabletReplicaChecksumItem::assign_key(const ObTabletReplicaChecksumItem &other) { int ret = OB_SUCCESS; @@ -901,5 +915,45 @@ int ObTabletReplicaChecksumOperator::get_hex_column_meta( return ret; } +// ----------------------- ObTabletDataChecksumChecker ----------------------- +ObTabletDataChecksumChecker::ObTabletDataChecksumChecker() + : normal_ckm_item_(nullptr), + cs_replica_ckm_item_(nullptr) +{} + +ObTabletDataChecksumChecker::~ObTabletDataChecksumChecker() +{ + reset(); +} + +void ObTabletDataChecksumChecker::reset() +{ + normal_ckm_item_ = nullptr; + cs_replica_ckm_item_ = nullptr; +} + +int ObTabletDataChecksumChecker::check_data_checksum(const ObTabletReplicaChecksumItem& curr_item, bool is_cs_replica) +{ + int ret = OB_SUCCESS; + if (is_cs_replica) { + if (OB_ISNULL(cs_replica_ckm_item_)) { + cs_replica_ckm_item_ = &curr_item; + } else if (cs_replica_ckm_item_->compaction_scn_ != curr_item.compaction_scn_) { + LOG_INFO("no need to check data checksum", K(curr_item), KPC(this)); + } else if (cs_replica_ckm_item_->data_checksum_ != curr_item.data_checksum_) { + ret = OB_CHECKSUM_ERROR; + } + } else { + if (OB_ISNULL(normal_ckm_item_)) { + normal_ckm_item_ = &curr_item; + } else if (normal_ckm_item_->compaction_scn_ != curr_item.compaction_scn_) { + LOG_INFO("no need to check data checksum", K(curr_item), KPC(this)); + } else if (normal_ckm_item_->data_checksum_ != curr_item.data_checksum_) { + ret = 
OB_CHECKSUM_ERROR; + } + } + return ret; +} + } // share } // oceanbase diff --git a/src/share/ob_tablet_replica_checksum_operator.h b/src/share/ob_tablet_replica_checksum_operator.h index 3153fd886..b918aafd1 100644 --- a/src/share/ob_tablet_replica_checksum_operator.h +++ b/src/share/ob_tablet_replica_checksum_operator.h @@ -83,6 +83,7 @@ public: bool is_valid() const; bool is_same_tablet(const ObTabletReplicaChecksumItem &other) const; int verify_checksum(const ObTabletReplicaChecksumItem &other) const; + int verify_column_checksum(const ObTabletReplicaChecksumItem &other) const; int assign_key(const ObTabletReplicaChecksumItem &other); int assign(const ObTabletReplicaChecksumItem &other); int set_tenant_id(const uint64_t tenant_id); @@ -270,6 +271,18 @@ int ObTabletReplicaChecksumOperator::construct_batch_get_sql_str_( return ret; } +class ObTabletDataChecksumChecker +{ +public: + ObTabletDataChecksumChecker(); + ~ObTabletDataChecksumChecker(); + void reset(); + int check_data_checksum(const ObTabletReplicaChecksumItem& curr_item, bool is_cs_replica); + TO_STRING_KV(KPC_(normal_ckm_item), KPC_(cs_replica_ckm_item)); +private: + const ObTabletReplicaChecksumItem *normal_ckm_item_; + const ObTabletReplicaChecksumItem *cs_replica_ckm_item_; +}; } // share } // oceanbase diff --git a/src/share/scheduler/ob_dag_scheduler_config.h b/src/share/scheduler/ob_dag_scheduler_config.h index d28fee492..f6a685f03 100644 --- a/src/share/scheduler/ob_dag_scheduler_config.h +++ b/src/share/scheduler/ob_dag_scheduler_config.h @@ -101,6 +101,8 @@ DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_START_PREPARE_MIGRATION, ObDagPrio::DAG_PRIO true, 3, {"tenant_id", "ls_id", "op_type"}) DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_FINISH_PREPARE_MIGRATION, ObDagPrio::DAG_PRIO_HA_HIGH, ObSysTaskType::MIGRATION_TASK, "FINISH_PREPARE_MIGRATION", "MIGRATE", true, 3, {"tenant_id", "ls_id", "op_type"}) +DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_TABLET_CHECK_CONVERT, ObDagPrio::DAG_PRIO_HA_HIGH, 
ObSysTaskType::MIGRATION_TASK, "TABLET_CHECKE_CONVERT", "MIGRATE", + true, 3, {"tenant_id", "ls_id", "op_type"}) // DAG_TYPE_MIGRATE END DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_FAST_MIGRATE, ObDagPrio::DAG_PRIO_HA_MID, ObSysTaskType::MIGRATION_TASK, "FAST_MIGRATE", "MIGRATE", false, 0, {}) diff --git a/src/share/scheduler/ob_tenant_dag_scheduler.h b/src/share/scheduler/ob_tenant_dag_scheduler.h index 4b0bf4984..342df8cd0 100644 --- a/src/share/scheduler/ob_tenant_dag_scheduler.h +++ b/src/share/scheduler/ob_tenant_dag_scheduler.h @@ -226,6 +226,7 @@ public: TASK_TYPE_START_REBUILD_TABLET_TASK = 70, TASK_TYPE_TABLET_REBUILD_TASK = 71, TASK_TYPE_FINISH_REBUILD_TABLET_TASK = 72, + TASK_TYPE_CHECK_CONVERT_TABLET = 73, TASK_TYPE_MAX, }; @@ -599,6 +600,7 @@ public: void init_dag_id(); int set_dag_id(const ObDagId &dag_net_id); const ObDagId &get_dag_id() const { return dag_net_id_; } + void set_dag_net_id(const ObDagId &dag_net_id) { dag_net_id_ = dag_net_id; } void set_add_time() { add_time_ = ObTimeUtility::fast_current_time(); } int64_t get_add_time() const { return add_time_; } void set_start_time() { start_time_ = ObTimeUtility::fast_current_time(); } diff --git a/src/share/schema/ob_schema_struct.cpp b/src/share/schema/ob_schema_struct.cpp index f74da18a6..582d212c8 100644 --- a/src/share/schema/ob_schema_struct.cpp +++ b/src/share/schema/ob_schema_struct.cpp @@ -1835,6 +1835,12 @@ void ObTenantSchema::reset_zone_replica_attr_array() free(encryption_logonly_attr_set.get_base_address()); encryption_logonly_attr_set.reset(); } + SchemaReplicaAttrArray &columnstore_attr_set + = static_cast(zone_locality.replica_attr_set_.get_columnstore_replica_attr_array()); + if (nullptr != columnstore_attr_set.get_base_address()) { + free(columnstore_attr_set.get_base_address()); + columnstore_attr_set.reset(); + } } free(zone_replica_attr_array_.get_base_address()); zone_replica_attr_array_.reset(); @@ -1906,6 +1912,10 @@ int ObTenantSchema::set_zone_replica_attr_array( 
static_cast(this_schema_set->replica_attr_set_.get_encryption_logonly_replica_attr_array()), src_replica_attr_set.replica_attr_set_.get_encryption_logonly_replica_attr_array()))) { LOG_WARN("fail to set specific replica attr array", K(ret)); + } else if (OB_FAIL(set_specific_replica_attr_array( + static_cast(this_schema_set->replica_attr_set_.get_columnstore_replica_attr_array()), + src_replica_attr_set.replica_attr_set_.get_columnstore_replica_attr_array()))) { + LOG_WARN("fail to set specific replica attr array", K(ret)); } else if (OB_FAIL(deep_copy_string_array(src_replica_attr_set.zone_set_, this_schema_set->zone_set_))) { LOG_WARN("fail to copy schema replica attr set zone set", K(ret)); } else { @@ -1952,6 +1962,10 @@ int ObTenantSchema::set_zone_replica_attr_array( static_cast(this_schema_set->replica_attr_set_.get_encryption_logonly_replica_attr_array()), src_replica_attr_set.replica_attr_set_.get_encryption_logonly_replica_attr_array()))) { LOG_WARN("fail to set specific replica attr array", K(ret)); + } else if (OB_FAIL(set_specific_replica_attr_array( + static_cast(this_schema_set->replica_attr_set_.get_columnstore_replica_attr_array()), + src_replica_attr_set.replica_attr_set_.get_columnstore_replica_attr_array()))) { + LOG_WARN("fail to set specific replica attr array", K(ret)); } else { common::ObArray zone_set_ptrs; for (int64_t j = 0; OB_SUCC(ret) && j < src_replica_attr_set.zone_set_.count(); ++j) { @@ -2635,267 +2649,6 @@ int ObDatabaseSchema::get_primary_zone_inherit( } return ret; } -/*------------------------------------------------------------------------------------------------- - * ------------------------------ObLocality------------------------------------------- - ----------------------------------------------------------------------------------------------------*/ -void ObLocality::reset_zone_replica_attr_array() -{ - if (NULL != schema_ && NULL != zone_replica_attr_array_.get_base_address()) { - for (int64_t i = 0; i < 
zone_replica_attr_array_.count(); ++i) { - SchemaZoneReplicaAttrSet &zone_locality = zone_replica_attr_array_.at(i); - schema_->reset_string_array(zone_locality.zone_set_); - SchemaReplicaAttrArray &full_attr_set - = static_cast(zone_locality.replica_attr_set_.get_full_replica_attr_array()); - if (nullptr != full_attr_set.get_base_address()) { - schema_->free(full_attr_set.get_base_address()); - full_attr_set.reset(); - } - SchemaReplicaAttrArray &logonly_attr_set - = static_cast(zone_locality.replica_attr_set_.get_logonly_replica_attr_array()); - if (nullptr != logonly_attr_set.get_base_address()) { - schema_->free(logonly_attr_set.get_base_address()); - logonly_attr_set.reset(); - } - SchemaReplicaAttrArray &readonly_attr_set - = static_cast(zone_locality.replica_attr_set_.get_readonly_replica_attr_array()); - if (nullptr != readonly_attr_set.get_base_address()) { - schema_->free(readonly_attr_set.get_base_address()); - readonly_attr_set.reset(); - } - SchemaReplicaAttrArray &encryption_logonly_attr_set - = static_cast(zone_locality.replica_attr_set_.get_encryption_logonly_replica_attr_array()); - if (nullptr != encryption_logonly_attr_set.get_base_address()) { - schema_->free(encryption_logonly_attr_set.get_base_address()); - encryption_logonly_attr_set.reset(); - } - } - schema_->free(zone_replica_attr_array_.get_base_address()); - zone_replica_attr_array_.reset(); - } -} - -int ObLocality::set_specific_replica_attr_array( - SchemaReplicaAttrArray &this_schema_set, - const common::ObIArray &src) -{ - int ret = OB_SUCCESS; - const int64_t count = src.count(); - if (count > 0) { - const int64_t size = count * static_cast(sizeof(share::ReplicaAttr)); - void *ptr = nullptr; - if (nullptr == (ptr = schema_->alloc(size))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc failed", K(ret), K(size)); - } else if (FALSE_IT(this_schema_set.init(count, static_cast(ptr), count))) { - // shall never by here - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < 
src.count(); ++i) { - const share::ReplicaAttr &src_replica_attr = src.at(i); - ReplicaAttr *dst_replica_attr = &this_schema_set.at(i); - if (nullptr == (dst_replica_attr = new (dst_replica_attr) ReplicaAttr( - src_replica_attr.num_, src_replica_attr.memstore_percent_))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("placement new return nullptr", K(ret)); - } - } - } - } - return ret; -} - -int ObLocality::set_zone_replica_attr_array(const common::ObIArray &src) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(schema_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(schema_)); - } else { - reset_zone_replica_attr_array(); - const int64_t alloc_size = src.count() * static_cast(sizeof(SchemaZoneReplicaAttrSet)); - void *buf = NULL; - if (src.count() <= 0) { - // do nothing - } else if (NULL == (buf = schema_->alloc(alloc_size))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc failed", K(ret), K(alloc_size)); - } else { - zone_replica_attr_array_.init(src.count(), static_cast(buf), src.count()); - // call construct func in advance to avoid core status - // - ARRAY_NEW_CONSTRUCT(SchemaZoneReplicaAttrSet, zone_replica_attr_array_); - for (int64_t i = 0; i < src.count() && OB_SUCC(ret); ++i) { - const SchemaZoneReplicaAttrSet &src_replica_attr_set = src.at(i); - SchemaZoneReplicaAttrSet *this_schema_set = &zone_replica_attr_array_.at(i); - if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_full_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_full_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_logonly_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_logonly_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(set_specific_replica_attr_array( - 
static_cast(this_schema_set->replica_attr_set_.get_readonly_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_readonly_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_encryption_logonly_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_encryption_logonly_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(schema_->deep_copy_string_array( - src_replica_attr_set.zone_set_, this_schema_set->zone_set_))) { - LOG_WARN("fail to copy schema replica attr set zone set", K(ret)); - } else { - this_schema_set->zone_ = src_replica_attr_set.zone_; - } - } - } - } - return ret; -} - -int ObLocality::set_zone_replica_attr_array(const common::ObIArray &src) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(schema_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(schema_)); - } else { - reset_zone_replica_attr_array(); - const int64_t alloc_size = src.count() * static_cast(sizeof(SchemaZoneReplicaAttrSet)); - void *buf = NULL; - if (src.count() <= 0) { - // do nothing - } else if (NULL == (buf = schema_->alloc(alloc_size))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc failed", K(ret), K(alloc_size)); - } else { - zone_replica_attr_array_.init(src.count(), static_cast(buf), src.count()); - // call construct func in advance to avoid core status - // - ARRAY_NEW_CONSTRUCT(SchemaZoneReplicaAttrSet, zone_replica_attr_array_); - for (int64_t i = 0; i < src.count() && OB_SUCC(ret); ++i) { - const share::ObZoneReplicaAttrSet &src_replica_attr_set = src.at(i); - SchemaZoneReplicaAttrSet *this_schema_set = &zone_replica_attr_array_.at(i); - if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_full_replica_attr_array()), - 
src_replica_attr_set.replica_attr_set_.get_full_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_logonly_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_logonly_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_readonly_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_readonly_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else if (OB_FAIL(set_specific_replica_attr_array( - static_cast(this_schema_set->replica_attr_set_.get_encryption_logonly_replica_attr_array()), - src_replica_attr_set.replica_attr_set_.get_encryption_logonly_replica_attr_array()))) { - LOG_WARN("fail to set specific replica attr array", K(ret)); - } else { - common::ObArray zone_set_ptrs; - for (int64_t j = 0; OB_SUCC(ret) && j < src_replica_attr_set.zone_set_.count(); ++j) { - const common::ObZone &zone = src_replica_attr_set.zone_set_.at(j); - if (OB_FAIL(zone_set_ptrs.push_back(common::ObString(zone.size(), zone.ptr())))) { - LOG_WARN("fail to push back", K(ret)); - } else {} // no more to do - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(schema_->deep_copy_string_array(zone_set_ptrs, this_schema_set->zone_set_))) { - LOG_WARN("fail to copy schema replica attr set zone set", K(ret)); - } else { - this_schema_set->zone_ = src_replica_attr_set.zone_; - } - } - } - } - } - return ret; -} - -int ObLocality::assign(const ObLocality &other) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(schema_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(schema_)); - } else if (OB_FAIL(schema_->deep_copy_str(other.locality_str_, locality_str_))) { - LOG_WARN("fail to assign locality info", K(ret)); - } else if 
(OB_FAIL(set_zone_replica_attr_array(other.zone_replica_attr_array_))) { - LOG_WARN("set zone replica attr array failed", K(ret)); - } - return ret; -} - -int ObLocality::set_locality_str(const ObString &other) -{ - int ret = OB_SUCCESS; - if (OB_ISNULL(schema_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(schema_)); - } else if (OB_FAIL(schema_->deep_copy_str(other, locality_str_))) { - LOG_WARN("fail to assign locality info", K(ret)); - } - return ret; -} - -int64_t ObLocality::get_convert_size() const -{ - int64_t convert_size = sizeof(*this); - convert_size += zone_replica_attr_array_.count() * static_cast(sizeof(SchemaZoneReplicaAttrSet)); - for (int64_t i = 0; i < zone_replica_attr_array_.count(); ++i) { - convert_size += zone_replica_attr_array_.at(i).get_convert_size(); - } - convert_size += locality_str_.length() + 1; - return convert_size; -} - -void ObLocality::reset() -{ - if (OB_ISNULL(schema_)) { - LOG_ERROR_RET(OB_ERR_UNEXPECTED, "invalid schema info", K(schema_)); - } else { - reset_zone_replica_attr_array(); - if (!OB_ISNULL(locality_str_.ptr())) { - schema_->free(locality_str_.ptr()); - } - locality_str_.reset(); - } -} - -OB_DEF_SERIALIZE(ObLocality) -{ - int ret = OB_SUCCESS; - LST_DO_CODE(OB_UNIS_ENCODE, locality_str_); - if (OB_FAIL(ret)) { - LOG_WARN("func_SERIALIZE failed", K(ret)); - } else {} // no more to do - return ret; -} - -OB_DEF_DESERIALIZE(ObLocality) -{ - int ret = OB_SUCCESS; - ObString locality; - LST_DO_CODE(OB_UNIS_DECODE, locality); - if (OB_FAIL(ret)) { - } else if (OB_ISNULL(schema_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get invalid schema_ info", K(ret), K(schema_)); - } else if (OB_FAIL(schema_->deep_copy_str(locality, locality_str_))) { - LOG_WARN("fail to deep copy str", K(ret)); - } - return ret; -} - -OB_DEF_SERIALIZE_SIZE(ObLocality) -{ - int64_t len = 0; - LST_DO_CODE(OB_UNIS_ADD_LEN, locality_str_); - return len; -} - 
/*------------------------------------------------------------------------------------------------- * ------------------------------ObPrimaryZone------------------------------------------- diff --git a/src/share/schema/ob_schema_struct.h b/src/share/schema/ob_schema_struct.h index d9cc7b028..ec720ce5d 100755 --- a/src/share/schema/ob_schema_struct.h +++ b/src/share/schema/ob_schema_struct.h @@ -1413,7 +1413,6 @@ typedef common::ObArray ObPrimaryZoneArray; class ObSchema { public: - friend class ObLocality; friend class ObPrimaryZone; ObSchema(); //explicit ObSchema(common::ObDataBuffer &buffer); @@ -1530,31 +1529,6 @@ struct SchemaObj TO_STRING_KV(K_(schema_type), K_(tenant_id), K_(schema_id), KP_(schema)); }; -class ObLocality -{ - OB_UNIS_VERSION(1); -public: - explicit ObLocality(ObSchema *schema) : schema_(schema) {} - int assign(const ObLocality &other); - int set_locality_str(const common::ObString &locality); - int set_zone_replica_attr_array( - const common::ObIArray &src); - int set_zone_replica_attr_array( - const common::ObIArray &src); - int set_specific_replica_attr_array( - share::SchemaReplicaAttrArray &schema_replica_set, - const common::ObIArray &src); - void reset_zone_replica_attr_array(); - int64_t get_convert_size() const; - inline const common::ObString &get_locality_str() const { return locality_str_; } - void reset(); - TO_STRING_KV(K_(locality_str), K_(zone_replica_attr_array)); -public: - common::ObString locality_str_; - ZoneLocalityArray zone_replica_attr_array_; - ObSchema *schema_; -}; - class ObPrimaryZone { OB_UNIS_VERSION(1); diff --git a/src/share/schema/ob_table_dml_param.cpp b/src/share/schema/ob_table_dml_param.cpp index 954f0095c..df97f5f53 100644 --- a/src/share/schema/ob_table_dml_param.cpp +++ b/src/share/schema/ob_table_dml_param.cpp @@ -212,7 +212,7 @@ int ObTableSchemaParam::convert(const ObTableSchema *schema) if (OB_SUCC(ret)) { if (OB_FAIL(tmp_cols_index.push_back(col_index))) { LOG_WARN("fail to push_back col_index", 
K(ret)); - } else if (use_cs && OB_FAIL(schema->get_column_group_index(*column, cg_idx))) { + } else if (use_cs && OB_FAIL(schema->get_column_group_index(*column, false /*need_calculate_cg_idx*/, cg_idx))) { LOG_WARN("Fail to get column group index", K(ret)); } else if (use_cs && OB_FAIL(tmp_cg_idxs.push_back(cg_idx))) { LOG_WARN("Fail to push back cg idx", K(ret)); diff --git a/src/share/schema/ob_table_param.cpp b/src/share/schema/ob_table_param.cpp index ffedd9483..b05f36736 100644 --- a/src/share/schema/ob_table_param.cpp +++ b/src/share/schema/ob_table_param.cpp @@ -18,6 +18,7 @@ #include "observer/ob_server.h" #include "storage/ob_storage_schema.h" #include "storage/access/ob_table_read_info.h" +#include "storage/column_store/ob_column_store_replica_util.h" #include "share/ob_lob_access_utils.h" namespace oceanbase @@ -916,7 +917,8 @@ int ObTableParam::construct_columns_and_projector( const common::ObIArray & output_column_ids, const common::ObIArray *tsc_out_cols, const bool force_mysql_mode, - const sql::ObStoragePushdownFlag &pd_pushdown_flag) + const sql::ObStoragePushdownFlag &pd_pushdown_flag, + const bool query_cs_replica /*=false*/) { int ret = OB_SUCCESS; static const int64_t COMMON_COLUMN_NUM = 16; @@ -933,22 +935,19 @@ int ObTableParam::construct_columns_and_projector( bool is_cs = false; bool has_all_column_group = false; int64_t rowkey_count = 0; + is_column_replica_table_ = false; // row store table schema does not contains cg, if true, need calculate cg idx by designed rules - if (OB_SUCC(ret)) { - bool is_table_row_store = false; - if (OB_FAIL(table_schema.get_is_row_store(is_table_row_store))) { - LOG_WARN("fail to get is talbe row store", K(ret)); - } else { - is_cs = !is_table_row_store; - } + if (OB_FAIL(table_schema.get_is_column_store(is_cs))) { + LOG_WARN("fail to get is table column store", K(ret), K(table_schema)); + } else if (!is_cs && query_cs_replica) { + is_cs = true; + is_column_replica_table_ = true; } if (OB_FAIL(ret)) { } 
else if (OB_FAIL(table_schema.has_all_column_group(has_all_column_group))) { LOG_WARN("Failed to check if has all column group", K(ret)); - } - - if (OB_SUCC(ret)) { + } else { // column array const ObRowkeyInfo &rowkey_info = table_schema.get_rowkey_info(); rowkey_count = rowkey_info.get_size(); @@ -992,7 +991,7 @@ int ObTableParam::construct_columns_and_projector( } else if (OB_FAIL(tmp_access_cols_extend.push_back(tmp_col_extend))) { LOG_WARN("fail to push_back tmp_access_cols_extend", K(ret)); } else if (is_cs) { - if (OB_FAIL(table_schema.get_column_group_index(*column, cg_idx))) { + if (OB_FAIL(table_schema.get_column_group_index(*column, is_column_replica_table_, cg_idx))) { LOG_WARN("Fail to get column group index", K(ret)); } else if (OB_FAIL(tmp_cg_idxs.push_back(cg_idx))) { LOG_WARN("Fail to push back cg idx", K(ret)); @@ -1066,7 +1065,7 @@ int ObTableParam::construct_columns_and_projector( } else if (OB_FAIL(tmp_access_cols_extend.push_back(tmp_col_extend))) { LOG_WARN("fail to push_back tmp_access_cols_extend", K(ret)); } else if (is_cs) { - if (OB_FAIL(table_schema.get_column_group_index(*column, cg_idx))) { + if (OB_FAIL(table_schema.get_column_group_index(*column, is_column_replica_table_, cg_idx))) { LOG_WARN("Fail to get column group index", K(ret)); } else if (OB_FAIL(tmp_cg_idxs.push_back(cg_idx))) { LOG_WARN("Fail to push back cg idx", K(ret)); @@ -1248,7 +1247,8 @@ int ObTableParam::convert(const ObTableSchema &table_schema, const ObIArray &access_column_ids, const sql::ObStoragePushdownFlag &pd_pushdown_flag, const common::ObIArray *tsc_out_cols, - const bool force_mysql_mode) + const bool force_mysql_mode, + const bool query_cs_replica /*=false*/) { int ret = OB_SUCCESS; // if mocked rowid index is used @@ -1256,7 +1256,8 @@ int ObTableParam::convert(const ObTableSchema &table_schema, table_id_ = table_schema.get_table_id(); bool is_oracle_mode = false; const common::ObIArray *cols_param = nullptr; - if 
(OB_FAIL(construct_columns_and_projector(table_schema, access_column_ids, tsc_out_cols, force_mysql_mode, pd_pushdown_flag))) { + + if (OB_FAIL(construct_columns_and_projector(table_schema, access_column_ids, tsc_out_cols, force_mysql_mode, pd_pushdown_flag, query_cs_replica))) { LOG_WARN("construct failed", K(ret)); } else if (OB_ISNULL(cols_param = main_read_info_.get_columns())) { ret = OB_ERR_UNEXPECTED; @@ -1546,7 +1547,8 @@ int64_t ObTableParam::to_string(char *buf, const int64_t buf_len) const K_(rowid_projector), K_(enable_lob_locator_v2), K_(is_fts_index), - K_(parser_name)); + K_(parser_name), + K_(is_column_replica_table)); J_OBJ_END(); return pos; diff --git a/src/share/schema/ob_table_param.h b/src/share/schema/ob_table_param.h index 4d0f0931f..9a5378f81 100644 --- a/src/share/schema/ob_table_param.h +++ b/src/share/schema/ob_table_param.h @@ -292,7 +292,8 @@ public: const common::ObIArray &output_column_ids, const sql::ObStoragePushdownFlag &pd_pushdown_flag, const common::ObIArray *tsc_out_cols = NULL, - const bool force_mysql_mode = false); + const bool force_mysql_mode = false, + const bool query_cs_replica = false); // convert aggregate column projector from 'aggregate_column_ids' and 'output_projector_' // convert group by column projector from 'group_by_column_ids' and 'output_projector_' @@ -344,7 +345,8 @@ private: const common::ObIArray &output_column_ids, const common::ObIArray *tsc_out_cols, const bool force_mysql_mode, - const sql::ObStoragePushdownFlag &pd_pushdown_flag); + const sql::ObStoragePushdownFlag &pd_pushdown_flag, + const bool query_cs_replica = false); int filter_common_columns(const common::ObIArray &columns, common::ObIArray &new_columns); diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index 252524550..e43315e71 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -8958,14 +8958,17 @@ int ObTableSchema::is_column_group_exist(const ObString 
&cg_name, bool &exist) c return ret; } -int ObTableSchema::get_column_group_index(const share::schema::ObColumnParam ¶m, int32_t &cg_idx) const +int ObTableSchema::get_column_group_index( + const share::schema::ObColumnParam ¶m, + const bool need_calculate_cg_idx, + int32_t &cg_idx) const { int ret = OB_SUCCESS; - uint64_t column_id = param.get_column_id(); + const uint64_t column_id = param.get_column_id(); cg_idx = -1; - if (OB_UNLIKELY(1 >= column_group_cnt_)) { + if (OB_UNLIKELY(1 >= column_group_cnt_ && !need_calculate_cg_idx)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("No column group exist", K(ret), K_(is_column_store_supported), K_(column_group_cnt)); + LOG_WARN("No column group exist", K(ret), K(need_calculate_cg_idx), K_(is_column_store_supported), K_(column_group_cnt)); } else if (param.is_virtual_gen_col()) { cg_idx = -1; } else if (column_id < OB_END_RESERVED_COLUMN_ID_NUM && @@ -8974,7 +8977,9 @@ int ObTableSchema::get_column_group_index(const share::schema::ObColumnParam &pa common::OB_HIDDEN_PK_INCREMENT_COLUMN_ID != column_id) { // this has its own column group now if (common::OB_HIDDEN_TRANS_VERSION_COLUMN_ID == column_id || common::OB_HIDDEN_SQL_SEQUENCE_COLUMN_ID == column_id) { - if (OB_FAIL(get_base_rowkey_column_group_index(cg_idx))) { + if (need_calculate_cg_idx) { + cg_idx = OB_CS_COLUMN_REPLICA_ROWKEY_CG_IDX; + } else if (OB_FAIL(get_base_rowkey_column_group_index(cg_idx))) { LOG_WARN("Fail to get base/rowkey column group index", K(ret), K(column_id)); } } else { @@ -8984,6 +8989,10 @@ int ObTableSchema::get_column_group_index(const share::schema::ObColumnParam &pa // common::OB_HIDDEN_GROUP_IDX_COLUMN_ID == column_id cg_idx = -1; } + } else if (need_calculate_cg_idx) { + if (OB_FAIL(calc_column_group_index_(column_id, cg_idx))) { + LOG_WARN("Fail to calc_column_group_index", K(ret), K(column_id)); + } } else { bool found = false; int64_t cg_column_cnt = 0; @@ -9018,6 +9027,29 @@ int ObTableSchema::get_column_group_index(const 
share::schema::ObColumnParam &pa LOG_WARN("Unexpected, can not find cg idx", K(ret), K(column_id)); } } + LOG_TRACE("[CS-Replica] get column group index", K(ret), K(need_calculate_cg_idx), K(cg_idx)); + return ret; +} + +int ObTableSchema::calc_column_group_index_(const uint64_t column_id, int32_t &cg_idx) const +{ + int ret = OB_SUCCESS; + cg_idx = -1; + // for cs replica, constructed cg schemas start with rowkey cg so the cg idx of row key cg is ALWAYS 0 + // and cg idx of normal cg is shifted by offset 1. + for (int64_t i = 0; i < column_cnt_; i++) { + ObColumnSchemaV2 *column = column_array_[i]; + if (OB_NOT_NULL(column) && column->get_column_id() == column_id) { + cg_idx = i + 1; + break; + } + } + + if (OB_UNLIKELY(-1 == cg_idx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected cg idx", K(ret)); + } + return ret; } diff --git a/src/share/schema/ob_table_schema.h b/src/share/schema/ob_table_schema.h index 8f908aded..cb985c888 100644 --- a/src/share/schema/ob_table_schema.h +++ b/src/share/schema/ob_table_schema.h @@ -975,6 +975,8 @@ public: inline bool is_mlog_table() const { return is_mlog_table(table_type_); } inline static bool is_mlog_table(share::schema::ObTableType table_type) { return MATERIALIZED_VIEW_LOG == table_type; } + inline static bool is_user_data_table(share::schema::ObTableType table_type) + { return USER_TABLE == table_type; } inline bool is_in_recyclebin() const { return common::OB_RECYCLEBIN_SCHEMA_ID == database_id_; } virtual inline bool is_external_table() const override { return EXTERNAL_TABLE == table_type_; } @@ -1681,8 +1683,9 @@ public: int get_all_cg_type_column_group(const ObColumnGroupSchema *&column_group) const; int get_each_column_group(ObIArray &each_cgs) const; int is_partition_key_match_rowkey_prefix(bool &is_prefix) const; - int get_column_group_index(const share::schema::ObColumnParam ¶m, int32_t &cg_idx) const; - + int get_column_group_index(const share::schema::ObColumnParam ¶m, + const bool need_calculate_cg_idx, + 
int32_t &cg_idx) const; int is_column_group_exist(const common::ObString &cg_name, bool &exist) const; int get_all_column_ids(ObIArray &column_ids) const; @@ -1877,6 +1880,7 @@ private: ObRowkeyInfo &rowkey_info); int alter_view_column_internal(ObColumnSchemaV2 &column_schema); int get_base_rowkey_column_group_index(int32_t &cg_idx) const; + int calc_column_group_index_(const uint64_t column_id, int32_t &cg_idx) const; protected: uint64_t max_used_column_id_; diff --git a/src/share/system_variable/ob_system_variable_factory.cpp b/src/share/system_variable/ob_system_variable_factory.cpp index 69ed42d45..cb8327dbc 100644 --- a/src/share/system_variable/ob_system_variable_factory.cpp +++ b/src/share/system_variable/ob_system_variable_factory.cpp @@ -62,6 +62,7 @@ const char *ObSysVarObRoutePolicy::OB_ROUTE_POLICY_NAMES[] = { "ONLY_READONLY_ZONE", "UNMERGE_ZONE_FIRST", "UNMERGE_FOLLOWER_FIRST", + "COLUMN_STORE_ONLY", 0 }; const char *ObSysVarObEnableJit::OB_ENABLE_JIT_NAMES[] = { diff --git a/src/share/system_variable/ob_system_variable_init.cpp b/src/share/system_variable/ob_system_variable_init.cpp index e6f40e2cd..8b73096c0 100644 --- a/src/share/system_variable/ob_system_variable_init.cpp +++ b/src/share/system_variable/ob_system_variable_init.cpp @@ -1852,7 +1852,7 @@ static struct VarsInit{ ObSysVars[127].info_ = "the routing policy of obproxy/java client and observer internal retry, 1=READONLY_ZONE_FIRST, 2=ONLY_READONLY_ZONE, 3=UNMERGE_ZONE_FIRST, 4=UNMERGE_FOLLOWER_FIRST" ; ObSysVars[127].name_ = "ob_route_policy" ; ObSysVars[127].data_type_ = ObIntType ; - ObSysVars[127].enum_names_ = "[u'', u'READONLY_ZONE_FIRST', u'ONLY_READONLY_ZONE', u'UNMERGE_ZONE_FIRST', u'UNMERGE_FOLLOWER_FIRST']" ; + ObSysVars[127].enum_names_ = "[u'', u'READONLY_ZONE_FIRST', u'ONLY_READONLY_ZONE', u'UNMERGE_ZONE_FIRST', u'UNMERGE_FOLLOWER_FIRST', u'COLUMN_STORE_ONLY']" ; ObSysVars[127].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::INFLUENCE_PLAN | 
ObSysVarFlag::NEED_SERIALIZE ; ObSysVars[127].id_ = SYS_VAR_OB_ROUTE_POLICY ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OB_ROUTE_POLICY)) ; diff --git a/src/share/system_variable/ob_system_variable_init.json b/src/share/system_variable/ob_system_variable_init.json index b498cbc38..80381d5dd 100644 --- a/src/share/system_variable/ob_system_variable_init.json +++ b/src/share/system_variable/ob_system_variable_init.json @@ -1844,7 +1844,8 @@ "READONLY_ZONE_FIRST", "ONLY_READONLY_ZONE", "UNMERGE_ZONE_FIRST", - "UNMERGE_FOLLOWER_FIRST" + "UNMERGE_FOLLOWER_FIRST", + "COLUMN_STORE_ONLY" ], "publish_version": "", "info_cn": "", diff --git a/src/share/table/ob_table.h b/src/share/table/ob_table.h index ea04e1440..33789cf33 100644 --- a/src/share/table/ob_table.h +++ b/src/share/table/ob_table.h @@ -1036,7 +1036,7 @@ public: schema_version_(common::OB_INVALID_VERSION), tablet_id_(common::ObTabletID::INVALID_TABLET_ID), role_(common::ObRole::INVALID_ROLE), - replica_type_(common::ObReplicaType::REPLICA_TYPE_MAX), + replica_type_(common::ObReplicaType::REPLICA_TYPE_INVALID), part_renew_time_(0), reserved_(0) {} diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index 939ba1711..42bb8d0c9 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -212,7 +212,10 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, const bool pd_agg = scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown(); const bool pd_group_by = scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_group_by_pushdown(); ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); - CK(OB_NOT_NULL(schema_guard)); + ObBasicSessionInfo *session_info = cg_.opt_ctx_->get_session_info(); + int64_t route_policy = 0; + bool is_cs_replica_query = false; + CK(OB_NOT_NULL(schema_guard), OB_NOT_NULL(session_info)); if (OB_UNLIKELY(pd_agg && 0 == 
scan_ctdef.aggregate_column_ids_.count()) || OB_UNLIKELY(pd_group_by && 0 == scan_ctdef.group_by_column_ids_.count())) { ret = OB_INVALID_ARGUMENT; @@ -231,6 +234,10 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, } else if (table_schema->is_multivalue_index_aux() && FALSE_IT(scan_ctdef.table_param_.set_is_multivalue_index(true))) { } else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, tsc_out_cols))) { LOG_WARN("extract tsc output column ids failed", K(ret)); + } else if (OB_FAIL(session_info->get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) { + LOG_WARN("get route policy failed", K(ret)); + } else { + is_cs_replica_query = ObRoutePolicyType::COLUMN_STORE_ONLY == route_policy; } if (OB_FAIL(ret)) { @@ -240,7 +247,8 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, scan_ctdef.access_column_ids_, scan_ctdef.pd_expr_spec_.pd_storage_flag_, &tsc_out_cols, - is_oracle_mapping_real_virtual_table(op.get_ref_table_id())))) {/* for real agent table , use mysql mode compulsory*/ + is_oracle_mapping_real_virtual_table(op.get_ref_table_id()), /* for real agent table , use mysql mode compulsory*/ + is_cs_replica_query))) { LOG_WARN("convert schema failed", K(ret), K(*table_schema), K(scan_ctdef.access_column_ids_), K(op.get_index_back())); } else if ((pd_agg || pd_group_by) && @@ -1080,9 +1088,12 @@ int ObTscCgService::generate_table_loc_meta(uint64_t table_loc_id, loc_meta.is_external_files_on_disk_ = ObSQLUtils::is_external_files_on_local_disk(table_schema.get_external_file_location()); bool is_weak_read = false; + int64_t route_policy = 0; if (OB_ISNULL(cg_.opt_ctx_) || OB_ISNULL(cg_.opt_ctx_->get_exec_ctx())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(cg_.opt_ctx_), K(ret)); + } else if (OB_FAIL(session.get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) { + LOG_WARN("get route policy failed", K(ret)); } else if (stmt.get_query_ctx()->has_dml_write_stmt_) { 
loc_meta.select_leader_ = 1; loc_meta.is_weak_read_ = 0; @@ -1101,6 +1112,7 @@ int ObTscCgService::generate_table_loc_meta(uint64_t table_loc_id, loc_meta.select_leader_ = 1; loc_meta.is_weak_read_ = 0; } + loc_meta.route_policy_ = route_policy; if (OB_SUCC(ret) && !table_schema.is_global_index_table()) { TableLocRelInfo *rel_info = nullptr; ObTableID data_table_id = table_schema.is_index_table() && !table_schema.is_rowkey_doc_id() ? diff --git a/src/sql/das/ob_das_location_router.cpp b/src/sql/das/ob_das_location_router.cpp index 3d3e6eba1..f7238ec62 100755 --- a/src/sql/das/ob_das_location_router.cpp +++ b/src/sql/das/ob_das_location_router.cpp @@ -792,7 +792,8 @@ ObDASLocationRouter::~ObDASLocationRouter() int ObDASLocationRouter::nonblock_get_readable_replica(const uint64_t tenant_id, const ObTabletID &tablet_id, - ObDASTabletLoc &tablet_loc) + ObDASTabletLoc &tablet_loc, + const ObRoutePolicyType route_policy) { int ret = OB_SUCCESS; ObLSLocation ls_loc; @@ -836,7 +837,10 @@ int ObDASLocationRouter::nonblock_get_readable_replica(const uint64_t tenant_id, } else if (OB_FAIL(ObBLService::get_instance().check_in_black_list(bl_key, in_black_list))) { LOG_WARN("check in black list failed", K(ret)); } else if (!in_black_list) { - if (tmp_replica_loc.get_server() == GCTX.self_addr()) { + if (route_policy == COLUMN_STORE_ONLY && tmp_replica_loc.get_replica_type() != REPLICA_TYPE_COLUMNSTORE) { + // skip the tmp_replica_loc + LOG_TRACE("skip the replica due to the COLUMN_STORE_ONLY policy.", K(ret), K(tmp_replica_loc)); + } else if (tmp_replica_loc.get_server() == GCTX.self_addr()) { //prefer choose the local replica local_replica = &tmp_replica_loc; } else if (OB_FAIL(remote_replicas.push_back(&tmp_replica_loc))) { @@ -849,6 +853,10 @@ int ObDASLocationRouter::nonblock_get_readable_replica(const uint64_t tenant_id, if (OB_SUCC(ret)) { if (local_replica != nullptr) { tablet_loc.server_ = local_replica->get_server(); + } else if (route_policy == COLUMN_STORE_ONLY && 
remote_replicas.empty()) { + //do not retry + ret = OB_NO_REPLICA_VALID; + LOG_USER_ERROR(OB_NO_REPLICA_VALID); } else if (remote_replicas.empty()) { ret = OB_NO_READABLE_REPLICA; LOG_WARN("there has no readable replica", K(ret), K(tablet_id), K(ls_loc)); @@ -972,7 +980,8 @@ int ObDASLocationRouter::get_tablet_loc(const ObDASTableLocMeta &loc_meta, //if this statement is retried because of OB_NOT_MASTER, we will choose the leader directly ret = nonblock_get_leader(tenant_id, tablet_id, tablet_loc); } else { - ret = nonblock_get_readable_replica(tenant_id, tablet_id, tablet_loc); + ret = nonblock_get_readable_replica(tenant_id, tablet_id, tablet_loc, + static_cast(loc_meta.route_policy_)); } } return ret; diff --git a/src/sql/das/ob_das_location_router.h b/src/sql/das/ob_das_location_router.h index 6ea995e31..794193eb5 100644 --- a/src/sql/das/ob_das_location_router.h +++ b/src/sql/das/ob_das_location_router.h @@ -16,6 +16,7 @@ #include "share/schema/ob_schema_struct.h" #include "lib/container/ob_fixed_array.h" #include "sql/das/ob_das_define.h" +#include "sql/optimizer/ob_route_policy.h" namespace oceanbase { namespace common @@ -346,7 +347,8 @@ private: share::ObLSLocation &location); int nonblock_get_readable_replica(const uint64_t tenant_id, const common::ObTabletID &tablet_id, - ObDASTabletLoc &tablet_loc); + ObDASTabletLoc &tablet_loc, + const ObRoutePolicyType route_policy); private: int last_errno_; int cur_errno_; diff --git a/src/sql/optimizer/ob_intersect_route_policy.cpp b/src/sql/optimizer/ob_intersect_route_policy.cpp index 2efffc8f3..8982c5df1 100644 --- a/src/sql/optimizer/ob_intersect_route_policy.cpp +++ b/src/sql/optimizer/ob_intersect_route_policy.cpp @@ -34,7 +34,7 @@ int ObIntersectRoutePolicy::init_candidate_replicas(const ObListwill_use_column_store(OB_INVALID_ID, + ref_id, ref_id, index_back_will_use_column_store, index_back_will_use_row_store))) { @@ -2808,6 +2809,7 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id, 
LOG_WARN("failed to check will use skip scan", K(ret)); } else if (OB_FAIL(get_plan()->will_use_column_store(table_id, valid_index_ids.at(i), + ref_table_id, use_column_store, use_row_store))) { LOG_WARN("failed to check will use column store", K(ret)); @@ -10891,6 +10893,7 @@ int ObJoinOrder::find_possible_join_filter_tables(const ObLogPlanHint &log_plan_ info.use_column_store_ = true; } else if (OB_FAIL(get_plan()->will_use_column_store(info.table_id_, info.index_id_, + info.ref_table_id_, will_use_column_store, will_use_row_store))) { LOG_WARN("failed to check will use column store", K(ret)); diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 9ae0ef781..c93687cf3 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -630,12 +630,16 @@ int ObLogPlan::mock_base_rel_detectors(ObJoinOrder *&base_rel) int ObLogPlan::select_location(ObIArray &tbl_part_info_list) { int ret = OB_SUCCESS; + int64_t route_policy = 0; ObExecContext *exec_ctx = optimizer_context_.get_exec_ctx(); ObSEArray tbl_loc_list; ObSEArray phy_tbl_loc_info_list; - if (OB_ISNULL(exec_ctx)) { + ObSQLSessionInfo* session_info = optimizer_context_.get_session_info(); + if (OB_ISNULL(exec_ctx) || OB_ISNULL(session_info)) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("exec ctx is NULL", K(ret)); + } else if (OB_FAIL(session_info->get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) { + LOG_WARN("get route policy failed", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < tbl_part_info_list.count(); ++i) { ObTablePartitionInfo *tbl_part_info = tbl_part_info_list.at(i); @@ -649,6 +653,8 @@ int ObLogPlan::select_location(ObIArray &tbl_part_info_l &tbl_part_info->get_phy_tbl_location_info_for_update()))) { LOG_WARN("fail to push back phy tble loc info", K(ret), K(tbl_part_info->get_phy_tbl_location_info_for_update())); + } else { + tbl_part_info->get_table_location().get_loc_meta().route_policy_ = route_policy; } } if (OB_FAIL(ret)) { @@ 
-742,6 +748,9 @@ int ObLogPlan::select_replicas(ObExecContext &exec_ctx, } } } + } else if (COLUMN_STORE_ONLY == route_policy_type) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "when route policy is COLUMN_STORE_ONLY, weak read request"); } else { const bool sess_in_retry = session->get_is_in_retry_for_dup_tbl(); //重试状态下不优化复制表的副本选择 if (OB_FAIL(ObLogPlan::strong_select_replicas(local_server, phy_tbl_loc_info_list, is_hit_partition, sess_in_retry))) { @@ -13202,6 +13211,7 @@ int ObLogPlan::find_possible_join_filter_tables(ObLogicalOperator *op, info.use_column_store_ = true; } else if (OB_FAIL(will_use_column_store(info.table_id_, info.index_id_, + info.ref_table_id_, use_column_store, use_row_store))) { LOG_WARN("failed to check will use column store", K(ret)); @@ -13359,6 +13369,7 @@ int ObLogPlan::find_possible_join_filter_tables(ObLogicalOperator *op, int ObLogPlan::will_use_column_store(const uint64_t table_id, const uint64_t index_id, + const uint64_t ref_table_id, bool &use_column_store, bool &use_row_store) { @@ -13378,6 +13389,10 @@ int ObLogPlan::will_use_column_store(const uint64_t table_id, OB_ISNULL(session_info=get_optimizer_context().get_session_info())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("NULL pointer error", K(stmt), K(schema_guard), K(ret)); + } else if (get_optimizer_context().use_column_store_replica() && + index_id == ref_table_id) { + use_column_store = true; + use_row_store = false; } else if (OB_FALSE_IT(session_disable_column_store=!session_info->is_enable_column_store())) { } else if (OB_FALSE_IT(is_link=ObSqlSchemaGuard::is_link_table(stmt, table_id))) { } else if (is_link) { diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index ba8e46f3d..98c0ad62a 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -1396,6 +1396,7 @@ public: int will_use_column_store(const uint64_t table_id, const uint64_t index_id, + const uint64_t ref_table_id, bool &use_column_store, 
bool &use_row_store); diff --git a/src/sql/optimizer/ob_optimizer.cpp b/src/sql/optimizer/ob_optimizer.cpp index 89dea29cc..27208a35e 100644 --- a/src/sql/optimizer/ob_optimizer.cpp +++ b/src/sql/optimizer/ob_optimizer.cpp @@ -543,6 +543,8 @@ int ObOptimizer::init_env_info(ObDMLStmt &stmt) LOG_WARN("fail to check enable pdml", K(ret)); } else if (OB_FAIL(init_parallel_policy(stmt, *session_info))) { // call after check pdml enabled LOG_WARN("fail to check enable pdml", K(ret)); + } else if (OB_FAIL(init_replica_policy(stmt, *session_info))) { + LOG_WARN("fail to check enable column store replica", K(ret)); } else if (OB_FAIL(init_correlation_model(stmt, *session_info))) { LOG_WARN("failed to init correlation model", K(ret)); } @@ -694,6 +696,24 @@ int ObOptimizer::init_parallel_policy(ObDMLStmt &stmt, const ObSQLSessionInfo &s return ret; } +int ObOptimizer::init_replica_policy(ObDMLStmt &dml_stmt, const ObSQLSessionInfo &session) +{ + int ret = OB_SUCCESS; + int64_t route_policy_type = 0; + if (OB_FAIL(session.get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy_type))) { + LOG_WARN("fail to get sys variable", K(ret)); + } else if (COLUMN_STORE_ONLY == static_cast(route_policy_type)) { + if (dml_stmt.get_query_ctx()->has_dml_write_stmt_ || + dml_stmt.get_query_ctx()->is_contain_select_for_update_) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "when route policy is COLUMN_STORE_ONLY, read query request"); + } else { + ctx_.set_use_column_store_replica(true); + } + } + return ret; +} + int ObOptimizer::init_correlation_model(ObDMLStmt &stmt, const ObSQLSessionInfo &session) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_optimizer.h b/src/sql/optimizer/ob_optimizer.h index 24bdcfe94..270f7d92b 100644 --- a/src/sql/optimizer/ob_optimizer.h +++ b/src/sql/optimizer/ob_optimizer.h @@ -203,6 +203,7 @@ namespace sql int extract_opt_ctx_basic_flags(const ObDMLStmt &stmt, ObSQLSessionInfo &session); int init_parallel_policy(ObDMLStmt &stmt, 
const ObSQLSessionInfo &session); + int init_replica_policy(ObDMLStmt &stmt, const ObSQLSessionInfo &session); int set_auto_dop_params(const ObSQLSessionInfo &session); int check_pdml_enabled(const ObDMLStmt &stmt, const ObSQLSessionInfo &session); diff --git a/src/sql/optimizer/ob_optimizer_context.h b/src/sql/optimizer/ob_optimizer_context.h index 8e0ed049a..bf43140ba 100644 --- a/src/sql/optimizer/ob_optimizer_context.h +++ b/src/sql/optimizer/ob_optimizer_context.h @@ -244,7 +244,8 @@ ObOptimizerContext(ObSQLSessionInfo *session_info, storage_estimation_enabled_(false), das_keep_order_enabled_(true), generate_random_plan_(false), - correlation_type_(ObEstCorrelationType::MAX) + correlation_type_(ObEstCorrelationType::MAX), + use_column_store_replica_(false) { } inline common::ObOptStatManager *get_opt_stat_manager() { return opt_stat_manager_; } inline void set_opt_stat_manager(common::ObOptStatManager *sm) { opt_stat_manager_ = sm; } @@ -618,6 +619,8 @@ ObOptimizerContext(ObSQLSessionInfo *session_info, inline const OptSystemStat& get_system_stat() const { return system_stat_; } inline bool generate_random_plan() const { return generate_random_plan_; } inline void set_generate_random_plan(bool rand_plan) { generate_random_plan_ = rand_plan; } + inline bool use_column_store_replica() const { return use_column_store_replica_; } + inline void set_use_column_store_replica(bool use) { use_column_store_replica_ = use; } inline void set_correlation_type(ObEstCorrelationType type) { correlation_type_ = type; } inline ObEstCorrelationType get_correlation_type() const { return correlation_type_; } @@ -708,6 +711,7 @@ private: bool generate_random_plan_; ObEstCorrelationType correlation_type_; + bool use_column_store_replica_; }; } } diff --git a/src/sql/optimizer/ob_replica_compare.cpp b/src/sql/optimizer/ob_replica_compare.cpp index 3d4493af6..0aa8a6428 100644 --- a/src/sql/optimizer/ob_replica_compare.cpp +++ b/src/sql/optimizer/ob_replica_compare.cpp @@ -23,11 +23,13 
@@ ObReplicaCompare::ObReplicaCompare(ObRoutePolicyType policy_type) policy_type_(policy_type), readonly_zone_first_{IS_OTHER_REGION, ZONE_TYPE, MERGE_STATUS, POS_TYPE}, only_readonly_zone_{ZONE_TYPE, IS_OTHER_REGION, MERGE_STATUS, POS_TYPE,}, - unmerge_zone_first_{IS_OTHER_REGION, MERGE_STATUS, ZONE_TYPE, POS_TYPE} + unmerge_zone_first_{IS_OTHER_REGION, MERGE_STATUS, ZONE_TYPE, POS_TYPE}, + column_store_only_{ZONE_TYPE, IS_OTHER_REGION, MERGE_STATUS, POS_TYPE} { static_assert(sizeof(readonly_zone_first_) == sizeof(only_readonly_zone_), "invalid array size"); static_assert(sizeof(readonly_zone_first_) == sizeof(unmerge_zone_first_), "invalid array size"); static_assert((sizeof(readonly_zone_first_)/sizeof(CompareType)) == (sizeof(cmp_func_array_)/sizeof(CmpFuncPtr)), "invalid array size"); + static_assert(sizeof(readonly_zone_first_) == sizeof(column_store_only_), "invalid array size"); cmp_func_array_[IS_OTHER_REGION] = &ObReplicaCompare::compare_other_region; cmp_func_array_[ZONE_TYPE] = &ObReplicaCompare::compare_zone_type; @@ -50,6 +52,8 @@ bool ObReplicaCompare::operator()(const ObRoutePolicy::CandidateReplica &replica cmp_type_array = only_readonly_zone_; } else if (UNMERGE_ZONE_FIRST == policy_type_) { cmp_type_array = unmerge_zone_first_; + } else if (COLUMN_STORE_ONLY == policy_type_) { + cmp_type_array = column_store_only_; } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected policy type", K(policy_type_), K(ret)); diff --git a/src/sql/optimizer/ob_replica_compare.h b/src/sql/optimizer/ob_replica_compare.h index 4a7fe94ce..a271602c7 100644 --- a/src/sql/optimizer/ob_replica_compare.h +++ b/src/sql/optimizer/ob_replica_compare.h @@ -59,6 +59,7 @@ private: CompareType readonly_zone_first_[CMP_CNT]; CompareType only_readonly_zone_[CMP_CNT]; CompareType unmerge_zone_first_[CMP_CNT]; + CompareType column_store_only_[CMP_CNT]; }; diff --git a/src/sql/optimizer/ob_route_policy.cpp b/src/sql/optimizer/ob_route_policy.cpp index 771fe04b1..6b622dd34 100644 --- 
a/src/sql/optimizer/ob_route_policy.cpp +++ b/src/sql/optimizer/ob_route_policy.cpp @@ -16,6 +16,7 @@ #include "sql/optimizer/ob_phy_table_location_info.h" #include "sql/optimizer/ob_log_plan.h" #include "storage/ob_locality_manager.h" +#include "lib/ob_define.h" using namespace oceanbase::common; using namespace oceanbase::share; using namespace oceanbase::storage; @@ -82,6 +83,8 @@ int ObRoutePolicy::filter_replica(const ObAddr &local_server, } else { LOG_TRACE("check ls readable", K(ctx), K(ls_id), K(cur_replica.get_server()), K(can_read)); if ((policy_type == ONLY_READONLY_ZONE && cur_replica.attr_.zone_type_ == ZONE_TYPE_READWRITE) + || (policy_type == COLUMN_STORE_ONLY && !ObReplicaTypeCheck::is_columnstore_replica(cur_replica.get_replica_type())) + || (policy_type != COLUMN_STORE_ONLY && ObReplicaTypeCheck::is_columnstore_replica(cur_replica.get_replica_type())) || cur_replica.attr_.zone_status_ == ObZoneStatus::INACTIVE || cur_replica.attr_.server_status_ != ObServerStatus::OB_SERVER_ACTIVE || cur_replica.attr_.start_service_time_ == 0 @@ -102,6 +105,18 @@ int ObRoutePolicy::filter_replica(const ObAddr &local_server, } } } + if (OB_SUCC(ret) && policy_type == COLUMN_STORE_ONLY) { + for (int64_t i = candi_replicas.count()-1; OB_SUCC(ret) && i >= 0; --i) { + CandidateReplica &cur_replica = candi_replicas.at(i); + if (cur_replica.is_filter_ && OB_FAIL(candi_replicas.remove(i))) { + LOG_WARN("failed to remove filted replica", K(ret)); + } + } + if (OB_SUCC(ret) && candi_replicas.count() == 0) { + ret = OB_NO_REPLICA_VALID; + LOG_USER_ERROR(OB_NO_REPLICA_VALID); + } + } return ret; } diff --git a/src/sql/optimizer/ob_route_policy.h b/src/sql/optimizer/ob_route_policy.h index ad188d863..c25c123ee 100644 --- a/src/sql/optimizer/ob_route_policy.h +++ b/src/sql/optimizer/ob_route_policy.h @@ -38,6 +38,7 @@ enum ObRoutePolicyType // 即使客户端将请求路由到partition主上, 也在本地执行, // 区别在于返回给OCJ && ObProxy的反馈不同; UNMERGE_FOLLOWER_FIRST = 4, + COLUMN_STORE_ONLY = 5, POLICY_TYPE_MAX }; 
@@ -228,7 +229,9 @@ protected: // 集群为读写zone时, 且ob_route_policy为UNMERGE_FOLLOWER_FIRST时,同样按照READONLY_ZONE_FIRST处理, 但会增加反馈内容 // 集群为有只读zone时,且ob_route_policy为UNMERGE_FOLLOWER_FIRST时, 同样按照READONLY_ZONE_FIRST处理,此时不会增加反馈内容 ObRoutePolicyType type = INVALID_POLICY; - if (has_readonly_zone_) { + if (COLUMN_STORE_ONLY == ctx.policy_type_) { + type = ctx.policy_type_; + } else if (has_readonly_zone_) { if (UNMERGE_FOLLOWER_FIRST == ctx.policy_type_) { type = READONLY_ZONE_FIRST; } else { diff --git a/src/sql/optimizer/ob_table_location.cpp b/src/sql/optimizer/ob_table_location.cpp index eb24bf3ba..9cc39a8e6 100644 --- a/src/sql/optimizer/ob_table_location.cpp +++ b/src/sql/optimizer/ob_table_location.cpp @@ -1386,6 +1386,19 @@ int ObTableLocation::get_is_weak_read(const ObDMLStmt &dml_stmt, is_weak_read = (ObTxConsistencyType::BOUNDED_STALENESS_READ == trans_consistency_type); } } + if (OB_SUCC(ret) && !is_weak_read) { + int64_t route_policy_type = 0; + if (OB_FAIL(session->get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy_type))) { + LOG_WARN("fail to get sys variable", K(ret)); + } else if (COLUMN_STORE_ONLY == static_cast(route_policy_type)) { + if (dml_stmt.get_query_ctx()->is_contain_inner_table_) { + is_weak_read = true; + } else { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "when route policy is COLUMN_STORE_ONLY, weak read request"); + } + } + } return ret; } diff --git a/src/sql/resolver/cmd/ob_alter_system_resolver.cpp b/src/sql/resolver/cmd/ob_alter_system_resolver.cpp index 85a41fa94..503ca673c 100644 --- a/src/sql/resolver/cmd/ob_alter_system_resolver.cpp +++ b/src/sql/resolver/cmd/ob_alter_system_resolver.cpp @@ -16,7 +16,6 @@ #include "common/ob_region.h" #include "lib/string/ob_sql_string.h" #include "share/schema/ob_schema_getter_guard.h" -#include "share/ob_locality_parser.h" #include "share/ob_time_utility2.h" #include "share/ob_encryption_util.h" #ifdef OB_BUILD_TDE_SECURITY @@ -142,8 +141,40 @@ int 
ObAlterSystemResolverUtil::resolve_replica_type(const ParseNode *parse_tree, } else { int64_t len = parse_tree->str_len_; const char *str = parse_tree->str_value_; - if (OB_FAIL(ObLocalityParser::parse_type(str, len, replica_type))) { - // do nothing, error log will print inside parse_type + if (OB_ISNULL(str)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid replica type string. null!", K(ret)); + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "replica_type, replica_type should not be null"); + } else { + replica_type = share::ObShareUtil::string_to_replica_type(str); + if (REPLICA_TYPE_INVALID == replica_type) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid replica type string", K(str), K(ret)); + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "replica_type, unrecognized replica_type"); + } else if (! ObReplicaTypeCheck::is_replica_type_valid(replica_type)) { + ret = OB_NOT_SUPPORTED; + char err_msg[64] = {0}; + (void)snprintf(err_msg, sizeof(err_msg), "%s replica", ObShareUtil::replica_type_to_string(replica_type)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, err_msg); + } else { + // good, valid replica_type + } + } + } + return ret; +} + +int ObAlterSystemResolverUtil::check_compatibility_for_replica_type(const ObReplicaType replica_type, const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + if (ObReplicaTypeCheck::is_columnstore_replica(replica_type)) { + bool is_compatible = false; + if (OB_FAIL(ObShareUtil::check_compat_version_for_columnstore_replica(tenant_id, is_compatible))) { + LOG_WARN("failed to check compat version for C-replcia", KR(ret), K(tenant_id)); + } else if (OB_UNLIKELY(!is_compatible)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("data_version lower than 4.3.3, C-replica not supported"); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "data_version is lower than 4.3.3, C-replica"); } } return ret; @@ -2984,7 +3015,7 @@ int ObAddLSReplicaResolver::resolve(const ParseNode &parse_tree) int64_t ls_id = 0; common::ObAddr server_addr; - common::ObReplicaType replica_type = 
REPLICA_TYPE_MAX; + common::ObReplicaType replica_type = REPLICA_TYPE_INVALID; common::ObAddr data_source; int64_t paxos_replica_num = 0; uint64_t tenant_id = OB_INVALID_TENANT_ID; @@ -3000,6 +3031,8 @@ int ObAddLSReplicaResolver::resolve(const ParseNode &parse_tree) LOG_WARN("resolve server failed", KR(ret), KP(server_addr_node)); } else if (OB_FAIL(Util::resolve_replica_type(replica_type_node, replica_type))) { LOG_WARN("resolve replica type failed", KR(ret), KP(replica_type_node)); + } else if (OB_FAIL(Util::check_compatibility_for_replica_type(replica_type, tenant_id))) { + LOG_WARN("check compatibility for replica_type failed", KR(ret), K(replica_type), K(tenant_id)); } else if (OB_FAIL(Util::check_and_get_data_source(data_source_node, data_source))) { LOG_WARN("check and get data source failed", KR(ret), KP(data_source_node)); } else if (OB_FAIL(Util::check_and_get_paxos_replica_num(paxos_replica_num_node, paxos_replica_num))) { @@ -3178,7 +3211,7 @@ int ObModifyLSReplicaResolver::resolve(const ParseNode &parse_tree) ParseNode *tenant_name_node = parse_tree.children_[4]; int64_t ls_id = 0; common::ObAddr server_addr; - common::ObReplicaType replica_type = REPLICA_TYPE_MAX; + common::ObReplicaType replica_type = REPLICA_TYPE_INVALID; int64_t paxos_replica_num = 0; uint64_t tenant_id = OB_INVALID_TENANT_ID; if (OB_FAIL(Util::do_check_for_alter_ls_replica(tenant_name_node, @@ -3193,6 +3226,8 @@ int ObModifyLSReplicaResolver::resolve(const ParseNode &parse_tree) LOG_WARN("resolve server failed", KR(ret), KP(server_addr_node)); } else if (OB_FAIL(Util::resolve_replica_type(replica_type_node, replica_type))) { LOG_WARN("resolve replica type failed", KR(ret), KP(replica_type_node)); + } else if (OB_FAIL(Util::check_compatibility_for_replica_type(replica_type, tenant_id))) { + LOG_WARN("check compatibility for replica_type failed", KR(ret), K(replica_type), K(tenant_id)); } else if (OB_FAIL(Util::check_and_get_paxos_replica_num(paxos_replica_num_node, 
paxos_replica_num))) { LOG_WARN("check and get paxos replica num failed", KR(ret), KP(paxos_replica_num_node)); } diff --git a/src/sql/resolver/cmd/ob_alter_system_resolver.h b/src/sql/resolver/cmd/ob_alter_system_resolver.h index 682350bb1..839a2b62d 100644 --- a/src/sql/resolver/cmd/ob_alter_system_resolver.h +++ b/src/sql/resolver/cmd/ob_alter_system_resolver.h @@ -46,6 +46,7 @@ public: static int resolve_replica_type(const ParseNode *parse_tree, common::ObReplicaType &replica_type); + static int check_compatibility_for_replica_type(const ObReplicaType replica_type, const uint64_t tenant_id); static int resolve_memstore_percent(const ParseNode *parse_tree, ObReplicaProperty &replica_property); static int resolve_string(const ParseNode *parse_tree, common::ObString &string); diff --git a/src/sql/resolver/cmd/ob_resource_resolver.h b/src/sql/resolver/cmd/ob_resource_resolver.h index 92882bcaf..7bf4ca321 100644 --- a/src/sql/resolver/cmd/ob_resource_resolver.h +++ b/src/sql/resolver/cmd/ob_resource_resolver.h @@ -106,6 +106,10 @@ int ObResourcePoolOptionResolver::resolve_option(T *stmt, ParseNode *option_n SQL_RESV_LOG(WARN, "invalid replica type option_node", K(ret), K(option_node)); } else if (OB_FAIL(ObAlterSystemResolverUtil::resolve_replica_type(option_node->children_[0], type))) { SQL_RESV_LOG(WARN, "fail to resove repilca type", K(ret)); + } else if (REPLICA_TYPE_FULL != type) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("replica_type of resource pool other than FULL not supported.", KR(ret), K(type)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "replica_type of resource pool other than FULL replica"); } else { stmt->set_replica_type(type); } diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 23ac9cb28..6d89f6a7e 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -235,6 +235,7 @@ ob_set_subtarget(ob_storage high_availability high_availability/ob_ls_block_tx_service.cpp high_availability/ob_storage_ha_diagnose_mgr.cpp 
high_availability/ob_storage_ha_diagnose_service.cpp + high_availability/ob_cs_replica_migration.cpp ) ob_set_subtarget(ob_storage restore @@ -516,6 +517,7 @@ ob_set_subtarget(ob_storage column_store column_store/ob_virtual_cg_scanner.cpp column_store/ob_column_store_util.cpp column_store/ob_cg_group_by_scanner.cpp + column_store/ob_column_store_replica_util.cpp ) ob_set_subtarget(ob_storage access diff --git a/src/storage/access/ob_table_access_param.cpp b/src/storage/access/ob_table_access_param.cpp index 30c51b682..6305d16cb 100644 --- a/src/storage/access/ob_table_access_param.cpp +++ b/src/storage/access/ob_table_access_param.cpp @@ -59,7 +59,8 @@ ObTableIterParam::ObTableIterParam() auto_split_filter_type_(OB_INVALID_ID), auto_split_filter_(nullptr), auto_split_params_(nullptr), - is_tablet_spliting_(false) + is_tablet_spliting_(false), + is_column_replica_table_(false) { } @@ -111,6 +112,7 @@ void ObTableIterParam::reset() auto_split_filter_ = nullptr; auto_split_params_ = nullptr; is_tablet_spliting_ = false; + is_column_replica_table_ = false; ObSSTableIndexFilterFactory::destroy_sstable_index_filter(sstable_index_filter_); } @@ -316,6 +318,7 @@ int ObTableAccessParam::init( iter_param_.table_scan_opt_.storage_rowsets_size_ = 1; } iter_param_.pushdown_filter_ = scan_param.pd_storage_filters_; + iter_param_.is_column_replica_table_ = table_param.is_column_replica_table(); // disable blockscan if scan order is KeepOrder(for iterator iterator and table api) // disable blockscan if use index skip scan as no large range to scan if (OB_UNLIKELY(ObQueryFlag::KeepOrder == scan_param.scan_flag_.scan_order_ || diff --git a/src/storage/access/ob_table_access_param.h b/src/storage/access/ob_table_access_param.h index f7358902c..b21b524b4 100644 --- a/src/storage/access/ob_table_access_param.h +++ b/src/storage/access/ob_table_access_param.h @@ -230,6 +230,7 @@ public: const sql::ObExpr *auto_split_filter_; sql::ExprFixedArray *auto_split_params_; bool 
is_tablet_spliting_; + bool is_column_replica_table_; }; struct ObTableAccessParam diff --git a/src/storage/blocksstable/ob_data_store_desc.cpp b/src/storage/blocksstable/ob_data_store_desc.cpp index a6616c262..d28851bec 100644 --- a/src/storage/blocksstable/ob_data_store_desc.cpp +++ b/src/storage/blocksstable/ob_data_store_desc.cpp @@ -37,6 +37,7 @@ bool ObStaticDataStoreDesc::is_valid() const void ObStaticDataStoreDesc::reset() { MEMSET(this, 0, sizeof(*this)); + need_submit_io_ = true; } int ObStaticDataStoreDesc::assign(const ObStaticDataStoreDesc &desc) @@ -58,6 +59,7 @@ int ObStaticDataStoreDesc::assign(const ObStaticDataStoreDesc &desc) encrypt_id_ = desc.encrypt_id_; master_key_id_ = desc.master_key_id_; MEMCPY(encrypt_key_, desc.encrypt_key_, sizeof(encrypt_key_)); + need_submit_io_ = desc.need_submit_io_; return ret; } @@ -104,7 +106,8 @@ int ObStaticDataStoreDesc::init( const compaction::ObMergeType merge_type, const int64_t snapshot_version, const share::SCN &end_scn, - const int64_t cluster_version) + const int64_t cluster_version, + const bool need_submit_io) { int ret = OB_SUCCESS; const bool is_major = compaction::is_major_or_meta_merge_type(merge_type); @@ -118,6 +121,7 @@ int ObStaticDataStoreDesc::init( merge_type_ = merge_type; ls_id_ = ls_id; tablet_id_ = tablet_id; + need_submit_io_ = need_submit_io; if (!is_major) { end_scn_ = end_scn; @@ -863,11 +867,12 @@ int ObWholeDataStoreDesc::init( const int64_t cluster_version, const share::SCN &end_scn, const storage::ObStorageColumnGroupSchema *cg_schema, - const uint16_t table_cg_idx) + const uint16_t table_cg_idx, + const bool need_submit_io /*=true*/) { int ret = OB_SUCCESS; reset(); - if (OB_FAIL(static_desc_.init(is_ddl, merge_schema, ls_id, tablet_id, merge_type, snapshot_version, end_scn, cluster_version))) { + if (OB_FAIL(static_desc_.init(is_ddl, merge_schema, ls_id, tablet_id, merge_type, snapshot_version, end_scn, cluster_version, need_submit_io))) { STORAGE_LOG(WARN, "failed to init 
static desc", KR(ret)); } else if (OB_FAIL(inner_init(merge_schema, cg_schema, table_cg_idx))) { STORAGE_LOG(WARN, "failed to init", KR(ret), K(merge_schema), K(cg_schema), K(table_cg_idx)); diff --git a/src/storage/blocksstable/ob_data_store_desc.h b/src/storage/blocksstable/ob_data_store_desc.h index b12f4c7c9..d380fe760 100644 --- a/src/storage/blocksstable/ob_data_store_desc.h +++ b/src/storage/blocksstable/ob_data_store_desc.h @@ -58,7 +58,8 @@ public: const compaction::ObMergeType merge_type, const int64_t snapshot_version, const share::SCN &end_scn, - const int64_t cluster_version); + const int64_t cluster_version, + const bool need_submit_io = true); bool is_valid() const; void reset(); int assign(const ObStaticDataStoreDesc &desc); @@ -78,7 +79,8 @@ public: K_(master_key_id), KPHEX_(encrypt_key, sizeof(encrypt_key_)), K_(major_working_cluster_version), - K_(progressive_merge_round)); + K_(progressive_merge_round), + K_(need_submit_io)); private: OB_INLINE int init_encryption_info(const share::schema::ObMergeSchema &merge_schema); OB_INLINE void init_block_size(const share::schema::ObMergeSchema &merge_schema); @@ -106,6 +108,9 @@ public: int64_t encrypt_id_; int64_t master_key_id_; char encrypt_key_[share::OB_MAX_TABLESPACE_ENCRYPT_KEY_LENGTH]; + // For ddl redo log for cs replica, leader write only macro block data in memory but do not flush to disk. + // indicate whether to submit io to write macro block data to disk. 
+ bool need_submit_io_; }; // ObColDataStoreDesc is same for every parallel task @@ -253,6 +258,7 @@ public: STATIC_DESC_FUNC(ObCompressorType, compressor_type); STATIC_DESC_FUNC(int64_t, major_working_cluster_version); STATIC_DESC_FUNC(const char *, encrypt_key); + STATIC_DESC_FUNC(bool, need_submit_io); COL_DESC_FUNC(bool, is_row_store); COL_DESC_FUNC(uint16_t, table_cg_idx); COL_DESC_FUNC(int64_t, row_column_count); @@ -335,7 +341,8 @@ struct ObWholeDataStoreDesc const int64_t cluster_version, const share::SCN &end_scn = share::SCN::invalid_scn(), const storage::ObStorageColumnGroupSchema *cg_schema = nullptr, - const uint16_t table_cg_idx = 0); + const uint16_t table_cg_idx = 0, + const bool need_submit_io = true); int gen_index_store_desc(const ObDataStoreDesc &data_desc); int assign(const ObDataStoreDesc &desc); ObStaticDataStoreDesc &get_static_desc() { return static_desc_; } diff --git a/src/storage/blocksstable/ob_macro_block.cpp b/src/storage/blocksstable/ob_macro_block.cpp index 1944a77ed..0cf0928e4 100644 --- a/src/storage/blocksstable/ob_macro_block.cpp +++ b/src/storage/blocksstable/ob_macro_block.cpp @@ -357,21 +357,10 @@ int ObMacroBlock::flush(ObMacroBlockHandle &macro_handle, macro_header_.fixed_header_.data_checksum_ = 0; } #endif - + const bool need_flush_macro = spec_->get_need_submit_io(); if (OB_FAIL(write_macro_header())) { STORAGE_LOG(WARN, "fail to write macro header", K(ret), K_(macro_header)); - } else { - const int64_t common_header_size = common_header_.get_serialize_size(); - const char *payload_buf = data_.data() + common_header_size; - const int64_t payload_size = data_.length() - common_header_size; - common_header_.set_payload_size(static_cast(payload_size)); - common_header_.set_payload_checksum(static_cast(ob_crc64(payload_buf, payload_size))); - } - if (OB_FAIL(ret)) { - // do nothing - } else if (OB_FAIL(common_header_.build_serialized_header(data_.data(), data_.capacity()))) { - STORAGE_LOG(WARN, "Fail to build common header, ", 
K(ret), K_(common_header)); - } else { + } else if (need_flush_macro) { ObMacroBlockWriteInfo write_info; write_info.buffer_ = data_.data(); if (backup::ObBackupDeviceMacroBlockId::is_backup_block_file(macro_handle.get_macro_id().first_id())) { @@ -486,7 +475,18 @@ int ObMacroBlock::write_macro_header() int64_t pos = 0; if (OB_FAIL(macro_header_.serialize(data_.data() + common_header_size, buf_len, pos))) { STORAGE_LOG(WARN, "fail to serialize macro block", K(ret), K(macro_header_)); + } else { + const int64_t common_header_size = common_header_.get_serialize_size(); + const char *payload_buf = data_.data() + common_header_size; + const int64_t payload_size = data_.length() - common_header_size; + common_header_.set_payload_size(static_cast(payload_size)); + common_header_.set_payload_checksum(static_cast(ob_crc64(payload_buf, payload_size))); + + if (OB_FAIL(common_header_.build_serialized_header(data_.data(), data_.capacity()))) { + STORAGE_LOG(WARN, "Fail to build common header, ", K(ret), K_(common_header)); + } } + return ret; } diff --git a/src/storage/column_store/ob_cg_iter_param_pool.cpp b/src/storage/column_store/ob_cg_iter_param_pool.cpp index 6f2d657ca..cc88866f8 100644 --- a/src/storage/column_store/ob_cg_iter_param_pool.cpp +++ b/src/storage/column_store/ob_cg_iter_param_pool.cpp @@ -268,6 +268,7 @@ int ObCGIterParamPool::generate_for_column_store(const ObTableIterParam &row_par //cg_param.ss_rowkey_prefix_cnt_ = 0; cg_param.pd_storage_flag_ = row_param.pd_storage_flag_; cg_param.table_scan_opt_ = row_param.table_scan_opt_; + cg_param.is_column_replica_table_ = row_param.is_column_replica_table_; if (nullptr != row_param.cg_read_infos_) { if (OB_UNLIKELY(nullptr == row_param.cg_read_infos_->at(cg_pos))) { ret = OB_ERR_UNEXPECTED; diff --git a/src/storage/column_store/ob_co_merge_ctx.cpp b/src/storage/column_store/ob_co_merge_ctx.cpp index 3f4e82827..b4042fa64 100644 --- a/src/storage/column_store/ob_co_merge_ctx.cpp +++ 
b/src/storage/column_store/ob_co_merge_ctx.cpp @@ -152,17 +152,70 @@ int ObCOTabletMergeCtx::init_tablet_merge_info(const bool need_check) return ret; } +int ObCOTabletMergeCtx::prepare_cs_replica_param() +{ + int ret = OB_SUCCESS; + static_param_.is_cs_replica_ = false; + ObStorageSchema *schema_on_tablet = nullptr; + ObSSTable *sstable = nullptr; + if (static_param_.ls_handle_.get_ls()->is_cs_replica()) { + if (OB_FAIL(static_param_.tablet_schema_guard_.init(tablet_handle_, mem_ctx_))) { + LOG_WARN("failed to init cs replica schema guard", K(ret), KPC(this)); + } else if (OB_FAIL(static_param_.tablet_schema_guard_.load(schema_on_tablet))) { + LOG_WARN("failed to load schema on tablet", K(ret)); + } else if (schema_on_tablet->is_cs_replica_compat()) { + static_param_.is_cs_replica_ = true; + } else if (is_convert_co_major_merge(get_merge_type())) { + static_param_.is_cs_replica_ = true; + } else { + static_param_.is_cs_replica_ = static_param_.get_tablet_id().is_user_tablet() + && schema_on_tablet->is_user_data_table() + && schema_on_tablet->is_row_store(); + } + + if (OB_FAIL(ret) || !static_param_.is_cs_replica_) { + } else if (OB_ISNULL(sstable = static_cast(get_tables_handle().get_table(0)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get sstable", K(ret), K(sstable), K(static_param_.tables_handle_)); + } else if (OB_UNLIKELY(!sstable->is_co_sstable())) { + // may be column store replica rebuild from full/read only row store replica + if (sstable->is_major_sstable()) { + static_param_.major_sstable_status_ = ObCOMajorSSTableStatus::COL_REPLICA_MAJOR; + static_param_.co_major_merge_type_ = ObCOMajorMergePolicy::USE_RS_BUILD_SCHEMA_MATCH_MERGE; + LOG_INFO("[CS-Replica] Decide rebuild column store from row major for cs replica", K(ret), KPC(sstable), K_(static_param)); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("first table should be major in cs replica", K(ret), KPC(sstable), K_(static_param)); + } + } else { + static_param_.co_major_merge_type_ = 
ObCOMajorMergePolicy::BUILD_COLUMN_STORE_MERGE; + } + } + LOG_INFO("[CS-Replica] prepare_cs_replica_param", K(ret), "merge_type", get_merge_type(), + "co_merge_type", ObCOMajorMergePolicy::co_major_merge_type_to_str(static_param_.co_major_merge_type_), + "is_cs_replica",static_param_.is_cs_replica_, KPC(schema_on_tablet)); + return ret; +} + int ObCOTabletMergeCtx::prepare_schema() { int ret = OB_SUCCESS; - - if (is_meta_major_merge(get_merge_type())) { + if (OB_FAIL(prepare_cs_replica_param())) { + LOG_WARN("failed to prepare cs replica param", K(ret), K_(static_param)); + } else if (is_meta_major_merge(get_merge_type())) { if (OB_FAIL(get_meta_compaction_info())) { LOG_WARN("failed to get meta compaction info", K(ret), KPC(this)); } + } else if (is_convert_co_major_merge(get_merge_type())) { + if (OB_FAIL(get_convert_compaction_info())) { + LOG_WARN("failed to get convert compaction info", K(ret), KPC(this)); + } } else if (OB_FAIL(get_medium_compaction_info())) { // have checked medium info inside LOG_WARN("failed to get medium compaction info", K(ret), KPC(this)); + } else { + LOG_INFO("[CS-Replica] finish prepare schema for co merge", K(ret), + "is_cs_replica", static_param_.is_cs_replica_, KPC(this)); } return ret; } @@ -665,9 +718,11 @@ int ObCOTabletMergeCtx::init_major_sstable_status() { int ret = OB_SUCCESS; ObSSTable *sstable = static_cast(get_tables_handle().get_table(0)); - if (OB_ISNULL(sstable) || OB_UNLIKELY(!sstable->is_co_sstable())) { + if (OB_ISNULL(sstable)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to get sstable", K(ret), K(sstable), K(static_param_.tables_handle_)); + } else if (OB_UNLIKELY(!sstable->is_co_sstable())) { + // maybe cs replica, processed in ObCOTabletMergeCtx::prepare_cs_replica_param } else if (OB_FAIL(ObCOMajorMergePolicy::decide_co_major_sstable_status( *static_cast(sstable), *get_schema(), diff --git a/src/storage/column_store/ob_co_merge_ctx.h b/src/storage/column_store/ob_co_merge_ctx.h index cd8ccd0bf..83d150af0 100644 
--- a/src/storage/column_store/ob_co_merge_ctx.h +++ b/src/storage/column_store/ob_co_merge_ctx.h @@ -136,6 +136,7 @@ struct ObCOTabletMergeCtx : public ObBasicTabletMergeCtx { return ObBasicTabletMergeCtx::swap_tablet(get_merge_table_result); } int prepare_mocked_row_store_cg_schema(); bool should_mock_row_store_cg_schema(); + int prepare_cs_replica_param(); OB_INLINE bool is_build_row_store_from_rowkey_cg() const { return static_param_.is_build_row_store_from_rowkey_cg(); } OB_INLINE bool is_build_row_store() const { return static_param_.is_build_row_store(); } int get_cg_schema_for_merge(const int64_t idx, const ObStorageColumnGroupSchema *&cg_schema_ptr); diff --git a/src/storage/column_store/ob_co_merge_dag.cpp b/src/storage/column_store/ob_co_merge_dag.cpp index db435c348..771dbcef1 100644 --- a/src/storage/column_store/ob_co_merge_dag.cpp +++ b/src/storage/column_store/ob_co_merge_dag.cpp @@ -183,7 +183,9 @@ int ObCOMergePrepareTask::create_schedule_dag(ObCOTabletMergeCtx &ctx) ObGetMergeTablesResult result; bool schedule_minor = false; - if (OB_FAIL(ctx.check_need_schedule_minor(schedule_minor))) { + if (is_convert_co_major_merge(ctx.get_merge_type())) { + // convert co major merge only rely on major sstable + } else if (OB_FAIL(ctx.check_need_schedule_minor(schedule_minor))) { LOG_WARN("failed to check need chedule minor", K(ret), K(schedule_minor)); } else if (schedule_minor) { ObTableHandleV2 tmp_table_handle; @@ -1056,6 +1058,7 @@ int ObCOMergeDagNet::init_by_param(const ObIDagInitParam *param) merge_type_ = merge_param->merge_type_; ls_id_ = merge_param->ls_id_; tablet_id_ = merge_param->tablet_id_; + (void) set_dag_net_id(merge_param->dag_net_id_); is_inited_ = true; } return ret; @@ -1317,7 +1320,7 @@ int ObCOMergeDagNet::inner_create_row_store_dag( common::ObIArray &exe_dag_array) { int ret = OB_SUCCESS; - LOG_DEBUG("chengkong debug: build row store in this compaction", "co_major_merge_type_", + LOG_DEBUG("build row store in this compaction", 
"co_major_merge_type_", ObCOMajorMergePolicy::co_major_merge_type_to_str(co_merge_ctx_->static_param_.co_major_merge_type_)); dag = nullptr; diff --git a/src/storage/column_store/ob_column_oriented_sstable.h b/src/storage/column_store/ob_column_oriented_sstable.h index fb2ce5c89..ecaa3d680 100644 --- a/src/storage/column_store/ob_column_oriented_sstable.h +++ b/src/storage/column_store/ob_column_oriented_sstable.h @@ -92,6 +92,7 @@ public: uint32_t full_column_cnt_; }; + enum ObCOSSTableBaseType : int32_t { INVALID_TYPE = 0, @@ -102,10 +103,11 @@ enum ObCOSSTableBaseType : int32_t enum ObCOMajorSSTableStatus: uint8_t { INVALID_CO_MAJOR_SSTABLE_STATUS = 0, - COL_WITH_ALL, // all cg + normal cg - COL_ONLY_ALL, // all cg only (schema have all cg) - PURE_COL, // rowkey cg + normal cg - PURE_COL_ONLY_ALL, // all cg only (schema do not have all cg) + COL_WITH_ALL = 1, // all cg + normal cg + COL_ONLY_ALL = 2, // all cg only (schema have all cg) + PURE_COL = 3, // rowkey cg + normal cg + PURE_COL_ONLY_ALL = 4, // all cg only (schema do not have all cg) + COL_REPLICA_MAJOR = 5, // temp status, row store major from F/R replica for column store replica MAX_CO_MAJOR_SSTABLE_STATUS }; /* @@ -120,6 +122,8 @@ enum ObCOMajorSSTableStatus: uint8_t { +-----------------+---------------+---------------+-------+ |PURE_COL_ONLY_ALL| EACH | ALL | NO | +-----------------+---------------+---------------+-------+ + |COL_REPLICA_MAJOR| ROW STORE | ROW STORE | YES | + +-----------------+---------------+---------------+-------+ */ inline bool is_valid_co_major_sstable_status(const ObCOMajorSSTableStatus& major_sstable_status) { diff --git a/src/storage/column_store/ob_column_store_replica_util.cpp b/src/storage/column_store/ob_column_store_replica_util.cpp new file mode 100644 index 000000000..1df0e938d --- /dev/null +++ b/src/storage/column_store/ob_column_store_replica_util.cpp @@ -0,0 +1,233 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + + #include "storage/tx_storage/ob_ls_service.h" + #include "storage/tablet/ob_mds_schema_helper.h" + #include "storage/column_store/ob_column_store_replica_util.h" + #define USING_LOG_PREFIX STORAGE + +namespace oceanbase +{ +namespace storage +{ + +int ObCSReplicaUtil::check_is_cs_replica( + const ObTableSchema &table_schema, + const ObTablet &tablet, + bool &is_cs_replica) +{ + int ret = OB_SUCCESS; + bool is_row_store = false; + is_cs_replica = false; + if (OB_FAIL(table_schema.get_is_row_store(is_row_store))) { + LOG_WARN("fail to get is row store", K(ret), K(table_schema)); + } else { + is_cs_replica = is_row_store + && table_schema.is_user_table() + && !tablet.is_row_store() // tablet in cs replica is not row store + && tablet.get_tablet_id().is_user_tablet(); + } + return ret; +} + +int ObCSReplicaUtil::check_local_is_cs_replica( + const share::ObLSID &ls_id, + bool &is_cs_replica) +{ + int ret = OB_SUCCESS; + is_cs_replica = false; + ObLS *ls = nullptr; + ObLSHandle ls_handle; + + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + if (OB_LS_NOT_EXIST == ret) { + // Only use weak consistency locally read for cs replica, so local ls must exist. + // If ls not exist, table param or dml param is constructed remotely, ignore it. 
+ ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get ls", K(ret), K(ls_id)); + } + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get nullptr ls", K(ret), K(ls_id), K(ls_handle)); + } else { + is_cs_replica = ls->is_cs_replica(); + } + return ret; +} + +bool ObCSReplicaUtil::check_need_convert_cs_when_migration( + const ObTablet &tablet, + const ObStorageSchema& schema_on_tablet) +{ + return schema_on_tablet.is_row_store() + && schema_on_tablet.is_user_data_table() + && tablet.is_row_store() + && tablet.get_tablet_id().is_user_tablet(); +} + +int ObCSReplicaUtil::check_has_cs_replica( + const share::ObLSID &ls_id, + bool &has_column_store_replica) +{ + int ret = OB_SUCCESS; + has_column_store_replica = false; + ObLS *ls = nullptr; + ObLSHandle ls_handle; + + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get nullptr ls", K(ret), K(ls_id), K(ls_handle)); + } else if (OB_FAIL(ls->check_has_cs_replica(has_column_store_replica))) { + LOG_WARN("failed to check ls replica set", K(ret), KPC(ls)); + } + return ret; +} + +int ObCSReplicaUtil::check_need_process_cs_replica( + const ObLS &ls, + const ObTabletID &tablet_id, + const ObStorageSchema &schema, + bool &need_process_cs_replica) +{ + int ret = OB_SUCCESS; + need_process_cs_replica = ls.is_cs_replica() + && tablet_id.is_user_tablet() + && (schema.is_row_store() || schema.is_cs_replica_compat()) + && schema.is_user_data_table(); + return ret; +} + +int ObCSReplicaUtil::check_need_wait_major_convert( + const ObLS &ls, + const ObTabletID &tablet_id, + const ObTablet &tablet, + bool &need_wait_major_convert) +{ + int ret = OB_SUCCESS; + bool need_process_cs_replica = false; + ObStorageSchema *storage_schema = nullptr; + ObArenaAllocator arena_allocator(common::ObMemAttr(MTL_ID(), "CkMjrCvrt")); + 
ObTabletMemberWrapper wrapper; + const ObTabletTableStore *table_store = nullptr; + const ObITable *sstable = nullptr; + need_wait_major_convert = false; + if (OB_FAIL(tablet.load_storage_schema(arena_allocator, storage_schema))) { + LOG_WARN("fail to load storage schema", K(ret), K(tablet)); + } else if (OB_ISNULL(storage_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("storage schema is nullptr", K(ret), K(tablet)); + } else if (OB_FAIL(check_need_process_cs_replica(ls, tablet_id, *storage_schema, need_process_cs_replica))) { + LOG_WARN("fail to check need process cs replica", K(ret), K(ls), K(tablet_id), KPC(storage_schema)); + } else if (need_process_cs_replica) { + if (tablet.is_row_store()) { + // tablet migration but not do co convert + need_wait_major_convert = true; + } else if (OB_FAIL(tablet.fetch_table_store(wrapper))) { + LOG_WARN("failed to fetch table store", K(ret), K(tablet_id), K(tablet)); + } else if (OB_ISNULL(table_store = wrapper.get_member())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table store is nullptr", K(ret), K(tablet_id), K(tablet)); + } else if (OB_ISNULL(sstable = table_store->get_major_sstables().get_boundary_table(true /*is_last*/))) { + if (!tablet.get_tablet_meta().table_store_flag_.with_major_sstable()) { + ret = OB_SSTABLE_NOT_EXIST; + LOG_WARN("latest major is nullptr", K(ret), K(tablet_id), K(tablet)); + } + } else { + // ddl write row store major + need_wait_major_convert = ObITable::is_row_store_major_sstable(sstable->get_key().table_type_); + } + } + ObTabletObjLoadHelper::free(arena_allocator, storage_schema); + return ret; +} + +int ObCSReplicaUtil::check_replica_set_need_process_cs_replica( + const ObLS &ls, + const ObTabletID &tablet_id, + const ObStorageSchema &schema, + bool &need_process_cs_replica) +{ + int ret = OB_SUCCESS; + need_process_cs_replica = false; + if (OB_FAIL(ls.check_has_cs_replica(need_process_cs_replica))) { + LOG_WARN("failed to check ls replica set", K(ret), K(ls)); + } else if 
(need_process_cs_replica) { + need_process_cs_replica = tablet_id.is_user_tablet() + && schema.is_row_store() + && schema.is_user_data_table(); + } + return ret; +} + +ObCSReplicaStorageSchemaGuard::ObCSReplicaStorageSchemaGuard() + : is_inited_(false), + schema_(nullptr) +{ +} + +ObCSReplicaStorageSchemaGuard::~ObCSReplicaStorageSchemaGuard() +{ + reset(); +} + +int ObCSReplicaStorageSchemaGuard::init( + const ObTabletHandle &tablet_handle, + compaction::ObCompactionMemoryContext &mem_ctx) +{ + int ret = OB_SUCCESS; + ObStorageSchema *schema_on_tablet = nullptr; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tablet_handle)); + } else if (OB_FAIL(tablet_handle.get_obj()->load_storage_schema(mem_ctx.get_allocator(), schema_on_tablet))) { + LOG_WARN("failed to load storage schema", K(ret), K(tablet_handle)); + } else { + schema_ = schema_on_tablet; + is_inited_ = true; + } + return ret; +} + +void ObCSReplicaStorageSchemaGuard::reset() +{ + if (IS_INIT) { + if (OB_NOT_NULL(schema_)) { + schema_->~ObStorageSchema(); + schema_ = nullptr; + } + is_inited_ = false; + } +} + +int ObCSReplicaStorageSchemaGuard::load(ObStorageSchema *&storage_schema) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(schema_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema is nullptr", K(ret)); + } else { + storage_schema = schema_; + } + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/column_store/ob_column_store_replica_util.h b/src/storage/column_store/ob_column_store_replica_util.h new file mode 100644 index 000000000..1e2b93eea --- /dev/null +++ b/src/storage/column_store/ob_column_store_replica_util.h @@ -0,0 +1,85 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed 
under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_STORAGE_COLUMN_OB_COLUMN_STORE_REPLICA_UTIL_H_ +#define OCEANBASE_STORAGE_COLUMN_OB_COLUMN_STORE_REPLICA_UTIL_H_ + +#include "storage/ls/ob_ls.h" +#include "storage/compaction/ob_compaction_memory_context.h" +#include "share/schema/ob_table_schema.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObCSReplicaUtil +{ +public: + // for construct storage schema for merge from table_schema + static int check_is_cs_replica( + const ObTableSchema &table_schema, + const ObTablet &tablet, + bool &is_cs_replica); + // is local ls cs replica + static int check_local_is_cs_replica( + const share::ObLSID &ls_id, + bool &is_cs_replica); + // is migrated tablet need convert co major sstable + static bool check_need_convert_cs_when_migration( + const ObTablet &tablet, + const ObStorageSchema& schema_on_tablet); + static int check_has_cs_replica( + const share::ObLSID &ls_id, + bool &has_column_store_replica); + // local ls need process column store replica for specific tablet + static int check_need_process_cs_replica( + const ObLS &ls, + const ObTabletID &tablet_id, + const ObStorageSchema &schema, + bool &need_process_cs_replica); + static int check_need_wait_major_convert( + const ObLS &ls, + const ObTabletID &tablet_id, + const ObTablet &tablet, + bool &need_wait_major_convert); + // whole ls replica set need process column store replica for specific tablet + static int check_replica_set_need_process_cs_replica( + const ObLS &ls, + const ObTabletID &tablet_id, + const ObStorageSchema 
&schema, + bool &need_process_cs_replica); +public: + static const int64_t DEFAULT_CHECK_LS_REPLICA_LOCATION_TIMEOUT = 10 * 1000 * 1000L; // 10s +}; + +class ObCSReplicaStorageSchemaGuard +{ +public: + ObCSReplicaStorageSchemaGuard(); + ~ObCSReplicaStorageSchemaGuard(); + int init(const ObTabletHandle &tablet_handle, compaction::ObCompactionMemoryContext &mem_ctx); + void reset(); + OB_INLINE bool is_inited() const { return is_inited_; }; + int load(ObStorageSchema *&storage_schema); + TO_STRING_KV(K_(is_inited), KP_(schema)); +private: + bool is_inited_; + ObStorageSchema *schema_; + DISALLOW_COPY_AND_ASSIGN(ObCSReplicaStorageSchemaGuard); +}; + + +} // namespace storage +} // namespace oceanbase + +#endif \ No newline at end of file diff --git a/src/storage/column_store/ob_column_store_util.h b/src/storage/column_store/ob_column_store_util.h index 6eaf19a9e..91612b8dd 100644 --- a/src/storage/column_store/ob_column_store_util.h +++ b/src/storage/column_store/ob_column_store_util.h @@ -28,6 +28,7 @@ typedef int64_t ObCSRowId; const ObCSRowId OB_INVALID_CS_ROW_ID = -1; const uint32_t OB_CS_INVALID_CG_IDX = INT32_MAX; const uint32_t OB_CS_VIRTUAL_CG_IDX = INT32_MAX - 1; +const uint32_t OB_CS_COLUMN_REPLICA_ROWKEY_CG_IDX = 0; OB_INLINE bool is_virtual_cg(const uint32_t cg_idx) { diff --git a/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp b/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp index 7381db48d..8cb89ff46 100644 --- a/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp +++ b/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp @@ -39,6 +39,7 @@ ObStaticMergeParam::ObStaticMergeParam(ObTabletMergeDagParam &dag_param) is_schema_changed_(false), need_parallel_minor_merge_(true), is_tenant_major_merge_(false), + is_cs_replica_(false), is_backfill_(false), merge_level_(MICRO_BLOCK_MERGE_LEVEL), merge_reason_(ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE), @@ -62,7 +63,8 @@ ObStaticMergeParam::ObStaticMergeParam(ObTabletMergeDagParam &dag_param) 
report_(nullptr), snapshot_info_(), tx_id_(0), - multi_version_column_descs_() + multi_version_column_descs_(), + tablet_schema_guard_() { merge_scn_.set_max(); } @@ -76,6 +78,7 @@ void ObStaticMergeParam::reset() multi_version_column_descs_.reset(); ls_handle_.reset(); // ls_handle could release before tablet_handle tx_id_ = 0; + tablet_schema_guard_.reset(); } bool ObStaticMergeParam::is_valid() const @@ -1095,7 +1098,9 @@ int ObBasicTabletMergeCtx::get_medium_compaction_info() ObStorageSchema *storage_schema = nullptr; if (OB_FAIL(ObStorageSchemaUtil::alloc_storage_schema(mem_ctx_.get_allocator(), storage_schema))) { LOG_WARN("failed to alloc storage schema", K(ret)); - } else if (OB_FAIL(storage_schema->init(mem_ctx_.get_allocator(), medium_info->storage_schema_))) { + } else if (OB_FAIL(storage_schema->init(mem_ctx_.get_allocator(), medium_info->storage_schema_, + false /*skip_column_info*/, nullptr /*column_group_schema*/, + medium_info->storage_schema_.is_row_store() && medium_info->storage_schema_.is_user_data_table() && static_param_.is_cs_replica_))) { LOG_WARN("failed to init storage schema from current medium info", K(ret), K(medium_info)); ObStorageSchemaUtil::free_storage_schema(mem_ctx_.get_allocator(), storage_schema); } else { @@ -1112,7 +1117,9 @@ int ObBasicTabletMergeCtx::get_medium_compaction_info() static_param_.is_schema_changed_ = medium_info->is_schema_changed_; } static_param_.merge_reason_ = (ObAdaptiveMergePolicy::AdaptiveMergeReason)medium_info->medium_merge_reason_; - static_param_.co_major_merge_type_ = static_cast(medium_info->co_major_merge_type_); + if (!static_param_.is_cs_replica_) { + static_param_.co_major_merge_type_ = static_cast(medium_info->co_major_merge_type_); + } FLOG_INFO("get storage schema to merge", "param", get_dag_param(), KPC(medium_info)); } @@ -1184,7 +1191,7 @@ int ObBasicTabletMergeCtx::get_meta_compaction_info() } else if (OB_FAIL(tablet->get_schema_version_from_storage_schema(schema_version))){ 
LOG_WARN("failed to get schema version from tablet", KR(ret), KPC(tablet)); } else if (OB_FAIL(ObMediumCompactionScheduleFunc::get_table_schema_to_merge( - *schema_service, *tablet, schema_version, ObMediumCompactionInfo::MEDIUM_COMPAT_VERSION_V3, mem_ctx_.get_allocator(), *storage_schema))) { + *schema_service, *tablet, schema_version, ObMediumCompactionInfo::MEDIUM_COMPAT_VERSION_LATEST, mem_ctx_.get_allocator(), *storage_schema))) { if (OB_TABLE_IS_DELETED != ret) { LOG_WARN("failed to get table schema", KR(ret), KPC(this)); } @@ -1214,5 +1221,37 @@ int ObBasicTabletMergeCtx::get_meta_compaction_info() return ret; } +int ObBasicTabletMergeCtx::get_convert_compaction_info() +{ + int ret = OB_SUCCESS; + ObTablet *tablet = get_tablet(); + ObStorageSchema *schema_on_tablet = nullptr; + ObStorageSchema *schema_for_merge = nullptr; + if (OB_FAIL(static_param_.tablet_schema_guard_.load(schema_on_tablet))) { + LOG_WARN("failed to load schema on tablet", K(ret), KPC(tablet)); + } else if (OB_UNLIKELY(!is_convert_co_major_merge(get_merge_type()) || OB_ISNULL(schema_on_tablet) || !schema_on_tablet->is_row_store())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected static param", K(ret), KPC(schema_on_tablet), K_(static_param)); + } else if (OB_FAIL(ObStorageSchemaUtil::alloc_storage_schema(mem_ctx_.get_allocator(), schema_for_merge))) { + LOG_WARN("failed to alloc storage schema", K(ret)); + } else if (OB_FAIL(schema_for_merge->init(mem_ctx_.get_allocator(), *schema_on_tablet, + false /*skip_column_info*/, nullptr /*column_group_schema*/, true /*generate_cs_replica_cg_array*/))) { + LOG_WARN("failed to init storage schema for convert co major merge", K(ret), K(tablet), KPC(schema_on_tablet)); + } else { + static_param_.schema_ = schema_for_merge; + static_param_.schema_version_ = static_param_.schema_->schema_version_; + static_param_.data_version_ = DATA_CURRENT_VERSION; + static_param_.is_rebuild_column_store_ = true; + static_param_.is_schema_changed_ = true; // 
use MACRO_BLOCK_MERGE_LEVEL + static_param_.merge_reason_ = ObAdaptiveMergePolicy::REBUILD_COLUMN_GROUP; + FLOG_INFO("[CS-Replica] get storage schema to convert co merge", "param", get_dag_param(), KPC_(static_param_.schema)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(schema_for_merge)) { + ObStorageSchemaUtil::free_storage_schema(mem_ctx_.get_allocator(), schema_for_merge); + } + return ret; +} + } // namespace compaction } // namespace oceanbase diff --git a/src/storage/compaction/ob_basic_tablet_merge_ctx.h b/src/storage/compaction/ob_basic_tablet_merge_ctx.h index 822462f77..affaeb06b 100644 --- a/src/storage/compaction/ob_basic_tablet_merge_ctx.h +++ b/src/storage/compaction/ob_basic_tablet_merge_ctx.h @@ -11,6 +11,7 @@ #define OB_STORAGE_COMPACTION_BASIC_TABLET_MERGE_CTX_H_ #include "storage/compaction/ob_tablet_merge_info.h" #include "storage/compaction/ob_partition_parallel_merge_ctx.h" +#include "storage/column_store/ob_column_store_replica_util.h" #include "storage/compaction/ob_progressive_merge_helper.h" namespace oceanbase { @@ -60,9 +61,9 @@ public: "merge_reason", ObAdaptiveMergePolicy::merge_reason_to_str(merge_reason_), "co_major_merge_type", ObCOMajorMergePolicy::co_major_merge_type_to_str(co_major_merge_type_), K_(sstable_logic_seq), K_(tables_handle), K_(is_rebuild_column_store), K_(is_schema_changed), K_(is_tenant_major_merge), - K_(read_base_version), K_(merge_scn), K_(need_parallel_minor_merge), + K_(is_cs_replica), K_(read_base_version), K_(merge_scn), K_(need_parallel_minor_merge), K_(schema_version), KP_(schema), "multi_version_column_descs_cnt", multi_version_column_descs_.count(), - K_(ls_handle), K_(snapshot_info), KP_(report), K_(is_backfill)); + K_(ls_handle), K_(snapshot_info), KP_(report), K_(is_backfill), K_(tablet_schema_guard)); ObTabletMergeDagParam &dag_param_; bool is_full_merge_; // full merge or increment merge @@ -70,6 +71,7 @@ public: bool is_schema_changed_; bool need_parallel_minor_merge_; bool is_tenant_major_merge_; + 
bool is_cs_replica_; bool is_backfill_; ObMergeLevel merge_level_; ObAdaptiveMergePolicy::AdaptiveMergeReason merge_reason_; @@ -94,6 +96,7 @@ public: ObStorageSnapshotInfo snapshot_info_; int64_t tx_id_; common::ObSEArray multi_version_column_descs_; + storage::ObCSReplicaStorageSchemaGuard tablet_schema_guard_; // original storage schema on tablet, used only in cs replica DISALLOW_COPY_AND_ASSIGN(ObStaticMergeParam); }; @@ -205,6 +208,7 @@ public: STATIC_PARAM_FUNC(bool, is_tenant_major_merge); STATIC_PARAM_FUNC(bool, is_full_merge); STATIC_PARAM_FUNC(bool, need_parallel_minor_merge); + STATIC_PARAM_FUNC(bool, is_cs_replica); STATIC_PARAM_FUNC(int64_t, read_base_version); STATIC_PARAM_FUNC(int64_t, ls_rebuild_seq); STATIC_PARAM_FUNC(const storage::ObTablesHandleArray &, tables_handle); @@ -266,6 +270,7 @@ protected: int get_medium_compaction_info(); // for major int swap_tablet(ObGetMergeTablesResult &get_merge_table_result); // for major int get_meta_compaction_info(); // for meta major + int get_convert_compaction_info(); // for convert co major merge static const int64_t LARGE_VOLUME_DATA_ROW_COUNT_THREASHOLD = 1000L * 1000L; // 100w static const int64_t LARGE_VOLUME_DATA_MACRO_COUNT_THREASHOLD = 300L; public: diff --git a/src/storage/compaction/ob_compaction_util.cpp b/src/storage/compaction/ob_compaction_util.cpp index d28aba42e..bace8203c 100644 --- a/src/storage/compaction/ob_compaction_util.cpp +++ b/src/storage/compaction/ob_compaction_util.cpp @@ -29,6 +29,7 @@ const static char * ObMergeTypeStr[] = { "BACKFILL_TX_MERGE", "MDS_MINI_MERGE", "MDS_MINOR_MERGE", + "CONVERT_CO_MAJOR_MERGE", "EMPTY_MERGE_TYPE" }; diff --git a/src/storage/compaction/ob_compaction_util.h b/src/storage/compaction/ob_compaction_util.h index ec073eb9d..0435537f1 100644 --- a/src/storage/compaction/ob_compaction_util.h +++ b/src/storage/compaction/ob_compaction_util.h @@ -17,7 +17,7 @@ namespace oceanbase { namespace compaction { -enum ObMergeType : uint8_t +enum ObMergeType {
INVALID_MERGE_TYPE = 0, MINOR_MERGE, // minor merge, compaction several mini sstable into one larger mini sstable @@ -30,6 +30,7 @@ enum ObMergeType : uint8_t BACKFILL_TX_MERGE, MDS_MINI_MERGE, MDS_MINOR_MERGE, + CONVERT_CO_MAJOR_MERGE, // convert row store major into columnar store cg sstables // add new merge type here // fix merge_type_to_str & ObPartitionMergePolicy::get_merge_tables MERGE_TYPE_MAX @@ -48,9 +49,13 @@ inline bool is_medium_merge(const ObMergeType &merge_type) { return MEDIUM_MERGE == merge_type; } +inline bool is_convert_co_major_merge(const ObMergeType &merge_type) +{ + return CONVERT_CO_MAJOR_MERGE == merge_type; +} inline bool is_major_merge_type(const ObMergeType &merge_type) { - return is_medium_merge(merge_type) || is_major_merge(merge_type); + return is_convert_co_major_merge(merge_type) || is_medium_merge(merge_type) || is_major_merge(merge_type); } inline bool is_mini_merge(const ObMergeType &merge_type) { diff --git a/src/storage/compaction/ob_medium_compaction_func.cpp b/src/storage/compaction/ob_medium_compaction_func.cpp index 5017fde86..e2a44486f 100644 --- a/src/storage/compaction/ob_medium_compaction_func.cpp +++ b/src/storage/compaction/ob_medium_compaction_func.cpp @@ -26,6 +26,7 @@ #include "storage/ob_partition_range_spliter.h" #include "storage/compaction/ob_compaction_diagnose.h" #include "src/storage/column_store/ob_column_oriented_sstable.h" +#include "storage/column_store/ob_column_store_replica_util.h" #include "storage/tablet/ob_tablet_medium_info_reader.h" namespace oceanbase @@ -865,8 +866,7 @@ int ObMediumCompactionScheduleFunc::init_co_major_merge_type( LOG_WARN("failed to decide co major merge type", K(ret)); } else { medium_info.co_major_merge_type_ = major_merge_type; - LOG_DEBUG("chengkong debug: success to get ", - "major_merge_type", ObCOMajorMergePolicy::co_major_merge_type_to_str(major_merge_type)); + LOG_DEBUG("success to get ", "major_merge_type", 
ObCOMajorMergePolicy::co_major_merge_type_to_str(major_merge_type)); } return ret; @@ -1058,7 +1058,7 @@ int ObMediumCompactionScheduleFunc::get_table_schema_to_merge( } } #endif - + bool is_cs_replica = false; int64_t storage_schema_version = ObStorageSchema::STORAGE_SCHEMA_VERSION_LATEST; if (medium_compat_version < ObMediumCompactionInfo::MEDIUM_COMPAT_VERSION_V2) { @@ -1067,10 +1067,10 @@ int ObMediumCompactionScheduleFunc::get_table_schema_to_merge( storage_schema_version = ObStorageSchema::STORAGE_SCHEMA_VERSION_V2; } // for old version medium info, need generate old version schema - if (FAILEDx(storage_schema.init( - allocator, *table_schema, tablet.get_tablet_meta().compat_mode_, false/*skip_column_info*/, - storage_schema_version))) { - LOG_WARN("failed to init storage schema", K(ret), K(schema_version)); + if (FAILEDx(ObCSReplicaUtil::check_is_cs_replica(*table_schema, tablet, is_cs_replica))) { + LOG_WARN("fail to get is row store", K(ret), K(table_id), KPC(table_schema)); + } else if (OB_FAIL(storage_schema.init(allocator, *table_schema, tablet.get_tablet_meta().compat_mode_, false/*skip_column_info*/, storage_schema_version, is_cs_replica))) { + LOG_WARN("failed to init storage schema", K(ret), K(schema_version), K(tablet), KPC(table_schema)); } else { LOG_INFO("get schema to merge", K(tablet_id), K(table_id), K(schema_version), K(save_schema_version), K(storage_schema), K(*reinterpret_cast(table_schema))); @@ -1224,105 +1224,135 @@ int ObMediumCompactionScheduleFunc::init_tablet_filters(share::ObTabletReplicaFi return ret; } -int ObMediumCompactionScheduleFunc::check_medium_checksum( + int ObMediumCompactionScheduleFunc::check_tablet_checksum( const ObIArray &checksum_items, + const ObLSColumnReplicaCache &ls_cs_replica_cache, + const int64_t start_idx, + const int64_t end_idx, + const bool is_medium_checker, ObIArray &error_pairs, - int64_t &item_idx, int &check_ret) { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; - check_ret = OB_SUCCESS; - 
int64_t items_cnt = checksum_items.count(); - ObTabletReplicaChecksumItem prev_item; - ObTabletReplicaChecksumItem curr_item; - while (OB_SUCC(ret) && item_idx < items_cnt) { - curr_item.reset(); - if (OB_FAIL(curr_item.assign(checksum_items.at(item_idx)))) { - LOG_WARN("fail to assign tablet replica checksum item", KR(ret), K(item_idx), "item", checksum_items.at(item_idx)); - } else { - if (prev_item.is_key_valid()) { - if (curr_item.is_same_tablet(prev_item)) { // same tablet - if (OB_TMP_FAIL(curr_item.verify_checksum(prev_item))) { - LOG_DBA_ERROR(OB_CHECKSUM_ERROR, "msg", "checksum error in tablet replica checksum", KR(tmp_ret), - K(curr_item), K(prev_item)); - if (OB_SUCCESS == check_ret) { - if (OB_TMP_FAIL(error_pairs.push_back(ObTabletLSPair(curr_item.tablet_id_, curr_item.ls_id_)))) { - LOG_WARN("fail to push back error pair", K(tmp_ret), "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); - } - check_ret = OB_CHECKSUM_ERROR; + if (start_idx >= end_idx) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid idx range for check tablet checksums", K(ret), K(start_idx), K(end_idx)); + } else if (start_idx + 1 == end_idx) { + } else { + const ObTabletReplicaChecksumItem *prev_item = nullptr; + ObTabletDataChecksumChecker data_checksum_checker; + ObLSID prev_error_ls_id; + for (int64_t idx = start_idx; OB_SUCC(ret) && idx < end_idx; ++idx) { + const ObTabletReplicaChecksumItem &curr_item = checksum_items.at(idx); + bool is_cs_replica = false; + ObLSReplicaUniItem ls_item(curr_item.ls_id_, curr_item.server_); + if (OB_FAIL(ls_cs_replica_cache.check_is_cs_replica(ls_item, is_cs_replica))) { + LOG_WARN("fail to check is column replica", K(ret), K(ls_item), K(ls_cs_replica_cache)); + } else if (OB_ISNULL(prev_item)) { + } else if (!curr_item.is_same_tablet(*prev_item)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not continuous same tablet id", K(ret), K(curr_item), KPC(prev_item)); + } else if (OB_TMP_FAIL(data_checksum_checker.check_data_checksum(curr_item, 
is_cs_replica)) + || OB_TMP_FAIL(curr_item.verify_column_checksum(*prev_item))) { + if (OB_CHECKSUM_ERROR == tmp_ret) { + LOG_DBA_ERROR(OB_CHECKSUM_ERROR, "msg", "checksum error in tablet replica checksum", KR(tmp_ret), + K(curr_item), KPC(prev_item), K(is_cs_replica), K(data_checksum_checker)); + check_ret = OB_CHECKSUM_ERROR; + if (curr_item.ls_id_ != prev_error_ls_id) { + prev_error_ls_id = curr_item.ls_id_; + if (OB_TMP_FAIL(error_pairs.push_back(ObTabletLSPair(curr_item.tablet_id_, curr_item.ls_id_)))) { + LOG_WARN("fail to push back error pair", K(tmp_ret), "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); } } -#ifdef ERRSIM - if (OB_SUCC(ret)) { - ret = OB_E(EventTable::EN_MEDIUM_REPLICA_CHECKSUM_ERROR) OB_SUCCESS; - if (OB_FAIL(ret)) { - STORAGE_LOG(INFO, "ERRSIM EN_MEDIUM_REPLICA_CHECKSUM_ERROR", K(ret), - "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); - if (OB_TMP_FAIL(error_pairs.push_back(ObTabletLSPair(curr_item.tablet_id_, curr_item.ls_id_)))) { - LOG_WARN("fail to push back error pair", K(tmp_ret), "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); - } - check_ret = OB_CHECKSUM_ERROR; - } - } -#endif } else { - break; + ret = tmp_ret; + LOG_WARN("unexpected error in tablet replica checksum", KR(ret), K(curr_item), KPC(prev_item)); } - } else if (OB_FAIL(prev_item.assign(curr_item))) { - LOG_WARN("fail to assign tablet replica checksum item", KR(ret), K(item_idx), K(curr_item)); +#ifdef ERRSIM + if (is_medium_checker && OB_SUCC(ret)) { + ret = OB_E(EventTable::EN_MEDIUM_REPLICA_CHECKSUM_ERROR) OB_SUCCESS; + if (OB_FAIL(ret)) { + STORAGE_LOG(INFO, "ERRSIM EN_MEDIUM_REPLICA_CHECKSUM_ERROR", K(ret), "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); + if (OB_TMP_FAIL(error_pairs.push_back(ObTabletLSPair(curr_item.tablet_id_, curr_item.ls_id_)))) { + LOG_WARN("fail to push back error pair", K(tmp_ret), "tablet_id", curr_item.tablet_id_, "ls_id", curr_item.ls_id_); + } + check_ret = OB_CHECKSUM_ERROR; + 
} + } +#endif } - ++item_idx; + prev_item = &curr_item; } - } // end for while + } return ret; } -int ObMediumCompactionScheduleFunc::batch_check_medium_checksum( - const ObIArray &checksum_items) +int ObMediumCompactionScheduleFunc::check_replica_checksum_items( + const ObIArray &checksum_items, + const ObLSColumnReplicaCache &ls_cs_replica_cache, + const bool is_medium_checker) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - int check_ret = OB_SUCCESS; - int64_t pair_idx = 0; - int64_t item_idx = 0; - int64_t items_cnt = checksum_items.count(); - int64_t affected_rows = 0; - ObSEArray error_pairs; - while (OB_SUCC(ret) && item_idx < items_cnt) { - const ObTabletReplicaChecksumItem &tmp_item = checksum_items.at(item_idx); - const ObTabletID &tablet_id = tmp_item.tablet_id_; - const ObLSID &ls_id = tmp_item.ls_id_; - if (OB_FAIL(check_medium_checksum(checksum_items, error_pairs, item_idx, check_ret))) { - LOG_WARN("failed to check medium checksum", K(ret), K(item_idx)); - } else if (OB_SUCCESS == check_ret) { - ObLSHandle ls_handle; - ObTabletHandle unused_handle; - if (OB_TMP_FAIL((MTL(storage::ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::COMPACT_MODE)))) { - if (OB_LS_NOT_EXIST == tmp_ret) { - LOG_TRACE("ls not exist", K(tmp_ret), K(ls_id)); - } else { - LOG_WARN("failed to get ls", K(tmp_ret), K(ls_id)); - } - } else if (OB_TMP_FAIL(ls_handle.get_ls()->update_medium_compaction_info(tablet_id, unused_handle))) { - LOG_WARN("failed to update medium compaction info", K(tmp_ret), K(ls_id), K(tablet_id)); + if (checksum_items.empty()) { + } else { + int tmp_ret = OB_SUCCESS; + int check_ret = OB_SUCCESS; + int64_t affected_rows = 0; + const int64_t count = checksum_items.count(); + int64_t start_idx = 0; + int64_t end_idx = 0; + ObTabletID tablet_id = checksum_items.at(0).tablet_id_; + ObLSID ls_id = checksum_items.at(0).ls_id_; + ObSEArray error_pairs; + error_pairs.set_attr(ObMemAttr(MTL_ID(), "MedCkmErrs")); + + // [start_idx, end_idx) share same
tablet_id + while (OB_SUCC(ret) && end_idx < count) { + while (end_idx < count && tablet_id == checksum_items.at(end_idx).tablet_id_) { + end_idx++; + } + if (OB_FAIL(check_tablet_checksum(checksum_items, ls_cs_replica_cache, start_idx, end_idx, is_medium_checker, error_pairs, check_ret))) { } else { - FLOG_INFO("finish check medium compaction info", K(tmp_ret), K(ls_id), K(tablet_id)); + // update medium compaction info + if (is_medium_checker && OB_SUCCESS == check_ret) { + ObLSHandle ls_handle; + ObTabletHandle unused_handle; + if (OB_TMP_FAIL((MTL(storage::ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::COMPACT_MODE)))) { + if (OB_LS_NOT_EXIST == tmp_ret) { + LOG_TRACE("ls not exist", K(tmp_ret), K(ls_id)); + } else { + LOG_WARN("failed to get ls", K(tmp_ret), K(ls_id)); + } + } else if (OB_TMP_FAIL(ls_handle.get_ls()->update_medium_compaction_info(tablet_id, unused_handle))) { + LOG_WARN("failed to update medium compaction info", K(tmp_ret), K(ls_id), K(tablet_id)); + } else { + FLOG_INFO("finish check medium compaction info", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + + // refresh sliding windows + if (OB_SUCC(ret) && end_idx < count) { + start_idx = end_idx; + tablet_id = checksum_items.at(end_idx).tablet_id_; + ls_id = checksum_items.at(end_idx).ls_id_; + check_ret = OB_SUCCESS; + } + } + } // end while + + if (!error_pairs.empty()) { + if (OB_TMP_FAIL(ObTabletMetaTableCompactionOperator::batch_set_info_status(MTL_ID(), error_pairs, affected_rows))) { + LOG_WARN("fail to batch set info status", KR(tmp_ret)); + } else { + LOG_INFO("succ to batch set info status", K(ret), K(affected_rows), K(error_pairs)); } } - ++pair_idx; - check_ret = OB_SUCCESS; - } // end for while - if (!error_pairs.empty()) { - if (OB_TMP_FAIL(ObTabletMetaTableCompactionOperator::batch_set_info_status(MTL_ID(), error_pairs, affected_rows))) { - LOG_WARN("fail to batch set info status", KR(tmp_ret)); - } else { - LOG_INFO("succ to batch set info status", K(ret),
K(affected_rows), K(error_pairs)); + + if (is_medium_checker && affected_rows > 0) { + MTL(ObTenantTabletScheduler*)->update_error_tablet_cnt(affected_rows); } } - if (affected_rows > 0) { - MTL(ObTenantTabletScheduler*)->update_error_tablet_cnt(affected_rows); - } return ret; } @@ -1331,7 +1361,8 @@ int ObMediumCompactionScheduleFunc::batch_check_medium_finish( const hash::ObHashMap &ls_info_map, ObIArray &finish_tablet_ls_infos, const ObIArray &tablet_ls_infos, - ObCompactionTimeGuard &time_guard) + ObCompactionTimeGuard &time_guard, + const share::ObLSColumnReplicaCache &ls_cs_replica_cache) { int ret = OB_SUCCESS; if (tablet_ls_infos.empty()) { @@ -1348,8 +1379,8 @@ int ObMediumCompactionScheduleFunc::batch_check_medium_finish( MTL_ID(), finish_tablet_ls_infos, checksum_items))) { LOG_WARN("failed to get tablet checksum", K(ret)); } else if (FALSE_IT(time_guard.click(ObCompactionScheduleTimeGuard::SEARCH_CHECKSUM))) { - } else if (OB_FAIL(batch_check_medium_checksum(checksum_items))) { - LOG_WARN("failed to check medium tablets checksum", K(ret)); + } else if (OB_FAIL(check_replica_checksum_items(checksum_items, ls_cs_replica_cache, true /*is_medium_checker*/))) { + LOG_WARN("fail to check replica checksum items for medium checker", K(ret)); } else if (FALSE_IT(time_guard.click(ObCompactionScheduleTimeGuard::CHECK_CHECKSUM))) { } } diff --git a/src/storage/compaction/ob_medium_compaction_func.h b/src/storage/compaction/ob_medium_compaction_func.h index 2b210bd83..607354528 100644 --- a/src/storage/compaction/ob_medium_compaction_func.h +++ b/src/storage/compaction/ob_medium_compaction_func.h @@ -14,6 +14,7 @@ #include "storage/compaction/ob_partition_merge_policy.h" #include "share/tablet/ob_tablet_filter.h" #include "share/ob_tablet_meta_table_compaction_operator.h" +#include "share/ob_tablet_replica_checksum_operator.h" #include "storage/tablet/ob_tablet.h" #include "storage/compaction/ob_tenant_tablet_scheduler.h" #include 
"storage/compaction/ob_tenant_medium_checker.h" @@ -86,7 +87,12 @@ public: const hash::ObHashMap &ls_info_map, ObIArray &finish_tablet_ls_infos, const ObIArray &tablet_ls_infos, - ObCompactionTimeGuard &time_guard); + ObCompactionTimeGuard &time_guard, + const share::ObLSColumnReplicaCache &ls_cs_replica_cache); + static int check_replica_checksum_items( + const ObIArray &checksum_items, + const ObLSColumnReplicaCache &ls_cs_replica_cache, + const bool is_medium_checker); int schedule_next_medium_for_leader( const int64_t major_snapshot, @@ -132,13 +138,14 @@ protected: const hash::ObHashMap &ls_info_map, bool &merge_finish); static int init_tablet_filters(share::ObTabletReplicaFilterHolder &filters); - static int check_medium_checksum( + static int check_tablet_checksum( const ObIArray &checksum_items, + const ObLSColumnReplicaCache &ls_cs_replica_cache, + const int64_t start_idx, + const int64_t end_idx, + const bool is_medium_checker, ObIArray &error_pairs, - int64_t &item_idx, int &check_ret); - static int batch_check_medium_checksum( - const ObIArray &checksum_items); int choose_medium_snapshot( const int64_t max_sync_medium_scn, ObMediumCompactionInfo &medium_info, diff --git a/src/storage/compaction/ob_partition_merge_policy.cpp b/src/storage/compaction/ob_partition_merge_policy.cpp index 0d765b185..4cb2f19f9 100644 --- a/src/storage/compaction/ob_partition_merge_policy.cpp +++ b/src/storage/compaction/ob_partition_merge_policy.cpp @@ -63,7 +63,8 @@ ObPartitionMergePolicy::GetMergeTables ObPartitionMergePolicy::get_merge_tables[ ObPartitionMergePolicy::not_support_merge_type, ObPartitionMergePolicy::not_support_merge_type, ObPartitionMergePolicy::not_support_merge_type, - ObPartitionMergePolicy::get_mds_merge_tables + ObPartitionMergePolicy::get_mds_merge_tables, + ObPartitionMergePolicy::get_convert_co_major_merge_tables }; @@ -232,6 +233,44 @@ int ObPartitionMergePolicy::get_mds_merge_tables( return ret; } +int 
ObPartitionMergePolicy::get_convert_co_major_merge_tables( + const storage::ObGetMergeTablesParam &param, + storage::ObLS &ls, + const storage::ObTablet &tablet, + storage::ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + ObSSTable *base_table = nullptr; + result.reset(); + result.merge_version_ = param.merge_version_; + ObTabletMemberWrapper table_store_wrapper; + if (OB_FAIL(tablet.fetch_table_store(table_store_wrapper))) { + LOG_WARN("fail to fetch table store", K(ret)); + } else if (OB_UNLIKELY(!table_store_wrapper.get_member()->is_valid() + || !param.is_valid() + || !is_convert_co_major_merge(param.merge_type_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid argument", K(ret), KPC(table_store_wrapper.get_member()), K(param)); + } else if (OB_ISNULL(base_table = static_cast<ObSSTable *>( + table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(true/*last*/)))) { + ret = OB_ENTRY_NOT_EXIST; + LOG_ERROR("major sstable not exist", K(ret), KPC(table_store_wrapper.get_member())); + } else if (OB_FAIL(result.handle_.add_sstable(base_table, table_store_wrapper.get_meta_handle()))) { + LOG_WARN("failed to add base_table to result", K(ret)); + } else if (OB_UNLIKELY(base_table->get_snapshot_version() != param.merge_version_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("convert co major merge should not change major snapshot version", K(ret), KPC(base_table), K(param), K(tablet)); + } else { + result.version_range_.base_version_ = 0; + result.version_range_.multi_version_start_ = tablet.get_multi_version_start(); + result.version_range_.snapshot_version_ = param.merge_version_; + if (OB_FAIL(get_multi_version_start(param.merge_type_, ls, tablet, result.version_range_, result.snapshot_info_))) { + LOG_WARN("failed to get multi version_start", K(ret)); + } + } + return ret; +} + int ObPartitionMergePolicy::get_result_by_snapshot( ObTablet &tablet, const int64_t snapshot, diff --git a/src/storage/compaction/ob_partition_merge_policy.h
b/src/storage/compaction/ob_partition_merge_policy.h index 1848109d7..e2baca639 100644 --- a/src/storage/compaction/ob_partition_merge_policy.h +++ b/src/storage/compaction/ob_partition_merge_policy.h @@ -68,13 +68,16 @@ public: storage::ObLS &ls, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); - static int get_mds_merge_tables( const storage::ObGetMergeTablesParam &param, storage::ObLS &ls, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); - + static int get_convert_co_major_merge_tables( + const storage::ObGetMergeTablesParam &param, + storage::ObLS &ls, + const storage::ObTablet &tablet, + storage::ObGetMergeTablesResult &result); static int not_support_merge_type( const storage::ObGetMergeTablesParam &param, storage::ObLS &ls, diff --git a/src/storage/compaction/ob_tenant_medium_checker.cpp b/src/storage/compaction/ob_tenant_medium_checker.cpp index ca9749e23..3b05db2ff 100644 --- a/src/storage/compaction/ob_tenant_medium_checker.cpp +++ b/src/storage/compaction/ob_tenant_medium_checker.cpp @@ -122,11 +122,10 @@ int ObTenantMediumChecker::refresh_ls_status() int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; lib::ObMutexGuard guard(lock_); - if (OB_TMP_FAIL(ls_locality_cache_.refresh_ls_locality(true/*force_refresh*/))) { - LOG_WARN("failed to refresh ls locality", K(tmp_ret)); - } common::ObSEArray ls_ids; - if (OB_FAIL(MTL(ObLSService *)->get_ls_ids(ls_ids))) { + if (OB_FAIL(ls_locality_cache_.refresh_ls_locality(true/*force_refresh*/))) { + LOG_WARN("failed to refresh ls locality", K(ret)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls_ids(ls_ids))) { LOG_WARN("failed to get all ls id", K(ret)); } else { ls_info_map_.reuse(); @@ -155,19 +154,18 @@ int ObTenantMediumChecker::check_ls_status(const share::ObLSID &ls_id, bool &is_leader, bool need_check) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; is_leader = false; if (need_check && CHECK_LS_LOCALITY_INTERVAL <
ObTimeUtility::fast_current_time() - last_check_timestamp_) { - if (OB_TMP_FAIL(refresh_ls_status())) { - LOG_WARN("failed to refresh ls locality", K(tmp_ret)); + if (OB_FAIL(refresh_ls_status())) { + LOG_WARN("failed to refresh ls locality", K(ret)); } else { last_check_timestamp_ = ObTimeUtility::fast_current_time(); } } lib::ObMutexGuard guard(lock_); ObLSInfo ls_info; - if (OB_FAIL(ls_info_map_.get_refactored(ls_id, ls_info))) { + if (FAILEDx(ls_info_map_.get_refactored(ls_id, ls_info))) { if (OB_HASH_NOT_EXIST != ret) { LOG_WARN("fail to get map", K(ret), K(ls_id)); } else { @@ -219,16 +217,12 @@ int ObTenantMediumChecker::check_medium_finish_schedule() if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObTenantMediumChecker is not inited", K(ret)); + } else if (OB_FAIL(ls_locality_cache_.refresh_ls_locality(false /*force_refresh*/))) { + LOG_WARN("failed to refresh ls locality", K(ret)); + ADD_COMMON_SUSPECT_INFO(MEDIUM_MERGE, share::ObDiagnoseTabletType::TYPE_MEDIUM_MERGE, + SUSPECT_FAILED_TO_REFRESH_LS_LOCALITY, ret); } else { - // refresh ls locality cache - if (OB_TMP_FAIL(ls_locality_cache_.refresh_ls_locality(false /*force_refresh*/))) { - LOG_WARN("failed to refresh ls locality", K(tmp_ret)); - ADD_COMMON_SUSPECT_INFO(MEDIUM_MERGE, share::ObDiagnoseTabletType::TYPE_MEDIUM_MERGE, - SUSPECT_FAILED_TO_REFRESH_LS_LOCALITY, tmp_ret); - } else { - DEL_SUSPECT_INFO(MEDIUM_MERGE, UNKNOW_LS_ID, UNKNOW_TABLET_ID, ObDiagnoseTabletType::TYPE_MEDIUM_MERGE); - } - + DEL_SUSPECT_INFO(MEDIUM_MERGE, UNKNOW_LS_ID, UNKNOW_TABLET_ID, ObDiagnoseTabletType::TYPE_MEDIUM_MERGE); TabletLSArray tablet_ls_infos; tablet_ls_infos.set_attr(ObMemAttr(MTL_ID(), "CheckInfos")); TabletLSArray batch_tablet_ls_infos; @@ -319,7 +313,7 @@ int ObTenantMediumChecker::check_medium_finish( ObCompactionScheduleTimeGuard time_guard; stat.filter_cnt_ += (end_idx - start_idx - check_tablet_ls_infos.count()); if (FAILEDx(ObMediumCompactionScheduleFunc::batch_check_medium_finish( - ls_info_map_,
finish_tablet_ls_infos, check_tablet_ls_infos, time_guard))) { + ls_info_map_, finish_tablet_ls_infos, check_tablet_ls_infos, time_guard, ls_locality_cache_.get_cs_replica_cache()))) { LOG_WARN("failed to batch check medium finish", K(ret), K(tablet_ls_infos.count()), K(check_tablet_ls_infos.count()), K(tablet_ls_infos), K(check_tablet_ls_infos)); stat.fail_cnt_ += check_tablet_ls_infos.count(); diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index 1a34973f4..24d518f84 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -1037,10 +1037,11 @@ int ObTenantTabletScheduler::schedule_merge_dag( const ObLSID &ls_id, const storage::ObTablet &tablet, const ObMergeType merge_type, - const int64_t &merge_snapshot_version) + const int64_t &merge_snapshot_version, + const ObDagId *dag_net_id /*= nullptr*/) { int ret = OB_SUCCESS; - if (is_major_merge_type(merge_type) && !tablet.is_row_store()) { + if (is_major_merge_type(merge_type) && (!tablet.is_row_store() || is_convert_co_major_merge(merge_type))) { ObCOMergeDagParam param; param.ls_id_ = ls_id; param.tablet_id_ = tablet.get_tablet_meta().tablet_id_; @@ -1048,6 +1049,9 @@ int ObTenantTabletScheduler::schedule_merge_dag( param.merge_version_ = merge_snapshot_version; param.compat_mode_ = tablet.get_tablet_meta().compat_mode_; param.transfer_seq_ = tablet.get_tablet_meta().transfer_info_.transfer_seq_; + if (OB_UNLIKELY(nullptr != dag_net_id)) { + param.dag_net_id_ = *dag_net_id; + } if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_tablet_co_merge_dag_net(param))) { if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { LOG_WARN("failed to schedule tablet merge dag", K(ret)); @@ -1066,6 +1070,27 @@ int ObTenantTabletScheduler::schedule_merge_dag( LOG_WARN("failed to schedule tablet merge dag", K(ret)); } } + FLOG_INFO("schedule merge dag", K(ret), K(param), 
K(tablet.is_row_store())); + } + return ret; +} + +int ObTenantTabletScheduler::schedule_convert_co_merge_dag_net( + const ObLSID &ls_id, + const ObTablet &tablet, + const int64_t retry_times, + const ObDagId& curr_dag_net_id) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(compaction::ObTenantTabletScheduler::schedule_merge_dag( + ls_id, tablet, compaction::ObMergeType::CONVERT_CO_MAJOR_MERGE, tablet.get_last_major_snapshot_version(), &curr_dag_net_id))) { + if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { + ret = tmp_ret; + LOG_WARN("failed to schedule co merge dag net for cs replica", K(ret), K(ls_id), "tablet_id", tablet.get_tablet_id()); + } + } else { + LOG_INFO("[CS-Replica] schedule COMergeDagNet to convert row store to column store", K(retry_times), K(ls_id), "tablet_id", tablet.get_tablet_id(), K(curr_dag_net_id)); } return ret; } diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.h b/src/storage/compaction/ob_tenant_tablet_scheduler.h index 30db53926..5764bc50e 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.h +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.h @@ -257,7 +257,13 @@ public: const share::ObLSID &ls_id, const storage::ObTablet &tablet, const ObMergeType merge_type, - const int64_t &merge_snapshot_version); + const int64_t &merge_snapshot_version, + const ObDagId *dag_net_id = nullptr); + static int schedule_convert_co_merge_dag_net( + const ObLSID &ls_id, + const ObTablet &tablet, + const int64_t retry_times, + const ObDagId& curr_dag_net_id); static int schedule_tablet_ddl_major_merge( ObLSHandle &ls_handle, ObTabletHandle &tablet_handle); diff --git a/src/storage/ddl/ob_complement_data_task.cpp b/src/storage/ddl/ob_complement_data_task.cpp index 7ce784563..617012098 100644 --- a/src/storage/ddl/ob_complement_data_task.cpp +++ b/src/storage/ddl/ob_complement_data_task.cpp @@ -2059,7 +2059,7 @@ int ObLocalScan::construct_access_param( } else if (!has_all_cg) { for (int64_t 
i = 0; i < col_params_.count(); i++) { int32_t tmp_cg_idx = -1; - if (OB_FAIL(data_table_schema.get_column_group_index(*col_params_.at(i), tmp_cg_idx))) { + if (OB_FAIL(data_table_schema.get_column_group_index(*col_params_.at(i), false /*need_calculate_cg_idx*/, tmp_cg_idx))) { LOG_WARN("fail to get column group idx", K(ret), K(data_table_schema)); } else if (OB_FAIL(cg_idxs.push_back(tmp_cg_idx))) { LOG_WARN("fail to push back cg idx", K(ret)); diff --git a/src/storage/ddl/ob_ddl_clog.cpp b/src/storage/ddl/ob_ddl_clog.cpp index fecaeda8f..8927f155b 100644 --- a/src/storage/ddl/ob_ddl_clog.cpp +++ b/src/storage/ddl/ob_ddl_clog.cpp @@ -234,6 +234,8 @@ int ObDDLMacroBlockClogCb::on_success() if (is_data_buffer_freed_) { LOG_INFO("data buffer is freed, do not need to callback"); } else if (OB_FAIL(ret)) { + } else if (redo_info_.with_cs_replica_ && redo_info_.table_key_.is_column_store_sstable()) { + LOG_TRACE("[CS-Replica] skip replay cs replica redo clog in leader", K(ret), K_(redo_info)); } else if (OB_FAIL(macro_block.block_handle_.set_block_id(macro_block_id_))) { LOG_WARN("set macro block id failed", K(ret), K(macro_block_id_)); } else { @@ -406,7 +408,8 @@ int ObDDLStartLog::init( const uint64_t data_format_version, const int64_t execution_id, const ObDirectLoadType direct_load_type, - const ObTabletID &lob_meta_tablet_id) + const ObTabletID &lob_meta_tablet_id, + const bool with_cs_replica) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!table_key.is_valid() || execution_id < 0 || data_format_version <= 0 || !is_valid_direct_load(direct_load_type) @@ -419,7 +422,7 @@ int ObDDLStartLog::init( execution_id_ = execution_id; direct_load_type_ = direct_load_type; lob_meta_tablet_id_ = lob_meta_tablet_id; - with_cs_replica_ = false; // TODO(chengkong): placeholder for column store replica feature + with_cs_replica_ = with_cs_replica; } return ret; } diff --git a/src/storage/ddl/ob_ddl_clog.h b/src/storage/ddl/ob_ddl_clog.h index b786fd5bc..0e685fb38 100644 --- 
a/src/storage/ddl/ob_ddl_clog.h +++ b/src/storage/ddl/ob_ddl_clog.h @@ -196,13 +196,15 @@ public: const uint64_t data_format_version, const int64_t execution_id, const ObDirectLoadType direct_load_type, - const ObTabletID &lob_meta_tablet_id); + const ObTabletID &lob_meta_tablet_id, + const bool with_cs_replica); bool is_valid() const { return table_key_.is_valid() && data_format_version_ >= 0 && execution_id_ >= 0 && is_valid_direct_load(direct_load_type_); } ObITable::TableKey get_table_key() const { return table_key_; } uint64_t get_data_format_version() const { return data_format_version_; } int64_t get_execution_id() const { return execution_id_; } ObDirectLoadType get_direct_load_type() const { return direct_load_type_; } const ObTabletID &get_lob_meta_tablet_id() const { return lob_meta_tablet_id_; } + bool get_with_cs_replica() const { return with_cs_replica_; } TO_STRING_KV(K_(table_key), K_(data_format_version), K_(execution_id), K_(direct_load_type), K_(lob_meta_tablet_id), K_(with_cs_replica)); private: ObITable::TableKey table_key_; // use table type to distinguish column store, column group id is valid diff --git a/src/storage/ddl/ob_ddl_merge_task.cpp b/src/storage/ddl/ob_ddl_merge_task.cpp index dc31c3f4f..5ab22f679 100644 --- a/src/storage/ddl/ob_ddl_merge_task.cpp +++ b/src/storage/ddl/ob_ddl_merge_task.cpp @@ -914,6 +914,13 @@ int ObTabletDDLUtil::update_ddl_table_store( table_store_param.ddl_info_.data_format_version_ = ddl_param.data_format_version_; table_store_param.ddl_info_.ddl_commit_scn_ = ddl_param.commit_scn_; table_store_param.ddl_info_.ddl_checkpoint_scn_ = sstable->is_ddl_dump_sstable() ? sstable->get_end_scn() : ddl_param.commit_scn_; + if (ddl_param.table_key_.is_ddl_dump_sstable()) { + // data is not complete, now update ddl table store only for reducing count of ddl dump sstable. 
+ table_store_param.ddl_info_.ddl_table_type_ = ddl_param.table_key_.table_type_; + } else { + // data is complete, make ddl table type to major sstable instead of ddl dump sstable (mark ddl finished). + table_store_param.ddl_info_.ddl_table_type_ = ddl_param.table_key_.is_co_sstable() ? ObITable::COLUMN_ORIENTED_SSTABLE : ObITable::MAJOR_SSTABLE; + } } else { // incremental direct load table_store_param.clog_checkpoint_scn_ = sstable->get_end_scn(); table_store_param.need_check_transfer_seq_ = true; diff --git a/src/storage/ddl/ob_ddl_redo_log_writer.cpp b/src/storage/ddl/ob_ddl_redo_log_writer.cpp index 2b2ce618c..c1882c66e 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.cpp +++ b/src/storage/ddl/ob_ddl_redo_log_writer.cpp @@ -1175,7 +1175,7 @@ int ObDDLRedoLogWriter::write_start_log( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(table_key), K(execution_id), K(data_format_version), K(direct_load_type)); } else if (OB_FAIL(log.init(table_key, data_format_version, execution_id, direct_load_type, - lob_kv_mgr_handle.is_valid() ? lob_kv_mgr_handle.get_obj()->get_tablet_id() : ObTabletID()))) { + lob_kv_mgr_handle.is_valid() ? 
lob_kv_mgr_handle.get_obj()->get_tablet_id() : ObTabletID(), direct_load_mgr_handle.get_obj()->need_process_cs_replica()))) { LOG_WARN("fail to init DDLStartLog", K(ret), K(table_key), K(execution_id), K(data_format_version)); } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("get ls failed", K(ret), K(ls_id_)); @@ -1528,7 +1528,8 @@ ObDDLRedoLogWriter::~ObDDLRedoLogWriter() ObDDLRedoLogWriterCallback::ObDDLRedoLogWriterCallback() : is_inited_(false), redo_info_(), block_type_(ObDDLMacroBlockType::DDL_MB_INVALID_TYPE), table_key_(), macro_block_id_(), task_id_(0), data_format_version_(0), - direct_load_type_(DIRECT_LOAD_INVALID), row_id_offset_(-1) + direct_load_type_(DIRECT_LOAD_INVALID), row_id_offset_(-1), + with_cs_replica_(false), need_submit_io_(true) { } @@ -1545,7 +1546,9 @@ int ObDDLRedoLogWriterCallback::init(const share::ObLSID &ls_id, const share::SCN &start_scn, const uint64_t data_format_version, const ObDirectLoadType direct_load_type, - const int64_t row_id_offset/*=-1*/) + const int64_t row_id_offset/*=-1*/, + const bool with_cs_replica/*=false*/, + const bool need_submit_io/*=true*/) { int ret = OB_SUCCESS; ObLS *ls = nullptr; @@ -1574,6 +1577,8 @@ int ObDDLRedoLogWriterCallback::init(const share::ObLSID &ls_id, data_format_version_ = data_format_version; direct_load_type_ = direct_load_type; row_id_offset_ = row_id_offset; + with_cs_replica_ = with_cs_replica; + need_submit_io_ = need_submit_io; is_inited_ = true; } return ret; @@ -1592,6 +1597,8 @@ void ObDDLRedoLogWriterCallback::reset() data_format_version_ = 0; direct_load_type_ = DIRECT_LOAD_INVALID; row_id_offset_ = -1; + with_cs_replica_ = false; + need_submit_io_ = true; } bool ObDDLRedoLogWriterCallback::is_column_group_info_valid() const @@ -1609,9 +1616,9 @@ int ObDDLRedoLogWriterCallback::write(ObMacroBlockHandle ¯o_handle, if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedoLogWriterCallback is not inited", K(ret)); - } else if 
(OB_UNLIKELY(!macro_handle.is_valid() || !logic_id.is_valid() || nullptr == buf || row_count <= 0)) { + } else if (OB_UNLIKELY((!macro_handle.is_valid() && need_submit_io_) || !logic_id.is_valid() || nullptr == buf || row_count <= 0)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(macro_handle), K(logic_id), KP(buf), K(row_count)); + LOG_WARN("invalid argument", K(ret), K(macro_handle), K_(need_submit_io), K(logic_id), KP(buf), K(row_count)); } else { macro_block_id_ = macro_handle.get_macro_id(); redo_info_.table_key_ = table_key_; @@ -1621,6 +1628,7 @@ int ObDDLRedoLogWriterCallback::write(ObMacroBlockHandle ¯o_handle, redo_info_.start_scn_ = start_scn_; redo_info_.data_format_version_ = data_format_version_; redo_info_.type_ = direct_load_type_; + redo_info_.with_cs_replica_ = with_cs_replica_; redo_info_.parallel_cnt_ = 0; // TODO @zhuoran.zzr, place holder for shared storage redo_info_.cg_cnt_ = 0; diff --git a/src/storage/ddl/ob_ddl_redo_log_writer.h b/src/storage/ddl/ob_ddl_redo_log_writer.h index ff20b411a..648650679 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.h +++ b/src/storage/ddl/ob_ddl_redo_log_writer.h @@ -375,7 +375,9 @@ public: const share::SCN &start_scn, const uint64_t data_format_version, const storage::ObDirectLoadType direct_load_type, - const int64_t row_id_offset = -1); + const int64_t row_id_offset = -1, + const bool with_cs_replica = false, + const bool need_submit_io = true); void reset(); int write( blocksstable::ObMacroBlockHandle ¯o_handle, @@ -403,6 +405,8 @@ private: // if current macro block finish with 50 rows, current macro block's end_row_offset will be 149. // end_row_offset = ddl_start_row_offset + curr_row_count - 1. 
int64_t row_id_offset_; + bool with_cs_replica_; + bool need_submit_io_; }; } // end namespace storage diff --git a/src/storage/ddl/ob_ddl_replay_executor.cpp b/src/storage/ddl/ob_ddl_replay_executor.cpp index 77aa57ca1..e2198fc0a 100644 --- a/src/storage/ddl/ob_ddl_replay_executor.cpp +++ b/src/storage/ddl/ob_ddl_replay_executor.cpp @@ -26,6 +26,8 @@ using namespace oceanbase::blocksstable; using namespace oceanbase::storage; using namespace oceanbase::share; +ERRSIM_POINT_DEF(EN_REPLAY_REDO_DDL_LOG_WAIT); + ObDDLReplayExecutor::ObDDLReplayExecutor() : logservice::ObTabletReplayExecutor(), ls_(nullptr), scn_() {} @@ -265,7 +267,10 @@ int ObDDLStartReplayExecutor::replay_ddl_start(ObTabletHandle &tablet_handle, co } else { table_key = log_->get_table_key(); } - if (OB_FAIL(tenant_direct_load_mgr->replay_create_tablet_direct_load(tablet_handle, log_->get_execution_id(), direct_load_param))) { + + if (!is_lob_meta_tablet && ls_->is_cs_replica() && OB_FAIL(pre_process_for_cs_replica(direct_load_param, table_key, tablet_handle, tablet_id))) { + LOG_WARN("pre process for cs replica failed", K(ret), K(direct_load_param), K(table_key), K(tablet_id)); + } else if (OB_FAIL(tenant_direct_load_mgr->replay_create_tablet_direct_load(tablet_handle, log_->get_execution_id(), direct_load_param))) { LOG_WARN("create tablet manager failed", K(ret)); } else if (OB_FAIL(tenant_direct_load_mgr->get_tablet_mgr_and_check_major( ls_->get_ls_id(), @@ -284,7 +289,8 @@ int ObDDLStartReplayExecutor::replay_ddl_start(ObTabletHandle &tablet_handle, co direct_load_param))) { LOG_WARN("update direct load mgr failed", K(ret)); } else if (OB_FAIL(direct_load_mgr_handle.get_full_obj()->start(*tablet_handle.get_obj(), - table_key, scn_, log_->get_data_format_version(), log_->get_execution_id(), SCN::min_scn()/*checkpoint_scn*/))) { + table_key, scn_, log_->get_data_format_version(), log_->get_execution_id(), SCN::min_scn()/*checkpoint_scn*/, + 
direct_load_param.common_param_.replay_normal_in_cs_replica_))) { LOG_WARN("direct load start failed", K(ret)); if (OB_TASK_EXPIRED != ret) { LOG_WARN("start ddl log failed", K(ret), K_(log), K_(scn)); @@ -299,6 +305,48 @@ int ObDDLStartReplayExecutor::replay_ddl_start(ObTabletHandle &tablet_handle, co return ret; } +int ObDDLStartReplayExecutor::pre_process_for_cs_replica( + ObTabletDirectLoadInsertParam &direct_load_param, + ObITable::TableKey &table_key, + ObTabletHandle &tablet_handle, + const ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + if (log_->get_with_cs_replica()) { + if (log_->get_table_key().is_row_store_major_sstable()) { + table_key.table_type_ = ObITable::COLUMN_ORIENTED_SSTABLE; // for passing defence + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid table key for replay ddl start in cs replica", K(ret), K_(log)); + } + } else { + // ddl is concurrent with adding C replica + const ObTablet *tablet = nullptr; + ObStorageSchema *schema_on_tablet = nullptr; + ObArenaAllocator tmp_arena("RplyStartTmp"); + if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet handle is invalid", K(ret), K(tablet_handle)); + } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet is null", K(ret), K(tablet_id)); + } else if (tablet->is_row_store()) { + // not be created to column store tablet in cs replica, means this tablet is not user data tablet, ignore + } else if (log_->get_table_key().is_column_store_major_sstable()) { + // column store tablet originally, ignore + } else if (OB_FAIL(tablet->load_storage_schema(tmp_arena, schema_on_tablet))) { + LOG_WARN("load storage schema failed", K(ret), KPC(tablet)); + } else if (schema_on_tablet->is_cs_replica_compat()) { + direct_load_param.common_param_.replay_normal_in_cs_replica_ = true; + LOG_TRACE("[CS-Replica] process concurrent ddl and ls migration", K(ret), K(tablet_id), KPC(tablet)); + } else { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("invalid storage schema status", K(ret), KPC(tablet), KPC(schema_on_tablet)); + } + ObTabletObjLoadHelper::free(tmp_arena, schema_on_tablet); + } + return ret; +} // ObDDLRedoReplayExecutor ObDDLRedoReplayExecutor::ObDDLRedoReplayExecutor() @@ -344,6 +392,7 @@ int ObDDLRedoReplayExecutor::do_replay_(ObTabletHandle &tablet_handle) const ObDDLMacroBlockRedoInfo &redo_info = log_->get_redo_info(); ObMacroBlockWriteInfo write_info; ObDDLMacroBlock macro_block; + bool can_skip = false; write_info.buffer_ = redo_info.data_buffer_.ptr(); write_info.size_= redo_info.data_buffer_.length(); write_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_COMPACT_WRITE); @@ -368,6 +417,9 @@ int ObDDLRedoReplayExecutor::do_replay_(ObTabletHandle &tablet_handle) ret = OB_EAGAIN; } } + } else if (OB_FAIL(filter_redo_log_(redo_info, tablet_handle, can_skip))) { + LOG_WARN("fail to filter redo log", K(ret), K(redo_info), K_(ls)); + } else if (can_skip) { } else { if (OB_FAIL(do_full_replay_(tablet_handle, write_info, macro_block))) { LOG_WARN("fail to do full replay", K(ret)); @@ -491,6 +543,43 @@ int ObDDLRedoReplayExecutor::do_full_replay_( return ret; } +int ObDDLRedoReplayExecutor::filter_redo_log_( + const ObDDLMacroBlockRedoInfo &redo_info, + const ObTabletHandle &tablet_handle, + bool &can_skip) +{ + int ret = OB_SUCCESS; + ObTablet *tablet = nullptr; + bool is_cs_replica = ls_->is_cs_replica(); + can_skip = false; + if (redo_info.is_not_compat_cs_replica()) { + // normal + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle)); + } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet is null", K(ret), K(tablet_handle)); + } else if (is_cs_replica && redo_info.is_cs_replica_row_store()) { + can_skip = true; + } else if (!is_cs_replica && redo_info.is_cs_replica_column_store()) { + can_skip = true; + } else if 
(is_cs_replica && redo_info.is_cs_replica_column_store() && tablet->is_row_store()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected tablet status", K(ret), K(redo_info), "ls_meta", ls_->get_ls_meta(), KPC(tablet)); + } + LOG_TRACE("[CS-Replica] Finish filter redo log", K(ret), K(redo_info), K(is_cs_replica), K(can_skip), KPC(tablet), K(ls_)); +#ifdef ERRSIM + if (OB_SUCC(ret)) { + ret = EN_REPLAY_REDO_DDL_LOG_WAIT; + if (OB_FAIL(ret)) { + LOG_INFO("EN_REPLAY_REDO_DDL_LOG_WAIT replay ddl redo failed", K(ret), K(redo_info), K(can_skip)); + } + } +#endif + return ret; +} + // ObDDLCommitReplayExecutor ObDDLCommitReplayExecutor::ObDDLCommitReplayExecutor() : ObDDLReplayExecutor(), log_(nullptr) diff --git a/src/storage/ddl/ob_ddl_replay_executor.h b/src/storage/ddl/ob_ddl_replay_executor.h index 28803f9bb..5ac50b2da 100644 --- a/src/storage/ddl/ob_ddl_replay_executor.h +++ b/src/storage/ddl/ob_ddl_replay_executor.h @@ -18,6 +18,7 @@ #include "storage/ddl/ob_ddl_clog.h" #include "storage/ddl/ob_ddl_inc_clog.h" #include "storage/ddl/ob_ddl_struct.h" +#include "storage/ddl/ob_direct_load_struct.h" #include "storage/blocksstable/ob_block_sstable_struct.h" namespace oceanbase @@ -93,7 +94,11 @@ protected: // @return other error codes, failed to replay. 
int do_replay_(ObTabletHandle &handle) override; int replay_ddl_start(ObTabletHandle &handle, const bool is_lob_meta_tablet); - + int pre_process_for_cs_replica( + ObTabletDirectLoadInsertParam &direct_load_param, + ObITable::TableKey &table_key, + ObTabletHandle &tablet_handle, + const ObTabletID &tablet_id); private: const ObDDLStartLog *log_; }; @@ -124,6 +129,10 @@ private: ObTabletHandle &tablet_handle, blocksstable::ObMacroBlockWriteInfo &write_info, storage::ObDDLMacroBlock ¯o_block); + int filter_redo_log_( + const ObDDLMacroBlockRedoInfo &redo_info, + const ObTabletHandle &tablet_handle, + bool &can_skip); private: const ObDDLRedoLog *log_; }; diff --git a/src/storage/ddl/ob_ddl_struct.cpp b/src/storage/ddl/ob_ddl_struct.cpp index 8282e849a..cae090d30 100644 --- a/src/storage/ddl/ob_ddl_struct.cpp +++ b/src/storage/ddl/ob_ddl_struct.cpp @@ -359,6 +359,21 @@ bool ObDDLMacroBlockRedoInfo::is_column_group_info_valid() const return table_key_.is_column_store_sstable() && end_row_id_ >= 0; } +bool ObDDLMacroBlockRedoInfo::is_not_compat_cs_replica() const +{ + return !with_cs_replica_; +} + +bool ObDDLMacroBlockRedoInfo::is_cs_replica_row_store() const +{ + return with_cs_replica_ && !table_key_.is_column_store_sstable(); +} + +bool ObDDLMacroBlockRedoInfo::is_cs_replica_column_store() const +{ + return with_cs_replica_ && table_key_.is_column_store_sstable(); +} + OB_SERIALIZE_MEMBER(ObDDLMacroBlockRedoInfo, table_key_, data_buffer_, diff --git a/src/storage/ddl/ob_ddl_struct.h b/src/storage/ddl/ob_ddl_struct.h index cae64144d..2a64bdb78 100644 --- a/src/storage/ddl/ob_ddl_struct.h +++ b/src/storage/ddl/ob_ddl_struct.h @@ -176,6 +176,18 @@ public: ~ObDDLMacroBlockRedoInfo() = default; bool is_valid() const; bool is_column_group_info_valid() const; + /* + * For two conditions: + * 1. column store table, unnecessary to generate double redo clog. + * 2. row store table, but unnecessary to process cs replica. 
+ * (a) cs replica not exist, may not be created or is creating. + * (b) table is not user data table. + */ + bool is_not_compat_cs_replica() const; + // If cs replica exist, this redo clog is suitable for F/R replica. + bool is_cs_replica_row_store() const; + // If cs replica exist, this redo clog is suitable for C replica. + bool is_cs_replica_column_store() const; void reset(); TO_STRING_KV(K_(table_key), K_(data_buffer), diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp index 464efab5d..bae9673fc 100644 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp @@ -31,6 +31,7 @@ #include "storage/lob/ob_lob_util.h" #include "storage/tx_storage/ob_ls_service.h" #include "storage/column_store/ob_column_oriented_sstable.h" +#include "storage/column_store/ob_column_store_replica_util.h" #include "storage/direct_load/ob_direct_load_insert_table_row_iterator.h" using namespace oceanbase; @@ -1233,7 +1234,8 @@ void ObTabletDirectLoadBuildCtx::cleanup_slice_writer(const int64_t context_id) ObTabletDirectLoadMgr::ObTabletDirectLoadMgr() : is_inited_(false), is_schema_item_ready_(false), ls_id_(), tablet_id_(), table_key_(), data_format_version_(0), - lock_(), ref_cnt_(0), direct_load_type_(ObDirectLoadType::DIRECT_LOAD_INVALID), sqc_build_ctx_(), + lock_(), ref_cnt_(0), direct_load_type_(ObDirectLoadType::DIRECT_LOAD_INVALID), + need_process_cs_replica_(false), need_fill_column_group_(false), sqc_build_ctx_(), column_items_(), lob_column_idxs_(), lob_col_types_(), schema_item_(), dir_id_(0) { column_items_.set_attr(ObMemAttr(MTL_ID(), "DL_schema")); @@ -1252,6 +1254,8 @@ ObTabletDirectLoadMgr::~ObTabletDirectLoadMgr() data_format_version_ = 0; ATOMIC_STORE(&ref_cnt_, 0); direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_INVALID; + need_process_cs_replica_ = false; + need_fill_column_group_ = false; column_items_.reset(); 
lob_column_idxs_.reset(); lob_col_types_.reset(); @@ -1794,21 +1798,26 @@ int ObTabletDirectLoadMgr::notify_all() return ret; } -struct SliceEndkeyCompareFunctor +struct CSSliceEndkeyCompareFunctor { public: - SliceEndkeyCompareFunctor(const ObStorageDatumUtils &datum_utils) : datum_utils_(datum_utils), ret_code_(OB_SUCCESS) {} + CSSliceEndkeyCompareFunctor(const ObStorageDatumUtils &datum_utils) : datum_utils_(datum_utils), ret_code_(OB_SUCCESS) {} bool operator ()(const ObDirectLoadSliceWriter *left, const ObDirectLoadSliceWriter *right) { bool bret = false; int ret = ret_code_; if (OB_FAIL(ret)) { - } else if (OB_ISNULL(left) || OB_ISNULL(right)) { + } else if (OB_ISNULL(left) || OB_ISNULL(right) || !left->need_column_store() + || !right->need_column_store() || left->get_writer_type() != right->get_writer_type()) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret)); + LOG_WARN("invalid argument", K(ret), KPC(left), KPC(right)); } else if (!left->is_empty() && !right->is_empty()) { - const ObChunkSliceStore *left_slice_store = static_cast(left->get_slice_store()); - const ObChunkSliceStore *right_slice_store = static_cast(right->get_slice_store()); + const ObChunkSliceStore *left_slice_store = left->is_cs_replica_write() + ? static_cast(left->get_slice_store())->get_column_slice_store() + : static_cast(left->get_slice_store()); + const ObChunkSliceStore *right_slice_store = right->is_cs_replica_write() + ? 
static_cast(right->get_slice_store())->get_column_slice_store() + : static_cast(right->get_slice_store()); int cmp_ret = 0; if (OB_FAIL(left_slice_store->endkey_.compare(right_slice_store->endkey_, datum_utils_, cmp_ret))) { LOG_WARN("endkey compare failed", K(ret)); @@ -1865,7 +1874,7 @@ int ObTabletDirectLoadMgr::calc_range(const int64_t thread_cnt) } } if (OB_SUCC(ret)) { - SliceEndkeyCompareFunctor cmp(tablet_handle.get_obj()->get_rowkey_read_info().get_datum_utils()); + CSSliceEndkeyCompareFunctor cmp(tablet_handle.get_obj()->get_rowkey_read_info().get_datum_utils()); lib::ob_sort(sorted_slices.begin(), sorted_slices.end(), cmp); ret = cmp.ret_code_; if (OB_FAIL(ret)) { @@ -1878,11 +1887,8 @@ int ObTabletDirectLoadMgr::calc_range(const int64_t thread_cnt) offset += sorted_slices.at(i)->get_row_count(); } } - if (OB_SUCC(ret) && is_data_direct_load(direct_load_type_)) { - bool is_column_store = false; - if (OB_FAIL(ObCODDLUtil::need_column_group_store(*sqc_build_ctx_.storage_schema_, is_column_store))) { - LOG_WARN("fail to check need column group", K(ret)); - } else if (is_column_store) { + if (OB_SUCC(ret)) { + if (is_data_direct_load(direct_load_type_) && need_fill_column_group_) { if (thread_cnt <= 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invali thread cnt", K(ret), K(thread_cnt)); @@ -2007,6 +2013,7 @@ int ObTabletDirectLoadMgr::close_sstable_slice( } else if (OB_FALSE_IT(next_seq = slice_writer->get_next_block_start_seq())) { // block start seq after the close operation is the next availabled one. 
} else if (!slice_info.is_lob_slice_ && is_ddl_direct_load(direct_load_type_)) { + // for cs replica, full direct load all take the same way with offline ddl of column store int64_t task_finish_count = -1; { ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DIRECT_LOAD_MGR_LOCK); @@ -2015,13 +2022,10 @@ int ObTabletDirectLoadMgr::close_sstable_slice( } } LOG_INFO("inc task finish count", K(tablet_id_), K(execution_id), K(task_finish_count), K(sqc_build_ctx_.task_total_cnt_)); - bool is_column_group_store = false; if (OB_ISNULL(sqc_build_ctx_.storage_schema_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid tablet handle", K(ret), KP(sqc_build_ctx_.storage_schema_)); - } else if (OB_FAIL(ObCODDLUtil::need_column_group_store(*sqc_build_ctx_.storage_schema_, is_column_group_store))) { - LOG_WARN("fail to check is column group store", K(ret)); - } else if (!is_column_group_store) { + } else if (!need_fill_column_group_) { if (task_finish_count >= sqc_build_ctx_.task_total_cnt_) { // for ddl, write commit log when all slices ready. 
if (OB_FAIL(close(execution_id, start_scn))) { @@ -2212,7 +2216,7 @@ int ObTabletDirectLoadMgr::fill_aggregated_column_group( } else if (OB_UNLIKELY(first_slice_writer->get_row_offset() < 0)) { ret = OB_ERR_SYS; LOG_WARN("invalid row offset", K(ret), K(first_slice_writer->get_row_offset())); - } else if (OB_FAIL(cur_writer->init(storage_schema, cg_idx, this, first_slice_writer->get_start_seq(), first_slice_writer->get_row_offset(), get_start_scn()))) { + } else if (OB_FAIL(cur_writer->init(storage_schema, cg_idx, this, first_slice_writer->get_start_seq(), first_slice_writer->get_row_offset(), get_start_scn(), need_process_cs_replica_))) { LOG_WARN("init co ddl writer failed", K(ret), KPC(cur_writer), K(cg_idx), KPC(this)); } else { for (int64_t i = start_idx; OB_SUCC(ret) && i < last_idx; ++i) { @@ -2359,6 +2363,27 @@ int ObTabletDirectLoadMgr::prepare_storage_schema(ObTabletHandle &tablet_handle) return ret; } +int ObTabletDirectLoadMgr::init_column_store_params( + const ObLSHandle &ls_handle, + const ObStorageSchema &storage_schema, + const ObTabletID &new_tablet_id, + const ObDirectLoadType new_direct_load_type) +{ + int ret = OB_SUCCESS; + const ObLS *ls = nullptr; + bool need_process = false; + if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get nullptr ls", K(ret), K(ls_handle)); + } else if (OB_FAIL(ObCSReplicaUtil::check_replica_set_need_process_cs_replica(*ls, new_tablet_id, storage_schema, need_process))) { + LOG_WARN("failed to check ls replica set", K(ret), K(new_tablet_id), KPC(sqc_build_ctx_.storage_schema_)); + } else { + need_process_cs_replica_ = need_process && is_ddl_direct_load(new_direct_load_type); + need_fill_column_group_ = !storage_schema.is_row_store() || need_process_cs_replica_; + } + return ret; +} + ObTabletFullDirectLoadMgr::ObTabletFullDirectLoadMgr() : ObTabletDirectLoadMgr(), start_scn_(share::SCN::min_scn()), commit_scn_(share::SCN::min_scn()), execution_id_(-1) @@ -2389,7 +2414,7 @@ int 
ObTabletFullDirectLoadMgr::update( LOG_WARN("null storage schema", K(ret)); } else if (OB_FAIL(ObCODDLUtil::need_column_group_store(*sqc_build_ctx_.storage_schema_, is_column_group_store))) { LOG_WARN("fail to get schema is column group store", K(ret)); - } else if (is_column_group_store) { + } else if (is_column_group_store && !build_param.common_param_.replay_normal_in_cs_replica_) { table_key_.table_type_ = ObITable::COLUMN_ORIENTED_SSTABLE; int64_t base_cg_idx = -1; if (OB_FAIL(ObCODDLUtil::get_base_cg_idx(sqc_build_ctx_.storage_schema_, base_cg_idx))) { @@ -2452,6 +2477,11 @@ int ObTabletFullDirectLoadMgr::open(const int64_t current_execution_id, share::S ret = OB_ERR_UNEXPECTED; LOG_WARN("start scn must be valid after commit", K(ret), K(start_scn)); } + } else if (OB_ISNULL(sqc_build_ctx_.storage_schema_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null storage schema", K(ret), K(sqc_build_ctx_)); + } else if (OB_FAIL(init_column_store_params(ls_handle, *sqc_build_ctx_.storage_schema_, tablet_id_, direct_load_type_))) { + LOG_WARN("failed to refresh cs replica status", K(ret), K(ls_handle), K_(tablet_id)); } else { ObDDLKvMgrHandle ddl_kv_mgr_handle; ObDDLKvMgrHandle lob_kv_mgr_handle; @@ -2660,7 +2690,8 @@ int ObTabletFullDirectLoadMgr::start( const share::SCN &start_scn, const uint64_t data_format_version, const int64_t execution_id, - const share::SCN &checkpoint_scn) + const share::SCN &checkpoint_scn, + const bool replay_normal_in_cs_replica /*=false*/) { int ret = OB_SUCCESS; share::SCN saved_start_scn; @@ -2729,7 +2760,7 @@ int ObTabletFullDirectLoadMgr::start( if (lob_mgr_handle_.is_valid() && OB_FAIL(lob_mgr_handle_.get_full_obj()->init_ddl_table_store(saved_start_scn, saved_snapshot_version, saved_start_scn))) { LOG_WARN("clean up ddl sstable failed", K(ret)); - } else if (OB_FAIL(init_ddl_table_store(saved_start_scn, saved_snapshot_version, saved_start_scn))) { + } else if (OB_FAIL(init_ddl_table_store(saved_start_scn, saved_snapshot_version, 
saved_start_scn, replay_normal_in_cs_replica))) { LOG_WARN("clean up ddl sstable failed", K(ret), K(tablet_id_)); } } @@ -3157,7 +3188,8 @@ int ObTabletFullDirectLoadMgr::cleanup_unlock() int ObTabletFullDirectLoadMgr::init_ddl_table_store( const share::SCN &start_scn, const int64_t snapshot_version, - const share::SCN &ddl_checkpoint_scn) + const share::SCN &ddl_checkpoint_scn, + const bool replay_normal_in_cs_replica /*=false*/) { int ret = OB_SUCCESS; ObLSHandle ls_handle; @@ -3201,7 +3233,7 @@ int ObTabletFullDirectLoadMgr::init_ddl_table_store( ddl_param.commit_scn_ = commit_scn_; ddl_param.snapshot_version_ = table_key_.get_snapshot_version(); ddl_param.data_format_version_ = data_format_version_; - ddl_param.table_key_.table_type_ = is_column_group_store ? ObITable::DDL_MERGE_CO_SSTABLE : ObITable::DDL_DUMP_SSTABLE; + ddl_param.table_key_.table_type_ = (is_column_group_store && !replay_normal_in_cs_replica) ? ObITable::DDL_MERGE_CO_SSTABLE : ObITable::DDL_DUMP_SSTABLE; ddl_param.table_key_.scn_range_.start_scn_ = SCN::scn_dec(start_scn); ddl_param.table_key_.scn_range_.end_scn_ = start_scn; @@ -3215,6 +3247,7 @@ int ObTabletFullDirectLoadMgr::init_ddl_table_store( param.ddl_info_.ddl_checkpoint_scn_ = ddl_checkpoint_scn; param.ddl_info_.ddl_execution_id_ = execution_id_; param.ddl_info_.data_format_version_ = data_format_version_; + param.ddl_info_.ddl_table_type_ = ddl_param.table_key_.table_type_; if (OB_FAIL(ObTabletDDLUtil::create_ddl_sstable(*tablet_handle.get_obj(), ddl_param, empty_meta_array, nullptr/*first_ddl_sstable*/, storage_schema, tmp_arena, sstable_handle))) { LOG_WARN("create empty ddl sstable failed", K(ret)); @@ -3258,6 +3291,9 @@ int ObTabletFullDirectLoadMgr::init_ddl_table_store( } else { LOG_INFO("update tablet success", K(ls_id_), K(tablet_id_), "is_column_store", is_column_group_store, K(ddl_param), + "need_process_cs_replica", need_process_cs_replica_, + "need_fill_column_group", need_fill_column_group_, + 
"replay_normal_in_cs_replica", replay_normal_in_cs_replica, "column_group_schemas", storage_schema->get_column_groups(), "update_table_store_param", param, K(start_scn), K(snapshot_version), K(ddl_checkpoint_scn)); } diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h index 63f97321f..0b2826150 100644 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h @@ -388,6 +388,7 @@ public: inline const ObITable::TableKey &get_table_key() const { return table_key_; } inline uint64_t get_data_format_version() const { return data_format_version_; } inline ObDirectLoadType get_direct_load_type() const { return direct_load_type_; } + inline bool need_process_cs_replica() const { return need_process_cs_replica_; } inline ObTabletDirectLoadBuildCtx &get_sqc_build_ctx() { return sqc_build_ctx_; } inline const share::ObLSID &get_ls_id() const { return ls_id_; } inline const ObTabletID &get_tablet_id() const { return tablet_id_; } @@ -408,9 +409,23 @@ public: int calc_cg_range(ObArray &sorted_slices, const int64_t thread_cnt); const ObIArray &get_column_info() const { return column_items_; }; int prepare_storage_schema(ObTabletHandle &tablet_handle); + // init column store related parameters when open in leader + int init_column_store_params( + const ObLSHandle &ls_handle, + const ObStorageSchema &storage_schema, + const ObTabletID &new_tablet_id, + const ObDirectLoadType new_direct_load_type); + /* + * For full data direct load, row store table and column store table take different ways. + * 1. row store table: take the same way with offline ddl; + * 2. column store table: take PX to accelerate. + * 3. for table with cs replica, take the same way with offline ddl, and writing additional column store data. + * so if is data direct load type but need process cs replica, it should skip the originally column store load code. 
+ */ + bool is_originally_column_store_data_direct_load() const { return is_data_direct_load(direct_load_type_) && !need_process_cs_replica_; } VIRTUAL_TO_STRING_KV(K_(is_inited), K_(is_schema_item_ready), K_(ls_id), K_(tablet_id), K_(table_key), K_(data_format_version), K_(ref_cnt), - K_(direct_load_type), K_(sqc_build_ctx), KPC(lob_mgr_handle_.get_obj()), K_(schema_item), K_(column_items), K_(lob_column_idxs)); + K_(direct_load_type), K_(need_process_cs_replica), K_(need_fill_column_group), K_(sqc_build_ctx), KPC(lob_mgr_handle_.get_obj()), K_(schema_item), K_(column_items), K_(lob_column_idxs)); private: int prepare_schema_item_on_demand(const uint64_t table_id, @@ -441,6 +456,10 @@ protected: common::ObLatch lock_; int64_t ref_cnt_; ObDirectLoadType direct_load_type_; + // only row store user tablet need process cs replica in leader, column store tablet do not need + bool need_process_cs_replica_; + // column store table, or need process cs replica + bool need_fill_column_group_; // sqc_build_ctx_ is just used for the observer node who receives the requests from the SQL Layer // to write the start log and the data redo log. And other observer nodes can not use it. 
ObTabletDirectLoadBuildCtx sqc_build_ctx_; @@ -480,7 +499,8 @@ public: const share::SCN &start_scn, const uint64_t data_format_version, const int64_t execution_id, - const share::SCN &checkpoint_scn); + const share::SCN &checkpoint_scn, + const bool replay_normal_in_cs_replica = false); int start_with_checkpoint( ObTablet &tablet, const share::SCN &start_scn, @@ -520,7 +540,7 @@ private: bool is_started() { return start_scn_.is_valid_and_not_min(); } int schedule_merge_task(const share::SCN &start_scn, const share::SCN &commit_scn, const bool wait_major_generated, const bool is_replay); // try wait build major sstable int cleanup_unlock(); - int init_ddl_table_store(const share::SCN &start_scn, const int64_t snapshot_version, const share::SCN &ddl_checkpoint_scn); + int init_ddl_table_store(const share::SCN &start_scn, const int64_t snapshot_version, const share::SCN &ddl_checkpoint_scn, const bool replay_normal_in_cs_replica = false); int update_major_sstable(); private: diff --git a/src/storage/ddl/ob_direct_load_struct.cpp b/src/storage/ddl/ob_direct_load_struct.cpp index 5dedd1e88..e54363c87 100644 --- a/src/storage/ddl/ob_direct_load_struct.cpp +++ b/src/storage/ddl/ob_direct_load_struct.cpp @@ -24,6 +24,7 @@ #include "storage/ddl/ob_direct_insert_sstable_ctx_new.h" #include "storage/lob/ob_lob_util.h" #include "storage/tablet/ob_tablet.h" +#include "storage/ob_storage_schema_util.h" #include "sql/engine/expr/ob_expr_lob_utils.h" #include "sql/das/ob_das_utils.h" #include "sql/engine/basic/chunk_store/ob_compact_store.h" @@ -638,7 +639,8 @@ int ObChunkSliceStore::close() int ObMacroBlockSliceStore::init( ObTabletDirectLoadMgr *tablet_direct_load_mgr, const blocksstable::ObMacroDataSeq &data_seq, - const SCN &start_scn) + const SCN &start_scn, + const bool need_process_cs_replica /*= false*/) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { @@ -669,7 +671,7 @@ int ObMacroBlockSliceStore::init( ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to alloc 
memory", K(ret)); } else if (OB_FAIL(static_cast(ddl_redo_callback_)->init( - ls_id, table_key.tablet_id_, DDL_MB_DATA_TYPE, table_key, ddl_task_id, start_scn, data_format_version, direct_load_type, -1/*row_id_offset*/))) { + ls_id, table_key.tablet_id_, DDL_MB_DATA_TYPE, table_key, ddl_task_id, start_scn, data_format_version, direct_load_type, -1/*row_id_offset*/, need_process_cs_replica))) { LOG_WARN("fail to init full ddl_redo_callback_", K(ret)); } } @@ -677,6 +679,7 @@ int ObMacroBlockSliceStore::init( if (OB_FAIL(macro_block_writer_.open(data_desc.get_desc(), data_seq, ddl_redo_callback_))) { LOG_WARN("open macro bock writer failed", K(ret)); } else { + need_process_cs_replica_ = need_process_cs_replica; is_inited_ = true; } } @@ -708,6 +711,147 @@ int ObMacroBlockSliceStore::close() return ret; } +ObMultiSliceStore::ObMultiSliceStore() +: is_inited_(false), + arena_allocator_(nullptr), + cs_replica_schema_(nullptr), + row_slice_store_(nullptr), + column_slice_store_(nullptr) +{} + +ObMultiSliceStore::~ObMultiSliceStore() +{ + reset(); +} + +int ObMultiSliceStore::init( + ObArenaAllocator &allocator, + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &data_seq, + const share::SCN &start_scn, + const int64_t rowkey_column_count, + const ObStorageSchema *storage_schema, + const ObIArray &col_schema, + const int64_t dir_id, + const int64_t parallelism) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("multi slice store init twice", K(ret), K_(is_inited)); + } else if (OB_UNLIKELY(nullptr == tablet_direct_load_mgr + || !data_seq.is_valid() + || rowkey_column_count <= 0 + || nullptr == storage_schema + || !storage_schema->is_row_store() + || !storage_schema->is_user_data_table())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(tablet_direct_load_mgr), K(data_seq), K(rowkey_column_count), KPC(storage_schema)); + } else if 
(OB_FAIL(ObStorageSchemaUtil::alloc_storage_schema(allocator, cs_replica_schema_))) { + LOG_WARN("fail to alloc cs_replica_schema", K(ret)); + } else if (OB_FAIL(cs_replica_schema_->init(allocator, *storage_schema, false /*skip_column_info*/, nullptr /*column_group_schema*/, true /*generate_default_cg_array*/))) { + LOG_WARN("fail to init cs_replica_schema for multi slice store", K(ret), KPC(storage_schema)); + } else if (OB_ISNULL(row_slice_store_ = OB_NEWx(ObMacroBlockSliceStore, &allocator))){ + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for row slice store", K(ret)); + } else if (OB_FAIL(row_slice_store_->init(tablet_direct_load_mgr, data_seq, start_scn, true /*need_process_cs_replica*/))) { + LOG_WARN("fail to init row slice store", K(ret), KPC(tablet_direct_load_mgr), K(data_seq), K(start_scn)); + } else if (OB_ISNULL(column_slice_store_ = OB_NEWx(ObChunkSliceStore, &allocator))){ + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory for column slice store", K(ret)); + } else if (OB_FAIL(column_slice_store_->init(rowkey_column_count, cs_replica_schema_, allocator, col_schema, dir_id, parallelism))) { + LOG_WARN("fail to init column slice store", K(ret), K(dir_id), K(parallelism), KPC(storage_schema), K(col_schema), K(rowkey_column_count)); + } else { + is_inited_ = true; + arena_allocator_ = &allocator; + LOG_DEBUG("[CS-Replica] Successfully init multi slice store", K(ret), KPC(this)); + } + + if (OB_FAIL(ret)) { + (void) free_memory(allocator); + } + return ret; +} + +int ObMultiSliceStore::append_row(const blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("multi slice store not init", K(ret)); + } else if (OB_UNLIKELY(nullptr == row_slice_store_ || nullptr == column_slice_store_)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("unexpected slice store", K(ret), KPC_(row_slice_store), KPC_(column_slice_store)); + } else if 
(OB_FAIL(row_slice_store_->append_row(datum_row))) { + LOG_WARN("fail to append row to row slice store", K(ret)); + } else if (OB_FAIL(column_slice_store_->append_row(datum_row))) { + LOG_WARN("fail to append row to column slice store", K(ret)); + } + return ret; +} + +int ObMultiSliceStore::close() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("multi slice store not init", K(ret)); + } else if (OB_UNLIKELY(nullptr == row_slice_store_ || nullptr == column_slice_store_)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("unexpected slice store", K(ret), KPC_(row_slice_store), KPC_(column_slice_store)); + } else if (OB_FAIL(row_slice_store_->close())) { + LOG_WARN("fail to close row slice store", K(ret)); + } else if (OB_FAIL(column_slice_store_->close())) { + LOG_WARN("fail to close column slice store", K(ret)); + } else { + LOG_DEBUG("[CS-Replica] Finish close multi slice store", K(ret), KPC(this)); + } + return ret; +} + +int64_t ObMultiSliceStore::get_row_count() const +{ + return column_slice_store_->get_row_count(); +} + +int64_t ObMultiSliceStore::get_next_block_start_seq() const +{ + return row_slice_store_->get_next_block_start_seq(); +} + +void ObMultiSliceStore::reset() +{ + if (OB_NOT_NULL(arena_allocator_)) { + (void) free_memory(*arena_allocator_); + } else if (nullptr == column_slice_store_ || nullptr == row_slice_store_ || nullptr == cs_replica_schema_) { + LOG_ERROR_RET(OB_ERR_UNEXPECTED, "unexpected status", KPC_(column_slice_store), KPC_(row_slice_store), KPC_(cs_replica_schema)); + } + column_slice_store_ = nullptr; + row_slice_store_ = nullptr; + cs_replica_schema_ = nullptr; + arena_allocator_ = nullptr; + is_inited_ = false; +} + +void ObMultiSliceStore::free_memory(ObArenaAllocator &allocator) +{ + if (OB_NOT_NULL(column_slice_store_)) { + column_slice_store_->~ObChunkSliceStore(); + allocator.free(column_slice_store_); + column_slice_store_ = nullptr; + } + if (OB_NOT_NULL(row_slice_store_)) { + 
row_slice_store_->~ObMacroBlockSliceStore(); + allocator.free(row_slice_store_); + row_slice_store_ = nullptr; + } + if (OB_NOT_NULL(cs_replica_schema_)) { + cs_replica_schema_->~ObStorageSchema(); + allocator.free(cs_replica_schema_); + cs_replica_schema_ = nullptr; + } +} + bool ObTabletDDLParam::is_valid() const { return is_valid_direct_load(direct_load_type_) @@ -721,7 +865,7 @@ bool ObTabletDDLParam::is_valid() const } ObDirectLoadSliceWriter::ObDirectLoadSliceWriter() - : is_inited_(false), need_column_store_(false), is_canceled_(false), start_seq_(), tablet_direct_load_mgr_(nullptr), + : is_inited_(false), writer_type_(ObDirectLoadSliceWriterType::WRITER_TYPE_MAX), is_canceled_(false), start_seq_(), tablet_direct_load_mgr_(nullptr), slice_store_(nullptr), meta_write_iter_(nullptr), row_iterator_(nullptr), allocator_(lib::ObLabel("SliceWriter"), OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), row_offset_(-1) { @@ -746,7 +890,7 @@ ObDirectLoadSliceWriter::~ObDirectLoadSliceWriter() } allocator_.reset(); row_offset_ = -1; - need_column_store_ = false; + writer_type_ = ObDirectLoadSliceWriterType::WRITER_TYPE_MAX; } //for test @@ -788,8 +932,28 @@ int ObDirectLoadSliceWriter::prepare_slice_store_if_need( LOG_WARN("not init", K(ret)); } else if (nullptr != slice_store_) { // do nothing + } else if (tablet_direct_load_mgr_->need_process_cs_replica()) { + writer_type_ = ObDirectLoadSliceWriterType::COL_REPLICA_WRITER; + ObMultiSliceStore *multi_slice_store = nullptr; + if (OB_ISNULL(storage_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("null schema", K(ret), K(*this)); + } else if (OB_ISNULL(multi_slice_store = OB_NEWx(ObMultiSliceStore, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for multi slice store failed", K(ret)); + } else if (OB_FAIL(multi_slice_store->init(allocator_, tablet_direct_load_mgr_, start_seq_, start_scn, + schema_rowkey_column_num + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(), + storage_schema, 
tablet_direct_load_mgr_->get_column_info(), dir_id, parallelism))) { + LOG_WARN("init multi slice store failed", K(ret), KPC_(tablet_direct_load_mgr), KPC(storage_schema)); + } else { + slice_store_ = multi_slice_store; + } + if (OB_FAIL(ret) && nullptr != multi_slice_store) { + multi_slice_store->~ObMultiSliceStore(); + allocator_.free(multi_slice_store); + } } else if (is_full_direct_load(tablet_direct_load_mgr_->get_direct_load_type()) && is_column_store) { - need_column_store_ = true; + writer_type_ = ObDirectLoadSliceWriterType::COL_STORE_WRITER; ObChunkSliceStore *chunk_slice_store = nullptr; if (OB_ISNULL(storage_schema)) { ret = OB_INVALID_ARGUMENT; @@ -808,6 +972,7 @@ int ObDirectLoadSliceWriter::prepare_slice_store_if_need( allocator_.free(chunk_slice_store); } } else { + writer_type_ = ObDirectLoadSliceWriterType::ROW_STORE_WRITER; ObMacroBlockSliceStore *macro_block_slice_store = nullptr; if (OB_ISNULL(macro_block_slice_store = OB_NEWx(ObMacroBlockSliceStore, &allocator_))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -1307,7 +1472,9 @@ int ObDirectLoadSliceWriter::close() int ObDirectLoadSliceWriter::fill_column_group(const ObStorageSchema *storage_schema, const SCN &start_scn, ObInsertMonitor* insert_monitor) { int ret = OB_SUCCESS; - ObChunkSliceStore *chunk_slice_store = static_cast(slice_store_); + const bool need_process_cs_replica = tablet_direct_load_mgr_->need_process_cs_replica(); + const ObChunkSliceStore *chunk_slice_store = nullptr; + ObStorageSchema *cs_replica_storage_schema = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); @@ -1317,14 +1484,42 @@ int ObDirectLoadSliceWriter::fill_column_group(const ObStorageSchema *storage_sc } else if (OB_UNLIKELY(row_offset_ < 0)) { ret = OB_ERR_SYS; LOG_WARN("row offset not set", K(ret), K(row_offset_)); - } else if (nullptr == chunk_slice_store || is_empty()) { + } else if (OB_ISNULL(slice_store_) || is_empty() + || OB_ISNULL(chunk_slice_store = 
need_process_cs_replica + ? static_cast(slice_store_)->get_column_slice_store() + : static_cast(slice_store_))) { // do nothing - LOG_INFO("chunk slice store is null or empty", K(ret), + LOG_INFO("slice_store_ is null or empty", K(ret), KPC_(slice_store), KPC(chunk_slice_store), KPC(tablet_direct_load_mgr_)); } else if (ATOMIC_LOAD(&is_canceled_)) { ret = OB_CANCELED; LOG_WARN("fil cg task canceled", K(ret), K(is_canceled_)); - } else { + } else if (need_process_cs_replica && OB_FAIL(ObStorageSchemaUtil::alloc_storage_schema(allocator_, cs_replica_storage_schema))) { + LOG_WARN("failed to alloc storage schema", K(ret)); + } else if (need_process_cs_replica && OB_FAIL(cs_replica_storage_schema->init(allocator_, *storage_schema, + false /*skip_column_info*/, nullptr /*column_group_schema*/, true /*generate_cs_replica_cg_array*/))) { + LOG_WARN("failed to init storage schema for cs replica", K(ret), KPC(storage_schema)); + } else if (OB_FAIL(inner_fill_column_group(chunk_slice_store, need_process_cs_replica ? cs_replica_storage_schema : storage_schema, start_scn, insert_monitor))) { + LOG_WARN("failed to fill column group", K(ret)); + } + + if (OB_NOT_NULL(cs_replica_storage_schema)) { + ObStorageSchemaUtil::free_storage_schema(allocator_, cs_replica_storage_schema); + cs_replica_storage_schema = nullptr; + } + + return ret; +} + + +int ObDirectLoadSliceWriter::inner_fill_column_group( + const ObChunkSliceStore *chunk_slice_store, + const ObStorageSchema *storage_schema, + const SCN &start_scn, + ObInsertMonitor* insert_monitor) +{ + int ret = OB_SUCCESS; + { // remain this {} pair to make git diff more readable const ObIArray &cg_schemas = storage_schema->get_column_groups(); FLOG_INFO("[DDL_FILL_CG] fill column group start", "tablet_id", tablet_direct_load_mgr_->get_tablet_id(), @@ -1340,7 +1535,7 @@ int ObDirectLoadSliceWriter::fill_column_group(const ObStorageSchema *storage_sc // 2. 
rescan and write for (int64_t cg_idx = 0; OB_SUCC(ret) && cg_idx < cg_schemas.count(); ++cg_idx) { cur_writer->reset(); - if (OB_FAIL(cur_writer->init(storage_schema, cg_idx, tablet_direct_load_mgr_, start_seq_, row_offset_, start_scn))) { + if (OB_FAIL(cur_writer->init(storage_schema, cg_idx, tablet_direct_load_mgr_, start_seq_, row_offset_, start_scn, tablet_direct_load_mgr_->need_process_cs_replica()))) { LOG_WARN("init co ddl writer failed", K(ret), KPC(cur_writer), K(cg_idx), KPC(this)); } else { sql::ObCompactStore *cur_datum_store = chunk_slice_store->datum_stores_.at(cg_idx); @@ -1401,7 +1596,7 @@ void ObCOSliceWriter::reset() int ObCOSliceWriter::init(const ObStorageSchema *storage_schema, const int64_t cg_idx, ObTabletDirectLoadMgr *tablet_direct_load_mgr, const ObMacroDataSeq &start_seq, const int64_t row_id_offset, - const SCN &start_scn) + const SCN &start_scn, const bool with_cs_replica) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { @@ -1420,6 +1615,7 @@ int ObCOSliceWriter::init(const ObStorageSchema *storage_schema, const int64_t c const int64_t ddl_task_id = tablet_direct_load_mgr->get_ddl_task_id(); const uint64_t data_format_version = tablet_direct_load_mgr->get_data_format_version(); ObLSID ls_id = tablet_direct_load_mgr->get_ls_id(); + const bool need_submit_io = !with_cs_replica; // if need to process cs replica, only write clog, not submit macro block to disk if (OB_FAIL(data_desc_.init(true/*is ddl*/, *storage_schema, ls_id, @@ -1429,13 +1625,14 @@ int ObCOSliceWriter::init(const ObStorageSchema *storage_schema, const int64_t c data_format_version, SCN::min_scn(), &cg_schema, - cg_idx))) { + cg_idx, + need_submit_io))) { LOG_WARN("init data store desc failed", K(ret)); } else if (OB_FAIL(index_builder_.init(data_desc_.get_desc(), nullptr/*macro block flush callback*/, ObSSTableIndexBuilder::ENABLE))) { // data_desc is deep copied LOG_WARN("init sstable index builder failed", K(ret), K(ls_id), K(table_key), K(data_desc_)); } else if 
(FALSE_IT(data_desc_.get_desc().sstable_index_builder_ = &index_builder_)) { // for build the tail index block in macro block } else if (OB_FAIL(flush_callback_.init(ls_id, table_key.tablet_id_, DDL_MB_DATA_TYPE, table_key, ddl_task_id, - start_scn, data_format_version, tablet_direct_load_mgr->get_direct_load_type(), row_id_offset))) { + start_scn, data_format_version, tablet_direct_load_mgr->get_direct_load_type(), row_id_offset, with_cs_replica, need_submit_io))) { LOG_WARN("fail to init redo log writer callback", KR(ret)); } else if (OB_FAIL(macro_block_writer_.open(data_desc_.get_desc(), start_seq, &flush_callback_))) { LOG_WARN("fail to open macro block writer", K(ret), K(ls_id), K(table_key), K(data_desc_), K(start_seq)); diff --git a/src/storage/ddl/ob_direct_load_struct.h b/src/storage/ddl/ob_direct_load_struct.h index 85f80a0be..f00e1f6ae 100644 --- a/src/storage/ddl/ob_direct_load_struct.h +++ b/src/storage/ddl/ob_direct_load_struct.h @@ -200,13 +200,13 @@ struct ObDirectInsertCommonParam final { public: ObDirectInsertCommonParam() - : ls_id_(), tablet_id_(), direct_load_type_(DIRECT_LOAD_INVALID), data_format_version_(0), read_snapshot_(0) + : ls_id_(), tablet_id_(), direct_load_type_(DIRECT_LOAD_INVALID), data_format_version_(0), read_snapshot_(0), replay_normal_in_cs_replica_(false) {} ~ObDirectInsertCommonParam() = default; bool is_valid() const { return ls_id_.is_valid() && tablet_id_.is_valid() && data_format_version_ >= 0 && read_snapshot_ >= 0 && DIRECT_LOAD_INVALID <= direct_load_type_ && direct_load_type_ <= DIRECT_LOAD_MAX; } - TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(direct_load_type), K_(data_format_version), K_(read_snapshot)); + TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(direct_load_type), K_(data_format_version), K_(read_snapshot), K_(replay_normal_in_cs_replica)); public: share::ObLSID ls_id_; common::ObTabletID tablet_id_; @@ -215,6 +215,7 @@ public: // read_snapshot_ is used to scan the source data. 
// For full direct load task, it is also the commit version of the target macro block. int64_t read_snapshot_; + bool replay_normal_in_cs_replica_; // when ddl and add cs replica are concurrent, leader may write normal clog }; // only used in runtime execution @@ -453,7 +454,7 @@ class ObMacroBlockSliceStore: public ObTabletSliceStore { public: ObMacroBlockSliceStore() - : is_inited_(false), ddl_redo_callback_(nullptr) {} + : is_inited_(false), need_process_cs_replica_(false), ddl_redo_callback_(nullptr) {} virtual ~ObMacroBlockSliceStore() { if (ddl_redo_callback_ != nullptr) { common::ob_delete(ddl_redo_callback_); @@ -462,17 +463,51 @@ public: int init( ObTabletDirectLoadMgr *tablet_direct_load_mgr, const blocksstable::ObMacroDataSeq &data_seq, - const share::SCN &start_scn); + const share::SCN &start_scn, + const bool need_process_cs_replica = false); virtual int append_row(const blocksstable::ObDatumRow &datum_row) override; virtual int close() override; virtual int64_t get_next_block_start_seq() const override { return macro_block_writer_.get_last_macro_seq(); } - TO_STRING_KV(K(is_inited_), K(macro_block_writer_)); + TO_STRING_KV(K(is_inited_), K_(need_process_cs_replica), K(macro_block_writer_)); private: bool is_inited_; + bool need_process_cs_replica_; blocksstable::ObIMacroBlockFlushCallback *ddl_redo_callback_; blocksstable::ObMacroBlockWriter macro_block_writer_; }; +class ObMultiSliceStore : public ObTabletSliceStore +{ +public: + ObMultiSliceStore(); + virtual ~ObMultiSliceStore(); + int init( + ObArenaAllocator &allocator, + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const blocksstable::ObMacroDataSeq &data_seq, + const share::SCN &start_scn, + const int64_t rowkey_column_count, + const ObStorageSchema *storage_schema, + const ObIArray &col_schema, + const int64_t dir_id, + const int64_t parallelism); + virtual int append_row(const blocksstable::ObDatumRow &datum_row) override; + virtual int close() override; + virtual int64_t get_row_count() 
const override; + virtual int64_t get_next_block_start_seq() const override; + void reset(); + const ObChunkSliceStore *get_column_slice_store() const { return column_slice_store_; } + TO_STRING_KV(K_(is_inited), KPC_(cs_replica_schema), KPC_(row_slice_store), KPC_(column_slice_store)); +private: + void free_memory(ObArenaAllocator &allocator); +private: + bool is_inited_; + ObArenaAllocator *arena_allocator_; + ObStorageSchema *cs_replica_schema_; + ObMacroBlockSliceStore *row_slice_store_; + ObChunkSliceStore *column_slice_store_; +}; + class ObTabletDirectLoadMgr; struct ObInsertMonitor final{ @@ -490,6 +525,14 @@ public: class ObCOSliceWriter; class ObDirectLoadSliceWriter final { +public: + enum class ObDirectLoadSliceWriterType: uint8_t + { + ROW_STORE_WRITER = 0, + COL_STORE_WRITER = 1, + COL_REPLICA_WRITER = 2, + WRITER_TYPE_MAX = 3 + }; public: ObDirectLoadSliceWriter(); ~ObDirectLoadSliceWriter(); @@ -541,11 +584,15 @@ public: int64_t get_row_offset() const { return row_offset_; } blocksstable::ObMacroDataSeq &get_start_seq() { return start_seq_; } bool is_empty() const { return 0 == get_row_count(); } - bool need_column_store() const { return need_column_store_; } + bool is_row_store_writer() const { return ObDirectLoadSliceWriterType::ROW_STORE_WRITER == writer_type_; } + bool is_col_store_writer() const { return ObDirectLoadSliceWriterType::COL_STORE_WRITER == writer_type_; } + bool is_cs_replica_write() const { return ObDirectLoadSliceWriterType::COL_REPLICA_WRITER == writer_type_; } + bool need_column_store() const { return is_col_store_writer() || is_cs_replica_write(); } ObTabletSliceStore *get_slice_store() const { return slice_store_; } + ObDirectLoadSliceWriterType get_writer_type() const { return writer_type_; } void cancel() { ATOMIC_SET(&is_canceled_, true); } int64_t get_next_block_start_seq() const { return nullptr == slice_store_ ? 
start_seq_.get_data_seq() /*slice empty*/ : slice_store_->get_next_block_start_seq(); } - TO_STRING_KV(K(is_inited_), K(need_column_store_), K(is_canceled_), K(start_seq_), KPC(slice_store_), K(row_offset_)); + TO_STRING_KV(K(is_inited_), K(writer_type_), K(is_canceled_), K(start_seq_), KPC(slice_store_), K(row_offset_)); private: int fill_lob_into_memtable( // for version < 4.3.0.0 ObIAllocator &allocator, @@ -601,9 +648,14 @@ private: share::ObTabletCacheInterval &pk_interval, ObLobMetaRowIterator *&row_iter); int mock_chunk_store(const int64_t row_cnt); + int inner_fill_column_group( + const ObChunkSliceStore *chunk_slice_store, + const ObStorageSchema *storage_schema, + const share::SCN &start_scn, + ObInsertMonitor *monitor_node = NULL); private: bool is_inited_; - bool need_column_store_; + ObDirectLoadSliceWriterType writer_type_; bool is_canceled_; blocksstable::ObMacroDataSeq start_seq_; ObTabletDirectLoadMgr *tablet_direct_load_mgr_; @@ -625,7 +677,8 @@ public: ObTabletDirectLoadMgr *tablet_direct_load_mgr, const blocksstable::ObMacroDataSeq &start_seq, const int64_t row_id_offset, - const share::SCN &start_scn); + const share::SCN &start_scn, + const bool with_cs_replica); void reset(); int append_row( const sql::ObChunkDatumStore::StoredRow *stored_row); diff --git a/src/storage/high_availability/ob_cs_replica_migration.cpp b/src/storage/high_availability/ob_cs_replica_migration.cpp new file mode 100644 index 000000000..1162df801 --- /dev/null +++ b/src/storage/high_availability/ob_cs_replica_migration.cpp @@ -0,0 +1,693 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#define USING_LOG_PREFIX STORAGE +#include "storage/high_availability/ob_cs_replica_migration.h" +#include "storage/column_store/ob_column_store_replica_util.h" +#include "storage/compaction/ob_tenant_tablet_scheduler.h" +#include "share/scheduler/ob_dag_warning_history_mgr.h" +#include "share/ob_debug_sync_point.h" + +namespace oceanbase +{ +namespace storage +{ +ERRSIM_POINT_DEF(EN_ALL_STATE_DETERMINISTIC_FALSE); + +/*----------------------------- ObTabletCOConvertCtx -----------------------------*/ +ObTabletCOConvertCtx::ObTabletCOConvertCtx() + : tablet_id_(), + co_dag_net_id_(), + status_(Status::MAX_STATUS), + retry_cnt_(0), + is_inited_(false) +{ +} + +ObTabletCOConvertCtx::~ObTabletCOConvertCtx() +{ + reset(); +} + +int ObTabletCOConvertCtx::init( + const ObTabletID &tablet_id, + const share::ObDagId &co_dag_net_id) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(!tablet_id.is_valid() || !co_dag_net_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("tablet_id ro co dag net id is invalid", K(ret), K(tablet_id), K(co_dag_net_id)); + } else { + tablet_id_ = tablet_id; + co_dag_net_id_ = co_dag_net_id; + status_ = Status::UNKNOWN; + is_inited_ = true; + } + return ret; +} + +void ObTabletCOConvertCtx::reset() +{ + tablet_id_.reset(); + co_dag_net_id_.reset(); + status_ = Status::MAX_STATUS; + retry_cnt_ = 0; + is_inited_ = false; +} + +bool ObTabletCOConvertCtx::is_valid() const +{ + return tablet_id_.is_valid() + && co_dag_net_id_.is_valid() + && status_ >= Status::UNKNOWN + && status_ < Status::MAX_STATUS + && retry_cnt_ >= 
0 + && is_inited_; +} + +void ObTabletCOConvertCtx::set_progressing() +{ + int ret = OB_SUCCESS; + if (status_ != Status::RETRY_EXHAUSTED) { + status_ = Status::PROGRESSING; + } +} + +/*----------------------------- ObHATabletGroupCOConvertCtx -----------------------------*/ +ObHATabletGroupCOConvertCtx::ObHATabletGroupCOConvertCtx() + : ObHATabletGroupCtx(TabletGroupCtxType::CS_REPLICA_TYPE), + finish_migration_cnt_(0), + finish_check_cnt_(0), + retry_exhausted_cnt_(0), + idx_map_(), + convert_ctxs_() +{ +} + +ObHATabletGroupCOConvertCtx::~ObHATabletGroupCOConvertCtx() +{ + common::SpinWLockGuard guard(lock_); + if (idx_map_.created()) { + idx_map_.destroy(); + } +} + +void ObHATabletGroupCOConvertCtx::reuse() +{ + common::SpinWLockGuard guard(lock_); + inner_reuse(); + ObHATabletGroupCtx::inner_reuse(); +} + +void ObHATabletGroupCOConvertCtx::inner_reuse() +{ + finish_migration_cnt_ = 0; + finish_check_cnt_ = 0; + retry_exhausted_cnt_ = 0; + if (idx_map_.created()) { + idx_map_.destroy(); + } + convert_ctxs_.reuse(); +} + +int ObHATabletGroupCOConvertCtx::inner_init() +{ + int ret = OB_SUCCESS; + finish_migration_cnt_ = 0; + finish_check_cnt_ = 0; + retry_exhausted_cnt_ = 0; + const int64_t count = tablet_id_array_.count(); + if (OB_UNLIKELY(idx_map_.created() || !convert_ctxs_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("idx map is created or convert ctxs not empty", K(ret), KPC(this)); + } else if (OB_FAIL(idx_map_.create(TABLET_CONVERT_CTX_MAP_BUCKED_NUM, ObMemAttr(MTL_ID(), "HATGCOCtx")))) { + LOG_WARN("failed to create tablet convert ctx idx map", K(ret)); + } else if (OB_FAIL(convert_ctxs_.reserve(count))) { + LOG_WARN("failed to reserve convert ctxs", K(ret), K(count)); + } else { + for (int64_t idx = 0; OB_SUCC(ret) && idx < count; ++idx) { + const ObTabletID &tablet_id = tablet_id_array_.at(idx).tablet_id_; + ObDagId co_dag_net_id; + co_dag_net_id.init(GCTX.self_addr()); + ObTabletCOConvertCtx co_convert_ctx; + if 
(OB_FAIL(co_convert_ctx.init(tablet_id, co_dag_net_id))) { + LOG_WARN("failed to init co convert ctx", K(ret), K(tablet_id), K(co_dag_net_id)); + } else if (OB_FAIL(convert_ctxs_.push_back(co_convert_ctx))) { + LOG_WARN("failed to push back co convert ctx", K(ret), K(co_convert_ctx)); + } else if (OB_FAIL(idx_map_.set_refactored(tablet_id, idx))) { + LOG_WARN("failed to set ctx idx into map", K(ret), K(tablet_id), K(idx)); + } + } + } + return ret; +} + +void ObHATabletGroupCOConvertCtx::inc_finish_migration_cnt() +{ + common::SpinWLockGuard guard(lock_); + finish_migration_cnt_++; +} + +bool ObHATabletGroupCOConvertCtx::ready_to_check() const +{ + common::SpinRLockGuard guard(lock_); + bool bret = false; + if (finish_migration_cnt_ < convert_ctxs_.count()) { + } else if (finish_migration_cnt_ > convert_ctxs_.count()) { + LOG_ERROR_RET(OB_ERR_UNEXPECTED, "invalid finish migration cnt", KPC(this)); + } else { + int ret = OB_SUCCESS; // ignore ret + for (int64_t idx = 0; OB_SUCC(ret) && idx < tablet_id_array_.count(); ++idx) { + const ObTabletID &tablet_id = tablet_id_array_[idx].tablet_id_; + bool is_exist = true; + int64_t ctx_idx = 0; + if (OB_FAIL(inner_get_valid_convert_ctx_idx(tablet_id, ctx_idx))) { + LOG_WARN("failed to get convert ctx idx", K(ret), K(tablet_id)); + } else if (convert_ctxs_[ctx_idx].is_progressing()) { + if (OB_FAIL(MTL(ObTenantDagScheduler *)->check_dag_net_exist(convert_ctxs_[ctx_idx].co_dag_net_id_, is_exist))) { + LOG_WARN("failed to check dag exists", K(ret), K(tablet_id), K(convert_ctxs_[ctx_idx])); + } else if (!is_exist) { + bret = true; + break; + } + } + } + } + LOG_TRACE("[CS-Replica] check ready to check", K(bret), KPC(this)); + return bret; +} + +bool ObHATabletGroupCOConvertCtx::is_all_state_deterministic() const +{ + common::SpinRLockGuard guard(lock_); + return convert_ctxs_.count() <= (finish_check_cnt_ + retry_exhausted_cnt_); +} + +int ObHATabletGroupCOConvertCtx::set_convert_status(const ObTabletID &tablet_id, const 
ObTabletCOConvertCtx::Status status) +{ + int ret = OB_SUCCESS; + int64_t idx = 0; + common::SpinWLockGuard guard(lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(inner_get_valid_convert_ctx_idx(tablet_id, idx))) { + LOG_WARN("failed to get convert ctx idx", K(ret), K(tablet_id)); + } else if (ObTabletCOConvertCtx::Status::FINISHED == status) { + inner_set_convert_finish(convert_ctxs_[idx]); + } else if (ObTabletCOConvertCtx::Status::RETRY_EXHAUSTED == status) { + inner_set_retry_exhausted(convert_ctxs_[idx]); + } else if (ObTabletCOConvertCtx::Status::PROGRESSING == status) { + convert_ctxs_[idx].set_progressing(); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid status to set", K(ret), K(tablet_id), K(status), K(convert_ctxs_[idx])); + } + return ret; +} + +int ObHATabletGroupCOConvertCtx::get_co_dag_net_id(const ObTabletID &tablet_id, share::ObDagId &co_dag_net_id) const +{ + int ret = OB_SUCCESS; + int64_t idx = 0; + common::SpinRLockGuard guard(lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(inner_get_valid_convert_ctx_idx(tablet_id, idx))) { + LOG_WARN("failed to get convert ctx idx", K(ret), K(tablet_id)); + } else { + co_dag_net_id = convert_ctxs_[idx].co_dag_net_id_; + } + return ret; +} + +int ObHATabletGroupCOConvertCtx::check_and_schedule(ObLS &ls) +{ + common::SpinWLockGuard guard(lock_); + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + const int64_t count = tablet_id_array_.count(); + for (int64_t idx = 0; idx < count; ++idx) { + if (OB_TMP_FAIL(inner_check_and_schedule(ls, tablet_id_array_[idx].tablet_id_))) { + LOG_WARN("failed to check and schedule", K(tmp_ret)); + } + } + } + return ret; +} + +int ObHATabletGroupCOConvertCtx::check_need_convert(const ObTablet &tablet, bool &need_convert) +{ + int ret = OB_SUCCESS; + need_convert = false; + 
common::ObArenaAllocator tmp_allocator; // for schema_on_tablet + ObStorageSchema *schema_on_tablet = nullptr; + if (OB_FAIL(tablet.load_storage_schema(tmp_allocator, schema_on_tablet))) { + LOG_WARN("failed to load storage schema", K(ret),K(tablet)); + } else { + need_convert = ObCSReplicaUtil::check_need_convert_cs_when_migration(tablet, *schema_on_tablet); + } + + if (OB_NOT_NULL(schema_on_tablet)) { + schema_on_tablet->~ObStorageSchema(); + tmp_allocator.free(schema_on_tablet); + schema_on_tablet = nullptr; + } + return ret; +} + +int ObHATabletGroupCOConvertCtx::update_deleted_data_tablet_status( + ObHATabletGroupCtx *tablet_group_ctx, + const ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + ObHATabletGroupCOConvertCtx *group_convert_ctx = nullptr; + if (OB_UNLIKELY(OB_ISNULL(tablet_group_ctx) + || !tablet_group_ctx->is_cs_replica_ctx())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tablet group ctx", K(ret), K(tablet_id), KPC(tablet_group_ctx)); + } else if (FALSE_IT(group_convert_ctx = static_cast(tablet_group_ctx))) { + } else if (OB_FAIL(group_convert_ctx->set_convert_finsih(tablet_id))) { + LOG_WARN("failed to set convert finish", K(ret), K(tablet_id)); + } else { + (void) group_convert_ctx->inc_finish_migration_cnt(); + } + return ret; +} + +void ObHATabletGroupCOConvertCtx::inner_set_convert_finish(ObTabletCOConvertCtx &convert_ctx) +{ + convert_ctx.set_finished(); + finish_check_cnt_++; +} + +void ObHATabletGroupCOConvertCtx::inner_set_retry_exhausted(ObTabletCOConvertCtx &convert_ctx) +{ + convert_ctx.set_retry_exhausted(); + retry_exhausted_cnt_++; +} + +int ObHATabletGroupCOConvertCtx::inner_get_valid_convert_ctx_idx(const ObTabletID &tablet_id, int64_t &idx) const +{ + int ret = OB_SUCCESS; + idx = 0; + if (OB_UNLIKELY(!tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tablet_id", K(ret), K(tablet_id)); + } else if (OB_FAIL(idx_map_.get_refactored(tablet_id, idx))) { + LOG_WARN("failed to get convert ctx idx", 
K(ret), K(tablet_id)); + } else if (OB_UNLIKELY(idx < 0 || idx >= convert_ctxs_.count())) { + ret = OB_INDEX_OUT_OF_RANGE; + LOG_WARN("convert ctx idx is invalid", K(ret), K(idx), K(tablet_id)); + } else if (OB_UNLIKELY(!convert_ctxs_[idx].is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("convert ctx is null or invalid", K(ret), K(convert_ctxs_[idx])); + } + return ret; +} + +int ObHATabletGroupCOConvertCtx::inner_check_and_schedule(ObLS &ls, const ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + int64_t idx = 0; + const ObLSID &ls_id = ls.get_ls_id(); + ObTabletHandle tablet_handle; + ObTablet *tablet = nullptr; + bool need_convert = false; + bool is_dag_net_exist = false; + + if (OB_FAIL(inner_get_valid_convert_ctx_idx(tablet_id, idx))) { + LOG_WARN("failed to get convert ctx idx", K(ret), K(ls_id), K(tablet_id)); + } else if (convert_ctxs_[idx].is_finished() || convert_ctxs_[idx].is_retry_exhausted()) { + } else if (OB_FAIL(ls.get_tablet(tablet_id, tablet_handle))) { + if (OB_TABLET_NOT_EXIST == ret) { + LOG_INFO("tablet maybe deleted, skip it", K(ret), K(ls_id), K(tablet_id)); + inner_set_convert_finish(convert_ctxs_[idx]); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get tablet handle", K(ret), K(ls_id), K(tablet_id)); + } + } else if (OB_ISNULL(tablet = tablet_handle.get_obj())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet should not be null", K(ret), K(ls_id), K(tablet_id)); + } else if (convert_ctxs_[idx].is_unknown()) { + // tablet in cs replcia != tablet need convert to column store, need take storage schema into consideration + if (OB_FAIL(check_need_convert(*tablet, need_convert))) { + LOG_WARN("failed to check need convert", K(ret), K(ls_id), K(tablet_id)); + } else if (need_convert) { + if (tablet->get_tablet_meta().ha_status_.is_data_status_complete()) { + convert_ctxs_[idx].set_progressing(); + } + } else { + inner_set_convert_finish(convert_ctxs_[idx]); + } + } + + if (OB_FAIL(ret)) { + } else if 
(!convert_ctxs_[idx].is_progressing()) { + } else if (OB_ISNULL(tablet)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("co_convert_ctx is progressing, but tablet is nullptr", K(ret), K(tablet_id), K(ls_id), K(need_convert), K(idx), K(convert_ctxs_[idx]), KPC(this)); + } else if (!tablet->is_row_store()) { + inner_set_convert_finish(convert_ctxs_[idx]); + LOG_INFO("[CS-Replica] Finish co merge dag net for switching column store", K(ls_id), K(tablet_id), K(tablet_handle)); + } else if (OB_FAIL(MTL(ObTenantDagScheduler *)->check_dag_net_exist(convert_ctxs_[idx].co_dag_net_id_, is_dag_net_exist))) { + LOG_WARN("failed to check dag exists", K(ret), K(convert_ctxs_[idx]), K(tablet_id)); + } else if (is_dag_net_exist) { + } else if (OB_FAIL(compaction::ObTenantTabletScheduler::schedule_convert_co_merge_dag_net(ls_id, *tablet, convert_ctxs_[idx].retry_cnt_, convert_ctxs_[idx].co_dag_net_id_))) { + LOG_WARN("failed to schedule convert co merge", K(ret), K(ls_id), K(tablet_id)); + } else { + convert_ctxs_[idx].inc_retry_cnt(); + if (convert_ctxs_[idx].is_retry_exhausted()) { + inner_set_retry_exhausted(convert_ctxs_[idx]); +#ifdef ERRSIM + LOG_INFO("[CS-Replica] set tablet co convert retry exhausted", K(ret), K(idx), K(tablet_id)); + DEBUG_SYNC(AFTER_SET_CO_CONVERT_RETRY_EXHUASTED); +#endif + } + } + + return ret; +} + +/*----------------------------- ObDataTabletsCheckCOConvertDag -----------------------------*/ +ObDataTabletsCheckCOConvertDag::ObDataTabletsCheckCOConvertDag() + : ObMigrationDag(ObDagType::DAG_TYPE_TABLET_CHECK_CONVERT), + ls_(nullptr), + first_start_time_(0), + is_inited_(false) +{ +} + +ObDataTabletsCheckCOConvertDag::~ObDataTabletsCheckCOConvertDag() +{ +} + +bool ObDataTabletsCheckCOConvertDag::check_can_schedule() +{ + int ret = OB_SUCCESS; + bool bret = false; + ObMigrationCtx *migration_ctx = nullptr; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(ha_dag_net_ctx_) || 
OB_UNLIKELY(ObIHADagNetCtx::LS_MIGRATION != ha_dag_net_ctx_->get_dag_net_ctx_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ha dag net ctx is null or unexpected type", K(ret), KPC(ha_dag_net_ctx_)); + } else if (OB_ISNULL(migration_ctx = get_migration_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("migration ctx is null", K(ret)); + } else if (OB_FAIL(inner_check_can_schedule(*migration_ctx, bret))) { + LOG_WARN("failed to check can schedule", K(ret)); + } + if (OB_FAIL(ret)) { + bret = true; + LOG_INFO("failed to check can schedule, allow dag running", K(ret), KPC_(ha_dag_net_ctx)); + } + return bret; +} + +int ObDataTabletsCheckCOConvertDag::inner_check_can_schedule( + ObMigrationCtx &migration_ctx, + bool &can_schedule) +{ + int ret = OB_SUCCESS; + bool all_state_deterministic = true; // all finish check or retry exhausted + ObCheckScheduleReason reason = ObCheckScheduleReason::MAX_NOT_SCHEDULE; + // time for diagnose. if dag has not be scheduled, start_time_ is 0 + first_start_time_ = (0 == first_start_time_) ? start_time_ : first_start_time_; + const int64_t current_time = ObTimeUtility::current_time(); + const int64_t wait_one_round_time = (0 == start_time_) ? 
current_time - add_time_ : current_time - start_time_; + const int64_t total_wait_time = current_time - first_start_time_; + + if (migration_ctx.is_failed()) { + // migration dag net failed, no need to check anymore + can_schedule = true; + reason = ObCheckScheduleReason::MIGRATION_FAILED; +#ifdef ERRSIM + LOG_INFO("migration dag net failed, make check dag schedule"); +#endif + } else { + const int64_t tablet_group_cnt = migration_ctx.tablet_group_mgr_.get_tablet_group_ctx_count(); + ObHATabletGroupCtx *ctx = nullptr; + ObHATabletGroupCOConvertCtx *group_convert_ctx = nullptr; + for (int64_t idx = 0; OB_SUCC(ret) && idx < tablet_group_cnt; ++idx) { + if (OB_FAIL(migration_ctx.tablet_group_mgr_.get_tablet_group_ctx(idx, ctx))) { + LOG_WARN("failed to get tablet group ctx", K(ret), K(idx)); + } else if (OB_ISNULL(ctx) || OB_UNLIKELY(!ctx->is_cs_replica_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is null or invalid type", K(ret), KPC(ctx), K(migration_ctx)); + } else if (FALSE_IT(group_convert_ctx = static_cast(ctx))) { + } else if (group_convert_ctx->is_all_state_deterministic()) { + } else if (FALSE_IT(all_state_deterministic = false)) { + } else if (group_convert_ctx->ready_to_check()) { + can_schedule = true; + reason = ObCheckScheduleReason::READY_TO_CHECK; + break; + } + } + } + + if (!can_schedule) { + if (all_state_deterministic) { + can_schedule = true; + reason = ObCheckScheduleReason::ALL_DETERMINISTIC; + } else if (wait_one_round_time > OB_DATA_TABLETS_NOT_CHECK_CONVERT_THRESHOLD) { + can_schedule = true; + reason = ObCheckScheduleReason::WAIT_TIME_EXCEED; + } + } + + const int64_t cost_time = ObTimeUtility::current_time() - current_time; + if (REACH_TENANT_TIME_INTERVAL(OB_DATA_TABLETS_NOT_CHECK_CONVERT_THRESHOLD)) { + LOG_INFO("[CS-Replica] finish check_can_schedule", K(ret), K(can_schedule), K(reason), K(wait_one_round_time), K(total_wait_time), K(cost_time), KPC(this), K(migration_ctx.tablet_group_mgr_)); + } else { + LOG_TRACE("[CS-Replica] 
finish check_can_schedule", K(ret), K(can_schedule), K(reason), K(wait_one_round_time), K(total_wait_time), K(cost_time), KPC(this), K(migration_ctx.tablet_group_mgr_)); + } + return ret; +} + +int ObDataTabletsCheckCOConvertDag::init( + ObIHADagNetCtx *ha_dag_net_ctx, + ObLS *ls) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("can not init twice", K(ret)); + } else if (OB_UNLIKELY(OB_ISNULL(ha_dag_net_ctx) || OB_ISNULL(ls))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument to init ObDataTabletsCheckCOConvertDag", + K(ret), KPC(ha_dag_net_ctx), KPC(ls)); + } else if (OB_FAIL(check_convert_ctx_valid(ha_dag_net_ctx))) { + LOG_WARN("ha dag net ctx is invalid", K(ret), KPC(ha_dag_net_ctx)); + } else { + ha_dag_net_ctx_ = ha_dag_net_ctx; + ls_ = ls; + is_inited_ = true; + } + return ret; +} + +int ObDataTabletsCheckCOConvertDag::check_convert_ctx_valid(ObIHADagNetCtx *ha_dag_net_ctx) +{ + int ret = OB_SUCCESS; + ObMigrationCtx *migration_ctx = nullptr; + if (OB_UNLIKELY(OB_ISNULL(ha_dag_net_ctx) + || ObIHADagNetCtx::DagNetCtxType::LS_MIGRATION != ha_dag_net_ctx->get_dag_net_ctx_type())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid ha_dag_net_ctx", K(ret), KPC(ha_dag_net_ctx)); + } else if (OB_ISNULL(migration_ctx = static_cast(ha_dag_net_ctx))) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("migration ctx should not be NULL", K(ret), KP(migration_ctx)); + } else { + const int64_t count = migration_ctx->tablet_group_mgr_.get_tablet_group_ctx_count(); + ObHATabletGroupCtx *ctx = nullptr; + for (int64_t idx = 0; OB_SUCC(ret) && idx < count; ++idx) { + if (OB_FAIL(migration_ctx->tablet_group_mgr_.get_tablet_group_ctx(idx, ctx))) { + LOG_WARN("failed to get tablet group ctx", K(ret), K(idx)); + } else if (OB_ISNULL(ctx) || OB_UNLIKELY(!ctx->is_cs_replica_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is null or not cs replica ctx", K(ret), KPC(ctx)); + } + } + } + return ret; +} + +int 
ObDataTabletsCheckCOConvertDag::create_first_task() +{ + int ret = OB_SUCCESS; + ObDataTabletsCheckConvertTask *task = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet check convert dag not init", K(ret)); + } else if (OB_FAIL(create_task(nullptr /*parent*/, task, ha_dag_net_ctx_, ls_))) { + LOG_WARN("failed to create tablet check convert task", K(ret)); + } else { + LOG_INFO("[CS-Replica] Success to create tablet check convert task", K(ret), KPC(this), KPC(task)); + } + return ret; +} + +bool ObDataTabletsCheckCOConvertDag::operator == (const ObIDag &other) const +{ + bool is_same = true; + if (this == &other) { + // same + } else if (get_type() != other.get_type() || ObDagType::DAG_TYPE_TABLET_CHECK_CONVERT != other.get_type()) { + is_same = false; + } else { + const ObDataTabletsCheckCOConvertDag &other_dag = static_cast(other); + ObMigrationCtx *ctx = get_migration_ctx(); + if (OB_ISNULL(ctx) || OB_ISNULL(other_dag.get_migration_ctx())) { + is_same = false; + } else { + is_same = ctx->arg_.ls_id_ == other_dag.get_migration_ctx()->arg_.ls_id_; + } + } + return is_same; +} + +int64_t ObDataTabletsCheckCOConvertDag::hash() const +{ + int64_t hash_value = 0; + ObMigrationCtx *ctx = nullptr; + if (IS_NOT_INIT) { + LOG_ERROR_RET(OB_NOT_INIT, "tablet check convert dag not init"); + } else if (OB_ISNULL(ctx = get_migration_ctx())) { + LOG_ERROR_RET(OB_ERR_UNEXPECTED, "migration ctx should not be NULL", KP(ctx)); + } else { + hash_value = common::murmurhash(&ctx->arg_.ls_id_, sizeof(ctx->arg_.ls_id_), hash_value); + ObDagType::ObDagTypeEnum dag_type = get_type(); + hash_value = common::murmurhash(&dag_type, sizeof(dag_type), hash_value); + } + return hash_value; +} + +int ObDataTabletsCheckCOConvertDag::fill_dag_key(char *buf, const int64_t buf_len) const +{ + int ret = OB_SUCCESS; + ObMigrationCtx *ctx = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet check convert dag not init", K(ret)); + } else if (OB_ISNULL(ctx = 
get_migration_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("migration ctx should not be NULL", K(ret), KP(ctx)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, + "ObDataTabletsCheckCOConvertDag: ls_id = %s, migration_type = %s, dag_prio = %s", + to_cstring(ctx->arg_.ls_id_), ObMigrationOpType::get_str(ctx->arg_.type_), + ObIDag::get_dag_prio_str(this->get_priority())))) { + LOG_WARN("failed to fill comment", K(ret), KPC(ctx)); + } + return ret; +} + +/*----------------------------- ObDataTabletsCheckConvertTask -----------------------------*/ +ObDataTabletsCheckConvertTask::ObDataTabletsCheckConvertTask() + : ObITask(ObITask::TASK_TYPE_CHECK_CONVERT_TABLET), + is_inited_(false), + ctx_(nullptr), + ls_(nullptr) +{} + +ObDataTabletsCheckConvertTask::~ObDataTabletsCheckConvertTask() +{} + +int ObDataTabletsCheckConvertTask::init( + ObIHADagNetCtx *ha_dag_net_ctx, + ObLS *ls) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("can not init twice", K(ret)); + } else if (OB_UNLIKELY(OB_ISNULL(ha_dag_net_ctx) || OB_ISNULL(ls) + || ObIHADagNetCtx::LS_MIGRATION != ha_dag_net_ctx->get_dag_net_ctx_type())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument to init ObDataTabletsCheckConvertTask", + K(ret), KPC(ha_dag_net_ctx), KPC(ls)); + } else { + ctx_ = static_cast(ha_dag_net_ctx); + ls_ = ls; + is_inited_ = true; + } + return ret; +} + +int ObDataTabletsCheckConvertTask::process() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + bool all_state_deterministic = true; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("task not init", K(ret)); + } else if (ctx_->is_failed()) { + // do nothing + } else if (OB_ISNULL(ls_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is unexpected null", K(ret), KPC_(ls)); + } else { + const int64_t count = ctx_->tablet_group_mgr_.get_tablet_group_ctx_count(); + ObHATabletGroupCtx *group_ctx = nullptr; + ObHATabletGroupCOConvertCtx *group_convert_ctx = nullptr; + for (int64_t idx = 0; 
OB_SUCC(ret) && idx < count; ++idx) { + if (OB_FAIL(ctx_->tablet_group_mgr_.get_tablet_group_ctx(idx, group_ctx))) { + LOG_WARN("failed to get tablet group ctx", K(ret), K(idx)); + } else if (OB_ISNULL(group_ctx) || OB_UNLIKELY(!group_ctx->is_cs_replica_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("group_ctx is null or not cs replica ctx", K(ret), KPC(group_ctx)); + } else if (FALSE_IT(group_convert_ctx = static_cast(group_ctx))) { + } else if (OB_TMP_FAIL(group_convert_ctx->check_and_schedule(*ls_))) { + LOG_WARN("failed to check and schedule", K(tmp_ret), KPC(group_convert_ctx)); + } else if (group_convert_ctx->is_all_state_deterministic()) { +#ifdef ERRSIM + if (EN_ALL_STATE_DETERMINISTIC_FALSE) { + all_state_deterministic = false; + LOG_INFO("ERRSIM EN_ALL_STATE_DETERMINISTIC_FALSE make new round check", K(ret), K(all_state_deterministic)); + } +#endif + } else { + all_state_deterministic = false; + } + } + } + + if (OB_FAIL(ret)) { + if (OB_TMP_FAIL(ObStorageHADagUtils::deal_with_fo(ret, this->get_dag(), true /*alllow_retry*/))) { + LOG_WARN("failed to deal with fo", K(ret), K(tmp_ret), KPC_(ctx)); + } + } else if (ctx_->is_failed()) { +#ifdef ERRSIM + LOG_INFO("migration dag net failed, make check dag exit"); +#endif + } else if (!all_state_deterministic) { + ret = OB_EAGAIN; + LOG_WARN("not wait all tablets convert finish, failed this task, make dag retry", K(ret), K(all_state_deterministic), KPC_(ctx)); + } + LOG_TRACE("[CS-Replica] Finish process check data tablets convert to column store", K(ret), KPC_(ls), KPC_(ctx)); + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/high_availability/ob_cs_replica_migration.h b/src/storage/high_availability/ob_cs_replica_migration.h new file mode 100644 index 000000000..e30502e32 --- /dev/null +++ b/src/storage/high_availability/ob_cs_replica_migration.h @@ -0,0 +1,160 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under 
Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEABASE_STORAGE_CS_REPLICA_MIGRATION_ +#define OCEABASE_STORAGE_CS_REPLICA_MIGRATION_ + +#include "storage/high_availability/ob_ls_migration.h" + +namespace oceanbase +{ +namespace storage +{ + +struct ObTabletCOConvertCtx +{ +public: + enum class Status + { + UNKNOWN = 0, // intial state, need take tablet_id and storage schema into consideration + PROGRESSING = 1, // need convert and to be check result + FINISHED = 2, // finish convert, or tablet is deleted + RETRY_EXHAUSTED = 3, // retry times >= MAX_RETRY_CNT + MAX_STATUS + }; +public: + ObTabletCOConvertCtx(); + virtual ~ObTabletCOConvertCtx(); + int init(const ObTabletID &tablet_id, const share::ObDagId &co_dag_net_id); + void reset(); + bool is_valid() const; + TO_STRING_KV(K_(tablet_id), K_(co_dag_net_id), K_(status), K_(retry_cnt), K_(is_inited)); +public: + OB_INLINE bool is_unknown() const { return Status::UNKNOWN == status_; } + OB_INLINE bool is_progressing() const { return Status::PROGRESSING == status_; } + OB_INLINE bool is_finished() const { return Status::FINISHED == status_; } + OB_INLINE bool is_retry_exhausted() const { return retry_cnt_ >= MAX_RETRY_CNT; } + void set_progressing(); + OB_INLINE void set_finished() { status_ = Status::FINISHED; } + OB_INLINE void set_retry_exhausted() { status_ = Status::RETRY_EXHAUSTED; } + OB_INLINE void inc_retry_cnt() { retry_cnt_++; } +public: + const static int64_t MAX_RETRY_CNT = 3; +public: + ObTabletID tablet_id_; + share::ObDagId co_dag_net_id_; + Status status_; + int64_t retry_cnt_; + 
bool is_inited_; +}; + +class ObHATabletGroupCOConvertCtx : public ObHATabletGroupCtx +{ +public: + ObHATabletGroupCOConvertCtx(); + virtual ~ObHATabletGroupCOConvertCtx(); +public: + virtual void reuse() override; + virtual void inner_reuse() override; + virtual int inner_init() override; + void inc_finish_migration_cnt(); + bool ready_to_check() const; + bool is_all_state_deterministic() const; + int set_convert_status(const ObTabletID &tablet_id, const ObTabletCOConvertCtx::Status status); + OB_INLINE int set_convert_finsih(const ObTabletID &tablet_id) { return set_convert_status(tablet_id, ObTabletCOConvertCtx::Status::FINISHED); } + OB_INLINE int set_convert_progressing(const ObTabletID &tablet_id) { return set_convert_status(tablet_id, ObTabletCOConvertCtx::Status::PROGRESSING); } + int get_co_dag_net_id(const ObTabletID &tablet_id, share::ObDagId &co_dag_net_id) const; + int check_and_schedule(ObLS &ls); + INHERIT_TO_STRING_KV("ObHATabletGroupCtx", ObHATabletGroupCtx, K_(finish_migration_cnt), K_(finish_check_cnt), K_(retry_exhausted_cnt), "map_size", idx_map_.size(), K_(convert_ctxs)); +public: + static int check_need_convert(const ObTablet &tablet, bool &need_convert); + static int update_deleted_data_tablet_status( + ObHATabletGroupCtx *tablet_group_ctx, + const ObTabletID &tablet_id); +private: + void inner_set_convert_finish(ObTabletCOConvertCtx &convert_ctx); + void inner_set_retry_exhausted(ObTabletCOConvertCtx &convert_ctx); + int inner_get_valid_convert_ctx_idx(const ObTabletID &tablet_id, int64_t &idx) const; + int inner_check_and_schedule(ObLS &ls, const ObTabletID &tablet_id); +private: + // refer to TransferTableMap + const static int64_t TABLET_CONVERT_CTX_MAP_BUCKED_NUM = 128; // a tablet group contains 2G size of tablets, 1024 macro block + typedef common::hash::ObHashMap TabletConvertCtxIndexMap; +public: + int64_t finish_migration_cnt_; + int64_t finish_check_cnt_; + int64_t retry_exhausted_cnt_; + TabletConvertCtxIndexMap idx_map_; + 
ObArray convert_ctxs_; + DISALLOW_COPY_AND_ASSIGN(ObHATabletGroupCOConvertCtx); +}; + +class ObDataTabletsCheckCOConvertDag : public ObMigrationDag +{ +public: + enum class ObCheckScheduleReason { + MIGRATION_FAILED = 0, + READY_TO_CHECK = 1, + ALL_DETERMINISTIC = 2, + WAIT_TIME_EXCEED = 3, + MAX_NOT_SCHEDULE = 4, + }; +public: + ObDataTabletsCheckCOConvertDag(); + virtual ~ObDataTabletsCheckCOConvertDag(); + virtual bool check_can_schedule() override; + virtual int create_first_task() override; + int init( + ObIHADagNetCtx *ha_dag_net_ctx, + ObLS *ls); + int check_convert_ctx_valid(ObIHADagNetCtx *ha_dag_net_ctx); +public: + virtual bool operator == (const share::ObIDag &other) const override; + virtual int64_t hash() const override; + virtual int fill_dag_key(char *buf, const int64_t buf_len) const override; + INHERIT_TO_STRING_KV("ObIMigrationDag", ObMigrationDag, KP(this), KPC_(ls), K_(first_start_time), K_(is_inited)); +private: + int inner_check_can_schedule(ObMigrationCtx &migration_ctx, bool &can_schedule); +public: +#ifdef ERRSIM + const static int64_t OB_DATA_TABLETS_NOT_CHECK_CONVERT_THRESHOLD = 30 * 1000 * 1000; /*30s*/ +#else + const static int64_t OB_DATA_TABLETS_NOT_CHECK_CONVERT_THRESHOLD = 10 * 60 * 1000 * 1000; /*10min*/ +#endif +private: + ObLS *ls_; + int64_t first_start_time_; + bool is_inited_; + DISALLOW_COPY_AND_ASSIGN(ObDataTabletsCheckCOConvertDag); +} ; + +class ObDataTabletsCheckConvertTask : public ObITask +{ +public: + ObDataTabletsCheckConvertTask(); + virtual ~ObDataTabletsCheckConvertTask(); + int init( + ObIHADagNetCtx *ha_dag_net_ctx, + ObLS *ls); +private: + virtual int process() override; +private: + bool is_inited_; + ObMigrationCtx *ctx_; + ObLS *ls_; + DISALLOW_COPY_AND_ASSIGN(ObDataTabletsCheckConvertTask); +}; + +} // namespace storage +} // namespace oceanbase + +#endif \ No newline at end of file diff --git a/src/storage/high_availability/ob_ls_complete_migration.cpp 
b/src/storage/high_availability/ob_ls_complete_migration.cpp index 75f3510da..e0723eee5 100644 --- a/src/storage/high_availability/ob_ls_complete_migration.cpp +++ b/src/storage/high_availability/ob_ls_complete_migration.cpp @@ -1444,7 +1444,7 @@ int ObStartCompleteMigrationTask::change_member_list_() LOG_WARN("failed to switch learner to acceptor", K(ret), K(leader_addr), K(ls_transfer_scn)); } } else { - // R-replica + // R-replica, C-replica if (OB_FAIL(replace_learners_for_add_(ls))) { LOG_WARN("failed to replace learners for add", K(ret), K(leader_addr), K(ls_transfer_scn)); } @@ -1455,7 +1455,7 @@ int ObStartCompleteMigrationTask::change_member_list_() LOG_WARN("failed to replace member with learner", K(ret), K(leader_addr), K(ls_transfer_scn)); } } else { - // R-replica + // R-replica, C-replica if (OB_FAIL(replace_learners_for_migration_(ls))) { LOG_WARN("failed to replace learners for migration", K(ret), K(leader_addr), K(ls_transfer_scn)); } diff --git a/src/storage/high_availability/ob_ls_migration.cpp b/src/storage/high_availability/ob_ls_migration.cpp index b773059db..5bac1cb7e 100644 --- a/src/storage/high_availability/ob_ls_migration.cpp +++ b/src/storage/high_availability/ob_ls_migration.cpp @@ -18,6 +18,8 @@ #include "share/scheduler/ob_dag_warning_history_mgr.h" #include "storage/tablet/ob_tablet_common.h" #include "storage/tx_storage/ob_ls_service.h" +#include "storage/compaction/ob_tenant_tablet_scheduler.h" +#include "storage/column_store/ob_column_store_replica_util.h" #include "logservice/ob_log_service.h" #include "lib/hash/ob_hashset.h" #include "lib/time/ob_time_utility.h" @@ -30,6 +32,7 @@ #include "share/ob_cluster_version.h" #include "ob_storage_ha_utils.h" #include "ob_storage_ha_src_provider.h" +#include "ob_cs_replica_migration.h" namespace oceanbase { @@ -41,6 +44,7 @@ ERRSIM_POINT_DEF(EN_DATA_TABLETS_MIGRATION_TASK_FATAL_FAILURE); ERRSIM_POINT_DEF(EN_BUILD_SYS_TABLETS_DAG_FAILED); 
ERRSIM_POINT_DEF(EN_UPDATE_LS_MIGRATION_STATUS_FAILED); ERRSIM_POINT_DEF(EN_JOIN_LEARNER_LIST_FAILED); +ERRSIM_POINT_DEF(EN_DATA_TABLET_MIGRATION_DAG_OUT_OF_RETRY); /******************ObMigrationCtx*********************/ ObMigrationCtx::ObMigrationCtx() @@ -2045,7 +2049,8 @@ ObTabletMigrationDag::ObTabletMigrationDag() is_inited_(false), ls_handle_(), copy_tablet_ctx_(), - tablet_group_ctx_(nullptr) + tablet_group_ctx_(nullptr), + tablet_type_(ObTabletType::MAX_TYPE) { } @@ -2126,7 +2131,8 @@ int ObTabletMigrationDag::init( const common::ObTabletID &tablet_id, ObTabletHandle &tablet_handle, ObIDagNet *dag_net, - ObHATabletGroupCtx *tablet_group_ctx) + ObHATabletGroupCtx *tablet_group_ctx /*=nullptr*/, + ObTabletType tablet_type /*=ObTabletType::SYS_TABLET_TYPE*/) { int ret = OB_SUCCESS; ObMigrationDagNet *migration_dag_net = nullptr; @@ -2162,11 +2168,40 @@ int ObTabletMigrationDag::init( } else { compat_mode_ = copy_tablet_ctx_.tablet_handle_.get_obj()->get_tablet_meta().compat_mode_; tablet_group_ctx_ = tablet_group_ctx; + tablet_type_ = tablet_type; is_inited_ = true; } return ret; } +int ObTabletMigrationDag::get_tablet_group_ctx(ObHATabletGroupCtx *&tablet_group_ctx) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet migration dag do not init", K(ret)); + } else if (OB_ISNULL(tablet_group_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("only data tablet has tablet group ctx", K(ret), K_(tablet_type)); + } else { + tablet_group_ctx = tablet_group_ctx_; + } + return ret; +} + +int ObTabletMigrationDag::check_is_migrate_data_tablet(bool &is_migrate_data_tablet) +{ + int ret = OB_SUCCESS; + is_migrate_data_tablet = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("tablet migration dag do not init", K(ret)); + } else if (ObTabletType::DATA_TABLET_TYPE == tablet_type_) { + is_migrate_data_tablet = true; + } + return ret; +} + int ObTabletMigrationDag::create_first_task() { int ret = OB_SUCCESS; @@ -2279,6 +2314,10 @@ 
int ObTabletMigrationDag::generate_next_dag(share::ObIDag *&dag) } else if (OB_FAIL(ls->ha_get_tablet(logic_tablet_id.tablet_id_, tablet_handle))) { if (OB_TABLET_NOT_EXIST == ret) { ret = OB_SUCCESS; + if (ls->is_cs_replica() + && OB_FAIL(ObHATabletGroupCOConvertCtx::update_deleted_data_tablet_status(tablet_group_ctx_, logic_tablet_id.tablet_id_))) { + LOG_WARN("failed to update deleted tablet status", K(ret)); + } } else { LOG_WARN("failed to get tablet", K(ret), K(logic_tablet_id)); } @@ -2291,10 +2330,14 @@ int ObTabletMigrationDag::generate_next_dag(share::ObIDag *&dag) K(ret), K(logic_tablet_id), KPC(tablet)); } else if (logic_tablet_id.transfer_seq_ < tablet->get_tablet_meta().transfer_info_.transfer_seq_) { LOG_INFO("local tablet transfer seq is bigger than remote tablet, no need copy", K(logic_tablet_id), KPC(tablet)); + if (ls->is_cs_replica() + && OB_FAIL(ObHATabletGroupCOConvertCtx::update_deleted_data_tablet_status(tablet_group_ctx_, logic_tablet_id.tablet_id_))) { + LOG_WARN("failed to update deleted tablet status", K(ret)); + } } else if (OB_FAIL(scheduler->alloc_dag_with_priority(prio, tablet_migration_dag))) { LOG_WARN("failed to alloc tablet migration dag", K(ret)); } else { - if (OB_FAIL(tablet_migration_dag->init(logic_tablet_id.tablet_id_, tablet_handle, dag_net, tablet_group_ctx_))) { + if (OB_FAIL(tablet_migration_dag->init(logic_tablet_id.tablet_id_, tablet_handle, dag_net, tablet_group_ctx_, ObTabletMigrationDag::ObTabletType::DATA_TABLET_TYPE))) { LOG_WARN("failed to init tablet migration migration dag", K(ret), K(logic_tablet_id)); } else if (FALSE_IT(dag_id.init(MYADDR))) { } else if (OB_FAIL(tablet_migration_dag->set_dag_id(dag_id))) { @@ -3007,6 +3050,20 @@ int ObTabletMigrationTask::update_ha_expected_status_( LOG_WARN("failed to update tablet ha expected status", K(ret), K(expected_status), KPC(copy_tablet_ctx_)); } } + if (OB_FAIL(ret)) { + } else if (ls->is_cs_replica()) { + bool is_migrate_data_tablet = false; + ObHATabletGroupCtx 
*tablet_group_ctx = nullptr; + if (OB_FAIL(dag->check_is_migrate_data_tablet(is_migrate_data_tablet))) { + LOG_WARN("failed to check tablet type", K(ret)); + } else if (!is_migrate_data_tablet) { + // skip sys tablet migration + } else if (OB_FAIL(dag->get_tablet_group_ctx(tablet_group_ctx))) { + LOG_WARN("failed to get tablet group ctx", K(ret), KPC(dag)); + } else if (OB_FAIL(ObHATabletGroupCOConvertCtx::update_deleted_data_tablet_status(tablet_group_ctx, copy_tablet_ctx_->tablet_id_))) { + LOG_WARN("failed to update deleted tablet status", K(ret), KPC(tablet_group_ctx), KPC(copy_tablet_ctx_)); + } + } } return ret; } @@ -3176,6 +3233,9 @@ int ObTabletFinishMigrationTask::update_data_and_expected_status_() if (OB_TABLET_NOT_EXIST == ret) { LOG_INFO("migration tablet maybe deleted, skip it", K(ret), KPC(copy_tablet_ctx_)); ret = OB_SUCCESS; + if (OB_FAIL(update_co_convert_status_for_cs_replica(true /*tablet_is_deleted*/))) { + LOG_WARN("failed update convert status for cs replica", K(ret), KPC_(copy_tablet_ctx)); + } } else { LOG_WARN("failed to update tablet ha expected status", K(ret), K(expected_status), KPC(copy_tablet_ctx_)); } @@ -3197,15 +3257,35 @@ int ObTabletFinishMigrationTask::update_data_and_expected_status_() STORAGE_LOG(ERROR, "fake EN_UPDATE_TABLET_HA_STATUS_FAILED", K(ret)); } } + if (OB_SUCC(ret)) { + ObTabletMigrationDag *tablet_migration_dag = nullptr; + bool is_migrate_data_tablet = false; + if (OB_ISNULL(dag_) || OB_UNLIKELY(ObDagType::DAG_TYPE_TABLET_MIGRATION != dag_->get_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid null or wrong type", K(ret), KPC_(dag)); + } else if (FALSE_IT(tablet_migration_dag = static_cast(dag_))) { + } else if (OB_FAIL(tablet_migration_dag->check_is_migrate_data_tablet(is_migrate_data_tablet))) { + LOG_ERROR("failed to check is migrate data tablet", K(ret)); + } else if (!is_migrate_data_tablet) { + } else if (EN_DATA_TABLET_MIGRATION_DAG_OUT_OF_RETRY) { + ret = EN_DATA_TABLET_MIGRATION_DAG_OUT_OF_RETRY; 
+ LOG_INFO("ERRSIM EN_DATA_TABLET_MIGRATION_DAG_OUT_OF_RETRY", K(ret)); + } + } #endif const ObTabletDataStatus::STATUS data_status = ObTabletDataStatus::COMPLETE; - if (OB_FAIL(ls_->update_tablet_ha_data_status(copy_tablet_ctx_->tablet_id_, data_status))) { + if (FAILEDx(ls_->update_tablet_ha_data_status(copy_tablet_ctx_->tablet_id_, data_status))) { if (OB_TABLET_NOT_EXIST == ret) { LOG_INFO("migration tablet maybe deleted, skip it", K(ret), KPC(copy_tablet_ctx_)); ret = OB_SUCCESS; + if (OB_FAIL(update_co_convert_status_for_cs_replica(true /*tablet_is_deleted*/))) { + LOG_WARN("failed update convert status for cs replica", K(ret), KPC_(copy_tablet_ctx)); + } } else { LOG_WARN("[HA]failed to update tablet ha data status", K(ret), KPC(copy_tablet_ctx_), K(data_status)); } + } else if (OB_FAIL(update_co_convert_status_for_cs_replica(false /*tablet_is_deleted*/))) { + LOG_WARN("failed to schedule convert merge if needed", K(ret), KPC_(copy_tablet_ctx)); } else { LOG_INFO("update tablet ha data status", KPC(copy_tablet_ctx_), K(data_status)); SERVER_EVENT_ADD("storage_ha", "tablet_finish_migration_task", @@ -3220,6 +3300,98 @@ int ObTabletFinishMigrationTask::update_data_and_expected_status_() return ret; } +int ObTabletFinishMigrationTask::prepare_co_convert_ctx(bool &is_migrate_data_tablet, ObHATabletGroupCOConvertCtx *&group_convert_ctx) +{ + int ret = OB_SUCCESS; + ObTabletMigrationDag *tablet_migration_dag = nullptr; + ObHATabletGroupCtx *ctx = nullptr; + is_migrate_data_tablet = false; + group_convert_ctx = nullptr; + + if (OB_ISNULL(dag_) || OB_UNLIKELY(ObDagType::DAG_TYPE_TABLET_MIGRATION != dag_->get_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid null or wrong type", K(ret), KPC_(dag)); + } else if (FALSE_IT(tablet_migration_dag = static_cast(dag_))) { + } else if (OB_FAIL(tablet_migration_dag->check_is_migrate_data_tablet(is_migrate_data_tablet))) { + LOG_WARN("failed to check tablet type", K(ret)); + } else if (!is_migrate_data_tablet) { + // 
skip sys tablet migration + } else if (OB_FAIL(tablet_migration_dag->get_tablet_group_ctx(ctx))) { + LOG_WARN("failed to get tablet group ctx", K(ret), KPC(tablet_migration_dag)); + } else if (OB_ISNULL(ctx) || OB_UNLIKELY(!ctx->is_cs_replica_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null ctx or invalid type", K(ret), KPC(ctx)); + } else { + group_convert_ctx = static_cast(ctx); + } + return ret; +} + +int ObTabletFinishMigrationTask::update_co_convert_status_for_cs_replica(const bool tablet_is_deleted) { + int ret = OB_SUCCESS; + bool is_migrate_data_tablet = false; + ObHATabletGroupCOConvertCtx *group_convert_ctx = nullptr; + + if (!ls_->is_cs_replica()) { + // skip F/R replica + } else if (OB_FAIL(prepare_co_convert_ctx(is_migrate_data_tablet, group_convert_ctx))) { + LOG_WARN("failed to prepare convert ctx", K(ret)); + } else if (!is_migrate_data_tablet) { + } else if (OB_ISNULL(group_convert_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("convert ctx is null", K(ret)); + } else if (!tablet_is_deleted) { + (void) schedule_convert_co_merge(group_convert_ctx); // ignore ret + (void) group_convert_ctx->inc_finish_migration_cnt(); + } else if (OB_FAIL(group_convert_ctx->set_convert_finsih(copy_tablet_ctx_->tablet_id_))) { + LOG_WARN("failed to set convert finish", K(ret), K_(copy_tablet_ctx)); + } else { + (void) group_convert_ctx->inc_finish_migration_cnt(); + } + return ret; +} + +void ObTabletFinishMigrationTask::schedule_convert_co_merge( + ObHATabletGroupCOConvertCtx *group_convert_ctx) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls_->get_ls_id(); + const ObTabletID &tablet_id = copy_tablet_ctx_->tablet_id_; + ObTablet *tablet = nullptr; + ObTabletHandle tablet_handle; + ObTabletCOConvertCtx *convert_ctx = nullptr; + bool need_convert = false; + + if (OB_FAIL(ls_->get_tablet(tablet_id, tablet_handle))) { + if (OB_TABLET_NOT_EXIST == ret) { + LOG_INFO("tablet maybe deleted, skip it", K(ret), K(ls_id), K(tablet_id)); + ret = OB_SUCCESS; + 
if (OB_FAIL(group_convert_ctx->set_convert_finsih(tablet_id))) { + LOG_WARN("failed to set convert finish", K(ret), K(tablet_id)); + } + } else { + LOG_WARN("failed to get tablet handle", K(ret), K(ls_id), K(tablet_id)); + } + } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { + } else if (OB_FAIL(ObHATabletGroupCOConvertCtx::check_need_convert(*tablet, need_convert))) { + } else if (need_convert) { + DEBUG_SYNC(BEFROE_UPDATE_MIG_TABLET_CONVERT_CO_PROGRESSING); + LOG_INFO("[CS-Replica] Start schedule co merge dag to switch row to column store", K(ls_id), K(tablet_id)); + // Specific dag net id for co merge dag net to convert row store tablet into columnar store one. + // Use ObDataTabletsCheckCOConvertDag to check the convert result and re-schedule dag net if it failed, with the same dag net id. + ObDagId co_dag_net_id; + if (OB_FAIL(group_convert_ctx->get_co_dag_net_id(tablet_id, co_dag_net_id))) { + LOG_WARN("failed to get convert ctx", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(compaction::ObTenantTabletScheduler::schedule_convert_co_merge_dag_net(ls_id, *tablet, 0 /*retry_times*/, co_dag_net_id))) { + LOG_WARN("failed to schedule convert co merge for cs replica", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(group_convert_ctx->set_convert_progressing(tablet_id))) { + LOG_WARN("failed to set convert progressing", K(ret), K(tablet_id)); + } + } else if (OB_FAIL(group_convert_ctx->set_convert_finsih(tablet_id))) { + LOG_WARN("failed to set convert finish", K(ret), K(tablet_id)); + } +} + /******************ObDataTabletsMigrationDag*********************/ ObDataTabletsMigrationDag::ObDataTabletsMigrationDag() : ObMigrationDag(ObDagType::DAG_TYPE_DATA_TABLETS_MIGRATION), @@ -3463,6 +3635,8 @@ int ObDataTabletsMigrationTask::process() if (FAILEDx(generate_tablet_group_dag_())) { LOG_WARN("failed to generate tablet group dag", K(ret), KPC(ctx_)); + } else if (OB_FAIL(generate_check_co_convert_dag_if_needed())) { + LOG_WARN("failed to generate 
check convert dag", K(ret), KPC(ctx_)); } } @@ -3588,17 +3762,22 @@ int ObDataTabletsMigrationTask::build_tablet_group_info_() ObArray tablet_group_id_array; ObArray tablet_id_array; hash::ObHashSet remove_tablet_set; - + ObLS *ls = nullptr; DEBUG_SYNC(BEFORE_BUILD_TABLET_GROUP_INFO); if (!is_inited_) { ret = OB_NOT_INIT; LOG_WARN("data tablets migration task do not init", K(ret)); + } else if (OB_ISNULL(ls = ls_handle_.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is nullptr", K(ret), K_(ls_handle)); } else { ctx_->tablet_group_mgr_.reuse(); const hash::ObHashMap &tablet_simple_info_map = ctx_->tablet_simple_info_map_; - + const ObHATabletGroupCtx::TabletGroupCtxType type = ls->is_cs_replica() + ? ObHATabletGroupCtx::TabletGroupCtxType::CS_REPLICA_TYPE + : ObHATabletGroupCtx::TabletGroupCtxType::NORMAL_TYPE; for (int64_t i = 0; OB_SUCC(ret) && i < ctx_->data_tablet_id_array_.count(); ++i) { tablet_simple_info.reset(); const ObLogicTabletID &logic_tablet_id = ctx_->data_tablet_id_array_.at(i); @@ -3618,7 +3797,7 @@ int ObDataTabletsMigrationTask::build_tablet_group_info_() && ObCopyTabletStatus::TABLET_EXIST == tablet_simple_info.status_) { if (OB_FAIL(tablet_group_id_array.push_back(logic_tablet_id))) { LOG_WARN("failed to push tablet id into array", K(ret), K(logic_tablet_id)); - } else if (OB_FAIL(ctx_->tablet_group_mgr_.build_tablet_group_ctx(tablet_group_id_array))) { + } else if (OB_FAIL(ctx_->tablet_group_mgr_.build_tablet_group_ctx(tablet_group_id_array, type))) { LOG_WARN("failed to build tablet group ctx", K(ret), KPC(ctx_)); } else { LOG_INFO("succeed build tablet group ctx", K(tablet_group_id_array)); @@ -3680,7 +3859,7 @@ int ObDataTabletsMigrationTask::build_tablet_group_info_() } if (OB_SUCC(ret)) { - if (OB_FAIL(ctx_->tablet_group_mgr_.build_tablet_group_ctx(tablet_group_id_array))) { + if (OB_FAIL(ctx_->tablet_group_mgr_.build_tablet_group_ctx(tablet_group_id_array, type))) { LOG_WARN("failed to build tablet group ctx", K(ret), 
K(tablet_group_id_array), KPC(ctx_)); } else { LOG_INFO("succeed build tablet group ctx", K(tablet_group_id_array), "count", tablet_group_id_array.count()); @@ -3761,6 +3940,75 @@ int ObDataTabletsMigrationTask::generate_tablet_group_dag_() return ret; } +int ObDataTabletsMigrationTask::generate_check_co_convert_dag_if_needed() { + int ret = OB_SUCCESS; + ObLS *ls = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("data tablets migration task do not init", K(ret)); + } else if (OB_ISNULL(ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is nullptr", K(ret)); + } else if (OB_ISNULL(ls = ls_handle_.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is nullptr", K(ret), K_(ls_handle)); + } else if (!ls->is_cs_replica()) { + } else if (OB_FAIL(inner_generate_check_co_convert_dag(ls))) { + LOG_WARN("failed to generate check convert dag", K(ret)); + } else { + LOG_INFO("[CS-Replica] Finish generate check convert dag", K(ret), KPC(ls)); + SERVER_EVENT_ADD("storage_ha", "generage_check_co_convert_dag", + "tenant_id", MTL_ID(), + "ls_id", ctx_->arg_.ls_id_.id(), + "src", ctx_->arg_.src_.get_server(), + "dst", ctx_->arg_.dst_.get_server(), + "task_id", ctx_->task_id_); + } + return ret; +} + +int ObDataTabletsMigrationTask::inner_generate_check_co_convert_dag(ObLS *ls) +{ + int ret = OB_SUCCESS; + ObTenantDagScheduler *scheduler = MTL(ObTenantDagScheduler*); + ObDataTabletsCheckCOConvertDag *tablet_check_convert_dag = nullptr; + ObIDagNet *dag_net = nullptr; + ObMigrationDagNet *migration_dag_net = nullptr; + + if (OB_ISNULL(ls) || OB_ISNULL(dag_) || OB_ISNULL(dag_net = dag_->get_dag_net())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid nullptr", K(ret), KP(ls), KP_(dag), KP(dag_net)); + } else if (OB_FAIL(scheduler->alloc_dag_with_priority(dag_->get_priority(), tablet_check_convert_dag))) { + LOG_WARN("failed to alloc dag", K(ret)); + } else if (OB_UNLIKELY(ObDagNetType::DAG_NET_TYPE_MIGRATION != dag_net->get_type())) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("dag net type is unexpected", K(ret), KPC(dag_net)); + } else if (FALSE_IT(migration_dag_net = static_cast(dag_net))) { + } else if (OB_FAIL(tablet_check_convert_dag->init(migration_dag_net->get_migration_ctx(), ls))) { + LOG_WARN("failed to init tablet check convert dag", K(ret), "ls_id", ls->get_ls_id()); + } else if (OB_FAIL(dag_net->add_dag_into_dag_net(*(tablet_check_convert_dag)))) { + LOG_WARN("failed to add dag into dag net", K(ret)); + } else if (OB_FAIL(dag_->add_child_without_inheritance(*tablet_check_convert_dag))) { + LOG_WARN("failed to add child dag", K(ret), KPC(tablet_check_convert_dag), KPC_(dag)); + } else if (OB_FAIL(tablet_check_convert_dag->create_first_task())) { + LOG_WARN("failed to create first task", K(ret)); + } else if (OB_FAIL(tablet_check_convert_dag->add_child_without_inheritance(*finish_dag_))) { + LOG_WARN("failed to add finish dag as child", K(ret), KPC(tablet_check_convert_dag), KPC_(finish_dag)); + } else if (OB_FAIL(scheduler->add_dag(tablet_check_convert_dag))) { + LOG_WARN("failed to add tablet check convert dag", K(ret), KPC(tablet_check_convert_dag)); + if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { + LOG_WARN("Fail to add dag", K(ret), "ls_id", ls->get_ls_id()); + ret = OB_EAGAIN; + } + } + if (OB_FAIL(ret) && OB_NOT_NULL(tablet_check_convert_dag)) { + scheduler->free_dag(*tablet_check_convert_dag); + tablet_check_convert_dag = nullptr; + } + + return ret; +} + int ObDataTabletsMigrationTask::try_remove_unneeded_tablets_() { int ret = OB_SUCCESS; @@ -4286,6 +4534,10 @@ int ObTabletGroupMigrationTask::generate_tablet_migration_dag_() } else if (OB_FAIL(ls->ha_get_tablet(logic_tablet_id.tablet_id_, tablet_handle))) { if (OB_TABLET_NOT_EXIST == ret) { ret = OB_SUCCESS; + if (ls->is_cs_replica() + && OB_FAIL(ObHATabletGroupCOConvertCtx::update_deleted_data_tablet_status(tablet_group_ctx_, logic_tablet_id.tablet_id_))) { + LOG_WARN("failed to update deleted tablet status", K(ret)); + } } else { LOG_WARN("failed to 
get tablet", K(ret), K(logic_tablet_id)); } @@ -4298,9 +4550,13 @@ int ObTabletGroupMigrationTask::generate_tablet_migration_dag_() K(ret), K(logic_tablet_id), KPC(tablet)); } else if (logic_tablet_id.transfer_seq_ < tablet->get_tablet_meta().transfer_info_.transfer_seq_) { LOG_INFO("local tablet transfer seq is bigger than remote tablet, no need copy", K(logic_tablet_id), KPC(tablet)); + if (ls->is_cs_replica() + && OB_FAIL(ObHATabletGroupCOConvertCtx::update_deleted_data_tablet_status(tablet_group_ctx_, logic_tablet_id.tablet_id_))) { + LOG_WARN("failed to update deleted tablet status", K(ret)); + } } else if (OB_FAIL(scheduler->alloc_dag_with_priority(prio, tablet_migration_dag))) { LOG_WARN("failed to alloc tablet migration dag ", K(ret)); - } else if (OB_FAIL(tablet_migration_dag->init(logic_tablet_id.tablet_id_, tablet_handle, dag_net, tablet_group_ctx_))) { + } else if (OB_FAIL(tablet_migration_dag->init(logic_tablet_id.tablet_id_, tablet_handle, dag_net, tablet_group_ctx_, ObTabletMigrationDag::ObTabletType::DATA_TABLET_TYPE))) { LOG_WARN("failed to init tablet migration migration dag", K(ret), K(*ctx_)); } else if (OB_FAIL(dag_net->add_dag_into_dag_net(*tablet_migration_dag))) { LOG_WARN("failed to add dag into dag net", K(ret), K(*ctx_)); diff --git a/src/storage/high_availability/ob_ls_migration.h b/src/storage/high_availability/ob_ls_migration.h index 578fffad0..731c55025 100644 --- a/src/storage/high_availability/ob_ls_migration.h +++ b/src/storage/high_availability/ob_ls_migration.h @@ -305,6 +305,12 @@ private: class ObTabletMigrationDag : public ObMigrationDag { +public: + enum class ObTabletType { + SYS_TABLET_TYPE = 0, // sys tablet is unnecessary to processed in cs replica + DATA_TABLET_TYPE = 1, // only data tablet has ObHATabletGroupCOConvertCtx + MAX_TYPE + }; public: ObTabletMigrationDag(); virtual ~ObTabletMigrationDag(); @@ -319,15 +325,19 @@ public: const common::ObTabletID &tablet_id, ObTabletHandle &tablet_handle, share::ObIDagNet 
*dag_net, - ObHATabletGroupCtx *tablet_group_ctx = nullptr); + ObHATabletGroupCtx *tablet_group_ctx = nullptr, + ObTabletType tablet_type = ObTabletType::SYS_TABLET_TYPE); + int get_tablet_group_ctx(ObHATabletGroupCtx *&tablet_group_ctx); + int check_is_migrate_data_tablet(bool &is_migrate_data_tablet); int get_ls(ObLS *&ls); - INHERIT_TO_STRING_KV("ObIMigrationDag", ObMigrationDag, KP(this), K(copy_tablet_ctx_)); + INHERIT_TO_STRING_KV("ObIMigrationDag", ObMigrationDag, KP(this), K(copy_tablet_ctx_), K(tablet_type_)); protected: bool is_inited_; ObLSHandle ls_handle_; ObCopyTabletCtx copy_tablet_ctx_; ObHATabletGroupCtx *tablet_group_ctx_; + ObTabletType tablet_type_; DISALLOW_COPY_AND_ASSIGN(ObTabletMigrationDag); }; @@ -395,6 +405,7 @@ private: DISALLOW_COPY_AND_ASSIGN(ObTabletMigrationTask); }; +class ObHATabletGroupCOConvertCtx; class ObTabletFinishMigrationTask final : public share::ObITask { public: @@ -406,6 +417,10 @@ public: VIRTUAL_TO_STRING_KV(K("ObTabletFinishMigrationTask"), KP(this), KPC(ha_dag_net_ctx_), KPC(copy_tablet_ctx_), KPC(ls_)); private: int update_data_and_expected_status_(); + // handle cs replica + int prepare_co_convert_ctx(bool &is_migrate_data_tablet, ObHATabletGroupCOConvertCtx *&group_convert_ctx); + int update_co_convert_status_for_cs_replica(const bool tablet_is_deleted); + void schedule_convert_co_merge(ObHATabletGroupCOConvertCtx *group_convert_ctx); private: bool is_inited_; int64_t task_gen_time_; @@ -451,6 +466,8 @@ private: common::ObIArray &tablet_group_dag_array); int build_tablet_group_info_(); int generate_tablet_group_dag_(); + int generate_check_co_convert_dag_if_needed(); + int inner_generate_check_co_convert_dag(ObLS *ls); int try_remove_unneeded_tablets_(); int try_offline_ls_(); int record_server_event_(); diff --git a/src/storage/high_availability/ob_ls_remove_member_handler.cpp b/src/storage/high_availability/ob_ls_remove_member_handler.cpp index a102abac1..e7ba30f21 100644 --- 
a/src/storage/high_availability/ob_ls_remove_member_handler.cpp +++ b/src/storage/high_availability/ob_ls_remove_member_handler.cpp @@ -326,7 +326,6 @@ int ObLSRemoveMemberHandler::check_task_exist( ret = OB_ERR_UNEXPECTED; LOG_WARN("tenant dag scheduler should not be NULL", K(ret), KP(scheduler)); } else { - ObMember mock_member(MYADDR, OB_INVALID_TIMESTAMP); mock_remove_member_arg.tenant_id_ = MTL_ID(); mock_remove_member_arg.ls_id_ = ls_->get_ls_id(); mock_remove_member_arg.task_id_ = task_id; @@ -334,8 +333,8 @@ int ObLSRemoveMemberHandler::check_task_exist( mock_remove_member_arg.type_ = ObLSChangeMemberType::LS_REMOVE_MEMBER; param.arg_ = mock_remove_member_arg; - if (OB_FAIL(mock_remove_member_arg.remove_member_.set_member(mock_member))) { - LOG_WARN("failed to set member", K(ret), K(mock_member), K(mock_remove_member_arg)); + if (OB_FAIL(mock_remove_member_arg.remove_member_.init(MYADDR, OB_INVALID_TIMESTAMP, REPLICA_TYPE_FULL))) { + LOG_WARN("failed to init remove_member_", K(ret), K(MYADDR), K(mock_remove_member_arg)); } else if (OB_FAIL(scheduler->create_dag(¶m, exist_dag))) { LOG_WARN("failed to create ls remove member dag", K(ret)); } else if (OB_FAIL(scheduler->check_dag_exist(exist_dag, is_exist))) { diff --git a/src/storage/high_availability/ob_rebuild_service.cpp b/src/storage/high_availability/ob_rebuild_service.cpp index f5aaee998..1062db6b5 100644 --- a/src/storage/high_availability/ob_rebuild_service.cpp +++ b/src/storage/high_availability/ob_rebuild_service.cpp @@ -1092,8 +1092,10 @@ int ObLSRebuildMgr::generate_rebuild_task_() DEBUG_SYNC(BEFOR_EXEC_REBUILD_TASK); ObTaskId task_id; task_id.init(GCONF.self_addr_); - ObReplicaMember dst_replica_member(GCONF.self_addr_, timestamp); - ObReplicaMember src_replica_member(GCONF.self_addr_, timestamp); + ObReplicaMember dst_replica_member(GCONF.self_addr_, timestamp, + REPLICA_TYPE_FULL/*dummy_replica_type*/); + ObReplicaMember src_replica_member(GCONF.self_addr_, timestamp, + 
REPLICA_TYPE_FULL/*dummy_replica_type*/); ObMigrationOpArg arg; arg.cluster_id_ = GCONF.cluster_id; arg.data_src_ = src_replica_member; diff --git a/src/storage/high_availability/ob_storage_ha_dag.cpp b/src/storage/high_availability/ob_storage_ha_dag.cpp index 2a86eeb42..9a86eb1b0 100644 --- a/src/storage/high_availability/ob_storage_ha_dag.cpp +++ b/src/storage/high_availability/ob_storage_ha_dag.cpp @@ -18,6 +18,7 @@ #include "observer/ob_server_event_history_table_operator.h" #include "storage/tx_storage/ob_ls_service.h" #include "storage/tablet/ob_tablet.h" +#include "storage/high_availability/ob_cs_replica_migration.h" namespace oceanbase { @@ -476,11 +477,12 @@ int ObStorageHADagUtils::check_self_is_valid_member( } /******************ObHATabletGroupCtx*********************/ -ObHATabletGroupCtx::ObHATabletGroupCtx() +ObHATabletGroupCtx::ObHATabletGroupCtx(const TabletGroupCtxType type) : is_inited_(false), lock_(), tablet_id_array_(), - index_(0) + index_(0), + type_(type) { } @@ -499,6 +501,8 @@ int ObHATabletGroupCtx::init(const common::ObIArray &tablet_id_ LOG_WARN("init ha tablet group ctx get invalid argument", K(ret), K(tablet_id_array)); } else if (OB_FAIL(tablet_id_array_.assign(tablet_id_array))) { LOG_WARN("failed to assign tablet id array", K(ret), K(tablet_id_array)); + } else if (OB_FAIL(inner_init())) { + LOG_WARN("failed to inner init", K(ret)); } else { index_ = 0; is_inited_ = true; @@ -546,9 +550,19 @@ int ObHATabletGroupCtx::get_all_tablet_ids(ObIArray &tablet_id_ return ret; } +bool ObHATabletGroupCtx::is_cs_replica_ctx() const { + common::SpinRLockGuard guard(lock_); + return TabletGroupCtxType::CS_REPLICA_TYPE == type_; +} + void ObHATabletGroupCtx::reuse() { common::SpinWLockGuard guard(lock_); + inner_reuse(); +} + +void ObHATabletGroupCtx::inner_reuse() +{ tablet_id_array_.reuse(); index_ = 0; is_inited_ = false; @@ -608,10 +622,10 @@ int ObHATabletGroupMgr::get_next_tablet_group_ctx( } int ObHATabletGroupMgr::build_tablet_group_ctx( - 
const ObIArray &tablet_id_array) + const ObIArray &tablet_id_array, + const ObHATabletGroupCtx::TabletGroupCtxType type /*=NORMAL_TYPE*/) { int ret = OB_SUCCESS; - void *buf = nullptr; ObHATabletGroupCtx *tablet_group_ctx = nullptr; if (!is_inited_) { @@ -622,10 +636,11 @@ int ObHATabletGroupMgr::build_tablet_group_ctx( LOG_WARN("build tablet group ctx get invalid argument", K(ret), K(tablet_id_array)); } else { common::SpinWLockGuard guard(lock_); - if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObHATabletGroupCtx)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("failed to alloc memory", K(ret), KP(buf)); - } else if (FALSE_IT(tablet_group_ctx = new (buf) ObHATabletGroupCtx())) { + if (OB_FAIL(alloc_and_new_tablet_group_ctx(type, tablet_group_ctx))) { + LOG_WARN("failed to alloc and new tablet group ctx", K(ret)); + } else if (OB_ISNULL(tablet_group_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet group ctx should not be NULL", K(ret), KP(tablet_group_ctx)); } else if (OB_FAIL(tablet_group_ctx->init(tablet_id_array))) { LOG_WARN("failed to init tablet group ctx", K(ret), K(tablet_id_array)); } else if (OB_FAIL(tablet_group_ctx_array_.push_back(tablet_group_ctx))) { @@ -641,6 +656,38 @@ int ObHATabletGroupMgr::build_tablet_group_ctx( return ret; } +int ObHATabletGroupMgr::alloc_and_new_tablet_group_ctx( + const ObHATabletGroupCtx::TabletGroupCtxType type, + ObHATabletGroupCtx *&tablet_group_ctx) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + if (ObHATabletGroupCtx::TabletGroupCtxType::NORMAL_TYPE == type) { + if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObHATabletGroupCtx)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc memory", K(ret), KP(buf)); + } else { + tablet_group_ctx = new (buf) ObHATabletGroupCtx(); + } + } else if (ObHATabletGroupCtx::TabletGroupCtxType::CS_REPLICA_TYPE == type) { + if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObHATabletGroupCOConvertCtx)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc 
memory", K(ret), KP(buf)); + } else { + tablet_group_ctx = new (buf) ObHATabletGroupCOConvertCtx(); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid ctx type", K(ret), K(type)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(tablet_group_ctx)) { + tablet_group_ctx->~ObHATabletGroupCtx(); + tablet_group_ctx = nullptr; + } + return ret; +} + void ObHATabletGroupMgr::reuse() { common::SpinWLockGuard guard(lock_); @@ -655,6 +702,34 @@ void ObHATabletGroupMgr::reuse() index_ = 0; } +int64_t ObHATabletGroupMgr::get_tablet_group_ctx_count() const +{ + common::SpinRLockGuard guard(lock_); + return tablet_group_ctx_array_.count(); +} + +int ObHATabletGroupMgr::get_tablet_group_ctx( + const int64_t idx, + ObHATabletGroupCtx *&tablet_group_ctx) +{ + int ret = OB_SUCCESS; + tablet_group_ctx = nullptr; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ha tablet group mgr do not init", K(ret)); + } else { + common::SpinRLockGuard guard(lock_); + if (OB_UNLIKELY(idx < 0 || idx >= tablet_group_ctx_array_.count())) { + ret = OB_INDEX_OUT_OF_RANGE; + } else if (OB_ISNULL(tablet_group_ctx = tablet_group_ctx_array_.at(idx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet group ctx is null", K(ret), K(idx), K_(tablet_group_ctx_array)); + } + } + return ret; +} + /******************ObStorageHATaskUtils*********************/ int ObStorageHATaskUtils::check_need_copy_sstable( const ObMigrationSSTableParam ¶m, diff --git a/src/storage/high_availability/ob_storage_ha_dag.h b/src/storage/high_availability/ob_storage_ha_dag.h index 75397dd3f..5af011892 100644 --- a/src/storage/high_availability/ob_storage_ha_dag.h +++ b/src/storage/high_availability/ob_storage_ha_dag.h @@ -133,19 +133,30 @@ public: class ObHATabletGroupCtx { public: - ObHATabletGroupCtx(); + enum class TabletGroupCtxType + { + NORMAL_TYPE = 0, + CS_REPLICA_TYPE = 1, + MAX_TYPE + }; +public: + ObHATabletGroupCtx(const TabletGroupCtxType type = TabletGroupCtxType::NORMAL_TYPE); virtual ~ObHATabletGroupCtx(); int 
init(const common::ObIArray &tablet_id_array); int get_next_tablet_id(ObLogicTabletID &logic_tablet_id); int get_all_tablet_ids(common::ObIArray &tablet_id); - void reuse(); - + bool is_cs_replica_ctx() const; +public: + virtual void reuse(); + virtual void inner_reuse(); + virtual int inner_init() { return OB_SUCCESS; } TO_STRING_KV(K_(tablet_id_array), K_(index)); -private: +protected: bool is_inited_; common::SpinRWLock lock_; ObArray tablet_id_array_; int64_t index_; + TabletGroupCtxType type_; DISALLOW_COPY_AND_ASSIGN(ObHATabletGroupCtx); }; @@ -158,8 +169,14 @@ public: int get_next_tablet_group_ctx( ObHATabletGroupCtx *&tablet_group_ctx); int build_tablet_group_ctx( - const common::ObIArray &tablet_id_array); + const common::ObIArray &tablet_id_array, + const ObHATabletGroupCtx::TabletGroupCtxType type = ObHATabletGroupCtx::TabletGroupCtxType::NORMAL_TYPE); + int alloc_and_new_tablet_group_ctx( + const ObHATabletGroupCtx::TabletGroupCtxType type, + ObHATabletGroupCtx *&tablet_group_ctx); void reuse(); + int64_t get_tablet_group_ctx_count() const; + int get_tablet_group_ctx(const int64_t idx, ObHATabletGroupCtx *&tablet_group_ctx); TO_STRING_KV(K_(tablet_group_ctx_array), K_(index)); private: diff --git a/src/storage/high_availability/ob_storage_ha_src_provider.cpp b/src/storage/high_availability/ob_storage_ha_src_provider.cpp index a602ca234..f67cc19a2 100644 --- a/src/storage/high_availability/ob_storage_ha_src_provider.cpp +++ b/src/storage/high_availability/ob_storage_ha_src_provider.cpp @@ -307,7 +307,7 @@ int ObStorageHASrcProvider::get_replica_addr_list( } else { if (common::ObReplicaType::REPLICA_TYPE_FULL == dst.get_replica_type()) { need_learner_list = false; - } else if (common::ObReplicaType::REPLICA_TYPE_READONLY == dst.get_replica_type()) { + } else if (ObReplicaTypeCheck::is_non_paxos_replica(dst.get_replica_type())) { need_learner_list = true; } else { ret = OB_ERR_UNEXPECTED; @@ -379,18 +379,28 @@ int 
ObStorageHASrcProvider::check_replica_type_( if (!addr.is_valid() || !dst.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument!", K(ret), K(addr), K(dst)); - } else if (learner_list.is_valid() && learner_list.contains(addr)) { // src is R - if (common::ObReplicaType::REPLICA_TYPE_FULL == dst.get_replica_type()) { // dst is F - is_replica_type_valid = false; - } else if (common::ObReplicaType::REPLICA_TYPE_READONLY == dst.get_replica_type()) { - is_replica_type_valid = true; + } else if (learner_list.is_valid() && learner_list.contains(addr)) { + // src is R/C + ObMember src; + if (OB_FAIL(learner_list.get_learner_by_addr(addr, src))) { + LOG_WARN("failed to get learner by addr", KR(ret), K(addr)); + } else if (src.is_columnstore()) { + // src is C, dst can only be C as well + is_replica_type_valid = REPLICA_TYPE_COLUMNSTORE == dst.get_replica_type(); } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected dst replica type", K(ret), K(dst), K(learner_list)); + // src is R, dst can be non-paxos replica-type (R or C) + if (common::ObReplicaType::REPLICA_TYPE_FULL == dst.get_replica_type()) { // dst is F + is_replica_type_valid = false; + } else if (ObReplicaTypeCheck::is_non_paxos_replica(dst.get_replica_type())) { + is_replica_type_valid = true; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected dst replica type", K(ret), K(dst), K(learner_list)); + } } - } else { // src is F - if (common::ObReplicaType::REPLICA_TYPE_FULL == dst.get_replica_type() - || common::ObReplicaType::REPLICA_TYPE_READONLY == dst.get_replica_type()) { + } else { + // src is F, dst can be all replica-type (F/R/C) + if (ObReplicaTypeCheck::is_replica_type_valid(dst.get_replica_type())) { is_replica_type_valid = true; } else { ret = OB_ERR_UNEXPECTED; @@ -480,7 +490,7 @@ int ObStorageHASrcProvider::get_palf_parent_addr_(const uint64_t tenant_id, cons if (OB_FAIL(member_helper_->get_ls_leader(tenant_id, ls_id, parent_addr))) { LOG_WARN("failed to get leader addr", 
K(ret), K(tenant_id), K(ls_id)); } - } else if (common::ObReplicaType::REPLICA_TYPE_READONLY == replica_type) { + } else if (ObReplicaTypeCheck::is_non_paxos_replica(replica_type)) { if (OB_FAIL(ls->get_log_handler()->get_parent(parent_addr))) { LOG_WARN("failed to get parent addr", K(ret), K(tenant_id), K(ls_id)); } diff --git a/src/storage/high_availability/ob_storage_ha_utils.cpp b/src/storage/high_availability/ob_storage_ha_utils.cpp index 0c8b8501a..f16ae3408 100644 --- a/src/storage/high_availability/ob_storage_ha_utils.cpp +++ b/src/storage/high_availability/ob_storage_ha_utils.cpp @@ -210,6 +210,8 @@ int ObStorageHAUtils::check_tablet_replica_checksum_(const uint64_t tenant_id, c LOG_WARN("failed to batch get replica checksum item", K(ret), K(tenant_id), K(pairs), K(compaction_scn)); } else { ObArray filter_items; + ObLSColumnReplicaCache ls_cs_replica_cache; + ObTabletDataChecksumChecker data_checksum_checker; for (int64_t i = 0; OB_SUCC(ret) && i < items.count(); ++i) { const ObTabletReplicaChecksumItem &item = items.at(i); if (item.compaction_scn_ == compaction_scn) { @@ -218,12 +220,31 @@ int ObStorageHAUtils::check_tablet_replica_checksum_(const uint64_t tenant_id, c } } } + + if (FAILEDx(ls_cs_replica_cache.init())) { + LOG_WARN("failed to init ls column replica cache", K(ret)); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < filter_items.count(); ++i) { + const ObTabletReplicaChecksumItem &item = filter_items.at(i); + if (OB_FAIL(ls_cs_replica_cache.update(item.ls_id_, item.server_))) { + LOG_WARN("fail to update ls replica status", K(ret), K(item)); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < filter_items.count(); ++i) { const ObTabletReplicaChecksumItem &first_item = filter_items.at(0); const ObTabletReplicaChecksumItem &item = filter_items.at(i); - if (OB_FAIL(first_item.verify_checksum(item))) { - LOG_ERROR("failed to verify checksum", K(ret), K(tenant_id), K(tablet_id), - K(ls_id), K(compaction_scn), K(first_item), K(item), 
K(filter_items)); + const ObLSReplicaUniItem ls_item(item.ls_id_, item.server_); + bool is_cs_replica = false; + if (OB_FAIL(ls_cs_replica_cache.check_is_cs_replica(ls_item, is_cs_replica))) { + LOG_WARN("fail to check is cs replica", K(ret), K(ls_item), K(ls_cs_replica_cache)); + } else if (OB_FAIL(data_checksum_checker.check_data_checksum(item, is_cs_replica))) { + LOG_ERROR("failed to verify data checksum", K(ret), K(tenant_id), K(tablet_id), + K(ls_id), K(compaction_scn), K(item), K(filter_items), K(is_cs_replica), K(ls_cs_replica_cache)); + } else if (OB_FAIL(item.verify_column_checksum(first_item))) { + LOG_ERROR("failed to verify column checksum", K(ret), K(tenant_id), K(tablet_id), + K(ls_id), K(compaction_scn), K(first_item), K(item), K(filter_items), K(is_cs_replica), K(ls_cs_replica_cache)); } } } diff --git a/src/storage/high_availability/ob_tablet_group_restore.cpp b/src/storage/high_availability/ob_tablet_group_restore.cpp index fd5c199f2..dbc8d40dd 100644 --- a/src/storage/high_availability/ob_tablet_group_restore.cpp +++ b/src/storage/high_availability/ob_tablet_group_restore.cpp @@ -136,7 +136,7 @@ ObTabletRestoreCtx::ObTabletRestoreCtx() action_(ObTabletRestoreAction::MAX), meta_index_store_(nullptr), second_meta_index_store_(nullptr), - replica_type_(ObReplicaType::REPLICA_TYPE_MAX), + replica_type_(ObReplicaType::REPLICA_TYPE_INVALID), ha_table_info_mgr_(nullptr), need_check_seq_(false), ls_rebuild_seq_(-1), @@ -174,7 +174,7 @@ void ObTabletRestoreCtx::reset() action_ = ObTabletRestoreAction::MAX; meta_index_store_ = nullptr; second_meta_index_store_ = nullptr; - replica_type_ = ObReplicaType::REPLICA_TYPE_MAX; + replica_type_ = ObReplicaType::REPLICA_TYPE_INVALID; need_check_seq_ = false; ls_rebuild_seq_ = -1; status_ = ObCopyTabletStatus::MAX_STATUS; diff --git a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp index 8daaa75f1..05f9460c2 100755 --- a/src/storage/ls/ob_ls.cpp +++ b/src/storage/ls/ob_ls.cpp @@ -84,6 +84,8 @@ using namespace 
rootserver; namespace storage { +ERRSIM_POINT_DEF(EN_LS_NOT_SEE_CS_REPLICA); + using namespace checkpoint; using namespace mds; @@ -123,6 +125,7 @@ int ObLS::init(const share::ObLSID &ls_id, const ObMigrationStatus &migration_status, const ObLSRestoreStatus &restore_status, const SCN &create_scn, + const ObLSStoreFormat &store_format, observer::ObIMetaReport *reporter) { int ret = OB_SUCCESS; @@ -149,7 +152,8 @@ int ObLS::init(const share::ObLSID &ls_id, ls_id, migration_status, restore_status, - create_scn))) { + create_scn, + store_format))) { LOG_WARN("failed to init ls meta", K(ret), K(tenant_id), K(ls_id)); } else { rs_reporter_ = reporter; @@ -477,6 +481,42 @@ bool ObLS::is_restore_first_step() const return bool_ret; } +bool ObLS::is_cs_replica() const +{ + return ls_meta_.get_store_format().is_columnstore(); +} + +int ObLS::check_has_cs_replica(bool &has_cs_replica) const +{ + int ret = OB_SUCCESS; + has_cs_replica = false; + ObMemberList member_list; + GlobalLearnerList learner_list; + int64_t paxos_replica_number = 0; + if (OB_FAIL(get_paxos_member_list_and_learner_list(member_list, paxos_replica_number, learner_list))) { + LOG_WARN("fail to get member list and learner list", K(ret), K(ls_meta_.ls_id_)); + } else { + for (int64_t i = 0; i < learner_list.get_member_number(); i++) { + const ObMember &learner = learner_list.get_learner(i); + if (learner.is_columnstore()) { + has_cs_replica = true; + break; + } + } + } + +#ifdef ERRSIM + if (OB_SUCC(ret)) { + if (EN_LS_NOT_SEE_CS_REPLICA) { + has_cs_replica = false; + LOG_INFO("ERRSIM EN_LS_NOT_SEE_CS_REPLICA", K(ret), K(has_cs_replica)); + } + } +#endif + + return ret; +} + int ObLS::start() { int ret = OB_SUCCESS; diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index 621b9a605..3f93d1c64 100644 --- a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -231,6 +231,7 @@ public: const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const share::SCN 
&create_scn, + const ObLSStoreFormat &store_format, observer::ObIMetaReport *reporter); // I am ready to work now. int start(); @@ -309,6 +310,10 @@ public: bool is_in_gc(); bool is_restore_first_step() const; bool is_clone_first_step() const; + // is current ls replica a column store replica + bool is_cs_replica() const; + // is current ls replica set contains a column store replica + int check_has_cs_replica(bool &has_cs_replica) const; // for rebuild // remove inner tablet, the memtable and minor sstable of data tablet, disable replay // int prepare_rebuild(); @@ -497,6 +502,7 @@ public: DELEGATE_WITH_RET(ls_meta_, set_rebuild_info, int); DELEGATE_WITH_RET(ls_meta_, get_rebuild_info, int); DELEGATE_WITH_RET(ls_meta_, get_create_type, int); + DELEGATE_WITH_RET(ls_meta_, get_store_format, ObLSStoreFormat); // get ls_meta_package and sorted tablet_metas for backup. tablet gc is forbidden meanwhile. diff --git a/src/storage/ls/ob_ls_meta.cpp b/src/storage/ls/ob_ls_meta.cpp index 77190b6bb..5b24cedcd 100644 --- a/src/storage/ls/ob_ls_meta.cpp +++ b/src/storage/ls/ob_ls_meta.cpp @@ -91,7 +91,7 @@ ObLSMeta::ObLSMeta(const ObLSMeta &ls_meta) rebuild_info_(ls_meta.rebuild_info_), transfer_meta_info_(ls_meta.transfer_meta_info_), major_mv_merge_info_(ls_meta.major_mv_merge_info_), - store_format_() + store_format_(ls_meta.store_format_) { all_id_meta_.update_all_id_meta(ls_meta.all_id_meta_); } @@ -289,7 +289,8 @@ bool ObLSMeta::is_valid() const && OB_MIGRATION_STATUS_MAX != migration_status_ && ObGCHandler::is_valid_ls_gc_state(gc_state_) && restore_status_.is_valid() - && rebuild_seq_ >= 0; + && rebuild_seq_ >= 0 + && store_format_.is_valid(); } int64_t ObLSMeta::get_rebuild_seq() const @@ -708,7 +709,8 @@ int ObLSMeta::init( const share::ObLSID &ls_id, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, - const SCN &create_scn) + const SCN &create_scn, + const ObLSStoreFormat &store_format) { int ret = OB_SUCCESS; if 
(OB_INVALID_ID == tenant_id || !ls_id.is_valid() @@ -728,6 +730,7 @@ int ObLSMeta::init( gc_state_ = LSGCState::NORMAL; restore_status_ = restore_status; transfer_scn_ = SCN::min_scn(); + store_format_ = store_format; } return ret; } @@ -891,6 +894,11 @@ int ObLSMeta::check_ls_need_online(bool &need_online) const return ret; } +ObLSStoreFormat ObLSMeta::get_store_format() const +{ + return store_format_; +} + ObLSMeta::ObReentrantWLockGuard::ObReentrantWLockGuard(ObLatch &lock, const bool try_lock, const int64_t warn_threshold) diff --git a/src/storage/ls/ob_ls_meta.h b/src/storage/ls/ob_ls_meta.h index d2f96137b..14e1e1203 100644 --- a/src/storage/ls/ob_ls_meta.h +++ b/src/storage/ls/ob_ls_meta.h @@ -58,7 +58,8 @@ public: const share::ObLSID &ls_id, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, - const int64_t create_scn); + const int64_t create_scn, + const ObLSStoreFormat &store_format); void reset(); bool is_valid() const; int set_start_work_state(); @@ -112,13 +113,15 @@ public: int get_rebuild_info(ObLSRebuildInfo &rebuild_info) const; int get_create_type(int64_t &create_type) const; int check_ls_need_online(bool &need_online) const; + ObLSStoreFormat get_store_format() const; int init( const uint64_t tenant_id, const share::ObLSID &ls_id, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, - const share::SCN &create_scn); + const share::SCN &create_scn, + const ObLSStoreFormat &store_format); ObReplicaType get_replica_type() const { return unused_replica_type_; } @@ -206,7 +209,7 @@ private: ObLSRebuildInfo rebuild_info_; ObLSTransferMetaInfo transfer_meta_info_; //transfer_dml_ctrl_42x # placeholder ObMajorMVMergeInfo major_mv_merge_info_; - common::ObLSStoreFormat store_format_; //not used, only as placeholder + common::ObLSStoreFormat store_format_; // set on initialization and then remain unchanged }; } // namespace storage diff --git 
a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 5b2b0e609..25f0fffb1 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -1999,6 +1999,9 @@ int ObLSTabletService::create_tablet( ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); ObTransService *tx_svr = MTL(ObTransService*); const ObTabletMapKey key(ls_id, tablet_id); + const bool need_generate_cs_replica_cg_array = ls_->is_cs_replica() + && create_tablet_schema.is_row_store() + && create_tablet_schema.is_user_data_table(); ObTablet *tablet = nullptr; ObFreezer *freezer = ls_->get_freezer(); tablet_handle.reset(); @@ -2015,7 +2018,7 @@ int ObLSTabletService::create_tablet( ret = OB_ERR_UNEXPECTED; LOG_ERROR("new tablet is null", K(ret), KP(tablet), KP(allocator), K(tablet_handle)); } else if (OB_FAIL(tablet->init_for_first_time_creation(*allocator, ls_id, tablet_id, data_tablet_id, - create_scn, snapshot_version, create_tablet_schema, need_create_empty_major_sstable, freezer))) { + create_scn, snapshot_version, create_tablet_schema, need_create_empty_major_sstable, need_generate_cs_replica_cg_array, freezer))) { LOG_WARN("failed to init tablet", K(ret), K(ls_id), K(tablet_id), K(data_tablet_id), K(create_scn), K(snapshot_version), K(create_tablet_schema)); } else if (OB_FAIL(tablet->get_updating_tablet_pointer_param(param))) { @@ -2071,7 +2074,7 @@ int ObLSTabletService::create_inner_tablet( LOG_ERROR("new tablet is null", K(ret), KPC(tmp_tablet), K(tmp_tablet_hdl)); } else if (FALSE_IT(time_guard.click("CreateTablet"))) { } else if (OB_FAIL(tmp_tablet->init_for_first_time_creation(allocator, ls_id, tablet_id, data_tablet_id, - create_scn, snapshot_version, create_tablet_schema, true/*need_create_empty_major_sstable*/, freezer))) { + create_scn, snapshot_version, create_tablet_schema, true/*need_create_empty_major_sstable*/, false/*need_generate_cs_replica_cg_array*/, freezer))) { LOG_WARN("failed to init tablet", K(ret), 
K(ls_id), K(tablet_id), K(data_tablet_id), K(create_scn), K(snapshot_version), K(create_tablet_schema)); } else if (FALSE_IT(time_guard.click("InitTablet"))) { @@ -2403,7 +2406,7 @@ int ObLSTabletService::get_read_tables( } else if (FALSE_IT(allow_to_read_mgr_.load_allow_to_read_info(allow_to_read))) { } else if (!allow_to_read) { ret = OB_REPLICA_NOT_READABLE; - LOG_WARN("ls is not allow to read", K(ret), KPC(ls_)); + LOG_WARN("ls is not allow to read", K(ret), KPC(ls_), K(lbt())); } else if (FALSE_IT(key.ls_id_ = ls_->get_ls_id())) { } else if (OB_FAIL(ObTabletCreateDeleteHelper::check_and_get_tablet(key, handle, timeout_us, diff --git a/src/storage/ob_i_table.h b/src/storage/ob_i_table.h index 1308fc817..ac6d79862 100644 --- a/src/storage/ob_i_table.h +++ b/src/storage/ob_i_table.h @@ -157,6 +157,10 @@ public: OB_INLINE bool is_normal_cg_sstable() const { return ObITable::is_normal_cg_sstable(table_type_); } OB_INLINE bool is_cg_sstable() const { return ObITable::is_cg_sstable(table_type_); } OB_INLINE bool is_column_store_sstable() const { return is_co_sstable() || is_cg_sstable(); } + OB_INLINE bool is_row_store_major_sstable() const { return ObITable::is_row_store_major_sstable(table_type_); } + OB_INLINE bool is_column_store_major_sstable() const { return ObITable::is_column_store_major_sstable(table_type_); } + OB_INLINE bool is_true_major_sstable() const { return is_row_store_major_sstable() || is_column_store_major_sstable(); } + OB_INLINE const common::ObTabletID &get_tablet_id() const { return tablet_id_; } share::SCN get_start_scn() const { return scn_range_.start_scn_; } share::SCN get_end_scn() const { return scn_range_.end_scn_; } @@ -462,10 +466,34 @@ public: { return is_mds_mini_sstable(table_type) || is_mds_minor_sstable(table_type); } + static bool is_row_store_major_sstable(const TableType table_type) + { + return ObITable::TableType::MAJOR_SSTABLE == table_type; + } + static bool is_column_store_major_sstable(const TableType table_type) + { + 
return ObITable::TableType::COLUMN_ORIENTED_SSTABLE == table_type; + } + static bool is_valid_ddl_table_type(const TableType table_type) + { + return ObITable::DDL_MEM_SSTABLE == table_type + || ObITable::MAJOR_SSTABLE == table_type + || ObITable::DDL_DUMP_SSTABLE == table_type + || ObITable::COLUMN_ORIENTED_SSTABLE == table_type + || ObITable::DDL_MERGE_CO_SSTABLE == table_type; + } static bool is_table_with_scn_range(const TableType table_type) { return is_multi_version_table(table_type) || is_meta_major_sstable(table_type); } + // row store sstable and corresponding column store sstable + static bool is_twin_major_sstable(const TableKey &rs_key, const TableKey &cs_key) + { + return rs_key.is_true_major_sstable() + && cs_key.is_true_major_sstable() + && rs_key.tablet_id_ == cs_key.tablet_id_ + && rs_key.scn_range_ == cs_key.scn_range_; + } OB_INLINE static const char* get_table_type_name(const TableType &table_type) { return is_table_type_valid(table_type) ? table_type_name_[table_type] : nullptr; diff --git a/src/storage/ob_storage_schema.cpp b/src/storage/ob_storage_schema.cpp index 5742dcda0..2ef2f7327 100644 --- a/src/storage/ob_storage_schema.cpp +++ b/src/storage/ob_storage_schema.cpp @@ -434,7 +434,8 @@ int ObStorageSchema::init( const ObTableSchema &input_schema, const lib::Worker::CompatMode compat_mode, const bool skip_column_info/* = false*/, - const int64_t compat_version/* = STORAGE_SCHEMA_VERSION_LATEST*/) + const int64_t compat_version/* = STORAGE_SCHEMA_VERSION_LATEST*/, + const bool generate_cs_replica_cg_array/* = false*/) { int ret = OB_SUCCESS; @@ -444,6 +445,10 @@ int ObStorageSchema::init( } else if (OB_UNLIKELY(!input_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), K(input_schema), K(skip_column_info)); + } else if (FALSE_IT(column_info_simplified_ = skip_column_info)) { + } else if (OB_UNLIKELY(generate_cs_replica_cg_array && column_info_simplified_)) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid
argument to init storage schema", K(ret)); } else { allocator_ = &allocator; rowkey_array_.set_allocator(&allocator); @@ -459,11 +464,17 @@ int ObStorageSchema::init( if (OB_FAIL(ret)) { } else if (OB_FAIL(generate_str(input_schema))) { STORAGE_LOG(WARN, "failed to generate string", K(ret), K(input_schema)); - } else if (FALSE_IT(column_info_simplified_ = skip_column_info)) { } else if (OB_FAIL(generate_column_array(input_schema))) { STORAGE_LOG(WARN, "failed to generate column array", K(ret), K(input_schema)); + } else if (generate_cs_replica_cg_array) { + if (OB_FAIL(ObStorageSchema::generate_cs_replica_cg_array())) { + STORAGE_LOG(WARN, "failed to generate_cs_replica_cg_array", K(ret)); + } } else if (OB_FAIL(generate_column_group_array(input_schema, allocator))) { STORAGE_LOG(WARN, "Failed to generate column group array", K(ret)); + } + + if (OB_FAIL(ret)) { } else if (OB_UNLIKELY(!ObStorageSchema::is_valid())) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "storage schema is invalid", K(ret)); @@ -482,7 +493,8 @@ int ObStorageSchema::init( common::ObIAllocator &allocator, const ObStorageSchema &old_schema, const bool skip_column_info/* = false*/, - const ObStorageSchema *column_group_schema) + const ObStorageSchema *column_group_schema/* = nullptr*/, + const bool generate_cs_replica_cg_array/* = false*/) { int ret = OB_SUCCESS; @@ -492,6 +504,10 @@ int ObStorageSchema::init( } else if (OB_UNLIKELY(!old_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), K(old_schema), K(skip_column_info)); + } else if (FALSE_IT(column_info_simplified_ = (skip_column_info || old_schema.column_info_simplified_))) { + } else if (OB_UNLIKELY(generate_cs_replica_cg_array && (column_info_simplified_ || column_group_schema != nullptr))) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument to init storage schema", K(ret), K(column_info_simplified_), K(column_group_schema)); } else { allocator_ = &allocator; 
rowkey_array_.set_allocator(&allocator); @@ -506,7 +522,6 @@ int ObStorageSchema::init( compressor_type_ = old_schema.compressor_type_; column_cnt_ = old_schema.column_cnt_; store_column_cnt_ = old_schema.store_column_cnt_; - column_info_simplified_ = (skip_column_info || old_schema.column_info_simplified_); if (OB_FAIL(deep_copy_str(old_schema.encryption_, encryption_))) { STORAGE_LOG(WARN, "failed to deep copy encryption", K(ret), K(old_schema)); @@ -522,10 +537,17 @@ int ObStorageSchema::init( STORAGE_LOG(WARN, "failed to copy skip idx attr array", K(ret), K(old_schema)); } else if (!column_info_simplified_ && OB_FAIL(deep_copy_column_array(allocator, old_schema, old_schema.column_array_.count()))) { STORAGE_LOG(WARN, "failed to deep copy column array", K(ret), K(old_schema)); + } else if (generate_cs_replica_cg_array) { + if (OB_FAIL(ObStorageSchema::generate_cs_replica_cg_array())) { + STORAGE_LOG(WARN, "failed to generate_cs_replica_cg_array", K(ret)); + } } else if (NULL != column_group_schema && OB_FAIL(deep_copy_column_group_array(allocator, *column_group_schema))) { STORAGE_LOG(WARN, "failed to deep copy column array from column group schema", K(ret), K(old_schema), KPC(column_group_schema)); } else if (NULL == column_group_schema && OB_FAIL(deep_copy_column_group_array(allocator, old_schema))) { STORAGE_LOG(WARN, "failed to deep copy column array", K(ret), K(old_schema)); + } + + if (OB_FAIL(ret)) { } else if (OB_UNLIKELY(!is_valid())) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "storage schema is invalid", K(ret)); @@ -1043,6 +1065,107 @@ int ObStorageSchema::generate_all_column_group_schema(ObStorageColumnGroupSchema return ret; } +int ObStorageSchema::generate_cs_replica_cg_array(common::ObIAllocator &allocator, ObIArray &cg_schemas) const +{ + int ret = OB_SUCCESS; + int schema_rowkey_column_cnt = get_rowkey_column_num(); + cg_schemas.reset(); + ObStorageColumnGroupSchema column_group; + + if (OB_FAIL(cg_schemas.reserve(store_column_cnt_ + 1))) { + 
STORAGE_LOG(WARN, "failed to reserve for column group array", K(ret), K_(store_column_cnt)); + } else if (OB_FAIL(generate_rowkey_column_group_schema(column_group, ObRowStoreType::CS_ENCODING_ROW_STORE, allocator))) { + STORAGE_LOG(WARN, "failed to generate_rowkey_column_group_schema", K(ret)); + } else if (OB_FAIL(cg_schemas.push_back(column_group))) { + STORAGE_LOG(WARN, "failed to add column group", K(ret), K(column_group)); + column_group.destroy(allocator); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < store_column_cnt_; i++) { + if (OB_FAIL(generate_single_column_group_schema(column_group, ObRowStoreType::CS_ENCODING_ROW_STORE, + i + (i >= schema_rowkey_column_cnt ? ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt() : 0), allocator))) { + STORAGE_LOG(WARN, "failed to generate_single_column_group_schema", K(ret), K(i)); + } else if (OB_FAIL(cg_schemas.push_back(column_group))) { + STORAGE_LOG(WARN, "failed to add column group", K(ret), K(column_group)); + column_group.destroy(allocator); + } + } + + if (OB_FAIL(ret)) { + for (int64_t i = 0; i < cg_schemas.count(); i++) { + cg_schemas.at(i).destroy(allocator); + } + } + + return ret; +} + +int ObStorageSchema::generate_cs_replica_cg_array() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(generate_cs_replica_cg_array(*allocator_, column_group_array_))) { + STORAGE_LOG(WARN, "Failed to generate column store cg array", K(ret), KPC(this)); + } else { + is_cs_replica_compat_ = true; + STORAGE_LOG(INFO, "[CS-Replica] Success to generate cs replica cg array", K(ret), KPC(this)); + } + return ret; +} + +int ObStorageSchema::generate_single_column_group_schema(ObStorageColumnGroupSchema &column_group, const ObRowStoreType row_store_type, const uint16_t column_idx, common::ObIAllocator &allocator) const +{ + int ret = OB_SUCCESS; + column_group.reset(); + column_group.version_ = ObStorageColumnGroupSchema::COLUMN_GRUOP_SCHEMA_VERSION; + column_group.type_ = SINGLE_COLUMN_GROUP; + column_group.schema_column_cnt_ = 1; + 
column_group.rowkey_column_cnt_ = 0; + column_group.schema_rowkey_column_cnt_ = column_group.rowkey_column_cnt_; + column_group.column_cnt_ = column_group.schema_column_cnt_; + + uint16_t *column_idxs = nullptr; + if (OB_ISNULL(column_idxs = reinterpret_cast<uint16_t *> (allocator.alloc(sizeof(uint16_t) * column_group.column_cnt_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "Failed to alloc memory", K(ret), K(column_cnt_)); + } else { + column_idxs[0] = column_idx; + column_group.column_idxs_ = column_idxs; + column_group.block_size_ = block_size_; + column_group.compressor_type_ = compressor_type_; + column_group.row_store_type_ = row_store_type; + } + + return ret; +} + +int ObStorageSchema::generate_rowkey_column_group_schema(ObStorageColumnGroupSchema &column_group, const ObRowStoreType row_store_type, common::ObIAllocator &allocator) const +{ + int ret = OB_SUCCESS; + column_group.reset(); + column_group.version_ = ObStorageColumnGroupSchema::COLUMN_GRUOP_SCHEMA_VERSION; + column_group.type_ = ROWKEY_COLUMN_GROUP; + column_group.schema_column_cnt_ = get_rowkey_column_num(); + column_group.rowkey_column_cnt_ = get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + column_group.schema_rowkey_column_cnt_ = column_group.schema_column_cnt_; + column_group.column_cnt_ = column_group.rowkey_column_cnt_; + + uint16_t *column_idxs = nullptr; + if (OB_ISNULL(column_idxs = reinterpret_cast<uint16_t *> (allocator.alloc(sizeof(uint16_t) * column_group.column_cnt_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + STORAGE_LOG(WARN, "Failed to alloc memory", K(ret), K(column_cnt_)); + } else { + for (int64_t i = 0; i < column_group.column_cnt_; ++i) { + column_idxs[i] = i; + } + column_group.column_idxs_ = column_idxs; + column_group.block_size_ = block_size_; + column_group.compressor_type_ = compressor_type_; + column_group.row_store_type_ = row_store_type; + } + + return ret; +} + int ObStorageSchema::mock_row_store_cg(ObStorageColumnGroupSchema
&mocked_row_store_cg) const { // if cache mocked_row_store_cg in storage schema, cached value will become invalid when ddl happen, so re-build every time @@ -1053,6 +1176,32 @@ int ObStorageSchema::mock_row_store_cg(ObStorageColumnGroupSchema &mocked_row_st return ret; } +int ObStorageSchema::transform_from_row_to_columnar() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "not inited", K(ret), K_(is_inited)); + } else if (!is_row_store()) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "only row store schema can be transformed", K(ret), KPC(this)); + } else { + is_inited_ = false; + (void) reset_column_group_array(); + has_all_column_group_ = false; + if (OB_FAIL(ObStorageSchema::generate_cs_replica_cg_array())) { + STORAGE_LOG(WARN, "failed to generate_cs_replica_cg_array", K(ret)); + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "invalid storage schema", K(ret), KPC(this)); + } else { + is_inited_ = true; + } + } + STORAGE_LOG(INFO, "[CS-Replica] finish transform row store storage schema", K(ret), KPC(this)); + return ret; +} + /* * base_cg of column store schema can only be ROWKEY_CG OR ALL_CG * "with column group(all columns, each column)" -> ALL_CG + each_cg diff --git a/src/storage/ob_storage_schema.h b/src/storage/ob_storage_schema.h index 80f255e35..52b069280 100644 --- a/src/storage/ob_storage_schema.h +++ b/src/storage/ob_storage_schema.h @@ -110,6 +110,20 @@ public: column_cnt_(0), column_idxs_(nullptr) {} + ObStorageColumnGroupSchema(const share::schema::ObColumnGroupType type, const ObCompressorType compressor_type, + const ObRowStoreType row_store_type, const uint32_t block_size, const uint16_t schema_column_cnt, const uint16_t rowkey_column_cnt, + const uint16_t schema_rowkey_column_cnt, const uint16_t column_cnt, uint16_t *column_idxs) + : version_(COLUMN_GRUOP_SCHEMA_VERSION), + type_(type), + compressor_type_(compressor_type), + row_store_type_(row_store_type), + 
block_size_(block_size), + schema_column_cnt_(schema_column_cnt), + rowkey_column_cnt_(rowkey_column_cnt), + schema_rowkey_column_cnt_(schema_rowkey_column_cnt), + column_cnt_(column_cnt), + column_idxs_(column_idxs) + {} ~ObStorageColumnGroupSchema() = default; OB_INLINE void reset() { MEMSET(this, 0, sizeof(ObStorageColumnGroupSchema)); } void destroy(ObIAllocator &allocator); @@ -164,12 +178,14 @@ public: const share::schema::ObTableSchema &input_schema, const lib::Worker::CompatMode compat_mode, const bool skip_column_info = false, - const int64_t compat_version = STORAGE_SCHEMA_VERSION_LATEST); + const int64_t compat_version = STORAGE_SCHEMA_VERSION_LATEST, + const bool generate_cs_replica_cg_array = false); int init( common::ObIAllocator &allocator, const ObStorageSchema &old_schema, const bool skip_column_info = false, - const ObStorageSchema *column_group_schema = nullptr); + const ObStorageSchema *column_group_schema = nullptr, + const bool generate_cs_replica_cg_array = false); int deep_copy_column_array( common::ObIAllocator &allocator, const ObStorageSchema &src_schema, @@ -217,6 +233,7 @@ public: inline bool is_materialized_view() const { return share::schema::ObTableSchema::is_materialized_view(table_type_); } inline bool is_mlog_table() const { return share::schema::ObTableSchema::is_mlog_table(table_type_); } inline bool is_fts_index() const { return share::schema::is_fts_index(index_type_); } + inline bool is_user_data_table() const { return share::schema::ObTableSchema::is_user_data_table(table_type_); } virtual inline bool is_global_index_table() const override { return share::schema::ObSimpleTableSchemaV2::is_global_index_table(index_type_); } virtual inline int64_t get_block_size() const override { return block_size_; } @@ -250,6 +267,7 @@ public: blocksstable::ObDatumRow &default_row) const; const ObStorageColumnSchema *get_column_schema(const int64_t column_id) const; int mock_row_store_cg(ObStorageColumnGroupSchema &mocked_row_store_cg) 
const; + int transform_from_row_to_columnar(); // TODO(chengkong): is used? int get_base_rowkey_column_group_index(int32_t &cg_idx) const; // This function only get cg idx for actually stored column int get_column_group_index( @@ -289,6 +307,8 @@ private: int generate_str(const share::schema::ObTableSchema &input_schema); int generate_column_array(const share::schema::ObTableSchema &input_schema); int generate_column_group_array(const share::schema::ObTableSchema &input_schema, common::ObIAllocator &allocator); + int generate_cs_replica_cg_array(common::ObIAllocator &allocator, ObIArray &cg_schemas) const; // also used by ddl + int generate_cs_replica_cg_array(); int get_column_ids_without_rowkey( common::ObIArray &column_ids, bool no_virtual) const; @@ -306,6 +326,8 @@ private: int64_t get_column_array_serialize_length(const common::ObIArray &array) const; int deserialize_skip_idx_attr_array(const char *buf, const int64_t data_len, int64_t &pos); int generate_all_column_group_schema(ObStorageColumnGroupSchema &column_group, const ObRowStoreType row_store_type) const; + int generate_rowkey_column_group_schema(ObStorageColumnGroupSchema &column_group, const ObRowStoreType row_store_type, common::ObIAllocator &allocator) const; + int generate_single_column_group_schema(ObStorageColumnGroupSchema &column_group, const ObRowStoreType row_store_type, const uint16_t column_idx, common::ObIAllocator &allocator) const; template int64_t get_array_serialize_length(const common::ObIArray &array) const; template diff --git a/src/storage/ob_storage_struct.cpp b/src/storage/ob_storage_struct.cpp index 6fc7c2fde..6f88daed1 100644 --- a/src/storage/ob_storage_struct.cpp +++ b/src/storage/ob_storage_struct.cpp @@ -338,7 +338,8 @@ ObDDLTableStoreParam::ObDDLTableStoreParam() ddl_checkpoint_scn_(SCN::min_scn()), ddl_snapshot_version_(0), ddl_execution_id_(-1), - data_format_version_(0) + data_format_version_(0), + ddl_table_type_(ObITable::MAX_TABLE_TYPE) { } @@ -350,7 +351,8 @@ bool 
ObDDLTableStoreParam::is_valid() const && ddl_checkpoint_scn_.is_valid() && ddl_snapshot_version_ >= 0 && ddl_execution_id_ >= 0 - && data_format_version_ >= 0; + && data_format_version_ >= 0 + && ObITable::is_valid_ddl_table_type(ddl_table_type_); } UpdateUpperTransParam::UpdateUpperTransParam() diff --git a/src/storage/ob_storage_struct.h b/src/storage/ob_storage_struct.h index e1682f96c..6a1b2d9e6 100644 --- a/src/storage/ob_storage_struct.h +++ b/src/storage/ob_storage_struct.h @@ -336,7 +336,7 @@ public: ~ObDDLTableStoreParam() = default; bool is_valid() const; TO_STRING_KV(K_(keep_old_ddl_sstable), K_(ddl_start_scn), K_(ddl_commit_scn), K_(ddl_checkpoint_scn), - K_(ddl_snapshot_version), K_(ddl_execution_id), K_(data_format_version)); + K_(ddl_snapshot_version), K_(ddl_execution_id), K_(data_format_version), K_(ddl_table_type)); public: bool keep_old_ddl_sstable_; share::SCN ddl_start_scn_; @@ -345,6 +345,8 @@ public: int64_t ddl_snapshot_version_; int64_t ddl_execution_id_; int64_t data_format_version_; + // used to decide storage type for replaying ddl clog in cs replica, see ObTabletMeta::ddl_table_type_ for more detail + ObITable::TableType ddl_table_type_; }; struct UpdateUpperTransParam final diff --git a/src/storage/ob_tenant_tablet_stat_mgr.cpp b/src/storage/ob_tenant_tablet_stat_mgr.cpp index ebd408aaa..a8468c8ef 100644 --- a/src/storage/ob_tenant_tablet_stat_mgr.cpp +++ b/src/storage/ob_tenant_tablet_stat_mgr.cpp @@ -1184,7 +1184,7 @@ int ObTenantTabletStatMgr::get_queuing_cfg( } } else { queuing_cfg = ObTableQueuingModeCfg::get_basic_config(stream_node->mode_); - LOG_DEBUG("chengkong debug: success get queuing cfg", K(ret), K(ls_id), K(tablet_id), K(queuing_cfg)); + LOG_DEBUG("success get queuing cfg", K(ret), K(ls_id), K(tablet_id), K(queuing_cfg)); } } return ret; diff --git a/src/storage/restore/ob_ls_restore_handler.cpp b/src/storage/restore/ob_ls_restore_handler.cpp index 82ea61fa7..30ee43626 100644 --- 
a/src/storage/restore/ob_ls_restore_handler.cpp +++ b/src/storage/restore/ob_ls_restore_handler.cpp @@ -1102,14 +1102,12 @@ int ObILSRestoreState::follower_fill_tablet_group_restore_arg_( LOG_WARN("fail to get location", K(ret), KPC(ls_)); } else if (OB_FAIL(location.get_leader(leader))) { LOG_WARN("fail to get leader location", K(ret), K(location)); - } else if (OB_FAIL(tablet_group_restore_arg.src_.set_replica_type(leader.get_replica_type()))) { - LOG_WARN("fail to set src replica type", K(ret), K(leader)); - } else if (OB_FAIL(tablet_group_restore_arg.src_.set_member(ObMember(leader.get_server(), 0/*invalid timestamp is ok*/)))) { - LOG_WARN("fail to set src member", K(ret)); - } else if (OB_FAIL(tablet_group_restore_arg.dst_.set_replica_type(REPLICA_TYPE_FULL))) { - LOG_WARN("fail to set dst replica type", K(ret)); - } else if (OB_FAIL(tablet_group_restore_arg.dst_.set_member(ObMember(GCTX.self_addr(), 0/*invalid timestamp is ok*/)))) { - LOG_WARN("fail to set dst member", K(ret), "server", GCTX.self_addr()); + } else if (OB_FAIL(tablet_group_restore_arg.src_.init( + leader.get_server(), 0/*invalid timestamp is ok*/, leader.get_replica_type()))) { + LOG_WARN("fail to init src_", K(ret), K(leader)); + } else if (OB_FAIL(tablet_group_restore_arg.dst_.init( + GCTX.self_addr(), 0/*invalid timestamp is ok*/, REPLICA_TYPE_FULL))) { + LOG_WARN("fail to init dst_", K(ret), K(GCTX.self_addr())); } else if (OB_FAIL(append(tablet_group_restore_arg.tablet_id_array_, tablet_need_restore))) { LOG_WARN("fail to append tablet id", K(ret), K(tablet_need_restore)); } else if (OB_FAIL(tablet_group_restore_arg.restore_base_info_.copy_from(*ls_restore_arg_))) { @@ -1163,7 +1161,7 @@ int ObILSRestoreState::get_follower_server_(ObIArray &follow common::ObMemberList member_list; GlobalLearnerList learner_list; int64_t full_replica_count = 0; - int64_t readonly_replica_count = 0; + int64_t non_paxos_replica_count = 0; if (OB_ISNULL(log_handler = ls_->get_log_handler())) { ret = 
OB_ERR_UNEXPECTED; LOG_WARN("log handler should not be NULL", K(ret)); @@ -1171,12 +1169,12 @@ int ObILSRestoreState::get_follower_server_(ObIArray &follow LOG_WARN("failed to get paxos member list and learner list", K(ret)); } else if (OB_FAIL(location_service_->get(follower_info.cluster_id_, tenant_id, ls_->get_ls_id(), expire_renew_time, is_cache_hit, location))) { LOG_WARN("fail to get location", K(ret), KPC(ls_)); - } else if (OB_FAIL(location.get_replica_count(full_replica_count, readonly_replica_count))) { - LOG_WARN("fail to get replica count in location", KR(ret), K(location), K(full_replica_count), K(readonly_replica_count)); - } else if (full_replica_count != paxos_replica_num || readonly_replica_count != learner_list.get_member_number()) { + } else if (OB_FAIL(location.get_replica_count(full_replica_count, non_paxos_replica_count))) { + LOG_WARN("fail to get replica count in location", KR(ret), K(location), K(full_replica_count), K(non_paxos_replica_count)); + } else if (full_replica_count != paxos_replica_num || non_paxos_replica_count != learner_list.get_member_number()) { ret = OB_REPLICA_NUM_NOT_MATCH; LOG_WARN("replica num not match, ls may in migration", K(ret), K(location), K(full_replica_count), - K(readonly_replica_count), K(member_list), K(paxos_replica_num), K(learner_list)); + K(non_paxos_replica_count), K(member_list), K(paxos_replica_num), K(learner_list)); } else { const ObIArray &replica_locations = location.get_replica_locations(); for (int64_t i = 0; OB_SUCC(ret) && i < replica_locations.count(); ++i) { @@ -1990,14 +1988,10 @@ int ObLSRestoreSysTabletState::follower_fill_ls_restore_arg_(ObLSRestoreArg &arg LOG_WARN("fail to get location", K(ret), KPC(ls_)); } else if (OB_FAIL(location.get_leader(leader))) { LOG_WARN("fail to get leader location", K(ret), K(location)); - } else if (OB_FAIL(arg.src_.set_replica_type(leader.get_replica_type()))) { - LOG_WARN("fail to set src replica type", K(ret), K(leader)); - } else if 
(OB_FAIL(arg.src_.set_member(ObMember(leader.get_server(), 0/*invalid timestamp is ok*/)))) { - LOG_WARN("fail to set src member", K(ret)); - } else if (OB_FAIL(arg.dst_.set_replica_type(REPLICA_TYPE_FULL))) { - LOG_WARN("fail to set dst replica type", K(ret)); - } else if (OB_FAIL(arg.dst_.set_member(ObMember(GCTX.self_addr(), 0/*invalid timestamp is ok*/)))) { - LOG_WARN("fail to set dst member", K(ret), "server", GCTX.self_addr()); + } else if (OB_FAIL(arg.src_.init(leader.get_server(), 0/*invalid timestamp is ok*/, leader.get_replica_type()))) { + LOG_WARN("fail to init src_", K(ret), K(leader)); + } else if (OB_FAIL(arg.dst_.init(GCTX.self_addr(), 0/*invalid timestamp is ok*/, REPLICA_TYPE_FULL))) { + LOG_WARN("fail to init dst_", K(ret), K(GCTX.self_addr())); } else if (OB_FAIL(arg.restore_base_info_.copy_from(*ls_restore_arg_))) { LOG_WARN("fail to fill restore base info from ls restore args", K(ret), KPC(ls_restore_arg_)); } diff --git a/src/storage/tablet/ob_mds_schema_helper.cpp b/src/storage/tablet/ob_mds_schema_helper.cpp index 514e6363d..079f314af 100644 --- a/src/storage/tablet/ob_mds_schema_helper.cpp +++ b/src/storage/tablet/ob_mds_schema_helper.cpp @@ -141,6 +141,14 @@ const ObRowkeyReadInfo *ObMdsSchemaHelper::get_rowkey_read_info() const return ptr; } +bool ObMdsSchemaHelper::is_mds_schema(const ObTableSchema &table_schema) +{ + bool bret = false; + const ObString &table_name = table_schema.get_table_name_str(); + const uint64_t table_id = table_schema.get_table_id(); + return MDS_TABLE_ID == table_id && 0 == table_name.case_compare(MDS_TABLE_NAME); +} + int ObMdsSchemaHelper::build_table_schema( const uint64_t tenant_id, const int64_t database_id, diff --git a/src/storage/tablet/ob_mds_schema_helper.h b/src/storage/tablet/ob_mds_schema_helper.h index 43055894c..c11457058 100644 --- a/src/storage/tablet/ob_mds_schema_helper.h +++ b/src/storage/tablet/ob_mds_schema_helper.h @@ -54,6 +54,8 @@ public: const share::schema::ObTableSchema 
*get_table_schema() const; const ObStorageSchema *get_storage_schema() const; const ObRowkeyReadInfo *get_rowkey_read_info() const; +public: + static bool is_mds_schema(const ObTableSchema &table_schema); private: static int build_table_schema( const uint64_t tenant_id, diff --git a/src/storage/tablet/ob_table_store_util.cpp b/src/storage/tablet/ob_table_store_util.cpp index 232d8373b..3281d7a87 100644 --- a/src/storage/tablet/ob_table_store_util.cpp +++ b/src/storage/tablet/ob_table_store_util.cpp @@ -560,6 +560,46 @@ int ObSSTableArray::get_all_tables(ObIArray &tables) const return ret; } +int ObSSTableArray::replace_twin_majors_and_build_new( + const ObIArray &tables_array, + ObIArray &major_tables) const +{ + int ret = OB_SUCCESS; + ObITable* new_co_major = nullptr; // new co major to replace old row store major + if (OB_UNLIKELY(!is_valid())) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (tables_array.count() != 1) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid major table cnt for replacing old row store", K(ret), K(tables_array)); + } else if (FALSE_IT(new_co_major = tables_array.at(0))) { + } else if (OB_UNLIKELY(nullptr == new_co_major || !new_co_major->is_column_store_sstable())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected new co table", K(ret), KPC(new_co_major)); + } else { + ObSSTable *table = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < cnt_; ++i) { + if (OB_ISNULL(table = sstable_array_[i])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null table", K(ret)); + } else if (!table->get_key().is_row_store_major_sstable()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected sstable type", K(ret), KPC(table)); + } else if (ObITable::is_twin_major_sstable(table->get_key(), new_co_major->get_key())) { + // skip the old row store major + } else if (OB_FAIL(major_tables.push_back(table))) { + LOG_WARN("fail to push sstable address into array", K(ret), K(i), K(major_tables)); + } + } + + if (OB_FAIL(ret)) { + } 
else if (OB_FAIL(major_tables.push_back(new_co_major))) { + LOG_WARN("fail to push sstable address into array", K(ret), KPC(new_co_major), K(major_tables)); + } + } + return ret; +} + int ObSSTableArray::get_all_table_wrappers( ObIArray &table_wrappers, const bool need_unpack) const diff --git a/src/storage/tablet/ob_table_store_util.h b/src/storage/tablet/ob_table_store_util.h index ca685b194..973c0ffb8 100644 --- a/src/storage/tablet/ob_table_store_util.h +++ b/src/storage/tablet/ob_table_store_util.h @@ -86,6 +86,10 @@ public: TO_STRING_KV(K_(cnt), KP_(sstable_array), K_(serialize_table_type), K_(is_inited)); private: int get_all_tables(ObIArray &tables) const; + // construct major_tables with old sstable array and input tables_array, but filter twin sstable of new_co_major + int replace_twin_majors_and_build_new( + const ObIArray &tables_array, + ObIArray &major_tables) const; int inc_meta_ref_cnt(bool &inc_success) const; int inc_data_ref_cnt(bool &inc_success) const; void dec_meta_ref_cnt() const; diff --git a/src/storage/tablet/ob_tablet.cpp b/src/storage/tablet/ob_tablet.cpp index 69fbbfbe4..b8b3d9686 100644 --- a/src/storage/tablet/ob_tablet.cpp +++ b/src/storage/tablet/ob_tablet.cpp @@ -258,7 +258,7 @@ ObTablet::ObTablet() table_store_cache_() { #if defined(__x86_64__) && !defined(ENABLE_OBJ_LEAK_CHECK) - check_size(); + check_size(); #endif MEMSET(memtables_, 0x0, sizeof(memtables_)); } @@ -331,6 +331,7 @@ int ObTablet::init_for_first_time_creation( const int64_t snapshot_version, const ObCreateTabletSchema &storage_schema, const bool need_create_empty_major_sstable, + const bool need_generate_cs_replica_cg_array, ObFreezer *freezer) { int ret = OB_SUCCESS; @@ -373,7 +374,8 @@ int ObTablet::init_for_first_time_creation( } else { if (OB_FAIL(ObTabletObjLoadHelper::alloc_and_new(allocator, storage_schema_addr_.ptr_))) { LOG_WARN("fail to allocate and new object", K(ret)); - } else if (OB_FAIL(storage_schema_addr_.get_ptr()->init(allocator, 
storage_schema))) { + } else if (OB_FAIL(storage_schema_addr_.get_ptr()->init(allocator, storage_schema, false /*skip_column_info*/, + nullptr /*column_group_schema*/, need_generate_cs_replica_cg_array))) { LOG_WARN("fail to initialize tablet member", K(ret), K(storage_schema_addr_)); } } @@ -391,7 +393,7 @@ int ObTablet::init_for_first_time_creation( LOG_WARN("failed to update start scn", K(ret), K(table_store_addr_)); } else if (OB_FAIL(table_store_cache_.init(table_store_addr_.get_ptr()->get_major_sstables(), table_store_addr_.get_ptr()->get_minor_sstables(), - storage_schema.is_row_store()))) { + storage_schema_addr_.get_ptr()->is_row_store()))) { LOG_WARN("failed to init table store cache", K(ret), KPC(this)); } else if (OB_FAIL(check_sstable_column_checksum())) { LOG_WARN("failed to check sstable column checksum", K(ret), KPC(this)); @@ -504,9 +506,8 @@ int ObTablet::init_for_merge( } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); - } else if (OB_UNLIKELY(old_tablet.is_row_store() != param.storage_schema_->is_row_store())) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("Unexpected schema for chaser debug", K(ret), KPC(param.storage_schema_), K(old_tablet)); + } else if (OB_FAIL(check_tablet_schema_mismatch(old_tablet, *param.storage_schema_, is_convert_co_major_merge(param.merge_type_)))) { + LOG_ERROR("find error while checking tablet schema mismatch", K(ret), KPC(param.storage_schema_), K(old_tablet), K(param.merge_type_)); } else if (OB_FAIL(check_table_store_flag_match_with_table_store_(table_store_addr_.get_ptr()))) { LOG_WARN("failed to check table store flag match with table store", K(ret), K(old_tablet), K_(table_store_addr)); } else { @@ -590,6 +591,7 @@ int ObTablet::init_with_migrate_param( ObTableHandleV2 mds_mini_sstable; const blocksstable::ObSSTable *sstable = nullptr; const bool need_compat = !tablet_id.is_ls_inner_tablet() && param.version_ < 
ObMigrationTabletParam::PARAM_VERSION_V3; + bool need_process_cs_replica = false; if (is_transfer) { // do nothing } else if (!need_compat) { @@ -640,13 +642,18 @@ int ObTablet::init_with_migrate_param( } if (OB_SUCC(ret)) { - ALLOC_AND_INIT(allocator, storage_schema_addr_, param.storage_schema_); + // since transfer use storage schema from ls leader, need convert into cs storage schema in cs replica + if (!is_transfer) { + ALLOC_AND_INIT(allocator, storage_schema_addr_, param.storage_schema_); + } else if (OB_FAIL(inner_alloc_and_init_storage_schema(allocator, ls_id, tablet_id, param.storage_schema_, need_process_cs_replica))) { + LOG_WARN("failed to int storage schema", K(ret), K(ls_id), K(tablet_id), K(param)); + } } if (OB_FAIL(ret)) { } else if (OB_FAIL(table_store_cache_.init(table_store_addr_.get_ptr()->get_major_sstables(), table_store_addr_.get_ptr()->get_minor_sstables(), - param.storage_schema_.is_row_store()))) { + param.storage_schema_.is_row_store() && !need_process_cs_replica))) { LOG_WARN("failed to init table store cache", K(ret), KPC(this)); } else if (OB_FAIL(build_read_info(allocator))) { LOG_WARN("fail to build read info", K(ret)); @@ -659,6 +666,8 @@ int ObTablet::init_with_migrate_param( } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); + } else if (OB_FAIL(check_tablet_schema_mismatch(*this, *storage_schema_addr_.ptr_, false/*is_convert_co_major_merge*/))) { + LOG_ERROR("find error while checking tablet schema mismatch", K(ret), KPC(storage_schema_addr_.ptr_), KPC(this)); } else { is_inited_ = true; LOG_INFO("succeeded to init tablet with migration tablet param", K(ret), K(param), KPC(this)); @@ -732,9 +741,6 @@ int ObTablet::init_for_defragment( } else if (FALSE_IT(set_initial_addr())) { } else if (OB_FAIL(inner_inc_macro_ref_cnt())) { LOG_WARN("failed to increase macro ref cnt", K(ret)); - } else if (OB_UNLIKELY(old_tablet.is_row_store() != 
old_storage_schema->is_row_store())) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("Unexpected schema for chaser debug", K(ret), KPC(old_storage_schema), K(old_tablet)); } else if (OB_FAIL(table_store_cache_.init(table_store_addr_.get_ptr()->get_major_sstables(), table_store_addr_.get_ptr()->get_minor_sstables(), old_storage_schema->is_row_store()))) { @@ -844,9 +850,8 @@ int ObTablet::init_for_sstable_replace( LOG_WARN("failed to check sstable column checksum", K(ret), KPC(this)); } else if (param.is_transfer_replace_ && OB_FAIL(handle_transfer_replace_(param))) { LOG_WARN("failed to handle transfer replace", K(ret), K(param)); - } else if (OB_UNLIKELY(old_tablet.is_row_store() != storage_schema->is_row_store())) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("Unexpected schema for chaser debug", K(ret), KPC(storage_schema), K(old_tablet)); + } else if (OB_FAIL(check_tablet_schema_mismatch(old_tablet, *storage_schema, false/*is_convert_co_major_merge*/))) { + LOG_ERROR("find error while checking tablet schema mismatch", K(ret), KPC(storage_schema), K(old_tablet)); } else if (OB_FAIL(table_store_cache_.init(table_store_addr_.get_ptr()->get_major_sstables(), table_store_addr_.get_ptr()->get_minor_sstables(), storage_schema->is_row_store()))) { @@ -1131,6 +1136,45 @@ int ObTablet::init_with_update_medium_info( return ret; } +// delayed add column group and convert co major merge allow mismatch between old tablet and new schema +int ObTablet::check_tablet_schema_mismatch( + const ObTablet &old_tablet, + const ObStorageSchema &storage_schema, + const bool is_convert_co_major_merge) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + const bool is_old_tablet_row_store = old_tablet.is_row_store(); + const bool is_storage_schema_row_store = storage_schema.is_row_store(); + if (OB_FAIL(MTL(ObLSService *)->get_ls(tablet_meta_.ls_id_, ls_handle, ObLSGetMod::TABLET_MOD))) { + LOG_WARN("failed to get ls", K(ret), "ls_id", tablet_meta_.ls_id_); + } else if 
(OB_UNLIKELY(!ls_handle.is_valid()) || OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls handle is invalid or nullptr", K(ret), K(ls_handle), KP(ls)); + } else if (ls->is_cs_replica()) { + LOG_INFO("For column store replica, allow old tablet and new schema mismatch", K(ret), K(old_tablet), K(storage_schema)); + } else if (is_old_tablet_row_store) { + if (is_storage_schema_row_store) { + // row store status match + } else if (is_convert_co_major_merge) { + // convert co major merge + LOG_INFO("convert co major merge, old tablet is row store and new storage schema is column store", K(ret), K(old_tablet), K(storage_schema)); + } else { + // delayed add column group + LOG_INFO("should be delayed add column group, old tablet is row store and new storage schema is column store", K(ret), K(old_tablet), K(storage_schema)); + } + } else { + if (!is_storage_schema_row_store) { + // column store status match + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected schema, old tablet is column store while new storage schema is row store", K(ret), K(old_tablet), K(storage_schema)); + } + } + return ret; +} + int ObTablet::update_meta_last_persisted_committed_tablet_status_from_sstable( const ObUpdateTableStoreParam &param, const ObTabletCreateDeleteMdsUserData &old_last_persisted_committed_tablet_status) @@ -4994,6 +5038,7 @@ int ObTablet::build_migration_tablet_param( mig_tablet_param.ddl_execution_id_ = tablet_meta_.ddl_execution_id_; mig_tablet_param.ddl_data_format_version_ = tablet_meta_.ddl_data_format_version_; mig_tablet_param.ddl_commit_scn_ = tablet_meta_.ddl_commit_scn_; + mig_tablet_param.ddl_table_type_ = tablet_meta_.ddl_table_type_; mig_tablet_param.report_status_ = tablet_meta_.report_status_; mig_tablet_param.mds_checkpoint_scn_ = tablet_meta_.mds_checkpoint_scn_; mig_tablet_param.transfer_info_ = tablet_meta_.transfer_info_; @@ -5480,6 +5525,49 @@ int ObTablet::update_tablet_autoinc_seq(const uint64_t autoinc_seq) return ret; }
+int ObTablet::check_cs_replica_compat_schema(bool &is_cs_replica_compat) +{ + int ret = OB_SUCCESS; + is_cs_replica_compat = false; + ObStorageSchema *storage_schema = nullptr; + ObArenaAllocator arena_allocator(common::ObMemAttr(MTL_ID(), "CSReplSchema")); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_FAIL(load_storage_schema(arena_allocator, storage_schema))) { + LOG_WARN("fail to load storage schema", K(ret), K_(storage_schema_addr)); + } else { + // column storage schema + is_cs_replica_compat = storage_schema->is_cs_replica_compat_; + } + ObTabletObjLoadHelper::free(arena_allocator, storage_schema); + return ret; +} + +int ObTablet::pre_process_cs_replica(ObTabletDirectLoadInsertParam &direct_load_param) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + bool is_cs_replica_compat = false; + if (OB_FAIL(MTL(ObLSService *)->get_ls(tablet_meta_.ls_id_, ls_handle, ObLSGetMod::DDL_MOD))) { + LOG_WARN("failed to get ls", K(ret), "ls_id", tablet_meta_.ls_id_); + } else if (OB_UNLIKELY(!ls_handle.is_valid()) || OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls handle is invalid or nullptr", K(ret), K(ls_handle), KP(ls)); + } else if (!ls->is_cs_replica()) { + } else if (is_row_store()) { + // do not need to process cs replica + } else if (OB_FAIL(check_cs_replica_compat_schema(is_cs_replica_compat))) { + LOG_WARN("failed to check cs replica compat", K(ret), KPC(this)); + } else if (is_cs_replica_compat && tablet_meta_.ddl_table_type_ == ObITable::DDL_DUMP_SSTABLE) { + direct_load_param.common_param_.replay_normal_in_cs_replica_ = true; + LOG_INFO("[CS-Replica] Set replay normal in cs replica", K(direct_load_param)); + } + LOG_TRACE("[CS-Replica] process cs replica when start direct load task", KPC(ls), K(is_cs_replica_compat), K(direct_load_param), K_(tablet_meta)); + return ret; +} + int ObTablet::start_direct_load_task_if_need() { int ret = OB_SUCCESS; @@ -5526,7 
+5614,9 @@ int ObTablet::start_direct_load_task_if_need() direct_load_param.common_param_.direct_load_type_ = ObDirectLoadType::DIRECT_LOAD_DDL; direct_load_param.common_param_.read_snapshot_ = tablet_meta_.ddl_snapshot_version_; - if (OB_FAIL(tenant_direct_load_mgr->create_tablet_direct_load( + if (OB_FAIL(pre_process_cs_replica(direct_load_param))) { + LOG_WARN("failed to process cs replica", K(ret), KPC(this)); + } else if (OB_FAIL(tenant_direct_load_mgr->create_tablet_direct_load( unused_context_id, tablet_meta_.ddl_execution_id_, direct_load_param, @@ -6394,6 +6484,7 @@ int ObTablet::build_transfer_tablet_param_current_( mig_tablet_param.max_sync_storage_schema_version_ = mig_tablet_param.storage_schema_.schema_version_; mig_tablet_param.ddl_execution_id_ = tablet_meta_.ddl_execution_id_; mig_tablet_param.ddl_data_format_version_ = tablet_meta_.ddl_data_format_version_; + mig_tablet_param.ddl_table_type_ = tablet_meta_.ddl_table_type_; mig_tablet_param.mds_checkpoint_scn_ = user_data.transfer_scn_; mig_tablet_param.report_status_.reset(); @@ -6442,7 +6533,8 @@ int64_t ObTablet::to_string(char *buf, const int64_t buf_len) const K_(macro_info_addr), K_(mds_data), KP_(ddl_kvs), - K_(ddl_kv_count)); + K_(ddl_kv_count), + K_(table_store_cache)); J_COMMA(); BUF_PRINTF("memtables:"); J_ARRAY_START(); @@ -7870,6 +7962,33 @@ int ObTablet::build_transfer_backfill_tablet_param( return ret; } +int ObTablet::inner_alloc_and_init_storage_schema( + common::ObArenaAllocator &allocator, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const ObStorageSchema &input_storage_schema, + bool &need_process_cs_replica) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + need_process_cs_replica = false; + if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::HA_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(ls_id)); + } else if (OB_UNLIKELY(!ls_handle.is_valid()) || OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("ls is invalid or nullptr", K(ret), K(ls_id), K(ls_handle), KPC(ls)); + } else if (OB_FAIL(ObCSReplicaUtil::check_need_process_cs_replica(*ls, tablet_id, input_storage_schema, need_process_cs_replica))) { + LOG_WARN("failed to check need process cs replica", K(ret), K(ls_id), K(tablet_id), K(input_storage_schema)); + } else if (OB_FAIL(ObTabletObjLoadHelper::alloc_and_new(allocator, storage_schema_addr_.ptr_))) { + LOG_WARN("fail to alloc and new storage schema", K(ret)); + } else if (OB_FAIL(storage_schema_addr_.get_ptr()->init(allocator, input_storage_schema, + false /*skip_column_info*/, nullptr /*column_group_schema*/, need_process_cs_replica))) { + LOG_WARN("fail to init storage schema", K(ret), K(input_storage_schema), K(need_process_cs_replica)); + } + return ret; +} + int ObTablet::check_table_store_flag_match_with_table_store_(const ObTabletTableStore *table_store) { int ret = OB_SUCCESS; diff --git a/src/storage/tablet/ob_tablet.h b/src/storage/tablet/ob_tablet.h index 1e66ba0c1..b15a72e95 100644 --- a/src/storage/tablet/ob_tablet.h +++ b/src/storage/tablet/ob_tablet.h @@ -91,6 +91,7 @@ class ObCOSSTableV2; class ObMacroInfoIterator; class ObMdsRowIterator; class ObMdsMiniMergeOperator; +struct ObTabletDirectLoadInsertParam; struct ObTableStoreCache { @@ -114,6 +115,7 @@ public: int64_t recycle_version_; int64_t last_major_column_count_; bool is_row_store_; + // TODO(chengkong): add bool is_user_tablet_; common::ObCompressorType last_major_compressor_type_; common::ObRowStoreType last_major_latest_row_store_type_; }; @@ -179,6 +181,7 @@ public: const int64_t snapshot_version, const ObCreateTabletSchema &storage_schema, const bool need_create_empty_major_sstable, + const bool need_generate_cs_replica_cg_array, ObFreezer *freezer); // dump/merge build new multi version tablet int init_for_merge( @@ -428,7 +431,13 @@ public: const ObTabletMeta &src_tablet_meta, const ObStorageSchema &src_storage_schema, ObMigrationTabletParam &param) const; - + // 
transfer uses the storage schema from the ls leader to create the tablet, which needs special processing in a cs replica + int inner_alloc_and_init_storage_schema( + common::ObArenaAllocator &allocator, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const ObStorageSchema &input_storage_schema, + bool &need_process_cs_replica); int get_ddl_kv_mgr(ObDDLKvMgrHandle &ddl_kv_mgr_handle, bool try_create = false); int set_ddl_kv_mgr(const ObDDLKvMgrHandle &ddl_kv_mgr_handle); int remove_ddl_kv_mgr(const ObDDLKvMgrHandle &ddl_kv_mgr_handle); @@ -440,6 +449,10 @@ public: int get_recycle_version(const int64_t multi_version_start, int64_t &recycle_version) const; int get_migration_sstable_size(int64_t &data_size); + // column store replica + int check_cs_replica_compat_schema(bool &is_cs_replica_compat); + int pre_process_cs_replica(ObTabletDirectLoadInsertParam &direct_load_param); + // other const ObMetaDiskAddr &get_tablet_addr() const { return tablet_addr_; } const ObTabletMeta &get_tablet_meta() const { return tablet_meta_; } @@ -560,6 +573,10 @@ protected:// for MDS use const bool create_if_not_exist) const override final; virtual ObTabletPointer *get_tablet_pointer_() const override final; private: + int check_tablet_schema_mismatch( + const ObTablet &old_tablet, + const ObStorageSchema &storage_schema, + const bool is_convert_co_major_merge); int update_meta_last_persisted_committed_tablet_status_from_sstable( const ObUpdateTableStoreParam &param, const ObTabletCreateDeleteMdsUserData &last_tablet_status); diff --git a/src/storage/tablet/ob_tablet_meta.cpp b/src/storage/tablet/ob_tablet_meta.cpp index 9aed3bf51..9d0712f67 100644 --- a/src/storage/tablet/ob_tablet_meta.cpp +++ b/src/storage/tablet/ob_tablet_meta.cpp @@ -64,6 +64,7 @@ ObTabletMeta::ObTabletMeta() last_persisted_committed_tablet_status_(), space_usage_(), create_schema_version_(0), + ddl_table_type_(ObITable::MAX_TABLE_TYPE), compat_mode_(lib::Worker::CompatMode::INVALID), has_next_tablet_(false), is_inited_(false) 
@@ -123,6 +124,7 @@ int ObTabletMeta::init( max_sync_storage_schema_version_ = create_schema_version; ddl_execution_id_ = -1; ddl_data_format_version_ = 0; + ddl_table_type_ = ObITable::DDL_MEM_SSTABLE; mds_checkpoint_scn_ = INIT_CLOG_CHECKPOINT_SCN; report_status_.merge_snapshot_version_ = snapshot_version; @@ -186,6 +188,7 @@ int ObTabletMeta::init( ddl_snapshot_version_ = MAX(old_tablet_meta.ddl_snapshot_version_, ddl_info.ddl_snapshot_version_); ddl_execution_id_ = MAX(old_tablet_meta.ddl_execution_id_, ddl_info.ddl_execution_id_); ddl_data_format_version_ = MAX(old_tablet_meta.ddl_data_format_version_, ddl_info.data_format_version_); + ddl_table_type_ = (ObITable::MAX_TABLE_TYPE == ddl_info.ddl_table_type_) ? old_tablet_meta.ddl_table_type_ : ddl_info.ddl_table_type_; mds_checkpoint_scn_ = old_tablet_meta.mds_checkpoint_scn_; transfer_info_ = old_tablet_meta.transfer_info_; extra_medium_info_ = old_tablet_meta.extra_medium_info_; @@ -227,6 +230,7 @@ int ObTabletMeta::init( ref_tablet_id_ = old_tablet_meta.ref_tablet_id_; create_scn_ = old_tablet_meta.create_scn_; create_schema_version_ = old_tablet_meta.create_schema_version_; + ddl_table_type_ = old_tablet_meta.ddl_table_type_; start_scn_ = old_tablet_meta.start_scn_; clog_checkpoint_scn_ = old_tablet_meta.clog_checkpoint_scn_; ddl_checkpoint_scn_ = old_tablet_meta.ddl_checkpoint_scn_; @@ -241,6 +245,7 @@ int ObTabletMeta::init( ddl_execution_id_ = old_tablet_meta.ddl_execution_id_; ddl_data_format_version_ = old_tablet_meta.ddl_data_format_version_; ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_; + ddl_table_type_ = old_tablet_meta.ddl_table_type_; max_sync_storage_schema_version_ = old_tablet_meta.max_sync_storage_schema_version_; max_serialized_medium_scn_ = old_tablet_meta.max_serialized_medium_scn_; mds_checkpoint_scn_ = SCN::max(flush_scn, old_tablet_meta.mds_checkpoint_scn_); @@ -300,6 +305,7 @@ int ObTabletMeta::init( max_serialized_medium_scn_ = param.max_serialized_medium_scn_; 
ddl_execution_id_ = param.ddl_execution_id_; ddl_data_format_version_ = param.ddl_data_format_version_; + ddl_table_type_ = param.ddl_table_type_; mds_checkpoint_scn_ = param.mds_checkpoint_scn_; transfer_info_ = param.transfer_info_; extra_medium_info_ = param.extra_medium_info_; @@ -360,6 +366,7 @@ int ObTabletMeta::assign(const ObTabletMeta &other) max_sync_storage_schema_version_ = other.max_sync_storage_schema_version_; ddl_execution_id_ = other.ddl_execution_id_; ddl_data_format_version_ = other.ddl_data_format_version_; + ddl_table_type_ = other.ddl_table_type_; max_serialized_medium_scn_ = other.max_serialized_medium_scn_; ddl_commit_scn_ = other.ddl_commit_scn_; mds_checkpoint_scn_ = other.mds_checkpoint_scn_; @@ -464,6 +471,7 @@ int ObTabletMeta::init( max_sync_storage_schema_version_ = max_sync_storage_schema_version; ddl_execution_id_ = old_tablet_meta.ddl_execution_id_; ddl_data_format_version_ = old_tablet_meta.ddl_data_format_version_; + ddl_table_type_ = old_tablet_meta.ddl_table_type_; max_serialized_medium_scn_ = MAX(old_tablet_meta.max_serialized_medium_scn_, OB_ISNULL(tablet_meta) ? 
0 : tablet_meta->max_serialized_medium_scn_); ddl_commit_scn_ = old_tablet_meta.ddl_commit_scn_; @@ -517,6 +525,7 @@ void ObTabletMeta::reset() max_serialized_medium_scn_ = 0; ddl_execution_id_ = -1; ddl_data_format_version_ = 0; + ddl_table_type_ = ObITable::MAX_TABLE_TYPE; mds_checkpoint_scn_.reset(); transfer_info_.reset(); extra_medium_info_.reset(); @@ -548,7 +557,8 @@ bool ObTabletMeta::is_valid() const && start_scn_ >= INIT_CLOG_CHECKPOINT_SCN && start_scn_ <= clog_checkpoint_scn_)) && create_schema_version_ >= 0 - && space_usage_.is_valid(); + && space_usage_.is_valid() + && ObITable::is_valid_ddl_table_type(ddl_table_type_); } int ObTabletMeta::serialize(char *buf, const int64_t len, int64_t &pos) const @@ -634,6 +644,8 @@ int ObTabletMeta::serialize(char *buf, const int64_t len, int64_t &pos) const LOG_WARN("failed to serialize extra_medium_info", K(ret), K(len), K(new_pos), K_(extra_medium_info)); } else if (new_pos - pos < length && OB_FAIL(last_persisted_committed_tablet_status_.serialize(buf, len, new_pos))) { LOG_WARN("failed to serialize last_persisted_committed_tablet_status", K(ret), K(len), K(new_pos), K_(last_persisted_committed_tablet_status)); + } else if (new_pos - pos < length && OB_FAIL(serialization::encode(buf, len, new_pos, ddl_table_type_))) { + LOG_WARN("failed to serialize ddl table type", K(ret), K(len), K(new_pos), K_(ddl_table_type)); } else if (OB_UNLIKELY(length != new_pos - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet meta's length doesn't match standard length", K(ret), K(new_pos), K(pos), K(length), K(length)); @@ -669,6 +681,7 @@ int ObTabletMeta::deserialize( } else if (TABLET_META_VERSION == version_) { int8_t compat_mode = -1; ddl_execution_id_ = 0; + ddl_table_type_ = ObITable::DDL_MEM_SSTABLE; if (OB_UNLIKELY(length_ > len - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("buffer's length is not enough", K(ret), K(length_), K(len - new_pos)); @@ -728,6 +741,8 @@ int ObTabletMeta::deserialize( LOG_WARN("failed to 
deserialize extra_medium_info", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(last_persisted_committed_tablet_status_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize last_persisted_committed_tablet_status", K(ret), K(len), K(new_pos)); + } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode(buf, len, new_pos, ddl_table_type_))) { + LOG_WARN("failed to deserialize ddl table type", K(ret), K(len), K(new_pos)); } else if (OB_UNLIKELY(length_ != new_pos - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet's length doesn't match standard length", K(ret), K(new_pos), K(pos), K_(length)); @@ -778,6 +793,7 @@ int64_t ObTabletMeta::get_serialize_size() const size += space_usage_.get_serialize_size(); size += extra_medium_info_.get_serialize_size(); size += last_persisted_committed_tablet_status_.get_serialize_size(); + size += serialization::encoded_length(ddl_table_type_); return size; } @@ -978,6 +994,7 @@ ObMigrationTabletParam::ObMigrationTabletParam() mds_data_(), transfer_info_(), create_schema_version_(0), + ddl_table_type_(ObITable::MAX_TABLE_TYPE), allocator_("MigTblParam", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID(), ObCtxIds::DEFAULT_CTX_ID) { } @@ -1155,6 +1172,8 @@ int ObMigrationTabletParam::serialize(char *buf, const int64_t len, int64_t &pos LOG_WARN("failed to serialize extra_medium_info", K(ret), K(len), K(new_pos), K_(extra_medium_info)); } else if (PARAM_VERSION_V3 <= version_ && new_pos - pos < length && OB_FAIL(last_persisted_committed_tablet_status_.serialize(buf, len, new_pos))) { LOG_WARN("failed to serialize last_persisted_committed_tablet_status", K(ret), K(len), K(new_pos), K_(last_persisted_committed_tablet_status)); + } else if (PARAM_VERSION_V3 <= version_ && new_pos - pos < length && OB_FAIL(serialization::encode(buf, len, new_pos, ddl_table_type_))) { + LOG_WARN("failed to serialize ddk table type", K(ret), K(len), K(new_pos), K_(ddl_table_type)); } else if (OB_UNLIKELY(length != new_pos 
- pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("length doesn't match standard length", K(ret), K(new_pos), K(pos), K(length)); @@ -1242,6 +1261,8 @@ int ObMigrationTabletParam::deserialize_v2_v3(const char *buf, const int64_t len LOG_WARN("failed to deserialize extra_medium_info", K(ret), K(len), K(new_pos)); } else if (PARAM_VERSION_V3 <= version_ && new_pos - pos < length && OB_FAIL(last_persisted_committed_tablet_status_.deserialize(buf, len, new_pos))) { LOG_WARN("failed to deserialize last_persisted_committed_tablet_status", K(ret), K(len), K(new_pos)); + } else if (PARAM_VERSION_V3 <= version_ && new_pos - pos < length && OB_FAIL(serialization::decode(buf, len, new_pos, ddl_table_type_))) { + LOG_WARN("failed to deserialize ddl table type", K(ret), K(len), K(new_pos)); } else if (OB_UNLIKELY(length != new_pos - pos)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet's length doesn't match standard length", K(ret), K(new_pos), K(pos), K(length), KPC(this)); @@ -1440,6 +1461,7 @@ int64_t ObMigrationTabletParam::get_serialize_size() const if (PARAM_VERSION_V3 <= version_) { size += extra_medium_info_.get_serialize_size(); size += last_persisted_committed_tablet_status_.get_serialize_size(); + size += serialization::encoded_length(ddl_table_type_); } return size; } @@ -1479,6 +1501,7 @@ void ObMigrationTabletParam::reset() mds_data_.reset(); transfer_info_.reset(); create_schema_version_ = 0; + ddl_table_type_ = ObITable::MAX_TABLE_TYPE; allocator_.reset(); } @@ -1517,6 +1540,7 @@ int ObMigrationTabletParam::assign(const ObMigrationTabletParam &param) ddl_execution_id_ = param.ddl_execution_id_; ddl_data_format_version_ = param.ddl_data_format_version_; ddl_commit_scn_ = param.ddl_commit_scn_; + ddl_table_type_ = param.ddl_table_type_; mds_checkpoint_scn_ = param.mds_checkpoint_scn_; transfer_info_ = param.transfer_info_; extra_medium_info_ = param.extra_medium_info_; diff --git a/src/storage/tablet/ob_tablet_meta.h b/src/storage/tablet/ob_tablet_meta.h index 
8b40011e7..ec7a1103d 100644 --- a/src/storage/tablet/ob_tablet_meta.h +++ b/src/storage/tablet/ob_tablet_meta.h @@ -149,7 +149,8 @@ public: K_(extra_medium_info), K_(last_persisted_committed_tablet_status), K_(create_schema_version), - K_(space_usage)); + K_(space_usage), + K_(ddl_table_type)); public: int32_t version_; @@ -185,6 +186,13 @@ public: ObTabletCreateDeleteMdsUserData last_persisted_committed_tablet_status_; // quick access for tablet status in sstables ObTabletSpaceUsage space_usage_; // calculated by tablet persist, ObMigrationTabletParam doesn't need it int64_t create_schema_version_; // add after 4.2, record schema_version when first create tablet. NEED COMPAT + // add after 4.3.3, is used to decide storage type for replaying ddl clog and create ddl dump sstable in cs replica. + // when offline ddl is concurrent with adding C-Replica, it may write row store clog, but storage schema in C-Replica is columnar. + // so need persist a field in tablet when replaying start log to decide table_type when restart from a checkpoint, or migrating, etc. + // - DDL_MEM_SSTABLE: initial state, tablet not doing offline ddl. only take this type for initial, unrelated to memtable. + // - DDL_DUMP_SSTABLE/DDL_MERGE_CO_SSTABLE: tablet is doing offline ddl, indicate target storage type for ddl dump sstable. + // - MAJOR_SSTABLE/COLUMN_ORIENTED_SSTABLE: tablet finish offline ddl, set when ddl merge task create major sstable. + ObITable::TableType ddl_table_type_; //ATTENTION : Add a new variable need consider ObMigrationTabletParam // and tablet meta init interface for migration. 
// yuque : @@ -322,6 +330,7 @@ public: ObTabletFullMemoryMdsData mds_data_; ObTabletTransferInfo transfer_info_; int64_t create_schema_version_; + ObITable::TableType ddl_table_type_; // Add new serialization member before this line, below members won't serialize common::ObArenaAllocator allocator_; // for storage schema diff --git a/src/storage/tablet/ob_tablet_table_store.cpp b/src/storage/tablet/ob_tablet_table_store.cpp index 876ca661a..ef56e9395 100644 --- a/src/storage/tablet/ob_tablet_table_store.cpp +++ b/src/storage/tablet/ob_tablet_table_store.cpp @@ -1356,24 +1356,20 @@ int ObTabletTableStore::build_major_tables( && OB_FAIL(major_tables.push_back(new_table))) { LOG_WARN("failed to add table into tables handle", K(ret), K(param)); } else if (OB_FAIL(inner_build_major_tables_(allocator, old_store, major_tables, - param.multi_version_start_, param.allow_duplicate_sstable_, inc_base_snapshot_version))) { + param.multi_version_start_, param.allow_duplicate_sstable_, inc_base_snapshot_version, is_convert_co_major_merge(param.merge_type_)))) { LOG_WARN("failed to inner build major tables", K(ret), K(param), K(major_tables)); } } return ret; } -int ObTabletTableStore::inner_build_major_tables_( - common::ObArenaAllocator &allocator, +int ObTabletTableStore::check_and_build_new_major_tables( const ObTabletTableStore &old_store, const ObIArray &tables_array, - const int64_t multi_version_start, const bool allow_duplicate_sstable, - int64_t &inc_base_snapshot_version) + ObIArray &major_tables) const { int ret = OB_SUCCESS; - inc_base_snapshot_version = -1; - ObSEArray major_tables; bool need_add = true; if (!old_store.major_tables_.empty() && OB_FAIL(old_store.major_tables_.get_all_tables(major_tables))) { @@ -1417,6 +1413,32 @@ int ObTabletTableStore::inner_build_major_tables_( } } } + return ret; +} + +int ObTabletTableStore::inner_build_major_tables_( + common::ObArenaAllocator &allocator, + const ObTabletTableStore &old_store, + const ObIArray &tables_array, + 
const int64_t multi_version_start, + const bool allow_duplicate_sstable, + int64_t &inc_base_snapshot_version, + bool replace_old_row_store_major /*= false*/) +{ + int ret = OB_SUCCESS; + inc_base_snapshot_version = -1; + ObSEArray major_tables; + + if (OB_UNLIKELY(!old_store.major_tables_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("major in old store is invalid", K(ret), K(old_store)); + } else if (replace_old_row_store_major) { + if (OB_FAIL(old_store.major_tables_.replace_twin_majors_and_build_new(tables_array, major_tables))) { + LOG_WARN("failed to replace twin row store majors", K(ret), K(old_store), K(tables_array)); + } + } else if (OB_FAIL(check_and_build_new_major_tables(old_store, tables_array, allow_duplicate_sstable, major_tables))) { + LOG_WARN("failed to check and add new major tables", K(ret), K(old_store), K(tables_array)); + } if (OB_FAIL(ret)) { } else if (OB_FAIL(ObTableStoreUtil::sort_major_tables(major_tables))) { diff --git a/src/storage/tablet/ob_tablet_table_store.h b/src/storage/tablet/ob_tablet_table_store.h index 63263d572..cebf72002 100644 --- a/src/storage/tablet/ob_tablet_table_store.h +++ b/src/storage/tablet/ob_tablet_table_store.h @@ -269,7 +269,13 @@ private: const ObIArray &tables_array, const int64_t multi_version_start, const bool allow_duplicate_sstable, - int64_t &inc_base_snapshot_version); + int64_t &inc_base_snapshot_version, + bool replace_old_row_store_major = false); + int check_and_build_new_major_tables( + const ObTabletTableStore &old_store, + const ObIArray &tables_array, + const bool allow_duplicate_sstable, + ObIArray &major_tables) const; int inner_replace_remote_major_sstable_( common::ObArenaAllocator &allocator, const ObTabletTableStore &old_store, diff --git a/src/storage/tx_storage/ob_ls_service.cpp b/src/storage/tx_storage/ob_ls_service.cpp index 5f7566718..6f21c664f 100644 --- a/src/storage/tx_storage/ob_ls_service.cpp +++ b/src/storage/tx_storage/ob_ls_service.cpp @@ -14,6 +14,7 @@ #include 
"lib/guard/ob_shared_guard.h" #include "logservice/ob_garbage_collector.h" +#include "logservice/ob_log_service.h" #include "observer/ob_service.h" #include "observer/ob_srv_network_frame.h" #include "share/rc/ob_tenant_module_init_ctx.h" @@ -421,6 +422,7 @@ int ObLSService::inner_create_ls_(const share::ObLSID &lsid, const ObMigrationStatus &migration_status, const ObLSRestoreStatus &restore_status, const SCN &create_scn, + const ObLSStoreFormat &store_format, ObLS *&ls) { int ret = OB_SUCCESS; @@ -438,6 +440,7 @@ int ObLSService::inner_create_ls_(const share::ObLSID &lsid, migration_status, restore_status, create_scn, + store_format, rs_reporter_))) { LOG_WARN("fail to init ls", K(ret), K(lsid)); } @@ -980,6 +983,7 @@ int ObLSService::replay_create_ls_(const ObLSMeta &ls_meta) migration_status, restore_status, ls_meta.get_clog_checkpoint_scn(), + ls_meta.get_store_format(), ls))) { LOG_WARN("fail to inner create ls", K(ret), K(ls_meta.ls_id_)); } else if (FALSE_IT(state = ObLSCreateState::CREATE_STATE_INNER_CREATED)) { @@ -1036,6 +1040,86 @@ int ObLSService::get_ls( return ret; } +int ObLSService::get_ls_replica( + const ObLSID &ls_id, + ObLSGetMod mod, + share::ObLSReplica &replica) +{ + int ret = OB_SUCCESS; + replica.reset(); + const uint64_t tenant_id = MTL_ID(); + ObLSHandle ls_handle; + ObLS *ls = NULL; + ObLogService *log_service = MTL(ObLogService*); + common::ObRole role = FOLLOWER; + ObMemberList ob_member_list; + ObLSReplica::MemberList member_list; + GlobalLearnerList learner_list; + int64_t proposal_id = 0; + int64_t paxos_replica_number = 0; + ObLSRestoreStatus restore_status; + ObReplicaStatus replica_status = REPLICA_STATUS_NORMAL; + ObReplicaType replica_type = REPLICA_TYPE_FULL; + ObMigrationStatus migration_status = OB_MIGRATION_STATUS_MAX; + uint64_t unit_id = common::OB_INVALID_ID; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_UNLIKELY(!ls_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + 
LOG_WARN("invalid argument", KR(ret), K(ls_id)); + } else if (OB_ISNULL(log_service) || OB_ISNULL(GCTX.config_) || OB_ISNULL(GCTX.omt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null ptr", KR(ret), KP(log_service), KP(GCTX.config_), KP(GCTX.omt_)); + } else if (OB_FAIL(get_ls(ls_id, ls_handle, mod))) { + LOG_WARN("get ls handle failed", KR(ret), K(ls_id), K(mod)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is null", KR(ret), K(ls_id), KP(ls)); + } else if (OB_FAIL(ls->get_paxos_member_list_and_learner_list(ob_member_list, paxos_replica_number, learner_list))) { + LOG_WARN("get member list and learner list from ObLS failed", KR(ret)); + } else if (OB_FAIL(ls->get_restore_status(restore_status))) { + LOG_WARN("get restore status failed", KR(ret)); + } else if (OB_FAIL(ls->get_migration_status(migration_status))) { + LOG_WARN("get migration status failed", KR(ret)); + } else if (OB_FAIL(ls->get_replica_status(replica_status))) { + LOG_WARN("get replica status failed", KR(ret)); + } else if (OB_FAIL(log_service->get_palf_role(ls_id, role, proposal_id))) { + LOG_WARN("failed to get role from palf", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(get_replica_type_(GCTX.self_addr(), ob_member_list, learner_list, + ls->get_store_format(), replica_type))) { + LOG_WARN("fail to get replica_type by member and learner list", KR(ret)); + } else if (OB_FAIL(ObLSReplica::transform_ob_member_list(ob_member_list, member_list))) { + LOG_WARN("fail to transfrom ob_member_list into member_list", KR(ret), K(ob_member_list)); + } else if (OB_FAIL(GCTX.omt_->get_unit_id(tenant_id, unit_id))) { + LOG_WARN("get tenant unit id failed", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(replica.init( + 0, /*create_time_us*/ + 0, /*modify_time_us*/ + tenant_id, /*tenant_id*/ + ls_id, /*ls_id*/ + GCTX.self_addr(), /*server*/ + GCTX.config_->mysql_port, /*sql_port*/ + role, /*role*/ + replica_type, /*replica_type*/ + proposal_id, 
/*proposal_id*/ + is_strong_leader(role) ? REPLICA_STATUS_NORMAL : replica_status,/*replica_status*/ + restore_status, /*restore_status*/ + 100, /*memstore_percent*/ + unit_id, /*unit_id*/ + GCTX.config_->zone.str(), /*zone*/ + paxos_replica_number, /*paxos_replica_number*/ + 0, /*data_size*/ + 0, /*required_size*/ + member_list, + learner_list, + OB_MIGRATION_STATUS_REBUILD == migration_status /*is_rebuild*/))) { + LOG_WARN("fail to init a ls replica", KR(ret), K(tenant_id), K(ls_id), K(role), + K(proposal_id), K(unit_id), K(paxos_replica_number), K(member_list), K(learner_list)); + } + return ret; +} + int ObLSService::remove_ls(const share::ObLSID &ls_id) { int ret = OB_SUCCESS; @@ -1219,6 +1303,9 @@ int ObLSService::create_ls_(const ObCreateLSCommonArg &arg, bool ls_exist = false; bool waiting_destroy = false; int64_t process_point = 0; + const ObLSStoreFormat ls_store_format = (REPLICA_TYPE_COLUMNSTORE == arg.replica_type_) ? + common::ObLSStoreType::OB_LS_STORE_COLUMN_ONLY + : common::ObLSStoreType::OB_LS_STORE_NORMAL; if (IS_NOT_INIT) { ret = OB_NOT_INIT; @@ -1254,8 +1341,9 @@ int ObLSService::create_ls_(const ObCreateLSCommonArg &arg, arg.migration_status_, arg.restore_status_, arg.create_scn_, + ls_store_format, ls))) { - LOG_WARN("create ls failed", K(ret), K(arg.ls_id_)); + LOG_WARN("create ls failed", K(ret), K(arg.ls_id_), K(ls_store_format)); } else { state = ObLSCreateState::CREATE_STATE_INNER_CREATED; ObLSLockGuard lock_ls(ls); @@ -1648,6 +1736,36 @@ int ObLSService::dump_ls_info() return ret; } +// this function is expected to not fail +int ObLSService::get_replica_type_( + const common::ObAddr &addr, + const ObMemberList &ob_member_list, + const GlobalLearnerList &learner_list, + const common::ObLSStoreFormat &ls_store_format, + ObReplicaType &replica_type) +{ + int ret = OB_SUCCESS; + const bool is_columnstore = ls_store_format.is_columnstore(); + const bool in_member_list = ob_member_list.contains(addr); + const bool in_learner_list = 
learner_list.contains(addr); + if (is_columnstore) { + replica_type = REPLICA_TYPE_COLUMNSTORE; + if (in_member_list) { + LOG_WARN("columnstore replica member in member_list is unexpected", + K(addr), K(ob_member_list), K(learner_list)); + } + } else { + // if replica exists in learner_list, report it as R-replica. + // Otherwise, report as F-replica + if (in_learner_list) { + replica_type = REPLICA_TYPE_READONLY; + } else { + replica_type = REPLICA_TYPE_FULL; + } + } + return ret; +} + } // storage } // oceanbase diff --git a/src/storage/tx_storage/ob_ls_service.h b/src/storage/tx_storage/ob_ls_service.h index 9e4bda068..902ac7ba2 100644 --- a/src/storage/tx_storage/ob_ls_service.h +++ b/src/storage/tx_storage/ob_ls_service.h @@ -107,6 +107,10 @@ public: int get_ls(const share::ObLSID &ls_id, ObLSHandle &handle, ObLSGetMod mod); + int get_ls_replica( + const ObLSID &ls_id, + ObLSGetMod mod, + share::ObLSReplica &replica); // @param [in] func, iterate all ls diagnose info int iterate_diagnose(const ObFunction &func); @@ -190,6 +194,7 @@ private: const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const share::SCN &create_scn, + const ObLSStoreFormat &store_format, ObLS *&ls); int inner_del_ls_(ObLS *&ls); int add_ls_to_map_(ObLS *ls); @@ -221,6 +226,13 @@ private: int cal_min_phy_resource_needed_(const int64_t ls_cnt, ObMinPhyResourceResult &min_phy_res); int get_resource_constraint_value_(ObResoureConstraintValue &constraint_value); + // for get_ls_replica + int get_replica_type_( + const common::ObAddr &addr, + const ObMemberList &ob_member_list, + const GlobalLearnerList &learner_list, + const common::ObLSStoreFormat &ls_store_format, + ObReplicaType &replica_type); private: bool is_inited_; diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result index 4eb37c6a4..c6f848b4d 100644 --- 
a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_sys_views_in_sys.result @@ -2360,7 +2360,7 @@ MODIFY_TIME timestamp(6) YES UNIT_COUNT bigint(20) NO NULL UNIT_CONFIG_ID bigint(20) NO NULL ZONE_LIST varchar(8192) NO NULL -REPLICA_TYPE varchar(18) NO +REPLICA_TYPE varchar(4) NO select /*+QUERY_TIMEOUT(60000000)*/ count(*) as cnt from (select * from oceanbase.DBA_OB_RESOURCE_POOLS limit 1); cnt 1 diff --git a/unittest/observer/table/test_create_executor.cpp b/unittest/observer/table/test_create_executor.cpp index 3ec322276..71185c785 100644 --- a/unittest/observer/table/test_create_executor.cpp +++ b/unittest/observer/table/test_create_executor.cpp @@ -55,7 +55,7 @@ void fill_table_schema(ObTableSchema &table) table.set_part_level(PARTITION_LEVEL_TWO); table.set_charset_type(CHARSET_UTF8MB4); table.set_collation_type(CS_TYPE_UTF8MB4_BIN); - table.set_table_type(USER_TABLE); + table.set_table_type(USER_VIEW); table.set_index_type(INDEX_TYPE_IS_NOT); table.set_index_status(INDEX_STATUS_AVAILABLE); table.set_data_table_id(0); @@ -182,6 +182,7 @@ void TestCreateExecutor::fake_ctx_init_common(ObTableCtx &fake_ctx, ObTableSchem g_sess_node_val.sess_info_.test_init(0, 0, 0, NULL); g_sess_node_val.sess_info_.load_all_sys_vars(schema_guard_); fake_ctx.init_physical_plan_ctx(0, 1); + fake_ctx.loc_meta_.route_policy_ = ObRoutePolicyType::READONLY_ZONE_FIRST; ASSERT_EQ(OB_SUCCESS, fake_ctx.construct_column_items()); } diff --git a/unittest/storage/migration/test_migration.h b/unittest/storage/migration/test_migration.h index 1f23f8d12..19a64615c 100644 --- a/unittest/storage/migration/test_migration.h +++ b/unittest/storage/migration/test_migration.h @@ -385,6 +385,7 @@ static int mock_valid_ls_meta(obrpc::ObFetchLSMetaInfoResp &res) res.ls_meta_package_.ls_meta_.clog_checkpoint_scn_.set_base(); res.ls_meta_package_.ls_meta_.migration_status_ = 
ObMigrationStatus::OB_MIGRATION_STATUS_NONE; res.ls_meta_package_.ls_meta_.restore_status_ = share::ObLSRestoreStatus::NONE; + res.ls_meta_package_.ls_meta_.store_format_ = common::ObLSStoreType::OB_LS_STORE_NORMAL; res.ls_meta_package_.dup_ls_meta_.ls_id_ = ls_id; const palf::LSN lsn(184467440737095516); res.ls_meta_package_.palf_meta_.prev_log_info_.lsn_ = lsn; diff --git a/unittest/storage/test_compaction_policy.cpp b/unittest/storage/test_compaction_policy.cpp index 0ccfcf188..e6ca5f6dc 100644 --- a/unittest/storage/test_compaction_policy.cpp +++ b/unittest/storage/test_compaction_policy.cpp @@ -423,6 +423,7 @@ int TestCompactionPolicy::mock_tablet( ObArenaAllocator arena_allocator; ObCreateTabletSchema create_tablet_schema; bool need_empty_major_table = false; + bool need_generate_cs_replica_cg_array = false; if (OB_ISNULL(t3m)) { ret = OB_ERR_UNEXPECTED; @@ -439,8 +440,9 @@ int TestCompactionPolicy::mock_tablet( } else if (OB_FAIL(create_tablet_schema.init(arena_allocator, table_schema, compat_mode, false/*skip_column_info*/, ObCreateTabletSchema::STORAGE_SCHEMA_VERSION_V3))) { LOG_WARN("failed to init storage schema", KR(ret), K(table_schema)); + } else if (FALSE_IT(need_generate_cs_replica_cg_array = ls_handle.get_ls()->is_cs_replica() && create_tablet_schema.is_row_store() && create_tablet_schema.is_user_data_table())) { } else if (OB_FAIL(tablet->init_for_first_time_creation(allocator, ls_id, tablet_id, tablet_id, - SCN::min_scn(), snapshot_version, create_tablet_schema, need_empty_major_table, ls_handle.get_ls()->get_freezer()))) { + SCN::min_scn(), snapshot_version, create_tablet_schema, need_empty_major_table, need_generate_cs_replica_cg_array, ls_handle.get_ls()->get_freezer()))) { LOG_WARN("failed to init tablet", K(ret), K(ls_id), K(tablet_id), K(snapshot_version), K(table_schema), K(compat_mode)); } else { diff --git a/unittest/storage/test_tablet_helper.h b/unittest/storage/test_tablet_helper.h index 6dc67bd66..064625bcf 100644 --- 
a/unittest/storage/test_tablet_helper.h +++ b/unittest/storage/test_tablet_helper.h @@ -130,12 +130,14 @@ inline int TestTabletHelper::create_tablet( const ObTabletMapKey key(ls_id, tablet_id); const bool need_create_empty_major_sstable = !(create_tablet_schema.is_user_hidden_table() || (create_tablet_schema.is_index_table() && !create_tablet_schema.can_read_index())); + const bool need_generate_cs_replica_cg_array = + ls_handle.get_ls()->is_cs_replica() && create_tablet_schema.is_row_store() && create_tablet_schema.is_user_data_table(); if (OB_FAIL(t3m->create_msd_tablet(WashTabletPriority::WTP_HIGH, key, ls_handle, tablet_handle))) { STORAGE_LOG(WARN, "t3m acquire tablet failed", K(ret), K(ls_id), K(tablet_id)); } else if (OB_FAIL(tablet_handle.get_obj()->init_for_first_time_creation( *tablet_handle.get_allocator(), ls_id, tablet_id, tablet_id, share::SCN::base_scn(), - snapshot_version, create_tablet_schema, need_create_empty_major_sstable, freezer))){ + snapshot_version, create_tablet_schema, need_create_empty_major_sstable, need_generate_cs_replica_cg_array, freezer))){ STORAGE_LOG(WARN, "failed to init tablet", K(ret), K(ls_id), K(tablet_id)); } else if (ObTabletStatus::Status::MAX != tablet_status) { ObTabletCreateDeleteMdsUserData data; diff --git a/unittest/storage/test_tablet_pointer_map.cpp b/unittest/storage/test_tablet_pointer_map.cpp index 6159f1749..33c5e4547 100644 --- a/unittest/storage/test_tablet_pointer_map.cpp +++ b/unittest/storage/test_tablet_pointer_map.cpp @@ -109,6 +109,7 @@ void TestMetaPointerMap::FakeLs(ObLS &ls) ls.ls_meta_.migration_status_ = ObMigrationStatus::OB_MIGRATION_STATUS_NONE; ls.ls_meta_.restore_status_ = ObLSRestoreStatus::NONE; ls.ls_meta_.rebuild_seq_ = 0; + ls.ls_meta_.store_format_ = common::ObLSStoreType::OB_LS_STORE_NORMAL; } class CalculateSize final