leverage a better performing interface to get tablet ls mapping

This commit is contained in:
LiefB 2023-08-04 09:12:29 +00:00 committed by ob-robot
parent f87b31f4e7
commit 55d1e8825e
6 changed files with 154 additions and 96 deletions

View File

@ -668,7 +668,6 @@ int ObCrossClusterTabletChecksumValidator::check_cross_cluster_checksum(
} else if (OB_UNLIKELY(tablet_ids.count() < 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get tablet_ids of current table schema", KR(ret), K_(tenant_id), K(simple_schema));
} else if (FALSE_IT(sort_tablet_ids(tablet_ids))) { // tablet_ids should be in order
} else if (OB_FAIL(ObTabletReplicaChecksumOperator::get_tablet_ls_pairs(tenant_id_,
simple_schema.get_table_id(), *sql_proxy_, tablet_ids, pairs))) {
LOG_WARN("fail to get tablet_ls pairs", KR(ret), K_(tenant_id), "table_id",
@ -708,55 +707,46 @@ int ObCrossClusterTabletChecksumValidator::check_cross_cluster_checksum(
return ret;
}
// Sorts tablet_ids in place with std::sort, using the default ordering of the
// element type (ObTabletID).
void ObCrossClusterTabletChecksumValidator::sort_tablet_ids(ObArray<ObTabletID> &tablet_ids)
{
  auto range_begin = tablet_ids.begin();
  auto range_end = tablet_ids.end();
  std::sort(range_begin, range_end);
}
int ObCrossClusterTabletChecksumValidator::check_column_checksum(
const ObArray<ObTabletReplicaChecksumItem> &tablet_replica_checksum_items,
const ObArray<ObTabletChecksumItem> &tablet_checksum_items)
{
int ret = OB_SUCCESS;
int check_ret = OB_SUCCESS;
int cmp_ret = 0;
ObTabletChecksumItem tablet_checksum_item;
ObTabletReplicaChecksumItem tablet_replica_checksum_item;
int64_t i = 0;
int64_t j = 0;
int64_t tablet_checksum_item_cnt = tablet_checksum_items.count();
int64_t tablet_replica_checksum_item_cnt = tablet_replica_checksum_items.count();
while (OB_SUCC(ret) && (i < tablet_checksum_item_cnt) && (j < tablet_replica_checksum_item_cnt)) {
cmp_ret = 0;
tablet_checksum_item.reset();
if (OB_FAIL(tablet_checksum_item.assign(tablet_checksum_items.at(i)))) {
LOG_WARN("fail to assign tablet checksum item", KR(ret), K_(tenant_id), K(i));
} else {
do {
if (cmp_ret >= 0) { // iterator all tablet replica checksum util next different tablet.
tablet_replica_checksum_item.reset();
if (OB_FAIL(tablet_replica_checksum_item.assign(tablet_replica_checksum_items.at(j)))) {
LOG_WARN("fail to assign tablet replica checksum item", KR(ret), K_(tenant_id), K(j));
} else if (0 == (cmp_ret = tablet_checksum_item.compare_tablet(tablet_replica_checksum_item))) {
if (OB_FAIL(tablet_checksum_item.verify_tablet_column_checksum(tablet_replica_checksum_item))) {
if (OB_CHECKSUM_ERROR == ret) {
LOG_DBA_ERROR(OB_CHECKSUM_ERROR, "msg", "ERROR! ERROR! ERROR! checksum error in cross-cluster checksum",
K(tablet_checksum_item), K(tablet_replica_checksum_item));
check_ret = OB_CHECKSUM_ERROR;
ret = OB_SUCCESS; // continue checking next checksum
} else {
LOG_WARN("unexpected error in cross-cluster checksum", KR(ret),
K(tablet_checksum_item), K(tablet_replica_checksum_item));
}
}
}
hash::ObHashMap<ObTabletLSPair, ObTabletChecksumItem> tablet_checksum_items_map;
if (OB_FAIL(tablet_checksum_items_map.create(500, "MFTatCkmMap", "MFTatCkmMap", tenant_id_))) {
LOG_WARN("fail to create tablet checksum items map", KR(ret), K_(tenant_id));
} else if (OB_FAIL(convert_array_to_map(tablet_checksum_items, tablet_checksum_items_map))) {
LOG_WARN("fail to convert array to map", KR(ret));
} else {
const int64_t tablet_replica_checksum_item_cnt = tablet_replica_checksum_items.count();
for (int64_t i = 0; (i < tablet_replica_checksum_item_cnt) && OB_SUCC(ret); ++i) {
const ObTabletReplicaChecksumItem &tablet_replica_checksum_item = tablet_replica_checksum_items.at(i);
ObTabletLSPair pair(tablet_replica_checksum_item.tablet_id_, tablet_replica_checksum_item.ls_id_);
ObTabletChecksumItem tablet_checksum_item;
// ObHashMap::get_refactored calls assign function of key_type and value_type.
// ObTabletChecksumItem::assign should ensure the item is valid, thus construct one valid item
if (OB_FAIL(construct_valid_tablet_checksum_item(tablet_checksum_item))) {
LOG_WARN("fail to construct valid tablet checksum item", KR(ret));
} else if (OB_FAIL(tablet_checksum_items_map.get_refactored(pair, tablet_checksum_item))) {
if (OB_ENTRY_NOT_EXIST == ret) {
// ignore ret and skip this tablet checksum. this may be caused by a timed-out wait for the tablet checksum
ret = OB_SUCCESS;
} else {
LOG_WARN("fail to get_refactored", KR(ret), K(pair));
}
if (cmp_ret >= 0) {
++j;
} else if (OB_FAIL(tablet_checksum_item.verify_tablet_column_checksum(tablet_replica_checksum_item))) {
if (OB_CHECKSUM_ERROR == ret) {
LOG_DBA_ERROR(OB_CHECKSUM_ERROR, "msg", "ERROR! ERROR! ERROR! checksum error in cross-cluster checksum",
K(tablet_checksum_item), K(tablet_replica_checksum_item));
check_ret = OB_CHECKSUM_ERROR;
ret = OB_SUCCESS; // continue checking next checksum
} else {
LOG_WARN("unexpected error in cross-cluster checksum", KR(ret),
K(tablet_checksum_item), K(tablet_replica_checksum_item));
}
} while ((cmp_ret >= 0) && (j < tablet_replica_checksum_item_cnt) && OB_SUCC(ret));
}
}
++i;
}
if (OB_CHECKSUM_ERROR == check_ret) {
ret = OB_CHECKSUM_ERROR;
@ -764,6 +754,42 @@ int ObCrossClusterTabletChecksumValidator::check_column_checksum(
return ret;
}
int ObCrossClusterTabletChecksumValidator::convert_array_to_map(
const ObArray<ObTabletChecksumItem> &tablet_checksum_items,
hash::ObHashMap<ObTabletLSPair, ObTabletChecksumItem> &tablet_checksum_items_map)
{
int ret = OB_SUCCESS;
const int64_t tablet_checksum_item_cnt = tablet_checksum_items.count();
for (int64_t i = 0; (i < tablet_checksum_item_cnt) && OB_SUCC(ret); ++i) {
const ObTabletChecksumItem &item = tablet_checksum_items.at(i);
ObTabletLSPair pair(item.tablet_id_, item.ls_id_);
if (OB_FAIL(tablet_checksum_items_map.set_refactored(pair, item, false/*overwrite*/))) {
LOG_WARN("fail to set_refactored", KR(ret), K(pair), K(item));
}
}
return ret;
}
// Fills tablet_checksum_item with placeholder content: tablet id 1, ls id 1 and a
// column meta initialized from a single column checksum of 0.
// NOTE(review): the caller's comment indicates this exists so the item counts as
// valid before it is used as the output value of ObHashMap::get_refactored — confirm
// against ObTabletChecksumItem::assign.
//
// @param tablet_checksum_item  item to fill; the ids are written even if a later
//                              step fails
// @return OB_SUCCESS, or the error code of the first failing step
//         (push_back / init / assign).
int ObCrossClusterTabletChecksumValidator::construct_valid_tablet_checksum_item(
    ObTabletChecksumItem &tablet_checksum_item)
{
  int ret = OB_SUCCESS;
  ObTabletReplicaReportColumnMeta fake_column_meta;
  // The placeholder ids are set unconditionally, before any fallible step.
  tablet_checksum_item.tablet_id_ = ObTabletID(1);
  tablet_checksum_item.ls_id_ = ObLSID(1);
  ObArray<int64_t> fake_column_checksums;
  if (OB_FAIL(fake_column_checksums.push_back(0))) {
    LOG_WARN("fail to push back column checksums", KR(ret));
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(fake_column_meta.init(fake_column_checksums))) {
      LOG_WARN("fail to init column meta", KR(ret));
    }
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(tablet_checksum_item.column_meta_.assign(fake_column_meta))) {
      LOG_WARN("fail to assign column meta", KR(ret), K(fake_column_meta));
    }
  }
  return ret;
}
bool ObCrossClusterTabletChecksumValidator::is_first_tablet_in_sys_ls(const ObTabletReplicaChecksumItem &item) const
{
// mark tablet_id=1 && ls_id=1 as end flag

View File

@ -188,7 +188,6 @@ private:
ObMergeTimeStatistics &merge_time_statistics);
int check_cross_cluster_checksum(const share::schema::ObSimpleTableSchemaV2 &simple_schema,
const share::SCN &frozen_scn);
void sort_tablet_ids(ObArray<ObTabletID> &tablet_ids);
int check_column_checksum(const ObArray<share::ObTabletReplicaChecksumItem> &tablet_replica_checksum_items,
const ObArray<share::ObTabletChecksumItem> &tablet_checksum_items);
bool is_first_tablet_in_sys_ls(const share::ObTabletReplicaChecksumItem &item) const;
@ -207,6 +206,9 @@ private:
const int64_t expected_epoch);
int contains_first_tablet_in_sys_ls(const ObArray<share::ObTabletLSPair> &pairs,
bool &is_containing) const;
int convert_array_to_map(const ObArray<share::ObTabletChecksumItem> &tablet_checksum_items,
hash::ObHashMap<share::ObTabletLSPair, share::ObTabletChecksumItem> &tablet_checksum_items_map);
int construct_valid_tablet_checksum_item(share::ObTabletChecksumItem &tablet_checksum_item);
private:
const static int64_t MAX_BATCH_INSERT_COUNT = 100;

View File

@ -68,23 +68,6 @@ bool ObTabletChecksumItem::is_same_tablet(const ObTabletChecksumItem &item) cons
&& (ls_id_ == item.ls_id_);
}
// Three-way comparison of this item against replica_item, ordered by tablet id
// first and ls id second.
//
// @param replica_item  the replica checksum item to compare against
// @return -1 if this item orders before replica_item, 1 if it orders after,
//         0 if both the tablet id and the ls id are equal.
int ObTabletChecksumItem::compare_tablet(const ObTabletReplicaChecksumItem &replica_item) const
{
  int cmp = 0;
  if (tablet_id_.id() != replica_item.tablet_id_.id()) {
    // tablet id is the primary key of the ordering
    cmp = (tablet_id_.id() < replica_item.tablet_id_.id()) ? -1 : 1;
  } else if (ls_id_.id() != replica_item.ls_id_.id()) {
    // same tablet id: fall back to the ls id
    cmp = (ls_id_.id() < replica_item.ls_id_.id()) ? -1 : 1;
  }
  return cmp;
}
int ObTabletChecksumItem::verify_tablet_column_checksum(const ObTabletReplicaChecksumItem &replica_item) const
{
int ret = OB_SUCCESS;

View File

@ -40,7 +40,6 @@ public:
void reset();
bool is_valid() const;
bool is_same_tablet(const ObTabletChecksumItem &item) const;
int compare_tablet(const ObTabletReplicaChecksumItem &replica_item) const;
int verify_tablet_column_checksum(const ObTabletReplicaChecksumItem &replica_item) const;
int assign(const ObTabletReplicaChecksumItem &replica_item);
int assign(const ObTabletChecksumItem &other);

View File

@ -1181,8 +1181,10 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum(
SMART_VARS_2((ObArray<ObTabletReplicaChecksumItem>, data_table_ckm_items),
(ObArray<ObTabletReplicaChecksumItem>, index_table_ckm_items)) {
SMART_VARS_2((ObArray<ObTabletLSPair>, data_table_tablets),
(ObArray<ObTabletLSPair>, index_table_tablets)) {
SMART_VARS_4((ObArray<ObTabletLSPair>, data_table_tablets),
(ObArray<ObTabletLSPair>, index_table_tablets),
(ObArray<ObTabletID>, index_table_tablet_ids),
(ObArray<ObTabletID>, data_table_tablet_ids)) {
if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, index_table_schema, compaction_scn,
index_table_tablets, index_table_ckm_items))) {
LOG_WARN("fail to get index table tablet replica ckm_items", KR(ret), K(tenant_id), K(compaction_scn),
@ -1191,7 +1193,7 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum(
data_table_tablets, data_table_ckm_items))) {
LOG_WARN("fail to get data table tablet replica ckm_items", KR(ret), K(tenant_id), K(compaction_scn),
K(data_table_id));
} else if (data_table_tablets.count() != index_table_tablets.count()) {
} else if (OB_UNLIKELY(data_table_tablets.count() != index_table_tablets.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet count of local index table is not same with data table", KR(ret), "data_table_tablet_cnt",
data_table_tablets.count(), "index_table_tablet_cnt", index_table_tablets.count());
@ -1205,26 +1207,57 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum(
LOG_WARN("fail to check need verfy checksum", KR(ret), K(compaction_scn), K(index_table_id), K(data_table_id));
} else if (!need_verify) {
LOG_INFO("do not need verify checksum", K(index_table_id), K(data_table_id), K(compaction_scn));
} else if (OB_FAIL(get_table_all_tablet_ids_(index_table_schema, index_table_tablet_ids))) {
LOG_WARN("fail to get index table all tablet ids", KR(ret), K(index_table_schema));
} else if (OB_FAIL(get_table_all_tablet_ids_(data_table_schema, data_table_tablet_ids))) {
LOG_WARN("fail to get data table all tablet ids", KR(ret), K(data_table_schema));
} else if (OB_UNLIKELY((data_table_tablet_ids.count() != index_table_tablet_ids.count())
|| (data_table_tablets.count() != data_table_tablet_ids.count()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid tablet_ids count or tablet_ls_pair count", KR(ret), "data_table_tablet_id_cnt",
data_table_tablet_ids.count(), "index_table_tablet_id_cnt", index_table_tablet_ids.count(),
"data_table_tablet_ls_pair_count", data_table_tablets.count());
} else {
// map element: <tablet_id, tablet_ls_pair>
hash::ObHashMap<ObTabletID, ObTabletLSPair> data_tablet_ls_pair_map;
hash::ObHashMap<ObTabletID, ObTabletLSPair> index_tablet_ls_pair_map;
// map element: <column_id, checksum>
hash::ObHashMap<int64_t, int64_t> data_column_ckm_map;
hash::ObHashMap<int64_t, int64_t> index_column_ckm_map;
if (OB_FAIL(data_column_ckm_map.create(default_column_cnt, ObModIds::OB_CHECKSUM_CHECKER))) {
LOG_WARN("fail to create data table column ckm map", KR(ret), K(default_column_cnt));
} else if (OB_FAIL(index_column_ckm_map.create(default_column_cnt, ObModIds::OB_CHECKSUM_CHECKER))) {
LOG_WARN("fail to create index table column ckm map", KR(ret), K(default_column_cnt));
}
if (OB_FAIL(data_tablet_ls_pair_map.create(500, ObModIds::OB_CHECKSUM_CHECKER,
ObModIds::OB_CHECKSUM_CHECKER, tenant_id))) {
LOG_WARN("fail to create data tablet ls pair map", KR(ret), K(default_column_cnt), K(tenant_id));
} else if (OB_FAIL(index_tablet_ls_pair_map.create(500, ObModIds::OB_CHECKSUM_CHECKER,
ObModIds::OB_CHECKSUM_CHECKER, tenant_id))) {
LOG_WARN("fail to create index tablet ls pair map", KR(ret), K(default_column_cnt), K(tenant_id));
} else if (OB_FAIL(convert_array_to_map(data_table_tablets, data_tablet_ls_pair_map))) {
LOG_WARN("fail to convert array to map", KR(ret));
} else if (OB_FAIL(convert_array_to_map(index_table_tablets, index_tablet_ls_pair_map))) {
LOG_WARN("fail to convert array to map", KR(ret));
} else if (OB_FAIL(data_column_ckm_map.create(default_column_cnt,
ObModIds::OB_CHECKSUM_CHECKER, ObModIds::OB_CHECKSUM_CHECKER, tenant_id))) {
LOG_WARN("fail to create data table column ckm map", KR(ret), K(default_column_cnt), K(tenant_id));
} else if (OB_FAIL(index_column_ckm_map.create(default_column_cnt,
ObModIds::OB_CHECKSUM_CHECKER, ObModIds::OB_CHECKSUM_CHECKER, tenant_id))) {
LOG_WARN("fail to create index table column ckm map", KR(ret), K(default_column_cnt), K(tenant_id));
}
// One tablet of local index table is mapping to one tablet of data table
const int64_t tablet_cnt = data_table_tablets.count();
for (int64_t i = 0; (i < tablet_cnt) && OB_SUCC(ret); ++i) {
ObTabletID &tmp_data_tablet_id = data_table_tablet_ids.at(i);
ObTabletID &tmp_index_tablet_id = index_table_tablet_ids.at(i);
ObTabletLSPair data_tablet_pair;
ObTabletLSPair index_tablet_pair;
if (OB_FAIL(data_column_ckm_map.clear())) {
LOG_WARN("fail to clear hash map", KR(ret), K(default_column_cnt));
} else if (OB_FAIL(index_column_ckm_map.clear())) {
LOG_WARN("fail to clear hash map", KR(ret), K(default_column_cnt));
} else if (OB_FAIL(data_tablet_ls_pair_map.get_refactored(tmp_data_tablet_id, data_tablet_pair))) {
LOG_WARN("fail to get refactored", KR(ret), K(tmp_data_tablet_id));
} else if (OB_FAIL(index_tablet_ls_pair_map.get_refactored(tmp_index_tablet_id, index_tablet_pair))) {
LOG_WARN("fail to get refactored", KR(ret), K(tmp_index_tablet_id));
} else {
const ObTabletLSPair &data_tablet_pair = data_table_tablets.at(i);
const ObTabletLSPair &index_tablet_pair = index_table_tablets.at(i);
int64_t data_tablet_idx = OB_INVALID_INDEX;
int64_t index_tablet_idx = OB_INVALID_INDEX;
if (OB_FAIL(find_checksum_item_(data_tablet_pair, data_table_ckm_items, compaction_scn, data_tablet_idx))) {
@ -1393,7 +1426,7 @@ int ObTabletReplicaChecksumOperator::find_checksum_item_(
int ret = OB_SUCCESS;
idx = OB_INVALID_INDEX;
const int64_t item_cnt = items.count();
if (!pair.is_valid() || (item_cnt < 1)) {
if (OB_UNLIKELY(!pair.is_valid() || (item_cnt < 1))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(pair), K(item_cnt));
} else {
@ -1445,7 +1478,7 @@ int ObTabletReplicaChecksumOperator::get_tablet_ls_pairs(
const ObIArray<ObTabletID> &tablet_ids,
ObIArray<ObTabletLSPair> &pairs)
{
int ret = OB_SUCCESS;
int ret = OB_SUCCESS;
if (!is_valid_tenant_id(tenant_id) || (tablet_ids.count() < 1)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(tablet_ids.count()));
@ -1459,32 +1492,28 @@ int ObTabletReplicaChecksumOperator::get_tablet_ls_pairs(
LOG_WARN("fail to push back ls_id", KR(ret), K(tenant_id), K(table_id));
}
}
} else if (OB_FAIL(ObTabletToLSTableOperator::batch_get_ls(sql_proxy, tenant_id, tablet_ids, ls_ids))) {
const int64_t ls_id_cnt = ls_ids.count();
for (int64_t i = 0; (i < ls_id_cnt) && OB_SUCC(ret); ++i) {
ObTabletLSPair cur_pair;
const ObTabletID &cur_tablet_id = tablet_ids.at(i);
const ObLSID &cur_ls_id = ls_ids.at(i);
if (OB_FAIL(cur_pair.init(cur_tablet_id, cur_ls_id))) {
LOG_WARN("fail to init tablet_ls_pair", KR(ret), K(i), K(cur_tablet_id), K(cur_ls_id));
} else if (OB_FAIL(pairs.push_back(cur_pair))) {
LOG_WARN("fail to push back pair", KR(ret), K(cur_pair));
}
}
} else if (OB_FAIL(ObTabletToLSTableOperator::batch_get_tablet_ls_pairs(sql_proxy, tenant_id,
tablet_ids, pairs))) {
LOG_WARN("fail to batch get ls", KR(ret), K(tenant_id), K(tablet_ids));
}
}
const int64_t ls_id_cnt = ls_ids.count();
if (OB_SUCC(ret) && (ls_id_cnt != tablet_ids.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("count mismatch", KR(ret), K(ls_id_cnt), K(tablet_ids.count()));
}
for (int64_t i = 0; (i < ls_id_cnt) && OB_SUCC(ret); ++i) {
ObTabletLSPair cur_pair;
const ObTabletID &cur_tablet_id = tablet_ids.at(i);
const ObLSID &cur_ls_id = ls_ids.at(i);
if (OB_FAIL(cur_pair.init(cur_tablet_id, cur_ls_id))) {
LOG_WARN("fail to init tablet_ls_pair", KR(ret), K(i), K(cur_tablet_id), K(cur_ls_id));
} else if (OB_FAIL(pairs.push_back(cur_pair))) {
LOG_WARN("fail to push back pair", KR(ret), K(cur_pair));
}
}
if (OB_FAIL(ret)){
} else if (OB_UNLIKELY(pairs.count() != ls_id_cnt)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("some unexpected err about tablet_ls_pair count", KR(ret), K(ls_id_cnt), K(pairs.count()));
}
if (OB_FAIL(ret)){
} else if (OB_UNLIKELY(pairs.count() != tablet_ids.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("some unexpected err about tablet_ls_pair count", KR(ret), "tablet_id_cnt",
tablet_ids.count(), "pair_cnt", pairs.count());
}
}
return ret;
@ -1813,5 +1842,21 @@ int ObTabletReplicaChecksumOperator::get_hex_column_meta(
return ret;
}
int ObTabletReplicaChecksumOperator::convert_array_to_map(
const ObArray<ObTabletLSPair> &tablet_ls_pairs,
hash::ObHashMap<ObTabletID, ObTabletLSPair> &tablet_ls_pair_map)
{
int ret = OB_SUCCESS;
const int64_t tablet_ls_pair_cnt = tablet_ls_pairs.count();
for (int64_t i = 0; (i < tablet_ls_pair_cnt) && OB_SUCC(ret); ++i) {
const ObTabletLSPair &pair = tablet_ls_pairs.at(i);
const ObTabletID &tablet_id = pair.get_tablet_id();
if (OB_FAIL(tablet_ls_pair_map.set_refactored(tablet_id, pair, false/*overwrite*/))) {
LOG_WARN("fail to set_refactored", KR(ret), K(tablet_id), K(pair));
}
}
return ret;
}
} // share
} // oceanbase

View File

@ -307,7 +307,6 @@ private:
common::ObIArray<ObTabletReplicaChecksumItem> &items,
const SCN &compaction_scn,
int64_t &idx);
static int check_table_all_tablets_ckm_status_(
const uint64_t tenant_id,
common::ObIArray<ObTabletLSPair> &tablet_pairs,
@ -331,6 +330,10 @@ private:
static void print_detail_tablet_replica_checksum(
const common::ObIArray<ObTabletReplicaChecksumItem> &items);
static int convert_array_to_map(
const common::ObArray<ObTabletLSPair> &tablet_ls_pairs,
common::hash::ObHashMap<common::ObTabletID, ObTabletLSPair> &tablet_ls_pair_map);
private:
const static int64_t MAX_BATCH_COUNT = 99;
};