diff --git a/src/share/stat/ob_opt_stat_manager.cpp b/src/share/stat/ob_opt_stat_manager.cpp
index a2656d22c4..4ca34f991d 100644
--- a/src/share/stat/ob_opt_stat_manager.cpp
+++ b/src/share/stat/ob_opt_stat_manager.cpp
@@ -694,5 +694,17 @@ int ObOptStatManager::get_column_stat(const uint64_t tenant_id,
   }
   return ret;
 }
+
+// Forwards to stat_service_.get_table_rowcnt(); see ObOptStatService::get_table_rowcnt for how
+// the per-tablet row counts are collected and summed into table_rowcnt.
+int ObOptStatManager::get_table_rowcnt(const uint64_t tenant_id,
+                                       const uint64_t table_id,
+                                       const ObIArray &all_tablet_ids,
+                                       const ObIArray &all_ls_ids,
+                                       int64_t &table_rowcnt)
+{
+  return stat_service_.get_table_rowcnt(tenant_id, table_id, all_tablet_ids, all_ls_ids, table_rowcnt);
+}
+
 }
 }
diff --git a/src/share/stat/ob_opt_stat_manager.h b/src/share/stat/ob_opt_stat_manager.h
index eadc37ba4c..35358559da 100644
--- a/src/share/stat/ob_opt_stat_manager.h
+++ b/src/share/stat/ob_opt_stat_manager.h
@@ -180,6 +180,12 @@ public:
   int handle_refresh_stat_task(const obrpc::ObUpdateStatCacheArg &arg);
+  int get_table_rowcnt(const uint64_t tenant_id,
+                       const uint64_t table_id,
+                       const ObIArray &all_tablet_ids,
+                       const ObIArray &all_ls_ids,
+                       int64_t &table_rowcnt);
+
   static ObOptStatManager &get_instance()
   {
     static ObOptStatManager instance_;
diff --git a/src/share/stat/ob_opt_stat_monitor_manager.h b/src/share/stat/ob_opt_stat_monitor_manager.h
index 7ed0937093..d112f45fd7 100644
--- a/src/share/stat/ob_opt_stat_monitor_manager.h
+++ b/src/share/stat/ob_opt_stat_monitor_manager.h
@@ -68,8 +68,8 @@ public:
   virtual ~ObOptStatMonitorCheckTask() {}
   int init(int tg_id);
   virtual void runTimerTask() override;
-private:
   const static int64_t CHECK_INTERVAL = 900L * 1000L * 1000L; // 15min
+private:
   bool is_inited_;
 };
diff --git a/src/share/stat/ob_opt_stat_service.cpp b/src/share/stat/ob_opt_stat_service.cpp
index d8e935aefc..382de72286 100644
--- a/src/share/stat/ob_opt_stat_service.cpp
+++ b/src/share/stat/ob_opt_stat_service.cpp
@@ -15,6 +15,7 @@
 #include "lib/oblog/ob_log_module.h"
 #include "share/config/ob_server_config.h"
 #include "share/inner_table/ob_inner_table_schema_constants.h"
+#include "storage/ob_tenant_tablet_stat_mgr.h"
 #include "ob_opt_stat_service.h"
@@ -305,5 +306,120 @@ int ObOptStatService::init_key_column_stats(ObIAllocator &allocator,
   return ret;
 }
+// Estimate the row count of a table as the sum of the row counts of its tablets.
+// all_tablet_ids[i] and all_ls_ids[i] describe the same tablet, so both arrays must have the
+// same count (OB_ERR_UNEXPECTED otherwise). A cached per-tablet stat is used directly unless
+// it is missing, has reached its expired time, or holds fewer rows than the latest tablet stat
+// reported by ObTenantTabletStatMgr; those tablets are reloaded in one batch and re-cached.
+// table_rowcnt is reset to 0 on entry and receives the sum.
+int ObOptStatService::get_table_rowcnt(const uint64_t tenant_id,
+                                       const uint64_t table_id,
+                                       const ObIArray &all_tablet_ids,
+                                       const ObIArray &all_ls_ids,
+                                       int64_t &table_rowcnt)
+{
+  int ret = OB_SUCCESS;
+  table_rowcnt = 0;
+  if (OB_UNLIKELY(all_tablet_ids.count() != all_ls_ids.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected error", K(ret), K(all_tablet_ids), K(all_ls_ids));
+  } else {
+    ObSEArray reload_tablet_ids;
+    ObSEArray reload_ls_ids;
+    for (int64_t i = 0; OB_SUCC(ret) && i < all_tablet_ids.count(); ++i) {
+      ObOptTableStat::Key key(tenant_id, table_id, all_tablet_ids.at(i).id());
+      ObOptTableStatHandle handle;
+      if (OB_FAIL(table_stat_cache_.get_value(key, handle))) {
+        // we need to fetch statistics from inner table if it is not yet available from cache
+        if (OB_ENTRY_NOT_EXIST != ret) {
+          LOG_WARN("get table stat from cache failed", K(ret), K(key));
+        } else if (OB_FAIL(reload_tablet_ids.push_back(all_tablet_ids.at(i))) ||
+                   OB_FAIL(reload_ls_ids.push_back(all_ls_ids.at(i)))) {
+          LOG_WARN("failed to push back", K(ret));
+        }
+      } else if (OB_ISNULL(handle.stat_)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("cache hit but value is NULL. BUG here.", K(ret), K(key));
+      //check is stale
+      } else if (handle.stat_->is_arrived_expired_time()) {
+        if (OB_FAIL(reload_tablet_ids.push_back(all_tablet_ids.at(i))) ||
+            OB_FAIL(reload_ls_ids.push_back(all_ls_ids.at(i)))) {
+          LOG_WARN("failed to push back", K(ret));
+        } else {/*do nothing*/}
+      } else {
+        storage::ObTenantTabletStatMgr *stat_mgr = MTL(storage::ObTenantTabletStatMgr *);
+        storage::ObTabletStat tablet_stat;
+        //try to check the latest tablet stat from storage
+        if (stat_mgr != NULL) {
+          if (OB_FAIL(stat_mgr->get_latest_tablet_stat(all_ls_ids.at(i), all_tablet_ids.at(i), tablet_stat))) {
+            if (OB_HASH_NOT_EXIST != ret) {
+              LOG_WARN("failed to get latest tablet stat", K(ret), K(all_ls_ids.at(i)), K(all_tablet_ids.at(i)));
+            } else {
+              ret = OB_SUCCESS;
+            }
+          }
+        }
+        LOG_TRACE("cache stat compare", KPC(handle.stat_), K(tablet_stat));
+        if (OB_FAIL(ret)) {
+          // keep the error from get_latest_tablet_stat: OB_FAIL(push_back) below would overwrite it
+        } else if (handle.stat_->get_row_count() < tablet_stat.merge_logical_row_cnt_) {
+          if (OB_FAIL(reload_tablet_ids.push_back(all_tablet_ids.at(i))) ||
+              OB_FAIL(reload_ls_ids.push_back(all_ls_ids.at(i)))) {
+            LOG_WARN("failed to push back", K(ret));
+          } else {/*do nothing*/}
+        } else {
+          table_rowcnt += handle.stat_->get_row_count();
+        }
+      }
+    }
+    if (OB_SUCC(ret) && !reload_tablet_ids.empty()) {
+      int64_t reload_row_cnt = 0;
+      if (OB_FAIL(load_table_rowcnt_and_put_cache(tenant_id, table_id, reload_tablet_ids,
+                                                  reload_ls_ids, reload_row_cnt))) {
+        LOG_WARN("load and put cache table stat failed.", K(ret));
+      } else {
+        table_rowcnt += reload_row_cnt;
+      }
+    }
+    LOG_TRACE("Succeed to get table rowcnt", K(table_id), K(table_rowcnt),
+                                              K(all_tablet_ids), K(reload_tablet_ids));
+  }
+  return ret;
+}
+
+// Fetch the row counts of the given tablets through sql_service_, put every fetched stat into
+// table_stat_cache_ and return the sum of the fetched row counts in table_rowcnt.
+// Returns OB_NOT_INIT when the service is not initialized; tablets for which the sql service
+// returns no row contribute nothing to the sum.
+int ObOptStatService::load_table_rowcnt_and_put_cache(const uint64_t tenant_id,
+                                                      const uint64_t table_id,
+                                                      const ObIArray &all_tablet_ids,
+                                                      const ObIArray &all_ls_ids,
+                                                      int64_t &table_rowcnt)
+{
+  int ret = OB_SUCCESS;
+  ObSEArray tstats;
+  table_rowcnt = 0;
+  if (!inited_) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("table statistics service is not initialized. ", K(ret));
+  } else if (OB_FAIL(sql_service_.fetch_table_rowcnt(tenant_id, table_id,
+                                                     all_tablet_ids, all_ls_ids,
+                                                     tstats))) {
+    LOG_WARN("fetch table stat failed. ", K(ret));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < tstats.count(); ++i) {
+      ObOptTableStat::Key key(tenant_id, table_id, tstats.at(i).get_tablet_id());
+      ObOptTableStatHandle handle;
+      if (OB_FAIL(table_stat_cache_.put_and_fetch_value(key, tstats.at(i), handle))) {
+        LOG_WARN("put and fetch table stat failed.", K(ret), K(key));
+      } else {
+        table_rowcnt += tstats.at(i).get_row_count();
+      }
+    }
+  }
+  return ret;
+}
+
 }
 }
diff --git a/src/share/stat/ob_opt_stat_service.h b/src/share/stat/ob_opt_stat_service.h
index ad915064d9..5e2799f6d7 100644
--- a/src/share/stat/ob_opt_stat_service.h
+++ b/src/share/stat/ob_opt_stat_service.h
@@ -45,6 +45,12 @@ public:
   int erase_column_stat(const ObOptColumnStat::Key &key);
   ObOptStatSqlService &get_sql_service() { return sql_service_; }
+
+  int get_table_rowcnt(const uint64_t tenant_id,
+                       const uint64_t table_id,
+                       const ObIArray &all_tablet_ids,
+                       const ObIArray &all_ls_ids,
+                       int64_t &table_rowcnt);
 private:
   /**
    * 接口load_and_put_cache(key, handle)的实现,外部不应该直接调用这个函数
@@ -57,6 +63,12 @@ private:
   int init_key_column_stats(ObIAllocator &allocator,
                             ObIArray &keys,
                             ObIArray &key_column_stats);
+
+  int load_table_rowcnt_and_put_cache(const uint64_t tenant_id,
+                                      const uint64_t table_id,
+                                      const ObIArray &all_tablet_ids,
+                                      const ObIArray &all_ls_ids,
+                                      int64_t &table_rowcnt);
 protected:
   bool inited_;
   static const int64_t DEFAULT_TAB_STAT_CACHE_PRIORITY = 1;
diff --git a/src/share/stat/ob_opt_stat_sql_service.cpp b/src/share/stat/ob_opt_stat_sql_service.cpp
index 6babce00cd..96d164d786 100644
--- a/src/share/stat/ob_opt_stat_sql_service.cpp
+++ b/src/share/stat/ob_opt_stat_sql_service.cpp
@@ -34,6 +34,7 @@
 #include "share/stat/ob_opt_table_stat.h"
 #include "share/stat/ob_column_stat.h"
 #include "lib/charset/ob_charset.h"
+#include "share/stat/ob_opt_stat_monitor_manager.h"
 #define ALL_HISTOGRAM_STAT_COLUMN_NAME "tenant_id, " \
                                        "table_id, " \
@@ -2011,6 +2012,123 @@ int ObOptStatSqlService::batch_update_online_col_state(const uint64_t tenant_id,
   return ret;
 }*/
+// Query the inner tables for the row count of every tablet in all_tablet_ids and append one
+// ObOptTableStat (table_id, tablet_id, row_count, expired time) per result row to tstats.
+// For each tablet the larger of two values is taken: inserts - deletes from
+// OB_ALL_MONITOR_MODIFIED_TNAME, and row_count from OB_ALL_TABLET_CHECKSUM_TNAME at the latest
+// frozen_scn of OB_ALL_FREEZE_INFO_TNAME. Tablets found in neither table produce no entry.
+// Every fetched stat expires ObOptStatMonitorCheckTask::CHECK_INTERVAL after the fetch.
+int ObOptStatSqlService::fetch_table_rowcnt(const uint64_t tenant_id,
+                                            const uint64_t table_id,
+                                            const ObIArray &all_tablet_ids,
+                                            const ObIArray &all_ls_ids,
+                                            ObIArray &tstats)
+{
+  int ret = OB_SUCCESS;
+  ObSqlString raw_sql;
+  ObSqlString tablet_list_str;
+  ObSqlString tablet_ls_list_str;
+  uint64_t real_table_id = share::is_oracle_mapping_real_virtual_table(table_id) ?
+                             ObSchemaUtils::get_real_table_mappings_tid(table_id) : table_id;
+  if (OB_FAIL(gen_tablet_list_str(all_tablet_ids, all_ls_ids, tablet_list_str, tablet_ls_list_str))) {
+    LOG_WARN("failed to gen tablet list str", K(ret));
+  } else if (OB_FAIL(raw_sql.append_fmt("select tablet_id, max(row_count) from (select cast(tablet_id as unsigned) as tablet_id, cast(inserts - deletes as signed) as row_count "\
+                                        "from %s where tenant_id = %lu and table_id = %lu and tablet_id in %s union all "\
+                                        "select cast(tablet_id as unsigned) as tablet_id, cast(row_count as signed) as row_count from %s, "\
+                                        "(select frozen_scn from %s order by frozen_scn desc limit 1) where "\
+                                        "tenant_id = %lu and compaction_scn = frozen_scn and (tablet_id, ls_id) in %s) group by tablet_id;",
+                                        share::OB_ALL_MONITOR_MODIFIED_TNAME,
+                                        share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id, tenant_id),
+                                        share::schema::ObSchemaUtils::get_extract_schema_id(tenant_id, real_table_id),
+                                        tablet_list_str.ptr(),
+                                        share::OB_ALL_TABLET_CHECKSUM_TNAME,
+                                        share::OB_ALL_FREEZE_INFO_TNAME,
+                                        share::schema::ObSchemaUtils::get_extract_tenant_id(tenant_id,tenant_id),
+                                        tablet_ls_list_str.ptr()))) {
+    LOG_WARN("failed to append fmt", K(ret));
+  } else {
+    SMART_VAR(ObMySQLProxy::MySQLResult, proxy_result) {
+      sqlclient::ObMySQLResult *client_result = NULL;
+      ObSQLClientRetryWeak sql_client_retry_weak(mysql_proxy_);
+      if (OB_FAIL(sql_client_retry_weak.read(proxy_result, tenant_id, raw_sql.ptr()))) {
+        LOG_WARN("failed to execute sql", K(ret), K(raw_sql));
+      } else if (OB_ISNULL(client_result = proxy_result.get_result())) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("failed to execute sql", K(ret));
+      } else {
+        int64_t expired_time = ObTimeUtility::current_time() + ObOptStatMonitorCheckTask::CHECK_INTERVAL;
+        while (OB_SUCC(ret)) {
+          int64_t tablet_idx = 0;
+          int64_t row_cnt_idx = 1;
+          ObObj tablet_obj;
+          ObObj row_cnt_obj;
+          uint64_t tablet_id = ObTabletID::INVALID_TABLET_ID;
+          int64_t row_cnt = 0;
+          if (OB_FAIL(client_result->next())) {
+            if (OB_ITER_END != ret) {
+              LOG_WARN("result next failed", K(ret));
+            } else {
+              ret = OB_SUCCESS;
+              break;
+            }
+          } else if (OB_FAIL(client_result->get_obj(tablet_idx, tablet_obj)) ||
+                     OB_FAIL(client_result->get_obj(row_cnt_idx, row_cnt_obj))) {
+            LOG_WARN("failed to get object", K(ret));
+          } else if (OB_FAIL(tablet_obj.get_uint64(tablet_id)) ||
+                     OB_FAIL(row_cnt_obj.get_int(row_cnt))) {
+            LOG_WARN("failed to get int", K(ret), K(tablet_obj), K(row_cnt_obj));
+          } else {
+            ObOptTableStat tstat;
+            tstat.set_table_id(table_id);
+            tstat.set_tablet_id(tablet_id);
+            tstat.set_row_count(row_cnt);
+            tstat.set_stat_expired_time(expired_time);
+            if (OB_FAIL(tstats.push_back(tstat))) {
+              LOG_WARN("failed to push back", K(ret));
+            }
+          }
+        }
+        LOG_TRACE("succeed to fetch table rowcnt", K(tstats), K(raw_sql));
+      }
+    }
+  }
+  return ret;
+}
+
+// Render the tablets as two SQL IN-lists: tablet_list_str becomes "(t1, t2, ...)" and
+// tablet_ls_list_str becomes "((t1, ls1), (t2, ls2), ...)". Both arrays must be non-empty and of
+// equal count, otherwise OB_ERR_UNEXPECTED is returned. The output strings are appended to, so
+// they should be empty on entry.
+int ObOptStatSqlService::gen_tablet_list_str(const ObIArray &all_tablet_ids,
+                                             const ObIArray &all_ls_ids,
+                                             ObSqlString &tablet_list_str,
+                                             ObSqlString &tablet_ls_list_str)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(all_tablet_ids.empty() || all_tablet_ids.count() != all_ls_ids.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected error", K(ret), K(all_tablet_ids), K(all_ls_ids));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < all_tablet_ids.count(); ++i) {
+      char prefix = i == 0 ? '(' : ' ';
+      char suffix = i == all_tablet_ids.count() - 1 ? ')' : ',';
+      if (OB_FAIL(tablet_list_str.append_fmt("%c%lu%c",
+                                             prefix,
+                                             all_tablet_ids.at(i).id(),
+                                             suffix))) {
+        LOG_WARN("failed to append fmt", K(ret));
+      } else if (OB_FAIL(tablet_ls_list_str.append_fmt("%c(%lu, %ld)%c",
+                                                       prefix,
+                                                       all_tablet_ids.at(i).id(),
+                                                       all_ls_ids.at(i).id(),
+                                                       suffix))) {
+        LOG_WARN("failed to append fmt", K(ret));
+      } else {/*do nothing*/}
+    }
+  }
+  return ret;
+}
+
 } // end of namespace common
 } // end of namespace oceanbase
diff --git a/src/share/stat/ob_opt_stat_sql_service.h b/src/share/stat/ob_opt_stat_sql_service.h
index 9d4430c060..1bd45f299f 100644
--- a/src/share/stat/ob_opt_stat_sql_service.h
+++ b/src/share/stat/ob_opt_stat_sql_service.h
@@ -149,6 +149,12 @@ public:
                          common::ObIAllocator &allocator,
                          common::ObString &dest_str);
+  int fetch_table_rowcnt(const uint64_t tenant_id,
+                         const uint64_t table_id,
+                         const ObIArray &all_tablet_ids,
+                         const ObIArray &all_ls_ids,
+                         ObIArray &tstats);
+
 private:
   int get_table_stat_sql(const uint64_t tenant_id,
                          const ObOptTableStat &stat,
@@ -239,6 +245,11 @@ private:
                             const uint64_t table_id,
                             ObObjMeta &endpoint_meta);
+  int gen_tablet_list_str(const ObIArray &all_tablet_ids,
+                          const ObIArray &all_ls_ids,
+                          ObSqlString &tablet_list_str,
+                          ObSqlString &tablet_ls_list_str);
+
   static const char *bitmap_compress_lib_name;
   bool inited_;
diff --git a/src/share/stat/ob_opt_table_stat.h b/src/share/stat/ob_opt_table_stat.h
index e2cf48d3de..0b0b833b99 100644
--- a/src/share/stat/ob_opt_table_stat.h
+++ b/src/share/stat/ob_opt_table_stat.h
@@ -33,11 +33,18 @@ public:
   {
     Key() : tenant_id_(0),
             table_id_(OB_INVALID_ID),
-            partition_id_(OB_INVALID_INDEX)
+            partition_id_(OB_INVALID_INDEX),
+            tablet_id_(ObTabletID::INVALID_TABLET_ID)
     {
     }
     explicit Key(uint64_t tenant_id, uint64_t table_id, int64_t partition_id) :
-      tenant_id_(tenant_id), table_id_(table_id), partition_id_(partition_id)
+      tenant_id_(tenant_id), 
table_id_(table_id), partition_id_(partition_id),
+      tablet_id_(ObTabletID::INVALID_TABLET_ID)
+    {
+    }
+    explicit Key(uint64_t tenant_id, uint64_t table_id, uint64_t tablet_id) : // keyed by tablet; partition_id_ is left invalid
+      tenant_id_(tenant_id), table_id_(table_id), partition_id_(OB_INVALID_INDEX),
+      tablet_id_(tablet_id) // NOTE(review): differs from the partition ctor only in the 3rd argument's signedness (int64_t vs uint64_t)
     {
     }
     void init(uint64_t tenant_id, uint64_t table_id, int64_t partition_id)
@@ -45,6 +52,7 @@ public:
       tenant_id_ = tenant_id;
       table_id_ = table_id;
       partition_id_ = partition_id;
+      tablet_id_ = ObTabletID::INVALID_TABLET_ID; // init() only takes a partition id, so the tablet id is reset to invalid
     }
     uint64_t hash() const
     {
@@ -55,7 +63,8 @@ public:
       const Key &other_key = reinterpret_cast(other);
       return tenant_id_ == other_key.tenant_id_ &&
              table_id_ == other_key.table_id_ &&
-             partition_id_ == other_key.partition_id_;
+             partition_id_ == other_key.partition_id_ &&
+             tablet_id_ == other_key.tablet_id_; // keys for different tablets of one table must not compare equal
     }
     uint64_t get_tenant_id() const
     {
@@ -98,13 +107,15 @@ public:
       tenant_id_ = 0;
       table_id_ = OB_INVALID_ID;
       partition_id_ = OB_INVALID_INDEX;
+      tablet_id_ = ObTabletID::INVALID_TABLET_ID;
     }
-    TO_STRING_KV(K_(tenant_id), K_(table_id), K_(partition_id));
+    TO_STRING_KV(K_(tenant_id), K_(table_id), K_(partition_id), K_(tablet_id));
     uint64_t tenant_id_;
     uint64_t table_id_;
     int64_t partition_id_;
+    uint64_t tablet_id_;
   };
   ObOptTableStat() :
     table_id_(OB_INVALID_ID),
@@ -123,7 +134,9 @@ public:
     last_analyzed_(0),
     stattype_locked_(0),
     modified_count_(0),
-    sample_size_(0) {}
+    sample_size_(0),
+    tablet_id_(ObTabletID::INVALID_TABLET_ID),
+    stat_expired_time_(-1) {} // -1: the stat never expires
   ObOptTableStat(uint64_t table_id,
                  int64_t partition_id,
                  int64_t object_type,
@@ -153,7 +166,9 @@ public:
     last_analyzed_(0),
     stattype_locked_(0),
     modified_count_(0),
-    sample_size_(0) {}
+    sample_size_(0),
+    tablet_id_(ObTabletID::INVALID_TABLET_ID),
+    stat_expired_time_(-1) {} // -1: the stat never expires
   virtual ~ObOptTableStat() {}
@@ -162,6 +177,9 @@ public:
   uint64_t get_table_id() const { return table_id_; }
   void set_table_id(uint64_t table_id) { table_id_ = table_id; }
+  uint64_t get_tablet_id() const { return tablet_id_; }
+  void set_tablet_id(uint64_t tablet_id) { tablet_id_ = tablet_id; }
+
   int64_t get_partition_id() const { return partition_id_; }
   void set_partition_id(int64_t partition_id) { partition_id_ = partition_id; }
@@ -212,6 +230,11 @@ public:
   int64_t get_sample_size() const { return sample_size_; }
   void set_sample_size(int64_t sample_size) { sample_size_ = sample_size; }
+  bool is_arrived_expired_time() const { // true once the expired time has been reached; always false for -1 (never expires)
+    return stat_expired_time_ != -1 && stat_expired_time_ <= ObTimeUtility::current_time(); }
+
+  void set_stat_expired_time(int64_t expired_time) { stat_expired_time_ = expired_time; } // compared against ObTimeUtility::current_time()
+
   void add_row_count(int64_t rc) { row_count_ += rc; }
   // for multi rows
@@ -287,6 +310,8 @@ public:
     stattype_locked_ = 0;
     modified_count_ = 0;
     sample_size_ = 0;
+    tablet_id_ = ObTabletID::INVALID_TABLET_ID;
+    stat_expired_time_ = -1;
   }
   TO_STRING_KV(K(table_id_),
@@ -305,7 +330,9 @@ public:
                K(last_analyzed_),
                K(stattype_locked_),
                K(modified_count_),
-               K(sample_size_));
+               K(sample_size_),
+               K(tablet_id_),
+               K(stat_expired_time_));
 private:
   uint64_t table_id_;
@@ -327,6 +354,8 @@ private:
   uint64_t stattype_locked_;
   int64_t modified_count_;
   int64_t sample_size_;
+  uint64_t tablet_id_;//currently only used when estimating the table row count by meta table.
+  int64_t stat_expired_time_;//time at which the cached stat expires and has to be reloaded; -1 means it never expires.
 };
 }
diff --git a/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.h b/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.h
index 8f1a0211ff..39631720ee 100644
--- a/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.h
+++ b/src/sql/engine/opt_statistics/ob_optimizer_stats_gathering_op.h
@@ -65,9 +65,9 @@ public:
     PartIds() : global_part_id_(common::OB_INVALID_ID),
       part_id_(common::OB_INVALID_ID),
       first_part_id_(common::OB_INVALID_ID) {};
-    ObObjectID global_part_id_;
-    ObObjectID part_id_;
-    ObObjectID first_part_id_; // for two_level partition.
+    int64_t global_part_id_;
+    int64_t part_id_;
+    int64_t first_part_id_; // for two_level partition.
     TO_STRING_KV(K(global_part_id_), K(part_id_), K(first_part_id_));
   };
diff --git a/src/sql/ob_sql_utils.cpp b/src/sql/ob_sql_utils.cpp
index ae91382f2e..a98f5bffde 100644
--- a/src/sql/ob_sql_utils.cpp
+++ b/src/sql/ob_sql_utils.cpp
@@ -2618,7 +2618,7 @@ int ObSQLUtils::revise_hash_part_object(common::ObObj &obj,
  * 4. other
  */
 int ObSQLUtils::choose_best_replica_for_estimation(
-                          const ObCandiTabletLocIArray &part_loc_info_array,
+                          const ObCandiTabletLoc &phy_part_loc_info,
                           const ObAddr &local_addr,
                           const common::ObIArray &addrs_list,
                           const bool no_use_remote,
@@ -2626,42 +2626,37 @@ int ObSQLUtils::choose_best_replica_for_estimation(
 {
   int ret = OB_SUCCESS;
   best_partition.reset();
-  if (OB_UNLIKELY(1 != part_loc_info_array.count())) {
-    ret = OB_ERR_UNEXPECTED;
-    LOG_WARN("partition array count should be 1", K(ret), K(part_loc_info_array.count()));
-  } else {
-    const ObIArray &replica_loc_array =
-        part_loc_info_array.at(0).get_partition_location().get_replica_locations();
-    bool found = false;
-    // 2. check whether best partition can find in local
-    for (int64_t i = -1; !found && i < addrs_list.count(); ++i) {
-      const ObAddr &addr = (i == -1? local_addr : addrs_list.at(i));
-      for (int64_t j = 0; !found && j < replica_loc_array.count(); ++j) {
-        if (addr == replica_loc_array.at(j).get_server() &&
-            0 != replica_loc_array.at(j).get_property().get_memstore_percent()) {
-          found = true;
-          best_partition.set(addr,
-                             part_loc_info_array.at(0).get_partition_location().get_tablet_id(),
-                             part_loc_info_array.at(0).get_partition_location().get_ls_id());
-        }
+  const ObIArray &replica_loc_array =
+      phy_part_loc_info.get_partition_location().get_replica_locations();
+  bool found = false;
+  // 2. look for a replica with a non-zero memstore percent on the local server (i == -1) first, then on the servers in addrs_list
+  for (int64_t i = -1; !found && i < addrs_list.count(); ++i) {
+    const ObAddr &addr = (i == -1? local_addr : addrs_list.at(i));
+    for (int64_t j = 0; !found && j < replica_loc_array.count(); ++j) {
+      if (addr == replica_loc_array.at(j).get_server() &&
+          0 != replica_loc_array.at(j).get_property().get_memstore_percent()) {
+        found = true;
+        best_partition.set(addr,
+                           phy_part_loc_info.get_partition_location().get_tablet_id(),
+                           phy_part_loc_info.get_partition_location().get_ls_id());
       }
     }
   }
-    if (!found && !no_use_remote) {
-      // best partition not find in local
-      ObAddr remote_addr;
-      if (OB_FAIL(choose_best_partition_replica_addr(local_addr,
-                                                     part_loc_info_array.at(0),
-                                                     false,
-                                                     remote_addr))) {
-        LOG_WARN("failed to get best partition replica addr", K(ret));
-        // choose partition replica failed doesn't affect execution, we will decide whether use
-        // storage estimation interface by (!use_local && remote_addr.is_valid()).
-        ret = OB_SUCCESS;
-      }
-      best_partition.set(remote_addr,
-                         part_loc_info_array.at(0).get_partition_location().get_tablet_id(),
-                         part_loc_info_array.at(0).get_partition_location().get_ls_id());
+  }
+  if (!found && !no_use_remote) {
+    // no suitable replica on the local server or in addrs_list: try to pick a remote one
+    ObAddr remote_addr;
+    if (OB_FAIL(choose_best_partition_replica_addr(local_addr,
+                                                   phy_part_loc_info,
+                                                   false,
+                                                   remote_addr))) {
+      LOG_WARN("failed to get best partition replica addr", K(ret));
+      // failing to choose a partition replica does not affect execution: we decide whether to use
+      // the storage estimation interface by (!use_local && remote_addr.is_valid()).
+      ret = OB_SUCCESS;
     }
+    best_partition.set(remote_addr,
+                       phy_part_loc_info.get_partition_location().get_tablet_id(),
+                       phy_part_loc_info.get_partition_location().get_ls_id());
   }
   return ret;
 }
diff --git a/src/sql/ob_sql_utils.h b/src/sql/ob_sql_utils.h
index d976658388..debf775336 100644
--- a/src/sql/ob_sql_utils.h
+++ b/src/sql/ob_sql_utils.h
@@ -434,7 +434,7 @@ public:
                                     const share::schema::ObPartitionFuncType part_type);
   static int choose_best_replica_for_estimation(
-                          const ObCandiTabletLocIArray &part_loc_info_array,
+                          const ObCandiTabletLoc &phy_part_loc_info,
                           const ObAddr &local_addr,
                           const common::ObIArray &addrs_list,
                           const bool no_use_remote,
diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp b/src/sql/optimizer/ob_access_path_estimation.cpp
index adbd0e7247..0490e70f85 100644
--- a/src/sql/optimizer/ob_access_path_estimation.cpp
+++ b/src/sql/optimizer/ob_access_path_estimation.cpp
@@ -18,6 +18,7 @@
 #include "share/stat/ob_opt_stat_manager.h"
 #include "sql/engine/table/ob_table_scan_op.h"
 #include "ob_opt_est_parameter_normal.h"
+#include "observer/ob_sql_client_decorator.h"
 namespace oceanbase {
 using namespace share::schema;
 using namespace share;
@@ -173,20 +174,23 @@ int ObAccessPathEstimation::process_storage_estimation(ObOptimizerContext &ctx,
     if (OB_FAIL(ret)) {
     } else if (OB_FAIL(tmp_part_info.assign(*table_part_info))) {
       LOG_WARN("failed to assign table part info", K(ret));
+    } else if (OB_UNLIKELY(1 != tmp_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list().count())) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("access path is invalid", K(ret), K(tmp_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list()));
     } else if (!ap->is_global_index_ && ap->ref_table_id_ != ap->index_id_ &&
                OB_FAIL(tmp_part_info.replace_final_location_key(tmp_exec_ctx,
                                                                 ap->index_id_,
                                                                 true))) {
       LOG_WARN("failed to replace final location key", K(ret));
     } else if (OB_FAIL(ObSQLUtils::choose_best_replica_for_estimation(
-                          
tmp_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list(),
+                          tmp_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list().at(0),
                           ctx.get_local_server_addr(),
                           prefer_addrs,
                           !ap->can_use_remote_estimate(),
                           best_index_part))) {
       LOG_WARN("failed to choose best partition for estimation", K(ret));
     } else if (force_leader_estimation &&
-               OB_FAIL(choose_leader_replica(tmp_part_info,
+               OB_FAIL(choose_leader_replica(tmp_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list().at(0),
                                              ap->can_use_remote_estimate(),
                                              ctx.get_local_server_addr(),
                                              best_index_part))) {
@@ -272,17 +276,14 @@ int ObAccessPathEstimation::process_storage_estimation(ObOptimizerContext &ctx,
   return ret;
 }
-int ObAccessPathEstimation::choose_leader_replica(const ObTablePartitionInfo &table_part_info,
+int ObAccessPathEstimation::choose_leader_replica(const ObCandiTabletLoc &part_loc_info,
                                                   const bool can_use_remote,
                                                   const ObAddr &local_addr,
                                                   EstimatedPartition &best_partition)
 {
   int ret = OB_SUCCESS;
-  const ObCandiTabletLoc &part_loc_info =
-    table_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list().at(0);
-  const ObIArray &replica_loc_array = 
+  const ObIArray &replica_loc_array =
       part_loc_info.get_partition_location().get_replica_locations();
-
   for (int64_t i = 0; i < replica_loc_array.count(); ++i) {
     if (replica_loc_array.at(i).is_strong_leader() &&
         (can_use_remote || local_addr == replica_loc_array.at(i).get_server())) {
@@ -886,16 +887,60 @@ bool ObBatchEstTasks::check_result_reliable() const
 int ObAccessPathEstimation::estimate_full_table_rowcount(ObOptimizerContext &ctx,
                                                          const ObTablePartitionInfo &table_part_info,
                                                          ObTableMetaInfo &meta)
+{
+  int ret = OB_SUCCESS;
+  const ObCandiTabletLocIArray &part_loc_info_array =
+              table_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list();
+  //with exactly one partition location, ask the storage layer for a real-time row count estimate.
+  if (is_virtual_table(meta.ref_table_id_) &&
+      !share::is_oracle_mapping_real_virtual_table(meta.ref_table_id_)) {
+    //do nothing
+  } else if (part_loc_info_array.count() == 1) {
+    if (OB_FAIL(storage_estimate_full_table_rowcount(ctx, part_loc_info_array.at(0), meta))) {
+      LOG_WARN("failed to storage estimate full table rowcount", K(ret));
+    } else {
+      LOG_TRACE("succeed to storage estimate full table rowcount", K(meta));
+    }
+  //with more than one partition location, estimate from the dml info inner table and the storage inner table.
+  } else if (part_loc_info_array.count() > 1) {
+    ObSEArray all_tablet_ids;
+    ObSEArray all_ls_ids;
+    for (int64_t i = 0; OB_SUCC(ret) && i < part_loc_info_array.count(); ++i) {
+      const ObOptTabletLoc &part_loc = part_loc_info_array.at(i).get_partition_location();
+      if (OB_FAIL(all_tablet_ids.push_back(part_loc.get_tablet_id()))) {
+        LOG_WARN("failed to push back tablet id", K(ret));
+      } else if (OB_FAIL(all_ls_ids.push_back(part_loc.get_ls_id()))) {
+        LOG_WARN("failed to push back ls id", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      if (OB_FAIL(estimate_full_table_rowcount_by_meta_table(ctx, all_tablet_ids,
+                                                             all_ls_ids, meta))) {
+        LOG_WARN("failed to estimate full table rowcount by meta table", K(ret));
+      } else {
+        LOG_TRACE("succeed to estimate full table rowcount", K(meta));
+      }
+    }
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected error", K(ret), K(part_loc_info_array));
+  }
+  return ret;
+}
+
+int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerContext &ctx,
+                                                                 const ObCandiTabletLoc &part_loc_info,
+                                                                 ObTableMetaInfo &meta)
 {
   int ret = OB_SUCCESS;
   ObSEArray prefer_addrs;
   EstimatedPartition best_index_part;
   ObArenaAllocator arena("CardEstimation");
   bool force_leader_estimation = false;
-  
+
   force_leader_estimation = OB_FAIL(OB_E(EventTable::EN_LEADER_STORAGE_ESTIMATION) OB_SUCCESS);
   ret = OB_SUCCESS;
-  
+
   HEAP_VAR(ObBatchEstTasks, task) {
     obrpc::ObEstPartArg &arg = task.arg_;
     obrpc::ObEstPartRes &res = task.res_;
@@ -908,14 +953,14 @@ int ObAccessPathEstimation::estimate_full_table_rowcount(ObOptimizerContext &ctx
         !share::is_oracle_mapping_real_virtual_table(meta.ref_table_id_)) {
       // do nothing
     } else if (OB_FAIL(ObSQLUtils::choose_best_replica_for_estimation(
-                          table_part_info.get_phy_tbl_location_info().get_phy_part_loc_info_list(),
+                          part_loc_info,
                           ctx.get_local_server_addr(),
                           prefer_addrs,
                           false,
                           best_index_part))) {
       LOG_WARN("failed to choose best partition", K(ret));
-    } else if (force_leader_estimation && 
-               OB_FAIL(choose_leader_replica(table_part_info,
+    } else if (force_leader_estimation &&
+               OB_FAIL(choose_leader_replica(part_loc_info,
                                              true,
                                              ctx.get_local_server_addr(),
                                              best_index_part))) {
@@ -951,8 +996,7 @@ int ObAccessPathEstimation::estimate_full_table_rowcount(ObOptimizerContext &ctx
       LOG_WARN("storage estimation result size is unexpected", K(ret));
     } else if (res.index_param_res_.at(0).reliable_) {
       int64_t logical_row_count = res.index_param_res_.at(0).logical_row_count_;
-      int64_t part_count = table_part_info.get_phy_tbl_location_info().get_partition_cnt();
-      meta.table_row_count_ = logical_row_count * part_count;
+      meta.table_row_count_ = logical_row_count;
       meta.average_row_size_ = static_cast(ObOptStatManager::get_default_avg_row_size());
       meta.part_size_ = logical_row_count * meta.average_row_size_;
     }
@@ -1159,5 +1203,34 @@ int ObAccessPathEstimation::convert_physical_rowid_ranges(ObOptimizerContext &ct
   return ret;
 }
+// Estimate the row count of the table through ObOptStatManager::get_table_rowcnt() for the given
+// tablets and derive average_row_size_ / part_size_ from the default average row size.
+// An empty tablet list leaves meta untouched.
+int ObAccessPathEstimation::estimate_full_table_rowcount_by_meta_table(ObOptimizerContext &ctx,
+                                                                       const ObIArray &all_tablet_ids,
+                                                                       const ObIArray &all_ls_ids,
+                                                                       ObTableMetaInfo &meta)
+{
+  int ret = OB_SUCCESS;
+  if (all_tablet_ids.empty()) {
+    //do nothing
+  } else if (OB_ISNULL(ctx.get_session_info()) || OB_ISNULL(ctx.get_opt_stat_manager())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ret), K(ctx.get_session_info()), K(ctx.get_opt_stat_manager()));
+  } else if (OB_FAIL(ctx.get_opt_stat_manager()->get_table_rowcnt(ctx.get_session_info()->get_effective_tenant_id(),
+                                                                  meta.ref_table_id_,
+                                                                  all_tablet_ids,
+                                                                  all_ls_ids,
+                                                                  meta.table_row_count_))) {
+    LOG_WARN("failed to get table rowcnt", K(ret));
+  } else {
+    meta.average_row_size_ = static_cast(ObOptStatManager::get_default_avg_row_size());
+    meta.part_size_ = meta.table_row_count_ * meta.average_row_size_;
+    LOG_TRACE("succeed to estimate full table rowcount by meta table", K(meta));
+  }
+  return ret;
+}
+
+
 } // end of sql
 } // end of oceanbase
diff --git a/src/sql/optimizer/ob_access_path_estimation.h b/src/sql/optimizer/ob_access_path_estimation.h
index ac0e0aea88..db7e492efc 100644
--- a/src/sql/optimizer/ob_access_path_estimation.h
+++ b/src/sql/optimizer/ob_access_path_estimation.h
@@ -42,7 +42,6 @@ public:
   static int estimate_full_table_rowcount(ObOptimizerContext &ctx,
                                           const ObTablePartitionInfo &table_part_info,
                                           ObTableMetaInfo &meta);
-
 private:
   static int64_t get_get_range_count(const ObIArray &ranges);
@@ -54,7 +53,7 @@ private:
                                        RowCountEstMethod &method,
                                        bool &is_vt);
-  static int choose_leader_replica(const ObTablePartitionInfo &table_part_info,
+  static int choose_leader_replica(const ObCandiTabletLoc &part_loc_info,
                                    const bool can_use_remote,
                                    const ObAddr &local_addr,
                                    EstimatedPartition &best_partition);
@@ -145,6 +144,15 @@ private:
                                            const uint64_t index_id,
                                            ObIArray &new_ranges);
+  static int storage_estimate_full_table_rowcount(ObOptimizerContext &ctx,
+                                                  const ObCandiTabletLoc &part_loc_info,
+                                                  ObTableMetaInfo &meta);
+
+  static int estimate_full_table_rowcount_by_meta_table(ObOptimizerContext &ctx,
+                                                        const ObIArray &all_tablet_ids,
+                                                        const ObIArray &all_ls_ids,
+                                                        ObTableMetaInfo &meta);
+
 };
 }
diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp
index ed7b743a8b..9d647ae497 100644
--- a/src/sql/optimizer/ob_join_order.cpp
+++ b/src/sql/optimizer/ob_join_order.cpp
@@ -10722,15 +10722,14 @@ int 
ObJoinOrder::init_est_sel_info_for_access_path(const uint64_t table_id,
     }
   }
-  // 2. try to estimate the whole memtable
+  // 2. no usable row count yet (table_row_count_ <= 0): try to estimate it for the whole table.
   if (OB_SUCC(ret) && table_meta_info_.table_row_count_ <= 0) {
-    if (origin_part_cnt > 1) {
-      // do nothing
-    } else if (OB_FAIL(ObAccessPathEstimation::estimate_full_table_rowcount(
-                OPT_CTX, *table_partition_info_, table_meta_info_))) {
+    if (OB_FAIL(ObAccessPathEstimation::estimate_full_table_rowcount(OPT_CTX,
+                                                                     *table_partition_info_,
+                                                                     table_meta_info_))) {
       LOG_WARN("failed to estimate full table rowcount", K(ret));
     } else {
-      LOG_TRACE("total rowcount, mem-table without stats", K(table_meta_info_.table_row_count_));
+      LOG_TRACE("succeed to estimate full table rowcount", K(table_meta_info_.table_row_count_));
     }
   }