1333 lines
52 KiB
C++
1333 lines
52 KiB
C++
/**
|
|
* Copyright (c) 2021 OceanBase
|
|
* OceanBase CE is licensed under Mulan PubL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
|
* You may obtain a copy of Mulan PubL v2 at:
|
|
* http://license.coscl.org.cn/MulanPubL-2.0
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PubL v2 for more details.
|
|
*/
|
|
|
|
#define USING_LOG_PREFIX SQL_ENG
|
|
|
|
#include "ob_tenant_sql_memory_manager.h"
|
|
#include "share/rc/ob_tenant_base.h"
|
|
#include "share/inner_table/ob_inner_table_schema_constants.h"
|
|
#include "share/system_variable/ob_system_variable_alias.h"
|
|
#include "share/schema/ob_schema_getter_guard.h"
|
|
#include "observer/omt/ob_tenant_config_mgr.h"
|
|
#include "lib/alloc/alloc_func.h"
|
|
#include "lib/rc/ob_rc.h"
|
|
#include "observer/ob_server_struct.h"
|
|
#include "sql/engine/px/ob_px_util.h"
|
|
#include "share/cache/ob_kv_storecache.h"
|
|
|
|
namespace oceanbase {
|
|
|
|
using namespace lib;
|
|
using namespace omt;
|
|
using namespace share;
|
|
using namespace common;
|
|
using namespace share::schema;
|
|
using namespace oceanbase::observer;
|
|
|
|
namespace sql {
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
// Minimum bound size per work area type, indexed by ObSqlWorkAreaType.
// Entries follow the order given by the trailing labels: HASH first, then SORT.
const int64_t ObSqlWorkAreaProfile::MIN_BOUND_SIZE[ObSqlWorkAreaType::MAX_TYPE] = {
  9 * OB_MALLOC_MIDDLE_BLOCK_SIZE,  // HASH
  OB_MALLOC_MIDDLE_BLOCK_SIZE,      // SORT
};
|
|
|
|
int64_t ObSqlWorkAreaProfile::get_dop()
|
|
{
|
|
return dop_;
|
|
}
|
|
|
|
uint64_t ObSqlWorkAreaProfile::get_plan_id()
|
|
{
|
|
return plan_id_;
|
|
}
|
|
|
|
uint64_t ObSqlWorkAreaProfile::get_exec_id()
|
|
{
|
|
return exec_id_;
|
|
}
|
|
|
|
const char* ObSqlWorkAreaProfile::get_sql_id()
|
|
{
|
|
return sql_id_;
|
|
}
|
|
|
|
uint64_t ObSqlWorkAreaProfile::get_session_id()
|
|
{
|
|
return session_id_;
|
|
}
|
|
|
|
// Copies execution identifiers out of exec_ctx into this profile.
//
// dop_, plan_id_, exec_id_ and session_id_ are always refreshed through ObPxSqcUtil.
// sql_id_ is only touched when exec_ctx carries a physical plan context with a
// physical plan: it becomes an empty string if the plan has no sql id, otherwise
// OB_MAX_SQL_ID_LENGTH bytes are copied and a terminating '\0' is appended.
//
// @param exec_ctx  execution context to read from
// @return always OB_SUCCESS
int ObSqlWorkAreaProfile::set_exec_info(ObExecContext &exec_ctx)
{
  int ret = OB_SUCCESS;
  ObExecContext *ctx = &exec_ctx;
  dop_ = ObPxSqcUtil::get_actual_worker_count(ctx);
  plan_id_ = ObPxSqcUtil::get_plan_id(ctx);
  exec_id_ = ObPxSqcUtil::get_exec_id(ctx);
  session_id_ = ObPxSqcUtil::get_session_id(ctx);
  ObPhysicalPlanCtx *plan_ctx = exec_ctx.get_physical_plan_ctx();
  if (OB_ISNULL(plan_ctx) || OB_ISNULL(plan_ctx->get_phy_plan())) {
    // no physical plan available: leave sql_id_ as it is
  } else if (nullptr != plan_ctx->get_phy_plan()->get_sql_id()) {
    memcpy(sql_id_, plan_ctx->get_phy_plan()->get_sql_id(), OB_MAX_SQL_ID_LENGTH);
    sql_id_[OB_MAX_SQL_ID_LENGTH] = '\0';
  } else {
    sql_id_[0] = '\0';
  }
  return ret;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
// Folds one profile into this interval's counters.
//
// One-pass accounting (is_one_pass == true): only sort work areas are counted;
// total_one_pass_cnt_ is bumped and cache_size is added to total_one_pass_size_.
//
// Regular accounting (is_one_pass == false): cache_size is first capped at max_size
// (and the profile is re-initialised with max_size and its current chunk size when
// the cap applies). Hash-join profiles add to the hash counters; sort profiles add
// to the sort counters and additionally accumulate one_pass_size.
//
// @param profile        profile being analysed (may be re-initialised, see above)
// @param cache_size     cache size to account for
// @param one_pass_size  one-pass size to account for (sort, regular accounting only)
// @param max_size       upper limit applied to cache_size in regular accounting
// @param is_one_pass    selects one-pass vs. regular accounting
// @return OB_SUCCESS, or OB_ERR_UNEXPECTED when regular accounting meets a profile
//         that is neither hash join nor sort
int ObSqlWorkAreaIntervalStat::analyze_profile(
  ObSqlWorkAreaProfile &profile,
  int64_t cache_size,
  const int64_t one_pass_size,
  const int64_t max_size,
  bool is_one_pass)
{
  int ret = OB_SUCCESS;
  if (!is_one_pass) {
    if (cache_size >= max_size) {
      cache_size = max_size;
      profile.init(max_size, profile.get_chunk_size());
    }
    if (profile.is_hash_join_wa()) {
      total_hash_size_ += cache_size;
      ++total_hash_cnt_;
    } else if (profile.is_sort_wa()) {
      total_sort_size_ += cache_size;
      total_sort_one_pass_size_ += one_pass_size;
      ++total_sort_cnt_;
    } else {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpect profile type", K(ret), K(profile.get_work_area_type()));
    }
  } else if (profile.is_sort_wa()) {
    total_one_pass_size_ += cache_size;
    ++total_one_pass_cnt_;
  }
  return ret;
}
|
|
|
|
void ObSqlWorkAreaIntervalStat::reset()
|
|
{
|
|
total_hash_cnt_ = 0;
|
|
total_hash_size_ = 0;
|
|
total_sort_cnt_ = 0;
|
|
total_sort_size_ = 0;
|
|
total_sort_one_pass_size_ = 0;
|
|
total_one_pass_cnt_ = 0;
|
|
total_one_pass_size_ = 0;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
void ObSqlMemoryList::reset()
|
|
{
|
|
ObLockGuard<ObSpinLock> lock_guard(lock_);
|
|
DLIST_FOREACH_REMOVESAFE_NORET(profile, profile_list_) {
|
|
profile_list_.remove(profile);
|
|
profile->set_expect_size(OB_INVALID_ID);
|
|
}
|
|
profile_list_.reset();
|
|
}
|
|
|
|
// Appends profile to the tail of this list while holding lock_.
// @return always OB_SUCCESS
int ObSqlMemoryList::register_work_area_profile(ObSqlWorkAreaProfile &profile)
{
  ObLockGuard<ObSpinLock> guard(lock_);
  profile_list_.add_last(&profile);
  return OB_SUCCESS;
}
|
|
|
|
// Unlinks profile from this list while holding lock_.
// @return always OB_SUCCESS
int ObSqlMemoryList::unregister_work_area_profile(ObSqlWorkAreaProfile &profile)
{
  ObLockGuard<ObSpinLock> guard(lock_);
  profile_list_.remove(&profile);
  return OB_SUCCESS;
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
// Builds a private array of interval_cnt intervals for one calculation round.
//
// The array is allocated from `allocator`; element i is constructed in place with
// index i and the interval cache size of wa_intervals[i].
//
// @param allocator     provides the memory for the private interval array
// @param wa_intervals  source intervals whose cache sizes are copied; must not be null
// @param interval_cnt  number of intervals to allocate and construct
// @return OB_SUCCESS; OB_ERR_UNEXPECTED if wa_intervals is null;
//         OB_ALLOCATE_MEMORY_FAILED if the allocation fails
int ObTenantSqlMemoryManager::ObSqlWorkAreaCalcInfo::init(
  ObIAllocator &allocator,
  ObSqlWorkAreaInterval *wa_intervals,
  int64_t interval_cnt)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(wa_intervals)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected status: work interval is null", K(ret));
  } else if (nullptr == (wa_intervals_ = reinterpret_cast<ObSqlWorkAreaInterval*>(
                             allocator.alloc(sizeof(ObSqlWorkAreaInterval) * interval_cnt)))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("failed to alloc work area interval", K(ret), K(sizeof(ObSqlWorkAreaInterval) * interval_cnt));
  } else {
    int64_t pos = 0;
    while (pos < interval_cnt) {
      // construct each element directly in the freshly allocated storage
      new (static_cast<void *>(&wa_intervals_[pos]))
          ObSqlWorkAreaInterval(pos, wa_intervals[pos].get_interval_cache_size());
      ++pos;
    }
  }
  return ret;
}
|
|
|
|
// Returns the private interval array to `allocator` (if one is held) and clears
// the pointer so a repeated call is a no-op.
void ObTenantSqlMemoryManager::ObSqlWorkAreaCalcInfo::destroy(ObIAllocator &allocator)
{
  if (nullptr != wa_intervals_) {
    allocator.free(wa_intervals_);
    wa_intervals_ = nullptr;
  }
}
|
|
|
|
// delta计算逻辑,前一个interval和后一个interval计算相差公式为
|
|
// suppose calculate the idx interval, and pre-interval is (idx + 1)
|
|
// delta = intervals_[idx+1].total_hash_sise
|
|
// - intervals_[idx].interval_cache_size
|
|
// * intervals_[idx+1].total_hash_cnt + no_cache_cnt * interval_size
|
|
// interval_size = intervals_[idx+1].interval_cache_size - intervals_[idx].interval_cache_size
|
|
// 因为跨了一个interval后,之前不能cache的,bound全部需要减去一个interval大小
|
|
// 可以理解为 hash:每次减少一个interval大小
|
|
// 而sort,开始是一次性减少到one_pass_size大小,再减少,则是以interval大小减少
|
|
int ObTenantSqlMemoryManager::ObSqlWorkAreaCalcInfo::calc_memory_target(
|
|
int64_t idx,
|
|
const int64_t pre_mem_target)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
int64_t dst_mem_target = pre_mem_target;
|
|
if (INTERVAL_NUM - 1 == idx) {
|
|
// 最后一个,独立计算
|
|
wa_intervals_[idx].set_mem_target(dst_mem_target);
|
|
} else {
|
|
ObSqlWorkAreaIntervalStat &pre_interval_stat = wa_intervals_[idx + 1].get_interval_stat();
|
|
// hash: bound size, 这里假设如果不能全部cache,则使用bound作为work area size
|
|
// 当bound小于cache size时,内存减少hash_size - bound_size
|
|
int64_t hash_delta = pre_interval_stat.get_total_hash_size()
|
|
- wa_intervals_[idx].get_interval_cache_size()
|
|
* pre_interval_stat.get_total_hash_cnt()
|
|
+ tmp_no_cache_cnt_ * (wa_intervals_[idx + 1].get_interval_cache_size()
|
|
- wa_intervals_[idx].get_interval_cache_size());
|
|
// sort: one pass size as work area size
|
|
// sort:两段:1)当bound小于sort_size时,内存减少sort_size - one_pass_size
|
|
// 2)当bound小于one_pass_size时,内存减少one_pass_size - bound_size
|
|
int64_t sort_delta =
|
|
pre_interval_stat.get_total_sort_size() - pre_interval_stat.get_total_sort_one_pass_size();
|
|
int64_t one_pass_delta = pre_interval_stat.get_total_one_pass_size()
|
|
- wa_intervals_[idx].get_interval_cache_size() * pre_interval_stat.get_total_one_pass_cnt();
|
|
dst_mem_target -= hash_delta;
|
|
dst_mem_target -= sort_delta;
|
|
dst_mem_target -= one_pass_delta;
|
|
if (0 > hash_delta || 0 > sort_delta || 0 > dst_mem_target) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected delta size", K(hash_delta), K(sort_delta), K(dst_mem_target), K(idx),
|
|
K(pre_mem_target), K(pre_interval_stat.get_total_hash_size()),
|
|
K(wa_intervals_[idx].get_interval_cache_size()), K(pre_interval_stat.get_total_hash_cnt()),
|
|
K(one_pass_delta), K(pre_interval_stat.get_total_one_pass_size()),
|
|
K(pre_interval_stat.get_total_one_pass_cnt()), K(tmp_no_cache_cnt_));
|
|
} else {
|
|
wa_intervals_[idx].set_mem_target(dst_mem_target);
|
|
}
|
|
if (hash_delta > 0 || sort_delta > 0 || one_pass_delta > 0) {
|
|
LOG_TRACE("trace memory target", K(hash_delta), K(sort_delta), K(dst_mem_target), K(idx),
|
|
K(pre_mem_target), K(pre_interval_stat.get_total_hash_size()),
|
|
K(wa_intervals_[idx].get_interval_cache_size()), K(pre_interval_stat.get_total_hash_cnt()),
|
|
K(one_pass_delta), K(pre_interval_stat.get_total_one_pass_size()),
|
|
K(pre_interval_stat.get_total_one_pass_cnt()), K(dst_mem_target), K(tmp_no_cache_cnt_));
|
|
}
|
|
// 统计点只有hash和one_pass_cnt
|
|
tmp_no_cache_cnt_ +=
|
|
(pre_interval_stat.get_total_hash_cnt() + pre_interval_stat.get_total_one_pass_cnt());
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int ObTenantSqlMemoryManager::ObSqlWorkAreaCalcInfo::find_best_interval_index_by_mem_target(
|
|
int64_t &interval_idx, const int64_t expect_mem_target, const int64_t total_memory_size)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
int64_t pre_mem_target = total_memory_size;
|
|
int64_t delta = INT64_MAX;
|
|
interval_idx = -1;
|
|
for (int64_t i = INTERVAL_NUM - 1; i >= 0 && OB_SUCC(ret); --i) {
|
|
if (OB_FAIL(calc_memory_target(i, pre_mem_target))) {
|
|
LOG_WARN("failed to calculate memory target", K(i), K(pre_mem_target));
|
|
} else {
|
|
int64_t mem_target = wa_intervals_[i].get_mem_target();
|
|
if (mem_target <= expect_mem_target && expect_mem_target - mem_target <= delta) {
|
|
interval_idx = i;
|
|
delta = expect_mem_target - mem_target;
|
|
break;
|
|
}
|
|
pre_mem_target = mem_target;
|
|
}
|
|
}
|
|
mem_target_ = expect_mem_target;
|
|
return ret;
|
|
}
|
|
|
|
int ObTenantSqlMemoryManager::ObSqlWorkAreaCalcInfo::calculate_global_bound_size(
|
|
const int64_t wa_max_memory_size,
|
|
const int64_t total_memory_size,
|
|
const int64_t profile_cnt,
|
|
const bool auto_calc)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
int64_t max_wa_size = wa_max_memory_size;
|
|
// int64_t max_wa_size = wa_max_memory_size;
|
|
// 最大占比6.25%(oracle 5%)
|
|
// 这里改为按照8个并发来设置
|
|
int64_t max_bound_size = (max_wa_size >> 3);
|
|
profile_cnt_ = profile_cnt;
|
|
int64_t avg_bound_size = (0 == profile_cnt_) ? max_bound_size : max_wa_size / profile_cnt_;
|
|
int64_t best_interval_idx = -1;
|
|
if (OB_FAIL(find_best_interval_index_by_mem_target(
|
|
best_interval_idx, max_wa_size, total_memory_size))) {
|
|
LOG_WARN("failed to find best interval index", K(ret), K(best_interval_idx), K(max_wa_size),
|
|
K(total_memory_size));
|
|
} else {
|
|
int64_t calc_global_bound_size = 0;
|
|
if (-1 == best_interval_idx) {
|
|
global_bound_size_ = LESS_THAN_100M_INTERVAL_SIZE;
|
|
} else {
|
|
calc_global_bound_size = wa_intervals_[best_interval_idx].get_interval_cache_size();
|
|
global_bound_size_ = calc_global_bound_size;
|
|
// ???这里是否有问题
|
|
// if (global_bound_size_ < avg_bound_size) {
|
|
// global_bound_size_ = avg_bound_size;
|
|
// }
|
|
}
|
|
//一般是由于可能全in-memory了,导致查找到返回的idx是最后一个,所以按照一个的最大占比使用
|
|
if (global_bound_size_ > max_bound_size) {
|
|
global_bound_size_ = max_bound_size;
|
|
}
|
|
if (global_bound_size_ < min_bound_size_) {
|
|
global_bound_size_ = min_bound_size_;
|
|
}
|
|
if (auto_calc) {
|
|
LOG_INFO("timer to calc global bound size", K(ret), K(best_interval_idx),
|
|
K(global_bound_size_), K(calc_global_bound_size), K(mem_target_), K(wa_max_memory_size),
|
|
K(profile_cnt_), K(total_memory_size), K(max_wa_size), K(avg_bound_size), K(max_bound_size),
|
|
K(min_bound_size_));
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////
|
|
// Creates and initialises the per-tenant SQL memory manager for the current tenant
// (MTL_ID()).
//
// No manager is created for tenants whose id is not greater than
// OB_MAX_RESERVED_TENANT_ID; sql_mem_mgr stays null and OB_SUCCESS is returned.
//
// For other tenants the routine:
//   1. allocates the manager and initialises its allocator,
//   2. allocates and constructs INTERVAL_NUM work area intervals with cumulative
//      cache sizes, together with one histogram per interval,
//   3. constructs HASH_CNT profile lists,
//   4. creates the work area stat hash map and pre-allocates MAX_WORKAREA_STAT_CNT
//      stat slots, numbering each slot with its index.
// On any failure everything allocated so far is released and sql_mem_mgr is null.
//
// @param sql_mem_mgr  out: the new manager, or nullptr
// @return OB_SUCCESS or the first error encountered
int ObTenantSqlMemoryManager::mtl_init(ObTenantSqlMemoryManager *&sql_mem_mgr)
{
  int ret = OB_SUCCESS;
  uint64_t tenant_id = MTL_ID();
  sql_mem_mgr = nullptr;
  // not created for system tenants
  if (tenant_id > OB_MAX_RESERVED_TENANT_ID) {
    sql_mem_mgr = OB_NEW(ObTenantSqlMemoryManager,
                         ObMemAttr(tenant_id, "SqlMemMgr"), tenant_id);
    if (nullptr == sql_mem_mgr) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("failed to alloc tenant sql memory manager", K(ret));
    } else if (OB_FAIL(sql_mem_mgr->allocator_.init(
                   lib::ObMallocAllocator::get_instance(),
                   OB_MALLOC_NORMAL_BLOCK_SIZE,
                   ObMemAttr(tenant_id, "SqlMemMgr")))) {
      LOG_WARN("failed to init fifo allocator", K(ret));
    } else {
      ObTenantSqlMemoryManager &mgr = *sql_mem_mgr;
      int64_t work_area_interval_size = sizeof(ObSqlWorkAreaInterval) * INTERVAL_NUM;
      mgr.wa_intervals_ = reinterpret_cast<ObSqlWorkAreaInterval*>(
          mgr.allocator_.alloc(work_area_interval_size));
      mgr.profile_lists_ = reinterpret_cast<ObSqlMemoryList*>(
          mgr.allocator_.alloc(sizeof(ObSqlMemoryList) * HASH_CNT));
      if (nullptr == mgr.wa_intervals_) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("failed to alloc work area interval", K(ret));
      } else if (nullptr == mgr.profile_lists_) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("failed to alloc profile list", K(ret));
      } else {
        mgr.tenant_id_ = tenant_id;
        // total_size is the running (cumulative) interval cache size
        int64_t total_size = 0;
        int64_t pre_total_size = total_size;
        for (int64_t i = 0; OB_SUCC(ret) && i < INTERVAL_NUM; ++i) {
          // pick the step of the size band that index i falls into
          int64_t step = 0;
          if (i < LESS_THAN_100M_CNT) {
            step = LESS_THAN_100M_INTERVAL_SIZE;   // 1M
          } else if (i < LESS_THAN_500M_CNT) {
            step = LESS_THAN_500M_INTERVAL_SIZE;   // 2M
          } else if (i < LESS_THAN_1G_CNT) {
            step = LESS_THAN_1G_INTERVAL_SIZE;     // 5M
          } else if (i < LESS_THAN_5G_CNT) {
            step = LESS_THAN_5G_INTERVAL_SIZE;     // 10M
          } else if (i < LESS_THAN_10G_CNT) {
            step = LESS_THAN_10G_INTERVAL_SIZE;    // 50M
          } else if (i < LESS_THAN_100G_CNT) {
            step = LESS_THAN_100G_INTERVAL_SIZE;   // 900M
          } else if (i < LESS_THAN_1T_CNT) {
            step = LESS_THAN_1T_INTERVAL_SIZE;     // 9000M
          }
          total_size += step;
          new (static_cast<void *>(&mgr.wa_intervals_[i])) ObSqlWorkAreaInterval(i, total_size);
          // each histogram covers the range (previous cumulative size, current cumulative size)
          ObWorkareaHistogram workarea_hist(pre_total_size, total_size);
          if (OB_FAIL(mgr.workarea_histograms_.push_back(workarea_hist))) {
            LOG_WARN("failed to push back workarea histogram", K(ret), K(i));
          }
          pre_total_size = total_size;
        }
        // sanity check: the bands must add up to exactly MAX_INTERVAL_SIZE
        if (MAX_INTERVAL_SIZE == total_size) {
          mgr.min_bound_size_ = MIN_GLOBAL_BOUND_SIZE;
        } else {
          ret = OB_ERR_UNEXPECTED;
          LOG_ERROR("unexpect size", K(total_size));
        }
        if (OB_SUCC(ret)) {
          for (int64_t i = 0; i < HASH_CNT; ++i) {
            ObSqlMemoryList *list =
                new (static_cast<void *>(&mgr.profile_lists_[i])) ObSqlMemoryList(i);
            list->get_profile_list().reset();
          }
        }
        if (OB_SUCC(ret)) {
          if (OB_FAIL(mgr.wa_ht_.create(MAX_WORKAREA_STAT_CNT,
                                        "SqlMemMgr",
                                        "SqlMemMgr",
                                        tenant_id))) {
            LOG_WARN("failed to create hashmap", K(ret));
          } else if (OB_FAIL(mgr.workarea_stats_.prepare_allocate(
                         MAX_WORKAREA_STAT_CNT))) {
            LOG_WARN("failed to prepare element", K(ret));
          } else {
            // every pre-allocated stat slot remembers its own position
            for (int64_t i = 0; i < MAX_WORKAREA_STAT_CNT; ++i) {
              mgr.workarea_stats_.at(i).set_seqno(i);
            }
          }
        }
      }
      if (OB_FAIL(ret)) {
        // roll back whatever was set up inside the manager
        if (nullptr != sql_mem_mgr) {
          if (nullptr != mgr.wa_intervals_) {
            mgr.allocator_.free(mgr.wa_intervals_);
            mgr.wa_intervals_ = nullptr;
          }
          if (nullptr != mgr.profile_lists_) {
            mgr.allocator_.free(mgr.profile_lists_);
            mgr.profile_lists_ = nullptr;
          }
          mgr.wa_ht_.destroy();
          mgr.workarea_stats_.reset();
          mgr.workarea_histograms_.reset();
        }
      }
      LOG_INFO("init sql memory manager", K(work_area_interval_size), K(tenant_id), K(ret));
    }
    if (OB_FAIL(ret)) {
      // release the manager object itself
      if (nullptr != sql_mem_mgr) {
        sql_mem_mgr->allocator_.reset();
        common::ob_delete(sql_mem_mgr);
      }
      sql_mem_mgr = nullptr;
    }
  }
  return ret;
}
|
|
|
|
// Tears down a manager created by mtl_init().
// Frees the interval array and the profile lists, destroys the stat hash map,
// resets the stat and histogram containers and the allocator, then deletes the
// manager. sql_mem_mgr is always null on return; a null input is accepted.
void ObTenantSqlMemoryManager::mtl_destroy(ObTenantSqlMemoryManager *&sql_mem_mgr)
{
  if (nullptr != sql_mem_mgr) {
    ObTenantSqlMemoryManager &mgr = *sql_mem_mgr;
    if (nullptr != mgr.wa_intervals_) {
      mgr.allocator_.free(mgr.wa_intervals_);
      mgr.wa_intervals_ = nullptr;
    }
    if (nullptr != mgr.profile_lists_) {
      mgr.allocator_.free(mgr.profile_lists_);
      mgr.profile_lists_ = nullptr;
    }
    mgr.wa_ht_.destroy();
    mgr.workarea_stats_.reset();
    mgr.workarea_histograms_.reset();
    mgr.allocator_.reset();
    common::ob_delete(sql_mem_mgr);
  }
  sql_mem_mgr = nullptr;
}
|
|
|
|
int ObTenantSqlMemoryManager::calc_work_area_size_by_profile(
|
|
int64_t global_bound_size,
|
|
ObSqlWorkAreaProfile &profile)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
if (profile.is_hash_join_wa()) {
|
|
if (global_bound_size >= profile.get_cache_size()) {
|
|
// in-memory
|
|
profile.set_expect_size(profile.get_cache_size());
|
|
} else if (global_bound_size >= profile.get_one_pass_size()) {
|
|
profile.set_expect_size(global_bound_size);
|
|
} else if (global_bound_size < profile.get_min_size()) {
|
|
// 8个分区+1个page size
|
|
profile.set_expect_size(profile.get_min_size());
|
|
} else {
|
|
profile.set_expect_size(global_bound_size);
|
|
}
|
|
} else if (profile.is_sort_wa()) {
|
|
if (global_bound_size > profile.get_cache_size()) {
|
|
// in-memory
|
|
profile.set_expect_size(profile.get_cache_size());
|
|
} else if (global_bound_size > profile.get_one_pass_size()) {
|
|
// sort在one-pass情况下,增加内存对性能没有影响
|
|
profile.set_expect_size(profile.get_one_pass_size());
|
|
} else if (global_bound_size < profile.get_min_size()) {
|
|
profile.set_expect_size(profile.get_min_size());
|
|
} else {
|
|
profile.set_expect_size(global_bound_size);
|
|
}
|
|
} else {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpect profile type", K(profile.get_work_area_type()));
|
|
}
|
|
profile.set_global_bound_size(global_bound_size);
|
|
profile.set_max_bound(min(global_bound_size, profile.get_cache_size()));
|
|
return ret;
|
|
}
|
|
|
|
// Assigns a work area size to a registered profile when automatic memory
// management is enabled.
//
// The profile's cache size is added through increase(); if need_manual_calc_bound()
// asks for it, the global bound is recalculated first. The profile's expected size
// is then derived from the current global bound.
// Unregistered profiles, or a disabled automatic manager, leave the profile untouched.
//
// @param allocator  forwarded to calculate_global_bound_size() for the recalculation
// @param profile    profile to size
// @return OB_SUCCESS or the first error from the recalculation / sizing step
int ObTenantSqlMemoryManager::get_work_area_size(
  ObIAllocator *allocator,
  ObSqlWorkAreaProfile &profile)
{
  int ret = OB_SUCCESS;
  // an unregistered profile means the sql memory manager is disabled for it
  if (profile.is_registered() && enable_auto_memory_mgr_) {
    increase(profile.get_cache_size());
    LOG_TRACE("trace drift size", K(drift_size_), K(global_bound_size_));
    if (need_manual_calc_bound()) {
      ++manual_calc_cnt_;
      if (OB_FAIL(calculate_global_bound_size(allocator, false))) {
        LOG_WARN("failed to calculate global bound size", K(global_bound_size_));
      } else {
        profile.inc_calc_count();
        LOG_TRACE("trace manual calc global bound size", K(global_bound_size_),
          K(profile.get_one_pass_size()), K(drift_size_), K(mem_target_));
      }
    }
    if (OB_SUCC(ret)
        && OB_FAIL(calc_work_area_size_by_profile(get_global_bound_size(), profile))) {
      LOG_WARN("failed to calculate worka area size by profile", K(ret), K(profile));
    }
  }
  return ret;
}
|
|
|
|
// 注册策略:满足不是小查询,即auto_sql_memory_manager is true
|
|
// profile目前存在三种状态
|
|
// status dynamic-perf-view auto policy
|
|
// register + auto 统计到性能视图 内存动态调整
|
|
// register + manual 统计到性能视图 内存取决于xxx_area_size
|
|
// unregister 不统计到性能视图 内存取决于xxx_area_size
|
|
//
|
|
// is_registered: register | unregister 只会影响是否注册,同时只有注册了才能将profile写入性能视图
|
|
// auto_policy : auto|manual: 会影响内存使用策略
|
|
// 所以是否调用自动的内存调整,使用get_auto_policy来判断
|
|
// 当profile注册后,才能统计性能视图等
|
|
int ObTenantSqlMemoryManager::register_work_area_profile(ObSqlWorkAreaProfile &profile)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
if (!profile.is_registered()) {
|
|
if (OB_NOT_NULL(profile.get_prev())) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected status: next is null, but prev is not null", K(ret));
|
|
} else if (!ObSqlWorkAreaProfile::auto_sql_memory_manager(profile)) {
|
|
// data is small, don't use auto memory manager
|
|
} else {
|
|
int64_t hash_val = get_hash_value(profile.get_id());
|
|
if (hash_val < 0 || hash_val >= HASH_CNT) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpect hash val", K(hash_val), K(profile));
|
|
} else if (OB_FAIL(profile_lists_[hash_val].register_work_area_profile(profile))) {
|
|
LOG_WARN("failed to register work area profile", K(hash_val), K(profile));
|
|
} else {
|
|
profile.active_time_ = ObTimeUtility::current_time();
|
|
}
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// Applies a change of delta_size to the drift counter and, if the drift warrants it
// (need_manual_by_drift()), recalculates the global bound size.
// Only acts when automatic memory management is enabled and the profile uses the
// auto policy.
//
// @param allocator   used for the recalculation; must not be null when one is triggered
// @param profile     profile whose size changed
// @param delta_size  size change to add to the drift (may be negative)
// @return OB_SUCCESS; OB_ERR_UNEXPECTED if a recalculation is needed but allocator is
//         null; otherwise the error from calculate_global_bound_size()
int ObTenantSqlMemoryManager::update_work_area_profile(
  common::ObIAllocator *allocator,
  ObSqlWorkAreaProfile &profile,
  const int64_t delta_size)
{
  int ret = OB_SUCCESS;
  const bool auto_managed = enable_auto_memory_mgr_ && profile.get_auto_policy();
  if (auto_managed) {
    // delta_size maybe negative integer
    (ATOMIC_AAF(&drift_size_, delta_size));
    if (need_manual_by_drift()) {
      int64_t pre_drift_size = drift_size_;
      ++manual_calc_cnt_;
      if (OB_ISNULL(allocator)) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("allocator is null", K(lbt()));
      } else if (OB_FAIL(calculate_global_bound_size(allocator, false))) {
        LOG_WARN("failed to calculate global bound size", K(global_bound_size_));
      } else {
        profile.inc_calc_count();
        LOG_TRACE("trace manual calc global bound size by drift", K(global_bound_size_),
          K(profile.get_one_pass_size()), K(drift_size_), K(mem_target_), K(pre_drift_size));
      }
    }
  }
  return ret;
}
|
|
|
|
// 这里暂时对并发场景的写last record不进行并发控制
|
|
int ObTenantSqlMemoryManager::fill_workarea_stat(
|
|
ObSqlWorkAreaStat &wa_stat,
|
|
ObSqlWorkAreaProfile &profile)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
if (profile.get_operator_type() != wa_stat.get_op_type()) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected status: operator type is not match", K(profile.get_operator_type()),
|
|
K(wa_stat.get_op_type()));
|
|
} else {
|
|
wa_stat.est_cache_size_ = profile.get_cache_size();
|
|
wa_stat.est_one_pass_size_ = profile.get_one_pass_size();
|
|
wa_stat.last_memory_used_ = profile.get_max_mem_used();
|
|
wa_stat.last_execution_ = profile.get_number_pass();
|
|
wa_stat.last_degree_ = profile.get_dop();
|
|
int64_t num_executions = profile.get_number_pass();
|
|
if (0 == num_executions) {
|
|
wa_stat.increase_optimal_executions();
|
|
} else if (1 == num_executions) {
|
|
wa_stat.increase_onepass_executions();
|
|
} else {
|
|
wa_stat.increase_multipass_executions();
|
|
}
|
|
int64_t active_avg_time = wa_stat.get_total_executions() * wa_stat.get_active_avg_time();
|
|
wa_stat.increase_total_executions();
|
|
wa_stat.active_avg_time_ =
|
|
(active_avg_time +
|
|
(ObTimeUtility::current_time() - profile.get_active_time())) / wa_stat.get_total_executions();
|
|
wa_stat.last_temp_size_ = profile.get_dumped_size();
|
|
if (wa_stat.max_temp_size_ < wa_stat.last_temp_size_) {
|
|
wa_stat.max_temp_size_ = wa_stat.last_temp_size_;
|
|
}
|
|
wa_stat.is_auto_policy_ = profile.get_auto_policy();
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// Looks up the stat entry for workarea_key under the read latch and, if present,
// merges `profile` into it.
//
// @param workarea_key  key of the stat entry
// @param profile       finished profile to merge
// @param need_insert   out: true when no entry exists for the key (the caller should
//                      create one); false otherwise
// @return OB_SUCCESS (also when the key is missing); OB_ERR_UNEXPECTED when the map
//         holds a null entry, an entry with an out-of-range seqno, or an entry that
//         is not the slot its seqno points to; otherwise the lookup / fill error
int ObTenantSqlMemoryManager::try_fill_workarea_stat(
  ObSqlWorkAreaStat::WorkareaKey &workarea_key,
  ObSqlWorkAreaProfile &profile,
  bool &need_insert)
{
  int ret = OB_SUCCESS;
  need_insert = false;
  ObLatchRGuard guard(lock_, ObLatchIds::SQL_WA_STAT_MAP_LOCK);
  ObSqlWorkAreaStat *wa_stat = nullptr;
  ret = wa_ht_.get_refactored(workarea_key, wa_stat);
  if (OB_HASH_NOT_EXIST == ret) {
    // not an error: tell the caller to insert a new entry
    need_insert = true;
    ret = OB_SUCCESS;
  } else if (OB_FAIL(ret)) {
    LOG_WARN("failed to get stat", K(ret));
  } else if (OB_ISNULL(wa_stat)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected status: wa stat is null", K(ret), K(profile));
  } else {
    int64_t seqno = wa_stat->get_seqno();
    if (seqno < 0 || seqno >= MAX_WORKAREA_STAT_CNT) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected status: seqno is invalid", K(ret), K(profile), K(seqno),
        K(*wa_stat));
    } else if (&workarea_stats_.at(seqno) != wa_stat) {
      // the map entry must be exactly the slot that its seqno designates
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected status: wa stat is not match", K(ret), K(profile), K(seqno));
    } else if (OB_FAIL(fill_workarea_stat(workarea_stats_.at(seqno), profile))) {
      LOG_WARN("failed to fill workarea stat", K(ret));
    }
  }
  return ret;
}
|
|
|
|
// write lock and create new stat
|
|
int ObTenantSqlMemoryManager::new_and_fill_workarea_stat(
|
|
ObSqlWorkAreaStat::WorkareaKey &workarea_key,
|
|
ObSqlWorkAreaProfile &profile)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
ObSqlWorkAreaStat *wa_stat = nullptr;
|
|
ObLatchWGuard guard(lock_, ObLatchIds::SQL_WA_STAT_MAP_LOCK);
|
|
if (OB_FAIL(wa_ht_.get_refactored(workarea_key, wa_stat))) {
|
|
}
|
|
if (OB_HASH_NOT_EXIST == ret) {
|
|
ret = OB_SUCCESS;
|
|
if (is_wa_full()) {
|
|
// eliminate one
|
|
if (wa_start_ != wa_end_) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected status: wa is full, but start and end position is not match",
|
|
K(wa_start_), K(wa_end_), K(wa_cnt_));
|
|
} else {
|
|
ObSqlWorkAreaStat *tmp_wa_stat = nullptr;
|
|
wa_start_ = (wa_start_ + 1) % MAX_WORKAREA_STAT_CNT;
|
|
wa_stat = &workarea_stats_.at(wa_end_);
|
|
if (OB_FAIL(wa_ht_.erase_refactored(wa_stat->get_workarea_key(), &tmp_wa_stat))) {
|
|
LOG_WARN("failed to erase workarea stat", K(ret), K(wa_stat->get_workarea_key()));
|
|
} else if (wa_stat != tmp_wa_stat) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected status: wa stat is not match", K(ret));
|
|
} else {
|
|
wa_stat->reset();
|
|
--wa_cnt_;
|
|
}
|
|
}
|
|
} else {
|
|
wa_stat = &workarea_stats_.at(wa_end_);
|
|
wa_stat->reset();
|
|
}
|
|
if (OB_FAIL(ret)) {
|
|
} else {
|
|
// new item: key + operator type
|
|
wa_stat->workarea_key_.set_sql_id(profile.get_sql_id());
|
|
wa_stat->workarea_key_.set_plan_id(profile.get_plan_id());
|
|
wa_stat->workarea_key_.set_operator_id(profile.get_operator_id());
|
|
wa_stat->op_type_ = profile.get_operator_type();
|
|
if (OB_FAIL(fill_workarea_stat(*wa_stat, profile))) {
|
|
LOG_WARN("failed to fill workarea stat", K(ret));
|
|
} else if (OB_FAIL(wa_ht_.set_refactored(workarea_key, wa_stat))) {
|
|
LOG_WARN("failed to set refactored", K(ret));
|
|
} else {
|
|
wa_end_ = (wa_end_ + 1) % MAX_WORKAREA_STAT_CNT;
|
|
++wa_cnt_;
|
|
LOG_TRACE("new workarea stat:", K(wa_stat->workarea_key_), K(workarea_key),
|
|
K(wa_stat->workarea_key_ == workarea_key), K(wa_stat->seqno_),
|
|
K(profile), K(wa_cnt_), K(wa_start_), K(wa_end_));
|
|
}
|
|
}
|
|
} else if (OB_SUCC(ret)) {
|
|
if (OB_ISNULL(wa_stat)) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected status: wa_stat is null", K(ret));
|
|
} else if (OB_FAIL(fill_workarea_stat(*wa_stat, profile))) {
|
|
LOG_WARN("failed to fill workarea stat", K(ret));
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// Records the outcome of a finished profile in the per-operator work area stats.
//
// The stat entry is keyed by (plan id, operator id, sql id). An existing entry is
// updated under the read latch; if none exists, one is created under the write latch.
// Profiles without an execution context are ignored.
//
// @param profile  finished profile
// @return OB_SUCCESS or the error from the update / insert step
int ObTenantSqlMemoryManager::collect_workarea_stat(ObSqlWorkAreaProfile &profile)
{
  int ret = OB_SUCCESS;
  if (profile.has_exec_ctx()) {
    ObSqlWorkAreaStat::WorkareaKey workarea_key(
      profile.get_plan_id(),
      profile.get_operator_id());
    workarea_key.set_sql_id(profile.get_sql_id());
    bool need_insert = false;
    if (OB_FAIL(try_fill_workarea_stat(workarea_key, profile, need_insert))) {
      LOG_WARN("failed to try fill workarea stat", K(ret));
    } else if (need_insert && OB_FAIL(new_and_fill_workarea_stat(workarea_key, profile))) {
      // message corrected: this path creates a work area *stat*
      LOG_WARN("failed to create new and fill workarea stat", K(ret));
    }
  }
  return ret;
}
|
|
|
|
// Counts one finished profile in the histogram bucket that matches its memory usage.
//
// The bucket is located with find_interval_index() using profile.get_mem_used().
// Depending on the profile's pass count the optimal (0), one-pass (1) or
// multi-pass (> 1) counter is incremented, followed by the total counter.
//
// @param profile  finished profile
// @return OB_SUCCESS; the error from find_interval_index(); OB_ERR_UNEXPECTED when no
//         interval was found or the memory usage lies outside the bucket's bounds
int ObTenantSqlMemoryManager::fill_workarea_histogram(ObSqlWorkAreaProfile &profile)
{
  int ret = OB_SUCCESS;
  // INT64_MAX marks "not set by find_interval_index()"
  int64_t idx = INT64_MAX;
  int64_t size = INT64_MAX;
  int64_t max_mem_used = profile.get_mem_used();
  if (OB_FAIL(find_interval_index(max_mem_used, idx, size))) {
    LOG_WARN("failed to find interval index", K(ret));
  } else if (INT64_MAX == idx || INT64_MAX == size) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected status: can't found any interval", K(idx), K(size), K(profile));
  } else {
    ObWorkareaHistogram &hist = workarea_histograms_.at(idx);
    const bool in_range = !(max_mem_used < hist.get_low_optimal_size()
                            || max_mem_used > hist.get_high_optimal_size());
    if (in_range) {
      if (0 == profile.get_number_pass()) {
        hist.increase_optimal_executions();
      } else if (1 == profile.get_number_pass()) {
        hist.increase_onepass_executions();
      } else if (profile.get_number_pass() > 1) {
        hist.increase_multipass_executions();
      }
      hist.increase_total_executions();
    } else {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected status: find interval error", K(ret),
        "mem used", max_mem_used,
        "low bound", hist.get_low_optimal_size(),
        "high bound", hist.get_high_optimal_size());
    }
  }
  return ret;
}
|
|
|
|
// Removes a registered profile from its hash bucket and accounts for its completion.
//
// Runs under mutex_. After the profile has been unlinked:
//   - with automatic memory management enabled and an auto-policy profile, its cache
//     size is subtracted through decrease();
//   - if the profile needs profiling, its outcome is merged into the work area stats
//     and the work area histogram.
// Profiles that are not registered are ignored.
//
// @param profile  profile to unregister
// @return OB_SUCCESS; OB_ERR_UNEXPECTED if the hash bucket is out of range; otherwise
//         the error from the bucket list or from the statistics steps
int ObTenantSqlMemoryManager::unregister_work_area_profile(ObSqlWorkAreaProfile &profile)
{
  int ret = OB_SUCCESS;
  lib::ObMutexGuard guard(mutex_);
  if (profile.is_registered()) {
    int64_t hash_val = get_hash_value(profile.get_id());
    if (hash_val < 0 || hash_val >= HASH_CNT) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpect hash val", K(hash_val), K(profile));
    } else if (OB_FAIL(profile_lists_[hash_val].unregister_work_area_profile(profile))) {
      // message corrected: this is the unregister path; the error code is logged too
      LOG_WARN("failed to unregister work area profile", K(ret), K(hash_val), K(profile));
    } else {
      if (enable_auto_memory_mgr_ && profile.get_auto_policy()) {
        decrease(profile.get_cache_size());
      }
      if (!profile.need_profiled()) {
        // nothing to record for this profile
      } else if (OB_FAIL(collect_workarea_stat(profile))) {
        LOG_WARN("failed to fill workarea stat", K(ret));
      } else if (OB_FAIL(fill_workarea_histogram(profile))) {
        LOG_WARN("failed to fill workarea histogram", K(ret));
      }
      LOG_TRACE("unregister workarea profile", K(profile), K(ret));
    }
  }
  return ret;
}
|
|
|
|
int ObTenantSqlMemoryManager::get_max_work_area_size(
|
|
int64_t &max_wa_memory_size, const bool auto_calc)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
ObSchemaGetterGuard schema_guard;
|
|
const ObSysVarSchema *var_schema = NULL;
|
|
ObObj value;
|
|
int64_t pctg = 0;
|
|
max_wa_memory_size = 0;
|
|
if (OB_ISNULL(GCTX.schema_service_)) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("schema service is null");
|
|
} else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard(tenant_id_, schema_guard))) {
|
|
LOG_WARN("get schema guard failed", K(ret));
|
|
} else if (OB_FAIL(schema_guard.get_tenant_system_variable(
|
|
tenant_id_, SYS_VAR_OB_SQL_WORK_AREA_PERCENTAGE, var_schema))) {
|
|
LOG_WARN("get tenant system variable failed", K(ret), K(tenant_id_));
|
|
} else if (OB_ISNULL(var_schema)) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("var_schema is null");
|
|
} else if (OB_FAIL(var_schema->get_value(NULL, NULL, value))) {
|
|
LOG_WARN("get value from var_schema failed", K(ret), K(*var_schema));
|
|
} else if (OB_FAIL(value.get_int(pctg))) {
|
|
LOG_WARN("get int from value failed", K(ret), K(value));
|
|
} else {
|
|
int64_t tenant_max_memory_limit = get_tenant_memory_limit(tenant_id_);
|
|
int64_t tenant_memory_hold = get_tenant_memory_hold(tenant_id_);
|
|
int64_t tenant_work_area_max_size = tenant_max_memory_limit * pctg / 100;
|
|
int64_t tenant_work_area_memory_hold =
|
|
get_tenant_memory_hold(tenant_id_, common::ObCtxIds::WORK_AREA);
|
|
int64_t max_tenant_memory_size = tenant_max_memory_limit - tenant_memory_hold;
|
|
int64_t max_workarea_memory_size = tenant_work_area_max_size - tenant_work_area_memory_hold;
|
|
int64_t washable_size = -2;
|
|
int wash_ratio = 6; // valid value: [0-6]
|
|
if (max_workarea_memory_size > 0 &&
|
|
max_tenant_memory_size > 0 &&
|
|
max_workarea_memory_size > max_tenant_memory_size) {
|
|
int tmp_ret = EVENT_CALL(EventTable::EN_AMM_WASH_RATIO);
|
|
if (0 != tmp_ret) {
|
|
wash_ratio = -tmp_ret;
|
|
}
|
|
if (0 <= wash_ratio && wash_ratio <=6 && auto_calc) {
|
|
if (OB_FAIL(ObKVGlobalCache::get_instance().get_washable_size(tenant_id_, washable_size, wash_ratio))) {
|
|
LOG_WARN("failed to get washable memory size", K(ret));
|
|
} else {
|
|
max_tenant_memory_size += washable_size;
|
|
ATOMIC_SET(&max_tenant_memory_size_, max_tenant_memory_size);
|
|
}
|
|
// if failed to get washable size, then reset OB_SUCCESS and just use little memory
|
|
ret = OB_SUCCESS;
|
|
} else {
|
|
int64_t tmp_max_tenant_memory_size = ATOMIC_LOAD(&max_tenant_memory_size_);
|
|
if (0 != tmp_max_tenant_memory_size && 0 <= wash_ratio && wash_ratio <=6) {
|
|
// use the value that background thread calculate
|
|
max_tenant_memory_size += tmp_max_tenant_memory_size;
|
|
} else {
|
|
ObTenantResourceMgrHandle resource_handle;
|
|
if (OB_FAIL(ObResourceMgr::get_instance().get_tenant_resource_mgr(
|
|
tenant_id_, resource_handle))) {
|
|
ret = OB_SUCCESS;
|
|
} else {
|
|
// TODO: kvcache大概可以淘汰多少内存,目前没有数据,后续寒晖他们会提供接口
|
|
// bug34818894
|
|
// 这里暂时写一个默认比例
|
|
max_tenant_memory_size += resource_handle.get_memory_mgr()->get_cache_hold() * pctg / 100;
|
|
washable_size = -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// 取租户最大可用内存和ctx最大可用内存的最小值
|
|
int64_t remain_memory_size = max_tenant_memory_size > 0
|
|
? min(max_workarea_memory_size, max_tenant_memory_size)
|
|
: max_tenant_memory_size;
|
|
int64_t total_alloc_size = sql_mem_callback_.get_total_alloc_size();
|
|
double ratio = total_alloc_size * 1.0 / tenant_work_area_memory_hold;
|
|
// 1 - x^3函数,表示随着hold内存越多,可用内存越少,同时alloc越多,可用内存越少
|
|
// 反之,hold越少,可用内存越多,alloc越少,可用内存又会越多
|
|
// 这里采用平方主要是为了内存增长和减少都比较平滑
|
|
// so: formula
|
|
// hold_ratio = hold / max_size;
|
|
// tmp_max_wa = (1 - hold_ratio * hold_ratio * hold_ratio) * (max - hold) + alloc
|
|
// alloc_ratio = alloc / tmp_max_wa
|
|
// max_wa = tmp_max_wa * (1 - alloc_ratio * alloc_ratio * alloc_ratio)
|
|
int64_t pre_mem_target = mem_target_;
|
|
double hold_ratio = 1. * tenant_work_area_memory_hold / tenant_work_area_max_size;
|
|
int64_t tmp_max_wa_memory_size = (remain_memory_size > 0)
|
|
? remain_memory_size + total_alloc_size
|
|
: total_alloc_size;
|
|
double alloc_ratio = total_alloc_size * 1.0 / tmp_max_wa_memory_size;
|
|
// if (total_alloc_size >= tmp_max_wa_memory_size) {
|
|
// // 这里用最近N次的结果来拟合可能比较好,但由于global bound 决定后,内存使用有延迟,比较难决定他们之间的关系
|
|
// max_wa_memory_size = (tmp_max_wa_memory_size >> 1);
|
|
// } else
|
|
{
|
|
// only use formula (1 - ratio ^ 3)
|
|
max_wa_memory_size = tmp_max_wa_memory_size * (1 - alloc_ratio * alloc_ratio * alloc_ratio);
|
|
}
|
|
max_workarea_size_ = tenant_work_area_max_size;
|
|
workarea_hold_size_ = tenant_work_area_memory_hold;
|
|
max_auto_workarea_size_ = max_wa_memory_size;
|
|
if (0 > max_wa_memory_size) {
|
|
max_wa_memory_size = 0;
|
|
LOG_INFO("max work area is 0", K(tenant_max_memory_limit), K(total_alloc_size),
|
|
K(tenant_work_area_memory_hold), K(tenant_work_area_max_size));
|
|
}
|
|
if (auto_calc) {
|
|
LOG_INFO("trace max work area", K(auto_calc), K(tenant_max_memory_limit), K(total_alloc_size),
|
|
K(tenant_work_area_memory_hold), K(tenant_work_area_max_size), K(max_wa_memory_size),
|
|
K(tmp_max_wa_memory_size), K(pre_mem_target), K(remain_memory_size), K(ratio),
|
|
K(alloc_ratio), K(hold_ratio), K(tenant_memory_hold), K(washable_size),
|
|
K(max_workarea_memory_size), K(max_tenant_memory_size), K(wash_ratio), K_(tenant_id));
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
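// The total size must stay consistent for the whole of one lookup. If it could be modified
// while the computation is in progress, the interval index that is found and the cache size
// would no longer agree.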
|
|
int ObTenantSqlMemoryManager::find_interval_index(
|
|
const int64_t cache_size,
|
|
int64_t &idx,
|
|
int64_t &out_cache_size)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
bool found = false;
|
|
idx = -1;
|
|
int64_t total_size = cache_size;
|
|
out_cache_size = cache_size;
|
|
if (cache_size <= wa_intervals_[LESS_THAN_100M_CNT - 1].get_interval_cache_size()) {
|
|
idx = (total_size - 1) / LESS_THAN_100M_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
idx = LESS_THAN_100M_CNT;
|
|
total_size -= LESS_THAN_100M_INTERVAL_SIZE * LESS_THAN_100M_CNT;
|
|
if (cache_size <= wa_intervals_[LESS_THAN_500M_CNT - 1].get_interval_cache_size()) {
|
|
idx += (total_size - 1) / LESS_THAN_500M_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
idx = LESS_THAN_500M_CNT;
|
|
total_size -= LESS_THAN_500M_INTERVAL_SIZE * (LESS_THAN_500M_CNT - LESS_THAN_100M_CNT);
|
|
}
|
|
}
|
|
|
|
if (found) {
|
|
} else if (cache_size <= wa_intervals_[LESS_THAN_1G_CNT - 1].get_interval_cache_size()) {
|
|
idx += (total_size - 1) / LESS_THAN_1G_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
idx = LESS_THAN_1G_CNT;
|
|
total_size -= LESS_THAN_1G_INTERVAL_SIZE * (LESS_THAN_1G_CNT - LESS_THAN_500M_CNT);
|
|
if (cache_size <= wa_intervals_[LESS_THAN_5G_CNT - 1].get_interval_cache_size()) {
|
|
idx += (total_size - 1) / LESS_THAN_5G_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
idx = LESS_THAN_5G_CNT;
|
|
total_size -= LESS_THAN_5G_INTERVAL_SIZE * (LESS_THAN_5G_CNT - LESS_THAN_1G_CNT);
|
|
}
|
|
}
|
|
|
|
if (found) {
|
|
} else if (cache_size <= wa_intervals_[LESS_THAN_10G_CNT - 1].get_interval_cache_size()) {
|
|
idx += (total_size - 1) / LESS_THAN_10G_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
idx = LESS_THAN_10G_CNT;
|
|
total_size -= LESS_THAN_10G_INTERVAL_SIZE * (LESS_THAN_10G_CNT - LESS_THAN_5G_CNT);
|
|
if (cache_size <= wa_intervals_[LESS_THAN_100G_CNT - 1].get_interval_cache_size()) {
|
|
idx += (total_size - 1) / LESS_THAN_100G_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
idx = LESS_THAN_100G_CNT;
|
|
total_size -= LESS_THAN_100G_INTERVAL_SIZE * (LESS_THAN_100G_CNT - LESS_THAN_10G_CNT);
|
|
if (cache_size <= wa_intervals_[LESS_THAN_1T_CNT - 1].get_interval_cache_size()) {
|
|
idx += (total_size - 1) / LESS_THAN_1T_INTERVAL_SIZE;
|
|
found = true;
|
|
} else {
|
|
found = true;
|
|
idx = INTERVAL_NUM - 1;
|
|
LOG_TRACE("too big size", K(cache_size));
|
|
}
|
|
}
|
|
}
|
|
// check
|
|
if (found) {
|
|
if (cache_size < 0) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected status: cache size is less than 0", K(idx), K(cache_size), K(ret));
|
|
} else if (INTERVAL_NUM - 1 != idx) {
|
|
if (0 == idx) {
|
|
if (cache_size > wa_intervals_[idx].get_interval_cache_size()) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("failed to find interval index", K(idx), K(cache_size),
|
|
K(wa_intervals_[idx].get_interval_cache_size()));
|
|
}
|
|
} else {
|
|
if (cache_size <= wa_intervals_[idx - 1].get_interval_cache_size()
|
|
|| cache_size > wa_intervals_[idx].get_interval_cache_size()) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("failed to find interval index", K(idx), K(cache_size),
|
|
K(wa_intervals_[idx].get_interval_cache_size()));
|
|
}
|
|
}
|
|
} else {
|
|
if (cache_size <= wa_intervals_[idx - 1].get_interval_cache_size()) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("failed to find interval index", K(idx), K(cache_size),
|
|
K(wa_intervals_[idx - 1].get_interval_cache_size()));
|
|
}
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
// sum memory size of all profiles
|
|
/**
 * Fold every registered auto-policy profile into the interval statistics.
 *
 * Each profile is analyzed twice: once in the interval of its cache size and once in the
 * interval of its one-pass size. Every hash bucket is walked while holding that bucket's lock.
 *
 * @param[in,out] wa_intervals       interval array whose statistics are updated
 * @param[out]    total_memory_size  sum of the cache sizes of all counted profiles
 * @param[out]    cur_profile_cnt    number of counted profiles
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED when the profile lists are missing, or the first error
 *         from find_interval_index() / analyze_profile()
 */
int ObTenantSqlMemoryManager::count_profile_into_work_area_intervals(
    ObSqlWorkAreaInterval *wa_intervals,
    int64_t &total_memory_size,
    int64_t &cur_profile_cnt)
{
  int ret = OB_SUCCESS;
  total_memory_size = 0;
  cur_profile_cnt = 0;

  if (OB_ISNULL(profile_lists_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("profile list is null", K(ret));
  }
  // count interval stat from all profiles
  for (int64_t bucket = 0; OB_SUCC(ret) && bucket < HASH_CNT; ++bucket) {
    ObLockGuard<ObSpinLock> lock_guard(profile_lists_[bucket].get_lock());
    ObDList<ObSqlWorkAreaProfile> &profile_list = profile_lists_[bucket].get_profile_list();
    DLIST_FOREACH_X(profile, profile_list, OB_SUCC(ret)) {
      int64_t cache_idx = -1;
      int64_t pass_idx = -1;
      int64_t cache_bytes = 0;
      int64_t pass_bytes = 0;
      if (profile->get_auto_policy()) {
        if (OB_FAIL(find_interval_index(profile->get_cache_size(), cache_idx, cache_bytes))) {
          LOG_WARN("failed to find interval index", K(*profile));
        } else {
          // the value stored here is replaced by the out parameter of the lookup below
          pass_bytes = profile->calc_one_pass_size(cache_bytes);
          if (OB_FAIL(find_interval_index(
              profile->get_one_pass_size(), pass_idx, pass_bytes))) {
            LOG_WARN("failed to find interval index", K(*profile));
          } else if (OB_FAIL(wa_intervals[cache_idx].get_interval_stat().analyze_profile(
              *profile, cache_bytes, pass_bytes, MAX_INTERVAL_SIZE))) {
            LOG_WARN("failed to analyze profile", K(*profile));
          } else if (OB_FAIL(wa_intervals[pass_idx].get_interval_stat().analyze_profile(
              *profile, pass_bytes, 0, MAX_INTERVAL_SIZE, true))) {
            LOG_WARN("failed to analyze profile", K(*profile));
          } else {
            total_memory_size += cache_bytes;
            ++cur_profile_cnt;
          }
        }
      } else {
        // profiles that do not use the auto policy are left out of the statistics
      }
    }
  }
  return ret;
}
|
|
|
|
bool ObTenantSqlMemoryManager::enable_auto_sql_memory_manager()
|
|
{
|
|
bool auto_memory_mgr = false;
|
|
ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id_));
|
|
if (tenant_config.is_valid()) {
|
|
const ObString tmp_str(tenant_config->workarea_size_policy.str());
|
|
auto_memory_mgr = !tmp_str.case_compare("AUTO");
|
|
LOG_TRACE("get work area policy config", K(tenant_id_), K(auto_memory_mgr), K(tmp_str),
|
|
K(tenant_config->workarea_size_policy.str()));
|
|
} else {
|
|
LOG_WARN_RET(OB_ERR_UNEXPECTED, "failed to init tenant config", K(tenant_id_));
|
|
}
|
|
return auto_memory_mgr;
|
|
}
|
|
|
|
// The caller is responsible for holding the lock; reset() does no locking itself.
|
|
void ObTenantSqlMemoryManager::reset()
|
|
{
|
|
if (nullptr != profile_lists_) {
|
|
for (int64_t i = 0; i < HASH_CNT; ++i) {
|
|
profile_lists_[i].reset();
|
|
}
|
|
}
|
|
// 统计的内存通过每个operator自己来确定,否则来开与关过程中,存在申请和释放时候,统计不一致
|
|
//sql_mem_callback_.reset();
|
|
drift_size_ = 0;
|
|
profile_cnt_ = 0;
|
|
global_bound_size_ = 0;
|
|
}
|
|
|
|
// try best to push global_bound_size to every profile registered
|
|
int ObTenantSqlMemoryManager::try_push_profiles_work_area_size(int64_t global_bound_size)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
if (nullptr == profile_lists_) {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("profile list is null", K(ret));
|
|
} else {
|
|
for (int64_t i = 0; i < HASH_CNT && OB_SUCC(ret); ++i) {
|
|
if (OB_SUCC(profile_lists_[i].get_lock().trylock())) {
|
|
ObDList<ObSqlWorkAreaProfile> &profile_list = profile_lists_[i].get_profile_list();
|
|
DLIST_FOREACH_X(profile, profile_list, OB_SUCC(ret)) {
|
|
if (profile->get_auto_policy()
|
|
&& OB_FAIL(calc_work_area_size_by_profile(global_bound_size, *profile))) {
|
|
ret = OB_SUCCESS;
|
|
LOG_WARN("failed to calculate worka area size by profile", K(ret), K(*profile),
|
|
K(global_bound_size));
|
|
}
|
|
}
|
|
profile_lists_[i].get_lock().unlock();
|
|
} else {
|
|
ret = OB_SUCCESS;
|
|
}
|
|
}
|
|
}
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
/**
 * Derive the global bound size from the interval statistics and publish it.
 *
 * Phase 1 builds a private ObSqlWorkAreaCalcInfo from wa_intervals_, counts all profiles into
 * it and lets it calculate the bound. Phase 2 runs only when phase 1 succeeded: it stores the
 * result in the manager under mutex_ and pushes it to the registered profiles.
 *
 * @param[in] allocator           allocator used by the calc info; it is handed back to
 *                                calc_info.destroy() on every path
 * @param[in] wa_max_memory_size  memory the work areas may use in total
 * @param[in] auto_calc           forwarded to the calculation; also enables the LOG_INFO trace
 * @return OB_SUCCESS or the first error encountered
 */
int ObTenantSqlMemoryManager::calculate_global_bound_size_by_interval_info(
    ObIAllocator &allocator,
    const int64_t wa_max_memory_size,
    const bool auto_calc)
{
  int ret = OB_SUCCESS;
  ObSqlWorkAreaCalcInfo calc_info;
  int64_t pre_profile_cnt = 0;
  int64_t total_memory_size = 0;
  int64_t cur_profile_cnt = 0;

  // phase 1: compute
  if (OB_FAIL(calc_info.init(allocator, wa_intervals_, INTERVAL_NUM))) {
    LOG_WARN("failed to init work area calc info", K(ret));
  } else {
    pre_profile_cnt = profile_cnt_;
    if (OB_FAIL(count_profile_into_work_area_intervals(
        calc_info.get_wa_intervals(), total_memory_size, cur_profile_cnt))) {
      LOG_WARN("failed to count profiles", K(ret));
    } else if (OB_FAIL(calc_info.calculate_global_bound_size(
        wa_max_memory_size, total_memory_size, pre_profile_cnt, auto_calc))) {
      LOG_WARN("failed to find best interval index", K(ret), K(wa_max_memory_size),
        K(total_memory_size));
    }
  }

  // phase 2: publish
  if (OB_SUCC(ret)) {
    int64_t pre_drift_size = drift_size_;
    {
      lib::ObMutexGuard guard(mutex_);
      drift_size_ = 0;
      pre_profile_cnt_ = cur_profile_cnt;
      global_bound_size_ = calc_info.get_global_bound_size();
      mem_target_ = calc_info.get_mem_target();
      pre_enable_auto_memory_mgr_ = true;
      // last set enable auto memory manager, so others read the variable to avoiding dirty read
      enable_auto_memory_mgr_ = true;
    }
    if (auto_calc) {
      LOG_INFO("timer to calc global bound size", K(ret), K(global_bound_size_),
        K(manual_calc_cnt_), K(drift_size_), K(pre_drift_size), K(wa_max_memory_size),
        K(sql_mem_callback_.get_total_alloc_size()), K(tenant_id_), K(profile_cnt_),
        K(pre_profile_cnt_), K(pre_profile_cnt), K(calc_info.get_global_bound_size()),
        K(total_memory_size), K(cur_profile_cnt), K(calc_info.get_mem_target()), K(auto_calc),
        K(sql_mem_callback_.get_total_dump_size()));
    }
    if (OB_FAIL(try_push_profiles_work_area_size(calc_info.get_global_bound_size()))) {
      LOG_WARN("failed to push profiles work area size",
        K(ret), K(calc_info.get_global_bound_size()));
    }
  }
  // released on every path, including a failed init()
  calc_info.destroy(allocator);
  return ret;
}
|
|
|
|
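// Algorithm steps:
// 0. Split the size range at fixed interval points; each interval stands for one memory range.
// 1. Walk all profiles and map the cache size of each profile to its interval. After the walk
//    every interval holds the number of profiles that fall into it (this is an estimate only,
//    not exact profile information).
// 2. Walk the intervals from back to front and compute the mem_target that would be needed if
//    that interval were used as the bound. Once all of them are computed, compare against the
//    expected mem_target and return the actual bound size.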
|
|
/**
 * Recalculate the global bound size for the tenant, following the configured policy.
 *
 * With the manual policy the manager is switched off, and its state is reset if the automatic
 * policy was active before. With the automatic policy the state is reset on the first run after
 * a switch, the maximum work-area memory is determined, and the bound is either set to
 * min_bound_size_ (no memory available) or derived from the interval statistics.
 *
 * @param[in] allocator  allocator for the calculation; allocator_ is used when it is null
 * @param[in] auto_calc  forwarded to the helpers; also enables the LOG_INFO traces
 * @return OB_SUCCESS or the first error encountered
 */
int ObTenantSqlMemoryManager::calculate_global_bound_size(ObIAllocator *allocator, bool auto_calc)
{
  int ret = OB_SUCCESS;
  int64_t wa_max_memory_size = 0;
  // set enable_auto_sql_memory_mgr after calculate global bound size
  if (!enable_auto_sql_memory_manager()) {
    // manually memory manager
    lib::ObMutexGuard guard(mutex_);
    enable_auto_memory_mgr_ = false;
    if (pre_enable_auto_memory_mgr_) {
      reset();
      pre_enable_auto_memory_mgr_ = false;
    }
  } else {
    {
      lib::ObMutexGuard guard(mutex_);
      if (pre_enable_auto_memory_mgr_) {
        // auto mode was already active, keep the current state
      } else if (enable_auto_memory_mgr_) {
        // the enable flag must not be set while the previous run was not in auto mode
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("unexpected status", K(ret));
      } else {
        reset();
      }
    }
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(get_max_work_area_size(wa_max_memory_size, auto_calc))) {
      LOG_WARN("failed to get percent");
    } else if (0 == wa_max_memory_size) {
      // no memory for work areas: fall back to the minimal bound
      lib::ObMutexGuard guard(mutex_);
      global_bound_size_ = min_bound_size_;
      drift_size_ = 0;
      pre_enable_auto_memory_mgr_ = true;
      // last set enable auto memory manager, so others read the variable to avoiding dirty read
      enable_auto_memory_mgr_ = true;
      if (auto_calc) {
        LOG_INFO("work area memory zero", K(tenant_id_), K(global_bound_size_));
      }
    } else {
      ObIAllocator *calc_allocator = OB_ISNULL(allocator) ? &allocator_ : allocator;
      if (OB_FAIL(calculate_global_bound_size_by_interval_info(
          *calc_allocator, wa_max_memory_size, auto_calc))) {
        LOG_WARN("failed to calculate global bound size", K(ret));
      }
    }
  }
  return ret;
}
|
|
|
|
/**
 * Append the recorded work-area statistics to wa_stats.
 *
 * workarea_stats_ is read as a ring: wa_cnt_ entries starting at wa_start_, wrapping around at
 * MAX_WORKAREA_STAT_CNT. The read latch on lock_ is held for the whole copy.
 *
 * @param[out] wa_stats  array the entries are appended to
 * @return OB_SUCCESS or the error of the failing push_back()
 */
int ObTenantSqlMemoryManager::get_workarea_stat(ObIArray<ObSqlWorkAreaStat> &wa_stats)
{
  int ret = OB_SUCCESS;
  ObLatchRGuard guard(lock_, ObLatchIds::SQL_WA_STAT_MAP_LOCK);
  int64_t pos = wa_start_;
  while (OB_SUCC(ret) && pos < wa_start_ + wa_cnt_) {
    // wrap the logical position into the ring
    const int64_t nth = pos % MAX_WORKAREA_STAT_CNT;
    if (OB_FAIL(wa_stats.push_back(workarea_stats_.at(nth)))) {
      LOG_WARN("failed to push back workarea stat", K(ret));
    } else {
      LOG_TRACE("trace workarea history", K(workarea_stats_.at(nth)),
        K(wa_stats.at(wa_stats.count() - 1)));
    }
    ++pos;
  }
  return ret;
}
|
|
|
|
/**
 * Append every entry of workarea_histograms_ to wa_histograms.
 *
 * No lock is taken here.
 * NOTE(review): get_workarea_stat() copies under lock_; confirm that an unlocked read of the
 * histograms is intended.
 *
 * @param[out] wa_histograms  array the entries are appended to
 * @return OB_SUCCESS or the error of the failing push_back()
 */
int ObTenantSqlMemoryManager::get_workarea_histogram(
    common::ObIArray<ObWorkareaHistogram> &wa_histograms)
{
  int ret = OB_SUCCESS;
  int64_t cnt = workarea_histograms_.count();
  for (int64_t i = 0; i < cnt && OB_SUCC(ret); ++i) {
    if (OB_FAIL(wa_histograms.push_back(workarea_histograms_.at(i)))) {
      // the message names the histogram so it is not mistaken for get_workarea_stat()
      LOG_WARN("failed to push back workarea histogram", K(ret));
    } else {
      LOG_TRACE("trace workarea histogram", K(workarea_histograms_.at(i)),
        K(wa_histograms.at(wa_histograms.count() - 1)));
    }
  }
  return ret;
}
|
|
|
|
/**
 * Collect a snapshot of every registered work-area profile.
 *
 * Each hash bucket is walked while holding that bucket's lock. For every profile a copy of the
 * profile plus its plan id, exec id, sql id and session id is appended to wa_actives.
 *
 * @param[out] wa_actives  array the snapshots are appended to
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED when the profile lists are missing, or the error of the
 *         failing push_back()
 */
int ObTenantSqlMemoryManager::get_all_active_workarea(
    ObIArray<ObSqlWorkareaProfileInfo> &wa_actives)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(profile_lists_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("profile list is null", K(ret));
  }
  for (int64_t bucket = 0; OB_SUCC(ret) && bucket < HASH_CNT; ++bucket) {
    ObLockGuard<ObSpinLock> lock_guard(profile_lists_[bucket].get_lock());
    ObDList<ObSqlWorkAreaProfile> &profile_list = profile_lists_[bucket].get_profile_list();
    DLIST_FOREACH_X(profile, profile_list, OB_SUCC(ret)) {
      ObSqlWorkareaProfileInfo snapshot;
      snapshot.profile_ = *profile;
      snapshot.plan_id_ = profile->get_plan_id();
      snapshot.sql_exec_id_ = profile->get_exec_id();
      snapshot.set_sql_id(profile->get_sql_id());
      snapshot.session_id_ = profile->get_session_id();
      if (OB_FAIL(wa_actives.push_back(snapshot))) {
        LOG_WARN("failed to push back profile", K(ret));
      }
    }
  }
  return ret;
}
|
|
|
|
/**
 * Copy the manager's current counters into memory_info.
 *
 * For now this is only a transient snapshot: the members are read without any lock and
 * concurrency is not taken into account.
 *
 * @param[out] memory_info  receives the current values
 * @return always OB_SUCCESS
 */
int ObTenantSqlMemoryManager::get_workarea_memory_info(
    ObSqlWorkareaCurrentMemoryInfo &memory_info)
{
  // switch and limits
  memory_info.enable_ = enable_auto_memory_mgr_;
  memory_info.max_workarea_size_ = max_workarea_size_;
  memory_info.workarea_hold_size_ = workarea_hold_size_;
  memory_info.max_auto_workarea_size_ = max_auto_workarea_size_;
  // current target and usage
  memory_info.mem_target_ = mem_target_;
  memory_info.total_mem_used_ = sql_mem_callback_.get_total_alloc_size();
  memory_info.global_bound_size_ = global_bound_size_;
  memory_info.drift_size_ = drift_size_;
  // counters
  memory_info.workarea_cnt_ = profile_cnt_;
  memory_info.manual_calc_cnt_ = manual_calc_cnt_;
  return OB_SUCCESS;
}
|
|
|
|
} // sql
|
|
} // oceanbase
|