[feature] (memory) Switch TLS mem tracker to separate more detailed memory usage (#8605)

In PR #8476, all memory usage of a process is recorded in the process mem tracker
and all memory usage of a query in the query mem tracker,
but it is still necessary to manually call `transfer to` to track the cached memory size.

We hope to separate out more detailed memory usage based on Hook TCMalloc new/delete + TLS mem tracker.

In this PR, the detailed mem trackers are switched through the TLS (thread-local) mem tracker,
which accounts for detailed memory usage automatically and more accurately than before.
This commit is contained in:
Xinyi Zou
2022-03-24 14:29:34 +08:00
committed by GitHub
parent 5f606c9d57
commit aaaaae53b5
22 changed files with 202 additions and 86 deletions

View File

@ -26,6 +26,7 @@
#include "util/cpu_info.h"
#include "util/pretty_printer.h"
#include "util/runtime_profile.h"
#include "runtime/thread_context.h"
//DECLARE_bool(disable_mem_pools);
@ -48,7 +49,7 @@ public:
/// Add a free buffer to the free lists. May free buffers to the system allocator
/// if the list becomes full. Caller should not hold 'lock_'
void AddFreeBuffer(BufferHandle&& buffer);
bool AddFreeBuffer(BufferHandle&& buffer);
/// Try to get a free buffer of 'buffer_len' bytes from this arena. Returns true and
/// sets 'buffer' if found or false if not found. Caller should not hold 'lock_'.
@ -193,7 +194,8 @@ BufferPool::BufferAllocator::BufferAllocator(BufferPool* pool, int64_t min_buffe
clean_page_bytes_limit_(clean_page_bytes_limit),
clean_page_bytes_remaining_(clean_page_bytes_limit),
per_core_arenas_(CpuInfo::get_max_num_cores()),
max_scavenge_attempts_(MAX_SCAVENGE_ATTEMPTS) {
max_scavenge_attempts_(MAX_SCAVENGE_ATTEMPTS),
_mem_tracker(MemTracker::create_virtual_tracker(-1, "BufferAllocator", nullptr, MemTrackerLevel::OVERVIEW)) {
DCHECK(BitUtil::IsPowerOf2(min_buffer_len_)) << min_buffer_len_;
DCHECK(BitUtil::IsPowerOf2(max_buffer_len_)) << max_buffer_len_;
DCHECK_LE(0, min_buffer_len_);
@ -303,6 +305,7 @@ Status BufferPool::BufferAllocator::AllocateInternal(int64_t len, BufferHandle*
system_bytes_remaining_.add(len);
return status;
}
_mem_tracker->consume_cache(len);
return Status::OK();
}
@ -375,7 +378,9 @@ void BufferPool::BufferAllocator::Free(BufferHandle&& handle) {
handle.client_ = nullptr; // Buffer is no longer associated with a client.
FreeBufferArena* arena = per_core_arenas_[handle.home_core_].get();
handle.Poison();
arena->AddFreeBuffer(std::move(handle));
if (!arena->AddFreeBuffer(std::move(handle))) {
_mem_tracker->release_cache(handle.len());
}
}
void BufferPool::BufferAllocator::AddCleanPage(const std::unique_lock<std::mutex>& client_lock,
@ -426,6 +431,7 @@ int64_t BufferPool::BufferAllocator::FreeToSystem(std::vector<BufferHandle>&& bu
buffer.Unpoison();
system_allocator_->Free(std::move(buffer));
}
_mem_tracker->release_cache(bytes_freed);
return bytes_freed;
}
@ -485,16 +491,17 @@ BufferPool::FreeBufferArena::~FreeBufferArena() {
}
}
void BufferPool::FreeBufferArena::AddFreeBuffer(BufferHandle&& buffer) {
bool BufferPool::FreeBufferArena::AddFreeBuffer(BufferHandle&& buffer) {
std::lock_guard<SpinLock> al(lock_);
if (config::disable_mem_pools) {
int64_t len = buffer.len();
parent_->system_allocator_->Free(std::move(buffer));
parent_->system_bytes_remaining_.add(len);
return;
return false;
}
PerSizeLists* lists = GetListsForSize(buffer.len());
lists->AddFreeBuffer(std::move(buffer));
return true;
}
bool BufferPool::FreeBufferArena::RemoveCleanPage(bool claim_buffer, Page* page) {

View File

@ -21,6 +21,7 @@
#include "runtime/bufferpool/buffer_pool_internal.h"
#include "runtime/bufferpool/free_list.h"
#include "util/aligned_new.h"
#include "runtime/mem_tracker.h"
namespace doris {
@ -235,6 +236,8 @@ private:
/// all arenas so may fail. The final attempt locks all arenas, which is expensive
/// but is guaranteed to succeed.
int max_scavenge_attempts_;
std::shared_ptr<MemTracker> _mem_tracker;
};
} // namespace doris

View File

@ -99,6 +99,7 @@ public:
// Poison this chunk to make asan can detect invalid access
ASAN_POISON_MEMORY_REGION(ptr, size);
std::lock_guard<SpinLock> l(_lock);
// TODO(zxy) The memory of vector resize is not recorded in chunk allocator mem tracker
_chunk_lists[idx].push_back(ptr);
}
@ -118,9 +119,13 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit)
_arenas(CpuInfo::get_max_num_cores()) {
_mem_tracker =
MemTracker::create_tracker(-1, "ChunkAllocator", nullptr, MemTrackerLevel::OVERVIEW);
SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker);
for (int i = 0; i < _arenas.size(); ++i) {
_arenas[i].reset(new ChunkArena());
}
// After the ChunkAllocator is created in the main thread, the main thread will not switch to the
// chunk allocator mem tracker again, so manually clear the untracked mem in tls.
thread_local_ctx.get()->_thread_mem_tracker_mgr->clear_untracked_mems();
_chunk_allocator_metric_entity =
DorisMetrics::instance()->metric_registry()->register_entity("chunk_allocator");

View File

@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <gperftools/malloc_hook.h>
#include <gperftools/nallocx.h>
#include <gperftools/tcmalloc.h>

View File

@ -25,14 +25,29 @@
#include "runtime/runtime_state.h"
#include "runtime/thread_mem_tracker_mgr.h"
#include "runtime/threadlocal.h"
#include "util/doris_metrics.h"
// Attach to task when thread starts
#define SCOPED_ATTACH_TASK_THREAD(type, ...) \
auto VARNAME_LINENUM(attach_task_thread) = AttachTaskThread(type, ##__VA_ARGS__)
// Be careful when stopping the thread mem tracker: the actual order of malloc/free calls
// may differ from the instruction execution order, so stopping the tracker at the wrong
// point can cause memory to be attributed unexpectedly.
#define SCOPED_STOP_THREAD_LOCAL_MEM_TRACKER() \
auto VARNAME_LINENUM(stop_tracker) = StopThreadMemTracker(true)
#define GLOBAL_STOP_THREAD_LOCAL_MEM_TRACKER() \
auto VARNAME_LINENUM(stop_tracker) = StopThreadMemTracker(false)
// Switch thread mem tracker during task execution.
// After the non-query thread switches the mem tracker, if the thread will not switch the mem
// tracker again in the short term, can consider manually clear_untracked_mems.
// The query thread will automatically clear_untracked_mems when detach_task.
#define SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(mem_tracker) \
auto VARNAME_LINENUM(switch_tracker) = SwitchThreadMemTracker(mem_tracker, false)
#define SCOPED_SWITCH_TASK_THREAD_LOCAL_MEM_TRACKER(mem_tracker) \
auto VARNAME_LINENUM(switch_tracker) = SwitchThreadMemTracker(mem_tracker, true);
#define SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB(action_type, ...) \
auto VARNAME_LINENUM(witch_tracker_cb) = \
SwitchThreadMemTrackerErrCallBack(action_type, ##__VA_ARGS__)
namespace doris {
@ -72,7 +87,7 @@ public:
_type = type;
_task_id = task_id;
_fragment_instance_id = fragment_instance_id;
_thread_mem_tracker_mgr->attach_task(task_type_string(_type), task_id, fragment_instance_id,
_thread_mem_tracker_mgr->attach_task(TaskTypeStr[_type], task_id, fragment_instance_id,
mem_tracker);
}
@ -88,10 +103,6 @@ public:
const std::string& thread_id_str() const { return _thread_id_str; }
const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
inline static const std::string task_type_string(ThreadContext::TaskType type) {
return TaskTypeStr[type];
}
void consume_mem(int64_t size) {
if (start_thread_mem_tracker) {
_thread_mem_tracker_mgr->cache_consume(size);
@ -166,13 +177,13 @@ public:
explicit AttachTaskThread(const ThreadContext::TaskType& type,
const std::shared_ptr<MemTracker>& mem_tracker) {
DCHECK(mem_tracker != nullptr);
DCHECK(mem_tracker);
thread_local_ctx.get()->attach(type, "", TUniqueId(), mem_tracker);
}
explicit AttachTaskThread(const TQueryType::type& query_type,
const std::shared_ptr<MemTracker>& mem_tracker) {
DCHECK(mem_tracker != nullptr);
DCHECK(mem_tracker);
thread_local_ctx.get()->attach(query_to_task_type(query_type), "", TUniqueId(),
mem_tracker);
}
@ -182,7 +193,7 @@ public:
const std::shared_ptr<MemTracker>& mem_tracker) {
DCHECK(task_id != "");
DCHECK(fragment_instance_id != TUniqueId());
DCHECK(mem_tracker != nullptr);
DCHECK(mem_tracker);
thread_local_ctx.get()->attach(query_to_task_type(query_type), task_id,
fragment_instance_id, mem_tracker);
}
@ -192,7 +203,7 @@ public:
#ifndef BE_TEST
DCHECK(print_id(runtime_state->query_id()) != "");
DCHECK(runtime_state->fragment_instance_id() != TUniqueId());
DCHECK(mem_tracker != nullptr);
DCHECK(mem_tracker);
thread_local_ctx.get()->attach(query_to_task_type(runtime_state->query_type()),
print_id(runtime_state->query_id()),
runtime_state->fragment_instance_id(), mem_tracker);
@ -211,7 +222,12 @@ public:
}
}
~AttachTaskThread() { thread_local_ctx.get()->detach(); }
// Detaches the current thread from its task on scope exit and counts the
// attach/detach cycle in the process metrics. Compiled out under BE_TEST,
// where no thread context / metrics instance is expected to exist.
~AttachTaskThread() {
#ifndef BE_TEST
thread_local_ctx.get()->detach();
// NOTE(review): incremented on detach, so this counts completed attach scopes.
DorisMetrics::instance()->attach_task_thread_count->increment(1);
#endif
}
};
class StopThreadMemTracker {
@ -228,4 +244,49 @@ private:
bool _scope;
};
// RAII guard that switches the thread-local (TLS) mem tracker for the duration
// of a scope. The constructor saves the id of the previously active tracker and
// installs 'mem_tracker'; the destructor restores the saved tracker id.
// Used via the SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER* macros above.
class SwitchThreadMemTracker {
public:
// mem_tracker: the tracker to install for this scope (must be non-null).
// in_task: when true, DCHECKs that a task is already attached to this thread.
explicit SwitchThreadMemTracker(const std::shared_ptr<MemTracker>& mem_tracker,
bool in_task = true) {
#ifndef BE_TEST
DCHECK(mem_tracker);
// The thread tracker must be switched after the attach task, otherwise switching
// in the main thread will cause the cached tracker not be cleaned up in time.
DCHECK(in_task == false ||
thread_local_ctx.get()->_thread_mem_tracker_mgr->is_attach_task());
// update_tracker() returns the id of the tracker that was active before,
// so it can be restored in the destructor.
_old_tracker_id =
thread_local_ctx.get()->_thread_mem_tracker_mgr->update_tracker(mem_tracker);
#endif
}
~SwitchThreadMemTracker() {
#ifndef BE_TEST
// Restore the previously active tracker and record the switch in metrics.
thread_local_ctx.get()->_thread_mem_tracker_mgr->update_tracker_id(_old_tracker_id);
DorisMetrics::instance()->switch_thread_mem_tracker_count->increment(1);
#endif
}
private:
// Id of the tracker that was active before this scope; restored on destruction.
std::string _old_tracker_id;
};
// RAII guard that temporarily replaces the thread-local consume-error callback
// info (cancel message, cancel-task flag, and error callback function) and
// restores the previous callback info when the scope ends.
// Used via the SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER_ERR_CB macro above.
class SwitchThreadMemTrackerErrCallBack {
public:
// action_type: non-empty message used when the tracker limit is exceeded.
// cancel_work: whether exceeding the limit should cancel the current task.
// err_call_back_func: optional function invoked on limit exceeded (may be null).
explicit SwitchThreadMemTrackerErrCallBack(const std::string& action_type,
bool cancel_work = true,
ERRCALLBACK err_call_back_func = nullptr) {
DCHECK(action_type != std::string());
// update_consume_err_cb() returns the previous callback info for restoration.
_old_tracker_cb = thread_local_ctx.get()->_thread_mem_tracker_mgr->update_consume_err_cb(
action_type, cancel_work, err_call_back_func);
}
~SwitchThreadMemTrackerErrCallBack() {
// Restore the saved callback info and record the switch in metrics.
thread_local_ctx.get()->_thread_mem_tracker_mgr->update_consume_err_cb(_old_tracker_cb);
DorisMetrics::instance()->switch_thread_mem_tracker_err_cb_count->increment(1);
}
private:
// Callback info that was active before this scope; restored on destruction.
ConsumeErrCallBackInfo _old_tracker_cb;
};
} // namespace doris

View File

@ -22,19 +22,21 @@
namespace doris {
void ThreadMemTrackerMgr::attach_task(const std::string& action_type, const std::string& task_id,
void ThreadMemTrackerMgr::attach_task(const std::string& cancel_msg, const std::string& task_id,
const TUniqueId& fragment_instance_id,
const std::shared_ptr<MemTracker>& mem_tracker) {
_task_id = task_id;
_fragment_instance_id = fragment_instance_id;
_consume_err_call_back.update(action_type, true, nullptr);
_consume_err_cb.cancel_msg = cancel_msg;
if (mem_tracker == nullptr) {
#ifdef BE_TEST
if (ExecEnv::GetInstance()->task_pool_mem_tracker_registry() == nullptr) {
return;
}
#endif
_temp_task_mem_tracker = ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->get_task_mem_tracker(task_id);
_temp_task_mem_tracker =
ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->get_task_mem_tracker(
task_id);
update_tracker(_temp_task_mem_tracker);
} else {
update_tracker(mem_tracker);
@ -44,7 +46,7 @@ void ThreadMemTrackerMgr::attach_task(const std::string& action_type, const std:
void ThreadMemTrackerMgr::detach_task() {
_task_id = "";
_fragment_instance_id = TUniqueId();
_consume_err_call_back.init();
_consume_err_cb.init();
clear_untracked_mems();
_tracker_id = "process";
// The following memory changes for the two map operations of _untracked_mems and _mem_trackers
@ -70,12 +72,12 @@ void ThreadMemTrackerMgr::exceeded_cancel_task(const std::string& cancel_details
void ThreadMemTrackerMgr::exceeded(int64_t mem_usage, Status st) {
auto rst = _mem_trackers[_tracker_id]->mem_limit_exceeded(
nullptr, "In TCMalloc Hook, " + _consume_err_call_back.action_type, mem_usage, st);
if (_consume_err_call_back.call_back_func != nullptr) {
_consume_err_call_back.call_back_func();
nullptr, "In TCMalloc Hook, " + _consume_err_cb.cancel_msg, mem_usage, st);
if (_consume_err_cb.cb_func != nullptr) {
_consume_err_cb.cb_func();
}
if (_task_id != "") {
if (_consume_err_call_back.cancel_task == true) {
if (is_attach_task()) {
if (_consume_err_cb.cancel_task == true) {
exceeded_cancel_task(rst.to_string());
} else {
// TODO(zxy) Need other processing, or log (not too often).

View File

@ -28,27 +28,19 @@ namespace doris {
typedef void (*ERRCALLBACK)();
struct ConsumeErrCallBackInfo {
std::string action_type;
std::string cancel_msg;
bool cancel_task; // Whether to cancel the task when the current tracker exceeds the limit
ERRCALLBACK call_back_func;
ERRCALLBACK cb_func;
ConsumeErrCallBackInfo() {
init();
}
ConsumeErrCallBackInfo() { init(); }
ConsumeErrCallBackInfo(std::string action_type, bool cancel_task, ERRCALLBACK call_back_func)
: action_type(action_type), cancel_task(cancel_task), call_back_func(call_back_func) {}
void update(std::string new_action_type, bool new_cancel_task, ERRCALLBACK new_call_back_func) {
action_type = new_action_type;
cancel_task = new_cancel_task;
call_back_func = new_call_back_func;
}
ConsumeErrCallBackInfo(const std::string& cancel_msg, bool cancel_task, ERRCALLBACK cb_func)
: cancel_msg(cancel_msg), cancel_task(cancel_task), cb_func(cb_func) {}
void init() {
action_type = "";
cancel_msg = "";
cancel_task = false;
call_back_func = nullptr;
cb_func = nullptr;
}
};
@ -80,7 +72,7 @@ public:
}
void clear_untracked_mems() {
for(auto untracked_mem : _untracked_mems) {
for (const auto& untracked_mem : _untracked_mems) {
if (untracked_mem.second != 0) {
DCHECK(_mem_trackers[untracked_mem.first]);
_mem_trackers[untracked_mem.first]->consume(untracked_mem.second);
@ -91,7 +83,7 @@ public:
}
// After attach, the current thread TCMalloc Hook starts to consume/release task mem_tracker
void attach_task(const std::string& action_type, const std::string& task_id,
void attach_task(const std::string& cancel_msg, const std::string& task_id,
const TUniqueId& fragment_instance_id,
const std::shared_ptr<MemTracker>& mem_tracker);
@ -101,6 +93,27 @@ public:
// Thread update_tracker may be called very frequently, adding a memory copy will be slow.
std::string update_tracker(const std::shared_ptr<MemTracker>& mem_tracker);
// Switch the active tracker by id. Before switching, flushes the pending
// untracked bytes (_untracked_mem) into the per-tracker bucket of the tracker
// being switched away from, so no consumed memory is lost across the switch.
// No-op when 'tracker_id' is already the active tracker.
void update_tracker_id(const std::string& tracker_id) {
if (tracker_id != _tracker_id) {
_untracked_mems[_tracker_id] += _untracked_mem;
_untracked_mem = 0;
_tracker_id = tracker_id;
}
}
// Replace the current consume-error callback info field by field and return the
// previous info so the caller can restore it later (see
// SwitchThreadMemTrackerErrCallBack).
// NOTE(review): uses the member _temp_consume_err_cb rather than a local,
// presumably to avoid allocating inside the TCMalloc hook path — confirm.
inline ConsumeErrCallBackInfo update_consume_err_cb(const std::string& cancel_msg,
bool cancel_task, ERRCALLBACK cb_func) {
_temp_consume_err_cb = _consume_err_cb;
_consume_err_cb.cancel_msg = cancel_msg;
_consume_err_cb.cancel_task = cancel_task;
_consume_err_cb.cb_func = cb_func;
return _temp_consume_err_cb;
}
// Restore a previously saved consume-error callback info wholesale
// (counterpart of the three-argument overload above).
inline void update_consume_err_cb(const ConsumeErrCallBackInfo& consume_err_cb) {
_consume_err_cb = consume_err_cb;
}
// Note that, If call the memory allocation operation in TCMalloc new/delete Hook,
// such as calling LOG/iostream/sstream/stringstream/etc. related methods,
// must increase the control to avoid entering infinite recursion, otherwise it may cause crash or stuck,
@ -108,6 +121,8 @@ public:
void noncache_consume();
// True when a task is currently attached to this thread (a non-empty task id
// is set by attach_task and cleared by detach_task).
bool is_attach_task() { return _task_id != ""; }
std::shared_ptr<MemTracker> mem_tracker() {
DCHECK(_mem_trackers[_tracker_id]);
return _mem_trackers[_tracker_id];
@ -137,15 +152,16 @@ private:
// Avoid memory allocation in functions and fall into an infinite loop
std::string _temp_tracker_id;
ConsumeErrCallBackInfo _temp_consume_err_call_back;
ConsumeErrCallBackInfo _temp_consume_err_cb;
std::shared_ptr<MemTracker> _temp_task_mem_tracker;
std::string _task_id;
TUniqueId _fragment_instance_id;
ConsumeErrCallBackInfo _consume_err_call_back;
ConsumeErrCallBackInfo _consume_err_cb;
};
inline std::string ThreadMemTrackerMgr::update_tracker(const std::shared_ptr<MemTracker>& mem_tracker) {
inline std::string ThreadMemTrackerMgr::update_tracker(
const std::shared_ptr<MemTracker>& mem_tracker) {
DCHECK(mem_tracker);
_temp_tracker_id = mem_tracker->id();
if (_temp_tracker_id == _tracker_id) {