Added config to disable NUMA in mot.conf to facilitate openGauss deployment in containers

This commit is contained in:
Vinoth
2020-08-10 21:54:19 +08:00
parent 1695aa3e8c
commit 44fa38987d
16 changed files with 107 additions and 37 deletions

View File

@ -27,7 +27,7 @@
#include "thread_id.h"
#include "session_context.h"
#include "sys_numa_api.h"
#include "mm_numa.h"
#include "mm_api.h"
#include "mot_error.h"
@ -161,7 +161,7 @@ bool StatisticsProvider::ReserveThreadSlot()
void* buffer = nullptr;
// since statistics provider is created before MemInit(), it is preferred to keep it clean from MM API calls
if (GetGlobalConfiguration().m_numaNodes > 1) {
buffer = MotSysNumaAllocOnNode(m_generator->GetObjectSize(), node);
buffer = MemNumaAllocLocal(m_generator->GetObjectSize(), node);
} else {
buffer = malloc(m_generator->GetObjectSize());
}
@ -317,9 +317,10 @@ void StatisticsProvider::FreeThreadStats(MOTThreadId threadId, ThreadStatistics*
{
MOT_LOG_TRACE("Reclaiming %s statistics thread slot for thread id %" PRIu16, GetName(), threadId);
void* buffer = (void*)threadStats->GetInPlaceBuffer();
int node = threadStats->GetNodeId();
threadStats->~ThreadStatistics();
if (GetGlobalConfiguration().m_numaNodes > 1) {
MotSysNumaFree(buffer, m_generator->GetObjectSize());
MemNumaFreeLocal(buffer, m_generator->GetObjectSize(), node);
} else {
free(buffer);
}

View File

@ -28,6 +28,7 @@
#include "mot_string.h"
#include "mot_vector.h"
#include "statistic_variable.h"
#include "session_context.h"
namespace MOT {
/**
@ -50,7 +51,7 @@ public:
* @see MakeName().
*/
explicit ThreadStatistics(uint64_t threadId, void* inplaceBuffer = nullptr)
: m_threadId(threadId), m_inplaceBuffer(inplaceBuffer)
: m_threadId(threadId), m_node(MOTCurrentNumaNodeId), m_inplaceBuffer(inplaceBuffer)
{}
/** @brief Destructor. */
@ -67,8 +68,7 @@ public:
}
/**
* @brief Retrieves the logical identifier of the thread that these statistics
* describe.
* @brief Retrieves the logical identifier of the thread that these statistics describe.
* @return The logical thread identifier.
*/
inline uint64_t GetThreadId() const
@ -76,6 +76,15 @@ public:
return m_threadId;
}
/**
* @brief Retrieves the identifier of the NUMA node recorded for this object.
* @detail The value is taken from MOTCurrentNumaNodeId when the object is constructed.
* StatisticsProvider::FreeThreadStats() passes it to MemNumaFreeLocal() when releasing the in-place buffer.
* @note NOTE(review): the in-place buffer is allocated on the node chosen by
* StatisticsProvider::ReserveThreadSlot(); presumably that is the same node as the constructing thread's
* current node - TODO confirm, otherwise per-node memory accounting is decremented on the wrong node.
* @return The NUMA node identifier.
*/
inline int GetNodeId() const
{
return m_node;
}
/**
* @brief Retrieves the in-place buffer used to allocate this object.
* @return The in-place buffer.
@ -166,6 +175,9 @@ private:
/** @var The logical identifier of the thread to which this set of statistic variables belong. */
uint64_t m_threadId;
/** @var The identifier of the NUMA node from which this object's memory is allocated. */
int m_node;
/** @var The set of managed statistic variables. */
mot_vector<StatisticVariable*> m_statVars;

View File

@ -32,7 +32,7 @@
#include "mm_def.h"
#include "mot_atomic_ops.h"
#include "memory_statistics.h"
#include "mot_configuration.h"
#include "sys_numa_api.h"
namespace MOT {
@ -110,7 +110,7 @@ extern void MemNumaDestroy()
extern void* MemNumaAllocLocal(uint64_t size, int node)
{
// do not impose hard limits!
void* result = MotSysNumaAllocOnNode(size, node);
void* result = GetGlobalConfiguration().m_enableNuma ? MotSysNumaAllocOnNode(size, node) : malloc(size);
if (result != NULL) {
// update statistics if succeeded
UpdateLocalStats(size, node);
@ -137,8 +137,7 @@ extern void* MemNumaAllocLocal(uint64_t size, int node)
extern void* MemNumaAllocGlobal(uint64_t size)
{
// do not impose hard limits!
// void* result = (g_memGlobalCfg.m_nodeCount > 1) ? numa_alloc_interleaved(size) : malloc(size);
void* result = MotSysNumaAllocInterleaved(size);
void* result = GetGlobalConfiguration().m_enableNuma ? MotSysNumaAllocInterleaved(size) : malloc(size);
if (result != NULL) {
// update statistics if succeeded
UpdateGlobalStats(size);
@ -158,10 +157,19 @@ extern void* MemNumaAllocGlobal(uint64_t size)
return result;
}
/**
 * @brief Allocates aligned memory from the process heap (non-NUMA fallback for the aligned allocators).
 * @detail Thin wrapper over posix_memalign() that gives it a malloc()-like contract: the result is
 * returned directly and failures are reported through errno. The returned buffer must be released
 * with free().
 * @param align The required alignment in bytes. As demanded by posix_memalign(), it must be a power
 * of two and a multiple of sizeof(void*).
 * @param size The number of bytes to allocate.
 * @return The aligned buffer, or nullptr on failure, in which case errno holds the error code
 * reported by posix_memalign() (EINVAL for a bad alignment, ENOMEM when out of memory).
 */
static inline void* MallocAligned(size_t align, size_t size)
{
    void* result = nullptr;
    const int rc = posix_memalign(&result, align, size);
    if (rc != 0) {
        // posix_memalign() reports errors through its return value and does not set errno, so publish
        // the code here. Return nullptr explicitly rather than trusting the out-parameter, whose
        // contents after a failure are not guaranteed by older POSIX editions.
        errno = rc;
        return nullptr;
    }
    // on success errno is deliberately left untouched (library code must never reset errno to zero)
    return result;
}
extern void* MemNumaAllocAlignedLocal(uint64_t size, uint64_t align, int node)
{
// do not impose hard limits!
void* result = MotSysNumaAllocAlignedOnNode(size, align, node);
void* result = GetGlobalConfiguration().m_enableNuma ? MotSysNumaAllocAlignedOnNode(size, align, node)
: MallocAligned(align, size);
if (result != NULL) {
// update statistics if succeeded
UpdateLocalStats(size, node);
@ -192,7 +200,8 @@ extern void* MemNumaAllocAlignedLocal(uint64_t size, uint64_t align, int node)
extern void* MemNumaAllocAlignedGlobal(uint64_t size, uint64_t align)
{
void* result = MotSysNumaAllocAlignedInterleaved(size, align);
void* result = GetGlobalConfiguration().m_enableNuma ? MotSysNumaAllocAlignedInterleaved(size, align)
: MallocAligned(align, size);
if (result != NULL) {
// update statistics if succeeded
UpdateGlobalStats(size);
@ -221,7 +230,11 @@ extern void* MemNumaAllocAlignedGlobal(uint64_t size, uint64_t align)
extern void MemNumaFreeLocal(void* buf, uint64_t size, int node)
{
MotSysNumaFree(buf, size);
if (GetGlobalConfiguration().m_enableNuma) {
MotSysNumaFree(buf, size);
} else {
free(buf);
}
uint64_t memUsed = MOT_ATOMIC_SUB(localMemUsedBytes[node], size);
MOT_LOG_DIAG1("Decreased local node %d memory usage to %" PRIu64 " bytes", node, memUsed);
MemoryStatisticsProvider::m_provider->AddNumaLocalAllocated(-((int64_t)size));
@ -230,7 +243,11 @@ extern void MemNumaFreeLocal(void* buf, uint64_t size, int node)
extern void MemNumaFreeGlobal(void* buf, uint64_t size)
{
MotSysNumaFree(buf, size);
if (GetGlobalConfiguration().m_enableNuma) {
MotSysNumaFree(buf, size);
} else {
free(buf);
}
uint64_t memUsed = MOT_ATOMIC_SUB(globalMemUsedBytes, size);
MOT_LOG_DIAG1("Decreased global memory usage to %" PRIu64 " bytes", memUsed);
MemoryStatisticsProvider::m_provider->AddNumaInterleavedAllocated(-((int64_t)size));

View File

@ -1028,9 +1028,10 @@ static void* ReserveWorker(void* param)
if (workerId > 0) { // first worker is invoked in caller context and not spawned in a new thread
AllocThreadId();
}
// we must be affined to the correct NUMA node for native allocation policy
// but it is better anyway to be affined to the NUMA node of the pool
if (!GetTaskAffinity().SetNodeAffinity(chunkPool->m_node)) {
if (GetGlobalConfiguration().m_enableNuma && !GetTaskAffinity().SetNodeAffinity(chunkPool->m_node)) {
if (g_memGlobalCfg.m_chunkAllocPolicy == MEM_ALLOC_POLICY_NATIVE) {
MOT_LOG_WARN("Failed to set chunk reservation worker affinity, chunk pre-allocation performance may be "
"affected, and table data distribution may be affected");

View File

@ -165,6 +165,11 @@
# MEMORY
#------------------------------------------------------------------------------
# Specifies whether to use NUMA-aware memory allocation.
# When disabled, all affinity configurations are disabled as well.
#
#enable_numa = true
# Configures the maximum number of threads allowed to run in MOT engine.
# When not using a thread pool, this value in effect restricts the number of sessions that can
# interact concurrently with MOT tables. This value does not restrict non-MOT sessions.

View File

@ -431,11 +431,11 @@ void CheckpointManager::FillTasksQueue()
MOT_LOG_DEBUG("CheckpointManager::fillTasksQueue:: got %d tasks", m_tasksList.size());
}
void CheckpointManager::UnlockAndClearTables(std::list<Table *>& tables)
void CheckpointManager::UnlockAndClearTables(std::list<Table*>& tables)
{
std::list<Table *>::iterator it;
std::list<Table*>::iterator it;
for (it = tables.begin(); it != tables.end(); ++it) {
Table *table = *it;
Table* table = *it;
if (table != nullptr) {
table->Unlock();
}

View File

@ -331,7 +331,7 @@ private:
* @brief Unlocks tables and clear the tables' list
* @param tables Tables list to clear
*/
void UnlockAndClearTables(std::list<Table *>& tables);
void UnlockAndClearTables(std::list<Table*>& tables);
/**
* @brief Destroys all the checkpoint threads

View File

@ -217,7 +217,7 @@ void CheckpointWorkerPool::WorkerFunc()
SessionContext* sessionContext = GetSessionManager()->CreateSessionContext();
int threadId = MOTCurrThreadId;
if (!GetTaskAffinity().SetAffinity(threadId)) {
if (GetGlobalConfiguration().m_enableNuma && !GetTaskAffinity().SetAffinity(threadId)) {
MOT_LOG_WARN("Failed to set affinity for checkpoint worker, checkpoint performance may be affected");
}

View File

@ -42,8 +42,11 @@ std::atomic<SessionId> SessionContext::m_nextSessionId(0);
static int GetRealCurrentNumaMode()
{
int cpu = sched_getcpu();
int node = MotSysNumaGetNode(cpu);
int node = 0; // We default to Node 0 to avoid failures in other places in the code.
if (GetGlobalConfiguration().m_enableNuma) {
int cpu = sched_getcpu();
node = MotSysNumaGetNode(cpu);
}
return node;
}
@ -166,7 +169,7 @@ TxnManager* SessionContext::CreateTransaction(
// Do it before txn_man::init call since it allocates memory
bool rc = true;
if (isLightTxn == false) {
if (IS_AFFINITY_ACTIVE(GetSessionAffinity().GetAffinityMode())) {
if (GetGlobalConfiguration().m_enableNuma && IS_AFFINITY_ACTIVE(GetSessionAffinity().GetAffinityMode())) {
if (!GetSessionAffinity().SetAffinity(threadId)) {
MOT_LOG_WARN("Failed to set current session affinity, performance may be affected");
}

View File

@ -302,8 +302,11 @@ extern MOTThreadId AllocThreadIdNumaHighest(int nodeId)
extern MOTThreadId AllocThreadIdNumaCurrentHighest()
{
int cpu = sched_getcpu();
int nodeId = MotSysNumaGetNode(cpu);
int nodeId = 0; // We default to Node 0 to avoid failures in other places in the code.
if (GetGlobalConfiguration().m_enableNuma) {
int cpu = sched_getcpu();
nodeId = MotSysNumaGetNode(cpu);
}
return AllocThreadIdNumaHighest(nodeId);
}

View File

@ -79,6 +79,7 @@ constexpr LogLevel MOTConfiguration::DEFAULT_NUMA_ERRORS_LOG_LEVEL;
constexpr LogLevel MOTConfiguration::DEFAULT_NUMA_WARNINGS_LOG_LEVEL;
constexpr LogLevel MOTConfiguration::DEFAULT_CFG_STARTUP_LOG_LEVEL;
// memory configuration members
constexpr bool MOTConfiguration::DEFAULT_ENABLE_NUMA;
constexpr uint16_t MOTConfiguration::DEFAULT_MAX_THREADS;
constexpr uint32_t MOTConfiguration::DEFAULT_MAX_CONNECTIONS;
constexpr AffinityMode MOTConfiguration::DEFAULT_AFFINITY_MODE;
@ -302,7 +303,7 @@ bool MOTConfiguration::FindNumaNodes(int* maxNodes)
}
*maxNodes = MotSysNumaConfiguredNodes();
if (*maxNodes < 0) {
if (*maxNodes <= 0) {
MOT_LOG_ERROR("Invalid NUMA configuration max_nodes=%d", *maxNodes);
return false;
}
@ -415,6 +416,7 @@ MOTConfiguration::MOTConfiguration()
m_numaErrorsLogLevel(DEFAULT_NUMA_ERRORS_LOG_LEVEL),
m_numaWarningsLogLevel(DEFAULT_NUMA_WARNINGS_LOG_LEVEL),
m_cfgStartupLogLevel(DEFAULT_CFG_STARTUP_LOG_LEVEL),
m_enableNuma(DEFAULT_ENABLE_NUMA),
m_maxThreads(DEFAULT_MAX_THREADS),
m_maxConnections(DEFAULT_MAX_CONNECTIONS),
m_sessionAffinityMode(DEFAULT_AFFINITY_MODE),
@ -447,6 +449,9 @@ MOTConfiguration::MOTConfiguration()
m_configMonitorPeriodSeconds(DEFAULT_CFG_MONITOR_PERIOD_SECONDS),
m_runInternalConsistencyValidation(DEFAULT_RUN_INTERNAL_CONSISTENCY_VALIDATION),
m_totalMemoryMb(DEFAULT_TOTAL_MEMORY_MB)
{}
void MOTConfiguration::Initialize()
{
// Since MOTConfiguration has a global instance it is initialized early (before main() or other code is called)
// we must initialize the sys_numa API early enough. This looks like the right timing.
@ -454,11 +459,15 @@ MOTConfiguration::MOTConfiguration()
int numa = DEFAULT_NUMA_NODES;
if (FindNumaNodes(&numa)) {
m_numaNodes = (uint16_t)numa;
} else {
MOT_LOG_WARN("Failed to infer the number of NUMA nodes on current machine, defaulting to %d", numa);
}
uint16_t cores = DEFAULT_CORES_PER_CPU;
if (FindNumProcessors(&cores, &m_cpuNodeMapper, &m_osCpuMap)) {
m_coresPerCpu = cores;
} else {
MOT_LOG_WARN("Failed to infer the number of cores on the current machine, defaulting to %u", (unsigned)cores);
}
m_isSystemHyperThreaded = CheckHyperThreads();
@ -506,6 +515,7 @@ bool MOTConfiguration::SetFlag(const std::string& name, const std::string& value
} else if (ParseLogLevel(name, "numa_errors_log_level", value, &m_numaErrorsLogLevel)) {
} else if (ParseLogLevel(name, "numa_warnings_log_level", value, &m_numaWarningsLogLevel)) {
} else if (ParseLogLevel(name, "cfg_startup_log_level", value, &m_cfgStartupLogLevel)) {
} else if (ParseBool(name, "enable_numa", value, &m_enableNuma)) {
} else if (ParseUint16(name, "max_threads", value, &m_maxThreads)) {
} else if (ParseUint32(name, "max_connections", value, &m_maxConnections)) {
} else if (ParseAffinity(name, "affinity_mode", value, &m_sessionAffinityMode)) {
@ -687,6 +697,7 @@ void MOTConfiguration::LoadConfig()
UPDATE_USER_CFG(m_cfgStartupLogLevel, "cfg_startup_log_level", DEFAULT_CFG_STARTUP_LOG_LEVEL);
// memory configuration
UPDATE_CFG(m_enableNuma, "enable_numa", DEFAULT_ENABLE_NUMA);
UPDATE_INT_CFG(m_maxThreads, "max_threads", DEFAULT_MAX_THREADS);
UPDATE_INT_CFG(m_maxConnections, "max_connections", DEFAULT_MAX_CONNECTIONS);
UPDATE_USER_CFG(m_sessionAffinityMode, "affinity_mode", DEFAULT_AFFINITY_MODE);

View File

@ -53,6 +53,9 @@ public:
MOTConfiguration();
~MOTConfiguration();
/** @brief Initializes the configuration singleton. */
void Initialize();
/** @brief Get reference to single instance of configuration class. */
static MOTConfiguration& GetInstance()
{
@ -243,6 +246,9 @@ public:
/**********************************************************************/
// Memory configuration
/**********************************************************************/
/** @var Specifies whether to use NUMA-aware memory allocation. */
bool m_enableNuma;
/** @var Maximum number of threads. */
uint16_t m_maxThreads;
@ -503,6 +509,9 @@ private:
static constexpr LogLevel DEFAULT_CFG_STARTUP_LOG_LEVEL = LogLevel::LL_TRACE;
// default memory configuration
/** @var Default value of the enable_numa setting. */
static constexpr bool DEFAULT_ENABLE_NUMA = true;
/** @var Default maximum number of threads in the system. */
static constexpr uint16_t DEFAULT_MAX_THREADS = 1024;
@ -636,8 +645,8 @@ private:
}
template <typename T>
static void UpdateConfigItem(uint32_t& oldValue, T newValue, const char* name,
uint32_t lowerBound = 0, uint32_t upperBound = UINT_MAX)
static void UpdateConfigItem(
uint32_t& oldValue, T newValue, const char* name, uint32_t lowerBound = 0, uint32_t upperBound = UINT_MAX)
{
if (newValue > upperBound) {
MOT_LOG_WARN("Configuration of %s overflowed: keeping default value %u", name, oldValue);

View File

@ -75,10 +75,8 @@ MOTEngine* MOTEngine::m_engine = nullptr;
MOTEngine::MOTEngine()
: m_initialized(false),
m_recovering(false),
m_sessionAffinity(GetGlobalConfiguration().m_numaNodes, GetGlobalConfiguration().m_coresPerCpu,
GetGlobalConfiguration().m_sessionAffinityMode),
m_taskAffinity(GetGlobalConfiguration().m_numaNodes, GetGlobalConfiguration().m_coresPerCpu,
GetGlobalConfiguration().m_taskAffinityMode),
m_sessionAffinity(0, 0, AffinityMode::AFFINITY_INVALID),
m_taskAffinity(0, 0, AffinityMode::AFFINITY_INVALID),
m_softMemoryLimitReached(0),
m_sessionManager(nullptr),
m_tableManager(nullptr),
@ -168,6 +166,10 @@ bool MOTEngine::LoadConfig()
if (!result) {
MOT_REPORT_ERROR(MOT_ERROR_INTERNAL, "System Startup", "Failed to load configuration for the first time");
} else {
if (!GetGlobalConfiguration().m_enableNuma) {
MOT_LOG_WARN("NUMA-aware memory allocation is disabled");
}
// we must load also MM layer configuration so that max_process_memory initial check will see correct values
int rc = MemCfgInit();
if (rc != 0) {
@ -569,6 +571,9 @@ bool MOTEngine::InitializeConfiguration(const char* configFilePath, int argc, ch
}
}
// initialize global configuration singleton
GetGlobalConfiguration().Initialize();
// create manager, and configure it with configuration file loader
if (!ConfigManager::CreateInstance(argv + 1, argc - 1)) {
MOT_LOG_ERROR("Failed to create configuration manager instance");

View File

@ -42,8 +42,10 @@ constexpr uint32_t NUM_DELETE_MAX_INC = 500;
bool RecoveryManager::Initialize()
{
// in a thread-pooled envelope the affinity could be disabled, so we use task affinity here
Affinity& affinity = GetTaskAffinity();
affinity.SetAffinity(m_threadId);
if (GetGlobalConfiguration().m_enableNuma) {
Affinity& affinity = GetTaskAffinity();
affinity.SetAffinity(m_threadId);
}
if (m_enableLogStats) {
m_logStats = new (std::nothrow) LogStats();
@ -368,7 +370,7 @@ void RecoveryManager::CpWorkerFunc()
int threadId = MOTCurrThreadId;
// in a thread-pooled envelope the affinity could be disabled, so we use task affinity here
if (!GetTaskAffinity().SetAffinity(threadId)) {
if (GetGlobalConfiguration().m_enableNuma && !GetTaskAffinity().SetAffinity(threadId)) {
MOT_LOG_WARN("Failed to set affinity of checkpoint recovery worker, recovery from checkpoint performance may be"
" affected");
}

View File

@ -95,6 +95,7 @@ public:
{
return MAX_BUFFERS;
}
private:
std::atomic<uint32_t> m_nextFree;
RedoLogBuffer* m_array[MAX_BUFFERS];

View File

@ -706,13 +706,13 @@ void MOTAdaptor::Init()
elog(FATAL, "Double attempt to initialize MOT engine, it is already initialized");
}
MOT::GetGlobalConfiguration().SetTotalMemoryMb(g_instance.attr.attr_memory.max_process_memory / KILO_BYTE);
m_engine = MOT::MOTEngine::CreateInstanceNoInit(g_instance.attr.attr_common.MOTConfigFileName, 0, nullptr);
if (m_engine == nullptr) {
elog(FATAL, "Failed to create MOT engine");
}
MOT::GetGlobalConfiguration().SetTotalMemoryMb(g_instance.attr.attr_memory.max_process_memory / KILO_BYTE);
gaussdbConfigLoader = new (std::nothrow) GaussdbConfigLoader();
if (gaussdbConfigLoader == nullptr) {
MOT::MOTEngine::DestroyInstance();