Align statistics to cache lines

Aligning the statistics object indices to cache line size reduces the CPU
overhead of gathering the statistics. This allows the statistics to more
accurately measure the polling system without the measurement affecting
the outcome.
This commit is contained in:
Markus Makela
2016-12-09 17:39:31 +02:00
parent 5cb738ae03
commit 49aa23468d
6 changed files with 127 additions and 43 deletions

View File

@ -29,10 +29,32 @@
#include <maxscale/config.h>
#include <maxscale/debug.h>
#include <maxscale/platform.h>
#include <maxscale/utils.h>
static int thread_count = 0;
static size_t cache_linesize = 0;
static size_t stats_size = 0;
static bool stats_initialized = false;
static size_t get_cache_line_size()
{
size_t rval = 64; // Cache lines are 64 bytes for x86
#ifdef _SC_LEVEL1_DCACHE_LINESIZE
rval = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
#endif
if (rval < sizeof(int64_t))
{
MXS_WARNING("Cache line size reported to be %lu bytes when a 64-bit "
"integer is %lu bytes. Increasing statistics to the minimum "
"size of %lu bytes.", rval, sizeof(int64_t), sizeof(int64_t));
rval = sizeof(int64_t);
}
return rval;
}
/**
* @brief Initialize the statistics gathering
*/
@ -40,6 +62,8 @@ void ts_stats_init()
{
ss_dassert(!stats_initialized);
thread_count = config_threadcount();
cache_linesize = get_cache_line_size();
stats_size = thread_count * cache_linesize;
stats_initialized = true;
}
@ -59,7 +83,7 @@ void ts_stats_end()
ts_stats_t ts_stats_alloc()
{
ss_dassert(stats_initialized);
return MXS_CALLOC(thread_count, sizeof(int64_t));
return MXS_CALLOC(thread_count, cache_linesize);
}
/**
@ -85,10 +109,12 @@ int64_t ts_stats_sum(ts_stats_t stats)
{
ss_dassert(stats_initialized);
int64_t sum = 0;
for (int i = 0; i < thread_count; i++)
for (size_t i = 0; i < stats_size; i += cache_linesize)
{
sum += ((int64_t*)stats)[i];
sum += *((int64_t*)MXS_PTR(stats, i));
}
return sum;
}
@ -106,9 +132,9 @@ int64_t ts_stats_get(ts_stats_t stats, enum ts_stats_type type)
ss_dassert(stats_initialized);
int64_t best = type == TS_STATS_MAX ? LONG_MIN : (type == TS_STATS_MIX ? LONG_MAX : 0);
for (int i = 0; i < thread_count; i++)
for (size_t i = 0; i < stats_size; i += cache_linesize)
{
int64_t value = ((int64_t*)stats)[i];
int64_t value = *((int64_t*)MXS_PTR(stats, i));
switch (type)
{
@ -135,3 +161,39 @@ int64_t ts_stats_get(ts_stats_t stats, enum ts_stats_type type)
return type == TS_STATS_AVG ? best / thread_count : best;
}
void ts_stats_increment(ts_stats_t stats, int thread_id)
{
ss_dassert(thread_id < thread_count);
int64_t *item = (int64_t*)MXS_PTR(stats, thread_id * cache_linesize);
*item += 1;
}
void ts_stats_set(ts_stats_t stats, int value, int thread_id)
{
ss_dassert(thread_id < thread_count);
int64_t *item = (int64_t*)MXS_PTR(stats, thread_id * cache_linesize);
*item = value;
}
void ts_stats_set_max(ts_stats_t stats, int value, int thread_id)
{
ss_dassert(thread_id < thread_count);
int64_t *item = (int64_t*)MXS_PTR(stats, thread_id * cache_linesize);
if (value > *item)
{
*item = value;
}
}
void ts_stats_set_min(ts_stats_t stats, int value, int thread_id)
{
ss_dassert(thread_id < thread_count);
int64_t *item = (int64_t*)MXS_PTR(stats, thread_id * cache_linesize);
if (value < *item)
{
*item = value;
}
}