!5800 慢SQL默认记录执行计划

Merge pull request !5800 from 申正/L0_record_query_plan
This commit is contained in:
opengauss_bot
2024-07-29 04:09:45 +00:00
committed by Gitee
11 changed files with 185 additions and 14 deletions

View File

@ -233,7 +233,7 @@ static void print_stmt_basic_debug_log(int log_level)
errmsg("\t soft parse: %lu", CURRENT_STMT_METRIC_HANDLE->parse.soft_parse)));
ereport(log_level, (errmodule(MOD_INSTR),
errmsg("\t hard parse: %lu", CURRENT_STMT_METRIC_HANDLE->parse.hard_parse)));
if (CURRENT_STMT_METRIC_HANDLE->level == STMT_TRACK_L1 || CURRENT_STMT_METRIC_HANDLE->level == STMT_TRACK_L2) {
if (CURRENT_STMT_METRIC_HANDLE->level >= STMT_TRACK_L0 || CURRENT_STMT_METRIC_HANDLE->level <= STMT_TRACK_L2) {
ereport(log_level, (errmodule(MOD_INSTR),
errmsg("\t query plan size: %lu", CURRENT_STMT_METRIC_HANDLE->plan_size)));
ereport(log_level, (errmodule(MOD_INSTR),
@ -320,7 +320,7 @@ static void print_stmt_net_debug_log(int log_level)
static void print_stmt_summary_lock_debug_log(int log_level)
{
ereport(log_level, (errmodule(MOD_INSTR), errmsg("6, ----Lock Summary Info Area----")));
if (CURRENT_STMT_METRIC_HANDLE->level == STMT_TRACK_L1 || CURRENT_STMT_METRIC_HANDLE->level == STMT_TRACK_L2) {
if (CURRENT_STMT_METRIC_HANDLE->level >= STMT_TRACK_L0 && CURRENT_STMT_METRIC_HANDLE->level <= STMT_TRACK_L2) {
ereport(log_level, (errmodule(MOD_INSTR),
errmsg("\t lock cnt: %ld", CURRENT_STMT_METRIC_HANDLE->lock_summary.lock_cnt)));
ereport(log_level, (errmodule(MOD_INSTR),

View File

@ -67,6 +67,7 @@
#include "commands/copy.h"
#include "storage/lmgr.h"
#include "instruments/instr_func_control.h"
#include "storage/proc.h"
#define MAX_SLOW_QUERY_RETENSION_DAYS 604800
#define MAX_FULL_SQL_RETENSION_SEC 86400
@ -2238,8 +2239,8 @@ void instr_stmt_report_unique_sql_info(const PgStat_TableCounts *agg_table_stat,
static inline bool instr_stmt_level_fullsql_open()
{
int fullsql_level = u_sess->statement_cxt.statement_level[0];
/* only record query plan when level >= L1 */
return fullsql_level >= STMT_TRACK_L1 && fullsql_level <= STMT_TRACK_L2;
/* record query plan when level >= L0 */
return fullsql_level >= STMT_TRACK_L0 && fullsql_level <= STMT_TRACK_L2;
}
static inline bool instr_stmt_level_slowsql_only_open()
@ -2249,8 +2250,8 @@ static inline bool instr_stmt_level_slowsql_only_open()
}
int slowsql_level = u_sess->statement_cxt.statement_level[1];
/* only record query plan when level >= L1 */
return slowsql_level >= STMT_TRACK_L1 && slowsql_level <= STMT_TRACK_L2;
/* record query plan when level >= L0 */
return slowsql_level >= STMT_TRACK_L0 && slowsql_level <= STMT_TRACK_L2;
}
static inline bool instr_stmt_is_slowsql()
@ -2282,11 +2283,29 @@ bool instr_stmt_need_track_plan()
return false;
}
void instr_stmt_exec_report_query_plan(QueryDesc *queryDesc)
{
if (instr_stmt_level_fullsql_open()) {
instr_stmt_report_query_plan(queryDesc);
return;
}
if (instr_stmt_level_slowsql_only_open()) {
if (CURRENT_STMT_METRIC_HANDLE->slow_query_threshold == 0) {
instr_stmt_report_query_plan(queryDesc);
return;
}
int delayms = u_sess->attr.attr_storage.log_min_duration_statement;
(void)enable_query_plan_sig_alarm(delayms);
}
}
void instr_stmt_report_query_plan(QueryDesc *queryDesc)
{
StatementStatContext *ssctx = (StatementStatContext *)u_sess->statement_cxt.curStatementMetrics;
if (queryDesc == NULL || ssctx == NULL || ssctx->level <= STMT_TRACK_L0
|| ssctx->level > STMT_TRACK_L2 || (ssctx->plan_size != 0 && !u_sess->unique_sql_cxt.is_open_cursor)
if (queryDesc == NULL || ssctx == NULL || ssctx->level > STMT_TRACK_L2
|| (ssctx->plan_size != 0 && !u_sess->unique_sql_cxt.is_open_cursor)
|| (u_sess->statement_cxt.executer_run_level > 1 && !IS_UNIQUE_SQL_TRACK_ALL)) {
return;
}
@ -2313,6 +2332,10 @@ void instr_stmt_report_query_plan(QueryDesc *queryDesc)
(errmodule(MOD_INSTR), errmsg("exec_auto_explain %s %s to %lu",
ssctx->query_plan, queryDesc->sourceText, u_sess->unique_sql_cxt.unique_sql_id)));
pfree(es.str->data);
if (u_sess->statement_cxt.is_exceed_query_plan_threshold) {
u_sess->statement_cxt.is_exceed_query_plan_threshold = false;
}
}
/* check the header and valid length of the detail binary data */

View File

@ -1167,6 +1167,11 @@ static void knl_u_statement_init(knl_u_statement_context* statement_cxt)
statement_cxt->wait_events_bms = NULL;
statement_cxt->enable_wait_events_bitmap = false;
statement_cxt->is_session_bms_active = false;
statement_cxt->root_query_plan = NULL;
statement_cxt->query_plan_threshold_active = false;
statement_cxt->is_exceed_query_plan_threshold = false;
statement_cxt->record_query_plan_fin_time = 0;
}
void knl_u_relmap_init(knl_u_relmap_context* relmap_cxt)

View File

@ -466,11 +466,12 @@ void ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
int instrument_option = 0;
bool has_track_operator = false;
char* old_stmt_name = u_sess->pcache_cxt.cur_stmt_name;
u_sess->statement_cxt.root_query_plan = queryDesc;
u_sess->statement_cxt.executer_run_level++;
if (u_sess->SPI_cxt._connected >= 0) {
u_sess->pcache_cxt.cur_stmt_name = NULL;
}
instr_stmt_report_query_plan(queryDesc);
instr_stmt_exec_report_query_plan(queryDesc);
exec_explain_plan(queryDesc);
if (u_sess->attr.attr_resource.use_workload_manager &&
u_sess->attr.attr_resource.resource_track_level == RESOURCE_TRACK_OPERATOR &&
@ -721,6 +722,7 @@ void ExecutorFinish(QueryDesc *queryDesc)
} else {
standard_ExecutorFinish(queryDesc);
}
}
void standard_ExecutorFinish(QueryDesc *queryDesc)
@ -772,6 +774,14 @@ void standard_ExecutorFinish(QueryDesc *queryDesc)
*/
void ExecutorEnd(QueryDesc *queryDesc)
{
/*
* for a very few cases, query plan not be recorded during the execution phase,
* we record again before executor end.
*/
if (unlikely(u_sess->statement_cxt.is_exceed_query_plan_threshold)) {
instr_stmt_report_query_plan(queryDesc);
}
if (ExecutorEnd_hook) {
(*ExecutorEnd_hook)(queryDesc);
} else {

View File

@ -73,6 +73,7 @@
#include "instruments/percentile.h"
#include "instruments/snapshot.h"
#include "instruments/instr_statement.h"
#include "instruments/instr_handle_mgr.h"
#include "utils/builtins.h"
#include "instruments/ash.h"
#include "pgaudit.h"
@ -96,6 +97,7 @@ static void CheckIdleInTransactionSessionTimeout(void);
static bool CheckStandbyTimeout(void);
static void FiniNuma(int code, Datum arg);
static inline void ReleaseChildSlot(void);
static void CheckQueryPlanThreshold();
/*
* Report shared-memory space needed by InitProcGlobal.
@ -2628,6 +2630,59 @@ bool enable_lockwait_sig_alarm(int delayms)
return true;
}
bool enable_query_plan_sig_alarm(int delayms)
{
if (CURRENT_STMT_METRIC_HANDLE == NULL || u_sess->statement_cxt.stmt_stat_cxt == NULL) {
return true;
}
if (u_sess->statement_cxt.query_plan_threshold_active) {
return true;
}
TimestampTz start_time = CURRENT_STMT_METRIC_HANDLE->start_time;
u_sess->statement_cxt.record_query_plan_fin_time = TimestampTzPlusMilliseconds(start_time, delayms);
/* if query plan timeout > session timeout or statement timeout,
* don't set timer to ensure that sess or statement timeout take effect
*/
if (t_thrd.storage_cxt.statement_timeout_active &&
(u_sess->statement_cxt.record_query_plan_fin_time >= t_thrd.storage_cxt.statement_fin_time)) {
return true;
}
if (u_sess->storage_cxt.session_timeout_active &&
(u_sess->statement_cxt.record_query_plan_fin_time >= u_sess->storage_cxt.session_fin_time)) {
return true;
}
#ifndef ENABLE_MULTIPLE_NODES
if (u_sess->storage_cxt.idle_in_transaction_session_timeout_active &&
(u_sess->statement_cxt.record_query_plan_fin_time >=
u_sess->storage_cxt.idle_in_transaction_session_fin_time)) {
return true;
}
#endif
u_sess->statement_cxt.query_plan_threshold_active = true;
long secs;
int usecs;
/* start to set signal timer, if now >= query plan timeout, start 1us timer */
TimestampDifference(GetCurrentTimestamp(), u_sess->statement_cxt.record_query_plan_fin_time, &secs, &usecs);
if (secs == 0 && usecs == 0) {
usecs = 1;
}
struct itimerval timeval;
errno_t rc = memset_s(&timeval, sizeof(struct itimerval), 0, sizeof(struct itimerval));
securec_check(rc, "\0", "\0");
timeval.it_value.tv_sec = secs;
timeval.it_value.tv_usec = usecs;
if (gs_signal_settimer(&timeval))
return false;
return true;
}
/* Enable the session timeout timer. */
bool enable_session_sig_alarm(int delayms)
{
@ -2799,13 +2854,15 @@ bool disable_sig_alarm(bool is_statement_timeout)
* We will re-enable the interrupt if necessary in CheckStatementTimeout.
*/
if (t_thrd.storage_cxt.statement_timeout_active || t_thrd.storage_cxt.deadlock_timeout_active ||
t_thrd.storage_cxt.lockwait_timeout_active || t_thrd.wlm_cxt.wlmalarm_timeout_active) {
t_thrd.storage_cxt.lockwait_timeout_active || t_thrd.wlm_cxt.wlmalarm_timeout_active ||
u_sess->statement_cxt.query_plan_threshold_active) {
if (gs_signal_canceltimer()) {
t_thrd.storage_cxt.statement_timeout_active = false;
t_thrd.storage_cxt.cancel_from_timeout = false;
t_thrd.storage_cxt.deadlock_timeout_active = false;
t_thrd.storage_cxt.lockwait_timeout_active = false;
t_thrd.wlm_cxt.wlmalarm_timeout_active = false;
u_sess->statement_cxt.query_plan_threshold_active = false;
return false;
}
}
@ -2814,6 +2871,7 @@ bool disable_sig_alarm(bool is_statement_timeout)
t_thrd.storage_cxt.deadlock_timeout_active = false;
t_thrd.storage_cxt.lockwait_timeout_active = false;
t_thrd.wlm_cxt.wlmalarm_timeout_active = false;
u_sess->statement_cxt.query_plan_threshold_active = false;
/* Cancel or reschedule statement timeout */
if (is_statement_timeout) {
@ -3045,6 +3103,10 @@ void handle_sig_alarm(SIGNAL_ARGS)
}
}
if (u_sess->statement_cxt.query_plan_threshold_active) {
CheckQueryPlanThreshold();
}
errno = save_errno;
}
@ -3326,3 +3388,17 @@ void BecomeLockGroupMember(PGPROC *leader)
dlist_push_tail(&leader->lockGroupMembers, &t_thrd.proc->lockGroupLink);
LWLockRelease(leaderLwlock);
}
void CheckQueryPlanThreshold()
{
if (u_sess->attr.attr_storage.log_min_duration_statement == 0 ||
!u_sess->statement_cxt.query_plan_threshold_active) {
return;
}
TimestampTz now = GetCurrentTimestamp();
if (now > u_sess->statement_cxt.record_query_plan_fin_time) {
u_sess->statement_cxt.is_exceed_query_plan_threshold = true;
u_sess->statement_cxt.query_plan_threshold_active = false;
}
}

View File

@ -263,6 +263,7 @@ extern TupleTableSlot* FetchPlanSlot(PlanState* subPlanState, ProjectionInfo** p
extern long ExecGetPlanMemCost(Plan* node);
extern bool executorEarlyStop();
extern void instr_stmt_report_query_plan(QueryDesc *queryDesc);
/* ----------------------------------------------------------------
* ExecProcNode
@ -275,6 +276,10 @@ extern bool executorEarlyStop();
static inline TupleTableSlot *ExecProcNode(PlanState *node)
{
if (u_sess->statement_cxt.is_exceed_query_plan_threshold) {
instr_stmt_report_query_plan((QueryDesc *)u_sess->statement_cxt.root_query_plan);
u_sess->statement_cxt.root_query_plan = NULL;
}
TupleTableSlot* result;
Assert(node->ExecProcNode);

View File

@ -225,6 +225,7 @@ extern void instr_stmt_report_unique_sql_info(const PgStat_TableCounts *agg_tabl
extern void instr_stmt_report_txid(uint64 txid);
extern void instr_stmt_report_query(uint64 unique_query_id);
extern void instr_stmt_report_query_plan(QueryDesc *queryDesc);
extern void instr_stmt_exec_report_query_plan(QueryDesc *queryDesc);
extern void instr_stmt_report_debug_query_id(uint64 debug_query_id);
extern void instr_stmt_report_trace_id(char *trace_id);
extern void instr_stmt_report_start_time();

View File

@ -2358,6 +2358,11 @@ typedef struct knl_u_statement_context {
bool enable_wait_events_bitmap; /* change to true in init stage of stmt handle */
int64 current_row_count; /* Record the number of rows affected by current query */
int64 last_row_count; /* Record the number of rows affected by last query */
void *root_query_plan; /* Record the root query plan before report */
bool query_plan_threshold_active; /* active if need start query_plan threshold timer */
bool is_exceed_query_plan_threshold; /* if true when slow sql take effect */
TimestampTz record_query_plan_fin_time; /* finish time when execute time exceed log_min_duration_statement */
} knl_u_statement_context;
struct Qid_key {

View File

@ -554,6 +554,7 @@ extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
extern bool enable_lockwait_sig_alarm(int delayms);
extern bool enable_session_sig_alarm(int delayms);
extern bool enable_idle_in_transaction_session_sig_alarm(int delayms);
extern bool enable_query_plan_sig_alarm(int delayms);
extern bool disable_session_sig_alarm(void);
extern bool disable_idle_in_transaction_session_sig_alarm(void);

View File

@ -11,7 +11,7 @@ select reset_unique_sql('GLOBAL', 'ALL', 0);
(1 row)
-- full sql
set track_stmt_stat_level = 'L1,OFF';
set track_stmt_stat_level = 'L0,OFF';
set log_min_duration_statement = '0ms';
set statement_timeout = '0ms';
delete statement_history;
@ -71,7 +71,7 @@ select count(*) from statement_history where query like '%from query_plan_table%
(1 row)
-- slow sql
set track_stmt_stat_level = 'OFF,L1';
set track_stmt_stat_level = 'OFF,L0';
set log_min_duration_statement = '0ms';
delete statement_history;
select reset_unique_sql('GLOBAL', 'ALL', 0);
@ -180,6 +180,40 @@ select count(*) from statement_history where query like '%from query_plan_table%
2
(1 row)
set log_min_duration_statement = '2s';
set statement_timeout = '1s';
delete statement_history;
select reset_unique_sql('GLOBAL', 'ALL', 0);
reset_unique_sql
------------------
t
(1 row)
select t1.id from query_plan_table t1 where t1.id = 1;
id
----
1
(1 row)
select t1.num from query_plan_table t1 where t1.num = 1;
num
-----
1
(1 row)
select t1.num, pg_sleep(1) from query_plan_table t1 where t1.num = 1;
ERROR: canceling statement due to statement timeout
CONTEXT: referenced column: pg_sleep
select pg_sleep(1);
ERROR: canceling statement due to statement timeout
CONTEXT: referenced column: pg_sleep
-- expect 0 row
select count(*) from statement_history where query like '%from query_plan_table%' and query_plan is not null;
count
-------
0
(1 row)
set track_stmt_stat_level = 'OFF,L0';
set log_min_duration_statement = '30min';
set statement_timeout = '0ms';

View File

@ -7,7 +7,7 @@ insert into query_plan_table select generate_series(1,10000),generate_series(1,1
select reset_unique_sql('GLOBAL', 'ALL', 0);
-- full sql
set track_stmt_stat_level = 'L1,OFF';
set track_stmt_stat_level = 'L0,OFF';
set log_min_duration_statement = '0ms';
set statement_timeout = '0ms';
delete statement_history;
@ -28,7 +28,7 @@ select pg_sleep(1);
select count(*) from statement_history where query like '%from query_plan_table%' and query_plan is not null;
-- slow sql
set track_stmt_stat_level = 'OFF,L1';
set track_stmt_stat_level = 'OFF,L0';
set log_min_duration_statement = '0ms';
delete statement_history;
select reset_unique_sql('GLOBAL', 'ALL', 0);
@ -59,6 +59,17 @@ select pg_sleep(1);
-- expect 2 row
select count(*) from statement_history where query like '%from query_plan_table%' and query_plan is not null;
set log_min_duration_statement = '2s';
set statement_timeout = '1s';
delete statement_history;
select reset_unique_sql('GLOBAL', 'ALL', 0);
select t1.id from query_plan_table t1 where t1.id = 1;
select t1.num from query_plan_table t1 where t1.num = 1;
select t1.num, pg_sleep(1) from query_plan_table t1 where t1.num = 1;
select pg_sleep(1);
-- expect 0 row
select count(*) from statement_history where query like '%from query_plan_table%' and query_plan is not null;
set track_stmt_stat_level = 'OFF,L0';
set log_min_duration_statement = '30min';
set statement_timeout = '0ms';