/* -------------------------------------------------------------------------
|
|
*
|
|
* plananalyzer.cpp
|
|
* Plan Analyzer for SQL self diagnosis & tuning
|
|
*
|
|
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/gausskernel/optimizer/utils/plananalyzer.cpp
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
|
|
#include "commands/prepare.h"
|
|
#include "executor/exec/execStream.h"
|
|
#include "parser/parse_relation.h"
|
|
#ifdef PGXC
|
|
#include "optimizer/planmain.h"
|
|
#endif
|
|
#include "utils/builtins.h"
|
|
#include "utils/lsyscache.h"
|
|
|
|
/* -- Criteria/Threshold definition of plan issue -- */
|
|
/* Basic factor that identifies the number of Per-DN tuples for "Large Table" */
|
|
#define LargeTableFactor 100000
|
|
|
|
/* Const variables for Large Table in Broadcast */
|
|
#define LargerTable_Broadcast_Threshold LargeTableFactor
|
|
|
|
/* Const variables for Large table as hashjoin's Inner */
|
|
#define LTAsHashJoinInner_Rows_Threshold LargeTableFactor
|
|
#define LTAsHashJoinInner_Scale_Threshold 10
|
|
|
|
/* Const variables for large Table in nestloop with equal condition */
|
|
#define LTWithEqualCondInNestLoop_Rows_Threshold LargeTableFactor
|
|
|
|
/* Const variables for Data Skew */
|
|
#define DataSkew_Rows_Threshold 100000
|
|
#define DataSkew_Scale_Threshold 10
|
|
|
|
/* Const variables for E-Rows not accurate */
|
|
#define EstimationRows_Threshold 100000
|
|
#define EstimationRows_Scale_Threshold 10
|
|
|
|
/* Const variables for Scan Method */
|
|
#define SCANMETHOD_ROWS_THRESHOLD 10000
|
|
#define SCANMETHOD_RATE_THRESHOLD 0.001
|
|
#define SCANMETHOD_VECROWS_THRESHOLD 100
|
|
#define SCANMETHOD_VECRATE_THRESHOLD 0.0001
|
|
#define SCANMETHOD_INPUT_THRESHOLD 10000
|
|
|
|
static char* OperatorName(const Plan* plan);
|
|
|
|
extern double get_float8_infinity(void);
|
|
|
|
extern bool check_relation_analyzed(Oid relid);
|
|
|
|
#define MAX_OPTIMIZER_WARNING_LEN 2048
|
|
|
|
static inline QueryPlanIssueDesc* CheckQueryNotPlanShipping();
|
|
|
|
static bool DuplicateWithUnderlyingPlanNodes(PlanState* node, QueryIssueType type);
|
|
|
|
static List* getPlanSubNodes(const PlanState* node);
|
|
|
|
static inline QueryPlanIssueDesc* CheckLargeTableInBroadcast(PlanState* node, int dn_num, double total_tuples);
|
|
|
|
static inline QueryPlanIssueDesc* CheckLargeTableInHashJoinInner(
|
|
PlanState* node, int dn_num, double rightchild_total_rows, double leftchild_total_rows);
|
|
|
|
static inline QueryPlanIssueDesc* CheckLargeTableInNestloopWithEqualCondition(
|
|
PlanState* node, int dn_num, double Maxchild_total_size);
|
|
|
|
static inline QueryPlanIssueDesc* CheckDataSkew(PlanState* node, double min_dn_tuples, double max_dn_tuples);
|
|
|
|
static inline QueryPlanIssueDesc* CheckMultiColumnStatsNotCollect(QueryDesc* querydesc);
|
|
|
|
static inline QueryPlanIssueDesc* CheckInaccurateEstimatedRows(PlanState* node, int dn_num, double actual_rows);
|
|
|
|
static double ComputeSumOfDNTuples(Plan* node);
|
|
|
|
static QueryPlanIssueDesc* CreateQueryPlanIssue(PlanState* node, QueryIssueType type);
|
|
|
|
static void DeleteQueryPlanIssue(QueryPlanIssueDesc* query_issue);
|
|
|
|
static bool ContainsDuplicateIssues(PlanState* node, QueryIssueType type);
|
|
|
|
static bool IsEqualConditionandNestLoopOnly(Plan* plan);
|
|
/* ----------------------------- Local routine definitions ----------------------------- */
|
|
/*
|
|
* - Brief: Return true if for current join-node NestLoop/VecNestLoop is only option
|
|
*/
|
|
static bool IsEqualConditionandNestLoopOnly(Plan* plan)
|
|
{
|
|
Assert(nodeTag(plan) == T_NestLoop || nodeTag(plan) == T_VecNestLoop);
|
|
|
|
ListCell* lc = NULL;
|
|
NestLoop* nestloop = (NestLoop*)plan;
|
|
|
|
foreach (lc, nestloop->join.joinqual) {
|
|
if (isEqualExpr((Node*)lfirst(lc))) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Return plan node's underlying plan nodes that is not create under
|
|
* left/right plan tree
|
|
*/
|
|
static List* getPlanSubNodes(const PlanState* node)
|
|
{
|
|
List* ps_list = NIL;
|
|
|
|
if (node == NULL) {
|
|
return NIL;
|
|
}
|
|
|
|
/* Find plan list in special plan nodes. */
|
|
switch (nodeTag(node->plan)) {
|
|
case T_Append:
|
|
case T_VecAppend: {
|
|
AppendState* append = (AppendState*)node;
|
|
for (int i = 0; i < append->as_nplans; i++) {
|
|
PlanState* plan = append->appendplans[i];
|
|
ps_list = lappend(ps_list, plan);
|
|
}
|
|
} break;
|
|
case T_ModifyTable:
|
|
case T_VecModifyTable: {
|
|
ModifyTableState* mt = (ModifyTableState*)node;
|
|
for (int i = 0; i < mt->mt_nplans; i++) {
|
|
PlanState* plan = mt->mt_plans[i];
|
|
ps_list = lappend(ps_list, plan);
|
|
}
|
|
} break;
|
|
case T_MergeAppend:
|
|
case T_VecMergeAppend: {
|
|
MergeAppendState* ma = (MergeAppendState*)node;
|
|
for (int i = 0; i < ma->ms_nplans; i++) {
|
|
PlanState* plan = ma->mergeplans[i];
|
|
ps_list = lappend(ps_list, plan);
|
|
}
|
|
} break;
|
|
case T_BitmapAnd:
|
|
case T_CStoreIndexAnd: {
|
|
BitmapAndState* ba = (BitmapAndState*)node;
|
|
for (int i = 0; i < ba->nplans; i++) {
|
|
PlanState* plan = ba->bitmapplans[i];
|
|
ps_list = lappend(ps_list, plan);
|
|
}
|
|
} break;
|
|
case T_BitmapOr:
|
|
case T_CStoreIndexOr: {
|
|
BitmapOrState* bo = (BitmapOrState*)node;
|
|
for (int i = 0; i < bo->nplans; i++) {
|
|
PlanState* plan = bo->bitmapplans[i];
|
|
ps_list = lappend(ps_list, plan);
|
|
}
|
|
} break;
|
|
case T_SubqueryScan:
|
|
case T_VecSubqueryScan: {
|
|
SubqueryScanState* ss = (SubqueryScanState*)node;
|
|
PlanState* plan = ss->subplan;
|
|
ps_list = lappend(ps_list, plan);
|
|
} break;
|
|
default: {
|
|
ps_list = NIL;
|
|
} break;
|
|
}
|
|
|
|
return ps_list;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Return true if target plan contains given type of issues that its underlying
|
|
* plan node already have
|
|
*/
|
|
static bool DuplicateWithUnderlyingPlanNodes(PlanState* node, QueryIssueType type)
|
|
{
|
|
bool is_duplicate = false;
|
|
|
|
/* First check node's left tree node is already have same issue */
|
|
if (node->lefttree != NULL && ContainsDuplicateIssues(node->lefttree, type)) {
|
|
return true;
|
|
}
|
|
|
|
/* Second check node's right tree node is already have same issue */
|
|
if (node->righttree != NULL && ContainsDuplicateIssues(node->righttree, type)) {
|
|
return true;
|
|
}
|
|
|
|
/* Check if node's other underlying nodes is already have same issue e.g. setop */
|
|
List* node_list = getPlanSubNodes(node);
|
|
ListCell* lc = NULL;
|
|
|
|
if (node_list != NIL) {
|
|
foreach (lc, node_list) {
|
|
PlanState* ps = (PlanState*)lfirst(lc);
|
|
if (ContainsDuplicateIssues(ps, type)) {
|
|
is_duplicate = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
list_free(node_list);
|
|
}
|
|
|
|
return is_duplicate;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Return true if target plan contains given type of issues a.w.k. considered
|
|
* as duplicated issue nodes
|
|
*/
|
|
static bool ContainsDuplicateIssues(PlanState* node, QueryIssueType type)
|
|
{
|
|
ListCell* lc = NULL;
|
|
|
|
if (node->plan_issues == NIL) {
|
|
return false;
|
|
}
|
|
|
|
foreach (lc, node->plan_issues) {
|
|
QueryPlanIssueDesc* issue = (QueryPlanIssueDesc*)lfirst(lc);
|
|
|
|
if (issue->issue_type == type) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* - Brief: compute total number of processed tuples of current plan node
|
|
*/
|
|
static double ComputeSumOfDNTuples(Plan* node)
|
|
{
|
|
int dn_index = 0;
|
|
int dop = node->dop;
|
|
double total_tuples = 0.0;
|
|
ListCell* nodeitem = NULL;
|
|
List* exec_nodeList = NIL;
|
|
ExecNodes* exec_nodes = NULL;
|
|
|
|
exec_nodes = ng_get_dest_execnodes(node);
|
|
exec_nodeList = exec_nodes->nodeList;
|
|
|
|
foreach (nodeitem, exec_nodeList) {
|
|
dn_index = lfirst_int(nodeitem);
|
|
|
|
for (int j = 0; j < dop; j++) {
|
|
/* avoid for threadinstr is NULL */
|
|
Instrumentation* instr = u_sess->instr_cxt.global_instr->getInstrSlot(dn_index, node->plan_node_id, j);
|
|
if (instr != NULL)
|
|
total_tuples += instr->ntuples;
|
|
}
|
|
}
|
|
|
|
return total_tuples;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Function to help create plan issues
|
|
*/
|
|
static QueryPlanIssueDesc* CreateQueryPlanIssue(PlanState* node, QueryIssueType type)
|
|
{
|
|
QueryPlanIssueDesc* plan_issue = (QueryPlanIssueDesc*)palloc0(sizeof(QueryPlanIssueDesc));
|
|
|
|
plan_issue->issue_type = type;
|
|
plan_issue->issue_plannode = node;
|
|
plan_issue->issue_suggestion = makeStringInfo();
|
|
|
|
return plan_issue;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Function to help delete plan issues
|
|
*/
|
|
static void DeleteQueryPlanIssue(QueryPlanIssueDesc* query_issue)
|
|
{
|
|
if (query_issue == NULL) {
|
|
return;
|
|
}
|
|
|
|
if (query_issue->issue_suggestion) {
|
|
if (query_issue->issue_suggestion->data != NULL) {
|
|
pfree_ext(query_issue->issue_suggestion->data);
|
|
}
|
|
|
|
pfree_ext(query_issue->issue_suggestion);
|
|
}
|
|
|
|
pfree_ext(query_issue);
|
|
}
|
|
|
|
/*
|
|
* - Brief: Determine whether current Plan node has "not plan-shipping" issue
|
|
* - Parameter:
|
|
* @node: to be checked plan node
|
|
* - Return:
|
|
* @not-null: the plan node has "not plan-shipping" issue
|
|
* @null: the plan node does not has "not plan-shipping" issue
|
|
*/
|
|
static inline QueryPlanIssueDesc* CheckQueryNotPlanShipping(void)
|
|
{
|
|
QueryPlanIssueDesc* plan_issue = NULL;
|
|
|
|
/* return null immediately when the plan can be shipped */
|
|
if (0 == strlen(u_sess->opt_cxt.not_shipping_info->not_shipping_reason)) {
|
|
return NULL;
|
|
}
|
|
/* Start standard plan node-level check for query not plan-shipping */
|
|
plan_issue = CreateQueryPlanIssue(NULL, QueryShipping);
|
|
|
|
appendStringInfo(plan_issue->issue_suggestion,
|
|
"SQL is not plan-shipping, reason : \"%s\"",
|
|
u_sess->opt_cxt.not_shipping_info->not_shipping_reason);
|
|
|
|
errno_t errorno = memset_s(
|
|
u_sess->opt_cxt.not_shipping_info->not_shipping_reason, NOTPLANSHIPPING_LENGTH, '\0', NOTPLANSHIPPING_LENGTH);
|
|
securec_check_c(errorno, "\0", "\0");
|
|
|
|
return plan_issue;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Determine whether current Plan node has "large table broadcast" issue
|
|
* - Parameter:
|
|
* @node: to be checked plan node
|
|
* - Return:
|
|
* @not-null: the plan node has "large table broadcast" issue
|
|
* @null: the plan node does not has "large table broadcast" issue
|
|
*/
|
|
static inline QueryPlanIssueDesc* CheckLargeTableInBroadcast(PlanState* node, int dn_num, double total_tuples)
|
|
{
|
|
Assert(node != NULL);
|
|
Stream* sn = NULL;
|
|
QueryPlanIssueDesc* plan_issue = NULL;
|
|
|
|
sn = (Stream*)node->plan;
|
|
|
|
if (sn->type == STREAM_BROADCAST && total_tuples >= (double)(dn_num * LargerTable_Broadcast_Threshold)) {
|
|
plan_issue = CreateQueryPlanIssue(node, LargeTableBroadCast);
|
|
|
|
appendStringInfo(plan_issue->issue_suggestion,
|
|
"PlanNode[%d] Large Table in Broadcast \"%s\"",
|
|
node->plan->plan_node_id,
|
|
OperatorName(node->plan));
|
|
|
|
node->plan_issues = lappend(node->plan_issues, plan_issue);
|
|
}
|
|
|
|
return plan_issue;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Determine whether current Plan node has "large table as hashjoin inner" issue
|
|
* - Parameter:
|
|
* @node: to be checked plan node
|
|
* - Return:
|
|
* @not-null: the plan node has "large table as hashjoin inner" issue
|
|
* @null: the plan node does not has "large table as hashjoin inner" issue
|
|
*/
|
|
static inline QueryPlanIssueDesc* CheckLargeTableInHashJoinInner(
|
|
PlanState* node, int dn_num, double rightchild_total_rows, double leftchild_total_rows)
|
|
{
|
|
Assert(node != NULL);
|
|
QueryPlanIssueDesc* plan_issue = NULL;
|
|
|
|
if (rightchild_total_rows >= (double)(dn_num * LTAsHashJoinInner_Rows_Threshold) &&
|
|
((leftchild_total_rows == 0.0 ||
|
|
(rightchild_total_rows / leftchild_total_rows >= LTAsHashJoinInner_Scale_Threshold)))) {
|
|
plan_issue = CreateQueryPlanIssue(node, LargeTableAsHashJoinInner);
|
|
|
|
appendStringInfo(plan_issue->issue_suggestion,
|
|
"PlanNode[%d] Large Table is INNER in HashJoin \"%s\"",
|
|
node->plan->plan_node_id,
|
|
OperatorName(node->plan));
|
|
|
|
node->plan_issues = lappend(node->plan_issues, plan_issue);
|
|
}
|
|
|
|
return plan_issue;
|
|
}
|
|
|
|
/*
|
|
* - Brief: Determine whether current Plan node has "large table in nestloop with equal
|
|
* join condition" issue
|
|
* - Parameter:
|
|
* @node: to be checked plan node
|
|
* - Return:
|
|
* @not-null: the plan node has "large table in nestloop with equal join cond" issue
|
|
* @null: the plan node does not has "large table in nestloop with equal join cond" issue
|
|
*/
|
|
static inline QueryPlanIssueDesc* CheckLargeTableInNestloopWithEqualCondition(
|
|
PlanState* node, int dn_num, double Maxchild_total_size)
|
|
{
|
|
Assert(node != NULL);
|
|
QueryPlanIssueDesc* plan_issue = NULL;
|
|
|
|
if (Maxchild_total_size >= (double)(dn_num * LTWithEqualCondInNestLoop_Rows_Threshold)) {
|
|
plan_issue = CreateQueryPlanIssue(node, LargeTableWithEqualCondInNestLoop);
|
|
|
|
appendStringInfo(plan_issue->issue_suggestion,
|
|
"PlanNode[%d] Large Table with Equal-Condition use Nestloop\"%s\"",
|
|
node->plan->plan_node_id,
|
|
OperatorName(node->plan));
|
|
|
|
node->plan_issues = lappend(node->plan_issues, plan_issue);
|
|
}
|
|
|
|
return plan_issue;
|
|
}
|
|
|
|
/*
 * - Brief: Determine whether current query has "multi column statistics not collect" issues
 * - Parameter:
 *   @querydesc: query desc structure for the current SQL statement; its
 *               plannedstmt->noanalyze_rellist records tables/columns the planner
 *               found without statistics
 * - Return:
 *   @not-null: an issue whose suggestion text lists every not-analyzed table/column
 *   @null: noanalyze_rellist is empty, nothing to report
 *
 * NOTE(review): the three makeStringInfo() buffers allocated below are never
 * explicitly freed before return — presumably they live in a short-lived memory
 * context; confirm against the caller's context handling.
 */
static inline QueryPlanIssueDesc* CheckMultiColumnStatsNotCollect(QueryDesc* querydesc)
{
    List* noanalyze_rellist = querydesc->plannedstmt->noanalyze_rellist;

    /* No not-analyzed relations were recorded during planning: nothing to report */
    if (noanalyze_rellist == NIL) {
        return NULL;
    }

    int relnum = 0;
    ListCell* lc1 = NULL;
    ListCell* lc2 = NULL;
    /* Accumulates single-column names for the current relation (comma separated) */
    StringInfo single_col_total = makeStringInfo();
    /* Scratch buffer for one multi-column stats group */
    StringInfo multi_col_msg = makeStringInfo();
    /* Accumulates all multi-column groups for the current relation */
    StringInfo multi_col_total = makeStringInfo();

    /* NOTE(review): relnum is computed but never read below — candidate for removal */
    relnum = list_length(noanalyze_rellist);

    /* Query-level (node-less) issue; the header line is always emitted */
    QueryPlanIssueDesc* plan_issue = (QueryPlanIssueDesc*)CreateQueryPlanIssue(NULL, StatsNotCollect);
    appendStringInfo(plan_issue->issue_suggestion, "Statistic Not Collect:\n");

    /*
     * The content for stats not-analyzed table in g_NoAnalyzeRelNameList:
     * - element[1]: tableoid, attnum, attnum, attnum
     * - element[2]: tableoid, attnum, attnum, attnum
     * - ...
     * - element[3]: tableoid, attnum, attnum, attnum
     */
    foreach (lc1, noanalyze_rellist) {
        List* record = (List*)lfirst(lc1);

        /* Fetch first element as table oid */
        Oid relid = linitial_oid((List*)linitial(record));

        Relation rel = relation_open(relid, AccessShareLock);
        /* Number of attribute entries after the leading oid cell */
        int attrnum = list_length(record) - 1;
        /* 1-based position of the attribute entry being processed */
        int token = 1;

        /* Skip rel which there is no reltuples and has been analyzed */
        if (0 == rel->rd_rel->reltuples && check_relation_analyzed(relid)) {
            relation_close(rel, AccessShareLock);
            continue;
        }

        /* The 1st cell is the list of rel , so skip it, and get the att id */
        lc2 = lnext(list_head(record));

        /* Fresh per-relation accumulators */
        resetStringInfo(single_col_total);
        resetStringInfo(multi_col_total);
        while (lc2 != NULL) {
            resetStringInfo(multi_col_msg);
            List* tmp_record_list = (List*)lfirst(lc2);

            /* single-col that has no statistics */
            if (list_length(tmp_record_list) == 1) {
                int attid = linitial_int(tmp_record_list);
                if (attid == 0) {
                    /* attid ==0 means the whole table has no statistics, so sikp the colunm info */
                    appendStringInfo(plan_issue->issue_suggestion,
                        " %s.%s\n",
                        quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
                        quote_identifier(get_rel_name(relid)));
                    /* Whole-table entry supersedes column detail: stop scanning this record */
                    break;
                } else {
                    /* Comma-separate subsequent column names */
                    if (single_col_total->len) {
                        appendStringInfo(single_col_total, ",");
                    }
                    appendStringInfo(single_col_total, "%s", quote_identifier((char*)attnumAttName(rel, attid)));
                }
            } else { /* multi-col that has no statistics */
                ListCell* lc3 = NULL;
                /* Emit "schema.table((" then the column group "col1,col2))" */
                appendStringInfo(multi_col_msg,
                    " %s.%s((",
                    quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
                    quote_identifier(get_rel_name(relid)));
                foreach (lc3, tmp_record_list) {
                    int multi_col_attid = lfirst_int(lc3);
                    appendStringInfo(multi_col_msg, "%s", quote_identifier((char*)attnumAttName(rel, multi_col_attid)));
                    if (lnext(lc3)) {
                        appendStringInfo(multi_col_msg, ",");
                    } else {
                        appendStringInfo(multi_col_msg, "))");
                    }
                }
                appendStringInfo(multi_col_total, "%s", multi_col_msg->data);
            }

            /* Last attribute entry for this relation: flush accumulators into the issue */
            if (token == attrnum) {
                if (single_col_total->len) {
                    appendStringInfo(plan_issue->issue_suggestion,
                        " %s.%s(",
                        quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
                        quote_identifier(get_rel_name(relid)));

                    appendStringInfo(plan_issue->issue_suggestion, "%s)", single_col_total->data);
                }

                if (multi_col_total->len) {
                    appendStringInfo(plan_issue->issue_suggestion, "%s", multi_col_total->data);
                }

                /* Neither accumulator filled: fall back to a bare schema.table line */
                if (!single_col_total->len && !multi_col_total->len) {
                    appendStringInfo(plan_issue->issue_suggestion,
                        " %s.%s",
                        quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
                        quote_identifier(get_rel_name(relid)));
                }
                appendStringInfo(plan_issue->issue_suggestion, "\n");
            }

            token++;
            lc2 = lnext(lc2);
        }

        relation_close(rel, AccessShareLock);
    }

    return plan_issue;
}
|
|
|
|
/*
 * - Brief: Check inaccurate A-Rows/E-Rows estimation issue on target plan node
 * - Parameter:
 *   @node: plan node to check inaccurate e-rows
 *   @dn_num: number of datanodes where the node runs
 *   @actual_rows: total actual number of rows
 *
 * - Note: Since E-Rows not accurate issue may be rooted from upper layer, we have
 *   to do issue-deduplicate before return it to end user, it says if DataSkew is caused
 *   by upper steps instead of current one, we are igoring it.
 */
static inline QueryPlanIssueDesc* CheckInaccurateEstimatedRows(PlanState* node, int dn_num, double actual_rows)
{
    Assert(node != NULL);
    QueryPlanIssueDesc* plan_issue = NULL;
    /* Planner's estimated row count for this node */
    double estimated_rows = node->plan->plan_rows;

    /*
     * Determine if E-Rows over-estimated OR under-estimated beyond pre-defined
     * threshold(10 times).
     *
     * NOTE(review): when actual_rows or estimated_rows is 0.0 the division yields
     * IEEE infinity (or NaN for 0/0), so a zero on either side with a large other
     * side still triggers the issue — presumably intentional; confirm.
     */
    if (Max(estimated_rows, actual_rows) >= dn_num * EstimationRows_Threshold &&
        (estimated_rows / actual_rows >= EstimationRows_Scale_Threshold ||
            actual_rows / estimated_rows >= EstimationRows_Scale_Threshold)) {
        plan_issue = CreateQueryPlanIssue(node, InaccurateEstimationRowNum);

        appendStringInfo(plan_issue->issue_suggestion,
            "PlanNode[%d] Inaccurate Estimation-Rows: \"%s\" A-Rows:%.0f, E-Rows:%.0f",
            node->plan->plan_node_id,
            OperatorName(node->plan),
            actual_rows,
            estimated_rows);

        /*
         * Store it in current plan node's issue list anyway to help issue-deduplicating
         * process
         */
        node->plan_issues = lappend(node->plan_issues, plan_issue);
    }

    /*
     * Check if upper steps is already inaccurate to avoid reporting duplicated issue,
     * here return NULL as not reporting issue.
     */
    if (DuplicateWithUnderlyingPlanNodes(node, InaccurateEstimationRowNum)) {
        return NULL;
    }

    return plan_issue;
}
|
|
|
|
/*
|
|
* - Brief: Check if DataSkew issues happned on target plan node
|
|
* - Parameter:
|
|
* @node: plan node to check data skew issues
|
|
* @min_dn_tuples: min processed-tuples datanodes
|
|
* @max_dn_tuples: max processed-tuples datanodes
|
|
*
|
|
* - Note: Since DatgaSkew issue may be rooted from upper layer, we have to do
|
|
* issue-deduplicate before return it to end user, it says if DataSkew is caused
|
|
* by upper steps instead of current one, we are igoring it.
|
|
*/
|
|
static inline QueryPlanIssueDesc* CheckDataSkew(PlanState* node, double min_dn_tuples, double max_dn_tuples)
|
|
{
|
|
Assert(node != NULL);
|
|
QueryPlanIssueDesc* plan_issue = NULL;
|
|
|
|
/*
|
|
* Check if in target plan node the num of processed tuples is differed over
|
|
* pre-defined threshold (10 times)
|
|
*/
|
|
if ((min_dn_tuples == 0 || max_dn_tuples / min_dn_tuples >= DataSkew_Scale_Threshold) &&
|
|
max_dn_tuples >= DataSkew_Rows_Threshold) {
|
|
plan_issue = CreateQueryPlanIssue(node, DataSkew);
|
|
|
|
appendStringInfo(plan_issue->issue_suggestion,
|
|
"PlanNode[%d] DataSkew:\"%s\", min_dn_tuples:%.0f, max_dn_tuples:%.0f",
|
|
node->plan->plan_node_id,
|
|
OperatorName(node->plan),
|
|
min_dn_tuples,
|
|
max_dn_tuples);
|
|
|
|
/*
|
|
* Store it in current plan node's issue list anyway to help issue-deduplicating
|
|
* process
|
|
*/
|
|
node->plan_issues = lappend(node->plan_issues, plan_issue);
|
|
}
|
|
|
|
/*
|
|
* Check if upper steps is already skewed to avoid dup issue report, here return
|
|
* NULL as not reporting issue.
|
|
*/
|
|
if (DuplicateWithUnderlyingPlanNodes(node, DataSkew)) {
|
|
return NULL;
|
|
}
|
|
|
|
return plan_issue;
|
|
}
|
|
|
|
/*
 * - Brief: Check if unsuitable scan method issues happened on target plan node
 * - Parameter:
 *   @node: plan node to check scan method issues
 *   @dnNum: number of datanodes where the node runs
 *   @totalTuples: total produced tuples
 *   @totalFiltereds: total removed tuples
 *   @isIndex: is index scan or not
 *   @isCstore: is cstore or not
 * - Return:
 *   @not-null: the plan node has "Scan method is not suitable" issue
 *   @null: the plan node does not has "Scan method is not suitable" issue
 */
static inline QueryPlanIssueDesc* CheckUnsuitableScanMethod(
    PlanState* node, int dnNum, double totalTuples, double totalFiltereds, bool isIndex, bool isCstore)
{
    Assert(node != NULL);
    QueryPlanIssueDesc* issue = NULL;
    Assert(dnNum > 0);
    /* Per-DN averages: output = rows produced, input = rows examined (output + filtered) */
    double output = totalTuples / dnNum;
    double input = output + totalFiltereds / dnNum;
    /* Selectivity of the scan.
     * NOTE(review): when input is 0 this is 0/0 = NaN; every comparison below is
     * then false, so an idle scan reports no issue — presumably intentional. */
    double rate = output / input;

    /*
     * For indexscan, if there are too many tuples produced, we should use seq scan.
     * For seqscan, if threr are few tuples produced from many ones, we should use index scan.
     * Cstore scans use their own (lower) row thresholds and (tighter) rate thresholds.
     */
    if ((isIndex && isCstore && output > SCANMETHOD_VECROWS_THRESHOLD && rate > SCANMETHOD_VECRATE_THRESHOLD) ||
        (isIndex && !isCstore && output > SCANMETHOD_ROWS_THRESHOLD && rate > SCANMETHOD_RATE_THRESHOLD) ||
        (!isIndex && isCstore && input > SCANMETHOD_INPUT_THRESHOLD && output <= SCANMETHOD_VECROWS_THRESHOLD &&
            rate < SCANMETHOD_VECRATE_THRESHOLD) ||
        (!isIndex && !isCstore && input > SCANMETHOD_INPUT_THRESHOLD && output <= SCANMETHOD_ROWS_THRESHOLD &&
            rate < SCANMETHOD_RATE_THRESHOLD)) {
        issue = CreateQueryPlanIssue(node, UnsuitableScanMethod);

        appendStringInfo(issue->issue_suggestion,
            "PlanNode[%d] Indexscan is %s used:\"%s\", output:%.0f, filtered:%.0f, rate:%.5f",
            node->plan->plan_node_id,
            isIndex ? "not properly" : "ought to be",
            OperatorName(node->plan),
            totalTuples,
            totalFiltereds,
            rate);

        node->plan_issues = lappend(node->plan_issues, issue);
    }

    return issue;
}
|
|
|
|
/* ----------------------------- External routine definitions -------------------------- */
|
|
/*
|
|
* - Brief: Main entering pointer of SQL Self-Tuning for Query-Level issue
|
|
* diagnosis, invoked when query plan is ready but execution not start
|
|
*
|
|
* - Parameter:
|
|
* @querydesc: query desc to hold plannedstmt objects
|
|
*
|
|
* - Return:
|
|
* List found found plan issues (query level)
|
|
*/
|
|
List* PlanAnalyzerQuery(QueryDesc* querydesc)
|
|
{
|
|
List* issueResults = NIL;
|
|
QueryPlanIssueDesc* issueResultsItem = NULL;
|
|
|
|
/* Try to analyze issues of Not Plan-Shipping */
|
|
if ((issueResultsItem = CheckQueryNotPlanShipping()) != NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
|
|
/* Try to analyze issues of Single/Multi-Column statistic not collected */
|
|
if ((issueResultsItem = CheckMultiColumnStatsNotCollect(querydesc)) != NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
|
|
return issueResults;
|
|
}
|
|
|
|
/*
|
|
* - Brief:
|
|
* Main function entrance of SQL Self-Tuning for Operator-Level issue analysis,
|
|
* it is invoded when execution is finished and runtime information is captured in
|
|
* globalInstrumentation framework.
|
|
*
|
|
* The process works in a bottom up recursive way, first reach the leaf node, we
|
|
* have to do so as we are wanting to make iteration the whole plantree one time
|
|
* while issue-depuing is required. For issue-deduping, it says when under node is
|
|
* already "Data Skew" or "E-Rows Inaccurate", we only report the root of issue.
|
|
*
|
|
* - Parameter:
|
|
* @querydesc: query desc to hold plannedstmt objects
|
|
* @plan: current(top) plan node being analyzed
|
|
*
|
|
* - Return:
|
|
* List of found plan issues (operator level)
|
|
*/
|
|
List* PlanAnalyzerOperator(QueryDesc* querydesc, PlanState* planstate)
|
|
{
|
|
PlanState* ps = planstate;
|
|
Plan* plan = NULL;
|
|
List* issueResults = NIL;
|
|
QueryPlanIssueDesc* issueResultsItem = NULL;
|
|
bool skip_inaccurate_erows = false;
|
|
|
|
/* Return NIL when we get the end of plantree */
|
|
if (ps == NULL) {
|
|
return NIL;
|
|
}
|
|
|
|
/* Return NIL when instrumentation framework is not setup yet */
|
|
if (u_sess->instr_cxt.global_instr == NULL) {
|
|
return NIL;
|
|
}
|
|
|
|
plan = ps->plan;
|
|
|
|
/*
|
|
* Skip some scenarios where A-Rows/E-Rows does not reflect the actual return row numbers
|
|
* - [1]. Subplan contains Limit/VecLimit operator, we are stopping to analyze current operator
|
|
* and its under operators
|
|
* - [2]. Material/VecMaterial indicates plan node re-scan, the A-rows does not reflect actual
|
|
* return rows. Note: An inner plantree of MergeJoin/NestLoop is also a re-scan case, in
|
|
* current release we only report material node where cover most of rescan scenarios, we
|
|
* will improve the overall re-scan case by enhanceing explain-perf frameword to tell us
|
|
* scaned rows & return rows
|
|
*/
|
|
if (nodeTag(plan) == T_Limit || nodeTag(plan) == T_VecLimit) {
|
|
elog(DEBUG1,
|
|
"Skip analyze DataSkew, Inaccurate E-Rows for plan nodes under PlanNode[%d]:%s",
|
|
plan->plan_node_id,
|
|
OperatorName(plan));
|
|
|
|
return NIL;
|
|
} else if (nodeTag(plan) == T_Material || nodeTag(plan) == T_VecMaterial) {
|
|
/*
|
|
* For Material plan node, we are going to skip analyze e-rows inaccurate issues,
|
|
* as it might be re-scaned and a-rows does not reflect actual num of rows that
|
|
* estimated during query-planning
|
|
*/
|
|
elog(DEBUG1,
|
|
"Skip analyze Inaccurate E-Rows due to Re-Scan for plan nodes under PlanNode[%d]:%s",
|
|
plan->plan_node_id,
|
|
OperatorName(plan));
|
|
skip_inaccurate_erows = true;
|
|
}
|
|
|
|
/* Output debug information */
|
|
elog(DEBUG1, "QueryAnalayzer check PlanNode[%d]:%s", plan->plan_node_id, OperatorName(plan));
|
|
|
|
/* Expaned subplans plan nodes, Init SubPlanState nodes (un-correlated expr subselects) */
|
|
if (ps->initPlan) {
|
|
ListCell* lc = NULL;
|
|
foreach (lc, ps->initPlan) {
|
|
SubPlanState* sps = (SubPlanState*)lfirst(lc);
|
|
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, (PlanState*)sps->planstate));
|
|
}
|
|
}
|
|
|
|
/* Recursively analyze left plan tree */
|
|
if (ps->lefttree) {
|
|
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, ps->lefttree));
|
|
}
|
|
|
|
/* Recursively analyze right plan tree */
|
|
if (ps->righttree) {
|
|
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, ps->righttree));
|
|
}
|
|
|
|
/* Find plan list in special plan nodes. */
|
|
List* ps_list = getPlanSubNodes(ps);
|
|
|
|
/* subPlan-s for SubPlanState nodes in my expressions (correlated query) */
|
|
if (ps->subPlan) {
|
|
ListCell* lc = NULL;
|
|
foreach (lc, ps->subPlan) {
|
|
SubPlanState* sps = (SubPlanState*)lfirst(lc);
|
|
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, (PlanState*)sps->planstate));
|
|
}
|
|
}
|
|
|
|
if (ps_list != NIL) {
|
|
ListCell* lc = NULL;
|
|
|
|
/* Go ahead to analyze underlying plan nodes as we don get leaf plan node yet */
|
|
foreach (lc, ps_list) {
|
|
PlanState* plan_state = (PlanState*)lfirst(lc);
|
|
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, plan_state));
|
|
}
|
|
|
|
list_free(ps_list);
|
|
}
|
|
|
|
{
|
|
/* Ignore issue when the plan exec on coords */
|
|
if (plan->exec_type == EXEC_ON_COORDS || plan->exec_type == EXEC_ON_NONE) {
|
|
return issueResults;
|
|
}
|
|
/* if plan->exec_type == EXEC_ON_ALL_NODES and m_planIdOffsetArray[plan->plan_node_id - 1] == 0
|
|
* means plan exec on CN, it should ignore.
|
|
*/
|
|
int* m_planIdOffsetArray = u_sess->instr_cxt.global_instr->get_planIdOffsetArray();
|
|
if (plan->exec_type == EXEC_ON_ALL_NODES && m_planIdOffsetArray[plan->plan_node_id - 1] == 0) {
|
|
return issueResults;
|
|
}
|
|
|
|
/* Start to analyze current plan nodes */
|
|
int dn_num = 0;
|
|
int dn_index = 0;
|
|
double dn_tuples = 0.0;
|
|
double dnFiltereds = 0.0;
|
|
double min_dn_tuples = get_float8_infinity();
|
|
double max_dn_tuples = 0.0;
|
|
double total_tuples = 0.0;
|
|
double totalFiltereds = 0.0;
|
|
bool write_file = false;
|
|
int dop = plan->dop;
|
|
ListCell* nodeitem = NULL;
|
|
List* exec_nodeList = NIL;
|
|
ExecNodes* exec_nodes = NULL;
|
|
|
|
/* Extract info from u_sess->instr_cxt.global_instr and plan for sql tuning */
|
|
exec_nodes = ng_get_dest_execnodes(plan);
|
|
exec_nodeList = exec_nodes->nodeList;
|
|
|
|
dn_num = list_length(exec_nodeList);
|
|
|
|
foreach (nodeitem, exec_nodeList) {
|
|
dn_index = lfirst_int(nodeitem);
|
|
dn_tuples = 0.0;
|
|
dnFiltereds = 0.0;
|
|
for (int j = 0; j < dop; j++) {
|
|
Instrumentation* node_instr =
|
|
u_sess->instr_cxt.global_instr->getInstrSlot(dn_index, plan->plan_node_id, j);
|
|
/* In special, if node_instr is NULL means plan is not executed ,it should return and exit. */
|
|
if (node_instr == NULL)
|
|
return issueResults;
|
|
dn_tuples += node_instr->ntuples;
|
|
|
|
/* index scan: filter + recheck remove tuples */
|
|
dnFiltereds += node_instr->nfiltered1 + node_instr->nfiltered2;
|
|
|
|
/* Count the bloomFilterRows that the Optimizer doesn't count
|
|
* for CheckDataSkew and CheckInaccurateEstimatedRows
|
|
*/
|
|
if (node_instr->bloomFilterRows > 0 && node_instr->bloomFilterBlocks == 0) {
|
|
dn_tuples += node_instr->bloomFilterRows;
|
|
}
|
|
|
|
/*
|
|
* Prepare to report Large Table as Innser in HashJoin issue, in order to
|
|
* report such kind of issue gracefully, we need confirm if inner table
|
|
* encountered as spill issue first.
|
|
*/
|
|
if (IsA(plan, VecHashJoin) && !write_file) {
|
|
/*
|
|
* For VecHashJoin, the temp file spilling info is recored in current
|
|
* T_VecHashJoin node
|
|
*/
|
|
write_file = node_instr->sorthashinfo.hash_writefile;
|
|
} else if (nodeTag(plan) == T_HashJoin && !write_file) {
|
|
Assert(plan->righttree->dop == dop);
|
|
/*
|
|
* For RowHashJoin, the temp file spilling info is recorded in Hash node of its
|
|
* inner(righttree) branch, so we need get instr obejct there
|
|
*/
|
|
Instrumentation* instrument =
|
|
u_sess->instr_cxt.global_instr->getInstrSlot(dn_index, plan->righttree->plan_node_id, j);
|
|
if (instrument != NULL) {
|
|
/* Say HashJoin's inner spilled here */
|
|
write_file = (instrument->sorthashinfo.nbatch > 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Update DN level min/max tuples and total tuples */
|
|
min_dn_tuples = Min(dn_tuples, min_dn_tuples);
|
|
max_dn_tuples = Max(dn_tuples, max_dn_tuples);
|
|
total_tuples += dn_tuples;
|
|
totalFiltereds += dnFiltereds;
|
|
}
|
|
|
|
switch (nodeTag(plan)) {
|
|
case T_VecHashJoin:
|
|
case T_HashJoin: {
|
|
/* Check large table runs as hashjoin inner */
|
|
if (write_file &&
|
|
(issueResultsItem = CheckLargeTableInHashJoinInner(
|
|
ps, dn_num, ComputeSumOfDNTuples(plan->righttree), ComputeSumOfDNTuples(plan->lefttree))) !=
|
|
NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case T_VecNestLoop:
|
|
case T_NestLoop: {
|
|
bool is_eq_nestloop_only = IsEqualConditionandNestLoopOnly(plan);
|
|
/* Check large table runs as nestloop with equalness join-cond */
|
|
if (is_eq_nestloop_only &&
|
|
(issueResultsItem = CheckLargeTableInNestloopWithEqualCondition(ps, dn_num,
|
|
Max(ComputeSumOfDNTuples(plan->righttree), ComputeSumOfDNTuples(plan->lefttree)))) != NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case T_VecStream:
|
|
case T_Stream: {
|
|
/* Check large table runs as Broadcast */
|
|
if ((issueResultsItem = CheckLargeTableInBroadcast(ps, dn_num, total_tuples)) != NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case T_SeqScan: {
|
|
/* Check unsuitable seq scan */
|
|
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, false, false);
|
|
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
|
|
break;
|
|
}
|
|
case T_IndexScan:
|
|
case T_IndexOnlyScan:
|
|
case T_BitmapIndexScan: {
|
|
/* Check unsuitable index scan */
|
|
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, true, false);
|
|
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
|
|
break;
|
|
}
|
|
case T_CStoreScan: {
|
|
/* Check unsuitable seq scan for cstore */
|
|
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, false, true);
|
|
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
|
|
break;
|
|
}
|
|
case T_CStoreIndexScan: {
|
|
/* Check unsuitable index scan for cstore */
|
|
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, true, true);
|
|
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
|
|
break;
|
|
}
|
|
default: {
|
|
/* Nothing to do just keep compiler silent */
|
|
}
|
|
}
|
|
|
|
/* Analyze data skew issue */
|
|
if ((issueResultsItem = CheckDataSkew(ps, min_dn_tuples, max_dn_tuples)) != NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
|
|
/* Analyze Estimation-Rows is inaccurate issue */
|
|
if (!skip_inaccurate_erows &&
|
|
(issueResultsItem = CheckInaccurateEstimatedRows(ps, dn_num, total_tuples)) != NULL) {
|
|
issueResults = lappend(issueResults, issueResultsItem);
|
|
}
|
|
}
|
|
|
|
return issueResults;
|
|
}
|
|
|
|
/*
|
|
* Store plan issues into session-level memory context(for output)
|
|
*/
|
|
void RecordQueryPlanIssues(const List* results)
|
|
{
|
|
ListCell* lc = NULL;
|
|
errno_t rc;
|
|
|
|
if (u_sess->attr.attr_resource.resource_track_level < RESOURCE_TRACK_QUERY || results == NIL) {
|
|
return;
|
|
}
|
|
|
|
char max_issue_desc[MAX_OPTIMIZER_WARNING_LEN];
|
|
|
|
rc = memset_s(max_issue_desc, MAX_OPTIMIZER_WARNING_LEN, '\0', MAX_OPTIMIZER_WARNING_LEN);
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
int current = 0;
|
|
|
|
/* Keep existing warning info which found in SQL_Planned phase */
|
|
if (t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue) {
|
|
Assert(u_sess->attr.attr_resource.resource_track_level == RESOURCE_TRACK_OPERATOR);
|
|
|
|
rc = sprintf_s((char*)max_issue_desc, MAX_OPTIMIZER_WARNING_LEN, "%s\n",
|
|
t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue);
|
|
securec_check_ss_c(rc, "\0", "\0");
|
|
|
|
current += strlen(t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue) + 1;
|
|
|
|
/* Free original used memory space */
|
|
pfree_ext(t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue);
|
|
}
|
|
|
|
/* Scan plan issue list to store them */
|
|
foreach (lc, results) {
|
|
QueryPlanIssueDesc* issue = (QueryPlanIssueDesc*)lfirst(lc);
|
|
int issue_str_len = strlen(issue->issue_suggestion->data);
|
|
|
|
/* Check if we hit max allowed planner issue buffer length */
|
|
if (MAX_OPTIMIZER_WARNING_LEN - current <= issue_str_len + 1) {
|
|
ereport(LOG,
|
|
(errmodule(MOD_OPT),
|
|
(errmsg("Planner issue report is truncated, the rest of planner issues will be skipped"))));
|
|
break;
|
|
}
|
|
|
|
rc = sprintf_s((char*)max_issue_desc + current, MAX_OPTIMIZER_WARNING_LEN - current, "%s\n",
|
|
issue->issue_suggestion->data);
|
|
securec_check_ss_c(rc, "\0", "\0");
|
|
|
|
current += strlen(issue->issue_suggestion->data) + 1;
|
|
DeleteQueryPlanIssue(issue);
|
|
}
|
|
|
|
/* Hold the planer issue info in memory context of workload manager */
|
|
AutoContextSwitch memSwitch(g_instance.wlm_cxt->query_resource_track_mcxt);
|
|
t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue = pstrdup(max_issue_desc);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* - Brief: return the more readable operator name
|
|
*/
|
|
static char* OperatorName(const Plan* plan)
|
|
{
|
|
char* pname = NULL;
|
|
char* sname = NULL;
|
|
char* strategy = NULL;
|
|
char* operation = NULL;
|
|
char* pt_option = NULL;
|
|
char* pt_operation = NULL;
|
|
|
|
GetPlanNodePlainText((Plan*)plan, &pname, &sname, &strategy, &operation, &pt_operation, &pt_option);
|
|
|
|
return sname;
|
|
}
|