Files
openGauss-server/src/gausskernel/optimizer/util/plananalyzer.cpp
2022-09-03 16:22:35 +08:00

1090 lines
39 KiB
C++

/* -------------------------------------------------------------------------
*
* plananalyzer.cpp
* Plan Analyzer for SQL self diagnosis & tuning
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/gausskernel/optimizer/util/plananalyzer.cpp
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include "commands/prepare.h"
#include "executor/exec/execStream.h"
#include "parser/parse_relation.h"
#ifdef PGXC
#include "optimizer/planmain.h"
#endif
#include "utils/builtins.h"
#include "utils/lsyscache.h"
/* -- Criteria/Threshold definition of plan issue -- */
/* Basic factor that identifies the number of Per-DN tuples for "Large Table" */
#define LargeTableFactor 100000
/*
 * Large Table in Broadcast: per-DN tuple threshold; CheckLargeTableInBroadcast
 * multiplies it by the datanode count before comparing with the total tuples.
 */
#define LargerTable_Broadcast_Threshold LargeTableFactor
/*
 * Large table as hashjoin's Inner (see CheckLargeTableInHashJoinInner):
 * - Rows_Threshold: per-DN row threshold for the inner side, multiplied by the datanode count
 * - Scale_Threshold: minimum ratio of inner rows to outer rows
 */
#define LTAsHashJoinInner_Rows_Threshold LargeTableFactor
#define LTAsHashJoinInner_Scale_Threshold 10
/*
 * Large Table in nestloop with equal condition: per-DN row threshold, multiplied
 * by the datanode count in CheckLargeTableInNestloopWithEqualCondition.
 */
#define LTWithEqualCondInNestLoop_Rows_Threshold LargeTableFactor
/*
 * Data Skew (see CheckDataSkew):
 * - Rows_Threshold: minimum tuple count of the datanode that processed the most tuples
 * - Scale_Threshold: minimum ratio between the max and the min per-DN tuple count
 */
#define DataSkew_Rows_Threshold 100000
#define DataSkew_Scale_Threshold 10
/*
 * E-Rows not accurate (see CheckInaccurateEstimatedRows):
 * - EstimationRows_Threshold: multiplied by the datanode count, the larger of
 *   E-Rows and A-Rows has to reach it
 * - EstimationRows_Scale_Threshold: minimum ratio between E-Rows and A-Rows,
 *   checked in both directions
 */
#define EstimationRows_Threshold 100000
#define EstimationRows_Scale_Threshold 10
/*
 * Scan Method (see CheckUnsuitableScanMethod); all values are compared with
 * per-DN averages:
 * - ROWS/RATE: output-row and output/input rate thresholds used when isCstore is false
 * - VECROWS/VECRATE: the same two thresholds used when isCstore is true
 * - INPUT: minimum input rows required before a non-index scan is reported
 */
#define SCANMETHOD_ROWS_THRESHOLD 10000
#define SCANMETHOD_RATE_THRESHOLD 0.001
#define SCANMETHOD_VECROWS_THRESHOLD 100
#define SCANMETHOD_VECRATE_THRESHOLD 0.0001
#define SCANMETHOD_INPUT_THRESHOLD 10000
/* Returns the operator name used in issue texts and debug logs; defined at the end of this file */
static char* OperatorName(const Plan* plan);
/* External helpers that are declared here directly instead of through a header */
extern double get_float8_infinity(void);
extern bool check_relation_analyzed(Oid relid);
/* Size in bytes of the buffer in which RecordQueryPlanIssues assembles the issue report */
#define MAX_OPTIMIZER_WARNING_LEN 2048
/* Forward declarations of the local routines defined below */
static inline QueryPlanIssueDesc* CheckQueryNotPlanShipping();
static bool DuplicateWithUnderlyingPlanNodes(PlanState* node, QueryIssueType type);
static List* getPlanSubNodes(const PlanState* node);
static inline QueryPlanIssueDesc* CheckLargeTableInBroadcast(PlanState* node, int dn_num, double total_tuples);
static inline QueryPlanIssueDesc* CheckLargeTableInHashJoinInner(
PlanState* node, int dn_num, double rightchild_total_rows, double leftchild_total_rows);
static inline QueryPlanIssueDesc* CheckLargeTableInNestloopWithEqualCondition(
PlanState* node, int dn_num, double Maxchild_total_size);
static inline QueryPlanIssueDesc* CheckDataSkew(PlanState* node, double min_dn_tuples, double max_dn_tuples);
static inline QueryPlanIssueDesc* CheckMultiColumnStatsNotCollect(QueryDesc* querydesc);
static inline QueryPlanIssueDesc* CheckInaccurateEstimatedRows(PlanState* node, int dn_num, double actual_rows);
static double ComputeSumOfDNTuples(Plan* node);
static QueryPlanIssueDesc* CreateQueryPlanIssue(PlanState* node, QueryIssueType type);
static void DeleteQueryPlanIssue(QueryPlanIssueDesc* query_issue);
static bool ContainsDuplicateIssues(PlanState* node, QueryIssueType type);
static bool IsEqualConditionandNestLoopOnly(Plan* plan);
/* ----------------------------- Local routine definitions ----------------------------- */
/*
 * - Brief: Walk the join qualifiers of a NestLoop/VecNestLoop plan node and tell
 *   whether at least one of them is accepted by isEqualExpr()
 * - Parameter:
 *      @plan: plan node, asserted to be T_NestLoop or T_VecNestLoop
 * - Return:
 *      true once a join qualifier passes isEqualExpr(), false when none does
 */
static bool IsEqualConditionandNestLoopOnly(Plan* plan)
{
    Assert(nodeTag(plan) == T_NestLoop || nodeTag(plan) == T_VecNestLoop);

    bool found_equal_qual = false;
    List* join_quals = ((NestLoop*)plan)->join.joinqual;
    ListCell* qual_cell = NULL;

    foreach (qual_cell, join_quals) {
        Node* qual = (Node*)lfirst(qual_cell);
        if (isEqualExpr(qual)) {
            /* One equality qualifier is enough, no need to look at the rest */
            found_equal_qual = true;
            break;
        }
    }

    return found_equal_qual;
}
/*
 * - Brief: Append the first 'count' entries of a PlanState pointer array to
 *   'dest' in array order and return the resulting list
 */
static List* AppendPlanStateArray(List* dest, PlanState** states, int count)
{
    int idx = 0;
    while (idx < count) {
        dest = lappend(dest, states[idx]);
        idx++;
    }
    return dest;
}

/*
 * - Brief: Collect the child plan states of a node that are kept outside of its
 *   left/right tree (Append, ModifyTable, MergeAppend, BitmapAnd/Or and
 *   SubqueryScan style nodes)
 * - Parameter:
 *      @node: plan state to inspect, may be NULL
 * - Return:
 *      A newly built list of PlanState pointers, or NIL when node is NULL, the
 *      node type is not one of the handled ones, or it holds no child plans
 */
static List* getPlanSubNodes(const PlanState* node)
{
    if (node == NULL) {
        return NIL;
    }

    List* children = NIL;

    /* The plan tag decides which state structure the node is read as */
    switch (nodeTag(node->plan)) {
        case T_Append:
        case T_VecAppend: {
            AppendState* append_state = (AppendState*)node;
            children = AppendPlanStateArray(children, append_state->appendplans, append_state->as_nplans);
            break;
        }
        case T_ModifyTable:
        case T_VecModifyTable: {
            ModifyTableState* modify_state = (ModifyTableState*)node;
            children = AppendPlanStateArray(children, modify_state->mt_plans, modify_state->mt_nplans);
            break;
        }
        case T_MergeAppend:
        case T_VecMergeAppend: {
            MergeAppendState* merge_state = (MergeAppendState*)node;
            children = AppendPlanStateArray(children, merge_state->mergeplans, merge_state->ms_nplans);
            break;
        }
        case T_BitmapAnd:
        case T_CStoreIndexAnd: {
            BitmapAndState* and_state = (BitmapAndState*)node;
            children = AppendPlanStateArray(children, and_state->bitmapplans, and_state->nplans);
            break;
        }
        case T_BitmapOr:
        case T_CStoreIndexOr: {
            BitmapOrState* or_state = (BitmapOrState*)node;
            children = AppendPlanStateArray(children, or_state->bitmapplans, or_state->nplans);
            break;
        }
        case T_SubqueryScan:
        case T_VecSubqueryScan: {
            /* A subquery scan has exactly one extra child: its subplan */
            SubqueryScanState* subquery_state = (SubqueryScanState*)node;
            children = lappend(children, subquery_state->subplan);
            break;
        }
        default:
            /* Every other node type keeps its children in lefttree/righttree only */
            break;
    }

    return children;
}
/*
 * - Brief: Tell whether a node directly below the given one already records an
 *   issue of the given type
 * - Parameter:
 *      @node: plan state whose children are inspected
 *      @type: issue type to look for
 * - Return:
 *      true when the left tree, the right tree, or one of the nodes returned by
 *      getPlanSubNodes() holds an issue of that type in its plan_issues list
 */
static bool DuplicateWithUnderlyingPlanNodes(PlanState* node, QueryIssueType type)
{
    /* Outer (left) child first */
    PlanState* left_child = node->lefttree;
    if (left_child != NULL && ContainsDuplicateIssues(left_child, type)) {
        return true;
    }

    /* Then the inner (right) child */
    PlanState* right_child = node->righttree;
    if (right_child != NULL && ContainsDuplicateIssues(right_child, type)) {
        return true;
    }

    /* Finally the children that live outside left/right tree, e.g. setop branches */
    List* sub_nodes = getPlanSubNodes(node);
    if (sub_nodes == NIL) {
        return false;
    }

    bool found = false;
    ListCell* sub_cell = NULL;
    foreach (sub_cell, sub_nodes) {
        if (ContainsDuplicateIssues((PlanState*)lfirst(sub_cell), type)) {
            found = true;
            break;
        }
    }

    /* Only the list cells are released, the plan states belong to the plan tree */
    list_free(sub_nodes);
    return found;
}
/*
 * - Brief: Tell whether the given plan state already records an issue of the
 *   given type in its plan_issues list
 * - Parameter:
 *      @node: plan state to inspect
 *      @type: issue type to look for
 * - Return:
 *      true when such an issue is recorded, false otherwise (including when the
 *      list is empty)
 */
static bool ContainsDuplicateIssues(PlanState* node, QueryIssueType type)
{
    List* recorded = node->plan_issues;
    if (recorded == NIL) {
        return false;
    }

    bool matched = false;
    for (ListCell* cell = list_head(recorded); cell != NULL && !matched; cell = lnext(cell)) {
        QueryPlanIssueDesc* recorded_issue = (QueryPlanIssueDesc*)lfirst(cell);
        matched = (recorded_issue->issue_type == type);
    }

    return matched;
}
/*
 * - Brief: Sum up the tuples processed by one plan node over all of its
 *   destination datanodes and all of its dop slots
 * - Parameter:
 *      @node: plan node whose instrumentation is read from
 *             u_sess->instr_cxt.global_instr
 * - Return:
 *      Sum of ntuples of every available instrumentation slot; slots for which
 *      getInstrSlot() returns NULL are skipped
 */
static double ComputeSumOfDNTuples(Plan* node)
{
    const int dop = node->dop;
    ExecNodes* dest_nodes = ng_get_dest_execnodes(node);
    double tuple_sum = 0.0;
    ListCell* dn_cell = NULL;

    foreach (dn_cell, dest_nodes->nodeList) {
        int dn_index = lfirst_int(dn_cell);

        for (int slot = 0; slot < dop; slot++) {
            Instrumentation* instr =
                u_sess->instr_cxt.global_instr->getInstrSlot(dn_index, node->plan_node_id, slot);
            if (instr == NULL) {
                /* No instrumentation for this slot, nothing to add */
                continue;
            }
            tuple_sum += instr->ntuples;
        }
    }

    return tuple_sum;
}
/*
 * - Brief: Allocate a zero-initialized plan issue descriptor
 * - Parameter:
 *      @node: plan state the issue belongs to, NULL is passed for query-level issues
 *      @type: kind of issue
 * - Return:
 *      New descriptor with issue_type and issue_plannode set and an empty
 *      issue_suggestion string ready to be appended to
 */
static QueryPlanIssueDesc* CreateQueryPlanIssue(PlanState* node, QueryIssueType type)
{
    QueryPlanIssueDesc* new_issue = (QueryPlanIssueDesc*)palloc0(sizeof(QueryPlanIssueDesc));

    new_issue->issue_suggestion = makeStringInfo();
    new_issue->issue_plannode = node;
    new_issue->issue_type = type;

    return new_issue;
}
/*
 * - Brief: Release a plan issue descriptor together with its suggestion string
 * - Parameter:
 *      @query_issue: descriptor to release, NULL is accepted and ignored
 */
static void DeleteQueryPlanIssue(QueryPlanIssueDesc* query_issue)
{
    if (query_issue != NULL) {
        if (query_issue->issue_suggestion) {
            /* Release the text buffer before the StringInfo that owns it */
            if (query_issue->issue_suggestion->data != NULL) {
                pfree_ext(query_issue->issue_suggestion->data);
            }
            pfree_ext(query_issue->issue_suggestion);
        }
        pfree_ext(query_issue);
    }
}
/*
 * - Brief: Determine whether the current query has a "not plan-shipping" issue,
 *   based on the reason text kept in u_sess->opt_cxt.not_shipping_info
 * - Return:
 *      @not-null: a reason is recorded; the returned query-level issue (no plan
 *                 node attached) carries it, and the recorded reason is cleared
 *      @null: the recorded reason is empty
 */
static inline QueryPlanIssueDesc* CheckQueryNotPlanShipping(void)
{
    char* reason = u_sess->opt_cxt.not_shipping_info->not_shipping_reason;

    /* An empty reason means there is nothing to report */
    if (strlen(reason) == 0) {
        return NULL;
    }

    QueryPlanIssueDesc* shipping_issue = CreateQueryPlanIssue(NULL, QueryShipping);
    appendStringInfo(shipping_issue->issue_suggestion, "SQL is not plan-shipping, reason : \"%s\"", reason);

    /* The reason has been copied into the issue text, wipe the session buffer */
    errno_t rc = memset_s(reason, NOTPLANSHIPPING_LENGTH, '\0', NOTPLANSHIPPING_LENGTH);
    securec_check_c(rc, "\0", "\0");

    return shipping_issue;
}
/*
 * - Brief: Determine whether a stream plan node has the "large table broadcast" issue
 * - Parameter:
 *      @node: plan state to check, its plan is read as a Stream node
 *      @dn_num: number of datanodes, scales the per-DN threshold
 *      @total_tuples: total number of tuples processed by the node
 * - Return:
 *      @not-null: the stream is a broadcast and reaches the threshold; the issue
 *                 is also appended to node->plan_issues
 *      @null: the node does not have the "large table broadcast" issue
 */
static inline QueryPlanIssueDesc* CheckLargeTableInBroadcast(PlanState* node, int dn_num, double total_tuples)
{
    Assert(node != NULL);

    Stream* stream = (Stream*)node->plan;
    if (stream->type != STREAM_BROADCAST) {
        return NULL;
    }

    const bool is_large = total_tuples >= (double)(dn_num * LargerTable_Broadcast_Threshold);
    if (!is_large) {
        return NULL;
    }

    QueryPlanIssueDesc* broadcast_issue = CreateQueryPlanIssue(node, LargeTableBroadCast);
    appendStringInfo(broadcast_issue->issue_suggestion,
        "PlanNode[%d] Large Table in Broadcast \"%s\"",
        node->plan->plan_node_id,
        OperatorName(node->plan));
    node->plan_issues = lappend(node->plan_issues, broadcast_issue);

    return broadcast_issue;
}
/*
 * - Brief: Determine whether a hashjoin plan node has the "large table as
 *   hashjoin inner" issue
 * - Parameter:
 *      @node: plan state to check
 *      @dn_num: number of datanodes, scales the per-DN row threshold
 *      @rightchild_total_rows: total rows of the inner (right) child
 *      @leftchild_total_rows: total rows of the outer (left) child
 * - Return:
 *      @not-null: the inner side reaches the row threshold and the outer side is
 *                 either empty or smaller by the scale threshold; the issue is
 *                 also appended to node->plan_issues
 *      @null: the node does not have the "large table as hashjoin inner" issue
 */
static inline QueryPlanIssueDesc* CheckLargeTableInHashJoinInner(
    PlanState* node, int dn_num, double rightchild_total_rows, double leftchild_total_rows)
{
    Assert(node != NULL);

    const bool inner_is_large = rightchild_total_rows >= (double)(dn_num * LTAsHashJoinInner_Rows_Threshold);
    if (!inner_is_large) {
        return NULL;
    }

    /* The zero test comes first so that the ratio is never computed with an empty outer side */
    const bool inner_dominates = (leftchild_total_rows == 0.0) ||
                                 (rightchild_total_rows / leftchild_total_rows >= LTAsHashJoinInner_Scale_Threshold);
    if (!inner_dominates) {
        return NULL;
    }

    QueryPlanIssueDesc* inner_issue = CreateQueryPlanIssue(node, LargeTableAsHashJoinInner);
    appendStringInfo(inner_issue->issue_suggestion,
        "PlanNode[%d] Large Table is INNER in HashJoin \"%s\"",
        node->plan->plan_node_id,
        OperatorName(node->plan));
    node->plan_issues = lappend(node->plan_issues, inner_issue);

    return inner_issue;
}
/*
 * - Brief: Determine whether a nestloop plan node has the "large table in
 *   nestloop with equal join condition" issue. Only the size is checked here,
 *   the equal-condition test is expected to be done by the caller.
 * - Parameter:
 *      @node: plan state to check
 *      @dn_num: number of datanodes, scales the per-DN row threshold
 *      @Maxchild_total_size: row count of the larger child of the join
 * - Return:
 *      @not-null: the larger child reaches the threshold; the issue is also
 *                 appended to node->plan_issues
 *      @null: the node does not have this issue
 */
static inline QueryPlanIssueDesc* CheckLargeTableInNestloopWithEqualCondition(
    PlanState* node, int dn_num, double Maxchild_total_size)
{
    Assert(node != NULL);

    const bool child_is_large = Maxchild_total_size >= (double)(dn_num * LTWithEqualCondInNestLoop_Rows_Threshold);
    if (!child_is_large) {
        return NULL;
    }

    QueryPlanIssueDesc* nestloop_issue = CreateQueryPlanIssue(node, LargeTableWithEqualCondInNestLoop);
    appendStringInfo(nestloop_issue->issue_suggestion,
        "PlanNode[%d] Large Table with Equal-Condition use Nestloop\"%s\"",
        node->plan->plan_node_id,
        OperatorName(node->plan));
    node->plan_issues = lappend(node->plan_issues, nestloop_issue);

    return nestloop_issue;
}
/*
 * - Brief: Determine whether the current query has a "single/multi column
 *   statistics not collected" issue, based on plannedstmt->noanalyze_rellist
 * - Parameter:
 *      @querydesc: query desc structure of the current SQL statement
 * - Return:
 *      @not-null: noanalyze_rellist is not empty; the returned query-level issue
 *                 (no plan node attached) starts with "Statistic Not Collect:"
 *                 followed by one line per reported relation
 *      @null: noanalyze_rellist is empty
 * - Note:
 *      NOTE(review): when every relation in the list is skipped by the
 *      "no tuples and already analyzed" test, the issue is still returned and
 *      contains the header line only -- confirm this is intended.
 */
static inline QueryPlanIssueDesc* CheckMultiColumnStatsNotCollect(QueryDesc* querydesc)
{
List* noanalyze_rellist = querydesc->plannedstmt->noanalyze_rellist;
if (noanalyze_rellist == NIL) {
return NULL;
}
/* relnum is assigned below but not read anywhere else in this function */
int relnum = 0;
ListCell* lc1 = NULL;
ListCell* lc2 = NULL;
/* Per-relation scratch buffers, reset for each relation (multi_col_msg for each column group) */
StringInfo single_col_total = makeStringInfo();
StringInfo multi_col_msg = makeStringInfo();
StringInfo multi_col_total = makeStringInfo();
relnum = list_length(noanalyze_rellist);
QueryPlanIssueDesc* plan_issue = (QueryPlanIssueDesc*)CreateQueryPlanIssue(NULL, StatsNotCollect);
appendStringInfo(plan_issue->issue_suggestion, "Statistic Not Collect:\n");
/*
 * Layout of noanalyze_rellist as it is read below: one record (a list) per relation.
 * - 1st cell of a record: a list whose first element is the table oid
 * - every following cell: a list of attribute numbers
 *   - one attribute number: a single column without statistics; the value 0
 *     stands for the whole table
 *   - several attribute numbers: a column group without multi-column statistics
 */
foreach (lc1, noanalyze_rellist) {
List* record = (List*)lfirst(lc1);
/* Fetch first element as table oid */
Oid relid = linitial_oid((List*)linitial(record));
Relation rel = relation_open(relid, AccessShareLock);
/* Number of attribute lists in this record; token counts them starting from 1 */
int attrnum = list_length(record) - 1;
int token = 1;
/* Skip a relation that has no reltuples and has already been analyzed */
if (0 == rel->rd_rel->reltuples && check_relation_analyzed(relid)) {
relation_close(rel, AccessShareLock);
continue;
}
/* The 1st cell is the list holding the relation oid, so skip it and start at the attribute lists */
lc2 = lnext(list_head(record));
resetStringInfo(single_col_total);
resetStringInfo(multi_col_total);
while (lc2 != NULL) {
resetStringInfo(multi_col_msg);
List* tmp_record_list = (List*)lfirst(lc2);
/* single-col that has no statistics */
if (list_length(tmp_record_list) == 1) {
int attid = linitial_int(tmp_record_list);
if (attid == 0) {
/*
 * attid == 0 means the whole table has no statistics: report "schema.table"
 * only and leave the loop, so the remaining attribute lists and anything
 * collected so far for this relation are not emitted.
 */
appendStringInfo(plan_issue->issue_suggestion,
" %s.%s\n",
quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
quote_identifier(get_rel_name(relid)));
break;
} else {
/* Collect single column names as a comma separated list */
if (single_col_total->len) {
appendStringInfo(single_col_total, ",");
}
appendStringInfo(single_col_total, "%s", quote_identifier((char*)attnumAttName(rel, attid)));
}
} else { /* multi-col that has no statistics */
ListCell* lc3 = NULL;
/* Build " schema.table((col1,col2,...))" for this column group */
appendStringInfo(multi_col_msg,
" %s.%s((",
quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
quote_identifier(get_rel_name(relid)));
foreach (lc3, tmp_record_list) {
int multi_col_attid = lfirst_int(lc3);
appendStringInfo(multi_col_msg, "%s", quote_identifier((char*)attnumAttName(rel, multi_col_attid)));
if (lnext(lc3)) {
appendStringInfo(multi_col_msg, ",");
} else {
appendStringInfo(multi_col_msg, "))");
}
}
appendStringInfo(multi_col_total, "%s", multi_col_msg->data);
}
/* After the last attribute list, write everything collected for this relation as one line */
if (token == attrnum) {
if (single_col_total->len) {
appendStringInfo(plan_issue->issue_suggestion,
" %s.%s(",
quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
quote_identifier(get_rel_name(relid)));
appendStringInfo(plan_issue->issue_suggestion, "%s)", single_col_total->data);
}
if (multi_col_total->len) {
appendStringInfo(plan_issue->issue_suggestion, "%s", multi_col_total->data);
}
/* Nothing was collected: fall back to the bare "schema.table" */
if (!single_col_total->len && !multi_col_total->len) {
appendStringInfo(plan_issue->issue_suggestion,
" %s.%s",
quote_identifier(get_namespace_name(RelationGetNamespace(rel))),
quote_identifier(get_rel_name(relid)));
}
appendStringInfo(plan_issue->issue_suggestion, "\n");
}
token++;
lc2 = lnext(lc2);
}
relation_close(rel, AccessShareLock);
}
return plan_issue;
}
/*
 * - Brief: Check the inaccurate A-Rows/E-Rows estimation issue on the target plan node
 * - Parameter:
 *      @node: plan node to check, its E-Rows are taken from node->plan->plan_rows
 *      @dn_num: number of datanodes where the node runs
 *      @actual_rows: total actual number of rows
 * - Return:
 *      @not-null: the estimation is inaccurate and no node directly below
 *                 already records the same issue
 *      @null: the estimation is acceptable, or the issue is a duplicate of one
 *             recorded on an underlying node
 *
 * - Note: An inaccurate estimation may be rooted in an underlying plan node, so
 *   the issue is de-duplicated before it is returned to the end user. The issue
 *   is nevertheless kept in node->plan_issues, so that the nodes above can
 *   de-duplicate against this node in turn.
 */
static inline QueryPlanIssueDesc* CheckInaccurateEstimatedRows(PlanState* node, int dn_num, double actual_rows)
{
    Assert(node != NULL);

    QueryPlanIssueDesc* erows_issue = NULL;
    const double estimated_rows = node->plan->plan_rows;
    const double larger_rows = Max(estimated_rows, actual_rows);

    /*
     * Large enough row count, and E-Rows over-estimated OR under-estimated
     * beyond the pre-defined scale (10 times)
     */
    if (larger_rows >= dn_num * EstimationRows_Threshold &&
        (estimated_rows / actual_rows >= EstimationRows_Scale_Threshold ||
            actual_rows / estimated_rows >= EstimationRows_Scale_Threshold)) {
        erows_issue = CreateQueryPlanIssue(node, InaccurateEstimationRowNum);
        appendStringInfo(erows_issue->issue_suggestion,
            "PlanNode[%d] Inaccurate Estimation-Rows: \"%s\" A-Rows:%.0f, E-Rows:%.0f",
            node->plan->plan_node_id,
            OperatorName(node->plan),
            actual_rows,
            estimated_rows);

        /* Always recorded on the node itself, this is what the de-duplication reads */
        node->plan_issues = lappend(node->plan_issues, erows_issue);
    }

    /* Do not report again what an underlying node has already been flagged for */
    return DuplicateWithUnderlyingPlanNodes(node, InaccurateEstimationRowNum) ? NULL : erows_issue;
}
/*
 * - Brief: Check if a DataSkew issue happened on the target plan node
 * - Parameter:
 *      @node: plan node to check
 *      @min_dn_tuples: tuples processed by the datanode that processed the fewest
 *      @max_dn_tuples: tuples processed by the datanode that processed the most
 * - Return:
 *      @not-null: the node is skewed and no node directly below already records
 *                 the same issue
 *      @null: the node is not skewed, or the issue is a duplicate of one
 *             recorded on an underlying node
 *
 * - Note: A DataSkew issue may be rooted in an underlying plan node, so the
 *   issue is de-duplicated before it is returned to the end user. The issue is
 *   nevertheless kept in node->plan_issues, so that the nodes above can
 *   de-duplicate against this node in turn.
 */
static inline QueryPlanIssueDesc* CheckDataSkew(PlanState* node, double min_dn_tuples, double max_dn_tuples)
{
    Assert(node != NULL);

    QueryPlanIssueDesc* skew_issue = NULL;

    /* Per-DN tuple counts differ by the pre-defined scale (10 times), or one datanode got nothing */
    const bool ratio_exceeded =
        (min_dn_tuples == 0) || (max_dn_tuples / min_dn_tuples >= DataSkew_Scale_Threshold);

    if (ratio_exceeded && max_dn_tuples >= DataSkew_Rows_Threshold) {
        skew_issue = CreateQueryPlanIssue(node, DataSkew);
        appendStringInfo(skew_issue->issue_suggestion,
            "PlanNode[%d] DataSkew:\"%s\", min_dn_tuples:%.0f, max_dn_tuples:%.0f",
            node->plan->plan_node_id,
            OperatorName(node->plan),
            min_dn_tuples,
            max_dn_tuples);

        /* Always recorded on the node itself, this is what the de-duplication reads */
        node->plan_issues = lappend(node->plan_issues, skew_issue);
    }

    /* Do not report again what an underlying node has already been flagged for */
    return DuplicateWithUnderlyingPlanNodes(node, DataSkew) ? NULL : skew_issue;
}
/*
 * - Brief: Check if an unsuitable scan method issue happened on the target plan node
 * - Parameter:
 *      @node: plan node to check
 *      @dnNum: number of datanodes where the node runs, asserted to be positive
 *      @totalTuples: total produced tuples
 *      @totalFiltereds: total removed tuples
 *      @isIndex: is index scan or not
 *      @isCstore: is cstore or not
 * - Return:
 *      @not-null: the plan node has the "Scan method is not suitable" issue; it
 *                 is also appended to node->plan_issues
 *      @null: the plan node does not have the issue
 */
static inline QueryPlanIssueDesc* CheckUnsuitableScanMethod(
    PlanState* node, int dnNum, double totalTuples, double totalFiltereds, bool isIndex, bool isCstore)
{
    Assert(node != NULL);
    Assert(dnNum > 0);

    /* Per-DN averages: rows produced, rows read (produced + removed) and their ratio */
    double output = totalTuples / dnNum;
    double input = output + totalFiltereds / dnNum;
    double rate = output / input;

    /* Column store and row store use different row/rate thresholds */
    const double rows_threshold = isCstore ? SCANMETHOD_VECROWS_THRESHOLD : SCANMETHOD_ROWS_THRESHOLD;
    const double rate_threshold = isCstore ? SCANMETHOD_VECRATE_THRESHOLD : SCANMETHOD_RATE_THRESHOLD;

    bool unsuitable = false;
    if (isIndex) {
        /* An index scan that produces too many tuples should have been a seq scan */
        unsuitable = (output > rows_threshold && rate > rate_threshold);
    } else {
        /* A seq scan that keeps only a few tuples out of many should have been an index scan */
        unsuitable = (input > SCANMETHOD_INPUT_THRESHOLD && output <= rows_threshold && rate < rate_threshold);
    }

    if (!unsuitable) {
        return NULL;
    }

    QueryPlanIssueDesc* scan_issue = CreateQueryPlanIssue(node, UnsuitableScanMethod);
    appendStringInfo(scan_issue->issue_suggestion,
        "PlanNode[%d] Indexscan is %s used:\"%s\", output:%.0f, filtered:%.0f, rate:%.5f",
        node->plan->plan_node_id,
        isIndex ? "not properly" : "ought to be",
        OperatorName(node->plan),
        totalTuples,
        totalFiltereds,
        rate);
    node->plan_issues = lappend(node->plan_issues, scan_issue);

    return scan_issue;
}
/* ----------------------------- External routine definitions -------------------------- */
/*
 * - Brief: Main entry point of SQL Self-Tuning for Query-Level issue diagnosis,
 *   invoked when the query plan is ready but execution has not started
 *
 * - Parameter:
 *      @querydesc: query desc to hold plannedstmt objects
 *
 * - Return:
 *      List of found plan issues (query level), NIL when none is found
 */
List* PlanAnalyzerQuery(QueryDesc* querydesc)
{
    List* found_issues = NIL;

    /* Try to analyze issues of Not Plan-Shipping */
    QueryPlanIssueDesc* shipping_issue = CheckQueryNotPlanShipping();
    if (shipping_issue != NULL) {
        found_issues = lappend(found_issues, shipping_issue);
    }

    /* Try to analyze issues of Single/Multi-Column statistic not collected */
    QueryPlanIssueDesc* stats_issue = CheckMultiColumnStatsNotCollect(querydesc);
    if (stats_issue != NULL) {
        found_issues = lappend(found_issues, stats_issue);
    }

    return found_issues;
}
/*
 * - Brief:
 *   Main function entrance of SQL Self-Tuning for Operator-Level issue analysis,
 *   it is invoked when execution is finished and runtime information is captured in
 *   the global instrumentation (u_sess->instr_cxt.global_instr).
 *
 *   The process works in a bottom-up recursive way: the children of a node
 *   (init plans, left tree, right tree, expression subplans and the nodes
 *   returned by getPlanSubNodes) are analyzed before the node itself. This lets
 *   the whole plan tree be walked once while issues are de-duplicated: when an
 *   underlying node is already "Data Skew" or "E-Rows Inaccurate", only the root
 *   of the issue is reported.
 *
 * - Parameter:
 *   @querydesc: query desc to hold plannedstmt objects; in this function it is
 *   only passed on to the recursive calls
 *   @planstate: current(top) plan state node being analyzed, may be NULL
 *
 * - Return:
 *   List of found plan issues (operator level); NIL when planstate is NULL,
 *   when the global instrumentation is not set up, or when the node is a
 *   Limit/VecLimit
 */
List* PlanAnalyzerOperator(QueryDesc* querydesc, PlanState* planstate)
{
PlanState* ps = planstate;
Plan* plan = NULL;
List* issueResults = NIL;
QueryPlanIssueDesc* issueResultsItem = NULL;
bool skip_inaccurate_erows = false;
/* Return NIL when we get the end of plantree */
if (ps == NULL) {
return NIL;
}
/* Return NIL when instrumentation framework is not setup yet */
if (u_sess->instr_cxt.global_instr == NULL) {
return NIL;
}
plan = ps->plan;
/*
 * Skip some scenarios where A-Rows/E-Rows does not reflect the actual return row numbers
 * - [1]. The plan is a Limit/VecLimit operator: stop analyzing the current operator
 *   and everything under it
 * - [2]. Material/VecMaterial indicates plan node re-scan, so A-Rows does not reflect
 *   the actual return rows. Note: an inner plan tree of MergeJoin/NestLoop is also a
 *   re-scan case; in the current release only the material node is handled, which
 *   covers most re-scan scenarios. The overall re-scan case is meant to be improved by
 *   enhancing the explain-perf framework to report scanned rows and returned rows
 */
if (nodeTag(plan) == T_Limit || nodeTag(plan) == T_VecLimit) {
elog(DEBUG1,
"Skip analyze DataSkew, Inaccurate E-Rows for plan nodes under PlanNode[%d]:%s",
plan->plan_node_id,
OperatorName(plan));
return NIL;
} else if (nodeTag(plan) == T_Material || nodeTag(plan) == T_VecMaterial) {
/*
 * For a Material plan node the inaccurate E-Rows check is skipped, as the node
 * might be re-scanned and A-Rows then does not reflect the number of rows that
 * was estimated during query planning. All other checks still run.
 */
elog(DEBUG1,
"Skip analyze Inaccurate E-Rows due to Re-Scan for plan nodes under PlanNode[%d]:%s",
plan->plan_node_id,
OperatorName(plan));
skip_inaccurate_erows = true;
}
/* Output debug information */
elog(DEBUG1, "QueryAnalayzer check PlanNode[%d]:%s", plan->plan_node_id, OperatorName(plan));
/* Expand init plans first: SubPlanState nodes of un-correlated expr subselects */
if (ps->initPlan) {
ListCell* lc = NULL;
foreach (lc, ps->initPlan) {
SubPlanState* sps = (SubPlanState*)lfirst(lc);
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, (PlanState*)sps->planstate));
}
}
/* Recursively analyze left plan tree */
if (ps->lefttree) {
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, ps->lefttree));
}
/* Recursively analyze right plan tree */
if (ps->righttree) {
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, ps->righttree));
}
/* Find the child plan states that special plan nodes keep outside of left/right tree */
List* ps_list = getPlanSubNodes(ps);
/* subPlan-s for SubPlanState nodes in my expressions (correlated query) */
if (ps->subPlan) {
ListCell* lc = NULL;
foreach (lc, ps->subPlan) {
SubPlanState* sps = (SubPlanState*)lfirst(lc);
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, (PlanState*)sps->planstate));
}
}
if (ps_list != NIL) {
ListCell* lc = NULL;
/* Go ahead and analyze the underlying plan nodes, as the leaf plan node is not reached yet */
foreach (lc, ps_list) {
PlanState* plan_state = (PlanState*)lfirst(lc);
issueResults = list_concat(issueResults, PlanAnalyzerOperator(querydesc, plan_state));
}
list_free(ps_list);
}
/* All children are done; from here on the current node itself is analyzed */
{
/* Ignore the node when the plan executes on coordinators (or nowhere); children's issues are still returned */
if (plan->exec_type == EXEC_ON_COORDS || plan->exec_type == EXEC_ON_NONE) {
return issueResults;
}
/* if plan->exec_type == EXEC_ON_ALL_NODES and m_planIdOffsetArray[plan->plan_node_id - 1] == 0
 * means plan exec on CN, it should ignore.
 */
int* m_planIdOffsetArray = u_sess->instr_cxt.global_instr->get_planIdOffsetArray();
if (plan->exec_type == EXEC_ON_ALL_NODES && m_planIdOffsetArray[plan->plan_node_id - 1] == 0) {
return issueResults;
}
/* Start to analyze current plan nodes */
int dn_num = 0;
int dn_index = 0;
double dn_tuples = 0.0;
double dnFiltereds = 0.0;
/* min starts at +infinity so that the first datanode always lowers it */
double min_dn_tuples = get_float8_infinity();
double max_dn_tuples = 0.0;
double total_tuples = 0.0;
double totalFiltereds = 0.0;
/* Set once a hashjoin is found to have spilled to temp files; never reset afterwards */
bool write_file = false;
int dop = plan->dop;
ListCell* nodeitem = NULL;
List* exec_nodeList = NIL;
ExecNodes* exec_nodes = NULL;
/* Extract info from u_sess->instr_cxt.global_instr and plan for sql tuning */
exec_nodes = ng_get_dest_execnodes(plan);
exec_nodeList = exec_nodes->nodeList;
dn_num = list_length(exec_nodeList);
/* Outer loop: one iteration per datanode; inner loop: one iteration per dop slot */
foreach (nodeitem, exec_nodeList) {
dn_index = lfirst_int(nodeitem);
dn_tuples = 0.0;
dnFiltereds = 0.0;
for (int j = 0; j < dop; j++) {
Instrumentation* node_instr =
u_sess->instr_cxt.global_instr->getInstrSlot(dn_index, plan->plan_node_id, j);
/*
 * A NULL node_instr means the plan was not executed: give up analyzing this
 * node and return what the children reported.
 */
if (node_instr == NULL)
return issueResults;
dn_tuples += node_instr->ntuples;
/* index scan: filter + recheck remove tuples */
dnFiltereds += node_instr->nfiltered1 + node_instr->nfiltered2;
/* Count the bloomFilterRows that the Optimizer doesn't count
 * for CheckDataSkew and CheckInaccurateEstimatedRows
 */
if (node_instr->bloomFilterRows > 0 && node_instr->bloomFilterBlocks == 0) {
dn_tuples += node_instr->bloomFilterRows;
}
/*
 * Prepare to report the Large Table as Inner in HashJoin issue. In order to
 * report this kind of issue gracefully, first confirm that the inner table
 * ran into a spill.
 */
if (IsA(plan, VecHashJoin) && !write_file) {
/*
 * For VecHashJoin, the temp file spilling info is recorded in the current
 * T_VecHashJoin node
 */
write_file = node_instr->sorthashinfo.hash_writefile;
} else if (nodeTag(plan) == T_HashJoin && !write_file) {
Assert(plan->righttree->dop == dop);
/*
 * For RowHashJoin, the temp file spilling info is recorded in the Hash node of
 * its inner(righttree) branch, so the instr object is fetched from there
 */
Instrumentation* instrument =
u_sess->instr_cxt.global_instr->getInstrSlot(dn_index, plan->righttree->plan_node_id, j);
if (instrument != NULL) {
/* More than one batch is taken as "HashJoin's inner spilled" */
write_file = (instrument->sorthashinfo.nbatch > 1);
}
}
}
/* Update DN level min/max tuples and total tuples */
min_dn_tuples = Min(dn_tuples, min_dn_tuples);
max_dn_tuples = Max(dn_tuples, max_dn_tuples);
total_tuples += dn_tuples;
totalFiltereds += dnFiltereds;
}
/* Checks that depend on the node type */
switch (nodeTag(plan)) {
case T_VecHashJoin:
case T_HashJoin: {
/* Check large table runs as hashjoin inner, only when the join spilled */
if (write_file &&
(issueResultsItem = CheckLargeTableInHashJoinInner(
ps, dn_num, ComputeSumOfDNTuples(plan->righttree), ComputeSumOfDNTuples(plan->lefttree))) !=
NULL) {
issueResults = lappend(issueResults, issueResultsItem);
}
break;
}
case T_VecNestLoop:
case T_NestLoop: {
bool is_eq_nestloop_only = IsEqualConditionandNestLoopOnly(plan);
/* Check large table runs as nestloop with an equality join condition; the larger child is measured */
if (is_eq_nestloop_only &&
(issueResultsItem = CheckLargeTableInNestloopWithEqualCondition(ps, dn_num,
Max(ComputeSumOfDNTuples(plan->righttree), ComputeSumOfDNTuples(plan->lefttree)))) != NULL) {
issueResults = lappend(issueResults, issueResultsItem);
}
break;
}
case T_VecStream:
case T_Stream: {
/* Check large table runs as Broadcast */
if ((issueResultsItem = CheckLargeTableInBroadcast(ps, dn_num, total_tuples)) != NULL) {
issueResults = lappend(issueResults, issueResultsItem);
}
break;
}
case T_SeqScan: {
/* Check unsuitable seq scan */
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, false, false);
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
break;
}
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapIndexScan: {
/* Check unsuitable index scan */
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, true, false);
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
break;
}
case T_CStoreScan: {
/* Check unsuitable seq scan for cstore */
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, false, true);
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
break;
}
case T_CStoreIndexScan: {
/* Check unsuitable index scan for cstore */
issueResultsItem = CheckUnsuitableScanMethod(ps, dn_num, total_tuples, totalFiltereds, true, true);
issueResults = issueResultsItem != NULL ? lappend(issueResults, issueResultsItem) : issueResults;
break;
}
default: {
/* Nothing to do just keep compiler silent */
}
}
/* Checks that apply to every node type */
/* Analyze data skew issue */
if ((issueResultsItem = CheckDataSkew(ps, min_dn_tuples, max_dn_tuples)) != NULL) {
issueResults = lappend(issueResults, issueResultsItem);
}
/* Analyze the inaccurate Estimation-Rows issue, unless it was switched off for a Material node above */
if (!skip_inaccurate_erows &&
(issueResultsItem = CheckInaccurateEstimatedRows(ps, dn_num, total_tuples)) != NULL) {
issueResults = lappend(issueResults, issueResultsItem);
}
}
return issueResults;
}
/*
* Store plan issues into session-level memory context(for output)
*/
void RecordQueryPlanIssues(const List* results)
{
ListCell* lc = NULL;
errno_t rc;
if (u_sess->attr.attr_resource.resource_track_level < RESOURCE_TRACK_QUERY || results == NIL) {
return;
}
char max_issue_desc[MAX_OPTIMIZER_WARNING_LEN];
rc = memset_s(max_issue_desc, MAX_OPTIMIZER_WARNING_LEN, '\0', MAX_OPTIMIZER_WARNING_LEN);
securec_check(rc, "\0", "\0");
int current = 0;
/* Keep existing warning info which found in SQL_Planned phase */
if (t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue) {
Assert(u_sess->attr.attr_resource.resource_track_level == RESOURCE_TRACK_OPERATOR);
rc = sprintf_s((char*)max_issue_desc, MAX_OPTIMIZER_WARNING_LEN, "%s\n",
t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue);
securec_check_ss_c(rc, "\0", "\0");
current += strlen(t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue) + 1;
/* Free original used memory space */
pfree_ext(t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue);
}
/* Scan plan issue list to store them */
foreach (lc, results) {
QueryPlanIssueDesc* issue = (QueryPlanIssueDesc*)lfirst(lc);
int issue_str_len = strlen(issue->issue_suggestion->data);
/* Check if we hit max allowed planner issue buffer length */
if (MAX_OPTIMIZER_WARNING_LEN - current <= issue_str_len + 1) {
ereport(LOG,
(errmodule(MOD_OPT),
(errmsg("Planner issue report is truncated, the rest of planner issues will be skipped"))));
break;
}
rc = sprintf_s((char*)max_issue_desc + current, MAX_OPTIMIZER_WARNING_LEN - current, "%s\n",
issue->issue_suggestion->data);
securec_check_ss_c(rc, "\0", "\0");
current += strlen(issue->issue_suggestion->data) + 1;
DeleteQueryPlanIssue(issue);
}
/* Hold the planer issue info in memory context of workload manager */
AutoContextSwitch memSwitch(g_instance.wlm_cxt->query_resource_track_mcxt);
t_thrd.shemem_ptr_cxt.mySessionMemoryEntry->query_plan_issue = pstrdup(max_issue_desc);
return;
}
/*
 * - Brief: Return the more readable operator name of a plan node
 * - Parameter:
 *      @plan: plan node to name
 * - Return:
 *      The second name output ("sname") filled in by GetPlanNodePlainText(); the
 *      other outputs of that call are not used
 */
static char* OperatorName(const Plan* plan)
{
    /* Outputs of GetPlanNodePlainText(), only short_name is handed back */
    char* plan_name = NULL;
    char* short_name = NULL;
    char* strategy_text = NULL;
    char* operation_text = NULL;
    char* part_operation = NULL;
    char* part_option = NULL;

    GetPlanNodePlainText(
        (Plan*)plan, &plan_name, &short_name, &strategy_text, &operation_text, &part_operation, &part_option);

    return short_name;
}