1290 lines
50 KiB
C++
Executable File
1290 lines
50 KiB
C++
Executable File
/* -------------------------------------------------------------------------
|
|
*
|
|
* clausesel.cpp
|
|
* Routines to compute clause selectivities
|
|
*
|
|
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
|
|
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* src/gausskernel/optimizer/path/clausesel.cpp
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
|
|
#include "catalog/pg_collation.h"
|
|
#include "catalog/pg_operator.h"
|
|
#include "catalog/pg_proc.h"
|
|
#include "catalog/pg_statistic.h"
|
|
#include "catalog/pg_namespace.h"
|
|
#include "nodes/makefuncs.h"
|
|
#include "optimizer/clauses.h"
|
|
#include "optimizer/cost.h"
|
|
#include "optimizer/pathnode.h"
|
|
#include "optimizer/plancat.h"
|
|
#include "utils/fmgroids.h"
|
|
#include "utils/guc.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/selfuncs.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "nodes/print.h"
|
|
#include "parser/parsetree.h"
|
|
#include "parser/parse_coerce.h"
|
|
#include "utils/extended_statistics.h"
|
|
#include "utils/syscache.h"
|
|
|
|
/*
|
|
* Data structure for accumulating info about possible range-query
|
|
* clause pairs in clauselist_selectivity.
|
|
*/
|
|
typedef struct RangeQueryClause {
|
|
struct RangeQueryClause* next; /* next in linked list */
|
|
Expr* clause; /* the second clause for range-query */
|
|
Node* var; /* The common variable of the clauses */
|
|
bool have_lobound; /* found a low-bound clause yet? */
|
|
bool have_hibound; /* found a high-bound clause yet? */
|
|
Selectivity lobound; /* Selectivity of a var > something clause */
|
|
Selectivity hibound; /* Selectivity of a var < something clause */
|
|
} RangeQueryClause;
|
|
|
|
static void addRangeClause(RangeQueryClause** rqlist, Node* clause, bool varonleft, bool isLTsel, Selectivity s2);
|
|
|
|
static List* switch_arg_items(Node* funExpr, Const* cnst, Oid* eqlOprOid, Oid* inputcollid, bool isequal);
|
|
static List* do_restrictinfo_conversion(List* args, Oid* eqlOprOid, Oid* inputcollid, bool isequal);
|
|
#define MIN(A, B) ((B) < (A) ? (B) : (A))
|
|
|
|
/* the context for get var data from clause in restrict info. */
|
|
typedef struct {
|
|
PlannerInfo* root; /* plan info node */
|
|
int varRelid; /* varRelid is either 0 or a rangetable index */
|
|
RatioType ratiotype; /* filter ratio or join ratio */
|
|
SpecialJoinInfo* sjinfo; /* special join info for the joinrel */
|
|
VariableStatData filter_vardata; /* var data for filter by self */
|
|
VariableStatData semijoin_vardata1; /* var data for the left args of semi/anti join */
|
|
VariableStatData semijoin_vardata2; /* var data for the right args of semi/anti join */
|
|
} get_vardata_for_filter_or_semijoin_context;
|
|
|
|
/* get var data and cache selectivity for filter or semi/anti join. */
|
|
static void get_vardata_for_filter_or_semijoin(
|
|
PlannerInfo* root, Node* clause, int varRelid, Selectivity selec, SpecialJoinInfo* sjinfo, RatioType type);
|
|
/* get vardata walker for clause. */
|
|
static bool get_vardata_for_filter_or_semijoin_walker(Node* node, get_vardata_for_filter_or_semijoin_context* context);
|
|
static bool is_rangequery_clause(Node* clause, RestrictInfo* rinfo, bool* varonleft);
|
|
static bool is_rangequery_contain_scalarop(Node* clause, RestrictInfo* rinfo);
|
|
static void set_varratio_for_rqclause(
|
|
PlannerInfo* root, List* varlist, int varRelid, double ratio, SpecialJoinInfo* sjinfo);
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root, const List *clauses);
|
|
static Selectivity calculate_selectivity_dependency(bool flag_dependency, PlannerInfo *root, ES_SELECTIVITY *es,
|
|
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, List** clauselist);
|
|
#endif
|
|
|
|
/****************************************************************************
|
|
* ROUTINES TO COMPUTE SELECTIVITIES
|
|
****************************************************************************/
|
|
/*
|
|
* clauselist_selectivity -
|
|
* Compute the selectivity of an implicitly-ANDed list of boolean
|
|
* expression clauses. The list can be empty, in which case 1.0
|
|
* must be returned. List elements may be either RestrictInfos
|
|
* or bare expression clauses --- the former is preferred since
|
|
* it allows caching of results.
|
|
*
|
|
* See clause_selectivity() for the meaning of the additional parameters.
|
|
*
|
|
* Our basic approach is to take the product of the selectivities of the
|
|
* subclauses. However, that's only right if the subclauses have independent
|
|
* probabilities, and in reality they are often NOT independent. So,
|
|
* we want to be smarter where we can.
|
|
|
|
* Currently, the only extra smarts we have is to recognize "range queries",
|
|
* such as "x > 34 AND x < 42". Clauses are recognized as possible range
|
|
* query components if they are restriction opclauses whose operators have
|
|
* scalarltsel() or scalargtsel() as their restriction selectivity estimator.
|
|
* We pair up clauses of this form that refer to the same variable. An
|
|
* unpairable clause of this kind is simply multiplied into the selectivity
|
|
* product in the normal way. But when we find a pair, we know that the
|
|
* selectivities represent the relative positions of the low and high bounds
|
|
* within the column's range, so instead of figuring the selectivity as
|
|
* hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
|
|
* see that hisel is the fraction of the range below the high bound, while
|
|
* losel is the fraction above the low bound; so hisel can be interpreted
|
|
* directly as a 0..1 value but we need to convert losel to 1-losel before
|
|
* interpreting it as a value. Then the available range is 1-losel to hisel.
|
|
* However, this calculation double-excludes nulls, so really we need
|
|
* hisel + losel + null_frac - 1.)
|
|
*
|
|
* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
|
|
* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
|
|
* yields an impossible (negative) result.
|
|
*
|
|
* A free side-effect is that we can recognize redundant inequalities such
|
|
* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
|
|
*
|
|
* Of course this is all very dependent on the behavior of
|
|
* scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
|
|
*
|
|
* sjinfo: identify join info include lefthand/righthand.
|
|
* varratio_cached: whether to cache the selectivity into the relation, for estimating distinct values using Poisson.
|
|
*/
|
|
Selectivity clauselist_selectivity(
|
|
PlannerInfo* root, List* clauses, int varRelid, JoinType jointype, SpecialJoinInfo* sjinfo, bool varratio_cached, bool use_poisson)
|
|
{
|
|
Selectivity s1 = 1.0;
|
|
RangeQueryClause* rqlist = NULL;
|
|
ListCell* l = NULL;
|
|
List* varlist = NIL;
|
|
List* clauselist = clauses;
|
|
ES_SELECTIVITY* es = NULL;
|
|
MemoryContext ExtendedStat = NULL;
|
|
MemoryContext oldcontext;
|
|
bool use_muti_stats = true;
|
|
|
|
if (ENABLE_CACHEDPLAN_MGR && root->glob->boundParams != NULL) {
|
|
root->glob->boundParams->params_lazy_bind = false;
|
|
use_muti_stats = (root->glob->boundParams->uParamInfo != DEFUALT_INFO) ? false : true;
|
|
}
|
|
|
|
/*
|
|
* If there's exactly one clause, then no use in trying to match up pairs,
|
|
* so just go directly to clause_selectivity().
|
|
*/
|
|
if (list_length(clauses) == 1)
|
|
return clause_selectivity(root, (Node*)linitial(clauses), varRelid, jointype, sjinfo, varratio_cached, false, use_poisson);
|
|
|
|
/* initialize es_selectivity class, list_length(clauses) can be 0 when called by set_baserel_size_estimates */
|
|
if (list_length(clauses) >= 2 && use_muti_stats &&
|
|
(jointype == JOIN_INNER || jointype == JOIN_FULL || jointype == JOIN_LEFT || jointype == JOIN_ANTI ||
|
|
jointype == JOIN_SEMI || jointype == JOIN_LEFT_ANTI_FULL)) {
|
|
ExtendedStat = AllocSetContextCreate(CurrentMemoryContext,
|
|
"ExtendedStat",
|
|
ALLOCSET_DEFAULT_MINSIZE,
|
|
ALLOCSET_DEFAULT_INITSIZE,
|
|
ALLOCSET_DEFAULT_MAXSIZE);
|
|
oldcontext = MemoryContextSwitchTo(ExtendedStat);
|
|
es = New(ExtendedStat) ES_SELECTIVITY();
|
|
Assert(root != NULL);
|
|
s1 = es->calculate_selectivity(root, clauses, sjinfo, jointype, NULL, ES_EQJOINSEL);
|
|
clauselist = es->unmatched_clause_group;
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
/*
|
|
* If these clauses references a single relation and it exists extended statistics for functional dependency
|
|
* in pg_statistic_ext, try to apply functional dependency to compute selecticity.
|
|
*/
|
|
bool flag_dependency = u_sess->attr.attr_sql.enable_functional_dependency;
|
|
flag_dependency = flag_dependency && es->unmatched_clause_group && es->statlist;
|
|
s1 *= calculate_selectivity_dependency(flag_dependency, root, es, varRelid, jointype, sjinfo, &clauselist);
|
|
#endif
|
|
|
|
es->clear();
|
|
(void)MemoryContextSwitchTo(oldcontext);
|
|
}
|
|
|
|
/*
|
|
* Initial scan over clauses. Anything that doesn't look like a potential
|
|
* rangequery clause gets multiplied into s1 and forgotten. Anything that
|
|
* does gets inserted into an rqlist entry.
|
|
*/
|
|
foreach (l, clauselist) {
|
|
Node* clause = (Node*)lfirst(l);
|
|
RestrictInfo* rinfo = NULL;
|
|
Selectivity s2;
|
|
|
|
/* Always compute the selectivity using clause_selectivity */
|
|
s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, varratio_cached, true, use_poisson);
|
|
|
|
/*
|
|
* Check for being passed a RestrictInfo.
|
|
*
|
|
* If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
|
|
* 0.0; just use that rather than looking for range pairs.
|
|
*/
|
|
if (IsA(clause, RestrictInfo)) {
|
|
rinfo = (RestrictInfo*)clause;
|
|
if (rinfo->pseudoconstant) {
|
|
s1 = s1 * s2;
|
|
rinfo->clause->selec = s2;
|
|
continue;
|
|
}
|
|
clause = (Node*)rinfo->clause;
|
|
} else
|
|
rinfo = NULL;
|
|
|
|
/*
|
|
* if the clause is range query like 'between and',
|
|
* we should scan the pair of rangequery and compute final selectivity.
|
|
*/
|
|
OpExpr* expr = (OpExpr*)clause;
|
|
bool varonleft = true;
|
|
if (is_rangequery_clause(clause, rinfo, &varonleft)) {
|
|
/*
|
|
* If it's not a "<" or ">" operator, just merge the
|
|
* selectivity in generically. But if it's the right oprrest,
|
|
* add the clause to rqlist for later processing.
|
|
*/
|
|
switch (get_oprrest(expr->opno)) {
|
|
case F_SCALARLTSEL:
|
|
addRangeClause(&rqlist, clause, varonleft, true, s2);
|
|
break;
|
|
case F_SCALARGTSEL:
|
|
addRangeClause(&rqlist, clause, varonleft, false, s2);
|
|
break;
|
|
default:
|
|
/* Just merge the selectivity in generically */
|
|
if ((uint32)u_sess->attr.attr_sql.cost_param & COST_ALTERNATIVE_CONJUNCT) {
|
|
s1 = MIN(s1, s2);
|
|
expr->xpr.selec = s1;
|
|
} else {
|
|
s1 = s1 * s2;
|
|
expr->xpr.selec = s2;
|
|
}
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* Not the right form, so treat it generically. */
|
|
if ((uint32)u_sess->attr.attr_sql.cost_param & COST_ALTERNATIVE_CONJUNCT) {
|
|
s1 = MIN(s1, s2);
|
|
expr->xpr.selec = s1;
|
|
} else {
|
|
s1 = s1 * s2;
|
|
expr->xpr.selec = s2;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now scan the rangequery pair list.
|
|
*/
|
|
while (rqlist != NULL) {
|
|
RangeQueryClause* rqnext = NULL;
|
|
|
|
if (rqlist->have_lobound && rqlist->have_hibound) {
|
|
/* Successfully matched a pair of range clauses */
|
|
Selectivity s2;
|
|
|
|
/*
|
|
* Exact equality to the default value probably means the
|
|
* selectivity function punted. This is not airtight but should
|
|
* be good enough.
|
|
*/
|
|
if (rqlist->hibound == DEFAULT_INEQ_SEL || rqlist->lobound == DEFAULT_INEQ_SEL) {
|
|
s2 = DEFAULT_RANGE_INEQ_SEL;
|
|
} else {
|
|
s2 = rqlist->hibound + rqlist->lobound - 1.0;
|
|
|
|
/* Adjust for double-exclusion of NULLs */
|
|
s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid, jointype, sjinfo);
|
|
|
|
/*
|
|
* A zero or slightly negative s2 should be converted into a
|
|
* small positive value; we probably are dealing with a very
|
|
* tight range and got a bogus result due to roundoff errors.
|
|
* However, if s2 is very negative, then we probably have
|
|
* default selectivity estimates on one or both sides of the
|
|
* range that we failed to recognize above for some reason.
|
|
*/
|
|
if (s2 <= 0.0) {
|
|
if (s2 < -0.01) {
|
|
/*
|
|
* No data available --- use a default estimate that
|
|
* is small, but not real small.
|
|
*/
|
|
s2 = DEFAULT_RANGE_INEQ_SEL;
|
|
} else {
|
|
/*
|
|
* It's just roundoff error; use a small positive
|
|
* value
|
|
*/
|
|
s2 = 1.0e-10;
|
|
}
|
|
}
|
|
}
|
|
/* Merge in the selectivity of the pair of clauses */
|
|
s1 *= s2;
|
|
rqlist->clause->selec = s2;
|
|
} else {
|
|
/* Only found one of a pair, merge it in generically */
|
|
if (rqlist->have_lobound) {
|
|
s1 *= rqlist->lobound;
|
|
rqlist->clause->selec = rqlist->lobound;
|
|
} else {
|
|
s1 *= rqlist->hibound;
|
|
rqlist->clause->selec = rqlist->hibound;
|
|
}
|
|
}
|
|
varlist = lappend(varlist, rqlist->var);
|
|
/* release storage and advance */
|
|
rqnext = rqlist->next;
|
|
pfree_ext(rqlist);
|
|
rqlist = rqnext;
|
|
}
|
|
|
|
/* we should cache the range query's var ratio if can do and there are range query's vars. */
|
|
if (varratio_cached && varlist != NIL)
|
|
set_varratio_for_rqclause(root, varlist, varRelid, s1, sjinfo);
|
|
|
|
list_free_ext(varlist);
|
|
|
|
/* free space used by extended statistic */
|
|
if (es != NULL) {
|
|
clauselist = NIL;
|
|
list_free_ext(es->unmatched_clause_group);
|
|
delete es;
|
|
MemoryContextDelete(ExtendedStat);
|
|
}
|
|
|
|
if (ENABLE_CACHEDPLAN_MGR && root->glob->boundParams != NULL && root->glob->boundParams->uParamInfo != DEFUALT_INFO) {
|
|
root->glob->boundParams->params_lazy_bind = true;
|
|
}
|
|
|
|
return s1;
|
|
}
|
|
|
|
/*
|
|
* addRangeClause --- add a new range clause for clauselist_selectivity
|
|
*
|
|
* Here is where we try to match up pairs of range-query clauses
|
|
*/
|
|
static void addRangeClause(RangeQueryClause** rqlist, Node* clause, bool varonleft, bool isLTsel, Selectivity s2)
{
    /*
     * Pick the operand that plays the role of the variable and decide which
     * side of the range this clause constrains: "x < something" is a high
     * bound on x, whereas "something < x" is a low bound.
     */
    Node* boundVar = varonleft ? get_leftop((Expr*)clause) : get_rightop((Expr*)clause);
    bool lowSide = varonleft ? !isLTsel : isLTsel;
    RangeQueryClause* entry = *rqlist;

    /*
     * Look for an existing group on the same variable. A full equal() is
     * used because the "var" might be a function of one or more attributes
     * of the same relation.
     */
    while (entry != NULL && !equal(boundVar, entry->var)) {
        entry = entry->next;
    }

    if (entry != NULL) {
        /* Found the group; update whichever bound this clause belongs to. */
        bool* haveBound = lowSide ? &entry->have_lobound : &entry->have_hibound;
        Selectivity* bound = lowSide ? &entry->lobound : &entry->hibound;

        if (!*haveBound) {
            *haveBound = true;
            *bound = s2;
        } else if (*bound > s2) {
            /*
             * Two clauses constrain the same side (for example
             * x > y AND x > z on the low side, or x < y AND x < z on the
             * high side). Keep only the more restrictive one.
             */
            *bound = s2;
        }
        entry->clause = (Expr*)clause;
        return;
    }

    /* No group for this variable yet: create one and push it on the list head. */
    entry = (RangeQueryClause*)palloc(sizeof(RangeQueryClause));
    entry->var = boundVar;
    entry->have_lobound = lowSide;
    entry->have_hibound = !lowSide;
    if (lowSide) {
        entry->lobound = s2;
    } else {
        entry->hibound = s2;
    }
    entry->clause = (Expr*)clause;
    entry->clause->selec = s2;
    entry->next = *rqlist;
    *rqlist = entry;
}
|
|
|
|
/*
|
|
* treat_as_join_clause -
|
|
* Decide whether an operator clause is to be handled by the
|
|
* restriction or join estimator. Subroutine for clause_selectivity().
|
|
*/
|
|
bool treat_as_join_clause(Node* clause, RestrictInfo* rinfo, int varRelid, SpecialJoinInfo* sjinfo)
{
    /*
     * A nonzero varRelid means the caller is forcing restriction mode (eg,
     * because an inner indexscan qual is being examined). A NULL sjinfo means
     * the clause is evaluated at a scan node. Either way it is a restriction
     * clause, not a join clause.
     */
    if (varRelid != 0 || sjinfo == NULL) {
        return false;
    }

    /*
     * Otherwise it is a join clause exactly when more than one relation is
     * referenced. With a RestrictInfo at hand the precomputed relid set
     * answers that directly.
     *
     * XXX Since the clause is being evaluated at a join, it can only be
     * single-relation if it was delayed by outer joins. The restriction qual
     * estimators are still usable then, but the probability of injected
     * nulls probably ought to be accounted for somehow.
     */
    if (rinfo != NULL) {
        return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
    }

    return (NumRelids(clause) > 1);
}
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
|
|
/*
|
|
* bms_is_subset_singleton
|
|
*
|
|
* Same result as bms_is_subset(s, bms_make_singleton(x)),
|
|
* but a little faster and doesn't leak memory.
|
|
*
|
|
* Is this of use anywhere else? If so move to bitmapset.c ...
|
|
*/
|
|
static bool
|
|
bms_is_subset_singleton(const Bitmapset *s, int x)
|
|
{
|
|
BMS_Membership type = bms_membership(s);
|
|
if (type == BMS_EMPTY_SET) {
|
|
return true;
|
|
} else if (type == BMS_SINGLETON) {
|
|
return bms_is_member(x, s);
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
/*
|
|
* clause_selectivity -
|
|
* Compute the selectivity of a general boolean expression clause.
|
|
*
|
|
* The clause can be either a RestrictInfo or a plain expression. If it's
|
|
* a RestrictInfo, we try to cache the selectivity for possible re-use,
|
|
* so passing RestrictInfos is preferred.
|
|
*
|
|
* varRelid is either 0 or a rangetable index.
|
|
*
|
|
* When varRelid is not 0, only variables belonging to that relation are
|
|
* considered in computing selectivity; other vars are treated as constants
|
|
* of unknown values. This is appropriate for estimating the selectivity of
|
|
* a join clause that is being used as a restriction clause in a scan of a
|
|
* nestloop join's inner relation --- varRelid should then be the ID of the
|
|
* inner relation.
|
|
*
|
|
* When varRelid is 0, all variables are treated as variables. This
|
|
* is appropriate for ordinary join clauses and restriction clauses.
|
|
*
|
|
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
|
|
* if the clause isn't a join clause.
|
|
*
|
|
* sjinfo is NULL for a non-join clause, otherwise it provides additional
|
|
* context information about the join being performed. There are some
|
|
* special cases:
|
|
* 1. For a special (not INNER) join, sjinfo is always a member of
|
|
* root->join_info_list.
|
|
* 2. For an INNER join, sjinfo is just a transient struct, and only the
|
|
* relids and jointype fields in it can be trusted.
|
|
* It is possible for jointype to be different from sjinfo->jointype.
|
|
* This indicates we are considering a variant join: either with
|
|
* the LHS and RHS switched, or with one input unique-ified.
|
|
*
|
|
* Note: when passing nonzero varRelid, it's normally appropriate to set
|
|
* jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
|
|
* join clause; because we aren't treating it as a join clause.
|
|
*
|
|
* sjinfo: identify join info include lefthand/righthand.
|
|
* varratio_cached: whether to cache the selectivity into the relation, for estimating distinct values using Poisson.
|
|
* check_scalarop: true if we need to check the scalar op that belongs to a range query clause.
|
|
*/
|
|
Selectivity clause_selectivity(PlannerInfo* root, Node* clause, int varRelid, JoinType jointype,
    SpecialJoinInfo* sjinfo, bool varratio_cached, bool check_scalarop, bool use_poisson)
{
    Selectivity s1 = 0.5; /* fallback estimate for clause types not handled below */
    RestrictInfo* rinfo = NULL;
    bool cacheable = false;
    RatioType ratiotype = RatioType_Filter;

    if (clause == NULL) { /* can this still happen? */
        return s1;
    }

    if (IsA(clause, RestrictInfo)) {
        rinfo = (RestrictInfo*)clause;

        /*
         * A pseudoconstant clause is used as a gating qual and must not
         * influence the estimate, so report 1.0. The one exception is a
         * plain Const: it falls through to the Const branch below, where a
         * constant FALSE yields 0.0.
         */
        if (rinfo->pseudoconstant && !IsA(rinfo->clause, Const)) {
            return (Selectivity)1.0;
        }

        /* A norm_selec above 1 marks the clause as redundant: always 1.0. */
        if (rinfo->norm_selec > 1) {
            return (Selectivity)1.0;
        }

#ifndef ENABLE_MULTIPLE_NODES
        /*
         * A previously stored result may be reused when varRelid cannot
         * affect it (varRelid is zero, or the clause only uses vars of that
         * relid) and use_poisson is off. Outer-join quals may be examined
         * with their real jointype or with JOIN_INNER, hence one cache slot
         * for JOIN_INNER and one for everything else.
         */
        if (!use_poisson && (varRelid == 0 || bms_is_subset_singleton(rinfo->clause_relids, varRelid))) {
            Selectivity cached = (jointype == JOIN_INNER) ? rinfo->norm_selec : rinfo->outer_selec;

            if (cached >= 0) {
                return cached;
            }
        }
#endif
        /*
         * NOTE(review): the result is stored back into the RestrictInfo in
         * every case, including those where the lookup above was skipped.
         * Confirm this is intended, since a varRelid-dependent result can
         * then be picked up by a later lookup.
         */
        cacheable = true;

        /*
         * Continue with the wrapped clause. For an OR-clause use the variant
         * built from sub-RestrictInfos so per-subclause results can be cached.
         */
        clause = (rinfo->orclause) ? (Node*)rinfo->orclause : (Node*)rinfo->clause;
    }

    if (IsA(clause, Var)) {
        Var* var = (Var*)clause;

        /*
         * An uplevel Var is not expected here; if one shows up (or the Var
         * belongs to another relation than varRelid) keep the default.
         */
        if (var->varlevelsup == 0 && (varRelid == 0 || varRelid == (int)var->varno)) {
            /*
             * A Var at the top of a clause must be boolean, which makes it
             * equivalent to "reln.attribute = 't'"; estimate it that way.
             */
            s1 = restriction_selectivity(
                root, BooleanEqualOperator, list_make2(var, makeBoolConst(true, false)), InvalidOid, varRelid);
        }
    } else if (IsA(clause, Const)) {
        /* boolean constant: 1.0 for a non-null TRUE, otherwise 0.0 */
        Const* con = (Const*)clause;

        s1 = (!con->constisnull && DatumGetBool(con->constvalue)) ? 1.0 : 0.0;
    } else if (IsA(clause, Param)) {
        /* try to substitute a value for the Param */
        Node* subst = estimate_expression_value(root, clause);

        if (IsA(subst, Const)) {
            /* boolean constant: 1.0 for a non-null TRUE, otherwise 0.0 */
            Const* con = (Const*)subst;

            s1 = (!con->constisnull && DatumGetBool(con->constvalue)) ? 1.0 : 0.0;
        }
    } else if (not_clause(clause)) {
        /* complement of the selectivity of the negated clause */
        s1 = 1.0 - clause_selectivity(root, (Node*)get_notclausearg((Expr*)clause), varRelid, jointype, sjinfo);
    } else if (and_clause(clause)) {
        /* an AND is just an implicitly-ANDed list */
        s1 = clauselist_selectivity(root, ((BoolExpr*)clause)->args, varRelid, jointype, sjinfo, varratio_cached);
    } else if (or_clause(clause)) {
        /*
         * OR arms are folded as s1 + s2 - s1 * s2, which accounts for the
         * probable overlap of the selected tuple sets.
         *
         * XXX is this too conservative?
         */
        ListCell* arg = NULL;

        s1 = 0.0;
        foreach (arg, ((BoolExpr*)clause)->args) {
            /* DO NOT cache the var ratio of single or-clauses */
            Selectivity s2 = clause_selectivity(root, (Node*)lfirst(arg), varRelid, jointype, sjinfo, false);

            s1 = s1 + s2 - s1 * s2;
        }
        /*
         * Ideally, or-clauses should be split into groups identified by their Var operand. However, Poisson
         * optimization is known to bring about NDV underestimation and cardinality overestimation in OLTP cases.
         * Also, it is hard to take account of the effect of different Vars (e.g. t1.a = 1 or t2.b = 1) on single
         * Vars. Therefore, or-clauses are ignored for the var ratio cache for now.
         */
    } else if (is_opclause(clause) || IsA(clause, DistinctExpr)) {
        OpExpr* opclause = (OpExpr*)clause;
        Oid opno = opclause->opno;

        if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) {
            /* Estimate selectivity for a join clause. */
            s1 = join_selectivity(root, opno, opclause->args, opclause->inputcollid, jointype, sjinfo);
            ratiotype = RatioType_Join;
        } else {
            /* Estimate selectivity for a restriction clause. */
            Oid eqlOprOid = 0;
            Oid inputcollid = 0;
            List* argList = NULL;

            if (is_opclause(clause)) {
                Oid oprrest = get_oprrest(opno);
                bool isEqual = (oprrest == EQSELRETURNOID);

                /* only "=" and "<>" operators get their arguments converted */
                if (isEqual || oprrest == NEQSELRETURNOID) {
                    argList = do_restrictinfo_conversion(opclause->args, &eqlOprOid, &inputcollid, isEqual);
                }
            }

            if (argList != NULL) {
                /* conversion succeeded: estimate with the rewritten operator and arguments */
                s1 = restriction_selectivity(root, eqlOprOid, argList, inputcollid, varRelid);
            } else {
                s1 = restriction_selectivity(root, opno, opclause->args, opclause->inputcollid, varRelid);
            }
        }

        /*
         * DistinctExpr shares OpExpr's representation, but its operator is
         * "=" rather than "<>", so the estimate has to be negated. Nulls are
         * not handled correctly this way, but it beats doing nothing.
         */
        if (IsA(clause, DistinctExpr)) {
            s1 = 1.0 - s1;
        }
    } else if (is_funcclause(clause)) {
        /*
         * This is not an operator, so we guess at the selectivity. THIS IS A
         * HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE ABLE TO HAVE
         * SELECTIVITIES THEMSELVES. -- JMH 7/9/92
         */
        s1 = (Selectivity)0.3333333;
    }
#ifdef NOT_USED
    else if (IsA(clause, SubPlan) || IsA(clause, AlternativeSubPlan)) {
        /*
         * Just for the moment! FIX ME! - vadim 02/04/98
         */
        s1 = (Selectivity)0.5;
    }
#endif
    else if (IsA(clause, ScalarArrayOpExpr)) {
        bool is_join_clause = treat_as_join_clause(clause, rinfo, varRelid, sjinfo);

        if (is_join_clause) {
            ratiotype = RatioType_Join;
        }

        /* node-specific estimator */
        s1 = scalararraysel(root, (ScalarArrayOpExpr*)clause, is_join_clause, varRelid, jointype, sjinfo);
    } else if (IsA(clause, RowCompareExpr)) {
        /* node-specific estimator */
        s1 = rowcomparesel(root, (RowCompareExpr*)clause, varRelid, jointype, sjinfo);
    } else if (IsA(clause, NullTest)) {
        /* node-specific estimator */
        NullTest* ntest = (NullTest*)clause;

        s1 = nulltestsel(root, ntest->nulltesttype, (Node*)ntest->arg, varRelid, jointype, sjinfo);
    } else if (IsA(clause, NanTest)) {
        /* node-specific estimator */
        NanTest* nantest = (NanTest*)clause;

        s1 = nantestsel(root, nantest->nantesttype, (Node*)nantest->arg, varRelid, jointype, sjinfo);
    } else if (IsA(clause, InfiniteTest)) {
        /* node-specific estimator */
        InfiniteTest* inftest = (InfiniteTest*)clause;

        s1 = infinitetestsel(root, inftest->infinitetesttype, (Node*)inftest->arg, varRelid, jointype, sjinfo);
    } else if (IsA(clause, BooleanTest)) {
        /* node-specific estimator */
        BooleanTest* btest = (BooleanTest*)clause;

        s1 = booltestsel(root, btest->booltesttype, (Node*)btest->arg, varRelid, jointype, sjinfo);
    } else if (IsA(clause, CurrentOfExpr)) {
        /* CURRENT OF selects at most one row of its table */
        CurrentOfExpr* cexpr = (CurrentOfExpr*)clause;
        RelOptInfo* crel = find_base_rel(root, cexpr->cvarno);

        if (crel->tuples > 0) {
            s1 = 1.0 / crel->tuples;
        }
    } else if (IsA(clause, RelabelType)) {
        /* Not sure this case is needed, but it can't hurt */
        s1 = clause_selectivity(root, (Node*)((RelabelType*)clause)->arg, varRelid, jointype, sjinfo);
    } else if (IsA(clause, CoerceToDomain)) {
        /* Not sure this case is needed, but it can't hurt */
        s1 = clause_selectivity(root, (Node*)((CoerceToDomain*)clause)->arg, varRelid, jointype, sjinfo);
    }

    /* Store the result in the RestrictInfo slot matching the join type. */
    if (cacheable) {
        if (jointype == JOIN_INNER) {
            rinfo->norm_selec = s1;
        } else {
            rinfo->outer_selec = s1;
        }
    }

    /*
     * With use_poisson set and no opfusion object in use, record the var's
     * selectivity in the relation when this is a filter whose ratio should
     * be cached, or a semi/anti join. When check_scalarop is set, clauses
     * reported by is_rangequery_contain_scalarop() are skipped here.
     */
    if (u_sess->opfusion_reuse_ctx.opfusionObj == NULL && use_poisson &&
        ((RatioType_Filter == ratiotype && varratio_cached) || (jointype == JOIN_SEMI) || (jointype == JOIN_ANTI)) &&
        (!check_scalarop || !is_rangequery_contain_scalarop(clause, rinfo))) {
        get_vardata_for_filter_or_semijoin(root, clause, varRelid, s1, sjinfo, ratiotype);
    }

#ifdef SELECTIVITY_DEBUG
    ereport(DEBUG4, (errmodule(MOD_OPT_JOIN), (errmsg("clause_selectivity: s1 %f", s1))));
#endif /* SELECTIVITY_DEBUG */

    return s1;
}
|
|
|
|
/* Produce arg list, const convert to expr type */
|
|
static List* switch_arg_items(Node* funExpr, Const* cnst, Oid* eqlOprOid, Oid* inputcollid, bool isequal)
|
|
{
|
|
List* argList = NULL;
|
|
Node* arg = NULL;
|
|
Const* cnp = NULL;
|
|
Oid argType = InvalidOid;
|
|
|
|
if (IsA(funExpr, FuncExpr) && ((FuncExpr*)funExpr)->funcformat == COERCE_IMPLICIT_CAST) {
|
|
FuncExpr* fun_expr = (FuncExpr*)funExpr;
|
|
arg = (Node*)linitial(fun_expr->args);
|
|
argType = exprType(arg);
|
|
HeapTuple typeTuple;
|
|
Oid funcId = 0;
|
|
Oid constType = exprType((Node*)cnst);
|
|
Datum constValue = (Datum)0;
|
|
|
|
/* We only deal with datatypes that their categorys is different */
|
|
if (TypeCategory(argType) == TypeCategory(constType)) {
|
|
return NIL;
|
|
}
|
|
|
|
CoercionPathType pathtype = find_coercion_pathway(argType, constType, COERCION_IMPLICIT, &funcId);
|
|
|
|
if (pathtype != COERCION_PATH_NONE) {
|
|
MemoryContext current_context = CurrentMemoryContext;
|
|
bool outer_is_stream = false;
|
|
bool outer_is_stream_support = false;
|
|
ResourceOwner currentOwner = t_thrd.utils_cxt.CurrentResourceOwner;
|
|
ResourceOwner tempOwner = ResourceOwnerCreate(t_thrd.utils_cxt.CurrentResourceOwner, "SwitchArgItems",
|
|
THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_OPTIMIZER));
|
|
t_thrd.utils_cxt.CurrentResourceOwner = tempOwner;
|
|
|
|
if (IS_PGXC_COORDINATOR) {
|
|
outer_is_stream = u_sess->opt_cxt.is_stream;
|
|
outer_is_stream_support = u_sess->opt_cxt.is_stream_support;
|
|
}
|
|
|
|
PG_TRY();
|
|
{
|
|
constValue = OidFunctionCall1(funcId, ((Const*)cnst)->constvalue);
|
|
}
|
|
PG_CATCH();
|
|
{
|
|
MemoryContextSwitchTo(current_context);
|
|
FlushErrorState();
|
|
|
|
/* in case they are not set back */
|
|
if (IS_PGXC_COORDINATOR) {
|
|
u_sess->opt_cxt.is_stream = outer_is_stream;
|
|
u_sess->opt_cxt.is_stream_support = outer_is_stream_support;
|
|
}
|
|
|
|
/* release resource applied in OidFunctionCall1 of the PG_TRY. */
|
|
ResourceOwnerRelease(tempOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, false);
|
|
ResourceOwnerRelease(tempOwner, RESOURCE_RELEASE_LOCKS, false, false);
|
|
ResourceOwnerRelease(tempOwner, RESOURCE_RELEASE_AFTER_LOCKS, false, false);
|
|
t_thrd.utils_cxt.CurrentResourceOwner = currentOwner;
|
|
ResourceOwnerDelete(tempOwner);
|
|
|
|
return NIL;
|
|
}
|
|
PG_END_TRY();
|
|
|
|
/* release resource applied in standard_planner of the PG_TRY. */
|
|
ResourceOwnerRelease(tempOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, false);
|
|
ResourceOwnerRelease(tempOwner, RESOURCE_RELEASE_LOCKS, false, false);
|
|
ResourceOwnerRelease(tempOwner, RESOURCE_RELEASE_AFTER_LOCKS, false, false);
|
|
t_thrd.utils_cxt.CurrentResourceOwner = currentOwner;
|
|
ResourceOwnerDelete(tempOwner);
|
|
|
|
if (IS_PGXC_COORDINATOR) {
|
|
u_sess->opt_cxt.is_stream = outer_is_stream;
|
|
u_sess->opt_cxt.is_stream_support = outer_is_stream_support;
|
|
}
|
|
}
|
|
|
|
if (constValue) {
|
|
typeTuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(argType));
|
|
if (!HeapTupleIsValid(typeTuple)) {
|
|
return NIL;
|
|
}
|
|
Form_pg_type type = (Form_pg_type)GETSTRUCT(typeTuple);
|
|
cnp = makeConst(
|
|
argType, exprTypmod(arg), type->typcollation, type->typlen, constValue, false, type->typbyval);
|
|
ReleaseSysCache(typeTuple);
|
|
}
|
|
}
|
|
if (cnp != NULL) {
|
|
argList = lappend(argList, arg);
|
|
argList = lappend(argList, cnp);
|
|
|
|
if (argType == VARCHAROID) {
|
|
argType = TEXTOID;
|
|
}
|
|
|
|
HeapTuple opertup;
|
|
opertup = SearchSysCache4(OPERNAMENSP,
|
|
CStringGetDatum(isequal ? "=" : "<>"),
|
|
ObjectIdGetDatum(argType),
|
|
ObjectIdGetDatum(argType),
|
|
ObjectIdGetDatum(PG_CATALOG_NAMESPACE));
|
|
if (!HeapTupleIsValid(opertup)) {
|
|
return NIL;
|
|
}
|
|
|
|
*eqlOprOid = HeapTupleGetOid(opertup);
|
|
|
|
ReleaseSysCache(opertup);
|
|
|
|
*inputcollid = exprCollation(arg);
|
|
}
|
|
|
|
return argList;
|
|
}
|
|
|
|
/*
 * do_restrictinfo_conversion: for a two-argument operator clause with exactly
 * one Const operand, ask switch_arg_items to convert that constant to the
 * type of the other operand.
 *
 * Parameters:
 * @in args: the operator's argument list (must contain two elements)
 * @out eqlOprOid: passed through to switch_arg_items
 * @out inputcollid: passed through to switch_arg_items
 * @in isequal: passed through to switch_arg_items
 *
 * Returns: the list built by switch_arg_items, or NIL when both or neither of
 * the operands are constants.
 */
static List* do_restrictinfo_conversion(List* args, Oid* eqlOprOid, Oid* inputcollid, bool isequal)
{
    AssertEreport(list_length(args) == 2, MOD_OPT, "");

    Node* leftOperand = (Node*)linitial(args);
    Node* rightOperand = (Node*)list_nth(args, 1);
    const bool leftIsConst = IsA(leftOperand, Const);
    const bool rightIsConst = IsA(rightOperand, Const);

    /* const-op-const and expr-op-expr are both left alone */
    if (leftIsConst == rightIsConst) {
        return NIL;
    }

    if (leftIsConst) {
        return switch_arg_items(rightOperand, (Const*)leftOperand, eqlOprOid, inputcollid, isequal);
    }

    return switch_arg_items(leftOperand, (Const*)rightOperand, eqlOprOid, inputcollid, isequal);
}
|
|
|
|
/*
|
|
* get_vardata_for_filter_or_semijoin: get var data and cache selectivity for filter or semi/anti join.
|
|
*
|
|
* Parameters:
|
|
* @in root: plan info node
|
|
* @in clause: estimate tuples for LIMIT
|
|
* @in varRelid: varRelid is either 0 or a rangetable index, When varRelid is not 0,
|
|
* only variables belonging to that relation are considered in computing selectivity.
|
|
* @in selec: selectivity for clause
|
|
* @in sjinfo: special join info for the joinrel
|
|
* @in type: filter ratio or join ratio
|
|
*
|
|
* Returns: void
|
|
*/
|
|
static void get_vardata_for_filter_or_semijoin(
|
|
PlannerInfo* root, Node* clause, int varRelid, Selectivity selec, SpecialJoinInfo* sjinfo, RatioType type)
|
|
{
|
|
get_vardata_for_filter_or_semijoin_context context;
|
|
bool vardataIsValid = false;
|
|
|
|
/* construct context members. */
|
|
context.root = root;
|
|
context.varRelid = varRelid;
|
|
context.ratiotype = type;
|
|
context.sjinfo = sjinfo;
|
|
errno_t rc = EOK;
|
|
|
|
rc = memset_s(&context.filter_vardata, sizeof(VariableStatData), 0, sizeof(VariableStatData));
|
|
securec_check(rc, "\0", "\0");
|
|
rc = memset_s(&context.semijoin_vardata1, sizeof(VariableStatData), 0, sizeof(VariableStatData));
|
|
securec_check(rc, "\0", "\0");
|
|
rc = memset_s(&context.semijoin_vardata2, sizeof(VariableStatData), 0, sizeof(VariableStatData));
|
|
securec_check(rc, "\0", "\0");
|
|
|
|
/* get vardata walker for clause. */
|
|
vardataIsValid = get_vardata_for_filter_or_semijoin_walker(clause, &context);
|
|
|
|
/* we don't need set var ratio if vardata is invalid. */
|
|
if (!vardataIsValid) {
|
|
return;
|
|
}
|
|
|
|
/* set var ratio for filter or semi/anti join. */
|
|
if (RatioType_Filter == type) {
|
|
set_varratio_after_calc_selectivity(&context.filter_vardata, RatioType_Filter, selec, NULL);
|
|
ReleaseVariableStats(context.filter_vardata);
|
|
} else {
|
|
set_varratio_after_calc_selectivity(&context.semijoin_vardata1, RatioType_Join, selec, sjinfo);
|
|
set_varratio_after_calc_selectivity(&context.semijoin_vardata2, RatioType_Join, selec, sjinfo);
|
|
|
|
ReleaseVariableStats(context.semijoin_vardata1);
|
|
ReleaseVariableStats(context.semijoin_vardata2);
|
|
}
|
|
}
|
|
|
|
/*
 * getVardataFromScalarArray: fill the context's vardata slots from the
 * arguments of a ScalarArrayOpExpr.
 *
 * Parameters:
 * @in node: the clause, accessed as a ScalarArrayOpExpr
 * @in context: for a join ratio both semijoin slots are filled through
 *              get_join_variables; otherwise filter_vardata is filled from
 *              the first argument
 *
 * Returns: void
 */
void getVardataFromScalarArray(Node* node, get_vardata_for_filter_or_semijoin_context* context)
{
    List* opArgs = ((ScalarArrayOpExpr*)node)->args;

    if (RatioType_Join != context->ratiotype) {
        /* filter case: only the left-hand operand is examined */
        examine_variable(context->root, (Node*)linitial(opArgs), context->varRelid, &context->filter_vardata);
        return;
    }

    /* join case: the reversed flag is required by the callee but not used here */
    bool reversed = false;
    get_join_variables(context->root, opArgs, context->sjinfo,
        &context->semijoin_vardata1, &context->semijoin_vardata2, &reversed);
}
|
|
|
|
/*
|
|
* get_vardata_for_filter_or_semijoin_walker: get vardata walker for clause.
|
|
*
|
|
* Parameters:
|
|
* @in node: the clause node in restrict info
|
|
* @in context: the context with in params and will get vardata from clause's args
|
|
*
|
|
* Returns: bool(true:vardata is valid)
|
|
*/
|
|
static bool get_vardata_for_filter_or_semijoin_walker(Node* node, get_vardata_for_filter_or_semijoin_context* context)
|
|
{
|
|
List* args = NIL;
|
|
Node* other = NULL;
|
|
Node* left = NULL;
|
|
Node* clause = NULL;
|
|
bool varonleft = false;
|
|
|
|
if (node == NULL)
|
|
return false;
|
|
|
|
/* get vardata info from different clause's args. */
|
|
if (IsA(node, Var)) {
|
|
Var* var = (Var*)node;
|
|
|
|
/*
|
|
* We probably shouldn't ever see an uplevel Var here, but if we do,
|
|
* return the default selectivity...
|
|
*/
|
|
if (var->varlevelsup == 0 && (context->varRelid == 0 || context->varRelid == (int)var->varno)) {
|
|
examine_variable(context->root, (Node*)var, context->varRelid, &context->filter_vardata);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
} else if (not_clause(node)) {
|
|
clause = (Node*)get_notclausearg((Expr*)node);
|
|
} else if (is_opclause(node)) {
|
|
OpExpr* opclause = (OpExpr*)node;
|
|
Oid opno = opclause->opno;
|
|
|
|
if (RatioType_Join == context->ratiotype) {
|
|
bool join_is_reversed = false;
|
|
get_join_variables(context->root, opclause->args, context->sjinfo, &context->semijoin_vardata1,
|
|
&context->semijoin_vardata2, &join_is_reversed);
|
|
return true;
|
|
} else {
|
|
Oid eqlOprOid = 0;
|
|
List* argList = NULL;
|
|
Oid inputcollid = 0;
|
|
/* only handle = or <> operator */
|
|
if (get_oprrest(opno) == EQSELRETURNOID || get_oprrest(opno) == NEQSELRETURNOID) {
|
|
argList = do_restrictinfo_conversion(
|
|
opclause->args, &eqlOprOid, &inputcollid, get_oprrest(opno) == EQSELRETURNOID);
|
|
}
|
|
|
|
if (argList != NULL)
|
|
args = argList;
|
|
else
|
|
args = opclause->args;
|
|
|
|
return get_restriction_variable(
|
|
context->root, args, context->varRelid, &context->filter_vardata, &other, &varonleft);
|
|
}
|
|
} else if (IsA(node, ScalarArrayOpExpr)) {
|
|
getVardataFromScalarArray(node, context);
|
|
return true;
|
|
} else if (IsA(node, RowCompareExpr)) {
|
|
args = list_make2(linitial(((RowCompareExpr*)node)->largs), linitial(((RowCompareExpr*)node)->rargs));
|
|
return get_restriction_variable(
|
|
context->root, args, context->varRelid, &context->filter_vardata, &other, &varonleft);
|
|
} else if (IsA(node, NullTest)) {
|
|
left = (Node*)((NullTest*)node)->arg;
|
|
examine_variable(context->root, left, context->varRelid, &context->filter_vardata);
|
|
return true;
|
|
} else if (IsA(node, BooleanTest)) {
|
|
left = (Node*)((BooleanTest*)node)->arg;
|
|
examine_variable(context->root, left, context->varRelid, &context->filter_vardata);
|
|
return true;
|
|
} else if (IsA(node, RelabelType)) {
|
|
clause = (Node*)((RelabelType*)node)->arg;
|
|
} else if (IsA(node, CoerceToDomain)) {
|
|
clause = (Node*)((CoerceToDomain*)node)->arg;
|
|
}
|
|
|
|
return get_vardata_for_filter_or_semijoin_walker(clause, context);
|
|
}
|
|
|
|
/*
|
|
* is_rangequery_clause: the clause is range query or not.
|
|
*
|
|
* Parameters:
|
|
* @in clause: the clause node in restrict info
|
|
* @in rinfo: restrict info
|
|
* @in varonleft: identify the var in clause on left or right
|
|
*
|
|
* Returns: bool(true:the clause is range query)
|
|
*/
|
|
static bool is_rangequery_clause(Node* clause, RestrictInfo* rinfo, bool* varonleft)
|
|
{
|
|
bool isrqclause = false;
|
|
|
|
/*
|
|
* See if it looks like a restriction clause with a pseudoconstant on
|
|
* one side. (Anything more complicated than that might not behave in
|
|
* the simple way we are expecting.) Most of the tests here can be
|
|
* done more efficiently with rinfo than without.
|
|
*/
|
|
if (is_opclause(clause) && list_length(((OpExpr*)clause)->args) == 2) {
|
|
OpExpr* expr = (OpExpr*)clause;
|
|
|
|
if (rinfo != NULL) {
|
|
isrqclause = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
|
|
(is_pseudo_constant_clause_relids((Node*)lsecond(expr->args), rinfo->right_relids) ||
|
|
(*varonleft = false,
|
|
is_pseudo_constant_clause_relids((Node*)linitial(expr->args), rinfo->left_relids)));
|
|
} else {
|
|
isrqclause = (NumRelids(clause) == 1) &&
|
|
(is_pseudo_constant_clause((Node*)lsecond(expr->args)) ||
|
|
(*varonleft = false, is_pseudo_constant_clause((Node*)linitial(expr->args))));
|
|
}
|
|
}
|
|
|
|
return isrqclause;
|
|
}
|
|
|
|
/*
|
|
* is_rangequery_contain_scalarop: if the range query contain scalar operator or not.
|
|
*
|
|
* Parameters:
|
|
* @in clause: the clause node in restrict info
|
|
* @in rinfo: restrict info
|
|
*
|
|
* Returns: bool(true:the range query contain scalar operator)
|
|
*/
|
|
static bool is_rangequery_contain_scalarop(Node* clause, RestrictInfo* rinfo)
|
|
{
|
|
bool varonleft = false;
|
|
|
|
if (is_rangequery_clause(clause, rinfo, &varonleft)) {
|
|
OpExpr* expr = (OpExpr*)clause;
|
|
|
|
if ((F_SCALARLTSEL == get_oprrest(expr->opno)) || (F_SCALARGTSEL == get_oprrest(expr->opno)))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* set_varratio_for_rqclause: set var ratio for range query clause.
|
|
*
|
|
* Parameters:
|
|
* @in root: plan info node
|
|
* @in varlist: the range query clause contain many vars
|
|
* @in varRelid: varRelid is either 0 or a rangetable index, When varRelid is not 0,
|
|
* only variables belonging to that relation are considered in computing selectivity.
|
|
* @in ratio: join ratio according to estimation
|
|
* @in sjinfo: the join info for current relation join with others
|
|
*
|
|
* Returns: void
|
|
*/
|
|
static void set_varratio_for_rqclause(
|
|
PlannerInfo* root, List* varlist, int varRelid, double ratio, SpecialJoinInfo* sjinfo)
|
|
{
|
|
ListCell* lc = NULL;
|
|
|
|
foreach (lc, varlist) {
|
|
VariableStatData vardata;
|
|
Node* node = (Node*)lfirst(lc);
|
|
|
|
examine_variable(root, node, varRelid, &vardata);
|
|
|
|
if (sjinfo == NULL)
|
|
set_varratio_after_calc_selectivity(&vardata, RatioType_Filter, ratio, NULL);
|
|
else
|
|
set_varratio_after_calc_selectivity(&vardata, RatioType_Join, ratio, sjinfo);
|
|
|
|
ReleaseVariableStats(vardata);
|
|
}
|
|
}
|
|
|
|
#ifndef ENABLE_MULTIPLE_NODES
|
|
/*
|
|
* find_single_rel_for_clauses
|
|
* Examine each clause in 'clauses' and determine if all clauses
|
|
* reference only a single relation. If so return that relation,
|
|
* otherwise return NULL.
|
|
*/
|
|
static RelOptInfo* find_single_rel_for_clauses(PlannerInfo* root, const List* clauses)
|
|
{
|
|
int lastrelid = 0;
|
|
ListCell* l;
|
|
|
|
foreach (l, clauses) {
|
|
RestrictInfo *rinfo = (RestrictInfo *)lfirst(l);
|
|
int relid;
|
|
|
|
/*
|
|
* If we have a list of bare clauses rather than RestrictInfos, we
|
|
* could pull out their relids the hard way with pull_varnos().
|
|
* However, currently the extended-stats machinery won't do anything
|
|
* with non-RestrictInfo clauses anyway, so there's no point in
|
|
* spending extra cycles; just fail if that's what we have.
|
|
*
|
|
* An exception to that rule is if we have a bare BoolExpr AND clause.
|
|
* We treat this as a special case because the restrictinfo machinery
|
|
* doesn't build RestrictInfos on top of AND clauses.
|
|
*/
|
|
if (rinfo != NULL && IsA(rinfo, BoolExpr) && ((const BoolExpr *)rinfo)->boolop == AND_EXPR) {
|
|
RelOptInfo *rel;
|
|
|
|
rel = find_single_rel_for_clauses(root, ((BoolExpr *)rinfo)->args);
|
|
if (rel == NULL) {
|
|
return NULL;
|
|
}
|
|
if (lastrelid == 0) {
|
|
lastrelid = rel->relid;
|
|
continue;
|
|
}
|
|
if (lastrelid != 0 && (int)(rel->relid) != lastrelid) {
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
if (!IsA(rinfo, RestrictInfo)) {
|
|
return NULL;
|
|
}
|
|
|
|
if (bms_is_empty(rinfo->clause_relids)) {
|
|
continue; /* we can ignore variable-free clauses */
|
|
}
|
|
if (!bms_get_singleton_member(rinfo->clause_relids, &relid)) {
|
|
return NULL; /* multiple relations in this clause */
|
|
}
|
|
if (lastrelid == 0) {
|
|
lastrelid = relid; /* first clause referencing a relation */
|
|
}
|
|
if (lastrelid != 0 && relid != lastrelid) {
|
|
return NULL; /* relation not same as last one */
|
|
}
|
|
}
|
|
|
|
if (lastrelid != 0) {
|
|
return find_base_rel(root, lastrelid);
|
|
}
|
|
|
|
return NULL; /* no clauses */
|
|
}
|
|
|
|
/*
|
|
* calculate_selectivity_dependency
|
|
* Calculate selectivity through functional dependency statistics
|
|
*/
|
|
static Selectivity calculate_selectivity_dependency(bool flag_dependency, PlannerInfo *root, ES_SELECTIVITY *es,
|
|
int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, List** clauselist)
|
|
{
|
|
if (!flag_dependency) {
|
|
return 1.0;
|
|
}
|
|
|
|
RelOptInfo *rel = NULL;
|
|
Selectivity sel = 1.0;
|
|
ListCell *l = NULL;
|
|
|
|
rel = find_single_rel_for_clauses(root, es->unmatched_clause_group);
|
|
if (rel && rel->rtekind == RTE_RELATION) {
|
|
int listidx = 0;
|
|
Bitmapset *estimatedclauses = NULL;
|
|
|
|
rel->statlist = es->statlist;
|
|
sel = dependencies_clauselist_selectivity(root, es->unmatched_clause_group, varRelid, jointype, sjinfo, rel,
|
|
&estimatedclauses);
|
|
*clauselist = NULL;
|
|
foreach (l, es->unmatched_clause_group) {
|
|
Node *clause = (Node *)lfirst(l);
|
|
if (!bms_is_member(listidx, estimatedclauses)) {
|
|
*clauselist = lappend(*clauselist, clause);
|
|
}
|
|
listidx++;
|
|
}
|
|
}
|
|
return sel;
|
|
}
|
|
#endif
|