Files
openGauss-server/src/gausskernel/optimizer/prep/prepqual.cpp
2023-02-27 04:05:03 -08:00

644 lines
22 KiB
C++

/* -------------------------------------------------------------------------
*
* prepqual.cpp
* Routines for preprocessing qualification expressions
*
*
* The parser regards AND and OR as purely binary operators, so a qual like
* (A = 1) OR (A = 2) OR (A = 3) ...
* will produce a nested parsetree
* (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
* In reality, the optimizer and executor regard AND and OR as N-argument
* operators, so this tree can be flattened to
* (OR (A = 1) (A = 2) (A = 3) ...)
*
* Formerly, this module was responsible for doing the initial flattening,
* but now we leave it to eval_const_expressions to do that since it has to
* make a complete pass over the expression tree anyway. Instead, we just
* have to ensure that our manipulations preserve AND/OR flatness.
* pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
* tree after local transformations that might introduce nested AND/ORs.
*
*
* Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/gausskernel/optimizer/prep/prepqual.cpp
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/prep.h"
#include "utils/lsyscache.h"
/* Forward declarations of the file-local helpers that are defined further down in this file. */
static Expr* find_duplicate_ors(Expr* qual, bool is_check);
static Expr* process_duplicate_ors(List* orlist);
/*
* negate_clause
* Negate a Boolean expression.
*
* Input is a clause to be negated (e.g., the argument of a NOT clause).
* Returns a new clause equivalent to the negation of the given clause.
*
* Although this can be invoked on its own, it's mainly intended as a helper
* for eval_const_expressions(), and that context drives several design
* decisions. In particular, if the input is already AND/OR flat, we must
* preserve that property. We also don't bother to recurse in situations
* where we can assume that lower-level executions of eval_const_expressions
* would already have simplified sub-clauses of the input.
*
* The difference between this and a simple make_notclause() is that this
* tries to get rid of the NOT node by logical simplification. It's clearly
* always a win if the NOT node can be eliminated altogether. However, our
* use of DeMorgan's laws could result in having more NOT nodes rather than
* fewer. We do that unconditionally anyway, because in WHERE clauses it's
* important to expose as much top-level AND/OR structure as possible.
* Also, eliminating an intermediate NOT may allow us to flatten two levels
* of AND or OR together that we couldn't have otherwise. Finally, one of
* the motivations for doing this is to ensure that logically equivalent
* expressions will be seen as physically equal(), so we should always apply
* the same transformations.
*/
Node* negate_clause(Node* node)
{
    /* An empty input is never expected here; report it as an error. */
    if (node == NULL) {
        ereport(ERROR,
            (errmodule(MOD_OPT),
                errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                (errmsg("can't negate an empty subexpression"))));
    }

    switch (nodeTag(node)) {
        case T_Const: {
            Const* constant = (Const*)node;

            /* Negating a NULL boolean yields NULL again. */
            if (constant->constisnull) {
                return makeBoolConst(false, true);
            }
            /* A known truth value is simply flipped. */
            return makeBoolConst(!DatumGetBool(constant->constvalue), false);
        }

        case T_OpExpr: {
            OpExpr* source = (OpExpr*)node;
            Oid inverse = get_negator(source->opno);

            /* No negator operator: fall out and wrap in an explicit NOT. */
            if (!inverse) {
                break;
            }

            /*
             * (NOT (< A B)) => (>= A B): build a new operator node that uses
             * the negator and shares the argument list of the original.
             */
            OpExpr* flipped = makeNode(OpExpr);
            flipped->opno = inverse;
            flipped->opfuncid = InvalidOid;
            flipped->opresulttype = source->opresulttype;
            flipped->opretset = source->opretset;
            flipped->opcollid = source->opcollid;
            flipped->inputcollid = source->inputcollid;
            flipped->args = source->args;
            flipped->location = source->location;
            return (Node*)flipped;
        }

        case T_ScalarArrayOpExpr: {
            ScalarArrayOpExpr* source = (ScalarArrayOpExpr*)node;
            Oid inverse = get_negator(source->opno);

            /* No negator operator: fall out and wrap in an explicit NOT. */
            if (!inverse) {
                break;
            }

            /*
             * Use the negator and swap ANY <-> ALL by inverting useOr,
             * e.g. x = ANY (list) becomes x <> ALL (list).
             */
            ScalarArrayOpExpr* flipped = makeNode(ScalarArrayOpExpr);
            flipped->opno = inverse;
            flipped->opfuncid = InvalidOid;
            flipped->useOr = !source->useOr;
            flipped->inputcollid = source->inputcollid;
            flipped->args = source->args;
            flipped->location = source->location;
            return (Node*)flipped;
        }

        case T_BoolExpr: {
            BoolExpr* bexpr = (BoolExpr*)node;

            /*
             * NOT directly below the NOT being applied: the two cancel.  The
             * input is assumed to be simplified already, so the argument is
             * returned as-is without recursing.
             */
            if (bexpr->boolop == NOT_EXPR) {
                return (Node*)linitial(bexpr->args);
            }

            if (bexpr->boolop != AND_EXPR && bexpr->boolop != OR_EXPR) {
                ereport(ERROR,
                    (errmodule(MOD_OPT),
                        errcode(ERRCODE_OPTIMIZER_INCONSISTENT_STATE),
                        (errmsg("unrecognized boolop: %d", (int)bexpr->boolop))));
                break;
            }

            /*
             * DeMorgan's laws:
             *   (NOT (AND A B)) => (OR (NOT A) (NOT B))
             *   (NOT (OR A B))  => (AND (NOT A) (NOT B))
             * Both directions negate every argument; only the connective that
             * wraps the negated arguments differs.  No pull_ors()/pull_ands()
             * call is made here: for input that is AND/OR flat with no NOT
             * directly above an AND/OR, the recursive calls cannot hand back a
             * clause of the connective we are about to build.
             */
            List* negated_args = NIL;
            ListCell* cell = NULL;
            foreach (cell, bexpr->args) {
                negated_args = lappend(negated_args, negate_clause((Node*)lfirst(cell)));
            }

            if (bexpr->boolop == AND_EXPR) {
                return (Node*)make_orclause(negated_args);
            }
            return (Node*)make_andclause(negated_args);
        }

        case T_NullTest: {
            NullTest* source = (NullTest*)node;

            /*
             * For row-typed arguments IS NULL and IS NOT NULL are not logical
             * inverses of each other, so only the scalar case is simplified.
             */
            if (source->argisrow) {
                break;
            }

            NullTest* flipped = makeNode(NullTest);
            flipped->arg = source->arg;
            if (source->nulltesttype == IS_NULL) {
                flipped->nulltesttype = IS_NOT_NULL;
            } else {
                flipped->nulltesttype = IS_NULL;
            }
            flipped->argisrow = source->argisrow;
            return (Node*)flipped;
        }

        case T_BooleanTest: {
            BooleanTest* source = (BooleanTest*)node;
            BooleanTest* flipped = makeNode(BooleanTest);

            flipped->arg = source->arg;

            /* Each test kind maps onto its IS [NOT] counterpart. */
            switch (source->booltesttype) {
                case IS_TRUE:
                    flipped->booltesttype = IS_NOT_TRUE;
                    break;
                case IS_NOT_TRUE:
                    flipped->booltesttype = IS_TRUE;
                    break;
                case IS_FALSE:
                    flipped->booltesttype = IS_NOT_FALSE;
                    break;
                case IS_NOT_FALSE:
                    flipped->booltesttype = IS_FALSE;
                    break;
                case IS_UNKNOWN:
                    flipped->booltesttype = IS_NOT_UNKNOWN;
                    break;
                case IS_NOT_UNKNOWN:
                    flipped->booltesttype = IS_UNKNOWN;
                    break;
                default: {
                    ereport(ERROR,
                        (errmodule(MOD_OPT),
                            errcode(ERRCODE_UNRECOGNIZED_NODE_TYPE),
                            errmsg("unrecognized bool test type: %d", (int)source->booltesttype)));
                } break;
            }
            return (Node*)flipped;
        }

        default:
            /* Any other node type is handled by the generic path below. */
            break;
    }

    /* No simplification applies, so put an explicit NOT on top of the input. */
    return (Node*)make_notclause((Expr*)node);
}
/*
 * canonicalize_qual
 * Convert a qualification expression to the most useful form.
 *
 * The name of this routine is a holdover from a time when it would try to
 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
 * Eventually, we recognized that that had more theoretical purity than
 * actual usefulness, and so now the transformation doesn't involve any
 * notion of reaching a canonical form.
 *
 * NOTE: we assume the input has already been through eval_const_expressions
 * and therefore possesses AND/OR flatness. Formerly this function included
 * its own flattening logic, but that requires a useless extra pass over the
 * tree.
 *
 * qual:     the qualification to process; NULL is passed straight through.
 * is_check: forwarded to find_duplicate_ors(). true selects CHECK-constraint
 *           semantics (a constant NULL is simplified as if it were TRUE),
 *           false selects WHERE semantics (a constant NULL is simplified as
 *           if it were FALSE).
 *
 * Returns the modified qualification.
 */
Expr* canonicalize_qual(Expr* qual, bool is_check)
{
    /* Nothing to do for an empty qual. */
    if (qual == NULL) {
        return NULL;
    }

    /*
     * Pull up redundant subclauses in OR-of-AND trees. Only the top-level
     * AND/OR structure is examined; looking deeper would be pointless.
     */
    return find_duplicate_ors(qual, is_check);
}
/*
* pull_ands
* Recursively flatten nested AND clauses into a single and-clause list.
*
* Input is the arglist of an AND clause.
* Returns the rebuilt arglist (note original list structure is not touched).
*/
List* pull_ands(List* andlist)
{
    List* flattened = NIL;
    ListCell* cell = NULL;

    foreach (cell, andlist) {
        Node* item = (Node*)lfirst(cell);

        /* Anything that is not itself an AND is kept as a plain member. */
        if (!and_clause(item)) {
            flattened = lappend(flattened, item);
            continue;
        }

        /*
         * The recursive call returns a list it built itself, which no other
         * expression shares, so it may be spliced in destructively without
         * taking a list_copy first.
         */
        List* nested = pull_ands(((BoolExpr*)item)->args);
        flattened = list_concat(flattened, nested);
    }

    return flattened;
}
/*
* pull_ors
* Recursively flatten nested OR clauses into a single or-clause list.
*
* Input is the arglist of an OR clause.
* Returns the rebuilt arglist (note original list structure is not touched).
*/
List* pull_ors(List* orlist)
{
    List* flattened = NIL;
    ListCell* cell = NULL;

    foreach (cell, orlist) {
        Node* item = (Node*)lfirst(cell);

        /* Anything that is not itself an OR is kept as a plain member. */
        if (!or_clause(item)) {
            flattened = lappend(flattened, item);
            continue;
        }

        /*
         * The recursive call returns a list it built itself, which no other
         * expression shares, so it may be spliced in destructively without
         * taking a list_copy first.
         */
        List* nested = pull_ors(((BoolExpr*)item)->args);
        flattened = list_concat(flattened, nested);
    }

    return flattened;
}
/**
 * Helper for the OR branch of find_duplicate_ors: decide what a constant
 * argument of an OR means for the enclosing clause.
 *
 * @param var       the Const node found among the OR arguments
 * @param is_check  true for CHECK semantics, false for WHERE semantics
 * @return NULL when the constant can simply be dropped from the OR list (the
 *         caller continues with the next argument); otherwise a constant TRUE
 *         expression that the caller returns as the value of the whole OR
 */
static Expr *ReduceConstWithinOr(Const *var, bool is_check)
{
    Assert(var != NULL && IsA(var, Const));

    if (!var->constisnull) {
        if (DatumGetBool(var->constvalue)) {
            /* Constant TRUE is handed back unchanged. */
            return (Expr *) var;
        }
        /* Constant FALSE contributes nothing to an OR: drop it. */
        return NULL;
    }

    /* Constant NULL: dropped in WHERE, but reduces the OR to TRUE in CHECK. */
    if (!is_check) {
        return NULL;
    }
    return (Expr *) makeBoolConst(true, false);
}
/**
 * Helper for the AND branch of find_duplicate_ors: decide what a constant
 * argument of an AND means for the enclosing clause.
 *
 * @param var       the Const node found among the AND arguments; must not be NULL
 * @param is_check  true for CHECK semantics, false for WHERE semantics
 * @return NULL when the constant can simply be dropped from the AND list (the
 *         caller continues with the next argument); otherwise a constant FALSE
 *         expression (var itself or a newly made constant) that the caller
 *         returns as the value of the whole AND
 */
static Expr *ReduceConstWithinAnd(Const *var, bool is_check)
{
    /* Same precondition as ReduceConstWithinOr: var is dereferenced unconditionally below. */
    Assert(var != NULL && IsA(var, Const));

    if (var->constisnull) {
        if (is_check) {
            /* Within AND in CHECK, drop constant NULL */
            return NULL;
        } else {
            /* Within AND in WHERE, constant NULL, reduces to FALSE */
            return (Expr *) makeBoolConst(false, false);
        }
    }

    if (DatumGetBool(var->constvalue)) {
        /* Within AND, drop constant TRUE */
        return NULL;
    }

    /* Constant FALSE, save it */
    return (Expr *) var;
}
/**
 * Helper for find_duplicate_ors, used for a qual that is neither an AND nor
 * an OR clause.
 *
 * @param var       the qual, cast to Const by the caller; may be NULL or may
 *                  actually be some other node type
 * @param is_check  true for CHECK semantics, false for WHERE semantics
 * @return for a constant NULL, a new non-null boolean constant that is TRUE
 *         when is_check is set and FALSE otherwise; in every other case the
 *         input itself, unchanged
 */
static Expr *ReduceConstWithinOther(Const *var, bool is_check)
{
    /* Only a NULL constant is rewritten; anything else passes straight through. */
    const bool is_null_const = (var != NULL) && IsA(var, Const) && var->constisnull;

    if (!is_null_const) {
        return (Expr *)var;
    }

    /* in CHECK the NULL becomes TRUE, in WHERE it becomes FALSE */
    return (Expr *) makeBoolConst(is_check, false);
}
/* --------------------
* The following code attempts to apply the inverse OR distributive law:
* ((A AND B) OR (A AND C)) => (A AND (B OR C))
* That is, locate OR clauses in which every subclause contains an
* identical term, and pull out the duplicated terms.
*
* This may seem like a fairly useless activity, but it turns out to be
* applicable to many machine-generated queries, and there are also queries
* in some of the TPC benchmarks that need it. This was in fact almost the
* sole useful side-effect of the old prepqual code that tried to force
* the query into canonical AND-of-ORs form: the canonical equivalent of
* ((A AND B) OR (A AND C))
* is
* ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
* which the code was able to simplify to
* (A AND (A OR C) AND (B OR A) AND (B OR C))
* thus successfully extracting the common condition A --- but at the cost
* of cluttering the qual with many redundant clauses.
* --------------------
*/
/*
* find_duplicate_ors
* Given a qualification tree with the NOTs pushed down, search for
* OR clauses to which the inverse OR distributive law might apply.
* Only the top-level AND/OR structure is searched.
*
* Returns the modified qualification. AND/OR flatness is preserved.
*/
static Expr* find_duplicate_ors(Expr* qual, bool is_check)
{
    const bool is_or = or_clause((Node*)qual);
    const bool is_and = !is_or && and_clause((Node*)qual);

    /* Neither AND nor OR: only a constant NULL may need rewriting. */
    if (!is_or && !is_and) {
        return ReduceConstWithinOther((Const *)qual, is_check);
    }

    if (is_or) {
        List* collected = NIL;
        ListCell* cell = NULL;

        /* Process every arm recursively, folding away constant arms. */
        foreach (cell, ((BoolExpr*)qual)->args) {
            Expr *arm = find_duplicate_ors((Expr*)lfirst(cell), is_check);

            if (arm != NULL && IsA(arm, Const)) {
                Expr *decided = ReduceConstWithinOr((Const *)arm, is_check);

                /* A non-NULL answer is the value of the whole OR. */
                if (decided != NULL) {
                    return decided;
                }
                /* Otherwise the constant is dropped from the list. */
                continue;
            }
            collected = lappend(collected, arm);
        }

        /* Flatten any ORs that were pulled up to just below this level. */
        List* flat = pull_ors(collected);
        list_free(collected);

        /* An OR with no remaining inputs is FALSE. */
        if (flat == NIL) {
            return (Expr *) makeBoolConst(false, false);
        }

        /* A single remaining input stands for the OR; otherwise look for duplicates. */
        return (list_length(flat) == 1) ? (Expr*)linitial(flat) : process_duplicate_ors(flat);
    }

    /* Remaining case: an AND clause. */
    List* collected = NIL;
    ListCell* cell = NULL;

    /* Process every arm recursively, folding away constant arms. */
    foreach (cell, ((BoolExpr*)qual)->args) {
        Expr *arm = find_duplicate_ors((Expr*)lfirst(cell), is_check);

        if (arm != NULL && IsA(arm, Const)) {
            Expr *decided = ReduceConstWithinAnd((Const *)arm, is_check);

            /* A non-NULL answer is the value of the whole AND. */
            if (decided != NULL) {
                return decided;
            }
            /* Otherwise the constant is dropped from the list. */
            continue;
        }
        collected = lappend(collected, arm);
    }

    /* Flatten any ANDs that were introduced just below this level. */
    List* flat = pull_ands(collected);
    list_free(collected);

    /* An AND with no remaining inputs is TRUE. */
    if (flat == NIL) {
        return (Expr *) makeBoolConst(true, false);
    }

    /* A single remaining input stands for the AND; otherwise an AND node is still needed. */
    return (list_length(flat) == 1) ? (Expr *) linitial(flat) : make_andclause(flat);
}
/*
* process_duplicate_ors
* Given a list of exprs which are ORed together, try to apply
* the inverse OR distributive law.
*
* Returns the resulting expression (could be an AND clause, an OR
* clause, or maybe even a single subexpression).
*/
static Expr* process_duplicate_ors(List* orlist)
{
    List* reference = NIL;
    int shortest_len = 0;
    List* common = NIL;
    List* residual_arms = NIL;
    ListCell* cell = NULL;

    Assert(orlist != NULL && list_length(orlist) > 1);

    /*
     * Pick the reference list: the shortest AND arm. A term missing from it
     * cannot be present in every arm. An arm that is not an AND counts as a
     * one-element AND, which is necessarily the shortest, so stop there.
     */
    foreach (cell, orlist) {
        Expr* arm = (Expr*)lfirst(cell);

        if (!and_clause((Node*)arm)) {
            reference = list_make1(arm);
            break;
        }

        List* terms = ((BoolExpr*)arm)->args;
        int nterms = list_length(terms);
        if (reference == NIL || nterms < shortest_len) {
            reference = terms;
            shortest_len = nterms;
        }
    }

    /* Remove any duplicates from the reference list, just in case. */
    reference = list_union(NIL, reference);

    /*
     * A reference term is a winner when it occurs in every arm of the OR:
     * as a member of an AND arm, or equal to a non-AND arm.
     */
    foreach (cell, reference) {
        Expr* candidate = (Expr*)lfirst(cell);
        bool in_every_arm = true;
        ListCell* arm_cell = NULL;

        foreach (arm_cell, orlist) {
            Expr* arm = (Expr*)lfirst(arm_cell);
            bool present = and_clause((Node*)arm) ? list_member(((BoolExpr*)arm)->args, candidate)
                                                  : equal(candidate, arm);
            if (!present) {
                in_every_arm = false;
                break;
            }
        }

        if (in_every_arm) {
            common = lappend(common, candidate);
        }
    }

    /* Without any common term the OR cannot be transformed. */
    if (common == NIL) {
        return make_orclause(orlist);
    }

    /*
     * Rebuild the OR from what is left of each arm once the common terms are
     * taken out.
     *
     * If an arm becomes empty we have a case like (A AND B) OR (A), which is
     * just A: the extra conditions in the other arms are irrelevant, so the
     * residual OR is discarded altogether.
     *
     * list_difference also removes repeated occurrences of a common term
     * inside an AND arm.
     */
    foreach (cell, orlist) {
        Expr* arm = (Expr*)lfirst(cell);

        if (and_clause((Node*)arm)) {
            List* leftover = list_difference(((BoolExpr*)arm)->args, common);

            if (leftover == NIL) {
                residual_arms = NIL; /* degenerate case, see above */
                break;
            }
            if (list_length(leftover) == 1) {
                residual_arms = lappend(residual_arms, linitial(leftover));
            } else {
                residual_arms = lappend(residual_arms, make_andclause(leftover));
            }
        } else if (list_member(common, arm)) {
            residual_arms = NIL; /* degenerate case, see above */
            break;
        } else {
            residual_arms = lappend(residual_arms, arm);
        }
    }

    /*
     * Unless degenerate, the residual OR becomes one more conjunct. A lone
     * residual arm is added directly (can that really happen?); otherwise
     * pull_ors keeps the result flat in case a sub-sub-OR was pulled up.
     */
    if (residual_arms != NIL) {
        if (list_length(residual_arms) == 1) {
            common = lappend(common, linitial(residual_arms));
        } else {
            common = lappend(common, make_orclause(pull_ors(residual_arms)));
        }
    }

    /*
     * Return the conjunction, again allowing for a single element and
     * keeping the AND flat.
     */
    if (list_length(common) == 1) {
        return (Expr*)linitial(common);
    }
    return make_andclause(pull_ands(common));
}