!2676 支持or表达式的谓词提取下推

Merge pull request !2676 from 陈志凯/master
2022-12-23 06:47:32 +00:00
parent 95f017db04 b15c245a18
commit c4ea9be231
15 changed files with 493 additions and 207 deletions
--- a/src/common/backend/utils/misc/guc/guc_sql.cpp
+++ b/src/common/backend/utils/misc/guc/guc_sql.cpp
@ -314,6 +314,7 @@ static const struct config_enum_entry sql_beta_options[] = {
    {"predpush_same_level", PREDPUSH_SAME_LEVEL, false},
    {"partition_fdw_on", PARTITION_FDW_ON, false},
    {"disable_bitmap_cost_with_lossy_pages", DISABLE_BITMAP_COST_WITH_LOSSY_PAGES, false},
+    {"extract_pushdown_or_clause", EXTRACT_PUSHDOWN_OR_CLAUSE, false},
    {NULL, 0, false}
 };

@ -3971,4 +3972,4 @@ static void assign_sql_ignore_strategy(const char* newval, void* extra) {
        }
    }
    u_sess->utils_cxt.sql_ignore_strategy_val = sql_ignore_strategy[0].val;
-}
+}
--- a/src/gausskernel/optimizer/path/Makefile
+++ b/src/gausskernel/optimizer/path/Makefile
@ -20,11 +20,11 @@ endif

 ifeq ($(enable_multiple_nodes), yes)
 OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
-       joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o streampath_base.o \
+       joinpath.o joinrels.o pathkeys.o tidpath.o streampath_base.o \
       es_selectivity.o
 else
 OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
-       joinpath.o joinrels.o orindxpath.o pathkeys.o tidpath.o	\
+       joinpath.o joinrels.o pathkeys.o tidpath.o \
       pgxcpath_single.o streampath_single.o streampath_base.o es_selectivity.o
 endif

--- a/src/gausskernel/optimizer/path/allpaths.cpp
+++ b/src/gausskernel/optimizer/path/allpaths.cpp
@ -998,16 +998,6 @@ static void set_plain_rel_size(PlannerInfo* root, RelOptInfo* rel, RangeTblEntry
    if (rte->tablesample == NULL) {
        /* Mark rel with estimated output rows, width, etc */
        set_baserel_size_estimates(root, rel);
-
-        /*
-         * Check to see if we can extract any restriction conditions from join
-         * quals that are OR-of-AND structures.  If so, add them to the rel's
-         * restriction list, and redo the above steps.
-         */
-        if (create_or_index_quals(root, rel)) {
-            check_partial_indexes(root, rel);
-            set_baserel_size_estimates(root, rel);
-        }
    } else {
        /* Sampled relation */
        set_tablesample_rel_size(root, rel, rte);
--- a/src/gausskernel/optimizer/path/indxpath.cpp
+++ b/src/gausskernel/optimizer/path/indxpath.cpp
@ -1630,7 +1630,7 @@ static Path* choose_bitmap_and(PlannerInfo* root, RelOptInfo* rel, List* paths,
     * we can remove this limitation.  (But note that this also defends
     * against flat-out duplicate input paths, which can happen because
     * match_join_clauses_to_index will find the same OR join clauses that
-     * create_or_index_quals has pulled OR restriction clauses out of.)
+     * extract_restriction_or_clauses has pulled OR restriction clauses out of.)
     *
     * For the same reason, we reject AND combinations in which an index
     * predicate clause duplicates another clause.	Here we find it necessary
--- a/src/gausskernel/optimizer/path/orindxpath.cpp
+++ b/src/gausskernel/optimizer/path/orindxpath.cpp
@ -1,183 +0,0 @@
-/* -------------------------------------------------------------------------
- *
- * orindxpath.cpp
- *	  Routines to find index paths that match a set of OR clauses
- *
- * Portions Copyright (c) 2020 Huawei Technologies Co.,Ltd.
- * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *	  src/gausskernel/optimizer/path/orindxpath.cpp
- *
- * -------------------------------------------------------------------------
- */
-#include "postgres.h"
-#include "knl/knl_variable.h"
-
-#include "optimizer/cost.h"
-#include "optimizer/paths.h"
-#include "optimizer/restrictinfo.h"
-
-/* ----------
- * create_or_index_quals
- *	  Examine join OR-of-AND quals to see if any useful restriction OR
- *	  clauses can be extracted.  If so, add them to the query.
- *
- * Although a join clause must reference other relations overall,
- * an OR of ANDs clause might contain sub-clauses that reference just this
- * relation and can be used to build a restriction clause.
- * For example consider
- *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
- * We can transform this into
- *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
- *			AND (a.x = 42 OR a.x = 44)
- *			AND (b.y = 43 OR b.z = 45);
- * which opens the potential to build OR indexscans on a and b.  In essence
- * this is a partial transformation to CNF (AND of ORs format).  It is not
- * complete, however, because we do not unravel the original OR --- doing so
- * would usually bloat the qualification expression to little gain.
- *
- * The added quals are partially redundant with the original OR, and therefore
- * will cause the size of the joinrel to be underestimated when it is finally
- * formed.	(This would be true of a full transformation to CNF as well; the
- * fault is not really in the transformation, but in clauselist_selectivity's
- * inability to recognize redundant conditions.)  To minimize the collateral
- * damage, we want to minimize the number of quals added.  Therefore we do
- * not add every possible extracted restriction condition to the query.
- * Instead, we search for the single restriction condition that generates
- * the most useful (cheapest) OR indexscan, and add only that condition.
- * This is a pretty ad-hoc heuristic, but quite useful.
- *
- * We can then compensate for the redundancy of the added qual by poking
- * the recorded selectivity of the original OR clause, thereby ensuring
- * the added qual doesn't change the estimated size of the joinrel when
- * it is finally formed.  This is a MAJOR HACK: it depends on the fact
- * that clause selectivities are cached and on the fact that the same
- * RestrictInfo node will appear in every joininfo list that might be used
- * when the joinrel is formed.	And it probably isn't right in cases where
- * the size estimation is nonlinear (i.e., outer and IN joins).  But it
- * beats not doing anything.
- *
- * NOTE: one might think this messiness could be worked around by generating
- * the indexscan path with a small path->rows value, and not touching the
- * rel's baserestrictinfo or rel->rows.  However, that does not work.
- * The optimizer's fundamental design assumes that every general-purpose
- * Path for a given relation generates the same number of rows.  Without
- * this assumption we'd not be able to optimize solely on the cost of Paths,
- * but would have to take number of output rows into account as well.
- * (The parameterized-paths stuff almost fixes this, but not quite...)
- *
- * 'rel' is the relation entry for which quals are to be created
- *
- * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
- * If no quals available, returns FALSE and doesn't change rel.
- *
- * Note: check_partial_indexes() must have been run previously.
- * ----------
- */
-bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel)
-{
-    BitmapOrPath* bestpath = NULL;
-    RestrictInfo* bestrinfo = NULL;
-    List* newrinfos = NIL;
-    RestrictInfo* or_rinfo = NULL;
-    Selectivity or_selec, orig_selec;
-    ListCell* i = NULL;
-
-    /* Skip the whole mess if no indexes */
-    if (rel->indexlist == NIL)
-        return false;
-
-    /*
-     * Find potentially interesting OR joinclauses.  We can use any joinclause
-     * that is considered safe to move to this rel by the parameterized-path
-     * machinery, even though what we are going to do with it is not exactly a
-     * parameterized path.
-     */
-    foreach (i, rel->joininfo) {
-        RestrictInfo* rinfo = (RestrictInfo*)lfirst(i);
-
-        if (restriction_is_or_clause(rinfo) && join_clause_is_movable_to(rinfo, rel->relid)) {
-            /*
-             * Use the generate_bitmap_or_paths() machinery to estimate the
-             * value of each OR clause.  We can use regular restriction
-             * clauses along with the OR clause contents to generate
-             * indexquals.	We pass restriction_only = true so that any
-             * sub-clauses that are actually joins will be ignored.
-             */
-            List* orpaths = NIL;
-            ListCell* k = NULL;
-
-            orpaths = generate_bitmap_or_paths(root, rel, list_make1(rinfo), rel->baserestrictinfo, true);
-
-            IndexFeature indexFeature = getIndexFeature(rel->isPartitionedTable, (rel->bucketInfo != NULL));
-            if (indexFeature != NONFEATURED_INDEX) {
-                orpaths = list_concat(
-                    orpaths,
-                    GenerateBitmapOrPathsWithFeaturedIndex(root, rel, list_make1(rinfo), rel->baserestrictinfo, true,
-                        indexFeature));
-            }
-
-            /* Locate the cheapest OR path */
-            foreach (k, orpaths) {
-                BitmapOrPath* path = (BitmapOrPath*)lfirst(k);
-
-                AssertEreport(IsA(path, BitmapOrPath), MOD_OPT, "Restriction information is incorrect");
-                if (bestpath == NULL || path->path.total_cost < bestpath->path.total_cost) {
-                    bestpath = path;
-                    bestrinfo = rinfo;
-                }
-            }
-        }
-    }
-
-    /* Fail if no suitable clauses found */
-    if (bestpath == NULL)
-        return false;
-
-    /*
-     * Convert the path's indexclauses structure to a RestrictInfo tree. We
-     * include any partial-index predicates so as to get a reasonable
-     * representation of what the path is actually scanning.
-     */
-    newrinfos = make_restrictinfo_from_bitmapqual((Path*)bestpath, true, true);
-
-    /* It's possible we get back something other than a single OR clause */
-    if (list_length(newrinfos) != 1)
-        return false;
-    or_rinfo = (RestrictInfo*)linitial(newrinfos);
-    AssertEreport(IsA(or_rinfo, RestrictInfo), MOD_OPT, "Restriction clause does not contain OR");
-    if (!restriction_is_or_clause(or_rinfo))
-        return false;
-
-    /*
-     * OK, add it to the rel's restriction list.
-     */
-    rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);
-    rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level);
-
-    /*
-     * Adjust the original OR clause's cached selectivity to compensate for
-     * the selectivity of the added (but redundant) lower-level qual. This
-     * should result in the join rel getting approximately the same rows
-     * estimate as it would have gotten without all these shenanigans. (XXX
-     * major hack alert ... this depends on the assumption that the
-     * selectivity will stay cached ...)
-     * we don't need cache the selectivity because the index's selectivity is not accurate.
-     */
-    or_selec = clause_selectivity(root, (Node*)or_rinfo, 0, JOIN_INNER, NULL, false);
-    if (or_selec > 0 && or_selec < 1) {
-        orig_selec = clause_selectivity(root, (Node*)bestrinfo, 0, JOIN_INNER, NULL, false);
-        bestrinfo->norm_selec = orig_selec / or_selec;
-        /* clamp result to sane range */
-        if (bestrinfo->norm_selec > 1)
-            bestrinfo->norm_selec = 1;
-        /* It isn't an outer join clause, so no need to adjust outer_selec */
-    }
-
-    /* Tell caller to recompute partial index status and rowcount estimate */
-    return true;
-}
-
--- a/src/gausskernel/optimizer/plan/planmain.cpp
+++ b/src/gausskernel/optimizer/plan/planmain.cpp
@ -33,6 +33,7 @@
 #include "optimizer/planmain.h"
 #include "optimizer/randomplan.h"
 #include "optimizer/tlist.h"
+#include "optimizer/orclauses.h"
 #include "utils/selfuncs.h"

 /* Local functions */
@ -235,6 +236,14 @@ RelOptInfo* query_planner(PlannerInfo* root, List* tlist,
     */
    add_placeholders_to_base_rels(root);

+    /*
+     * Look for join OR clauses that we can extract single-relation
+     * restriction OR clauses from.
+     */
+    if (ENABLE_SQL_BETA_FEATURE(EXTRACT_PUSHDOWN_OR_CLAUSE)) {
+        extract_restriction_or_clauses(root);
+    }
+
    /*
     * We should now have size estimates for every actual table involved in
     * the query, and we also know which if any have been deleted from the
--- a/src/gausskernel/optimizer/util/Makefile
+++ b/src/gausskernel/optimizer/util/Makefile
@ -19,7 +19,7 @@ endif

 SUBDIRS	= learn

-OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \
+OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o orclauses.o \
       relnode.o restrictinfo.o tlist.o var.o pruningboundary.o pgxcship.o pruning.o randomplan.o optimizerdebug.o planmem_walker.o \
       nodegroups.o plananalyzer.o optcommon.o dataskew.o joinskewinfo.o autoanalyzer.o bucketinfo.o bucketpruning.o subpartitionpruning.o

--- a/src/gausskernel/optimizer/util/orclauses.cpp
+++ b/src/gausskernel/optimizer/util/orclauses.cpp
@ -0,0 +1,333 @@
+/*-------------------------------------------------------------------------
+ *
+ * orclauses.cpp
+ *      Routines to extract restriction OR clauses from join OR clauses
+ *
+ * Portions Copyright (c) 2022 Huawei Technologies Co.,Ltd.
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *      src/gausskernel/optimizer/util/orclauses.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/orclauses.h"
+#include "optimizer/restrictinfo.h"
+
+static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
+static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
+static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, Expr *orclause, RestrictInfo *join_or_rinfo);
+
+/*
+ * extract_restriction_or_clauses
+ *      Examine join OR-of-AND clauses to see if any useful restriction OR
+ *      clauses can be extracted.  If so, add them to the query.
+ *
+ * Although a join clause must reference multiple relations overall,
+ * an OR of ANDs clause might contain sub-clauses that reference just one
+ * relation and can be used to build a restriction clause for that rel.
+ * For example consider
+ *        WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
+ * We can transform this into
+ *        WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
+ *            AND (a.x = 42 OR a.x = 44)
+ *            AND (b.y = 43 OR b.z = 45);
+ * which allows the latter clauses to be applied during the scans of a and b,
+ * perhaps as index qualifications, and in any case reducing the number of
+ * rows arriving at the join.  In essence this is a partial transformation to
+ * CNF (AND of ORs format).  It is not complete, however, because we do not
+ * unravel the original OR --- doing so would usually bloat the qualification
+ * expression to little gain.
+ *
+ * The added quals are partially redundant with the original OR, and therefore
+ * would cause the size of the joinrel to be underestimated when it is finally
+ * formed.  (This would be true of a full transformation to CNF as well; the
+ * fault is not really in the transformation, but in clauselist_selectivity's
+ * inability to recognize redundant conditions.)  We can compensate for this
+ * redundancy by changing the cached selectivity of the original OR clause,
+ * canceling out the (valid) reduction in the estimated sizes of the base
+ * relations so that the estimated joinrel size remains the same.  This is
+ * a MAJOR HACK: it depends on the fact that clause selectivities are cached
+ * and on the fact that the same RestrictInfo node will appear in every
+ * joininfo list that might be used when the joinrel is formed.
+ * And it doesn't work in cases where the size estimation is nonlinear
+ * (i.e., outer and IN joins).  But it beats not doing anything.
+ *
+ * We examine each base relation to see if join clauses associated with it
+ * contain extractable restriction conditions.  If so, add those conditions
+ * to the rel's baserestrictinfo and update the cached selectivities of the
+ * join clauses.  Note that the same join clause will be examined afresh
+ * from the point of view of each baserel that participates in it, so its
+ * cached selectivity may get updated multiple times.
+ */
+void extract_restriction_or_clauses(PlannerInfo *root)
+{
+    /*
+     * Visit every slot of simple_rel_array from index 1 upward; each plain
+     * base relation found there is checked for usable join OR clauses.
+     */
+    for (Index relIndex = 1; (int)relIndex < root->simple_rel_array_size; relIndex++) {
+        RelOptInfo *baseRel = root->simple_rel_array[relIndex];
+        ListCell *cell = NULL;
+
+        /* Skip empty slots, which belong to RTEs that are not base rels. */
+        if (baseRel == NULL) {
+            continue;
+        }
+
+        /* The array position and the rel's own relid must agree. */
+        Assert(baseRel->relid == relIndex);
+
+        /* Only RELOPT_BASEREL entries are processed; "other rels" are skipped. */
+        if (baseRel->reloptkind != RELOPT_BASEREL) {
+            continue;
+        }
+
+        foreach(cell, baseRel->joininfo) {
+            RestrictInfo *joinClause = (RestrictInfo *) lfirst(cell);
+            Expr *extracted = NULL;
+
+            /*
+             * A join clause is a candidate only if it is an OR clause, the
+             * parameterized-path rules consider it movable to this rel, and
+             * it has not been marked redundant (norm_selec > 1).  The checks
+             * run in this order and stop at the first failure.
+             */
+            if (!restriction_is_or_clause(joinClause)) {
+                continue;
+            }
+            if (!join_clause_is_movable_to(joinClause, relIndex)) {
+                continue;
+            }
+            if (!(joinClause->norm_selec <= 1)) {
+                continue;
+            }
+
+            /* Try to pull out a qual that mentions this rel only. */
+            extracted = extract_or_clause(joinClause, baseRel);
+
+            /*
+             * On success, let consider_new_or_clause() decide whether the
+             * qual is worth attaching to the rel's restriction list.
+             */
+            if (extracted != NULL) {
+                consider_new_or_clause(root, baseRel, extracted, joinClause);
+            }
+        }
+    }
+}
+
+/*
+ * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
+ */
+static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+    /*
+     * The clause qualifies only when all of the following hold, tested in
+     * this order with evaluation stopping at the first failure:
+     *   - it is not a pseudoconstant clause;
+     *   - the set of rels it references equals rel's own relids, i.e. it
+     *     mentions the rel and nothing but the rel;
+     *   - it contains no volatile functions, since we don't want extra
+     *     evaluations of those.
+     */
+    bool mentions_only_rel = !rinfo->pseudoconstant && bms_equal(rinfo->clause_relids, rel->relids);
+
+    return mentions_only_rel && !contain_volatile_functions((Node *) rinfo->clause);
+}
+
+/*
+ * Try to extract a restriction clause mentioning only "rel" from the given
+ * join OR-clause.
+ *
+ * We must be able to extract at least one qual for this rel from each of
+ * the arms of the OR, else we can't use it.
+ *
+ * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
+ * if no OR clause could be extracted.
+ */
+static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
+{
+    List *clauselist = NIL;     /* arms of the OR clause being built */
+    ListCell *lc = NULL;
+
+    /*
+     * Scan each arm of the input OR clause.  Notice we descend into
+     * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
+     * toplevel OR/AND structure.  This is useful because we can use the info
+     * in those nodes to make is_safe_restriction_clause_for()'s checks
+     * cheaper.  We'll strip those nodes from the returned tree, though,
+     * meaning that fresh ones will be built if the clause is accepted as a
+     * restriction clause.  This might seem wasteful --- couldn't we re-use
+     * the existing RestrictInfos?  But that'd require assuming that
+     * selectivity and other cached data is computed exactly the same way for
+     * a restriction clause as for a join clause, which seems undesirable.
+     */
+    Assert(is_orclause(or_rinfo->orclause));
+    foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args) {
+        Node *orarg = (Node *) lfirst(lc);
+        List *subclauses = NIL;     /* quals extracted from this one arm */
+        Node *subclause = NULL;
+
+        /* OR arguments should be ANDs or sub-RestrictInfos */
+        if (is_andclause(orarg)) {
+            List *andargs = ((BoolExpr *) orarg)->args;
+            ListCell *lc2 = NULL;
+
+            foreach(lc2, andargs) {
+                RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+
+                if (restriction_is_or_clause(rinfo)) {
+                    /*
+                     * Recurse to deal with nested OR.  Note we *must* recurse
+                     * here, this isn't just overly-tense optimization: we
+                     * have to descend far enough to find and strip all
+                     * RestrictInfos in the expression.
+                     */
+                    Expr *suborclause = extract_or_clause(rinfo, rel);
+                    if (suborclause) {
+                        subclauses = lappend(subclauses, suborclause);
+                    }
+                } else if (is_safe_restriction_clause_for(rinfo, rel)) {
+                    /* Keep the bare expression, not the RestrictInfo wrapper. */
+                    subclauses = lappend(subclauses, rinfo->clause);
+                }
+            }
+        } else {
+            RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
+            Assert(!restriction_is_or_clause(rinfo));
+            if (is_safe_restriction_clause_for(rinfo, rel)) {
+                subclauses = lappend(subclauses, rinfo->clause);
+            }
+        }
+
+        /*
+         * If nothing could be extracted from this arm, we can't do anything
+         * with this OR clause.
+         */
+        if (subclauses == NIL) {
+            return NULL;
+        }
+
+        /*
+         * OK, add subclause(s) to the result OR.  If we found more than one,
+         * we need an AND node.  But if we found only one, and it is itself an
+         * OR node, add its subclauses to the result instead; this is needed
+         * to preserve AND/OR flatness (ie, no OR directly underneath OR).
+         */
+        subclause = (Node *) make_ands_explicit(subclauses);
+        if (is_orclause(subclause)) {
+            /*
+             * Append a copy of the nested OR's argument list so the result
+             * never shares list cells with that node's args.
+             * NOTE(review): this matters if list_concat() in this tree links
+             * the second list's cells into the first (and hands back the
+             * second list itself when the first is NIL); it is harmless if
+             * list_concat() already copies.  Confirm against pg_list.
+             */
+            clauselist = list_concat(clauselist, list_copy(((BoolExpr *) subclause)->args));
+        } else {
+            clauselist = lappend(clauselist, subclause);
+        }
+    }
+
+    /*
+     * If we got a restriction clause from every arm, wrap them up in an OR
+     * node.  (In theory the OR node might be unnecessary, if there was only
+     * one arm --- but then the input OR node was also redundant.)
+     */
+    if (clauselist != NIL) {
+        return make_orclause(clauselist);
+    }
+    return NULL;
+}
+
+/*
+ * Consider whether a successfully-extracted restriction OR clause is
+ * actually worth using.  If so, add it to the planner's data structures,
+ * and adjust the original join clause (join_or_rinfo) to compensate.
+ */
+static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, Expr *orclause, RestrictInfo *join_or_rinfo)
+{
+    RestrictInfo *or_rinfo = NULL;
+    Selectivity or_selec, orig_selec;
+
+    /*
+     * Build a RestrictInfo from the new OR clause.  We can assume it's valid
+     * as a base restriction clause.  The new node takes over the security
+     * level of the join clause it was extracted from.
+     */
+    or_rinfo = make_restrictinfo(orclause, true, false, false, join_or_rinfo->security_level, NULL, NULL, NULL);
+
+    /*
+     * Estimate its selectivity.  (We could have done this earlier, but doing
+     * it on the RestrictInfo representation allows the result to get cached,
+     * saving work later.)
+     */
+    or_selec = clause_selectivity(root, (Node *) or_rinfo, 0, JOIN_INNER, NULL);
+
+    /*
+     * The clause is only worth adding to the query if it rejects a useful
+     * fraction of the base relation's rows; otherwise, it's just going to
+     * cause duplicate computation (since we will still have to check the
+     * original OR clause when the join is formed).  Somewhat arbitrarily, we
+     * set the selectivity threshold at 0.9.
+     */
+    if (or_selec > 0.9) {
+        return;                    /* forget it */
+    }
+
+    /*
+     * OK, add it to the rel's restriction-clause list, and keep the rel's
+     * minimum security level in step with the clause just added.
+     */
+    rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
+    rel->baserestrict_min_security = Min(rel->baserestrict_min_security, or_rinfo->security_level);
+
+    /*
+     * Adjust the original join OR clause's cached selectivity to compensate
+     * for the selectivity of the added (but redundant) lower-level qual. This
+     * should result in the join rel getting approximately the same rows
+     * estimate as it would have gotten without all these shenanigans.
+     *
+     * XXX major hack alert: this depends on the assumption that the
+     * selectivity will stay cached.
+     *
+     * XXX another major hack: we adjust only norm_selec, the cached
+     * selectivity for JOIN_INNER semantics, even though the join clause
+     * might've been an outer-join clause.  This is partly because we can't
+     * easily identify the relevant SpecialJoinInfo here, and partly because
+     * the linearity assumption we're making would fail anyway.  (If it is an
+     * outer-join clause, "rel" must be on the nullable side, else we'd not
+     * have gotten here.  So the computation of the join size is going to be
+     * quite nonlinear with respect to the size of "rel", so it's not clear
+     * how we ought to adjust outer_selec even if we could compute its
+     * original value correctly.)
+     *
+     * The or_selec > 0 test also protects the division below.
+     */
+    if (or_selec > 0) {
+        /*
+         * Start from an all-zero struct: only some members are filled in
+         * below, and the struct is handed to clause_selectivity(), so no
+         * member may be left holding indeterminate stack contents.
+         */
+        SpecialJoinInfo sjinfo = {};
+
+        /*
+         * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
+         * approx_tuple_count() in the costsize module.)  The left-hand side
+         * is every rel the join clause mentions other than "rel"; the
+         * right-hand side is "rel" itself.
+         */
+        sjinfo.type = T_SpecialJoinInfo;
+        sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
+                                             rel->relids);
+        sjinfo.min_righthand = rel->relids;
+        sjinfo.syn_lefthand = sjinfo.min_lefthand;
+        sjinfo.syn_righthand = sjinfo.min_righthand;
+        sjinfo.jointype = JOIN_INNER;
+        /* we don't bother trying to make the remaining fields valid */
+        sjinfo.lhs_strict = false;
+        sjinfo.delay_upper_joins = false;
+        sjinfo.join_quals = NIL;
+
+        /* Compute inner-join size */
+        orig_selec = clause_selectivity(root, (Node *) join_or_rinfo, 0, JOIN_INNER, &sjinfo);
+
+        /* And hack cached selectivity so join size remains the same */
+        join_or_rinfo->norm_selec = orig_selec / or_selec;
+        /* ensure result stays in sane range, in particular not "redundant" */
+        if (join_or_rinfo->norm_selec > 1) {
+            join_or_rinfo->norm_selec = 1;
+        }
+        /* as explained above, we don't touch outer_selec */
+    }
+}
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@ -20,6 +20,8 @@
 #include "nodes/nodeFuncs.h"

 #ifndef ENABLE_MULTIPLE_NODES
+/* True when "clause" is a non-NULL BoolExpr whose boolop is AND_EXPR.  The argument is expanded more than once, so pass an expression without side effects. */
+#define is_andclause(clause) \
+    ((clause) != NULL && IsA(clause, BoolExpr) && (((const BoolExpr *)(clause))->boolop) == AND_EXPR)
 #define is_orclause(clause) \
    ((clause) != NULL && IsA(clause, BoolExpr) && (((const BoolExpr *)(clause))->boolop) == OR_EXPR)
 #define is_notclause(clause) \
--- a/src/include/optimizer/orclauses.h
+++ b/src/include/optimizer/orclauses.h
@ -0,0 +1,21 @@
+/*-------------------------------------------------------------------------
+ *
+ * orclauses.h
+ *	  prototypes for orclauses.cpp.
+ *
+ * Portions Copyright (c) 2022, openGauss Contributors
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/optimizer/orclauses.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ORCLAUSES_H
+#define ORCLAUSES_H
+
+#include "nodes/relation.h"
+
+/*
+ * Examine the join OR clauses attached to the planner's base relations and,
+ * where possible, derive single-relation restriction OR clauses from them.
+ * Defined in orclauses.cpp.
+ */
+extern void extract_restriction_or_clauses(PlannerInfo *root);
+
+#endif /* ORCLAUSES_H */
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@ -79,13 +79,6 @@ inline bool CheckIndexPathUseGPI(IndexPath* ipath)
    return ipath->indexinfo->isGlobal;
 }

-
-/*
- * orindxpath.c
- *	  additional routines for indexable OR clauses
- */
-extern bool create_or_index_quals(PlannerInfo* root, RelOptInfo* rel);
-
 /*
 * tidpath.h
 *	  routines to generate tid paths
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@ -418,7 +418,8 @@ typedef enum {
    PLPGSQL_STREAM_FETCHALL = 4096, /* fetch all tuple when has stream sql under plpgsql's for-loop */
    PREDPUSH_SAME_LEVEL = 8192, /* predpush same level */
    PARTITION_FDW_ON = 16384, /* support create foreign table on partitioned table */
-    DISABLE_BITMAP_COST_WITH_LOSSY_PAGES = 32768 /* stop computing bitmap path cost with lossy pages */ 
+    DISABLE_BITMAP_COST_WITH_LOSSY_PAGES = 32768, /* stop computing bitmap path cost with lossy pages */
+    EXTRACT_PUSHDOWN_OR_CLAUSE = 65536  /* Extract restriction OR clauses. */
 } sql_beta_param;

 typedef enum {
--- a/src/test/regress/expected/extract_pushdown_or_clause.out
+++ b/src/test/regress/expected/extract_pushdown_or_clause.out
@ -0,0 +1,88 @@
+create table a(id1 int, id2 int);
+create table b(id1 int, id2 int);
+insert into a select n,n from generate_series(-5,5) n;
+insert into b select n,n from generate_series(0,5) n;
+explain(costs off) select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+                                                                                              QUERY PLAN                                                                                               
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         Join Filter: ((a.id1 > b.id1) AND (((sqrt((a.id1)::double precision) < 10::double precision) AND (b.id2 < 10)) OR ((sqrt((a.id1)::double precision) > 5::double precision) AND (b.id2 > 0))))
+         ->  Seq Scan on a
+         ->  Materialize
+               ->  Seq Scan on b
+(6 rows)
+
+select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+ count 
+-------
+    15
+(1 row)
+
+create table ta(id int,id1 int);
+create table tb(id int,id1 int);
+insert into ta values (1, 1);
+insert into ta values (2, 2);
+insert into ta values (3, 3);
+insert into ta values (4, 4);
+insert into tb values (1, 1);
+insert into tb values (2, 2);
+insert into tb values (3, 3);
+insert into tb values (4, 4);
+select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+ id | id1 | id | id1 
+----+-----+----+-----
+  1 |   1 |  2 |   2
+  2 |   2 |  1 |   1
+(2 rows)
+
+explain(costs off) select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (((ta.id = 1) AND (tb.id = 2)) OR ((ta.id = 2) AND (tb.id = 1)))
+   ->  Seq Scan on ta
+   ->  Materialize
+         ->  Seq Scan on tb
+(5 rows)
+
+set sql_beta_feature=extract_pushdown_or_clause;
+explain(costs off) select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+                                                                                              QUERY PLAN                                                                                               
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         Join Filter: ((a.id1 > b.id1) AND (((sqrt((a.id1)::double precision) < 10::double precision) AND (b.id2 < 10)) OR ((sqrt((a.id1)::double precision) > 5::double precision) AND (b.id2 > 0))))
+         ->  Seq Scan on a
+               Filter: ((sqrt((id1)::double precision) < 10::double precision) OR (sqrt((id1)::double precision) > 5::double precision))
+         ->  Materialize
+               ->  Seq Scan on b
+                     Filter: ((id2 < 10) OR (id2 > 0))
+(8 rows)
+
+select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+ERROR:  cannot take square root of a negative number
+select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+ id | id1 | id | id1 
+----+-----+----+-----
+  1 |   1 |  2 |   2
+  2 |   2 |  1 |   1
+(2 rows)
+
+explain(costs off) select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+                                   QUERY PLAN                                    
+---------------------------------------------------------------------------------
+ Nested Loop
+   Join Filter: (((ta.id = 1) AND (tb.id = 2)) OR ((ta.id = 2) AND (tb.id = 1)))
+   ->  Seq Scan on ta
+         Filter: ((id = 1) OR (id = 2))
+   ->  Materialize
+         ->  Seq Scan on tb
+               Filter: ((id = 2) OR (id = 1))
+(7 rows)
+
+drop table ta;
+drop table tb;
+drop table a;
+drop table b;
+set sql_beta_feature=none;
--- a/src/test/regress/parallel_schedule0
+++ b/src/test/regress/parallel_schedule0
@ -14,6 +14,8 @@ test: recovery_2pc_tools
 test: sqlpatch_base
 test: sqlpatch_func

+test: extract_pushdown_or_clause
+
 #wlm
 test: workload_manager

@ -1042,4 +1044,4 @@ test: show_warnings

 # partition expression key
 test: partition_expr_key
-test: alter_foreign_schema
+test: alter_foreign_schema
--- a/src/test/regress/sql/extract_pushdown_or_clause.sql
+++ b/src/test/regress/sql/extract_pushdown_or_clause.sql
@ -0,0 +1,29 @@
+create table a(id1 int, id2 int);
+create table b(id1 int, id2 int);
+insert into a select n,n from generate_series(-5,5) n;
+insert into b select n,n from generate_series(0,5) n;
+explain(costs off) select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+create table ta(id int,id1 int);
+create table tb(id int,id1 int);
+insert into ta values (1, 1);
+insert into ta values (2, 2);
+insert into ta values (3, 3);
+insert into ta values (4, 4);
+insert into tb values (1, 1);
+insert into tb values (2, 2);
+insert into tb values (3, 3);
+insert into tb values (4, 4);
+select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+explain(costs off) select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+set sql_beta_feature=extract_pushdown_or_clause;
+explain(costs off) select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+select count(*) from a inner join  b on a.id1 > b.id1 where (sqrt(a.id1) < 10 and b.id2 < 10) or (sqrt(a.id1) > 5 and b.id2 > 0);
+select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+explain(costs off) select * from ta,tb where (ta.id=1 and tb.id=2) or (ta.id=2 and tb.id=1);
+drop table ta;
+drop table tb;
+drop table a;
+drop table b;
+set sql_beta_feature=none;
+