From 75cafa8672bba6cb8d222d1c05d1a62e18ddf3e5 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Tue, 16 Jan 2024 15:55:43 +0800 Subject: [PATCH] [enhancement](jdbc catalog) Enhance function pushdown of Jdbc Oracle Catalog (#29972) --- docs/en/docs/lakehouse/multi-catalog/jdbc.md | 6 ++++- .../docs/lakehouse/multi-catalog/jdbc.md | 6 ++++- .../jdbc/JdbcFunctionPushDownRule.java | 25 +++++++++++++++++++ .../planner/external/jdbc/JdbcScanNode.java | 3 ++- .../jdbc/test_oracle_jdbc_catalog.groovy | 6 +++++ 5 files changed, 43 insertions(+), 3 deletions(-) diff --git a/docs/en/docs/lakehouse/multi-catalog/jdbc.md b/docs/en/docs/lakehouse/multi-catalog/jdbc.md index 11a02b049a..662fcc8111 100644 --- a/docs/en/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/en/docs/lakehouse/multi-catalog/jdbc.md @@ -114,7 +114,7 @@ In some cases, the keywords in the database might be used as the field names. Fo 1. When executing a query like `where dt = '2022-01-01'`, Doris can push down these filtering conditions to the external data source, thereby directly excluding data that does not meet the conditions at the data source level, reducing the number of unqualified Necessary data acquisition and transfer. This greatly improves query performance while also reducing the load on external data sources. -2. When `enable_func_pushdown` is set to true, the function conditions after where will also be pushed down to the external data source. Currently, only MySQL and ClickHouse are supported. If you encounter a function that is not supported by MySQL or ClickHouse, you can set this parameter to false. , currently Doris will automatically identify some functions not supported by MySQL and functions supported by CLickHouse for push-down condition filtering, which can be viewed through explain sql. +2. When `enable_func_pushdown` is set to true, the function conditions after where will also be pushed down to the external data source. Currently, only MySQL, ClickHouse, and Oracle are supported. If you encounter functions that are not supported by MySQL, ClickHouse, and Oracle, you can use this The parameter is set to false. At present, Doris will automatically identify some functions that are not supported by MySQL and functions supported by CLickHouse and Oracle for push-down condition filtering. You can view them through explain sql. Functions that are currently not pushed down include: @@ -130,6 +130,10 @@ Functions that are currently pushed down include: | FROM_UNIXTIME | | UNIX_TIMESTAMP | +| Oracle | +|:------:| +| NVL | + ### Line Limit If there is a limit keyword in the query, Doris will translate it into semantics suitable for different data sources. diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md index e57acbe688..e3eb90d7f4 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md @@ -114,7 +114,7 @@ select * from mysql_catalog.mysql_database.mysql_table where k1 > 1000 and k3 =' 1. 当执行类似于 `where dt = '2022-01-01'` 这样的查询时,Doris 能够将这些过滤条件下推到外部数据源,从而直接在数据源层面排除不符合条件的数据,减少了不必要的数据获取和传输。这大大提高了查询性能,同时也降低了对外部数据源的负载。 -2. 当 `enable_func_pushdown` 设置为true,会将 where 之后的函数条件也下推到外部数据源,目前仅支持 MySQL 以及 ClickHouse,如遇到 MySQL 或 ClickHouse 不支持的函数,可以将此参数设置为 false,目前 Doris 会自动识别部分 MySQL 不支持的函数以及 CLickHouse 支持的函数进行下推条件过滤,可通过 explain sql 查看。 +2. 当 `enable_func_pushdown` 设置为true,会将 where 之后的函数条件也下推到外部数据源,目前仅支持 MySQL、ClickHouse、Oracle,如遇到 MySQL、ClickHouse、Oracle 不支持的函数,可以将此参数设置为 false,目前 Doris 会自动识别部分 MySQL 不支持的函数以及 CLickHouse、Oracle 支持的函数进行下推条件过滤,可通过 explain sql 查看。 目前不会下推的函数有: @@ -130,6 +130,10 @@ select * from mysql_catalog.mysql_database.mysql_table where k1 > 1000 and k3 =' | FROM_UNIXTIME | | UNIX_TIMESTAMP | +| Oracle | +|:------:| +| NVL | + ### 行数限制 如果在查询中带有 limit 关键字,Doris 会将其转译成适合不同数据源的语义。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcFunctionPushDownRule.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcFunctionPushDownRule.java index 4fb8788bb0..d328952593 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcFunctionPushDownRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcFunctionPushDownRule.java @@ -49,6 +49,13 @@ public class JdbcFunctionPushDownRule { CLICKHOUSE_SUPPORTED_FUNCTIONS.add("unix_timestamp"); } + private static final TreeSet ORACLE_SUPPORTED_FUNCTIONS = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); + + static { + ORACLE_SUPPORTED_FUNCTIONS.add("nvl"); + ORACLE_SUPPORTED_FUNCTIONS.add("ifnull"); + } + private static boolean isMySQLFunctionUnsupported(String functionName) { return MYSQL_UNSUPPORTED_FUNCTIONS.contains(functionName.toLowerCase()); } @@ -57,6 +64,9 @@ public class JdbcFunctionPushDownRule { return !CLICKHOUSE_SUPPORTED_FUNCTIONS.contains(functionName.toLowerCase()); } + private static boolean isOracleFunctionUnsupported(String functionName) { + return !ORACLE_SUPPORTED_FUNCTIONS.contains(functionName.toLowerCase()); + } private static final Map REPLACE_MYSQL_FUNCTIONS = Maps.newHashMap(); @@ -80,6 +90,16 @@ public class JdbcFunctionPushDownRule { return REPLACE_CLICKHOUSE_FUNCTIONS.containsKey(functionName.toLowerCase()); } + private static final Map REPLACE_ORACLE_FUNCTIONS = Maps.newHashMap(); + + static { + REPLACE_ORACLE_FUNCTIONS.put("ifnull", "nvl"); + } + + private static boolean isReplaceOracleFunctions(String functionName) { + return REPLACE_ORACLE_FUNCTIONS.containsKey(functionName.toLowerCase()); + } + public static Expr processFunctions(TOdbcTableType tableType, Expr expr, List errors) { if (tableType == null || expr == null) { return expr; @@ -94,6 +114,9 @@ public class JdbcFunctionPushDownRule { } else if (TOdbcTableType.CLICKHOUSE.equals(tableType)) { replaceFunction = JdbcFunctionPushDownRule::isReplaceClickHouseFunctions; checkFunction = JdbcFunctionPushDownRule::isClickHouseFunctionUnsupported; + } else if (TOdbcTableType.ORACLE.equals(tableType)) { + replaceFunction = JdbcFunctionPushDownRule::isReplaceOracleFunctions; + checkFunction = JdbcFunctionPushDownRule::isOracleFunctionUnsupported; } else { return expr; } @@ -138,6 +161,8 @@ public class JdbcFunctionPushDownRule { newFunc = REPLACE_MYSQL_FUNCTIONS.get(func.toLowerCase()); } else if (TOdbcTableType.CLICKHOUSE.equals(tableType)) { newFunc = REPLACE_CLICKHOUSE_FUNCTIONS.get(func); + } else if (TOdbcTableType.ORACLE.equals(tableType)) { + newFunc = REPLACE_ORACLE_FUNCTIONS.get(func); } else { newFunc = null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcScanNode.java index 76dbd40aba..774d4284bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/jdbc/JdbcScanNode.java @@ -311,7 +311,8 @@ public class JdbcScanNode extends ExternalScanNode { private static boolean shouldPushDownConjunct(TOdbcTableType tableType, Expr expr) { if (containsFunctionCallExpr(expr)) { - if (tableType.equals(TOdbcTableType.MYSQL) || tableType.equals(TOdbcTableType.CLICKHOUSE)) { + if (tableType.equals(TOdbcTableType.MYSQL) || tableType.equals(TOdbcTableType.CLICKHOUSE) + || tableType.equals(TOdbcTableType.ORACLE)) { return Config.enable_func_pushdown; } else { return false; diff --git a/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy index 136b785bb5..75ce7175df 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy @@ -134,6 +134,12 @@ suite("test_oracle_jdbc_catalog", "p0,external,oracle,external_docker,external_d order_qt_date6 """ select * from TEST_DATE where (T1 < '2022-01-22 00:00:00' or T1 > '2022-01-20 00:00:00') and (T1 < '2022-01-23 00:00:00' or T1 > '2022-01-19 00:00:00'); """ order_qt_date7 """select * from TEST_TIMESTAMP where T2 < str_to_date('2020-12-21 12:34:56', '%Y-%m-%d %H:%i:%s');""" + // test nvl + explain { + sql("SELECT * FROM STUDENT WHERE nvl(score, 0) < 95;") + contains """SELECT "ID", "NAME", "AGE", "SCORE" FROM "DORIS_TEST"."STUDENT" WHERE (nvl("SCORE", 0.0) < 95.0)""" + } + // for old planner order_qt_filter4_old_plan """ select /*+ SET_VAR(enable_nereids_planner=false) */ * from STUDENT where NAME NOT like '%bob%' order by ID; """ order_qt_filter5_old_plan """ select /*+ SET_VAR(enable_nereids_planner=false) */ * from STUDENT where NAME NOT like '%bob%' or NAME NOT LIKE '%jerry%' order by ID; """