From 3a9aa03aab817cdec7bc4878871e7ed92e31fe7f Mon Sep 17 00:00:00 2001 From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com> Date: Sun, 26 Feb 2023 09:05:41 +0800 Subject: [PATCH] [BugFix](oracle-catalog) Modify the doris data type mapping of oracle `NUMBER(p,s)` type (#17051) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data type `NUMBER(p,s)` of oracle has some different of doris decimal type in semantics. For Oracle Number(p,s) type: 1. if s<0 , it means this is an Interger. This `NUMBER(p,s)` has (p+|s| ) significant digit, and rounding will be performed at s position. eg: if we insert 1234567 into `NUMBER(5,-2)` type, then the oracle will store 1234500. In this case, Doris will use int type (`TINYINT/SMALLINT/INT/.../LARGEINT`). 2. if s>=0 && s

=0 && s>p, it means this is a decimal(like 0.xxxxx). p represents how many digits can be left to the left after the decimal point, the figure after the decimal point s will be rounded. eg: we can not insert 0.0123456 into `NUMBER(5,7)` type, because there must be two zeros on the right side of the decimal point, we can insert 0.0012345 into `NUMBER(5,7)` type. In this case, Doris will use `DECIMAL(s,s)` 4. if we don't specify p and s for `NUMBER(p,s)` like `NUMBER`, the p and s of `NUMBER` are uncertain. In this case, doris can not determine p and s, so doris can not determine data type. --- .../oracle/init/03-create-table.sql | 22 +++++ .../docker-compose/oracle/init/04-insert.sql | 21 +++++ docs/en/docs/lakehouse/multi-catalog/jdbc.md | 8 +- .../docs/lakehouse/multi-catalog/jdbc.md | 7 +- .../doris/external/jdbc/JdbcClient.java | 84 ++++++++++--------- .../test_oracle_jdbc_catalog.out | 25 ++++++ .../test_oracle_jdbc_catalog.groovy | 4 + 7 files changed, 125 insertions(+), 46 deletions(-) diff --git a/docker/thirdparties/docker-compose/oracle/init/03-create-table.sql b/docker/thirdparties/docker-compose/oracle/init/03-create-table.sql index d2d8d6af7e..03aa1d5114 100644 --- a/docker/thirdparties/docker-compose/oracle/init/03-create-table.sql +++ b/docker/thirdparties/docker-compose/oracle/init/03-create-table.sql @@ -84,3 +84,25 @@ id varchar2(128), name varchar2(128), age number(5) ); + +create table doris_test.test_number( + id number(11) not null primary key, + num1 NUMBER(5,2), + num2 NUMBER(5, -2), + num4 NUMBER(5,7) +); + +create table doris_test.test_number2( + id number(11) not null primary key, + num1 NUMBER(38, -5) +); + +create table doris_test.test_number3 ( + id number(11) not null primary key, + num1 NUMBER(38, -84) +); + +create table doris_test.test_number4 ( + id number(11) not null primary key, + num1 NUMBER(5,-7) +); diff --git a/docker/thirdparties/docker-compose/oracle/init/04-insert.sql b/docker/thirdparties/docker-compose/oracle/init/04-insert.sql index 0c474e8f20..d4638d0348 100644 --- a/docker/thirdparties/docker-compose/oracle/init/04-insert.sql +++ b/docker/thirdparties/docker-compose/oracle/init/04-insert.sql @@ -53,4 +53,25 @@ insert into doris_test.test_timestamp (id, t4) values (5, to_timestamp_tz('20191 insert into doris_test.test_timestamp (id, t5) values (6, interval '11' year); insert into doris_test.test_timestamp (id, t5) values (7, interval '223-9' year(3) to month); insert into doris_test.test_timestamp (id, t6) values (8, interval '12 10:23:01.1234568' day to second); + +insert into doris_test.test_number values (1, 123.45, 12345, 0.0012345); +insert into doris_test.test_number values (2, 123.45, 12345, 0.0099999); +insert into doris_test.test_number values (3, 123.456, 123456.12, 0.00123456); +insert into doris_test.test_number values (4, 12.3456, 1234567, 0.001234567); +insert into doris_test.test_number values (5, 123.56, 9999899, 0.009999899); + +insert into doris_test.test_number2 values (1, 12345678901234567890123456789012345678); +insert into doris_test.test_number2 values (2, 99999999999999999999999999999999999999); +insert into doris_test.test_number2 values (3, 999999999999999999999999999999999999999); +insert into doris_test.test_number2 values (4, 12345678); +insert into doris_test.test_number2 values (5, 123.123); +insert into doris_test.test_number2 values (6, 0.999999999999); + +insert into doris_test.test_number3 values (1, 9999); +insert into doris_test.test_number3 values (2, 12345678901234567890123456789012345678); +insert into doris_test.test_number3 values (3, 99999999999999999999999999999999999999); +insert into doris_test.test_number3 values (4, 0.99999); + +insert into doris_test.test_number4 values (1, 12345678); +insert into doris_test.test_number4 values (2, 123456789012); commit; diff --git a/docs/en/docs/lakehouse/multi-catalog/jdbc.md b/docs/en/docs/lakehouse/multi-catalog/jdbc.md index 61cf8dd506..55d90b1a76 100644 --- a/docs/en/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/en/docs/lakehouse/multi-catalog/jdbc.md @@ -276,9 +276,11 @@ The transaction mechanism ensures the atomicity of data writing to JDBC External | ORACLE Type | Doris Type | Comment | | ----------------------------- | ----------- | ------------------------------------------------------------ | -| number(p) / number(p,0) | | Doris will determine the type to map to based on the value of p: `p < 3` -> `TINYINT`; `p < 5` -> `SMALLINT`; `p < 10` -> `INT`; `p < 19` -> `BIGINT`; `p > 19` -> `LARGEINT` | -| number(p,s) | DECIMAL | | -| decimal | DECIMAL | | +| number(p) / number(p,0) | TINYINT/SMALLINT/INT/BIGINT/LARGEINT | Doris will determine the type to map to based on the value of p: `p < 3` -> `TINYINT`; `p < 5` -> `SMALLINT`; `p < 10` -> `INT`; `p < 19` -> `BIGINT`; `p > 19` -> `LARGEINT` | +| number(p,s), [ if(s>0 && p>s) ] | DECIMAL(p,s) | | +| number(p,s), [ if(s>0 && p < s) ] | DECIMAL(s,s) | | +| number(p,s), [ if(s<0) ] | TINYINT/SMALLINT/INT/BIGINT/LARGEINT | if s<0, Doris will set `p` to `p+|s|`, and perform the same mapping as `number(p) / number(p,0)`. | +| number | | Doris does not support Oracle `NUMBER` type that does not specified p and s | | float/real | DOUBLE | | | DATE | DATETIME | | | TIMESTAMP | DATETIME | | diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md index 9b6b501426..2a894727ca 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md @@ -273,8 +273,11 @@ set enable_odbc_transcation = true; | ORACLE Type | Doris Type | Comment | |---|---|---| -| number(p) / number(p,0) | | Doris会根据p的大小来选择对应的类型:`p < 3` -> `TINYINT`; `p < 5` -> `SMALLINT`; `p < 10` -> `INT`; `p < 19` -> `BIGINT`; `p > 19` -> `LARGEINT` | -| number(p,s) | DECIMAL | | +| number(p) / number(p,0) | TINYINT/SMALLINT/INT/BIGINT/LARGEINT | Doris会根据p的大小来选择对应的类型:`p < 3` -> `TINYINT`; `p < 5` -> `SMALLINT`; `p < 10` -> `INT`; `p < 19` -> `BIGINT`; `p > 19` -> `LARGEINT` | +| number(p,s), [ if(s>0 && p>s) ] | DECIMAL(p,s) | | +| number(p,s), [ if(s>0 && p < s) ] | DECIMAL(s,s) | | +| number(p,s), [ if(s<0) ] | TINYINT/SMALLINT/INT/BIGINT/LARGEINT | s<0的情况下, Doris会将p设置为 p+\|s\|, 并进行和number(p) / number(p,0)一样的映射 | +| number | | Doris目前不支持未指定p和s的oracle类型 | | decimal | DECIMAL | | | float/real | DOUBLE | | | DATE | DATETIME | | diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java index 29c10c1151..c46235d3c3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java @@ -370,14 +370,7 @@ public class JdbcClient { case "DECIMAL": int precision = fieldSchema.getColumnSize() + 1; int scale = fieldSchema.getDecimalDigits(); - if (precision <= ScalarType.MAX_DECIMAL128_PRECISION) { - if (!Config.enable_decimal_conversion && precision > ScalarType.MAX_DECIMALV2_PRECISION) { - return ScalarType.createStringType(); - } - return ScalarType.createDecimalType(precision, scale); - } else { - return ScalarType.createStringType(); - } + return createDecimalOrStringType(precision, scale); default: throw new JdbcClientException("Unknown UNSIGNED type of mysql, type: [" + mysqlType + "]"); } @@ -412,14 +405,7 @@ public class JdbcClient { case "DECIMALV3": // for jdbc catalog connecting Doris database int precision = fieldSchema.getColumnSize(); int scale = fieldSchema.getDecimalDigits(); - if (precision <= ScalarType.MAX_DECIMAL128_PRECISION) { - if (!Config.enable_decimal_conversion && precision > ScalarType.MAX_DECIMALV2_PRECISION) { - return ScalarType.createStringType(); - } - return ScalarType.createDecimalType(precision, scale); - } else { - return ScalarType.createStringType(); - } + return createDecimalOrStringType(precision, scale); case "CHAR": ScalarType charType = ScalarType.createType(PrimitiveType.CHAR); charType.setLength(fieldSchema.columnSize); @@ -466,14 +452,7 @@ public class JdbcClient { case "numeric": { int precision = fieldSchema.getColumnSize(); int scale = fieldSchema.getDecimalDigits(); - if (precision <= ScalarType.MAX_DECIMAL128_PRECISION) { - if (!Config.enable_decimal_conversion && precision > ScalarType.MAX_DECIMALV2_PRECISION) { - return ScalarType.createStringType(); - } - return ScalarType.createDecimalType(precision, scale); - } else { - return ScalarType.createStringType(); - } + return createDecimalOrStringType(precision, scale); } case "float4": return Type.FLOAT; @@ -529,14 +508,7 @@ public class JdbcClient { String[] accuracy = ckType.substring(8, ckType.length() - 1).split(", "); int precision = Integer.parseInt(accuracy[0]); int scale = Integer.parseInt(accuracy[1]); - if (precision <= ScalarType.MAX_DECIMAL128_PRECISION) { - if (!Config.enable_decimal_conversion && precision > ScalarType.MAX_DECIMALV2_PRECISION) { - return ScalarType.createStringType(); - } - return ScalarType.createDecimalType(precision, scale); - } else { - return ScalarType.createStringType(); - } + return createDecimalOrStringType(precision, scale); } else if ("String".contains(ckType) || ckType.startsWith("Enum") || ckType.startsWith("IPv") || "UUID".contains(ckType) || ckType.startsWith("FixedString")) { @@ -589,10 +561,29 @@ public class JdbcClient { return ScalarType.createDatetimeV2Type(0); } switch (oracleType) { + /** + * The data type NUMBER(p,s) of oracle has some different of doris decimal type in semantics. + * For Oracle Number(p,s) type: + * 1. if s<0 , it means this is an Interger. + * This NUMBER(p,s) has (p+|s| ) significant digit, and rounding will be performed at s position. + * eg: if we insert 1234567 into NUMBER(5,-2) type, then the oracle will store 1234500. + * In this case, Doris will use INT type (TINYINT/SMALLINT/INT/.../LARGEINT). + * 2. if s>=0 && s

=0 && s>p, it means this is a decimal(like 0.xxxxx). + * p represents how many digits can be left to the left after the decimal point, + * the figure after the decimal point s will be rounded. + * eg: we can not insert 0.0123456 into NUMBER(5,7) type, + * because there must be two zeros on the right side of the decimal point, + * we can insert 0.0012345 into NUMBER(5,7) type. + * In this case, Doris will use DECIMAL(s,s) + * 4. if we don't specify p and s for NUMBER(p,s), just NUMBER, the p and s of NUMBER are uncertain. + * In this case, doris can not determine p and s, so doris can not determine data type. + */ case "NUMBER": int precision = fieldSchema.getColumnSize(); int scale = fieldSchema.getDecimalDigits(); - if (scale == 0) { + if (scale <= 0) { + precision -= scale; if (precision < 3) { return Type.TINYINT; } else if (precision < 5) { @@ -602,18 +593,17 @@ public class JdbcClient { } else if (precision < 19) { return Type.BIGINT; } else if (precision < 39) { + // LARGEINT supports up to 38 numbers. return Type.LARGEINT; - } - return ScalarType.createStringType(); - } - if (precision <= ScalarType.MAX_DECIMAL128_PRECISION) { - if (!Config.enable_decimal_conversion && precision > ScalarType.MAX_DECIMALV2_PRECISION) { + } else { return ScalarType.createStringType(); } - return ScalarType.createDecimalType(precision, scale); - } else { - return ScalarType.createStringType(); } + // scale > 0 + if (precision < scale) { + precision = scale; + } + return createDecimalOrStringType(precision, scale); case "FLOAT": return Type.DOUBLE; case "DATE": @@ -684,6 +674,18 @@ public class JdbcClient { } } + private Type createDecimalOrStringType(int precision, int scale) { + if (precision <= ScalarType.MAX_DECIMAL128_PRECISION) { + if (!Config.enable_decimal_conversion && (precision > ScalarType.MAX_DECIMALV2_PRECISION + || scale > ScalarType.MAX_DECIMALV2_SCALE)) { + return ScalarType.createStringType(); + } + return ScalarType.createDecimalType(precision, scale); + } + return ScalarType.createStringType(); + } + + public List getColumnsFromJdbc(String dbName, String tableName) { List jdbcTableSchema = getJdbcColumnsInfo(dbName, tableName); List dorisTableSchema = Lists.newArrayListWithCapacity(jdbcTableSchema.size()); diff --git a/regression-test/data/jdbc_catalog_p0/test_oracle_jdbc_catalog.out b/regression-test/data/jdbc_catalog_p0/test_oracle_jdbc_catalog.out index cc5c0f50f2..67595ff0a9 100644 --- a/regression-test/data/jdbc_catalog_p0/test_oracle_jdbc_catalog.out +++ b/regression-test/data/jdbc_catalog_p0/test_oracle_jdbc_catalog.out @@ -41,6 +41,31 @@ 7 \N \N \N \N 223-9 \N 8 \N \N \N \N \N 12 10:23:1.123457 +-- !test7 -- +1 123.45 12300 0.0012345 +2 123.45 12300 0.0099999 +3 123.46 123500 0.0012346 +4 12.35 1234600 0.0012346 +5 123.56 9999900 0.0099999 + +-- !test8 -- +1 12345678901234567890123456789012300000 +2 100000000000000000000000000000000000000 +3 1000000000000000000000000000000000000000 +4 12300000 +5 0 +6 0 + +-- !test9 -- +1 0 +2 0 +3 0 +4 0 + +-- !test10 -- +1 10000000 +2 123460000000 + -- !test_insert1 -- doris1 18 diff --git a/regression-test/suites/jdbc_catalog_p0/test_oracle_jdbc_catalog.groovy b/regression-test/suites/jdbc_catalog_p0/test_oracle_jdbc_catalog.groovy index 7ddcce6c10..078e97df4d 100644 --- a/regression-test/suites/jdbc_catalog_p0/test_oracle_jdbc_catalog.groovy +++ b/regression-test/suites/jdbc_catalog_p0/test_oracle_jdbc_catalog.groovy @@ -64,6 +64,10 @@ suite("test_oracle_jdbc_catalog", "p0") { order_qt_test3 """ select * from TEST_INT order by ID; """ order_qt_test5 """ select * from TEST_DATE order by ID; """ order_qt_test6 """ select * from TEST_TIMESTAMP order by ID; """ + order_qt_test7 """ select * from TEST_NUMBER order by ID; """ + order_qt_test8 """ select * from TEST_NUMBER2 order by ID; """ + order_qt_test9 """ select * from TEST_NUMBER3 order by ID; """ + order_qt_test10 """ select * from TEST_NUMBER4 order by ID; """ // The result of TEST_RAW will change // So instead of qt, we're using sql here.