diff --git a/.github/workflows/build-extension.yml b/.github/workflows/build-extension.yml index 261d6c4519..2270746989 100644 --- a/.github/workflows/build-extension.yml +++ b/.github/workflows/build-extension.yml @@ -55,27 +55,6 @@ jobs: run: | cd fs_brokers/apache_hdfs_broker/ && /bin/bash build.sh - - name: Build spark connector v2 - run: | - thrift --version - cd extension/spark-doris-connector/ && /bin/bash build.sh 2.3.4 2.11 - - - name: Build spark connector v3 - run: | - cd extension/spark-doris-connector/ && /bin/bash build.sh 3.1.2 2.12 - - - name: Build flink connector 1.11 - run: | - cd extension/flink-doris-connector/ && /bin/bash build.sh 1.11.6 2.12 - - - name: Build flink connector 1.12 - run: | - cd extension/flink-doris-connector/ && /bin/bash build.sh 1.12.7 2.12 - - - name: Build flink connector 1.13 - run: | - cd extension/flink-doris-connector/ && /bin/bash build.sh 1.13.5 2.12 - - name: Build docs run: | cd docs && npm install && npm run build diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c594d0a824..669c2d2f91 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Your suggestions, comments and comments on Doris can be made directly through Gi There are many ways to participate in and contribute to Doris projects: code implementation, test writing, process tool improvement, document improvement, and so on. Any contribution will be welcomed and you will be added to the list of contributors. Further, with sufficient contributions, you will have the opportunity to become a Commiter of Apache with Apache mailbox and be included in the list of [Apache Commiters] (http://people.apache.org/committer-index.html). -Any questions, you can contact us to get timely answers, including Wechat, Gitter (GitHub instant messaging tool), e-mail and so on. +Any questions, you can contact us to get timely answers, including dev mail list or Slack. ## Initial contact @@ -33,8 +33,7 @@ For the first time in Doris community, you can: * Follow [Doris Github](https://github.com/apache/incubator-doris) * Subscribe to our [mailing list] (./subscribe-mail-list.md); -* Join Doris Wechat Group (add micro-signal: morningman-cmy, note: join Doris Group) and ask questions at any time. -* Enter Doris's [Gitter] (./gitter.md) chat room; +* Join Doris [Slack](https://join.slack.com/t/apachedoriscommunity/shared_invite/zt-11jb8gesh-7IukzSrdea6mqoG0HB4gZg) Learn the development trends of Doris project in time and give your opinions on the topics you are concerned about. diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index 72f0839df6..7de688f06e 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -25,7 +25,7 @@ under the License. 参与 Doris 项目并为其作出贡献的方法有很多:代码实现、测试编写、流程工具改进、文档完善等等。任何贡献我们都会非常欢迎,并将您加入贡献者列表,进一步,有了足够的贡献后,您还可以有机会成为 Apache 的 Commiter,拥有 Apache 邮箱,并被收录到 [Apache Commiter 列表中](http://people.apache.org/committer-index.html)。 -任何问题,您都可以联系我们得到及时解答,联系方式包括微信、Gitter(GitHub提供的即时聊天工具)、邮件等等。 +任何问题,您都可以联系我们得到及时解答,联系方式包括 dev 邮件组,Slack 等。 ## 初次接触 @@ -33,8 +33,7 @@ under the License. 
* 关注 Doris [Github 代码库](https://github.com/apache/incubator-doris) * 订阅我们的 [邮件列表](./subscribe-mail-list.md); -* 加入 Doris 微信群(加微信号:morningman-cmy, 备注:加入Doris群) 随时提问; -* 进入 Doris 的 [Gitter](./gitter.md) 聊天室; +* 加入 Doris 的 [Slack](https://join.slack.com/t/apachedoriscommunity/shared_invite/zt-11jb8gesh-7IukzSrdea6mqoG0HB4gZg) 通过以上方式及时了解 Doris 项目的开发动态并为您关注的话题发表意见。 diff --git a/docs/en/extending-doris/flink-doris-connector.md b/docs/en/extending-doris/flink-doris-connector.md index 1f66e2c6c8..3538f52ccc 100644 --- a/docs/en/extending-doris/flink-doris-connector.md +++ b/docs/en/extending-doris/flink-doris-connector.md @@ -28,6 +28,8 @@ under the License. - The Flink Doris Connector can support operations (read, insert, modify, delete) data stored in Doris through Flink. +Github: https://github.com/apache/incubator-doris-connectors + * `Doris` table can be mapped to `DataStream` or `Table`. >**Note:** @@ -377,4 +379,4 @@ WITH ( ); insert into doris_sink select id,name from cdc_mysql_source; -``` \ No newline at end of file +``` diff --git a/docs/en/extending-doris/spark-doris-connector.md b/docs/en/extending-doris/spark-doris-connector.md index 60a8f4259b..c7d71db781 100644 --- a/docs/en/extending-doris/spark-doris-connector.md +++ b/docs/en/extending-doris/spark-doris-connector.md @@ -28,6 +28,8 @@ under the License. Spark Doris Connector can support reading data stored in Doris and writing data to Doris through Spark. +Github: https://github.com/apache/incubator-doris-connectors + - Support reading data from `Doris`. - Support `Spark DataFrame` batch/stream writing data to `Doris` - You can map the `Doris` table to` DataFrame` or `RDD`, it is recommended to use` DataFrame`. @@ -244,4 +246,4 @@ kafkaSource.selectExpr("CAST(key AS STRING)", "CAST(value as STRING)") | TIME | DataTypes.DoubleType | | HLL | Unsupported datatype | -* Note: In Connector, `DATE` and` DATETIME` are mapped to `String`. Due to the processing logic of the Doris underlying storage engine, when the time type is used directly, the time range covered cannot meet the demand. So use `String` type to directly return the corresponding time readable text. \ No newline at end of file +* Note: In Connector, `DATE` and` DATETIME` are mapped to `String`. Due to the processing logic of the Doris underlying storage engine, when the time type is used directly, the time range covered cannot meet the demand. So use `String` type to directly return the corresponding time readable text. diff --git a/docs/zh-CN/extending-doris/flink-doris-connector.md b/docs/zh-CN/extending-doris/flink-doris-connector.md index e0718e2d69..756c99e730 100644 --- a/docs/zh-CN/extending-doris/flink-doris-connector.md +++ b/docs/zh-CN/extending-doris/flink-doris-connector.md @@ -30,6 +30,8 @@ under the License. Flink Doris Connector 可以支持通过 Flink 操作(读取、插入、修改、删除) Doris 中存储的数据。 +代码库地址:https://github.com/apache/incubator-doris-connectors + * 可以将`Doris`表映射为`DataStream`或者`Table`。 >**注意:** @@ -381,4 +383,4 @@ WITH ( ); insert into doris_sink select id,name from cdc_mysql_source; -``` \ No newline at end of file +``` diff --git a/docs/zh-CN/extending-doris/spark-doris-connector.md b/docs/zh-CN/extending-doris/spark-doris-connector.md index d6a422b3b3..81e7f9196b 100644 --- a/docs/zh-CN/extending-doris/spark-doris-connector.md +++ b/docs/zh-CN/extending-doris/spark-doris-connector.md @@ -28,6 +28,8 @@ under the License. 
Spark Doris Connector 可以支持通过 Spark 读取 Doris 中存储的数据,也支持通过Spark写入数据到Doris。 +代码库地址:https://github.com/apache/incubator-doris-connectors + - 支持从`Doris`中读取数据 - 支持`Spark DataFrame`批量/流式 写入`Doris` - 可以将`Doris`表映射为`DataFrame`或者`RDD`,推荐使用`DataFrame`。 diff --git a/extension/flink-doris-connector/build.sh b/extension/flink-doris-connector/build.sh deleted file mode 100644 index 83d26d21d7..0000000000 --- a/extension/flink-doris-connector/build.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to compile Flink-Doris-Connector -# Usage: -# sh build.sh -# -############################################################## - -set -eo pipefail - -usage() { - echo " - Usage: - $0 flink_version scala_version - e.g.: - $0 1.11.6 2.12 - $0 1.12.7 2.12 - $0 1.13.5 2.12 - " - exit 1 -} - -if [ $# -ne 2 ]; then - usage -fi - -ROOT=$(dirname "$0") -ROOT=$( - cd "$ROOT" - pwd -) - -export DORIS_HOME=${ROOT}/../../ - -. "${DORIS_HOME}"/env.sh - -# include custom environment variables -if [[ -f ${DORIS_HOME}/custom_env.sh ]]; then - . "${DORIS_HOME}"/custom_env.sh -fi - -# check maven -MVN_CMD=mvn -if [[ -n ${CUSTOM_MVN} ]]; then - MVN_CMD=${CUSTOM_MVN} -fi - -if ! 
${MVN_CMD} --version; then - echo "Error: mvn is not found" - exit 1 -fi -export MVN_CMD -rm -rf output/ -${MVN_CMD} clean package -Dscala.version=$2 -Dflink.version=$1 - -mkdir -p output/ -cp target/doris-flink-*.jar ./output/ - -echo "*****************************************" -echo "Successfully build Flink-Doris-Connector" -echo "*****************************************" - -exit 0 diff --git a/extension/flink-doris-connector/pom.xml b/extension/flink-doris-connector/pom.xml deleted file mode 100644 index 10b750a830..0000000000 --- a/extension/flink-doris-connector/pom.xml +++ /dev/null @@ -1,432 +0,0 @@ - - - - 4.0.0 - - org.apache - apache - 23 - - org.apache.doris - doris-flink-connector - ${flink.version}-${scala.version}-1.0.0-SNAPSHOT - Doris Flink Connector - https://doris.apache.org/ - - - Apache 2.0 License - https://www.apache.org/licenses/LICENSE-2.0.html - repo - - - - scm:git:https://git@github.com/apache/incubator-doris.git - scm:git:https://git@github.com/apache/incubator-doris.git - scm:git:https://git@github.com/apache/incubator-doris.git - HEAD - - - GitHub - https://github.com/apache/incubator-doris/issues - - - - Dev Mailing List - dev@doris.apache.org - dev-subscribe@doris.apache.org - dev-unsubscribe@doris.apache.org - - - Commits Mailing List - commits@doris.apache.org - commits-subscribe@doris.apache.org - commits-unsubscribe@doris.apache.org - - - - ${env.scala.version} - ${env.flink.version} - 0.13.0 - 5.0.0 - 3.8.1 - 3.3.0 - 3.2.1 - UTF-8 - ${env.DORIS_THIRDPARTY} - github - - - - thirdparty - - - env.DORIS_THIRDPARTY - - - - ${env.DORIS_THIRDPARTY} - - - - - custom-env - - - env.CUSTOM_MAVEN_REPO - - - - - custom-nexus - ${env.CUSTOM_MAVEN_REPO} - - - - - custom-nexus - ${env.CUSTOM_MAVEN_REPO} - - - - - flink.version - - 1.11.6 - - - true - - - - scala.version - - 2.12 - - - true - - - - - general-env - - - !env.CUSTOM_MAVEN_REPO - - - - - central - central maven repo https - https://repo.maven.apache.org/maven2 - - - - - - - org.apache.flink - flink-java - ${flink.version} - provided - - - org.apache.flink - flink-streaming-java_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-clients_${scala.version} - ${flink.version} - provided - - - - org.apache.flink - flink-table-common - ${flink.version} - provided - - - org.apache.flink - flink-table-api-java-bridge_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-table-planner-blink_${scala.version} - ${flink.version} - provided - - - org.apache.thrift - libthrift - ${libthrift.version} - - - httpclient - org.apache.httpcomponents - - - httpcore - org.apache.httpcomponents - - - - - org.apache.httpcomponents - httpclient - 4.5.13 - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - org.apache.arrow - arrow-memory-netty - ${arrow.version} - runtime - - - org.slf4j - slf4j-api - 1.7.25 - - - org.slf4j - slf4j-log4j12 - 1.7.25 - test - - - log4j - log4j - 1.2.17 - - - - org.hamcrest - hamcrest-core - 1.3 - test - - - org.mockito - mockito-scala_${scala.version} - 1.4.7 - - - hamcrest-core - org.hamcrest - - - test - - - junit - junit - 4.11 - - - hamcrest-core - org.hamcrest - - - test - - - - - - org.apache.thrift.tools - maven-thrift-plugin - 0.1.11 - - ${doris.thirdparty}/installed/bin/thrift - java:fullcamel - - - - thrift-sources - generate-sources - - compile - - - - - - net.alchim31.maven - scala-maven-plugin - 3.2.1 - - - scala-compile-first - process-resources - - compile - - - - scala-test-compile - process-test-resources - - testCompile - 
- - - - - -feature - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.2.1 - - - - com.google.code.findbugs:* - org.slf4j:* - - - - - org.apache.arrow - org.apache.doris.shaded.org.apache.arrow - - - io.netty - org.apache.doris.shaded.io.netty - - - com.fasterxml.jackson - org.apache.doris.shaded.com.fasterxml.jackson - - - org.apache.commons.codec - org.apache.doris.shaded.org.apache.commons.codec - - - com.google.flatbuffers - org.apache.doris.shaded.com.google.flatbuffers - - - org.apache.thrift - org.apache.doris.shaded.org.apache.thrift - - - - - - package - - shade - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - 8 - 8 - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - true - 8 - false - - - - attach-javadocs - - jar - - - - - - org.apache.maven.plugins - maven-source-plugin - ${maven-source-plugin.version} - - true - - - - compile - - jar - - - - - - - diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/backend/BackendClient.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/backend/BackendClient.java deleted file mode 100644 index 9b8d955d69..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/backend/BackendClient.java +++ /dev/null @@ -1,220 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.flink.backend; - -import org.apache.doris.flink.cfg.ConfigurationOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.doris.flink.exception.ConnectedFailedException; -import org.apache.doris.flink.exception.DorisException; -import org.apache.doris.flink.exception.DorisInternalException; -import org.apache.doris.flink.serialization.Routing; -import org.apache.doris.flink.util.ErrorMessages; -import org.apache.doris.thrift.TDorisExternalService; -import org.apache.doris.thrift.TScanBatchResult; -import org.apache.doris.thrift.TScanCloseParams; -import org.apache.doris.thrift.TScanCloseResult; -import org.apache.doris.thrift.TScanNextBatchParams; -import org.apache.doris.thrift.TScanOpenParams; -import org.apache.doris.thrift.TScanOpenResult; -import org.apache.doris.thrift.TStatusCode; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Client to request Doris BE - */ -public class BackendClient { - private static Logger logger = LoggerFactory.getLogger(BackendClient.class); - - private Routing routing; - - private TDorisExternalService.Client client; - private TTransport transport; - - private boolean isConnected = false; - private final int retries; - private final int socketTimeout; - private final int connectTimeout; - - public BackendClient(Routing routing, DorisReadOptions readOptions) throws ConnectedFailedException { - this.routing = routing; - this.connectTimeout = readOptions.getRequestConnectTimeoutMs() == null ? ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT : readOptions.getRequestConnectTimeoutMs(); - this.socketTimeout = readOptions.getRequestReadTimeoutMs() == null ? ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT : readOptions.getRequestReadTimeoutMs(); - this.retries = readOptions.getRequestRetries() == null ? ConfigurationOptions.DORIS_REQUEST_RETRIES_DEFAULT : readOptions.getRequestRetries(); - logger.trace("connect timeout set to '{}'. socket timeout set to '{}'. 
retries set to '{}'.", - this.connectTimeout, this.socketTimeout, this.retries); - open(); - } - - private void open() throws ConnectedFailedException { - logger.debug("Open client to Doris BE '{}'.", routing); - TException ex = null; - for (int attempt = 0; !isConnected && attempt < retries; ++attempt) { - logger.debug("Attempt {} to connect {}.", attempt, routing); - TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory(); - transport = new TSocket(routing.getHost(), routing.getPort(), socketTimeout, connectTimeout); - TProtocol protocol = factory.getProtocol(transport); - client = new TDorisExternalService.Client(protocol); - if (isConnected) { - logger.info("Success connect to {}.", routing); - return; - } - try { - logger.trace("Connect status before open transport to {} is '{}'.", routing, isConnected); - if (!transport.isOpen()) { - transport.open(); - isConnected = true; - } - } catch (TTransportException e) { - logger.warn(ErrorMessages.CONNECT_FAILED_MESSAGE, routing, e); - ex = e; - } - - } - if (!isConnected) { - logger.error(ErrorMessages.CONNECT_FAILED_MESSAGE, routing); - throw new ConnectedFailedException(routing.toString(), ex); - } - } - - private void close() { - logger.trace("Connect status before close with '{}' is '{}'.", routing, isConnected); - isConnected = false; - if ((transport != null) && transport.isOpen()) { - transport.close(); - logger.info("Closed a connection to {}.", routing); - } - if (null != client) { - client = null; - } - } - - /** - * Open a scanner for reading Doris data. - * - * @param openParams thrift struct to required by request - * @return scan open result - * @throws ConnectedFailedException throw if cannot connect to Doris BE - */ - public TScanOpenResult openScanner(TScanOpenParams openParams) throws ConnectedFailedException { - logger.debug("OpenScanner to '{}', parameter is '{}'.", routing, openParams); - if (!isConnected) { - open(); - } - TException ex = null; - for (int attempt = 0; attempt < retries; ++attempt) { - logger.debug("Attempt {} to openScanner {}.", attempt, routing); - try { - TScanOpenResult result = client.openScanner(openParams); - if (result == null) { - logger.warn("Open scanner result from {} is null.", routing); - continue; - } - if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) { - logger.warn("The status of open scanner result from {} is '{}', error message is: {}.", - routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs()); - continue; - } - return result; - } catch (TException e) { - logger.warn("Open scanner from {} failed.", routing, e); - ex = e; - } - } - logger.error(ErrorMessages.CONNECT_FAILED_MESSAGE, routing); - throw new ConnectedFailedException(routing.toString(), ex); - } - - /** - * get next row batch from Doris BE - * - * @param nextBatchParams thrift struct to required by request - * @return scan batch result - * @throws ConnectedFailedException throw if cannot connect to Doris BE - */ - public TScanBatchResult getNext(TScanNextBatchParams nextBatchParams) throws DorisException { - logger.debug("GetNext to '{}', parameter is '{}'.", routing, nextBatchParams); - if (!isConnected) { - open(); - } - TException ex = null; - TScanBatchResult result = null; - for (int attempt = 0; attempt < retries; ++attempt) { - logger.debug("Attempt {} to getNext {}.", attempt, routing); - try { - result = client.getNext(nextBatchParams); - if (result == null) { - logger.warn("GetNext result from {} is null.", routing); - continue; - } - if 
(!TStatusCode.OK.equals(result.getStatus().getStatusCode())) { - logger.warn("The status of get next result from {} is '{}', error message is: {}.", - routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs()); - continue; - } - return result; - } catch (TException e) { - logger.warn("Get next from {} failed.", routing, e); - ex = e; - } - } - if (result != null && (TStatusCode.OK != (result.getStatus().getStatusCode()))) { - logger.error(ErrorMessages.DORIS_INTERNAL_FAIL_MESSAGE, routing, result.getStatus().getStatusCode(), - result.getStatus().getErrorMsgs()); - throw new DorisInternalException(routing.toString(), result.getStatus().getStatusCode(), - result.getStatus().getErrorMsgs()); - } - logger.error(ErrorMessages.CONNECT_FAILED_MESSAGE, routing); - throw new ConnectedFailedException(routing.toString(), ex); - } - - /** - * close an scanner. - * - * @param closeParams thrift struct to required by request - */ - public void closeScanner(TScanCloseParams closeParams) { - logger.debug("CloseScanner to '{}', parameter is '{}'.", routing, closeParams); - for (int attempt = 0; attempt < retries; ++attempt) { - logger.debug("Attempt {} to closeScanner {}.", attempt, routing); - try { - TScanCloseResult result = client.closeScanner(closeParams); - if (result == null) { - logger.warn("CloseScanner result from {} is null.", routing); - continue; - } - if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) { - logger.warn("The status of get next result from {} is '{}', error message is: {}.", - routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs()); - continue; - } - break; - } catch (TException e) { - logger.warn("Close scanner from {} failed.", routing, e); - } - } - logger.info("CloseScanner to Doris BE '{}' success.", routing); - close(); - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/ConfigurationOptions.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/ConfigurationOptions.java deleted file mode 100644 index 47d07b8d17..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/ConfigurationOptions.java +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
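The BackendClient deleted above wraps Doris BE's Thrift scan API (openScanner / getNext / closeScanner), retrying each call a bounded number of times before giving up. Its open() loop reduces to the following shape — a minimal sketch, where `doConnect` is a hypothetical stand-in for `transport.open()`:

```java
// Bounded-retry shape mirroring the deleted BackendClient.open(): attempt the
// transport a fixed number of times, remember the last cause, fail loudly.
static void openWithRetries(int retries, Runnable doConnect) {
    RuntimeException last = null;
    for (int attempt = 0; attempt < retries; ++attempt) {
        try {
            doConnect.run();   // stands in for transport.open() in the original
            return;            // connected; the original sets isConnected = true
        } catch (RuntimeException e) {
            last = e;          // kept like the TException in BackendClient.open()
        }
    }
    throw new IllegalStateException("connect failed after " + retries + " retries", last);
}
```

With the ConfigurationOptions defaults deleted below, `retries` is 3 and each attempt runs with 30-second connect and read timeouts.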
- -package org.apache.doris.flink.cfg; - -public interface ConfigurationOptions { - // doris fe node address - String DORIS_FENODES = "fenodes"; - - String DORIS_DEFAULT_CLUSTER = "default_cluster"; - - String TABLE_IDENTIFIER = "table.identifier"; - String DORIS_TABLE_IDENTIFIER = "doris.table.identifier"; - String DORIS_READ_FIELD = "doris.read.field"; - String DORIS_FILTER_QUERY = "doris.filter.query"; - String DORIS_FILTER_QUERY_IN_MAX_COUNT = "doris.filter.query.in.max.count"; - Integer DORIS_FILTER_QUERY_IN_VALUE_UPPER_LIMIT = 10000; - - String DORIS_USER = "username"; - String DORIS_PASSWORD = "password"; - - String DORIS_REQUEST_AUTH_USER = "doris.request.auth.user"; - String DORIS_REQUEST_AUTH_PASSWORD = "doris.request.auth.password"; - String DORIS_REQUEST_RETRIES = "doris.request.retries"; - String DORIS_REQUEST_CONNECT_TIMEOUT_MS = "doris.request.connect.timeout.ms"; - String DORIS_REQUEST_READ_TIMEOUT_MS = "doris.request.read.timeout.ms"; - String DORIS_REQUEST_QUERY_TIMEOUT_S = "doris.request.query.timeout.s"; - Integer DORIS_REQUEST_RETRIES_DEFAULT = 3; - Integer DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT = 30 * 1000; - Integer DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT = 30 * 1000; - Integer DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT = 3600; - - String DORIS_TABLET_SIZE = "doris.request.tablet.size"; - Integer DORIS_TABLET_SIZE_DEFAULT = Integer.MAX_VALUE; - Integer DORIS_TABLET_SIZE_MIN = 1; - - String DORIS_BATCH_SIZE = "doris.batch.size"; - Integer DORIS_BATCH_SIZE_DEFAULT = 1024; - - String DORIS_EXEC_MEM_LIMIT = "doris.exec.mem.limit"; - Long DORIS_EXEC_MEM_LIMIT_DEFAULT = 2147483648L; - - String DORIS_VALUE_READER_CLASS = "doris.value.reader.class"; - - String DORIS_DESERIALIZE_ARROW_ASYNC = "doris.deserialize.arrow.async"; - Boolean DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT = false; - - String DORIS_DESERIALIZE_QUEUE_SIZE = "doris.deserialize.queue.size"; - Integer DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT = 64; - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisConnectionOptions.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisConnectionOptions.java deleted file mode 100644 index 9b2187c904..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisConnectionOptions.java +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.cfg; - -import org.apache.flink.util.Preconditions; - -import java.io.Serializable; - -/** - * Doris connection options. 
- */ -public class DorisConnectionOptions implements Serializable { - - private static final long serialVersionUID = 1L; - - protected final String fenodes; - protected final String username; - protected final String password; - - public DorisConnectionOptions(String fenodes, String username, String password) { - this.fenodes = Preconditions.checkNotNull(fenodes, "fenodes is empty"); - this.username = username; - this.password = password; - } - - public String getFenodes() { - return fenodes; - } - - public String getUsername() { - return username; - } - - public String getPassword() { - return password; - } - - /** - * Builder for {@link DorisConnectionOptions}. - */ - public static class DorisConnectionOptionsBuilder { - private String fenodes; - private String username; - private String password; - - public DorisConnectionOptionsBuilder withFenodes(String fenodes) { - this.fenodes = fenodes; - return this; - } - - public DorisConnectionOptionsBuilder withUsername(String username) { - this.username = username; - return this; - } - - public DorisConnectionOptionsBuilder withPassword(String password) { - this.password = password; - return this; - } - - public DorisConnectionOptions build() { - return new DorisConnectionOptions(fenodes, username, password); - } - } - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisExecutionOptions.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisExecutionOptions.java deleted file mode 100644 index ad1ab07228..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisExecutionOptions.java +++ /dev/null @@ -1,128 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.cfg; - - -import org.apache.flink.util.Preconditions; - -import java.io.Serializable; -import java.util.Properties; - -/** - * JDBC sink batch options. - */ -public class DorisExecutionOptions implements Serializable { - - private static final long serialVersionUID = 1L; - public static final Integer DEFAULT_BATCH_SIZE = 10000; - public static final Integer DEFAULT_MAX_RETRY_TIMES = 1; - private static final Long DEFAULT_INTERVAL_MILLIS = 10000L; - - private final Integer batchSize; - private final Integer maxRetries; - private final Long batchIntervalMs; - - /** - * Properties for the StreamLoad. 
- */ - private final Properties streamLoadProp; - - private final Boolean enableDelete; - - - public DorisExecutionOptions(Integer batchSize, Integer maxRetries, Long batchIntervalMs, Properties streamLoadProp, Boolean enableDelete) { - Preconditions.checkArgument(maxRetries >= 0); - this.batchSize = batchSize; - this.maxRetries = maxRetries; - this.batchIntervalMs = batchIntervalMs; - this.streamLoadProp = streamLoadProp; - this.enableDelete = enableDelete; - } - - public static Builder builder() { - return new Builder(); - } - - public static DorisExecutionOptions defaults() { - Properties pro = new Properties(); - pro.setProperty("format", "json"); - pro.setProperty("strip_outer_array", "true"); - return new Builder().setStreamLoadProp(pro).build(); - } - - public Integer getBatchSize() { - return batchSize; - } - - public Integer getMaxRetries() { - return maxRetries; - } - - public Long getBatchIntervalMs() { - return batchIntervalMs; - } - - public Properties getStreamLoadProp() { - return streamLoadProp; - } - - public Boolean getEnableDelete() { - return enableDelete; - } - - /** - * Builder of {@link DorisExecutionOptions}. - */ - public static class Builder { - private Integer batchSize = DEFAULT_BATCH_SIZE; - private Integer maxRetries = DEFAULT_MAX_RETRY_TIMES; - private Long batchIntervalMs = DEFAULT_INTERVAL_MILLIS; - private Properties streamLoadProp = new Properties(); - private Boolean enableDelete = false; - - public Builder setBatchSize(Integer batchSize) { - this.batchSize = batchSize; - return this; - } - - public Builder setMaxRetries(Integer maxRetries) { - this.maxRetries = maxRetries; - return this; - } - - public Builder setBatchIntervalMs(Long batchIntervalMs) { - this.batchIntervalMs = batchIntervalMs; - return this; - } - - public Builder setStreamLoadProp(Properties streamLoadProp) { - this.streamLoadProp = streamLoadProp; - return this; - } - - public Builder setEnableDelete(Boolean enableDelete) { - this.enableDelete = enableDelete; - return this; - } - - public DorisExecutionOptions build() { - return new DorisExecutionOptions(batchSize, maxRetries, batchIntervalMs, streamLoadProp, enableDelete); - } - } - - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisOptions.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisOptions.java deleted file mode 100644 index 512d0ab456..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisOptions.java +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
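The DorisConnectionOptions and DorisExecutionOptions classes deleted above are plain builder-produced value objects. A sketch of constructing both, where the connection coordinates are placeholders and the numeric values restate the defaults visible in the deleted code (DEFAULT_BATCH_SIZE, DEFAULT_MAX_RETRY_TIMES, DEFAULT_INTERVAL_MILLIS):

```java
import java.util.Properties;
import org.apache.doris.flink.cfg.DorisConnectionOptions;
import org.apache.doris.flink.cfg.DorisExecutionOptions;

// Connection coordinates are placeholders; only fenodes is null-checked.
DorisConnectionOptions conn = new DorisConnectionOptions.DorisConnectionOptionsBuilder()
        .withFenodes("127.0.0.1:8030")
        .withUsername("root")
        .withPassword("")
        .build();

// Stream-load properties mirror what DorisExecutionOptions.defaults() sets.
Properties streamLoadProps = new Properties();
streamLoadProps.setProperty("format", "json");
streamLoadProps.setProperty("strip_outer_array", "true");

DorisExecutionOptions execOptions = DorisExecutionOptions.builder()
        .setBatchSize(10000)          // DEFAULT_BATCH_SIZE
        .setMaxRetries(1)             // DEFAULT_MAX_RETRY_TIMES
        .setBatchIntervalMs(10000L)   // DEFAULT_INTERVAL_MILLIS
        .setStreamLoadProp(streamLoadProps)
        .setEnableDelete(false)
        .build();
```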
-package org.apache.doris.flink.cfg; - -import org.apache.doris.flink.util.IOUtils; - -import java.util.Properties; - -import static org.apache.flink.util.Preconditions.checkNotNull; - -/** - * Options for the Doris connector. - */ -public class DorisOptions extends DorisConnectionOptions { - - private static final long serialVersionUID = 1L; - - private String tableIdentifier; - - - public DorisOptions(String fenodes, String username, String password, String tableIdentifier) { - super(fenodes, username, password); - this.tableIdentifier = tableIdentifier; - } - - public String getTableIdentifier() { - return tableIdentifier; - } - - public String save() throws IllegalArgumentException { - Properties copy = new Properties(); - return IOUtils.propsToString(copy); - } - - public static Builder builder() { - return new Builder(); - } - - /** - * Builder of {@link DorisOptions}. - */ - public static class Builder { - private String fenodes; - private String username; - private String password; - private String tableIdentifier; - - /** - * required, tableIdentifier - */ - public Builder setTableIdentifier(String tableIdentifier) { - this.tableIdentifier = tableIdentifier; - return this; - } - - /** - * optional, user name. - */ - public Builder setUsername(String username) { - this.username = username; - return this; - } - - /** - * optional, password. - */ - public Builder setPassword(String password) { - this.password = password; - return this; - } - - /** - * required, JDBC DB url. - */ - public Builder setFenodes(String fenodes) { - this.fenodes = fenodes; - return this; - } - - - public DorisOptions build() { - checkNotNull(fenodes, "No fenodes supplied."); - checkNotNull(tableIdentifier, "No tableIdentifier supplied."); - return new DorisOptions(fenodes, username, password, tableIdentifier); - } - } - - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisReadOptions.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisReadOptions.java deleted file mode 100644 index 0beb18c612..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisReadOptions.java +++ /dev/null @@ -1,190 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
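DorisOptions above extends the connection options with the one extra required field, the table identifier; its builder null-checks fenodes and tableIdentifier. A sketch with placeholder values:

```java
import org.apache.doris.flink.cfg.DorisOptions;

// Addresses, credentials, and table name are illustrative placeholders.
DorisOptions options = DorisOptions.builder()
        .setFenodes("127.0.0.1:8030")                  // FE address (placeholder)
        .setUsername("root")
        .setPassword("")
        .setTableIdentifier("example_db.example_tbl")  // "db.table" form (placeholder)
        .build();                                      // checkNotNull on fenodes + tableIdentifier
```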
-package org.apache.doris.flink.cfg; - - -import java.io.Serializable; - -/** - * Doris read Options - */ -public class DorisReadOptions implements Serializable { - - private static final long serialVersionUID = 1L; - - private String readFields; - private String filterQuery; - private Integer requestTabletSize; - private Integer requestConnectTimeoutMs; - private Integer requestReadTimeoutMs; - private Integer requestQueryTimeoutS; - private Integer requestRetries; - private Integer requestBatchSize; - private Long execMemLimit; - private Integer deserializeQueueSize; - private Boolean deserializeArrowAsync; - - public DorisReadOptions(String readFields, String filterQuery, Integer requestTabletSize, Integer requestConnectTimeoutMs, Integer requestReadTimeoutMs, - Integer requestQueryTimeoutS, Integer requestRetries, Integer requestBatchSize, Long execMemLimit, - Integer deserializeQueueSize, Boolean deserializeArrowAsync) { - this.readFields = readFields; - this.filterQuery = filterQuery; - this.requestTabletSize = requestTabletSize; - this.requestConnectTimeoutMs = requestConnectTimeoutMs; - this.requestReadTimeoutMs = requestReadTimeoutMs; - this.requestQueryTimeoutS = requestQueryTimeoutS; - this.requestRetries = requestRetries; - this.requestBatchSize = requestBatchSize; - this.execMemLimit = execMemLimit; - this.deserializeQueueSize = deserializeQueueSize; - this.deserializeArrowAsync = deserializeArrowAsync; - } - - public String getReadFields() { - return readFields; - } - - public String getFilterQuery() { - return filterQuery; - } - - public Integer getRequestTabletSize() { - return requestTabletSize; - } - - public Integer getRequestConnectTimeoutMs() { - return requestConnectTimeoutMs; - } - - public Integer getRequestReadTimeoutMs() { - return requestReadTimeoutMs; - } - - public Integer getRequestRetries() { - return requestRetries; - } - - public Integer getRequestBatchSize() { - return requestBatchSize; - } - - public Integer getRequestQueryTimeoutS() { - return requestQueryTimeoutS; - } - - public Long getExecMemLimit() { - return execMemLimit; - } - - public Integer getDeserializeQueueSize() { - return deserializeQueueSize; - } - - public Boolean getDeserializeArrowAsync() { - return deserializeArrowAsync; - } - - - public static Builder builder() { - return new Builder(); - } - - public static DorisReadOptions defaults(){ - return DorisReadOptions.builder().build(); - } - - /** - * Builder of {@link DorisReadOptions}. 
- */ - public static class Builder { - - private String readFields; - private String filterQuery; - private Integer requestTabletSize; - private Integer requestConnectTimeoutMs; - private Integer requestReadTimeoutMs; - private Integer requestQueryTimeoutS; - private Integer requestRetries; - private Integer requestBatchSize; - private Long execMemLimit; - private Integer deserializeQueueSize; - private Boolean deserializeArrowAsync; - - - public Builder setReadFields(String readFields) { - this.readFields = readFields; - return this; - } - - public Builder setFilterQuery(String filterQuery) { - this.filterQuery = filterQuery; - return this; - } - - public Builder setRequestTabletSize(Integer requestTabletSize) { - this.requestTabletSize = requestTabletSize; - return this; - } - - public Builder setRequestConnectTimeoutMs(Integer requestConnectTimeoutMs) { - this.requestConnectTimeoutMs = requestConnectTimeoutMs; - return this; - } - - public Builder setRequestReadTimeoutMs(Integer requestReadTimeoutMs) { - this.requestReadTimeoutMs = requestReadTimeoutMs; - return this; - } - - public Builder setRequestQueryTimeoutS(Integer requesQueryTimeoutS) { - this.requestQueryTimeoutS = requesQueryTimeoutS; - return this; - } - - public Builder setRequestRetries(Integer requestRetries) { - this.requestRetries = requestRetries; - return this; - } - - public Builder setRequestBatchSize(Integer requestBatchSize) { - this.requestBatchSize = requestBatchSize; - return this; - } - - public Builder setExecMemLimit(Long execMemLimit) { - this.execMemLimit = execMemLimit; - return this; - } - - public Builder setDeserializeQueueSize(Integer deserializeQueueSize) { - this.deserializeQueueSize = deserializeQueueSize; - return this; - } - - public Builder setDeserializeArrowAsync(Boolean deserializeArrowAsync) { - this.deserializeArrowAsync = deserializeArrowAsync; - return this; - } - - public DorisReadOptions build() { - return new DorisReadOptions(readFields, filterQuery, requestTabletSize, requestConnectTimeoutMs, requestReadTimeoutMs, requestQueryTimeoutS, requestRetries, requestBatchSize, execMemLimit, deserializeQueueSize, deserializeArrowAsync); - } - - } - - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisSink.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisSink.java deleted file mode 100644 index 2c3db4cf34..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisSink.java +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
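Every field of the DorisReadOptions deleted above is optional, so DorisReadOptions.defaults() is the no-tuning construction. The explicit sketch below restates the ConfigurationOptions defaults and adds an illustrative projection and predicate:

```java
import org.apache.doris.flink.cfg.DorisReadOptions;

// Numeric values echo the ConfigurationOptions defaults deleted earlier;
// readFields and filterQuery are illustrative strings.
DorisReadOptions readOptions = DorisReadOptions.builder()
        .setReadFields("id,name")                 // illustrative column projection
        .setFilterQuery("age > 18")               // illustrative pushed-down predicate
        .setRequestRetries(3)                     // DORIS_REQUEST_RETRIES_DEFAULT
        .setRequestConnectTimeoutMs(30 * 1000)    // DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT
        .setRequestBatchSize(1024)                // DORIS_BATCH_SIZE_DEFAULT
        .setExecMemLimit(2147483648L)             // DORIS_EXEC_MEM_LIMIT_DEFAULT
        .build();
```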
- -package org.apache.doris.flink.cfg; - -import org.apache.doris.flink.table.DorisDynamicOutputFormat; -import org.apache.flink.streaming.api.functions.sink.SinkFunction; -import org.apache.flink.table.types.logical.LogicalType; - -/** Facade to create Doris {@link SinkFunction sinks}. */ -public class DorisSink { - - - private DorisSink() { - } - - - /** - * Create a Doris DataStream sink with the default {@link DorisReadOptions} - * stream elements could only be JsonString. - * - * @see #sink(String[], LogicalType[], DorisReadOptions, DorisExecutionOptions, DorisOptions) - */ - public static SinkFunction sink(DorisExecutionOptions executionOptions, DorisOptions dorisOptions) { - - return sink(new String[]{}, new LogicalType[]{}, DorisReadOptions.defaults(), executionOptions, dorisOptions); - } - - /** - * Create a Doris DataStream sink with the default {@link DorisReadOptions} - * stream elements could only be RowData. - * - * @see #sink(String[], LogicalType[], DorisReadOptions, DorisExecutionOptions, DorisOptions) - */ - public static SinkFunction sink(String[] fiels, LogicalType[] types, - DorisExecutionOptions executionOptions, DorisOptions dorisOptions) { - - return sink(fiels, types, DorisReadOptions.defaults(), executionOptions, dorisOptions); - } - - /** - * Create a Doris DataStream sink with the default {@link DorisExecutionOptions} - * stream elements could only be JsonString. - * - * @see #sink(String[], LogicalType[], DorisReadOptions, DorisExecutionOptions, DorisOptions) - */ - public static SinkFunction sink(DorisOptions dorisOptions) { - - return sink(new String[]{}, new LogicalType[]{}, DorisReadOptions.defaults(), - DorisExecutionOptions.defaults(), dorisOptions); - } - - /** - * Create a Doris DataStream sink with the default {@link DorisExecutionOptions} - * stream elements could only be RowData. - * - * @see #sink(String[], LogicalType[], DorisReadOptions, DorisExecutionOptions, DorisOptions) - */ - public static SinkFunction sink(String[] fiels, LogicalType[] types, DorisOptions dorisOptions) { - return sink(fiels, types, DorisReadOptions.defaults(), DorisExecutionOptions.defaults(), dorisOptions); - } - - - /** - * Create a Doris DataStream sink, stream elements could only be JsonString. - * - * @see #sink(String[], LogicalType[], DorisReadOptions, DorisExecutionOptions, DorisOptions) - */ - public static SinkFunction sink(DorisReadOptions readOptions, - DorisExecutionOptions executionOptions, DorisOptions dorisOptions) { - - return sink(new String[]{}, new LogicalType[]{}, readOptions, executionOptions, dorisOptions); - } - - - /** - * Create a Doris DataStream sink, stream elements could only be RowData. - * - *
<p>
Note: the objects passed to the returned sink can be processed in batch and retried. - * Therefore, objects cannot be {@link org.apache.flink.api.common.ExecutionConfig#enableObjectReuse() reused}. -
* </p>
- * - * @param field array of field - * @param types types of field - * @param readOptions parameters of read, such as readFields, filterQuery - * @param executionOptions parameters of execution, such as batch size and maximum retries - * @param dorisOptions parameters of options, such as fenodes, username, password, tableIdentifier - * @param type of data in {@link org.apache.flink.streaming.runtime.streamrecord.StreamRecord - * StreamRecord}. - */ - public static SinkFunction sink(String[] field, LogicalType[] types, DorisReadOptions readOptions, - DorisExecutionOptions executionOptions, DorisOptions dorisOptions) { - - return new GenericDorisSinkFunction(new DorisDynamicOutputFormat( - dorisOptions, readOptions, executionOptions, types, field)); - } - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisStreamOptions.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisStreamOptions.java deleted file mode 100644 index c5c2c162d0..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/DorisStreamOptions.java +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.cfg; - - -import java.io.Serializable; -import java.util.Properties; - - -/** - * Options for the Doris stream connector. 
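The DorisSink facade deleted above is the DataStream write entry point; in the two-argument variant the stream elements must already be JSON strings, per the "stream elements could only be JsonString" contract. A minimal end-to-end sketch, assuming placeholder addresses and table name:

```java
import org.apache.doris.flink.cfg.DorisExecutionOptions;
import org.apache.doris.flink.cfg.DorisOptions;
import org.apache.doris.flink.cfg.DorisSink;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// Write-side sketch of the JsonString path; every literal is illustrative.
public class DorisSinkSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.fromElements("{\"id\": 1, \"name\": \"doris\"}")   // elements are JSON strings
           .addSink(DorisSink.sink(
                   DorisExecutionOptions.defaults(),           // format=json, strip_outer_array=true
                   DorisOptions.builder()
                           .setFenodes("127.0.0.1:8030")       // placeholder FE address
                           .setUsername("root")
                           .setPassword("")
                           .setTableIdentifier("example_db.example_tbl")
                           .build()));

        env.execute("doris-sink-sketch");
    }
}
```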
- */ -public class DorisStreamOptions implements Serializable { - private static final long serialVersionUID = 1L; - - private Properties prop; - private DorisOptions options; - private DorisReadOptions readOptions; - - public DorisStreamOptions(Properties prop) { - this.prop = prop; - init(); - } - - /** - * convert DorisStreamOptions to DorisOptions and DorisReadOptions - */ - private void init() { - DorisOptions.Builder optionsBuilder = DorisOptions.builder() - .setFenodes(prop.getProperty(ConfigurationOptions.DORIS_FENODES)) - .setUsername(prop.getProperty(ConfigurationOptions.DORIS_USER)) - .setPassword(prop.getProperty(ConfigurationOptions.DORIS_PASSWORD)) - .setTableIdentifier(prop.getProperty(ConfigurationOptions.TABLE_IDENTIFIER)); - - DorisReadOptions.Builder readOptionsBuilder = DorisReadOptions.builder() - .setDeserializeArrowAsync(Boolean.valueOf(prop.getProperty(ConfigurationOptions.DORIS_DESERIALIZE_ARROW_ASYNC, ConfigurationOptions.DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT.toString()))) - .setDeserializeQueueSize(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_DESERIALIZE_QUEUE_SIZE, ConfigurationOptions.DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT.toString()))) - .setExecMemLimit(Long.valueOf(prop.getProperty(ConfigurationOptions.DORIS_EXEC_MEM_LIMIT, ConfigurationOptions.DORIS_EXEC_MEM_LIMIT_DEFAULT.toString()))) - .setFilterQuery(prop.getProperty(ConfigurationOptions.DORIS_FILTER_QUERY)) - .setReadFields(prop.getProperty(ConfigurationOptions.DORIS_READ_FIELD)) - .setRequestQueryTimeoutS(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_REQUEST_QUERY_TIMEOUT_S, ConfigurationOptions.DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT.toString()))) - .setRequestBatchSize(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_BATCH_SIZE, ConfigurationOptions.DORIS_BATCH_SIZE_DEFAULT.toString()))) - .setRequestConnectTimeoutMs(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS, ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT.toString()))) - .setRequestReadTimeoutMs(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS, ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT.toString()))) - .setRequestRetries(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_REQUEST_RETRIES, ConfigurationOptions.DORIS_REQUEST_RETRIES_DEFAULT.toString()))) - .setRequestTabletSize(Integer.valueOf(prop.getProperty(ConfigurationOptions.DORIS_TABLET_SIZE, ConfigurationOptions.DORIS_TABLET_SIZE_DEFAULT.toString()))); - - this.options = optionsBuilder.build(); - this.readOptions = readOptionsBuilder.build(); - - } - - public DorisOptions getOptions() { - return options; - } - - public DorisReadOptions getReadOptions() { - return readOptions; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/GenericDorisSinkFunction.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/GenericDorisSinkFunction.java deleted file mode 100644 index 6be6aa4ed6..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/cfg/GenericDorisSinkFunction.java +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.cfg; - -import org.apache.doris.flink.table.DorisDynamicOutputFormat; -import org.apache.flink.api.common.functions.RuntimeContext; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.state.FunctionInitializationContext; -import org.apache.flink.runtime.state.FunctionSnapshotContext; -import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; -import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; -import org.apache.flink.util.Preconditions; - -import javax.annotation.Nonnull; - -public class GenericDorisSinkFunction extends RichSinkFunction - implements CheckpointedFunction { - - private final DorisDynamicOutputFormat outputFormat; - - public GenericDorisSinkFunction(@Nonnull DorisDynamicOutputFormat outputFormat) { - this.outputFormat = Preconditions.checkNotNull(outputFormat); - } - - @Override - public void open(Configuration parameters) throws Exception { - super.open(parameters); - RuntimeContext ctx = getRuntimeContext(); - outputFormat.setRuntimeContext(ctx); - outputFormat.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks()); - } - - - @Override - public void invoke(T value, Context context) throws Exception { - outputFormat.writeRecord(value); - } - - @Override - public void initializeState(FunctionInitializationContext context) throws Exception { - - } - - @Override - public void snapshotState(FunctionSnapshotContext context) throws Exception { - - } - - @Override - public void close() throws Exception { - outputFormat.close(); - super.close(); - } - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/datastream/DorisSourceFunction.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/datastream/DorisSourceFunction.java deleted file mode 100644 index edde9534b4..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/datastream/DorisSourceFunction.java +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
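DorisStreamOptions, deleted above, is a thin adapter from one flat Properties bag (keyed by the ConfigurationOptions constants) to a DorisOptions plus DorisReadOptions pair; unset keys fall back to the defaults. A sketch with placeholder values:

```java
import java.util.Properties;
import org.apache.doris.flink.cfg.DorisOptions;
import org.apache.doris.flink.cfg.DorisReadOptions;
import org.apache.doris.flink.cfg.DorisStreamOptions;

// Keys are the ConfigurationOptions constants; values are placeholders.
Properties props = new Properties();
props.setProperty("fenodes", "127.0.0.1:8030");                   // DORIS_FENODES
props.setProperty("username", "root");                            // DORIS_USER
props.setProperty("password", "");                                // DORIS_PASSWORD
props.setProperty("table.identifier", "example_db.example_tbl");  // TABLE_IDENTIFIER
props.setProperty("doris.request.retries", "3");                  // DORIS_REQUEST_RETRIES

DorisStreamOptions streamOptions = new DorisStreamOptions(props); // init() splits the bag
DorisOptions options = streamOptions.getOptions();
DorisReadOptions readOptions = streamOptions.getReadOptions();
```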
-package org.apache.doris.flink.datastream; - -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.doris.flink.cfg.DorisStreamOptions; -import org.apache.doris.flink.deserialization.DorisDeserializationSchema; -import org.apache.doris.flink.exception.DorisException; -import org.apache.doris.flink.rest.PartitionDefinition; -import org.apache.doris.flink.rest.RestService; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; -import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; - - -/** - * DorisSource - **/ - -public class DorisSourceFunction extends RichParallelSourceFunction> implements ResultTypeQueryable> { - - private static final Logger logger = LoggerFactory.getLogger(DorisSourceFunction.class); - - private final DorisDeserializationSchema> deserializer; - private final DorisOptions options; - private final DorisReadOptions readOptions; - private transient volatile boolean isRunning; - private List dorisPartitions; - private List taskDorisPartitions = Lists.newArrayList(); - - public DorisSourceFunction(DorisStreamOptions streamOptions, DorisDeserializationSchema> deserializer) { - this.deserializer = deserializer; - this.options = streamOptions.getOptions(); - this.readOptions = streamOptions.getReadOptions(); - try { - this.dorisPartitions = RestService.findPartitions(options, readOptions, logger); - logger.info("Doris partitions size {}", dorisPartitions.size()); - } catch (DorisException e) { - throw new RuntimeException("Failed fetch doris partitions"); - } - } - - @Override - public void open(Configuration parameters) throws Exception { - super.open(parameters); - this.isRunning = true; - assignTaskPartitions(); - } - - /** - * Assign patitions to each task. 
- */ - private void assignTaskPartitions() { - int taskIndex = getRuntimeContext().getIndexOfThisSubtask(); - int totalTasks = getRuntimeContext().getNumberOfParallelSubtasks(); - - for (int i = 0; i < dorisPartitions.size(); i++) { - if (i % totalTasks == taskIndex) { - taskDorisPartitions.add(dorisPartitions.get(i)); - } - } - logger.info("subtask {} process {} partitions ", taskIndex, taskDorisPartitions.size()); - } - - @Override - public void run(SourceContext> sourceContext) { - for (PartitionDefinition partitions : taskDorisPartitions) { - try (ScalaValueReader scalaValueReader = new ScalaValueReader(partitions, options, readOptions)) { - while (isRunning && scalaValueReader.hasNext()) { - List next = scalaValueReader.next(); - sourceContext.collect(next); - } - } - } - } - - @Override - public void cancel() { - isRunning = false; - } - - @Override - public void close() throws Exception { - super.close(); - isRunning = false; - } - - @Override - public TypeInformation> getProducedType() { - return this.deserializer.getProducedType(); - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/deserialization/DorisDeserializationSchema.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/deserialization/DorisDeserializationSchema.java deleted file mode 100644 index ba0921b9aa..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/deserialization/DorisDeserializationSchema.java +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.deserialization; - -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; - -import java.io.Serializable; - - -public interface DorisDeserializationSchema extends Serializable, ResultTypeQueryable { -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/deserialization/SimpleListDeserializationSchema.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/deserialization/SimpleListDeserializationSchema.java deleted file mode 100644 index d9ec6e5eae..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/deserialization/SimpleListDeserializationSchema.java +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-package org.apache.doris.flink.deserialization;
-
-
-import org.apache.flink.api.common.typeinfo.TypeHint;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-
-import java.util.List;
-
-
-public class SimpleListDeserializationSchema implements DorisDeserializationSchema<List<?>> {
-
-    @Override
-    public TypeInformation<List<?>> getProducedType() {
-        return TypeInformation.of(new TypeHint<List<?>>() {
-        });
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/ConnectedFailedException.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/ConnectedFailedException.java
deleted file mode 100644
index e25d1a592a..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/ConnectedFailedException.java
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.flink.exception;
-
-public class ConnectedFailedException extends DorisException {
-    public ConnectedFailedException(String server, Throwable cause) {
-        super("Connect to " + server + " failed.", cause);
-    }
-
-    public ConnectedFailedException(String server, int statusCode, Throwable cause) {
-        super("Connect to " + server + " failed, status code is " + statusCode + ".", cause);
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/DorisException.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/DorisException.java
deleted file mode 100644
index 2274f87121..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/DorisException.java
+++ /dev/null
@@ -1,42 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.flink.exception;
-
-public class DorisException extends Exception {
-    public DorisException() {
-        super();
-    }
-
-    public DorisException(String message) {
-        super(message);
-    }
-
-    public DorisException(String message, Throwable cause) {
-        super(message, cause);
-    }
-
-    public DorisException(Throwable cause) {
-        super(cause);
-    }
-
-    protected DorisException(String message, Throwable cause,
-                             boolean enableSuppression,
-                             boolean writableStackTrace) {
-        super(message, cause, enableSuppression, writableStackTrace);
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/DorisInternalException.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/DorisInternalException.java
deleted file mode 100644
index eadd860de5..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/DorisInternalException.java
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.flink.exception;
-
-import org.apache.doris.thrift.TStatusCode;
-
-import java.util.List;
-
-public class DorisInternalException extends DorisException {
-    public DorisInternalException(String server, TStatusCode statusCode, List<String> errorMsgs) {
-        super("Doris server " + server + " internal failed, status code [" + statusCode + "] error message is " + errorMsgs);
-    }
-
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/IllegalArgumentException.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/IllegalArgumentException.java
deleted file mode 100644
index 4c0ae0939a..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/IllegalArgumentException.java
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.exception; - -public class IllegalArgumentException extends DorisException { - public IllegalArgumentException(String msg, Throwable cause) { - super(msg, cause); - } - - public IllegalArgumentException(String arg, String value) { - super("argument '" + arg + "' is illegal, value is '" + value + "'."); - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/ShouldNeverHappenException.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/ShouldNeverHappenException.java deleted file mode 100644 index a26718d657..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/ShouldNeverHappenException.java +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.exception; - -public class ShouldNeverHappenException extends DorisException { -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/StreamLoadException.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/StreamLoadException.java deleted file mode 100644 index 233d27e167..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/exception/StreamLoadException.java +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.flink.exception; - -public class StreamLoadException extends Exception { - public StreamLoadException() { - super(); - } - - public StreamLoadException(String message) { - super(message); - } - - public StreamLoadException(String message, Throwable cause) { - super(message, cause); - } - - public StreamLoadException(Throwable cause) { - super(cause); - } - - protected StreamLoadException(String message, Throwable cause, - boolean enableSuppression, - boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/PartitionDefinition.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/PartitionDefinition.java deleted file mode 100644 index 8a66f76dfc..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/PartitionDefinition.java +++ /dev/null @@ -1,150 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.rest; - -import org.apache.doris.flink.cfg.DorisOptions; - -import java.io.Serializable; -import java.util.Collections; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; - -/** - * Doris partition info. 
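- * One partition ties a single Doris BE address to the tablet IDs read from it, together with the opaque query plan returned by the FE.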
- */
-public class PartitionDefinition implements Serializable, Comparable<PartitionDefinition> {
-    private final String database;
-    private final String table;
-
-    private final String beAddress;
-    private final Set<Long> tabletIds;
-    private final String queryPlan;
-    private final String serializedSettings;
-
-    public PartitionDefinition(String database, String table,
-                               DorisOptions settings, String beAddress, Set<Long> tabletIds, String queryPlan)
-            throws IllegalArgumentException {
-        if (settings != null) {
-            this.serializedSettings = settings.save();
-        } else {
-            this.serializedSettings = null;
-        }
-        this.database = database;
-        this.table = table;
-        this.beAddress = beAddress;
-        this.tabletIds = tabletIds;
-        this.queryPlan = queryPlan;
-    }
-
-    public String getBeAddress() {
-        return beAddress;
-    }
-
-    public Set<Long> getTabletIds() {
-        return tabletIds;
-    }
-
-    public String getDatabase() {
-        return database;
-    }
-
-    public String getTable() {
-        return table;
-    }
-
-    public String getQueryPlan() {
-        return queryPlan;
-    }
-
-
-    @Override
-    public int compareTo(PartitionDefinition o) {
-        int cmp = database.compareTo(o.database);
-        if (cmp != 0) {
-            return cmp;
-        }
-        cmp = table.compareTo(o.table);
-        if (cmp != 0) {
-            return cmp;
-        }
-        cmp = beAddress.compareTo(o.beAddress);
-        if (cmp != 0) {
-            return cmp;
-        }
-        cmp = queryPlan.compareTo(o.queryPlan);
-        if (cmp != 0) {
-            return cmp;
-        }
-
-        cmp = tabletIds.size() - o.tabletIds.size();
-        if (cmp != 0) {
-            return cmp;
-        }
-
-        Set<Long> similar = new HashSet<>(tabletIds);
-        Set<Long> diffSelf = new HashSet<>(tabletIds);
-        Set<Long> diffOther = new HashSet<>(o.tabletIds);
-        similar.retainAll(o.tabletIds);
-        diffSelf.removeAll(similar);
-        diffOther.removeAll(similar);
-        if (diffSelf.size() == 0) {
-            return 0;
-        }
-        long diff = Collections.min(diffSelf) - Collections.min(diffOther);
-        return diff < 0 ? -1 : 1;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
-            return false;
-        }
-        PartitionDefinition that = (PartitionDefinition) o;
-        return Objects.equals(database, that.database) &&
-                Objects.equals(table, that.table) &&
-                Objects.equals(beAddress, that.beAddress) &&
-                Objects.equals(tabletIds, that.tabletIds) &&
-                Objects.equals(queryPlan, that.queryPlan) &&
-                Objects.equals(serializedSettings, that.serializedSettings);
-    }
-
-    @Override
-    public int hashCode() {
-        int result = database.hashCode();
-        result = 31 * result + table.hashCode();
-        result = 31 * result + beAddress.hashCode();
-        result = 31 * result + queryPlan.hashCode();
-        result = 31 * result + tabletIds.hashCode();
-        return result;
-    }
-
-    @Override
-    public String toString() {
-        return "PartitionDefinition{" +
-                "database='" + database + '\'' +
-                ", table='" + table + '\'' +
-                ", beAddress='" + beAddress + '\'' +
-                ", tabletIds=" + tabletIds +
-                ", queryPlan='" + queryPlan + '\'' +
-                '}';
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/RestService.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/RestService.java
deleted file mode 100644
index 0c4264f201..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/RestService.java
+++ /dev/null
@@ -1,641 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.flink.rest;
-
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.databind.JsonMappingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.doris.flink.cfg.DorisOptions;
-import org.apache.doris.flink.cfg.DorisReadOptions;
-import org.apache.doris.flink.exception.IllegalArgumentException;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.doris.flink.cfg.ConfigurationOptions;
-import org.apache.doris.flink.exception.ConnectedFailedException;
-import org.apache.doris.flink.exception.DorisException;
-import org.apache.doris.flink.exception.ShouldNeverHappenException;
-import org.apache.doris.flink.rest.models.Backend;
-import org.apache.doris.flink.rest.models.BackendRow;
-import org.apache.doris.flink.rest.models.BackendV2;
-import org.apache.doris.flink.rest.models.QueryPlan;
-import org.apache.doris.flink.rest.models.Schema;
-import org.apache.doris.flink.rest.models.Tablet;
-import org.apache.flink.shaded.guava18.com.google.common.annotations.VisibleForTesting;
-import org.apache.http.HttpStatus;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.client.methods.HttpRequestBase;
-import org.apache.http.entity.StringEntity;
-
-import org.slf4j.Logger;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Base64;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_TABLET_SIZE;
-import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_DEFAULT;
-import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_MIN;
-import static org.apache.doris.flink.util.ErrorMessages.CONNECT_FAILED_MESSAGE;
-import static org.apache.doris.flink.util.ErrorMessages.ILLEGAL_ARGUMENT_MESSAGE;
-import static org.apache.doris.flink.util.ErrorMessages.SHOULD_NOT_HAPPEN_MESSAGE;
-
-
-/**
- * Service for communicating with Doris FE.
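- * Wraps the FE REST endpoints used by the connector: table schema (_schema), query plan (_query_plan) and the backend list.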
- */
-public class RestService implements Serializable {
-    public final static int REST_RESPONSE_STATUS_OK = 200;
-    public final static int REST_RESPONSE_CODE_OK = 0;
-    private final static String REST_RESPONSE_BE_ROWS_KEY = "rows";
-    private static final String API_PREFIX = "/api";
-    private static final String SCHEMA = "_schema";
-    private static final String QUERY_PLAN = "_query_plan";
-    @Deprecated
-    private static final String BACKENDS = "/rest/v1/system?path=//backends";
-    private static final String BACKENDS_V2 = "/api/backends?is_alive=true";
-    private static final String FE_LOGIN = "/rest/v1/login";
-
-    /**
-     * send request to Doris FE and get response json string.
-     *
-     * @param options configuration of request
-     * @param request {@link HttpRequestBase} real request
-     * @param logger  {@link Logger}
-     * @return Doris FE response in json string
-     * @throws ConnectedFailedException throw when cannot connect to Doris FE
-     */
-    private static String send(DorisOptions options, DorisReadOptions readOptions, HttpRequestBase request, Logger logger) throws
-            ConnectedFailedException {
-        int connectTimeout = readOptions.getRequestConnectTimeoutMs() == null ? ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT : readOptions.getRequestConnectTimeoutMs();
-        int socketTimeout = readOptions.getRequestReadTimeoutMs() == null ? ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT : readOptions.getRequestReadTimeoutMs();
-        int retries = readOptions.getRequestRetries() == null ? ConfigurationOptions.DORIS_REQUEST_RETRIES_DEFAULT : readOptions.getRequestRetries();
-        logger.trace("connect timeout set to '{}'. socket timeout set to '{}'. retries set to '{}'.",
-                connectTimeout, socketTimeout, retries);
-
-        RequestConfig requestConfig = RequestConfig.custom()
-                .setConnectTimeout(connectTimeout)
-                .setSocketTimeout(socketTimeout)
-                .build();
-
-        request.setConfig(requestConfig);
-        logger.info("Send request to Doris FE '{}' with user '{}'.", request.getURI(), options.getUsername());
-        IOException ex = null;
-        int statusCode = -1;
-
-        for (int attempt = 0; attempt < retries; attempt++) {
-            logger.debug("Attempt {} to request {}.", attempt, request.getURI());
-            try {
-                String response;
-                if (request instanceof HttpGet) {
-                    response = getConnectionGet(request.getURI().toString(), options.getUsername(), options.getPassword(), logger);
-                } else {
-                    response = getConnectionPost(request, options.getUsername(), options.getPassword(), logger);
-                }
-                if (response == null) {
-                    logger.warn("Failed to get response from Doris FE {}, http code is {}",
-                            request.getURI(), statusCode);
-                    continue;
-                }
-                logger.trace("Successfully got response from Doris FE: {}, response is: {}.",
-                        request.getURI(), response);
-                // Handle the problem of inconsistent data format returned by http v1 and v2
-                ObjectMapper mapper = new ObjectMapper();
-                Map<String, Object> map = mapper.readValue(response, Map.class);
-                if (map.containsKey("code") && map.containsKey("msg")) {
-                    Object data = map.get("data");
-                    return mapper.writeValueAsString(data);
-                } else {
-                    return response;
-                }
-            } catch (IOException e) {
-                ex = e;
-                logger.warn(CONNECT_FAILED_MESSAGE, request.getURI(), e);
-            }
-        }
-
-        logger.error(CONNECT_FAILED_MESSAGE, request.getURI(), ex);
-        throw new ConnectedFailedException(request.getURI().toString(), statusCode, ex);
-    }
-
-    private static String getConnectionPost(HttpRequestBase request, String user, String passwd, Logger logger) throws IOException {
-        URL url = new URL(request.getURI().toString());
-        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
-        conn.setInstanceFollowRedirects(false);
-        conn.setRequestMethod(request.getMethod());
-        String authEncoding = Base64.getEncoder().encodeToString(String.format("%s:%s", user, passwd).getBytes(StandardCharsets.UTF_8));
-        conn.setRequestProperty("Authorization", "Basic " + authEncoding);
-        InputStream content = ((HttpPost) request).getEntity().getContent();
-        String res = IOUtils.toString(content);
-        conn.setDoOutput(true);
-        conn.setDoInput(true);
-        PrintWriter out = new PrintWriter(conn.getOutputStream());
-        // send request params
-        out.print(res);
-        // flush
-        out.flush();
-        // read response
-        return parseResponse(conn, logger);
-    }
-
-    private static String getConnectionGet(String request, String user, String passwd, Logger logger) throws IOException {
-        URL realUrl = new URL(request);
-        // open connection
-        HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
-        String authEncoding = Base64.getEncoder().encodeToString(String.format("%s:%s", user, passwd).getBytes(StandardCharsets.UTF_8));
-        connection.setRequestProperty("Authorization", "Basic " + authEncoding);
-
-        connection.connect();
-        return parseResponse(connection, logger);
-    }
-
-    private static String parseResponse(HttpURLConnection connection, Logger logger) throws IOException {
-        if (connection.getResponseCode() != HttpStatus.SC_OK) {
-            logger.warn("Failed to get response from Doris {}, http code is {}",
-                    connection.getURL(), connection.getResponseCode());
-            throw new IOException("Failed to get response from Doris");
-        }
-        String result = "";
-        BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8"));
-        String line;
-        while ((line = in.readLine()) != null) {
-            result += line;
-        }
-        if (in != null) {
-            in.close();
-        }
-        return result;
-    }
-
-    /**
-     * parse table identifier to array.
-     *
-     * @param tableIdentifier table identifier string
-     * @param logger          {@link Logger}
-     * @return first element is db name, second element is table name
-     * @throws IllegalArgumentException table identifier is illegal
-     */
-    @VisibleForTesting
-    static String[] parseIdentifier(String tableIdentifier, Logger logger) throws IllegalArgumentException {
-        logger.trace("Parse identifier '{}'.", tableIdentifier);
-        if (StringUtils.isEmpty(tableIdentifier)) {
-            logger.error(ILLEGAL_ARGUMENT_MESSAGE, "table.identifier", tableIdentifier);
-            throw new IllegalArgumentException("table.identifier", tableIdentifier);
-        }
-        String[] identifier = tableIdentifier.split("\\.");
-        if (identifier.length != 2) {
-            logger.error(ILLEGAL_ARGUMENT_MESSAGE, "table.identifier", tableIdentifier);
-            throw new IllegalArgumentException("table.identifier", tableIdentifier);
-        }
-        return identifier;
-    }
-
-    /**
-     * choose a Doris FE node to request.
-     *
-     * @param feNodes Doris FE node list, separated by comma
-     * @param logger  slf4j logger
-     * @return the chosen Doris FE node
-     * @throws IllegalArgumentException fe nodes are illegal
-     */
-    @VisibleForTesting
-    static String randomEndpoint(String feNodes, Logger logger) throws IllegalArgumentException {
-        logger.trace("Parse fenodes '{}'.", feNodes);
-        if (StringUtils.isEmpty(feNodes)) {
-            logger.error(ILLEGAL_ARGUMENT_MESSAGE, "fenodes", feNodes);
-            throw new IllegalArgumentException("fenodes", feNodes);
-        }
-        List<String> nodes = Arrays.asList(feNodes.split(","));
-        Collections.shuffle(nodes);
-        return nodes.get(0).trim();
-    }
-
-    /**
-     * choose a Doris BE node to request.
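-     * The node is picked uniformly at random from the alive backends reported by the FE.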
-     *
-     * @param options configuration of request
-     * @param logger  slf4j logger
-     * @return the chosen Doris BE node
-     * @throws IllegalArgumentException BE nodes are illegal
-     */
-    @VisibleForTesting
-    public static String randomBackend(DorisOptions options, DorisReadOptions readOptions, Logger logger) throws DorisException, IOException {
-        List<BackendV2.BackendRowV2> backends = getBackendsV2(options, readOptions, logger);
-        logger.trace("Parse beNodes '{}'.", backends);
-        if (backends == null || backends.isEmpty()) {
-            logger.error(ILLEGAL_ARGUMENT_MESSAGE, "beNodes", backends);
-            throw new IllegalArgumentException("beNodes", String.valueOf(backends));
-        }
-        Collections.shuffle(backends);
-        BackendV2.BackendRowV2 backend = backends.get(0);
-        return backend.getIp() + ":" + backend.getHttpPort();
-    }
-
-    /**
-     * get Doris BE nodes to request.
-     *
-     * @param options configuration of request
-     * @param logger  slf4j logger
-     * @return the list of Doris BE nodes
-     * @throws IllegalArgumentException BE nodes are illegal
-     *
-     * This method is deprecated because it needs ADMIN_PRIV to get backends, which is not suitable for common users.
-     * Use getBackendsV2 instead.
-     */
-    @Deprecated
-    @VisibleForTesting
-    static List<BackendRow> getBackends(DorisOptions options, DorisReadOptions readOptions, Logger logger) throws DorisException, IOException {
-        String feNodes = options.getFenodes();
-        String feNode = randomEndpoint(feNodes, logger);
-        String beUrl = "http://" + feNode + BACKENDS;
-        HttpGet httpGet = new HttpGet(beUrl);
-        String response = send(options, readOptions, httpGet, logger);
-        logger.info("Backend Info:{}", response);
-        List<BackendRow> backends = parseBackend(response, logger);
-        return backends;
-    }
-
-    @Deprecated
-    static List<BackendRow> parseBackend(String response, Logger logger) throws DorisException, IOException {
-        ObjectMapper mapper = new ObjectMapper();
-        Backend backend;
-        try {
-            backend = mapper.readValue(response, Backend.class);
-        } catch (JsonParseException e) {
-            String errMsg = "Doris BE's response is not a json. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (JsonMappingException e) {
-            String errMsg = "Doris BE's response cannot map to schema. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (IOException e) {
-            String errMsg = "Parse Doris BE's response to json failed. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        }
-
-        if (backend == null) {
-            logger.error(SHOULD_NOT_HAPPEN_MESSAGE);
-            throw new ShouldNeverHappenException();
-        }
-        List<BackendRow> backendRows = backend.getRows().stream().filter(v -> v.getAlive()).collect(Collectors.toList());
-        logger.debug("Parsing backend result is '{}'.", backendRows);
-        return backendRows;
-    }
-
-    /**
-     * get Doris BE nodes to request.
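-     * Unlike the deprecated getBackends, this variant uses /api/backends?is_alive=true and needs no ADMIN_PRIV.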
-     *
-     * @param options configuration of request
-     * @param logger  slf4j logger
-     * @return the list of alive Doris BE nodes
-     * @throws IllegalArgumentException BE nodes are illegal
-     */
-    @VisibleForTesting
-    static List<BackendV2.BackendRowV2> getBackendsV2(DorisOptions options, DorisReadOptions readOptions, Logger logger) throws DorisException, IOException {
-        String feNodes = options.getFenodes();
-        String feNode = randomEndpoint(feNodes, logger);
-        String beUrl = "http://" + feNode + BACKENDS_V2;
-        HttpGet httpGet = new HttpGet(beUrl);
-        String response = send(options, readOptions, httpGet, logger);
-        logger.info("Backend Info:{}", response);
-        List<BackendV2.BackendRowV2> backends = parseBackendV2(response, logger);
-        return backends;
-    }
-
-    static List<BackendV2.BackendRowV2> parseBackendV2(String response, Logger logger) throws DorisException, IOException {
-        ObjectMapper mapper = new ObjectMapper();
-        BackendV2 backend;
-        try {
-            backend = mapper.readValue(response, BackendV2.class);
-        } catch (JsonParseException e) {
-            String errMsg = "Doris BE's response is not a json. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (JsonMappingException e) {
-            String errMsg = "Doris BE's response cannot map to schema. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (IOException e) {
-            String errMsg = "Parse Doris BE's response to json failed. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        }
-
-        if (backend == null) {
-            logger.error(SHOULD_NOT_HAPPEN_MESSAGE);
-            throw new ShouldNeverHappenException();
-        }
-        List<BackendV2.BackendRowV2> backendRows = backend.getBackends();
-        logger.debug("Parsing backend result is '{}'.", backendRows);
-        return backendRows;
-    }
-
-    /**
-     * get a valid URI to connect Doris FE.
-     *
-     * @param options configuration of request
-     * @param logger  {@link Logger}
-     * @return uri string
-     * @throws IllegalArgumentException throw when configuration is illegal
-     */
-    @VisibleForTesting
-    static String getUriStr(DorisOptions options, Logger logger) throws IllegalArgumentException {
-        String[] identifier = parseIdentifier(options.getTableIdentifier(), logger);
-        return "http://" +
-                randomEndpoint(options.getFenodes(), logger) + API_PREFIX +
-                "/" + identifier[0] +
-                "/" + identifier[1] +
-                "/";
-    }
-
-    /**
-     * discover Doris table schema from Doris FE.
-     *
-     * @param options configuration of request
-     * @param logger  slf4j logger
-     * @return Doris table schema
-     * @throws DorisException throw when discover failed
-     */
-    public static Schema getSchema(DorisOptions options, DorisReadOptions readOptions, Logger logger)
-            throws DorisException {
-        logger.trace("Finding schema.");
-        HttpGet httpGet = new HttpGet(getUriStr(options, logger) + SCHEMA);
-        String response = send(options, readOptions, httpGet, logger);
-        logger.debug("Find schema response is '{}'.", response);
-        return parseSchema(response, logger);
-    }
-
-    /**
-     * translate Doris FE response to inner {@link Schema} struct.
-     *
-     * @param response Doris FE response
-     * @param logger   {@link Logger}
-     * @return inner {@link Schema} struct
-     * @throws DorisException throw when translate failed
-     */
-    @VisibleForTesting
-    public static Schema parseSchema(String response, Logger logger) throws DorisException {
-        logger.trace("Parse response '{}' to schema.", response);
-        ObjectMapper mapper = new ObjectMapper();
-        Schema schema;
-        try {
-            schema = mapper.readValue(response, Schema.class);
-        } catch (JsonParseException e) {
-            String errMsg = "Doris FE's response is not a json. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (JsonMappingException e) {
-            String errMsg = "Doris FE's response cannot map to schema. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (IOException e) {
-            String errMsg = "Parse Doris FE's response to json failed. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        }
-
-        if (schema == null) {
-            logger.error(SHOULD_NOT_HAPPEN_MESSAGE);
-            throw new ShouldNeverHappenException();
-        }
-
-        if (schema.getStatus() != REST_RESPONSE_STATUS_OK) {
-            String errMsg = "Doris FE's response is not OK, status is " + schema.getStatus();
-            logger.error(errMsg);
-            throw new DorisException(errMsg);
-        }
-        logger.debug("Parsing schema result is '{}'.", schema);
-        return schema;
-    }
-
-    /**
-     * find Doris partitions from Doris FE.
-     *
-     * @param options configuration of request
-     * @param logger  {@link Logger}
-     * @return a list of Doris partitions
-     * @throws DorisException throw when find partition failed
-     */
-    public static List<PartitionDefinition> findPartitions(DorisOptions options, DorisReadOptions readOptions, Logger logger) throws DorisException {
-        String[] tableIdentifiers = parseIdentifier(options.getTableIdentifier(), logger);
-        String readFields = StringUtils.isBlank(readOptions.getReadFields()) ? "*" : readOptions.getReadFields();
-        String sql = "select " + readFields +
-                " from `" + tableIdentifiers[0] + "`.`" + tableIdentifiers[1] + "`";
-        if (!StringUtils.isEmpty(readOptions.getFilterQuery())) {
-            sql += " where " + readOptions.getFilterQuery();
-        }
-        logger.debug("Query SQL Sending to Doris FE is: '{}'.", sql);
-
-        HttpPost httpPost = new HttpPost(getUriStr(options, logger) + QUERY_PLAN);
-        String entity = "{\"sql\": \"" + sql + "\"}";
-        logger.debug("Post body Sending to Doris FE is: '{}'.", entity);
-        StringEntity stringEntity = new StringEntity(entity, StandardCharsets.UTF_8);
-        stringEntity.setContentEncoding("UTF-8");
-        stringEntity.setContentType("application/json");
-        httpPost.setEntity(stringEntity);
-
-        String resStr = send(options, readOptions, httpPost, logger);
-        logger.debug("Find partition response is '{}'.", resStr);
-        QueryPlan queryPlan = getQueryPlan(resStr, logger);
-        Map<String, List<Long>> be2Tablets = selectBeForTablet(queryPlan, logger);
-        return tabletsMapToPartition(
-                options,
-                readOptions,
-                be2Tablets,
-                queryPlan.getOpaqued_query_plan(),
-                tableIdentifiers[0],
-                tableIdentifiers[1],
-                logger);
-    }
-
-    /**
-     * translate Doris FE response string to inner {@link QueryPlan} struct.
-     *
-     * @param response Doris FE response string
-     * @param logger   {@link Logger}
-     * @return inner {@link QueryPlan} struct
-     * @throws DorisException throw when translate failed.
-     */
-    @VisibleForTesting
-    static QueryPlan getQueryPlan(String response, Logger logger) throws DorisException {
-        ObjectMapper mapper = new ObjectMapper();
-        QueryPlan queryPlan;
-        try {
-            queryPlan = mapper.readValue(response, QueryPlan.class);
-        } catch (JsonParseException e) {
-            String errMsg = "Doris FE's response is not a json. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (JsonMappingException e) {
-            String errMsg = "Doris FE's response cannot map to schema. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        } catch (IOException e) {
-            String errMsg = "Parse Doris FE's response to json failed. res: " + response;
-            logger.error(errMsg, e);
-            throw new DorisException(errMsg, e);
-        }
-
-        if (queryPlan == null) {
-            logger.error(SHOULD_NOT_HAPPEN_MESSAGE);
-            throw new ShouldNeverHappenException();
-        }
-
-        if (queryPlan.getStatus() != REST_RESPONSE_STATUS_OK) {
-            String errMsg = "Doris FE's response is not OK, status is " + queryPlan.getStatus();
-            logger.error(errMsg);
-            throw new DorisException(errMsg);
-        }
-        logger.debug("Parsing partition result is '{}'.", queryPlan);
-        return queryPlan;
-    }
-
-    /**
-     * select which Doris BE to get tablet data.
-     *
-     * @param queryPlan {@link QueryPlan} translated from Doris FE response
-     * @param logger    {@link Logger}
-     * @return BE to tablets {@link Map}
-     * @throws DorisException throw when select failed.
-     */
-    @VisibleForTesting
-    static Map<String, List<Long>> selectBeForTablet(QueryPlan queryPlan, Logger logger) throws DorisException {
-        Map<String, List<Long>> be2Tablets = new HashMap<>();
-        for (Map.Entry<String, Tablet> part : queryPlan.getPartitions().entrySet()) {
-            logger.debug("Parse tablet info: '{}'.", part);
-            long tabletId;
-            try {
-                tabletId = Long.parseLong(part.getKey());
-            } catch (NumberFormatException e) {
-                String errMsg = "Parse tablet id '" + part.getKey() + "' to long failed.";
-                logger.error(errMsg, e);
-                throw new DorisException(errMsg, e);
-            }
-            String target = null;
-            int tabletCount = Integer.MAX_VALUE;
-            for (String candidate : part.getValue().getRoutings()) {
-                logger.trace("Evaluate Doris BE '{}' to tablet '{}'.", candidate, tabletId);
-                if (!be2Tablets.containsKey(candidate)) {
-                    logger.debug("Choose a new Doris BE '{}' for tablet '{}'.", candidate, tabletId);
-                    List<Long> tablets = new ArrayList<>();
-                    be2Tablets.put(candidate, tablets);
-                    target = candidate;
-                    break;
-                } else {
-                    if (be2Tablets.get(candidate).size() < tabletCount) {
-                        target = candidate;
-                        tabletCount = be2Tablets.get(candidate).size();
-                        logger.debug("Current candidate Doris BE to tablet '{}' is '{}' with tablet count {}.",
-                                tabletId, target, tabletCount);
-                    }
-                }
-            }
-            if (target == null) {
-                String errMsg = "Cannot choose Doris BE for tablet " + tabletId;
-                logger.error(errMsg);
-                throw new DorisException(errMsg);
-            }
-
-            logger.debug("Chose Doris BE '{}' for tablet '{}'.", target, tabletId);
-            be2Tablets.get(target).add(tabletId);
-        }
-        return be2Tablets;
-    }
-
-    /**
-     * tablet count limit for one Doris RDD partition
-     *
-     * @param readOptions configuration of request
-     * @param logger      {@link Logger}
-     * @return tablet count limit
-     */
-    @VisibleForTesting
-    static int tabletCountLimitForOnePartition(DorisReadOptions readOptions, Logger logger) {
-        int tabletsSize = DORIS_TABLET_SIZE_DEFAULT;
-        if (readOptions.getRequestTabletSize() != null) {
-            tabletsSize = readOptions.getRequestTabletSize();
-        }
-        if (tabletsSize < DORIS_TABLET_SIZE_MIN) {
-            logger.warn("{} is less than {}, set to default value {}.",
-                    DORIS_TABLET_SIZE, DORIS_TABLET_SIZE_MIN, DORIS_TABLET_SIZE_MIN);
-            tabletsSize = DORIS_TABLET_SIZE_MIN;
-        }
-        logger.debug("Tablet size is set to {}.", tabletsSize);
-        return tabletsSize;
-    }
-
-    /**
-     * translate BE tablets map to Doris RDD partition.
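-     * Tablets are first deduplicated per BE, then grouped into chunks of at most the configured tablet size.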
-     *
-     * @param options          configuration of request
-     * @param be2Tablets       BE to tablets {@link Map}
-     * @param opaquedQueryPlan Doris BE execute plan obtained from Doris FE
-     * @param database         database name of Doris table
-     * @param table            table name of Doris table
-     * @param logger           {@link Logger}
-     * @return Doris RDD partition {@link List}
-     * @throws IllegalArgumentException throw when translate failed
-     */
-    @VisibleForTesting
-    static List<PartitionDefinition> tabletsMapToPartition(DorisOptions options, DorisReadOptions readOptions, Map<String, List<Long>> be2Tablets,
-                                                           String opaquedQueryPlan, String database, String table, Logger logger)
-            throws IllegalArgumentException {
-        int tabletsSize = tabletCountLimitForOnePartition(readOptions, logger);
-        List<PartitionDefinition> partitions = new ArrayList<>();
-        for (Map.Entry<String, List<Long>> beInfo : be2Tablets.entrySet()) {
-            logger.debug("Generate partition with beInfo: '{}'.", beInfo);
-            HashSet<Long> tabletSet = new HashSet<>(beInfo.getValue());
-            beInfo.getValue().clear();
-            beInfo.getValue().addAll(tabletSet);
-            int first = 0;
-            while (first < beInfo.getValue().size()) {
-                Set<Long> partitionTablets = new HashSet<>(beInfo.getValue().subList(
-                        first, Math.min(beInfo.getValue().size(), first + tabletsSize)));
-                first = first + tabletsSize;
-                PartitionDefinition partitionDefinition =
-                        new PartitionDefinition(database, table, options,
-                                beInfo.getKey(), partitionTablets, opaquedQueryPlan);
-                logger.debug("Generate one PartitionDefinition '{}'.", partitionDefinition);
-                partitions.add(partitionDefinition);
-            }
-        }
-        return partitions;
-    }
-
-
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/SchemaUtils.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/SchemaUtils.java
deleted file mode 100644
index 5c6455628f..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/SchemaUtils.java
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-package org.apache.doris.flink.rest;
-
-import org.apache.doris.flink.rest.models.Field;
-import org.apache.doris.flink.rest.models.Schema;
-import org.apache.doris.thrift.TScanColumnDesc;
-
-import java.util.List;
-
-
-public class SchemaUtils {
-
-    /**
-     * convert Doris return schema to inner schema struct.
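-     * Each thrift TScanColumnDesc contributes one Field; comment, precision, scale and aggregation type are left at defaults.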
-     *
-     * @param tscanColumnDescs Doris BE return schema
-     * @return inner schema struct
-     */
-    public static Schema convertToSchema(List<TScanColumnDesc> tscanColumnDescs) {
-        Schema schema = new Schema(tscanColumnDescs.size());
-        tscanColumnDescs.stream().forEach(desc -> schema.put(new Field(desc.getName(), desc.getType().name(), "", 0, 0, "")));
-        return schema;
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Backend.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Backend.java
deleted file mode 100644
index d91614f442..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Backend.java
+++ /dev/null
@@ -1,42 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.flink.rest.models;
-
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * Be response model
- **/
-@Deprecated
-@JsonIgnoreProperties(ignoreUnknown = true)
-public class Backend {
-
-    @JsonProperty(value = "rows")
-    private List<BackendRow> rows;
-
-    public List<BackendRow> getRows() {
-        return rows;
-    }
-
-    public void setRows(List<BackendRow> rows) {
-        this.rows = rows;
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/BackendRow.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/BackendRow.java
deleted file mode 100644
index 3dd04710ae..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/BackendRow.java
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -package org.apache.doris.flink.rest.models; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -@Deprecated -@JsonIgnoreProperties(ignoreUnknown = true) -public class BackendRow { - - @JsonProperty(value = "HttpPort") - private String HttpPort; - - @JsonProperty(value = "IP") - private String IP; - - @JsonProperty(value = "Alive") - private Boolean Alive; - - public String getHttpPort() { - return HttpPort; - } - - public void setHttpPort(String httpPort) { - HttpPort = httpPort; - } - - public String getIP() { - return IP; - } - - public void setIP(String IP) { - this.IP = IP; - } - - public Boolean getAlive() { - return Alive; - } - - public void setAlive(Boolean alive) { - Alive = alive; - } - - @Override - public String toString() { - return "BackendRow{" + - "HttpPort='" + HttpPort + '\'' + - ", IP='" + IP + '\'' + - ", Alive=" + Alive + - '}'; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/BackendV2.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/BackendV2.java deleted file mode 100644 index 5efb85ec07..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/BackendV2.java +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-
-package org.apache.doris.flink.rest.models;
-
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * Be response model
- **/
-@JsonIgnoreProperties(ignoreUnknown = true)
-public class BackendV2 {
-
-    @JsonProperty(value = "backends")
-    private List<BackendRowV2> backends;
-
-    public List<BackendRowV2> getBackends() {
-        return backends;
-    }
-
-    public void setBackends(List<BackendRowV2> backends) {
-        this.backends = backends;
-    }
-
-    public static class BackendRowV2 {
-        @JsonProperty("ip")
-        public String ip;
-        @JsonProperty("http_port")
-        public int httpPort;
-        @JsonProperty("is_alive")
-        public boolean isAlive;
-
-        public String getIp() {
-            return ip;
-        }
-
-        public void setIp(String ip) {
-            this.ip = ip;
-        }
-
-        public int getHttpPort() {
-            return httpPort;
-        }
-
-        public void setHttpPort(int httpPort) {
-            this.httpPort = httpPort;
-        }
-
-        public boolean isAlive() {
-            return isAlive;
-        }
-
-        public void setAlive(boolean alive) {
-            isAlive = alive;
-        }
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Field.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Field.java
deleted file mode 100644
index 04341bf571..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Field.java
+++ /dev/null
@@ -1,121 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -package org.apache.doris.flink.rest.models; - -import java.util.Objects; - -public class Field { - private String name; - private String type; - private String comment; - private int precision; - private int scale; - private String aggregation_type; - - public Field() { - } - - public Field(String name, String type, String comment, int precision, int scale, String aggregation_type) { - this.name = name; - this.type = type; - this.comment = comment; - this.precision = precision; - this.scale = scale; - this.aggregation_type = aggregation_type; - } - - public String getAggregation_type() { - return aggregation_type; - } - - public void setAggregation_type(String aggregation_type) { - this.aggregation_type = aggregation_type; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getComment() { - return comment; - } - - public void setComment(String comment) { - this.comment = comment; - } - - public int getPrecision() { - return precision; - } - - public void setPrecision(int precision) { - this.precision = precision; - } - - public int getScale() { - return scale; - } - - public void setScale(int scale) { - this.scale = scale; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Field field = (Field) o; - return precision == field.precision && - scale == field.scale && - Objects.equals(name, field.name) && - Objects.equals(type, field.type) && - Objects.equals(comment, field.comment); - } - - @Override - public int hashCode() { - return Objects.hash(name, type, comment, precision, scale); - } - - @Override - public String toString() { - return "Field{" + - "name='" + name + '\'' + - ", type='" + type + '\'' + - ", comment='" + comment + '\'' + - ", precision=" + precision + - ", scale=" + scale + - '}'; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/QueryPlan.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/QueryPlan.java deleted file mode 100644 index e65175ca4b..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/QueryPlan.java +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-
-package org.apache.doris.flink.rest.models;
-
-import java.util.Map;
-import java.util.Objects;
-
-public class QueryPlan {
-    private int status;
-    private String opaqued_query_plan;
-    private Map<String, Tablet> partitions;
-
-    public int getStatus() {
-        return status;
-    }
-
-    public void setStatus(int status) {
-        this.status = status;
-    }
-
-    public String getOpaqued_query_plan() {
-        return opaqued_query_plan;
-    }
-
-    public void setOpaqued_query_plan(String opaqued_query_plan) {
-        this.opaqued_query_plan = opaqued_query_plan;
-    }
-
-    public Map<String, Tablet> getPartitions() {
-        return partitions;
-    }
-
-    public void setPartitions(Map<String, Tablet> partitions) {
-        this.partitions = partitions;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
-            return false;
-        }
-        QueryPlan queryPlan = (QueryPlan) o;
-        return status == queryPlan.status &&
-                Objects.equals(opaqued_query_plan, queryPlan.opaqued_query_plan) &&
-                Objects.equals(partitions, queryPlan.partitions);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(status, opaqued_query_plan, partitions);
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/RespContent.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/RespContent.java
deleted file mode 100644
index 07a356cf29..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/RespContent.java
+++ /dev/null
@@ -1,100 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-package org.apache.doris.flink.rest.models; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -@JsonIgnoreProperties(ignoreUnknown = true) -public class RespContent { - - @JsonProperty(value = "TxnId") - private int TxnId; - - @JsonProperty(value = "Label") - private String Label; - - @JsonProperty(value = "Status") - private String Status; - - @JsonProperty(value = "ExistingJobStatus") - private String ExistingJobStatus; - - @JsonProperty(value = "Message") - private String Message; - - @JsonProperty(value = "NumberTotalRows") - private long NumberTotalRows; - - @JsonProperty(value = "NumberLoadedRows") - private long NumberLoadedRows; - - @JsonProperty(value = "NumberFilteredRows") - private int NumberFilteredRows; - - @JsonProperty(value = "NumberUnselectedRows") - private int NumberUnselectedRows; - - @JsonProperty(value = "LoadBytes") - private long LoadBytes; - - @JsonProperty(value = "LoadTimeMs") - private int LoadTimeMs; - - @JsonProperty(value = "BeginTxnTimeMs") - private int BeginTxnTimeMs; - - @JsonProperty(value = "StreamLoadPutTimeMs") - private int StreamLoadPutTimeMs; - - @JsonProperty(value = "ReadDataTimeMs") - private int ReadDataTimeMs; - - @JsonProperty(value = "WriteDataTimeMs") - private int WriteDataTimeMs; - - @JsonProperty(value = "CommitAndPublishTimeMs") - private int CommitAndPublishTimeMs; - - @JsonProperty(value = "ErrorURL") - private String ErrorURL; - - public String getStatus() { - return Status; - } - - public String getMessage() { - return Message; - } - - @Override - public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (JsonProcessingException e) { - return ""; - } - - } - - public String getErrorURL() { - return ErrorURL; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Schema.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Schema.java deleted file mode 100644 index 264e73683f..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Schema.java +++ /dev/null @@ -1,105 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.flink.rest.models; - -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; - -public class Schema { - private int status = 0; - private String keysType; - private List properties; - - public Schema() { - properties = new ArrayList<>(); - } - - public Schema(int fieldCount) { - properties = new ArrayList<>(fieldCount); - } - - public int getStatus() { - return status; - } - - public void setStatus(int status) { - this.status = status; - } - - public String getKeysType() { - return keysType; - } - - public void setKeysType(String keysType) { - this.keysType = keysType; - } - - public List getProperties() { - return properties; - } - - public void setProperties(List properties) { - this.properties = properties; - } - - public void put(String name, String type, String comment, int scale, int precision, String aggregation_type) { - properties.add(new Field(name, type, comment, scale, precision, aggregation_type)); - } - - public void put(Field f) { - properties.add(f); - } - - public Field get(int index) { - if (index >= properties.size()) { - throw new IndexOutOfBoundsException("Index: " + index + ", Fields size:" + properties.size()); - } - return properties.get(index); - } - - public int size() { - return properties.size(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Schema schema = (Schema) o; - return status == schema.status && - Objects.equals(properties, schema.properties); - } - - @Override - public int hashCode() { - return Objects.hash(status, properties); - } - - @Override - public String toString() { - return "Schema{" + - "status=" + status + - ", properties=" + properties + - '}'; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Tablet.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Tablet.java deleted file mode 100644 index 70b0f13959..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/rest/models/Tablet.java +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
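`Schema`, removed above, is a thin wrapper over a list of `Field` beans; the `Field` class itself is not part of this hunk, so its shape is inferred from the `put(...)` signature. A short usage sketch under that assumption:

```java
public class SchemaExample {
    public static void main(String[] args) {
        Schema schema = new Schema(2);
        // put() constructs a Field(name, type, comment, scale, precision, aggregation_type).
        schema.put("id", "BIGINT", "primary key", 0, 0, null);
        schema.put("name", "VARCHAR", "user name", 0, 0, null);

        System.out.println(schema.size()); // 2
        System.out.println(schema);        // Schema{status=0, properties=[...]}

        // Out-of-range access fails fast with both sizes in the message:
        // schema.get(2) -> IndexOutOfBoundsException("Index: 2, Fields size:2")
    }
}
```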
-
-package org.apache.doris.flink.rest.models;
-
-import java.util.List;
-import java.util.Objects;
-
-public class Tablet {
-    private List<String> routings;
-    private int version;
-    private long versionHash;
-    private long schemaHash;
-
-    public List<String> getRoutings() {
-        return routings;
-    }
-
-    public void setRoutings(List<String> routings) {
-        this.routings = routings;
-    }
-
-    public int getVersion() {
-        return version;
-    }
-
-    public void setVersion(int version) {
-        this.version = version;
-    }
-
-    public long getVersionHash() {
-        return versionHash;
-    }
-
-    public void setVersionHash(long versionHash) {
-        this.versionHash = versionHash;
-    }
-
-    public long getSchemaHash() {
-        return schemaHash;
-    }
-
-    public void setSchemaHash(long schemaHash) {
-        this.schemaHash = schemaHash;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
-            return false;
-        }
-        Tablet tablet = (Tablet) o;
-        return version == tablet.version &&
-                versionHash == tablet.versionHash &&
-                schemaHash == tablet.schemaHash &&
-                Objects.equals(routings, tablet.routings);
-    }
-
-    @Override
-    public int hashCode() {
-        return Objects.hash(routings, version, versionHash, schemaHash);
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/serialization/Routing.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/serialization/Routing.java
deleted file mode 100644
index 25fdbe50e7..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/serialization/Routing.java
+++ /dev/null
@@ -1,70 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.flink.serialization;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.doris.flink.exception.IllegalArgumentException;
-
-import static org.apache.doris.flink.util.ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE;
-
-/**
- * present a Doris BE address.
- */
-public class Routing {
-    private static Logger logger = LoggerFactory.getLogger(Routing.class);
-
-    private String host;
-    private int port;
-
-    public Routing(String routing) throws IllegalArgumentException {
-        parseRouting(routing);
-    }
-
-    private void parseRouting(String routing) throws IllegalArgumentException {
-        logger.debug("Parse Doris BE address: '{}'.", routing);
-        String[] hostPort = routing.split(":");
-        if (hostPort.length != 2) {
-            logger.error("Format of Doris BE address '{}' is illegal.", routing);
-            throw new IllegalArgumentException("Doris BE", routing);
-        }
-        this.host = hostPort[0];
-        try {
-            this.port = Integer.parseInt(hostPort[1]);
-        } catch (NumberFormatException e) {
-            logger.error(PARSE_NUMBER_FAILED_MESSAGE, "Doris BE's port", hostPort[1]);
-            throw new IllegalArgumentException("Doris BE", routing);
-        }
-    }
-
-    public String getHost() {
-        return host;
-    }
-
-    public int getPort() {
-        return port;
-    }
-
-    @Override
-    public String toString() {
-        return "Doris BE{" +
-                "host='" + host + '\'' +
-                ", port=" + port +
-                '}';
-    }
-}
diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/serialization/RowBatch.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/serialization/RowBatch.java
deleted file mode 100644
index 3337637ff3..0000000000
--- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/serialization/RowBatch.java
+++ /dev/null
@@ -1,309 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
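`Routing`, removed above, only parses and holds a `host:port` pair. A quick sketch with a placeholder BE address; note the thrown type is the connector's own checked `IllegalArgumentException`, not `java.lang`'s:

```java
import org.apache.doris.flink.exception.IllegalArgumentException;

public class RoutingExample {
    public static void main(String[] args) throws IllegalArgumentException {
        Routing be = new Routing("10.0.0.1:9060"); // placeholder address
        System.out.println(be.getHost()); // 10.0.0.1
        System.out.println(be.getPort()); // 9060
        System.out.println(be);           // Doris BE{host='10.0.0.1', port=9060}

        // Anything that does not split into exactly host:port is rejected:
        // new Routing("10.0.0.1")     -> IllegalArgumentException
        // new Routing("10.0.0.1:abc") -> IllegalArgumentException
    }
}
```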
- -package org.apache.doris.flink.serialization; - -import org.apache.arrow.memory.RootAllocator; - -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.types.Types; -import org.apache.doris.flink.exception.DorisException; -import org.apache.doris.flink.rest.models.Schema; -import org.apache.doris.thrift.TScanBatchResult; - -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.List; -import java.util.NoSuchElementException; - -/** - * row batch data container. - */ -public class RowBatch { - private static Logger logger = LoggerFactory.getLogger(RowBatch.class); - - public static class Row { - private List cols; - - Row(int colCount) { - this.cols = new ArrayList<>(colCount); - } - - public List getCols() { - return cols; - } - - public void put(Object o) { - cols.add(o); - } - } - - // offset for iterate the rowBatch - private int offsetInRowBatch = 0; - private int rowCountInOneBatch = 0; - private int readRowCount = 0; - private List rowBatch = new ArrayList<>(); - private final ArrowStreamReader arrowStreamReader; - private VectorSchemaRoot root; - private List fieldVectors; - private RootAllocator rootAllocator; - private final Schema schema; - - public List getRowBatch() { - return rowBatch; - } - - public RowBatch(TScanBatchResult nextResult, Schema schema) { - this.schema = schema; - this.rootAllocator = new RootAllocator(Integer.MAX_VALUE); - this.arrowStreamReader = new ArrowStreamReader( - new ByteArrayInputStream(nextResult.getRows()), - rootAllocator - ); - this.offsetInRowBatch = 0; - } - - public RowBatch readArrow() throws DorisException { - try { - this.root = arrowStreamReader.getVectorSchemaRoot(); - while (arrowStreamReader.loadNextBatch()) { - fieldVectors = root.getFieldVectors(); - if (fieldVectors.size() != schema.size()) { - logger.error("Schema size '{}' is not equal to arrow field size '{}'.", - fieldVectors.size(), schema.size()); - throw new DorisException("Load Doris data failed, schema size of fetch data is wrong."); - } - if (fieldVectors.size() == 0 || root.getRowCount() == 0) { - logger.debug("One batch in arrow has no data."); - continue; - } - rowCountInOneBatch = root.getRowCount(); - // init the rowBatch - for (int i = 0; i < rowCountInOneBatch; ++i) { - rowBatch.add(new Row(fieldVectors.size())); - } - convertArrowToRowBatch(); - readRowCount += root.getRowCount(); - } - return this; - } catch (Exception e) { - logger.error("Read Doris Data failed because: ", e); - throw new DorisException(e.getMessage()); - } finally { - close(); - } - } - - public boolean hasNext() { - if (offsetInRowBatch < readRowCount) { - return true; - } - return false; - } - - private void addValueToRow(int 
rowIndex, Object obj) { - if (rowIndex > rowCountInOneBatch) { - String errMsg = "Get row offset: " + rowIndex + " larger than row size: " + - rowCountInOneBatch; - logger.error(errMsg); - throw new NoSuchElementException(errMsg); - } - rowBatch.get(readRowCount + rowIndex).put(obj); - } - - public void convertArrowToRowBatch() throws DorisException { - try { - for (int col = 0; col < fieldVectors.size(); col++) { - FieldVector curFieldVector = fieldVectors.get(col); - Types.MinorType mt = curFieldVector.getMinorType(); - - final String currentType = schema.get(col).getType(); - switch (currentType) { - case "NULL_TYPE": - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - addValueToRow(rowIndex, null); - } - break; - case "BOOLEAN": - Preconditions.checkArgument(mt.equals(Types.MinorType.BIT), - typeMismatchMessage(currentType, mt)); - BitVector bitVector = (BitVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = bitVector.isNull(rowIndex) ? null : bitVector.get(rowIndex) != 0; - addValueToRow(rowIndex, fieldValue); - } - break; - case "TINYINT": - Preconditions.checkArgument(mt.equals(Types.MinorType.TINYINT), - typeMismatchMessage(currentType, mt)); - TinyIntVector tinyIntVector = (TinyIntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = tinyIntVector.isNull(rowIndex) ? null : tinyIntVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "SMALLINT": - Preconditions.checkArgument(mt.equals(Types.MinorType.SMALLINT), - typeMismatchMessage(currentType, mt)); - SmallIntVector smallIntVector = (SmallIntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = smallIntVector.isNull(rowIndex) ? null : smallIntVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "INT": - Preconditions.checkArgument(mt.equals(Types.MinorType.INT), - typeMismatchMessage(currentType, mt)); - IntVector intVector = (IntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = intVector.isNull(rowIndex) ? null : intVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "BIGINT": - - Preconditions.checkArgument(mt.equals(Types.MinorType.BIGINT), - typeMismatchMessage(currentType, mt)); - BigIntVector bigIntVector = (BigIntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = bigIntVector.isNull(rowIndex) ? null : bigIntVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "FLOAT": - Preconditions.checkArgument(mt.equals(Types.MinorType.FLOAT4), - typeMismatchMessage(currentType, mt)); - Float4Vector float4Vector = (Float4Vector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = float4Vector.isNull(rowIndex) ? null : float4Vector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "TIME": - case "DOUBLE": - Preconditions.checkArgument(mt.equals(Types.MinorType.FLOAT8), - typeMismatchMessage(currentType, mt)); - Float8Vector float8Vector = (Float8Vector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = float8Vector.isNull(rowIndex) ? 
null : float8Vector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "BINARY": - Preconditions.checkArgument(mt.equals(Types.MinorType.VARBINARY), - typeMismatchMessage(currentType, mt)); - VarBinaryVector varBinaryVector = (VarBinaryVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = varBinaryVector.isNull(rowIndex) ? null : varBinaryVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "DECIMAL": - case "DECIMALV2": - Preconditions.checkArgument(mt.equals(Types.MinorType.DECIMAL), - typeMismatchMessage(currentType, mt)); - DecimalVector decimalVector = (DecimalVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - if (decimalVector.isNull(rowIndex)) { - addValueToRow(rowIndex, null); - continue; - } - BigDecimal value = decimalVector.getObject(rowIndex).stripTrailingZeros(); - addValueToRow(rowIndex, DecimalData.fromBigDecimal(value, value.precision(), value.scale())); - } - break; - case "DATE": - case "LARGEINT": - case "DATETIME": - case "CHAR": - case "VARCHAR": - case "STRING": - Preconditions.checkArgument(mt.equals(Types.MinorType.VARCHAR), - typeMismatchMessage(currentType, mt)); - VarCharVector varCharVector = (VarCharVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - if (varCharVector.isNull(rowIndex)) { - addValueToRow(rowIndex, null); - continue; - } - String value = new String(varCharVector.get(rowIndex)); - addValueToRow(rowIndex, StringData.fromString(value)); - } - break; - default: - String errMsg = "Unsupported type " + schema.get(col).getType(); - logger.error(errMsg); - throw new DorisException(errMsg); - } - } - } catch (Exception e) { - close(); - throw e; - } - } - - public List next() throws DorisException { - if (!hasNext()) { - String errMsg = "Get row offset:" + offsetInRowBatch + " larger than row size: " + readRowCount; - logger.error(errMsg); - throw new NoSuchElementException(errMsg); - } - return rowBatch.get(offsetInRowBatch++).getCols(); - } - - private String typeMismatchMessage(final String sparkType, final Types.MinorType arrowType) { - final String messageTemplate = "FLINK type is %1$s, but arrow type is %2$s."; - return String.format(messageTemplate, sparkType, arrowType.name()); - } - - public int getReadRowCount() { - return readRowCount; - } - - public void close() { - try { - if (arrowStreamReader != null) { - arrowStreamReader.close(); - } - if (rootAllocator != null) { - rootAllocator.close(); - } - } catch (IOException ioe) { - // do nothing - } - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicOutputFormat.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicOutputFormat.java deleted file mode 100644 index 44e0a6a4d3..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicOutputFormat.java +++ /dev/null @@ -1,405 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.table; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.doris.flink.cfg.DorisExecutionOptions; -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.doris.flink.exception.DorisException; -import org.apache.doris.flink.exception.StreamLoadException; -import org.apache.doris.flink.rest.RestService; -import org.apache.doris.flink.rest.models.Schema; -import org.apache.flink.api.common.io.RichOutputFormat; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.util.ExecutorThreadFactory; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.DataType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.types.RowKind; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.StringJoiner; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledFuture; -import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -import static org.apache.flink.table.data.RowData.createFieldGetter; - - -/** - * DorisDynamicOutputFormat - **/ -public class DorisDynamicOutputFormat extends RichOutputFormat { - - private static final Logger LOG = LoggerFactory.getLogger(DorisDynamicOutputFormat.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private static final String COLUMNS_KEY = "columns"; - private static final String FIELD_DELIMITER_KEY = "column_separator"; - private static final String FIELD_DELIMITER_DEFAULT = "\t"; - private static final String LINE_DELIMITER_KEY = "line_delimiter"; - private static final String LINE_DELIMITER_DEFAULT = "\n"; - private static final String FORMAT_KEY = "format"; - private static final String FORMAT_JSON_VALUE = "json"; - private static final String NULL_VALUE = "\\N"; - private static final String ESCAPE_DELIMITERS_KEY = "escape_delimiters"; - private static final String ESCAPE_DELIMITERS_DEFAULT = "false"; - private static final String DORIS_DELETE_SIGN = "__DORIS_DELETE_SIGN__"; - private static final String UNIQUE_KEYS_TYPE = "UNIQUE_KEYS"; - private final String[] fieldNames; - private final boolean jsonFormat; - private final RowData.FieldGetter[] fieldGetters; - private final List batch = new ArrayList<>(); - private String fieldDelimiter; - private String lineDelimiter; - private DorisOptions options; - private DorisReadOptions readOptions; - private DorisExecutionOptions executionOptions; - private DorisStreamLoad dorisStreamLoad; - private String keysType; - - private transient volatile boolean closed = false; - private transient ScheduledExecutorService scheduler; - private transient ScheduledFuture scheduledFuture; - private transient 
volatile Exception flushException; - - public DorisDynamicOutputFormat(DorisOptions option, - DorisReadOptions readOptions, - DorisExecutionOptions executionOptions, - LogicalType[] logicalTypes, - String[] fieldNames) { - this.options = option; - this.readOptions = readOptions; - this.executionOptions = executionOptions; - this.fieldNames = fieldNames; - this.jsonFormat = FORMAT_JSON_VALUE.equals(executionOptions.getStreamLoadProp().getProperty(FORMAT_KEY)); - this.keysType = parseKeysType(); - - handleStreamloadProp(); - this.fieldGetters = new RowData.FieldGetter[logicalTypes.length]; - for (int i = 0; i < logicalTypes.length; i++) { - fieldGetters[i] = createFieldGetter(logicalTypes[i], i); - } - } - - /** - * parse table keysType - * - * @return keysType - */ - private String parseKeysType() { - try { - Schema schema = RestService.getSchema(options, readOptions, LOG); - return schema.getKeysType(); - } catch (DorisException e) { - throw new RuntimeException("Failed fetch doris table schema: " + options.getTableIdentifier()); - } - } - - /** - * A builder used to set parameters to the output format's configuration in a fluent way. - * - * @return builder - */ - public static Builder builder() { - return new Builder(); - } - - private void handleStreamloadProp() { - Properties streamLoadProp = executionOptions.getStreamLoadProp(); - boolean ifEscape = Boolean.parseBoolean(streamLoadProp.getProperty(ESCAPE_DELIMITERS_KEY, ESCAPE_DELIMITERS_DEFAULT)); - if (ifEscape) { - this.fieldDelimiter = escapeString(streamLoadProp.getProperty(FIELD_DELIMITER_KEY, - FIELD_DELIMITER_DEFAULT)); - this.lineDelimiter = escapeString(streamLoadProp.getProperty(LINE_DELIMITER_KEY, - LINE_DELIMITER_DEFAULT)); - - if (streamLoadProp.contains(ESCAPE_DELIMITERS_KEY)) { - streamLoadProp.remove(ESCAPE_DELIMITERS_KEY); - } - } else { - this.fieldDelimiter = streamLoadProp.getProperty(FIELD_DELIMITER_KEY, - FIELD_DELIMITER_DEFAULT); - this.lineDelimiter = streamLoadProp.getProperty(LINE_DELIMITER_KEY, - LINE_DELIMITER_DEFAULT); - } - - //add column key when fieldNames is not empty - if (!streamLoadProp.containsKey(COLUMNS_KEY) && fieldNames != null && fieldNames.length > 0) { - String columns = String.join(",", Arrays.stream(fieldNames).map(item -> String.format("`%s`", item.trim().replace("`", ""))).collect(Collectors.toList())); - if (enableBatchDelete()) { - columns = String.format("%s,%s", columns, DORIS_DELETE_SIGN); - } - streamLoadProp.put(COLUMNS_KEY, columns); - } - } - - private String escapeString(String s) { - Pattern p = Pattern.compile("\\\\x(\\d{2})"); - Matcher m = p.matcher(s); - - StringBuffer buf = new StringBuffer(); - while (m.find()) { - m.appendReplacement(buf, String.format("%s", (char) Integer.parseInt(m.group(1)))); - } - m.appendTail(buf); - return buf.toString(); - } - - private boolean enableBatchDelete() { - return executionOptions.getEnableDelete() || UNIQUE_KEYS_TYPE.equals(keysType); - } - - @Override - public void configure(Configuration configuration) { - } - - @Override - public void open(int taskNumber, int numTasks) throws IOException { - dorisStreamLoad = new DorisStreamLoad( - getBackend(), - options.getTableIdentifier().split("\\.")[0], - options.getTableIdentifier().split("\\.")[1], - options.getUsername(), - options.getPassword(), - executionOptions.getStreamLoadProp()); - LOG.info("Streamload BE:{}", dorisStreamLoad.getLoadUrlStr()); - - if (executionOptions.getBatchIntervalMs() != 0 && executionOptions.getBatchSize() != 1) { - this.scheduler = 
Executors.newScheduledThreadPool(1, new ExecutorThreadFactory("doris-streamload-output" + - "-format")); - this.scheduledFuture = this.scheduler.scheduleWithFixedDelay(() -> { - synchronized (DorisDynamicOutputFormat.this) { - if (!closed) { - try { - flush(); - } catch (Exception e) { - flushException = e; - } - } - } - }, executionOptions.getBatchIntervalMs(), executionOptions.getBatchIntervalMs(), TimeUnit.MILLISECONDS); - } - } - - private void checkFlushException() { - if (flushException != null) { - throw new RuntimeException("Writing records to streamload failed.", flushException); - } - } - - @Override - public synchronized void writeRecord(T row) throws IOException { - checkFlushException(); - addBatch(row); - if (executionOptions.getBatchSize() > 0 && batch.size() >= executionOptions.getBatchSize()) { - flush(); - } - } - - private void addBatch(T row) { - if (row instanceof RowData) { - RowData rowData = (RowData) row; - Map valueMap = new HashMap<>(); - StringJoiner value = new StringJoiner(this.fieldDelimiter); - for (int i = 0; i < rowData.getArity() && i < fieldGetters.length; ++i) { - Object field = fieldGetters[i].getFieldOrNull(rowData); - if (jsonFormat) { - String data = field != null ? field.toString() : null; - valueMap.put(this.fieldNames[i], data); - } else { - String data = field != null ? field.toString() : NULL_VALUE; - value.add(data); - } - } - // add doris delete sign - if (enableBatchDelete()) { - if (jsonFormat) { - valueMap.put(DORIS_DELETE_SIGN, parseDeleteSign(rowData.getRowKind())); - } else { - value.add(parseDeleteSign(rowData.getRowKind())); - } - } - Object data = jsonFormat ? valueMap : value.toString(); - batch.add(data); - } else if (row instanceof String) { - batch.add(row); - } else { - throw new RuntimeException("The type of element should be 'RowData' or 'String' only."); - } - } - - private String parseDeleteSign(RowKind rowKind) { - if (RowKind.INSERT.equals(rowKind) || RowKind.UPDATE_AFTER.equals(rowKind)) { - return "0"; - } else if (RowKind.DELETE.equals(rowKind) || RowKind.UPDATE_BEFORE.equals(rowKind)) { - return "1"; - } else { - throw new RuntimeException("Unrecognized row kind:" + rowKind.toString()); - } - } - - - @Override - public synchronized void close() throws IOException { - if (!closed) { - closed = true; - - if (this.scheduledFuture != null) { - scheduledFuture.cancel(false); - this.scheduler.shutdown(); - } - - try { - flush(); - } catch (Exception e) { - LOG.warn("Writing records to doris failed.", e); - throw new RuntimeException("Writing records to doris failed.", e); - } finally { - this.dorisStreamLoad.close(); - } - } - checkFlushException(); - } - - public synchronized void flush() throws IOException { - checkFlushException(); - if (batch.isEmpty()) { - return; - } - String result; - if (jsonFormat) { - if (batch.get(0) instanceof String) { - result = batch.toString(); - } else { - result = OBJECT_MAPPER.writeValueAsString(batch); - } - } else { - result = String.join(this.lineDelimiter, batch); - } - for (int i = 0; i <= executionOptions.getMaxRetries(); i++) { - try { - dorisStreamLoad.load(result); - batch.clear(); - break; - } catch (StreamLoadException e) { - LOG.error("doris sink error, retry times = {}", i, e); - if (i >= executionOptions.getMaxRetries()) { - throw new IOException(e); - } - try { - dorisStreamLoad.setHostPort(getBackend()); - LOG.warn("streamload error,switch be: {}", dorisStreamLoad.getLoadUrlStr(), e); - Thread.sleep(1000 * i); - } catch (InterruptedException ex) { - 
Thread.currentThread().interrupt(); - throw new IOException("unable to flush; interrupted while doing another attempt", e); - } - } - } - } - - private String getBackend() throws IOException { - try { - //get be url from fe - return RestService.randomBackend(options, readOptions, LOG); - } catch (IOException | DorisException e) { - LOG.error("get backends info fail"); - throw new IOException(e); - } - } - - /** - * Builder for {@link DorisDynamicOutputFormat}. - */ - public static class Builder { - private DorisOptions.Builder optionsBuilder; - private DorisReadOptions readOptions; - private DorisExecutionOptions executionOptions; - private DataType[] fieldDataTypes; - private String[] fieldNames; - - public Builder() { - this.optionsBuilder = DorisOptions.builder(); - } - - public Builder setFenodes(String fenodes) { - this.optionsBuilder.setFenodes(fenodes); - return this; - } - - public Builder setUsername(String username) { - this.optionsBuilder.setUsername(username); - return this; - } - - public Builder setPassword(String password) { - this.optionsBuilder.setPassword(password); - return this; - } - - public Builder setTableIdentifier(String tableIdentifier) { - this.optionsBuilder.setTableIdentifier(tableIdentifier); - return this; - } - - public Builder setReadOptions(DorisReadOptions readOptions) { - this.readOptions = readOptions; - return this; - } - - public Builder setExecutionOptions(DorisExecutionOptions executionOptions) { - this.executionOptions = executionOptions; - return this; - } - - public Builder setFieldNames(String[] fieldNames) { - this.fieldNames = fieldNames; - return this; - } - - public Builder setFieldDataTypes(DataType[] fieldDataTypes) { - this.fieldDataTypes = fieldDataTypes; - return this; - } - - public DorisDynamicOutputFormat build() { - final LogicalType[] logicalTypes = - Arrays.stream(fieldDataTypes) - .map(DataType::getLogicalType) - .toArray(LogicalType[]::new); - return new DorisDynamicOutputFormat( - optionsBuilder.build(), readOptions, executionOptions, logicalTypes, fieldNames - ); - } - - - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableFactory.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableFactory.java deleted file mode 100644 index dbba859613..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableFactory.java +++ /dev/null @@ -1,270 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
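`DorisDynamicOutputFormat`, removed above, is normally assembled through its fluent `Builder`. A configuration sketch using only setters visible in this diff; the address, credentials, table name, and the `strip_outer_array` stream load property are placeholders or assumptions, and actually opening the format requires a running Flink task:

```java
import org.apache.doris.flink.cfg.DorisExecutionOptions;
import org.apache.doris.flink.cfg.DorisReadOptions;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;

import java.util.Properties;

public class OutputFormatExample {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("format", "json");            // switch the sink to JSON rows
        props.setProperty("strip_outer_array", "true"); // stream load option (assumed)

        DorisExecutionOptions execOptions = DorisExecutionOptions.builder()
                .setBatchSize(100)        // flush after 100 buffered rows
                .setMaxRetries(3)
                .setBatchIntervalMs(1000L) // or flush every second, whichever comes first
                .setEnableDelete(true)
                .setStreamLoadProp(props)
                .build();

        DorisDynamicOutputFormat format = DorisDynamicOutputFormat.builder()
                .setFenodes("127.0.0.1:8030")      // placeholder FE HTTP address
                .setUsername("root")
                .setPassword("")
                .setTableIdentifier("db.table")
                .setReadOptions(DorisReadOptions.builder().build())
                .setExecutionOptions(execOptions)
                .setFieldNames(new String[]{"id", "name"})
                .setFieldDataTypes(new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()})
                .build();
    }
}
```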
- -package org.apache.doris.flink.table; - -import org.apache.doris.flink.cfg.DorisExecutionOptions; -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.flink.configuration.ConfigOption; -import org.apache.flink.configuration.ConfigOptions; -import org.apache.flink.configuration.ReadableConfig; -import org.apache.flink.table.api.TableSchema; -import org.apache.flink.table.connector.sink.DynamicTableSink; -import org.apache.flink.table.connector.source.DynamicTableSource; -import org.apache.flink.table.factories.DynamicTableSinkFactory; -import org.apache.flink.table.factories.DynamicTableSourceFactory; -import org.apache.flink.table.factories.FactoryUtil; -import org.apache.flink.table.types.DataType; -import org.apache.flink.table.utils.TableSchemaUtils; - -import java.time.Duration; -import java.util.HashSet; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_BATCH_SIZE_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_EXEC_MEM_LIMIT_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_REQUEST_RETRIES_DEFAULT; -import static org.apache.doris.flink.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_DEFAULT; - -/** - * The {@link DorisDynamicTableFactory} translates the catalog table to a table source. - * - *

Because the table source requires a decoding format, we are discovering the format using the - * provided {@link FactoryUtil} for convenience. - */ -public final class DorisDynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { - - public static final ConfigOption FENODES = ConfigOptions.key("fenodes").stringType().noDefaultValue().withDescription("doris fe http address."); - public static final ConfigOption TABLE_IDENTIFIER = ConfigOptions.key("table.identifier").stringType().noDefaultValue().withDescription("the jdbc table name."); - public static final ConfigOption USERNAME = ConfigOptions.key("username").stringType().noDefaultValue().withDescription("the jdbc user name."); - public static final ConfigOption PASSWORD = ConfigOptions.key("password").stringType().noDefaultValue().withDescription("the jdbc password."); - // Prefix for Doris StreamLoad specific properties. - public static final String STREAM_LOAD_PROP_PREFIX = "sink.properties."; - // doris options - private static final ConfigOption DORIS_READ_FIELD = ConfigOptions - .key("doris.read.field") - .stringType() - .noDefaultValue() - .withDescription("List of column names in the Doris table, separated by commas"); - private static final ConfigOption DORIS_FILTER_QUERY = ConfigOptions - .key("doris.filter.query") - .stringType() - .noDefaultValue() - .withDescription("Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering"); - private static final ConfigOption DORIS_TABLET_SIZE = ConfigOptions - .key("doris.request.tablet.size") - .intType() - .defaultValue(DORIS_TABLET_SIZE_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_REQUEST_CONNECT_TIMEOUT_MS = ConfigOptions - .key("doris.request.connect.timeout.ms") - .intType() - .defaultValue(DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_REQUEST_READ_TIMEOUT_MS = ConfigOptions - .key("doris.request.read.timeout.ms") - .intType() - .defaultValue(DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_REQUEST_QUERY_TIMEOUT_S = ConfigOptions - .key("doris.request.query.timeout.s") - .intType() - .defaultValue(DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_REQUEST_RETRIES = ConfigOptions - .key("doris.request.retries") - .intType() - .defaultValue(DORIS_REQUEST_RETRIES_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_DESERIALIZE_ARROW_ASYNC = ConfigOptions - .key("doris.deserialize.arrow.async") - .booleanType() - .defaultValue(DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_DESERIALIZE_QUEUE_SIZE = ConfigOptions - .key("doris.request.retriesdoris.deserialize.queue.size") - .intType() - .defaultValue(DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_BATCH_SIZE = ConfigOptions - .key("doris.batch.size") - .intType() - .defaultValue(DORIS_BATCH_SIZE_DEFAULT) - .withDescription(""); - private static final ConfigOption DORIS_EXEC_MEM_LIMIT = ConfigOptions - .key("doris.exec.mem.limit") - .longType() - .defaultValue(DORIS_EXEC_MEM_LIMIT_DEFAULT) - .withDescription(""); - // flink write config options - private static final ConfigOption SINK_BUFFER_FLUSH_MAX_ROWS = ConfigOptions - .key("sink.batch.size") - .intType() - .defaultValue(100) - 
.withDescription("the flush max size (includes all append, upsert and delete records), over this number" + - " of records, will flush data. The default value is 100."); - private static final ConfigOption SINK_MAX_RETRIES = ConfigOptions - .key("sink.max-retries") - .intType() - .defaultValue(3) - .withDescription("the max retry times if writing records to database failed."); - private static final ConfigOption SINK_BUFFER_FLUSH_INTERVAL = ConfigOptions - .key("sink.batch.interval") - .durationType() - .defaultValue(Duration.ofSeconds(1)) - .withDescription("the flush interval mills, over this time, asynchronous threads will flush data. The " + - "default value is 1s."); - private static final ConfigOption SINK_ENABLE_DELETE = ConfigOptions - .key("sink.enable-delete") - .booleanType() - .defaultValue(true) - .withDescription("whether to enable the delete function"); - - @Override - public String factoryIdentifier() { - return "doris"; // used for matching to `connector = '...'` - } - - @Override - public Set> requiredOptions() { - final Set> options = new HashSet<>(); - options.add(FENODES); - options.add(TABLE_IDENTIFIER); - return options; - } - - @Override - public Set> optionalOptions() { - final Set> options = new HashSet<>(); - options.add(FENODES); - options.add(TABLE_IDENTIFIER); - options.add(USERNAME); - options.add(PASSWORD); - - options.add(DORIS_READ_FIELD); - options.add(DORIS_FILTER_QUERY); - options.add(DORIS_TABLET_SIZE); - options.add(DORIS_REQUEST_CONNECT_TIMEOUT_MS); - options.add(DORIS_REQUEST_READ_TIMEOUT_MS); - options.add(DORIS_REQUEST_QUERY_TIMEOUT_S); - options.add(DORIS_REQUEST_RETRIES); - options.add(DORIS_DESERIALIZE_ARROW_ASYNC); - options.add(DORIS_DESERIALIZE_QUEUE_SIZE); - options.add(DORIS_BATCH_SIZE); - options.add(DORIS_EXEC_MEM_LIMIT); - - options.add(SINK_BUFFER_FLUSH_MAX_ROWS); - options.add(SINK_MAX_RETRIES); - options.add(SINK_BUFFER_FLUSH_INTERVAL); - options.add(SINK_ENABLE_DELETE); - return options; - } - - @Override - public DynamicTableSource createDynamicTableSource(Context context) { - // either implement your custom validation logic here ... 
- // or use the provided helper utility - final FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); - // validate all options - helper.validateExcept(STREAM_LOAD_PROP_PREFIX); - // get the validated options - final ReadableConfig options = helper.getOptions(); - // derive the produced data type (excluding computed columns) from the catalog table - final DataType producedDataType = context.getCatalogTable().getSchema().toPhysicalRowDataType(); - TableSchema physicalSchema = TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema()); - // create and return dynamic table source - return new DorisDynamicTableSource( - getDorisOptions(helper.getOptions()), - getDorisReadOptions(helper.getOptions()), - physicalSchema); - } - - private DorisOptions getDorisOptions(ReadableConfig readableConfig) { - final String fenodes = readableConfig.get(FENODES); - final DorisOptions.Builder builder = DorisOptions.builder() - .setFenodes(fenodes) - .setTableIdentifier(readableConfig.get(TABLE_IDENTIFIER)); - - readableConfig.getOptional(USERNAME).ifPresent(builder::setUsername); - readableConfig.getOptional(PASSWORD).ifPresent(builder::setPassword); - return builder.build(); - } - - private DorisReadOptions getDorisReadOptions(ReadableConfig readableConfig) { - final DorisReadOptions.Builder builder = DorisReadOptions.builder(); - builder.setDeserializeArrowAsync(readableConfig.get(DORIS_DESERIALIZE_ARROW_ASYNC)) - .setDeserializeQueueSize(readableConfig.get(DORIS_DESERIALIZE_QUEUE_SIZE)) - .setExecMemLimit(readableConfig.get(DORIS_EXEC_MEM_LIMIT)) - .setFilterQuery(readableConfig.get(DORIS_FILTER_QUERY)) - .setReadFields(readableConfig.get(DORIS_READ_FIELD)) - .setRequestQueryTimeoutS(readableConfig.get(DORIS_REQUEST_QUERY_TIMEOUT_S)) - .setRequestBatchSize(readableConfig.get(DORIS_BATCH_SIZE)) - .setRequestConnectTimeoutMs(readableConfig.get(DORIS_REQUEST_CONNECT_TIMEOUT_MS)) - .setRequestReadTimeoutMs(readableConfig.get(DORIS_REQUEST_READ_TIMEOUT_MS)) - .setRequestRetries(readableConfig.get(DORIS_REQUEST_RETRIES)) - .setRequestTabletSize(readableConfig.get(DORIS_TABLET_SIZE)); - return builder.build(); - } - - private DorisExecutionOptions getDorisExecutionOptions(ReadableConfig readableConfig, Properties streamLoadProp) { - final DorisExecutionOptions.Builder builder = DorisExecutionOptions.builder(); - builder.setBatchSize(readableConfig.get(SINK_BUFFER_FLUSH_MAX_ROWS)); - builder.setMaxRetries(readableConfig.get(SINK_MAX_RETRIES)); - builder.setBatchIntervalMs(readableConfig.get(SINK_BUFFER_FLUSH_INTERVAL).toMillis()); - builder.setStreamLoadProp(streamLoadProp); - builder.setEnableDelete(readableConfig.get(SINK_ENABLE_DELETE)); - return builder.build(); - } - - private Properties getStreamLoadProp(Map tableOptions) { - final Properties streamLoadProp = new Properties(); - - for (Map.Entry entry : tableOptions.entrySet()) { - if (entry.getKey().startsWith(STREAM_LOAD_PROP_PREFIX)) { - String subKey = entry.getKey().substring(STREAM_LOAD_PROP_PREFIX.length()); - streamLoadProp.put(subKey, entry.getValue()); - } - } - return streamLoadProp; - } - - @Override - public DynamicTableSink createDynamicTableSink(Context context) { - final FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context); - // validate all options - helper.validateExcept(STREAM_LOAD_PROP_PREFIX); - - Properties streamLoadProp = getStreamLoadProp(context.getCatalogTable().getOptions()); - TableSchema physicalSchema = - 
TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema()); - // create and return dynamic table source - return new DorisDynamicTableSink( - getDorisOptions(helper.getOptions()), - getDorisReadOptions(helper.getOptions()), - getDorisExecutionOptions(helper.getOptions(), streamLoadProp), - physicalSchema - ); - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableSink.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableSink.java deleted file mode 100644 index cccdb45971..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableSink.java +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.table; - -import org.apache.doris.flink.cfg.DorisExecutionOptions; -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.flink.table.api.TableSchema; -import org.apache.flink.table.connector.ChangelogMode; -import org.apache.flink.table.connector.sink.DynamicTableSink; -import org.apache.flink.table.connector.sink.OutputFormatProvider; -import org.apache.flink.types.RowKind; - -/** - * DorisDynamicTableSink - **/ -public class DorisDynamicTableSink implements DynamicTableSink { - - private final DorisOptions options; - private final DorisReadOptions readOptions; - private final DorisExecutionOptions executionOptions; - private final TableSchema tableSchema; - - public DorisDynamicTableSink(DorisOptions options, - DorisReadOptions readOptions, - DorisExecutionOptions executionOptions, - TableSchema tableSchema) { - this.options = options; - this.readOptions = readOptions; - this.executionOptions = executionOptions; - this.tableSchema = tableSchema; - } - - @Override - public ChangelogMode getChangelogMode(ChangelogMode changelogMode) { - return ChangelogMode.newBuilder() - .addContainedKind(RowKind.INSERT) - .addContainedKind(RowKind.DELETE) - .addContainedKind(RowKind.UPDATE_AFTER) - .build(); - } - - @Override - public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { - DorisDynamicOutputFormat.Builder builder = DorisDynamicOutputFormat.builder() - .setFenodes(options.getFenodes()) - .setUsername(options.getUsername()) - .setPassword(options.getPassword()) - .setTableIdentifier(options.getTableIdentifier()) - .setReadOptions(readOptions) - .setExecutionOptions(executionOptions) - .setFieldDataTypes(tableSchema.getFieldDataTypes()) - .setFieldNames(tableSchema.getFieldNames()); - return OutputFormatProvider.of(builder.build()); - } - - @Override - public DynamicTableSink copy() { - return new DorisDynamicTableSink(options, readOptions, executionOptions, 
tableSchema); - } - - @Override - public String asSummaryString() { - return "Doris Table Sink"; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableSource.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableSource.java deleted file mode 100644 index 0262677e7a..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisDynamicTableSource.java +++ /dev/null @@ -1,101 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.table; - -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.doris.flink.exception.DorisException; -import org.apache.doris.flink.rest.PartitionDefinition; -import org.apache.doris.flink.rest.RestService; -import org.apache.flink.api.common.serialization.DeserializationSchema; -import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.table.api.TableSchema; -import org.apache.flink.table.connector.ChangelogMode; -import org.apache.flink.table.connector.source.DynamicTableSource; -import org.apache.flink.table.connector.source.InputFormatProvider; -import org.apache.flink.table.connector.source.LookupTableSource; -import org.apache.flink.table.connector.source.ScanTableSource; -import org.apache.flink.table.connector.source.abilities.SupportsFilterPushDown; -import org.apache.flink.table.connector.source.abilities.SupportsProjectionPushDown; -import org.apache.flink.table.data.RowData; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; - -/** - * The {@link DorisDynamicTableSource} is used during planning. - * - *

In our example, we don't implement any of the available ability interfaces such as {@link SupportsFilterPushDown} - * or {@link SupportsProjectionPushDown}. Therefore, the main logic can be found in {@link #getScanRuntimeProvider(ScanContext)} - * where we instantiate the required {@link SourceFunction} and its {@link DeserializationSchema} for - * runtime. Both instances are parameterized to return internal data structures (i.e. {@link RowData}). - */ -public final class DorisDynamicTableSource implements ScanTableSource, LookupTableSource { - - private final DorisOptions options; - private final DorisReadOptions readOptions; - private TableSchema physicalSchema; - private static final Logger LOG = LoggerFactory.getLogger(DorisRowDataInputFormat.class); - - public DorisDynamicTableSource(DorisOptions options, DorisReadOptions readOptions, TableSchema physicalSchema) { - this.options = options; - this.readOptions = readOptions; - this.physicalSchema = physicalSchema; - } - - @Override - public ChangelogMode getChangelogMode() { - // in our example the format decides about the changelog mode - // but it could also be the source itself - return ChangelogMode.insertOnly(); - } - - @Override - public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) { - List dorisPartitions; - try { - dorisPartitions = RestService.findPartitions(options, readOptions, LOG); - } catch (DorisException e) { - throw new RuntimeException("Failed fetch doris partitions"); - } - DorisRowDataInputFormat.Builder builder = DorisRowDataInputFormat.builder() - .setFenodes(options.getFenodes()) - .setUsername(options.getUsername()) - .setPassword(options.getPassword()) - .setTableIdentifier(options.getTableIdentifier()) - .setPartitions(dorisPartitions) - .setReadOptions(readOptions); - return InputFormatProvider.of(builder.build()); - } - - @Override - public LookupRuntimeProvider getLookupRuntimeProvider(LookupContext lookupContext) { - return null; - } - - @Override - public DynamicTableSource copy() { - return new DorisDynamicTableSource(options, readOptions, physicalSchema); - } - - @Override - public String asSummaryString() { - return "Doris Table Source"; - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisRowDataInputFormat.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisRowDataInputFormat.java deleted file mode 100644 index c75a88ff27..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisRowDataInputFormat.java +++ /dev/null @@ -1,227 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
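The factory and source removed above are what back the `'doris'` SQL connector. A sketch of the SQL-side usage with placeholder connection values; the option keys mirror the `ConfigOptions` declared in the factory, and the `EnvironmentSettings` call shown matches Flink 1.13 and may differ in other versions:

```java
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class DorisSqlExample {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        tEnv.executeSql(
                "CREATE TABLE doris_sink (" +
                "  id BIGINT," +
                "  name STRING" +
                ") WITH (" +
                "  'connector' = 'doris'," +            // factoryIdentifier()
                "  'fenodes' = '127.0.0.1:8030'," +     // placeholder FE address
                "  'table.identifier' = 'db.table'," +
                "  'username' = 'root'," +
                "  'password' = ''," +
                "  'sink.batch.size' = '100'," +
                "  'sink.max-retries' = '3'" +
                ")");

        tEnv.executeSql("INSERT INTO doris_sink VALUES (1, 'doris')");
    }
}
```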
-package org.apache.doris.flink.table; - -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.doris.flink.datastream.ScalaValueReader; -import org.apache.doris.flink.rest.PartitionDefinition; -import org.apache.flink.annotation.Internal; -import org.apache.flink.api.common.io.DefaultInputSplitAssigner; -import org.apache.flink.api.common.io.InputFormat; -import org.apache.flink.api.common.io.RichInputFormat; -import org.apache.flink.api.common.io.statistics.BaseStatistics; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.java.typeutils.ResultTypeQueryable; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.core.io.InputSplitAssigner; -import org.apache.flink.table.data.GenericRowData; -import org.apache.flink.table.data.RowData; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.sql.PreparedStatement; -import java.util.ArrayList; -import java.util.List; - -/** - * InputFormat for {@link DorisDynamicTableSource}. - */ -@Internal -public class DorisRowDataInputFormat extends RichInputFormat implements ResultTypeQueryable { - - private static final long serialVersionUID = 1L; - private static final Logger LOG = LoggerFactory.getLogger(DorisRowDataInputFormat.class); - - private DorisOptions options; - private DorisReadOptions readOptions; - private List dorisPartitions; - private TypeInformation rowDataTypeInfo; - - private ScalaValueReader scalaValueReader; - private transient boolean hasNext; - - public DorisRowDataInputFormat(DorisOptions options, List dorisPartitions, DorisReadOptions readOptions) { - this.options = options; - this.dorisPartitions = dorisPartitions; - this.readOptions = readOptions; - } - - @Override - public void configure(Configuration parameters) { - //do nothing here - } - - @Override - public void openInputFormat() { - //called once per inputFormat (on open) - } - - @Override - public void closeInputFormat() { - //called once per inputFormat (on close) - } - - /** - * Connects to the source database and executes the query in a parallel - * fashion if - * this {@link InputFormat} is built using a parameterized query (i.e. using - * a {@link PreparedStatement}) - * and a proper {@link }, in a non-parallel - * fashion otherwise. - * - * @param inputSplit which is ignored if this InputFormat is executed as a - * non-parallel source, - * a "hook" to the query parameters otherwise (using its - * splitNumber) - * @throws IOException if there's an error during the execution of the query - */ - @Override - public void open(DorisTableInputSplit inputSplit) throws IOException { - scalaValueReader = new ScalaValueReader(inputSplit.partition, options, readOptions); - hasNext = scalaValueReader.hasNext(); - } - - /** - * Closes all resources used. - * - * @throws IOException Indicates that a resource could not be closed. - */ - @Override - public void close() throws IOException { - - } - - @Override - public TypeInformation getProducedType() { - return rowDataTypeInfo; - } - - /** - * Checks whether all data has been read. - * - * @return boolean value indication whether all data has been read. - * @throws IOException - */ - @Override - public boolean reachedEnd() throws IOException { - return !hasNext; - } - - /** - * Stores the next resultSet row in a tuple. - * - * @param reuse row to be reused. 
- * @return row containing next {@link RowData} - * @throws IOException - */ - @Override - public RowData nextRecord(RowData reuse) throws IOException { - if (!hasNext) { - return null; - } - List next = (List) scalaValueReader.next(); - GenericRowData genericRowData = new GenericRowData(next.size()); - for (int i = 0; i < next.size(); i++) { - genericRowData.setField(i, next.get(i)); - } - //update hasNext after we've read the record - hasNext = scalaValueReader.hasNext(); - return genericRowData; - } - - @Override - public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException { - return cachedStatistics; - } - - @Override - public DorisTableInputSplit[] createInputSplits(int minNumSplits) throws IOException { - List dorisSplits = new ArrayList<>(); - int splitNum = 0; - for (PartitionDefinition partition : dorisPartitions) { - dorisSplits.add(new DorisTableInputSplit(splitNum++, partition)); - } - LOG.info("DorisTableInputSplit Num:{}", dorisSplits.size()); - return dorisSplits.toArray(new DorisTableInputSplit[0]); - } - - @Override - public InputSplitAssigner getInputSplitAssigner(DorisTableInputSplit[] inputSplits) { - return new DefaultInputSplitAssigner(inputSplits); - } - - /** - * A builder used to set parameters to the output format's configuration in a fluent way. - * - * @return builder - */ - public static Builder builder() { - return new Builder(); - } - - /** - * Builder for {@link DorisRowDataInputFormat}. - */ - public static class Builder { - private DorisOptions.Builder optionsBuilder; - private List partitions; - private DorisReadOptions readOptions; - - - public Builder() { - this.optionsBuilder = DorisOptions.builder(); - } - - public Builder setFenodes(String fenodes) { - this.optionsBuilder.setFenodes(fenodes); - return this; - } - - public Builder setUsername(String username) { - this.optionsBuilder.setUsername(username); - return this; - } - - public Builder setPassword(String password) { - this.optionsBuilder.setPassword(password); - return this; - } - - public Builder setTableIdentifier(String tableIdentifier) { - this.optionsBuilder.setTableIdentifier(tableIdentifier); - return this; - } - - public Builder setPartitions(List partitions) { - this.partitions = partitions; - return this; - } - - public Builder setReadOptions(DorisReadOptions readOptions) { - this.readOptions = readOptions; - return this; - } - - public DorisRowDataInputFormat build() { - return new DorisRowDataInputFormat( - optionsBuilder.build(), partitions, readOptions - ); - } - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisStreamLoad.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisStreamLoad.java deleted file mode 100644 index 9c05b830e2..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisStreamLoad.java +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.table; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang3.StringUtils; -import org.apache.doris.flink.exception.StreamLoadException; -import org.apache.doris.flink.rest.models.RespContent; -import org.apache.http.HttpHeaders; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.entity.StringEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.DefaultRedirectStrategy; -import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.util.EntityUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.UUID; - -/** - * DorisStreamLoad - **/ -public class DorisStreamLoad implements Serializable { - - private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoad.class); - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - private final static List DORIS_SUCCESS_STATUS = new ArrayList<>(Arrays.asList("Success", "Publish Timeout")); - private static String loadUrlPattern = "http://%s/api/%s/%s/_stream_load?"; - private String user; - private String passwd; - private String loadUrlStr; - private String hostPort; - private String db; - private String tbl; - private String authEncoding; - private Properties streamLoadProp; - private final HttpClientBuilder httpClientBuilder = HttpClients - .custom() - .setRedirectStrategy(new DefaultRedirectStrategy() { - @Override - protected boolean isRedirectable(String method) { - return true; - } - }); - private CloseableHttpClient httpClient; - - public DorisStreamLoad(String hostPort, String db, String tbl, String user, String passwd, Properties streamLoadProp) { - this.hostPort = hostPort; - this.db = db; - this.tbl = tbl; - this.user = user; - this.passwd = passwd; - this.loadUrlStr = String.format(loadUrlPattern, hostPort, db, tbl); - this.authEncoding = basicAuthHeader(user, passwd); - this.streamLoadProp = streamLoadProp; - this.httpClient = httpClientBuilder.build(); - } - - public String getLoadUrlStr() { - return loadUrlStr; - } - - public void setHostPort(String hostPort) { - this.hostPort = hostPort; - this.loadUrlStr = String.format(loadUrlPattern, hostPort, this.db, this.tbl); - } - - public void load(String value) throws StreamLoadException { - LoadResponse loadResponse = loadBatch(value); - LOG.info("Streamload Response:{}", loadResponse); - if (loadResponse.status != 200) { - throw new StreamLoadException("stream load error: " + loadResponse.respContent); - } else { - try { - RespContent respContent = 
OBJECT_MAPPER.readValue(loadResponse.respContent, RespContent.class); - if (!DORIS_SUCCESS_STATUS.contains(respContent.getStatus())) { - String errMsg = String.format("stream load error: %s, see more in %s", respContent.getMessage(), respContent.getErrorURL()); - throw new StreamLoadException(errMsg); - } - } catch (IOException e) { - throw new StreamLoadException(e); - } - } - } - - private LoadResponse loadBatch(String value) { - String label = streamLoadProp.getProperty("label"); - if (StringUtils.isBlank(label)) { - SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd_HHmmss"); - String formatDate = sdf.format(new Date()); - label = String.format("flink_connector_%s_%s", formatDate, - UUID.randomUUID().toString().replaceAll("-", "")); - } - - try { - HttpPut put = new HttpPut(loadUrlStr); - put.setHeader(HttpHeaders.EXPECT, "100-continue"); - put.setHeader(HttpHeaders.AUTHORIZATION, this.authEncoding); - put.setHeader("label", label); - for (Map.Entry entry : streamLoadProp.entrySet()) { - put.setHeader(String.valueOf(entry.getKey()), String.valueOf(entry.getValue())); - } - StringEntity entity = new StringEntity(value, "UTF-8"); - put.setEntity(entity); - - try (CloseableHttpResponse response = httpClient.execute(put)) { - final int statusCode = response.getStatusLine().getStatusCode(); - final String reasonPhrase = response.getStatusLine().getReasonPhrase(); - String loadResult = ""; - if (response.getEntity() != null) { - loadResult = EntityUtils.toString(response.getEntity()); - } - return new LoadResponse(statusCode, reasonPhrase, loadResult); - } - } catch (Exception e) { - String err = "failed to stream load data with label: " + label; - LOG.warn(err, e); - return new LoadResponse(-1, e.getMessage(), err); - } - } - - private String basicAuthHeader(String username, String password) { - final String tobeEncode = username + ":" + password; - byte[] encoded = Base64.encodeBase64(tobeEncode.getBytes(StandardCharsets.UTF_8)); - return "Basic " + new String(encoded); - } - - public void close() throws IOException { - if (null != httpClient) { - try { - httpClient.close(); - } catch (IOException e) { - LOG.error("Closing httpClient failed.", e); - throw new RuntimeException("Closing httpClient failed.", e); - } - } - } - - public static class LoadResponse { - public int status; - public String respMsg; - public String respContent; - - public LoadResponse(int status, String respMsg, String respContent) { - this.status = status; - this.respMsg = respMsg; - this.respContent = respContent; - } - - @Override - public String toString() { - try { - return OBJECT_MAPPER.writeValueAsString(this); - } catch (JsonProcessingException e) { - return ""; - } - } - } -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisTableInputSplit.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisTableInputSplit.java deleted file mode 100644 index 5e81cc1637..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/table/DorisTableInputSplit.java +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
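
The class above wraps Doris's stream-load endpoint (`PUT http://FE_HOST:HTTP_PORT/api/{db}/{table}/_stream_load`); the custom redirect strategy exists so the FE's redirect of the PUT to a BE is followed. A minimal usage sketch, with placeholder host and credentials:

```java
import org.apache.doris.flink.table.DorisStreamLoad;

import java.util.Properties;

public class StreamLoadSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Entries are forwarded verbatim as stream-load headers; json plus
        // strip_outer_array loads each element of a JSON array as one row.
        props.setProperty("format", "json");
        props.setProperty("strip_outer_array", "true");

        DorisStreamLoad streamLoad =
                new DorisStreamLoad("FE_IP:8030", "db", "table", "root", "", props);
        streamLoad.load("[{\"city\":\"北京\",\"longitude\":116.405419,\"latitude\":39.916927}]");
        streamLoad.close();
    }
}
```

If no `label` property is supplied, the class generates one of the form `flink_connector_<timestamp>_<uuid>`, which Doris uses to deduplicate retried batches.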
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink.table; - -import org.apache.doris.flink.rest.PartitionDefinition; -import org.apache.flink.core.io.InputSplit; - -/** - * DorisTableInputSplit - **/ -public class DorisTableInputSplit implements InputSplit, java.io.Serializable { - - /** - * The number of the split. - */ - private final int splitNumber; - - protected final PartitionDefinition partition; - - public DorisTableInputSplit(int splitNumber, PartitionDefinition partition) { - super(); - this.splitNumber = splitNumber; - this.partition = partition; - } - - - @Override - public int getSplitNumber() { - return splitNumber; - } - -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/util/ErrorMessages.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/util/ErrorMessages.java deleted file mode 100644 index 2e02daae69..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/util/ErrorMessages.java +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.util; - -public abstract class ErrorMessages { - public static final String PARSE_NUMBER_FAILED_MESSAGE = "Parse '{}' to number failed. Original string is '{}'."; - public static final String PARSE_BOOL_FAILED_MESSAGE = "Parse '{}' to boolean failed. Original string is '{}'."; - public static final String CONNECT_FAILED_MESSAGE = "Connect to doris {} failed."; - public static final String ILLEGAL_ARGUMENT_MESSAGE = "argument '{}' is illegal, value is '{}'."; - public static final String SHOULD_NOT_HAPPEN_MESSAGE = "Should not come here."; - public static final String DORIS_INTERNAL_FAIL_MESSAGE = "Doris server '{}' internal failed, status is '{}', error message is '{}'"; -} diff --git a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/util/IOUtils.java b/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/util/IOUtils.java deleted file mode 100644 index 203010cbed..0000000000 --- a/extension/flink-doris-connector/src/main/java/org/apache/doris/flink/util/IOUtils.java +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
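
The `ErrorMessages` templates above use SLF4J-style `{}` placeholders, so they slot directly into parameterized logger calls. A small illustration (the option key is just an example value):

```java
import org.apache.doris.flink.util.ErrorMessages;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ErrorMessagesSketch {
    private static final Logger LOG = LoggerFactory.getLogger(ErrorMessagesSketch.class);

    public static void main(String[] args) {
        // Placeholders are filled positionally, yielding:
        // argument 'doris.request.retries' is illegal, value is 'abc'.
        LOG.warn(ErrorMessages.ILLEGAL_ARGUMENT_MESSAGE, "doris.request.retries", "abc");
    }
}
```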
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.util; - -import java.io.IOException; -import java.io.StringReader; -import java.io.StringWriter; -import java.util.Properties; - -public class IOUtils { - public static String propsToString(Properties props) throws IllegalArgumentException { - StringWriter sw = new StringWriter(); - if (props != null) { - try { - props.store(sw, ""); - } catch (IOException ex) { - throw new IllegalArgumentException("Cannot parse props to String.", ex); - } - } - return sw.toString(); - } - - public static Properties propsFromString(String source) throws IllegalArgumentException { - Properties copy = new Properties(); - if (source != null) { - try { - copy.load(new StringReader(source)); - } catch (IOException ex) { - throw new IllegalArgumentException("Cannot parse props from String.", ex); - } - } - return copy; - } -} diff --git a/extension/flink-doris-connector/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/extension/flink-doris-connector/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory deleted file mode 100644 index e625cc782d..0000000000 --- a/extension/flink-doris-connector/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -org.apache.doris.flink.table.DorisDynamicTableFactory \ No newline at end of file diff --git a/extension/flink-doris-connector/src/main/resources/log4j.properties b/extension/flink-doris-connector/src/main/resources/log4j.properties deleted file mode 100644 index da32ea0f44..0000000000 --- a/extension/flink-doris-connector/src/main/resources/log4j.properties +++ /dev/null @@ -1,23 +0,0 @@ -################################################################################ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
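
`propsToString` and `propsFromString` above are inverses built on `Properties.store`/`Properties.load`; a quick round trip (hypothetical key) looks like:

```java
import org.apache.doris.flink.util.IOUtils;

import java.util.Properties;

public class PropsRoundTripSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("format", "json");

        // Encode to a plain String (e.g. to carry inside a serializable sink),
        // then parse it back.
        String encoded = IOUtils.propsToString(props);
        Properties decoded = IOUtils.propsFromString(encoded);

        System.out.println(decoded.getProperty("format")); // json
    }
}
```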
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -################################################################################ - -log4j.rootLogger=INFO, console - -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n diff --git a/extension/flink-doris-connector/src/main/scala/org/apache/doris/flink/datastream/ScalaValueReader.scala b/extension/flink-doris-connector/src/main/scala/org/apache/doris/flink/datastream/ScalaValueReader.scala deleted file mode 100644 index 06df2ef494..0000000000 --- a/extension/flink-doris-connector/src/main/scala/org/apache/doris/flink/datastream/ScalaValueReader.scala +++ /dev/null @@ -1,222 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink.datastream - -import java.util.concurrent._ -import java.util.concurrent.atomic.AtomicBoolean -import org.apache.doris.flink.backend.BackendClient -import org.apache.doris.flink.cfg.ConfigurationOptions._ -import org.apache.doris.flink.cfg.{DorisOptions, DorisReadOptions} -import org.apache.doris.flink.exception.ShouldNeverHappenException -import org.apache.doris.flink.rest.{PartitionDefinition, SchemaUtils} -import org.apache.doris.flink.rest.models.Schema -import org.apache.doris.flink.serialization.{Routing, RowBatch} -import org.apache.doris.flink.util.ErrorMessages -import org.apache.doris.flink.util.ErrorMessages._ -import org.apache.doris.thrift.{TScanCloseParams, TScanNextBatchParams, TScanOpenParams, TScanOpenResult} -import org.apache.log4j.Logger - -import scala.collection.JavaConversions._ -import scala.util.Try -import scala.util.control.Breaks - -/** - * read data from Doris BE to array. 
- * @param partition Doris RDD partition - * @param options request configuration - */ -class ScalaValueReader(partition: PartitionDefinition, options: DorisOptions, readOptions: DorisReadOptions) extends AutoCloseable { - protected val logger = Logger.getLogger(classOf[ScalaValueReader]) - - protected val client = new BackendClient(new Routing(partition.getBeAddress), readOptions) - protected var offset = 0 - protected var eos: AtomicBoolean = new AtomicBoolean(false) - protected var rowBatch: RowBatch = _ - // flag indicate if support deserialize Arrow to RowBatch asynchronously - protected var deserializeArrowToRowBatchAsync: java.lang.Boolean = Try { - if(readOptions.getDeserializeArrowAsync == null ) DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT else readOptions.getDeserializeArrowAsync - } getOrElse { - logger.warn(ErrorMessages.PARSE_BOOL_FAILED_MESSAGE, DORIS_DESERIALIZE_ARROW_ASYNC, readOptions.getDeserializeArrowAsync) - DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT - } - - protected var rowBatchBlockingQueue: BlockingQueue[RowBatch] = { - val blockingQueueSize = Try { - if(readOptions.getDeserializeQueueSize == null) DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT else readOptions.getDeserializeQueueSize - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_DESERIALIZE_QUEUE_SIZE, readOptions.getDeserializeQueueSize) - DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT - } - - var queue: BlockingQueue[RowBatch] = null - if (deserializeArrowToRowBatchAsync) { - queue = new ArrayBlockingQueue(blockingQueueSize) - } - queue - } - - private val openParams: TScanOpenParams = { - val params = new TScanOpenParams - params.cluster = DORIS_DEFAULT_CLUSTER - params.database = partition.getDatabase - params.table = partition.getTable - - params.tablet_ids = partition.getTabletIds.toList - params.opaqued_query_plan = partition.getQueryPlan - - // max row number of one read batch - val batchSize = Try { - if(readOptions.getRequestBatchSize == null) DORIS_BATCH_SIZE_DEFAULT else readOptions.getRequestBatchSize; - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_BATCH_SIZE, readOptions.getRequestBatchSize) - DORIS_BATCH_SIZE_DEFAULT - } - - val queryDorisTimeout = Try { - if(readOptions.getRequestQueryTimeoutS == null) DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT else readOptions.getRequestQueryTimeoutS - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_REQUEST_QUERY_TIMEOUT_S, readOptions.getRequestQueryTimeoutS) - DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT - } - - val execMemLimit = Try { - if(readOptions.getExecMemLimit == null) DORIS_EXEC_MEM_LIMIT_DEFAULT else readOptions.getExecMemLimit - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_EXEC_MEM_LIMIT, readOptions.getExecMemLimit) - DORIS_EXEC_MEM_LIMIT_DEFAULT - } - - params.setBatchSize(batchSize) - params.setQueryTimeout(queryDorisTimeout) - params.setMemLimit(execMemLimit) - params.setUser(options.getUsername) - params.setPasswd(options.getPassword) - - logger.debug(s"Open scan params is, " + - s"cluster: ${params.getCluster}, " + - s"database: ${params.getDatabase}, " + - s"table: ${params.getTable}, " + - s"tabletId: ${params.getTabletIds}, " + - s"batch size: $batchSize, " + - s"query timeout: $queryDorisTimeout, " + - s"execution memory limit: $execMemLimit, " + - s"user: ${params.getUser}, " + - s"query plan: ${params.getOpaquedQueryPlan}") - - params - } - - protected val openResult: TScanOpenResult = client.openScanner(openParams) - protected val contextId: String = 
openResult.getContextId - protected val schema: Schema = - SchemaUtils.convertToSchema(openResult.getSelectedColumns) - - protected val asyncThread: Thread = new Thread { - override def run { - val nextBatchParams = new TScanNextBatchParams - nextBatchParams.setContextId(contextId) - while (!eos.get) { - nextBatchParams.setOffset(offset) - val nextResult = client.getNext(nextBatchParams) - eos.set(nextResult.isEos) - if (!eos.get) { - val rowBatch = new RowBatch(nextResult, schema).readArrow() - offset += rowBatch.getReadRowCount - rowBatch.close - rowBatchBlockingQueue.put(rowBatch) - } - } - } - } - - protected val asyncThreadStarted: Boolean = { - var started = false - if (deserializeArrowToRowBatchAsync) { - asyncThread.start - started = true - } - started - } - - logger.debug(s"Open scan result is, contextId: $contextId, schema: $schema.") - - /** - * read data and cached in rowBatch. - * @return true if hax next value - */ - def hasNext: Boolean = { - var hasNext = false - if (deserializeArrowToRowBatchAsync && asyncThreadStarted) { - // support deserialize Arrow to RowBatch asynchronously - if (rowBatch == null || !rowBatch.hasNext) { - val loop = new Breaks - loop.breakable { - while (!eos.get || !rowBatchBlockingQueue.isEmpty) { - if (!rowBatchBlockingQueue.isEmpty) { - rowBatch = rowBatchBlockingQueue.take - hasNext = true - loop.break - } else { - // wait for rowBatch put in queue or eos change - Thread.sleep(5) - } - } - } - } else { - hasNext = true - } - } else { - // Arrow data was acquired synchronously during the iterative process - if (!eos.get && (rowBatch == null || !rowBatch.hasNext)) { - if (rowBatch != null) { - offset += rowBatch.getReadRowCount - rowBatch.close - } - val nextBatchParams = new TScanNextBatchParams - nextBatchParams.setContextId(contextId) - nextBatchParams.setOffset(offset) - val nextResult = client.getNext(nextBatchParams) - eos.set(nextResult.isEos) - if (!eos.get) { - rowBatch = new RowBatch(nextResult, schema).readArrow() - } - } - hasNext = !eos.get - } - hasNext - } - - /** - * get next value. - * @return next value - */ - def next: java.util.List[_] = { - if (!hasNext) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE) - throw new ShouldNeverHappenException - } - rowBatch.next - } - - def close(): Unit = { - val closeParams = new TScanCloseParams - closeParams.setContextId(contextId) - client.closeScanner(closeParams) - } - -} diff --git a/extension/flink-doris-connector/src/main/thrift/doris/DorisExternalService.thrift b/extension/flink-doris-connector/src/main/thrift/doris/DorisExternalService.thrift deleted file mode 100644 index c169874887..0000000000 --- a/extension/flink-doris-connector/src/main/thrift/doris/DorisExternalService.thrift +++ /dev/null @@ -1,122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
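
From the Java side (this is how `DorisRowDataInputFormat` drives it), the reader above reduces to a plain iterator protocol. A sketch, with the partition and option objects assumed to be already resolved:

```java
import org.apache.doris.flink.cfg.DorisOptions;
import org.apache.doris.flink.cfg.DorisReadOptions;
import org.apache.doris.flink.datastream.ScalaValueReader;
import org.apache.doris.flink.rest.PartitionDefinition;

import java.util.List;

public class ValueReaderSketch {
    static void readPartition(PartitionDefinition partition, DorisOptions options,
                              DorisReadOptions readOptions) throws Exception {
        ScalaValueReader reader = new ScalaValueReader(partition, options, readOptions);
        try {
            while (reader.hasNext()) {
                // One row per call; one element per selected column.
                List<?> row = reader.next();
                System.out.println(row);
            }
        } finally {
            reader.close();
        }
    }
}
```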
See the License for the -// specific language governing permissions and limitations -// under the License. - -namespace java org.apache.doris.thrift -namespace cpp doris - -include "Types.thrift" -include "Status.thrift" - - -// Parameters to open(). -struct TScanOpenParams { - - 1: required string cluster - - 2: required string database - - 3: required string table - - // tablets to scan - 4: required list tablet_ids - - // base64 encoded binary plan fragment - 5: required string opaqued_query_plan - - // A string specified for the table that is passed to the external data source. - // Always set, may be an empty string. - 6: optional i32 batch_size - - // reserved params for use - 7: optional map properties - - // The query limit, if specified. - 8: optional i64 limit - - // The authenticated user name. Always set. - // maybe usefullless - 9: optional string user - - 10: optional string passwd - // max keep alive time min - 11: optional i16 keep_alive_min - - 12: optional i32 query_timeout - - // memory limit for a single query - 13: optional i64 mem_limit -} - -struct TScanColumnDesc { - // The column name - 1: optional string name - // The column type. Always set. - 2: optional Types.TPrimitiveType type -} - -// Returned by open(). -struct TScanOpenResult { - 1: required Status.TStatus status - // An opaque context_id used in subsequent getNext()/close() calls. Required. - 2: optional string context_id - // selected fields - 3: optional list selected_columns - -} - -// Parameters to getNext() -struct TScanNextBatchParams { - // The opaque handle returned by the previous open() call. Always set. - 1: optional string context_id // doris olap engine context id - 2: optional i64 offset // doris should check the offset to prevent duplicate rpc calls -} - -// Returned by getNext(). -struct TScanBatchResult { - 1: required Status.TStatus status - - // If true, reached the end of the result stream; subsequent calls to - // getNext() won’t return any more results. Required. - 2: optional bool eos - - // A batch of rows of arrow format to return, if any exist. The number of rows in the batch - // should be less than or equal to the batch_size specified in TOpenParams. - 3: optional binary rows -} - -// Parameters to close() -struct TScanCloseParams { - // The opaque handle returned by the previous open() call. Always set. - 1: optional string context_id -} - -// Returned by close(). -struct TScanCloseResult { - 1: required Status.TStatus status -} - -// scan service expose ability of scanning data ability to other compute system -service TDorisExternalService { - // doris will build a scan context for this session, context_id returned if success - TScanOpenResult open_scanner(1: TScanOpenParams params); - - // return the batch_size of data - TScanBatchResult get_next(1: TScanNextBatchParams params); - - // release the context resource associated with the context_id - TScanCloseResult close_scanner(1: TScanCloseParams params); -} diff --git a/extension/flink-doris-connector/src/main/thrift/doris/Status.thrift b/extension/flink-doris-connector/src/main/thrift/doris/Status.thrift deleted file mode 100644 index 2966a8a535..0000000000 --- a/extension/flink-doris-connector/src/main/thrift/doris/Status.thrift +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
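
The scan protocol defined above is a three-step open/next/close conversation with a BE. A hedged sketch of the sequence against a Thrift-generated client (transport setup and the required open parameters are elided; the `TDorisExternalService.Client` type and camel-cased accessors are assumptions, matching the bean style used elsewhere in this patch):

```java
import org.apache.doris.thrift.TDorisExternalService;
import org.apache.doris.thrift.TScanBatchResult;
import org.apache.doris.thrift.TScanCloseParams;
import org.apache.doris.thrift.TScanNextBatchParams;
import org.apache.doris.thrift.TScanOpenParams;
import org.apache.doris.thrift.TScanOpenResult;

public class ScanRpcSketch {
    static void scan(TDorisExternalService.Client client, TScanOpenParams openParams) throws Exception {
        // 1. open_scanner returns an opaque context_id plus the selected columns.
        TScanOpenResult opened = client.open_scanner(openParams);

        // 2. get_next is polled with a monotonically increasing row offset so
        //    the BE can detect duplicate RPCs; rows arrive Arrow-serialized.
        TScanNextBatchParams nextParams = new TScanNextBatchParams();
        nextParams.setContextId(opened.getContextId());
        long offset = 0;
        TScanBatchResult batch;
        do {
            nextParams.setOffset(offset);
            batch = client.get_next(nextParams);
            offset += decodedRowCount(batch);
        } while (!batch.isEos());

        // 3. close_scanner releases the scan context on the BE.
        TScanCloseParams closeParams = new TScanCloseParams();
        closeParams.setContextId(opened.getContextId());
        client.close_scanner(closeParams);
    }

    private static long decodedRowCount(TScanBatchResult batch) {
        return 0; // stand-in for RowBatch#getReadRowCount after Arrow decoding
    }
}
```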
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -namespace cpp doris -namespace java org.apache.doris.thrift - -enum TStatusCode { - OK, - CANCELLED, - ANALYSIS_ERROR, - NOT_IMPLEMENTED_ERROR, - RUNTIME_ERROR, - MEM_LIMIT_EXCEEDED, - INTERNAL_ERROR, - THRIFT_RPC_ERROR, - TIMEOUT, - KUDU_NOT_ENABLED, // Deprecated - KUDU_NOT_SUPPORTED_ON_OS, // Deprecated - MEM_ALLOC_FAILED, - BUFFER_ALLOCATION_FAILED, - MINIMUM_RESERVATION_UNAVAILABLE, - PUBLISH_TIMEOUT, - LABEL_ALREADY_EXISTS, - ES_INTERNAL_ERROR, - ES_INDEX_NOT_FOUND, - ES_SHARD_NOT_FOUND, - ES_INVALID_CONTEXTID, - ES_INVALID_OFFSET, - ES_REQUEST_ERROR, - - // end of file - END_OF_FILE = 30, - NOT_FOUND = 31, - CORRUPTION = 32, - INVALID_ARGUMENT = 33, - IO_ERROR = 34, - ALREADY_EXIST = 35, - NETWORK_ERROR = 36, - ILLEGAL_STATE = 37, - NOT_AUTHORIZED = 38, - ABORTED = 39, - REMOTE_ERROR = 40, - SERVICE_UNAVAILABLE = 41, - UNINITIALIZED = 42, - CONFIGURATION_ERROR = 43, - INCOMPLETE = 44 -} - -struct TStatus { - 1: required TStatusCode status_code - 2: optional list error_msgs -} diff --git a/extension/flink-doris-connector/src/main/thrift/doris/Types.thrift b/extension/flink-doris-connector/src/main/thrift/doris/Types.thrift deleted file mode 100644 index 44ce6062f2..0000000000 --- a/extension/flink-doris-connector/src/main/thrift/doris/Types.thrift +++ /dev/null @@ -1,376 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -namespace cpp doris -namespace java org.apache.doris.thrift - - -typedef i64 TTimestamp -typedef i32 TPlanNodeId -typedef i32 TTupleId -typedef i32 TSlotId -typedef i64 TTableId -typedef i64 TTabletId -typedef i64 TVersion -typedef i64 TVersionHash -typedef i32 TSchemaHash -typedef i32 TPort -typedef i64 TCount -typedef i64 TSize -typedef i32 TClusterId -typedef i64 TEpoch - -// add for real time load, partitionid is not defined previously, define it here -typedef i64 TTransactionId -typedef i64 TPartitionId - -enum TStorageType { - ROW, - COLUMN, -} - -enum TStorageMedium { - HDD, - SSD, -} - -enum TVarType { - SESSION, - GLOBAL -} - -enum TPrimitiveType { - INVALID_TYPE, - NULL_TYPE, - BOOLEAN, - TINYINT, - SMALLINT, - INT, - BIGINT, - FLOAT, - DOUBLE, - DATE, - DATETIME, - BINARY, - DECIMAL, - // CHAR(n). 
Currently only supported in UDAs - CHAR, - LARGEINT, - VARCHAR, - HLL, - DECIMALV2, - TIME, - OBJECT, - ARRAY, - MAP, - STRUCT, - STRING, - ALL -} - -enum TTypeNodeType { - SCALAR, - ARRAY, - MAP, - STRUCT -} - -struct TScalarType { - 1: required TPrimitiveType type - - // Only set if type == CHAR or type == VARCHAR - 2: optional i32 len - - // Only set for DECIMAL - 3: optional i32 precision - 4: optional i32 scale -} - -// Represents a field in a STRUCT type. -// TODO: Model column stats for struct fields. -struct TStructField { - 1: required string name - 2: optional string comment -} - -struct TTypeNode { - 1: required TTypeNodeType type - - // only set for scalar types - 2: optional TScalarType scalar_type - - // only used for structs; has struct_fields.size() corresponding child types - 3: optional list struct_fields -} - -// A flattened representation of a tree of column types obtained by depth-first -// traversal. Complex types such as map, array and struct have child types corresponding -// to the map key/value, array item type, and struct fields, respectively. -// For scalar types the list contains only a single node. -// Note: We cannot rename this to TType because it conflicts with Thrift's internal TType -// and the generated Python thrift files will not work. -// Note: TTypeDesc in impala is TColumnType, but we already use TColumnType, so we name this -// to TTypeDesc. In future, we merge these two to one -struct TTypeDesc { - 1: list types -} - -enum TAggregationType { - SUM, - MAX, - MIN, - REPLACE, - HLL_UNION, - NONE -} - -enum TPushType { - LOAD, - DELETE, - LOAD_DELETE -} - -enum TTaskType { - CREATE, - DROP, - PUSH, - CLONE, - STORAGE_MEDIUM_MIGRATE, - ROLLUP, - SCHEMA_CHANGE, - CANCEL_DELETE, // Deprecated - MAKE_SNAPSHOT, - RELEASE_SNAPSHOT, - CHECK_CONSISTENCY, - UPLOAD, - DOWNLOAD, - CLEAR_REMOTE_FILE, - MOVE - REALTIME_PUSH, - PUBLISH_VERSION, - CLEAR_ALTER_TASK, - CLEAR_TRANSACTION_TASK, - RECOVER_TABLET, - STREAM_LOAD, - UPDATE_TABLET_META_INFO, - ALTER_TASK -} - -enum TStmtType { - QUERY, - DDL, // Data definition, e.g. CREATE TABLE (includes read-only functions e.g. SHOW) - DML, // Data modification e.g. INSERT - EXPLAIN // EXPLAIN -} - -// level of verboseness for "explain" output -// TODO: should this go somewhere else? -enum TExplainLevel { - NORMAL, - VERBOSE -} - -struct TColumnType { - 1: required TPrimitiveType type - // Only set if type == CHAR_ARRAY - 2: optional i32 len - 3: optional i32 index_len - 4: optional i32 precision - 5: optional i32 scale -} - -// A TNetworkAddress is the standard host, port representation of a -// network address. The hostname field must be resolvable to an IPv4 -// address. -struct TNetworkAddress { - 1: required string hostname - 2: required i32 port -} - -// Wire format for UniqueId -struct TUniqueId { - 1: required i64 hi - 2: required i64 lo -} - -enum QueryState { - CREATED, - INITIALIZED, - COMPILED, - RUNNING, - FINISHED, - EXCEPTION -} - -enum TFunctionType { - SCALAR, - AGGREGATE, -} - -enum TFunctionBinaryType { - // Palo builtin. We can either run this interpreted or via codegen - // depending on the query option. - BUILTIN, - - // Hive UDFs, loaded from *.jar - HIVE, - - // Native-interface, precompiled UDFs loaded from *.so - NATIVE, - - // Native-interface, precompiled to IR; loaded from *.ll - IR, -} - -// Represents a fully qualified function name. -struct TFunctionName { - // Name of the function's parent database. Not set if in global - // namespace (e.g. 
builtins) - 1: optional string db_name - - // Name of the function - 2: required string function_name -} - -struct TScalarFunction { - // Symbol for the function - 1: required string symbol - 2: optional string prepare_fn_symbol - 3: optional string close_fn_symbol -} - -struct TAggregateFunction { - 1: required TTypeDesc intermediate_type - 2: optional string update_fn_symbol - 3: optional string init_fn_symbol - 4: optional string serialize_fn_symbol - 5: optional string merge_fn_symbol - 6: optional string finalize_fn_symbol - 8: optional string get_value_fn_symbol - 9: optional string remove_fn_symbol - 10: optional bool is_analytic_only_fn = false -} - -// Represents a function in the Catalog. -struct TFunction { - // Fully qualified function name. - 1: required TFunctionName name - - // Type of the udf. e.g. hive, native, ir - 2: required TFunctionBinaryType binary_type - - // The types of the arguments to the function - 3: required list arg_types - - // Return type for the function. - 4: required TTypeDesc ret_type - - // If true, this function takes var args. - 5: required bool has_var_args - - // Optional comment to attach to the function - 6: optional string comment - - 7: optional string signature - - // HDFS path for the function binary. This binary must exist at the time the - // function is created. - 8: optional string hdfs_location - - // One of these should be set. - 9: optional TScalarFunction scalar_fn - 10: optional TAggregateFunction aggregate_fn - - 11: optional i64 id - 12: optional string checksum -} - -enum TLoadJobState { - PENDING, - ETL, - LOADING, - FINISHED, - CANCELLED -} - -enum TEtlState { - RUNNING, - FINISHED, - CANCELLED, - UNKNOWN -} - -enum TTableType { - MYSQL_TABLE, - OLAP_TABLE, - SCHEMA_TABLE, - KUDU_TABLE, // Deprecated - BROKER_TABLE, - ES_TABLE -} - -enum TKeysType { - PRIMARY_KEYS, - DUP_KEYS, - UNIQUE_KEYS, - AGG_KEYS -} - -enum TPriority { - NORMAL, - HIGH -} - -struct TBackend { - 1: required string host - 2: required TPort be_port - 3: required TPort http_port -} - -struct TResourceInfo { - 1: required string user - 2: required string group -} - -enum TExportState { - RUNNING, - FINISHED, - CANCELLED, - UNKNOWN -} - -enum TFileType { - FILE_LOCAL, - FILE_BROKER, - FILE_STREAM, // file content is streaming in the buffer -} - -struct TTabletCommitInfo { - 1: required i64 tabletId - 2: required i64 backendId -} - -enum TLoadType { - MANUL_LOAD, - ROUTINE_LOAD, - MINI_LOAD -} - -enum TLoadSourceType { - RAW, - KAFKA, -} diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisOutPutFormatExample.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisOutPutFormatExample.java deleted file mode 100644 index a64e3d9847..0000000000 --- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisOutPutFormatExample.java +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
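
To make the flattened `TTypeDesc` layout described above concrete: `ARRAY<INT>` serializes as the depth-first node list `[ARRAY, SCALAR(INT)]`. A sketch, with bean-style setters assumed (matching the camel-cased Thrift accessors used elsewhere in this patch):

```java
import org.apache.doris.thrift.TPrimitiveType;
import org.apache.doris.thrift.TScalarType;
import org.apache.doris.thrift.TTypeDesc;
import org.apache.doris.thrift.TTypeNode;
import org.apache.doris.thrift.TTypeNodeType;

import java.util.Arrays;

public class TypeDescSketch {
    // Encode ARRAY<INT> in the flattened, depth-first TTypeDesc layout.
    static TTypeDesc arrayOfInt() {
        TTypeNode arrayNode = new TTypeNode();
        arrayNode.setType(TTypeNodeType.ARRAY);

        TScalarType intType = new TScalarType();
        intType.setType(TPrimitiveType.INT);

        TTypeNode intNode = new TTypeNode();
        intNode.setType(TTypeNodeType.SCALAR);
        intNode.setScalarType(intType);

        TTypeDesc desc = new TTypeDesc();
        desc.setTypes(Arrays.asList(arrayNode, intNode));
        return desc;
    }
}
```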
You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-package org.apache.doris.flink;
-
-import org.apache.doris.flink.cfg.DorisExecutionOptions;
-import org.apache.doris.flink.cfg.DorisOptions;
-import org.apache.doris.flink.cfg.DorisReadOptions;
-import org.apache.doris.flink.table.DorisDynamicOutputFormat;
-import org.apache.flink.api.common.functions.MapFunction;
-import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.api.java.operators.MapOperator;
-import org.apache.flink.table.data.GenericRowData;
-import org.apache.flink.table.data.RowData;
-import org.apache.flink.table.data.StringData;
-import org.apache.flink.table.types.logical.DoubleType;
-import org.apache.flink.table.types.logical.LogicalType;
-import org.apache.flink.table.types.logical.VarCharType;
-
-import java.io.IOException;
-
-/**
- * Example using {@link DorisDynamicOutputFormat} for batching.
- */
-public class DorisOutPutFormatExample {
-
-    public static void main(String[] args) throws Exception {
-
-        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
-        env.setParallelism(1);
-
-        MapOperator<String, RowData> data = env.fromElements("")
-                .map(new MapFunction<String, RowData>() {
-                    @Override
-                    public RowData map(String value) throws Exception {
-                        GenericRowData genericRowData = new GenericRowData(3);
-                        genericRowData.setField(0, StringData.fromString("北京"));
-                        genericRowData.setField(1, 116.405419);
-                        genericRowData.setField(2, 39.916927);
-                        return genericRowData;
-                    }
-                });
-
-        DorisOptions dorisOptions = DorisOptions.builder()
-                .setFenodes("FE_IP:8030")
-                .setTableIdentifier("db.table")
-                .setUsername("root")
-                .setPassword("").build();
-        DorisReadOptions readOptions = DorisReadOptions.defaults();
-        DorisExecutionOptions executionOptions = DorisExecutionOptions.defaults();
-
-        LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()};
-        String[] fields = {"city", "longitude", "latitude"};
-
-        DorisDynamicOutputFormat outputFormat =
-                new DorisDynamicOutputFormat(dorisOptions, readOptions, executionOptions, types, fields);
-
-        try {
-            outputFormat.open(0, 1);
-            data.output(outputFormat);
-            outputFormat.close();
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-
-        env.execute("doris batch sink example");
-    }
-}
diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSinkExample.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSinkExample.java
deleted file mode 100644
index 774d686dc3..0000000000
--- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSinkExample.java
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.flink; - -import org.apache.flink.api.java.tuple.Tuple2; -import org.apache.flink.streaming.api.datastream.DataStreamSource; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; - -import java.util.ArrayList; -import java.util.List; - -import static org.apache.flink.table.api.Expressions.$; - -public class DorisSinkExample { - - public static void main(String[] args) { - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - final StreamTableEnvironment tEnv = StreamTableEnvironment.create(env); - - List> data = new ArrayList<>(); - data.add(new Tuple2<>("doris",1)); - DataStreamSource> source = env.fromCollection(data); - tEnv.createTemporaryView("doris_test",source,$("name"),$("age")); - - tEnv.executeSql( - "CREATE TABLE doris_test_sink (" + - "name STRING," + - "age INT" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'FE_IP:8030',\n" + - " 'table.identifier' = 'db.table',\n" + - " 'username' = 'root',\n" + - " 'password' = '',\n" + - " 'sink.properties.format' = 'json',\n" + - " 'sink.properties.strip_outer_array' = 'true'\n" + - ")"); - - tEnv.executeSql("INSERT INTO doris_test_sink select name,age from doris_test"); - } -} diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceDataStream.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceDataStream.java deleted file mode 100644 index 68152347de..0000000000 --- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceDataStream.java +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.flink; - -import org.apache.doris.flink.cfg.DorisStreamOptions; -import org.apache.doris.flink.datastream.DorisSourceFunction; -import org.apache.doris.flink.deserialization.SimpleListDeserializationSchema; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; - -import java.util.Properties; - - - -public class DorisSourceDataStream { - - public static void main(String[] args) throws Exception { - Properties properties = new Properties(); - properties.put("fenodes","FE_IP:8030"); - properties.put("username","root"); - properties.put("password",""); - properties.put("table.identifier","db.table"); - properties.put("doris.read.field","id,code,name"); - properties.put("doris.filter.query","name='doris'"); - DorisStreamOptions options = new DorisStreamOptions(properties); - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(2); - env.addSource(new DorisSourceFunction(options,new SimpleListDeserializationSchema())).print(); - env.execute("Flink doris test"); - } -} diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceExample.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceExample.java deleted file mode 100644 index 35857dc0cf..0000000000 --- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceExample.java +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-package org.apache.doris.flink; - -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.Table; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; -import org.apache.flink.types.Row; - -public class DorisSourceExample { - - public static void main(String[] args) throws Exception { - - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - final StreamTableEnvironment tEnv = StreamTableEnvironment.create(env); - - // register a table in the catalog - tEnv.executeSql( - "CREATE TABLE doris_source (" + - "bigint_1 BIGINT," + - "char_1 STRING," + - "date_1 STRING," + - "datetime_1 STRING," + - "decimal_1 DECIMAL(5,2)," + - "double_1 DOUBLE," + - "float_1 FLOAT ," + - "int_1 INT ," + - "largeint_1 STRING, " + - "smallint_1 SMALLINT, " + - "tinyint_1 TINYINT, " + - "varchar_1 STRING " + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'FE_IP:8030',\n" + - " 'table.identifier' = 'db.table',\n" + - " 'username' = 'root',\n" + - " 'password' = ''\n" + - ")"); - - // define a dynamic aggregating query - final Table result = tEnv.sqlQuery("SELECT * from doris_source "); - - // print the result to the console - tEnv.toRetractStream(result, Row.class).print(); - env.execute(); - } -} diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceSinkExample.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceSinkExample.java deleted file mode 100644 index c4ce1a5b5f..0000000000 --- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisSourceSinkExample.java +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.flink; - -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableEnvironment; - -public class DorisSourceSinkExample { - - public static void main(String[] args) { - EnvironmentSettings settings = EnvironmentSettings.newInstance() - .useBlinkPlanner() - .inStreamingMode() - .build(); - TableEnvironment tEnv = TableEnvironment.create(settings); - tEnv.executeSql( - "CREATE TABLE doris_test (" + - "name STRING," + - "age INT," + - "price DECIMAL(5,2)," + - "sale DOUBLE" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'FE_IP:8030',\n" + - " 'table.identifier' = 'db.table',\n" + - " 'username' = 'root',\n" + - " 'password' = ''" + - ")"); - tEnv.executeSql( - "CREATE TABLE doris_test_sink (" + - "name STRING," + - "age INT," + - "price DECIMAL(5,2)," + - "sale DOUBLE" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'FE_IP:8030',\n" + - " 'table.identifier' = 'db.table',\n" + - " 'username' = 'root',\n" + - " 'password' = '',\n" + - " 'sink.batch.size' = '3',\n" + - " 'sink.max-retries' = '2'\n" + - ")"); - - tEnv.executeSql("INSERT INTO doris_test_sink select name,age,price,sale from doris_test"); - } -} diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisStreamSinkExample.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisStreamSinkExample.java deleted file mode 100644 index cf35db696a..0000000000 --- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/DorisStreamSinkExample.java +++ /dev/null @@ -1,236 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.flink; - -import org.apache.doris.flink.cfg.DorisExecutionOptions; -import org.apache.doris.flink.cfg.DorisOptions; -import org.apache.doris.flink.cfg.DorisReadOptions; -import org.apache.doris.flink.cfg.DorisSink; -import org.apache.flink.api.common.functions.MapFunction; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.data.GenericRowData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.types.logical.DoubleType; -import org.apache.flink.table.types.logical.LogicalType; -import org.apache.flink.table.types.logical.VarCharType; - -import java.util.Properties; - -/** - * example using {@link DorisSink} for streaming. 
- */ -public class DorisStreamSinkExample { - - - public void testJsonString() throws Exception { - /* - * Example for JsonString element - */ - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - Properties pro = new Properties(); - pro.setProperty("format", "json"); - pro.setProperty("strip_outer_array", "true"); - env.fromElements("{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}") - .addSink( - DorisSink.sink( - DorisReadOptions.builder().build(), - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0l) - .setMaxRetries(3) - .setStreamLoadProp(pro).build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); - env.execute("doris stream sink example"); - } - - - public void testJsonStringWithDefaultReadOptions() throws Exception { - /* - * Example for JsonString element with default ReadOptions - */ - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - Properties pro = new Properties(); - pro.setProperty("format", "json"); - pro.setProperty("strip_outer_array", "true"); - env.fromElements("{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}") - .addSink( - DorisSink.sink( - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0l) - .setMaxRetries(3) - .setStreamLoadProp(pro).build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); - env.execute("doris stream sink example"); - } - - - public void testJsonStringWithDefaultReadOptionsAndExecutionOptions() throws Exception { - /* - * Example for JsonString element with default ReadOptions and ExecutionOptions - */ - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - env.fromElements("{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}") - .addSink( - DorisSink.sink( - DorisOptions.builder() - .setFenodes("192.168.52.101:8030") - .setTableIdentifier("smarttrip_db.doris_output_format") - .setUsername("root") - .setPassword("").build() - )); - env.execute("doris stream sink example"); - } - - - public void testRowData() throws Exception { - /* - * Example for RowData element - */ - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - DataStream source = env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - - String[] fields = {"city", "longitude", "latitude"}; - LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; - - source.addSink( - DorisSink.sink( - fields, - types, - DorisReadOptions.builder().build(), - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0L) - .setMaxRetries(3) - .build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); - env.execute("doris stream sink example"); - } - - - public void testRowDataWithDefaultReadOptions() throws 
Exception { - /* - * Example for RowData element with default ReadOptions - */ - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - DataStream source = env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - - String[] fields = {"city", "longitude", "latitude"}; - LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; - - source.addSink( - DorisSink.sink( - fields, - types, - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0L) - .setMaxRetries(3) - .build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); - env.execute("doris stream sink example"); - } - - - public void testRowDataWithDefaultReadOptionsAndExecutionOptions() throws Exception { - /* - * Example for RowData element with default ReadOptions and ExecutionOptions - */ - StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - - DataStream source = env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - - String[] fields = {"city", "longitude", "latitude"}; - LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; - - source.addSink( - DorisSink.sink( - fields, - types, - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); - env.execute("doris stream sink example"); - } -} diff --git a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/serialization/TestRowBatch.java b/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/serialization/TestRowBatch.java deleted file mode 100644 index 0f45aaa376..0000000000 --- a/extension/flink-doris-connector/src/test/java/org/apache/doris/flink/serialization/TestRowBatch.java +++ /dev/null @@ -1,439 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.flink.serialization; - -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.doris.flink.rest.RestService; -import org.apache.doris.flink.rest.models.Schema; -import org.apache.doris.thrift.TScanBatchResult; -import org.apache.doris.thrift.TStatus; -import org.apache.doris.thrift.TStatusCode; -import org.apache.flink.shaded.guava18.com.google.common.collect.ImmutableList; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.StringData; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayOutputStream; -import java.math.BigDecimal; -import java.util.Arrays; -import java.util.List; -import java.util.NoSuchElementException; - -import static org.hamcrest.core.StringStartsWith.startsWith; - -public class TestRowBatch { - private static Logger logger = LoggerFactory.getLogger(TestRowBatch.class); - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testRowBatch() throws Exception { - // schema - ImmutableList.Builder childrenBuilder = ImmutableList.builder(); - childrenBuilder.add(new Field("k0", FieldType.nullable(new ArrowType.Bool()), null)); - childrenBuilder.add(new Field("k1", FieldType.nullable(new ArrowType.Int(8, true)), null)); - childrenBuilder.add(new Field("k2", FieldType.nullable(new ArrowType.Int(16, true)), null)); - childrenBuilder.add(new Field("k3", FieldType.nullable(new ArrowType.Int(32, true)), null)); - childrenBuilder.add(new Field("k4", FieldType.nullable(new ArrowType.Int(64, true)), null)); - childrenBuilder.add(new Field("k9", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null)); - childrenBuilder.add(new Field("k8", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null)); - childrenBuilder.add(new Field("k10", FieldType.nullable(new ArrowType.Utf8()), null)); - childrenBuilder.add(new Field("k11", FieldType.nullable(new ArrowType.Utf8()), null)); - childrenBuilder.add(new Field("k5", FieldType.nullable(new ArrowType.Decimal(9,2)), null)); - childrenBuilder.add(new Field("k6", FieldType.nullable(new ArrowType.Utf8()), null)); - - VectorSchemaRoot root = VectorSchemaRoot.create( - new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null), - new RootAllocator(Integer.MAX_VALUE)); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ArrowStreamWriter 
arrowStreamWriter = new ArrowStreamWriter( - root, - new DictionaryProvider.MapDictionaryProvider(), - outputStream); - - arrowStreamWriter.start(); - root.setRowCount(3); - - FieldVector vector = root.getVector("k0"); - BitVector bitVector = (BitVector)vector; - bitVector.setInitialCapacity(3); - bitVector.allocateNew(3); - bitVector.setSafe(0, 1); - bitVector.setSafe(1, 0); - bitVector.setSafe(2, 1); - vector.setValueCount(3); - - vector = root.getVector("k1"); - TinyIntVector tinyIntVector = (TinyIntVector)vector; - tinyIntVector.setInitialCapacity(3); - tinyIntVector.allocateNew(3); - tinyIntVector.setSafe(0, 1); - tinyIntVector.setSafe(1, 2); - tinyIntVector.setSafe(2, 3); - vector.setValueCount(3); - - vector = root.getVector("k2"); - SmallIntVector smallIntVector = (SmallIntVector)vector; - smallIntVector.setInitialCapacity(3); - smallIntVector.allocateNew(3); - smallIntVector.setSafe(0, 1); - smallIntVector.setSafe(1, 2); - smallIntVector.setSafe(2, 3); - vector.setValueCount(3); - - vector = root.getVector("k3"); - IntVector intVector = (IntVector)vector; - intVector.setInitialCapacity(3); - intVector.allocateNew(3); - intVector.setSafe(0, 1); - intVector.setNull(1); - intVector.setSafe(2, 3); - vector.setValueCount(3); - - vector = root.getVector("k4"); - BigIntVector bigIntVector = (BigIntVector)vector; - bigIntVector.setInitialCapacity(3); - bigIntVector.allocateNew(3); - bigIntVector.setSafe(0, 1); - bigIntVector.setSafe(1, 2); - bigIntVector.setSafe(2, 3); - vector.setValueCount(3); - - vector = root.getVector("k5"); - DecimalVector decimalVector = (DecimalVector)vector; - decimalVector.setInitialCapacity(3); - decimalVector.allocateNew(); - decimalVector.setIndexDefined(0); - decimalVector.setSafe(0, new BigDecimal("12.34")); - decimalVector.setIndexDefined(1); - decimalVector.setSafe(1, new BigDecimal("88.88")); - decimalVector.setIndexDefined(2); - decimalVector.setSafe(2, new BigDecimal("10.22")); - vector.setValueCount(3); - - vector = root.getVector("k6"); - VarCharVector charVector = (VarCharVector)vector; - charVector.setInitialCapacity(3); - charVector.allocateNew(); - charVector.setIndexDefined(0); - charVector.setValueLengthSafe(0, 5); - charVector.setSafe(0, "char1".getBytes()); - charVector.setIndexDefined(1); - charVector.setValueLengthSafe(1, 5); - charVector.setSafe(1, "char2".getBytes()); - charVector.setIndexDefined(2); - charVector.setValueLengthSafe(2, 5); - charVector.setSafe(2, "char3".getBytes()); - vector.setValueCount(3); - - vector = root.getVector("k8"); - Float8Vector float8Vector = (Float8Vector)vector; - float8Vector.setInitialCapacity(3); - float8Vector.allocateNew(3); - float8Vector.setSafe(0, 1.1); - float8Vector.setSafe(1, 2.2); - float8Vector.setSafe(2, 3.3); - vector.setValueCount(3); - - vector = root.getVector("k9"); - Float4Vector float4Vector = (Float4Vector)vector; - float4Vector.setInitialCapacity(3); - float4Vector.allocateNew(3); - float4Vector.setSafe(0, 1.1f); - float4Vector.setSafe(1, 2.2f); - float4Vector.setSafe(2, 3.3f); - vector.setValueCount(3); - - vector = root.getVector("k10"); - VarCharVector datecharVector = (VarCharVector)vector; - datecharVector.setInitialCapacity(3); - datecharVector.allocateNew(); - datecharVector.setIndexDefined(0); - datecharVector.setValueLengthSafe(0, 5); - datecharVector.setSafe(0, "2008-08-08".getBytes()); - datecharVector.setIndexDefined(1); - datecharVector.setValueLengthSafe(1, 5); - datecharVector.setSafe(1, "1900-08-08".getBytes()); - datecharVector.setIndexDefined(2); - 
datecharVector.setValueLengthSafe(2, 5); - datecharVector.setSafe(2, "2100-08-08".getBytes()); - vector.setValueCount(3); - - vector = root.getVector("k11"); - VarCharVector timecharVector = (VarCharVector)vector; - timecharVector.setInitialCapacity(3); - timecharVector.allocateNew(); - timecharVector.setIndexDefined(0); - timecharVector.setValueLengthSafe(0, 5); - timecharVector.setSafe(0, "2008-08-08 00:00:00".getBytes()); - timecharVector.setIndexDefined(1); - timecharVector.setValueLengthSafe(1, 5); - timecharVector.setSafe(1, "1900-08-08 00:00:00".getBytes()); - timecharVector.setIndexDefined(2); - timecharVector.setValueLengthSafe(2, 5); - timecharVector.setSafe(2, "2100-08-08 00:00:00".getBytes()); - vector.setValueCount(3); - - arrowStreamWriter.writeBatch(); - - arrowStreamWriter.end(); - arrowStreamWriter.close(); - - TStatus status = new TStatus(); - status.setStatusCode(TStatusCode.OK); - TScanBatchResult scanBatchResult = new TScanBatchResult(); - scanBatchResult.setStatus(status); - scanBatchResult.setEos(false); - scanBatchResult.setRows(outputStream.toByteArray()); - - String schemaStr = "{\"properties\":[{\"type\":\"BOOLEAN\",\"name\":\"k0\",\"comment\":\"\"}," - + "{\"type\":\"TINYINT\",\"name\":\"k1\",\"comment\":\"\"},{\"type\":\"SMALLINT\",\"name\":\"k2\"," - + "\"comment\":\"\"},{\"type\":\"INT\",\"name\":\"k3\",\"comment\":\"\"},{\"type\":\"BIGINT\"," - + "\"name\":\"k4\",\"comment\":\"\"},{\"type\":\"FLOAT\",\"name\":\"k9\",\"comment\":\"\"}," - + "{\"type\":\"DOUBLE\",\"name\":\"k8\",\"comment\":\"\"},{\"type\":\"DATE\",\"name\":\"k10\"," - + "\"comment\":\"\"},{\"type\":\"DATETIME\",\"name\":\"k11\",\"comment\":\"\"}," - + "{\"name\":\"k5\",\"scale\":\"0\",\"comment\":\"\"," - + "\"type\":\"DECIMAL\",\"precision\":\"9\",\"aggregation_type\":\"\"},{\"type\":\"CHAR\",\"name\":\"k6\",\"comment\":\"\",\"aggregation_type\":\"REPLACE_IF_NOT_NULL\"}]," - + "\"status\":200}"; - - Schema schema = RestService.parseSchema(schemaStr, logger); - - RowBatch rowBatch = new RowBatch(scanBatchResult, schema).readArrow(); - - List expectedRow1 = Lists.newArrayList( - Boolean.TRUE, - (byte) 1, - (short) 1, - 1, - 1L, - (float) 1.1, - (double) 1.1, - StringData.fromString("2008-08-08"), - StringData.fromString("2008-08-08 00:00:00"), - DecimalData.fromBigDecimal(new BigDecimal(12.34), 4, 2), - StringData.fromString("char1") - ); - - List expectedRow2 = Arrays.asList( - Boolean.FALSE, - (byte) 2, - (short) 2, - null, - 2L, - (float) 2.2, - (double) 2.2, - StringData.fromString("1900-08-08"), - StringData.fromString("1900-08-08 00:00:00"), - DecimalData.fromBigDecimal(new BigDecimal(88.88), 4, 2), - StringData.fromString("char2") - ); - - List expectedRow3 = Arrays.asList( - Boolean.TRUE, - (byte) 3, - (short) 3, - 3, - 3L, - (float) 3.3, - (double) 3.3, - StringData.fromString("2100-08-08"), - StringData.fromString("2100-08-08 00:00:00"), - DecimalData.fromBigDecimal(new BigDecimal(10.22), 4, 2), - StringData.fromString("char3") - ); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow1 = rowBatch.next(); - Assert.assertEquals(expectedRow1, actualRow1); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow2 = rowBatch.next(); - Assert.assertEquals(expectedRow2, actualRow2); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow3 = rowBatch.next(); - Assert.assertEquals(expectedRow3, actualRow3); - - Assert.assertFalse(rowBatch.hasNext()); - thrown.expect(NoSuchElementException.class); - thrown.expectMessage(startsWith("Get row offset:")); - rowBatch.next(); - } - - 
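
The `testRowBatch` case above repeats one pattern per column: populate an Arrow `FieldVector`, serialize the batch with `ArrowStreamWriter`, and hand the bytes to `TScanBatchResult` as if a BE had returned them. A condensed single-column sketch of that pattern (the column name `k3` mirrors the deleted test; everything else is stock Arrow API):

```java
import java.io.ByteArrayOutputStream;
import java.util.Collections;

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.dictionary.DictionaryProvider;
import org.apache.arrow.vector.ipc.ArrowStreamWriter;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

public class ArrowBatchSketch {
    public static byte[] oneColumnBatch() throws Exception {
        Field k3 = new Field("k3", FieldType.nullable(new ArrowType.Int(32, true)), null);
        try (RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
             VectorSchemaRoot root = VectorSchemaRoot.create(
                     new Schema(Collections.singletonList(k3)), allocator)) {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            try (ArrowStreamWriter writer = new ArrowStreamWriter(
                    root, new DictionaryProvider.MapDictionaryProvider(), out)) {
                writer.start();
                IntVector vector = (IntVector) root.getVector("k3");
                vector.allocateNew(3);
                vector.setSafe(0, 1);
                vector.setNull(1);      // a nullable cell, as in the deleted test
                vector.setSafe(2, 3);
                vector.setValueCount(3);
                root.setRowCount(3);
                writer.writeBatch();
                writer.end();
            }
            return out.toByteArray();   // the bytes the test feeds to TScanBatchResult.setRows(...)
        }
    }
}
```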
@Test - public void testBinary() throws Exception { - byte[] binaryRow0 = {'a', 'b', 'c'}; - byte[] binaryRow1 = {'d', 'e', 'f'}; - byte[] binaryRow2 = {'g', 'h', 'i'}; - - ImmutableList.Builder childrenBuilder = ImmutableList.builder(); - childrenBuilder.add(new Field("k7", FieldType.nullable(new ArrowType.Binary()), null)); - - VectorSchemaRoot root = VectorSchemaRoot.create( - new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null), - new RootAllocator(Integer.MAX_VALUE)); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter( - root, - new DictionaryProvider.MapDictionaryProvider(), - outputStream); - - arrowStreamWriter.start(); - root.setRowCount(3); - - FieldVector vector = root.getVector("k7"); - VarBinaryVector varBinaryVector = (VarBinaryVector) vector; - varBinaryVector.setInitialCapacity(3); - varBinaryVector.allocateNew(); - varBinaryVector.setIndexDefined(0); - varBinaryVector.setValueLengthSafe(0, 3); - varBinaryVector.setSafe(0, binaryRow0); - varBinaryVector.setIndexDefined(1); - varBinaryVector.setValueLengthSafe(1, 3); - varBinaryVector.setSafe(1, binaryRow1); - varBinaryVector.setIndexDefined(2); - varBinaryVector.setValueLengthSafe(2, 3); - varBinaryVector.setSafe(2, binaryRow2); - vector.setValueCount(3); - - arrowStreamWriter.writeBatch(); - - arrowStreamWriter.end(); - arrowStreamWriter.close(); - - TStatus status = new TStatus(); - status.setStatusCode(TStatusCode.OK); - TScanBatchResult scanBatchResult = new TScanBatchResult(); - scanBatchResult.setStatus(status); - scanBatchResult.setEos(false); - scanBatchResult.setRows(outputStream.toByteArray()); - - String schemaStr = "{\"properties\":[{\"type\":\"BINARY\",\"name\":\"k7\",\"comment\":\"\"}], \"status\":200}"; - - Schema schema = RestService.parseSchema(schemaStr, logger); - - RowBatch rowBatch = new RowBatch(scanBatchResult, schema).readArrow(); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow0 = rowBatch.next(); - Assert.assertArrayEquals(binaryRow0, (byte[])actualRow0.get(0)); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow1 = rowBatch.next(); - Assert.assertArrayEquals(binaryRow1, (byte[])actualRow1.get(0)); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow2 = rowBatch.next(); - Assert.assertArrayEquals(binaryRow2, (byte[])actualRow2.get(0)); - - Assert.assertFalse(rowBatch.hasNext()); - thrown.expect(NoSuchElementException.class); - thrown.expectMessage(startsWith("Get row offset:")); - rowBatch.next(); - } - - @Test - public void testDecimalV2() throws Exception { - ImmutableList.Builder childrenBuilder = ImmutableList.builder(); - childrenBuilder.add(new Field("k7", FieldType.nullable(new ArrowType.Decimal(27, 9)), null)); - - VectorSchemaRoot root = VectorSchemaRoot.create( - new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null), - new RootAllocator(Integer.MAX_VALUE)); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter( - root, - new DictionaryProvider.MapDictionaryProvider(), - outputStream); - - arrowStreamWriter.start(); - root.setRowCount(3); - - FieldVector vector = root.getVector("k7"); - DecimalVector decimalVector = (DecimalVector) vector; - decimalVector.setInitialCapacity(3); - decimalVector.allocateNew(3); - decimalVector.setSafe(0, new BigDecimal("12.340000000")); - decimalVector.setSafe(1, new BigDecimal("88.880000000")); - decimalVector.setSafe(2, new 
BigDecimal("10.000000000")); - vector.setValueCount(3); - - arrowStreamWriter.writeBatch(); - - arrowStreamWriter.end(); - arrowStreamWriter.close(); - - TStatus status = new TStatus(); - status.setStatusCode(TStatusCode.OK); - TScanBatchResult scanBatchResult = new TScanBatchResult(); - scanBatchResult.setStatus(status); - scanBatchResult.setEos(false); - scanBatchResult.setRows(outputStream.toByteArray()); - - String schemaStr = "{\"properties\":[{\"type\":\"DECIMALV2\",\"scale\": 0," - + "\"precision\": 9, \"name\":\"k7\",\"comment\":\"\"}], " - + "\"status\":200}"; - - Schema schema = RestService.parseSchema(schemaStr, logger); - - RowBatch rowBatch = new RowBatch(scanBatchResult, schema).readArrow(); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow0 = rowBatch.next(); - Assert.assertEquals(DecimalData.fromBigDecimal(new BigDecimal(12.340000000), 11, 9), actualRow0.get(0)); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow1 = rowBatch.next(); - - Assert.assertEquals(DecimalData.fromBigDecimal(new BigDecimal(88.880000000), 11, 9), actualRow1.get(0)); - - Assert.assertTrue(rowBatch.hasNext()); - List actualRow2 = rowBatch.next(); - Assert.assertEquals(DecimalData.fromBigDecimal(new BigDecimal(10.000000000),11, 9), actualRow2.get(0)); - - Assert.assertFalse(rowBatch.hasNext()); - thrown.expect(NoSuchElementException.class); - thrown.expectMessage(startsWith("Get row offset:")); - rowBatch.next(); - } -} diff --git a/extension/spark-doris-connector/build.sh b/extension/spark-doris-connector/build.sh deleted file mode 100755 index e6d47832f5..0000000000 --- a/extension/spark-doris-connector/build.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to compile Spark-Doris-Connector -# Usage: -# sh build.sh -# -############################################################## - -set -eo pipefail - -usage() { - echo " - Usage: - $0 spark_version scala_version - e.g.: - $0 2.3.4 2.11 - $0 3.1.2 2.12 - " - exit 1 -} - -if [ $# -ne 2 ]; then - usage -fi - -ROOT=$(dirname "$0") -ROOT=$(cd "$ROOT"; pwd) - -export DORIS_HOME=${ROOT}/../../ -export PATH=${DORIS_THIRDPARTY}/installed/bin:$PATH - -. "${DORIS_HOME}"/env.sh - -# include custom environment variables -if [[ -f ${DORIS_HOME}/custom_env.sh ]]; then - . "${DORIS_HOME}"/custom_env.sh -fi - -# check maven -MVN_CMD=mvn - -if [[ -n ${CUSTOM_MVN} ]]; then - MVN_CMD=${CUSTOM_MVN} -fi -if ! 
${MVN_CMD} --version; then - echo "Error: mvn is not found" - exit 1 -fi -export MVN_CMD - -rm -rf output/ - -${MVN_CMD} clean package -Dscala.version=$2 -Dspark.version=$1 - -mkdir -p output/ -cp target/doris-spark-*.jar ./output/ - -echo "*****************************************" -echo "Successfully build Spark-Doris-Connector" -echo "*****************************************" - -exit 0 diff --git a/extension/spark-doris-connector/pom.xml b/extension/spark-doris-connector/pom.xml deleted file mode 100644 index cfa2c5d4c9..0000000000 --- a/extension/spark-doris-connector/pom.xml +++ /dev/null @@ -1,375 +0,0 @@ - - - - 4.0.0 - - org.apache - apache - 23 - - org.apache.doris - doris-spark-connector - ${spark.version}-${scala.version}-1.0.0-SNAPSHOT - Doris Spark Connector - https://doris.apache.org/ - - - Apache 2.0 License - https://www.apache.org/licenses/LICENSE-2.0.html - repo - - - - scm:git:https://git@github.com/apache/incubator-doris.git - scm:git:https://git@github.com/apache/incubator-doris.git - scm:git:https://git@github.com/apache/incubator-doris.git - HEAD - - - GitHub - https://github.com/apache/incubator-doris/issues - - - - Dev Mailing List - dev@doris.apache.org - dev-subscribe@doris.apache.org - dev-unsubscribe@doris.apache.org - - - Commits Mailing List - commits@doris.apache.org - commits-subscribe@doris.apache.org - commits-unsubscribe@doris.apache.org - - - - ${env.scala.version} - ${env.spark.version} - 0.13.0 - 5.0.0 - 3.8.1 - 3.3.0 - 3.2.1 - UTF-8 - ${env.DORIS_THIRDPARTY} - github - - - - - custom-env - - - env.CUSTOM_MAVEN_REPO - - - - - custom-nexus - ${env.CUSTOM_MAVEN_REPO} - - - - - custom-nexus - ${env.CUSTOM_MAVEN_REPO} - - - - - spark.version - - 2.3.4 - - - true - - - - scala.version - - 2.11 - - - true - - - - - general-env - - - !env.CUSTOM_MAVEN_REPO - - - - - central - central maven repo https - https://repo.maven.apache.org/maven2 - - - - - - - org.apache.spark - spark-core_${scala.version} - ${spark.version} - provided - - - org.apache.spark - spark-sql_${scala.version} - ${spark.version} - provided - - - org.apache.thrift - libthrift - ${libthrift.version} - - - org.apache.arrow - arrow-vector - ${arrow.version} - - - org.apache.arrow - arrow-memory-netty - ${arrow.version} - runtime - - - - org.hamcrest - hamcrest-core - 1.3 - test - - - org.mockito - mockito-scala_${scala.version} - 1.4.7 - - - hamcrest-core - org.hamcrest - - - test - - - junit - junit - 4.11 - - - hamcrest-core - org.hamcrest - - - test - - - org.apache.spark - spark-sql-kafka-0-10_${scala.version} - ${spark.version} - test - - - com.fasterxml.jackson.core - jackson-databind - 2.10.0 - - - com.fasterxml.jackson.core - jackson-core - 2.10.0 - - - io.netty - netty-all - 4.1.27.Final - provided - - - - - - - org.codehaus.mojo - build-helper-maven-plugin - 3.2.0 - - - add-source - generate-sources - - add-source - - - - - ${project.build.directory}/generated-sources/thrift/ - - - - - - - org.apache.thrift.tools - maven-thrift-plugin - 0.1.11 - - ${doris.thirdparty}/installed/bin/thrift - java:fullcamel - - - - thrift-sources - generate-sources - - compile - - - - - - net.alchim31.maven - scala-maven-plugin - 3.2.1 - - - scala-compile-first - process-resources - - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - - -feature - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.2.1 - - - - com.google.code.findbugs:* - org.slf4j:* - - - - - org.apache.arrow - org.apache.doris.shaded.org.apache.arrow - - - io.netty - 
org.apache.doris.shaded.io.netty - - - com.fasterxml.jackson - org.apache.doris.shaded.com.fasterxml.jackson - - - org.apache.commons.codec - org.apache.doris.shaded.org.apache.commons.codec - - - com.google.flatbuffers - org.apache.doris.shaded.com.google.flatbuffers - - - org.apache.thrift - org.apache.doris.shaded.org.apache.thrift - - - - - - package - - shade - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - - 8 - 8 - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - true - 8 - false - - - - attach-javadocs - - jar - - - - - - org.apache.maven.plugins - maven-source-plugin - ${maven-source-plugin.version} - - true - - - - compile - - jar - - - - - - - diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/CachedDorisStreamLoadClient.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/CachedDorisStreamLoadClient.java deleted file mode 100644 index 01cada43d1..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/CachedDorisStreamLoadClient.java +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark; - -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; -import org.apache.doris.spark.cfg.SparkSettings; -import org.apache.doris.spark.exception.DorisException; - -import java.io.IOException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; - -/** - * a cached streamload client for each partition - */ -public class CachedDorisStreamLoadClient { - private static final long cacheExpireTimeout = 30 * 60; - private static LoadingCache dorisStreamLoadLoadingCache; - - static { - dorisStreamLoadLoadingCache = CacheBuilder.newBuilder() - .expireAfterWrite(cacheExpireTimeout, TimeUnit.SECONDS) - .removalListener(new RemovalListener() { - @Override - public void onRemoval(RemovalNotification removalNotification) { - //do nothing - } - }) - .build( - new CacheLoader() { - @Override - public DorisStreamLoad load(SparkSettings sparkSettings) throws IOException, DorisException { - DorisStreamLoad dorisStreamLoad = new DorisStreamLoad(sparkSettings); - return dorisStreamLoad; - } - } - ); - } - - public static DorisStreamLoad getOrCreate(SparkSettings settings) throws ExecutionException { - DorisStreamLoad dorisStreamLoad = dorisStreamLoadLoadingCache.get(settings); - return dorisStreamLoad; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/DorisStreamLoad.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/DorisStreamLoad.java deleted file mode 100644 index ec3892d151..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/DorisStreamLoad.java +++ /dev/null @@ -1,225 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
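
The `CachedDorisStreamLoadClient` removed above keeps one stream-load client per `SparkSettings` instance in a Guava `LoadingCache` with a 30-minute write expiry, so executors reuse connections instead of rebuilding them per batch. A minimal sketch of the same caching shape, with a hypothetical `Client` placeholder standing in for `DorisStreamLoad`:

```java
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class ClientCacheSketch {
    /** Hypothetical stand-in for DorisStreamLoad, keyed by a settings fingerprint. */
    static class Client {
        final String settingsKey;
        Client(String settingsKey) { this.settingsKey = settingsKey; }
    }

    // Same shape as the deleted CachedDorisStreamLoadClient: one entry per distinct
    // settings key, evicted 30 minutes after it was written.
    private static final LoadingCache<String, Client> CLIENTS = CacheBuilder.newBuilder()
            .expireAfterWrite(30, TimeUnit.MINUTES)
            .build(new CacheLoader<String, Client>() {
                @Override
                public Client load(String key) {
                    return new Client(key); // the real loader constructs new DorisStreamLoad(settings)
                }
            });

    public static Client getOrCreate(String settingsKey) throws ExecutionException {
        return CLIENTS.get(settingsKey);
    }
}
```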
-package org.apache.doris.spark; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.doris.spark.cfg.ConfigurationOptions; -import org.apache.doris.spark.cfg.SparkSettings; -import org.apache.doris.spark.exception.DorisException; -import org.apache.doris.spark.exception.StreamLoadException; -import org.apache.doris.spark.rest.RestService; -import org.apache.doris.spark.rest.models.RespContent; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Serializable; -import java.net.HttpURLConnection; -import java.net.URL; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Base64; -import java.util.Calendar; -import java.util.List; -import java.util.StringJoiner; -import java.util.UUID; - -/** - * DorisStreamLoad - **/ -public class DorisStreamLoad implements Serializable{ - public static final String FIELD_DELIMITER = "\t"; - public static final String LINE_DELIMITER = "\n"; - public static final String NULL_VALUE = "\\N"; - - private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoad.class); - - private final static List DORIS_SUCCESS_STATUS = new ArrayList<>(Arrays.asList("Success", "Publish Timeout")); - private static String loadUrlPattern = "http://%s/api/%s/%s/_stream_load?"; - private String user; - private String passwd; - private String loadUrlStr; - private String hostPort; - private String db; - private String tbl; - private String authEncoding; - private String columns; - - public DorisStreamLoad(String hostPort, String db, String tbl, String user, String passwd) { - this.hostPort = hostPort; - this.db = db; - this.tbl = tbl; - this.user = user; - this.passwd = passwd; - this.loadUrlStr = String.format(loadUrlPattern, hostPort, db, tbl); - this.authEncoding = Base64.getEncoder().encodeToString(String.format("%s:%s", user, passwd).getBytes(StandardCharsets.UTF_8)); - } - - public DorisStreamLoad(SparkSettings settings) throws IOException, DorisException { - String hostPort = RestService.randomBackendV2(settings, LOG); - this.hostPort = hostPort; - String[] dbTable = settings.getProperty(ConfigurationOptions.DORIS_TABLE_IDENTIFIER).split("\\."); - this.db = dbTable[0]; - this.tbl = dbTable[1]; - this.user = settings.getProperty(ConfigurationOptions.DORIS_REQUEST_AUTH_USER); - this.passwd = settings.getProperty(ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD); - this.loadUrlStr = String.format(loadUrlPattern, hostPort, db, tbl); - this.authEncoding = Base64.getEncoder().encodeToString(String.format("%s:%s", user, passwd).getBytes(StandardCharsets.UTF_8)); - this.columns = settings.getProperty(ConfigurationOptions.DORIS_WRITE_FIELDS); - } - - public String getLoadUrlStr() { - return loadUrlStr; - } - - public String getHostPort() { - return hostPort; - } - - public void setHostPort(String hostPort) { - this.hostPort = hostPort; - this.loadUrlStr = String.format(loadUrlPattern, hostPort, this.db, this.tbl); - } - - - private HttpURLConnection getConnection(String urlStr, String label) throws IOException { - URL url = new URL(urlStr); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setInstanceFollowRedirects(false); - conn.setRequestMethod("PUT"); - conn.setRequestProperty("Authorization", "Basic " + authEncoding); - conn.addRequestProperty("Expect", "100-continue"); - 
conn.addRequestProperty("Content-Type", "text/plain; charset=UTF-8"); - conn.addRequestProperty("label", label); - if (columns != null && !columns.equals("")) { - conn.addRequestProperty("columns", columns); - } - conn.setDoOutput(true); - conn.setDoInput(true); - return conn; - } - - public static class LoadResponse { - public int status; - public String respMsg; - public String respContent; - - public LoadResponse(int status, String respMsg, String respContent) { - this.status = status; - this.respMsg = respMsg; - this.respContent = respContent; - } - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("status: ").append(status); - sb.append(", resp msg: ").append(respMsg); - sb.append(", resp content: ").append(respContent); - return sb.toString(); - } - } - - public String listToString(List> rows){ - StringJoiner lines = new StringJoiner(LINE_DELIMITER); - for (List row : rows) { - StringJoiner line = new StringJoiner(FIELD_DELIMITER); - for (Object field : row) { - if (field == null) { - line.add(NULL_VALUE); - } else { - line.add(field.toString()); - } - } - lines.add(line.toString()); - } - return lines.toString(); - } - - - public void load(List> rows) throws StreamLoadException { - String records = listToString(rows); - load(records); - } - public void load(String value) throws StreamLoadException { - LOG.debug("Streamload Request:{} ,Body:{}", loadUrlStr, value); - LoadResponse loadResponse = loadBatch(value); - if(loadResponse.status != 200){ - throw new StreamLoadException("stream load error: " + loadResponse.respContent); - }else{ - LOG.info("Streamload Response:{}",loadResponse); - ObjectMapper obj = new ObjectMapper(); - try { - RespContent respContent = obj.readValue(loadResponse.respContent, RespContent.class); - if(!DORIS_SUCCESS_STATUS.contains(respContent.getStatus())){ - throw new StreamLoadException("stream load error: " + respContent.getMessage()); - } - } catch (IOException e) { - throw new StreamLoadException(e); - } - } - } - - private LoadResponse loadBatch(String value) { - Calendar calendar = Calendar.getInstance(); - String label = String.format("spark_streamload_%s%02d%02d_%02d%02d%02d_%s", - calendar.get(Calendar.YEAR), calendar.get(Calendar.MONTH) + 1, calendar.get(Calendar.DAY_OF_MONTH), - calendar.get(Calendar.HOUR_OF_DAY), calendar.get(Calendar.MINUTE), calendar.get(Calendar.SECOND), - UUID.randomUUID().toString().replaceAll("-", "")); - - HttpURLConnection feConn = null; - HttpURLConnection beConn = null; - int status = -1; - try { - // build request and send to new be location - beConn = getConnection(loadUrlStr, label); - // send data to be - BufferedOutputStream bos = new BufferedOutputStream(beConn.getOutputStream()); - bos.write(value.getBytes()); - bos.close(); - - // get respond - status = beConn.getResponseCode(); - String respMsg = beConn.getResponseMessage(); - InputStream stream = (InputStream) beConn.getContent(); - BufferedReader br = new BufferedReader(new InputStreamReader(stream)); - StringBuilder response = new StringBuilder(); - String line; - while ((line = br.readLine()) != null) { - response.append(line); - } - return new LoadResponse(status, respMsg, response.toString()); - - } catch (Exception e) { - e.printStackTrace(); - String err = "http request exception,load url : "+loadUrlStr+",failed to execute spark streamload with label: " + label; - LOG.warn(err, e); - return new LoadResponse(status, e.getMessage(), err); - } finally { - if (feConn != null) { - feConn.disconnect(); - } - if 
(beConn != null) { - beConn.disconnect(); - } - } - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/backend/BackendClient.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/backend/BackendClient.java deleted file mode 100644 index ad11003433..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/backend/BackendClient.java +++ /dev/null @@ -1,227 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.backend; - -import org.apache.doris.spark.cfg.ConfigurationOptions; -import org.apache.doris.spark.exception.ConnectedFailedException; -import org.apache.doris.spark.exception.DorisException; -import org.apache.doris.spark.exception.DorisInternalException; -import org.apache.doris.spark.util.ErrorMessages; -import org.apache.doris.spark.cfg.Settings; -import org.apache.doris.spark.serialization.Routing; -import org.apache.doris.thrift.TDorisExternalService; -import org.apache.doris.thrift.TScanBatchResult; -import org.apache.doris.thrift.TScanCloseParams; -import org.apache.doris.thrift.TScanCloseResult; -import org.apache.doris.thrift.TScanNextBatchParams; -import org.apache.doris.thrift.TScanOpenParams; -import org.apache.doris.thrift.TScanOpenResult; -import org.apache.doris.thrift.TStatusCode; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Client to request Doris BE - */ -public class BackendClient { - private final static Logger logger = LoggerFactory.getLogger(BackendClient.class); - - private Routing routing; - - private TDorisExternalService.Client client; - private TTransport transport; - - private boolean isConnected = false; - private final int retries; - private final int socketTimeout; - private final int connectTimeout; - - public BackendClient(Routing routing, Settings settings) throws ConnectedFailedException { - this.routing = routing; - this.connectTimeout = settings.getIntegerProperty(ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS, - ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT); - this.socketTimeout = settings.getIntegerProperty(ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS, - ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT); - this.retries = settings.getIntegerProperty(ConfigurationOptions.DORIS_REQUEST_RETRIES, - ConfigurationOptions.DORIS_REQUEST_RETRIES_DEFAULT); - logger.trace("connect timeout set to '{}'. socket timeout set to '{}'. 
retries set to '{}'.", - this.connectTimeout, this.socketTimeout, this.retries); - open(); - } - - private void open() throws ConnectedFailedException { - logger.debug("Open client to Doris BE '{}'.", routing); - TException ex = null; - for (int attempt = 0; !isConnected && attempt < retries; ++attempt) { - logger.debug("Attempt {} to connect {}.", attempt, routing); - TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory(); - transport = new TSocket(routing.getHost(), routing.getPort(), socketTimeout, connectTimeout); - TProtocol protocol = factory.getProtocol(transport); - client = new TDorisExternalService.Client(protocol); - try { - logger.trace("Connect status before open transport to {} is '{}'.", routing, isConnected); - if (!transport.isOpen()) { - transport.open(); - isConnected = true; - } - } catch (TTransportException e) { - logger.warn(ErrorMessages.CONNECT_FAILED_MESSAGE, routing, e); - ex = e; - } - if (isConnected) { - logger.info("Success connect to {}.", routing); - break; - } - } - if (!isConnected) { - logger.error(ErrorMessages.CONNECT_FAILED_MESSAGE, routing); - throw new ConnectedFailedException(routing.toString(), ex); - } - } - - private void close() { - logger.trace("Connect status before close with '{}' is '{}'.", routing, isConnected); - isConnected = false; - if (null != client) { - client = null; - } - if ((transport != null) && transport.isOpen()) { - transport.close(); - logger.info("Closed a connection to {}.", routing); - } - } - - /** - * Open a scanner for reading Doris data. - * @param openParams thrift struct to required by request - * @return scan open result - * @throws ConnectedFailedException throw if cannot connect to Doris BE - */ - public TScanOpenResult openScanner(TScanOpenParams openParams) throws ConnectedFailedException { - logger.debug("OpenScanner to '{}', parameter is '{}'.", routing, openParams); - if (!isConnected) { - open(); - } - TException ex = null; - for (int attempt = 0; attempt < retries; ++attempt) { - logger.debug("Attempt {} to openScanner {}.", attempt, routing); - try { - TScanOpenResult result = client.openScanner(openParams); - if (result == null) { - logger.warn("Open scanner result from {} is null.", routing); - continue; - } - if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) { - logger.warn("The status of open scanner result from {} is '{}', error message is: {}.", - routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs()); - continue; - } - return result; - } catch (TException e) { - logger.warn("Open scanner from {} failed.", routing, e); - ex = e; - } - } - logger.error(ErrorMessages.CONNECT_FAILED_MESSAGE, routing); - throw new ConnectedFailedException(routing.toString(), ex); - } - - /** - * get next row batch from Doris BE - * @param nextBatchParams thrift struct to required by request - * @return scan batch result - * @throws ConnectedFailedException throw if cannot connect to Doris BE - */ - public TScanBatchResult getNext(TScanNextBatchParams nextBatchParams) throws DorisException { - logger.debug("GetNext to '{}', parameter is '{}'.", routing, nextBatchParams); - if (!isConnected) { - open(); - } - TException ex = null; - TScanBatchResult result = null; - for (int attempt = 0; attempt < retries; ++attempt) { - logger.debug("Attempt {} to getNext {}.", attempt, routing); - try { - result = client.getNext(nextBatchParams); - if (result == null) { - logger.warn("GetNext result from {} is null.", routing); - continue; - } - if 
(!TStatusCode.OK.equals(result.getStatus().getStatusCode())) { - logger.warn("The status of get next result from {} is '{}', error message is: {}.", - routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs()); - continue; - } - return result; - } catch (TException e) { - logger.warn("Get next from {} failed.", routing, e); - ex = e; - } - } - if (result != null && (TStatusCode.OK != (result.getStatus().getStatusCode()))) { - logger.error(ErrorMessages.DORIS_INTERNAL_FAIL_MESSAGE, routing, result.getStatus().getStatusCode(), - result.getStatus().getErrorMsgs()); - throw new DorisInternalException(routing.toString(), result.getStatus().getStatusCode(), - result.getStatus().getErrorMsgs()); - } - logger.error(ErrorMessages.CONNECT_FAILED_MESSAGE, routing); - throw new ConnectedFailedException(routing.toString(), ex); - } - - /** - * close an scanner. - * @param closeParams thrift struct to required by request - */ - public void closeScanner(TScanCloseParams closeParams) { - logger.debug("CloseScanner to '{}', parameter is '{}'.", routing, closeParams); - if (!isConnected) { - try { - open(); - } catch (ConnectedFailedException e) { - logger.warn("Cannot connect to Doris BE {} when close scanner.", routing); - return; - } - } - for (int attempt = 0; attempt < retries; ++attempt) { - logger.debug("Attempt {} to closeScanner {}.", attempt, routing); - try { - TScanCloseResult result = client.closeScanner(closeParams); - if (result == null) { - logger.warn("CloseScanner result from {} is null.", routing); - continue; - } - if (!TStatusCode.OK.equals(result.getStatus().getStatusCode())) { - logger.warn("The status of get next result from {} is '{}', error message is: {}.", - routing, result.getStatus().getStatusCode(), result.getStatus().getErrorMsgs()); - continue; - } - break; - } catch (TException e) { - logger.warn("Close scanner from {} failed.", routing, e); - } - } - logger.info("CloseScanner to Doris BE '{}' success.", routing); - close(); - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java deleted file mode 100644 index 9a0cead47a..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
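
The `DorisStreamLoad` class removed above speaks the stream-load protocol directly: an HTTP `PUT` to `/api/{db}/{table}/_stream_load` with Basic auth, an `Expect: 100-continue` header, and a unique `label` per batch for idempotent retries. A minimal sketch using only `java.net` (the label prefix and plain-text body are illustrative; the real class also parses the JSON response and accepts only `Success` or `Publish Timeout`):

```java
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.UUID;

public class StreamLoadSketch {
    public static int load(String hostPort, String db, String table,
                           String user, String passwd, String body) throws Exception {
        URL url = new URL(String.format("http://%s/api/%s/%s/_stream_load", hostPort, db, table));
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("PUT");
        String auth = Base64.getEncoder()
                .encodeToString((user + ":" + passwd).getBytes(StandardCharsets.UTF_8));
        conn.setRequestProperty("Authorization", "Basic " + auth);
        conn.addRequestProperty("Expect", "100-continue");               // required by stream load
        conn.addRequestProperty("label", "sketch_" + UUID.randomUUID()); // idempotency label
        conn.setInstanceFollowRedirects(false); // the deleted code targets a BE directly
        conn.setDoOutput(true);
        try (OutputStream os = conn.getOutputStream()) {
            os.write(body.getBytes(StandardCharsets.UTF_8));
        }
        return conn.getResponseCode(); // 200 plus a "Success" status body means the load landed
    }
}
```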
- -package org.apache.doris.spark.cfg; - -public interface ConfigurationOptions { - // doris fe node address - String DORIS_FENODES = "doris.fenodes"; - - String DORIS_DEFAULT_CLUSTER = "default_cluster"; - - String TABLE_IDENTIFIER = "table.identifier"; - String DORIS_TABLE_IDENTIFIER = "doris.table.identifier"; - String DORIS_READ_FIELD = "doris.read.field"; - String DORIS_FILTER_QUERY = "doris.filter.query"; - String DORIS_FILTER_QUERY_IN_MAX_COUNT = "doris.filter.query.in.max.count"; - int DORIS_FILTER_QUERY_IN_VALUE_UPPER_LIMIT = 10000; - - String DORIS_USER = "doris.user"; - String DORIS_REQUEST_AUTH_USER = "doris.request.auth.user"; - // use password to save doris.request.auth.password - // reuse credentials mask method in spark ExternalCatalogUtils#maskCredentials - String DORIS_PASSWORD = "doris.password"; - String DORIS_REQUEST_AUTH_PASSWORD = "doris.request.auth.password"; - - String DORIS_REQUEST_RETRIES = "doris.request.retries"; - String DORIS_REQUEST_CONNECT_TIMEOUT_MS = "doris.request.connect.timeout.ms"; - String DORIS_REQUEST_READ_TIMEOUT_MS = "doris.request.read.timeout.ms"; - String DORIS_REQUEST_QUERY_TIMEOUT_S = "doris.request.query.timeout.s"; - int DORIS_REQUEST_RETRIES_DEFAULT = 3; - int DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT = 30 * 1000; - int DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT = 30 * 1000; - int DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT = 3600; - - String DORIS_TABLET_SIZE = "doris.request.tablet.size"; - int DORIS_TABLET_SIZE_DEFAULT = Integer.MAX_VALUE; - int DORIS_TABLET_SIZE_MIN = 1; - - String DORIS_BATCH_SIZE = "doris.batch.size"; - int DORIS_BATCH_SIZE_DEFAULT = 1024; - - String DORIS_EXEC_MEM_LIMIT = "doris.exec.mem.limit"; - long DORIS_EXEC_MEM_LIMIT_DEFAULT = 2147483648L; - - String DORIS_VALUE_READER_CLASS = "doris.value.reader.class"; - - String DORIS_DESERIALIZE_ARROW_ASYNC = "doris.deserialize.arrow.async"; - boolean DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT = false; - - String DORIS_DESERIALIZE_QUEUE_SIZE = "doris.deserialize.queue.size"; - int DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT = 64; - - String DORIS_WRITE_FIELDS = "doris.write.fields"; - - String DORIS_SINK_BATCH_SIZE = "doris.sink.batch.size"; - int SINK_BATCH_SIZE_DEFAULT = 10000; - - String DORIS_SINK_MAX_RETRIES = "doris.sink.max-retries"; - int SINK_MAX_RETRIES_DEFAULT = 1; -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/PropertiesSettings.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/PropertiesSettings.java deleted file mode 100644 index cb02766775..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/PropertiesSettings.java +++ /dev/null @@ -1,56 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
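
The `ConfigurationOptions` interface removed above is the full catalogue of `doris.*` keys the Spark connector understands, together with their defaults. A short sketch of wiring the most common ones into a `java.util.Properties` (host, table, and credentials are placeholders):

```java
import java.util.Properties;

public class OptionsSketch {
    public static Properties dorisProps() {
        // Keys as defined in the deleted ConfigurationOptions interface.
        Properties props = new Properties();
        props.setProperty("doris.fenodes", "FE_IP:8030");          // FE http address (placeholder)
        props.setProperty("doris.table.identifier", "db.table");   // target table (placeholder)
        props.setProperty("doris.request.auth.user", "root");
        props.setProperty("doris.request.auth.password", "");
        props.setProperty("doris.request.retries", "3");           // connector default: 3
        props.setProperty("doris.batch.size", "1024");             // connector default: 1024
        return props;
    }
}
```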
- -package org.apache.doris.spark.cfg; - -import java.util.Properties; - -import com.google.common.base.Preconditions; - -public class PropertiesSettings extends Settings { - - protected final Properties props; - - public PropertiesSettings() { - this(new Properties()); - } - - public PropertiesSettings(Properties props) { - Preconditions.checkArgument(props != null, "non-null props configuration expected."); - this.props = props; - } - - @Override - public String getProperty(String name) { - return props.getProperty(name); - } - - @Override - public void setProperty(String name, String value) { - props.setProperty(name, value); - } - - @Override - public Settings copy() { - return new PropertiesSettings((Properties) props.clone()); - } - - @Override - public Properties asProperties() { - return props; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/Settings.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/Settings.java deleted file mode 100644 index 23f0cd7ed2..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/Settings.java +++ /dev/null @@ -1,114 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
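
`PropertiesSettings`, removed above, is a thin `Settings` view over `java.util.Properties`; the one subtlety is that `copy()` clones the backing `Properties`, so mutating a copy never leaks into the original. The same semantics in isolation:

```java
import java.util.Properties;

public class SettingsCopySketch {
    public static void main(String[] args) {
        Properties original = new Properties();
        original.setProperty("doris.fenodes", "FE_IP:8030");

        // copy() in the deleted class does exactly this clone.
        Properties copy = (Properties) original.clone();
        copy.setProperty("doris.fenodes", "OTHER_FE:8030");

        System.out.println(original.getProperty("doris.fenodes")); // still FE_IP:8030
    }
}
```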
- -package org.apache.doris.spark.cfg; - -import java.util.Enumeration; -import java.util.Map; -import java.util.Properties; - -import org.apache.commons.lang3.StringUtils; -import org.apache.doris.spark.exception.IllegalArgumentException; -import org.apache.doris.spark.util.ErrorMessages; -import org.apache.doris.spark.util.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public abstract class Settings { - private final static Logger logger = LoggerFactory.getLogger(Settings.class); - - public abstract String getProperty(String name); - - public abstract void setProperty(String name, String value); - - public abstract Properties asProperties(); - - public abstract Settings copy(); - - public String getProperty(String name, String defaultValue) { - String value = getProperty(name); - if (StringUtils.isEmpty(value)) { - return defaultValue; - } - return value; - } - - public int getIntegerProperty(String name, int defaultValue) { - try { - if (getProperty(name) != null) { - return Integer.parseInt(getProperty(name)); - } - } catch (NumberFormatException e) { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, name, getProperty(name)); - } - return defaultValue; - } - - public Settings merge(Properties properties) { - if (properties == null || properties.isEmpty()) { - return this; - } - - Enumeration propertyNames = properties.propertyNames(); - - for (; propertyNames.hasMoreElements();) { - Object prop = propertyNames.nextElement(); - if (prop instanceof String) { - Object value = properties.get(prop); - setProperty((String) prop, value.toString()); - } - } - - return this; - } - - public Settings merge(Map map) { - if (map == null || map.isEmpty()) { - return this; - } - - for (Map.Entry entry : map.entrySet()) { - setProperty(entry.getKey(), entry.getValue()); - } - - return this; - } - - public Settings load(String source) throws IllegalArgumentException { - Properties copy = IOUtils.propsFromString(source); - merge(copy); - return this; - } - - public String save() throws IllegalArgumentException { - Properties copy = asProperties(); - return IOUtils.propsToString(copy); - } - - @Override - public int hashCode() { - return asProperties().hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - return asProperties().equals(((Settings) obj).asProperties()); - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/SparkSettings.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/SparkSettings.java deleted file mode 100644 index 39fcd75b9c..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/SparkSettings.java +++ /dev/null @@ -1,77 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.cfg; - -import java.util.Properties; - -import org.apache.spark.SparkConf; - -import com.google.common.base.Preconditions; - -import scala.Option; -import scala.Serializable; -import scala.Tuple2; - -public class SparkSettings extends Settings implements Serializable { - - private final SparkConf cfg; - - public SparkSettings(SparkConf cfg) { - Preconditions.checkArgument(cfg != null, "non-null spark configuration expected."); - this.cfg = cfg; - } - - public SparkSettings copy() { - return new SparkSettings(cfg.clone()); - } - - public String getProperty(String name) { - Option op = cfg.getOption(name); - if (!op.isDefined()) { - op = cfg.getOption("spark." + name); - } - return (op.isDefined() ? op.get() : null); - } - - public void setProperty(String name, String value) { - cfg.set(name, value); - } - - public Properties asProperties() { - Properties props = new Properties(); - - if (cfg != null) { - String sparkPrefix = "spark."; - for (Tuple2 tuple : cfg.getAll()) { - // spark. are special so save them without the prefix as well - // since its unlikely the other implementations will be aware of this convention - String key = tuple._1; - props.setProperty(key, tuple._2); - if (key.startsWith(sparkPrefix)) { - String simpleKey = key.substring(sparkPrefix.length()); - // double check to not override a property defined directly in the config - if (!props.containsKey(simpleKey)) { - props.setProperty(simpleKey, tuple._2); - } - } - } - } - - return props; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/ConnectedFailedException.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/ConnectedFailedException.java deleted file mode 100644 index 461734ba7e..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/ConnectedFailedException.java +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
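
`SparkSettings`, removed above, resolves a key first verbatim and then under the `spark.` prefix, since users typically set connector options as `spark.doris.*` in `SparkConf`. The lookup in isolation, using only the stock Spark API:

```java
import org.apache.spark.SparkConf;

public class SparkSettingsSketch {
    /** Look a key up directly, then under the "spark." prefix, as the deleted class does. */
    public static String getProperty(SparkConf conf, String name) {
        scala.Option<String> op = conf.getOption(name);
        if (!op.isDefined()) {
            op = conf.getOption("spark." + name);
        }
        return op.isDefined() ? op.get() : null;
    }

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().set("spark.doris.fenodes", "FE_IP:8030");
        System.out.println(getProperty(conf, "doris.fenodes")); // resolved via the spark. prefix
    }
}
```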
-
-package org.apache.doris.spark.exception;
-
-public class ConnectedFailedException extends DorisException {
-    public ConnectedFailedException(String server, Throwable cause) {
-        super("Connect to " + server + " failed.", cause);
-    }
-
-    public ConnectedFailedException(String server, int statusCode, Throwable cause) {
-        super("Connect to " + server + " failed, status code is " + statusCode + ".", cause);
-    }
-}
diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/DorisException.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/DorisException.java
deleted file mode 100644
index 6c47db0b14..0000000000
--- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/DorisException.java
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.spark.exception;
-
-public class DorisException extends Exception {
-    public DorisException() {
-        super();
-    }
-    public DorisException(String message) {
-        super(message);
-    }
-    public DorisException(String message, Throwable cause) {
-        super(message, cause);
-    }
-    public DorisException(Throwable cause) {
-        super(cause);
-    }
-    protected DorisException(String message, Throwable cause,
-                             boolean enableSuppression,
-                             boolean writableStackTrace) {
-        super(message, cause, enableSuppression, writableStackTrace);
-    }
-}
diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/DorisInternalException.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/DorisInternalException.java
deleted file mode 100644
index f42aceed54..0000000000
--- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/DorisInternalException.java
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -package org.apache.doris.spark.exception; - -import org.apache.doris.thrift.TStatusCode; - -import java.util.List; - -public class DorisInternalException extends DorisException { - public DorisInternalException(String server, TStatusCode statusCode, List<String> errorMsgs) { - super("Doris server " + server + " internal error, status code [" + statusCode + "], error message is " + errorMsgs); - } - -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/IllegalArgumentException.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/IllegalArgumentException.java deleted file mode 100644 index f94ce5d9ed..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/IllegalArgumentException.java +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.exception; - -public class IllegalArgumentException extends DorisException { - public IllegalArgumentException(String msg, Throwable cause) { - super(msg, cause); - } - - public IllegalArgumentException(String arg, String value) { - super("argument '" + arg + "' is illegal, value is '" + value + "'."); - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/ShouldNeverHappenException.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/ShouldNeverHappenException.java deleted file mode 100644 index 6c38578420..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/ShouldNeverHappenException.java +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.exception; - -public class ShouldNeverHappenException extends DorisException { } diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/StreamLoadException.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/StreamLoadException.java deleted file mode 100644 index ec9f77f9aa..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/exception/StreamLoadException.java +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.exception; - -public class StreamLoadException extends Exception { - public StreamLoadException() { - super(); - } - public StreamLoadException(String message) { - super(message); - } - public StreamLoadException(String message, Throwable cause) { - super(message, cause); - } - public StreamLoadException(Throwable cause) { - super(cause); - } - protected StreamLoadException(String message, Throwable cause, - boolean enableSuppression, - boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/PartitionDefinition.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/PartitionDefinition.java deleted file mode 100644 index 0c2aae31fc..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/PartitionDefinition.java +++ /dev/null @@ -1,155 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.rest; - -import java.io.Serializable; -import java.util.Collections; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; - -import org.apache.doris.spark.cfg.PropertiesSettings; -import org.apache.doris.spark.cfg.Settings; -import org.apache.doris.spark.exception.IllegalArgumentException; - -/** - * Doris RDD partition info. 
- */ -public class PartitionDefinition implements Serializable, Comparable<PartitionDefinition> { - private final String database; - private final String table; - - private final String beAddress; - private final Set<Long> tabletIds; - private final String queryPlan; - private final String serializedSettings; - - public PartitionDefinition(String database, String table, - Settings settings, String beAddress, Set<Long> tabletIds, String queryPlan) - throws IllegalArgumentException { - if (settings != null) { - this.serializedSettings = settings.save(); - } else { - this.serializedSettings = null; - } - this.database = database; - this.table = table; - this.beAddress = beAddress; - this.tabletIds = tabletIds; - this.queryPlan = queryPlan; - } - - public String getBeAddress() { - return beAddress; - } - - public Set<Long> getTabletIds() { - return tabletIds; - } - - public String getDatabase() { - return database; - } - - public String getTable() { - return table; - } - - public String getQueryPlan() { - return queryPlan; - } - - public Settings settings() throws IllegalArgumentException { - PropertiesSettings settings = new PropertiesSettings(); - return serializedSettings != null ? settings.load(serializedSettings) : settings; - } - - public int compareTo(PartitionDefinition o) { - int cmp = database.compareTo(o.database); - if (cmp != 0) { - return cmp; - } - cmp = table.compareTo(o.table); - if (cmp != 0) { - return cmp; - } - cmp = beAddress.compareTo(o.beAddress); - if (cmp != 0) { - return cmp; - } - cmp = queryPlan.compareTo(o.queryPlan); - if (cmp != 0) { - return cmp; - } - - cmp = tabletIds.size() - o.tabletIds.size(); - if (cmp != 0) { - return cmp; - } - - Set<Long> similar = new HashSet<>(tabletIds); - Set<Long> diffSelf = new HashSet<>(tabletIds); - Set<Long> diffOther = new HashSet<>(o.tabletIds); - similar.retainAll(o.tabletIds); - diffSelf.removeAll(similar); - diffOther.removeAll(similar); - if (diffSelf.isEmpty()) { - return 0; - } - long diff = Collections.min(diffSelf) - Collections.min(diffOther); - return diff < 0 ? -1 : 1; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - PartitionDefinition that = (PartitionDefinition) o; - return Objects.equals(database, that.database) && - Objects.equals(table, that.table) && - Objects.equals(beAddress, that.beAddress) && - Objects.equals(tabletIds, that.tabletIds) && - Objects.equals(queryPlan, that.queryPlan) && - Objects.equals(serializedSettings, that.serializedSettings); - } - - @Override - public int hashCode() { - int result = database.hashCode(); - result = 31 * result + table.hashCode(); - result = 31 * result + beAddress.hashCode(); - result = 31 * result + queryPlan.hashCode(); - result = 31 * result + tabletIds.hashCode(); - return result; - } - - @Override - public String toString() { - return "PartitionDefinition{" + - "database='" + database + '\'' + - ", table='" + table + '\'' + - ", beAddress='" + beAddress + '\'' + - ", tabletIds=" + tabletIds + - ", queryPlan='" + queryPlan + '\'' + - '}'; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/RestService.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/RestService.java deleted file mode 100644 index e1f9a8104f..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/RestService.java +++ /dev/null @@ -1,631 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.rest; - -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_FENODES; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_FILTER_QUERY; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_READ_FIELD; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_REQUEST_AUTH_USER; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLET_SIZE; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_DEFAULT; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_MIN; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLE_IDENTIFIER; -import static org.apache.doris.spark.util.ErrorMessages.CONNECT_FAILED_MESSAGE; -import static org.apache.doris.spark.util.ErrorMessages.ILLEGAL_ARGUMENT_MESSAGE; -import static org.apache.doris.spark.util.ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE; -import static org.apache.doris.spark.util.ErrorMessages.SHOULD_NOT_HAPPEN_MESSAGE; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.io.Serializable; -import java.net.HttpURLConnection; -import java.net.URL; -import java.nio.charset.StandardCharsets; -import java.util.Map; -import java.util.HashMap; -import java.util.Base64; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.ArrayList; -import java.util.Set; -import java.util.HashSet; -import java.util.stream.Collectors; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.doris.spark.cfg.ConfigurationOptions; -import org.apache.doris.spark.cfg.Settings; -import org.apache.doris.spark.cfg.SparkSettings; -import org.apache.doris.spark.exception.ConnectedFailedException; -import org.apache.doris.spark.exception.DorisException; -import org.apache.doris.spark.exception.IllegalArgumentException; -import org.apache.doris.spark.exception.ShouldNeverHappenException; -import org.apache.doris.spark.rest.models.Backend; -import org.apache.doris.spark.rest.models.BackendRow; -import org.apache.doris.spark.rest.models.BackendV2; -import org.apache.doris.spark.rest.models.QueryPlan; -import org.apache.doris.spark.rest.models.Schema; -import org.apache.doris.spark.rest.models.Tablet; -import org.apache.http.HttpStatus; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.methods.HttpRequestBase; -import org.apache.http.entity.StringEntity; -import org.codehaus.jackson.JsonParseException; -import org.codehaus.jackson.map.JsonMappingException; -import org.codehaus.jackson.map.ObjectMapper; -import org.slf4j.Logger; - -import com.google.common.annotations.VisibleForTesting; - -/** - * Service for communicate with Doris FE. 
- */ -public class RestService implements Serializable { - public final static int REST_RESPONSE_STATUS_OK = 200; - private static final String API_PREFIX = "/api"; - private static final String SCHEMA = "_schema"; - private static final String QUERY_PLAN = "_query_plan"; - @Deprecated - private static final String BACKENDS = "/rest/v1/system?path=//backends"; - private static final String BACKENDS_V2 = "/api/backends?is_alive=true"; - - /** - * send request to Doris FE and get response json string. - * @param cfg configuration of request - * @param request {@link HttpRequestBase} real request - * @param logger {@link Logger} - * @return Doris FE response in json string - * @throws ConnectedFailedException throw when cannot connect to Doris FE - */ - private static String send(Settings cfg, HttpRequestBase request, Logger logger) throws - ConnectedFailedException { - int connectTimeout = cfg.getIntegerProperty(ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS, - ConfigurationOptions.DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT); - int socketTimeout = cfg.getIntegerProperty(ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS, - ConfigurationOptions.DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT); - int retries = cfg.getIntegerProperty(ConfigurationOptions.DORIS_REQUEST_RETRIES, - ConfigurationOptions.DORIS_REQUEST_RETRIES_DEFAULT); - logger.trace("connect timeout set to '{}'. socket timeout set to '{}'. retries set to '{}'.", - connectTimeout, socketTimeout, retries); - - RequestConfig requestConfig = RequestConfig.custom() - .setConnectTimeout(connectTimeout) - .setSocketTimeout(socketTimeout) - .build(); - - request.setConfig(requestConfig); - String user = cfg.getProperty(DORIS_REQUEST_AUTH_USER, ""); - String password = cfg.getProperty(DORIS_REQUEST_AUTH_PASSWORD, ""); - logger.info("Send request to Doris FE '{}' with user '{}'.", request.getURI(), user); - IOException ex = null; - int statusCode = -1; - - for (int attempt = 0; attempt < retries; attempt++) { - logger.debug("Attempt {} to request {}.", attempt, request.getURI()); - try { - String response; - if (request instanceof HttpGet){ - response = getConnectionGet(request.getURI().toString(), user, password,logger); - } else { - response = getConnectionPost(request,user, password,logger); - } - if (response == null) { - logger.warn("Failed to get response from Doris FE {}, http code is {}", - request.getURI(), statusCode); - continue; - } - logger.trace("Success get response from Doris FE: {}, response is: {}.", - request.getURI(), response); - ObjectMapper mapper = new ObjectMapper(); - Map map = mapper.readValue(response, Map.class); - //Handle the problem of inconsistent data format returned by http v1 and v2 - if (map.containsKey("code") && map.containsKey("msg")) { - Object data = map.get("data"); - return mapper.writeValueAsString(data); - } else { - return response; - } - } catch (IOException e) { - ex = e; - logger.warn(CONNECT_FAILED_MESSAGE, request.getURI(), e); - } - } - - logger.error(CONNECT_FAILED_MESSAGE, request.getURI(), ex); - throw new ConnectedFailedException(request.getURI().toString(), statusCode, ex); - } - - private static String getConnectionGet(String request,String user, String passwd,Logger logger) throws IOException { - URL realUrl = new URL(request); - // open connection - HttpURLConnection connection = (HttpURLConnection)realUrl.openConnection(); - String authEncoding = Base64.getEncoder().encodeToString(String.format("%s:%s", user, passwd).getBytes(StandardCharsets.UTF_8)); - 
connection.setRequestProperty("Authorization", "Basic " + authEncoding); - - connection.connect(); - return parseResponse(connection,logger); - } - - private static String parseResponse(HttpURLConnection connection,Logger logger) throws IOException { - if (connection.getResponseCode() != HttpStatus.SC_OK) { - logger.warn("Failed to get response from Doris {}, http code is {}", - connection.getURL(), connection.getResponseCode()); - throw new IOException("Failed to get response from Doris"); - } - StringBuilder result = new StringBuilder(""); - BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "utf-8")); - String line; - while ((line = in.readLine()) != null) { - result.append(line); - } - if (in != null) { - in.close(); - } - return result.toString(); - } - - private static String getConnectionPost(HttpRequestBase request,String user, String passwd,Logger logger) throws IOException { - URL url = new URL(request.getURI().toString()); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setInstanceFollowRedirects(false); - conn.setRequestMethod(request.getMethod()); - String authEncoding = Base64.getEncoder().encodeToString(String.format("%s:%s", user, passwd).getBytes(StandardCharsets.UTF_8)); - conn.setRequestProperty("Authorization", "Basic " + authEncoding); - InputStream content = ((HttpPost)request).getEntity().getContent(); - String res = IOUtils.toString(content); - conn.setDoOutput(true); - conn.setDoInput(true); - PrintWriter out = new PrintWriter(conn.getOutputStream()); - // send request params - out.print(res); - // flush - out.flush(); - // read response - return parseResponse(conn,logger); - } - /** - * parse table identifier to array. - * @param tableIdentifier table identifier string - * @param logger {@link Logger} - * @return first element is db name, second element is table name - * @throws IllegalArgumentException table identifier is illegal - */ - @VisibleForTesting - static String[] parseIdentifier(String tableIdentifier, Logger logger) throws IllegalArgumentException { - logger.trace("Parse identifier '{}'.", tableIdentifier); - if (StringUtils.isEmpty(tableIdentifier)) { - logger.error(ILLEGAL_ARGUMENT_MESSAGE, "table.identifier", tableIdentifier); - throw new IllegalArgumentException("table.identifier", tableIdentifier); - } - String[] identifier = tableIdentifier.split("\\."); - if (identifier.length != 2) { - logger.error(ILLEGAL_ARGUMENT_MESSAGE, "table.identifier", tableIdentifier); - throw new IllegalArgumentException("table.identifier", tableIdentifier); - } - return identifier; - } - - /** - * choice a Doris FE node to request. - * @param feNodes Doris FE node list, separate be comma - * @param logger slf4j logger - * @return the chosen one Doris FE node - * @throws IllegalArgumentException fe nodes is illegal - */ - @VisibleForTesting - static String randomEndpoint(String feNodes, Logger logger) throws IllegalArgumentException { - logger.trace("Parse fenodes '{}'.", feNodes); - if (StringUtils.isEmpty(feNodes)) { - logger.error(ILLEGAL_ARGUMENT_MESSAGE, "fenodes", feNodes); - throw new IllegalArgumentException("fenodes", feNodes); - } - List nodes = Arrays.asList(feNodes.split(",")); - Collections.shuffle(nodes); - return nodes.get(0).trim(); - } - - /** - * get a valid URI to connect Doris FE. 
- * @param cfg configuration of request - * @param logger {@link Logger} - * @return uri string - * @throws IllegalArgumentException throw when configuration is illegal - */ - @VisibleForTesting - static String getUriStr(Settings cfg, Logger logger) throws IllegalArgumentException { - String[] identifier = parseIdentifier(cfg.getProperty(DORIS_TABLE_IDENTIFIER), logger); - return "http://" + - randomEndpoint(cfg.getProperty(DORIS_FENODES), logger) + API_PREFIX + - "/" + identifier[0] + - "/" + identifier[1] + - "/"; - } - - /** - * discover Doris table schema from Doris FE. - * @param cfg configuration of request - * @param logger slf4j logger - * @return Doris table schema - * @throws DorisException throw when discover failed - */ - public static Schema getSchema(Settings cfg, Logger logger) - throws DorisException { - logger.trace("Finding schema."); - HttpGet httpGet = new HttpGet(getUriStr(cfg, logger) + SCHEMA); - String response = send(cfg, httpGet, logger); - logger.debug("Find schema response is '{}'.", response); - return parseSchema(response, logger); - } - - /** - * translate Doris FE response to inner {@link Schema} struct. - * @param response Doris FE response - * @param logger {@link Logger} - * @return inner {@link Schema} struct - * @throws DorisException throw when translate failed - */ - @VisibleForTesting - public static Schema parseSchema(String response, Logger logger) throws DorisException { - logger.trace("Parse response '{}' to schema.", response); - ObjectMapper mapper = new ObjectMapper(); - Schema schema; - try { - schema = mapper.readValue(response, Schema.class); - } catch (JsonParseException e) { - String errMsg = "Doris FE's response is not a json. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (JsonMappingException e) { - String errMsg = "Doris FE's response cannot map to schema. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (IOException e) { - String errMsg = "Parse Doris FE's response to json failed. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } - - if (schema == null) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE); - throw new ShouldNeverHappenException(); - } - - if (schema.getStatus() != REST_RESPONSE_STATUS_OK) { - String errMsg = "Doris FE's response is not OK, status is " + schema.getStatus(); - logger.error(errMsg); - throw new DorisException(errMsg); - } - logger.debug("Parsing schema result is '{}'.", schema); - return schema; - } - - /** - * find Doris RDD partitions from Doris FE. 
- * @param cfg configuration of request - * @param logger {@link Logger} - * @return an list of Doris RDD partitions - * @throws DorisException throw when find partition failed - */ - public static List findPartitions(Settings cfg, Logger logger) throws DorisException { - String[] tableIdentifiers = parseIdentifier(cfg.getProperty(DORIS_TABLE_IDENTIFIER), logger); - String sql = "select " + cfg.getProperty(DORIS_READ_FIELD, "*") + - " from `" + tableIdentifiers[0] + "`.`" + tableIdentifiers[1] + "`"; - if (!StringUtils.isEmpty(cfg.getProperty(DORIS_FILTER_QUERY))) { - sql += " where " + cfg.getProperty(DORIS_FILTER_QUERY); - } - logger.debug("Query SQL Sending to Doris FE is: '{}'.", sql); - - HttpPost httpPost = new HttpPost(getUriStr(cfg, logger) + QUERY_PLAN); - String entity = "{\"sql\": \""+ sql +"\"}"; - logger.debug("Post body Sending to Doris FE is: '{}'.", entity); - StringEntity stringEntity = new StringEntity(entity, StandardCharsets.UTF_8); - stringEntity.setContentEncoding("UTF-8"); - stringEntity.setContentType("application/json"); - httpPost.setEntity(stringEntity); - - String resStr = send(cfg, httpPost, logger); - logger.debug("Find partition response is '{}'.", resStr); - QueryPlan queryPlan = getQueryPlan(resStr, logger); - Map> be2Tablets = selectBeForTablet(queryPlan, logger); - return tabletsMapToPartition( - cfg, - be2Tablets, - queryPlan.getOpaqued_query_plan(), - tableIdentifiers[0], - tableIdentifiers[1], - logger); - } - - /** - * translate Doris FE response string to inner {@link QueryPlan} struct. - * @param response Doris FE response string - * @param logger {@link Logger} - * @return inner {@link QueryPlan} struct - * @throws DorisException throw when translate failed. - */ - @VisibleForTesting - static QueryPlan getQueryPlan(String response, Logger logger) throws DorisException { - ObjectMapper mapper = new ObjectMapper(); - QueryPlan queryPlan; - try { - queryPlan = mapper.readValue(response, QueryPlan.class); - } catch (JsonParseException e) { - String errMsg = "Doris FE's response is not a json. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (JsonMappingException e) { - String errMsg = "Doris FE's response cannot map to schema. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (IOException e) { - String errMsg = "Parse Doris FE's response to json failed. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } - - if (queryPlan == null) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE); - throw new ShouldNeverHappenException(); - } - - if (queryPlan.getStatus() != REST_RESPONSE_STATUS_OK) { - String errMsg = "Doris FE's response is not OK, status is " + queryPlan.getStatus(); - logger.error(errMsg); - throw new DorisException(errMsg); - } - logger.debug("Parsing partition result is '{}'.", queryPlan); - return queryPlan; - } - - /** - * select which Doris BE to get tablet data. - * @param queryPlan {@link QueryPlan} translated from Doris FE response - * @param logger {@link Logger} - * @return BE to tablets {@link Map} - * @throws DorisException throw when select failed. 
- */ - @VisibleForTesting - static Map> selectBeForTablet(QueryPlan queryPlan, Logger logger) throws DorisException { - Map> be2Tablets = new HashMap<>(); - for (Map.Entry part : queryPlan.getPartitions().entrySet()) { - logger.debug("Parse tablet info: '{}'.", part); - long tabletId; - try { - tabletId = Long.parseLong(part.getKey()); - } catch (NumberFormatException e) { - String errMsg = "Parse tablet id '" + part.getKey() + "' to long failed."; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } - String target = null; - int tabletCount = Integer.MAX_VALUE; - for (String candidate : part.getValue().getRoutings()) { - logger.trace("Evaluate Doris BE '{}' to tablet '{}'.", candidate, tabletId); - if (!be2Tablets.containsKey(candidate)) { - logger.debug("Choice a new Doris BE '{}' for tablet '{}'.", candidate, tabletId); - List tablets = new ArrayList<>(); - be2Tablets.put(candidate, tablets); - target = candidate; - break; - } else { - if (be2Tablets.get(candidate).size() < tabletCount) { - target = candidate; - tabletCount = be2Tablets.get(candidate).size(); - logger.debug("Current candidate Doris BE to tablet '{}' is '{}' with tablet count {}.", - tabletId, target, tabletCount); - } - } - } - if (target == null) { - String errMsg = "Cannot choice Doris BE for tablet " + tabletId; - logger.error(errMsg); - throw new DorisException(errMsg); - } - - logger.debug("Choice Doris BE '{}' for tablet '{}'.", target, tabletId); - be2Tablets.get(target).add(tabletId); - } - return be2Tablets; - } - - /** - * tablet count limit for one Doris RDD partition - * @param cfg configuration of request - * @param logger {@link Logger} - * @return tablet count limit - */ - @VisibleForTesting - static int tabletCountLimitForOnePartition(Settings cfg, Logger logger) { - int tabletsSize = DORIS_TABLET_SIZE_DEFAULT; - if (cfg.getProperty(DORIS_TABLET_SIZE) != null) { - try { - tabletsSize = Integer.parseInt(cfg.getProperty(DORIS_TABLET_SIZE)); - } catch (NumberFormatException e) { - logger.warn(PARSE_NUMBER_FAILED_MESSAGE, DORIS_TABLET_SIZE, cfg.getProperty(DORIS_TABLET_SIZE)); - } - } - if (tabletsSize < DORIS_TABLET_SIZE_MIN) { - logger.warn("{} is less than {}, set to default value {}.", - DORIS_TABLET_SIZE, DORIS_TABLET_SIZE_MIN, DORIS_TABLET_SIZE_MIN); - tabletsSize = DORIS_TABLET_SIZE_MIN; - } - logger.debug("Tablet size is set to {}.", tabletsSize); - return tabletsSize; - } - - /** - * choice a Doris BE node to request. 
- * @param logger slf4j logger - * @return the chosen Doris BE node - * @throws IllegalArgumentException BE nodes are illegal - * Deprecated, use randomBackendV2 instead - */ - @Deprecated - @VisibleForTesting - public static String randomBackend(SparkSettings sparkSettings, Logger logger) throws DorisException, IOException { - String feNodes = sparkSettings.getProperty(DORIS_FENODES); - String feNode = randomEndpoint(feNodes, logger); - String beUrl = String.format("http://%s" + BACKENDS, feNode); - HttpGet httpGet = new HttpGet(beUrl); - String response = send(sparkSettings, httpGet, logger); - logger.info("Backend Info:{}", response); - List<BackendRow> backends = parseBackend(response, logger); - logger.trace("Parse beNodes '{}'.", backends); - if (backends == null || backends.isEmpty()) { - logger.error(ILLEGAL_ARGUMENT_MESSAGE, "beNodes", backends); - throw new IllegalArgumentException("beNodes", String.valueOf(backends)); - } - Collections.shuffle(backends); - BackendRow backend = backends.get(0); - return backend.getIP() + ":" + backend.getHttpPort(); - } - - /** - * translate Doris FE response to inner {@link BackendRow} struct. - * @param response Doris FE response - * @param logger {@link Logger} - * @return inner {@link List} of {@link BackendRow} - * @throws DorisException, IOException thrown when translation failed - */ - @Deprecated - @VisibleForTesting - static List<BackendRow> parseBackend(String response, Logger logger) throws DorisException, IOException { - com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper(); - Backend backend; - try { - backend = mapper.readValue(response, Backend.class); - } catch (com.fasterxml.jackson.core.JsonParseException e) { - String errMsg = "Doris BE's response is not a json. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (com.fasterxml.jackson.databind.JsonMappingException e) { - String errMsg = "Doris BE's response cannot map to schema. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (IOException e) { - String errMsg = "Parse Doris BE's response to json failed. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } - - if (backend == null) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE); - throw new ShouldNeverHappenException(); - } - List<BackendRow> backendRows = backend.getRows().stream().filter(v -> v.getAlive()).collect(Collectors.toList()); - logger.debug("Parsing backend result is '{}'.", backendRows); - return backendRows; - } - - /** - * choose a Doris BE node to request. 
- * @param logger slf4j logger - * @return the chosen one Doris BE node - * @throws IllegalArgumentException BE nodes is illegal - */ - @VisibleForTesting - public static String randomBackendV2(SparkSettings sparkSettings, Logger logger) throws DorisException { - String feNodes = sparkSettings.getProperty(DORIS_FENODES); - String feNode = randomEndpoint(feNodes, logger); - String beUrl = String.format("http://%s" + BACKENDS_V2, feNode); - HttpGet httpGet = new HttpGet(beUrl); - String response = send(sparkSettings, httpGet, logger); - logger.info("Backend Info:{}", response); - List backends = parseBackendV2(response, logger); - logger.trace("Parse beNodes '{}'.", backends); - if (backends == null || backends.isEmpty()) { - logger.error(ILLEGAL_ARGUMENT_MESSAGE, "beNodes", backends); - throw new IllegalArgumentException("beNodes", String.valueOf(backends)); - } - Collections.shuffle(backends); - BackendV2.BackendRowV2 backend = backends.get(0); - return backend.getIp() + ":" + backend.getHttpPort(); - } - - static List parseBackendV2(String response, Logger logger) throws DorisException { - com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper(); - BackendV2 backend; - try { - backend = mapper.readValue(response, BackendV2.class); - } catch (com.fasterxml.jackson.core.JsonParseException e) { - String errMsg = "Doris BE's response is not a json. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (com.fasterxml.jackson.databind.JsonMappingException e) { - String errMsg = "Doris BE's response cannot map to schema. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } catch (IOException e) { - String errMsg = "Parse Doris BE's response to json failed. res: " + response; - logger.error(errMsg, e); - throw new DorisException(errMsg, e); - } - - if (backend == null) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE); - throw new ShouldNeverHappenException(); - } - List backendRows = backend.getBackends(); - logger.debug("Parsing schema result is '{}'.", backendRows); - return backendRows; - } - - /** - * translate BE tablets map to Doris RDD partition. 
- * @param cfg configuration of request - * @param be2Tablets BE to tablets {@link Map} - * @param opaquedQueryPlan Doris BE execute plan getting from Doris FE - * @param database database name of Doris table - * @param table table name of Doris table - * @param logger {@link Logger} - * @return Doris RDD partition {@link List} - * @throws IllegalArgumentException throw when translate failed - */ - @VisibleForTesting - static List tabletsMapToPartition(Settings cfg, Map> be2Tablets, - String opaquedQueryPlan, String database, String table, Logger logger) - throws IllegalArgumentException { - int tabletsSize = tabletCountLimitForOnePartition(cfg, logger); - List partitions = new ArrayList<>(); - for (Map.Entry> beInfo : be2Tablets.entrySet()) { - logger.debug("Generate partition with beInfo: '{}'.", beInfo); - HashSet tabletSet = new HashSet<>(beInfo.getValue()); - beInfo.getValue().clear(); - beInfo.getValue().addAll(tabletSet); - int first = 0; - while (first < beInfo.getValue().size()) { - Set partitionTablets = new HashSet<>(beInfo.getValue().subList( - first, Math.min(beInfo.getValue().size(), first + tabletsSize))); - first = first + tabletsSize; - PartitionDefinition partitionDefinition = - new PartitionDefinition(database, table, cfg, - beInfo.getKey(), partitionTablets, opaquedQueryPlan); - logger.debug("Generate one PartitionDefinition '{}'.", partitionDefinition); - partitions.add(partitionDefinition); - } - } - return partitions; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Backend.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Backend.java deleted file mode 100644 index 322202d220..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Backend.java +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-package org.apache.doris.spark.rest.models; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.List; - -/** - * Be response model - **/ -@Deprecated -@JsonIgnoreProperties(ignoreUnknown = true) -public class Backend { - - @JsonProperty(value = "rows") - private List rows; - - public List getRows() { - return rows; - } - - public void setRows(List rows) { - this.rows = rows; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/BackendRow.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/BackendRow.java deleted file mode 100644 index a84ad2c76c..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/BackendRow.java +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.spark.rest.models; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -@Deprecated -@JsonIgnoreProperties(ignoreUnknown = true) -public class BackendRow { - - @JsonProperty(value = "HttpPort") - private String HttpPort; - - @JsonProperty(value = "IP") - private String IP; - - @JsonProperty(value = "Alive") - private Boolean Alive; - - public String getHttpPort() { - return HttpPort; - } - - public void setHttpPort(String httpPort) { - HttpPort = httpPort; - } - - public String getIP() { - return IP; - } - - public void setIP(String IP) { - this.IP = IP; - } - - public Boolean getAlive() { - return Alive; - } - - public void setAlive(Boolean alive) { - Alive = alive; - } - - @Override - public String toString() { - return "BackendRow{" + - "HttpPort='" + HttpPort + '\'' + - ", IP='" + IP + '\'' + - ", Alive=" + Alive + - '}'; - } -} \ No newline at end of file diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/BackendV2.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/BackendV2.java deleted file mode 100644 index 75a251446a..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/BackendV2.java +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.spark.rest.models; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.List; - -/** - * Be response model - **/ -@JsonIgnoreProperties(ignoreUnknown = true) -public class BackendV2 { - - @JsonProperty(value = "backends") - private List backends; - - public List getBackends() { - return backends; - } - - public void setRows(List rows) { - this.backends = rows; - } - - public static class BackendRowV2 { - @JsonProperty("ip") - public String ip; - @JsonProperty("http_port") - public int httpPort; - @JsonProperty("is_alive") - public boolean isAlive; - - public String getIp() { - return ip; - } - - public void setIp(String ip) { - this.ip = ip; - } - - public int getHttpPort() { - return httpPort; - } - - public void setHttpPort(int httpPort) { - this.httpPort = httpPort; - } - - public boolean isAlive() { - return isAlive; - } - - public void setAlive(boolean alive) { - isAlive = alive; - } - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Field.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Field.java deleted file mode 100644 index 53c622bdd8..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Field.java +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.rest.models; - -import java.util.Objects; - -public class Field { - private String name; - private String type; - private String comment; - private int precision; - private int scale; - - private String aggregation_type; - - public Field() { } - - public Field(String name, String type, String comment, int precision, int scale, String aggregation_type) { - this.name = name; - this.type = type; - this.comment = comment; - this.precision = precision; - this.scale = scale; - this.aggregation_type = aggregation_type; - } - - public String getAggregation_type() { - return aggregation_type; - } - - public void setAggregation_type(String aggregation_type) { - this.aggregation_type = aggregation_type; - } - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getComment() { - return comment; - } - - public void setComment(String comment) { - this.comment = comment; - } - - public int getPrecision() { - return precision; - } - - public void setPrecision(int precision) { - this.precision = precision; - } - - public int getScale() { - return scale; - } - - public void setScale(int scale) { - this.scale = scale; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Field field = (Field) o; - return precision == field.precision && - scale == field.scale && - Objects.equals(name, field.name) && - Objects.equals(type, field.type) && - Objects.equals(comment, field.comment); - } - - @Override - public int hashCode() { - return Objects.hash(name, type, comment, precision, scale); - } - - @Override - public String toString() { - return "Field{" + - "name='" + name + '\'' + - ", type='" + type + '\'' + - ", comment='" + comment + '\'' + - ", precision=" + precision + - ", scale=" + scale + - '}'; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/QueryPlan.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/QueryPlan.java deleted file mode 100644 index 0ab580ef36..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/QueryPlan.java +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.rest.models; - -import java.util.Map; -import java.util.Objects; - -public class QueryPlan { - private int status; - private String opaqued_query_plan; - private Map partitions; - - public int getStatus() { - return status; - } - - public void setStatus(int status) { - this.status = status; - } - - public String getOpaqued_query_plan() { - return opaqued_query_plan; - } - - public void setOpaqued_query_plan(String opaqued_query_plan) { - this.opaqued_query_plan = opaqued_query_plan; - } - - public Map getPartitions() { - return partitions; - } - - public void setPartitions(Map partitions) { - this.partitions = partitions; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - QueryPlan queryPlan = (QueryPlan) o; - return status == queryPlan.status && - Objects.equals(opaqued_query_plan, queryPlan.opaqued_query_plan) && - Objects.equals(partitions, queryPlan.partitions); - } - - @Override - public int hashCode() { - return Objects.hash(status, opaqued_query_plan, partitions); - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/RespContent.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/RespContent.java deleted file mode 100644 index f7fa6ff40b..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/RespContent.java +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-package org.apache.doris.spark.rest.models; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -@JsonIgnoreProperties(ignoreUnknown = true) -public class RespContent { - - @JsonProperty(value = "TxnId") - private int TxnId; - - @JsonProperty(value = "Label") - private String Label; - - @JsonProperty(value = "Status") - private String Status; - - @JsonProperty(value = "ExistingJobStatus") - private String ExistingJobStatus; - - @JsonProperty(value = "Message") - private String Message; - - @JsonProperty(value = "NumberTotalRows") - private long NumberTotalRows; - - @JsonProperty(value = "NumberLoadedRows") - private long NumberLoadedRows; - - @JsonProperty(value = "NumberFilteredRows") - private int NumberFilteredRows; - - @JsonProperty(value = "NumberUnselectedRows") - private int NumberUnselectedRows; - - @JsonProperty(value = "LoadBytes") - private long LoadBytes; - - @JsonProperty(value = "LoadTimeMs") - private int LoadTimeMs; - - @JsonProperty(value = "BeginTxnTimeMs") - private int BeginTxnTimeMs; - - @JsonProperty(value = "StreamLoadPutTimeMs") - private int StreamLoadPutTimeMs; - - @JsonProperty(value = "ReadDataTimeMs") - private int ReadDataTimeMs; - - @JsonProperty(value = "WriteDataTimeMs") - private int WriteDataTimeMs; - - @JsonProperty(value = "CommitAndPublishTimeMs") - private int CommitAndPublishTimeMs; - - @JsonProperty(value = "ErrorURL") - private String ErrorURL; - - public String getStatus() { - return Status; - } - - public String getMessage() { - return Message; - } - - @Override - public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (JsonProcessingException e) { - return ""; - } - - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Schema.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Schema.java deleted file mode 100644 index 586a8ac01b..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Schema.java +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.rest.models; - -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; - -public class Schema { - private int status = 0; - private List<Field> properties; - - public Schema() { - properties = new ArrayList<>(); - } - - public Schema(int fieldCount) { - properties = new ArrayList<>(fieldCount); - } - - public int getStatus() { - return status; - } - - public void setStatus(int status) { - this.status = status; - } - - public List<Field> getProperties() { - return properties; - } - - public void setProperties(List<Field> properties) { - this.properties = properties; - } - - public void put(String name, String type, String comment, int precision, int scale, String aggregation_type) { - properties.add(new Field(name, type, comment, precision, scale, aggregation_type)); - } - - public void put(Field f) { - properties.add(f); - } - - public Field get(int index) { - if (index >= properties.size()) { - throw new IndexOutOfBoundsException("Index: " + index + ", Fields size: " + properties.size()); - } - return properties.get(index); - } - - public int size() { - return properties.size(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Schema schema = (Schema) o; - return status == schema.status && - Objects.equals(properties, schema.properties); - } - - @Override - public int hashCode() { - return Objects.hash(status, properties); - } - - @Override - public String toString() { - return "Schema{" + - "status=" + status + - ", properties=" + properties + - '}'; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Tablet.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Tablet.java deleted file mode 100644 index 7221e013b8..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/rest/models/Tablet.java +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.rest.models; - -import java.util.List; -import java.util.Objects; - -public class Tablet { - private List<String> routings; - private int version; - private long versionHash; - private long schemaHash; - - public List<String> getRoutings() { - return routings; - } - - public void setRoutings(List<String> routings) { - this.routings = routings; - } - - public int getVersion() { - return version; - } - - public void setVersion(int version) { - this.version = version; - } - - public long getVersionHash() { - return versionHash; - } - - public void setVersionHash(long versionHash) { - this.versionHash = versionHash; - } - - public long getSchemaHash() { - return schemaHash; - } - - public void setSchemaHash(long schemaHash) { - this.schemaHash = schemaHash; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Tablet tablet = (Tablet) o; - return version == tablet.version && - versionHash == tablet.versionHash && - schemaHash == tablet.schemaHash && - Objects.equals(routings, tablet.routings); - } - - @Override - public int hashCode() { - return Objects.hash(routings, version, versionHash, schemaHash); - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/Routing.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/Routing.java deleted file mode 100644 index e47eedd773..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/Routing.java +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.serialization; - -import static org.apache.doris.spark.util.ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE; - -import org.apache.doris.spark.exception.IllegalArgumentException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * represents a Doris BE address. 
- */ -public class Routing { - private final static Logger logger = LoggerFactory.getLogger(Routing.class); - - private String host; - private int port; - - public Routing(String routing) throws IllegalArgumentException { - parseRouting(routing); - } - - private void parseRouting(String routing) throws IllegalArgumentException { - logger.debug("Parse Doris BE address: '{}'.", routing); - String[] hostPort = routing.split(":"); - if (hostPort.length != 2) { - logger.error("Format of Doris BE address '{}' is illegal.", routing); - throw new IllegalArgumentException("Doris BE", routing); - } - this.host = hostPort[0]; - try { - this.port = Integer.parseInt(hostPort[1]); - } catch (NumberFormatException e) { - logger.error(PARSE_NUMBER_FAILED_MESSAGE, "Doris BE's port", hostPort[1]); - throw new IllegalArgumentException("Doris BE", routing); - } - } - - public String getHost() { - return host; - } - - public int getPort() { - return port; - } - - @Override - public String toString() { - return "Doris BE{" + - "host='" + host + '\'' + - ", port=" + port + - '}'; - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java deleted file mode 100644 index bcc76d5a33..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java +++ /dev/null @@ -1,314 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
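The Routing class above reduces to a small "host:port" parse. A self-contained sketch of the same logic, using plain Scala exceptions instead of the connector's IllegalArgumentException:

object RoutingSketch {
  // Split "host:port"; reject anything that is not exactly two parts.
  def parse(routing: String): (String, Int) =
    routing.split(":") match {
      case Array(host, port) => (host, port.toInt) // NumberFormatException for a non-numeric port
      case _ => throw new IllegalArgumentException(s"Illegal Doris BE address: '$routing'")
    }

  def main(args: Array[String]): Unit = {
    val (host, port) = parse("127.0.0.1:9060") // placeholder BE address
    println(s"host=$host, port=$port")
  }
}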
- -package org.apache.doris.spark.serialization; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.NoSuchElementException; - -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.types.Types; -import org.apache.doris.spark.exception.DorisException; -import org.apache.doris.spark.rest.models.Schema; -import org.apache.doris.thrift.TScanBatchResult; -import org.apache.spark.sql.types.Decimal; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.base.Preconditions; - -/** - * row batch data container. - */ -public class RowBatch { - private static final Logger logger = LoggerFactory.getLogger(RowBatch.class); - - public static class Row { - private final List<Object> cols; - - Row(int colCount) { - this.cols = new ArrayList<>(colCount); - } - - List<Object> getCols() { - return cols; - } - - public void put(Object o) { - cols.add(o); - } - } - - // offset for iterating the rowBatch - private int offsetInRowBatch = 0; - private int rowCountInOneBatch = 0; - private int readRowCount = 0; - private final List<Row> rowBatch = new ArrayList<>(); - private final ArrowStreamReader arrowStreamReader; - private List<FieldVector> fieldVectors; - private final RootAllocator rootAllocator; - private final Schema schema; - - public RowBatch(TScanBatchResult nextResult, Schema schema) throws DorisException { - this.schema = schema; - this.rootAllocator = new RootAllocator(Integer.MAX_VALUE); - this.arrowStreamReader = new ArrowStreamReader( - new ByteArrayInputStream(nextResult.getRows()), - rootAllocator - ); - try { - VectorSchemaRoot root = arrowStreamReader.getVectorSchemaRoot(); - while (arrowStreamReader.loadNextBatch()) { - fieldVectors = root.getFieldVectors(); - if (fieldVectors.size() != schema.size()) { - logger.error("Schema size '{}' is not equal to arrow field size '{}'.", - schema.size(), fieldVectors.size()); - throw new DorisException("Load Doris data failed, schema size of fetch data is wrong."); - } - if (fieldVectors.size() == 0 || root.getRowCount() == 0) { - logger.debug("One batch in arrow has no data."); - continue; - } - rowCountInOneBatch = root.getRowCount(); - // init the rowBatch - for (int i = 0; i < rowCountInOneBatch; ++i) { - rowBatch.add(new Row(fieldVectors.size())); - } - convertArrowToRowBatch(); - readRowCount += root.getRowCount(); - } - } catch (Exception e) { - logger.error("Read Doris Data failed because: ", e); - throw new DorisException(e.getMessage()); - } finally { - close(); - } - } - - public boolean hasNext() { - return offsetInRowBatch < readRowCount; - } - - private void addValueToRow(int rowIndex, Object obj) { - if (rowIndex > rowCountInOneBatch) { - String errMsg = "Get row offset: " + rowIndex + " larger than row size: " + - rowCountInOneBatch; - logger.error(errMsg); -
throw new NoSuchElementException(errMsg); - } - rowBatch.get(readRowCount + rowIndex).put(obj); - } - - public void convertArrowToRowBatch() throws DorisException { - try { - for (int col = 0; col < fieldVectors.size(); col++) { - FieldVector curFieldVector = fieldVectors.get(col); - Types.MinorType mt = curFieldVector.getMinorType(); - - final String currentType = schema.get(col).getType(); - switch (currentType) { - case "NULL_TYPE": - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - addValueToRow(rowIndex, null); - } - break; - case "BOOLEAN": - Preconditions.checkArgument(mt.equals(Types.MinorType.BIT), - typeMismatchMessage(currentType, mt)); - BitVector bitVector = (BitVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = bitVector.isNull(rowIndex) ? null : bitVector.get(rowIndex) != 0; - addValueToRow(rowIndex, fieldValue); - } - break; - case "TINYINT": - Preconditions.checkArgument(mt.equals(Types.MinorType.TINYINT), - typeMismatchMessage(currentType, mt)); - TinyIntVector tinyIntVector = (TinyIntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = tinyIntVector.isNull(rowIndex) ? null : tinyIntVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "SMALLINT": - Preconditions.checkArgument(mt.equals(Types.MinorType.SMALLINT), - typeMismatchMessage(currentType, mt)); - SmallIntVector smallIntVector = (SmallIntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = smallIntVector.isNull(rowIndex) ? null : smallIntVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "INT": - Preconditions.checkArgument(mt.equals(Types.MinorType.INT), - typeMismatchMessage(currentType, mt)); - IntVector intVector = (IntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = intVector.isNull(rowIndex) ? null : intVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "BIGINT": - Preconditions.checkArgument(mt.equals(Types.MinorType.BIGINT), - typeMismatchMessage(currentType, mt)); - BigIntVector bigIntVector = (BigIntVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = bigIntVector.isNull(rowIndex) ? null : bigIntVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "FLOAT": - Preconditions.checkArgument(mt.equals(Types.MinorType.FLOAT4), - typeMismatchMessage(currentType, mt)); - Float4Vector float4Vector = (Float4Vector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = float4Vector.isNull(rowIndex) ? null : float4Vector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "TIME": - case "DOUBLE": - Preconditions.checkArgument(mt.equals(Types.MinorType.FLOAT8), - typeMismatchMessage(currentType, mt)); - Float8Vector float8Vector = (Float8Vector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = float8Vector.isNull(rowIndex) ? 
null : float8Vector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "BINARY": - Preconditions.checkArgument(mt.equals(Types.MinorType.VARBINARY), - typeMismatchMessage(currentType, mt)); - VarBinaryVector varBinaryVector = (VarBinaryVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - Object fieldValue = varBinaryVector.isNull(rowIndex) ? null : varBinaryVector.get(rowIndex); - addValueToRow(rowIndex, fieldValue); - } - break; - case "DECIMAL": - Preconditions.checkArgument(mt.equals(Types.MinorType.VARCHAR), - typeMismatchMessage(currentType, mt)); - VarCharVector varCharVectorForDecimal = (VarCharVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - if (varCharVectorForDecimal.isNull(rowIndex)) { - addValueToRow(rowIndex, null); - continue; - } - String decimalValue = new String(varCharVectorForDecimal.get(rowIndex)); - Decimal decimal = new Decimal(); - try { - decimal.set(new scala.math.BigDecimal(new BigDecimal(decimalValue))); - } catch (NumberFormatException e) { - String errMsg = "Decimal response result '" + decimalValue + "' is illegal."; - logger.error(errMsg, e); - throw new DorisException(errMsg); - } - addValueToRow(rowIndex, decimal); - } - break; - case "DECIMALV2": - Preconditions.checkArgument(mt.equals(Types.MinorType.DECIMAL), - typeMismatchMessage(currentType, mt)); - DecimalVector decimalVector = (DecimalVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - if (decimalVector.isNull(rowIndex)) { - addValueToRow(rowIndex, null); - continue; - } - Decimal decimalV2 = Decimal.apply(decimalVector.getObject(rowIndex)); - addValueToRow(rowIndex, decimalV2); - } - break; - case "DATE": - case "DATETIME": - case "LARGEINT": - case "CHAR": - case "VARCHAR": - case "STRING": - Preconditions.checkArgument(mt.equals(Types.MinorType.VARCHAR), - typeMismatchMessage(currentType, mt)); - VarCharVector varCharVector = (VarCharVector) curFieldVector; - for (int rowIndex = 0; rowIndex < rowCountInOneBatch; rowIndex++) { - if (varCharVector.isNull(rowIndex)) { - addValueToRow(rowIndex, null); - continue; - } - String value = new String(varCharVector.get(rowIndex), StandardCharsets.UTF_8); - addValueToRow(rowIndex, value); - } - break; - default: - String errMsg = "Unsupported type " + schema.get(col).getType(); - logger.error(errMsg); - throw new DorisException(errMsg); - } - } - } catch (Exception e) { - close(); - throw e; - } - } - - public List next() { - if (!hasNext()) { - String errMsg = "Get row offset:" + offsetInRowBatch + " larger than row size: " + readRowCount; - logger.error(errMsg); - throw new NoSuchElementException(errMsg); - } - return rowBatch.get(offsetInRowBatch++).getCols(); - } - - private String typeMismatchMessage(final String sparkType, final Types.MinorType arrowType) { - final String messageTemplate = "Spark type is %1$s, but arrow type is %2$s."; - return String.format(messageTemplate, sparkType, arrowType.name()); - } - - public int getReadRowCount() { - return readRowCount; - } - - public void close() { - try { - if (arrowStreamReader != null) { - arrowStreamReader.close(); - } - if (rootAllocator != null) { - rootAllocator.close(); - } - } catch (IOException ioe) { - // do nothing - } - } -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/util/ErrorMessages.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/util/ErrorMessages.java deleted file mode 
100644 index 44ca28b4b7..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/util/ErrorMessages.java +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.util; - -public abstract class ErrorMessages { - public static final String PARSE_NUMBER_FAILED_MESSAGE = "Parse '{}' to number failed. Original string is '{}'."; - public static final String PARSE_BOOL_FAILED_MESSAGE = "Parse '{}' to boolean failed. Original string is '{}'."; - public static final String CONNECT_FAILED_MESSAGE = "Connect to doris {} failed."; - public static final String ILLEGAL_ARGUMENT_MESSAGE = "argument '{}' is illegal, value is '{}'."; - public static final String SHOULD_NOT_HAPPEN_MESSAGE = "Should not come here."; - public static final String DORIS_INTERNAL_FAIL_MESSAGE = "Doris server '{}' internal failed, status is '{}', error message is '{}'"; -} diff --git a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/util/IOUtils.java b/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/util/IOUtils.java deleted file mode 100644 index 03a9e005b4..0000000000 --- a/extension/spark-doris-connector/src/main/java/org/apache/doris/spark/util/IOUtils.java +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
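The removed ErrorMessages templates use SLF4J-style '{}' placeholders. A hedged sketch of how such a template is filled positionally (assuming only slf4j-api, which the deleted classes already depend on; the key and value below are illustrative):

import org.slf4j.LoggerFactory

object ErrorMessagesSketch {
  private val logger = LoggerFactory.getLogger(getClass)
  private val ParseNumberFailed = "Parse '{}' to number failed. Original string is '{}'."

  def main(args: Array[String]): Unit = {
    // SLF4J fills the '{}' markers positionally, so one template string can be
    // shared across call sites, which is all ErrorMessages provides.
    logger.warn(ParseNumberFailed, "doris.request.query.timeout.s", "not-a-number")
  }
}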
- -package org.apache.doris.spark.util; - -import java.io.IOException; -import java.io.StringReader; -import java.io.StringWriter; -import java.util.Properties; - -import org.apache.doris.spark.exception.IllegalArgumentException; - -public class IOUtils { - public static String propsToString(Properties props) throws IllegalArgumentException { - StringWriter sw = new StringWriter(); - if (props != null) { - try { - props.store(sw, ""); - } catch (IOException ex) { - throw new IllegalArgumentException("Cannot parse props to String.", ex); - } - } - return sw.toString(); - } - - public static Properties propsFromString(String source) throws IllegalArgumentException { - Properties copy = new Properties(); - if (source != null) { - try { - copy.load(new StringReader(source)); - } catch (IOException ex) { - throw new IllegalArgumentException("Cannot parse props from String.", ex); - } - } - return copy; - } -} diff --git a/extension/spark-doris-connector/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/extension/spark-doris-connector/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister deleted file mode 100644 index 15b2434c11..0000000000 --- a/extension/spark-doris-connector/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -org.apache.doris.spark.sql.DorisSourceProvider diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/package.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/package.scala deleted file mode 100644 index d08bdc0d7d..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/package.scala +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
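The service file above is what lets Spark resolve the short name "doris" to DorisSourceProvider through java.util.ServiceLoader. A usage sketch under that assumption; the option keys follow the connector documentation, and the FE address and table identifier are placeholders:

import org.apache.spark.sql.SparkSession

object DorisReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("doris-read-sketch").master("local[*]").getOrCreate()
    val df = spark.read.format("doris") // resolved through the META-INF/services entry above
      .option("doris.table.identifier", "example_db.example_table") // placeholder table
      .option("doris.fenodes", "fe_host:8030")                      // placeholder FE address
      .option("user", "root")
      .option("password", "")
      .load()
    df.show()
    spark.stop()
  }
}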
- -package org.apache.doris - -import scala.language.implicitConversions - -import org.apache.doris.spark.rdd.DorisSpark -import org.apache.spark.SparkContext - -package object spark { - implicit def sparkContextFunctions(sc: SparkContext) = new SparkContextFunctions(sc) - - class SparkContextFunctions(sc: SparkContext) extends Serializable { - def dorisRDD( - tableIdentifier: Option[String] = None, - query: Option[String] = None, - cfg: Option[Map[String, String]] = None) = - DorisSpark.dorisRDD(sc, tableIdentifier, query, cfg) - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/AbstractDorisRDD.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/AbstractDorisRDD.scala deleted file mode 100644 index 23a34c59ab..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/AbstractDorisRDD.scala +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.rdd - -import scala.collection.JavaConversions._ -import scala.reflect.ClassTag - -import org.apache.doris.spark.cfg.SparkSettings -import org.apache.doris.spark.rest.{PartitionDefinition, RestService} - -import org.apache.spark.rdd.RDD -import org.apache.spark.{Partition, SparkContext} - -private[spark] abstract class AbstractDorisRDD[T: ClassTag]( - @transient private var sc: SparkContext, - val params: Map[String, String] = Map.empty) - extends RDD[T](sc, Nil) { - - override def getPartitions: Array[Partition] = { - dorisPartitions.zipWithIndex.map { case (dorisPartition, idx) => - new DorisPartition(id, idx, dorisPartition) - }.toArray - } - - override def getPreferredLocations(split: Partition): Seq[String] = { - val dorisSplit = split.asInstanceOf[DorisPartition] - Seq(dorisSplit.dorisPartition.getBeAddress) - } - - override def checkpoint(): Unit = { - // Do nothing. Doris RDD should not be checkpointed. - } - - /** - * doris configuration get from rdd parameters and spark conf. 
- */ - @transient private[spark] lazy val dorisCfg = { - val cfg = new SparkSettings(sc.getConf) - cfg.merge(params) - } - - @transient private[spark] lazy val dorisPartitions = { - RestService.findPartitions(dorisCfg, log) - } -} - -private[spark] class DorisPartition(rddId: Int, idx: Int, val dorisPartition: PartitionDefinition) - extends Partition { - - override def hashCode(): Int = 31 * (31 * (31 + rddId) + idx) + dorisPartition.hashCode() - - override val index: Int = idx -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/AbstractDorisRDDIterator.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/AbstractDorisRDDIterator.scala deleted file mode 100644 index 5b2b36fff1..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/AbstractDorisRDDIterator.scala +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.rdd - -import org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_VALUE_READER_CLASS -import org.apache.doris.spark.cfg.Settings -import org.apache.doris.spark.rest.PartitionDefinition -import org.apache.spark.util.TaskCompletionListener -import org.apache.spark.{TaskContext, TaskKilledException} -import org.slf4j.{Logger, LoggerFactory} - -private[spark] abstract class AbstractDorisRDDIterator[T]( - context: TaskContext, - partition: PartitionDefinition) extends Iterator[T] { - - private val logger: Logger = LoggerFactory.getLogger(this.getClass.getName.stripSuffix("$")) - private var initialized = false - private var closed = false - - // the reader obtain data from Doris BE - lazy val reader = { - initialized = true - val settings = partition.settings() - initReader(settings) - val valueReaderName = settings.getProperty(DORIS_VALUE_READER_CLASS) - logger.debug(s"Use value reader '$valueReaderName'.") - val cons = Class.forName(valueReaderName).getDeclaredConstructor(classOf[PartitionDefinition], classOf[Settings]) - cons.newInstance(partition, settings).asInstanceOf[ScalaValueReader] - } - - context.addTaskCompletionListener(new TaskCompletionListener() { - override def onTaskCompletion(context: TaskContext): Unit = { - closeIfNeeded() - } - }) - - override def hasNext: Boolean = { - if (context.isInterrupted()) { - throw new TaskKilledException - } - reader.hasNext - } - - override def next(): T = { - if (!hasNext) { - throw new NoSuchElementException("End of stream") - } - val value = reader.next - createValue(value) - } - - def closeIfNeeded(): Unit = { - logger.trace(s"Close status is '$closed' when close Doris RDD Iterator") - if (!closed) { - close() - closed = true - } - } - - protected def close(): Unit = { - logger.trace(s"Initialize status is '$initialized' 
when close Doris RDD Iterator") - if (initialized) { - reader.close() - } - } - - def initReader(settings: Settings): Unit - - /** - * convert value of row from reader.next return type to T. - * @param value reader.next return value - * @return value of type T - */ - def createValue(value: Object): T -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/DorisSpark.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/DorisSpark.scala deleted file mode 100644 index 9dc86aa68c..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/DorisSpark.scala +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.rdd - -import org.apache.doris.spark.cfg.ConfigurationOptions.{DORIS_FILTER_QUERY, DORIS_TABLE_IDENTIFIER} - -import org.apache.spark.SparkContext -import org.apache.spark.rdd.RDD - -object DorisSpark { - def dorisRDD( - sc: SparkContext, - tableIdentifier: Option[String] = None, - query: Option[String] = None, - cfg: Option[Map[String, String]] = None): RDD[AnyRef] = { - val params = collection.mutable.Map(cfg.getOrElse(Map.empty).toSeq: _*) - query.map { s => params += (DORIS_FILTER_QUERY -> s) } - tableIdentifier.map { s => params += (DORIS_TABLE_IDENTIFIER -> s) } - new ScalaDorisRDD[AnyRef](sc, params.toMap) - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaDorisRDD.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaDorisRDD.scala deleted file mode 100644 index e764ea0750..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaDorisRDD.scala +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
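DorisSpark.dorisRDD, together with the implicit conversion in the deleted package object, is the RDD-level entry point. A usage sketch with placeholder connection values (the doris.fenodes key follows the connector documentation; the auth keys appear in the deleted sources):

import org.apache.doris.spark._
import org.apache.spark.{SparkConf, SparkContext}

object DorisRddSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("doris-rdd-sketch").setMaster("local[*]"))
    val rdd = sc.dorisRDD( // implicit conversion from the deleted package object
      tableIdentifier = Some("example_db.example_table"), // placeholder table
      cfg = Some(Map(
        "doris.fenodes" -> "fe_host:8030", // placeholder FE address
        "doris.request.auth.user" -> "root",
        "doris.request.auth.password" -> ""
      ))
    )
    println(rdd.count())
    sc.stop()
  }
}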
- -package org.apache.doris.spark.rdd - -import scala.reflect.ClassTag - -import org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_VALUE_READER_CLASS -import org.apache.doris.spark.cfg.Settings -import org.apache.doris.spark.rest.PartitionDefinition - -import org.apache.spark.{Partition, SparkContext, TaskContext} - -private[spark] class ScalaDorisRDD[T: ClassTag]( - sc: SparkContext, - params: Map[String, String] = Map.empty) - extends AbstractDorisRDD[T](sc, params) { - override def compute(split: Partition, context: TaskContext): ScalaDorisRDDIterator[T] = { - new ScalaDorisRDDIterator(context, split.asInstanceOf[DorisPartition].dorisPartition) - } -} - -private[spark] class ScalaDorisRDDIterator[T]( - context: TaskContext, - partition: PartitionDefinition) - extends AbstractDorisRDDIterator[T](context, partition) { - - override def initReader(settings: Settings) = { - settings.setProperty(DORIS_VALUE_READER_CLASS, classOf[ScalaValueReader].getName) - } - - override def createValue(value: Object): T = { - value.asInstanceOf[T] - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala deleted file mode 100644 index 03643b2e00..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/rdd/ScalaValueReader.scala +++ /dev/null @@ -1,252 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.rdd - -import java.util.concurrent.atomic.AtomicBoolean -import java.util.concurrent._ -import java.util.concurrent.locks.{Condition, Lock, ReentrantLock} - -import scala.collection.JavaConversions._ -import scala.util.Try -import org.apache.doris.spark.backend.BackendClient -import org.apache.doris.spark.cfg.ConfigurationOptions._ -import org.apache.doris.spark.cfg.Settings -import org.apache.doris.spark.exception.ShouldNeverHappenException -import org.apache.doris.spark.rest.PartitionDefinition -import org.apache.doris.spark.rest.models.Schema -import org.apache.doris.spark.serialization.{Routing, RowBatch} -import org.apache.doris.spark.sql.SchemaUtils -import org.apache.doris.spark.util.ErrorMessages -import org.apache.doris.spark.util.ErrorMessages.SHOULD_NOT_HAPPEN_MESSAGE -import org.apache.doris.thrift.{TScanCloseParams, TScanNextBatchParams, TScanOpenParams, TScanOpenResult} -import org.apache.log4j.Logger - -import scala.util.control.Breaks - -/** - * read data from Doris BE to array. 
- * @param partition Doris RDD partition - * @param settings request configuration - */ -class ScalaValueReader(partition: PartitionDefinition, settings: Settings) { - private val logger = Logger.getLogger(classOf[ScalaValueReader]) - - protected val client = new BackendClient(new Routing(partition.getBeAddress), settings) - protected val clientLock = - if (deserializeArrowToRowBatchAsync) new ReentrantLock() - else new NoOpLock - protected var offset = 0 - protected var eos: AtomicBoolean = new AtomicBoolean(false) - protected var rowBatch: RowBatch = _ - // flag indicate if support deserialize Arrow to RowBatch asynchronously - protected lazy val deserializeArrowToRowBatchAsync: Boolean = Try { - settings.getProperty(DORIS_DESERIALIZE_ARROW_ASYNC, DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT.toString).toBoolean - } getOrElse { - logger.warn(ErrorMessages.PARSE_BOOL_FAILED_MESSAGE, DORIS_DESERIALIZE_ARROW_ASYNC, settings.getProperty(DORIS_DESERIALIZE_ARROW_ASYNC)) - DORIS_DESERIALIZE_ARROW_ASYNC_DEFAULT - } - - protected var rowBatchBlockingQueue: BlockingQueue[RowBatch] = { - val blockingQueueSize = Try { - settings.getProperty(DORIS_DESERIALIZE_QUEUE_SIZE, DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT.toString).toInt - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_DESERIALIZE_QUEUE_SIZE, settings.getProperty(DORIS_DESERIALIZE_QUEUE_SIZE)) - DORIS_DESERIALIZE_QUEUE_SIZE_DEFAULT - } - - var queue: BlockingQueue[RowBatch] = null - if (deserializeArrowToRowBatchAsync) { - queue = new ArrayBlockingQueue(blockingQueueSize) - } - queue - } - - private val openParams: TScanOpenParams = { - val params = new TScanOpenParams - params.cluster = DORIS_DEFAULT_CLUSTER - params.database = partition.getDatabase - params.table = partition.getTable - - params.tablet_ids = partition.getTabletIds.toList - params.opaqued_query_plan = partition.getQueryPlan - - // max row number of one read batch - val batchSize = Try { - settings.getProperty(DORIS_BATCH_SIZE, DORIS_BATCH_SIZE_DEFAULT.toString).toInt - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_BATCH_SIZE, settings.getProperty(DORIS_BATCH_SIZE)) - DORIS_BATCH_SIZE_DEFAULT - } - - val queryDorisTimeout = Try { - settings.getProperty(DORIS_REQUEST_QUERY_TIMEOUT_S, DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT.toString).toInt - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_REQUEST_QUERY_TIMEOUT_S, settings.getProperty(DORIS_REQUEST_QUERY_TIMEOUT_S)) - DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT - } - - val execMemLimit = Try { - settings.getProperty(DORIS_EXEC_MEM_LIMIT, DORIS_EXEC_MEM_LIMIT_DEFAULT.toString).toLong - } getOrElse { - logger.warn(ErrorMessages.PARSE_NUMBER_FAILED_MESSAGE, DORIS_EXEC_MEM_LIMIT, settings.getProperty(DORIS_EXEC_MEM_LIMIT)) - DORIS_EXEC_MEM_LIMIT_DEFAULT - } - - params.setBatchSize(batchSize) - params.setQueryTimeout(queryDorisTimeout) - params.setMemLimit(execMemLimit) - params.setUser(settings.getProperty(DORIS_REQUEST_AUTH_USER, "")) - params.setPasswd(settings.getProperty(DORIS_REQUEST_AUTH_PASSWORD, "")) - - logger.debug(s"Open scan params is, " + - s"cluster: ${params.getCluster}, " + - s"database: ${params.getDatabase}, " + - s"table: ${params.getTable}, " + - s"tabletId: ${params.getTabletIds}, " + - s"batch size: $batchSize, " + - s"query timeout: $queryDorisTimeout, " + - s"execution memory limit: $execMemLimit, " + - s"user: ${params.getUser}, " + - s"query plan: ${params.getOpaquedQueryPlan}") - - params - } - - protected val openResult: TScanOpenResult = 
lockClient(_.openScanner(openParams)) - protected val contextId: String = openResult.getContextId - protected val schema: Schema = - SchemaUtils.convertToSchema(openResult.getSelectedColumns) - - protected val asyncThread: Thread = new Thread { - override def run { - val nextBatchParams = new TScanNextBatchParams - nextBatchParams.setContextId(contextId) - while (!eos.get) { - nextBatchParams.setOffset(offset) - val nextResult = lockClient(_.getNext(nextBatchParams)) - eos.set(nextResult.isEos) - if (!eos.get) { - val rowBatch = new RowBatch(nextResult, schema) - offset += rowBatch.getReadRowCount - rowBatch.close - rowBatchBlockingQueue.put(rowBatch) - } - } - } - } - - protected val asyncThreadStarted: Boolean = { - var started = false - if (deserializeArrowToRowBatchAsync) { - asyncThread.start - started = true - } - started - } - - logger.debug(s"Open scan result is, contextId: $contextId, schema: $schema.") - - /** - * read data and cache it in rowBatch. - * @return true if there is a next value - */ - def hasNext: Boolean = { - var hasNext = false - if (deserializeArrowToRowBatchAsync && asyncThreadStarted) { - // support deserialize Arrow to RowBatch asynchronously - if (rowBatch == null || !rowBatch.hasNext) { - val loop = new Breaks - loop.breakable { - while (!eos.get || !rowBatchBlockingQueue.isEmpty) { - if (!rowBatchBlockingQueue.isEmpty) { - rowBatch = rowBatchBlockingQueue.take - hasNext = true - loop.break - } else { - // wait for rowBatch put in queue or eos change - Thread.sleep(5) - } - } - } - } else { - hasNext = true - } - } else { - // Arrow data was acquired synchronously during the iterative process - if (!eos.get && (rowBatch == null || !rowBatch.hasNext)) { - if (rowBatch != null) { - offset += rowBatch.getReadRowCount - rowBatch.close - } - val nextBatchParams = new TScanNextBatchParams - nextBatchParams.setContextId(contextId) - nextBatchParams.setOffset(offset) - val nextResult = lockClient(_.getNext(nextBatchParams)) - eos.set(nextResult.isEos) - if (!eos.get) { - rowBatch = new RowBatch(nextResult, schema) - } - } - hasNext = !eos.get - } - hasNext - } - - /** - * get next value. - * @return next value - */ - def next: AnyRef = { - if (!hasNext) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE) - throw new ShouldNeverHappenException - } - rowBatch.next - } - - def close(): Unit = { - val closeParams = new TScanCloseParams - closeParams.setContextId(contextId) - lockClient(_.closeScanner(closeParams)) - } - - private def lockClient[T](action: BackendClient => T): T = { - clientLock.lock() - try { - action(client) - } finally { - clientLock.unlock() - } - } - - private class NoOpLock extends Lock { - override def lock(): Unit = {} - - override def lockInterruptibly(): Unit = {} - - override def tryLock(): Boolean = true - - override def tryLock(time: Long, unit: TimeUnit): Boolean = true - - override def unlock(): Unit = {} - - override def newCondition(): Condition = { - throw new UnsupportedOperationException("NoOpLock can't provide a condition") - } - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisRelation.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisRelation.scala deleted file mode 100644 index 3e3616de87..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisRelation.scala +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements.
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.sql - -import scala.collection.JavaConverters._ -import scala.collection.mutable -import scala.math.min - -import org.apache.doris.spark.cfg.ConfigurationOptions._ -import org.apache.doris.spark.cfg.{ConfigurationOptions, SparkSettings} - -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.jdbc.JdbcDialects -import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{DataFrame, Row, SQLContext} - - -private[sql] class DorisRelation( - val sqlContext: SQLContext, parameters: Map[String, String]) - extends BaseRelation with TableScan with PrunedScan with PrunedFilteredScan with InsertableRelation{ - - private lazy val cfg = { - val conf = new SparkSettings(sqlContext.sparkContext.getConf) - conf.merge(parameters.asJava) - conf - } - - private lazy val inValueLengthLimit = - min(cfg.getProperty(DORIS_FILTER_QUERY_IN_MAX_COUNT, "100").toInt, - DORIS_FILTER_QUERY_IN_VALUE_UPPER_LIMIT) - - private lazy val lazySchema = SchemaUtils.discoverSchema(cfg) - - private lazy val dialect = JdbcDialects.get("") - - override def schema: StructType = lazySchema - - override def unhandledFilters(filters: Array[Filter]): Array[Filter] = { - filters.filter(Utils.compileFilter(_, dialect, inValueLengthLimit).isEmpty) - } - - // TableScan - override def buildScan(): RDD[Row] = buildScan(Array.empty) - - // PrunedScan - override def buildScan(requiredColumns: Array[String]): RDD[Row] = buildScan(requiredColumns, Array.empty) - - // PrunedFilteredScan - override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { - val paramWithScan = mutable.LinkedHashMap[String, String]() ++ parameters - - // filter where clause can be handled by Doris BE - val filterWhereClause: String = { - filters.flatMap(Utils.compileFilter(_, dialect, inValueLengthLimit)) - .map(filter => s"($filter)").mkString(" and ") - } - - // required columns for column pruner - if (requiredColumns != null && requiredColumns.length > 0) { - paramWithScan += (ConfigurationOptions.DORIS_READ_FIELD -> - requiredColumns.map(Utils.quote).mkString(",")) - } else { - paramWithScan += (ConfigurationOptions.DORIS_READ_FIELD -> - lazySchema.fields.map(f => f.name).mkString(",")) - } - - if (filters != null && filters.length > 0) { - paramWithScan += (ConfigurationOptions.DORIS_FILTER_QUERY -> filterWhereClause) - } - - new ScalaDorisRowRDD(sqlContext.sparkContext, paramWithScan.toMap, lazySchema) - } - - // Insert Table - override def insert(data: DataFrame, overwrite: Boolean): Unit = { - //replace 'doris.request.auth.user' with 'user' and 'doris.request.auth.password' with 'password' - val insertCfg = cfg.copy().asProperties().asScala.map { - case (ConfigurationOptions.DORIS_REQUEST_AUTH_USER, v) => - ("user", v) - case 
(ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD, v) => - ("password", v) - case (k, v) => (k, v) - } - data.write.format(DorisSourceProvider.SHORT_NAME) - .options(insertCfg) - .save() - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisSourceProvider.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisSourceProvider.scala deleted file mode 100644 index 9b7d3f0c57..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisSourceProvider.scala +++ /dev/null @@ -1,150 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.sql - -import org.apache.doris.spark.DorisStreamLoad -import org.apache.doris.spark.cfg.{ConfigurationOptions, SparkSettings} -import org.apache.doris.spark.sql.DorisSourceProvider.SHORT_NAME -import org.apache.spark.SparkConf -import org.apache.spark.sql.execution.streaming.Sink -import org.apache.spark.sql.sources._ -import org.apache.spark.sql.streaming.OutputMode -import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} -import org.slf4j.{Logger, LoggerFactory} -import java.io.IOException -import java.util - -import org.apache.doris.spark.rest.RestService - -import scala.collection.JavaConverters.mapAsJavaMapConverter -import scala.util.control.Breaks - -private[sql] class DorisSourceProvider extends DataSourceRegister - with RelationProvider - with CreatableRelationProvider - with StreamSinkProvider - with Serializable { - - private val logger: Logger = LoggerFactory.getLogger(classOf[DorisSourceProvider].getName) - - override def shortName(): String = SHORT_NAME - - override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { - new DorisRelation(sqlContext, Utils.params(parameters, logger)) - } - - - /** - * df.save - */ - override def createRelation(sqlContext: SQLContext, - mode: SaveMode, parameters: Map[String, String], - data: DataFrame): BaseRelation = { - - val sparkSettings = new SparkSettings(sqlContext.sparkContext.getConf) - sparkSettings.merge(Utils.params(parameters, logger).asJava) - // init stream loader - val dorisStreamLoader = new DorisStreamLoad(sparkSettings) - - val maxRowCount = sparkSettings.getIntegerProperty(ConfigurationOptions.DORIS_SINK_BATCH_SIZE, ConfigurationOptions.SINK_BATCH_SIZE_DEFAULT) - val maxRetryTimes = sparkSettings.getIntegerProperty(ConfigurationOptions.DORIS_SINK_MAX_RETRIES, ConfigurationOptions.SINK_MAX_RETRIES_DEFAULT) - - data.rdd.foreachPartition(partition => { - val rowsBuffer: util.List[util.List[Object]] = new util.ArrayList[util.List[Object]](maxRowCount) - partition.foreach(row => { - val line: util.List[Object] = new 
util.ArrayList[Object]() - for (i <- 0 until row.size) { - val field = row.get(i) - line.add(field.asInstanceOf[AnyRef]) - } - rowsBuffer.add(line) - if (rowsBuffer.size > maxRowCount) { - flush - } - }) - // flush buffer - if (!rowsBuffer.isEmpty) { - flush - } - - /** - * flush data to Doris and do retry when flush error - * - */ - def flush = { - val loop = new Breaks - loop.breakable { - - for (i <- 1 to maxRetryTimes) { - try { - dorisStreamLoader.load(rowsBuffer) - rowsBuffer.clear() - loop.break() - } - catch { - case e: Exception => - try { - logger.warn("Failed to load data on BE: {} node ", dorisStreamLoader.getLoadUrlStr) - //If the current BE node fails to execute Stream Load, randomly switch to other BE nodes and try again - dorisStreamLoader.setHostPort(RestService.randomBackendV2(sparkSettings,logger)) - Thread.sleep(1000 * i) - } catch { - case ex: InterruptedException => - logger.warn("Data that failed to load : " + dorisStreamLoader.listToString(rowsBuffer)) - Thread.currentThread.interrupt() - throw new IOException("unable to flush; interrupted while doing another attempt", e) - } - } - } - - if(!rowsBuffer.isEmpty){ - logger.warn("Data that failed to load : " + dorisStreamLoader.listToString(rowsBuffer)) - throw new IOException(s"Failed to load data on BE: ${dorisStreamLoader.getLoadUrlStr} node and exceeded the max retry times.") - } - } - - } - - }) - new BaseRelation { - override def sqlContext: SQLContext = unsupportedException - - override def schema: StructType = unsupportedException - - override def needConversion: Boolean = unsupportedException - - override def sizeInBytes: Long = unsupportedException - - override def unhandledFilters(filters: Array[Filter]): Array[Filter] = unsupportedException - - private def unsupportedException = - throw new UnsupportedOperationException("BaseRelation from doris write operation is not usable.") - } - } - - override def createSink(sqlContext: SQLContext, parameters: Map[String, String], partitionColumns: Seq[String], outputMode: OutputMode): Sink = { - val sparkSettings = new SparkSettings(new SparkConf()) - sparkSettings.merge(Utils.params(parameters, logger).asJava) - new DorisStreamLoadSink(sqlContext, sparkSettings) - } -} - -object DorisSourceProvider { - val SHORT_NAME: String = "doris" -} \ No newline at end of file diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisStreamLoadSink.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisStreamLoadSink.scala deleted file mode 100644 index 6e736985ad..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisStreamLoadSink.scala +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
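Both sinks above share the same flush-with-retry shape: try the load, back off linearly on failure, and give up after a bounded number of attempts. A self-contained sketch of that loop, simplified to drop the BE re-selection step:

import java.io.IOException
import java.util.{ArrayList => JArrayList, List => JList}

object RetryFlushSketch {
  // Try load(buffer); on failure sleep `attempt` seconds and retry, as the
  // deleted sinks do; rethrow once maxRetryTimes attempts are exhausted.
  def flushWithRetry[T](buffer: JList[T], maxRetryTimes: Int)(load: JList[T] => Unit): Unit = {
    var attempt = 1
    var flushed = false
    while (!flushed) {
      try {
        load(buffer)
        buffer.clear()
        flushed = true
      } catch {
        case _: Exception if attempt < maxRetryTimes =>
          Thread.sleep(1000L * attempt) // linear backoff, mirroring Thread.sleep(1000 * i)
          attempt += 1
        case e: Exception =>
          throw new IOException("exceeded the max retry times", e)
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val rows = new JArrayList[String]()
    rows.add("row-1")
    flushWithRetry(rows, maxRetryTimes = 3)(b => println(s"loaded ${b.size()} rows"))
  }
}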
- -package org.apache.doris.spark.sql - -import org.apache.doris.spark.cfg.{ConfigurationOptions, SparkSettings} -import org.apache.doris.spark.{CachedDorisStreamLoadClient, DorisStreamLoad} -import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.streaming.Sink -import org.apache.spark.sql.{DataFrame, SQLContext} -import org.slf4j.{Logger, LoggerFactory} -import java.io.IOException -import java.util - -import org.apache.doris.spark.rest.RestService - -import scala.util.control.Breaks - -private[sql] class DorisStreamLoadSink(sqlContext: SQLContext, settings: SparkSettings) extends Sink with Serializable { - - private val logger: Logger = LoggerFactory.getLogger(classOf[DorisStreamLoadSink].getName) - @volatile private var latestBatchId = -1L - val maxRowCount: Int = settings.getIntegerProperty(ConfigurationOptions.DORIS_SINK_BATCH_SIZE, ConfigurationOptions.SINK_BATCH_SIZE_DEFAULT) - val maxRetryTimes: Int = settings.getIntegerProperty(ConfigurationOptions.DORIS_SINK_MAX_RETRIES, ConfigurationOptions.SINK_MAX_RETRIES_DEFAULT) - val dorisStreamLoader: DorisStreamLoad = CachedDorisStreamLoadClient.getOrCreate(settings) - - override def addBatch(batchId: Long, data: DataFrame): Unit = { - if (batchId <= latestBatchId) { - logger.info(s"Skipping already committed batch $batchId") - } else { - write(data.queryExecution) - latestBatchId = batchId - } - } - - def write(queryExecution: QueryExecution): Unit = { - queryExecution.toRdd.foreachPartition(iter => { - val rowsBuffer: util.List[util.List[Object]] = new util.ArrayList[util.List[Object]]() - iter.foreach(row => { - val line: util.List[Object] = new util.ArrayList[Object](maxRowCount) - for (i <- 0 until row.numFields) { - val field = row.copy().getUTF8String(i) - line.add(field.asInstanceOf[AnyRef]) - } - rowsBuffer.add(line) - if (rowsBuffer.size > maxRowCount - 1) { - flush - } - }) - // flush buffer - if (!rowsBuffer.isEmpty) { - flush - } - - /** - * flush data to Doris and do retry when flush error - * - */ - def flush = { - val loop = new Breaks - loop.breakable { - - for (i <- 0 to maxRetryTimes) { - try { - dorisStreamLoader.load(rowsBuffer) - rowsBuffer.clear() - loop.break() - } - catch { - case e: Exception => - try { - logger.warn("Failed to load data on BE: {} node ", dorisStreamLoader.getLoadUrlStr) - //If the current BE node fails to execute Stream Load, randomly switch to other BE nodes and try again - dorisStreamLoader.setHostPort(RestService.randomBackendV2(settings,logger)) - Thread.sleep(1000 * i) - } catch { - case ex: InterruptedException => - logger.warn("Data that failed to load : " + dorisStreamLoader.listToString(rowsBuffer)) - Thread.currentThread.interrupt() - throw new IOException("unable to flush; interrupted while doing another attempt", e) - } - } - } - - if(!rowsBuffer.isEmpty){ - logger.warn("Data that failed to load : " + dorisStreamLoader.listToString(rowsBuffer)) - throw new IOException(s"Failed to load data on BE: ${dorisStreamLoader.getLoadUrlStr} node and exceeded the max retry times.") - } - } - } - }) - } - - override def toString: String = "DorisStreamLoadSink" -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisWriterOption.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisWriterOption.scala deleted file mode 100644 index 69238c732a..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisWriterOption.scala +++ /dev/null @@ -1,41 +0,0 @@ -// 
Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -package org.apache.doris.spark.sql - -import org.apache.doris.spark.exception.DorisException - -class DorisWriterOption(val feHostPort: String ,val dbName: String,val tbName: String, - val user: String ,val password: String, - val maxRowCount: Long,val maxRetryTimes:Int) - -object DorisWriterOption{ - def apply(parameters: Map[String, String]): DorisWriterOption={ - val feHostPort: String = parameters.getOrElse(DorisWriterOptionKeys.feHostPort, throw new DorisException("feHostPort is empty")) - - val dbName: String = parameters.getOrElse(DorisWriterOptionKeys.dbName, throw new DorisException("dbName is empty")) - - val tbName: String = parameters.getOrElse(DorisWriterOptionKeys.tbName, throw new DorisException("tbName is empty")) - - val user: String = parameters.getOrElse(DorisWriterOptionKeys.user, throw new DorisException("user is empty")) - - val password: String = parameters.getOrElse(DorisWriterOptionKeys.password, throw new DorisException("password is empty")) - - val maxRowCount: Long = parameters.getOrElse(DorisWriterOptionKeys.maxRowCount, "1024").toLong - val maxRetryTimes: Int = parameters.getOrElse(DorisWriterOptionKeys.maxRetryTimes, "3").toInt - new DorisWriterOption(feHostPort, dbName, tbName, user, password, maxRowCount, maxRetryTimes) - } -} \ No newline at end of file diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisWriterOptionKeys.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisWriterOptionKeys.scala deleted file mode 100644 index 9cadd9f271..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/DorisWriterOptionKeys.scala +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
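DorisWriterOption above distinguishes required keys (throw when absent) from defaulted ones (parse or fall back). A minimal self-contained sketch of the same pattern:

object WriterOptionSketch {
  // Required keys throw when missing; optional keys parse with a default.
  def parse(parameters: Map[String, String]): (String, Long, Int) = {
    def required(key: String): String =
      parameters.getOrElse(key, throw new IllegalArgumentException(s"$key is empty"))
    val feHostPort = required("feHostPort")
    val maxRowCount = parameters.getOrElse("maxRowCount", "1024").toLong
    val maxRetryTimes = parameters.getOrElse("maxRetryTimes", "3").toInt
    (feHostPort, maxRowCount, maxRetryTimes)
  }

  def main(args: Array[String]): Unit = {
    println(parse(Map("feHostPort" -> "fe_host:8030"))) // placeholder address
  }
}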
-package org.apache.doris.spark.sql - -object DorisWriterOptionKeys { - val feHostPort="feHostPort" - val dbName="dbName" - val tbName="tbName" - val user="user" - val password="password" - val maxRowCount="maxRowCount" - val maxRetryTimes="maxRetryTimes" - -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRow.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRow.scala deleted file mode 100644 index 06f5ca302e..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRow.scala +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.sql - -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.sql.Row - -private[spark] class ScalaDorisRow(rowOrder: Seq[String]) extends Row { - lazy val values: ArrayBuffer[Any] = ArrayBuffer.fill(rowOrder.size)(null) - - /** No-arg constructor for Kryo serialization. */ - def this() = this(null) - - def iterator = values.iterator - - override def length: Int = values.length - - override def apply(i: Int): Any = values(i) - - override def get(i: Int): Any = values(i) - - override def isNullAt(i: Int): Boolean = values(i) == null - - override def getInt(i: Int): Int = getAs[Int](i) - - override def getLong(i: Int): Long = getAs[Long](i) - - override def getDouble(i: Int): Double = getAs[Double](i) - - override def getFloat(i: Int): Float = getAs[Float](i) - - override def getBoolean(i: Int): Boolean = getAs[Boolean](i) - - override def getShort(i: Int): Short = getAs[Short](i) - - override def getByte(i: Int): Byte = getAs[Byte](i) - - override def getString(i: Int): String = get(i).toString() - - override def copy(): Row = this - - override def toSeq = values.toSeq -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRowRDD.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRowRDD.scala deleted file mode 100644 index b31a54dc82..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRowRDD.scala +++ /dev/null @@ -1,53 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.sql - -import org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_VALUE_READER_CLASS -import org.apache.doris.spark.cfg.Settings -import org.apache.doris.spark.rdd.{AbstractDorisRDD, AbstractDorisRDDIterator, DorisPartition} -import org.apache.doris.spark.rest.PartitionDefinition - -import org.apache.spark.{Partition, SparkContext, TaskContext} -import org.apache.spark.sql.Row -import org.apache.spark.sql.types.StructType - -private[spark] class ScalaDorisRowRDD( - sc: SparkContext, - params: Map[String, String] = Map.empty, - struct: StructType) - extends AbstractDorisRDD[Row](sc, params) { - - override def compute(split: Partition, context: TaskContext): ScalaDorisRowRDDIterator = { - new ScalaDorisRowRDDIterator(context, split.asInstanceOf[DorisPartition].dorisPartition, struct) - } -} - -private[spark] class ScalaDorisRowRDDIterator( - context: TaskContext, - partition: PartitionDefinition, - struct: StructType) - extends AbstractDorisRDDIterator[Row](context, partition) { - - override def initReader(settings: Settings) = { - settings.setProperty(DORIS_VALUE_READER_CLASS, classOf[ScalaDorisRowValueReader].getName) - } - - override def createValue(value: Object): Row = { - value.asInstanceOf[ScalaDorisRow] - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRowValueReader.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRowValueReader.scala deleted file mode 100644 index 5b0185403f..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/ScalaDorisRowValueReader.scala +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
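The RDD/iterator pair above is the extension point for typed reads: `initReader` pins the `ScalaValueReader` subclass to instantiate, and `createValue` adapts what it produces. A sketch of the same wiring under illustrative names (`PassThroughValueReader` and `PassThroughIterator` are stand-ins, not types from this patch):

```scala
import org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_VALUE_READER_CLASS
import org.apache.doris.spark.cfg.Settings
import org.apache.doris.spark.rdd.{AbstractDorisRDDIterator, ScalaValueReader}
import org.apache.doris.spark.rest.PartitionDefinition
import org.apache.spark.TaskContext

// A do-nothing reader subclass: inherits ScalaValueReader's batch handling as-is.
class PassThroughValueReader(partition: PartitionDefinition, settings: Settings)
  extends ScalaValueReader(partition, settings)

class PassThroughIterator(context: TaskContext, partition: PartitionDefinition)
  extends AbstractDorisRDDIterator[AnyRef](context, partition) {

  // Runs once before reading: names the reader class the framework will instantiate.
  override def initReader(settings: Settings): Unit =
    settings.setProperty(DORIS_VALUE_READER_CLASS, classOf[PassThroughValueReader].getName)

  // Adapts each raw value from the reader to the RDD element type (identity here).
  override def createValue(value: Object): AnyRef = value
}
```

The row-typed variant deleted above relies on ScalaDorisRowValueReader in the next hunk, which copies each batch row into a ScalaDorisRow slot by slot, in the order given by the comma-separated DORIS_READ_FIELD column list.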
- -package org.apache.doris.spark.sql - -import scala.collection.JavaConverters._ -import org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_READ_FIELD -import org.apache.doris.spark.cfg.Settings -import org.apache.doris.spark.exception.ShouldNeverHappenException -import org.apache.doris.spark.rdd.ScalaValueReader -import org.apache.doris.spark.rest.PartitionDefinition -import org.apache.doris.spark.util.ErrorMessages.SHOULD_NOT_HAPPEN_MESSAGE -import org.slf4j.{Logger, LoggerFactory} - -class ScalaDorisRowValueReader( - partition: PartitionDefinition, - settings: Settings) - extends ScalaValueReader(partition, settings) { - - private val logger: Logger = LoggerFactory.getLogger(classOf[ScalaDorisRowValueReader].getName) - - val rowOrder: Seq[String] = settings.getProperty(DORIS_READ_FIELD).split(",") - - override def next: AnyRef = { - if (!hasNext) { - logger.error(SHOULD_NOT_HAPPEN_MESSAGE) - throw new ShouldNeverHappenException - } - val row: ScalaDorisRow = new ScalaDorisRow(rowOrder) - rowBatch.next.asScala.zipWithIndex.foreach{ - case (s, index) if index < row.values.size => row.values.update(index, s) - case _ => // nothing - } - row - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala deleted file mode 100644 index f59509280f..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/SchemaUtils.scala +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.sql - -import scala.collection.JavaConverters._ - -import org.apache.doris.spark.cfg.Settings -import org.apache.doris.spark.exception.DorisException -import org.apache.doris.spark.rest.RestService -import org.apache.doris.spark.rest.models.{Field, Schema} -import org.apache.doris.thrift.TScanColumnDesc - -import org.apache.spark.sql.types._ - -import org.slf4j.LoggerFactory - -private[spark] object SchemaUtils { - private val logger = LoggerFactory.getLogger(SchemaUtils.getClass.getSimpleName.stripSuffix("$")) - - /** - * discover Doris table schema from Doris FE. - * @param cfg configuration - * @return Spark Catalyst StructType - */ - def discoverSchema(cfg: Settings): StructType = { - val schema = discoverSchemaFromFe(cfg) - convertToStruct(schema) - } - - /** - * discover Doris table schema from Doris FE. 
- * @param cfg configuration - * @return inner schema struct - */ - def discoverSchemaFromFe(cfg: Settings): Schema = { - RestService.getSchema(cfg, logger) - } - - /** - * convert inner schema struct to Spark Catalyst StructType - * @param schema inner schema - * @return Spark Catalyst StructType - */ - def convertToStruct(schema: Schema): StructType = { - var fields = List[StructField]() - schema.getProperties.asScala.foreach(f => - fields :+= DataTypes.createStructField(f.getName, getCatalystType(f.getType, f.getPrecision, f.getScale), true)) - DataTypes.createStructType(fields.asJava) - } - - /** - * translate Doris Type to Spark Catalyst type - * @param dorisType Doris type - * @param precision decimal precision - * @param scale decimal scale - * @return Spark Catalyst type - */ - def getCatalystType(dorisType: String, precision: Int, scale: Int): DataType = { - dorisType match { - case "NULL_TYPE" => DataTypes.NullType - case "BOOLEAN" => DataTypes.BooleanType - case "TINYINT" => DataTypes.ByteType - case "SMALLINT" => DataTypes.ShortType - case "INT" => DataTypes.IntegerType - case "BIGINT" => DataTypes.LongType - case "FLOAT" => DataTypes.FloatType - case "DOUBLE" => DataTypes.DoubleType - case "DATE" => DataTypes.StringType - case "DATETIME" => DataTypes.StringType - case "BINARY" => DataTypes.BinaryType - case "DECIMAL" => DecimalType(precision, scale) - case "CHAR" => DataTypes.StringType - case "LARGEINT" => DataTypes.StringType - case "VARCHAR" => DataTypes.StringType - case "DECIMALV2" => DecimalType(precision, scale) - case "TIME" => DataTypes.DoubleType - case "HLL" => - throw new DorisException("Unsupported type " + dorisType) - case _ => - throw new DorisException("Unrecognized Doris type " + dorisType) - } - } - - /** - * convert Doris return schema to inner schema struct. - * @param tscanColumnDescs Doris BE return schema - * @return inner schema struct - */ - def convertToSchema(tscanColumnDescs: Seq[TScanColumnDesc]): Schema = { - val schema = new Schema(tscanColumnDescs.length) - tscanColumnDescs.foreach(desc => schema.put(new Field(desc.getName, desc.getType.name, "", 0, 0, ""))) - schema - } -} diff --git a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/Utils.scala b/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/Utils.scala deleted file mode 100644 index 6b6664677d..0000000000 --- a/extension/spark-doris-connector/src/main/scala/org/apache/doris/spark/sql/Utils.scala +++ /dev/null @@ -1,155 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
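The mapping above is the one place where Doris column types become Spark Catalyst types, so a worked example helps. This sketch uses the Field constructor signature that also appears in the deleted tests further down; it is callable from the connector's own packages, as the deleted sources are:

```scala
// Converting a Doris-side schema description to a Spark StructType via the
// deleted SchemaUtils; column names and types here are illustrative.
import org.apache.doris.spark.rest.models.{Field, Schema}
import org.apache.doris.spark.sql.SchemaUtils

val schema = new Schema()
schema.put(new Field("k1", "TINYINT", "", 0, 0, ""))
schema.put(new Field("k5", "DECIMALV2", "", 9, 2, ""))

val struct = SchemaUtils.convertToStruct(schema)
// => StructType(StructField(k1,ByteType,true), StructField(k5,DecimalType(9,2),true))
// DATE/DATETIME map to StringType by design, and an HLL column raises
// DorisException("Unsupported type HLL") rather than mapping silently.
```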
- -package org.apache.doris.spark.sql - -import org.apache.commons.lang3.StringUtils -import org.apache.doris.spark.cfg.ConfigurationOptions -import org.apache.doris.spark.exception.DorisException -import org.apache.spark.sql.jdbc.JdbcDialect -import org.apache.spark.sql.sources._ -import org.slf4j.Logger - -import java.sql.{Date, Timestamp} - -private[sql] object Utils { - /** - * quote column name - * @param colName column name - * @return quoted column name - */ - def quote(colName: String): String = s"`$colName`" - - /** - * compile a filter to Doris FE filter format. - * @param filter filter to be compile - * @param dialect jdbc dialect to translate value to sql format - * @param inValueLengthLimit max length of in value array - * @return if Doris FE can handle this filter, return None if Doris FE can not handled it. - */ - def compileFilter(filter: Filter, dialect: JdbcDialect, inValueLengthLimit: Int): Option[String] = { - Option(filter match { - case EqualTo(attribute, value) => s"${quote(attribute)} = ${compileValue(value)}" - case GreaterThan(attribute, value) => s"${quote(attribute)} > ${compileValue(value)}" - case GreaterThanOrEqual(attribute, value) => s"${quote(attribute)} >= ${compileValue(value)}" - case LessThan(attribute, value) => s"${quote(attribute)} < ${compileValue(value)}" - case LessThanOrEqual(attribute, value) => s"${quote(attribute)} <= ${compileValue(value)}" - case In(attribute, values) => - if (values.isEmpty || values.length >= inValueLengthLimit) { - null - } else { - s"${quote(attribute)} in (${compileValue(values)})" - } - case IsNull(attribute) => s"${quote(attribute)} is null" - case IsNotNull(attribute) => s"${quote(attribute)} is not null" - case And(left, right) => - val and = Seq(left, right).flatMap(compileFilter(_, dialect, inValueLengthLimit)) - if (and.size == 2) { - and.map(p => s"($p)").mkString(" and ") - } else { - null - } - case Or(left, right) => - val or = Seq(left, right).flatMap(compileFilter(_, dialect, inValueLengthLimit)) - if (or.size == 2) { - or.map(p => s"($p)").mkString(" or ") - } else { - null - } - case _ => null - }) - } - - /** - * Escape special characters in SQL string literals. - * @param value The string to be escaped. - * @return Escaped string. - */ - private def escapeSql(value: String): String = - if (value == null) null else StringUtils.replace(value, "'", "''") - - /** - * Converts value to SQL expression. - * @param value The value to be converted. - * @return Converted value. - */ - private def compileValue(value: Any): Any = value match { - case stringValue: String => s"'${escapeSql(stringValue)}'" - case timestampValue: Timestamp => "'" + timestampValue + "'" - case dateValue: Date => "'" + dateValue + "'" - case arrayValue: Array[Any] => arrayValue.map(compileValue).mkString(", ") - case _ => value - } - - /** - * check parameters validation and process it. - * @param parameters parameters from rdd and spark conf - * @param logger slf4j logger - * @return processed parameters - */ - def params(parameters: Map[String, String], logger: Logger) = { - // '.' 
seems to be problematic when specifying the options - val dottedParams = parameters.map { case (k, v) => (k.replace('_', '.'), v)} - - val preferredTableIdentifier = dottedParams.get(ConfigurationOptions.DORIS_TABLE_IDENTIFIER) - .orElse(dottedParams.get(ConfigurationOptions.TABLE_IDENTIFIER)) - logger.debug(s"preferred Table Identifier is '$preferredTableIdentifier'.") - - // Convert simple parameters into internal properties, and prefix other parameters - // Convert password parameters from "password" into internal password properties - // reuse credentials mask method in spark ExternalCatalogUtils#maskCredentials - val processedParams = dottedParams.map { - case (ConfigurationOptions.DORIS_PASSWORD, _) => - logger.error(s"${ConfigurationOptions.DORIS_PASSWORD} cannot use in Doris Datasource.") - throw new DorisException(s"${ConfigurationOptions.DORIS_PASSWORD} cannot use in Doris Datasource," + - s" use 'password' option to set password.") - case (ConfigurationOptions.DORIS_USER, _) => - logger.error(s"${ConfigurationOptions.DORIS_USER} cannot use in Doris Datasource.") - throw new DorisException(s"${ConfigurationOptions.DORIS_USER} cannot use in Doris Datasource," + - s" use 'user' option to set user.") - case (k, v) => - if (k.startsWith("doris.")) (k, v) - else ("doris." + k, v) - }.map{ - case (ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD, _) => - logger.error(s"${ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD} cannot use in Doris Datasource.") - throw new DorisException(s"${ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD} cannot use in" + - s" Doris Datasource, use 'password' option to set password.") - case (ConfigurationOptions.DORIS_REQUEST_AUTH_USER, _) => - logger.error(s"${ConfigurationOptions.DORIS_REQUEST_AUTH_USER} cannot use in Doris Datasource.") - throw new DorisException(s"${ConfigurationOptions.DORIS_REQUEST_AUTH_USER} cannot use in" + - s" Doris Datasource, use 'user' option to set user.") - case (ConfigurationOptions.DORIS_PASSWORD, v) => - (ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD, v) - case (ConfigurationOptions.DORIS_USER, v) => - (ConfigurationOptions.DORIS_REQUEST_AUTH_USER, v) - case (k, v) => (k, v) - } - - // Set the preferred resource if it was specified originally - val finalParams = preferredTableIdentifier match { - case Some(tableIdentifier) => processedParams + (ConfigurationOptions.DORIS_TABLE_IDENTIFIER -> tableIdentifier) - case None => processedParams - } - - // validate path is available - finalParams.getOrElse(ConfigurationOptions.DORIS_TABLE_IDENTIFIER, - throw new DorisException("table identifier must be specified for doris table identifier.")) - - finalParams - } -} diff --git a/extension/spark-doris-connector/src/main/thrift/doris/DorisExternalService.thrift b/extension/spark-doris-connector/src/main/thrift/doris/DorisExternalService.thrift deleted file mode 100644 index c169874887..0000000000 --- a/extension/spark-doris-connector/src/main/thrift/doris/DorisExternalService.thrift +++ /dev/null @@ -1,122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License.
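Two behaviours in the removed Utils above are worth pinning down with examples: filter compilation (the basis of predicate pushdown) and option normalization. In this sketch, `dialect` and `logger` stand for any JdbcDialect and slf4j Logger in scope; they are placeholders, not values from this patch:

```scala
// Filter compilation: supported Spark filters become Doris FE predicate text,
// anything else (or an oversized IN list) comes back as None.
import org.apache.spark.sql.sources.{And, EqualTo, GreaterThan, In}

Utils.compileFilter(EqualTo("city", "bei'jing"), dialect, 100)
// => Some("`city` = 'bei''jing'")   (single quotes are escaped)
Utils.compileFilter(And(GreaterThan("age", 10), In("id", Array(1, 2))), dialect, 100)
// => Some("(`age` > 10) and (`id` in (1, 2))")

// Option normalization: bare keys gain the "doris." prefix, and plain
// "user"/"password" are remapped to the internal request.auth.* properties.
// Passing "doris.user" or "doris.password" directly throws DorisException.
val normalized = Utils.params(Map("table.identifier" -> "db.tbl", "user" -> "root"), logger)
// normalized("doris.table.identifier") == "db.tbl"
// normalized("doris.request.auth.user") == "root"
```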
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -namespace java org.apache.doris.thrift -namespace cpp doris - -include "Types.thrift" -include "Status.thrift" - - -// Parameters to open(). -struct TScanOpenParams { - - 1: required string cluster - - 2: required string database - - 3: required string table - - // tablets to scan - 4: required list<i64> tablet_ids - - // base64 encoded binary plan fragment - 5: required string opaqued_query_plan - - // A string specified for the table that is passed to the external data source. - // Always set, may be an empty string. - 6: optional i32 batch_size - - // reserved params for use - 7: optional map<string, string> properties - - // The query limit, if specified. - 8: optional i64 limit - - // The authenticated user name. Always set. - // maybe useless - 9: optional string user - - 10: optional string passwd - // max keep alive time min - 11: optional i16 keep_alive_min - - 12: optional i32 query_timeout - - // memory limit for a single query - 13: optional i64 mem_limit -} - -struct TScanColumnDesc { - // The column name - 1: optional string name - // The column type. Always set. - 2: optional Types.TPrimitiveType type -} - -// Returned by open(). -struct TScanOpenResult { - 1: required Status.TStatus status - // An opaque context_id used in subsequent getNext()/close() calls. Required. - 2: optional string context_id - // selected fields - 3: optional list<TScanColumnDesc> selected_columns - -} - -// Parameters to getNext() -struct TScanNextBatchParams { - // The opaque handle returned by the previous open() call. Always set. - 1: optional string context_id // doris olap engine context id - 2: optional i64 offset // doris should check the offset to prevent duplicate rpc calls -} - -// Returned by getNext(). -struct TScanBatchResult { - 1: required Status.TStatus status - - // If true, reached the end of the result stream; subsequent calls to - // getNext() won’t return any more results. Required. - 2: optional bool eos - - // A batch of rows of arrow format to return, if any exist. The number of rows in the batch - // should be less than or equal to the batch_size specified in TOpenParams. - 3: optional binary rows -} - -// Parameters to close() -struct TScanCloseParams { - // The opaque handle returned by the previous open() call. Always set. - 1: optional string context_id -} - -// Returned by close().
-struct TScanCloseResult { - 1: required Status.TStatus status -} - -// the scan service exposes the ability to scan data to other compute systems -service TDorisExternalService { - // doris will build a scan context for this session, context_id returned if success - TScanOpenResult open_scanner(1: TScanOpenParams params); - - // return the batch_size of data - TScanBatchResult get_next(1: TScanNextBatchParams params); - - // release the context resource associated with the context_id - TScanCloseResult close_scanner(1: TScanCloseParams params); -} diff --git a/extension/spark-doris-connector/src/main/thrift/doris/Status.thrift b/extension/spark-doris-connector/src/main/thrift/doris/Status.thrift deleted file mode 100644 index 2966a8a535..0000000000 --- a/extension/spark-doris-connector/src/main/thrift/doris/Status.thrift +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -namespace cpp doris -namespace java org.apache.doris.thrift - -enum TStatusCode { - OK, - CANCELLED, - ANALYSIS_ERROR, - NOT_IMPLEMENTED_ERROR, - RUNTIME_ERROR, - MEM_LIMIT_EXCEEDED, - INTERNAL_ERROR, - THRIFT_RPC_ERROR, - TIMEOUT, - KUDU_NOT_ENABLED, // Deprecated - KUDU_NOT_SUPPORTED_ON_OS, // Deprecated - MEM_ALLOC_FAILED, - BUFFER_ALLOCATION_FAILED, - MINIMUM_RESERVATION_UNAVAILABLE, - PUBLISH_TIMEOUT, - LABEL_ALREADY_EXISTS, - ES_INTERNAL_ERROR, - ES_INDEX_NOT_FOUND, - ES_SHARD_NOT_FOUND, - ES_INVALID_CONTEXTID, - ES_INVALID_OFFSET, - ES_REQUEST_ERROR, - - // end of file - END_OF_FILE = 30, - NOT_FOUND = 31, - CORRUPTION = 32, - INVALID_ARGUMENT = 33, - IO_ERROR = 34, - ALREADY_EXIST = 35, - NETWORK_ERROR = 36, - ILLEGAL_STATE = 37, - NOT_AUTHORIZED = 38, - ABORTED = 39, - REMOTE_ERROR = 40, - SERVICE_UNAVAILABLE = 41, - UNINITIALIZED = 42, - CONFIGURATION_ERROR = 43, - INCOMPLETE = 44 -} - -struct TStatus { - 1: required TStatusCode status_code - 2: optional list<string> error_msgs -} diff --git a/extension/spark-doris-connector/src/main/thrift/doris/Types.thrift b/extension/spark-doris-connector/src/main/thrift/doris/Types.thrift deleted file mode 100644 index 44ce6062f2..0000000000 --- a/extension/spark-doris-connector/src/main/thrift/doris/Types.thrift +++ /dev/null @@ -1,376 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License.
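Taken together, the three RPCs above define the whole read handshake: open a scan context, page through Arrow batches by offset, then release the context. A sketch against the thrift-generated client follows; `client` is assumed to be an already-connected TDorisExternalService.Client, and `opaquedQueryPlan`, the tablet id, and the table names are placeholders (in practice they come from the FE query plan):

```scala
// The open_scanner -> get_next -> close_scanner sequence from the IDL above.
import org.apache.doris.thrift.{TScanCloseParams, TScanNextBatchParams, TScanOpenParams}

val open = new TScanOpenParams()
open.setCluster("default_cluster")         // placeholder values
open.setDatabase("example_db")
open.setTable("example_tbl")
open.setTabletIds(java.util.Collections.singletonList(java.lang.Long.valueOf(11017L)))
open.setOpaquedQueryPlan(opaquedQueryPlan) // base64 plan obtained from the FE
val contextId = client.open_scanner(open).getContextId

var eos = false
var offset = 0L
while (!eos) {
  val next = new TScanNextBatchParams()
  next.setContextId(contextId)
  next.setOffset(offset)                   // lets the BE reject duplicate RPCs
  val batch = client.get_next(next)        // batch.getRows holds Arrow-encoded rows
  eos = batch.isEos
  // ... decode batch.getRows (e.g. via RowBatch) and advance offset accordingly ...
}

val close = new TScanCloseParams()
close.setContextId(contextId)
client.close_scanner(close)
```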
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -namespace cpp doris -namespace java org.apache.doris.thrift - - -typedef i64 TTimestamp -typedef i32 TPlanNodeId -typedef i32 TTupleId -typedef i32 TSlotId -typedef i64 TTableId -typedef i64 TTabletId -typedef i64 TVersion -typedef i64 TVersionHash -typedef i32 TSchemaHash -typedef i32 TPort -typedef i64 TCount -typedef i64 TSize -typedef i32 TClusterId -typedef i64 TEpoch - -// add for real time load, partitionid is not defined previously, define it here -typedef i64 TTransactionId -typedef i64 TPartitionId - -enum TStorageType { - ROW, - COLUMN, -} - -enum TStorageMedium { - HDD, - SSD, -} - -enum TVarType { - SESSION, - GLOBAL -} - -enum TPrimitiveType { - INVALID_TYPE, - NULL_TYPE, - BOOLEAN, - TINYINT, - SMALLINT, - INT, - BIGINT, - FLOAT, - DOUBLE, - DATE, - DATETIME, - BINARY, - DECIMAL, - // CHAR(n). Currently only supported in UDAs - CHAR, - LARGEINT, - VARCHAR, - HLL, - DECIMALV2, - TIME, - OBJECT, - ARRAY, - MAP, - STRUCT, - STRING, - ALL -} - -enum TTypeNodeType { - SCALAR, - ARRAY, - MAP, - STRUCT -} - -struct TScalarType { - 1: required TPrimitiveType type - - // Only set if type == CHAR or type == VARCHAR - 2: optional i32 len - - // Only set for DECIMAL - 3: optional i32 precision - 4: optional i32 scale -} - -// Represents a field in a STRUCT type. -// TODO: Model column stats for struct fields. -struct TStructField { - 1: required string name - 2: optional string comment -} - -struct TTypeNode { - 1: required TTypeNodeType type - - // only set for scalar types - 2: optional TScalarType scalar_type - - // only used for structs; has struct_fields.size() corresponding child types - 3: optional list<TStructField> struct_fields -} - -// A flattened representation of a tree of column types obtained by depth-first -// traversal. Complex types such as map, array and struct have child types corresponding -// to the map key/value, array item type, and struct fields, respectively. -// For scalar types the list contains only a single node. -// Note: We cannot rename this to TType because it conflicts with Thrift's internal TType -// and the generated Python thrift files will not work. -// Note: TTypeDesc in impala is TColumnType, but we already use TColumnType, so we name this -// to TTypeDesc. In future, we merge these two to one -struct TTypeDesc { - 1: list<TTypeNode> types -} - -enum TAggregationType { - SUM, - MAX, - MIN, - REPLACE, - HLL_UNION, - NONE -} - -enum TPushType { - LOAD, - DELETE, - LOAD_DELETE -} - -enum TTaskType { - CREATE, - DROP, - PUSH, - CLONE, - STORAGE_MEDIUM_MIGRATE, - ROLLUP, - SCHEMA_CHANGE, - CANCEL_DELETE, // Deprecated - MAKE_SNAPSHOT, - RELEASE_SNAPSHOT, - CHECK_CONSISTENCY, - UPLOAD, - DOWNLOAD, - CLEAR_REMOTE_FILE, - MOVE - REALTIME_PUSH, - PUBLISH_VERSION, - CLEAR_ALTER_TASK, - CLEAR_TRANSACTION_TASK, - RECOVER_TABLET, - STREAM_LOAD, - UPDATE_TABLET_META_INFO, - ALTER_TASK -} - -enum TStmtType { - QUERY, - DDL, // Data definition, e.g. CREATE TABLE (includes read-only functions e.g. SHOW) - DML, // Data modification e.g. INSERT - EXPLAIN // EXPLAIN -} - -// level of verboseness for "explain" output -// TODO: should this go somewhere else?
-enum TExplainLevel { - NORMAL, - VERBOSE -} - -struct TColumnType { - 1: required TPrimitiveType type - // Only set if type == CHAR_ARRAY - 2: optional i32 len - 3: optional i32 index_len - 4: optional i32 precision - 5: optional i32 scale -} - -// A TNetworkAddress is the standard host, port representation of a -// network address. The hostname field must be resolvable to an IPv4 -// address. -struct TNetworkAddress { - 1: required string hostname - 2: required i32 port -} - -// Wire format for UniqueId -struct TUniqueId { - 1: required i64 hi - 2: required i64 lo -} - -enum QueryState { - CREATED, - INITIALIZED, - COMPILED, - RUNNING, - FINISHED, - EXCEPTION -} - -enum TFunctionType { - SCALAR, - AGGREGATE, -} - -enum TFunctionBinaryType { - // Palo builtin. We can either run this interpreted or via codegen - // depending on the query option. - BUILTIN, - - // Hive UDFs, loaded from *.jar - HIVE, - - // Native-interface, precompiled UDFs loaded from *.so - NATIVE, - - // Native-interface, precompiled to IR; loaded from *.ll - IR, -} - -// Represents a fully qualified function name. -struct TFunctionName { - // Name of the function's parent database. Not set if in global - // namespace (e.g. builtins) - 1: optional string db_name - - // Name of the function - 2: required string function_name -} - -struct TScalarFunction { - // Symbol for the function - 1: required string symbol - 2: optional string prepare_fn_symbol - 3: optional string close_fn_symbol -} - -struct TAggregateFunction { - 1: required TTypeDesc intermediate_type - 2: optional string update_fn_symbol - 3: optional string init_fn_symbol - 4: optional string serialize_fn_symbol - 5: optional string merge_fn_symbol - 6: optional string finalize_fn_symbol - 8: optional string get_value_fn_symbol - 9: optional string remove_fn_symbol - 10: optional bool is_analytic_only_fn = false -} - -// Represents a function in the Catalog. -struct TFunction { - // Fully qualified function name. - 1: required TFunctionName name - - // Type of the udf. e.g. hive, native, ir - 2: required TFunctionBinaryType binary_type - - // The types of the arguments to the function - 3: required list<TTypeDesc> arg_types - - // Return type for the function. - 4: required TTypeDesc ret_type - - // If true, this function takes var args. - 5: required bool has_var_args - - // Optional comment to attach to the function - 6: optional string comment - - 7: optional string signature - - // HDFS path for the function binary. This binary must exist at the time the - // function is created. - 8: optional string hdfs_location - - // One of these should be set.
- 9: optional TScalarFunction scalar_fn - 10: optional TAggregateFunction aggregate_fn - - 11: optional i64 id - 12: optional string checksum -} - -enum TLoadJobState { - PENDING, - ETL, - LOADING, - FINISHED, - CANCELLED -} - -enum TEtlState { - RUNNING, - FINISHED, - CANCELLED, - UNKNOWN -} - -enum TTableType { - MYSQL_TABLE, - OLAP_TABLE, - SCHEMA_TABLE, - KUDU_TABLE, // Deprecated - BROKER_TABLE, - ES_TABLE -} - -enum TKeysType { - PRIMARY_KEYS, - DUP_KEYS, - UNIQUE_KEYS, - AGG_KEYS -} - -enum TPriority { - NORMAL, - HIGH -} - -struct TBackend { - 1: required string host - 2: required TPort be_port - 3: required TPort http_port -} - -struct TResourceInfo { - 1: required string user - 2: required string group -} - -enum TExportState { - RUNNING, - FINISHED, - CANCELLED, - UNKNOWN -} - -enum TFileType { - FILE_LOCAL, - FILE_BROKER, - FILE_STREAM, // file content is streaming in the buffer -} - -struct TTabletCommitInfo { - 1: required i64 tabletId - 2: required i64 backendId -} - -enum TLoadType { - MANUL_LOAD, - ROUTINE_LOAD, - MINI_LOAD -} - -enum TLoadSourceType { - RAW, - KAFKA, -} diff --git a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestPartitionDefinition.java b/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestPartitionDefinition.java deleted file mode 100644 index 0bfa3aa5d6..0000000000 --- a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestPartitionDefinition.java +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
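One practical link between this IDL and the Scala code deleted earlier: the TScanColumnDesc list returned by open_scanner is folded into the connector's Schema model by SchemaUtils.convertToSchema, with comment, precision, scale, and aggregation type left blank or zero on that path. A small sketch, with illustrative values:

```scala
// From thrift column descriptors to the connector's Schema model.
import org.apache.doris.spark.sql.SchemaUtils
import org.apache.doris.thrift.{TPrimitiveType, TScanColumnDesc}

val desc = new TScanColumnDesc()
desc.setName("k1")
desc.setType(TPrimitiveType.TINYINT)

val schema = SchemaUtils.convertToSchema(Seq(desc))
// schema.get(0) is Field("k1", "TINYINT", "", 0, 0, ""): the type name comes from
// the TPrimitiveType enum, everything else is defaulted on this path.
```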
- -package org.apache.doris.spark.rest; - -import java.util.HashSet; -import java.util.Set; - -import org.junit.Assert; -import org.junit.Test; - -public class TestPartitionDefinition { - private static final String DATABASE_1 = "database1"; - private static final String TABLE_1 = "table1"; - private static final String BE_1 = "be1"; - private static final String QUERY_PLAN_1 = "queryPlan1"; - private static final long TABLET_ID_1 = 1L; - - private static final String DATABASE_2 = "database2"; - private static final String TABLE_2 = "table2"; - private static final String BE_2 = "be2"; - private static final String QUERY_PLAN_2 = "queryPlan2"; - private static final long TABLET_ID_2 = 2L; - - @Test - public void testCompare() throws Exception { - Set tabletSet1 = new HashSet<>(); - tabletSet1.add(TABLET_ID_1); - Set tabletSet2 = new HashSet<>(); - tabletSet2.add(TABLET_ID_2); - Set tabletSet3 = new HashSet<>(); - tabletSet3.add(TABLET_ID_1); - tabletSet3.add(TABLET_ID_2); - - PartitionDefinition pd1 = new PartitionDefinition( - DATABASE_1, TABLE_1, null, BE_1, tabletSet1, QUERY_PLAN_1); - PartitionDefinition pd3 = new PartitionDefinition( - DATABASE_2, TABLE_1, null, BE_1, tabletSet1, QUERY_PLAN_1); - PartitionDefinition pd4 = new PartitionDefinition( - DATABASE_1, TABLE_2, null, BE_1, tabletSet1, QUERY_PLAN_1); - PartitionDefinition pd5 = new PartitionDefinition( - DATABASE_1, TABLE_1, null, BE_2, tabletSet1, QUERY_PLAN_1); - PartitionDefinition pd6 = new PartitionDefinition( - DATABASE_1, TABLE_1, null, BE_1, tabletSet2, QUERY_PLAN_1); - PartitionDefinition pd7 = new PartitionDefinition( - DATABASE_1, TABLE_1, null, BE_1, tabletSet3, QUERY_PLAN_1); - PartitionDefinition pd8 = new PartitionDefinition( - DATABASE_1, TABLE_1, null, BE_1, tabletSet1, QUERY_PLAN_2); - Assert.assertTrue(pd1.compareTo(pd3) < 0); - Assert.assertTrue(pd1.compareTo(pd4) < 0); - Assert.assertTrue(pd1.compareTo(pd5) < 0); - Assert.assertTrue(pd1.compareTo(pd6) < 0); - Assert.assertTrue(pd1.compareTo(pd7) < 0); - Assert.assertTrue(pd1.compareTo(pd8) < 0); - } -} diff --git a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java b/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java deleted file mode 100644 index 800459012a..0000000000 --- a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java +++ /dev/null @@ -1,327 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.rest; - -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_FENODES; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLET_SIZE; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_DEFAULT; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLET_SIZE_MIN; -import static org.apache.doris.spark.cfg.ConfigurationOptions.DORIS_TABLE_IDENTIFIER; -import static org.hamcrest.core.StringStartsWith.startsWith; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.doris.spark.cfg.PropertiesSettings; -import org.apache.doris.spark.cfg.Settings; -import org.apache.doris.spark.exception.DorisException; -import org.apache.doris.spark.exception.IllegalArgumentException; -import org.apache.doris.spark.rest.models.BackendRow; -import org.apache.doris.spark.rest.models.BackendV2; -import org.apache.doris.spark.rest.models.Field; -import org.apache.doris.spark.rest.models.QueryPlan; -import org.apache.doris.spark.rest.models.Schema; -import org.apache.doris.spark.rest.models.Tablet; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import jdk.nashorn.internal.ir.annotations.Ignore; - -public class TestRestService { - private final static Logger logger = LoggerFactory.getLogger(TestRestService.class); - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testParseIdentifier() throws Exception { - String validIdentifier = "a.b"; - String[] names = RestService.parseIdentifier(validIdentifier, logger); - Assert.assertEquals(2, names.length); - Assert.assertEquals("a", names[0]); - Assert.assertEquals("b", names[1]); - - String invalidIdentifier1 = "a"; - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("argument 'table.identifier' is illegal, value is '" + invalidIdentifier1 + "'."); - RestService.parseIdentifier(invalidIdentifier1, logger); - - String invalidIdentifier3 = "a.b.c"; - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("argument 'table.identifier' is illegal, value is '" + invalidIdentifier3 + "'."); - RestService.parseIdentifier(invalidIdentifier3, logger); - - String emptyIdentifier = ""; - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("argument 'table.identifier' is illegal, value is '" + emptyIdentifier + "'."); - RestService.parseIdentifier(emptyIdentifier, logger); - - String nullIdentifier = null; - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("argument 'table.identifier' is illegal, value is '" + nullIdentifier + "'."); - RestService.parseIdentifier(nullIdentifier, logger); - } - - @Test - public void testChoiceFe() throws Exception { - String validFes = "1,2 , 3"; - String fe = RestService.randomEndpoint(validFes, logger); - List feNodes = new ArrayList<>(3); - feNodes.add("1"); - feNodes.add("2"); - feNodes.add("3"); - Assert.assertTrue(feNodes.contains(fe)); - - String emptyFes = ""; - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("argument 'fenodes' is illegal, value is '" + emptyFes + "'."); - RestService.randomEndpoint(emptyFes, logger); - - String nullFes = null; - 
thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("argument 'fenodes' is illegal, value is '" + nullFes + "'."); - RestService.randomEndpoint(nullFes, logger); - } - - @Test - public void testGetUriStr() throws Exception { - Settings settings = new PropertiesSettings(); - settings.setProperty(DORIS_TABLE_IDENTIFIER, "a.b"); - settings.setProperty(DORIS_FENODES, "fe"); - - String expected = "http://fe/api/a/b/"; - Assert.assertEquals(expected, RestService.getUriStr(settings, logger)); - } - - @Test - public void testFeResponseToSchema() throws Exception { - String res = "{\"properties\":[{\"type\":\"TINYINT\",\"name\":\"k1\",\"comment\":\"\",\"aggregation_type\":\"\"},{\"name\":\"k5\"," - + "\"scale\":\"0\",\"comment\":\"\",\"type\":\"DECIMALV2\",\"precision\":\"9\",\"aggregation_type\":\"\"}],\"status\":200}"; - Schema expected = new Schema(); - expected.setStatus(200); - Field k1 = new Field("k1", "TINYINT", "", 0, 0, ""); - Field k5 = new Field("k5", "DECIMALV2", "", 9, 0, ""); - expected.put(k1); - expected.put(k5); - Assert.assertEquals(expected, RestService.parseSchema(res, logger)); - - String notJsonRes = "not json"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Doris FE's response is not a json. res:")); - RestService.parseSchema(notJsonRes, logger); - - String notSchemaRes = "{\"property\":[{\"type\":\"TINYINT\",\"name\":\"k1\",\"comment\":\"\"}," - + "{\"name\":\"k5\",\"scale\":\"0\",\"comment\":\"\",\"type\":\"DECIMALV2\",\"precision\":\"9\"}]," - + "\"status\":200}"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Doris FE's response cannot map to schema. res: ")); - RestService.parseSchema(notSchemaRes, logger); - - String notOkRes = "{\"properties\":[{\"type\":\"TINYINT\",\"name\":\"k1\",\"comment\":\"\"},{\"name\":\"k5\"," - + "\"scale\":\"0\",\"comment\":\"\",\"type\":\"DECIMALV2\",\"precision\":\"9\"}],\"status\":20}"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Doris FE's response is not OK, status is ")); - RestService.parseSchema(notOkRes, logger); - } - - @Test - public void testFeResponseToQueryPlan() throws Exception { - String res = "{\"partitions\":{" - + "\"11017\":{\"routings\":[\"be1\",\"be2\"],\"version\":3,\"versionHash\":1,\"schemaHash\":1}," - + "\"11019\":{\"routings\":[\"be3\",\"be4\"],\"version\":3,\"versionHash\":1,\"schemaHash\":1}}," - + "\"opaqued_query_plan\":\"query_plan\",\"status\":200}"; - - List routings11017 = new ArrayList<>(2); - routings11017.add("be1"); - routings11017.add("be2"); - - Tablet tablet11017 = new Tablet(); - tablet11017.setSchemaHash(1); - tablet11017.setVersionHash(1); - tablet11017.setVersion(3); - tablet11017.setRoutings(routings11017); - - List routings11019 = new ArrayList<>(2); - routings11019.add("be3"); - routings11019.add("be4"); - - Tablet tablet11019 = new Tablet(); - tablet11019.setSchemaHash(1); - tablet11019.setVersionHash(1); - tablet11019.setVersion(3); - tablet11019.setRoutings(routings11019); - - Map partitions = new LinkedHashMap<>(); - partitions.put("11017", tablet11017); - partitions.put("11019", tablet11019); - - QueryPlan expected = new QueryPlan(); - expected.setPartitions(partitions); - expected.setStatus(200); - expected.setOpaqued_query_plan("query_plan"); - - QueryPlan actual = RestService.getQueryPlan(res, logger); - Assert.assertEquals(expected, actual); - - String notJsonRes = "not json"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Doris FE's response is not a 
json. res:")); - RestService.parseSchema(notJsonRes, logger); - - String notQueryPlanRes = "{\"hello\": \"world\"}"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Doris FE's response cannot map to schema. res: ")); - RestService.parseSchema(notQueryPlanRes, logger); - - String notOkRes = "{\"partitions\":{\"11017\":{\"routings\":[\"be1\",\"be2\"],\"version\":3," - + "\"versionHash\":1,\"schemaHash\":1}},\"opaqued_query_plan\":\"queryPlan\",\"status\":20}"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Doris FE's response is not OK, status is ")); - RestService.parseSchema(notOkRes, logger); - } - - @Test - public void testSelectTabletBe() throws Exception { - String res = "{\"partitions\":{" - + "\"11017\":{\"routings\":[\"be1\",\"be2\"],\"version\":3,\"versionHash\":1,\"schemaHash\":1}," - + "\"11019\":{\"routings\":[\"be3\",\"be4\"],\"version\":3,\"versionHash\":1,\"schemaHash\":1}," - + "\"11021\":{\"routings\":[\"be3\"],\"version\":3,\"versionHash\":1,\"schemaHash\":1}}," - + "\"opaqued_query_plan\":\"query_plan\",\"status\":200}"; - - QueryPlan queryPlan = RestService.getQueryPlan(res, logger); - - List be1Tablet = new ArrayList<>(); - be1Tablet.add(11017L); - List be3Tablet = new ArrayList<>(); - be3Tablet.add(11019L); - be3Tablet.add(11021L); - Map> expected = new HashMap<>(); - expected.put("be1", be1Tablet); - expected.put("be3", be3Tablet); - - Assert.assertEquals(expected, RestService.selectBeForTablet(queryPlan, logger)); - - String noBeRes = "{\"partitions\":{" - + "\"11021\":{\"routings\":[],\"version\":3,\"versionHash\":1,\"schemaHash\":1}}," - + "\"opaqued_query_plan\":\"query_plan\",\"status\":200}"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Cannot choice Doris BE for tablet")); - RestService.selectBeForTablet(RestService.getQueryPlan(noBeRes, logger), logger); - - String notNumberRes = "{\"partitions\":{" - + "\"11021xxx\":{\"routings\":[\"be1\"],\"version\":3,\"versionHash\":1,\"schemaHash\":1}}," - + "\"opaqued_query_plan\":\"query_plan\",\"status\":200}"; - thrown.expect(DorisException.class); - thrown.expectMessage(startsWith("Parse tablet id ")); - RestService.selectBeForTablet(RestService.getQueryPlan(noBeRes, logger), logger); - } - - @Test - public void testGetTabletSize() { - Settings settings = new PropertiesSettings(); - Assert.assertEquals(DORIS_TABLET_SIZE_DEFAULT, RestService.tabletCountLimitForOnePartition(settings, logger)); - - settings.setProperty(DORIS_TABLET_SIZE, "xx"); - Assert.assertEquals(DORIS_TABLET_SIZE_DEFAULT, RestService.tabletCountLimitForOnePartition(settings, logger)); - - settings.setProperty(DORIS_TABLET_SIZE, "10"); - Assert.assertEquals(10, RestService.tabletCountLimitForOnePartition(settings, logger)); - - settings.setProperty(DORIS_TABLET_SIZE, "1"); - Assert.assertEquals(DORIS_TABLET_SIZE_MIN, RestService.tabletCountLimitForOnePartition(settings, logger)); - } - - @Test - public void testTabletsMapToPartition() throws Exception { - List tablets1 = new ArrayList<>(); - tablets1.add(1L); - tablets1.add(2L); - List tablets2 = new ArrayList<>(); - tablets2.add(3L); - tablets2.add(4L); - Map> beToTablets = new HashMap<>(); - beToTablets.put("be1", tablets1); - beToTablets.put("be2", tablets2); - - Settings settings = new PropertiesSettings(); - String opaquedQueryPlan = "query_plan"; - String cluster = "c"; - String database = "d"; - String table = "t"; - - Set be1Tablet = new HashSet<>(); - be1Tablet.add(1L); - be1Tablet.add(2L); - 
PartitionDefinition pd1 = new PartitionDefinition( - database, table, settings, "be1", be1Tablet, opaquedQueryPlan); - - Set be2Tablet = new HashSet<>(); - be2Tablet.add(3L); - be2Tablet.add(4L); - PartitionDefinition pd2 = new PartitionDefinition( - database, table, settings, "be2", be2Tablet, opaquedQueryPlan); - - List expected = new ArrayList<>(); - expected.add(pd1); - expected.add(pd2); - Collections.sort(expected); - - List actual = RestService.tabletsMapToPartition( - settings, beToTablets, opaquedQueryPlan, database, table, logger); - Collections.sort(actual); - - Assert.assertEquals(expected, actual); - } - - @Deprecated - @Ignore - public void testParseBackend() throws Exception { - String response = "{\"href_columns\":[\"BackendId\"],\"parent_url\":\"/rest/v1/system?path=/\"," + - "\"column_names\":[\"BackendId\",\"Cluster\",\"IP\",\"HostName\",\"HeartbeatPort\",\"BePort\"," + - "\"HttpPort\",\"BrpcPort\",\"LastStartTime\",\"LastHeartbeat\",\"Alive\",\"SystemDecommissioned\"," + - "\"ClusterDecommissioned\",\"TabletNum\",\"DataUsedCapacity\",\"AvailCapacity\",\"TotalCapacity\"," + - "\"UsedPct\",\"MaxDiskUsedPct\",\"Tag\",\"ErrMsg\",\"Version\",\"Status\"],\"rows\":[{\"HttpPort\":" + - "\"8040\",\"Status\":\"{\\\"lastSuccessReportTabletsTime\\\":\\\"N/A\\\",\\\"lastStreamLoadTime\\\":" + - "-1}\",\"SystemDecommissioned\":\"false\",\"LastHeartbeat\":\"\\\\N\",\"DataUsedCapacity\":\"0.000 " + - "\",\"ErrMsg\":\"\",\"IP\":\"127.0.0.1\",\"UsedPct\":\"0.00 %\",\"__hrefPaths\":[\"/rest/v1/system?" + - "path=//backends/10002\"],\"Cluster\":\"default_cluster\",\"Alive\":\"true\",\"MaxDiskUsedPct\":" + - "\"0.00 %\",\"BrpcPort\":\"-1\",\"BePort\":\"-1\",\"ClusterDecommissioned\":\"false\"," + - "\"AvailCapacity\":\"1.000 B\",\"Version\":\"\",\"BackendId\":\"10002\",\"HeartbeatPort\":\"9050\"," + - "\"LastStartTime\":\"\\\\N\",\"TabletNum\":\"0\",\"TotalCapacity\":\"0.000 \",\"Tag\":" + - "\"{\\\"location\\\" : \\\"default\\\"}\",\"HostName\":\"localhost\"}]}"; - List backendRows = RestService.parseBackend(response, logger); - Assert.assertTrue(backendRows != null && !backendRows.isEmpty()); - } - - @Test - public void testParseBackendV2() throws Exception { - String response = "{\"backends\":[{\"ip\":\"192.168.1.1\",\"http_port\":8042,\"is_alive\":true}, {\"ip\":\"192.168.1.2\",\"http_port\":8042,\"is_alive\":true}]}"; - List backendRows = RestService.parseBackendV2(response, logger); - Assert.assertEquals(2, backendRows.size()); - } -} diff --git a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/models/TestSchema.java b/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/models/TestSchema.java deleted file mode 100644 index ba674d547d..0000000000 --- a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/models/TestSchema.java +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.rest.models; - -import org.junit.Assert; -import org.junit.Test; -import org.junit.Rule; -import org.junit.rules.ExpectedException; - -public class TestSchema { - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testPutGet() { - Schema ts = new Schema(1); - Field f = new Field(); - ts.put(f); - Assert.assertEquals(f, ts.get(0)); - - thrown.expect(IndexOutOfBoundsException.class); - thrown.expectMessage("Index: 1, Fields size:1"); - ts.get(1); - } -} diff --git a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/serialization/TestRouting.java b/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/serialization/TestRouting.java deleted file mode 100644 index 4309bbf4b1..0000000000 --- a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/serialization/TestRouting.java +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.serialization; - -import static org.hamcrest.core.StringStartsWith.startsWith; - -import org.apache.doris.spark.exception.IllegalArgumentException; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; - - -public class TestRouting { - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testRouting() throws Exception { - Routing r1 = new Routing("10.11.12.13:1234"); - Assert.assertEquals("10.11.12.13", r1.getHost()); - Assert.assertEquals(1234, r1.getPort()); - - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage(startsWith("argument ")); - new Routing("10.11.12.13:wxyz"); - - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage(startsWith("Parse ")); - new Routing("10.11.12.13"); - } -} diff --git a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/serialization/TestRowBatch.java b/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/serialization/TestRowBatch.java deleted file mode 100644 index ff654805f4..0000000000 --- a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/serialization/TestRowBatch.java +++ /dev/null @@ -1,440 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.spark.serialization; - -import static org.hamcrest.core.StringStartsWith.startsWith; - -import java.io.ByteArrayOutputStream; -import java.math.BigDecimal; -import java.util.Arrays; -import java.util.List; -import java.util.NoSuchElementException; - -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.doris.spark.rest.RestService; -import org.apache.doris.spark.rest.models.Schema; -import org.apache.doris.thrift.TScanBatchResult; -import org.apache.doris.thrift.TStatus; -import org.apache.doris.thrift.TStatusCode; -import org.apache.spark.sql.types.Decimal; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.ImmutableList; - -public class TestRowBatch { - private final static Logger logger = LoggerFactory.getLogger(TestRowBatch.class); - - @Rule - public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testRowBatch() throws Exception { - // schema - ImmutableList.Builder childrenBuilder = ImmutableList.builder(); - childrenBuilder.add(new Field("k0", FieldType.nullable(new ArrowType.Bool()), null)); - childrenBuilder.add(new Field("k1", FieldType.nullable(new ArrowType.Int(8, true)), null)); - childrenBuilder.add(new Field("k2", FieldType.nullable(new ArrowType.Int(16, true)), null)); - childrenBuilder.add(new Field("k3", FieldType.nullable(new ArrowType.Int(32, true)), null)); - childrenBuilder.add(new Field("k4", FieldType.nullable(new ArrowType.Int(64, true)), null)); - childrenBuilder.add(new Field("k9", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null)); - childrenBuilder.add(new Field("k8", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null)); - childrenBuilder.add(new Field("k10", FieldType.nullable(new ArrowType.Utf8()), null)); - childrenBuilder.add(new Field("k11", FieldType.nullable(new ArrowType.Utf8()), null)); - childrenBuilder.add(new Field("k5", FieldType.nullable(new ArrowType.Utf8()), null)); - childrenBuilder.add(new Field("k6", FieldType.nullable(new ArrowType.Utf8()), null)); - - VectorSchemaRoot root = VectorSchemaRoot.create( - new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null), - new RootAllocator(Integer.MAX_VALUE)); - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter( - root, - new DictionaryProvider.MapDictionaryProvider(), - outputStream); - - arrowStreamWriter.start(); - 
-        root.setRowCount(3);
-
-        FieldVector vector = root.getVector("k0");
-        BitVector bitVector = (BitVector)vector;
-        bitVector.setInitialCapacity(3);
-        bitVector.allocateNew(3);
-        bitVector.setSafe(0, 1);
-        bitVector.setSafe(1, 0);
-        bitVector.setSafe(2, 1);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k1");
-        TinyIntVector tinyIntVector = (TinyIntVector)vector;
-        tinyIntVector.setInitialCapacity(3);
-        tinyIntVector.allocateNew(3);
-        tinyIntVector.setSafe(0, 1);
-        tinyIntVector.setSafe(1, 2);
-        tinyIntVector.setSafe(2, 3);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k2");
-        SmallIntVector smallIntVector = (SmallIntVector)vector;
-        smallIntVector.setInitialCapacity(3);
-        smallIntVector.allocateNew(3);
-        smallIntVector.setSafe(0, 1);
-        smallIntVector.setSafe(1, 2);
-        smallIntVector.setSafe(2, 3);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k3");
-        IntVector intVector = (IntVector)vector;
-        intVector.setInitialCapacity(3);
-        intVector.allocateNew(3);
-        intVector.setSafe(0, 1);
-        intVector.setNull(1);
-        intVector.setSafe(2, 3);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k4");
-        BigIntVector bigIntVector = (BigIntVector)vector;
-        bigIntVector.setInitialCapacity(3);
-        bigIntVector.allocateNew(3);
-        bigIntVector.setSafe(0, 1);
-        bigIntVector.setSafe(1, 2);
-        bigIntVector.setSafe(2, 3);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k5");
-        VarCharVector varCharVector = (VarCharVector)vector;
-        varCharVector.setInitialCapacity(3);
-        varCharVector.allocateNew();
-        varCharVector.setIndexDefined(0);
-        varCharVector.setValueLengthSafe(0, 5);
-        varCharVector.setSafe(0, "12.34".getBytes());
-        varCharVector.setIndexDefined(1);
-        varCharVector.setValueLengthSafe(1, 5);
-        varCharVector.setSafe(1, "88.88".getBytes());
-        varCharVector.setIndexDefined(2);
-        varCharVector.setValueLengthSafe(2, 2);
-        varCharVector.setSafe(2, "10".getBytes());
-        vector.setValueCount(3);
-
-        vector = root.getVector("k6");
-        VarCharVector charVector = (VarCharVector)vector;
-        charVector.setInitialCapacity(3);
-        charVector.allocateNew();
-        charVector.setIndexDefined(0);
-        charVector.setValueLengthSafe(0, 5);
-        charVector.setSafe(0, "char1".getBytes());
-        charVector.setIndexDefined(1);
-        charVector.setValueLengthSafe(1, 5);
-        charVector.setSafe(1, "char2".getBytes());
-        charVector.setIndexDefined(2);
-        charVector.setValueLengthSafe(2, 5);
-        charVector.setSafe(2, "char3".getBytes());
-        vector.setValueCount(3);
-
-        vector = root.getVector("k8");
-        Float8Vector float8Vector = (Float8Vector)vector;
-        float8Vector.setInitialCapacity(3);
-        float8Vector.allocateNew(3);
-        float8Vector.setSafe(0, 1.1);
-        float8Vector.setSafe(1, 2.2);
-        float8Vector.setSafe(2, 3.3);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k9");
-        Float4Vector float4Vector = (Float4Vector)vector;
-        float4Vector.setInitialCapacity(3);
-        float4Vector.allocateNew(3);
-        float4Vector.setSafe(0, 1.1f);
-        float4Vector.setSafe(1, 2.2f);
-        float4Vector.setSafe(2, 3.3f);
-        vector.setValueCount(3);
-
-        vector = root.getVector("k10");
-        VarCharVector datecharVector = (VarCharVector)vector;
-        datecharVector.setInitialCapacity(3);
-        datecharVector.allocateNew();
-        datecharVector.setIndexDefined(0);
-        datecharVector.setValueLengthSafe(0, 10);
-        datecharVector.setSafe(0, "2008-08-08".getBytes());
-        datecharVector.setIndexDefined(1);
-        datecharVector.setValueLengthSafe(1, 10);
-        datecharVector.setSafe(1, "1900-08-08".getBytes());
-        datecharVector.setIndexDefined(2);
-        datecharVector.setValueLengthSafe(2, 10);
-        datecharVector.setSafe(2, "2100-08-08".getBytes());
-        vector.setValueCount(3);
-
-        vector = root.getVector("k11");
-        VarCharVector timecharVector = (VarCharVector)vector;
-        timecharVector.setInitialCapacity(3);
-        timecharVector.allocateNew();
-        timecharVector.setIndexDefined(0);
-        timecharVector.setValueLengthSafe(0, 19);
-        timecharVector.setSafe(0, "2008-08-08 00:00:00".getBytes());
-        timecharVector.setIndexDefined(1);
-        timecharVector.setValueLengthSafe(1, 19);
-        timecharVector.setSafe(1, "1900-08-08 00:00:00".getBytes());
-        timecharVector.setIndexDefined(2);
-        timecharVector.setValueLengthSafe(2, 19);
-        timecharVector.setSafe(2, "2100-08-08 00:00:00".getBytes());
-        vector.setValueCount(3);
-
-        arrowStreamWriter.writeBatch();
-
-        arrowStreamWriter.end();
-        arrowStreamWriter.close();
-
-        TStatus status = new TStatus();
-        status.setStatusCode(TStatusCode.OK);
-        TScanBatchResult scanBatchResult = new TScanBatchResult();
-        scanBatchResult.setStatus(status);
-        scanBatchResult.setEos(false);
-        scanBatchResult.setRows(outputStream.toByteArray());
-
-        String schemaStr = "{\"properties\":[{\"type\":\"BOOLEAN\",\"name\":\"k0\",\"comment\":\"\"},"
-                + "{\"type\":\"TINYINT\",\"name\":\"k1\",\"comment\":\"\"},{\"type\":\"SMALLINT\",\"name\":\"k2\","
-                + "\"comment\":\"\"},{\"type\":\"INT\",\"name\":\"k3\",\"comment\":\"\"},{\"type\":\"BIGINT\","
-                + "\"name\":\"k4\",\"comment\":\"\"},{\"type\":\"FLOAT\",\"name\":\"k9\",\"comment\":\"\"},"
-                + "{\"type\":\"DOUBLE\",\"name\":\"k8\",\"comment\":\"\"},{\"type\":\"DATE\",\"name\":\"k10\","
-                + "\"comment\":\"\"},{\"type\":\"DATETIME\",\"name\":\"k11\",\"comment\":\"\"},"
-                + "{\"name\":\"k5\",\"scale\":\"0\",\"comment\":\"\","
-                + "\"type\":\"DECIMAL\",\"precision\":\"9\",\"aggregation_type\":\"\"},{\"type\":\"CHAR\",\"name\":\"k6\",\"comment\":\"\",\"aggregation_type\":\"REPLACE_IF_NOT_NULL\"}],"
-                + "\"status\":200}";
-
-        Schema schema = RestService.parseSchema(schemaStr, logger);
-
-        RowBatch rowBatch = new RowBatch(scanBatchResult, schema);
-
-        List<Object> expectedRow1 = Arrays.asList(
-                Boolean.TRUE,
-                (byte) 1,
-                (short) 1,
-                1,
-                1L,
-                (float) 1.1,
-                (double) 1.1,
-                "2008-08-08",
-                "2008-08-08 00:00:00",
-                Decimal.apply(1234L, 4, 2),
-                "char1"
-        );
-
-        List<Object> expectedRow2 = Arrays.asList(
-                Boolean.FALSE,
-                (byte) 2,
-                (short) 2,
-                null,
-                2L,
-                (float) 2.2,
-                (double) 2.2,
-                "1900-08-08",
-                "1900-08-08 00:00:00",
-                Decimal.apply(8888L, 4, 2),
-                "char2"
-        );
-
-        List<Object> expectedRow3 = Arrays.asList(
-                Boolean.TRUE,
-                (byte) 3,
-                (short) 3,
-                3,
-                3L,
-                (float) 3.3,
-                (double) 3.3,
-                "2100-08-08",
-                "2100-08-08 00:00:00",
-                Decimal.apply(10L, 2, 0),
-                "char3"
-        );
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow1 = rowBatch.next();
-        Assert.assertEquals(expectedRow1, actualRow1);
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow2 = rowBatch.next();
-        Assert.assertEquals(expectedRow2, actualRow2);
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow3 = rowBatch.next();
-        Assert.assertEquals(expectedRow3, actualRow3);
-
-        Assert.assertFalse(rowBatch.hasNext());
-        thrown.expect(NoSuchElementException.class);
-        thrown.expectMessage(startsWith("Get row offset:"));
-        rowBatch.next();
-    }
-
-    @Test
-    public void testBinary() throws Exception {
-        byte[] binaryRow0 = {'a', 'b', 'c'};
-        byte[] binaryRow1 = {'d', 'e', 'f'};
-        byte[] binaryRow2 = {'g', 'h', 'i'};
-
-        ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
-        childrenBuilder.add(new Field("k7", FieldType.nullable(new ArrowType.Binary()), null));
-
-        VectorSchemaRoot root = VectorSchemaRoot.create(
-                new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null),
-                new RootAllocator(Integer.MAX_VALUE));
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(
-                root,
-                new DictionaryProvider.MapDictionaryProvider(),
-                outputStream);
-
-        arrowStreamWriter.start();
-        root.setRowCount(3);
-
-        FieldVector vector = root.getVector("k7");
-        VarBinaryVector varBinaryVector = (VarBinaryVector) vector;
-        varBinaryVector.setInitialCapacity(3);
-        varBinaryVector.allocateNew();
-        varBinaryVector.setIndexDefined(0);
-        varBinaryVector.setValueLengthSafe(0, 3);
-        varBinaryVector.setSafe(0, binaryRow0);
-        varBinaryVector.setIndexDefined(1);
-        varBinaryVector.setValueLengthSafe(1, 3);
-        varBinaryVector.setSafe(1, binaryRow1);
-        varBinaryVector.setIndexDefined(2);
-        varBinaryVector.setValueLengthSafe(2, 3);
-        varBinaryVector.setSafe(2, binaryRow2);
-        vector.setValueCount(3);
-
-        arrowStreamWriter.writeBatch();
-
-        arrowStreamWriter.end();
-        arrowStreamWriter.close();
-
-        TStatus status = new TStatus();
-        status.setStatusCode(TStatusCode.OK);
-        TScanBatchResult scanBatchResult = new TScanBatchResult();
-        scanBatchResult.setStatus(status);
-        scanBatchResult.setEos(false);
-        scanBatchResult.setRows(outputStream.toByteArray());
-
-        String schemaStr = "{\"properties\":[{\"type\":\"BINARY\",\"name\":\"k7\",\"comment\":\"\"}], \"status\":200}";
-
-        Schema schema = RestService.parseSchema(schemaStr, logger);
-
-        RowBatch rowBatch = new RowBatch(scanBatchResult, schema);
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow0 = rowBatch.next();
-        Assert.assertArrayEquals(binaryRow0, (byte[])actualRow0.get(0));
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow1 = rowBatch.next();
-        Assert.assertArrayEquals(binaryRow1, (byte[])actualRow1.get(0));
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow2 = rowBatch.next();
-        Assert.assertArrayEquals(binaryRow2, (byte[])actualRow2.get(0));
-
-        Assert.assertFalse(rowBatch.hasNext());
-        thrown.expect(NoSuchElementException.class);
-        thrown.expectMessage(startsWith("Get row offset:"));
-        rowBatch.next();
-    }
-
-    @Test
-    public void testDecimalV2() throws Exception {
-        ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
-        childrenBuilder.add(new Field("k7", FieldType.nullable(new ArrowType.Decimal(27, 9)), null));
-
-        VectorSchemaRoot root = VectorSchemaRoot.create(
-                new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null),
-                new RootAllocator(Integer.MAX_VALUE));
-        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(
-                root,
-                new DictionaryProvider.MapDictionaryProvider(),
-                outputStream);
-
-        arrowStreamWriter.start();
-        root.setRowCount(3);
-
-        FieldVector vector = root.getVector("k7");
-        DecimalVector decimalVector = (DecimalVector) vector;
-        decimalVector.setInitialCapacity(3);
-        decimalVector.allocateNew(3);
-        decimalVector.setSafe(0, new BigDecimal("12.340000000"));
-        decimalVector.setSafe(1, new BigDecimal("88.880000000"));
-        decimalVector.setSafe(2, new BigDecimal("10.000000000"));
-        vector.setValueCount(3);
-
-        arrowStreamWriter.writeBatch();
-
-        arrowStreamWriter.end();
-        arrowStreamWriter.close();
-
-        TStatus status = new TStatus();
-        status.setStatusCode(TStatusCode.OK);
-        TScanBatchResult scanBatchResult = new TScanBatchResult();
-        scanBatchResult.setStatus(status);
-        scanBatchResult.setEos(false);
-        scanBatchResult.setRows(outputStream.toByteArray());
-
-        String schemaStr = "{\"properties\":[{\"type\":\"DECIMALV2\",\"scale\": 0,"
-                + "\"precision\": 9, \"name\":\"k7\",\"comment\":\"\"}], "
-                + "\"status\":200}";
-
-        Schema schema = RestService.parseSchema(schemaStr, logger);
-
-        RowBatch rowBatch = new RowBatch(scanBatchResult, schema);
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow0 = rowBatch.next();
-        Assert.assertEquals(Decimal.apply(12340000000L, 11, 9), (Decimal)actualRow0.get(0));
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow1 = rowBatch.next();
-        Assert.assertEquals(Decimal.apply(88880000000L, 11, 9), (Decimal)actualRow1.get(0));
-
-        Assert.assertTrue(rowBatch.hasNext());
-        List<Object> actualRow2 = rowBatch.next();
-        Assert.assertEquals(Decimal.apply(10000000000L, 11, 9), (Decimal)actualRow2.get(0));
-
-        Assert.assertFalse(rowBatch.hasNext());
-        thrown.expect(NoSuchElementException.class);
-        thrown.expectMessage(startsWith("Get row offset:"));
-        rowBatch.next();
-    }
-}
diff --git a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/sql/ExpectedExceptionTest.java b/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/sql/ExpectedExceptionTest.java
deleted file mode 100644
index 69749f44c8..0000000000
--- a/extension/spark-doris-connector/src/test/java/org/apache/doris/spark/sql/ExpectedExceptionTest.java
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.spark.sql;
-
-import org.junit.Rule;
-import org.junit.rules.ExpectedException;
-
-public class ExpectedExceptionTest {
-    @Rule
-    public ExpectedException thrown = ExpectedException.none();
-}
diff --git a/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestSchemaUtils.scala b/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestSchemaUtils.scala
deleted file mode 100644
index 97bbe0edbe..0000000000
--- a/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestSchemaUtils.scala
+++ /dev/null
@@ -1,98 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.spark.sql
-
-import org.apache.doris.spark.exception.DorisException
-import org.apache.doris.spark.rest.models.{Field, Schema}
-import org.apache.doris.thrift.{TPrimitiveType, TScanColumnDesc}
-import org.apache.spark.sql.types._
-import org.hamcrest.core.StringStartsWith.startsWith
-import org.junit.{Assert, Test}
-
-import scala.collection.JavaConverters._
-
-class TestSchemaUtils extends ExpectedExceptionTest {
-  @Test
-  def testConvertToStruct(): Unit = {
-    val schema = new Schema
-    schema.setStatus(200)
-    val k1 = new Field("k1", "TINYINT", "", 0, 0, "")
-    val k5 = new Field("k5", "BIGINT", "", 0, 0, "")
-    schema.put(k1)
-    schema.put(k5)
-
-    var fields = List[StructField]()
-    fields :+= DataTypes.createStructField("k1", DataTypes.ByteType, true)
-    fields :+= DataTypes.createStructField("k5", DataTypes.LongType, true)
-    val expected = DataTypes.createStructType(fields.asJava)
-    Assert.assertEquals(expected, SchemaUtils.convertToStruct(schema))
-  }
-
-  @Test
-  def testGetCatalystType(): Unit = {
-    Assert.assertEquals(DataTypes.NullType, SchemaUtils.getCatalystType("NULL_TYPE", 0, 0))
-    Assert.assertEquals(DataTypes.BooleanType, SchemaUtils.getCatalystType("BOOLEAN", 0, 0))
-    Assert.assertEquals(DataTypes.ByteType, SchemaUtils.getCatalystType("TINYINT", 0, 0))
-    Assert.assertEquals(DataTypes.ShortType, SchemaUtils.getCatalystType("SMALLINT", 0, 0))
-    Assert.assertEquals(DataTypes.IntegerType, SchemaUtils.getCatalystType("INT", 0, 0))
-    Assert.assertEquals(DataTypes.LongType, SchemaUtils.getCatalystType("BIGINT", 0, 0))
-    Assert.assertEquals(DataTypes.FloatType, SchemaUtils.getCatalystType("FLOAT", 0, 0))
-    Assert.assertEquals(DataTypes.DoubleType, SchemaUtils.getCatalystType("DOUBLE", 0, 0))
-    Assert.assertEquals(DataTypes.StringType, SchemaUtils.getCatalystType("DATE", 0, 0))
-    Assert.assertEquals(DataTypes.StringType, SchemaUtils.getCatalystType("DATETIME", 0, 0))
-    Assert.assertEquals(DataTypes.BinaryType, SchemaUtils.getCatalystType("BINARY", 0, 0))
-    Assert.assertEquals(DecimalType(9, 3), SchemaUtils.getCatalystType("DECIMAL", 9, 3))
-    Assert.assertEquals(DataTypes.StringType, SchemaUtils.getCatalystType("CHAR", 0, 0))
-    Assert.assertEquals(DataTypes.StringType, SchemaUtils.getCatalystType("LARGEINT", 0, 0))
-    Assert.assertEquals(DataTypes.StringType, SchemaUtils.getCatalystType("VARCHAR", 0, 0))
-    Assert.assertEquals(DecimalType(10, 5), SchemaUtils.getCatalystType("DECIMALV2", 10, 5))
-    Assert.assertEquals(DataTypes.DoubleType, SchemaUtils.getCatalystType("TIME", 0, 0))
-
-    // ExpectedException verifies only the first exception thrown, so the HLL case is asserted inline.
-    try {
-      SchemaUtils.getCatalystType("HLL", 0, 0)
-      Assert.fail("expected DorisException for unsupported type HLL")
-    } catch {
-      case e: DorisException => Assert.assertTrue(e.getMessage.startsWith("Unsupported type"))
-    }
-
-    thrown.expect(classOf[DorisException])
-    thrown.expectMessage(startsWith("Unrecognized Doris type"))
-    SchemaUtils.getCatalystType("UNRECOGNIZED", 0, 0)
-  }
-
-  @Test
-  def testConvertToSchema(): Unit = {
-    val k1 = new TScanColumnDesc
-    k1.setName("k1")
-    k1.setType(TPrimitiveType.BOOLEAN)
-
-    val k2 = new TScanColumnDesc
-    k2.setName("k2")
-    k2.setType(TPrimitiveType.DOUBLE)
-
-    val expected = new Schema
-    expected.setStatus(0)
-    val ek1 = new Field("k1", "BOOLEAN", "", 0, 0, "")
-    val ek2 = new Field("k2", "DOUBLE", "", 0, 0, "")
-    expected.put(ek1)
-    expected.put(ek2)
-
-    Assert.assertEquals(expected, SchemaUtils.convertToSchema(Seq(k1, k2)))
-  }
-}
diff --git a/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestSparkConnector.scala b/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestSparkConnector.scala
deleted file mode 100644
index bdee013e56..0000000000
--- a/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestSparkConnector.scala
+++ /dev/null
@@ -1,118 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-package org.apache.doris.spark.sql
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.{SparkConf, SparkContext}
-import org.junit.Ignore
-import org.junit.Test
-
-// This test needs real connection info to run.
-// Set the connection info below before removing this @Ignore.
-@Ignore
-class TestSparkConnector {
-  val dorisFeNodes = "your_fe_host:8030"
-  val dorisUser = "root"
-  val dorisPwd = ""
-  val dorisTable = "test.test_tbl"
-
-  val kafkaServers = ""
-  val kafkaTopics = ""
-
-  @Test
-  def rddReadTest(): Unit = {
-    val sparkConf: SparkConf = new SparkConf().setMaster("local[*]")
-    val sc = new SparkContext(sparkConf)
-    import org.apache.doris.spark._
-    val dorisSparkRDD = sc.dorisRDD(
-      tableIdentifier = Some(dorisTable),
-      cfg = Some(Map(
-        "doris.fenodes" -> dorisFeNodes,
-        "doris.request.auth.user" -> dorisUser,
-        "doris.request.auth.password" -> dorisPwd
-      ))
-    )
-    dorisSparkRDD.map(println(_)).count()
-    sc.stop()
-  }
-
-  @Test
-  def dataframeWriteTest(): Unit = {
-    val session = SparkSession.builder().master("local[*]").getOrCreate()
-    val df = session.createDataFrame(Seq(
-      ("zhangsan", "m"),
-      ("lisi", "f"),
-      ("wangwu", "m")
-    ))
-    df.write
-      .format("doris")
-      .option("doris.fenodes", dorisFeNodes)
-      .option("doris.table.identifier", dorisTable)
-      .option("user", dorisUser)
-      .option("password", dorisPwd)
-      // specify the fields to write
-      .option("doris.write.field", "name,gender")
-      .option("sink.batch.size", 2)
-      .option("sink.max-retries", 2)
-      .save()
-    session.stop()
-  }
-
-  @Test
-  def dataframeReadTest(): Unit = {
-    val session = SparkSession.builder().master("local[*]").getOrCreate()
-    val dorisSparkDF = session.read
-      .format("doris")
-      .option("doris.fenodes", dorisFeNodes)
-      .option("doris.table.identifier", dorisTable)
-      .option("user", dorisUser)
-      .option("password", dorisPwd)
-      .load()
-
-    dorisSparkDF.show()
-    session.stop()
-  }
-
-
-  @Test
-  def structuredStreamingWriteTest(): Unit = {
-    val spark = SparkSession.builder()
-      .master("local")
-      .getOrCreate()
-    val df = spark.readStream
-      .option("kafka.bootstrap.servers", kafkaServers)
-      .option("startingOffsets", "latest")
-      .option("subscribe", kafkaTopics)
-      .format("kafka")
-      .option("failOnDataLoss", false)
-      .load()
-
-    df.selectExpr("CAST(timestamp AS STRING)", "CAST(partition as STRING)")
-      .writeStream
-      .format("doris")
.option("checkpointLocation", "/tmp/test") - .option("doris.table.identifier", dorisTable) - .option("doris.fenodes", dorisFeNodes) - .option("user", dorisUser) - .option("password", dorisPwd) - .option("sink.batch.size",2) - .option("sink.max-retries",2) - .start().awaitTermination() - } -} - diff --git a/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestUtils.scala b/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestUtils.scala deleted file mode 100644 index b1affbfdd3..0000000000 --- a/extension/spark-doris-connector/src/test/scala/org/apache/doris/spark/sql/TestUtils.scala +++ /dev/null @@ -1,120 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.spark.sql - -import org.apache.doris.spark.cfg.ConfigurationOptions -import org.apache.doris.spark.exception.DorisException -import org.apache.spark.sql.jdbc.JdbcDialects -import org.apache.spark.sql.sources._ -import org.hamcrest.core.StringStartsWith.startsWith -import org.junit._ -import org.slf4j.LoggerFactory - -class TestUtils extends ExpectedExceptionTest { - private lazy val logger = LoggerFactory.getLogger(classOf[TestUtils]) - - @Test - def testCompileFilter(): Unit = { - val dialect = JdbcDialects.get("") - val inValueLengthLimit = 5 - - val equalFilter = EqualTo("left", 5) - val greaterThanFilter = GreaterThan("left", 5) - val greaterThanOrEqualFilter = GreaterThanOrEqual("left", 5) - val lessThanFilter = LessThan("left", 5) - val lessThanOrEqualFilter = LessThanOrEqual("left", 5) - val validInFilter = In("left", Array(1, 2, 3, 4)) - val emptyInFilter = In("left", Array.empty) - val invalidInFilter = In("left", Array(1, 2, 3, 4, 5)) - val isNullFilter = IsNull("left") - val isNotNullFilter = IsNotNull("left") - val notSupportFilter = StringContains("left", "right") - val validAndFilter = And(equalFilter, greaterThanFilter) - val invalidAndFilter = And(equalFilter, notSupportFilter) - val validOrFilter = Or(equalFilter, greaterThanFilter) - val invalidOrFilter = Or(equalFilter, notSupportFilter) - - Assert.assertEquals("`left` = 5", Utils.compileFilter(equalFilter, dialect, inValueLengthLimit).get) - Assert.assertEquals("`left` > 5", Utils.compileFilter(greaterThanFilter, dialect, inValueLengthLimit).get) - Assert.assertEquals("`left` >= 5", Utils.compileFilter(greaterThanOrEqualFilter, dialect, inValueLengthLimit).get) - Assert.assertEquals("`left` < 5", Utils.compileFilter(lessThanFilter, dialect, inValueLengthLimit).get) - Assert.assertEquals("`left` <= 5", Utils.compileFilter(lessThanOrEqualFilter, dialect, inValueLengthLimit).get) - Assert.assertEquals("`left` in (1, 2, 3, 4)", Utils.compileFilter(validInFilter, dialect, inValueLengthLimit).get) - 
-    Assert.assertTrue(Utils.compileFilter(emptyInFilter, dialect, inValueLengthLimit).isEmpty)
-    Assert.assertTrue(Utils.compileFilter(invalidInFilter, dialect, inValueLengthLimit).isEmpty)
-    Assert.assertEquals("`left` is null", Utils.compileFilter(isNullFilter, dialect, inValueLengthLimit).get)
-    Assert.assertEquals("`left` is not null", Utils.compileFilter(isNotNullFilter, dialect, inValueLengthLimit).get)
-    Assert.assertEquals("(`left` = 5) and (`left` > 5)",
-      Utils.compileFilter(validAndFilter, dialect, inValueLengthLimit).get)
-    Assert.assertTrue(Utils.compileFilter(invalidAndFilter, dialect, inValueLengthLimit).isEmpty)
-    Assert.assertEquals("(`left` = 5) or (`left` > 5)",
-      Utils.compileFilter(validOrFilter, dialect, inValueLengthLimit).get)
-    Assert.assertTrue(Utils.compileFilter(invalidOrFilter, dialect, inValueLengthLimit).isEmpty)
-  }
-
-  @Test
-  def testParams(): Unit = {
-    val parameters1 = Map(
-      ConfigurationOptions.DORIS_TABLE_IDENTIFIER -> "a.b",
-      "test_underline" -> "x_y",
-      "user" -> "user",
-      "password" -> "password"
-    )
-    val result1 = Utils.params(parameters1, logger)
-    Assert.assertEquals("a.b", result1(ConfigurationOptions.DORIS_TABLE_IDENTIFIER))
-    Assert.assertEquals("x_y", result1("doris.test.underline"))
-    Assert.assertEquals("user", result1("doris.request.auth.user"))
-    Assert.assertEquals("password", result1("doris.request.auth.password"))
-
-
-    val parameters2 = Map(
-      ConfigurationOptions.TABLE_IDENTIFIER -> "a.b"
-    )
-    val result2 = Utils.params(parameters2, logger)
-    Assert.assertEquals("a.b", result2(ConfigurationOptions.DORIS_TABLE_IDENTIFIER))
-
-    // The ExpectedException rule verifies only the first exception thrown in a
-    // test method, so each forbidden option is checked with its own try/catch.
-    def assertForbidden(parameters: Map[String, String], option: String): Unit = {
-      try {
-        Utils.params(parameters, logger)
-        Assert.fail(s"expected DorisException for forbidden option $option")
-      } catch {
-        case e: DorisException =>
-          Assert.assertTrue(e.getMessage.startsWith(s"$option cannot use in Doris Datasource,"))
-      }
-    }
-
-    assertForbidden(Map(ConfigurationOptions.DORIS_PASSWORD -> "a.b"), ConfigurationOptions.DORIS_PASSWORD)
-    assertForbidden(Map(ConfigurationOptions.DORIS_USER -> "a.b"), ConfigurationOptions.DORIS_USER)
-    assertForbidden(Map(ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD -> "a.b"), ConfigurationOptions.DORIS_REQUEST_AUTH_PASSWORD)
-    assertForbidden(Map(ConfigurationOptions.DORIS_REQUEST_AUTH_USER -> "a.b"), ConfigurationOptions.DORIS_REQUEST_AUTH_USER)
-  }
-}
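Note on the filter tests above: Utils.compileFilter returns an Option[String], so predicates the connector cannot push down compile to None and are left for Spark to evaluate after the scan. The following is a minimal sketch of how the compiled fragments could be assembled into a scan predicate. It assumes only the Option[String] signature exercised by testCompileFilter and that Utils is visible from the caller's package; the object name and the WHERE-clause assembly are illustrative, not the connector's actual code.

    import org.apache.doris.spark.sql.Utils
    import org.apache.spark.sql.jdbc.JdbcDialects
    import org.apache.spark.sql.sources.{EqualTo, Filter, GreaterThan, StringContains}

    object WhereClauseSketch {
      def main(args: Array[String]): Unit = {
        val dialect = JdbcDialects.get("")
        val filters: Array[Filter] =
          Array(EqualTo("k1", 5), GreaterThan("k2", 10), StringContains("k3", "x"))
        // Unsupported filters (here StringContains) compile to None and are skipped.
        val compiled = filters.flatMap(f => Utils.compileFilter(f, dialect, 100))
        val where = if (compiled.isEmpty) "" else compiled.mkString(" WHERE ", " AND ", "")
        println(where) // " WHERE `k1` = 5 AND `k2` > 10"
      }
    }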