From 889e33d53d877cea08b114b3545b7d0d5a050b63 Mon Sep 17 00:00:00 2001
From: xtr_1993 <38655011+xtr1993@users.noreply.github.com>
Date: Wed, 22 Dec 2021 23:29:02 +0800
Subject: [PATCH] [docs](seatunnel) Seatunnel Supports Doris connector (#7453)

---
 docs/.vuepress/sidebar/en.js            |   1 +
 docs/.vuepress/sidebar/zh-CN.js         |   1 +
 docs/en/extending-doris/seatunnel.md    | 116 +++++++++++++++++++++++
 docs/zh-CN/extending-doris/seatunnel.md | 117 ++++++++++++++++++++++++
 4 files changed, 235 insertions(+)
 create mode 100644 docs/en/extending-doris/seatunnel.md
 create mode 100644 docs/zh-CN/extending-doris/seatunnel.md

diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index 70c24b7796..7ae30a3a21 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -239,6 +239,7 @@ module.exports = [
             "spark-doris-connector",
             "flink-doris-connector",
             "datax",
+            "seatunnel",
             {
                 title: "UDF",
                 directoryPath: "udf/",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 3d161999da..3e16cb9cbf 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -240,6 +240,7 @@ module.exports = [
             "spark-doris-connector",
             "flink-doris-connector",
             "datax",
+            "seatunnel",
             {
                 title: "UDF",
                 directoryPath: "udf/",
diff --git a/docs/en/extending-doris/seatunnel.md b/docs/en/extending-doris/seatunnel.md
new file mode 100644
index 0000000000..85cd60459c
--- /dev/null
+++ b/docs/en/extending-doris/seatunnel.md
@@ -0,0 +1,116 @@

# Seatunnel

The latest [Seatunnel (Waterdrop)](https://interestinglab.github.io/seatunnel-docs/#/) supports a Doris connector.
Seatunnel can load data into Doris through either the Spark engine or the Flink engine.

Under the hood, Seatunnel writes data with Doris's Stream Load function. Everyone is welcome to use it.

# Install Seatunnel

[Seatunnel installation guide](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v2/flink/installation)

## Spark Sink Doris

### Options

| name | type | required | default value | engine |
| --- | --- | --- | --- | --- |
| fenodes | string | yes | - | Spark |
| database | string | yes | - | Spark |
| table | string | yes | - | Spark |
| user | string | yes | - | Spark |
| password | string | yes | - | Spark |
| batch_size | int | yes | 100 | Spark |
| doris.* | string | no | - | Spark |

`fenodes [string]`

Doris FE address and HTTP port, for example `fe_host:8030`

`database [string]`

Doris target database name

`table [string]`

Doris target table name

`user [string]`

Doris user name

`password [string]`

Doris user's password

`batch_size [int]`

Number of rows submitted to Doris per batch

`doris.* [string]`

Doris Stream Load properties: use the `doris.` prefix followed by the Stream Load property name, for example `doris.column_separator`.

[More Doris Stream Load configurations](https://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html)
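Because the connector writes through Stream Load, every `doris.*` option is handed to Doris as a property of the underlying Stream Load request. As a rough sketch only (not part of the connector's interface), the sink settings used in the example below correspond to a manual Stream Load call along these lines; the FE host, credentials, database, table, and data file are placeholders taken from that example:

```
# Sketch only: an equivalent manual Stream Load request.
# The FE host (xxxx:8030), credentials, database, table and data file are
# placeholders from the example configuration below. doris.columns maps to
# the "columns" property; doris.column_separator maps to "column_separator"
# (tab is the Stream Load default, so it is omitted here).
curl --location-trusted -u root:root \
     -H "columns: date_key,date_value,day_in_year,day_in_month" \
     -T part-00000.csv \
     http://xxxx:8030/api/gl_mint_dim/dim_date/_stream_load
```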
### Examples

Hive to Doris

Config file
```
env {
  spark.app.name = "hive2doris-template"
}

spark {
  spark.sql.catalogImplementation = "hive"
}

source {
  hive {
    # SQL used to read the source data from Hive
    preSql = "select * from tmp.test"
    result_table_name = "test"
  }
}

transform {
}

sink {
  Console {
    # print rows to the console as well
  }

  Doris {
    # write to Doris through Stream Load
    fenodes="xxxx:8030"
    database="gl_mint_dim"
    table="dim_date"
    user="root"
    password="root"
    batch_size=1000
    doris.column_separator="\t"
    doris.columns="date_key,date_value,day_in_year,day_in_month"
  }
}
```
Start command
```
sh bin/start-waterdrop-spark.sh --master local[4] --deploy-mode client --config ./config/spark.conf
```
\ No newline at end of file
diff --git a/docs/zh-CN/extending-doris/seatunnel.md b/docs/zh-CN/extending-doris/seatunnel.md
new file mode 100644
index 0000000000..29fcebd356
--- /dev/null
+++ b/docs/zh-CN/extending-doris/seatunnel.md
@@ -0,0 +1,117 @@

# Seatunnel

The latest [Seatunnel (Waterdrop)](https://interestinglab.github.io/seatunnel-docs/#/) supports a Doris connector. Seatunnel can synchronize data into Doris through either the Spark engine or the Flink engine.

In fact, Seatunnel synchronizes data through Stream Load and performs well. Everyone is welcome to use it.

# Install Seatunnel

[Seatunnel installation guide](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v2/flink/installation)

## Spark Sink Doris

### Plugin code

The code of the Spark Sink Doris plugin is [here](https://github.com/InterestingLab/seatunnel/tree/dev/seatunnel-connectors/plugin-spark-sink-doris).

### Options

| name | type | required | default value | engine |
| --- | --- | --- | --- | --- |
| fenodes | string | yes | - | Spark |
| database | string | yes | - | Spark |
| table | string | yes | - | Spark |
| user | string | yes | - | Spark |
| password | string | yes | - | Spark |
| batch_size | int | yes | 100 | Spark |
| doris.* | string | no | - | Spark |

`fenodes [string]`

Doris FE address and HTTP port, for example `fe_host:8030`

`database [string]`

Doris database to write to

`table [string]`

Doris table to write to

`user [string]`

Doris user name

`password [string]`

Doris user's password

`batch_size [int]`

Spark writes through Stream Load; this is the number of rows submitted per batch

`doris.* [string]`

HTTP properties of the Stream Load request: add the `doris.` prefix in front of the property name documented on the official site

[More Doris Stream Load configurations](https://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html)

### Examples

Migrating data from Hive to Doris
```
env {
  spark.app.name = "hive2doris-template"
}

spark {
  spark.sql.catalogImplementation = "hive"
}

source {
  hive {
    preSql = "select * from tmp.test"
    result_table_name = "test"
  }
}

transform {
}

sink {
  Console {
  }

  Doris {
    fenodes="xxxx:8030"
    database="tmp"
    table="test"
    user="root"
    password="root"
    batch_size=1000
    doris.column_separator="\t"
    doris.columns="date_key,date_value,day_in_year,day_in_month"
  }
}
```
Start command
```
sh bin/start-waterdrop-spark.sh --master local[4] --deploy-mode client --config ./config/spark.conf
```
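Once the Spark job has finished, the load result can be checked over the MySQL protocol. A minimal sketch, assuming the FE host from the example above and the default query port 9030, with the database and table taken from the example configuration:

```
# Sketch only: the FE host and the default query port 9030 are assumptions;
# tmp.test is the target table from the example configuration above.
mysql -h xxxx -P 9030 -uroot -proot -e "SELECT COUNT(*) FROM tmp.test;"
```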