From c6b1b9de809e739b536dfdd034be2d06333c0ee4 Mon Sep 17 00:00:00 2001 From: Yulei-Yang Date: Thu, 20 Apr 2023 23:12:17 +0800 Subject: [PATCH] [Improvement](broker) support broker load from tencent Goose File System (#18745) Including below functions: 1. broker load 2. export 3. select into outfile 4. create repo and backup to gfs after config env, use gfs like other hdfs system. --- docs/en/docs/advanced/broker.md | 1 + docs/en/docs/lakehouse/multi-catalog/hive.md | 11 +++++++++++ docs/en/docs/lakehouse/multi-catalog/iceberg.md | 8 ++++++++ docs/zh-CN/docs/advanced/broker.md | 1 + docs/zh-CN/docs/lakehouse/multi-catalog/hive.md | 11 +++++++++++ docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md | 8 ++++++++ .../java/org/apache/doris/analysis/ExportStmt.java | 3 ++- .../org/apache/doris/analysis/StorageBackend.java | 1 + .../java/org/apache/doris/backup/BlobStorage.java | 1 + 9 files changed, 44 insertions(+), 1 deletion(-) diff --git a/docs/en/docs/advanced/broker.md b/docs/en/docs/advanced/broker.md index bcf9efdc0f..aa4ce1fe52 100644 --- a/docs/en/docs/advanced/broker.md +++ b/docs/en/docs/advanced/broker.md @@ -31,6 +31,7 @@ Broker is an optional process in the Doris cluster. It is mainly used to support - Apache HDFS - Aliyun OSS - Tencent Cloud CHDFS +- Tencent Cloud GFS (since 1.2.0) - Huawei Cloud OBS (since 1.2.0) - Amazon S3 - JuiceFS (since 2.0.0) diff --git a/docs/en/docs/lakehouse/multi-catalog/hive.md b/docs/en/docs/lakehouse/multi-catalog/hive.md index c2a05ec622..11ba2ecc1f 100644 --- a/docs/en/docs/lakehouse/multi-catalog/hive.md +++ b/docs/en/docs/lakehouse/multi-catalog/hive.md @@ -38,6 +38,17 @@ When connnecting to Hive, Doris: 2. Supports both Managed Table and External Table; 3. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore; 4. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`). +5. 
Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment: + 1. put chdfs_hadoop_plugin_network-x.x.jar in fe/lib/ and apache_hdfs_broker/lib/ + 2. copy core-site.xml and hdfs-site.xml from hive cluster to fe/conf/ and apache_hdfs_broker/conf + + + +6. Supports Hive / Iceberg tables with data stored in GooseFS(GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment: + 1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/ + 2. add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating catalog + + ## Create Catalog diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md b/docs/en/docs/lakehouse/multi-catalog/iceberg.md index f410ebac59..91af94462e 100644 --- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md +++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md @@ -34,6 +34,14 @@ When connecting to Iceberg, Doris: 1. Supports Iceberg V1/V2 table formats; 2. Supports Position Delete but not Equality Delete for V2 format; + + +3. Supports Hive / Iceberg tables with data stored in GooseFS(GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment: + 1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/ + 2. 
add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating catalog + + + ## Create Catalog ### Hive Metastore Catalog diff --git a/docs/zh-CN/docs/advanced/broker.md b/docs/zh-CN/docs/advanced/broker.md index 2711bc62d9..e82123e726 100644 --- a/docs/zh-CN/docs/advanced/broker.md +++ b/docs/zh-CN/docs/advanced/broker.md @@ -31,6 +31,7 @@ Broker 是 Doris 集群中一种可选进程,主要用于支持 Doris 读写 - Apache HDFS - 阿里云 OSS - 腾讯云 CHDFS +- 腾讯云 GFS (1.2.0 版本支持) - 华为云 OBS (1.2.0 版本后支持) - 亚马逊 S3 - JuiceFS (2.0.0 版本支持) diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md index 99df43bafb..456d16cd34 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hive.md @@ -36,6 +36,17 @@ under the License. 2. 支持 Managed Table 和 External Table。 3. 可以识别 Hive Metastore 中存储的 hive、iceberg、hudi 元数据。 4. 支持数据存储在 Juicefs 上的 hive 表,用法如下(需要把juicefs-hadoop-x.x.x.jar放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下)。 +5. 支持数据存储在 CHDFS 上的 hive 表。需配置环境: + 1. 把chdfs_hadoop_plugin_network-x.x.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下 + 2. 将 hive 所在 Hadoop 集群的 core-site.xml 和 hdfs-site.xml 复制到 fe/conf/ 和 apache_hdfs_broker/conf 目录下 + + + +6. 支持数据存储在 GooseFS(GFS) 上的 hive、iceberg表。需配置环境: + 1. 把 goosefs-x.x.x-client.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下 + 2. 创建 catalog 时增加属性:'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' + + ## 创建 Catalog diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md index 2d0bda6148..87f1ff429d 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md @@ -32,6 +32,14 @@ under the License. 1. 支持 Iceberg V1/V2 表格式。 2. 
V2 格式仅支持 Position Delete 方式,不支持 Equality Delete。 + + +3. 支持数据存储在 GooseFS(GFS) 上的 iceberg表。需配置环境: + 1. 把goosefs-x.x.x-client.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下 + 2. 创建 catalog 时增加属性:'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' + + + ## 创建 Catalog ### 基于Hive Metastore创建Catalog diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java index d9a7e59554..c4096a7ffe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExportStmt.java @@ -277,9 +277,10 @@ public class ExportStmt extends StatementBase { && !schema.equalsIgnoreCase("oss") && !schema.equalsIgnoreCase("s3a") && !schema.equalsIgnoreCase("cosn") + && !schema.equalsIgnoreCase("gfs") && !schema.equalsIgnoreCase("jfs"))) { throw new AnalysisException("Invalid broker path. 
please use valid 'hdfs://', 'afs://' , 'bos://'," - + " 'ofs://', 'obs://', 'oss://', 's3a://', 'cosn://' or 'jfs://' path."); + + " 'ofs://', 'obs://', 'oss://', 's3a://', 'cosn://', 'gfs://' or 'jfs://' path."); } } else if (type == StorageBackend.StorageType.S3) { if (schema == null || !schema.equalsIgnoreCase("s3")) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java index b1aacb0aaf..5d6c33c45e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StorageBackend.java @@ -101,6 +101,7 @@ public class StorageBackend implements ParseNode { HDFS("Hadoop Distributed File System"), LOCAL("Local file system"), OFS("Tencent CHDFS"), + GFS("Tencent Goose File System"), JFS("Juicefs"), STREAM("Stream load pipe"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java index be02be0690..3bf7f50818 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BlobStorage.java @@ -52,6 +52,7 @@ public abstract class BlobStorage implements Writable { return new S3Storage(properties); } else if (type == StorageBackend.StorageType.HDFS || type == StorageBackend.StorageType.OFS + || type == StorageBackend.StorageType.GFS || type == StorageBackend.StorageType.JFS) { BlobStorage storage = new HdfsStorage(properties); // as of ofs files, use hdfs storage, but it's type should be ofs