[Improvement](broker) support broker load from tencent Goose File System (#18745)
Includes the following functions: 1. broker load; 2. export; 3. select into outfile; 4. create repo and backup to GFS. After configuring the environment, GFS can be used like any other HDFS-compatible system.
This commit is contained in:
@ -31,6 +31,7 @@ Broker is an optional process in the Doris cluster. It is mainly used to support
|
||||
- Apache HDFS
|
||||
- Aliyun OSS
|
||||
- Tencent Cloud CHDFS
|
||||
- Tencent Cloud GFS (since 1.2.0)
|
||||
- Huawei Cloud OBS (since 1.2.0)
|
||||
- Amazon S3
|
||||
- JuiceFS (since 2.0.0)
|
||||
|
||||
@ -38,6 +38,17 @@ When connecting to Hive, Doris:
|
||||
2. Supports both Managed Table and External Table;
|
||||
3. Can identify metadata of Hive, Iceberg, and Hudi stored in Hive Metastore;
|
||||
4. Supports Hive tables with data stored in JuiceFS, which can be used the same way as normal Hive tables (put `juicefs-hadoop-x.x.x.jar` in `fe/lib/` and `apache_hdfs_broker/lib/`).
|
||||
5. Supports Hive tables with data stored in CHDFS, which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
|
||||
1. put chdfs_hadoop_plugin_network-x.x.jar in fe/lib/ and apache_hdfs_broker/lib/
|
||||
2. copy core-site.xml and hdfs-site.xml from hive cluster to fe/conf/ and apache_hdfs_broker/conf
|
||||
|
||||
<version since="dev">
|
||||
|
||||
6. Supports Hive / Iceberg tables with data stored in GooseFS (GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
|
||||
1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/
|
||||
2. add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating catalog
|
||||
|
||||
</version>
|
||||
|
||||
## Create Catalog
|
||||
|
||||
|
||||
@ -34,6 +34,14 @@ When connecting to Iceberg, Doris:
|
||||
1. Supports Iceberg V1/V2 table formats;
|
||||
2. Supports Position Delete but not Equality Delete for V2 format;
|
||||
|
||||
<version since="dev">
|
||||
|
||||
3. Supports Hive / Iceberg tables with data stored in GooseFS (GFS), which can be used the same way as normal Hive tables. Follow the steps below to prepare the Doris environment:
|
||||
1. put goosefs-x.x.x-client.jar in fe/lib/ and apache_hdfs_broker/lib/
|
||||
2. add extra properties 'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem' when creating catalog
|
||||
|
||||
</version>
|
||||
|
||||
## Create Catalog
|
||||
|
||||
### Hive Metastore Catalog
|
||||
|
||||
@ -31,6 +31,7 @@ Broker 是 Doris 集群中一种可选进程,主要用于支持 Doris 读写
|
||||
- Apache HDFS
|
||||
- 阿里云 OSS
|
||||
- 腾讯云 CHDFS
|
||||
- 腾讯云 GFS (1.2.0 版本支持)
|
||||
- 华为云 OBS (1.2.0 版本后支持)
|
||||
- 亚马逊 S3
|
||||
- JuiceFS (2.0.0 版本支持)
|
||||
|
||||
@ -36,6 +36,17 @@ under the License.
|
||||
2. 支持 Managed Table 和 External Table。
|
||||
3. 可以识别 Hive Metastore 中存储的 hive、iceberg、hudi 元数据。
|
||||
4. 支持数据存储在 Juicefs 上的 hive 表,用法如下(需要把juicefs-hadoop-x.x.x.jar放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下)。
|
||||
5. 支持数据存储在 CHDFS 上的 hive 表。需配置环境:
|
||||
1. 把chdfs_hadoop_plugin_network-x.x.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下
|
||||
2. 将 hive 所在 Hadoop 集群的 core-site.xml 和 hdfs-site.xml 复制到 fe/conf/ 和 apache_hdfs_broker/conf 目录下
|
||||
|
||||
<version since="dev">
|
||||
|
||||
6. 支持数据存储在 GooseFS(GFS) 上的 hive、iceberg 表。需配置环境:
|
||||
1. 把 goosefs-x.x.x-client.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下
|
||||
2. 创建 catalog 时增加属性:'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem'
|
||||
|
||||
</version>
|
||||
|
||||
## 创建 Catalog
|
||||
|
||||
|
||||
@ -32,6 +32,14 @@ under the License.
|
||||
1. 支持 Iceberg V1/V2 表格式。
|
||||
2. V2 格式仅支持 Position Delete 方式,不支持 Equality Delete。
|
||||
|
||||
<version since="dev">
|
||||
|
||||
3. 支持数据存储在 GooseFS(GFS) 上的 iceberg 表。需配置环境:
|
||||
1. 把goosefs-x.x.x-client.jar 放在 fe/lib/ 和 apache_hdfs_broker/lib/ 下
|
||||
2. 创建 catalog 时增加属性:'fs.AbstractFileSystem.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.GooseFileSystem', 'fs.gfs.impl' = 'com.qcloud.cos.goosefs.hadoop.FileSystem'
|
||||
|
||||
</version>
|
||||
|
||||
## 创建 Catalog
|
||||
|
||||
### 基于Hive Metastore创建Catalog
|
||||
|
||||
@ -277,9 +277,10 @@ public class ExportStmt extends StatementBase {
|
||||
&& !schema.equalsIgnoreCase("oss")
|
||||
&& !schema.equalsIgnoreCase("s3a")
|
||||
&& !schema.equalsIgnoreCase("cosn")
|
||||
&& !schema.equalsIgnoreCase("gfs")
|
||||
&& !schema.equalsIgnoreCase("jfs"))) {
|
||||
throw new AnalysisException("Invalid broker path. please use valid 'hdfs://', 'afs://' , 'bos://',"
|
||||
+ " 'ofs://', 'obs://', 'oss://', 's3a://', 'cosn://' or 'jfs://' path.");
|
||||
+ " 'ofs://', 'obs://', 'oss://', 's3a://', 'cosn://', 'gfs://' or 'jfs://' path.");
|
||||
}
|
||||
} else if (type == StorageBackend.StorageType.S3) {
|
||||
if (schema == null || !schema.equalsIgnoreCase("s3")) {
|
||||
|
||||
@ -101,6 +101,7 @@ public class StorageBackend implements ParseNode {
|
||||
HDFS("Hadoop Distributed File System"),
|
||||
LOCAL("Local file system"),
|
||||
OFS("Tencent CHDFS"),
|
||||
GFS("Tencent Goose File System"),
|
||||
JFS("Juicefs"),
|
||||
STREAM("Stream load pipe");
|
||||
|
||||
|
||||
@ -52,6 +52,7 @@ public abstract class BlobStorage implements Writable {
|
||||
return new S3Storage(properties);
|
||||
} else if (type == StorageBackend.StorageType.HDFS
|
||||
|| type == StorageBackend.StorageType.OFS
|
||||
|| type == StorageBackend.StorageType.GFS
|
||||
|| type == StorageBackend.StorageType.JFS) {
|
||||
BlobStorage storage = new HdfsStorage(properties);
|
||||
// as for ofs files, use hdfs storage, but its type should be ofs
|
||||
|
||||
Reference in New Issue
Block a user