From b82de68d7e71b10677e1cbb61cb45bb2deef67fb Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Mon, 18 Mar 2024 10:46:17 +0800 Subject: [PATCH] [feature][insert]add hive table sink thrift (#32274) (#32360) bp #32274 --- gensrc/thrift/DataSinks.thrift | 34 +++++++++++++++++----------- gensrc/thrift/FrontendService.thrift | 3 +++ gensrc/thrift/Partitions.thrift | 8 ++++++- gensrc/thrift/PlanNodes.thrift | 4 +++- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/gensrc/thrift/DataSinks.thrift b/gensrc/thrift/DataSinks.thrift index 7c9d5e8f8c..e7683c25fd 100644 --- a/gensrc/thrift/DataSinks.thrift +++ b/gensrc/thrift/DataSinks.thrift @@ -38,6 +38,7 @@ enum TDataSinkType { MULTI_CAST_DATA_STREAM_SINK, GROUP_COMMIT_OLAP_TABLE_SINK, // deprecated GROUP_COMMIT_BLOCK_SINK, + HIVE_TABLE_SINK, } enum TResultSinkType { @@ -101,7 +102,7 @@ enum TParquetRepetitionType { struct TParquetSchema { 1: optional TParquetRepetitionType schema_repetition_type 2: optional TParquetDataType schema_data_type - 3: optional string schema_column_name + 3: optional string schema_column_name 4: optional TParquetDataLogicalType schema_data_logical_type } @@ -280,6 +281,7 @@ struct TOlapTableSink { struct THiveLocationParams { 1: optional string write_path 2: optional string target_path + 3: optional Types.TFileType file_type } struct TSortedColumn { @@ -298,11 +300,16 @@ struct THiveBucket { 4: optional list sorted_by } -enum THiveCompressionType { - SNAPPY = 3, - LZ4 = 4, - ZLIB = 6, - ZSTD = 7, +enum THiveColumnType { + PARTITION_KEY = 0, + REGULAR = 1, + SYNTHESIZED = 2 +} + +struct THiveColumn { + 1: optional string name + 2: optional Types.TTypeDesc data_type + 3: optional THiveColumnType column_type } struct THivePartition { @@ -314,13 +321,14 @@ struct THivePartition { struct THiveTableSink { 1: optional string db_name 2: optional string table_name - 3: optional list data_column_names - 4: optional list partition_column_names - 5: optional list partitions - 6: optional list buckets - 7: optional PlanNodes.TFileFormatType file_format - 8: optional THiveCompressionType compression_type - 9: optional THiveLocationParams location + 3: optional list columns + 4: optional list partitions + 5: optional THiveBucket bucket_info + 6: optional PlanNodes.TFileFormatType file_format + 7: optional PlanNodes.TFileCompressType compression_type + 8: optional THiveLocationParams location + 9: optional map hadoop_config + 10: optional bool overwrite } enum TUpdateMode { diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index 9f47037c56..049c8450b2 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -29,6 +29,7 @@ include "Exprs.thrift" include "RuntimeProfile.thrift" include "MasterService.thrift" include "AgentService.thrift" +include "DataSinks.thrift" // These are supporting structs for JniFrontend.java, which serves as the glue // between our C++ execution environment and the Java frontend. @@ -481,6 +482,8 @@ struct TReportExecStatusParams { 24: optional TQueryStatistics query_statistics // deprecated 25: optional TReportWorkloadRuntimeStatusParams report_workload_runtime_status + + 26: optional list hive_partition_updates } struct TFeResult { diff --git a/gensrc/thrift/Partitions.thrift b/gensrc/thrift/Partitions.thrift index 0a7e70c0a4..4e306c2970 100644 --- a/gensrc/thrift/Partitions.thrift +++ b/gensrc/thrift/Partitions.thrift @@ -43,7 +43,13 @@ enum TPartitionType { BUCKET_SHFFULE_HASH_PARTITIONED, // used for shuffle data by parititon and tablet - TABLET_SINK_SHUFFLE_PARTITIONED + TABLET_SINK_SHUFFLE_PARTITIONED, + + // used for shuffle data by hive parititon + TABLE_SINK_HASH_PARTITIONED, + + // used for hive unparititoned table + TABLE_SINK_RANDOM_PARTITIONED } enum TDistributionType { diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index f945925654..da3643747f 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -138,7 +138,9 @@ enum TFileCompressType { DEFLATE, LZOP, LZ4BLOCK, - SNAPPYBLOCK + SNAPPYBLOCK, + ZLIB, + ZSTD } struct THdfsConf {