[feature][insert]add hive table sink thrift (#32274) (#32360)

bp #32274
This commit is contained in:
Mingyu Chen
2024-03-18 10:46:17 +08:00
committed by GitHub
parent 1645f2e0a7
commit b82de68d7e
4 changed files with 34 additions and 15 deletions

View File

@ -38,6 +38,7 @@ enum TDataSinkType {
MULTI_CAST_DATA_STREAM_SINK,
GROUP_COMMIT_OLAP_TABLE_SINK, // deprecated
GROUP_COMMIT_BLOCK_SINK,
HIVE_TABLE_SINK,
}
enum TResultSinkType {
@ -101,7 +102,7 @@ enum TParquetRepetitionType {
struct TParquetSchema {
1: optional TParquetRepetitionType schema_repetition_type
2: optional TParquetDataType schema_data_type
3: optional string schema_column_name
3: optional string schema_column_name
4: optional TParquetDataLogicalType schema_data_logical_type
}
@ -280,6 +281,7 @@ struct TOlapTableSink {
// Bundle of two path strings and a file type; used as the type of the
// `location` field of THiveTableSink. Every field is optional, so readers
// must check for presence before use.
struct THiveLocationParams {
// NOTE(review): presumably the path rows are written to first -- confirm against the sink implementation
1: optional string write_path
// NOTE(review): presumably the final destination path -- confirm against the sink implementation
2: optional string target_path
// Typed by TFileType, which is declared in the included Types definitions
3: optional Types.TFileType file_type
}
struct TSortedColumn {
@ -298,11 +300,16 @@ struct THiveBucket {
4: optional list<TSortedColumn> sorted_by
}
enum THiveCompressionType {
SNAPPY = 3,
LZ4 = 4,
ZLIB = 6,
ZSTD = 7,
// Classification of a column; used as the type of THiveColumn.column_type.
// The numeric values are assigned explicitly and are what gets serialized,
// so existing entries must not be renumbered.
// NOTE(review): the names suggest partition-key columns vs. ordinary data
// columns; what SYNTHESIZED covers is not visible here -- confirm with the producer.
enum THiveColumnType {
PARTITION_KEY = 0,
REGULAR = 1,
SYNTHESIZED = 2
}
// Description of a single column: its name, its type descriptor, and its
// THiveColumnType classification. THiveTableSink.columns is a list of these.
// Every field is optional, so readers must check for presence before use.
struct THiveColumn {
// Column name
1: optional string name
// Type descriptor; TTypeDesc is declared in the included Types definitions
2: optional Types.TTypeDesc data_type
// Classification of this column (see THiveColumnType)
3: optional THiveColumnType column_type
}
struct THivePartition {
@ -314,13 +321,14 @@ struct THivePartition {
struct THiveTableSink {
1: optional string db_name
2: optional string table_name
3: optional list<string> data_column_names
4: optional list<string> partition_column_names
5: optional list<THivePartition> partitions
6: optional list<THiveBucket> buckets
7: optional PlanNodes.TFileFormatType file_format
8: optional THiveCompressionType compression_type
9: optional THiveLocationParams location
3: optional list<THiveColumn> columns
4: optional list<THivePartition> partitions
5: optional THiveBucket bucket_info
6: optional PlanNodes.TFileFormatType file_format
7: optional PlanNodes.TFileCompressType compression_type
8: optional THiveLocationParams location
9: optional map<string, string> hadoop_config
10: optional bool overwrite
}
enum TUpdateMode {

View File

@ -29,6 +29,7 @@ include "Exprs.thrift"
include "RuntimeProfile.thrift"
include "MasterService.thrift"
include "AgentService.thrift"
include "DataSinks.thrift"
// These are supporting structs for JniFrontend.java, which serves as the glue
// between our C++ execution environment and the Java frontend.
@ -481,6 +482,8 @@ struct TReportExecStatusParams {
24: optional TQueryStatistics query_statistics // deprecated
25: optional TReportWorkloadRuntimeStatusParams report_workload_runtime_status
26: optional list<DataSinks.THivePartitionUpdate> hive_partition_updates
}
struct TFeResult {

View File

@ -43,7 +43,13 @@ enum TPartitionType {
BUCKET_SHFFULE_HASH_PARTITIONED,
// used for shuffling data by partition and tablet
TABLET_SINK_SHUFFLE_PARTITIONED
TABLET_SINK_SHUFFLE_PARTITIONED,
// used for shuffling data by hive partition
TABLE_SINK_HASH_PARTITIONED,
// used for unpartitioned hive tables
TABLE_SINK_RANDOM_PARTITIONED
}
enum TDistributionType {

View File

@ -138,7 +138,9 @@ enum TFileCompressType {
DEFLATE,
LZOP,
LZ4BLOCK,
SNAPPYBLOCK
SNAPPYBLOCK,
ZLIB,
ZSTD
}
struct THdfsConf {