bp #38432 ## Proposed changes Add the `hive_parquet_use_column_names` and `hive_orc_use_column_names` session variables so that a Hive table can still be read correctly after one of its columns has been renamed in `Hive`. These two session variables are modeled on `parquet_use_column_names` and `orc_use_column_names` in the `Trino` Hive connector. By default, both session variables are true. When one of them is set to false, ORC/Parquet files are read by matching columns according to their ordinal position in the Hive table definition instead of by name. For example: ```mysql in Hive : hive> create table tmp (a int , b string) stored as parquet; hive> insert into table tmp values(1,"2"); hive> alter table tmp change column a new_a int; hive> insert into table tmp values(2,"4"); in Doris : mysql> set hive_parquet_use_column_names=true; Query OK, 0 rows affected (0.00 sec) mysql> select * from tmp; +-------+------+ | new_a | b | +-------+------+ | NULL | 2 | | 2 | 4 | +-------+------+ 2 rows in set (0.02 sec) mysql> set hive_parquet_use_column_names=false; Query OK, 0 rows affected (0.00 sec) mysql> select * from tmp; +-------+------+ | new_a | b | +-------+------+ | 1 | 2 | | 2 | 4 | +-------+------+ 2 rows in set (0.02 sec) ``` In Hive 3, you can use `set parquet.column.index.access/orc.force.positional.evolution = true/false` to control the result of reading the table, in the same way as these two session variables. However, for a Parquet table in which a field inside a struct column has been renamed, Hive and Doris produce different results.
This commit is contained in:
@ -421,6 +421,10 @@ public abstract class FileQueryScanNode extends FileScanNode {
|
||||
transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs);
|
||||
tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc);
|
||||
rangeDesc.setTableFormatParams(tableFormatFileDesc);
|
||||
} else if (fileSplit instanceof HiveSplit) {
|
||||
TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc();
|
||||
tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value());
|
||||
rangeDesc.setTableFormatParams(tableFormatFileDesc);
|
||||
}
|
||||
|
||||
setScanParams(rangeDesc, fileSplit);
|
||||
|
||||
@ -556,6 +556,10 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
public static final String ENABLE_PUSHDOWN_MINMAX_ON_UNIQUE = "enable_pushdown_minmax_on_unique";
|
||||
|
||||
// Name of the session variable that selects name-based (true) or ordinal-position-based (false)
// access to Parquet columns of Hive tables.
public static final String HIVE_PARQUET_USE_COLUMN_NAMES = "hive_parquet_use_column_names";
|
||||
|
||||
// Name of the session variable that selects name-based (true) or ordinal-position-based (false)
// access to ORC columns of Hive tables.
public static final String HIVE_ORC_USE_COLUMN_NAMES = "hive_orc_use_column_names";
|
||||
|
||||
// Name of the session variable that decides whether the carriage return is retained
// when CSV lines may end in either LF or CR LF.
public static final String KEEP_CARRIAGE_RETURN = "keep_carriage_return";
|
||||
|
||||
public static final String ENABLE_PUSHDOWN_STRING_MINMAX = "enable_pushdown_string_minmax";
|
||||
@ -1770,11 +1774,25 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public int createTablePartitionMaxNum = 10000;
|
||||
|
||||
|
||||
/**
 * Session variable {@code hive_parquet_use_column_names}.
 *
 * <p>When {@code true} (the default), Parquet columns are accessed by name. When {@code false},
 * columns are accessed by their ordinal position in the Hive table definition.
 */
@VariableMgr.VarAttr(name = HIVE_PARQUET_USE_COLUMN_NAMES,
|
||||
description = {"默认情况下按名称访问 Parquet 列。将此属性设置为“false”可按 Hive 表定义中的序号位置访问列。",
|
||||
"Access Parquet columns by name by default. Set this property to `false` to access columns "
|
||||
+ "by their ordinal position in the Hive table definition."})
|
||||
public boolean hiveParquetUseColumnNames = true;
|
||||
|
||||
|
||||
/**
 * Session variable {@code hive_orc_use_column_names}.
 *
 * <p>When {@code true} (the default), ORC columns are accessed by name. When {@code false},
 * columns are accessed by their ordinal position in the Hive table definition.
 */
@VariableMgr.VarAttr(name = HIVE_ORC_USE_COLUMN_NAMES,
|
||||
description = {"默认情况下按名称访问 Orc 列。将此属性设置为“false”可按 Hive 表定义中的序号位置访问列。",
|
||||
"Access ORC columns by name by default. Set this property to `false` to access columns "
|
||||
+ "by their ordinal position in the Hive table definition."})
|
||||
public boolean hiveOrcUseColumnNames = true;
|
||||
|
||||
/**
 * Session variable {@code keep_carriage_return}.
 *
 * <p>Decides whether the carriage return is retained when CSV lines may be terminated by either
 * LF or CR LF. Defaults to {@code false}.
 *
 * <p>The backslashes in both description strings are escaped so that the help text shows the
 * two-character sequences literally instead of embedding real control characters.
 */
@VariableMgr.VarAttr(name = KEEP_CARRIAGE_RETURN,
|
||||
description = {"在同时处理\\n和\\r\\n作为CSV的行分隔符时,是否保留\\r",
|
||||
"When processing both \\n and \\r\\n as CSV line separators, should \\r be retained?"})
|
||||
public boolean keepCarriageReturn = false;
|
||||
|
||||
|
||||
/**
 * Session variable registered under the name held by {@code FORCE_JNI_SCANNER}.
 *
 * <p>When {@code true}, external tables are forced to be read in JNI mode. Defaults to
 * {@code false}.
 */
@VariableMgr.VarAttr(name = FORCE_JNI_SCANNER,
|
||||
description = {"强制使用jni方式读取外表", "Force the use of jni mode to read external table"})
|
||||
private boolean forceJniScanner = false;
|
||||
@ -3435,6 +3453,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
tResult.setReadCsvEmptyLineAsNull(readCsvEmptyLineAsNull);
|
||||
tResult.setSerdeDialect(getSerdeDialect());
|
||||
tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames);
|
||||
tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames);
|
||||
tResult.setKeepCarriageReturn(keepCarriageReturn);
|
||||
return tResult;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user