[Fix](hive-writer) Fix hive partition update file size and remove redundant column names. (#34651) (#34885)

Backport #34651.
This commit is contained in:
Qi Chen
2024-05-15 11:23:32 +08:00
committed by GitHub
parent 30256195c3
commit e13ce905cf
2 changed files with 4 additions and 4 deletions

View File

@@ -66,7 +66,9 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* profile)
std::vector<std::string> column_names;
column_names.reserve(_columns.size());
for (int i = 0; i < _columns.size(); i++) {
-column_names.emplace_back(_columns[i].name);
+if (_non_write_columns_indices.find(i) == _non_write_columns_indices.end()) {
+column_names.emplace_back(_columns[i].name);
+}
}
switch (_file_format_type) {
@@ -157,7 +159,6 @@ Status VHivePartitionWriter::write(vectorized::Block& block, vectorized::IColumn
RETURN_IF_ERROR(_projection_and_filter_block(block, filter, &output_block));
RETURN_IF_ERROR(_file_format_transformer->write(output_block));
_row_count += output_block.rows();
-_input_size_in_bytes += output_block.bytes();
return Status::OK();
}
@@ -200,7 +201,7 @@ THivePartitionUpdate VHivePartitionWriter::_build_partition_update() {
hive_partition_update.__set_location(location);
hive_partition_update.__set_file_names({_get_target_file_name()});
hive_partition_update.__set_row_count(_row_count);
-hive_partition_update.__set_file_size(_input_size_in_bytes);
+hive_partition_update.__set_file_size(_file_format_transformer->written_len());
if (_write_info.file_type == TFileType::FILE_S3) {
doris::io::S3FileWriter* s3_mpu_file_writer =

View File

@@ -88,7 +88,6 @@ private:
TUpdateMode::type _update_mode;
size_t _row_count = 0;
-size_t _input_size_in_bytes = 0;
const VExprContextSPtrs& _vec_output_expr_ctxs;
const VExprContextSPtrs& _write_output_expr_ctxs;