diff --git a/be/src/vec/exec/file_arrow_scanner.cpp b/be/src/vec/exec/file_arrow_scanner.cpp index 6d0167eba1..59a94399db 100644 --- a/be/src/vec/exec/file_arrow_scanner.cpp +++ b/be/src/vec/exec/file_arrow_scanner.cpp @@ -184,7 +184,7 @@ Status FileArrowScanner::_append_batch_to_block(Block* block) { auto& column_with_type_and_name = block->get_by_name(slot_desc->col_name()); RETURN_IF_ERROR(arrow_column_to_doris_column( array, _arrow_batch_cur_idx, column_with_type_and_name.column, - column_with_type_and_name.type, num_elements, _state->timezone())); + column_with_type_and_name.type, num_elements, _state->timezone_obj())); } _rows += num_elements; _arrow_batch_cur_idx += num_elements; diff --git a/be/src/vec/exec/varrow_scanner.cpp b/be/src/vec/exec/varrow_scanner.cpp index 9d2bfb8a52..3244de5b90 100644 --- a/be/src/vec/exec/varrow_scanner.cpp +++ b/be/src/vec/exec/varrow_scanner.cpp @@ -271,7 +271,7 @@ Status VArrowScanner::_append_batch_to_src_block(Block* block) { auto& column_with_type_and_name = block->get_by_name(slot_desc->col_name()); RETURN_IF_ERROR(arrow_column_to_doris_column( array, _arrow_batch_cur_idx, column_with_type_and_name.column, - column_with_type_and_name.type, num_elements, _state->timezone())); + column_with_type_and_name.type, num_elements, _state->timezone_obj())); } _arrow_batch_cur_idx += num_elements; diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp b/be/src/vec/utils/arrow_column_to_doris_column.cpp index 5923f72f16..3d851d14e1 100644 --- a/be/src/vec/utils/arrow_column_to_doris_column.cpp +++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp @@ -185,7 +185,7 @@ static int64_t time_unit_divisor(arrow::TimeUnit::type unit) { template static Status convert_column_with_timestamp_data(const arrow::Array* array, size_t array_idx, MutableColumnPtr& data_column, size_t num_elements, - const std::string& timezone) { + const cctz::time_zone& ctz) { auto& column_data = static_cast&>(*data_column).get_data(); auto concrete_array 
= down_cast(array); int64_t divisor = 1; @@ -205,7 +205,7 @@ static Status convert_column_with_timestamp_data(const arrow::Array* array, size for (size_t value_i = array_idx; value_i < array_idx + num_elements; ++value_i) { VecDateTimeValue v; v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, - timezone); + ctz); if constexpr (std::is_same_v) { v.cast_to_date(); } @@ -217,7 +217,7 @@ static Status convert_column_with_date_v2_data(const arrow::Array* array, size template static Status convert_column_with_date_v2_data(const arrow::Array* array, size_t array_idx, MutableColumnPtr& data_column, size_t num_elements, - const std::string& timezone) { + const cctz::time_zone& ctz) { auto& column_data = static_cast&>(*data_column).get_data(); auto concrete_array = down_cast(array); int64_t divisor = 1; @@ -237,7 +237,7 @@ static Status convert_column_with_date_v2_data(const arrow::Array* array, size_t for (size_t value_i = array_idx; value_i < array_idx + num_elements; ++value_i) { DateV2Value v; v.from_unixtime(static_cast(concrete_array->Value(value_i)) / divisor * multiplier, - timezone); + ctz); column_data.emplace_back(binary_cast(v)); } return Status::OK(); @@ -286,7 +286,7 @@ static Status convert_offset_from_list_column(const arrow::Array* array, size_t static Status convert_column_with_list_data(const arrow::Array* array, size_t array_idx, MutableColumnPtr& data_column, size_t num_elements, - const std::string& timezone, + const cctz::time_zone& ctz, const DataTypePtr& nested_type) { size_t start_idx_of_data = 0; size_t num_of_data = 0; @@ -298,12 +298,22 @@ static Status convert_column_with_list_data(const arrow::Array* array, size_t ar std::shared_ptr arrow_data = concrete_array->values(); return arrow_column_to_doris_column(arrow_data.get(), start_idx_of_data, data_column_ptr, - nested_type, num_of_data, timezone); + nested_type, num_of_data, ctz); +} + +// For unit-test convenience only. Do not use this in production code. 
+Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, + ColumnPtr& doris_column, const DataTypePtr& type, + size_t num_elements, const std::string& timezone) { + cctz::time_zone ctz; + TimezoneUtils::find_cctz_time_zone(timezone, ctz); + return arrow_column_to_doris_column(arrow_column, arrow_batch_cur_idx, doris_column, type, + num_elements, ctz); } Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, - size_t num_elements, const std::string& timezone) { + size_t num_elements, const cctz::time_zone& ctz) { // src column always be nullable for simpify converting CHECK(doris_column->is_nullable()); MutableColumnPtr data_column = nullptr; @@ -333,24 +343,24 @@ Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arr case arrow::Type::DATE32: if (which_type.is_date_v2()) { return convert_column_with_date_v2_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, timezone); + arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); } else { return convert_column_with_timestamp_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, timezone); + arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); } case arrow::Type::DATE64: return convert_column_with_timestamp_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, timezone); + arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); case arrow::Type::TIMESTAMP: return convert_column_with_timestamp_data( - arrow_column, arrow_batch_cur_idx, data_column, num_elements, timezone); + arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz); case arrow::Type::DECIMAL: return convert_column_with_decimal_data(arrow_column, arrow_batch_cur_idx, data_column, num_elements); case arrow::Type::LIST: CHECK(type->have_subtypes()); return convert_column_with_list_data( - arrow_column, 
arrow_batch_cur_idx, data_column, num_elements, timezone, + arrow_column, arrow_batch_cur_idx, data_column, num_elements, ctz, (reinterpret_cast(type.get()))->get_nested_type()); default: break; diff --git a/be/src/vec/utils/arrow_column_to_doris_column.h b/be/src/vec/utils/arrow_column_to_doris_column.h index 2c9189b1be..9d5f077672 100644 --- a/be/src/vec/utils/arrow_column_to_doris_column.h +++ b/be/src/vec/utils/arrow_column_to_doris_column.h @@ -33,8 +33,13 @@ namespace doris::vectorized { const PrimitiveType arrow_type_to_primitive_type(::arrow::Type::type type); +// For unit-test convenience only. Do not use this in production code. Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, size_t num_elements, const std::string& timezone); +Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, + ColumnPtr& doris_column, const DataTypePtr& type, + size_t num_elements, const cctz::time_zone& ctz); + } // namespace doris::vectorized