diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 54f77ecc6f..fba73247f9 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -326,7 +326,8 @@ Status DataTypeArraySerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeArraySerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeArraySerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { orc::ListVectorBatch* cur_batch = dynamic_cast(orc_col_batch); @@ -342,7 +343,7 @@ Status DataTypeArraySerDe::write_column_to_orc(const IColumn& column, const Null size_t next_offset = offsets[row_id]; if (cur_batch->notNull[row_id] == 1) { - static_cast(nested_serde->write_column_to_orc(nested_column, nullptr, + static_cast(nested_serde->write_column_to_orc(timezone, nested_column, nullptr, cur_batch->elements.get(), offset, next_offset, buffer_list)); } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 531187e762..b69c05c743 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -89,8 +89,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; void 
set_return_object_as_string(bool value) override { diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp b/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp index af50281811..5ce7aa5a1f 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp @@ -139,7 +139,8 @@ Status DataTypeBitMapSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeBitMapSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeBitMapSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index 7364a22326..93a9b74d7d 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -80,8 +80,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp index 23398e4b81..1835f7365c 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -271,7 +271,8 @@ Status DataTypeDate64SerDe::write_column_to_mysql(const IColumn& column, return 
_write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeDate64SerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeDate64SerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index 4374684d7b..f59d1628d0 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -65,8 +65,9 @@ class DataTypeDate64SerDe : public DataTypeNumberSerDe { Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 1dc7c31b19..b689c51e6f 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -26,6 +26,9 @@ namespace doris { namespace vectorized { +static const int64_t timestamp_threshold = -2177481943; +static const int64_t timestamp_diff = 343; +static const int64_t micr_to_nano_second = 1000; Status DataTypeDateTimeV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, BufferWritable& bw, @@ -140,47 +143,37 @@ Status DataTypeDateTimeV2SerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, 
row_buffer, row_idx, col_const); } -Status DataTypeDateTimeV2SerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone, + const IColumn& column, const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { - auto& col_data = assert_cast&>(column).get_data(); - orc::StringVectorBatch* cur_batch = dynamic_cast(orc_col_batch); - - char* ptr = (char*)malloc(BUFFER_UNIT_SIZE); - if (!ptr) { - return Status::InternalError( - "malloc memory error when write largeint column data to orc file."); - } - StringRef bufferRef; - bufferRef.data = ptr; - bufferRef.size = BUFFER_UNIT_SIZE; - size_t offset = 0; - const size_t begin_off = offset; + const auto& col_data = assert_cast&>(column).get_data(); + auto* cur_batch = dynamic_cast(orc_col_batch); for (size_t row_id = start; row_id < end; row_id++) { if (cur_batch->notNull[row_id] == 0) { continue; } - int len = binary_cast>(col_data[row_id]) - .to_buffer(const_cast(bufferRef.data) + offset, scale); - - REALLOC_MEMORY_FOR_ORC_WRITER() - - cur_batch->length[row_id] = len; - offset += len; - } - - size_t data_off = 0; - for (size_t row_id = start; row_id < end; row_id++) { - if (cur_batch->notNull[row_id] == 1) { - cur_batch->data[row_id] = const_cast(bufferRef.data) + begin_off + data_off; - data_off += cur_batch->length[row_id]; + int64_t timestamp = 0; + DateV2Value datetime_val = + binary_cast>(col_data[row_id]); + if (!datetime_val.unix_timestamp(&timestamp, timezone)) { + return Status::InternalError("get unix timestamp error."); } - } - buffer_list.emplace_back(bufferRef); + // -2177481943 represent '1900-12-31 23:54:17' + // but -2177481944 represent '1900-12-31 23:59:59' + // so for timestamp <= -2177481944, we subtract 343 (5min 43s) + // Reference: https://www.timeanddate.com/time/change/china/shanghai?year=1900 + if (timezone == TimezoneUtils::default_time_zone && 
timestamp < timestamp_threshold) { + timestamp -= timestamp_diff; + } + + cur_batch->data[row_id] = timestamp; + cur_batch->nanoseconds[row_id] = datetime_val.microsecond() * micr_to_nano_second; + } cur_batch->numElements = end - start; return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index c5695c074d..c03135f911 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -74,8 +74,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index 0b10155fe9..8726edd1e3 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -144,7 +144,8 @@ Status DataTypeDateV2SerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeDateV2SerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index e235f8416e..423e7209bd 100644 --- 
a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -66,8 +66,9 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe { Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index dae309119b..f3b7260d34 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -242,7 +242,8 @@ Status DataTypeDecimalSerDe::write_column_to_mysql(const IColumn& column, } template -Status DataTypeDecimalSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeDecimalSerDe::write_column_to_orc(const std::string& timezone, + const IColumn& column, const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 4843a6b90e..6792802d42 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -107,8 +107,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const 
std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h index 82c8f456b9..5b8b107736 100644 --- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h +++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h @@ -100,8 +100,9 @@ public: return Status::NotSupported("write_column_to_pb with type " + column.get_name()); } - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override { return Status::NotSupported("write_column_to_orc with type [{}]", column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index eb0cb30919..0fae2ef409 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -186,7 +186,8 @@ Status DataTypeHLLSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeHLLSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeHLLSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { auto& col_data = assert_cast(column); diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index c4aac03c34..bd451c2f90 100644 --- 
a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -66,8 +66,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index e1d8cb40a5..2a3ed51f0c 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -115,7 +115,8 @@ void DataTypeJsonbSerDe::write_column_to_arrow(const IColumn& column, const Null } } -Status DataTypeJsonbSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeJsonbSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { return Status::NotSupported("write_column_to_orc with type [{}]", column.get_name()); diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 58b32dc462..7ff9b85cdf 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -56,8 +56,9 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { int* num_deserialized, const FormatOptions& options, int nesting_level = 1) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status 
write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index d432793da4..38cd955f7c 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -476,7 +476,8 @@ Status DataTypeMapSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeMapSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeMapSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { orc::MapVectorBatch* cur_batch = dynamic_cast(orc_col_batch); @@ -495,12 +496,12 @@ Status DataTypeMapSerDe::write_column_to_orc(const IColumn& column, const NullMa size_t next_offset = offsets[row_id]; if (cur_batch->notNull[row_id] == 1) { - static_cast(key_serde->write_column_to_orc(nested_keys_column, nullptr, + static_cast(key_serde->write_column_to_orc(timezone, nested_keys_column, nullptr, cur_batch->keys.get(), offset, next_offset, buffer_list)); - static_cast(value_serde->write_column_to_orc(nested_values_column, nullptr, - cur_batch->elements.get(), offset, - next_offset, buffer_list)); + static_cast(value_serde->write_column_to_orc(timezone, nested_values_column, + nullptr, cur_batch->elements.get(), + offset, next_offset, buffer_list)); } cur_batch->offsets[row_id + 1] = next_offset; diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index 18649f56fb..b6a35e25cf 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ 
b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -87,8 +87,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; void set_return_object_as_string(bool value) override { diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 60517d0106..f5a299f410 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -317,7 +317,8 @@ Status DataTypeNullableSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeNullableSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone, + const IColumn& column, const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { @@ -332,9 +333,9 @@ Status DataTypeNullableSerDe::write_column_to_orc(const IColumn& column, const N // because orc_null_map begins at start and only has (end - start) elements memcpy(orc_col_batch->notNull.data() + start, orc_null_map.data(), end - start); - static_cast(nested_serde->write_column_to_orc(column_nullable.get_nested_column(), - &column_nullable.get_null_map_data(), - orc_col_batch, start, end, buffer_list)); + static_cast(nested_serde->write_column_to_orc( + timezone, column_nullable.get_nested_column(), &column_nullable.get_null_map_data(), + orc_col_batch, start, end, buffer_list)); 
return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 8044681402..3fd5fa2ed7 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -80,8 +80,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; void set_return_object_as_string(bool value) override { diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 1b2d87addb..f655ffa61e 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -274,14 +274,24 @@ Status DataTypeNumberSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } +#define WRITE_INTEGRAL_COLUMN_TO_ORC(ORC_TYPE) \ + ORC_TYPE* cur_batch = dynamic_cast(orc_col_batch); \ + for (size_t row_id = start; row_id < end; row_id++) { \ + if (cur_batch->notNull[row_id] == 1) { \ + cur_batch->data[row_id] = col_data[row_id]; \ + } \ + } \ + cur_batch->numElements = end - start; + template -Status DataTypeNumberSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeNumberSerDe::write_column_to_orc(const std::string& timezone, + const IColumn& column, const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { auto& col_data = assert_cast(column).get_data(); - 
if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { // largeint orc::StringVectorBatch* cur_batch = dynamic_cast(orc_col_batch); char* ptr = (char*)malloc(BUFFER_UNIT_SIZE); @@ -317,25 +327,18 @@ Status DataTypeNumberSerDe::write_column_to_orc(const IColumn& column, const } buffer_list.emplace_back(bufferRef); cur_batch->numElements = end - start; - } else if constexpr ((std::is_integral::value && std::is_signed::value) || - std::is_same_v) { // tinyint/smallint/..int and boolean type - orc::LongVectorBatch* cur_batch = dynamic_cast(orc_col_batch); - - for (size_t row_id = start; row_id < end; row_id++) { - if (cur_batch->notNull[row_id] == 1) { - cur_batch->data[row_id] = col_data[row_id]; - } - } - cur_batch->numElements = end - start; - } else if constexpr (IsFloatNumber) { - orc::DoubleVectorBatch* cur_batch = dynamic_cast(orc_col_batch); - - for (size_t row_id = start; row_id < end; row_id++) { - if (cur_batch->notNull[row_id] == 1) { - cur_batch->data[row_id] = col_data[row_id]; - } - } - cur_batch->numElements = end - start; + } else if constexpr (std::is_same_v || std::is_same_v) { // tinyint/boolean + WRITE_INTEGRAL_COLUMN_TO_ORC(orc::ByteVectorBatch) + } else if constexpr (std::is_same_v) { // smallint + WRITE_INTEGRAL_COLUMN_TO_ORC(orc::ShortVectorBatch) + } else if constexpr (std::is_same_v) { // int + WRITE_INTEGRAL_COLUMN_TO_ORC(orc::IntVectorBatch) + } else if constexpr (std::is_same_v) { // bigint + WRITE_INTEGRAL_COLUMN_TO_ORC(orc::LongVectorBatch) + } else if constexpr (std::is_same_v) { // float + WRITE_INTEGRAL_COLUMN_TO_ORC(orc::FloatVectorBatch) + } else if constexpr (std::is_same_v) { // double + WRITE_INTEGRAL_COLUMN_TO_ORC(orc::DoubleVectorBatch) } return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 558eabee45..d27016d32d 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ 
b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -88,8 +88,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp index 62a900175f..32e9b3840b 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp @@ -21,7 +21,8 @@ namespace doris { namespace vectorized { -Status DataTypeObjectSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeObjectSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index 242833911a..43539c49ca 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -100,8 +100,9 @@ public: return Status::NotSupported("write_column_to_mysql with type " + column.get_name()); } - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; }; } // namespace vectorized diff 
--git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index b19d5ba25a..a72b62df78 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -121,8 +121,9 @@ public: return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override { return Status::NotSupported("write_column_to_orc with type [{}]", column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index a6487b1dc8..b8bc5c1f93 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -266,7 +266,8 @@ public: int end, const cctz::time_zone& ctz) const = 0; // ORC serializer - virtual Status write_column_to_orc(const IColumn& column, const NullMap* null_map, + virtual Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const = 0; // ORC deserializer diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp b/be/src/vec/data_types/serde/data_type_string_serde.cpp index 1e9c52ac67..662ac5f128 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp @@ -239,7 +239,8 @@ Status DataTypeStringSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeStringSerDe::write_column_to_orc(const 
IColumn& column, const NullMap* null_map, +Status DataTypeStringSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index a2e5758bf8..e543a0f7a3 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -67,8 +67,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index 28e52944da..a73c54e524 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -370,7 +370,8 @@ Status DataTypeStructSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -Status DataTypeStructSerDe::write_column_to_orc(const IColumn& column, const NullMap* null_map, +Status DataTypeStructSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, int start, int end, std::vector& buffer_list) const { @@ -381,8 +382,8 @@ Status DataTypeStructSerDe::write_column_to_orc(const IColumn& column, const Nul if (cur_batch->notNull[row_id] == 1) { for (int i = 0; i < struct_col.tuple_size(); 
++i) { static_cast(elemSerDeSPtrs[i]->write_column_to_orc( - struct_col.get_column(i), nullptr, cur_batch->fields[i], row_id, row_id + 1, - buffer_list)); + timezone, struct_col.get_column(i), nullptr, cur_batch->fields[i], row_id, + row_id + 1, buffer_list)); } } else { // This else is necessary diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index ad9a0e43a4..4871bc4d37 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -157,8 +157,9 @@ public: Status write_column_to_mysql(const IColumn& column, MysqlRowBuffer& row_buffer, int row_idx, bool col_const) const override; - Status write_column_to_orc(const IColumn& column, const NullMap* null_map, - orc::ColumnVectorBatch* orc_col_batch, int start, int end, + Status write_column_to_orc(const std::string& timezone, const IColumn& column, + const NullMap* null_map, orc::ColumnVectorBatch* orc_col_batch, + int start, int end, std::vector& buffer_list) const override; void set_return_object_as_string(bool value) override { diff --git a/be/src/vec/runtime/vcsv_transformer.cpp b/be/src/vec/runtime/vcsv_transformer.cpp index d8ff48f6e4..6628f7743c 100644 --- a/be/src/vec/runtime/vcsv_transformer.cpp +++ b/be/src/vec/runtime/vcsv_transformer.cpp @@ -54,12 +54,12 @@ namespace doris::vectorized { -VCSVTransformer::VCSVTransformer(doris::io::FileWriter* file_writer, +VCSVTransformer::VCSVTransformer(RuntimeState* state, doris::io::FileWriter* file_writer, const VExprContextSPtrs& output_vexpr_ctxs, bool output_object_data, std::string_view header_type, std::string_view header, std::string_view column_separator, std::string_view line_delimiter) - : VFileFormatTransformer(output_vexpr_ctxs, output_object_data), + : VFileFormatTransformer(state, output_vexpr_ctxs, output_object_data), _column_separator(column_separator), _line_delimiter(line_delimiter), _file_writer(file_writer) { 
diff --git a/be/src/vec/runtime/vcsv_transformer.h b/be/src/vec/runtime/vcsv_transformer.h index 2cbe0e5b74..035a89e1ad 100644 --- a/be/src/vec/runtime/vcsv_transformer.h +++ b/be/src/vec/runtime/vcsv_transformer.h @@ -40,8 +40,9 @@ namespace doris::vectorized { class VCSVTransformer final : public VFileFormatTransformer { public: - VCSVTransformer(doris::io::FileWriter* file_writer, const VExprContextSPtrs& output_vexpr_ctxs, - bool output_object_data, std::string_view header_type, std::string_view header, + VCSVTransformer(RuntimeState* state, doris::io::FileWriter* file_writer, + const VExprContextSPtrs& output_vexpr_ctxs, bool output_object_data, + std::string_view header_type, std::string_view header, std::string_view column_separator, std::string_view line_delimiter); ~VCSVTransformer() = default; diff --git a/be/src/vec/runtime/vfile_format_transformer.h b/be/src/vec/runtime/vfile_format_transformer.h index dc2ded5386..f8c3bde271 100644 --- a/be/src/vec/runtime/vfile_format_transformer.h +++ b/be/src/vec/runtime/vfile_format_transformer.h @@ -31,8 +31,10 @@ namespace doris::vectorized { class VFileFormatTransformer { public: - VFileFormatTransformer(const VExprContextSPtrs& output_vexpr_ctxs, bool output_object_data) - : _output_vexpr_ctxs(output_vexpr_ctxs), + VFileFormatTransformer(RuntimeState* state, const VExprContextSPtrs& output_vexpr_ctxs, + bool output_object_data) + : _state(state), + _output_vexpr_ctxs(output_vexpr_ctxs), _cur_written_rows(0), _output_object_data(output_object_data) { DataTypes data_types; @@ -51,6 +53,7 @@ public: virtual int64_t written_len() = 0; protected: + RuntimeState* _state; // not owned, set when init const VExprContextSPtrs& _output_vexpr_ctxs; int64_t _cur_written_rows; bool _output_object_data; diff --git a/be/src/vec/runtime/vorc_transformer.cpp b/be/src/vec/runtime/vorc_transformer.cpp index fafe209375..0339a4467b 100644 --- a/be/src/vec/runtime/vorc_transformer.cpp +++ b/be/src/vec/runtime/vorc_transformer.cpp @@ 
-30,6 +30,7 @@ #include "orc/OrcFile.hh" #include "orc/Vector.hh" #include "runtime/define_primitive_type.h" +#include "runtime/runtime_state.h" #include "runtime/types.h" #include "util/binary_cast.hpp" #include "vec/columns/column.h" @@ -88,13 +89,16 @@ void VOrcOutputStream::set_written_len(int64_t written_len) { _written_len = written_len; } -VOrcTransformer::VOrcTransformer(doris::io::FileWriter* file_writer, +VOrcTransformer::VOrcTransformer(RuntimeState* state, doris::io::FileWriter* file_writer, const VExprContextSPtrs& output_vexpr_ctxs, const std::string& schema, bool output_object_data) - : VFileFormatTransformer(output_vexpr_ctxs, output_object_data), + : VFileFormatTransformer(state, output_vexpr_ctxs, output_object_data), _file_writer(file_writer), _write_options(new orc::WriterOptions()), - _schema_str(schema) {} + _schema_str(schema) { + _write_options->setTimezoneName(_state->timezone()); + _write_options->setUseTightNumericVector(true); +} Status VOrcTransformer::open() { try { @@ -103,7 +107,7 @@ Status VOrcTransformer::open() { return Status::InternalError("Orc build schema from \"{}\" failed: {}", _schema_str, e.what()); } - _output_stream = std::unique_ptr(new VOrcOutputStream(_file_writer)); + _output_stream = std::make_unique(_file_writer); _writer = orc::createWriter(*_schema, _output_stream.get(), *_write_options); if (_writer == nullptr) { return Status::InternalError("Failed to create file writer"); @@ -160,8 +164,8 @@ Status VOrcTransformer::write(const Block& block) { try { for (size_t i = 0; i < block.columns(); i++) { auto& raw_column = block.get_by_position(i).column; - RETURN_IF_ERROR(_serdes[i]->write_column_to_orc(*raw_column, nullptr, root->fields[i], - 0, sz, buffer_list)); + RETURN_IF_ERROR(_serdes[i]->write_column_to_orc( + _state->timezone(), *raw_column, nullptr, root->fields[i], 0, sz, buffer_list)); } } catch (const std::exception& e) { LOG(WARNING) << "Orc write error: " << e.what(); diff --git 
a/be/src/vec/runtime/vorc_transformer.h b/be/src/vec/runtime/vorc_transformer.h index 06a42361fb..7dd17dfd1a 100644 --- a/be/src/vec/runtime/vorc_transformer.h +++ b/be/src/vec/runtime/vorc_transformer.h @@ -74,8 +74,9 @@ private: // a wrapper of parquet output stream class VOrcTransformer final : public VFileFormatTransformer { public: - VOrcTransformer(doris::io::FileWriter* file_writer, const VExprContextSPtrs& output_vexpr_ctxs, - const std::string& schema, bool output_object_data); + VOrcTransformer(RuntimeState* state, doris::io::FileWriter* file_writer, + const VExprContextSPtrs& output_vexpr_ctxs, const std::string& schema, + bool output_object_data); ~VOrcTransformer() = default; diff --git a/be/src/vec/runtime/vparquet_transformer.cpp b/be/src/vec/runtime/vparquet_transformer.cpp index 43767db23a..a7b771c901 100644 --- a/be/src/vec/runtime/vparquet_transformer.cpp +++ b/be/src/vec/runtime/vparquet_transformer.cpp @@ -196,14 +196,14 @@ void ParquetBuildHelper::build_version(parquet::WriterProperties::Builder& build } } -VParquetTransformer::VParquetTransformer(doris::io::FileWriter* file_writer, +VParquetTransformer::VParquetTransformer(RuntimeState* state, doris::io::FileWriter* file_writer, const VExprContextSPtrs& output_vexpr_ctxs, const std::vector& parquet_schemas, const TParquetCompressionType::type& compression_type, const bool& parquet_disable_dictionary, const TParquetVersion::type& parquet_version, bool output_object_data) - : VFileFormatTransformer(output_vexpr_ctxs, output_object_data), + : VFileFormatTransformer(state, output_vexpr_ctxs, output_object_data), _parquet_schemas(parquet_schemas), _compression_type(compression_type), _parquet_disable_dictionary(parquet_disable_dictionary), diff --git a/be/src/vec/runtime/vparquet_transformer.h b/be/src/vec/runtime/vparquet_transformer.h index ec97f675c3..ad306c29e0 100644 --- a/be/src/vec/runtime/vparquet_transformer.h +++ b/be/src/vec/runtime/vparquet_transformer.h @@ -89,7 +89,7 @@ public: // a 
wrapper of parquet output stream class VParquetTransformer final : public VFileFormatTransformer { public: - VParquetTransformer(doris::io::FileWriter* file_writer, + VParquetTransformer(RuntimeState* state, doris::io::FileWriter* file_writer, const VExprContextSPtrs& output_vexpr_ctxs, const std::vector& parquet_schemas, const TParquetCompressionType::type& compression_type, diff --git a/be/src/vec/sink/writer/vfile_result_writer.cpp b/be/src/vec/sink/writer/vfile_result_writer.cpp index c6d6eb987b..9b3671e826 100644 --- a/be/src/vec/sink/writer/vfile_result_writer.cpp +++ b/be/src/vec/sink/writer/vfile_result_writer.cpp @@ -145,18 +145,19 @@ Status VFileResultWriter::_create_file_writer(const std::string& file_name) { switch (_file_opts->file_format) { case TFileFormatType::FORMAT_CSV_PLAIN: _vfile_writer.reset(new VCSVTransformer( - _file_writer_impl.get(), _vec_output_expr_ctxs, _output_object_data, _header_type, - _header, _file_opts->column_separator, _file_opts->line_delimiter)); + _state, _file_writer_impl.get(), _vec_output_expr_ctxs, _output_object_data, + _header_type, _header, _file_opts->column_separator, _file_opts->line_delimiter)); break; case TFileFormatType::FORMAT_PARQUET: _vfile_writer.reset(new VParquetTransformer( - _file_writer_impl.get(), _vec_output_expr_ctxs, _file_opts->parquet_schemas, + _state, _file_writer_impl.get(), _vec_output_expr_ctxs, _file_opts->parquet_schemas, _file_opts->parquet_commpression_type, _file_opts->parquert_disable_dictionary, _file_opts->parquet_version, _output_object_data)); break; case TFileFormatType::FORMAT_ORC: - _vfile_writer.reset(new VOrcTransformer(_file_writer_impl.get(), _vec_output_expr_ctxs, - _file_opts->orc_schema, _output_object_data)); + _vfile_writer.reset(new VOrcTransformer(_state, _file_writer_impl.get(), + _vec_output_expr_ctxs, _file_opts->orc_schema, + _output_object_data)); break; default: return Status::InternalError("unsupported file format: {}", _file_opts->file_format); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java index 2560f5469f..7d41b25e2d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java @@ -295,10 +295,12 @@ public class OutFileClause { } orcType = "string"; break; + case DATETIMEV2: + orcType = "timestamp"; + break; case LARGEINT: case DATE: case DATETIME: - case DATETIMEV2: case DATEV2: case CHAR: case VARCHAR: @@ -421,10 +423,16 @@ public class OutFileClause { + " but the type of column " + i + " is " + schema.second); } break; + case DATETIMEV2: + if (!schema.second.equals("timestamp")) { + throw new AnalysisException("project field type is " + resultType.getPrimitiveType().toString() + + ", should use timestamp, but the definition type of column " + i + " is " + + schema.second); + } + break; case LARGEINT: case DATE: case DATETIME: - case DATETIMEV2: case DATEV2: case CHAR: case VARCHAR: diff --git a/regression-test/data/export_p0/outfile/orc/test_outfile_orc_timestamp.out b/regression-test/data/export_p0/outfile/orc/test_outfile_orc_timestamp.out new file mode 100644 index 0000000000..49793c4a42 --- /dev/null +++ b/regression-test/data/export_p0/outfile/orc/test_outfile_orc_timestamp.out @@ -0,0 +1,21 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_base -- +1 doris1 0000-01-01T00:00:00.100 0000-01-01T00:00:00.120 0000-01-01T00:00:00.123 0000-01-01T00:00:00.123400 0000-01-01T00:00:00.123450 0000-01-01T00:00:00.123456 +2 doris2 2000-07-03T00:00:00.100 2000-07-03T00:00:00.990 2000-07-03T00:00:00.111 2000-07-03T00:00:00.123400 2000-07-03T00:00:00.123450 2000-07-03T02:01:00.999999 +3 doris3 1070-07-03T00:00:00.100 1970-07-03T00:00:00.450 1970-07-03T00:00:00.999 2000-07-03T00:00:00.123400 1970-07-03T00:00:00.123450 1234-07-03T11:12:13.111111 +4 doris4 0021-01-01T12:23:59.100 1001-05-01T11:01:11 2000-11-03T12:12:21 1900-12-31T23:54:21.123400 1970-01-01T08:00:00.123450 1969-07-03T09:09:09.777123 +5 doris5 2000-07-03T00:00:00.100 2000-07-03T00:00:00.010 2000-07-03T00:00:00.555 1970-01-01T02:10:05.123400 0001-03-01T00:10:05.123450 0000-01-01T08:00:00.123456 +6 doris6 2000-07-03T00:00:00.100 1900-12-31T23:54:17.770 1900-12-31T23:54:16.321 2000-07-03T00:00:00.123400 1900-12-31T23:54:19.123450 2000-07-03T00:00:00.123456 +7 \N 2000-07-03T00:00:00.100 2000-07-03T00:00:00.120 1900-12-31T23:54:20.123 2000-07-03T00:00:00.123400 2000-07-03T00:00:00.123450 2000-07-03T00:00:00.123456 +8 \N 2000-07-03T00:00:00.100 1969-07-03T00:00:00.120 2000-07-03T00:00:00.123 2000-07-03T00:00:00.123400 2000-07-03T00:00:00.123450 2000-07-03T00:00:00.123456 + +-- !select_tvf1 -- +1 doris1 0000-01-01T00:00:00.100 0000-01-01T00:00:00.120 0000-01-01T00:00:00.123 0000-01-01T00:00:00.123400 0000-01-01T00:00:00.123450 0000-01-01T00:00:00.123456 +2 doris2 2000-07-03T00:00:00.100 2000-07-03T00:00:00.990 2000-07-03T00:00:00.111 2000-07-03T00:00:00.123400 2000-07-03T00:00:00.123450 2000-07-03T02:01:00.999999 +3 doris3 1070-07-03T00:00:00.100 1970-07-03T00:00:00.450 1970-07-03T00:00:00.999 2000-07-03T00:00:00.123400 1970-07-03T00:00:00.123450 1234-07-03T11:12:13.111111 +4 doris4 0021-01-01T12:23:59.100 1001-05-01T11:01:11 2000-11-03T12:12:21 1900-12-31T23:54:21.123400 
1970-01-01T08:00:00.123450 1969-07-03T09:09:09.777123 +5 doris5 2000-07-03T00:00:00.100 2000-07-03T00:00:00.010 2000-07-03T00:00:00.555 1970-01-01T02:10:05.123400 0001-03-01T00:10:05.123450 0000-01-01T08:00:00.123456 +6 doris6 2000-07-03T00:00:00.100 1900-12-31T23:54:17.770 1900-12-31T23:54:16.321 2000-07-03T00:00:00.123400 1900-12-31T23:54:19.123450 2000-07-03T00:00:00.123456 +7 \N 2000-07-03T00:00:00.100 2000-07-03T00:00:00.120 1900-12-31T23:54:20.123 2000-07-03T00:00:00.123400 2000-07-03T00:00:00.123450 2000-07-03T00:00:00.123456 +8 \N 2000-07-03T00:00:00.100 1969-07-03T00:00:00.120 2000-07-03T00:00:00.123 2000-07-03T00:00:00.123400 2000-07-03T00:00:00.123450 2000-07-03T00:00:00.123456 + diff --git a/regression-test/data/export_p0/test_outfile_orc_array_type.out b/regression-test/data/export_p0/test_outfile_orc_array_type.out index d05e8d17a4..5f292e4b66 100644 --- a/regression-test/data/export_p0/test_outfile_orc_array_type.out +++ b/regression-test/data/export_p0/test_outfile_orc_array_type.out @@ -60,12 +60,12 @@ 7 doris7 [NULL, NULL, NULL, 2017-10-01 00:00:00, 2011-10-01 01:23:59] -- !select_load_datetime -- -1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] -2 doris2 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] +1 doris1 [2017-10-01 00:00:00.000000, 2011-10-01 01:23:59.000000] +2 doris2 [2017-10-01 00:00:00.000000, 2011-10-01 01:23:59.000000] 3 doris3 [] -5 doris5 ["2017-10-01 00:00:00", NULL, "2017-10-01 00:00:00"] +5 doris5 [2017-10-01 00:00:00.000000, NULL, 2017-10-01 00:00:00.000000] 6 doris6 [NULL, NULL, NULL] -7 doris7 [NULL, NULL, NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"] +7 doris7 [NULL, NULL, NULL, 2017-10-01 00:00:00.000000, 2011-10-01 01:23:59.000000] -- !select_base_varchar -- 1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] diff --git a/regression-test/data/export_p0/test_outfile_orc_complex_type.out b/regression-test/data/export_p0/test_outfile_orc_complex_type.out index 786179b6a6..f077910d99 100644 --- 
a/regression-test/data/export_p0/test_outfile_orc_complex_type.out +++ b/regression-test/data/export_p0/test_outfile_orc_complex_type.out @@ -116,14 +116,14 @@ 10 doris_10 {10, 2017-10-01, 2017-10-01 00:00:00, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} -- !select_load1 -- -1 doris_1 {1, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 1, 1, 1, 1, 1, "1", 1.1, 1.1, "char1_1234", 1} -2 doris_2 {2, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 2, 2, 1, 2, 2, "2", 2.2, 2.2, "char2_1234", 2} -3 doris_3 {3, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 3, 3, 1, 3, 3, "3", 3.3, 3.3, "char3_1234", 3} -4 doris_4 {4, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 4, 4, 1, 4, 4, "4", 4.4, 4.4, "char4_1234", 4} -5 doris_5 {5, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 5, 5, 1, 5, 5, "5", 5.5, 5.5, "char5_1234", 5} -6 doris_6 {6, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 6, 6, 1, 6, 6, "6", 6.6, 6.6, "char6_1234", 6} -7 doris_7 {7, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 7, 7, 1, 7, 7, "7", 7.7, 7.7, "char7_1234", 7} -8 doris_8 {8, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 8, 8, 1, 8, 8, "8", 8.8, 8.8, "char8_1234", 8} -9 doris_9 {9, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 9, 9, 1, 9, 9, "9", 9.9, 9.9, "char9_1234", 9} -10 doris_10 {10, "2017-10-01", "2017-10-01 00:00:00", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} +1 doris_1 {1, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 1, 1, 1, 1, 1, "1", 1.1, 1.1, "char1_1234", 1} +2 doris_2 {2, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 2, 2, 1, 2, 2, "2", 2.2, 2.2, "char2_1234", 2} +3 doris_3 {3, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 3, 3, 1, 3, 3, "3", 3.3, 3.3, "char3_1234", 3} +4 doris_4 {4, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 4, 4, 1, 4, 4, "4", 4.4, 4.4, "char4_1234", 4} +5 doris_5 {5, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 5, 5, 1, 5, 5, "5", 5.5, 5.5, "char5_1234", 5} +6 doris_6 {6, 
"2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 6, 6, 1, 6, 6, "6", 6.6, 6.6, "char6_1234", 6} +7 doris_7 {7, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 7, 7, 1, 7, 7, "7", 7.7, 7.7, "char7_1234", 7} +8 doris_8 {8, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 8, 8, 1, 8, 8, "8", 8.8, 8.8, "char8_1234", 8} +9 doris_9 {9, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 9, 9, 1, 9, 9, "9", 9.9, 9.9, "char9_1234", 9} +10 doris_10 {10, "2017-10-01", 2017-10-01 00:00:00.000000, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} diff --git a/regression-test/data/export_p0/test_outfile_orc_map_type.out b/regression-test/data/export_p0/test_outfile_orc_map_type.out index 1e2fae7eac..b6a0231f47 100644 --- a/regression-test/data/export_p0/test_outfile_orc_map_type.out +++ b/regression-test/data/export_p0/test_outfile_orc_map_type.out @@ -208,16 +208,16 @@ 10 doris10 {2003-04-29 01:02:03:"a", 2006-02-22 02:01:04:"max_largeint", 2020-03-21 19:21:23:"b"} -- !select_load10 -- -1 doris1 {"2023-04-20 01:02:03":"null", "2018-04-20 10:40:35":"b"} -2 doris2 {"2000-04-20 00:00:00":"a", "1967-12-31 12:24:56":"b"} -3 doris3 {NULL:"a", "2023-01-01 00:00:00":"b", "2023-02-27 00:01:02":"d"} +1 doris1 {2023-04-20 01:02:03.000000:"null", 2018-04-20 10:40:35.000000:"b"} +2 doris2 {2000-04-20 00:00:00.000000:"a", 1967-12-31 12:24:56.000000:"b"} +3 doris3 {NULL:"a", 2023-01-01 00:00:00.000000:"b", 2023-02-27 00:01:02.000000:"d"} 4 doris4 {NULL:NULL, NULL:NULL} 5 doris5 {NULL:"100", NULL:"b"} 6 \N \N 7 doris7 \N -8 doris8 {"2025-12-31 12:01:41":"min_largeint", "2006-02-19 09:01:02":"max_largeint"} -9 doris9 {"0209-04-20 00:00:00":"min_largeint", "0102-03-21 00:00:00":"b"} -10 doris10 {"2003-04-29 01:02:03":"a", "2006-02-22 02:01:04":"max_largeint", "2020-03-21 19:21:23":"b"} +8 doris8 {2025-12-31 12:01:41.000000:"min_largeint", 2006-02-19 09:01:02.000000:"max_largeint"} +9 doris9 {0209-04-20 00:00:00.000000:"min_largeint", 0102-03-21 00:00:00.000000:"b"} 
+10 doris10 {2003-04-29 01:02:03.000000:"a", 2006-02-22 02:01:04.000000:"max_largeint", 2020-03-21 19:21:23.000000:"b"} -- !select_base11 -- 1 doris1 {2023-04-20 01:02:03:NULL, 2018-04-20 10:40:35:123} @@ -230,14 +230,14 @@ 8 doris8 {2025-12-31 12:01:41:524524, 2006-02-19 09:01:02:2534} -- !select_load11 -- -1 doris1 {"2023-04-20 01:02:03":NULL, "2018-04-20 10:40:35":123} -2 doris2 {"2000-04-20 00:00:00":-2147483648, "1967-12-31 12:24:56":2147483647} -3 doris3 {NULL:4574, "2023-01-01 00:00:00":1246, "2023-02-27 00:01:02":5646} +1 doris1 {2023-04-20 01:02:03.000000:NULL, 2018-04-20 10:40:35.000000:123} +2 doris2 {2000-04-20 00:00:00.000000:-2147483648, 1967-12-31 12:24:56.000000:2147483647} +3 doris3 {NULL:4574, 2023-01-01 00:00:00.000000:1246, 2023-02-27 00:01:02.000000:5646} 4 doris4 {NULL:NULL, NULL:NULL} 5 doris5 {NULL:87676, NULL:234} 6 \N \N 7 doris7 \N -8 doris8 {"2025-12-31 12:01:41":524524, "2006-02-19 09:01:02":2534} +8 doris8 {2025-12-31 12:01:41.000000:524524, 2006-02-19 09:01:02.000000:2534} -- !select_base12 -- 1 doris1 {2023-04-20:NULL, 2018-04-20:123} @@ -290,14 +290,14 @@ 8 doris8 {2025-12-31 11:22:33:"min_largeint", 2006-02-19 00:44:55:"max_largeint"} -- !select_load14 -- -1 doris1 {"2023-04-20 12:20:03":"null", "2018-04-20 12:59:59":NULL} -2 doris2 {"2000-04-20 23:59:59":"-2147483648", "1967-12-31 00:00:00":"2147483647"} -3 doris3 {NULL:"4574", "2023-01-01 07:24:54":"1246", "2023-02-27 15:12:13":"5646"} +1 doris1 {2023-04-20 12:20:03.000000:"null", 2018-04-20 12:59:59.000000:NULL} +2 doris2 {2000-04-20 23:59:59.000000:"-2147483648", 1967-12-31 00:00:00.000000:"2147483647"} +3 doris3 {NULL:"4574", 2023-01-01 07:24:54.000000:"1246", 2023-02-27 15:12:13.000000:"5646"} 4 doris4 {NULL:NULL, NULL:NULL} 5 doris5 {NULL:"doris", NULL:"nereids"} 6 \N \N 7 doris7 \N -8 doris8 {"2025-12-31 11:22:33":"min_largeint", "2006-02-19 00:44:55":"max_largeint"} +8 doris8 {2025-12-31 11:22:33.000000:"min_largeint", 2006-02-19 00:44:55.000000:"max_largeint"} -- 
!select_base15 -- 1 doris1 {100:"null", 111:"b"} diff --git a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc.out b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc.out index 4a6bc2065c..0950edc92d 100644 --- a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc.out +++ b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc.out @@ -36,14 +36,14 @@ 4 0000-01-01 0000-01-01 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 4 4 true 4 4 4 4.4 4.4 char4 4 4 4 0.4 4.00000000 4.0000000000 4 4.0000000000000000000000000000000000000 0.40000000000000000000000000000000000000 -- !select_tvf2 -- -1 2023-04-20 2023-04-20 2023-04-20 00:00:00 2023-04-20 00:00:00 2023-04-20 00:00:00.000 2023-04-20 00:00:00.000000 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 -2 9999-12-31 9999-12-31 9999-12-31 23:59:59 9999-12-31 23:59:59 2023-04-20 00:00:00.120 2023-04-20 00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 -3 2023-04-21 2023-04-21 2023-04-20 12:34:56 2023-04-20 00:00:00 2023-04-20 00:00:00.123 2023-04-20 00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235e+38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 -4 0000-01-01 0000-01-01 2023-04-20 00:00:00 2023-04-20 00:00:00 2023-04-20 00:00:00.000 2023-04-20 00:00:00.000000 Beijing Haidian 4 4 
true 4 4 4 4.4 4.4 char4 4 4 4 0.4 4.00000000 4.0000000000 4 4.0000000000000000000000000000000000000 0.40000000000000000000000000000000000000 +1 2023-04-20 2023-04-20 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 +2 9999-12-31 9999-12-31 9999-12-31T23:59:59 9999-12-31T23:59:59 2023-04-20T00:00:00.120 2023-04-20T00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 +3 2023-04-21 2023-04-21 2023-04-20T12:34:56 2023-04-20T00:00 2023-04-20T00:00:00.123 2023-04-20T00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235e+38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 +4 0000-01-01 0000-01-01 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 2023-04-20T00:00 Beijing Haidian 4 4 true 4 4 4 4.4 4.4 char4 4 4 4 0.4 4.00000000 4.0000000000 4 4.0000000000000000000000000000000000000 0.40000000000000000000000000000000000000 -- !hive_docker_02 -- -1 2023-04-20 2023-04-20 2023-04-20 00:00:00 2023-04-20 00:00:00 2023-04-20 00:00:00.000 2023-04-20 00:00:00.000000 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 -2 9999-12-31 9999-12-31 9999-12-31 23:59:59 9999-12-31 23:59:59 2023-04-20 00:00:00.120 2023-04-20 00:00:00.334400 Haidian -32768 -128 true -2147483648 -9223372036854775808 
-170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 -3 2023-04-21 2023-04-21 2023-04-20 12:34:56 2023-04-20 00:00:00 2023-04-20 00:00:00.123 2023-04-20 00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235E38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 -4 0000-01-01 0000-01-01 2023-04-20 00:00:00 2023-04-20 00:00:00 2023-04-20 00:00:00.000 2023-04-20 00:00:00.000000 Beijing Haidian 4 4 true 4 4 4 4.4 4.4 char4 4 4 4 0.4 4.00000000 4.0000000000 4 4.0000000000000000000000000000000000000 0.40000000000000000000000000000000000000 +1 2023-04-20 2023-04-20 2023-04-20 00:00:00.0 2023-04-20 00:00:00.0 2023-04-20 00:00:00.0 2023-04-20 00:00:00.0 Beijing Haidian 1 1 true 1 1 1 1.1 1.1 char1 1 1 1 0.1 1.00000000 1.0000000000 1 1.0000000000000000000000000000000000000 0.10000000000000000000000000000000000000 +2 9999-12-31 9999-12-31 9999-12-31 23:59:59.0 9999-12-31 23:59:59.0 2023-04-20 00:00:00.12 2023-04-20 00:00:00.3344 Haidian -32768 -128 true -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 1.4E-45 4.9E-324 char2 100000000 100000000 4 0.1 0.99999999 9999999999.9999999999 99999999999999999999999999999999999999 9.9999999999999999999999999999999999999 0.99999999999999999999999999999999999999 +3 2023-04-21 2023-04-21 2023-04-20 12:34:56.0 2023-04-20 00:00:00.0 2023-04-20 00:00:00.123 2023-04-20 00:00:00.123456 Beijing 32767 127 true 2147483647 9223372036854775807 170141183460469231731687303715884105727 3.4028235E38 1.7976931348623157E308 char3 999999999 999999999 9 0.9 9.99999999 1234567890.0123456789 12345678901234567890123456789012345678 
1.2345678901234567890123456789012345678 0.12345678901234567890123456789012345678 +4 0000-01-01 0000-01-01 2023-04-20 00:00:00.0 2023-04-20 00:00:00.0 2023-04-20 00:00:00.0 2023-04-20 00:00:00.0 Beijing Haidian 4 4 true 4 4 4 4.4 4.4 char4 4 4 4 0.4 4.00000000 4.0000000000 4 4.0000000000000000000000000000000000000 0.40000000000000000000000000000000000000 diff --git a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out index 77b38dc4d0..34eabd9095 100644 --- a/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out +++ b/regression-test/data/external_table_p0/export/hive_read/orc/test_hive_read_orc_complex_type.out @@ -99,16 +99,16 @@ 10 doris_10 {10, 2017-10-01, 2017-10-01 00:00:00, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} -- !select_tvf4 -- -1 doris_1 {1, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 1, 1, 1, 1, 1, "1", 1.1, 1.1, "char1_1234", 1} -2 doris_2 {2, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 2, 2, 1, 2, 2, "2", 2.2, 2.2, "char2_1234", 2} -3 doris_3 {3, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 3, 3, 1, 3, 3, "3", 3.3, 3.3, "char3_1234", 3} -4 doris_4 {4, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 4, 4, 1, 4, 4, "4", 4.4, 4.4, "char4_1234", 4} -5 doris_5 {5, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 5, 5, 1, 5, 5, "5", 5.5, 5.5, "char5_1234", 5} -6 doris_6 {6, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 6, 6, 1, 6, 6, "6", 6.6, 6.6, "char6_1234", 6} -7 doris_7 {7, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 7, 7, 1, 7, 7, "7", 7.7, 7.7, "char7_1234", 7} -8 doris_8 {8, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 8, 8, 1, 8, 8, "8", 8.8, 8.8, "char8_1234", 8} -9 doris_9 {9, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 9, 9, 1, 9, 9, "9", 9.9, 9.9, "char9_1234", 9} -10 doris_10 {10, "2017-10-01", "2017-10-01 
00:00:00", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} +1 doris_1 {1, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 1, 1, 1, 1, 1, "1", 1.1, 1.1, "char1_1234", 1} +2 doris_2 {2, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 2, 2, 1, 2, 2, "2", 2.2, 2.2, "char2_1234", 2} +3 doris_3 {3, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 3, 3, 1, 3, 3, "3", 3.3, 3.3, "char3_1234", 3} +4 doris_4 {4, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 4, 4, 1, 4, 4, "4", 4.4, 4.4, "char4_1234", 4} +5 doris_5 {5, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 5, 5, 1, 5, 5, "5", 5.5, 5.5, "char5_1234", 5} +6 doris_6 {6, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 6, 6, 1, 6, 6, "6", 6.6, 6.6, "char6_1234", 6} +7 doris_7 {7, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 7, 7, 1, 7, 7, "7", 7.7, 7.7, "char7_1234", 7} +8 doris_8 {8, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 8, 8, 1, 8, 8, "8", 8.8, 8.8, "char8_1234", 8} +9 doris_9 {9, "2017-10-01", 2017-10-01 00:00:00.000000, "Beijing", 9, 9, 1, 9, 9, "9", 9.9, 9.9, "char9_1234", 9} +10 doris_10 {10, "2017-10-01", 2017-10-01 00:00:00.000000, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} -- !hive_docker_04 -- 1 doris_1 {"user_id":1,"date":"2017-10-01","datetime":"2017-10-01 00:00:00","city":"Beijing","age":1,"sex":1,"bool_col":true,"int_col":1,"bigint_col":1,"largeint_col":"1","float_col":1.1,"double_col":1.1,"char_col":"char1_1234","decimal_col":1} diff --git a/regression-test/suites/export_p0/outfile/orc/test_outfile_orc_timestamp.groovy b/regression-test/suites/export_p0/outfile/orc/test_outfile_orc_timestamp.groovy new file mode 100644 index 0000000000..1a11f0d80a --- /dev/null +++ b/regression-test/suites/export_p0/outfile/orc/test_outfile_orc_timestamp.groovy @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_outfile_orc_timestamp", "p0") { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + + def export_table_name = "outfile_orc_complex_type_export_test" + def outFilePath = "${bucket}/outfile/orc/complex_type/exp_" + + + def create_table = {table_name, struct_field -> + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `name` STRING COMMENT "用户年龄", + ${struct_field} + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + } + + def outfile_to_S3 = { + // select ... into outfile ... 
+ def res = sql """ + SELECT * FROM ${export_table_name} t ORDER BY user_id + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS ORC + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + + return res[0][3] + } + + // 1. test datetimev2 NULL type + try { + + def field_define = """ + `datetime_1` datetimev2(1) NULL, + `datetime_2` datetimev2(2) NULL, + `datetime_3` datetimev2(3) NULL, + `datetime_4` datetimev2(4) NULL, + `datetime_5` datetimev2(5) NULL, + `datetime_6` datetimev2(6) NULL + """ + // create table to export data + create_table(export_table_name, field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', '0000-01-01 00:00:00.1', '0000-01-01 00:00:00.12', '0000-01-01 00:00:00.123', '0000-01-01 00:00:00.1234', '0000-01-01 00:00:00.12345', '0000-01-01 00:00:00.123456'); """ + sql """ insert into ${export_table_name} values (2, 'doris2', '2000-07-03 00:00:00.1', '2000-07-03 00:00:00.99', '2000-07-03 00:00:00.111', '2000-07-03 00:00:00.1234', '2000-07-03 00:00:00.12345', '2000-07-03 02:01:00.999999'); """ + sql """ insert into ${export_table_name} values (3, 'doris3', '1070-07-03 00:00:00.1', '1970-07-03 00:00:00.45', '1970-07-03 00:00:00.999', '2000-07-03 00:00:00.1234', '1970-07-03 00:00:00.12345', '1234-07-03 11:12:13.111111'); """ + sql """ insert into ${export_table_name} values (4, 'doris4', '0021-01-01 12:23:59.1', '1001-05-01 11:01:11.00', '2000-11-03 12:12:21.000', '1900-12-31 23:54:21.1234', '1970-01-01 08:00:00.12345', '1969-07-03 09:09:09.777123'); """ + sql """ insert into ${export_table_name} values (5, 'doris5', '2000-07-03 00:00:00.1', '2000-07-03 00:00:00.01', '2000-07-03 00:00:00.555', '1970-01-01 02:10:05.1234', '0001-03-01 00:10:05.12345', '0000-01-01 08:00:00.123456'); """ + sql """ insert into ${export_table_name} values (6, 'doris6', '2000-07-03 00:00:00.1', '1900-12-31 23:54:17.77', '1900-12-31 23:54:16.321', 
'2000-07-03 00:00:00.1234', '1900-12-31 23:54:19.12345', '2000-07-03 00:00:00.123456'); """ + sql """ insert into ${export_table_name} values (7, null, '2000-07-03 00:00:00.1', '2000-07-03 00:00:00.12', '1900-12-31 23:54:20.123', '2000-07-03 00:00:00.1234', '2000-07-03 00:00:00.12345', '2000-07-03 00:00:00.123456'); """ + sql """ insert into ${export_table_name} values (8, null, '2000-07-03 00:00:00.1', '1969-07-03 00:00:00.12', '2000-07-03 00:00:00.123', '2000-07-03 00:00:00.1234', '2000-07-03 00:00:00.12345', '2000-07-03 00:00:00.123456'); """ + + // test base data + qt_select_base """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + // test outfile to s3 + def outfile_url = outfile_to_S3() + + qt_select_tvf1 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.orc", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "orc", + "region" = "${region}" + ); + """ + + } finally { + } +} \ No newline at end of file