From 6f9a084d99dc377a2789c50817d87f997d3efd17 Mon Sep 17 00:00:00 2001 From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com> Date: Fri, 13 Oct 2023 13:58:34 +0800 Subject: [PATCH] [Fix](Outfile) Use data_type_serde to export data to `parquet` file format (#24998) --- be/src/util/arrow/block_convertor.cpp | 9 +- be/src/util/arrow/row_batch.cpp | 2 - be/src/util/arrow/row_batch.h | 4 + .../data_types/serde/data_type_map_serde.cpp | 5 +- .../serde/data_type_number_serde.cpp | 50 +- be/src/vec/runtime/vparquet_transformer.cpp | 735 +----------------- be/src/vec/runtime/vparquet_transformer.h | 21 +- .../serde/data_type_serde_arrow_test.cpp | 4 - .../apache/doris/analysis/OutFileClause.java | 166 +--- .../apache/doris/analysis/SelectStmtTest.java | 4 - .../outfile/parquet/test_outfile_parquet.out | 25 + .../test_outfile_parquet_array_type.out | 229 ++++++ .../test_outfile_parquet_complex_type.out | 129 +++ .../parquet/test_outfile_parquet_map_type.out | 397 ++++++++++ .../parquet}/test_outfile_parquet.groovy | 0 .../test_outfile_parquet_array_type.groovy | 532 +++++++++++++ .../test_outfile_parquet_complex_type.groovy | 313 ++++++++ .../test_outfile_parquet_map_type.groovy | 714 +++++++++++++++++ 18 files changed, 2446 insertions(+), 893 deletions(-) create mode 100644 regression-test/data/export_p0/outfile/parquet/test_outfile_parquet.out create mode 100644 regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_array_type.out create mode 100644 regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out create mode 100644 regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out rename regression-test/suites/export_p0/{ => outfile/parquet}/test_outfile_parquet.groovy (100%) create mode 100644 regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_array_type.groovy create mode 100644 regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy create mode 
100644 regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_map_type.groovy diff --git a/be/src/util/arrow/block_convertor.cpp b/be/src/util/arrow/block_convertor.cpp index 149d9a9572..8cfd8bd1ba 100644 --- a/be/src/util/arrow/block_convertor.cpp +++ b/be/src/util/arrow/block_convertor.cpp @@ -389,8 +389,13 @@ Status FromBlockConverter::convert(std::shared_ptr* out) { return to_doris_status(arrow_st); } _cur_builder = builder.get(); - _cur_type->get_serde()->write_column_to_arrow(*_cur_col, nullptr, _cur_builder, _cur_start, - _cur_start + _cur_rows); + try { + _cur_type->get_serde()->write_column_to_arrow(*_cur_col, nullptr, _cur_builder, + _cur_start, _cur_start + _cur_rows); + } catch (std::exception& e) { + return Status::InternalError("Fail to convert block data to arrow data, error: {}", + e.what()); + } arrow_st = _cur_builder->Finish(&_arrays[_cur_field_idx]); if (!arrow_st.ok()) { return to_doris_status(arrow_st); diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index d78c9ebf1f..5136d0c6f0 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -68,8 +68,6 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr #include "common/status.h" +#include "runtime/types.h" // This file will convert Doris RowBatch to/from Arrow's RecordBatch // RowBatch is used by Doris query engine to exchange data between @@ -28,6 +29,7 @@ namespace arrow { +class DataType; class RecordBatch; class Schema; @@ -37,6 +39,8 @@ namespace doris { class RowDescriptor; +Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr* result); + // Convert Doris RowDescriptor to Arrow Schema. 
Status convert_to_arrow_schema(const RowDescriptor& row_desc, std::shared_ptr* result); diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index 063cfa1ee2..3ba9d809e4 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -18,6 +18,8 @@ #include "data_type_map_serde.h" #include "arrow/array/builder_nested.h" +#include "common/exception.h" +#include "common/status.h" #include "util/jsonb_document.h" #include "util/simd/bits.h" #include "vec/columns/column.h" @@ -347,7 +349,8 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const NullMa MutableColumnPtr value_mutable_data = nested_values_column.clone_empty(); for (size_t i = offsets[r - 1]; i < offsets[r]; ++i) { if (keys_nullmap_data[i] == 1) { - continue; + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, + "Can not write null value of map key to arrow."); } key_mutable_data->insert_from(nested_keys_column, i); value_mutable_data->insert_from(nested_values_column, i); diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index a5a915d15a..1b2d87addb 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -21,8 +21,11 @@ #include +#include "common/exception.h" +#include "common/status.h" #include "gutil/strings/numbers.h" #include "util/mysql_global.h" +#include "vec/core/types.h" #include "vec/io/io_helper.h" namespace doris { @@ -81,15 +84,20 @@ void DataTypeNumberSerDe::write_column_to_arrow(const IColumn& column, const end - start, reinterpret_cast(arrow_null_map_data)), column.get_name(), array_builder->type()->name()); - } else if constexpr (std::is_same_v || std::is_same_v) { - ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); - size_t fixed_length = sizeof(typename ColumnType::value_type); - 
const uint8_t* data_start = - reinterpret_cast(col_data.data()) + start * fixed_length; - checkArrowStatus( - builder.AppendValues(data_start, end - start, - reinterpret_cast(arrow_null_map_data)), - column.get_name(), array_builder->type()->name()); + } else if constexpr (std::is_same_v) { + auto& string_builder = assert_cast(*array_builder); + for (size_t i = start; i < end; ++i) { + auto& data_value = col_data[i]; + std::string value_str = fmt::format("{}", data_value); + if (null_map && (*null_map)[i]) { + checkArrowStatus(string_builder.AppendNull(), column.get_name(), + array_builder->type()->name()); + } else { + checkArrowStatus(string_builder.Append(value_str.data(), value_str.length()), + column.get_name(), array_builder->type()->name()); + } + } + } else if constexpr (std::is_same_v) { } else { ARROW_BUILDER_TYPE& builder = assert_cast(*array_builder); checkArrowStatus( @@ -192,6 +200,30 @@ void DataTypeNumberSerDe::read_column_from_arrow(IColumn& column, } return; } + + // only for largeint(int128) type + if (arrow_array->type_id() == arrow::Type::STRING) { + auto concrete_array = dynamic_cast(arrow_array); + std::shared_ptr buffer = concrete_array->value_data(); + + for (size_t offset_i = start; offset_i < end; ++offset_i) { + if (!concrete_array->IsNull(offset_i)) { + const auto* raw_data = buffer->data() + concrete_array->value_offset(offset_i); + const auto raw_data_len = concrete_array->value_length(offset_i); + + Int128 val = 0; + ReadBuffer rb(raw_data, raw_data_len); + if (!read_int_text_impl(val, rb)) { + throw doris::Exception(ErrorCode::INVALID_ARGUMENT, + "parse number fail, string: '{}'", + std::string(rb.position(), rb.count()).c_str()); + } + col_data.emplace_back(val); + } + } + return; + } + /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = arrow_array->data()->buffers[1]; const auto* raw_data = reinterpret_cast(buffer->data()) + start; diff --git a/be/src/vec/runtime/vparquet_transformer.cpp 
b/be/src/vec/runtime/vparquet_transformer.cpp index 7d1ceed404..43767db23a 100644 --- a/be/src/vec/runtime/vparquet_transformer.cpp +++ b/be/src/vec/runtime/vparquet_transformer.cpp @@ -18,6 +18,7 @@ #include "vec/runtime/vparquet_transformer.h" #include +#include #include #include #include @@ -40,6 +41,9 @@ #include "runtime/decimalv2_value.h" #include "runtime/define_primitive_type.h" #include "runtime/types.h" +#include "util/arrow/block_convertor.h" +#include "util/arrow/row_batch.h" +#include "util/arrow/utils.h" #include "util/binary_cast.hpp" #include "util/mysql_global.h" #include "util/types.h" @@ -63,10 +67,6 @@ namespace doris::vectorized { -static const std::string epoch_date_str = "1970-01-01"; -static const int64_t timestamp_threshold = -2177481943; -static const int64_t timestamp_diff = 343; - ParquetOutputStream::ParquetOutputStream(doris::io::FileWriter* file_writer) : _file_writer(file_writer), _cur_pos(0), _written_len(0) { set_mode(arrow::io::FileMode::WRITE); @@ -139,88 +139,6 @@ void ParquetBuildHelper::build_schema_repetition_type( } } -void ParquetBuildHelper::build_schema_data_type(parquet::Type::type& parquet_data_type, - const TParquetDataType::type& column_data_type) { - switch (column_data_type) { - case TParquetDataType::BOOLEAN: { - parquet_data_type = parquet::Type::BOOLEAN; - break; - } - case TParquetDataType::INT32: { - parquet_data_type = parquet::Type::INT32; - break; - } - case TParquetDataType::INT64: { - parquet_data_type = parquet::Type::INT64; - break; - } - case TParquetDataType::INT96: { - parquet_data_type = parquet::Type::INT96; - break; - } - case TParquetDataType::BYTE_ARRAY: { - parquet_data_type = parquet::Type::BYTE_ARRAY; - break; - } - case TParquetDataType::FLOAT: { - parquet_data_type = parquet::Type::FLOAT; - break; - } - case TParquetDataType::DOUBLE: { - parquet_data_type = parquet::Type::DOUBLE; - break; - } - case TParquetDataType::FIXED_LEN_BYTE_ARRAY: { - parquet_data_type = 
parquet::Type::FIXED_LEN_BYTE_ARRAY; - break; - } - default: - parquet_data_type = parquet::Type::UNDEFINED; - } -} - -void ParquetBuildHelper::build_schema_data_logical_type( - std::shared_ptr& parquet_data_logical_type_ptr, - const TParquetDataLogicalType::type& column_data_logical_type, int* primitive_length, - const TypeDescriptor& type_desc) { - switch (column_data_logical_type) { - case TParquetDataLogicalType::DECIMAL: { - DCHECK(type_desc.precision != -1 && type_desc.scale != -1) - << "precision and scale: " << type_desc.precision << " " << type_desc.scale; - if (type_desc.type == TYPE_DECIMAL32) { - *primitive_length = 4; - } else if (type_desc.type == TYPE_DECIMAL64) { - *primitive_length = 8; - } else if (type_desc.type == TYPE_DECIMAL128I) { - *primitive_length = 16; - } else { - throw parquet::ParquetException( - "the logical decimal now only support in decimalv3, maybe error of " + - type_desc.debug_string()); - } - parquet_data_logical_type_ptr = - parquet::LogicalType::Decimal(type_desc.precision, type_desc.scale); - break; - } - case TParquetDataLogicalType::STRING: { - parquet_data_logical_type_ptr = parquet::LogicalType::String(); - break; - } - case TParquetDataLogicalType::DATE: { - parquet_data_logical_type_ptr = parquet::LogicalType::Date(); - break; - } - case TParquetDataLogicalType::TIMESTAMP: { - parquet_data_logical_type_ptr = - parquet::LogicalType::Timestamp(true, parquet::LogicalType::TimeUnit::MILLIS, true); - break; - } - default: { - parquet_data_logical_type_ptr = parquet::LogicalType::None(); - } - } -} - void ParquetBuildHelper::build_compression_type( parquet::WriterProperties::Builder& builder, const TParquetCompressionType::type& compression_type) { @@ -286,7 +204,6 @@ VParquetTransformer::VParquetTransformer(doris::io::FileWriter* file_writer, const TParquetVersion::type& parquet_version, bool output_object_data) : VFileFormatTransformer(output_vexpr_ctxs, output_object_data), - _rg_writer(nullptr), 
_parquet_schemas(parquet_schemas), _compression_type(compression_type), _parquet_disable_dictionary(parquet_disable_dictionary), @@ -294,7 +211,7 @@ VParquetTransformer::VParquetTransformer(doris::io::FileWriter* file_writer, _outstream = std::shared_ptr(new ParquetOutputStream(file_writer)); } -Status VParquetTransformer::parse_properties() { +Status VParquetTransformer::_parse_properties() { try { parquet::WriterProperties::Builder builder; ParquetBuildHelper::build_compression_type(builder, _compression_type); @@ -304,613 +221,59 @@ Status VParquetTransformer::parse_properties() { } else { builder.enable_dictionary(); } - _properties = builder.build(); + _parquet_writer_properties = builder.build(); + _arrow_properties = parquet::ArrowWriterProperties::Builder().store_schema()->build(); } catch (const parquet::ParquetException& e) { return Status::InternalError("parquet writer parse properties error: {}", e.what()); } return Status::OK(); } -Status VParquetTransformer::parse_schema() { - parquet::schema::NodeVector fields; - parquet::Repetition::type parquet_repetition_type; - parquet::Type::type parquet_physical_type; - std::shared_ptr parquet_data_logical_type; - int primitive_length = -1; - for (int idx = 0; idx < _parquet_schemas.size(); ++idx) { - primitive_length = -1; - ParquetBuildHelper::build_schema_repetition_type( - parquet_repetition_type, _parquet_schemas[idx].schema_repetition_type); - ParquetBuildHelper::build_schema_data_type(parquet_physical_type, - _parquet_schemas[idx].schema_data_type); - ParquetBuildHelper::build_schema_data_logical_type( - parquet_data_logical_type, _parquet_schemas[idx].schema_data_logical_type, - &primitive_length, _output_vexpr_ctxs[idx]->root()->type()); - try { - fields.push_back(parquet::schema::PrimitiveNode::Make( - _parquet_schemas[idx].schema_column_name, parquet_repetition_type, - parquet_data_logical_type, parquet_physical_type, primitive_length)); - } catch (const parquet::ParquetException& e) { - LOG(WARNING) 
<< "parquet writer parse schema error: " << e.what(); - return Status::InternalError("parquet writer parse schema error: {}", e.what()); - } - _schema = std::static_pointer_cast( - parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields)); +Status VParquetTransformer::_parse_schema() { + std::vector> fields; + for (size_t i = 0; i < _output_vexpr_ctxs.size(); i++) { + std::shared_ptr type; + RETURN_IF_ERROR(convert_to_arrow_type(_output_vexpr_ctxs[i]->root()->type(), &type)); + std::shared_ptr field = + arrow::field(_parquet_schemas[i].schema_column_name, type, + _output_vexpr_ctxs[i]->root()->is_nullable()); + fields.emplace_back(field); } + _arrow_schema = arrow::schema(std::move(fields)); return Status::OK(); } -#define RETURN_WRONG_TYPE \ - return Status::InvalidArgument("Invalid column type: {}", raw_column->get_name()); - -#define DISPATCH_PARQUET_NUMERIC_WRITER(WRITER, COLUMN_TYPE, NATIVE_TYPE) \ - parquet::RowGroupWriter* rgWriter = get_rg_writer(); \ - parquet::WRITER* col_writer = static_cast(rgWriter->column(i)); \ - if (null_map != nullptr) { \ - auto& null_data = assert_cast(*null_map).get_data(); \ - for (size_t row_id = 0; row_id < sz; row_id++) { \ - def_level[row_id] = null_data[row_id] == 0; \ - } \ - col_writer->WriteBatch(sz, def_level.data(), nullptr, \ - reinterpret_cast( \ - assert_cast(*col).get_data().data())); \ - } else if (const auto* not_nullable_column = check_and_get_column(col)) { \ - col_writer->WriteBatch( \ - sz, nullable ? 
def_level.data() : nullptr, nullptr, \ - reinterpret_cast(not_nullable_column->get_data().data())); \ - } else { \ - RETURN_WRONG_TYPE \ - } - -#define DISPATCH_PARQUET_COMPLEX_WRITER(COLUMN_TYPE) \ - parquet::RowGroupWriter* rgWriter = get_rg_writer(); \ - parquet::ByteArrayWriter* col_writer = \ - static_cast(rgWriter->column(i)); \ - if (null_map != nullptr) { \ - auto& null_data = assert_cast(*null_map).get_data(); \ - for (size_t row_id = 0; row_id < sz; row_id++) { \ - if (null_data[row_id] != 0) { \ - single_def_level = 0; \ - parquet::ByteArray value; \ - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); \ - single_def_level = 1; \ - } else { \ - const auto& tmp = col->get_data_at(row_id); \ - parquet::ByteArray value; \ - value.ptr = reinterpret_cast(tmp.data); \ - value.len = tmp.size; \ - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); \ - } \ - } \ - } else if (const auto* not_nullable_column = check_and_get_column(col)) { \ - for (size_t row_id = 0; row_id < sz; row_id++) { \ - const auto& tmp = not_nullable_column->get_data_at(row_id); \ - parquet::ByteArray value; \ - value.ptr = reinterpret_cast(tmp.data); \ - value.len = tmp.size; \ - col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr, &value); \ - } \ - } else { \ - RETURN_WRONG_TYPE \ - } - Status VParquetTransformer::write(const Block& block) { if (block.rows() == 0) { return Status::OK(); } - size_t sz = block.rows(); - try { - for (size_t i = 0; i < block.columns(); i++) { - auto& raw_column = block.get_by_position(i).column; - auto nullable = raw_column->is_nullable(); - const auto col = nullable ? reinterpret_cast( - block.get_by_position(i).column.get()) - ->get_nested_column_ptr() - .get() - : block.get_by_position(i).column.get(); - auto null_map = nullable && reinterpret_cast( - block.get_by_position(i).column.get()) - ->has_null() - ? 
reinterpret_cast( - block.get_by_position(i).column.get()) - ->get_null_map_column_ptr() - : nullptr; - auto& type = block.get_by_position(i).type; - std::vector def_level(sz); - // For scalar type, definition level == 1 means this value is not NULL. - std::fill(def_level.begin(), def_level.end(), 1); - int16_t single_def_level = 1; - switch (_output_vexpr_ctxs[i]->root()->type().type) { - case TYPE_BOOLEAN: { - DISPATCH_PARQUET_NUMERIC_WRITER(BoolWriter, ColumnVector, bool) - break; - } - case TYPE_BIGINT: { - DISPATCH_PARQUET_NUMERIC_WRITER(Int64Writer, ColumnVector, int64_t) - break; - } - case TYPE_LARGEINT: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::ByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::ByteArray value; - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - const int128_t tmp = assert_cast&>(*col) - .get_data()[row_id]; - std::string value_str = fmt::format("{}", tmp); - value.ptr = reinterpret_cast(value_str.data()); - value.len = value_str.length(); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else if (const auto* not_nullable_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - const int128_t tmp = not_nullable_column->get_data()[row_id]; - std::string value_str = fmt::format("{}", tmp); - value.ptr = reinterpret_cast(value_str.data()); - value.len = value_str.length(); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_FLOAT: { - DISPATCH_PARQUET_NUMERIC_WRITER(FloatWriter, ColumnVector, float_t) - break; - } - case TYPE_DOUBLE: { - DISPATCH_PARQUET_NUMERIC_WRITER(DoubleWriter, ColumnVector, double_t) - break; - } - case TYPE_TINYINT: - case TYPE_SMALLINT: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::Int32Writer* col_writer = - static_cast(rgWriter->column(i)); - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - if (const auto* int16_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - } - const int32_t tmp = int16_column->get_data()[row_id]; - col_writer->WriteBatch(1, &single_def_level, nullptr, - reinterpret_cast(&tmp)); - single_def_level = 1; - } - } else if (const auto* int8_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - } - const int32_t tmp = int8_column->get_data()[row_id]; - col_writer->WriteBatch(1, &single_def_level, nullptr, - reinterpret_cast(&tmp)); - single_def_level = 1; - } - } else { - RETURN_WRONG_TYPE - } - } else if (const auto& int16_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - const int32_t tmp = int16_column->get_data()[row_id]; - col_writer->WriteBatch(1, nullable ? def_level.data() : nullptr, nullptr, - reinterpret_cast(&tmp)); - } - } else if (const auto& int8_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - const int32_t tmp = int8_column->get_data()[row_id]; - col_writer->WriteBatch(1, nullable ? 
def_level.data() : nullptr, nullptr, - reinterpret_cast(&tmp)); - } - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_INT: { - DISPATCH_PARQUET_NUMERIC_WRITER(Int32Writer, ColumnVector, Int32) - break; - } - case TYPE_DATETIME: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::Int64Writer* col_writer = - static_cast(rgWriter->column(i)); - uint64_t default_int64 = 0; - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - for (size_t row_id = 0; row_id < sz; row_id++) { - def_level[row_id] = null_data[row_id] == 0; - } - int64_t tmp_data[sz]; - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - tmp_data[row_id] = default_int64; - } else { - VecDateTimeValue datetime_value = binary_cast( - assert_cast&>(*col) - .get_data()[row_id]); - if (!datetime_value.unix_timestamp(&tmp_data[row_id], - TimezoneUtils::default_time_zone)) { - return Status::InternalError("get unix timestamp error."); - } - // -2177481943 represent '1900-12-31 23:54:17' - // but -2177481944 represent '1900-12-31 23:59:59' - // so for timestamp <= -2177481944, we subtract 343 (5min 43s) - if (tmp_data[row_id] < timestamp_threshold) { - tmp_data[row_id] -= timestamp_diff; - } - // convert seconds to MILLIS seconds - tmp_data[row_id] *= 1000; - } - } - col_writer->WriteBatch(sz, def_level.data(), nullptr, - reinterpret_cast(tmp_data)); - } else if (const auto* not_nullable_column = - check_and_get_column>(col)) { - std::vector res(sz); - for (size_t row_id = 0; row_id < sz; row_id++) { - VecDateTimeValue datetime_value = binary_cast( - not_nullable_column->get_data()[row_id]); + // serialize + std::shared_ptr result; + RETURN_IF_ERROR( + convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), &result)); - if (!datetime_value.unix_timestamp(&res[row_id], - TimezoneUtils::default_time_zone)) { - return Status::InternalError("get unix timestamp error."); - }; - // -2177481943 represent '1900-12-31 
23:54:17' - // but -2177481944 represent '1900-12-31 23:59:59' - // so for timestamp <= -2177481944, we subtract 343 (5min 43s) - if (res[row_id] < timestamp_threshold) { - res[row_id] -= timestamp_diff; - } - // convert seconds to MILLIS seconds - res[row_id] *= 1000; - } - col_writer->WriteBatch(sz, nullable ? def_level.data() : nullptr, nullptr, - reinterpret_cast(res.data())); - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_DATE: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::Int64Writer* col_writer = - static_cast(rgWriter->column(i)); - uint64_t default_int64 = 0; - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - for (size_t row_id = 0; row_id < sz; row_id++) { - def_level[row_id] = null_data[row_id] == 0; - } - VecDateTimeValue epoch_date; - if (!epoch_date.from_date_str(epoch_date_str.c_str(), - epoch_date_str.length())) { - return Status::InternalError("create epoch date from string error"); - } - int32_t days_from_epoch = epoch_date.daynr(); - int32_t tmp_data[sz]; - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - tmp_data[row_id] = default_int64; - } else { - int32_t days = binary_cast( - assert_cast&>(*col) - .get_data()[row_id]) - .daynr(); - tmp_data[row_id] = days - days_from_epoch; - } - } - col_writer->WriteBatch(sz, def_level.data(), nullptr, - reinterpret_cast(tmp_data)); - } else if (check_and_get_column>(col)) { - VecDateTimeValue epoch_date; - if (!epoch_date.from_date_str(epoch_date_str.c_str(), - epoch_date_str.length())) { - return Status::InternalError("create epoch date from string error"); - } - int32_t days_from_epoch = epoch_date.daynr(); - std::vector res(sz); - for (size_t row_id = 0; row_id < sz; row_id++) { - int32_t days = binary_cast( - assert_cast&>(*col) - .get_data()[row_id]) - .daynr(); - res[row_id] = days - days_from_epoch; - } - col_writer->WriteBatch(sz, nullable ? 
def_level.data() : nullptr, nullptr, - reinterpret_cast(res.data())); - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_DATEV2: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::ByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::ByteArray value; - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - char buffer[30]; - int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; - value.ptr = reinterpret_cast(buffer); - value.len = binary_cast>( - assert_cast&>(*col) - .get_data()[row_id]) - .to_buffer(buffer, output_scale); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else if (const auto* not_nullable_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - char buffer[30]; - int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; - value.ptr = reinterpret_cast(buffer); - value.len = binary_cast>( - not_nullable_column->get_data()[row_id]) - .to_buffer(buffer, output_scale); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_DATETIMEV2: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::ByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::ByteArray value; - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - char buffer[30]; - int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; - value.ptr = reinterpret_cast(buffer); - value.len = binary_cast>( - assert_cast&>(*col) - .get_data()[row_id]) - .to_buffer(buffer, output_scale); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else if (const auto* not_nullable_column = - check_and_get_column>(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - char buffer[30]; - int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; - value.ptr = reinterpret_cast(buffer); - value.len = binary_cast>( - not_nullable_column->get_data()[row_id]) - .to_buffer(buffer, output_scale); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_OBJECT: { - if (_output_object_data) { - DISPATCH_PARQUET_COMPLEX_WRITER(ColumnBitmap) - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_HLL: { - if (_output_object_data) { - DISPATCH_PARQUET_COMPLEX_WRITER(ColumnHLL) - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: { - DISPATCH_PARQUET_COMPLEX_WRITER(ColumnString) - break; - } - case TYPE_DECIMALV2: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::ByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::ByteArray value; - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - const DecimalV2Value decimal_val(reinterpret_cast( - col->get_data_at(row_id).data) - ->value); - char decimal_buffer[MAX_DECIMAL_WIDTH]; - int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; - value.ptr = reinterpret_cast(decimal_buffer); - value.len = decimal_val.to_buffer(decimal_buffer, output_scale); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else if (const auto* not_nullable_column = - check_and_get_column(col)) { - for (size_t row_id = 0; row_id < sz; row_id++) { - const DecimalV2Value decimal_val( - reinterpret_cast( - not_nullable_column->get_data_at(row_id).data) - ->value); - char decimal_buffer[MAX_DECIMAL_WIDTH]; - int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; - value.ptr = reinterpret_cast(decimal_buffer); - value.len = decimal_val.to_buffer(decimal_buffer, output_scale); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } else { - RETURN_WRONG_TYPE - } - break; - } - case TYPE_DECIMAL32: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::FixedLenByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::FixedLenByteArray value; - auto decimal_type = check_and_get_data_type>( - remove_nullable(type).get()); - DCHECK(decimal_type); - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - const auto& data_column = assert_cast(*col); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - auto data = data_column.get_element(row_id); - auto big_endian = bswap_32(data); - value.ptr = reinterpret_cast(&big_endian); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else { - const auto& data_column = assert_cast(*col); - for (size_t row_id = 0; row_id < sz; row_id++) { - auto data = data_column.get_element(row_id); - auto big_endian = bswap_32(data); - value.ptr = reinterpret_cast(&big_endian); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } - break; - } - case TYPE_DECIMAL64: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::FixedLenByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::FixedLenByteArray value; - auto decimal_type = check_and_get_data_type>( - remove_nullable(type).get()); - DCHECK(decimal_type); - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - const auto& data_column = assert_cast(*col); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - auto data = data_column.get_element(row_id); - auto big_endian = bswap_64(data); - value.ptr = reinterpret_cast(&big_endian); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else { - const auto& data_column = assert_cast(*col); - for (size_t row_id = 0; row_id < sz; row_id++) { - auto data = data_column.get_element(row_id); - auto big_endian = bswap_64(data); - value.ptr = reinterpret_cast(&big_endian); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } - break; - } - case TYPE_DECIMAL128I: { - parquet::RowGroupWriter* rgWriter = get_rg_writer(); - parquet::FixedLenByteArrayWriter* col_writer = - static_cast(rgWriter->column(i)); - parquet::FixedLenByteArray value; - auto decimal_type = check_and_get_data_type>( - remove_nullable(type).get()); - DCHECK(decimal_type); - if (null_map != nullptr) { - auto& null_data = assert_cast(*null_map).get_data(); - const auto& data_column = assert_cast(*col); - for (size_t row_id = 0; row_id < sz; row_id++) { - if (null_data[row_id] != 0) { - single_def_level = 0; - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - single_def_level = 1; - } else { - auto data = data_column.get_element(row_id); - auto big_endian = gbswap_128(data); - value.ptr = reinterpret_cast(&big_endian); - col_writer->WriteBatch(1, &single_def_level, nullptr, &value); - } - } - } else { - const auto& data_column = assert_cast(*col); - for (size_t row_id = 0; row_id < sz; row_id++) { - auto data = data_column.get_element(row_id); - auto big_endian = gbswap_128(data); - value.ptr = reinterpret_cast(&big_endian); - col_writer->WriteBatch(1, nullable ? 
&single_def_level : nullptr, nullptr, - &value); - } - } - break; - } - default: { - return Status::InvalidArgument( - "Invalid expression type: {}", - _output_vexpr_ctxs[i]->root()->type().debug_string()); - } - } - } - } catch (const std::exception& e) { - LOG(WARNING) << "Parquet write error: " << e.what(); - return Status::InternalError(e.what()); + auto get_table_res = arrow::Table::FromRecordBatches(result->schema(), {result}); + if (!get_table_res.ok()) { + return Status::InternalError("Error when get arrow table from record batchs"); } - _cur_written_rows += sz; + auto& table = get_table_res.ValueOrDie(); + RETURN_DORIS_STATUS_IF_ERROR(_writer->WriteTable(*table, block.rows())); return Status::OK(); } +arrow::Status VParquetTransformer::_open_file_writer() { + ARROW_ASSIGN_OR_RAISE(_writer, parquet::arrow::FileWriter::Open( + *_arrow_schema, arrow::default_memory_pool(), _outstream, + _parquet_writer_properties, _arrow_properties)); + return arrow::Status::OK(); +} + Status VParquetTransformer::open() { - RETURN_IF_ERROR(parse_properties()); - RETURN_IF_ERROR(parse_schema()); + RETURN_IF_ERROR(_parse_properties()); + RETURN_IF_ERROR(_parse_schema()); try { - _writer = parquet::ParquetFileWriter::Open(_outstream, _schema, _properties); + RETURN_DORIS_STATUS_IF_ERROR(_open_file_writer()); } catch (const parquet::ParquetStatusException& e) { LOG(WARNING) << "parquet file writer open error: " << e.what(); return Status::InternalError("parquet file writer open error: {}", e.what()); @@ -921,38 +284,18 @@ Status VParquetTransformer::open() { return Status::OK(); } -parquet::RowGroupWriter* VParquetTransformer::get_rg_writer() { - if (_rg_writer == nullptr) { - _rg_writer = _writer->AppendBufferedRowGroup(); - } - if (_cur_written_rows > _max_row_per_group) { - _rg_writer->Close(); - _rg_writer = _writer->AppendBufferedRowGroup(); - _cur_written_rows = 0; - } - return _rg_writer; -} - int64_t VParquetTransformer::written_len() { return 
_outstream->get_written_len(); } Status VParquetTransformer::close() { try { - if (_rg_writer != nullptr) { - _rg_writer->Close(); - _rg_writer = nullptr; - } if (_writer != nullptr) { - _writer->Close(); - } - arrow::Status st = _outstream->Close(); - if (!st.ok()) { - LOG(WARNING) << "close parquet file error: " << st.ToString(); - return Status::IOError(st.ToString()); + RETURN_DORIS_STATUS_IF_ERROR(_writer->Close()); } + RETURN_DORIS_STATUS_IF_ERROR(_outstream->Close()); + } catch (const std::exception& e) { - _rg_writer = nullptr; LOG(WARNING) << "Parquet writer close error: " << e.what(); return Status::IOError(e.what()); } diff --git a/be/src/vec/runtime/vparquet_transformer.h b/be/src/vec/runtime/vparquet_transformer.h index 8637f77dbd..ec97f675c3 100644 --- a/be/src/vec/runtime/vparquet_transformer.h +++ b/be/src/vec/runtime/vparquet_transformer.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,7 @@ public: const bool& parquet_disable_dictionary, const TParquetVersion::type& parquet_version, bool output_object_data); - ~VParquetTransformer() = default; + ~VParquetTransformer() override = default; Status open() override; @@ -106,19 +107,15 @@ public: int64_t written_len() override; private: - parquet::RowGroupWriter* get_rg_writer(); + Status _parse_properties(); + Status _parse_schema(); + arrow::Status _open_file_writer(); - Status parse_schema(); - - Status parse_properties(); - -private: std::shared_ptr _outstream; - std::shared_ptr _properties; - std::shared_ptr _schema; - std::unique_ptr _writer; - parquet::RowGroupWriter* _rg_writer; - const int64_t _max_row_per_group = 10; + std::shared_ptr _parquet_writer_properties; + std::shared_ptr _arrow_properties; + std::unique_ptr _writer; + std::shared_ptr _arrow_schema; const std::vector& _parquet_schemas; const TParquetCompressionType::type& _compression_type; diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp 
b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp index b76ab28514..732a6f3c7d 100644 --- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp @@ -553,10 +553,8 @@ TEST(DataTypeSerDeArrowTest, DataTypeMapNullKeySerDeTest) { DataTypePtr d = std::make_shared(std::make_shared()); DataTypePtr m = std::make_shared(s, d); Array k1, k2, v1, v2, k3, v3; - k1.push_back(Null()); k1.push_back("doris"); k1.push_back("clever amory"); - v1.push_back(11); v1.push_back(Null()); v1.push_back(30); k2.push_back("hello amory"); @@ -568,9 +566,7 @@ TEST(DataTypeSerDeArrowTest, DataTypeMapNullKeySerDeTest) { v2.push_back(6); v2.push_back(7); k3.push_back("test"); - k3.push_back(Null()); v3.push_back(11); - v3.push_back(30); Map m1, m2, m3; m1.push_back(k1); m1.push_back(v1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java index 34cb28093d..8696edcf9e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java @@ -39,7 +39,6 @@ import org.apache.doris.datasource.property.constants.S3Properties; import org.apache.doris.qe.ConnectContext; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TParquetCompressionType; -import org.apache.doris.thrift.TParquetDataLogicalType; import org.apache.doris.thrift.TParquetDataType; import org.apache.doris.thrift.TParquetRepetitionType; import org.apache.doris.thrift.TParquetSchema; @@ -70,7 +69,6 @@ public class OutFileClause { public static final List RESULT_COL_TYPES = Lists.newArrayList(); public static final Map PARQUET_REPETITION_TYPE_MAP = Maps.newHashMap(); public static final Map PARQUET_DATA_TYPE_MAP = Maps.newHashMap(); - public static final Map PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP = Maps.newHashMap(); public static final Map 
PARQUET_COMPRESSION_TYPE_MAP = Maps.newHashMap(); public static final Map PARQUET_VERSION_MAP = Maps.newHashMap(); public static final Set ORC_DATA_TYPE = Sets.newHashSet(); @@ -103,12 +101,6 @@ public class OutFileClause { PARQUET_DATA_TYPE_MAP.put("double", TParquetDataType.DOUBLE); PARQUET_DATA_TYPE_MAP.put("fixed_len_byte_array", TParquetDataType.FIXED_LEN_BYTE_ARRAY); - PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("decimal", TParquetDataLogicalType.DECIMAL); - PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("date", TParquetDataLogicalType.DATE); - PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("datetime", TParquetDataLogicalType.TIMESTAMP); - // TODO(ftw): add other logical type - PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("none", TParquetDataLogicalType.NONE); - PARQUET_COMPRESSION_TYPE_MAP.put("snappy", TParquetCompressionType.SNAPPY); PARQUET_COMPRESSION_TYPE_MAP.put("gzip", TParquetCompressionType.GZIP); PARQUET_COMPRESSION_TYPE_MAP.put("brotli", TParquetCompressionType.BROTLI); @@ -495,170 +487,17 @@ public class OutFileClause { private void analyzeForParquetFormat(List resultExprs, List colLabels) throws AnalysisException { if (this.parquetSchemas.isEmpty()) { - genParquetSchema(resultExprs, colLabels); + genParquetColumnName(resultExprs, colLabels); } - // check schema number if (resultExprs.size() != this.parquetSchemas.size()) { throw new AnalysisException("Parquet schema number does not equal to select item number"); } - - // check type - for (int i = 0; i < this.parquetSchemas.size(); ++i) { - TParquetDataType type = this.parquetSchemas.get(i).schema_data_type; - Type resultType = resultExprs.get(i).getType(); - switch (resultType.getPrimitiveType()) { - case BOOLEAN: - if (!PARQUET_DATA_TYPE_MAP.get("boolean").equals(type)) { - throw new AnalysisException("project field type is BOOLEAN, should use boolean," - + " but the type of column " + i + " is " + type); - } - break; - case TINYINT: - case SMALLINT: - case INT: - case DATE: - if 
(!PARQUET_DATA_TYPE_MAP.get("int32").equals(type)) { - throw new AnalysisException("project field type is TINYINT/SMALLINT/INT," - + "should use int32, " + "but the definition type of column " + i + " is " + type); - } - break; - case BIGINT: - case DATETIME: - if (!PARQUET_DATA_TYPE_MAP.get("int64").equals(type)) { - throw new AnalysisException("project field type is BIGINT/DATE/DATETIME," - + "should use int64, but the definition type of column " + i + " is " + type); - } - break; - case FLOAT: - if (!PARQUET_DATA_TYPE_MAP.get("float").equals(type)) { - throw new AnalysisException("project field type is FLOAT, should use float," - + " but the definition type of column " + i + " is " + type); - } - break; - case DOUBLE: - if (!PARQUET_DATA_TYPE_MAP.get("double").equals(type)) { - throw new AnalysisException("project field type is DOUBLE, should use double," - + " but the definition type of column " + i + " is " + type); - } - break; - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: { - if (!PARQUET_DATA_TYPE_MAP.get("fixed_len_byte_array").equals(type)) { - throw new AnalysisException("project field type is DECIMAL" - + ", should use fixed_len_byte_array, but the definition type of column " - + i + " is " + type); - } - break; - } - case DECIMALV2: - case CHAR: - case VARCHAR: - case STRING: - case DATETIMEV2: - case DATEV2: - case LARGEINT: - if (!PARQUET_DATA_TYPE_MAP.get("byte_array").equals(type)) { - throw new AnalysisException("project field type is CHAR/VARCHAR/STRING/DECIMAL/DATEV2" - + "/DATETIMEV2/LARGEINT, should use byte_array, but the definition type of column " - + i + " is " + type); - } - break; - case HLL: - case BITMAP: - if (ConnectContext.get() != null && ConnectContext.get() - .getSessionVariable().isReturnObjectDataAsBinary()) { - if (!PARQUET_DATA_TYPE_MAP.get("byte_array").equals(type)) { - throw new AnalysisException("project field type is HLL/BITMAP, should use byte_array, " - + "but the definition type of column " + i + " is " + 
type); - } - } else { - throw new AnalysisException("Parquet format does not support column type: " - + resultType.getPrimitiveType()); - } - break; - default: - throw new AnalysisException("Parquet format does not support column type: " - + resultType.getPrimitiveType()); - } - } } - private void genParquetSchema(List resultExprs, List colLabels) throws AnalysisException { - Preconditions.checkState(this.parquetSchemas.isEmpty()); + private void genParquetColumnName(List resultExprs, List colLabels) throws AnalysisException { for (int i = 0; i < resultExprs.size(); ++i) { - Expr expr = resultExprs.get(i); TParquetSchema parquetSchema = new TParquetSchema(); - if (resultExprs.get(i).isNullable()) { - parquetSchema.schema_repetition_type = PARQUET_REPETITION_TYPE_MAP.get("optional"); - } else { - parquetSchema.schema_repetition_type = PARQUET_REPETITION_TYPE_MAP.get("required"); - } - switch (expr.getType().getPrimitiveType()) { - case BOOLEAN: - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("boolean"); - break; - case TINYINT: - case SMALLINT: - case INT: - case DATE: - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("int32"); - break; - case BIGINT: - case DATETIME: - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("int64"); - break; - case FLOAT: - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("float"); - break; - case DOUBLE: - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("double"); - break; - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: { - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("fixed_len_byte_array"); - break; - } - case DECIMALV2: - case CHAR: - case VARCHAR: - case STRING: - case DATETIMEV2: - case DATEV2: - case LARGEINT: - parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("byte_array"); - break; - case HLL: - case BITMAP: - if (ConnectContext.get() != null && ConnectContext.get() - .getSessionVariable().isReturnObjectDataAsBinary()) { - 
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("byte_array"); - } - break; - default: - throw new AnalysisException("currently parquet do not support column type: " - + expr.getType().getPrimitiveType()); - } - - switch (expr.getType().getPrimitiveType()) { - case DECIMAL32: - case DECIMAL64: - case DECIMAL128: { - parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("decimal"); - break; - } - case DATE: - parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("date"); - break; - case DATETIME: - parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("datetime"); - break; - default: - parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("none"); - } - parquetSchema.schema_column_name = colLabels.get(i); parquetSchemas.add(parquetSchema); } @@ -864,6 +703,7 @@ public class OutFileClause { } // check schema. if schema is not set, Doris will gen schema by select items + // Note: These codes are useless and outdated. 
String schema = properties.get(SCHEMA); if (schema == null) { return; diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index 563176e5ab..29aa6d6644 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -651,10 +651,6 @@ public class SelectStmtTest { try { SelectStmt stmt = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql, ctx); Assert.assertEquals(1, stmt.getOutFileClause().getParquetSchemas().size()); - Assert.assertEquals(stmt.getOutFileClause().PARQUET_REPETITION_TYPE_MAP.get("optional"), - stmt.getOutFileClause().getParquetSchemas().get(0).schema_repetition_type); - Assert.assertEquals(stmt.getOutFileClause().PARQUET_DATA_TYPE_MAP.get("byte_array"), - stmt.getOutFileClause().getParquetSchemas().get(0).schema_data_type); Assert.assertEquals("k1", stmt.getOutFileClause().getParquetSchemas().get(0).schema_column_name); } catch (Exception e) { Assert.fail(e.getMessage()); diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet.out new file mode 100644 index 0000000000..cb6eab3268 --- /dev/null +++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_default -- +1 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 1 1 true 1 1 1 1.1 1.1 char1 1 +2 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 2 2 true 2 2 2 2.2 2.2 char2 2 +3 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 3 3 true 3 3 3 3.3 3.3 char3 3 +4 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 4 4 true 4 4 4 4.4 4.4 char4 4 +5 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 5 5 true 5 5 5 5.5 5.5 char5 5 +6 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 6 6 true 6 6 6 6.6 6.6 char6 6 +7 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 7 7 true 7 7 7 7.7 7.7 char7 7 +8 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 8 8 true 8 8 8 8.8 8.8 char8 8 +9 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 9 9 true 9 9 9 9.9 9.9 char9 9 +10 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 \N \N \N \N \N \N \N \N \N \N \N + +-- !select_default -- +1 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 1 1 true 1 1 1 1.1 1.1 char1 1 +2 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 2 2 true 2 2 2 2.2 2.2 char2 2 +3 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 
Beijing 3 3 true 3 3 3 3.3 3.3 char3 3 +4 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 4 4 true 4 4 4 4.4 4.4 char4 4 +5 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 5 5 true 5 5 5 5.5 5.5 char5 5 +6 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 6 6 true 6 6 6 6.6 6.6 char6 6 +7 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 7 7 true 7 7 7 7.7 7.7 char7 7 +8 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 8 8 true 8 8 8 8.8 8.8 char8 8 +9 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 9 9 true 9 9 9 9.9 9.9 char9 9 +10 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 \N \N \N \N \N \N \N \N \N \N \N + diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_array_type.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_array_type.out new file mode 100644 index 0000000000..d05e8d17a4 --- /dev/null +++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_array_type.out @@ -0,0 +1,229 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_base1 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +4 doris4 \N +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] +8 doris8 \N + +-- !select_load1 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +4 doris4 \N +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] +8 doris8 \N + +-- !select_base2 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] + +-- !select_load2 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] + +-- !select_base_date -- +1 doris1 [2017-10-01, 2023-09-13, 2023-12-31] +2 doris2 [1967-10-01, 1000-09-13] +3 doris3 [] +5 doris5 [0001-10-01, NULL, 0000-01-01] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 2017-10-01, 2023-09-13, 2023-12-31] + +-- !select_load_date -- +1 doris1 ["2017-10-01", "2023-09-13", "2023-12-31"] +2 doris2 ["1967-10-01", "1000-09-13"] +3 doris3 [] +5 doris5 ["0001-10-01", NULL, "0000-01-01"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, "2017-10-01", "2023-09-13", "2023-12-31"] + +-- !select_base_datetime -- +1 doris1 [2017-10-01 00:00:00, 2011-10-01 01:23:59] +2 doris2 [2017-10-01 00:00:00, 2011-10-01 01:23:59] +3 doris3 [] +5 doris5 [2017-10-01 00:00:00, NULL, 2017-10-01 00:00:00] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 2017-10-01 00:00:00, 2011-10-01 01:23:59] + +-- !select_load_datetime -- +1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] +2 doris2 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] +3 doris3 [] +5 doris5 ["2017-10-01 00:00:00", NULL, "2017-10-01 00:00:00"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"] + +-- 
!select_base_varchar -- +1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] +2 doris2 ["2017-10-01 00:00:00.123", "2011-10-01 01:23:59"] +3 doris3 [] +5 doris5 ["2017-10-01 00:00:00.123456", NULL, "2017-10-01 00:00:00.123"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, "null", NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"] + +-- !select_load_varchar -- +1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"] +2 doris2 ["2017-10-01 00:00:00.123", "2011-10-01 01:23:59"] +3 doris3 [] +5 doris5 ["2017-10-01 00:00:00.123456", NULL, "2017-10-01 00:00:00.123"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, "null", NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"] + +-- !select_base_smallint -- +1 doris1 [-32768, 32767] +2 doris2 [-1, -1, -2, 0, 3, 99] +3 doris3 [] +5 doris5 [-32768, 32767, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -32768, 32767] + +-- !select_smallint -- +1 doris1 [-32768, 32767] +2 doris2 [-1, -1, -2, 0, 3, 99] +3 doris3 [] +5 doris5 [-32768, 32767, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -32768, 32767] + +-- !select_base_tinyint -- +1 doris1 [-128, 127] +2 doris2 [-1, -1, -2, 0, 3, 99] +3 doris3 [] +5 doris5 [-128, 127, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -128, 127] + +-- !select_load_tinyint -- +1 doris1 [-128, 127] +2 doris2 [-1, -1, -2, 0, 3, 99] +3 doris3 [] +5 doris5 [-128, 127, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -128, 127] + +-- !select_base_boolean -- +1 doris1 [1, 0, 1, 1, 0] +2 doris2 [1, 0, 0, 1, 1] +3 doris3 [] +5 doris5 [1, 0, 1] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, 0, 1] + +-- !select_load_boolean -- +1 doris1 [1, 0, 1, 1, 0] +2 doris2 [1, 0, 0, 1, 1] +3 doris3 [] +5 doris5 [1, 0, 1] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, 0, 1] + +-- !select_base_bigint -- +1 doris1 [-9223372036854775808, 9223372036854775807] +2 doris2 [-14141, -9223372036854775808, 9223372036854775807, 9891912, 3, 99] +3 doris3 [] +5 doris5 [-128, 127, 99, -99] +6 doris6 [NULL, 
NULL, NULL] +7 doris7 [NULL, -9223372036854775808, 9223372036854775807] + +-- !select_load_bigint -- +1 doris1 [-9223372036854775808, 9223372036854775807] +2 doris2 [-14141, -9223372036854775808, 9223372036854775807, 9891912, 3, 99] +3 doris3 [] +5 doris5 [-128, 127, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -9223372036854775808, 9223372036854775807] + +-- !select_base_largeint -- +1 doris1 [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727] +2 doris2 [-1, 170141183460469231731687303715884105727, -2, 0, 3, 99] +3 doris3 [] +5 doris5 [-170141183460469231731687303715884105728, 127, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -170141183460469231731687303715884105728, 170141183460469231731687303715884105727] + +-- !select_load_largeint -- +1 doris1 ["-170141183460469231731687303715884105728", "170141183460469231731687303715884105727"] +2 doris2 ["-1", "170141183460469231731687303715884105727", "-2", "0", "3", "99"] +3 doris3 [] +5 doris5 ["-170141183460469231731687303715884105728", "127", "99", "-99"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, "-170141183460469231731687303715884105728", "170141183460469231731687303715884105727"] + +-- !select_base_float -- +1 doris1 [1.4013e-45, 3.4028235e+38] +2 doris2 [-1.1, 1.2231, 3.4028235e+38, 0, 3, 99.009888] +3 doris3 [] +5 doris5 [-12.8, 1.4013e-45, 3.4028235e+38, -9.9] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, 1.4013e-45, 3.4028235e+38] + +-- !select_load_float -- +1 doris1 [1.4013e-45, 3.4028235e+38] +2 doris2 [-1.1, 1.2231, 3.4028235e+38, 0, 3, 99.009888] +3 doris3 [] +5 doris5 [-12.8, 1.4013e-45, 3.4028235e+38, -9.9] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, 1.4013e-45, 3.4028235e+38] + +-- !select_base_double -- +1 doris1 [4.94065645841247e-324, 1.7976931348623157e+308] +2 doris2 [-1.1, 1.2231, 1.7976931348623157e+308, 0, 3, 99.00989] +3 doris3 [] +5 doris5 [-128, 4.94065645841247e-324, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, 
4.94065645841247e-324, 1.7976931348623157e+308] + +-- !select_load_double -- +1 doris1 [4.94065645841247e-324, 1.7976931348623157e+308] +2 doris2 [-1.1, 1.2231, 1.7976931348623157e+308, 0, 3, 99.00989] +3 doris3 [] +5 doris5 [-128, 4.94065645841247e-324, 99, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, 4.94065645841247e-324, 1.7976931348623157e+308] + +-- !select_base_CHAR -- +1 doris1 ["1234567890", "doris12345"] +2 doris2 ["90", "doris1245"] +3 doris3 [] +5 doris5 ["doris-123", "doris-123", "doris-124", "doris12378"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, "doris-123", "doris-123"] + +-- !select_load_CHAR -- +1 doris1 ["1234567890", "doris12345"] +2 doris2 ["90", "doris1245"] +3 doris3 [] +5 doris5 ["doris-123", "doris-123", "doris-124", "doris12378"] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, "doris-123", "doris-123"] + +-- !select_base_decimal -- +1 doris1 [-128, 127] +2 doris2 [-1, -2, -21231, 0, 3, 99] +3 doris3 [] +5 doris5 [-13, 1, 9434364, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -13, 13] + +-- !select_load_decimal -- +1 doris1 [-128, 127] +2 doris2 [-1, -2, -21231, 0, 3, 99] +3 doris3 [] +5 doris5 [-13, 1, 9434364, -99] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, -13, 13] + diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out new file mode 100644 index 0000000000..aef8c7ebdf --- /dev/null +++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out @@ -0,0 +1,129 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_base -- +1 doris1 {1, "sn1", "sa1"} +2 doris2 {2, "sn2", "sa2"} +3 doris3 {3, "sn3", "sa3"} +4 doris4 \N +5 doris5 {5, NULL, "sa5"} +6 doris6 {NULL, NULL, NULL} +7 \N {NULL, NULL, NULL} +8 \N \N + +-- !select_load1 -- +1 doris1 {1, "sn1", "sa1"} +2 doris2 {2, "sn2", "sa2"} +3 doris3 {3, "sn3", "sa3"} +4 doris4 \N +5 doris5 {5, NULL, "sa5"} +6 doris6 {NULL, NULL, NULL} +7 \N {NULL, NULL, NULL} +8 \N \N + +-- !select_base2 -- +1 doris1 {1, "sn1", "sa1"} +2 doris2 {2, "sn2", "sa2"} +3 doris3 {3, "sn3", "sa3"} +5 doris5 {5, NULL, "sa5"} +6 doris6 {NULL, NULL, NULL} +7 \N {NULL, NULL, NULL} + +-- !select_load2 -- +1 doris1 {1, "sn1", "sa1"} +2 doris2 {2, "sn2", "sa2"} +3 doris3 {3, "sn3", "sa3"} +5 doris5 {5, NULL, "sa5"} +6 doris6 {NULL, NULL, NULL} +7 \N {NULL, NULL, NULL} + +-- !select_base3 -- +1 doris1 {"a":100, "b":111} +2 doris2 {"a":200, "b":222} +3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":100, "b":NULL} +6 \N \N +7 doris7 \N + +-- !select_load3 -- +1 doris1 {"a":"100", "b":"111"} +2 doris2 {"a":"200", "b":"222"} +3 doris3 {"a":NULL, "b":"333", "c":"399", "d":"399999999999999"} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":"100", "b":NULL} +6 \N \N +7 doris7 \N + +-- !select_base4 -- +1 doris1 {"a":100, "b":111} +2 doris2 {"a":200, "b":222} +3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":100, "b":NULL} + +-- !select_load4 -- +1 doris1 {"a":"100", "b":"111"} +2 doris2 {"a":"200", "b":"222"} +3 doris3 {"a":NULL, "b":"333", "c":"399", "d":"399999999999999"} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":"100", "b":NULL} + +-- !select_base5 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +4 doris4 \N +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] +8 doris8 \N + +-- !select_load5 -- +1 
doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +4 doris4 \N +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] +8 doris8 \N + +-- !select_base6 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] + +-- !select_load6 -- +1 doris1 [9, 99, 999] +2 doris2 [8, 88] +3 doris3 [] +5 doris5 [1, NULL, 2] +6 doris6 [NULL, NULL, NULL] +7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111] + +-- !select_base7 -- +1 doris_1 {1, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 1, 1, 1, 1, 1, 1, 1.1, 1.1, "char1_1234", 1} +2 doris_2 {2, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 2, 2, 1, 2, 2, 2, 2.2, 2.2, "char2_1234", 2} +3 doris_3 {3, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 3, 3, 1, 3, 3, 3, 3.3, 3.3, "char3_1234", 3} +4 doris_4 {4, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 4, 4, 1, 4, 4, 4, 4.4, 4.4, "char4_1234", 4} +5 doris_5 {5, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 5, 5, 1, 5, 5, 5, 5.5, 5.5, "char5_1234", 5} +6 doris_6 {6, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 6, 6, 1, 6, 6, 6, 6.6, 6.6, "char6_1234", 6} +7 doris_7 {7, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 7, 7, 1, 7, 7, 7, 7.7, 7.7, "char7_1234", 7} +8 doris_8 {8, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 8, 8, 1, 8, 8, 8, 8.8, 8.8, "char8_1234", 8} +9 doris_9 {9, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 9, 9, 1, 9, 9, 9, 9.9, 9.9, "char9_1234", 9} +10 doris_10 {10, 2017-10-01, 2017-10-01 00:00:00, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} + +-- !select_load7 -- +1 doris_1 {1, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 1, 1, 1, 1, 1, "1", 1.1, 1.1, "char1_1234", 1} +2 doris_2 {2, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 2, 2, 1, 2, 2, "2", 2.2, 2.2, "char2_1234", 2} +3 doris_3 {3, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 3, 3, 1, 3, 3, "3", 3.3, 3.3, "char3_1234", 3} +4 doris_4 {4, "2017-10-01", 
"2017-10-01 00:00:00", "Beijing", 4, 4, 1, 4, 4, "4", 4.4, 4.4, "char4_1234", 4} +5 doris_5 {5, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 5, 5, 1, 5, 5, "5", 5.5, 5.5, "char5_1234", 5} +6 doris_6 {6, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 6, 6, 1, 6, 6, "6", 6.6, 6.6, "char6_1234", 6} +7 doris_7 {7, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 7, 7, 1, 7, 7, "7", 7.7, 7.7, "char7_1234", 7} +8 doris_8 {8, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 8, 8, 1, 8, 8, "8", 8.8, 8.8, "char8_1234", 8} +9 doris_9 {9, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 9, 9, 1, 9, 9, "9", 9.9, 9.9, "char9_1234", 9} +10 doris_10 {10, "2017-10-01", "2017-10-01 00:00:00", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL} + diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out new file mode 100644 index 0000000000..9938e73ba8 --- /dev/null +++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_map_type.out @@ -0,0 +1,397 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_base1 -- +1 doris1 {"a":100, "b":111} +2 doris2 {"a":200, "b":222} +3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":100, "b":NULL} +6 \N \N +7 doris7 \N + +-- !select_load1 -- +1 doris1 {"a":"100", "b":"111"} +2 doris2 {"a":"200", "b":"222"} +3 doris3 {"a":NULL, "b":"333", "c":"399", "d":"399999999999999"} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":"100", "b":NULL} +6 \N \N +7 doris7 \N + +-- !select_base2 -- +1 doris1 {100:"null", 111:"b"} +2 doris2 {200:"a", 222:"b"} +3 doris3 {111:"a", 333:"b", 399:"c", 399999999999999:"d"} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:"100", 111:"b"} +6 \N \N +7 doris7 \N +8 doris8 {-170141183460469231731687303715884105728:"min_largeint", 170141183460469231731687303715884105727:"max_largeint"} +9 doris9 {-170141183460469231731687303715884105728:"min_largeint", 111:"b"} +10 doris10 {200:"a", 170141183460469231731687303715884105727:"max_largeint", 111:"b"} + +-- !select_load2 -- +1 doris1 {"100":"null", "111":"b"} +2 doris2 {"200":"a", "222":"b"} +3 doris3 {"111":"a", "333":"b", "399":"c", "399999999999999":"d"} +4 doris4 {"111":NULL, "111":NULL} +5 doris5 {"111":"100", "111":"b"} +6 \N \N +7 doris7 \N +8 doris8 {"-170141183460469231731687303715884105728":"min_largeint", "170141183460469231731687303715884105727":"max_largeint"} +9 doris9 {"-170141183460469231731687303715884105728":"min_largeint", "111":"b"} +10 doris10 {"200":"a", "170141183460469231731687303715884105727":"max_largeint", "111":"b"} + +-- !select_base3 -- +1 doris1 {100:0.12300, 111:1.23450} +2 doris2 {200:8738931.12312, 222:999.99900} +3 doris3 {111:1111034.12300, 333:7771.12310, 399:0.44124, 39999:0.44124} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:1111034.12300, 111:8738931.12312} +6 \N \N +7 doris7 \N +8 doris8 {-2147483648:1.23450, 2147483647:999.99900} +9 doris9 {-2147483648:1111034.12300} +10 doris10 
{2147483647:123456789.12345} + +-- !select_load3 -- +1 doris1 {100:0.12300, 111:1.23450} +2 doris2 {200:8738931.12312, 222:999.99900} +3 doris3 {111:1111034.12300, 333:7771.12310, 399:0.44124, 39999:0.44124} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:1111034.12300, 111:8738931.12312} +6 \N \N +7 doris7 \N +8 doris8 {-2147483648:1.23450, 2147483647:999.99900} +9 doris9 {-2147483648:1111034.12300} +10 doris10 {2147483647:123456789.12345} + +-- !select_base4 -- +1 doris1 {100:4.94065645841247e-324, 111:1.7976931348623157e+308} +2 doris2 {200:123.123, 222:0.9999999} +3 doris3 {111:187.123, 333:555.6767, 399:129312.113, 3999:123.12314} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:187.123, 111:187.123} +6 \N \N +7 doris7 \N +8 doris8 {-2147483648:4.94065645841247e-324, 2147483647:1.7976931348623157e+308} +9 doris9 {2147483647:4.94065645841247e-324, -2147483648:1.7976931348623157e+308} + +-- !select_load4 -- +1 doris1 {100:4.94065645841247e-324, 111:1.7976931348623157e+308} +2 doris2 {200:123.123, 222:0.9999999} +3 doris3 {111:187.123, 333:555.6767, 399:129312.113, 3999:123.12314} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:187.123, 111:187.123} +6 \N \N +7 doris7 \N +8 doris8 {-2147483648:4.94065645841247e-324, 2147483647:1.7976931348623157e+308} +9 doris9 {2147483647:4.94065645841247e-324, -2147483648:1.7976931348623157e+308} + +-- !select_base5 -- +1 doris1 {"k1":0.12300, "111":1.23450} +2 doris2 {"200":8738931.12312, "doris":999.99900} +3 doris3 {"null":1111034.12300, "333":7771.12310, "399":0.44124, "3999999999":0.44124} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":1111034.12300, "null":8738931.12312} +6 \N \N +7 doris7 \N +8 doris8 {"-2147483648":1.23450, "2147483647":999.99900} +9 doris9 {"-2147483648":1111034.12300} +10 doris10 {"2147483647":123456789.12345} + +-- !select_load5 -- +1 doris1 {"k1":0.12300, "111":1.23450} +2 doris2 {"200":8738931.12312, "doris":999.99900} +3 doris3 {"null":1111034.12300, "333":7771.12310, "399":0.44124, 
"3999999999":0.44124} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":1111034.12300, "null":8738931.12312} +6 \N \N +7 doris7 \N +8 doris8 {"-2147483648":1.23450, "2147483647":999.99900} +9 doris9 {"-2147483648":1111034.12300} +10 doris10 {"2147483647":123456789.12345} + +-- !select_base6 -- +1 doris1 {"100":4.94065645841247e-324, "doris":1.7976931348623157e+308} +2 doris2 {"nereids":123.123, "222":0.9999999} +3 doris3 {"null":187.123, "333":555.6767, "399":129312.113, "39999999999":123.12314} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":187.123, "null":187.123} +6 \N \N +7 doris7 \N +8 doris8 {"-2147483648":4.94065645841247e-324, "2147483647":1.7976931348623157e+308} +9 doris9 {"2147483647":4.94065645841247e-324, "-2147483648":1.7976931348623157e+308} + +-- !select_load6 -- +1 doris1 {"100":4.94065645841247e-324, "doris":1.7976931348623157e+308} +2 doris2 {"nereids":123.123, "222":0.9999999} +3 doris3 {"null":187.123, "333":555.6767, "399":129312.113, "39999999999":123.12314} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":187.123, "null":187.123} +6 \N \N +7 doris7 \N +8 doris8 {"-2147483648":4.94065645841247e-324, "2147483647":1.7976931348623157e+308} +9 doris9 {"2147483647":4.94065645841247e-324, "-2147483648":1.7976931348623157e+308} + +-- !select_base7 -- +1 doris1 {"a":100, "b":111} +2 doris2 {"a":200, "b":222} +3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":100, "b":NULL} +6 \N \N +7 doris7 \N +8 doris8 {"max_bigint":9223372036854775807, "min_bigint":-9223372036854775808} + +-- !select_load7 -- +1 doris1 {"a":100, "b":111} +2 doris2 {"a":200, "b":222} +3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":100, "b":NULL} +6 \N \N +7 doris7 \N +8 doris8 {"max_bigint":9223372036854775807, "min_bigint":-9223372036854775808} + +-- !select_base8 -- +1 doris1 {"a":1, "b":0} +2 doris2 {"a":0, "b":0} +3 doris3 {"a":1, 
"b":NULL, "c":1, "d":0} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":0, "b":1} +6 \N \N +7 doris7 \N +8 doris8 {"true":1, "false":0} + +-- !select_load8 -- +1 doris1 {"a":1, "b":0} +2 doris2 {"a":0, "b":0} +3 doris3 {"a":1, "b":NULL, "c":1, "d":0} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":0, "b":1} +6 \N \N +7 doris7 \N +8 doris8 {"true":1, "false":0} + +-- !select_base9 -- +1 doris1 {100:1, 111:1} +2 doris2 {200:0, 222:0} +3 doris3 {111:1, 333:0, 399:0, 3999:1} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:1, 111:1} +6 \N \N +7 doris7 \N +8 doris8 {-2147483648:0, 2147483647:0} +9 doris9 {2147483647:1, -2147483648:1} + +-- !select_load9 -- +1 doris1 {100:1, 111:1} +2 doris2 {200:0, 222:0} +3 doris3 {111:1, 333:0, 399:0, 3999:1} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:1, 111:1} +6 \N \N +7 doris7 \N +8 doris8 {-2147483648:0, 2147483647:0} +9 doris9 {2147483647:1, -2147483648:1} + +-- !select_base10 -- +1 doris1 {2023-04-20 01:02:03:"null", 2018-04-20 10:40:35:"b"} +2 doris2 {2000-04-20 00:00:00:"a", 1967-12-31 12:24:56:"b"} +3 doris3 {2023-01-01 00:00:00:"b", 2023-02-27 00:01:02:"d"} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {2025-12-31 12:01:41:"min_largeint", 2006-02-19 09:01:02:"max_largeint"} +9 doris9 {0209-04-20 00:00:00:"min_largeint", 0102-03-21 00:00:00:"b"} +10 doris10 {2003-04-29 01:02:03:"a", 2006-02-22 02:01:04:"max_largeint", 2020-03-21 19:21:23:"b"} + +-- !select_load10 -- +1 doris1 {"2023-04-20 01:02:03":"null", "2018-04-20 10:40:35":"b"} +2 doris2 {"2000-04-20 00:00:00":"a", "1967-12-31 12:24:56":"b"} +3 doris3 {"2023-01-01 00:00:00":"b", "2023-02-27 00:01:02":"d"} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {"2025-12-31 12:01:41":"min_largeint", "2006-02-19 09:01:02":"max_largeint"} +9 doris9 {"0209-04-20 00:00:00":"min_largeint", "0102-03-21 00:00:00":"b"} +10 doris10 {"2003-04-29 01:02:03":"a", "2006-02-22 02:01:04":"max_largeint", "2020-03-21 19:21:23":"b"} + +-- !select_base11 -- +1 
doris1 {2023-04-20 01:02:03:NULL, 2018-04-20 10:40:35:123} +2 doris2 {2000-04-20 00:00:00:-2147483648, 1967-12-31 12:24:56:2147483647} +3 doris3 {2023-01-01 00:00:00:1246, 2023-02-27 00:01:02:5646} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {2025-12-31 12:01:41:524524, 2006-02-19 09:01:02:2534} + +-- !select_load11 -- +1 doris1 {"2023-04-20 01:02:03":NULL, "2018-04-20 10:40:35":123} +2 doris2 {"2000-04-20 00:00:00":-2147483648, "1967-12-31 12:24:56":2147483647} +3 doris3 {"2023-01-01 00:00:00":1246, "2023-02-27 00:01:02":5646} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {"2025-12-31 12:01:41":524524, "2006-02-19 09:01:02":2534} + +-- !select_base12 -- +1 doris1 {2023-04-20:NULL, 2018-04-20:123} +2 doris2 {2000-04-20:-2147483648, 1967-12-31:2147483647} +3 doris3 {2023-01-01:1246, 2023-02-27:5646} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {2025-12-31:524524, 2006-02-19:2534} + +-- !select_load12 -- +1 doris1 {"2023-04-20":NULL, "2018-04-20":123} +2 doris2 {"2000-04-20":-2147483648, "1967-12-31":2147483647} +3 doris3 {"2023-01-01":1246, "2023-02-27":5646} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {"2025-12-31":524524, "2006-02-19":2534} + +-- !select_base13 -- +1 doris1 {2023-04-20:"null", 2018-04-20:NULL} +2 doris2 {2000-04-20:"-2147483648", 1967-12-31:"2147483647"} +3 doris3 {2023-01-01:"1246", 2023-02-27:"5646"} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {2025-12-31:"min_largeint", 2006-02-19:"max_largeint"} + +-- !select_load13 -- +1 doris1 {"2023-04-20":"null", "2018-04-20":NULL} +2 doris2 {"2000-04-20":"-2147483648", "1967-12-31":"2147483647"} +3 doris3 {"2023-01-01":"1246", "2023-02-27":"5646"} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {"2025-12-31":"min_largeint", "2006-02-19":"max_largeint"} + +-- !select_base14 -- +1 doris1 {2023-04-20 12:20:03:"null", 2018-04-20 12:59:59:NULL} +2 doris2 {2000-04-20 23:59:59:"-2147483648", 1967-12-31 00:00:00:"2147483647"} +3 doris3 
{2023-01-01 07:24:54:"1246", 2023-02-27 15:12:13:"5646"} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {2025-12-31 11:22:33:"min_largeint", 2006-02-19 00:44:55:"max_largeint"} + +-- !select_load14 -- +1 doris1 {"2023-04-20 12:20:03":"null", "2018-04-20 12:59:59":NULL} +2 doris2 {"2000-04-20 23:59:59":"-2147483648", "1967-12-31 00:00:00":"2147483647"} +3 doris3 {"2023-01-01 07:24:54":"1246", "2023-02-27 15:12:13":"5646"} +4 doris4 {} +5 doris5 {} +6 \N \N +7 doris7 \N +8 doris8 {"2025-12-31 11:22:33":"min_largeint", "2006-02-19 00:44:55":"max_largeint"} + +-- !select_base15 -- +1 doris1 {100:"null", 111:"b"} +2 doris2 {200:"a", 222:"b"} +3 doris3 {111:"a", 333:"b", 399:"c", 399999999999999:"d"} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:"100", 111:"b"} +6 \N \N +7 doris7 \N +8 doris8 {-9223372036854775808:"min_bigint", 9223372036854775807:"max_bigint"} +9 doris9 {9223372036854775807:"min_bigint", 111:"b"} +10 doris10 {200:"a", 9223372036854775807:"max_bigint", 111:"b"} + +-- !select_load15 -- +1 doris1 {100:"null", 111:"b"} +2 doris2 {200:"a", 222:"b"} +3 doris3 {111:"a", 333:"b", 399:"c", 399999999999999:"d"} +4 doris4 {111:NULL, 111:NULL} +5 doris5 {111:"100", 111:"b"} +6 \N \N +7 doris7 \N +8 doris8 {-9223372036854775808:"min_bigint", 9223372036854775807:"max_bigint"} +9 doris9 {9223372036854775807:"min_bigint", 111:"b"} +10 doris10 {200:"a", 9223372036854775807:"max_bigint", 111:"b"} + +-- !select_base16 -- +1 doris1 {1:"null", 0:"b"} +2 doris2 {1:"a", 1:"b"} +3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"} +4 doris4 {1:NULL, 1:NULL} +5 doris5 {1:"100", 1:"b"} +6 \N \N +7 doris7 \N +8 doris8 {0:"min_bigint", 0:"max_bigint"} +9 doris9 {1:"min_bigint", 0:"b"} +10 doris10 {0:"a", 1:"max_bigint", 1:"b"} + +-- !select_load16 -- +1 doris1 {1:"null", 0:"b"} +2 doris2 {1:"a", 1:"b"} +3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"} +4 doris4 {1:NULL, 1:NULL} +5 doris5 {1:"100", 1:"b"} +6 \N \N +7 doris7 \N +8 doris8 {0:"min_bigint", 0:"max_bigint"} +9 doris9 {1:"min_bigint", 
0:"b"} +10 doris10 {0:"a", 1:"max_bigint", 1:"b"} + +-- !select_base17 -- +1 doris1 {1:"xxx", 0:"b"} +2 doris2 {1:"a", 1:"b"} +3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"} +4 doris4 {1:NULL, 1:NULL} +5 doris5 {1:"100", 1:"b"} +6 \N \N +7 doris7 \N +8 doris8 {0:"min_bigint", 0:"max_bigint"} +9 doris9 {1:"min_bigint", 0:"b"} +10 doris10 {0:"a", 1:"max_bigint", 1:"b"} + +-- !select_load17 -- +1 doris1 {1:"xxx", 0:"b"} +2 doris2 {1:"a", 1:"b"} +3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"} +4 doris4 {1:NULL, 1:NULL} +5 doris5 {1:"100", 1:"b"} +6 \N \N +7 doris7 \N +8 doris8 {0:"min_bigint", 0:"max_bigint"} +9 doris9 {1:"min_bigint", 0:"b"} +10 doris10 {0:"a", 1:"max_bigint", 1:"b"} + +-- !select_base18 -- +1 doris1 {"doris":"null", "nereids":"b"} +2 doris2 {"ftw":"a", "cyx":"b"} +3 doris3 {"null":"a", "333":"b", "399":"c", "399999999999999":"d"} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":"100", "null":"b"} +6 \N \N +7 doris7 \N +8 doris8 {"170141183460469231731687303715884105728":"min_largeint", "170141183460469231731687303715884105727":"max_largeint"} +9 doris9 {"170141183460469231731687303715884105728":"min_largeint", "111":"b"} +10 doris10 {"200":"a", "170141183460469231731687303715884105727":"max_largeint", "111":"b"} + +-- !select_load18 -- +1 doris1 {"doris":"null", "nereids":"b"} +2 doris2 {"ftw":"a", "cyx":"b"} +3 doris3 {"null":"a", "333":"b", "399":"c", "399999999999999":"d"} +4 doris4 {"null":NULL, "null":NULL} +5 doris5 {"null":"100", "null":"b"} +6 \N \N +7 doris7 \N +8 doris8 {"170141183460469231731687303715884105728":"min_largeint", "170141183460469231731687303715884105727":"max_largeint"} +9 doris9 {"170141183460469231731687303715884105728":"min_largeint", "111":"b"} +10 doris10 {"200":"a", "170141183460469231731687303715884105727":"max_largeint", "111":"b"} + diff --git a/regression-test/suites/export_p0/test_outfile_parquet.groovy b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet.groovy similarity index 100% rename from 
regression-test/suites/export_p0/test_outfile_parquet.groovy rename to regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet.groovy diff --git a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_array_type.groovy b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_array_type.groovy new file mode 100644 index 0000000000..680c55e040 --- /dev/null +++ b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_array_type.groovy @@ -0,0 +1,532 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_outfile_parquet_array_type", "p0") { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + + def export_table_name = "outfile_parquet_array_export_test" + def load_table_name = "outfile_parquet_array_type_load_test" + def outFilePath = "${bucket}/outfile/parquet/complex_type/exp_" + + + def create_table = {table_name, struct_field -> + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `name` STRING COMMENT "用户年龄", + ${struct_field} + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + } + + def outfile_to_S3 = { + // select ... into outfile ... + def res = sql """ + SELECT * FROM ${export_table_name} t ORDER BY user_id + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS parquet + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + + return res[0][3] + } + + + // 1. 
test NULL ARRAY + try { + def struct_field_define = "`a_info` ARRAY NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (4, 'doris4', null); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """ + sql """ insert into ${export_table_name} values (8, 'doris8', null); """ + + + // test base data + qt_select_base1 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load1 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 2. 
test NOT NULL ARRAY + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """ + + // test base data + qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load2 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 3. 
test NULL ARRAY of date + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', ['2017-10-01', '2023-09-13', '2023-12-31']), (2, 'doris2', ['1967-10-01', '1000-09-13']); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', ['0001-10-01', null, '0000-01-01']); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, '2017-10-01', '2023-09-13', '2023-12-31']); """ + + // test base data + qt_select_base_date """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_date """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 4. 
test NULL ARRAY of datetime + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', ['2017-10-01 00:00:00', '2011-10-01 01:23:59']), (2, 'doris2', ['2017-10-01 00:00:00', '2011-10-01 01:23:59']); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', ['2017-10-01 00:00:00', null, '2017-10-01 00:00:00']); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, '2017-10-01 00:00:00', '2011-10-01 01:23:59']); """ + + // test base data + qt_select_base_datetime """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_datetime """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + + + // 5. 
test NULL ARRAY of VARCHAR(40) + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', ['2017-10-01 00:00:00', '2011-10-01 01:23:59']), (2, 'doris2', ['2017-10-01 00:00:00.123', '2011-10-01 01:23:59']); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', ['2017-10-01 00:00:00.123456', null, '2017-10-01 00:00:00.123']); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, 'null', null, '2017-10-01 00:00:00', '2011-10-01 01:23:59']); """ + + // test base data + qt_select_base_varchar """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_varchar """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + + // 7. 
test NULL ARRAY of SMALLINT + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [-32768, 32767]), (2, 'doris2', [-1, -1, -2, 0 ,3, 99]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-32768, 32767, 99, -99]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, -32768, 32767]); """ + + // test base data + qt_select_base_smallint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_smallint """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 8. 
test NULL ARRAY of TINYINT + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [-128, 127]), (2, 'doris2', [-1, -1, -2, 0 ,3, 99]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-128, 127, 99, -99]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, -128, 127]); """ + + // test base data + qt_select_base_tinyint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_tinyint """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 9. 
test NULL ARRAY of boolean + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [true, false, true, true, false]), (2, 'doris2', [1, 0, false, true, 99]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [true, false, true]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, false, true]); """ + + // test base data + qt_select_base_boolean """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_boolean """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 10. 
test NULL ARRAY of bigint + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [-9223372036854775808, 9223372036854775807]), (2, 'doris2', [-14141, -9223372036854775808, 9223372036854775807, 9891912 ,3, 99]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-128, 127, 99, -99]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, -9223372036854775808, 9223372036854775807]); """ + + // test base data + qt_select_base_bigint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_bigint """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 11. 
test NULL ARRAY of largeint + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727]), (2, 'doris2', [-1, 170141183460469231731687303715884105727, -2, 0 ,3, 99]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-170141183460469231731687303715884105728, 127, 99, -99]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, -170141183460469231731687303715884105728, 170141183460469231731687303715884105727]); """ + + // test base data + qt_select_base_largeint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_largeint """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 12. 
test NULL ARRAY of float + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [${Float.MIN_VALUE}, ${Float.MAX_VALUE}]), (2, 'doris2', [-1.1, 1.2231, ${Float.MAX_VALUE}, 0 ,3, 99.00989]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-12.8, ${Float.MIN_VALUE}, ${Float.MAX_VALUE}, -9.9]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, ${Float.MIN_VALUE}, ${Float.MAX_VALUE}]); """ + + // test base data + qt_select_base_float """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_float """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 13. 
test NULL ARRAY of double + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [${Double.MIN_VALUE}, ${Double.MAX_VALUE}]), (2, 'doris2', [-1.1, 1.2231, ${Double.MAX_VALUE}, 0 ,3, 99.00989]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-128, ${Double.MIN_VALUE}, 99, -99]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, ${Double.MIN_VALUE}, ${Double.MAX_VALUE}]); """ + + // test base data + qt_select_base_double """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_double """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 14. 
test NULL ARRAY of CHAR(10) + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', ['1234567890', 'doris12345']), (2, 'doris2', ['90', 'doris1245']); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', ['doris-123', 'doris-123', 'doris-124', 'doris12378']); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, 'doris-123', 'doris-123']); """ + + // test base data + qt_select_base_CHAR """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_CHAR """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 15. 
test NULL ARRAY of decimal + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [-128.1234567, 127.123456789]), (2, 'doris2', [-1.2, -1.933445, -21231.12, 0.0909 ,3, 99]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [-12.8, 1.27, 9434364.12319, -99.12314]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, -12.8, 12.7]); """ + + // test base data + qt_select_base_decimal """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load_decimal """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } +} diff --git a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy new file mode 100644 index 0000000000..6120d9f481 --- /dev/null +++ b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy @@ -0,0 +1,313 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_outfile_parquet_complex_type", "p0") { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + + def export_table_name = "outfile_parquet_complex_type_export_test" + def load_table_name = "outfile_parquet_complex_type_load_test" + def outFilePath = "${bucket}/outfile/parquet/complex_type/exp_" + + + def create_table = {table_name, struct_field -> + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `name` STRING COMMENT "用户年龄", + ${struct_field} + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + } + + def outfile_to_S3 = { + // select ... into outfile ... + def res = sql """ + SELECT * FROM ${export_table_name} t ORDER BY user_id + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS parquet + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + + return res[0][3] + } + + + // 1. 
struct NULL type + try { + + def struct_field_define = "`s_info` STRUCT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {1, 'sn1', 'sa1'}); """ + sql """ insert into ${export_table_name} values (2, 'doris2', struct(2, 'sn2', 'sa2')); """ + sql """ insert into ${export_table_name} values (3, 'doris3', named_struct('s_id', 3, 's_name', 'sn3', 's_address', 'sa3')); """ + sql """ insert into ${export_table_name} values (4, 'doris4', null); """ + sql """ insert into ${export_table_name} values (5, 'doris5', struct(5, null, 'sa5')); """ + sql """ insert into ${export_table_name} values (6, 'doris6', struct(null, null, null)); """ + sql """ insert into ${export_table_name} values (7, null, struct(null, null, null)); """ + sql """ insert into ${export_table_name} values (8, null, null); """ + + // test base data + qt_select_base """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + // test outfile to s3 + def outfile_url = outfile_to_S3() + + qt_select_load1 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + + } finally { + } + + + // 2. 
struct NOT NULL type + try { + def struct_field_define = "`s_info` STRUCT NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {1, 'sn1', 'sa1'}); """ + sql """ insert into ${export_table_name} values (2, 'doris2', struct(2, 'sn2', 'sa2')); """ + sql """ insert into ${export_table_name} values (3, 'doris3', named_struct('s_id', 3, 's_name', 'sn3', 's_address', 'sa3')); """ + sql """ insert into ${export_table_name} values (5, 'doris5', struct(5, null, 'sa5')); """ + sql """ insert into ${export_table_name} values (6, 'doris6', struct(null, null, null)); """ + sql """ insert into ${export_table_name} values (7, null, struct(null, null, null)); """ + + // test base data + qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load2 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 3. 
test NULL Map + try { + def struct_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + + // test base data + qt_select_base3 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load3 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 4. 
test NOT NULL Map + try { + def struct_field_define = "`m_info` Map NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """ + + // test base data + qt_select_base4 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load4 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 5. 
test NULL ARRAY + try { + def struct_field_define = "`a_info` ARRAY NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (4, 'doris4', null); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """ + sql """ insert into ${export_table_name} values (8, 'doris8', null); """ + + + // test base data + qt_select_base5 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load5 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + // 6. 
test NOT NULL ARRAY + try { + def struct_field_define = "`a_info` ARRAY NOT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """ + sql """ insert into ${export_table_name} values (3, 'doris3', []); """ + sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """ + sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """ + sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """ + + // test base data + qt_select_base6 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load6 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 7. 
test struct with all type + try { + + def struct_field_define = "`s_info` STRUCT NULL" + // create table to export data + create_table(export_table_name, struct_field_define) + // create table to load data + create_table(load_table_name, struct_field_define) + + // insert data + StringBuilder sb = new StringBuilder() + int i = 1 + for (; i < 10; i ++) { + sb.append(""" + (${i}, 'doris_${i}', {${i}, '2017-10-01', '2017-10-01 00:00:00', 'Beijing', ${i}, ${i % 128}, true, ${i}, ${i}, ${i}, ${i}.${i}, ${i}.${i}, 'char${i}_1234', ${i}}), + """) + } + sb.append(""" + (${i}, 'doris_${i}', {${i}, '2017-10-01', '2017-10-01 00:00:00', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}) + """) + + sql """ INSERT INTO ${export_table_name} VALUES ${sb.toString()} """ + + // test base data + qt_select_base7 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + // test outfile to s3 + def outfile_url = outfile_to_S3() + + qt_select_load7 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + + } finally { + } + +} diff --git a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_map_type.groovy b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_map_type.groovy new file mode 100644 index 0000000000..c1d4fe894c --- /dev/null +++ b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_map_type.groovy @@ -0,0 +1,714 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.nio.file.Paths + +suite("test_outfile_parquet_map_type", "p0") { + // open nereids + sql """ set enable_nereids_planner=true """ + sql """ set enable_fallback_to_original_planner=false """ + + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + def export_table_name = "outfile_parquet_map_type_export_test" + def load_table_name = "outfile_parquet_map_type_load_test" + def outFilePath = "${bucket}/outfile/parquet/map_type/exp_" + + + def create_table = {table_name, map_field -> + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `user_id` LARGEINT NOT NULL COMMENT "用户id", + `name` STRING COMMENT "用户年龄", + ${map_field} + ) + DISTRIBUTED BY HASH(user_id) + PROPERTIES("replication_num" = "1"); + """ + } + + def outfile_to_S3 = { + // select ... into outfile ... + def res = sql """ + SELECT * FROM ${export_table_name} t ORDER BY user_id + INTO OUTFILE "s3://${outFilePath}" + FORMAT AS parquet + PROPERTIES ( + "s3.endpoint" = "${s3_endpoint}", + "s3.region" = "${region}", + "s3.secret_key"="${sk}", + "s3.access_key" = "${ak}" + ); + """ + + return res[0][3] + } + + + // 1. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + + + // test base data + qt_select_base1 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load1 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 2. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {100: 'null', 111:'b'}), (2, 'doris2', {200:'a', 222:'b'}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {111: 'a', 333:'b', 399:'c', 399999999999999:'d'}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {111: '100', 111:'b'}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {-170141183460469231731687303715884105728: 'min_largeint', 170141183460469231731687303715884105727: 'max_largeint'}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {-170141183460469231731687303715884105728: 'min_largeint', 111:'b'}); """ + sql """ insert into ${export_table_name} values (10, 'doris10', {200:'a', 170141183460469231731687303715884105727: 'max_largeint', 111:'b'}); """ + + // test base data + qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load2 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 3. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {100: 0.123, 111:1.2345}), (2, 'doris2', {200:8738931.12312, 222:999.999}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {111: 1111034.123, 333:7771.1231, 399:0.441241, 39999:0.441241}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {111: 1111034.123, 111:8738931.12312}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {${Integer.MIN_VALUE}: 1.2345, ${Integer.MAX_VALUE}: 999.999}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {${Integer.MIN_VALUE}: 1111034.123}); """ + sql """ insert into ${export_table_name} values (10, 'doris10', {${Integer.MAX_VALUE}: 123456789.12345}); """ + + // test base data + qt_select_base3 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load3 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 4. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {100: ${Double.MIN_VALUE}, 111:${Double.MAX_VALUE}}), (2, 'doris2', {200: 123.123, 222:0.9999999}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {111: 187.123, 333:555.6767, 399:129312.113, 3999:123.12314}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {111: 187.123, 111:187.123}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {${Integer.MIN_VALUE}: ${Double.MIN_VALUE}, ${Integer.MAX_VALUE}: ${Double.MAX_VALUE}}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {${Integer.MAX_VALUE}: ${Double.MIN_VALUE}, ${Integer.MIN_VALUE}: ${Double.MAX_VALUE}}); """ + + // test base data + qt_select_base4 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load4 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 5. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'k1': 0.123, '111':1.2345}), (2, 'doris2', {'200':8738931.12312, 'doris':999.999}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'null': 1111034.123, '333':7771.1231, '399':0.441241, '3999999999':0.441241}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 1111034.123, 'null':8738931.12312}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'${Integer.MIN_VALUE}': 1.2345, '${Integer.MAX_VALUE}': 999.999}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {'${Integer.MIN_VALUE}': 1111034.123}); """ + sql """ insert into ${export_table_name} values (10, 'doris10', {'${Integer.MAX_VALUE}': 123456789.12345}); """ + + + // test base data + qt_select_base5 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load5 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 6. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'100': ${Double.MIN_VALUE}, 'doris':${Double.MAX_VALUE}}), (2, 'doris2', {'nereids': 123.123, '222':0.9999999}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'null': 187.123, '333':555.6767, '399':129312.113, '39999999999':123.12314}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 187.123, 'null':187.123}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'${Integer.MIN_VALUE}': ${Double.MIN_VALUE}, '${Integer.MAX_VALUE}': ${Double.MAX_VALUE}}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {'${Integer.MAX_VALUE}': ${Double.MIN_VALUE}, '${Integer.MIN_VALUE}': ${Double.MAX_VALUE}}); """ + + // test base data + qt_select_base6 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load6 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 7. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'max_bigint': ${Long.MAX_VALUE}, 'min_bigint': ${Long.MIN_VALUE}}); """ + + // test base data + qt_select_base7 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load7 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 8. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'a': true, 'b': false}), (2, 'doris2', {'a': false, 'b': false}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'a': true, 'b': null, 'c':399, 'd':false}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {'null': false, 'b': true}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'true': true, 'false': false}); """ + + // test base data + qt_select_base8 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load8 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 9. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {100: true, 111:true}), (2, 'doris2', {200: false, 222:false}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {111: true, 333:false, 399:false, 3999:true}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {111: true, 111:true}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {${Integer.MIN_VALUE}: false, ${Integer.MAX_VALUE}: false}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {${Integer.MAX_VALUE}: true, ${Integer.MIN_VALUE}: true}); """ + + // test base data + qt_select_base9 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load9 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 10. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20 01:02:03': 'null', '2018-04-20 10:40:35':'b'}), (2, 'doris2', {'2000-04-20 00:00:00':'a', '1967-12-31 12:24:56':'b'}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01 00:00:00':'b', '2023-02-27 00:01:02':'d'}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31 12:01:41': 'min_largeint', '2006-02-19 09:01:02': 'max_largeint'}); """ + sql """ insert into ${export_table_name} values (9, 'doris9', {'209-04-20 00:00:00': 'min_largeint', '102-03-21 00:00:00':'b'}); """ + sql """ insert into ${export_table_name} values (10, 'doris10', {'2003-04-29 01:02:03':'a', '2006-02-22 02:01:04': 'max_largeint', '2020-03-21 19:21:23':'b'}); """ + + // test base data + qt_select_base10 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load10 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 11. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20 01:02:03': null, '2018-04-20 10:40:35': 123}), (2, 'doris2', {'2000-04-20 00:00:00':${Integer.MIN_VALUE}, '1967-12-31 12:24:56':${Integer.MAX_VALUE}}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01 00:00:00':1246, '2023-02-27 00:01:02':5646}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31 12:01:41': 524524, '2006-02-19 09:01:02': 2534}); """ + + // test base data + qt_select_base11 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load11 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 12. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20': null, '2018-04-20': 123}), (2, 'doris2', {'2000-04-20':${Integer.MIN_VALUE}, '1967-12-31':${Integer.MAX_VALUE}}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01':1246, '2023-02-27':5646}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31': 524524, '2006-02-19': 2534}); """ + + // test base data + qt_select_base12 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load12 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 13. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20': 'null', '2018-04-20': null}), (2, 'doris2', {'2000-04-20':'${Integer.MIN_VALUE}', '1967-12-31':'${Integer.MAX_VALUE}'}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01':'1246', '2023-02-27':'5646'}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31': 'min_largeint', '2006-02-19': 'max_largeint'}); """ + + // test base data + qt_select_base13 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load13 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 14. 
test map NULL + try { + def map_field_define = "`m_info` Map NULL" + // create table to export data + create_table(export_table_name, map_field_define) + // create table to load data + create_table(load_table_name, map_field_define) + + + // insert data + sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20 12:20:03': 'null', '2018-04-20 12:59:59': null}), (2, 'doris2', {'2000-04-20 23:59:59':'${Integer.MIN_VALUE}', '1967-12-31 00:00:00':'${Integer.MAX_VALUE}'}); """ + sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01 07:24:54':'1246', '2023-02-27 15:12:13':'5646'}); """ + sql """ insert into ${export_table_name} values (4, 'doris4', {}); """ + sql """ insert into ${export_table_name} values (5, 'doris5', {}); """ + sql """ insert into ${export_table_name} values (6, null, null); """ + sql """ insert into ${export_table_name} values (7, 'doris7', null); """ + sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31 11:22:33': 'min_largeint', '2006-02-19 00:44:55': 'max_largeint'}); """ + + // test base data + qt_select_base14 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """ + + def outfile_url = outfile_to_S3() + + qt_select_load14 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "parquet", + "region" = "${region}" + ); + """ + } finally { + } + + + // 15. 
// test map<BIGINT,STRING> NULL
// NOTE(review): the generic parameters of the Map define were lost in this copy
// ("Map NULL"); BIGINT keys are inferred from the Long.MIN_VALUE/MAX_VALUE keys
// inserted below — confirm against the recorded .out baseline.
try {
    def map_field_define = "`m_info` Map<BIGINT,STRING> NULL"
    // create table to export data
    create_table(export_table_name, map_field_define)
    // create table to load data
    create_table(load_table_name, map_field_define)

    // insert data; rows 3-5 deliberately repeat keys, row 8 covers the BIGINT
    // boundary keys (the 'min_bigint'/'max_bigint' values are just label strings)
    sql """ insert into ${export_table_name} values (1, 'doris1', {100: 'null', 111:'b'}), (2, 'doris2', {200:'a', 222:'b'}); """
    sql """ insert into ${export_table_name} values (3, 'doris3', {111: 'a', 333:'b', 399:'c', 399999999999999:'d'}); """
    sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """
    sql """ insert into ${export_table_name} values (5, 'doris5', {111: '100', 111:'b'}); """
    sql """ insert into ${export_table_name} values (6, null, null); """
    sql """ insert into ${export_table_name} values (7, 'doris7', null); """
    sql """ insert into ${export_table_name} values (8, 'doris8', {${Long.MIN_VALUE}: 'min_bigint', ${Long.MAX_VALUE}: 'max_bigint'}); """
    sql """ insert into ${export_table_name} values (9, 'doris9', {${Long.MAX_VALUE}: 'min_bigint', 111:'b'}); """
    sql """ insert into ${export_table_name} values (10, 'doris10', {200:'a', ${Long.MAX_VALUE}: 'max_bigint', 111:'b'}); """

    // test base data
    qt_select_base15 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """

    // export to a parquet file on S3, then read it back through the S3 tvf
    def outfile_url = outfile_to_S3()

    qt_select_load15 """ SELECT * FROM S3 (
                            "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
                            "ACCESS_KEY"= "${ak}",
                            "SECRET_KEY" = "${sk}",
                            "format" = "parquet",
                            "region" = "${region}"
                        );
                        """
} finally {
}


// 16.
// test map<BOOLEAN,STRING> NULL
// NOTE(review): the generic parameters of the Map define were lost in this copy
// ("Map NULL"); BOOLEAN keys are inferred from the true/false keys inserted
// below — confirm against the recorded .out baseline.
try {
    def map_field_define = "`m_info` Map<BOOLEAN,STRING> NULL"
    // create table to export data
    create_table(export_table_name, map_field_define)
    // create table to load data
    create_table(load_table_name, map_field_define)

    // insert data; several rows deliberately repeat the same boolean key
    // (the "min_bigint"/"max_bigint" values are just label strings)
    sql """ insert into ${export_table_name} values (1, "doris1", {true:"null",false:"b"}), (2, "doris2", {true:"a", true:"b"}); """
    sql """ insert into ${export_table_name} values (3, "doris3", {true: "a", true:"b", false:"c", false:"d"}); """
    sql """ insert into ${export_table_name} values (4, "doris4", {true: null, true:null}); """
    sql """ insert into ${export_table_name} values (5, "doris5", {true: "100", true:"b"}); """
    sql """ insert into ${export_table_name} values (6, null, null); """
    sql """ insert into ${export_table_name} values (7, "doris7", null); """
    sql """ insert into ${export_table_name} values (8, "doris8", {false: "min_bigint", false: "max_bigint"}); """
    sql """ insert into ${export_table_name} values (9, "doris9", {true: "min_bigint", false:"b"}); """
    sql """ insert into ${export_table_name} values (10, "doris10", {false:"a", true: "max_bigint", true:"b"}); """

    // test base data
    qt_select_base16 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """

    // export to a parquet file on S3, then read it back through the S3 tvf
    def outfile_url = outfile_to_S3()

    qt_select_load16 """ SELECT * FROM S3 (
                            "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
                            "ACCESS_KEY"= "${ak}",
                            "SECRET_KEY" = "${sk}",
                            "format" = "parquet",
                            "region" = "${region}"
                        );
                        """
} finally {
}

// 17.
// test map<TINYINT,STRING> NULL
// NOTE(review): the generic parameters of the Map define were lost in this copy
// ("Map NULL"). Keys are only 0/1; TINYINT is assumed here (BOOLEAN is already
// covered by the previous case) — TODO confirm against the recorded .out baseline.
try {
    def map_field_define = "`m_info` Map<TINYINT,STRING> NULL"
    // create table to export data
    create_table(export_table_name, map_field_define)
    // create table to load data
    create_table(load_table_name, map_field_define)

    // insert data; several rows deliberately repeat the same key
    // (the 'min_bigint'/'max_bigint' values are just label strings)
    sql """ insert into ${export_table_name} values (1, 'doris1', {1: 'xxx', 0:'b'}), (2, 'doris2', {1:'a', 1:'b'}); """
    sql """ insert into ${export_table_name} values (3, 'doris3', {1: 'a', 1:'b', 0:'c', 0:'d'}); """
    sql """ insert into ${export_table_name} values (4, 'doris4', {1: null, 1:null}); """
    sql """ insert into ${export_table_name} values (5, 'doris5', {1: '100', 1:'b'}); """
    sql """ insert into ${export_table_name} values (6, null, null); """
    sql """ insert into ${export_table_name} values (7, 'doris7', null); """
    sql """ insert into ${export_table_name} values (8, 'doris8', {0: 'min_bigint', 0: 'max_bigint'}); """
    sql """ insert into ${export_table_name} values (9, 'doris9', {1: 'min_bigint', 0:'b'}); """
    sql """ insert into ${export_table_name} values (10, 'doris10', {0:'a', 1: 'max_bigint', 1:'b'}); """

    // test base data
    qt_select_base17 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """

    // export to a parquet file on S3, then read it back through the S3 tvf
    def outfile_url = outfile_to_S3()

    qt_select_load17 """ SELECT * FROM S3 (
                            "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
                            "ACCESS_KEY"= "${ak}",
                            "SECRET_KEY" = "${sk}",
                            "format" = "parquet",
                            "region" = "${region}"
                        );
                        """
} finally {
}


// 18.
// test map<STRING,STRING> NULL
// NOTE(review): the generic parameters of the Map define were lost in this copy
// ("Map NULL"). All keys below are quoted strings, and one key
// ('170141183460469231731687303715884105728') exceeds the LARGEINT maximum, so
// the key type must be STRING — confirm against the recorded .out baseline.
try {
    def map_field_define = "`m_info` Map<STRING,STRING> NULL"
    // create table to export data
    create_table(export_table_name, map_field_define)
    // create table to load data
    create_table(load_table_name, map_field_define)

    // insert data; rows 4-5 deliberately repeat the 'null' key
    // (the 'min_largeint'/'max_largeint' values are just label strings)
    sql """ insert into ${export_table_name} values (1, 'doris1', {'doris': 'null', 'nereids':'b'}), (2, 'doris2', {'ftw':'a', 'cyx':'b'}); """
    sql """ insert into ${export_table_name} values (3, 'doris3', {'null': 'a', '333':'b', '399':'c', '399999999999999':'d'}); """
    sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
    sql """ insert into ${export_table_name} values (5, 'doris5', {'null': '100', 'null':'b'}); """
    sql """ insert into ${export_table_name} values (6, null, null); """
    sql """ insert into ${export_table_name} values (7, 'doris7', null); """
    sql """ insert into ${export_table_name} values (8, 'doris8', {'170141183460469231731687303715884105728': 'min_largeint', '170141183460469231731687303715884105727': 'max_largeint'}); """
    sql """ insert into ${export_table_name} values (9, 'doris9', {'170141183460469231731687303715884105728': 'min_largeint', '111':'b'}); """
    sql """ insert into ${export_table_name} values (10, 'doris10', {'200':'a', '170141183460469231731687303715884105727': 'max_largeint', '111':'b'}); """

    // test base data
    qt_select_base18 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """

    // export to a parquet file on S3, then read it back through the S3 tvf
    def outfile_url = outfile_to_S3()

    qt_select_load18 """ SELECT * FROM S3 (
                            "uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
                            "ACCESS_KEY"= "${ak}",
                            "SECRET_KEY" = "${sk}",
                            "format" = "parquet",
                            "region" = "${region}"
                        );
                        """
} finally {
}
}