[Fix](Outfile) Use data_type_serde to export data to parquet file format (#24998)

Author: Tiewei Fang
Date: 2023-10-13 13:58:34 +08:00
Committed by: GitHub
Parent: 4f65a9c425
Commit: 6f9a084d99
18 changed files with 2446 additions and 893 deletions

View File

@@ -389,8 +389,13 @@ Status FromBlockConverter::convert(std::shared_ptr<arrow::RecordBatch>* out) {
return to_doris_status(arrow_st);
}
_cur_builder = builder.get();
_cur_type->get_serde()->write_column_to_arrow(*_cur_col, nullptr, _cur_builder, _cur_start,
_cur_start + _cur_rows);
try {
_cur_type->get_serde()->write_column_to_arrow(*_cur_col, nullptr, _cur_builder,
_cur_start, _cur_start + _cur_rows);
} catch (std::exception& e) {
return Status::InternalError("Fail to convert block data to arrow data, error: {}",
e.what());
}
arrow_st = _cur_builder->Finish(&_arrays[_cur_field_idx]);
if (!arrow_st.ok()) {
return to_doris_status(arrow_st);

View File

@@ -68,8 +68,6 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr<arrow::
*result = arrow::float64();
break;
case TYPE_LARGEINT:
*result = arrow::fixed_size_binary(sizeof(int128_t));
break;
case TYPE_VARCHAR:
case TYPE_CHAR:
case TYPE_HLL:
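
With the dedicated fixed_size_binary mapping removed, TYPE_LARGEINT now falls through to the TYPE_VARCHAR/TYPE_CHAR/TYPE_HLL cases, so LARGEINT columns are exported to Arrow (and on to Parquet) as text rather than 16-byte binary. A minimal stand-alone sketch of the mapping LARGEINT now receives; the shared branch body is not shown in this hunk, so arrow::utf8() is an assumption, though it matches the serde change below, which writes Int128 through arrow::StringBuilder.

// Sketch only, not part of the commit; largeint_arrow_type is a hypothetical helper.
#include <arrow/type.h>
#include <memory>

std::shared_ptr<arrow::DataType> largeint_arrow_type() {
    // Int128 has no native Arrow integer type in this path, so it is exported
    // as a decimal string (assumed arrow::utf8(), same as VARCHAR/CHAR/HLL).
    return arrow::utf8();
}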

View File

@@ -21,6 +21,7 @@
#include <string>
#include "common/status.h"
#include "runtime/types.h"
// This file will convert Doris RowBatch to/from Arrow's RecordBatch
// RowBatch is used by Doris query engine to exchange data between
@@ -28,6 +29,7 @@
namespace arrow {
class DataType;
class RecordBatch;
class Schema;
@@ -37,6 +39,8 @@ namespace doris {
class RowDescriptor;
Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptr<arrow::DataType>* result);
// Convert Doris RowDescriptor to Arrow Schema.
Status convert_to_arrow_schema(const RowDescriptor& row_desc,
std::shared_ptr<arrow::Schema>* result);

View File

@@ -18,6 +18,8 @@
#include "data_type_map_serde.h"
#include "arrow/array/builder_nested.h"
#include "common/exception.h"
#include "common/status.h"
#include "util/jsonb_document.h"
#include "util/simd/bits.h"
#include "vec/columns/column.h"
@@ -347,7 +349,8 @@ void DataTypeMapSerDe::write_column_to_arrow(const IColumn& column, const NullMa
MutableColumnPtr value_mutable_data = nested_values_column.clone_empty();
for (size_t i = offsets[r - 1]; i < offsets[r]; ++i) {
if (keys_nullmap_data[i] == 1) {
continue;
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"Can not write null value of map key to arrow.");
}
key_mutable_data->insert_from(nested_keys_column, i);
value_mutable_data->insert_from(nested_values_column, i);
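
Arrow's map type declares its key field as non-nullable, so a NULL Doris map key cannot be represented; the serde now throws instead of silently skipping the entry (which previously produced a map shorter than the source column), and the FromBlockConverter change earlier in this commit turns the exception into a Status. A small stand-alone illustration of why the key side is special, not part of the commit:

// Stand-alone sketch: the "key" child of an Arrow map is created non-nullable.
#include <arrow/type.h>
#include <iostream>

int main() {
    auto map_type = arrow::map(arrow::utf8(), arrow::int32());
    std::cout << map_type->ToString() << std::endl; // prints: map<string, int32>
    return 0;
}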

View File

@@ -21,8 +21,11 @@
#include <type_traits>
#include "common/exception.h"
#include "common/status.h"
#include "gutil/strings/numbers.h"
#include "util/mysql_global.h"
#include "vec/core/types.h"
#include "vec/io/io_helper.h"
namespace doris {
@@ -81,15 +84,20 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, const
end - start,
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
column.get_name(), array_builder->type()->name());
} else if constexpr (std::is_same_v<T, Int128> || std::is_same_v<T, UInt128>) {
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
size_t fixed_length = sizeof(typename ColumnType::value_type);
const uint8_t* data_start =
reinterpret_cast<const uint8_t*>(col_data.data()) + start * fixed_length;
checkArrowStatus(
builder.AppendValues(data_start, end - start,
reinterpret_cast<const uint8_t*>(arrow_null_map_data)),
column.get_name(), array_builder->type()->name());
} else if constexpr (std::is_same_v<T, Int128>) {
auto& string_builder = assert_cast<arrow::StringBuilder&>(*array_builder);
for (size_t i = start; i < end; ++i) {
auto& data_value = col_data[i];
std::string value_str = fmt::format("{}", data_value);
if (null_map && (*null_map)[i]) {
checkArrowStatus(string_builder.AppendNull(), column.get_name(),
array_builder->type()->name());
} else {
checkArrowStatus(string_builder.Append(value_str.data(), value_str.length()),
column.get_name(), array_builder->type()->name());
}
}
} else if constexpr (std::is_same_v<T, UInt128>) {
} else {
ARROW_BUILDER_TYPE& builder = assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
checkArrowStatus(
@@ -192,6 +200,30 @@ void DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column,
}
return;
}
// only for largeint(int128) type
if (arrow_array->type_id() == arrow::Type::STRING) {
auto concrete_array = dynamic_cast<const arrow::StringArray*>(arrow_array);
std::shared_ptr<arrow::Buffer> buffer = concrete_array->value_data();
for (size_t offset_i = start; offset_i < end; ++offset_i) {
if (!concrete_array->IsNull(offset_i)) {
const auto* raw_data = buffer->data() + concrete_array->value_offset(offset_i);
const auto raw_data_len = concrete_array->value_length(offset_i);
Int128 val = 0;
ReadBuffer rb(raw_data, raw_data_len);
if (!read_int_text_impl(val, rb)) {
throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
"parse number fail, string: '{}'",
std::string(rb.position(), rb.count()).c_str());
}
col_data.emplace_back(val);
}
}
return;
}
/// buffers[0] is a null bitmap and buffers[1] are actual values
std::shared_ptr<arrow::Buffer> buffer = arrow_array->data()->buffers[1];
const auto* raw_data = reinterpret_cast<const T*>(buffer->data()) + start;
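
Since Arrow carries LARGEINT as text here, Int128 values round-trip through their decimal string form: fmt::format("{}", value) on the write path (fed to arrow::StringBuilder) and integer text parsing via read_int_text_impl on the read path. A tiny stand-alone sketch of that round trip; parse_int128 is a simplified stand-in for read_int_text_impl, and 128-bit formatting support in fmt is assumed (it is what the write path above relies on):

// Stand-alone sketch, not Doris code.
#include <fmt/format.h>
#include <string>

static __int128 parse_int128(const std::string& s) {
    __int128 v = 0;
    size_t i = (!s.empty() && s[0] == '-') ? 1 : 0;
    for (; i < s.size(); ++i) v = v * 10 + (s[i] - '0');
    return (!s.empty() && s[0] == '-') ? -v : v;
}

int main() {
    __int128 original = (static_cast<__int128>(1) << 100) + 7;  // some LARGEINT value
    std::string text = fmt::format("{}", original);             // write side: StringBuilder::Append(text)
    __int128 parsed = parse_int128(text);                       // read side: read_int_text_impl(val, rb)
    return parsed == original ? 0 : 1;
}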

View File

@@ -18,6 +18,7 @@
#include "vec/runtime/vparquet_transformer.h"
#include <arrow/io/type_fwd.h>
#include <arrow/table.h>
#include <glog/logging.h>
#include <math.h>
#include <parquet/column_writer.h>
@@ -40,6 +41,9 @@
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
#include "runtime/types.h"
#include "util/arrow/block_convertor.h"
#include "util/arrow/row_batch.h"
#include "util/arrow/utils.h"
#include "util/binary_cast.hpp"
#include "util/mysql_global.h"
#include "util/types.h"
@@ -63,10 +67,6 @@
namespace doris::vectorized {
static const std::string epoch_date_str = "1970-01-01";
static const int64_t timestamp_threshold = -2177481943;
static const int64_t timestamp_diff = 343;
ParquetOutputStream::ParquetOutputStream(doris::io::FileWriter* file_writer)
: _file_writer(file_writer), _cur_pos(0), _written_len(0) {
set_mode(arrow::io::FileMode::WRITE);
@@ -139,88 +139,6 @@ void ParquetBuildHelper::build_schema_repetition_type(
}
}
void ParquetBuildHelper::build_schema_data_type(parquet::Type::type& parquet_data_type,
const TParquetDataType::type& column_data_type) {
switch (column_data_type) {
case TParquetDataType::BOOLEAN: {
parquet_data_type = parquet::Type::BOOLEAN;
break;
}
case TParquetDataType::INT32: {
parquet_data_type = parquet::Type::INT32;
break;
}
case TParquetDataType::INT64: {
parquet_data_type = parquet::Type::INT64;
break;
}
case TParquetDataType::INT96: {
parquet_data_type = parquet::Type::INT96;
break;
}
case TParquetDataType::BYTE_ARRAY: {
parquet_data_type = parquet::Type::BYTE_ARRAY;
break;
}
case TParquetDataType::FLOAT: {
parquet_data_type = parquet::Type::FLOAT;
break;
}
case TParquetDataType::DOUBLE: {
parquet_data_type = parquet::Type::DOUBLE;
break;
}
case TParquetDataType::FIXED_LEN_BYTE_ARRAY: {
parquet_data_type = parquet::Type::FIXED_LEN_BYTE_ARRAY;
break;
}
default:
parquet_data_type = parquet::Type::UNDEFINED;
}
}
void ParquetBuildHelper::build_schema_data_logical_type(
std::shared_ptr<const parquet::LogicalType>& parquet_data_logical_type_ptr,
const TParquetDataLogicalType::type& column_data_logical_type, int* primitive_length,
const TypeDescriptor& type_desc) {
switch (column_data_logical_type) {
case TParquetDataLogicalType::DECIMAL: {
DCHECK(type_desc.precision != -1 && type_desc.scale != -1)
<< "precision and scale: " << type_desc.precision << " " << type_desc.scale;
if (type_desc.type == TYPE_DECIMAL32) {
*primitive_length = 4;
} else if (type_desc.type == TYPE_DECIMAL64) {
*primitive_length = 8;
} else if (type_desc.type == TYPE_DECIMAL128I) {
*primitive_length = 16;
} else {
throw parquet::ParquetException(
"the logical decimal now only support in decimalv3, maybe error of " +
type_desc.debug_string());
}
parquet_data_logical_type_ptr =
parquet::LogicalType::Decimal(type_desc.precision, type_desc.scale);
break;
}
case TParquetDataLogicalType::STRING: {
parquet_data_logical_type_ptr = parquet::LogicalType::String();
break;
}
case TParquetDataLogicalType::DATE: {
parquet_data_logical_type_ptr = parquet::LogicalType::Date();
break;
}
case TParquetDataLogicalType::TIMESTAMP: {
parquet_data_logical_type_ptr =
parquet::LogicalType::Timestamp(true, parquet::LogicalType::TimeUnit::MILLIS, true);
break;
}
default: {
parquet_data_logical_type_ptr = parquet::LogicalType::None();
}
}
}
void ParquetBuildHelper::build_compression_type(
parquet::WriterProperties::Builder& builder,
const TParquetCompressionType::type& compression_type) {
@@ -286,7 +204,6 @@ VParquetTransformer::VParquetTransformer(doris::io::FileWriter* file_writer,
const TParquetVersion::type& parquet_version,
bool output_object_data)
: VFileFormatTransformer(output_vexpr_ctxs, output_object_data),
_rg_writer(nullptr),
_parquet_schemas(parquet_schemas),
_compression_type(compression_type),
_parquet_disable_dictionary(parquet_disable_dictionary),
@@ -294,7 +211,7 @@ VParquetTransformer::VParquetTransformer(doris::io::FileWriter* file_writer,
_outstream = std::shared_ptr<ParquetOutputStream>(new ParquetOutputStream(file_writer));
}
Status VParquetTransformer::parse_properties() {
Status VParquetTransformer::_parse_properties() {
try {
parquet::WriterProperties::Builder builder;
ParquetBuildHelper::build_compression_type(builder, _compression_type);
@@ -304,613 +221,59 @@ Status VParquetTransformer::parse_properties() {
} else {
builder.enable_dictionary();
}
_properties = builder.build();
_parquet_writer_properties = builder.build();
_arrow_properties = parquet::ArrowWriterProperties::Builder().store_schema()->build();
} catch (const parquet::ParquetException& e) {
return Status::InternalError("parquet writer parse properties error: {}", e.what());
}
return Status::OK();
}
Status VParquetTransformer::parse_schema() {
parquet::schema::NodeVector fields;
parquet::Repetition::type parquet_repetition_type;
parquet::Type::type parquet_physical_type;
std::shared_ptr<const parquet::LogicalType> parquet_data_logical_type;
int primitive_length = -1;
for (int idx = 0; idx < _parquet_schemas.size(); ++idx) {
primitive_length = -1;
ParquetBuildHelper::build_schema_repetition_type(
parquet_repetition_type, _parquet_schemas[idx].schema_repetition_type);
ParquetBuildHelper::build_schema_data_type(parquet_physical_type,
_parquet_schemas[idx].schema_data_type);
ParquetBuildHelper::build_schema_data_logical_type(
parquet_data_logical_type, _parquet_schemas[idx].schema_data_logical_type,
&primitive_length, _output_vexpr_ctxs[idx]->root()->type());
try {
fields.push_back(parquet::schema::PrimitiveNode::Make(
_parquet_schemas[idx].schema_column_name, parquet_repetition_type,
parquet_data_logical_type, parquet_physical_type, primitive_length));
} catch (const parquet::ParquetException& e) {
LOG(WARNING) << "parquet writer parse schema error: " << e.what();
return Status::InternalError("parquet writer parse schema error: {}", e.what());
}
_schema = std::static_pointer_cast<parquet::schema::GroupNode>(
parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields));
Status VParquetTransformer::_parse_schema() {
std::vector<std::shared_ptr<arrow::Field>> fields;
for (size_t i = 0; i < _output_vexpr_ctxs.size(); i++) {
std::shared_ptr<arrow::DataType> type;
RETURN_IF_ERROR(convert_to_arrow_type(_output_vexpr_ctxs[i]->root()->type(), &type));
std::shared_ptr<arrow::Field> field =
arrow::field(_parquet_schemas[i].schema_column_name, type,
_output_vexpr_ctxs[i]->root()->is_nullable());
fields.emplace_back(field);
}
_arrow_schema = arrow::schema(std::move(fields));
return Status::OK();
}
#define RETURN_WRONG_TYPE \
return Status::InvalidArgument("Invalid column type: {}", raw_column->get_name());
#define DISPATCH_PARQUET_NUMERIC_WRITER(WRITER, COLUMN_TYPE, NATIVE_TYPE) \
parquet::RowGroupWriter* rgWriter = get_rg_writer(); \
parquet::WRITER* col_writer = static_cast<parquet::WRITER*>(rgWriter->column(i)); \
if (null_map != nullptr) { \
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data(); \
for (size_t row_id = 0; row_id < sz; row_id++) { \
def_level[row_id] = null_data[row_id] == 0; \
} \
col_writer->WriteBatch(sz, def_level.data(), nullptr, \
reinterpret_cast<const NATIVE_TYPE*>( \
assert_cast<const COLUMN_TYPE&>(*col).get_data().data())); \
} else if (const auto* not_nullable_column = check_and_get_column<const COLUMN_TYPE>(col)) { \
col_writer->WriteBatch( \
sz, nullable ? def_level.data() : nullptr, nullptr, \
reinterpret_cast<const NATIVE_TYPE*>(not_nullable_column->get_data().data())); \
} else { \
RETURN_WRONG_TYPE \
}
#define DISPATCH_PARQUET_COMPLEX_WRITER(COLUMN_TYPE) \
parquet::RowGroupWriter* rgWriter = get_rg_writer(); \
parquet::ByteArrayWriter* col_writer = \
static_cast<parquet::ByteArrayWriter*>(rgWriter->column(i)); \
if (null_map != nullptr) { \
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data(); \
for (size_t row_id = 0; row_id < sz; row_id++) { \
if (null_data[row_id] != 0) { \
single_def_level = 0; \
parquet::ByteArray value; \
col_writer->WriteBatch(1, &single_def_level, nullptr, &value); \
single_def_level = 1; \
} else { \
const auto& tmp = col->get_data_at(row_id); \
parquet::ByteArray value; \
value.ptr = reinterpret_cast<const uint8_t*>(tmp.data); \
value.len = tmp.size; \
col_writer->WriteBatch(1, &single_def_level, nullptr, &value); \
} \
} \
} else if (const auto* not_nullable_column = check_and_get_column<const COLUMN_TYPE>(col)) { \
for (size_t row_id = 0; row_id < sz; row_id++) { \
const auto& tmp = not_nullable_column->get_data_at(row_id); \
parquet::ByteArray value; \
value.ptr = reinterpret_cast<const uint8_t*>(tmp.data); \
value.len = tmp.size; \
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr, &value); \
} \
} else { \
RETURN_WRONG_TYPE \
}
Status VParquetTransformer::write(const Block& block) {
if (block.rows() == 0) {
return Status::OK();
}
size_t sz = block.rows();
try {
for (size_t i = 0; i < block.columns(); i++) {
auto& raw_column = block.get_by_position(i).column;
auto nullable = raw_column->is_nullable();
const auto col = nullable ? reinterpret_cast<const ColumnNullable*>(
block.get_by_position(i).column.get())
->get_nested_column_ptr()
.get()
: block.get_by_position(i).column.get();
auto null_map = nullable && reinterpret_cast<const ColumnNullable*>(
block.get_by_position(i).column.get())
->has_null()
? reinterpret_cast<const ColumnNullable*>(
block.get_by_position(i).column.get())
->get_null_map_column_ptr()
: nullptr;
auto& type = block.get_by_position(i).type;
std::vector<int16_t> def_level(sz);
// For scalar type, definition level == 1 means this value is not NULL.
std::fill(def_level.begin(), def_level.end(), 1);
int16_t single_def_level = 1;
switch (_output_vexpr_ctxs[i]->root()->type().type) {
case TYPE_BOOLEAN: {
DISPATCH_PARQUET_NUMERIC_WRITER(BoolWriter, ColumnVector<UInt8>, bool)
break;
}
case TYPE_BIGINT: {
DISPATCH_PARQUET_NUMERIC_WRITER(Int64Writer, ColumnVector<Int64>, int64_t)
break;
}
case TYPE_LARGEINT: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::ByteArrayWriter* col_writer =
static_cast<parquet::ByteArrayWriter*>(rgWriter->column(i));
parquet::ByteArray value;
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
const int128_t tmp = assert_cast<const ColumnVector<Int128>&>(*col)
.get_data()[row_id];
std::string value_str = fmt::format("{}", tmp);
value.ptr = reinterpret_cast<const uint8_t*>(value_str.data());
value.len = value_str.length();
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else if (const auto* not_nullable_column =
check_and_get_column<const ColumnVector<Int128>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
const int128_t tmp = not_nullable_column->get_data()[row_id];
std::string value_str = fmt::format("{}", tmp);
value.ptr = reinterpret_cast<const uint8_t*>(value_str.data());
value.len = value_str.length();
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_FLOAT: {
DISPATCH_PARQUET_NUMERIC_WRITER(FloatWriter, ColumnVector<Float32>, float_t)
break;
}
case TYPE_DOUBLE: {
DISPATCH_PARQUET_NUMERIC_WRITER(DoubleWriter, ColumnVector<Float64>, double_t)
break;
}
case TYPE_TINYINT:
case TYPE_SMALLINT: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::Int32Writer* col_writer =
static_cast<parquet::Int32Writer*>(rgWriter->column(i));
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
if (const auto* int16_column =
check_and_get_column<const ColumnVector<Int16>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
}
const int32_t tmp = int16_column->get_data()[row_id];
col_writer->WriteBatch(1, &single_def_level, nullptr,
reinterpret_cast<const int32_t*>(&tmp));
single_def_level = 1;
}
} else if (const auto* int8_column =
check_and_get_column<const ColumnVector<Int8>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
}
const int32_t tmp = int8_column->get_data()[row_id];
col_writer->WriteBatch(1, &single_def_level, nullptr,
reinterpret_cast<const int32_t*>(&tmp));
single_def_level = 1;
}
} else {
RETURN_WRONG_TYPE
}
} else if (const auto& int16_column =
check_and_get_column<const ColumnVector<Int16>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
const int32_t tmp = int16_column->get_data()[row_id];
col_writer->WriteBatch(1, nullable ? def_level.data() : nullptr, nullptr,
reinterpret_cast<const int32_t*>(&tmp));
}
} else if (const auto& int8_column =
check_and_get_column<const ColumnVector<Int8>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
const int32_t tmp = int8_column->get_data()[row_id];
col_writer->WriteBatch(1, nullable ? def_level.data() : nullptr, nullptr,
reinterpret_cast<const int32_t*>(&tmp));
}
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_INT: {
DISPATCH_PARQUET_NUMERIC_WRITER(Int32Writer, ColumnVector<Int32>, Int32)
break;
}
case TYPE_DATETIME: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::Int64Writer* col_writer =
static_cast<parquet::Int64Writer*>(rgWriter->column(i));
uint64_t default_int64 = 0;
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
for (size_t row_id = 0; row_id < sz; row_id++) {
def_level[row_id] = null_data[row_id] == 0;
}
int64_t tmp_data[sz];
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
tmp_data[row_id] = default_int64;
} else {
VecDateTimeValue datetime_value = binary_cast<Int64, VecDateTimeValue>(
assert_cast<const ColumnVector<Int64>&>(*col)
.get_data()[row_id]);
if (!datetime_value.unix_timestamp(&tmp_data[row_id],
TimezoneUtils::default_time_zone)) {
return Status::InternalError("get unix timestamp error.");
}
// -2177481943 represent '1900-12-31 23:54:17'
// but -2177481944 represent '1900-12-31 23:59:59'
// so for timestamp <= -2177481944, we subtract 343 (5min 43s)
if (tmp_data[row_id] < timestamp_threshold) {
tmp_data[row_id] -= timestamp_diff;
}
// convert seconds to MILLIS seconds
tmp_data[row_id] *= 1000;
}
}
col_writer->WriteBatch(sz, def_level.data(), nullptr,
reinterpret_cast<const int64_t*>(tmp_data));
} else if (const auto* not_nullable_column =
check_and_get_column<const ColumnVector<Int64>>(col)) {
std::vector<int64_t> res(sz);
for (size_t row_id = 0; row_id < sz; row_id++) {
VecDateTimeValue datetime_value = binary_cast<Int64, VecDateTimeValue>(
not_nullable_column->get_data()[row_id]);
// serialize
std::shared_ptr<arrow::RecordBatch> result;
RETURN_IF_ERROR(
convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), &result));
if (!datetime_value.unix_timestamp(&res[row_id],
TimezoneUtils::default_time_zone)) {
return Status::InternalError("get unix timestamp error.");
};
// -2177481943 represent '1900-12-31 23:54:17'
// but -2177481944 represent '1900-12-31 23:59:59'
// so for timestamp <= -2177481944, we subtract 343 (5min 43s)
if (res[row_id] < timestamp_threshold) {
res[row_id] -= timestamp_diff;
}
// convert seconds to MILLIS seconds
res[row_id] *= 1000;
}
col_writer->WriteBatch(sz, nullable ? def_level.data() : nullptr, nullptr,
reinterpret_cast<const int64_t*>(res.data()));
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_DATE: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::Int64Writer* col_writer =
static_cast<parquet::Int64Writer*>(rgWriter->column(i));
uint64_t default_int64 = 0;
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
for (size_t row_id = 0; row_id < sz; row_id++) {
def_level[row_id] = null_data[row_id] == 0;
}
VecDateTimeValue epoch_date;
if (!epoch_date.from_date_str(epoch_date_str.c_str(),
epoch_date_str.length())) {
return Status::InternalError("create epoch date from string error");
}
int32_t days_from_epoch = epoch_date.daynr();
int32_t tmp_data[sz];
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
tmp_data[row_id] = default_int64;
} else {
int32_t days = binary_cast<Int64, VecDateTimeValue>(
assert_cast<const ColumnVector<Int64>&>(*col)
.get_data()[row_id])
.daynr();
tmp_data[row_id] = days - days_from_epoch;
}
}
col_writer->WriteBatch(sz, def_level.data(), nullptr,
reinterpret_cast<const int64_t*>(tmp_data));
} else if (check_and_get_column<const ColumnVector<Int64>>(col)) {
VecDateTimeValue epoch_date;
if (!epoch_date.from_date_str(epoch_date_str.c_str(),
epoch_date_str.length())) {
return Status::InternalError("create epoch date from string error");
}
int32_t days_from_epoch = epoch_date.daynr();
std::vector<int32_t> res(sz);
for (size_t row_id = 0; row_id < sz; row_id++) {
int32_t days = binary_cast<Int64, VecDateTimeValue>(
assert_cast<const ColumnVector<Int64>&>(*col)
.get_data()[row_id])
.daynr();
res[row_id] = days - days_from_epoch;
}
col_writer->WriteBatch(sz, nullable ? def_level.data() : nullptr, nullptr,
reinterpret_cast<const int64_t*>(res.data()));
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_DATEV2: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::ByteArrayWriter* col_writer =
static_cast<parquet::ByteArrayWriter*>(rgWriter->column(i));
parquet::ByteArray value;
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
char buffer[30];
int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
value.ptr = reinterpret_cast<const uint8_t*>(buffer);
value.len = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(
assert_cast<const ColumnVector<UInt32>&>(*col)
.get_data()[row_id])
.to_buffer(buffer, output_scale);
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else if (const auto* not_nullable_column =
check_and_get_column<const ColumnVector<UInt32>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
char buffer[30];
int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
value.ptr = reinterpret_cast<const uint8_t*>(buffer);
value.len = binary_cast<UInt32, DateV2Value<DateV2ValueType>>(
not_nullable_column->get_data()[row_id])
.to_buffer(buffer, output_scale);
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_DATETIMEV2: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::ByteArrayWriter* col_writer =
static_cast<parquet::ByteArrayWriter*>(rgWriter->column(i));
parquet::ByteArray value;
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
char buffer[30];
int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
value.ptr = reinterpret_cast<const uint8_t*>(buffer);
value.len = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(
assert_cast<const ColumnVector<UInt64>&>(*col)
.get_data()[row_id])
.to_buffer(buffer, output_scale);
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else if (const auto* not_nullable_column =
check_and_get_column<const ColumnVector<UInt64>>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
char buffer[30];
int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
value.ptr = reinterpret_cast<const uint8_t*>(buffer);
value.len = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(
not_nullable_column->get_data()[row_id])
.to_buffer(buffer, output_scale);
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_OBJECT: {
if (_output_object_data) {
DISPATCH_PARQUET_COMPLEX_WRITER(ColumnBitmap)
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_HLL: {
if (_output_object_data) {
DISPATCH_PARQUET_COMPLEX_WRITER(ColumnHLL)
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
DISPATCH_PARQUET_COMPLEX_WRITER(ColumnString)
break;
}
case TYPE_DECIMALV2: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::ByteArrayWriter* col_writer =
static_cast<parquet::ByteArrayWriter*>(rgWriter->column(i));
parquet::ByteArray value;
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
const DecimalV2Value decimal_val(reinterpret_cast<const PackedInt128*>(
col->get_data_at(row_id).data)
->value);
char decimal_buffer[MAX_DECIMAL_WIDTH];
int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
value.ptr = reinterpret_cast<const uint8_t*>(decimal_buffer);
value.len = decimal_val.to_buffer(decimal_buffer, output_scale);
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else if (const auto* not_nullable_column =
check_and_get_column<const ColumnDecimal128>(col)) {
for (size_t row_id = 0; row_id < sz; row_id++) {
const DecimalV2Value decimal_val(
reinterpret_cast<const PackedInt128*>(
not_nullable_column->get_data_at(row_id).data)
->value);
char decimal_buffer[MAX_DECIMAL_WIDTH];
int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
value.ptr = reinterpret_cast<const uint8_t*>(decimal_buffer);
value.len = decimal_val.to_buffer(decimal_buffer, output_scale);
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
} else {
RETURN_WRONG_TYPE
}
break;
}
case TYPE_DECIMAL32: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::FixedLenByteArrayWriter* col_writer =
static_cast<parquet::FixedLenByteArrayWriter*>(rgWriter->column(i));
parquet::FixedLenByteArray value;
auto decimal_type = check_and_get_data_type<DataTypeDecimal<Decimal32>>(
remove_nullable(type).get());
DCHECK(decimal_type);
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
const auto& data_column = assert_cast<const ColumnDecimal32&>(*col);
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
auto data = data_column.get_element(row_id);
auto big_endian = bswap_32(data);
value.ptr = reinterpret_cast<const uint8_t*>(&big_endian);
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else {
const auto& data_column = assert_cast<const ColumnDecimal32&>(*col);
for (size_t row_id = 0; row_id < sz; row_id++) {
auto data = data_column.get_element(row_id);
auto big_endian = bswap_32(data);
value.ptr = reinterpret_cast<const uint8_t*>(&big_endian);
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
}
break;
}
case TYPE_DECIMAL64: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::FixedLenByteArrayWriter* col_writer =
static_cast<parquet::FixedLenByteArrayWriter*>(rgWriter->column(i));
parquet::FixedLenByteArray value;
auto decimal_type = check_and_get_data_type<DataTypeDecimal<Decimal64>>(
remove_nullable(type).get());
DCHECK(decimal_type);
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
const auto& data_column = assert_cast<const ColumnDecimal64&>(*col);
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
auto data = data_column.get_element(row_id);
auto big_endian = bswap_64(data);
value.ptr = reinterpret_cast<const uint8_t*>(&big_endian);
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else {
const auto& data_column = assert_cast<const ColumnDecimal64&>(*col);
for (size_t row_id = 0; row_id < sz; row_id++) {
auto data = data_column.get_element(row_id);
auto big_endian = bswap_64(data);
value.ptr = reinterpret_cast<const uint8_t*>(&big_endian);
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
}
break;
}
case TYPE_DECIMAL128I: {
parquet::RowGroupWriter* rgWriter = get_rg_writer();
parquet::FixedLenByteArrayWriter* col_writer =
static_cast<parquet::FixedLenByteArrayWriter*>(rgWriter->column(i));
parquet::FixedLenByteArray value;
auto decimal_type = check_and_get_data_type<DataTypeDecimal<Decimal128I>>(
remove_nullable(type).get());
DCHECK(decimal_type);
if (null_map != nullptr) {
auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
const auto& data_column = assert_cast<const ColumnDecimal128I&>(*col);
for (size_t row_id = 0; row_id < sz; row_id++) {
if (null_data[row_id] != 0) {
single_def_level = 0;
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
single_def_level = 1;
} else {
auto data = data_column.get_element(row_id);
auto big_endian = gbswap_128(data);
value.ptr = reinterpret_cast<const uint8_t*>(&big_endian);
col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
}
}
} else {
const auto& data_column = assert_cast<const ColumnDecimal128I&>(*col);
for (size_t row_id = 0; row_id < sz; row_id++) {
auto data = data_column.get_element(row_id);
auto big_endian = gbswap_128(data);
value.ptr = reinterpret_cast<const uint8_t*>(&big_endian);
col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
&value);
}
}
break;
}
default: {
return Status::InvalidArgument(
"Invalid expression type: {}",
_output_vexpr_ctxs[i]->root()->type().debug_string());
}
}
}
} catch (const std::exception& e) {
LOG(WARNING) << "Parquet write error: " << e.what();
return Status::InternalError(e.what());
auto get_table_res = arrow::Table::FromRecordBatches(result->schema(), {result});
if (!get_table_res.ok()) {
return Status::InternalError("Error when get arrow table from record batchs");
}
_cur_written_rows += sz;
auto& table = get_table_res.ValueOrDie();
RETURN_DORIS_STATUS_IF_ERROR(_writer->WriteTable(*table, block.rows()));
return Status::OK();
}
arrow::Status VParquetTransformer::_open_file_writer() {
ARROW_ASSIGN_OR_RAISE(_writer, parquet::arrow::FileWriter::Open(
*_arrow_schema, arrow::default_memory_pool(), _outstream,
_parquet_writer_properties, _arrow_properties));
return arrow::Status::OK();
}
Status VParquetTransformer::open() {
RETURN_IF_ERROR(parse_properties());
RETURN_IF_ERROR(parse_schema());
RETURN_IF_ERROR(_parse_properties());
RETURN_IF_ERROR(_parse_schema());
try {
_writer = parquet::ParquetFileWriter::Open(_outstream, _schema, _properties);
RETURN_DORIS_STATUS_IF_ERROR(_open_file_writer());
} catch (const parquet::ParquetStatusException& e) {
LOG(WARNING) << "parquet file writer open error: " << e.what();
return Status::InternalError("parquet file writer open error: {}", e.what());
@@ -921,38 +284,18 @@ Status VParquetTransformer::open() {
return Status::OK();
}
parquet::RowGroupWriter* VParquetTransformer::get_rg_writer() {
if (_rg_writer == nullptr) {
_rg_writer = _writer->AppendBufferedRowGroup();
}
if (_cur_written_rows > _max_row_per_group) {
_rg_writer->Close();
_rg_writer = _writer->AppendBufferedRowGroup();
_cur_written_rows = 0;
}
return _rg_writer;
}
int64_t VParquetTransformer::written_len() {
return _outstream->get_written_len();
}
Status VParquetTransformer::close() {
try {
if (_rg_writer != nullptr) {
_rg_writer->Close();
_rg_writer = nullptr;
}
if (_writer != nullptr) {
_writer->Close();
}
arrow::Status st = _outstream->Close();
if (!st.ok()) {
LOG(WARNING) << "close parquet file error: " << st.ToString();
return Status::IOError(st.ToString());
RETURN_DORIS_STATUS_IF_ERROR(_writer->Close());
}
RETURN_DORIS_STATUS_IF_ERROR(_outstream->Close());
} catch (const std::exception& e) {
_rg_writer = nullptr;
LOG(WARNING) << "Parquet writer close error: " << e.what();
return Status::IOError(e.what());
}
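
Taken together, the transformer no longer drives Parquet column writers by hand. It builds an arrow::Schema from the output expressions (_parse_schema), opens a parquet::arrow::FileWriter over the output stream (_open_file_writer), converts each Block into an arrow::RecordBatch through the serde layer (convert_to_arrow_batch), and writes it as a table. A condensed sketch of that flow against the Arrow/Parquet API; write_one_batch is a hypothetical helper, and default writer properties stand in for the compression/dictionary settings built in _parse_properties:

// Sketch only; the Doris wrappers (ParquetOutputStream, convert_to_arrow_batch) are omitted.
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/writer.h>
#include <parquet/properties.h>

arrow::Status write_one_batch(const std::shared_ptr<arrow::Schema>& schema,
                              const std::shared_ptr<arrow::RecordBatch>& batch,
                              const std::shared_ptr<arrow::io::OutputStream>& sink) {
    // store_schema() embeds the Arrow schema so the original types can be reconstructed on read.
    auto arrow_props = parquet::ArrowWriterProperties::Builder().store_schema()->build();
    ARROW_ASSIGN_OR_RAISE(auto writer, parquet::arrow::FileWriter::Open(
                                               *schema, arrow::default_memory_pool(), sink,
                                               parquet::default_writer_properties(), arrow_props));
    ARROW_ASSIGN_OR_RAISE(auto table, arrow::Table::FromRecordBatches(schema, {batch}));
    ARROW_RETURN_NOT_OK(writer->WriteTable(*table, batch->num_rows()));
    return writer->Close();
}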

View File

@@ -21,6 +21,7 @@
#include <arrow/result.h>
#include <arrow/status.h>
#include <gen_cpp/DataSinks_types.h>
#include <parquet/arrow/writer.h>
#include <parquet/file_writer.h>
#include <parquet/properties.h>
#include <parquet/types.h>
@@ -95,7 +96,7 @@
const bool& parquet_disable_dictionary,
const TParquetVersion::type& parquet_version, bool output_object_data);
~VParquetTransformer() = default;
~VParquetTransformer() override = default;
Status open() override;
@@ -106,19 +107,15 @@
int64_t written_len() override;
private:
parquet::RowGroupWriter* get_rg_writer();
Status _parse_properties();
Status _parse_schema();
arrow::Status _open_file_writer();
Status parse_schema();
Status parse_properties();
private:
std::shared_ptr<ParquetOutputStream> _outstream;
std::shared_ptr<parquet::WriterProperties> _properties;
std::shared_ptr<parquet::schema::GroupNode> _schema;
std::unique_ptr<parquet::ParquetFileWriter> _writer;
parquet::RowGroupWriter* _rg_writer;
const int64_t _max_row_per_group = 10;
std::shared_ptr<parquet::WriterProperties> _parquet_writer_properties;
std::shared_ptr<parquet::ArrowWriterProperties> _arrow_properties;
std::unique_ptr<parquet::arrow::FileWriter> _writer;
std::shared_ptr<arrow::Schema> _arrow_schema;
const std::vector<TParquetSchema>& _parquet_schemas;
const TParquetCompressionType::type& _compression_type;

View File

@@ -553,10 +553,8 @@ TEST(DataTypeSerDeArrowTest, DataTypeMapNullKeySerDeTest) {
DataTypePtr d = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
DataTypePtr m = std::make_shared<DataTypeMap>(s, d);
Array k1, k2, v1, v2, k3, v3;
k1.push_back(Null());
k1.push_back("doris");
k1.push_back("clever amory");
v1.push_back(11);
v1.push_back(Null());
v1.push_back(30);
k2.push_back("hello amory");
@@ -568,9 +566,7 @@
v2.push_back(6);
v2.push_back(7);
k3.push_back("test");
k3.push_back(Null());
v3.push_back(11);
v3.push_back(30);
Map m1, m2, m3;
m1.push_back(k1);
m1.push_back(v1);

View File

@@ -39,7 +39,6 @@ import org.apache.doris.datasource.property.constants.S3Properties;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TParquetCompressionType;
import org.apache.doris.thrift.TParquetDataLogicalType;
import org.apache.doris.thrift.TParquetDataType;
import org.apache.doris.thrift.TParquetRepetitionType;
import org.apache.doris.thrift.TParquetSchema;
@@ -70,7 +69,6 @@ public class OutFileClause {
public static final List<Type> RESULT_COL_TYPES = Lists.newArrayList();
public static final Map<String, TParquetRepetitionType> PARQUET_REPETITION_TYPE_MAP = Maps.newHashMap();
public static final Map<String, TParquetDataType> PARQUET_DATA_TYPE_MAP = Maps.newHashMap();
public static final Map<String, TParquetDataLogicalType> PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP = Maps.newHashMap();
public static final Map<String, TParquetCompressionType> PARQUET_COMPRESSION_TYPE_MAP = Maps.newHashMap();
public static final Map<String, TParquetVersion> PARQUET_VERSION_MAP = Maps.newHashMap();
public static final Set<String> ORC_DATA_TYPE = Sets.newHashSet();
@@ -103,12 +101,6 @@
PARQUET_DATA_TYPE_MAP.put("double", TParquetDataType.DOUBLE);
PARQUET_DATA_TYPE_MAP.put("fixed_len_byte_array", TParquetDataType.FIXED_LEN_BYTE_ARRAY);
PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("decimal", TParquetDataLogicalType.DECIMAL);
PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("date", TParquetDataLogicalType.DATE);
PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("datetime", TParquetDataLogicalType.TIMESTAMP);
// TODO(ftw): add other logical type
PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("none", TParquetDataLogicalType.NONE);
PARQUET_COMPRESSION_TYPE_MAP.put("snappy", TParquetCompressionType.SNAPPY);
PARQUET_COMPRESSION_TYPE_MAP.put("gzip", TParquetCompressionType.GZIP);
PARQUET_COMPRESSION_TYPE_MAP.put("brotli", TParquetCompressionType.BROTLI);
@@ -495,170 +487,17 @@
private void analyzeForParquetFormat(List<Expr> resultExprs, List<String> colLabels) throws AnalysisException {
if (this.parquetSchemas.isEmpty()) {
genParquetSchema(resultExprs, colLabels);
genParquetColumnName(resultExprs, colLabels);
}
// check schema number
if (resultExprs.size() != this.parquetSchemas.size()) {
throw new AnalysisException("Parquet schema number does not equal to select item number");
}
// check type
for (int i = 0; i < this.parquetSchemas.size(); ++i) {
TParquetDataType type = this.parquetSchemas.get(i).schema_data_type;
Type resultType = resultExprs.get(i).getType();
switch (resultType.getPrimitiveType()) {
case BOOLEAN:
if (!PARQUET_DATA_TYPE_MAP.get("boolean").equals(type)) {
throw new AnalysisException("project field type is BOOLEAN, should use boolean,"
+ " but the type of column " + i + " is " + type);
}
break;
case TINYINT:
case SMALLINT:
case INT:
case DATE:
if (!PARQUET_DATA_TYPE_MAP.get("int32").equals(type)) {
throw new AnalysisException("project field type is TINYINT/SMALLINT/INT,"
+ "should use int32, " + "but the definition type of column " + i + " is " + type);
}
break;
case BIGINT:
case DATETIME:
if (!PARQUET_DATA_TYPE_MAP.get("int64").equals(type)) {
throw new AnalysisException("project field type is BIGINT/DATE/DATETIME,"
+ "should use int64, but the definition type of column " + i + " is " + type);
}
break;
case FLOAT:
if (!PARQUET_DATA_TYPE_MAP.get("float").equals(type)) {
throw new AnalysisException("project field type is FLOAT, should use float,"
+ " but the definition type of column " + i + " is " + type);
}
break;
case DOUBLE:
if (!PARQUET_DATA_TYPE_MAP.get("double").equals(type)) {
throw new AnalysisException("project field type is DOUBLE, should use double,"
+ " but the definition type of column " + i + " is " + type);
}
break;
case DECIMAL32:
case DECIMAL64:
case DECIMAL128: {
if (!PARQUET_DATA_TYPE_MAP.get("fixed_len_byte_array").equals(type)) {
throw new AnalysisException("project field type is DECIMAL"
+ ", should use fixed_len_byte_array, but the definition type of column "
+ i + " is " + type);
}
break;
}
case DECIMALV2:
case CHAR:
case VARCHAR:
case STRING:
case DATETIMEV2:
case DATEV2:
case LARGEINT:
if (!PARQUET_DATA_TYPE_MAP.get("byte_array").equals(type)) {
throw new AnalysisException("project field type is CHAR/VARCHAR/STRING/DECIMAL/DATEV2"
+ "/DATETIMEV2/LARGEINT, should use byte_array, but the definition type of column "
+ i + " is " + type);
}
break;
case HLL:
case BITMAP:
if (ConnectContext.get() != null && ConnectContext.get()
.getSessionVariable().isReturnObjectDataAsBinary()) {
if (!PARQUET_DATA_TYPE_MAP.get("byte_array").equals(type)) {
throw new AnalysisException("project field type is HLL/BITMAP, should use byte_array, "
+ "but the definition type of column " + i + " is " + type);
}
} else {
throw new AnalysisException("Parquet format does not support column type: "
+ resultType.getPrimitiveType());
}
break;
default:
throw new AnalysisException("Parquet format does not support column type: "
+ resultType.getPrimitiveType());
}
}
}
private void genParquetSchema(List<Expr> resultExprs, List<String> colLabels) throws AnalysisException {
Preconditions.checkState(this.parquetSchemas.isEmpty());
private void genParquetColumnName(List<Expr> resultExprs, List<String> colLabels) throws AnalysisException {
for (int i = 0; i < resultExprs.size(); ++i) {
Expr expr = resultExprs.get(i);
TParquetSchema parquetSchema = new TParquetSchema();
if (resultExprs.get(i).isNullable()) {
parquetSchema.schema_repetition_type = PARQUET_REPETITION_TYPE_MAP.get("optional");
} else {
parquetSchema.schema_repetition_type = PARQUET_REPETITION_TYPE_MAP.get("required");
}
switch (expr.getType().getPrimitiveType()) {
case BOOLEAN:
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("boolean");
break;
case TINYINT:
case SMALLINT:
case INT:
case DATE:
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("int32");
break;
case BIGINT:
case DATETIME:
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("int64");
break;
case FLOAT:
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("float");
break;
case DOUBLE:
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("double");
break;
case DECIMAL32:
case DECIMAL64:
case DECIMAL128: {
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("fixed_len_byte_array");
break;
}
case DECIMALV2:
case CHAR:
case VARCHAR:
case STRING:
case DATETIMEV2:
case DATEV2:
case LARGEINT:
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("byte_array");
break;
case HLL:
case BITMAP:
if (ConnectContext.get() != null && ConnectContext.get()
.getSessionVariable().isReturnObjectDataAsBinary()) {
parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("byte_array");
}
break;
default:
throw new AnalysisException("currently parquet do not support column type: "
+ expr.getType().getPrimitiveType());
}
switch (expr.getType().getPrimitiveType()) {
case DECIMAL32:
case DECIMAL64:
case DECIMAL128: {
parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("decimal");
break;
}
case DATE:
parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("date");
break;
case DATETIME:
parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("datetime");
break;
default:
parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("none");
}
parquetSchema.schema_column_name = colLabels.get(i);
parquetSchemas.add(parquetSchema);
}
@@ -864,6 +703,7 @@ public class OutFileClause {
}
// check schema. if schema is not set, Doris will gen schema by select items
// Note: This code is useless and outdated.
String schema = properties.get(SCHEMA);
if (schema == null) {
return;

View File

@@ -651,10 +651,6 @@ public class SelectStmtTest {
try {
SelectStmt stmt = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql, ctx);
Assert.assertEquals(1, stmt.getOutFileClause().getParquetSchemas().size());
Assert.assertEquals(stmt.getOutFileClause().PARQUET_REPETITION_TYPE_MAP.get("optional"),
stmt.getOutFileClause().getParquetSchemas().get(0).schema_repetition_type);
Assert.assertEquals(stmt.getOutFileClause().PARQUET_DATA_TYPE_MAP.get("byte_array"),
stmt.getOutFileClause().getParquetSchemas().get(0).schema_data_type);
Assert.assertEquals("k1", stmt.getOutFileClause().getParquetSchemas().get(0).schema_column_name);
} catch (Exception e) {
Assert.fail(e.getMessage());

View File

@@ -0,0 +1,25 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 1 1 true 1 1 1 1.1 1.1 char1 1
2 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 2 2 true 2 2 2 2.2 2.2 char2 2
3 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 3 3 true 3 3 3 3.3 3.3 char3 3
4 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 4 4 true 4 4 4 4.4 4.4 char4 4
5 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 5 5 true 5 5 5 5.5 5.5 char5 5
6 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 6 6 true 6 6 6 6.6 6.6 char6 6
7 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 7 7 true 7 7 7 7.7 7.7 char7 7
8 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 8 8 true 8 8 8 8.8 8.8 char8 8
9 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 9 9 true 9 9 9 9.9 9.9 char9 9
10 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 \N \N \N \N \N \N \N \N \N \N \N
-- !select_default --
1 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 1 1 true 1 1 1 1.1 1.1 char1 1
2 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 2 2 true 2 2 2 2.2 2.2 char2 2
3 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 3 3 true 3 3 3 3.3 3.3 char3 3
4 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 4 4 true 4 4 4 4.4 4.4 char4 4
5 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 5 5 true 5 5 5 5.5 5.5 char5 5
6 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 6 6 true 6 6 6 6.6 6.6 char6 6
7 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 7 7 true 7 7 7 7.7 7.7 char7 7
8 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 8 8 true 8 8 8 8.8 8.8 char8 8
9 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 Beijing 9 9 true 9 9 9 9.9 9.9 char9 9
10 2017-10-01 2017-10-01T00:00 2017-10-01 2017-10-01T00:00 2017-10-01T00:00:00.111 2017-10-01T00:00:00.111111 \N \N \N \N \N \N \N \N \N \N \N

View File

@@ -0,0 +1,229 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_base1 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
4 doris4 \N
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
8 doris8 \N
-- !select_load1 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
4 doris4 \N
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
8 doris8 \N
-- !select_base2 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
-- !select_load2 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
-- !select_base_date --
1 doris1 [2017-10-01, 2023-09-13, 2023-12-31]
2 doris2 [1967-10-01, 1000-09-13]
3 doris3 []
5 doris5 [0001-10-01, NULL, 0000-01-01]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 2017-10-01, 2023-09-13, 2023-12-31]
-- !select_load_date --
1 doris1 ["2017-10-01", "2023-09-13", "2023-12-31"]
2 doris2 ["1967-10-01", "1000-09-13"]
3 doris3 []
5 doris5 ["0001-10-01", NULL, "0000-01-01"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, "2017-10-01", "2023-09-13", "2023-12-31"]
-- !select_base_datetime --
1 doris1 [2017-10-01 00:00:00, 2011-10-01 01:23:59]
2 doris2 [2017-10-01 00:00:00, 2011-10-01 01:23:59]
3 doris3 []
5 doris5 [2017-10-01 00:00:00, NULL, 2017-10-01 00:00:00]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 2017-10-01 00:00:00, 2011-10-01 01:23:59]
-- !select_load_datetime --
1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"]
2 doris2 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"]
3 doris3 []
5 doris5 ["2017-10-01 00:00:00", NULL, "2017-10-01 00:00:00"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"]
-- !select_base_varchar --
1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"]
2 doris2 ["2017-10-01 00:00:00.123", "2011-10-01 01:23:59"]
3 doris3 []
5 doris5 ["2017-10-01 00:00:00.123456", NULL, "2017-10-01 00:00:00.123"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, "null", NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"]
-- !select_load_varchar --
1 doris1 ["2017-10-01 00:00:00", "2011-10-01 01:23:59"]
2 doris2 ["2017-10-01 00:00:00.123", "2011-10-01 01:23:59"]
3 doris3 []
5 doris5 ["2017-10-01 00:00:00.123456", NULL, "2017-10-01 00:00:00.123"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, "null", NULL, "2017-10-01 00:00:00", "2011-10-01 01:23:59"]
-- !select_base_smallint --
1 doris1 [-32768, 32767]
2 doris2 [-1, -1, -2, 0, 3, 99]
3 doris3 []
5 doris5 [-32768, 32767, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -32768, 32767]
-- !select_smallint --
1 doris1 [-32768, 32767]
2 doris2 [-1, -1, -2, 0, 3, 99]
3 doris3 []
5 doris5 [-32768, 32767, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -32768, 32767]
-- !select_base_tinyint --
1 doris1 [-128, 127]
2 doris2 [-1, -1, -2, 0, 3, 99]
3 doris3 []
5 doris5 [-128, 127, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -128, 127]
-- !select_load_tinyint --
1 doris1 [-128, 127]
2 doris2 [-1, -1, -2, 0, 3, 99]
3 doris3 []
5 doris5 [-128, 127, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -128, 127]
-- !select_base_boolean --
1 doris1 [1, 0, 1, 1, 0]
2 doris2 [1, 0, 0, 1, 1]
3 doris3 []
5 doris5 [1, 0, 1]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, 0, 1]
-- !select_load_boolean --
1 doris1 [1, 0, 1, 1, 0]
2 doris2 [1, 0, 0, 1, 1]
3 doris3 []
5 doris5 [1, 0, 1]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, 0, 1]
-- !select_base_bigint --
1 doris1 [-9223372036854775808, 9223372036854775807]
2 doris2 [-14141, -9223372036854775808, 9223372036854775807, 9891912, 3, 99]
3 doris3 []
5 doris5 [-128, 127, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -9223372036854775808, 9223372036854775807]
-- !select_load_bigint --
1 doris1 [-9223372036854775808, 9223372036854775807]
2 doris2 [-14141, -9223372036854775808, 9223372036854775807, 9891912, 3, 99]
3 doris3 []
5 doris5 [-128, 127, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -9223372036854775808, 9223372036854775807]
-- !select_base_largeint --
1 doris1 [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727]
2 doris2 [-1, 170141183460469231731687303715884105727, -2, 0, 3, 99]
3 doris3 []
5 doris5 [-170141183460469231731687303715884105728, 127, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -170141183460469231731687303715884105728, 170141183460469231731687303715884105727]
-- !select_load_largeint --
1 doris1 ["-170141183460469231731687303715884105728", "170141183460469231731687303715884105727"]
2 doris2 ["-1", "170141183460469231731687303715884105727", "-2", "0", "3", "99"]
3 doris3 []
5 doris5 ["-170141183460469231731687303715884105728", "127", "99", "-99"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, "-170141183460469231731687303715884105728", "170141183460469231731687303715884105727"]
-- !select_base_float --
1 doris1 [1.4013e-45, 3.4028235e+38]
2 doris2 [-1.1, 1.2231, 3.4028235e+38, 0, 3, 99.009888]
3 doris3 []
5 doris5 [-12.8, 1.4013e-45, 3.4028235e+38, -9.9]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, 1.4013e-45, 3.4028235e+38]
-- !select_load_float --
1 doris1 [1.4013e-45, 3.4028235e+38]
2 doris2 [-1.1, 1.2231, 3.4028235e+38, 0, 3, 99.009888]
3 doris3 []
5 doris5 [-12.8, 1.4013e-45, 3.4028235e+38, -9.9]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, 1.4013e-45, 3.4028235e+38]
-- !select_base_double --
1 doris1 [4.94065645841247e-324, 1.7976931348623157e+308]
2 doris2 [-1.1, 1.2231, 1.7976931348623157e+308, 0, 3, 99.00989]
3 doris3 []
5 doris5 [-128, 4.94065645841247e-324, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, 4.94065645841247e-324, 1.7976931348623157e+308]
-- !select_load_double --
1 doris1 [4.94065645841247e-324, 1.7976931348623157e+308]
2 doris2 [-1.1, 1.2231, 1.7976931348623157e+308, 0, 3, 99.00989]
3 doris3 []
5 doris5 [-128, 4.94065645841247e-324, 99, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, 4.94065645841247e-324, 1.7976931348623157e+308]
-- !select_base_CHAR --
1 doris1 ["1234567890", "doris12345"]
2 doris2 ["90", "doris1245"]
3 doris3 []
5 doris5 ["doris-123", "doris-123", "doris-124", "doris12378"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, "doris-123", "doris-123"]
-- !select_load_CHAR --
1 doris1 ["1234567890", "doris12345"]
2 doris2 ["90", "doris1245"]
3 doris3 []
5 doris5 ["doris-123", "doris-123", "doris-124", "doris12378"]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, "doris-123", "doris-123"]
-- !select_base_decimal --
1 doris1 [-128, 127]
2 doris2 [-1, -2, -21231, 0, 3, 99]
3 doris3 []
5 doris5 [-13, 1, 9434364, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -13, 13]
-- !select_load_decimal --
1 doris1 [-128, 127]
2 doris2 [-1, -2, -21231, 0, 3, 99]
3 doris3 []
5 doris5 [-13, 1, 9434364, -99]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, -13, 13]

View File

@@ -0,0 +1,129 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_base --
1 doris1 {1, "sn1", "sa1"}
2 doris2 {2, "sn2", "sa2"}
3 doris3 {3, "sn3", "sa3"}
4 doris4 \N
5 doris5 {5, NULL, "sa5"}
6 doris6 {NULL, NULL, NULL}
7 \N {NULL, NULL, NULL}
8 \N \N
-- !select_load1 --
1 doris1 {1, "sn1", "sa1"}
2 doris2 {2, "sn2", "sa2"}
3 doris3 {3, "sn3", "sa3"}
4 doris4 \N
5 doris5 {5, NULL, "sa5"}
6 doris6 {NULL, NULL, NULL}
7 \N {NULL, NULL, NULL}
8 \N \N
-- !select_base2 --
1 doris1 {1, "sn1", "sa1"}
2 doris2 {2, "sn2", "sa2"}
3 doris3 {3, "sn3", "sa3"}
5 doris5 {5, NULL, "sa5"}
6 doris6 {NULL, NULL, NULL}
7 \N {NULL, NULL, NULL}
-- !select_load2 --
1 doris1 {1, "sn1", "sa1"}
2 doris2 {2, "sn2", "sa2"}
3 doris3 {3, "sn3", "sa3"}
5 doris5 {5, NULL, "sa5"}
6 doris6 {NULL, NULL, NULL}
7 \N {NULL, NULL, NULL}
-- !select_base3 --
1 doris1 {"a":100, "b":111}
2 doris2 {"a":200, "b":222}
3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":100, "b":NULL}
6 \N \N
7 doris7 \N
-- !select_load3 --
1 doris1 {"a":"100", "b":"111"}
2 doris2 {"a":"200", "b":"222"}
3 doris3 {"a":NULL, "b":"333", "c":"399", "d":"399999999999999"}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":"100", "b":NULL}
6 \N \N
7 doris7 \N
-- !select_base4 --
1 doris1 {"a":100, "b":111}
2 doris2 {"a":200, "b":222}
3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":100, "b":NULL}
-- !select_load4 --
1 doris1 {"a":"100", "b":"111"}
2 doris2 {"a":"200", "b":"222"}
3 doris3 {"a":NULL, "b":"333", "c":"399", "d":"399999999999999"}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":"100", "b":NULL}
-- !select_base5 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
4 doris4 \N
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
8 doris8 \N
-- !select_load5 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
4 doris4 \N
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
8 doris8 \N
-- !select_base6 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
-- !select_load6 --
1 doris1 [9, 99, 999]
2 doris2 [8, 88]
3 doris3 []
5 doris5 [1, NULL, 2]
6 doris6 [NULL, NULL, NULL]
7 doris7 [NULL, NULL, NULL, 1, 2, 999999, 111111]
-- !select_base7 --
1 doris_1 {1, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 1, 1, 1, 1, 1, 1, 1.1, 1.1, "char1_1234", 1}
2 doris_2 {2, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 2, 2, 1, 2, 2, 2, 2.2, 2.2, "char2_1234", 2}
3 doris_3 {3, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 3, 3, 1, 3, 3, 3, 3.3, 3.3, "char3_1234", 3}
4 doris_4 {4, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 4, 4, 1, 4, 4, 4, 4.4, 4.4, "char4_1234", 4}
5 doris_5 {5, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 5, 5, 1, 5, 5, 5, 5.5, 5.5, "char5_1234", 5}
6 doris_6 {6, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 6, 6, 1, 6, 6, 6, 6.6, 6.6, "char6_1234", 6}
7 doris_7 {7, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 7, 7, 1, 7, 7, 7, 7.7, 7.7, "char7_1234", 7}
8 doris_8 {8, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 8, 8, 1, 8, 8, 8, 8.8, 8.8, "char8_1234", 8}
9 doris_9 {9, 2017-10-01, 2017-10-01 00:00:00, "Beijing", 9, 9, 1, 9, 9, 9, 9.9, 9.9, "char9_1234", 9}
10 doris_10 {10, 2017-10-01, 2017-10-01 00:00:00, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
-- !select_load7 --
1 doris_1 {1, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 1, 1, 1, 1, 1, "1", 1.1, 1.1, "char1_1234", 1}
2 doris_2 {2, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 2, 2, 1, 2, 2, "2", 2.2, 2.2, "char2_1234", 2}
3 doris_3 {3, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 3, 3, 1, 3, 3, "3", 3.3, 3.3, "char3_1234", 3}
4 doris_4 {4, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 4, 4, 1, 4, 4, "4", 4.4, 4.4, "char4_1234", 4}
5 doris_5 {5, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 5, 5, 1, 5, 5, "5", 5.5, 5.5, "char5_1234", 5}
6 doris_6 {6, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 6, 6, 1, 6, 6, "6", 6.6, 6.6, "char6_1234", 6}
7 doris_7 {7, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 7, 7, 1, 7, 7, "7", 7.7, 7.7, "char7_1234", 7}
8 doris_8 {8, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 8, 8, 1, 8, 8, "8", 8.8, 8.8, "char8_1234", 8}
9 doris_9 {9, "2017-10-01", "2017-10-01 00:00:00", "Beijing", 9, 9, 1, 9, 9, "9", 9.9, 9.9, "char9_1234", 9}
10 doris_10 {10, "2017-10-01", "2017-10-01 00:00:00", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}


@ -0,0 +1,397 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_base1 --
1 doris1 {"a":100, "b":111}
2 doris2 {"a":200, "b":222}
3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":100, "b":NULL}
6 \N \N
7 doris7 \N
-- !select_load1 --
1 doris1 {"a":"100", "b":"111"}
2 doris2 {"a":"200", "b":"222"}
3 doris3 {"a":NULL, "b":"333", "c":"399", "d":"399999999999999"}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":"100", "b":NULL}
6 \N \N
7 doris7 \N
-- !select_base2 --
1 doris1 {100:"null", 111:"b"}
2 doris2 {200:"a", 222:"b"}
3 doris3 {111:"a", 333:"b", 399:"c", 399999999999999:"d"}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:"100", 111:"b"}
6 \N \N
7 doris7 \N
8 doris8 {-170141183460469231731687303715884105728:"min_largeint", 170141183460469231731687303715884105727:"max_largeint"}
9 doris9 {-170141183460469231731687303715884105728:"min_largeint", 111:"b"}
10 doris10 {200:"a", 170141183460469231731687303715884105727:"max_largeint", 111:"b"}
-- !select_load2 --
1 doris1 {"100":"null", "111":"b"}
2 doris2 {"200":"a", "222":"b"}
3 doris3 {"111":"a", "333":"b", "399":"c", "399999999999999":"d"}
4 doris4 {"111":NULL, "111":NULL}
5 doris5 {"111":"100", "111":"b"}
6 \N \N
7 doris7 \N
8 doris8 {"-170141183460469231731687303715884105728":"min_largeint", "170141183460469231731687303715884105727":"max_largeint"}
9 doris9 {"-170141183460469231731687303715884105728":"min_largeint", "111":"b"}
10 doris10 {"200":"a", "170141183460469231731687303715884105727":"max_largeint", "111":"b"}
-- !select_base3 --
1 doris1 {100:0.12300, 111:1.23450}
2 doris2 {200:8738931.12312, 222:999.99900}
3 doris3 {111:1111034.12300, 333:7771.12310, 399:0.44124, 39999:0.44124}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:1111034.12300, 111:8738931.12312}
6 \N \N
7 doris7 \N
8 doris8 {-2147483648:1.23450, 2147483647:999.99900}
9 doris9 {-2147483648:1111034.12300}
10 doris10 {2147483647:123456789.12345}
-- !select_load3 --
1 doris1 {100:0.12300, 111:1.23450}
2 doris2 {200:8738931.12312, 222:999.99900}
3 doris3 {111:1111034.12300, 333:7771.12310, 399:0.44124, 39999:0.44124}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:1111034.12300, 111:8738931.12312}
6 \N \N
7 doris7 \N
8 doris8 {-2147483648:1.23450, 2147483647:999.99900}
9 doris9 {-2147483648:1111034.12300}
10 doris10 {2147483647:123456789.12345}
-- !select_base4 --
1 doris1 {100:4.94065645841247e-324, 111:1.7976931348623157e+308}
2 doris2 {200:123.123, 222:0.9999999}
3 doris3 {111:187.123, 333:555.6767, 399:129312.113, 3999:123.12314}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:187.123, 111:187.123}
6 \N \N
7 doris7 \N
8 doris8 {-2147483648:4.94065645841247e-324, 2147483647:1.7976931348623157e+308}
9 doris9 {2147483647:4.94065645841247e-324, -2147483648:1.7976931348623157e+308}
-- !select_load4 --
1 doris1 {100:4.94065645841247e-324, 111:1.7976931348623157e+308}
2 doris2 {200:123.123, 222:0.9999999}
3 doris3 {111:187.123, 333:555.6767, 399:129312.113, 3999:123.12314}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:187.123, 111:187.123}
6 \N \N
7 doris7 \N
8 doris8 {-2147483648:4.94065645841247e-324, 2147483647:1.7976931348623157e+308}
9 doris9 {2147483647:4.94065645841247e-324, -2147483648:1.7976931348623157e+308}
-- !select_base5 --
1 doris1 {"k1":0.12300, "111":1.23450}
2 doris2 {"200":8738931.12312, "doris":999.99900}
3 doris3 {"null":1111034.12300, "333":7771.12310, "399":0.44124, "3999999999":0.44124}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":1111034.12300, "null":8738931.12312}
6 \N \N
7 doris7 \N
8 doris8 {"-2147483648":1.23450, "2147483647":999.99900}
9 doris9 {"-2147483648":1111034.12300}
10 doris10 {"2147483647":123456789.12345}
-- !select_load5 --
1 doris1 {"k1":0.12300, "111":1.23450}
2 doris2 {"200":8738931.12312, "doris":999.99900}
3 doris3 {"null":1111034.12300, "333":7771.12310, "399":0.44124, "3999999999":0.44124}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":1111034.12300, "null":8738931.12312}
6 \N \N
7 doris7 \N
8 doris8 {"-2147483648":1.23450, "2147483647":999.99900}
9 doris9 {"-2147483648":1111034.12300}
10 doris10 {"2147483647":123456789.12345}
-- !select_base6 --
1 doris1 {"100":4.94065645841247e-324, "doris":1.7976931348623157e+308}
2 doris2 {"nereids":123.123, "222":0.9999999}
3 doris3 {"null":187.123, "333":555.6767, "399":129312.113, "39999999999":123.12314}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":187.123, "null":187.123}
6 \N \N
7 doris7 \N
8 doris8 {"-2147483648":4.94065645841247e-324, "2147483647":1.7976931348623157e+308}
9 doris9 {"2147483647":4.94065645841247e-324, "-2147483648":1.7976931348623157e+308}
-- !select_load6 --
1 doris1 {"100":4.94065645841247e-324, "doris":1.7976931348623157e+308}
2 doris2 {"nereids":123.123, "222":0.9999999}
3 doris3 {"null":187.123, "333":555.6767, "399":129312.113, "39999999999":123.12314}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":187.123, "null":187.123}
6 \N \N
7 doris7 \N
8 doris8 {"-2147483648":4.94065645841247e-324, "2147483647":1.7976931348623157e+308}
9 doris9 {"2147483647":4.94065645841247e-324, "-2147483648":1.7976931348623157e+308}
-- !select_base7 --
1 doris1 {"a":100, "b":111}
2 doris2 {"a":200, "b":222}
3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":100, "b":NULL}
6 \N \N
7 doris7 \N
8 doris8 {"max_bigint":9223372036854775807, "min_bigint":-9223372036854775808}
-- !select_load7 --
1 doris1 {"a":100, "b":111}
2 doris2 {"a":200, "b":222}
3 doris3 {"a":NULL, "b":333, "c":399, "d":399999999999999}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":100, "b":NULL}
6 \N \N
7 doris7 \N
8 doris8 {"max_bigint":9223372036854775807, "min_bigint":-9223372036854775808}
-- !select_base8 --
1 doris1 {"a":1, "b":0}
2 doris2 {"a":0, "b":0}
3 doris3 {"a":1, "b":NULL, "c":1, "d":0}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":0, "b":1}
6 \N \N
7 doris7 \N
8 doris8 {"true":1, "false":0}
-- !select_load8 --
1 doris1 {"a":1, "b":0}
2 doris2 {"a":0, "b":0}
3 doris3 {"a":1, "b":NULL, "c":1, "d":0}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":0, "b":1}
6 \N \N
7 doris7 \N
8 doris8 {"true":1, "false":0}
-- !select_base9 --
1 doris1 {100:1, 111:1}
2 doris2 {200:0, 222:0}
3 doris3 {111:1, 333:0, 399:0, 3999:1}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:1, 111:1}
6 \N \N
7 doris7 \N
8 doris8 {-2147483648:0, 2147483647:0}
9 doris9 {2147483647:1, -2147483648:1}
-- !select_load9 --
1 doris1 {100:1, 111:1}
2 doris2 {200:0, 222:0}
3 doris3 {111:1, 333:0, 399:0, 3999:1}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:1, 111:1}
6 \N \N
7 doris7 \N
8 doris8 {-2147483648:0, 2147483647:0}
9 doris9 {2147483647:1, -2147483648:1}
-- !select_base10 --
1 doris1 {2023-04-20 01:02:03:"null", 2018-04-20 10:40:35:"b"}
2 doris2 {2000-04-20 00:00:00:"a", 1967-12-31 12:24:56:"b"}
3 doris3 {2023-01-01 00:00:00:"b", 2023-02-27 00:01:02:"d"}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {2025-12-31 12:01:41:"min_largeint", 2006-02-19 09:01:02:"max_largeint"}
9 doris9 {0209-04-20 00:00:00:"min_largeint", 0102-03-21 00:00:00:"b"}
10 doris10 {2003-04-29 01:02:03:"a", 2006-02-22 02:01:04:"max_largeint", 2020-03-21 19:21:23:"b"}
-- !select_load10 --
1 doris1 {"2023-04-20 01:02:03":"null", "2018-04-20 10:40:35":"b"}
2 doris2 {"2000-04-20 00:00:00":"a", "1967-12-31 12:24:56":"b"}
3 doris3 {"2023-01-01 00:00:00":"b", "2023-02-27 00:01:02":"d"}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {"2025-12-31 12:01:41":"min_largeint", "2006-02-19 09:01:02":"max_largeint"}
9 doris9 {"0209-04-20 00:00:00":"min_largeint", "0102-03-21 00:00:00":"b"}
10 doris10 {"2003-04-29 01:02:03":"a", "2006-02-22 02:01:04":"max_largeint", "2020-03-21 19:21:23":"b"}
-- !select_base11 --
1 doris1 {2023-04-20 01:02:03:NULL, 2018-04-20 10:40:35:123}
2 doris2 {2000-04-20 00:00:00:-2147483648, 1967-12-31 12:24:56:2147483647}
3 doris3 {2023-01-01 00:00:00:1246, 2023-02-27 00:01:02:5646}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {2025-12-31 12:01:41:524524, 2006-02-19 09:01:02:2534}
-- !select_load11 --
1 doris1 {"2023-04-20 01:02:03":NULL, "2018-04-20 10:40:35":123}
2 doris2 {"2000-04-20 00:00:00":-2147483648, "1967-12-31 12:24:56":2147483647}
3 doris3 {"2023-01-01 00:00:00":1246, "2023-02-27 00:01:02":5646}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {"2025-12-31 12:01:41":524524, "2006-02-19 09:01:02":2534}
-- !select_base12 --
1 doris1 {2023-04-20:NULL, 2018-04-20:123}
2 doris2 {2000-04-20:-2147483648, 1967-12-31:2147483647}
3 doris3 {2023-01-01:1246, 2023-02-27:5646}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {2025-12-31:524524, 2006-02-19:2534}
-- !select_load12 --
1 doris1 {"2023-04-20":NULL, "2018-04-20":123}
2 doris2 {"2000-04-20":-2147483648, "1967-12-31":2147483647}
3 doris3 {"2023-01-01":1246, "2023-02-27":5646}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {"2025-12-31":524524, "2006-02-19":2534}
-- !select_base13 --
1 doris1 {2023-04-20:"null", 2018-04-20:NULL}
2 doris2 {2000-04-20:"-2147483648", 1967-12-31:"2147483647"}
3 doris3 {2023-01-01:"1246", 2023-02-27:"5646"}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {2025-12-31:"min_largeint", 2006-02-19:"max_largeint"}
-- !select_load13 --
1 doris1 {"2023-04-20":"null", "2018-04-20":NULL}
2 doris2 {"2000-04-20":"-2147483648", "1967-12-31":"2147483647"}
3 doris3 {"2023-01-01":"1246", "2023-02-27":"5646"}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {"2025-12-31":"min_largeint", "2006-02-19":"max_largeint"}
-- !select_base14 --
1 doris1 {2023-04-20 12:20:03:"null", 2018-04-20 12:59:59:NULL}
2 doris2 {2000-04-20 23:59:59:"-2147483648", 1967-12-31 00:00:00:"2147483647"}
3 doris3 {2023-01-01 07:24:54:"1246", 2023-02-27 15:12:13:"5646"}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {2025-12-31 11:22:33:"min_largeint", 2006-02-19 00:44:55:"max_largeint"}
-- !select_load14 --
1 doris1 {"2023-04-20 12:20:03":"null", "2018-04-20 12:59:59":NULL}
2 doris2 {"2000-04-20 23:59:59":"-2147483648", "1967-12-31 00:00:00":"2147483647"}
3 doris3 {"2023-01-01 07:24:54":"1246", "2023-02-27 15:12:13":"5646"}
4 doris4 {}
5 doris5 {}
6 \N \N
7 doris7 \N
8 doris8 {"2025-12-31 11:22:33":"min_largeint", "2006-02-19 00:44:55":"max_largeint"}
-- !select_base15 --
1 doris1 {100:"null", 111:"b"}
2 doris2 {200:"a", 222:"b"}
3 doris3 {111:"a", 333:"b", 399:"c", 399999999999999:"d"}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:"100", 111:"b"}
6 \N \N
7 doris7 \N
8 doris8 {-9223372036854775808:"min_bigint", 9223372036854775807:"max_bigint"}
9 doris9 {9223372036854775807:"min_bigint", 111:"b"}
10 doris10 {200:"a", 9223372036854775807:"max_bigint", 111:"b"}
-- !select_load15 --
1 doris1 {100:"null", 111:"b"}
2 doris2 {200:"a", 222:"b"}
3 doris3 {111:"a", 333:"b", 399:"c", 399999999999999:"d"}
4 doris4 {111:NULL, 111:NULL}
5 doris5 {111:"100", 111:"b"}
6 \N \N
7 doris7 \N
8 doris8 {-9223372036854775808:"min_bigint", 9223372036854775807:"max_bigint"}
9 doris9 {9223372036854775807:"min_bigint", 111:"b"}
10 doris10 {200:"a", 9223372036854775807:"max_bigint", 111:"b"}
-- !select_base16 --
1 doris1 {1:"null", 0:"b"}
2 doris2 {1:"a", 1:"b"}
3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"}
4 doris4 {1:NULL, 1:NULL}
5 doris5 {1:"100", 1:"b"}
6 \N \N
7 doris7 \N
8 doris8 {0:"min_bigint", 0:"max_bigint"}
9 doris9 {1:"min_bigint", 0:"b"}
10 doris10 {0:"a", 1:"max_bigint", 1:"b"}
-- !select_load16 --
1 doris1 {1:"null", 0:"b"}
2 doris2 {1:"a", 1:"b"}
3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"}
4 doris4 {1:NULL, 1:NULL}
5 doris5 {1:"100", 1:"b"}
6 \N \N
7 doris7 \N
8 doris8 {0:"min_bigint", 0:"max_bigint"}
9 doris9 {1:"min_bigint", 0:"b"}
10 doris10 {0:"a", 1:"max_bigint", 1:"b"}
-- !select_base17 --
1 doris1 {1:"xxx", 0:"b"}
2 doris2 {1:"a", 1:"b"}
3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"}
4 doris4 {1:NULL, 1:NULL}
5 doris5 {1:"100", 1:"b"}
6 \N \N
7 doris7 \N
8 doris8 {0:"min_bigint", 0:"max_bigint"}
9 doris9 {1:"min_bigint", 0:"b"}
10 doris10 {0:"a", 1:"max_bigint", 1:"b"}
-- !select_load17 --
1 doris1 {1:"xxx", 0:"b"}
2 doris2 {1:"a", 1:"b"}
3 doris3 {1:"a", 1:"b", 0:"c", 0:"d"}
4 doris4 {1:NULL, 1:NULL}
5 doris5 {1:"100", 1:"b"}
6 \N \N
7 doris7 \N
8 doris8 {0:"min_bigint", 0:"max_bigint"}
9 doris9 {1:"min_bigint", 0:"b"}
10 doris10 {0:"a", 1:"max_bigint", 1:"b"}
-- !select_base18 --
1 doris1 {"doris":"null", "nereids":"b"}
2 doris2 {"ftw":"a", "cyx":"b"}
3 doris3 {"null":"a", "333":"b", "399":"c", "399999999999999":"d"}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":"100", "null":"b"}
6 \N \N
7 doris7 \N
8 doris8 {"170141183460469231731687303715884105728":"min_largeint", "170141183460469231731687303715884105727":"max_largeint"}
9 doris9 {"170141183460469231731687303715884105728":"min_largeint", "111":"b"}
10 doris10 {"200":"a", "170141183460469231731687303715884105727":"max_largeint", "111":"b"}
-- !select_load18 --
1 doris1 {"doris":"null", "nereids":"b"}
2 doris2 {"ftw":"a", "cyx":"b"}
3 doris3 {"null":"a", "333":"b", "399":"c", "399999999999999":"d"}
4 doris4 {"null":NULL, "null":NULL}
5 doris5 {"null":"100", "null":"b"}
6 \N \N
7 doris7 \N
8 doris8 {"170141183460469231731687303715884105728":"min_largeint", "170141183460469231731687303715884105727":"max_largeint"}
9 doris9 {"170141183460469231731687303715884105728":"min_largeint", "111":"b"}
10 doris10 {"200":"a", "170141183460469231731687303715884105727":"max_largeint", "111":"b"}


@ -0,0 +1,532 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import org.codehaus.groovy.runtime.IOGroovyMethods
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
suite("test_outfile_parquet_array_type", "p0") {
// enable the nereids planner
sql """ set enable_nereids_planner=true """
sql """ set enable_fallback_to_original_planner=false """
String ak = getS3AK()
String sk = getS3SK()
String s3_endpoint = getS3Endpoint()
String region = getS3Region()
String bucket = context.config.otherConfigs.get("s3BucketName");
def export_table_name = "outfile_parquet_array_export_test"
def load_table_name = "outfile_parquet_array_type_load_test"
def outFilePath = "${bucket}/outfile/parquet/complex_type/exp_"
def create_table = {table_name, struct_field ->
sql """ DROP TABLE IF EXISTS ${table_name} """
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
`user_id` LARGEINT NOT NULL COMMENT "user id",
`name` STRING COMMENT "user name",
${struct_field}
)
DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
"""
}
def outfile_to_S3 = {
// select ... into outfile ...
def res = sql """
SELECT * FROM ${export_table_name} t ORDER BY user_id
INTO OUTFILE "s3://${outFilePath}"
FORMAT AS parquet
PROPERTIES (
"s3.endpoint" = "${s3_endpoint}",
"s3.region" = "${region}",
"s3.secret_key"="${sk}",
"s3.access_key" = "${ak}"
);
"""
return res[0][3]
}
// 1. test NULL ARRAY
try {
def struct_field_define = "`a_info` ARRAY<int> NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (4, 'doris4', null); """
sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """
sql """ insert into ${export_table_name} values (8, 'doris8', null); """
// test base data
qt_select_base1 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
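// outfile_url is expected to be of the form "s3://<bucket>/<path>/exp_...": substring(4) drops the "s3:/"
// scheme so the S3 table function reads it back as http://<endpoint>/<bucket>/<path>/exp_...0.parquet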
qt_select_load1 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 2. test NOT NULL ARRAY
try {
def struct_field_define = "`a_info` ARRAY<int> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """
// test base data
qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load2 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 3. test NOT NULL ARRAY of date
try {
def struct_field_define = "`a_info` ARRAY<date> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', ['2017-10-01', '2023-09-13', '2023-12-31']), (2, 'doris2', ['1967-10-01', '1000-09-13']); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', ['0001-10-01', null, '0000-01-01']); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, '2017-10-01', '2023-09-13', '2023-12-31']); """
// test base data
qt_select_base_date """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_date """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 4. test NOT NULL ARRAY of datetime
try {
def struct_field_define = "`a_info` ARRAY<datetime> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', ['2017-10-01 00:00:00', '2011-10-01 01:23:59']), (2, 'doris2', ['2017-10-01 00:00:00', '2011-10-01 01:23:59']); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', ['2017-10-01 00:00:00', null, '2017-10-01 00:00:00']); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, '2017-10-01 00:00:00', '2011-10-01 01:23:59']); """
// test base data
qt_select_base_datetime """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_datetime """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 5. test NOT NULL ARRAY of VARCHAR(40)
try {
def struct_field_define = "`a_info` ARRAY<VARCHAR(40)> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', ['2017-10-01 00:00:00', '2011-10-01 01:23:59']), (2, 'doris2', ['2017-10-01 00:00:00.123', '2011-10-01 01:23:59']); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', ['2017-10-01 00:00:00.123456', null, '2017-10-01 00:00:00.123']); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, 'null', null, '2017-10-01 00:00:00', '2011-10-01 01:23:59']); """
// test base data
qt_select_base_varchar """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_varchar """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 6. test NOT NULL ARRAY of SMALLINT
try {
def struct_field_define = "`a_info` ARRAY<SMALLINT> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [-32768, 32767]), (2, 'doris2', [-1, -1, -2, 0 ,3, 99]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-32768, 32767, 99, -99]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, -32768, 32767]); """
// test base data
qt_select_base_smallint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_smallint """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 7. test NOT NULL ARRAY of TINYINT
try {
def struct_field_define = "`a_info` ARRAY<TINYINT> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [-128, 127]), (2, 'doris2', [-1, -1, -2, 0 ,3, 99]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-128, 127, 99, -99]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, -128, 127]); """
// test base data
qt_select_base_tinyint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_tinyint """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 8. test NOT NULL ARRAY of boolean
try {
def struct_field_define = "`a_info` ARRAY<boolean> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [true, false, true, true, false]), (2, 'doris2', [1, 0, false, true, 99]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [true, false, true]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, false, true]); """
// test base data
qt_select_base_boolean """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_boolean """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 9. test NOT NULL ARRAY of bigint
try {
def struct_field_define = "`a_info` ARRAY<bigint> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [-9223372036854775808, 9223372036854775807]), (2, 'doris2', [-14141, -9223372036854775808, 9223372036854775807, 9891912 ,3, 99]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-128, 127, 99, -99]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, -9223372036854775808, 9223372036854775807]); """
// test base data
qt_select_base_bigint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_bigint """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 10. test NOT NULL ARRAY of largeint
try {
def struct_field_define = "`a_info` ARRAY<largeint> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727]), (2, 'doris2', [-1, 170141183460469231731687303715884105727, -2, 0 ,3, 99]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-170141183460469231731687303715884105728, 127, 99, -99]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, -170141183460469231731687303715884105728, 170141183460469231731687303715884105727]); """
// test base data
qt_select_base_largeint """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_largeint """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 11. test NOT NULL ARRAY of float
try {
def struct_field_define = "`a_info` ARRAY<float> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [${Float.MIN_VALUE}, ${Float.MAX_VALUE}]), (2, 'doris2', [-1.1, 1.2231, ${Float.MAX_VALUE}, 0 ,3, 99.00989]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-12.8, ${Float.MIN_VALUE}, ${Float.MAX_VALUE}, -9.9]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, ${Float.MIN_VALUE}, ${Float.MAX_VALUE}]); """
// test base data
qt_select_base_float """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_float """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 12. test NOT NULL ARRAY of double
try {
def struct_field_define = "`a_info` ARRAY<double> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [${Double.MIN_VALUE}, ${Double.MAX_VALUE}]), (2, 'doris2', [-1.1, 1.2231, ${Double.MAX_VALUE}, 0 ,3, 99.00989]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-128, ${Double.MIN_VALUE}, 99, -99]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, ${Double.MIN_VALUE}, ${Double.MAX_VALUE}]); """
// test base data
qt_select_base_double """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_double """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 13. test NOT NULL ARRAY of CHAR(10)
try {
def struct_field_define = "`a_info` ARRAY<CHAR(10)> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', ['1234567890', 'doris12345']), (2, 'doris2', ['90', 'doris1245']); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', ['doris-123', 'doris-123', 'doris-124', 'doris12378']); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, 'doris-123', 'doris-123']); """
// test base data
qt_select_base_CHAR """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_CHAR """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 14. test NOT NULL ARRAY of decimal
try {
def struct_field_define = "`a_info` ARRAY<decimal> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [-128.1234567, 127.123456789]), (2, 'doris2', [-1.2, -1.933445, -21231.12, 0.0909 ,3, 99]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [-12.8, 1.27, 9434364.12319, -99.12314]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, -12.8, 12.7]); """
// test base data
qt_select_base_decimal """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load_decimal """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
}


@ -0,0 +1,313 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import org.codehaus.groovy.runtime.IOGroovyMethods
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
suite("test_outfile_parquet_complex_type", "p0") {
// enable the nereids planner
sql """ set enable_nereids_planner=true """
sql """ set enable_fallback_to_original_planner=false """
String ak = getS3AK()
String sk = getS3SK()
String s3_endpoint = getS3Endpoint()
String region = getS3Region()
String bucket = context.config.otherConfigs.get("s3BucketName");
def export_table_name = "outfile_parquet_complex_type_export_test"
def load_table_name = "outfile_parquet_complex_type_load_test"
def outFilePath = "${bucket}/outfile/parquet/complex_type/exp_"
def create_table = {table_name, struct_field ->
sql """ DROP TABLE IF EXISTS ${table_name} """
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
`user_id` LARGEINT NOT NULL COMMENT "user id",
`name` STRING COMMENT "user name",
${struct_field}
)
DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
"""
}
def outfile_to_S3 = {
// select ... into outfile ...
def res = sql """
SELECT * FROM ${export_table_name} t ORDER BY user_id
INTO OUTFILE "s3://${outFilePath}"
FORMAT AS parquet
PROPERTIES (
"s3.endpoint" = "${s3_endpoint}",
"s3.region" = "${region}",
"s3.secret_key"="${sk}",
"s3.access_key" = "${ak}"
);
"""
return res[0][3]
}
// 1. struct NULL type
try {
def struct_field_define = "`s_info` STRUCT<s_id:int(11), s_name:string, s_address:string> NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {1, 'sn1', 'sa1'}); """
sql """ insert into ${export_table_name} values (2, 'doris2', struct(2, 'sn2', 'sa2')); """
sql """ insert into ${export_table_name} values (3, 'doris3', named_struct('s_id', 3, 's_name', 'sn3', 's_address', 'sa3')); """
sql """ insert into ${export_table_name} values (4, 'doris4', null); """
sql """ insert into ${export_table_name} values (5, 'doris5', struct(5, null, 'sa5')); """
sql """ insert into ${export_table_name} values (6, 'doris6', struct(null, null, null)); """
sql """ insert into ${export_table_name} values (7, null, struct(null, null, null)); """
sql """ insert into ${export_table_name} values (8, null, null); """
// test base data
qt_select_base """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
// test outfile to s3
def outfile_url = outfile_to_S3()
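// outfile_url is expected to be of the form "s3://<bucket>/<path>/exp_...": substring(4) drops the "s3:/"
// scheme so the S3 table function reads it back as http://<endpoint>/<bucket>/<path>/exp_...0.parquet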
qt_select_load1 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 2. struct NOT NULL type
try {
def struct_field_define = "`s_info` STRUCT<s_id:int(11), s_name:string, s_address:string> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {1, 'sn1', 'sa1'}); """
sql """ insert into ${export_table_name} values (2, 'doris2', struct(2, 'sn2', 'sa2')); """
sql """ insert into ${export_table_name} values (3, 'doris3', named_struct('s_id', 3, 's_name', 'sn3', 's_address', 'sa3')); """
sql """ insert into ${export_table_name} values (5, 'doris5', struct(5, null, 'sa5')); """
sql """ insert into ${export_table_name} values (6, 'doris6', struct(null, null, null)); """
sql """ insert into ${export_table_name} values (7, null, struct(null, null, null)); """
// test base data
qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load2 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 3. test NULL Map
try {
def struct_field_define = "`m_info` Map<STRING, LARGEINT> NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
// test base data
qt_select_base3 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load3 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 4. test NOT NULL Map
try {
def struct_field_define = "`m_info` Map<STRING, LARGEINT> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """
// test base data
qt_select_base4 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load4 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 5. test NULL ARRAY
try {
def struct_field_define = "`a_info` ARRAY<int> NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (4, 'doris4', null); """
sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """
sql """ insert into ${export_table_name} values (8, 'doris8', null); """
// test base data
qt_select_base5 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load5 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 6. test NOT NULL ARRAY
try {
def struct_field_define = "`a_info` ARRAY<int> NOT NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', [9, 99, 999]), (2, 'doris2', [8, 88]); """
sql """ insert into ${export_table_name} values (3, 'doris3', []); """
sql """ insert into ${export_table_name} values (5, 'doris5', [1, null, 2]); """
sql """ insert into ${export_table_name} values (6, 'doris6', [null, null, null]); """
sql """ insert into ${export_table_name} values (7, 'doris7', [null, null, null, 1, 2, 999999, 111111]); """
// test base data
qt_select_base6 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load6 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 7. test struct with all types
try {
def struct_field_define = "`s_info` STRUCT<user_id:INT, date:DATE, datetime:DATETIME, city:VARCHAR(20), age:SMALLINT, sex:TINYINT, bool_col:BOOLEAN, int_col:INT, bigint_col:BIGINT, largeint_col:LARGEINT, float_col:FLOAT, double_col:DOUBLE, char_col:CHAR(10), decimal_col:DECIMAL> NULL"
// create table to export data
create_table(export_table_name, struct_field_define)
// create table to load data
create_table(load_table_name, struct_field_define)
// insert data
StringBuilder sb = new StringBuilder()
int i = 1
for (; i < 10; i ++) {
sb.append("""
(${i}, 'doris_${i}', {${i}, '2017-10-01', '2017-10-01 00:00:00', 'Beijing', ${i}, ${i % 128}, true, ${i}, ${i}, ${i}, ${i}.${i}, ${i}.${i}, 'char${i}_1234', ${i}}),
""")
}
sb.append("""
(${i}, 'doris_${i}', {${i}, '2017-10-01', '2017-10-01 00:00:00', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL})
""")
sql """ INSERT INTO ${export_table_name} VALUES ${sb.toString()} """
// test base data
qt_select_base7 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
// test outfile to s3
def outfile_url = outfile_to_S3()
qt_select_load7 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
}


@ -0,0 +1,714 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import org.codehaus.groovy.runtime.IOGroovyMethods
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
suite("test_outfile_parquet_map_type", "p0") {
// enable the nereids planner
sql """ set enable_nereids_planner=true """
sql """ set enable_fallback_to_original_planner=false """
String ak = getS3AK()
String sk = getS3SK()
String s3_endpoint = getS3Endpoint()
String region = getS3Region()
String bucket = context.config.otherConfigs.get("s3BucketName");
def export_table_name = "outfile_parquet_map_type_export_test"
def load_table_name = "outfile_parquet_map_type_load_test"
def outFilePath = "${bucket}/outfile/parquet/map_type/exp_"
def create_table = {table_name, map_field ->
sql """ DROP TABLE IF EXISTS ${table_name} """
sql """
CREATE TABLE IF NOT EXISTS ${table_name} (
`user_id` LARGEINT NOT NULL COMMENT "user id",
`name` STRING COMMENT "user name",
${map_field}
)
DISTRIBUTED BY HASH(user_id)
PROPERTIES("replication_num" = "1");
"""
}
def outfile_to_S3 = {
// select ... into outfile ...
def res = sql """
SELECT * FROM ${export_table_name} t ORDER BY user_id
INTO OUTFILE "s3://${outFilePath}"
FORMAT AS parquet
PROPERTIES (
"s3.endpoint" = "${s3_endpoint}",
"s3.region" = "${region}",
"s3.secret_key"="${sk}",
"s3.access_key" = "${ak}"
);
"""
return res[0][3]
}
// 1. test map<STRING, LARGEINT> NULL
try {
def map_field_define = "`m_info` Map<STRING, LARGEINT> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
// test base data
qt_select_base1 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
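// outfile_url is expected to be of the form "s3://<bucket>/<path>/exp_...": substring(4) drops the "s3:/"
// scheme so the S3 table function reads it back as http://<endpoint>/<bucket>/<path>/exp_...0.parquet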
qt_select_load1 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 2. test map<LARGEINT, STRING> NULL
try {
def map_field_define = "`m_info` Map<LARGEINT, STRING> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {100: 'null', 111:'b'}), (2, 'doris2', {200:'a', 222:'b'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {111: 'a', 333:'b', 399:'c', 399999999999999:'d'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {111: '100', 111:'b'}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {-170141183460469231731687303715884105728: 'min_largeint', 170141183460469231731687303715884105727: 'max_largeint'}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {-170141183460469231731687303715884105728: 'min_largeint', 111:'b'}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {200:'a', 170141183460469231731687303715884105727: 'max_largeint', 111:'b'}); """
// test base data
qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load2 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 3. test map<INT, DECIMAL(15,5)> NULL
try {
def map_field_define = "`m_info` Map<INT, DECIMAL(15,5)> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {100: 0.123, 111:1.2345}), (2, 'doris2', {200:8738931.12312, 222:999.999}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {111: 1111034.123, 333:7771.1231, 399:0.441241, 39999:0.441241}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {111: 1111034.123, 111:8738931.12312}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {${Integer.MIN_VALUE}: 1.2345, ${Integer.MAX_VALUE}: 999.999}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {${Integer.MIN_VALUE}: 1111034.123}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {${Integer.MAX_VALUE}: 123456789.12345}); """
// test base data
qt_select_base3 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load3 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 4. test map<INT, DOUBLE> NULL
try {
def map_field_define = "`m_info` Map<INT, DOUBLE> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {100: ${Double.MIN_VALUE}, 111:${Double.MAX_VALUE}}), (2, 'doris2', {200: 123.123, 222:0.9999999}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {111: 187.123, 333:555.6767, 399:129312.113, 3999:123.12314}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {111: 187.123, 111:187.123}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {${Integer.MIN_VALUE}: ${Double.MIN_VALUE}, ${Integer.MAX_VALUE}: ${Double.MAX_VALUE}}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {${Integer.MAX_VALUE}: ${Double.MIN_VALUE}, ${Integer.MIN_VALUE}: ${Double.MAX_VALUE}}); """
// test base data
qt_select_base4 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load4 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 5. test map<STRING, DECIMAL(15,5)> NULL
try {
def map_field_define = "`m_info` Map<STRING, DECIMAL(15,5)> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'k1': 0.123, '111':1.2345}), (2, 'doris2', {'200':8738931.12312, 'doris':999.999}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'null': 1111034.123, '333':7771.1231, '399':0.441241, '3999999999':0.441241}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 1111034.123, 'null':8738931.12312}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'${Integer.MIN_VALUE}': 1.2345, '${Integer.MAX_VALUE}': 999.999}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {'${Integer.MIN_VALUE}': 1111034.123}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {'${Integer.MAX_VALUE}': 123456789.12345}); """
// test base data
qt_select_base5 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load5 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 6. test map<STRING, DOUBLE> NULL
try {
def map_field_define = "`m_info` Map<STRING, DOUBLE> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'100': ${Double.MIN_VALUE}, 'doris':${Double.MAX_VALUE}}), (2, 'doris2', {'nereids': 123.123, '222':0.9999999}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'null': 187.123, '333':555.6767, '399':129312.113, '39999999999':123.12314}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 187.123, 'null':187.123}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'${Integer.MIN_VALUE}': ${Double.MIN_VALUE}, '${Integer.MAX_VALUE}': ${Double.MAX_VALUE}}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {'${Integer.MAX_VALUE}': ${Double.MIN_VALUE}, '${Integer.MIN_VALUE}': ${Double.MAX_VALUE}}); """
// test base data
qt_select_base6 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load6 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 7. test map<STRING, BIGINT> NULL
try {
def map_field_define = "`m_info` Map<STRING, BIGINT> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'a': 100, 'b': 111}), (2, 'doris2', {'a': 200, 'b': 222}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'a': null, 'b': 333, 'c':399, 'd':399999999999999}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': 100, 'b': null}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'max_bigint': ${Long.MAX_VALUE}, 'min_bigint': ${Long.MIN_VALUE}}); """
// test base data
qt_select_base7 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load7 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 8. test map<STRING, BOOLEAN> NULL
try {
def map_field_define = "`m_info` Map<STRING, BOOLEAN> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
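// row 3 passes the integer literal 399 as a BOOLEAN value (relies on implicit casting); other rows cover NULL values, duplicate keys, and fully NULL rows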
sql """ insert into ${export_table_name} values (1, 'doris1', {'a': true, 'b': false}), (2, 'doris2', {'a': false, 'b': false}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'a': true, 'b': null, 'c':399, 'd':false}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': false, 'b': true}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'true': true, 'false': false}); """
// test base data
qt_select_base8 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load8 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 9. test map<INT, BOOLEAN> NULL
try {
def map_field_define = "`m_info` Map<INT, BOOLEAN> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
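// rows below cover duplicate INT keys, NULL map values, fully NULL rows, and Integer.MIN_VALUE/MAX_VALUE as keys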
sql """ insert into ${export_table_name} values (1, 'doris1', {100: true, 111:true}), (2, 'doris2', {200: false, 222:false}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {111: true, 333:false, 399:false, 3999:true}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {111: true, 111:true}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {${Integer.MIN_VALUE}: false, ${Integer.MAX_VALUE}: false}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {${Integer.MAX_VALUE}: true, ${Integer.MIN_VALUE}: true}); """
// test base data
qt_select_base9 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load9 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 10. test map<DATETIME, STRING> NULL
try {
def map_field_define = "`m_info` Map<DATETIME, STRING> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
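// rows below cover empty maps, fully NULL rows, and DATETIME keys that include the early years 209 and 102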
sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20 01:02:03': 'null', '2018-04-20 10:40:35':'b'}), (2, 'doris2', {'2000-04-20 00:00:00':'a', '1967-12-31 12:24:56':'b'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01 00:00:00':'b', '2023-02-27 00:01:02':'d'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31 12:01:41': 'min_largeint', '2006-02-19 09:01:02': 'max_largeint'}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {'209-04-20 00:00:00': 'min_largeint', '102-03-21 00:00:00':'b'}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {'2003-04-29 01:02:03':'a', '2006-02-22 02:01:04': 'max_largeint', '2020-03-21 19:21:23':'b'}); """
// test base data
qt_select_base10 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load10 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 11. test map<DATETIME, INT> NULL
try {
def map_field_define = "`m_info` Map<DATETIME, INT> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
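// rows 1-2 cover a NULL value and Integer.MIN_VALUE/MAX_VALUE values; rows 4-5 insert empty maps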
sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20 01:02:03': null, '2018-04-20 10:40:35': 123}), (2, 'doris2', {'2000-04-20 00:00:00':${Integer.MIN_VALUE}, '1967-12-31 12:24:56':${Integer.MAX_VALUE}}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01 00:00:00':1246, '2023-02-27 00:01:02':5646}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31 12:01:41': 524524, '2006-02-19 09:01:02': 2534}); """
// test base data
qt_select_base11 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load11 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 12. test map<DATE, INT> NULL
try {
def map_field_define = "`m_info` Map<DATE, INT> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20': null, '2018-04-20': 123}), (2, 'doris2', {'2000-04-20':${Integer.MIN_VALUE}, '1967-12-31':${Integer.MAX_VALUE}}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01':1246, '2023-02-27':5646}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31': 524524, '2006-02-19': 2534}); """
// test base data
qt_select_base12 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load12 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 13. test map<DATE, STRING> NULL
try {
def map_field_define = "`m_info` Map<DATE, STRING> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20': 'null', '2018-04-20': null}), (2, 'doris2', {'2000-04-20':'${Integer.MIN_VALUE}', '1967-12-31':'${Integer.MAX_VALUE}'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01':'1246', '2023-02-27':'5646'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31': 'min_largeint', '2006-02-19': 'max_largeint'}); """
// test base data
qt_select_base13 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load13 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 14. test map<DATETIME, STRING> NULL
try {
def map_field_define = "`m_info` Map<DATETIME, STRING> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
sql """ insert into ${export_table_name} values (1, 'doris1', {'2023-04-20 12:20:03': 'null', '2018-04-20 12:59:59': null}), (2, 'doris2', {'2000-04-20 23:59:59':'${Integer.MIN_VALUE}', '1967-12-31 00:00:00':'${Integer.MAX_VALUE}'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'2023-01-01 07:24:54':'1246', '2023-02-27 15:12:13':'5646'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'2025-12-31 11:22:33': 'min_largeint', '2006-02-19 00:44:55': 'max_largeint'}); """
// test base data
qt_select_base14 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load14 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 15. test map<BIGINT, VARCHAR(20)> NULL
try {
def map_field_define = "`m_info` Map<BIGINT, VARCHAR(20)> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
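// rows below cover duplicate BIGINT keys, NULL values, fully NULL rows, and Long.MIN_VALUE/MAX_VALUE as keys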
sql """ insert into ${export_table_name} values (1, 'doris1', {100: 'null', 111:'b'}), (2, 'doris2', {200:'a', 222:'b'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {111: 'a', 333:'b', 399:'c', 399999999999999:'d'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {111: null, 111:null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {111: '100', 111:'b'}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {${Long.MIN_VALUE}: 'min_bigint', ${Long.MAX_VALUE}: 'max_bigint'}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {${Long.MAX_VALUE}: 'min_bigint', 111:'b'}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {200:'a', ${Long.MAX_VALUE}: 'max_bigint', 111:'b'}); """
// test base data
qt_select_base15 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load15 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 16. test map<BOOLEAN, VARCHAR(20)> NULL
try {
def map_field_define = "`m_info` Map<BOOLEAN, VARCHAR(20)> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
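// BOOLEAN keys are written as true/false literals, with duplicate true keys, NULL values, and fully NULL rows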
sql """ insert into ${export_table_name} values (1, "doris1", {true:"null",false:"b"}), (2, "doris2", {true:"a", true:"b"}); """
sql """ insert into ${export_table_name} values (3, "doris3", {true: "a", true:"b", false:"c", false:"d"}); """
sql """ insert into ${export_table_name} values (4, "doris4", {true: null, true:null}); """
sql """ insert into ${export_table_name} values (5, "doris5", {true: "100", true:"b"}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, "doris7", null); """
sql """ insert into ${export_table_name} values (8, "doris8", {false: "min_bigint", false: "max_bigint"}); """
sql """ insert into ${export_table_name} values (9, "doris9", {true: "min_bigint", false:"b"}); """
sql """ insert into ${export_table_name} values (10, "doris10", {false:"a", true: "max_bigint", true:"b"}); """
// test base data
qt_select_base16 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load16 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 17. test map<BOOLEAN, STRING> NULL
try {
def map_field_define = "`m_info` Map<BOOLEAN, STRING> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
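// BOOLEAN keys are written as the integer literals 0 and 1, with duplicate keys, NULL values, and fully NULL rows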
sql """ insert into ${export_table_name} values (1, 'doris1', {1: 'xxx', 0:'b'}), (2, 'doris2', {1:'a', 1:'b'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {1: 'a', 1:'b', 0:'c', 0:'d'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {1: null, 1:null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {1: '100', 1:'b'}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {0: 'min_bigint', 0: 'max_bigint'}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {1: 'min_bigint', 0:'b'}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {0:'a', 1: 'max_bigint', 1:'b'}); """
// test base data
qt_select_base17 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load17 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
// 18. test map<STRING, STRING> NULL
try {
def map_field_define = "`m_info` Map<STRING, STRING> NULL"
// create table to export data
create_table(export_table_name, map_field_define)
// create table to load data
create_table(load_table_name, map_field_define)
// insert data
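// keys in rows 8-10 are 39-digit numeric strings around the LARGEINT boundary, stored as plain STRING keys; other rows cover the literal key 'null', NULL values, and fully NULL rows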
sql """ insert into ${export_table_name} values (1, 'doris1', {'doris': 'null', 'nereids':'b'}), (2, 'doris2', {'ftw':'a', 'cyx':'b'}); """
sql """ insert into ${export_table_name} values (3, 'doris3', {'null': 'a', '333':'b', '399':'c', '399999999999999':'d'}); """
sql """ insert into ${export_table_name} values (4, 'doris4', {'null': null, 'null':null}); """
sql """ insert into ${export_table_name} values (5, 'doris5', {'null': '100', 'null':'b'}); """
sql """ insert into ${export_table_name} values (6, null, null); """
sql """ insert into ${export_table_name} values (7, 'doris7', null); """
sql """ insert into ${export_table_name} values (8, 'doris8', {'170141183460469231731687303715884105728': 'min_largeint', '170141183460469231731687303715884105727': 'max_largeint'}); """
sql """ insert into ${export_table_name} values (9, 'doris9', {'170141183460469231731687303715884105728': 'min_largeint', '111':'b'}); """
sql """ insert into ${export_table_name} values (10, 'doris10', {'200':'a', '170141183460469231731687303715884105727': 'max_largeint', '111':'b'}); """
// test base data
qt_select_base18 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
def outfile_url = outfile_to_S3()
qt_select_load18 """ SELECT * FROM S3 (
"uri" = "http://${s3_endpoint}${outfile_url.substring(4)}0.parquet",
"ACCESS_KEY"= "${ak}",
"SECRET_KEY" = "${sk}",
"format" = "parquet",
"region" = "${region}"
);
"""
} finally {
}
}