// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "exec/parquet_writer.h" #include #include #include #include "common/logging.h" #include "exec/file_writer.h" #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/TPaloBrokerService.h" #include "runtime/broker_mgr.h" #include "runtime/client_cache.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/mem_pool.h" #include "util/thrift_util.h" #include "util/types.h" namespace doris { /// ParquetOutputStream ParquetOutputStream::ParquetOutputStream(FileWriter* file_writer) : _file_writer(file_writer), _cur_pos(0), _written_len(0) { set_mode(arrow::io::FileMode::WRITE); } ParquetOutputStream::~ParquetOutputStream() { arrow::Status st = Close(); if (!st.ok()) { LOG(WARNING) << "close parquet file error: " << st.ToString(); } } arrow::Status ParquetOutputStream::Write(const void* data, int64_t nbytes) { if (_is_closed) { return arrow::Status::OK(); } size_t written_len = 0; Status st = _file_writer->write(static_cast(data), nbytes, &written_len); if (!st.ok()) { return arrow::Status::IOError(st.get_error_msg()); } _cur_pos += written_len; _written_len += written_len; return arrow::Status::OK(); } arrow::Result ParquetOutputStream::Tell() const { return _cur_pos; } arrow::Status ParquetOutputStream::Close() { if (_is_closed) { return arrow::Status::OK(); } Status st = _file_writer->close(); if (!st.ok()) { LOG(WARNING) << "close parquet output stream failed: " << st.get_error_msg(); return arrow::Status::IOError(st.get_error_msg()); } _is_closed = true; return arrow::Status::OK(); } int64_t ParquetOutputStream::get_written_len() { return _written_len; } void ParquetOutputStream::set_written_len(int64_t written_len) { _written_len = written_len; } /// ParquetWriterWrapper ParquetWriterWrapper::ParquetWriterWrapper(FileWriter* file_writer, const std::vector& output_expr_ctxs, const std::map& properties, const std::vector>& schema, bool output_object_data) : _output_expr_ctxs(output_expr_ctxs), _str_schema(schema), _cur_writed_rows(0), _rg_writer(nullptr), _output_object_data(output_object_data) { _outstream = std::shared_ptr(new ParquetOutputStream(file_writer)); parse_properties(properties); parse_schema(schema); init_parquet_writer(); } void ParquetWriterWrapper::parse_properties( const std::map& propertie_map) { parquet::WriterProperties::Builder builder; for (auto it = propertie_map.begin(); it != propertie_map.end(); it++) { std::string property_name = it->first; std::string property_value = it->second; if (property_name == "compression") { // UNCOMPRESSED, SNAPPY, GZIP, BROTLI, ZSTD, LZ4, LZO, BZ2 if (property_value == "snappy") { builder.compression(parquet::Compression::SNAPPY); } else if (property_value == "gzip") { builder.compression(parquet::Compression::GZIP); } else if (property_value == "brotli") { builder.compression(parquet::Compression::BROTLI); } else if (property_value == "zstd") { builder.compression(parquet::Compression::ZSTD); } else if (property_value == "lz4") { builder.compression(parquet::Compression::LZ4); } else if (property_value == "lzo") { builder.compression(parquet::Compression::LZO); } else if (property_value == "bz2") { builder.compression(parquet::Compression::BZ2); } else { builder.compression(parquet::Compression::UNCOMPRESSED); } } else if (property_name == "disable_dictionary") { if (property_value == "true") { builder.enable_dictionary(); } else { builder.disable_dictionary(); } } else if (property_name == "version") { if (property_value == "v1") { builder.version(parquet::ParquetVersion::PARQUET_1_0); } else { builder.version(parquet::ParquetVersion::PARQUET_2_0); } } } _properties = builder.build(); } Status ParquetWriterWrapper::parse_schema(const std::vector>& schema) { parquet::schema::NodeVector fields; for (auto column = schema.begin(); column != schema.end(); column++) { std::string repetition_type = (*column)[0]; parquet::Repetition::type parquet_repetition_type = parquet::Repetition::REQUIRED; if (repetition_type.find("required") != std::string::npos) { parquet_repetition_type = parquet::Repetition::REQUIRED; } else if (repetition_type.find("repeated") != std::string::npos) { parquet_repetition_type = parquet::Repetition::REPEATED; } else if (repetition_type.find("optional") != std::string::npos) { parquet_repetition_type = parquet::Repetition::OPTIONAL; } else { parquet_repetition_type = parquet::Repetition::UNDEFINED; } std::string data_type = (*column)[1]; parquet::Type::type parquet_data_type = parquet::Type::BYTE_ARRAY; if (data_type == "boolean") { parquet_data_type = parquet::Type::BOOLEAN; } else if (data_type.find("int32") != std::string::npos) { parquet_data_type = parquet::Type::INT32; } else if (data_type.find("int64") != std::string::npos) { parquet_data_type = parquet::Type::INT64; } else if (data_type.find("int96") != std::string::npos) { parquet_data_type = parquet::Type::INT96; } else if (data_type.find("float") != std::string::npos) { parquet_data_type = parquet::Type::FLOAT; } else if (data_type.find("double") != std::string::npos) { parquet_data_type = parquet::Type::DOUBLE; } else if (data_type.find("byte_array") != std::string::npos) { parquet_data_type = parquet::Type::BYTE_ARRAY; } else if (data_type.find("fixed_len_byte_array") != std::string::npos) { parquet_data_type = parquet::Type::FIXED_LEN_BYTE_ARRAY; } else { parquet_data_type = parquet::Type::UNDEFINED; } std::string column_name = (*column)[2]; fields.push_back(parquet::schema::PrimitiveNode::Make(column_name, parquet_repetition_type, parquet::LogicalType::None(), parquet_data_type)); _schema = std::static_pointer_cast( parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields)); } return Status::OK(); } Status ParquetWriterWrapper::write(const RowBatch& row_batch) { int num_rows = row_batch.num_rows(); for (int i = 0; i < num_rows; ++i) { TupleRow* row = row_batch.get_row(i); RETURN_IF_ERROR(_write_one_row(row)); _cur_writed_rows++; } return Status::OK(); } Status ParquetWriterWrapper::init_parquet_writer() { _writer = parquet::ParquetFileWriter::Open(_outstream, _schema, _properties); if (_writer == nullptr) { return Status::InternalError("Failed to create file writer"); } return Status::OK(); } parquet::RowGroupWriter* ParquetWriterWrapper::get_rg_writer() { if (_rg_writer == nullptr) { _rg_writer = _writer->AppendBufferedRowGroup(); } if (_cur_writed_rows > _max_row_per_group) { _rg_writer->Close(); _rg_writer = _writer->AppendBufferedRowGroup(); _cur_writed_rows = 0; } return _rg_writer; } Status ParquetWriterWrapper::_write_one_row(TupleRow* row) { int num_columns = _output_expr_ctxs.size(); if (num_columns != _str_schema.size()) { return Status::InternalError("project field size is not equal to schema column size"); } try { for (int index = 0; index < num_columns; ++index) { void* item = _output_expr_ctxs[index]->get_value(row); switch (_output_expr_ctxs[index]->root()->type().type) { case TYPE_BOOLEAN: { if (_str_schema[index][1] != "boolean") { std::stringstream ss; ss << "project field type is boolean, but the definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::BoolWriter* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { col_writer->WriteBatch(1, nullptr, nullptr, static_cast(item)); } else { bool default_bool = false; col_writer->WriteBatch(1, nullptr, nullptr, &default_bool); } break; } case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: { if (_str_schema[index][1] != "int32") { std::stringstream ss; ss << "project field type is tiny int/small int/int, should use int32, but the " "definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::Int32Writer* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { col_writer->WriteBatch(1, nullptr, nullptr, static_cast(item)); } else { int32_t default_int32 = 0; col_writer->WriteBatch(1, nullptr, nullptr, &default_int32); } break; } case TYPE_BIGINT: { if (_str_schema[index][1] != "int64") { std::stringstream ss; ss << "project field type is big int, should use int64, but the definition " "type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::Int64Writer* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { col_writer->WriteBatch(1, nullptr, nullptr, (int64_t*)(item)); } else { int64_t default_int644 = 0; col_writer->WriteBatch(1, nullptr, nullptr, &default_int644); } break; } case TYPE_LARGEINT: { // TODO: not support int_128 // It is better write a default value, because rg_writer need all columns has value before flush to disk. parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::Int64Writer* col_writer = static_cast(rgWriter->column(index)); int64_t default_int64 = 0; col_writer->WriteBatch(1, nullptr, nullptr, &default_int64); return Status::InvalidArgument("do not support large int type."); } case TYPE_FLOAT: { if (_str_schema[index][1] != "float") { std::stringstream ss; ss << "project field type is float, but the definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::FloatWriter* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { col_writer->WriteBatch(1, nullptr, nullptr, (float_t*)(item)); } else { float_t default_float = 0.0; col_writer->WriteBatch(1, nullptr, nullptr, &default_float); } break; } case TYPE_DOUBLE: { if (_str_schema[index][1] != "double") { std::stringstream ss; ss << "project field type is double, but the definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::DoubleWriter* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { col_writer->WriteBatch(1, nullptr, nullptr, (double_t*)(item)); } else { double_t default_double = 0.0; col_writer->WriteBatch(1, nullptr, nullptr, &default_double); } break; } case TYPE_DATETIME: case TYPE_DATE: { if (_str_schema[index][1] != "int64") { std::stringstream ss; ss << "project field type is date/datetime, should use int64, but the " "definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::Int64Writer* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { const DateTimeValue* time_val = (const DateTimeValue*)(item); int64_t timestamp = time_val->to_olap_datetime(); col_writer->WriteBatch(1, nullptr, nullptr, ×tamp); } else { int64_t default_int64 = 0; col_writer->WriteBatch(1, nullptr, nullptr, &default_int64); } break; } case TYPE_HLL: case TYPE_OBJECT: { if (_output_object_data) { if (_str_schema[index][1] != "byte_array") { std::stringstream ss; ss << "project field type is hll/bitmap, should use byte_array, but the " "definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::ByteArrayWriter* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { const StringValue* string_val = (const StringValue*)(item); parquet::ByteArray value; value.ptr = reinterpret_cast(string_val->ptr); value.len = string_val->len; col_writer->WriteBatch(1, nullptr, nullptr, &value); } else { parquet::ByteArray value; col_writer->WriteBatch(1, nullptr, nullptr, &value); } } else { std::stringstream ss; ss << "unsupported file format: " << _output_expr_ctxs[index]->root()->type().type; return Status::InvalidArgument(ss.str()); } break; } case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { if (_str_schema[index][1] != "byte_array") { std::stringstream ss; ss << "project field type is char/varchar, should use byte_array, but the " "definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::ByteArrayWriter* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { const StringValue* string_val = (const StringValue*)(item); parquet::ByteArray value; value.ptr = reinterpret_cast(string_val->ptr); value.len = string_val->len; col_writer->WriteBatch(1, nullptr, nullptr, &value); } else { parquet::ByteArray value; col_writer->WriteBatch(1, nullptr, nullptr, &value); } break; } case TYPE_DECIMALV2: { if (_str_schema[index][1] != "byte_array") { std::stringstream ss; ss << "project field type is decimal v2, should use byte_array, but the " "definition type of column " << _str_schema[index][2] << " is " << _str_schema[index][1]; return Status::InvalidArgument(ss.str()); } parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::ByteArrayWriter* col_writer = static_cast(rgWriter->column(index)); if (item != nullptr) { const DecimalV2Value decimal_val( reinterpret_cast(item)->value); char decimal_buffer[MAX_DECIMAL_WIDTH]; int output_scale = _output_expr_ctxs[index]->root()->output_scale(); parquet::ByteArray value; value.ptr = reinterpret_cast(decimal_buffer); value.len = decimal_val.to_buffer(decimal_buffer, output_scale); col_writer->WriteBatch(1, nullptr, nullptr, &value); } else { parquet::ByteArray value; col_writer->WriteBatch(1, nullptr, nullptr, &value); } break; } default: { std::stringstream ss; ss << "unsupported file format: " << _output_expr_ctxs[index]->root()->type().type; return Status::InvalidArgument(ss.str()); } } } } catch (const std::exception& e) { LOG(WARNING) << "Parquet write error: " << e.what(); return Status::InternalError(e.what()); } return Status::OK(); } int64_t ParquetWriterWrapper::written_len() { return _outstream->get_written_len(); } void ParquetWriterWrapper::close() { try { if (_rg_writer != nullptr) { _rg_writer->Close(); _rg_writer = nullptr; } _writer->Close(); arrow::Status st = _outstream->Close(); if (!st.ok()) { LOG(WARNING) << "close parquet file error: " << st.ToString(); } } catch (const std::exception& e) { _rg_writer = nullptr; LOG(WARNING) << "Parquet writer close error: " << e.what(); } } ParquetWriterWrapper::~ParquetWriterWrapper() {} } // namespace doris