164 lines
6.7 KiB
C++
164 lines
6.7 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "olap/merger.h"
|
|
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#include "olap/olap_define.h"
|
|
#include "olap/row_cursor.h"
|
|
#include "olap/tablet.h"
|
|
#include "olap/tuple_reader.h"
|
|
#include "util/trace.h"
|
|
#include "vec/olap/block_reader.h"
|
|
|
|
namespace doris {
|
|
|
|
// Row-oriented merge: reads rows one at a time from `src_rowset_readers`
// through a TupleReader and appends each of them to `dst_rowset_writer`,
// then flushes the writer.
//
// @param tablet             tablet being merged; also used to label log messages
// @param reader_type        forwarded to the reader parameters
// @param cur_tablet_schema  schema used by the reader and by the row cursor
// @param src_rowset_readers input rowset readers
// @param dst_rowset_writer  output writer; its version() is used as the read version
// @param stats_output       optional; when non-null it receives the output,
//                           merged and filtered row counts
// @return Status::OK() on success, otherwise the status of the step that failed
Status Merger::merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
                             TabletSchemaSPtr cur_tablet_schema,
                             const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
                             RowsetWriter* dst_rowset_writer, Merger::Statistics* stats_output) {
    TRACE_COUNTER_SCOPE_LATENCY_US("merge_rowsets_latency_us");

    // Describe what to read, then initialise the tuple reader with it.
    TupleReader tuple_reader;
    TabletReader::ReaderParams params;
    params.tablet = tablet;
    params.tablet_schema = cur_tablet_schema;
    params.reader_type = reader_type;
    params.rs_readers = src_rowset_readers;
    params.version = dst_rowset_writer->version();
    RETURN_NOT_OK(tuple_reader.init(params));

    // A single cursor is reused for every row that passes through the merge.
    RowCursor cursor;
    RETURN_NOT_OK_LOG(
            cursor.init(cur_tablet_schema),
            "failed to init row cursor when merging rowsets of tablet " + tablet->full_name());
    cursor.allocate_memory_for_string_type(cur_tablet_schema);

    auto row_mem_pool = std::make_unique<MemPool>();

    // The merge loop below can take a very long time (on the order of half a day).
    int64_t rows_written = 0;
    for (;;) {
        // Fresh object pool and end-of-input flag for each row read.
        ObjectPool row_object_pool;
        bool reached_end = false;

        // Pull the next row into the cursor.
        RETURN_NOT_OK_LOG(
                tuple_reader.next_row_with_aggregation(&cursor, row_mem_pool.get(),
                                                       &row_object_pool, &reached_end),
                "failed to read next row when merging rowsets of tablet " + tablet->full_name());
        if (reached_end) {
            break;
        }

        RETURN_NOT_OK_LOG(
                dst_rowset_writer->add_row(cursor),
                "failed to write row when merging rowsets of tablet " + tablet->full_name());
        ++rows_written;

        // Emit a progress line every `row_step_for_compaction_merge_log` rows
        // (a step of 0 turns the progress log off).
        const bool log_progress = config::row_step_for_compaction_merge_log != 0 &&
                                  rows_written % config::row_step_for_compaction_merge_log == 0;
        LOG_IF(INFO, log_progress)
                << "Merge rowsets stay alive. "
                << "tablet=" << tablet->full_name() << ", merged rows=" << rows_written;

        // The memory allocated from the pool has already been copied,
        // so it can be released right away.
        row_mem_pool->clear();
    }

    // Statistics are published before the flush, as in the original flow.
    if (stats_output != nullptr) {
        stats_output->output_rows = rows_written;
        stats_output->merged_rows = tuple_reader.merged_rows();
        stats_output->filtered_rows = tuple_reader.filtered_rows();
    }

    RETURN_NOT_OK_LOG(
            dst_rowset_writer->flush(),
            "failed to flush rowset when merging rowsets of tablet " + tablet->full_name());
    return Status::OK();
}
|
|
|
|
// Block-oriented (vectorized) merge: reads blocks from `src_rowset_readers`
// through a vectorized::BlockReader and appends each of them to
// `dst_rowset_writer`, then flushes the writer.
//
// When `stats_output` is non-null and carries a `rowid_conversion`, the reader
// is asked to record row ids and the conversion object is fed the source row
// locations of every non-empty block that is written.
//
// @param tablet             tablet being merged; supplies the delete bitmap and
//                           labels log messages
// @param reader_type        forwarded to the reader parameters
// @param cur_tablet_schema  schema used to select columns and to create the block
// @param src_rowset_readers input rowset readers
// @param dst_rowset_writer  output writer; its version() is used as the read version
// @param stats_output       optional; when non-null it receives the output,
//                           merged and filtered row counts
// @return Status::OK() on success, otherwise the status of the step that failed
Status Merger::vmerge_rowsets(TabletSharedPtr tablet, ReaderType reader_type,
                              TabletSchemaSPtr cur_tablet_schema,
                              const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
                              RowsetWriter* dst_rowset_writer, Statistics* stats_output) {
    TRACE_COUNTER_SCOPE_LATENCY_US("merge_rowsets_latency_us");

    vectorized::BlockReader block_reader;
    TabletReader::ReaderParams params;
    params.tablet = tablet;
    params.tablet_schema = cur_tablet_schema;
    params.reader_type = reader_type;
    params.rs_readers = src_rowset_readers;
    params.version = dst_rowset_writer->version();
    params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap();
    // Row ids are only recorded when the caller supplied a conversion object.
    if (stats_output != nullptr && stats_output->rowid_conversion) {
        params.record_rowids = true;
    }

    // Select every column of the schema: indices 0 .. num_columns - 1.
    params.return_columns.resize(cur_tablet_schema->num_columns());
    std::iota(params.return_columns.begin(), params.return_columns.end(), 0);
    params.origin_return_columns = &params.return_columns;
    RETURN_NOT_OK(block_reader.init(params));

    if (params.record_rowids) {
        stats_output->rowid_conversion->set_dst_rowset_id(dst_rowset_writer->rowset_id());
        // Register the per-segment row counts of each source rowset with the
        // rowid conversion.
        std::vector<uint32_t> src_segment_rows;
        for (auto& src_reader : params.rs_readers) {
            RETURN_NOT_OK(src_reader->get_segment_num_rows(&src_segment_rows));
            stats_output->rowid_conversion->init_segment_map(src_reader->rowset()->rowset_id(),
                                                             src_segment_rows);
        }
    }

    vectorized::Block block = cur_tablet_schema->create_block(params.return_columns);
    size_t rows_written = 0;
    bool eof = false;
    // `eof` starts out false, so this runs at least once, exactly like a
    // `while (!eof)` loop would.
    do {
        // Fetch the next block from the reader.
        RETURN_NOT_OK_LOG(
                block_reader.next_block_with_aggregation(&block, nullptr, nullptr, &eof),
                "failed to read next block when merging rowsets of tablet " + tablet->full_name());
        RETURN_NOT_OK_LOG(
                dst_rowset_writer->add_block(&block),
                "failed to write block when merging rowsets of tablet " + tablet->full_name());

        const size_t rows_in_block = block.rows();
        if (params.record_rowids && rows_in_block > 0) {
            // Hand the source locations of the rows just written, together with
            // the writer's current per-segment row counts, to the conversion.
            std::vector<uint32_t> dst_segment_rows;
            RETURN_IF_ERROR(dst_rowset_writer->get_segment_num_rows(&dst_segment_rows));
            stats_output->rowid_conversion->add(block_reader.current_block_row_locations(),
                                                dst_segment_rows);
        }

        rows_written += rows_in_block;
        // Empty the block's column data so the block can be reused next round.
        block.clear_column_data();
    } while (!eof);

    // Statistics are published before the flush, as in the original flow.
    if (stats_output != nullptr) {
        stats_output->output_rows = rows_written;
        stats_output->merged_rows = block_reader.merged_rows();
        stats_output->filtered_rows = block_reader.filtered_rows();
    }

    RETURN_NOT_OK_LOG(
            dst_rowset_writer->flush(),
            "failed to flush rowset when merging rowsets of tablet " + tablet->full_name());

    return Status::OK();
}
|
|
|
|
} // namespace doris
|