[Enhancement](topn) support two phase read for topn query (#15642)
This PR optimizes TopN queries such as `SELECT * FROM tableX ORDER BY columnA ASC/DESC LIMIT N`. A TopN query is composed of a SortNode and a ScanNode. When the user table is wide (e.g. 100+ columns), the ORDER BY clause usually references only a few columns, but the ScanNode still has to scan all columns from the storage engine even if the limit is very small. This can lead to a lot of read amplification. So in this PR I divide a TopN query into two phases: 1. In the first phase we only read `columnA`'s data from the storage engine, along with an extra RowId column called `__DORIS_ROWID_COL__`. The other columns are pruned from the ScanNode. 2. The second phase is placed in the ExchangeNode because it is the central node for the TopN nodes in the cluster. The ExchangeNode issues an RPC to the other nodes using the RowIds (sorted and limited by the SortNode) read in the first phase, and reads the rows one by one from the storage engine. After the second-phase read, the Block contains all the data needed for the query.
This commit is contained in:
@ -178,6 +178,7 @@ CONF_Int32(doris_scanner_thread_pool_thread_num, "48");
|
||||
CONF_Int32(doris_scanner_thread_pool_queue_size, "102400");
|
||||
// default thrift client connect timeout(in seconds)
|
||||
CONF_mInt32(thrift_connect_timeout_seconds, "3");
|
||||
CONF_mInt32(fetch_rpc_timeout_seconds, "20");
|
||||
// default thrift client retry interval (in milliseconds)
|
||||
CONF_mInt64(thrift_client_retry_interval_ms, "1000");
|
||||
// max row count number for single scan range, used in segmentv1
|
||||
|
||||
@ -25,6 +25,7 @@ const std::string CSV = "csv";
|
||||
const std::string CSV_WITH_NAMES = "csv_with_names";
|
||||
const std::string CSV_WITH_NAMES_AND_TYPES = "csv_with_names_and_types";
|
||||
const std::string BLOCK_TEMP_COLUMN_PREFIX = "__TEMP__";
|
||||
const std::string ROWID_COL = "__DORIS_ROWID_COL__";
|
||||
|
||||
constexpr int MAX_DECIMAL32_PRECISION = 9;
|
||||
constexpr int MAX_DECIMAL64_PRECISION = 18;
|
||||
|
||||
@ -63,6 +63,7 @@ set(EXEC_FILES
|
||||
odbc_connector.cpp
|
||||
table_connector.cpp
|
||||
schema_scanner.cpp
|
||||
rowid_fetcher.cpp
|
||||
)
|
||||
|
||||
if (WITH_LZO)
|
||||
|
||||
131
be/src/exec/rowid_fetcher.cpp
Normal file
131
be/src/exec/rowid_fetcher.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "exec/rowid_fetcher.h"
|
||||
|
||||
#include "bthread/countdown_event.h"
|
||||
#include "exec/tablet_info.h" // DorisNodesInfo
|
||||
#include "gen_cpp/Types_types.h"
|
||||
#include "gen_cpp/internal_service.pb.h"
|
||||
#include "runtime/exec_env.h" // ExecEnv
|
||||
#include "runtime/runtime_state.h" // RuntimeState
|
||||
#include "util/brpc_client_cache.h" // BrpcClientCache
|
||||
#include "util/defer_op.h"
|
||||
#include "vec/core/block.h" // Block
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Resolves one brpc stub for every backend listed in `nodes_info` and stores
// them in `_stubs`, so that fetch() can later send the multiget request to
// each of them.
//
// Returns InternalError as soon as a stub cannot be obtained for any node;
// stubs collected before the failure are left in `_stubs`.
Status RowIDFetcher::init(DorisNodesInfo* nodes_info) {
    const auto& nodes = nodes_info->nodes_info();
    _stubs.reserve(_stubs.size() + nodes.size());
    // Bind by const reference: iterating by value would copy every
    // (node id, NodeInfo) pair, including the host string, on each iteration.
    for (const auto& [node_id, node_info] : nodes) {
        auto client = ExecEnv::GetInstance()->brpc_internal_client_cache()->get_client(
                node_info.host, node_info.brpc_port);
        if (!client) {
            LOG(WARNING) << "Get rpc stub failed, host=" << node_info.host
                         << ", port=" << node_info.brpc_port;
            return Status::InternalError("RowIDFetcher failed to init rpc client");
        }
        _stubs.push_back(std::move(client));
    }
    return Status::OK();
}
|
||||
|
||||
static std::string format_rowid(const GlobalRowLoacation& location) {
|
||||
return fmt::format("{} {} {} {}", location.tablet_id,
|
||||
location.row_location.rowset_id.to_string(),
|
||||
location.row_location.segment_id, location.row_location.row_id);
|
||||
}
|
||||
|
||||
// Builds the multiget request sent to the backends: the tuple descriptor,
// all of its slot descriptors, one RowId entry per value of `row_ids`, and the
// BE exec version taken from the runtime state.
//
// Each value of `row_ids` is interpreted as the raw bytes of a
// GlobalRowLoacation.
PMultiGetRequest RowIDFetcher::_init_fetch_request(const vectorized::ColumnString& row_ids) {
    PMultiGetRequest request;

    // Schema part: tuple descriptor followed by every slot.
    _tuple_desc->to_protobuf(request.mutable_desc());
    for (auto slot_desc : _tuple_desc->slots()) {
        slot_desc->to_protobuf(request.add_slots());
    }

    // Row part: decode each serialized location into a protobuf RowId.
    for (size_t idx = 0; idx < row_ids.size(); ++idx) {
        StringRef raw = row_ids.get_data_at(idx);
        auto loc = reinterpret_cast<const GlobalRowLoacation*>(raw.data);
        PMultiGetRequest::RowId pb_row_id;
        pb_row_id.set_tablet_id(loc->tablet_id);
        pb_row_id.set_rowset_id(loc->row_location.rowset_id.to_string());
        pb_row_id.set_segment_id(loc->row_location.segment_id);
        pb_row_id.set_ordinal_id(loc->row_location.row_id);
        *request.add_rowids() = std::move(pb_row_id);
    }

    request.set_be_exec_version(_st->be_exec_version());
    return request;
}
|
||||
|
||||
// Completion callback attached to every multiget RPC: signals the countdown
// event once so the waiter in fetch() can make progress.
static void fetch_callback(bthread::CountdownEvent* counter) {
    counter->signal();
}
|
||||
|
||||
// Validates the outcome of all multiget RPCs and appends every returned block
// to `output_block`.
//
// All controllers are checked first, so a transport failure is reported
// before any response is merged. After that the responses are processed in
// order; the first response carrying a non-OK status is returned as-is
// (blocks merged before it stay in `output_block`).
static Status MergeRPCResults(const std::vector<PMultiGetResponse>& rsps,
                              const std::vector<brpc::Controller>& cntls,
                              vectorized::MutableBlock* output_block) {
    for (size_t i = 0; i < cntls.size(); ++i) {
        const brpc::Controller& controller = cntls[i];
        if (!controller.Failed()) {
            continue;
        }
        LOG(WARNING) << "Failed to fetch meet rpc error:" << controller.ErrorText()
                     << ", host:" << controller.remote_side();
        return Status::InternalError(controller.ErrorText());
    }

    for (size_t i = 0; i < rsps.size(); ++i) {
        const PMultiGetResponse& response = rsps[i];
        Status rpc_status(response.status());
        if (rpc_status.ok()) {
            vectorized::Block partial_block(response.block());
            output_block->merge(partial_block);
            continue;
        }
        LOG(WARNING) << "Failed to fetch " << rpc_status.to_string();
        return rpc_status;
    }
    return Status::OK();
}
|
||||
|
||||
// Fetches the full rows identified by `row_ids` from the backends registered
// in init() and writes them into `res_block` in the same order as `row_ids`.
//
// row_ids:   column whose values are the raw bytes of GlobalRowLoacation
//            (a nullable wrapper is stripped before building the request).
// res_block: cleared first, then filled with one row per entry of `row_ids`.
//
// Returns an error if any RPC fails, if any backend reports a non-OK status,
// or if a requested row is not present in the merged responses.
Status RowIDFetcher::fetch(const vectorized::ColumnPtr& row_ids,
                           vectorized::MutableBlock* res_block) {
    CHECK(!_stubs.empty());
    res_block->clear_column_data();
    vectorized::MutableBlock mblock({_tuple_desc}, row_ids->size());
    PMultiGetRequest mget_req = _init_fetch_request(assert_cast<const vectorized::ColumnString&>(
            *vectorized::remove_nullable(row_ids).get()));

    // The same request is sent to every stub; `counter` is released once all
    // of the asynchronous calls have invoked their completion callback.
    std::vector<PMultiGetResponse> resps(_stubs.size());
    std::vector<brpc::Controller> cntls(_stubs.size());
    bthread::CountdownEvent counter(_stubs.size());
    for (size_t i = 0; i < _stubs.size(); ++i) {
        cntls[i].set_timeout_ms(config::fetch_rpc_timeout_seconds * 1000);
        auto callback = brpc::NewCallback(fetch_callback, &counter);
        _stubs[i]->multiget_data(&cntls[i], &mget_req, &resps[i], callback);
    }
    counter.wait();
    RETURN_IF_ERROR(MergeRPCResults(resps, cntls, &mblock));

    // final sort by row_ids sequence, since row_ids is already sorted
    vectorized::Block tmp = mblock.to_block();
    // Maps the textual row id to its row index inside `tmp`. The row id is
    // read from the last column of the merged block.
    std::unordered_map<std::string, uint32_t> row_order;
    vectorized::ColumnPtr row_id_column = tmp.get_columns().back();
    row_order.reserve(row_id_column->size());
    for (size_t x = 0; x < row_id_column->size(); ++x) {
        auto location =
                reinterpret_cast<const GlobalRowLoacation*>(row_id_column->get_data_at(x).data);
        row_order[format_rowid(*location)] = static_cast<uint32_t>(x);
    }
    for (size_t x = 0; x < row_ids->size(); ++x) {
        auto location = reinterpret_cast<const GlobalRowLoacation*>(row_ids->get_data_at(x).data);
        const std::string key = format_rowid(*location);
        // Use find() rather than operator[]: operator[] would default-insert
        // index 0 for a row that no backend returned, silently emitting a
        // wrong row (or reading out of range when `tmp` is empty).
        const auto it = row_order.find(key);
        if (it == row_order.end()) {
            LOG(WARNING) << "Failed to fetch, row not returned by any backend, rowid=" << key;
            return Status::InternalError("RowIDFetcher failed to fetch row by rowid");
        }
        res_block->add_row(&tmp, it->second);
    }
    return Status::OK();
}
|
||||
|
||||
} // namespace doris
|
||||
44
be/src/exec/rowid_fetcher.h
Normal file
44
be/src/exec/rowid_fetcher.h
Normal file
@ -0,0 +1,44 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "gen_cpp/internal_service.pb.h"
|
||||
#include "vec/core/block.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class DorisNodesInfo;
|
||||
class RuntimeState;
|
||||
|
||||
// fetch rows by global rowid
|
||||
// tablet_id/rowset_name/segment_id/ordinal_id
|
||||
class RowIDFetcher {
|
||||
public:
|
||||
RowIDFetcher(TupleDescriptor* desc, RuntimeState* st) : _tuple_desc(desc), _st(st) {}
|
||||
Status init(DorisNodesInfo* nodes_info);
|
||||
Status fetch(const vectorized::ColumnPtr& row_ids, vectorized::MutableBlock* block);
|
||||
|
||||
private:
|
||||
PMultiGetRequest _init_fetch_request(const vectorized::ColumnString& row_ids);
|
||||
|
||||
std::vector<std::shared_ptr<PBackendService_Stub>> _stubs;
|
||||
TupleDescriptor* _tuple_desc;
|
||||
RuntimeState* _st;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -249,6 +249,8 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const std::unordered_map<int64_t, NodeInfo>& nodes_info() { return _nodes; }
|
||||
|
||||
private:
|
||||
std::unordered_map<int64_t, NodeInfo> _nodes;
|
||||
};
|
||||
|
||||
@ -29,7 +29,7 @@ class IOBufAsZeroCopyInputStream;
|
||||
namespace doris {
|
||||
class Predicate;
|
||||
class ObjectPool;
|
||||
class RuntimeState;
|
||||
class ExprContext;
|
||||
class RuntimePredicateWrapper;
|
||||
class MemTracker;
|
||||
class TupleRow;
|
||||
|
||||
@ -119,6 +119,8 @@ public:
|
||||
vectorized::VExpr* remaining_vconjunct_root = nullptr;
|
||||
// runtime state
|
||||
RuntimeState* runtime_state = nullptr;
|
||||
RowsetId rowset_id;
|
||||
int32_t tablet_id = 0;
|
||||
};
|
||||
|
||||
class RowwiseIterator {
|
||||
|
||||
@ -64,6 +64,8 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
|
||||
_read_options.stats = _stats;
|
||||
_read_options.push_down_agg_type_opt = _context->push_down_agg_type_opt;
|
||||
_read_options.remaining_vconjunct_root = _context->remaining_vconjunct_root;
|
||||
_read_options.rowset_id = _rowset->rowset_id();
|
||||
_read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
|
||||
if (read_context->lower_bound_keys != nullptr) {
|
||||
for (int i = 0; i < read_context->lower_bound_keys->size(); ++i) {
|
||||
_read_options.key_ranges.emplace_back(&read_context->lower_bound_keys->at(i),
|
||||
|
||||
@ -417,6 +417,56 @@ private:
|
||||
vectorized::ColumnArray::ColumnOffsets& column_offsets);
|
||||
};
|
||||
|
||||
class RowIdColumnIterator : public ColumnIterator {
|
||||
public:
|
||||
RowIdColumnIterator() = delete;
|
||||
RowIdColumnIterator(int32_t tid, RowsetId rid, int32_t segid)
|
||||
: _tablet_id(tid), _rowset_id(rid), _segment_id(segid) {}
|
||||
|
||||
Status seek_to_first() override {
|
||||
_current_rowid = 0;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status seek_to_ordinal(ordinal_t ord_idx) override {
|
||||
_current_rowid = ord_idx;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
|
||||
bool has_null;
|
||||
return next_batch(n, dst, &has_null);
|
||||
}
|
||||
|
||||
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override {
|
||||
for (size_t i = 0; i < *n; ++i) {
|
||||
rowid_t row_id = _current_rowid + i;
|
||||
GlobalRowLoacation location(_tablet_id, _rowset_id, _segment_id, row_id);
|
||||
dst->insert_data(reinterpret_cast<const char*>(&location), sizeof(GlobalRowLoacation));
|
||||
}
|
||||
_current_rowid += *n;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status read_by_rowids(const rowid_t* rowids, const size_t count,
|
||||
vectorized::MutableColumnPtr& dst) override {
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
rowid_t row_id = rowids[i];
|
||||
GlobalRowLoacation location(_tablet_id, _rowset_id, _segment_id, row_id);
|
||||
dst->insert_data(reinterpret_cast<const char*>(&location), sizeof(GlobalRowLoacation));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
ordinal_t get_current_ordinal() const override { return _current_rowid; }
|
||||
|
||||
private:
|
||||
rowid_t _current_rowid = 0;
|
||||
int32_t _tablet_id = 0;
|
||||
RowsetId _rowset_id;
|
||||
int32_t _segment_id = 0;
|
||||
};
|
||||
|
||||
// This iterator is used to read default value column
|
||||
class DefaultValueColumnIterator : public ColumnIterator {
|
||||
public:
|
||||
|
||||
@ -112,6 +112,8 @@ public:
|
||||
return _footer.primary_key_index_meta().max_key();
|
||||
};
|
||||
|
||||
io::FileReaderSPtr file_reader() { return _file_reader; }
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Segment);
|
||||
Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema);
|
||||
|
||||
@ -179,6 +179,9 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
|
||||
_remaining_vconjunct_root = opts.remaining_vconjunct_root;
|
||||
|
||||
_column_predicate_info.reset(new ColumnPredicateInfo());
|
||||
if (_schema.rowid_col_idx() > 0) {
|
||||
_opts.record_rowids = true;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -688,6 +691,11 @@ Status SegmentIterator::_init_return_column_iterators() {
|
||||
}
|
||||
for (auto cid : _schema.column_ids()) {
|
||||
int32_t unique_id = _opts.tablet_schema->column(cid).unique_id();
|
||||
if (_opts.tablet_schema->column(cid).name() == BeConsts::ROWID_COL) {
|
||||
_column_iterators[unique_id] =
|
||||
new RowIdColumnIterator(_opts.tablet_id, _opts.rowset_id, _segment->id());
|
||||
continue;
|
||||
}
|
||||
if (_column_iterators.count(unique_id) < 1) {
|
||||
RETURN_IF_ERROR(_segment->new_column_iterator(_opts.tablet_schema->column(cid),
|
||||
&_column_iterators[unique_id]));
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/consts.h"
|
||||
#include "olap/aggregate_func.h"
|
||||
#include "olap/field.h"
|
||||
#include "olap/row_cursor_cell.h"
|
||||
@ -52,6 +53,9 @@ public:
|
||||
if (column.is_key()) {
|
||||
++num_key_columns;
|
||||
}
|
||||
if (column.name() == BeConsts::ROWID_COL) {
|
||||
_rowid_col_idx = cid;
|
||||
}
|
||||
columns.push_back(column);
|
||||
}
|
||||
_delete_sign_idx = tablet_schema->delete_sign_idx();
|
||||
@ -72,6 +76,9 @@ public:
|
||||
if (columns[i].name() == DELETE_SIGN) {
|
||||
_delete_sign_idx = i;
|
||||
}
|
||||
if (columns[i].name() == BeConsts::ROWID_COL) {
|
||||
_rowid_col_idx = i;
|
||||
}
|
||||
_unique_ids[i] = columns[i].unique_id();
|
||||
}
|
||||
_init(columns, col_ids, num_key_columns);
|
||||
@ -145,6 +152,7 @@ public:
|
||||
int32_t unique_id(size_t index) const { return _unique_ids[index]; }
|
||||
int32_t delete_sign_idx() const { return _delete_sign_idx; }
|
||||
bool has_sequence_col() const { return _has_sequence_col; }
|
||||
int32_t rowid_col_idx() const { return _rowid_col_idx; };
|
||||
|
||||
private:
|
||||
void _init(const std::vector<TabletColumn>& cols, const std::vector<ColumnId>& col_ids,
|
||||
@ -169,6 +177,7 @@ private:
|
||||
size_t _schema_size;
|
||||
int32_t _delete_sign_idx = -1;
|
||||
bool _has_sequence_col = false;
|
||||
int32_t _rowid_col_idx = -1;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -260,6 +260,20 @@ Status Tablet::revise_tablet_meta(const std::vector<RowsetMetaSharedPtr>& rowset
|
||||
return res;
|
||||
}
|
||||
|
||||
// Looks up a rowset of this tablet by its rowset id.
//
// The active version map is searched first, then the stale version map.
// Returns nullptr when no rowset with that id is found.
RowsetSharedPtr Tablet::get_rowset(const RowsetId& rowset_id) {
    // Both maps are modified under an exclusive _meta_lock (see add_rowset),
    // so iterate them under a shared lock to avoid racing with writers.
    // NOTE(review): callers must not already hold _meta_lock when calling
    // this, since std::shared_mutex is not recursive.
    std::shared_lock<std::shared_mutex> rdlock(_meta_lock);
    for (const auto& version_rowset : _rs_version_map) {
        if (version_rowset.second->rowset_id() == rowset_id) {
            return version_rowset.second;
        }
    }
    for (const auto& stale_version_rowset : _stale_rs_version_map) {
        if (stale_version_rowset.second->rowset_id() == rowset_id) {
            return stale_version_rowset.second;
        }
    }
    return nullptr;
}
|
||||
|
||||
Status Tablet::add_rowset(RowsetSharedPtr rowset) {
|
||||
DCHECK(rowset != nullptr);
|
||||
std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
|
||||
|
||||
@ -344,6 +344,8 @@ public:
|
||||
int64_t start = -1);
|
||||
bool should_skip_compaction(CompactionType compaction_type, int64_t now);
|
||||
|
||||
RowsetSharedPtr get_rowset(const RowsetId& rowset_id);
|
||||
|
||||
private:
|
||||
Status _init_once_action();
|
||||
void _print_missed_versions(const std::vector<Version>& missed_versions) const;
|
||||
|
||||
@ -64,6 +64,11 @@ public:
|
||||
size_t length() const { return _length; }
|
||||
size_t index_length() const { return _index_length; }
|
||||
void set_index_length(size_t index_length) { _index_length = index_length; }
|
||||
void set_type(FieldType type) { _type = type; }
|
||||
void set_is_key(bool is_key) { _is_key = is_key; }
|
||||
void set_is_nullable(bool is_nullable) { _is_nullable = is_nullable; }
|
||||
void set_unique_id(int32_t unique_id) { _unique_id = unique_id; }
|
||||
void set_has_default_value(bool has) { _has_default_value = has; }
|
||||
FieldAggregationMethod aggregation() const { return _aggregation; }
|
||||
vectorized::AggregateFunctionPtr get_aggregate_function(vectorized::DataTypes argument_types,
|
||||
std::string suffix) const;
|
||||
|
||||
@ -310,4 +310,11 @@ struct RowLocation {
|
||||
uint32_t row_id;
|
||||
};
|
||||
|
||||
struct GlobalRowLoacation {
|
||||
GlobalRowLoacation(uint32_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
|
||||
: tablet_id(tid), row_location(rsid, sid, rid) {};
|
||||
uint32_t tablet_id;
|
||||
RowLocation row_location;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -63,7 +63,9 @@ SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc)
|
||||
_slot_idx(tdesc.slotIdx),
|
||||
_slot_size(_type.get_slot_size()),
|
||||
_field_idx(-1),
|
||||
_is_materialized(tdesc.isMaterialized) {}
|
||||
_is_materialized(tdesc.isMaterialized),
|
||||
_is_key(tdesc.is_key),
|
||||
_need_materialize(tdesc.need_materialize) {}
|
||||
|
||||
SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc)
|
||||
: _id(pdesc.id()),
|
||||
@ -74,11 +76,13 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc)
|
||||
_null_indicator_offset(pdesc.null_indicator_byte(), pdesc.null_indicator_bit()),
|
||||
_col_name(pdesc.col_name()),
|
||||
_col_name_lower_case(to_lower(pdesc.col_name())),
|
||||
_col_unique_id(-1),
|
||||
_col_unique_id(pdesc.col_unique_id()),
|
||||
_slot_idx(pdesc.slot_idx()),
|
||||
_slot_size(_type.get_slot_size()),
|
||||
_field_idx(-1),
|
||||
_is_materialized(pdesc.is_materialized()) {}
|
||||
_is_materialized(pdesc.is_materialized()),
|
||||
_is_key(pdesc.is_key()),
|
||||
_need_materialize(true) {}
|
||||
|
||||
void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const {
|
||||
pslot->set_id(_id);
|
||||
@ -92,6 +96,8 @@ void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const {
|
||||
pslot->set_col_name(_col_name);
|
||||
pslot->set_slot_idx(_slot_idx);
|
||||
pslot->set_is_materialized(_is_materialized);
|
||||
pslot->set_col_unique_id(_col_unique_id);
|
||||
pslot->set_is_key(_is_key);
|
||||
}
|
||||
|
||||
vectorized::MutableColumnPtr SlotDescriptor::get_empty_mutable_column() const {
|
||||
@ -542,6 +548,9 @@ int RowDescriptor::get_column_id(int slot_id) const {
|
||||
int column_id_counter = 0;
|
||||
for (const auto tuple_desc : _tuple_desc_map) {
|
||||
for (const auto slot : tuple_desc->slots()) {
|
||||
if (!slot->need_materialize()) {
|
||||
continue;
|
||||
}
|
||||
if (slot->id() == slot_id) {
|
||||
return column_id_counter;
|
||||
}
|
||||
|
||||
@ -51,6 +51,7 @@ class SchemaScanner;
|
||||
class OlapTableSchemaParam;
|
||||
class PTupleDescriptor;
|
||||
class PSlotDescriptor;
|
||||
class PInternalServiceImpl;
|
||||
|
||||
// Location information for null indicator bit for particular slot.
|
||||
// For non-nullable slots, the byte_offset will be 0 and the bit_mask will be 0.
|
||||
@ -116,11 +117,15 @@ public:
|
||||
|
||||
int32_t col_unique_id() const { return _col_unique_id; }
|
||||
|
||||
bool is_key() const { return _is_key; }
|
||||
bool need_materialize() const { return _need_materialize; }
|
||||
|
||||
private:
|
||||
friend class DescriptorTbl;
|
||||
friend class TupleDescriptor;
|
||||
friend class SchemaScanner;
|
||||
friend class OlapTableSchemaParam;
|
||||
friend class PInternalServiceImpl;
|
||||
|
||||
const SlotId _id;
|
||||
const TypeDescriptor _type;
|
||||
@ -147,6 +152,9 @@ private:
|
||||
|
||||
const bool _is_materialized;
|
||||
|
||||
const bool _is_key;
|
||||
const bool _need_materialize;
|
||||
|
||||
SlotDescriptor(const TSlotDescriptor& tdesc);
|
||||
SlotDescriptor(const PSlotDescriptor& pdesc);
|
||||
};
|
||||
@ -342,6 +350,7 @@ private:
|
||||
friend class DescriptorTbl;
|
||||
friend class SchemaScanner;
|
||||
friend class OlapTableSchemaParam;
|
||||
friend class PInternalServiceImpl;
|
||||
|
||||
const TupleId _id;
|
||||
TableDescriptor* _table_desc;
|
||||
|
||||
@ -165,7 +165,6 @@ private:
|
||||
std::atomic<bool> _is_cancelled {false};
|
||||
|
||||
std::shared_ptr<vectorized::SharedHashTableController> _shared_hash_table_controller;
|
||||
|
||||
vectorized::RuntimePredicate _runtime_predicate;
|
||||
};
|
||||
|
||||
|
||||
@ -22,10 +22,14 @@
|
||||
#include <string>
|
||||
|
||||
#include "common/config.h"
|
||||
#include "common/consts.h"
|
||||
#include "gen_cpp/BackendService.h"
|
||||
#include "gen_cpp/internal_service.pb.h"
|
||||
#include "http/http_client.h"
|
||||
#include "olap/rowset/beta_rowset.h"
|
||||
#include "olap/rowset/rowset_factory.h"
|
||||
#include "olap/rowset/segment_v2/column_reader.h"
|
||||
#include "olap/segment_loader.h"
|
||||
#include "olap/storage_engine.h"
|
||||
#include "olap/tablet.h"
|
||||
#include "runtime/buffer_control_block.h"
|
||||
@ -39,6 +43,7 @@
|
||||
#include "runtime/thread_context.h"
|
||||
#include "service/brpc.h"
|
||||
#include "util/brpc_client_cache.h"
|
||||
#include "util/defer_op.h"
|
||||
#include "util/md5.h"
|
||||
#include "util/proto_util.h"
|
||||
#include "util/ref_count_closure.h"
|
||||
@ -48,6 +53,8 @@
|
||||
#include "util/telemetry/telemetry.h"
|
||||
#include "util/thrift_util.h"
|
||||
#include "util/uid_util.h"
|
||||
#include "vec/core/block.h"
|
||||
#include "vec/data_types/data_type_string.h"
|
||||
#include "vec/exec/format/csv/csv_reader.h"
|
||||
#include "vec/exec/format/generic_reader.h"
|
||||
#include "vec/exec/format/json/new_json_reader.h"
|
||||
@ -949,4 +956,130 @@ void PInternalServiceImpl::response_slave_tablet_pull_rowset(
|
||||
Status::OK().to_protobuf(response->mutable_status());
|
||||
}
|
||||
|
||||
// Reads the rows rowids[row_range_idx.first, row_range_idx.second) from local
// storage and appends them to `sub_block`.
//
// desc:      tuple layout of `sub_block`; the last slot is the row id column,
//            every slot before it is a data column read from the segment.
// rowids:    requested row locations (tablet / rowset / segment / ordinal).
// sub_block: output; column x corresponds to desc.slots()[x].
//
// A row whose tablet, rowset or segment is not found is skipped without an
// error, so `sub_block` may end up with fewer rows than requested. Returns an
// error when `desc` has no slots or when loading segments, creating or
// initializing a column iterator, or reading a value fails.
static Status read_by_rowids(
        std::pair<size_t, size_t> row_range_idx, const TupleDescriptor& desc,
        const google::protobuf::RepeatedPtrField<PMultiGetRequest_RowId>& rowids,
        vectorized::Block* sub_block) {
    if (desc.slots().empty()) {
        // Guards the `size() - 1` below (unsigned underflow) and the access to
        // the last column of sub_block.
        return Status::InternalError("read_by_rowids requires at least the rowid slot");
    }
    // All slots except the trailing row id slot are read from storage.
    const size_t num_data_slots = desc.slots().size() - 1;

    //read from row_range.first to row_range.second
    for (size_t i = row_range_idx.first; i < row_range_idx.second; ++i) {
        MonotonicStopWatch watch;
        watch.start();
        // Reference, not copy: the protobuf message carries a string.
        const auto& row_id = rowids[i];
        TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet(
                row_id.tablet_id(), true /*include deleted*/);
        RowsetId rowset_id;
        rowset_id.init(row_id.rowset_id());
        if (!tablet) {
            continue;
        }
        BetaRowsetSharedPtr rowset =
                std::static_pointer_cast<BetaRowset>(tablet->get_rowset(rowset_id));
        if (!rowset) {
            LOG(INFO) << "no such rowset " << rowset_id;
            continue;
        }
        const TabletSchemaSPtr tablet_schema = rowset->tablet_schema();
        VLOG_DEBUG << "get tablet schema column_num:" << tablet_schema->num_columns()
                   << ", version:" << tablet_schema->schema_version()
                   << ", cost(us):" << watch.elapsed_time() / 1000;
        SegmentCacheHandle segment_cache;
        RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true));
        // find segment
        auto it = std::find_if(segment_cache.get_segments().begin(),
                               segment_cache.get_segments().end(),
                               [&row_id](const segment_v2::SegmentSharedPtr& seg) {
                                   return seg->id() == row_id.segment_id();
                               });
        if (it == segment_cache.get_segments().end()) {
            continue;
        }
        segment_v2::SegmentSharedPtr segment = *it;
        const auto ordinal = static_cast<segment_v2::rowid_t>(row_id.ordinal_id());
        for (size_t x = 0; x < num_data_slots; ++x) {
            int index = tablet_schema->field_index(desc.slots()[x]->col_unique_id());
            vectorized::MutableColumnPtr column =
                    sub_block->get_by_position(x).column->assume_mutable();
            if (index < 0) {
                // The column is not part of this rowset's schema: emit a
                // default value instead of reading.
                column->insert_default();
                continue;
            }
            segment_v2::ColumnIterator* column_iterator = nullptr;
            RETURN_IF_ERROR(segment->new_column_iterator(tablet_schema->column(index),
                                                         &column_iterator));
            // Owns the iterator from here on so every exit path frees it.
            std::unique_ptr<segment_v2::ColumnIterator> ptr_guard(column_iterator);
            segment_v2::ColumnIteratorOptions opt;
            OlapReaderStatistics stats;
            opt.file_reader = segment->file_reader().get();
            opt.stats = &stats;
            opt.use_page_cache = !config::disable_storage_page_cache;
            // Propagate an init failure instead of reading from an iterator
            // that failed to initialize.
            RETURN_IF_ERROR(column_iterator->init(opt));
            RETURN_IF_ERROR(column_iterator->read_by_rowids(&ordinal, 1, column));
        }
        LOG_EVERY_N(INFO, 100) << "multiget_data single_row, cost(us):"
                               << watch.elapsed_time() / 1000;
        // Append the row's own location to the trailing row id column.
        GlobalRowLoacation row_location(row_id.tablet_id(), rowset->rowset_id(),
                                        row_id.segment_id(), row_id.ordinal_id());
        sub_block->get_columns().back()->assume_mutable()->insert_data(
                reinterpret_cast<const char*>(&row_location), sizeof(GlobalRowLoacation));
    }
    return Status::OK();
}
|
||||
|
||||
// Serves a multiget request: rebuilds the tuple/slot descriptors from the
// request, reads every requested row from local storage into a block, trims
// CHAR padding and serializes the block into `response`.
//
// Returns an error when the request's last slot is not the row id column, or
// when reading or serializing fails. The caller is responsible for copying
// the returned status into the response.
Status PInternalServiceImpl::_multi_get(const PMultiGetRequest* request,
                                        PMultiGetResponse* response) {
    TupleDescriptor desc(request->desc());
    std::vector<SlotDescriptor> slots;
    // `desc` stores raw pointers into `slots`, so the vector must never
    // reallocate after the first add_slot(): reserve the final size up front.
    slots.reserve(request->slots().size());
    for (const auto& pslot : request->slots()) {
        // NOTE(review): the temporary is constructed here rather than through
        // emplace_back because the SlotDescriptor(PSlotDescriptor) constructor
        // appears to be private and reachable only via friendship -- confirm.
        slots.push_back(SlotDescriptor(pslot));
        desc.add_slot(&slots.back());
    }
    // Validate the request at runtime (assert is compiled out in release
    // builds, and back() on an empty slot list is undefined behavior).
    if (desc.slots().empty() || desc.slots().back()->col_name() != BeConsts::ROWID_COL) {
        return Status::InternalError("multiget request must end with the rowid column");
    }
    vectorized::Block block(desc.slots(), request->rowids().size());
    RETURN_IF_ERROR(read_by_rowids(
            std::pair<size_t, size_t>(0, static_cast<size_t>(request->rowids_size())), desc,
            request->rowids(), &block));

    // Collect the columns whose type is CHAR, possibly nested inside arrays.
    std::vector<size_t> char_type_idx;
    for (size_t i = 0; i < desc.slots().size(); i++) {
        auto column_desc = desc.slots()[i];
        auto type_desc = column_desc->type();
        do {
            if (type_desc.type == TYPE_CHAR) {
                char_type_idx.emplace_back(i);
                break;
            } else if (type_desc.type != TYPE_ARRAY) {
                break;
            }
            // for Array<Char> or Array<Array<Char>>
            type_desc = type_desc.children[0];
        } while (true);
    }
    // shrink char_type suffix zero data
    block.shrink_char_type_column_suffix_zero(char_type_idx);
    VLOG_DEBUG << "dump block:" << block.dump_data(0, 10)
               << ", be_exec_version:" << request->be_exec_version();

    [[maybe_unused]] size_t compressed_size = 0;
    [[maybe_unused]] size_t uncompressed_size = 0;
    // Requests without the field are serialized with version 0.
    int be_exec_version = request->has_be_exec_version() ? request->be_exec_version() : 0;
    RETURN_IF_ERROR(block.serialize(be_exec_version, response->mutable_block(), &uncompressed_size,
                                    &compressed_size, segment_v2::CompressionTypePB::LZ4));
    return Status::OK();
}
|
||||
|
||||
// RPC entry point: multi get data by rowid.
//
// Runs _multi_get() synchronously, stores its status in the response and logs
// the elapsed time. `done` is run by the closure guard when this function
// returns.
void PInternalServiceImpl::multiget_data(google::protobuf::RpcController* controller,
                                         const PMultiGetRequest* request,
                                         PMultiGetResponse* response,
                                         google::protobuf::Closure* done) {
    MonotonicStopWatch timer;
    timer.start();
    brpc::ClosureGuard closure_guard(done);

    // Start from an OK status code, then overwrite it with the real outcome.
    response->mutable_status()->set_status_code(0);
    const Status result = _multi_get(request, response);
    result.to_protobuf(response->mutable_status());

    LOG(INFO) << "multiget_data finished, cost(us):" << timer.elapsed_time() / 1000;
}
|
||||
|
||||
} // namespace doris
|
||||
|
||||
@ -151,6 +151,8 @@ public:
|
||||
const PTabletWriteSlaveDoneRequest* request,
|
||||
PTabletWriteSlaveDoneResult* response,
|
||||
google::protobuf::Closure* done) override;
|
||||
void multiget_data(google::protobuf::RpcController* controller, const PMultiGetRequest* request,
|
||||
PMultiGetResponse* response, google::protobuf::Closure* done) override;
|
||||
|
||||
private:
|
||||
Status _exec_plan_fragment(const std::string& s_request, PFragmentRequestVersion version,
|
||||
@ -176,6 +178,7 @@ private:
|
||||
void _response_pull_slave_rowset(const std::string& remote_host, int64_t brpc_port,
|
||||
int64_t txn_id, int64_t tablet_id, int64_t node_id,
|
||||
bool is_succeed);
|
||||
Status _multi_get(const PMultiGetRequest* request, PMultiGetResponse* response);
|
||||
|
||||
private:
|
||||
ExecEnv* _exec_env;
|
||||
|
||||
@ -45,6 +45,9 @@ Status HeapSorter::append_block(Block* block) {
|
||||
int i = 0;
|
||||
const auto& convert_nullable_flags = _vsort_exec_exprs.get_convert_nullable_flags();
|
||||
for (auto column_id : valid_column_ids) {
|
||||
if (column_id < 0) {
|
||||
continue;
|
||||
}
|
||||
if (convert_nullable_flags[i]) {
|
||||
auto column_ptr = make_nullable(block->get_by_position(column_id).column);
|
||||
new_block.insert({column_ptr,
|
||||
|
||||
@ -247,6 +247,9 @@ Status Sorter::partial_sort(Block& src_block, Block& dest_block) {
|
||||
int i = 0;
|
||||
const auto& convert_nullable_flags = _vsort_exec_exprs.get_convert_nullable_flags();
|
||||
for (auto column_id : valid_column_ids) {
|
||||
if (column_id < 0) {
|
||||
continue;
|
||||
}
|
||||
if (convert_nullable_flags[i]) {
|
||||
auto column_ptr = make_nullable(src_block.get_by_position(column_id).column);
|
||||
new_block.insert(
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#pragma once
|
||||
#include <queue>
|
||||
|
||||
#include "common/consts.h"
|
||||
#include "common/status.h"
|
||||
#include "vec/common/sort/vsort_exec_exprs.h"
|
||||
#include "vec/core/block.h"
|
||||
@ -34,7 +35,11 @@ class MergeSorterState {
|
||||
public:
|
||||
MergeSorterState(const RowDescriptor& row_desc, int64_t offset, int64_t limit,
|
||||
RuntimeState* state, RuntimeProfile* profile)
|
||||
: unsorted_block_(new Block(VectorizedUtils::create_empty_block(row_desc))),
|
||||
// create_empty_block should ignore invalid slots, unsorted_block
|
||||
// should be same structure with arrival block from child node
|
||||
// since block from child node may ignored these slots
|
||||
: unsorted_block_(new Block(
|
||||
VectorizedUtils::create_empty_block(row_desc, true /*ignore invalid slot*/))),
|
||||
offset_(offset),
|
||||
limit_(limit),
|
||||
profile_(profile) {
|
||||
|
||||
@ -54,8 +54,12 @@ Block::Block(const ColumnsWithTypeAndName& data_) : data {data_} {
|
||||
initialize_index_by_name();
|
||||
}
|
||||
|
||||
Block::Block(const std::vector<SlotDescriptor*>& slots, size_t block_size) {
|
||||
Block::Block(const std::vector<SlotDescriptor*>& slots, size_t block_size,
|
||||
bool ignore_trivial_slot) {
|
||||
for (const auto slot_desc : slots) {
|
||||
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
|
||||
continue;
|
||||
}
|
||||
auto column_ptr = slot_desc->get_empty_mutable_column();
|
||||
column_ptr->reserve(block_size);
|
||||
insert(ColumnWithTypeAndName(std::move(column_ptr), slot_desc->get_data_type_ptr(),
|
||||
@ -919,9 +923,13 @@ void Block::deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor
|
||||
}
|
||||
}
|
||||
|
||||
MutableBlock::MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs, int reserve_size) {
|
||||
MutableBlock::MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs, int reserve_size,
|
||||
bool ignore_trivial_slot) {
|
||||
for (auto tuple_desc : tuple_descs) {
|
||||
for (auto slot_desc : tuple_desc->slots()) {
|
||||
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
|
||||
continue;
|
||||
}
|
||||
_data_types.emplace_back(slot_desc->get_data_type_ptr());
|
||||
_columns.emplace_back(_data_types.back()->create_column());
|
||||
if (reserve_size != 0) {
|
||||
|
||||
@ -74,7 +74,8 @@ public:
|
||||
Block(std::initializer_list<ColumnWithTypeAndName> il);
|
||||
Block(const ColumnsWithTypeAndName& data_);
|
||||
Block(const PBlock& pblock);
|
||||
Block(const std::vector<SlotDescriptor*>& slots, size_t block_size);
|
||||
Block(const std::vector<SlotDescriptor*>& slots, size_t block_size,
|
||||
bool ignore_trivial_slot = false);
|
||||
|
||||
/// insert the column at the specified position
|
||||
void insert(size_t position, const ColumnWithTypeAndName& elem);
|
||||
@ -391,7 +392,8 @@ public:
|
||||
MutableBlock() = default;
|
||||
~MutableBlock() = default;
|
||||
|
||||
MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs, int reserve_size = 0);
|
||||
MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs, int reserve_size = 0,
|
||||
bool igore_trivial_slot = false);
|
||||
|
||||
MutableBlock(Block* block)
|
||||
: _columns(block->mutate_columns()), _data_types(block->get_data_types()) {}
|
||||
|
||||
@ -91,6 +91,22 @@ Status NewOlapScanner::prepare(const TPaloScanRange& scan_range,
|
||||
_tablet_schema->append_column(TabletColumn(column_desc));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
if (_output_tuple_desc->slots().back()->col_name() == BeConsts::ROWID_COL) {
|
||||
// inject ROWID_COL
|
||||
TabletColumn rowid_column;
|
||||
rowid_column.set_is_nullable(false);
|
||||
rowid_column.set_name(BeConsts::ROWID_COL);
|
||||
// avoid column reader init error
|
||||
rowid_column.set_has_default_value(true);
|
||||
// fake unique id
|
||||
rowid_column.set_unique_id(INT32_MAX);
|
||||
rowid_column.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
|
||||
_tablet_schema->append_column(rowid_column);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::shared_lock rdlock(_tablet->get_header_lock());
|
||||
const RowsetSharedPtr rowset = _tablet->rowset_with_max_version();
|
||||
@ -333,7 +349,9 @@ Status NewOlapScanner::_init_return_columns() {
|
||||
if (!slot->is_materialized()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!slot->need_materialize()) {
|
||||
continue;
|
||||
}
|
||||
int32_t index = slot->col_unique_id() >= 0
|
||||
? _tablet_schema->field_index(slot->col_unique_id())
|
||||
: _tablet_schema->field_index(slot->col_name());
|
||||
|
||||
@ -55,7 +55,8 @@ Status ScannerContext::init() {
|
||||
// So use _output_tuple_desc;
|
||||
int64_t free_blocks_memory_usage = 0;
|
||||
for (int i = 0; i < pre_alloc_block_count; ++i) {
|
||||
auto block = new vectorized::Block(_output_tuple_desc->slots(), real_block_size);
|
||||
auto block = new vectorized::Block(_output_tuple_desc->slots(), real_block_size,
|
||||
true /*ignore invalid slots*/);
|
||||
free_blocks_memory_usage += block->allocated_bytes();
|
||||
_free_blocks.emplace_back(block);
|
||||
}
|
||||
@ -93,7 +94,8 @@ vectorized::Block* ScannerContext::get_free_block(bool* get_free_block) {
|
||||
*get_free_block = false;
|
||||
|
||||
COUNTER_UPDATE(_parent->_newly_create_free_blocks_num, 1);
|
||||
return new vectorized::Block(_real_tuple_desc->slots(), _state->batch_size());
|
||||
return new vectorized::Block(_real_tuple_desc->slots(), _state->batch_size(),
|
||||
true /*ignore invalid slots*/);
|
||||
}
|
||||
|
||||
void ScannerContext::return_free_block(vectorized::Block* block) {
|
||||
|
||||
@ -40,6 +40,10 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) {
|
||||
int64_t raw_rows_threshold = raw_rows_read() + config::doris_scanner_row_num;
|
||||
if (!block->mem_reuse()) {
|
||||
for (const auto slot_desc : _output_tuple_desc->slots()) {
|
||||
if (!slot_desc->need_materialize()) {
|
||||
// should be ignored when reading
|
||||
continue;
|
||||
}
|
||||
block->insert(ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(),
|
||||
slot_desc->get_data_type_ptr(),
|
||||
slot_desc->col_name()));
|
||||
@ -80,8 +84,7 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) {
|
||||
|
||||
Status VScanner::_filter_output_block(Block* block) {
|
||||
auto old_rows = block->rows();
|
||||
Status st =
|
||||
VExprContext::filter_block(_vconjunct_ctx, block, _output_tuple_desc->slots().size());
|
||||
Status st = VExprContext::filter_block(_vconjunct_ctx, block, block->columns());
|
||||
_counter.num_rows_unselected += old_rows - block->rows();
|
||||
return st;
|
||||
}
|
||||
|
||||
@ -17,12 +17,15 @@
|
||||
|
||||
#include "vec/exec/vexchange_node.h"
|
||||
|
||||
#include "common/consts.h"
|
||||
#include "exec/rowid_fetcher.h"
|
||||
#include "pipeline/exec/exchange_source_operator.h"
|
||||
#include "pipeline/pipeline.h"
|
||||
#include "pipeline/pipeline_fragment_context.h"
|
||||
#include "runtime/exec_env.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "runtime/thread_context.h"
|
||||
#include "util/defer_op.h"
|
||||
#include "vec/runtime/vdata_stream_mgr.h"
|
||||
#include "vec/runtime/vdata_stream_recvr.h"
|
||||
|
||||
@ -45,10 +48,15 @@ Status VExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) {
|
||||
if (!_is_merging) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.exchange_node.sort_info, _pool));
|
||||
_is_asc_order = tnode.exchange_node.sort_info.is_asc_order;
|
||||
_nulls_first = tnode.exchange_node.sort_info.nulls_first;
|
||||
|
||||
if (tnode.exchange_node.__isset.nodes_info) {
|
||||
_nodes_info = _pool->add(new DorisNodesInfo(tnode.exchange_node.nodes_info));
|
||||
}
|
||||
_use_two_phase_read = tnode.exchange_node.sort_info.__isset.use_two_phase_read &&
|
||||
tnode.exchange_node.sort_info.use_two_phase_read;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -87,6 +95,19 @@ Status VExchangeNode::open(RuntimeState* state) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status VExchangeNode::_second_phase_fetch_data(RuntimeState* state, Block* final_block) {
|
||||
auto row_id_col = final_block->get_by_position(final_block->columns() - 1);
|
||||
auto tuple_desc = _row_descriptor.tuple_descriptors()[0];
|
||||
RowIDFetcher id_fetcher(tuple_desc, state);
|
||||
RETURN_IF_ERROR(id_fetcher.init(_nodes_info));
|
||||
MutableBlock materialized_block(_row_descriptor.tuple_descriptors(), final_block->rows());
|
||||
// fetch will sort block by sequence of ROWID_COL
|
||||
RETURN_IF_ERROR(id_fetcher.fetch(row_id_col.column, &materialized_block));
|
||||
// Note: swap may change the structure of final_block
|
||||
final_block->swap(materialized_block.to_block());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status VExchangeNode::get_next(RuntimeState* state, Block* block, bool* eos) {
|
||||
INIT_AND_SCOPE_GET_NEXT_SPAN(state->get_tracer(), _get_next_span, "VExchangeNode::get_next");
|
||||
SCOPED_TIMER(runtime_profile()->total_time_counter());
|
||||
@ -97,6 +118,12 @@ Status VExchangeNode::get_next(RuntimeState* state, Block* block, bool* eos) {
|
||||
_is_ready = true;
|
||||
return Status::OK();
|
||||
}
|
||||
if (_use_two_phase_read) {
|
||||
// Block structure may have been changed by a previous call to _second_phase_fetch_data().
|
||||
// So we should clear block before _stream_recvr->get_next, since
|
||||
// blocks in VSortedRunMerger may not be compatible with this block.
|
||||
block->clear();
|
||||
}
|
||||
auto status = _stream_recvr->get_next(block, eos);
|
||||
if (block != nullptr) {
|
||||
if (!_is_merging) {
|
||||
@ -119,6 +146,9 @@ Status VExchangeNode::get_next(RuntimeState* state, Block* block, bool* eos) {
|
||||
}
|
||||
COUNTER_SET(_rows_returned_counter, _num_rows_returned);
|
||||
}
|
||||
if (_use_two_phase_read && block->rows() > 0) {
|
||||
RETURN_IF_ERROR(_second_phase_fetch_data(state, block));
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@ -20,6 +20,8 @@
|
||||
#include <memory>
|
||||
|
||||
#include "exec/exec_node.h"
|
||||
#include "exec/tablet_info.h" // DorisNodesInfo
|
||||
#include "runtime/descriptors.h"
|
||||
#include "vec/common/sort/vsort_exec_exprs.h"
|
||||
|
||||
namespace doris {
|
||||
@ -47,6 +49,9 @@ public:
|
||||
// Status collect_query_statistics(QueryStatistics* statistics) override;
|
||||
void set_num_senders(int num_senders) { _num_senders = num_senders; }
|
||||
|
||||
// final materialization, used only in topn node
|
||||
Status _second_phase_fetch_data(RuntimeState* state, Block* final_block);
|
||||
|
||||
private:
|
||||
int _num_senders;
|
||||
bool _is_merging;
|
||||
@ -61,6 +66,10 @@ private:
|
||||
VSortExecExprs _vsort_exec_exprs;
|
||||
std::vector<bool> _is_asc_order;
|
||||
std::vector<bool> _nulls_first;
|
||||
|
||||
// for fetch data by rowids
|
||||
DorisNodesInfo* _nodes_info = nullptr;
|
||||
bool _use_two_phase_read = false;
|
||||
};
|
||||
} // namespace vectorized
|
||||
} // namespace doris
|
||||
|
||||
@ -82,7 +82,6 @@ Status VSortNode::init(const TPlanNode& tnode, RuntimeState* state) {
|
||||
}
|
||||
|
||||
_sorter->init_profile(_runtime_profile.get());
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -127,7 +126,6 @@ Status VSortNode::sink(RuntimeState* state, vectorized::Block* input_block, bool
|
||||
old_top = std::move(new_top);
|
||||
}
|
||||
}
|
||||
|
||||
if (!_reuse_mem) {
|
||||
input_block->clear();
|
||||
}
|
||||
|
||||
@ -50,12 +50,17 @@ Status VSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor&
|
||||
if (slot_desc == nullptr) {
|
||||
return Status::InternalError("couldn't resolve slot descriptor {}", _slot_id);
|
||||
}
|
||||
_column_name = &slot_desc->col_name();
|
||||
if (!slot_desc->need_materialize()) {
|
||||
// slot should be ignored manually
|
||||
_column_id = -1;
|
||||
return Status::OK();
|
||||
}
|
||||
_column_id = desc.get_column_id(_slot_id);
|
||||
if (_column_id < 0) {
|
||||
LOG(INFO) << "VSlotRef - invalid slot id: " << _slot_id << " desc:" << desc.debug_string();
|
||||
return Status::InternalError("VSlotRef - invalid slot id {}", _slot_id);
|
||||
}
|
||||
_column_name = &slot_desc->col_name();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -34,10 +34,14 @@ public:
|
||||
return create_columns_with_type_and_name(row_desc);
|
||||
}
|
||||
|
||||
static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) {
|
||||
static ColumnsWithTypeAndName create_columns_with_type_and_name(
|
||||
const RowDescriptor& row_desc, bool ignore_trivial_slot = false) {
|
||||
ColumnsWithTypeAndName columns_with_type_and_name;
|
||||
for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
|
||||
for (const auto& slot_desc : tuple_desc->slots()) {
|
||||
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
|
||||
continue;
|
||||
}
|
||||
columns_with_type_and_name.emplace_back(nullptr, slot_desc->get_data_type_ptr(),
|
||||
slot_desc->col_name());
|
||||
}
|
||||
@ -45,10 +49,14 @@ public:
|
||||
return columns_with_type_and_name;
|
||||
}
|
||||
|
||||
static ColumnsWithTypeAndName create_empty_block(const RowDescriptor& row_desc) {
|
||||
static ColumnsWithTypeAndName create_empty_block(const RowDescriptor& row_desc,
|
||||
bool ignore_trivial_slot = false) {
|
||||
ColumnsWithTypeAndName columns_with_type_and_name;
|
||||
for (const auto& tuple_desc : row_desc.tuple_descriptors()) {
|
||||
for (const auto& slot_desc : tuple_desc->slots()) {
|
||||
if (ignore_trivial_slot && !slot_desc->need_materialize()) {
|
||||
continue;
|
||||
}
|
||||
columns_with_type_and_name.emplace_back(
|
||||
slot_desc->get_data_type_ptr()->create_column(),
|
||||
slot_desc->get_data_type_ptr(), slot_desc->col_name());
|
||||
|
||||
Reference in New Issue
Block a user