Add counters to OlapScanNode (#538)

There is a non-negligible cost to convert VectorizedRowBatch to RowBatch.
When we seek a block, we only read one row from the engine to minimize
this conversion cost.

This patch can reduce some queries' execution time from 5s to 2s.
This commit is contained in:
ZHAO Chun
2019-01-16 18:57:04 +08:00
committed by GitHub
parent 79dc521893
commit e8360f5eee
7 changed files with 22 additions and 6 deletions

View File

@ -109,6 +109,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
#endif
ADD_TIMER(_runtime_profile, "ShowHintsTime");
_reader_init_timer = ADD_TIMER(_runtime_profile, "ReaderInitTime");
_read_compressed_counter =
ADD_COUNTER(_runtime_profile, "CompressedBytesRead", TUnit::BYTES);
_read_uncompressed_counter =
@ -120,6 +121,8 @@ void OlapScanNode::_init_counter(RuntimeState* state) {
ADD_TIMER(_runtime_profile, "BlockFetchTime");
_raw_rows_counter =
ADD_COUNTER(_runtime_profile, "RawRowsRead", TUnit::UNIT);
_block_convert_timer = ADD_TIMER(_runtime_profile, "BlockConvertTime");
_block_seek_timer = ADD_TIMER(_runtime_profile, "BlockSeekTime");
_rows_vec_cond_counter =
ADD_COUNTER(_runtime_profile, "RowsVectorPredFiltered", TUnit::UNIT);

View File

@ -245,6 +245,7 @@ private:
RuntimeProfile::Counter* _scan_timer;
RuntimeProfile::Counter* _tablet_counter;
RuntimeProfile::Counter* _rows_pushed_cond_filtered_counter = nullptr;
RuntimeProfile::Counter* _reader_init_timer = nullptr;
TResourceInfo* _resource_info;
@ -265,6 +266,8 @@ private:
RuntimeProfile::Counter* _stats_filtered_counter = nullptr;
RuntimeProfile::Counter* _del_filtered_counter = nullptr;
RuntimeProfile::Counter* _block_seek_timer = nullptr;
RuntimeProfile::Counter* _block_convert_timer = nullptr;
RuntimeProfile::Counter* _block_load_timer = nullptr;
RuntimeProfile::Counter* _block_load_counter = nullptr;
RuntimeProfile::Counter* _block_fetch_timer = nullptr;

View File

@ -121,6 +121,7 @@ Status OlapScanner::_prepare(
Status OlapScanner::open() {
RETURN_IF_ERROR(_ctor_status);
SCOPED_TIMER(_parent->_reader_init_timer);
if (_conjunct_ctxs.size() > _direct_conjunct_size) {
_use_pushdown_conjuncts = true;
@ -430,6 +431,8 @@ void OlapScanner::update_counter() {
COUNTER_UPDATE(_parent->_block_load_timer, _reader->stats().block_load_ns);
COUNTER_UPDATE(_parent->_block_load_counter, _reader->stats().blocks_load);
COUNTER_UPDATE(_parent->_block_fetch_timer, _reader->stats().block_fetch_ns);
COUNTER_UPDATE(_parent->_block_seek_timer, _reader->stats().block_seek_ns);
COUNTER_UPDATE(_parent->_block_convert_timer, _reader->stats().block_convert_ns);
COUNTER_UPDATE(_parent->_raw_rows_counter, _reader->stats().raw_rows_read);
// COUNTER_UPDATE(_parent->_filtered_rows_counter, _reader->stats().num_rows_filtered);

View File

@ -344,7 +344,7 @@ const RowCursor* ColumnData::seek_and_get_current_row(const RowBlockPosition& po
<< ", segment:" << position.segment << ", block:" << position.data_offset;
return nullptr;
}
res = _get_block(true);
res = _get_block(true, 1);
if (res != OLAP_SUCCESS) {
LOG(WARNING) << "Fail to get block in seek_and_get_current_row, res=" << res
<< ", segment:" << position.segment << ", block:" << position.data_offset;
@ -634,7 +634,7 @@ OLAPStatus ColumnData::_schema_change_init() {
}
OLAPStatus ColumnData::_get_block_from_reader(
VectorizedRowBatch** got_batch, bool without_filter) {
VectorizedRowBatch** got_batch, bool without_filter, int rows_read) {
VectorizedRowBatch* vec_batch = nullptr;
if (_is_normal_read) {
vec_batch = _read_vector_batch.get();
@ -652,6 +652,9 @@ OLAPStatus ColumnData::_get_block_from_reader(
<< ", _segment_eof:" << _segment_eof;
#endif
vec_batch->clear();
if (rows_read > 0) {
vec_batch->set_limit(rows_read);
}
// If we are going to read last block, we need to set batch limit to the end of key
// if without_filter is true and _end_key_is_set is true, this must seek to start row's
// block, we must load the entire block.
@ -689,10 +692,10 @@ OLAPStatus ColumnData::_get_block_from_reader(
return OLAP_SUCCESS;
}
OLAPStatus ColumnData::_get_block(bool without_filter) {
OLAPStatus ColumnData::_get_block(bool without_filter, int rows_read) {
do {
VectorizedRowBatch* vec_batch = nullptr;
auto res = _get_block_from_reader(&vec_batch, without_filter);
auto res = _get_block_from_reader(&vec_batch, without_filter, rows_read);
if (res != OLAP_SUCCESS) {
return res;
}
@ -709,6 +712,7 @@ OLAPStatus ColumnData::_get_block(bool without_filter) {
if (vec_batch->size() == 0) {
continue;
}
SCOPED_RAW_TIMER(&_stats->block_convert_ns);
// when reach here, we have already read a block successfully
_read_block->clear();
vec_batch->dump_to_row_block(_read_block.get());

View File

@ -149,10 +149,10 @@ private:
// get block from reader, just read vector batch from _current_segment.
// The read batch return by got_batch.
OLAPStatus _get_block_from_reader(
VectorizedRowBatch** got_batch, bool without_filter);
VectorizedRowBatch** got_batch, bool without_filter, int rows_read);
// get block from segment reader. If this function returns OLAP_SUCCESS
OLAPStatus _get_block(bool without_filter);
OLAPStatus _get_block(bool without_filter, int rows_read = 0);
const RowCursor* _current_row() {
_read_block->get_row(_read_block->pos(), &_cursor);

View File

@ -222,6 +222,8 @@ struct OlapReaderStatistics {
int64_t block_load_ns = 0;
int64_t blocks_load = 0;
int64_t block_fetch_ns = 0;
int64_t block_seek_ns = 0;
int64_t block_convert_ns = 0;
int64_t raw_rows_read = 0;

View File

@ -846,6 +846,7 @@ OLAPStatus SegmentReader::_seek_to_block_directly(
// no need to execute seek
return OLAP_SUCCESS;
}
SCOPED_RAW_TIMER(&_stats->block_seek_ns);
for (auto cid : cids) {
// If column is added through schema change, column index may not exist because of
// linked schema change. So we need to ignore this column's seek