[bug](scanner) Improve limit query performance on olapScannode and avoid infinite loop (#11301)

1. Fix a bug that query large column table may cause infinite loop 2. Optimize the query logic with limit, for the case where the limit value is relatively small, reduce the parallelism of the scanner, reduce unnecessary resource consumption, and increase the number of similar queries that the system can carry at the same time, and increase the query speed by more than 60%
2022-08-01 13:50:12 +08:00
parent b35daf0a04
commit 4f5e1601df
8 changed files with 104 additions and 36 deletions
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@ -95,7 +95,8 @@ Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) {

        _runtime_filter_ctxs[i].runtimefilter = runtime_filter;
    }
-
+    _batch_size = _limit == -1 ? state->batch_size()
+                               : std::min(static_cast<int64_t>(state->batch_size()), _limit);
    return Status::OK();
 }

@ -273,7 +274,6 @@ Status OlapScanNode::open(RuntimeState* state) {
 Status OlapScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
    SCOPED_TIMER(_runtime_profile->total_time_counter());
    SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
-
    // check if Canceled.
    if (state->is_cancelled()) {
        std::unique_lock<std::mutex> l(_row_batches_lock);
@ -939,6 +939,7 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) {
            OlapScanner* scanner =
                    new OlapScanner(state, this, _olap_scan_node.is_preaggregation,
                                    _need_agg_finalize, *scan_range, _scanner_mem_tracker.get());
+            scanner->set_batch_size(_batch_size);
            // add scanner to pool before doing prepare.
            // so that scanner can be automatically deconstructed if prepare failed.
            _scanner_pool.add(scanner);
@ -1489,7 +1490,12 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {
        }
    }

-    ThreadPoolToken* thread_token = state->get_query_fragments_ctx()->get_token();
+    ThreadPoolToken* thread_token = nullptr;
+    if (limit() != -1 && limit() < 1024) {
+        thread_token = state->get_query_fragments_ctx()->get_serial_token();
+    } else {
+        thread_token = state->get_query_fragments_ctx()->get_token();
+    }

    /*********************************
     * The basic strategy of priority scheduling:
@ -1739,7 +1745,7 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) {
                       << ", fragment id=" << print_id(_runtime_state->fragment_instance_id());
            break;
        }
-        RowBatch* row_batch = new RowBatch(this->row_desc(), state->batch_size());
+        RowBatch* row_batch = new RowBatch(this->row_desc(), _batch_size);
        row_batch->set_scanner_id(scanner->id());
        status = scanner->get_batch(_runtime_state, row_batch, &eos);
        if (!status.ok()) {
@ -1757,6 +1763,10 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) {
            raw_bytes_read += row_batch->tuple_data_pool()->total_reserved_bytes();
        }
        raw_rows_read = scanner->raw_rows_read();
+        if (limit() != -1 && raw_rows_read >= limit()) {
+            eos = true;
+            break;
+        }
    }

    {
@ -1775,6 +1785,7 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) {
            std::lock_guard<SpinLock> guard(_status_mutex);
            global_status_ok = _status.ok();
        }
+
        if (UNLIKELY(!global_status_ok)) {
            eos = true;
            for (auto rb : row_batchs) {