Refactoring the filtering conditions in the current ExecNode from an expression tree to an array can simplify the process of adding runtime filters. It eliminates the need for complex merge operations and removes the requirement for the frontend to combine expressions into a single entity. By representing the filtering conditions as an array, each condition can be treated individually, making it easier to add runtime filters without the need for complex merging logic. The array can store the individual conditions, and the runtime filter logic can iterate through the array to apply the filters as needed. This refactoring simplifies the codebase, improves readability, and reduces the complexity associated with handling filtering conditions and adding runtime filters. It separates the conditions into discrete entities, enabling more straightforward manipulation and management within the execution node.
237 lines
8.1 KiB
C++
237 lines
8.1 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "vec/exec/scan/new_odbc_scanner.h"
|
|
|
|
#include <gen_cpp/PlanNodes_types.h>
|
|
#include <sql.h>
|
|
|
|
#include <algorithm>
|
|
#include <new>
|
|
#include <ostream>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "common/logging.h"
|
|
#include "common/status.h"
|
|
#include "runtime/descriptors.h"
|
|
#include "runtime/runtime_state.h"
|
|
#include "runtime/types.h"
|
|
#include "vec/columns/column.h"
|
|
#include "vec/core/block.h"
|
|
#include "vec/core/column_with_type_and_name.h"
|
|
#include "vec/data_types/data_type.h"
|
|
#include "vec/exec/scan/new_odbc_scan_node.h"
|
|
#include "vec/exec/scan/vscanner.h"
|
|
#include "vec/exprs/vexpr_context.h"
|
|
|
|
namespace doris {
|
|
class RuntimeProfile;
|
|
namespace vectorized {
|
|
class VScanNode;
|
|
} // namespace vectorized
|
|
} // namespace doris
|
|
|
|
static const std::string NEW_SCANNER_TYPE = "NewOdbcScanner";
|
|
|
|
namespace doris::vectorized {
|
|
NewOdbcScanner::NewOdbcScanner(RuntimeState* state, NewOdbcScanNode* parent, int64_t limit,
|
|
const TOdbcScanNode& odbc_scan_node, RuntimeProfile* profile)
|
|
: VScanner(state, static_cast<VScanNode*>(parent), limit, profile),
|
|
_is_init(false),
|
|
_odbc_eof(false),
|
|
_table_name(odbc_scan_node.table_name),
|
|
_connect_string(odbc_scan_node.connect_string),
|
|
_query_string(odbc_scan_node.query_string),
|
|
_tuple_id(odbc_scan_node.tuple_id),
|
|
_tuple_desc(nullptr) {}
|
|
|
|
Status NewOdbcScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) {
|
|
VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare";
|
|
RETURN_IF_ERROR(VScanner::prepare(state, conjuncts));
|
|
|
|
if (_is_init) {
|
|
return Status::OK();
|
|
}
|
|
|
|
if (nullptr == state) {
|
|
return Status::InternalError("input pointer is null.");
|
|
}
|
|
|
|
// get tuple desc
|
|
_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
|
|
|
|
if (nullptr == _tuple_desc) {
|
|
return Status::InternalError("Failed to get tuple descriptor.");
|
|
}
|
|
|
|
_odbc_param.connect_string = std::move(_connect_string);
|
|
_odbc_param.query_string = std::move(_query_string);
|
|
_odbc_param.tuple_desc = _tuple_desc;
|
|
|
|
_odbc_connector.reset(new (std::nothrow) ODBCConnector(_odbc_param));
|
|
|
|
if (_odbc_connector == nullptr) {
|
|
return Status::InternalError("new a odbc scanner failed.");
|
|
}
|
|
|
|
_text_converter.reset(new (std::nothrow) TextConverter('\\'));
|
|
|
|
if (_text_converter == nullptr) {
|
|
return Status::InternalError("new a text convertor failed.");
|
|
}
|
|
|
|
_is_init = true;
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
Status NewOdbcScanner::open(RuntimeState* state) {
|
|
VLOG_CRITICAL << NEW_SCANNER_TYPE << "::open";
|
|
|
|
if (nullptr == state) {
|
|
return Status::InternalError("input pointer is null.");
|
|
}
|
|
|
|
if (!_is_init) {
|
|
return Status::InternalError("used before initialize.");
|
|
}
|
|
|
|
RETURN_IF_CANCELLED(state);
|
|
RETURN_IF_ERROR(VScanner::open(state));
|
|
RETURN_IF_ERROR(_odbc_connector->open(state));
|
|
RETURN_IF_ERROR(_odbc_connector->query());
|
|
// check materialize slot num
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
Status NewOdbcScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof) {
|
|
VLOG_CRITICAL << NEW_SCANNER_TYPE << "::_get_block_impl";
|
|
|
|
if (nullptr == state || nullptr == block || nullptr == eof) {
|
|
return Status::InternalError("input is NULL pointer");
|
|
}
|
|
|
|
if (!_is_init) {
|
|
return Status::InternalError("used before initialize.");
|
|
}
|
|
RETURN_IF_CANCELLED(state);
|
|
|
|
if (_odbc_eof == true) {
|
|
*eof = true;
|
|
return Status::OK();
|
|
}
|
|
|
|
auto column_size = _tuple_desc->slots().size();
|
|
std::vector<MutableColumnPtr> columns(column_size);
|
|
|
|
bool mem_reuse = block->mem_reuse();
|
|
// only empty block should be here
|
|
DCHECK(block->rows() == 0);
|
|
|
|
do {
|
|
RETURN_IF_CANCELLED(state);
|
|
|
|
columns.resize(column_size);
|
|
for (auto i = 0; i < column_size; i++) {
|
|
if (mem_reuse) {
|
|
columns[i] = std::move(*block->get_by_position(i).column).mutate();
|
|
} else {
|
|
columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
|
|
}
|
|
}
|
|
|
|
while (true) {
|
|
// block is full, break
|
|
if (state->batch_size() <= columns[0]->size()) {
|
|
break;
|
|
}
|
|
|
|
RETURN_IF_ERROR(_odbc_connector->get_next_row(&_odbc_eof));
|
|
|
|
if (_odbc_eof == true) {
|
|
if (block->rows() == 0) {
|
|
*eof = true;
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Read one row from reader
|
|
for (int column_index = 0, materialized_column_index = 0; column_index < column_size;
|
|
++column_index) {
|
|
auto slot_desc = _tuple_desc->slots()[column_index];
|
|
// because the fe planner filter the non_materialize column
|
|
if (!slot_desc->is_materialized()) {
|
|
continue;
|
|
}
|
|
const auto& column_data =
|
|
_odbc_connector->get_column_data(materialized_column_index);
|
|
|
|
char* value_data = static_cast<char*>(column_data.target_value_ptr);
|
|
int value_len = column_data.strlen_or_ind;
|
|
|
|
if (value_len == SQL_NULL_DATA) {
|
|
if (slot_desc->is_nullable()) {
|
|
columns[column_index]->insert_default();
|
|
} else {
|
|
return Status::InternalError(
|
|
"nonnull column contains nullptr. table={}, column={}", _table_name,
|
|
slot_desc->col_name());
|
|
}
|
|
} else if (value_len > column_data.buffer_length) {
|
|
return Status::InternalError(
|
|
"column value length longer than buffer length. "
|
|
"table={}, column={}, buffer_length",
|
|
_table_name, slot_desc->col_name(), column_data.buffer_length);
|
|
} else {
|
|
if (!_text_converter->write_column(slot_desc, &columns[column_index],
|
|
value_data, value_len, true, false)) {
|
|
std::stringstream ss;
|
|
ss << "Fail to convert odbc value:'" << value_data << "' to "
|
|
<< slot_desc->type() << " on column:`" << slot_desc->col_name() + "`";
|
|
return Status::InternalError(ss.str());
|
|
}
|
|
}
|
|
materialized_column_index++;
|
|
}
|
|
}
|
|
|
|
// Before really use the Block, must clear other ptr of column in block
|
|
// So here need do std::move and clear in `columns`
|
|
if (!mem_reuse) {
|
|
int column_index = 0;
|
|
for (const auto slot_desc : _tuple_desc->slots()) {
|
|
block->insert(ColumnWithTypeAndName(std::move(columns[column_index++]),
|
|
slot_desc->get_data_type_ptr(),
|
|
slot_desc->col_name()));
|
|
}
|
|
} else {
|
|
columns.clear();
|
|
}
|
|
VLOG_ROW << "NewOdbcScanner output rows: " << block->rows();
|
|
} while (block->rows() == 0 && !(*eof));
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
Status NewOdbcScanner::close(RuntimeState* state) {
|
|
RETURN_IF_ERROR(VScanner::close(state));
|
|
return Status::OK();
|
|
}
|
|
} // namespace doris::vectorized
|