[Refactor](inverted index) refactor inverted index compound predicates evaluate logic #38908 (#41385)
cherry pick from #38908
This commit is contained in:
@ -24,7 +24,63 @@
|
||||
#include "util/debug_points.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
Status FunctionMatchBase::evaluate_inverted_index(
|
||||
const ColumnsWithTypeAndName& arguments,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
|
||||
segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
|
||||
DCHECK(arguments.size() == 1);
|
||||
DCHECK(data_type_with_names.size() == 1);
|
||||
DCHECK(iterators.size() == 1);
|
||||
auto* iter = iterators[0];
|
||||
auto data_type_with_name = data_type_with_names[0];
|
||||
if (iter == nullptr) {
|
||||
return Status::OK();
|
||||
}
|
||||
const std::string& function_name = get_name();
|
||||
|
||||
if (function_name == MATCH_PHRASE_FUNCTION || function_name == MATCH_PHRASE_PREFIX_FUNCTION ||
|
||||
function_name == MATCH_PHRASE_EDGE_FUNCTION) {
|
||||
if (iter->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT &&
|
||||
get_parser_phrase_support_string_from_properties(iter->get_index_properties()) ==
|
||||
INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
|
||||
"phrase queries require setting support_phrase = true");
|
||||
}
|
||||
}
|
||||
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
|
||||
Field param_value;
|
||||
arguments[0].column->get(0, param_value);
|
||||
auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
|
||||
if (!is_string_type(param_type)) {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
|
||||
"arguments for match must be string");
|
||||
}
|
||||
std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
|
||||
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
|
||||
param_type, ¶m_value, query_param));
|
||||
if (is_string_type(param_type)) {
|
||||
auto inverted_index_query_type = get_query_type_from_fn_name();
|
||||
RETURN_IF_ERROR(
|
||||
iter->read_from_inverted_index(data_type_with_name.first, query_param->get_value(),
|
||||
inverted_index_query_type, num_rows, roaring));
|
||||
} else {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
|
||||
"invalid params type for FunctionMatchBase::evaluate_inverted_index {}",
|
||||
param_type);
|
||||
}
|
||||
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
|
||||
if (iter->has_null()) {
|
||||
segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
|
||||
RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
|
||||
null_bitmap = null_bitmap_cache_handle.get_bitmap();
|
||||
}
|
||||
segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
|
||||
bitmap_result = result;
|
||||
bitmap_result.mask_out_null();
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
|
||||
const ColumnNumbers& arguments, size_t result,
|
||||
size_t input_rows_count) const {
|
||||
@ -32,78 +88,70 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
|
||||
DataTypePtr& type_ptr = block.get_by_position(arguments[1]).type;
|
||||
auto match_query_str = type_ptr->to_string(*column_ptr, 0);
|
||||
std::string column_name = block.get_by_position(arguments[0]).name;
|
||||
auto match_pred_column_name =
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str;
|
||||
if (!block.has(match_pred_column_name)) {
|
||||
VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name
|
||||
<< ", match_query_str=" << match_query_str;
|
||||
InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
if (inverted_index_ctx == nullptr) {
|
||||
inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
}
|
||||
|
||||
const ColumnPtr source_col =
|
||||
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
const auto* values = check_and_get_column<ColumnString>(source_col.get());
|
||||
const ColumnArray* array_col = nullptr;
|
||||
if (source_col->is_column_array()) {
|
||||
if (source_col->is_nullable()) {
|
||||
auto* nullable = check_and_get_column<ColumnNullable>(source_col.get());
|
||||
array_col = check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
|
||||
} else {
|
||||
array_col = check_and_get_column<ColumnArray>(source_col.get());
|
||||
}
|
||||
if (array_col && !array_col->get_data().is_column_string()) {
|
||||
return Status::NotSupported(
|
||||
fmt::format("unsupported nested array of type {} for function {}",
|
||||
is_column_nullable(array_col->get_data())
|
||||
? array_col->get_data().get_name()
|
||||
: array_col->get_data().get_family_name(),
|
||||
get_name()));
|
||||
}
|
||||
|
||||
if (is_column_nullable(array_col->get_data())) {
|
||||
const auto& array_nested_null_column =
|
||||
reinterpret_cast<const ColumnNullable&>(array_col->get_data());
|
||||
values = check_and_get_column<ColumnString>(
|
||||
*(array_nested_null_column.get_nested_column_ptr()));
|
||||
} else {
|
||||
// array column element is always set Nullable for now.
|
||||
values = check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
|
||||
}
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(source_col.get())) {
|
||||
// match null
|
||||
if (type_ptr->is_nullable()) {
|
||||
if (column_ptr->only_null()) {
|
||||
block.get_by_position(result).column = nullable->get_null_map_column_ptr();
|
||||
return Status::OK();
|
||||
}
|
||||
} else {
|
||||
values = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
|
||||
}
|
||||
}
|
||||
|
||||
if (!values) {
|
||||
LOG(WARNING) << "Illegal column " << source_col->get_name();
|
||||
return Status::InternalError("Not supported input column types");
|
||||
}
|
||||
// result column
|
||||
auto res = ColumnUInt8::create();
|
||||
ColumnUInt8::Container& vec_res = res->get_data();
|
||||
// set default value to 0, and match functions only need to set 1/true
|
||||
vec_res.resize_fill(input_rows_count);
|
||||
RETURN_IF_ERROR(execute_match(
|
||||
context, column_name, match_query_str, input_rows_count, values, inverted_index_ctx,
|
||||
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
|
||||
block.replace_by_position(result, std::move(res));
|
||||
} else {
|
||||
auto match_pred_column =
|
||||
block.get_by_name(match_pred_column_name).column->convert_to_full_column_if_const();
|
||||
block.replace_by_position(result, std::move(match_pred_column));
|
||||
VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name
|
||||
<< ", match_query_str=" << match_query_str;
|
||||
InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
if (inverted_index_ctx == nullptr) {
|
||||
inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
}
|
||||
|
||||
const ColumnPtr source_col =
|
||||
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
const auto* values = check_and_get_column<ColumnString>(source_col.get());
|
||||
const ColumnArray* array_col = nullptr;
|
||||
if (source_col->is_column_array()) {
|
||||
if (source_col->is_nullable()) {
|
||||
auto* nullable = check_and_get_column<ColumnNullable>(source_col.get());
|
||||
array_col = check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
|
||||
} else {
|
||||
array_col = check_and_get_column<ColumnArray>(source_col.get());
|
||||
}
|
||||
if (array_col && !array_col->get_data().is_column_string()) {
|
||||
return Status::NotSupported(
|
||||
fmt::format("unsupported nested array of type {} for function {}",
|
||||
is_column_nullable(array_col->get_data())
|
||||
? array_col->get_data().get_name()
|
||||
: array_col->get_data().get_family_name(),
|
||||
get_name()));
|
||||
}
|
||||
|
||||
if (is_column_nullable(array_col->get_data())) {
|
||||
const auto& array_nested_null_column =
|
||||
reinterpret_cast<const ColumnNullable&>(array_col->get_data());
|
||||
values = check_and_get_column<ColumnString>(
|
||||
*(array_nested_null_column.get_nested_column_ptr()));
|
||||
} else {
|
||||
// array column element is always set Nullable for now.
|
||||
values = check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
|
||||
}
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(source_col.get())) {
|
||||
// match null
|
||||
if (type_ptr->is_nullable()) {
|
||||
if (column_ptr->only_null()) {
|
||||
block.get_by_position(result).column = nullable->get_null_map_column_ptr();
|
||||
return Status::OK();
|
||||
}
|
||||
} else {
|
||||
values = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
|
||||
}
|
||||
}
|
||||
|
||||
if (!values) {
|
||||
LOG(WARNING) << "Illegal column " << source_col->get_name();
|
||||
return Status::InternalError("Not supported input column types");
|
||||
}
|
||||
// result column
|
||||
auto res = ColumnUInt8::create();
|
||||
ColumnUInt8::Container& vec_res = res->get_data();
|
||||
// set default value to 0, and match functions only need to set 1/true
|
||||
vec_res.resize_fill(input_rows_count);
|
||||
RETURN_IF_ERROR(execute_match(context, column_name, match_query_str, input_rows_count, values,
|
||||
inverted_index_ctx,
|
||||
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
|
||||
block.replace_by_position(result, std::move(res));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user