[Enhancement](like) fallback to re2 if hyperscan failed (#18350)
This commit is contained in:
@ -47,16 +47,26 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
|
||||
cloned.escape_char = escape_char;
|
||||
cloned.set_search_string(search_string);
|
||||
|
||||
if (hs_database) {
|
||||
std::string re_pattern;
|
||||
FunctionLike::convert_like_pattern(this, pattern_str, &re_pattern);
|
||||
|
||||
std::string re_pattern;
|
||||
FunctionLike::convert_like_pattern(this, pattern_str, &re_pattern);
|
||||
if (hs_database) { // use hyperscan
|
||||
hs_database_t* database = nullptr;
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
RETURN_IF_ERROR(FunctionLike::hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch));
|
||||
|
||||
cloned.hs_database.reset(database);
|
||||
cloned.hs_scratch.reset(scratch);
|
||||
} else { // fallback to re2
|
||||
cloned.hs_database.reset();
|
||||
cloned.hs_scratch.reset();
|
||||
|
||||
RE2::Options opts;
|
||||
opts.set_never_nl(false);
|
||||
opts.set_dot_nl(true);
|
||||
cloned.regex = std::make_unique<RE2>(re_pattern, opts);
|
||||
if (!cloned.regex->ok()) {
|
||||
return Status::InternalError("Invalid regex expression: {}", re_pattern);
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
@ -198,10 +208,14 @@ Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, co
|
||||
|
||||
Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
|
||||
const StringRef& pattern, unsigned char* result) {
|
||||
auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(),
|
||||
doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
if (state->hs_database) { // use hyperscan
|
||||
auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(),
|
||||
doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
}
|
||||
} else { // fallback to re2
|
||||
*result = RE2::PartialMatch(re2::StringPiece(val.data, val.size), *state->regex.get());
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
@ -213,17 +227,27 @@ Status FunctionLikeBase::regexp_fn_scalar(LikeSearchState* state, const StringRe
|
||||
|
||||
hs_database_t* database = nullptr;
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch));
|
||||
if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) { // use hyperscan
|
||||
auto ret = hs_scan(database, val.data, val.size, 0, scratch,
|
||||
doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
}
|
||||
|
||||
auto ret = hs_scan(database, val.data, val.size, 0, scratch,
|
||||
doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(database);
|
||||
} else { // fallback to re2
|
||||
RE2::Options opts;
|
||||
opts.set_never_nl(false);
|
||||
opts.set_dot_nl(true);
|
||||
re2::RE2 re(re_pattern, opts);
|
||||
if (re.ok()) {
|
||||
*result = RE2::PartialMatch(re2::StringPiece(val.data, val.size), re);
|
||||
} else {
|
||||
return Status::RuntimeError("Invalid pattern: {}", pattern.debug_string());
|
||||
}
|
||||
}
|
||||
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(database);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -231,13 +255,22 @@ Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnS
|
||||
const StringRef& pattern,
|
||||
ColumnUInt8::Container& result) {
|
||||
auto sz = val.size();
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
const auto& str_ref = val.get_data_at(i);
|
||||
auto ret = hs_scan(
|
||||
state->hs_database.get(), str_ref.data, str_ref.size, 0, state->hs_scratch.get(),
|
||||
doris::vectorized::LikeSearchState::hs_match_handler, (void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
if (state->hs_database) { // use hyperscan
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
const auto& str_ref = val.get_data_at(i);
|
||||
auto ret = hs_scan(state->hs_database.get(), str_ref.data, str_ref.size, 0,
|
||||
state->hs_scratch.get(),
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
}
|
||||
}
|
||||
} else { // fallback to re2
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
const auto& str_ref = val.get_data_at(i);
|
||||
*(result.data() + i) = RE2::PartialMatch(re2::StringPiece(str_ref.data, str_ref.size),
|
||||
*state->regex.get());
|
||||
}
|
||||
}
|
||||
|
||||
@ -250,22 +283,37 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& v
|
||||
|
||||
hs_database_t* database = nullptr;
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch));
|
||||
if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) { // use hyperscan
|
||||
auto sz = val.size();
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
const auto& str_ref = val.get_data_at(i);
|
||||
auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
}
|
||||
}
|
||||
|
||||
auto sz = val.size();
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
const auto& str_ref = val.get_data_at(i);
|
||||
auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(database);
|
||||
} else { // fallback to re2
|
||||
RE2::Options opts;
|
||||
opts.set_never_nl(false);
|
||||
opts.set_dot_nl(true);
|
||||
re2::RE2 re(re_pattern, opts);
|
||||
if (re.ok()) {
|
||||
auto sz = val.size();
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
const auto& str_ref = val.get_data_at(i);
|
||||
*(result.data() + i) =
|
||||
RE2::PartialMatch(re2::StringPiece(str_ref.data, str_ref.size), re);
|
||||
}
|
||||
} else {
|
||||
return Status::RuntimeError("Invalid pattern: {}", pattern.debug_string());
|
||||
}
|
||||
}
|
||||
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(database);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -275,13 +323,22 @@ Status FunctionLikeBase::constant_regex_fn_predicate(LikeSearchState* state,
|
||||
ColumnUInt8::Container& result,
|
||||
const uint16_t* sel, size_t sz) {
|
||||
auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
auto ret = hs_scan(state->hs_database.get(), data_ptr[sel[i]].data, data_ptr[sel[i]].size,
|
||||
0, state->hs_scratch.get(),
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
|
||||
if (state->hs_database) { // use hyperscan
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
auto ret = hs_scan(state->hs_database.get(), data_ptr[sel[i]].data,
|
||||
data_ptr[sel[i]].size, 0, state->hs_scratch.get(),
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
}
|
||||
}
|
||||
} else { // fallback to re2
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
*(result.data() + i) = RE2::PartialMatch(
|
||||
re2::StringPiece(data_ptr[sel[i]].data, data_ptr[sel[i]].size),
|
||||
*state->regex.get());
|
||||
}
|
||||
}
|
||||
|
||||
@ -297,21 +354,35 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
|
||||
|
||||
hs_database_t* database = nullptr;
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch));
|
||||
if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) { // use hyperscan
|
||||
auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
auto ret = hs_scan(database, data_ptr[sel[i]].data, data_ptr[sel[i]].size, 0, scratch,
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
}
|
||||
}
|
||||
|
||||
auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
auto ret = hs_scan(database, data_ptr[sel[i]].data, data_ptr[sel[i]].size, 0, scratch,
|
||||
doris::vectorized::LikeSearchState::hs_match_handler,
|
||||
(void*)(result.data() + i));
|
||||
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
|
||||
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(database);
|
||||
} else { // fallback to re2
|
||||
RE2::Options opts;
|
||||
opts.set_never_nl(false);
|
||||
opts.set_dot_nl(true);
|
||||
re2::RE2 re(re_pattern, opts);
|
||||
if (re.ok()) {
|
||||
auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
|
||||
for (size_t i = 0; i < sz; i++) {
|
||||
*(result.data() + i) = RE2::PartialMatch(
|
||||
re2::StringPiece(data_ptr[sel[i]].data, data_ptr[sel[i]].size), re);
|
||||
}
|
||||
} else {
|
||||
return Status::RuntimeError("Invalid pattern: {}", pattern.debug_string());
|
||||
}
|
||||
}
|
||||
|
||||
hs_free_scratch(scratch);
|
||||
hs_free_database(database);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -655,10 +726,24 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta
|
||||
|
||||
hs_database_t* database = nullptr;
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
RETURN_IF_ERROR(hs_prepare(context, re_pattern.c_str(), &database, &scratch));
|
||||
if (hs_prepare(context, re_pattern.c_str(), &database, &scratch).ok()) {
|
||||
// use hyperscan
|
||||
state->search_state.hs_database.reset(database);
|
||||
state->search_state.hs_scratch.reset(scratch);
|
||||
} else {
|
||||
// fallback to re2
|
||||
// reset hs_database to nullptr to indicate not use hyperscan
|
||||
state->search_state.hs_database.reset();
|
||||
state->search_state.hs_scratch.reset();
|
||||
|
||||
state->search_state.hs_database.reset(database);
|
||||
state->search_state.hs_scratch.reset(scratch);
|
||||
RE2::Options opts;
|
||||
opts.set_never_nl(false);
|
||||
opts.set_dot_nl(true);
|
||||
state->search_state.regex = std::make_unique<RE2>(re_pattern, opts);
|
||||
if (!state->search_state.regex->ok()) {
|
||||
return Status::InternalError("Invalid regex expression: {}", pattern_str);
|
||||
}
|
||||
}
|
||||
|
||||
state->function = constant_regex_fn;
|
||||
state->predicate_like_function = constant_regex_fn_predicate;
|
||||
@ -706,11 +791,23 @@ Status FunctionRegexp::open(FunctionContext* context, FunctionContext::FunctionS
|
||||
} else {
|
||||
hs_database_t* database = nullptr;
|
||||
hs_scratch_t* scratch = nullptr;
|
||||
RETURN_IF_ERROR(hs_prepare(context, pattern_str.c_str(), &database, &scratch));
|
||||
|
||||
state->search_state.hs_database.reset(database);
|
||||
state->search_state.hs_scratch.reset(scratch);
|
||||
|
||||
if (hs_prepare(context, pattern_str.c_str(), &database, &scratch).ok()) {
|
||||
// use hyperscan
|
||||
state->search_state.hs_database.reset(database);
|
||||
state->search_state.hs_scratch.reset(scratch);
|
||||
} else {
|
||||
// fallback to re2
|
||||
// reset hs_database to nullptr to indicate not use hyperscan
|
||||
state->search_state.hs_database.reset();
|
||||
state->search_state.hs_scratch.reset();
|
||||
RE2::Options opts;
|
||||
opts.set_never_nl(false);
|
||||
opts.set_dot_nl(true);
|
||||
state->search_state.regex = std::make_unique<RE2>(pattern_str, opts);
|
||||
if (!state->search_state.regex->ok()) {
|
||||
return Status::InternalError("Invalid regex expression: {}", pattern_str);
|
||||
}
|
||||
}
|
||||
state->function = constant_regex_fn;
|
||||
state->predicate_like_function = constant_regex_fn_predicate;
|
||||
state->scalar_function = constant_regex_fn_scalar;
|
||||
|
||||
Reference in New Issue
Block a user