[opt](function) Optimize the trim function for single-char inputs (#3… (#37799)

https://github.com/apache/doris/pull/36497

before
```
mysql [test]>select count(ltrim(str,"1")) from stringDb2;
+------------------------+
| count(ltrim(str, '1')) |
+------------------------+
|               64000000 |
+------------------------+
1 row in set (7.79 sec)
```

now
```
mysql [test]>select count(ltrim(str,"1")) from stringDb2;
+------------------------+
| count(ltrim(str, '1')) |
+------------------------+
|               64000000 |
+------------------------+
1 row in set (0.73 sec)
```

## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->
This commit is contained in:
Mryange
2024-07-16 17:52:52 +08:00
committed by GitHub
parent 638ee2607e
commit cc6ff12097
3 changed files with 92 additions and 161 deletions

View File

@ -485,25 +485,29 @@ struct NameLTrim {
struct NameRTrim {
static constexpr auto name = "rtrim";
};
template <bool is_ltrim, bool is_rtrim>
template <bool is_ltrim, bool is_rtrim, bool trim_single>
struct TrimUtil {
static Status vector(const ColumnString::Chars& str_data,
const ColumnString::Offsets& str_offsets, const StringRef& rhs,
const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
size_t offset_size = str_offsets.size();
res_offsets.resize(str_offsets.size());
const size_t offset_size = str_offsets.size();
res_offsets.resize(offset_size);
res_data.reserve(str_data.size());
for (size_t i = 0; i < offset_size; ++i) {
const char* raw_str = reinterpret_cast<const char*>(&str_data[str_offsets[i - 1]]);
ColumnString::Offset size = str_offsets[i] - str_offsets[i - 1];
StringRef str(raw_str, size);
const auto* str_begin = str_data.data() + str_offsets[i - 1];
const auto* str_end = str_data.data() + str_offsets[i];
if constexpr (is_ltrim) {
str = simd::VStringFunctions::ltrim(str, rhs);
str_begin =
simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
}
if constexpr (is_rtrim) {
str = simd::VStringFunctions::rtrim(str, rhs);
str_end =
simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
}
StringOP::push_value_string(std::string_view((char*)str.data, str.size), i, res_data,
res_offsets);
res_data.insert_assume_reserved(str_begin, str_end);
res_offsets[i] = res_data.size();
}
return Status::OK();
}
@ -521,9 +525,9 @@ struct Trim1Impl {
if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
auto col_res = ColumnString::create();
char blank[] = " ";
StringRef rhs(blank, 1);
RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim>::vector(
col->get_chars(), col->get_offsets(), rhs, col_res->get_chars(),
const StringRef remove_str(blank, 1);
RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
col_res->get_offsets())));
block.replace_by_position(result, std::move(col_res));
} else {
@ -550,15 +554,21 @@ struct Trim2Impl {
const auto& rcol =
assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
->get_data_column_ptr();
if (auto col = assert_cast<const ColumnString*>(column.get())) {
if (auto col_right = assert_cast<const ColumnString*>(rcol.get())) {
if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
auto col_res = ColumnString::create();
const char* raw_rhs = reinterpret_cast<const char*>(&(col_right->get_chars()[0]));
ColumnString::Offset rhs_size = col_right->get_offsets()[0];
StringRef rhs(raw_rhs, rhs_size);
RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim>::vector(
col->get_chars(), col->get_offsets(), rhs, col_res->get_chars(),
col_res->get_offsets())));
const auto* remove_str_raw = col_right->get_chars().data();
const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
const StringRef remove_str(remove_str_raw, remove_str_size);
if (remove_str.size == 1) {
RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
col_res->get_offsets())));
} else {
RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
col_res->get_offsets())));
}
block.replace_by_position(result, std::move(col_res));
} else {
return Status::RuntimeError("Illegal column {} of argument of function {}",