[fix](function) make STRLEFT and STRRIGHT and SUBSTR function DEPEND_ON_ARGUMENT (#28352)

make STRLEFT and STRRIGHT function DEPEND_ON_ARGUMENT
This commit is contained in:
koarz
2024-01-24 13:34:59 +08:00
committed by yiguolei
parent 88fdb2677d
commit ca5a314765
26 changed files with 533 additions and 351 deletions

View File

@ -63,8 +63,9 @@ private:
* b. array contains/position/countequal function return nullable in less situations.
* c. cleared old version of Version 2.
* d. unix_timestamp function support timestamp with float for datetimev2, and change nullable mode.
* e. the right function outputs NULL when the function contains NULL, substr function returns empty if start > str.length.
* 4: start from doris 2.1.x
* a. change shuffle serialize/deserialize way
* a. change shuffle serialize/deserialize way
*/
inline const int BeExecVersionManager::max_be_exec_version = 4;
inline const int BeExecVersionManager::min_be_exec_version = 0;

View File

@ -1015,6 +1015,12 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
/// @TEMPORARY: for be_exec_version=3
factory.register_alternative_function<FunctionSubstringOld<Substr3ImplOld>>();
factory.register_alternative_function<FunctionSubstringOld<Substr2ImplOld>>();
factory.register_alternative_function<FunctionLeftOld>();
factory.register_alternative_function<FunctionRightOld>();
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
factory.register_alias(SubstringUtil::name, "substr");

View File

@ -133,6 +133,163 @@ struct StringOP {
struct SubstringUtil {
static constexpr auto name = "substring";
static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
DCHECK_EQ(arguments.size(), 3);
auto res = ColumnString::create();
bool col_const[3];
ColumnPtr argument_columns[3];
for (int i = 0; i < 3; ++i) {
col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
}
argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
*block.get_by_position(arguments[0]).column)
.convert_to_full_column()
: block.get_by_position(arguments[0]).column;
default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
const auto* specific_str_column =
assert_cast<const ColumnString*>(argument_columns[0].get());
const auto* specific_start_column =
assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get());
const auto* specific_len_column =
assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
auto vectors = vectors_utf8<false>;
bool is_ascii = simd::VStringFunctions::is_ascii(
{specific_str_column->get_chars().data(), specific_str_column->get_chars().size()});
if (col_const[1] && col_const[2] && is_ascii) {
vectors = vectors_ascii<true>;
} else if (col_const[1] && col_const[2]) {
vectors = vectors_utf8<true>;
} else if (is_ascii) {
vectors = vectors_ascii<false>;
}
vectors(specific_str_column->get_chars(), specific_str_column->get_offsets(),
specific_start_column->get_data(), specific_len_column->get_data(),
res->get_chars(), res->get_offsets());
block.get_by_position(result).column = std::move(res);
}
private:
template <bool is_const>
static void vectors_utf8(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets,
const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
size_t size = offsets.size();
res_offsets.resize(size);
res_chars.reserve(chars.size());
std::array<std::byte, 128 * 1024> buf;
PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
PMR::vector<size_t> index {&pool};
if constexpr (is_const) {
if (start[0] == 0 || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
return;
}
}
for (size_t i = 0; i < size; ++i) {
int str_size = offsets[i] - offsets[i - 1];
const char* str_data = (char*)chars.data() + offsets[i - 1];
int start_value = is_const ? start[0] : start[i];
int len_value = is_const ? len[0] : len[i];
// return empty string if start > src.length
if (start_value > str_size || str_size == 0 || start_value == 0 || len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
size_t byte_pos = 0;
index.clear();
for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
char_size = get_utf8_byte_length(str_data[j]);
index.push_back(j);
// index_size represents the number of characters from the beginning of the character to the current position.
// So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
if (start_value > 0 && index.size() > start_value + len_value) {
break;
}
}
int fixed_pos = start_value;
if (fixed_pos < -(int)index.size()) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
if (fixed_pos < 0) {
fixed_pos = index.size() + fixed_pos + 1;
}
byte_pos = index[fixed_pos - 1];
size_t fixed_len = str_size - byte_pos;
if (fixed_pos + len_value <= index.size()) {
fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
}
if (byte_pos <= str_size && fixed_len > 0) {
StringOP::push_value_string_reserved_and_allow_overflow(
{str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
} else {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
}
}
template <bool is_const>
static void vectors_ascii(const ColumnString::Chars& chars,
const ColumnString::Offsets& offsets,
const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
size_t size = offsets.size();
res_offsets.resize(size);
if constexpr (is_const) {
if (start[0] == 0 || len[0] <= 0) {
for (size_t i = 0; i < size; ++i) {
StringOP::push_empty_string(i, res_chars, res_offsets);
}
return;
}
res_chars.reserve(std::min(chars.size(), len[0] * size));
} else {
res_chars.reserve(chars.size());
}
for (size_t i = 0; i < size; ++i) {
int str_size = offsets[i] - offsets[i - 1];
const char* str_data = (char*)chars.data() + offsets[i - 1];
int start_value = is_const ? start[0] : start[i];
int len_value = is_const ? len[0] : len[i];
if (start_value > str_size || start_value < -str_size || str_size == 0 ||
len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
int fixed_pos = start_value - 1;
if (fixed_pos < 0) {
fixed_pos = str_size + fixed_pos + 1;
}
size_t fixed_len = std::min(str_size - fixed_pos, len_value);
StringOP::push_value_string_reserved_and_allow_overflow(
{str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
}
}
};
struct SubstringUtilOld {
static constexpr auto name = "substring";
static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
DCHECK_EQ(arguments.size(), 3);
@ -281,7 +438,8 @@ private:
int start_value = is_const ? start[0] : start[i];
int len_value = is_const ? len[0] : len[i];
if (start_value > str_size || start_value < -str_size || str_size == 0) {
if (start_value > str_size || start_value < -str_size || str_size == 0 ||
len_value <= 0) {
StringOP::push_empty_string(i, res_chars, res_offsets);
continue;
}
@ -304,7 +462,7 @@ public:
static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
return std::make_shared<DataTypeString>();
}
DataTypes get_variadic_argument_types_impl() const override {
return Impl::get_variadic_argument_types();
@ -313,7 +471,7 @@ public:
return get_variadic_argument_types_impl().size();
}
bool use_default_implementation_for_nulls() const override { return false; }
bool use_default_implementation_for_nulls() const override { return true; }
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
@ -349,10 +507,8 @@ struct Substr2Impl {
ColumnPtr str_col;
bool str_const;
std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column);
if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
str_col = nullable->get_nested_column_ptr();
}
auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
if (str_const) {
std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]);
@ -371,6 +527,81 @@ struct Substr2Impl {
}
};
template <typename Impl>
class FunctionSubstringOld : public IFunction {
public:
static constexpr auto name = SubstringUtilOld::name;
String get_name() const override { return name; }
static FunctionPtr create() { return std::make_shared<FunctionSubstringOld<Impl>>(); }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
}
DataTypes get_variadic_argument_types_impl() const override {
return Impl::get_variadic_argument_types();
}
size_t get_number_of_arguments() const override {
return get_variadic_argument_types_impl().size();
}
bool use_default_implementation_for_nulls() const override { return false; }
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
return Impl::execute_impl(context, block, arguments, result, input_rows_count);
}
};
struct Substr3ImplOld {
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(),
std::make_shared<DataTypeInt32>()};
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
SubstringUtilOld::substring_execute(block, arguments, result, input_rows_count);
return Status::OK();
}
};
struct Substr2ImplOld {
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()};
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
auto col_len = ColumnInt32::create(input_rows_count);
auto& strlen_data = col_len->get_data();
ColumnPtr str_col;
bool str_const;
std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column);
if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
str_col = nullable->get_nested_column_ptr();
}
const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
if (str_const) {
std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]);
} else {
for (int i = 0; i < input_rows_count; ++i) {
strlen_data[i] = str_offset[i] - str_offset[i - 1];
}
}
// we complete the column2(strlen) with the default value - each row's strlen.
block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"});
ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};
SubstringUtilOld::substring_execute(block, temp_arguments, result, input_rows_count);
return Status::OK();
}
};
template <bool Reverse>
class FunctionMaskPartial;
@ -565,6 +796,34 @@ public:
static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeString>();
}
bool use_default_implementation_for_nulls() const override { return true; }
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto int_type = std::make_shared<DataTypeInt32>();
size_t num_columns_without_result = block.columns();
block.insert({int_type->create_column_const(input_rows_count, to_field(1)), int_type,
"const 1"});
ColumnNumbers temp_arguments(3);
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = arguments[1];
SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
return Status::OK();
}
};
class FunctionLeftOld : public IFunction {
public:
static constexpr auto name = "left";
static FunctionPtr create() { return std::make_shared<FunctionLeftOld>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
}
@ -581,7 +840,7 @@ public:
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = arguments[1];
SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
SubstringUtilOld::substring_execute(block, temp_arguments, result, input_rows_count);
return Status::OK();
}
};
@ -592,6 +851,57 @@ public:
static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeString>();
}
bool use_default_implementation_for_nulls() const override { return true; }
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto int_type = std::make_shared<DataTypeInt32>();
auto params1 = ColumnInt32::create(input_rows_count);
auto params2 = ColumnInt32::create(input_rows_count);
size_t num_columns_without_result = block.columns();
// params1 = max(arg[1], -len(arg))
auto& index_data = params1->get_data();
auto& strlen_data = params2->get_data();
auto str_col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
auto pos_col =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
for (int i = 0; i < input_rows_count; ++i) {
strlen_data[i] = str_offset[i] - str_offset[i - 1];
}
for (int i = 0; i < input_rows_count; ++i) {
index_data[i] = std::max(-pos_data[i], -strlen_data[i]);
}
block.insert({std::move(params1), int_type, "index"});
block.insert({std::move(params2), int_type, "strlen"});
ColumnNumbers temp_arguments(3);
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = num_columns_without_result + 1;
SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
return Status::OK();
}
};
class FunctionRightOld : public IFunction {
public:
static constexpr auto name = "right";
static FunctionPtr create() { return std::make_shared<FunctionRightOld>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
}
@ -614,18 +924,18 @@ public:
// getNestedColumnIfNull arg[0]
auto str_col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
str_col = nullable->get_nested_column_ptr();
}
auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
// getNestedColumnIfNull arg[1]
auto pos_col =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
if (auto* nullable = check_and_get_column<const ColumnNullable>(*pos_col)) {
if (const auto* nullable = check_and_get_column<const ColumnNullable>(*pos_col)) {
pos_col = nullable->get_nested_column_ptr();
}
auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
for (int i = 0; i < input_rows_count; ++i) {
strlen_data[i] = str_offset[i] - str_offset[i - 1];

View File

@ -1,47 +0,0 @@
---
{
"title": "LEFT",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## left
### Description
#### Syntax
`VARCHAR left (VARCHAR str, INT len)`
It returns the left part of a string of specified length, length is char length not the byte size. Another alias for this function is [strleft](./strleft.md).
### example
```
mysql> select left("Hello doris",5);
+------------------------+
| left('Hello doris', 5) |
+------------------------+
| Hello |
+------------------------+
```
### keywords
LEFT

View File

@ -1,47 +0,0 @@
---
{
"title": "RIGHT",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## right
### Description
#### Syntax
`VARCHAR RIGHT (VARCHAR str, INT len)`
It returns the right part of a string of specified length, length is char length not the byte size. Another alias for this function is [strright](./strright.md).
### example
```
mysql> select right("Hello doris",5);
+-------------------------+
| right('Hello doris', 5) |
+-------------------------+
| doris |
+-------------------------+
```
### keywords
RIGHT

View File

@ -31,7 +31,8 @@ under the License.
`VARCHAR STRLEFT (VARCHAR str, INT len)`
It returns the left part of a string of specified length, length is char length not the byte size. Another alias for this function is [left](./left.md).
It returns the left part of a string of specified length, length is char length not the byte size. Another alias for this function is `left`.
If the function parameters contain a NULL value, the function will always return NULL. If the integer parameter is less than or equal to 0, it will return an empty value.
### example
@ -42,6 +43,24 @@ mysql> select strleft("Hello doris",5);
+------------------------+
| Hello |
+------------------------+
mysql> select strleft("Hello doris",-5);
+----------------------------+
| strleft('Hello doris', -5) |
+----------------------------+
| |
+----------------------------+
mysql> select strleft("Hello doris",NULL);
+------------------------------+
| strleft('Hello doris', NULL) |
+------------------------------+
| NULL |
+------------------------------+
mysql> select strleft(NULL,3);
+------------------+
| strleft(NULL, 3) |
+------------------+
| NULL |
+------------------+
```
### keywords
STRLEFT
STRLEFT, LEFT

View File

@ -32,7 +32,8 @@ under the License.
`VARCHAR strright (VARCHAR str, INT len)`
It returns the right part of a string of specified length, length is char length not the byte size. Another alias for this function is [right](./right.md).
It returns the right part of a string of specified length, length is char length not the byte size. Another alias for this function is `right`.
If the parameters contain a NULL value, the function will always return NULL. If the integer parameter is negative, the function will retrieve the portion of the string starting from the left and moving to the right, beginning at the absolute value of len.
### example
@ -43,6 +44,24 @@ mysql> select strright("Hello doris",5);
+-------------------------+
| doris |
+-------------------------+
mysql> select strright("Hello doris",-7);
+--------------------------+
| strright('Hello doris', -7) |
+--------------------------+
| doris |
+--------------------------+
mysql> select strright("Hello doris",NULL);
+----------------------------+
| strright('Hello doris', NULL) |
+----------------------------+
| NULL |
+----------------------------+
mysql> select strright(NULL,5);
+----------------+
| strright(NULL, 5) |
+----------------+
| NULL |
+----------------+
```
### keywords
STRRIGHT
STRRIGHT, RIGHT

View File

@ -1,50 +0,0 @@
---
{
"title": "SUBSTR",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## substr
### Description
#### Syntax
`VARCHAR substr(VARCHAR content, INT start, INT length)`
Find a substring, return the part of the string described by the first parameter starting from start and having a length of len. The index of the first letter is 1.
### example
```
mysql> select substr("Hello doris", 2, 1);
+-----------------------------+
| substr('Hello doris', 2, 1) |
+-----------------------------+
| e |
+-----------------------------+
mysql> select substr("Hello doris", 1, 2);
+-----------------------------+
| substr('Hello doris', 1, 2) |
+-----------------------------+
| He |
+-----------------------------+
```
### keywords
SUBSTR

View File

@ -42,6 +42,8 @@ the position of the first character in the string from which the substring is to
If len is less than 1, the result is the empty string.
The function have a alias named `substr`.
### example
```
@ -70,7 +72,7 @@ mysql> select substring('abc1', 5);
+-----------------------------+
| substring('abc1', 5) |
+-----------------------------+
| NULL |
| |
+-----------------------------+
mysql> select substring('abc1def', 2, 2);
@ -82,4 +84,4 @@ mysql> select substring('abc1def', 2, 2);
```
### keywords
SUBSTRING, STRING
SUBSTRING, STRING, SUBSTR

View File

@ -459,7 +459,6 @@
"sql-manual/sql-functions/string-functions/char",
"sql-manual/sql-functions/string-functions/concat",
"sql-manual/sql-functions/string-functions/concat-ws",
"sql-manual/sql-functions/string-functions/substr",
"sql-manual/sql-functions/string-functions/substring",
"sql-manual/sql-functions/string-functions/sub-replace",
"sql-manual/sql-functions/string-functions/append-trailing-char-if-absent",
@ -478,8 +477,6 @@
"sql-manual/sql-functions/string-functions/field",
"sql-manual/sql-functions/string-functions/find-in-set",
"sql-manual/sql-functions/string-functions/replace",
"sql-manual/sql-functions/string-functions/left",
"sql-manual/sql-functions/string-functions/right",
"sql-manual/sql-functions/string-functions/strleft",
"sql-manual/sql-functions/string-functions/strright",
"sql-manual/sql-functions/string-functions/split-part",
@ -1397,4 +1394,4 @@
]
}
]
}
}

View File

@ -1,47 +0,0 @@
---
{
"title": "LEFT",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## left
### description
#### Syntax
`VARCHAR left(VARCHAR str, INT len)`
它返回具有指定长度的字符串的左边部分,长度的单位为utf8字符,此函数的另一个别名为[strleft](./strleft.md)。
### example
```
mysql> select left("Hello doris",5);
+------------------------+
| left('Hello doris', 5) |
+------------------------+
| Hello |
+------------------------+
```
### keywords
LEFT

View File

@ -1,47 +0,0 @@
---
{
"title": "RIGHT",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## right
### description
#### Syntax
`VARCHAR right(VARCHAR str, INT len)`
它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符。此函数的另一个别名为[strright](./strright.md)。
### example
```
mysql> select right("Hello doris",5);
+-------------------------+
| right('Hello doris', 5) |
+-------------------------+
| doris |
+-------------------------+
```
### keywords
RIGHT

View File

@ -31,7 +31,8 @@ under the License.
`VARCHAR strleft(VARCHAR str, INT len)`
它返回具有指定长度的字符串的左边部分,长度的单位为utf8字符,此函数的另一个别名为[left](./left.md)
它返回具有指定长度的字符串的左边部分,长度的单位为utf8字符,此函数的另一个别名为 `left`
如果函数参数含有NULL值那么函数将始终返回NULL,如果整型参数小于等于0,那么将返回空 “”。
### example
@ -42,6 +43,24 @@ mysql> select strleft("Hello doris",5);
+------------------------+
| Hello |
+------------------------+
mysql> select strleft("Hello doris",-5);
+----------------------------+
| strleft('Hello doris', -5) |
+----------------------------+
| |
+----------------------------+
mysql> select strleft("Hello doris",NULL);
+------------------------------+
| strleft('Hello doris', NULL) |
+------------------------------+
| NULL |
+------------------------------+
mysql> select strleft(NULL,3);
+------------------+
| strleft(NULL, 3) |
+------------------+
| NULL |
+------------------+
```
### keywords
STRLEFT
STRLEFT, LEFT

View File

@ -31,7 +31,8 @@ under the License.
`VARCHAR strright(VARCHAR str, INT len)`
它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符。此函数的另一个别名为[right](./right.md)
它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符。此函数的另一个别名为 `right`
如果参数中含有NULL值,那么函数始终返回NULL,如果整型参数为负数,那么会得到字符串从第 abs(len) 个字符开始向右的部分。
### example
@ -42,6 +43,24 @@ mysql> select strright("Hello doris",5);
+-------------------------+
| doris |
+-------------------------+
mysql> select strright("Hello doris",-7);
+--------------------------+
| strright('Hello doris', -7) |
+--------------------------+
| doris |
+--------------------------+
mysql> select strright("Hello doris",NULL);
+----------------------------+
| strright('Hello doris', NULL) |
+----------------------------+
| NULL |
+----------------------------+
mysql> select strright(NULL,5);
+----------------+
| strright(NULL, 5) |
+----------------+
| NULL |
+----------------+
```
### keywords
STRRIGHT
STRRIGHT, RIGHT

View File

@ -1,50 +0,0 @@
---
{
"title": "SUBSTR",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## substr
### description
#### Syntax
`VARCHAR substr(VARCHAR content, INT start, INT length)`
求子字符串,返回第一个参数描述的字符串中从start开始长度为len的部分字符串。首字母的下标为1。
### example
```
mysql> select substr("Hello doris", 2, 1);
+-----------------------------+
| substr('Hello doris', 2, 1) |
+-----------------------------+
| e |
+-----------------------------+
mysql> select substr("Hello doris", 1, 2);
+-----------------------------+
| substr('Hello doris', 1, 2) |
+-----------------------------+
| He |
+-----------------------------+
```
### keywords
SUBSTR

View File

@ -28,7 +28,7 @@ under the License.
### description
#### Syntax
`VARCHAR substring(VARCHAR str, INT pos[, INT len])`
`VARCHAR substring(VARCHAR str, INT pos, [INT len])`
没有 `len` 参数时返回从位置 `pos` 开始的字符串 `str` 的一个子字符串,
在有 `len` 参数时返回从位置 `pos` 开始的字符串 `str` 的一个长度为 `len` 子字符串,
@ -37,6 +37,8 @@ under the License.
对于所有形式的 SUBSTRING(),要从中提取子字符串的字符串中第一个字符的位置为1。
该函数还有一个别名 `SUBSTR`
### example
```
@ -65,7 +67,7 @@ mysql> select substring('abc1', 5);
+-----------------------------+
| substring('abc1', 5) |
+-----------------------------+
| NULL |
| |
+-----------------------------+
mysql> select substring('abc1def', 2, 2);
@ -76,4 +78,4 @@ mysql> select substring('abc1def', 2, 2);
+-----------------------------+
```
### keywords
SUBSTRING, STRING
SUBSTRING, STRING, SUBSTR

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
@ -36,12 +36,11 @@ import java.util.List;
* ScalarFunction 'left'. This class is generated by GenerateFunction.
*/
public class Left extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE),
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)
);
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE));
/**
* constructor with 2 arguments.

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
@ -36,12 +36,11 @@ import java.util.List;
* ScalarFunction 'right'. This class is generated by GenerateFunction.
*/
public class Right extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE),
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)
);
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE));
/**
* constructor with 2 arguments.

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
@ -36,12 +36,11 @@ import java.util.List;
* ScalarFunction 'strleft'. This class is generated by GenerateFunction.
*/
public class StrLeft extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE),
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)
);
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE));
/**
* constructor with 2 arguments.

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
@ -36,12 +36,11 @@ import java.util.List;
* ScalarFunction 'strright'. This class is generated by GenerateFunction.
*/
public class StrRight extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE),
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)
);
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE));
/**
* constructor with 2 arguments.

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
@ -38,9 +38,8 @@ import java.util.Optional;
/**
* ScalarFunction 'substring'. This class is generated by GenerateFunction.
*/
// TODO: to be compatible with BE, we set AlwaysNullable here.
public class Substring extends ScalarFunction
implements ExplicitlyCastableSignature, AlwaysNullable {
implements ExplicitlyCastableSignature, PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT, IntegerType.INSTANCE),

View File

@ -1546,15 +1546,15 @@ visible_functions = {
# String builtin functions
"String": [
[['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT', 'INT'], 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['mask'], 'STRING', ['STRING', '...'], ''],
[['mask_first_n'], 'STRING', ['STRING'], ''],
[['mask_first_n'], 'STRING', ['STRING', 'INT'], ''],
[['mask_last_n'], 'STRING', ['STRING'], ''],
[['mask_last_n'], 'STRING', ['STRING', 'INT'], ''],
[['strleft', 'left'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'],
[['strright', 'right'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'],
[['strleft', 'left'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['strright', 'right'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['ends_with'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], ''],
[['starts_with'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], ''],
[['null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
@ -1611,10 +1611,10 @@ visible_functions = {
[['char'], 'VARCHAR', ['VARCHAR', 'INT', '...'], 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'], 'ALWAYS_NULLABLE'],
[['strleft', 'left'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'],
[['strright', 'right'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['strleft', 'left'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['strright', 'right'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['ends_with'], 'BOOLEAN', ['STRING', 'STRING'], ''],
[['starts_with'], 'BOOLEAN', ['STRING', 'STRING'], ''],
[['null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],

View File

@ -406,7 +406,7 @@ string3
string3
-- !sql_right_Varchar_Integer --
\N
1
ar11
rchar11
@ -435,7 +435,7 @@ varchar13
varchar13
-- !sql_right_String_Integer --
\N
1
ing1
string1

View File

@ -2438,7 +2438,7 @@ string3
string3
-- !sql_strright_Varchar_Integer --
\N
1
ar11
rchar11
@ -2467,7 +2467,7 @@ varchar13
varchar13
-- !sql_strright_String_Integer --
\N
1
ing1
string1

View File

@ -140,13 +140,92 @@ suite("test_string_function", "arrow_flight_sql") {
qt_sql "select starts_with(\"hello world\",\"world\");"
qt_sql "select starts_with(\"hello world\",null);"
qt_sql "select strleft(\"Hello doris\",5);"
qt_sql "select strright(\"Hello doris\",5);"
qt_sql "select strleft(NULL, 1);"
qt_sql "select strleft(\"good morning\", NULL);"
qt_sql "select left(NULL, 1);"
qt_sql "select left(\"good morning\", NULL);"
qt_sql "select strleft(\"Hello doris\", 5);"
qt_sql "select left(\"Hello doris\", 5)"
qt_sql "select strright(NULL, 1);"
qt_sql "select strright(\"good morning\", NULL);"
qt_sql "select right(NULL, 1);"
qt_sql "select right(\"good morning\", NULL);"
qt_sql "select strright(\"Hello doris\", 5);"
qt_sql "select right(\"Hello doris\", 5);"
qt_sql "select strleft(\"good morning\", 120);"
qt_sql "select strleft(\"good morning\", -5);"
qt_sql "select strright(\"Hello doris\", 120);"
qt_sql "select strright(\"Hello doris\", -5);"
qt_sql "select left(\"good morning\", 120);"
qt_sql "select left(\"good morning\", -5);"
qt_sql "select right(\"Hello doris\", 120);"
qt_sql "select right(\"Hello doris\", -6);"
sql """ drop table if exists left_right_test; """
sql """ create table left_right_test (
id INT NULL,
name VARCHAR(16) NULL
)
UNIQUE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES ("replication_num"="1");
"""
sql """
insert into left_right_test values
(1, "Isaac Newton"),
(2, "Albert Einstein"),
(3, "Marie Curie"),
(4, "Charles Darwin"),
(5, "Stephen Hawking");
"""
qt_select_null_str """
select
id,
strleft(name, 5),
strright(name, 5),
left(name, 6),
right(name, 6)
from left_right_test
order by id;
"""
sql """ drop table if exists left_right_test; """
sql """ create table left_right_test (
id INT,
name VARCHAR(16)
)
UNIQUE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES ("replication_num"="1");
"""
sql """
insert into left_right_test values
(1, "Isaac Newton"),
(2, "Albert Einstein"),
(3, "Marie Curie"),
(4, "Charles Darwin"),
(5, "Stephen Hawking");
"""
qt_select_not_null_str """
select
id,
strleft(name, 5),
strright(name, 5),
left(name, 6),
right(name, 6)
from left_right_test
order by id;
"""
qt_sql "select substring('abc1', 2);"
qt_sql "select substring('abc1', -2);"
qt_sql "select substring('abc1', 5);"
qt_sql "select substring('abc1def', 2, 2);"
qt_sql "select substring('abcdef',3,-1);"
qt_sql "select substring('abcdef',-3,-1);"
qt_sql "select substring('abcdef',10,1);"
sql """ drop table if exists test_string_function; """
sql """ create table test_string_function (
@ -184,6 +263,8 @@ suite("test_string_function", "arrow_flight_sql") {
qt_sql "select substr('a',-1,1);"
qt_sql "select substr('a',-2,1);"
qt_sql "select substr('a',-3,1);"
qt_sql "select substr('abcdef',3,-1);"
qt_sql "select substr('abcdef',-3,-1);"
qt_sql "select sub_replace(\"this is origin str\",\"NEW-STR\",1);"
qt_sql "select sub_replace(\"doris\",\"***\",1,2);"