[fix](function)revert function REPEAT nullable mode #32226

This commit is contained in:
koarz
2024-03-15 17:11:48 +08:00
committed by yiguolei
parent 97b35d6830
commit e3bb499cc6
4 changed files with 4 additions and 102 deletions

View File

@ -1138,7 +1138,6 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_alternative_function<FunctionLeftOld>();
factory.register_alternative_function<FunctionRightOld>();
factory.register_alternative_function<FunctionSubstringIndexOld>();
factory.register_alternative_function<FunctionStringRepeatOld>();
factory.register_alternative_function<FunctionUnHexOld>();
factory.register_alternative_function<FunctionToBase64Old>();

View File

@ -1440,103 +1440,6 @@ public:
// Returns the function name, taken from the class-level `name` member.
String get_name() const override { return name; }
// The function takes exactly two arguments; execute_impl reads them as
// (string column, repeat-count column).
size_t get_number_of_arguments() const override { return 2; }
// Result type is always DataTypeString; `arguments` is not consulted and the
// type is not wrapped in a nullable type here.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeString>();
}
// Evaluates the function over `block` and stores a new ColumnString at
// position `result`.
//
// arguments[0] is materialised to a full column and must be a ColumnString.
// arguments[1] is used as-is and must be either a ColumnInt32 (one repeat
// count per row) or a ColumnConst (a single repeat count for all rows).
// In both cases the count is capped by context->state()->repeat_max_num().
//
// Returns Status::OK() on success, or a RuntimeError naming both argument
// columns when the column types do not match one of the supported shapes.
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                    size_t result, size_t input_rows_count) const override {
    DCHECK_EQ(arguments.size(), 2);

    auto res = ColumnString::create();
    ColumnPtr argument_ptr[2];
    argument_ptr[0] =
            block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
    argument_ptr[1] = block.get_by_position(arguments[1]).column;

    const auto* strings = check_and_get_column<ColumnString>(*argument_ptr[0]);
    if (strings != nullptr) {
        // Per-row repeat counts.
        if (const auto* counts = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
            vector_vector(strings->get_chars(), strings->get_offsets(), counts->get_data(),
                          res->get_chars(), res->get_offsets(),
                          context->state()->repeat_max_num());
            block.replace_by_position(result, std::move(res));
            return Status::OK();
        }

        // One constant repeat count shared by every row.
        if (const auto* const_count = check_and_get_column<ColumnConst>(*argument_ptr[1])) {
            DCHECK(check_and_get_column<ColumnInt32>(const_count->get_data_column()));
            const int repeat = std::min<int>(const_count->get_int(0),
                                             context->state()->repeat_max_num());
            if (repeat > 0) {
                vector_const(strings->get_chars(), strings->get_offsets(), repeat,
                             res->get_chars(), res->get_offsets());
            } else {
                // Non-positive count: every row gets the column's default value.
                res->insert_many_defaults(input_rows_count);
            }
            block.replace_by_position(result, std::move(res));
            return Status::OK();
        }
    }

    return Status::RuntimeError("repeat function get error param: {}, {}",
                                argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
}
// Builds the result for the "string column x per-row count column" case.
//
// For row r the source string is the byte range [offsets[r - 1], offsets[r])
// of `data`. The effective count is min(repeats[r], repeat_max_num); a count
// <= 0 yields an empty string, otherwise the source bytes are concatenated
// that many times. Output goes to `res_data` / `res_offsets`, and
// `res_offsets` is resized to the input row count up front.
void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                   const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
                   ColumnString::Offsets& res_offsets, const int repeat_max_num) const {
    const size_t row_count = offsets.size();
    res_offsets.resize(row_count);

    // Scratch buffer reused across rows; cleared at the start of each row.
    fmt::memory_buffer scratch;

    // NOTE(review): the index is signed because row 0 reads offsets[-1];
    // presumably the offsets container tolerates that read and yields 0 —
    // confirm against the container implementation.
    for (ssize_t row = 0; row < row_count; ++row) {
        scratch.clear();

        const auto begin = offsets[row - 1];
        const char* src = reinterpret_cast<const char*>(&data[begin]);
        const size_t len = offsets[row] - begin;
        const int times = std::min<int>(repeats[row], repeat_max_num);

        if (times <= 0) {
            StringOP::push_empty_string(row, res_data, res_offsets);
            continue;
        }

        for (int copy = 0; copy < times; ++copy) {
            scratch.append(src, src + len);
        }
        StringOP::push_value_string(std::string_view(scratch.data(), scratch.size()), row,
                                    res_data, res_offsets);
    }
}
// TODO: 1. replacing the fmt buffer with pmr::vector<char> may speed up the code
//       2. factor out the code shared by `vector_vector` and `vector_const`
//       3. rethink whether `DEFAULT_MAX_STRING_SIZE` should be made bigger here
//
// Builds the result for the "string column x constant count" case.
//
// For row r the source string is the byte range [offsets[r - 1], offsets[r])
// of `data`; it is concatenated `repeat` times and written to `res_data` /
// `res_offsets`. `res_offsets` is resized to the input row count up front.
// The visible caller only invokes this with repeat > 0.
void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                  int repeat, ColumnString::Chars& res_data,
                  ColumnString::Offsets& res_offsets) const {
    const size_t row_count = offsets.size();
    res_offsets.resize(row_count);

    // Scratch buffer reused across rows; cleared at the start of each row.
    fmt::memory_buffer scratch;

    // NOTE(review): the index is signed because row 0 reads offsets[-1];
    // presumably the offsets container tolerates that read and yields 0 —
    // confirm against the container implementation.
    for (ssize_t row = 0; row < row_count; ++row) {
        scratch.clear();

        const auto begin = offsets[row - 1];
        const char* src = reinterpret_cast<const char*>(&data[begin]);
        const size_t len = offsets[row] - begin;

        for (int copy = 0; copy < repeat; ++copy) {
            scratch.append(src, src + len);
        }
        StringOP::push_value_string(std::string_view(scratch.data(), scratch.size()), row,
                                    res_data, res_offsets);
    }
}
};
class FunctionStringRepeatOld : public IFunction {
public:
static constexpr auto name = "repeat";
static FunctionPtr create() { return std::make_shared<FunctionStringRepeatOld>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeString>());
}

View File

@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
@ -35,7 +35,7 @@ import java.util.List;
* ScalarFunction 'repeat'. This class is generated by GenerateFunction.
*/
public class Repeat extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)

View File

@ -1566,7 +1566,7 @@ visible_functions = {
[['null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['not_null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
[['space'], 'VARCHAR', ['INT'], ''],
[['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'],
[['lpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['rpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['append_trailing_char_if_absent'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
@ -1628,7 +1628,7 @@ visible_functions = {
[['null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['not_null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
[['space'], 'STRING', ['INT'], ''],
[['repeat'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'],
[['repeat'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'],
[['lpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
[['rpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
[['append_trailing_char_if_absent'], 'STRING', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'],