[fix](function)revert function REPEAT nullable mode #32226
This commit is contained in:
@ -1138,7 +1138,6 @@ void register_function_string(SimpleFunctionFactory& factory) {
|
||||
factory.register_alternative_function<FunctionLeftOld>();
|
||||
factory.register_alternative_function<FunctionRightOld>();
|
||||
factory.register_alternative_function<FunctionSubstringIndexOld>();
|
||||
factory.register_alternative_function<FunctionStringRepeatOld>();
|
||||
factory.register_alternative_function<FunctionUnHexOld>();
|
||||
factory.register_alternative_function<FunctionToBase64Old>();
|
||||
|
||||
|
||||
@ -1440,103 +1440,6 @@ public:
|
||||
// Reports this function's name by returning the class's `name` member
// (its declaration is outside the lines visible here).
String get_name() const override { return name; }
|
||||
// The function is binary: it always reports exactly two arguments.
size_t get_number_of_arguments() const override { return 2; }
|
||||
|
||||
// Result type of the function: always a DataTypeString. The `arguments`
// list is not consulted, and no nullable wrapper is applied here.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
    DataTypePtr string_type = std::make_shared<DataTypeString>();
    return string_type;
}
|
||||
// Evaluates the function for one block and stores a ColumnString at position
// `result`.
//
// Argument 0 is materialized with convert_to_full_column_if_const() and must be
// a ColumnString. Argument 1 is accepted in two shapes:
//   * ColumnInt32  -> one repeat count per row (vector_vector);
//   * ColumnConst  -> a single repeat count shared by all rows (vector_const).
// In both shapes the count is capped by context->state()->repeat_max_num().
//
// Returns Status::OK() once the result column has been placed in the block, or
// Status::RuntimeError naming both argument columns when neither shape matches.
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                    size_t result, size_t input_rows_count) const override {
    DCHECK_EQ(arguments.size(), 2);
    auto output = ColumnString::create();

    // Only the string side is expanded; the count column is kept untouched so the
    // constant case can be recognised below.
    ColumnPtr text_holder =
            block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
    ColumnPtr count_holder = block.get_by_position(arguments[1]).column;

    const auto* texts = check_and_get_column<ColumnString>(*text_holder);
    if (texts != nullptr) {
        // Per-row repeat counts.
        if (const auto* counts = check_and_get_column<ColumnInt32>(*count_holder)) {
            vector_vector(texts->get_chars(), texts->get_offsets(), counts->get_data(),
                          output->get_chars(), output->get_offsets(),
                          context->state()->repeat_max_num());
            block.replace_by_position(result, std::move(output));
            return Status::OK();
        }

        // One constant repeat count for the whole block.
        if (const auto* const_count = check_and_get_column<ColumnConst>(*count_holder)) {
            DCHECK(check_and_get_column<ColumnInt32>(const_count->get_data_column()));
            const int times =
                    std::min<int>(const_count->get_int(0), context->state()->repeat_max_num());

            if (times > 0) {
                vector_const(texts->get_chars(), texts->get_offsets(), times,
                             output->get_chars(), output->get_offsets());
            } else {
                // Non-positive count: every row receives the column's default value.
                output->insert_many_defaults(input_rows_count);
            }
            block.replace_by_position(result, std::move(output));
            return Status::OK();
        }
    }

    return Status::RuntimeError("repeat function get error param: {}, {}",
                                text_holder->get_name(), count_holder->get_name());
}
|
||||
|
||||
// Row-wise repeat where every row carries its own repeat count.
//
// data / offsets : character buffer and end offsets of the input strings.
// repeats        : requested repeat count for each row.
// res_data / res_offsets : output buffers; res_offsets is resized to the number
//                  of input rows and filled through the StringOP helpers.
// repeat_max_num : upper bound applied to every requested count.
//
// A row whose capped count is <= 0 gets an empty string.
void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                   const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
                   ColumnString::Offsets& res_offsets, const int repeat_max_num) const {
    const size_t row_total = offsets.size();

    // One scratch buffer is reused for all rows and cleared at the top of each pass.
    fmt::memory_buffer scratch;
    res_offsets.resize(row_total);

    for (ssize_t row = 0; row < row_total; ++row) {
        scratch.clear();

        // NOTE(review): for row 0 this reads offsets[-1]; presumably the offsets
        // container tolerates index -1 (padded storage) -- confirm before changing
        // the index type.
        const char* piece = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
        const size_t piece_len = offsets[row] - offsets[row - 1];
        const int times = std::min<int>(repeats[row], repeat_max_num);

        if (times > 0) {
            int remaining = times;
            while (remaining-- > 0) {
                scratch.append(piece, piece + piece_len);
            }
            StringOP::push_value_string(std::string_view(scratch.data(), scratch.size()), row,
                                        res_data, res_offsets);
        } else {
            StringOP::push_empty_string(row, res_data, res_offsets);
        }
    }
}
|
||||
|
||||
// TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
|
||||
// 2. abstract the `vector_vector` and `vector_const`
|
||||
// 3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here
|
||||
void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
int repeat, ColumnString::Chars& res_data,
|
||||
ColumnString::Offsets& res_offsets) const {
|
||||
size_t input_row_size = offsets.size();
|
||||
|
||||
fmt::memory_buffer buffer;
|
||||
res_offsets.resize(input_row_size);
|
||||
for (ssize_t i = 0; i < input_row_size; ++i) {
|
||||
buffer.clear();
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t size = offsets[i] - offsets[i - 1];
|
||||
|
||||
for (int j = 0; j < repeat; ++j) {
|
||||
buffer.append(raw_str, raw_str + size);
|
||||
}
|
||||
StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
|
||||
res_offsets);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionStringRepeatOld : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "repeat";
|
||||
// Factory: builds a new FunctionStringRepeatOld with std::make_shared and
// returns it as a FunctionPtr.
static FunctionPtr create() { return std::make_shared<FunctionStringRepeatOld>(); }
|
||||
// Reports this function's name by returning the class constant `name`.
String get_name() const override { return name; }
|
||||
// The function is binary: it always reports exactly two arguments.
size_t get_number_of_arguments() const override { return 2; }
|
||||
|
||||
// Result type of the function: a DataTypeString passed through make_nullable().
// The `arguments` list is not consulted.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
    auto string_type = std::make_shared<DataTypeString>();
    return make_nullable(std::move(string_type));
}
|
||||
|
||||
@ -19,8 +19,8 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar;
|
||||
|
||||
import org.apache.doris.catalog.FunctionSignature;
|
||||
import org.apache.doris.nereids.trees.expressions.Expression;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
|
||||
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
|
||||
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
|
||||
import org.apache.doris.nereids.types.IntegerType;
|
||||
@ -35,7 +35,7 @@ import java.util.List;
|
||||
* ScalarFunction 'repeat'. This class is generated by GenerateFunction.
|
||||
*/
|
||||
public class Repeat extends ScalarFunction
|
||||
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
|
||||
implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
|
||||
|
||||
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
|
||||
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, IntegerType.INSTANCE)
|
||||
|
||||
@ -1566,7 +1566,7 @@ visible_functions = {
|
||||
[['null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
|
||||
[['not_null_or_empty'], 'BOOLEAN', ['VARCHAR'], 'ALWAYS_NOT_NULLABLE'],
|
||||
[['space'], 'VARCHAR', ['INT'], ''],
|
||||
[['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'],
|
||||
[['repeat'], 'VARCHAR', ['VARCHAR', 'INT'], 'ALWAYS_NULLABLE'],
|
||||
[['lpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'],
|
||||
[['rpad'], 'VARCHAR', ['VARCHAR', 'INT', 'VARCHAR'], 'ALWAYS_NULLABLE'],
|
||||
[['append_trailing_char_if_absent'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
|
||||
@ -1628,7 +1628,7 @@ visible_functions = {
|
||||
[['null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
|
||||
[['not_null_or_empty'], 'BOOLEAN', ['STRING'], 'ALWAYS_NOT_NULLABLE'],
|
||||
[['space'], 'STRING', ['INT'], ''],
|
||||
[['repeat'], 'STRING', ['STRING', 'INT'], 'DEPEND_ON_ARGUMENT'],
|
||||
[['repeat'], 'STRING', ['STRING', 'INT'], 'ALWAYS_NULLABLE'],
|
||||
[['lpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
|
||||
[['rpad'], 'STRING', ['STRING', 'INT', 'STRING'], 'ALWAYS_NULLABLE'],
|
||||
[['append_trailing_char_if_absent'], 'STRING', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'],
|
||||
|
||||
Reference in New Issue
Block a user