[Vectorized](function) support order by convert_to function (#14555)
This commit is contained in:
@ -17,6 +17,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iconv.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "util/string_util.h"
|
||||
#include "vec/columns/column.h"
|
||||
#ifndef USE_LIBCPP
|
||||
#include <memory_resource>
|
||||
#define PMR std::pmr
|
||||
@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl {
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionConvertTo : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "convert_to";
|
||||
|
||||
static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
|
||||
|
||||
String get_name() const override { return name; }
|
||||
|
||||
size_t get_number_of_arguments() const override { return 2; }
|
||||
|
||||
DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
bool use_default_implementation_for_constants() const override { return true; }
|
||||
|
||||
Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
|
||||
if (scope != FunctionContext::THREAD_LOCAL) {
|
||||
return Status::OK();
|
||||
}
|
||||
if (!context->is_col_constant(1)) {
|
||||
return Status::InvalidArgument(
|
||||
"character argument to convert function must be constant.");
|
||||
}
|
||||
const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
|
||||
if (doris::iequal(character_data.to_string(), "gbk")) {
|
||||
iconv_t cd = iconv_open("gb2312", "utf-8");
|
||||
if (cd == nullptr) {
|
||||
return Status::RuntimeError("function {} is convert to gbk failed in iconv_open",
|
||||
get_name());
|
||||
}
|
||||
context->set_function_state(scope, cd);
|
||||
} else {
|
||||
return Status::RuntimeError(
|
||||
"Illegal second argument column of function convert. now only support "
|
||||
"convert to character set of gbk");
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
||||
size_t result, size_t input_rows_count) override {
|
||||
ColumnPtr argument_column =
|
||||
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
|
||||
const auto& str_offset = str_col->get_offsets();
|
||||
const auto& str_chars = str_col->get_chars();
|
||||
auto col_res = ColumnString::create();
|
||||
auto& res_offset = col_res->get_offsets();
|
||||
auto& res_chars = col_res->get_chars();
|
||||
res_offset.resize(input_rows_count);
|
||||
iconv_t cd = reinterpret_cast<iconv_t>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
DCHECK(cd != nullptr);
|
||||
|
||||
size_t in_len = 0, out_len = 0;
|
||||
for (int i = 0; i < input_rows_count; ++i) {
|
||||
in_len = str_offset[i] - str_offset[i - 1];
|
||||
const char* value_data = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]);
|
||||
res_chars.resize(res_offset[i - 1] + in_len);
|
||||
char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]);
|
||||
char* in = const_cast<char*>(value_data);
|
||||
out_len = in_len;
|
||||
if (iconv(cd, &in, &in_len, &out, &out_len) == -1) {
|
||||
return Status::RuntimeError("function {} is convert to gbk failed in iconv",
|
||||
get_name());
|
||||
} else {
|
||||
res_offset[i] = res_chars.size();
|
||||
}
|
||||
}
|
||||
block.replace_by_position(result, std::move(col_res));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
|
||||
if (scope == FunctionContext::THREAD_LOCAL) {
|
||||
iconv_t cd = reinterpret_cast<iconv_t>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
iconv_close(cd);
|
||||
context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
} // namespace doris::vectorized
|
||||
|
||||
Reference in New Issue
Block a user