[Vectorized](function) support order by convert_to function (#14555)

This commit is contained in:
zhangstar333
2022-11-29 15:22:27 +08:00
committed by GitHub
parent facb7cf4e2
commit 7a08a799e9
8 changed files with 255 additions and 2 deletions

View File

@ -17,6 +17,13 @@
#pragma once
#include <iconv.h>
#include <stddef.h>
#include <memory>
#include "util/string_util.h"
#include "vec/columns/column.h"
#ifndef USE_LIBCPP
#include <memory_resource>
#define PMR std::pmr
@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl {
}
};
class FunctionConvertTo : public IFunction {
public:
static constexpr auto name = "convert_to";
static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
return std::make_shared<DataTypeString>();
}
bool use_default_implementation_for_constants() const override { return true; }
Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
if (scope != FunctionContext::THREAD_LOCAL) {
return Status::OK();
}
if (!context->is_col_constant(1)) {
return Status::InvalidArgument(
"character argument to convert function must be constant.");
}
const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
if (doris::iequal(character_data.to_string(), "gbk")) {
iconv_t cd = iconv_open("gb2312", "utf-8");
if (cd == nullptr) {
return Status::RuntimeError("function {} is convert to gbk failed in iconv_open",
get_name());
}
context->set_function_state(scope, cd);
} else {
return Status::RuntimeError(
"Illegal second argument column of function convert. now only support "
"convert to character set of gbk");
}
return Status::OK();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
ColumnPtr argument_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
const auto& str_offset = str_col->get_offsets();
const auto& str_chars = str_col->get_chars();
auto col_res = ColumnString::create();
auto& res_offset = col_res->get_offsets();
auto& res_chars = col_res->get_chars();
res_offset.resize(input_rows_count);
iconv_t cd = reinterpret_cast<iconv_t>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
DCHECK(cd != nullptr);
size_t in_len = 0, out_len = 0;
for (int i = 0; i < input_rows_count; ++i) {
in_len = str_offset[i] - str_offset[i - 1];
const char* value_data = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]);
res_chars.resize(res_offset[i - 1] + in_len);
char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]);
char* in = const_cast<char*>(value_data);
out_len = in_len;
if (iconv(cd, &in, &in_len, &out, &out_len) == -1) {
return Status::RuntimeError("function {} is convert to gbk failed in iconv",
get_name());
} else {
res_offset[i] = res_chars.size();
}
}
block.replace_by_position(result, std::move(col_res));
return Status::OK();
}
Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
if (scope == FunctionContext::THREAD_LOCAL) {
iconv_t cd = reinterpret_cast<iconv_t>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
iconv_close(cd);
context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
}
return Status::OK();
}
};
} // namespace doris::vectorized