[Vectorized](function) support order by convert_to function (#14555)
This commit is contained in:
@ -666,6 +666,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
|
||||
factory.register_function<FunctionLTrim>();
|
||||
factory.register_function<FunctionRTrim>();
|
||||
factory.register_function<FunctionTrim>();
|
||||
factory.register_function<FunctionConvertTo>();
|
||||
factory.register_function<FunctionSubstring<Substr3Impl>>();
|
||||
factory.register_function<FunctionSubstring<Substr2Impl>>();
|
||||
factory.register_function<FunctionLeft>();
|
||||
|
||||
@ -17,6 +17,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iconv.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "util/string_util.h"
|
||||
#include "vec/columns/column.h"
|
||||
#ifndef USE_LIBCPP
|
||||
#include <memory_resource>
|
||||
#define PMR std::pmr
|
||||
@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl {
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionConvertTo : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "convert_to";
|
||||
|
||||
static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
|
||||
|
||||
String get_name() const override { return name; }
|
||||
|
||||
size_t get_number_of_arguments() const override { return 2; }
|
||||
|
||||
DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
bool use_default_implementation_for_constants() const override { return true; }
|
||||
|
||||
Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
|
||||
if (scope != FunctionContext::THREAD_LOCAL) {
|
||||
return Status::OK();
|
||||
}
|
||||
if (!context->is_col_constant(1)) {
|
||||
return Status::InvalidArgument(
|
||||
"character argument to convert function must be constant.");
|
||||
}
|
||||
const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
|
||||
if (doris::iequal(character_data.to_string(), "gbk")) {
|
||||
iconv_t cd = iconv_open("gb2312", "utf-8");
|
||||
if (cd == nullptr) {
|
||||
return Status::RuntimeError("function {} is convert to gbk failed in iconv_open",
|
||||
get_name());
|
||||
}
|
||||
context->set_function_state(scope, cd);
|
||||
} else {
|
||||
return Status::RuntimeError(
|
||||
"Illegal second argument column of function convert. now only support "
|
||||
"convert to character set of gbk");
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
||||
size_t result, size_t input_rows_count) override {
|
||||
ColumnPtr argument_column =
|
||||
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
|
||||
const auto& str_offset = str_col->get_offsets();
|
||||
const auto& str_chars = str_col->get_chars();
|
||||
auto col_res = ColumnString::create();
|
||||
auto& res_offset = col_res->get_offsets();
|
||||
auto& res_chars = col_res->get_chars();
|
||||
res_offset.resize(input_rows_count);
|
||||
iconv_t cd = reinterpret_cast<iconv_t>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
DCHECK(cd != nullptr);
|
||||
|
||||
size_t in_len = 0, out_len = 0;
|
||||
for (int i = 0; i < input_rows_count; ++i) {
|
||||
in_len = str_offset[i] - str_offset[i - 1];
|
||||
const char* value_data = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]);
|
||||
res_chars.resize(res_offset[i - 1] + in_len);
|
||||
char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]);
|
||||
char* in = const_cast<char*>(value_data);
|
||||
out_len = in_len;
|
||||
if (iconv(cd, &in, &in_len, &out, &out_len) == -1) {
|
||||
return Status::RuntimeError("function {} is convert to gbk failed in iconv",
|
||||
get_name());
|
||||
} else {
|
||||
res_offset[i] = res_chars.size();
|
||||
}
|
||||
}
|
||||
block.replace_by_position(result, std::move(col_res));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
|
||||
if (scope == FunctionContext::THREAD_LOCAL) {
|
||||
iconv_t cd = reinterpret_cast<iconv_t>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
iconv_close(cd);
|
||||
context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
};
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -0,0 +1,73 @@
|
||||
---
|
||||
{
|
||||
"title": "convert_to",
|
||||
"language": "en"
|
||||
}
|
||||
---
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
<version since="1.2">
|
||||
|
||||
## convert_to
|
||||
### description
|
||||
#### Syntax
|
||||
|
||||
` convert_to(VARCHAR column, VARCHAR character)`
|
||||
|
||||
Used in the ORDER BY clause, e.g. `order by convert(column using gbk)`. Currently the only supported target character set is 'gbk'.
|
||||
When the ORDER BY column contains Chinese text, the default ordering does not follow Pinyin order.
|
||||
After the column's character encoding is converted to gbk, the rows can be sorted in Pinyin order.
|
||||
|
||||
</version>
|
||||
|
||||
### example
|
||||
|
||||
```
|
||||
mysql> select * from class_test order by class_name;
|
||||
+----------+------------+-------------+
|
||||
| class_id | class_name | student_ids |
|
||||
+----------+------------+-------------+
|
||||
| 6 | asd | [6] |
|
||||
| 7 | qwe | [7] |
|
||||
| 8 | z | [8] |
|
||||
| 2 | 哈 | [2] |
|
||||
| 3 | 哦 | [3] |
|
||||
| 1 | 啊 | [1] |
|
||||
| 4 | 张 | [4] |
|
||||
| 5 | 我 | [5] |
|
||||
+----------+------------+-------------+
|
||||
|
||||
mysql> select * from class_test order by convert(class_name using gbk);
|
||||
+----------+------------+-------------+
|
||||
| class_id | class_name | student_ids |
|
||||
+----------+------------+-------------+
|
||||
| 6 | asd | [6] |
|
||||
| 7 | qwe | [7] |
|
||||
| 8 | z | [8] |
|
||||
| 1 | 啊 | [1] |
|
||||
| 2 | 哈 | [2] |
|
||||
| 3 | 哦 | [3] |
|
||||
| 5 | 我 | [5] |
|
||||
| 4 | 张 | [4] |
|
||||
+----------+------------+-------------+
|
||||
```
|
||||
### keywords
|
||||
convert_to
|
||||
@ -412,6 +412,7 @@
|
||||
"sql-manual/sql-functions/string-functions/split_part",
|
||||
"sql-manual/sql-functions/string-functions/money_format",
|
||||
"sql-manual/sql-functions/string-functions/parse_url",
|
||||
"sql-manual/sql-functions/string-functions/convert_to",
|
||||
"sql-manual/sql-functions/string-functions/extract_url_parameter",
|
||||
"sql-manual/sql-functions/string-functions/uuid",
|
||||
"sql-manual/sql-functions/string-functions/space",
|
||||
|
||||
@ -0,0 +1,73 @@
|
||||
---
|
||||
{
|
||||
"title": "convert_to",
|
||||
"language": "zh-CN"
|
||||
}
|
||||
---
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
<version since="1.2">
|
||||
|
||||
## convert_to
|
||||
### description
|
||||
#### Syntax
|
||||
|
||||
` convert_to(VARCHAR column, VARCHAR character)`
|
||||
在order by子句中使用,例如order by convert(column using gbk), 现在仅支持character转为'gbk'.
|
||||
因为当order by column中包含中文时,其排列不是按照汉语拼音的顺序.
|
||||
将column的字符编码转为gbk后,可实现按拼音的排列的效果.
|
||||
|
||||
</version>
|
||||
|
||||
### example
|
||||
|
||||
```
|
||||
mysql> select * from class_test order by class_name;
|
||||
+----------+------------+-------------+
|
||||
| class_id | class_name | student_ids |
|
||||
+----------+------------+-------------+
|
||||
| 6 | asd | [6] |
|
||||
| 7 | qwe | [7] |
|
||||
| 8 | z | [8] |
|
||||
| 2 | 哈 | [2] |
|
||||
| 3 | 哦 | [3] |
|
||||
| 1 | 啊 | [1] |
|
||||
| 4 | 张 | [4] |
|
||||
| 5 | 我 | [5] |
|
||||
+----------+------------+-------------+
|
||||
|
||||
mysql> select * from class_test order by convert(class_name using gbk);
|
||||
+----------+------------+-------------+
|
||||
| class_id | class_name | student_ids |
|
||||
+----------+------------+-------------+
|
||||
| 6 | asd | [6] |
|
||||
| 7 | qwe | [7] |
|
||||
| 8 | z | [8] |
|
||||
| 1 | 啊 | [1] |
|
||||
| 2 | 哈 | [2] |
|
||||
| 3 | 哦 | [3] |
|
||||
| 5 | 我 | [5] |
|
||||
| 4 | 张 | [4] |
|
||||
+----------+------------+-------------+
|
||||
|
||||
```
|
||||
### keywords
|
||||
convert_to
|
||||
@ -5861,6 +5861,13 @@ non_pred_expr ::=
|
||||
{: RESULT = new CastExpr(targetType, e); :}
|
||||
| KW_KEY encryptkey_name:name
|
||||
{: RESULT = new EncryptKeyRef(name); :}
|
||||
| KW_CONVERT LPAREN expr:e KW_USING ident:character RPAREN
|
||||
{:
|
||||
ArrayList<Expr> exprs = new ArrayList<>();
|
||||
exprs.add(e);
|
||||
exprs.add(new StringLiteral(character));
|
||||
RESULT = new FunctionCallExpr("convert_to", new FunctionParams(exprs));
|
||||
:}
|
||||
;
|
||||
|
||||
expr_pipe_list ::=
|
||||
|
||||
@ -606,7 +606,6 @@ public class FunctionCallExpr extends Expr {
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (fnName.getFunction().equalsIgnoreCase("group_concat")) {
|
||||
if (children.size() - orderByElements.size() > 2 || children.isEmpty()) {
|
||||
throw new AnalysisException(
|
||||
@ -1229,7 +1228,13 @@ public class FunctionCallExpr extends Expr {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fnName.getFunction().equalsIgnoreCase("convert_to")) {
|
||||
if (children.size() < 2 || !getChild(1).isConstant()) {
|
||||
throw new AnalysisException(
|
||||
fnName.getFunction() + " needs two params, and the second is must be a constant: " + this
|
||||
.toSql());
|
||||
}
|
||||
}
|
||||
if (fn.getFunctionName().getFunction().equals("timediff")) {
|
||||
fn.getReturnType().getPrimitiveType().setTimeType();
|
||||
}
|
||||
|
||||
@ -2483,6 +2483,7 @@ visible_functions = [
|
||||
'', '', 'vec', 'ALWAYS_NULLABLE'],
|
||||
|
||||
# Utility functions
|
||||
[['convert_to'], 'VARCHAR', ['VARCHAR','VARCHAR'], '','', '', 'vec', ''],
|
||||
[['sleep'], 'BOOLEAN', ['INT'],
|
||||
'_ZN5doris16UtilityFunctions5sleepEPN9doris_udf15FunctionContextERKNS1_6IntValE',
|
||||
'', '', 'vec', ''],
|
||||
|
||||
Reference in New Issue
Block a user