[Vectorized](function) support order by convert_to function (#14555)

This commit is contained in:
zhangstar333
2022-11-29 15:22:27 +08:00
committed by GitHub
parent facb7cf4e2
commit 7a08a799e9
8 changed files with 255 additions and 2 deletions

View File

@ -666,6 +666,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionLTrim>();
factory.register_function<FunctionRTrim>();
factory.register_function<FunctionTrim>();
factory.register_function<FunctionConvertTo>();
factory.register_function<FunctionSubstring<Substr3Impl>>();
factory.register_function<FunctionSubstring<Substr2Impl>>();
factory.register_function<FunctionLeft>();

View File

@ -17,6 +17,13 @@
#pragma once
#include <iconv.h>
#include <stddef.h>
#include <memory>
#include "util/string_util.h"
#include "vec/columns/column.h"
#ifndef USE_LIBCPP
#include <memory_resource>
#define PMR std::pmr
@ -1950,4 +1957,89 @@ struct SubReplaceFourImpl {
}
};
/// Vectorized implementation of `convert_to(str, charset)`.
///
/// Re-encodes every value of the first (string) argument from UTF-8 into the character
/// set named by the second argument, which must be a constant. Only "gbk"
/// (case-insensitive) is accepted at the moment. The iconv descriptor is opened once in
/// prepare() for the THREAD_LOCAL scope, reused by execute_impl(), and released in close().
class FunctionConvertTo : public IFunction {
public:
    static constexpr auto name = "convert_to";

    static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 2; }

    /// The result is always a string column, whatever the argument types are.
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
        return std::make_shared<DataTypeString>();
    }

    bool use_default_implementation_for_constants() const override { return true; }

    /// Validates the character-set argument and opens the iconv descriptor.
    ///
    /// Does nothing for scopes other than THREAD_LOCAL. Returns InvalidArgument when the
    /// second argument is not constant, and RuntimeError when the character set is not
    /// "gbk" or when iconv_open() fails.
    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        if (scope != FunctionContext::THREAD_LOCAL) {
            return Status::OK();
        }
        if (!context->is_col_constant(1)) {
            return Status::InvalidArgument(
                    "character argument to convert function must be constant.");
        }
        const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
        if (!doris::iequal(character_data.to_string(), "gbk")) {
            return Status::RuntimeError(
                    "Illegal second argument column of function convert. now only support "
                    "convert to character set of gbk");
        }

        // NOTE(review): the target passed to iconv is "gb2312" although the SQL-level name
        // is "gbk"; kept as in the original -- confirm this is intended.
        iconv_t cd = iconv_open("gb2312", "utf-8");
        // iconv_open() reports failure with (iconv_t)-1, not with a null pointer.
        if (cd == reinterpret_cast<iconv_t>(-1)) {
            return Status::RuntimeError("function {} is convert to gbk failed in iconv_open",
                                        get_name());
        }
        context->set_function_state(scope, cd);
        return Status::OK();
    }

    /// Converts each row of argument 0 with the descriptor stored by prepare() and writes
    /// the resulting string column to `result`. Returns RuntimeError as soon as one row
    /// cannot be converted.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        ColumnPtr argument_column =
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
        const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
        const auto& str_offset = str_col->get_offsets();
        const auto& str_chars = str_col->get_chars();

        auto col_res = ColumnString::create();
        auto& res_offset = col_res->get_offsets();
        auto& res_chars = col_res->get_chars();
        res_offset.resize(input_rows_count);

        iconv_t cd = reinterpret_cast<iconv_t>(
                context->get_function_state(FunctionContext::THREAD_LOCAL));
        DCHECK(cd != nullptr);

        // iconv() returns (size_t)-1 on error.
        const size_t iconv_error = static_cast<size_t>(-1);
        // Running end offsets of the previous row in the input and output buffers.
        size_t in_begin = 0;
        size_t out_begin = 0;
        for (size_t row = 0; row < input_rows_count; ++row) {
            const size_t in_end = str_offset[row];
            size_t in_left = in_end - in_begin;

            // Reserve as many output bytes as there are input bytes for this row.
            size_t out_left = in_left;
            res_chars.resize(out_begin + out_left);

            // iconv() takes a non-const input pointer; the const_cast is only there to
            // satisfy its signature.
            char* in = const_cast<char*>(
                    reinterpret_cast<const char*>(str_chars.data() + in_begin));
            char* out = reinterpret_cast<char*>(res_chars.data() + out_begin);

            if (iconv(cd, &in, &in_left, &out, &out_left) == iconv_error) {
                // Bring the shared descriptor back to its initial state so that a failed
                // row does not affect later conversions on this thread.
                iconv(cd, nullptr, nullptr, nullptr, nullptr);
                return Status::RuntimeError("function {} is convert to gbk failed in iconv",
                                            get_name());
            }

            // `out_left` is the part of the reserved space iconv() did not fill. Drop it so
            // the row ends exactly at the last converted byte instead of carrying the
            // unused scratch bytes into the result.
            out_begin = res_chars.size() - out_left;
            res_chars.resize(out_begin);
            res_offset[row] = out_begin;
            in_begin = in_end;
        }

        block.replace_by_position(result, std::move(col_res));
        return Status::OK();
    }

    /// Closes the iconv descriptor opened in prepare(), if there is one, and clears the
    /// THREAD_LOCAL function state.
    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        if (scope == FunctionContext::THREAD_LOCAL) {
            iconv_t cd = reinterpret_cast<iconv_t>(
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
            // No descriptor is stored when prepare() returned an error; never hand a null
            // descriptor to iconv_close().
            if (cd != nullptr) {
                iconv_close(cd);
                context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
            }
        }
        return Status::OK();
    }
};
} // namespace doris::vectorized

View File

@ -0,0 +1,73 @@
---
{
"title": "convert_to",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<version since="1.2">
## convert_to
### description
#### Syntax
` convert_to(VARCHAR column, VARCHAR character)`
This function is intended for use in an `ORDER BY` clause, e.g. `order by convert(column using gbk)`. Currently the only supported target character set is 'gbk'.
By default, when the `ORDER BY` column contains Chinese characters, the rows are not sorted in Pinyin order.
After the character encoding of the column is converted to gbk, the rows can be sorted in Pinyin order.
</version>
### example
```
mysql> select * from class_test order by class_name;
+----------+------------+-------------+
| class_id | class_name | student_ids |
+----------+------------+-------------+
| 6 | asd | [6] |
| 7 | qwe | [7] |
| 8 | z | [8] |
| 2 | 哈 | [2] |
| 3 | 哦 | [3] |
| 1 | 啊 | [1] |
| 4 | 张 | [4] |
| 5 | 我 | [5] |
+----------+------------+-------------+
mysql> select * from class_test order by convert(class_name using gbk);
+----------+------------+-------------+
| class_id | class_name | student_ids |
+----------+------------+-------------+
| 6 | asd | [6] |
| 7 | qwe | [7] |
| 8 | z | [8] |
| 1 | 啊 | [1] |
| 2 | 哈 | [2] |
| 3 | 哦 | [3] |
| 5 | 我 | [5] |
| 4 | 张 | [4] |
+----------+------------+-------------+
```
### keywords
convert_to

View File

@ -412,6 +412,7 @@
"sql-manual/sql-functions/string-functions/split_part",
"sql-manual/sql-functions/string-functions/money_format",
"sql-manual/sql-functions/string-functions/parse_url",
"sql-manual/sql-functions/string-functions/convert_to",
"sql-manual/sql-functions/string-functions/extract_url_parameter",
"sql-manual/sql-functions/string-functions/uuid",
"sql-manual/sql-functions/string-functions/space",

View File

@ -0,0 +1,73 @@
---
{
"title": "convert_to",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<version since="1.2">
## convert_to
### description
#### Syntax
` convert_to(VARCHAR column, VARCHAR character)`
在order by子句中使用,例如order by convert(column using gbk), 现在仅支持character转为'gbk'.
因为当order by column中包含中文时,其排列不是按照汉语拼音的顺序.
将column的字符编码转为gbk后,可实现按拼音的排列的效果.
</version>
### example
```
mysql> select * from class_test order by class_name;
+----------+------------+-------------+
| class_id | class_name | student_ids |
+----------+------------+-------------+
| 6 | asd | [6] |
| 7 | qwe | [7] |
| 8 | z | [8] |
| 2 | 哈 | [2] |
| 3 | 哦 | [3] |
| 1 | 啊 | [1] |
| 4 | 张 | [4] |
| 5 | 我 | [5] |
+----------+------------+-------------+
mysql> select * from class_test order by convert(class_name using gbk);
+----------+------------+-------------+
| class_id | class_name | student_ids |
+----------+------------+-------------+
| 6 | asd | [6] |
| 7 | qwe | [7] |
| 8 | z | [8] |
| 1 | 啊 | [1] |
| 2 | 哈 | [2] |
| 3 | 哦 | [3] |
| 5 | 我 | [5] |
| 4 | 张 | [4] |
+----------+------------+-------------+
```
### keywords
convert_to

View File

@ -5861,6 +5861,13 @@ non_pred_expr ::=
{: RESULT = new CastExpr(targetType, e); :}
| KW_KEY encryptkey_name:name
{: RESULT = new EncryptKeyRef(name); :}
| KW_CONVERT LPAREN expr:e KW_USING ident:character RPAREN
{:
ArrayList<Expr> exprs = new ArrayList<>();
exprs.add(e);
exprs.add(new StringLiteral(character));
RESULT = new FunctionCallExpr("convert_to", new FunctionParams(exprs));
:}
;
expr_pipe_list ::=

View File

@ -606,7 +606,6 @@ public class FunctionCallExpr extends Expr {
}
return;
}
if (fnName.getFunction().equalsIgnoreCase("group_concat")) {
if (children.size() - orderByElements.size() > 2 || children.isEmpty()) {
throw new AnalysisException(
@ -1229,7 +1228,13 @@ public class FunctionCallExpr extends Expr {
}
}
}
if (fnName.getFunction().equalsIgnoreCase("convert_to")) {
if (children.size() < 2 || !getChild(1).isConstant()) {
throw new AnalysisException(
fnName.getFunction() + " needs two params, and the second is must be a constant: " + this
.toSql());
}
}
if (fn.getFunctionName().getFunction().equals("timediff")) {
fn.getReturnType().getPrimitiveType().setTimeType();
}

View File

@ -2483,6 +2483,7 @@ visible_functions = [
'', '', 'vec', 'ALWAYS_NULLABLE'],
# Utility functions
[['convert_to'], 'VARCHAR', ['VARCHAR','VARCHAR'], '','', '', 'vec', ''],
[['sleep'], 'BOOLEAN', ['INT'],
'_ZN5doris16UtilityFunctions5sleepEPN9doris_udf15FunctionContextERKNS1_6IntValE',
'', '', 'vec', ''],