From 1ed24117ac035ebdc15db64731751ef838432c13 Mon Sep 17 00:00:00 2001 From: Guangdong Liu Date: Mon, 5 Feb 2024 22:05:33 +0800 Subject: [PATCH] [function](url_decode)add url_decode function (#30667) --- be/src/vec/functions/function_string.cpp | 1 + be/src/vec/functions/function_string.h | 48 +++++++++++++ .../string-functions/url-decode.md | 54 ++++++++++++++ docs/sidebars.json | 1 + .../string-functions/url-decode.md | 54 ++++++++++++++ .../doris/catalog/BuiltinScalarFunctions.java | 2 + .../functions/scalar/UrlDecode.java | 70 +++++++++++++++++++ .../visitor/ScalarFunctionVisitor.java | 5 ++ gensrc/script/doris_builtins_functions.py | 4 +- .../nereids_function_p0/scalar_function/U.out | 12 ++++ .../scalar_function/U.groovy | 4 ++ 11 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 docs/en/docs/sql-manual/sql-functions/string-functions/url-decode.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/string-functions/url-decode.md create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UrlDecode.java diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index c5ce208d26..cce325e89e 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -1000,6 +1000,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); factory.register_function(); + factory.register_function(); factory.register_function>(); factory.register_function>(); factory.register_function>(); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 4794d28e0e..f57fe6d626 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -81,6 +81,7 @@ #include "util/md5.h" #include "util/simd/vstring_function.h" #include "util/sm3.h" +#include "util/url_coding.h" #include "util/url_parser.h" 
#include "vec/columns/column_array.h" #include "vec/columns/column_decimal.h" @@ -2831,6 +2832,53 @@ public: } }; +class FunctionUrlDecode : public IFunction { +public: + static constexpr auto name = "url_decode"; + static FunctionPtr create() { return std::make_shared(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 1; } + bool is_variadic() const override { return false; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared(); + } + + Status execute_impl(FunctionContext* context, Block& block, + + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) const override { + auto res = ColumnString::create(); + auto& res_offsets = res->get_offsets(); + auto& res_chars = res->get_chars(); + res_offsets.resize(input_rows_count); + + ColumnPtr argument_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const auto* url_col = check_and_get_column(argument_column.get()); + + if (!url_col) { + return Status::InternalError("Not supported input argument type"); + } + + std::string decoded_url; + + for (size_t i = 0; i < input_rows_count; ++i) { + auto source = url_col->get_data_at(i); + StringRef url_val(const_cast(source.data), source.size); + + url_decode(url_val.to_string(), &decoded_url); + + StringOP::push_value_string(decoded_url, i, res_chars, res_offsets); + decoded_url.clear(); + } + + block.get_by_position(result).column = std::move(res); + + return Status::OK(); + } +}; + template class FunctionMoneyFormat : public IFunction { public: diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/url-decode.md b/docs/en/docs/sql-manual/sql-functions/string-functions/url-decode.md new file mode 100644 index 0000000000..b09f0f4635 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/url-decode.md @@ -0,0 +1,54 @@ +--- +{ + "title": "url_decode", + "language": 
"en" +} +--- + + + +## url_decode +### description + +Converts an url to a decode string. + +#### Syntax + +```sql +VARCHAR url_decode(VARCHAR url) +``` + +### Parameters + +- url: the string to decode. If url is not a string type. + +### example + +``` +mysql> mysql> select url_decode('https%3A%2F%2Fdoris.apache.org%2Fzh-CN%2Fdocs%2Fsql-manual%2Fsql-functions%2Fstring-functions'); ++------------------------------------------------+ +| url_decode('https%3A%2F%2Fdoris.apache.org%2Fzh-CN%2Fdocs%2Fsql-manual%2Fsql-functions%2Fstring-functions') | ++------------------------------------------------+ +| https://doris.apache.org/zh-CN/docs/sql-manual/sql-functions/string-functions | ++------------------------------------------------+ +``` + +### keywords + URL DECODE diff --git a/docs/sidebars.json b/docs/sidebars.json index ea74a7a24f..da4c1020a1 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -494,6 +494,7 @@ "sql-manual/sql-functions/string-functions/substring-index", "sql-manual/sql-functions/string-functions/money-format", "sql-manual/sql-functions/string-functions/parse-url", + "sql-manual/sql-functions/string-functions/url-decode", "sql-manual/sql-functions/string-functions/convert-to", "sql-manual/sql-functions/string-functions/extract-url-parameter", "sql-manual/sql-functions/string-functions/uuid", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/url-decode.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/url-decode.md new file mode 100644 index 0000000000..8b171bf573 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/url-decode.md @@ -0,0 +1,54 @@ +--- +{ + "title": "url_decode", + "language": "en" +} +--- + + + +## url_decode +### description + +将URL转换为解码字符串。 + +#### Syntax + +```sql +VARCHAR url_decode(VARCHAR url) +``` + +### Parameters + +- url: 待解码的url。 + +### example + +``` +mysql> mysql> select 
url_decode('https%3A%2F%2Fdoris.apache.org%2Fzh-CN%2Fdocs%2Fsql-manual%2Fsql-functions%2Fstring-functions'); ++------------------------------------------------+ +| url_decode('https%3A%2F%2Fdoris.apache.org%2Fzh-CN%2Fdocs%2Fsql-manual%2Fsql-functions%2Fstring-functions') | ++------------------------------------------------+ +| https://doris.apache.org/zh-CN/docs/sql-manual/sql-functions/string-functions | ++------------------------------------------------+ +``` + +### keywords + URL DECODE diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index 1ca81a11c1..1ace763675 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -411,6 +411,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate; import org.apache.doris.nereids.trees.expressions.functions.scalar.Unhex; import org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp; import org.apache.doris.nereids.trees.expressions.functions.scalar.Upper; +import org.apache.doris.nereids.trees.expressions.functions.scalar.UrlDecode; import org.apache.doris.nereids.trees.expressions.functions.scalar.User; import org.apache.doris.nereids.trees.expressions.functions.scalar.UtcTimestamp; import org.apache.doris.nereids.trees.expressions.functions.scalar.Uuid; @@ -860,6 +861,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Unhex.class, "unhex"), scalar(UnixTimestamp.class, "unix_timestamp"), scalar(Upper.class, "ucase", "upper"), + scalar(UrlDecode.class, "url_decode"), scalar(User.class, "user"), scalar(UtcTimestamp.class, "utc_timestamp"), scalar(Uuid.class, "uuid"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UrlDecode.java 
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.VarcharType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
 * ScalarFunction 'url_decode'. This class is generated by GenerateFunction.
 *
 * <p>Takes exactly one argument. Two signatures are offered: VARCHAR -> VARCHAR
 * and STRING -> STRING.
 */
public class UrlDecode extends ScalarFunction
        implements ExplicitlyCastableSignature, PropagateNullable {

    /** Supported signatures, in the order they are listed for resolution. */
    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT),
            FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE)
    );

    /**
     * constructor with 1 argument.
     *
     * @param arg0 the expression whose value is to be URL-decoded
     */
    public UrlDecode(Expression arg0) {
        super("url_decode", arg0);
    }

    /**
     * withChildren.
     *
     * @param children replacement children; must contain exactly one expression
     * @return a new UrlDecode built on {@code children.get(0)}
     * @throws IllegalArgumentException if {@code children.size() != 1}
     */
    @Override
    public UrlDecode withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 1);
        return new UrlDecode(children.get(0));
    }

    @Override
    public List<FunctionSignature> getSignatures() {
        return SIGNATURES;
    }

    /** Dispatches to {@code visitor.visitUrlDecode(this, context)}. */
    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        return visitor.visitUrlDecode(this, context);
    }
}
ScalarFunctionVisitor { return visitScalarFunction(parseUrl, context); } + default R visitUrlDecode(UrlDecode urlDecode, C context) { + return visitScalarFunction(urlDecode, context); + } + default R visitPassword(Password password, C context) { return visitScalarFunction(password, context); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index bd52ffe789..3fbe079eb3 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1609,6 +1609,7 @@ visible_functions = { [['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'], 'ALWAYS_NULLABLE'], [['substring_index'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'], 'DEPEND_ON_ARGUMENT'], [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], ''], + [['url_decode'], 'VARCHAR', ['VARCHAR'], ''], [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'], 'ALWAYS_NULLABLE'], [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT', 'INT'], 'ALWAYS_NULLABLE'], @@ -1662,7 +1663,8 @@ visible_functions = { [['money_format'], 'STRING', ['DECIMAL64'], ''], [['money_format'], 'STRING', ['DECIMAL128'], ''], [['split_part'], 'STRING', ['STRING', 'STRING', 'INT'], 'ALWAYS_NULLABLE'], - [['substring_index'], 'STRING', ['STRING', 'STRING', 'INT'], 'DEPEND_ON_ARGUMENT'] + [['substring_index'], 'STRING', ['STRING', 'STRING', 'INT'], 'DEPEND_ON_ARGUMENT'], + [['url_decode'], 'STRING', ['STRING'], ''] ], diff --git a/regression-test/data/nereids_function_p0/scalar_function/U.out b/regression-test/data/nereids_function_p0/scalar_function/U.out index 36700e53a9..a2900a0775 100644 --- a/regression-test/data/nereids_function_p0/scalar_function/U.out +++ b/regression-test/data/nereids_function_p0/scalar_function/U.out @@ -289,3 +289,15 @@ STRING3 STRING3 STRING3 +-- !sql_url_decode -- +https://doris.apache.org/zh-CN/docs/sql-manual/sql-functions/string-functions + +-- !sql_url_decode_empty -- + + +-- !sql_url_decode_null -- +\N + +-- 
!sql_url_decode_invalid_url -- +This is not a url + diff --git a/regression-test/suites/nereids_function_p0/scalar_function/U.groovy b/regression-test/suites/nereids_function_p0/scalar_function/U.groovy index d743771f79..47133a0a08 100644 --- a/regression-test/suites/nereids_function_p0/scalar_function/U.groovy +++ b/regression-test/suites/nereids_function_p0/scalar_function/U.groovy @@ -43,4 +43,8 @@ suite("nereids_scalar_fn_U") { qt_sql_upper_String_notnull "select upper(kstr) from fn_test_not_nullable order by kstr" sql "select user() from fn_test" sql "select user() from fn_test_not_nullable" + qt_sql_url_decode "select url_decode('https%3A%2F%2Fdoris.apache.org%2Fzh-CN%2Fdocs%2Fsql-manual%2Fsql-functions%2Fstring-functions')" + qt_sql_url_decode_empty "select url_decode('');" + qt_sql_url_decode_null "select url_decode(null);" + qt_sql_url_decode_invalid_url "select url_decode('This is not a url');" } \ No newline at end of file