[function](json) Json unquote (#18037)

This commit is contained in:
Mellorsssss
2023-04-24 10:33:29 +08:00
committed by GitHub
parent 8d7a9fd21b
commit ab2a6864bc
7 changed files with 279 additions and 0 deletions

View File

@ -933,11 +933,81 @@ public:
}
};
/// Vectorized implementation of the SQL function `json_unquote(VARCHAR)`.
///
/// Row-by-row behavior:
///   - a NULL input row produces a NULL output row;
///   - a value that is not enclosed in a pair of double quotes is copied
///     through unchanged;
///   - a value enclosed in double quotes is parsed as a JSON document and must
///     be a JSON string; its decoded contents become the result. If parsing
///     fails or the document is not a string, execution stops with a
///     RuntimeError.
class FunctionJsonUnquote : public IFunction {
public:
    static constexpr auto name = "json_unquote";

    static FunctionPtr create() { return std::make_shared<FunctionJsonUnquote>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 1; }

    /// The result is always declared as Nullable(String), whatever the argument type.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return make_nullable(std::make_shared<DataTypeString>());
    }

    // NULL rows are handled explicitly inside execute_impl.
    bool use_default_implementation_for_nulls() const override { return false; }

    bool use_default_implementation_for_constants() const override { return true; }

    /// Unquotes every row of the single argument column and stores a
    /// ColumnNullable(ColumnString) at position `result` of `block`.
    ///
    /// Returns a RuntimeError when the argument is not a (nullable) string
    /// column, or when a double-quoted row is not a valid JSON string.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        const IColumn& src_column = *(block.get_by_position(arguments[0]).column);

        // Locate the string payload, looking through a Nullable wrapper if present.
        const ColumnString* src_strings = nullptr;
        if (const auto* nullable_src = check_and_get_column<ColumnNullable>(src_column)) {
            src_strings =
                    check_and_get_column<ColumnString>(*nullable_src->get_nested_column_ptr());
        } else {
            src_strings = check_and_get_column<ColumnString>(src_column);
        }
        if (src_strings == nullptr) {
            return Status::RuntimeError("Illegal column {} should be ColumnString",
                                        src_column.get_name());
        }

        auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
        auto& null_flags = result_null_map->get_data();

        auto unquoted = ColumnString::create();
        unquoted->reserve(input_rows_count);

        // A single document instance is shared by all rows so the parser is reused.
        rapidjson::Document json_doc;

        for (size_t row = 0; row < input_rows_count; ++row) {
            if (src_column.is_null_at(row)) {
                // Keep the nested column aligned with the null map: empty placeholder value.
                null_flags[row] = 1;
                unquoted->insert_data(nullptr, 0);
                continue;
            }

            const auto& raw = src_strings->get_data_at(row);
            const bool is_quoted =
                    raw.size >= 2 && raw.data[0] == '"' && raw.data[raw.size - 1] == '"';
            if (!is_quoted) {
                // Not wrapped in double quotes: pass the value through verbatim.
                unquoted->insert_data(raw.data, raw.size);
                continue;
            }

            json_doc.Parse(raw.data, raw.size);
            const bool is_json_string = !json_doc.HasParseError() && json_doc.IsString();
            if (!is_json_string) {
                return Status::RuntimeError(
                        fmt::format("Invalid JSON text in argument 1 to function {}: {}", name,
                                    std::string_view(raw.data, raw.size)));
            }
            unquoted->insert_data(json_doc.GetString(), json_doc.GetStringLength());
        }

        block.replace_by_position(
                result, ColumnNullable::create(std::move(unquoted), std::move(result_null_map)));
        return Status::OK();
    }
};
void register_function_json(SimpleFunctionFactory& factory) {
factory.register_function<FunctionGetJsonInt>();
factory.register_function<FunctionGetJsonBigInt>();
factory.register_function<FunctionGetJsonDouble>();
factory.register_function<FunctionGetJsonString>();
factory.register_function<FunctionJsonUnquote>();
factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonArrayImpl>>();
factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonObjectImpl>>();

View File

@ -0,0 +1,83 @@
---
{
"title": "json_unquote",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## json_unquote
### Description
#### Syntax
`VARCHAR json_unquote(VARCHAR)`
This function unquotes a JSON value and returns the result as a utf8mb4 string. If the argument is NULL, it will return NULL.
Escape sequences within a string as shown in the following table will be recognized. Backslashes will be ignored for all other escape sequences.
| Escape Sequence | Character Represented by Sequence |
|-----------------|------------------------------------|
| \" | A double quote (") character |
| \b | A backspace character |
| \f | A formfeed character |
| \n | A newline (linefeed) character |
| \r | A carriage return character |
| \t | A tab character |
| \\ | A backslash (\) character |
| \uxxxx | UTF-8 bytes for Unicode value XXXX |
### example
```
mysql> SELECT json_unquote('"doris"');
+-------------------------+
| json_unquote('"doris"') |
+-------------------------+
| doris |
+-------------------------+
mysql> SELECT json_unquote('[1, 2, 3]');
+---------------------------+
| json_unquote('[1, 2, 3]') |
+---------------------------+
| [1, 2, 3] |
+---------------------------+
mysql> SELECT json_unquote(null);
+--------------------+
| json_unquote(NULL) |
+--------------------+
| NULL |
+--------------------+
mysql> SELECT json_unquote('"\\ttest"');
+--------------------------+
| json_unquote('"\ttest"') |
+--------------------------+
| test |
+--------------------------+
```
### keywords
json,unquote,json_unquote

View File

@ -598,6 +598,7 @@
"sql-manual/sql-functions/json-functions/json_array",
"sql-manual/sql-functions/json-functions/json_object",
"sql-manual/sql-functions/json-functions/json_quote",
"sql-manual/sql-functions/json-functions/json_unquote",
"sql-manual/sql-functions/json-functions/json_valid",
"sql-manual/sql-functions/json-functions/json_extract"
]

View File

@ -0,0 +1,83 @@
---
{
"title": "json_unquote",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## json_unquote
### Description
#### Syntax
`VARCHAR json_unquote(VARCHAR)`
这个函数将去掉JSON值中的引号,并将结果作为utf8mb4字符串返回。如果参数为NULL,则返回NULL。
在字符串中显示的如下转义序列将被识别,对于所有其他转义序列,反斜杠将被忽略。
| 转义序列 | 序列表示的字符 |
|----------|-------------------------------|
| \" | 双引号 " |
| \b | 退格字符 |
| \f | 换页符 |
| \n | 换行符 |
| \r | 回车符 |
| \t | 制表符 |
| \\ | 反斜杠 \ |
| \uxxxx | Unicode 值 XXXX 的 UTF-8 字节 |
### example
```
mysql> SELECT json_unquote('"doris"');
+-------------------------+
| json_unquote('"doris"') |
+-------------------------+
| doris |
+-------------------------+
mysql> SELECT json_unquote('[1, 2, 3]');
+---------------------------+
| json_unquote('[1, 2, 3]') |
+---------------------------+
| [1, 2, 3] |
+---------------------------+
mysql> SELECT json_unquote(null);
+--------------------+
| json_unquote(NULL) |
+--------------------+
| NULL |
+--------------------+
mysql> SELECT json_unquote('"\\ttest"');
+--------------------------+
| json_unquote('"\ttest"') |
+--------------------------+
| test |
+--------------------------+
```
### keywords
json,unquote,json_unquote

View File

@ -1655,6 +1655,7 @@ visible_functions = [
[['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],
[['json_quote'], 'VARCHAR', ['VARCHAR'], ''],
[['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_unquote'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], ''],
#hll function

View File

@ -77,6 +77,36 @@ v1
-- !sql --
"\\n\\b\\r\\t"
-- !sql --
""
-- !sql --
-- !sql --
doris
-- !sql --
doris
-- !sql --
open-quoted"
-- !sql --
"open-quoted
-- !sql --
\N
-- !sql --
Dorris\ ishere\n
-- !sql --
Dorris\ ishere\n
-- !sql --
DORIS
-- !sql --
2

View File

@ -48,6 +48,17 @@ suite("test_json_function") {
qt_sql "SELECT json_quote('[1, 2, 3, 1678708107000]');"
qt_sql "SELECT json_quote(null);"
qt_sql "SELECT json_quote(\"\\n\\b\\r\\t\");"
qt_sql "SELECT json_quote('')"
qt_sql "SELECT json_unquote('')"
qt_sql "SELECT json_unquote('doris')"
qt_sql "SELECT json_unquote('\"doris\"');"
qt_sql "SELECT json_unquote('open-quoted\"');"
qt_sql "SELECT json_unquote('\"open-quoted');"
qt_sql "SELECT json_unquote(null);"
qt_sql "SELECT json_unquote('Dorr\bis\tishere\n');"
qt_sql "SELECT json_unquote('\"Dorr\\\\bis\\\\tishere\\\\n\"');"
qt_sql "SELECT json_unquote('\"\\\\u0044\\\\u004F\\\\u0052\\\\u0049\\\\u0053\"');"
qt_sql "SELECT json_extract('[1, 2, 3]', '\$.[1]');"
qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name');"