163 lines
6.3 KiB
C++
163 lines
6.3 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <fmt/core.h>
#include <rapidjson/document.h>

#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "udf/udf.h"
|
|
|
|
namespace doris {
|
|
|
|
// Tag passed to JsonFunctions::get_json_object (as `fntype`) to identify
// which JSON getter flavour is being evaluated.
// NOTE(review): the names suggest a one-to-one mapping onto get_json_int /
// get_json_double / get_json_string -- confirm against the implementation.
enum JsonFunctionType {
    JSON_FUN_INT = 0,
    JSON_FUN_DOUBLE = 1,
    JSON_FUN_STRING = 2,

    // Sentinel value; must stay the last enumerator.
    JSON_FUN_UNKNOWN = 3
};
|
|
|
|
// Forward declarations. None of these types is referenced by the
// declarations visible in this header.
// NOTE(review): presumably kept for translation units that rely on them being
// declared here -- confirm before removing.
class Expr;
class OpcodeRegistry;
class TupleRow;
|
|
|
|
struct JsonPath {
|
|
std::string key; // key of a json object
|
|
int idx; // array index of a json array, -1 means not set, -2 means *
|
|
bool is_valid; // true if the path is successfully parsed
|
|
|
|
JsonPath(const std::string& key_, int idx_, bool is_valid_)
|
|
: key(key_), idx(idx_), is_valid(is_valid_) {}
|
|
|
|
JsonPath(std::string&& key_, int idx_, bool is_valid_)
|
|
: key(std::move(key_)), idx(idx_), is_valid(is_valid_) {}
|
|
|
|
std::string to_string() const {
|
|
std::stringstream ss;
|
|
if (!is_valid) {
|
|
return "INVALID";
|
|
}
|
|
if (!key.empty()) {
|
|
ss << key;
|
|
}
|
|
if (idx == -2) {
|
|
ss << "[*]";
|
|
} else if (idx > -1) {
|
|
ss << "[" << idx << "]";
|
|
}
|
|
return ss.str();
|
|
}
|
|
|
|
std::string to_simdjson_pointer(bool* valid) const {
|
|
std::stringstream ss;
|
|
if (!is_valid) {
|
|
*valid = false;
|
|
return "";
|
|
}
|
|
ss << "/";
|
|
if (!key.empty()) {
|
|
ss << key;
|
|
}
|
|
if (idx == -2) {
|
|
// not support [*]
|
|
*valid = false;
|
|
return "";
|
|
} else if (idx > -1) {
|
|
ss << "/" << idx;
|
|
}
|
|
return ss.str();
|
|
}
|
|
|
|
std::string debug_string() const {
|
|
return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid);
|
|
}
|
|
};
|
|
|
|
struct JsonState {
|
|
std::vector<JsonPath> json_paths;
|
|
rapidjson::Document document;
|
|
};
|
|
|
|
class JsonFunctions {
|
|
public:
|
|
static void init();
|
|
static doris_udf::IntVal get_json_int(doris_udf::FunctionContext* context,
|
|
const doris_udf::StringVal& json_str,
|
|
const doris_udf::StringVal& path);
|
|
static doris_udf::StringVal get_json_string(doris_udf::FunctionContext* context,
|
|
const doris_udf::StringVal& json_str,
|
|
const doris_udf::StringVal& path);
|
|
static doris_udf::DoubleVal get_json_double(doris_udf::FunctionContext* context,
|
|
const doris_udf::StringVal& json_str,
|
|
const doris_udf::StringVal& path);
|
|
|
|
static rapidjson::Value* get_json_object(FunctionContext* context, std::string_view json_string,
|
|
std::string_view path_string,
|
|
const JsonFunctionType& fntype,
|
|
rapidjson::Document* document);
|
|
|
|
static doris_udf::StringVal json_array(doris_udf::FunctionContext* context, int num_args,
|
|
const doris_udf::StringVal* json_str);
|
|
static doris_udf::StringVal json_object(doris_udf::FunctionContext* context, int num_args,
|
|
const doris_udf::StringVal* json_str);
|
|
static doris_udf::StringVal json_quote(doris_udf::FunctionContext* context,
|
|
const doris_udf::StringVal& json_str);
|
|
|
|
/**
|
|
* The `document` parameter must be has parsed.
|
|
* return Value Is Array object
|
|
* wrap_explicitly is set to true when the returned Array is wrapped actively.
|
|
*/
|
|
static rapidjson::Value* get_json_array_from_parsed_json(
|
|
const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly);
|
|
|
|
// this is only for test, it will parse the json path inside,
|
|
// so that we can easily pass a json path as string.
|
|
static rapidjson::Value* get_json_array_from_parsed_json(
|
|
const std::string& jsonpath, rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly);
|
|
|
|
static rapidjson::Value* get_json_object_from_parsed_json(
|
|
const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator);
|
|
|
|
static void json_path_prepare(doris_udf::FunctionContext*,
|
|
doris_udf::FunctionContext::FunctionStateScope);
|
|
|
|
static void json_path_close(doris_udf::FunctionContext*,
|
|
doris_udf::FunctionContext::FunctionStateScope);
|
|
|
|
static void parse_json_paths(const std::string& path_strings,
|
|
std::vector<JsonPath>* parsed_paths);
|
|
|
|
private:
|
|
static rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths,
|
|
rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator,
|
|
bool is_insert_null = false);
|
|
static void get_parsed_paths(const std::vector<std::string>& path_exprs,
|
|
std::vector<JsonPath>* parsed_paths);
|
|
static rapidjson::Value parse_str_with_flag(const StringVal& arg, const StringVal& flag,
|
|
const int num,
|
|
rapidjson::Document::AllocatorType& allocator);
|
|
};
|
|
} // namespace doris
|