Remove the duplicate type definition in function context. Remove the unused method in function context. The stale state in vexpr context is not needed, because vexpr is stateless, and the function context saves state and they are cloned. Remove the useless slot_size in all tuple and slot descriptors. Remove the doris_udf namespace; it is useless. Remove some unused macro definitions. Init v_conjuncts in vscanner, so there is no need to write the same code in every scanner. Use a unique ptr to manage the function context, since it can only belong to a single expr context. Issue Number: close #xxx --------- Co-authored-by: yiguolei <yiguolei@gmail.com>
112 lines
4.0 KiB
C++
112 lines
4.0 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <fmt/core.h>
|
|
#include <rapidjson/document.h>
|
|
#include <simdjson.h>
|
|
|
|
#include <sstream>
|
|
|
|
#include "common/status.h"
|
|
#include "udf/udf.h"
|
|
|
|
namespace doris {
|
|
|
|
// Tag identifying which kind of JSON function is meant.
// NOTE(review): the int/double/string meaning is inferred from the enumerator
// names only; confirm against the callers that consume this enum.
enum JsonFunctionType {
    JSON_FUN_INT = 0,
    JSON_FUN_DOUBLE,
    JSON_FUN_STRING,

    JSON_FUN_UNKNOWN // The last
};
|
|
|
|
class OpcodeRegistry;
|
|
|
|
struct JsonPath {
|
|
std::string key; // key of a json object
|
|
int idx; // array index of a json array, -1 means not set, -2 means *
|
|
bool is_valid; // true if the path is successfully parsed
|
|
|
|
JsonPath(const std::string& key_, int idx_, bool is_valid_)
|
|
: key(key_), idx(idx_), is_valid(is_valid_) {}
|
|
|
|
JsonPath(std::string&& key_, int idx_, bool is_valid_)
|
|
: key(std::move(key_)), idx(idx_), is_valid(is_valid_) {}
|
|
|
|
std::string to_string() const {
|
|
std::stringstream ss;
|
|
if (!is_valid) {
|
|
return "INVALID";
|
|
}
|
|
if (!key.empty()) {
|
|
ss << key;
|
|
}
|
|
if (idx == -2) {
|
|
ss << "[*]";
|
|
} else if (idx > -1) {
|
|
ss << "[" << idx << "]";
|
|
}
|
|
return ss.str();
|
|
}
|
|
|
|
std::string debug_string() const {
|
|
return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid);
|
|
}
|
|
};
|
|
|
|
class JsonFunctions {
|
|
public:
|
|
/**
|
|
* The `document` parameter must be has parsed.
|
|
* return Value Is Array object
|
|
* wrap_explicitly is set to true when the returned Array is wrapped actively.
|
|
*/
|
|
static rapidjson::Value* get_json_array_from_parsed_json(
|
|
const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly);
|
|
|
|
// this is only for test, it will parse the json path inside,
|
|
// so that we can easily pass a json path as string.
|
|
static rapidjson::Value* get_json_array_from_parsed_json(
|
|
const std::string& jsonpath, rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly);
|
|
|
|
static rapidjson::Value* get_json_object_from_parsed_json(
|
|
const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator);
|
|
|
|
static void parse_json_paths(const std::string& path_strings,
|
|
std::vector<JsonPath>* parsed_paths);
|
|
// extract_from_object extracts value from object according to the json path.
|
|
// Now, we do not support complete functions of json path.
|
|
// Eg. city[*].id is not supported in this function
|
|
static Status extract_from_object(simdjson::ondemand::object& obj,
|
|
const std::vector<JsonPath>& jsonpath,
|
|
simdjson::ondemand::value* value) noexcept;
|
|
|
|
private:
|
|
static rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths,
|
|
rapidjson::Value* document,
|
|
rapidjson::Document::AllocatorType& mem_allocator,
|
|
bool is_insert_null = false);
|
|
static void get_parsed_paths(const std::vector<std::string>& path_exprs,
|
|
std::vector<JsonPath>* parsed_paths);
|
|
};
|
|
} // namespace doris
|