Optimize json functions (#1177)
1. get_json_xxx() now supports using double quotes to escape dots inside a path key. 2. Implement json_path_prepare() to preprocess a constant json_path once. The time of get_json_string() on 1000000 rows drops from 2.27s to 0.27s.
This commit is contained in:
@ -25,6 +25,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <rapidjson/stringbuffer.h>
|
||||
#include <rapidjson/document.h>
|
||||
#include <rapidjson/writer.h>
|
||||
@ -40,7 +41,9 @@
|
||||
|
||||
namespace doris {
|
||||
|
||||
static const re2::RE2 JSON_PATTERN("^([a-zA-Z0-9_\\-\\:\\s]*)(?:\\[([0-9]+)\\])?");
|
||||
// static const re2::RE2 JSON_PATTERN("^([a-zA-Z0-9_\\-\\:\\s#\\|\\.]*)(?:\\[([0-9]+)\\])?");
|
||||
// json path cannot contains: ", [, ]
|
||||
static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+)\\])?");
|
||||
|
||||
void JsonFunctions::init() {
|
||||
}
|
||||
@ -54,7 +57,7 @@ IntVal JsonFunctions::get_json_int(
|
||||
std::string path_string((char*)path.ptr, path.len);
|
||||
rapidjson::Document document;
|
||||
rapidjson::Value* root =
|
||||
get_json_object(json_string, path_string, JSON_FUN_INT, &document);
|
||||
get_json_object(context, json_string, path_string, JSON_FUN_INT, &document);
|
||||
if (root->IsInt()) {
|
||||
return IntVal(root->GetInt());
|
||||
} else {
|
||||
@ -67,11 +70,12 @@ StringVal JsonFunctions::get_json_string(
|
||||
if (json_str.is_null || path.is_null) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
std::string json_string((char*)json_str.ptr, json_str.len);
|
||||
std::string path_string((char*)path.ptr, path.len);
|
||||
rapidjson::Document document;
|
||||
rapidjson::Value* root =
|
||||
get_json_object(json_string, path_string, JSON_FUN_STRING, &document);
|
||||
get_json_object(context, json_string, path_string, JSON_FUN_STRING, &document);
|
||||
if (root->IsNull()) {
|
||||
return StringVal::null();
|
||||
} else if (root->IsString()) {
|
||||
@ -93,7 +97,7 @@ DoubleVal JsonFunctions::get_json_double(
|
||||
std::string path_string((char*)path.ptr, path.len);
|
||||
rapidjson::Document document;
|
||||
rapidjson::Value* root =
|
||||
get_json_object(json_string, path_string, JSON_FUN_DOUBLE, &document);
|
||||
get_json_object(context, json_string, path_string, JSON_FUN_DOUBLE, &document);
|
||||
if (root->IsInt()) {
|
||||
return DoubleVal(static_cast<double>(root->GetInt()));
|
||||
} else if (root->IsDouble()) {
|
||||
@ -104,18 +108,41 @@ DoubleVal JsonFunctions::get_json_double(
|
||||
}
|
||||
|
||||
rapidjson::Value* JsonFunctions::get_json_object(
|
||||
FunctionContext* context,
|
||||
const std::string& json_string,
|
||||
const std::string& path_string,
|
||||
const JsonFunctionType& fntype,
|
||||
rapidjson::Document* document) {
|
||||
std::vector<std::string> path_exprs;
|
||||
boost::split(path_exprs, path_string, boost::is_any_of("."));
|
||||
|
||||
if (path_exprs[0] != "$") {
|
||||
// split path by ".", and escape quota by "\"
|
||||
// eg:
|
||||
// '$.text#abc.xyz' -> [$, text#abc, xyz]
|
||||
// '$."text.abc".xyz' -> [$, text.abc, xyz]
|
||||
// '$."text.abc"[1].xyz' -> [$, text.abc[1], xyz]
|
||||
std::vector<JsonPath>* parsed_paths;
|
||||
std::vector<JsonPath> tmp_parsed_paths;
|
||||
#ifndef BE_TEST
|
||||
parsed_paths = reinterpret_cast<std::vector<JsonPath>*>(context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
if (parsed_paths == nullptr) {
|
||||
boost::tokenizer<boost::escaped_list_separator<char> > tok(path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
|
||||
std::vector<std::string> paths(tok.begin(), tok.end());
|
||||
get_parsed_paths(paths, &tmp_parsed_paths);
|
||||
parsed_paths = &tmp_parsed_paths;
|
||||
}
|
||||
#else
|
||||
boost::tokenizer<boost::escaped_list_separator<char> > tok(path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
|
||||
std::vector<std::string> paths(tok.begin(), tok.end());
|
||||
get_parsed_paths(paths, &tmp_parsed_paths);
|
||||
parsed_paths = &tmp_parsed_paths;
|
||||
#endif
|
||||
|
||||
VLOG(10) << "first parsed path: " << (*parsed_paths)[0].debug_string();
|
||||
|
||||
if (!(*parsed_paths)[0].is_valid) {
|
||||
return document;
|
||||
}
|
||||
|
||||
if (UNLIKELY(path_exprs.size() == 1)) {
|
||||
if (UNLIKELY((*parsed_paths).size() == 1)) {
|
||||
if (fntype == JSON_FUN_STRING) {
|
||||
document->SetString(json_string.c_str(), document->GetAllocator());
|
||||
} else {
|
||||
@ -134,18 +161,19 @@ rapidjson::Value* JsonFunctions::get_json_object(
|
||||
|
||||
rapidjson::Value* root = document;
|
||||
rapidjson::Value* array_obj = NULL;
|
||||
// eg: list[0],use regex parse path_string's result is 'list'
|
||||
std::string col;
|
||||
std::string index;
|
||||
for (int i = 1; i < path_exprs.size(); i++) {
|
||||
for (int i = 1; i < (*parsed_paths).size(); i++) {
|
||||
VLOG(10) << "parsed_paths: " << (*parsed_paths)[i].debug_string();
|
||||
|
||||
if (root->IsNull()) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, &index))) {
|
||||
if (UNLIKELY(!(*parsed_paths)[i].is_valid)) {
|
||||
root->SetNull();
|
||||
}
|
||||
|
||||
std::string& col = (*parsed_paths)[i].key;
|
||||
int index = (*parsed_paths)[i].idx;
|
||||
if (LIKELY(!col.empty())) {
|
||||
if (root->IsArray()) {
|
||||
array_obj = static_cast<rapidjson::Value*>(
|
||||
@ -190,15 +218,14 @@ rapidjson::Value* JsonFunctions::get_json_object(
|
||||
}
|
||||
}
|
||||
|
||||
if (UNLIKELY(!index.empty())) {
|
||||
if (UNLIKELY(index != -1)) {
|
||||
// judge the rapidjson:Value, which base the top's result,
|
||||
// if not array return NULL;else get the index value from the array
|
||||
if (root->IsArray()) {
|
||||
int index_match = atoi(index.c_str());
|
||||
if (root->IsNull() || index_match >= root->Size()) {
|
||||
if (root->IsNull() || index >= root->Size()) {
|
||||
root->SetNull();
|
||||
} else {
|
||||
root = &((*root)[index_match]);
|
||||
root = &((*root)[index]);
|
||||
}
|
||||
} else {
|
||||
root->SetNull();
|
||||
@ -209,5 +236,69 @@ rapidjson::Value* JsonFunctions::get_json_object(
|
||||
return root;
|
||||
}
|
||||
|
||||
// Prepare hook for the get_json_xxx() functions.
//
// When the json path (argument 1) is a non-null constant, it is tokenized and
// parsed once here and stored as FRAGMENT_LOCAL function state, so that
// get_json_object() can pick it up through get_function_state() instead of
// re-parsing the path for each call. The stored vector is heap-allocated here
// and released by json_path_close().
//
// Nothing is stored when the scope is not FRAGMENT_LOCAL, the path is not a
// constant, or the constant is NULL.
void JsonFunctions::json_path_prepare(
        doris_udf::FunctionContext* context,
        doris_udf::FunctionContext::FunctionStateScope scope) {
    if (scope != FunctionContext::FRAGMENT_LOCAL) {
        return;
    }

    if (!context->is_arg_constant(1)) {
        return;
    }
    StringVal* path = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
    if (path == nullptr || path->is_null) {
        return;
    }

    // boost::tokenizer keeps iterators into the container it is given, not a
    // copy of it, so the string has to be a named object that outlives `tok`.
    // Passing a temporary here leaves the tokenizer reading freed memory.
    const std::string path_string(reinterpret_cast<char*>(path->ptr), path->len);

    std::vector<std::string> path_exprs;
    try {
        boost::tokenizer<boost::escaped_list_separator<char> > tok(
                path_string,
                boost::escaped_list_separator<char>("\\", ".", "\""));
        path_exprs.assign(tok.begin(), tok.end());
    } catch (const boost::escaped_list_error&) {
        // Unknown escape sequence in the path (e.g. '$.name\k'): treat the
        // whole path as unparsable rather than letting the exception escape.
        path_exprs.clear();
    }

    std::vector<JsonPath>* parsed_paths = new std::vector<JsonPath>();
    if (path_exprs.empty()) {
        // Empty or malformed path: store a single invalid entry, because
        // get_json_object() reads element 0 of the stored vector.
        parsed_paths->emplace_back("", -1, false);
    } else {
        get_parsed_paths(path_exprs, parsed_paths);
    }

    context->set_function_state(scope, parsed_paths);
    VLOG(10) << "prepare json path. size: " << parsed_paths->size();
}
|
||||
|
||||
void JsonFunctions::get_parsed_paths(
|
||||
const std::vector<std::string>& path_exprs,
|
||||
std::vector<JsonPath>* parsed_paths) {
|
||||
|
||||
if (path_exprs[0] != "$") {
|
||||
parsed_paths->emplace_back("", -1, false);
|
||||
} else {
|
||||
parsed_paths->emplace_back("$", -1, true);
|
||||
}
|
||||
|
||||
for (int i = 1; i < path_exprs.size(); i++) {
|
||||
std::string col;
|
||||
std::string index;
|
||||
if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, &index))) {
|
||||
parsed_paths->emplace_back("", -1, false);
|
||||
} else {
|
||||
int idx = -1;
|
||||
if (!index.empty()) {
|
||||
idx = atoi(index.c_str());
|
||||
}
|
||||
parsed_paths->emplace_back(col, idx, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close hook paired with json_path_prepare(): frees the parsed json path that
// was stored as FRAGMENT_LOCAL function state, if there is one.
void JsonFunctions::json_path_close(
        doris_udf::FunctionContext* context,
        doris_udf::FunctionContext::FunctionStateScope scope) {
    if (scope == FunctionContext::FRAGMENT_LOCAL) {
        auto* cached_paths =
                reinterpret_cast<std::vector<JsonPath>*>(context->get_function_state(scope));
        if (cached_paths == nullptr) {
            // No state stored for this scope; nothing to release.
            return;
        }
        delete cached_paths;
        VLOG(10) << "close json path";
    }
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -31,6 +31,23 @@ class Expr;
|
||||
class OpcodeRegistry;
|
||||
class TupleRow;
|
||||
|
||||
// One parsed step of a json path such as '$."text.abc"[1].xyz'.
// Each step carries an object key and/or an array subscript, plus a flag
// telling whether the step was parsed successfully.
struct JsonPath {
    std::string key;  // key of a json object
    int idx;          // array index of a json array, -1 means not set
    bool is_valid;    // true if the path is successfully parsed

    JsonPath(const std::string& key_, int idx_, bool is_valid_)
            : key(key_), idx(idx_), is_valid(is_valid_) {}

    // Human-readable dump of the three fields, used for logging.
    // Declared const so it can also be called on const JsonPath objects.
    std::string debug_string() const {
        std::stringstream ss;
        ss << "key: " << key << ", idx: " << idx << ", valid: " << is_valid;
        return ss.str();
    }
};
|
||||
|
||||
class JsonFunctions {
|
||||
public:
|
||||
static void init();
|
||||
@ -45,8 +62,22 @@ public:
|
||||
const doris_udf::StringVal& path);
|
||||
|
||||
static rapidjson::Value* get_json_object(
|
||||
FunctionContext* context,
|
||||
const std::string& json_string, const std::string& path_string,
|
||||
const JsonFunctionType& fntype, rapidjson::Document* document);
|
||||
|
||||
static void json_path_prepare(
|
||||
doris_udf::FunctionContext*,
|
||||
doris_udf::FunctionContext::FunctionStateScope);
|
||||
|
||||
static void json_path_close(
|
||||
doris_udf::FunctionContext*,
|
||||
doris_udf::FunctionContext::FunctionStateScope);
|
||||
private:
|
||||
|
||||
static void get_parsed_paths(
|
||||
const std::vector<std::string>& path_exprs,
|
||||
std::vector<JsonPath>* parsed_paths);
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -69,6 +69,8 @@ Status KafkaDataConsumer::init(StreamLoadContext* ctx) {
|
||||
// TODO: set it larger than 0 after we set rd_kafka_conf_set_stats_cb()
|
||||
RETURN_IF_ERROR(set_conf("statistics.interval.ms", "0"));
|
||||
RETURN_IF_ERROR(set_conf("auto.offset.reset", "error"));
|
||||
RETURN_IF_ERROR(set_conf("api.version.request", "true"));
|
||||
RETURN_IF_ERROR(set_conf("api.version.fallback.ms", "0"));
|
||||
|
||||
if (conf->set("event_cb", &_k_event_cb, errstr) != RdKafka::Conf::CONF_OK) {
|
||||
std::stringstream ss;
|
||||
|
||||
@ -18,6 +18,8 @@
|
||||
#include "exprs/json_functions.h"
|
||||
|
||||
#include <string>
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <gtest/gtest.h>
|
||||
#include <rapidjson/stringbuffer.h>
|
||||
#include <rapidjson/document.h>
|
||||
@ -27,6 +29,7 @@
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "common/object_pool.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/stopwatch.hpp"
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -42,14 +45,14 @@ TEST_F(JsonFunctionTest, string)
|
||||
std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}");
|
||||
std::string path_string("$.id");
|
||||
rapidjson::Document document1;
|
||||
rapidjson::Value* res1 = JsonFunctions::get_json_object(json_string, path_string,
|
||||
rapidjson::Value* res1 = JsonFunctions::get_json_object(nullptr, json_string, path_string,
|
||||
JSON_FUN_STRING, &document1);
|
||||
ASSERT_EQ(std::string(res1->GetString()), "name");
|
||||
|
||||
std::string json_string2("{\"price a\": [0,1,2],\"couponFee\":0}");
|
||||
std::string path_string2("$.price a");
|
||||
rapidjson::Document document2;
|
||||
rapidjson::Value* res2 = JsonFunctions::get_json_object(json_string2, path_string2,
|
||||
rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2,
|
||||
JSON_FUN_STRING, &document2);
|
||||
rapidjson::StringBuffer buf2;
|
||||
rapidjson::Writer<rapidjson::StringBuffer> writer2(buf2);
|
||||
@ -59,7 +62,7 @@ TEST_F(JsonFunctionTest, string)
|
||||
std::string json_string3("{\"price a\": [],\"couponFee\":0}");
|
||||
std::string path_string3("$.price a");
|
||||
rapidjson::Document document3;
|
||||
rapidjson::Value* res3 = JsonFunctions::get_json_object(json_string3, path_string3,
|
||||
rapidjson::Value* res3 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3,
|
||||
JSON_FUN_STRING, &document3);
|
||||
rapidjson::StringBuffer buf3;
|
||||
rapidjson::Writer<rapidjson::StringBuffer> writer3(buf3);
|
||||
@ -69,16 +72,16 @@ TEST_F(JsonFunctionTest, string)
|
||||
std::string json_string4("{\"price a\": [],\"couponFee\":null}");
|
||||
std::string path_string4("$.couponFee");
|
||||
rapidjson::Document document4;
|
||||
rapidjson::Value* res4 = JsonFunctions::get_json_object(json_string4, path_string4,
|
||||
rapidjson::Value* res4 = JsonFunctions::get_json_object(nullptr, json_string4, path_string4,
|
||||
JSON_FUN_STRING, &document4);
|
||||
ASSERT_TRUE(res4->IsNull());
|
||||
|
||||
std::string json_string5("{\"blockNames\": {},"
|
||||
std::string json_string5("{\"blockNames\": {},"
|
||||
"\"seatCategories\": [{\"areas\": [{\"areaId\": 205705999,\"blockIds\": []},"
|
||||
"{\"areaId\": 205705998,\"blockIds\": []}],\"seatCategoryId\": 338937290}]}");
|
||||
std::string path_string5_1("$.blockNames");
|
||||
rapidjson::Document document5_1;
|
||||
rapidjson::Value* res5_1 = JsonFunctions::get_json_object(json_string5, path_string5_1,
|
||||
rapidjson::Value* res5_1 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_1,
|
||||
JSON_FUN_STRING, &document5_1);
|
||||
rapidjson::StringBuffer buf5_1;
|
||||
rapidjson::Writer<rapidjson::StringBuffer> writer5_1(buf5_1);
|
||||
@ -87,7 +90,7 @@ TEST_F(JsonFunctionTest, string)
|
||||
|
||||
std::string path_string5_2("$.seatCategories.areas.blockIds");
|
||||
rapidjson::Document document5_2;
|
||||
rapidjson::Value* res5_2 = JsonFunctions::get_json_object(json_string5, path_string5_2,
|
||||
rapidjson::Value* res5_2 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_2,
|
||||
JSON_FUN_STRING, &document5_2);
|
||||
rapidjson::StringBuffer buf5_2;
|
||||
rapidjson::Writer<rapidjson::StringBuffer> writer5_2(buf5_2);
|
||||
@ -96,7 +99,7 @@ TEST_F(JsonFunctionTest, string)
|
||||
|
||||
std::string path_string5_3("$.seatCategories.areas[0].areaId");
|
||||
rapidjson::Document document5_3;
|
||||
rapidjson::Value* res5_3 = JsonFunctions::get_json_object(json_string5, path_string5_3,
|
||||
rapidjson::Value* res5_3 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_3,
|
||||
JSON_FUN_STRING, &document5_2);
|
||||
rapidjson::StringBuffer buf5_3;
|
||||
rapidjson::Writer<rapidjson::StringBuffer> writer5_3(buf5_3);
|
||||
@ -109,7 +112,7 @@ TEST_F(JsonFunctionTest, int)
|
||||
std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}");
|
||||
std::string path_string("$.age");
|
||||
rapidjson::Document document;
|
||||
rapidjson::Value* res = JsonFunctions::get_json_object(json_string, path_string,
|
||||
rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string,
|
||||
JSON_FUN_INT, &document);
|
||||
ASSERT_EQ(res->GetInt(), 11);
|
||||
|
||||
@ -118,27 +121,27 @@ TEST_F(JsonFunctionTest, int)
|
||||
"{\"id\":[{\"kk\":\"cc\"}]}]}");
|
||||
std::string path_string1("$.list.id.aa[0]");
|
||||
rapidjson::Document document1;
|
||||
rapidjson::Value* res1 = JsonFunctions::get_json_object(json_string1, path_string1,
|
||||
rapidjson::Value* res1 = JsonFunctions::get_json_object(nullptr, json_string1, path_string1,
|
||||
JSON_FUN_INT, &document1);
|
||||
ASSERT_EQ(res1->GetInt(), 1);
|
||||
|
||||
std::string json_string2("[1,2,3,5,8,0]");
|
||||
std::string path_string2("$.[3]");
|
||||
rapidjson::Document document2;
|
||||
rapidjson::Value* res2 = JsonFunctions::get_json_object(json_string2, path_string2,
|
||||
rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2,
|
||||
JSON_FUN_INT, &document2);
|
||||
ASSERT_EQ(res2->GetInt(), 5);
|
||||
|
||||
std::string json_string3("{\"price a\": [0,1,2],\"couponFee\":0.0}");
|
||||
std::string path_string3_1("$.price a[3]");
|
||||
rapidjson::Document document3_1;
|
||||
rapidjson::Value* res3_1 = JsonFunctions::get_json_object(json_string3, path_string3_1,
|
||||
rapidjson::Value* res3_1 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3_1,
|
||||
JSON_FUN_INT, &document3_1);
|
||||
ASSERT_TRUE(res3_1->IsNull());
|
||||
|
||||
std::string path_string3_2("$.couponFee");
|
||||
rapidjson::Document document3_2;
|
||||
rapidjson::Value* res3_2 = JsonFunctions::get_json_object(json_string3, path_string3_2,
|
||||
rapidjson::Value* res3_2 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3_2,
|
||||
JSON_FUN_INT, &document3_2);
|
||||
ASSERT_FALSE(res3_2->IsInt());
|
||||
}
|
||||
@ -148,17 +151,41 @@ TEST_F(JsonFunctionTest, double)
|
||||
std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}");
|
||||
std::string path_string("$.money");
|
||||
rapidjson::Document document;
|
||||
rapidjson::Value* res = JsonFunctions::get_json_object(json_string, path_string,
|
||||
rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string,
|
||||
JSON_FUN_DOUBLE, &document);
|
||||
ASSERT_EQ(res->GetDouble(), 123000.789);
|
||||
|
||||
std::string path_string2("$.age");
|
||||
rapidjson::Document document2;
|
||||
rapidjson::Value* res2 = JsonFunctions::get_json_object(json_string, path_string2,
|
||||
rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string, path_string2,
|
||||
JSON_FUN_DOUBLE, &document2);
|
||||
ASSERT_EQ(res2->GetInt(), 11);
|
||||
}
|
||||
|
||||
// Paths whose keys contain characters that are special in a json path:
// a '.' inside a double-quoted key, and a '|' inside an unquoted key.
TEST_F(JsonFunctionTest, special_char)
{
    // Quoted key containing a dot, followed by an array subscript.
    {
        std::string json("{\"key with.dot\": [\"v1\", \"v2\"]}");
        std::string path("$.\"key with.dot\"[1]");
        rapidjson::Document doc;
        rapidjson::Value* found =
                JsonFunctions::get_json_object(nullptr, json, path, JSON_FUN_DOUBLE, &doc);
        ASSERT_TRUE(found->GetString() != nullptr);
        ASSERT_EQ(std::string(found->GetString()), "v2");
    }

    // Unquoted key containing '|', followed by an array subscript.
    {
        std::string json("{\"key with|\": [\"v1\", \"v2\"]}");
        std::string path("$.key with|[0]");
        rapidjson::Document doc;
        rapidjson::Value* found =
                JsonFunctions::get_json_object(nullptr, json, path, JSON_FUN_DOUBLE, &doc);
        ASSERT_TRUE(found->GetString() != nullptr);
        ASSERT_EQ(std::string(found->GetString()), "v1");
    }

    // Two quoted, dotted keys on consecutive levels with a subscript in between.
    {
        std::string json("{\"key with.dot\": [{\"key2.dot\":\"v1\"}, {\"key3.dot\":\"v2\"}]}");
        std::string path("$.\"key with.dot\"[0].\"key2.dot\"");
        rapidjson::Document doc;
        rapidjson::Value* found =
                JsonFunctions::get_json_object(nullptr, json, path, JSON_FUN_DOUBLE, &doc);
        ASSERT_TRUE(found->GetString() != nullptr);
        ASSERT_EQ(std::string(found->GetString()), "v1");
    }
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
@ -78,7 +78,7 @@ public:
|
||||
~HttpClientTest() override { }
|
||||
|
||||
static void SetUpTestCase() {
|
||||
s_server = new EvHttpServer(29386);
|
||||
s_server = new EvHttpServer(29998);
|
||||
s_server->register_handler(GET, "/simple_get", &s_simple_get_handler);
|
||||
s_server->register_handler(HEAD, "/simple_get", &s_simple_get_handler);
|
||||
s_server->register_handler(POST, "/simple_post", &s_simple_post_handler);
|
||||
@ -92,7 +92,7 @@ public:
|
||||
|
||||
TEST_F(HttpClientTest, get_normal) {
|
||||
HttpClient client;
|
||||
auto st = client.init("http://127.0.0.1:29386/simple_get");
|
||||
auto st = client.init("http://127.0.0.1:29998/simple_get");
|
||||
ASSERT_TRUE(st.ok());
|
||||
client.set_method(GET);
|
||||
client.set_basic_auth("test1", "");
|
||||
@ -102,7 +102,7 @@ TEST_F(HttpClientTest, get_normal) {
|
||||
ASSERT_STREQ("test1", response.c_str());
|
||||
|
||||
// for head
|
||||
st = client.init("http://127.0.0.1:29386/simple_get");
|
||||
st = client.init("http://127.0.0.1:29998/simple_get");
|
||||
ASSERT_TRUE(st.ok());
|
||||
client.set_method(HEAD);
|
||||
client.set_basic_auth("test1", "");
|
||||
@ -113,7 +113,7 @@ TEST_F(HttpClientTest, get_normal) {
|
||||
|
||||
TEST_F(HttpClientTest, download) {
|
||||
HttpClient client;
|
||||
auto st = client.init("http://127.0.0.1:29386/simple_get");
|
||||
auto st = client.init("http://127.0.0.1:29998/simple_get");
|
||||
ASSERT_TRUE(st.ok());
|
||||
client.set_basic_auth("test1", "");
|
||||
std::string local_file = ".http_client_test.dat";
|
||||
@ -129,7 +129,7 @@ TEST_F(HttpClientTest, download) {
|
||||
|
||||
TEST_F(HttpClientTest, get_failed) {
|
||||
HttpClient client;
|
||||
auto st = client.init("http://127.0.0.1:29386/simple_get");
|
||||
auto st = client.init("http://127.0.0.1:29998/simple_get");
|
||||
ASSERT_TRUE(st.ok());
|
||||
client.set_method(GET);
|
||||
client.set_basic_auth("test1", "");
|
||||
@ -140,7 +140,7 @@ TEST_F(HttpClientTest, get_failed) {
|
||||
|
||||
TEST_F(HttpClientTest, post_normal) {
|
||||
HttpClient client;
|
||||
auto st = client.init("http://127.0.0.1:29386/simple_post");
|
||||
auto st = client.init("http://127.0.0.1:29998/simple_post");
|
||||
ASSERT_TRUE(st.ok());
|
||||
client.set_method(POST);
|
||||
client.set_basic_auth("test1", "");
|
||||
@ -154,7 +154,7 @@ TEST_F(HttpClientTest, post_normal) {
|
||||
|
||||
TEST_F(HttpClientTest, post_failed) {
|
||||
HttpClient client;
|
||||
auto st = client.init("http://127.0.0.1:29386/simple_pos");
|
||||
auto st = client.init("http://127.0.0.1:29998/simple_pos");
|
||||
ASSERT_TRUE(st.ok());
|
||||
client.set_method(POST);
|
||||
client.set_basic_auth("test1", "");
|
||||
|
||||
@ -86,7 +86,7 @@ public:
|
||||
UserFunctionCacheTest() { }
|
||||
virtual ~UserFunctionCacheTest() { }
|
||||
static void SetUpTestCase() {
|
||||
s_server = new EvHttpServer(29386);
|
||||
s_server = new EvHttpServer(29999);
|
||||
s_server->register_handler(GET, "/{FILE}", &s_test_handler);
|
||||
s_server->start();
|
||||
|
||||
@ -130,7 +130,7 @@ TEST_F(UserFunctionCacheTest, download_normal) {
|
||||
// get my_add
|
||||
st = cache.get_function_ptr(1,
|
||||
"_Z6my_addv",
|
||||
"http://127.0.0.1:29386/my_add.so",
|
||||
"http://127.0.0.1:29999/my_add.so",
|
||||
my_add_md5sum, &fn_ptr, &entry);
|
||||
ASSERT_TRUE(st.ok());
|
||||
ASSERT_TRUE(k_is_downloaded);
|
||||
@ -140,7 +140,7 @@ TEST_F(UserFunctionCacheTest, download_normal) {
|
||||
// get my_del
|
||||
st = cache.get_function_ptr(1,
|
||||
"_Z6my_delv",
|
||||
"http://127.0.0.1:29386/my_add.so",
|
||||
"http://127.0.0.1:29999/my_add.so",
|
||||
my_add_md5sum, &fn_ptr, &entry);
|
||||
ASSERT_TRUE(st.ok());
|
||||
ASSERT_NE(nullptr, fn_ptr);
|
||||
@ -149,7 +149,7 @@ TEST_F(UserFunctionCacheTest, download_normal) {
|
||||
// get my_mul
|
||||
st = cache.get_function_ptr(1,
|
||||
"_Z6my_mulv",
|
||||
"http://127.0.0.1:29386/my_add.so",
|
||||
"http://127.0.0.1:29999/my_add.so",
|
||||
my_add_md5sum, &fn_ptr, &entry);
|
||||
ASSERT_FALSE(st.ok());
|
||||
|
||||
@ -165,7 +165,7 @@ TEST_F(UserFunctionCacheTest, load_normal) {
|
||||
UserFunctionCacheEntry* entry = nullptr;
|
||||
st = cache.get_function_ptr(1,
|
||||
"_Z6my_addv",
|
||||
"http://127.0.0.1:29386/my_add.so",
|
||||
"http://127.0.0.1:29999/my_add.so",
|
||||
my_add_md5sum, &fn_ptr, &entry);
|
||||
ASSERT_TRUE(st.ok());
|
||||
ASSERT_FALSE(k_is_downloaded);
|
||||
@ -183,7 +183,7 @@ TEST_F(UserFunctionCacheTest, download_fail) {
|
||||
UserFunctionCacheEntry* entry = nullptr;
|
||||
st = cache.get_function_ptr(2,
|
||||
"_Z6my_delv",
|
||||
"http://127.0.0.1:29386/my_del.so",
|
||||
"http://127.0.0.1:29999/my_del.so",
|
||||
my_add_md5sum, &fn_ptr, &entry);
|
||||
ASSERT_FALSE(st.ok());
|
||||
}
|
||||
@ -199,7 +199,7 @@ TEST_F(UserFunctionCacheTest, md5_fail) {
|
||||
UserFunctionCacheEntry* entry = nullptr;
|
||||
st = cache.get_function_ptr(1,
|
||||
"_Z6my_addv",
|
||||
"http://127.0.0.1:29386/my_add.so",
|
||||
"http://127.0.0.1:29999/my_add.so",
|
||||
"1234", &fn_ptr, &entry);
|
||||
ASSERT_FALSE(st.ok());
|
||||
}
|
||||
@ -218,7 +218,7 @@ TEST_F(UserFunctionCacheTest, bad_so) {
|
||||
UserFunctionCacheEntry* entry = nullptr;
|
||||
st = cache.get_function_ptr(2,
|
||||
"_Z6my_addv",
|
||||
"http://127.0.0.1:29386/my_add.so",
|
||||
"http://127.0.0.1:29999/my_add.so",
|
||||
"abc", &fn_ptr, &entry);
|
||||
ASSERT_FALSE(st.ok());
|
||||
}
|
||||
|
||||
@ -0,0 +1,47 @@
|
||||
# get_json_double
|
||||
|
||||
## Syntax
|
||||
|
||||
`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path)`
|
||||
|
||||
## Description
|
||||
|
||||
解析并获取 json 字符串内指定路径的浮点型内容。
|
||||
其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。
|
||||
使用 [ ] 表示数组下标,从 0 开始。
|
||||
path 的内容不能包含 ", [ 和 ]。
|
||||
如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。
|
||||
|
||||
## Examples
|
||||
|
||||
1. 获取 key 为 "k1" 的 value
|
||||
|
||||
```
|
||||
mysql> SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1");
|
||||
+-------------------------------------------------+
|
||||
| get_json_double('{"k1":1.3, "k2":"2"}', '$.k1') |
|
||||
+-------------------------------------------------+
|
||||
| 1.3 |
|
||||
+-------------------------------------------------+
|
||||
```
|
||||
|
||||
2. 获取 key 为 "my.key" 的数组中第二个元素
|
||||
|
||||
```
|
||||
mysql> SELECT get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]');
|
||||
+---------------------------------------------------------------------------+
|
||||
| get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]') |
|
||||
+---------------------------------------------------------------------------+
|
||||
| 2.2 |
|
||||
+---------------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
3. 获取二级路径为 k1.key -> k2 的数组中,第一个元素
|
||||
```
|
||||
mysql> SELECT get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]');
|
||||
+---------------------------------------------------------------------+
|
||||
| get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]') |
|
||||
+---------------------------------------------------------------------+
|
||||
| 1.1 |
|
||||
+---------------------------------------------------------------------+
|
||||
```
|
||||
@ -0,0 +1,47 @@
|
||||
# get_json_int
|
||||
|
||||
## Syntax
|
||||
|
||||
`INT get_json_int(VARCHAR json_str, VARCHAR json_path)`
|
||||
|
||||
## Description
|
||||
|
||||
解析并获取 json 字符串内指定路径的整型内容。
|
||||
其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。
|
||||
使用 [ ] 表示数组下标,从 0 开始。
|
||||
path 的内容不能包含 ", [ 和 ]。
|
||||
如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。
|
||||
|
||||
## Examples
|
||||
|
||||
1. 获取 key 为 "k1" 的 value
|
||||
|
||||
```
|
||||
mysql> SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1");
|
||||
+--------------------------------------------+
|
||||
| get_json_int('{"k1":1, "k2":"2"}', '$.k1') |
|
||||
+--------------------------------------------+
|
||||
| 1 |
|
||||
+--------------------------------------------+
|
||||
```
|
||||
|
||||
2. 获取 key 为 "my.key" 的数组中第二个元素
|
||||
|
||||
```
|
||||
mysql> SELECT get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]');
|
||||
+------------------------------------------------------------------+
|
||||
| get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]') |
|
||||
+------------------------------------------------------------------+
|
||||
| 2 |
|
||||
+------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
3. 获取二级路径为 k1.key -> k2 的数组中,第一个元素
|
||||
```
|
||||
mysql> SELECT get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]');
|
||||
+--------------------------------------------------------------+
|
||||
| get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]') |
|
||||
+--------------------------------------------------------------+
|
||||
| 1 |
|
||||
+--------------------------------------------------------------+
|
||||
```
|
||||
@ -0,0 +1,57 @@
|
||||
# get_json_string
|
||||
|
||||
## Syntax
|
||||
|
||||
`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path)`
|
||||
|
||||
## Description
|
||||
|
||||
解析并获取 json 字符串内指定路径的字符串内容。
|
||||
其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。
|
||||
使用 [ ] 表示数组下标,从 0 开始。
|
||||
path 的内容不能包含 ", [ 和 ]。
|
||||
如果 json_string 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。
|
||||
|
||||
## Examples
|
||||
|
||||
1. 获取 key 为 "k1" 的 value
|
||||
|
||||
```
|
||||
mysql> SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1");
|
||||
+---------------------------------------------------+
|
||||
| get_json_string('{"k1":"v1", "k2":"v2"}', '$.k1') |
|
||||
+---------------------------------------------------+
|
||||
| v1 |
|
||||
+---------------------------------------------------+
|
||||
```
|
||||
|
||||
2. 获取 key 为 "my.key" 的数组中第二个元素
|
||||
|
||||
```
|
||||
mysql> SELECT get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]');
|
||||
+------------------------------------------------------------------------------+
|
||||
| get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]') |
|
||||
+------------------------------------------------------------------------------+
|
||||
| e2 |
|
||||
+------------------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
3. 获取二级路径为 k1.key -> k2 的数组中,第一个元素
|
||||
```
|
||||
mysql> SELECT get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]');
|
||||
+-----------------------------------------------------------------------+
|
||||
| get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]') |
|
||||
+-----------------------------------------------------------------------+
|
||||
| v1 |
|
||||
+-----------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
4. 获取数组中,key 为 "k1" 的所有 value
|
||||
```
|
||||
mysql> SELECT get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.k1');
|
||||
+---------------------------------------------------------------------------------+
|
||||
| get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.k1') |
|
||||
+---------------------------------------------------------------------------------+
|
||||
| ["v1","v3","v4"] |
|
||||
+---------------------------------------------------------------------------------+
|
||||
```
|
||||
@ -567,13 +567,17 @@ visible_functions = [
|
||||
|
||||
# Json functions
|
||||
[['get_json_int'], 'INT', ['VARCHAR', 'VARCHAR'],
|
||||
'_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_'],
|
||||
'_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
|
||||
'_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
[['get_json_double'], 'DOUBLE', ['VARCHAR', 'VARCHAR'],
|
||||
'_ZN5doris13JsonFunctions15get_json_doubleEPN9doris_udf'
|
||||
'15FunctionContextERKNS1_9StringValES6_'],
|
||||
'_ZN5doris13JsonFunctions15get_json_doubleEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
|
||||
'_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
[['get_json_string'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
|
||||
'_ZN5doris13JsonFunctions15get_json_stringEPN9doris_udf'
|
||||
'15FunctionContextERKNS1_9StringValES6_'],
|
||||
'_ZN5doris13JsonFunctions15get_json_stringEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
|
||||
'_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
'_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'],
|
||||
|
||||
#hll function
|
||||
[['hll_cardinality'], 'BIGINT', ['HLL'],
|
||||
|
||||
@ -156,6 +156,7 @@ ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test
|
||||
|
||||
## Running exprs unit test
|
||||
${DORIS_TEST_BINARY_DIR}/exprs/string_functions_test
|
||||
${DORIS_TEST_BINARY_DIR}/exprs/json_function_test
|
||||
|
||||
## Running exec unit test
|
||||
${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_uncompressed_test
|
||||
|
||||
Reference in New Issue
Block a user