diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 1b3bd70e60..c40d585069 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -40,7 +41,9 @@ namespace doris { -static const re2::RE2 JSON_PATTERN("^([a-zA-Z0-9_\\-\\:\\s]*)(?:\\[([0-9]+)\\])?"); +// static const re2::RE2 JSON_PATTERN("^([a-zA-Z0-9_\\-\\:\\s#\\|\\.]*)(?:\\[([0-9]+)\\])?"); +// json path cannot contains: ", [, ] +static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+)\\])?"); void JsonFunctions::init() { } @@ -54,7 +57,7 @@ IntVal JsonFunctions::get_json_int( std::string path_string((char*)path.ptr, path.len); rapidjson::Document document; rapidjson::Value* root = - get_json_object(json_string, path_string, JSON_FUN_INT, &document); + get_json_object(context, json_string, path_string, JSON_FUN_INT, &document); if (root->IsInt()) { return IntVal(root->GetInt()); } else { @@ -67,11 +70,12 @@ StringVal JsonFunctions::get_json_string( if (json_str.is_null || path.is_null) { return StringVal::null(); } + std::string json_string((char*)json_str.ptr, json_str.len); std::string path_string((char*)path.ptr, path.len); rapidjson::Document document; rapidjson::Value* root = - get_json_object(json_string, path_string, JSON_FUN_STRING, &document); + get_json_object(context, json_string, path_string, JSON_FUN_STRING, &document); if (root->IsNull()) { return StringVal::null(); } else if (root->IsString()) { @@ -93,7 +97,7 @@ DoubleVal JsonFunctions::get_json_double( std::string path_string((char*)path.ptr, path.len); rapidjson::Document document; rapidjson::Value* root = - get_json_object(json_string, path_string, JSON_FUN_DOUBLE, &document); + get_json_object(context, json_string, path_string, JSON_FUN_DOUBLE, &document); if (root->IsInt()) { return DoubleVal(static_cast(root->GetInt())); } else if (root->IsDouble()) { @@ -104,18 +108,41 @@ DoubleVal 
JsonFunctions::get_json_double( } rapidjson::Value* JsonFunctions::get_json_object( + FunctionContext* context, const std::string& json_string, const std::string& path_string, const JsonFunctionType& fntype, rapidjson::Document* document) { - std::vector path_exprs; - boost::split(path_exprs, path_string, boost::is_any_of(".")); - if (path_exprs[0] != "$") { + // split path by ".", and escape quota by "\" + // eg: + // '$.text#abc.xyz' -> [$, text#abc, xyz] + // '$."text.abc".xyz' -> [$, text.abc, xyz] + // '$."text.abc"[1].xyz' -> [$, text.abc[1], xyz] + std::vector* parsed_paths; + std::vector tmp_parsed_paths; +#ifndef BE_TEST + parsed_paths = reinterpret_cast*>(context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + if (parsed_paths == nullptr) { + boost::tokenizer > tok(path_string, boost::escaped_list_separator("\\", ".", "\"")); + std::vector paths(tok.begin(), tok.end()); + get_parsed_paths(paths, &tmp_parsed_paths); + parsed_paths = &tmp_parsed_paths; + } +#else + boost::tokenizer > tok(path_string, boost::escaped_list_separator("\\", ".", "\"")); + std::vector paths(tok.begin(), tok.end()); + get_parsed_paths(paths, &tmp_parsed_paths); + parsed_paths = &tmp_parsed_paths; +#endif + + VLOG(10) << "first parsed path: " << (*parsed_paths)[0].debug_string(); + + if (!(*parsed_paths)[0].is_valid) { return document; } - if (UNLIKELY(path_exprs.size() == 1)) { + if (UNLIKELY((*parsed_paths).size() == 1)) { if (fntype == JSON_FUN_STRING) { document->SetString(json_string.c_str(), document->GetAllocator()); } else { @@ -134,18 +161,19 @@ rapidjson::Value* JsonFunctions::get_json_object( rapidjson::Value* root = document; rapidjson::Value* array_obj = NULL; - // eg: list[0],use regex parse path_string's result is 'list' - std::string col; - std::string index; - for (int i = 1; i < path_exprs.size(); i++) { + for (int i = 1; i < (*parsed_paths).size(); i++) { + VLOG(10) << "parsed_paths: " << (*parsed_paths)[i].debug_string(); + if (root->IsNull()) { break; 
} - if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, &index))) { + if (UNLIKELY(!(*parsed_paths)[i].is_valid)) { root->SetNull(); } + std::string& col = (*parsed_paths)[i].key; + int index = (*parsed_paths)[i].idx; if (LIKELY(!col.empty())) { if (root->IsArray()) { array_obj = static_cast( @@ -190,15 +218,14 @@ rapidjson::Value* JsonFunctions::get_json_object( } } - if (UNLIKELY(!index.empty())) { + if (UNLIKELY(index != -1)) { // judge the rapidjson:Value, which base the top's result, // if not array return NULL;else get the index value from the array if (root->IsArray()) { - int index_match = atoi(index.c_str()); - if (root->IsNull() || index_match >= root->Size()) { + if (root->IsNull() || index >= root->Size()) { root->SetNull(); } else { - root = &((*root)[index_match]); + root = &((*root)[index]); } } else { root->SetNull(); @@ -209,5 +236,71 @@ rapidjson::Value* JsonFunctions::get_json_object( return root; } +// Prepare hook: when the json path argument (arg 1) is a non-null constant, parse it once +// and store the parsed path as FRAGMENT_LOCAL function state; json_path_close frees it. +void JsonFunctions::json_path_prepare( + doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + + if (!context->is_arg_constant(1)) { + return; + } + StringVal* path = reinterpret_cast(context->get_constant_arg(1)); + if (path->is_null) { + return; + } + + std::string path_str(reinterpret_cast<char*>(path->ptr), path->len); // must outlive tok: boost::tokenizer only keeps iterators into its input + boost::tokenizer<boost::escaped_list_separator<char> > tok( + path_str, boost::escaped_list_separator<char>("\\", ".", "\"")); + std::vector path_exprs(tok.begin(), tok.end()); + std::vector* parsed_paths = new std::vector(); + get_parsed_paths(path_exprs, parsed_paths); + + context->set_function_state(scope, parsed_paths); + VLOG(10) << "prepare json path. 
size: " << parsed_paths->size(); +} + +void JsonFunctions::get_parsed_paths( + const std::vector& path_exprs, + std::vector* parsed_paths) { + + if (path_exprs.empty() || path_exprs[0] != "$") { // an empty path yields no tokens: record it as invalid instead of reading path_exprs[0] out of bounds + parsed_paths->emplace_back("", -1, false); + } else { + parsed_paths->emplace_back("$", -1, true); + } + + for (int i = 1; i < path_exprs.size(); i++) { + std::string col; + std::string index; + if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, &index))) { + parsed_paths->emplace_back("", -1, false); + } else { + int idx = -1; + if (!index.empty()) { + idx = atoi(index.c_str()); + } + parsed_paths->emplace_back(col, idx, true); + } + } +} + +void JsonFunctions::json_path_close( + doris_udf::FunctionContext* context, + doris_udf::FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { + return; + } + std::vector* parsed_paths = reinterpret_cast*>(context->get_function_state(scope)); + if (parsed_paths != nullptr) { + delete parsed_paths; + VLOG(10) << "close json path"; + } +} + } diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index 6ee72624da..c16157cdc4 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -31,6 +31,23 @@ class Expr; class OpcodeRegistry; class TupleRow; +struct JsonPath { + std::string key; // key of a json object + int idx; // array index of a json array, -1 means not set + bool is_valid; // true if the path is successfully parsed + + JsonPath(const std::string& key_, int idx_, bool is_valid_): + key(key_), + idx(idx_), + is_valid(is_valid_) {} + + std::string debug_string() { + std::stringstream ss; + ss << "key: " << key << ", idx: " << idx << ", valid: " << is_valid; + return ss.str(); + } +}; + class JsonFunctions { public: static void init(); @@ -45,8 +62,22 @@ public: const doris_udf::StringVal& path); static rapidjson::Value* get_json_object( + FunctionContext* context, const std::string& json_string, const std::string& path_string, const JsonFunctionType& 
fntype, rapidjson::Document* document); + + static void json_path_prepare( + doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); + + static void json_path_close( + doris_udf::FunctionContext*, + doris_udf::FunctionContext::FunctionStateScope); +private: + + static void get_parsed_paths( + const std::vector& path_exprs, + std::vector* parsed_paths); }; } #endif diff --git a/be/src/runtime/routine_load/data_consumer.cpp b/be/src/runtime/routine_load/data_consumer.cpp index ae6007e884..0efbb1efc4 100644 --- a/be/src/runtime/routine_load/data_consumer.cpp +++ b/be/src/runtime/routine_load/data_consumer.cpp @@ -69,6 +69,8 @@ Status KafkaDataConsumer::init(StreamLoadContext* ctx) { // TODO: set it larger than 0 after we set rd_kafka_conf_set_stats_cb() RETURN_IF_ERROR(set_conf("statistics.interval.ms", "0")); RETURN_IF_ERROR(set_conf("auto.offset.reset", "error")); + RETURN_IF_ERROR(set_conf("api.version.request", "true")); + RETURN_IF_ERROR(set_conf("api.version.fallback.ms", "0")); if (conf->set("event_cb", &_k_event_cb, errstr) != RdKafka::Conf::CONF_OK) { std::stringstream ss; diff --git a/be/test/exprs/json_function_test.cpp b/be/test/exprs/json_function_test.cpp index b58e8f18f0..164f71205c 100644 --- a/be/test/exprs/json_function_test.cpp +++ b/be/test/exprs/json_function_test.cpp @@ -18,6 +18,8 @@ #include "exprs/json_functions.h" #include +#include +#include #include #include #include @@ -27,6 +29,7 @@ #include "runtime/runtime_state.h" #include "common/object_pool.h" #include "util/logging.h" +#include "util/stopwatch.hpp" namespace doris { @@ -42,14 +45,14 @@ TEST_F(JsonFunctionTest, string) std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}"); std::string path_string("$.id"); rapidjson::Document document1; - rapidjson::Value* res1 = JsonFunctions::get_json_object(json_string, path_string, + rapidjson::Value* res1 = JsonFunctions::get_json_object(nullptr, json_string, path_string, JSON_FUN_STRING, 
&document1); ASSERT_EQ(std::string(res1->GetString()), "name"); std::string json_string2("{\"price a\": [0,1,2],\"couponFee\":0}"); std::string path_string2("$.price a"); rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(json_string2, path_string2, + rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2, JSON_FUN_STRING, &document2); rapidjson::StringBuffer buf2; rapidjson::Writer writer2(buf2); @@ -59,7 +62,7 @@ TEST_F(JsonFunctionTest, string) std::string json_string3("{\"price a\": [],\"couponFee\":0}"); std::string path_string3("$.price a"); rapidjson::Document document3; - rapidjson::Value* res3 = JsonFunctions::get_json_object(json_string3, path_string3, + rapidjson::Value* res3 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3, JSON_FUN_STRING, &document3); rapidjson::StringBuffer buf3; rapidjson::Writer writer3(buf3); @@ -69,16 +72,16 @@ TEST_F(JsonFunctionTest, string) std::string json_string4("{\"price a\": [],\"couponFee\":null}"); std::string path_string4("$.couponFee"); rapidjson::Document document4; - rapidjson::Value* res4 = JsonFunctions::get_json_object(json_string4, path_string4, + rapidjson::Value* res4 = JsonFunctions::get_json_object(nullptr, json_string4, path_string4, JSON_FUN_STRING, &document4); ASSERT_TRUE(res4->IsNull()); - std::string json_string5("{\"blockNames\": {}," + std::string json_string5("{\"blockNames\": {}," "\"seatCategories\": [{\"areas\": [{\"areaId\": 205705999,\"blockIds\": []}," "{\"areaId\": 205705998,\"blockIds\": []}],\"seatCategoryId\": 338937290}]}"); std::string path_string5_1("$.blockNames"); rapidjson::Document document5_1; - rapidjson::Value* res5_1 = JsonFunctions::get_json_object(json_string5, path_string5_1, + rapidjson::Value* res5_1 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_1, JSON_FUN_STRING, &document5_1); rapidjson::StringBuffer buf5_1; rapidjson::Writer writer5_1(buf5_1); @@ -87,7 
+90,7 @@ TEST_F(JsonFunctionTest, string) std::string path_string5_2("$.seatCategories.areas.blockIds"); rapidjson::Document document5_2; - rapidjson::Value* res5_2 = JsonFunctions::get_json_object(json_string5, path_string5_2, + rapidjson::Value* res5_2 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_2, JSON_FUN_STRING, &document5_2); rapidjson::StringBuffer buf5_2; rapidjson::Writer writer5_2(buf5_2); @@ -96,7 +99,7 @@ TEST_F(JsonFunctionTest, string) std::string path_string5_3("$.seatCategories.areas[0].areaId"); rapidjson::Document document5_3; - rapidjson::Value* res5_3 = JsonFunctions::get_json_object(json_string5, path_string5_3, + rapidjson::Value* res5_3 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_3, JSON_FUN_STRING, &document5_2); rapidjson::StringBuffer buf5_3; rapidjson::Writer writer5_3(buf5_3); @@ -109,7 +112,7 @@ TEST_F(JsonFunctionTest, int) std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}"); std::string path_string("$.age"); rapidjson::Document document; - rapidjson::Value* res = JsonFunctions::get_json_object(json_string, path_string, + rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string, JSON_FUN_INT, &document); ASSERT_EQ(res->GetInt(), 11); @@ -118,27 +121,27 @@ TEST_F(JsonFunctionTest, int) "{\"id\":[{\"kk\":\"cc\"}]}]}"); std::string path_string1("$.list.id.aa[0]"); rapidjson::Document document1; - rapidjson::Value* res1 = JsonFunctions::get_json_object(json_string1, path_string1, + rapidjson::Value* res1 = JsonFunctions::get_json_object(nullptr, json_string1, path_string1, JSON_FUN_INT, &document1); ASSERT_EQ(res1->GetInt(), 1); std::string json_string2("[1,2,3,5,8,0]"); std::string path_string2("$.[3]"); rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(json_string2, path_string2, + rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2, JSON_FUN_INT, 
&document2); ASSERT_EQ(res2->GetInt(), 5); std::string json_string3("{\"price a\": [0,1,2],\"couponFee\":0.0}"); std::string path_string3_1("$.price a[3]"); rapidjson::Document document3_1; - rapidjson::Value* res3_1 = JsonFunctions::get_json_object(json_string3, path_string3_1, + rapidjson::Value* res3_1 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3_1, JSON_FUN_INT, &document3_1); ASSERT_TRUE(res3_1->IsNull()); std::string path_string3_2("$.couponFee"); rapidjson::Document document3_2; - rapidjson::Value* res3_2 = JsonFunctions::get_json_object(json_string3, path_string3_2, + rapidjson::Value* res3_2 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3_2, JSON_FUN_INT, &document3_2); ASSERT_FALSE(res3_2->IsInt()); } @@ -148,17 +151,41 @@ TEST_F(JsonFunctionTest, double) std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}"); std::string path_string("$.money"); rapidjson::Document document; - rapidjson::Value* res = JsonFunctions::get_json_object(json_string, path_string, + rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string, JSON_FUN_DOUBLE, &document); ASSERT_EQ(res->GetDouble(), 123000.789); std::string path_string2("$.age"); rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(json_string, path_string2, + rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string, path_string2, JSON_FUN_DOUBLE, &document2); ASSERT_EQ(res2->GetInt(), 11); } +TEST_F(JsonFunctionTest, special_char) +{ + std::string json_string("{\"key with.dot\": [\"v1\", \"v2\"]}"); + std::string path_string("$.\"key with.dot\"[1]"); + rapidjson::Document document; + rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string, JSON_FUN_DOUBLE, &document); + ASSERT_FALSE(res->GetString() == nullptr); + ASSERT_EQ(std::string(res->GetString()), "v2"); + + std::string json_string2("{\"key with|\": [\"v1\", \"v2\"]}"); + 
std::string path_string2("$.key with|[0]"); + rapidjson::Document document2; + rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2, JSON_FUN_DOUBLE, &document2); + ASSERT_FALSE(res2->GetString() == nullptr); + ASSERT_EQ(std::string(res2->GetString()), "v1"); + + std::string json_string3("{\"key with.dot\": [{\"key2.dot\":\"v1\"}, {\"key3.dot\":\"v2\"}]}"); + std::string path_string3("$.\"key with.dot\"[0].\"key2.dot\""); + rapidjson::Document document3; + rapidjson::Value* res3 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3, JSON_FUN_DOUBLE, &document3); + ASSERT_FALSE(res3->GetString() == nullptr); + ASSERT_EQ(std::string(res3->GetString()), "v1"); +} + } int main(int argc, char** argv) { diff --git a/be/test/http/http_client_test.cpp b/be/test/http/http_client_test.cpp index e75a299142..ccfb74cf3e 100644 --- a/be/test/http/http_client_test.cpp +++ b/be/test/http/http_client_test.cpp @@ -78,7 +78,7 @@ public: ~HttpClientTest() override { } static void SetUpTestCase() { - s_server = new EvHttpServer(29386); + s_server = new EvHttpServer(29998); s_server->register_handler(GET, "/simple_get", &s_simple_get_handler); s_server->register_handler(HEAD, "/simple_get", &s_simple_get_handler); s_server->register_handler(POST, "/simple_post", &s_simple_post_handler); @@ -92,7 +92,7 @@ public: TEST_F(HttpClientTest, get_normal) { HttpClient client; - auto st = client.init("http://127.0.0.1:29386/simple_get"); + auto st = client.init("http://127.0.0.1:29998/simple_get"); ASSERT_TRUE(st.ok()); client.set_method(GET); client.set_basic_auth("test1", ""); @@ -102,7 +102,7 @@ TEST_F(HttpClientTest, get_normal) { ASSERT_STREQ("test1", response.c_str()); // for head - st = client.init("http://127.0.0.1:29386/simple_get"); + st = client.init("http://127.0.0.1:29998/simple_get"); ASSERT_TRUE(st.ok()); client.set_method(HEAD); client.set_basic_auth("test1", ""); @@ -113,7 +113,7 @@ TEST_F(HttpClientTest, get_normal) { 
TEST_F(HttpClientTest, download) { HttpClient client; - auto st = client.init("http://127.0.0.1:29386/simple_get"); + auto st = client.init("http://127.0.0.1:29998/simple_get"); ASSERT_TRUE(st.ok()); client.set_basic_auth("test1", ""); std::string local_file = ".http_client_test.dat"; @@ -129,7 +129,7 @@ TEST_F(HttpClientTest, download) { TEST_F(HttpClientTest, get_failed) { HttpClient client; - auto st = client.init("http://127.0.0.1:29386/simple_get"); + auto st = client.init("http://127.0.0.1:29998/simple_get"); ASSERT_TRUE(st.ok()); client.set_method(GET); client.set_basic_auth("test1", ""); @@ -140,7 +140,7 @@ TEST_F(HttpClientTest, get_failed) { TEST_F(HttpClientTest, post_normal) { HttpClient client; - auto st = client.init("http://127.0.0.1:29386/simple_post"); + auto st = client.init("http://127.0.0.1:29998/simple_post"); ASSERT_TRUE(st.ok()); client.set_method(POST); client.set_basic_auth("test1", ""); @@ -154,7 +154,7 @@ TEST_F(HttpClientTest, post_normal) { TEST_F(HttpClientTest, post_failed) { HttpClient client; - auto st = client.init("http://127.0.0.1:29386/simple_pos"); + auto st = client.init("http://127.0.0.1:29998/simple_pos"); ASSERT_TRUE(st.ok()); client.set_method(POST); client.set_basic_auth("test1", ""); diff --git a/be/test/runtime/user_function_cache_test.cpp b/be/test/runtime/user_function_cache_test.cpp index a5f5b23df9..b125938f32 100644 --- a/be/test/runtime/user_function_cache_test.cpp +++ b/be/test/runtime/user_function_cache_test.cpp @@ -86,7 +86,7 @@ public: UserFunctionCacheTest() { } virtual ~UserFunctionCacheTest() { } static void SetUpTestCase() { - s_server = new EvHttpServer(29386); + s_server = new EvHttpServer(29999); s_server->register_handler(GET, "/{FILE}", &s_test_handler); s_server->start(); @@ -130,7 +130,7 @@ TEST_F(UserFunctionCacheTest, download_normal) { // get my_add st = cache.get_function_ptr(1, "_Z6my_addv", - "http://127.0.0.1:29386/my_add.so", + "http://127.0.0.1:29999/my_add.so", my_add_md5sum, &fn_ptr, 
&entry); ASSERT_TRUE(st.ok()); ASSERT_TRUE(k_is_downloaded); @@ -140,7 +140,7 @@ TEST_F(UserFunctionCacheTest, download_normal) { // get my_del st = cache.get_function_ptr(1, "_Z6my_delv", - "http://127.0.0.1:29386/my_add.so", + "http://127.0.0.1:29999/my_add.so", my_add_md5sum, &fn_ptr, &entry); ASSERT_TRUE(st.ok()); ASSERT_NE(nullptr, fn_ptr); @@ -149,7 +149,7 @@ TEST_F(UserFunctionCacheTest, download_normal) { // get my_mul st = cache.get_function_ptr(1, "_Z6my_mulv", - "http://127.0.0.1:29386/my_add.so", + "http://127.0.0.1:29999/my_add.so", my_add_md5sum, &fn_ptr, &entry); ASSERT_FALSE(st.ok()); @@ -165,7 +165,7 @@ TEST_F(UserFunctionCacheTest, load_normal) { UserFunctionCacheEntry* entry = nullptr; st = cache.get_function_ptr(1, "_Z6my_addv", - "http://127.0.0.1:29386/my_add.so", + "http://127.0.0.1:29999/my_add.so", my_add_md5sum, &fn_ptr, &entry); ASSERT_TRUE(st.ok()); ASSERT_FALSE(k_is_downloaded); @@ -183,7 +183,7 @@ TEST_F(UserFunctionCacheTest, download_fail) { UserFunctionCacheEntry* entry = nullptr; st = cache.get_function_ptr(2, "_Z6my_delv", - "http://127.0.0.1:29386/my_del.so", + "http://127.0.0.1:29999/my_del.so", my_add_md5sum, &fn_ptr, &entry); ASSERT_FALSE(st.ok()); } @@ -199,7 +199,7 @@ TEST_F(UserFunctionCacheTest, md5_fail) { UserFunctionCacheEntry* entry = nullptr; st = cache.get_function_ptr(1, "_Z6my_addv", - "http://127.0.0.1:29386/my_add.so", + "http://127.0.0.1:29999/my_add.so", "1234", &fn_ptr, &entry); ASSERT_FALSE(st.ok()); } @@ -218,7 +218,7 @@ TEST_F(UserFunctionCacheTest, bad_so) { UserFunctionCacheEntry* entry = nullptr; st = cache.get_function_ptr(2, "_Z6my_addv", - "http://127.0.0.1:29386/my_add.so", + "http://127.0.0.1:29999/my_add.so", "abc", &fn_ptr, &entry); ASSERT_FALSE(st.ok()); } diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md new file mode 100644 index 0000000000..a7b1bd198c --- 
/dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_double.md @@ -0,0 +1,47 @@ +# get_json_double + +## Syntax + +`DOUBLE get_json_double(VARCHAR json_str, VARCHAR json_path)` + +## Description + +解析并获取 json 字符串内指定路径的浮点型内容。 +其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 +使用 [ ] 表示数组下标,从 0 开始。 +path 的内容不能包含 ", [ 和 ]。 +如果 json_str 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 + +## Examples + +1. 获取 key 为 "k1" 的 value + +``` +mysql> SELECT get_json_double('{"k1":1.3, "k2":"2"}', "$.k1"); ++-------------------------------------------------+ +| get_json_double('{"k1":1.3, "k2":"2"}', '$.k1') | ++-------------------------------------------------+ +| 1.3 | ++-------------------------------------------------+ +``` + +2. 获取 key 为 "my.key" 的数组中第二个元素 + +``` +mysql> SELECT get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]'); ++---------------------------------------------------------------------------+ +| get_json_double('{"k1":"v1", "my.key":[1.1, 2.2, 3.3]}', '$."my.key"[1]') | ++---------------------------------------------------------------------------+ +| 2.2 | ++---------------------------------------------------------------------------+ +``` + +3. 
获取二级路径为 k1.key -> k2 的数组中,第一个元素 +``` +mysql> SELECT get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]'); ++---------------------------------------------------------------------+ +| get_json_double('{"k1.key":{"k2":[1.1, 2.2]}}', '$."k1.key".k2[0]') | ++---------------------------------------------------------------------+ +| 1.1 | ++---------------------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md new file mode 100644 index 0000000000..005fad597c --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_int.md @@ -0,0 +1,47 @@ +# get_json_int + +## Syntax + +`INT get_json_int(VARCHAR json_str, VARCHAR json_path)` + +## Description + +解析并获取 json 字符串内指定路径的整型内容。 +其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 +使用 [ ] 表示数组下标,从 0 开始。 +path 的内容不能包含 ", [ 和 ]。 +如果 json_str 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 + +## Examples + +1. 获取 key 为 "k1" 的 value + +``` +mysql> SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1"); ++--------------------------------------------+ +| get_json_int('{"k1":1, "k2":"2"}', '$.k1') | ++--------------------------------------------+ +| 1 | ++--------------------------------------------+ +``` + +2. 获取 key 为 "my.key" 的数组中第二个元素 + +``` +mysql> SELECT get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]'); ++------------------------------------------------------------------+ +| get_json_int('{"k1":"v1", "my.key":[1, 2, 3]}', '$."my.key"[1]') | ++------------------------------------------------------------------+ +| 2 | ++------------------------------------------------------------------+ +``` + +3. 
获取二级路径为 k1.key -> k2 的数组中,第一个元素 +``` +mysql> SELECT get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]'); ++--------------------------------------------------------------+ +| get_json_int('{"k1.key":{"k2":[1, 2]}}', '$."k1.key".k2[0]') | ++--------------------------------------------------------------+ +| 1 | ++--------------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md new file mode 100644 index 0000000000..ab576a8ac1 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/string-functions/get_json_string.md @@ -0,0 +1,57 @@ +# get_json_string + +## Syntax + +`VARCHAR get_json_string(VARCHAR json_str, VARCHAR json_path)` + +## Description + +解析并获取 json 字符串内指定路径的字符串内容。 +其中 json_path 必须以 $ 符号作为开头,使用 . 作为路径分割符。如果路径中包含 . ,则可以使用双引号包围。 +使用 [ ] 表示数组下标,从 0 开始。 +path 的内容不能包含 ", [ 和 ]。 +如果 json_str 格式不对,或 json_path 格式不对,或无法找到匹配项,则返回 NULL。 + +## Examples + +1. 获取 key 为 "k1" 的 value + +``` +mysql> SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1"); ++---------------------------------------------------+ +| get_json_string('{"k1":"v1", "k2":"v2"}', '$.k1') | ++---------------------------------------------------+ +| v1 | ++---------------------------------------------------+ +``` + +2. 获取 key 为 "my.key" 的数组中第二个元素 + +``` +mysql> SELECT get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]'); ++------------------------------------------------------------------------------+ +| get_json_string('{"k1":"v1", "my.key":["e1", "e2", "e3"]}', '$."my.key"[1]') | ++------------------------------------------------------------------------------+ +| e2 | ++------------------------------------------------------------------------------+ +``` + +3. 
获取二级路径为 k1.key -> k2 的数组中,第一个元素 +``` +mysql> SELECT get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]'); ++-----------------------------------------------------------------------+ +| get_json_string('{"k1.key":{"k2":["v1", "v2"]}}', '$."k1.key".k2[0]') | ++-----------------------------------------------------------------------+ +| v1 | ++-----------------------------------------------------------------------+ +``` + +4. 获取数组中,key 为 "k1" 的所有 value +``` +mysql> SELECT get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.k1'); ++---------------------------------------------------------------------------------+ +| get_json_string('[{"k1":"v1"}, {"k2":"v2"}, {"k1":"v3"}, {"k1":"v4"}]', '$.k1') | ++---------------------------------------------------------------------------------+ +| ["v1","v3","v4"] | ++---------------------------------------------------------------------------------+ +``` diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 1754ed498e..0ba161b0fa 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -567,13 +567,17 @@ visible_functions = [ # Json functions [['get_json_int'], 'INT', ['VARCHAR', 'VARCHAR'], - '_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_'], + '_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', + '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], [['get_json_double'], 'DOUBLE', ['VARCHAR', 'VARCHAR'], - '_ZN5doris13JsonFunctions15get_json_doubleEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_'], + '_ZN5doris13JsonFunctions15get_json_doubleEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', + 
'_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], [['get_json_string'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], - '_ZN5doris13JsonFunctions15get_json_stringEPN9doris_udf' - '15FunctionContextERKNS1_9StringValES6_'], + '_ZN5doris13JsonFunctions15get_json_stringEPN9doris_udf15FunctionContextERKNS1_9StringValES6_', + '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE', + '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE'], #hll function [['hll_cardinality'], 'BIGINT', ['HLL'], diff --git a/run-ut.sh b/run-ut.sh index fb421a0749..6c2f06a983 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -156,6 +156,7 @@ ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test ## Running exprs unit test ${DORIS_TEST_BINARY_DIR}/exprs/string_functions_test +${DORIS_TEST_BINARY_DIR}/exprs/json_function_test ## Running exec unit test ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_uncompressed_test