[improvement](Jsonb) optimization Jsonb path parse (#21495)

The previous logic was to read jsonbvalue while parsing the json path. For complex json paths, there will be a lot of repeated parsing work. The optimization idea is to separate the analysis and value of jsonpath
This commit is contained in:
Liqf
2023-07-23 18:59:12 +08:00
committed by GitHub
parent 4f0158c458
commit ddd7e9871d
2 changed files with 216 additions and 164 deletions

View File

@ -436,10 +436,7 @@ private:
ColumnString::Offsets& res_offsets, NullMap& null_map,
const std::unique_ptr<JsonbWriter>& writer,
std::unique_ptr<JsonbToJson>& formater,
const char* l_raw, int l_size, const char* r_raw,
int r_size, bool& is_invalid_json_path) {
String path(r_raw, r_size);
const char* l_raw, int l_size, JsonbPath& path) {
if (null_map[i]) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
@ -453,9 +450,9 @@ private:
}
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
JsonbValue* value = doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr);
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
if (UNLIKELY(!value) || is_invalid_json_path) {
if (UNLIKELY(!value)) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
}
@ -535,8 +532,18 @@ public:
size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1];
size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off;
const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
JsonbPath path;
if (!path.seek(r_raw, r_size)) {
return Status::InvalidArgument(
"Json path error: {} for value: {}",
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
std::string_view(reinterpret_cast<const char*>(rdata.data()),
rdata.size()));
}
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
r_raw, r_size, is_invalid_json_path);
path);
} else { // will make array string to user
writer->reset();
writer->writeStartArray();
@ -553,9 +560,16 @@ public:
writer->writeNull();
continue;
}
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
JsonbValue* value =
doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr);
JsonbPath path;
if (!path.seek(r_raw, r_size)) {
return Status::InvalidArgument(
"Json path error: {} for value: {}",
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
std::string_view(reinterpret_cast<const char*>(rdata.data()),
rdata.size()));
}
// if not valid json path , should return error message to user
if (is_invalid_json_path) {
return Status::InvalidArgument(
@ -564,6 +578,10 @@ public:
std::string_view(reinterpret_cast<const char*>(rdata.data()),
rdata.size()));
}
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
if (UNLIKELY(!value)) {
writer->writeNull();
} else {
@ -602,8 +620,15 @@ public:
int r_size = roffsets[i] - roffsets[i - 1];
const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
JsonbPath path;
if (!path.seek(r_raw, r_size)) {
is_invalid_json_path = true;
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
}
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
r_raw, r_size, is_invalid_json_path);
path);
} //for
} //function
static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
@ -620,12 +645,18 @@ public:
std::unique_ptr<JsonbToJson> formater;
JsonbPath path;
if (!path.seek(rdata.data, rdata.size)) {
is_invalid_json_path = true;
return;
}
for (size_t i = 0; i < input_rows_count; ++i) {
int l_size = loffsets[i] - loffsets[i - 1];
const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
rdata.data, rdata.size, is_invalid_json_path);
path);
} //for
} //function
static void scalar_vector(FunctionContext* context, const StringRef& ldata,
@ -647,8 +678,15 @@ public:
int r_size = roffsets[i] - roffsets[i - 1];
const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
JsonbPath path;
if (!path.seek(r_raw, r_size)) {
is_invalid_json_path = true;
StringOP::push_null_string(i, res_data, res_offsets, null_map);
return;
}
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, ldata.data,
ldata.size, r_raw, r_size, is_invalid_json_path);
ldata.size, path);
} //for
} //function
};
@ -663,8 +701,7 @@ struct JsonbExtractImpl {
private:
static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
const char* l_raw_str, int l_str_size,
const char* r_raw_str, int r_str_size,
bool& is_invalid_json_path) {
JsonbPath& path) {
if (null_map[i]) {
res[i] = 0;
return;
@ -679,10 +716,9 @@ private:
}
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
JsonbValue* value =
doc->getValue()->findPath(r_raw_str, r_str_size, is_invalid_json_path, nullptr);
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
if (UNLIKELY(!value) || is_invalid_json_path) {
if (UNLIKELY(!value)) {
if constexpr (!only_check_exists) {
null_map[i] = 1;
}
@ -761,8 +797,14 @@ public:
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
int r_str_size = roffsets[i] - roffsets[i - 1];
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, r_raw_str, r_str_size,
is_invalid_json_path);
JsonbPath path;
if (!path.seek(r_raw_str, r_str_size)) {
is_invalid_json_path = true;
res[i] = 0;
return;
}
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
} //for
} //function
static void scalar_vector(FunctionContext* context, const StringRef& ldata,
@ -780,8 +822,14 @@ public:
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
int r_str_size = roffsets[i] - roffsets[i - 1];
inner_loop_impl(i, res, null_map, ldata.data, ldata.size, r_raw_str, r_str_size,
is_invalid_json_path);
JsonbPath path;
if (!path.seek(r_raw_str, r_str_size)) {
is_invalid_json_path = true;
res[i] = 0;
return;
}
inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path);
} //for
} //function
static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
@ -790,6 +838,12 @@ public:
size_t size = loffsets.size();
res.resize(size);
JsonbPath path;
if (!path.seek(rdata.data, rdata.size)) {
is_invalid_json_path = true;
return;
}
for (size_t i = 0; i < loffsets.size(); i++) {
if constexpr (only_check_exists) {
res[i] = 0;
@ -798,8 +852,7 @@ public:
const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
int l_str_size = loffsets[i] - loffsets[i - 1];
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, rdata.data, rdata.size,
is_invalid_json_path);
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
} //for
} //function
};