[improve](jsonb)Invalid json path prompts an error instead of null (#19646)
1. Invalid json path prompts an error instead of null:
before:
```sql
mysql> SELECT jsonb_extract('[{"k1":"v41","k2":400},1,"a",3.14]', '$[a]');
+-------------------------------------------------------------+
| jsonb_extract('[{"k1":"v41","k2":400},1,"a",3.14]', '$[a]') |
+-------------------------------------------------------------+
| NULL |
+-------------------------------------------------------------+
1 row in set (0.01 sec)
```
now
```sql
mysql> SELECT jsonb_extract('[{"k1":"v41","k2":400},1,"a",3.14]', '$[a]');
ERROR 1105 (HY000): errCode = 2, detailMessage = (127.0.0.1)[INVALID_ARGUMENT]Json path error: Invalid Json Path for value: $[a]
```
2. fix some problem: https://github.com/apache/doris/pull/19185
a. support negative numbers
```sql
mysql> SELECT jsonb_extract('[{"k1":"v41","k2":400},1,"a",3.14]', '$[-2]');
+--------------------------------------------------------------+
| jsonb_extract('[{"k1":"v41","k2":400},1,"a",3.14]', '$[-2]') |
+--------------------------------------------------------------+
| "a" |
+--------------------------------------------------------------+
1 row in set (0.02 sec)
```
b. Avoid using unnecessary memory
3. Supplementary regression test
This commit is contained in:
@ -375,6 +375,8 @@ public:
|
||||
auto& rdata = jsonb_path_column->get_chars();
|
||||
auto& roffsets = jsonb_path_column->get_offsets();
|
||||
|
||||
bool is_invalid_json_path = false;
|
||||
|
||||
// execute Impl
|
||||
if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
|
||||
std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
|
||||
@ -382,26 +384,36 @@ public:
|
||||
auto& res_offsets = res->get_offsets();
|
||||
if (col_const[0]) {
|
||||
Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets,
|
||||
res_data, res_offsets, null_map->get_data());
|
||||
res_data, res_offsets, null_map->get_data(),
|
||||
is_invalid_json_path);
|
||||
} else if (col_const[1]) {
|
||||
Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0),
|
||||
res_data, res_offsets, null_map->get_data());
|
||||
res_data, res_offsets, null_map->get_data(),
|
||||
is_invalid_json_path);
|
||||
} else {
|
||||
Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data,
|
||||
res_offsets, null_map->get_data());
|
||||
res_offsets, null_map->get_data(), is_invalid_json_path);
|
||||
}
|
||||
} else {
|
||||
if (col_const[0]) {
|
||||
Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets,
|
||||
res->get_data(), null_map->get_data());
|
||||
res->get_data(), null_map->get_data(), is_invalid_json_path);
|
||||
} else if (col_const[1]) {
|
||||
Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0),
|
||||
res->get_data(), null_map->get_data());
|
||||
res->get_data(), null_map->get_data(), is_invalid_json_path);
|
||||
} else {
|
||||
Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
|
||||
null_map->get_data());
|
||||
null_map->get_data(), is_invalid_json_path);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_invalid_json_path) {
|
||||
return Status::InvalidArgument(
|
||||
"Json path error: {} for value: {}",
|
||||
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
|
||||
std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
|
||||
}
|
||||
|
||||
block.get_by_position(result).column =
|
||||
ColumnNullable::create(std::move(res), std::move(null_map));
|
||||
return Status::OK();
|
||||
@ -420,7 +432,7 @@ private:
|
||||
const std::unique_ptr<JsonbWriter>& writer,
|
||||
std::unique_ptr<JsonbToJson>& formater,
|
||||
const char* l_raw, int l_size, const char* r_raw,
|
||||
int r_size) {
|
||||
int r_size, bool& is_invalid_json_path) {
|
||||
String path(r_raw, r_size);
|
||||
|
||||
if (null_map[i]) {
|
||||
@ -436,8 +448,9 @@ private:
|
||||
}
|
||||
|
||||
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
|
||||
JsonbValue* value = doc->getValue()->findPath(r_raw, r_size, nullptr);
|
||||
if (UNLIKELY(!value)) {
|
||||
JsonbValue* value = doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr);
|
||||
|
||||
if (UNLIKELY(!value) || is_invalid_json_path) {
|
||||
StringOP::push_null_string(i, res_data, res_offsets, null_map);
|
||||
return;
|
||||
}
|
||||
@ -494,7 +507,8 @@ public:
|
||||
const ColumnString::Offsets& loffsets,
|
||||
const ColumnString::Chars& rdata,
|
||||
const ColumnString::Offsets& roffsets, ColumnString::Chars& res_data,
|
||||
ColumnString::Offsets& res_offsets, NullMap& null_map) {
|
||||
ColumnString::Offsets& res_offsets, NullMap& null_map,
|
||||
bool& is_invalid_json_path) {
|
||||
size_t input_rows_count = loffsets.size();
|
||||
res_offsets.resize(input_rows_count);
|
||||
|
||||
@ -513,13 +527,13 @@ public:
|
||||
const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
|
||||
|
||||
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
|
||||
r_raw, r_size);
|
||||
r_raw, r_size, is_invalid_json_path);
|
||||
} //for
|
||||
} //function
|
||||
static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
|
||||
const ColumnString::Offsets& loffsets, const StringRef& rdata,
|
||||
ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
|
||||
NullMap& null_map) {
|
||||
NullMap& null_map, bool& is_invalid_json_path) {
|
||||
size_t input_rows_count = loffsets.size();
|
||||
res_offsets.resize(input_rows_count);
|
||||
|
||||
@ -535,13 +549,14 @@ public:
|
||||
const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
|
||||
|
||||
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
|
||||
rdata.data, rdata.size);
|
||||
rdata.data, rdata.size, is_invalid_json_path);
|
||||
} //for
|
||||
} //function
|
||||
static void scalar_vector(FunctionContext* context, const StringRef& ldata,
|
||||
const ColumnString::Chars& rdata,
|
||||
const ColumnString::Offsets& roffsets, ColumnString::Chars& res_data,
|
||||
ColumnString::Offsets& res_offsets, NullMap& null_map) {
|
||||
ColumnString::Offsets& res_offsets, NullMap& null_map,
|
||||
bool& is_invalid_json_path) {
|
||||
size_t input_rows_count = roffsets.size();
|
||||
res_offsets.resize(input_rows_count);
|
||||
|
||||
@ -557,7 +572,7 @@ public:
|
||||
const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
|
||||
|
||||
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, ldata.data,
|
||||
ldata.size, r_raw, r_size);
|
||||
ldata.size, r_raw, r_size, is_invalid_json_path);
|
||||
} //for
|
||||
} //function
|
||||
};
|
||||
@ -572,7 +587,8 @@ struct JsonbExtractImpl {
|
||||
private:
|
||||
static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
|
||||
const char* l_raw_str, int l_str_size,
|
||||
const char* r_raw_str, int r_str_size) {
|
||||
const char* r_raw_str, int r_str_size,
|
||||
bool& is_invalid_json_path) {
|
||||
if (null_map[i]) {
|
||||
res[i] = 0;
|
||||
return;
|
||||
@ -587,9 +603,10 @@ private:
|
||||
}
|
||||
|
||||
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
|
||||
JsonbValue* value = doc->getValue()->findPath(r_raw_str, r_str_size, nullptr);
|
||||
JsonbValue* value =
|
||||
doc->getValue()->findPath(r_raw_str, r_str_size, is_invalid_json_path, nullptr);
|
||||
|
||||
if (UNLIKELY(!value)) {
|
||||
if (UNLIKELY(!value) || is_invalid_json_path) {
|
||||
if constexpr (!only_check_exists) {
|
||||
null_map[i] = 1;
|
||||
}
|
||||
@ -653,7 +670,7 @@ public:
|
||||
const ColumnString::Offsets& loffsets,
|
||||
const ColumnString::Chars& rdata,
|
||||
const ColumnString::Offsets& roffsets, Container& res,
|
||||
NullMap& null_map) {
|
||||
NullMap& null_map, bool& is_invalid_json_path) {
|
||||
size_t size = loffsets.size();
|
||||
res.resize(size);
|
||||
|
||||
@ -668,13 +685,14 @@ public:
|
||||
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
|
||||
int r_str_size = roffsets[i] - roffsets[i - 1];
|
||||
|
||||
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, r_raw_str, r_str_size);
|
||||
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, r_raw_str, r_str_size,
|
||||
is_invalid_json_path);
|
||||
} //for
|
||||
} //function
|
||||
static void scalar_vector(FunctionContext* context, const StringRef& ldata,
|
||||
const ColumnString::Chars& rdata,
|
||||
const ColumnString::Offsets& roffsets, Container& res,
|
||||
NullMap& null_map) {
|
||||
NullMap& null_map, bool& is_invalid_json_path) {
|
||||
size_t size = roffsets.size();
|
||||
res.resize(size);
|
||||
|
||||
@ -686,12 +704,13 @@ public:
|
||||
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
|
||||
int r_str_size = roffsets[i] - roffsets[i - 1];
|
||||
|
||||
inner_loop_impl(i, res, null_map, ldata.data, ldata.size, r_raw_str, r_str_size);
|
||||
inner_loop_impl(i, res, null_map, ldata.data, ldata.size, r_raw_str, r_str_size,
|
||||
is_invalid_json_path);
|
||||
} //for
|
||||
} //function
|
||||
static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
|
||||
const ColumnString::Offsets& loffsets, const StringRef& rdata,
|
||||
Container& res, NullMap& null_map) {
|
||||
Container& res, NullMap& null_map, bool& is_invalid_json_path) {
|
||||
size_t size = loffsets.size();
|
||||
res.resize(size);
|
||||
|
||||
@ -703,7 +722,8 @@ public:
|
||||
const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
|
||||
int l_str_size = loffsets[i] - loffsets[i - 1];
|
||||
|
||||
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, rdata.data, rdata.size);
|
||||
inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, rdata.data, rdata.size,
|
||||
is_invalid_json_path);
|
||||
} //for
|
||||
} //function
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user